python

2026-03-30 20:25:42 +05:00
parent 139f9f1bd2
commit 373ed28445
2449 changed files with 53602 additions and 0 deletions
--- a/custom_components/deepstack_object/image_processing.py
+++ b/custom_components/deepstack_object/image_processing.py
@@ -0,0 +1,534 @@
+"""
+Component that will perform object detection and identification via deepstack.
+
+For more details about this platform, please refer to the documentation at
+https://home-assistant.io/components/image_processing.deepstack_object
+"""
+from collections import namedtuple, Counter
+import datetime
+import io
+import logging
+import os
+import re
+from datetime import timedelta
+from typing import Tuple, Dict, List
+from pathlib import Path
+
+from PIL import Image, ImageDraw
+
+import deepstack.core as ds
+import homeassistant.helpers.config_validation as cv
+import homeassistant.util.dt as dt_util
+import voluptuous as vol
+from homeassistant.util.pil import draw_box
+from homeassistant.components.image_processing import (
+    ATTR_CONFIDENCE,
+    CONF_CONFIDENCE,
+    CONF_ENTITY_ID,
+    CONF_NAME,
+    CONF_SOURCE,
+    DEFAULT_CONFIDENCE,
+    DOMAIN,
+    PLATFORM_SCHEMA,
+    ImageProcessingEntity,
+)
+from homeassistant.const import (
+    ATTR_ENTITY_ID,
+    ATTR_NAME,
+    CONF_IP_ADDRESS,
+    CONF_PORT,
+)
+from homeassistant.core import split_entity_id
+
+_LOGGER = logging.getLogger(__name__)
+
+ANIMAL = "animal"
+ANIMALS = [
+    "bird",
+    "cat",
+    "dog",
+    "horse",
+    "sheep",
+    "cow",
+    "elephant",
+    "bear",
+    "zebra",
+    "giraffe",
+]
+OTHER = "other"
+PERSON = "person"
+VEHICLE = "vehicle"
+VEHICLES = ["bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck"]
+OBJECT_TYPES = [ANIMAL, OTHER, PERSON, VEHICLE]
+
+
+CONF_API_KEY = "api_key"
+CONF_TARGET = "target"
+CONF_TARGETS = "targets"
+CONF_TIMEOUT = "timeout"
+CONF_SAVE_FILE_FORMAT = "save_file_format"
+CONF_SAVE_FILE_FOLDER = "save_file_folder"
+CONF_SAVE_TIMESTAMPTED_FILE = "save_timestamped_file"
+CONF_ALWAYS_SAVE_LATEST_FILE = "always_save_latest_file"
+CONF_SHOW_BOXES = "show_boxes"
+CONF_ROI_Y_MIN = "roi_y_min"
+CONF_ROI_X_MIN = "roi_x_min"
+CONF_ROI_Y_MAX = "roi_y_max"
+CONF_ROI_X_MAX = "roi_x_max"
+CONF_SCALE = "scale"
+CONF_CUSTOM_MODEL = "custom_model"
+CONF_CROP_ROI = "crop_to_roi"
+
+DATETIME_FORMAT = "%Y-%m-%d_%H-%M-%S-%f"
+DEFAULT_API_KEY = ""
+DEFAULT_TARGETS = [{CONF_TARGET: PERSON}]
+DEFAULT_TIMEOUT = 10
+DEFAULT_ROI_Y_MIN = 0.0
+DEFAULT_ROI_Y_MAX = 1.0
+DEFAULT_ROI_X_MIN = 0.0
+DEFAULT_ROI_X_MAX = 1.0
+DEAULT_SCALE = 1.0
+DEFAULT_ROI = (
+    DEFAULT_ROI_Y_MIN,
+    DEFAULT_ROI_X_MIN,
+    DEFAULT_ROI_Y_MAX,
+    DEFAULT_ROI_X_MAX,
+)
+
+EVENT_OBJECT_DETECTED = "deepstack.object_detected"
+BOX = "box"
+FILE = "file"
+OBJECT = "object"
+SAVED_FILE = "saved_file"
+MIN_CONFIDENCE = 0.1
+JPG = "jpg"
+PNG = "png"
+
+# rgb(red, green, blue)
+RED = (255, 0, 0)  # For objects within the ROI
+GREEN = (0, 255, 0)  # For ROI box
+YELLOW = (255, 255, 0)  # Unused
+
+TARGETS_SCHEMA = {
+    vol.Required(CONF_TARGET): cv.string,
+    vol.Optional(CONF_CONFIDENCE): vol.All(
+        vol.Coerce(float), vol.Range(min=10, max=100)
+    ),
+}
+
+
+PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
+    {
+        vol.Required(CONF_IP_ADDRESS): cv.string,
+        vol.Required(CONF_PORT): cv.port,
+        vol.Optional(CONF_API_KEY, default=DEFAULT_API_KEY): cv.string,
+        vol.Optional(CONF_TIMEOUT, default=DEFAULT_TIMEOUT): cv.positive_int,
+        vol.Optional(CONF_CUSTOM_MODEL, default=""): cv.string,
+        vol.Optional(CONF_TARGETS, default=DEFAULT_TARGETS): vol.All(
+            cv.ensure_list, [vol.Schema(TARGETS_SCHEMA)]
+        ),
+        vol.Optional(CONF_ROI_Y_MIN, default=DEFAULT_ROI_Y_MIN): cv.small_float,
+        vol.Optional(CONF_ROI_X_MIN, default=DEFAULT_ROI_X_MIN): cv.small_float,
+        vol.Optional(CONF_ROI_Y_MAX, default=DEFAULT_ROI_Y_MAX): cv.small_float,
+        vol.Optional(CONF_ROI_X_MAX, default=DEFAULT_ROI_X_MAX): cv.small_float,
+        vol.Optional(CONF_SCALE, default=DEAULT_SCALE): vol.All(
+            vol.Coerce(float, vol.Range(min=0.1, max=1))
+        ),
+        vol.Optional(CONF_SAVE_FILE_FOLDER): cv.isdir,
+        vol.Optional(CONF_SAVE_FILE_FORMAT, default=JPG): vol.In([JPG, PNG]),
+        vol.Optional(CONF_SAVE_TIMESTAMPTED_FILE, default=False): cv.boolean,
+        vol.Optional(CONF_ALWAYS_SAVE_LATEST_FILE, default=False): cv.boolean,
+        vol.Optional(CONF_SHOW_BOXES, default=True): cv.boolean,
+        vol.Optional(CONF_CROP_ROI, default=False): cv.boolean,
+    }
+)
+
+Box = namedtuple("Box", "y_min x_min y_max x_max")
+Point = namedtuple("Point", "y x")
+
+
+def point_in_box(box: Box, point: Point) -> bool:
+    """Return true if point lies in box"""
+    if (box.x_min <= point.x <= box.x_max) and (box.y_min <= point.y <= box.y_max):
+        return True
+    return False
+
+
+def object_in_roi(roi: dict, centroid: dict) -> bool:
+    """Convenience to convert dicts to the Point and Box."""
+    target_center_point = Point(centroid["y"], centroid["x"])
+    roi_box = Box(roi["y_min"], roi["x_min"], roi["y_max"], roi["x_max"])
+    return point_in_box(roi_box, target_center_point)
+
+
+def get_valid_filename(name: str) -> str:
+    return re.sub(r"(?u)[^-\w.]", "", str(name).strip().replace(" ", "_"))
+
+
+def get_object_type(object_name: str) -> str:
+    if object_name == PERSON:
+        return PERSON
+    elif object_name in ANIMALS:
+        return ANIMAL
+    elif object_name in VEHICLES:
+        return VEHICLE
+    else:
+        return OTHER
+
+
+def get_objects(predictions: list, img_width: int, img_height: int) -> List[Dict]:
+    """Return objects with formatting and extra info."""
+    objects = []
+    decimal_places = 3
+    for pred in predictions:
+        box_width = pred["x_max"] - pred["x_min"]
+        box_height = pred["y_max"] - pred["y_min"]
+        box = {
+            "height": round(box_height / img_height, decimal_places),
+            "width": round(box_width / img_width, decimal_places),
+            "y_min": round(pred["y_min"] / img_height, decimal_places),
+            "x_min": round(pred["x_min"] / img_width, decimal_places),
+            "y_max": round(pred["y_max"] / img_height, decimal_places),
+            "x_max": round(pred["x_max"] / img_width, decimal_places),
+        }
+        box_area = round(box["height"] * box["width"], decimal_places)
+        centroid = {
+            "x": round(box["x_min"] + (box["width"] / 2), decimal_places),
+            "y": round(box["y_min"] + (box["height"] / 2), decimal_places),
+        }
+        name = pred["label"]
+        object_type = get_object_type(name)
+        confidence = round(pred["confidence"] * 100, decimal_places)
+
+        objects.append(
+            {
+                "bounding_box": box,
+                "box_area": box_area,
+                "centroid": centroid,
+                "name": name,
+                "object_type": object_type,
+                "confidence": confidence,
+            }
+        )
+    return objects
+
+
+def setup_platform(hass, config, add_devices, discovery_info=None):
+    """Set up the classifier."""
+    save_file_folder = config.get(CONF_SAVE_FILE_FOLDER)
+    if save_file_folder:
+        save_file_folder = Path(save_file_folder)
+
+    entities = []
+    for camera in config[CONF_SOURCE]:
+        object_entity = ObjectClassifyEntity(
+            ip_address=config.get(CONF_IP_ADDRESS),
+            port=config.get(CONF_PORT),
+            api_key=config.get(CONF_API_KEY),
+            timeout=config.get(CONF_TIMEOUT),
+            custom_model=config.get(CONF_CUSTOM_MODEL),
+            targets=config.get(CONF_TARGETS),
+            confidence=config.get(CONF_CONFIDENCE),
+            roi_y_min=config[CONF_ROI_Y_MIN],
+            roi_x_min=config[CONF_ROI_X_MIN],
+            roi_y_max=config[CONF_ROI_Y_MAX],
+            roi_x_max=config[CONF_ROI_X_MAX],
+            scale=config[CONF_SCALE],
+            show_boxes=config[CONF_SHOW_BOXES],
+            save_file_folder=save_file_folder,
+            save_file_format=config[CONF_SAVE_FILE_FORMAT],
+            save_timestamped_file=config.get(CONF_SAVE_TIMESTAMPTED_FILE),
+            always_save_latest_file=config.get(CONF_ALWAYS_SAVE_LATEST_FILE),
+            crop_roi=config[CONF_CROP_ROI],
+            camera_entity=camera.get(CONF_ENTITY_ID),
+            name=camera.get(CONF_NAME),
+        )
+        entities.append(object_entity)
+    add_devices(entities)
+
+
+class ObjectClassifyEntity(ImageProcessingEntity):
+    """Perform a object classification."""
+
+    def __init__(
+        self,
+        ip_address,
+        port,
+        api_key,
+        timeout,
+        custom_model,
+        targets,
+        confidence,
+        roi_y_min,
+        roi_x_min,
+        roi_y_max,
+        roi_x_max,
+        scale,
+        show_boxes,
+        save_file_folder,
+        save_file_format,
+        save_timestamped_file,
+        always_save_latest_file,
+        crop_roi,
+        camera_entity,
+        name=None,
+    ):
+        """Init with the API key and model id."""
+        super().__init__()
+        self._dsobject = ds.DeepstackObject(
+            ip=ip_address,
+            port=port,
+            api_key=api_key,
+            timeout=timeout,
+            min_confidence=MIN_CONFIDENCE,
+            custom_model=custom_model,
+        )
+        self._custom_model = custom_model
+        self._confidence = confidence
+        self._summary = {}
+        self._targets = targets
+        for target in self._targets:
+            if CONF_CONFIDENCE not in target.keys():
+                target.update({CONF_CONFIDENCE: self._confidence})
+        self._targets_names = [
+            target[CONF_TARGET] for target in targets
+        ]  # can be a name or a type
+        self._camera = camera_entity
+        if name:
+            self._name = name
+        else:
+            camera_name = split_entity_id(camera_entity)[1]
+            self._name = "deepstack_object_{}".format(camera_name)
+
+        self._state = None
+        self._objects = []  # The parsed raw data
+        self._targets_found = []
+        self._last_detection = None
+
+        self._roi_dict = {
+            "y_min": roi_y_min,
+            "x_min": roi_x_min,
+            "y_max": roi_y_max,
+            "x_max": roi_x_max,
+        }
+        self._crop_roi = crop_roi
+        self._scale = scale
+        self._show_boxes = show_boxes
+        self._image_width = None
+        self._image_height = None
+        self._save_file_folder = save_file_folder
+        self._save_file_format = save_file_format
+        self._always_save_latest_file = always_save_latest_file
+        self._save_timestamped_file = save_timestamped_file
+        self._always_save_latest_file = always_save_latest_file
+        self._image = None
+
+    def process_image(self, image):
+        """Process an image."""
+        self._image = Image.open(io.BytesIO(bytearray(image)))
+        self._image_width, self._image_height = self._image.size
+        # scale to roi
+        if self._crop_roi:
+            roi = (
+                self._image_width * self._roi_dict["x_min"],
+                self._image_height * self._roi_dict["y_min"],
+                self._image_width * (self._roi_dict["x_max"]),
+                self._image_height * (self._roi_dict["y_max"])
+            )
+            self._image = self._image.crop(roi)
+            self._image_width, self._image_height = self._image.size
+            with io.BytesIO() as output:
+                self._image.save(output, format="JPEG")
+                image = output.getvalue()
+            _LOGGER.debug(
+                (
+                    f"Image cropped with : {self._roi_dict} W={self._image_width} H={self._image_height}"
+                )
+            )
+        # resize image if different then default
+        if self._scale != DEAULT_SCALE:
+            newsize = (self._image_width * self._scale, self._image_width * self._scale)
+            self._image.thumbnail(newsize, Image.ANTIALIAS)
+            self._image_width, self._image_height = self._image.size
+            with io.BytesIO() as output:
+                self._image.save(output, format="JPEG")
+                image = output.getvalue()
+            _LOGGER.debug(
+                (
+                    f"Image scaled with : {self._scale} W={self._image_width} H={self._image_height}"
+                )
+            )
+
+        self._state = None
+        self._objects = []  # The parsed raw data
+        self._targets_found = []
+        self._summary = {}
+        saved_image_path = None
+
+        try:
+            predictions = self._dsobject.detect(image)
+        except ds.DeepstackException as exc:
+            _LOGGER.error("Deepstack error : %s", exc)
+            return
+
+        self._objects = get_objects(predictions, self._image_width, self._image_height)
+        self._targets_found = []
+
+        for obj in self._objects:
+            if not (
+                (obj["name"] in self._targets_names)
+                or (obj["object_type"] in self._targets_names)
+            ):
+                continue
+            ## Then check if the type has a configured confidence, if yes assign
+            ## Then if a confidence for a named object, this takes precedence over type confidence
+            confidence = None
+            for target in self._targets:
+                if obj["object_type"] == target[CONF_TARGET]:
+                    confidence = target[CONF_CONFIDENCE]
+            for target in self._targets:
+                if obj["name"] == target[CONF_TARGET]:
+                    confidence = target[CONF_CONFIDENCE]
+            if obj["confidence"] > confidence:
+                if not self._crop_roi and not object_in_roi(self._roi_dict, obj["centroid"]):
+                    continue
+                self._targets_found.append(obj)
+
+        self._state = len(self._targets_found)
+        if self._state > 0:
+            self._last_detection = dt_util.now().strftime(DATETIME_FORMAT)
+
+        targets_found = [
+            obj["name"] for obj in self._targets_found
+        ]  # Just the list of target names, e.g. [car, car, person]
+        self._summary = dict(Counter(targets_found))  # e.g. {'car':2, 'person':1}
+
+        if self._save_file_folder:
+            if self._state > 0 or self._always_save_latest_file:
+                saved_image_path = self.save_image(
+                    self._targets_found,
+                    self._save_file_folder,
+                )
+
+        # Fire events
+        for target in self._targets_found:
+            target_event_data = target.copy()
+            target_event_data[ATTR_ENTITY_ID] = self.entity_id
+            if saved_image_path:
+                target_event_data[SAVED_FILE] = saved_image_path
+            self.hass.bus.fire(EVENT_OBJECT_DETECTED, target_event_data)
+
+    @property
+    def camera_entity(self):
+        """Return camera entity id from process pictures."""
+        return self._camera
+
+    @property
+    def state(self):
+        """Return the state of the entity."""
+        return self._state
+
+    @property
+    def name(self):
+        """Return the name of the sensor."""
+        return self._name
+
+    @property
+    def unit_of_measurement(self):
+        """Return the unit of measurement."""
+        return "targets"
+
+    @property
+    def should_poll(self):
+        """Return the polling state."""
+        return False
+
+    @property
+    def extra_state_attributes(self) -> Dict:
+        """Return device specific state attributes."""
+        attr = {}
+        attr["targets"] = self._targets
+        attr["targets_found"] = [
+            {obj["name"]: obj["confidence"]} for obj in self._targets_found
+        ]
+        attr["summary"] = self._summary
+        if self._last_detection:
+            attr["last_target_detection"] = self._last_detection
+        if self._custom_model:
+            attr["custom_model"] = self._custom_model
+        attr["all_objects"] = [
+            {obj["name"]: obj["confidence"]} for obj in self._objects
+        ]
+        if self._save_file_folder:
+            attr[CONF_SAVE_FILE_FOLDER] = str(self._save_file_folder)
+            attr[CONF_SAVE_FILE_FORMAT] = self._save_file_format
+            attr[CONF_SAVE_TIMESTAMPTED_FILE] = self._save_timestamped_file
+            attr[CONF_ALWAYS_SAVE_LATEST_FILE] = self._always_save_latest_file
+        return attr
+
+    def save_image(self, targets, directory) -> str:
+        """Draws the actual bounding box of the detected objects.
+
+        Returns: saved_image_path, which is the path to the saved timestamped file if configured, else the default saved image.
+        """
+        try:
+            img = self._image.convert("RGB")
+        except UnidentifiedImageError:
+            _LOGGER.warning("Deepstack unable to process image, bad data")
+            return
+        draw = ImageDraw.Draw(img)
+
+        roi_tuple = tuple(self._roi_dict.values())
+        if roi_tuple != DEFAULT_ROI and self._show_boxes and not self._crop_roi:
+            draw_box(
+                draw,
+                roi_tuple,
+                img.width,
+                img.height,
+                text="ROI",
+                color=GREEN,
+            )
+
+        for obj in targets:
+            if not self._show_boxes:
+                break
+            name = obj["name"]
+            confidence = obj["confidence"]
+            box = obj["bounding_box"]
+            centroid = obj["centroid"]
+            box_label = f"{name}: {confidence:.1f}%"
+
+            draw_box(
+                draw,
+                (box["y_min"], box["x_min"], box["y_max"], box["x_max"]),
+                img.width,
+                img.height,
+                text=box_label,
+                color=RED,
+            )
+
+            # draw bullseye
+            draw.text(
+                (centroid["x"] * img.width, centroid["y"] * img.height),
+                text="X",
+                fill=RED,
+            )
+
+        # Save images, returning the path of saved image as str
+        latest_save_path = (
+            directory
+            / f"{get_valid_filename(self._name).lower()}_latest.{self._save_file_format}"
+        )
+        img.save(latest_save_path)
+        _LOGGER.info("Deepstack saved file %s", latest_save_path)
+        saved_image_path = latest_save_path
+
+        if self._save_timestamped_file:
+            timestamp_save_path = (
+                directory
+                / f"{self._name}_{self._last_detection}.{self._save_file_format}"
+            )
+            img.save(timestamp_save_path)
+            _LOGGER.info("Deepstack saved file %s", timestamp_save_path)
+            saved_image_path = timestamp_save_path
+        return str(saved_image_path)