diff --git a/plugins/roboflow/vision_agents/plugins/roboflow/roboflow_cloud_processor.py b/plugins/roboflow/vision_agents/plugins/roboflow/roboflow_cloud_processor.py index 4a1fe540..d82797f9 100644 --- a/plugins/roboflow/vision_agents/plugins/roboflow/roboflow_cloud_processor.py +++ b/plugins/roboflow/vision_agents/plugins/roboflow/roboflow_cloud_processor.py @@ -70,12 +70,16 @@ async def on_detection_completed(event: roboflow.DetectionCompletedEvent): Example: ["person", "sports ball"] Verify that the classes a supported by the given model. Default - None (all classes are detected). + client: optional custom instance of `inference_sdk.InferenceHTTPClient`. annotate: if True, annotate the detected objects with boxes and labels. Default - True. dim_background_factor: how much to dim the background around detected objects from 0 to 1.0. Effective only when annotate=True. Default - 0.0 (no dimming). - client: optional custom instance of `inference_sdk.InferenceHTTPClient`. + annotate_text_scale: annotation text scale. Default - 0.75. + annotate_text_padding: annotation text padding. Default - 1. + annotate_box_thickness: annotation box thickness. Default - 2. + annotate_text_position: annotation text position. Default - `sv.Position.TOP_CENTER`. Examples: Example usage: @@ -104,10 +108,14 @@ def __init__( api_url: Optional[str] = None, conf_threshold: float = 0.5, fps: int = 5, - annotate: bool = True, classes: Optional[list[str]] = None, - dim_background_factor: float = 0.0, client: Optional[InferenceHTTPClient] = None, + annotate: bool = True, + dim_background_factor: float = 0.0, + annotate_text_scale: float = 0.75, + annotate_text_padding: int = 1, + annotate_box_thickness: int = 2, + annotate_text_position: sv.Position = sv.Position.TOP_CENTER, ): super().__init__(interval=0, receive_audio=False, receive_video=True) @@ -139,6 +147,10 @@ def __init__( self.fps = fps self.dim_background_factor = max(0.0, dim_background_factor) self.annotate = annotate + self._annotate_text_scale = annotate_text_scale + self._annotate_text_padding = annotate_text_padding + self._annotate_box_thickness = annotate_box_thickness + self._annotate_text_position = annotate_text_position self._events: Optional[EventManager] = None self._client.configure( @@ -237,7 +249,14 @@ async def _process_frame(self, frame: av.VideoFrame): if self.annotate: # Annotate frame with detections annotated_image = annotate_image( - image, detections, classes, dim_factor=self.dim_background_factor + image, + detections, + classes, + dim_factor=self.dim_background_factor, + text_scale=self._annotate_text_scale, + text_position=self._annotate_text_position, + text_padding=self._annotate_text_padding, + box_thickness=self._annotate_box_thickness, ) # Convert back to av.VideoFrame diff --git a/plugins/roboflow/vision_agents/plugins/roboflow/roboflow_local_processor.py b/plugins/roboflow/vision_agents/plugins/roboflow/roboflow_local_processor.py index 17416053..b1fc02e9 100644 --- a/plugins/roboflow/vision_agents/plugins/roboflow/roboflow_local_processor.py +++ b/plugins/roboflow/vision_agents/plugins/roboflow/roboflow_local_processor.py @@ -89,13 +89,17 @@ async def on_detection_completed(event: roboflow.DetectionCompletedEvent): Example: ["person", "sports ball"] Verify that the classes a supported by the given model. Default - None (all classes are detected). + model: optional instance of `RFDETRModel` to be used for detections. + Use it provide a model of choosing with custom parameters. annotate: if True, annotate the detected objects with boxes and labels. Default - True. dim_background_factor: how much to dim the background around detected objects from 0 to 1.0. Effective only when annotate=True. Default - 0.0 (no dimming). - model: optional instance of `RFDETRModel` to be used for detections. - Use it provide a model of choosing with custom parameters. + annotate_text_scale: annotation text scale. Default - 0.75. + annotate_text_padding: annotation text padding. Default - 1. + annotate_box_thickness: annotation box thickness. Default - 2. + annotate_text_position: annotation text position. Default - `sv.Position.TOP_CENTER`. """ name = "roboflow_local" @@ -106,9 +110,13 @@ def __init__( conf_threshold: float = 0.5, fps: int = 10, classes: Optional[list[str]] = None, + model: Optional[RFDETR] = None, annotate: bool = True, dim_background_factor: float = 0.0, - model: Optional[RFDETR] = None, + annotate_text_scale: float = 0.75, + annotate_text_padding: int = 1, + annotate_box_thickness: int = 2, + annotate_text_position: sv.Position = sv.Position.TOP_CENTER, ): super().__init__(interval=0, receive_audio=False, receive_video=True) @@ -153,6 +161,10 @@ def __init__( fps=self.fps, max_queue_size=self.fps, # Buffer 1s of the video ) + self._annotate_text_scale = annotate_text_scale + self._annotate_text_padding = annotate_text_padding + self._annotate_box_thickness = annotate_box_thickness + self._annotate_text_position = annotate_text_position async def process_video( self, @@ -267,6 +279,10 @@ async def _process_frame(self, frame: av.VideoFrame) -> None: detections, classes=self._model.class_names, dim_factor=self.dim_background_factor, + text_scale=self._annotate_text_scale, + text_position=self._annotate_text_position, + text_padding=self._annotate_text_padding, + box_thickness=self._annotate_box_thickness, ) # Convert back to av.VideoFrame annotated_frame = av.VideoFrame.from_ndarray(annotated_image) diff --git a/plugins/roboflow/vision_agents/plugins/roboflow/utils.py b/plugins/roboflow/vision_agents/plugins/roboflow/utils.py index 7f563b89..3b6900fd 100644 --- a/plugins/roboflow/vision_agents/plugins/roboflow/utils.py +++ b/plugins/roboflow/vision_agents/plugins/roboflow/utils.py @@ -10,6 +10,10 @@ def annotate_image( detections: sv.Detections, classes: dict[int, str], dim_factor: Optional[float] = None, + text_scale: float = 0.75, + text_padding: int = 1, + box_thickness: int = 2, + text_position: sv.Position = sv.Position.TOP_CENTER, ) -> np.ndarray: """ Draw bounding boxes and labels on frame. @@ -23,14 +27,16 @@ def annotate_image( cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1) image[mask == 0] = (image[mask == 0] * dim_factor).astype(np.uint8) - boxed_image = sv.BoxAnnotator(thickness=1).annotate(image.copy(), detections) + boxed_image = sv.BoxAnnotator(thickness=box_thickness).annotate( + image.copy(), detections + ) detected_class_ids: Iterable[int] = ( detections.class_id if detections.class_id is not None else [] ) labels = [classes[class_id] for class_id in detected_class_ids] labeled_image = sv.LabelAnnotator( - text_position=sv.Position.BOTTOM_CENTER, - text_scale=0.25, - text_padding=1, + text_position=text_position, + text_scale=text_scale, + text_padding=text_padding, ).annotate(boxed_image, detections, labels) return labeled_image