Skip to content

Commit 1e7b65f

Browse files
committed
fix: async detection for YOLO and Roboflow processors

- Remove fps parameter from moondream processors (use detection_fps only)
- Add async detection pattern to YOLOPoseProcessor with detection_fps=15.0
- Fix YOLOPoseVideoTrack to output at 30 FPS (was 1 FPS)
- Add async detection pattern to RoboflowCloudDetectionProcessor with detection_fps=5.0
- Add async detection pattern to RoboflowLocalDetectionProcessor with detection_fps=10.0
- All processors now pass frames through at full FPS while detection runs in background

1 parent f68dc9c commit 1e7b65f
1 parent f68dc9c commit 1e7b65f

File tree

5 files changed

+318
-205
lines changed

5 files changed

+318
-205
lines changed

plugins/moondream/vision_agents/plugins/moondream/detection/moondream_cloud_processor.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,7 @@ class CloudDetectionProcessor(
5151
detect_objects: Object(s) to detect. Moondream uses zero-shot detection,
5252
so any object string works. Examples: "person", "car",
5353
"basketball", ["person", "car", "dog"]. Default: "person"
54-
fps: Frame processing rate (default: 30)
55-
detection_fps: Rate at which to send frames for detection (default: 2).
54+
detection_fps: Rate at which to send frames for detection (default: 5.0).
5655
Lower values reduce API calls while maintaining smooth video.
5756
interval: Processing interval in seconds (default: 0)
5857
max_workers: Number of worker threads for CPU-intensive operations (default: 10)
@@ -65,7 +64,6 @@ def __init__(
6564
api_key: Optional[str] = None,
6665
conf_threshold: float = 0.3,
6766
detect_objects: Union[str, List[str]] = "person",
68-
fps: int = 30,
6967
detection_fps: float = 5.0,
7068
interval: int = 0,
7169
max_workers: int = 10,
@@ -74,7 +72,6 @@ def __init__(
7472

7573
self.api_key = api_key or os.getenv("MOONDREAM_API_KEY")
7674
self.conf_threshold = conf_threshold
77-
self.fps = fps
7875
self.detection_fps = detection_fps
7976
self.max_workers = max_workers
8077
self._shutdown = False
@@ -122,7 +119,7 @@ def __init__(
122119

123120
logger.info("🌙 Moondream Processor initialized")
124121
logger.info(f"🎯 Detection configured for objects: {self.detect_objects}")
125-
logger.info(f"📹 Video FPS: {fps}, Detection FPS: {detection_fps}")
122+
logger.info(f"📹 Detection FPS: {detection_fps}")
126123

127124
async def process_video(
128125
self,
@@ -141,20 +138,17 @@ async def process_video(
141138
logger.info("✅ Moondream process_video starting")
142139

143140
if shared_forwarder is not None:
144-
# Use the shared forwarder
141+
# Use the shared forwarder at its native FPS
145142
self._video_forwarder = shared_forwarder
146-
logger.info(
147-
f"🎥 Moondream subscribing to shared VideoForwarder at {self.fps} FPS"
148-
)
143+
logger.info("🎥 Moondream subscribing to shared VideoForwarder")
149144
self._video_forwarder.add_frame_handler(
150-
self._process_and_add_frame, fps=float(self.fps), name="moondream"
145+
self._process_and_add_frame, name="moondream"
151146
)
152147
else:
153-
# Create our own VideoForwarder
148+
# Create our own VideoForwarder at default FPS
154149
self._video_forwarder = VideoForwarder(
155150
incoming_track, # type: ignore[arg-type]
156-
max_buffer=30, # 1 second at 30fps
157-
fps=self.fps,
151+
max_buffer=30,
158152
name="moondream_forwarder",
159153
)
160154

@@ -232,7 +226,9 @@ async def _process_and_add_frame(self, frame: av.VideoFrame):
232226
now = asyncio.get_event_loop().time()
233227

234228
# Check if we should start a new detection
235-
detection_interval = 1.0 / self.detection_fps if self.detection_fps > 0 else float("inf")
229+
detection_interval = (
230+
1.0 / self.detection_fps if self.detection_fps > 0 else float("inf")
231+
)
236232
should_detect = (
237233
not self._detection_in_progress
238234
and (now - self._last_detection_time) >= detection_interval
@@ -275,7 +271,9 @@ async def _run_detection_background(self, frame_array: np.ndarray):
275271
results = await self._run_inference(frame_array)
276272
self._cached_results = results
277273
self._last_results = results
278-
logger.debug(f"🔍 Detection complete: {len(results.get('detections', []))} objects")
274+
logger.debug(
275+
f"🔍 Detection complete: {len(results.get('detections', []))} objects"
276+
)
279277
except Exception as e:
280278
logger.warning(f"⚠️ Background detection failed: {e}")
281279
finally:

plugins/moondream/vision_agents/plugins/moondream/detection/moondream_local_processor.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ class LocalDetectionProcessor(
5353
detect_objects: Object(s) to detect. Moondream uses zero-shot detection,
5454
so any object string works. Examples: "person", "car",
5555
"basketball", ["person", "car", "dog"]. Default: "person"
56-
fps: Frame processing rate (default: 30)
5756
detection_fps: Rate at which to run detection (default: 10.0).
5857
Lower values reduce CPU/GPU load while maintaining smooth video.
5958
interval: Processing interval in seconds (default: 0)
@@ -71,7 +70,6 @@ def __init__(
7170
self,
7271
conf_threshold: float = 0.3,
7372
detect_objects: Union[str, List[str]] = "person",
74-
fps: int = 30,
7573
detection_fps: float = 10.0,
7674
interval: int = 0,
7775
max_workers: int = 10,
@@ -87,7 +85,6 @@ def __init__(
8785
self.options = options
8886
self.model_name = model_name
8987
self.conf_threshold = conf_threshold
90-
self.fps = fps
9188
self.detection_fps = detection_fps
9289
self.max_workers = max_workers
9390
self._shutdown = False
@@ -135,7 +132,7 @@ def __init__(
135132
logger.info("🌙 Moondream Local Processor initialized")
136133
logger.info(f"🎯 Detection configured for objects: {self.detect_objects}")
137134
logger.info(f"🔧 Device: {self.device}")
138-
logger.info(f"📹 Video FPS: {fps}, Detection FPS: {detection_fps}")
135+
logger.info(f"📹 Detection FPS: {detection_fps}")
139136

140137
@property
141138
def device(self) -> str:
@@ -241,18 +238,17 @@ async def process_video(
241238
await self._prepare_moondream()
242239

243240
if shared_forwarder is not None:
241+
# Use the shared forwarder at its native FPS
244242
self._video_forwarder = shared_forwarder
245-
logger.info(
246-
f"🎥 Moondream subscribing to shared VideoForwarder at {self.fps} FPS"
247-
)
243+
logger.info("🎥 Moondream subscribing to shared VideoForwarder")
248244
self._video_forwarder.add_frame_handler(
249-
self._process_and_add_frame, fps=float(self.fps), name="moondream_local"
245+
self._process_and_add_frame, name="moondream_local"
250246
)
251247
else:
248+
# Create our own VideoForwarder at default FPS
252249
self._video_forwarder = VideoForwarder(
253250
incoming_track, # type: ignore[arg-type]
254-
max_buffer=30, # 1 second at 30fps
255-
fps=self.fps,
251+
max_buffer=30,
256252
name="moondream_local_forwarder",
257253
)
258254

@@ -322,7 +318,9 @@ async def _process_and_add_frame(self, frame: av.VideoFrame):
322318
now = asyncio.get_event_loop().time()
323319

324320
# Check if we should start a new detection
325-
detection_interval = 1.0 / self.detection_fps if self.detection_fps > 0 else float("inf")
321+
detection_interval = (
322+
1.0 / self.detection_fps if self.detection_fps > 0 else float("inf")
323+
)
326324
should_detect = (
327325
not self._detection_in_progress
328326
and (now - self._last_detection_time) >= detection_interval
@@ -364,7 +362,9 @@ async def _run_detection_background(self, frame_array: np.ndarray):
364362
results = await self._run_inference(frame_array)
365363
self._cached_results = results
366364
self._last_results = results
367-
logger.debug(f"🔍 Detection complete: {len(results.get('detections', []))} objects")
365+
logger.debug(
366+
f"🔍 Detection complete: {len(results.get('detections', []))} objects"
367+
)
368368
except Exception as e:
369369
logger.warning(f"⚠️ Background detection failed: {e}")
370370
finally:

0 commit comments

Comments (0)