From 4e33f968d5b69c571aab9ca0b6f26e0bf088b2f8 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Tue, 19 May 2026 17:41:27 -0700 Subject: [PATCH 1/9] fix(ml): create null detection markers only after real saves succeed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue #1310: null detections (empty-bbox sentinels marking "image processed, nothing found") were created before create_detections / create_classifications / create_and_update_occurrences_for_detections ran. Two consequences: 1. If any of those downstream steps failed, the image was already flagged as processed via the null marker — filter_processed_images would skip it on the next run, leaving the image permanently in a "processed but no detections" state. Observed on project 171 (400 captures with only null detections). 2. create_and_update_occurrences_for_detections iterated every detection including nulls, so each null marker spawned a phantom Occurrence with determination=NULL. Fix in ami/ml/models/pipeline.py save_results: - Run create_detections / create_classifications / create_and_update_occurrences on the real DetectionResponses only. - After those succeed, build null DetectionResponses for images that ended up without any detections and persist them via a second create_detections call. - Null responses never enter the classification / occurrence loops, so no phantom Occurrence is created even in the happy path. Tests in ami/ml/tests.py TestPipeline: - test_null_detection_does_not_create_phantom_occurrence: asserts the happy path "pipeline found nothing" creates the null marker but no Occurrence. - test_captures_not_marked_processed_after_failure: asserts that when a downstream step (create_classifications) raises, the image without a real detection is left unmarked and filter_processed_images re-yields it. Co-Authored-By: Claude --- ami/ml/models/pipeline.py | 25 +++++++++----- ami/ml/tests.py | 73 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 9 deletions(-) diff --git a/ami/ml/models/pipeline.py b/ami/ml/models/pipeline.py index c259e4aea..ac3d2cf3f 100644 --- a/ami/ml/models/pipeline.py +++ b/ami/ml/models/pipeline.py @@ -952,15 +952,6 @@ def save_results( "Algorithms and category maps must be registered before processing, using /info endpoint." ) - # Ensure all images have detections - # if not, add a NULL detection (empty bbox) to the results - null_detections = create_null_detections_for_undetected_images( - results=results, - detection_algorithm=detection_algorithm, - logger=job_logger, - ) - results.detections = results.detections + null_detections - detections = create_detections( detections=results.detections, algorithms_known=algorithms_known, @@ -981,6 +972,22 @@ def save_results( logger=job_logger, ) + # Mark images with no real detections as processed by creating null-bbox sentinels. + # Issue #1310: must run AFTER the real-detection / classification / occurrence steps + # so a failure earlier in the pipeline leaves the image unmarked (and therefore + # re-processed by filter_processed_images on the next run). Null DetectionResponses + # are kept out of the real-detection list so they bypass occurrence creation entirely. + null_detection_responses = create_null_detections_for_undetected_images( + results=results, + detection_algorithm=detection_algorithm, + logger=job_logger, + ) + create_detections( + detections=null_detection_responses, + algorithms_known=algorithms_known, + logger=job_logger, + ) + # Update precalculated counts on source images and events source_images = list(source_images) logger.info(f"Updating calculated fields for {len(source_images)} source images") diff --git a/ami/ml/tests.py b/ami/ml/tests.py index 0eaa0a237..171cfcd0c 100644 --- a/ami/ml/tests.py +++ b/ami/ml/tests.py @@ -13,6 +13,7 @@ Deployment, Detection, Event, + Occurrence, Project, SourceImage, SourceImageCollection, @@ -1024,6 +1025,78 @@ def test_null_detection_deduplication_same_pipeline(self): null_detections = image.detections.filter(bbox__isnull=True) self.assertEqual(null_detections.count(), 1, "Same pipeline should not create duplicate null detections") + def test_null_detection_does_not_create_phantom_occurrence(self): + """ + Issue #1310: a null detection (empty-bbox sentinel marking "image processed, + nothing found") must NOT spawn an Occurrence. Occurrences with no + determination and no real detections leak to the API as ghost rows. + """ + image = self.test_images[0] + results = self.fake_pipeline_results([image], self.pipeline) + results.detections = [] # pipeline found nothing + + save_results(results) + + null_dets = image.detections.filter(bbox__isnull=True) + self.assertEqual(null_dets.count(), 1, "Null marker should still be created") + self.assertIsNone( + null_dets.first().occurrence, + "Null detection must NOT be associated with an Occurrence", + ) + # No phantom Occurrence in DB tied to this image at all + phantom_occs = Occurrence.objects.filter(detections__source_image=image, determination__isnull=True) + self.assertEqual( + phantom_occs.count(), + 0, + "No Occurrence with NULL determination should exist for an image that had no detections", + ) + + def test_captures_not_marked_processed_after_failure(self): + """ + Issue #1310: null markers should only flag images as processed AFTER all + downstream save steps (classifications, occurrences) succeed. If any + downstream step raises, the image must remain unmarked so the next run + re-processes it. + + Reproduces the field bug where 400 images ended up with null markers but + no real detections — created when null-creation ran ahead of a later step + that failed. + """ + from unittest.mock import patch + + from ami.ml.models.pipeline import filter_processed_images + + # Mix: image_with_real has a detection in the response, image_without_real does not. + # The without-real image is the one that would get a null marker. + image_with_real, image_without_real = self.test_images + results = self.fake_pipeline_results(self.test_images, self.pipeline) + # Trim detections to only the first image so the second qualifies for null-marker creation + results.detections = [d for d in results.detections if str(d.source_image_id) == str(image_with_real.pk)] + + # Inject failure in a step that runs AFTER detection bulk_create + with patch( + "ami.ml.models.pipeline.create_classifications", + side_effect=RuntimeError("simulated classification failure"), + ): + with self.assertRaises(RuntimeError): + save_results(results) + + # The image with no real detection must NOT have a null marker — + # the run failed, so it should be re-tried. + null_dets = image_without_real.detections.filter(bbox__isnull=True) + self.assertEqual( + null_dets.count(), + 0, + "Image without real detections must not be marked processed when downstream step fails", + ) + # filter_processed_images should still yield it for the next run + retry_yield = list(filter_processed_images([image_without_real], self.pipeline)) + self.assertEqual( + retry_yield, + [image_without_real], + "Image with failed run must be re-yielded for processing", + ) + class TestAlgorithmCategoryMaps(TestCase): def setUp(self): From 356d49540957b5a2fc8cdf5e0b0aae9bb796a9e1 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 20 May 2026 15:45:13 -0700 Subject: [PATCH 2/9] docs(planning): PR #1312 null-marker followup plan + session notes Plan for the takeaway-review follow-up work on PR #1312: move-null-to-end + null-detection abstraction (DetectionQuerySet.valid / .null_markers, Detection.is_null_marker, Detection.build_null_marker) + sweep call sites + tighten OccurrenceQuerySet.valid + cleanup command for project 171. Captures rationale for splitting transaction.atomic into a separate follow-up PR (PR-1261 scar). Co-Authored-By: Claude --- .../planning/pr-1312-null-marker-followup.md | 154 ++++++++++++++++++ ...5-19-pr-1312-premptive-processed-marker.md | 97 +++++++++++ 2 files changed, 251 insertions(+) create mode 100644 docs/claude/planning/pr-1312-null-marker-followup.md create mode 100644 docs/claude/sessions/2026-05-19-pr-1312-premptive-processed-marker.md diff --git a/docs/claude/planning/pr-1312-null-marker-followup.md b/docs/claude/planning/pr-1312-null-marker-followup.md new file mode 100644 index 000000000..e0b6a9964 --- /dev/null +++ b/docs/claude/planning/pr-1312-null-marker-followup.md @@ -0,0 +1,154 @@ +# PR #1312 follow-up — null-marker abstraction + move-to-end + cleanup + +Created 2026-05-20. Builds on existing branch `fix/premptive-processed-marker`. + +Splits remaining work from the takeaway review of PR #1312 into two PRs. + +## PR-A (this plan): move null to end + null-detection abstraction + project 171 cleanup + +Extends PR #1312. Adds commits on top of the existing `4e33f96` ordering fix. + +### Scope rationale + +Current PR moves null markers AFTER real detection / classification / occurrence saves but BEFORE `source_image.save()` loop, `create_detection_images.delay()`, `update_calculated_fields_for_events`, and `Deployment.update_calculated_fields`. Per Copilot's review: any raise in those four steps still leaves a null marker persisted while the function failed. + +PR-A closes that window by moving null persistence to the absolute final step. Failure window left = the return statement — effectively zero. + +Bundled because all changes touch the same call graph (`save_results` body + `Detection` manager + `OccurrenceQuerySet.valid()` + cleanup of the rows the bug produced). Reviewing them together gives a single coherent semantic for what "null marker" means going forward. + +### Final order in `save_results` + +1. `create_detections(real_responses, ...)` +2. `create_classifications(...)` +3. `create_and_update_occurrences_for_detections(...)` +4. `source_image.save()` loop +5. `create_detection_images.delay(...)` +6. `update_calculated_fields_for_events(pks=event_ids)` +7. `Deployment.update_calculated_fields(save=True)` loop +8. `create_detections(null_responses, ...)` ← was step 0, now last write +9. return + +### Null-detection abstraction + +`ami/main/models.py`: + +```python +class Detection(BaseModel): + NULL_BBOX = None # canonical sentinel value for new rows + + @property + def is_null_marker(self) -> bool: + return self.bbox is None or self.bbox == [] + + @classmethod + def build_null_marker(cls, source_image, detection_algorithm) -> "Detection": + return cls( + source_image=source_image, + bbox=cls.NULL_BBOX, + detection_algorithm=detection_algorithm, + timestamp=now(), + ) + + +class DetectionQuerySet(BaseQuerySet): + def valid(self): + """ + Detections suitable for consumer queries — excludes null-marker sentinels. + Future predicates to fold in here: soft-delete tombstones, missing + detection_algorithm, missing classifications. + Consumers asking 'give me detections' should always go through .valid(). + """ + return self.exclude(NULL_DETECTIONS_FILTER) + + def null_markers(self): + """ + Sentinel rows recording 'this algorithm ran against this image and + found nothing.' Only for SourceImage-level 'has this been processed?' + questions. Detection consumers should use .valid() instead. + """ + return self.filter(NULL_DETECTIONS_FILTER) +``` + +Rename existing `DetectionQuerySet.null_detections()` (`ami/main/models.py:2721`) → `null_markers()`. Single sweep across codebase. + +### Call-site sweep + +`.exclude(NULL_DETECTIONS_FILTER)` → `.valid()`: +- `ami/main/models.py:817, 1229, 2037, 2243` +- `ami/main/api/views.py:614, 913` +- `ami/ml/models/pipeline.py:99, 103` + +`.filter(NULL_DETECTIONS_FILTER)` → `.null_markers()`: +- `ami/main/models.py:2722` (the method itself, becomes the body of `null_markers()`) + +Drifted inline at `ami/main/models.py:4108` (`~Q(...bbox__isnull=True) & ~Q(...bbox=[])`) → rewrite to use `.valid()` via subquery or join-based predicate. Verify it still works against ORM aggregation. + +Inline `bbox__isnull=True` at `ami/ml/models/pipeline.py:454-458` — intentional (only `IS NULL` form, comment explains). Leave alone but add a one-line comment pointing readers to `Detection.NULL_BBOX` for the canonical sentinel. + +### Occurrence valid() tightening + +`OccurrenceQuerySet.valid()` at `ami/main/models.py:2913` currently only excludes `detections__isnull=True`. Extend to also exclude: +- Occurrences whose only detections are null markers +- Occurrences with `determination__isnull=True` + +Use new `Detection.objects.valid()` helper to express "has at least one valid detection." + +### Project 171 cleanup + +Management command `ami/main/management/commands/cleanup_null_only_occurrences.py`: +- For each Occurrence in given project with no `Detection.objects.valid()` rows: delete the Occurrence and its null-marker Detection rows +- Source images then re-yield from `filter_processed_images` on next pipeline run +- Idempotent: re-running on a cleaned project is a no-op +- Dry-run mode by default + +### TDD test plan + +Tests added to `ami/ml/tests.py::TestPipeline` (extends the two already added in `4e33f96`): + +1. **Broker outage path** — patch `create_detection_images.delay` to raise `OperationalError`. Assert: + - `RuntimeError`/`OperationalError` propagates + - Zero null markers on the undetected image + - `filter_processed_images` yields the image on a second run + +2. **Calc-field DB error path** — patch `update_calculated_fields_for_events` to raise. Same assertions as #1. + +3. **`Detection.is_null_marker` property** — direct property test for both `bbox=None` and `bbox=[]` legacy form. + +4. **`Detection.objects.valid()` and `.null_markers()`** — assert disjoint querysets covering full Detection set for a fixture image with one real + one null detection. + +5. **OccurrenceQuerySet.valid() exclusion** — fixture with three occurrences: real detection only / null detection only / null determination. Assert `valid()` returns only the first. + +6. **Cleanup management command dry-run** — fixture project with phantom occurrences. Run command with `--dry-run`. Assert reported counts match. Run without dry-run. Assert deletion. + +### Commit shape + +Roughly: +1. `test(ml): RED test for broker-outage leaving null marker` +2. `fix(ml): move null-marker creation to final step in save_results` +3. `refactor(main): add DetectionQuerySet.valid()/.null_markers() + Detection.is_null_marker + build_null_marker` +4. `refactor(main): sweep call sites to use .valid() / .null_markers()` +5. `fix(main): tighten OccurrenceQuerySet.valid() to exclude null-only and null-determination` +6. `feat(main): cleanup_null_only_occurrences management command` + +### Out of scope for PR-A + +- `transaction.atomic()` + `transaction.on_commit` wrap — see PR-B below +- Re-classification gap in `filter_processed_images` — separate ticket +- `bbox = '[]'` legacy data migration — `NULL_DETECTIONS_FILTER` absorbs it; rewrite is unnecessary churn + +## PR-B (follow-up): narrow transaction.atomic() + on_commit + +After PR-A merges. Tracked at `docs/claude/planning/pr-1312-tx-wrap-followup.md` (to be written). + +Wraps `create_detections` + `create_classifications` + `create_and_update_occurrences_for_detections` + final `create_detections(null_responses, ...)` in `transaction.atomic()`. Dispatches `create_detection_images.delay` via `transaction.on_commit`. Calc-field updates stay outside. + +Closes the narrow "real detection committed but classification failed mid-bulk-create" window that PR-A leaves open. Separate PR because tx changes carry concurrency risk (PR-1261 scar) — needs its own multi-worker contention e2e plan and clean revert path. + +## E2E validation plan (PR-A) + +Reusing the dev-box atomic-rollback testing pattern from the previous session: + +1. Happy path: full async_api job. Assert no new phantom occurrences, all real detections persist, null markers on undetected images persist. +2. Broker-outage simulation: kill RabbitMQ mid-job or patch `delay()` to raise. Assert image stays in `filter_processed_images` yield list. +3. Calc-field DB error: patch `update_calculated_fields_for_events` to raise. Same assertion. +4. Cleanup command: run dry-run on project 171, capture counts, run for real, assert API no longer returns null-only Occurrences. diff --git a/docs/claude/sessions/2026-05-19-pr-1312-premptive-processed-marker.md b/docs/claude/sessions/2026-05-19-pr-1312-premptive-processed-marker.md new file mode 100644 index 000000000..09d700c68 --- /dev/null +++ b/docs/claude/sessions/2026-05-19-pr-1312-premptive-processed-marker.md @@ -0,0 +1,97 @@ +# PR #1312 — fix preemptive null-detection marker (Issue #1310) + +Created 2026-05-19. Branch: `fix/premptive-processed-marker`. URL: https://github.com/RolnickLab/antenna/pull/1312 + +## Status + +- ✅ PR opened against `main` +- ✅ TDD: 2 new failing tests added (`ami/ml/tests.py::TestPipeline`), then code fix made them pass +- ✅ Local test suites: `ami.ml.tests` + `ami.jobs.tests` = 170/170 pass; 18/18 in `TestPipeline` +- ✅ Manual e2e on serbia dev server (2026-05-19) — 3 paths validated, see below + +## Bug + +Issue #1310: project 171 had ~400 captures marked as processed with only null-bbox detections, no real detections, and phantom Occurrences with `determination=NULL` leaking to API. + +Root cause in `ami/ml/models/pipeline.py::save_results` (pre-fix order): +1. `create_null_detections_for_undetected_images` ran FIRST, built null `DetectionResponse`s for images with no real detections +2. Null responses merged into `results.detections` (`results.detections = results.detections + null_detections`) +3. `create_detections` bulk-saved the merged list (null + real) +4. `create_classifications` and `create_and_update_occurrences_for_detections` then iterated **all** detections — including nulls — so each null spawned an Occurrence with `determination=NULL` + +Failure mode: any exception in steps 3-4 left the image with a null marker already in DB but no real detections + phantom Occurrences. `filter_processed_images` then permanently skipped these images. + +## Fix (PR #1312) + +`ami/ml/models/pipeline.py::save_results`: +- Real `create_detections` / `create_classifications` / `create_and_update_occurrences_for_detections` run first on real DetectionResponses only +- THEN `create_null_detections_for_undetected_images` builds null responses, passed to a SECOND `create_detections` call +- Nulls never enter the classification or occurrence loops → no phantom Occurrences even on happy path +- If any earlier step raises, null markers are never persisted → `filter_processed_images` re-yields the image on retry + +## Tests added + +`ami/ml/tests.py::TestPipeline`: +- `test_null_detection_does_not_create_phantom_occurrence` (line ~1028) — happy path: pipeline finds nothing, null marker created, no Occurrence +- `test_captures_not_marked_processed_after_failure` (line ~1054) — patches `create_classifications` to raise; asserts no null marker persisted, `filter_processed_images` re-yields image + +Both tests confirmed RED against pre-fix code, GREEN after fix. + +## Verified safe (during brainstorm) + +Skipping Occurrence creation for null detections is safe because: +- `Detection.occurrence` FK is `null=True, on_delete=SET_NULL` (`ami/main/models.py:2764-2770`) +- Tracking not yet implemented — `create_and_update_occurrences_for_detections` carries `@TODO remove when we implement tracking!` comment +- All `Detection.objects.filter(occurrence=…)` traversals start FROM an Occurrence; never traverse from a null detection +- `create_classifications` loops paired responses; null DetectionResponses have empty `.classifications` → never iterated +- `seed_synthetic_occurrences` already excludes `occurrence__isnull=True` + +## Out-of-scope follow-ups (in PR body) + +1. **API filter for null-only / no-determination occurrences.** `OccurrenceQuerySet.valid()` (`ami/main/models.py:2913-2914`) currently only excludes `detections__isnull=True`. Doesn't exclude null-only-detection occurrences or `determination__isnull=True`. With this PR no new phantom Occurrences will be created, but project 171's existing phantoms still surface via API until `valid()` is tightened or `OccurrenceViewSet.get_queryset` (`ami/main/api/views.py:1220-1238`) adds the exclusion. + +2. **Cleanup of project 171's broken state.** ~400 source images have null-only detections + phantom Occurrences. Need management command to delete null-only Detection rows + their phantom Occurrences so `filter_processed_images` re-processes them. + +3. **TODO ticket: re-classification gap.** `filter_processed_images` notes "we don't yet have a mechanism to reclassify detections" — current behavior is to reprocess from scratch. Not blocking but worth tracking. + +## E2E results (serbia dev box, 2026-05-19) + +Serbia originally on `copilot/implement-option-a-job-logs`. Fetched + checked out `fix/premptive-processed-marker`, restarted django + celeryworker, confirmed reordered `save_results` loaded via `inspect.getsource`. After testing, restored to original branch. + +### Path 1: happy path — async_api job (full stack) + +Triggered via `manage.py test_ml_job_e2e --project 9 --collection 38 --pipeline quebec_vermont_moths_2023 --dispatch-mode async_api`. Job 162 succeeded in 44s. NATS path, 10 images, 8 save_results batches. Detection / occurrence counts unchanged (project 9 has `reprocess_existing_detections=False`, so existing rows were not duplicated). No new phantom occurrences. + +### Path 2: zero-detection path — synthetic save_results inside atomic + +Live script `/tmp/test_save_results_live.py` (copied into django container). Built a `PipelineResultsResponse` with two source images: 173110 (one bbox detection) and 173740 (no detection in `results.detections`). Called `save_results` inside `transaction.atomic()` with explicit rollback at end. + +In-flight state observed: +- si=173110: 1 new real detection + pre-existing pristine null det (52287 → pre-existing phantom occ 52073) +- si=173740: 1 new null marker (det 61357, `bbox=None`, `occurrence_id=None`) +- New phantom occurrences (excluding pre-existing 52073): **0** + +Post-rollback: baseline restored. + +### Path 3: failure path — patch `create_classifications` to raise + +Live script `/tmp/test_failure_path.py`. Same `PipelineResultsResponse` as path 2, but wrapped `save_results` call in `patch("ami.ml.models.pipeline.create_classifications", side_effect=RuntimeError(...))`. Confirmed RuntimeError propagates up out of `save_results`. In-flight state: +- si=173110: 1 new real detection created (create_detections ran before classification raised) +- si=173740: **0 new null markers** — `create_null_detections_for_undetected_images` never ran because the exception fired before it. + +This confirms the core fix: image without real detections stays unmarked when downstream save fails, so `filter_processed_images` will re-yield it on retry. + +### Same `save_results` is shared by v1 sync + v2 async (NATS) + +Confirmed: path 1 was async_api (NATS), but the function under test is identical for sync_api too. Single e2e on either dispatch path validates both. + +## Files touched + +- `ami/ml/models/pipeline.py` — `save_results` body reordered, +null_detection_responses path +- `ami/ml/tests.py` — Occurrence import + 2 new tests + +## Compose gotcha (worth remembering) + +For tests on main repo while a worktree override is active in `docker-compose.override.yml`, use `docker-compose.ci.yml` which doesn't pull the override (only mounts `.:/app:z` from current directory). The local dev stack will read the bound worktree subdir instead of the main repo. + +Stash trap encountered: `git stash` on the main repo branch did NOT stash uncommitted edits to `ami/ml/tests.py` + `ami/ml/models/pipeline.py` cleanly during a baseline-verification run. Edits were lost on `stash pop`. Workaround: don't stash to compare against baseline; instead run the test pre-edit on a clean checkout, or `git diff` the file directly. From 2b782d365224b361fefa3516b887650b30f638d2 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 20 May 2026 15:45:20 -0700 Subject: [PATCH 3/9] test(ml): RED test for broker-outage leaving null marker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds test_null_marker_not_persisted_when_broker_dispatch_fails to TestPipeline. Patches create_detection_images.delay to raise, asserts the unmatched image has no null marker persisted and that filter_processed_images yields it for re-processing. Verified RED against current ordering — null persistence still runs before delay, so the assertion fails. Next commit moves null persistence to the absolute final step. Co-Authored-By: Claude --- ami/ml/tests.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/ami/ml/tests.py b/ami/ml/tests.py index 171cfcd0c..83b4907fb 100644 --- a/ami/ml/tests.py +++ b/ami/ml/tests.py @@ -1097,6 +1097,45 @@ def test_captures_not_marked_processed_after_failure(self): "Image with failed run must be re-yielded for processing", ) + def test_null_marker_not_persisted_when_broker_dispatch_fails(self): + """ + Issue #1310 (takeaway-review follow-up): null markers must be the FINAL + write in save_results. Failures in any of the trailing steps — + create_detection_images.delay (broker outage), update_calculated_fields_for_events + (DB error), Deployment.update_calculated_fields (DB error) — must leave the + image unmarked. + + This test patches the celery dispatch to raise, simulating a broker + outage between the real-detection save and the null-marker save. + """ + from unittest.mock import patch + + from ami.ml.models.pipeline import filter_processed_images + + image_with_real, image_without_real = self.test_images + results = self.fake_pipeline_results(self.test_images, self.pipeline) + results.detections = [d for d in results.detections if str(d.source_image_id) == str(image_with_real.pk)] + + with patch( + "ami.ml.models.pipeline.create_detection_images.delay", + side_effect=RuntimeError("simulated broker outage"), + ): + with self.assertRaises(RuntimeError): + save_results(results) + + null_dets = image_without_real.detections.filter(bbox__isnull=True) + self.assertEqual( + null_dets.count(), + 0, + "Null marker must not be persisted when create_detection_images.delay fails", + ) + retry_yield = list(filter_processed_images([image_without_real], self.pipeline)) + self.assertEqual( + retry_yield, + [image_without_real], + "Image with failed broker dispatch must be re-yielded for processing", + ) + class TestAlgorithmCategoryMaps(TestCase): def setUp(self): From 08103daa764a3865a82863550363d86cf2336c4f Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 20 May 2026 15:46:04 -0700 Subject: [PATCH 4/9] fix(ml): move null-marker creation to final step in save_results Closes the failure window the previous fix left open: null markers were persisted after real-detection / classification / occurrence saves but BEFORE source_image.save, create_detection_images.delay, update_calculated_fields_for_events, and Deployment.update_calculated_fields. A raise in any of those four steps (broker outage, DB error) still left the image flagged as processed. Null markers now run as the last write in save_results so they only persist when every prior step succeeds. Remaining failure window is the return statement. Makes RED test from prior commit pass. Co-Authored-By: Claude --- ami/ml/models/pipeline.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/ami/ml/models/pipeline.py b/ami/ml/models/pipeline.py index ac3d2cf3f..2a1742f0b 100644 --- a/ami/ml/models/pipeline.py +++ b/ami/ml/models/pipeline.py @@ -972,22 +972,6 @@ def save_results( logger=job_logger, ) - # Mark images with no real detections as processed by creating null-bbox sentinels. - # Issue #1310: must run AFTER the real-detection / classification / occurrence steps - # so a failure earlier in the pipeline leaves the image unmarked (and therefore - # re-processed by filter_processed_images on the next run). Null DetectionResponses - # are kept out of the real-detection list so they bypass occurrence creation entirely. - null_detection_responses = create_null_detections_for_undetected_images( - results=results, - detection_algorithm=detection_algorithm, - logger=job_logger, - ) - create_detections( - detections=null_detection_responses, - algorithms_known=algorithms_known, - logger=job_logger, - ) - # Update precalculated counts on source images and events source_images = list(source_images) logger.info(f"Updating calculated fields for {len(source_images)} source images") @@ -1006,6 +990,22 @@ def save_results( for deployment in Deployment.objects.filter(pk__in=deployment_ids): deployment.update_calculated_fields(save=True) + # Mark images with no real detections as processed by creating null-bbox sentinels. + # Issue #1310: MUST be the final write. Persisting a null marker is the signal that + # filter_processed_images uses to skip an image on future runs, so it can only be + # written after every step that might fail has succeeded. Any raise earlier in the + # pipeline leaves the image unmarked so it gets retried on the next run. + null_detection_responses = create_null_detections_for_undetected_images( + results=results, + detection_algorithm=detection_algorithm, + logger=job_logger, + ) + create_detections( + detections=null_detection_responses, + algorithms_known=algorithms_known, + logger=job_logger, + ) + total_time = time.time() - start_time job_logger.info(f"Saved results from pipeline {pipeline} in {total_time:.2f} seconds") From 1c108bd7933a77517f6243a2bf3f5d28eb4e94b1 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 20 May 2026 15:48:35 -0700 Subject: [PATCH 5/9] refactor(main): add null-marker abstraction on Detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a single source of truth for "this detection row is a sentinel that records that an algorithm ran against an image and found nothing": - Detection.NULL_BBOX = None (canonical bbox value for new null markers) - Detection.is_null_marker (recognises both bbox=None and legacy bbox=[]) - Detection.build_null_marker(source_image, detection_algorithm) classmethod - DetectionQuerySet.valid() — consumer default, excludes null markers - DetectionQuerySet.null_markers() — narrow, for "has this image been processed?" checks (renamed from .null_detections()) valid() is named to grow: future predicates to fold in include soft-delete tombstones, detections missing an algorithm reference, and detections missing classifications. Consumers asking "give me detections" should default to .valid(). Adds TestDetectionNullMarker covering: is_null_marker for bbox=None / bbox=[] / real bbox, build_null_marker field setup, and disjointness of .valid() / .null_markers() over a fixture with all three row types. Next commit sweeps existing inline NULL_DETECTIONS_FILTER usage to the new API. Co-Authored-By: Claude --- ami/main/models.py | 39 +++++++++++++++++++++++- ami/main/tests.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 1 deletion(-) diff --git a/ami/main/models.py b/ami/main/models.py index b30b4e645..50592c181 100644 --- a/ami/main/models.py +++ b/ami/main/models.py @@ -2718,7 +2718,23 @@ def save(self, *args, **kwargs): class DetectionQuerySet(BaseQuerySet): - def null_detections(self): + def valid(self): + """ + Detections suitable for consumer queries — excludes null-marker sentinels. + + Null markers are rows that record "an algorithm ran against this image and + found nothing." Consumers asking "give me detections" should always go + through .valid(). Future predicates to fold in here: soft-delete tombstones, + detections missing an algorithm reference, detections missing classifications. + """ + return self.exclude(NULL_DETECTIONS_FILTER) + + def null_markers(self): + """ + Sentinel rows that record "this algorithm ran against this image and found + nothing." Only relevant for SourceImage-level "has this been processed?" + questions. Detection consumers should use .valid() instead. + """ return self.filter(NULL_DETECTIONS_FILTER) @@ -2796,6 +2812,27 @@ class Detection(BaseModel): objects = DetectionManager() + NULL_BBOX = None + """Canonical bbox value for null markers (rows that record 'an algorithm ran but + found nothing'). Use Detection.build_null_marker() to construct them. The legacy + bbox=[] form is still recognised by .null_markers() / .is_null_marker for + backwards compatibility with historical rows.""" + + @property + def is_null_marker(self) -> bool: + """True for sentinel rows representing 'no detections found by this algorithm.'""" + return self.bbox is None or self.bbox == [] + + @classmethod + def build_null_marker(cls, source_image, detection_algorithm) -> "Detection": + """Construct (without saving) a null-marker Detection for the given image+algorithm.""" + return cls( + source_image=source_image, + bbox=cls.NULL_BBOX, + detection_algorithm=detection_algorithm, + timestamp=timezone.now(), + ) + def get_bbox(self): if self.bbox: return BoundingBox( diff --git a/ami/main/tests.py b/ami/main/tests.py index 517c4c87b..c1c7c9f5a 100644 --- a/ami/main/tests.py +++ b/ami/main/tests.py @@ -4761,3 +4761,78 @@ def test_registration_order_preserves_occurrence_retrieve(self): retrieve_response = self.client.get(f"/api/v2/occurrences/{occurrence.pk}/?project_id={self.project.pk}") self.assertEqual(stats_response.status_code, 200, "stats URL must resolve") self.assertEqual(retrieve_response.status_code, 200, "occurrence retrieve must still work") + + +class TestDetectionNullMarker(TestCase): + """ + Covers the null-marker abstraction added for Issue #1310 follow-up: + Detection.is_null_marker, Detection.build_null_marker, and + DetectionQuerySet.valid() / .null_markers(). + """ + + def setUp(self): + from ami.ml.models.algorithm import Algorithm + + self.project = Project.objects.create(name="Null Marker Test Project") + self.deployment = Deployment.objects.create(project=self.project, name="dep") + self.source_image = SourceImage.objects.create( + deployment=self.deployment, + project=self.project, + path="nullmarker-test.jpg", + ) + self.algorithm = Algorithm.objects.create(name="test-detector", key="test-detector", task_type="detection") + + def test_is_null_marker_for_bbox_none(self): + det = Detection.objects.create( + source_image=self.source_image, + bbox=None, + detection_algorithm=self.algorithm, + ) + self.assertTrue(det.is_null_marker) + + def test_is_null_marker_for_bbox_empty_list_legacy(self): + det = Detection.objects.create( + source_image=self.source_image, + bbox=[], + detection_algorithm=self.algorithm, + ) + self.assertTrue(det.is_null_marker) + + def test_is_null_marker_false_for_real_detection(self): + det = Detection.objects.create( + source_image=self.source_image, + bbox=[0.0, 0.0, 1.0, 1.0], + detection_algorithm=self.algorithm, + ) + self.assertFalse(det.is_null_marker) + + def test_build_null_marker_sets_canonical_fields(self): + det = Detection.build_null_marker(self.source_image, self.algorithm) + self.assertIsNone(det.bbox) + self.assertEqual(det.source_image, self.source_image) + self.assertEqual(det.detection_algorithm, self.algorithm) + self.assertIsNotNone(det.timestamp) + self.assertTrue(det.is_null_marker) + + def test_valid_and_null_markers_are_disjoint_and_complete(self): + real = Detection.objects.create( + source_image=self.source_image, + bbox=[0.0, 0.0, 1.0, 1.0], + detection_algorithm=self.algorithm, + ) + null_via_none = Detection.objects.create( + source_image=self.source_image, + bbox=None, + detection_algorithm=self.algorithm, + ) + null_via_empty = Detection.objects.create( + source_image=self.source_image, + bbox=[], + detection_algorithm=self.algorithm, + ) + scoped = Detection.objects.filter(source_image=self.source_image) + valid_pks = set(scoped.valid().values_list("pk", flat=True)) + null_pks = set(scoped.null_markers().values_list("pk", flat=True)) + self.assertEqual(valid_pks, {real.pk}) + self.assertEqual(null_pks, {null_via_none.pk, null_via_empty.pk}) + self.assertEqual(valid_pks & null_pks, set()) From 2554e623adb9e9ccf8161c6d8c540cf0d160a161 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 20 May 2026 15:54:39 -0700 Subject: [PATCH 6/9] refactor(main): sweep NULL_DETECTIONS_FILTER call sites to .valid()/.null_markers() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrates 7 inline NULL_DETECTIONS_FILTER usages to the new manager methods: - Detection.objects.exclude(NULL_DETECTIONS_FILTER) → .valid() - self.detections.exclude(NULL_DETECTIONS_FILTER) → self.detections.all().valid() - subquery .exclude(NULL_DETECTIONS_FILTER) → .valid() - aggregate filter at SourceImageCollectionQuerySet.with_source_images_with_detections_count was the drifted inline ~Q(...bbox__isnull=True) & ~Q(...bbox=[]); now uses a new null_detections_q(prefix) helper for relation-prefixed Q expressions. Touched: - ami/main/models.py: Deployment.get_detections_count, Event.get_detections_count, SourceImage.create_occurrences_from_detections, _annotate_detections_count_subquery, SourceImageCollectionQuerySet.with_source_images_with_detections_count - ami/main/api/views.py: OccurrenceViewSet prefetch_queryset, DetectionViewSet.queryset - ami/ml/models/pipeline.py: filter_processed_images null-only and unclassified checks NULL_DETECTIONS_FILTER constant is retained at module level. Direct get_or_create_detection lookup keeps bbox__isnull=True (algorithm-scoped, narrower than .null_markers() which also includes legacy bbox=[] from other pipelines); added a comment pointing readers to Detection.NULL_BBOX for the canonical sentinel. 176/176 tests in ami.ml, ami.main.TestDetectionNullMarker, ami.jobs pass. Co-Authored-By: Claude --- ami/main/api/views.py | 5 ++--- ami/main/models.py | 20 +++++++++++++++----- ami/ml/models/pipeline.py | 11 ++++++----- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/ami/main/api/views.py b/ami/main/api/views.py index 5a6d5aece..2716005db 100644 --- a/ami/main/api/views.py +++ b/ami/main/api/views.py @@ -37,7 +37,6 @@ from ami.utils.storages import ConnectionTestResult from ..models import ( - NULL_DETECTIONS_FILTER, Classification, Deployment, Detection, @@ -611,7 +610,7 @@ def prefetch_detections(self, queryset: QuerySet, project: Project | None = None score = get_default_classification_threshold(project, self.request) prefetch_queryset = ( - Detection.objects.exclude(NULL_DETECTIONS_FILTER) + Detection.objects.valid() .annotate( determination_score=models.Max("occurrence__detections__classifications__score"), # Store whether this occurrence should be included based on default filters @@ -910,7 +909,7 @@ class DetectionViewSet(DefaultViewSet, ProjectMixin): """ require_project_for_list = True # Unfiltered list scans are too expensive on this table - queryset = Detection.objects.exclude(NULL_DETECTIONS_FILTER).select_related("source_image", "detection_algorithm") + queryset = Detection.objects.valid().select_related("source_image", "detection_algorithm") serializer_class = DetectionSerializer filterset_fields = ["source_image", "detection_algorithm", "source_image__project"] ordering_fields = ["created_at", "updated_at", "detection_score", "timestamp"] diff --git a/ami/main/models.py b/ami/main/models.py index 50592c181..4c9144361 100644 --- a/ami/main/models.py +++ b/ami/main/models.py @@ -98,6 +98,16 @@ class TaxonRank(OrderedEnum): NULL_DETECTIONS_FILTER = Q(bbox__isnull=True) | Q(bbox=[]) +def null_detections_q(prefix: str = "") -> Q: + """ + Return a Q expression matching null-marker Detection rows, optionally prefixed + for use across relations (e.g. null_detections_q("images__detections__") for an + aggregate filter on a parent table). For Detection queries directly, prefer + Detection.objects.null_markers() / .valid() instead. + """ + return Q(**{f"{prefix}bbox__isnull": True}) | Q(**{f"{prefix}bbox": []}) + + def get_media_url(path: str) -> str: """ If path is a full URL, return it as-is. @@ -814,7 +824,7 @@ def get_detections_count(self) -> int | None: was processed and no detections were found) to stay consistent with ``SourceImage.get_detections_count`` and ``Event.get_detections_count``. """ - qs = Detection.objects.filter(source_image__deployment=self).exclude(NULL_DETECTIONS_FILTER) + qs = Detection.objects.filter(source_image__deployment=self).valid() filter_q = build_occurrence_default_filters_q( project=self.project, request=None, @@ -1226,7 +1236,7 @@ def get_detections_count(self) -> int | None: Excludes null-bbox placeholder detections to stay consistent with ``SourceImage.get_detections_count`` and ``Deployment.get_detections_count``. """ - qs = Detection.objects.filter(source_image__event=self).exclude(NULL_DETECTIONS_FILTER) + qs = Detection.objects.filter(source_image__event=self).valid() filter_q = build_occurrence_default_filters_q( project=self.project, request=None, @@ -2034,7 +2044,7 @@ def get_detections_count(self) -> int: Excludes detections without bounding boxes — those are placeholder records indicating the image was successfully processed and no detections were found. """ - qs = self.detections.exclude(NULL_DETECTIONS_FILTER) + qs = self.detections.all().valid() project = self.project if not project: return qs.distinct().count() @@ -2240,7 +2250,7 @@ def update_detection_counts( if null_only: qs = qs.filter(detections_count__isnull=True) - detection_qs = Detection.objects.filter(source_image_id=models.OuterRef("pk")).exclude(NULL_DETECTIONS_FILTER) + detection_qs = Detection.objects.filter(source_image_id=models.OuterRef("pk")).valid() if project is not None: filter_q = build_occurrence_default_filters_q( project=project, @@ -4142,7 +4152,7 @@ def with_source_images_with_detections_count(self): return self.annotate( source_images_with_detections_count=models.Count( "images", - filter=(~models.Q(images__detections__bbox__isnull=True) & ~models.Q(images__detections__bbox=[])), + filter=~null_detections_q("images__detections__"), distinct=True, ) ) diff --git a/ami/ml/models/pipeline.py b/ami/ml/models/pipeline.py index 2a1742f0b..f7293c68c 100644 --- a/ami/ml/models/pipeline.py +++ b/ami/ml/models/pipeline.py @@ -23,7 +23,6 @@ from ami.base.models import BaseModel, BaseQuerySet from ami.base.schemas import ConfigurableStage, default_stages from ami.main.models import ( - NULL_DETECTIONS_FILTER, Classification, Deployment, Detection, @@ -96,11 +95,11 @@ def filter_processed_images( task_logger.debug(f"Image {image} needs processing: has no existing detections from pipeline's detector") # If there are no existing detections from this pipeline, send the image yield image - elif not existing_detections.exclude(NULL_DETECTIONS_FILTER).exists(): # type: ignore + elif not existing_detections.valid().exists(): # All detections for this image are null (processed but nothing found) — skip task_logger.debug(f"Image {image} has only null detections from pipeline {pipeline}, skipping!") continue - elif existing_detections.exclude(NULL_DETECTIONS_FILTER).filter(classifications__isnull=True).exists(): + elif existing_detections.valid().filter(classifications__isnull=True).exists(): # Check if any real detections (non-null) have no classifications task_logger.debug( f"Image {image} needs processing: has existing detections with no classifications " @@ -440,8 +439,10 @@ def get_or_create_detection( if serialized_bbox is None: # Null detection: algorithm-specific lookup so different pipelines don't share sentinels. - # Use bbox__isnull=True because JSONField filter(bbox=None) matches JSON null literal, - # not SQL NULL which is what Detection(bbox=None) stores. + # Use bbox__isnull=True (not Detection.objects.null_markers()) because we want to find + # an exact match for THIS algorithm — null_markers() also includes legacy bbox=[] rows + # from other pipelines and would be wider than this dedup needs. + # See Detection.NULL_BBOX for the canonical sentinel value used by new writes. assert detection_resp.algorithm, f"No detection algorithm was specified for detection {detection_repr}" try: detection_algo = algorithms_known[detection_resp.algorithm.key] From 8d7e060bdf5703534ea1412db811f632e4411c82 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 20 May 2026 15:56:52 -0700 Subject: [PATCH 7/9] fix(main): tighten OccurrenceQuerySet.valid to exclude phantoms OccurrenceQuerySet.valid() previously only excluded occurrences with no detections at all. Field bug from Issue #1310 created two new phantom shapes that still leaked to the API: 1. Occurrences whose only detections are null-marker sentinels (no real bounding box backing them). 2. Occurrences with determination__isnull=True. valid() now requires at least one .valid() Detection (real, non-null) AND a non-null determination. Built on top of the new Detection.objects.valid() helper so both layers stay in sync as the predicate grows (soft-delete, missing algo, etc.). Downstream callers updated automatically: ami/exports/format_types.py and OccurrenceViewSet.get_queryset both invoke OccurrenceQuerySet.valid and will now filter out the project-171 phantoms once existing rows are cleaned up (next commit). Adds TestOccurrenceValidQuerySet covering all three exclusion shapes. Co-Authored-By: Claude --- ami/main/models.py | 15 +++++++- ami/main/tests.py | 87 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/ami/main/models.py b/ami/main/models.py index 4c9144361..93492171a 100644 --- a/ami/main/models.py +++ b/ami/main/models.py @@ -2958,7 +2958,20 @@ def __str__(self) -> str: class OccurrenceQuerySet(BaseQuerySet): def valid(self): - return self.exclude(detections__isnull=True) + """ + Occurrences fit to surface in API responses: at least one real detection AND + a determination set. + + Excludes: + - Occurrences with no detections at all (orphans) + - Occurrences whose only detections are null-marker sentinels (Issue #1310: + field bug created phantom occurrences with no real bounding box backing + them) + - Occurrences with determination__isnull=True (no taxonomic identification, + same field bug shape) + """ + has_valid_detection = Exists(Detection.objects.valid().filter(occurrence_id=OuterRef("pk"))) + return self.filter(has_valid_detection).exclude(determination__isnull=True) def with_detections_count(self): return self.annotate(detections_count=models.Count("detections", distinct=True)) diff --git a/ami/main/tests.py b/ami/main/tests.py index c1c7c9f5a..a884a219c 100644 --- a/ami/main/tests.py +++ b/ami/main/tests.py @@ -4836,3 +4836,90 @@ def test_valid_and_null_markers_are_disjoint_and_complete(self): self.assertEqual(valid_pks, {real.pk}) self.assertEqual(null_pks, {null_via_none.pk, null_via_empty.pk}) self.assertEqual(valid_pks & null_pks, set()) + + +class TestOccurrenceValidQuerySet(TestCase): + """ + Covers OccurrenceQuerySet.valid() tightening for Issue #1310: + excludes occurrences with no real detections and occurrences missing a + determination, in addition to the existing detections__isnull=True + exclusion. + """ + + def setUp(self): + from ami.main.models import Taxon + from ami.ml.models.algorithm import Algorithm + + self.project = Project.objects.create(name="Occurrence Valid Test Project") + self.deployment = Deployment.objects.create(project=self.project, name="dep") + self.event = Event.objects.create( + project=self.project, + deployment=self.deployment, + group_by="2024-01-01", + start=datetime.datetime(2024, 1, 1, 0, 0), + ) + self.source_image = SourceImage.objects.create( + deployment=self.deployment, + project=self.project, + event=self.event, + path="occvalid-test.jpg", + ) + self.algorithm = Algorithm.objects.create(name="valid-detector", key="valid-detector", task_type="detection") + self.taxon = Taxon.objects.create(name="Occurrence Valid Test Taxon") + + def _make_occurrence_with_real_detection(self) -> Occurrence: + occ = Occurrence.objects.create( + project=self.project, + event=self.event, + deployment=self.deployment, + determination=self.taxon, + ) + Detection.objects.create( + source_image=self.source_image, + bbox=[0.0, 0.0, 1.0, 1.0], + detection_algorithm=self.algorithm, + occurrence=occ, + ) + return occ + + def _make_occurrence_with_only_null_detection(self) -> Occurrence: + occ = Occurrence.objects.create( + project=self.project, + event=self.event, + deployment=self.deployment, + determination=self.taxon, + ) + Detection.objects.create( + source_image=self.source_image, + bbox=None, + detection_algorithm=self.algorithm, + occurrence=occ, + ) + return occ + + def _make_occurrence_with_null_determination(self) -> Occurrence: + occ = Occurrence.objects.create( + project=self.project, + event=self.event, + deployment=self.deployment, + determination=None, + ) + Detection.objects.create( + source_image=self.source_image, + bbox=[0.0, 0.0, 1.0, 1.0], + detection_algorithm=self.algorithm, + occurrence=occ, + ) + return occ + + def test_valid_returns_only_real_with_determination(self): + real = self._make_occurrence_with_real_detection() + null_only = self._make_occurrence_with_only_null_detection() + no_determination = self._make_occurrence_with_null_determination() + + valid_qs = Occurrence.objects.filter(project=self.project).valid() + valid_pks = set(valid_qs.values_list("pk", flat=True)) + + self.assertIn(real.pk, valid_pks) + self.assertNotIn(null_only.pk, valid_pks) + self.assertNotIn(no_determination.pk, valid_pks) From e5f004aac61bbd696dbb810a8b29dfe693605918 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 20 May 2026 15:58:58 -0700 Subject: [PATCH 8/9] feat(main): cleanup_null_only_occurrences management command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One-shot per-project cleanup for the Issue #1310 field bug. Deletes: - Phantom occurrences (no valid detections OR null determination) - Orphan null-marker Detection rows on source images with no real detections After running, the affected source images become eligible for re-processing by filter_processed_images on the next ML run. Dry-run by default; pass --commit to delete. python manage.py cleanup_null_only_occurrences --project 171 # dry-run python manage.py cleanup_null_only_occurrences --project 171 --commit Idempotent — re-running on a cleaned project reports zero candidates and exits without touching the database. Adds TestCleanupNullOnlyOccurrencesCommand covering dry-run, commit, and idempotency. Valid occurrences and null markers on images with at least one real detection are explicitly preserved. Co-Authored-By: Claude --- .../commands/cleanup_null_only_occurrences.py | 84 +++++++++++++++ ami/main/tests.py | 102 ++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 ami/main/management/commands/cleanup_null_only_occurrences.py diff --git a/ami/main/management/commands/cleanup_null_only_occurrences.py b/ami/main/management/commands/cleanup_null_only_occurrences.py new file mode 100644 index 000000000..11e2dd26f --- /dev/null +++ b/ami/main/management/commands/cleanup_null_only_occurrences.py @@ -0,0 +1,84 @@ +""" +Delete phantom Occurrences and orphan null-marker Detections left by the Issue #1310 +field bug, on a per-project basis. + +The bug created two categories of rows that should never have been persisted: +- Occurrence rows with no real detections (or with determination=NULL), surfaced as + ghost rows in the API. +- Detection rows that mark a SourceImage as "processed" while no real detections + exist for it — these prevent filter_processed_images from re-yielding the image + on the next ML run. + +After cleanup, the source images become eligible for re-processing. + +Dry-run by default. Pass --commit to delete. +""" + +from django.core.management.base import BaseCommand, CommandError +from django.db import transaction +from django.db.models import Exists, OuterRef + +from ami.main.models import Detection, Occurrence, Project + + +class Command(BaseCommand): + help = "Delete phantom Occurrences and orphan null-marker Detections (Issue #1310)." + + def add_arguments(self, parser): + parser.add_argument( + "--project", + type=int, + required=True, + help="Project ID to clean up.", + ) + parser.add_argument( + "--commit", + action="store_true", + help="Actually delete. Defaults to dry-run.", + ) + + def handle(self, *args, **options): + project_id: int = options["project"] + commit: bool = options["commit"] + + try: + project = Project.objects.get(pk=project_id) + except Project.DoesNotExist as err: + raise CommandError(f"Project {project_id} does not exist") from err + + all_occs = Occurrence.objects.filter(project=project) + valid_occs = all_occs.valid() + phantom_occs = all_occs.exclude(pk__in=valid_occs.values("pk")) + + has_valid_detection = Detection.objects.valid().filter(source_image_id=OuterRef("source_image_id")) + orphan_null_markers = ( + Detection.objects.filter(source_image__project=project) + .null_markers() + .annotate(_has_valid=Exists(has_valid_detection)) + .filter(_has_valid=False) + ) + + phantom_count = phantom_occs.count() + null_count = orphan_null_markers.count() + + self.stdout.write(f"Project #{project.pk} ({project.name}):") + self.stdout.write(f" Phantom occurrences (no valid detection or null determination): {phantom_count}") + self.stdout.write(f" Orphan null-marker detections on images with no real detections: {null_count}") + + if phantom_count == 0 and null_count == 0: + self.stdout.write(self.style.SUCCESS("Nothing to clean up.")) + return + + if not commit: + self.stdout.write(self.style.WARNING("Dry run — pass --commit to delete.")) + return + + with transaction.atomic(): + null_deleted, _ = orphan_null_markers.delete() + phantom_deleted, _ = phantom_occs.delete() + + self.stdout.write( + self.style.SUCCESS( + f"Deleted {phantom_deleted} phantom occurrences and {null_deleted} orphan null markers." + ) + ) diff --git a/ami/main/tests.py b/ami/main/tests.py index a884a219c..20917d614 100644 --- a/ami/main/tests.py +++ b/ami/main/tests.py @@ -4923,3 +4923,105 @@ def test_valid_returns_only_real_with_determination(self): self.assertIn(real.pk, valid_pks) self.assertNotIn(null_only.pk, valid_pks) self.assertNotIn(no_determination.pk, valid_pks) + + +class TestCleanupNullOnlyOccurrencesCommand(TestCase): + """ + Covers ami/main/management/commands/cleanup_null_only_occurrences.py. + Verifies dry-run reports counts without deleting and --commit deletes + phantom occurrences and orphan null markers, leaving valid rows alone. + """ + + def setUp(self): + from ami.main.models import Taxon + from ami.ml.models.algorithm import Algorithm + + self.project = Project.objects.create(name="Cleanup Cmd Test Project") + self.deployment = Deployment.objects.create(project=self.project, name="dep") + self.event = Event.objects.create( + project=self.project, + deployment=self.deployment, + group_by="2024-01-01", + start=datetime.datetime(2024, 1, 1, 0, 0), + ) + self.algorithm = Algorithm.objects.create( + name="cleanup-detector", key="cleanup-detector", task_type="detection" + ) + self.taxon = Taxon.objects.create(name="Cleanup Test Taxon") + + self.img_with_real = SourceImage.objects.create( + deployment=self.deployment, + project=self.project, + event=self.event, + path="with-real.jpg", + ) + self.img_only_null = SourceImage.objects.create( + deployment=self.deployment, + project=self.project, + event=self.event, + path="only-null.jpg", + ) + + self.valid_occurrence = Occurrence.objects.create( + project=self.project, + event=self.event, + deployment=self.deployment, + determination=self.taxon, + ) + self.real_detection = Detection.objects.create( + source_image=self.img_with_real, + bbox=[0.0, 0.0, 1.0, 1.0], + detection_algorithm=self.algorithm, + occurrence=self.valid_occurrence, + ) + self.null_on_processed_image = Detection.objects.create( + source_image=self.img_with_real, + bbox=None, + detection_algorithm=self.algorithm, + ) + + self.phantom_occurrence = Occurrence.objects.create( + project=self.project, + event=self.event, + deployment=self.deployment, + determination=self.taxon, + ) + self.phantom_null = Detection.objects.create( + source_image=self.img_only_null, + bbox=None, + detection_algorithm=self.algorithm, + occurrence=self.phantom_occurrence, + ) + + def _call_command(self, *args): + from io import StringIO + + from django.core.management import call_command + + out = StringIO() + call_command("cleanup_null_only_occurrences", *args, stdout=out) + return out.getvalue() + + def test_dry_run_reports_counts_without_deleting(self): + output = self._call_command(f"--project={self.project.pk}") + self.assertIn("Phantom occurrences", output) + self.assertIn("Orphan null-marker detections", output) + self.assertIn("Dry run", output) + self.assertTrue(Occurrence.objects.filter(pk=self.phantom_occurrence.pk).exists()) + self.assertTrue(Detection.objects.filter(pk=self.phantom_null.pk).exists()) + + def test_commit_deletes_phantoms_and_orphan_null_markers(self): + self._call_command(f"--project={self.project.pk}", "--commit") + self.assertFalse(Occurrence.objects.filter(pk=self.phantom_occurrence.pk).exists()) + self.assertFalse(Detection.objects.filter(pk=self.phantom_null.pk).exists()) + self.assertTrue(Occurrence.objects.filter(pk=self.valid_occurrence.pk).exists()) + self.assertTrue(Detection.objects.filter(pk=self.real_detection.pk).exists()) + self.assertTrue( + Detection.objects.filter(pk=self.null_on_processed_image.pk).exists(), + "Null markers on images with at least one real detection must be kept", + ) + + def test_commit_is_idempotent(self): + self._call_command(f"--project={self.project.pk}", "--commit") + second_run = self._call_command(f"--project={self.project.pk}", "--commit") + self.assertIn("Nothing to clean up", second_run) From dda794f8bc58b7888a5927ca031348a2d1bc3813 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 20 May 2026 16:31:53 -0700 Subject: [PATCH 9/9] fix(main,ml): apply CodeRabbit review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five drift / quick-win fixes from PR #1312 review: 1. ami/main/api/views.py — filter_by_has_detections now uses Detection.objects.valid() so /captures/?has_detections=true agrees with the with_detections prefetch (which already drops null markers). Without this, has_detections=true could return captures whose filtered_detections array was empty. 2. ami/main/models.py — SourceImageCollection.sample_detections_only now samples by Detection.objects.valid() instead of detections__isnull=False, matching the tightened source_images_with_detections_count annotation. A detections_only collection no longer admits images that have only null markers. 3. ami/main/models.py — Detection.build_null_marker drops timestamp=timezone.now(). Detection.save() backfills timestamp from the source image's capture time, so the marker sorts/filters by capture time rather than processing time. Test asserts timestamp is None on the builder output. 4. ami/ml/models/pipeline.py — get_or_create_detection null-marker dedup now goes through .null_markers() so legacy bbox=[] sentinels from older runs are re-used. The lookup is still detection_algorithm-scoped, so the wider .null_markers() predicate stays narrow at the call site (no false matches across algorithms). 5. ami/main/management/commands/cleanup_null_only_occurrences.py — success message now reports the pre-calculated phantom_count / null_count instead of the .delete() return tuple, which includes cascade-deleted rows and would mislead the operator about what the command targeted. 68/68 in ami.main null-marker tests + ami.ml.tests pass. Co-Authored-By: Claude --- ami/main/api/views.py | 2 +- .../commands/cleanup_null_only_occurrences.py | 12 +++++----- ami/main/models.py | 6 ++--- ami/main/tests.py | 5 ++++- ami/ml/models/pipeline.py | 22 ++++++++++--------- 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/ami/main/api/views.py b/ami/main/api/views.py index 2716005db..d86962097 100644 --- a/ami/main/api/views.py +++ b/ami/main/api/views.py @@ -580,7 +580,7 @@ def filter_by_has_detections(self, queryset: QuerySet) -> QuerySet: if has_detections is not None: has_detections = BooleanField(required=False).clean(has_detections) queryset = queryset.annotate( - has_detections=models.Exists(Detection.objects.filter(source_image=models.OuterRef("pk"))), + has_detections=models.Exists(Detection.objects.valid().filter(source_image=models.OuterRef("pk"))), ).filter(has_detections=has_detections) return queryset diff --git a/ami/main/management/commands/cleanup_null_only_occurrences.py b/ami/main/management/commands/cleanup_null_only_occurrences.py index 11e2dd26f..a974c9fec 100644 --- a/ami/main/management/commands/cleanup_null_only_occurrences.py +++ b/ami/main/management/commands/cleanup_null_only_occurrences.py @@ -74,11 +74,13 @@ def handle(self, *args, **options): return with transaction.atomic(): - null_deleted, _ = orphan_null_markers.delete() - phantom_deleted, _ = phantom_occs.delete() + orphan_null_markers.delete() + phantom_occs.delete() + # Report the pre-calculated counts of the rows we targeted directly. The tuple from + # .delete() also counts cascade-deleted related rows (e.g. classifications under a + # phantom occurrence's detections), which would inflate the numbers and confuse the + # operator about what the command actually targeted. self.stdout.write( - self.style.SUCCESS( - f"Deleted {phantom_deleted} phantom occurrences and {null_deleted} orphan null markers." - ) + self.style.SUCCESS(f"Deleted {phantom_count} phantom occurrences and {null_count} orphan null markers.") ) diff --git a/ami/main/models.py b/ami/main/models.py index 93492171a..655ee60a0 100644 --- a/ami/main/models.py +++ b/ami/main/models.py @@ -2840,7 +2840,6 @@ def build_null_marker(cls, source_image, detection_algorithm) -> "Detection": source_image=source_image, bbox=cls.NULL_BBOX, detection_algorithm=detection_algorithm, - timestamp=timezone.now(), ) def get_bbox(self): @@ -4557,10 +4556,11 @@ def sample_greatest_file_size_from_each_event(self, num_each: int = 1): return captures def sample_detections_only(self): - """Sample all source images with detections""" + """Sample all source images with at least one real (non-null-marker) detection.""" qs = self.get_queryset() - return qs.filter(detections__isnull=False).distinct() + valid_detection_image_ids = Detection.objects.valid().values("source_image_id") + return qs.filter(pk__in=valid_detection_image_ids).distinct() def sample_full( self, diff --git a/ami/main/tests.py b/ami/main/tests.py index 20917d614..cf5be71e0 100644 --- a/ami/main/tests.py +++ b/ami/main/tests.py @@ -4811,7 +4811,10 @@ def test_build_null_marker_sets_canonical_fields(self): self.assertIsNone(det.bbox) self.assertEqual(det.source_image, self.source_image) self.assertEqual(det.detection_algorithm, self.algorithm) - self.assertIsNotNone(det.timestamp) + # timestamp is intentionally not set on the builder — Detection.save() backfills + # it from the source image's capture time, so the marker sorts by capture time + # rather than processing time. + self.assertIsNone(det.timestamp) self.assertTrue(det.is_null_marker) def test_valid_and_null_markers_are_disjoint_and_complete(self): diff --git a/ami/ml/models/pipeline.py b/ami/ml/models/pipeline.py index f7293c68c..9b0da5b3a 100644 --- a/ami/ml/models/pipeline.py +++ b/ami/ml/models/pipeline.py @@ -438,11 +438,10 @@ def get_or_create_detection( ), f"Detection belongs to a different source image: {detection_repr}" if serialized_bbox is None: - # Null detection: algorithm-specific lookup so different pipelines don't share sentinels. - # Use bbox__isnull=True (not Detection.objects.null_markers()) because we want to find - # an exact match for THIS algorithm — null_markers() also includes legacy bbox=[] rows - # from other pipelines and would be wider than this dedup needs. - # See Detection.NULL_BBOX for the canonical sentinel value used by new writes. + # Null marker: algorithm-specific lookup so different pipelines don't share sentinels. + # Use .null_markers() so legacy bbox=[] sentinels from older runs are also matched and + # re-used instead of producing duplicate rows. Detection.NULL_BBOX is the canonical + # sentinel value used for new writes; .null_markers() recognises both forms. assert detection_resp.algorithm, f"No detection algorithm was specified for detection {detection_repr}" try: detection_algo = algorithms_known[detection_resp.algorithm.key] @@ -452,11 +451,14 @@ def get_or_create_detection( "The processing service must declare it in the /info endpoint. " f"Known algorithms: {list(algorithms_known.keys())}" ) from err - existing_detection = Detection.objects.filter( - source_image=source_image, - bbox__isnull=True, - detection_algorithm=detection_algo, - ).first() + existing_detection = ( + Detection.objects.filter( + source_image=source_image, + detection_algorithm=detection_algo, + ) + .null_markers() + .first() + ) else: # Real detection: algorithm-agnostic — same bbox = same physical detection existing_detection = Detection.objects.filter(