diff --git a/ami/main/api/serializers.py b/ami/main/api/serializers.py index a34e7d561..db6aa48bf 100644 --- a/ami/main/api/serializers.py +++ b/ami/main/api/serializers.py @@ -588,6 +588,14 @@ def get_taxa(self, obj): return [{"id": taxon.id, "name": taxon.name} for taxon in obj.taxa.all()] +def agreement_requested(request: Request | None) -> bool: + """Whether ``with_agreement=true`` is set, gating the heavier agreed_exact_count.""" + if request is None: + return False + value = request.query_params.get("with_agreement", "") + return str(value).lower() in ("true", "1", "yes", "on") + + class TaxonListSerializer(DefaultSerializer): # latest_detection = DetectionNestedSerializer(read_only=True) occurrences = serializers.SerializerMethodField() @@ -595,6 +603,12 @@ class TaxonListSerializer(DefaultSerializer): parent_id = serializers.PrimaryKeyRelatedField(queryset=Taxon.objects.all(), source="parent") tags = serializers.SerializerMethodField() + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # agreed_exact_count is a gated annotation: omit it unless with_agreement=true. 
+ if not agreement_requested(self.context.get("request")): + self.fields.pop("agreed_exact_count", None) + def get_tags(self, obj): tag_list = getattr(obj, "prefetched_tags", []) return TagSerializer(tag_list, many=True, context=self.context).data @@ -609,6 +623,9 @@ class Meta: "parents", "details", "occurrences_count", + "verified_count", + "agreed_with_prediction_count", + "agreed_exact_count", "occurrences", "tags", "last_detected", @@ -886,6 +903,9 @@ class Meta: "parents", "details", "occurrences_count", + "verified_count", + "agreed_with_prediction_count", + "agreed_exact_count", "events_count", "occurrences", "gbif_taxon_key", diff --git a/ami/main/api/views.py b/ami/main/api/views.py index 5a6d5aece..0a09ae075 100644 --- a/ami/main/api/views.py +++ b/ami/main/api/views.py @@ -37,6 +37,8 @@ from ami.utils.storages import ConnectionTestResult from ..models import ( + BEST_IDENTIFICATION_ORDER, + BEST_MACHINE_PREDICTION_ORDER, NULL_DETECTIONS_FILTER, Classification, Deployment, @@ -1428,6 +1430,7 @@ class TaxonViewSet(DefaultViewSet, ProjectMixin): "created_at", "updated_at", "occurrences_count", + "verified_count", "last_detected", "best_determination_score", "name", @@ -1654,6 +1657,114 @@ def get_taxa_observed( # Efficient EXISTS check that uses the composite index qs = qs.filter(models.Exists(Occurrence.objects.filter(base_filter))) + qs = self.add_verification_data(qs, occurrence_filters, default_filters_q) + + return qs + + def _include_agreement(self) -> bool: + """Whether the heavier ``agreed_exact_count`` annotation should be computed.""" + if self.action == "retrieve": + return True + return bool(BooleanField(required=False).clean(self.request.query_params.get("with_agreement"))) + + def add_verification_data( + self, qs: QuerySet, occurrence_filters: models.Q, default_filters_q: models.Q + ) -> QuerySet: + """ + Annotate per-taxon verification and human/model agreement counts, and apply the + ``verified=true|false`` filter on list responses. 
+ + Counts roll up descendant occurrences (verifying a species also counts toward its + genus/family rows) and respect the project's default filters (same + ``apply_defaults`` handling as ``occurrences_count``). + + All three counts only concern *verified* occurrences (those with a non-withdrawn + Identification), which are sparse relative to all occurrences. So the hierarchical + rollup is computed in a single pass over that small set in Python and applied as + constant-time ``CASE`` annotations. A correlated ``parents_json`` subquery per + taxon does not scale: on large projects it forces a per-row scan that the GIN + index can't serve (the containment RHS is an ``OuterRef``), timing out the list. + """ + include_agreement = self._include_agreement() + + # The chosen (best, non-withdrawn) identification's agreed_with_prediction FK. + best_identification_agreed_prediction = models.Subquery( + Identification.objects.filter(occurrence=models.OuterRef("pk"), withdrawn=False) + .order_by(*BEST_IDENTIFICATION_ORDER) + .values("agreed_with_prediction_id")[:1] + ) + verified_occurrences = ( + Occurrence.objects.filter(occurrence_filters) + .filter(default_filters_q) + .filter(models.Exists(Identification.objects.filter(occurrence=models.OuterRef("pk"), withdrawn=False))) + .annotate(_agreed_prediction_id=best_identification_agreed_prediction) + ) + value_fields = ["determination_id", "determination__parents_json", "_agreed_prediction_id"] + if include_agreement: + # Top machine prediction's taxon for the same occurrence. 
+ verified_occurrences = verified_occurrences.annotate( + _best_machine_taxon_id=models.Subquery( + Classification.objects.filter(detection__occurrence=models.OuterRef("pk")) + .order_by(*BEST_MACHINE_PREDICTION_ORDER) + .values("taxon_id")[:1] + ) + ) + value_fields.append("_best_machine_taxon_id") + + verified_counts: dict[int, int] = {} + agreed_with_prediction_counts: dict[int, int] = {} + agreed_exact_counts: dict[int, int] = {} + for row in verified_occurrences.values(*value_fields): + determination_id = row["determination_id"] + # The taxon itself plus every ancestor — i.e. every row this occurrence rolls up to. + taxon_ids: set[int] = set() + if determination_id is not None: + taxon_ids.add(determination_id) + for parent in row["determination__parents_json"] or []: + # parents_json round-trips through the pydantic schema field, so elements + # may be dicts or ``TaxonParent`` objects depending on the query path. + parent_id = parent.get("id") if isinstance(parent, dict) else getattr(parent, "id", None) + if parent_id is not None: + taxon_ids.add(int(parent_id)) + + for taxon_id in taxon_ids: + verified_counts[taxon_id] = verified_counts.get(taxon_id, 0) + 1 + if row["_agreed_prediction_id"] is not None: + for taxon_id in taxon_ids: + agreed_with_prediction_counts[taxon_id] = agreed_with_prediction_counts.get(taxon_id, 0) + 1 + if ( + include_agreement + and determination_id is not None + and determination_id == row["_best_machine_taxon_id"] + ): + for taxon_id in taxon_ids: + agreed_exact_counts[taxon_id] = agreed_exact_counts.get(taxon_id, 0) + 1 + + def count_annotation(counts: dict[int, int]) -> models.expressions.Combinable: + if not counts: + return models.Value(0, output_field=models.IntegerField()) + return models.Case( + *(models.When(id=taxon_id, then=models.Value(count)) for taxon_id, count in counts.items()), + default=models.Value(0), + output_field=models.IntegerField(), + ) + + qs = qs.annotate( + 
verified_count=count_annotation(verified_counts), + agreed_with_prediction_count=count_annotation(agreed_with_prediction_counts), + ) + if include_agreement: + qs = qs.annotate(agreed_exact_count=count_annotation(agreed_exact_counts)) + + # verified=true|false filter (list only); verified=false is the strict complement. + if self.action == "list" and "verified" in self.request.query_params: + verified = BooleanField(required=False).clean(self.request.query_params.get("verified")) + verified_taxon_ids = list(verified_counts.keys()) + if verified: + qs = qs.filter(id__in=verified_taxon_ids) + else: + qs = qs.exclude(id__in=verified_taxon_ids) + return qs def attach_tags_by_project(self, qs: QuerySet, project: Project) -> QuerySet: diff --git a/ami/main/migrations/0085_taxon_parents_json_gin_index.py b/ami/main/migrations/0085_taxon_parents_json_gin_index.py new file mode 100644 index 000000000..94b65ff2a --- /dev/null +++ b/ami/main/migrations/0085_taxon_parents_json_gin_index.py @@ -0,0 +1,31 @@ +from django.db import migrations + + +class Migration(migrations.Migration): + """ + GIN index on Taxon.parents_json to support hierarchical (descendant) rollup + of the per-taxon verification / agreement counts added for issue #1316. + + Without it, Family- and Order-rank rows on large projects fall back to a + seq-scan on the parents_json containment (`@>`) test and dominate query time. + + CREATE INDEX CONCURRENTLY can't run inside a transaction, so this migration + is non-atomic. IF NOT EXISTS keeps it safe to co-exist with the same index if + it lands separately via the #1307 follow-up. 
+ """ + + atomic = False + + dependencies = [ + ("main", "0084_revoke_delete_job_from_roles"), + ] + + operations = [ + migrations.RunSQL( + sql=( + "CREATE INDEX CONCURRENTLY IF NOT EXISTS main_taxon_parents_json_gin_idx " + "ON main_taxon USING gin (parents_json jsonb_path_ops);" + ), + reverse_sql="DROP INDEX CONCURRENTLY IF EXISTS main_taxon_parents_json_gin_idx;", + ), + ] diff --git a/ami/main/models.py b/ami/main/models.py index b30b4e645..d772d2ca7 100644 --- a/ami/main/models.py +++ b/ami/main/models.py @@ -3811,6 +3811,18 @@ def best_determination_score(self) -> float | None: # This is handled by an annotation if we are filtering by project, deployment or event return None + def verified_count(self) -> int | None: + # Handled by an annotation when filtering by project (TaxonViewSet.add_verification_data) + return None + + def agreed_with_prediction_count(self) -> int | None: + # Handled by an annotation when filtering by project (TaxonViewSet.add_verification_data) + return None + + def agreed_exact_count(self) -> int | None: + # Handled by an annotation only when with_agreement is requested or on the detail view + return None + def occurrence_images(self, limit: int | None = 10) -> list[str]: # This is handled by an annotation if we are filtering by project, deployment or event return [] diff --git a/ami/main/tests.py b/ami/main/tests.py index 517c4c87b..d419d7acc 100644 --- a/ami/main/tests.py +++ b/ami/main/tests.py @@ -4761,3 +4761,132 @@ def test_registration_order_preserves_occurrence_retrieve(self): retrieve_response = self.client.get(f"/api/v2/occurrences/{occurrence.pk}/?project_id={self.project.pk}") self.assertEqual(stats_response.status_code, 200, "stats URL must resolve") self.assertEqual(retrieve_response.status_code, 200, "occurrence retrieve must still work") + + +class TestTaxaVerification(APITestCase): + """Per-taxon verification + human/model agreement annotations and the verified filter (#1316).""" + + def setUp(self): + self.project, 
self.deployment = setup_test_project(reuse=False) + self.taxa_list = create_taxa(self.project) + self.order = Taxon.objects.get(name="Lepidoptera") + self.family = Taxon.objects.get(name="Nymphalidae") + self.genus = Taxon.objects.get(name="Vanessa") + self.cardui = Taxon.objects.get(name="Vanessa cardui") + self.atalanta = Taxon.objects.get(name="Vanessa atalanta") + self.itea = Taxon.objects.get(name="Vanessa itea") + + create_captures(deployment=self.deployment, num_nights=1, images_per_night=3) + # 3 occurrences ML-determined to cardui, 1 to itea (left unverified) + create_occurrences(deployment=self.deployment, num=3, taxon=self.cardui, determination_score=0.9) + create_occurrences(deployment=self.deployment, num=1, taxon=self.itea, determination_score=0.9) + + self.user = User.objects.create_user(email="verifier@insectai.org", is_staff=True, is_superuser=True) + self.client.force_authenticate(user=self.user) + + cardui_occ = list(Occurrence.objects.filter(project=self.project, determination=self.cardui).order_by("pk")) + self.assertEqual(len(cardui_occ), 3) + self.occ_pred, self.occ_exact, self.occ_disagree = cardui_occ + + # occ_pred: user agrees with the model prediction (cardui), agreed_with_prediction set + Identification.objects.create( + occurrence=self.occ_pred, + taxon=self.cardui, + user=self.user, + agreed_with_prediction=self.occ_pred.best_prediction, + ) + # occ_exact: same taxon as the model, but not via the "agree" workflow + Identification.objects.create(occurrence=self.occ_exact, taxon=self.cardui, user=self.user) + # occ_disagree: user overrides to a different taxon (atalanta) than the model (cardui) + Identification.objects.create(occurrence=self.occ_disagree, taxon=self.atalanta, user=self.user) + + self.itea_occ = Occurrence.objects.get(project=self.project, determination=self.itea) + self.list_url = f"/api/v2/taxa/?project_id={self.project.pk}&limit=1000" + + def _detail(self, taxon): + res = 
self.client.get(f"/api/v2/taxa/{taxon.pk}/?project_id={self.project.pk}") + self.assertEqual(res.status_code, status.HTTP_200_OK) + return res.json() + + def _list_by_name(self, url=None): + res = self.client.get(url or self.list_url) + self.assertEqual(res.status_code, status.HTTP_200_OK) + return {row["name"]: row for row in res.json()["results"]} + + # --- verified_count (hierarchical rollup) --- + + def test_verified_count_species(self): + self.assertEqual(self._detail(self.cardui)["verified_count"], 2) + self.assertEqual(self._detail(self.atalanta)["verified_count"], 1) + self.assertEqual(self._detail(self.itea)["verified_count"], 0) + + def test_verified_count_rolls_up_to_ancestors(self): + # Verifying species marks genus/family/order verified, occurrence-weighted by descendants. + for ancestor in (self.genus, self.family, self.order): + self.assertEqual(self._detail(ancestor)["verified_count"], 3, ancestor.name) + + # --- agreed_with_prediction_count (chosen identification only) --- + + def test_agreed_with_prediction_counts_only_chosen_identification(self): + self.assertEqual(self._detail(self.cardui)["agreed_with_prediction_count"], 1) + self.assertEqual(self._detail(self.atalanta)["agreed_with_prediction_count"], 0) + # Rolls up: only occ_pred contributes under the genus. + self.assertEqual(self._detail(self.genus)["agreed_with_prediction_count"], 1) + + # --- agreed_exact_count (gated) --- + + def test_agreed_exact_count_on_detail(self): + # occ_pred + occ_exact: user determination == top machine prediction (cardui). + self.assertEqual(self._detail(self.cardui)["agreed_exact_count"], 2) + # occ_disagree: user picked atalanta, model said cardui → not exact. 
+ self.assertEqual(self._detail(self.atalanta)["agreed_exact_count"], 0) + self.assertEqual(self._detail(self.genus)["agreed_exact_count"], 2) + + def test_agreed_exact_count_gated_on_list(self): + rows = self._list_by_name() + self.assertIn("verified_count", rows["Vanessa cardui"]) + self.assertIn("agreed_with_prediction_count", rows["Vanessa cardui"]) + self.assertNotIn("agreed_exact_count", rows["Vanessa cardui"]) + + rows = self._list_by_name(self.list_url + "&with_agreement=true") + self.assertIn("agreed_exact_count", rows["Vanessa cardui"]) + self.assertEqual(rows["Vanessa cardui"]["agreed_exact_count"], 2) + + # --- list field values --- + + def test_list_field_values(self): + rows = self._list_by_name() + self.assertEqual(rows["Vanessa cardui"]["occurrences_count"], 2) + self.assertEqual(rows["Vanessa cardui"]["verified_count"], 2) + self.assertEqual(rows["Vanessa cardui"]["agreed_with_prediction_count"], 1) + self.assertEqual(rows["Vanessa atalanta"]["verified_count"], 1) + self.assertEqual(rows["Vanessa itea"]["verified_count"], 0) + + # --- verified=true|false filter --- + + def test_verified_filter_true_false_complement(self): + all_names = set(self._list_by_name().keys()) + verified = set(self._list_by_name(self.list_url + "&verified=true").keys()) + unverified = set(self._list_by_name(self.list_url + "&verified=false").keys()) + self.assertEqual(verified, {"Vanessa cardui", "Vanessa atalanta"}) + self.assertEqual(unverified, {"Vanessa itea"}) + # verified=false is the strict complement of verified=true on the filtered set. 
+ self.assertEqual(verified | unverified, all_names) + self.assertEqual(verified & unverified, set()) + + def test_ordering_by_verified_count(self): + res = self.client.get(self.list_url + "&ordering=verified_count") + self.assertEqual(res.status_code, status.HTTP_200_OK) + counts = [row["verified_count"] for row in res.json()["results"]] + self.assertEqual(counts, sorted(counts)) + + # --- apply_defaults handling --- + + def test_verified_filter_respects_apply_defaults(self): + self.project.default_filters_exclude_taxa.add(self.atalanta) + + verified_default = set(self._list_by_name(self.list_url + "&verified=true").keys()) + self.assertEqual(verified_default, {"Vanessa cardui"}) + + verified_bypassed = set(self._list_by_name(self.list_url + "&verified=true&apply_defaults=false").keys()) + self.assertEqual(verified_bypassed, {"Vanessa cardui", "Vanessa atalanta"}) diff --git a/docs/claude/planning/2026-05-20-taxa-verification-guidance-ticket.md b/docs/claude/planning/2026-05-20-taxa-verification-guidance-ticket.md new file mode 100644 index 000000000..5d2fc9747 --- /dev/null +++ b/docs/claude/planning/2026-05-20-taxa-verification-guidance-ticket.md @@ -0,0 +1,165 @@ +# Add guidance and stats about which taxa need verification + +## Motivation + +To get a meaningful project-wide picture of model accuracy and data quality, users should verify at least one occurrence of every unique taxon their pipelines have apparently observed. Today there is no surface that tells them *which* taxa still need attention or how many they have already verified — they have to drill into the occurrence list per taxon and check by hand. + +This ticket adds per-taxon verification and agreement data to the existing taxa list endpoint, plus the matching UI controls, so users can sort and filter to find the taxa that most need their attention. 
It is part of this year's proactive-surfacing goal and is the natural next step after [#1296](https://github.com/RolnickLab/antenna/pull/1296) (project summary) and [#1307](https://github.com/RolnickLab/antenna/pull/1307) (dataset-wide model agreement endpoint). + +## Scope + +Backend annotations on `GET /api/v2/taxa/` plus a new filter, and a new column + filter in the taxa list table. The dataset-wide `/occurrences/stats/model-agreement/` endpoint from #1307 is unchanged — it stays as the aggregate view; this ticket adds the per-taxon breakdown. + +### Out of scope (queued separately) + +- "Needs verification" badge / status pill on rows. +- Project-summary widget with `X of Y unique taxa verified`. +- Dedicated unverified-taxa queue page. +- Backfilling counts onto a denormalized `Taxon` field. +- Macro-average rollup (occurrence-weighted descendant sum is the only rollup in this ticket). + +## Backend + +### Filter + +- `verified=true|false` on `TaxonViewSet` — matches taxa with at least one non-withdrawn `Identification` on an occurrence whose `determination` is the taxon itself **or any descendant** (via `parents_json__contains`). Implemented as an `EXISTS` subquery, project-scoped, respects `apply_default_filters`. + +### Always-on annotations (cheap) + +Both reuse the existing hierarchical-match pattern (`parents_json__contains [{id: OuterRef("id")}] OR determination_id = OuterRef("id")`) used by the `taxon=` filter in the occurrence list, so a Family row aggregates all its descendant species' occurrences — occurrence-weighted by construction. + +- `verified_count` — count of occurrences under the taxon (incl. descendants) with at least one non-withdrawn `Identification`. Sortable. Single correlated subquery per row. +- `agreed_with_prediction_count` — count of verified occurrences whose chosen `Identification.agreed_with_prediction` is non-null. No join through `Classification` needed — just a non-null FK check. 
Different signal from `agreed_exact_count` below: this measures the *agree-with-model workflow* (user clicked the "agree" button on a prediction), not independent-match accuracy. + +### Gated annotation (heavier) + +- `agreed_exact_count` — count of verified occurrences where `occurrence.determination_id` equals the top machine `Classification.taxon_id` for the same occurrence. Surfaced only when `with_agreement=true` is on the request. Cost: two correlated subqueries per row (verified set + best classification per occurrence). Needs benchmarking on P#85 (13k verified) before this can default on. **NOT** included in the default list response. + +`occurrence.determination` is already maintained as the top non-withdrawn user identification's taxon (`update_occurrence_determination` runs on every `Identification.save`, see `ami/main/models.py:2528, 3383-3393`), so we do not need a correlated subquery over `Identification` to find the best human identification — just read `determination_id` directly. This is what makes `agreed_exact_count` only two subqueries instead of three. + +### Dropped vs PR #1307 + +- `agreed_under_order_count` is not added per-taxon. The under-order LCA bucket from `/occurrences/stats/model-agreement/` stays available at the dataset level; per-taxon it's redundant since each row already represents a single taxon. + +### Detail view + +`GET /api/v2/taxa/<id>/` should include all four fields above unconditionally — single-row cost is negligible. + +### Performance prerequisites + +- The hierarchical match uses `Taxon.parents_json` containment. Without a GIN index on that column, Family- and Order-rank rows on large projects (P#85 has 13k verified, P#20 has 41k occurrences) will fall back to seq-scan and dominate query time. 
**This index is already flagged as a follow-up to #1307**: + + ```sql + CREATE INDEX CONCURRENTLY main_taxon_parents_json_gin_idx + ON main_taxon USING gin (parents_json jsonb_path_ops); + ``` + + Treat shipping the GIN index as a hard blocker for enabling recursive rollup correctness at higher ranks. Without it, this ticket is safe to ship for projects with shallow taxa lists (species-only) but will be slow elsewhere. + +- The composite-index follow-up from #1307 (`main_occurrence (project_id, determination_score)`) is also relevant — `verified_count` filters by project + verified flag and benefits from the same indexed path. + +### Cost benchmarks to run before merge + +| Query | Project | Expected | Acceptance | +|---|---|---|---| +| `/taxa/?project_id=18&verified=true` | P#18 (45 verified) | < 200ms warm | ≤ 1.5× current `/taxa/` p99 | +| `/taxa/?project_id=85&verified=false` | P#85 (13k verified) | < 500ms warm | ≤ 2× current p99 | +| `/taxa/?project_id=85&with_agreement=true` | P#85 | < 1.5s warm | < 5s cold | +| `/taxa/?project_id=85&ordering=verified_count` | P#85 | < 1s warm | doesn't fall off cliff | + +If `with_agreement=true` exceeds the cold budget on P#85, fall back to keeping `agreed_exact_count` on the detail view only and add a `/taxa/stats/verification/` aggregate endpoint mirroring the #1307 pattern instead. + +## Frontend + +### Taxa list page (`/projects/<project_id>/taxa`) + +- New sortable column **Verified** showing `verified_count` per row. Default ordering unchanged; user can click the column to sort asc (least-verified first → matches the proactive-surfacing intent). +- New filter pill **Verification status**: `All` (default) / `Verified` / `Unverified`. Wires to the `verified=` query param. +- Existing `Occurrences` column stays as the primary count signal; `Verified` sits next to it so the ratio is visually obvious. + +### Not in this ticket + +- No new column for `agreed_with_prediction_count` or `agreed_exact_count` in the table by default. 
These are surfaced on the **taxon detail page** only (existing detail page; add a small "Verification" panel showing the four numbers). If the table eventually grows a "Model accuracy" toggle, it would flip `with_agreement=true` on — design that in a follow-up. + +## API contract examples + +```bash +# Verified taxa only, project default filters applied +curl '.../api/v2/taxa/?project_id=18&verified=true' + +# Unverified taxa, sorted by occurrence count desc — the "biggest gaps" view +curl '.../api/v2/taxa/?project_id=18&verified=false&ordering=-occurrences_count' + +# Sort by which taxa have the most human verification +curl '.../api/v2/taxa/?project_id=18&ordering=-verified_count' + +# Enable the heavier agreed_exact_count on a list response +curl '.../api/v2/taxa/?project_id=18&with_agreement=true' + +# Detail view always includes all four +curl '.../api/v2/taxa/567/?project_id=18' +``` + +### Response shape (list) + +```json +{ + "id": 567, + "name": "Hyalophora cecropia", + "rank": "SPECIES", + "occurrences_count": 124, + "verified_count": 3, + "agreed_with_prediction_count": 2, + "best_determination_score": 0.94, + "last_detected": "2025-08-12T03:14:22" +} +``` + +With `with_agreement=true`: + +```json +{ + "...": "...", + "verified_count": 3, + "agreed_with_prediction_count": 2, + "agreed_exact_count": 2 +} +``` + +## Test plan + +Backend: + +- [ ] Unit test on `Taxon` queryset: `verified=true` returns only taxa with non-withdrawn identifications, respecting hierarchical match (verifying a species also marks its genus/family as verified at higher-rank rows). +- [ ] Unit test on `Taxon` queryset: `verified=false` is the strict complement on the project's filtered taxa set. +- [ ] Unit test: `verified_count` equals number of verified occurrences under the taxon (descendants included). +- [ ] Unit test: `agreed_with_prediction_count` only counts the chosen identification's `agreed_with_prediction`, not all identifications on the occurrence. 
+- [ ] Unit test: `agreed_exact_count` reads `occurrence.determination_id` for the user side and top-score `Classification.taxon_id` for the model side, and is only populated when `with_agreement=true`. +- [ ] HTTP test: list endpoint shape includes new fields; gated field absent unless flag is set. +- [ ] HTTP test: `verified=` filter behaves correctly under `apply_defaults=true|false`. +- [ ] Bench: queries above hit acceptance thresholds. + +Frontend: + +- [ ] Verified column renders, sorts asc and desc. +- [ ] Filter pill updates URL, persists across reload, clears with the rest of the project filter state. +- [ ] Detail page Verification panel renders all four fields. + +## Follow-ups (not in this ticket) + +- `Taxon.parents_json` GIN index (carries over from #1307 — gating dependency for rollup correctness at higher ranks). +- `main_occurrence (project_id, determination_score)` composite index (also from #1307). +- Project-summary "X of Y unique taxa verified" widget on the overview page. +- "Needs verification" status pill on taxa rows once we know what the threshold should be (`verified_count == 0` is the obvious v1). +- Dedicated unverified-taxa queue view (pre-filtered, ranked by occurrence count desc). +- Macro-averaged agreement rollup at higher ranks (alternative to the occurrence-weighted sum this ticket ships). +- A `with_counts` / `with_agreement` query-param convention audit across the API — we already have similar gated-annotation patterns elsewhere; document a single convention. + +## References + +- PR #1307 — dataset-wide `/occurrences/stats/model-agreement/` endpoint, established the LCA + agreement-bucket compute that this ticket reuses per-taxon. Includes the GIN-index and composite-index follow-ups this ticket inherits. +- PR #1296 — project summary view, the surfacing target this work feeds into. +- `ami/main/api/views.py:1403` — `TaxonViewSet`, where the new annotations and filter land. 
+- `ami/main/api/views.py:1576` — `get_taxa_observed`, the existing helper that already wires `parents_json`-aware subqueries; pattern for adding the new ones. +- `ami/main/models.py:2440` — `Identification` model (`agreed_with_prediction` FK). +- `ami/main/models.py:3383` — `update_occurrence_determination`, which keeps `Occurrence.determination` aligned with the top non-withdrawn user identification. diff --git a/ui/src/data-services/models/species.ts b/ui/src/data-services/models/species.ts index e507eca4f..bd0f34f33 100644 --- a/ui/src/data-services/models/species.ts +++ b/ui/src/data-services/models/species.ts @@ -80,6 +80,19 @@ export class Species extends Taxon { return this._species.occurrences_count ?? 0 } + get numVerified(): number { + return this._species.verified_count ?? 0 + } + + get numAgreedWithPrediction(): number { + return this._species.agreed_with_prediction_count ?? 0 + } + + // Only present when with_agreement=true is requested (or on the detail view). + get numAgreedExact(): number | undefined { + return this._species.agreed_exact_count ?? undefined + } + get score(): number | undefined { const score = this._species.best_determination_score diff --git a/ui/src/pages/species-details/species-details.tsx b/ui/src/pages/species-details/species-details.tsx index d6677033c..bd5f20edf 100644 --- a/ui/src/pages/species-details/species-details.tsx +++ b/ui/src/pages/species-details/species-details.tsx @@ -157,6 +157,25 @@ export const SpeciesDetails = ({ })} /> + + + + {species.numAgreedExact !== undefined ? 
( + + ) : null} + diff --git a/ui/src/pages/species/species-columns.tsx b/ui/src/pages/species/species-columns.tsx index 49bf330ad..d78181fe9 100644 --- a/ui/src/pages/species/species-columns.tsx +++ b/ui/src/pages/species/species-columns.tsx @@ -95,6 +95,24 @@ export const columns: (project: { ), }, + { + id: 'verified', + sortField: 'verified_count', + name: 'Verified', + styles: { + textAlign: TextAlign.Right, + }, + renderCell: (item: Species) => ( + + + + ), + }, { id: 'best-determination-score', name: translate(STRING.FIELD_LABEL_BEST_SCORE), diff --git a/ui/src/pages/species/species.tsx b/ui/src/pages/species/species.tsx index 6609db0fa..a693ee995 100644 --- a/ui/src/pages/species/species.tsx +++ b/ui/src/pages/species/species.tsx @@ -39,6 +39,7 @@ export const Species = () => { rank: false, 'last-seen': true, occurrences: true, + verified: true, 'best-determination-score': true, 'created-at': false, 'updated-at': false, @@ -82,6 +83,7 @@ export const Species = () => { )} + {project?.featureFlags.tags ? ( <> diff --git a/ui/src/utils/getAppRoute.ts b/ui/src/utils/getAppRoute.ts index 4cb054028..ca4012f94 100644 --- a/ui/src/utils/getAppRoute.ts +++ b/ui/src/utils/getAppRoute.ts @@ -13,6 +13,7 @@ type FilterType = | 'taxa_list_id' | 'taxon' | 'timestamp' + | 'verified' export const getAppRoute = ({ to,