From bd25319d04c9607bb42fe1c9dff62ff04dad4253 Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Tue, 27 May 2025 19:39:44 -0400 Subject: [PATCH 01/24] feat: implement detection tracking and integrate it into save_results --- ami/ml/models/pipeline.py | 11 ++-- ami/ml/tracking.py | 114 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 ami/ml/tracking.py diff --git a/ami/ml/models/pipeline.py b/ami/ml/models/pipeline.py index 13bbb5350..a183226b1 100644 --- a/ami/ml/models/pipeline.py +++ b/ami/ml/models/pipeline.py @@ -47,6 +47,7 @@ SourceImageResponse, ) from ami.ml.tasks import celery_app, create_detection_images +from ami.ml.tracking import assign_occurrences_by_tracking from ami.utils.requests import create_session logger = logging.getLogger(__name__) @@ -881,10 +882,12 @@ def save_results( # Create a new occurrence for each detection (no tracking yet) # @TODO remove when we implement tracking! - create_and_update_occurrences_for_detections( - detections=detections, - logger=job_logger, - ) + # create_and_update_occurrences_for_detections( + # detections=detections, + # logger=job_logger, + # ) + job_logger.info(f"Creating occurrences for {len(detections)} detections ") + assign_occurrences_by_tracking(detections=detections, logger=job_logger) # Update precalculated counts on source images and events source_images = list(source_images) diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py new file mode 100644 index 000000000..7c3ad5484 --- /dev/null +++ b/ami/ml/tracking.py @@ -0,0 +1,114 @@ +import logging +import math +from collections import defaultdict +from collections.abc import Iterable + +import numpy as np + +from ami.main.models import Detection, Occurrence + +logger = logging.getLogger(__name__) + +TRACKING_COST_THRESHOLD = 0.25 + + +def cosine_similarity(v1: Iterable[float], v2: Iterable[float]) -> float: + v1 = np.array(v1) + v2 = np.array(v2) + sim = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)) + return float(np.clip(sim, 0.0, 1.0)) + + +def iou(bb1, bb2): + xA = max(bb1[0], bb2[0]) + yA = max(bb1[1], bb2[1]) + xB = min(bb1[2], bb2[2]) + yB = min(bb1[3], bb2[3]) + interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1) + boxAArea = (bb1[2] - bb1[0] + 1) * (bb1[3] - bb1[1] + 1) + boxBArea = (bb2[2] - bb2[0] + 1) * (bb2[3] - bb2[1] + 1) + unionArea = boxAArea + boxBArea - interArea + return interArea / unionArea if unionArea > 0 else 0 + + +def box_ratio(bb1, bb2): + area1 = (bb1[2] - bb1[0] + 1) * (bb1[3] - bb1[1] + 1) + area2 = (bb2[2] - bb2[0] + 1) * (bb2[3] - bb2[1] + 1) + return min(area1, area2) / max(area1, area2) + + +def distance_ratio(bb1, bb2, img_diag): + cx1 = (bb1[0] + bb1[2]) / 2 + cy1 = (bb1[1] + bb1[3]) / 2 + cx2 = (bb2[0] + bb2[2]) / 2 + cy2 = (bb2[1] + bb2[3]) / 2 + dist = math.sqrt((cx2 - cx1) ** 2 + (cy2 - cy1) ** 2) + return dist / img_diag if img_diag > 0 else 1.0 + + +def image_diagonal(width: int, height: int) -> int: + img_diagonal = int(math.ceil(math.sqrt(width**2 + height**2))) + return img_diagonal + + +def total_cost(f1, f2, bb1, bb2, diag): + return ( + (1 - cosine_similarity(f1, f2)) + + (1 - iou(bb1, bb2)) + + (1 - box_ratio(bb1, bb2)) + + distance_ratio(bb1, bb2, diag) + ) + + +def assign_occurrences_by_tracking( + detections: list[Detection], + logger: logging.Logger, +) -> None: + """ + Perform object tracking by assigning detections across multiple source images + to the same Occurrence if they are similar enough. 
+ """ + logger.info(f"Starting to assign occurrences by tracking.{len(detections)} detections found.") + # Group detections by source image and sort + image_to_dets = defaultdict(list) + for det in detections: + image_to_dets[det.source_image.timestamp].append(det) + sorted_images = sorted(image_to_dets.keys()) + logger.info(f"Found {len(sorted_images)} source images with detections.") + last_detections = [] + + for t in sorted_images: + current_detections = image_to_dets[t] + logger.info(f"Processing {len(current_detections)} detections at {t}") + for det in current_detections: + best_match = None + best_cost = float("inf") + + for prev in last_detections: + if prev.similarity_vector is None or det.similarity_vector is None: + continue + + cost = total_cost( + det.similarity_vector, + prev.similarity_vector, + det.bbox, + prev.bbox, + image_diagonal(det.source_image.width, det.source_image.height), + ) + + if cost < best_cost: + best_cost = cost + best_match = prev + + if best_match and best_cost < TRACKING_COST_THRESHOLD: + det.occurrence = best_match.occurrence + else: + occurrence = Occurrence.objects.create(event=det.source_image.event) + det.occurrence = occurrence + logger.info(f"Created new occurrence {occurrence.pk} for detection {det.id}") + + det.save() + + last_detections = current_detections + + logger.info("Finished assigning occurrences by tracking.") From e06f79f48379524db4f567652301ae6a23067ad1 Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Wed, 28 May 2025 16:05:25 -0400 Subject: [PATCH 02/24] used the latest classification features vector instead of the detection similarity vecto --- ami/ml/models/pipeline.py | 2 ++ ami/ml/tracking.py | 49 ++++++++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/ami/ml/models/pipeline.py b/ami/ml/models/pipeline.py index a183226b1..7b79c170d 100644 --- a/ami/ml/models/pipeline.py +++ b/ami/ml/models/pipeline.py @@ -887,6 +887,8 @@ def save_results( # logger=job_logger, # ) job_logger.info(f"Creating occurrences for {len(detections)} detections ") + job_logger.info("type logger: " + str(type(job_logger))) + assign_occurrences_by_tracking(detections=detections, logger=job_logger) # Update precalculated counts on source images and events diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index 7c3ad5484..c9f85df36 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -1,4 +1,3 @@ -import logging import math from collections import defaultdict from collections.abc import Iterable @@ -7,9 +6,7 @@ from ami.main.models import Detection, Occurrence -logger = logging.getLogger(__name__) - -TRACKING_COST_THRESHOLD = 0.25 +TRACKING_COST_THRESHOLD = 2 def cosine_similarity(v1: Iterable[float], v2: Iterable[float]) -> float: @@ -60,48 +57,68 @@ def total_cost(f1, f2, bb1, bb2, diag): ) +def get_latest_feature_vector(detection: Detection): + return ( + detection.classifications.filter(features_2048__isnull=False) + .order_by("-timestamp") + .values_list("features_2048", flat=True) + .first() + ) + + def assign_occurrences_by_tracking( detections: list[Detection], - logger: logging.Logger, + logger, ) -> None: """ Perform object tracking by assigning detections across multiple source images - to the same Occurrence if they are similar enough. + to the same Occurrence if they are similar enough, based on the latest classification feature vectors. 
""" - logger.info(f"Starting to assign occurrences by tracking.{len(detections)} detections found.") - # Group detections by source image and sort + logger.info(f"Starting to assign occurrences by tracking. {len(detections)} detections found.") + + # Group detections by source image timestamp image_to_dets = defaultdict(list) for det in detections: image_to_dets[det.source_image.timestamp].append(det) - sorted_images = sorted(image_to_dets.keys()) - logger.info(f"Found {len(sorted_images)} source images with detections.") + sorted_timestamps = sorted(image_to_dets.keys()) + logger.info(f"Found {len(sorted_timestamps)} source images with detections.") + last_detections = [] - for t in sorted_images: - current_detections = image_to_dets[t] - logger.info(f"Processing {len(current_detections)} detections at {t}") + for timestamp in sorted_timestamps: + current_detections = image_to_dets[timestamp] + logger.info(f"Processing {len(current_detections)} detections at {timestamp}") + for det in current_detections: + det_vec = get_latest_feature_vector(det) + if det_vec is None: + logger.info(f"No features for detection {det.id}, skipping.") + continue + best_match = None best_cost = float("inf") for prev in last_detections: - if prev.similarity_vector is None or det.similarity_vector is None: + prev_vec = get_latest_feature_vector(prev) + if prev_vec is None: continue cost = total_cost( - det.similarity_vector, - prev.similarity_vector, + det_vec, + prev_vec, det.bbox, prev.bbox, image_diagonal(det.source_image.width, det.source_image.height), ) + logger.info(f"Comparing detection {det.id} with previous {prev.id}: cost = {cost:.4f}") if cost < best_cost: best_cost = cost best_match = prev if best_match and best_cost < TRACKING_COST_THRESHOLD: det.occurrence = best_match.occurrence + logger.info(f"Assigned detection {det.id} to existing occurrence {best_match.occurrence.pk}") else: occurrence = Occurrence.objects.create(event=det.source_image.event) det.occurrence = occurrence From 853607de7d1af8504e731d613179826d4a00d609 Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Thu, 29 May 2025 19:24:36 -0400 Subject: [PATCH 03/24] feat: added tracking stage, per-session logic, optimal detection pairing --- ami/jobs/models.py | 2 + .../0067_detection_next_detection.py | 25 ++ ami/main/models.py | 8 + ami/ml/models/pipeline.py | 17 +- ami/ml/tracking.py | 284 +++++++++++++++--- 5 files changed, 277 insertions(+), 59 deletions(-) create mode 100644 ami/main/migrations/0067_detection_next_detection.py diff --git a/ami/jobs/models.py b/ami/jobs/models.py index 189112661..b1f728c97 100644 --- a/ami/jobs/models.py +++ b/ami/jobs/models.py @@ -17,6 +17,7 @@ from ami.jobs.tasks import run_job from ami.main.models import Deployment, Project, SourceImage, SourceImageCollection from ami.ml.models import Pipeline +from ami.ml.tracking import perform_tracking_for_job from ami.utils.schemas import OrderedEnum logger = logging.getLogger(__name__) @@ -502,6 +503,7 @@ def run(cls, job: "Job"): status=JobState.SUCCESS, progress=1, ) + perform_tracking_for_job(job) job.update_status(JobState.SUCCESS, save=False) job.finished_at = datetime.datetime.now() job.save() diff --git a/ami/main/migrations/0067_detection_next_detection.py b/ami/main/migrations/0067_detection_next_detection.py new file mode 100644 index 000000000..d91ee4a0d --- /dev/null +++ b/ami/main/migrations/0067_detection_next_detection.py @@ -0,0 +1,25 @@ +# Generated by Django 4.2.10 on 2025-05-29 16:21 + +from django.db import migrations, models +import 
django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [ + ("main", "0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="detection", + name="next_detection", + field=models.OneToOneField( + blank=True, + help_text="The detection that follows this one in the tracking sequence.", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="previous_detection", + to="main.detection", + ), + ), + ] diff --git a/ami/main/models.py b/ami/main/models.py index 73985251c..0f78bed92 100644 --- a/ami/main/models.py +++ b/ami/main/models.py @@ -2137,6 +2137,14 @@ class Detection(BaseModel): classifications: models.QuerySet["Classification"] source_image_id: int detection_algorithm_id: int + next_detection = models.OneToOneField( + "self", + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="previous_detection", + help_text="The detection that follows this one in the tracking sequence.", + ) # def bbox(self): # return ( diff --git a/ami/ml/models/pipeline.py b/ami/ml/models/pipeline.py index 7b79c170d..15b87cbbb 100644 --- a/ami/ml/models/pipeline.py +++ b/ami/ml/models/pipeline.py @@ -47,7 +47,6 @@ SourceImageResponse, ) from ami.ml.tasks import celery_app, create_detection_images -from ami.ml.tracking import assign_occurrences_by_tracking from ami.utils.requests import create_session logger = logging.getLogger(__name__) @@ -882,14 +881,14 @@ def save_results( # Create a new occurrence for each detection (no tracking yet) # @TODO remove when we implement tracking! - # create_and_update_occurrences_for_detections( - # detections=detections, - # logger=job_logger, - # ) - job_logger.info(f"Creating occurrences for {len(detections)} detections ") - job_logger.info("type logger: " + str(type(job_logger))) - - assign_occurrences_by_tracking(detections=detections, logger=job_logger) + create_and_update_occurrences_for_detections( + detections=detections, + logger=job_logger, + ) + # job_logger.info(f"Creating occurrences for {len(detections)} detections ") + # check if every image in the sessions in processed + + # assign_occurrences_by_tracking(detections=detections, logger=job_logger) # Update precalculated counts on source images and events source_images = list(source_images) diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index c9f85df36..658df3d2f 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -1,10 +1,11 @@ import math -from collections import defaultdict from collections.abc import Iterable import numpy as np +from django.db.models import Count -from ami.main.models import Detection, Occurrence +from ami.main.models import Classification, Detection, Event, Occurrence +from ami.ml.models import Algorithm TRACKING_COST_THRESHOLD = 2 @@ -57,75 +58,258 @@ def total_cost(f1, f2, bb1, bb2, diag): ) -def get_latest_feature_vector(detection: Detection): +def get_most_common_algorithm_for_event(event): + """ + Returns the most common Algorithm object (used in classifications with features_2048) for the given event. 
+ """ + most_common = ( + Classification.objects.filter( + detection__source_image__event=event, + features_2048__isnull=False, + ) + .values("algorithm_id") + .annotate(count=Count("id")) + .order_by("-count") + .first() + ) + + if most_common: + return Algorithm.objects.get(id=most_common["algorithm_id"]) + + return None + + +def event_fully_processed(event, pipeline_algorithms, logger) -> bool: + """ + with a classification from any of the pipeline algorithms and a non-null features_2048 vector. + """ + total_captures = event.captures.count() + logger.info(f"Checking if event {event.pk} is fully processed... Total captures: {total_captures}") + + processed_captures = ( + event.captures.filter( + detections__classifications__algorithm__in=pipeline_algorithms, + detections__classifications__features_2048__isnull=False, + ) + .distinct() + .count() + ) + + if processed_captures < total_captures: + logger.info( + f"Event {event.pk} is not fully processed. " + f"Only {processed_captures}/{total_captures} captures have processed detections." + ) + return False + + logger.info(f"Event {event.pk} is fully processed.") + return True + + +def get_feature_vector(detection: Detection, algorithm: Algorithm): + """ + Returns the latest non-null features_2048 vector from the given detection, + extracted by a specific algorithm. + """ return ( - detection.classifications.filter(features_2048__isnull=False) + detection.classifications.filter(features_2048__isnull=False, algorithm=algorithm) .order_by("-timestamp") .values_list("features_2048", flat=True) .first() ) -def assign_occurrences_by_tracking( - detections: list[Detection], +def assign_occurrences_from_detection_chains(source_images, logger): + """ + Walk detection chains across source images and assign a new occurrence to each chain. + """ + visited = set() + + for image in source_images: + for det in image.detections.all(): + if det.id in visited or getattr(det, "previous_detection", None) is not None: + continue # Already processed or this is not a chain start + + chain = [] + current = det + while current and current.id not in visited: + chain.append(current) + visited.add(current.id) + current = current.next_detection + + if chain: + old_occurrences = {d.occurrence_id for d in chain if d.occurrence_id} + + # Delete old occurrences (if any) + for occ_id in old_occurrences: + try: + Occurrence.objects.filter(id=occ_id).delete() + logger.info(f"Deleted old occurrence {occ_id} before reassignment.") + except Exception as e: + logger.warning(f"Failed to delete occurrence {occ_id}: {e}") + + occurrence = Occurrence.objects.create(event=chain[0].source_image.event) + for d in chain: + d.occurrence = occurrence + d.save() + logger.info(f"Assigned occurrence {occurrence.pk} to chain of {len(chain)} detections") + + +def assign_occurrences_by_tracking_images( + source_images, logger, + cost_threshold: float = TRACKING_COST_THRESHOLD, ) -> None: """ - Perform object tracking by assigning detections across multiple source images - to the same Occurrence if they are similar enough, based on the latest classification feature vectors. + Track detections across ordered source images and assign them to occurrences. """ - logger.info(f"Starting to assign occurrences by tracking. 
{len(detections)} detections found.") + logger.info(f"Starting occurrence tracking over {len(source_images)} images") + + for i in range(len(source_images) - 1): + current_image = source_images[i] + next_image = source_images[i + 1] + + current_detections = list(current_image.detections.all()) + next_detections = list(next_image.detections.all()) + + logger.info( + f"""Tracking: Processing image {i + 1}/{len(source_images)}: + {len(current_detections)} -> {len(next_detections)} detections""" + ) + # Get the most common algorithm for the current event + most_common_algorithm = get_most_common_algorithm_for_event(current_image.event) + logger.info( + f"""Using most common algorithm for event {current_image.event.pk}: + {most_common_algorithm.name if most_common_algorithm else 'None'}""" + ) + + pair_detections( + current_detections, + next_detections, + image_width=current_image.width, + image_height=current_image.height, + cost_threshold=cost_threshold, + algorithm=most_common_algorithm, + logger=logger, + ) + + assign_occurrences_from_detection_chains(source_images, logger) - # Group detections by source image timestamp - image_to_dets = defaultdict(list) - for det in detections: - image_to_dets[det.source_image.timestamp].append(det) - sorted_timestamps = sorted(image_to_dets.keys()) - logger.info(f"Found {len(sorted_timestamps)} source images with detections.") - last_detections = [] +def pair_detections( + current_detections: list, + next_detections: list, + image_width: int, + image_height: int, + cost_threshold: float, + algorithm, + logger, +) -> None: + """ + Assigns next_detection for each detection in current_detections based on lowest cost match + from next_detections, ensuring unique assignments and no duplicates. + + Only pairs with cost < threshold are considered. 
+ """ + logger.info(f"Pairing {len(current_detections)} - >{len(next_detections)} detections") + + potential_matches = [] - for timestamp in sorted_timestamps: - current_detections = image_to_dets[timestamp] - logger.info(f"Processing {len(current_detections)} detections at {timestamp}") + for det in current_detections: + det_vec = get_feature_vector(det, algorithm) + if det_vec is None: + logger.debug(f"Skipping detection {det.id} (no features)") + continue - for det in current_detections: - det_vec = get_latest_feature_vector(det) - if det_vec is None: - logger.info(f"No features for detection {det.id}, skipping.") + for next_det in next_detections: + next_vec = get_feature_vector(next_det, algorithm) + if next_vec is None: + logger.debug(f"Skipping next detection {next_det.id} (no features)") continue - best_match = None - best_cost = float("inf") + cost = total_cost( + det_vec, + next_vec, + det.bbox, + next_det.bbox, + image_diagonal(image_width, image_height), + ) - for prev in last_detections: - prev_vec = get_latest_feature_vector(prev) - if prev_vec is None: - continue + if cost < cost_threshold: + potential_matches.append((det, next_det, cost)) - cost = total_cost( - det_vec, - prev_vec, - det.bbox, - prev.bbox, - image_diagonal(det.source_image.width, det.source_image.height), - ) + # Sort by cost: lower is better + potential_matches.sort(key=lambda x: x[2]) + + assigned_current_ids = set() + assigned_next_ids = set() + + for det, next_det, cost in potential_matches: + if det.id in assigned_current_ids or next_det.id in assigned_next_ids: + continue + # check if next detection has a previous detection already assigned + if getattr(next_det, "previous_detection", None) is not None: + logger.info(f"{next_det.id} already has previous detection: {next_det.previous_detection.id}") + previous_detection = getattr(next_det, "previous_detection", None) + previous_detection.next_detection = None + previous_detection.save() + logger.info(f"Cleared previous detection {previous_detection.pk} -> {next_det.pk} link") + + logger.info(f"Trying to link {det.id} => {next_det.id}") + det.next_detection = next_det + det.save() + logger.info(f"Linked detection {det.id} => {next_det.id} with cost {cost:.4f}") + + assigned_current_ids.add(det.id) + assigned_next_ids.add(next_det.id) + + +def perform_tracking_for_job(job): + """ + Perform detection tracking for all events in the job's source image collection. + Runs tracking only if all images in an event have processed detections with features. + """ + from ami.jobs.models import JobState + + pipeline_algorithms = job.pipeline.algorithms.all() + job.logger.info("Tracking started") + job.progress.add_stage(name="Tracking", key="tracking") + job.progress.update_stage("tracking", status=JobState.STARTED, progress=0) + job.save() + + collection = job.source_image_collection + if not collection: + job.logger.warning("Tracking: No source image collection found. 
Skipping tracking.") + return + + events = ( + Event.objects.filter(captures__collections=collection) + .distinct() + .prefetch_related("captures__detections__classifications") + ) + total_events = events.count() + processed_events = 0 - logger.info(f"Comparing detection {det.id} with previous {prev.id}: cost = {cost:.4f}") - if cost < best_cost: - best_cost = cost - best_match = prev + for event in events: + source_images = event.captures.order_by("timestamp") - if best_match and best_cost < TRACKING_COST_THRESHOLD: - det.occurrence = best_match.occurrence - logger.info(f"Assigned detection {det.id} to existing occurrence {best_match.occurrence.pk}") - else: - occurrence = Occurrence.objects.create(event=det.source_image.event) - det.occurrence = occurrence - logger.info(f"Created new occurrence {occurrence.pk} for detection {det.id}") + if not event_fully_processed(event, pipeline_algorithms, logger=job.logger): + job.logger.info( + f"Tracking: Skipping tracking for event {event.pk}: not all detections are fully processed." + ) + continue - det.save() + job.logger.info(f"Tracking: Running tracking for event {event.pk}") + assign_occurrences_by_tracking_images(source_images, job.logger) + processed_events += 1 - last_detections = current_detections + job.progress.update_stage( + "tracking", + status=JobState.STARTED, + progress=processed_events / total_events if total_events else 1, + ) + job.save() - logger.info("Finished assigning occurrences by tracking.") + job.logger.info("Tracking: Finished tracking.") + job.progress.update_stage("tracking", status=JobState.SUCCESS, progress=1) + job.save() From 50ce8fc9ed5ed11e55374df33d88db20cf47321c Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Mon, 2 Jun 2025 13:44:50 -0400 Subject: [PATCH 04/24] test: added testing for tracking --- ami/main/tests/test_tracking.py | 98 +++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 ami/main/tests/test_tracking.py diff --git a/ami/main/tests/test_tracking.py b/ami/main/tests/test_tracking.py new file mode 100644 index 000000000..755e74009 --- /dev/null +++ b/ami/main/tests/test_tracking.py @@ -0,0 +1,98 @@ +import logging +from collections import defaultdict + +import numpy as np +from django.test import TestCase +from django.utils import timezone + +from ami.main.models import Classification, Detection, Occurrence, Project +from ami.ml.models import Algorithm +from ami.ml.tracking import assign_occurrences_by_tracking_images + +logger = logging.getLogger(__name__) + + +class TestTracking(TestCase): + def setUp(self) -> None: + self.project = Project.objects.first() + self.event = self.project.events.first() + self.source_images = list(self.event.captures.order_by("timestamp")) + self.assign_mock_features_to_occurrence_detections(self.event) + # Save ground truth occurrence groupings + self.ground_truth_groups = defaultdict(set) + for occ in Occurrence.objects.filter(event=self.event): + det_ids = Detection.objects.filter(occurrence=occ).values_list("id", flat=True) + for det_id in det_ids: + self.ground_truth_groups[occ.pk].add(det_id) + + # Clear existing tracking data (next_detection + occurrence) + Detection.objects.filter(source_image__event=self.event).update(next_detection=None) + + def assign_mock_features_to_occurrence_detections(self, event, algorithm_name="MockTrackingAlgorithm"): + algorithm, _ = Algorithm.objects.get_or_create(name=algorithm_name) + + for occurrence in event.occurrences.all(): + base_vector = np.random.rand(2048) # Base feature for 
this occurrence group + + for det in occurrence.detections.all(): + feature_vector = base_vector + np.random.normal(0, 0.001, size=2048) # Add slight variation + Classification.objects.update_or_create( + detection=det, + algorithm=algorithm, + defaults={ + "timestamp": timezone.now(), + "features_2048": feature_vector.tolist(), + "terminal": True, + "score": 1.0, + }, + ) + + def test_tracking_exactly_reproduces_occurrences(self): + # Clear previous detection chains and occurrences + for det in Detection.objects.filter(source_image__event=self.event): + det.occurrence = None + det.next_detection = None + det.save() + + Occurrence.objects.filter(event=self.event).delete() + + # Run the tracking algorithm to regenerate occurrences + assign_occurrences_by_tracking_images(self.source_images, logger) + + # Capture new tracking-generated occurrence groups + new_groups = { + occ.pk: set(Detection.objects.filter(occurrence=occ).values_list("id", flat=True)) + for occ in Occurrence.objects.filter(event=self.event) + } + + # Assert that the number of new groups equals the number of ground truth groups + self.assertEqual( + len(new_groups), + len(self.ground_truth_groups), + f"Expected {len(self.ground_truth_groups)} groups, but got {len(new_groups)}", + ) + + # Assert each new group exactly matches one of the original ground truth groups + unmatched_groups = [ + new_set for new_set in new_groups.values() if new_set not in self.ground_truth_groups.values() + ] + + self.assertEqual( + len(unmatched_groups), + 0, + f"{len(unmatched_groups)} of the new groups do not exactly match any ground truth group", + ) + logger.info( + f"All {len(new_groups)} new groups match the ground truth groups exactly.", + ) + logger.info(f"new groups: {new_groups}") + # Assert that each ground truth group is present in the new tracking results + for gt_set in self.ground_truth_groups.values(): + logger.info( + f"Checking ground truth group: {gt_set}", + ) + self.assertIn( + gt_set, + new_groups.values(), + f"Ground truth group {gt_set} not found in new tracking results", + ) From 0ebbea5379718dc6e465ca677365bc48dee34034 Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Tue, 3 Jun 2025 12:00:11 -0400 Subject: [PATCH 05/24] skip sessions with human identifications --- ami/ml/tracking.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index 658df3d2f..9831f09df 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -298,6 +298,10 @@ def perform_tracking_for_job(job): f"Tracking: Skipping tracking for event {event.pk}: not all detections are fully processed." 
) continue + # Check if there are human identifications in the event + if Occurrence.objects.filter(event=event, identifications__isnull=False).exists(): + job.logger.info(f"Tracking: Skipping tracking for event {event.pk}: human identifications present.") + continue job.logger.info(f"Tracking: Running tracking for event {event.pk}") assign_occurrences_by_tracking_images(source_images, job.logger) From 3f4acaccb29640817e26d375c166391e958a6fe0 Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Mon, 9 Jun 2025 10:12:47 -0400 Subject: [PATCH 06/24] added missing migration --- ...jects_alter_taxon_projects_tag_and_more.py | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 ami/main/migrations/0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more.py diff --git a/ami/main/migrations/0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more.py b/ami/main/migrations/0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more.py new file mode 100644 index 000000000..031fd3fb0 --- /dev/null +++ b/ami/main/migrations/0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more.py @@ -0,0 +1,50 @@ +# Generated by Django 4.2.10 on 2025-05-27 15:22 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [ + ("main", "0065_detection_favorite_occurrence_best_detection_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="taxalist", + name="projects", + field=models.ManyToManyField(blank=True, related_name="taxa_lists", to="main.project"), + ), + migrations.AlterField( + model_name="taxon", + name="projects", + field=models.ManyToManyField(blank=True, related_name="taxa", to="main.project"), + ), + migrations.CreateModel( + name="Tag", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ("name", models.CharField(max_length=255)), + ( + "project", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="tags", + to="main.project", + ), + ), + ], + options={ + "unique_together": {("name", "project")}, + }, + ), + migrations.AddField( + model_name="taxon", + name="tags", + field=models.ManyToManyField(blank=True, related_name="taxa", to="main.tag"), + ), + ] From a4eeede07b37f5aba0bcb121d2d1f90cf5cd8573 Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Mon, 9 Jun 2025 11:14:53 -0400 Subject: [PATCH 07/24] restored migrations --- ...e_credit_taxon_cover_image_url_and_more.py | 27 +++++++++++++++++++ .../migrations/0069_merge_20250609_1108.py | 14 ++++++++++ 2 files changed, 41 insertions(+) create mode 100644 ami/main/migrations/0060_taxon_cover_image_credit_taxon_cover_image_url_and_more.py create mode 100644 ami/main/migrations/0069_merge_20250609_1108.py diff --git a/ami/main/migrations/0060_taxon_cover_image_credit_taxon_cover_image_url_and_more.py b/ami/main/migrations/0060_taxon_cover_image_credit_taxon_cover_image_url_and_more.py new file mode 100644 index 000000000..4c74608f2 --- /dev/null +++ b/ami/main/migrations/0060_taxon_cover_image_credit_taxon_cover_image_url_and_more.py @@ -0,0 +1,27 @@ +# Generated by Django 4.2.10 on 2025-04-30 02:46 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("main", "0059_alter_project_options"), + ] + + operations = [ + 
migrations.AddField( + model_name="taxon", + name="cover_image_credit", + field=models.CharField(blank=True, max_length=255, null=True), + ), + migrations.AddField( + model_name="taxon", + name="cover_image_url", + field=models.URLField(blank=True, max_length=255, null=True), + ), + migrations.AddField( + model_name="taxon", + name="fieldguide_id", + field=models.CharField(blank=True, max_length=255, null=True), + ), + ] diff --git a/ami/main/migrations/0069_merge_20250609_1108.py b/ami/main/migrations/0069_merge_20250609_1108.py new file mode 100644 index 000000000..e952d5e43 --- /dev/null +++ b/ami/main/migrations/0069_merge_20250609_1108.py @@ -0,0 +1,14 @@ +# Generated by Django 4.2.10 on 2025-06-09 11:08 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("main", "0060_taxon_cover_image_credit_taxon_cover_image_url_and_more"), + ("main", "0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more"), + ("main", "0066_populate_cached_occurence_fields"), + ("main", "0068_allow_taxa_without_project"), + ] + + operations = [] From 8801f89b04ccbe721a5965d59b93a73f89a38fb9 Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Tue, 10 Jun 2025 10:23:04 -0400 Subject: [PATCH 08/24] fixed migration issues --- ...e_credit_taxon_cover_image_url_and_more.py | 27 ---------- ...jects_alter_taxon_projects_tag_and_more.py | 50 ------------------- ...on.py => 0069_detection_next_detection.py} | 4 +- .../migrations/0069_merge_20250609_1108.py | 14 ------ 4 files changed, 2 insertions(+), 93 deletions(-) delete mode 100644 ami/main/migrations/0060_taxon_cover_image_credit_taxon_cover_image_url_and_more.py delete mode 100644 ami/main/migrations/0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more.py rename ami/main/migrations/{0067_detection_next_detection.py => 0069_detection_next_detection.py} (82%) delete mode 100644 ami/main/migrations/0069_merge_20250609_1108.py diff --git a/ami/main/migrations/0060_taxon_cover_image_credit_taxon_cover_image_url_and_more.py b/ami/main/migrations/0060_taxon_cover_image_credit_taxon_cover_image_url_and_more.py deleted file mode 100644 index 4c74608f2..000000000 --- a/ami/main/migrations/0060_taxon_cover_image_credit_taxon_cover_image_url_and_more.py +++ /dev/null @@ -1,27 +0,0 @@ -# Generated by Django 4.2.10 on 2025-04-30 02:46 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("main", "0059_alter_project_options"), - ] - - operations = [ - migrations.AddField( - model_name="taxon", - name="cover_image_credit", - field=models.CharField(blank=True, max_length=255, null=True), - ), - migrations.AddField( - model_name="taxon", - name="cover_image_url", - field=models.URLField(blank=True, max_length=255, null=True), - ), - migrations.AddField( - model_name="taxon", - name="fieldguide_id", - field=models.CharField(blank=True, max_length=255, null=True), - ), - ] diff --git a/ami/main/migrations/0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more.py b/ami/main/migrations/0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more.py deleted file mode 100644 index 031fd3fb0..000000000 --- a/ami/main/migrations/0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more.py +++ /dev/null @@ -1,50 +0,0 @@ -# Generated by Django 4.2.10 on 2025-05-27 15:22 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - dependencies = [ - ("main", 
"0065_detection_favorite_occurrence_best_detection_and_more"), - ] - - operations = [ - migrations.AlterField( - model_name="taxalist", - name="projects", - field=models.ManyToManyField(blank=True, related_name="taxa_lists", to="main.project"), - ), - migrations.AlterField( - model_name="taxon", - name="projects", - field=models.ManyToManyField(blank=True, related_name="taxa", to="main.project"), - ), - migrations.CreateModel( - name="Tag", - fields=[ - ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), - ("created_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ("name", models.CharField(max_length=255)), - ( - "project", - models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="tags", - to="main.project", - ), - ), - ], - options={ - "unique_together": {("name", "project")}, - }, - ), - migrations.AddField( - model_name="taxon", - name="tags", - field=models.ManyToManyField(blank=True, related_name="taxa", to="main.tag"), - ), - ] diff --git a/ami/main/migrations/0067_detection_next_detection.py b/ami/main/migrations/0069_detection_next_detection.py similarity index 82% rename from ami/main/migrations/0067_detection_next_detection.py rename to ami/main/migrations/0069_detection_next_detection.py index d91ee4a0d..949b3e539 100644 --- a/ami/main/migrations/0067_detection_next_detection.py +++ b/ami/main/migrations/0069_detection_next_detection.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.10 on 2025-05-29 16:21 +# Generated by Django 4.2.10 on 2025-06-10 10:19 from django.db import migrations, models import django.db.models.deletion @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ("main", "0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more"), + ("main", "0068_allow_taxa_without_project"), ] operations = [ diff --git a/ami/main/migrations/0069_merge_20250609_1108.py b/ami/main/migrations/0069_merge_20250609_1108.py deleted file mode 100644 index e952d5e43..000000000 --- a/ami/main/migrations/0069_merge_20250609_1108.py +++ /dev/null @@ -1,14 +0,0 @@ -# Generated by Django 4.2.10 on 2025-06-09 11:08 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("main", "0060_taxon_cover_image_credit_taxon_cover_image_url_and_more"), - ("main", "0066_alter_taxalist_projects_alter_taxon_projects_tag_and_more"), - ("main", "0066_populate_cached_occurence_fields"), - ("main", "0068_allow_taxa_without_project"), - ] - - operations = [] From d6d481a1ec5d5c3d71485126d8b97b2f5a2c07dc Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Tue, 17 Jun 2025 17:06:25 -0700 Subject: [PATCH 09/24] fix: pin minio containers to known working versions --- docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 09ec5eba2..006da6746 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -108,7 +108,7 @@ services: - ./data/flower/:/data/ minio: - image: minio/minio:latest + image: minio/minio:RELEASE.2024-11-07T00-52-20Z command: minio server --console-address ":9001" /data volumes: - "minio_data:/data" @@ -134,7 +134,7 @@ services: - minio minio-init: - image: minio/mc + image: minio/mc:RELEASE.2025-03-12T17-29-24Z env_file: - ./.envs/.local/.django depends_on: From 8991b44a0e9c8eade4455021e514e1cfec548a52 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Tue, 17 Jun 2025 17:14:42 -0700 Subject: 
[PATCH 10/24] fix: pin minio container versions in CI stack --- docker-compose.ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.ci.yml b/docker-compose.ci.yml index e842c3656..8dda0e846 100644 --- a/docker-compose.ci.yml +++ b/docker-compose.ci.yml @@ -31,13 +31,13 @@ services: command: /start-celeryworker minio: - image: minio/minio:latest + image: minio/minio:RELEASE.2024-11-07T00-52-20Z command: minio server --console-address ":9001" /data env_file: - ./.envs/.ci/.django minio-init: - image: minio/mc + image: minio/mc:RELEASE.2025-03-12T17-29-24Z env_file: - ./.envs/.ci/.django depends_on: From ff3d1bb248d19390be0a29b554e292e9d26958ce Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Thu, 19 Jun 2025 08:40:32 -0400 Subject: [PATCH 11/24] moved tracking to a separate job --- .../migrations/0019_alter_job_job_type_key.py | 30 ++++++++++++++++ ami/jobs/models.py | 34 +++++++++++++++++-- ami/main/admin.py | 22 +++++++++++- 3 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 ami/jobs/migrations/0019_alter_job_job_type_key.py diff --git a/ami/jobs/migrations/0019_alter_job_job_type_key.py b/ami/jobs/migrations/0019_alter_job_job_type_key.py new file mode 100644 index 000000000..e776d9cf6 --- /dev/null +++ b/ami/jobs/migrations/0019_alter_job_job_type_key.py @@ -0,0 +1,30 @@ +# Generated by Django 4.2.10 on 2025-06-19 08:33 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("jobs", "0018_alter_job_job_type_key"), + ] + + operations = [ + migrations.AlterField( + model_name="job", + name="job_type_key", + field=models.CharField( + choices=[ + ("ml", "ML pipeline"), + ("populate_captures_collection", "Populate captures collection"), + ("data_storage_sync", "Data storage sync"), + ("unknown", "Unknown"), + ("data_export", "Data Export"), + ("detection_clustering", "Detection Feature Clustering"), + ("tracking", "Occurrence Tracking"), + ], + default="unknown", + max_length=255, + verbose_name="Job Type", + ), + ), + ] diff --git a/ami/jobs/models.py b/ami/jobs/models.py index b1f728c97..89386b7ff 100644 --- a/ami/jobs/models.py +++ b/ami/jobs/models.py @@ -17,7 +17,7 @@ from ami.jobs.tasks import run_job from ami.main.models import Deployment, Project, SourceImage, SourceImageCollection from ami.ml.models import Pipeline -from ami.ml.tracking import perform_tracking_for_job +from ami.ml.tracking import perform_tracking from ami.utils.schemas import OrderedEnum logger = logging.getLogger(__name__) @@ -317,6 +317,7 @@ def run(cls, job: "Job"): """ Procedure for an ML pipeline as a job. 
""" + job.progress.add_stage(name="Tracking", key="tracking") job.update_status(JobState.STARTED) job.started_at = datetime.datetime.now() job.finished_at = None @@ -503,7 +504,7 @@ def run(cls, job: "Job"): status=JobState.SUCCESS, progress=1, ) - perform_tracking_for_job(job) + perform_tracking(job) job.update_status(JobState.SUCCESS, save=False) job.finished_at = datetime.datetime.now() job.save() @@ -674,6 +675,34 @@ def run(cls, job: "Job"): job.save() +class TrackingJob(JobType): + name = "Occurrence Tracking" + key = "tracking" + + @classmethod + def run(cls, job: "Job"): + job.logger.info("Starting tracking job") + job.update_status(JobState.STARTED) + job.started_at = datetime.datetime.now() + job.finished_at = None + + # Add tracking stage and save job + job.progress.add_stage(name="Tracking", key="tracking") + job.save() + + perform_tracking(job) + + job.progress.update_stage( + "tracking", + status=JobState.SUCCESS, + progress=1, + ) + job.update_status(JobState.SUCCESS) + job.logger.info("Tracking job finished successfully.") + job.finished_at = datetime.datetime.now() + job.save() + + class UnknownJobType(JobType): name = "Unknown" key = "unknown" @@ -690,6 +719,7 @@ def run(cls, job: "Job"): UnknownJobType, DataExportJob, DetectionClusteringJob, + TrackingJob, ] diff --git a/ami/main/admin.py b/ami/main/admin.py index 94884e081..0decbee75 100644 --- a/ami/main/admin.py +++ b/ami/main/admin.py @@ -651,7 +651,27 @@ def cluster_detections(self, request: HttpRequest, queryset: QuerySet[SourceImag self.message_user(request, f"Clustered {queryset.count()} collection(s).") - actions = [populate_collection, populate_collection_async, cluster_detections, create_clustering_job] + @admin.action(description="Run tracking job") + def run_tracking_job(self, request: HttpRequest, queryset: QuerySet[SourceImageCollection]) -> None: + from ami.jobs.models import Job, TrackingJob + + for collection in queryset: + job = Job.objects.create( + name=f"Tracking for collection {collection.pk}", + project=collection.project, + source_image_collection=collection, + job_type_key=TrackingJob.key, + ) + job.enqueue() + self.message_user(request, f"Tracking job #{job.pk} started for collection #{collection.pk}") + + actions = [ + populate_collection, + populate_collection_async, + cluster_detections, + create_clustering_job, + run_tracking_job, + ] # Hide images many-to-many field from form. This would list all source images in the database. exclude = ("images",) From c688e682eda18dabe481f116f2a712b1082e35e5 Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Fri, 20 Jun 2025 09:54:21 -0400 Subject: [PATCH 12/24] removed call to tracking from ml job --- ami/jobs/models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ami/jobs/models.py b/ami/jobs/models.py index 89386b7ff..4b61ee605 100644 --- a/ami/jobs/models.py +++ b/ami/jobs/models.py @@ -317,7 +317,6 @@ def run(cls, job: "Job"): """ Procedure for an ML pipeline as a job. 
""" - job.progress.add_stage(name="Tracking", key="tracking") job.update_status(JobState.STARTED) job.started_at = datetime.datetime.now() job.finished_at = None @@ -504,7 +503,6 @@ def run(cls, job: "Job"): status=JobState.SUCCESS, progress=1, ) - perform_tracking(job) job.update_status(JobState.SUCCESS, save=False) job.finished_at = datetime.datetime.now() job.save() From db5f1040b9a68b79c97170886196ecf5c10293f8 Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Fri, 20 Jun 2025 09:54:54 -0400 Subject: [PATCH 13/24] passed tracking cost threshold as a job param --- ami/main/admin.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ami/main/admin.py b/ami/main/admin.py index 0decbee75..46febb45a 100644 --- a/ami/main/admin.py +++ b/ami/main/admin.py @@ -661,6 +661,9 @@ def run_tracking_job(self, request: HttpRequest, queryset: QuerySet[SourceImageC project=collection.project, source_image_collection=collection, job_type_key=TrackingJob.key, + params={ + "cost_threshold": 0.4, + }, ) job.enqueue() self.message_user(request, f"Tracking job #{job.pk} started for collection #{collection.pk}") From d3a7b8c0acb6f3329c6fdfe5f76cb0fad87f4c9e Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Fri, 20 Jun 2025 09:56:15 -0400 Subject: [PATCH 14/24] fix: assigned occurrence project and deployment --- ami/ml/tracking.py | 55 ++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index 9831f09df..342823fd0 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -79,16 +79,15 @@ def get_most_common_algorithm_for_event(event): return None -def event_fully_processed(event, pipeline_algorithms, logger) -> bool: +def event_fully_processed(event, logger) -> bool: """ - with a classification from any of the pipeline algorithms and a non-null features_2048 vector. + Checks if all captures in the event have processed detections with features_2048 """ total_captures = event.captures.count() logger.info(f"Checking if event {event.pk} is fully processed... Total captures: {total_captures}") processed_captures = ( event.captures.filter( - detections__classifications__algorithm__in=pipeline_algorithms, detections__classifications__features_2048__isnull=False, ) .distinct() @@ -148,10 +147,18 @@ def assign_occurrences_from_detection_chains(source_images, logger): except Exception as e: logger.warning(f"Failed to delete occurrence {occ_id}: {e}") - occurrence = Occurrence.objects.create(event=chain[0].source_image.event) + occurrence = Occurrence.objects.create( + event=chain[0].source_image.event, + deployment=chain[0].source_image.deployment, + project=chain[0].source_image.project, + ) + for d in chain: d.occurrence = occurrence d.save() + + occurrence.save() + logger.info(f"Assigned occurrence {occurrence.pk} to chain of {len(chain)} detections") @@ -164,7 +171,9 @@ def assign_occurrences_by_tracking_images( Track detections across ordered source images and assign them to occurrences. """ logger.info(f"Starting occurrence tracking over {len(source_images)} images") - + if len(source_images) < 2: + logger.info("Not enough images to perform tracking. 
At least 2 images are required.") + return for i in range(len(source_images) - 1): current_image = source_images[i] next_image = source_images[i + 1] @@ -264,47 +273,45 @@ def pair_detections( assigned_next_ids.add(next_det.id) -def perform_tracking_for_job(job): +def perform_tracking(job): """ Perform detection tracking for all events in the job's source image collection. Runs tracking only if all images in an event have processed detections with features. """ from ami.jobs.models import JobState - pipeline_algorithms = job.pipeline.algorithms.all() + cost_threshold = job.params.get("cost_threshold", TRACKING_COST_THRESHOLD) job.logger.info("Tracking started") - job.progress.add_stage(name="Tracking", key="tracking") + job.logger.info(f"Using cost threshold: {cost_threshold}") job.progress.update_stage("tracking", status=JobState.STARTED, progress=0) job.save() - + job.logger.info("Progresss updated and job saved") collection = job.source_image_collection if not collection: job.logger.warning("Tracking: No source image collection found. Skipping tracking.") return - - events = ( - Event.objects.filter(captures__collections=collection) - .distinct() - .prefetch_related("captures__detections__classifications") - ) - total_events = events.count() + job.logger.info("Tracking: Fetching events for collection %s", collection.pk) + events_qs = Event.objects.filter(captures__collections=collection).distinct() + total_events = events_qs.count() + events = events_qs.iterator() processed_events = 0 - + job.logger.info("Tracking: Found %d events in collection %s", total_events, collection.pk) for event in events: + job.logger.info("Tracking: Processing event %s", event.pk) source_images = event.captures.order_by("timestamp") - - if not event_fully_processed(event, pipeline_algorithms, logger=job.logger): - job.logger.info( - f"Tracking: Skipping tracking for event {event.pk}: not all detections are fully processed." - ) - continue # Check if there are human identifications in the event if Occurrence.objects.filter(event=event, identifications__isnull=False).exists(): job.logger.info(f"Tracking: Skipping tracking for event {event.pk}: human identifications present.") continue + # Check if the all captures in the event have processed detections with features + if not event_fully_processed(event, logger=job.logger): + job.logger.info( + f"Tracking: Skipping tracking for event {event.pk}: not all detections are fully processed." 
+ ) + continue job.logger.info(f"Tracking: Running tracking for event {event.pk}") - assign_occurrences_by_tracking_images(source_images, job.logger) + assign_occurrences_by_tracking_images(source_images, job.logger, cost_threshold=cost_threshold) processed_events += 1 job.progress.update_stage( From 000c24764e84c2f3d5175873923410b23653a14e Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Fri, 20 Jun 2025 10:49:33 -0400 Subject: [PATCH 15/24] changed the observed date field in the occurrence list view to show the # detections --- ui/src/pages/occurrences/occurrence-columns.tsx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ui/src/pages/occurrences/occurrence-columns.tsx b/ui/src/pages/occurrences/occurrence-columns.tsx index 8d6160a47..d3180e546 100644 --- a/ui/src/pages/occurrences/occurrence-columns.tsx +++ b/ui/src/pages/occurrences/occurrence-columns.tsx @@ -139,10 +139,11 @@ export const columns: ( }, { id: 'date', - name: translate(STRING.FIELD_LABEL_DATE_OBSERVED), - sortField: 'first_appearance_timestamp', - renderCell: (item: Occurrence) => , - }, + name: '# Detections', + sortField: 'detections_count', + renderCell: (item: Occurrence) => , + }, + { id: 'time', sortField: 'first_appearance_time', From be698070c4209695b0165e7163b529bb1e5871bc Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Fri, 20 Jun 2025 14:43:52 -0400 Subject: [PATCH 16/24] fixed frontend code formatting --- ui/src/pages/occurrences/occurrence-columns.tsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ui/src/pages/occurrences/occurrence-columns.tsx b/ui/src/pages/occurrences/occurrence-columns.tsx index d3180e546..388786f2a 100644 --- a/ui/src/pages/occurrences/occurrence-columns.tsx +++ b/ui/src/pages/occurrences/occurrence-columns.tsx @@ -141,8 +141,10 @@ export const columns: ( id: 'date', name: '# Detections', sortField: 'detections_count', - renderCell: (item: Occurrence) => , - }, + renderCell: (item: Occurrence) => ( + + ), + }, { id: 'time', From 1526cfbc6e9adabcb69386128016662a5d3de369 Mon Sep 17 00:00:00 2001 From: mohamedelabbas1996 Date: Fri, 20 Jun 2025 18:02:06 -0400 Subject: [PATCH 17/24] improved logging and job progress tracking --- ami/jobs/models.py | 9 ---- ami/main/tests/test_tracking.py | 2 +- ami/ml/models/pipeline.py | 4 -- ami/ml/tracking.py | 80 +++++++++++++++++---------------- 4 files changed, 43 insertions(+), 52 deletions(-) diff --git a/ami/jobs/models.py b/ami/jobs/models.py index 4b61ee605..e6b3e85dc 100644 --- a/ami/jobs/models.py +++ b/ami/jobs/models.py @@ -684,17 +684,8 @@ def run(cls, job: "Job"): job.started_at = datetime.datetime.now() job.finished_at = None - # Add tracking stage and save job - job.progress.add_stage(name="Tracking", key="tracking") - job.save() - perform_tracking(job) - job.progress.update_stage( - "tracking", - status=JobState.SUCCESS, - progress=1, - ) job.update_status(JobState.SUCCESS) job.logger.info("Tracking job finished successfully.") job.finished_at = datetime.datetime.now() diff --git a/ami/main/tests/test_tracking.py b/ami/main/tests/test_tracking.py index 755e74009..483d571ee 100644 --- a/ami/main/tests/test_tracking.py +++ b/ami/main/tests/test_tracking.py @@ -57,7 +57,7 @@ def test_tracking_exactly_reproduces_occurrences(self): Occurrence.objects.filter(event=self.event).delete() # Run the tracking algorithm to regenerate occurrences - assign_occurrences_by_tracking_images(self.source_images, logger) + assign_occurrences_by_tracking_images(self.event, logger) # Capture new 
tracking-generated occurrence groups new_groups = { diff --git a/ami/ml/models/pipeline.py b/ami/ml/models/pipeline.py index 15b87cbbb..13bbb5350 100644 --- a/ami/ml/models/pipeline.py +++ b/ami/ml/models/pipeline.py @@ -885,10 +885,6 @@ def save_results( detections=detections, logger=job_logger, ) - # job_logger.info(f"Creating occurrences for {len(detections)} detections ") - # check if every image in the sessions in processed - - # assign_occurrences_by_tracking(detections=detections, logger=job_logger) # Update precalculated counts on source images and events source_images = list(source_images) diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index 342823fd0..4085efda6 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -123,7 +123,7 @@ def assign_occurrences_from_detection_chains(source_images, logger): Walk detection chains across source images and assign a new occurrence to each chain. """ visited = set() - + created_occurrences_count = 0 for image in source_images: for det in image.detections.all(): if det.id in visited or getattr(det, "previous_detection", None) is not None: @@ -143,15 +143,16 @@ def assign_occurrences_from_detection_chains(source_images, logger): for occ_id in old_occurrences: try: Occurrence.objects.filter(id=occ_id).delete() - logger.info(f"Deleted old occurrence {occ_id} before reassignment.") + logger.debug(f"Deleted old occurrence {occ_id} before reassignment.") except Exception as e: - logger.warning(f"Failed to delete occurrence {occ_id}: {e}") + logger.info(f"Failed to delete occurrence {occ_id}: {e}") occurrence = Occurrence.objects.create( event=chain[0].source_image.event, deployment=chain[0].source_image.deployment, project=chain[0].source_image.project, ) + created_occurrences_count += 1 for d in chain: d.occurrence = occurrence @@ -159,18 +160,22 @@ def assign_occurrences_from_detection_chains(source_images, logger): occurrence.save() - logger.info(f"Assigned occurrence {occurrence.pk} to chain of {len(chain)} detections") + logger.debug(f"Assigned occurrence {occurrence.pk} to chain of {len(chain)} detections") + logger.info( + f"Assigned {created_occurrences_count} occurrences from detection chains across {len(source_images)} images." + ) def assign_occurrences_by_tracking_images( - source_images, - logger, - cost_threshold: float = TRACKING_COST_THRESHOLD, + event, logger, cost_threshold: float = TRACKING_COST_THRESHOLD, job=None ) -> None: """ Track detections across ordered source images and assign them to occurrences. """ - logger.info(f"Starting occurrence tracking over {len(source_images)} images") + from ami.jobs.models import JobState + + source_images = event.captures.order_by("timestamp") + logger.info(f"Found {len(source_images)} source images for event {event.pk}") if len(source_images) < 2: logger.info("Not enough images to perform tracking. 
At least 2 images are required.") return @@ -181,13 +186,10 @@ def assign_occurrences_by_tracking_images( current_detections = list(current_image.detections.all()) next_detections = list(next_image.detections.all()) - logger.info( - f"""Tracking: Processing image {i + 1}/{len(source_images)}: - {len(current_detections)} -> {len(next_detections)} detections""" - ) + logger.debug(f"""Tracking: Processing image {i + 1}/{len(source_images)}""") # Get the most common algorithm for the current event most_common_algorithm = get_most_common_algorithm_for_event(current_image.event) - logger.info( + logger.debug( f"""Using most common algorithm for event {current_image.event.pk}: {most_common_algorithm.name if most_common_algorithm else 'None'}""" ) @@ -201,8 +203,21 @@ def assign_occurrences_by_tracking_images( algorithm=most_common_algorithm, logger=logger, ) + if job: + job.progress.update_stage( + f"event_{event.pk}", + status=JobState.STARTED, + progress=(i + 1) / (len(source_images) - 1), + ) + job.save() assign_occurrences_from_detection_chains(source_images, logger) + if job: + job.progress.update_stage( + f"event_{event.pk}", + progress=1.0, + ) + job.save() def pair_detections( @@ -220,7 +235,7 @@ def pair_detections( Only pairs with cost < threshold are considered. """ - logger.info(f"Pairing {len(current_detections)} - >{len(next_detections)} detections") + logger.debug(f"Pairing {len(current_detections)} - >{len(next_detections)} detections") potential_matches = [] @@ -258,16 +273,16 @@ def pair_detections( continue # check if next detection has a previous detection already assigned if getattr(next_det, "previous_detection", None) is not None: - logger.info(f"{next_det.id} already has previous detection: {next_det.previous_detection.id}") + logger.debug(f"{next_det.id} already has previous detection: {next_det.previous_detection.id}") previous_detection = getattr(next_det, "previous_detection", None) previous_detection.next_detection = None previous_detection.save() - logger.info(f"Cleared previous detection {previous_detection.pk} -> {next_det.pk} link") + logger.debug(f"Cleared previous detection {previous_detection.pk} -> {next_det.pk} link") - logger.info(f"Trying to link {det.id} => {next_det.id}") + logger.debug(f"Trying to link {det.id} => {next_det.id}") det.next_detection = next_det det.save() - logger.info(f"Linked detection {det.id} => {next_det.id} with cost {cost:.4f}") + logger.debug(f"Linked detection {det.id} => {next_det.id} with cost {cost:.4f}") assigned_current_ids.add(det.id) assigned_next_ids.add(next_det.id) @@ -278,27 +293,25 @@ def perform_tracking(job): Perform detection tracking for all events in the job's source image collection. Runs tracking only if all images in an event have processed detections with features. """ - from ami.jobs.models import JobState cost_threshold = job.params.get("cost_threshold", TRACKING_COST_THRESHOLD) job.logger.info("Tracking started") job.logger.info(f"Using cost threshold: {cost_threshold}") - job.progress.update_stage("tracking", status=JobState.STARTED, progress=0) - job.save() - job.logger.info("Progresss updated and job saved") collection = job.source_image_collection if not collection: - job.logger.warning("Tracking: No source image collection found. Skipping tracking.") + job.logger.info("Tracking: No source image collection found. 
Skipping tracking.") return job.logger.info("Tracking: Fetching events for collection %s", collection.pk) - events_qs = Event.objects.filter(captures__collections=collection).distinct() + events_qs = Event.objects.filter(captures__collections=collection).order_by("created_at").distinct() total_events = events_qs.count() events = events_qs.iterator() - processed_events = 0 job.logger.info("Tracking: Found %d events in collection %s", total_events, collection.pk) - for event in events: - job.logger.info("Tracking: Processing event %s", event.pk) - source_images = event.captures.order_by("timestamp") + for event in events_qs: + job.progress.add_stage(name=f"Event {event.pk}", key=f"event_{event.pk}") + job.save() + for idx, event in enumerate(events, start=1): + job.logger.info(f"Tracking: Processing event {idx}/{total_events} (Event ID: {event.pk})") + # Check if there are human identifications in the event if Occurrence.objects.filter(event=event, identifications__isnull=False).exists(): job.logger.info(f"Tracking: Skipping tracking for event {event.pk}: human identifications present.") @@ -311,16 +324,7 @@ def perform_tracking(job): continue job.logger.info(f"Tracking: Running tracking for event {event.pk}") - assign_occurrences_by_tracking_images(source_images, job.logger, cost_threshold=cost_threshold) - processed_events += 1 - - job.progress.update_stage( - "tracking", - status=JobState.STARTED, - progress=processed_events / total_events if total_events else 1, - ) - job.save() + assign_occurrences_by_tracking_images(event, job.logger, cost_threshold=cost_threshold, job=job) job.logger.info("Tracking: Finished tracking.") - job.progress.update_stage("tracking", status=JobState.SUCCESS, progress=1) job.save() From b82f17521a88e8db7f35e2408de65eae73e614ee Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 25 Jun 2025 15:29:09 -0700 Subject: [PATCH 18/24] fix: use features from the same algo when comparing detections --- ami/ml/tracking.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index 4085efda6..b8729ccae 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -79,7 +79,7 @@ def get_most_common_algorithm_for_event(event): return None -def event_fully_processed(event, logger) -> bool: +def event_fully_processed(event: Event, logger, algorithm: Algorithm) -> bool: """ Checks if all captures in the event have processed detections with features_2048 """ @@ -89,6 +89,7 @@ def event_fully_processed(event, logger) -> bool: processed_captures = ( event.captures.filter( detections__classifications__features_2048__isnull=False, + detections__classifications__algorithm=algorithm, ) .distinct() .count() @@ -167,7 +168,7 @@ def assign_occurrences_from_detection_chains(source_images, logger): def assign_occurrences_by_tracking_images( - event, logger, cost_threshold: float = TRACKING_COST_THRESHOLD, job=None + event: Event, logger, algorithm: Algorithm, cost_threshold: float = TRACKING_COST_THRESHOLD, job=None ) -> None: """ Track detections across ordered source images and assign them to occurrences. 
@@ -200,7 +201,7 @@ def assign_occurrences_by_tracking_images( image_width=current_image.width, image_height=current_image.height, cost_threshold=cost_threshold, - algorithm=most_common_algorithm, + algorithm=algorithm, logger=logger, ) if job: @@ -306,25 +307,46 @@ def perform_tracking(job): total_events = events_qs.count() events = events_qs.iterator() job.logger.info("Tracking: Found %d events in collection %s", total_events, collection.pk) + for event in events_qs: job.progress.add_stage(name=f"Event {event.pk}", key=f"event_{event.pk}") job.save() + for idx, event in enumerate(events, start=1): job.logger.info(f"Tracking: Processing event {idx}/{total_events} (Event ID: {event.pk})") + # Get the most common algorithm for the current event + algorithm = get_most_common_algorithm_for_event(event) + if algorithm is not None: + job.logger.info(f"Using most common feature extraction algorithm for event {event}: ", f"{algorithm.name}") + else: + job.logger.warning( + f"No feature extraction algorithm found for detections in event {event}. " + "Skipping tracking for this event." + ) + continue + # Check if there are human identifications in the event if Occurrence.objects.filter(event=event, identifications__isnull=False).exists(): job.logger.info(f"Tracking: Skipping tracking for event {event.pk}: human identifications present.") continue + # Check if the all captures in the event have processed detections with features - if not event_fully_processed(event, logger=job.logger): + if not event_fully_processed(event, logger=job.logger, algorithm=algorithm): job.logger.info( f"Tracking: Skipping tracking for event {event.pk}: not all detections are fully processed." ) continue job.logger.info(f"Tracking: Running tracking for event {event.pk}") - assign_occurrences_by_tracking_images(event, job.logger, cost_threshold=cost_threshold, job=job) + + assign_occurrences_by_tracking_images( + event=event, + logger=job.logger, + algorithm=algorithm, + cost_threshold=cost_threshold, + job=job, + ) job.logger.info("Tracking: Finished tracking.") job.save() From 9d0fd96c47a31462a43c09c8497b9fb90b6c7177 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 25 Jun 2025 15:30:40 -0700 Subject: [PATCH 19/24] feat: update some type annotations & logging, resolve warnings --- ami/ml/tracking.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index b8729ccae..70feff1bf 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -4,6 +4,7 @@ import numpy as np from django.db.models import Count +from ami.jobs.models import Job from ami.main.models import Classification, Detection, Event, Occurrence from ami.ml.models import Algorithm @@ -178,7 +179,7 @@ def assign_occurrences_by_tracking_images( source_images = event.captures.order_by("timestamp") logger.info(f"Found {len(source_images)} source images for event {event.pk}") if len(source_images) < 2: - logger.info("Not enough images to perform tracking. At least 2 images are required.") + logger.warn("Not enough images to perform tracking. 
At least 2 images are required.") return for i in range(len(source_images) - 1): current_image = source_images[i] @@ -188,12 +189,10 @@ def assign_occurrences_by_tracking_images( next_detections = list(next_image.detections.all()) logger.debug(f"""Tracking: Processing image {i + 1}/{len(source_images)}""") - # Get the most common algorithm for the current event - most_common_algorithm = get_most_common_algorithm_for_event(current_image.event) - logger.debug( - f"""Using most common algorithm for event {current_image.event.pk}: - {most_common_algorithm.name if most_common_algorithm else 'None'}""" - ) + + if not current_image.width or not current_image.height: + logger.warning(f"Image {current_image.pk} has no width and/or height. Skipping tracking for this event.") + return pair_detections( current_detections, @@ -273,9 +272,9 @@ def pair_detections( if det.id in assigned_current_ids or next_det.id in assigned_next_ids: continue # check if next detection has a previous detection already assigned - if getattr(next_det, "previous_detection", None) is not None: + previous_detection: Detection | None = getattr(next_det, "previous_detection", None) + if previous_detection is not None: logger.debug(f"{next_det.id} already has previous detection: {next_det.previous_detection.id}") - previous_detection = getattr(next_det, "previous_detection", None) previous_detection.next_detection = None previous_detection.save() logger.debug(f"Cleared previous detection {previous_detection.pk} -> {next_det.pk} link") @@ -289,13 +288,15 @@ def pair_detections( assigned_next_ids.add(next_det.id) -def perform_tracking(job): +def perform_tracking(job: Job): """ Perform detection tracking for all events in the job's source image collection. Runs tracking only if all images in an event have processed detections with features. 
""" - cost_threshold = job.params.get("cost_threshold", TRACKING_COST_THRESHOLD) + job_params = job.params or {} + cost_threshold = job_params.get("cost_threshold", TRACKING_COST_THRESHOLD) + job.logger.info("Tracking started") job.logger.info(f"Using cost threshold: {cost_threshold}") collection = job.source_image_collection From 87c91c1a0cb8e83b130db79384d1c470fa042192 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 25 Jun 2025 15:56:46 -0700 Subject: [PATCH 20/24] feat: validate tracking parameters, add more parameters --- ami/main/admin.py | 19 +++++++++++++++--- ami/ml/tracking.py | 48 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/ami/main/admin.py b/ami/main/admin.py index 46febb45a..92cf8b581 100644 --- a/ami/main/admin.py +++ b/ami/main/admin.py @@ -651,6 +651,21 @@ def cluster_detections(self, request: HttpRequest, queryset: QuerySet[SourceImag self.message_user(request, f"Clustered {queryset.count()} collection(s).") + @admin.action(description="Create tracking job (but don't run it)") + def create_tracking_job(self, request: HttpRequest, queryset: QuerySet[SourceImageCollection]) -> None: + from ami.jobs.models import Job, TrackingJob + from ami.ml.tracking import DEFAULT_TRACKING_PARAMS + + for collection in queryset: + job = Job.objects.create( + name=f"Tracking for collection {collection.pk}", + project=collection.project, + source_image_collection=collection, + job_type_key=TrackingJob.key, + params=DEFAULT_TRACKING_PARAMS.__dict__, + ) + self.message_user(request, f"Tracking job #{job.pk} created for collection #{collection.pk}") + @admin.action(description="Run tracking job") def run_tracking_job(self, request: HttpRequest, queryset: QuerySet[SourceImageCollection]) -> None: from ami.jobs.models import Job, TrackingJob @@ -661,9 +676,6 @@ def run_tracking_job(self, request: HttpRequest, queryset: QuerySet[SourceImageC project=collection.project, source_image_collection=collection, job_type_key=TrackingJob.key, - params={ - "cost_threshold": 0.4, - }, ) job.enqueue() self.message_user(request, f"Tracking job #{job.pk} started for collection #{collection.pk}") @@ -673,6 +685,7 @@ def run_tracking_job(self, request: HttpRequest, queryset: QuerySet[SourceImageC populate_collection_async, cluster_detections, create_clustering_job, + create_tracking_job, run_tracking_job, ] diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index 70feff1bf..0b1306501 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -1,14 +1,31 @@ +import dataclasses import math +import typing from collections.abc import Iterable import numpy as np from django.db.models import Count -from ami.jobs.models import Job from ami.main.models import Classification, Detection, Event, Occurrence from ami.ml.models import Algorithm -TRACKING_COST_THRESHOLD = 2 +if typing.TYPE_CHECKING: + from ami.jobs.models import Job + + +@dataclasses.dataclass +class TrackingParams: + """ + Parameters for the tracking job. 
+ """ + + cost_threshold: float = 0.2 + skip_if_human_identifications: bool = True + require_completely_processed_session: bool = True + feature_extraction_algorithm_id: int | None = None + + +DEFAULT_TRACKING_PARAMS = TrackingParams() def cosine_similarity(v1: Iterable[float], v2: Iterable[float]) -> float: @@ -169,7 +186,7 @@ def assign_occurrences_from_detection_chains(source_images, logger): def assign_occurrences_by_tracking_images( - event: Event, logger, algorithm: Algorithm, cost_threshold: float = TRACKING_COST_THRESHOLD, job=None + event: Event, logger, algorithm: Algorithm, params: TrackingParams = DEFAULT_TRACKING_PARAMS, job=None ) -> None: """ Track detections across ordered source images and assign them to occurrences. @@ -199,7 +216,7 @@ def assign_occurrences_by_tracking_images( next_detections, image_width=current_image.width, image_height=current_image.height, - cost_threshold=cost_threshold, + cost_threshold=params.cost_threshold, algorithm=algorithm, logger=logger, ) @@ -288,17 +305,19 @@ def pair_detections( assigned_next_ids.add(next_det.id) -def perform_tracking(job: Job): +def perform_tracking(job: "Job"): """ Perform detection tracking for all events in the job's source image collection. Runs tracking only if all images in an event have processed detections with features. """ - job_params = job.params or {} - cost_threshold = job_params.get("cost_threshold", TRACKING_COST_THRESHOLD) + params = DEFAULT_TRACKING_PARAMS + # Override default params with job params if provided + if job.params: + params = dataclasses.replace(params, **job.params) job.logger.info("Tracking started") - job.logger.info(f"Using cost threshold: {cost_threshold}") + job.logger.info(f"Using tracking parameters: {params}") collection = job.source_image_collection if not collection: job.logger.info("Tracking: No source image collection found. Skipping tracking.") @@ -319,7 +338,7 @@ def perform_tracking(job: Job): # Get the most common algorithm for the current event algorithm = get_most_common_algorithm_for_event(event) if algorithm is not None: - job.logger.info(f"Using most common feature extraction algorithm for event {event}: ", f"{algorithm.name}") + job.logger.info(f"Using most common feature extraction algorithm for event {event}: {algorithm.name}") else: job.logger.warning( f"No feature extraction algorithm found for detections in event {event}. " @@ -328,12 +347,17 @@ def perform_tracking(job: Job): continue # Check if there are human identifications in the event - if Occurrence.objects.filter(event=event, identifications__isnull=False).exists(): + if ( + params.skip_if_human_identifications + and Occurrence.objects.filter(event=event, identifications__isnull=False).exists() + ): job.logger.info(f"Tracking: Skipping tracking for event {event.pk}: human identifications present.") continue # Check if the all captures in the event have processed detections with features - if not event_fully_processed(event, logger=job.logger, algorithm=algorithm): + if params.require_completely_processed_session and not event_fully_processed( + event, logger=job.logger, algorithm=algorithm + ): job.logger.info( f"Tracking: Skipping tracking for event {event.pk}: not all detections are fully processed." 
) @@ -344,8 +368,8 @@ def perform_tracking(job: Job): assign_occurrences_by_tracking_images( event=event, logger=job.logger, + params=params, algorithm=algorithm, - cost_threshold=cost_threshold, job=job, ) From a194ffd8458f1d1e87a773fa79b34fd6b0c69caa Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 25 Jun 2025 16:23:24 -0700 Subject: [PATCH 21/24] fix: only assign new occurrences to tracks with >1 detection --- ami/ml/tracking.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index 0b1306501..9fe78f88e 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -155,7 +155,12 @@ def assign_occurrences_from_detection_chains(source_images, logger): visited.add(current.id) current = current.next_detection - if chain: + if chain and len(chain) > 1: + # Only create new occurrence if there are multiple detections in the chain + logger.debug( + f"Found chain of {len(chain)} detections starting from detection {det.id} in image {image.pk}" + ) + old_occurrences = {d.occurrence_id for d in chain if d.occurrence_id} # Delete old occurrences (if any) From 4c4e4c97c1204c35479b811fedc1cb4fef3b9ab1 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 25 Jun 2025 16:24:12 -0700 Subject: [PATCH 22/24] feat: log number of occurrences reduced, and other things. --- ami/ml/tracking.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index 9fe78f88e..65ca43cf5 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -143,6 +143,7 @@ def assign_occurrences_from_detection_chains(source_images, logger): """ visited = set() created_occurrences_count = 0 + existing_occurrence_count = Occurrence.objects.filter(detections__source_image__in=source_images).count() for image in source_images: for det in image.detections.all(): if det.id in visited or getattr(det, "previous_detection", None) is not None: @@ -164,12 +165,15 @@ def assign_occurrences_from_detection_chains(source_images, logger): old_occurrences = {d.occurrence_id for d in chain if d.occurrence_id} # Delete old occurrences (if any) + # @TODO: Consider if this is the desired behavior. Check for any history on the occurrence. Consider + # soft deleting or just reassign the detections to the new occurrence. + for occ_id in old_occurrences: try: + logger.debug(f"Deleting old occurrence {occ_id} before reassignment.") Occurrence.objects.filter(id=occ_id).delete() - logger.debug(f"Deleted old occurrence {occ_id} before reassignment.") except Exception as e: - logger.info(f"Failed to delete occurrence {occ_id}: {e}") + logger.error(f"Failed to delete occurrence {occ_id}: {e}") occurrence = Occurrence.objects.create( event=chain[0].source_image.event, @@ -185,8 +189,14 @@ def assign_occurrences_from_detection_chains(source_images, logger): occurrence.save() logger.debug(f"Assigned occurrence {occurrence.pk} to chain of {len(chain)} detections") + new_occurrence_count = Occurrence.objects.filter(detections__source_image__in=source_images).count() + occurrences_removed = existing_occurrence_count - new_occurrence_count + if occurrences_removed > 0: + logger.info(f"Reduced existing occurrences by {occurrences_removed}.") logger.info( - f"Assigned {created_occurrences_count} occurrences from detection chains across {len(source_images)} images." 
+ f"Assigned {created_occurrences_count} occurrences from detection chains across {len(source_images)} images.\n" + f"Occurrences before: {existing_occurrence_count}, after: {new_occurrence_count}.\n" + f"Total detections processed: {len(visited)}." ) @@ -210,7 +220,7 @@ def assign_occurrences_by_tracking_images( current_detections = list(current_image.detections.all()) next_detections = list(next_image.detections.all()) - logger.debug(f"""Tracking: Processing image {i + 1}/{len(source_images)}""") + logger.debug(f"""Tracking: Processing image {i + 1} of {len(source_images)}""") if not current_image.width or not current_image.height: logger.warning(f"Image {current_image.pk} has no width and/or height. Skipping tracking for this event.") @@ -338,7 +348,7 @@ def perform_tracking(job: "Job"): job.save() for idx, event in enumerate(events, start=1): - job.logger.info(f"Tracking: Processing event {idx}/{total_events} (Event ID: {event.pk})") + job.logger.info(f"Tracking: Processing event {idx} of {total_events} (Event ID: {event.pk})") # Get the most common algorithm for the current event algorithm = get_most_common_algorithm_for_event(event) From b7f28ea5ab34f2a50220429690143f285e6664d2 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 25 Jun 2025 16:43:42 -0700 Subject: [PATCH 23/24] feat: skip chains that don't need new occurrences (len 1 or all same) --- ami/main/models.py | 2 ++ ami/ml/tracking.py | 35 +++++++++++++++++++++++------------ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/ami/main/models.py b/ami/main/models.py index 0f78bed92..2bf8afbfd 100644 --- a/ami/main/models.py +++ b/ami/main/models.py @@ -2178,6 +2178,8 @@ def height(self) -> int | None: if self.bbox and len(self.bbox) == 4: return self.bbox[3] - self.bbox[1] + occurrence_id: int | None = None + class Meta: ordering = [ "frame_num", diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index 65ca43cf5..fe44def8b 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -6,7 +6,7 @@ import numpy as np from django.db.models import Count -from ami.main.models import Classification, Detection, Event, Occurrence +from ami.main.models import Classification, Detection, Event, Occurrence, SourceImage from ami.ml.models import Algorithm if typing.TYPE_CHECKING: @@ -137,37 +137,45 @@ def get_feature_vector(detection: Detection, algorithm: Algorithm): ) -def assign_occurrences_from_detection_chains(source_images, logger): +def assign_occurrences_from_detection_chains(source_images: "list[SourceImage]", logger): """ Walk detection chains across source images and assign a new occurrence to each chain. 
""" - visited = set() + visited: set[int] = set() created_occurrences_count = 0 - existing_occurrence_count = Occurrence.objects.filter(detections__source_image__in=source_images).count() + existing_occurrence_count = ( + Occurrence.objects.filter(detections__source_image__in=source_images).distinct().count() + ) for image in source_images: for det in image.detections.all(): - if det.id in visited or getattr(det, "previous_detection", None) is not None: + if det.pk in visited or getattr(det, "previous_detection", None) is not None: continue # Already processed or this is not a chain start - chain = [] + chain: list[Detection] = [] current = det - while current and current.id not in visited: + while current and current.pk not in visited: chain.append(current) - visited.add(current.id) + visited.add(current.pk) current = current.next_detection if chain and len(chain) > 1: # Only create new occurrence if there are multiple detections in the chain logger.debug( - f"Found chain of {len(chain)} detections starting from detection {det.id} in image {image.pk}" + f"Found chain of {len(chain)} detections starting from detection {det.pk} in image {image.pk}" ) old_occurrences = {d.occurrence_id for d in chain if d.occurrence_id} + if len(old_occurrences) == 1: + # If all detections in the chain belong to the same occurrence, skip reassignment + logger.debug( + f"All detections in chain already assigned to occurrence {old_occurrences.pop()}. Skipping." + ) + continue + # Delete old occurrences (if any) # @TODO: Consider if this is the desired behavior. Check for any history on the occurrence. Consider # soft deleting or just reassign the detections to the new occurrence. - for occ_id in old_occurrences: try: logger.debug(f"Deleting old occurrence {occ_id} before reassignment.") @@ -189,12 +197,15 @@ def assign_occurrences_from_detection_chains(source_images, logger): occurrence.save() logger.debug(f"Assigned occurrence {occurrence.pk} to chain of {len(chain)} detections") - new_occurrence_count = Occurrence.objects.filter(detections__source_image__in=source_images).count() + + # @TODO report how many detections were processed, length of chains, which are solo vs. chains, etc. + + new_occurrence_count = Occurrence.objects.filter(detections__source_image__in=source_images).distinct().count() occurrences_removed = existing_occurrence_count - new_occurrence_count if occurrences_removed > 0: logger.info(f"Reduced existing occurrences by {occurrences_removed}.") logger.info( - f"Assigned {created_occurrences_count} occurrences from detection chains across {len(source_images)} images.\n" + f"Assigned {created_occurrences_count} new occurrences to detection chains in {len(source_images)} images.\n" f"Occurrences before: {existing_occurrence_count}, after: {new_occurrence_count}.\n" f"Total detections processed: {len(visited)}." ) From 30cc2790dd80a7a10b13e7abee05d0e2d81853c2 Mon Sep 17 00:00:00 2001 From: Michael Bunsen Date: Wed, 2 Jul 2025 12:04:33 -0700 Subject: [PATCH 24/24] feat: don't require fully processed sessions for now --- ami/ml/tracking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ami/ml/tracking.py b/ami/ml/tracking.py index fe44def8b..075c1825d 100644 --- a/ami/ml/tracking.py +++ b/ami/ml/tracking.py @@ -21,7 +21,7 @@ class TrackingParams: cost_threshold: float = 0.2 skip_if_human_identifications: bool = True - require_completely_processed_session: bool = True + require_completely_processed_session: bool = False feature_extraction_algorithm_id: int | None = None