From a7543822382ef22b821eb336a5b1eaf632d7eee1 Mon Sep 17 00:00:00 2001 From: RobXYZ Date: Sun, 7 Jun 2026 19:26:44 +0100 Subject: [PATCH 01/19] =?UTF-8?q?fix(import):=20squash=20manual-import-twe?= =?UTF-8?q?aks=20=E2=80=94=20dedupe,=20skip=20un-thumbable=20clips?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Squashes three minor fixes: - Don't upload clips already in the archive but do import incomplete files - Mark already-on-disk clips done so sync stops re-downloading - Stop re-running ffmpeg on un-thumbable clips every sweep Co-Authored-By: Claude Opus 4.8 --- tests/test_import_endpoints.py | 38 ++++++++++++++++ tests/test_importer.py | 72 ++++++++++++++++++++++++++++-- tests/test_reconcile_present.py | 55 +++++++++++++++++++++++ tests/test_thumb_failcache.py | 79 +++++++++++++++++++++++++++++++++ web/routers/imports.py | 32 ++++++++++++- web/services/importer.py | 48 ++++++++++++++++++-- web/services/queue.py | 10 +++++ web/services/scanner.py | 5 +++ web/services/thumbs.py | 41 +++++++++++++++++ web/static/app.js | 35 ++++++++++++--- 10 files changed, 402 insertions(+), 13 deletions(-) create mode 100644 tests/test_reconcile_present.py create mode 100644 tests/test_thumb_failcache.py diff --git a/tests/test_import_endpoints.py b/tests/test_import_endpoints.py index c30a46e..e0ebb38 100644 --- a/tests/test_import_endpoints.py +++ b/tests/test_import_endpoints.py @@ -88,6 +88,44 @@ def test_scan_lists_recognised_and_skipped(client): assert {s["name"] for s in body["skipped"]} == {"junk.bin"} +def test_present_reports_clips_already_in_archive(client): + c, rec = client + here = "2026_0101_080000_0001F.MP4" # complete copy -> present + partial = "2026_0102_090000_0002R.MP4" # archive smaller -> redo -> absent + gone = "2026_0103_100000_0003F.MP4" # not imported -> absent + (rec / "2026-01-01").mkdir() + (rec / "2026-01-01" / here).write_bytes(b"a" * 10) + (rec / "2026-01-02").mkdir() + (rec / "2026-01-02" / 
partial).write_bytes(b"a" * 3) + r = c.post("/api/import/present", json={"files": [ + {"name": here, "size": 10}, + {"name": partial, "size": 10}, + {"name": gone, "size": 10}, + ]}) + assert r.status_code == 200 + assert r.json()["present"] == [here] + + +def test_scan_marks_present_clips(client): + c, rec = client + card = rec / "import" / "DCIM" + card.mkdir(parents=True) + here = "2026_0101_080000_0001F.MP4" # already archived, full size + partial = "2026_0102_090000_0002R.MP4" # archived but truncated + fresh = "2026_0103_100000_0003F.MP4" # not in archive + (card / here).write_bytes(b"a" * 10) + (card / partial).write_bytes(b"b" * 10) + (card / fresh).write_bytes(b"c" * 10) + (rec / "2026-01-01").mkdir() + (rec / "2026-01-01" / here).write_bytes(b"a" * 10) + (rec / "2026-01-02").mkdir() + (rec / "2026-01-02" / partial).write_bytes(b"b" * 4) + r = c.post("/api/import/scan", json={}) + assert r.status_code == 200 + present = {it["basename"]: it["present"] for it in r.json()["recognised"]} + assert present == {here: True, partial: False, fresh: False} + + def test_scan_bad_path_400(client): c, rec = client r = c.post("/api/import/scan", json={"path": str(rec / "nope")}) diff --git a/tests/test_importer.py b/tests/test_importer.py index 522431c..9c4802d 100644 --- a/tests/test_importer.py +++ b/tests/test_importer.py @@ -84,6 +84,33 @@ def test_ingest_clip_places_file_and_records_origin(tmp_path: Path): assert rows[name]["event_type"] == "ro" +def test_ingest_clip_marks_preexisting_queue_row_done(tmp_path: Path): + # The dashcam listed this clip first (queued pending); the user then + # bulk web-uploads it. The import must flip the row to done, or the + # next Wi-Fi cycle re-tries the (now 404) download. 
+ from web.db import Database + from web.services import importer + rec = tmp_path / "rec" + rec.mkdir() + db = Database(str(rec / ".viofosync.db")) + name = "2026_0519_074752_022262PF.MP4" + with db.write() as c: + c.execute( + "INSERT INTO download_queue (filename, source_dir, state, " + "enqueued_at) VALUES (?,?,?,0)", + (name, "/DCIM/Movie", "pending"), + ) + src = tmp_path / "usb" + src.mkdir() + (src / name).write_bytes(b"a" * 10) + man = importer.scan_source(str(src)) + res = importer.ingest_clip(db, _snap(rec), man.items[0], cross_volume=True) + assert res.status == "imported" + rows = _origin_rows(db) + assert rows[name]["state"] == "done" + assert rows[name]["manual"] == 1 + + def test_ingest_clip_cross_volume_copies_and_keeps_source(tmp_path: Path): from web.db import Database from web.services import importer @@ -104,7 +131,7 @@ def test_ingest_clip_cross_volume_copies_and_keeps_source(tmp_path: Path): assert rows[name]["manual"] == 1 -def test_ingest_clip_skips_duplicate(tmp_path: Path): +def test_ingest_clip_skips_complete_duplicate(tmp_path: Path): from web.db import Database from web.services import importer rec = tmp_path / "rec" @@ -112,14 +139,53 @@ def test_ingest_clip_skips_duplicate(tmp_path: Path): db = Database(str(rec / ".viofosync.db")) name = "2026_0101_080000_0001F.MP4" (rec / "2026-01-01").mkdir() - (rec / "2026-01-01" / name).write_bytes(b"existing") + (rec / "2026-01-01" / name).write_bytes(b"x" * 10) # same size -> complete src = tmp_path / "usb" src.mkdir() (src / name).write_bytes(b"a" * 10) man = importer.scan_source(str(src)) res = importer.ingest_clip(db, _snap(rec), man.items[0], cross_volume=True) assert res.status == "already_present" - assert (rec / "2026-01-01" / name).read_bytes() == b"existing" + assert (rec / "2026-01-01" / name).read_bytes() == b"x" * 10 + + +def test_ingest_clip_redoes_partial_archive_file(tmp_path: Path): + from web.db import Database + from web.services import importer + rec = tmp_path / "rec" + 
rec.mkdir() + db = Database(str(rec / ".viofosync.db")) + name = "2026_0101_080000_0001F.MP4" + (rec / "2026-01-01").mkdir() + (rec / "2026-01-01" / name).write_bytes(b"ab") # 2 bytes — truncated + src = tmp_path / "usb" + src.mkdir() + (src / name).write_bytes(b"a" * 10) + man = importer.scan_source(str(src)) + res = importer.ingest_clip(db, _snap(rec), man.items[0], cross_volume=True) + assert res.status == "imported" # partial gets redone + assert (rec / "2026-01-01" / name).read_bytes() == b"a" * 10 + + +def test_present_in_archive_matches_size_and_skips_partials(tmp_path: Path): + from web.services import importer + rec = tmp_path / "rec" + rec.mkdir() + full = "2026_0101_080000_0001F.MP4" # exact-size copy -> present + bigger = "2026_0102_090000_0002R.MP4" # archive larger -> keep, present + partial = "2026_0103_100000_0003F.MP4" # archive smaller -> redo, absent + gone = "2026_0104_110000_0004R.MP4" # not imported at all -> absent + (rec / "2026-01-01").mkdir() + (rec / "2026-01-01" / full).write_bytes(b"a" * 10) + (rec / "2026-01-02").mkdir() + (rec / "2026-01-02" / bigger).write_bytes(b"a" * 20) + (rec / "2026-01-03").mkdir() + (rec / "2026-01-03" / partial).write_bytes(b"a" * 3) + + present = importer.present_in_archive(_snap(rec), { + full: 10, bigger: 10, partial: 10, gone: 10, "notes.txt": 5, + }) + assert present == {full, bigger} def test_ingest_clip_restores_source_when_final_rename_fails(tmp_path, monkeypatch): diff --git a/tests/test_reconcile_present.py b/tests/test_reconcile_present.py new file mode 100644 index 0000000..22fa9f5 --- /dev/null +++ b/tests/test_reconcile_present.py @@ -0,0 +1,55 @@ +"""Reconcile must not leave on-disk clips stuck as pending/failed. + +Regression: a clip the dashcam listed (queued pending), then placed on +disk by another path (bulk web-upload / manual copy), used to stay +pending because reconcile only inserted a 'done' row when the filename +was absent from the queue. 
On the next Wi-Fi cycle the worker re-tried +the download and the dashcam 404'd it. +""" +from __future__ import annotations + +import datetime as _dt +from pathlib import Path + +import pytest + +from web.db import Database +from web.services import queue as q + + +class _Rec: + def __init__(self, filename: str, *, filepath: str = "/DCIM/Movie", + size: int = 1000) -> None: + self.filename = filename + self.filepath = filepath + self.size = size + self.datetime = _dt.datetime(2026, 5, 19, 7, 47, 52) + + +@pytest.fixture +def db(tmp_path: Path) -> Database: + return Database(str(tmp_path / ".viofosync.db")) + + +def _states(db: Database) -> dict[str, str]: + with db.conn() as c: + return {r["filename"]: r["state"] for r in c.execute( + "SELECT filename, state FROM download_queue").fetchall()} + + +def _seed(db: Database, filename: str, state: str) -> None: + with db.write() as c: + c.execute( + "INSERT INTO download_queue (filename, source_dir, state, " + "enqueued_at) VALUES (?,?,?,0)", + (filename, "/DCIM/Movie", state), + ) + + +@pytest.mark.parametrize("state", ["pending", "failed"]) +def test_reconcile_heals_on_disk_clip_stuck_in_queue(db: Database, state: str): + name = "2026_0519_074752_022262PF.MP4" + _seed(db, name, state) # camera listed it earlier + # File is now on disk (web-upload) and the camera still lists it. + q.reconcile(db, [_Rec(name)], present_filenames=[name]) + assert _states(db)[name] == "done" diff --git a/tests/test_thumb_failcache.py b/tests/test_thumb_failcache.py new file mode 100644 index 0000000..2c0f0c3 --- /dev/null +++ b/tests/test_thumb_failcache.py @@ -0,0 +1,79 @@ +"""Thumbnail sweep must not re-attempt clips that can't produce a thumb. + +Regression: ``ensure_thumb`` returned None on ffmpeg failure and left no +marker, so un-thumbable clips (short/corrupt/partial) were re-selected and +re-run through ffmpeg on every sweep. With a sweep after every working +cycle (and on pause) that was a recurring CPU storm. 
+""" +from __future__ import annotations + +import os +import time +from pathlib import Path + +import pytest + +from web.db import Database +from web.services import scanner, thumbs + + +@pytest.fixture +def db(tmp_path: Path) -> Database: + return Database(str(tmp_path / ".viofosync.db")) + + +def _add_clip(db: Database, path: str, clip_id: int = 1) -> int: + with db.write() as c: + c.execute( + "INSERT INTO clip_index (id, path, basename, timestamp, camera, " + "sequence, scanned_at) VALUES (?,?,?,?,?,?,?)", + (clip_id, path, os.path.basename(path), 0, "F", 0, 0), + ) + return clip_id + + +def test_mark_failed_then_skipped(tmp_path: Path): + rec = tmp_path / "rec" + rec.mkdir() + video = rec / "clip.MP4" + video.write_bytes(b"not a real video") + # A fresh failure marker (recorded after the video was written) means + # "don't bother trying again until the file changes". + thumbs.mark_failed(str(rec), 1) + assert thumbs.failed_recently(str(rec), 1, str(video)) is True + + +def test_stale_marker_retried_after_file_changes(tmp_path: Path): + rec = tmp_path / "rec" + rec.mkdir() + video = rec / "clip.MP4" + video.write_bytes(b"old") + thumbs.mark_failed(str(rec), 1) + # The clip is later rewritten (e.g. a partial import got redone) — its + # mtime moves past the marker, so the thumb is worth another attempt. 
+ time.sleep(0.01) + os.utime(str(video), None) + assert thumbs.failed_recently(str(rec), 1, str(video)) is False + + +async def test_sweep_skips_failed_clip_on_next_pass(tmp_path: Path, db: Database): + rec = tmp_path / "rec" + rec.mkdir() + video = rec / "clip.MP4" + video.write_bytes(b"not a real video") + _add_clip(db, str(video)) + + calls = {"n": 0} + + async def _fake_ensure(recordings, clip_id, path): + calls["n"] += 1 + thumbs.mark_failed(recordings, clip_id) # simulate ffmpeg failure + return None + + import unittest.mock as _m + with _m.patch.object(thumbs, "ensure_thumb", _fake_ensure): + await scanner.sweep_missing_thumbs(db, str(rec)) + await scanner.sweep_missing_thumbs(db, str(rec)) + + # First sweep attempts it once; the second must skip it. + assert calls["n"] == 1 diff --git a/web/routers/imports.py b/web/routers/imports.py index a6d6846..c0b09a8 100644 --- a/web/routers/imports.py +++ b/web/routers/imports.py @@ -32,6 +32,15 @@ class _PathBody(BaseModel): path: str | None = None +class _FileRef(BaseModel): + name: str + size: int = 0 + + +class _FilesBody(BaseModel): + files: list[_FileRef] = [] + + def _db(request: Request): return request.app.state.db @@ -54,15 +63,34 @@ def scan(request: Request, body: _PathBody) -> dict: if not os.path.isdir(root): raise HTTPException(400, f"not a readable directory: {root}") man = importer.scan_source(root) + present = importer.present_in_archive( + snap, {it.basename: it.size_bytes for it in man.items}, + ) + recognised = [] + for it in man.items: + d = importer.scan_item_dict(it) + d["present"] = it.basename in present + recognised.append(d) return { "path": root, "cross_volume": importer.is_cross_volume(root, snap.recordings), "total_bytes": man.total_bytes, - "recognised": [importer.scan_item_dict(it) for it in man.items], + "present_count": len(present), + "recognised": recognised, "skipped": man.skipped, } +@router.post("/present", dependencies=[Depends(require_csrf)]) +def present(request: Request, 
body: _FilesBody) -> dict: + """Report which clips already have a complete copy in the archive, so + the browser-upload tab can skip re-sending them. Size-matched: a + truncated archive copy is reported absent so the upload redoes it.""" + snap = _snap(request) + sizes = {f.name: f.size for f in body.files} + return {"present": sorted(importer.present_in_archive(snap, sizes))} + + @router.post("/ingest", dependencies=[Depends(require_csrf)]) async def ingest(request: Request, body: _PathBody) -> dict: if getattr(request.app.state, "import_running", False): @@ -111,7 +139,7 @@ async def upload(request: Request) -> dict: m, name, source_rel_path=rel, size=size, src_path="", ) dest = importer.dest_for(snap, item) - if os.path.exists(dest): + if importer.has_complete_copy(dest, item.size_bytes): return {"status": "already_present", "filename": name} # Evict to fit BEFORE writing bytes (size known from the header). diff --git a/web/services/importer.py b/web/services/importer.py index 747c9a6..3426eb3 100644 --- a/web/services/importer.py +++ b/web/services/importer.py @@ -131,6 +131,43 @@ def dest_for(snap, item: ScanItem) -> str: return vfs.get_filepath(snap.recordings, group or "", item.basename) +def has_complete_copy(dest: str, expected_size: int) -> bool: + """True if ``dest`` already holds a non-partial copy: it exists and is + not smaller than ``expected_size``. A smaller file is treated as a + truncated/partial import and redone; an unknown size (<= 0) trusts mere + existence. Larger-than-expected files are kept, never clobbered.""" + if not os.path.exists(dest): + return False + if expected_size and expected_size > 0: + try: + return os.path.getsize(dest) >= expected_size + except OSError: + return False + return True + + +def present_in_archive(snap, sizes) -> set[str]: + """Return the subset of names that already have a COMPLETE copy in the + archive. ``sizes`` maps basename -> expected size in bytes (0/unknown + trusts existence). 
Unrecognised / unparseable names are ignored. Lets + the import flow skip clips already there instead of re-uploading or + re-scanning them, while still redoing truncated partials.""" + out: set[str] = set() + for name, size in sizes.items(): + m = vfs.downloaded_filename_re.match(name) + if not m: + continue + try: + item = scan_item_from_match( + m, name, source_rel_path=name, size=size or 0, src_path="", + ) + except ValueError: + continue + if has_complete_copy(dest_for(snap, item), size or 0): + out.add(name) + return out + + def _origin_source_dir(item: ScanItem) -> str: # Invariant: contains "/RO/" iff the clip is locked, so scanner.scan # re-derives event_type='ro' on every future rescan. @@ -140,11 +177,16 @@ def _origin_source_dir(item: ScanItem) -> str: def _record_origin(db: Database, item: ScanItem) -> None: now = int(time.time()) with db.write() as c: + # On conflict the clip was already queued (e.g. the dashcam listed + # it before this bulk import). Flip that row to done so the next + # Wi-Fi cycle doesn't re-attempt a download that 404s. 
c.execute( - "INSERT OR IGNORE INTO download_queue " + "INSERT INTO download_queue " "(filename, source_dir, remote_size, recorded_at, camera, " " event_type, state, priority, enqueued_at, finished_at, manual) " - "VALUES (?, ?, ?, ?, ?, ?, 'done', 0, ?, ?, 1)", + "VALUES (?, ?, ?, ?, ?, ?, 'done', 0, ?, ?, 1) " + "ON CONFLICT(filename) DO UPDATE SET " + " state='done', finished_at=excluded.finished_at, manual=1", (item.basename, _origin_source_dir(item), item.size_bytes, item.timestamp, item.camera, item.event_type, now, now), ) @@ -166,7 +208,7 @@ def ingest_clip( (the upload path); we then go straight to the final rename.""" recordings = snap.recordings dest = dest_for(snap, item) - if os.path.exists(dest): + if has_complete_copy(dest, item.size_bytes): return ClipResult(item.basename, "already_present", size_bytes=item.size_bytes, event_type=item.event_type) diff --git a/web/services/queue.py b/web/services/queue.py index e031ffe..8491306 100644 --- a/web/services/queue.py +++ b/web/services/queue.py @@ -102,6 +102,16 @@ def reconcile( ), ) marked_done += 1 + elif existing[filename]["state"] in ("pending", "failed"): + # The clip got onto disk by another path (bulk import, + # manual copy) after it was queued. Heal the stale row + # instead of re-downloading a file we already have. + c.execute( + "UPDATE download_queue SET state='done', " + "finished_at=? 
WHERE filename=?", + (now, filename), + ) + marked_done += 1 continue if filename in existing: diff --git a/web/services/scanner.py b/web/services/scanner.py index 0706b72..2968284 100644 --- a/web/services/scanner.py +++ b/web/services/scanner.py @@ -238,6 +238,11 @@ async def sweep_missing_thumbs( thumb_file = thumbs.thumb_path(recordings, row["id"]) if os.path.exists(thumb_file) and os.path.getsize(thumb_file) > 0: continue + # A clip that already failed extraction (corrupt/too-short/partial) + # is skipped until its file changes, so the sweep doesn't re-spawn + # ffmpeg on the same un-thumbable clips every cycle. + if thumbs.failed_recently(recordings, row["id"], row["path"]): + continue todo.append((row["id"], row["path"])) if not todo: diff --git a/web/services/thumbs.py b/web/services/thumbs.py index 3818b19..a952690 100644 --- a/web/services/thumbs.py +++ b/web/services/thumbs.py @@ -26,6 +26,44 @@ def thumb_path(recordings: str, clip_id: int) -> str: return os.path.join(_cache_dir(recordings), f"{clip_id}.jpg") +def fail_marker_path(recordings: str, clip_id: int) -> str: + return os.path.join(_cache_dir(recordings), f"{clip_id}.jpg.fail") + + +def mark_failed(recordings: str, clip_id: int) -> None: + """Record that thumbnail extraction failed for this clip, so the + sweep doesn't re-run ffmpeg on it every pass. Cleared automatically + once the source file changes (see :func:`failed_recently`).""" + try: + with open(fail_marker_path(recordings, clip_id), "w"): + pass + except OSError: # pragma: no cover — best-effort cache + pass + + +def _clear_failed(recordings: str, clip_id: int) -> None: + try: + os.remove(fail_marker_path(recordings, clip_id)) + except OSError: + pass + + +def failed_recently(recordings: str, clip_id: int, video_path: str) -> bool: + """True if a prior thumbnail attempt failed and the source file + hasn't changed since. A marker older than the video (the clip was + rewritten — e.g. 
a partial import got redone) is treated as stale, + so the thumb is worth another attempt.""" + marker = fail_marker_path(recordings, clip_id) + try: + marker_mtime = os.path.getmtime(marker) + except OSError: + return False + try: + return marker_mtime >= os.path.getmtime(video_path) + except OSError: # video gone — let the caller's isfile check handle it + return False + + async def ensure_thumb( recordings: str, clip_id: int, video_path: str ) -> Optional[str]: @@ -58,8 +96,11 @@ async def ensure_thumb( await asyncio.wait_for(proc.wait(), timeout=15.0) except asyncio.TimeoutError: proc.kill() + mark_failed(recordings, clip_id) return None if proc.returncode != 0 or not os.path.exists(out): + mark_failed(recordings, clip_id) return None + _clear_failed(recordings, clip_id) return out diff --git a/web/static/app.js b/web/static/app.js index d0ded9a..ff7db90 100644 --- a/web/static/app.js +++ b/web/static/app.js @@ -3129,10 +3129,31 @@ window.addEventListener("hashchange", () => { $("import-upload-go").addEventListener("click", async () => { show($("import-progress")); hide($("import-summary")); const tally = {}; - for (let i = 0; i < picked.length; i++) { - const { file, path } = picked[i]; - $("import-status").textContent = `Uploading ${file.name} (${i + 1}/${picked.length})`; - $("import-bar").style.width = `${(i / picked.length) * 100}%`; + + // Ask the server which clips are already in the archive and drop them + // up front, so they're never re-uploaded. 
+ let queue = picked; + try { + $("import-status").textContent = "Checking for clips already imported…"; + const r = await fetch("/api/import/present", { + method: "POST", + credentials: "same-origin", + headers: { ...csrfH(), "Content-Type": "application/json" }, + body: JSON.stringify({ + files: picked.map((p) => ({ name: p.file.name, size: p.file.size })), + }), + }); + if (r.ok) { + const present = new Set((await r.json()).present || []); + queue = picked.filter((p) => !present.has(p.file.name)); + if (present.size) tally.already_present = present.size; + } + } catch (_) { /* fall back to uploading everything */ } + + for (let i = 0; i < queue.length; i++) { + const { file, path } = queue[i]; + $("import-status").textContent = `Uploading ${file.name} (${i + 1}/${queue.length})`; + $("import-bar").style.width = `${(i / queue.length) * 100}%`; let res; try { const r = await fetch("/api/import/upload", { @@ -3179,8 +3200,12 @@ window.addEventListener("hashchange", () => { return; } const m = await r.json(); + const newCount = m.recognised.length - (m.present_count || 0); + const dupNote = m.present_count + ? `${newCount} new, ${m.present_count} already in archive, ` + : `${m.recognised.length} clip(s), `; $("import-folder-manifest").textContent = - `${m.recognised.length} clip(s), ${m.skipped.length} skipped, ` + + `${dupNote}${m.skipped.length} skipped, ` + `${(m.total_bytes / 1e9).toFixed(2)} GB${m.cross_volume ? 
" (external — copy)" : ""}.`; $("import-folder-go").dataset.path = path || ""; show($("import-folder-go")); From 53c18c422bd38d2ab2dab5f577d1f00436c29e58 Mon Sep 17 00:00:00 2001 From: RobXYZ Date: Sun, 7 Jun 2026 21:53:09 +0100 Subject: [PATCH 02/19] perf(archive): cache the per-day GPS route aggregation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Opening a day (or the day route) re-parsed every GPX sidecar for that date on each request — ~19s for 536 files on a busy day, and the day view's "Loading…" blocks on it. The result only changes when the day's GPX files change, so cache the aggregated payload keyed by a signature of those files (path + mtime + size), persisted as a JSON sidecar under $RECORDINGS/.route_cache so it survives restarts. Labels are applied on read (not cached) so they stay current as the geocode cache fills. First open still pays the parse; every later open is near-instant until the day's GPX actually changes. Co-Authored-By: Claude Opus 4.8 --- tests/test_route_cache.py | 59 ++++++++++++++++++++++++ tests/test_route_endpoint.py | 85 +++++++++++++++++++++++++++++++++++ web/routers/archive.py | 87 +++++++++++++++++++++++------------- web/services/route_cache.py | 66 +++++++++++++++++++++++++++ 4 files changed, 266 insertions(+), 31 deletions(-) create mode 100644 tests/test_route_cache.py create mode 100644 tests/test_route_endpoint.py create mode 100644 web/services/route_cache.py diff --git a/tests/test_route_cache.py b/tests/test_route_cache.py new file mode 100644 index 0000000..a0f5065 --- /dev/null +++ b/tests/test_route_cache.py @@ -0,0 +1,59 @@ +"""Tests for the per-day route aggregation cache.""" +from __future__ import annotations + +import os +from pathlib import Path + +from web.services import route_cache + + +def test_signature_is_order_independent(tmp_path: Path): + a = tmp_path / "1.gpx" + a.write_text("x") + b = tmp_path / "2.gpx" + b.write_text("yy") + assert 
route_cache.signature([str(a), str(b)]) == \ + route_cache.signature([str(b), str(a)]) + + +def test_signature_changes_when_a_file_changes(tmp_path: Path): + a = tmp_path / "1.gpx" + a.write_text("x") + before = route_cache.signature([str(a)]) + a.write_text("xxxxxxxx") # size changes + os.utime(a, (a.stat().st_atime, a.stat().st_mtime + 10)) # and mtime + assert route_cache.signature([str(a)]) != before + + +def test_signature_changes_when_a_file_is_added(tmp_path: Path): + a = tmp_path / "1.gpx" + a.write_text("x") + one = route_cache.signature([str(a)]) + b = tmp_path / "2.gpx" + b.write_text("y") + assert route_cache.signature([str(a), str(b)]) != one + + +def test_signature_ignores_missing_files(tmp_path: Path): + a = tmp_path / "1.gpx" + a.write_text("x") + missing = str(tmp_path / "nope.gpx") + assert route_cache.signature([str(a), missing]) == \ + route_cache.signature([str(a)]) + + +def test_store_then_load_roundtrips(tmp_path: Path): + rec = str(tmp_path) + payload = {"date": "2026-06-02", "point_count": 3, "journeys": []} + route_cache.store(rec, "2026-06-02", "sig1", payload) + assert route_cache.load(rec, "2026-06-02", "sig1") == payload + + +def test_load_returns_none_on_signature_mismatch(tmp_path: Path): + rec = str(tmp_path) + route_cache.store(rec, "2026-06-02", "sig1", {"point_count": 1}) + assert route_cache.load(rec, "2026-06-02", "sig2") is None + + +def test_load_returns_none_when_absent(tmp_path: Path): + assert route_cache.load(str(tmp_path), "2026-06-02", "sig") is None diff --git a/tests/test_route_endpoint.py b/tests/test_route_endpoint.py new file mode 100644 index 0000000..97a8558 --- /dev/null +++ b/tests/test_route_endpoint.py @@ -0,0 +1,85 @@ +"""Tests for GET /api/archive/day/{date}/route aggregation caching.""" +from __future__ import annotations + +import os +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +from web.routers import archive + + +@pytest.fixture +def 
authed_client(tmp_config_dir, tmp_recordings_dir, monkeypatch): + from web import app as app_mod + from web import settings as settings_mod + monkeypatch.setenv("VIOFOSYNC_RESTART_DISABLED", "1") + settings_mod.reset_for_tests() + application = app_mod.create_app() + with TestClient(application) as c: + c.post("/setup", data={ + "address": "192.168.1.230", + "password": "twelve-chars-min!", + "confirm": "twelve-chars-min!", + }) + csrf = c.get("/api/auth/csrf").json()["csrf"] + c.headers.update({"x-csrf-token": csrf}) + yield c + + +def _add_gpx_clip(app, rec: Path, clip_id: int, date: str = "2026-06-02") -> str: + day_dir = rec / date + day_dir.mkdir(parents=True, exist_ok=True) + mp4 = day_dir / f"{clip_id}.MP4" + mp4.write_bytes(b"\0") + gpx = day_dir / f"{clip_id}.MP4.gpx" + gpx.write_text("") + with app.state.db.write() as c: + c.execute( + "INSERT INTO clip_index " + "(id, path, basename, group_name, timestamp, camera, sequence, " + " event_type, has_gpx, gps_examined, scanned_at) " + "VALUES (?,?,?,?,?,?,?,?,1,0,?)", + (clip_id, str(mp4), mp4.name, date, 1_717_312_440 + clip_id, + "F", clip_id, "normal", 1_717_312_440), + ) + return str(gpx) + + +def _counting_aggregate(calls): + def _agg(paths): + calls["n"] += 1 + return [], [], [] + return _agg + + +def test_route_aggregation_is_cached(authed_client, tmp_recordings_dir, monkeypatch): + app = authed_client.app + _add_gpx_clip(app, tmp_recordings_dir, 1) + calls = {"n": 0} + monkeypatch.setattr(archive.gps_service, "aggregate_day", + _counting_aggregate(calls)) + + r1 = authed_client.get("/api/archive/day/2026-06-02/route") + r2 = authed_client.get("/api/archive/day/2026-06-02/route") + assert r1.status_code == 200 and r2.status_code == 200 + assert r1.json() == r2.json() + assert calls["n"] == 1 # second request served from cache + + +def test_route_cache_busts_when_gpx_changes( + authed_client, tmp_recordings_dir, monkeypatch +): + app = authed_client.app + gpx = _add_gpx_clip(app, tmp_recordings_dir, 1) 
+ calls = {"n": 0} + monkeypatch.setattr(archive.gps_service, "aggregate_day", + _counting_aggregate(calls)) + + authed_client.get("/api/archive/day/2026-06-02/route") + Path(gpx).write_text("changed-and-larger") + st = Path(gpx).stat() + os.utime(gpx, (st.st_atime, st.st_mtime + 10)) + authed_client.get("/api/archive/day/2026-06-02/route") + assert calls["n"] == 2 # changed GPX -> recomputed diff --git a/web/routers/archive.py b/web/routers/archive.py index a569f73..0cfb1f2 100644 --- a/web/routers/archive.py +++ b/web/routers/archive.py @@ -23,7 +23,7 @@ from ..auth import require_csrf, require_session from ..services import gps as gps_service -from ..services import scanner, thumbs +from ..services import route_cache, scanner, thumbs log = logging.getLogger("viofosync.archive") @@ -223,33 +223,10 @@ def _in_range(ts: int) -> bool: return {"date": date, "clips": clips} -@router.get("/day/{date}/route") -def get_route(request: Request, date: str) -> dict: - """Merged GPS track for the day plus detected journeys.""" - try: - _dt.date.fromisoformat(date) - except ValueError: - raise HTTPException(400, "bad date format") - - with _db(request).conn() as c: - rows = c.execute( - """ - SELECT path FROM clip_index - WHERE group_name = ? AND has_gpx = 1 - ORDER BY timestamp ASC - """, - (date,), - ).fetchall() - - gpx_paths = [r["path"] + ".gpx" for r in rows] - points, stops, journeys = gps_service.aggregate_day(gpx_paths) - - # Synchronous cache lookup — no network. The UI fetches any - # uncached labels lazily via /geocode after first paint. - geocoder = getattr(request.app.state, "geocode", None) - def _lbl(lat, lon): - return geocoder.cache_lookup(lat, lon) if geocoder else None - +def _assemble_route(date: str, points, stops, journeys) -> dict: + """Build the route payload (no labels — those are applied on read so + they stay current as the geocode cache fills). 
This is the expensive- + to-produce part that gets cached.""" return { "date": date, "point_count": len(points), @@ -263,8 +240,8 @@ def _lbl(lat, lon): "start_lon": j.start_lon, "end_lat": j.end_lat, "end_lon": j.end_lon, - "start_label": _lbl(j.start_lat, j.start_lon), - "end_label": _lbl(j.end_lat, j.end_lon), + "start_label": None, + "end_label": None, "distance_m": round(j.distance_m, 1), "duration_s": int( (j.end_time - j.start_time).total_seconds() @@ -291,13 +268,61 @@ def _lbl(lat, lon): "duration_s": int(s.duration_s), "lat": s.center_lat, "lon": s.center_lon, - "label": _lbl(s.center_lat, s.center_lon), + "label": None, } for s in stops ], } +def _apply_labels(payload: dict, geocoder) -> None: + """Fill journey/stop labels from the geocode cache (synchronous, no + network). Mutates ``payload`` in place. Uncached labels stay None and + are fetched lazily by the UI via /geocode after first paint.""" + def _lbl(lat, lon): + return geocoder.cache_lookup(lat, lon) if geocoder else None + for j in payload.get("journeys", []): + j["start_label"] = _lbl(j["start_lat"], j["start_lon"]) + j["end_label"] = _lbl(j["end_lat"], j["end_lon"]) + for s in payload.get("stops", []): + s["label"] = _lbl(s["lat"], s["lon"]) + + +@router.get("/day/{date}/route") +def get_route(request: Request, date: str) -> dict: + """Merged GPS track for the day plus detected journeys.""" + try: + _dt.date.fromisoformat(date) + except ValueError: + raise HTTPException(400, "bad date format") + + with _db(request).conn() as c: + rows = c.execute( + """ + SELECT path FROM clip_index + WHERE group_name = ? AND has_gpx = 1 + ORDER BY timestamp ASC + """, + (date,), + ).fetchall() + + gpx_paths = [r["path"] + ".gpx" for r in rows] + + # The GPX re-parse is the slow part (tens of seconds on a busy day) and + # only changes when the day's GPX files change, so cache it keyed by a + # signature of those files. Labels are applied after, on every request. 
+ recordings = _settings(request).recordings + sig = route_cache.signature(gpx_paths) + payload = route_cache.load(recordings, date, sig) + if payload is None: + points, stops, journeys = gps_service.aggregate_day(gpx_paths) + payload = _assemble_route(date, points, stops, journeys) + route_cache.store(recordings, date, sig, payload) + + _apply_labels(payload, getattr(request.app.state, "geocode", None)) + return payload + + @router.get("/geocode") async def geocode( request: Request, diff --git a/web/services/route_cache.py b/web/services/route_cache.py new file mode 100644 index 0000000..06b8a1f --- /dev/null +++ b/web/services/route_cache.py @@ -0,0 +1,66 @@ +"""Per-day cache of the aggregated GPS route payload. + +Building a day's route re-parses every GPX sidecar for that day, which is +slow on a large archive (tens of seconds for hundreds of clips) and runs +on every day-view / route request. The result only changes when the day's +GPX files change, so cache it keyed by a signature of those files +(path + mtime + size) and rebuild only on a mismatch. + +Persisted as a JSON sidecar under ``$RECORDINGS/.route_cache/<date>.json`` +so it survives restarts — mirroring the ``.thumbs`` / ``.filmstrips`` +caches. Labels are NOT cached here: they come from the geocode cache and +are applied fresh by the caller, so they stay current as that cache fills. +""" +from __future__ import annotations + +import hashlib +import json +import os +from typing import Any, Iterable, Optional + + +def _cache_dir(recordings: str) -> str: + d = os.path.join(recordings, ".route_cache") + os.makedirs(d, exist_ok=True) + return d + + +def _cache_path(recordings: str, date: str) -> str: + return os.path.join(_cache_dir(recordings), f"{date}.json") + + +def signature(gpx_paths: Iterable[str]) -> str: + """Stable fingerprint of the GPX file set for a day. Order-independent; + changes when any file's mtime/size changes or files are added/removed.
+ Missing files contribute nothing (they can't affect the aggregation).""" + parts = [] + for p in sorted(gpx_paths): + try: + st = os.stat(p) + except OSError: + continue + parts.append(f"{p}:{st.st_mtime_ns}:{st.st_size}") + return hashlib.sha256("\n".join(parts).encode()).hexdigest() + + +def load(recordings: str, date: str, sig: str) -> Optional[dict]: + """Return the cached payload for ``date`` iff it was built from the + same GPX file set (``sig``); otherwise None.""" + try: + with open(_cache_path(recordings, date)) as f: + blob = json.load(f) + except (OSError, ValueError): + return None + if blob.get("signature") != sig: + return None + return blob.get("payload") + + +def store(recordings: str, date: str, sig: str, payload: Any) -> None: + """Persist ``payload`` for ``date`` under signature ``sig``. Best-effort: + a write failure just means the next request recomputes.""" + try: + with open(_cache_path(recordings, date), "w") as f: + json.dump({"signature": sig, "payload": payload}, f) + except OSError: + pass From fd872dafa3a06b8b55403d2d893fa31c277a9681 Mon Sep 17 00:00:00 2001 From: RobXYZ Date: Sun, 7 Jun 2026 22:09:49 +0100 Subject: [PATCH 03/19] fix(sync): probe recordings writability with real write, not os.access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit os.access(path, W_OK) is unreliable on NFS — it checks cached owner/mode against the local UID and can report a genuinely writable export as non-writable, while the real write is accepted by the server's own permission mapping. This caused a false "recordings path not writable" error for users on NFS mounts where manual writes succeed. 
Co-Authored-By: Claude Opus 4.8 --- tests/test_sync_worker_error_signals.py | 21 +++++++++++++++++++++ web/services/sync_worker.py | 25 ++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/tests/test_sync_worker_error_signals.py b/tests/test_sync_worker_error_signals.py index 17ce50c..57f39f2 100644 --- a/tests/test_sync_worker_error_signals.py +++ b/tests/test_sync_worker_error_signals.py @@ -48,6 +48,27 @@ async def test_check_recordings_writable_clears_previous_error(tmp_path): assert {"type": "sync_error", "kind": None, "message": None} in hub.events +async def test_check_recordings_writable_uses_real_probe_not_os_access( + tmp_path, monkeypatch +): + """Regression: os.access(W_OK) is unreliable on NFS — it checks cached + owner/mode against the local UID and can report a genuinely writable + export as non-writable. The check must probe with a real write instead.""" + snap = types.SimpleNamespace(recordings=str(tmp_path)) + hub = _RecordingHub() + sw = _make_worker(snap, hub) + # Simulate NFS: os.access lies and says "not writable" even though + # an actual create-and-delete in the directory succeeds. + monkeypatch.setattr( + "web.services.sync_worker.os.access", lambda *a, **k: False + ) + ok = await sw._check_recordings_writable() + assert ok is True + assert hub.events == [] + # The probe must leave nothing behind. + assert list(tmp_path.iterdir()) == [] + + async def test_check_recordings_writable_does_not_emit_when_already_clear(tmp_path): snap = types.SimpleNamespace(recordings=str(tmp_path)) hub = _RecordingHub() diff --git a/web/services/sync_worker.py b/web/services/sync_worker.py index e4ec6c4..c925d1d 100644 --- a/web/services/sync_worker.py +++ b/web/services/sync_worker.py @@ -51,6 +51,29 @@ RETENTION_INTERVAL_SECONDS = 300 # 5 minutes +def _path_is_writable(path: str) -> bool: + """Return True if *path* is a directory we can actually write to. 
+ + We probe with a real create-and-delete rather than ``os.access(W_OK)``: + on NFS (and other networked filesystems) ``os.access`` checks the cached + owner/mode against the local UID and can report a perfectly writable + export as non-writable, while the real write is accepted by the server's + own permission mapping. A live probe is the only reliable test.""" + if not (path and os.path.isdir(path)): + return False + probe = os.path.join(path, f".viofosync-writetest-{os.getpid()}") + try: + with open(probe, "w"): + pass + except OSError: + return False + try: + os.unlink(probe) + except OSError: + pass + return True + + def _filter_ro_only(listing): """Yield only Recordings whose dashcam source path lies under /RO/. Used when the user has 'Sync read-only files only' on.""" @@ -471,7 +494,7 @@ async def _check_recordings_writable(self) -> bool: Emits a sticky sync_error on failure, clears one on recovery.""" snap = self._provider.get() path = getattr(snap, "recordings", None) or "" - ok = bool(path) and os.path.isdir(path) and os.access(path, os.W_OK) + ok = _path_is_writable(path) if not ok: await self._set_sync_error( "recordings_unwritable", From 19cbf3024ffe89d37aba7a7b2291aa19c4bfe714 Mon Sep 17 00:00:00 2001 From: RobXYZ Date: Mon, 8 Jun 2026 12:39:06 +0100 Subject: [PATCH 04/19] fix(mqtt): treat TaskGroup disconnects as reconnecting, not fatal _connect_and_loop runs its workers in an asyncio.TaskGroup, which reports a task failure as an ExceptionGroup rather than the bare error. A routine broker disconnect (aiomqtt.MqttError, e.g. "Disconnected during message iteration") therefore escaped the `except aiomqtt.MqttError` clause, fell through to `except Exception`, was logged as a fatal "unexpected error" with a full traceback, and set the connection state to ERROR instead of RECONNECTING. 
Unwrap the ExceptionGroup: propagate cancellation untouched (so shutdown is never swallowed), classify a group of pure MqttErrors as a normal reconnect, and only flag genuinely unexpected errors as ERROR. Co-Authored-By: Claude Opus 4.8 --- tests/test_mqtt_reconnect_classification.py | 57 +++++++++++++++++++++ web/services/mqtt.py | 25 +++++++++ 2 files changed, 82 insertions(+) create mode 100644 tests/test_mqtt_reconnect_classification.py diff --git a/tests/test_mqtt_reconnect_classification.py b/tests/test_mqtt_reconnect_classification.py new file mode 100644 index 0000000..6767087 --- /dev/null +++ b/tests/test_mqtt_reconnect_classification.py @@ -0,0 +1,57 @@ +"""Reconnect vs. fatal-error classification in MqttService._run. + +_connect_and_loop runs its workers inside an asyncio.TaskGroup, which +reports any task failure as an ExceptionGroup rather than the bare +exception. A routine broker disconnect (aiomqtt.MqttError, e.g. +"Disconnected during message iteration") therefore reaches _run wrapped +in a group. It must be classified as RECONNECTING, not logged as an +unexpected fatal ERROR. +""" +from __future__ import annotations + +import asyncio + +import aiomqtt + +from web.services.mqtt import ConnState, MqttService + + +def _make_service(raises: BaseException) -> MqttService: + svc = MqttService(db=None, provider=None, hub=None, app=None) + svc._stop = asyncio.Event() + svc._cfg = lambda: {"host": "broker", "port": 1883} + + async def _boom(_aiomqtt_mod, _cfg): + # Break out of the while loop after this single attempt so the + # backoff sleep is skipped (mirrors _run's `if _stop: break`). 
+ svc._stop.set() + raise raises + + svc._connect_and_loop = _boom # type: ignore[assignment] + return svc + + +def test_taskgroup_disconnect_is_reconnecting(): + eg = ExceptionGroup( + "unhandled errors in a TaskGroup (1 sub-exception)", + [aiomqtt.MqttError("Disconnected during message iteration")], + ) + svc = _make_service(eg) + asyncio.run(svc._run()) + assert svc._state is ConnState.RECONNECTING + assert "Disconnected during message iteration" in (svc._detail or "") + + +def test_bare_mqtterror_is_reconnecting(): + svc = _make_service(aiomqtt.MqttError("Operation timed out")) + asyncio.run(svc._run()) + assert svc._state is ConnState.RECONNECTING + assert "Operation timed out" in (svc._detail or "") + + +def test_genuine_error_in_group_is_error(): + eg = ExceptionGroup("boom", [ValueError("something truly unexpected")]) + svc = _make_service(eg) + asyncio.run(svc._run()) + assert svc._state is ConnState.ERROR + assert "something truly unexpected" in (svc._detail or "") diff --git a/web/services/mqtt.py b/web/services/mqtt.py index 1191d22..665ac71 100644 --- a/web/services/mqtt.py +++ b/web/services/mqtt.py @@ -254,6 +254,31 @@ async def _run(self) -> None: except aiomqtt.MqttError as e: log.warning("mqtt: connection lost (%s); reconnecting", e) self._set_state(ConnState.RECONNECTING, detail=str(e)) + except BaseExceptionGroup as eg: + # _connect_and_loop runs its workers in an asyncio.TaskGroup, + # which reports a task failure as an ExceptionGroup — never + # the bare error. A routine broker disconnect therefore + # arrives here wrapped (e.g. an aiomqtt.MqttError + # "Disconnected during message iteration"); the old + # `except aiomqtt.MqttError` missed the group, so a normal + # reconnect was logged as a fatal "unexpected error" with the + # state set to ERROR instead of RECONNECTING. Cancellation is + # normally delivered bare (caught above), but honour it here + # too so shutdown is never swallowed as an error. 
+ cancelled, eg = eg.split(asyncio.CancelledError) + if cancelled is not None: + raise asyncio.CancelledError + mqtt_errs, others = eg.split(aiomqtt.MqttError) + if others is None: + msg = "; ".join(str(e) for e in mqtt_errs.exceptions) + log.warning("mqtt: connection lost (%s); reconnecting", msg) + self._set_state(ConnState.RECONNECTING, detail=msg) + else: + log.error("mqtt: unexpected error", exc_info=others) + self._set_state( + ConnState.ERROR, + detail="; ".join(str(e) for e in others.exceptions), + ) except Exception as e: log.exception("mqtt: unexpected error") self._set_state(ConnState.ERROR, detail=str(e)) From efb4b5f38c469769eba2e8ff3157dd20802c868d Mon Sep 17 00:00:00 2001 From: RobXYZ Date: Tue, 9 Jun 2026 22:23:04 +0100 Subject: [PATCH 05/19] feat(timeline): video editor with QSV-accelerated exports Timeline video editor (clip filmstrips, duration probing, scrubbing) plus a move to a Debian/jellyfin-ffmpeg base image to unlock Intel QuickSync, with VAAPI and software fallbacks for join, PiP and switched exports. 
Co-Authored-By: Claude Opus 4.8 --- Dockerfile | 74 +- docker-compose.yml | 17 + entrypoint.sh | 6 +- scripts/bench_filmstrip.sh | 163 +++++ setuid.sh | 31 + tests/test_channel_of.py | 34 + tests/test_durations.py | 154 +++++ tests/test_export_control.py | 153 +++++ tests/test_export_hwaccel.py | 209 ++++++ tests/test_filmstrip.py | 275 ++++++++ tests/test_filmstrip_endpoints.py | 149 ++++ tests/test_mvhd_probe.py | 161 +++++ tests/test_scanner_prune.py | 82 +++ tests/test_switch_pieces.py | 47 ++ tests/test_switched_export.py | 198 ++++++ tests/test_thumbs.py | 46 ++ tests/test_timeline_endpoint.py | 196 ++++++ web/app.py | 5 + web/routers/archive.py | 244 ++++++- web/routers/exports.py | 48 +- web/services/durations.py | 239 +++++++ web/services/exporter.py | 446 +++++++++++- web/services/filmstrip.py | 234 +++++++ web/services/naming.py | 25 + web/services/retention.py | 9 +- web/services/scanner.py | 21 +- web/services/thumbs.py | 8 +- web/static/app.js | 81 ++- web/static/index.html | 43 ++ web/static/styles.css | 146 ++++ web/static/timeline.js | 1056 +++++++++++++++++++++++++++++ 31 files changed, 4516 insertions(+), 84 deletions(-) create mode 100755 scripts/bench_filmstrip.sh create mode 100644 tests/test_channel_of.py create mode 100644 tests/test_durations.py create mode 100644 tests/test_export_control.py create mode 100644 tests/test_export_hwaccel.py create mode 100644 tests/test_filmstrip.py create mode 100644 tests/test_filmstrip_endpoints.py create mode 100644 tests/test_mvhd_probe.py create mode 100644 tests/test_scanner_prune.py create mode 100644 tests/test_switch_pieces.py create mode 100644 tests/test_switched_export.py create mode 100644 tests/test_thumbs.py create mode 100644 tests/test_timeline_endpoint.py create mode 100644 web/services/durations.py create mode 100644 web/services/filmstrip.py create mode 100644 web/static/timeline.js diff --git a/Dockerfile b/Dockerfile index 1935223..fe198d6 100644 --- a/Dockerfile +++ b/Dockerfile @@ 
-1,35 +1,53 @@ -FROM alpine:3.23 +FROM python:3.12-slim-bookworm LABEL maintainer="Rob Smith https://github.com/RobXYZ" # TARGETARCH is set automatically by `docker buildx build` (amd64, -# arm64, …). Plain `docker build` does NOT set it; in that case we -# fall back to `apk --print-arch`, which reports the actual -# container architecture (x86_64, aarch64, …) and is correct under -# both native builds and QEMU emulation. +# arm64, …). Plain `docker build` does NOT set it; fall back to dpkg. ARG TARGETARCH # System deps: -# - python3 + pip: runtime + installing web deps -# - ffmpeg: exports + thumbnails -# - bash, shadow, tzdata: entrypoint + PUID/PGID remapping -# - intel-media-driver, libva-utils (Intel x86_64 only): VA-API -# userspace + diagnostic tool. ffmpeg's h264_qsv / h264_vaapi -# need iHD_drv_video.so to talk to an Intel iGPU when the host -# maps /dev/dri into the container; without it the MFX runtime -# fails immediately with "MFX session: -9". `vainfo` from -# libva-utils is a one-liner diagnostic the operator can run via -# `docker exec` to verify the passthrough is wired up correctly. -# These packages don't exist on linux/arm64. The app's encoder -# probe (web/services/exporter.py) runtime-tests every candidate -# and falls back to libx264 software encode if QSV / VAAPI -# aren't available, so the missing packages on ARM degrade -# transparently. -RUN apk add --no-cache \ - bash python3 py3-pip ffmpeg shadow su-exec tzdata && \ - arch="${TARGETARCH:-$(apk --print-arch)}" && \ +# - python is in the base image; pip installs web deps (PEP 668 -> +# --break-system-packages, safe in a container). +# - jellyfin-ffmpeg7: exports + thumbnails. Unlike Debian's stock +# ffmpeg (and unlike Alpine's musl build), the Jellyfin bundle ships +# the *legacy* Intel Media SDK runtime alongside oneVPL, which is what +# the DS920+'s Gen-9.5 (Gemini Lake) iGPU needs for QuickSync. 
Stock +# runtimes only drive Gen 12+, failing Gen 9.5 with "MFX session: -9"; +# the bundle is exactly how Jellyfin solved QSV-in-Docker. It also +# bundles the iHD VAAPI driver, so VAAPI keeps working as a fallback. +# The binary installs to /usr/lib/jellyfin-ffmpeg/{ffmpeg,ffprobe}; +# we symlink it onto PATH so shutil.which("ffmpeg") finds it unchanged. +# - gosu: privilege drop in entrypoint.sh (Debian's su-exec equivalent; +# same initgroups() semantics so the GPU render-group logic in +# setuid.sh keeps working). +# - vainfo (amd64): a one-line passthrough diagnostic. (On Debian the +# binary ships in the `vainfo` package, not Alpine's `libva-utils`.) +# On arm64 jellyfin-ffmpeg installs too but QSV simply won't probe-pass; +# exports degrade to software/VAAPI exactly as before. The app's encoder +# probe (web/services/exporter.py) runtime-tests every candidate and +# falls back to libx264, so a host without a working iGPU degrades +# transparently. +RUN set -eux; \ + arch="${TARGETARCH:-$(dpkg --print-architecture)}"; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + bash ca-certificates gnupg gosu tzdata; \ + install -d /etc/apt/keyrings; \ + gpg_url="https://repo.jellyfin.org/jellyfin_team.gpg.key"; \ + apt-get install -y --no-install-recommends curl; \ + curl -fsSL "$gpg_url" | gpg --dearmor -o /etc/apt/keyrings/jellyfin.gpg; \ + echo "deb [signed-by=/etc/apt/keyrings/jellyfin.gpg] https://repo.jellyfin.org/debian bookworm main" \ + > /etc/apt/sources.list.d/jellyfin.list; \ + apt-get update; \ + apt-get install -y --no-install-recommends jellyfin-ffmpeg7; \ case "$arch" in \ - amd64|x86_64) apk add --no-cache intel-media-driver libva-utils ;; \ - esac && \ + amd64) apt-get install -y --no-install-recommends vainfo ;; \ + esac; \ + ln -sf /usr/lib/jellyfin-ffmpeg/ffmpeg /usr/local/bin/ffmpeg; \ + ln -sf /usr/lib/jellyfin-ffmpeg/ffprobe /usr/local/bin/ffprobe; \ + apt-get purge -y curl gnupg; \ + apt-get autoremove -y; \ + rm -rf 
/var/lib/apt/lists/*; \ useradd -UMr dashcam COPY LICENSE / @@ -40,9 +58,9 @@ ENV PUID="" \ PGID="" \ RECORDINGS="/recordings" -# Install Python deps into the system site-packages. Alpine's -# pip refuses by default (PEP 668); --break-system-packages is -# safe inside a container. +# Install Python deps into the system site-packages. pip refuses +# by default (PEP 668 on Debian Bookworm+); --break-system-packages +# is safe inside a container. COPY requirements.txt /requirements.txt RUN pip install --no-cache-dir --break-system-packages \ -r /requirements.txt diff --git a/docker-compose.yml b/docker-compose.yml index fb4795b..f7c7202 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,6 +7,23 @@ services: ports: - "8080:8080" + # Optional: pass through the GPU for hardware-accelerated decode/encode + # (exports). On an Intel iGPU this now uses QuickSync (QSV) first and + # falls back to VA-API, then software. Uncomment on a NAS with an Intel + # iGPU. Verify it engaged with: docker exec <container> vainfo and check the + # startup log line "export encoder available: … qsv …". + # devices: + # - /dev/dri:/dev/dri + # + # The entrypoint auto-detects the render node's group and adds the app + # user to it, so this is usually all you need. Some hosts (notably + # Synology DSM) still require the render group to be granted explicitly — + # if hardware accel doesn't engage, find the GID with + # docker exec <container> sh -c 'stat -c %g /dev/dri/renderD128' + # and add it here (937 is common on Synology): + # group_add: + # - "937" # GID owning /dev/dri/renderD128 — the iGPU render node + volumes: # Config directory. A template viofosync.env is seeded here on first # run — edit it (ADDRESS, WEB_PASSWORD, etc.) and restart to apply.
diff --git a/entrypoint.sh b/entrypoint.sh index 942e197..0bf7cf5 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -4,4 +4,8 @@ set -e mkdir -p /config /recordings /setuid.sh -exec su-exec dashcam:dashcam python3 -m web.launcher +# `gosu dashcam` (user form, NOT `dashcam:dashcam`): the user form calls +# initgroups() so the supplementary groups from /etc/group — including the GPU +# render group added in setuid.sh — are applied. The explicit `user:group` form +# would replace them with just that one group, dropping render-node access. +exec gosu dashcam python3 -m web.launcher diff --git a/scripts/bench_filmstrip.sh b/scripts/bench_filmstrip.sh new file mode 100755 index 0000000..6671ec6 --- /dev/null +++ b/scripts/bench_filmstrip.sh @@ -0,0 +1,163 @@ +#!/bin/bash +# +# bench_filmstrip.sh — measure filmstrip-sprite decode cost on THIS host. +# +# Why this exists: viofosync runs on many NAS models (Synology Celeron/Atom, +# Ryzen embedded, ARM, …). Whether hardware decode (-hwaccel) beats software +# for filmstrip generation depends entirely on the box's CPU and iGPU, so the +# only trustworthy data comes from running the real command on the real +# hardware with a real 4K dashcam clip. Apple-Silicon dev-machine numbers do +# not transfer. +# +# It runs the EXACT command web/services/filmstrip.py issues, in several decode +# configurations, and reports for each: +# real wall-clock seconds (lower = faster for one clip) +# cpu user+sys seconds (lower = more decode offloaded off the CPU — +# THIS is the number that matters on a weak NAS +# that spikes under concurrent filmstrip jobs) +# dims sprite WxH, to confirm the output is correct (not garbage) +# +# Usage: +# ./bench_filmstrip.sh /path/to/4k_clip.MP4 [output_dir] +# +# ffmpeg/ffprobe are taken from PATH; override with env vars if needed, e.g. 
+# on Synology where ffmpeg ships inside a package: +# FFMPEG=/var/packages/ffmpeg/target/bin/ffmpeg \ +# FFPROBE=/var/packages/ffmpeg/target/bin/ffprobe \ +# ./bench_filmstrip.sh /volume1/dashcam/2024_0101_120000_0001F.MP4 +# +set -u +export LC_ALL=C # force '.' decimal separator for time/ffprobe parsing + +# --- production constants (keep in sync with web/services/filmstrip.py) --- +INTERVAL_S=8 +TILE_W=160 +TILE_H=90 +RUNS="${RUNS:-3}" # runs per config (keep best); set RUNS=1 for a fast first pass + +FFMPEG="${FFMPEG:-$(command -v ffmpeg || true)}" +FFPROBE="${FFPROBE:-$(command -v ffprobe || true)}" + +die() { echo "error: $*" >&2; exit 1; } + +[ -n "$FFMPEG" ] || die "ffmpeg not found (set FFMPEG=/path/to/ffmpeg)" +[ -n "$FFPROBE" ] || die "ffprobe not found (set FFPROBE=/path/to/ffprobe)" + +CLIP="${1:-}" +[ -n "$CLIP" ] || die "usage: $0 /path/to/4k_clip.MP4 [output_dir]" +[ -f "$CLIP" ] || die "clip not found: $CLIP" + +OUTDIR="${2:-$(dirname "$CLIP")/.bench_filmstrip}" +mkdir -p "$OUTDIR" || die "cannot create output dir: $OUTDIR" + +# --- describe the clip and compute the tile count the real code would use --- +# Probe each field separately: with -show_entries, ffprobe emits values in the +# stream's own field order (codec_name often precedes width/height), so a +# single combined query would mislabel them. 
+_probe1() { + "$FFPROBE" -v error -select_streams v:0 \ + -show_entries "stream=$1" -of default=noprint_wrappers=1:nokey=1 "$CLIP" +} +WIDTH=$(_probe1 width) +HEIGHT=$(_probe1 height) +CODEC=$(_probe1 codec_name) +DURATION=$("$FFPROBE" -v error -show_entries format=duration \ + -of default=noprint_wrappers=1:nokey=1 "$CLIP") +BITRATE=$("$FFPROBE" -v error -show_entries format=bit_rate \ + -of default=noprint_wrappers=1:nokey=1 "$CLIP" 2>/dev/null) + +# tiles = max(1, ceil(duration / INTERVAL_S)) — same as filmstrip.frame_count +TILES=$(awk -v d="$DURATION" -v i="$INTERVAL_S" \ + 'BEGIN{ n=int((d+i-1)/i); if(n<1)n=1; print n }') +VF="fps=1/${INTERVAL_S},scale=${TILE_W}:${TILE_H},tile=${TILES}x1" +EXPECT_W=$(( TILES * TILE_W )) + +echo "host : $(uname -srm)" +echo "ffmpeg : $FFMPEG" +echo "clip : $CLIP" +printf "video : %sx%s %s dur=%.0fs bitrate=%sk -> %d tiles (expect %dx%d sprite)\n" \ + "$WIDTH" "$HEIGHT" "$CODEC" "$DURATION" \ + "$(awk -v b="${BITRATE:-0}" 'BEGIN{printf "%.0f", b/1000}')" \ + "$TILES" "$EXPECT_W" "$TILE_H" +echo + +# --- which hwaccels did this ffmpeg build advertise? --- +HWACCELS=$("$FFMPEG" -hide_banner -hwaccels 2>/dev/null | tail -n +2 | tr -d ' ') +echo "advertised hwaccels: $(echo "$HWACCELS" | paste -sd',' -)" +[ -e /dev/dri/renderD128 ] && echo "found /dev/dri/renderD128 (iGPU render node present)" +echo + +printf "%-26s %8s %8s %-10s %s\n" "config" "real(s)" "cpu(s)" "dims" "status" +printf "%-26s %8s %8s %-10s %s\n" "--------------------------" "-------" "------" "----------" "------" + +# run_cfg LABEL +# Times the production command RUNS times, keeps the best wall-clock, and +# verifies the sprite dimensions. 
+run_cfg() { + label="$1"; shift + out="$OUTDIR/${label//[^A-Za-z0-9]/_}.jpg" + errlog="$OUTDIR/${label//[^A-Za-z0-9]/_}.err" + + best_real=""; best_cpu=""; rc=1 + for _ in $(seq 1 "$RUNS"); do + rm -f "$out" + TIMEFORMAT='%R %U %S' + # ffmpeg's own stderr -> errlog, stdout -> /dev/null; the `time` builtin's + # report is the only thing left on the compound's stderr, captured here. + t=$( { time "$FFMPEG" -loglevel error -y "$@" -i "$CLIP" -an \ + -vf "$VF" -frames:v 1 "$out" 2>"$errlog" 1>/dev/null ; } 2>&1 ) + rc=$? + [ $rc -eq 0 ] || break + real=$(echo "$t" | awk '{print $1}') + cpu=$(echo "$t" | awk '{printf "%.3f", $2+$3}') + if [ -z "$best_real" ] || awk -v a="$real" -v b="$best_real" 'BEGIN{exit !(a/dev/null | cut -c1-40) + printf "%-26s %8s %8s %-10s FAIL %s\n" "$label" "-" "-" "-" "$msg" + return 1 + fi + + dims=$("$FFPROBE" -v error -select_streams v:0 \ + -show_entries stream=width,height -of csv=p=0 "$out" 2>/dev/null) + status="ok" + case "$dims" in + "${EXPECT_W},${TILE_H}") status="ok" ;; + *) status="DIMS_MISMATCH" ;; + esac + printf "%-26s %8s %8s %-10s %s\n" "$label" "$best_real" "$best_cpu" "$dims" "$status" +} + +# Baseline + software keyframe-skip (the no-GPU path the code uses). +run_cfg "software (no skip)" +run_cfg "software+skip" + +# Every advertised hwaccel that's relevant to decode, with and without the +# keyframe skip — so you can see both the offload AND the surface-transfer +# cliff (hwaccel without skip downloads every decoded frame; can be ~25x). +for hw in videotoolbox cuda qsv vaapi; do + echo "$HWACCELS" | grep -qx "$hw" || continue + + run_cfg "hw:${hw}+skip" -hwaccel "$hw" -skip_frame nokey + run_cfg "hw:${hw} (no skip)" -hwaccel "$hw" + + # vaapi/qsv often need the render node named explicitly inside a container + # (the Synology /dev/dri passthrough case). If the bare form failed and the + # node exists, try again with the device so we learn what production needs. 
+ if { [ "$hw" = "vaapi" ] || [ "$hw" = "qsv" ]; } && [ -e /dev/dri/renderD128 ]; then + run_cfg "hw:${hw}+skip+device" -hwaccel "$hw" \ + -hwaccel_device /dev/dri/renderD128 -skip_frame nokey + fi +done + +echo +echo "Read it like this:" +echo " * 'cpu(s)' is the number to watch on a NAS — lower means decode is" +echo " offloaded and the box stays responsive under concurrent jobs." +echo " * a big gap between a hwaccel's '+skip' and 'no skip' rows confirms the" +echo " surface-transfer cliff (skip_frame nokey is essential on every host)." +echo " * any DIMS_MISMATCH / FAIL row means that decode path is not usable here." +echo " * sprites left in: $OUTDIR (delete when done)" diff --git a/setuid.sh b/setuid.sh index 01178a9..4fe00b9 100755 --- a/setuid.sh +++ b/setuid.sh @@ -7,3 +7,34 @@ fi if [[ ${PGID:-0} -gt 0 ]]; then groupmod -o -g "$PGID" dashcam fi + +# Grant the app user access to the GPU render node(s) so hardware-accelerated +# decode/encode (filmstrips, exports) works. The render node is group-owned +# with no world access and its GID varies by NAS host. We gather candidate +# GIDs from two sources so either deployment style works: +# 1. the group that owns each /dev/dri node — automatic, no compose change +# 2. groups granted to the container via compose `group_add:` (the standard +# Synology approach; visible here as this script's own supplementary +# groups since it runs as root before the gosu drop) +# and add dashcam to each in /etc/group. This is required because entrypoint.sh +# drops privileges with `gosu dashcam`, whose initgroups() reads /etc/group +# — a group_add GID that isn't mirrored there would otherwise be lost. +# Best-effort: a failure here (e.g. no passthrough) must not stop startup. 
+gpu_gids="" +for dev in /dev/dri/renderD* /dev/dri/card*; do + [[ -e "$dev" ]] || continue + g=$(stat -c '%g' "$dev" 2>/dev/null) && gpu_gids="$gpu_gids $g" +done +gpu_gids="$gpu_gids $(id -G 2>/dev/null)" # group_add GIDs + +for gid in $gpu_gids; do + [[ "$gid" == 0 || "$gid" == "${PGID:-0}" ]] && continue # skip root / own primary + grp=$(awk -F: -v g="$gid" '$3 == g { print $1 }' /etc/group) + if [[ -z "$grp" ]]; then + grp="gpu_$gid" + groupadd -o -g "$gid" "$grp" 2>/dev/null || true + fi + usermod -aG "$grp" dashcam 2>/dev/null || true +done + +exit 0 diff --git a/tests/test_channel_of.py b/tests/test_channel_of.py new file mode 100644 index 0000000..3b21287 --- /dev/null +++ b/tests/test_channel_of.py @@ -0,0 +1,34 @@ +"""Tests for the camera -> timeline-channel mapping.""" +from __future__ import annotations + +from web.services import naming + + +def test_channel_of_front_variants(): + assert naming.channel_of("F") == "front" + assert naming.channel_of("PF") == "front" # parking front + assert naming.channel_of("EF") == "front" # event front + assert naming.channel_of("f") == "front" # case-insensitive + + +def test_channel_of_rear_variants(): + assert naming.channel_of("R") == "rear" + assert naming.channel_of("PR") == "rear" + + +def test_channel_of_interior(): + assert naming.channel_of("I") == "interior" + + +def test_channel_of_unknown_and_empty(): + assert naming.channel_of("") == "other" + assert naming.channel_of("X") == "other" + assert naming.channel_of(None) == "other" + + +def test_channel_order_and_labels(): + assert naming.CHANNEL_ORDER == ["front", "rear", "interior", "other"] + assert naming.CHANNEL_LABELS["front"] == "Front" + assert naming.CHANNEL_LABELS["rear"] == "Rear" + assert naming.CHANNEL_LABELS["interior"] == "Interior" + assert naming.CHANNEL_LABELS["other"] == "Other" diff --git a/tests/test_durations.py b/tests/test_durations.py new file mode 100644 index 0000000..75714df --- /dev/null +++ b/tests/test_durations.py @@ -0,0 
+1,154 @@ +"""Tests for the clip duration ffprobe sweep.""" +from __future__ import annotations + +import asyncio +from pathlib import Path + +import pytest + +from web.db import Database +from web.services import durations + + +def _insert_clip(db, clip_id, path, duration_s=None): + with db.write() as c: + c.execute( + "INSERT INTO clip_index " + "(id, path, basename, group_name, timestamp, camera, " + " sequence, event_type, has_gpx, gps_examined, scanned_at, duration_s) " + "VALUES (?,?,?,?,?,?,?,?,0,0,?,?)", + (clip_id, path, f"{clip_id}.MP4", "2026-06-02", + 1_717_312_440, "F", clip_id, "normal", 1_717_312_440, duration_s), + ) + + +async def test_probe_duration_parses_ffprobe(monkeypatch): + class _P: + async def communicate(self): + return (b"60.05\n", b"") + async def fake_exec(*a, **k): + return _P() + monkeypatch.setattr(durations.shutil, "which", lambda _n: "/usr/bin/ffprobe") + monkeypatch.setattr(durations.asyncio, "create_subprocess_exec", fake_exec) + assert await durations.probe_duration("/x.mp4") == pytest.approx(60.05) + + +async def test_probe_duration_none_without_ffprobe(monkeypatch): + monkeypatch.setattr(durations.shutil, "which", lambda _n: None) + assert await durations.probe_duration("/x.mp4") is None + + +async def test_sweep_updates_null_durations(tmp_path: Path, monkeypatch): + db = Database(str(tmp_path / "t.db")) + f1 = tmp_path / "clip1.mp4" + f1.write_bytes(b"\0") + f2 = tmp_path / "clip2.mp4" + f2.write_bytes(b"\0") + _insert_clip(db, 1, str(f1), duration_s=None) # needs probe + _insert_clip(db, 2, str(f2), duration_s=60.0) # already has one -> skipped + + async def fake_probe(path): + return 42.0, "mvhd" + monkeypatch.setattr(durations, "_probe_with_method", fake_probe) + + updated = await durations.sweep_missing_durations(db) + assert updated == 1 + with db.conn() as c: + d1 = c.execute("SELECT duration_s FROM clip_index WHERE id=1").fetchone()["duration_s"] + d2 = c.execute("SELECT duration_s FROM clip_index WHERE 
id=2").fetchone()["duration_s"] + assert d1 == pytest.approx(42.0) + assert d2 == pytest.approx(60.0) # untouched + + +async def test_sweep_skips_missing_files(tmp_path: Path, monkeypatch): + db = Database(str(tmp_path / "t.db")) + _insert_clip(db, 1, str(tmp_path / "gone.mp4"), duration_s=None) # file absent + async def fake_probe(path): + raise AssertionError("should not probe a missing file") + monkeypatch.setattr(durations, "_probe_with_method", fake_probe) + assert await durations.sweep_missing_durations(db) == 0 + + +async def test_sweep_persists_incrementally_when_interrupted( + tmp_path: Path, monkeypatch +): + """A sweep cancelled partway (e.g. server shutdown) must have already + persisted the clips it probed before the interruption — otherwise a + restart loses all progress and the sweep can never finish.""" + db = Database(str(tmp_path / "t.db")) + f1 = tmp_path / "clip1.mp4" + f1.write_bytes(b"\0") + f2 = tmp_path / "clip2.mp4" + f2.write_bytes(b"\0") + _insert_clip(db, 1, str(f1), duration_s=None) + _insert_clip(db, 2, str(f2), duration_s=None) + + async def fake_probe(path): + if path == str(f1): + return 42.0, "mvhd" + raise asyncio.CancelledError # shutdown hits while probing clip 2 + + monkeypatch.setattr(durations, "_probe_with_method", fake_probe) + + # concurrency=1 -> clip 1 is fully probed (and must be flushed) before + # clip 2 runs; batch_size=1 -> each result is persisted as it lands. 
+ with pytest.raises(asyncio.CancelledError): + await durations.sweep_missing_durations(db, concurrency=1, batch_size=1) + + with db.conn() as c: + d1 = c.execute( + "SELECT duration_s FROM clip_index WHERE id=1" + ).fetchone()["duration_s"] + assert d1 == pytest.approx(42.0) # survived the interruption + + +async def test_sweep_logs_method_breakdown(tmp_path: Path, monkeypatch, caplog): + """The sweep reports how many clips it resolved via the fast mvhd path + vs the ffprobe fallback, so the fast path is visible in the Logs tab.""" + import logging + + db = Database(str(tmp_path / "t.db")) + for i in (1, 2, 3): + f = tmp_path / f"clip{i}.mp4" + f.write_bytes(b"\0") + _insert_clip(db, i, str(f), duration_s=None) + + async def fake(path): + # clip3 needs the ffprobe fallback; the rest resolve via mvhd + return (30.0, "ffprobe") if path.endswith("clip3.mp4") else (15.0, "mvhd") + monkeypatch.setattr(durations, "_probe_with_method", fake) + + with caplog.at_level(logging.INFO, logger="viofosync.durations"): + updated = await durations.sweep_missing_durations(db) + + assert updated == 3 + msgs = " ".join(r.getMessage() for r in caplog.records) + assert "2 via mvhd" in msgs + assert "1 via ffprobe" in msgs + + +async def test_sweep_writes_all_with_small_batches(tmp_path: Path, monkeypatch): + """Batched flushing must not drop rows: every probed clip is written + even when the batch size is smaller than the number of clips.""" + db = Database(str(tmp_path / "t.db")) + paths = [] + for i in range(1, 6): + f = tmp_path / f"clip{i}.mp4" + f.write_bytes(b"\0") + paths.append(str(f)) + _insert_clip(db, i, str(f), duration_s=None) + + async def fake_probe(path): + return 10.0, "mvhd" + + monkeypatch.setattr(durations, "_probe_with_method", fake_probe) + updated = await durations.sweep_missing_durations(db, batch_size=2) + assert updated == 5 + with db.conn() as c: + vals = [ + r["duration_s"] + for r in c.execute( + "SELECT duration_s FROM clip_index ORDER BY id" + 
).fetchall() + ] + assert vals == [pytest.approx(10.0)] * 5 diff --git a/tests/test_export_control.py b/tests/test_export_control.py new file mode 100644 index 0000000..5d97639 --- /dev/null +++ b/tests/test_export_control.py @@ -0,0 +1,153 @@ +"""Pause / resume an in-progress export, and kill-on-delete. + +The single export worker tracks the ffmpeg child of the running job so the +HTTP layer can pause it (SIGSTOP), resume it (SIGCONT), or kill it when the +job is deleted mid-render. A killed job unwinds via _ExportCancelled without +being marked failed (its row is being deleted). +""" +from __future__ import annotations + +import signal +from unittest.mock import MagicMock + +import pytest + +from web.db import Database +from web.services import exporter +from web.services.exporter import ExportWorker, reconcile_orphan_jobs + + +async def _noop(_event): # broadcast stub + pass + + +@pytest.fixture +def db(tmp_path): + return Database(str(tmp_path / "t.db")) + + +def _job(db: Database, jid: int, state: str = "running") -> None: + with db.write() as c: + c.execute( + "INSERT INTO export_jobs (id, type, clip_ids, state, created_at) " + "VALUES (?, 'switched', '{}', ?, 0)", + (jid, state), + ) + + +def _state(db: Database, jid: int) -> str: + with db.conn() as c: + return c.execute( + "SELECT state FROM export_jobs WHERE id=?", (jid,) + ).fetchone()["state"] + + +class _FakeProc: + def __init__(self): + self.signals: list = [] + self.killed = False + + def send_signal(self, sig): + self.signals.append(sig) + + def kill(self): + self.killed = True + + +def _worker(db: Database) -> ExportWorker: + return ExportWorker(db=db, provider=MagicMock(), broadcast=_noop) + + +# --- pause / resume --- + +async def test_pause_signals_stop_and_sets_state(db): + w = _worker(db); _job(db, 7, "running") + w._current_job_id = 7 + w._current_proc = _FakeProc() + assert await w.pause(7) is True + assert signal.SIGSTOP in w._current_proc.signals + assert w._paused is True + assert 
_state(db, 7) == "paused" + + +async def test_resume_signals_cont_and_sets_state(db): + w = _worker(db); _job(db, 7, "paused") + fake = _FakeProc() + w._current_job_id = 7 + w._current_proc = fake + w._paused = True + assert await w.resume(7) is True + assert signal.SIGCONT in fake.signals + assert w._paused is False + assert _state(db, 7) == "running" + + +async def test_pause_false_for_non_current_job(db): + w = _worker(db); _job(db, 7, "running") + w._current_job_id = 7 + w._current_proc = _FakeProc() + assert await w.pause(99) is False + assert _state(db, 7) == "running" + + +# --- cancel / kill on delete --- + +async def test_cancel_kills_current_proc(db): + w = _worker(db) + fake = _FakeProc() + w._current_job_id = 7 + w._current_proc = fake + assert await w.cancel(7) is True + assert fake.killed is True + assert w._cancel_current is True + + +async def test_cancel_false_when_job_not_running(db): + w = _worker(db) + assert await w.cancel(7) is False + + +async def test_run_ffmpeg_raises_when_cancelled(db): + w = _worker(db) + w._current_job_id = 7 + w._cancel_current = True + with pytest.raises(exporter._ExportCancelled): + await w._run_ffmpeg(7, ["-y", "out.mp4"], 1.0) + + +# --- _process: cancelled vs real failure --- + +async def test_process_discards_cancelled_job_without_failing(db, monkeypatch): + w = _worker(db); _job(db, 7, "running") + + async def cancelled(_job): + raise exporter._ExportCancelled + + monkeypatch.setattr(w, "_run_job", cancelled) + await w._process({"id": 7}) + # row is being deleted by the endpoint; must NOT be flipped to failed + assert _state(db, 7) == "running" + + +async def test_process_marks_real_error_failed(db, monkeypatch): + w = _worker(db); _job(db, 8, "running") + + async def boom(_job): + raise ValueError("nope") + + monkeypatch.setattr(w, "_run_job", boom) + await w._process({"id": 8}) + assert _state(db, 8) == "failed" + + +# --- restart reconcile includes paused --- + +def 
test_reconcile_marks_paused_and_running_failed(db): + _job(db, 1, "paused") + _job(db, 2, "running") + _job(db, 3, "done") + n = reconcile_orphan_jobs(db) + assert n == 2 + assert _state(db, 1) == "failed" + assert _state(db, 2) == "failed" + assert _state(db, 3) == "done" diff --git a/tests/test_export_hwaccel.py b/tests/test_export_hwaccel.py new file mode 100644 index 0000000..7b53652 --- /dev/null +++ b/tests/test_export_hwaccel.py @@ -0,0 +1,209 @@ +"""Hardware-encoder command construction for exports. + +VAAPI encoding needs the frames on the GPU: a global ``-vaapi_device`` plus +a ``format=nv12,hwupload`` tail on the filter chain. Without them ffmpeg +fails with ``Invalid argument`` the moment any filter (scale/setsar/PiP) +is in the chain — which is every export. videotoolbox/nvenc accept software +frames directly, so they get neither. +""" +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from web.db import Database +from web.routers.exports import _resolve_default_encoder +from web.services import exporter +from web.services.exporter import ExportWorker + + +# --- helpers --- + +def _state(prefs_encoder, available): + snap = SimpleNamespace(export_encoder_pref=prefs_encoder) + return SimpleNamespace( + settings_provider=SimpleNamespace(get=lambda: snap), + export_encoders=available, + ) + + +def test_auto_prefers_qsv_over_vaapi(): + st = _state("auto", {"qsv": True, "vaapi": True, "software": True}) + assert _resolve_default_encoder(st) == "qsv" + + +def test_auto_falls_back_to_vaapi_when_no_qsv(): + st = _state("auto", {"qsv": False, "vaapi": True, "software": True}) + assert _resolve_default_encoder(st) == "vaapi" + + +def test_hw_init_args_only_for_vaapi(): + assert exporter._hw_init_args("vaapi") == ["-vaapi_device", "/dev/dri/renderD128"] + assert exporter._hw_init_args("software") == [] + assert exporter._hw_init_args("videotoolbox") == [] + 
assert exporter._hw_init_args("nvenc") == [] + + +def test_hw_init_args_qsv_creates_device(): + assert exporter._hw_init_args("qsv") == [ + "-init_hw_device", "qsv=hw", "-filter_hw_device", "hw", + ] + + +def test_hw_init_args_vaapi_unchanged(): + assert exporter._hw_init_args("vaapi") == [ + "-vaapi_device", "/dev/dri/renderD128", + ] + + +def test_hw_decode_args_only_for_qsv(): + assert exporter._hw_decode_args("qsv") == [ + "-hwaccel", "qsv", "-hwaccel_output_format", "qsv", + ] + assert exporter._hw_decode_args("vaapi") == [] + assert exporter._hw_decode_args("software") == [] + assert exporter._hw_decode_args("videotoolbox") == [] + + +def test_hw_upload_filter_only_for_vaapi(): + assert exporter._hw_upload_filter("vaapi") == "format=nv12,hwupload" + assert exporter._hw_upload_filter("software") == "" + assert exporter._hw_upload_filter("nvenc") == "" + + +# --- switched export wiring (the reported failure) --- + +@pytest.fixture +def db(tmp_path: Path) -> Database: + return Database(str(tmp_path / "t.db")) + + +async def _noop(_event): # broadcast stub + pass + + +def _insert_clip(db: Database, path: str, ts: int, dur: float = 60.0) -> None: + with db.write() as c: + c.execute( + "INSERT INTO clip_index " + "(path, basename, group_name, timestamp, camera, sequence, " + " event_type, has_gpx, gps_examined, scanned_at, duration_s) " + "VALUES (?, ?, '2026-01-01', ?, 'F', 1, 'normal', 0, 0, 0, ?)", + (path, path.split("/")[-1], ts, dur), + ) + + +async def _capture_switched(db, tmp_path, monkeypatch, encoder): + worker = ExportWorker(db=db, provider=MagicMock(), broadcast=_noop) + base = 1_000_000 + _insert_clip(db, str(tmp_path / "f.mp4"), base, 60.0) + + captured: list[list[str]] = [] + + async def fake_ffmpeg(job_id, args, total, **kw): + captured.append(list(args)) + Path(args[-1]).write_bytes(b"\0") + return 0, "" + + async def fake_res(_path): + return (1920, 1080) + + monkeypatch.setattr(worker, "_run_ffmpeg", fake_ffmpeg) + monkeypatch.setattr(worker, 
"_probe_resolution", fake_res) + + segments = [{"channel": "front", "start_ts": base + 10, "end_ts": base + 30}] + await worker._run_switched({"id": 1}, segments, encoder, str(tmp_path / "out.mp4")) + + # the per-segment encode is the call carrying the scale filter + return next(a for a in captured + if "-vf" in a and "scale" in a[a.index("-vf") + 1]) + + +async def test_switched_vaapi_adds_device_and_hwupload(db, tmp_path, monkeypatch): + seg = await _capture_switched(db, tmp_path, monkeypatch, "vaapi") + assert seg[seg.index("-vaapi_device") + 1] == "/dev/dri/renderD128" + assert seg.index("-vaapi_device") < seg.index("-i") # global, before input + assert seg[seg.index("-vf") + 1] == "scale=1920:1080,setsar=1,format=nv12,hwupload" + assert "h264_vaapi" in seg + + +async def test_switched_software_has_no_hw_args(db, tmp_path, monkeypatch): + seg = await _capture_switched(db, tmp_path, monkeypatch, "software") + assert "-vaapi_device" not in seg + assert seg[seg.index("-vf") + 1] == "scale=1920:1080,setsar=1" + assert "libx264" in seg + + +def test_video_codec_args_qsv_uses_icq(): + args = exporter.video_codec_args("qsv") + assert args == [ + "-c:v", "h264_qsv", "-global_quality", "23", "-look_ahead", "0", + ] + + +def test_video_codec_args_vaapi_unchanged(): + # Regression guard: VAAPI path must not drift. 
+ assert exporter.video_codec_args("vaapi") == [ + "-c:v", "h264_vaapi", "-rc_mode", "CQP", "-qp", "24", + ] + + +def test_scale_filter_dialects(): + # software/vaapi keep the exact legacy string (regression guard) + assert exporter._scale_filter(1920, 1080, "software") == "scale=1920:1080,setsar=1" + assert exporter._scale_filter(1920, 1080, "vaapi") == "scale=1920:1080,setsar=1" + # qsv uses the VPP scaler and drops setsar (set by the encoder) + assert exporter._scale_filter(1920, 1080, "qsv") == "scale_qsv=w=1920:h=1080" + + +async def test_switched_qsv_uses_gpu_chain(db, tmp_path, monkeypatch): + seg = await _capture_switched(db, tmp_path, monkeypatch, "qsv") + # device init present, before input + assert seg[seg.index("-init_hw_device") + 1] == "qsv=hw" + # per-input decode flags present, before -i + assert "-hwaccel" in seg and seg[seg.index("-hwaccel") + 1] == "qsv" + assert seg.index("-hwaccel") < seg.index("-i") + # qsv scaler, NO setsar, NO hwupload + vf = seg[seg.index("-vf") + 1] + assert vf == "scale_qsv=w=1920:h=1080" + assert "setsar" not in vf and "hwupload" not in vf + assert "h264_qsv" in seg + + +def test_pip_filter_complex_software_unchanged(): + fc = exporter._pip_filter_complex("top_right", main="front") + assert fc == ( + "[1:v]scale=iw/4:ih/4[pip];" + "[0:v][pip]overlay=W-w-20:20" + ) + + +def test_pip_filter_complex_qsv_uses_vpp(): + fc = exporter._pip_filter_complex("top_right", main="front", encoder="qsv") + assert fc == ( + "[1:v]scale_qsv=w=iw/4:h=ih/4[pip];" + "[0:v][pip]overlay_qsv=x=W-w-20:y=20" + ) + + +def test_qsv_probe_command_exercises_mfx(monkeypatch): + captured = {} + + def fake_run(cmd, **kw): + captured["cmd"] = cmd + class R: # noqa: D401 - tiny stub + returncode = 0 + return R() + + monkeypatch.setattr(exporter.subprocess, "run", fake_run) + monkeypatch.setattr(exporter.shutil, "which", lambda _x: "/usr/local/bin/ffmpeg") + + assert exporter._test_encoder_sync("qsv") is True + cmd = captured["cmd"] + # device init + qsv 
filter + qsv encoder all present + assert "-init_hw_device" in cmd and "qsv=hw" in cmd + assert any("scale_qsv" in c for c in cmd) + assert "h264_qsv" in cmd diff --git a/tests/test_filmstrip.py b/tests/test_filmstrip.py new file mode 100644 index 0000000..03b75b7 --- /dev/null +++ b/tests/test_filmstrip.py @@ -0,0 +1,275 @@ +"""Tests for the filmstrip sprite service (ffmpeg mocked).""" +from __future__ import annotations + +import json +import logging +import os +from pathlib import Path + +from web.services import filmstrip, retention + + +def test_frame_count_basics(): + # 60s clip, one frame / 8s -> ceil(60/8) = 8 + assert filmstrip.frame_count(60.0) == 8 + # exact multiple + assert filmstrip.frame_count(16.0) == 2 + # short / zero / None always yields at least one tile + assert filmstrip.frame_count(3.0) == 1 + assert filmstrip.frame_count(0.0) == 1 + assert filmstrip.frame_count(None) == 1 + + +def test_paths_are_under_filmstrips_dir(tmp_path: Path): + rec = str(tmp_path) + sp = filmstrip.sprite_path(rec, 42) + mp = filmstrip.meta_path(rec, 42) + assert sp.endswith(os.path.join(".filmstrips", "42.jpg")) + assert mp.endswith(os.path.join(".filmstrips", "42.json")) + # accessing a path helper creates the cache dir + assert os.path.isdir(os.path.join(rec, ".filmstrips")) + + +async def test_ensure_returns_none_when_ffmpeg_missing(tmp_path: Path, monkeypatch): + monkeypatch.setattr(filmstrip.shutil, "which", lambda _name: None) + meta = await filmstrip.ensure_filmstrip( + str(tmp_path), 7, str(tmp_path / "clip.mp4"), 60.0 + ) + assert meta is None + + +async def test_ensure_cache_hit_reads_sidecar_without_ffmpeg(tmp_path: Path, monkeypatch): + rec = str(tmp_path) + # Pre-seed a cached sprite + sidecar. 
+ Path(filmstrip.sprite_path(rec, 9)).write_bytes(b"\xff\xd8\xff\xd9") # tiny JPEG-ish + Path(filmstrip.meta_path(rec, 9)).write_text(json.dumps({ + "frames": 8, "interval_s": 8, "tile_w": 160, "tile_h": 90, "duration_s": 60.0, + })) + + # If ffmpeg were invoked this would explode — proves the cache short-circuits. + def _boom(*a, **k): + raise AssertionError("ffmpeg must not run on a cache hit") + monkeypatch.setattr(filmstrip.asyncio, "create_subprocess_exec", _boom) + + meta = await filmstrip.ensure_filmstrip(rec, 9, str(tmp_path / "clip.mp4"), 60.0) + assert meta == filmstrip.FilmstripMeta(8, 8, 160, 90, 60.0) + + +async def test_ensure_generates_sprite_and_sidecar(tmp_path: Path, monkeypatch): + rec = str(tmp_path) + monkeypatch.setattr(filmstrip.shutil, "which", lambda _name: "/usr/bin/ffmpeg") + calls = _capture_all_exec(monkeypatch) + + meta = await filmstrip.ensure_filmstrip(rec, 5, "/rec/clip.mp4", 60.0) + + # Returned + persisted metadata (unchanged contract) + assert meta == filmstrip.FilmstripMeta(8, 8, 160, 90, 60.0) + assert os.path.exists(filmstrip.sprite_path(rec, 5)) + with open(filmstrip.meta_path(rec, 5)) as f: + assert json.load(f)["frames"] == 8 + + # One seek-extract per tile (read only near each 8s mark, not the whole + # file), then a single stitch pass into the sprite. 
+ extracts = [c for c in calls if "-ss" in c] + tiles = [c for c in calls if any("tile=" in a for a in c)] + assert len(extracts) == 8 + assert [c[c.index("-ss") + 1] for c in extracts] == \ + ["0", "8", "16", "24", "32", "40", "48", "56"] + for c in extracts: + assert c[c.index("-i") + 1] == "/rec/clip.mp4" + assert "-frames:v" in c + assert c[c.index("-vf") + 1] == "scale=160:90" + assert "-an" in c + assert "-hwaccel" not in c # software only — hwaccel is slower here + assert len(tiles) == 1 + assert tiles[0][tiles[0].index("-vf") + 1] == "tile=8x1" + assert tiles[0][-1] == filmstrip.sprite_path(rec, 5) + + +def _capture_all_exec(monkeypatch): + """Patch create_subprocess_exec to record every call's argv and write a + stub output (each ffmpeg writes its last positional arg).""" + calls: list[list[str]] = [] + + class _Proc: + returncode = 0 + async def wait(self): + return 0 + + async def fake_exec(*args, **kwargs): + calls.append(list(args)) + Path(args[-1]).write_bytes(b"\xff\xd8\xff\xd9") + return _Proc() + + monkeypatch.setattr(filmstrip.asyncio, "create_subprocess_exec", fake_exec) + return calls + + +async def test_ensure_short_clip_is_single_seek(tmp_path: Path, monkeypatch): + """A sub-INTERVAL clip yields one tile: a single seek at t=0.""" + rec = str(tmp_path) + monkeypatch.setattr(filmstrip.shutil, "which", lambda _name: "/usr/bin/ffmpeg") + calls = _capture_all_exec(monkeypatch) + + meta = await filmstrip.ensure_filmstrip(rec, 7, "/rec/c.mp4", 3.0) + assert meta.frames == 1 + extracts = [c for c in calls if "-ss" in c] + assert len(extracts) == 1 + assert extracts[0][extracts[0].index("-ss") + 1] == "0" + + +async def test_ensure_returns_none_on_ffmpeg_nonzero(tmp_path: Path, monkeypatch): + rec = str(tmp_path) + monkeypatch.setattr(filmstrip.shutil, "which", lambda _name: "/usr/bin/ffmpeg") + + class _FailProc: + returncode = 1 + async def wait(self): + return 1 + + async def fake_exec(*args, **kwargs): + return _FailProc() # writes nothing + + 
monkeypatch.setattr(filmstrip.asyncio, "create_subprocess_exec", fake_exec) + meta = await filmstrip.ensure_filmstrip(rec, 6, "/rec/clip.mp4", 60.0) + assert meta is None + + +def test_retention_removes_filmstrip_sprite_and_sidecar(tmp_path: Path): + rec = str(tmp_path) + clip_file = tmp_path / "clip.mp4" + clip_file.write_bytes(b"\0") + + sp = filmstrip.sprite_path(rec, 11) + mp = filmstrip.meta_path(rec, 11) + Path(sp).write_bytes(b"\xff\xd8\xff\xd9") + Path(mp).write_text("{}") + + rec_row = {"id": 11, "path": str(clip_file)} + retention._delete_clip_files(rec_row, rec) + + assert not clip_file.exists() + assert not os.path.exists(sp) + assert not os.path.exists(mp) + + +class _HangProc: + """Fake ffmpeg child: kill() records, wait() counts body runs.""" + returncode = None + + def __init__(self): + self.killed = False + self.reaped = 0 + + def kill(self): + self.killed = True + + async def wait(self): + self.reaped += 1 + return 0 + + +async def _raise_timeout(coro, timeout): + # Close the inner proc.wait() coroutine so it isn't left un-awaited + # (the suite runs under filterwarnings=error), then simulate a timeout. + coro.close() + raise TimeoutError + + +async def test_ensure_reaps_child_on_timeout(tmp_path: Path, monkeypatch): + rec = str(tmp_path) + monkeypatch.setattr(filmstrip.shutil, "which", lambda _n: "/usr/bin/ffmpeg") + fake = _HangProc() + + async def fake_exec(*a, **k): + return fake + + monkeypatch.setattr(filmstrip.asyncio, "create_subprocess_exec", fake_exec) + monkeypatch.setattr(filmstrip.asyncio, "wait_for", _raise_timeout) + + result = await filmstrip.ensure_filmstrip(rec, 99, "/x.mp4", 60.0) + assert result is None + assert fake.killed is True + assert fake.reaped == 1 # proc.wait() awaited after kill -> child reaped + + +# --- logging: the timeline feature must be debuggable via the Logs tab. 
+# The app log persists INFO+ from the ``viofosync.*`` namespace, so these +# assert the filmstrip service emits on that logger so a NAS CPU spike is +# traceable to the exact clips being rendered. + + +async def test_generation_logs_start_and_done(tmp_path: Path, monkeypatch, caplog): + rec = str(tmp_path) + monkeypatch.setattr(filmstrip.shutil, "which", lambda _name: "/usr/bin/ffmpeg") + + class _FakeProc: + returncode = 0 + async def wait(self): + return 0 + + async def fake_exec(*args, **kwargs): + Path(args[-1]).write_bytes(b"\xff\xd8\xff\xd9") + return _FakeProc() + + monkeypatch.setattr(filmstrip.asyncio, "create_subprocess_exec", fake_exec) + + with caplog.at_level(logging.INFO, logger="viofosync.filmstrip"): + await filmstrip.ensure_filmstrip(rec, 5, "/rec/clip.mp4", 60.0) + + msgs = [r.getMessage() for r in caplog.records] + assert any("generating clip=5" in m and "frames=8" in m for m in msgs) + assert any("clip=5 done" in m for m in msgs) + + +async def test_timeout_logs_warning(tmp_path: Path, monkeypatch, caplog): + rec = str(tmp_path) + monkeypatch.setattr(filmstrip.shutil, "which", lambda _n: "/usr/bin/ffmpeg") + fake = _HangProc() + + async def fake_exec(*a, **k): + return fake + + monkeypatch.setattr(filmstrip.asyncio, "create_subprocess_exec", fake_exec) + monkeypatch.setattr(filmstrip.asyncio, "wait_for", _raise_timeout) + + with caplog.at_level(logging.INFO, logger="viofosync.filmstrip"): + await filmstrip.ensure_filmstrip(rec, 99, "/x.mp4", 60.0) + + warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING] + assert any("clip=99 generation failed" in m for m in warnings) + + +async def test_ffmpeg_nonzero_logs_warning(tmp_path: Path, monkeypatch, caplog): + rec = str(tmp_path) + monkeypatch.setattr(filmstrip.shutil, "which", lambda _name: "/usr/bin/ffmpeg") + + class _FailProc: + returncode = 1 + async def wait(self): + return 1 + + async def fake_exec(*args, **kwargs): + return _FailProc() +
monkeypatch.setattr(filmstrip.asyncio, "create_subprocess_exec", fake_exec) + with caplog.at_level(logging.INFO, logger="viofosync.filmstrip"): + await filmstrip.ensure_filmstrip(rec, 6, "/rec/clip.mp4", 60.0) + + warnings = [r.getMessage() for r in caplog.records if r.levelno >= logging.WARNING] + assert any("clip=6 generation failed" in m for m in warnings) + + +async def test_missing_ffmpeg_warns_once(tmp_path: Path, monkeypatch, caplog): + monkeypatch.setattr(filmstrip.shutil, "which", lambda _name: None) + monkeypatch.setattr(filmstrip, "_warned_no_ffmpeg", False) + + with caplog.at_level(logging.INFO, logger="viofosync.filmstrip"): + await filmstrip.ensure_filmstrip(str(tmp_path), 1, "/a.mp4", 60.0) + await filmstrip.ensure_filmstrip(str(tmp_path), 2, "/b.mp4", 60.0) + + no_ffmpeg = [ + r for r in caplog.records + if "ffmpeg not found" in r.getMessage() + ] + assert len(no_ffmpeg) == 1 # warned once, not once-per-clip diff --git a/tests/test_filmstrip_endpoints.py b/tests/test_filmstrip_endpoints.py new file mode 100644 index 0000000..38225d7 --- /dev/null +++ b/tests/test_filmstrip_endpoints.py @@ -0,0 +1,149 @@ +"""Tests for GET /api/archive/clip/{id}/filmstrip[.jpg].""" +from __future__ import annotations + +from pathlib import Path + +import pytest + + +class _FakeMqttService: + def __init__(self, **kwargs): pass + def start(self): pass + async def stop(self): pass + async def on_settings_changed(self, keys, snap): pass + def get_status(self): + return {"state": "idle", "detail": None, "last_published_at": None} + + +@pytest.fixture +def logged_in_client(tmp_config_dir, tmp_recordings_dir, monkeypatch): + import bcrypt + from fastapi.testclient import TestClient + + from web import settings as settings_mod + from web.app import create_app + from web.services.sync_worker import SyncWorker + + digest = bcrypt.hashpw(b"pw" * 8, bcrypt.gensalt()).decode() + settings_mod.reset_for_tests() + p = settings_mod.get_provider() + data = p._store.load() + 
data["WEB_PASSWORD_HASH"] = digest + p._store.write(data) + settings_mod.reset_for_tests() + + monkeypatch.setattr(SyncWorker, "start", lambda self: None) + monkeypatch.setattr("web.app.MqttService", _FakeMqttService) + + app = create_app() + c = TestClient(app) + c.__enter__() + c.post("/api/auth/login", json={"password": "pwpwpwpwpwpwpwpw"}) + yield c, Path(str(tmp_recordings_dir)) + c.__exit__(None, None, None) + settings_mod.reset_for_tests() + + +def _insert_clip(app, clip_id: int, path: str, duration_s: float) -> None: + db = app.state.db + with db.write() as c: + c.execute( + "INSERT INTO clip_index " + "(id, path, basename, group_name, timestamp, camera, " + " sequence, event_type, has_gpx, gps_examined, scanned_at, duration_s) " + "VALUES (?,?,?,?,?,?,?,?,0,0,?,?)", + (clip_id, path, f"{clip_id}.MP4", "2026-06-02", + 1_717_312_440, "F", clip_id, "normal", 1_717_312_440, duration_s), + ) + + +def test_filmstrip_meta_returns_slicing_info(logged_in_client, monkeypatch): + client, rec = logged_in_client + clip_file = rec / "clip.mp4" + clip_file.write_bytes(b"\0") + _insert_clip(client.app, 1, str(clip_file), 60.0) + + from web.services import filmstrip + + async def fake_ensure(recordings, clip_id, video_path, duration_s): + Path(filmstrip.sprite_path(recordings, clip_id)).write_bytes(b"\xff\xd8\xff\xd9") + return filmstrip.FilmstripMeta(8, 8, 160, 90, 60.0) + + monkeypatch.setattr("web.routers.archive.filmstrip.ensure_filmstrip", fake_ensure) + + r = client.get("/api/archive/clip/1/filmstrip") + assert r.status_code == 200 + body = r.json() + assert body["frames"] == 8 + assert body["interval_s"] == 8 + assert body["tile_w"] == 160 + assert body["sprite_url"] == "/api/archive/clip/1/filmstrip.jpg" + + +def test_filmstrip_meta_204_when_ffmpeg_unavailable(logged_in_client, monkeypatch): + client, rec = logged_in_client + clip_file = rec / "clip.mp4" + clip_file.write_bytes(b"\0") + _insert_clip(client.app, 2, str(clip_file), 60.0) + + async def fake_ensure(*a, 
**k): + return None + + monkeypatch.setattr("web.routers.archive.filmstrip.ensure_filmstrip", fake_ensure) + + r = client.get("/api/archive/clip/2/filmstrip") + assert r.status_code == 204 + + +def test_filmstrip_jpg_served(logged_in_client, monkeypatch): + client, rec = logged_in_client + clip_file = rec / "clip.mp4" + clip_file.write_bytes(b"\0") + _insert_clip(client.app, 3, str(clip_file), 60.0) + + from web.services import filmstrip + + async def fake_ensure(recordings, clip_id, video_path, duration_s): + Path(filmstrip.sprite_path(recordings, clip_id)).write_bytes(b"\xff\xd8\xff\xd9") + return filmstrip.FilmstripMeta(8, 8, 160, 90, 60.0) + + monkeypatch.setattr("web.routers.archive.filmstrip.ensure_filmstrip", fake_ensure) + + r = client.get("/api/archive/clip/3/filmstrip.jpg") + assert r.status_code == 200 + assert r.headers["content-type"].startswith("image/jpeg") + + +def test_filmstrip_jpg_404_when_ffmpeg_unavailable(logged_in_client, monkeypatch): + client, rec = logged_in_client + clip_file = rec / "clip.mp4" + clip_file.write_bytes(b"\0") + _insert_clip(client.app, 4, str(clip_file), 60.0) + + async def fake_ensure(*a, **k): + return None + + monkeypatch.setattr("web.routers.archive.filmstrip.ensure_filmstrip", fake_ensure) + + r = client.get("/api/archive/clip/4/filmstrip.jpg") + assert r.status_code == 404 + + +def test_filmstrip_jpg_404_when_sprite_missing(logged_in_client, monkeypatch): + # Defensive guard: meta is returned but the sprite file is absent + # (e.g. retention deleted it concurrently) -> 404, not a 500. + client, rec = logged_in_client + clip_file = rec / "clip.mp4" + clip_file.write_bytes(b"\0") + _insert_clip(client.app, 5, str(clip_file), 60.0) + + from web.services import filmstrip + + async def fake_ensure(recordings, clip_id, video_path, duration_s): + # Return valid meta but deliberately DO NOT write the sprite file. 
+ return filmstrip.FilmstripMeta(8, 8, 160, 90, 60.0) + + monkeypatch.setattr("web.routers.archive.filmstrip.ensure_filmstrip", fake_ensure) + + r = client.get("/api/archive/clip/5/filmstrip.jpg") + assert r.status_code == 404 diff --git a/tests/test_mvhd_probe.py b/tests/test_mvhd_probe.py new file mode 100644 index 0000000..8f7d2ff --- /dev/null +++ b/tests/test_mvhd_probe.py @@ -0,0 +1,161 @@ +"""Tests for the direct mvhd-box duration parser. + +Reading the MP4 ``moov/mvhd`` box gives clip duration without spawning an +ffprobe subprocess per clip — far cheaper for the duration sweep across a +multi-thousand-clip archive. The parser seeks past ``mdat`` rather than +reading it, so it's cheap even when ``moov`` sits at the end of a large +file (the usual dashcam layout). Anything it can't parse returns None so +the caller falls back to ffprobe. +""" +from __future__ import annotations + +import shutil +import struct +import subprocess +from pathlib import Path + +import pytest + +from web.services import durations + + +# --- ISO-BMFF box builders for deterministic fixtures --- + +def _box(btype: bytes, payload: bytes) -> bytes: + return struct.pack(">I", 8 + len(payload)) + btype + payload + + +def _box64(btype: bytes, payload: bytes) -> bytes: + # size==1 sentinel, then a 64-bit largesize (how big mdat is encoded) + return struct.pack(">I", 1) + btype + struct.pack(">Q", 16 + len(payload)) + payload + + +def _mvhd_v0(timescale: int, duration: int) -> bytes: + p = bytes([0, 0, 0, 0]) # version 0 + flags + p += struct.pack(">I", 0) # creation_time + p += struct.pack(">I", 0) # modification_time + p += struct.pack(">I", timescale) + p += struct.pack(">I", duration) + p += b"\x00" * 80 # trailing fields (unparsed) + return _box(b"mvhd", p) + + +def _mvhd_v1(timescale: int, duration: int) -> bytes: + p = bytes([1, 0, 0, 0]) # version 1 + flags + p += struct.pack(">Q", 0) # creation_time (64-bit) + p += struct.pack(">Q", 0) # modification_time (64-bit) + p += 
struct.pack(">I", timescale) + p += struct.pack(">Q", duration) # duration (64-bit) + p += b"\x00" * 80 + return _box(b"mvhd", p) + + +def _w(tmp_path: Path, name: str, data: bytes) -> str: + p = tmp_path / name + p.write_bytes(data) + return str(p) + + +def test_mvhd_v0_moov_first(tmp_path: Path) -> None: + data = _box(b"ftyp", b"isomiso2") + _box(b"moov", _mvhd_v0(1000, 5000)) + assert durations._probe_duration_mvhd(_w(tmp_path, "a.mp4", data)) == pytest.approx(5.0) + + +def test_mvhd_after_large_mdat(tmp_path: Path) -> None: + """Dashcam layout: moov AFTER a big mdat. Parser must seek past mdat + (not read it) and still find mvhd.""" + data = (_box(b"ftyp", b"isom") + + _box(b"mdat", b"\x00" * 100_000) + + _box(b"moov", _mvhd_v0(90000, 90000 * 12))) + assert durations._probe_duration_mvhd(_w(tmp_path, "b.mp4", data)) == pytest.approx(12.0) + + +def test_mvhd_v1_64bit_duration(tmp_path: Path) -> None: + data = _box(b"ftyp", b"isom") + _box(b"moov", _mvhd_v1(48000, 48000 * 7)) + assert durations._probe_duration_mvhd(_w(tmp_path, "c.mp4", data)) == pytest.approx(7.0) + + +def test_mvhd_64bit_mdat_size(tmp_path: Path) -> None: + """Large mdat encoded with the 64-bit size form must be skipped correctly.""" + data = (_box(b"ftyp", b"isom") + + _box64(b"mdat", b"\x00" * 5000) + + _box(b"moov", _mvhd_v0(1000, 3000))) + assert durations._probe_duration_mvhd(_w(tmp_path, "d.mp4", data)) == pytest.approx(3.0) + + +def test_mvhd_skips_empty_free_box(tmp_path: Path) -> None: + """ffmpeg emits a zero-payload ``free`` box (size 8) before mdat; the + scan must treat it as valid and keep going, not bail.""" + data = (_box(b"ftyp", b"isom") + + struct.pack(">I", 8) + b"free" # empty free box + + _box(b"mdat", b"\x00" * 200) + + _box(b"moov", _mvhd_v0(1000, 4000))) + assert durations._probe_duration_mvhd(_w(tmp_path, "free.mp4", data)) == pytest.approx(4.0) + + +def test_mvhd_no_moov_returns_none(tmp_path: Path) -> None: + data = _box(b"ftyp", b"isom") + _box(b"mdat", b"\x00" * 
1000) + assert durations._probe_duration_mvhd(_w(tmp_path, "e.mp4", data)) is None + + +def test_mvhd_unknown_duration_returns_none(tmp_path: Path) -> None: + data = _box(b"ftyp", b"isom") + _box(b"moov", _mvhd_v0(1000, 0xFFFFFFFF)) + assert durations._probe_duration_mvhd(_w(tmp_path, "f.mp4", data)) is None + + +def test_mvhd_zero_timescale_returns_none(tmp_path: Path) -> None: + data = _box(b"ftyp", b"isom") + _box(b"moov", _mvhd_v0(0, 5000)) + assert durations._probe_duration_mvhd(_w(tmp_path, "z.mp4", data)) is None + + +def test_mvhd_missing_file_returns_none(tmp_path: Path) -> None: + assert durations._probe_duration_mvhd(str(tmp_path / "nope.mp4")) is None + + +def test_mvhd_truncated_returns_none(tmp_path: Path) -> None: + # moov header claims a size the file doesn't contain + data = _box(b"ftyp", b"isom") + struct.pack(">I", 0x1000) + b"moov\x00\x00" + assert durations._probe_duration_mvhd(_w(tmp_path, "g.mp4", data)) is None + + +def test_mvhd_matches_real_ffmpeg_clip(tmp_path: Path) -> None: + ffmpeg = shutil.which("ffmpeg") + if ffmpeg is None: + pytest.skip("ffmpeg not available") + clip = tmp_path / "real.mp4" + subprocess.run( + [ffmpeg, "-hide_banner", "-loglevel", "error", "-y", "-f", "lavfi", + "-i", "testsrc=size=320x180:duration=5:rate=30", "-c:v", "libx264", + str(clip)], + check=True, + ) + assert durations._probe_duration_mvhd(str(clip)) == pytest.approx(5.0, abs=0.3) + + +# --- orchestration: probe_duration prefers mvhd, falls back to ffprobe --- + + +async def test_probe_duration_prefers_mvhd_over_ffprobe(tmp_path: Path, monkeypatch) -> None: + data = _box(b"ftyp", b"isom") + _box(b"moov", _mvhd_v0(1000, 8000)) + path = _w(tmp_path, "h.mp4", data) + + def _boom(*a, **k): + raise AssertionError("ffprobe must not run when mvhd parses") + monkeypatch.setattr(durations.asyncio, "create_subprocess_exec", _boom) + + assert await durations.probe_duration(path) == pytest.approx(8.0) + + +async def test_probe_duration_falls_back_to_ffprobe(tmp_path: 
Path, monkeypatch) -> None: + # No parseable moov -> mvhd returns None -> ffprobe is used. + path = _w(tmp_path, "i.mp4", _box(b"ftyp", b"isom")) + + class _P: + async def communicate(self): + return (b"33.3\n", b"") + async def fake_exec(*a, **k): + return _P() + monkeypatch.setattr(durations.shutil, "which", lambda _n: "/usr/bin/ffprobe") + monkeypatch.setattr(durations.asyncio, "create_subprocess_exec", fake_exec) + + assert await durations.probe_duration(path) == pytest.approx(33.3) diff --git a/tests/test_scanner_prune.py b/tests/test_scanner_prune.py new file mode 100644 index 0000000..693acc5 --- /dev/null +++ b/tests/test_scanner_prune.py @@ -0,0 +1,82 @@ +"""scanner.scan must not wipe the index when recordings is unavailable. + +Root cause of "the duration sweep re-runs across all clips after it +completed yesterday": scanner.scan rebuilds the index from a glob of the +recordings directory and prunes any DB row whose file it didn't see. When +that glob returns nothing — the volume not yet mounted at container start, +or a transient NAS glitch — the prune ran an unconditional +``DELETE FROM clip_index``, wiping every row. The next scan re-inserted +them via an INSERT that omits duration_s (→ NULL) and resets gps_examined, +so the duration sweep (and GPS re-exam, thumbs) re-ran across the whole +archive. 
+""" +from __future__ import annotations + +import logging +from pathlib import Path + +from web.db import Database +from web.services import scanner + + +def _insert(db: Database, path: str, *, duration_s: float = 42.0, + gps_examined: int = 1) -> None: + with db.write() as c: + c.execute( + "INSERT INTO clip_index " + "(path, basename, group_name, timestamp, camera, sequence, " + " event_type, size_bytes, has_gpx, gps_examined, duration_s, " + " scanned_at) " + "VALUES (?, ?, '2026-06-03', 0, 'F', 1, 'normal', 100, 0, ?, ?, 0)", + (path, path.split("/")[-1], gps_examined, duration_s), + ) + + +def _counts(db: Database) -> tuple[int, int]: + with db.conn() as c: + row = c.execute( + "SELECT COUNT(*) AS n, " + " SUM(CASE WHEN duration_s > 0 THEN 1 ELSE 0 END) AS d " + "FROM clip_index" + ).fetchone() + return row["n"], (row["d"] or 0) + + +def test_empty_scan_does_not_wipe_index(tmp_path: Path, caplog) -> None: + """A scan that finds zero clips (recordings unavailable) must keep the + existing rows and their durations, not delete the whole index.""" + db = Database(str(tmp_path / "t.db")) + _insert(db, "/recordings/2026-06-03/2026_0603_082421_0001F.MP4") + _insert(db, "/recordings/2026-06-03/2026_0603_082421_0001R.MP4") + assert _counts(db) == (2, 2) + + empty = tmp_path / "recordings" + empty.mkdir() # exists but contains no clips -> glob yields nothing + + with caplog.at_level(logging.WARNING, logger="viofosync.scanner"): + scanner.scan(db, str(empty), "daily") + + assert _counts(db) == (2, 2) # index intact, durations preserved + assert any("skip" in r.getMessage().lower() or "0 clip" in r.getMessage() + for r in caplog.records), "expected a warning about the empty scan" + + +def test_scan_still_prunes_genuinely_vanished_file(tmp_path: Path) -> None: + """The empty-scan guard must not disable legitimate pruning: when the + scan DOES find clips, a row whose file is gone is still removed.""" + db = Database(str(tmp_path / "t.db")) + day = tmp_path / "recordings" / 
"2026-06-03" + day.mkdir(parents=True) + present = day / "2026_0603_082421_0001F.MP4" + present.write_bytes(b"\x00" * 16) + + _insert(db, str(present)) # on disk + _insert(db, "/recordings/2026-06-03/2026_0603_090000_0002F.MP4") # gone + + scanner.scan(db, str(tmp_path / "recordings"), "daily") + + with db.conn() as c: + paths = [r["path"] for r in + c.execute("SELECT path FROM clip_index ORDER BY path")] + assert str(present) in paths # the real file kept + assert "/recordings/2026-06-03/2026_0603_090000_0002F.MP4" not in paths diff --git a/tests/test_switch_pieces.py b/tests/test_switch_pieces.py new file mode 100644 index 0000000..24b7653 --- /dev/null +++ b/tests/test_switch_pieces.py @@ -0,0 +1,47 @@ +"""Tests for the pure switched-export piece builder.""" +from __future__ import annotations + +from web.services.exporter import build_switch_pieces + + +def _clips(): + return [ + {"path": "/f0.mp4", "channel": "front", "start_ts": 1000, "duration_s": 60}, + {"path": "/f1.mp4", "channel": "front", "start_ts": 1060, "duration_s": 60}, + {"path": "/r0.mp4", "channel": "rear", "start_ts": 1000, "duration_s": 60}, + ] + + +def test_single_segment_within_one_clip(): + segs = [{"channel": "rear", "start_ts": 1010, "end_ts": 1040}] + pieces = build_switch_pieces(segs, _clips()) + assert pieces == [{"path": "/r0.mp4", "ss": 10.0, "t": 30.0}] + + +def test_segment_spans_two_clips(): + segs = [{"channel": "front", "start_ts": 1030, "end_ts": 1090}] + pieces = build_switch_pieces(segs, _clips()) + assert pieces == [ + {"path": "/f0.mp4", "ss": 30.0, "t": 30.0}, + {"path": "/f1.mp4", "ss": 0.0, "t": 30.0}, + ] + + +def test_switch_between_cameras_in_order(): + segs = [ + {"channel": "rear", "start_ts": 1000, "end_ts": 1020}, + {"channel": "front", "start_ts": 1020, "end_ts": 1050}, + ] + pieces = build_switch_pieces(segs, _clips()) + assert pieces == [ + {"path": "/r0.mp4", "ss": 0.0, "t": 20.0}, + {"path": "/f0.mp4", "ss": 20.0, "t": 30.0}, + ] + + +def 
test_zero_width_and_missing_channel_skipped(): + segs = [ + {"channel": "front", "start_ts": 1000, "end_ts": 1000.02}, + {"channel": "interior", "start_ts": 1000, "end_ts": 1030}, + ] + assert build_switch_pieces(segs, _clips()) == [] diff --git a/tests/test_switched_export.py b/tests/test_switched_export.py new file mode 100644 index 0000000..3f907eb --- /dev/null +++ b/tests/test_switched_export.py @@ -0,0 +1,198 @@ +"""Tests for the switched export job (enqueue + render, ffmpeg mocked).""" +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from web.db import Database +from web.services.exporter import ExportWorker + + +@pytest.fixture +def db(tmp_path: Path) -> Database: + return Database(str(tmp_path / "t.db")) + + +async def _noop(_e): # broadcast stub + pass + + +def _worker(db): + return ExportWorker(db=db, provider=MagicMock(), broadcast=_noop) + + +def test_enqueue_switched_stores_plan_and_range(db, monkeypatch): + monkeypatch.setattr("web.services.exporter.ffmpeg_available", lambda: True) + segs = [ + {"channel": "rear", "start_ts": 1000.0, "end_ts": 1020.0}, + {"channel": "front", "start_ts": 1020.0, "end_ts": 1050.0}, + ] + job_id = _worker(db).enqueue_switched(segs, encoder="software") + with db.conn() as c: + row = c.execute( + "SELECT type, clip_ids, clip_start, clip_end FROM export_jobs WHERE id=?", + (job_id,), + ).fetchone() + import json + assert row["type"] == "switched" + payload = json.loads(row["clip_ids"]) + assert payload["encoder"] == "software" + assert len(payload["segments"]) == 2 + assert row["clip_start"] == 1000 + assert row["clip_end"] == 1050 + + +def test_enqueue_switched_rejects_empty(db, monkeypatch): + monkeypatch.setattr("web.services.exporter.ffmpeg_available", lambda: True) + with pytest.raises(ValueError): + _worker(db).enqueue_switched([], encoder="software") + + +def test_enqueue_switched_rejects_bad_window(db, monkeypatch): + 
monkeypatch.setattr("web.services.exporter.ffmpeg_available", lambda: True) + with pytest.raises(ValueError): + _worker(db).enqueue_switched( + [{"channel": "front", "start_ts": 50.0, "end_ts": 50.0}], + encoder="software", + ) + + +def _insert_clip(db, clip_id, ts, camera, dur, path): + with db.write() as c: + c.execute( + "INSERT INTO clip_index " + "(id, path, basename, group_name, timestamp, camera, " + " sequence, event_type, has_gpx, gps_examined, scanned_at, duration_s) " + "VALUES (?,?,?,?,?,?,?,?,0,0,?,?)", + (clip_id, path, f"{clip_id}.MP4", "2026-06-02", + ts, camera, clip_id, "normal", ts, dur), + ) + + +async def test_run_switched_trims_and_concats(db, tmp_path, monkeypatch): + monkeypatch.setattr("web.services.exporter.ffmpeg_available", lambda: True) + _insert_clip(db, 1, 1000, "F", 60.0, "/rec/f0.mp4") + _insert_clip(db, 2, 1000, "R", 60.0, "/rec/r0.mp4") + snap = MagicMock() + snap.recordings = str(tmp_path) + provider = MagicMock() + provider.get.return_value = snap + worker = ExportWorker(db=db, provider=provider, broadcast=_noop) + + calls = [] + + async def fake_run_ffmpeg(job_id, args, total, **kw): + calls.append(args) + Path(args[-1]).write_bytes(b"\0") + return 0, "" + + async def fake_probe_res(path): + return (1920, 1080) + + monkeypatch.setattr(worker, "_run_ffmpeg", fake_run_ffmpeg) + monkeypatch.setattr(worker, "_probe_resolution", fake_probe_res) + finishes = [] + monkeypatch.setattr(worker, "_finish", + lambda jid, ok, err, out: finishes.append((ok, err, out))) + + segs = [ + {"channel": "rear", "start_ts": 1000, "end_ts": 1020}, + {"channel": "front", "start_ts": 1020, "end_ts": 1050}, + ] + import json as _json + job = {"id": 5, "type": "switched", + "clip_ids": _json.dumps({"segments": segs, "encoder": "software"})} + await worker._run_job(job) + + assert finishes and finishes[-1][0] is True + assert len(calls) == 3 # 2 trims + 1 concat + trim0 = calls[0] + assert "/rec/r0.mp4" in trim0 + assert trim0[trim0.index("-ss") + 1] == 
"0.0" + assert "scale=1920:1080,setsar=1" in trim0[trim0.index("-vf") + 1] + trim1 = calls[1] + assert "/rec/f0.mp4" in trim1 + assert trim1[trim1.index("-ss") + 1] == "20.0" + concat = calls[2] + assert "concat" in concat and "copy" in concat + + +async def test_run_switched_no_footage_fails(db, tmp_path, monkeypatch): + monkeypatch.setattr("web.services.exporter.ffmpeg_available", lambda: True) + snap = MagicMock() + snap.recordings = str(tmp_path) + provider = MagicMock() + provider.get.return_value = snap + worker = ExportWorker(db=db, provider=provider, broadcast=_noop) + finishes = [] + monkeypatch.setattr(worker, "_finish", + lambda jid, ok, err, out: finishes.append((ok, err))) + import json as _json + job = {"id": 6, "type": "switched", + "clip_ids": _json.dumps( + {"segments": [{"channel": "front", "start_ts": 1, "end_ts": 9}], + "encoder": "software"})} + await worker._run_job(job) + assert finishes[-1][0] is False + + +class _FakeMqttService: + def __init__(self, **k): pass + def start(self): pass + async def stop(self): pass + async def on_settings_changed(self, keys, snap): pass + def get_status(self): + return {"state": "idle", "detail": None, "last_published_at": None} + + +@pytest.fixture +def logged_in_client(tmp_config_dir, tmp_recordings_dir, monkeypatch): + import bcrypt + from fastapi.testclient import TestClient + + from web import settings as settings_mod + from web.app import create_app + from web.services.sync_worker import SyncWorker + + digest = bcrypt.hashpw(b"pw" * 8, bcrypt.gensalt()).decode() + settings_mod.reset_for_tests() + p = settings_mod.get_provider() + data = p._store.load() + data["WEB_PASSWORD_HASH"] = digest + p._store.write(data) + settings_mod.reset_for_tests() + monkeypatch.setattr(SyncWorker, "start", lambda self: None) + monkeypatch.setattr("web.app.MqttService", _FakeMqttService) + app = create_app() + c = TestClient(app) + c.__enter__() + c.post("/api/auth/login", json={"password": "pwpwpwpwpwpwpwpw"}) + yield c + 
c.__exit__(None, None, None) + settings_mod.reset_for_tests() + + +def test_post_switched_export_creates_job(logged_in_client, monkeypatch): + logged_in_client.app.state.export_encoders = {"software": True} + csrf = logged_in_client.get("/api/auth/csrf").json()["csrf"] + r = logged_in_client.post("/api/exports", json={ + "type": "switched", + "segments": [ + {"channel": "rear", "start_ts": 1000.0, "end_ts": 1020.0}, + {"channel": "front", "start_ts": 1020.0, "end_ts": 1050.0}, + ], + "encoder": "software", + }, headers={"x-csrf-token": csrf}) + assert r.status_code == 200, r.text + assert "job_id" in r.json() + + +def test_post_switched_requires_segments(logged_in_client): + logged_in_client.app.state.export_encoders = {"software": True} + csrf = logged_in_client.get("/api/auth/csrf").json()["csrf"] + r = logged_in_client.post("/api/exports", json={ + "type": "switched", "clip_ids": [], "encoder": "software"}, + headers={"x-csrf-token": csrf}) + assert r.status_code in (400, 422) diff --git a/tests/test_thumbs.py b/tests/test_thumbs.py new file mode 100644 index 0000000..4486662 --- /dev/null +++ b/tests/test_thumbs.py @@ -0,0 +1,46 @@ +"""Tests for the thumbnail service (ffmpeg mocked).""" +from __future__ import annotations + +from pathlib import Path + +from web.services import thumbs + + +class _HangProc: + """Fake ffmpeg child: kill() records, wait() counts body runs.""" + returncode = None + + def __init__(self): + self.killed = False + self.reaped = 0 + + def kill(self): + self.killed = True + + async def wait(self): + self.reaped += 1 + return 0 + + +async def _raise_timeout(coro, timeout): + # Close the inner proc.wait() coroutine so it isn't left un-awaited + # (the suite runs under filterwarnings=error), then simulate a timeout. 
+ coro.close() + raise TimeoutError + + +async def test_ensure_thumb_reaps_child_on_timeout(tmp_path: Path, monkeypatch): + rec = str(tmp_path) + monkeypatch.setattr(thumbs.shutil, "which", lambda _n: "/usr/bin/ffmpeg") + fake = _HangProc() + + async def fake_exec(*a, **k): + return fake + + monkeypatch.setattr(thumbs.asyncio, "create_subprocess_exec", fake_exec) + monkeypatch.setattr(thumbs.asyncio, "wait_for", _raise_timeout) + + result = await thumbs.ensure_thumb(rec, 7, "/x.mp4") + assert result is None + assert fake.killed is True + assert fake.reaped == 1 # proc.wait() awaited after kill -> child reaped diff --git a/tests/test_timeline_endpoint.py b/tests/test_timeline_endpoint.py new file mode 100644 index 0000000..fa4ec8d --- /dev/null +++ b/tests/test_timeline_endpoint.py @@ -0,0 +1,196 @@ +"""Tests for build_route_payload + GET /api/archive/timeline.""" +from __future__ import annotations + +import logging +from pathlib import Path + +import pytest + +from web.db import Database +from web.routers import archive + + +def test_build_route_payload_empty_day(tmp_path: Path): + """No gpx clips for the date -> empty journeys/stops, point_count 0.""" + db = Database(str(tmp_path / "t.db")) + payload = archive.build_route_payload(db, str(tmp_path), "2026-06-02", None) + assert payload["date"] == "2026-06-02" + assert payload["point_count"] == 0 + assert payload["journeys"] == [] + assert payload["stops"] == [] + + +class _FakeMqttService: + def __init__(self, **kwargs): pass + def start(self): pass + async def stop(self): pass + async def on_settings_changed(self, keys, snap): pass + def get_status(self): + return {"state": "idle", "detail": None, "last_published_at": None} + + +@pytest.fixture +def logged_in_client(tmp_config_dir, tmp_recordings_dir, monkeypatch): + import bcrypt + from fastapi.testclient import TestClient + + from web import settings as settings_mod + from web.app import create_app + from web.services.sync_worker import SyncWorker + + digest = 
bcrypt.hashpw(b"pw" * 8, bcrypt.gensalt()).decode() + settings_mod.reset_for_tests() + p = settings_mod.get_provider() + data = p._store.load() + data["WEB_PASSWORD_HASH"] = digest + p._store.write(data) + settings_mod.reset_for_tests() + + monkeypatch.setattr(SyncWorker, "start", lambda self: None) + monkeypatch.setattr("web.app.MqttService", _FakeMqttService) + + app = create_app() + c = TestClient(app) + c.__enter__() + c.post("/api/auth/login", json={"password": "pwpwpwpwpwpwpwpw"}) + yield c + c.__exit__(None, None, None) + settings_mod.reset_for_tests() + + +def _insert_clip(app, clip_id, ts, camera, duration_s, date="2026-06-02"): + with app.state.db.write() as c: + c.execute( + "INSERT INTO clip_index " + "(id, path, basename, group_name, timestamp, camera, " + " sequence, event_type, has_gpx, gps_examined, scanned_at, duration_s) " + "VALUES (?,?,?,?,?,?,?,?,0,0,?,?)", + (clip_id, f"/rec/{clip_id}.MP4", f"{clip_id}.MP4", date, + ts, camera, clip_id, "normal", ts, duration_s), + ) + + +def test_timeline_bad_date_400(logged_in_client): + r = logged_in_client.get("/api/archive/timeline?date=nonsense") + assert r.status_code == 400 + + +def test_timeline_day_mode_channels_clips_bounds(logged_in_client): + app = logged_in_client.app + _insert_clip(app, 1, 1_717_312_440, "F", 60.0) + _insert_clip(app, 2, 1_717_312_440, "R", 60.0) + _insert_clip(app, 3, 1_717_312_500, "F", 60.0) + + r = logged_in_client.get("/api/archive/timeline?date=2026-06-02") + assert r.status_code == 200 + body = r.json() + assert [ch["key"] for ch in body["channels"]] == ["front", "rear"] + assert body["channels"][0]["label"] == "Front" + assert len(body["clips"]) == 3 + assert body["bounds"]["start_ts"] == 1_717_312_440 + assert body["bounds"]["end_ts"] == 1_717_312_560 + assert body["gps"] is None + + +def test_timeline_journey_mode_windows_clips(logged_in_client, monkeypatch): + app = logged_in_client.app + _insert_clip(app, 1, 1_717_312_440, "F", 60.0) + _insert_clip(app, 2, 
1_717_312_500, "F", 60.0) + _insert_clip(app, 3, 1_717_313_040, "F", 60.0) + + fake_route = { + "date": "2026-06-02", + "point_count": 5, + "journeys": [{"start_ts": 1_717_312_440, "end_ts": 1_717_312_560}], + "stops": [], + } + monkeypatch.setattr( + "web.routers.archive.build_route_payload", + lambda db, recordings, date, geocoder: fake_route, + ) + + r = logged_in_client.get("/api/archive/timeline?date=2026-06-02&journey=0") + assert r.status_code == 200 + body = r.json() + ids = sorted(c["id"] for c in body["clips"]) + assert ids == [1, 2] + assert [ch["key"] for ch in body["channels"]] == ["front"] + assert body["bounds"]["start_ts"] == 1_717_312_440 + assert body["bounds"]["end_ts"] == 1_717_312_560 + assert body["gps"] is not None + + +def test_timeline_journey_out_of_range_404(logged_in_client, monkeypatch): + app = logged_in_client.app + _insert_clip(app, 1, 1_717_312_440, "F", 60.0) + monkeypatch.setattr( + "web.routers.archive.build_route_payload", + lambda db, recordings, date, geocoder: { + "date": "2026-06-02", "point_count": 0, "journeys": [], "stops": [], + }, + ) + r = logged_in_client.get("/api/archive/timeline?date=2026-06-02&journey=0") + assert r.status_code == 404 + + +def test_timeline_open_logs_clip_count(logged_in_client, caplog): + """Opening the editor logs how many clips (= filmstrip jobs) it will + drive, so a NAS CPU spike is traceable from the Logs tab.""" + app = logged_in_client.app + _insert_clip(app, 1, 1_717_312_440, "F", 60.0) + _insert_clip(app, 2, 1_717_312_440, "R", 60.0) + + with caplog.at_level(logging.INFO, logger="viofosync.archive"): + r = logged_in_client.get("/api/archive/timeline?date=2026-06-02") + assert r.status_code == 200 + + msgs = [r.getMessage() for r in caplog.records] + assert any("timeline: date=2026-06-02" in m and "2 clip(s)" in m for m in msgs) + + +# --- fallback durations: the editor needs a non-zero duration per clip to +# render blocks and resolve footage at the playhead. 
Until ffprobe has filled +# duration_s, derive it from the gap to the next clip on the same channel so +# the editor works immediately instead of showing empty tracks. + + +def test_timeline_fills_missing_duration_from_gap(logged_in_client): + app = logged_in_client.app + base = 1_717_312_440 + _insert_clip(app, 1, base, "F", None) + _insert_clip(app, 2, base + 45, "F", None) + body = logged_in_client.get("/api/archive/timeline?date=2026-06-02").json() + clips = sorted(body["clips"], key=lambda c: c["start_ts"]) + assert clips[0]["duration_s"] == 45 # gap to next clip + assert clips[1]["duration_s"] == archive.FALLBACK_DEFAULT_S # last -> default + + +def test_timeline_caps_fallback_for_large_gap(logged_in_client): + app = logged_in_client.app + base = 1_717_312_440 + _insert_clip(app, 1, base, "F", None) + _insert_clip(app, 2, base + 99_999, "F", None) # parking-sized gap + body = logged_in_client.get("/api/archive/timeline?date=2026-06-02").json() + clips = sorted(body["clips"], key=lambda c: c["start_ts"]) + assert clips[0]["duration_s"] == archive.FALLBACK_MAX_S # capped + + +def test_timeline_gap_is_per_channel(logged_in_client): + app = logged_in_client.app + base = 1_717_312_440 + _insert_clip(app, 1, base, "F", None) + _insert_clip(app, 2, base + 10, "R", None) # other channel, ignored + _insert_clip(app, 3, base + 60, "F", None) + body = logged_in_client.get("/api/archive/timeline?date=2026-06-02").json() + fronts = sorted( + (c for c in body["clips"] if c["channel"] == "front"), + key=lambda c: c["start_ts"], + ) + assert fronts[0]["duration_s"] == 60 # gap to next FRONT, not the rear + + +def test_timeline_keeps_real_duration(logged_in_client): + app = logged_in_client.app + _insert_clip(app, 1, 1_717_312_440, "F", 42.0) + body = logged_in_client.get("/api/archive/timeline?date=2026-06-02").json() + assert body["clips"][0]["duration_s"] == 42.0 diff --git a/web/app.py b/web/app.py index b0c933e..898c75a 100644 --- a/web/app.py +++ b/web/app.py @@ -34,6 
+34,7 @@ from .routers import storage as storage_router from .routers import imports as imports_router from .routers import logs as logs_router +from .services import durations as _dur_mod from .services import retention as _ret_mod from .services import scanner from .services.exporter import ( @@ -128,6 +129,10 @@ async def _background_scan() -> None: ) except Exception as e: # pragma: no cover — non-fatal log.warning("thumb sweep failed: %s", e) + try: + await _dur_mod.sweep_missing_durations(app.state.db) + except Exception as e: # pragma: no cover — non-fatal + log.warning("duration sweep failed: %s", e) app.state.initial_scan_task = asyncio.create_task(_background_scan()) diff --git a/web/routers/archive.py b/web/routers/archive.py index 0cfb1f2..d20d048 100644 --- a/web/routers/archive.py +++ b/web/routers/archive.py @@ -16,17 +16,26 @@ import os from collections import defaultdict from dataclasses import dataclass -from typing import Optional from fastapi import APIRouter, Depends, HTTPException, Query, Request from fastapi.responses import FileResponse, JSONResponse, Response from ..auth import require_csrf, require_session +from ..services import durations, filmstrip, route_cache, scanner, thumbs from ..services import gps as gps_service -from ..services import route_cache, scanner, thumbs +from ..services.naming import CHANNEL_LABELS, CHANNEL_ORDER, channel_of log = logging.getLogger("viofosync.archive") +# Until ffprobe has populated ``clip_index.duration_s`` (a background sweep +# that can take a while on a large archive), the timeline editor would see +# zero-length clips and render nothing. Fall back to the gap until the next +# clip on the same channel — dashcam clips are contiguous — capped so a +# parking gap can't produce an absurdly long block. The last clip on a +# channel has no successor to measure, so it gets a typical-clip default. 
+FALLBACK_MAX_S = 300.0 +FALLBACK_DEFAULT_S = 60.0 + router = APIRouter( prefix="/api/archive", tags=["archive"], @@ -52,7 +61,7 @@ def _settings(request: Request): } -def _kind_filter_clause(driving: bool, parking: bool, ro: bool) -> Optional[str]: +def _kind_filter_clause(driving: bool, parking: bool, ro: bool) -> str | None: """Build a WHERE fragment for the three event-type filters. All on → no filter. All off → ``1 = 0`` (no rows). Otherwise @@ -71,8 +80,8 @@ def _kind_filter_clause(driving: bool, parking: bool, ro: bool) -> Optional[str] @router.get("/days") def list_days( request: Request, - date_from: Optional[str] = Query(None, alias="from"), - date_to: Optional[str] = Query(None, alias="to"), + date_from: str | None = Query(None, alias="from"), + date_to: str | None = Query(None, alias="to"), driving: bool = Query(True), parking: bool = Query(True), ro: bool = Query(True), @@ -137,8 +146,8 @@ def list_days( def get_day( request: Request, date: str, - time_from: Optional[str] = Query(None), - time_to: Optional[str] = Query(None), + time_from: str | None = Query(None), + time_to: str | None = Query(None), driving: bool = Query(True), parking: bool = Query(True), ro: bool = Query(True), @@ -223,6 +232,48 @@ def _in_range(ts: int) -> bool: return {"date": date, "clips": clips} +def build_route_payload(db, recordings, date: str, geocoder) -> dict: + """Merged GPS track for a day plus detected journeys/stops, as a + JSON-able dict. Shared by GET /day/{date}/route and GET /timeline. + + The GPX re-parse is the slow part (tens of seconds on a busy day) and + only changes when the day's GPX files change, so cache it keyed by a + signature of those files. Labels are applied after, on every request, + so they stay current as the geocode cache fills. + + ``geocoder`` is the app's geocoder (or None); only its synchronous + ``cache_lookup`` is used here — uncached labels are fetched lazily + by the UI via /geocode after first paint. 
+ """ + with db.conn() as c: + rows = c.execute( + """ + SELECT path FROM clip_index + WHERE group_name = ? AND has_gpx = 1 + ORDER BY timestamp ASC + """, + (date,), + ).fetchall() + + gpx_paths = [r["path"] + ".gpx" for r in rows] + sig = route_cache.signature(gpx_paths) + payload = route_cache.load(recordings, date, sig) + if payload is None: + log.info( + "route: aggregating %d GPX file(s) for %s", len(gpx_paths), date + ) + points, stops, journeys = gps_service.aggregate_day(gpx_paths) + log.info( + "route: aggregated %s -> %d point(s), %d journey(s), %d stop(s)", + date, len(points), len(journeys), len(stops), + ) + payload = _assemble_route(date, points, stops, journeys) + route_cache.store(recordings, date, sig, payload) + + _apply_labels(payload, geocoder) + return payload + + def _assemble_route(date: str, points, stops, journeys) -> dict: """Build the route payload (no labels — those are applied on read so they stay current as the geocode cache fills). This is the expensive- @@ -295,32 +346,137 @@ def get_route(request: Request, date: str) -> dict: _dt.date.fromisoformat(date) except ValueError: raise HTTPException(400, "bad date format") + geocoder = getattr(request.app.state, "geocode", None) + return build_route_payload( + _db(request), _settings(request).recordings, date, geocoder + ) - with _db(request).conn() as c: + +def _effective_durations(rows) -> dict[int, float]: + """Map clip id -> a usable duration. Uses the real probed ``duration_s`` + when present; otherwise estimates from the gap to the next clip on the + same channel (capped), so the editor renders before ffprobe catches up. 
+ ``rows`` must be ordered by timestamp ascending.""" + by_channel: dict[str, list] = {} + for r in rows: + by_channel.setdefault(channel_of(r["camera"]), []).append(r) + + eff: dict[int, float] = {} + for chrows in by_channel.values(): + for i, r in enumerate(chrows): + real = r["duration_s"] or 0.0 + if real > 0: + eff[r["id"]] = float(real) + continue + if i + 1 < len(chrows): + gap = chrows[i + 1]["timestamp"] - r["timestamp"] + eff[r["id"]] = ( + float(min(gap, FALLBACK_MAX_S)) + if gap > 0 else FALLBACK_DEFAULT_S + ) + else: + eff[r["id"]] = FALLBACK_DEFAULT_S + return eff + + +@router.get("/timeline") +def get_timeline( + request: Request, + date: str, + journey: int | None = Query(None, ge=0), + driving: bool = Query(True), + parking: bool = Query(True), + ro: bool = Query(True), +) -> dict: + """Everything the timeline editor needs for one journey (or a whole + day when ``journey`` is omitted): channels present, clips with + channel + start_ts + duration, time bounds, and the GPS route.""" + try: + _dt.date.fromisoformat(date) + except ValueError: + raise HTTPException(400, "bad date format, use YYYY-MM-DD") from None + + log.info("timeline: open date=%s journey=%s — building route", date, journey) + geocoder = getattr(request.app.state, "geocode", None) + db = _db(request) + route = build_route_payload( + db, _settings(request).recordings, date, geocoder + ) + log.info( + "timeline: route built (%d GPS point(s)) — querying clips", + route["point_count"], + ) + + start_ts: float | None = None + end_ts: float | None = None + if journey is not None: + journeys = route["journeys"] + if journey >= len(journeys): + raise HTTPException(404, "journey index out of range") + j = journeys[journey] + start_ts, end_ts = j["start_ts"], j["end_ts"] + + where = ["group_name = ?"] + params: list = [date] + kind_clause = _kind_filter_clause(driving, parking, ro) + if kind_clause is not None: + where.append(kind_clause) + + with db.conn() as c: rows = c.execute( - """ - 
SELECT path FROM clip_index - WHERE group_name = ? AND has_gpx = 1 + f""" + SELECT id, camera, timestamp, duration_s + FROM clip_index + WHERE {' AND '.join(where)} ORDER BY timestamp ASC """, - (date,), + params, ).fetchall() - gpx_paths = [r["path"] + ".gpx" for r in rows] + eff_dur = _effective_durations(rows) - # The GPX re-parse is the slow part (tens of seconds on a busy day) and - # only changes when the day's GPX files change, so cache it keyed by a - # signature of those files. Labels are applied after, on every request. - recordings = _settings(request).recordings - sig = route_cache.signature(gpx_paths) - payload = route_cache.load(recordings, date, sig) - if payload is None: - points, stops, journeys = gps_service.aggregate_day(gpx_paths) - payload = _assemble_route(date, points, stops, journeys) - route_cache.store(recordings, date, sig, payload) + clips = [] + present: set[str] = set() + for r in rows: + ts = r["timestamp"] + dur = eff_dur[r["id"]] + if start_ts is not None and (ts > end_ts or (ts + dur) < start_ts): + continue + ch = channel_of(r["camera"]) + present.add(ch) + clips.append({ + "id": r["id"], + "channel": ch, + "start_ts": ts, + "duration_s": dur, + }) - _apply_labels(payload, getattr(request.app.state, "geocode", None)) - return payload + channels = [ + {"key": k, "label": CHANNEL_LABELS[k]} + for k in CHANNEL_ORDER + if k in present + ] + + if start_ts is None and clips: + start_ts = min(c["start_ts"] for c in clips) + end_ts = max(c["start_ts"] + c["duration_s"] for c in clips) + + # Each clip block lazy-loads a filmstrip sprite, so this count is how + # many ffmpeg jobs the editor may kick off — the usual cause of a NAS + # CPU spike on open. 
+ log.info( + "timeline: date=%s journey=%s -> %d clip(s) across %d channel(s)", + date, journey, len(clips), len(channels), + ) + + return { + "date": date, + "journey": journey, + "bounds": {"start_ts": start_ts, "end_ts": end_ts}, + "channels": channels, + "clips": clips, + "gps": route if route["point_count"] > 0 else None, + } @router.get("/geocode") @@ -342,7 +498,7 @@ async def geocode( def _fetch_clip(request: Request, clip_id: int) -> dict: with _db(request).conn() as c: row = c.execute( - "SELECT id, path, basename, size_bytes " + "SELECT id, path, basename, size_bytes, duration_s " "FROM clip_index WHERE id = ?", (clip_id,), ).fetchone() @@ -372,6 +528,41 @@ async def clip_thumb(request: Request, clip_id: int): return FileResponse(path, media_type="image/jpeg") +@router.get("/clip/{clip_id}/filmstrip") +async def clip_filmstrip(request: Request, clip_id: int): + """Slicing metadata for the clip's filmstrip sprite (generates it + on demand). 204 when ffmpeg is unavailable so the UI shows + placeholder tiles.""" + clip = _fetch_clip(request, clip_id) + s = _settings(request) + meta = await filmstrip.ensure_filmstrip( + s.recordings, clip_id, clip["path"], clip.get("duration_s") + ) + if meta is None: + return Response(status_code=204) + return { + "sprite_url": f"/api/archive/clip/{clip_id}/filmstrip.jpg", + "frames": meta.frames, + "interval_s": meta.interval_s, + "tile_w": meta.tile_w, + "tile_h": meta.tile_h, + "duration_s": meta.duration_s, + } + + +@router.get("/clip/{clip_id}/filmstrip.jpg") +async def clip_filmstrip_jpg(request: Request, clip_id: int): + clip = _fetch_clip(request, clip_id) + s = _settings(request) + meta = await filmstrip.ensure_filmstrip( + s.recordings, clip_id, clip["path"], clip.get("duration_s") + ) + sp = filmstrip.sprite_path(s.recordings, clip_id) + if meta is None or not os.path.exists(sp): + raise HTTPException(404, "no filmstrip") + return FileResponse(sp, media_type="image/jpeg") + + @router.get("/clip/{clip_id}/video") 
def clip_video(request: Request, clip_id: int): """Stream the MP4. ``FileResponse`` handles HTTP Range @@ -406,6 +597,7 @@ async def rescan(request: Request) -> JSONResponse: request.app.state.db, s.recordings, ) ) + asyncio.create_task(durations.sweep_missing_durations(request.app.state.db)) return JSONResponse({"ok": True, "indexed": n}) diff --git a/web/routers/exports.py b/web/routers/exports.py index 323b5fe..98fe10e 100644 --- a/web/routers/exports.py +++ b/web/routers/exports.py @@ -43,11 +43,18 @@ def _resolve_default_encoder(app_state) -> str: return pref +class Segment(BaseModel): + channel: str = Field(pattern="^(front|rear|interior|other)$") + start_ts: float + end_ts: float + + class CreateExport(BaseModel): type: str = Field( - pattern="^(join_front|join_rear|pip|pip_rear)$" + pattern="^(join_front|join_rear|pip|pip_rear|switched)$" ) - clip_ids: List[int] + clip_ids: List[int] = [] + segments: list[Segment] | None = None encoder: str | None = Field( default=None, pattern="^(software|videotoolbox|nvenc|qsv|vaapi)$", @@ -82,9 +89,13 @@ def create(body: CreateExport, request: Request) -> dict: f"encoder '{encoder}' not available on this server", ) try: - job_id = worker.enqueue( - body.type, body.clip_ids, encoder=encoder, - ) + if body.type == "switched": + segs = [s.model_dump() for s in (body.segments or [])] + job_id = worker.enqueue_switched(segs, encoder=encoder) + else: + job_id = worker.enqueue( + body.type, body.clip_ids, encoder=encoder, + ) except RuntimeError as e: raise HTTPException(503, str(e)) except ValueError as e: @@ -152,7 +163,12 @@ def download(job_id: int, request: Request): @router.delete("/{job_id}", dependencies=[Depends(require_csrf)]) -def delete(job_id: int, request: Request) -> dict: +async def delete(job_id: int, request: Request) -> dict: + # If this is the job currently rendering, kill its ffmpeg first so we + # don't leave an orphaned encoder running (and writing to a deleted row). 
+ worker = getattr(request.app.state, "export_worker", None) + if worker is not None: + await worker.cancel(job_id) with request.app.state.db.write() as c: row = c.execute( "SELECT output_path FROM export_jobs WHERE id=?", @@ -167,3 +183,23 @@ def delete(job_id: int, request: Request) -> dict: pass c.execute("DELETE FROM export_jobs WHERE id=?", (job_id,)) return {"ok": True} + + +@router.post("/{job_id}/pause", dependencies=[Depends(require_csrf)]) +async def pause(job_id: int, request: Request) -> dict: + worker = getattr(request.app.state, "export_worker", None) + if worker is None: + raise HTTPException(503, "export worker not running") + if not await worker.pause(job_id): + raise HTTPException(409, "job is not currently rendering") + return {"ok": True, "state": "paused"} + + +@router.post("/{job_id}/resume", dependencies=[Depends(require_csrf)]) +async def resume(job_id: int, request: Request) -> dict: + worker = getattr(request.app.state, "export_worker", None) + if worker is None: + raise HTTPException(503, "export worker not running") + if not await worker.resume(job_id): + raise HTTPException(409, "job is not currently paused") + return {"ok": True, "state": "running"} diff --git a/web/services/durations.py b/web/services/durations.py new file mode 100644 index 0000000..d9b80cb --- /dev/null +++ b/web/services/durations.py @@ -0,0 +1,239 @@ +"""Populate ``clip_index.duration_s`` via ffprobe. + +The scanner indexes clips from filenames but never measures their +length. ``duration_s`` drives filmstrip frame counts and the +timeline layout, so probe any clip missing it and store the value. +Mirrors ``scanner.sweep_missing_thumbs``: bounded concurrency, +idempotent (only NULL/zero rows are probed), non-fatal on failure. 
+""" +from __future__ import annotations + +import asyncio +import logging +import os +import shutil + +from ..db import Database + +log = logging.getLogger("viofosync.durations") + + +# mvhd ``duration`` sentinel meaning "unknown" (all bits set), per the +# ISO base media format — 32-bit for a v0 header, 64-bit for v1. +_MVHD_UNKNOWN = {0xFFFFFFFF, 0xFFFFFFFFFFFFFFFF} + + +def _read_box_header(f): + """Read an ISO-BMFF box header at the current offset. + + Returns ``(size, type, header_len)`` where ``size`` is the total box + length including the header (or ``None`` for the size==0 "to EOF" form), + or ``None`` at EOF / on a short read. + """ + hdr = f.read(8) + if len(hdr) < 8: + return None + size = int.from_bytes(hdr[:4], "big") + btype = hdr[4:8] + header_len = 8 + if size == 1: # 64-bit largesize follows (big mdat) + ext = f.read(8) + if len(ext) < 8: + return None + size = int.from_bytes(ext, "big") + header_len = 16 + elif size == 0: # extends to end of file + size = None + return size, btype, header_len + + +def _find_box(f, target: bytes, region_end: int): + """Scan sibling boxes from the current offset up to ``region_end`` and + return ``(payload_start, box_end)`` of the first box of ``target`` type, + or ``None``. On a match the file is left positioned at ``payload_start``. + Bails out (None) on a malformed/truncated box rather than looping.""" + while f.tell() + 8 <= region_end: + start = f.tell() + hdr = _read_box_header(f) + if hdr is None: + return None + size, btype, header_len = hdr + box_end = region_end if size is None else start + size + # ``==`` is a valid empty box (e.g. ffmpeg's zero-payload ``free``); + # only a box claiming to be smaller than its own header, or running + # past the parent, is malformed. 
+ if box_end < start + header_len or box_end > region_end: + return None + if btype == target: + return start + header_len, box_end + f.seek(box_end) + return None + + +def _probe_duration_mvhd(path: str) -> float | None: + """Clip duration in seconds read directly from the MP4 ``moov/mvhd`` + box — no subprocess. Returns ``None`` when the file isn't a parseable + MP4, ``mvhd`` is absent, or the duration is unknown, so the caller can + fall back to ffprobe. + + Only a handful of box headers plus the ~108-byte ``mvhd`` are read; the + huge ``mdat`` is seeked past, so this is cheap even when ``moov`` is at + the end of a large file on a slow NAS volume. + """ + try: + end = os.path.getsize(path) + with open(path, "rb") as f: + moov = _find_box(f, b"moov", end) + if moov is None: + return None + moov_start, moov_end = moov + f.seek(moov_start) + mvhd = _find_box(f, b"mvhd", moov_end) + if mvhd is None: + return None + f.seek(mvhd[0]) + version_flags = f.read(4) + if len(version_flags) < 4: + return None + if version_flags[0] == 1: + buf = f.read(28) # ctime(8) mtime(8) timescale(4) dur(8) + if len(buf) < 28: + return None + timescale = int.from_bytes(buf[16:20], "big") + duration = int.from_bytes(buf[20:28], "big") + else: + buf = f.read(16) # ctime(4) mtime(4) timescale(4) dur(4) + if len(buf) < 16: + return None + timescale = int.from_bytes(buf[8:12], "big") + duration = int.from_bytes(buf[12:16], "big") + except (OSError, ValueError): + return None + if timescale <= 0 or duration in _MVHD_UNKNOWN: + return None + secs = duration / timescale + return secs if secs > 0 else None + + +async def _probe_with_method(path: str) -> tuple[float | None, str | None]: + """``(duration, method)`` where ``method`` is ``"mvhd"``, ``"ffprobe"`` + or ``None``. 
The sweep uses this to report how clips were resolved; + :func:`probe_duration` is the value-only wrapper.""" + secs = await asyncio.to_thread(_probe_duration_mvhd, path) + if secs is not None: + return secs, "mvhd" + secs = await _probe_duration_ffprobe(path) + return (secs, "ffprobe") if secs is not None else (None, None) + + +async def probe_duration(path: str) -> float | None: + """Clip length in seconds. Fast path parses the MP4 ``mvhd`` box + directly (no subprocess); falls back to ffprobe for anything that + doesn't parse (odd containers, damaged moov, non-MP4).""" + secs, _ = await _probe_with_method(path) + return secs + + +async def _probe_duration_ffprobe(path: str) -> float | None: + """Clip length in seconds via ffprobe, or None if ffprobe is + missing / the probe fails / the value is non-positive.""" + ffprobe = shutil.which("ffprobe") + if ffprobe is None: + return None + try: + proc = await asyncio.create_subprocess_exec( + ffprobe, "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + path, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.DEVNULL, + ) + out, _ = await asyncio.wait_for(proc.communicate(), timeout=15.0) + except (TimeoutError, OSError): + return None + try: + d = float(out.decode().strip()) + except ValueError: + return None + return d if d > 0 else None + + +def _flush(db: Database, batch: list[tuple[int, float]]) -> int: + """Persist a batch of (clip_id, duration) pairs. Returns rows written.""" + if not batch: + return 0 + with db.write() as c: + for clip_id, dur in batch: + c.execute( + "UPDATE clip_index SET duration_s = ? WHERE id = ?", + (dur, clip_id), + ) + return len(batch) + + +async def sweep_missing_durations( + db: Database, *, concurrency: int = 4, batch_size: int = 200 +) -> int: + """ffprobe every indexed clip with a NULL/zero ``duration_s`` and + store the result. Returns the number of rows updated. Idempotent. 
+ + Results are persisted in batches *as they are probed*, not all at the + end, so an interrupted sweep (server restart/shutdown) keeps the work + it has already done — successive runs whittle down the remainder + instead of redoing all ~N clips every boot. + """ + with db.conn() as c: + rows = c.execute( + "SELECT id, path FROM clip_index " + "WHERE duration_s IS NULL OR duration_s <= 0" + ).fetchall() + todo = [(r["id"], r["path"]) for r in rows if os.path.isfile(r["path"])] + if not todo: + return 0 + + log.info( + "duration sweep: probing %d clip(s) via mvhd (ffprobe fallback), " + "concurrency=%d", len(todo), concurrency, + ) + sem = asyncio.Semaphore(concurrency) + + async def _one(clip_id: int, path: str) -> tuple[int, float | None, str | None]: + async with sem: + try: + dur, method = await _probe_with_method(path) + return clip_id, dur, method + except asyncio.CancelledError: + raise # shutdown — let it propagate so we flush + stop + except Exception as e: # pragma: no cover — non-fatal + log.warning("duration probe failed for %s: %s", path, e) + return clip_id, None, None + + tasks = [asyncio.ensure_future(_one(cid, p)) for cid, p in todo] + updated = 0 + methods = {"mvhd": 0, "ffprobe": 0} + batch: list[tuple[int, float]] = [] + try: + for t in tasks: + clip_id, dur, method = await t + if dur is not None: + if method in methods: + methods[method] += 1 + batch.append((clip_id, dur)) + if len(batch) >= batch_size: + updated += _flush(db, batch) + batch = [] + updated += _flush(db, batch) + batch = [] + finally: + # On interruption, abandon the rest and persist what we have so + # the next run resumes from here rather than starting over. 
+ for t in tasks: + if not t.done(): + t.cancel() + updated += _flush(db, batch) + log.info( + "duration sweep: %d updated (%d via mvhd, %d via ffprobe)", + updated, methods["mvhd"], methods["ffprobe"], + ) + return updated diff --git a/web/services/exporter.py b/web/services/exporter.py index ac70611..95b1f06 100644 --- a/web/services/exporter.py +++ b/web/services/exporter.py @@ -24,18 +24,27 @@ from __future__ import annotations import asyncio +import contextlib import json import logging import os import re import shutil +import signal import subprocess import tempfile import time from typing import List, Optional + +class _ExportCancelled(Exception): + """Raised inside the worker when the running job is deleted/cancelled, so + _run_job unwinds — cleaning its temp dirs via its ``finally`` blocks — + without marking the (now-deleted) row as failed.""" + from ..db import Database from ..settings import SettingsProvider +from .naming import channel_of log = logging.getLogger("viofosync.exporter") @@ -49,6 +58,48 @@ def exports_dir(recordings: str) -> str: return d +# Minimum trim length; sub-frame slivers from clamping are dropped. +_MIN_PIECE_S = 0.05 + + +def build_switch_pieces(segments: list, clips: list) -> list: + """Turn a switched-export plan into an ordered list of trims. + + ``segments`` is ``[{channel, start_ts, end_ts}, ...]`` (in output + order). ``clips`` is ``[{path, channel, start_ts, duration_s}, ...]`` + (``channel`` already derived via ``naming.channel_of``). Returns + ``[{path, ss, t}, ...]`` — each piece trims ``path`` from offset + ``ss`` for duration ``t`` seconds. Clips are clamped to the segment + window; pieces shorter than ``_MIN_PIECE_S`` are dropped. 
+ """ + pieces: list = [] + for seg in segments: + s, e, ch = seg["start_ts"], seg["end_ts"], seg["channel"] + seg_clips = sorted( + ( + c for c in clips + if c["channel"] == ch + and c["start_ts"] < e + and c["start_ts"] + (c.get("duration_s") or 0) > s + ), + key=lambda c: c["start_ts"], + ) + for c in seg_clips: + cs = c["start_ts"] + ce = cs + (c.get("duration_s") or 0) + in_ = max(s, cs) - cs + out_ = min(e, ce) - cs + if out_ - in_ >= _MIN_PIECE_S: + pieces.append({ + "path": c["path"], + # float() so integer unix timestamps still yield + # float offsets (consistent ffmpeg -ss/-t strings). + "ss": round(float(in_), 3), + "t": round(float(out_ - in_), 3), + }) + return pieces + + def reconcile_orphan_jobs(db: Database) -> int: """Mark rows stuck at ``state='running'`` as failed. @@ -59,12 +110,14 @@ def reconcile_orphan_jobs(db: Database) -> int: Returns the number of rows updated. """ with db.write() as c: + # 'paused' jobs are a SIGSTOP'd ffmpeg child that the restart killed, + # so they can't resume either — reconcile them too. cur = c.execute( "UPDATE export_jobs " "SET state='failed', " " error='interrupted by container restart', " " finished_at=? " - "WHERE state='running'", + "WHERE state IN ('running', 'paused')", (int(time.time()),), ) return cur.rowcount @@ -115,16 +168,52 @@ def _test_encoder_sync(encoder: str) -> bool: the container — common on Synology where ``/dev/dri`` isn't mapped through by default. The 1-frame test exercises the exact init path the real export uses, so anything that - survives this is genuinely usable.""" + survives this is genuinely usable. + + QSV takes a dedicated branch (its own device-init + scale_qsv command) + rather than the generic path below.""" if encoder == "software": # libx264 ships with every ffmpeg build; the -encoders # presence check is enough. 
return True + if encoder == "qsv": + # Exercise the real QSV init path: device creation (the step that + # returned MFX session -9 on Alpine), a VPP filter, and the encoder. + # lavfi yields software frames, so we hwupload here; the real export + # uses -hwaccel qsv decode instead, a strictly easier init once the + # MFX session exists. + cmd = [ + shutil.which("ffmpeg") or "ffmpeg", + "-hide_banner", "-loglevel", "error", + "-init_hw_device", "qsv=hw", "-filter_hw_device", "hw", + "-f", "lavfi", + "-i", "color=size=64x64:duration=0.1:rate=1", + # extra_hw_frames=16: a small surface pool for the upload — enough + # to satisfy the QSV VPP/encoder without reserving GPU memory. + "-vf", "format=nv12,hwupload=extra_hw_frames=16,scale_qsv=64:64", + "-c:v", "h264_qsv", "-global_quality", "23", + "-frames:v", "1", + "-f", "null", "-", + ] + try: + result = subprocess.run(cmd, capture_output=True, timeout=10) + return result.returncode == 0 + except (subprocess.TimeoutExpired, OSError): + return False + # Exercise the REAL pipeline a filtered export uses: init the hw device + # and run a filter (here a no-op format/hwupload for vaapi) before the + # encoder. A bare encoder test passes for vaapi even though every real + # export fails, because ffmpeg auto-inserts the upload only when there's + # no explicit filter chain — a false positive we must not repeat. + upload = _hw_upload_filter(encoder) + vf = (["-vf", upload] if upload else []) cmd = [ shutil.which("ffmpeg") or "ffmpeg", "-hide_banner", "-loglevel", "error", + *_hw_init_args(encoder), "-f", "lavfi", "-i", "color=size=64x64:duration=0.1:rate=1", + *vf, *video_codec_args(encoder), "-frames:v", "1", "-f", "null", "-", @@ -186,7 +275,21 @@ async def _check(name: str, present: bool) -> tuple[str, bool]: } -def _pip_filter_complex(position: str, main: str = "front") -> str: +def _scale_filter(w: int, h: int, encoder: str) -> str: + """Full-frame scale filter in the right dialect for ``encoder``. 
+ + QSV runs the scaler on the GPU (``scale_qsv``) and omits ``setsar`` — + that filter can't operate on QSV surfaces and SAR is carried by the + encoder instead. Every other encoder uses the software ``scale`` plus + ``setsar=1`` (VAAPI then appends hwupload via :func:`_with_upload`).""" + if encoder == "qsv": + return f"scale_qsv=w={w}:h={h}" + return f"scale={w}:{h},setsar=1" + + +def _pip_filter_complex( + position: str, main: str = "front", encoder: str = "software", +) -> str: """Build the -filter_complex argument for the PiP overlay. ffmpeg input 0 is the front clip, input 1 is the rear clip. @@ -203,6 +306,15 @@ def _pip_filter_complex(position: str, main: str = "front") -> str: # type); anything else falls through to rear-main rather than # erroring, matching the lenient position handling above. base, inset = ("0", "1") if main == "front" else ("1", "0") + if encoder == "qsv": + # GPU composition: scale_qsv shrinks the inset, overlay_qsv composes + # on the iGPU. overlay_qsv takes x=/y= (the legacy overlay's single + # "x:y" positional form isn't accepted), so split the coord pair. + x, y = coords.split(":") + return ( + f"[{inset}:v]scale_qsv=w=iw/4:h=ih/4[pip];" + f"[{base}:v][pip]overlay_qsv=x={x}:y={y}" + ) return ( f"[{inset}:v]scale=iw/4:ih/4[pip];" f"[{base}:v][pip]overlay={coords}" @@ -220,13 +332,75 @@ def video_codec_args(encoder: str) -> List[str]: "-c:v", "h264_nvenc", "-preset", "p5", "-cq", "23", ] if encoder == "qsv": + # ICQ (intelligent constant quality): -global_quality acts as the + # ICQ quality level when no bitrate is set — QSV's best quality-per-bit + # mode on Gen 9.5. look_ahead is disabled: Gen 9.5's LA is weak and + # only adds latency. QP 23 ≈ the VAAPI CQP 24 used above. 
return [ - "-c:v", "h264_qsv", "-global_quality", "23", + "-c:v", "h264_qsv", "-global_quality", "23", "-look_ahead", "0", ] + if encoder == "vaapi": + # Constant-QP rate control; pairs with the format=nv12,hwupload the + # filter chain adds and the -vaapi_device global arg. + return ["-c:v", "h264_vaapi", "-rc_mode", "CQP", "-qp", "24"] # ``software`` (default / fallback) — widely compatible. return ["-c:v", "libx264", "-preset", "fast", "-crf", "23"] +# The DRM render node VAAPI uploads/encodes through. Standard on a +# single-iGPU NAS; setuid.sh already grants the app user access to it. +VAAPI_RENDER_NODE = "/dev/dri/renderD128" + + +def _hw_init_args(encoder: str) -> List[str]: + """Global ffmpeg args to initialise a hardware device for ``encoder``. + + VAAPI needs an explicit render node bound before the inputs. The others + we support (videotoolbox, nvenc) derive their device implicitly, and + software needs nothing. QSV creates a shared "qsv=hw" device (used by + decode, the VPP filters via -filter_hw_device, and the encoder) so + frames stay on the GPU end to end. + """ + if encoder == "qsv": + # One QSV device shared by decode, the VPP filters (scale_qsv/ + # overlay_qsv via -filter_hw_device) and the encoder, so frames + # never leave the GPU. + return ["-init_hw_device", "qsv=hw", "-filter_hw_device", "hw"] + if encoder == "vaapi": + return ["-vaapi_device", VAAPI_RENDER_NODE] + return [] + + +def _hw_decode_args(encoder: str) -> List[str]: + """Per-input flags that decode on the GPU and keep frames there. + + QSV exports run the whole chain on the iGPU: these go *before* each + ``-i`` so ffmpeg decodes into QSV surfaces that scale_qsv/overlay_qsv + and h264_qsv consume without a GPU->RAM round trip. 
Every other + encoder decodes on the CPU (VAAPI uploads later via hwupload), so they + get nothing here.""" + if encoder == "qsv": + return ["-hwaccel", "qsv", "-hwaccel_output_format", "qsv"] + return [] + + +def _hw_upload_filter(encoder: str) -> str: + """Filter that moves software frames onto the GPU before a hardware + encoder that requires it. VAAPI does (``h264_vaapi`` only accepts VAAPI + surfaces); videotoolbox/nvenc accept software frames directly, so they + get ``""``. Append to the end of a ``-vf`` / ``-filter_complex`` chain.""" + if encoder == "vaapi": + return "format=nv12,hwupload" + return "" + + +def _with_upload(chain: str, encoder: str) -> str: + """Append the hardware-upload filter to ``chain`` when ``encoder`` needs + it, else return ``chain`` unchanged.""" + up = _hw_upload_filter(encoder) + return f"{chain},{up}" if up else chain + + class ExportWorker: def __init__( self, @@ -239,6 +413,67 @@ def __init__( self.broadcast = broadcast self._task: Optional[asyncio.Task] = None self._stop = asyncio.Event() + # Control of the one job running right now. Only the worker loop and + # the (same-event-loop) HTTP handlers touch these, so no locking. + self._current_job_id: Optional[int] = None + self._current_proc: Optional[asyncio.subprocess.Process] = None + self._cancel_current = False + self._paused = False + self._resume = asyncio.Event() + self._resume.set() # not paused + + def _set_state(self, job_id: int, state: str) -> None: + with self.db.write() as c: + c.execute( + "UPDATE export_jobs SET state=? WHERE id=?", (state, job_id) + ) + + async def pause(self, job_id: int) -> bool: + """Freeze the running job's encoder (SIGSTOP) and mark it paused. 
+ Returns False if ``job_id`` isn't the job currently running.""" + if job_id != self._current_job_id: + return False + self._paused = True + self._resume.clear() + if self._current_proc is not None: + with contextlib.suppress(Exception): + self._current_proc.send_signal(signal.SIGSTOP) + self._set_state(job_id, "paused") + await self.broadcast( + {"type": "export_state", "job_id": job_id, "state": "paused"} + ) + return True + + async def resume(self, job_id: int) -> bool: + """Resume a paused job (SIGCONT) and mark it running again.""" + if job_id != self._current_job_id: + return False + self._paused = False + self._resume.set() + if self._current_proc is not None: + with contextlib.suppress(Exception): + self._current_proc.send_signal(signal.SIGCONT) + self._set_state(job_id, "running") + await self.broadcast( + {"type": "export_state", "job_id": job_id, "state": "running"} + ) + return True + + async def cancel(self, job_id: int) -> bool: + """Kill the running job's encoder so a delete-in-progress actually + stops the ffmpeg work. Returns False if ``job_id`` isn't running. 
+ The worker unwinds via _ExportCancelled (no 'failed' row).""" + if job_id != self._current_job_id: + return False + self._cancel_current = True + self._paused = False + self._resume.set() # unblock a paused job so it can unwind + if self._current_proc is not None: + with contextlib.suppress(Exception): + self._current_proc.send_signal(signal.SIGCONT) # unfreeze first + with contextlib.suppress(Exception): + self._current_proc.kill() + return True def start(self) -> None: if self._task is None or self._task.done(): @@ -296,6 +531,31 @@ def enqueue( ) return cur.lastrowid + def enqueue_switched(self, segments: list, encoder: str = "software") -> int: + if not ffmpeg_available(): + raise RuntimeError("ffmpeg not installed on this host") + if not segments: + raise ValueError("no segments") + for s in segments: + if "channel" not in s or "start_ts" not in s or "end_ts" not in s: + raise ValueError("segment missing channel/start_ts/end_ts") + if not (s["end_ts"] > s["start_ts"]): + raise ValueError("segment end_ts must be after start_ts") + + payload = json.dumps({"segments": segments, "encoder": encoder}) + clip_start = int(min(s["start_ts"] for s in segments)) + clip_end = int(max(s["end_ts"] for s in segments)) + with self.db.write() as c: + cur = c.execute( + """ + INSERT INTO export_jobs + (type, clip_ids, state, created_at, clip_start, clip_end) + VALUES ('switched', ?, 'queued', ?, ?, ?) 
+ """, + (payload, int(time.time()), clip_start, clip_end), + ) + return cur.lastrowid + # ---- Background loop ---- async def _run(self) -> None: @@ -309,11 +569,25 @@ async def _run(self) -> None: except asyncio.TimeoutError: pass continue - try: - await self._run_job(job) - except Exception as e: # pragma: no cover - log.exception("export job %d failed", job["id"]) - self._finish(job["id"], False, str(e), None) + await self._process(job) + + async def _process(self, job: dict) -> None: + """Run one job, translating a cancellation (delete-in-progress) into + a clean discard and any real error into a 'failed' row. Always resets + the per-job control state afterwards.""" + try: + await self._run_job(job) + except _ExportCancelled: + log.info("export job %d cancelled — discarded", job["id"]) + except Exception as e: # pragma: no cover + log.exception("export job %d failed", job["id"]) + self._finish(job["id"], False, str(e), None) + finally: + self._current_job_id = None + self._current_proc = None + self._cancel_current = False + self._paused = False + self._resume.set() def _pop_next(self) -> Optional[dict]: with self.db.write() as c: @@ -329,6 +603,11 @@ def _pop_next(self) -> Optional[dict]: "SET state='running', started_at=? 
WHERE id=?", (int(time.time()), row["id"]), ) + self._current_job_id = row["id"] + self._current_proc = None + self._cancel_current = False + self._paused = False + self._resume.set() return dict(row) def _finish( @@ -396,11 +675,17 @@ async def _run_job(self, job: dict) -> None: else: clip_ids = raw.get("clip_ids", []) encoder = raw.get("encoder") or "software" - clips = self._fetch_clips(clip_ids) out = os.path.join( exports_dir(snap.recordings), f"{job['id']}.mp4" ) + if job["type"] == "switched": + segments = raw.get("segments", []) if isinstance(raw, dict) else [] + await self._run_switched(job, segments, encoder, out) + return + + clips = self._fetch_clips(clip_ids) + if job["type"] in ("join_front", "join_rear"): wanted = "F" if job["type"] == "join_front" else "R" # ``camera`` may be ``F``, ``R``, ``PF``, ``PR``, etc. @@ -508,7 +793,7 @@ async def _pip( tmp = tempfile.mkdtemp(prefix="vfs_pip_") parts: List[str] = [] total_segments = len(pairs) - filter_complex = _pip_filter_complex(position, main=main) + filter_complex = _pip_filter_complex(position, main=main, encoder=encoder) try: for i, (_, p) in enumerate(pairs): # Probe this segment's duration so the inner @@ -532,11 +817,18 @@ async def _pip( rc, err = await self._run_ffmpeg( job_id, [ + *_hw_init_args(encoder), "-y", + # decode flags are per-input: repeated before each -i + # so QSV decodes both clips straight onto the GPU. + *_hw_decode_args(encoder), "-i", p["front"]["path"], + *_hw_decode_args(encoder), "-i", p["rear"]["path"], + # _with_upload appends hwupload for VAAPI only; for QSV + # the filter already yields GPU surfaces, so it's a no-op. 
"-filter_complex", - filter_complex, + _with_upload(filter_complex, encoder), *video_codec_args(encoder), "-c:a", "copy", seg, @@ -586,6 +878,101 @@ async def _pip( finally: shutil.rmtree(tmp, ignore_errors=True) + async def _run_switched(self, job, segments, encoder, out) -> None: + lo = min(s["start_ts"] for s in segments) + hi = max(s["end_ts"] for s in segments) + with self.db.conn() as c: + rows = c.execute( + """ + SELECT path, camera, timestamp, duration_s + FROM clip_index + WHERE timestamp < ? + AND timestamp + COALESCE(duration_s, 0) > ? + """, + (hi, lo), + ).fetchall() + clips = [ + { + "path": r["path"], + "channel": channel_of(r["camera"]), + "start_ts": r["timestamp"], + "duration_s": r["duration_s"], + } + for r in rows + ] + pieces = build_switch_pieces(segments, clips) + if not pieces: + self._finish(job["id"], False, "no footage in selection", None) + return + + res = await self._probe_resolution(pieces[0]["path"]) + w, h = res if res else (1920, 1080) + vf = _with_upload(_scale_filter(w, h, encoder), encoder) + + tmp = tempfile.mkdtemp(prefix="vfs_switched_") + parts: List[str] = [] + n = len(pieces) + try: + for i, pc in enumerate(pieces): + seg = os.path.join(tmp, f"seg_{i:04d}.mp4") + await self.broadcast({ + "type": "export_progress", "job_id": job["id"], + "progress": i / max(1, n), + "stage": f"segment {i + 1}/{n}", + }) + rc, err = await self._run_ffmpeg( + job["id"], + [ + *_hw_init_args(encoder), + *_hw_decode_args(encoder), + "-y", + "-ss", str(pc["ss"]), + "-i", pc["path"], + "-t", str(pc["t"]), + "-vf", vf, + *video_codec_args(encoder), + "-c:a", "aac", + seg, + ], + pc["t"], + progress_base=i / max(1, n), + progress_span=1.0 / max(1, n), + stage=f"segment {i + 1}/{n}", + ) + if rc != 0: + self._finish( + job["id"], False, + f"segment {i + 1} failed (ffmpeg exit {rc}): {err}", + None, + ) + return + parts.append(seg) + + await self.broadcast({ + "type": "export_progress", "job_id": job["id"], + "progress": 0.98, "stage": 
"concatenating", + }) + list_file = os.path.join(tmp, "parts.txt") + with open(list_file, "w") as f: + for p in parts: + safe = os.path.abspath(p).replace("'", "'\\''") + f.write(f"file '{safe}'\n") + rc, err = await self._run_ffmpeg( + job["id"], + ["-y", "-f", "concat", "-safe", "0", + "-i", list_file, "-c", "copy", out], + None, stage="concatenating", + ) + if rc == 0 and os.path.exists(out): + self._finish(job["id"], True, None, out) + else: + self._finish( + job["id"], False, + f"concat failed (ffmpeg exit {rc}): {err}", None, + ) + finally: + shutil.rmtree(tmp, ignore_errors=True) + async def _probe_total(self, clips: List[dict]) -> Optional[float]: ffprobe = shutil.which("ffprobe") if ffprobe is None: @@ -608,6 +995,25 @@ async def _probe_total(self, clips: List[dict]) -> Optional[float]: return None return total + async def _probe_resolution(self, path: str): + """(width, height) of the first video stream, or None.""" + ffprobe = shutil.which("ffprobe") + if ffprobe is None: + return None + proc = await asyncio.create_subprocess_exec( + ffprobe, "-v", "error", "-select_streams", "v:0", + "-show_entries", "stream=width,height", + "-of", "csv=s=x:p=0", path, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.DEVNULL, + ) + out, _ = await proc.communicate() + try: + w, h = out.decode().strip().split("x") + return int(w), int(h) + except ValueError: + return None + async def _run_ffmpeg( self, job_id: int, @@ -631,12 +1037,23 @@ async def _run_ffmpeg( "-progress", "pipe:1", "-nostats", *args, ] + # A delete may have landed between segments — bail before spawning. + if self._cancel_current: + raise _ExportCancelled + # Block here while the job is paused (e.g. paused between segments). 
+ await self._resume.wait() + log.info("export job %d: %s", job_id, " ".join(cmd)) proc = await asyncio.create_subprocess_exec( *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) + self._current_proc = proc + # If a pause raced in just before the spawn, stop the child now. + if self._paused: + with contextlib.suppress(Exception): + proc.send_signal(signal.SIGSTOP) stderr_tail: list[str] = [] @@ -683,6 +1100,11 @@ async def pump_stderr(): await asyncio.gather( pump_stdout(), pump_stderr(), proc.wait() ) + self._current_proc = None + # If the job was cancelled (its child was killed), unwind cleanly + # instead of reporting a spurious ffmpeg failure for a deleted row. + if self._cancel_current: + raise _ExportCancelled rc = proc.returncode or 0 err = " | ".join(stderr_tail[-3:]) if stderr_tail else "" return rc, err diff --git a/web/services/filmstrip.py b/web/services/filmstrip.py new file mode 100644 index 0000000..00a93bd --- /dev/null +++ b/web/services/filmstrip.py @@ -0,0 +1,234 @@ +"""On-demand filmstrip sprite-sheet generation via ffmpeg. + +One JPEG per clip: a horizontal montage of frames, one every +``INTERVAL_S`` seconds, packed with ffmpeg's ``tile`` filter. Cached +to ``$RECORDINGS/.filmstrips/.jpg`` with a sidecar +``.json`` holding the slicing metadata the frontend needs. + +Mirrors ``thumbs.py``: the first request shells out to ffmpeg; later +requests read the cache. Returns ``None`` if ffmpeg is missing or +extraction failed, so the API layer can serve a placeholder. 
+""" +from __future__ import annotations + +import asyncio +import contextlib +import json +import logging +import math +import os +import shutil +import tempfile +import time +from dataclasses import asdict, dataclass + +log = logging.getLogger("viofosync.filmstrip") + +INTERVAL_S = 8 # one frame every 8 seconds +TILE_W = 160 # tile width (16:9 dashcam frame) +TILE_H = 90 # tile height +_MAX_CONCURRENCY = 3 # cap simultaneous ffmpeg children +_FFMPEG_TIMEOUT_S = 60.0 # kill a sprite job that outruns this + +# ffmpeg-missing warns once, not once-per-clip — a whole day of clips +# would otherwise flood the log with the same line. +_warned_no_ffmpeg = False + + +@dataclass +class FilmstripMeta: + frames: int + interval_s: int + tile_w: int + tile_h: int + duration_s: float + + +def _cache_dir(recordings: str) -> str: + d = os.path.join(recordings, ".filmstrips") + os.makedirs(d, exist_ok=True) + return d + + +def sprite_path(recordings: str, clip_id: int) -> str: + return os.path.join(_cache_dir(recordings), f"{clip_id}.jpg") + + +def meta_path(recordings: str, clip_id: int) -> str: + return os.path.join(_cache_dir(recordings), f"{clip_id}.json") + + +def frame_count(duration_s: float | None, interval_s: int = INTERVAL_S) -> int: + """Number of tiles for a clip: one frame every ``interval_s`` + seconds, always at least one.""" + if not duration_s or duration_s <= 0: + return 1 + return max(1, math.ceil(duration_s / interval_s)) + + +# Per-event-loop semaphores. A module-level Semaphore created at +# import binds to whichever loop first acquires it, which breaks +# pytest's function-scoped loops; keying by the running loop keeps +# it correct in both tests and the single-loop production server. 
+_sems: dict[asyncio.AbstractEventLoop, asyncio.Semaphore] = {} + + +def _semaphore() -> asyncio.Semaphore: + loop = asyncio.get_running_loop() + sem = _sems.get(loop) + if sem is None: + sem = asyncio.Semaphore(_MAX_CONCURRENCY) + _sems[loop] = sem + return sem + + +def _extract_cmd(ffmpeg: str, video_path: str, ts: int, out: str) -> list[str]: + """ffmpeg argv to grab one scaled frame near ``ts`` seconds. + + Input seeking (``-ss`` *before* ``-i``) jumps to the nearest keyframe via + the container index and reads only a small chunk around ``ts`` — so a + whole sprite reads ~one chunk per tile instead of streaming the entire + file. Benchmarked ~3x faster wall-clock and ~half the CPU of the old + single-pass decode on a NAS, where reading the file was the bottleneck. + Software only: hardware decode is *slower* here (it can't honour + ``skip_frame`` and pays a per-frame GPU->RAM download).""" + return [ + ffmpeg, "-loglevel", "error", "-y", + "-ss", str(ts), + "-i", video_path, + "-an", + "-frames:v", "1", + "-vf", f"scale={TILE_W}:{TILE_H}", + out, + ] + + +def _tile_cmd(ffmpeg: str, pattern: str, frames: int, out: str) -> list[str]: + """ffmpeg argv to stitch the extracted per-tile frames (an image2 + sequence) into the horizontal sprite. Tiny, fast, no large I/O.""" + return [ + ffmpeg, "-loglevel", "error", "-y", + "-start_number", "0", + "-i", pattern, + "-vf", f"tile={frames}x1", + "-frames:v", "1", + out, + ] + + +async def _run_ffmpeg(cmd: list[str], timeout: float) -> int | None: + """Run one ffmpeg child with a timeout. 
Returns its return code, or + ``None`` if it timed out (the child is killed and reaped).""" + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.DEVNULL, + ) + try: + await asyncio.wait_for(proc.wait(), timeout=timeout) + except TimeoutError: # asyncio.TimeoutError is the builtin since 3.11 + proc.kill() + with contextlib.suppress(Exception): + await proc.wait() # reap the killed child (no zombie) + return None + return proc.returncode + + +async def _generate_sprite( + ffmpeg: str, video_path: str, sprite: str, frames: int +) -> bool: + """Seek to each tile's timestamp, extract one scaled frame, then stitch + them into ``sprite``. Returns True on success. Extractions run + sequentially so a single sprite uses one ffmpeg at a time (the caller's + semaphore bounds how many sprites run at once).""" + tiles_dir = tempfile.mkdtemp(prefix=".tiles_", dir=os.path.dirname(sprite)) + try: + for i in range(frames): + tile = os.path.join(tiles_dir, f"f{i:04d}.jpg") + rc = await _run_ffmpeg( + _extract_cmd(ffmpeg, video_path, i * INTERVAL_S, tile), + _FFMPEG_TIMEOUT_S, + ) + if rc != 0 or not (os.path.exists(tile) and os.path.getsize(tile) > 0): + return False + pattern = os.path.join(tiles_dir, "f%04d.jpg") + rc = await _run_ffmpeg( + _tile_cmd(ffmpeg, pattern, frames, sprite), _FFMPEG_TIMEOUT_S + ) + return rc == 0 and os.path.exists(sprite) and os.path.getsize(sprite) > 0 + finally: + shutil.rmtree(tiles_dir, ignore_errors=True) + + +def _read_cached_meta(mp: str) -> FilmstripMeta | None: + try: + with open(mp) as f: + return FilmstripMeta(**json.load(f)) + except (OSError, ValueError, TypeError, KeyError): + return None # corrupt/old/partial sidecar -> regenerate + + +async def ensure_filmstrip( + recordings: str, + clip_id: int, + video_path: str, + duration_s: float | None, +) -> FilmstripMeta | None: + """Return slicing metadata for ``clip_id``'s filmstrip sprite, + generating the sprite + sidecar if 
missing. ``None`` when ffmpeg + is unavailable or extraction failed.""" + sp = sprite_path(recordings, clip_id) + mp = meta_path(recordings, clip_id) + + if os.path.exists(sp) and os.path.getsize(sp) > 0 and os.path.exists(mp): + cached = _read_cached_meta(mp) + if cached is not None: + log.debug("filmstrip cache hit clip=%s", clip_id) + return cached + + ffmpeg = shutil.which("ffmpeg") + if ffmpeg is None: + global _warned_no_ffmpeg + if not _warned_no_ffmpeg: + _warned_no_ffmpeg = True + log.warning( + "filmstrip: ffmpeg not found on PATH — sprites cannot be " + "generated; the timeline will show placeholder tiles" + ) + return None + + frames = frame_count(duration_s) + # The CPU cost is roughly proportional to frame count (one decoded, + # scaled frame per INTERVAL_S of clip). Logging it here makes a NAS + # CPU spike traceable to the exact clips being rendered. + log.info( + "filmstrip: generating clip=%s frames=%d duration=%.0fs", + clip_id, frames, duration_s or 0.0, + ) + started = time.monotonic() + async with _semaphore(): + ok = await _generate_sprite(ffmpeg, video_path, sp, frames) + + elapsed = time.monotonic() - started + if not ok: + log.warning( + "filmstrip: clip=%s generation failed after %.1fs (frames=%d)", + clip_id, elapsed, frames, + ) + return None + log.info( + "filmstrip: clip=%s done in %.1fs (frames=%d)", + clip_id, elapsed, frames, + ) + + meta = FilmstripMeta( + frames=frames, interval_s=INTERVAL_S, + tile_w=TILE_W, tile_h=TILE_H, + duration_s=float(duration_s) if duration_s else 0.0, + ) + try: + with open(mp, "w") as f: + json.dump(asdict(meta), f) + except OSError: + pass # sprite is usable even if the sidecar write fails + return meta diff --git a/web/services/naming.py b/web/services/naming.py index faa9eea..e131315 100644 --- a/web/services/naming.py +++ b/web/services/naming.py @@ -86,3 +86,28 @@ def export_download_name( if not label or not clips: return f"viofosync_export_{job_id}.mp4" return f"{build_basename(clips, 
label)}.mp4" + + +# --- Timeline camera channels ------------------------------------------- + +# The lens is the trailing letter of a clip's ``camera`` code: +# F / PF (parking) / EF (event) -> front; R / PR -> rear; a future +# interior lens is I. Anything else falls back to "other" so an +# unexpected code still gets its own track rather than vanishing. +_CHANNEL_FOR_LETTER = {"F": "front", "R": "rear", "I": "interior"} + +# Stable display order for channel tracks, and human labels. +CHANNEL_ORDER = ["front", "rear", "interior", "other"] +CHANNEL_LABELS = { + "front": "Front", + "rear": "Rear", + "interior": "Interior", + "other": "Other", +} + + +def channel_of(camera: str | None) -> str: + """Map a clip's ``camera`` code to a timeline channel key.""" + if not camera: + return "other" + return _CHANNEL_FOR_LETTER.get(camera[-1].upper(), "other") diff --git a/web/services/retention.py b/web/services/retention.py index 79d4fa6..eb7d41b 100644 --- a/web/services/retention.py +++ b/web/services/retention.py @@ -20,6 +20,7 @@ from typing import Optional from ..db import Database +from . import filmstrip as _filmstrip from . 
import thumbs as _thumbs log = logging.getLogger("viofosync.retention") @@ -58,7 +59,13 @@ def _delete_clip_files(rec: dict, recordings: str) -> int: freed = os.path.getsize(path) except OSError: freed = 0 - for p in (path, path + ".gpx", _thumbs.thumb_path(recordings, rec["id"])): + for p in ( + path, + path + ".gpx", + _thumbs.thumb_path(recordings, rec["id"]), + _filmstrip.sprite_path(recordings, rec["id"]), + _filmstrip.meta_path(recordings, rec["id"]), + ): try: os.remove(p) except FileNotFoundError: diff --git a/web/services/scanner.py b/web/services/scanner.py index 2968284..e8b97f1 100644 --- a/web/services/scanner.py +++ b/web/services/scanner.py @@ -142,6 +142,7 @@ def scan(db: Database, destination: str, grouping: str, hub=None, loop=None) -> clips = list(_iter_clips(destination, grouping, source_dirs)) seen_paths: List[str] = [clip.path for clip in clips] + log.info("scan: %d clip(s) found under %s", len(clips), destination) with db.write() as c: c.execute("BEGIN") @@ -184,8 +185,14 @@ def scan(db: Database, destination: str, grouping: str, hub=None, loop=None) -> ), ) - # Drop index rows whose files vanished (retention - # policy or manual move). + # Drop index rows whose files vanished (retention policy or + # manual move). But a scan that found *nothing* almost always + # means the recordings volume is unavailable — not yet mounted + # at container start, or a transient NAS glitch — rather than + # the user having deleted their entire archive. Wiping the index + # there resets duration_s/gps_examined for every clip and kicks + # off a full duration re-sweep, GPS re-exam and thumb regen. So + # never prune on an empty scan when the index still holds rows. if seen_paths: placeholders = ",".join("?" 
* len(seen_paths)) c.execute( @@ -194,7 +201,15 @@ def scan(db: Database, destination: str, grouping: str, hub=None, loop=None) -> seen_paths, ) else: - c.execute("DELETE FROM clip_index") + existing = c.execute( + "SELECT COUNT(*) FROM clip_index" + ).fetchone()[0] + if existing: + log.warning( + "scan found 0 clips but index holds %d — skipping " + "prune (recordings dir %s likely unavailable)", + existing, destination, + ) c.execute("COMMIT") except Exception: c.execute("ROLLBACK") diff --git a/web/services/thumbs.py b/web/services/thumbs.py index a952690..536f35e 100644 --- a/web/services/thumbs.py +++ b/web/services/thumbs.py @@ -11,9 +11,9 @@ from __future__ import annotations import asyncio +import contextlib import os import shutil -from typing import Optional def _cache_dir(recordings: str) -> str: @@ -66,7 +66,7 @@ def failed_recently(recordings: str, clip_id: int, video_path: str) -> bool: async def ensure_thumb( recordings: str, clip_id: int, video_path: str -) -> Optional[str]: +) -> str | None: """Return the path to a JPEG thumbnail for ``video_path``, generating it if missing. 
``None`` if ffmpeg is unavailable or extraction failed.""" @@ -94,8 +94,10 @@ async def ensure_thumb( ) try: await asyncio.wait_for(proc.wait(), timeout=15.0) - except asyncio.TimeoutError: + except TimeoutError: # asyncio.TimeoutError is the builtin since 3.11 proc.kill() + with contextlib.suppress(Exception): + await proc.wait() # reap the killed child (no zombie) mark_failed(recordings, clip_id) return None diff --git a/web/static/app.js b/web/static/app.js index ff7db90..8ca0100 100644 --- a/web/static/app.js +++ b/web/static/app.js @@ -243,6 +243,15 @@ function routeTo(hash) { if (logsView) logsView.hidden = tab !== "logs"; const settingsView = document.getElementById("view-settings"); if (settingsView) settingsView.hidden = tab !== "settings"; + const timelineView = document.getElementById("view-timeline"); + if (timelineView) { + timelineView.hidden = tab !== "timeline"; + // Stop timeline playback when navigating away (a hidden