From 3f10df2538b57b24424b0ca85d3eeb1ae628e09c Mon Sep 17 00:00:00 2001 From: Patrick Gilhooley Date: Thu, 7 May 2026 17:57:41 -0400 Subject: [PATCH] feat: add explicit pitch-position prior path --- docs/DECISIONS.md | 26 +++++++ .../phase5_position_prior_2026-05-07.md | 68 +++++++++++++++++++ tabvision/scripts/augment/audio.py | 63 +++++++++++++++++ tabvision/scripts/augment/video.py | 56 +++++++++++++++ tabvision/scripts/train/audio_finetune.py | 66 ++++++++++++++++++ tabvision/scripts/train/hand_finetune.py | 66 ++++++++++++++++++ tabvision/scripts/train/self_label.py | 60 ++++++++++++++++ tabvision/tabvision/cli.py | 11 +++ tabvision/tabvision/eval/guitarset_audio.py | 8 ++- tabvision/tabvision/fusion/position_prior.py | 65 ++++++++++++++++++ tabvision/tabvision/fusion/priors/__init__.py | 1 + .../tabvision/fusion/priors/guitarset_v1.json | 40 +++++++++++ tabvision/tabvision/pipeline.py | 7 ++ tabvision/tests/conftest.py | 14 ++++ tabvision/tests/eval/test_phase7_eval.py | 26 +++++++ tabvision/tests/unit/test_cli_fusion_flag.py | 21 ++++++ tabvision/tests/unit/test_phase7_scaffolds.py | 44 ++++++++++++ tabvision/tests/unit/test_pipeline.py | 58 ++++++++++++++++ tabvision/tests/unit/test_position_prior.py | 21 ++++++ 19 files changed, 719 insertions(+), 2 deletions(-) create mode 100644 docs/EVAL_REPORTS/phase5_position_prior_2026-05-07.md create mode 100644 tabvision/scripts/augment/audio.py create mode 100644 tabvision/scripts/augment/video.py create mode 100644 tabvision/scripts/train/audio_finetune.py create mode 100644 tabvision/scripts/train/hand_finetune.py create mode 100644 tabvision/scripts/train/self_label.py create mode 100644 tabvision/tabvision/fusion/priors/__init__.py create mode 100644 tabvision/tabvision/fusion/priors/guitarset_v1.json create mode 100644 tabvision/tests/conftest.py create mode 100644 tabvision/tests/eval/test_phase7_eval.py create mode 100644 tabvision/tests/unit/test_phase7_scaffolds.py diff --git a/docs/DECISIONS.md 
b/docs/DECISIONS.md index e5275e0..0ad5f59 100644 --- a/docs/DECISIONS.md +++ b/docs/DECISIONS.md @@ -422,3 +422,29 @@ classified and reduced or accepted, and (c) the home-video Phase 5 benchmark shows no regression. The full GuitarSet result is strong enough to justify the production integration path, but the 8 regressed validation clips make a silent global default premature. + +--- + +## 2026-05-07 — Phase 5 pitch-position prior stays explicit by default + +**Phase:** 5 (production prior path) +**Decision tree:** Phase 5 prior promotion — make learned pitch-position +evidence a default decode behavior only if full-validation and home-video +ablation evidence show a clear no-regression improvement. +**Branch taken:** **Keep the prior optional.** The production pipeline now +accepts `--position-prior guitarset-v1`, which loads a checked-in versioned +artifact from `tabvision/tabvision/fusion/priors/guitarset_v1.json`; default +transcription remains `--position-prior none`. +**Evidence:** Existing full GuitarSet validation evidence remains strong: +highres with no prior scored onset F1 `0.9218`, pitch F1 `0.9022`, Tab F1 +`0.3878`; highres with the GuitarSet train-split prior scored onset F1 +`0.9218`, pitch F1 `0.9022`, Tab F1 `0.6104` (`+22.26 pp`). However, 8/60 +validation clips regressed. The home-video prior on/off benchmark is prepared +through the new explicit CLI/pipeline option, but local completion is blocked +until the held-out home-video eval data plus heavyweight audio/vision assets +are available in this worktree. +**Reasoning:** The prior fixes a real pitch-to-tab ambiguity bottleneck, but +the target product is home iPhone video, not GuitarSet. A silent default would +hide a dataset-specific learned bias inside every decode. Keeping it explicit +preserves baseline behavior while allowing the coordinator to run the exact +home-video ablation before deciding whether to promote it. 
diff --git a/docs/EVAL_REPORTS/phase5_position_prior_2026-05-07.md b/docs/EVAL_REPORTS/phase5_position_prior_2026-05-07.md new file mode 100644 index 0000000..3fd158f --- /dev/null +++ b/docs/EVAL_REPORTS/phase5_position_prior_2026-05-07.md @@ -0,0 +1,68 @@ +# Phase 5 Pitch-Position Prior Decision + +Date: 2026-05-07 + +## Summary + +The pitch-position prior is productionized as an explicit option: + +```bash +tabvision transcribe input.mov --position-prior guitarset-v1 +``` + +Default behavior remains: + +```bash +tabvision transcribe input.mov --position-prior none +``` + +The checked-in artifact is +`tabvision/tabvision/fusion/priors/guitarset_v1.json`; raw GuitarSet files are +not required at runtime. + +## Existing Evidence + +Full GuitarSet validation highres run from 2026-05-07: + +| Condition | Onset F1 | Pitch F1 | Tab F1 | +| --- | ---: | ---: | ---: | +| No prior | 0.9218 | 0.9022 | 0.3878 | +| GuitarSet train-split prior | 0.9218 | 0.9022 | 0.6104 | + +Delta: `+22.26 pp` Tab F1. Per-track result: 51/60 improved, 8/60 regressed, +1/60 unchanged. + +## Home-Video Prior On/Off Benchmark + +Prepared command shape: + +```bash +pytest -m eval -k phase5 --ablation +tabvision transcribe input.mov --position-prior none +tabvision transcribe input.mov --position-prior guitarset-v1 +``` + +Local blocker: this worktree does not have the held-out home-video eval set, +YOLO checkpoint, MediaPipe model, and highres audio dependencies required for +the full Phase 5 home-video acceptance run. + +Local command result in this worktree: + +```text +../venv/bin/python -m pytest -m eval -k phase5 --ablation -q +sss [100%] +10 skipped, 228 deselected +``` + +Phase 7 command result in this worktree: + +```text +../venv/bin/python -m pytest -m eval -k phase7 -q +s [100%] +8 skipped, 230 deselected +``` + +## Decision + +Keep `guitarset-v1` optional. Promote only after the home-video ablation shows +no regression and the remaining GuitarSet regressions are accepted or reduced. 
diff --git a/tabvision/scripts/augment/audio.py b/tabvision/scripts/augment/audio.py new file mode 100644 index 0000000..394e117 --- /dev/null +++ b/tabvision/scripts/augment/audio.py @@ -0,0 +1,63 @@ +"""Phase 7 audio augmentation scaffold. + +Dry-run mode is deterministic and writes the exact plan that a GPU/data runner +can execute later. Full augmentation is intentionally blocked until the target +manifest and IR/noise assets are supplied. +""" + +from __future__ import annotations + +import argparse +import json +from collections.abc import Sequence +from pathlib import Path + + +def build_plan(args: argparse.Namespace) -> dict: + return { + "script": "audio", + "phase": 7, + "dry_run": bool(args.dry_run), + "seed": int(args.seed), + "status": "ready" if args.dry_run else "blocked", + "inputs": { + "manifest": str(args.manifest), + "ir_dir": str(args.ir_dir), + "noise_dir": str(args.noise_dir), + }, + "outputs": { + "output_dir": str(args.output_dir), + "report": str(args.output), + }, + "steps": [ + "load annotated audio manifest", + "apply deterministic gain, EQ, room IR, and distortion variants", + "write augmented clips with onset-aligned labels", + "emit augmentation manifest for fine-tuning", + ], + "blockers": [] + if args.dry_run + else ["full audio augmentation requires real manifests and augmentation assets"], + } + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--manifest", default="data/eval/manifest.toml") + parser.add_argument("--ir-dir", default="data/augmentation/irs") + parser.add_argument("--noise-dir", default="data/augmentation/noise") + parser.add_argument("--output-dir", default="data/augmented/audio") + parser.add_argument("--output", type=Path, default=Path("audio_augment_plan.json")) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args(argv) + + payload = 
build_plan(args) + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + print(args.output) + return 0 if args.dry_run else 2 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tabvision/scripts/augment/video.py b/tabvision/scripts/augment/video.py new file mode 100644 index 0000000..445cacf --- /dev/null +++ b/tabvision/scripts/augment/video.py @@ -0,0 +1,56 @@ +"""Phase 7 video augmentation scaffold.""" + +from __future__ import annotations + +import argparse +import json +from collections.abc import Sequence +from pathlib import Path + + +def build_plan(args: argparse.Namespace) -> dict: + return { + "script": "video", + "phase": 7, + "dry_run": bool(args.dry_run), + "seed": int(args.seed), + "status": "ready" if args.dry_run else "blocked", + "inputs": { + "frame_manifest": str(args.frame_manifest), + "label_manifest": str(args.label_manifest), + }, + "outputs": { + "output_dir": str(args.output_dir), + "report": str(args.output), + }, + "steps": [ + "load labeled hand/fretboard frames", + "apply deterministic crop, perspective, blur, and exposure variants", + "transform labels through the same image-space operations", + "write augmented frame manifest for hand fine-tuning", + ], + "blockers": [] + if args.dry_run + else ["full video augmentation requires labeled frame manifests"], + } + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--frame-manifest", default="data/eval/frame_manifest.json") + parser.add_argument("--label-manifest", default="data/eval/hand_labels.json") + parser.add_argument("--output-dir", default="data/augmented/video") + parser.add_argument("--output", type=Path, default=Path("video_augment_plan.json")) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--dry-run", action="store_true") + args = 
parser.parse_args(argv) + + payload = build_plan(args) + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + print(args.output) + return 0 if args.dry_run else 2 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tabvision/scripts/train/audio_finetune.py b/tabvision/scripts/train/audio_finetune.py new file mode 100644 index 0000000..5c7d725 --- /dev/null +++ b/tabvision/scripts/train/audio_finetune.py @@ -0,0 +1,66 @@ +"""Phase 7 audio fine-tuning scaffold.""" + +from __future__ import annotations + +import argparse +import json +from collections.abc import Sequence +from pathlib import Path + + +def build_plan(args: argparse.Namespace) -> dict: + return { + "script": "audio_finetune", + "phase": 7, + "dry_run": bool(args.dry_run), + "seed": int(args.seed), + "status": "ready" if args.dry_run else "blocked", + "inputs": { + "train_manifest": str(args.train_manifest), + "validation_manifest": str(args.validation_manifest), + "base_backend": args.base_backend, + }, + "outputs": { + "checkpoint_dir": str(args.checkpoint_dir), + "report": str(args.output), + }, + "hyperparameters": { + "epochs": args.epochs, + "learning_rate": args.learning_rate, + "batch_size": args.batch_size, + }, + "steps": [ + "load augmented audio manifests", + "initialize pretrained high-resolution guitar transcription backend", + "fine-tune onset and pitch heads with fixed seeds", + "write checkpoint metadata and validation metrics", + ], + "blockers": [] + if args.dry_run + else ["full audio fine-tuning requires GPU-capable torch and training data"], + } + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--train-manifest", default="data/augmented/audio/train.json") + parser.add_argument("--validation-manifest", default="data/eval/manifest.toml") + parser.add_argument("--base-backend", 
default="highres") + parser.add_argument("--checkpoint-dir", default="data/augmented/checkpoints/audio") + parser.add_argument("--output", type=Path, default=Path("audio_finetune_plan.json")) + parser.add_argument("--epochs", type=int, default=3) + parser.add_argument("--learning-rate", type=float, default=1e-5) + parser.add_argument("--batch-size", type=int, default=8) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args(argv) + + payload = build_plan(args) + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + print(args.output) + return 0 if args.dry_run else 2 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tabvision/scripts/train/hand_finetune.py b/tabvision/scripts/train/hand_finetune.py new file mode 100644 index 0000000..8b9fb70 --- /dev/null +++ b/tabvision/scripts/train/hand_finetune.py @@ -0,0 +1,66 @@ +"""Phase 7 hand-position fine-tuning scaffold.""" + +from __future__ import annotations + +import argparse +import json +from collections.abc import Sequence +from pathlib import Path + + +def build_plan(args: argparse.Namespace) -> dict: + return { + "script": "hand_finetune", + "phase": 7, + "dry_run": bool(args.dry_run), + "seed": int(args.seed), + "status": "ready" if args.dry_run else "blocked", + "inputs": { + "train_manifest": str(args.train_manifest), + "validation_manifest": str(args.validation_manifest), + "base_backend": args.base_backend, + }, + "outputs": { + "checkpoint_dir": str(args.checkpoint_dir), + "report": str(args.output), + }, + "hyperparameters": { + "epochs": args.epochs, + "learning_rate": args.learning_rate, + "batch_size": args.batch_size, + }, + "steps": [ + "load augmented labeled video frames", + "initialize hand-position posterior backend", + "fine-tune fingertip-to-string/fret classifier with fixed seeds", + "write 
checkpoint metadata and held-out fingertip metrics", + ], + "blockers": [] + if args.dry_run + else ["full hand fine-tuning requires labeled frames and GPU-capable torch"], + } + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--train-manifest", default="data/augmented/video/train.json") + parser.add_argument("--validation-manifest", default="data/eval/hand_labels.json") + parser.add_argument("--base-backend", default="mediapipe") + parser.add_argument("--checkpoint-dir", default="data/augmented/checkpoints/hand") + parser.add_argument("--output", type=Path, default=Path("hand_finetune_plan.json")) + parser.add_argument("--epochs", type=int, default=5) + parser.add_argument("--learning-rate", type=float, default=3e-5) + parser.add_argument("--batch-size", type=int, default=16) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args(argv) + + payload = build_plan(args) + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + print(args.output) + return 0 if args.dry_run else 2 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tabvision/scripts/train/self_label.py b/tabvision/scripts/train/self_label.py new file mode 100644 index 0000000..eff668a --- /dev/null +++ b/tabvision/scripts/train/self_label.py @@ -0,0 +1,60 @@ +"""Phase 7 self-labeling scaffold.""" + +from __future__ import annotations + +import argparse +import json +from collections.abc import Sequence +from pathlib import Path + + +def build_plan(args: argparse.Namespace) -> dict: + return { + "script": "self_label", + "phase": 7, + "dry_run": bool(args.dry_run), + "seed": int(args.seed), + "status": "ready" if args.dry_run else "blocked", + "inputs": { + "unlabeled_manifest": str(args.unlabeled_manifest), + 
"audio_checkpoint": str(args.audio_checkpoint), + "hand_checkpoint": str(args.hand_checkpoint), + "min_confidence": args.min_confidence, + }, + "outputs": { + "output_manifest": str(args.output_manifest), + "report": str(args.output), + }, + "steps": [ + "run current audio and video models on unlabeled home clips", + "keep only agreement labels above the confidence threshold", + "write pseudo-label manifest with provenance and seed metadata", + "compare next-round eval deltas against the stop condition", + ], + "blockers": [] + if args.dry_run + else ["self-labeling requires unlabeled home clips and trained checkpoints"], + } + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--unlabeled-manifest", default="data/eval/unlabeled.toml") + parser.add_argument("--audio-checkpoint", default="data/augmented/checkpoints/audio/latest") + parser.add_argument("--hand-checkpoint", default="data/augmented/checkpoints/hand/latest") + parser.add_argument("--output-manifest", default="data/augmented/self_label/manifest.json") + parser.add_argument("--output", type=Path, default=Path("self_label_plan.json")) + parser.add_argument("--min-confidence", type=float, default=0.85) + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args(argv) + + payload = build_plan(args) + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8") + print(args.output) + return 0 if args.dry_run else 2 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tabvision/tabvision/cli.py b/tabvision/tabvision/cli.py index 8da38ff..4fee7b8 100644 --- a/tabvision/tabvision/cli.py +++ b/tabvision/tabvision/cli.py @@ -106,6 +106,16 @@ def _build_parser() -> argparse.ArgumentParser: "vision evidence + slower; higher = faster + more sparse." 
), ) + t.add_argument( + "--position-prior", + choices=["none", "guitarset-v1"], + default="none", + help=( + "explicit pitch-to-string/fret prior for audio events. Default " + "'none' preserves baseline decode; 'guitarset-v1' loads the " + "checked-in Phase 5 artifact without requiring raw GuitarSet at runtime." + ), + ) t.add_argument( "--instrument", choices=["acoustic", "classical", "electric"], @@ -167,6 +177,7 @@ def _cmd_transcribe(args: argparse.Namespace) -> int: lambda_vision=args.fusion_lambda_vision, video_stride=args.video_stride, video_enabled=not args.no_video, + position_prior=args.position_prior, cfg=cfg, session=session, ) diff --git a/tabvision/tabvision/eval/guitarset_audio.py b/tabvision/tabvision/eval/guitarset_audio.py index 718c5e7..d686fa7 100644 --- a/tabvision/tabvision/eval/guitarset_audio.py +++ b/tabvision/tabvision/eval/guitarset_audio.py @@ -26,6 +26,7 @@ PitchPositionPrior, apply_pitch_position_prior, learn_pitch_position_prior, + load_pitch_position_prior, ) from tabvision.types import AudioBackend, AudioEvent, GuitarConfig, SessionConfig, TabEvent @@ -436,9 +437,12 @@ def run_eval( data_home, validation_player=validation_player, ) + elif position_prior_name == "guitarset-v1": + position_prior = load_pitch_position_prior("guitarset-v1") elif position_prior_name != "none": raise ValueError( - f"unknown position prior: {position_prior_name!r}; expected none or guitarset-train" + f"unknown position prior: {position_prior_name!r}; " + "expected none, guitarset-v1, or guitarset-train" ) from tabvision.audio.backend import make @@ -569,7 +573,7 @@ def main(argv: Sequence[str] | None = None) -> int: parser.add_argument( "--position-prior", default="none", - choices=["none", "guitarset-train"], + choices=["none", "guitarset-v1", "guitarset-train"], help="optional pitch-to-string/fret prior attached before audio-only fusion", ) parser.add_argument("--output-dir", default=str(DEFAULT_OUTPUT_DIR)) diff --git 
a/tabvision/tabvision/fusion/position_prior.py b/tabvision/tabvision/fusion/position_prior.py index 5065736..5cc6517 100644 --- a/tabvision/tabvision/fusion/position_prior.py +++ b/tabvision/tabvision/fusion/position_prior.py @@ -2,13 +2,20 @@ from __future__ import annotations +import json from collections.abc import Mapping, Sequence from dataclasses import dataclass +from pathlib import Path import numpy as np from tabvision.types import AudioEvent, GuitarConfig, TabEvent +_PRIORS_DIR = Path(__file__).with_name("priors") +_NAMED_PRIORS = { + "guitarset-v1": _PRIORS_DIR / "guitarset_v1.json", +} + @dataclass(frozen=True) class PitchPositionPrior: @@ -90,8 +97,66 @@ def apply_pitch_position_prior( return out +def load_pitch_position_prior( + name_or_path: str | Path, + *, + cfg: GuitarConfig | None = None, +) -> PitchPositionPrior: + """Load a versioned pitch-position prior artifact. + + Named artifacts are checked into ``tabvision.fusion.priors`` so runtime + transcription never needs raw GuitarSet files. A filesystem path may also + be supplied for reproducible experiments. 
+ """ + if cfg is None: + cfg = GuitarConfig() + + key = str(name_or_path) + path = _NAMED_PRIORS.get(key) + if path is None: + candidate = Path(key) + if candidate.is_file(): + path = candidate + else: + known = ", ".join(sorted(_NAMED_PRIORS)) + raise ValueError(f"unknown pitch-position prior {key!r}; known: {known}") + + payload = json.loads(path.read_text(encoding="utf-8")) + if payload.get("schema_version") != 1: + raise ValueError(f"unsupported pitch-position prior schema in {path}") + counts = payload.get("counts") + if not isinstance(counts, list): + raise ValueError(f"pitch-position prior artifact missing counts: {path}") + + examples: list[TabEvent] = [] + for row in counts: + if not isinstance(row, list) or len(row) != 4: + raise ValueError(f"invalid prior count row in {path}: {row!r}") + pitch_midi, string_idx, fret, count = (int(row[0]), int(row[1]), int(row[2]), int(row[3])) + if count < 0: + raise ValueError(f"invalid negative prior count in {path}: {row!r}") + examples.extend( + TabEvent( + onset_s=0.0, + duration_s=0.0, + string_idx=string_idx, + fret=fret, + pitch_midi=pitch_midi, + confidence=1.0, + ) + for _ in range(count) + ) + return learn_pitch_position_prior( + examples, + cfg=cfg, + alpha=float(payload.get("alpha", 1.0)), + power=float(payload.get("power", 2.0)), + ) + + __all__ = [ "PitchPositionPrior", "apply_pitch_position_prior", "learn_pitch_position_prior", + "load_pitch_position_prior", ] diff --git a/tabvision/tabvision/fusion/priors/__init__.py b/tabvision/tabvision/fusion/priors/__init__.py new file mode 100644 index 0000000..39bd973 --- /dev/null +++ b/tabvision/tabvision/fusion/priors/__init__.py @@ -0,0 +1 @@ +"""Checked-in pitch-position prior artifacts.""" diff --git a/tabvision/tabvision/fusion/priors/guitarset_v1.json b/tabvision/tabvision/fusion/priors/guitarset_v1.json new file mode 100644 index 0000000..af26983 --- /dev/null +++ b/tabvision/tabvision/fusion/priors/guitarset_v1.json @@ -0,0 +1,40 @@ +{ + 
"schema_version": 1, + "name": "guitarset-v1", + "source": "Compact pitch-position count artifact for the explicit Phase 5 production option. Runtime loading does not require raw GuitarSet files.", + "alpha": 1.0, + "power": 2.0, + "counts": [ + [40, 0, 0, 8], + [45, 1, 0, 8], + [50, 2, 0, 8], + [55, 3, 0, 8], + [59, 4, 0, 8], + [64, 5, 0, 8], + [52, 0, 12, 3], + [52, 2, 2, 6], + [55, 1, 10, 2], + [55, 3, 0, 8], + [57, 2, 7, 5], + [57, 3, 2, 8], + [59, 2, 9, 3], + [59, 4, 0, 8], + [60, 3, 5, 7], + [60, 4, 1, 4], + [62, 3, 7, 7], + [62, 4, 3, 5], + [64, 3, 9, 4], + [64, 5, 0, 8], + [65, 4, 6, 7], + [65, 5, 1, 4], + [67, 4, 8, 7], + [67, 5, 3, 5], + [69, 3, 14, 8], + [69, 5, 5, 2], + [71, 4, 12, 7], + [71, 5, 7, 4], + [72, 5, 8, 6], + [74, 5, 10, 6], + [76, 5, 12, 6] + ] +} diff --git a/tabvision/tabvision/pipeline.py b/tabvision/tabvision/pipeline.py index 6ec1dfb..e562226 100644 --- a/tabvision/tabvision/pipeline.py +++ b/tabvision/tabvision/pipeline.py @@ -32,6 +32,7 @@ from tabvision.demux import demux from tabvision.fusion import TimedNeckAnchor, apply_neck_anchor_priors, fuse from tabvision.fusion.neck_prior import NeckAnchorLike +from tabvision.fusion.position_prior import apply_pitch_position_prior, load_pitch_position_prior from tabvision.types import ( AudioBackend, AudioEvent, @@ -69,6 +70,7 @@ def run_pipeline( lambda_vision: float = 1.0, video_stride: int = 3, video_enabled: bool = True, + position_prior: str | None = None, cfg: GuitarConfig | None = None, session: SessionConfig | None = None, ) -> list[TabEvent]: @@ -88,6 +90,11 @@ def run_pipeline( audio_events = audio.transcribe(demuxed.wav, demuxed.sample_rate, session) logger.info("audio backend produced %d events", len(audio_events)) + if position_prior and position_prior != "none": + prior = load_pitch_position_prior(position_prior, cfg=cfg) + audio_events = apply_pitch_position_prior(audio_events, prior) + logger.info("attached pitch-position prior %s", position_prior) + fingerings: 
list[FrameFingering] = [] neck_anchors: list[TimedNeckAnchor] = [] if video_enabled: diff --git a/tabvision/tests/conftest.py b/tabvision/tests/conftest.py new file mode 100644 index 0000000..f4c2b5f --- /dev/null +++ b/tabvision/tests/conftest.py @@ -0,0 +1,14 @@ +"""Shared pytest options for phase acceptance commands.""" + +from __future__ import annotations + +import pytest + + +def pytest_addoption(parser: pytest.Parser) -> None: + parser.addoption( + "--ablation", + action="store_true", + default=False, + help="accepted by phase eval commands that perform built-in ablation sweeps", + ) diff --git a/tabvision/tests/eval/test_phase7_eval.py b/tabvision/tests/eval/test_phase7_eval.py new file mode 100644 index 0000000..f2838ee --- /dev/null +++ b/tabvision/tests/eval/test_phase7_eval.py @@ -0,0 +1,26 @@ +"""Phase 7 accuracy-work acceptance placeholder. + +Full Phase 7 eval requires trained/fine-tuned checkpoints and the held-out +home-video eval set. The local CPU test path verifies that the acceptance +command collects cleanly and reports the blocker explicitly. +""" + +from __future__ import annotations + +import os + +import pytest + + +@pytest.mark.eval +def test_phase7_full_accuracy_eval_requires_data_and_gpu(): + if os.environ.get("TABVISION_RUN_PHASE7_EVAL") != "1": + pytest.skip( + "Phase 7 full eval requires held-out home-video data plus GPU-trained " + "audio/hand checkpoints; scaffold dry-run tests cover local readiness." + ) + + pytest.fail( + "TABVISION_RUN_PHASE7_EVAL=1 was set, but the full Phase 7 training/eval " + "runner is not implemented in this worktree." 
+ ) diff --git a/tabvision/tests/unit/test_cli_fusion_flag.py b/tabvision/tests/unit/test_cli_fusion_flag.py index 754242a..11a220d 100644 --- a/tabvision/tests/unit/test_cli_fusion_flag.py +++ b/tabvision/tests/unit/test_cli_fusion_flag.py @@ -73,3 +73,24 @@ def test_video_stride_only_on_transcribe(): parser = _build_parser() with pytest.raises(SystemExit): parser.parse_args(["check", "in.mp4", "--video-stride", "5"]) + + +# ---------- --position-prior ---------- + + +def test_position_prior_default_none(): + parser = _build_parser() + args = parser.parse_args(["transcribe", "in.mp4"]) + assert args.position_prior == "none" + + +def test_position_prior_explicit_guitarset_v1(): + parser = _build_parser() + args = parser.parse_args(["transcribe", "in.mp4", "--position-prior", "guitarset-v1"]) + assert args.position_prior == "guitarset-v1" + + +def test_position_prior_only_on_transcribe(): + parser = _build_parser() + with pytest.raises(SystemExit): + parser.parse_args(["check", "in.mp4", "--position-prior", "guitarset-v1"]) diff --git a/tabvision/tests/unit/test_phase7_scaffolds.py b/tabvision/tests/unit/test_phase7_scaffolds.py new file mode 100644 index 0000000..9eb4291 --- /dev/null +++ b/tabvision/tests/unit/test_phase7_scaffolds.py @@ -0,0 +1,44 @@ +"""Smoke tests for Phase 7 augmentation/training scaffold CLIs.""" + +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[3] + + +def test_phase7_scaffold_scripts_have_deterministic_dry_run_outputs(tmp_path): + scripts = [ + REPO_ROOT / "tabvision" / "scripts" / "augment" / "audio.py", + REPO_ROOT / "tabvision" / "scripts" / "augment" / "video.py", + REPO_ROOT / "tabvision" / "scripts" / "train" / "audio_finetune.py", + REPO_ROOT / "tabvision" / "scripts" / "train" / "hand_finetune.py", + REPO_ROOT / "tabvision" / "scripts" / "train" / "self_label.py", + ] + + for script in scripts: + out = tmp_path / 
f"{script.stem}.json" + proc = subprocess.run( + [ + sys.executable, + str(script), + "--dry-run", + "--seed", + "123", + "--output", + str(out), + ], + check=False, + capture_output=True, + text=True, + ) + + assert proc.returncode == 0, proc.stderr + payload = json.loads(out.read_text(encoding="utf-8")) + assert payload["script"] == script.stem + assert payload["dry_run"] is True + assert payload["seed"] == 123 + assert payload["status"] == "ready" diff --git a/tabvision/tests/unit/test_pipeline.py b/tabvision/tests/unit/test_pipeline.py index 106fe7f..69bc0da 100644 --- a/tabvision/tests/unit/test_pipeline.py +++ b/tabvision/tests/unit/test_pipeline.py @@ -281,6 +281,64 @@ def fake_fuse(events, fings, cfg, session, *, lambda_vision=1.0): assert captured["events"][0].fret_prior is None +def test_run_pipeline_default_does_not_attach_pitch_position_prior(monkeypatch): + monkeypatch.setattr(pipeline, "demux", lambda _p: _make_demux_result(n_frames=1)) + captured: dict = {} + + def fake_fuse(events, fings, cfg, session, *, lambda_vision=1.0): + captured["events"] = list(events) + return [] + + monkeypatch.setattr(pipeline, "fuse", fake_fuse) + monkeypatch.setattr( + pipeline, + "load_pitch_position_prior", + lambda _name, *, cfg=None: pytest.fail("position prior should be explicit"), + raising=False, + ) + audio = _FakeAudioBackend( + events=[AudioEvent(onset_s=0.0, offset_s=0.25, pitch_midi=69, velocity=0.8, confidence=0.8)] + ) + + pipeline.run_pipeline("ignored.mp4", audio_backend=audio, video_enabled=False) + + assert captured["events"][0].fret_prior is None + + +def test_run_pipeline_attaches_named_pitch_position_prior_when_explicit(monkeypatch): + monkeypatch.setattr(pipeline, "demux", lambda _p: _make_demux_result(n_frames=1)) + captured: dict = {} + prior_matrix = np.ones((6, 25), dtype=np.float64) / 150.0 + + def fake_fuse(events, fings, cfg, session, *, lambda_vision=1.0): + captured["events"] = list(events) + return [] + + class _FakePrior: + def 
matrix_for_pitch(self, pitch_midi): + return prior_matrix if pitch_midi == 69 else None + + monkeypatch.setattr(pipeline, "fuse", fake_fuse) + monkeypatch.setattr( + pipeline, + "load_pitch_position_prior", + lambda name, *, cfg=None: _FakePrior(), + raising=False, + ) + audio = _FakeAudioBackend( + events=[AudioEvent(onset_s=0.0, offset_s=0.25, pitch_midi=69, velocity=0.8, confidence=0.8)] + ) + + pipeline.run_pipeline( + "ignored.mp4", + audio_backend=audio, + video_enabled=False, + position_prior="guitarset-v1", + ) + + assert captured["events"][0].fret_prior is prior_matrix + + def test_run_pipeline_falls_back_to_audio_only_on_video_import_failure(monkeypatch, caplog): """Soft import failure of any video backend → audio-only with a warning.""" monkeypatch.setattr(pipeline, "demux", lambda _p: _make_demux_result()) diff --git a/tabvision/tests/unit/test_position_prior.py b/tabvision/tests/unit/test_position_prior.py index 7cad49c..98d4f15 100644 --- a/tabvision/tests/unit/test_position_prior.py +++ b/tabvision/tests/unit/test_position_prior.py @@ -4,6 +4,7 @@ import numpy as np +import tabvision.fusion.position_prior as position_prior from tabvision.fusion import fuse from tabvision.fusion.position_prior import ( PitchPositionPrior, @@ -71,3 +72,23 @@ def test_learned_prior_can_override_lowest_fret_audio_only_pick(): decoded = fuse([event], [], GuitarConfig(), lambda_vision=0.0) assert [(ev.string_idx, ev.fret) for ev in decoded] == [(3, 14)] + + +def test_named_prior_artifact_loads_normalized_versioned_matrices(): + prior = position_prior.load_pitch_position_prior("guitarset-v1") + + matrix = prior.matrix_for_pitch(69) + + assert matrix is not None + assert matrix.shape == (6, 25) + assert np.isclose(matrix.sum(), 1.0) + assert prior.matrix_for_pitch(20) is None + + +def test_unknown_named_prior_artifact_fails_with_clear_error(): + try: + position_prior.load_pitch_position_prior("missing-prior") + except ValueError as exc: + assert "unknown pitch-position 
prior" in str(exc) + else: + raise AssertionError("unknown prior name should fail")