Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions docs/DECISIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -422,3 +422,29 @@ classified and reduced or accepted, and (c) the home-video Phase 5 benchmark
shows no regression. The full GuitarSet result is strong enough to justify the
production integration path, but the 8 regressed validation clips make a silent
global default premature.

---

## 2026-05-07 — Phase 5 pitch-position prior stays explicit by default

**Phase:** 5 (production prior path)
**Decision tree:** Phase 5 prior promotion — make learned pitch-position
evidence a default decode behavior only if full-validation and home-video
ablation evidence show a clear no-regression improvement.
**Branch taken:** **Keep the prior optional.** The production pipeline now
accepts `--position-prior guitarset-v1`, which loads a checked-in versioned
artifact from `tabvision/tabvision/fusion/priors/guitarset_v1.json`; default
transcription remains `--position-prior none`.
**Evidence:** Existing full GuitarSet validation evidence remains strong:
highres with no prior scored onset F1 `0.9218`, pitch F1 `0.9022`, Tab F1
`0.3878`; highres with the GuitarSet train-split prior scored onset F1
`0.9218`, pitch F1 `0.9022`, Tab F1 `0.6104` (`+22.26 pp`). However, 8/60
validation clips regressed. The home-video prior on/off benchmark is prepared
through the new explicit CLI/pipeline option, but local completion is blocked
until the held-out home-video eval data plus heavyweight audio/vision assets
are available in this worktree.
**Reasoning:** The prior fixes a real pitch-to-tab ambiguity bottleneck, but
the target product is home iPhone video, not GuitarSet. A silent default would
hide a dataset-specific learned bias inside every decode. Keeping it explicit
preserves baseline behavior while allowing the coordinator to run the exact
home-video ablation before deciding whether to promote it.
68 changes: 68 additions & 0 deletions docs/EVAL_REPORTS/phase5_position_prior_2026-05-07.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Phase 5 Pitch-Position Prior Decision

Date: 2026-05-07

## Summary

The pitch-position prior is productionized as an explicit option:

```bash
tabvision transcribe input.mov --position-prior guitarset-v1
```

Default behavior remains:

```bash
tabvision transcribe input.mov --position-prior none
```

The checked-in artifact is
`tabvision/tabvision/fusion/priors/guitarset_v1.json`; raw GuitarSet files are
not required at runtime.

## Existing Evidence

Full GuitarSet validation highres run from 2026-05-07:

| Condition | Onset F1 | Pitch F1 | Tab F1 |
| --- | ---: | ---: | ---: |
| No prior | 0.9218 | 0.9022 | 0.3878 |
| GuitarSet train-split prior | 0.9218 | 0.9022 | 0.6104 |

Delta: `+22.26 pp` Tab F1. Per-track result: 51/60 improved, 8/60 regressed,
1/60 unchanged.

## Home-Video Prior On/Off Benchmark

Prepared command shape:

```bash
pytest -m eval -k phase5 --ablation
tabvision transcribe <home_clip.mov> --position-prior none
tabvision transcribe <home_clip.mov> --position-prior guitarset-v1
```

Local blocker: this worktree does not have the held-out home-video eval set,
YOLO checkpoint, MediaPipe model, or highres audio dependencies required for
the full Phase 5 home-video acceptance run.

Local command result in this worktree:

```text
../venv/bin/python -m pytest -m eval -k phase5 --ablation -q
sss [100%]
10 skipped, 228 deselected
```

Phase 7 command result in this worktree:

```text
../venv/bin/python -m pytest -m eval -k phase7 -q
s [100%]
8 skipped, 230 deselected
```

## Decision

Keep `guitarset-v1` optional. Promote only after the home-video ablation shows
no regression and the remaining GuitarSet regressions are accepted or reduced.
63 changes: 63 additions & 0 deletions tabvision/scripts/augment/audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Phase 7 audio augmentation scaffold.

Dry-run mode is deterministic and writes the exact plan that a GPU/data runner
can execute later. Full augmentation is intentionally blocked until the target
manifest and IR/noise assets are supplied.
"""

from __future__ import annotations

import argparse
import json
from collections.abc import Sequence
from pathlib import Path


def build_plan(args: argparse.Namespace) -> dict:
    """Assemble the Phase 7 audio augmentation plan from parsed options.

    Args:
        args: Namespace providing ``dry_run``, ``seed``, ``manifest``,
            ``ir_dir``, ``noise_dir``, ``output_dir`` and ``output``.

    Returns:
        A plan dictionary holding the script name, phase number, dry-run
        flag, integer seed, status, stringified input and output locations,
        the ordered step descriptions, and a list of blockers. A dry run is
        reported as ``"ready"`` with no blockers; any other run is reported
        as ``"blocked"`` with a single blocker message.
    """
    is_dry_run = bool(args.dry_run)
    seed = int(args.seed)

    source_locations = {
        "manifest": str(args.manifest),
        "ir_dir": str(args.ir_dir),
        "noise_dir": str(args.noise_dir),
    }
    destinations = {
        "output_dir": str(args.output_dir),
        "report": str(args.output),
    }

    # Only a dry run counts as ready; a real run is always reported blocked.
    if is_dry_run:
        status = "ready"
        blockers = []
    else:
        status = "blocked"
        blockers = [
            "full audio augmentation requires real manifests and augmentation assets"
        ]

    plan = {
        "script": "audio",
        "phase": 7,
        "dry_run": is_dry_run,
        "seed": seed,
        "status": status,
    }
    plan["inputs"] = source_locations
    plan["outputs"] = destinations
    plan["steps"] = [
        "load annotated audio manifest",
        "apply deterministic gain, EQ, room IR, and distortion variants",
        "write augmented clips with onset-aligned labels",
        "emit augmentation manifest for fine-tuning",
    ]
    plan["blockers"] = blockers
    return plan


def main(argv: Sequence[str] | None = None) -> int:
    """Parse the command line, write the audio augmentation plan, and report.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` is passed
            through unchanged.

    Returns:
        ``0`` when ``--dry-run`` was given, otherwise ``2``.

    The plan is written as indented, key-sorted JSON to ``--output`` (parent
    directories are created as needed) and the output path is printed.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    # Plain string options, registered in the order they appear in --help.
    string_options = (
        ("--manifest", "data/eval/manifest.toml"),
        ("--ir-dir", "data/augmentation/irs"),
        ("--noise-dir", "data/augmentation/noise"),
        ("--output-dir", "data/augmented/audio"),
    )
    for flag, fallback in string_options:
        cli.add_argument(flag, default=fallback)
    cli.add_argument("--output", type=Path, default=Path("audio_augment_plan.json"))
    cli.add_argument("--seed", type=int, default=0)
    cli.add_argument("--dry-run", action="store_true")
    options = cli.parse_args(argv)

    plan = build_plan(options)
    report_path = options.output
    report_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(plan, indent=2, sort_keys=True)
    report_path.write_text(f"{serialized}\n", encoding="utf-8")
    print(report_path)

    if options.dry_run:
        return 0
    return 2


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
56 changes: 56 additions & 0 deletions tabvision/scripts/augment/video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Phase 7 video augmentation scaffold."""

from __future__ import annotations

import argparse
import json
from collections.abc import Sequence
from pathlib import Path


def build_plan(args: argparse.Namespace) -> dict:
    """Assemble the Phase 7 video augmentation plan from parsed options.

    Args:
        args: Namespace providing ``dry_run``, ``seed``, ``frame_manifest``,
            ``label_manifest``, ``output_dir`` and ``output``.

    Returns:
        A plan dictionary holding the script name, phase number, dry-run
        flag, integer seed, status, stringified input and output locations,
        the ordered step descriptions, and a list of blockers. A dry run is
        reported as ``"ready"`` with no blockers; any other run is reported
        as ``"blocked"`` with a single blocker message.
    """
    is_dry_run = bool(args.dry_run)
    seed = int(args.seed)

    source_locations = {
        "frame_manifest": str(args.frame_manifest),
        "label_manifest": str(args.label_manifest),
    }
    destinations = {
        "output_dir": str(args.output_dir),
        "report": str(args.output),
    }

    # Only a dry run counts as ready; a real run is always reported blocked.
    if is_dry_run:
        status = "ready"
        blockers = []
    else:
        status = "blocked"
        blockers = ["full video augmentation requires labeled frame manifests"]

    plan = {
        "script": "video",
        "phase": 7,
        "dry_run": is_dry_run,
        "seed": seed,
        "status": status,
    }
    plan["inputs"] = source_locations
    plan["outputs"] = destinations
    plan["steps"] = [
        "load labeled hand/fretboard frames",
        "apply deterministic crop, perspective, blur, and exposure variants",
        "transform labels through the same image-space operations",
        "write augmented frame manifest for hand fine-tuning",
    ]
    plan["blockers"] = blockers
    return plan


def main(argv: Sequence[str] | None = None) -> int:
    """Parse the command line, write the video augmentation plan, and report.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` is passed
            through unchanged.

    Returns:
        ``0`` when ``--dry-run`` was given, otherwise ``2``.

    The plan is written as indented, key-sorted JSON to ``--output`` (parent
    directories are created as needed) and the output path is printed.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    # Plain string options, registered in the order they appear in --help.
    string_options = (
        ("--frame-manifest", "data/eval/frame_manifest.json"),
        ("--label-manifest", "data/eval/hand_labels.json"),
        ("--output-dir", "data/augmented/video"),
    )
    for flag, fallback in string_options:
        cli.add_argument(flag, default=fallback)
    cli.add_argument("--output", type=Path, default=Path("video_augment_plan.json"))
    cli.add_argument("--seed", type=int, default=0)
    cli.add_argument("--dry-run", action="store_true")
    options = cli.parse_args(argv)

    plan = build_plan(options)
    report_path = options.output
    report_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(plan, indent=2, sort_keys=True)
    report_path.write_text(f"{serialized}\n", encoding="utf-8")
    print(report_path)

    if options.dry_run:
        return 0
    return 2


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
66 changes: 66 additions & 0 deletions tabvision/scripts/train/audio_finetune.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Phase 7 audio fine-tuning scaffold."""

from __future__ import annotations

import argparse
import json
from collections.abc import Sequence
from pathlib import Path


def build_plan(args: argparse.Namespace) -> dict:
    """Assemble the Phase 7 audio fine-tuning plan from parsed options.

    Args:
        args: Namespace providing ``dry_run``, ``seed``, ``train_manifest``,
            ``validation_manifest``, ``base_backend``, ``checkpoint_dir``,
            ``output``, ``epochs``, ``learning_rate`` and ``batch_size``.

    Returns:
        A plan dictionary holding the script name, phase number, dry-run
        flag, integer seed, status, inputs, outputs, hyperparameters, the
        ordered step descriptions, and a list of blockers. Manifest and
        output locations are stringified; ``base_backend`` and the
        hyperparameters are copied through as given. A dry run is reported
        as ``"ready"`` with no blockers; any other run is reported as
        ``"blocked"`` with a single blocker message.
    """
    is_dry_run = bool(args.dry_run)
    seed = int(args.seed)

    sources = {
        "train_manifest": str(args.train_manifest),
        "validation_manifest": str(args.validation_manifest),
        "base_backend": args.base_backend,
    }
    destinations = {
        "checkpoint_dir": str(args.checkpoint_dir),
        "report": str(args.output),
    }
    tuning = {
        "epochs": args.epochs,
        "learning_rate": args.learning_rate,
        "batch_size": args.batch_size,
    }

    # Only a dry run counts as ready; a real run is always reported blocked.
    if is_dry_run:
        status = "ready"
        blockers = []
    else:
        status = "blocked"
        blockers = ["full audio fine-tuning requires GPU-capable torch and training data"]

    plan = {
        "script": "audio_finetune",
        "phase": 7,
        "dry_run": is_dry_run,
        "seed": seed,
        "status": status,
    }
    plan["inputs"] = sources
    plan["outputs"] = destinations
    plan["hyperparameters"] = tuning
    plan["steps"] = [
        "load augmented audio manifests",
        "initialize pretrained high-resolution guitar transcription backend",
        "fine-tune onset and pitch heads with fixed seeds",
        "write checkpoint metadata and validation metrics",
    ]
    plan["blockers"] = blockers
    return plan


def main(argv: Sequence[str] | None = None) -> int:
    """Parse the command line, write the audio fine-tuning plan, and report.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` is passed
            through unchanged.

    Returns:
        ``0`` when ``--dry-run`` was given, otherwise ``2``.

    The plan is written as indented, key-sorted JSON to ``--output`` (parent
    directories are created as needed) and the output path is printed.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    # Plain string options, registered in the order they appear in --help.
    string_options = (
        ("--train-manifest", "data/augmented/audio/train.json"),
        ("--validation-manifest", "data/eval/manifest.toml"),
        ("--base-backend", "highres"),
        ("--checkpoint-dir", "data/augmented/checkpoints/audio"),
    )
    for flag, fallback in string_options:
        cli.add_argument(flag, default=fallback)
    cli.add_argument("--output", type=Path, default=Path("audio_finetune_plan.json"))
    cli.add_argument("--epochs", type=int, default=3)
    cli.add_argument("--learning-rate", type=float, default=1e-5)
    cli.add_argument("--batch-size", type=int, default=8)
    cli.add_argument("--seed", type=int, default=0)
    cli.add_argument("--dry-run", action="store_true")
    options = cli.parse_args(argv)

    plan = build_plan(options)
    report_path = options.output
    report_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(plan, indent=2, sort_keys=True)
    report_path.write_text(f"{serialized}\n", encoding="utf-8")
    print(report_path)

    if options.dry_run:
        return 0
    return 2


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
66 changes: 66 additions & 0 deletions tabvision/scripts/train/hand_finetune.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Phase 7 hand-position fine-tuning scaffold."""

from __future__ import annotations

import argparse
import json
from collections.abc import Sequence
from pathlib import Path


def build_plan(args: argparse.Namespace) -> dict:
    """Assemble the Phase 7 hand-position fine-tuning plan from parsed options.

    Args:
        args: Namespace providing ``dry_run``, ``seed``, ``train_manifest``,
            ``validation_manifest``, ``base_backend``, ``checkpoint_dir``,
            ``output``, ``epochs``, ``learning_rate`` and ``batch_size``.

    Returns:
        A plan dictionary holding the script name, phase number, dry-run
        flag, integer seed, status, inputs, outputs, hyperparameters, the
        ordered step descriptions, and a list of blockers. Manifest and
        output locations are stringified; ``base_backend`` and the
        hyperparameters are copied through as given. A dry run is reported
        as ``"ready"`` with no blockers; any other run is reported as
        ``"blocked"`` with a single blocker message.
    """
    is_dry_run = bool(args.dry_run)
    seed = int(args.seed)

    sources = {
        "train_manifest": str(args.train_manifest),
        "validation_manifest": str(args.validation_manifest),
        "base_backend": args.base_backend,
    }
    destinations = {
        "checkpoint_dir": str(args.checkpoint_dir),
        "report": str(args.output),
    }
    tuning = {
        "epochs": args.epochs,
        "learning_rate": args.learning_rate,
        "batch_size": args.batch_size,
    }

    # Only a dry run counts as ready; a real run is always reported blocked.
    if is_dry_run:
        status = "ready"
        blockers = []
    else:
        status = "blocked"
        blockers = ["full hand fine-tuning requires labeled frames and GPU-capable torch"]

    plan = {
        "script": "hand_finetune",
        "phase": 7,
        "dry_run": is_dry_run,
        "seed": seed,
        "status": status,
    }
    plan["inputs"] = sources
    plan["outputs"] = destinations
    plan["hyperparameters"] = tuning
    plan["steps"] = [
        "load augmented labeled video frames",
        "initialize hand-position posterior backend",
        "fine-tune fingertip-to-string/fret classifier with fixed seeds",
        "write checkpoint metadata and held-out fingertip metrics",
    ]
    plan["blockers"] = blockers
    return plan


def main(argv: Sequence[str] | None = None) -> int:
    """Parse the command line, write the hand fine-tuning plan, and report.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` is passed
            through unchanged.

    Returns:
        ``0`` when ``--dry-run`` was given, otherwise ``2``.

    The plan is written as indented, key-sorted JSON to ``--output`` (parent
    directories are created as needed) and the output path is printed.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    # Plain string options, registered in the order they appear in --help.
    string_options = (
        ("--train-manifest", "data/augmented/video/train.json"),
        ("--validation-manifest", "data/eval/hand_labels.json"),
        ("--base-backend", "mediapipe"),
        ("--checkpoint-dir", "data/augmented/checkpoints/hand"),
    )
    for flag, fallback in string_options:
        cli.add_argument(flag, default=fallback)
    cli.add_argument("--output", type=Path, default=Path("hand_finetune_plan.json"))
    cli.add_argument("--epochs", type=int, default=5)
    cli.add_argument("--learning-rate", type=float, default=3e-5)
    cli.add_argument("--batch-size", type=int, default=16)
    cli.add_argument("--seed", type=int, default=0)
    cli.add_argument("--dry-run", action="store_true")
    options = cli.parse_args(argv)

    plan = build_plan(options)
    report_path = options.output
    report_path.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(plan, indent=2, sort_keys=True)
    report_path.write_text(f"{serialized}\n", encoding="utf-8")
    print(report_path)

    if options.dry_run:
        return 0
    return 2


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
Loading