diff --git a/scripts/pocket-run.sh b/scripts/pocket-run.sh index 3d54e1a..31732d4 100755 --- a/scripts/pocket-run.sh +++ b/scripts/pocket-run.sh @@ -163,7 +163,11 @@ fi # Clean up old manifest if it exists rm -f "$ROOT/.seam/.last-pull-manifest" -# ── Phase 3: Rebuild dashboard manifest ─────────────────────── +# ── Phase 3: Stage inferred speakers for review ────────────── +log "Staging inferred speakers..." +python3 -u "$SCRIPT_DIR/stage-people.py" 2>&1 | tee -a "$LOG_FILE" + +# ── Phase 4: Rebuild dashboard manifest ─────────────────────── log "Rebuilding dashboard manifest..." python3 -u "$SCRIPT_DIR/build-manifest.py" 2>&1 | tee -a "$LOG_FILE" diff --git a/scripts/stage-people.py b/scripts/stage-people.py new file mode 100644 index 0000000..028a0e7 --- /dev/null +++ b/scripts/stage-people.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +"""Stage inferred speakers from analyses into people-pending.json. + +Scans .seam/analysis/*/analysis.json for speaker_map entries, filters out +generic labels, and stages new names for user confirmation. Does NOT +auto-add to people.json — the dashboard UI handles confirm/merge/dismiss. + +Safe to re-run: skips names already in people.json, people-pending.json, +or the dismissed list. 
import json
import re
import sys
import uuid
from collections import Counter, defaultdict
from datetime import datetime, timezone
from pathlib import Path

# The data directory is ".seam" under the parent of this script's directory.
ROOT = Path(__file__).resolve().parent.parent
DATA_DIR = ROOT / ".seam"
ANALYSIS_DIR = DATA_DIR / "analysis"
PEOPLE_FILE = DATA_DIR / "people.json"
PENDING_FILE = DATA_DIR / "people-pending.json"
DISMISSED_FILE = DATA_DIR / "dismissed-speakers.txt"

# Universally generic — never a real name
GENERIC_LABELS = {
    "unknown", "speaker", "narrator", "host", "facilitator", "moderator",
    "chair", "participant", "interviewer", "interviewee",
    "guest", "attendee", "caller", "member", "team",
}

# Numbered placeholder labels such as "Speaker 3" or "speaker03".
SPEAKER_NUM_RE = re.compile(r"^speaker\s*\d+$", re.IGNORECASE)

# One trailing "(...)" annotation, e.g. the "(Speaker 01)" in "Mark (Speaker 01)".
_TRAILING_PAREN_RE = re.compile(r"\s*\([^)]*\)\s*$")


def load_generic_labels() -> set[str]:
    """Return the built-in generic labels plus user-defined exclusions.

    Extra labels come from ``generic-speakers.txt`` in the data directory, one
    per line. Blank lines and lines starting with ``#`` are ignored, and every
    label is lower-cased.
    """
    labels = set(GENERIC_LABELS)
    custom_file = DATA_DIR / "generic-speakers.txt"
    if custom_file.exists():
        for line in custom_file.read_text(encoding="utf-8").splitlines():
            word = line.strip().lower()
            if word and not word.startswith("#"):
                labels.add(word)
    return labels


def load_dismissed() -> set[str]:
    """Return the dismissed speaker names, lower-cased, one per non-blank line."""
    if not DISMISSED_FILE.exists():
        return set()
    lines = DISMISSED_FILE.read_text(encoding="utf-8").splitlines()
    return {line.strip().lower() for line in lines if line.strip()}


def canonical(name: str) -> str:
    """Return *name* with one trailing parenthetical removed and whitespace trimmed.

    Example: ``"Mark (Speaker 01)"`` becomes ``"Mark"``.
    """
    return _TRAILING_PAREN_RE.sub("", name).strip()


def is_generic(name: str, labels: set[str]) -> bool:
    """Return True when *name* is a placeholder label rather than a real name.

    A name is generic when it is blank, is "unknown", looks like "Speaker 12",
    or consists only of tokens found in *labels* (or plain digits). A name
    with a trailing parenthetical is judged by the part before it, because
    that part is what canonical() turns into the lookup key.
    """
    n = name.strip()
    if not n or n.lower() == "unknown":
        return True
    if SPEAKER_NUM_RE.match(n):
        return True
    base = canonical(n)
    if base != n:
        # "Mark (Speaker 01)" -> judge "Mark"; "Unknown (Speaker 02)" -> judge
        # "Unknown". Falling through to the token test would see "(speaker" and
        # "02)" as non-generic tokens and let the placeholder through.
        return is_generic(base, labels)
    tokens = [t.lower() for t in re.split(r"[\s\-/]+", n) if t]
    return bool(tokens) and all(t in labels or t.isdigit() for t in tokens)


def _lookup_keys(name: object) -> set[str]:
    """Return the lower-cased forms under which *name* counts as already known.

    Both the name as written and its canonical() form are returned, so an entry
    stored as "Mark (Speaker 01)" is still found under the key "mark". Values
    that are not strings, or are blank, yield an empty set.
    """
    if not isinstance(name, str):
        return set()
    raw = name.strip().lower()
    keys = {raw, canonical(raw)}
    keys.discard("")
    return keys


def _read_json_object(path: Path) -> dict:
    """Return the JSON object stored at *path*, or ``{}`` when there is none.

    A missing file is normal and silent. An unreadable or malformed file is
    reported on stderr and treated as empty so one bad file cannot abort the
    run. A JSON root that is not an object is treated as empty.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Warning: skipping unreadable {path}: {exc}", file=sys.stderr)
        return {}
    return data if isinstance(data, dict) else {}


def main() -> int:
    """Stage newly inferred speaker names into the pending file.

    Returns 1 when the analysis directory does not exist, otherwise 0. Names
    already present in people.json (including aliases), in the pending file,
    or in the dismissed list are skipped, so the script can be re-run safely.
    """
    if not ANALYSIS_DIR.exists():
        print("No analysis directory found.", file=sys.stderr)
        return 1

    labels = load_generic_labels()

    # Every key that must not be staged again: dismissed, confirmed, pending.
    known: set[str] = set()
    for dismissed_name in load_dismissed():
        known |= _lookup_keys(dismissed_name)

    people = _read_json_object(PEOPLE_FILE).get("people")
    for person in people if isinstance(people, list) else []:
        if not isinstance(person, dict):
            continue
        known |= _lookup_keys(person.get("name"))
        aliases = person.get("aliases")
        for alias in aliases if isinstance(aliases, list) else []:
            known |= _lookup_keys(alias)

    # Existing pending entries are kept as-is and only extended.
    stored = _read_json_object(PENDING_FILE).get("pending")
    pending_entries: list = stored if isinstance(stored, list) else []
    for entry in pending_entries:
        if isinstance(entry, dict):
            known |= _lookup_keys(entry.get("name"))

    # canonical key -> display variants (with counts) and recordings seen in
    variants: dict[str, Counter] = defaultdict(Counter)
    recordings: dict[str, set] = defaultdict(set)
    for analysis_path in sorted(ANALYSIS_DIR.glob("*/analysis.json")):
        speaker_map = _read_json_object(analysis_path).get("speaker_map")
        if not isinstance(speaker_map, dict):
            continue
        for raw in speaker_map.values():
            if not isinstance(raw, str):
                continue
            name = raw.strip()
            if is_generic(name, labels):
                continue
            key = canonical(name).lower()
            if not key or key in known:
                continue
            variants[key][name] += 1
            recordings[key].add(analysis_path.parent.name)

    now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    added = 0
    for key in sorted(variants):
        counts = variants[key]
        # Best display name: most frequent, then longest; the first one seen
        # wins any remaining tie.
        best = max(counts.items(), key=lambda kv: (kv[1], len(kv[0])))[0]
        pending_entries.append({
            "id": str(uuid.uuid4()),
            "name": best,
            "seenIn": sorted(recordings[key]),
            "count": sum(counts.values()),
            "suggestedMatch": None,  # UI can populate this
            "createdAt": now,
        })
        added += 1

    # Only touch the file when something was staged; a no-op run must not
    # rewrite it or recreate an empty one.
    if added:
        DATA_DIR.mkdir(parents=True, exist_ok=True)
        PENDING_FILE.write_text(
            json.dumps({"pending": pending_entries}, indent=2) + "\n",
            encoding="utf-8",
        )
    print(f"Staged {added} new speaker(s) for review (total pending: {len(pending_entries)})")
    return 0


if __name__ == "__main__":
    sys.exit(main())
// ── server/index.ts — pending people (staging) API ───────────────────

/**
 * Add a speaker name to the dismissed list read by scripts/stage-people.py.
 *
 * The name is stored lower-cased, both as written and with one trailing
 * parenthetical removed. The staging script looks names up by the stripped
 * form ("Mark (Speaker 01)" → "mark"), so storing only the raw form would
 * let a dismissed speaker be staged again on the next run.
 */
function addDismissed(name: string) {
  const existing = existsSync(DISMISSED_FILE) ? readFileSync(DISMISSED_FILE, "utf-8") : "";
  // Trim each line so CRLF files do not produce "name\r" duplicates.
  const names = new Set(
    existing
      .split(/\r?\n/)
      .map((line) => line.trim())
      .filter(Boolean),
  );
  const lowered = name.trim().toLowerCase();
  if (lowered) names.add(lowered);
  const stripped = lowered.replace(/\s*\([^)]*\)\s*$/, "").trim();
  if (stripped) names.add(stripped);
  writeFileSync(DISMISSED_FILE, [...names].join("\n") + "\n");
}

// List pending speakers
app.get("/api/people/pending", (_req, res) => {
  res.json({ pending: readPending() });
});

// Confirm — move from pending to people.json as a new person
app.post("/api/people/pending/:id/confirm", (req, res) => {
  const pending = readPending();
  const idx = pending.findIndex((p) => p.id === req.params.id);
  if (idx === -1) {
    res.status(404).json({ error: "Not found" });
    return;
  }
  const entry = pending[idx];
  const people = readPeople();
  const person: Person = {
    id: randomUUID(),
    name: entry.name,
    source: "inferred",
    createdAt: new Date().toISOString(),
  };
  people.push(person);
  writePeople(people);
  pending.splice(idx, 1);
  writePending(pending);
  res.json(person);
});

// Merge — add as alias to an existing person, remove from pending
app.post("/api/people/pending/:id/merge", (req, res) => {
  // req.body is undefined when no body was parsed; never destructure it blindly.
  const { targetPersonId } = (req.body || {}) as { targetPersonId?: unknown };
  if (typeof targetPersonId !== "string" || !targetPersonId) {
    res.status(400).json({ error: "targetPersonId required" });
    return;
  }
  const pending = readPending();
  const idx = pending.findIndex((p) => p.id === req.params.id);
  if (idx === -1) {
    res.status(404).json({ error: "Pending person not found" });
    return;
  }
  const entry = pending[idx];
  const people = readPeople();
  const target = people.find((p) => p.id === targetPersonId);
  if (!target) {
    res.status(404).json({ error: "Target person not found" });
    return;
  }
  // Add as alias unless it already is the person's name or one of their aliases.
  if (!target.aliases) target.aliases = [];
  const lowered = entry.name.toLowerCase();
  const alreadyKnown =
    target.name.toLowerCase() === lowered ||
    target.aliases.some((a) => a.toLowerCase() === lowered);
  if (!alreadyKnown) {
    target.aliases.push(entry.name);
  }
  writePeople(people);
  pending.splice(idx, 1);
  writePending(pending);
  res.json(target);
});

// Dismiss — remove from pending, add to exclusion list
app.post("/api/people/pending/:id/dismiss", (req, res) => {
  const pending = readPending();
  const idx = pending.findIndex((p) => p.id === req.params.id);
  if (idx === -1) {
    res.status(404).json({ error: "Not found" });
    return;
  }
  const entry = pending[idx];
  addDismissed(entry.name);
  pending.splice(idx, 1);
  writePending(pending);
  res.json({ ok: true });
});

// Delete a recording (and its analysis), track it so sync doesn't re-pull
const DELETED_FILE = path.join(ROOT, ".seam", ".deleted");

// ── src/hooks/usePendingPeople.ts ────────────────────────────────────

import { useState, useEffect, useCallback } from "react";

const API = "http://localhost:3001";

export interface PendingPerson {
  id: string;
  name: string;
  seenIn: string[];
  count: number;
  suggestedMatch: string | null;
  createdAt: string;
}

/**
 * POST to one of the pending-people actions ("confirm", "merge", "dismiss").
 * Resolves to the response, or null when the request itself failed (for
 * example the server is unreachable), so callers never see a rejection.
 */
async function postPendingAction(
  id: string,
  action: string,
  body?: unknown,
): Promise<Response | null> {
  const init: RequestInit = { method: "POST" };
  if (body !== undefined) {
    init.headers = { "Content-Type": "application/json" };
    init.body = JSON.stringify(body);
  }
  try {
    return await fetch(`${API}/api/people/pending/${encodeURIComponent(id)}/${action}`, init);
  } catch {
    return null;
  }
}

/**
 * React hook exposing the staged ("pending") speakers and the three review
 * actions. `confirm` and `merge` resolve to the server's JSON payload on
 * success and to null on failure; `dismiss` resolves to nothing. Each action
 * removes the entry from local state only after the server accepted it.
 */
export function usePendingPeople() {
  const [pending, setPending] = useState<PendingPerson[]>([]);
  const [loading, setLoading] = useState(true);

  const refresh = useCallback(() => {
    fetch(`${API}/api/people/pending`)
      .then((r) => {
        // An error response must not be mistaken for an empty list.
        if (!r.ok) throw new Error(`HTTP ${r.status}`);
        return r.json();
      })
      .then((data) => {
        setPending(data && Array.isArray(data.pending) ? data.pending : []);
        setLoading(false);
      })
      .catch(() => setLoading(false));
  }, []);

  useEffect(() => { refresh(); }, [refresh]);

  const removeLocal = useCallback((id: string) => {
    setPending((prev) => prev.filter((p) => p.id !== id));
  }, []);

  const confirm = useCallback(async (id: string) => {
    const res = await postPendingAction(id, "confirm");
    if (res && res.ok) {
      removeLocal(id);
      return await res.json();
    }
    return null;
  }, [removeLocal]);

  const merge = useCallback(async (id: string, targetPersonId: string) => {
    const res = await postPendingAction(id, "merge", { targetPersonId });
    if (res && res.ok) {
      removeLocal(id);
      return await res.json();
    }
    return null;
  }, [removeLocal]);

  const dismiss = useCallback(async (id: string) => {
    const res = await postPendingAction(id, "dismiss");
    if (res && res.ok) {
      removeLocal(id);
    }
  }, [removeLocal]);

  return { pending, loading, confirm, merge, dismiss, refresh };
}

+ {/* Pending speakers */} + {pending.length > 0 && ( +
+
+ +

+ {pending.length} new speaker{pending.length !== 1 ? "s" : ""} detected +

+
+
+ {pending.map((p) => ( + + +
+
+ {p.name} + + {p.count} mention{p.count !== 1 ? "s" : ""} in {p.seenIn.length} recording{p.seenIn.length !== 1 ? "s" : ""} + +
+
+ + + +
+
+ {/* Merge picker — shows when Merge is clicked */} + {mergeTarget === p.id && ( +
+

+ Merge "{p.name}" as alias of: +

+
+ {people.map((person) => ( + + ))} +
+
+ )} +
+
+ ))} +
+ +
+ )} + {/* Add person */}