From fbae916bff1079fed3bf98a08b93f02411da10a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yoaquim=20Cintr=C3=B3n?=
Date: Sat, 25 Apr 2026 12:13:20 -0400
Subject: [PATCH] =?UTF-8?q?Add=20people=20staging=20flow=20=E2=80=94=20con?=
=?UTF-8?q?firm,=20merge,=20or=20dismiss=20inferred=20speakers?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Inferred speakers are no longer auto-added to people.json; instead they land
in people-pending.json for user review. The People page shows a "new speakers
detected" section with three actions:
- Confirm — creates a new person
- Merge — adds as alias to an existing person (click to pick who)
- Dismiss — adds to dismissed-speakers.txt exclusion list
Pipeline:
- stage-people.py scans analysis speaker_maps, filters generic labels,
and stages new names (skips existing people, pending, and dismissed)
- pocket-run.sh runs stage-people.py after analysis phase
- Server endpoints: GET/POST pending confirm/merge/dismiss
- GENERIC_LABELS trimmed to ~15 universal terms
- User-configurable .seam/generic-speakers.txt for domain-specific exclusions
---
scripts/pocket-run.sh | 6 +-
scripts/stage-people.py | 161 ++++++++++++++++++++++++++++++++++
server/index.ts | 111 +++++++++++++++++++++++
src/hooks/usePendingPeople.ts | 60 +++++++++++++
src/pages/PeoplePage.tsx | 92 ++++++++++++++++++-
5 files changed, 428 insertions(+), 2 deletions(-)
create mode 100644 scripts/stage-people.py
create mode 100644 src/hooks/usePendingPeople.ts
diff --git a/scripts/pocket-run.sh b/scripts/pocket-run.sh
index 3d54e1a..31732d4 100755
--- a/scripts/pocket-run.sh
+++ b/scripts/pocket-run.sh
@@ -163,7 +163,11 @@ fi
# Clean up old manifest if it exists
rm -f "$ROOT/.seam/.last-pull-manifest"
-# ── Phase 3: Rebuild dashboard manifest ───────────────────────
+# ── Phase 3: Stage inferred speakers for review ──────────────
+log "Staging inferred speakers..."
+python3 -u "$SCRIPT_DIR/stage-people.py" 2>&1 | tee -a "$LOG_FILE"
+
+# ── Phase 4: Rebuild dashboard manifest ───────────────────────
log "Rebuilding dashboard manifest..."
python3 -u "$SCRIPT_DIR/build-manifest.py" 2>&1 | tee -a "$LOG_FILE"
diff --git a/scripts/stage-people.py b/scripts/stage-people.py
new file mode 100644
index 0000000..028a0e7
--- /dev/null
+++ b/scripts/stage-people.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+"""Stage inferred speakers from analyses into people-pending.json.
+
+Scans .seam/analysis/*/analysis.json for speaker_map entries, filters out
+generic labels, and stages new names for user confirmation. Does NOT
+auto-add to people.json — the dashboard UI handles confirm/merge/dismiss.
+
+Safe to re-run: skips names already in people.json, people-pending.json,
+or the dismissed list.
+"""
+from __future__ import annotations
+
+import json
+import re
+import sys
+import uuid
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Repository root: two directory levels above this script's resolved path.
+ROOT = Path(__file__).resolve().parent.parent
+# Working directory that holds every file this script reads or writes.
+DATA_DIR = ROOT / ".seam"
+# Scanned for */analysis.json files containing a "speaker_map".
+ANALYSIS_DIR = DATA_DIR / "analysis"
+# Read for the names and aliases of already-known people.
+PEOPLE_FILE = DATA_DIR / "people.json"
+# Staging list written by this script: {"pending": [...]}.
+PENDING_FILE = DATA_DIR / "people-pending.json"
+# Plain-text exclusion list, one dismissed name per line.
+DISMISSED_FILE = DATA_DIR / "dismissed-speakers.txt"
+
+# Universally generic — never a real name
+GENERIC_LABELS = {
+ "unknown", "speaker", "narrator", "host", "facilitator", "moderator",
+ "chair", "participant", "interviewer", "interviewee",
+ "guest", "attendee", "caller", "member", "team",
+}
+
+# Matches a whole name of the form "speaker" + optional whitespace + digits,
+# case-insensitively (e.g. "Speaker 1", "speaker02").
+SPEAKER_NUM_RE = re.compile(r"^speaker\s*\d+$", re.IGNORECASE)
+
+
+def load_generic_labels() -> set[str]:
+    """Return the built-in generic labels plus any user-defined exclusions.
+
+    User exclusions are read from ``generic-speakers.txt`` inside the data
+    directory, one label per line. Blank lines and lines starting with ``#``
+    are ignored, and every label is lower-cased so it can be compared
+    against lower-cased name tokens.
+    """
+    labels = set(GENERIC_LABELS)
+    custom_file = DATA_DIR / "generic-speakers.txt"
+    if custom_file.exists():
+        # Decode explicitly as UTF-8 so accented labels load the same way
+        # regardless of the platform's locale encoding.
+        for line in custom_file.read_text(encoding="utf-8").splitlines():
+            word = line.strip().lower()
+            if word and not word.startswith("#"):
+                labels.add(word)
+    return labels
+
+
+def load_dismissed() -> set[str]:
+    """Return dismissed speaker names as lower-cased lookup keys.
+
+    Each non-blank line of the dismissed file contributes its lower-cased
+    text and, additionally, its canonical form (trailing parenthetical
+    removed). A missing file yields an empty set.
+    """
+    if not DISMISSED_FILE.exists():
+        return set()
+    dismissed: set[str] = set()
+    for line in DISMISSED_FILE.read_text(encoding="utf-8").splitlines():
+        raw = line.strip().lower()
+        if not raw:
+            continue
+        dismissed.add(raw)
+        # The dashboard server records the dismissed *display* name, which
+        # may carry a trailing parenthetical ("mark (speaker 01)"), while
+        # main() looks names up by canonical key ("mark"). Store the
+        # canonical form too, otherwise such dismissals never match and the
+        # speaker is staged again on the next run.
+        dismissed.add(canonical(raw) or raw)
+    return dismissed
+
+
+def is_generic(name: str, labels: set[str]) -> bool:
+    """Return True when *name* is a placeholder label rather than a real name.
+
+    A name is generic when it is empty, "unknown", a numbered speaker label
+    ("Speaker 3"), or made up entirely of generic tokens and digits.
+    A real name followed by a parenthetical ("Mark (Speaker 01)") is NOT
+    generic.
+
+    Args:
+        name: Raw speaker label taken from an analysis speaker map.
+        labels: Lower-cased generic words to test tokens against.
+    """
+    n = name.strip()
+    if not n or n.lower() == "unknown":
+        return True
+    if SPEAKER_NUM_RE.match(n):
+        return True
+    # Strip trailing parenthetical: "Mark (Speaker 01)" → test "Mark"
+    base = canonical(n)
+    if base != n and not is_generic(base, labels):
+        return False
+    # Split on parentheses as well as whitespace, "-" and "/": otherwise a
+    # token such as "(unknown)" never matches a label and a name like
+    # "Speaker 1 (unknown)" slips through as if it were a real person.
+    tokens = [t.lower() for t in re.split(r"[\s\-/()]+", n) if t]
+    # All tokens are generic → skip
+    return bool(tokens) and all(t in labels or t.isdigit() for t in tokens)
+
+
+def canonical(name: str) -> str:
+    """Return *name* without its trailing parenthetical and outer whitespace.
+
+    Only a parenthetical at the very end is removed, so
+    "Mark (Speaker 01)" becomes "Mark".
+    """
+    without_suffix = re.sub(r"\s*\([^)]*\)\s*$", "", name)
+    return without_suffix.strip()
+
+
+def _read_entries(path: Path, key: str) -> list[dict]:
+    """Return the dict entries stored under *key* in the JSON file *path*.
+
+    A missing file yields an empty list. An unreadable or malformed file is
+    reported on stderr and also yields an empty list, so staging stays
+    best-effort instead of crashing the pipeline.
+    """
+    if not path.exists():
+        return []
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as err:
+        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
+        print(f"Warning: could not read {path.name}: {err}", file=sys.stderr)
+        return []
+    entries = data.get(key) if isinstance(data, dict) else None
+    if not isinstance(entries, list):
+        return []
+    return [entry for entry in entries if isinstance(entry, dict)]
+
+
+def _name_key(value: object) -> str:
+    """Return the lower-cased canonical key for *value* ("" if not a string)."""
+    return canonical(value).lower() if isinstance(value, str) else ""
+
+
+def main() -> int:
+    """Stage newly inferred speaker names into the pending file.
+
+    Scans every ``*/analysis.json`` under the analysis directory, collects
+    non-generic ``speaker_map`` values, and appends one pending entry per
+    name that is not already a known person/alias, pending, or dismissed.
+
+    Returns:
+        0 on success, 1 when the analysis directory does not exist.
+    """
+    if not ANALYSIS_DIR.exists():
+        print("No analysis directory found.", file=sys.stderr)
+        return 1
+
+    labels = load_generic_labels()
+
+    # Everything that must not be staged again. Candidates are looked up by
+    # canonical key (trailing parenthetical stripped), so the stored names
+    # are normalised the same way. Comparing raw names instead would
+    # re-stage "Mark (Speaker 01)" on every run, because its key "mark"
+    # never equals the stored "mark (speaker 01)".
+    known: set[str] = {_name_key(name) for name in load_dismissed()}
+    for person in _read_entries(PEOPLE_FILE, "people"):
+        known.add(_name_key(person.get("name")))
+        aliases = person.get("aliases")
+        if isinstance(aliases, list):
+            known.update(_name_key(alias) for alias in aliases)
+    pending_entries = _read_entries(PENDING_FILE, "pending")
+    known.update(_name_key(entry.get("name")) for entry in pending_entries)
+    known.discard("")
+
+    # Scan analyses for speaker_map entries
+    # Track: canonical name → {display variants, recordings seen in}
+    found: dict[str, dict] = {}
+    for analysis_path in sorted(ANALYSIS_DIR.glob("*/analysis.json")):
+        try:
+            data = json.loads(analysis_path.read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            continue
+        speaker_map = data.get("speaker_map") if isinstance(data, dict) else None
+        if not isinstance(speaker_map, dict):
+            continue
+
+        recording_dir = analysis_path.parent.name
+        for raw in speaker_map.values():
+            if not isinstance(raw, str):
+                continue
+            name = raw.strip()
+            if is_generic(name, labels):
+                continue
+            key = _name_key(name)
+            if not key or key in known:
+                continue
+            info = found.setdefault(key, {"variants": {}, "recordings": set()})
+            info["variants"][name] = info["variants"].get(name, 0) + 1
+            info["recordings"].add(recording_dir)
+
+    # Build pending entries
+    now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+    for key in sorted(found):
+        variants = found[key]["variants"]
+        # Pick best display name: most frequent, then longest
+        best = min(variants, key=lambda v: (-variants[v], -len(v)))
+        pending_entries.append({
+            "id": str(uuid.uuid4()),
+            "name": best,
+            "seenIn": sorted(found[key]["recordings"]),
+            "count": sum(variants.values()),
+            "suggestedMatch": None,  # UI can populate this
+            "createdAt": now,
+        })
+    added = len(found)
+
+    # Write pending file
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+    PENDING_FILE.write_text(
+        json.dumps({"pending": pending_entries}, indent=2) + "\n",
+        encoding="utf-8",
+    )
+    print(f"Staged {added} new speaker(s) for review (total pending: {len(pending_entries)})")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/server/index.ts b/server/index.ts
index 4a9f1a5..9e0e864 100644
--- a/server/index.ts
+++ b/server/index.ts
@@ -235,6 +235,8 @@ app.get("/api/sync/history", (_req, res) => {
// ── People API ───────────────────────────────────────────────
const PEOPLE_FILE = path.join(ROOT, ".seam", "people.json");
+// Staging list of inferred speakers awaiting review: { "pending": [...] }.
+const PENDING_PEOPLE_FILE = path.join(ROOT, ".seam", "people-pending.json");
+// Plain-text exclusion list of dismissed speaker names, one per line.
+const DISMISSED_FILE = path.join(ROOT, ".seam", "dismissed-speakers.txt");
interface Person {
id: string;
@@ -322,6 +324,115 @@ app.delete("/api/people/:id", (req, res) => {
res.json({ ok: true });
});
+// ── Pending People (staging) ─────────────────────────────────
+
+// One staged speaker as stored in people-pending.json.
+interface PendingPerson {
+ id: string; // UUID string assigned by stage-people.py
+ name: string; // display name chosen by the stager (most frequent variant)
+ seenIn: string[]; // sorted names of the recording directories it appeared in
+ count: number; // total occurrences across the scanned speaker maps
+ suggestedMatch: string | null; // written as null by the stager
+ createdAt: string; // UTC ISO-8601 timestamp ending in "Z"
+}
+
+/**
+ * Load the pending-speaker list from disk.
+ *
+ * Returns an empty list when the file is missing, unreadable, not valid
+ * JSON, or does not hold an array under "pending". Read/parse failures are
+ * logged instead of being swallowed silently.
+ */
+function readPending(): PendingPerson[] {
+  if (!existsSync(PENDING_PEOPLE_FILE)) return [];
+  try {
+    const parsed = JSON.parse(readFileSync(PENDING_PEOPLE_FILE, "utf-8"));
+    // Callers use array methods (findIndex/splice) on the result, so never
+    // hand back a non-array value from a hand-edited file.
+    return parsed && Array.isArray(parsed.pending) ? parsed.pending : [];
+  } catch (err) {
+    console.warn(`Could not read ${PENDING_PEOPLE_FILE}:`, err);
+    return [];
+  }
+}
+
+/**
+ * Persist the pending-speaker list.
+ *
+ * An empty list is written as `{ "pending": [] }` rather than deleting the
+ * file. The previous deletion path called `require("fs")` inside a
+ * swallow-all try/catch; where `require` is unavailable (ES module
+ * execution) the delete silently failed and the last pending entry could
+ * never be removed. Both readPending() and stage-people.py accept an
+ * existing file with an empty list.
+ */
+function writePending(pending: PendingPerson[]) {
+  writeFileSync(PENDING_PEOPLE_FILE, JSON.stringify({ pending }, null, 2) + "\n");
+}
+
+/**
+ * Append a speaker name to the dismissed-speakers exclusion list.
+ *
+ * The name is stored lower-cased, one entry per line, without duplicates.
+ * Its canonical form (trailing parenthetical removed) is stored as well,
+ * because stage-people.py checks candidates by that canonical key: without
+ * it, dismissing "Mark (Speaker 01)" would not stop "mark" from being
+ * staged again.
+ */
+function addDismissed(name: string) {
+  const existing = existsSync(DISMISSED_FILE) ? readFileSync(DISMISSED_FILE, "utf-8") : "";
+  // Split on LF or CRLF so a file edited on Windows does not yield entries
+  // with a stray "\r" that defeat de-duplication.
+  const names = new Set(
+    existing
+      .split(/\r?\n/)
+      .map((line) => line.trim())
+      .filter(Boolean),
+  );
+  // Collapse embedded newlines: one name must stay on one line.
+  const lowered = name.replace(/[\r\n]+/g, " ").trim().toLowerCase();
+  const canonical = lowered.replace(/\s*\([^)]*\)\s*$/, "").trim();
+  if (lowered) names.add(lowered);
+  if (canonical) names.add(canonical);
+  writeFileSync(DISMISSED_FILE, [...names].join("\n") + "\n");
+}
+
+// Return every speaker currently awaiting review.
+app.get("/api/people/pending", (_req, res) => {
+  const pending = readPending();
+  res.json({ pending });
+});
+
+// Confirm — move from pending to people.json as a new person.
+// Responds 404 when the pending id is unknown, otherwise with the new person.
+// The request parameter is named `req` (not `_req`) because it IS read here;
+// this matches the merge and dismiss handlers.
+app.post("/api/people/pending/:id/confirm", (req, res) => {
+  const pending = readPending();
+  const idx = pending.findIndex((p) => p.id === req.params.id);
+  if (idx === -1) {
+    res.status(404).json({ error: "Not found" });
+    return;
+  }
+  const entry = pending[idx];
+  const people = readPeople();
+  const person: Person = {
+    id: randomUUID(),
+    name: entry.name,
+    source: "inferred",
+    createdAt: new Date().toISOString(),
+  };
+  people.push(person);
+  // Persist the person first: if this write fails, the entry is still
+  // pending and can be confirmed again.
+  writePeople(people);
+  pending.splice(idx, 1);
+  writePending(pending);
+  res.json(person);
+});
+
+// Merge — add as alias to an existing person, remove from pending.
+// Body: { targetPersonId: string }. Responds 400 when it is missing or not a
+// string, 404 when either the pending entry or the target person is unknown,
+// otherwise with the updated target person.
+app.post("/api/people/pending/:id/merge", (req, res) => {
+  // req.body is undefined when no JSON body was sent or parsed; default to
+  // {} so a bad request yields a 400 instead of a thrown TypeError.
+  const { targetPersonId } = (req.body || {}) as { targetPersonId?: unknown };
+  if (typeof targetPersonId !== "string" || !targetPersonId) {
+    res.status(400).json({ error: "targetPersonId required" });
+    return;
+  }
+  const pending = readPending();
+  const idx = pending.findIndex((p) => p.id === req.params.id);
+  if (idx === -1) {
+    res.status(404).json({ error: "Pending person not found" });
+    return;
+  }
+  const entry = pending[idx];
+  const people = readPeople();
+  const target = people.find((p) => p.id === targetPersonId);
+  if (!target) {
+    res.status(404).json({ error: "Target person not found" });
+    return;
+  }
+  // Add as alias, unless it duplicates (case-insensitively) the person's own
+  // name or an alias they already have.
+  if (!target.aliases) target.aliases = [];
+  const alias = entry.name.toLowerCase();
+  const alreadyKnown =
+    target.name.toLowerCase() === alias ||
+    target.aliases.some((a) => a.toLowerCase() === alias);
+  if (!alreadyKnown) {
+    target.aliases.push(entry.name);
+  }
+  writePeople(people);
+  pending.splice(idx, 1);
+  writePending(pending);
+  res.json(target);
+});
+
+// Dismiss — record the name in the exclusion list and drop it from pending.
+// Responds 404 when the pending id is unknown.
+app.post("/api/people/pending/:id/dismiss", (req, res) => {
+  const queue = readPending();
+  const position = queue.findIndex(({ id }) => id === req.params.id);
+  if (position < 0) {
+    res.status(404).json({ error: "Not found" });
+    return;
+  }
+  // splice() only mutates the in-memory list; the exclusion file is still
+  // written before the pending file.
+  const [removed] = queue.splice(position, 1);
+  addDismissed(removed.name);
+  writePending(queue);
+  res.json({ ok: true });
+});
+
// Delete a recording (and its analysis), track it so sync doesn't re-pull
const DELETED_FILE = path.join(ROOT, ".seam", ".deleted");
diff --git a/src/hooks/usePendingPeople.ts b/src/hooks/usePendingPeople.ts
new file mode 100644
index 0000000..3a35877
--- /dev/null
+++ b/src/hooks/usePendingPeople.ts
@@ -0,0 +1,60 @@
+import { useState, useEffect, useCallback } from "react";
+
+// Base URL of the API server; hard-coded to a local instance on port 3001.
+const API = "http://localhost:3001";
+
+// One staged speaker as returned by GET /api/people/pending.
+export interface PendingPerson {
+ id: string; // identifier used in the confirm/merge/dismiss URLs
+ name: string; // display name of the inferred speaker
+ seenIn: string[]; // recordings the name appeared in
+ count: number; // number of mentions
+ suggestedMatch: string | null; // null unless a match has been suggested
+ createdAt: string; // timestamp string set when the entry was staged
+}
+
+/**
+ * React hook exposing the staged ("pending") speakers and their actions.
+ *
+ * Returns:
+ *  - pending: current list of staged speakers
+ *  - loading: true until the first fetch settles
+ *  - confirm(id): create a person from the entry; resolves to the response
+ *    JSON, or null when the request is rejected
+ *  - merge(id, targetPersonId): add the entry as an alias of an existing
+ *    person; resolves to the response JSON, or null when rejected
+ *  - dismiss(id): drop the entry and exclude its name
+ *  - refresh(): re-fetch the list from the server
+ */
+export function usePendingPeople() {
+  // Explicit element type: a bare useState([]) infers never[], which makes
+  // the `p.id` accesses in the updaters below a type error.
+  const [pending, setPending] = useState<PendingPerson[]>([]);
+  const [loading, setLoading] = useState(true);
+
+  const refresh = useCallback(() => {
+    fetch(`${API}/api/people/pending`)
+      .then((r) => {
+        // Treat HTTP errors like network errors so an error payload does
+        // not wipe the list that is currently displayed.
+        if (!r.ok) throw new Error(`HTTP ${r.status}`);
+        return r.json();
+      })
+      .then((data) => {
+        setPending(data.pending || []);
+        setLoading(false);
+      })
+      .catch(() => setLoading(false));
+  }, []);
+
+  useEffect(() => { refresh(); }, [refresh]);
+
+  const confirm = useCallback(async (id: string) => {
+    const res = await fetch(`${API}/api/people/pending/${id}/confirm`, { method: "POST" });
+    if (res.ok) {
+      setPending((prev) => prev.filter((p) => p.id !== id));
+      return await res.json();
+    }
+    return null;
+  }, []);
+
+  const merge = useCallback(async (id: string, targetPersonId: string) => {
+    const res = await fetch(`${API}/api/people/pending/${id}/merge`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ targetPersonId }),
+    });
+    if (res.ok) {
+      setPending((prev) => prev.filter((p) => p.id !== id));
+      return await res.json();
+    }
+    return null;
+  }, []);
+
+  const dismiss = useCallback(async (id: string) => {
+    const res = await fetch(`${API}/api/people/pending/${id}/dismiss`, { method: "POST" });
+    if (res.ok) {
+      setPending((prev) => prev.filter((p) => p.id !== id));
+    }
+  }, []);
+
+  return { pending, loading, confirm, merge, dismiss, refresh };
+}
diff --git a/src/pages/PeoplePage.tsx b/src/pages/PeoplePage.tsx
index 770344f..58d7482 100644
--- a/src/pages/PeoplePage.tsx
+++ b/src/pages/PeoplePage.tsx
@@ -1,5 +1,6 @@
import { useState } from "react";
import { usePeople } from "@/hooks/usePeople";
+import { usePendingPeople } from "@/hooks/usePendingPeople";
import { useRecordings } from "@/hooks/useRecordings";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
@@ -18,13 +19,19 @@ import {
Users,
Mic,
Save,
+ CheckCircle2,
+ GitMerge,
+ XCircle,
+ AlertCircle,
} from "lucide-react";
import { tagClassName } from "@/lib/tag-colors";
import type { Person } from "@/types/people";
export function PeoplePage() {
- const { people, addPerson, updatePerson, deletePerson } = usePeople();
+ const { people, addPerson, updatePerson, deletePerson, refresh: refreshPeople } = usePeople();
+ const { pending, confirm: confirmPending, merge: mergePending, dismiss: dismissPending } = usePendingPeople();
const { recordings } = useRecordings();
+ const [mergeTarget, setMergeTarget] = useState(null); // pending person id being merged
const [newName, setNewName] = useState("");
const [newRole, setNewRole] = useState("");
const [newTags, setNewTags] = useState("");
@@ -128,6 +135,89 @@ export function PeoplePage() {
+ {/* Pending speakers */}
+ {pending.length > 0 && (
+
+
+
+
+ {pending.length} new speaker{pending.length !== 1 ? "s" : ""} detected
+
+
+
+ {pending.map((p) => (
+
+
+
+
+ {p.name}
+
+ {p.count} mention{p.count !== 1 ? "s" : ""} in {p.seenIn.length} recording{p.seenIn.length !== 1 ? "s" : ""}
+
+
+
+
+
+
+
+
+ {/* Merge picker — shows when Merge is clicked */}
+ {mergeTarget === p.id && (
+
+
+ Merge "{p.name}" as alias of:
+
+
+ {people.map((person) => (
+
+ ))}
+
+
+ )}
+
+
+ ))}
+
+
+
+ )}
+
{/* Add person */}