From 90c92cc0e39c2c6e8f774322ccf9f7bcb8c982af Mon Sep 17 00:00:00 2001 From: "Carlos D. Escobar-Valbuena" Date: Mon, 1 Jun 2026 16:10:20 -0500 Subject: [PATCH 1/2] feat(0.5.0): auto-load task-relevant KG entities in intake (BRO-1295) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the "contextualized on who, not on what" gap. Until now intake surfaced only persona constraints every prompt; task-specific entities loaded only when a domain lens scored >=2. For off-lens prompts (most), no topic knowledge was auto-loaded. Now every turn carries the top-5 relevant entities from the graph. Mechanism (self-contained — no cross-skill import of the kg loader): - After lens selection, scan docs/knowledge-index.md (dense-catalog-v2), score each entity's slug/tags/claim vs prompt tokens, surface top-5 in a new "Task-relevant knowledge (auto-loaded by relevance ...)" block. Guards (found + fixed via P11 validation against the live catalog): - Curated-match gate: surface only on slug/tag overlap, never body-text alone. - Body-excerpt suppression: weak-core_claim entities render path-only, not markdown noise (ellipsis + structural-marker heuristic). - Robust header parse: tolerates trailing " · score N/9" that drifted the parser. - Weighted slug(3)/tag(2)/claim(1), min 3, dedup vs persona/lens, persona skipped. - Never blocks the turn: any absence/parse error -> empty list, exit 0. 4 new tests; full suite 51/51 green. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 36 ++++++++ scripts/role-x.py | 194 +++++++++++++++++++++++++++++++++++++++++++ tests/test_role_x.py | 95 +++++++++++++++++++++ 3 files changed, 325 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53466b5..9265015 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,42 @@ All notable changes to `role-x` are documented in this file. 
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.0] — 2026-06-01 + +Adds **task-relevant entity auto-loading** to intake (BRO-1295). Until now the +intake hook surfaced only *persona* constraints every prompt; task-specific +entities loaded only when a domain lens scored ≥2. For off-lens prompts (most), +no topic knowledge was auto-loaded — the agent was contextualized on *who* the +user is, not on *what we already know about the task at hand*. This closes that +gap, so every turn carries the most relevant entities from the knowledge graph. + +### Added + +- **Task-relevant entity scan** in `_format_intake_context`. After lens + selection, intake reads the dense knowledge catalog (`docs/knowledge-index.md`, + schema `dense-catalog-v2`), scores each entity's slug/tags/claim against the + prompt tokens, and surfaces the top-5 under a new *"Task-relevant knowledge + (auto-loaded by relevance — read full bodies via `/kg load <slug>` …)"* block. + Self-contained: no cross-skill import of the `kg` loader, so the hook degrades + gracefully when the catalog (or kg) is absent. +- Helpers `_task_scan_tokens`, `_parse_catalog_entities`, `_score_catalog_entity`, + `_load_task_entities`, `_clean_claim_or_none`; `TASK_ENTITY_*` constants. +- 4 tests covering surface-relevant, body-excerpt→path-only, curated-gate + rejection of body-only matches, and graceful no-catalog behavior. + +### Behavior / guards + +- **Curated-match gate**: an entity surfaces only on a slug or tag token overlap, + never on body-text claim overlap alone — precision over a 300+ entity graph. +- **Weighted scoring**: slug (3) > tag (2) > claim (1); min total 3; top-5; + deduped against persona/lens entities; `persona`-type skipped (surfaced already). 
+- **Body-excerpt suppression**: entities with a weak/missing `core_claim` carry a + truncated body excerpt in the catalog; those render path-only instead of + markdown noise (ellipsis + structural-marker heuristic). +- Robust catalog header parse — tolerates a trailing ` · score N/9` suffix that + previously made the parser drift and mis-attribute claims. +- Never blocks the turn: any absence/parse error → empty list, exit 0. + ## [0.4.2] — 2026-05-29 Activates the previously-inert `context_loaders.entities` wire. The field was diff --git a/scripts/role-x.py b/scripts/role-x.py index 99d8535..2bf096f 100644 --- a/scripts/role-x.py +++ b/scripts/role-x.py @@ -73,6 +73,31 @@ } SIGNAL_WEIGHT_KEYS = set(DEFAULT_SIGNAL_WEIGHTS.keys()) +# v0.5.0 — task-relevant entity auto-loading (BRO-1295). After lens selection, +# scan the dense knowledge catalog (docs/knowledge-index.md, schema +# dense-catalog-v2) and surface the top-N entities whose slug/tags/claim match +# the prompt — so every turn is contextualized on what we already know about the +# topic, not only persona constraints. Self-contained: no cross-skill import of +# the kg loader (the hook must not break when kg is absent). +TASK_ENTITY_CATALOG_REL = "docs/knowledge-index.md" +TASK_ENTITY_ENTITIES_ROOT = "research/entities" +TASK_ENTITY_TOP_N = 5 +TASK_ENTITY_MIN_SCORE = 3 +TASK_ENTITY_MIN_TOKEN_LEN = 4 +# Curated slug/tag matches outrank free-text claim matches. 
+TASK_ENTITY_W_SLUG = 3 +TASK_ENTITY_W_TAG = 2 +TASK_ENTITY_W_CLAIM = 1 +TASK_ENTITY_STOPWORDS = frozenset({ + "this", "that", "with", "make", "sure", "your", "you", "the", "and", "for", + "how", "can", "use", "used", "using", "always", "being", "properly", + "current", "setup", "what", "would", "should", "could", "need", "want", + "into", "from", "about", "have", "does", "they", "them", "then", "than", + "when", "where", "which", "while", "work", "working", "here", "there", + "their", "will", "just", "like", "also", "more", "most", "some", "such", + "very", "over", "only", "each", "both", "onto", "across", "these", "those", +}) + def parse_frontmatter(text: str) -> dict: """Extract YAML frontmatter from a markdown file.""" @@ -653,6 +678,152 @@ def _safe_inline(text: str) -> str: return " ".join(cleaned.split())[:200] +def _task_scan_tokens(prompt: str) -> set[str]: + """Filtered prompt tokens for catalog scanning: length-gated, stopword-free.""" + return { + tok + for tok in _tokenize_prompt(prompt) + if len(tok) >= TASK_ENTITY_MIN_TOKEN_LEN and tok not in TASK_ENTITY_STOPWORDS + } + + +def _parse_catalog_entities(catalog_text: str) -> list[dict]: + """Parse a dense-catalog-v2 knowledge index into entity records. + + Block shape: + #### [·] + + → out · ← in · #tag #tag · src: ... + path: /.md + + Each record is ``{slug, type, claim, tags(set), path}``. Resilient to format + drift — a malformed block is skipped, never raised. + """ + records: list[dict] = [] + cur: dict | None = None + # Bracket may carry a trailing suffix (e.g. ` · score 7/9`); don't anchor to + # end-of-line or the parser drifts and mis-attributes the next block's text. 
+ header_re = re.compile(r"^####\s+(\S+)\s+\[([^\]·]+)·([^\]]+)\]") + for line in catalog_text.splitlines(): + m = header_re.match(line) + if m: + if cur: + records.append(cur) + cur = { + "slug": m.group(1).strip(), + "type": m.group(2).strip(), + "claim": "", + "tags": set(), + "path": "", + } + continue + if cur is None: + continue + s = line.strip() + if not s: + continue + if s.startswith("path:"): + cur["path"] = s[len("path:"):].strip() + elif s.startswith(("→", "←")) or s.startswith("#") or " src:" in s: + # edges / tags / src line — harvest #tags for scoring + for tag in re.findall(r"#([A-Za-z0-9][A-Za-z0-9_-]*)", s): + cur["tags"].add(tag.lower()) + elif not cur["claim"]: + cur["claim"] = s + if cur: + records.append(cur) + return records + + +def _score_catalog_entity(rec: dict, prompt_tokens: set[str]) -> int: + """Weighted token-overlap score between a catalog record and the prompt.""" + slug_tokens = {t for t in re.split(r"[-_]", rec["slug"].lower()) if t} + claim_tokens = { + t + for t in _tokenize_prompt(rec.get("claim", "")) + if len(t) >= TASK_ENTITY_MIN_TOKEN_LEN + } + return ( + TASK_ENTITY_W_SLUG * len(slug_tokens & prompt_tokens) + + TASK_ENTITY_W_TAG * len(rec["tags"] & prompt_tokens) + + TASK_ENTITY_W_CLAIM * len(claim_tokens & prompt_tokens) + ) + + +def _load_task_entities( + workspace: Path | None, + prompt: str, + exclude_slugs: set[str], +) -> list[tuple[str, str]]: + """Return up to TASK_ENTITY_TOP_N ``(path, claim)`` task-relevant entities. + + Scans ``docs/knowledge-index.md`` by prompt relevance. Returns an empty list + on any absence/error — this rides the UserPromptSubmit hook, which must never + block the turn. Persona entities are skipped (surfaced as constraints already) + and ``exclude_slugs`` drops anything a lens already surfaced. 
+ """ + if workspace is None or not prompt: + return [] + try: + catalog = workspace / TASK_ENTITY_CATALOG_REL + if not catalog.is_file(): + return [] + prompt_tokens = _task_scan_tokens(prompt) + if not prompt_tokens: + return [] + text = catalog.read_text(encoding="utf-8") + except OSError: + return [] + + scored: list[tuple[int, dict]] = [] + for rec in _parse_catalog_entities(text): + slug = rec.get("slug") or "" + if not slug or slug in exclude_slugs or rec.get("type") == "persona": + continue + # Require a curated (slug or tag) match — never surface on body-text + # claim overlap alone. Entities with a missing/weak core_claim carry a + # body excerpt in the catalog; scoring that excerpt is noise, not signal. + slug_tokens = {t for t in re.split(r"[-_]", slug.lower()) if t} + if not ((slug_tokens & prompt_tokens) or (rec["tags"] & prompt_tokens)): + continue + score = _score_catalog_entity(rec, prompt_tokens) + if score >= TASK_ENTITY_MIN_SCORE: + scored.append((score, rec)) + # Highest score first; tie-break on slug for deterministic output. + scored.sort(key=lambda sr: (-sr[0], sr[1]["slug"])) + + out: list[tuple[str, str]] = [] + for _score, rec in scored[:TASK_ENTITY_TOP_N]: + rel = rec["path"] or f"{rec['type']}/{rec['slug']}.md" + full = f"{TASK_ENTITY_ENTITIES_ROOT}/{rel}" + if _confined_entity_path(full, workspace) is None: + continue # path escapes the workspace — never surface it + out.append((full, rec.get("claim", ""))) + return out + + +def _clean_claim_or_none(claim: str) -> str | None: + """Return a clean one-line claim, or None if it looks like a body excerpt. + + Entities with a weak/missing ``core_claim`` get a markdown body excerpt in + the catalog. Those carry structure markers a real one-sentence claim never + has; when detected, the caller renders a path-only line instead of noise. + """ + c = claim.strip() + if not c: + return None + # A real core_claim is ≤140 chars, so the catalog stores it whole. 
A claim + # ending in an ellipsis was truncated → it's an over-length body excerpt, + # not a clean claim. (Principled + low false-positive: legit claims that + # merely contain a hyphen or backtick are kept.) + if c.endswith("...") or c.endswith("…"): + return None + # Structural markdown markers a one-sentence claim never carries. + if any(marker in c for marker in ("**", " > ", "](", "####")): + return None + return c + + def _format_intake_context(selection: dict, workspace: Path | None = None) -> str: """Render the selection as a markdown block that becomes agent context. @@ -735,6 +906,28 @@ def _format_intake_context(selection: dict, workspace: Path | None = None) -> st if entity_lines: lines.append("Knowledge-graph constraints to honor (core_claim):") lines.extend(entity_lines) + + # v0.5.0 — task-relevant entities, auto-loaded from the catalog by prompt + # relevance (BRO-1295). Surfaced separately from persona constraints so the + # agent is contextualized on prior knowledge of the *topic*, not just identity + # — closing the "contextualized on who, not on what" gap for off-lens prompts. 
+ task_entities = _load_task_entities( + workspace, + selection.get("prompt", ""), + {Path(p).stem for p in seen_entities}, + ) + if task_entities: + lines.append( + "Task-relevant knowledge (auto-loaded by relevance — read full " + "bodies via `/kg load ` or Read before relying on them):" + ) + for path, claim in task_entities: + display = _safe_inline(path) + clean = _clean_claim_or_none(claim) + claim_s = _safe_inline(clean) if clean else "" + lines.append( + f" - {claim_s} · [{display}]" if claim_s else f" - {display}" + ) if suggestions and mode != "augment": lines.append("Prompt-improvement suggestions (optional):") for sug in suggestions: @@ -831,6 +1024,7 @@ def cmd_intake(args: argparse.Namespace) -> int: signals = _git_signals(workspace) selection = _select_lenses(roles_dir, signals, prompt) + selection["prompt"] = prompt # v0.5.0 — enables task-entity catalog scan # v0.4.1: attach authoring nudge for _meta-only domain-rich prompts selection["authoring_nudge"] = _build_authoring_nudge(prompt, selection) _emit_event(session_id, prompt, selection) diff --git a/tests/test_role_x.py b/tests/test_role_x.py index b3e8088..69e3ac4 100644 --- a/tests/test_role_x.py +++ b/tests/test_role_x.py @@ -1238,3 +1238,98 @@ def test_intake_entity_brackets_and_controls_sanitized(tmp_path): # entity's own brackets were stripped by _safe_inline (no-claim → bare render, # so any '[' / ']' on this line could only have come from the malicious entry) assert "[" not in ln and "]" not in ln + + +# --- v0.5.0: task-relevant entity auto-loading (BRO-1295) --- + +_FIXTURE_CATALOG = """--- +generator: bookkeeping index +schema: dense-catalog-v2 +entity_count: 4 +--- + +# Knowledge Index + +## Entities + +### concept (1) + +#### stability-budget [concept·entity] +Stability lambda fixes inter-level time-scale ratios; unbuyable and orthogonal to compute tempo. 
+→ rcs · #concept #rcs #stability · src: paper +path: concept/stability-budget.md + +### pattern (3) + +#### proactive-documentation [pattern·entity] +Knowledge capture is the agent default action; file proactively and report after, never ask. +→ x · #pattern #bookkeeping · src: synthesis +path: pattern/proactive-documentation.md + +#### stability-weak [pattern·candidate] · score 5/9 +This body excerpt was truncated by the catalog because it exceeded the claim length cap and continues... +→ y · #pattern #stability · src: note +path: pattern/stability-weak.md + +#### body-only-noise [pattern·candidate] +A short clean claim that merely mentions stability in prose but whose slug and tags are unrelated here. +→ z · #pattern #unrelated · src: note +path: pattern/body-only-noise.md +""" + + +def _seed_catalog(workspace: Path, catalog: str = _FIXTURE_CATALOG) -> None: + """Write a dense-catalog-v2 knowledge index into the seeded workspace.""" + docs = workspace / "docs" + docs.mkdir(exist_ok=True) + (docs / "knowledge-index.md").write_text(catalog, encoding="utf-8") + + +def test_intake_surfaces_relevant_task_entity(tmp_path): + """A prompt whose tokens hit an entity's slug/tags surfaces it with its claim.""" + workspace = _seed_workspace(tmp_path) + _seed_catalog(workspace) + rc, out, err = run_cli( + "intake", "--prompt", "explain the stability budget margin", + "--workspace", str(workspace), "--session", "task-1", + ) + assert rc == 0, f"stderr={err}" + assert "Task-relevant knowledge" in out + assert "concept/stability-budget.md" in out + assert "unbuyable" in out # clean core_claim rendered inline + + +def test_intake_task_entity_body_excerpt_renders_path_only(tmp_path): + """An entity whose catalog claim is a truncated body excerpt renders path-only.""" + workspace = _seed_workspace(tmp_path) + _seed_catalog(workspace) + rc, out, err = run_cli( + "intake", "--prompt", "explain the stability budget margin", + "--workspace", str(workspace), "--session", "task-2", + ) 
+ assert rc == 0, f"stderr={err}" + assert "pattern/stability-weak.md" in out # surfaced via slug match + assert "truncated by the catalog" not in out # body excerpt suppressed + + +def test_intake_task_entity_curated_gate_rejects_body_only_match(tmp_path): + """Body-text-only relevance (no slug/tag overlap) must NOT surface an entity.""" + workspace = _seed_workspace(tmp_path) + _seed_catalog(workspace) + rc, out, err = run_cli( + "intake", "--prompt", "explain the stability budget margin", + "--workspace", str(workspace), "--session", "task-3", + ) + assert rc == 0, f"stderr={err}" + assert "body-only-noise.md" not in out + + +def test_intake_no_catalog_emits_no_task_block(tmp_path): + """No docs/knowledge-index.md → graceful: no task block, exit 0.""" + workspace = _seed_workspace(tmp_path) # no catalog seeded + rc, out, err = run_cli( + "intake", "--prompt", "explain the stability budget margin", + "--workspace", str(workspace), "--session", "task-4", + ) + assert rc == 0, f"stderr={err}" + assert "Task-relevant knowledge" not in out From c48515e92704e6fe2125db6b9df15f76d944f615 Mon Sep 17 00:00:00 2001 From: "Carlos D. Escobar-Valbuena" Date: Mon, 1 Jun 2026 16:22:38 -0500 Subject: [PATCH 2/2] =?UTF-8?q?fix(P20):=20resolve=20two=20blocking=20revi?= =?UTF-8?q?ew=20findings=20=E2=80=94=20hook=20crash=20+=20claim=20suppress?= =?UTF-8?q?ion=20(BRO-1295)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P20 cross-review (FAIL 6/10) found two real, verified, untested defects: 1. BLOCKING — `except OSError` does not catch `UnicodeDecodeError` (a ValueError), so a single non-UTF-8 byte in any scraped catalog claim crashed the every-prompt hook (exit 1). Siblings already use `except Exception`; my narrow catch was a regression. Fix: broaden to `except Exception` + read with errors="replace" + an 8 MiB size cap (keeps the hook inside its time budget at any graph size). 2. 
BLOCKING — `_clean_claim_or_none` matched interior " > " as a markdown blockquote, false-suppressing legit math claims. On the live catalog this stripped the claim off `stability-budget` itself (the flagship RCS entity: "lambda_i must stay > 0 …"). Fix: detect only a *leading* "> "; the ellipsis check still catches the real body excerpts. Non-blocking (also applied): type whitelist (drop discovery/question noise), dedup by type/slug (not bare stem), catalog size cap. Tests: +2 regression tests (math-inequality claim survives; non-UTF-8 catalog exits 0). Suite 53/53. Verified on the live 318-entity catalog: stability-budget now renders its full claim inline; 0 discovery/question noise on generic prompts. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/role-x.py | 38 +++++++++++++++++++++++++++++--------- tests/test_role_x.py | 41 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 68 insertions(+), 11 deletions(-) diff --git a/scripts/role-x.py b/scripts/role-x.py index 2bf096f..6bfcaf4 100644 --- a/scripts/role-x.py +++ b/scripts/role-x.py @@ -88,6 +88,15 @@ TASK_ENTITY_W_SLUG = 3 TASK_ENTITY_W_TAG = 2 TASK_ENTITY_W_CLAIM = 1 +# Catalog files larger than this are skipped (keeps the every-prompt hook inside +# its time budget regardless of graph growth). +TASK_ENTITY_CATALOG_MAX_BYTES = 8 * 1024 * 1024 +# Only these substantive knowledge types are auto-loaded. persona is surfaced as +# constraints already; discovery/question/living-test-case are low-signal noise. 
+TASK_ENTITY_TYPES = frozenset({ + "concept", "pattern", "tool", "project", + "framework-refinement", "industry-pattern", +}) TASK_ENTITY_STOPWORDS = frozenset({ "this", "that", "with", "make", "sure", "your", "you", "the", "and", "for", "how", "can", "use", "used", "using", "always", "being", "properly", @@ -753,14 +762,15 @@ def _score_catalog_entity(rec: dict, prompt_tokens: set[str]) -> int: def _load_task_entities( workspace: Path | None, prompt: str, - exclude_slugs: set[str], + exclude_keys: set[str], ) -> list[tuple[str, str]]: """Return up to TASK_ENTITY_TOP_N ``(path, claim)`` task-relevant entities. Scans ``docs/knowledge-index.md`` by prompt relevance. Returns an empty list on any absence/error — this rides the UserPromptSubmit hook, which must never - block the turn. Persona entities are skipped (surfaced as constraints already) - and ``exclude_slugs`` drops anything a lens already surfaced. + block the turn (so the except is broad: a single non-UTF-8 byte in a scraped + claim must degrade gracefully, not crash). Only substantive knowledge types + are considered; ``exclude_keys`` (``type/slug``) drops lens-surfaced entities. """ if workspace is None or not prompt: return [] @@ -768,17 +778,22 @@ def _load_task_entities( catalog = workspace / TASK_ENTITY_CATALOG_REL if not catalog.is_file(): return [] + if catalog.stat().st_size > TASK_ENTITY_CATALOG_MAX_BYTES: + return [] # pathological size — stay within the hook's time budget prompt_tokens = _task_scan_tokens(prompt) if not prompt_tokens: return [] - text = catalog.read_text(encoding="utf-8") - except OSError: - return [] + # errors="replace": one bad byte degrades a single claim, never crashes. 
+ text = catalog.read_text(encoding="utf-8", errors="replace") + except Exception: + return [] # never block the turn scored: list[tuple[int, dict]] = [] for rec in _parse_catalog_entities(text): slug = rec.get("slug") or "" - if not slug or slug in exclude_slugs or rec.get("type") == "persona": + if not slug or rec.get("type") not in TASK_ENTITY_TYPES: + continue + if f"{rec['type']}/{slug}" in exclude_keys: continue # Require a curated (slug or tag) match — never surface on body-text # claim overlap alone. Entities with a missing/weak core_claim carry a @@ -818,8 +833,13 @@ def _clean_claim_or_none(claim: str) -> str | None: # merely contain a hyphen or backtick are kept.) if c.endswith("...") or c.endswith("…"): return None + # A *leading* "> " is a markdown blockquote excerpt. Do NOT match an interior + # " > " — that occurs in legitimate math claims ("lambda must stay > 0") and + # false-suppressing those drops real core_claims (e.g. stability-budget). + if c.lstrip().startswith(">"): + return None # Structural markdown markers a one-sentence claim never carries. - if any(marker in c for marker in ("**", " > ", "](", "####")): + if any(marker in c for marker in ("**", "](", "####")): return None return c @@ -914,7 +934,7 @@ def _format_intake_context(selection: dict, workspace: Path | None = None) -> st task_entities = _load_task_entities( workspace, selection.get("prompt", ""), - {Path(p).stem for p in seen_entities}, + {f"{Path(p).parent.name}/{Path(p).stem}" for p in seen_entities}, ) if task_entities: lines.append( diff --git a/tests/test_role_x.py b/tests/test_role_x.py index 69e3ac4..d3c2826 100644 --- a/tests/test_role_x.py +++ b/tests/test_role_x.py @@ -1255,7 +1255,7 @@ def test_intake_entity_brackets_and_controls_sanitized(tmp_path): ### concept (1) #### stability-budget [concept·entity] -Stability lambda fixes inter-level time-scale ratios; unbuyable and orthogonal to compute tempo. 
+The shared stability margin lambda must stay > 0 at every level for exponential stability. → rcs · #concept #rcs #stability · src: paper path: concept/stability-budget.md @@ -1296,7 +1296,9 @@ def test_intake_surfaces_relevant_task_entity(tmp_path): assert rc == 0, f"stderr={err}" assert "Task-relevant knowledge" in out assert "concept/stability-budget.md" in out - assert "unbuyable" in out # clean core_claim rendered inline + # clean core_claim rendered inline — and it contains an interior " > 0" that + # must NOT be mistaken for a markdown blockquote and suppressed (BRO-1295 P20). + assert "exponential stability" in out def test_intake_task_entity_body_excerpt_renders_path_only(tmp_path): @@ -1333,3 +1335,38 @@ def test_intake_no_catalog_emits_no_task_block(tmp_path): ) assert rc == 0, f"stderr={err}" assert "Task-relevant knowledge" not in out + + +def test_intake_keeps_math_inequality_claim(tmp_path): + """A claim with an interior ' > ' (math) must render inline, not be mistaken + for a markdown blockquote and suppressed to path-only (P20 regression).""" + workspace = _seed_workspace(tmp_path) + _seed_catalog(workspace) + rc, out, err = run_cli( + "intake", "--prompt", "explain the stability budget margin", + "--workspace", str(workspace), "--session", "task-5", + ) + assert rc == 0, f"stderr={err}" + assert "must stay > 0" in out # the ' > 0' claim survives _clean_claim_or_none + + +def test_intake_non_utf8_catalog_does_not_crash(tmp_path): + """A non-UTF-8 byte in the catalog degrades gracefully (exit 0), never crashes + the every-prompt hook — UnicodeDecodeError is a ValueError, not OSError (P20).""" + workspace = _seed_workspace(tmp_path) + docs = workspace / "docs" + docs.mkdir(exist_ok=True) + # Valid dense-catalog-v2 shape (UTF-8 ·, →, ·) with a stray 0xff byte in a claim. 
+ raw = ( + b"---\nschema: dense-catalog-v2\n---\n\n## Entities\n\n### concept (1)\n\n" + b"#### stability-budget [concept\xc2\xb7entity]\n" + b"A claim carrying a bad byte \xff inside the stability margin text here.\n" + b"\xe2\x86\x92 rcs \xc2\xb7 #concept #stability \xc2\xb7 src: paper\n" + b"path: concept/stability-budget.md\n" + ) + (docs / "knowledge-index.md").write_bytes(raw) + rc, out, err = run_cli( + "intake", "--prompt", "explain the stability budget margin", + "--workspace", str(workspace), "--session", "task-6", + ) + assert rc == 0, f"stderr={err}" # no traceback; the hook never blocks the turn