broomva · broomva · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,42 @@ All notable changes to `role-x` are documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.5.0] — 2026-06-01
+
+Adds **task-relevant entity auto-loading** to intake (BRO-1295). Until now the
+intake hook surfaced only *persona* constraints every prompt; task-specific
+entities loaded only when a domain lens scored ≥2. For off-lens prompts (most),
+no topic knowledge was auto-loaded — the agent was contextualized on *who* the
+user is, not on *what we already know about the task at hand*. This closes that
+gap, so every turn carries the most relevant entities from the knowledge graph.
+
+### Added
+
+- **Task-relevant entity scan** in `_format_intake_context`. After lens
+  selection, intake reads the dense knowledge catalog (`docs/knowledge-index.md`,
+  schema `dense-catalog-v2`), scores each entity's slug/tags/claim against the
+  prompt tokens, and surfaces the top-5 under a new *"Task-relevant knowledge
+  (auto-loaded by relevance — read full bodies via `/kg load <slug>` …)"* block.
+  Self-contained: no cross-skill import of the `kg` loader, so the hook degrades
+  gracefully when the catalog (or kg) is absent.
+- Helpers `_task_scan_tokens`, `_parse_catalog_entities`, `_score_catalog_entity`,
+  `_load_task_entities`, `_clean_claim_or_none`; `TASK_ENTITY_*` constants.
+- 4 tests covering surface-relevant, body-excerpt→path-only, curated-gate
+  rejection of body-only matches, and graceful no-catalog behavior.
+
+### Behavior / guards
+
+- **Curated-match gate**: an entity surfaces only on a slug or tag token overlap,
+  never on body-text claim overlap alone — precision over a 300+ entity graph.
+- **Weighted scoring**: slug (3) > tag (2) > claim (1); min total 3; top-5; deduped
+  against persona/lens entities; the `TASK_ENTITY_TYPES` allow-list skips `persona` (surfaced already) and low-signal types (discovery/question/living-test-case).
+- **Body-excerpt suppression**: entities with a weak/missing `core_claim` carry a
+  truncated body excerpt in the catalog; those render path-only instead of
+  markdown noise (ellipsis + structural-marker heuristic).
+- Robust catalog header parse — tolerates a trailing ` · score N/9` suffix that
+  previously made the parser drift and mis-attribute claims.
+- Never blocks the turn: any absence/parse error → empty list, exit 0.
+
 ## [0.4.2] — 2026-05-29
 
 Activates the previously-inert `context_loaders.entities` wire. The field was

diff --git a/scripts/role-x.py b/scripts/role-x.py
@@ -73,6 +73,40 @@
 }
 SIGNAL_WEIGHT_KEYS = set(DEFAULT_SIGNAL_WEIGHTS.keys())
 
+# v0.5.0 — task-relevant entity auto-loading (BRO-1295). After lens selection,
+# scan the dense knowledge catalog (docs/knowledge-index.md, schema
+# dense-catalog-v2) and surface the top-N entities whose slug/tags/claim match
+# the prompt — so every turn is contextualized on what we already know about the
+# topic, not only persona constraints. Self-contained: no cross-skill import of
+# the kg loader (the hook must not break when kg is absent).
+TASK_ENTITY_CATALOG_REL = "docs/knowledge-index.md"  # joined onto the workspace root
+TASK_ENTITY_ENTITIES_ROOT = "research/entities"  # prefix for catalog `path:` values
+TASK_ENTITY_TOP_N = 5  # max entities surfaced per prompt
+TASK_ENTITY_MIN_SCORE = 3  # weighted score an entity must reach to surface
+TASK_ENTITY_MIN_TOKEN_LEN = 4  # shorter prompt/claim tokens are ignored
+# Per-token weights: curated slug/tag matches outrank free-text claim matches.
+TASK_ENTITY_W_SLUG = 3
+TASK_ENTITY_W_TAG = 2
+TASK_ENTITY_W_CLAIM = 1
+# Catalog files larger than this are skipped (keeps the every-prompt hook inside
+# its time budget regardless of graph growth).
+TASK_ENTITY_CATALOG_MAX_BYTES = 8 * 1024 * 1024  # 8 MiB
+# Only these substantive knowledge types are auto-loaded. persona is surfaced as
+# constraints already; discovery/question/living-test-case are low-signal noise.
+TASK_ENTITY_TYPES = frozenset({
+    "concept", "pattern", "tool", "project",
+    "framework-refinement", "industry-pattern",
+})
+TASK_ENTITY_STOPWORDS = frozenset({  # prompt tokens never used for matching
+    "this", "that", "with", "make", "sure", "your", "you", "the", "and", "for",
+    "how", "can", "use", "used", "using", "always", "being", "properly",
+    "current", "setup", "what", "would", "should", "could", "need", "want",
+    "into", "from", "about", "have", "does", "they", "them", "then", "than",
+    "when", "where", "which", "while", "work", "working", "here", "there",
+    "their", "will", "just", "like", "also", "more", "most", "some", "such",
+    "very", "over", "only", "each", "both", "onto", "across", "these", "those",
+})
+
 
 def parse_frontmatter(text: str) -> dict:
     """Extract YAML frontmatter from a markdown file."""
@@ -653,6 +687,163 @@ def _safe_inline(text: str) -> str:
     return " ".join(cleaned.split())[:200]
 
 
+def _task_scan_tokens(prompt: str) -> set[str]:
+    """Prompt tokens kept for catalog matching: min-length gated, no stopwords."""
+    min_len = TASK_ENTITY_MIN_TOKEN_LEN
+    tokens = _tokenize_prompt(prompt)
+    # Length gate first, then set-difference away the stopwords.
+    long_enough = {tok for tok in tokens if len(tok) >= min_len}
+    return long_enough - TASK_ENTITY_STOPWORDS
+
+
+def _parse_catalog_entities(catalog_text: str) -> list[dict]:
+    """Parse a dense-catalog-v2 knowledge index into entity records.
+
+    Block shape:
+        #### <slug> [<type>·<status>]
+        <claim line>
+        → out · ← in · #tag #tag · src: ...
+        path: <type>/<slug>.md
+
+    Each record is ``{slug, type, claim, tags(set), path}``. Drift-tolerant: a
+    header-shaped line that fails to parse closes the open record and is skipped.
+    """
+    records: list[dict] = []
+    cur: dict | None = None
+    # Unanchored at end-of-line: the bracket may carry a suffix (` · score 7/9`).
+    header_re = re.compile(r"^####\s+(\S+)\s+\[([^\]·]+)·([^\]]+)\]")
+    header_like_re = re.compile(r"^####\s+\S+\s+\[")
+    for line in catalog_text.splitlines():
+        m = header_re.match(line)
+        if m or header_like_re.match(line):
+            if cur:
+                records.append(cur)
+            cur = None  # malformed header: its tags/path must not reach a neighbour
+            if m:
+                cur = {
+                    "slug": m.group(1).strip(),
+                    "type": m.group(2).strip(),
+                    "claim": "",
+                    "tags": set(),
+                    "path": "",
+                }
+            continue
+        s = line.strip()
+        if cur is None or not s:
+            continue
+        if s.startswith("path:"):
+            cur["path"] = s[len("path:"):].strip()
+        elif s.startswith(("→", "←", "#")) or " src:" in s:
+            # edges / tags / src line — harvest #tags for scoring
+            for tag in re.findall(r"#([A-Za-z0-9][A-Za-z0-9_-]*)", s):
+                cur["tags"].add(tag.lower())
+        elif not cur["claim"]:
+            cur["claim"] = s
+    if cur:
+        records.append(cur)
+    return records
+
+
+def _score_catalog_entity(rec: dict, prompt_tokens: set[str]) -> int:
+    """Sum the per-field weights of every prompt token a catalog record shares.
+
+    Fields: ``-``/``_``-split slug parts, tags, claim tokens of minimum length.
+    """
+    min_len = TASK_ENTITY_MIN_TOKEN_LEN
+    slug_parts = set(filter(None, re.split(r"[-_]", rec["slug"].lower())))
+    claim_text = rec.get("claim", "")
+    claim_words = {w for w in _tokenize_prompt(claim_text) if len(w) >= min_len}
+    total = TASK_ENTITY_W_SLUG * len(slug_parts & prompt_tokens)
+    total += TASK_ENTITY_W_TAG * len(rec["tags"] & prompt_tokens)
+    total += TASK_ENTITY_W_CLAIM * len(claim_words & prompt_tokens)
+    return total
+
+
+def _load_task_entities(
+    workspace: Path | None,
+    prompt: str,
+    exclude_keys: set[str],
+) -> list[tuple[str, str]]:
+    """Return up to TASK_ENTITY_TOP_N ``(path, claim)`` task-relevant entities.
+
+    Scans ``docs/knowledge-index.md`` by prompt relevance. Returns an empty list
+    on any absence/error — this rides the UserPromptSubmit hook, which must never
+    block the turn, so catalog I/O *and* parsing, scoring and path confinement
+    all run under one broad except. Only substantive knowledge types are
+    considered; ``exclude_keys`` (``type/slug``) drops lens-surfaced entities.
+    """
+    if workspace is None or not prompt:
+        return []
+    try:
+        catalog = workspace / TASK_ENTITY_CATALOG_REL
+        if not catalog.is_file():
+            return []
+        if catalog.stat().st_size > TASK_ENTITY_CATALOG_MAX_BYTES:
+            return []  # pathological size — stay within the hook's time budget
+        prompt_tokens = _task_scan_tokens(prompt)
+        if not prompt_tokens:
+            return []
+        # errors="replace": one bad byte degrades a single claim, never crashes.
+        text = catalog.read_text(encoding="utf-8", errors="replace")
+        scored: list[tuple[int, dict]] = []
+        for rec in _parse_catalog_entities(text):
+            slug = rec.get("slug") or ""
+            if not slug or rec.get("type") not in TASK_ENTITY_TYPES:
+                continue
+            if f"{rec['type']}/{slug}" in exclude_keys:
+                continue
+            # Require a curated (slug or tag) match — never surface on body-text
+            # claim overlap alone; a weak core_claim is a noisy body excerpt.
+            slug_tokens = {t for t in re.split(r"[-_]", slug.lower()) if t}
+            if not ((slug_tokens & prompt_tokens) or (rec["tags"] & prompt_tokens)):
+                continue
+            score = _score_catalog_entity(rec, prompt_tokens)
+            if score >= TASK_ENTITY_MIN_SCORE:
+                scored.append((score, rec))
+        # Highest score first; tie-break on slug for deterministic output.
+        scored.sort(key=lambda sr: (-sr[0], sr[1]["slug"]))
+
+        out: list[tuple[str, str]] = []
+        for _score, rec in scored:
+            if len(out) >= TASK_ENTITY_TOP_N:
+                break  # cap applied after filtering so rejected paths free a slot
+            rel = rec["path"] or f"{rec['type']}/{rec['slug']}.md"
+            full = f"{TASK_ENTITY_ENTITIES_ROOT}/{rel}"
+            if _confined_entity_path(full, workspace) is None:
+                continue  # path escapes the workspace — never surface it
+            out.append((full, rec.get("claim", "")))
+        return out
+    except Exception:
+        return []  # never block the turn
+
+
+def _clean_claim_or_none(claim: str) -> str | None:
+    """Return the stripped claim, or None when it reads like a body excerpt.
+
+    The catalog stores a markdown body excerpt for entities whose ``core_claim``
+    is weak or missing. Such excerpts map to None so the caller can fall back to
+    a path-only line; so do empty and whitespace-only claims.
+    """
+    text = claim.strip()
+    if not text:
+        return None
+
+    # Truncation: a genuine core_claim is ≤140 chars and stored whole, so a
+    # trailing ellipsis marks an over-length body excerpt. Hyphens and backticks
+    # are deliberately not excerpt signals — legit claims contain them.
+    truncated = text.endswith(("...", "…"))
+    # Blockquote: only a *leading* ">" counts. An interior " > " shows up in
+    # legitimate math claims ("lambda must stay > 0") and must not suppress them
+    # (e.g. stability-budget). ``text`` is already stripped of leading space.
+    quoted = text.startswith(">")
+    # Structure: bold, link and heading markers a one-sentence claim never has.
+    structured = "**" in text or "](" in text or "####" in text
+
+    if truncated or quoted or structured:
+        return None
+    return text
+
+
 def _format_intake_context(selection: dict, workspace: Path | None = None) -> str:
     """Render the selection as a markdown block that becomes agent context.
 
@@ -735,6 +926,28 @@ def _format_intake_context(selection: dict, workspace: Path | None = None) -> st
     if entity_lines:
         lines.append("Knowledge-graph constraints to honor (core_claim):")
         lines.extend(entity_lines)
+
+    # v0.5.0 — task-relevant entities, auto-loaded from the catalog by prompt
+    # relevance (BRO-1295). Surfaced separately from persona constraints so the
+    # agent is contextualized on prior knowledge of the *topic*, not just identity
+    # — closing the "contextualized on who, not on what" gap for off-lens prompts.
+    task_entities = _load_task_entities(
+        workspace,
+        selection.get("prompt", ""),
+        {f"{Path(p).parent.name}/{Path(p).stem}" for p in seen_entities},
+    )
+    if task_entities:
+        lines.append(
+            "Task-relevant knowledge (auto-loaded by relevance — read full "
+            "bodies via `/kg load <slug>` or Read before relying on them):"
+        )
+        for path, claim in task_entities:
+            display = _safe_inline(path)
+            clean = _clean_claim_or_none(claim)
+            claim_s = _safe_inline(clean) if clean else ""
+            lines.append(
+                f"  - {claim_s}  ·  [{display}]" if claim_s else f"  - {display}"
+            )
     if suggestions and mode != "augment":
         lines.append("Prompt-improvement suggestions (optional):")
         for sug in suggestions:
@@ -831,6 +1044,7 @@ def cmd_intake(args: argparse.Namespace) -> int:
 
     signals = _git_signals(workspace)
     selection = _select_lenses(roles_dir, signals, prompt)
+    selection["prompt"] = prompt  # v0.5.0 — enables task-entity catalog scan
     # v0.4.1: attach authoring nudge for _meta-only domain-rich prompts
     selection["authoring_nudge"] = _build_authoring_nudge(prompt, selection)
     _emit_event(session_id, prompt, selection)