Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,42 @@ All notable changes to `role-x` are documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.5.0] — 2026-06-01

Adds **task-relevant entity auto-loading** to intake (BRO-1295). Until now the
intake hook surfaced only *persona* constraints on every prompt; task-specific
entities loaded only when a domain lens scored ≥2. For off-lens prompts (most),
no topic knowledge was auto-loaded — the agent was contextualized on *who* the
user is, not on *what we already know about the task at hand*. This closes that
gap, so every turn carries the most relevant entities from the knowledge graph.

### Added

- **Task-relevant entity scan** in `_format_intake_context`. After lens
selection, intake reads the dense knowledge catalog (`docs/knowledge-index.md`,
schema `dense-catalog-v2`), scores each entity's slug/tags/claim against the
prompt tokens, and surfaces the top-5 under a new *"Task-relevant knowledge
(auto-loaded by relevance — read full bodies via `/kg load <slug>` …)"* block.
Self-contained: no cross-skill import of the `kg` loader, so the hook degrades
gracefully when the catalog (or kg) is absent.
- Helpers `_task_scan_tokens`, `_parse_catalog_entities`, `_score_catalog_entity`,
`_load_task_entities`, `_clean_claim_or_none`; `TASK_ENTITY_*` constants.
- 4 tests covering surface-relevant, body-excerpt→path-only, curated-gate
rejection of body-only matches, and graceful no-catalog behavior.

### Behavior / guards

- **Curated-match gate**: an entity surfaces only on a slug or tag token overlap,
never on body-text claim overlap alone — precision over a 300+ entity graph.
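- **Weighted scoring**: slug (3) > tag (2) > claim (1); min total 3; top-5;
deduped against persona/lens entities; only substantive types are scanned
(`concept`, `pattern`, `tool`, `project`, `framework-refinement`,
`industry-pattern`), so `persona` entities are skipped (surfaced already).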
- **Body-excerpt suppression**: entities with a weak/missing `core_claim` carry a
truncated body excerpt in the catalog; those render path-only instead of
markdown noise (ellipsis + structural-marker heuristic).
- Robust catalog header parse — tolerates a trailing ` · score N/9` suffix that
previously made the parser drift and mis-attribute claims.
- Never blocks the turn: any absence/parse error → empty list, exit 0.

## [0.4.2] — 2026-05-29

Activates the previously-inert `context_loaders.entities` wire. The field was
Expand Down
214 changes: 214 additions & 0 deletions scripts/role-x.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,40 @@
}
SIGNAL_WEIGHT_KEYS = set(DEFAULT_SIGNAL_WEIGHTS.keys())

# v0.5.0 — task-relevant entity auto-loading (BRO-1295).
# Once lenses are selected, intake scans the dense knowledge catalog (schema
# dense-catalog-v2) and surfaces the best-matching entities for the prompt, so
# a turn carries topic knowledge and not only persona constraints. The scan is
# self-contained on purpose: it never imports the kg loader, so the hook keeps
# working when kg is not installed.

# Catalog location and entity root, both relative to the workspace.
TASK_ENTITY_CATALOG_REL = "docs/knowledge-index.md"
TASK_ENTITY_ENTITIES_ROOT = "research/entities"

# Selection limits: how many entities to surface, the minimum weighted score an
# entity needs, and the shortest prompt token that takes part in matching.
TASK_ENTITY_TOP_N = 5
TASK_ENTITY_MIN_SCORE = 3
TASK_ENTITY_MIN_TOKEN_LEN = 4

# Per-token weights. Curated fields (slug, tag) outrank free-text claim overlap.
TASK_ENTITY_W_SLUG, TASK_ENTITY_W_TAG, TASK_ENTITY_W_CLAIM = 3, 2, 1

# Size cap (8 MiB). A larger catalog is skipped outright so the every-prompt
# hook stays inside its time budget however large the graph grows.
TASK_ENTITY_CATALOG_MAX_BYTES = 8 * 1024 ** 2

# Allow-list of substantive knowledge types. persona is already surfaced as
# constraints; discovery/question/living-test-case are low-signal noise.
TASK_ENTITY_TYPES = frozenset((
    "concept",
    "framework-refinement",
    "industry-pattern",
    "pattern",
    "project",
    "tool",
))

# Filler words ignored when matching the prompt against the catalog (kept in
# alphabetical order). Entries shorter than TASK_ENTITY_MIN_TOKEN_LEN only
# matter if that length gate is ever lowered.
TASK_ENTITY_STOPWORDS = frozenset((
    "about", "across", "also", "always", "and", "being", "both", "can",
    "could", "current", "does", "each", "for", "from", "have", "here",
    "how", "into", "just", "like", "make", "more", "most", "need",
    "only", "onto", "over", "properly", "setup", "should", "some", "such",
    "sure", "than", "that", "the", "their", "them", "then", "there",
    "these", "they", "this", "those", "use", "used", "using", "very",
    "want", "what", "when", "where", "which", "while", "will", "with",
    "work", "working", "would", "you", "your",
))


def parse_frontmatter(text: str) -> dict:
"""Extract YAML frontmatter from a markdown file."""
Expand Down Expand Up @@ -653,6 +687,163 @@ def _safe_inline(text: str) -> str:
return " ".join(cleaned.split())[:200]


def _task_scan_tokens(prompt: str) -> set[str]:
    """Return the prompt tokens that take part in catalog matching.

    Tokens come from ``_tokenize_prompt``. A token is kept only when it is at
    least ``TASK_ENTITY_MIN_TOKEN_LEN`` characters long and is not listed in
    ``TASK_ENTITY_STOPWORDS``.
    """
    kept: set[str] = set()
    for token in _tokenize_prompt(prompt):
        if len(token) < TASK_ENTITY_MIN_TOKEN_LEN:
            continue  # too short to be a meaningful topic word
        if token in TASK_ENTITY_STOPWORDS:
            continue  # filler word
        kept.add(token)
    return kept


def _parse_catalog_entities(catalog_text: str) -> list[dict]:
    """Parse a dense-catalog-v2 knowledge index into entity records.

    Block shape::

        #### <slug> [<type>·<status>]
        <claim line>
        → out · ← in · #tag #tag · src: ...
        path: <type>/<slug>.md

    Args:
        catalog_text: Full text of the catalog file.

    Returns:
        One ``{slug, type, claim, tags(set), path}`` dict per header line, in
        file order. The parser is tolerant of format drift: it never raises on
        unexpected lines, text before the first header is ignored, and a block
        with missing lines simply yields empty ``claim`` / ``tags`` / ``path``
        values. Only the first free-text line of a block is kept as the claim.
        Tags are lower-cased.
    """
    records: list[dict] = []
    cur: dict | None = None
    # Bracket may carry a trailing suffix (e.g. ` · score 7/9`); don't anchor to
    # end-of-line or the parser drifts and mis-attributes the next block's text.
    header_re = re.compile(r"^####\s+(\S+)\s+\[([^\]·]+)·([^\]]+)\]")
    # A tag is a `#word` that *starts* a token: at the beginning of the line or
    # right after whitespace / the `·` separator. A `#` embedded in a token
    # (e.g. the fragment of a URL in the `src:` segment) is not a curated tag
    # and must not feed the slug/tag match gate.
    tag_re = re.compile(r"(?<![^\s·])#([A-Za-z0-9][A-Za-z0-9_-]*)")
    for line in catalog_text.splitlines():
        m = header_re.match(line)
        if m:
            # A new header closes the block that was being collected.
            if cur:
                records.append(cur)
            cur = {
                "slug": m.group(1).strip(),
                "type": m.group(2).strip(),
                "claim": "",
                "tags": set(),
                "path": "",
            }
            continue
        if cur is None:
            continue  # preamble before the first entity header
        s = line.strip()
        if not s:
            continue
        if s.startswith("path:"):
            cur["path"] = s[len("path:"):].strip()
        elif s.startswith(("→", "←", "#")) or " src:" in s:
            # edges / tags / src line — harvest #tags for scoring
            cur["tags"].update(tag.lower() for tag in tag_re.findall(s))
        elif not cur["claim"]:
            cur["claim"] = s
    if cur:
        records.append(cur)
    return records


def _score_catalog_entity(rec: dict, prompt_tokens: set[str]) -> int:
    """Score a catalog record by weighted token overlap with the prompt.

    Each prompt token found among the slug parts, the tags, or the claim words
    adds ``TASK_ENTITY_W_SLUG``, ``TASK_ENTITY_W_TAG`` or ``TASK_ENTITY_W_CLAIM``
    respectively. The slug is lower-cased and split on ``-`` / ``_``; claim
    words shorter than ``TASK_ENTITY_MIN_TOKEN_LEN`` are ignored.
    """
    slug_parts = set(filter(None, re.split(r"[-_]", rec["slug"].lower())))

    claim_words: set[str] = set()
    for word in _tokenize_prompt(rec.get("claim", "")):
        if len(word) >= TASK_ENTITY_MIN_TOKEN_LEN:
            claim_words.add(word)

    slug_hits = len(slug_parts & prompt_tokens)
    tag_hits = len(rec["tags"] & prompt_tokens)
    claim_hits = len(claim_words & prompt_tokens)
    return (
        slug_hits * TASK_ENTITY_W_SLUG
        + tag_hits * TASK_ENTITY_W_TAG
        + claim_hits * TASK_ENTITY_W_CLAIM
    )


def _load_task_entities(
workspace: Path | None,
prompt: str,
exclude_keys: set[str],
) -> list[tuple[str, str]]:
"""Return up to TASK_ENTITY_TOP_N ``(path, claim)`` task-relevant entities.

Scans ``docs/knowledge-index.md`` by prompt relevance. Returns an empty list
on any absence/error — this rides the UserPromptSubmit hook, which must never
block the turn (so the except is broad: a single non-UTF-8 byte in a scraped
claim must degrade gracefully, not crash). Only substantive knowledge types
are considered; ``exclude_keys`` (``type/slug``) drops lens-surfaced entities.
"""
if workspace is None or not prompt:
return []
try:
catalog = workspace / TASK_ENTITY_CATALOG_REL
if not catalog.is_file():
return []
if catalog.stat().st_size > TASK_ENTITY_CATALOG_MAX_BYTES:
return [] # pathological size — stay within the hook's time budget
prompt_tokens = _task_scan_tokens(prompt)
if not prompt_tokens:
return []
# errors="replace": one bad byte degrades a single claim, never crashes.
text = catalog.read_text(encoding="utf-8", errors="replace")
except Exception:
return [] # never block the turn

scored: list[tuple[int, dict]] = []
for rec in _parse_catalog_entities(text):
slug = rec.get("slug") or ""
if not slug or rec.get("type") not in TASK_ENTITY_TYPES:
continue
if f"{rec['type']}/{slug}" in exclude_keys:
continue
# Require a curated (slug or tag) match — never surface on body-text
# claim overlap alone. Entities with a missing/weak core_claim carry a
# body excerpt in the catalog; scoring that excerpt is noise, not signal.
slug_tokens = {t for t in re.split(r"[-_]", slug.lower()) if t}
if not ((slug_tokens & prompt_tokens) or (rec["tags"] & prompt_tokens)):
continue
score = _score_catalog_entity(rec, prompt_tokens)
if score >= TASK_ENTITY_MIN_SCORE:
scored.append((score, rec))
# Highest score first; tie-break on slug for deterministic output.
scored.sort(key=lambda sr: (-sr[0], sr[1]["slug"]))

out: list[tuple[str, str]] = []
for _score, rec in scored[:TASK_ENTITY_TOP_N]:
rel = rec["path"] or f"{rec['type']}/{rec['slug']}.md"
full = f"{TASK_ENTITY_ENTITIES_ROOT}/{rel}"
if _confined_entity_path(full, workspace) is None:
continue # path escapes the workspace — never surface it
out.append((full, rec.get("claim", "")))
return out


def _clean_claim_or_none(claim: str) -> str | None:
"""Return a clean one-line claim, or None if it looks like a body excerpt.

Entities with a weak/missing ``core_claim`` get a markdown body excerpt in
the catalog. Those carry structure markers a real one-sentence claim never
has; when detected, the caller renders a path-only line instead of noise.
"""
c = claim.strip()
if not c:
return None
# A real core_claim is ≤140 chars, so the catalog stores it whole. A claim
# ending in an ellipsis was truncated → it's an over-length body excerpt,
# not a clean claim. (Principled + low false-positive: legit claims that
# merely contain a hyphen or backtick are kept.)
if c.endswith("...") or c.endswith("…"):
return None
# A *leading* "> " is a markdown blockquote excerpt. Do NOT match an interior
# " > " — that occurs in legitimate math claims ("lambda must stay > 0") and
# false-suppressing those drops real core_claims (e.g. stability-budget).
if c.lstrip().startswith(">"):
return None
# Structural markdown markers a one-sentence claim never carries.
if any(marker in c for marker in ("**", "](", "####")):
return None
return c


def _format_intake_context(selection: dict, workspace: Path | None = None) -> str:
"""Render the selection as a markdown block that becomes agent context.

Expand Down Expand Up @@ -735,6 +926,28 @@ def _format_intake_context(selection: dict, workspace: Path | None = None) -> st
if entity_lines:
lines.append("Knowledge-graph constraints to honor (core_claim):")
lines.extend(entity_lines)

# v0.5.0 — task-relevant entities, auto-loaded from the catalog by prompt
# relevance (BRO-1295). Surfaced separately from persona constraints so the
# agent is contextualized on prior knowledge of the *topic*, not just identity
# — closing the "contextualized on who, not on what" gap for off-lens prompts.
task_entities = _load_task_entities(
workspace,
selection.get("prompt", ""),
{f"{Path(p).parent.name}/{Path(p).stem}" for p in seen_entities},
)
if task_entities:
lines.append(
"Task-relevant knowledge (auto-loaded by relevance — read full "
"bodies via `/kg load <slug>` or Read before relying on them):"
)
for path, claim in task_entities:
display = _safe_inline(path)
clean = _clean_claim_or_none(claim)
claim_s = _safe_inline(clean) if clean else ""
lines.append(
f" - {claim_s} · [{display}]" if claim_s else f" - {display}"
)
if suggestions and mode != "augment":
lines.append("Prompt-improvement suggestions (optional):")
for sug in suggestions:
Expand Down Expand Up @@ -831,6 +1044,7 @@ def cmd_intake(args: argparse.Namespace) -> int:

signals = _git_signals(workspace)
selection = _select_lenses(roles_dir, signals, prompt)
selection["prompt"] = prompt # v0.5.0 — enables task-entity catalog scan
# v0.4.1: attach authoring nudge for _meta-only domain-rich prompts
selection["authoring_nudge"] = _build_authoring_nudge(prompt, selection)
_emit_event(session_id, prompt, selection)
Expand Down
Loading
Loading