From 3e98586d7001ad70f6af1fcf1e87b773a59494b1 Mon Sep 17 00:00:00 2001
From: Rahul Rajaram <rahulrajaram2005@gmail.com>
Date: Thu, 26 Mar 2026 11:16:22 -0400
Subject: [PATCH] refactor: decompose main.rs into 15 focused modules

Break a monolithic 13,241-line main.rs into cohesive modules:

- cli.rs: Clap CLI struct definitions (1173 lines)
- types.rs: all shared type definitions (978 lines)
- config.rs: config loading and resolution (107 lines)
- search.rs: BM25, SimHash, MinHash primitives (317 lines)
- util.rs: shared utilities and index loaders (579 lines)
- index.rs: index building (639 lines)
- assemble.rs: assembly engine and cross-refs (1570 lines)
- mcp.rs: MCP protocol handlers (767 lines)
- commands_query.rs: query, similar, dupes, diff (1243 lines)
- commands_text.rs: stats, repl, vocabulary, eval (840 lines)
- commands_links.rs: check-links, policy, fix-links (1377 lines)
- commands_graph.rs: mv, paths, graph, stale, health (877 lines)
- commands_audit.rs: orphans, canonicality, suggest (498 lines)
- tests_main.rs: unit tests via #[path] (1939 lines)

main.rs is now 495 lines (module wiring + dispatch).
All 125 tests pass, zero clippy warnings.
---
 src/assemble.rs       |  1570 +++++
 src/cli.rs            |  1173 ++++
 src/commands_audit.rs |   498 ++
 src/commands_graph.rs |   877 +++
 src/commands_links.rs |  1377 ++++
 src/commands_query.rs |  1243 ++++
 src/commands_text.rs  |   840 +++
 src/config.rs         |   107 +
 src/index.rs          |   639 ++
 src/main.rs           | 13602 ++--------------------------------------
 src/mcp.rs            |   767 +++
 src/search.rs         |   317 +
 src/tests_main.rs     |  1939 ++++++
 src/types.rs          |   978 +++
 src/util.rs           |   579 ++
 15 files changed, 13332 insertions(+), 13174 deletions(-)
 create mode 100644 src/assemble.rs
 create mode 100644 src/cli.rs
 create mode 100644 src/commands_audit.rs
 create mode 100644 src/commands_graph.rs
 create mode 100644 src/commands_links.rs
 create mode 100644 src/commands_query.rs
 create mode 100644 src/commands_text.rs
 create mode 100644 src/config.rs
 create mode 100644 src/index.rs
 create mode 100644 src/mcp.rs
 create mode 100644 src/search.rs
 create mode 100644 src/tests_main.rs
 create mode 100644 src/types.rs
 create mode 100644 src/util.rs
diff --git a/src/assemble.rs b/src/assemble.rs
new file mode 100644
index 0000000..d940fd7
--- /dev/null
+++ b/src/assemble.rs
@@ -0,0 +1,1570 @@
+use regex::Regex;
+use std::collections::{HashMap, HashSet};
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use crate::search::*;
+use crate::types::*;
+use crate::util::*;
+
+/// Rank documents by BM25 against `query`, expand the 20 best into sections,
+/// and return at most `max_sections` of those sections ordered by relevance.
+pub(crate) fn search_relevant_sections(
+    query: &str,
+    index: &ForwardIndex,
+    max_sections: usize,
+) -> Vec<SectionMatch> {
+    let terms = parse_query_terms(query, true);
+    if terms.is_empty() {
+        return Vec::new();
+    }
+
+    // Score every indexed document and drop those at or below the 0.01 floor.
+    let mut ranked: Vec<(&String, &FileEntry, f64)> = index
+        .files
+        .iter()
+        .map(|(path, entry)| {
+            let score = bm25_score(&terms, entry, index.avg_doc_length, &index.idf_map);
+            (path, entry, score)
+        })
+        .filter(|(_, _, score)| *score > 0.01)
+        .collect();
+
+    ranked.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
+
+    let mut candidates: Vec<SectionMatch> = Vec::new();
+
+    // Only the 20 best-scoring documents are expanded into sections.
+    for (doc_path, entry, doc_score) in ranked.iter().take(20) {
+        let canonicality = score_canonicality(doc_path, entry);
+
+        // Unreadable documents contribute nothing.
+        let Ok(content) = read_indexed_doc(index, doc_path, entry) else {
+            continue;
+        };
+
+        if entry.section_fingerprints.is_empty() {
+            // Fallback: treat whole doc as one section
+            candidates.push(SectionMatch {
+                doc_path: (*doc_path).to_string(),
+                heading: "Full Document".to_string(),
+                line_start: 1,
+                line_end: content.lines().count(),
+                bm25_score: *doc_score,
+                content,
+                canonicality,
+            });
+            continue;
+        }
+
+        let lines: Vec<&str> = content.lines().collect();
+        for section in &entry.section_fingerprints {
+            // Convert the stored span to a slice range, clamped to the lines actually read.
+            let first = section.line_start.saturating_sub(1);
+            let last = section.line_end.min(lines.len());
+            if first >= last {
+                continue;
+            }
+
+            candidates.push(SectionMatch {
+                doc_path: (*doc_path).to_string(),
+                heading: section.heading.clone(),
+                line_start: section.line_start,
+                line_end: section.line_end,
+                bm25_score: *doc_score, // Use doc-level score for now
+                content: lines[first..last].join("\n"),
+                canonicality,
+            });
+        }
+    }
+
+    // Sort by combined score with deterministic tie-breaks, then keep the top N.
+    candidates.sort_by(compare_sections_by_relevance);
+    candidates.truncate(max_sections);
+    candidates
+}
+
+/// Heuristic canonicality score in `[0.0, 1.0]` computed from the document path alone.
+pub(crate) fn score_canonicality(doc_path: &str, _entry: &FileEntry) -> f64 {
+    let lowered = doc_path.to_lowercase();
+    let path_has = |needle: &str| lowered.contains(needle);
+
+    // Filename patterns are matched against the last path component only.
+    let file_name = Path::new(doc_path)
+        .file_name()
+        .and_then(|s| s.to_str())
+        .unwrap_or("")
+        .to_lowercase();
+    let name_has = |needle: &str| file_name.contains(needle);
+
+    // Every rule fires at most once, in this order, starting from the 0.5 baseline.
+    let mut score: f64 = 0.5;
+
+    // Path-based boosts
+    if path_has("docs/adr/") || path_has("docs/architecture/") {
+        score += 0.2;
+    }
+    if path_has("docs/index/") {
+        score += 0.15;
+    }
+
+    // Path-based penalties
+    if path_has("scratch") || path_has("archive") || path_has("old") {
+        score -= 0.3;
+    }
+    if path_has("deprecated") || path_has("backup") {
+        score -= 0.25;
+    }
+
+    // Filename patterns
+    if name_has("readme") || name_has("index") {
+        score += 0.1;
+    }
+    if name_has("guide") || name_has("runbook") || name_has("plan") {
+        score += 0.1;
+    }
+
+    // Recency is not scored yet: the index has no mtime, so `_entry` is unused for now.
+
+    score.clamp(0.0, 1.0)
+}
+
+/// Distill sections into a markdown digest within a token budget.
+/// Section bodies are appended in order until the budget is hit; the last may be cut short.
+pub(crate) fn distill_to_markdown(
+    sections: &[SectionMatch],
+    query: &str,
+    max_tokens: usize,
+) -> String {
+    let mut output = String::new();
+    let mut used_tokens = 0;
+
+    let header = format!(
+        "# Context Digest for: \"{}\"\n\n\
+         **Generated:** {}\n\
+         **Token Budget:** {}\n\
+         **Documents Scanned:** N/A\n\
+         **Sections Selected:** {}\n\n\
+         ---\n\n",
+        query,
+        chrono_now(),
+        max_tokens,
+        sections.len()
+    );
+    output.push_str(&header);
+    used_tokens += estimate_tokens(&header);
+
+    // Group sections by document
+    let mut doc_groups: HashMap<String, Vec<&SectionMatch>> = HashMap::new();
+    for section in sections {
+        doc_groups
+            .entry(section.doc_path.clone())
+            .or_default()
+            .push(section);
+    }
+
+    // Top Relevant Documents section (headings are charged a flat 10 tokens)
+    output.push_str("## Top Relevant Documents\n\n");
+    used_tokens += 10;
+
+    let mut ranked_docs: Vec<_> = doc_groups.iter().collect();
+    ranked_docs.sort_by(|a, b| {
+        let score_a = a.1[0].bm25_score * 0.7 + a.1[0].canonicality * 0.3;
+        let score_b = b.1[0].bm25_score * 0.7 + b.1[0].canonicality * 0.3;
+        // HashMap iteration order is arbitrary, so ties fall back to the path for stable output.
+        score_b
+            .partial_cmp(&score_a)
+            .unwrap_or(std::cmp::Ordering::Equal)
+            .then_with(|| a.0.cmp(b.0))
+    });
+
+    for (idx, (doc_path, doc_sections)) in ranked_docs.iter().enumerate().take(10) {
+        let section = doc_sections[0];
+        let combined_score = section.bm25_score * 0.7 + section.canonicality * 0.3;
+        let doc_line = format!(
+            "{}. **{}** (score: {:.2}, canonical: {:.2})\n   - Sections included: {}\n\n",
+            idx + 1,
+            doc_path,
+            combined_score,
+            section.canonicality,
+            doc_sections.len()
+        );
+        output.push_str(&doc_line);
+        used_tokens += estimate_tokens(&doc_line);
+    }
+
+    output.push_str("---\n\n## Distilled Content\n\n");
+    used_tokens += 10;
+
+    for section in sections {
+        if used_tokens >= max_tokens {
+            output.push_str("\n\n*[Content truncated due to token budget]*\n");
+            break;
+        }
+
+        let section_header = format!(
+            "### {} (from {})\n\n**Source:** {}:{}-{} (canonical: {:.2})\n\n",
+            section.heading,
+            section.doc_path,
+            section.doc_path,
+            section.line_start,
+            section.line_end,
+            section.canonicality
+        );
+        let section_tokens = estimate_tokens(&section_header) + estimate_tokens(&section.content);
+
+        if used_tokens + section_tokens > max_tokens {
+            // Try to fit a truncated version; no underflow, used_tokens < max_tokens here.
+            let chars_to_include = (max_tokens - used_tokens) * 4; // rough approximation
+            if chars_to_include > 200 {
+                // Back off to a char boundary: slicing inside a multi-byte char would panic.
+                let mut cut = chars_to_include.min(section.content.len());
+                while !section.content.is_char_boundary(cut) {
+                    cut -= 1;
+                }
+                output.push_str(&section_header);
+                output.push_str(&section.content[..cut]);
+                output.push_str("\n\n*[Section truncated]*\n");
+            }
+            break;
+        }
+
+        output.push_str(&section_header);
+        output.push_str(&section.content);
+        output.push_str("\n\n---\n\n");
+
+        used_tokens += section_tokens;
+    }
+
+    // Metadata footer (always appended; not counted against the budget)
+    let footer = format!(
+        "\n## Metadata\n\n\
+         **Canonicality Scores:**\n\
+         - 0.90+: Authoritative source, prefer over other docs\n\
+         - 0.70-0.89: Reliable, current documentation\n\
+         - 0.50-0.69: Secondary or supporting documentation\n\
+         - <0.50: Potentially stale, use with caution\n\n\
+         **Actual Tokens Used:** ~{used_tokens}\n\n\
+         ---\n\n\
+         ## Usage with LLM\n\n\
+         Paste this digest into your LLM conversation, then ask:\n\n\
+         > Using only the information in the context above, answer: \"{query}\"\n\
+         > Be explicit when something is not documented in the context.\n"
+    );
+    output.push_str(&footer);
+    output
+}
+
+/// Estimate token count (rough approximation: 1 token ≈ 4 bytes of UTF-8 text, rounded down)
+pub(crate) fn estimate_tokens(text: &str) -> usize {
+    text.len() / 4
+}
+
+/// Extract all deterministic relation edges from a forward index.
+/// Produces document-level links, section-level links, and ADR reference edges.
+/// External, unresolvable and self-referencing links are skipped; edges are sorted and deduped.
+pub fn extract_relations(forward_index: &ForwardIndex) -> RelationIndex {
+    // Build normalized-path-to-key map (sorted iteration for determinism)
+    let mut norm_to_key: HashMap<String, String> = HashMap::new();
+    let mut sorted_keys: Vec<&String> = forward_index.files.keys().collect();
+    sorted_keys.sort();
+    for key in &sorted_keys {
+        let normalized = normalize_path(Path::new(key));
+        norm_to_key
+            .entry(normalized)
+            .or_insert_with(|| (*key).clone());
+    }
+
+    let adr_index = build_adr_index(forward_index);
+    let mut edges: Vec<RelationEdge> = Vec::new();
+
+    for source_key in &sorted_keys {
+        let entry = &forward_index.files[*source_key];
+        let source_base = Path::new(source_key.as_str());
+
+        // Document & section edges from links
+        for link in &entry.links {
+            let target = &link.target;
+
+            // Skip external links
+            if target.starts_with("http://")
+                || target.starts_with("https://")
+                || target.starts_with("mailto:")
+                || target.starts_with("ftp://")
+            {
+                continue;
+            }
+
+            // Split off anchor (everything after the first '#')
+            let (link_path, anchor) = if let Some(idx) = target.find('#') {
+                (
+                    target[..idx].to_string(),
+                    Some(target[idx + 1..].to_string()),
+                )
+            } else {
+                (target.clone(), None)
+            };
+
+            if link_path.is_empty() {
+                continue;
+            }
+
+            let resolved = if let Some(parent) = source_base.parent() {
+                parent.join(&link_path).to_string_lossy().to_string()
+            } else {
+                link_path.clone()
+            };
+            let normalized = normalize_path(Path::new(&resolved));
+
+            let target_key = match norm_to_key.get(&normalized) {
+                Some(k) => k.clone(),
+                None => continue,
+            };
+
+            // Skip self-links
+            if &target_key == *source_key {
+                continue;
+            }
+
+            // Document-level LinksTo edge
+            edges.push(RelationEdge {
+                source: (*source_key).clone(),
+                target: target_key.clone(),
+                kind: RelationKind::LinksTo,
+                anchor: anchor.clone(),
+                source_section: None,
+                target_section: None,
+                raw_text: None,
+            });
+
+            // Section-level edge (only when the link's line falls inside an indexed section)
+            let source_section = find_containing_section(&entry.section_fingerprints, link.line);
+            if source_section.is_some() {
+                let target_section = anchor.as_deref().and_then(|a| {
+                    forward_index
+                        .files
+                        .get(&target_key)
+                        .and_then(|te| resolve_anchor_to_section(te, a))
+                });
+
+                edges.push(RelationEdge {
+                    source: (*source_key).clone(),
+                    target: target_key.clone(),
+                    kind: RelationKind::SectionLinksTo,
+                    anchor: anchor.clone(),
+                    source_section,
+                    target_section,
+                    raw_text: None,
+                });
+            }
+        }
+
+        // ADR reference edges (only for references that resolve through the ADR index)
+        for adr_ref in &entry.adr_references {
+            if let Some(target_path) = adr_index.get(&adr_ref.normalized_id) {
+                // Skip self-links
+                if target_path == *source_key {
+                    continue;
+                }
+
+                let source_section =
+                    find_containing_section(&entry.section_fingerprints, adr_ref.line);
+
+                edges.push(RelationEdge {
+                    source: (*source_key).clone(),
+                    target: target_path.clone(),
+                    kind: RelationKind::AdrReference,
+                    anchor: None,
+                    source_section,
+                    target_section: None,
+                    raw_text: Some(adr_ref.raw_text.clone()),
+                });
+            }
+        }
+    }
+
+    edges.sort();
+    edges.dedup();
+
+    RelationIndex {
+        version: 1,
+        indexed_at: chrono_now(),
+        total_edges: edges.len(),
+        edges,
+    }
+}
+
+/// Parse markdown links from a section's content
+/// Images, external URLs, non-document targets and self-links are skipped.
+pub fn parse_markdown_links(section: &SectionMatch, origin_dir: &Path) -> Vec<CrossRef> {
+    // Compiled once per process: `collect_crossrefs` calls this for every section.
+    static LINK_REGEX: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
+    // Regex: [text](target) - we'll filter out ![image] manually
+    let link_regex = LINK_REGEX.get_or_init(|| {
+        Regex::new(r"(!?)\[(?P<label>[^\]]+)\]\((?P<target>[^)]+)\)").unwrap()
+    });
+
+    let mut refs = Vec::new();
+
+    for caps in link_regex.captures_iter(&section.content) {
+        // Skip if this is an image link (starts with !)
+        if caps.get(1).is_some_and(|m| m.as_str() == "!") {
+            continue;
+        }
+
+        if let (Some(label), Some(target)) = (caps.name("label"), caps.name("target")) {
+            let target_str = target.as_str();
+
+            // Skip external links (same scheme list as `extract_relations`)
+            if target_str.starts_with("http://")
+                || target_str.starts_with("https://")
+                || target_str.starts_with("mailto:")
+                || target_str.starts_with("ftp://")
+            {
+                continue;
+            }
+
+            // Parse target: path.md#anchor
+            let (path_part, anchor) = match target_str.split_once('#') {
+                Some((path, fragment)) => (path, Some(fragment.to_string())),
+                None => (target_str, None),
+            };
+
+            // Skip non-markdown links
+            let lc = path_part.to_ascii_lowercase();
+            if !lc.ends_with(".md") && !lc.ends_with(".txt") && !lc.ends_with(".rst") {
+                continue;
+            }
+
+            // Resolve relative path
+            let target_path = if path_part.starts_with('/') {
+                // Absolute path within repo - strip leading /
+                PathBuf::from(path_part.trim_start_matches('/'))
+            } else {
+                // Relative path - resolve from origin doc's directory
+                origin_dir.join(path_part)
+            };
+            let normalized = normalize_path(&target_path);
+
+            // Skip self-links
+            if normalized == section.doc_path {
+                continue;
+            }
+
+            refs.push(CrossRef {
+                ref_type: RefType::MarkdownLink,
+                origin_doc_path: section.doc_path.clone(),
+                target_doc_path: normalized,
+                target_anchor: anchor,
+                raw_text: label.as_str().to_string(),
+            });
+        }
+    }
+
+    refs
+}
+
+/// Find the section containing a given line number
+///
+/// Bounds are inclusive on both ends; the first matching section in slice order wins.
+pub fn find_containing_section(sections: &[SectionFingerprint], line: usize) -> Option<SectionRef> {
+    sections
+        .iter()
+        .find(|candidate| (candidate.line_start..=candidate.line_end).contains(&line))
+        .map(|hit| SectionRef {
+            heading: hit.heading.clone(),
+            line_start: hit.line_start,
+        })
+}
+
+/// Resolve an anchor fragment to a section in the target file entry.
+///
+/// The first section whose slugged heading contains the slugged anchor wins.
+pub fn resolve_anchor_to_section(entry: &FileEntry, anchor: &str) -> Option<SectionRef> {
+    let wanted = anchor.to_lowercase().replace([' ', '_'], "-");
+    entry
+        .section_fingerprints
+        .iter()
+        .find(|s| s.heading.to_lowercase().replace(' ', "-").contains(&wanted))
+        .map(|hit| SectionRef {
+            heading: hit.heading.clone(),
+            line_start: hit.line_start,
+        })
+}
+/// Map zero-padded ADR numbers (e.g. "013") to the indexed path they were found in.
+pub(crate) fn build_adr_index(index: &ForwardIndex) -> HashMap<String, String> {
+    let adr_regex = Regex::new(r"ADR[-_]?(\d{2,4})").unwrap();
+    let mut adr_map = HashMap::new();
+
+    for path in index.files.keys() {
+        let lowered = path.to_lowercase();
+        if !(lowered.contains("/adr/") || lowered.contains("adr-")) {
+            continue;
+        }
+        // NOTE(review): matched against the original-case path, so lower-case "adr-013" is missed.
+        let Some(digits) = adr_regex.captures(path).and_then(|caps| caps.get(1)) else {
+            continue;
+        };
+        let num: usize = digits.as_str().parse().unwrap_or(0);
+        adr_map.insert(format!("{num:03}"), path.clone());
+    }
+
+    adr_map
+}
+
+/// Parse ADR ID references from section content
+///
+/// Each textual mention that resolves through `adr_index` to a different document
+/// yields one `CrossRef`; mentions of unknown ADRs and self-references are dropped.
+pub(crate) fn parse_adr_ids(
+    section: &SectionMatch,
+    adr_index: &HashMap<String, String>,
+) -> Vec<CrossRef> {
+    // Regex: ADR-013, ADR 13, ADR_0013
+    let adr_regex = Regex::new(r"\bADR[-_ ]?(?P<num>\d{2,4})\b").unwrap();
+
+    // One candidate per regex match, in order of appearance.
+    adr_regex
+        .captures_iter(&section.content)
+        .filter_map(|caps| {
+            let digits = caps.name("num")?;
+            let number: usize = digits.as_str().parse().unwrap_or(0);
+
+            // Zero-pad to 3 digits
+            let key = format!("{number:03}");
+            let target_path = adr_index.get(&key)?;
+
+            // Skip if same file
+            if target_path == &section.doc_path {
+                return None;
+            }
+
+            Some(CrossRef {
+                ref_type: RefType::AdrId,
+                origin_doc_path: section.doc_path.clone(),
+                target_doc_path: target_path.clone(),
+                target_anchor: None,
+                // Group 0 is the whole match and always exists.
+                raw_text: caps.get(0).unwrap().as_str().to_string(),
+            })
+        })
+        .collect()
+}
+
+/// Collect and deduplicate cross-references from primary sections
+///
+/// For each section the markdown links come first, then the ADR mentions; when two
+/// references share origin, target and anchor, only the first one encountered is kept.
+pub(crate) fn collect_crossrefs(
+    sections: &[SectionMatch],
+    adr_index: &HashMap<String, String>,
+) -> Vec<CrossRef> {
+    // Deduplicate by (origin_doc_path, target_doc_path, target_anchor)
+    let mut seen: HashSet<(String, String, Option<String>)> = HashSet::new();
+    let mut unique_refs = Vec::new();
+
+    for section in sections {
+        // Relative links resolve against the directory of the originating document.
+        let origin_dir = Path::new(&section.doc_path)
+            .parent()
+            .unwrap_or_else(|| Path::new("."));
+
+        // Markdown links first, then ADR mentions.
+        let link_refs = parse_markdown_links(section, origin_dir);
+        let adr_refs = parse_adr_ids(section, adr_index);
+
+        // Keep the first occurrence of every key, preserving discovery order.
+        for candidate in link_refs.into_iter().chain(adr_refs) {
+            let key = (
+                candidate.origin_doc_path.clone(),
+                candidate.target_doc_path.clone(),
+                candidate.target_anchor.clone(),
+            );
+
+            // `insert` returns false when the key was already present.
+            if seen.insert(key) {
+                unique_refs.push(candidate);
+            }
+        }
+    }
+
+    unique_refs
+}
+
+/// Classify target document by type
+/// (first match wins: ADR markers, then design markers, then ops markers).
+pub(crate) fn classify_target_doc(path: &str) -> DocType {
+    let lowered = path.to_lowercase();
+    let has_any = |needles: &[&str]| needles.iter().any(|needle| lowered.contains(*needle));
+
+    if has_any(&["/adr/", "adr-"]) {
+        return DocType::Adr;
+    }
+    if has_any(&["architecture", "design"]) {
+        return DocType::Design;
+    }
+    if has_any(&["runbook", "operations", "ops"]) {
+        return DocType::Ops;
+    }
+    DocType::Other
+}
+
+/// Select up to `max_sections` sections from an ADR doc, favouring Context/Decision-style headings
+pub(crate) fn select_sections_for_adr(
+    doc_path: &str,
+    index: &ForwardIndex,
+    entry: &FileEntry,
+    max_sections: usize,
+) -> Vec<SectionMatch> {
+    let mut sections = Vec::new();
+
+    // Headings containing any of these (case-insensitively) count as priority sections
+    let priority_keywords = [
+        "context",
+        "decision",
+        "consequences",
+        "motivation",
+        "rationale",
+        "summary",
+    ];
+
+    if let Ok(content) = read_indexed_doc(index, doc_path, entry) {
+        let lines: Vec<&str> = content.lines().collect();
+
+        // Walk the section fingerprints in order until the cap is reached
+        for section in &entry.section_fingerprints {
+            if sections.len() >= max_sections {
+                break;
+            }
+
+            // Check if this is a priority section
+            let heading_lower = section.heading.to_lowercase();
+            let is_priority = priority_keywords
+                .iter()
+                .any(|kw| heading_lower.contains(kw));
+
+            if is_priority || sections.is_empty() {
+                // Non-priority sections are also accepted while nothing has been selected yet
+                let start = section.line_start.saturating_sub(1);
+                let end = section.line_end.min(lines.len());
+
+                if start < end {
+                    let section_content = lines[start..end].join("\n");
+
+                    sections.push(SectionMatch {
+                        doc_path: doc_path.to_string(),
+                        heading: section.heading.clone(),
+                        line_start: section.line_start,
+                        line_end: section.line_end,
+                        bm25_score: 0.0, // Cross-ref sections don't have BM25 scores
+                        content: section_content,
+                        canonicality: score_canonicality(doc_path, entry),
+                    });
+                }
+            }
+        }
+
+        // If nothing was selected, fall back to the head of the doc as one "Full Document" section
+        if sections.is_empty() && !lines.is_empty() {
+            sections.push(SectionMatch {
+                doc_path: doc_path.to_string(),
+                heading: "Full Document".to_string(),
+                line_start: 1,
+                line_end: lines.len().min(100), // Limit to first 100 lines
+                bm25_score: 0.0,
+                content: lines[..lines.len().min(100)].join("\n"),
+                canonicality: score_canonicality(doc_path, entry),
+            });
+        }
+    }
+
+    sections
+}
+
+/// Select sections from a design/architecture doc: the anchored one if found, else the first few
+pub(crate) fn select_sections_for_design(
+    doc_path: &str,
+    index: &ForwardIndex,
+    entry: &FileEntry,
+    anchor: Option<&str>,
+    max_sections: usize,
+) -> Vec<SectionMatch> {
+    let mut sections = Vec::new();
+
+    if let Ok(content) = read_indexed_doc(index, doc_path, entry) {
+        let lines: Vec<&str> = content.lines().collect();
+
+        // If anchor is specified, try to find matching section
+        if let Some(anchor_str) = anchor {
+            let anchor_lower = anchor_str.to_lowercase().replace(['-', '_'], " ");
+
+            for section in &entry.section_fingerprints {
+                let heading_lower = section.heading.to_lowercase();
+                let heading_slug = heading_lower.replace(' ', "-");
+
+                if heading_slug.contains(&anchor_str.replace(' ', "-"))
+                    || heading_lower.contains(&anchor_lower)
+                {
+                    // Found matching section (NOTE(review): the slug test uses the anchor's original case)
+                    let start = section.line_start.saturating_sub(1);
+                    let end = section.line_end.min(lines.len());
+
+                    if start < end {
+                        let section_content = lines[start..end].join("\n");
+
+                        sections.push(SectionMatch {
+                            doc_path: doc_path.to_string(),
+                            heading: section.heading.clone(),
+                            line_start: section.line_start,
+                            line_end: section.line_end,
+                            bm25_score: 0.0,
+                            content: section_content,
+                            canonicality: score_canonicality(doc_path, entry),
+                        });
+                    }
+
+                    break; // Found the target section
+                }
+            }
+        }
+
+        // If no anchor or not found, include the first `max_sections` indexed sections
+        if sections.is_empty() {
+            for section in entry.section_fingerprints.iter().take(max_sections) {
+                let start = section.line_start.saturating_sub(1);
+                let end = section.line_end.min(lines.len());
+
+                if start < end {
+                    let section_content = lines[start..end].join("\n");
+
+                    sections.push(SectionMatch {
+                        doc_path: doc_path.to_string(),
+                        heading: section.heading.clone(),
+                        line_start: section.line_start,
+                        line_end: section.line_end,
+                        bm25_score: 0.0,
+                        content: section_content,
+                        canonicality: score_canonicality(doc_path, entry),
+                    });
+                }
+            }
+        }
+
+        // Fallback: if still no sections, include the first 50 lines as an "Introduction"
+        if sections.is_empty() && !lines.is_empty() {
+            sections.push(SectionMatch {
+                doc_path: doc_path.to_string(),
+                heading: "Introduction".to_string(),
+                line_start: 1,
+                line_end: lines.len().min(50),
+                bm25_score: 0.0,
+                content: lines[..lines.len().min(50)].join("\n"),
+                canonicality: score_canonicality(doc_path, entry),
+            });
+        }
+    }
+
+    sections
+}
+
+/// Select sections from an ops/runbook doc
+///
+/// Sections whose heading contains an operational keyword are taken in document
+/// order, up to `max_sections`. If none qualify, the first indexed section is used.
+///
+/// * `doc_path` - path of the document; copied into every returned match
+/// * `index`, `entry` - passed to `read_indexed_doc` to load the document text
+/// * `max_sections` - cap on keyword-matched sections (the fallback ignores it)
+///
+/// Returns an empty vector when the document cannot be read.
+pub(crate) fn select_sections_for_ops(
+    doc_path: &str,
+    index: &ForwardIndex,
+    entry: &FileEntry,
+    max_sections: usize,
+) -> Vec<SectionMatch> {
+    // Keywords for ops docs
+    const OPS_KEYWORDS: [&str; 8] = [
+        "deploy",
+        "restart",
+        "rollback",
+        "monitor",
+        "troubleshoot",
+        "debug",
+        "fix",
+        "restore",
+    ];
+
+    let mut picked = Vec::new();
+
+    // An unreadable document yields no sections at all.
+    let Ok(content) = read_indexed_doc(index, doc_path, entry) else {
+        return picked;
+    };
+    let lines: Vec<&str> = content.lines().collect();
+
+    // Turn one fingerprint into a match; `None` when its span is empty after clamping
+    // the end to the number of lines actually read.
+    let to_match = |section: &SectionFingerprint| -> Option<SectionMatch> {
+        let first = section.line_start.saturating_sub(1);
+        let last = section.line_end.min(lines.len());
+        if first >= last {
+            return None;
+        }
+
+        Some(SectionMatch {
+            doc_path: doc_path.to_string(),
+            heading: section.heading.clone(),
+            line_start: section.line_start,
+            line_end: section.line_end,
+            bm25_score: 0.0,
+            content: lines[first..last].join("\n"),
+            canonicality: score_canonicality(doc_path, entry),
+        })
+    };
+
+    // Prioritize sections with ops keywords
+    for section in &entry.section_fingerprints {
+        if picked.len() >= max_sections {
+            break;
+        }
+
+        let heading_lower = section.heading.to_lowercase();
+        let is_ops = OPS_KEYWORDS.iter().any(|kw| heading_lower.contains(kw));
+        if is_ops {
+            picked.extend(to_match(section));
+        }
+    }
+
+    // If no ops sections found, include first section
+    if picked.is_empty() {
+        let fallback = entry.section_fingerprints.first().and_then(to_match);
+        picked.extend(fallback);
+    }
+
+    picked
+}
+
+/// Select sections from an "other" type doc
+///
+/// Only the first indexed section (treated as the overview) is returned. The result is
+/// empty when the document cannot be read, has no indexed sections, or that first
+/// section's span is empty after clamping to the text.
+pub(crate) fn select_sections_for_other(
+    doc_path: &str,
+    index: &ForwardIndex,
+    entry: &FileEntry,
+) -> Vec<SectionMatch> {
+    let Ok(content) = read_indexed_doc(index, doc_path, entry) else {
+        return Vec::new();
+    };
+    let Some(overview) = entry.section_fingerprints.first() else {
+        return Vec::new();
+    };
+
+    // Clamp the stored span to the text that was actually read.
+    let lines: Vec<&str> = content.lines().collect();
+    let first = overview.line_start.saturating_sub(1);
+    let last = overview.line_end.min(lines.len());
+    if first >= last {
+        return Vec::new();
+    }
+
+    vec![SectionMatch {
+        doc_path: doc_path.to_string(),
+        heading: overview.heading.clone(),
+        line_start: overview.line_start,
+        line_end: overview.line_end,
+        bm25_score: 0.0,
+        content: lines[first..last].join("\n"),
+        canonicality: score_canonicality(doc_path, entry),
+    }]
+}
+
+/// Resolve cross-references into additional sections to include.
+///
+/// References are grouped by target document (targets already in
+/// `primary_docs` are skipped) and the targets are visited by document-type
+/// priority, then descending reference count, then path. Each target may add
+/// at most `MAX_TOKENS_PER_XREF_DOC` tokens, and the total never exceeds
+/// `xref_token_budget`. Targets that are not in `index` are ignored.
+///
+/// Sections are chosen per document type (ADR, design, ops, other) by the
+/// matching `select_sections_for_*` helper; for design docs the anchor of the
+/// first reference that has one is passed along. Returns the accepted
+/// sections in visiting order.
+pub(crate) fn resolve_crossrefs(
+    crossrefs: &[CrossRef],
+    primary_docs: &HashSet<String>,
+    index: &ForwardIndex,
+    xref_token_budget: usize,
+) -> Vec<SectionMatch> {
+    const MAX_SECTIONS_PER_ADR: usize = 3;
+    const MAX_SECTIONS_PER_DESIGN: usize = 2;
+    const MAX_SECTIONS_PER_OPS: usize = 2;
+    const MAX_TOKENS_PER_XREF_DOC: usize = 600;
+
+    let mut xref_sections = Vec::new();
+    let mut remaining_budget = xref_token_budget;
+
+    // Group crossrefs by target doc, skipping targets that are primary docs.
+    let mut doc_refs: HashMap<String, Vec<&CrossRef>> = HashMap::new();
+    for cr in crossrefs {
+        if primary_docs.contains(&cr.target_doc_path) {
+            continue;
+        }
+
+        doc_refs
+            .entry(cr.target_doc_path.clone())
+            .or_default()
+            .push(cr);
+    }
+
+    // Sort target docs by type priority, then reference count (descending).
+    // `HashMap` iteration order is arbitrary and `sort_by` keeps ties in input
+    // order, so the path is compared last to make the order, and with it the
+    // set of docs that fit in the budget, the same on every run.
+    let mut target_docs: Vec<(String, Vec<&CrossRef>)> = doc_refs.into_iter().collect();
+    target_docs.sort_by(|a, b| {
+        classify_target_doc(&a.0)
+            .cmp(&classify_target_doc(&b.0))
+            .then_with(|| b.1.len().cmp(&a.1.len()))
+            .then_with(|| a.0.cmp(&b.0))
+    });
+
+    // Process each target doc in priority order
+    for (target_path, refs) in target_docs {
+        if remaining_budget == 0 {
+            break;
+        }
+
+        // Get file entry
+        let Some(entry) = index.files.get(&target_path) else {
+            continue; // Doc not in index
+        };
+
+        let doc_type = classify_target_doc(&target_path);
+
+        // Select sections based on doc type
+        let doc_sections = match doc_type {
+            DocType::Adr => {
+                select_sections_for_adr(&target_path, index, entry, MAX_SECTIONS_PER_ADR)
+            }
+            DocType::Design => {
+                // Check if any ref has an anchor
+                let anchor = refs.iter().find_map(|r| r.target_anchor.as_deref());
+                select_sections_for_design(
+                    &target_path,
+                    index,
+                    entry,
+                    anchor,
+                    MAX_SECTIONS_PER_DESIGN,
+                )
+            }
+            DocType::Ops => {
+                select_sections_for_ops(&target_path, index, entry, MAX_SECTIONS_PER_OPS)
+            }
+            DocType::Other => select_sections_for_other(&target_path, index, entry),
+        };
+
+        // Apply the per-doc and global token budgets. Selection stops at the
+        // first section that would exceed either one; later, smaller sections
+        // of the same doc are not considered.
+        let mut doc_tokens = 0;
+        for section in doc_sections {
+            let section_tokens = estimate_tokens(&section.content);
+
+            if doc_tokens + section_tokens > MAX_TOKENS_PER_XREF_DOC {
+                break; // Exceeded per-doc limit
+            }
+
+            if remaining_budget < section_tokens {
+                break; // Exceeded global budget
+            }
+
+            doc_tokens += section_tokens;
+            remaining_budget -= section_tokens;
+            xref_sections.push(section);
+        }
+    }
+
+    xref_sections
+}
+
+/// Resolve cross-references using the persisted relation graph (graph-aware mode).
+///
+/// Candidate documents are the targets of relation edges whose source is a
+/// primary doc and whose target is not, i.e. a single hop from the primary
+/// set. Their sections are included within `xref_token_budget`, at most
+/// `MAX_TOKENS_PER_XREF_DOC` tokens per document. Targets that are not in
+/// `index` are skipped.
+///
+/// Targets are visited by descending number of edges from the primary set,
+/// ties broken by path, which makes the selection reproducible.
+pub(crate) fn resolve_crossrefs_from_relations(
+    relation_index: &RelationIndex,
+    primary_docs: &HashSet<String>,
+    index: &ForwardIndex,
+    xref_token_budget: usize,
+) -> Vec<SectionMatch> {
+    const MAX_ADR_SECTIONS: usize = 3;
+    const MAX_DESIGN_SECTIONS: usize = 2;
+    const MAX_OPS_SECTIONS: usize = 2;
+    const MAX_TOKENS_PER_XREF_DOC: usize = 600;
+
+    // Collect target docs reachable from primary docs, with edge info
+    let mut target_edges: HashMap<String, Vec<&RelationEdge>> = HashMap::new();
+    for edge in &relation_index.edges {
+        if primary_docs.contains(&edge.source) && !primary_docs.contains(&edge.target) {
+            target_edges
+                .entry(edge.target.clone())
+                .or_default()
+                .push(edge);
+        }
+    }
+
+    // Sort targets: more edges = higher priority, ties broken alphabetically by
+    // path. Doc type plays no part in the ordering; it only decides how
+    // sections are selected below.
+    let mut targets: Vec<(String, Vec<&RelationEdge>)> = target_edges.into_iter().collect();
+    targets.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then_with(|| a.0.cmp(&b.0)));
+
+    let mut xref_sections = Vec::new();
+    let mut remaining_budget = xref_token_budget;
+
+    // Targets are distinct map keys and never primary docs, so no visited-set
+    // bookkeeping is needed: each one is seen exactly once.
+    for (target_path, edges) in targets {
+        if remaining_budget == 0 {
+            break;
+        }
+
+        let Some(entry) = index.files.get(&target_path) else {
+            continue;
+        };
+
+        let doc_sections = match classify_target_doc(&target_path) {
+            DocType::Adr => select_sections_for_adr(&target_path, index, entry, MAX_ADR_SECTIONS),
+            DocType::Design => {
+                // Pick anchor from first edge that has one
+                let anchor = edges.iter().find_map(|e| e.anchor.as_deref());
+                select_sections_for_design(&target_path, index, entry, anchor, MAX_DESIGN_SECTIONS)
+            }
+            DocType::Ops => select_sections_for_ops(&target_path, index, entry, MAX_OPS_SECTIONS),
+            // The overview-only selector takes no section limit.
+            DocType::Other => select_sections_for_other(&target_path, index, entry),
+        };
+
+        // Apply token budget: stop at the first section that would exceed the
+        // per-doc cap or the remaining global budget.
+        let mut doc_tokens = 0;
+        for section in doc_sections {
+            let section_tokens = estimate_tokens(&section.content);
+
+            if doc_tokens + section_tokens > MAX_TOKENS_PER_XREF_DOC {
+                break;
+            }
+
+            if remaining_budget < section_tokens {
+                break;
+            }
+            doc_tokens += section_tokens;
+            remaining_budget -= section_tokens;
+            xref_sections.push(section);
+        }
+    }
+
+    xref_sections
+}
+
+// ============================================================================
+// Extractive Refiner (Phase 2.3)
+// ============================================================================
+
+/// Split text into sentence-like fragments.
+///
+/// Fenced code blocks are swapped for placeholders before splitting so that
+/// punctuation inside them does not create boundaries, then restored in the
+/// fragments that survive. The text is cut at every `.`, `!` and `?`; a
+/// fragment is kept only if, once trimmed, it is longer than 10 bytes and
+/// begins with an alphanumeric character or `#`.
+pub(crate) fn split_sentences(text: &str) -> Vec<String> {
+    let code_block_re = Regex::new(r"```[\s\S]*?```").unwrap();
+    let code_blocks: Vec<String> = code_block_re
+        .find_iter(text)
+        .map(|m| m.as_str().to_string())
+        .collect();
+    // `replace` rewrites every occurrence, so a repeated block shares one placeholder.
+    let mut placeholder_text = text.to_string();
+    for (i, code) in code_blocks.iter().enumerate() {
+        placeholder_text = placeholder_text.replace(code.as_str(), &format!("__CODE_BLOCK_{i}__"));
+    }
+
+    let mut sentences: Vec<String> = placeholder_text
+        .split(&['.', '!', '?'])
+        .map(str::trim)
+        .filter(|part| part.len() > 10)
+        .filter(|part| {
+            let first_char = part.chars().next().unwrap_or(' ');
+            first_char.is_alphanumeric() || first_char == '#'
+        })
+        .map(str::to_string)
+        .collect();
+
+    // Put the original code back wherever a placeholder survived the filter.
+    for (i, code) in code_blocks.iter().enumerate() {
+        let placeholder = format!("__CODE_BLOCK_{i}__");
+        for sentence in &mut sentences {
+            *sentence = sentence.replace(&placeholder, code);
+        }
+    }
+
+    sentences
+}
+
+/// Score a sentence for relevance.
+///
+/// The score is the sum of independent signals:
+/// - `W_LEXICAL` for each query term contained in the sentence, ignoring case;
+/// - `W_KEYWORD` for each entry of the keyword table contained in the sentence;
+/// - `W_CODE` once if the sentence contains a code fence, a run of four
+///   spaces, or one of a few tool names (matched case-sensitively);
+/// - `W_FIRST` if `is_first` is set;
+/// - `W_CROSSREF` if `section_has_crossref` is set and the sentence itself
+///   contains reference wording such as "see " or "adr".
+///
+/// All matching is plain substring search, so overlapping keywords such as
+/// "deploy" and "deployment" both count when the longer one is present.
+pub(crate) fn score_sentence(
+    sentence: &str,
+    query_terms: &[String],
+    is_first: bool,
+    section_has_crossref: bool,
+) -> f64 {
+    // Weight factors
+    const W_LEXICAL: f64 = 2.0;
+    const W_KEYWORD: f64 = 1.5;
+    const W_CODE: f64 = 3.0;
+    const W_FIRST: f64 = 0.3;
+    const W_CROSSREF: f64 = 1.0;
+
+    // High-value keywords, matched against the lowercased sentence.
+    const KEYWORDS: &[&str] = &[
+        "deploy",
+        "deployment",
+        "restart",
+        "auth",
+        "authentication",
+        "session",
+        "state",
+        "error",
+        "failure",
+        "retry",
+        "timeout",
+        "architecture",
+        "design",
+        "decision",
+        "invariant",
+        "must",
+        "should",
+        "requires",
+        "context",
+        "rationale",
+        "consequence",
+        "kubernetes",
+        "container",
+        "pod",
+        "service",
+        "config",
+        "configuration",
+        "security",
+        "permission",
+        "rbac",
+        "policy",
+        "test",
+        "testing",
+    ];
+    // Substrings taken as a sign of code or config.
+    const CODE_MARKERS: &[&str] = &[
+        "```", "    ", "kubectl", "docker", "make", "cargo", "python", "bash",
+    ];
+    // Wording that suggests the sentence points at another document.
+    const XREF_MARKERS: &[&str] = &["adr", "see ", "refer", "described in"];
+
+    let sentence_lower = sentence.to_lowercase();
+
+    // 1. Lexical overlap with query
+    let overlap_count: i32 = query_terms
+        .iter()
+        .map(|term| i32::from(sentence_lower.contains(&term.to_lowercase())))
+        .sum();
+    let mut score = f64::from(overlap_count) * W_LEXICAL;
+
+    // 2. High-value keywords
+    for _ in KEYWORDS.iter().filter(|kw| sentence_lower.contains(**kw)) {
+        score += W_KEYWORD;
+    }
+
+    // 3. Contains code or config
+    if CODE_MARKERS.iter().any(|marker| sentence.contains(*marker)) {
+        score += W_CODE;
+    }
+
+    // 4. First sentence bias
+    if is_first {
+        score += W_FIRST;
+    }
+
+    // 5. Cross-reference bonus
+    if section_has_crossref && XREF_MARKERS.iter().any(|m| sentence_lower.contains(*m)) {
+        score += W_CROSSREF;
+    }
+
+    score
+}
+
+/// Split section text into its leading markdown heading and the remaining body.
+///
+/// Returns `(heading, body)`. When the first line, trimmed, starts with `#` it
+/// becomes the heading and the other lines are re-joined with `\n`; otherwise
+/// the heading is empty and the body is `text` unchanged (empty for empty input).
+pub(crate) fn extract_heading(text: &str) -> (String, String) {
+    let mut rest = text.lines();
+    match rest.next().map(str::trim) {
+        Some(first_line) if first_line.starts_with('#') => {
+            let body = rest.collect::<Vec<_>>().join("\n");
+            (first_line.to_string(), body)
+        }
+        // No heading line: hand the text back untouched.
+        Some(_) => (String::new(), text.to_string()),
+        None => (String::new(), String::new()),
+    }
+}
+
+/// Refine a single section by extracting high-signal sentences.
+///
+/// The heading line is kept, the body is split into sentences that are scored
+/// with `score_sentence`, and the best `max(6, ceil(0.4 * n))` of them are kept
+/// in score order. Fenced code blocks, list items and subheadings found in the
+/// body are appended afterwards unless a kept part already contains them, and
+/// the rebuilt text is cut to `max_tokens` by `truncate_text_to_budget`.
+///
+/// When the body yields no sentences the original content is kept, but it
+/// still goes through the same budget so it cannot exceed `max_tokens`.
+///
+/// # Arguments
+/// * `section` - the match to condense.
+/// * `query_terms` - terms used for the lexical-overlap part of the score.
+/// * `max_tokens` - token budget applied to the returned content.
+///
+/// Only `content` differs between `section` and the returned match;
+/// `truncated` and `truncation_reasons` are those reported by the budget step.
+pub(crate) fn refine_section(
+    section: &SectionMatch,
+    query_terms: &[String],
+    max_tokens: usize,
+) -> RefinedSection {
+    let (heading, body) = extract_heading(&section.content);
+
+    // Collects every match of `pattern` in the body as an owned string.
+    // The patterns are constants, so a compile failure is a programming error.
+    let matches_in_body = |pattern: &str| -> Vec<String> {
+        let re = Regex::new(pattern).unwrap();
+        re.find_iter(&body).map(|m| m.as_str().to_string()).collect()
+    };
+
+    // Extract code blocks - preserve them fully
+    let code_blocks = matches_in_body(r"```[\s\S]*?```");
+
+    // Extract lists - preserve them
+    let list_items = matches_in_body(r"(?m)^[\s]*[-*+]\s+.+$");
+
+    // Extract subheadings - preserve them
+    let subheadings = matches_in_body(r"(?m)^#{2,6}\s+.+$");
+
+    // Split into sentences
+    let sentences = split_sentences(&body);
+
+    let refined_text = if sentences.is_empty() {
+        // Nothing to rank: keep the original text. It still goes through the
+        // budget below, so a large sentence-less section (for example one long
+        // code block) cannot exceed `max_tokens`.
+        section.content.clone()
+    } else {
+        // Check if section has cross-references
+        let has_crossref =
+            body.to_lowercase().contains("adr") || (body.contains('[') && body.contains("]("));
+
+        // Score each sentence; only the first one gets the positional bias.
+        let total_sentences = sentences.len();
+        let mut scored_sentences: Vec<(String, f64)> = sentences
+            .into_iter()
+            .enumerate()
+            .map(|(i, s)| {
+                let score = score_sentence(&s, query_terms, i == 0, has_crossref);
+                (s, score)
+            })
+            .collect();
+
+        // Sort by score (descending). The sort is stable, so equally scored
+        // sentences stay in document order.
+        scored_sentences
+            .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+
+        // Keep top K sentences
+        let k = 6.max((total_sentences as f64 * 0.4).ceil() as usize);
+
+        // Reconstruct section: heading first, then the ranked sentences.
+        let mut refined_parts = Vec::new();
+        if !heading.is_empty() {
+            refined_parts.push(heading);
+        }
+        refined_parts.extend(scored_sentences.into_iter().take(k).map(|(s, _)| s));
+
+        // Preserved elements follow, grouped by kind (code blocks, list items,
+        // subheadings) rather than by position in the body, and skipped when
+        // a kept part already contains them.
+        let all_preserved = code_blocks.into_iter().chain(list_items).chain(subheadings);
+        for item in all_preserved {
+            if !refined_parts.iter().any(|p| p.contains(&item)) {
+                refined_parts.push(item);
+            }
+        }
+
+        refined_parts.join("\n\n")
+    };
+
+    // Apply the per-section token budget on every path.
+    let (final_text, truncated, truncation_reasons) =
+        truncate_text_to_budget(&refined_text, max_tokens, 0);
+
+    RefinedSection {
+        section: SectionMatch {
+            doc_path: section.doc_path.clone(),
+            heading: section.heading.clone(),
+            line_start: section.line_start,
+            line_end: section.line_end,
+            bm25_score: section.bm25_score,
+            content: final_text,
+            canonicality: section.canonicality,
+        },
+        truncated,
+        truncation_reasons,
+    }
+}
+
+/// Run `refine_section` over every section with one shared set of query terms.
+///
+/// `query` is tokenized once via `parse_query_terms(query, true)`; each section
+/// is then refined independently under `max_tokens_per_section`, in input order.
+pub(crate) fn apply_extractive_refiner(
+    sections: Vec<SectionMatch>,
+    query: &str,
+    max_tokens_per_section: usize,
+) -> Vec<RefinedSection> {
+    let terms = parse_query_terms(query, true);
+    let refine = |section: &SectionMatch| refine_section(section, &terms, max_tokens_per_section);
+    sections.iter().map(refine).collect()
+}
+
+/// Expand `@file` arguments into the non-blank lines of the named list file.
+///
+/// Plain arguments pass through unchanged; list entries are trimmed. Fails with
+/// the I/O error if a list file cannot be read.
+pub(crate) fn expand_from_files_args(
+    args: &[String],
+) -> Result<Vec<String>, Box<dyn std::error::Error>> {
+    let mut expanded = Vec::new();
+    for arg in args {
+        match arg.strip_prefix('@') {
+            None => expanded.push(arg.clone()),
+            Some(list_path) => {
+                let listing = fs::read_to_string(list_path)?;
+                let entries = listing.lines().map(str::trim).filter(|l| !l.is_empty());
+                expanded.extend(entries.map(str::to_string));
+            }
+        }
+    }
+
+    Ok(expanded)
+}
+
+/// Map a user-supplied path onto the key under which the document is indexed.
+///
+/// Spellings are tried in order: the trimmed input, the input without leading
+/// `./`, its normalized form and, for an absolute path under the index's
+/// source root, the root-relative remainder (as-is and normalized). Every
+/// spelling is looked up verbatim and again with a `./` prefix. Returns the
+/// first key present in `index.files`; `None` for blank or unmatched input.
+pub(crate) fn resolve_indexed_path(input: &str, index: &ForwardIndex) -> Option<String> {
+    let trimmed = input.trim();
+    if trimmed.is_empty() {
+        return None;
+    }
+    let raw_path = Path::new(trimmed);
+    let mut candidates = vec![
+        trimmed.to_string(),
+        trimmed.trim_start_matches("./").to_string(),
+    ];
+    // Derived spellings are only worth trying when they are non-empty.
+    let mut offer = |spelling: String| {
+        if !spelling.is_empty() {
+            candidates.push(spelling);
+        }
+    };
+    offer(normalize_path(raw_path));
+    if raw_path.is_absolute() {
+        if let Some(source_root) = forward_index_source_root(index) {
+            if let Ok(stripped) = raw_path.strip_prefix(&source_root) {
+                offer(stripped.to_string_lossy().to_string());
+                offer(normalize_path(stripped));
+            }
+        }
+    }
+
+    // Skip spellings already tried, then probe the index for the rest.
+    let mut seen = HashSet::new();
+    candidates
+        .into_iter()
+        .filter(|candidate| seen.insert(candidate.clone()))
+        .find_map(|candidate| {
+            if index.files.contains_key(&candidate) {
+                return Some(candidate);
+            }
+            let with_dot = format!("./{}", candidate.trim_start_matches("./"));
+            index.files.contains_key(&with_dot).then_some(with_dot)
+        })
+}
+
+/// Resolve each input against the index, separating hits from misses.
+///
+/// Returns `(resolved, missing)`: indexed keys in first-seen order without
+/// duplicates, and the inputs that matched nothing, verbatim.
+pub(crate) fn resolve_from_files(
+    inputs: &[String],
+    index: &ForwardIndex,
+) -> (Vec<String>, Vec<String>) {
+    let (mut resolved, mut missing) = (Vec::new(), Vec::new());
+    let mut seen = HashSet::new();
+    for input in inputs {
+        match resolve_indexed_path(input, index) {
+            // Two spellings of one document collapse into a single entry.
+            Some(path) if seen.insert(path.clone()) => resolved.push(path),
+            Some(_) => {}
+            None => missing.push(input.clone()),
+        }
+    }
+    (resolved, missing)
+}
+
+/// Collect sections from an explicit list of indexed files.
+///
+/// Each section carries its file's score: the BM25 score for `query`, or 1.0
+/// when there are no query terms. A file without section fingerprints yields
+/// one "Full Document" entry; unknown or unreadable files are skipped. Output
+/// is sorted by `compare_sections_by_relevance` and capped at `max_sections`.
+pub(crate) fn collect_sections_for_files(
+    file_paths: &[String],
+    index: &ForwardIndex,
+    query: &str,
+    max_sections: usize,
+) -> Vec<SectionMatch> {
+    let query_terms = match query {
+        "" => Vec::new(),
+        _ => parse_query_terms(query, true),
+    };
+    let mut all_sections = Vec::new();
+    for path in file_paths {
+        let Some(entry) = index.files.get(path) else {
+            continue;
+        };
+        let doc_score = if query_terms.is_empty() {
+            1.0
+        } else {
+            bm25_score(&query_terms, entry, index.avg_doc_length, &index.idf_map)
+        };
+        let canonicality = score_canonicality(path, entry);
+        // Both shapes below need the text; a file that cannot be read is skipped.
+        let Ok(content) = read_indexed_doc(index, path, entry) else {
+            continue;
+        };
+        if entry.section_fingerprints.is_empty() {
+            all_sections.push(SectionMatch {
+                doc_path: path.to_string(),
+                heading: "Full Document".to_string(),
+                line_start: 1,
+                line_end: content.lines().count(),
+                bm25_score: doc_score,
+                content,
+                canonicality,
+            });
+            continue;
+        }
+        let lines: Vec<&str> = content.lines().collect();
+        all_sections.extend(entry.section_fingerprints.iter().filter_map(|section| {
+            let start = section.line_start.saturating_sub(1);
+            let end = section.line_end.min(lines.len());
+            (start < end).then(|| SectionMatch {
+                doc_path: path.to_string(),
+                heading: section.heading.clone(),
+                line_start: section.line_start,
+                line_end: section.line_end,
+                bm25_score: doc_score,
+                content: lines[start..end].join("\n"),
+                canonicality,
+            })
+        }));
+    }
+    all_sections.sort_by(compare_sections_by_relevance);
+    all_sections.into_iter().take(max_sections).collect()
+}
+
+/// Choose the sections that seed context assembly.
+///
+/// With `from_files`, the arguments are expanded (`@list` files), resolved
+/// against the index and their sections collected; any unresolved path is an
+/// error. Without it, `query` must contain searchable terms and the index is
+/// searched. An empty selection is reported as `NoRelevantSections`.
+pub(crate) fn collect_context_selection(
+    query: &str,
+    from_files: &[String],
+    index: &ForwardIndex,
+    max_sections: usize,
+) -> Result<ContextSelection, ContextSelectionIssue> {
+    // A blank query is labelled "selected files" and gives the refiner no terms.
+    let (query_label, query_for_refiner) = if query.trim().is_empty() {
+        ("selected files".to_string(), String::new())
+    } else {
+        (query.to_string(), query.to_string())
+    };
+
+    let sections = if from_files.is_empty() {
+        if parse_query_terms(query, true).is_empty() {
+            return Err(ContextSelectionIssue::NoSearchableTerms);
+        }
+        search_relevant_sections(query, index, max_sections)
+    } else {
+        // NOTE(review): a list file that cannot be read is reported as
+        // `NoIndexedFilesMatched`; the underlying I/O error is discarded.
+        let Ok(expanded) = expand_from_files_args(from_files) else {
+            return Err(ContextSelectionIssue::NoIndexedFilesMatched);
+        };
+        let (resolved, missing) = resolve_from_files(&expanded, index);
+        if !missing.is_empty() {
+            return Err(ContextSelectionIssue::MissingFiles(missing));
+        }
+        if resolved.is_empty() {
+            return Err(ContextSelectionIssue::NoIndexedFilesMatched);
+        }
+        collect_sections_for_files(&resolved, index, query, max_sections)
+    };
+
+    if sections.is_empty() {
+        return Err(ContextSelectionIssue::NoRelevantSections(query_label));
+    }
+    Ok(ContextSelection {
+        query_label,
+        query_for_refiner,
+        sections,
+    })
+}
diff --git a/src/cli.rs b/src/cli.rs
new file mode 100644
index 0000000..f45ba20
--- /dev/null
+++ b/src/cli.rs
@@ -0,0 +1,1173 @@
+use clap::{Parser, Subcommand};
+use std::path::PathBuf;
+
+/// yore – Deterministic documentation indexer and context assembly engine.
+///
+/// Yore indexes markdown and text documentation, computes BM25 statistics,
+/// section fingerprints, link graphs, and canonicality scores, and then
+/// assembles minimal, high-signal context for large language models (LLMs)
+/// and automation agents.
+///
+/// Typical workflow:
+///   1. Build an index over your docs with `yore build`.
+///   2. Inspect and clean the docs with `query`, `dupes*`, `check-links`,
+///      `backlinks`, `orphans`, `canonicality`, and `canonical-orphans`.
+///   3. Assemble an answer-ready context for an LLM with `yore assemble`.
+///
+/// All commands are deterministic and operate over the on-disk index in
+/// `--index` (default: `.yore`).
+#[derive(Parser)] // `about`/`long_about` below take precedence over this doc comment in --help.
+#[command(
+    name = "yore",
+    author,
+    version,
+    about = "Fast, deterministic documentation indexer and LLM context assembler",
+    long_about = r#"yore is a deterministic documentation indexer and context
+assembly engine for large language models (LLMs) and automation agents.
+
+It walks a documentation tree, builds on-disk forward and reverse indexes
+(BM25 term statistics, section fingerprints, link graphs, canonicality scores),
+and then assembles minimal, high-signal context for a given question.
+
+Typical workflow:
+  1. Build an index over your docs with `yore build`.
+  2. Inspect and clean the docs using `query`, `dupes*`, `check-links`,
+     `backlinks`, `orphans`, `canonicality`, and `canonical-orphans`.
+  3. Assemble an answer-ready context for an LLM with `yore assemble`.
+
+All commands operate deterministically over the on-disk index in `--index`
+(default: `.yore`)."#,
+    after_long_help = r#"EXAMPLES
+
+  Build an index over docs/ and write it to .yore:
+    yore build docs --output .yore --types md,txt
+
+  Search the index for a free-text query:
+    yore query kubernetes deployment --index .yore --limit 5
+
+  Assemble context for an LLM question:
+    yore assemble "How does authentication work?" \
+      --index .yore --max-tokens 8000 --depth 1 > context.md
+
+  Evaluate retrieval quality against a questions file:
+    yore eval --questions questions.jsonl --index .yore
+
+  Inspect structure and documentation quality:
+    yore dupes --index .yore
+    yore dupes-sections --index .yore --threshold 0.7
+    yore check-links --index .yore --json
+    yore backlinks docs/architecture/DEPLOYMENT-GUIDE.md --index .yore
+    yore orphans --index .yore --exclude README
+    yore canonicality --index .yore --threshold 0.7
+    yore canonical-orphans --index .yore --threshold 0.7
+
+OUTPUT FORMATS
+
+  Most inspection commands support --json for structured output suitable for
+  CI pipelines and automation agents. Commands with JSON support:
+
+    build, eval, query, similar, dupes, dupes-sections, check, check-links,
+    fix-links, backlinks, orphans, canonicality, canonical-orphans, stale,
+    vocabulary, suggest-consolidation, policy, diff, stats, mv, fix-references
+
+  Example: yore check-links --index .yore --json | jq '.broken[]'"#
+)]
+pub struct Cli {
+    #[command(subcommand)]
+    pub command: Commands,
+
+    /// Config file path
+    #[arg(short, long, global = true, default_value = ".yore.toml")]
+    pub config: PathBuf,
+
+    /// Profile name to load from config (limits which roots are indexed; use a full-root profile for whole-repo review)
+    #[arg(long, global = true)]
+    pub profile: Option<String>,
+
+    /// Quiet mode - suppress non-essential output
+    #[arg(short, long, global = true)]
+    pub quiet: bool,
+}
+
+#[derive(Subcommand)]
+pub enum Commands {
+    /// Run one or more documentation checks in a single entrypoint.
+    ///
+    /// This is the recommended command for CI and agents. It can run
+    /// link checks, duplicate detection, taxonomy/policy rules, and
+    /// staleness checks, and it supports CI-friendly exit codes.
+    ///
+    /// Examples:
+    ///   # Basic link check (default index)
+    ///   yore check --links
+    ///
+    ///   # CI mode: fail on missing docs or code
+    ///   yore check --links --ci --fail-on doc_missing,code_missing
+    ///
+    ///   # Run links + staleness + taxonomy in one shot
+    ///   yore check --links --stale --taxonomy --policy taxonomy.yaml
+    ///
+    /// Run multiple checks in one pass (links, policy, stale).
+    ///
+    /// Designed for CI and automation; always emits JSON output.
+    ///
+    /// Limitations:
+    ///   - `--dupes` is accepted but not currently executed.
+    ///
+    /// Related:
+    ///   - `yore check-links`, `yore policy`, `yore stale`
+    ///
+    /// Example:
+    ///   yore check --links --taxonomy --policy .yore-policy.yaml --index .yore --ci
+    Check {
+        /// Run link validation (same engine as `check-links`)
+        #[arg(long)]
+        links: bool,
+
+        /// Run duplicate detection (same engine as `dupes`)
+        #[arg(long)]
+        dupes: bool,
+
+        /// Run taxonomy / policy checks from a YAML file
+        #[arg(long)]
+        taxonomy: bool,
+
+        /// Run staleness checks based on mtime and inbound links
+        #[arg(long)]
+        stale: bool,
+
+        /// CI mode: machine-friendly output and exit codes
+        #[arg(long)]
+        ci: bool,
+
+        /// Kinds/check IDs that should cause a non-zero exit code (comma-separated; repeat flag to pass multiple)
+        #[arg(long, value_delimiter = ',')]
+        fail_on: Vec<String>,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Policy file for taxonomy checks (YAML)
+        #[arg(long)]
+        policy: Option<PathBuf>,
+
+        /// Staleness threshold in days (files older than this are candidates)
+        #[arg(long, default_value = "30")]
+        stale_days: u64,
+    },
+    /// Detect structural document-health issues from build-time metrics.
+    ///
+    /// Uses persisted document and section metrics emitted by `yore build`
+    /// to flag oversized docs, accumulator-style section growth, stale
+    /// completed sections, and changelog sprawl.
+    ///
+    /// Examples:
+    ///   yore health docs/plan.md --index .yore
+    ///   yore health --all --index .yore --json
+    Health {
+        /// Specific file to inspect
+        file: Option<PathBuf>,
+
+        /// Evaluate every indexed document with persisted metrics
+        #[arg(long)]
+        all: bool,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Maximum lines before a file is flagged as bloated
+        #[arg(long, default_value = "500")]
+        max_lines: usize,
+
+        /// Maximum count of "Part N" headings before accumulator risk is flagged
+        #[arg(long, default_value = "8")]
+        max_part_sections: usize,
+
+        /// Maximum retained lines across completion-marked sections
+        #[arg(long, default_value = "50")]
+        max_completed_lines: usize,
+
+        /// Maximum changelog list items before changelog bloat is flagged
+        #[arg(long, default_value = "15")]
+        max_changelog_entries: usize,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+    },
+    /// Build forward and reverse indexes over documentation.
+    ///
+    /// Walks a directory tree, indexes Markdown/text files, and writes
+    /// forward and reverse indexes into `--output` (default: `.yore`).
+    ///
+    /// Agents typically run this once at startup or as part of CI, then
+    /// call other commands (`query`, `assemble`, `dupes*`, etc.) against
+    /// the resulting index.
+    ///
+    /// Limitations:
+    ///   - Only indexes the extensions listed in `--types`.
+    ///   - Ignores binary files and content outside the selected roots.
+    ///   - `--track-renames` requires a git repo with history.
+    ///
+    /// Related:
+    ///   - `yore stats`, `yore query`, `yore assemble`
+    ///
+    /// Examples:
+    ///   yore build docs --output .yore --types md,txt --json
+    ///   yore build . --output .yore --exclude node_modules --exclude target
+    Build {
+        /// Path to index
+        #[arg(default_value = ".")]
+        path: PathBuf,
+
+        /// Output directory for indexes
+        #[arg(short, long, default_value = ".yore")]
+        output: PathBuf,
+
+        /// File extensions to index (comma-separated)
+        #[arg(short, long, default_value = "md,txt,rst")]
+        types: String,
+
+        /// Patterns to exclude (can be repeated)
+        #[arg(short, long)]
+        exclude: Vec<String>,
+
+        /// Output as JSON (query results include the original query text)
+        #[arg(long)]
+        json: bool,
+
+        /// Track file renames using git history
+        #[arg(long)]
+        track_renames: bool,
+    },
+
+    /// Search the index for relevant documents using BM25.
+    ///
+    /// Accepts free-text terms, ranks documents with BM25 using the
+    /// precomputed index, and optionally returns machine-readable JSON.
+    ///
+    /// Useful for quick inspection by humans and for agents that want to
+    /// select candidate files before assembling full context.
+    ///
+    /// Limitations:
+    ///   - Only searches indexed files; run `yore build` first.
+    ///   - Ranking is term-based, not semantic.
+    ///
+    /// Related:
+    ///   - `yore assemble`, `yore similar`, `yore stats`
+    ///
+    /// Examples:
+    ///   yore query kubernetes deployment --index .yore --limit 5
+    ///   yore query --query '"async migration"' --phrase --index .yore --files-only
+    Query {
+        /// Search terms
+        terms: Vec<String>,
+
+        /// Raw query string (avoids shell-quoting pitfalls; overrides positional terms)
+        #[arg(long)]
+        query: Option<String>,
+
+        /// Maximum results to show
+        #[arg(short = 'n', long, default_value = "10")]
+        limit: usize,
+
+        /// Show only file paths
+        #[arg(short = 'l', long)]
+        files_only: bool,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Show top N distinctive terms per result (0 = disabled)
+        #[arg(long, default_value = "0")]
+        doc_terms: usize,
+
+        /// Show query diagnostics and scoring details (JSON output wraps query + results + diagnostics)
+        #[arg(long)]
+        explain: bool,
+
+        /// Do not filter stopwords from the query
+        #[arg(long)]
+        no_stopwords: bool,
+
+        /// Require exact adjacency matches for quoted segments (use --query to include quotes)
+        #[arg(long)]
+        phrase: bool,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Find documents similar to a reference file.
+    ///
+    /// Uses both keyword overlap and SimHash fingerprints to identify
+    /// documents that are textually similar to the given file.
+    ///
+    /// Useful for de-duplicating design docs, spotting outdated copies,
+    /// or finding related ADRs and guides.
+    ///
+    /// Limitations:
+    ///   - The reference file must be in the index.
+    ///   - Similarity is heuristic, not semantic.
+    ///
+    /// Related:
+    ///   - `yore dupes`, `yore diff`, `yore query`
+    ///
+    /// Examples:
+    ///   yore similar docs/adr/ADR-0013-retries.md --index .yore --limit 5
+    ///   yore similar docs/architecture/AUTH.md --threshold 0.4 --json
+    Similar {
+        /// Reference file
+        file: PathBuf,
+
+        /// Maximum results to show
+        #[arg(short = 'n', long, default_value = "5")]
+        limit: usize,
+
+        /// Similarity threshold (0.0 to 1.0)
+        #[arg(short, long, default_value = "0.3")]
+        threshold: f64,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Show top N distinctive terms per result (0 = disabled)
+        #[arg(long, default_value = "0")]
+        doc_terms: usize,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Find duplicate or heavily overlapping documents.
+    ///
+    /// Groups or lists documents that share a large fraction of content,
+    /// based on MinHash and SimHash signatures stored in the index.
+    ///
+    /// Useful for documentation cleanup and for agents choosing which
+    /// version of a document to treat as canonical.
+    ///
+    /// Limitations:
+    ///   - Similarity is heuristic and may miss paraphrases.
+    ///   - Tune `--threshold` for larger or smaller corpora.
+    ///
+    /// Related:
+    ///   - `yore dupes-sections`, `yore diff`, `yore suggest-consolidation`
+    ///
+    /// Examples:
+    ///   yore dupes --index .yore --threshold 0.35 --group
+    ///   yore dupes --index .yore --threshold 0.5 --json
+    Dupes {
+        /// Similarity threshold (0.0 to 1.0)
+        #[arg(short, long, default_value = "0.35")]
+        threshold: f64,
+
+        /// Group duplicates together
+        #[arg(long)]
+        group: bool,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Find duplicate sections across documents.
+    ///
+    /// Identifies individual sections (for example headings and their
+    /// bodies) that appear in multiple files, even when the files are
+    /// otherwise different.
+    ///
+    /// Helpful for detecting repeated how-to blocks, copy-pasted API
+    /// descriptions, or repeated ADR fragments.
+    ///
+    /// Limitations:
+    ///   - Section similarity uses SimHash; reworded sections may be missed.
+    ///   - Smaller sections may require a lower `--threshold`.
+    ///
+    /// Related:
+    ///   - `yore dupes`, `yore diff`, `yore suggest-consolidation`
+    ///
+    /// Examples:
+    ///   yore dupes-sections --index .yore --threshold 0.7 --min-files 2
+    ///   yore dupes-sections --index .yore --threshold 0.85 --min-files 5 --json
+    DupesSections {
+        /// Similarity threshold (0.0 to 1.0)
+        #[arg(short, long, default_value = "0.7")]
+        threshold: f64,
+
+        /// Minimum number of files sharing a section
+        #[arg(short = 'n', long, default_value = "2")]
+        min_files: usize,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Show overlapping content and shared sections between two files.
+    ///
+    /// Compares two files using the index and reports what content they
+    /// share, helping you understand drift or duplication between them.
+    ///
+    /// Limitations:
+    ///   - Not a line-by-line diff; uses indexed keywords/headings.
+    ///   - Both files must be indexed.
+    ///
+    /// Related:
+    ///   - `yore dupes`, `yore dupes-sections`, `yore similar`
+    ///
+    /// Examples:
+    ///   yore diff docs/old.md docs/new.md --index .yore --json
+    ///   yore diff docs/plan.md docs/status.md --index .yore
+    Diff {
+        /// First file
+        file1: PathBuf,
+
+        /// Second file
+        file2: PathBuf,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+    },
+
+    /// Show high-level index statistics.
+    ///
+    /// Prints counts of files, headings, links, and top keywords, which
+    /// is useful for sanity-checking an index and monitoring drift over time.
+    ///
+    /// Limitations:
+    ///   - Reports only what is in the index, not the live filesystem.
+    ///
+    /// Related:
+    ///   - `yore build`, `yore query`
+    ///
+    /// Examples:
+    ///   yore stats --index .yore --top-keywords 20 --json
+    ///   yore stats --index docs/.index --top-keywords 50
+    Stats {
+        /// Show top N keywords
+        #[arg(long, default_value = "20")]
+        top_keywords: usize,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+    },
+
+    /// Interactive query REPL over the index.
+    ///
+    /// Starts a simple read-eval-print loop where you can type queries
+    /// and inspect results quickly while iterating on documentation.
+    ///
+    /// Limitations:
+    ///   - No persistence or scripting; use `yore query` for batch runs.
+    ///
+    /// Related:
+    ///   - `yore query`, `yore stats`
+    ///
+    /// Examples:
+    ///   yore repl --index .yore
+    Repl {
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Assemble a high-signal context digest for LLM consumption.
+    ///
+    /// Runs the full retrieval pipeline: BM25 ranking, section selection,
+    /// link and ADR expansion, extractive refinement, and token-budgeted
+    /// trimming to produce a markdown context for a natural language query.
+    ///
+    /// This is the primary entry point for agents and tools that want a
+    /// deterministic, reproducible context to send to an LLM.
+    ///
+    /// Limitations:
+    ///   - Uses indexed content only; run `yore build` first.
+    ///   - Cross-reference expansion follows internal links only.
+    ///
+    /// Related:
+    ///   - `yore query`, `yore eval`, `yore build`
+    ///
+    /// Examples:
+    ///   yore assemble "How does authentication work?" \
+    ///     --index .yore --max-tokens 8000 --depth 1 > context.md
+    ///   yore assemble "async migration status" --index .yore --max-sections 10
+    ///   yore assemble --from-files docs/adr/ADR-0010.md docs/adr/ADR-0011.md --index .yore
+    Assemble {
+        /// Natural language query/question (required unless --from-files is used)
+        #[arg(required_unless_present = "from_files")]
+        query: Vec<String>,
+
+        /// Maximum tokens in output (approximate)
+        #[arg(short = 't', long, default_value = "8000")]
+        max_tokens: usize,
+
+        /// Maximum sections to include
+        #[arg(short = 's', long, default_value = "20")]
+        max_sections: usize,
+
+        /// Cross-reference expansion depth
+        #[arg(short = 'd', long, default_value = "1")]
+        depth: usize,
+
+        /// Output format
+        #[arg(short = 'f', long, default_value = "markdown")]
+        format: String,
+
+        /// Show top N distinctive terms per source document (0 = disabled)
+        #[arg(long, default_value = "0")]
+        doc_terms: usize,
+
+        /// Assemble context from explicit files (supports @list.txt)
+        #[arg(long, value_name = "PATH", num_args = 1..)]
+        from_files: Vec<String>,
+
+        /// Use persisted relation graph for cross-reference expansion
+        #[arg(long)]
+        use_relations: bool,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Experimental MCP-oriented context tools with bounded preview/fetch contracts.
+    ///
+    /// This surface is JSON-first and intentionally narrow: search/preview
+    /// returns compact snippets plus opaque handles, and fetch returns more
+    /// detail only when explicitly asked.
+    ///
+    /// Related:
+    ///   - `yore query`, `yore assemble`
+    ///
+    /// Examples:
+    ///   yore mcp search-context "authentication flow" --index .yore
+    ///   yore mcp fetch-context ctx_1234abcd --index .yore
+    Mcp {
+        #[command(subcommand)]
+        command: McpCommands,
+    },
+
+    /// Evaluate the retrieval pipeline against test questions.
+    ///
+    /// Given a JSONL questions file with expected substrings, runs the
+    /// same retrieval/assembly pipeline used by `assemble` and reports
+    /// whether each question's expected answers were retrieved.
+    ///
+    /// Useful for regression testing and measuring improvements to docs
+    /// or index configuration.
+    ///
+    /// Limitations:
+    ///   - Uses substring matching; does not grade semantic answers.
+    ///   - False positives/negatives are possible; tune expectations.
+    ///
+    /// Related:
+    ///   - `yore assemble`, `yore query`
+    ///
+    /// Examples:
+    ///   yore eval --questions questions.jsonl --index .yore --json
+    ///   yore eval --questions questions.jsonl --index .yore
+    Eval {
+        /// Path to questions JSONL file
+        #[arg(long, default_value = "questions.jsonl")]
+        questions: PathBuf,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+    },
+
+    /// Derive a deterministic vocabulary list from a built index.
+    ///
+    /// Use this command when you want a compact candidate vocabulary for
+    /// prompt engineering, glossary generation, or documentation normalization.
+    ///
+    /// Output formats:
+    ///   - `lines` (default): one term per line for easy filtering scripts
+    ///   - `json`: structured payload with `term`, `score`, and `count`
+    ///   - `prompt`: comma-separated terms for LLM initial prompts
+    ///
+    /// Usage guidance:
+    ///   1. Build an index: `yore build <path> --output .yore`
+    ///   2. Generate vocabulary candidates:
+    ///      - `yore vocabulary --index .yore --limit 200 --format lines`
+    ///      - `yore vocabulary --index .yore --format json --limit 50`
+    ///      - `yore vocabulary --index .yore --format prompt --limit 150`
+    ///   3. Optionally remove common words:
+    ///      - `yore vocabulary --index .yore --stopwords my.stopwords`
+    ///      - `yore vocabulary --index .yore --format json --json`
+    ///      - `yore vocabulary --index .yore --common-terms 20`
+    ///      - `yore vocabulary --index .yore --no-default-stopwords --common-terms 40`
+    ///      - `yore vocabulary --index .yore --no-default-stopwords --stopwords my.stopwords`
+    ///
+    /// Limitations:
+    ///   - Ranking is deterministic but may still evolve as stop-word defaults
+    ///     or indexing heuristics are tuned.
+    ///   - `--common-terms` derives a corpus-frequency stoplist and may remove
+    ///     domain terms in very small projects.
+    Vocabulary {
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Maximum number of terms to return
+        #[arg(short = 'n', long, default_value = "100")]
+        limit: usize,
+
+        /// Output format: lines, json, or prompt
+        #[arg(long, default_value = "lines")]
+        format: String,
+
+        /// Alias for `--format json`
+        #[arg(long)]
+        json: bool,
+
+        /// Path to an additional stop-word list (optional; one word per line)
+        #[arg(long)]
+        stopwords: Option<PathBuf>,
+
+        /// Keep stem-only terms when no non-stem surface form is available
+        #[arg(long)]
+        include_stemming: bool,
+
+        /// Disable built-in stopword filtering (it is enabled unless this flag is passed)
+        #[arg(long)]
+        no_default_stopwords: bool,
+
+        /// Exclude the top N corpus-common terms before applying other filters
+        #[arg(long, default_value = "0")]
+        common_terms: usize,
+    },
+
+    /// Check all markdown links for validity.
+    ///
+    /// Parses all markdown links in indexed documents, resolves relative and
+    /// absolute paths, and reports broken targets and anchors.
+    ///
+    /// Can emit JSON for automated checks in CI or for agents that want to
+    /// repair links automatically, including a grouped summary by file and
+    /// by issue kind (doc_missing, code_missing, placeholder, etc.).
+    ///
+    /// Limitations:
+    ///   - Does not fetch external URLs; external links are not validated.
+    ///   - Only checks files within the index roots.
+    ///
+    /// Related:
+    ///   - `yore fix-links`, `yore export-graph`, `yore backlinks`
+    ///
+    /// Examples:
+    ///   # Basic JSON output over default index
+    ///   yore check-links --index .yore --json
+    ///
+    ///   # Docs-only profile with summary for CI
+    ///   yore --profile docs check-links --json --summary-only
+    CheckLinks {
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Root directory for resolving relative paths
+        #[arg(short, long)]
+        root: Option<PathBuf>,
+
+        /// Include a grouped summary of link issues
+        #[arg(long)]
+        summary: bool,
+
+        /// Only show the summary (suppress individual link entries)
+        #[arg(long)]
+        summary_only: bool,
+    },
+
+    /// Find all files that link to a specific file.
+    ///
+    /// Traverses the link graph to list every document that links to the
+    /// given target file, including optional anchors.
+    ///
+    /// Useful for understanding impact of changes, cleaning up docs, and
+    /// deciding whether a document is safe to delete.
+    ///
+    /// Limitations:
+    ///   - Only considers indexed markdown links (not external URLs).
+    ///
+    /// Related:
+    ///   - `yore orphans`, `yore export-graph`
+    ///
+    /// Examples:
+    ///   yore backlinks docs/architecture/DEPLOYMENT-GUIDE.md --index .yore
+    ///   yore backlinks docs/README.md --index .yore --json
+    Backlinks {
+        /// File to find backlinks for
+        file: String,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+    },
+
+    /// Find orphaned files with no inbound links.
+    ///
+    /// Lists documents that are not linked to from anywhere else in the
+    /// documentation graph (subject to `--exclude` filters).
+    ///
+    /// Helpful for identifying dead, experimental, or forgotten documents
+    /// that may be candidates for deletion or consolidation.
+    ///
+    /// Limitations:
+    ///   - Entry-point docs (README/INDEX) may be intentionally orphaned.
+    ///   - Only considers links in the index.
+    ///
+    /// Related:
+    ///   - `yore backlinks`, `yore canonical-orphans`
+    ///
+    /// Examples:
+    ///   yore orphans --index .yore --exclude README
+    ///   yore orphans --index .yore --exclude README --exclude INDEX --json
+    Orphans {
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Exclude files matching pattern (can be repeated)
+        #[arg(short, long)]
+        exclude: Vec<String>,
+    },
+
+    /// Show canonicality scores for all documents.
+    ///
+    /// Computes a heuristic "authority" score per document based on naming,
+    /// path, and link structure so agents can consistently pick canonical
+    /// sources of truth when multiple documents overlap.
+    ///
+    /// Limitations:
+    ///   - Heuristic scoring; validate with `dupes` and human review.
+    ///
+    /// Related:
+    ///   - `yore suggest-consolidation`, `yore canonical-orphans`
+    ///
+    /// Examples:
+    ///   yore canonicality --index .yore --threshold 0.7
+    ///   yore canonicality --index .yore --json
+    Canonicality {
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Minimum score threshold (0.0 to 1.0)
+        #[arg(short, long, default_value = "0.0")]
+        threshold: f64,
+    },
+
+    /// Find canonical documents with no inbound links.
+    ///
+    /// Filters documents by canonicality score and reports those that are
+    /// not linked to by any other indexed document.
+    ///
+    /// Limitations:
+    ///   - Only considers inbound links in the index roots.
+    ///   - Canonicality is heuristic, not semantic.
+    ///
+    /// Related:
+    ///   - `yore canonicality`, `yore orphans`, `yore backlinks`
+    ///
+    /// Examples:
+    ///   yore canonical-orphans --index .yore --threshold 0.7
+    ///   yore canonical-orphans --index .yore --threshold 0.8 --json
+    CanonicalOrphans {
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Minimum canonicality score (0.0 to 1.0)
+        #[arg(short, long, default_value = "0.7")]
+        threshold: f64,
+    },
+
+    /// Automatically fix a subset of broken relative links.
+    ///
+    /// This command uses heuristics over the index to propose safe,
+    /// mechanical rewrites for links that appear to point to the wrong
+    /// file (for example, the right filename in the wrong directory).
+    ///
+    /// For agent-friendly operation, use --propose to output ambiguous
+    /// cases to a YAML file, then --apply-decisions to apply choices.
+    ///
+    /// Limitations:
+    ///   - Only fixes a conservative subset of relative links.
+    ///   - Ambiguous targets require `--propose` + `--apply-decisions`.
+    ///
+    /// Related:
+    ///   - `yore check-links`, `yore mv`, `yore fix-references`
+    ///
+    /// Examples:
+    ///   yore fix-links --index .yore --dry-run
+    ///   yore fix-links --index .yore --apply
+    ///   yore fix-links --index .yore --propose proposals.yaml
+    ///   yore fix-links --index .yore --apply-decisions proposals.yaml
+    FixLinks {
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Show proposed edits without modifying any files
+        #[arg(long)]
+        dry_run: bool,
+
+        /// Apply changes to files on disk (only unambiguous fixes)
+        #[arg(long)]
+        apply: bool,
+
+        /// Output ambiguous link fixes to a YAML file for agent/human review
+        #[arg(long)]
+        propose: Option<PathBuf>,
+
+        /// Apply decisions from a previously generated proposal file
+        #[arg(long)]
+        apply_decisions: Option<PathBuf>,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Use git rename history to suggest fixes for moved files
+        #[arg(long)]
+        use_git_history: bool,
+    },
+
+    /// Rewrite references according to an explicit mapping file.
+    ///
+    /// This promotes the `mv --update-refs` machinery into a more general
+    /// bulk rewrite tool, suitable for large documentation reorganizations.
+    ///
+    /// Limitations:
+    ///   - Does not move files; only rewrites references.
+    ///   - Requires a mapping file that lists exact from/to pairs.
+    ///
+    /// Related:
+    ///   - `yore mv`, `yore fix-links`
+    ///
+    /// Examples:
+    ///   yore fix-references --mapping mappings.yaml --index .yore --dry-run --json
+    ///   yore fix-references --mapping mappings.yaml --index .yore --apply
+    FixReferences {
+        /// Path to reference mapping configuration (YAML)
+        #[arg(short, long)]
+        mapping: PathBuf,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Show planned changes without modifying files
+        #[arg(long)]
+        dry_run: bool,
+
+        /// Apply changes to files on disk
+        #[arg(long)]
+        apply: bool,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+    },
+
+    /// Move a documentation file and optionally update inbound references.
+    ///
+    /// This is a thin, ergonomic wrapper around link rewrite logic. When
+    /// --update-refs is used, all Markdown links that point to the old
+    /// path are rewritten to point to the new path.
+    ///
+    /// Limitations:
+    ///   - Only updates links in indexed files; run `yore build` first.
+    ///   - Does not update external repositories or URLs.
+    ///
+    /// Related:
+    ///   - `yore fix-references`, `yore fix-links`, `yore check-links`
+    ///
+    /// Examples:
+    ///   yore mv docs/old/auth.md docs/architecture/AUTH.md --update-refs --index .yore --json
+    ///   yore mv agents/tmp/note.md agents/archive/note.md --index .yore
+    Mv {
+        /// Source path to move from
+        from: PathBuf,
+
+        /// Destination path to move to
+        to: PathBuf,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Update inbound links that reference the old path
+        #[arg(long)]
+        update_refs: bool,
+
+        /// Show planned changes without modifying files
+        #[arg(long)]
+        dry_run: bool,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+    },
+
+    /// Report potentially stale documentation based on age and inbound links.
+    ///
+    /// Uses file modification time and inbound link counts from the index
+    /// to highlight documents that may be unmaintained or dead.
+    ///
+    /// Limitations:
+    ///   - Staleness is heuristic; validate before deleting.
+    ///   - Depends on file mtime and inbound links only.
+    ///
+    /// Related:
+    ///   - `yore orphans`, `yore canonicality`
+    ///
+    /// Examples:
+    ///   yore stale --index .yore --days 90 --min-inlinks 0 --json
+    ///   yore stale --index .yore --days 30 --min-inlinks 1
+    Stale {
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Minimum age in days to consider a file stale
+        #[arg(long, default_value = "90")]
+        days: u64,
+
+        /// Minimum inbound link count (files with >= this many links are included)
+        #[arg(long, default_value = "0")]
+        min_inlinks: usize,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+    },
+
+    /// Export the documentation link graph.
+    ///
+    /// Emits either a JSON representation or a Graphviz DOT file
+    /// describing links between indexed documents.
+    ///
+    /// Limitations:
+    ///   - Graph only includes indexed documents and internal links.
+    ///
+    /// Related:
+    ///   - `yore backlinks`, `yore check-links`
+    ///
+    /// Examples:
+    ///   yore export-graph --format json --index .yore
+    ///   yore export-graph --format dot --index .yore > graph.dot
+    ExportGraph {
+        /// Output format: "json" or "dot"
+        #[arg(long, default_value = "json")]
+        format: String,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Show relation paths between documents via the persisted relation graph.
+    ///
+    /// Displays how a source document connects to other documents through
+    /// links, section links, and ADR references. Requires `relations.json`
+    /// from `yore build`.
+    ///
+    /// Examples:
+    ///   yore paths docs/architecture.md --index .yore
+    ///   yore paths docs/architecture.md --json --index .yore
+    ///   yore paths docs/architecture.md --depth 2 --index .yore
+    Paths {
+        /// Source file to show paths from
+        source: String,
+
+        /// Traversal depth (1 = direct edges, 2 = two hops)
+        #[arg(short = 'd', long, default_value = "1")]
+        depth: usize,
+
+        /// Filter by edge kind: links_to, section_links_to, adr_reference
+        #[arg(long)]
+        kind: Option<String>,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Suggest document consolidation based on duplicates and canonicality.
+    ///
+    /// Uses duplicate detection and canonicality scoring to propose a
+    /// canonical document and a set of files that should be merged into it.
+    ///
+    /// Limitations:
+    ///   - Suggestions are heuristic; review before merging or deleting.
+    ///
+    /// Related:
+    ///   - `yore dupes`, `yore canonicality`, `yore diff`
+    ///
+    /// Examples:
+    ///   yore suggest-consolidation --threshold 0.7 --json --index .yore
+    ///   yore suggest-consolidation --threshold 0.6 --index .yore
+    SuggestConsolidation {
+        /// Minimum duplicate similarity threshold (0.0 to 1.0)
+        #[arg(long, default_value = "0.7")]
+        threshold: f64,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Check documentation against declarative policy rules.
+    ///
+    /// Reads a YAML policy file describing path patterns and required or
+    /// forbidden content, and reports any violations it finds. Rules can
+    /// also enforce maximum section length (optionally filtered by heading
+    /// regex) and required markdown links.
+    /// Required links treat absolute paths as repo-root relative, and
+    /// resolve relative paths against the source file.
+    ///
+    /// Limitations:
+    ///   - Rules operate on indexed content; run `yore build` first.
+    ///   - Content checks are literal substring matches.
+    ///
+    /// Related:
+    ///   - `yore check --taxonomy`, `yore check-links`
+    ///
+    /// Examples:
+    ///   yore policy --config .yore-policy.yaml --index .yore --json
+    ///   yore policy --config .yore-policy.yaml --index .yore
+    Policy {
+        /// Path to policy configuration (YAML)
+        #[arg(long, default_value = ".yore-policy.yaml")]
+        config: PathBuf,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+
+        /// Output as JSON
+        #[arg(long)]
+        json: bool,
+    },
+}
+
+#[derive(Subcommand)]
+pub enum McpCommands { // nested under `Commands::Mcp`; invoked as `yore mcp <subcommand>`
+    /// Return bounded previews plus opaque handles for follow-up fetches.
+    #[command(name = "search-context", alias = "preview-context")] // alias is also accepted
+    SearchContext {
+        /// Natural language query/question (required unless --from-files is used)
+        #[arg(required_unless_present = "from_files")]
+        query: Vec<String>, // positional; each word arrives as its own element
+
+        /// Maximum preview results to return
+        #[arg(long, default_value = "5")]
+        max_results: usize,
+
+        /// Maximum total tokens across all previews (approximate)
+        #[arg(long, default_value = "1200")]
+        max_tokens: usize,
+
+        /// Maximum total bytes across all previews
+        #[arg(long, default_value = "12000")]
+        max_bytes: usize,
+
+        /// Search/preview from explicit files instead of a query (supports @list.txt)
+        #[arg(long, value_name = "PATH", num_args = 1..)] // one or more PATH values per flag
+        from_files: Vec<String>,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Expand a previously returned opaque handle.
+    #[command(name = "fetch-context", alias = "expand-context")] // alias is also accepted
+    FetchContext {
+        /// Opaque handle returned by `search-context`
+        handle: String, // required positional: no default and not an Option
+
+        /// Maximum tokens in fetched content (approximate)
+        #[arg(long, default_value = "4000")]
+        max_tokens: usize,
+
+        /// Maximum bytes in fetched content
+        #[arg(long, default_value = "20000")]
+        max_bytes: usize,
+
+        /// Index directory
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+
+    /// Serve the bounded preview/fetch tools over MCP stdio transport.
+    ///
+    /// This wraps the existing `search_context` and `fetch_context`
+    /// contracts so MCP clients can call Yore without scraping CLI stdout.
+    ///
+    /// Examples:
+    ///   yore mcp serve --index .yore
+    Serve {
+        /// Default index directory for MCP tool calls
+        #[arg(short, long, default_value = ".yore")]
+        index: PathBuf,
+    },
+} // end of McpCommands
diff --git a/src/commands_audit.rs b/src/commands_audit.rs
new file mode 100644
index 0000000..fd18f32
--- /dev/null
+++ b/src/commands_audit.rs
@@ -0,0 +1,498 @@
+use crate::commands_query::*;
+use colored::Colorize;
+use std::collections::{HashMap, HashSet};
+use std::path::Path;
+
+use crate::assemble::*;
+use crate::types::*;
+use crate::util::*;
+
+/// Report indexed documents that have no inbound links recorded in the forward index.
+///
+/// The set of "linked" paths is taken from [`build_inbound_link_counts`], so this command
+/// and `cmd_canonical_orphans` resolve link targets with the same rules instead of keeping
+/// two copies of the resolution loop in sync by hand.
+///
+/// * `index_dir` - directory the forward index is loaded from.
+/// * `json` - when `true`, print only the serialized `OrphansResult`.
+/// * `exclude_patterns` - a file is skipped when its path contains any of these strings
+///   (plain substring match, not a glob).
+///
+/// # Errors
+/// Fails when the forward index cannot be loaded or the result cannot be serialized.
+pub(crate) fn cmd_orphans(
+    index_dir: &Path,
+    json: bool,
+    exclude_patterns: &[String],
+) -> Result<(), Box<dyn std::error::Error>> {
+    // Load the forward index
+    let forward_index = load_forward_index(index_dir)?;
+
+    if !json {
+        println!("{}", "Finding orphaned files...".cyan().bold());
+        println!();
+    }
+
+    // Every key in this map is a normalized link target that is referenced at least once.
+    let inbound_counts = build_inbound_link_counts(&forward_index);
+
+    // Collect files that are never a link target and are not excluded by the caller.
+    let mut orphans = Vec::new();
+    for (file_path, entry) in &forward_index.files {
+        if inbound_counts.contains_key(file_path) {
+            continue;
+        }
+
+        let excluded = exclude_patterns
+            .iter()
+            .any(|pattern| file_path.contains(pattern.as_str()));
+        if excluded {
+            continue;
+        }
+
+        orphans.push(OrphanFile {
+            file: file_path.clone(),
+            size_bytes: entry.size_bytes,
+            line_count: entry.line_count,
+        });
+    }
+
+    // Sort orphans by file path so the report is stable between runs
+    orphans.sort_by(|a, b| a.file.cmp(&b.file));
+
+    // Move the list into the result and borrow it back rather than cloning it.
+    let result = OrphansResult {
+        total_orphans: orphans.len(),
+        orphans,
+    };
+    let orphans = &result.orphans;
+
+    if json {
+        println!("{}", serde_json::to_string_pretty(&result)?);
+    } else {
+        println!("{}", "Orphaned Files".cyan().bold());
+        println!("{}", "=".repeat(60));
+        println!();
+        println!("Total orphans: {}", orphans.len());
+        println!();
+
+        if orphans.is_empty() {
+            println!(
+                "{}",
+                "No orphaned files found. All documents are linked!".green()
+            );
+            println!();
+        } else {
+            for (idx, orphan) in orphans.iter().enumerate() {
+                println!("[{}] {}", idx + 1, orphan.file.white().bold());
+                println!(
+                    "    Size: {} bytes, Lines: {}",
+                    orphan.size_bytes, orphan.line_count
+                );
+                println!();
+            }
+
+            println!("{}", "Cleanup suggestions:".yellow().bold());
+            println!("  1. Review each file to determine if it's still needed");
+            println!("  2. Add links from relevant documents if the content is valuable");
+            println!("  3. Delete or archive files that are no longer relevant");
+            println!("  4. Entry point files (README.md) may intentionally have no backlinks");
+            println!();
+            println!("{}", "To exclude patterns:".cyan());
+            println!("  yore orphans --exclude README --exclude INDEX");
+        }
+    }
+
+    Ok(())
+}
+
+/// Count, for every normalized link target, how many links in the forward index point at it.
+///
+/// Links starting with `http://`, `https://`, `mailto:` or `ftp://` are ignored, as are
+/// links that consist only of a `#fragment`. A target starting with `/` has that slash
+/// removed and is used as-is; any other target is joined onto the linking file's parent
+/// directory. The resulting path is passed through `normalize_path` and used as the key.
+pub(crate) fn build_inbound_link_counts(forward_index: &ForwardIndex) -> HashMap<String, usize> {
+    const EXTERNAL_PREFIXES: [&str; 4] = ["http://", "https://", "mailto:", "ftp://"];
+
+    let mut tally: HashMap<String, usize> = HashMap::new();
+
+    for (origin, entry) in &forward_index.files {
+        // Directory that relative links in this document are resolved against.
+        let origin_dir = Path::new(origin).parent();
+
+        for link in &entry.links {
+            let raw = link.target.as_str();
+            if EXTERNAL_PREFIXES
+                .iter()
+                .any(|prefix| raw.starts_with(*prefix))
+            {
+                continue;
+            }
+
+            // Only the part before the first '#' names a file; the fragment is irrelevant here.
+            let path_part = match raw.find('#') {
+                Some(hash_at) => &raw[..hash_at],
+                None => raw,
+            };
+            if path_part.is_empty() {
+                continue;
+            }
+
+            let joined = match (path_part.strip_prefix('/'), origin_dir) {
+                (Some(rooted), _) => rooted.to_string(),
+                (None, Some(dir)) => dir.join(path_part).to_string_lossy().to_string(),
+                (None, None) => path_part.to_string(),
+            };
+
+            let key = normalize_path(Path::new(&joined));
+            *tally.entry(key).or_insert(0) += 1;
+        }
+    }
+
+    tally
+}
+
+/// List documents that nothing links to and whose canonicality score is at least `threshold`.
+///
+/// Inbound links come from `build_inbound_link_counts`; the score comes from
+/// `score_canonicality`. Results are ordered by score (highest first), then by file path.
+/// With `json` set, only the serialized `CanonicalOrphansResult` is printed.
+///
+/// # Errors
+/// Fails when the forward index cannot be loaded or the result cannot be serialized.
+pub(crate) fn cmd_canonical_orphans(
+    index_dir: &Path,
+    threshold: f64,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    use std::cmp::Ordering;
+
+    let forward_index = load_forward_index(index_dir)?;
+    let inbound_counts = build_inbound_link_counts(&forward_index);
+
+    let mut ranked: Vec<CanonicalOrphan> = forward_index
+        .files
+        .iter()
+        .filter_map(|(file_path, entry)| {
+            let inbound_links = inbound_counts.get(file_path).copied().unwrap_or(0);
+            if inbound_links > 0 {
+                return None;
+            }
+
+            let canonicality = score_canonicality(file_path, entry);
+            if canonicality >= threshold {
+                Some(CanonicalOrphan {
+                    file: file_path.clone(),
+                    canonicality,
+                    inbound_links,
+                })
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    ranked.sort_by(|left, right| {
+        let by_score = right
+            .canonicality
+            .partial_cmp(&left.canonicality)
+            .unwrap_or(Ordering::Equal);
+        by_score.then_with(|| left.file.cmp(&right.file))
+    });
+
+    let report = CanonicalOrphansResult {
+        total_orphans: ranked.len(),
+        threshold,
+        orphans: ranked,
+    };
+
+    if json {
+        let rendered = serde_json::to_string_pretty(&report)?;
+        println!("{rendered}");
+        return Ok(());
+    }
+
+    println!("{}", "Canonical Orphans".cyan().bold());
+    println!("{}", "=".repeat(60));
+    println!();
+    println!("Threshold: {threshold}");
+    println!("Total canonical orphans: {}", report.orphans.len());
+    println!();
+
+    if report.orphans.is_empty() {
+        println!(
+            "{}",
+            "No canonical documents without inbound links found.".green()
+        );
+    } else {
+        for (position, orphan) in report.orphans.iter().enumerate() {
+            println!("[{}] {}", position + 1, orphan.file.white().bold());
+            println!(
+                "    Canonicality: {:.2}, Inbound links: {}",
+                orphan.canonicality, orphan.inbound_links
+            );
+            println!();
+        }
+    }
+
+    Ok(())
+}
+
+/// Compute a canonicality score for `doc_path` together with the reasons behind it.
+///
+/// The score starts at 0.5 and is adjusted by substring checks on the lower-cased path
+/// and on the lower-cased file name, then clamped to `[0.0, 1.0]`. Each adjustment that
+/// fires contributes one human-readable reason; when none fires the only reason is the
+/// baseline. The `_entry` argument is currently unused.
+///
+/// NOTE(review): every check is a plain substring match, so e.g. "old" also matches
+/// paths containing "folder" and "plan" also matches "explanation".
+pub(crate) fn score_canonicality_with_reasons(
+    doc_path: &str,
+    _entry: &FileEntry,
+) -> (f64, Vec<String>) {
+    let lowered_path = doc_path.to_lowercase();
+    let lowered_name = Path::new(doc_path)
+        .file_name()
+        .and_then(|name| name.to_str())
+        .map(str::to_lowercase)
+        .unwrap_or_default();
+
+    let path_has = |needles: &[&str]| needles.iter().any(|needle| lowered_path.contains(*needle));
+    let name_has = |needles: &[&str]| needles.iter().any(|needle| lowered_name.contains(*needle));
+
+    // (rule fired, score delta, reason) in the order the adjustments are applied.
+    let rules: [(bool, f64, &str); 6] = [
+        (
+            path_has(&["docs/adr/", "docs/architecture/"]),
+            0.2,
+            "Architecture/ADR document (+0.2)",
+        ),
+        (path_has(&["docs/index/"]), 0.15, "Index document (+0.15)"),
+        (
+            path_has(&["scratch", "archive", "old"]),
+            -0.3,
+            "Scratch/archive/old location (-0.3)",
+        ),
+        (
+            path_has(&["deprecated", "backup"]),
+            -0.25,
+            "Deprecated/backup location (-0.25)",
+        ),
+        (
+            name_has(&["readme", "index"]),
+            0.1,
+            "README/INDEX file (+0.1)",
+        ),
+        (
+            name_has(&["guide", "runbook", "plan"]),
+            0.1,
+            "Guide/runbook/plan document (+0.1)",
+        ),
+    ];
+
+    let mut running: f64 = 0.5; // baseline
+    let mut reasons: Vec<String> = Vec::new();
+    for &(fired, delta, reason) in &rules {
+        if fired {
+            running += delta;
+            reasons.push(reason.to_string());
+        }
+    }
+
+    if reasons.is_empty() {
+        reasons.push("Baseline score (0.5)".to_string());
+    }
+
+    (running.clamp(0.0, 1.0), reasons)
+}
+
+/// Show canonicality scores for all indexed documents scoring at least `threshold`.
+///
+/// Files are ordered by score (highest first) and then by path, so documents with equal
+/// scores are listed in a stable order regardless of how the index map iterates. With
+/// `json` set, only the serialized `CanonicalityResult` is printed; otherwise the files
+/// are grouped into high (>= 0.7), medium (0.5..0.7) and low (< 0.5) buckets and the
+/// first few entries of each bucket are shown.
+///
+/// # Errors
+/// Fails when the forward index cannot be loaded or the result cannot be serialized.
+pub(crate) fn cmd_canonicality(
+    index_dir: &Path,
+    json: bool,
+    threshold: f64,
+) -> Result<(), Box<dyn std::error::Error>> {
+    // Load the forward index
+    let forward_index = load_forward_index(index_dir)?;
+
+    if !json {
+        println!("{}", "Computing canonicality scores...".cyan().bold());
+        println!();
+    }
+
+    let mut scored_files = Vec::new();
+
+    for (file_path, entry) in &forward_index.files {
+        let (score, reasons) = score_canonicality_with_reasons(file_path, entry);
+
+        if score >= threshold {
+            scored_files.push(CanonicalityScore {
+                file: file_path.clone(),
+                score,
+                reasons,
+            });
+        }
+    }
+
+    // Sort by score descending; break ties by path so the output is deterministic
+    // (same ordering rule as `cmd_canonical_orphans`).
+    scored_files.sort_by(|a, b| {
+        b.score
+            .partial_cmp(&a.score)
+            .unwrap_or(std::cmp::Ordering::Equal)
+            .then_with(|| a.file.cmp(&b.file))
+    });
+
+    // Move the list into the result and borrow it back rather than cloning it.
+    let result = CanonicalityResult {
+        total_files: scored_files.len(),
+        files: scored_files,
+    };
+    let scored_files = &result.files;
+
+    if json {
+        println!("{}", serde_json::to_string_pretty(&result)?);
+    } else {
+        println!("{}", "Canonicality Scores".cyan().bold());
+        println!("{}", "=".repeat(60));
+        println!();
+        println!(
+            "Total files: {} (threshold: {})",
+            scored_files.len(),
+            threshold
+        );
+        println!();
+
+        // Group by score ranges
+        let high_canon: Vec<_> = scored_files.iter().filter(|s| s.score >= 0.7).collect();
+        let medium_canon: Vec<_> = scored_files
+            .iter()
+            .filter(|s| s.score >= 0.5 && s.score < 0.7)
+            .collect();
+        let low_canon: Vec<_> = scored_files.iter().filter(|s| s.score < 0.5).collect();
+
+        println!(
+            "{} High canonicality (≥0.7): {} files",
+            "📚".green(),
+            high_canon.len()
+        );
+        for file in high_canon.iter().take(10) {
+            println!("  [{:.2}] {}", file.score, file.file.white().bold());
+            for reason in &file.reasons {
+                println!("         - {reason}");
+            }
+        }
+        if high_canon.len() > 10 {
+            println!("  ... and {} more", high_canon.len() - 10);
+        }
+        println!();
+
+        println!(
+            "{} Medium canonicality (0.5-0.7): {} files",
+            "📄".yellow(),
+            medium_canon.len()
+        );
+        for file in medium_canon.iter().take(5) {
+            println!("  [{:.2}] {}", file.score, file.file);
+        }
+        if medium_canon.len() > 5 {
+            println!("  ... and {} more", medium_canon.len() - 5);
+        }
+        println!();
+
+        println!(
+            "{} Low canonicality (<0.5): {} files",
+            "📋".red(),
+            low_canon.len()
+        );
+        for file in low_canon.iter().take(5) {
+            println!("  [{:.2}] {}", file.score, file.file);
+            for reason in &file.reasons {
+                println!("         - {reason}");
+            }
+        }
+        if low_canon.len() > 5 {
+            println!("  ... and {} more", low_canon.len() - 5);
+        }
+        println!();
+
+        println!("{}", "What does this mean?".yellow().bold());
+        println!("  - High scores: Authoritative, well-placed documents");
+        println!("  - Medium scores: Standard documentation");
+        println!("  - Low scores: Scratch work, archived, or deprecated content");
+        println!();
+        println!("{}", "For decision support:".cyan());
+        println!("  - Trust high-canon docs when resolving conflicts");
+        println!("  - Review low-canon docs for potential archival");
+        println!("  - Use threshold flag to filter: --threshold 0.6");
+    }
+
+    Ok(())
+}
+
+/// Suggest groups of near-duplicate documents that could be merged into one canonical file.
+///
+/// Duplicate pairs at or above `threshold` come from `compute_duplicate_pairs` and are
+/// grouped by `build_consolidation_groups`. With `json` set, stdout always carries the
+/// serialized grouping result, even when there is nothing to consolidate, so that
+/// machine consumers never receive the human-readable "Info:" line.
+///
+/// # Errors
+/// Fails when the forward index cannot be loaded or the result cannot be serialized.
+pub(crate) fn cmd_suggest_consolidation(
+    index_dir: &Path,
+    threshold: f64,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let forward_index = load_forward_index(index_dir)?;
+
+    let pairs = compute_duplicate_pairs(&forward_index, threshold);
+    if pairs.is_empty() && !json {
+        println!(
+            "{} No consolidation candidates found above threshold {}.",
+            "Info:".yellow(),
+            threshold
+        );
+        return Ok(());
+    }
+
+    // NOTE(review): in JSON mode this is also reached with an empty pair list; it assumes
+    // `build_consolidation_groups` returns an empty grouping for empty input — confirm.
+    let result = build_consolidation_groups(&forward_index, &pairs);
+
+    if json {
+        println!("{}", serde_json::to_string_pretty(&result)?);
+        return Ok(());
+    }
+
+    if result.groups.is_empty() {
+        println!(
+            "{} Duplicate pairs found but no multi-file groups to consolidate.",
+            "Info:".yellow()
+        );
+        return Ok(());
+    }
+
+    println!("{}", "Consolidation Suggestions".cyan().bold());
+    println!("{}", "=".repeat(60));
+    println!(
+        "Total groups: {} (threshold: {:.2})",
+        result.total_groups, threshold
+    );
+    println!();
+
+    for group in &result.groups {
+        println!("{}", group.canonical.white().bold());
+        println!(
+            "  Canonical score: {:.2}, Avg similarity: {:.2}",
+            group.canonical_score, group.avg_similarity
+        );
+        println!("  Merge into canonical:");
+        for m in &group.merge_into {
+            println!("    - {m}");
+        }
+        println!("  Note: {}", group.note);
+        println!();
+    }
+
+    Ok(())
+}
diff --git a/src/commands_graph.rs b/src/commands_graph.rs
new file mode 100644
index 0000000..9be7fc3
--- /dev/null
+++ b/src/commands_graph.rs
@@ -0,0 +1,877 @@
+use crate::commands_links::*;
+use crate::commands_query::*;
+use colored::Colorize;
+use serde::Serialize;
+use std::collections::{HashMap, HashSet};
+use std::fs;
+use std::path::Path;
+
+use crate::types::*;
+use crate::util::*;
+
+/// Move a document and optionally rewrite references to it in other indexed documents.
+///
+/// * `from` / `to` - source and destination paths; missing parent directories of `to`
+///   are created.
+/// * `index_dir` - forward index consulted when `update_refs` is set.
+/// * `update_refs` - rewrite files whose recorded link target equals `from` exactly
+///   (string comparison against the path as given), using
+///   `apply_reference_mapping_to_content`.
+/// * `dry_run` - report what would happen without renaming or writing anything.
+/// * `json` - print a serialized `MvResult` instead of the human-readable summary.
+///
+/// The index is loaded and the list of referencing files is computed *before* the file
+/// is renamed, so a missing or unreadable index aborts the command without leaving the
+/// document moved and its references stale.
+///
+/// # Errors
+/// Fails on index load errors, on any filesystem error while renaming, reading or
+/// writing, and on JSON serialization errors.
+pub(crate) fn cmd_mv(
+    from: &Path,
+    to: &Path,
+    index_dir: &Path,
+    update_refs: bool,
+    dry_run: bool,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let from_str = from.to_string_lossy().to_string();
+    let to_str = to.to_string_lossy().to_string();
+
+    // Plan first: everything that can fail without touching the filesystem runs here.
+    let mut files_to_update: Vec<String> = Vec::new();
+    if update_refs {
+        let forward_index = load_forward_index(index_dir)?;
+        for (file_path, entry) in &forward_index.files {
+            if entry.links.iter().any(|link| link.target == from_str) {
+                files_to_update.push(file_path.clone());
+            }
+        }
+        // Process (and report) referencing files in a stable order.
+        files_to_update.sort();
+    }
+
+    if !dry_run {
+        if let Some(parent) = to.parent() {
+            fs::create_dir_all(parent)?;
+        }
+        fs::rename(from, to)?;
+    }
+
+    let mut updated_files: Vec<String> = Vec::new();
+    for file in &files_to_update {
+        let content = fs::read_to_string(file)?;
+        let new_content = apply_reference_mapping_to_content(&content, &from_str, &to_str);
+        if content != new_content {
+            if !dry_run {
+                fs::write(file, &new_content)?;
+            }
+            updated_files.push(file.clone());
+        }
+    }
+
+    if json {
+        let result = MvResult {
+            from: from_str,
+            to: to_str,
+            moved: !dry_run,
+            updated_files,
+        };
+        println!("{}", serde_json::to_string_pretty(&result)?);
+        return Ok(());
+    }
+
+    // Human-readable output
+    if dry_run {
+        println!("{}", "Dry run:".cyan().bold());
+    }
+
+    println!(
+        "{} {} -> {}",
+        if dry_run { "Would move" } else { "Moving" },
+        from_str,
+        to_str
+    );
+
+    if update_refs {
+        if updated_files.is_empty() {
+            println!(
+                "{} No inbound links found for {} in index {}",
+                "Note:".yellow(),
+                from_str,
+                index_dir.display()
+            );
+        } else {
+            println!(
+                "{} Updating references in {} file(s)",
+                if dry_run { "Would update" } else { "Updating" },
+                updated_files.len()
+            );
+            for file in updated_files {
+                if dry_run {
+                    println!("  {file} (references would change)");
+                } else {
+                    println!("  {file}");
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Count inbound links per normalized target path across the whole forward index.
+///
+/// External links (`http://`, `https://`, `mailto:`, `ftp://`) and fragment-only links
+/// are skipped. A target beginning with `/` is treated as relative to the index root:
+/// the slash is stripped and the remainder is used as-is. Joining such a target onto the
+/// source directory would not work, because `Path::join` discards the base when the
+/// argument is absolute, and the resulting `/...` key would never match an indexed file.
+/// All other targets are resolved against the linking file's parent directory. Keys are
+/// produced by `normalize_path`.
+pub(crate) fn compute_inbound_link_counts(forward_index: &ForwardIndex) -> HashMap<String, usize> {
+    let mut counts: HashMap<String, usize> = HashMap::new();
+
+    for (source_path, entry) in &forward_index.files {
+        let source_dir = Path::new(source_path).parent();
+
+        for link in &entry.links {
+            let target = link.target.as_str();
+            if target.starts_with("http://")
+                || target.starts_with("https://")
+                || target.starts_with("mailto:")
+                || target.starts_with("ftp://")
+            {
+                continue;
+            }
+
+            // Drop the `#fragment`; only the file part identifies the linked document.
+            let link_path = match target.find('#') {
+                Some(idx) => &target[..idx],
+                None => target,
+            };
+            if link_path.is_empty() {
+                continue;
+            }
+
+            let resolved = if let Some(rooted) = link_path.strip_prefix('/') {
+                rooted.to_string()
+            } else if let Some(parent) = source_dir {
+                parent.join(link_path).to_string_lossy().to_string()
+            } else {
+                link_path.to_string()
+            };
+
+            let normalized = normalize_path(Path::new(&resolved));
+            *counts.entry(normalized).or_insert(0) += 1;
+        }
+    }
+
+    counts
+}
+
+/// Show relation paths from a source document via the persisted relation graph.
+///
+/// * `source` - document to start from. An exact node match is preferred; otherwise the
+///   lexicographically smallest non-empty node that is a suffix of `source`, or that
+///   `source` is a suffix of, is used, so the choice does not depend on hash-set
+///   iteration order.
+/// * `depth` - number of breadth-first expansion rounds, clamped to `1..=3`.
+/// * `kind_filter` - when set, only edges whose kind label (`links_to`,
+///   `section_links_to`, `adr_reference`) equals this value are followed and reported.
+/// * `json` - print JSON instead of the human-readable listing.
+/// * `index_dir` - directory the relation index is loaded from.
+///
+/// When the graph is empty or `source` cannot be resolved, JSON mode prints
+/// `{"source":<source>,"paths":[]}` with `source` encoded as a proper JSON string.
+///
+/// # Errors
+/// Fails only when JSON serialization fails.
+pub(crate) fn cmd_paths(
+    source: &str,
+    depth: usize,
+    kind_filter: Option<&str>,
+    json: bool,
+    index_dir: &Path,
+) -> Result<(), Box<dyn std::error::Error>> {
+    use std::fmt::Write;
+
+    /// Label used for both `--kind` filtering and display.
+    fn kind_label(kind: &RelationKind) -> &'static str {
+        match kind {
+            RelationKind::LinksTo => "links_to",
+            RelationKind::SectionLinksTo => "section_links_to",
+            RelationKind::AdrReference => "adr_reference",
+        }
+    }
+
+    // Empty JSON payload; `serde_json::to_string` quotes and escapes `source` so that
+    // quotes or backslashes in the argument cannot produce invalid JSON.
+    let empty_json = || -> Result<String, serde_json::Error> {
+        Ok(format!(
+            "{{\"source\":{},\"paths\":[]}}",
+            serde_json::to_string(source)?
+        ))
+    };
+
+    let relation_index = load_relation_index(index_dir);
+    if relation_index.edges.is_empty() {
+        if json {
+            println!("{}", empty_json()?);
+        } else {
+            println!(
+                "{} No relations found. Run 'yore build' first.",
+                "Info:".yellow()
+            );
+        }
+        return Ok(());
+    }
+
+    let depth = depth.clamp(1, 3);
+
+    // Normalize source: try exact match, then suffix match
+    let all_sources: HashSet<&str> = relation_index
+        .edges
+        .iter()
+        .flat_map(|e| [e.source.as_str(), e.target.as_str()])
+        .collect();
+
+    let resolved_source = if all_sources.contains(source) {
+        source.to_string()
+    } else {
+        // Collect every suffix candidate and pick the smallest one deterministically.
+        // Empty node names are skipped: every string ends with "", so they would
+        // otherwise always win.
+        let mut candidates: Vec<&str> = all_sources
+            .iter()
+            .copied()
+            .filter(|s| !s.is_empty() && (s.ends_with(source) || source.ends_with(*s)))
+            .collect();
+        candidates.sort_unstable();
+
+        match candidates.first() {
+            Some(best) => (*best).to_string(),
+            None => {
+                if json {
+                    println!("{}", empty_json()?);
+                } else {
+                    println!(
+                        "{} '{}' not found in relation graph.",
+                        "Info:".yellow(),
+                        source
+                    );
+                }
+                return Ok(());
+            }
+        }
+    };
+
+    // BFS traversal up to depth
+    let mut visited: HashSet<String> = HashSet::new();
+    visited.insert(resolved_source.clone());
+    let mut frontier: Vec<String> = vec![resolved_source.clone()];
+    let mut result_edges: Vec<&RelationEdge> = Vec::new();
+
+    for _ in 0..depth {
+        let mut next_frontier: Vec<String> = Vec::new();
+        for node in &frontier {
+            for edge in &relation_index.edges {
+                if &edge.source != node {
+                    continue;
+                }
+                // Apply kind filter
+                if let Some(kf) = kind_filter {
+                    if kind_label(&edge.kind) != kf {
+                        continue;
+                    }
+                }
+                result_edges.push(edge);
+                // Each node is expanded at most once, so each edge is reported once.
+                if !visited.contains(&edge.target) {
+                    visited.insert(edge.target.clone());
+                    next_frontier.push(edge.target.clone());
+                }
+            }
+        }
+        frontier = next_frontier;
+    }
+
+    if json {
+        #[derive(Serialize)]
+        struct PathsResult<'a> {
+            source: &'a str,
+            depth: usize,
+            total_edges: usize,
+            edges: &'a [&'a RelationEdge],
+        }
+        let result = PathsResult {
+            source: &resolved_source,
+            depth,
+            total_edges: result_edges.len(),
+            edges: &result_edges,
+        };
+        println!("{}", serde_json::to_string_pretty(&result)?);
+    } else {
+        println!(
+            "{} {} (depth {})",
+            "Paths from".green().bold(),
+            resolved_source.cyan(),
+            depth
+        );
+        println!();
+
+        if result_edges.is_empty() {
+            println!("  No outgoing edges found.");
+        } else {
+            for edge in &result_edges {
+                // Optional per-edge details; writing to a String cannot fail.
+                let mut detail = String::new();
+                if let Some(anchor) = &edge.anchor {
+                    let _ = write!(detail, " #{anchor}");
+                }
+                if let Some(src_sec) = &edge.source_section {
+                    let _ = write!(detail, " [from: {}]", src_sec.heading);
+                }
+                if let Some(tgt_sec) = &edge.target_section {
+                    let _ = write!(detail, " [to: {}]", tgt_sec.heading);
+                }
+                if let Some(raw) = &edge.raw_text {
+                    let _ = write!(detail, " ({raw})");
+                }
+                println!(
+                    "  {} {} -> {}{}",
+                    kind_label(&edge.kind).yellow(),
+                    edge.source,
+                    edge.target.cyan(),
+                    detail
+                );
+            }
+            println!();
+            println!("  {} edges total", result_edges.len());
+        }
+    }
+
+    Ok(())
+}
+
+/// Export the internal documentation link graph as JSON or Graphviz DOT.
+///
+/// Nodes are the forward index's file keys. An edge is emitted for every link whose
+/// resolved, normalized target corresponds to an indexed file; the link's `#fragment`
+/// (if any) is kept as the edge anchor. External links (`http://`, `https://`,
+/// `mailto:`, `ftp://`) and fragment-only links are ignored.
+///
+/// A target beginning with `/` is treated as relative to the index root (the slash is
+/// stripped). Joining it onto the source directory would not work, because `Path::join`
+/// discards the base for absolute arguments, and such links would then never match an
+/// indexed file and silently drop out of the graph.
+///
+/// * `format` - `"json"` or `"dot"`.
+///
+/// # Errors
+/// Fails when the forward index cannot be loaded, on JSON serialization errors, or when
+/// `format` is unsupported (only checked once at least one edge exists).
+pub(crate) fn cmd_export_graph(
+    index_dir: &Path,
+    format: &str,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let forward_index = load_forward_index(index_dir)?;
+
+    // Map normalized paths to canonical file keys
+    let mut norm_to_key: HashMap<String, String> = HashMap::new();
+    for path in forward_index.files.keys() {
+        let normalized = normalize_path(Path::new(path));
+        norm_to_key
+            .entry(normalized)
+            .or_insert_with(|| path.clone());
+    }
+
+    let mut nodes: Vec<GraphNode> = forward_index
+        .files
+        .keys()
+        .cloned()
+        .map(|id| GraphNode { id })
+        .collect();
+    nodes.sort_by(|a, b| a.id.cmp(&b.id));
+
+    let mut edges: Vec<GraphEdge> = Vec::new();
+
+    for (source_path, entry) in &forward_index.files {
+        let source_dir = Path::new(source_path).parent();
+
+        for link in &entry.links {
+            let target = link.target.as_str();
+
+            // Skip external links
+            if target.starts_with("http://")
+                || target.starts_with("https://")
+                || target.starts_with("mailto:")
+                || target.starts_with("ftp://")
+            {
+                continue;
+            }
+
+            // Split off anchor at the first '#'
+            let (link_path, anchor) = match target.find('#') {
+                Some(idx) => (&target[..idx], Some(target[idx + 1..].to_string())),
+                None => (target, None),
+            };
+
+            if link_path.is_empty() {
+                continue;
+            }
+
+            let resolved = if let Some(rooted) = link_path.strip_prefix('/') {
+                rooted.to_string()
+            } else if let Some(parent) = source_dir {
+                parent.join(link_path).to_string_lossy().to_string()
+            } else {
+                link_path.to_string()
+            };
+            let normalized = normalize_path(Path::new(&resolved));
+
+            // Links to files outside the index are dropped.
+            if let Some(target_key) = norm_to_key.get(&normalized) {
+                edges.push(GraphEdge {
+                    source: source_path.clone(),
+                    target: target_key.clone(),
+                    anchor,
+                });
+            }
+        }
+    }
+
+    if edges.is_empty() {
+        println!(
+            "{} No internal documentation links found to export.",
+            "Info:".yellow()
+        );
+        return Ok(());
+    }
+
+    match format {
+        "json" => {
+            let export = GraphExport { nodes, edges };
+            println!("{}", serde_json::to_string_pretty(&export)?);
+        }
+        "dot" => {
+            println!("digraph yore_docs {{");
+            for edge in &edges {
+                // Inside DOT quoted strings only the double quote needs escaping.
+                let src = edge.source.replace('"', "\\\"");
+                let dst = edge.target.replace('"', "\\\"");
+                if let Some(anchor) = &edge.anchor {
+                    let label = anchor.replace('"', "\\\"");
+                    println!("  \"{src}\" -> \"{dst}\" [label=\"{label}\"];");
+                } else {
+                    println!("  \"{src}\" -> \"{dst}\";");
+                }
+            }
+            println!("}}");
+        }
+        other => {
+            return Err(format!("Unsupported format: {other}").into());
+        }
+    }
+
+    Ok(())
+}
+
+/// Find indexed files that have not been modified for at least `days` days and have at
+/// least `min_inlinks` inbound links.
+///
+/// Age is derived from the filesystem modification time; files whose metadata cannot be
+/// read are skipped. If the modification time is unavailable or lies in the future, the
+/// age is treated as zero days. Results are ordered oldest first, with ties broken by
+/// file path so the order is stable between runs.
+///
+/// # Errors
+/// Fails when the forward index cannot be loaded.
+pub(crate) fn run_stale_check(
+    index_dir: &Path,
+    days: u64,
+    min_inlinks: usize,
+) -> Result<StaleResult, Box<dyn std::error::Error>> {
+    let forward_index = load_forward_index(index_dir)?;
+    let inbound_counts = compute_inbound_link_counts(&forward_index);
+
+    let now = std::time::SystemTime::now();
+    let mut files = Vec::new();
+
+    for file_path in forward_index.files.keys() {
+        // Indexed files that no longer exist (or cannot be stat'ed) are not reported.
+        let meta = match fs::metadata(file_path) {
+            Ok(meta) => meta,
+            Err(_) => continue,
+        };
+        let modified = meta.modified().unwrap_or(now);
+        let age = now.duration_since(modified).unwrap_or_default().as_secs() / 86_400;
+
+        let inlinks = *inbound_counts.get(file_path).unwrap_or(&0);
+
+        if age >= days && inlinks >= min_inlinks {
+            files.push(StaleFile {
+                file: file_path.clone(),
+                days_since_modified: age,
+                inbound_links: inlinks,
+            });
+        }
+    }
+
+    files.sort_by(|a, b| {
+        b.days_since_modified
+            .cmp(&a.days_since_modified)
+            .then_with(|| a.file.cmp(&b.file))
+    });
+
+    Ok(StaleResult {
+        total_stale: files.len(),
+        files,
+    })
+}
+
+/// Print the outcome of `run_stale_check`, either as JSON or as a plain listing.
+///
+/// With `json` set, the serialized result is the only output. Otherwise a confirmation
+/// line is printed when nothing is stale, or a header followed by one line per stale
+/// file (path, age in days, inbound link count).
+///
+/// # Errors
+/// Propagates errors from `run_stale_check` and from JSON serialization.
+pub(crate) fn cmd_stale(
+    index_dir: &Path,
+    days: u64,
+    min_inlinks: usize,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let report = run_stale_check(index_dir, days, min_inlinks)?;
+
+    if json {
+        let rendered = serde_json::to_string_pretty(&report)?;
+        println!("{rendered}");
+        return Ok(());
+    }
+
+    match report.files.as_slice() {
+        [] => {
+            println!(
+                "{} No stale files found (threshold: {} days, min_inlinks: {}).",
+                "✓".green().bold(),
+                days,
+                min_inlinks
+            );
+        }
+        stale => {
+            println!(
+                "{} Stale files (>= {} days old, inbound_links >= {}):",
+                "Stale".yellow().bold(),
+                days,
+                min_inlinks
+            );
+            println!("{}", "=".repeat(60));
+            for item in stale {
+                println!(
+                    "{} ({} days, {} inbound links)",
+                    item.file, item.days_since_modified, item.inbound_links
+                );
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Find the key under which `file` is stored in `metrics_index`, if any.
+///
+/// Lookup order:
+/// 1. the normalized path as given, then without a leading `./`, then with one;
+/// 2. the normalized form of `canonicalize_existing_path(file)`;
+/// 3. the key built by `build_indexed_doc_key` from that path and the forward index's
+///    source root (only when the forward index loads and reports a source root).
+///
+/// Returns `None` when none of these is a key of `metrics_index.files`.
+pub(crate) fn resolve_health_target_key(
+    file: &Path,
+    index_dir: &Path,
+    metrics_index: &DocumentMetricsIndex,
+) -> Option<String> {
+    let as_given = normalize_path(file);
+    let bare = as_given.trim_start_matches("./").to_string();
+    let dotted = format!("./{bare}");
+
+    // Step 1: cheap spelling variants of the user-supplied path.
+    if let Some(hit) = [&as_given, &bare, &dotted]
+        .iter()
+        .find(|spelling| metrics_index.files.contains_key(**spelling))
+    {
+        return Some((**hit).clone());
+    }
+
+    // Step 2: the canonicalized path.
+    let absolute = canonicalize_existing_path(file);
+    let absolute_key = normalize_path(&absolute);
+    if metrics_index.files.contains_key(&absolute_key) {
+        return Some(absolute_key);
+    }
+
+    // Step 3: derive the key from the index's source root; any failure here means "not found".
+    let forward_index = load_forward_index(index_dir).ok()?;
+    let source_root = forward_index_source_root(&forward_index)?;
+    let derived = build_indexed_doc_key(&absolute, &source_root);
+    if metrics_index.files.contains_key(&derived) {
+        Some(derived)
+    } else {
+        None
+    }
+}
+
+/// Grade a single document's metrics against the configured thresholds.
+///
+/// Four checks run in a fixed order: total line count, number of "Part N"
+/// headings, lines retained under completion-marked sections, and number of
+/// changelog-style entries. The first two produce `error` issues, the last
+/// two `warning` issues. The resulting status is `unhealthy` if any error was
+/// raised, `warning` if only warnings were raised, and `healthy` otherwise.
+pub(crate) fn evaluate_document_health(
+    metrics: &DocumentMetrics,
+    options: &HealthOptions,
+) -> HealthFileResult {
+    let mut issues = Vec::new();
+    let mut report =
+        |kind: &str, severity: &str, message: String, value: usize, threshold: usize| {
+            issues.push(HealthIssue {
+                kind: kind.to_string(),
+                severity: severity.to_string(),
+                message,
+                value,
+                threshold,
+            });
+        };
+
+    let lines = metrics.line_count;
+    if lines > options.max_lines {
+        report(
+            "bloated-file",
+            "error",
+            format!("{lines} lines exceeds the configured threshold"),
+            lines,
+            options.max_lines,
+        );
+    }
+
+    // Unlike the other checks, this one fires when the count *reaches* the limit.
+    let parts = metrics.part_heading_count;
+    if parts >= options.max_part_sections {
+        report(
+            "accumulator-pattern",
+            "error",
+            format!("{parts} \"Part N\" headings suggest an accumulating narrative doc"),
+            parts,
+            options.max_part_sections,
+        );
+    }
+
+    let completed_section_lines: usize = metrics
+        .sections
+        .iter()
+        .filter_map(|section| section.has_completion_marker.then_some(section.line_count))
+        .sum();
+    if completed_section_lines > options.max_completed_lines {
+        report(
+            "stale-completed",
+            "warning",
+            format!(
+                "{completed_section_lines} retained lines sit under completion-marked sections"
+            ),
+            completed_section_lines,
+            options.max_completed_lines,
+        );
+    }
+
+    let changelog_entries = metrics.changelog_entry_count;
+    if changelog_entries > options.max_changelog_entries {
+        report(
+            "changelog-bloat",
+            "warning",
+            format!("{changelog_entries} changelog-style entries exceed the configured threshold"),
+            changelog_entries,
+            options.max_changelog_entries,
+        );
+    }
+
+    let has_severity = |level: &str| issues.iter().any(|issue| issue.severity == level);
+    let status = match (has_severity("error"), has_severity("warning")) {
+        (true, _) => "unhealthy",
+        (false, true) => "warning",
+        (false, false) => "healthy",
+    };
+
+    HealthFileResult {
+        file: metrics.path.clone(),
+        status: status.to_string(),
+        issues,
+    }
+}
+
+/// `health` command: report document-health issues for one file or for the
+/// whole index.
+///
+/// Exactly one of `file` / `all` must be given; otherwise an error is
+/// returned before anything is loaded. In single-file mode the file is always
+/// listed (even when healthy); in `--all` mode only files with at least one
+/// issue are listed, sorted by path. With `json` the result is printed as
+/// pretty JSON, otherwise as a coloured text report.
+pub(crate) fn cmd_health(
+    file: Option<&Path>,
+    all: bool,
+    index_dir: &Path,
+    options: &HealthOptions,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let single_target = match (file, all) {
+        (Some(path), false) => Some(path),
+        (None, true) => None,
+        _ => return Err("pass either a file path or --all".into()),
+    };
+
+    let metrics_index = load_document_metrics(index_dir)?;
+    let total_files = metrics_index.files.len();
+
+    let files: Vec<HealthFileResult> = if let Some(file_path) = single_target {
+        let not_indexed = || {
+            format!(
+                "File not found in document metrics index: {}",
+                file_path.display()
+            )
+        };
+        let key = resolve_health_target_key(file_path, index_dir, &metrics_index)
+            .ok_or_else(not_indexed)?;
+        let metrics = metrics_index.files.get(&key).ok_or_else(not_indexed)?;
+        vec![evaluate_document_health(metrics, options)]
+    } else {
+        let mut flagged: Vec<HealthFileResult> = Vec::new();
+        for metrics in metrics_index.files.values() {
+            let evaluated = evaluate_document_health(metrics, options);
+            if !evaluated.issues.is_empty() {
+                flagged.push(evaluated);
+            }
+        }
+        flagged.sort_by(|a, b| a.file.cmp(&b.file).then_with(|| a.status.cmp(&b.status)));
+        flagged
+    };
+
+    let count_with_status =
+        |wanted: &str| files.iter().filter(|entry| entry.status == wanted).count();
+    let unhealthy_files = count_with_status("unhealthy");
+    let warning_files = count_with_status("warning");
+
+    let result = HealthResult {
+        total_files,
+        unhealthy_files,
+        warning_files,
+        files,
+    };
+
+    if json {
+        println!("{}", serde_json::to_string_pretty(&result)?);
+        return Ok(());
+    }
+
+    if result.files.is_empty() {
+        println!("{}", "✓ No health issues detected.".green().bold());
+        return Ok(());
+    }
+
+    for file_result in &result.files {
+        let issue_count = file_result.issues.len();
+        let plural = if issue_count == 1 { "" } else { "s" };
+        let label = match file_result.status.as_str() {
+            "unhealthy" => "UNHEALTHY".red().bold(),
+            "warning" => "WARNING".yellow().bold(),
+            _ => "HEALTHY".green().bold(),
+        };
+        println!(
+            "{}: {} ({} issue{})",
+            file_result.file, label, issue_count, plural
+        );
+        for issue in &file_result.issues {
+            println!(
+                "  {:<20} {:<7} {} (value: {}, threshold: {})",
+                issue.kind,
+                issue.severity.to_uppercase(),
+                issue.message,
+                issue.value,
+                issue.threshold
+            );
+        }
+        println!();
+    }
+
+    Ok(())
+}
+
+/// Report whether a link target looks like template filler rather than a real
+/// destination.
+///
+/// Matching is ASCII case-insensitive. A target is a placeholder when it is
+/// exactly one of a few stock words, starts with a `path/to` style prefix, or
+/// contains `replace-me` anywhere.
+pub(crate) fn is_placeholder_target(target: &str) -> bool {
+    const STOCK_WORDS: [&str; 5] = ["url", "text", "todo", "link", "tbd"];
+    const TEMPLATE_PREFIXES: [&str; 2] = ["/path/to/", "../path/to/"];
+
+    let lowered = target.to_ascii_lowercase();
+
+    if STOCK_WORDS.contains(&lowered.as_str()) {
+        return true;
+    }
+    if TEMPLATE_PREFIXES
+        .iter()
+        .any(|prefix| lowered.starts_with(*prefix))
+    {
+        return true;
+    }
+    lowered.contains("replace-me")
+}
+
+/// Report whether `ext` (no leading dot, compared case-sensitively) is one of
+/// the extensions treated as code/asset files instead of documents.
+pub(crate) fn is_code_extension(ext: &str) -> bool {
+    const CODE_EXTENSIONS: [&str; 8] = ["py", "ts", "tsx", "json", "yaml", "yml", "png", "svg"];
+    CODE_EXTENSIONS.contains(&ext)
+}
+
+/// Return the lower-cased extension of `path` without the dot, or an empty
+/// string when the path has no (UTF-8) extension.
+pub(crate) fn file_extension(path: &str) -> String {
+    match std::path::Path::new(path).extension() {
+        Some(raw) => match raw.to_str() {
+            Some(ext) => ext.to_lowercase(),
+            None => String::new(),
+        },
+        None => String::new(),
+    }
+}
+
+/// Bump the tally for `kind` in both the global per-kind counter and the
+/// per-file counter belonging to `file`.
+///
+/// Kinds are recorded under their snake_case names (e.g. `doc_missing`).
+pub(crate) fn record_link_kind(
+    by_file: &mut HashMap<String, HashMap<String, usize>>,
+    by_kind: &mut HashMap<String, usize>,
+    file: &str,
+    kind: &LinkKind,
+) {
+    let label: &str = match kind {
+        LinkKind::DocMissing => "doc_missing",
+        LinkKind::CodeMissing => "code_missing",
+        LinkKind::Placeholder => "placeholder",
+        LinkKind::CodeReference => "code_reference",
+        LinkKind::DirectoryReference => "directory_reference",
+        LinkKind::ExternalReference => "external_reference",
+        LinkKind::AnchorMissing => "anchor_missing",
+        LinkKind::AnchorUnverified => "anchor_unverified",
+    };
+
+    *by_kind.entry(label.to_string()).or_insert(0) += 1;
+
+    let per_file = by_file.entry(file.to_string()).or_default();
+    *per_file.entry(label.to_string()).or_insert(0) += 1;
+}
+
+/// Find all files that link to a specific file.
+///
+/// Every non-external link in the forward index is resolved against its
+/// source file (root-relative targets lose their leading `/`, bare `#anchor`
+/// targets resolve to the source file itself), normalized, and compared with
+/// the normalized `target_file`. Matches are reported sorted by source file,
+/// as pretty JSON when `json` is set and as a text report otherwise.
+pub(crate) fn cmd_backlinks(
+    target_file: &str,
+    index_dir: &Path,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let forward_index = load_forward_index(index_dir)?;
+    let normalized_target = normalize_path(Path::new(target_file));
+
+    if !json {
+        println!(
+            "{} {}",
+            "Finding backlinks for".cyan().bold(),
+            normalized_target.white().bold()
+        );
+        println!();
+    }
+
+    let external_schemes = ["http://", "https://", "mailto:", "ftp://"];
+    let mut backlinks = Vec::new();
+
+    for (source_path, entry) in &forward_index.files {
+        for link in &entry.links {
+            let target = &link.target;
+
+            // External links can never point at an indexed file.
+            if external_schemes
+                .iter()
+                .any(|scheme| target.starts_with(*scheme))
+            {
+                continue;
+            }
+
+            // Split "path#anchor" at the first '#'.
+            let (link_path, anchor) = match target.split_once('#') {
+                Some((path, fragment)) => (path.to_string(), Some(fragment.to_string())),
+                None => (target.clone(), None),
+            };
+
+            let resolved_path = if link_path.is_empty() {
+                // Anchor-only link: it refers to the source file itself.
+                source_path.clone()
+            } else if let Some(from_root) = link_path.strip_prefix('/') {
+                // Root-relative link: drop the leading slash and use as-is.
+                from_root.to_string()
+            } else {
+                // Relative link: resolve against the source file's directory.
+                match Path::new(source_path).parent() {
+                    Some(dir) => dir.join(&link_path).to_string_lossy().to_string(),
+                    None => link_path.clone(),
+                }
+            };
+
+            if normalize_path(Path::new(&resolved_path)) == normalized_target {
+                backlinks.push(Backlink {
+                    source_file: source_path.clone(),
+                    link_text: link.text.clone(),
+                    link_target: target.clone(),
+                    anchor,
+                });
+            }
+        }
+    }
+
+    // Deterministic output regardless of index iteration order.
+    backlinks.sort_by(|a, b| a.source_file.cmp(&b.source_file));
+
+    let result = BacklinksResult {
+        target_file: normalized_target,
+        total_backlinks: backlinks.len(),
+        backlinks,
+    };
+
+    if json {
+        println!("{}", serde_json::to_string_pretty(&result)?);
+        return Ok(());
+    }
+
+    let found = &result.backlinks;
+
+    println!("{}", "Backlinks Found".cyan().bold());
+    println!("{}", "=".repeat(60));
+    println!();
+    println!("Total backlinks: {}", found.len());
+    println!();
+
+    if found.is_empty() {
+        println!(
+            "{}",
+            "No backlinks found. This file is not referenced by any other file.".yellow()
+        );
+        println!();
+        println!("{}", "This may indicate:".yellow());
+        println!("  - An orphaned document (consider reviewing for deletion)");
+        println!("  - A new document that needs linking");
+        println!("  - An entry point document (like README.md)");
+        return Ok(());
+    }
+
+    for (position, backlink) in found.iter().enumerate() {
+        println!("[{}] {}", position + 1, backlink.source_file.white().bold());
+        println!(
+            "    Link: [{}]({})",
+            backlink.link_text, backlink.link_target
+        );
+        if let Some(anchor) = &backlink.anchor {
+            println!("    Anchor: #{anchor}");
+        }
+        println!();
+    }
+
+    println!("{}", "Safe to delete?".yellow().bold());
+    println!(
+        "  {} These {} file(s) link to this document.",
+        "⚠".yellow(),
+        found.len()
+    );
+    println!("  Review and update references before deletion.");
+
+    Ok(())
+}
diff --git a/src/commands_links.rs b/src/commands_links.rs
new file mode 100644
index 0000000..b6a43db
--- /dev/null
+++ b/src/commands_links.rs
@@ -0,0 +1,1377 @@
+use crate::commands_graph::*;
+use colored::Colorize;
+use globset::Glob;
+use regex::Regex;
+use serde::Serialize;
+use std::collections::{HashMap, HashSet};
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use crate::types::*;
+use crate::util::*;
+
+/// Validate every link recorded in the forward index and collect the broken ones.
+///
+/// * `index_dir` - directory the forward index is loaded from.
+/// * `root` - optional explicit root used to resolve root-relative (`/...`)
+///   targets; when absent the index's source root is used, and failing that a
+///   root is guessed from the first indexed path.
+/// * `include_summary` / `summary_only` - when either is set, per-file and
+///   per-kind tallies are attached to the result. `summary_only` has no other
+///   effect inside this function.
+/// * `external_paths` - extra locations consulted before a missing target is
+///   reported as broken.
+///
+/// External links (`http://`, `https://`, `mailto:`, `ftp://`) are counted in
+/// `total_links` but never checked, so they always count as valid.
+///
+/// # Errors
+/// Fails if the forward index cannot be loaded or if a source file cannot be
+/// read while fetching the context line of a broken link.
+pub(crate) fn run_link_check(
+    index_dir: &Path,
+    root: Option<&Path>,
+    include_summary: bool,
+    summary_only: bool,
+    external_paths: &[String],
+) -> Result<LinkCheckResult, Box<dyn std::error::Error>> {
+    // Load the forward index
+    let forward_index = load_forward_index(index_dir)?;
+
+    // Determine root directory for resolving relative paths
+    let root_dir = if let Some(r) = root {
+        r.to_path_buf()
+    } else if let Some(source_root) = forward_index_source_root(&forward_index) {
+        source_root
+    } else {
+        // Heuristic fallback: take whichever indexed path the map yields first,
+        // walk up until a directory named "docs" is found, and use its parent.
+        if let Some((first_path, _)) = forward_index.files.iter().next() {
+            let first_path = Path::new(first_path);
+            if let Some(parent) = first_path.parent() {
+                // Walk up to find the common root
+                let mut candidate = parent.to_path_buf();
+                while candidate.parent().is_some() {
+                    let parent_path = candidate.parent().unwrap();
+                    // Check if this is the common root by checking if it contains "docs"
+                    if candidate.file_name().and_then(|s| s.to_str()) == Some("docs") {
+                        break;
+                    }
+                    candidate = parent_path.to_path_buf();
+                }
+                candidate.parent().unwrap_or(Path::new(".")).to_path_buf()
+            } else {
+                Path::new(".").to_path_buf()
+            }
+        } else {
+            Path::new(".").to_path_buf()
+        }
+    };
+
+    // Build file set for fast lookup (keys of the HashMap)
+    let file_set: HashSet<String> = forward_index.files.keys().cloned().collect();
+
+    // Build heading index for anchor validation. Every indexed file gets an
+    // entry, even when it has no headings (its anchor set is then empty).
+    let mut heading_index: HashMap<String, HashSet<String>> = HashMap::new();
+    for (path, entry) in &forward_index.files {
+        let mut anchors = HashSet::new();
+        for heading in &entry.headings {
+            // Convert heading text to anchor format (lowercase, replace spaces with hyphens).
+            // Punctuation is kept as-is, so anchors written in a slug style that
+            // strips punctuation will not match.
+            let anchor = heading.text.to_lowercase().replace(' ', "-");
+            anchors.insert(anchor);
+        }
+        heading_index.insert(path.clone(), anchors);
+    }
+
+    let mut broken_links = Vec::new();
+    let mut total_links = 0;
+
+    // Cache file lines for context snippets
+    let mut file_lines_cache: HashMap<String, Vec<String>> = HashMap::new();
+
+    // Summary accumulators
+    let mut counts_by_file: HashMap<String, HashMap<String, usize>> = HashMap::new();
+    let mut counts_by_kind: HashMap<String, usize> = HashMap::new();
+
+    // Iterate through all files and check their links.
+    // Each link contributes at most one entry to `broken_links`.
+    for (file_path, entry) in &forward_index.files {
+        for link in &entry.links {
+            total_links += 1;
+
+            let target = &link.target;
+
+            // Skip external links (http://, https://, mailto:, etc.)
+            if target.starts_with("http://")
+                || target.starts_with("https://")
+                || target.starts_with("mailto:")
+                || target.starts_with("ftp://")
+            {
+                continue;
+            }
+
+            // Parse link to separate file path and anchor
+            let (link_path, anchor) = if let Some(idx) = target.find('#') {
+                (
+                    target[..idx].to_string(),
+                    Some(target[idx + 1..].to_string()),
+                )
+            } else {
+                (target.clone(), None)
+            };
+
+            let line_number = link.line;
+
+            // Resolve relative path
+            let resolved_path = if link_path.is_empty() {
+                // Just an anchor in the current file
+                file_path.clone()
+            } else if let Some(stripped) = link_path.strip_prefix('/') {
+                // Absolute path from root
+                root_dir.join(stripped).to_string_lossy().to_string()
+            } else {
+                // Relative path
+                let source_path = Path::new(file_path);
+                if let Some(parent) = source_path.parent() {
+                    parent.join(&link_path).to_string_lossy().to_string()
+                } else {
+                    link_path.clone()
+                }
+            };
+
+            // Normalize path (remove ./ and resolve ../)
+            let normalized_path = normalize_path(Path::new(&resolved_path));
+
+            // Placeholder targets: treat as lower-severity broken links
+            if !link_path.is_empty() && is_placeholder_target(&link_path) {
+                let context = get_link_context(&mut file_lines_cache, file_path, line_number)?;
+                let kind = LinkKind::Placeholder;
+                record_link_kind(&mut counts_by_file, &mut counts_by_kind, file_path, &kind);
+                broken_links.push(BrokenLink {
+                    source_file: file_path.clone(),
+                    line_number,
+                    link_text: link.text.clone(),
+                    link_target: target.clone(),
+                    error: format!("Placeholder link target: {link_path}"),
+                    anchor: anchor.clone(),
+                    context,
+                });
+                continue;
+            }
+
+            // File-level checks only when there is an explicit path component
+            if !link_path.is_empty() {
+                // The normalized path is handed to the filesystem as-is.
+                let meta = fs::metadata(&normalized_path).ok();
+                let exists = meta.is_some();
+                let is_dir = meta.as_ref().is_some_and(std::fs::Metadata::is_dir);
+
+                if exists && is_dir {
+                    // Valid directory reference
+                    record_link_kind(
+                        &mut counts_by_file,
+                        &mut counts_by_kind,
+                        file_path,
+                        &LinkKind::DirectoryReference,
+                    );
+                } else if exists {
+                    // File exists on disk but may not be indexed (e.g., code)
+                    if !file_set.contains(&normalized_path) {
+                        let ext = file_extension(&normalized_path);
+                        let kind = if is_code_extension(&ext) {
+                            LinkKind::CodeReference
+                        } else {
+                            LinkKind::ExternalReference
+                        };
+                        record_link_kind(
+                            &mut counts_by_file,
+                            &mut counts_by_kind,
+                            file_path,
+                            &kind,
+                        );
+                    }
+                } else {
+                    // File not found locally - check external repos
+                    let mut found_in_external = false;
+                    for ext_path in external_paths {
+                        // Check if the link might be pointing to an external repo
+                        // by seeing if the normalized path contains the external path pattern
+                        if normalized_path.contains(ext_path) {
+                            // The link references an external repo path, try to resolve it
+                            // NOTE(review): this branch is only reached after
+                            // `fs::metadata` failed for the same path a few lines
+                            // up, so this `exists()` looks like it can practically
+                            // never be true - confirm the intended lookup.
+                            if Path::new(&normalized_path).exists() {
+                                found_in_external = true;
+                                record_link_kind(
+                                    &mut counts_by_file,
+                                    &mut counts_by_kind,
+                                    file_path,
+                                    &LinkKind::ExternalReference,
+                                );
+                                break;
+                            }
+                        }
+                        // Also check if it's a relative path that would resolve to external repo.
+                        // Only the link's file name is used: a file with that name directly
+                        // inside `ext_path` is accepted regardless of its sub-directory.
+                        let resolved_ext = Path::new(ext_path)
+                            .join(Path::new(&link_path).file_name().unwrap_or_default());
+                        if resolved_ext.exists() {
+                            found_in_external = true;
+                            record_link_kind(
+                                &mut counts_by_file,
+                                &mut counts_by_kind,
+                                file_path,
+                                &LinkKind::ExternalReference,
+                            );
+                            break;
+                        }
+                    }
+
+                    if found_in_external {
+                        continue;
+                    }
+
+                    // Missing target file: classify as doc_missing or code_missing
+                    let ext = file_extension(&normalized_path);
+                    let kind = if is_code_extension(&ext) {
+                        LinkKind::CodeMissing
+                    } else {
+                        LinkKind::DocMissing
+                    };
+                    let context = get_link_context(&mut file_lines_cache, file_path, line_number)?;
+                    record_link_kind(&mut counts_by_file, &mut counts_by_kind, file_path, &kind);
+                    broken_links.push(BrokenLink {
+                        source_file: file_path.clone(),
+                        line_number,
+                        link_text: link.text.clone(),
+                        link_target: target.clone(),
+                        error: format!("Target file not found: {normalized_path}"),
+                        anchor: anchor.clone(),
+                        context,
+                    });
+                    continue;
+                }
+            }
+
+            // Check anchor if present
+            if let Some(ref anchor_text) = anchor {
+                let target_file = if link_path.is_empty() {
+                    file_path
+                } else {
+                    &normalized_path
+                };
+
+                if let Some(anchors) = heading_index.get(target_file) {
+                    // The anchor is compared verbatim against the generated slugs.
+                    if !anchors.contains(anchor_text as &str) {
+                        let context =
+                            get_link_context(&mut file_lines_cache, file_path, line_number)?;
+                        let kind = LinkKind::AnchorMissing;
+                        record_link_kind(
+                            &mut counts_by_file,
+                            &mut counts_by_kind,
+                            file_path,
+                            &kind,
+                        );
+                        broken_links.push(BrokenLink {
+                            source_file: file_path.clone(),
+                            line_number,
+                            link_text: link.text.clone(),
+                            link_target: target.clone(),
+                            error: format!("Anchor not found: #{anchor_text}"),
+                            anchor: Some(anchor_text.clone()),
+                            context,
+                        });
+                    }
+                } else {
+                    // No heading_index entry means the target is not a key of the
+                    // forward index (directories and non-indexed files land here),
+                    // so the anchor cannot be checked.
+                    let context = get_link_context(&mut file_lines_cache, file_path, line_number)?;
+                    let kind = LinkKind::AnchorUnverified;
+                    record_link_kind(&mut counts_by_file, &mut counts_by_kind, file_path, &kind);
+                    broken_links.push(BrokenLink {
+                        source_file: file_path.clone(),
+                        line_number,
+                        link_text: link.text.clone(),
+                        link_target: target.clone(),
+                        error: format!(
+                            "Could not verify anchor (file has no headings): #{anchor_text}"
+                        ),
+                        anchor: Some(anchor_text.clone()),
+                        context,
+                    });
+                }
+            }
+        }
+    }
+
+    // Skipped external links are part of `total_links`, hence counted as valid.
+    let valid_links = total_links - broken_links.len();
+
+    let mut result = LinkCheckResult {
+        total_links,
+        valid_links,
+        broken_links: broken_links.len(),
+        broken: broken_links.clone(),
+        summary: None,
+    };
+
+    // Build summary if requested
+    if include_summary || summary_only {
+        let mut by_file_vec: Vec<LinkSummaryByFile> = counts_by_file
+            .into_iter()
+            .map(|(file, counts)| LinkSummaryByFile { file, counts })
+            .collect();
+        by_file_vec.sort_by(|a, b| a.file.cmp(&b.file));
+
+        let mut by_kind_vec: Vec<LinkSummaryByKind> = counts_by_kind
+            .into_iter()
+            .map(|(kind, count)| LinkSummaryByKind { kind, count })
+            .collect();
+        by_kind_vec.sort_by(|a, b| a.kind.cmp(&b.kind));
+
+        result.summary = Some(LinkCheckSummary {
+            by_file: by_file_vec,
+            by_kind: by_kind_vec,
+        });
+    }
+
+    Ok(result)
+}
+
+/// User-facing link check command that prints results.
+///
+/// Runs the link check and prints the outcome: pretty JSON when `json` is
+/// set, otherwise a text report with totals, the per-kind summary and (unless
+/// `summary_only`) the list of broken links. The summary is always computed
+/// for text output; for JSON it is computed only when `summary_flag` or
+/// `summary_only` is set.
+pub(crate) fn cmd_check_links(
+    index_dir: &Path,
+    json: bool,
+    root: Option<&Path>,
+    summary_flag: bool,
+    summary_only: bool,
+    external_paths: &[String],
+) -> Result<(), Box<dyn std::error::Error>> {
+    let include_summary = summary_flag || summary_only || !json;
+    let result = run_link_check(
+        index_dir,
+        root,
+        include_summary,
+        summary_only,
+        external_paths,
+    )?;
+
+    if json {
+        println!("{}", serde_json::to_string_pretty(&result)?);
+        return Ok(());
+    }
+
+    // The root shown here is for display only: the explicit root if given,
+    // otherwise the directory of the first indexed file.
+    let forward_index = load_forward_index(index_dir)?;
+    let display_root = match root {
+        Some(explicit) => explicit.to_path_buf(),
+        None => match forward_index.files.iter().next() {
+            Some((first_path, _)) => Path::new(first_path)
+                .parent()
+                .unwrap_or(Path::new("."))
+                .to_path_buf(),
+            None => Path::new(".").to_path_buf(),
+        },
+    };
+
+    println!(
+        "{} {}",
+        "Checking links in".cyan().bold(),
+        display_root.display()
+    );
+    println!();
+
+    println!("{}", "Link Check Results".cyan().bold());
+    println!("{}", "=".repeat(60));
+    println!();
+    println!("Total links:  {}", result.total_links);
+    println!(
+        "Valid links:  {} {}",
+        result.valid_links,
+        "✓".green().bold()
+    );
+    let broken_marker = if result.broken_links == 0 {
+        "✓".green().bold().to_string()
+    } else {
+        "✗".red().bold().to_string()
+    };
+    println!("Broken links: {} {}", result.broken_links, broken_marker);
+    println!();
+
+    if let Some(summary) = &result.summary {
+        println!("{}", "Summary by kind:".cyan().bold());
+        for item in &summary.by_kind {
+            println!("  - {:<18} {}", item.kind, item.count);
+        }
+        println!();
+    }
+
+    if summary_only || result.broken.is_empty() {
+        return Ok(());
+    }
+
+    println!("{}", "Broken Links:".red().bold());
+    println!();
+
+    for (position, link) in result.broken.iter().enumerate() {
+        println!("[{}] {}", position + 1, link.source_file.white().bold());
+        println!("    Link: [{}]({})", link.link_text, link.link_target);
+        if link.line_number > 0 {
+            println!("    Line: {}", link.line_number);
+        }
+        if let Some(ref ctx) = link.context {
+            println!("    Context: {ctx}");
+        }
+        println!("    Error: {}", link.error.red());
+        println!();
+    }
+
+    Ok(())
+}
+
+/// Load a single-line context snippet for a link location.
+///
+/// `line_number` is 1-based; `0` means "unknown" and yields `Ok(None)`, as
+/// does a line number past the end of the file. The file's lines are read
+/// once and kept in `cache`, keyed by `file_path`, so repeated lookups for
+/// the same file do not touch the disk again.
+///
+/// Lines longer than 160 bytes are shortened to at most 157 bytes plus
+/// `"..."`. The cut is moved back to the nearest UTF-8 character boundary so
+/// that multi-byte text can never trigger a panic.
+///
+/// # Errors
+/// Returns the I/O error if the file is not cached yet and cannot be read.
+pub(crate) fn get_link_context(
+    cache: &mut HashMap<String, Vec<String>>,
+    file_path: &str,
+    line_number: usize,
+) -> Result<Option<String>, Box<dyn std::error::Error>> {
+    const MAX_CONTEXT_BYTES: usize = 160;
+    const ELLIPSIS: &str = "...";
+
+    if line_number == 0 {
+        return Ok(None);
+    }
+
+    // Load and cache file lines if needed
+    if !cache.contains_key(file_path) {
+        let content = fs::read_to_string(file_path)?;
+        let lines: Vec<String> = content.lines().map(str::to_string).collect();
+        cache.insert(file_path.to_string(), lines);
+    }
+
+    let Some(line) = cache
+        .get(file_path)
+        .and_then(|lines| lines.get(line_number - 1))
+    else {
+        return Ok(None);
+    };
+
+    if line.len() <= MAX_CONTEXT_BYTES {
+        return Ok(Some(line.clone()));
+    }
+
+    // Byte offset 157 may fall inside a multi-byte character; slicing or
+    // truncating there would panic, so back off to the previous boundary.
+    // Offset 0 is always a boundary, which bounds the loop.
+    let mut cut = MAX_CONTEXT_BYTES - ELLIPSIS.len();
+    while !line.is_char_boundary(cut) {
+        cut -= 1;
+    }
+
+    let mut snippet = String::with_capacity(cut + ELLIPSIS.len());
+    snippet.push_str(&line[..cut]);
+    snippet.push_str(ELLIPSIS);
+    Ok(Some(snippet))
+}
+
+/// Read the YAML policy file at `path` and deserialize it.
+///
+/// # Errors
+/// Fails if the file cannot be read or is not valid policy YAML.
+pub(crate) fn load_policy_config(path: &Path) -> Result<PolicyConfig, Box<dyn std::error::Error>> {
+    let raw_yaml = fs::read_to_string(path)?;
+    Ok(serde_yaml::from_str::<PolicyConfig>(&raw_yaml)?)
+}
+
+pub(crate) fn rule_severity(rule: &PolicyRule) -> String {
+    rule.severity.as_deref().unwrap_or("error").to_string()
+}
+
+/// Display name of `rule`: its explicit name, or its pattern when unnamed.
+pub(crate) fn rule_name(rule: &PolicyRule) -> String {
+    match &rule.name {
+        Some(explicit) => explicit.clone(),
+        None => rule.pattern.clone(),
+    }
+}
+
+/// A heading-delimited slice of a document, as produced by
+/// `parse_policy_sections`.
+#[derive(Debug)]
+pub(crate) struct PolicySection {
+    /// Heading text without the leading `#` markers, trimmed; the synthetic
+    /// value "Full Document" is used when the document has no headings.
+    heading: String,
+    /// 1-based line number of the heading line (1 for the synthetic section).
+    line_start: usize,
+    /// 1-based number of the last line in the section: the line before the
+    /// next heading, or the last line of the document.
+    line_end: usize,
+}
+
+/// Split `content` into sections delimited by ATX headings (`#` .. `######`).
+///
+/// Every heading line opens a section that runs until the line before the
+/// next heading, or to the end of the document for the last one; heading
+/// level is ignored. Lines before the first heading belong to no section.
+/// A non-empty document without any heading yields a single section named
+/// "Full Document" spanning all lines; an empty document yields no sections.
+/// Line numbers in the result are 1-based and inclusive.
+pub(crate) fn parse_policy_sections(content: &str) -> Vec<PolicySection> {
+    // Compiled once per process instead of once per call: this runs for
+    // every document that is checked against the policy.
+    static HEADING_RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
+
+    let total_lines = content.lines().count();
+    if total_lines == 0 {
+        return Vec::new();
+    }
+
+    let heading_re = HEADING_RE.get_or_init(|| {
+        Regex::new(r"^(#{1,6})\s+(.+)$").expect("heading pattern is a valid regex")
+    });
+
+    let mut sections: Vec<PolicySection> = Vec::new();
+
+    for (idx, line) in content.lines().enumerate() {
+        let Some(caps) = heading_re.captures(line) else {
+            continue;
+        };
+        let heading = caps
+            .get(2)
+            .map_or_else(|| "Untitled".to_string(), |m| m.as_str().trim().to_string());
+
+        // The previous section ends on the line right before this heading:
+        // the 0-based index of the heading equals that line's 1-based number.
+        if let Some(previous) = sections.last_mut() {
+            previous.line_end = idx;
+        }
+
+        // Provisionally runs to the end of the document; tightened above if
+        // another heading follows.
+        sections.push(PolicySection {
+            heading,
+            line_start: idx + 1,
+            line_end: total_lines,
+        });
+    }
+
+    if sections.is_empty() {
+        sections.push(PolicySection {
+            heading: "Full Document".to_string(),
+            line_start: 1,
+            line_end: total_lines,
+        });
+    }
+
+    sections
+}
+
+/// A document link found in markdown content, as produced by
+/// `extract_markdown_link_targets`.
+#[derive(Debug)]
+pub(crate) struct LinkTarget {
+    /// Normalized path of the linked document.
+    path: String,
+    /// Text after the first `#` of the link target, if any.
+    anchor: Option<String>,
+}
+
+/// Collect the document links (`[label](target)`) found in `content`.
+///
+/// Image links (`![..](..)`), external URLs, anchor-only links and targets
+/// that do not end in `.md`, `.txt` or `.rst` (case-insensitive) are skipped.
+/// Root-relative targets (`/x/y.md`) lose their leading slash; every other
+/// target is resolved against the directory of `file_path`. The returned
+/// paths are normalized and keep the order in which the links appear.
+pub(crate) fn extract_markdown_link_targets(file_path: &str, content: &str) -> Vec<LinkTarget> {
+    // Compiled once per process instead of once per call: this runs for
+    // every document that is scanned.
+    static LINK_RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
+    const EXTERNAL_SCHEMES: [&str; 4] = ["http://", "https://", "mailto:", "ftp://"];
+    const DOC_SUFFIXES: [&str; 3] = [".md", ".txt", ".rst"];
+
+    let link_regex = LINK_RE.get_or_init(|| {
+        Regex::new(r"(!?)\[(?P<label>[^\]]+)\]\((?P<target>[^)]+)\)")
+            .expect("markdown link pattern is a valid regex")
+    });
+
+    let origin_dir = Path::new(file_path)
+        .parent()
+        .unwrap_or_else(|| Path::new("."));
+
+    let mut targets = Vec::new();
+
+    for caps in link_regex.captures_iter(content) {
+        // A leading '!' marks an image, not a document link.
+        if caps.get(1).is_some_and(|m| m.as_str() == "!") {
+            continue;
+        }
+
+        let Some(target_match) = caps.name("target") else {
+            continue;
+        };
+        let target_str = target_match.as_str();
+
+        if EXTERNAL_SCHEMES
+            .iter()
+            .any(|scheme| target_str.starts_with(*scheme))
+        {
+            continue;
+        }
+
+        // Split "path#anchor" at the first '#'.
+        let (path_part, anchor) = match target_str.split_once('#') {
+            Some((path, fragment)) => (path, Some(fragment.to_string())),
+            None => (target_str, None),
+        };
+
+        if path_part.is_empty() {
+            continue;
+        }
+
+        let lowered = path_part.to_ascii_lowercase();
+        if !DOC_SUFFIXES.iter().any(|suffix| lowered.ends_with(*suffix)) {
+            continue;
+        }
+
+        let target_path = match path_part.strip_prefix('/') {
+            Some(from_root) => PathBuf::from(from_root),
+            None => origin_dir.join(path_part),
+        };
+
+        targets.push(LinkTarget {
+            path: normalize_path(&target_path),
+            anchor,
+        });
+    }
+
+    targets
+}
+
+/// Splits a required-link entry into a normalized path and an optional anchor.
+///
+/// Resolution rules, in order:
+/// * external URLs (`http://`, `https://`, `mailto:`, `ftp://`) are returned
+///   with the whole `required` string as the path, plus the parsed anchor;
+/// * leading `./` prefixes are stripped;
+/// * an empty path (anchor-only entry) refers to `file_path` itself;
+/// * `../…` paths and bare file names are joined onto the directory of
+///   `file_path`;
+/// * any other path containing `/` is used as-is with leading `/` removed.
+///
+/// Non-external paths are passed through `normalize_path`.
+pub(crate) fn normalize_required_link(file_path: &str, required: &str) -> (String, Option<String>) {
+    let (raw_path, anchor) = match required.split_once('#') {
+        Some((before, fragment)) => (before, Some(fragment.to_string())),
+        None => (required, None),
+    };
+
+    let is_external = ["http://", "https://", "mailto:", "ftp://"]
+        .iter()
+        .any(|scheme| raw_path.starts_with(*scheme));
+    if is_external {
+        return (required.to_string(), anchor);
+    }
+
+    let local = raw_path.trim_start_matches("./");
+    let relative_to_source = || {
+        Path::new(file_path)
+            .parent()
+            .unwrap_or_else(|| Path::new("."))
+            .join(local)
+    };
+
+    let resolved = if local.is_empty() {
+        PathBuf::from(file_path)
+    } else if local.starts_with("../") || !local.contains('/') {
+        relative_to_source()
+    } else {
+        PathBuf::from(local.trim_start_matches('/'))
+    };
+
+    (normalize_path(&resolved), anchor)
+}
+
+/// Evaluates a single policy rule against one document and returns every
+/// violation found.
+///
+/// Checks run in this order: required substrings, forbidden substrings,
+/// minimum / maximum line count, required / forbidden heading texts, maximum
+/// section length (optionally restricted to headings matching
+/// `section_heading_regex`), and required links.
+///
+/// If `section_heading_regex` fails to compile, a violation describing the bad
+/// pattern is recorded and the function returns immediately, so the
+/// required-link check is not run in that case.
+pub(crate) fn collect_policy_violations_for_content(
+    rule: &PolicyRule,
+    file_path: &str,
+    content: &str,
+) -> Vec<PolicyViolation> {
+    // Every finding shares the same envelope; only the message differs.
+    let violation = |message: String| PolicyViolation {
+        file: file_path.to_string(),
+        rule: rule_name(rule),
+        message,
+        severity: rule_severity(rule),
+        kind: "policy_violation".to_string(),
+    };
+
+    let mut found = Vec::new();
+
+    // Substring requirements.
+    found.extend(
+        rule.must_contain
+            .iter()
+            .filter(|needle| !content.contains(*needle))
+            .map(|needle| violation(format!("Missing required content: {needle:?}"))),
+    );
+    found.extend(
+        rule.must_not_contain
+            .iter()
+            .filter(|needle| content.contains(*needle))
+            .map(|needle| violation(format!("Forbidden content present: {needle:?}"))),
+    );
+
+    // Whole-document length, measured in lines.
+    let line_count = content.lines().count();
+    match rule.min_length {
+        Some(min_len) if line_count < min_len => found.push(violation(format!(
+            "Document too short: {line_count} lines (min required: {min_len})"
+        ))),
+        _ => {}
+    }
+    match rule.max_length {
+        Some(max_len) if line_count > max_len => found.push(violation(format!(
+            "Document too long: {line_count} lines (max allowed: {max_len})"
+        ))),
+        _ => {}
+    }
+
+    // Heading presence / absence, compared by trimmed heading text.
+    if !(rule.required_headings.is_empty() && rule.forbidden_headings.is_empty()) {
+        let heading_re = Regex::new(r"^(#{1,6})\s+(.+)$").unwrap();
+        let headings: Vec<String> = content
+            .lines()
+            .filter_map(|line| heading_re.captures(line))
+            .filter_map(|caps| caps.get(2).map(|text| text.as_str().trim().to_string()))
+            .collect();
+
+        for h in &rule.required_headings {
+            if headings.iter().all(|t| t != h) {
+                found.push(violation(format!("Missing required heading: {h:?}")));
+            }
+        }
+        for h in &rule.forbidden_headings {
+            if headings.iter().any(|t| t == h) {
+                found.push(violation(format!("Forbidden heading present: {h:?}")));
+            }
+        }
+    }
+
+    // Per-section length, measured in lines.
+    if let Some(max_section_len) = rule.max_section_length {
+        let heading_filter = match rule.section_heading_regex.as_deref() {
+            None => None,
+            Some(pattern) => {
+                let Ok(re) = Regex::new(pattern) else {
+                    found.push(violation(format!(
+                        "Invalid section heading regex: {pattern:?}"
+                    )));
+                    return found;
+                };
+                Some(re)
+            }
+        };
+
+        for section in parse_policy_sections(content) {
+            let selected = heading_filter
+                .as_ref()
+                .map_or(true, |re| re.is_match(&section.heading));
+            if !selected {
+                continue;
+            }
+
+            // Inclusive span; an inverted range counts as zero lines.
+            let section_len = section
+                .line_end
+                .checked_sub(section.line_start)
+                .map_or(0, |span| span + 1);
+
+            if section_len > max_section_len {
+                found.push(violation(format!(
+                    "Section too long: {:?} is {} lines (max allowed: {})",
+                    section.heading, section_len, max_section_len
+                )));
+            }
+        }
+    }
+
+    // Required outgoing links, matched by path or by `path#anchor`.
+    if !rule.must_link_to.is_empty() {
+        let mut target_paths: HashSet<String> = HashSet::new();
+        let mut target_keys: HashSet<String> = HashSet::new();
+
+        for LinkTarget { path, anchor } in extract_markdown_link_targets(file_path, content) {
+            let key = match anchor {
+                Some(fragment) => format!("{path}#{fragment}"),
+                None => path.clone(),
+            };
+            target_keys.insert(key);
+            target_paths.insert(path);
+        }
+
+        for required in &rule.must_link_to {
+            let (req_path, req_anchor) = normalize_required_link(file_path, required);
+            let satisfied = match req_anchor {
+                Some(anchor) => target_keys.contains(&format!("{req_path}#{anchor}")),
+                None => target_paths.contains(&req_path),
+            };
+
+            if !satisfied {
+                found.push(violation(format!("Missing required link: {required:?}")));
+            }
+        }
+    }
+
+    found
+}
+
+/// Applies every rule of the policy file to the indexed files it matches.
+///
+/// Each rule's `pattern` is compiled as a glob and tested against the file
+/// paths stored in the forward index; matching files are read from disk and
+/// checked with `collect_policy_violations_for_content`.
+///
+/// # Errors
+///
+/// Fails if the forward index or the policy file cannot be loaded, if a rule
+/// pattern is not a valid glob, or if a matching file cannot be read.
+pub(crate) fn run_policy_check(
+    index_dir: &Path,
+    policy_path: &Path,
+) -> Result<PolicyCheckResult, Box<dyn std::error::Error>> {
+    let forward_index = load_forward_index(index_dir)?;
+    let policy = load_policy_config(policy_path)?;
+
+    let mut violations = Vec::new();
+
+    for rule in &policy.rules {
+        let matcher = Glob::new(&rule.pattern)?.compile_matcher();
+        let matching_files = forward_index
+            .files
+            .keys()
+            .filter(|candidate| matcher.is_match(candidate.as_str()));
+
+        for file_path in matching_files {
+            let content = fs::read_to_string(file_path.as_str())?;
+            violations.extend(collect_policy_violations_for_content(
+                rule, file_path, &content,
+            ));
+        }
+    }
+
+    let total_violations = violations.len();
+    Ok(PolicyCheckResult {
+        policy_file: policy_path.to_string_lossy().to_string(),
+        total_violations,
+        violations,
+    })
+}
+
+/// Entry point for the policy command: runs the policy check and prints the
+/// outcome, either as pretty-printed JSON or as a human-readable report.
+///
+/// # Errors
+///
+/// Fails if `config_path` does not exist, if `run_policy_check` fails, or if
+/// the result cannot be serialized to JSON.
+pub(crate) fn cmd_policy(
+    config_path: &Path,
+    index_dir: &Path,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    if !config_path.exists() {
+        return Err(format!("Policy file not found: {}", config_path.display()).into());
+    }
+
+    let result = run_policy_check(index_dir, config_path)?;
+
+    if json {
+        println!("{}", serde_json::to_string_pretty(&result)?);
+    } else if result.violations.is_empty() {
+        println!(
+            "{} No policy violations found ({}).",
+            "✓".green().bold(),
+            result.policy_file
+        );
+    } else {
+        println!(
+            "{} Policy violations found using {}",
+            "✗".red().bold(),
+            result.policy_file
+        );
+        println!("{}", "=".repeat(60));
+        println!();
+
+        for violation in &result.violations {
+            println!("{}", violation.file.white().bold());
+            println!("  Rule: {}", violation.rule);
+            println!("  Severity: {}", violation.severity);
+            println!("  Kind: {}", violation.kind);
+            println!("  Message: {}", violation.message);
+            println!();
+        }
+
+        println!("Total violations: {}", result.total_violations);
+    }
+
+    Ok(())
+}
+
+/// Lists every indexed file whose file name equals that of `link_path`.
+///
+/// A candidate located under the directory of `source_file` is returned
+/// relative to that directory; any other candidate is returned exactly as it
+/// appears in `available_files`. The result is sorted lexicographically.
+///
+/// Returns an empty list when `link_path` is empty or has no UTF-8 file name.
+pub(crate) fn find_link_candidates(
+    source_file: &str,
+    link_path: &str,
+    available_files: &HashSet<String>,
+) -> Vec<String> {
+    let wanted = match Path::new(link_path).file_name().and_then(|n| n.to_str()) {
+        Some(name) if !link_path.is_empty() => name,
+        _ => return Vec::new(),
+    };
+
+    let base_dir = Path::new(source_file).parent().unwrap_or(Path::new("."));
+
+    let mut matches = Vec::new();
+    for indexed in available_files {
+        let indexed_path = Path::new(indexed);
+        if indexed_path.file_name().and_then(|n| n.to_str()) != Some(wanted) {
+            continue;
+        }
+
+        // Prefer a path relative to the source directory; otherwise keep the
+        // indexed path untouched.
+        let shown = match indexed_path.strip_prefix(base_dir) {
+            Ok(rel) if !rel.as_os_str().is_empty() => rel.to_string_lossy().to_string(),
+            _ => indexed.clone(),
+        };
+        matches.push(shown);
+    }
+
+    matches.sort();
+    matches
+}
+
+/// Suggests a replacement for a broken link, but only when it is unambiguous.
+///
+/// Returns the single entry produced by `find_link_candidates` when exactly
+/// one indexed file shares the link's file name, and `None` when there are
+/// zero or several candidates.
+#[allow(dead_code)] // Utility for future interactive fix mode
+pub(crate) fn suggest_new_link_target(
+    source_file: &str,
+    link_path: &str,
+    available_files: &HashSet<String>,
+) -> Option<String> {
+    let mut candidates = find_link_candidates(source_file, link_path, available_files);
+    match candidates.len() {
+        1 => candidates.pop(),
+        _ => None,
+    }
+}
+
+/// Finds broken local links in the indexed files and previews, applies, or
+/// proposes fixes for them.
+///
+/// Modes:
+/// * `apply_decisions` set: delegates to `apply_link_decisions` and returns;
+///   no index is loaded in this mode.
+/// * `propose` set: ambiguous links (several candidates) are written as YAML
+///   to the given path; nothing is rewritten.
+/// * `dry_run` / `apply`: unambiguous fixes are listed, and rewritten on disk
+///   when `apply` is true.
+///
+/// A link counts as broken when its path, resolved against the source file's
+/// directory and normalized, is not a key of the forward index. Candidates
+/// come from `find_link_candidates`; when that yields nothing and
+/// `use_git_history` loaded a rename history, `resolve_renamed_path` is tried.
+///
+/// # Errors
+///
+/// Fails if no mode flag is given, if the index, rename history, or a source
+/// file cannot be read or parsed, or if writing output fails.
+pub(crate) fn cmd_fix_links(
+    index_dir: &Path,
+    dry_run: bool,
+    apply: bool,
+    propose: Option<PathBuf>,
+    apply_decisions: Option<PathBuf>,
+    json: bool,
+    use_git_history: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    // Handle apply-decisions mode: read and apply a proposal file
+    if let Some(decisions_path) = apply_decisions {
+        return apply_link_decisions(&decisions_path, dry_run, json);
+    }
+
+    // Validate mode flags for regular operation
+    let propose_mode = propose.is_some();
+    if !propose_mode && !dry_run && !apply {
+        return Err("Specify --dry-run, --apply, or --propose <file>".into());
+    }
+
+    let forward_index = load_forward_index(index_dir)?;
+    let available_files: HashSet<String> = forward_index.files.keys().cloned().collect();
+
+    // Load git rename history if requested and available
+    let rename_history: Option<RenameHistory> = if use_git_history {
+        let rename_path = index_dir.join("rename_history.json");
+        if rename_path.exists() {
+            let content = fs::read_to_string(&rename_path)?;
+            Some(serde_json::from_str(&content)?)
+        } else {
+            // A missing history file is not fatal: warn and continue without it.
+            eprintln!(
+                "Warning: --use-git-history requested but no rename_history.json found. \
+                 Run 'yore build --track-renames' first."
+            );
+            None
+        }
+    } else {
+        None
+    };
+
+    // `fixes` holds links with exactly one candidate; `proposals` holds links
+    // with several candidates (collected only in propose mode).
+    let mut fixes: Vec<LinkFix> = Vec::new();
+    let mut proposals: Vec<LinkFixProposal> = Vec::new();
+
+    for (file_path, entry) in &forward_index.files {
+        for link in &entry.links {
+            let target = &link.target;
+
+            // Skip external links
+            if target.starts_with("http://")
+                || target.starts_with("https://")
+                || target.starts_with("mailto:")
+                || target.starts_with("ftp://")
+            {
+                continue;
+            }
+
+            // Split off anchor
+            let (link_path, anchor) = if let Some(idx) = target.find('#') {
+                (
+                    target[..idx].to_string(),
+                    Some(target[idx + 1..].to_string()),
+                )
+            } else {
+                (target.clone(), None)
+            };
+
+            // Check if link resolves
+            // An anchor-only link (`#section`) points back at the source file;
+            // everything else is resolved against the source file's directory.
+            let source_path = Path::new(file_path);
+            let resolved = if link_path.is_empty() {
+                file_path.clone()
+            } else if let Some(parent) = source_path.parent() {
+                parent.join(&link_path).to_string_lossy().to_string()
+            } else {
+                link_path.clone()
+            };
+
+            let normalized = normalize_path(Path::new(&resolved));
+            if available_files.contains(&normalized) {
+                continue;
+            }
+
+            // Find candidates using index-based matching
+            let mut candidates = find_link_candidates(file_path, &link_path, &available_files);
+
+            // If no candidates found and git history is available, check for renames
+            if candidates.is_empty() {
+                if let Some(ref history) = rename_history {
+                    // Try to resolve the old path to its current location
+                    if let Some(new_path) = resolve_renamed_path(&normalized, history) {
+                        // Check if the new path exists in available files
+                        if available_files.contains(&new_path) {
+                            // Convert to relative path from source
+                            if let Some(rel) =
+                                compute_relative_path(file_path, &new_path, &available_files)
+                            {
+                                candidates.push(rel);
+                            } else {
+                                candidates.push(new_path);
+                            }
+                        }
+                    }
+                }
+            }
+
+            if candidates.is_empty() {
+                continue;
+            }
+
+            if candidates.len() == 1 {
+                // Unambiguous fix
+                let mut new_target = candidates[0].clone();
+                if let Some(ref a) = anchor {
+                    new_target.push('#');
+                    new_target.push_str(a);
+                }
+                // Skip rewrites that would leave the link text unchanged.
+                if new_target != *target {
+                    fixes.push(LinkFix {
+                        file: file_path.clone(),
+                        old_target: target.clone(),
+                        new_target,
+                    });
+                }
+            } else if propose_mode {
+                // Multiple candidates - add to proposals
+                proposals.push(LinkFixProposal {
+                    source: file_path.clone(),
+                    line: link.line,
+                    broken_target: target.clone(),
+                    candidates,
+                    decision: None,
+                });
+            }
+        }
+    }
+
+    // Handle propose mode: write proposals to file
+    // This branch returns before any file is rewritten, even if `apply` is set.
+    if let Some(propose_path) = propose {
+        let proposal_file = LinkFixProposalFile {
+            version: 1,
+            proposals,
+        };
+        let yaml = serde_yaml::to_string(&proposal_file)?;
+        fs::write(&propose_path, &yaml)?;
+
+        if json {
+            #[derive(Serialize)]
+            struct ProposeResult {
+                proposal_file: String,
+                unambiguous_fixes: usize,
+                ambiguous_proposals: usize,
+            }
+            let result = ProposeResult {
+                proposal_file: propose_path.to_string_lossy().to_string(),
+                unambiguous_fixes: fixes.len(),
+                ambiguous_proposals: proposal_file.proposals.len(),
+            };
+            println!("{}", serde_json::to_string_pretty(&result)?);
+        } else {
+            println!(
+                "{} Wrote {} ambiguous proposals to {}",
+                "Propose:".cyan().bold(),
+                proposal_file.proposals.len(),
+                propose_path.display()
+            );
+            println!(
+                "{} {} unambiguous fixes available (use --apply to apply)",
+                "Info:".yellow(),
+                fixes.len()
+            );
+        }
+        return Ok(());
+    }
+
+    // Regular fix mode (dry-run or apply)
+    if fixes.is_empty() {
+        if json {
+            println!(r#"{{"fixes": [], "applied": false}}"#);
+        } else {
+            println!("{}", "No safe link fixes found.".green().bold());
+        }
+        return Ok(());
+    }
+
+    // Group fixes by file
+    let mut fixes_by_file: HashMap<String, Vec<LinkFix>> = HashMap::new();
+    for fix in &fixes {
+        fixes_by_file
+            .entry(fix.file.clone())
+            .or_default()
+            .push(fix.clone());
+    }
+
+    if json {
+        let result = serde_json::json!({
+            "fixes": fixes.iter().map(|f| {
+                serde_json::json!({
+                    "file": f.file,
+                    "old_target": f.old_target,
+                    "new_target": f.new_target
+                })
+            }).collect::<Vec<_>>(),
+            "applied": apply
+        });
+        println!("{}", serde_json::to_string_pretty(&result)?);
+    } else {
+        println!(
+            "{} Proposed link fixes in {} file(s):",
+            if dry_run { "Previewing" } else { "Applying" },
+            fixes_by_file.len()
+        );
+        for (file, file_fixes) in &fixes_by_file {
+            println!("{}", file.white().bold());
+            for f in file_fixes {
+                println!("  {} -> {}", f.old_target.red(), f.new_target.green());
+            }
+        }
+    }
+
+    // NOTE(review): only `apply` is checked here, so passing both `dry_run`
+    // and `apply` still rewrites files — confirm the CLI rejects that combination.
+    if apply {
+        for (file, file_fixes) in &fixes_by_file {
+            let content = fs::read_to_string(file)?;
+            let mut new_content = content.clone();
+            for f in file_fixes {
+                // Plain-text substitution: every `](old)` occurrence in the
+                // file is rewritten, not only the link that was analysed.
+                let old = format!("]({})", f.old_target);
+                let new = format!("]({})", f.new_target);
+                new_content = new_content.replace(&old, &new);
+            }
+            if new_content != content {
+                fs::write(file, new_content)?;
+            }
+        }
+        if !json {
+            println!("{}", "Link fixes applied.".green().bold());
+        }
+    }
+
+    Ok(())
+}
+
+/// Applies the choices recorded in a link-fix proposal file.
+///
+/// Each proposal whose `decision` is a valid index into its `candidates`
+/// becomes a fix from `broken_target` to the chosen candidate; a `#fragment`
+/// on the broken target is carried over. Proposals without a decision, or
+/// with an out-of-range index, are ignored. With `dry_run` the fixes are only
+/// reported; otherwise every `](old)` occurrence in the source file is
+/// replaced by `](new)`.
+///
+/// # Errors
+///
+/// Fails if the proposal file cannot be read or parsed, or if a source file
+/// cannot be read or written.
+pub(crate) fn apply_link_decisions(
+    decisions_path: &Path,
+    dry_run: bool,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let raw = fs::read_to_string(decisions_path)?;
+    let proposal_file: LinkFixProposalFile = serde_yaml::from_str(&raw)?;
+
+    let fixes: Vec<LinkFix> = proposal_file
+        .proposals
+        .iter()
+        .filter_map(|proposal| {
+            let picked = proposal.candidates.get(proposal.decision?)?;
+            // Preserve anchor if present in broken_target
+            let suffix = proposal
+                .broken_target
+                .find('#')
+                .map_or("", |at| &proposal.broken_target[at..]);
+            Some(LinkFix {
+                file: proposal.source.clone(),
+                old_target: proposal.broken_target.clone(),
+                new_target: format!("{picked}{suffix}"),
+            })
+        })
+        .collect();
+
+    if fixes.is_empty() {
+        if json {
+            println!(
+                r#"{{"fixes": [], "applied": false, "message": "No decisions made in proposal file"}}"#
+            );
+        } else {
+            println!(
+                "{} No decisions found in {}. Set 'decision' field to candidate index.",
+                "Note:".yellow(),
+                decisions_path.display()
+            );
+        }
+        return Ok(());
+    }
+
+    // Bucket the fixes per source file so each file is rewritten once.
+    let mut fixes_by_file: HashMap<String, Vec<LinkFix>> = HashMap::new();
+    for fix in &fixes {
+        let bucket = fixes_by_file.entry(fix.file.clone()).or_default();
+        bucket.push(fix.clone());
+    }
+
+    if json {
+        let listed: Vec<_> = fixes
+            .iter()
+            .map(|f| {
+                serde_json::json!({
+                    "file": f.file,
+                    "old_target": f.old_target,
+                    "new_target": f.new_target
+                })
+            })
+            .collect();
+        let result = serde_json::json!({
+            "fixes": listed,
+            "applied": !dry_run
+        });
+        println!("{}", serde_json::to_string_pretty(&result)?);
+    } else {
+        let verb = if dry_run { "Would apply" } else { "Applying" };
+        println!("{} {} link fixes from decisions:", verb, fixes.len());
+        for (file, file_fixes) in &fixes_by_file {
+            println!("{}", file.white().bold());
+            for f in file_fixes {
+                println!("  {} -> {}", f.old_target.red(), f.new_target.green());
+            }
+        }
+    }
+
+    if dry_run {
+        return Ok(());
+    }
+
+    for (file, file_fixes) in &fixes_by_file {
+        let original = fs::read_to_string(file)?;
+        let rewritten = file_fixes.iter().fold(original.clone(), |text, f| {
+            text.replace(
+                &format!("]({})", f.old_target),
+                &format!("]({})", f.new_target),
+            )
+        });
+        if rewritten != original {
+            fs::write(file, rewritten)?;
+        }
+    }
+    if !json {
+        println!("{}", "Link fixes applied.".green().bold());
+    }
+
+    Ok(())
+}
+
+/// Replaces every markdown link target `](from)` in `content` with `](to)`.
+///
+/// Only exact matches of the whole target are rewritten; a target such as
+/// `from#anchor` is left untouched.
+pub(crate) fn apply_reference_mapping_to_content(content: &str, from: &str, to: &str) -> String {
+    let (needle, replacement) = (format!("]({from})"), format!("]({to})"));
+    content.replace(needle.as_str(), replacement.as_str())
+}
+
+/// Reads `path` and parses it as a YAML `ReferenceMappingConfig`.
+///
+/// # Errors
+///
+/// Fails if the file cannot be read or is not valid YAML for that type.
+pub(crate) fn load_reference_mappings(
+    path: &Path,
+) -> Result<ReferenceMappingConfig, Box<dyn std::error::Error>> {
+    let raw = fs::read_to_string(path)?;
+    Ok(serde_yaml::from_str::<ReferenceMappingConfig>(&raw)?)
+}
+
+/// Rewrites markdown link targets across all indexed files using a mapping
+/// file.
+///
+/// For each mapping, every `](from)` occurrence is replaced by `](to)`;
+/// mappings are applied in the order they are listed. With `dry_run` the
+/// affected files are only reported. With `apply` (and no `dry_run`) they are
+/// rewritten on disk. The list of affected files is sorted before reporting.
+///
+/// # Errors
+///
+/// Fails if neither `dry_run` nor `apply` is set, if the mapping file is
+/// missing or cannot be parsed, if the forward index cannot be loaded, or if
+/// an indexed file cannot be read or written.
+pub(crate) fn cmd_fix_references(
+    index_dir: &Path,
+    mapping_path: &Path,
+    dry_run: bool,
+    apply: bool,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    if !dry_run && !apply {
+        return Err("Specify either --dry-run or --apply".into());
+    }
+    if !mapping_path.exists() {
+        return Err(format!("Mapping file not found: {}", mapping_path.display()).into());
+    }
+
+    // `dry_run` takes precedence when both flags are given, so files are only
+    // written (and reported as applied) when `dry_run` is off.
+    let applied = apply && !dry_run;
+
+    let mappings_cfg = load_reference_mappings(mapping_path)?;
+    if mappings_cfg.mappings.is_empty() {
+        if json {
+            let result = FixReferencesResult {
+                mapping_file: mapping_path.to_string_lossy().to_string(),
+                mappings_count: 0,
+                updated_files: vec![],
+                applied,
+            };
+            println!("{}", serde_json::to_string_pretty(&result)?);
+        } else {
+            println!(
+                "{} No mappings defined in {}",
+                "Note:".yellow(),
+                mapping_path.display()
+            );
+        }
+        return Ok(());
+    }
+
+    let forward_index = load_forward_index(index_dir)?;
+
+    let mut changed_files: Vec<String> = Vec::new();
+
+    for file_path in forward_index.files.keys() {
+        let content = fs::read_to_string(file_path)?;
+        let new_content = mappings_cfg
+            .mappings
+            .iter()
+            .fold(content.clone(), |text, m| {
+                apply_reference_mapping_to_content(&text, &m.from, &m.to)
+            });
+
+        if new_content != content {
+            if applied {
+                fs::write(file_path, new_content)?;
+            }
+            changed_files.push(file_path.clone());
+        }
+    }
+
+    changed_files.sort();
+
+    if json {
+        let result = FixReferencesResult {
+            mapping_file: mapping_path.to_string_lossy().to_string(),
+            mappings_count: mappings_cfg.mappings.len(),
+            updated_files: changed_files,
+            applied,
+        };
+        println!("{}", serde_json::to_string_pretty(&result)?);
+        return Ok(());
+    }
+
+    if changed_files.is_empty() {
+        println!(
+            "{} No references needed updating based on {}",
+            "Note:".yellow(),
+            mapping_path.display()
+        );
+    } else {
+        // The verb comes from the first argument only; the format string used
+        // to repeat it ("Would update Updated references ...").
+        println!(
+            "{} references in {} file(s) using mapping {}",
+            if dry_run { "Would update" } else { "Updated" },
+            changed_files.len(),
+            mapping_path.display()
+        );
+        for f in changed_files {
+            println!("  {f}");
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/commands_query.rs b/src/commands_query.rs
new file mode 100644
index 0000000..9d1be97
--- /dev/null
+++ b/src/commands_query.rs
@@ -0,0 +1,1243 @@
+use crate::commands_audit::*;
+use colored::Colorize;
+use std::collections::{HashMap, HashSet};
+use std::path::Path;
+use std::time::Instant;
+
+use crate::search::*;
+use crate::types::*;
+use crate::util::*;
+
+pub(crate) struct QueryDiagnostics { // Facts about one parsed query; see build_query_diagnostics.
+    pub tokens: Vec<String>, // query terms as parsed (clone of the parsed query's `terms`)
+    pub stems: Vec<String>, // lowercased, stemmed form of each token, in token order
+    pub missing_terms: Vec<String>, // distinct tokens whose stem has no entry in the IDF map
+    pub idf_values: Vec<(String, String, f64)>, // (token, stem, idf); idf is 0.0 when missing
+    pub index_path: String, // display form of the index directory
+    pub doc_count: usize, // number of files in the forward index
+}
+
+/// Collects the tokens, stems, per-term IDF values and index stats that describe a parsed query.
+pub(crate) fn build_query_diagnostics(
+    parsed: &ParsedQuery,
+    forward_index: &ForwardIndex,
+    index_dir: &Path,
+) -> QueryDiagnostics {
+    let tokens = parsed.terms.clone();
+    let stems: Vec<String> = tokens
+        .iter()
+        .map(|t| stem_word(&t.to_lowercase()))
+        .collect();
+    let mut missing_set: HashSet<String> = HashSet::new(); // keeps `missing_terms` free of repeats
+    let mut missing_terms = Vec::new();
+    let mut idf_values = Vec::new();
+    // Look up each term's stem; a stem absent from the IDF map is recorded with idf 0.0.
+    for term in &tokens {
+        let stem = stem_word(&term.to_lowercase());
+        let idf = *forward_index.idf_map.get(&stem).unwrap_or(&0.0);
+        idf_values.push((term.clone(), stem.clone(), idf));
+        if !forward_index.idf_map.contains_key(&stem) && missing_set.insert(term.clone()) {
+            missing_terms.push(term.clone());
+        }
+    }
+    QueryDiagnostics {
+        tokens,
+        stems,
+        missing_terms,
+        idf_values,
+        index_path: index_dir.display().to_string(),
+        doc_count: forward_index.files.len(),
+    }
+}
+
+/// Prints `diagnostics` to stdout, optionally followed by scoring details and search hints.
+pub(crate) fn print_query_diagnostics(
+    diagnostics: &QueryDiagnostics,
+    include_scoring: bool,
+    include_suggestions: bool,
+) {
+    println!("{}", "Diagnostics:".dimmed());
+    println!(
+        "  {} {}",
+        "tokens:".dimmed(),
+        if diagnostics.tokens.is_empty() {
+            "(none)".to_string()
+        } else {
+            diagnostics.tokens.join(" ")
+        }
+    );
+    println!(
+        "  {} {}",
+        "stems:".dimmed(),
+        if diagnostics.stems.is_empty() {
+            "(none)".to_string()
+        } else {
+            diagnostics.stems.join(" ")
+        }
+    );
+    println!(
+        "  {} {}",
+        "missing:".dimmed(),
+        if diagnostics.missing_terms.is_empty() {
+            "(none)".to_string()
+        } else {
+            diagnostics.missing_terms.join(" ")
+        }
+    );
+    println!(
+        "  {} {} ({} docs)",
+        "index:".dimmed(),
+        diagnostics.index_path,
+        diagnostics.doc_count
+    );
+    // Scoring detail: one `term->stem:idf` entry per query term, then the BM25 constants.
+    if include_scoring {
+        let mut idf_parts = Vec::new();
+        for (term, stem, idf) in &diagnostics.idf_values {
+            idf_parts.push(format!("{term}->{stem}:{idf:.3}"));
+        }
+        println!(
+            "  {} {}",
+            "idf:".dimmed(),
+            if idf_parts.is_empty() {
+                "(none)".to_string()
+            } else {
+                idf_parts.join(", ")
+            }
+        );
+        println!("  {} k1={:.2}, b={:.2}", "bm25:".dimmed(), BM25_K1, BM25_B);
+    }
+    if include_suggestions {
+        println!(
+            "  {} try fewer terms; use --no-stopwords; run yore stats; check index path",
+            "suggestions:".dimmed()
+        );
+    }
+}
+
+pub(crate) struct QueryOptions { // Settings read by cmd_query.
+    pub limit: usize, // maximum number of results kept
+    pub files_only: bool, // text mode: print bare paths only
+    pub json: bool, // emit JSON instead of colored text
+    pub doc_terms: usize, // count passed to get_top_doc_terms; 0 disables the term listing
+    pub explain: bool, // include query diagnostics in the output
+    pub require_phrases: bool, // drop candidates that do not contain every parsed phrase
+    pub filter_stopwords: bool, // forwarded to parse_query
+}
+
+pub(crate) struct AssembleOptions { // NOTE(review): presumably for the assemble command; verify.
+    pub max_tokens: usize,
+    pub max_sections: usize,
+    pub depth: usize,
+    pub format: String,
+    pub doc_terms: usize,
+    pub use_relations: bool,
+}
+
+pub(crate) struct HealthOptions { // NOTE(review): presumably limits for the health command; verify.
+    pub max_lines: usize,
+    pub max_part_sections: usize,
+    pub max_completed_lines: usize,
+    pub max_changelog_entries: usize,
+}
+
+/// Searches the index in `index_dir` for `query` using BM25 and prints the ranked hits.
+pub(crate) fn cmd_query(
+    query: &str,
+    index_dir: &Path,
+    options: &QueryOptions,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let parsed = parse_query(query, options.filter_stopwords);
+    if parsed.terms.is_empty() {
+        if options.json {
+            let obj = serde_json::json!({
+                "query": query,
+                "error": "no_query_terms"
+            });
+            println!("{}", serde_json::to_string_pretty(&obj)?);
+        } else {
+            println!(
+                "{}",
+                "No searchable terms in query. Try different keywords or use --no-stopwords."
+                    .yellow()
+            );
+        }
+        return Ok(());
+    }
+    let _reverse_index = load_reverse_index(index_dir)?; // NOTE(review): never read; only its error path matters
+    let forward_index = load_forward_index(index_dir)?;
+    let diagnostics = build_query_diagnostics(&parsed, &forward_index, index_dir);
+
+    // Score every indexed document with BM25 and keep only those with a positive score
+    let mut file_scores: Vec<(String, f64)> = forward_index
+        .files
+        .iter()
+        .map(|(path, entry)| {
+            let score = bm25_score(
+                &parsed.terms,
+                entry,
+                forward_index.avg_doc_length,
+                &forward_index.idf_map,
+            );
+            (path.clone(), score)
+        })
+        .filter(|(_, score)| *score > 0.0)
+        .collect();
+
+    // Sort by BM25 score (descending)
+    file_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+    let results = if parsed.phrases.is_empty() {
+        file_scores.truncate(options.limit);
+        file_scores
+    } else {
+        let candidate_cap = std::cmp::min(
+            file_scores.len(),
+            std::cmp::max(options.limit.saturating_mul(10), 100),
+        );
+        let mut candidates = file_scores[..candidate_cap].to_vec();
+        // Re-read each candidate from disk (unreadable files count as empty) and test the phrases.
+        for (path, score) in &mut candidates {
+            let content = std::fs::read_to_string(Path::new(path)).unwrap_or_default();
+            let content_terms = extract_keywords_with_options(&content, false);
+            let mut matched_phrases = 0usize;
+            for phrase in &parsed.phrases {
+                if contains_phrase_tokens(&content_terms, &phrase.terms) {
+                    matched_phrases += 1;
+                }
+            }
+            // Each matched phrase adds 1.0; with require_phrases a partial match zeroes the score.
+            if options.require_phrases && matched_phrases < parsed.phrases.len() {
+                *score = 0.0;
+            } else if matched_phrases > 0 {
+                *score += matched_phrases as f64;
+            }
+        }
+
+        if options.require_phrases {
+            candidates.retain(|(_, score)| *score > 0.0);
+        }
+
+        candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+        candidates.truncate(options.limit);
+        candidates
+    };
+
+    if options.json {
+        let output: Vec<_> = results
+            .iter()
+            .map(|(path, score)| {
+                let mut obj = serde_json::json!({
+                    "path": path,
+                    "score": score,
+                    "query": query
+                });
+                if options.doc_terms > 0 {
+                    if let Some(entry) = forward_index.files.get(path) {
+                        let top_terms = get_top_doc_terms(
+                            entry,
+                            &forward_index.idf_map,
+                            &parsed.terms,
+                            options.doc_terms,
+                        );
+                        obj["doc_terms"] = serde_json::json!(top_terms);
+                    }
+                }
+                obj
+            })
+            .collect();
+
+        if options.explain { // explain: wrap the results together with a diagnostics object
+            let notice = if output.is_empty() {
+                Some("No data to explain.".to_string())
+            } else {
+                None
+            };
+            let diag_json = serde_json::json!({
+                "tokens": diagnostics.tokens,
+                "stems": diagnostics.stems,
+                "missing_terms": diagnostics.missing_terms,
+                "idf": diagnostics.idf_values.iter().map(|(term, stem, idf)| {
+                    serde_json::json!({
+                        "term": term,
+                        "stem": stem,
+                        "idf": idf
+                    })
+                }).collect::<Vec<_>>(),
+                "bm25": {
+                    "k1": BM25_K1,
+                    "b": BM25_B,
+                    "avg_doc_length": forward_index.avg_doc_length
+                },
+                "index_path": diagnostics.index_path,
+                "doc_count": diagnostics.doc_count,
+                "notice": notice,
+                "suggestions": if output.is_empty() {
+                    serde_json::json!(["try fewer terms", "use --no-stopwords", "run yore stats", "check index path"])
+                } else {
+                    serde_json::Value::Null
+                }
+            });
+            let wrapped = serde_json::json!({
+                "query": query,
+                "results": output,
+                "diagnostics": diag_json
+            });
+            println!("{}", serde_json::to_string_pretty(&wrapped)?);
+        } else {
+            println!("{}", serde_json::to_string_pretty(&output)?);
+        }
+        return Ok(());
+    }
+
+    if results.is_empty() {
+        println!("{}", "No results found.".yellow());
+        if options.explain {
+            println!("{}", "No data to explain.".dimmed());
+        }
+        print_query_diagnostics(&diagnostics, options.explain, true);
+        return Ok(());
+    }
+
+    println!(
+        "{} results for: {}\n",
+        results.len().to_string().green().bold(),
+        parsed.terms.join(" ").cyan()
+    );
+
+    for (file, score) in results {
+        if options.files_only {
+            println!("{file}");
+        } else {
+            println!("{} (score: {:.2})", file.cyan(), score);
+
+            // Show doc terms if requested
+            if options.doc_terms > 0 {
+                if let Some(entry) = forward_index.files.get(&file) {
+                    let top_terms = get_top_doc_terms(
+                        entry,
+                        &forward_index.idf_map,
+                        &parsed.terms,
+                        options.doc_terms,
+                    );
+                    if !top_terms.is_empty() {
+                        println!("  {} {}", "terms:".dimmed(), top_terms.join(", "));
+                    }
+                }
+            }
+
+            // Show headings sharing a stem with a query term (only the first three are checked)
+            if let Some(entry) = forward_index.files.get(&file) {
+                for heading in entry.headings.iter().take(3) {
+                    let heading_keywords: HashSet<String> = extract_keywords(&heading.text)
+                        .into_iter()
+                        .map(|k| stem_word(&k))
+                        .collect();
+
+                    let matches: Vec<_> = parsed
+                        .terms
+                        .iter()
+                        .filter(|t| heading_keywords.contains(&stem_word(&t.to_lowercase())))
+                        .collect();
+
+                    if !matches.is_empty() {
+                        println!(
+                            "  {} L{}: {}",
+                            ">".dimmed(),
+                            heading.line.to_string().dimmed(),
+                            heading.text
+                        );
+                    }
+                }
+            }
+            println!();
+        }
+    }
+
+    if options.explain {
+        print_query_diagnostics(&diagnostics, true, false);
+    }
+
+    Ok(())
+}
+
+/// Prints up to `limit` indexed files whose combined similarity to `file` is at least `threshold`.
+pub(crate) fn cmd_similar(
+    file: &Path,
+    limit: usize,
+    threshold: f64,
+    json: bool,
+    doc_terms: usize,
+    index_dir: &Path,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let forward_index = load_forward_index(index_dir)?;
+
+    // Get keywords for reference file - try multiple path formats
+    let file_str = file.to_string_lossy().to_string();
+    let file_with_dot = format!("./{}", file_str.trim_start_matches("./"));
+    let file_without_dot = file_str.trim_start_matches("./").to_string();
+
+    let (matched_path, ref_entry) = forward_index
+        .files
+        .get(&file_str)
+        .map(|e| (file_str.clone(), e))
+        .or_else(|| {
+            forward_index
+                .files
+                .get(&file_with_dot)
+                .map(|e| (file_with_dot.clone(), e))
+        })
+        .or_else(|| {
+            forward_index
+                .files
+                .get(&file_without_dot)
+                .map(|e| (file_without_dot.clone(), e))
+        })
+        .ok_or_else(|| format!("File not in index: {file_str}"))?; // Err when no spelling matches
+
+    // Combine heading and body keywords
+    let ref_keywords: HashSet<String> = ref_entry
+        .keywords
+        .iter()
+        .chain(ref_entry.body_keywords.iter())
+        .map(|k| k.to_lowercase())
+        .collect();
+
+    // For doc_terms, exclude the reference file's terms
+    let ref_terms_vec: Vec<String> = ref_entry
+        .body_keywords
+        .iter()
+        .chain(ref_entry.keywords.iter())
+        .map(|k| k.to_lowercase())
+        .collect();
+
+    // Compare with all other files using both Jaccard and Simhash
+    let mut similarities: Vec<(String, f64, f64, f64)> = Vec::new(); // (path, jaccard, simhash, combined)
+
+    for (path, entry) in &forward_index.files {
+        if path == &matched_path {
+            continue;
+        }
+
+        let other_keywords: HashSet<String> = entry
+            .keywords
+            .iter()
+            .chain(entry.body_keywords.iter())
+            .map(|k| k.to_lowercase())
+            .collect();
+
+        let jaccard = jaccard_similarity(&ref_keywords, &other_keywords);
+        let simhash_sim = simhash_similarity(ref_entry.simhash, entry.simhash);
+        // Combined score: weighted average
+        let combined = jaccard * 0.6 + simhash_sim * 0.4;
+
+        if combined >= threshold {
+            similarities.push((path.clone(), jaccard, simhash_sim, combined));
+        }
+    }
+
+    // Sort by combined similarity, highest first; never panics on an incomparable pair
+    similarities.sort_by(|a, b| b.3.partial_cmp(&a.3).unwrap_or(std::cmp::Ordering::Equal));
+    similarities.truncate(limit);
+
+    if json {
+        let output: Vec<_> = similarities
+            .iter()
+            .map(|(p, j, s, c)| {
+                let mut obj = serde_json::json!({
+                    "path": p,
+                    "jaccard": j,
+                    "simhash": s,
+                    "combined": c
+                });
+                if doc_terms > 0 {
+                    if let Some(entry) = forward_index.files.get(p) {
+                        let top_terms = get_top_doc_terms(
+                            entry,
+                            &forward_index.idf_map,
+                            &ref_terms_vec,
+                            doc_terms,
+                        );
+                        obj["doc_terms"] = serde_json::json!(top_terms);
+                    }
+                }
+                obj
+            })
+            .collect();
+        println!("{}", serde_json::to_string_pretty(&output)?);
+        return Ok(());
+    }
+
+    if similarities.is_empty() {
+        println!("{}", "No similar files found.".yellow());
+        return Ok(());
+    }
+
+    println!("Files similar to: {}\n", matched_path.cyan());
+    println!("{:>5} {:>5} {:>5}  Path", "Comb", "Jacc", "Sim");
+    println!("{}", "-".repeat(60));
+
+    for (path, jaccard, simhash_sim, combined) in similarities {
+        let comb_pct = (combined * 100.0) as u32;
+        let jacc_pct = (jaccard * 100.0) as u32;
+        let sim_pct = (simhash_sim * 100.0) as u32;
+        println!(
+            "{:>4}% {:>4}% {:>4}%  {}",
+            comb_pct.to_string().green(),
+            jacc_pct.to_string().cyan(),
+            sim_pct.to_string().yellow(),
+            path
+        );
+
+        // Show doc terms if requested
+        if doc_terms > 0 {
+            if let Some(entry) = forward_index.files.get(&path) {
+                let top_terms =
+                    get_top_doc_terms(entry, &forward_index.idf_map, &ref_terms_vec, doc_terms);
+                if !top_terms.is_empty() {
+                    println!(
+                        "                   {} {}",
+                        "terms:".dimmed(),
+                        top_terms.join(", ")
+                    );
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Finds LSH candidate pairs scoring at least `threshold` and prints them (text, grouped or JSON).
+pub(crate) fn cmd_dupes(
+    threshold: f64,
+    group: bool,
+    json: bool,
+    index_dir: &Path,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let forward_index = load_forward_index(index_dir)?;
+    let start = Instant::now();
+
+    // Build LSH buckets for fast duplicate detection
+    let buckets = lsh_buckets(&forward_index.files, 16); // 16 bands x 8 rows = 128 hashes
+    let mut candidates = std::collections::BTreeSet::<(String, String)>::new();
+    // Collect candidate pairs from buckets; the ordered set keeps equal-score pairs in path order
+    for paths in buckets.values() {
+        if paths.len() > 1 {
+            for i in 0..paths.len() {
+                for j in (i + 1)..paths.len() {
+                    let (p1, p2) = if paths[i] < paths[j] {
+                        (paths[i].clone(), paths[j].clone())
+                    } else {
+                        (paths[j].clone(), paths[i].clone())
+                    };
+                    candidates.insert((p1, p2));
+                }
+            }
+        }
+    }
+
+    let mut duplicates: Vec<(String, String, f64, f64, f64, f64)> = Vec::new(); // (path1, path2, jaccard, simhash, minhash, combined)
+
+    // Compare candidate pairs
+    for (path1, path2) in &candidates {
+        if let (Some(entry1), Some(entry2)) = (
+            forward_index.files.get(path1),
+            forward_index.files.get(path2),
+        ) {
+            let kw1: HashSet<String> = entry1
+                .keywords
+                .iter()
+                .chain(entry1.body_keywords.iter())
+                .map(|k| k.to_lowercase())
+                .collect();
+            let kw2: HashSet<String> = entry2
+                .keywords
+                .iter()
+                .chain(entry2.body_keywords.iter())
+                .map(|k| k.to_lowercase())
+                .collect();
+
+            let jaccard = jaccard_similarity(&kw1, &kw2);
+            let simhash_sim = simhash_similarity(entry1.simhash, entry2.simhash);
+            let minhash_sim = minhash_similarity(&entry1.minhash, &entry2.minhash);
+            let combined = jaccard * 0.4 + simhash_sim * 0.3 + minhash_sim * 0.3;
+
+            if combined >= threshold {
+                duplicates.push((
+                    path1.clone(),
+                    path2.clone(),
+                    jaccard,
+                    simhash_sim,
+                    minhash_sim,
+                    combined,
+                ));
+            }
+        }
+    }
+
+    let elapsed = start.elapsed();
+
+    // Sort by combined similarity (descending); the sort is stable, so ties stay in path order
+    duplicates.sort_by(|a, b| b.5.partial_cmp(&a.5).unwrap_or(std::cmp::Ordering::Equal));
+
+    if json {
+        let output: Vec<_> = duplicates
+            .iter()
+            .map(|(p1, p2, j, s, m, c)| {
+                serde_json::json!({
+                    "file1": p1,
+                    "file2": p2,
+                    "jaccard": j,
+                    "simhash": s,
+                    "minhash": m,
+                    "combined": c
+                })
+            })
+            .collect();
+        println!("{}", serde_json::to_string_pretty(&output)?);
+        return Ok(());
+    }
+
+    if duplicates.is_empty() {
+        println!("{}", "No duplicates found above threshold.".green());
+        eprintln!(
+            "LSH duplicate detection: {:?} ({} candidate pairs from {} buckets)",
+            elapsed,
+            candidates.len(),
+            buckets.len()
+        );
+        return Ok(());
+    }
+
+    println!(
+        "{} duplicate pairs found (threshold: {}%)",
+        duplicates.len().to_string().yellow().bold(),
+        (threshold * 100.0) as u32
+    );
+    eprintln!(
+        "LSH duplicate detection: {:?} ({} candidates from {} buckets)\n",
+        elapsed,
+        candidates.len(),
+        buckets.len()
+    );
+
+    if group {
+        // Group duplicates under their first path; the ordered map prints groups in path order
+        let mut groups = std::collections::BTreeMap::<String, Vec<(String, f64)>>::new();
+
+        for (path1, path2, _, _, _, combined) in &duplicates {
+            let group = groups.entry(path1.clone()).or_default();
+            if !group.iter().any(|(p, _)| p == path2) {
+                group.push((path2.clone(), *combined));
+            }
+        }
+
+        for (file, related) in groups {
+            println!("{}", file.cyan());
+            for (r, sim) in related {
+                println!("  {} {}% {}", "~".dimmed(), (sim * 100.0) as u32, r);
+            }
+            println!();
+        }
+    } else {
+        for (path1, path2, jaccard, simhash_sim, minhash_sim, combined) in
+            duplicates.iter().take(50)
+        {
+            let comb_pct = (combined * 100.0) as u32;
+            println!(
+                "{}% [J:{}% S:{}% M:{}%] {} <-> {}",
+                comb_pct.to_string().yellow(),
+                (jaccard * 100.0) as u32,
+                (simhash_sim * 100.0) as u32,
+                (minhash_sim * 100.0) as u32,
+                path1.cyan(),
+                path2
+            );
+        }
+
+        if duplicates.len() > 50 {
+            println!(
+                "\n{}",
+                format!("... and {} more", duplicates.len() - 50).dimmed()
+            );
+        }
+    }
+
+    Ok(())
+}
+
+/// Returns `(path1, path2, combined)` for LSH candidate pairs scoring at least `threshold`.
+pub(crate) fn compute_duplicate_pairs(
+    forward_index: &ForwardIndex,
+    threshold: f64,
+) -> Vec<(String, String, f64)> {
+    // Build LSH buckets for duplicate detection
+    let buckets = lsh_buckets(&forward_index.files, 16); // 16 bands x 8 rows = 128 hashes
+    let mut candidates = std::collections::BTreeSet::<(String, String)>::new();
+    // Collect candidate pairs from buckets; the ordered set yields them in path order
+    for paths in buckets.values() {
+        if paths.len() > 1 {
+            for i in 0..paths.len() {
+                for j in (i + 1)..paths.len() {
+                    let (p1, p2) = if paths[i] < paths[j] {
+                        (paths[i].clone(), paths[j].clone())
+                    } else {
+                        (paths[j].clone(), paths[i].clone())
+                    };
+                    candidates.insert((p1, p2));
+                }
+            }
+        }
+    }
+
+    let mut pairs: Vec<(String, String, f64)> = Vec::new(); // (path1, path2, combined)
+
+    for (path1, path2) in &candidates {
+        if let (Some(entry1), Some(entry2)) = (
+            forward_index.files.get(path1),
+            forward_index.files.get(path2),
+        ) {
+            let kw1: HashSet<String> = entry1
+                .keywords
+                .iter()
+                .chain(entry1.body_keywords.iter())
+                .map(|k| k.to_lowercase())
+                .collect();
+            let kw2: HashSet<String> = entry2
+                .keywords
+                .iter()
+                .chain(entry2.body_keywords.iter())
+                .map(|k| k.to_lowercase())
+                .collect();
+
+            let jaccard = jaccard_similarity(&kw1, &kw2);
+            let simhash_sim = simhash_similarity(entry1.simhash, entry2.simhash);
+            let minhash_sim = minhash_similarity(&entry1.minhash, &entry2.minhash);
+            let combined = jaccard * 0.4 + simhash_sim * 0.3 + minhash_sim * 0.3;
+
+            if combined >= threshold {
+                pairs.push((path1.clone(), path2.clone(), combined));
+            }
+        }
+    }
+
+    // Sort descending by similarity; the sort is stable, so ties stay in path order
+    pairs.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
+    pairs
+}
+
+/// Groups duplicate `pairs` into connected components and picks a canonical file for each.
+pub(crate) fn build_consolidation_groups(
+    forward_index: &ForwardIndex,
+    pairs: &[(String, String, f64)],
+) -> ConsolidationResult {
+    use std::cmp::Ordering;
+
+    // Build an undirected adjacency graph and keep each pair's similarity under an ordered key
+    let mut adj: HashMap<String, HashSet<String>> = HashMap::new();
+    let mut pair_sims: HashMap<(String, String), f64> = HashMap::new();
+
+    for (a, b, sim) in pairs {
+        adj.entry(a.clone()).or_default().insert(b.clone());
+        adj.entry(b.clone()).or_default().insert(a.clone());
+        let key = if a <= b {
+            (a.clone(), b.clone())
+        } else {
+            (b.clone(), a.clone())
+        };
+        pair_sims.insert(key, *sim);
+    }
+
+    let mut visited: HashSet<String> = HashSet::new();
+    let mut groups: Vec<ConsolidationGroup> = Vec::new();
+
+    for start in adj.keys() {
+        if visited.contains(start) {
+            continue;
+        }
+
+        // Walk the graph with an explicit stack to collect the component containing `start`
+        let mut stack = vec![start.clone()];
+        let mut component: Vec<String> = Vec::new();
+
+        while let Some(node) = stack.pop() {
+            if !visited.insert(node.clone()) {
+                continue;
+            }
+            component.push(node.clone());
+            if let Some(neighbors) = adj.get(&node) {
+                for n in neighbors {
+                    if !visited.contains(n) {
+                        stack.push(n.clone());
+                    }
+                }
+            }
+        }
+
+        if component.len() < 2 {
+            continue;
+        }
+
+        // Choose canonical doc via canonicality score; ties go to the smaller path,
+        component.sort(); // deterministic order
+        let mut best: Option<(String, f64)> = None;
+        for path in &component {
+            if let Some(entry) = forward_index.files.get(path) {
+                let (score, _reasons) = score_canonicality_with_reasons(path, entry);
+                match best {
+                    None => best = Some((path.clone(), score)),
+                    Some((_, best_score)) => {
+                        if score > best_score
+                            || (score == best_score
+                                && path.cmp(&best.as_ref().unwrap().0) == Ordering::Less)
+                        {
+                            best = Some((path.clone(), score));
+                        }
+                    }
+                }
+            }
+        }
+
+        let Some((canonical, canonical_score)) = best else { // no member is in the forward index
+            continue;
+        };
+
+        let mut merge_into: Vec<String> = component
+            .iter()
+            .filter(|p| *p != &canonical)
+            .cloned()
+            .collect();
+        if merge_into.is_empty() {
+            continue;
+        }
+
+        merge_into.sort();
+
+        // Average similarity over members paired directly with the canonical file; others are skipped
+        let mut total_sim = 0.0;
+        let mut count = 0usize;
+        for other in &merge_into {
+            let key = if &canonical <= other {
+                (canonical.clone(), other.clone())
+            } else {
+                (other.clone(), canonical.clone())
+            };
+            if let Some(sim) = pair_sims.get(&key) {
+                total_sim += *sim;
+                count += 1;
+            }
+        }
+        let avg_similarity = if count > 0 {
+            total_sim / (count as f64)
+        } else {
+            0.0
+        };
+
+        let note = format!(
+            "Merge {} file(s) into canonical {}",
+            merge_into.len(),
+            canonical
+        );
+
+        groups.push(ConsolidationGroup {
+            canonical,
+            merge_into,
+            canonical_score,
+            avg_similarity,
+            note,
+        });
+    }
+
+    // Stable ordering: sort by canonical path
+    groups.sort_by(|a, b| a.canonical.cmp(&b.canonical));
+
+    ConsolidationResult {
+        total_groups: groups.len(),
+        groups,
+    }
+}
+
+/// NEW: Show what's shared between two files
+pub(crate) fn cmd_diff(
+    file1: &Path,
+    file2: &Path,
+    index_dir: &Path,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let forward_index = load_forward_index(index_dir)?;
+
+    // Resolve paths
+    let resolve_path = |f: &Path| -> Option<(String, &FileEntry)> {
+        let s = f.to_string_lossy().to_string();
+        let with_dot = format!("./{}", s.trim_start_matches("./"));
+        let without_dot = s.trim_start_matches("./").to_string();
+
+        forward_index
+            .files
+            .get(&s)
+            .map(|e| (s.clone(), e))
+            .or_else(|| {
+                forward_index
+                    .files
+                    .get(&with_dot)
+                    .map(|e| (with_dot.clone(), e))
+            })
+            .or_else(|| {
+                forward_index
+                    .files
+                    .get(&without_dot)
+                    .map(|e| (without_dot, e))
+            })
+    };
+
+    let (path1, entry1) =
+        resolve_path(file1).ok_or_else(|| format!("File not in index: {}", file1.display()))?;
+    let (path2, entry2) =
+        resolve_path(file2).ok_or_else(|| format!("File not in index: {}", file2.display()))?;
+
+    // Compute similarities
+    let kw1: HashSet<String> = entry1
+        .keywords
+        .iter()
+        .chain(entry1.body_keywords.iter())
+        .map(|k| k.to_lowercase())
+        .collect();
+    let kw2: HashSet<String> = entry2
+        .keywords
+        .iter()
+        .chain(entry2.body_keywords.iter())
+        .map(|k| k.to_lowercase())
+        .collect();
+
+    let shared: HashSet<_> = kw1.intersection(&kw2).cloned().collect();
+    let only_in_1: HashSet<_> = kw1.difference(&kw2).cloned().collect();
+    let only_in_2: HashSet<_> = kw2.difference(&kw1).cloned().collect();
+
+    let jaccard = jaccard_similarity(&kw1, &kw2);
+    let simhash_sim = simhash_similarity(entry1.simhash, entry2.simhash);
+    let combined = jaccard * 0.6 + simhash_sim * 0.4;
+
+    // Show shared headings
+    let h1: HashSet<String> = entry1
+        .headings
+        .iter()
+        .map(|h| h.text.to_lowercase())
+        .collect();
+    let h2: HashSet<String> = entry2
+        .headings
+        .iter()
+        .map(|h| h.text.to_lowercase())
+        .collect();
+    let shared_headings: Vec<String> = h1.intersection(&h2).cloned().collect();
+
+    if json {
+        let mut shared_vec: Vec<_> = shared.iter().cloned().collect();
+        shared_vec.sort();
+        let mut only1_vec: Vec<_> = only_in_1.iter().cloned().collect();
+        only1_vec.sort();
+        let mut only2_vec: Vec<_> = only_in_2.iter().cloned().collect();
+        only2_vec.sort();
+        let mut headings_vec = shared_headings.clone();
+        headings_vec.sort();
+
+        let result = DiffResult {
+            file1: path1,
+            file2: path2,
+            similarity: DiffSimilarity {
+                combined,
+                jaccard,
+                simhash: simhash_sim,
+            },
+            shared_keywords: shared_vec,
+            only_in_file1: only1_vec,
+            only_in_file2: only2_vec,
+            shared_headings: headings_vec,
+        };
+        println!("{}", serde_json::to_string_pretty(&result)?);
+        return Ok(());
+    }
+
+    println!("{}", "Comparison".green().bold());
+    println!();
+    println!("  File 1: {}", path1.cyan());
+    println!("  File 2: {}", path2.cyan());
+    println!();
+    println!("{}", "Similarity Scores".green().bold());
+    println!();
+    println!("  Combined:    {}%", (combined * 100.0) as u32);
+    println!(
+        "  Jaccard:     {}% (keyword overlap)",
+        (jaccard * 100.0) as u32
+    );
+    println!(
+        "  SimHash:     {}% (content structure)",
+        (simhash_sim * 100.0) as u32
+    );
+    println!();
+
+    println!(
+        "{} ({} keywords)",
+        "Shared Keywords".green().bold(),
+        shared.len()
+    );
+    let mut shared_vec: Vec<_> = shared.iter().collect();
+    shared_vec.sort();
+    for chunk in shared_vec.chunks(8) {
+        println!(
+            "  {}",
+            chunk
+                .iter()
+                .map(|s| s.as_str())
+                .collect::<Vec<_>>()
+                .join(", ")
+        );
+    }
+
+    println!();
+    println!(
+        "{} ({} keywords)",
+        format!("Only in {}", path1.split('/').next_back().unwrap_or(&path1))
+            .yellow()
+            .bold(),
+        only_in_1.len()
+    );
+    let mut only1_vec: Vec<_> = only_in_1.iter().take(24).collect();
+    only1_vec.sort();
+    for chunk in only1_vec.chunks(8) {
+        println!(
+            "  {}",
+            chunk
+                .iter()
+                .map(|s| s.as_str())
+                .collect::<Vec<_>>()
+                .join(", ")
+        );
+    }
+    if only_in_1.len() > 24 {
+        println!("  ... and {} more", only_in_1.len() - 24);
+    }
+
+    println!();
+    println!(
+        "{} ({} keywords)",
+        format!("Only in {}", path2.split('/').next_back().unwrap_or(&path2))
+            .yellow()
+            .bold(),
+        only_in_2.len()
+    );
+    let mut only2_vec: Vec<_> = only_in_2.iter().take(24).collect();
+    only2_vec.sort();
+    for chunk in only2_vec.chunks(8) {
+        println!(
+            "  {}",
+            chunk
+                .iter()
+                .map(|s| s.as_str())
+                .collect::<Vec<_>>()
+                .join(", ")
+        );
+    }
+    if only_in_2.len() > 24 {
+        println!("  ... and {} more", only_in_2.len() - 24);
+    }
+
+    if !shared_headings.is_empty() {
+        println!();
+        println!(
+            "{} ({} headings)",
+            "Identical Headings".red().bold(),
+            shared_headings.len()
+        );
+        for h in shared_headings.iter().take(10) {
+            println!("  - {h}");
+        }
+        if shared_headings.len() > 10 {
+            println!("  ... and {} more", shared_headings.len() - 10);
+        }
+    }
+
+    Ok(())
+}
+
+/// Find duplicate sections across documents.
+///
+/// Every section fingerprint in the forward index is greedily assigned to the
+/// most similar existing cluster whose seed SimHash is at least `threshold`
+/// similar; if none qualifies, the section seeds a new cluster. Clusters with
+/// at least `min_files` member sections are reported, largest first.
+///
+/// Note that `min_files` is compared against the number of member sections, so
+/// several similar sections from one file also count towards it.
+///
+/// With `json` set, stdout always carries a JSON array of clusters (`[]` when
+/// nothing qualifies) so consumers never have to parse a prose message.
+/// Otherwise a colored report capped at 20 clusters is printed, with timing
+/// details on stderr.
+///
+/// # Errors
+///
+/// Fails if the forward index cannot be loaded or the JSON cannot be serialized.
+pub(crate) fn cmd_dupes_sections(
+    threshold: f64,
+    min_files: usize,
+    json: bool,
+    index_dir: &Path,
+) -> Result<(), Box<dyn std::error::Error>> {
+    /// Maximum number of clusters printed in the human-readable report.
+    const MAX_CLUSTERS_SHOWN: usize = 20;
+
+    /// One section that was placed in a cluster.
+    struct Member<'a> {
+        path: &'a str,
+        similarity: f64,
+        line_start: usize,
+        line_end: usize,
+    }
+
+    /// A group of similar sections, named after the section that seeded it.
+    struct Cluster<'a> {
+        heading: &'a str,
+        // SimHash of the seeding section; it is never re-averaged as members
+        // join, so membership depends on the order sections are visited.
+        seed_simhash: u64,
+        members: Vec<Member<'a>>,
+    }
+
+    let forward_index = load_forward_index(index_dir)?;
+    let start = Instant::now();
+    // Round so that e.g. 0.29 (28.999... after scaling) is displayed as 29%.
+    let threshold_pct = (threshold * 100.0).round() as u32;
+
+    let mut clusters: Vec<Cluster<'_>> = Vec::new();
+    let mut section_count = 0usize;
+
+    for (path, entry) in &forward_index.files {
+        for section in &entry.section_fingerprints {
+            section_count += 1;
+
+            // Pick the most similar cluster that clears the threshold.
+            let mut best: Option<(usize, f64)> = None;
+            for (idx, cluster) in clusters.iter().enumerate() {
+                let similarity = simhash_similarity(section.simhash, cluster.seed_simhash);
+                let best_so_far = best.map_or(0.0, |(_, score)| score);
+                if similarity >= threshold && similarity > best_so_far {
+                    best = Some((idx, similarity));
+                }
+            }
+
+            let (target, similarity) = match best {
+                Some(hit) => hit,
+                None => {
+                    // No match: this section seeds a cluster of its own.
+                    clusters.push(Cluster {
+                        heading: section.heading.as_str(),
+                        seed_simhash: section.simhash,
+                        members: Vec::new(),
+                    });
+                    (clusters.len() - 1, 1.0)
+                }
+            };
+            clusters[target].members.push(Member {
+                path: path.as_str(),
+                similarity,
+                line_start: section.line_start,
+                line_end: section.line_end,
+            });
+        }
+    }
+
+    let elapsed = start.elapsed();
+
+    if section_count == 0 {
+        if json {
+            println!("[]");
+        } else {
+            println!("{}", "No sections found in indexed files.".yellow());
+        }
+        return Ok(());
+    }
+
+    // Keep clusters with enough members; the stable sort puts the largest first.
+    let mut duplicates: Vec<Cluster<'_>> = clusters
+        .into_iter()
+        .filter(|cluster| cluster.members.len() >= min_files)
+        .collect();
+    duplicates.sort_by(|a, b| b.members.len().cmp(&a.members.len()));
+
+    // JSON is emitted before the "nothing found" message on purpose: an empty
+    // result is still a valid (empty) array.
+    if json {
+        let output: Vec<_> = duplicates
+            .iter()
+            .map(|cluster| {
+                serde_json::json!({
+                    "heading": cluster.heading,
+                    "file_count": cluster.members.len(),
+                    "files": cluster.members.iter().map(|member| {
+                        serde_json::json!({
+                            "path": member.path,
+                            "similarity": member.similarity,
+                            "line_start": member.line_start,
+                            "line_end": member.line_end,
+                        })
+                    }).collect::<Vec<_>>(),
+                })
+            })
+            .collect();
+        println!("{}", serde_json::to_string_pretty(&output)?);
+        return Ok(());
+    }
+
+    if duplicates.is_empty() {
+        println!(
+            "{}",
+            format!(
+                "No duplicate sections found with {} or more files at {}% threshold.",
+                min_files, threshold_pct
+            )
+            .green()
+        );
+        eprintln!("Section analysis: {elapsed:?} ({section_count} sections analyzed)");
+        return Ok(());
+    }
+
+    println!(
+        "{} duplicate section clusters found (threshold: {}%, min files: {})",
+        duplicates.len().to_string().yellow().bold(),
+        threshold_pct,
+        min_files
+    );
+    eprintln!("Section analysis: {elapsed:?} ({section_count} sections analyzed)\n");
+
+    for cluster in duplicates.iter().take(MAX_CLUSTERS_SHOWN) {
+        println!(
+            "{} {} ({} files)",
+            "Section:".cyan().bold(),
+            cluster.heading.yellow(),
+            cluster.members.len()
+        );
+
+        for member in &cluster.members {
+            // Truncates toward zero, e.g. a similarity of 0.999 is shown as 99%.
+            let sim_pct = (member.similarity * 100.0) as u32;
+            println!(
+                "  {}% {}:{}-{}",
+                sim_pct.to_string().dimmed(),
+                member.path,
+                member.line_start,
+                member.line_end
+            );
+        }
+        println!();
+    }
+
+    if duplicates.len() > MAX_CLUSTERS_SHOWN {
+        let hidden = duplicates.len() - MAX_CLUSTERS_SHOWN;
+        println!(
+            "{}",
+            format!("... and {hidden} more section clusters").dimmed()
+        );
+    }
+
+    Ok(())
+}
diff --git a/src/commands_text.rs b/src/commands_text.rs
new file mode 100644
index 0000000..f11e497
--- /dev/null
+++ b/src/commands_text.rs
@@ -0,0 +1,840 @@
+use crate::commands_query::*;
+use colored::Colorize;
+use std::collections::{HashMap, HashSet};
+use std::fs;
+use std::io::{self, BufRead, Write};
+use std::path::Path;
+
+use crate::assemble::*;
+use crate::search::*;
+use crate::types::*;
+use crate::util::*;
+
+/// Prints summary statistics for the index, as a colored report or as JSON.
+///
+/// `top_keywords` caps the keyword ranking. Keywords are ranked by posting
+/// count, descending; ties are broken alphabetically so the ranking does not
+/// depend on hash-map iteration order.
+///
+/// Returns an error if either index cannot be loaded or serialization fails.
+pub(crate) fn cmd_stats(
+    top_keywords: usize,
+    index_dir: &Path,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let forward_index = load_forward_index(index_dir)?;
+    let reverse_index = load_reverse_index(index_dir)?;
+
+    let mut keyword_counts: Vec<(&str, usize)> = reverse_index
+        .keywords
+        .iter()
+        .map(|(keyword, postings)| (keyword.as_str(), postings.len()))
+        .collect();
+    keyword_counts.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
+
+    let total_headings: usize = forward_index.files.values().map(|e| e.headings.len()).sum();
+    let total_links: usize = forward_index.files.values().map(|e| e.links.len()).sum();
+    let total_body_keywords: usize = forward_index
+        .files
+        .values()
+        .map(|e| e.body_keywords.len())
+        .sum();
+
+    if json {
+        let result = StatsResult {
+            total_files: forward_index.files.len(),
+            unique_keywords: reverse_index.keywords.len(),
+            total_headings,
+            body_keywords: total_body_keywords,
+            total_links,
+            index_version: forward_index.version,
+            indexed_at: forward_index.indexed_at.clone(),
+            top_keywords: keyword_counts
+                .iter()
+                .take(top_keywords)
+                .map(|&(keyword, count)| KeywordCount {
+                    keyword: keyword.to_string(),
+                    count,
+                })
+                .collect(),
+        };
+        println!("{}", serde_json::to_string_pretty(&result)?);
+        return Ok(());
+    }
+
+    // Label column is padded to 19 characters so the values line up.
+    let rows = [
+        ("Total files:", forward_index.files.len().to_string().cyan()),
+        ("Unique keywords:", reverse_index.keywords.len().to_string().cyan()),
+        ("Total headings:", total_headings.to_string().cyan()),
+        ("Body keywords:", total_body_keywords.to_string().cyan()),
+        ("Total links:", total_links.to_string().cyan()),
+        ("Index version:", forward_index.version.to_string().dimmed()),
+        ("Indexed at:", forward_index.indexed_at.dimmed()),
+    ];
+    println!("{}", "Index Statistics".green().bold());
+    println!();
+    for (label, value) in &rows {
+        println!("  {label:<19}{value}");
+    }
+    println!();
+    println!("{}", format!("Top {top_keywords} Keywords").green().bold());
+    println!();
+
+    for &(keyword, count) in keyword_counts.iter().take(top_keywords) {
+        let bar = "=".repeat((count / 2).min(40));
+        println!("  {:>20} {:>4} {}", keyword.cyan(), count, bar.dimmed());
+    }
+
+    Ok(())
+}
+
+/// Runs the interactive prompt until `quit`/`exit`/`q` or end of input.
+///
+/// Each line is dispatched on its first word; anything unrecognized is run as
+/// a query. A failing command is reported on stderr and the loop continues.
+///
+/// Only failures to flush the prompt or to read stdin are returned as errors.
+pub(crate) fn cmd_repl(index_dir: &Path) -> Result<(), Box<dyn std::error::Error>> {
+    /// Reports a command failure instead of silently discarding it.
+    fn report<T, E: std::fmt::Display>(outcome: Result<T, E>) {
+        if let Err(err) = outcome {
+            eprintln!("{} {err}", "error:".red().bold());
+        }
+    }
+
+    println!("{}", "yore interactive mode (v2)".green().bold());
+    println!("Commands: query <terms>, similar <file>, dupes, diff <f1> <f2>, stats, help, quit\n");
+
+    let stdin = io::stdin();
+    let mut stdout = io::stdout();
+    let query_options = QueryOptions {
+        limit: 10,
+        files_only: false,
+        json: false,
+        doc_terms: 0,
+        explain: false,
+        require_phrases: false,
+        filter_stopwords: true,
+    };
+
+    loop {
+        print!("{} ", ">".cyan().bold());
+        stdout.flush()?;
+
+        let mut line = String::new();
+        if stdin.lock().read_line(&mut line)? == 0 {
+            break;
+        }
+
+        let parts: Vec<&str> = line.split_whitespace().collect();
+        if parts.is_empty() {
+            continue;
+        }
+
+        match parts[0] {
+            "quit" | "exit" | "q" => break,
+            "help" | "?" => {
+                println!("  query <terms...>   - Search for keywords");
+                println!("  similar <file>     - Find similar files");
+                println!("  dupes              - Find duplicates");
+                println!("  diff <f1> <f2>     - Compare two files");
+                println!("  stats              - Show statistics");
+                println!("  quit               - Exit");
+            }
+            "query" => {
+                let rest = line.trim().strip_prefix("query").unwrap_or("").trim();
+                if rest.is_empty() {
+                    println!("{}", "Usage: query <terms...>".yellow());
+                } else {
+                    report(cmd_query(rest, index_dir, &query_options));
+                }
+            }
+            "similar" => match parts.get(1) {
+                Some(file) => report(cmd_similar(Path::new(file), 5, 0.3, false, 0, index_dir)),
+                None => println!("{}", "Usage: similar <file>".yellow()),
+            },
+            "dupes" => report(cmd_dupes(0.35, false, false, index_dir)),
+            "diff" => match (parts.get(1), parts.get(2)) {
+                (Some(first), Some(second)) => {
+                    report(cmd_diff(Path::new(first), Path::new(second), index_dir, false));
+                }
+                _ => println!("{}", "Usage: diff <file1> <file2>".yellow()),
+            },
+            "stats" => report(cmd_stats(10, index_dir, false)),
+            // Anything else is treated as a query; `parts` is non-empty here, so
+            // the trimmed line is non-empty as well.
+            _ => report(cmd_query(line.trim(), index_dir, &query_options)),
+        }
+        println!();
+    }
+
+    Ok(())
+}
+
+/// Prints the index vocabulary in `lines`, `json`, or `prompt` format.
+///
+/// Terms are ranked by document frequency, then posting count, then first
+/// occurrence. Each index term is shown by its resolved surface form; terms
+/// without one are kept verbatim only when `options.include_stemming` is set.
+/// Stopwords, non-hygienic terms and, when `options.common_terms > 0`, the
+/// automatically detected most common index terms are dropped before `limit`
+/// is applied.
+///
+/// `json` overrides `format`. A missing reverse index is treated as an empty
+/// vocabulary; an unsupported format is reported as an error.
+pub(crate) fn cmd_vocabulary(
+    index_dir: &Path,
+    limit: usize,
+    format: &str,
+    json: bool,
+    options: VocabularyOptions<'_>,
+) -> Result<(), Box<dyn std::error::Error>> {
+    /// Number of distinct files among `postings`.
+    fn distinct_files(postings: &[ReverseEntry]) -> usize {
+        postings
+            .iter()
+            .map(|posting| posting.file.as_str())
+            .collect::<HashSet<_>>()
+            .len()
+    }
+
+    /// Sort key for postings: file, then line (missing last), then heading.
+    fn posting_key(entry: &ReverseEntry) -> (&str, usize, &str) {
+        let line = entry.line.unwrap_or(usize::MAX);
+        (entry.file.as_str(), line, entry.heading.as_deref().unwrap_or(""))
+    }
+
+    let reverse_index = match load_reverse_index(index_dir) {
+        Ok(index) => index,
+        Err(err) => {
+            let index_missing = matches!(
+                err.downcast_ref::<std::io::Error>(),
+                Some(io_err) if io_err.kind() == std::io::ErrorKind::NotFound
+            );
+            if !index_missing {
+                return Err(err);
+            }
+            ReverseIndex {
+                keywords: HashMap::new(),
+            }
+        }
+    };
+
+    let forward_index = load_forward_index(index_dir).ok();
+    let stopwords_path = options
+        .stopwords
+        .map(|path| path.to_string_lossy().to_string());
+    let mut stopwords =
+        load_vocabulary_stopwords(options.stopwords, !options.no_default_stopwords)?;
+
+    // Index terms are stems, while the stopword filter further down looks at
+    // surface forms. The auto-detected common terms are therefore also kept
+    // separately, so they are excluded even when surface and stem differ.
+    let mut auto_common_stems: HashSet<String> = HashSet::new();
+    let mut auto_common_terms = None;
+    if options.common_terms > 0 {
+        let candidate_metrics: Vec<VocabularyCandidateTerm> = reverse_index
+            .keywords
+            .iter()
+            .map(|(term, postings)| VocabularyCandidateTerm {
+                term: term.clone(),
+                surface: None,
+                term_freq: postings.len(),
+                doc_freq: distinct_files(postings),
+                first_file: String::new(),
+                first_line: usize::MAX,
+                first_heading: String::new(),
+            })
+            .collect();
+        auto_common_stems =
+            build_auto_common_vocabulary_stopwords(&candidate_metrics, options.common_terms);
+        for stem in &auto_common_stems {
+            stopwords.insert(stem.clone());
+        }
+        auto_common_terms = Some(auto_common_stems.len());
+    }
+
+    let mut candidates: Vec<VocabularyCandidateTerm> = reverse_index
+        .keywords
+        .into_iter()
+        .filter(|(_, postings)| !postings.is_empty())
+        .map(|(term, mut postings)| {
+            let doc_freq = distinct_files(&postings);
+            postings.sort_by(|a, b| posting_key(a).cmp(&posting_key(b)));
+
+            let first = postings.first().expect("empty posting lists are filtered out");
+            let surface = resolve_vocabulary_surface(&term, &postings, forward_index.as_ref());
+
+            VocabularyCandidateTerm {
+                surface,
+                term_freq: postings.len(),
+                doc_freq,
+                first_file: first.file.clone(),
+                first_line: first.line.unwrap_or(usize::MAX),
+                first_heading: first.heading.clone().unwrap_or_default(),
+                term,
+            }
+        })
+        .collect();
+
+    candidates.sort_by(|a, b| {
+        b.doc_freq
+            .cmp(&a.doc_freq)
+            .then_with(|| b.term_freq.cmp(&a.term_freq))
+            .then_with(|| a.first_file.cmp(&b.first_file))
+            .then_with(|| a.first_line.cmp(&b.first_line))
+            .then_with(|| a.first_heading.cmp(&b.first_heading))
+            .then_with(|| a.term.cmp(&b.term))
+    });
+
+    let mut terms = Vec::new();
+    for candidate in &candidates {
+        // The auto-common set holds index terms, so it is matched against the
+        // index term itself rather than against the surface form.
+        if auto_common_stems.contains(&candidate.term.to_lowercase()) {
+            continue;
+        }
+        let term = match &candidate.surface {
+            Some(surface) => surface,
+            None if options.include_stemming => &candidate.term,
+            None => continue,
+        };
+        if !is_hygienic_vocabulary_term(term) || stopwords.contains(&term.to_lowercase()) {
+            continue;
+        }
+
+        terms.push(VocabularyTerm {
+            term: term.clone(),
+            score: candidate.doc_freq as f64,
+            count: candidate.term_freq,
+        });
+    }
+    let (terms, total_candidates) = apply_vocabulary_limit(terms, limit);
+
+    let effective_format = if json { "json" } else { format };
+    let result = VocabularyResult {
+        format: effective_format.to_string(),
+        limit,
+        total: total_candidates,
+        terms,
+        stopwords: stopwords_path,
+        used_default_stopwords: !options.no_default_stopwords,
+        auto_common_terms,
+        include_stemming: options.include_stemming,
+    };
+
+    match effective_format {
+        "lines" => {
+            if !result.terms.is_empty() {
+                println!("{}", render_vocabulary_lines(&result.terms));
+            }
+            Ok(())
+        }
+        "json" => {
+            println!("{}", serde_json::to_string_pretty(&result)?);
+            Ok(())
+        }
+        "prompt" => {
+            println!("{}", render_vocabulary_prompt(&result.terms));
+            Ok(())
+        }
+        _ => Err(format!("Unsupported vocabulary format: {effective_format}").into()),
+    }
+}
+
+/// Renders terms for the `prompt` format: normalized, non-empty, `, `-joined.
+pub(crate) fn render_vocabulary_prompt(terms: &[VocabularyTerm]) -> String {
+    let mut pieces: Vec<String> = Vec::with_capacity(terms.len());
+    for entry in terms {
+        pieces.push(normalize_prompt_term(&entry.term));
+    }
+    pieces.retain(|piece| !piece.is_empty());
+    pieces.join(", ")
+}
+
+/// Renders terms for the `lines` format: one term per line, in slice order.
+///
+/// Terms are emitted verbatim and no trailing newline is appended.
+pub(crate) fn render_vocabulary_lines(terms: &[VocabularyTerm]) -> String {
+    let borrowed: Vec<&str> = terms.iter().map(|entry| entry.term.as_str()).collect();
+    borrowed.join("\n")
+}
+
+/// Caps `terms` at `limit` entries, keeping the leading ones.
+///
+/// Returns the capped list together with the number of terms before capping.
+pub(crate) fn apply_vocabulary_limit(
+    terms: Vec<VocabularyTerm>,
+    limit: usize,
+) -> (Vec<VocabularyTerm>, usize) {
+    let uncapped_len = terms.len();
+    (terms.into_iter().take(limit).collect(), uncapped_len)
+}
+
+/// Picks the `top_n` most frequent hygienic terms to use as extra stopwords.
+///
+/// Candidates are ranked by term frequency, then document frequency (both
+/// descending), then term; the selected terms are returned lowercased.
+pub(crate) fn build_auto_common_vocabulary_stopwords(
+    candidates: &[VocabularyCandidateTerm],
+    top_n: usize,
+) -> HashSet<String> {
+    let mut common = HashSet::new();
+    if candidates.is_empty() || top_n == 0 {
+        return common;
+    }
+
+    let mut ranked: Vec<&VocabularyCandidateTerm> = candidates.iter().collect();
+    ranked.sort_by(|left, right| {
+        let by_term_freq = right.term_freq.cmp(&left.term_freq);
+        let by_doc_freq = right.doc_freq.cmp(&left.doc_freq);
+        by_term_freq.then(by_doc_freq).then_with(|| left.term.cmp(&right.term))
+    });
+    let hygienic = ranked.into_iter().filter(|c| is_hygienic_vocabulary_term(&c.term));
+    common.extend(hygienic.take(top_n).map(|c| c.term.to_lowercase()));
+    common
+}
+
+/// Chooses a display ("surface") form for the index term `stem`.
+///
+/// Candidates are words whose stem equals `stem`. They are looked up in two
+/// sources, in order of preference:
+///
+/// 1. the headings recorded on the term's postings, tokenized with
+///    `extract_keywords`;
+/// 2. the `keywords` and `body_keywords` of each posting's file in
+///    `forward_index` (lowercased), consulted only if no heading matched.
+///
+/// Within a source the candidate with the smallest
+/// (file, line, token position, word) wins, with a missing line sorting last,
+/// so the result does not depend on the order of `postings`.
+///
+/// Returns `None` when no word stems to `stem`.
+pub(crate) fn resolve_vocabulary_surface(
+    stem: &str,
+    postings: &[ReverseEntry],
+    forward_index: Option<&ForwardIndex>,
+) -> Option<String> {
+    /// Ranking key of a candidate: (file, line, token position, word).
+    /// Tuples compare lexicographically, which gives exactly that priority.
+    type Key<'a> = (&'a str, usize, usize, String);
+
+    /// Replaces `best` with `candidate` if `candidate` ranks strictly lower.
+    fn keep_smallest<'a>(best: &mut Option<Key<'a>>, candidate: Key<'a>) {
+        // Equal keys carry the same word, so keeping the earlier one is fine.
+        let better = match best.as_ref() {
+            Some(current) => candidate < *current,
+            None => true,
+        };
+        if better {
+            *best = Some(candidate);
+        }
+    }
+
+    let mut best: Option<Key<'_>> = None;
+
+    // Pass 1: heading words. Any hit here outranks every keyword hit, so the
+    // forward index is only consulted when no heading matches. Taking the
+    // minimum directly avoids cloning and sorting the postings first.
+    for posting in postings {
+        if let Some(heading) = &posting.heading {
+            // A posting without a line number sorts after every numbered one.
+            let line = posting.line.unwrap_or(usize::MAX);
+            for (token_idx, token) in extract_keywords(heading).into_iter().enumerate() {
+                if stem_word(&token) == stem {
+                    keep_smallest(&mut best, (posting.file.as_str(), line, token_idx, token));
+                }
+            }
+        }
+    }
+    if let Some((_, _, _, word)) = best.take() {
+        return Some(word);
+    }
+
+    // Without a forward index there is nothing left to search.
+    let forward_index = forward_index?;
+
+    // Pass 2: keyword lists. Every posting of a file sees the same keywords, so
+    // each file is scanned once, paired with its smallest posting line.
+    let mut first_line_by_file: HashMap<&str, usize> = HashMap::new();
+    for posting in postings {
+        let line = posting.line.unwrap_or(usize::MAX);
+        first_line_by_file
+            .entry(posting.file.as_str())
+            .and_modify(|known| *known = (*known).min(line))
+            .or_insert(line);
+    }
+
+    // Map iteration order is irrelevant: only the overall minimum is kept.
+    for (file, line) in first_line_by_file {
+        if let Some(entry) = forward_index.files.get(file) {
+            let words = entry.keywords.iter().chain(entry.body_keywords.iter());
+            for (token_idx, token) in words.enumerate() {
+                let word = token.to_lowercase();
+                if stem_word(&word) == stem {
+                    keep_smallest(&mut best, (file, line, token_idx, word));
+                }
+            }
+        }
+    }
+
+    best.map(|(_, _, _, word)| word)
+}
+
+/// Strips control characters (tabs and newlines included) from `term`, then
+/// collapses the remaining whitespace runs to single spaces and trims the ends.
+pub(crate) fn normalize_prompt_term(term: &str) -> String {
+    let printable: String = term.chars().filter(|c| !c.is_control()).collect();
+    let mut normalized = String::with_capacity(printable.len());
+    for word in printable.split_whitespace() {
+        if !normalized.is_empty() {
+            normalized.push(' ');
+        }
+        normalized.push_str(word);
+    }
+    normalized
+}
+
+/// Main assemble command handler.
+///
+/// Selects sections for `query` / `from_files`, adds cross-referenced sections
+/// when `options.depth > 0`, refines them against the token budget and prints
+/// the markdown digest to stdout. Selection problems are reported as messages
+/// and still return `Ok(())`; only a non-markdown `options.format` or an
+/// unloadable forward index is an error.
+pub(crate) fn cmd_assemble(
+    query: &str,
+    from_files: &[String],
+    options: &AssembleOptions,
+    index_dir: &Path,
+) -> Result<(), Box<dyn std::error::Error>> {
+    // Cross-references get at most 30% of the token budget, capped at 2000.
+    const XREF_TOKEN_FRACTION: f64 = 0.3;
+    const XREF_TOKEN_ABS_MAX: usize = 2000;
+
+    if options.format != "markdown" {
+        return Err("Only markdown format is supported currently".into());
+    }
+
+    // Phase 1: pick the primary sections for the query or the given files.
+    let forward_index = load_forward_index(index_dir)?;
+    let selected =
+        collect_context_selection(query, from_files, &forward_index, options.max_sections);
+    let selection = match selected {
+        Ok(selection) => selection,
+        Err(issue) => {
+            // Each issue is reported to the user; none of them is an error.
+            match issue {
+                ContextSelectionIssue::NoSearchableTerms => {
+                    println!("# No searchable terms in query. Try different keywords.");
+                }
+                ContextSelectionIssue::MissingFiles(missing) => {
+                    eprintln!(
+                        "{}",
+                        "Some files were not found in the index (they may be missing or excluded):"
+                            .yellow()
+                    );
+                    for path in missing {
+                        eprintln!("  - {path}");
+                    }
+                }
+                ContextSelectionIssue::NoIndexedFilesMatched => {
+                    println!("# No indexed files matched the provided inputs.");
+                }
+                ContextSelectionIssue::NoRelevantSections(label) => {
+                    println!("# No relevant sections found for query: \"{label}\"");
+                }
+            }
+            return Ok(());
+        }
+    };
+    let query_label = selection.query_label;
+    let query_for_refiner = selection.query_for_refiner;
+    let primary_sections = selection.sections;
+
+    let primary_tokens: usize = primary_sections
+        .iter()
+        .map(|section| estimate_tokens(&section.content))
+        .sum();
+
+    // Phase 2: cross-reference expansion (only when depth > 0).
+    let mut all_sections = primary_sections.clone();
+    if options.depth > 0 {
+        let xref_cap =
+            ((options.max_tokens as f64 * XREF_TOKEN_FRACTION) as usize).min(XREF_TOKEN_ABS_MAX);
+        let remaining = options.max_tokens.saturating_sub(primary_tokens);
+        let xref_token_budget = remaining.min(xref_cap);
+        let primary_docs: HashSet<String> = primary_sections
+            .iter()
+            .map(|section| section.doc_path.clone())
+            .collect();
+
+        if options.use_relations {
+            // Graph-aware expansion over the persisted relation edges.
+            let relation_index = load_relation_index(index_dir);
+            if !relation_index.edges.is_empty() && xref_token_budget > 0 {
+                all_sections.extend(resolve_crossrefs_from_relations(
+                    &relation_index,
+                    &primary_docs,
+                    &forward_index,
+                    xref_token_budget,
+                ));
+            }
+        } else {
+            // Legacy expansion: cross-references are collected on the fly.
+            let adr_index = build_adr_index(&forward_index);
+            let crossrefs = collect_crossrefs(&primary_sections, &adr_index);
+            if xref_token_budget > 0 && !crossrefs.is_empty() {
+                let xref_sections =
+                    resolve_crossrefs(&crossrefs, &primary_docs, &forward_index, xref_token_budget);
+                all_sections.extend(xref_sections);
+            }
+        }
+    }
+    let (all_sections, _) = dedupe_section_matches(all_sections);
+
+    // Phase 3: extractive refinement, splitting the budget evenly per section.
+    let per_section_budget = options.max_tokens / all_sections.len().max(1);
+    let refined_sections =
+        apply_extractive_refiner(all_sections, &query_for_refiner, per_section_budget);
+
+    // Optional preamble: top terms of each distinct source document.
+    if options.doc_terms > 0 {
+        println!("<!-- Source Documents -->");
+        let query_terms = if query_for_refiner.is_empty() {
+            Vec::new()
+        } else {
+            parse_query_terms(&query_for_refiner, true)
+        };
+        let mut seen_docs: HashSet<&str> = HashSet::new();
+        for refined in &refined_sections {
+            let doc_path = &refined.section.doc_path;
+            // `insert` returns false when the document was already handled.
+            if !seen_docs.insert(doc_path.as_str()) {
+                continue;
+            }
+            if let Some(entry) = forward_index.files.get(doc_path) {
+                let top_terms = get_top_doc_terms(
+                    entry,
+                    &forward_index.idf_map,
+                    &query_terms,
+                    options.doc_terms,
+                );
+                if !top_terms.is_empty() {
+                    println!("<!-- {} : {} -->", doc_path, top_terms.join(", "));
+                }
+            }
+        }
+        println!();
+    }
+
+    // Phase 4: distill the refined sections into the final markdown digest.
+    let digest_sections: Vec<SectionMatch> = refined_sections
+        .iter()
+        .map(|refined| refined.section.clone())
+        .collect();
+    let digest = distill_to_markdown(&digest_sections, &query_label, options.max_tokens);
+    println!("{digest}");
+
+    Ok(())
+}
+
+/// Evaluation command handler - runs retrieval pipeline against test questions
+pub(crate) fn cmd_eval(
+    questions_path: &Path,
+    index_dir: &Path,
+    json: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    // Load questions from JSONL file
+    let questions_content = fs::read_to_string(questions_path)?;
+    let questions: Vec<Question> = questions_content
+        .lines()
+        .filter(|line| !line.trim().is_empty())
+        .map(serde_json::from_str)
+        .collect::<Result<Vec<_>, _>>()?;
+
+    if questions.is_empty() {
+        if json {
+            println!(
+                r#"{{"questions_file": "{}", "total_questions": 0, "error": "No questions found"}}"#,
+                questions_path.display()
+            );
+        } else {
+            println!("No questions found in {}", questions_path.display());
+        }
+        return Ok(());
+    }
+
+    // Load index once
+    let forward_index = load_forward_index(index_dir)?;
+
+    // Run evaluation for each question
+    let mut results = Vec::new();
+
+    for question in &questions {
+        // Run assemble internally (capture output as string)
+        let primary_sections = search_relevant_sections(&question.q, &forward_index, 20);
+
+        if primary_sections.is_empty() {
+            results.push(EvalResult {
+                id: question.id,
+                question: question.q.clone(),
+                hits: 0,
+                total: question.expect.len(),
+                passed: false,
+                tokens: 0,
+            });
+            continue;
+        }
+
+        let primary_tokens: usize = primary_sections
+            .iter()
+            .map(|s| estimate_tokens(&s.content))
+            .sum();
+
+        // Cross-reference expansion
+        let mut all_sections = primary_sections.clone();
+        let adr_index = build_adr_index(&forward_index);
+        let crossrefs = collect_crossrefs(&primary_sections, &adr_index);
+
+        const XREF_TOKEN_FRACTION: f64 = 0.3;
+        const XREF_TOKEN_ABS_MAX: usize = 2000;
+        let max_tokens: usize = 8000; // Default for eval
+
+        let xref_cap = ((max_tokens as f64 * XREF_TOKEN_FRACTION) as usize).min(XREF_TOKEN_ABS_MAX);
+        let remaining_tokens = max_tokens.saturating_sub(primary_tokens);
+        let xref_token_budget = remaining_tokens.min(xref_cap);
+
+        if xref_token_budget > 0 && !crossrefs.is_empty() {
+            let primary_docs: HashSet<String> = primary_sections
+                .iter()
+                .map(|s| s.doc_path.clone())
+                .collect();
+
+            let xref_sections =
+                resolve_crossrefs(&crossrefs, &primary_docs, &forward_index, xref_token_budget);
+
+            all_sections.extend(xref_sections);
+        }
+
+        // Extractive refinement
+        let max_tokens_per_section = max_tokens / all_sections.len().max(1);
+        let refined_sections =
+            apply_extractive_refiner(all_sections, &question.q, max_tokens_per_section);
+
+        // Distill to markdown
+        let digest_sections: Vec<SectionMatch> = refined_sections
+            .iter()
+            .map(|section| section.section.clone())
+            .collect();
+        let digest = distill_to_markdown(&digest_sections, &question.q, max_tokens);
+
+        // Check coverage of expected substrings
+        let digest_lower = digest.to_lowercase();
+        let hits = question
+            .expect
+            .iter()
+            .filter(|e| digest_lower.contains(&e.to_lowercase()))
+            .count();
+
+        let min_hits = question.min_hits.unwrap_or(question.expect.len());
+        let passed = hits >= min_hits;
+        let tokens = estimate_tokens(&digest);
+
+        results.push(EvalResult {
+            id: question.id,
+            question: question.q.clone(),
+            hits,
+            total: question.expect.len(),
+            passed,
+            tokens,
+        });
+    }
+
+    // Calculate summary
+    let passed_count = results.iter().filter(|r| r.passed).count();
+    let total = results.len();
+    let pass_rate_pct = passed_count as f64 / total as f64 * 100.0;
+
+    if json {
+        let json_results: Vec<EvalQuestionResult> = results
+            .iter()
+            .map(|r| {
+                let expected: Vec<String> = questions
+                    .iter()
+                    .find(|q| q.id == r.id)
+                    .map(|q| q.expect.clone())
+                    .unwrap_or_default();
+                let found: Vec<String> = expected
+                    .iter()
+                    .filter(|e| r.question.to_lowercase().contains(&e.to_lowercase()))
+                    .cloned()
+                    .collect();
+                let missing: Vec<String> = expected
+                    .iter()
+                    .filter(|e| !r.question.to_lowercase().contains(&e.to_lowercase()))
+                    .cloned()
+                    .collect();
+                EvalQuestionResult {
+                    question: r.question.clone(),
+                    passed: r.passed,
+                    expected,
+                    found,
+                    missing,
+                }
+            })
+            .collect();
+
+        let output = EvalJsonResult {
+            questions_file: questions_path.to_string_lossy().to_string(),
+            total_questions: total,
+            passed: passed_count,
+            failed: total - passed_count,
+            pass_rate: pass_rate_pct,
+            results: json_results,
+        };
+        println!("{}", serde_json::to_string_pretty(&output)?);
+        return Ok(());
+    }
+
+    // Print results (human-readable)
+    println!("\n{}", "Evaluation Results".cyan().bold());
+    println!("{}", "=".repeat(60));
+    println!();
+
+    for result in &results {
+        let status = if result.passed {
+            "✓".green().bold()
+        } else {
+            "✗".red().bold()
+        };
+
+        println!("[{}] {}", result.id, result.question.white().bold());
+        println!("  - hits: {}/{} {}", result.hits, result.total, status);
+        println!("  - size: {} tokens", result.tokens);
+        println!();
+    }
+
+    // Print summary
+    println!("{}", "=".repeat(60));
+    println!("{}", "Summary".cyan().bold());
+    println!("  Passed: {passed_count}/{total} ({pass_rate_pct:.0}%)");
+    println!("  Failed: {}/{}", total - passed_count, total);
+    println!();
+
+    if passed_count < total {
+        println!("{}", "Failed Questions:".yellow().bold());
+        for result in &results {
+            if !result.passed {
+                println!(
+                    "  - [{}] {} (hits: {}/{})",
+                    result.id, result.question, result.hits, result.total
+                );
+            }
+        }
+        println!();
+    }
+
+    Ok(())
+}
diff --git a/src/config.rs b/src/config.rs
new file mode 100644
index 0000000..12e1b26
--- /dev/null
+++ b/src/config.rs
@@ -0,0 +1,107 @@
+use colored::Colorize;
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use crate::types::*;
+
+/// Loads the TOML config at `path`, returning `None` when the file is missing,
+/// unreadable, or fails to parse as `YoreConfig`. Read and parse failures print
+/// a warning to stderr unless `quiet` is set; a missing file is silent.
+pub fn load_config(path: &Path, quiet: bool) -> Option<YoreConfig> {
+    // Absence is the only silent failure; the two cases below warn.
+    if !path.exists() {
+        return None;
+    }
+
+    let contents = fs::read_to_string(path)
+        .map_err(|e| {
+            if !quiet {
+                eprintln!(
+                    "{}: failed to read config {}: {}",
+                    "warning".yellow(),
+                    path.display(),
+                    e
+                );
+            }
+        })
+        .ok()?;
+
+    toml::from_str::<YoreConfig>(&contents)
+        .map_err(|e| {
+            if !quiet {
+                eprintln!(
+                    "{}: failed to parse config {}: {}",
+                    "warning".yellow(),
+                    path.display(),
+                    e
+                );
+            }
+        })
+        .ok()
+}
+
+/// Merges CLI build arguments with the config profile named by `profile`,
+/// returning `(walk_path, output_dir, types_csv, roots)`. Without a matching
+/// profile the CLI values pass through and `roots` is `None`. Non-empty profile
+/// `roots` force the walk path to "."; profile `types`/`output` only replace
+/// CLI values that still equal the defaults ("md,txt,rst" / ".yore").
+pub fn resolve_build_params(
+    path: PathBuf,
+    output: PathBuf,
+    types: String,
+    profile: Option<&str>,
+    config: &Option<YoreConfig>,
+) -> (PathBuf, PathBuf, String, Option<Vec<PathBuf>>) {
+    // No profile requested, no config loaded, or unknown profile name.
+    let Some(profile_cfg) = profile
+        .zip(config.as_ref())
+        .and_then(|(name, cfg)| cfg.index.get(name))
+    else {
+        return (path, output, types, None);
+    };
+
+    // Multi-root profiles use the repo root (".") as the walk root.
+    let (walk_path, roots) = if profile_cfg.roots.is_empty() {
+        (path, None)
+    } else {
+        let allowed: Vec<PathBuf> = profile_cfg.roots.iter().map(PathBuf::from).collect();
+        (PathBuf::from("."), Some(allowed))
+    };
+
+    // Types: only override when the CLI used the default.
+    let types = if types == "md,txt,rst" && !profile_cfg.types.is_empty() {
+        profile_cfg.types.join(",")
+    } else {
+        types
+    };
+
+    // Output: only override when the CLI used the default.
+    let output = match &profile_cfg.output {
+        Some(out) if output == PathBuf::from(".yore") => PathBuf::from(out),
+        _ => output,
+    };
+
+    (walk_path, output, types, roots)
+}
+
+/// Chooses the index directory to read from.
+///
+/// A non-default `index` is returned as-is. When `index` is the default
+/// ".yore" and the named profile exists with an `output`, that output is
+/// returned instead; otherwise the default is kept.
+pub fn resolve_index_path(
+    index: PathBuf,
+    profile: Option<&str>,
+    config: &Option<YoreConfig>,
+) -> PathBuf {
+    if index != PathBuf::from(".yore") {
+        return index;
+    }
+
+    // Default index requested: defer to the profile's output when it has one.
+    profile
+        .zip(config.as_ref())
+        .and_then(|(name, cfg)| cfg.index.get(name))
+        .and_then(|profile_cfg| profile_cfg.output.as_ref())
+        .map_or(index, PathBuf::from)
+}
diff --git a/src/index.rs b/src/index.rs
new file mode 100644
index 0000000..49ac016
--- /dev/null
+++ b/src/index.rs
@@ -0,0 +1,639 @@
+use colored::Colorize;
+use ignore::WalkBuilder;
+use regex::Regex;
+use std::collections::{HashMap, HashSet};
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::time::Instant;
+
+use crate::assemble::extract_relations;
+use crate::search::*;
+use crate::types::*;
+use crate::util::*;
+
+/// Walks `path`, indexes every file whose extension is listed in `types`, and
+/// writes the forward/reverse indexes, document metrics, stats, and relation
+/// edges (plus git rename history when `track_renames` is set) into `output`.
+/// `quiet`/`json` select the console output; I/O and serialization errors propagate.
+#[allow(clippy::too_many_arguments)]
+pub fn cmd_build(
+    path: &Path,
+    output: &Path,
+    types: &str,
+    exclude: &[String],
+    quiet: bool,
+    roots: Option<&[PathBuf]>,
+    json: bool,
+    track_renames: bool,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let start = Instant::now();
+    let source_root = canonicalize_existing_path(&std::env::current_dir()?);
+
+    if !quiet && !json {
+        println!("{} {}", "Indexing".cyan().bold(), path.display());
+    }
+
+    // Parse the comma-separated extension list (trimmed, lowercased)
+    let extensions: HashSet<String> = types.split(',').map(|s| s.trim().to_lowercase()).collect();
+
+    // Build walker with ignore patterns (hidden files, .gitignore, global gitignore)
+    let mut builder = WalkBuilder::new(path);
+    builder.hidden(true).git_ignore(true).git_global(true);
+
+    // Custom excludes. NOTE(review): add_ignore expects an ignore-file path, not a glob — confirm
+    for pattern in exclude {
+        builder.add_ignore(Path::new(pattern));
+    }
+
+    // Index containers filled by the walk below
+    let mut forward_index = ForwardIndex {
+        files: HashMap::new(),
+        indexed_at: chrono_now(),
+        version: 4, // Version 4 adds source_root metadata for portable file resolution
+        source_root: source_root.to_string_lossy().to_string(),
+        avg_doc_length: 0.0,
+        idf_map: HashMap::new(),
+    };
+
+    let mut reverse_index = ReverseIndex {
+        keywords: HashMap::new(),
+    };
+    let mut document_metrics_index = DocumentMetricsIndex {
+        indexed_at: chrono_now(),
+        version: 1,
+        files: HashMap::new(),
+    };
+
+    let mut file_count = 0;
+    let mut total_headings = 0;
+    let mut total_links = 0;
+
+    for entry in builder.build().filter_map(std::result::Result::ok) {
+        let path = entry.path();
+
+        if path.is_dir() {
+            continue; // skip directories
+        }
+
+        // If roots are configured, skip files outside those roots (component-wise prefix match)
+        if let Some(root_list) = roots {
+            let mut inside_any_root = false;
+            for root in root_list {
+                if path.starts_with(root) {
+                    inside_any_root = true;
+                    break;
+                }
+            }
+            if !inside_any_root {
+                continue;
+            }
+        }
+
+        let ext = path
+            .extension()
+            .and_then(|e| e.to_str())
+            .map(str::to_lowercase)
+            .unwrap_or_default();
+
+        if !extensions.contains(&ext) {
+            continue; // extension (lowercased) is not listed in `types`
+        }
+
+        // Skip common non-content directories (plain substring match on the path)
+        let path_str = path.to_string_lossy();
+        if path_str.contains("node_modules")
+            || path_str.contains(".git/")
+            || path_str.contains("target/")
+            || path_str.contains("vendor/")
+            || path_str.contains("venv/")
+            || path_str.contains("__pycache__")
+        {
+            continue;
+        }
+
+        // Index the file; files for which `index_file` returns an error are skipped silently
+        if let Ok((mut entry, mut metrics)) = index_file(path) {
+            let physical_path = canonicalize_existing_path(path);
+            let rel_path = build_indexed_doc_key(&physical_path, &source_root);
+            entry.path = physical_path.to_string_lossy().to_string();
+            metrics.path.clone_from(&rel_path);
+
+            // Update reverse index with heading keywords
+            for keyword in &entry.keywords {
+                let stemmed = stem_word(&keyword.to_lowercase());
+                reverse_index
+                    .keywords
+                    .entry(stemmed)
+                    .or_default()
+                    .push(ReverseEntry {
+                        file: rel_path.clone(),
+                        line: None,
+                        heading: None,
+                        level: None,
+                    });
+            }
+
+            // Update reverse index with body keywords
+            for keyword in &entry.body_keywords {
+                let stemmed = stem_word(&keyword.to_lowercase());
+                reverse_index
+                    .keywords
+                    .entry(stemmed)
+                    .or_default()
+                    .push(ReverseEntry {
+                        file: rel_path.clone(),
+                        line: None,
+                        heading: None,
+                        level: None,
+                    });
+            }
+
+            for heading in &entry.headings { // per-heading entries carry line, text and level
+                let words = extract_keywords(&heading.text);
+                for word in words {
+                    let stemmed = stem_word(&word.to_lowercase());
+                    reverse_index
+                        .keywords
+                        .entry(stemmed)
+                        .or_default()
+                        .push(ReverseEntry {
+                            file: rel_path.clone(),
+                            line: Some(heading.line),
+                            heading: Some(heading.text.clone()),
+                            level: Some(heading.level),
+                        });
+                }
+            }
+
+            total_headings += entry.headings.len();
+            total_links += entry.links.len();
+            file_count += 1;
+
+            document_metrics_index
+                .files
+                .insert(rel_path.clone(), metrics);
+            forward_index.files.insert(rel_path, entry);
+        }
+    }
+
+    // Compute BM25 statistics (document frequencies, IDF, and average document length)
+    let total_docs = forward_index.files.len() as f64;
+    let mut doc_frequencies: HashMap<String, usize> = HashMap::new();
+    let mut total_length = 0;
+
+    for entry in forward_index.files.values() {
+        total_length += entry.doc_length;
+        for term in entry.term_frequencies.keys() {
+            *doc_frequencies.entry(term.clone()).or_insert(0) += 1;
+        }
+    }
+
+    // Compute IDF scores (with floor to handle high-frequency terms)
+    let mut idf_map: HashMap<String, f64> = HashMap::new();
+    for (term, df) in doc_frequencies {
+        // Standard BM25 IDF can go negative when df > 50% of docs.
+        // We floor at a small positive value so common terms still contribute.
+        let idf = ((total_docs - df as f64 + 0.5) / (df as f64 + 0.5))
+            .ln()
+            .max(0.1);
+        idf_map.insert(term, idf);
+    }
+
+    forward_index.avg_doc_length = if total_docs > 0.0 {
+        total_length as f64 / total_docs
+    } else {
+        0.0
+    };
+    forward_index.idf_map = idf_map;
+
+    fs::create_dir_all(output)?; // create the output directory
+
+    // Write indexes (each file is pretty-printed JSON inside `output`)
+    let forward_path = output.join("forward_index.json");
+    let reverse_path = output.join("reverse_index.json");
+    let stats_path = output.join("stats.json");
+    let metrics_path = output.join("document_metrics.json");
+
+    fs::write(&forward_path, serde_json::to_string_pretty(&forward_index)?)?;
+    fs::write(&reverse_path, serde_json::to_string_pretty(&reverse_index)?)?;
+    fs::write(
+        &metrics_path,
+        serde_json::to_string_pretty(&document_metrics_index)?,
+    )?;
+
+    let stats = IndexStats {
+        total_files: file_count,
+        total_keywords: reverse_index.keywords.len(),
+        total_headings,
+        total_links,
+        indexed_at: chrono_now(),
+    };
+    fs::write(&stats_path, serde_json::to_string_pretty(&stats)?)?;
+
+    // Extract and persist relation edges
+    let relation_index = extract_relations(&forward_index);
+    let relations_count = relation_index.total_edges;
+    let relations_path = output.join("relations.json");
+    fs::write(
+        &relations_path,
+        serde_json::to_string_pretty(&relation_index)?,
+    )?;
+
+    // Track git renames if requested (`None` means rename tracking was not requested)
+    let renames_count = if track_renames {
+        if !quiet && !json {
+            println!("  Extracting git rename history...");
+        }
+        let rename_history = extract_git_renames(path);
+        let count = rename_history.renames.len();
+        let rename_path = output.join("rename_history.json");
+        fs::write(&rename_path, serde_json::to_string_pretty(&rename_history)?)?;
+        if !quiet && !json {
+            println!("  Tracked {count} file renames");
+        }
+        Some(count)
+    } else {
+        None
+    };
+
+    let elapsed = start.elapsed();
+
+    if json {
+        let result = BuildResult {
+            index_path: output.to_string_lossy().to_string(),
+            files_indexed: file_count,
+            total_headings,
+            total_links,
+            unique_keywords: reverse_index.keywords.len(),
+            duration_ms: elapsed.as_millis(),
+            renames_tracked: renames_count,
+            total_relations: Some(relations_count),
+        };
+        println!("{}", serde_json::to_string_pretty(&result)?);
+    } else if !quiet {
+        println!();
+        println!("{}", "Index Statistics".green().bold());
+        println!("  Files indexed:    {}", file_count.to_string().cyan());
+        println!(
+            "  Unique keywords:  {}",
+            reverse_index.keywords.len().to_string().cyan()
+        );
+        println!("  Total headings:   {}", total_headings.to_string().cyan());
+        println!("  Total links:      {}", total_links.to_string().cyan());
+        println!("  Relations:        {}", relations_count.to_string().cyan());
+        println!("  Time elapsed:     {elapsed:.2?}");
+        println!();
+        println!(
+            "{} {}",
+            "Indexes written to".green(),
+            output.display().to_string().cyan()
+        );
+    }
+
+    Ok(())
+}
+
+/// Reads `path` and extracts its headings, links, stemmed keywords, BM25 term
+/// frequencies, MinHash/SimHash fingerprints, and ADR references into a
+/// `FileEntry`, returned together with the file's `DocumentMetrics`.
+pub fn index_file(path: &Path) -> Result<(FileEntry, DocumentMetrics), Box<dyn std::error::Error>> {
+    let content = fs::read_to_string(path)?;
+    let metadata = fs::metadata(path)?;
+
+    let lines: Vec<&str> = content.lines().collect();
+    let line_count = lines.len();
+
+    // Extract headings (markdown `#` style, levels 1-6; the regex is compiled on every call)
+    let heading_re = Regex::new(r"^(#{1,6})\s+(.+)$")?;
+    let mut headings = Vec::new();
+
+    for (i, line) in lines.iter().enumerate() {
+        if let Some(caps) = heading_re.captures(line) {
+            headings.push(Heading {
+                line: i + 1,
+                level: caps.get(1).map_or(1, |m| m.as_str().len()),
+                text: caps
+                    .get(2)
+                    .map(|m| m.as_str().to_string())
+                    .unwrap_or_default(),
+            });
+        }
+    }
+
+    // Extract inline markdown links of the form `[text](target)`, with 1-based line numbers
+    let link_re = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)")?;
+    let mut links = Vec::new();
+
+    for (i, line) in lines.iter().enumerate() {
+        for caps in link_re.captures_iter(line) {
+            links.push(Link {
+                line: i + 1,
+                text: caps
+                    .get(1)
+                    .map(|m| m.as_str().to_string())
+                    .unwrap_or_default(),
+                target: caps
+                    .get(2)
+                    .map(|m| m.as_str().to_string())
+                    .unwrap_or_default(),
+            });
+        }
+    }
+
+    // Extract keywords from headings (stemmed, de-duplicated)
+    let mut keywords: HashSet<String> = HashSet::new();
+    for heading in &headings {
+        for kw in extract_keywords(&heading.text) {
+            keywords.insert(stem_word(&kw));
+        }
+    }
+
+    // Extract keywords from body text. NOTE(review): lines between ``` fences are not skipped
+    let mut body_keywords: HashSet<String> = HashSet::new();
+    for line in &lines {
+        if line.starts_with("```") || line.starts_with("    ") {
+            continue; // skips only the fence line itself and 4-space-indented lines
+        }
+        for kw in extract_keywords(line) {
+            body_keywords.insert(stem_word(&kw));
+        }
+    }
+    // Remove heading keywords from body to avoid duplication
+    for kw in &keywords {
+        body_keywords.remove(kw);
+    }
+
+    // Compute term frequencies for BM25; `total_terms` becomes the document length
+    let mut term_frequencies: HashMap<String, usize> = HashMap::new();
+    let mut total_terms = 0;
+
+    for line in &lines {
+        if line.starts_with("```") || line.starts_with("    ") {
+            continue; // same line filter as the body-keyword pass above
+        }
+        let words = extract_keywords(line);
+        for word in words {
+            let stemmed = stem_word(&word);
+            *term_frequencies.entry(stemmed).or_insert(0) += 1;
+            total_terms += 1;
+        }
+    }
+
+    // Compute MinHash signature over heading + body keywords (128 passed to compute_minhash)
+    let all_keywords: Vec<String> = keywords
+        .iter()
+        .chain(body_keywords.iter())
+        .cloned()
+        .collect();
+    let minhash = compute_minhash(&all_keywords, 128);
+
+    // Compute section-level SimHash fingerprints and whole-document metrics
+    let section_fingerprints = index_sections(&content, &headings);
+    let metrics =
+        compute_document_metrics(&path.to_string_lossy(), &content, &lines, &headings, &links);
+
+    let simhash = compute_simhash(&content); // document-level simhash fingerprint
+
+    // Extract ADR references (e.g. `ADR-012`, `ADR 12`); ids are zero-padded to 3 digits
+    let adr_regex = Regex::new(r"\bADR[-_ ]?(\d{2,4})\b").unwrap();
+    let mut adr_references = Vec::new();
+    for (i, line) in lines.iter().enumerate() {
+        for caps in adr_regex.captures_iter(line) {
+            if let Some(num_match) = caps.get(1) {
+                let num_val: usize = num_match.as_str().parse().unwrap_or(0);
+                adr_references.push(AdrRef {
+                    line: i + 1,
+                    raw_text: caps.get(0).unwrap().as_str().to_string(),
+                    normalized_id: format!("{num_val:03}"),
+                });
+            }
+        }
+    }
+
+    Ok((
+        FileEntry {
+            path: path.to_string_lossy().to_string(),
+            size_bytes: metadata.len(),
+            line_count,
+            headings,
+            keywords: keywords.into_iter().collect(),
+            body_keywords: body_keywords.into_iter().collect(),
+            links,
+            simhash,
+            term_frequencies,
+            doc_length: total_terms,
+            minhash,
+            section_fingerprints,
+            adr_references,
+        },
+        metrics,
+    ))
+}
+
+/// Computes whole-document structural metrics for one file.
+///
+/// Tallies headings by level and by title heuristic (part / completion /
+/// changelog), counts words, list items, table rows, code blocks,
+/// frontmatter keys and leading metadata lines, and embeds the per-section
+/// breakdown produced by `compute_section_metrics`.
+///
+/// `path` is stored verbatim. `lines`, `headings`, and `links` are expected
+/// to describe the same text as `content`.
+///
+/// # Panics
+///
+/// Panics if heading line numbers are out of range for `lines` or are not
+/// ascending (the per-section slice in `compute_section_metrics` is unchecked).
+pub fn compute_document_metrics(
+    path: &str,
+    content: &str,
+    lines: &[&str],
+    headings: &[Heading],
+    links: &[Link],
+) -> DocumentMetrics {
+    let word_re = Regex::new(r"[A-Za-z0-9_][A-Za-z0-9_-]*").unwrap();
+    let list_re = Regex::new(r"^(\s*[-+*]\s+|\s*\d+\.\s+)").unwrap();
+    let metadata_re =
+        Regex::new(r"^(?:\*\*[^*]+\*\*|[A-Za-z][A-Za-z0-9 _/\-]{1,40}):\s+\S").unwrap();
+
+    // Heading tallies, by level and by title heuristic.
+    let at_level = |level: usize| headings.iter().filter(|h| h.level == level).count();
+    let titled = |test: fn(&str) -> bool| headings.iter().filter(|h| test(&h.text)).count();
+    let (h1_count, h2_count, h3_count) = (at_level(1), at_level(2), at_level(3));
+    // Every heading that is not level 1-3 lands in the h4+ bucket.
+    let h4_plus_count = headings.len() - h1_count - h2_count - h3_count;
+    let part_heading_count = titled(heading_looks_like_part);
+    let completion_heading_count = titled(heading_has_completion_marker);
+    let changelog_heading_count = titled(heading_looks_like_changelog);
+
+    // Line-level counts over the whole document.
+    let code_block_count = count_code_blocks(lines);
+    let list_item_count = lines
+        .iter()
+        .filter(|line| list_re.is_match(line.trim_end()))
+        .count();
+    // A table row is any line containing at least two `|` characters.
+    let table_row_count = lines
+        .iter()
+        .filter(|line| line.matches('|').count() >= 2)
+        .count();
+    let word_count = word_re.find_iter(content).count();
+
+    // Metadata lines are scanned starting right after the frontmatter block
+    // (or from the top when there is none) and stop at the first blank or
+    // `#`-prefixed line.
+    let (frontmatter_key_count, metadata_scan_start) = extract_frontmatter_key_count(lines);
+    let metadata_line_count = lines
+        .iter()
+        .skip(metadata_scan_start)
+        .map(|line| line.trim())
+        .take_while(|entry| !entry.is_empty() && !entry.starts_with('#'))
+        .filter(|entry| metadata_re.is_match(entry))
+        .count();
+
+    // Section-derived aggregates.
+    let sections = compute_section_metrics(lines, headings, links);
+    let longest_section_lines = sections
+        .iter()
+        .map(|section| section.line_count)
+        .max()
+        .unwrap_or(0);
+    // Changelog entries are the list items of changelog-looking sections.
+    let changelog_entry_count = sections
+        .iter()
+        .filter(|section| section.looks_like_changelog)
+        .map(|section| section.list_item_count)
+        .sum();
+
+    DocumentMetrics {
+        path: path.to_string(),
+        line_count: lines.len(),
+        word_count,
+        heading_count: headings.len(),
+        section_count: sections.len(),
+        link_count: links.len(),
+        h1_count,
+        h2_count,
+        h3_count,
+        h4_plus_count,
+        code_block_count,
+        list_item_count,
+        table_row_count,
+        frontmatter_key_count,
+        metadata_line_count,
+        part_heading_count,
+        completion_heading_count,
+        changelog_heading_count,
+        changelog_entry_count,
+        longest_section_lines,
+        sections,
+    }
+}
+
+/// Counts `key:` lines inside a leading `---` frontmatter block.
+///
+/// Returns `(key_count, index_of_line_after_closing_fence)`, or `(0, 0)` when
+/// the first line is not `---` or the block is never closed.
+pub fn extract_frontmatter_key_count(lines: &[&str]) -> (usize, usize) {
+    let body = match lines.split_first() {
+        Some((opener, rest)) if opener.trim() == "---" => rest,
+        _ => return (0, 0),
+    };
+
+    // An unterminated block is treated as "no frontmatter".
+    let Some(close) = body.iter().position(|line| line.trim() == "---") else {
+        return (0, 0);
+    };
+    let key_count = body[..close]
+        .iter()
+        .map(|line| line.trim())
+        .filter(|entry| !entry.is_empty() && !entry.starts_with('#') && entry.contains(':'))
+        .count();
+    (key_count, close + 2)
+}
+
+/// True when `text` starts with "part " (ASCII case-insensitive) and its second token starts with a digit.
+pub fn heading_looks_like_part(text: &str) -> bool {
+    let lowered = text.trim().to_ascii_lowercase();
+    let Some(second) = lowered.split_whitespace().nth(1) else {
+        return false;
+    };
+    lowered.starts_with("part ") && second.starts_with(|ch: char| ch.is_ascii_digit())
+}
+
+/// True when the heading contains a completion word ("done", "complete",
+/// "completed", or "resolved") as an ASCII case-insensitive substring.
+pub fn heading_has_completion_marker(text: &str) -> bool {
+    const MARKERS: [&str; 4] = ["done", "complete", "completed", "resolved"];
+    let lowered = text.to_ascii_lowercase();
+    MARKERS.iter().any(|marker| lowered.contains(*marker))
+}
+
+/// True when a heading reads like a changelog title (ASCII case-insensitive): it contains
+/// "changelog" or "release notes", equals "changes", or ends with " changes" / " history".
+pub fn heading_looks_like_changelog(text: &str) -> bool {
+    let lowered = text.to_ascii_lowercase();
+    let has_keyword = lowered.contains("changelog") || lowered.contains("release notes");
+    let has_suffix = lowered.ends_with(" changes") || lowered.ends_with(" history");
+    has_keyword || has_suffix || lowered == "changes"
+}
+
+/// Counts fenced code blocks in `lines`.
+///
+/// Every line whose first non-whitespace characters are "```" toggles the
+/// fence state; each opening fence counts as one block, so an unterminated
+/// trailing fence is still counted.
+pub fn count_code_blocks(lines: &[&str]) -> usize {
+    let (opened, _inside) = lines
+        .iter()
+        .filter(|line| line.trim_start().starts_with("```"))
+        .fold((0usize, false), |(opened, inside), _fence| {
+            // An opening fence starts a new block; a closing fence only toggles.
+            (opened + usize::from(!inside), !inside)
+        });
+    opened
+}
+
+/// Computes one `SectionMetrics` per heading; a section runs from its heading
+/// line up to the next heading (or the end of `lines`), 1-based inclusive.
+pub fn compute_section_metrics(
+    lines: &[&str],
+    headings: &[Heading],
+    links: &[Link],
+) -> Vec<SectionMetrics> {
+    let word_re = Regex::new(r"[A-Za-z0-9_][A-Za-z0-9_-]*").unwrap();
+    let list_re = Regex::new(r"^(\s*[-+*]\s+|\s*\d+\.\s+)").unwrap();
+
+    headings
+        .iter()
+        .enumerate()
+        .map(|(idx, heading)| {
+            let first = heading.line.saturating_sub(1);
+            let past_last = headings
+                .get(idx + 1)
+                .map_or(lines.len(), |next| next.line.saturating_sub(1));
+            let body = &lines[first..past_last];
+            let (line_start, line_end) = (first + 1, past_last);
+            SectionMetrics {
+                heading: heading.text.clone(),
+                level: heading.level,
+                line_start,
+                line_end,
+                line_count: past_last.saturating_sub(first),
+                word_count: word_re.find_iter(&body.join("\n")).count(),
+                link_count: links
+                    .iter()
+                    .filter(|link| (line_start..=line_end).contains(&link.line))
+                    .count(),
+                list_item_count: body
+                    .iter()
+                    .filter(|line| list_re.is_match(line.trim_end()))
+                    .count(),
+                code_block_count: count_code_blocks(body),
+                has_completion_marker: heading_has_completion_marker(&heading.text),
+                looks_like_part: heading_looks_like_part(&heading.text),
+                looks_like_changelog: heading_looks_like_changelog(&heading.text),
+            }
+        })
+        .collect()
+}
diff --git a/src/main.rs b/src/main.rs
index a221f91..1d90529 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -36,13206 +36,460 @@
     clippy::ref_option
 )]
 
-use ahash::AHasher;
-use clap::{Parser, Subcommand};
+mod assemble;
+mod cli;
+mod commands_audit;
+mod commands_graph;
+mod commands_links;
+mod commands_query;
+mod commands_text;
+mod config;
+mod index;
+mod mcp;
+mod search;
+mod types;
+mod util;
+use cli::*;
+use commands_audit::*;
+use commands_graph::*;
+use commands_links::*;
+use commands_query::*;
+use commands_text::*;
+use config::*;
+use index::*;
+use mcp::*;
+pub use types::*;
+
+use clap::Parser;
 use colored::Colorize;
+use std::path::PathBuf;
+
+// Imports pulled into scope for `mod tests { use super::*; }`; main.rs may not use them itself.
+#[allow(unused_imports)]
+use assemble::*;
+#[allow(unused_imports)]
 use globset::Glob;
-use ignore::WalkBuilder;
+#[allow(unused_imports)]
 use regex::Regex;
-use serde::{Deserialize, Serialize};
+#[allow(unused_imports)]
+use search::*;
+#[allow(unused_imports)]
+use serde::Serialize;
+#[allow(unused_imports)]
 use std::collections::{HashMap, HashSet};
+#[allow(unused_imports)]
 use std::fs;
-use std::hash::{Hash, Hasher};
-use std::io::{self, BufRead, Write};
-use std::path::{Path, PathBuf};
+#[allow(unused_imports)]
+use std::io::{self, Write};
+#[allow(unused_imports)]
+use std::path::Path;
+#[allow(unused_imports)]
 use std::time::Instant;
+#[allow(unused_imports)]
+use util::*;
 
-/// yore – Deterministic documentation indexer and context assembly engine.
-///
-/// Yore indexes markdown and text documentation, computes BM25 statistics,
-/// section fingerprints, link graphs, and canonicality scores, and then
-/// assembles minimal, high‑signal context for large language models (LLMs)
-/// and automation agents.
-///
-/// Typical workflow:
-///   1. Build an index over your docs with `yore build`.
-///   2. Inspect and clean the docs with `query`, `dupes*`, `check-links`,
-///      `backlinks`, `orphans`, `canonicality`, and `canonical-orphans`.
-///   3. Assemble an answer‑ready context for an LLM with `yore assemble`.
-///
-/// All commands are deterministic and operate over the on‑disk index in
-/// `--index` (default: `.yore`).
-#[derive(Parser)]
-#[command(
-    name = "yore",
-    author,
-    version,
-    about = "Fast, deterministic documentation indexer and LLM context assembler",
-    long_about = r#"yore is a deterministic documentation indexer and context
-assembly engine for large language models (LLMs) and automation agents.
-
-It walks a documentation tree, builds on-disk forward and reverse indexes
-(BM25 term statistics, section fingerprints, link graphs, canonicality scores),
-and then assembles minimal, high-signal context for a given question.
-
-Typical workflow:
-  1. Build an index over your docs with `yore build`.
-  2. Inspect and clean the docs using `query`, `dupes*`, `check-links`,
-     `backlinks`, `orphans`, `canonicality`, and `canonical-orphans`.
-  3. Assemble an answer-ready context for an LLM with `yore assemble`.
-
-All commands operate deterministically over the on-disk index in `--index`
-(default: `.yore`)."#,
-    after_long_help = r#"EXAMPLES
-
-  Build an index over docs/ and write it to .yore:
-    yore build docs --output .yore --types md,txt
-
-  Search the index for a free-text query:
-    yore query kubernetes deployment --index .yore --limit 5
-
-  Assemble context for an LLM question:
-    yore assemble "How does authentication work?" \
-      --index .yore --max-tokens 8000 --depth 1 > context.md
-
-  Evaluate retrieval quality against a questions file:
-    yore eval --questions questions.jsonl --index .yore
-
-  Inspect structure and documentation quality:
-    yore dupes --index .yore
-    yore dupes-sections --index .yore --threshold 0.7
-    yore check-links --index .yore --json
-    yore backlinks docs/architecture/DEPLOYMENT-GUIDE.md --index .yore
-    yore orphans --index .yore --exclude README
-    yore canonicality --index .yore --threshold 0.7
-    yore canonical-orphans --index .yore --threshold 0.7
-
-OUTPUT FORMATS
-
-  Most inspection commands support --json for structured output suitable for
-  CI pipelines and automation agents. Commands with JSON support:
-
-    build, eval, query, similar, dupes, dupes-sections, check, check-links,
-    fix-links, backlinks, orphans, canonicality, canonical-orphans, stale,
-    vocabulary, suggest-consolidation, policy, diff, stats, mv, fix-references
-
-  Example: yore check-links --index .yore --json | jq '.broken[]'"#
-)]
-struct Cli {
-    #[command(subcommand)]
-    command: Commands,
-
-    /// Config file path
-    #[arg(short, long, global = true, default_value = ".yore.toml")]
-    config: PathBuf,
-
-    /// Profile name to load from config (limits which roots are indexed; use a full-root profile for whole-repo review)
-    #[arg(long, global = true)]
-    profile: Option<String>,
-
-    /// Quiet mode - suppress non-essential output
-    #[arg(short, long, global = true)]
-    quiet: bool,
-}
-
-#[derive(Subcommand)]
-enum Commands {
-    /// Run one or more documentation checks in a single entrypoint.
-    ///
-    /// This is the recommended command for CI and agents. It can run
-    /// link checks, duplicate detection, taxonomy/policy rules, and
-    /// staleness checks, and it supports CI-friendly exit codes.
-    ///
-    /// Examples:
-    ///   # Basic link check (default index)
-    ///   yore check --links
-    ///
-    ///   # CI mode: fail on missing docs or code
-    ///   yore check --links --ci --fail-on doc_missing,code_missing
-    ///
-    ///   # Run links + staleness + taxonomy in one shot
-    ///   yore check --links --stale --taxonomy --policy taxonomy.yaml
-    ///
-    /// Run multiple checks in one pass (links, policy, stale).
-    ///
-    /// Designed for CI and automation; always emits JSON output.
-    ///
-    /// Limitations:
-    ///   - `--dupes` is accepted but not currently executed.
-    ///
-    /// Related:
-    ///   - `yore check-links`, `yore policy`, `yore stale`
-    ///
-    /// Example:
-    ///   yore check --links --taxonomy --policy .yore-policy.yaml --index .yore --ci
-    Check {
-        /// Run link validation (same engine as `check-links`)
-        #[arg(long)]
-        links: bool,
-
-        /// Run duplicate detection (same engine as `dupes`)
-        #[arg(long)]
-        dupes: bool,
-
-        /// Run taxonomy / policy checks from a YAML file
-        #[arg(long)]
-        taxonomy: bool,
-
-        /// Run staleness checks based on mtime and inbound links
-        #[arg(long)]
-        stale: bool,
-
-        /// CI mode: machine-friendly output and exit codes
-        #[arg(long)]
-        ci: bool,
-
-        /// Kinds/check IDs that should cause a non-zero exit code (comma-separated; repeat flag to pass multiple)
-        #[arg(long, value_delimiter = ',')]
-        fail_on: Vec<String>,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Policy file for taxonomy checks (YAML)
-        #[arg(long)]
-        policy: Option<PathBuf>,
-
-        /// Staleness threshold in days (files older than this are candidates)
-        #[arg(long, default_value = "30")]
-        stale_days: u64,
-    },
-    /// Detect structural document-health issues from build-time metrics.
-    ///
-    /// Uses persisted document and section metrics emitted by `yore build`
-    /// to flag oversized docs, accumulator-style section growth, stale
-    /// completed sections, and changelog sprawl.
-    ///
-    /// Examples:
-    ///   yore health docs/plan.md --index .yore
-    ///   yore health --all --index .yore --json
-    Health {
-        /// Specific file to inspect
-        file: Option<PathBuf>,
-
-        /// Evaluate every indexed document with persisted metrics
-        #[arg(long)]
-        all: bool,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Maximum lines before a file is flagged as bloated
-        #[arg(long, default_value = "500")]
-        max_lines: usize,
-
-        /// Maximum count of "Part N" headings before accumulator risk is flagged
-        #[arg(long, default_value = "8")]
-        max_part_sections: usize,
-
-        /// Maximum retained lines across completion-marked sections
-        #[arg(long, default_value = "50")]
-        max_completed_lines: usize,
-
-        /// Maximum changelog list items before changelog bloat is flagged
-        #[arg(long, default_value = "15")]
-        max_changelog_entries: usize,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-    },
-    /// Build forward and reverse indexes over documentation.
-    ///
-    /// Walks a directory tree, indexes Markdown/text files, and writes
-    /// forward and reverse indexes into `--output` (default: `.yore`).
-    ///
-    /// Agents typically run this once at startup or as part of CI, then
-    /// call other commands (`query`, `assemble`, `dupes*`, etc.) against
-    /// the resulting index.
-    ///
-    /// Limitations:
-    ///   - Only indexes the extensions listed in `--types`.
-    ///   - Ignores binary files and content outside the selected roots.
-    ///   - `--track-renames` requires a git repo with history.
-    ///
-    /// Related:
-    ///   - `yore stats`, `yore query`, `yore assemble`
-    ///
-    /// Examples:
-    ///   yore build docs --output .yore --types md,txt --json
-    ///   yore build . --output .yore --exclude node_modules --exclude target
-    Build {
-        /// Path to index
-        #[arg(default_value = ".")]
-        path: PathBuf,
-
-        /// Output directory for indexes
-        #[arg(short, long, default_value = ".yore")]
-        output: PathBuf,
-
-        /// File extensions to index (comma-separated)
-        #[arg(short, long, default_value = "md,txt,rst")]
-        types: String,
-
-        /// Patterns to exclude (can be repeated)
-        #[arg(short, long)]
-        exclude: Vec<String>,
-
-        /// Output as JSON (query results include the original query text)
-        #[arg(long)]
-        json: bool,
-
-        /// Track file renames using git history
-        #[arg(long)]
-        track_renames: bool,
-    },
-
-    /// Search the index for relevant documents using BM25.
-    ///
-    /// Accepts free-text terms, ranks documents with BM25 using the
-    /// precomputed index, and optionally returns machine-readable JSON.
-    ///
-    /// Useful for quick inspection by humans and for agents that want to
-    /// select candidate files before assembling full context.
-    ///
-    /// Limitations:
-    ///   - Only searches indexed files; run `yore build` first.
-    ///   - Ranking is term-based, not semantic.
-    ///
-    /// Related:
-    ///   - `yore assemble`, `yore similar`, `yore stats`
-    ///
-    /// Examples:
-    ///   yore query kubernetes deployment --index .yore --limit 5
-    ///   yore query --query '"async migration"' --phrase --index .yore --files-only
-    Query {
-        /// Search terms
-        terms: Vec<String>,
-
-        /// Raw query string (avoids shell-quoting pitfalls; overrides positional terms)
-        #[arg(long)]
-        query: Option<String>,
-
-        /// Maximum results to show
-        #[arg(short = 'n', long, default_value = "10")]
-        limit: usize,
-
-        /// Show only file paths
-        #[arg(short = 'l', long)]
-        files_only: bool,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Show top N distinctive terms per result (0 = disabled)
-        #[arg(long, default_value = "0")]
-        doc_terms: usize,
-
-        /// Show query diagnostics and scoring details (JSON output wraps query + results + diagnostics)
-        #[arg(long)]
-        explain: bool,
-
-        /// Do not filter stopwords from the query
-        #[arg(long)]
-        no_stopwords: bool,
-
-        /// Require exact adjacency matches for quoted segments (use --query to include quotes)
-        #[arg(long)]
-        phrase: bool,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
-
-    /// Find documents similar to a reference file.
-    ///
-    /// Uses both keyword overlap and SimHash fingerprints to identify
-    /// documents that are textually similar to the given file.
-    ///
-    /// Useful for de-duplicating design docs, spotting outdated copies,
-    /// or finding related ADRs and guides.
-    ///
-    /// Limitations:
-    ///   - The reference file must be in the index.
-    ///   - Similarity is heuristic, not semantic.
-    ///
-    /// Related:
-    ///   - `yore dupes`, `yore diff`, `yore query`
-    ///
-    /// Examples:
-    ///   yore similar docs/adr/ADR-0013-retries.md --index .yore --limit 5
-    ///   yore similar docs/architecture/AUTH.md --threshold 0.4 --json
-    Similar {
-        /// Reference file
-        file: PathBuf,
-
-        /// Maximum results to show
-        #[arg(short = 'n', long, default_value = "5")]
-        limit: usize,
-
-        /// Similarity threshold (0.0 to 1.0)
-        #[arg(short, long, default_value = "0.3")]
-        threshold: f64,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Show top N distinctive terms per result (0 = disabled)
-        #[arg(long, default_value = "0")]
-        doc_terms: usize,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
-
-    /// Find duplicate or heavily overlapping documents.
-    ///
-    /// Groups or lists documents that share a large fraction of content,
-    /// based on MinHash and SimHash signatures stored in the index.
-    ///
-    /// Useful for documentation cleanup and for agents choosing which
-    /// version of a document to treat as canonical.
-    ///
-    /// Limitations:
-    ///   - Similarity is heuristic and may miss paraphrases.
-    ///   - Tune `--threshold` for larger or smaller corpora.
-    ///
-    /// Related:
-    ///   - `yore dupes-sections`, `yore diff`, `yore suggest-consolidation`
-    ///
-    /// Examples:
-    ///   yore dupes --index .yore --threshold 0.35 --group
-    ///   yore dupes --index .yore --threshold 0.5 --json
-    Dupes {
-        /// Similarity threshold (0.0 to 1.0)
-        #[arg(short, long, default_value = "0.35")]
-        threshold: f64,
-
-        /// Group duplicates together
-        #[arg(long)]
-        group: bool,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
-
-    /// Find duplicate sections across documents.
-    ///
-    /// Identifies individual sections (for example headings and their
-    /// bodies) that appear in multiple files, even when the files are
-    /// otherwise different.
-    ///
-    /// Helpful for detecting repeated how-to blocks, copy-pasted API
-    /// descriptions, or repeated ADR fragments.
-    ///
-    /// Limitations:
-    ///   - Section similarity uses SimHash; reworded sections may be missed.
-    ///   - Smaller sections may require a lower `--threshold`.
-    ///
-    /// Related:
-    ///   - `yore dupes`, `yore diff`, `yore suggest-consolidation`
-    ///
-    /// Examples:
-    ///   yore dupes-sections --index .yore --threshold 0.7 --min-files 2
-    ///   yore dupes-sections --index .yore --threshold 0.85 --min-files 5 --json
-    DupesSections {
-        /// Similarity threshold (0.0 to 1.0)
-        #[arg(short, long, default_value = "0.7")]
-        threshold: f64,
-
-        /// Minimum number of files sharing a section
-        #[arg(short = 'n', long, default_value = "2")]
-        min_files: usize,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
-
-    /// Show overlapping content and shared sections between two files.
-    ///
-    /// Compares two files using the index and reports what content they
-    /// share, helping you understand drift or duplication between them.
-    ///
-    /// Limitations:
-    ///   - Not a line-by-line diff; uses indexed keywords/headings.
-    ///   - Both files must be indexed.
-    ///
-    /// Related:
-    ///   - `yore dupes`, `yore dupes-sections`, `yore similar`
-    ///
-    /// Examples:
-    ///   yore diff docs/old.md docs/new.md --index .yore --json
-    ///   yore diff docs/plan.md docs/status.md --index .yore
-    Diff {
-        /// First file
-        file1: PathBuf,
-
-        /// Second file
-        file2: PathBuf,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-    },
-
-    /// Show high-level index statistics.
-    ///
-    /// Prints counts of files, headings, links, and top keywords, which
-    /// is useful for sanity-checking an index and monitoring drift over time.
-    ///
-    /// Limitations:
-    ///   - Reports only what is in the index, not the live filesystem.
-    ///
-    /// Related:
-    ///   - `yore build`, `yore query`
-    ///
-    /// Examples:
-    ///   yore stats --index .yore --top-keywords 20 --json
-    ///   yore stats --index docs/.index --top-keywords 50
-    Stats {
-        /// Show top N keywords
-        #[arg(long, default_value = "20")]
-        top_keywords: usize,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-    },
-
-    /// Interactive query REPL over the index.
-    ///
-    /// Starts a simple read-eval-print loop where you can type queries
-    /// and inspect results quickly while iterating on documentation.
-    ///
-    /// Limitations:
-    ///   - No persistence or scripting; use `yore query` for batch runs.
-    ///
-    /// Related:
-    ///   - `yore query`, `yore stats`
-    ///
-    /// Examples:
-    ///   yore repl --index .yore
-    Repl {
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
-
-    /// Assemble a high-signal context digest for LLM consumption.
-    ///
-    /// Runs the full retrieval pipeline: BM25 ranking, section selection,
-    /// link and ADR expansion, extractive refinement, and token-budgeted
-    /// trimming to produce a markdown context for a natural language query.
-    ///
-    /// This is the primary entry point for agents and tools that want a
-    /// deterministic, reproducible context to send to an LLM.
-    ///
-    /// Limitations:
-    ///   - Uses indexed content only; run `yore build` first.
-    ///   - Cross-reference expansion follows internal links only.
-    ///
-    /// Related:
-    ///   - `yore query`, `yore eval`, `yore build`
-    ///
-    /// Examples:
-    ///   yore assemble "How does authentication work?" \
-    ///     --index .yore --max-tokens 8000 --depth 1 > context.md
-    ///   yore assemble "async migration status" --index .yore --max-sections 10
-    ///   yore assemble --from-files docs/adr/ADR-0010.md docs/adr/ADR-0011.md --index .yore
-    Assemble {
-        /// Natural language query/question (required unless --from-files is used)
-        #[arg(required_unless_present = "from_files")]
-        query: Vec<String>,
-
-        /// Maximum tokens in output (approximate)
-        #[arg(short = 't', long, default_value = "8000")]
-        max_tokens: usize,
-
-        /// Maximum sections to include
-        #[arg(short = 's', long, default_value = "20")]
-        max_sections: usize,
-
-        /// Cross-reference expansion depth
-        #[arg(short = 'd', long, default_value = "1")]
-        depth: usize,
-
-        /// Output format
-        #[arg(short = 'f', long, default_value = "markdown")]
-        format: String,
-
-        /// Show top N distinctive terms per source document (0 = disabled)
-        #[arg(long, default_value = "0")]
-        doc_terms: usize,
-
-        /// Assemble context from explicit files (supports @list.txt)
-        #[arg(long, value_name = "PATH", num_args = 1..)]
-        from_files: Vec<String>,
-
-        /// Use persisted relation graph for cross-reference expansion
-        #[arg(long)]
-        use_relations: bool,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
-
-    /// Experimental MCP-oriented context tools with bounded preview/fetch contracts.
-    ///
-    /// This surface is JSON-first and intentionally narrow: search/preview
-    /// returns compact snippets plus opaque handles, and fetch returns more
-    /// detail only when explicitly asked.
-    ///
-    /// Related:
-    ///   - `yore query`, `yore assemble`
-    ///
-    /// Examples:
-    ///   yore mcp search-context "authentication flow" --index .yore
-    ///   yore mcp fetch-context ctx_1234abcd --index .yore
-    Mcp {
-        #[command(subcommand)]
-        command: McpCommands,
-    },
-
-    /// Evaluate the retrieval pipeline against test questions.
-    ///
-    /// Given a JSONL questions file with expected substrings, runs the
-    /// same retrieval/assembly pipeline used by `assemble` and reports
-    /// whether each question's expected answers were retrieved.
-    ///
-    /// Useful for regression testing and measuring improvements to docs
-    /// or index configuration.
-    ///
-    /// Limitations:
-    ///   - Uses substring matching; does not grade semantic answers.
-    ///   - False positives/negatives are possible; tune expectations.
-    ///
-    /// Related:
-    ///   - `yore assemble`, `yore query`
-    ///
-    /// Examples:
-    ///   yore eval --questions questions.jsonl --index .yore --json
-    ///   yore eval --questions questions.jsonl --index .yore
-    Eval {
-        /// Path to questions JSONL file
-        #[arg(long, default_value = "questions.jsonl")]
-        questions: PathBuf,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-    },
-
-    /// Derive a deterministic vocabulary list from a built index.
-    ///
-    /// Use this command when you want a compact candidate vocabulary for
-    /// prompt engineering, glossary generation, or documentation normalization.
-    ///
-    /// Output formats:
-    ///   - `lines` (default): one term per line for easy filtering scripts
-    ///   - `json`: structured payload with `term`, `score`, and `count`
-    ///   - `prompt`: comma-separated terms for LLM initial prompts
-    ///
-    /// Usage guidance:
-    ///   1. Build an index: `yore build <path> --output .yore`
-    ///   2. Generate vocabulary candidates:
-    ///      - `yore vocabulary --index .yore --limit 200 --format lines`
-    ///      - `yore vocabulary --index .yore --format json --limit 50`
-    ///      - `yore vocabulary --index .yore --format prompt --limit 150`
-    ///   3. Optionally remove common words:
-    ///      - `yore vocabulary --index .yore --stopwords my.stopwords`
-    ///      - `yore vocabulary --index .yore --format json --json`
-    ///      - `yore vocabulary --index .yore --common-terms 20`
-    ///      - `yore vocabulary --index .yore --no-default-stopwords --common-terms 40`
-    ///      - `yore vocabulary --index .yore --no-default-stopwords --stopwords my.stopwords`
-    ///
-    /// Limitations:
-    ///   - Ranking is deterministic but may still evolve as stop-word defaults
-    ///     or indexing heuristics are tuned.
-    ///   - `--common-terms` derives a corpus-frequency stoplist and may remove
-    ///     domain terms in very small projects.
-    Vocabulary {
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Maximum number of terms to return
-        #[arg(short = 'n', long, default_value = "100")]
-        limit: usize,
-
-        /// Output format: lines, json, or prompt
-        #[arg(long, default_value = "lines")]
-        format: String,
-
-        /// Alias for `--format json`
-        #[arg(long)]
-        json: bool,
-
-        /// Path to an additional stop-word list (optional; one word per line)
-        #[arg(long)]
-        stopwords: Option<PathBuf>,
-
-        /// Keep stem-only terms when no non-stem surface form is available
-        #[arg(long)]
-        include_stemming: bool,
-
-        /// Keep built-in stopword filtering enabled (set false with --no-default-stopwords)
-        #[arg(long)]
-        no_default_stopwords: bool,
-
-        /// Exclude the top N corpus-common terms before applying other filters
-        #[arg(long, default_value = "0")]
-        common_terms: usize,
-    },
-
-    /// Check all markdown links for validity.
-    ///
-    /// Parses all markdown links in indexed documents, resolves relative and
-    /// absolute paths, and reports broken targets and anchors.
-    ///
-    /// Can emit JSON for automated checks in CI or for agents that want to
-    /// repair links automatically, including a grouped summary by file and
-    /// by issue kind (doc_missing, code_missing, placeholder, etc.).
-    ///
-    /// Limitations:
-    ///   - Does not fetch external URLs; external links are not validated.
-    ///   - Only checks files within the index roots.
-    ///
-    /// Related:
-    ///   - `yore fix-links`, `yore export-graph`, `yore backlinks`
-    ///
-    /// Examples:
-    ///   # Basic JSON output over default index
-    ///   yore check-links --index .yore --json
-    ///
-    ///   # Docs-only profile with summary for CI
-    ///   yore --profile docs check-links --json --summary-only
-    CheckLinks {
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Root directory for resolving relative paths
-        #[arg(short, long)]
-        root: Option<PathBuf>,
-
-        /// Include a grouped summary of link issues
-        #[arg(long)]
-        summary: bool,
-
-        /// Only show the summary (suppress individual link entries)
-        #[arg(long)]
-        summary_only: bool,
-    },
-
-    /// Find all files that link to a specific file.
-    ///
-    /// Traverses the link graph to list every document that links to the
-    /// given target file, including optional anchors.
-    ///
-    /// Useful for understanding impact of changes, cleaning up docs, and
-    /// deciding whether a document is safe to delete.
-    ///
-    /// Limitations:
-    ///   - Only considers indexed markdown links (not external URLs).
-    ///
-    /// Related:
-    ///   - `yore orphans`, `yore export-graph`
-    ///
-    /// Examples:
-    ///   yore backlinks docs/architecture/DEPLOYMENT-GUIDE.md --index .yore
-    ///   yore backlinks docs/README.md --index .yore --json
-    Backlinks {
-        /// File to find backlinks for
-        file: String,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-    },
-
-    /// Find orphaned files with no inbound links.
-    ///
-    /// Lists documents that are not linked to from anywhere else in the
-    /// documentation graph (subject to `--exclude` filters).
-    ///
-    /// Helpful for identifying dead, experimental, or forgotten documents
-    /// that may be candidates for deletion or consolidation.
-    ///
-    /// Limitations:
-    ///   - Entry-point docs (README/INDEX) may be intentionally orphaned.
-    ///   - Only considers links in the index.
-    ///
-    /// Related:
-    ///   - `yore backlinks`, `yore canonical-orphans`
-    ///
-    /// Examples:
-    ///   yore orphans --index .yore --exclude README
-    ///   yore orphans --index .yore --exclude README --exclude INDEX --json
-    Orphans {
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Exclude files matching pattern (can be repeated)
-        #[arg(short, long)]
-        exclude: Vec<String>,
-    },
-
-    /// Show canonicality scores for all documents.
-    ///
-    /// Computes a heuristic "authority" score per document based on naming,
-    /// path, and link structure so agents can consistently pick canonical
-    /// sources of truth when multiple documents overlap.
-    ///
-    /// Limitations:
-    ///   - Heuristic scoring; validate with `dupes` and human review.
-    ///
-    /// Related:
-    ///   - `yore suggest-consolidation`, `yore canonical-orphans`
-    ///
-    /// Examples:
-    ///   yore canonicality --index .yore --threshold 0.7
-    ///   yore canonicality --index .yore --json
-    Canonicality {
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Minimum score threshold (0.0 to 1.0)
-        #[arg(short, long, default_value = "0.0")]
-        threshold: f64,
-    },
-
-    /// Find canonical documents with no inbound links.
-    ///
-    /// Filters documents by canonicality score and reports those that are
-    /// not linked to by any other indexed document.
-    ///
-    /// Limitations:
-    ///   - Only considers inbound links in the index roots.
-    ///   - Canonicality is heuristic, not semantic.
-    ///
-    /// Related:
-    ///   - `yore canonicality`, `yore orphans`, `yore backlinks`
-    ///
-    /// Examples:
-    ///   yore canonical-orphans --index .yore --threshold 0.7
-    ///   yore canonical-orphans --index .yore --threshold 0.8 --json
-    CanonicalOrphans {
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Minimum canonicality score (0.0 to 1.0)
-        #[arg(short, long, default_value = "0.7")]
-        threshold: f64,
-    },
-
-    /// Automatically fix a subset of broken relative links.
-    ///
-    /// This command uses heuristics over the index to propose safe,
-    /// mechanical rewrites for links that appear to point to the wrong
-    /// file (for example, the right filename in the wrong directory).
-    ///
-    /// For agent-friendly operation, use --propose to output ambiguous
-    /// cases to a YAML file, then --apply-decisions to apply choices.
-    ///
-    /// Limitations:
-    ///   - Only fixes a conservative subset of relative links.
-    ///   - Ambiguous targets require `--propose` + `--apply-decisions`.
-    ///
-    /// Related:
-    ///   - `yore check-links`, `yore mv`, `yore fix-references`
-    ///
-    /// Examples:
-    ///   yore fix-links --index .yore --dry-run
-    ///   yore fix-links --index .yore --apply
-    ///   yore fix-links --index .yore --propose proposals.yaml
-    ///   yore fix-links --index .yore --apply-decisions proposals.yaml
-    FixLinks {
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Show proposed edits without modifying any files
-        #[arg(long)]
-        dry_run: bool,
-
-        /// Apply changes to files on disk (only unambiguous fixes)
-        #[arg(long)]
-        apply: bool,
-
-        /// Output ambiguous link fixes to a YAML file for agent/human review
-        #[arg(long)]
-        propose: Option<PathBuf>,
-
-        /// Apply decisions from a previously generated proposal file
-        #[arg(long)]
-        apply_decisions: Option<PathBuf>,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Use git rename history to suggest fixes for moved files
-        #[arg(long)]
-        use_git_history: bool,
-    },
-
-    /// Rewrite references according to an explicit mapping file.
-    ///
-    /// This promotes the `mv --update-refs` machinery into a more general
-    /// bulk rewrite tool, suitable for large documentation reorganizations.
-    ///
-    /// Limitations:
-    ///   - Does not move files; only rewrites references.
-    ///   - Requires a mapping file that lists exact from/to pairs.
-    ///
-    /// Related:
-    ///   - `yore mv`, `yore fix-links`
-    ///
-    /// Examples:
-    ///   yore fix-references --mapping mappings.yaml --index .yore --dry-run --json
-    ///   yore fix-references --mapping mappings.yaml --index .yore --apply
-    FixReferences {
-        /// Path to reference mapping configuration (YAML)
-        #[arg(short, long)]
-        mapping: PathBuf,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Show planned changes without modifying files
-        #[arg(long)]
-        dry_run: bool,
-
-        /// Apply changes to files on disk
-        #[arg(long)]
-        apply: bool,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-    },
-
-    /// Move a documentation file and optionally update inbound references.
-    ///
-    /// This is a thin, ergonomic wrapper around link rewrite logic. When
-    /// --update-refs is used, all Markdown links that point to the old
-    /// path are rewritten to point to the new path.
-    ///
-    /// Limitations:
-    ///   - Only updates links in indexed files; run `yore build` first.
-    ///   - Does not update external repositories or URLs.
-    ///
-    /// Related:
-    ///   - `yore fix-references`, `yore fix-links`, `yore check-links`
-    ///
-    /// Examples:
-    ///   yore mv docs/old/auth.md docs/architecture/AUTH.md --update-refs --index .yore --json
-    ///   yore mv agents/tmp/note.md agents/archive/note.md --index .yore
-    Mv {
-        /// Source path to move from
-        from: PathBuf,
-
-        /// Destination path to move to
-        to: PathBuf,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Update inbound links that reference the old path
-        #[arg(long)]
-        update_refs: bool,
-
-        /// Show planned changes without modifying files
-        #[arg(long)]
-        dry_run: bool,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-    },
-
-    /// Report potentially stale documentation based on age and inbound links.
-    ///
-    /// Uses file modification time and inbound link counts from the index
-    /// to highlight documents that may be unmaintained or dead.
-    ///
-    /// Limitations:
-    ///   - Staleness is heuristic; validate before deleting.
-    ///   - Depends on file mtime and inbound links only.
-    ///
-    /// Related:
-    ///   - `yore orphans`, `yore canonicality`
-    ///
-    /// Examples:
-    ///   yore stale --index .yore --days 90 --min-inlinks 0 --json
-    ///   yore stale --index .yore --days 30 --min-inlinks 1
-    Stale {
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Minimum age in days to consider a file stale
-        #[arg(long, default_value = "90")]
-        days: u64,
-
-        /// Minimum inbound link count (files with >= this many links are included)
-        #[arg(long, default_value = "0")]
-        min_inlinks: usize,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-    },
-
-    /// Export the documentation link graph.
-    ///
-    /// Emits either a JSON representation or a Graphviz DOT file
-    /// describing links between indexed documents.
-    ///
-    /// Limitations:
-    ///   - Graph only includes indexed documents and internal links.
-    ///
-    /// Related:
-    ///   - `yore backlinks`, `yore check-links`
-    ///
-    /// Examples:
-    ///   yore export-graph --format json --index .yore
-    ///   yore export-graph --format dot --index .yore > graph.dot
-    ExportGraph {
-        /// Output format: "json" or "dot"
-        #[arg(long, default_value = "json")]
-        format: String,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
-
-    /// Show relation paths between documents via the persisted relation graph.
-    ///
-    /// Displays how a source document connects to other documents through
-    /// links, section links, and ADR references. Requires `relations.json`
-    /// from `yore build`.
-    ///
-    /// Examples:
-    ///   yore paths docs/architecture.md --index .yore
-    ///   yore paths docs/architecture.md --json --index .yore
-    ///   yore paths docs/architecture.md --depth 2 --index .yore
-    Paths {
-        /// Source file to show paths from
-        source: String,
-
-        /// Traversal depth (1 = direct edges, 2 = two hops)
-        #[arg(short = 'd', long, default_value = "1")]
-        depth: usize,
-
-        /// Filter by edge kind: links_to, section_links_to, adr_reference
-        #[arg(long)]
-        kind: Option<String>,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
-
-    /// Suggest document consolidation based on duplicates and canonicality.
-    ///
-    /// Uses duplicate detection and canonicality scoring to propose a
-    /// canonical document and a set of files that should be merged into it.
-    ///
-    /// Limitations:
-    ///   - Suggestions are heuristic; review before merging or deleting.
-    ///
-    /// Related:
-    ///   - `yore dupes`, `yore canonicality`, `yore diff`
-    ///
-    /// Examples:
-    ///   yore suggest-consolidation --threshold 0.7 --json --index .yore
-    ///   yore suggest-consolidation --threshold 0.6 --index .yore
-    SuggestConsolidation {
-        /// Minimum duplicate similarity threshold (0.0 to 1.0)
-        #[arg(long, default_value = "0.7")]
-        threshold: f64,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
-
-    /// Check documentation against declarative policy rules.
-    ///
-    /// Reads a YAML policy file describing path patterns and required or
-    /// forbidden content, and reports any violations it finds. Rules can
-    /// also enforce maximum section length (optionally filtered by heading
-    /// regex) and required markdown links.
-    /// Required links treat absolute paths as repo-root relative, and
-    /// resolve relative paths against the source file.
-    ///
-    /// Limitations:
-    ///   - Rules operate on indexed content; run `yore build` first.
-    ///   - Content checks are literal substring matches.
-    ///
-    /// Related:
-    ///   - `yore check --taxonomy`, `yore check-links`
-    ///
-    /// Examples:
-    ///   yore policy --config .yore-policy.yaml --index .yore --json
-    ///   yore policy --config .yore-policy.yaml --index .yore
-    Policy {
-        /// Path to policy configuration (YAML)
-        #[arg(long, default_value = ".yore-policy.yaml")]
-        config: PathBuf,
-
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-
-        /// Output as JSON
-        #[arg(long)]
-        json: bool,
-    },
+fn main() {
+    if let Err(e) = run() {
+        eprintln!("{}: {}", "error".red().bold(), e); // report the failure from `run()` on stderr
+        std::process::exit(1); // any error returned by `run()` ends the process with status 1
+    }
 }
 
-#[derive(Subcommand)]
-enum McpCommands {
-    /// Return bounded previews plus opaque handles for follow-up fetches.
-    #[command(name = "search-context", alias = "preview-context")]
-    SearchContext {
-        /// Natural language query/question (required unless --from-files is used)
-        #[arg(required_unless_present = "from_files")]
-        query: Vec<String>,
-
-        /// Maximum preview results to return
-        #[arg(long, default_value = "5")]
-        max_results: usize,
+fn run() -> Result<(), Box<dyn std::error::Error>> {
+    // Handle SIGPIPE / broken pipe panics gracefully (e.g., when piping into `head`).
+    let default_hook = std::panic::take_hook();
+    std::panic::set_hook(Box::new(move |info| {
+        let msg = format!("{info}");
+        if msg.contains("Broken pipe (os error 32)") {
+            // Treat broken pipe as a normal early exit with success.
+            std::process::exit(0);
+        }
+        default_hook(info);
+    }));
 
-        /// Maximum total tokens across all previews (approximate)
-        #[arg(long, default_value = "1200")]
-        max_tokens: usize,
+    let cli = Cli::parse();
+    let config = load_config(&cli.config, cli.quiet);
 
-        /// Maximum total bytes across all previews
-        #[arg(long, default_value = "12000")]
-        max_bytes: usize,
+    let result = match cli.command {
+        Commands::Check {
+            links,
+            dupes: _,
+            taxonomy,
+            stale,
+            ci,
+            fail_on,
+            index,
+            policy,
+            stale_days,
+        } => {
+            let index_path = resolve_index_path(index, cli.profile.as_deref(), &config);
 
-        /// Search/preview from explicit files instead of a query (supports @list.txt)
-        #[arg(long, value_name = "PATH", num_args = 1..)]
-        from_files: Vec<String>,
+            let mut combined = CombinedCheckResult::default();
 
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
+            // Run link checks if requested
+            if links {
+                let include_summary = true;
+                let external_paths: Vec<String> = config
+                    .as_ref()
+                    .and_then(|c| c.external.as_ref())
+                    .map(|e| e.repos.iter().map(|r| r.path.clone()).collect())
+                    .unwrap_or_default();
+                let link_result =
+                    run_link_check(&index_path, None, include_summary, false, &external_paths)?;
+                combined.links = Some(link_result);
+            }
 
-    /// Expand a previously returned opaque handle.
-    #[command(name = "fetch-context", alias = "expand-context")]
-    FetchContext {
-        /// Opaque handle returned by `search-context`
-        handle: String,
+            // Run policy checks if requested
+            if taxonomy {
+                let policy_path = match policy {
+                    Some(p) => p,
+                    None => PathBuf::from(".yore-policy.yaml"),
+                };
+                let policy_result = run_policy_check(&index_path, &policy_path)?;
+                combined.policy = Some(policy_result);
+            }
 
-        /// Maximum tokens in fetched content (approximate)
-        #[arg(long, default_value = "4000")]
-        max_tokens: usize,
+            // Run staleness checks if requested
+            if stale {
+                let stale_result = run_stale_check(&index_path, stale_days, 0)?;
+                combined.stale = Some(stale_result);
+            }
 
-        /// Maximum bytes in fetched content
-        #[arg(long, default_value = "20000")]
-        max_bytes: usize,
+            // For now, `check` always prints JSON.
+            let json_str = serde_json::to_string_pretty(&combined)?;
+            println!("{json_str}");
 
-        /// Index directory
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
+            // CI/fail-on logic: allow both link kinds and policy severities.
+            if ci && !fail_on.is_empty() {
+                let mut should_fail = false;
 
-    /// Serve the bounded preview/fetch tools over MCP stdio transport.
-    ///
-    /// This wraps the existing `search_context` and `fetch_context`
-    /// contracts so MCP clients can call Yore without scraping CLI stdout.
-    ///
-    /// Examples:
-    ///   yore mcp serve --index .yore
-    Serve {
-        /// Default index directory for MCP tool calls
-        #[arg(short, long, default_value = ".yore")]
-        index: PathBuf,
-    },
-}
+                // Link-based failure conditions (existing behavior)
+                if links {
+                    if let Some(link_result) = &combined.links {
+                        if let Some(summary) = &link_result.summary {
+                            for key in &fail_on {
+                                if let Some(kind) = summary.by_kind.iter().find(|k| &k.kind == key)
+                                {
+                                    if kind.count > 0 {
+                                        should_fail = true;
+                                        break;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
 
-// Evaluation structures
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct Question {
-    id: usize,
-    q: String,
-    expect: Vec<String>,
-    #[serde(default)]
-    min_hits: Option<usize>,
-}
+                // Policy-based failure conditions: keyed by severity.
+                // Supported keys:
+                //   - "policy_error"  – fail if any violation has severity "error"
+                //   - "policy_warn"   – fail if any violation has severity "warn" / "warning"
+                if taxonomy {
+                    if let Some(policy_result) = &combined.policy {
+                        let fail_on_error = fail_on.iter().any(|k| k == "policy_error");
+                        let fail_on_warn = fail_on
+                            .iter()
+                            .any(|k| k == "policy_warn" || k == "policy_warning");
 
-#[derive(Debug, Clone)]
-struct EvalResult {
-    id: usize,
-    question: String,
-    hits: usize,
-    total: usize,
-    passed: bool,
-    tokens: usize,
-}
+                        if fail_on_error || fail_on_warn {
+                            for v in &policy_result.violations {
+                                let sev = v.severity.as_str();
+                                if (fail_on_error && sev == "error")
+                                    || (fail_on_warn && (sev == "warn" || sev == "warning"))
+                                {
+                                    should_fail = true;
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                }
 
-// Link checking structures
-#[derive(Serialize, Debug, Clone)]
-struct BrokenLink {
-    source_file: String,
-    line_number: usize,
-    link_text: String,
-    link_target: String,
-    error: String,
-    anchor: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    context: Option<String>,
-}
+                if should_fail {
+                    std::process::exit(1);
+                }
+            }
 
-#[derive(Serialize, Debug, Clone, PartialEq, Eq, Hash)]
-#[serde(rename_all = "snake_case")]
-enum LinkKind {
-    DocMissing,
-    CodeMissing,
-    Placeholder,
-    CodeReference,
-    DirectoryReference,
-    ExternalReference,
-    AnchorMissing,
-    AnchorUnverified,
-}
-
-#[derive(Serialize, Debug)]
-struct LinkSummaryByFile {
-    file: String,
-    counts: HashMap<String, usize>,
-}
-
-#[derive(Serialize, Debug)]
-struct LinkSummaryByKind {
-    kind: String,
-    count: usize,
-}
-
-#[derive(Serialize, Debug)]
-struct LinkCheckSummary {
-    by_file: Vec<LinkSummaryByFile>,
-    by_kind: Vec<LinkSummaryByKind>,
-}
-
-#[derive(Serialize, Debug)]
-struct LinkCheckResult {
-    total_links: usize,
-    valid_links: usize,
-    broken_links: usize,
-    broken: Vec<BrokenLink>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    summary: Option<LinkCheckSummary>,
-}
-
-// Diff output structure
-#[derive(Serialize, Debug)]
-struct DiffResult {
-    file1: String,
-    file2: String,
-    similarity: DiffSimilarity,
-    shared_keywords: Vec<String>,
-    only_in_file1: Vec<String>,
-    only_in_file2: Vec<String>,
-    shared_headings: Vec<String>,
-}
-
-#[derive(Serialize, Debug)]
-struct DiffSimilarity {
-    combined: f64,
-    jaccard: f64,
-    simhash: f64,
-}
-
-// Stats output structure
-#[derive(Serialize, Debug)]
-struct StatsResult {
-    total_files: usize,
-    unique_keywords: usize,
-    total_headings: usize,
-    body_keywords: usize,
-    total_links: usize,
-    index_version: u32,
-    indexed_at: String,
-    top_keywords: Vec<KeywordCount>,
-}
-
-#[derive(Serialize, Debug)]
-struct KeywordCount {
-    keyword: String,
-    count: usize,
-}
-
-#[derive(Serialize, Debug)]
-struct VocabularyResult {
-    format: String,
-    limit: usize,
-    total: usize,
-    terms: Vec<VocabularyTerm>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    stopwords: Option<String>,
-    used_default_stopwords: bool,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    auto_common_terms: Option<usize>,
-    include_stemming: bool,
-}
-
-#[derive(Serialize, Debug)]
-struct VocabularyTerm {
-    term: String,
-    score: f64,
-    count: usize,
-}
-
-#[derive(Debug, Clone)]
-struct VocabularyCandidateTerm {
-    term: String,
-    surface: Option<String>,
-    term_freq: usize,
-    doc_freq: usize,
-    first_file: String,
-    first_line: usize,
-    first_heading: String,
-}
-
-#[derive(Debug, Clone, Copy)]
-struct VocabularyOptions<'a> {
-    stopwords: Option<&'a Path>,
-    include_stemming: bool,
-    no_default_stopwords: bool,
-    common_terms: usize,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct McpScoreBreakdown {
-    bm25: f64,
-    canonicality: f64,
-    combined: f64,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct McpSourceRef {
-    path: String,
-    heading: String,
-    line_start: usize,
-    line_end: usize,
-}
-
-#[derive(Serialize, Debug, Default, Clone)]
-struct McpPressure {
-    truncated: bool,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    reasons: Vec<String>,
-}
-
-#[derive(Serialize, Debug, Default)]
-struct McpSearchBudget {
-    max_results: usize,
-    max_tokens: usize,
-    max_bytes: usize,
-    returned_results: usize,
-    candidate_hits: usize,
-    deduped_hits: usize,
-    omitted_hits: usize,
-    estimated_tokens: usize,
-    bytes: usize,
-}
-
-#[derive(Serialize, Debug, Default)]
-struct McpFetchBudget {
-    max_tokens: usize,
-    max_bytes: usize,
-    estimated_tokens: usize,
-    bytes: usize,
-}
-
-#[derive(Serialize, Debug)]
-struct McpSearchResult {
-    handle: String,
-    rank: usize,
-    source: McpSourceRef,
-    scores: McpScoreBreakdown,
-    preview: String,
-    preview_tokens: usize,
-    preview_bytes: usize,
-    truncated: bool,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    truncation_reasons: Vec<String>,
-}
-
-#[derive(Serialize, Debug)]
-struct McpFetchResult {
-    source: McpSourceRef,
-    scores: McpScoreBreakdown,
-    preview: String,
-    content: String,
-    content_tokens: usize,
-    content_bytes: usize,
-}
-
-#[derive(Serialize, Debug)]
-struct McpSearchResponse {
-    schema_version: u32,
-    tool: String,
-    query: String,
-    selection_mode: String,
-    budget: McpSearchBudget,
-    pressure: McpPressure,
-    results: Vec<McpSearchResult>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    error: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    message: Option<String>,
-    #[serde(default, skip_serializing_if = "Vec::is_empty")]
-    missing_files: Vec<String>,
-}
-
-#[derive(Serialize, Debug)]
-struct McpFetchResponse {
-    schema_version: u32,
-    tool: String,
-    handle: String,
-    budget: McpFetchBudget,
-    pressure: McpPressure,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    query: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    result: Option<McpFetchResult>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    error: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    message: Option<String>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct McpArtifact {
-    schema_version: u32,
-    handle: String,
-    query: String,
-    source: McpSourceRef,
-    scores: McpScoreBreakdown,
-    preview: String,
-    content: String,
-    created_at: String,
-}
-
-#[derive(Debug, Clone, Copy)]
-struct McpSearchOptions {
-    max_results: usize,
-    max_tokens: usize,
-    max_bytes: usize,
-}
-
-#[derive(Debug, Clone, Copy)]
-struct McpFetchOptions {
-    max_tokens: usize,
-    max_bytes: usize,
-}
-
-const DEFAULT_MCP_PROTOCOL_VERSION: &str = "2025-11-25";
-
-#[derive(Debug, Default, Deserialize)]
-#[serde(default, rename_all = "camelCase")]
-struct McpInitializeParams {
-    protocol_version: Option<String>,
-}
-
-#[derive(Debug, Deserialize)]
-struct JsonRpcRequest {
-    #[serde(default)]
-    jsonrpc: Option<String>,
-    #[serde(default)]
-    id: Option<serde_json::Value>,
-    method: String,
-    #[serde(default)]
-    params: serde_json::Value,
-}
-
-#[derive(Debug, Deserialize)]
-struct McpToolCallParams {
-    name: String,
-    #[serde(default)]
-    arguments: serde_json::Value,
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(default)]
-struct McpSearchToolArgs {
-    query: String,
-    from_files: Vec<String>,
-    max_results: usize,
-    max_tokens: usize,
-    max_bytes: usize,
-    index: Option<PathBuf>,
-}
-
-impl Default for McpSearchToolArgs {
-    fn default() -> Self {
-        Self {
-            query: String::new(),
-            from_files: Vec::new(),
-            max_results: 5,
-            max_tokens: 1200,
-            max_bytes: 12000,
-            index: None,
-        }
-    }
-}
-
-#[derive(Debug, Deserialize)]
-#[serde(default)]
-struct McpFetchToolArgs {
-    handle: String,
-    max_tokens: usize,
-    max_bytes: usize,
-    index: Option<PathBuf>,
-}
-
-impl Default for McpFetchToolArgs {
-    fn default() -> Self {
-        Self {
-            handle: String::new(),
-            max_tokens: 4000,
-            max_bytes: 20000,
-            index: None,
-        }
-    }
-}
-
-// Mv output structure
-#[derive(Serialize, Debug)]
-struct MvResult {
-    from: String,
-    to: String,
-    moved: bool,
-    updated_files: Vec<String>,
-}
-
-// FixReferences output structure
-#[derive(Serialize, Debug)]
-struct FixReferencesResult {
-    mapping_file: String,
-    mappings_count: usize,
-    updated_files: Vec<String>,
-    applied: bool,
-}
-
-// Build output structure
-#[derive(Serialize, Debug)]
-struct BuildResult {
-    index_path: String,
-    files_indexed: usize,
-    total_headings: usize,
-    total_links: usize,
-    unique_keywords: usize,
-    duration_ms: u128,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    renames_tracked: Option<usize>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    total_relations: Option<usize>,
-}
-
-// Eval JSON output structure
-#[derive(Serialize, Debug)]
-struct EvalJsonResult {
-    questions_file: String,
-    total_questions: usize,
-    passed: usize,
-    failed: usize,
-    pass_rate: f64,
-    results: Vec<EvalQuestionResult>,
-}
-
-#[derive(Serialize, Debug)]
-struct EvalQuestionResult {
-    question: String,
-    passed: bool,
-    expected: Vec<String>,
-    found: Vec<String>,
-    missing: Vec<String>,
-}
-
-// Policy / taxonomy structures
-#[derive(Debug, Deserialize, Default)]
-struct PolicyRule {
-    /// Glob pattern to match files (e.g., "agents/plans/*.md")
-    pattern: String,
-    /// Required substrings that must appear in matching files
-    #[serde(default)]
-    must_contain: Vec<String>,
-    /// Substrings that must NOT appear in matching files
-    #[serde(default)]
-    must_not_contain: Vec<String>,
-    /// Optional rule name (for clearer reporting)
-    #[serde(default)]
-    name: Option<String>,
-    /// Optional severity ("error" or "warn"), defaults to "error"
-    #[serde(default)]
-    severity: Option<String>,
-    /// Optional minimum document length in lines
-    #[serde(default)]
-    min_length: Option<usize>,
-    /// Optional maximum document length in lines
-    #[serde(default)]
-    max_length: Option<usize>,
-    /// Optional maximum section length in lines
-    #[serde(default)]
-    max_section_length: Option<usize>,
-    /// Optional regex to scope section-length rules to matching headings
-    #[serde(default)]
-    section_heading_regex: Option<String>,
-    /// Required markdown headings (by text, without leading '#')
-    #[serde(default)]
-    required_headings: Vec<String>,
-    /// Forbidden markdown headings (by text, without leading '#')
-    #[serde(default)]
-    forbidden_headings: Vec<String>,
-    /// Required markdown link targets (resolved relative to file)
-    #[serde(default)]
-    must_link_to: Vec<String>,
-}
-
-#[derive(Debug, Deserialize)]
-struct PolicyConfig {
-    #[serde(default)]
-    rules: Vec<PolicyRule>,
-}
-
-#[derive(Serialize, Debug)]
-struct PolicyViolation {
-    file: String,
-    rule: String,
-    message: String,
-    severity: String,
-    /// Always "policy_violation" so agents can key off kind
-    kind: String,
-}
-
-#[derive(Serialize, Debug)]
-struct PolicyCheckResult {
-    policy_file: String,
-    total_violations: usize,
-    violations: Vec<PolicyViolation>,
-}
-
-#[derive(Serialize, Debug, Default)]
-struct CombinedCheckResult {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    links: Option<LinkCheckResult>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    policy: Option<PolicyCheckResult>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    stale: Option<StaleResult>,
-}
-
-#[derive(Serialize, Debug)]
-struct StaleFile {
-    file: String,
-    days_since_modified: u64,
-    inbound_links: usize,
-}
-
-#[derive(Serialize, Debug)]
-struct StaleResult {
-    total_stale: usize,
-    files: Vec<StaleFile>,
-}
-
-#[derive(Serialize, Debug, Clone)]
-struct HealthIssue {
-    kind: String,
-    severity: String,
-    message: String,
-    value: usize,
-    threshold: usize,
-}
-
-#[derive(Serialize, Debug, Clone)]
-struct HealthFileResult {
-    file: String,
-    status: String,
-    issues: Vec<HealthIssue>,
-}
-
-#[derive(Serialize, Debug)]
-struct HealthResult {
-    total_files: usize,
-    unhealthy_files: usize,
-    warning_files: usize,
-    files: Vec<HealthFileResult>,
-}
-
-#[derive(Serialize, Debug)]
-struct GraphNode {
-    id: String,
-}
-
-#[derive(Serialize, Debug)]
-struct GraphEdge {
-    source: String,
-    target: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    anchor: Option<String>,
-}
-
-#[derive(Serialize, Debug)]
-struct GraphExport {
-    nodes: Vec<GraphNode>,
-    edges: Vec<GraphEdge>,
-}
-
-// Relation extraction structs (YEH-004)
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
-struct SectionRef {
-    heading: String,
-    line_start: usize,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
-#[serde(rename_all = "snake_case")]
-enum RelationKind {
-    LinksTo,
-    SectionLinksTo,
-    AdrReference,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
-struct RelationEdge {
-    source: String,
-    target: String,
-    kind: RelationKind,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    anchor: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    source_section: Option<SectionRef>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    target_section: Option<SectionRef>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    raw_text: Option<String>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct RelationIndex {
-    version: u32,
-    indexed_at: String,
-    total_edges: usize,
-    edges: Vec<RelationEdge>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct AdrRef {
-    line: usize,
-    raw_text: String,
-    normalized_id: String,
-}
-
-#[derive(Serialize, Debug)]
-struct ConsolidationGroup {
-    canonical: String,
-    merge_into: Vec<String>,
-    canonical_score: f64,
-    avg_similarity: f64,
-    note: String,
-}
-
-#[derive(Serialize, Debug)]
-struct ConsolidationResult {
-    total_groups: usize,
-    groups: Vec<ConsolidationGroup>,
-}
-
-#[derive(Serialize, Debug, Clone)]
-struct LinkFix {
-    file: String,
-    old_target: String,
-    new_target: String,
-}
-
-// Proposal structures for agent-friendly fix-links
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct LinkFixProposal {
-    source: String,
-    line: usize,
-    broken_target: String,
-    candidates: Vec<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    decision: Option<usize>, // Index into candidates, or None to skip
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct LinkFixProposalFile {
-    /// Schema version for forward compatibility
-    version: u32,
-    /// Proposals for ambiguous link fixes
-    proposals: Vec<LinkFixProposal>,
-}
-
-// Backlinks structures
-#[derive(Serialize, Debug, Clone)]
-struct Backlink {
-    source_file: String,
-    link_text: String,
-    link_target: String,
-    anchor: Option<String>,
-}
-
-#[derive(Debug, Deserialize)]
-struct ReferenceMapping {
-    from: String,
-    to: String,
-}
-
-#[derive(Debug, Deserialize)]
-struct ReferenceMappingConfig {
-    #[serde(default)]
-    mappings: Vec<ReferenceMapping>,
-}
-
-#[derive(Serialize, Debug)]
-struct BacklinksResult {
-    target_file: String,
-    total_backlinks: usize,
-    backlinks: Vec<Backlink>,
-}
-
-// Orphans structures
-#[derive(Serialize, Debug, Clone)]
-struct OrphanFile {
-    file: String,
-    size_bytes: u64,
-    line_count: usize,
-}
-
-#[derive(Serialize, Debug)]
-struct OrphansResult {
-    total_orphans: usize,
-    orphans: Vec<OrphanFile>,
-}
-
-#[derive(Serialize, Debug, Clone)]
-struct CanonicalOrphan {
-    file: String,
-    canonicality: f64,
-    inbound_links: usize,
-}
-
-#[derive(Serialize, Debug)]
-struct CanonicalOrphansResult {
-    total_orphans: usize,
-    threshold: f64,
-    orphans: Vec<CanonicalOrphan>,
-}
-
-// Canonicality structures
-#[derive(Serialize, Debug, Clone)]
-struct CanonicalityScore {
-    file: String,
-    score: f64,
-    reasons: Vec<String>,
-}
-
-#[derive(Serialize, Debug)]
-struct CanonicalityResult {
-    total_files: usize,
-    files: Vec<CanonicalityScore>,
-}
-
-// Index structures
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct FileEntry {
-    path: String,
-    size_bytes: u64,
-    line_count: usize,
-    headings: Vec<Heading>,
-    keywords: Vec<String>,
-    body_keywords: Vec<String>, // keywords from full text
-    links: Vec<Link>,
-    simhash: u64, // content fingerprint
-    #[serde(default)]
-    term_frequencies: HashMap<String, usize>, // term counts for BM25
-    #[serde(default)]
-    doc_length: usize, // total terms for BM25
-    #[serde(default)]
-    minhash: Vec<u64>, // MinHash signature for LSH
-    #[serde(default)]
-    section_fingerprints: Vec<SectionFingerprint>, // NEW: section-level SimHash
-    #[serde(default)]
-    adr_references: Vec<AdrRef>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct Heading {
-    line: usize,
-    level: usize,
-    text: String,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct Link {
-    line: usize,
-    text: String,
-    target: String,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct SectionFingerprint {
-    heading: String,
-    level: usize,
-    line_start: usize,
-    line_end: usize,
-    simhash: u64,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct ReverseEntry {
-    file: String,
-    line: Option<usize>,
-    heading: Option<String>,
-    level: Option<usize>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct ForwardIndex {
-    files: HashMap<String, FileEntry>,
-    indexed_at: String,
-    version: u32, // index version for compatibility
-    #[serde(default)]
-    source_root: String,
-    #[serde(default)]
-    avg_doc_length: f64, // NEW: average document length for BM25
-    #[serde(default)]
-    idf_map: HashMap<String, f64>, // NEW: IDF scores for BM25
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct ReverseIndex {
-    keywords: HashMap<String, Vec<ReverseEntry>>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
-struct SectionMetrics {
-    heading: String,
-    level: usize,
-    line_start: usize,
-    line_end: usize,
-    line_count: usize,
-    word_count: usize,
-    link_count: usize,
-    list_item_count: usize,
-    code_block_count: usize,
-    has_completion_marker: bool,
-    looks_like_part: bool,
-    looks_like_changelog: bool,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
-struct DocumentMetrics {
-    path: String,
-    line_count: usize,
-    word_count: usize,
-    heading_count: usize,
-    section_count: usize,
-    link_count: usize,
-    h1_count: usize,
-    h2_count: usize,
-    h3_count: usize,
-    h4_plus_count: usize,
-    code_block_count: usize,
-    list_item_count: usize,
-    table_row_count: usize,
-    frontmatter_key_count: usize,
-    metadata_line_count: usize,
-    part_heading_count: usize,
-    completion_heading_count: usize,
-    changelog_heading_count: usize,
-    changelog_entry_count: usize,
-    longest_section_lines: usize,
-    sections: Vec<SectionMetrics>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, Default)]
-struct DocumentMetricsIndex {
-    indexed_at: String,
-    version: u32,
-    files: HashMap<String, DocumentMetrics>,
-}
-
-#[derive(Serialize, Deserialize, Debug)]
-struct IndexStats {
-    total_files: usize,
-    total_keywords: usize,
-    total_headings: usize,
-    total_links: usize,
-    indexed_at: String,
-}
-
-/// A single file rename event from git history
-#[derive(Serialize, Deserialize, Debug, Clone)]
-struct RenameEntry {
-    /// The old path before the rename
-    old_path: String,
-    /// The new path after the rename
-    new_path: String,
-    /// Git commit hash where the rename occurred
-    commit: String,
-}
-
-/// Git rename history for tracking file moves
-#[derive(Serialize, Deserialize, Debug, Default)]
-struct RenameHistory {
-    /// All rename events, ordered from oldest to newest
-    renames: Vec<RenameEntry>,
-    /// Indexed at timestamp
-    indexed_at: String,
-}
-
-#[derive(Deserialize, Debug, Clone)]
-struct IndexProfileConfig {
-    #[serde(default)]
-    roots: Vec<String>,
-    #[serde(default)]
-    types: Vec<String>,
-    output: Option<String>,
-}
-
-/// Severity override for link checking based on path patterns
-#[derive(Deserialize, Debug, Clone)]
-#[allow(dead_code)] // Config scaffolding for future severity filtering
-struct SeverityOverride {
-    pattern: String,
-    severity: String,
-}
-
-/// Link checking configuration
-#[derive(Deserialize, Debug, Clone, Default)]
-#[allow(dead_code)] // Config scaffolding for future exclude patterns
-struct LinkCheckConfig {
-    #[serde(default)]
-    exclude: Vec<String>,
-    #[serde(default, rename = "severity-overrides")]
-    severity_overrides: Vec<SeverityOverride>,
-}
-
-/// External repository configuration for cross-repo link validation
-#[derive(Deserialize, Debug, Clone)]
-struct ExternalRepo {
-    path: String,
-    #[serde(default)]
-    #[allow(dead_code)] // Config scaffolding for future prefix support
-    prefix: Option<String>,
-}
-
-/// External repositories configuration
-#[derive(Deserialize, Debug, Clone, Default)]
-struct ExternalConfig {
-    #[serde(default)]
-    repos: Vec<ExternalRepo>,
-}
-
-/// Policy configuration
-#[derive(Deserialize, Debug, Clone, Default)]
-#[allow(dead_code)] // Config scaffolding for future policy file reference
-struct PolicyConfigRef {
-    #[serde(default, rename = "rules-file")]
-    rules_file: Option<String>,
-}
-
-#[derive(Deserialize, Debug, Clone, Default)]
-struct YoreConfig {
-    #[serde(default)]
-    index: HashMap<String, IndexProfileConfig>,
-    #[serde(default, rename = "link-check")]
-    #[allow(dead_code)] // Config scaffolding
-    link_check: Option<LinkCheckConfig>,
-    #[serde(default)]
-    #[allow(dead_code)] // Config scaffolding
-    policy: Option<PolicyConfigRef>,
-    #[serde(default)]
-    external: Option<ExternalConfig>,
-}
-
-fn load_config(path: &Path, quiet: bool) -> Option<YoreConfig> {
-    if !path.exists() {
-        return None;
-    }
-
-    let contents = match fs::read_to_string(path) {
-        Ok(c) => c,
-        Err(e) => {
-            if !quiet {
-                eprintln!(
-                    "{}: failed to read config {}: {}",
-                    "warning".yellow(),
-                    path.display(),
-                    e
-                );
-            }
-            return None;
-        }
-    };
-
-    match toml::from_str::<YoreConfig>(&contents) {
-        Ok(cfg) => Some(cfg),
-        Err(e) => {
-            if !quiet {
-                eprintln!(
-                    "{}: failed to parse config {}: {}",
-                    "warning".yellow(),
-                    path.display(),
-                    e
-                );
-            }
-            None
-        }
-    }
-}
-
-fn resolve_build_params(
-    path: PathBuf,
-    output: PathBuf,
-    types: String,
-    profile: Option<&str>,
-    config: &Option<YoreConfig>,
-) -> (PathBuf, PathBuf, String, Option<Vec<PathBuf>>) {
-    // Defaults from CLI definition
-    let default_path = PathBuf::from(".");
-    let default_output = PathBuf::from(".yore");
-    let default_types = "md,txt,rst".to_string();
-
-    let mut effective_path = path;
-    let mut effective_output = output;
-    let mut effective_types = types;
-    let mut roots: Option<Vec<PathBuf>> = None;
-
-    if let (Some(profile_name), Some(cfg)) = (profile, config.as_ref()) {
-        if let Some(profile_cfg) = cfg.index.get(profile_name) {
-            // Roots: if present, use them as allowed roots (multi-root support)
-            if !profile_cfg.roots.is_empty() {
-                let rs: Vec<PathBuf> = profile_cfg.roots.iter().map(PathBuf::from).collect();
-                roots = Some(rs);
-                // Use repo root (".") as walk root when using multiple roots
-                effective_path.clone_from(&default_path);
-            }
-
-            // Types: only override when CLI used the default
-            if effective_types == default_types && !profile_cfg.types.is_empty() {
-                effective_types = profile_cfg.types.join(",");
-            }
-
-            // Output: only override when CLI used the default
-            if effective_output == default_output {
-                if let Some(ref out) = profile_cfg.output {
-                    effective_output = PathBuf::from(out);
-                }
-            }
-        }
-    }
-
-    (effective_path, effective_output, effective_types, roots)
-}
-
-fn resolve_index_path(
-    index: PathBuf,
-    profile: Option<&str>,
-    config: &Option<YoreConfig>,
-) -> PathBuf {
-    let default_index = PathBuf::from(".yore");
-
-    if index != default_index {
-        return index;
-    }
-
-    if let (Some(profile_name), Some(cfg)) = (profile, config.as_ref()) {
-        if let Some(profile_cfg) = cfg.index.get(profile_name) {
-            if let Some(ref out) = profile_cfg.output {
-                return PathBuf::from(out);
-            }
-        }
-    }
-
-    index
-}
-
-fn main() {
-    if let Err(e) = run() {
-        eprintln!("{}: {}", "error".red().bold(), e);
-        std::process::exit(1);
-    }
-}
-
-fn run() -> Result<(), Box<dyn std::error::Error>> {
-    // Handle SIGPIPE / broken pipe panics gracefully (e.g., when piping into `head`).
-    let default_hook = std::panic::take_hook();
-    std::panic::set_hook(Box::new(move |info| {
-        let msg = format!("{info}");
-        if msg.contains("Broken pipe (os error 32)") {
-            // Treat broken pipe as a normal early exit with success.
-            std::process::exit(0);
-        }
-        default_hook(info);
-    }));
-
-    let cli = Cli::parse();
-    let config = load_config(&cli.config, cli.quiet);
-
-    let result = match cli.command {
-        Commands::Check {
-            links,
-            dupes: _,
-            taxonomy,
-            stale,
-            ci,
-            fail_on,
-            index,
-            policy,
-            stale_days,
-        } => {
-            let index_path = resolve_index_path(index, cli.profile.as_deref(), &config);
-
-            let mut combined = CombinedCheckResult::default();
-
-            // Run link checks if requested
-            if links {
-                let include_summary = true;
-                let external_paths: Vec<String> = config
-                    .as_ref()
-                    .and_then(|c| c.external.as_ref())
-                    .map(|e| e.repos.iter().map(|r| r.path.clone()).collect())
-                    .unwrap_or_default();
-                let link_result =
-                    run_link_check(&index_path, None, include_summary, false, &external_paths)?;
-                combined.links = Some(link_result);
-            }
-
-            // Run policy checks if requested
-            if taxonomy {
-                let policy_path = match policy {
-                    Some(p) => p,
-                    None => PathBuf::from(".yore-policy.yaml"),
-                };
-                let policy_result = run_policy_check(&index_path, &policy_path)?;
-                combined.policy = Some(policy_result);
-            }
-
-            // Run staleness checks if requested
-            if stale {
-                let stale_result = run_stale_check(&index_path, stale_days, 0)?;
-                combined.stale = Some(stale_result);
-            }
-
-            // For now, `check` always prints JSON.
-            let json_str = serde_json::to_string_pretty(&combined)?;
-            println!("{json_str}");
-
-            // CI/fail-on logic: allow both link kinds and policy severities.
-            if ci && !fail_on.is_empty() {
-                let mut should_fail = false;
-
-                // Link-based failure conditions (existing behavior)
-                if links {
-                    if let Some(link_result) = &combined.links {
-                        if let Some(summary) = &link_result.summary {
-                            for key in &fail_on {
-                                if let Some(kind) = summary.by_kind.iter().find(|k| &k.kind == key)
-                                {
-                                    if kind.count > 0 {
-                                        should_fail = true;
-                                        break;
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-
-                // Policy-based failure conditions: keyed by severity.
-                // Supported keys:
-                //   - "policy_error"  – fail if any violation has severity "error"
-                //   - "policy_warn"   – fail if any violation has severity "warn" / "warning"
-                if taxonomy {
-                    if let Some(policy_result) = &combined.policy {
-                        let fail_on_error = fail_on.iter().any(|k| k == "policy_error");
-                        let fail_on_warn = fail_on
-                            .iter()
-                            .any(|k| k == "policy_warn" || k == "policy_warning");
-
-                        if fail_on_error || fail_on_warn {
-                            for v in &policy_result.violations {
-                                let sev = v.severity.as_str();
-                                if (fail_on_error && sev == "error")
-                                    || (fail_on_warn && (sev == "warn" || sev == "warning"))
-                                {
-                                    should_fail = true;
-                                    break;
-                                }
-                            }
-                        }
-                    }
-                }
-
-                if should_fail {
-                    std::process::exit(1);
-                }
-            }
-
-            Ok(())
-        }
-        Commands::Health {
-            file,
-            all,
-            index,
-            max_lines,
-            max_part_sections,
-            max_completed_lines,
-            max_changelog_entries,
-            json,
-        } => cmd_health(
-            file.as_deref(),
-            all,
-            &index,
-            &HealthOptions {
-                max_lines,
-                max_part_sections,
-                max_completed_lines,
-                max_changelog_entries,
-            },
-            json,
-        ),
-        Commands::Build {
-            path,
-            output,
-            types,
-            exclude,
-            json,
-            track_renames,
-        } => {
-            let (path, output, types, roots) =
-                resolve_build_params(path, output, types, cli.profile.as_deref(), &config);
-            cmd_build(
-                &path,
-                &output,
-                &types,
-                &exclude,
-                cli.quiet,
-                roots.as_deref(),
-                json,
-                track_renames,
-            )
-        }
-        Commands::Query {
-            terms,
-            query,
-            limit,
-            files_only,
-            json,
-            doc_terms,
-            explain,
-            no_stopwords,
-            phrase,
-            index,
-        } => {
-            let query_text = query.unwrap_or_else(|| terms.join(" "));
-            let options = QueryOptions {
-                limit,
-                files_only,
-                json,
-                doc_terms,
-                explain,
-                require_phrases: phrase,
-                filter_stopwords: !no_stopwords,
-            };
-            cmd_query(&query_text, &index, &options)
-        }
-        Commands::Similar {
-            file,
-            limit,
-            threshold,
-            json,
-            doc_terms,
-            index,
-        } => cmd_similar(&file, limit, threshold, json, doc_terms, &index),
-        Commands::Dupes {
-            threshold,
-            group,
-            json,
-            index,
-        } => cmd_dupes(threshold, group, json, &index),
-        Commands::DupesSections {
-            threshold,
-            min_files,
-            json,
-            index,
-        } => cmd_dupes_sections(threshold, min_files, json, &index),
-        Commands::Diff {
-            file1,
-            file2,
-            index,
-            json,
-        } => cmd_diff(&file1, &file2, &index, json),
-        Commands::Stats {
-            top_keywords,
-            index,
-            json,
-        } => cmd_stats(top_keywords, &index, json),
-        Commands::Repl { index } => cmd_repl(&index),
-        Commands::Assemble {
-            query,
-            max_tokens,
-            max_sections,
-            depth,
-            format,
-            doc_terms,
-            from_files,
-            use_relations,
-            index,
-        } => cmd_assemble(
-            &query.join(" "),
-            &from_files,
-            &AssembleOptions {
-                max_tokens,
-                max_sections,
-                depth,
-                format,
-                doc_terms,
-                use_relations,
-            },
-            &index,
-        ),
-        Commands::Mcp { command } => match command {
-            McpCommands::SearchContext {
-                query,
-                max_results,
-                max_tokens,
-                max_bytes,
-                from_files,
-                index,
-            } => cmd_mcp_search_context(
-                &query.join(" "),
-                &from_files,
-                &index,
-                McpSearchOptions {
-                    max_results,
-                    max_tokens,
-                    max_bytes,
-                },
-            ),
-            McpCommands::FetchContext {
-                handle,
-                max_tokens,
-                max_bytes,
-                index,
-            } => cmd_mcp_fetch_context(
-                &handle,
-                &index,
-                McpFetchOptions {
-                    max_tokens,
-                    max_bytes,
-                },
-            ),
-            McpCommands::Serve { index } => cmd_mcp_serve(&index),
-        },
-        Commands::Eval {
-            questions,
-            index,
-            json,
-        } => cmd_eval(&questions, &index, json),
-        Commands::Vocabulary {
-            index,
-            limit,
-            format,
-            json,
-            stopwords,
-            include_stemming,
-            no_default_stopwords,
-            common_terms,
-        } => cmd_vocabulary(
-            &index,
-            limit,
-            &format,
-            json,
-            VocabularyOptions {
-                stopwords: stopwords.as_deref(),
-                include_stemming,
-                no_default_stopwords,
-                common_terms,
-            },
-        ),
-        Commands::CheckLinks {
-            index,
-            json,
-            root,
-            summary,
-            summary_only,
-        } => {
-            let index_path = resolve_index_path(index, cli.profile.as_deref(), &config);
-            let external_paths: Vec<String> = config
-                .as_ref()
-                .and_then(|c| c.external.as_ref())
-                .map(|e| e.repos.iter().map(|r| r.path.clone()).collect())
-                .unwrap_or_default();
-            cmd_check_links(
-                &index_path,
-                json,
-                root.as_deref(),
-                summary,
-                summary_only,
-                &external_paths,
-            )
-        }
-        Commands::Backlinks { file, index, json } => cmd_backlinks(&file, &index, json),
-        Commands::Orphans {
-            index,
-            json,
-            exclude,
-        } => cmd_orphans(&index, json, &exclude),
-        Commands::Canonicality {
-            index,
-            json,
-            threshold,
-        } => cmd_canonicality(&index, json, threshold),
-        Commands::CanonicalOrphans {
-            index,
-            json,
-            threshold,
-        } => cmd_canonical_orphans(&index, threshold, json),
-        Commands::ExportGraph { format, index } => cmd_export_graph(&index, &format),
-        Commands::Paths {
-            source,
-            depth,
-            kind,
-            json,
-            index,
-        } => cmd_paths(&source, depth, kind.as_deref(), json, &index),
-        Commands::SuggestConsolidation {
-            threshold,
-            json,
-            index,
-        } => cmd_suggest_consolidation(&index, threshold, json),
-        Commands::Policy {
-            config,
-            index,
-            json,
-        } => cmd_policy(&config, &index, json),
-        Commands::FixLinks {
-            index,
-            dry_run,
-            apply,
-            propose,
-            apply_decisions,
-            json,
-            use_git_history,
-        } => cmd_fix_links(
-            &index,
-            dry_run,
-            apply,
-            propose,
-            apply_decisions,
-            json,
-            use_git_history,
-        ),
-        Commands::FixReferences {
-            mapping,
-            index,
-            dry_run,
-            apply,
-            json,
-        } => cmd_fix_references(&index, &mapping, dry_run, apply, json),
-        Commands::Mv {
-            from,
-            to,
-            index,
-            update_refs,
-            dry_run,
-            json,
-        } => cmd_mv(&from, &to, &index, update_refs, dry_run, json),
-        Commands::Stale {
-            index,
-            days,
-            min_inlinks,
-            json,
-        } => cmd_stale(&index, days, min_inlinks, json),
-    };
-    result
-}
-
-#[allow(clippy::too_many_arguments)]
-fn cmd_build(
-    path: &Path,
-    output: &Path,
-    types: &str,
-    exclude: &[String],
-    quiet: bool,
-    roots: Option<&[PathBuf]>,
-    json: bool,
-    track_renames: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let start = Instant::now();
-    let source_root = canonicalize_existing_path(&std::env::current_dir()?);
-
-    if !quiet && !json {
-        println!("{} {}", "Indexing".cyan().bold(), path.display());
-    }
-
-    // Parse file types
-    let extensions: HashSet<String> = types.split(',').map(|s| s.trim().to_lowercase()).collect();
-
-    // Build walker with ignore patterns
-    let mut builder = WalkBuilder::new(path);
-    builder.hidden(true).git_ignore(true).git_global(true);
-
-    // Add custom excludes
-    for pattern in exclude {
-        builder.add_ignore(Path::new(pattern));
-    }
-
-    // Collect files
-    let mut forward_index = ForwardIndex {
-        files: HashMap::new(),
-        indexed_at: chrono_now(),
-        version: 4, // Version 4 adds source_root metadata for portable file resolution
-        source_root: source_root.to_string_lossy().to_string(),
-        avg_doc_length: 0.0,
-        idf_map: HashMap::new(),
-    };
-
-    let mut reverse_index = ReverseIndex {
-        keywords: HashMap::new(),
-    };
-    let mut document_metrics_index = DocumentMetricsIndex {
-        indexed_at: chrono_now(),
-        version: 1,
-        files: HashMap::new(),
-    };
-
-    let mut file_count = 0;
-    let mut total_headings = 0;
-    let mut total_links = 0;
-
-    for entry in builder.build().filter_map(std::result::Result::ok) {
-        let path = entry.path();
-
-        // Skip directories
-        if path.is_dir() {
-            continue;
-        }
-
-        // If roots are configured, skip files outside those roots
-        if let Some(root_list) = roots {
-            let mut inside_any_root = false;
-            for root in root_list {
-                if path.starts_with(root) {
-                    inside_any_root = true;
-                    break;
-                }
-            }
-            if !inside_any_root {
-                continue;
-            }
-        }
-
-        // Check extension
-        let ext = path
-            .extension()
-            .and_then(|e| e.to_str())
-            .map(str::to_lowercase)
-            .unwrap_or_default();
-
-        if !extensions.contains(&ext) {
-            continue;
-        }
-
-        // Skip common non-content directories
-        let path_str = path.to_string_lossy();
-        if path_str.contains("node_modules")
-            || path_str.contains(".git/")
-            || path_str.contains("target/")
-            || path_str.contains("vendor/")
-            || path_str.contains("venv/")
-            || path_str.contains("__pycache__")
-        {
-            continue;
-        }
-
-        // Index the file
-        if let Ok((mut entry, mut metrics)) = index_file(path) {
-            let physical_path = canonicalize_existing_path(path);
-            let rel_path = build_indexed_doc_key(&physical_path, &source_root);
-            entry.path = physical_path.to_string_lossy().to_string();
-            metrics.path.clone_from(&rel_path);
-
-            // Update reverse index with heading keywords
-            for keyword in &entry.keywords {
-                let stemmed = stem_word(&keyword.to_lowercase());
-                reverse_index
-                    .keywords
-                    .entry(stemmed)
-                    .or_default()
-                    .push(ReverseEntry {
-                        file: rel_path.clone(),
-                        line: None,
-                        heading: None,
-                        level: None,
-                    });
-            }
-
-            // Update reverse index with body keywords
-            for keyword in &entry.body_keywords {
-                let stemmed = stem_word(&keyword.to_lowercase());
-                reverse_index
-                    .keywords
-                    .entry(stemmed)
-                    .or_default()
-                    .push(ReverseEntry {
-                        file: rel_path.clone(),
-                        line: None,
-                        heading: None,
-                        level: None,
-                    });
-            }
-
-            for heading in &entry.headings {
-                let words = extract_keywords(&heading.text);
-                for word in words {
-                    let stemmed = stem_word(&word.to_lowercase());
-                    reverse_index
-                        .keywords
-                        .entry(stemmed)
-                        .or_default()
-                        .push(ReverseEntry {
-                            file: rel_path.clone(),
-                            line: Some(heading.line),
-                            heading: Some(heading.text.clone()),
-                            level: Some(heading.level),
-                        });
-                }
-            }
-
-            total_headings += entry.headings.len();
-            total_links += entry.links.len();
-            file_count += 1;
-
-            document_metrics_index
-                .files
-                .insert(rel_path.clone(), metrics);
-            forward_index.files.insert(rel_path, entry);
-        }
-    }
-
-    // Compute BM25 statistics (IDF and average document length)
-    let total_docs = forward_index.files.len() as f64;
-    let mut doc_frequencies: HashMap<String, usize> = HashMap::new();
-    let mut total_length = 0;
-
-    // Compute document frequencies
-    for entry in forward_index.files.values() {
-        total_length += entry.doc_length;
-        for term in entry.term_frequencies.keys() {
-            *doc_frequencies.entry(term.clone()).or_insert(0) += 1;
-        }
-    }
-
-    // Compute IDF scores (with floor to handle high-frequency terms)
-    let mut idf_map: HashMap<String, f64> = HashMap::new();
-    for (term, df) in doc_frequencies {
-        // Standard BM25 IDF can go negative when df > 50% of docs.
-        // We floor at a small positive value so common terms still contribute.
-        let idf = ((total_docs - df as f64 + 0.5) / (df as f64 + 0.5))
-            .ln()
-            .max(0.1);
-        idf_map.insert(term, idf);
-    }
-
-    forward_index.avg_doc_length = if total_docs > 0.0 {
-        total_length as f64 / total_docs
-    } else {
-        0.0
-    };
-    forward_index.idf_map = idf_map;
-
-    // Create output directory
-    fs::create_dir_all(output)?;
-
-    // Write indexes
-    let forward_path = output.join("forward_index.json");
-    let reverse_path = output.join("reverse_index.json");
-    let stats_path = output.join("stats.json");
-    let metrics_path = output.join("document_metrics.json");
-
-    fs::write(&forward_path, serde_json::to_string_pretty(&forward_index)?)?;
-    fs::write(&reverse_path, serde_json::to_string_pretty(&reverse_index)?)?;
-    fs::write(
-        &metrics_path,
-        serde_json::to_string_pretty(&document_metrics_index)?,
-    )?;
-
-    let stats = IndexStats {
-        total_files: file_count,
-        total_keywords: reverse_index.keywords.len(),
-        total_headings,
-        total_links,
-        indexed_at: chrono_now(),
-    };
-    fs::write(&stats_path, serde_json::to_string_pretty(&stats)?)?;
-
-    // Extract and persist relation edges
-    let relation_index = extract_relations(&forward_index);
-    let relations_count = relation_index.total_edges;
-    let relations_path = output.join("relations.json");
-    fs::write(
-        &relations_path,
-        serde_json::to_string_pretty(&relation_index)?,
-    )?;
-
-    // Track git renames if requested
-    let renames_count = if track_renames {
-        if !quiet && !json {
-            println!("  Extracting git rename history...");
-        }
-        let rename_history = extract_git_renames(path);
-        let count = rename_history.renames.len();
-        let rename_path = output.join("rename_history.json");
-        fs::write(&rename_path, serde_json::to_string_pretty(&rename_history)?)?;
-        if !quiet && !json {
-            println!("  Tracked {count} file renames");
-        }
-        Some(count)
-    } else {
-        None
-    };
-
-    let elapsed = start.elapsed();
-
-    if json {
-        let result = BuildResult {
-            index_path: output.to_string_lossy().to_string(),
-            files_indexed: file_count,
-            total_headings,
-            total_links,
-            unique_keywords: reverse_index.keywords.len(),
-            duration_ms: elapsed.as_millis(),
-            renames_tracked: renames_count,
-            total_relations: Some(relations_count),
-        };
-        println!("{}", serde_json::to_string_pretty(&result)?);
-    } else if !quiet {
-        println!();
-        println!("{}", "Index Statistics".green().bold());
-        println!("  Files indexed:    {}", file_count.to_string().cyan());
-        println!(
-            "  Unique keywords:  {}",
-            reverse_index.keywords.len().to_string().cyan()
-        );
-        println!("  Total headings:   {}", total_headings.to_string().cyan());
-        println!("  Total links:      {}", total_links.to_string().cyan());
-        println!("  Relations:        {}", relations_count.to_string().cyan());
-        println!("  Time elapsed:     {elapsed:.2?}");
-        println!();
-        println!(
-            "{} {}",
-            "Indexes written to".green(),
-            output.display().to_string().cyan()
-        );
-    }
-
-    Ok(())
-}
-
-fn index_file(path: &Path) -> Result<(FileEntry, DocumentMetrics), Box<dyn std::error::Error>> {
-    let content = fs::read_to_string(path)?;
-    let metadata = fs::metadata(path)?;
-
-    let lines: Vec<&str> = content.lines().collect();
-    let line_count = lines.len();
-
-    // Extract headings (markdown)
-    let heading_re = Regex::new(r"^(#{1,6})\s+(.+)$")?;
-    let mut headings = Vec::new();
-
-    for (i, line) in lines.iter().enumerate() {
-        if let Some(caps) = heading_re.captures(line) {
-            headings.push(Heading {
-                line: i + 1,
-                level: caps.get(1).map_or(1, |m| m.as_str().len()),
-                text: caps
-                    .get(2)
-                    .map(|m| m.as_str().to_string())
-                    .unwrap_or_default(),
-            });
-        }
-    }
-
-    // Extract links
-    let link_re = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)")?;
-    let mut links = Vec::new();
-
-    for (i, line) in lines.iter().enumerate() {
-        for caps in link_re.captures_iter(line) {
-            links.push(Link {
-                line: i + 1,
-                text: caps
-                    .get(1)
-                    .map(|m| m.as_str().to_string())
-                    .unwrap_or_default(),
-                target: caps
-                    .get(2)
-                    .map(|m| m.as_str().to_string())
-                    .unwrap_or_default(),
-            });
-        }
-    }
-
-    // Extract keywords from headings
-    let mut keywords: HashSet<String> = HashSet::new();
-    for heading in &headings {
-        for kw in extract_keywords(&heading.text) {
-            keywords.insert(stem_word(&kw));
-        }
-    }
-
-    // NEW: Extract keywords from full body text
-    let mut body_keywords: HashSet<String> = HashSet::new();
-    for line in &lines {
-        // Skip code blocks
-        if line.starts_with("```") || line.starts_with("    ") {
-            continue;
-        }
-        for kw in extract_keywords(line) {
-            body_keywords.insert(stem_word(&kw));
-        }
-    }
-    // Remove heading keywords from body to avoid duplication
-    for kw in &keywords {
-        body_keywords.remove(kw);
-    }
-
-    // NEW: Compute term frequencies for BM25
-    let mut term_frequencies: HashMap<String, usize> = HashMap::new();
-    let mut total_terms = 0;
-
-    for line in &lines {
-        // Skip code blocks
-        if line.starts_with("```") || line.starts_with("    ") {
-            continue;
-        }
-        let words = extract_keywords(line);
-        for word in words {
-            let stemmed = stem_word(&word);
-            *term_frequencies.entry(stemmed).or_insert(0) += 1;
-            total_terms += 1;
-        }
-    }
-
-    // NEW: Compute MinHash signature
-    let all_keywords: Vec<String> = keywords
-        .iter()
-        .chain(body_keywords.iter())
-        .cloned()
-        .collect();
-    let minhash = compute_minhash(&all_keywords, 128);
-
-    // NEW: Compute section-level SimHash fingerprints
-    let section_fingerprints = index_sections(&content, &headings);
-    let metrics =
-        compute_document_metrics(&path.to_string_lossy(), &content, &lines, &headings, &links);
-
-    // Compute simhash fingerprint
-    let simhash = compute_simhash(&content);
-
-    // Extract ADR references from content
-    let adr_regex = Regex::new(r"\bADR[-_ ]?(\d{2,4})\b").unwrap();
-    let mut adr_references = Vec::new();
-    for (i, line) in lines.iter().enumerate() {
-        for caps in adr_regex.captures_iter(line) {
-            if let Some(num_match) = caps.get(1) {
-                let num_val: usize = num_match.as_str().parse().unwrap_or(0);
-                adr_references.push(AdrRef {
-                    line: i + 1,
-                    raw_text: caps.get(0).unwrap().as_str().to_string(),
-                    normalized_id: format!("{num_val:03}"),
-                });
-            }
-        }
-    }
-
-    Ok((
-        FileEntry {
-            path: path.to_string_lossy().to_string(),
-            size_bytes: metadata.len(),
-            line_count,
-            headings,
-            keywords: keywords.into_iter().collect(),
-            body_keywords: body_keywords.into_iter().collect(),
-            links,
-            simhash,
-            term_frequencies,
-            doc_length: total_terms,
-            minhash,
-            section_fingerprints,
-            adr_references,
-        },
-        metrics,
-    ))
-}
-
-fn compute_document_metrics(
-    path: &str,
-    content: &str,
-    lines: &[&str],
-    headings: &[Heading],
-    links: &[Link],
-) -> DocumentMetrics {
-    let word_re = Regex::new(r"[A-Za-z0-9_][A-Za-z0-9_-]*").unwrap();
-    let list_re = Regex::new(r"^(\s*[-+*]\s+|\s*\d+\.\s+)").unwrap();
-    let metadata_re =
-        Regex::new(r"^(?:\*\*[^*]+\*\*|[A-Za-z][A-Za-z0-9 _/\-]{1,40}):\s+\S").unwrap();
-
-    let mut h1_count = 0;
-    let mut h2_count = 0;
-    let mut h3_count = 0;
-    let mut h4_plus_count = 0;
-    let mut part_heading_count = 0;
-    let mut completion_heading_count = 0;
-    let mut changelog_heading_count = 0;
-
-    for heading in headings {
-        match heading.level {
-            1 => h1_count += 1,
-            2 => h2_count += 1,
-            3 => h3_count += 1,
-            _ => h4_plus_count += 1,
-        }
-        if heading_looks_like_part(&heading.text) {
-            part_heading_count += 1;
-        }
-        if heading_has_completion_marker(&heading.text) {
-            completion_heading_count += 1;
-        }
-        if heading_looks_like_changelog(&heading.text) {
-            changelog_heading_count += 1;
-        }
-    }
-
-    let code_block_count = count_code_blocks(lines);
-    let list_item_count = lines
-        .iter()
-        .filter(|line| list_re.is_match(line.trim_end()))
-        .count();
-    let table_row_count = lines
-        .iter()
-        .filter(|line| {
-            let trimmed = line.trim();
-            trimmed.matches('|').count() >= 2
-        })
-        .count();
-    let word_count = word_re.find_iter(content).count();
-    let (frontmatter_key_count, metadata_scan_start) = extract_frontmatter_key_count(lines);
-    let metadata_line_count = lines
-        .iter()
-        .enumerate()
-        .skip(metadata_scan_start)
-        .take_while(|(_, line)| {
-            let trimmed = line.trim();
-            !trimmed.is_empty() && !trimmed.starts_with('#')
-        })
-        .filter(|(_, line)| metadata_re.is_match(line.trim()))
-        .count();
-
-    let sections = compute_section_metrics(lines, headings, links);
-    let longest_section_lines = sections
-        .iter()
-        .map(|section| section.line_count)
-        .max()
-        .unwrap_or(0);
-    let changelog_entry_count = sections
-        .iter()
-        .filter(|section| section.looks_like_changelog)
-        .map(|section| section.list_item_count)
-        .sum();
-
-    DocumentMetrics {
-        path: path.to_string(),
-        line_count: lines.len(),
-        word_count,
-        heading_count: headings.len(),
-        section_count: sections.len(),
-        link_count: links.len(),
-        h1_count,
-        h2_count,
-        h3_count,
-        h4_plus_count,
-        code_block_count,
-        list_item_count,
-        table_row_count,
-        frontmatter_key_count,
-        metadata_line_count,
-        part_heading_count,
-        completion_heading_count,
-        changelog_heading_count,
-        changelog_entry_count,
-        longest_section_lines,
-        sections,
-    }
-}
-
-fn extract_frontmatter_key_count(lines: &[&str]) -> (usize, usize) {
-    if lines.first().map(|line| line.trim()) != Some("---") {
-        return (0, 0);
-    }
-
-    let mut key_count = 0;
-    for (idx, line) in lines.iter().enumerate().skip(1) {
-        let trimmed = line.trim();
-        if trimmed == "---" {
-            return (key_count, idx + 1);
-        }
-        if trimmed.is_empty() || trimmed.starts_with('#') {
-            continue;
-        }
-        if trimmed.contains(':') {
-            key_count += 1;
-        }
-    }
-
-    (0, 0)
-}
-
-fn heading_looks_like_part(text: &str) -> bool {
-    let trimmed = text.trim().to_ascii_lowercase();
-    trimmed.starts_with("part ")
-        && trimmed
-            .split_whitespace()
-            .nth(1)
-            .is_some_and(|token| token.chars().next().is_some_and(|ch| ch.is_ascii_digit()))
-}
-
-fn heading_has_completion_marker(text: &str) -> bool {
-    let lowered = text.to_ascii_lowercase();
-    lowered.contains("done")
-        || lowered.contains("complete")
-        || lowered.contains("completed")
-        || lowered.contains("resolved")
-}
-
-fn heading_looks_like_changelog(text: &str) -> bool {
-    let lowered = text.to_ascii_lowercase();
-    lowered.contains("changelog")
-        || lowered.contains("release notes")
-        || lowered == "changes"
-        || lowered.ends_with(" changes")
-        || lowered.ends_with(" history")
-}
-
-fn count_code_blocks(lines: &[&str]) -> usize {
-    let mut count = 0;
-    let mut in_block = false;
-
-    for line in lines {
-        if line.trim_start().starts_with("```") {
-            if !in_block {
-                count += 1;
-            }
-            in_block = !in_block;
-        }
-    }
-
-    count
-}
-
-fn compute_section_metrics(
-    lines: &[&str],
-    headings: &[Heading],
-    links: &[Link],
-) -> Vec<SectionMetrics> {
-    let word_re = Regex::new(r"[A-Za-z0-9_][A-Za-z0-9_-]*").unwrap();
-    let list_re = Regex::new(r"^(\s*[-+*]\s+|\s*\d+\.\s+)").unwrap();
-    let mut sections = Vec::new();
-
-    for idx in 0..headings.len() {
-        let start = headings[idx].line.saturating_sub(1);
-        let end = headings
-            .get(idx + 1)
-            .map_or(lines.len(), |heading| heading.line.saturating_sub(1));
-        let section_lines = &lines[start..end];
-        let section_text = section_lines.join("\n");
-        let line_start = start + 1;
-        let line_end = end;
-
-        sections.push(SectionMetrics {
-            heading: headings[idx].text.clone(),
-            level: headings[idx].level,
-            line_start,
-            line_end,
-            line_count: end.saturating_sub(start),
-            word_count: word_re.find_iter(&section_text).count(),
-            link_count: links
-                .iter()
-                .filter(|link| link.line >= line_start && link.line <= line_end)
-                .count(),
-            list_item_count: section_lines
-                .iter()
-                .filter(|line| list_re.is_match(line.trim_end()))
-                .count(),
-            code_block_count: count_code_blocks(section_lines),
-            has_completion_marker: heading_has_completion_marker(&headings[idx].text),
-            looks_like_part: heading_looks_like_part(&headings[idx].text),
-            looks_like_changelog: heading_looks_like_changelog(&headings[idx].text),
-        });
-    }
-
-    sections
-}
-
-fn extract_keywords(text: &str) -> Vec<String> {
-    extract_keywords_with_options(text, true)
-}
-
-fn extract_keywords_with_options(text: &str, filter_stopwords: bool) -> Vec<String> {
-    let stop_words: HashSet<&str> = default_query_stop_words().iter().copied().collect();
-
-    let word_re = Regex::new(r"[a-zA-Z][a-zA-Z0-9_-]*").unwrap();
-
-    word_re
-        .find_iter(text)
-        .map(|m| m.as_str().to_lowercase())
-        .filter(|w| w.len() >= 3 && (!filter_stopwords || !stop_words.contains(w.as_str())))
-        .collect()
-}
-
-#[derive(Debug, Clone)]
-struct ParsedQuery {
-    terms: Vec<String>,
-    phrases: Vec<PhraseGroup>,
-}
-
-#[derive(Debug, Clone)]
-struct PhraseGroup {
-    terms: Vec<String>,
-}
-
-fn parse_query_terms(query: &str, filter_stopwords: bool) -> Vec<String> {
-    extract_keywords_with_options(query, filter_stopwords)
-}
-
-fn parse_query(query: &str, filter_stopwords: bool) -> ParsedQuery {
-    let mut parts: Vec<(String, bool)> = Vec::new();
-    let mut buffer = String::new();
-    let mut in_quote = false;
-
-    for ch in query.chars() {
-        if ch == '"' {
-            let trimmed = buffer.trim();
-            if !trimmed.is_empty() {
-                parts.push((trimmed.to_string(), in_quote));
-            }
-            buffer.clear();
-            in_quote = !in_quote;
-            continue;
-        }
-        buffer.push(ch);
-    }
-
-    let trimmed = buffer.trim();
-    if !trimmed.is_empty() {
-        parts.push((trimmed.to_string(), in_quote));
-    }
-    let mut terms = Vec::new();
-    let mut phrases = Vec::new();
-
-    for (text, is_phrase) in parts {
-        let parsed_terms = parse_query_terms(&text, filter_stopwords);
-        terms.extend(parsed_terms.iter().cloned());
-        if is_phrase {
-            let phrase_terms = extract_keywords_with_options(&text, false);
-            if !phrase_terms.is_empty() {
-                phrases.push(PhraseGroup {
-                    terms: phrase_terms,
-                });
-            }
-        }
-    }
-
-    ParsedQuery { terms, phrases }
-}
-
-/// Simple suffix-stripping stemmer
-fn stem_word(word: &str) -> String {
-    let w = word.to_lowercase();
-
-    // Common suffixes to strip
-    let suffixes = [
-        "ization", "ational", "iveness", "fulness", "ousness", "ation", "ement", "ment", "able",
-        "ible", "ness", "ical", "ings", "ing", "ies", "ive", "ful", "ous", "ity", "ed", "ly", "er",
-        "es", "s",
-    ];
-
-    for suffix in suffixes {
-        if w.len() > suffix.len() + 2 && w.ends_with(suffix) {
-            return w[..w.len() - suffix.len()].to_string();
-        }
-    }
-
-    w
-}
-
-/// Extract top N distinctive terms from a document, excluding query terms.
-/// Returns human-readable (unstemmed) terms ranked by TF-IDF.
-fn get_top_doc_terms(
-    entry: &FileEntry,
-    idf_map: &HashMap<String, f64>,
-    exclude_terms: &[String],
-    n: usize,
-) -> Vec<String> {
-    if n == 0 {
-        return Vec::new();
-    }
-
-    // Stem the exclusion terms for comparison
-    let exclude_stemmed: HashSet<String> = exclude_terms
-        .iter()
-        .map(|t| stem_word(&t.to_lowercase()))
-        .collect();
-
-    // Collect unique keywords with their TF-IDF scores
-    // Use body_keywords (unstemmed) but rank by term_frequencies (stemmed)
-    let mut seen_stems: HashSet<String> = HashSet::new();
-    let mut term_scores: Vec<(String, f64)> = Vec::new();
-
-    for kw in entry.body_keywords.iter().chain(entry.keywords.iter()) {
-        let stemmed = stem_word(&kw.to_lowercase());
-
-        // Skip if already seen this stem, or if it's an excluded term
-        if seen_stems.contains(&stemmed) || exclude_stemmed.contains(&stemmed) {
-            continue;
-        }
-        seen_stems.insert(stemmed.clone());
-
-        // Calculate TF-IDF score
-        let tf = *entry.term_frequencies.get(&stemmed).unwrap_or(&0) as f64;
-        let idf = *idf_map.get(&stemmed).unwrap_or(&0.0);
-        let score = tf * idf;
-
-        if score > 0.0 {
-            term_scores.push((kw.to_lowercase(), score));
-        }
-    }
-
-    // Sort by score descending
-    term_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
-
-    // Take top N
-    term_scores
-        .into_iter()
-        .take(n)
-        .map(|(term, _)| term)
-        .collect()
-}
-
-/// Compute simhash fingerprint for content
-fn compute_simhash(content: &str) -> u64 {
-    let mut v = [0i32; 64];
-
-    // Extract features (word shingles)
-    let words: Vec<&str> = content.split_whitespace().collect();
-
-    for window in words.windows(3) {
-        let shingle = format!("{} {} {}", window[0], window[1], window[2]);
-        let h = hash_string(&shingle);
-
-        for (i, item) in v.iter_mut().enumerate() {
-            if (h >> i) & 1 == 1 {
-                *item += 1;
-            } else {
-                *item -= 1;
-            }
-        }
-    }
-
-    // Convert to fingerprint
-    let mut fingerprint: u64 = 0;
-    for (i, item) in v.iter().enumerate() {
-        if *item > 0 {
-            fingerprint |= 1 << i;
-        }
-    }
-
-    fingerprint
-}
-
-fn hash_string(s: &str) -> u64 {
-    use std::collections::hash_map::DefaultHasher;
-    let mut hasher = DefaultHasher::new();
-    s.hash(&mut hasher);
-    hasher.finish()
-}
-
-/// Count differing bits between two simhashes (Hamming distance)
-fn hamming_distance(a: u64, b: u64) -> u32 {
-    (a ^ b).count_ones()
-}
-
-/// Convert hamming distance to similarity (0.0 to 1.0)
-fn simhash_similarity(a: u64, b: u64) -> f64 {
-    let distance = hamming_distance(a, b);
-    1.0 - (f64::from(distance) / 64.0)
-}
-
-/// Index sections of a document with SimHash fingerprints
-fn index_sections(content: &str, headings: &[Heading]) -> Vec<SectionFingerprint> {
-    let lines: Vec<&str> = content.lines().collect();
-    let mut sections = Vec::new();
-
-    if headings.is_empty() {
-        return sections;
-    }
-
-    for i in 0..headings.len() {
-        let start = headings[i].line.saturating_sub(1);
-        let end = headings
-            .get(i + 1)
-            .map_or(lines.len(), |h| h.line.saturating_sub(1));
-
-        // Extract section text
-        let section_text = lines[start..end].join("\n");
-
-        sections.push(SectionFingerprint {
-            heading: headings[i].text.clone(),
-            level: headings[i].level,
-            line_start: start + 1,
-            line_end: end,
-            simhash: compute_simhash(&section_text),
-        });
-    }
-
-    sections
-}
-
-/// Compute MinHash signature for a set of keywords
-fn compute_minhash(keywords: &[String], num_hashes: usize) -> Vec<u64> {
-    let mut hashes = vec![u64::MAX; num_hashes];
-
-    for keyword in keywords {
-        for (i, hash_slot) in hashes.iter_mut().enumerate().take(num_hashes) {
-            let mut hasher = AHasher::default();
-            keyword.hash(&mut hasher);
-            i.hash(&mut hasher); // Use index as seed
-            let h = hasher.finish();
-
-            *hash_slot = (*hash_slot).min(h);
-        }
-    }
-
-    hashes
-}
-
-/// Compute MinHash similarity (Jaccard estimate)
-fn minhash_similarity(a: &[u64], b: &[u64]) -> f64 {
-    if a.len() != b.len() || a.is_empty() {
-        return 0.0;
-    }
-
-    let matches = a.iter().zip(b.iter()).filter(|(x, y)| x == y).count();
-
-    matches as f64 / a.len() as f64
-}
-
-const BM25_K1: f64 = 1.5;
-const BM25_B: f64 = 0.75;
-
-/// Compute BM25 score for a document given query terms
-fn bm25_score(
-    query_terms: &[String],
-    doc: &FileEntry,
-    avg_doc_length: f64,
-    idf_map: &HashMap<String, f64>,
-) -> f64 {
-    if doc.doc_length == 0 {
-        return 0.0;
-    }
-
-    let mut score = 0.0;
-    let norm_factor = 1.0 - BM25_B + BM25_B * (doc.doc_length as f64 / avg_doc_length);
-
-    for term in query_terms {
-        let stemmed = stem_word(&term.to_lowercase());
-        let tf = *doc.term_frequencies.get(&stemmed).unwrap_or(&0) as f64;
-        let idf = idf_map.get(&stemmed).unwrap_or(&0.0);
-
-        if tf > 0.0 {
-            score += idf * (tf * (BM25_K1 + 1.0)) / (tf + BM25_K1 * norm_factor);
-        }
-    }
-
-    score
-}
-
-/// Build LSH buckets for fast duplicate detection
-fn lsh_buckets(files: &HashMap<String, FileEntry>, bands: usize) -> HashMap<u64, Vec<String>> {
-    let rows_per_band = 128 / bands; // Assuming 128 hashes
-    let mut buckets: HashMap<u64, Vec<String>> = HashMap::new();
-
-    for (path, entry) in files {
-        if entry.minhash.is_empty() {
-            continue; // Skip files without MinHash
-        }
-
-        for band in 0..bands {
-            let start = band * rows_per_band;
-            let end = (start + rows_per_band).min(entry.minhash.len());
-
-            // Hash this band's values
-            let mut hasher = AHasher::default();
-            for val in &entry.minhash[start..end] {
-                val.hash(&mut hasher);
-            }
-            let band_hash = hasher.finish();
-
-            buckets.entry(band_hash).or_default().push(path.clone());
-        }
-    }
-
-    buckets
-}
-
-fn contains_phrase_tokens(haystack: &[String], needle: &[String]) -> bool {
-    if needle.is_empty() || haystack.len() < needle.len() {
-        return false;
-    }
-    haystack
-        .windows(needle.len())
-        .any(|window| window == needle)
-}
-
-struct QueryDiagnostics {
-    tokens: Vec<String>,
-    stems: Vec<String>,
-    missing_terms: Vec<String>,
-    idf_values: Vec<(String, String, f64)>,
-    index_path: String,
-    doc_count: usize,
-}
-
-fn build_query_diagnostics(
-    parsed: &ParsedQuery,
-    forward_index: &ForwardIndex,
-    index_dir: &Path,
-) -> QueryDiagnostics {
-    let tokens = parsed.terms.clone();
-    let stems: Vec<String> = tokens
-        .iter()
-        .map(|t| stem_word(&t.to_lowercase()))
-        .collect();
-    let mut missing_set: HashSet<String> = HashSet::new();
-    let mut missing_terms = Vec::new();
-    let mut idf_values = Vec::new();
-
-    for term in &tokens {
-        let stem = stem_word(&term.to_lowercase());
-        let idf = *forward_index.idf_map.get(&stem).unwrap_or(&0.0);
-        idf_values.push((term.clone(), stem.clone(), idf));
-        if !forward_index.idf_map.contains_key(&stem) && missing_set.insert(term.clone()) {
-            missing_terms.push(term.clone());
-        }
-    }
-
-    QueryDiagnostics {
-        tokens,
-        stems,
-        missing_terms,
-        idf_values,
-        index_path: index_dir.display().to_string(),
-        doc_count: forward_index.files.len(),
-    }
-}
-
-fn print_query_diagnostics(
-    diagnostics: &QueryDiagnostics,
-    include_scoring: bool,
-    include_suggestions: bool,
-) {
-    println!("{}", "Diagnostics:".dimmed());
-    println!(
-        "  {} {}",
-        "tokens:".dimmed(),
-        if diagnostics.tokens.is_empty() {
-            "(none)".to_string()
-        } else {
-            diagnostics.tokens.join(" ")
-        }
-    );
-    println!(
-        "  {} {}",
-        "stems:".dimmed(),
-        if diagnostics.stems.is_empty() {
-            "(none)".to_string()
-        } else {
-            diagnostics.stems.join(" ")
-        }
-    );
-    println!(
-        "  {} {}",
-        "missing:".dimmed(),
-        if diagnostics.missing_terms.is_empty() {
-            "(none)".to_string()
-        } else {
-            diagnostics.missing_terms.join(" ")
-        }
-    );
-    println!(
-        "  {} {} ({} docs)",
-        "index:".dimmed(),
-        diagnostics.index_path,
-        diagnostics.doc_count
-    );
-
-    if include_scoring {
-        let mut idf_parts = Vec::new();
-        for (term, stem, idf) in &diagnostics.idf_values {
-            idf_parts.push(format!("{term}->{stem}:{idf:.3}"));
-        }
-        println!(
-            "  {} {}",
-            "idf:".dimmed(),
-            if idf_parts.is_empty() {
-                "(none)".to_string()
-            } else {
-                idf_parts.join(", ")
-            }
-        );
-        println!("  {} k1={:.2}, b={:.2}", "bm25:".dimmed(), BM25_K1, BM25_B);
-    }
-
-    if include_suggestions {
-        println!(
-            "  {} try fewer terms; use --no-stopwords; run yore stats; check index path",
-            "suggestions:".dimmed()
-        );
-    }
-}
-
-struct QueryOptions {
-    limit: usize,
-    files_only: bool,
-    json: bool,
-    doc_terms: usize,
-    explain: bool,
-    require_phrases: bool,
-    filter_stopwords: bool,
-}
-
-struct AssembleOptions {
-    max_tokens: usize,
-    max_sections: usize,
-    depth: usize,
-    format: String,
-    doc_terms: usize,
-    use_relations: bool,
-}
-
-struct HealthOptions {
-    max_lines: usize,
-    max_part_sections: usize,
-    max_completed_lines: usize,
-    max_changelog_entries: usize,
-}
-
-fn cmd_query(
-    query: &str,
-    index_dir: &Path,
-    options: &QueryOptions,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let parsed = parse_query(query, options.filter_stopwords);
-    if parsed.terms.is_empty() {
-        if options.json {
-            let obj = serde_json::json!({
-                "query": query,
-                "error": "no_query_terms"
-            });
-            println!("{}", serde_json::to_string_pretty(&obj)?);
-        } else {
-            println!(
-                "{}",
-                "No searchable terms in query. Try different keywords or use --no-stopwords."
-                    .yellow()
-            );
-        }
-        return Ok(());
-    }
-    let _reverse_index = load_reverse_index(index_dir)?;
-    let forward_index = load_forward_index(index_dir)?;
-    let diagnostics = build_query_diagnostics(&parsed, &forward_index, index_dir);
-
-    // Compute BM25 scores for all documents
-    let mut file_scores: Vec<(String, f64)> = forward_index
-        .files
-        .iter()
-        .map(|(path, entry)| {
-            let score = bm25_score(
-                &parsed.terms,
-                entry,
-                forward_index.avg_doc_length,
-                &forward_index.idf_map,
-            );
-            (path.clone(), score)
-        })
-        .filter(|(_, score)| *score > 0.0)
-        .collect();
-
-    // Sort by BM25 score (descending)
-    file_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
-    let results = if parsed.phrases.is_empty() {
-        file_scores.truncate(options.limit);
-        file_scores
-    } else {
-        let candidate_cap = std::cmp::min(
-            file_scores.len(),
-            std::cmp::max(options.limit.saturating_mul(10), 100),
-        );
-        let mut candidates = file_scores[..candidate_cap].to_vec();
-
-        for (path, score) in &mut candidates {
-            let content = std::fs::read_to_string(Path::new(path)).unwrap_or_default();
-            let content_terms = extract_keywords_with_options(&content, false);
-            let mut matched_phrases = 0usize;
-
-            for phrase in &parsed.phrases {
-                if contains_phrase_tokens(&content_terms, &phrase.terms) {
-                    matched_phrases += 1;
-                }
-            }
-
-            if options.require_phrases && matched_phrases < parsed.phrases.len() {
-                *score = 0.0;
-            } else if matched_phrases > 0 {
-                *score += matched_phrases as f64;
-            }
-        }
-
-        if options.require_phrases {
-            candidates.retain(|(_, score)| *score > 0.0);
-        }
-
-        candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
-        candidates.truncate(options.limit);
-        candidates
-    };
-
-    if options.json {
-        let output: Vec<_> = results
-            .iter()
-            .map(|(path, score)| {
-                let mut obj = serde_json::json!({
-                    "path": path,
-                    "score": score,
-                    "query": query
-                });
-                if options.doc_terms > 0 {
-                    if let Some(entry) = forward_index.files.get(path) {
-                        let top_terms = get_top_doc_terms(
-                            entry,
-                            &forward_index.idf_map,
-                            &parsed.terms,
-                            options.doc_terms,
-                        );
-                        obj["doc_terms"] = serde_json::json!(top_terms);
-                    }
-                }
-                obj
-            })
-            .collect();
-
-        if options.explain {
-            let notice = if output.is_empty() {
-                Some("No data to explain.".to_string())
-            } else {
-                None
-            };
-            let diag_json = serde_json::json!({
-                "tokens": diagnostics.tokens,
-                "stems": diagnostics.stems,
-                "missing_terms": diagnostics.missing_terms,
-                "idf": diagnostics.idf_values.iter().map(|(term, stem, idf)| {
-                    serde_json::json!({
-                        "term": term,
-                        "stem": stem,
-                        "idf": idf
-                    })
-                }).collect::<Vec<_>>(),
-                "bm25": {
-                    "k1": BM25_K1,
-                    "b": BM25_B,
-                    "avg_doc_length": forward_index.avg_doc_length
-                },
-                "index_path": diagnostics.index_path,
-                "doc_count": diagnostics.doc_count,
-                "notice": notice,
-                "suggestions": if output.is_empty() {
-                    serde_json::json!(["try fewer terms", "use --no-stopwords", "run yore stats", "check index path"])
-                } else {
-                    serde_json::Value::Null
-                }
-            });
-            let wrapped = serde_json::json!({
-                "query": query,
-                "results": output,
-                "diagnostics": diag_json
-            });
-            println!("{}", serde_json::to_string_pretty(&wrapped)?);
-        } else {
-            println!("{}", serde_json::to_string_pretty(&output)?);
-        }
-        return Ok(());
-    }
-
-    if results.is_empty() {
-        println!("{}", "No results found.".yellow());
-        if options.explain {
-            println!("{}", "No data to explain.".dimmed());
-        }
-        print_query_diagnostics(&diagnostics, options.explain, true);
-        return Ok(());
-    }
-
-    println!(
-        "{} results for: {}\n",
-        results.len().to_string().green().bold(),
-        parsed.terms.join(" ").cyan()
-    );
-
-    for (file, score) in results {
-        if options.files_only {
-            println!("{file}");
-        } else {
-            println!("{} (score: {:.2})", file.cyan(), score);
-
-            // Show doc terms if requested
-            if options.doc_terms > 0 {
-                if let Some(entry) = forward_index.files.get(&file) {
-                    let top_terms = get_top_doc_terms(
-                        entry,
-                        &forward_index.idf_map,
-                        &parsed.terms,
-                        options.doc_terms,
-                    );
-                    if !top_terms.is_empty() {
-                        println!("  {} {}", "terms:".dimmed(), top_terms.join(", "));
-                    }
-                }
-            }
-
-            // Show matching headings
-            if let Some(entry) = forward_index.files.get(&file) {
-                for heading in entry.headings.iter().take(3) {
-                    let heading_keywords: HashSet<String> = extract_keywords(&heading.text)
-                        .into_iter()
-                        .map(|k| stem_word(&k))
-                        .collect();
-
-                    let matches: Vec<_> = parsed
-                        .terms
-                        .iter()
-                        .filter(|t| heading_keywords.contains(&stem_word(&t.to_lowercase())))
-                        .collect();
-
-                    if !matches.is_empty() {
-                        println!(
-                            "  {} L{}: {}",
-                            ">".dimmed(),
-                            heading.line.to_string().dimmed(),
-                            heading.text
-                        );
-                    }
-                }
-            }
-            println!();
-        }
-    }
-
-    if options.explain {
-        print_query_diagnostics(&diagnostics, true, false);
-    }
-
-    Ok(())
-}
-
-fn cmd_similar(
-    file: &Path,
-    limit: usize,
-    threshold: f64,
-    json: bool,
-    doc_terms: usize,
-    index_dir: &Path,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-
-    // Get keywords for reference file - try multiple path formats
-    let file_str = file.to_string_lossy().to_string();
-    let file_with_dot = format!("./{}", file_str.trim_start_matches("./"));
-    let file_without_dot = file_str.trim_start_matches("./").to_string();
-
-    let (matched_path, ref_entry) = forward_index
-        .files
-        .get(&file_str)
-        .map(|e| (file_str.clone(), e))
-        .or_else(|| {
-            forward_index
-                .files
-                .get(&file_with_dot)
-                .map(|e| (file_with_dot.clone(), e))
-        })
-        .or_else(|| {
-            forward_index
-                .files
-                .get(&file_without_dot)
-                .map(|e| (file_without_dot.clone(), e))
-        })
-        .ok_or_else(|| format!("File not in index: {file_str}"))?;
-
-    // Combine heading and body keywords
-    let ref_keywords: HashSet<String> = ref_entry
-        .keywords
-        .iter()
-        .chain(ref_entry.body_keywords.iter())
-        .map(|k| k.to_lowercase())
-        .collect();
-
-    // For doc_terms, exclude the reference file's terms
-    let ref_terms_vec: Vec<String> = ref_entry
-        .body_keywords
-        .iter()
-        .chain(ref_entry.keywords.iter())
-        .map(|k| k.to_lowercase())
-        .collect();
-
-    // Compare with all other files using both Jaccard and Simhash
-    let mut similarities: Vec<(String, f64, f64, f64)> = Vec::new(); // (path, jaccard, simhash, combined)
-
-    for (path, entry) in &forward_index.files {
-        if path == &matched_path {
-            continue;
-        }
-
-        let other_keywords: HashSet<String> = entry
-            .keywords
-            .iter()
-            .chain(entry.body_keywords.iter())
-            .map(|k| k.to_lowercase())
-            .collect();
-
-        let jaccard = jaccard_similarity(&ref_keywords, &other_keywords);
-        let simhash_sim = simhash_similarity(ref_entry.simhash, entry.simhash);
-
-        // Combined score: weighted average
-        let combined = jaccard * 0.6 + simhash_sim * 0.4;
-
-        if combined >= threshold {
-            similarities.push((path.clone(), jaccard, simhash_sim, combined));
-        }
-    }
-
-    // Sort by combined similarity
-    similarities.sort_by(|a, b| b.3.partial_cmp(&a.3).unwrap());
-    similarities.truncate(limit);
-
-    if json {
-        let output: Vec<_> = similarities
-            .iter()
-            .map(|(p, j, s, c)| {
-                let mut obj = serde_json::json!({
-                    "path": p,
-                    "jaccard": j,
-                    "simhash": s,
-                    "combined": c
-                });
-                if doc_terms > 0 {
-                    if let Some(entry) = forward_index.files.get(p) {
-                        let top_terms = get_top_doc_terms(
-                            entry,
-                            &forward_index.idf_map,
-                            &ref_terms_vec,
-                            doc_terms,
-                        );
-                        obj["doc_terms"] = serde_json::json!(top_terms);
-                    }
-                }
-                obj
-            })
-            .collect();
-        println!("{}", serde_json::to_string_pretty(&output)?);
-        return Ok(());
-    }
-
-    if similarities.is_empty() {
-        println!("{}", "No similar files found.".yellow());
-        return Ok(());
-    }
-
-    println!("Files similar to: {}\n", matched_path.cyan());
-    println!("{:>5} {:>5} {:>5}  Path", "Comb", "Jacc", "Sim");
-    println!("{}", "-".repeat(60));
-
-    for (path, jaccard, simhash_sim, combined) in similarities {
-        let comb_pct = (combined * 100.0) as u32;
-        let jacc_pct = (jaccard * 100.0) as u32;
-        let sim_pct = (simhash_sim * 100.0) as u32;
-        println!(
-            "{:>4}% {:>4}% {:>4}%  {}",
-            comb_pct.to_string().green(),
-            jacc_pct.to_string().cyan(),
-            sim_pct.to_string().yellow(),
-            path
-        );
-
-        // Show doc terms if requested
-        if doc_terms > 0 {
-            if let Some(entry) = forward_index.files.get(&path) {
-                let top_terms =
-                    get_top_doc_terms(entry, &forward_index.idf_map, &ref_terms_vec, doc_terms);
-                if !top_terms.is_empty() {
-                    println!(
-                        "                   {} {}",
-                        "terms:".dimmed(),
-                        top_terms.join(", ")
-                    );
-                }
-            }
-        }
-    }
-
-    Ok(())
-}
-
-fn cmd_dupes(
-    threshold: f64,
-    group: bool,
-    json: bool,
-    index_dir: &Path,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-    let start = Instant::now();
-
-    // Build LSH buckets for fast duplicate detection
-    let buckets = lsh_buckets(&forward_index.files, 16); // 16 bands x 8 rows = 128 hashes
-    let mut candidates: HashSet<(String, String)> = HashSet::new();
-
-    // Collect candidate pairs from buckets
-    for paths in buckets.values() {
-        if paths.len() > 1 {
-            for i in 0..paths.len() {
-                for j in (i + 1)..paths.len() {
-                    let (p1, p2) = if paths[i] < paths[j] {
-                        (paths[i].clone(), paths[j].clone())
-                    } else {
-                        (paths[j].clone(), paths[i].clone())
-                    };
-                    candidates.insert((p1, p2));
-                }
-            }
-        }
-    }
-
-    let mut duplicates: Vec<(String, String, f64, f64, f64, f64)> = Vec::new(); // (path1, path2, jaccard, simhash, minhash, combined)
-
-    // Compare candidate pairs
-    for (path1, path2) in &candidates {
-        if let (Some(entry1), Some(entry2)) = (
-            forward_index.files.get(path1),
-            forward_index.files.get(path2),
-        ) {
-            let kw1: HashSet<String> = entry1
-                .keywords
-                .iter()
-                .chain(entry1.body_keywords.iter())
-                .map(|k| k.to_lowercase())
-                .collect();
-            let kw2: HashSet<String> = entry2
-                .keywords
-                .iter()
-                .chain(entry2.body_keywords.iter())
-                .map(|k| k.to_lowercase())
-                .collect();
-
-            let jaccard = jaccard_similarity(&kw1, &kw2);
-            let simhash_sim = simhash_similarity(entry1.simhash, entry2.simhash);
-            let minhash_sim = minhash_similarity(&entry1.minhash, &entry2.minhash);
-            let combined = jaccard * 0.4 + simhash_sim * 0.3 + minhash_sim * 0.3;
-
-            if combined >= threshold {
-                duplicates.push((
-                    path1.clone(),
-                    path2.clone(),
-                    jaccard,
-                    simhash_sim,
-                    minhash_sim,
-                    combined,
-                ));
-            }
-        }
-    }
-
-    let elapsed = start.elapsed();
-
-    // Sort by combined similarity
-    duplicates.sort_by(|a, b| b.5.partial_cmp(&a.5).unwrap_or(std::cmp::Ordering::Equal));
-
-    if json {
-        let output: Vec<_> = duplicates
-            .iter()
-            .map(|(p1, p2, j, s, m, c)| {
-                serde_json::json!({
-                    "file1": p1,
-                    "file2": p2,
-                    "jaccard": j,
-                    "simhash": s,
-                    "minhash": m,
-                    "combined": c
-                })
-            })
-            .collect();
-        println!("{}", serde_json::to_string_pretty(&output)?);
-        return Ok(());
-    }
-
-    if duplicates.is_empty() {
-        println!("{}", "No duplicates found above threshold.".green());
-        eprintln!(
-            "LSH duplicate detection: {:?} ({} candidate pairs from {} buckets)",
-            elapsed,
-            candidates.len(),
-            buckets.len()
-        );
-        return Ok(());
-    }
-
-    println!(
-        "{} duplicate pairs found (threshold: {}%)",
-        duplicates.len().to_string().yellow().bold(),
-        (threshold * 100.0) as u32
-    );
-    eprintln!(
-        "LSH duplicate detection: {:?} ({} candidates from {} buckets)\n",
-        elapsed,
-        candidates.len(),
-        buckets.len()
-    );
-
-    if group {
-        // Group duplicates
-        let mut groups: HashMap<String, Vec<(String, f64)>> = HashMap::new();
-
-        for (path1, path2, _, _, _, combined) in &duplicates {
-            let group = groups.entry(path1.clone()).or_default();
-            if !group.iter().any(|(p, _)| p == path2) {
-                group.push((path2.clone(), *combined));
-            }
-        }
-
-        for (file, related) in groups {
-            println!("{}", file.cyan());
-            for (r, sim) in related {
-                println!("  {} {}% {}", "~".dimmed(), (sim * 100.0) as u32, r);
-            }
-            println!();
-        }
-    } else {
-        for (path1, path2, jaccard, simhash_sim, minhash_sim, combined) in
-            duplicates.iter().take(50)
-        {
-            let comb_pct = (combined * 100.0) as u32;
-            println!(
-                "{}% [J:{}% S:{}% M:{}%] {} <-> {}",
-                comb_pct.to_string().yellow(),
-                (jaccard * 100.0) as u32,
-                (simhash_sim * 100.0) as u32,
-                (minhash_sim * 100.0) as u32,
-                path1.cyan(),
-                path2
-            );
-        }
-
-        if duplicates.len() > 50 {
-            println!(
-                "\n{}",
-                format!("... and {} more", duplicates.len() - 50).dimmed()
-            );
-        }
-    }
-
-    Ok(())
-}
-
-fn compute_duplicate_pairs(
-    forward_index: &ForwardIndex,
-    threshold: f64,
-) -> Vec<(String, String, f64)> {
-    // Build LSH buckets for duplicate detection
-    let buckets = lsh_buckets(&forward_index.files, 16); // 16 bands x 8 rows = 128 hashes
-    let mut candidates: HashSet<(String, String)> = HashSet::new();
-
-    // Collect candidate pairs from buckets
-    for paths in buckets.values() {
-        if paths.len() > 1 {
-            for i in 0..paths.len() {
-                for j in (i + 1)..paths.len() {
-                    let (p1, p2) = if paths[i] < paths[j] {
-                        (paths[i].clone(), paths[j].clone())
-                    } else {
-                        (paths[j].clone(), paths[i].clone())
-                    };
-                    candidates.insert((p1, p2));
-                }
-            }
-        }
-    }
-
-    let mut pairs: Vec<(String, String, f64)> = Vec::new(); // (path1, path2, combined)
-
-    for (path1, path2) in &candidates {
-        if let (Some(entry1), Some(entry2)) = (
-            forward_index.files.get(path1),
-            forward_index.files.get(path2),
-        ) {
-            let kw1: HashSet<String> = entry1
-                .keywords
-                .iter()
-                .chain(entry1.body_keywords.iter())
-                .map(|k| k.to_lowercase())
-                .collect();
-            let kw2: HashSet<String> = entry2
-                .keywords
-                .iter()
-                .chain(entry2.body_keywords.iter())
-                .map(|k| k.to_lowercase())
-                .collect();
-
-            let jaccard = jaccard_similarity(&kw1, &kw2);
-            let simhash_sim = simhash_similarity(entry1.simhash, entry2.simhash);
-            let minhash_sim = minhash_similarity(&entry1.minhash, &entry2.minhash);
-            let combined = jaccard * 0.4 + simhash_sim * 0.3 + minhash_sim * 0.3;
-
-            if combined >= threshold {
-                pairs.push((path1.clone(), path2.clone(), combined));
-            }
-        }
-    }
-
-    // Sort descending by similarity for stable output
-    pairs.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
-    pairs
-}
-
-fn build_consolidation_groups(
-    forward_index: &ForwardIndex,
-    pairs: &[(String, String, f64)],
-) -> ConsolidationResult {
-    use std::cmp::Ordering;
-
-    // Build adjacency graph
-    let mut adj: HashMap<String, HashSet<String>> = HashMap::new();
-    let mut pair_sims: HashMap<(String, String), f64> = HashMap::new();
-
-    for (a, b, sim) in pairs {
-        adj.entry(a.clone()).or_default().insert(b.clone());
-        adj.entry(b.clone()).or_default().insert(a.clone());
-
-        let key = if a <= b {
-            (a.clone(), b.clone())
-        } else {
-            (b.clone(), a.clone())
-        };
-        pair_sims.insert(key, *sim);
-    }
-
-    let mut visited: HashSet<String> = HashSet::new();
-    let mut groups: Vec<ConsolidationGroup> = Vec::new();
-
-    for start in adj.keys() {
-        if visited.contains(start) {
-            continue;
-        }
-
-        // BFS/DFS to collect connected component
-        let mut stack = vec![start.clone()];
-        let mut component: Vec<String> = Vec::new();
-
-        while let Some(node) = stack.pop() {
-            if !visited.insert(node.clone()) {
-                continue;
-            }
-            component.push(node.clone());
-            if let Some(neighbors) = adj.get(&node) {
-                for n in neighbors {
-                    if !visited.contains(n) {
-                        stack.push(n.clone());
-                    }
-                }
-            }
-        }
-
-        if component.len() < 2 {
-            continue;
-        }
-
-        // Choose canonical doc via canonicality score
-        component.sort(); // deterministic order
-        let mut best: Option<(String, f64)> = None;
-        for path in &component {
-            if let Some(entry) = forward_index.files.get(path) {
-                let (score, _reasons) = score_canonicality_with_reasons(path, entry);
-                match best {
-                    None => best = Some((path.clone(), score)),
-                    Some((_, best_score)) => {
-                        if score > best_score
-                            || (score == best_score
-                                && path.cmp(&best.as_ref().unwrap().0) == Ordering::Less)
-                        {
-                            best = Some((path.clone(), score));
-                        }
-                    }
-                }
-            }
-        }
-
-        let Some((canonical, canonical_score)) = best else {
-            continue;
-        };
-
-        let mut merge_into: Vec<String> = component
-            .iter()
-            .filter(|p| *p != &canonical)
-            .cloned()
-            .collect();
-        if merge_into.is_empty() {
-            continue;
-        }
-
-        merge_into.sort();
-
-        // Compute average similarity between canonical and others
-        let mut total_sim = 0.0;
-        let mut count = 0usize;
-        for other in &merge_into {
-            let key = if &canonical <= other {
-                (canonical.clone(), other.clone())
-            } else {
-                (other.clone(), canonical.clone())
-            };
-            if let Some(sim) = pair_sims.get(&key) {
-                total_sim += *sim;
-                count += 1;
-            }
-        }
-        let avg_similarity = if count > 0 {
-            total_sim / (count as f64)
-        } else {
-            0.0
-        };
-
-        let note = format!(
-            "Merge {} file(s) into canonical {}",
-            merge_into.len(),
-            canonical
-        );
-
-        groups.push(ConsolidationGroup {
-            canonical,
-            merge_into,
-            canonical_score,
-            avg_similarity,
-            note,
-        });
-    }
-
-    // Stable ordering: sort by canonical path
-    groups.sort_by(|a, b| a.canonical.cmp(&b.canonical));
-
-    ConsolidationResult {
-        total_groups: groups.len(),
-        groups,
-    }
-}
-
-/// NEW: Show what's shared between two files
-fn cmd_diff(
-    file1: &Path,
-    file2: &Path,
-    index_dir: &Path,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-
-    // Resolve paths
-    let resolve_path = |f: &Path| -> Option<(String, &FileEntry)> {
-        let s = f.to_string_lossy().to_string();
-        let with_dot = format!("./{}", s.trim_start_matches("./"));
-        let without_dot = s.trim_start_matches("./").to_string();
-
-        forward_index
-            .files
-            .get(&s)
-            .map(|e| (s.clone(), e))
-            .or_else(|| {
-                forward_index
-                    .files
-                    .get(&with_dot)
-                    .map(|e| (with_dot.clone(), e))
-            })
-            .or_else(|| {
-                forward_index
-                    .files
-                    .get(&without_dot)
-                    .map(|e| (without_dot, e))
-            })
-    };
-
-    let (path1, entry1) =
-        resolve_path(file1).ok_or_else(|| format!("File not in index: {}", file1.display()))?;
-    let (path2, entry2) =
-        resolve_path(file2).ok_or_else(|| format!("File not in index: {}", file2.display()))?;
-
-    // Compute similarities
-    let kw1: HashSet<String> = entry1
-        .keywords
-        .iter()
-        .chain(entry1.body_keywords.iter())
-        .map(|k| k.to_lowercase())
-        .collect();
-    let kw2: HashSet<String> = entry2
-        .keywords
-        .iter()
-        .chain(entry2.body_keywords.iter())
-        .map(|k| k.to_lowercase())
-        .collect();
-
-    let shared: HashSet<_> = kw1.intersection(&kw2).cloned().collect();
-    let only_in_1: HashSet<_> = kw1.difference(&kw2).cloned().collect();
-    let only_in_2: HashSet<_> = kw2.difference(&kw1).cloned().collect();
-
-    let jaccard = jaccard_similarity(&kw1, &kw2);
-    let simhash_sim = simhash_similarity(entry1.simhash, entry2.simhash);
-    let combined = jaccard * 0.6 + simhash_sim * 0.4;
-
-    // Show shared headings
-    let h1: HashSet<String> = entry1
-        .headings
-        .iter()
-        .map(|h| h.text.to_lowercase())
-        .collect();
-    let h2: HashSet<String> = entry2
-        .headings
-        .iter()
-        .map(|h| h.text.to_lowercase())
-        .collect();
-    let shared_headings: Vec<String> = h1.intersection(&h2).cloned().collect();
-
-    if json {
-        let mut shared_vec: Vec<_> = shared.iter().cloned().collect();
-        shared_vec.sort();
-        let mut only1_vec: Vec<_> = only_in_1.iter().cloned().collect();
-        only1_vec.sort();
-        let mut only2_vec: Vec<_> = only_in_2.iter().cloned().collect();
-        only2_vec.sort();
-        let mut headings_vec = shared_headings.clone();
-        headings_vec.sort();
-
-        let result = DiffResult {
-            file1: path1,
-            file2: path2,
-            similarity: DiffSimilarity {
-                combined,
-                jaccard,
-                simhash: simhash_sim,
-            },
-            shared_keywords: shared_vec,
-            only_in_file1: only1_vec,
-            only_in_file2: only2_vec,
-            shared_headings: headings_vec,
-        };
-        println!("{}", serde_json::to_string_pretty(&result)?);
-        return Ok(());
-    }
-
-    println!("{}", "Comparison".green().bold());
-    println!();
-    println!("  File 1: {}", path1.cyan());
-    println!("  File 2: {}", path2.cyan());
-    println!();
-    println!("{}", "Similarity Scores".green().bold());
-    println!();
-    println!("  Combined:    {}%", (combined * 100.0) as u32);
-    println!(
-        "  Jaccard:     {}% (keyword overlap)",
-        (jaccard * 100.0) as u32
-    );
-    println!(
-        "  SimHash:     {}% (content structure)",
-        (simhash_sim * 100.0) as u32
-    );
-    println!();
-
-    println!(
-        "{} ({} keywords)",
-        "Shared Keywords".green().bold(),
-        shared.len()
-    );
-    let mut shared_vec: Vec<_> = shared.iter().collect();
-    shared_vec.sort();
-    for chunk in shared_vec.chunks(8) {
-        println!(
-            "  {}",
-            chunk
-                .iter()
-                .map(|s| s.as_str())
-                .collect::<Vec<_>>()
-                .join(", ")
-        );
-    }
-
-    println!();
-    println!(
-        "{} ({} keywords)",
-        format!("Only in {}", path1.split('/').next_back().unwrap_or(&path1))
-            .yellow()
-            .bold(),
-        only_in_1.len()
-    );
-    let mut only1_vec: Vec<_> = only_in_1.iter().take(24).collect();
-    only1_vec.sort();
-    for chunk in only1_vec.chunks(8) {
-        println!(
-            "  {}",
-            chunk
-                .iter()
-                .map(|s| s.as_str())
-                .collect::<Vec<_>>()
-                .join(", ")
-        );
-    }
-    if only_in_1.len() > 24 {
-        println!("  ... and {} more", only_in_1.len() - 24);
-    }
-
-    println!();
-    println!(
-        "{} ({} keywords)",
-        format!("Only in {}", path2.split('/').next_back().unwrap_or(&path2))
-            .yellow()
-            .bold(),
-        only_in_2.len()
-    );
-    let mut only2_vec: Vec<_> = only_in_2.iter().take(24).collect();
-    only2_vec.sort();
-    for chunk in only2_vec.chunks(8) {
-        println!(
-            "  {}",
-            chunk
-                .iter()
-                .map(|s| s.as_str())
-                .collect::<Vec<_>>()
-                .join(", ")
-        );
-    }
-    if only_in_2.len() > 24 {
-        println!("  ... and {} more", only_in_2.len() - 24);
-    }
-
-    if !shared_headings.is_empty() {
-        println!();
-        println!(
-            "{} ({} headings)",
-            "Identical Headings".red().bold(),
-            shared_headings.len()
-        );
-        for h in shared_headings.iter().take(10) {
-            println!("  - {h}");
-        }
-        if shared_headings.len() > 10 {
-            println!("  ... and {} more", shared_headings.len() - 10);
-        }
-    }
-
-    Ok(())
-}
-
-/// Find duplicate sections across documents
-fn cmd_dupes_sections(
-    threshold: f64,
-    min_files: usize,
-    json: bool,
-    index_dir: &Path,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-    let start = Instant::now();
-
-    // Collect all sections from all files
-    #[derive(Debug, Clone)]
-    struct SectionInfo {
-        file_path: String,
-        heading: String,
-        line_start: usize,
-        line_end: usize,
-        simhash: u64,
-    }
-
-    let mut all_sections: Vec<SectionInfo> = Vec::new();
-    for (path, entry) in &forward_index.files {
-        for section in &entry.section_fingerprints {
-            all_sections.push(SectionInfo {
-                file_path: path.clone(),
-                heading: section.heading.clone(),
-                line_start: section.line_start,
-                line_end: section.line_end,
-                simhash: section.simhash,
-            });
-        }
-    }
-
-    if all_sections.is_empty() {
-        println!("{}", "No sections found in indexed files.".yellow());
-        return Ok(());
-    }
-
-    // Group similar sections using SimHash similarity
-    #[derive(Debug)]
-    struct SectionCluster {
-        heading: String,
-        files: Vec<(String, f64, usize, usize)>, // (file_path, similarity, line_start, line_end)
-        avg_simhash: u64,
-    }
-
-    let mut clusters: Vec<SectionCluster> = Vec::new();
-
-    for section in &all_sections {
-        let mut best_cluster_idx: Option<usize> = None;
-        let mut best_similarity = 0.0;
-
-        // Find best matching cluster
-        for (cluster_idx, cluster) in clusters.iter().enumerate() {
-            let similarity = simhash_similarity(section.simhash, cluster.avg_simhash);
-            if similarity >= threshold && similarity > best_similarity {
-                best_similarity = similarity;
-                best_cluster_idx = Some(cluster_idx);
-            }
-        }
-
-        if let Some(cluster_idx) = best_cluster_idx {
-            // Add to existing cluster
-            clusters[cluster_idx].files.push((
-                section.file_path.clone(),
-                best_similarity,
-                section.line_start,
-                section.line_end,
-            ));
-        } else {
-            // Create new cluster
-            clusters.push(SectionCluster {
-                heading: section.heading.clone(),
-                files: vec![(
-                    section.file_path.clone(),
-                    1.0,
-                    section.line_start,
-                    section.line_end,
-                )],
-                avg_simhash: section.simhash,
-            });
-        }
-    }
-
-    let elapsed = start.elapsed();
-
-    // Filter clusters by min_files threshold
-    let duplicate_clusters: Vec<_> = clusters
-        .into_iter()
-        .filter(|c| c.files.len() >= min_files)
-        .collect();
-
-    if duplicate_clusters.is_empty() {
-        println!(
-            "{}",
-            format!(
-                "No duplicate sections found with {} or more files at {}% threshold.",
-                min_files,
-                (threshold * 100.0) as u32
-            )
-            .green()
-        );
-        eprintln!(
-            "Section analysis: {:?} ({} sections analyzed)",
-            elapsed,
-            all_sections.len()
-        );
-        return Ok(());
-    }
-
-    // Sort clusters by number of files (descending)
-    let mut sorted_clusters = duplicate_clusters;
-    sorted_clusters.sort_by(|a, b| b.files.len().cmp(&a.files.len()));
-
-    if json {
-        let output: Vec<_> = sorted_clusters
-            .iter()
-            .map(|cluster| {
-                serde_json::json!({
-                    "heading": cluster.heading,
-                    "file_count": cluster.files.len(),
-                    "files": cluster.files.iter().map(|(path, sim, start, end)| {
-                        serde_json::json!({
-                            "path": path,
-                            "similarity": sim,
-                            "line_start": start,
-                            "line_end": end,
-                        })
-                    }).collect::<Vec<_>>(),
-                })
-            })
-            .collect();
-        println!("{}", serde_json::to_string_pretty(&output)?);
-        return Ok(());
-    }
-
-    println!(
-        "{} duplicate section clusters found (threshold: {}%, min files: {})",
-        sorted_clusters.len().to_string().yellow().bold(),
-        (threshold * 100.0) as u32,
-        min_files
-    );
-    eprintln!(
-        "Section analysis: {:?} ({} sections analyzed)\n",
-        elapsed,
-        all_sections.len()
-    );
-
-    for cluster in sorted_clusters.iter().take(20) {
-        println!(
-            "{} {} ({} files)",
-            "Section:".cyan().bold(),
-            cluster.heading.yellow(),
-            cluster.files.len()
-        );
-
-        for (path, similarity, line_start, line_end) in &cluster.files {
-            let sim_pct = (similarity * 100.0) as u32;
-            println!(
-                "  {}% {}:{}-{}",
-                sim_pct.to_string().dimmed(),
-                path,
-                line_start,
-                line_end
-            );
-        }
-        println!();
-    }
-
-    if sorted_clusters.len() > 20 {
-        println!(
-            "{}",
-            format!(
-                "... and {} more section clusters",
-                sorted_clusters.len() - 20
-            )
-            .dimmed()
-        );
-    }
-
-    Ok(())
-}
-
-fn cmd_stats(
-    top_keywords: usize,
-    index_dir: &Path,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-    let reverse_index = load_reverse_index(index_dir)?;
-
-    // Count keyword occurrences
-    let mut keyword_counts: Vec<_> = reverse_index
-        .keywords
-        .iter()
-        .map(|(k, v)| (k.clone(), v.len()))
-        .collect();
-    keyword_counts.sort_by(|a, b| b.1.cmp(&a.1));
-
-    let total_headings: usize = forward_index.files.values().map(|e| e.headings.len()).sum();
-    let total_links: usize = forward_index.files.values().map(|e| e.links.len()).sum();
-    let total_body_keywords: usize = forward_index
-        .files
-        .values()
-        .map(|e| e.body_keywords.len())
-        .sum();
-
-    if json {
-        let result = StatsResult {
-            total_files: forward_index.files.len(),
-            unique_keywords: reverse_index.keywords.len(),
-            total_headings,
-            body_keywords: total_body_keywords,
-            total_links,
-            index_version: forward_index.version,
-            indexed_at: forward_index.indexed_at.clone(),
-            top_keywords: keyword_counts
-                .iter()
-                .take(top_keywords)
-                .map(|(k, c)| KeywordCount {
-                    keyword: k.clone(),
-                    count: *c,
-                })
-                .collect(),
-        };
-        println!("{}", serde_json::to_string_pretty(&result)?);
-        return Ok(());
-    }
-
-    println!("{}", "Index Statistics".green().bold());
-    println!();
-    println!(
-        "  Total files:       {}",
-        forward_index.files.len().to_string().cyan()
-    );
-    println!(
-        "  Unique keywords:   {}",
-        reverse_index.keywords.len().to_string().cyan()
-    );
-    println!("  Total headings:    {}", total_headings.to_string().cyan());
-    println!(
-        "  Body keywords:     {}",
-        total_body_keywords.to_string().cyan()
-    );
-    println!("  Total links:       {}", total_links.to_string().cyan());
-    println!(
-        "  Index version:     {}",
-        forward_index.version.to_string().dimmed()
-    );
-    println!("  Indexed at:        {}", forward_index.indexed_at.dimmed());
-    println!();
-    println!("{}", format!("Top {top_keywords} Keywords").green().bold());
-    println!();
-
-    for (keyword, count) in keyword_counts.iter().take(top_keywords) {
-        let bar = "=".repeat((count / 2).min(40));
-        println!("  {:>20} {:>4} {}", keyword.cyan(), count, bar.dimmed());
-    }
-
-    Ok(())
-}
-
-fn cmd_repl(index_dir: &Path) -> Result<(), Box<dyn std::error::Error>> {
-    println!("{}", "yore interactive mode (v2)".green().bold());
-    println!("Commands: query <terms>, similar <file>, dupes, diff <f1> <f2>, stats, help, quit\n");
-
-    let stdin = io::stdin();
-    let mut stdout = io::stdout();
-    let query_options = QueryOptions {
-        limit: 10,
-        files_only: false,
-        json: false,
-        doc_terms: 0,
-        explain: false,
-        require_phrases: false,
-        filter_stopwords: true,
-    };
-
-    loop {
-        print!("{} ", ">".cyan().bold());
-        stdout.flush()?;
-
-        let mut line = String::new();
-        if stdin.lock().read_line(&mut line)? == 0 {
-            break;
-        }
-
-        let parts: Vec<&str> = line.split_whitespace().collect();
-        if parts.is_empty() {
-            continue;
-        }
-
-        match parts[0] {
-            "quit" | "exit" | "q" => break,
-            "help" | "?" => {
-                println!("  query <terms...>   - Search for keywords");
-                println!("  similar <file>     - Find similar files");
-                println!("  dupes              - Find duplicates");
-                println!("  diff <f1> <f2>     - Compare two files");
-                println!("  stats              - Show statistics");
-                println!("  quit               - Exit");
-            }
-            "query" => {
-                let trimmed = line.trim();
-                let rest = trimmed.strip_prefix("query").unwrap_or("").trim();
-                if rest.is_empty() {
-                    println!("{}", "Usage: query <terms...>".yellow());
-                } else {
-                    let _ = cmd_query(rest, index_dir, &query_options);
-                }
-            }
-            "similar" => {
-                if parts.len() < 2 {
-                    println!("{}", "Usage: similar <file>".yellow());
-                } else {
-                    let _ = cmd_similar(Path::new(parts[1]), 5, 0.3, false, 0, index_dir);
-                }
-            }
-            "dupes" => {
-                let _ = cmd_dupes(0.35, false, false, index_dir);
-            }
-            "diff" => {
-                if parts.len() < 3 {
-                    println!("{}", "Usage: diff <file1> <file2>".yellow());
-                } else {
-                    let _ = cmd_diff(Path::new(parts[1]), Path::new(parts[2]), index_dir, false);
-                }
-            }
-            "stats" => {
-                let _ = cmd_stats(10, index_dir, false);
-            }
-            _ => {
-                // Treat as query
-                let trimmed = line.trim();
-                if !trimmed.is_empty() {
-                    let _ = cmd_query(trimmed, index_dir, &query_options);
-                }
-            }
-        }
-        println!();
-    }
-
-    Ok(())
-}
-
-fn cmd_vocabulary(
-    index_dir: &Path,
-    limit: usize,
-    format: &str,
-    json: bool,
-    options: VocabularyOptions<'_>,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let reverse_index = match load_reverse_index(index_dir) {
-        Ok(index) => index,
-        Err(err) => {
-            if let Some(io_err) = err.downcast_ref::<std::io::Error>() {
-                if io_err.kind() == std::io::ErrorKind::NotFound {
-                    ReverseIndex {
-                        keywords: HashMap::new(),
-                    }
-                } else {
-                    return Err(err);
-                }
-            } else {
-                return Err(err);
-            }
-        }
-    };
-
-    let forward_index = load_forward_index(index_dir).ok();
-    let stopwords_path = options
-        .stopwords
-        .map(|path| path.to_string_lossy().to_string());
-    let mut stopwords =
-        load_vocabulary_stopwords(options.stopwords, !options.no_default_stopwords)?;
-    let auto_common_terms = if options.common_terms > 0 {
-        let candidate_metrics: Vec<VocabularyCandidateTerm> = reverse_index
-            .keywords
-            .iter()
-            .map(|(term, postings)| {
-                let term = term.to_string();
-                VocabularyCandidateTerm {
-                    term: term.clone(),
-                    surface: None,
-                    term_freq: postings.len(),
-                    doc_freq: postings
-                        .iter()
-                        .map(|posting| posting.file.clone())
-                        .collect::<HashSet<_>>()
-                        .len(),
-                    first_file: String::new(),
-                    first_line: usize::MAX,
-                    first_heading: String::new(),
-                }
-            })
-            .collect();
-        let common =
-            build_auto_common_vocabulary_stopwords(&candidate_metrics, options.common_terms);
-        for common_term in &common {
-            stopwords.insert(common_term.clone());
-        }
-        Some(common.len())
-    } else {
-        None
-    };
-
-    let mut candidates: Vec<VocabularyCandidateTerm> = reverse_index
-        .keywords
-        .into_iter()
-        .filter(|(_, postings)| !postings.is_empty())
-        .map(|(term, postings)| {
-            let mut ordered_postings = postings;
-
-            let mut docs = HashSet::new();
-            for posting in &ordered_postings {
-                docs.insert(posting.file.clone());
-            }
-            ordered_postings.sort_by(|a, b| {
-                a.file
-                    .cmp(&b.file)
-                    .then_with(|| {
-                        a.line
-                            .unwrap_or(usize::MAX)
-                            .cmp(&b.line.unwrap_or(usize::MAX))
-                    })
-                    .then_with(|| {
-                        a.heading
-                            .as_deref()
-                            .unwrap_or("")
-                            .cmp(b.heading.as_deref().unwrap_or(""))
-                    })
-            });
-
-            let first = ordered_postings.first().expect("postings non-empty");
-            let first_heading = first.heading.clone().unwrap_or_default();
-
-            VocabularyCandidateTerm {
-                term: term.clone(),
-                surface: resolve_vocabulary_surface(
-                    &term,
-                    &ordered_postings,
-                    forward_index.as_ref(),
-                ),
-                term_freq: ordered_postings.len(),
-                doc_freq: docs.len(),
-                first_file: first.file.clone(),
-                first_line: first.line.unwrap_or(usize::MAX),
-                first_heading,
-            }
-        })
-        .collect();
-
-    candidates.sort_by(|a, b| {
-        b.doc_freq
-            .cmp(&a.doc_freq)
-            .then_with(|| b.term_freq.cmp(&a.term_freq))
-            .then_with(|| a.first_file.cmp(&b.first_file))
-            .then_with(|| a.first_line.cmp(&b.first_line))
-            .then_with(|| a.first_heading.cmp(&b.first_heading))
-            .then_with(|| a.term.cmp(&b.term))
-    });
-
-    let mut terms = Vec::new();
-    for candidate in &candidates {
-        let term = if let Some(surface) = &candidate.surface {
-            surface
-        } else if options.include_stemming {
-            &candidate.term
-        } else {
-            continue;
-        };
-
-        let term_lower = term.to_lowercase();
-        if !is_hygienic_vocabulary_term(term) || stopwords.contains(&term_lower) {
-            continue;
-        }
-
-        terms.push(VocabularyTerm {
-            term: term.clone(),
-            score: candidate.doc_freq as f64,
-            count: candidate.term_freq,
-        });
-    }
-    let (terms, total_candidates) = apply_vocabulary_limit(terms, limit);
-
-    let effective_format = if json { "json" } else { format };
-    let result = VocabularyResult {
-        format: effective_format.to_string(),
-        limit,
-        total: total_candidates,
-        terms,
-        stopwords: stopwords_path,
-        used_default_stopwords: !options.no_default_stopwords,
-        auto_common_terms,
-        include_stemming: options.include_stemming,
-    };
-
-    match effective_format {
-        "lines" => {
-            if !result.terms.is_empty() {
-                println!("{}", render_vocabulary_lines(&result.terms));
-            }
-            Ok(())
-        }
-        "json" => {
-            println!("{}", serde_json::to_string_pretty(&result)?);
-            Ok(())
-        }
-        "prompt" => {
-            println!("{}", render_vocabulary_prompt(&result.terms));
-            Ok(())
-        }
-        _ => Err(format!("Unsupported vocabulary format: {effective_format}").into()),
-    }
-}
-
-fn render_vocabulary_prompt(terms: &[VocabularyTerm]) -> String {
-    let rendered_terms: Vec<String> = terms
-        .iter()
-        .map(|term| normalize_prompt_term(&term.term))
-        .filter(|term| !term.is_empty())
-        .collect();
-
-    rendered_terms.join(", ")
-}
-
-fn render_vocabulary_lines(terms: &[VocabularyTerm]) -> String {
-    terms
-        .iter()
-        .map(|term| term.term.clone())
-        .collect::<Vec<_>>()
-        .join("\n")
-}
-
-fn apply_vocabulary_limit(
-    mut terms: Vec<VocabularyTerm>,
-    limit: usize,
-) -> (Vec<VocabularyTerm>, usize) {
-    let total = terms.len();
-    if terms.len() > limit {
-        terms.truncate(limit);
-    }
-    (terms, total)
-}
-
-fn build_auto_common_vocabulary_stopwords(
-    candidates: &[VocabularyCandidateTerm],
-    top_n: usize,
-) -> HashSet<String> {
-    if top_n == 0 || candidates.is_empty() {
-        return HashSet::new();
-    }
-
-    let mut candidates = candidates.to_vec();
-    candidates.sort_by(|a, b| {
-        b.term_freq
-            .cmp(&a.term_freq)
-            .then_with(|| b.doc_freq.cmp(&a.doc_freq))
-            .then_with(|| a.term.cmp(&b.term))
-    });
-
-    candidates
-        .into_iter()
-        .filter(|candidate| is_hygienic_vocabulary_term(&candidate.term))
-        .take(top_n)
-        .map(|candidate| candidate.term.to_lowercase())
-        .collect()
-}
-
-fn resolve_vocabulary_surface(
-    stem: &str,
-    postings: &[ReverseEntry],
-    forward_index: Option<&ForwardIndex>,
-) -> Option<String> {
-    #[derive(Debug)]
-    struct SurfaceCandidate {
-        value: String,
-        file: String,
-        line: usize,
-        source_rank: usize,
-        token_idx: usize,
-    }
-
-    let mut ordered_postings = postings.to_vec();
-    ordered_postings.sort_by(|a, b| {
-        a.file
-            .cmp(&b.file)
-            .then_with(|| {
-                a.line
-                    .unwrap_or(usize::MAX)
-                    .cmp(&b.line.unwrap_or(usize::MAX))
-            })
-            .then_with(|| {
-                a.heading
-                    .as_deref()
-                    .unwrap_or("")
-                    .cmp(b.heading.as_deref().unwrap_or(""))
-            })
-    });
-
-    let mut candidates: Vec<SurfaceCandidate> = Vec::new();
-
-    for posting in &ordered_postings {
-        if let Some(heading) = &posting.heading {
-            for (token_idx, token) in extract_keywords(heading).into_iter().enumerate() {
-                if stem_word(&token) == stem {
-                    candidates.push(SurfaceCandidate {
-                        value: token,
-                        file: posting.file.clone(),
-                        line: posting.line.unwrap_or(usize::MAX),
-                        source_rank: 0,
-                        token_idx,
-                    });
-                }
-            }
-        }
-
-        if let Some(forward_index) = forward_index {
-            if let Some(entry) = forward_index.files.get(&posting.file) {
-                for (token_idx, token) in entry
-                    .keywords
-                    .iter()
-                    .chain(entry.body_keywords.iter())
-                    .enumerate()
-                {
-                    if stem_word(&token.to_lowercase()) == stem {
-                        candidates.push(SurfaceCandidate {
-                            value: token.to_lowercase(),
-                            file: posting.file.clone(),
-                            line: posting.line.unwrap_or(usize::MAX),
-                            source_rank: 1,
-                            token_idx,
-                        });
-                    }
-                }
-            }
-        }
-    }
-
-    if candidates.is_empty() {
-        return None;
-    }
-
-    candidates.sort_by(|a, b| {
-        a.source_rank
-            .cmp(&b.source_rank)
-            .then_with(|| a.file.cmp(&b.file))
-            .then_with(|| a.line.cmp(&b.line))
-            .then_with(|| a.token_idx.cmp(&b.token_idx))
-            .then_with(|| a.value.cmp(&b.value))
-    });
-
-    candidates.first().map(|candidate| candidate.value.clone())
-}
-
-fn normalize_prompt_term(term: &str) -> String {
-    let no_control: String = term
-        .chars()
-        .filter(|character| !character.is_control())
-        .collect();
-
-    no_control
-        .split_whitespace()
-        .collect::<Vec<_>>()
-        .join(" ")
-        .trim()
-        .to_string()
-}
-
-// Helper functions
-
-fn load_forward_index(index_dir: &Path) -> Result<ForwardIndex, Box<dyn std::error::Error>> {
-    let path = index_dir.join("forward_index.json");
-    let content =
-        fs::read_to_string(&path).map_err(|_| "Index not found. Run 'yore build' first.")?;
-    Ok(serde_json::from_str(&content)?)
-}
-
-/// Load the relation index; returns an empty index if the file does not exist (backward compat).
-#[allow(dead_code)] // Used by upcoming YEH-005/006
-fn load_relation_index(index_dir: &Path) -> RelationIndex {
-    let path = index_dir.join("relations.json");
-    match fs::read_to_string(&path) {
-        Ok(content) => serde_json::from_str(&content).unwrap_or(RelationIndex {
-            version: 1,
-            indexed_at: String::new(),
-            total_edges: 0,
-            edges: vec![],
-        }),
-        Err(_) => RelationIndex {
-            version: 1,
-            indexed_at: String::new(),
-            total_edges: 0,
-            edges: vec![],
-        },
-    }
-}
-
-fn load_document_metrics(
-    index_dir: &Path,
-) -> Result<DocumentMetricsIndex, Box<dyn std::error::Error>> {
-    let path = index_dir.join("document_metrics.json");
-    let content = fs::read_to_string(&path).map_err(|_| {
-        "Health metrics not found. Re-run 'yore build' to persist document metrics."
-    })?;
-    Ok(serde_json::from_str(&content)?)
-}
-
-fn load_reverse_index(index_dir: &Path) -> Result<ReverseIndex, Box<dyn std::error::Error>> {
-    let path = index_dir.join("reverse_index.json");
-    let content =
-        fs::read_to_string(&path).map_err(|_| "Index not found. Run 'yore build' first.")?;
-    Ok(serde_json::from_str(&content)?)
-}
-
-fn default_query_stop_words() -> &'static [&'static str] {
-    &[
-        "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "had", "has", "have", "he",
-        "in", "is", "it", "not", "of", "on", "or", "that", "the", "their", "there", "these",
-        "they", "this", "to", "was", "we", "were", "what", "when", "where", "which", "who", "will",
-        "with", "would", "you", "your", "did", "do", "does", "can", "could", "must", "shall",
-        "should", "may", "might", "new", "using", "used", "use", "add", "set", "run", "get", "see",
-        "only", "no", "so", "than", "then", "them", "all", "any", "both", "each", "more", "most",
-        "some", "such", "own", "same", "just", "also", "now", "other", "into", "about", "up",
-        "over",
-    ]
-}
-
-fn default_vocabulary_stop_words() -> &'static [&'static str] {
-    &[
-        "a",
-        "an",
-        "and",
-        "are",
-        "as",
-        "at",
-        "be",
-        "by",
-        "for",
-        "from",
-        "had",
-        "has",
-        "have",
-        "he",
-        "in",
-        "is",
-        "it",
-        "not",
-        "of",
-        "on",
-        "or",
-        "that",
-        "the",
-        "their",
-        "there",
-        "these",
-        "they",
-        "this",
-        "to",
-        "was",
-        "we",
-        "were",
-        "what",
-        "when",
-        "where",
-        "which",
-        "who",
-        "will",
-        "with",
-        "would",
-        "you",
-        "your",
-        "did",
-        "do",
-        "does",
-        "can",
-        "could",
-        "must",
-        "shall",
-        "should",
-        "may",
-        "might",
-        "new",
-        "using",
-        "used",
-        "use",
-        "add",
-        "set",
-        "run",
-        "get",
-        "see",
-        "only",
-        "no",
-        "so",
-        "than",
-        "then",
-        "them",
-        "all",
-        "any",
-        "both",
-        "each",
-        "more",
-        "most",
-        "some",
-        "such",
-        "own",
-        "same",
-        "just",
-        "also",
-        "now",
-        "other",
-        "into",
-        "about",
-        "up",
-        "over",
-        "document",
-        "documents",
-        "docs",
-        "json",
-        "changes",
-        "change",
-        "build",
-        "output",
-        "validation",
-        "command",
-        "commands",
-        "prompting",
-        "workflow",
-        "core",
-        "keep",
-        "apply",
-        "file",
-        "files",
-        "reporting",
-        "pattern",
-        "examples",
-        "help",
-        "format",
-        "index",
-        "indexes",
-        "indexer",
-        "indexing",
-    ]
-}
-
-fn load_vocabulary_stopwords(
-    stopwords: Option<&Path>,
-    include_default: bool,
-) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
-    let mut words: HashSet<String> = default_vocabulary_stop_words()
-        .iter()
-        .map(|word| (*word).to_string())
-        .collect();
-
-    if !include_default {
-        words.clear();
-    }
-
-    if let Some(path) = stopwords {
-        let path_value = path.to_string_lossy().to_string();
-        let content = fs::read_to_string(path)
-            .map_err(|err| format!("Unable to read stop-word file '{path_value}': {err}"))?;
-
-        for token in content.split_whitespace() {
-            if !token.is_empty() {
-                words.insert(token.to_lowercase());
-            }
-        }
-    }
-
-    Ok(words)
-}
-
-fn is_hygienic_vocabulary_term(term: &str) -> bool {
-    if term.len() < 3 || term.len() > 48 {
-        return false;
-    }
-
-    let mut digits = 0usize;
-    let mut letters = 0usize;
-
-    for ch in term.chars() {
-        if ch.is_ascii_digit() {
-            digits += 1;
-        } else if ch.is_ascii_alphabetic() {
-            letters += 1;
-        } else if !matches!(ch, '-' | '_') {
-            return false;
-        }
-    }
-
-    if letters == 0 {
-        return false;
-    }
-
-    if digits > 0 && digits.saturating_mul(10) >= term.len().saturating_mul(6) {
-        return false;
-    }
-
-    true
-}
-
-fn jaccard_similarity(a: &HashSet<String>, b: &HashSet<String>) -> f64 {
-    if a.is_empty() && b.is_empty() {
-        return 0.0;
-    }
-    let intersection = a.intersection(b).count();
-    let union = a.union(b).count();
-    if union == 0 {
-        return 0.0;
-    }
-    intersection as f64 / union as f64
-}
-
-fn chrono_now() -> String {
-    use std::time::{SystemTime, UNIX_EPOCH};
-    let duration = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
-    format!("{}", duration.as_secs())
-}
-
-/// Extract file rename history from git
-///
-/// Runs `git log --name-status --diff-filter=R` to find all renames in the repo.
-/// Returns empty history if not in a git repo or git is unavailable.
-fn extract_git_renames(path: &Path) -> RenameHistory {
-    use std::process::Command;
-
-    let output = Command::new("git")
-        .args([
-            "log",
-            "--name-status",
-            "--diff-filter=R",
-            "--pretty=format:%H",
-            "-M",
-            "--",
-        ])
-        .current_dir(path)
-        .output();
-
-    let output = match output {
-        Ok(o) if o.status.success() => o,
-        _ => {
-            return RenameHistory {
-                renames: vec![],
-                indexed_at: chrono_now(),
-            };
-        }
-    };
-
-    let stdout = String::from_utf8_lossy(&output.stdout);
-    let mut renames = Vec::new();
-    let mut current_commit = String::new();
-
-    for line in stdout.lines() {
-        let line = line.trim();
-        if line.is_empty() {
-            continue;
-        }
-
-        // Check if this is a commit hash (40 hex chars)
-        if line.len() == 40 && line.chars().all(|c| c.is_ascii_hexdigit()) {
-            current_commit = line.to_string();
-        } else if line.starts_with('R') {
-            // Rename line: R<score>\told_path\tnew_path
-            let parts: Vec<&str> = line.splitn(3, '\t').collect();
-            if parts.len() == 3 {
-                renames.push(RenameEntry {
-                    old_path: parts[1].to_string(),
-                    new_path: parts[2].to_string(),
-                    commit: current_commit.clone(),
-                });
-            }
-        }
-    }
-
-    // Reverse to get oldest-first order
-    renames.reverse();
-
-    RenameHistory {
-        renames,
-        indexed_at: chrono_now(),
-    }
-}
-
-/// Look up the current path for a file that may have been renamed.
-/// Returns the most recent path if renames exist, or None if no rename history.
-fn resolve_renamed_path(old_path: &str, history: &RenameHistory) -> Option<String> {
-    let mut current = old_path.to_string();
-    let mut found_any = false;
-
-    for entry in &history.renames {
-        if entry.old_path == current {
-            current.clone_from(&entry.new_path);
-            found_any = true;
-        }
-    }
-
-    if found_any {
-        Some(current)
-    } else {
-        None
-    }
-}
-
-/// Compute the relative path from source file to target file.
-/// Returns the relative link path as it would appear in markdown.
-fn compute_relative_path(
-    source: &str,
-    target: &str,
-    _available_files: &HashSet<String>,
-) -> Option<String> {
-    let source_path = Path::new(source);
-    let target_path = Path::new(target);
-
-    // Get the directory containing the source file
-    let source_dir = source_path.parent()?;
-
-    // Try to compute relative path
-    if let Ok(rel) = target_path.strip_prefix(source_dir) {
-        return Some(rel.to_string_lossy().to_string());
-    }
-
-    // Need to go up directories - find common ancestor
-    let source_components: Vec<_> = source_dir.components().collect();
-    let target_components: Vec<_> = target_path.components().collect();
-
-    // Find common prefix length
-    let common_len = source_components
-        .iter()
-        .zip(target_components.iter())
-        .take_while(|(a, b)| a == b)
-        .count();
-
-    // Build relative path: go up (source_components.len() - common_len) times, then down to target
-    let ups = source_components.len() - common_len;
-    let mut result = String::new();
-
-    for _ in 0..ups {
-        result.push_str("../");
-    }
-
-    // Add remaining target path components
-    for (i, comp) in target_components.iter().enumerate().skip(common_len) {
-        if i > common_len {
-            result.push('/');
-        }
-        result.push_str(&comp.as_os_str().to_string_lossy());
-    }
-
-    if result.is_empty() {
-        None
-    } else {
-        Some(result)
-    }
-}
-
-// ============================================================================
-// Context Assembly for LLMs (Phase 2)
-// ============================================================================
-
-#[derive(Debug, Clone)]
-struct SectionMatch {
-    doc_path: String,
-    heading: String,
-    line_start: usize,
-    line_end: usize,
-    bm25_score: f64,
-    content: String,
-    canonicality: f64,
-}
-
-const MCP_SCHEMA_VERSION: u32 = 1;
-
-#[derive(Debug, Clone)]
-struct ContextSelection {
-    query_label: String,
-    query_for_refiner: String,
-    sections: Vec<SectionMatch>,
-}
-
-#[derive(Debug, Clone)]
-enum ContextSelectionIssue {
-    NoSearchableTerms,
-    MissingFiles(Vec<String>),
-    NoIndexedFilesMatched,
-    NoRelevantSections(String),
-}
-
-#[derive(Debug, Clone)]
-struct RefinedSection {
-    section: SectionMatch,
-    truncated: bool,
-    truncation_reasons: Vec<String>,
-}
-
-fn combined_section_score(section: &SectionMatch) -> f64 {
-    section.bm25_score * 0.7 + section.canonicality * 0.3
-}
-
-fn compare_sections_by_relevance(a: &SectionMatch, b: &SectionMatch) -> std::cmp::Ordering {
-    combined_section_score(b)
-        .partial_cmp(&combined_section_score(a))
-        .unwrap_or(std::cmp::Ordering::Equal)
-        .then_with(|| a.doc_path.cmp(&b.doc_path))
-        .then_with(|| a.line_start.cmp(&b.line_start))
-        .then_with(|| a.line_end.cmp(&b.line_end))
-        .then_with(|| a.heading.cmp(&b.heading))
-}
-
-fn normalize_content_for_dedupe(text: &str) -> String {
-    text.split_whitespace().collect::<Vec<_>>().join(" ")
-}
-
-fn forward_index_source_root(index: &ForwardIndex) -> Option<PathBuf> {
-    let trimmed = index.source_root.trim();
-    if trimmed.is_empty() {
-        None
-    } else {
-        Some(PathBuf::from(trimmed))
-    }
-}
-
-fn canonicalize_existing_path(path: &Path) -> PathBuf {
-    fs::canonicalize(path).unwrap_or_else(|_| {
-        if path.is_absolute() {
-            path.to_path_buf()
-        } else if let Ok(cwd) = std::env::current_dir() {
-            cwd.join(path)
-        } else {
-            path.to_path_buf()
-        }
-    })
-}
-
-fn build_indexed_doc_key(path: &Path, source_root: &Path) -> String {
-    if let Ok(stripped) = path.strip_prefix(source_root) {
-        let normalized = normalize_path(stripped);
-        if !normalized.is_empty() {
-            return normalized;
-        }
-    }
-
-    let normalized = normalize_path(path);
-    if normalized.is_empty() {
-        path.to_string_lossy().to_string()
-    } else {
-        normalized
-    }
-}
-
-fn resolve_doc_fs_path(index: &ForwardIndex, doc_path: &str, entry: &FileEntry) -> PathBuf {
-    let stored_path = Path::new(&entry.path);
-    if stored_path.is_absolute() {
-        return stored_path.to_path_buf();
-    }
-
-    if let Some(source_root) = forward_index_source_root(index) {
-        let stored_candidate = source_root.join(stored_path);
-        if stored_candidate.exists() {
-            return stored_candidate;
-        }
-
-        let doc_candidate = source_root.join(doc_path);
-        if doc_candidate.exists() {
-            return doc_candidate;
-        }
-    }
-
-    PathBuf::from(doc_path)
-}
-
-fn read_indexed_doc(
-    index: &ForwardIndex,
-    doc_path: &str,
-    entry: &FileEntry,
-) -> Result<String, io::Error> {
-    fs::read_to_string(resolve_doc_fs_path(index, doc_path, entry))
-}
-
-fn dedupe_section_matches(sections: Vec<SectionMatch>) -> (Vec<SectionMatch>, usize) {
-    let mut unique: Vec<SectionMatch> = Vec::new();
-    let mut seen_content = HashSet::new();
-    let mut deduped_hits = 0usize;
-
-    for section in sections {
-        let overlaps_existing = unique.iter().any(|existing| {
-            existing.doc_path == section.doc_path
-                && existing.line_start <= section.line_end
-                && section.line_start <= existing.line_end
-        });
-
-        let content_key = normalize_content_for_dedupe(&section.content);
-        let duplicate_content = !content_key.is_empty() && !seen_content.insert(content_key);
-
-        if overlaps_existing || duplicate_content {
-            deduped_hits += 1;
-            continue;
-        }
-
-        unique.push(section);
-    }
-
-    (unique, deduped_hits)
-}
-
-fn floor_char_boundary(text: &str, limit: usize) -> usize {
-    let mut idx = limit.min(text.len());
-    while idx > 0 && !text.is_char_boundary(idx) {
-        idx -= 1;
-    }
-    idx
-}
-
-fn truncate_text_to_budget(
-    text: &str,
-    max_tokens: usize,
-    max_bytes: usize,
-) -> (String, bool, Vec<String>) {
-    const TRUNCATION_MARKER: &str = " ...[truncated]";
-
-    let mut reasons = Vec::new();
-    let mut limit = text.len();
-
-    let token_char_limit = max_tokens.saturating_mul(4);
-    if token_char_limit > 0 && text.len() > token_char_limit {
-        reasons.push("token_cap".to_string());
-        limit = limit.min(token_char_limit);
-    }
-
-    if max_bytes > 0 && text.len() > max_bytes {
-        reasons.push("byte_cap".to_string());
-        limit = limit.min(max_bytes);
-    }
-
-    if reasons.is_empty() {
-        return (text.to_string(), false, reasons);
-    }
-
-    let marker_len = TRUNCATION_MARKER.len();
-    let mut marker_budget = usize::MAX;
-    if token_char_limit > 0 {
-        marker_budget = marker_budget.min(token_char_limit);
-    }
-    if max_bytes > marker_len {
-        marker_budget = marker_budget.min(max_bytes);
-    }
-
-    if marker_budget > marker_len {
-        limit = limit.min(marker_budget.saturating_sub(marker_len));
-    }
-    let boundary = floor_char_boundary(text, limit);
-    let mut truncated = text[..boundary].trim_end().to_string();
-
-    if marker_budget > marker_len && truncated.len() + marker_len <= marker_budget {
-        truncated.push_str(TRUNCATION_MARKER);
-    }
-
-    (truncated, true, reasons)
-}
-
-fn mcp_handle_dir(index_dir: &Path) -> PathBuf {
-    index_dir.join("mcp_handles")
-}
-
-fn build_mcp_store_namespace(index_dir: &Path) -> String {
-    const FNV_OFFSET_BASIS: u64 = 14_695_981_039_346_656_037;
-    let canonical = canonicalize_existing_path(index_dir);
-    let mut state = FNV_OFFSET_BASIS;
-    stable_mcp_hash_update(&mut state, canonical.to_string_lossy().as_bytes());
-    format!("{state:016x}")
-}
-
-fn fallback_mcp_handle_dir(index_dir: &Path) -> PathBuf {
-    std::env::temp_dir()
-        .join("yore")
-        .join("mcp_handles")
-        .join(build_mcp_store_namespace(index_dir))
-}
-
-fn candidate_mcp_handle_dirs(index_dir: &Path) -> Vec<PathBuf> {
-    vec![
-        mcp_handle_dir(index_dir),
-        fallback_mcp_handle_dir(index_dir),
-    ]
-}
-
-fn stable_mcp_hash_update(state: &mut u64, bytes: &[u8]) {
-    const FNV_PRIME: u64 = 1_099_511_628_211;
-
-    for byte in bytes {
-        *state ^= u64::from(*byte);
-        *state = state.wrapping_mul(FNV_PRIME);
-    }
-}
-
-fn build_mcp_handle(query: &str, section: &SectionMatch) -> String {
-    const FNV_OFFSET_BASIS: u64 = 14_695_981_039_346_656_037;
-    let mut state = FNV_OFFSET_BASIS;
-
-    stable_mcp_hash_update(&mut state, query.as_bytes());
-    stable_mcp_hash_update(&mut state, &[0xff]);
-    stable_mcp_hash_update(&mut state, section.doc_path.as_bytes());
-    stable_mcp_hash_update(&mut state, &[0xff]);
-    stable_mcp_hash_update(&mut state, section.heading.as_bytes());
-    stable_mcp_hash_update(&mut state, &[0xff]);
-    stable_mcp_hash_update(&mut state, &section.line_start.to_le_bytes());
-    stable_mcp_hash_update(&mut state, &[0xff]);
-    stable_mcp_hash_update(&mut state, &section.line_end.to_le_bytes());
-    stable_mcp_hash_update(&mut state, &[0xff]);
-    stable_mcp_hash_update(&mut state, section.content.as_bytes());
-
-    format!("ctx_{state:016x}")
-}
-
-fn build_mcp_source_ref(section: &SectionMatch) -> McpSourceRef {
-    McpSourceRef {
-        path: section.doc_path.clone(),
-        heading: section.heading.clone(),
-        line_start: section.line_start,
-        line_end: section.line_end,
-    }
-}
-
-fn store_mcp_artifact(
-    index_dir: &Path,
-    artifact: &McpArtifact,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let payload = serde_json::to_vec_pretty(artifact)?;
-    let mut last_error: Option<io::Error> = None;
-
-    for handle_dir in candidate_mcp_handle_dirs(index_dir) {
-        match fs::create_dir_all(&handle_dir) {
-            Ok(()) => {}
-            Err(err) => {
-                last_error = Some(err);
-                continue;
-            }
-        }
-
-        let handle_path = handle_dir.join(format!("{}.json", artifact.handle));
-        match fs::write(handle_path, &payload) {
-            Ok(()) => return Ok(()),
-            Err(err) => {
-                last_error = Some(err);
-            }
-        }
-    }
-
-    Err(last_error
-        .unwrap_or_else(|| io::Error::other("unable to store MCP artifact"))
-        .into())
-}
-
-fn load_mcp_artifact(
-    index_dir: &Path,
-    handle: &str,
-) -> Result<McpArtifact, Box<dyn std::error::Error>> {
-    let mut last_error: Option<io::Error> = None;
-
-    for handle_dir in candidate_mcp_handle_dirs(index_dir) {
-        let handle_path = handle_dir.join(format!("{handle}.json"));
-        match fs::read_to_string(&handle_path) {
-            Ok(content) => return Ok(serde_json::from_str(&content)?),
-            Err(err) if err.kind() == io::ErrorKind::NotFound => {
-                last_error = Some(err);
-            }
-            Err(err) => return Err(err.into()),
-        }
-    }
-
-    Err(last_error
-        .unwrap_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown handle"))
-        .into())
-}
-
-// Cross-reference expansion (Phase 2.2)
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-enum RefType {
-    MarkdownLink,
-    AdrId,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-struct CrossRef {
-    ref_type: RefType,
-    origin_doc_path: String,
-    target_doc_path: String,
-    target_anchor: Option<String>,
-    raw_text: String,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
-enum DocType {
-    Adr,    // Priority 1
-    Design, // Priority 2
-    Ops,    // Priority 3
-    Other,  // Priority 4
-}
-
-/// Search for relevant sections using BM25 scoring
-fn search_relevant_sections(
-    query: &str,
-    index: &ForwardIndex,
-    max_sections: usize,
-) -> Vec<SectionMatch> {
-    let query_terms = parse_query_terms(query, true);
-    if query_terms.is_empty() {
-        return Vec::new();
-    }
-
-    let mut all_sections: Vec<SectionMatch> = Vec::new();
-
-    // First, get top documents by BM25
-    let mut doc_scores: Vec<(&String, &FileEntry, f64)> = index
-        .files
-        .iter()
-        .map(|(path, entry)| {
-            let score = bm25_score(&query_terms, entry, index.avg_doc_length, &index.idf_map);
-            (path, entry, score)
-        })
-        .filter(|(_, _, score)| *score > 0.01)
-        .collect();
-
-    doc_scores.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
-
-    // Take top 20 documents
-    for (doc_path, entry, doc_score) in doc_scores.iter().take(20) {
-        let canonicality = score_canonicality(doc_path, entry);
-
-        // Split document into sections based on section_fingerprints
-        if !entry.section_fingerprints.is_empty() {
-            if let Ok(content) = read_indexed_doc(index, doc_path, entry) {
-                let lines: Vec<&str> = content.lines().collect();
-
-                // Use indexed sections
-                for section in &entry.section_fingerprints {
-                    let start = section.line_start.saturating_sub(1);
-                    let end = section.line_end.min(lines.len());
-
-                    if start < end {
-                        let section_content = lines[start..end].join("\n");
-
-                        all_sections.push(SectionMatch {
-                            doc_path: (*doc_path).to_string(),
-                            heading: section.heading.clone(),
-                            line_start: section.line_start,
-                            line_end: section.line_end,
-                            bm25_score: *doc_score, // Use doc-level score for now
-                            content: section_content,
-                            canonicality,
-                        });
-                    }
-                }
-            }
-        } else {
-            // Fallback: treat whole doc as one section
-            if let Ok(content) = read_indexed_doc(index, doc_path, entry) {
-                all_sections.push(SectionMatch {
-                    doc_path: (*doc_path).to_string(),
-                    heading: "Full Document".to_string(),
-                    line_start: 1,
-                    line_end: content.lines().count(),
-                    bm25_score: *doc_score,
-                    content,
-                    canonicality,
-                });
-            }
-        }
-    }
-
-    // Sort by combined score with deterministic tie-breaks.
-    all_sections.sort_by(compare_sections_by_relevance);
-
-    // Take top N sections
-    all_sections.into_iter().take(max_sections).collect()
-}
-
-/// Score document canonicality based on path, recency, and patterns
-fn score_canonicality(doc_path: &str, _entry: &FileEntry) -> f64 {
-    let mut score: f64 = 0.5; // baseline
-
-    let path_lower = doc_path.to_lowercase();
-
-    // Path-based boosts
-    if path_lower.contains("docs/adr/") || path_lower.contains("docs/architecture/") {
-        score += 0.2;
-    }
-    if path_lower.contains("docs/index/") {
-        score += 0.15;
-    }
-    if path_lower.contains("scratch")
-        || path_lower.contains("archive")
-        || path_lower.contains("old")
-    {
-        score -= 0.3;
-    }
-    if path_lower.contains("deprecated") || path_lower.contains("backup") {
-        score -= 0.25;
-    }
-
-    // Filename patterns
-    let filename = Path::new(doc_path)
-        .file_name()
-        .and_then(|s| s.to_str())
-        .unwrap_or("")
-        .to_lowercase();
-
-    if filename.contains("readme") || filename.contains("index") {
-        score += 0.1;
-    }
-    if filename.contains("guide") || filename.contains("runbook") || filename.contains("plan") {
-        score += 0.1;
-    }
-
-    // Recency (approximate - we don't have mtime in index yet)
-    // For now, we'll just use this as a placeholder
-    // In future: add last_modified to FileEntry
-
-    // Clamp to [0.0, 1.0]
-    score.clamp(0.0, 1.0)
-}
-
-/// Distill sections into markdown digest within token budget
-fn distill_to_markdown(sections: &[SectionMatch], query: &str, max_tokens: usize) -> String {
-    let mut output = String::new();
-    let mut used_tokens = 0;
-
-    // Header
-    let header = format!(
-        "# Context Digest for: \"{}\"\n\n\
-         **Generated:** {}\n\
-         **Token Budget:** {}\n\
-         **Documents Scanned:** N/A\n\
-         **Sections Selected:** {}\n\n\
-         ---\n\n",
-        query,
-        chrono_now(),
-        max_tokens,
-        sections.len()
-    );
-    output.push_str(&header);
-    used_tokens += estimate_tokens(&header);
-
-    // Group sections by document
-    let mut doc_groups: HashMap<String, Vec<&SectionMatch>> = HashMap::new();
-    for section in sections {
-        doc_groups
-            .entry(section.doc_path.clone())
-            .or_default()
-            .push(section);
-    }
-
-    // Top Relevant Documents section
-    output.push_str("## Top Relevant Documents\n\n");
-    used_tokens += 10;
-
-    let mut ranked_docs: Vec<_> = doc_groups.iter().collect();
-    ranked_docs.sort_by(|a, b| {
-        let score_a = a.1[0].bm25_score * 0.7 + a.1[0].canonicality * 0.3;
-        let score_b = b.1[0].bm25_score * 0.7 + b.1[0].canonicality * 0.3;
-        score_b
-            .partial_cmp(&score_a)
-            .unwrap_or(std::cmp::Ordering::Equal)
-    });
-
-    for (idx, (doc_path, doc_sections)) in ranked_docs.iter().enumerate().take(10) {
-        let section = doc_sections[0];
-        let combined_score = section.bm25_score * 0.7 + section.canonicality * 0.3;
-        let doc_line = format!(
-            "{}. **{}** (score: {:.2}, canonical: {:.2})\n   - Sections included: {}\n\n",
-            idx + 1,
-            doc_path,
-            combined_score,
-            section.canonicality,
-            doc_sections.len()
-        );
-        output.push_str(&doc_line);
-        used_tokens += estimate_tokens(&doc_line);
-    }
-
-    output.push_str("---\n\n## Distilled Content\n\n");
-    used_tokens += 10;
-
-    // Add sections
-    for section in sections {
-        if used_tokens >= max_tokens {
-            output.push_str("\n\n*[Content truncated due to token budget]*\n");
-            break;
-        }
-
-        let section_header = format!(
-            "### {} (from {})\n\n**Source:** {}:{}-{} (canonical: {:.2})\n\n",
-            section.heading,
-            section.doc_path,
-            section.doc_path,
-            section.line_start,
-            section.line_end,
-            section.canonicality
-        );
-
-        // Estimate how much space we need
-        let section_tokens = estimate_tokens(&section_header) + estimate_tokens(&section.content);
-
-        if used_tokens + section_tokens > max_tokens {
-            // Try to fit a truncated version
-            let remaining_tokens = max_tokens - used_tokens;
-            let chars_to_include = remaining_tokens * 4; // rough approximation
-
-            if chars_to_include > 200 {
-                output.push_str(&section_header);
-                output.push_str(&section.content[..chars_to_include.min(section.content.len())]);
-                output.push_str("\n\n*[Section truncated]*\n");
-            }
-            break;
-        }
-
-        output.push_str(&section_header);
-        output.push_str(&section.content);
-        output.push_str("\n\n---\n\n");
-
-        used_tokens += section_tokens;
-    }
-
-    // Metadata footer
-    let footer = format!(
-        "\n## Metadata\n\n\
-         **Canonicality Scores:**\n\
-         - 0.90+: Authoritative source, prefer over other docs\n\
-         - 0.70-0.89: Reliable, current documentation\n\
-         - 0.50-0.69: Secondary or supporting documentation\n\
-         - <0.50: Potentially stale, use with caution\n\n\
-         **Actual Tokens Used:** ~{used_tokens}\n\n\
-         ---\n\n\
-         ## Usage with LLM\n\n\
-         Paste this digest into your LLM conversation, then ask:\n\n\
-         > Using only the information in the context above, answer: \"{query}\"\n\
-         > Be explicit when something is not documented in the context.\n"
-    );
-
-    output.push_str(&footer);
-
-    output
-}
-
-/// Estimate token count (rough approximation: 1 token ≈ 4 chars)
-fn estimate_tokens(text: &str) -> usize {
-    text.len() / 4
-}
-
-/// Build ADR index mapping ADR numbers to file paths
-fn build_adr_index(index: &ForwardIndex) -> HashMap<String, String> {
-    let mut adr_map = HashMap::new();
-    let adr_regex = Regex::new(r"ADR[-_]?(\d{2,4})").unwrap();
-
-    for path in index.files.keys() {
-        let path_lower = path.to_lowercase();
-        if path_lower.contains("/adr/") || path_lower.contains("adr-") {
-            if let Some(caps) = adr_regex.captures(path) {
-                if let Some(num_str) = caps.get(1) {
-                    // Zero-pad to 3 digits
-                    let num: usize = num_str.as_str().parse().unwrap_or(0);
-                    let normalized = format!("{num:03}");
-                    adr_map.insert(normalized, path.clone());
-                }
-            }
-        }
-    }
-
-    adr_map
-}
-
-/// Extract all deterministic relation edges from a forward index.
-/// Produces document-level links, section-level links, and ADR reference edges.
-fn extract_relations(forward_index: &ForwardIndex) -> RelationIndex {
-    // Build normalized-path-to-key map (sorted iteration for determinism)
-    let mut norm_to_key: HashMap<String, String> = HashMap::new();
-    let mut sorted_keys: Vec<&String> = forward_index.files.keys().collect();
-    sorted_keys.sort();
-    for key in &sorted_keys {
-        let normalized = normalize_path(Path::new(key));
-        norm_to_key
-            .entry(normalized)
-            .or_insert_with(|| (*key).clone());
-    }
-
-    let adr_index = build_adr_index(forward_index);
-    let mut edges: Vec<RelationEdge> = Vec::new();
-
-    for source_key in &sorted_keys {
-        let entry = &forward_index.files[*source_key];
-        let source_base = Path::new(source_key.as_str());
-
-        // Document & section edges from links
-        for link in &entry.links {
-            let target = &link.target;
-
-            // Skip external links
-            if target.starts_with("http://")
-                || target.starts_with("https://")
-                || target.starts_with("mailto:")
-                || target.starts_with("ftp://")
-            {
-                continue;
-            }
-
-            // Split off anchor
-            let (link_path, anchor) = if let Some(idx) = target.find('#') {
-                (
-                    target[..idx].to_string(),
-                    Some(target[idx + 1..].to_string()),
-                )
-            } else {
-                (target.clone(), None)
-            };
-
-            if link_path.is_empty() {
-                continue;
-            }
-
-            let resolved = if let Some(parent) = source_base.parent() {
-                parent.join(&link_path).to_string_lossy().to_string()
-            } else {
-                link_path.clone()
-            };
-            let normalized = normalize_path(Path::new(&resolved));
-
-            let target_key = match norm_to_key.get(&normalized) {
-                Some(k) => k.clone(),
-                None => continue,
-            };
-
-            // Skip self-links
-            if &target_key == *source_key {
-                continue;
-            }
-
-            // Document-level LinksTo edge
-            edges.push(RelationEdge {
-                source: (*source_key).clone(),
-                target: target_key.clone(),
-                kind: RelationKind::LinksTo,
-                anchor: anchor.clone(),
-                source_section: None,
-                target_section: None,
-                raw_text: None,
-            });
-
-            // Section-level edge
-            let source_section = find_containing_section(&entry.section_fingerprints, link.line);
-            if source_section.is_some() {
-                let target_section = anchor.as_deref().and_then(|a| {
-                    forward_index
-                        .files
-                        .get(&target_key)
-                        .and_then(|te| resolve_anchor_to_section(te, a))
-                });
-
-                edges.push(RelationEdge {
-                    source: (*source_key).clone(),
-                    target: target_key.clone(),
-                    kind: RelationKind::SectionLinksTo,
-                    anchor: anchor.clone(),
-                    source_section,
-                    target_section,
-                    raw_text: None,
-                });
-            }
-        }
-
-        // ADR reference edges
-        for adr_ref in &entry.adr_references {
-            if let Some(target_path) = adr_index.get(&adr_ref.normalized_id) {
-                // Skip self-links
-                if target_path == *source_key {
-                    continue;
-                }
-
-                let source_section =
-                    find_containing_section(&entry.section_fingerprints, adr_ref.line);
-
-                edges.push(RelationEdge {
-                    source: (*source_key).clone(),
-                    target: target_path.clone(),
-                    kind: RelationKind::AdrReference,
-                    anchor: None,
-                    source_section,
-                    target_section: None,
-                    raw_text: Some(adr_ref.raw_text.clone()),
-                });
-            }
-        }
-    }
-
-    edges.sort();
-    edges.dedup();
-
-    RelationIndex {
-        version: 1,
-        indexed_at: chrono_now(),
-        total_edges: edges.len(),
-        edges,
-    }
-}
-
-/// Parse markdown links from a section's content
-fn parse_markdown_links(section: &SectionMatch, origin_dir: &Path) -> Vec<CrossRef> {
-    let mut refs = Vec::new();
-
-    // Regex: [text](target) - we'll filter out ![image] manually
-    let link_regex = Regex::new(r"(!?)\[(?P<label>[^\]]+)\]\((?P<target>[^)]+)\)").unwrap();
-
-    for caps in link_regex.captures_iter(&section.content) {
-        // Skip if this is an image link (starts with !)
-        if caps.get(1).is_some_and(|m| m.as_str() == "!") {
-            continue;
-        }
-
-        if let (Some(label), Some(target)) = (caps.name("label"), caps.name("target")) {
-            let target_str = target.as_str();
-
-            // Skip external links
-            if target_str.starts_with("http://")
-                || target_str.starts_with("https://")
-                || target_str.starts_with("mailto:")
-            {
-                continue;
-            }
-
-            // Parse target: path.md#anchor
-            let (path_part, anchor) = if let Some(hash_pos) = target_str.find('#') {
-                (
-                    &target_str[..hash_pos],
-                    Some(target_str[hash_pos + 1..].to_string()),
-                )
-            } else {
-                (target_str, None)
-            };
-
-            // Skip non-markdown links
-            let lc = path_part.to_ascii_lowercase();
-            if !lc.ends_with(".md") && !lc.ends_with(".txt") && !lc.ends_with(".rst") {
-                continue;
-            }
-
-            // Resolve relative path
-            let target_path = if path_part.starts_with('/') {
-                // Absolute path within repo - strip leading /
-                PathBuf::from(path_part.trim_start_matches('/'))
-            } else {
-                // Relative path - resolve from origin doc's directory
-                origin_dir.join(path_part)
-            };
-
-            // Normalize path
-            let normalized = normalize_path(&target_path);
-
-            // Skip self-links
-            if normalized == section.doc_path {
-                continue;
-            }
-
-            refs.push(CrossRef {
-                ref_type: RefType::MarkdownLink,
-                origin_doc_path: section.doc_path.clone(),
-                target_doc_path: normalized,
-                target_anchor: anchor,
-                raw_text: label.as_str().to_string(),
-            });
-        }
-    }
-
-    refs
-}
-
-/// Find the section containing a given line number
-fn find_containing_section(sections: &[SectionFingerprint], line: usize) -> Option<SectionRef> {
-    for section in sections {
-        if section.line_start <= line && line <= section.line_end {
-            return Some(SectionRef {
-                heading: section.heading.clone(),
-                line_start: section.line_start,
-            });
-        }
-    }
-    None
-}
-
-/// Resolve an anchor fragment to a section in the target file entry
-fn resolve_anchor_to_section(entry: &FileEntry, anchor: &str) -> Option<SectionRef> {
-    let anchor_slug = anchor.to_lowercase().replace([' ', '_'], "-");
-    for section in &entry.section_fingerprints {
-        let heading_slug = section.heading.to_lowercase().replace(' ', "-");
-        if heading_slug == anchor_slug || heading_slug.contains(&anchor_slug) {
-            return Some(SectionRef {
-                heading: section.heading.clone(),
-                line_start: section.line_start,
-            });
-        }
-    }
-    None
-}
-
-/// Normalize a path (resolve .. and .)
-fn normalize_path(path: &Path) -> String {
-    let mut components = Vec::new();
-
-    for component in path.components() {
-        match component {
-            std::path::Component::Normal(c) => components.push(c.to_string_lossy().to_string()),
-            std::path::Component::ParentDir => {
-                components.pop();
-            }
-            std::path::Component::CurDir => {}
-            _ => {}
-        }
-    }
-
-    components.join("/")
-}
-
-/// Parse ADR ID references from section content
-fn parse_adr_ids(section: &SectionMatch, adr_index: &HashMap<String, String>) -> Vec<CrossRef> {
-    let mut refs = Vec::new();
-
-    // Regex: ADR-013, ADR 13, ADR_0013
-    let adr_regex = Regex::new(r"\bADR[-_ ]?(?P<num>\d{2,4})\b").unwrap();
-
-    for caps in adr_regex.captures_iter(&section.content) {
-        if let Some(num) = caps.name("num") {
-            let num_str = num.as_str();
-            let num_val: usize = num_str.parse().unwrap_or(0);
-
-            // Zero-pad to 3 digits
-            let normalized = format!("{num_val:03}");
-
-            // Lookup in ADR index
-            if let Some(target_path) = adr_index.get(&normalized) {
-                // Skip if same file
-                if target_path == &section.doc_path {
-                    continue;
-                }
-
-                refs.push(CrossRef {
-                    ref_type: RefType::AdrId,
-                    origin_doc_path: section.doc_path.clone(),
-                    target_doc_path: target_path.clone(),
-                    target_anchor: None,
-                    raw_text: caps.get(0).unwrap().as_str().to_string(),
-                });
-            }
-        }
-    }
-
-    refs
-}
-
-/// Collect and deduplicate cross-references from primary sections
-fn collect_crossrefs(
-    sections: &[SectionMatch],
-    adr_index: &HashMap<String, String>,
-) -> Vec<CrossRef> {
-    let mut all_refs = Vec::new();
-
-    for section in sections {
-        // Get parent directory of origin doc
-        let origin_dir = Path::new(&section.doc_path)
-            .parent()
-            .unwrap_or_else(|| Path::new("."));
-
-        // Parse markdown links
-        all_refs.extend(parse_markdown_links(section, origin_dir));
-
-        // Parse ADR IDs
-        all_refs.extend(parse_adr_ids(section, adr_index));
-    }
-
-    // Deduplicate by (origin_doc_path, target_doc_path, target_anchor)
-    let mut seen: HashSet<(String, String, Option<String>)> = HashSet::new();
-    let mut unique_refs = Vec::new();
-
-    for r in all_refs {
-        let key = (
-            r.origin_doc_path.clone(),
-            r.target_doc_path.clone(),
-            r.target_anchor.clone(),
-        );
-
-        if !seen.contains(&key) {
-            seen.insert(key);
-            unique_refs.push(r);
-        }
-    }
-
-    unique_refs
-}
-
-/// Classify target document by type
-fn classify_target_doc(path: &str) -> DocType {
-    let path_lower = path.to_lowercase();
-
-    if path_lower.contains("/adr/") || path_lower.contains("adr-") {
-        DocType::Adr
-    } else if path_lower.contains("architecture") || path_lower.contains("design") {
-        DocType::Design
-    } else if path_lower.contains("runbook")
-        || path_lower.contains("operations")
-        || path_lower.contains("ops")
-    {
-        DocType::Ops
-    } else {
-        DocType::Other
-    }
-}
-
-/// Select sections from an ADR doc
-fn select_sections_for_adr(
-    doc_path: &str,
-    index: &ForwardIndex,
-    entry: &FileEntry,
-    max_sections: usize,
-) -> Vec<SectionMatch> {
-    let mut sections = Vec::new();
-
-    // Priority sections: Context, Decision, Consequences
-    let priority_keywords = [
-        "context",
-        "decision",
-        "consequences",
-        "motivation",
-        "rationale",
-        "summary",
-    ];
-
-    if let Ok(content) = read_indexed_doc(index, doc_path, entry) {
-        let lines: Vec<&str> = content.lines().collect();
-
-        // Try to use section fingerprints
-        for section in &entry.section_fingerprints {
-            if sections.len() >= max_sections {
-                break;
-            }
-
-            // Check if this is a priority section
-            let heading_lower = section.heading.to_lowercase();
-            let is_priority = priority_keywords
-                .iter()
-                .any(|kw| heading_lower.contains(kw));
-
-            if is_priority || sections.is_empty() {
-                // Include this section
-                let start = section.line_start.saturating_sub(1);
-                let end = section.line_end.min(lines.len());
-
-                if start < end {
-                    let section_content = lines[start..end].join("\n");
-
-                    sections.push(SectionMatch {
-                        doc_path: doc_path.to_string(),
-                        heading: section.heading.clone(),
-                        line_start: section.line_start,
-                        line_end: section.line_end,
-                        bm25_score: 0.0, // Cross-ref sections don't have BM25 scores
-                        content: section_content,
-                        canonicality: score_canonicality(doc_path, entry),
-                    });
-                }
-            }
-        }
-
-        // If no sections found, include the first section or full doc
-        if sections.is_empty() && !lines.is_empty() {
-            sections.push(SectionMatch {
-                doc_path: doc_path.to_string(),
-                heading: "Full Document".to_string(),
-                line_start: 1,
-                line_end: lines.len().min(100), // Limit to first 100 lines
-                bm25_score: 0.0,
-                content: lines[..lines.len().min(100)].join("\n"),
-                canonicality: score_canonicality(doc_path, entry),
-            });
-        }
-    }
-
-    sections
-}
-
-/// Select sections from a design/architecture doc
-fn select_sections_for_design(
-    doc_path: &str,
-    index: &ForwardIndex,
-    entry: &FileEntry,
-    anchor: Option<&str>,
-    max_sections: usize,
-) -> Vec<SectionMatch> {
-    let mut sections = Vec::new();
-
-    if let Ok(content) = read_indexed_doc(index, doc_path, entry) {
-        let lines: Vec<&str> = content.lines().collect();
-
-        // If anchor is specified, try to find matching section
-        if let Some(anchor_str) = anchor {
-            let anchor_lower = anchor_str.to_lowercase().replace(['-', '_'], " ");
-
-            for section in &entry.section_fingerprints {
-                let heading_lower = section.heading.to_lowercase();
-                let heading_slug = heading_lower.replace(' ', "-");
-
-                if heading_slug.contains(&anchor_str.replace(' ', "-"))
-                    || heading_lower.contains(&anchor_lower)
-                {
-                    // Found matching section
-                    let start = section.line_start.saturating_sub(1);
-                    let end = section.line_end.min(lines.len());
-
-                    if start < end {
-                        let section_content = lines[start..end].join("\n");
-
-                        sections.push(SectionMatch {
-                            doc_path: doc_path.to_string(),
-                            heading: section.heading.clone(),
-                            line_start: section.line_start,
-                            line_end: section.line_end,
-                            bm25_score: 0.0,
-                            content: section_content,
-                            canonicality: score_canonicality(doc_path, entry),
-                        });
-                    }
-
-                    break; // Found the target section
-                }
-            }
-        }
-
-        // If no anchor or not found, include first few sections
-        if sections.is_empty() {
-            for section in entry.section_fingerprints.iter().take(max_sections) {
-                let start = section.line_start.saturating_sub(1);
-                let end = section.line_end.min(lines.len());
-
-                if start < end {
-                    let section_content = lines[start..end].join("\n");
-
-                    sections.push(SectionMatch {
-                        doc_path: doc_path.to_string(),
-                        heading: section.heading.clone(),
-                        line_start: section.line_start,
-                        line_end: section.line_end,
-                        bm25_score: 0.0,
-                        content: section_content,
-                        canonicality: score_canonicality(doc_path, entry),
-                    });
-                }
-            }
-        }
-
-        // Fallback: if still no sections, include beginning of doc
-        if sections.is_empty() && !lines.is_empty() {
-            sections.push(SectionMatch {
-                doc_path: doc_path.to_string(),
-                heading: "Introduction".to_string(),
-                line_start: 1,
-                line_end: lines.len().min(50),
-                bm25_score: 0.0,
-                content: lines[..lines.len().min(50)].join("\n"),
-                canonicality: score_canonicality(doc_path, entry),
-            });
-        }
-    }
-
-    sections
-}
-
-/// Select sections from an ops/runbook doc
-fn select_sections_for_ops(
-    doc_path: &str,
-    index: &ForwardIndex,
-    entry: &FileEntry,
-    max_sections: usize,
-) -> Vec<SectionMatch> {
-    let mut sections = Vec::new();
-
-    // Keywords for ops docs
-    let ops_keywords = [
-        "deploy",
-        "restart",
-        "rollback",
-        "monitor",
-        "troubleshoot",
-        "debug",
-        "fix",
-        "restore",
-    ];
-
-    if let Ok(content) = read_indexed_doc(index, doc_path, entry) {
-        let lines: Vec<&str> = content.lines().collect();
-
-        // Prioritize sections with ops keywords
-        for section in &entry.section_fingerprints {
-            if sections.len() >= max_sections {
-                break;
-            }
-
-            let heading_lower = section.heading.to_lowercase();
-            let is_ops = ops_keywords.iter().any(|kw| heading_lower.contains(kw));
-
-            if is_ops {
-                let start = section.line_start.saturating_sub(1);
-                let end = section.line_end.min(lines.len());
-
-                if start < end {
-                    let section_content = lines[start..end].join("\n");
-
-                    sections.push(SectionMatch {
-                        doc_path: doc_path.to_string(),
-                        heading: section.heading.clone(),
-                        line_start: section.line_start,
-                        line_end: section.line_end,
-                        bm25_score: 0.0,
-                        content: section_content,
-                        canonicality: score_canonicality(doc_path, entry),
-                    });
-                }
-            }
-        }
-
-        // If no ops sections found, include first section
-        if sections.is_empty() && !entry.section_fingerprints.is_empty() {
-            let section = &entry.section_fingerprints[0];
-            let start = section.line_start.saturating_sub(1);
-            let end = section.line_end.min(lines.len());
-
-            if start < end {
-                let section_content = lines[start..end].join("\n");
-
-                sections.push(SectionMatch {
-                    doc_path: doc_path.to_string(),
-                    heading: section.heading.clone(),
-                    line_start: section.line_start,
-                    line_end: section.line_end,
-                    bm25_score: 0.0,
-                    content: section_content,
-                    canonicality: score_canonicality(doc_path, entry),
-                });
-            }
-        }
-    }
-
-    sections
-}
-
-/// Select sections from an "other" type doc
-fn select_sections_for_other(
-    doc_path: &str,
-    index: &ForwardIndex,
-    entry: &FileEntry,
-) -> Vec<SectionMatch> {
-    let mut sections = Vec::new();
-
-    if let Ok(content) = read_indexed_doc(index, doc_path, entry) {
-        let lines: Vec<&str> = content.lines().collect();
-
-        // Include only the first section (overview)
-        if !entry.section_fingerprints.is_empty() {
-            let section = &entry.section_fingerprints[0];
-            let start = section.line_start.saturating_sub(1);
-            let end = section.line_end.min(lines.len());
-
-            if start < end {
-                let section_content = lines[start..end].join("\n");
-
-                sections.push(SectionMatch {
-                    doc_path: doc_path.to_string(),
-                    heading: section.heading.clone(),
-                    line_start: section.line_start,
-                    line_end: section.line_end,
-                    bm25_score: 0.0,
-                    content: section_content,
-                    canonicality: score_canonicality(doc_path, entry),
-                });
-            }
-        }
-    }
-
-    sections
-}
-
-/// Resolve cross-references into additional sections to include
-fn resolve_crossrefs(
-    crossrefs: &[CrossRef],
-    primary_docs: &HashSet<String>,
-    index: &ForwardIndex,
-    xref_token_budget: usize,
-) -> Vec<SectionMatch> {
-    const MAX_SECTIONS_PER_ADR: usize = 3;
-    const MAX_SECTIONS_PER_DESIGN: usize = 2;
-    const MAX_SECTIONS_PER_OPS: usize = 2;
-    const MAX_TOKENS_PER_XREF_DOC: usize = 600;
-
-    let mut xref_sections = Vec::new();
-    let mut remaining_budget = xref_token_budget;
-    let mut visited_docs: HashSet<String> = primary_docs.clone();
-
-    // Group crossrefs by target doc
-    let mut doc_refs: HashMap<String, Vec<&CrossRef>> = HashMap::new();
-    for cr in crossrefs {
-        // Skip if already in primary docs or visited
-        if visited_docs.contains(&cr.target_doc_path) {
-            continue;
-        }
-
-        doc_refs
-            .entry(cr.target_doc_path.clone())
-            .or_default()
-            .push(cr);
-    }
-
-    // Sort target docs by priority and score
-    let mut target_docs: Vec<(String, Vec<&CrossRef>)> = doc_refs.into_iter().collect();
-    target_docs.sort_by(|a, b| {
-        let type_a = classify_target_doc(&a.0);
-        let type_b = classify_target_doc(&b.0);
-
-        // First by doc type priority
-        let cmp = type_a.cmp(&type_b);
-        if cmp != std::cmp::Ordering::Equal {
-            return cmp;
-        }
-
-        // Then by number of references (descending)
-        b.1.len().cmp(&a.1.len())
-    });
-
-    // Process each target doc in priority order
-    for (target_path, refs) in target_docs {
-        if remaining_budget == 0 {
-            break;
-        }
-
-        // Get file entry
-        let Some(entry) = index.files.get(&target_path) else {
-            continue; // Doc not in index
-        };
-
-        let doc_type = classify_target_doc(&target_path);
-
-        // Select sections based on doc type
-        let mut doc_sections = match doc_type {
-            DocType::Adr => {
-                select_sections_for_adr(&target_path, index, entry, MAX_SECTIONS_PER_ADR)
-            }
-            DocType::Design => {
-                // Check if any ref has an anchor
-                let anchor = refs.iter().find_map(|r| r.target_anchor.as_deref());
-                select_sections_for_design(
-                    &target_path,
-                    index,
-                    entry,
-                    anchor,
-                    MAX_SECTIONS_PER_DESIGN,
-                )
-            }
-            DocType::Ops => {
-                select_sections_for_ops(&target_path, index, entry, MAX_SECTIONS_PER_OPS)
-            }
-            DocType::Other => select_sections_for_other(&target_path, index, entry),
-        };
-
-        // Apply per-doc token budget
-        let mut doc_tokens = 0;
-        let mut filtered_sections = Vec::new();
-
-        for section in doc_sections.drain(..) {
-            let section_tokens = estimate_tokens(&section.content);
-
-            if doc_tokens + section_tokens > MAX_TOKENS_PER_XREF_DOC {
-                break; // Exceeded per-doc limit
-            }
-
-            if remaining_budget < section_tokens {
-                break; // Exceeded global budget
-            }
-
-            doc_tokens += section_tokens;
-            remaining_budget -= section_tokens;
-            filtered_sections.push(section);
-        }
-
-        if !filtered_sections.is_empty() {
-            visited_docs.insert(target_path.clone());
-            xref_sections.extend(filtered_sections);
-        }
-    }
-
-    xref_sections
-}
-
-/// Resolve cross-references using the persisted relation graph (graph-aware mode).
-/// Finds all documents reachable from primary docs via relation edges and
-/// includes their sections within the token budget.
-fn resolve_crossrefs_from_relations(
-    relation_index: &RelationIndex,
-    primary_docs: &HashSet<String>,
-    index: &ForwardIndex,
-    xref_token_budget: usize,
-) -> Vec<SectionMatch> {
-    const MAX_TOKENS_PER_XREF_DOC: usize = 600;
-
-    // Collect target docs reachable from primary docs, with edge info
-    let mut target_edges: HashMap<String, Vec<&RelationEdge>> = HashMap::new();
-    for edge in &relation_index.edges {
-        if primary_docs.contains(&edge.source) && !primary_docs.contains(&edge.target) {
-            target_edges
-                .entry(edge.target.clone())
-                .or_default()
-                .push(edge);
-        }
-    }
-
-    // Sort targets: more edges = higher priority, then by doc type, then alphabetical
-    let mut targets: Vec<(String, Vec<&RelationEdge>)> = target_edges.into_iter().collect();
-    targets.sort_by(|a, b| b.1.len().cmp(&a.1.len()).then_with(|| a.0.cmp(&b.0)));
-
-    let mut xref_sections = Vec::new();
-    let mut remaining_budget = xref_token_budget;
-    let mut visited: HashSet<String> = primary_docs.clone();
-
-    for (target_path, edges) in targets {
-        if remaining_budget == 0 {
-            break;
-        }
-        if visited.contains(&target_path) {
-            continue;
-        }
-
-        let Some(entry) = index.files.get(&target_path) else {
-            continue;
-        };
-
-        // Pick anchor from first edge that has one
-        let anchor = edges.iter().find_map(|e| e.anchor.as_deref());
-
-        // Select sections: if anchor, try targeted; otherwise first few sections
-        let doc_type = classify_target_doc(&target_path);
-        let max_sections = match doc_type {
-            DocType::Adr => 3,
-            DocType::Design => 2,
-            DocType::Ops => 2,
-            DocType::Other => 2,
-        };
-
-        let mut doc_sections = match doc_type {
-            DocType::Adr => select_sections_for_adr(&target_path, index, entry, max_sections),
-            DocType::Design => {
-                select_sections_for_design(&target_path, index, entry, anchor, max_sections)
-            }
-            DocType::Ops => select_sections_for_ops(&target_path, index, entry, max_sections),
-            DocType::Other => select_sections_for_other(&target_path, index, entry),
-        };
-
-        // Apply token budget
-        let mut doc_tokens = 0;
-        let mut filtered = Vec::new();
-        for section in doc_sections.drain(..) {
-            let section_tokens = estimate_tokens(&section.content);
-            if doc_tokens + section_tokens > MAX_TOKENS_PER_XREF_DOC {
-                break;
-            }
-            if remaining_budget < section_tokens {
-                break;
-            }
-            doc_tokens += section_tokens;
-            remaining_budget -= section_tokens;
-            filtered.push(section);
-        }
-
-        visited.insert(target_path);
-        xref_sections.extend(filtered);
-    }
-
-    xref_sections
-}
-
-// ============================================================================
-// Extractive Refiner (Phase 2.3)
-// ============================================================================
-
-/// Split text into sentences using simple regex
-fn split_sentences(text: &str) -> Vec<String> {
-    // Preserve code blocks
-    let code_block_re = Regex::new(r"```[\s\S]*?```").unwrap();
-    let mut code_blocks = Vec::new();
-    let mut placeholder_text = text.to_string();
-
-    // Extract code blocks and replace with placeholders
-    for (i, caps) in code_block_re.captures_iter(text).enumerate() {
-        let code = caps.get(0).unwrap().as_str();
-        code_blocks.push(code.to_string());
-        placeholder_text = placeholder_text.replace(code, &format!("__CODE_BLOCK_{i}__"));
-    }
-
-    // Split on sentence boundaries: period/exclamation/question followed by space
-    // We'll use a simpler approach: split on these punctuation marks and then filter
-    let parts: Vec<&str> = placeholder_text.split(&['.', '!', '?']).collect();
-    let mut sentences = Vec::new();
-
-    for part in parts {
-        let trimmed = part.trim();
-        // Keep sentences that are substantial (>10 chars) and start with a letter/number
-        if trimmed.len() > 10 {
-            let first_char = trimmed.chars().next().unwrap_or(' ');
-            if first_char.is_alphanumeric() || first_char == '#' {
-                sentences.push(trimmed.to_string());
-            }
-        }
-    }
-
-    // Restore code blocks
-    for (i, code) in code_blocks.iter().enumerate() {
-        let placeholder = format!("__CODE_BLOCK_{i}__");
-        for sentence in &mut sentences {
-            *sentence = sentence.replace(&placeholder, code);
-        }
-    }
-
-    sentences
-}
-
-/// Score a sentence for relevance
-fn score_sentence(
-    sentence: &str,
-    query_terms: &[String],
-    is_first: bool,
-    section_has_crossref: bool,
-) -> f64 {
-    let mut score = 0.0;
-
-    // Weight factors
-    const W_LEXICAL: f64 = 2.0;
-    const W_KEYWORD: f64 = 1.5;
-    const W_CODE: f64 = 3.0;
-    const W_FIRST: f64 = 0.3;
-    const W_CROSSREF: f64 = 1.0;
-
-    let sentence_lower = sentence.to_lowercase();
-
-    // 1. Lexical overlap with query
-    let mut overlap_count = 0;
-    for term in query_terms {
-        if sentence_lower.contains(&term.to_lowercase()) {
-            overlap_count += 1;
-        }
-    }
-    score += f64::from(overlap_count) * W_LEXICAL;
-
-    // 2. High-value keywords
-    let keywords = [
-        "deploy",
-        "deployment",
-        "restart",
-        "auth",
-        "authentication",
-        "session",
-        "state",
-        "error",
-        "failure",
-        "retry",
-        "timeout",
-        "architecture",
-        "design",
-        "decision",
-        "invariant",
-        "must",
-        "should",
-        "requires",
-        "context",
-        "rationale",
-        "consequence",
-        "kubernetes",
-        "container",
-        "pod",
-        "service",
-        "config",
-        "configuration",
-        "security",
-        "permission",
-        "rbac",
-        "policy",
-        "test",
-        "testing",
-    ];
-
-    for keyword in &keywords {
-        if sentence_lower.contains(keyword) {
-            score += W_KEYWORD;
-        }
-    }
-
-    // 3. Contains code or config
-    if sentence.contains("```")
-        || sentence.contains("    ")
-        || sentence.contains("kubectl")
-        || sentence.contains("docker")
-        || sentence.contains("make")
-        || sentence.contains("cargo")
-        || sentence.contains("python")
-        || sentence.contains("bash")
-    {
-        score += W_CODE;
-    }
-
-    // 4. First sentence bias
-    if is_first {
-        score += W_FIRST;
-    }
-
-    // 5. Cross-reference bonus
-    if section_has_crossref
-        && (sentence_lower.contains("adr")
-            || sentence_lower.contains("see ")
-            || sentence_lower.contains("refer")
-            || sentence_lower.contains("described in"))
-    {
-        score += W_CROSSREF;
-    }
-
-    score
-}
-
-/// Extract heading from section text
-fn extract_heading(text: &str) -> (String, String) {
-    let lines: Vec<&str> = text.lines().collect();
-    if lines.is_empty() {
-        return (String::new(), String::new());
-    }
-
-    // Check if first line is a heading
-    let first_line = lines[0].trim();
-    if first_line.starts_with('#') {
-        let heading = first_line.to_string();
-        let body = lines[1..].join("\n");
-        (heading, body)
-    } else {
-        (String::new(), text.to_string())
-    }
-}
-
-/// Refine a single section by extracting high-signal sentences
-fn refine_section(
-    section: &SectionMatch,
-    query_terms: &[String],
-    max_tokens: usize,
-) -> RefinedSection {
-    let (heading, body) = extract_heading(&section.content);
-
-    // Extract code blocks - preserve them fully
-    let code_block_re = Regex::new(r"```[\s\S]*?```").unwrap();
-    let code_blocks: Vec<String> = code_block_re
-        .captures_iter(&body)
-        .map(|cap| cap.get(0).unwrap().as_str().to_string())
-        .collect();
-
-    // Extract lists - preserve them
-    let list_re = Regex::new(r"(?m)^[\s]*[-*+]\s+.+$").unwrap();
-    let list_items: Vec<String> = list_re
-        .captures_iter(&body)
-        .map(|cap| cap.get(0).unwrap().as_str().to_string())
-        .collect();
-
-    // Extract subheadings - preserve them
-    let subheading_re = Regex::new(r"(?m)^#{2,6}\s+.+$").unwrap();
-    let subheadings: Vec<String> = subheading_re
-        .captures_iter(&body)
-        .map(|cap| cap.get(0).unwrap().as_str().to_string())
-        .collect();
-
-    // Split into sentences
-    let sentences = split_sentences(&body);
-
-    if sentences.is_empty() {
-        return RefinedSection {
-            section: section.clone(),
-            truncated: false,
-            truncation_reasons: Vec::new(),
-        };
-    }
-
-    // Check if section has cross-references
-    let has_crossref =
-        body.to_lowercase().contains("adr") || body.contains('[') && body.contains("](");
-
-    // Score each sentence
-    let mut scored_sentences: Vec<(String, f64)> = sentences
-        .iter()
-        .enumerate()
-        .map(|(i, s)| {
-            let score = score_sentence(s, query_terms, i == 0, has_crossref);
-            (s.clone(), score)
-        })
-        .collect();
-
-    // Sort by score (descending)
-    scored_sentences.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
-
-    // Keep top K sentences
-    let total_sentences = sentences.len();
-    let k = 6.max((total_sentences as f64 * 0.4).ceil() as usize);
-
-    let top_sentences: Vec<String> = scored_sentences
-        .iter()
-        .take(k)
-        .map(|(s, _)| s.clone())
-        .collect();
-
-    // Reconstruct section
-    let mut refined_parts = Vec::new();
-
-    // Add heading
-    if !heading.is_empty() {
-        refined_parts.push(heading.clone());
-    }
-
-    // Add preserved elements in order of appearance
-    let mut all_preserved = Vec::new();
-    all_preserved.extend(code_blocks);
-    all_preserved.extend(list_items);
-    all_preserved.extend(subheadings);
-
-    // Add top sentences
-    for sentence in &top_sentences {
-        refined_parts.push(sentence.clone());
-    }
-
-    // Add preserved elements
-    for item in &all_preserved {
-        if !refined_parts.iter().any(|p| p.contains(item)) {
-            refined_parts.push(item.clone());
-        }
-    }
-
-    let refined_text = refined_parts.join("\n\n");
-    let (final_text, truncated, truncation_reasons) =
-        truncate_text_to_budget(&refined_text, max_tokens, 0);
-
-    RefinedSection {
-        section: SectionMatch {
-            doc_path: section.doc_path.clone(),
-            heading: section.heading.clone(),
-            line_start: section.line_start,
-            line_end: section.line_end,
-            bm25_score: section.bm25_score,
-            content: final_text,
-            canonicality: section.canonicality,
-        },
-        truncated,
-        truncation_reasons,
-    }
-}
-
-/// Apply extractive refinement to all sections
-fn apply_extractive_refiner(
-    sections: Vec<SectionMatch>,
-    query: &str,
-    max_tokens_per_section: usize,
-) -> Vec<RefinedSection> {
-    let query_terms = parse_query_terms(query, true);
-
-    sections
-        .into_iter()
-        .map(|section| refine_section(&section, &query_terms, max_tokens_per_section))
-        .collect()
-}
-
-fn expand_from_files_args(args: &[String]) -> Result<Vec<String>, Box<dyn std::error::Error>> {
-    let mut expanded = Vec::new();
-
-    for arg in args {
-        if let Some(list_path) = arg.strip_prefix('@') {
-            let content = fs::read_to_string(list_path)?;
-            for line in content.lines() {
-                let trimmed = line.trim();
-                if !trimmed.is_empty() {
-                    expanded.push(trimmed.to_string());
-                }
-            }
-        } else {
-            expanded.push(arg.to_string());
-        }
-    }
-
-    Ok(expanded)
-}
-
-fn resolve_indexed_path(input: &str, index: &ForwardIndex) -> Option<String> {
-    let trimmed = input.trim();
-    if trimmed.is_empty() {
-        return None;
-    }
-
-    let mut candidates = Vec::new();
-    candidates.push(trimmed.to_string());
-    candidates.push(trimmed.trim_start_matches("./").to_string());
-
-    let normalized = normalize_path(Path::new(trimmed));
-    if !normalized.is_empty() {
-        candidates.push(normalized);
-    }
-
-    if Path::new(trimmed).is_absolute() {
-        if let Some(source_root) = forward_index_source_root(index) {
-            if let Ok(stripped) = Path::new(trimmed).strip_prefix(&source_root) {
-                let stripped_str = stripped.to_string_lossy().to_string();
-                if !stripped_str.is_empty() {
-                    candidates.push(stripped_str);
-                }
-                let normalized_stripped = normalize_path(stripped);
-                if !normalized_stripped.is_empty() {
-                    candidates.push(normalized_stripped);
-                }
-            }
-        }
-    }
-
-    let mut seen = HashSet::new();
-    for candidate in candidates {
-        if !seen.insert(candidate.clone()) {
-            continue;
-        }
-        if index.files.contains_key(&candidate) {
-            return Some(candidate);
-        }
-        let with_dot = format!("./{}", candidate.trim_start_matches("./"));
-        if index.files.contains_key(&with_dot) {
-            return Some(with_dot);
-        }
-    }
-
-    None
-}
-
-fn resolve_from_files(inputs: &[String], index: &ForwardIndex) -> (Vec<String>, Vec<String>) {
-    let mut resolved = Vec::new();
-    let mut missing = Vec::new();
-    let mut seen = HashSet::new();
-
-    for input in inputs {
-        if let Some(path) = resolve_indexed_path(input, index) {
-            if seen.insert(path.clone()) {
-                resolved.push(path);
-            }
-        } else {
-            missing.push(input.clone());
-        }
-    }
-
-    (resolved, missing)
-}
-
-fn collect_sections_for_files(
-    file_paths: &[String],
-    index: &ForwardIndex,
-    query: &str,
-    max_sections: usize,
-) -> Vec<SectionMatch> {
-    let query_terms = if query.is_empty() {
-        Vec::new()
-    } else {
-        parse_query_terms(query, true)
-    };
-    let mut all_sections = Vec::new();
-
-    for path in file_paths {
-        let Some(entry) = index.files.get(path) else {
-            continue;
-        };
-        let doc_score = if query_terms.is_empty() {
-            1.0
-        } else {
-            bm25_score(&query_terms, entry, index.avg_doc_length, &index.idf_map)
-        };
-        let canonicality = score_canonicality(path, entry);
-
-        if !entry.section_fingerprints.is_empty() {
-            if let Ok(content) = read_indexed_doc(index, path, entry) {
-                let lines: Vec<&str> = content.lines().collect();
-                for section in &entry.section_fingerprints {
-                    let start = section.line_start.saturating_sub(1);
-                    let end = section.line_end.min(lines.len());
-                    if start < end {
-                        let section_content = lines[start..end].join("\n");
-                        all_sections.push(SectionMatch {
-                            doc_path: path.to_string(),
-                            heading: section.heading.clone(),
-                            line_start: section.line_start,
-                            line_end: section.line_end,
-                            bm25_score: doc_score,
-                            content: section_content,
-                            canonicality,
-                        });
-                    }
-                }
-            }
-        } else if let Ok(content) = read_indexed_doc(index, path, entry) {
-            all_sections.push(SectionMatch {
-                doc_path: path.to_string(),
-                heading: "Full Document".to_string(),
-                line_start: 1,
-                line_end: content.lines().count(),
-                bm25_score: doc_score,
-                content,
-                canonicality,
-            });
-        }
-    }
-
-    all_sections.sort_by(compare_sections_by_relevance);
-
-    all_sections.into_iter().take(max_sections).collect()
-}
-
-fn collect_context_selection(
-    query: &str,
-    from_files: &[String],
-    index: &ForwardIndex,
-    max_sections: usize,
-) -> Result<ContextSelection, ContextSelectionIssue> {
-    let query_label = if query.trim().is_empty() {
-        "selected files".to_string()
-    } else {
-        query.to_string()
-    };
-    let query_for_refiner = if query.trim().is_empty() {
-        String::new()
-    } else {
-        query.to_string()
-    };
-
-    let sections = if !from_files.is_empty() {
-        let expanded = expand_from_files_args(from_files)
-            .map_err(|_| ContextSelectionIssue::NoIndexedFilesMatched)?;
-        let (resolved, missing) = resolve_from_files(&expanded, index);
-
-        if !missing.is_empty() {
-            return Err(ContextSelectionIssue::MissingFiles(missing));
-        }
-
-        if resolved.is_empty() {
-            return Err(ContextSelectionIssue::NoIndexedFilesMatched);
-        }
-
-        collect_sections_for_files(&resolved, index, query, max_sections)
-    } else {
-        let query_terms = parse_query_terms(query, true);
-        if query_terms.is_empty() {
-            return Err(ContextSelectionIssue::NoSearchableTerms);
-        }
-        search_relevant_sections(query, index, max_sections)
-    };
-
-    if sections.is_empty() {
-        return Err(ContextSelectionIssue::NoRelevantSections(query_label));
-    }
-
-    Ok(ContextSelection {
-        query_label,
-        query_for_refiner,
-        sections,
-    })
-}
-
-fn build_mcp_search_response(
-    query: &str,
-    from_files: &[String],
-    index_dir: &Path,
-    options: McpSearchOptions,
-) -> Result<McpSearchResponse, Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-    let selection_mode = if from_files.is_empty() {
-        "query".to_string()
-    } else {
-        "from_files".to_string()
-    };
-    let requested_query = if query.trim().is_empty() {
-        "selected files".to_string()
-    } else {
-        query.to_string()
-    };
-
-    let selection_limit = options.max_results.max(1).saturating_mul(4).max(8);
-    let selection = match collect_context_selection(
-        query,
-        from_files,
-        &forward_index,
-        selection_limit,
-    ) {
-        Ok(selection) => selection,
-        Err(issue) => {
-            let (error, message, missing_files) = match issue {
-                ContextSelectionIssue::NoSearchableTerms => (
-                    Some("no_query_terms".to_string()),
-                    Some("No searchable terms in query. Try different keywords.".to_string()),
-                    Vec::new(),
-                ),
-                ContextSelectionIssue::MissingFiles(missing) => (
-                    Some("missing_files".to_string()),
-                    Some(
-                        "Some files were not found in the index; search-context requires explicit indexed files."
-                            .to_string(),
-                    ),
-                    missing,
-                ),
-                ContextSelectionIssue::NoIndexedFilesMatched => (
-                    Some("no_indexed_files".to_string()),
-                    Some("No indexed files matched the provided inputs.".to_string()),
-                    Vec::new(),
-                ),
-                ContextSelectionIssue::NoRelevantSections(label) => (
-                    Some("no_relevant_sections".to_string()),
-                    Some(format!("No relevant sections found for query: \"{label}\"")),
-                    Vec::new(),
-                ),
-            };
-
-            return Ok(McpSearchResponse {
-                schema_version: MCP_SCHEMA_VERSION,
-                tool: "search_context".to_string(),
-                query: requested_query,
-                selection_mode,
-                budget: McpSearchBudget {
-                    max_results: options.max_results,
-                    max_tokens: options.max_tokens,
-                    max_bytes: options.max_bytes,
-                    ..McpSearchBudget::default()
-                },
-                pressure: McpPressure::default(),
-                results: Vec::new(),
-                error,
-                message,
-                missing_files,
-            });
-        }
-    };
-
-    let (unique_sections, deduped_hits) = dedupe_section_matches(selection.sections.clone());
-    let max_results = options.max_results.max(1);
-    let per_result_tokens = (options.max_tokens / max_results).max(40);
-    let per_result_bytes = (options.max_bytes / max_results).max(160);
-    let preview_sections = apply_extractive_refiner(
-        unique_sections.clone(),
-        &selection.query_for_refiner,
-        per_result_tokens,
-    );
-
-    let mut pressure = McpPressure::default();
-    let mut budget = McpSearchBudget {
-        max_results: options.max_results,
-        max_tokens: options.max_tokens,
-        max_bytes: options.max_bytes,
-        candidate_hits: selection.sections.len(),
-        deduped_hits,
-        ..McpSearchBudget::default()
-    };
-    let mut results = Vec::new();
-    let mut used_tokens = 0usize;
-    let mut used_bytes = 0usize;
-
-    for (rank, (raw_section, preview_section)) in unique_sections
-        .iter()
-        .zip(preview_sections.iter())
-        .enumerate()
-    {
-        if results.len() >= max_results {
-            pressure.truncated = true;
-            pressure.reasons.push("result_cap".to_string());
-            break;
-        }
-
-        let (preview, truncated, truncation_reasons) = truncate_text_to_budget(
-            &preview_section.section.content,
-            per_result_tokens,
-            per_result_bytes,
-        );
-        let preview_tokens = estimate_tokens(&preview);
-        let preview_bytes = preview.len();
-        let mut result_truncated = preview_section.truncated || truncated;
-        let mut result_reasons = preview_section.truncation_reasons.clone();
-        result_reasons.extend(truncation_reasons.clone());
-
-        if used_tokens + preview_tokens > options.max_tokens {
-            pressure.truncated = true;
-            pressure.reasons.push("token_cap".to_string());
-            break;
-        }
-        if used_bytes + preview_bytes > options.max_bytes {
-            pressure.truncated = true;
-            pressure.reasons.push("byte_cap".to_string());
-            break;
-        }
-
-        result_reasons.sort();
-        result_reasons.dedup();
-        result_truncated = result_truncated || !result_reasons.is_empty();
-
-        if result_truncated {
-            pressure.truncated = true;
-            pressure.reasons.extend(result_reasons.clone());
-        }
-
-        let handle = build_mcp_handle(&selection.query_label, raw_section);
-        let artifact = McpArtifact {
-            schema_version: MCP_SCHEMA_VERSION,
-            handle: handle.clone(),
-            query: selection.query_label.clone(),
-            source: build_mcp_source_ref(raw_section),
-            scores: McpScoreBreakdown {
-                bm25: raw_section.bm25_score,
-                canonicality: raw_section.canonicality,
-                combined: combined_section_score(raw_section),
-            },
-            preview: preview.clone(),
-            content: raw_section.content.clone(),
-            created_at: chrono_now(),
-        };
-        if let Err(err) = store_mcp_artifact(index_dir, &artifact) {
-            return Ok(McpSearchResponse {
-                schema_version: MCP_SCHEMA_VERSION,
-                tool: "search_context".to_string(),
-                query: selection.query_label.clone(),
-                selection_mode: selection_mode.clone(),
-                budget: McpSearchBudget {
-                    returned_results: results.len(),
-                    estimated_tokens: used_tokens,
-                    bytes: used_bytes,
-                    ..budget
-                },
-                pressure,
-                results,
-                error: Some("artifact_store_unavailable".to_string()),
-                message: Some(format!(
-                    "Unable to persist MCP handles for follow-up fetches: {err}"
-                )),
-                missing_files: Vec::new(),
-            });
-        }
-
-        results.push(McpSearchResult {
-            handle,
-            rank: rank + 1,
-            source: artifact.source.clone(),
-            scores: artifact.scores.clone(),
-            preview,
-            preview_tokens,
-            preview_bytes,
-            truncated: result_truncated,
-            truncation_reasons: result_reasons,
-        });
-
-        used_tokens += preview_tokens;
-        used_bytes += preview_bytes;
-    }
-
-    budget.returned_results = results.len();
-    budget.omitted_hits = unique_sections.len().saturating_sub(results.len());
-    budget.estimated_tokens = used_tokens;
-    budget.bytes = used_bytes;
-
-    if budget.omitted_hits > 0 && !pressure.reasons.iter().any(|reason| reason == "result_cap") {
-        pressure.truncated = true;
-        pressure.reasons.push("result_cap".to_string());
-    }
-    pressure.reasons.sort();
-    pressure.reasons.dedup();
-
-    Ok(McpSearchResponse {
-        schema_version: MCP_SCHEMA_VERSION,
-        tool: "search_context".to_string(),
-        query: selection.query_label,
-        selection_mode,
-        budget,
-        pressure,
-        results,
-        error: None,
-        message: None,
-        missing_files: Vec::new(),
-    })
-}
-
-fn cmd_mcp_search_context(
-    query: &str,
-    from_files: &[String],
-    index_dir: &Path,
-    options: McpSearchOptions,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let response = build_mcp_search_response(query, from_files, index_dir, options)?;
-    println!("{}", serde_json::to_string_pretty(&response)?);
-    Ok(())
-}
-
-fn build_mcp_fetch_response(
-    handle: &str,
-    index_dir: &Path,
-    options: McpFetchOptions,
-) -> Result<McpFetchResponse, Box<dyn std::error::Error>> {
-    let Ok(artifact) = load_mcp_artifact(index_dir, handle) else {
-        return Ok(McpFetchResponse {
-            schema_version: MCP_SCHEMA_VERSION,
-            tool: "fetch_context".to_string(),
-            handle: handle.to_string(),
-            budget: McpFetchBudget {
-                max_tokens: options.max_tokens,
-                max_bytes: options.max_bytes,
-                ..McpFetchBudget::default()
-            },
-            pressure: McpPressure::default(),
-            query: None,
-            result: None,
-            error: Some("unknown_handle".to_string()),
-            message: Some(format!(
-                "No stored MCP artifact found for handle '{handle}'. Run `yore mcp search-context` first."
-            )),
-        });
-    };
-
-    let (content, truncated, truncation_reasons) =
-        truncate_text_to_budget(&artifact.content, options.max_tokens, options.max_bytes);
-    let content_tokens = estimate_tokens(&content);
-    let content_bytes = content.len();
-
-    Ok(McpFetchResponse {
-        schema_version: MCP_SCHEMA_VERSION,
-        tool: "fetch_context".to_string(),
-        handle: handle.to_string(),
-        budget: McpFetchBudget {
-            max_tokens: options.max_tokens,
-            max_bytes: options.max_bytes,
-            estimated_tokens: content_tokens,
-            bytes: content_bytes,
-        },
-        pressure: McpPressure {
-            truncated,
-            reasons: truncation_reasons,
-        },
-        query: Some(artifact.query),
-        result: Some(McpFetchResult {
-            source: artifact.source,
-            scores: artifact.scores,
-            preview: artifact.preview,
-            content,
-            content_tokens,
-            content_bytes,
-        }),
-        error: None,
-        message: None,
-    })
-}
-
-fn cmd_mcp_fetch_context(
-    handle: &str,
-    index_dir: &Path,
-    options: McpFetchOptions,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let response = build_mcp_fetch_response(handle, index_dir, options)?;
-    println!("{}", serde_json::to_string_pretty(&response)?);
-    Ok(())
-}
-
-fn read_mcp_stdio_message<R: BufRead>(
-    reader: &mut R,
-) -> Result<Option<serde_json::Value>, io::Error> {
-    let mut content_length: Option<usize> = None;
-    let mut line = String::new();
-
-    loop {
-        line.clear();
-        let bytes_read = reader.read_line(&mut line)?;
-        if bytes_read == 0 {
-            if content_length.is_none() {
-                return Ok(None);
-            }
-            return Err(io::Error::new(
-                io::ErrorKind::UnexpectedEof,
-                "unexpected EOF while reading MCP message headers",
-            ));
-        }
-
-        if line == "\r\n" || line == "\n" {
-            break;
-        }
-
-        let header = line.trim_end_matches(['\r', '\n']);
-        if let Some((name, value)) = header.split_once(':') {
-            if name.eq_ignore_ascii_case("content-length") {
-                content_length = Some(value.trim().parse().map_err(|err| {
-                    io::Error::new(
-                        io::ErrorKind::InvalidData,
-                        format!("invalid Content-Length header: {err}"),
-                    )
-                })?);
-            }
-        }
-    }
-
-    let content_length = content_length.ok_or_else(|| {
-        io::Error::new(
-            io::ErrorKind::InvalidData,
-            "missing Content-Length header in MCP message",
-        )
-    })?;
-    let mut payload = vec![0; content_length];
-    reader.read_exact(&mut payload)?;
-    serde_json::from_slice(&payload)
-        .map(Some)
-        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
-}
-
-fn write_mcp_stdio_message<W: Write, T: Serialize>(
-    writer: &mut W,
-    payload: &T,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let body = serde_json::to_vec(payload)?;
-    write!(writer, "Content-Length: {}\r\n\r\n", body.len())?;
-    writer.write_all(&body)?;
-    writer.flush()?;
-    Ok(())
-}
-
-fn resolve_mcp_tool_index(default_index: &Path, requested_index: Option<PathBuf>) -> PathBuf {
-    requested_index.unwrap_or_else(|| default_index.to_path_buf())
-}
-
-fn mcp_tool_definitions() -> serde_json::Value {
-    serde_json::json!([
-        {
-            "name": "search_context",
-            "description": "Return bounded previews, source references, pressure metadata, and opaque handles for explicit follow-up fetches.",
-            "inputSchema": {
-                "type": "object",
-                "additionalProperties": false,
-                "properties": {
-                    "query": {
-                        "type": "string",
-                        "description": "Natural language query or question. Optional when from_files is provided."
-                    },
-                    "from_files": {
-                        "type": "array",
-                        "description": "Explicit indexed files to preview instead of a query. Supports @list.txt expansion.",
-                        "items": {
-                            "type": "string"
-                        },
-                        "minItems": 1
-                    },
-                    "max_results": {
-                        "type": "integer",
-                        "description": "Maximum preview results to return.",
-                        "minimum": 1,
-                        "default": 5
-                    },
-                    "max_tokens": {
-                        "type": "integer",
-                        "description": "Approximate maximum total tokens across previews.",
-                        "minimum": 1,
-                        "default": 1200
-                    },
-                    "max_bytes": {
-                        "type": "integer",
-                        "description": "Maximum total bytes across previews.",
-                        "minimum": 1,
-                        "default": 12000
-                    },
-                    "index": {
-                        "type": "string",
-                        "description": "Optional override for the index directory. Defaults to the server's configured index."
-                    }
-                },
-                "oneOf": [
-                    {
-                        "required": ["query"]
-                    },
-                    {
-                        "required": ["from_files"]
-                    }
-                ]
-            }
-        },
-        {
-            "name": "fetch_context",
-            "description": "Expand a previously returned opaque handle with its own token and byte caps.",
-            "inputSchema": {
-                "type": "object",
-                "additionalProperties": false,
-                "properties": {
-                    "handle": {
-                        "type": "string",
-                        "description": "Opaque ctx_... handle returned by search_context."
-                    },
-                    "max_tokens": {
-                        "type": "integer",
-                        "description": "Approximate maximum tokens for fetched content.",
-                        "minimum": 1,
-                        "default": 4000
-                    },
-                    "max_bytes": {
-                        "type": "integer",
-                        "description": "Maximum bytes for fetched content.",
-                        "minimum": 1,
-                        "default": 20000
-                    },
-                    "index": {
-                        "type": "string",
-                        "description": "Optional override for the index directory. Defaults to the server's configured index."
-                    }
-                },
-                "required": ["handle"]
-            }
-        }
-    ])
-}
-
-fn build_mcp_tool_result<T: Serialize>(
-    payload: &T,
-    is_error: bool,
-) -> Result<serde_json::Value, Box<dyn std::error::Error>> {
-    Ok(serde_json::json!({
-        "content": [
-            {
-                "type": "text",
-                "text": serde_json::to_string(payload)?,
-            }
-        ],
-        "structuredContent": serde_json::to_value(payload)?,
-        "isError": is_error,
-    }))
-}
-
-fn json_rpc_success(id: serde_json::Value, result: serde_json::Value) -> serde_json::Value {
-    serde_json::json!({
-        "jsonrpc": "2.0",
-        "id": id,
-        "result": result,
-    })
-}
-
-fn json_rpc_error(id: Option<serde_json::Value>, code: i64, message: &str) -> serde_json::Value {
-    serde_json::json!({
-        "jsonrpc": "2.0",
-        "id": id.unwrap_or(serde_json::Value::Null),
-        "error": {
-            "code": code,
-            "message": message,
-        }
-    })
-}
-
-fn cmd_mcp_serve(index_dir: &Path) -> Result<(), Box<dyn std::error::Error>> {
-    let stdin = io::stdin();
-    let stdout = io::stdout();
-    let mut reader = stdin.lock();
-    let mut writer = stdout.lock();
-
-    loop {
-        let Some(message) = read_mcp_stdio_message(&mut reader)? else {
-            break;
-        };
-        let request: JsonRpcRequest = match serde_json::from_value(message) {
-            Ok(request) => request,
-            Err(err) => {
-                let response = json_rpc_error(None, -32600, &format!("Invalid request: {err}"));
-                write_mcp_stdio_message(&mut writer, &response)?;
-                continue;
-            }
-        };
-
-        if request.jsonrpc.as_deref() != Some("2.0") {
-            if let Some(id) = request.id {
-                let response = json_rpc_error(Some(id), -32600, "Only JSON-RPC 2.0 is supported.");
-                write_mcp_stdio_message(&mut writer, &response)?;
-            }
-            continue;
-        }
-
-        let response = match request.method.as_str() {
-            "initialize" => {
-                let params: McpInitializeParams = serde_json::from_value(request.params)
-                    .unwrap_or_else(|_| McpInitializeParams::default());
-                let protocol_version = params
-                    .protocol_version
-                    .unwrap_or_else(|| DEFAULT_MCP_PROTOCOL_VERSION.to_string());
-                request.id.map(|id| {
-                    json_rpc_success(
-                        id,
-                        serde_json::json!({
-                            "protocolVersion": protocol_version,
-                            "capabilities": {
-                                "tools": {
-                                    "listChanged": false
-                                }
-                            },
-                            "serverInfo": {
-                                "name": "yore",
-                                "version": env!("CARGO_PKG_VERSION")
-                            },
-                            "instructions": "Use search_context for bounded previews and fetch_context only for explicit follow-up expansion.",
-                        }),
-                    )
-                })
-            }
-            "notifications/initialized" | "notifications/cancelled" => None,
-            "ping" => request
-                .id
-                .map(|id| json_rpc_success(id, serde_json::json!({}))),
-            "tools/list" => request.id.map(|id| {
-                json_rpc_success(
-                    id,
-                    serde_json::json!({
-                        "tools": mcp_tool_definitions(),
-                    }),
-                )
-            }),
-            "tools/call" => {
-                let id = request.id.clone();
-                match serde_json::from_value::<McpToolCallParams>(request.params) {
-                    Ok(McpToolCallParams { name, arguments }) => match name.as_str() {
-                        "search_context" => {
-                            match serde_json::from_value::<McpSearchToolArgs>(arguments) {
-                                Ok(args) => {
-                                    let tool_index = resolve_mcp_tool_index(index_dir, args.index);
-                                    let response = build_mcp_search_response(
-                                        args.query.trim(),
-                                        &args.from_files,
-                                        &tool_index,
-                                        McpSearchOptions {
-                                            max_results: args.max_results,
-                                            max_tokens: args.max_tokens,
-                                            max_bytes: args.max_bytes,
-                                        },
-                                    )?;
-                                    let result =
-                                        build_mcp_tool_result(&response, response.error.is_some())?;
-                                    id.map(|id| json_rpc_success(id, result))
-                                }
-                                Err(err) => id.map(|id| {
-                                    json_rpc_error(
-                                        Some(id),
-                                        -32602,
-                                        &format!("Invalid search_context arguments: {err}"),
-                                    )
-                                }),
-                            }
-                        }
-                        "fetch_context" => {
-                            match serde_json::from_value::<McpFetchToolArgs>(arguments) {
-                                Ok(args) => {
-                                    let tool_index = resolve_mcp_tool_index(index_dir, args.index);
-                                    let response = build_mcp_fetch_response(
-                                        args.handle.trim(),
-                                        &tool_index,
-                                        McpFetchOptions {
-                                            max_tokens: args.max_tokens,
-                                            max_bytes: args.max_bytes,
-                                        },
-                                    )?;
-                                    let result =
-                                        build_mcp_tool_result(&response, response.error.is_some())?;
-                                    id.map(|id| json_rpc_success(id, result))
-                                }
-                                Err(err) => id.map(|id| {
-                                    json_rpc_error(
-                                        Some(id),
-                                        -32602,
-                                        &format!("Invalid fetch_context arguments: {err}"),
-                                    )
-                                }),
-                            }
-                        }
-                        _ => id.map(|id| {
-                            json_rpc_error(Some(id), -32602, &format!("Unknown tool '{name}'."))
-                        }),
-                    },
-                    Err(err) => id.map(|id| {
-                        json_rpc_error(
-                            Some(id),
-                            -32602,
-                            &format!("Invalid tools/call params: {err}"),
-                        )
-                    }),
-                }
-            }
-            _ => request.id.map(|id| {
-                json_rpc_error(
-                    Some(id),
-                    -32601,
-                    &format!("Method '{}' is not supported.", request.method),
-                )
-            }),
-        };
-
-        if let Some(response) = response {
-            write_mcp_stdio_message(&mut writer, &response)?;
-        }
-    }
-
-    Ok(())
-}
-
-/// Main assemble command handler
-fn cmd_assemble(
-    query: &str,
-    from_files: &[String],
-    options: &AssembleOptions,
-    index_dir: &Path,
-) -> Result<(), Box<dyn std::error::Error>> {
-    if options.format != "markdown" {
-        return Err("Only markdown format is supported currently".into());
-    }
-
-    let forward_index = load_forward_index(index_dir)?;
-    let selection =
-        match collect_context_selection(query, from_files, &forward_index, options.max_sections) {
-            Ok(selection) => selection,
-            Err(ContextSelectionIssue::NoSearchableTerms) => {
-                println!("# No searchable terms in query. Try different keywords.");
-                return Ok(());
-            }
-            Err(ContextSelectionIssue::MissingFiles(missing)) => {
-                eprintln!(
-                    "{}",
-                    "Some files were not found in the index (they may be missing or excluded):"
-                        .yellow()
-                );
-                for path in missing {
-                    eprintln!("  - {path}");
-                }
-                return Ok(());
-            }
-            Err(ContextSelectionIssue::NoIndexedFilesMatched) => {
-                println!("# No indexed files matched the provided inputs.");
-                return Ok(());
-            }
-            Err(ContextSelectionIssue::NoRelevantSections(label)) => {
-                println!("# No relevant sections found for query: \"{label}\"");
-                return Ok(());
-            }
-        };
-    let query_label = selection.query_label;
-    let query_for_refiner = selection.query_for_refiner;
-    let primary_sections = selection.sections;
-
-    let primary_tokens: usize = primary_sections
-        .iter()
-        .map(|s| estimate_tokens(&s.content))
-        .sum();
-
-    // Phase 2: Cross-reference expansion (if depth > 0)
-    let mut all_sections = primary_sections.clone();
-
-    if options.depth > 0 {
-        // Calculate xref token budget
-        const XREF_TOKEN_FRACTION: f64 = 0.3;
-        const XREF_TOKEN_ABS_MAX: usize = 2000;
-
-        let xref_cap =
-            ((options.max_tokens as f64 * XREF_TOKEN_FRACTION) as usize).min(XREF_TOKEN_ABS_MAX);
-        let remaining_tokens = options.max_tokens.saturating_sub(primary_tokens);
-        let xref_token_budget = remaining_tokens.min(xref_cap);
-
-        let primary_docs: HashSet<String> = primary_sections
-            .iter()
-            .map(|s| s.doc_path.clone())
-            .collect();
-
-        if options.use_relations {
-            // Graph-aware expansion via persisted relation edges
-            let relation_index = load_relation_index(index_dir);
-            if !relation_index.edges.is_empty() && xref_token_budget > 0 {
-                let xref_sections = resolve_crossrefs_from_relations(
-                    &relation_index,
-                    &primary_docs,
-                    &forward_index,
-                    xref_token_budget,
-                );
-                all_sections.extend(xref_sections);
-            }
-        } else {
-            // Legacy on-the-fly cross-reference expansion
-            let adr_index = build_adr_index(&forward_index);
-            let crossrefs = collect_crossrefs(&primary_sections, &adr_index);
-
-            if xref_token_budget > 0 && !crossrefs.is_empty() {
-                let xref_sections =
-                    resolve_crossrefs(&crossrefs, &primary_docs, &forward_index, xref_token_budget);
-                all_sections.extend(xref_sections);
-            }
-        }
-    }
-    let (all_sections, _) = dedupe_section_matches(all_sections);
-
-    // Phase 3: Extractive refinement (increase signal density)
-    let max_tokens_per_section = options.max_tokens / all_sections.len().max(1);
-    let refined_sections =
-        apply_extractive_refiner(all_sections, &query_for_refiner, max_tokens_per_section);
-
-    // If doc_terms requested, prepend a source summary
-    if options.doc_terms > 0 {
-        println!("<!-- Source Documents -->");
-        let query_terms = if query_for_refiner.is_empty() {
-            Vec::new()
-        } else {
-            parse_query_terms(&query_for_refiner, true)
-        };
-        let mut seen_docs: HashSet<String> = HashSet::new();
-
-        for section in &refined_sections {
-            if seen_docs.contains(&section.section.doc_path) {
-                continue;
-            }
-            seen_docs.insert(section.section.doc_path.clone());
-
-            if let Some(entry) = forward_index.files.get(&section.section.doc_path) {
-                let top_terms = get_top_doc_terms(
-                    entry,
-                    &forward_index.idf_map,
-                    &query_terms,
-                    options.doc_terms,
-                );
-                if !top_terms.is_empty() {
-                    println!(
-                        "<!-- {} : {} -->",
-                        section.section.doc_path,
-                        top_terms.join(", ")
-                    );
-                }
-            }
-        }
-        println!();
-    }
-
-    // Phase 4: Distill to markdown
-    let digest_sections: Vec<SectionMatch> = refined_sections
-        .iter()
-        .map(|section| section.section.clone())
-        .collect();
-    let digest = distill_to_markdown(&digest_sections, &query_label, options.max_tokens);
-
-    println!("{digest}");
-
-    Ok(())
-}
-
-/// Evaluation command handler - runs retrieval pipeline against test questions
-fn cmd_eval(
-    questions_path: &Path,
-    index_dir: &Path,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    // Load questions from JSONL file
-    let questions_content = fs::read_to_string(questions_path)?;
-    let questions: Vec<Question> = questions_content
-        .lines()
-        .filter(|line| !line.trim().is_empty())
-        .map(serde_json::from_str)
-        .collect::<Result<Vec<_>, _>>()?;
-
-    if questions.is_empty() {
-        if json {
-            println!(
-                r#"{{"questions_file": "{}", "total_questions": 0, "error": "No questions found"}}"#,
-                questions_path.display()
-            );
-        } else {
-            println!("No questions found in {}", questions_path.display());
-        }
-        return Ok(());
-    }
-
-    // Load index once
-    let forward_index = load_forward_index(index_dir)?;
-
-    // Run evaluation for each question
-    let mut results = Vec::new();
-
-    for question in &questions {
-        // Run assemble internally (capture output as string)
-        let primary_sections = search_relevant_sections(&question.q, &forward_index, 20);
-
-        if primary_sections.is_empty() {
-            results.push(EvalResult {
-                id: question.id,
-                question: question.q.clone(),
-                hits: 0,
-                total: question.expect.len(),
-                passed: false,
-                tokens: 0,
-            });
-            continue;
-        }
-
-        let primary_tokens: usize = primary_sections
-            .iter()
-            .map(|s| estimate_tokens(&s.content))
-            .sum();
-
-        // Cross-reference expansion
-        let mut all_sections = primary_sections.clone();
-        let adr_index = build_adr_index(&forward_index);
-        let crossrefs = collect_crossrefs(&primary_sections, &adr_index);
-
-        const XREF_TOKEN_FRACTION: f64 = 0.3;
-        const XREF_TOKEN_ABS_MAX: usize = 2000;
-        let max_tokens: usize = 8000; // Default for eval
-
-        let xref_cap = ((max_tokens as f64 * XREF_TOKEN_FRACTION) as usize).min(XREF_TOKEN_ABS_MAX);
-        let remaining_tokens = max_tokens.saturating_sub(primary_tokens);
-        let xref_token_budget = remaining_tokens.min(xref_cap);
-
-        if xref_token_budget > 0 && !crossrefs.is_empty() {
-            let primary_docs: HashSet<String> = primary_sections
-                .iter()
-                .map(|s| s.doc_path.clone())
-                .collect();
-
-            let xref_sections =
-                resolve_crossrefs(&crossrefs, &primary_docs, &forward_index, xref_token_budget);
-
-            all_sections.extend(xref_sections);
-        }
-
-        // Extractive refinement
-        let max_tokens_per_section = max_tokens / all_sections.len().max(1);
-        let refined_sections =
-            apply_extractive_refiner(all_sections, &question.q, max_tokens_per_section);
-
-        // Distill to markdown
-        let digest_sections: Vec<SectionMatch> = refined_sections
-            .iter()
-            .map(|section| section.section.clone())
-            .collect();
-        let digest = distill_to_markdown(&digest_sections, &question.q, max_tokens);
-
-        // Check coverage of expected substrings
-        let digest_lower = digest.to_lowercase();
-        let hits = question
-            .expect
-            .iter()
-            .filter(|e| digest_lower.contains(&e.to_lowercase()))
-            .count();
-
-        let min_hits = question.min_hits.unwrap_or(question.expect.len());
-        let passed = hits >= min_hits;
-        let tokens = estimate_tokens(&digest);
-
-        results.push(EvalResult {
-            id: question.id,
-            question: question.q.clone(),
-            hits,
-            total: question.expect.len(),
-            passed,
-            tokens,
-        });
-    }
-
-    // Calculate summary
-    let passed_count = results.iter().filter(|r| r.passed).count();
-    let total = results.len();
-    let pass_rate_pct = passed_count as f64 / total as f64 * 100.0;
-
-    if json {
-        let json_results: Vec<EvalQuestionResult> = results
-            .iter()
-            .map(|r| {
-                let expected: Vec<String> = questions
-                    .iter()
-                    .find(|q| q.id == r.id)
-                    .map(|q| q.expect.clone())
-                    .unwrap_or_default();
-                let found: Vec<String> = expected
-                    .iter()
-                    .filter(|e| r.question.to_lowercase().contains(&e.to_lowercase()))
-                    .cloned()
-                    .collect();
-                let missing: Vec<String> = expected
-                    .iter()
-                    .filter(|e| !r.question.to_lowercase().contains(&e.to_lowercase()))
-                    .cloned()
-                    .collect();
-                EvalQuestionResult {
-                    question: r.question.clone(),
-                    passed: r.passed,
-                    expected,
-                    found,
-                    missing,
-                }
-            })
-            .collect();
-
-        let output = EvalJsonResult {
-            questions_file: questions_path.to_string_lossy().to_string(),
-            total_questions: total,
-            passed: passed_count,
-            failed: total - passed_count,
-            pass_rate: pass_rate_pct,
-            results: json_results,
-        };
-        println!("{}", serde_json::to_string_pretty(&output)?);
-        return Ok(());
-    }
-
-    // Print results (human-readable)
-    println!("\n{}", "Evaluation Results".cyan().bold());
-    println!("{}", "=".repeat(60));
-    println!();
-
-    for result in &results {
-        let status = if result.passed {
-            "✓".green().bold()
-        } else {
-            "✗".red().bold()
-        };
-
-        println!("[{}] {}", result.id, result.question.white().bold());
-        println!("  - hits: {}/{} {}", result.hits, result.total, status);
-        println!("  - size: {} tokens", result.tokens);
-        println!();
-    }
-
-    // Print summary
-    println!("{}", "=".repeat(60));
-    println!("{}", "Summary".cyan().bold());
-    println!("  Passed: {passed_count}/{total} ({pass_rate_pct:.0}%)");
-    println!("  Failed: {}/{}", total - passed_count, total);
-    println!();
-
-    if passed_count < total {
-        println!("{}", "Failed Questions:".yellow().bold());
-        for result in &results {
-            if !result.passed {
-                println!(
-                    "  - [{}] {} (hits: {}/{})",
-                    result.id, result.question, result.hits, result.total
-                );
-            }
-        }
-        println!();
-    }
-
-    Ok(())
-}
-
-/// Core link checking engine used by both `check` and `check-links`.
-/// Returns a structured `LinkCheckResult` without printing.
-fn run_link_check(
-    index_dir: &Path,
-    root: Option<&Path>,
-    include_summary: bool,
-    summary_only: bool,
-    external_paths: &[String],
-) -> Result<LinkCheckResult, Box<dyn std::error::Error>> {
-    // Load the forward index
-    let forward_index = load_forward_index(index_dir)?;
-
-    // Determine root directory for resolving relative paths
-    let root_dir = if let Some(r) = root {
-        r.to_path_buf()
-    } else if let Some(source_root) = forward_index_source_root(&forward_index) {
-        source_root
-    } else {
-        // Extract root from index by finding common prefix of all paths
-        if let Some((first_path, _)) = forward_index.files.iter().next() {
-            let first_path = Path::new(first_path);
-            if let Some(parent) = first_path.parent() {
-                // Walk up to find the common root
-                let mut candidate = parent.to_path_buf();
-                while candidate.parent().is_some() {
-                    let parent_path = candidate.parent().unwrap();
-                    // Check if this is the common root by checking if it contains "docs"
-                    if candidate.file_name().and_then(|s| s.to_str()) == Some("docs") {
-                        break;
-                    }
-                    candidate = parent_path.to_path_buf();
-                }
-                candidate.parent().unwrap_or(Path::new(".")).to_path_buf()
-            } else {
-                Path::new(".").to_path_buf()
-            }
-        } else {
-            Path::new(".").to_path_buf()
-        }
-    };
-
-    // Build file set for fast lookup (keys of the HashMap)
-    let file_set: HashSet<String> = forward_index.files.keys().cloned().collect();
-
-    // Build heading index for anchor validation
-    let mut heading_index: HashMap<String, HashSet<String>> = HashMap::new();
-    for (path, entry) in &forward_index.files {
-        let mut anchors = HashSet::new();
-        for heading in &entry.headings {
-            // Convert heading text to anchor format (lowercase, replace spaces with hyphens)
-            let anchor = heading.text.to_lowercase().replace(' ', "-");
-            anchors.insert(anchor);
-        }
-        heading_index.insert(path.clone(), anchors);
-    }
-
-    let mut broken_links = Vec::new();
-    let mut total_links = 0;
-
-    // Cache file lines for context snippets
-    let mut file_lines_cache: HashMap<String, Vec<String>> = HashMap::new();
-
-    // Summary accumulators
-    let mut counts_by_file: HashMap<String, HashMap<String, usize>> = HashMap::new();
-    let mut counts_by_kind: HashMap<String, usize> = HashMap::new();
-
-    // Iterate through all files and check their links
-    for (file_path, entry) in &forward_index.files {
-        for link in &entry.links {
-            total_links += 1;
-
-            let target = &link.target;
-
-            // Skip external links (http://, https://, mailto:, etc.)
-            if target.starts_with("http://")
-                || target.starts_with("https://")
-                || target.starts_with("mailto:")
-                || target.starts_with("ftp://")
-            {
-                continue;
-            }
-
-            // Parse link to separate file path and anchor
-            let (link_path, anchor) = if let Some(idx) = target.find('#') {
-                (
-                    target[..idx].to_string(),
-                    Some(target[idx + 1..].to_string()),
-                )
-            } else {
-                (target.clone(), None)
-            };
-
-            let line_number = link.line;
-
-            // Resolve relative path
-            let resolved_path = if link_path.is_empty() {
-                // Just an anchor in the current file
-                file_path.clone()
-            } else if let Some(stripped) = link_path.strip_prefix('/') {
-                // Absolute path from root
-                root_dir.join(stripped).to_string_lossy().to_string()
-            } else {
-                // Relative path
-                let source_path = Path::new(file_path);
-                if let Some(parent) = source_path.parent() {
-                    parent.join(&link_path).to_string_lossy().to_string()
-                } else {
-                    link_path.clone()
-                }
-            };
-
-            // Normalize path (remove ./ and resolve ../)
-            let normalized_path = normalize_path(Path::new(&resolved_path));
-
-            // Placeholder targets: treat as lower-severity broken links
-            if !link_path.is_empty() && is_placeholder_target(&link_path) {
-                let context = get_link_context(&mut file_lines_cache, file_path, line_number)?;
-                let kind = LinkKind::Placeholder;
-                record_link_kind(&mut counts_by_file, &mut counts_by_kind, file_path, &kind);
-                broken_links.push(BrokenLink {
-                    source_file: file_path.clone(),
-                    line_number,
-                    link_text: link.text.clone(),
-                    link_target: target.clone(),
-                    error: format!("Placeholder link target: {link_path}"),
-                    anchor: anchor.clone(),
-                    context,
-                });
-                continue;
-            }
-
-            // File-level checks only when there is an explicit path component
-            if !link_path.is_empty() {
-                let meta = fs::metadata(&normalized_path).ok();
-                let exists = meta.is_some();
-                let is_dir = meta.as_ref().is_some_and(std::fs::Metadata::is_dir);
-
-                if exists && is_dir {
-                    // Valid directory reference
-                    record_link_kind(
-                        &mut counts_by_file,
-                        &mut counts_by_kind,
-                        file_path,
-                        &LinkKind::DirectoryReference,
-                    );
-                } else if exists {
-                    // File exists on disk but may not be indexed (e.g., code)
-                    if !file_set.contains(&normalized_path) {
-                        let ext = file_extension(&normalized_path);
-                        let kind = if is_code_extension(&ext) {
-                            LinkKind::CodeReference
-                        } else {
-                            LinkKind::ExternalReference
-                        };
-                        record_link_kind(
-                            &mut counts_by_file,
-                            &mut counts_by_kind,
-                            file_path,
-                            &kind,
-                        );
-                    }
-                } else {
-                    // File not found locally - check external repos
-                    let mut found_in_external = false;
-                    for ext_path in external_paths {
-                        // Check if the link might be pointing to an external repo
-                        // by seeing if the normalized path contains the external path pattern
-                        if normalized_path.contains(ext_path) {
-                            // The link references an external repo path, try to resolve it
-                            if Path::new(&normalized_path).exists() {
-                                found_in_external = true;
-                                record_link_kind(
-                                    &mut counts_by_file,
-                                    &mut counts_by_kind,
-                                    file_path,
-                                    &LinkKind::ExternalReference,
-                                );
-                                break;
-                            }
-                        }
-                        // Also check if it's a relative path that would resolve to external repo
-                        let resolved_ext = Path::new(ext_path)
-                            .join(Path::new(&link_path).file_name().unwrap_or_default());
-                        if resolved_ext.exists() {
-                            found_in_external = true;
-                            record_link_kind(
-                                &mut counts_by_file,
-                                &mut counts_by_kind,
-                                file_path,
-                                &LinkKind::ExternalReference,
-                            );
-                            break;
-                        }
-                    }
-
-                    if found_in_external {
-                        continue;
-                    }
-
-                    // Missing target file: classify as doc_missing or code_missing
-                    let ext = file_extension(&normalized_path);
-                    let kind = if is_code_extension(&ext) {
-                        LinkKind::CodeMissing
-                    } else {
-                        LinkKind::DocMissing
-                    };
-                    let context = get_link_context(&mut file_lines_cache, file_path, line_number)?;
-                    record_link_kind(&mut counts_by_file, &mut counts_by_kind, file_path, &kind);
-                    broken_links.push(BrokenLink {
-                        source_file: file_path.clone(),
-                        line_number,
-                        link_text: link.text.clone(),
-                        link_target: target.clone(),
-                        error: format!("Target file not found: {normalized_path}"),
-                        anchor: anchor.clone(),
-                        context,
-                    });
-                    continue;
-                }
-            }
-
-            // Check anchor if present
-            if let Some(ref anchor_text) = anchor {
-                let target_file = if link_path.is_empty() {
-                    file_path
-                } else {
-                    &normalized_path
-                };
-
-                if let Some(anchors) = heading_index.get(target_file) {
-                    if !anchors.contains(anchor_text as &str) {
-                        let context =
-                            get_link_context(&mut file_lines_cache, file_path, line_number)?;
-                        let kind = LinkKind::AnchorMissing;
-                        record_link_kind(
-                            &mut counts_by_file,
-                            &mut counts_by_kind,
-                            file_path,
-                            &kind,
-                        );
-                        broken_links.push(BrokenLink {
-                            source_file: file_path.clone(),
-                            line_number,
-                            link_text: link.text.clone(),
-                            link_target: target.clone(),
-                            error: format!("Anchor not found: #{anchor_text}"),
-                            anchor: Some(anchor_text.clone()),
-                            context,
-                        });
-                    }
-                } else {
-                    let context = get_link_context(&mut file_lines_cache, file_path, line_number)?;
-                    let kind = LinkKind::AnchorUnverified;
-                    record_link_kind(&mut counts_by_file, &mut counts_by_kind, file_path, &kind);
-                    broken_links.push(BrokenLink {
-                        source_file: file_path.clone(),
-                        line_number,
-                        link_text: link.text.clone(),
-                        link_target: target.clone(),
-                        error: format!(
-                            "Could not verify anchor (file has no headings): #{anchor_text}"
-                        ),
-                        anchor: Some(anchor_text.clone()),
-                        context,
-                    });
-                }
-            }
-        }
-    }
-
-    let valid_links = total_links - broken_links.len();
-
-    let mut result = LinkCheckResult {
-        total_links,
-        valid_links,
-        broken_links: broken_links.len(),
-        broken: broken_links.clone(),
-        summary: None,
-    };
-
-    // Build summary if requested
-    if include_summary || summary_only {
-        let mut by_file_vec: Vec<LinkSummaryByFile> = counts_by_file
-            .into_iter()
-            .map(|(file, counts)| LinkSummaryByFile { file, counts })
-            .collect();
-        by_file_vec.sort_by(|a, b| a.file.cmp(&b.file));
-
-        let mut by_kind_vec: Vec<LinkSummaryByKind> = counts_by_kind
-            .into_iter()
-            .map(|(kind, count)| LinkSummaryByKind { kind, count })
-            .collect();
-        by_kind_vec.sort_by(|a, b| a.kind.cmp(&b.kind));
-
-        result.summary = Some(LinkCheckSummary {
-            by_file: by_file_vec,
-            by_kind: by_kind_vec,
-        });
-    }
-
-    Ok(result)
-}
-
-/// User-facing link check command that prints results.
-fn cmd_check_links(
-    index_dir: &Path,
-    json: bool,
-    root: Option<&Path>,
-    summary_flag: bool,
-    summary_only: bool,
-    external_paths: &[String],
-) -> Result<(), Box<dyn std::error::Error>> {
-    let include_summary = summary_flag || summary_only || !json;
-    let result = run_link_check(
-        index_dir,
-        root,
-        include_summary,
-        summary_only,
-        external_paths,
-    )?;
-
-    if json {
-        println!("{}", serde_json::to_string_pretty(&result)?);
-        return Ok(());
-    }
-
-    // Recompute root directory for display purposes only
-    let forward_index = load_forward_index(index_dir)?;
-    let display_root = if let Some(r) = root {
-        r.to_path_buf()
-    } else if let Some((first_path, _)) = forward_index.files.iter().next() {
-        let first_path = Path::new(first_path);
-        first_path.parent().unwrap_or(Path::new(".")).to_path_buf()
-    } else {
-        Path::new(".").to_path_buf()
-    };
-
-    println!(
-        "{} {}",
-        "Checking links in".cyan().bold(),
-        display_root.display()
-    );
-    println!();
-
-    println!("{}", "Link Check Results".cyan().bold());
-    println!("{}", "=".repeat(60));
-    println!();
-    println!("Total links:  {}", result.total_links);
-    println!(
-        "Valid links:  {} {}",
-        result.valid_links,
-        "✓".green().bold()
-    );
-    println!(
-        "Broken links: {} {}",
-        result.broken_links,
-        if result.broken_links == 0 {
-            "✓".green().bold().to_string()
-        } else {
-            "✗".red().bold().to_string()
-        }
-    );
-    println!();
-
-    if let Some(summary) = &result.summary {
-        println!("{}", "Summary by kind:".cyan().bold());
-        for item in &summary.by_kind {
-            println!("  - {:<18} {}", item.kind, item.count);
-        }
-        println!();
-    }
-
-    if !summary_only && !result.broken.is_empty() {
-        println!("{}", "Broken Links:".red().bold());
-        println!();
-
-        for (idx, link) in result.broken.iter().enumerate() {
-            println!("[{}] {}", idx + 1, link.source_file.white().bold());
-            println!("    Link: [{}]({})", link.link_text, link.link_target);
-            if link.line_number > 0 {
-                println!("    Line: {}", link.line_number);
-            }
-            if let Some(ref ctx) = link.context {
-                println!("    Context: {ctx}");
-            }
-            println!("    Error: {}", link.error.red());
-            println!();
-        }
-    }
-
-    Ok(())
-}
-
-/// Load a single-line context snippet for a link location.
-fn get_link_context(
-    cache: &mut HashMap<String, Vec<String>>,
-    file_path: &str,
-    line_number: usize,
-) -> Result<Option<String>, Box<dyn std::error::Error>> {
-    if line_number == 0 {
-        return Ok(None);
-    }
-
-    // Load and cache file lines if needed
-    if !cache.contains_key(file_path) {
-        let content = fs::read_to_string(file_path)?;
-        let lines: Vec<String> = content
-            .lines()
-            .map(std::string::ToString::to_string)
-            .collect();
-        cache.insert(file_path.to_string(), lines);
-    }
-
-    let lines = cache.get(file_path).unwrap();
-    if line_number == 0 || line_number > lines.len() {
-        return Ok(None);
-    }
-
-    let mut line = lines[line_number - 1].clone();
-    if line.len() > 160 {
-        line.truncate(157);
-        line.push_str("...");
-    }
-
-    Ok(Some(line))
-}
-
-fn load_policy_config(path: &Path) -> Result<PolicyConfig, Box<dyn std::error::Error>> {
-    let content = fs::read_to_string(path)?;
-    let cfg: PolicyConfig = serde_yaml::from_str(&content)?;
-    Ok(cfg)
-}
-
-fn rule_severity(rule: &PolicyRule) -> String {
-    rule.severity.as_deref().unwrap_or("error").to_string()
-}
-
-fn rule_name(rule: &PolicyRule) -> String {
-    rule.name.clone().unwrap_or_else(|| rule.pattern.clone())
-}
-
-#[derive(Debug)]
-struct PolicySection {
-    heading: String,
-    line_start: usize,
-    line_end: usize,
-}
-
-fn parse_policy_sections(content: &str) -> Vec<PolicySection> {
-    let lines: Vec<&str> = content.lines().collect();
-    if lines.is_empty() {
-        return Vec::new();
-    }
-
-    let heading_re = Regex::new(r"^(#{1,6})\s+(.+)$").unwrap();
-    let mut sections: Vec<PolicySection> = Vec::new();
-    let mut current: Option<PolicySection> = None;
-
-    for (idx, line) in lines.iter().enumerate() {
-        if let Some(caps) = heading_re.captures(line) {
-            let heading = caps
-                .get(2)
-                .map_or_else(|| "Untitled".to_string(), |m| m.as_str().trim().to_string());
-
-            if let Some(mut prev) = current.take() {
-                if idx > 0 {
-                    prev.line_end = idx;
-                }
-                sections.push(prev);
-            }
-
-            current = Some(PolicySection {
-                heading,
-                line_start: idx + 1,
-                line_end: lines.len(),
-            });
-        }
-    }
-
-    if let Some(mut last) = current {
-        last.line_end = lines.len();
-        sections.push(last);
-    }
-
-    if sections.is_empty() {
-        sections.push(PolicySection {
-            heading: "Full Document".to_string(),
-            line_start: 1,
-            line_end: lines.len(),
-        });
-    }
-
-    sections
-}
-
-#[derive(Debug)]
-struct LinkTarget {
-    path: String,
-    anchor: Option<String>,
-}
-
-fn extract_markdown_link_targets(file_path: &str, content: &str) -> Vec<LinkTarget> {
-    let mut targets = Vec::new();
-    let link_regex = Regex::new(r"(!?)\[(?P<label>[^\]]+)\]\((?P<target>[^)]+)\)").unwrap();
-
-    let origin_dir = Path::new(file_path)
-        .parent()
-        .unwrap_or_else(|| Path::new("."));
-
-    for caps in link_regex.captures_iter(content) {
-        if caps.get(1).is_some_and(|m| m.as_str() == "!") {
-            continue;
-        }
-
-        let target_str = match caps.name("target") {
-            Some(t) => t.as_str(),
-            None => continue,
-        };
-
-        if target_str.starts_with("http://")
-            || target_str.starts_with("https://")
-            || target_str.starts_with("mailto:")
-            || target_str.starts_with("ftp://")
-        {
-            continue;
-        }
-
-        let (path_part, anchor) = if let Some(hash_pos) = target_str.find('#') {
-            (
-                &target_str[..hash_pos],
-                Some(target_str[hash_pos + 1..].to_string()),
-            )
-        } else {
-            (target_str, None)
-        };
-
-        if path_part.is_empty() {
-            continue;
-        }
-
-        let lc = path_part.to_ascii_lowercase();
-        if !lc.ends_with(".md") && !lc.ends_with(".txt") && !lc.ends_with(".rst") {
-            continue;
-        }
-
-        let target_path = if let Some(stripped) = path_part.strip_prefix('/') {
-            PathBuf::from(stripped)
-        } else {
-            origin_dir.join(path_part)
-        };
-
-        let normalized = normalize_path(&target_path);
-        targets.push(LinkTarget {
-            path: normalized,
-            anchor,
-        });
-    }
-
-    targets
-}
-
-fn normalize_required_link(file_path: &str, required: &str) -> (String, Option<String>) {
-    let (path_part, anchor) = if let Some(hash_pos) = required.find('#') {
-        (
-            &required[..hash_pos],
-            Some(required[hash_pos + 1..].to_string()),
-        )
-    } else {
-        (required, None)
-    };
-
-    if path_part.starts_with("http://")
-        || path_part.starts_with("https://")
-        || path_part.starts_with("mailto:")
-        || path_part.starts_with("ftp://")
-    {
-        return (required.to_string(), anchor);
-    }
-
-    let path_part = path_part.trim_start_matches("./");
-    let resolved = if path_part.is_empty() {
-        PathBuf::from(file_path)
-    } else if path_part.starts_with("../") {
-        let origin_dir = Path::new(file_path)
-            .parent()
-            .unwrap_or_else(|| Path::new("."));
-        origin_dir.join(path_part)
-    } else if path_part.starts_with('/') || path_part.contains('/') {
-        PathBuf::from(path_part.trim_start_matches('/'))
-    } else {
-        let origin_dir = Path::new(file_path)
-            .parent()
-            .unwrap_or_else(|| Path::new("."));
-        origin_dir.join(path_part)
-    };
-
-    (normalize_path(&resolved), anchor)
-}
-
-fn collect_policy_violations_for_content(
-    rule: &PolicyRule,
-    file_path: &str,
-    content: &str,
-) -> Vec<PolicyViolation> {
-    let mut violations = Vec::new();
-
-    // Required substrings
-    for needle in &rule.must_contain {
-        if !content.contains(needle) {
-            violations.push(PolicyViolation {
-                file: file_path.to_string(),
-                rule: rule_name(rule),
-                message: format!("Missing required content: {needle:?}"),
-                severity: rule_severity(rule),
-                kind: "policy_violation".to_string(),
-            });
-        }
-    }
-
-    // Forbidden substrings
-    for needle in &rule.must_not_contain {
-        if content.contains(needle) {
-            violations.push(PolicyViolation {
-                file: file_path.to_string(),
-                rule: rule_name(rule),
-                message: format!("Forbidden content present: {needle:?}"),
-                severity: rule_severity(rule),
-                kind: "policy_violation".to_string(),
-            });
-        }
-    }
-
-    // Length-based checks (line count)
-    let line_count = content.lines().count();
-    if let Some(min_len) = rule.min_length {
-        if line_count < min_len {
-            violations.push(PolicyViolation {
-                file: file_path.to_string(),
-                rule: rule_name(rule),
-                message: format!(
-                    "Document too short: {line_count} lines (min required: {min_len})"
-                ),
-                severity: rule_severity(rule),
-                kind: "policy_violation".to_string(),
-            });
-        }
-    }
-    if let Some(max_len) = rule.max_length {
-        if line_count > max_len {
-            violations.push(PolicyViolation {
-                file: file_path.to_string(),
-                rule: rule_name(rule),
-                message: format!("Document too long: {line_count} lines (max allowed: {max_len})"),
-                severity: rule_severity(rule),
-                kind: "policy_violation".to_string(),
-            });
-        }
-    }
-
-    // Heading-based checks
-    if !rule.required_headings.is_empty() || !rule.forbidden_headings.is_empty() {
-        let heading_re = Regex::new(r"^(#{1,6})\s+(.+)$").unwrap();
-        let mut headings: Vec<String> = Vec::new();
-
-        for line in content.lines() {
-            if let Some(caps) = heading_re.captures(line) {
-                if let Some(text_match) = caps.get(2) {
-                    let text = text_match.as_str().trim().to_string();
-                    headings.push(text);
-                }
-            }
-        }
-
-        // Required headings (by text)
-        for h in &rule.required_headings {
-            if !headings.iter().any(|t| t == h) {
-                violations.push(PolicyViolation {
-                    file: file_path.to_string(),
-                    rule: rule_name(rule),
-                    message: format!("Missing required heading: {h:?}"),
-                    severity: rule_severity(rule),
-                    kind: "policy_violation".to_string(),
-                });
-            }
-        }
-
-        // Forbidden headings (by text)
-        for h in &rule.forbidden_headings {
-            if headings.iter().any(|t| t == h) {
-                violations.push(PolicyViolation {
-                    file: file_path.to_string(),
-                    rule: rule_name(rule),
-                    message: format!("Forbidden heading present: {h:?}"),
-                    severity: rule_severity(rule),
-                    kind: "policy_violation".to_string(),
-                });
-            }
-        }
-    }
-
-    // Section length checks (line count)
-    if let Some(max_section_len) = rule.max_section_length {
-        let heading_filter = match rule.section_heading_regex.as_deref() {
-            Some(pattern) => match Regex::new(pattern) {
-                Ok(re) => Some(re),
-                Err(_) => {
-                    violations.push(PolicyViolation {
-                        file: file_path.to_string(),
-                        rule: rule_name(rule),
-                        message: format!("Invalid section heading regex: {pattern:?}"),
-                        severity: rule_severity(rule),
-                        kind: "policy_violation".to_string(),
-                    });
-                    return violations;
-                }
-            },
-            None => None,
-        };
-
-        for section in parse_policy_sections(content) {
-            if let Some(ref re) = heading_filter {
-                if !re.is_match(&section.heading) {
-                    continue;
-                }
-            }
-
-            let section_len = if section.line_end >= section.line_start {
-                section.line_end - section.line_start + 1
-            } else {
-                0
-            };
-
-            if section_len > max_section_len {
-                violations.push(PolicyViolation {
-                    file: file_path.to_string(),
-                    rule: rule_name(rule),
-                    message: format!(
-                        "Section too long: {:?} is {} lines (max allowed: {})",
-                        section.heading, section_len, max_section_len
-                    ),
-                    severity: rule_severity(rule),
-                    kind: "policy_violation".to_string(),
-                });
-            }
-        }
-    }
-
-    // Required link checks
-    if !rule.must_link_to.is_empty() {
-        let targets = extract_markdown_link_targets(file_path, content);
-        let mut target_paths: HashSet<String> = HashSet::new();
-        let mut target_keys: HashSet<String> = HashSet::new();
-
-        for target in targets {
-            target_paths.insert(target.path.clone());
-            let key = match target.anchor {
-                Some(anchor) => format!("{}#{}", target.path, anchor),
-                None => target.path.clone(),
-            };
-            target_keys.insert(key);
-        }
-
-        for required in &rule.must_link_to {
-            let (req_path, req_anchor) = normalize_required_link(file_path, required);
-            let satisfied = if let Some(anchor) = req_anchor {
-                target_keys.contains(&format!("{req_path}#{anchor}"))
-            } else {
-                target_paths.contains(&req_path)
-            };
-
-            if !satisfied {
-                violations.push(PolicyViolation {
-                    file: file_path.to_string(),
-                    rule: rule_name(rule),
-                    message: format!("Missing required link: {required:?}"),
-                    severity: rule_severity(rule),
-                    kind: "policy_violation".to_string(),
-                });
-            }
-        }
-    }
-
-    violations
-}
-
-fn run_policy_check(
-    index_dir: &Path,
-    policy_path: &Path,
-) -> Result<PolicyCheckResult, Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-    let policy = load_policy_config(policy_path)?;
-
-    let mut violations = Vec::new();
-
-    for rule in &policy.rules {
-        let glob = Glob::new(&rule.pattern)?;
-        let matcher = glob.compile_matcher();
-
-        for file_path in forward_index.files.keys() {
-            if !matcher.is_match(file_path.as_str()) {
-                continue;
-            }
-
-            let content = fs::read_to_string(file_path.as_str())?;
-            let mut rule_violations =
-                collect_policy_violations_for_content(rule, file_path, &content);
-            violations.append(&mut rule_violations);
-        }
-    }
-
-    Ok(PolicyCheckResult {
-        policy_file: policy_path.to_string_lossy().to_string(),
-        total_violations: violations.len(),
-        violations,
-    })
-}
-
-fn cmd_policy(
-    config_path: &Path,
-    index_dir: &Path,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    if !config_path.exists() {
-        return Err(format!("Policy file not found: {}", config_path.display()).into());
-    }
-
-    let result = run_policy_check(index_dir, config_path)?;
-
-    if json {
-        println!("{}", serde_json::to_string_pretty(&result)?);
-        return Ok(());
-    }
-
-    if result.violations.is_empty() {
-        println!(
-            "{} No policy violations found ({}).",
-            "✓".green().bold(),
-            result.policy_file
-        );
-        return Ok(());
-    }
-
-    println!(
-        "{} Policy violations found using {}",
-        "✗".red().bold(),
-        result.policy_file
-    );
-    println!("{}", "=".repeat(60));
-    println!();
-
-    for v in &result.violations {
-        println!("{}", v.file.white().bold());
-        println!("  Rule: {}", v.rule);
-        println!("  Severity: {}", v.severity);
-        println!("  Kind: {}", v.kind);
-        println!("  Message: {}", v.message);
-        println!();
-    }
-
-    println!("Total violations: {}", result.total_violations);
-
-    Ok(())
-}
-
-/// Suggest a new link target based on available files in the index.
-/// Very conservative: only rewrites when there is exactly one file with
-/// the same filename as the link target and that file lives under the
-/// same parent directory as the source file.
-/// Find all candidate files that match the broken link's filename
-fn find_link_candidates(
-    source_file: &str,
-    link_path: &str,
-    available_files: &HashSet<String>,
-) -> Vec<String> {
-    if link_path.is_empty() {
-        return vec![];
-    }
-
-    let Some(link_filename) = Path::new(link_path).file_name().and_then(|s| s.to_str()) else {
-        return vec![];
-    };
-
-    let source_path = Path::new(source_file);
-    let source_parent = source_path.parent().unwrap_or(Path::new("."));
-
-    // Find all candidates whose filename matches
-    let mut candidates: Vec<String> = available_files
-        .iter()
-        .filter(|p| {
-            Path::new(p)
-                .file_name()
-                .and_then(|s| s.to_str())
-                .is_some_and(|name| name == link_filename)
-        })
-        .map(|candidate| {
-            // Try to create a relative path from source to candidate
-            let candidate_path = Path::new(candidate);
-            if let Ok(stripped) = candidate_path.strip_prefix(source_parent) {
-                let rel = stripped.to_string_lossy().to_string();
-                if !rel.is_empty() {
-                    return rel;
-                }
-            }
-            // Fall back to returning the full path
-            candidate.clone()
-        })
-        .collect();
-
-    candidates.sort();
-    candidates
-}
-
-#[allow(dead_code)] // Utility for future interactive fix mode
-fn suggest_new_link_target(
-    source_file: &str,
-    link_path: &str,
-    available_files: &HashSet<String>,
-) -> Option<String> {
-    let candidates = find_link_candidates(source_file, link_path, available_files);
-    if candidates.len() == 1 {
-        Some(candidates.into_iter().next().unwrap())
-    } else {
-        None
-    }
-}
-
-fn cmd_fix_links(
-    index_dir: &Path,
-    dry_run: bool,
-    apply: bool,
-    propose: Option<PathBuf>,
-    apply_decisions: Option<PathBuf>,
-    json: bool,
-    use_git_history: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    // Handle apply-decisions mode: read and apply a proposal file
-    if let Some(decisions_path) = apply_decisions {
-        return apply_link_decisions(&decisions_path, dry_run, json);
-    }
-
-    // Validate mode flags for regular operation
-    let propose_mode = propose.is_some();
-    if !propose_mode && !dry_run && !apply {
-        return Err("Specify --dry-run, --apply, or --propose <file>".into());
-    }
-
-    let forward_index = load_forward_index(index_dir)?;
-    let available_files: HashSet<String> = forward_index.files.keys().cloned().collect();
-
-    // Load git rename history if requested and available
-    let rename_history: Option<RenameHistory> = if use_git_history {
-        let rename_path = index_dir.join("rename_history.json");
-        if rename_path.exists() {
-            let content = fs::read_to_string(&rename_path)?;
-            Some(serde_json::from_str(&content)?)
-        } else {
-            eprintln!(
-                "Warning: --use-git-history requested but no rename_history.json found. \
-                 Run 'yore build --track-renames' first."
-            );
-            None
-        }
-    } else {
-        None
-    };
-
-    let mut fixes: Vec<LinkFix> = Vec::new();
-    let mut proposals: Vec<LinkFixProposal> = Vec::new();
-
-    for (file_path, entry) in &forward_index.files {
-        for link in &entry.links {
-            let target = &link.target;
-
-            // Skip external links
-            if target.starts_with("http://")
-                || target.starts_with("https://")
-                || target.starts_with("mailto:")
-                || target.starts_with("ftp://")
-            {
-                continue;
-            }
-
-            // Split off anchor
-            let (link_path, anchor) = if let Some(idx) = target.find('#') {
-                (
-                    target[..idx].to_string(),
-                    Some(target[idx + 1..].to_string()),
-                )
-            } else {
-                (target.clone(), None)
-            };
-
-            // Check if link resolves
-            let source_path = Path::new(file_path);
-            let resolved = if link_path.is_empty() {
-                file_path.clone()
-            } else if let Some(parent) = source_path.parent() {
-                parent.join(&link_path).to_string_lossy().to_string()
-            } else {
-                link_path.clone()
-            };
-
-            let normalized = normalize_path(Path::new(&resolved));
-            if available_files.contains(&normalized) {
-                continue;
-            }
-
-            // Find candidates using index-based matching
-            let mut candidates = find_link_candidates(file_path, &link_path, &available_files);
-
-            // If no candidates found and git history is available, check for renames
-            if candidates.is_empty() {
-                if let Some(ref history) = rename_history {
-                    // Try to resolve the old path to its current location
-                    if let Some(new_path) = resolve_renamed_path(&normalized, history) {
-                        // Check if the new path exists in available files
-                        if available_files.contains(&new_path) {
-                            // Convert to relative path from source
-                            if let Some(rel) =
-                                compute_relative_path(file_path, &new_path, &available_files)
-                            {
-                                candidates.push(rel);
-                            } else {
-                                candidates.push(new_path);
-                            }
-                        }
-                    }
-                }
-            }
-
-            if candidates.is_empty() {
-                continue;
-            }
-
-            if candidates.len() == 1 {
-                // Unambiguous fix
-                let mut new_target = candidates[0].clone();
-                if let Some(ref a) = anchor {
-                    new_target.push('#');
-                    new_target.push_str(a);
-                }
-                if new_target != *target {
-                    fixes.push(LinkFix {
-                        file: file_path.clone(),
-                        old_target: target.clone(),
-                        new_target,
-                    });
-                }
-            } else if propose_mode {
-                // Multiple candidates - add to proposals
-                proposals.push(LinkFixProposal {
-                    source: file_path.clone(),
-                    line: link.line,
-                    broken_target: target.clone(),
-                    candidates,
-                    decision: None,
-                });
-            }
-        }
-    }
-
-    // Handle propose mode: write proposals to file
-    if let Some(propose_path) = propose {
-        let proposal_file = LinkFixProposalFile {
-            version: 1,
-            proposals,
-        };
-        let yaml = serde_yaml::to_string(&proposal_file)?;
-        fs::write(&propose_path, &yaml)?;
-
-        if json {
-            #[derive(Serialize)]
-            struct ProposeResult {
-                proposal_file: String,
-                unambiguous_fixes: usize,
-                ambiguous_proposals: usize,
-            }
-            let result = ProposeResult {
-                proposal_file: propose_path.to_string_lossy().to_string(),
-                unambiguous_fixes: fixes.len(),
-                ambiguous_proposals: proposal_file.proposals.len(),
-            };
-            println!("{}", serde_json::to_string_pretty(&result)?);
-        } else {
-            println!(
-                "{} Wrote {} ambiguous proposals to {}",
-                "Propose:".cyan().bold(),
-                proposal_file.proposals.len(),
-                propose_path.display()
-            );
-            println!(
-                "{} {} unambiguous fixes available (use --apply to apply)",
-                "Info:".yellow(),
-                fixes.len()
-            );
-        }
-        return Ok(());
-    }
-
-    // Regular fix mode (dry-run or apply)
-    if fixes.is_empty() {
-        if json {
-            println!(r#"{{"fixes": [], "applied": false}}"#);
-        } else {
-            println!("{}", "No safe link fixes found.".green().bold());
-        }
-        return Ok(());
-    }
-
-    // Group fixes by file
-    let mut fixes_by_file: HashMap<String, Vec<LinkFix>> = HashMap::new();
-    for fix in &fixes {
-        fixes_by_file
-            .entry(fix.file.clone())
-            .or_default()
-            .push(fix.clone());
-    }
-
-    if json {
-        let result = serde_json::json!({
-            "fixes": fixes.iter().map(|f| {
-                serde_json::json!({
-                    "file": f.file,
-                    "old_target": f.old_target,
-                    "new_target": f.new_target
-                })
-            }).collect::<Vec<_>>(),
-            "applied": apply
-        });
-        println!("{}", serde_json::to_string_pretty(&result)?);
-    } else {
-        println!(
-            "{} Proposed link fixes in {} file(s):",
-            if dry_run { "Previewing" } else { "Applying" },
-            fixes_by_file.len()
-        );
-        for (file, file_fixes) in &fixes_by_file {
-            println!("{}", file.white().bold());
-            for f in file_fixes {
-                println!("  {} -> {}", f.old_target.red(), f.new_target.green());
-            }
-        }
-    }
-
-    if apply {
-        for (file, file_fixes) in &fixes_by_file {
-            let content = fs::read_to_string(file)?;
-            let mut new_content = content.clone();
-            for f in file_fixes {
-                let old = format!("]({})", f.old_target);
-                let new = format!("]({})", f.new_target);
-                new_content = new_content.replace(&old, &new);
-            }
-            if new_content != content {
-                fs::write(file, new_content)?;
-            }
-        }
-        if !json {
-            println!("{}", "Link fixes applied.".green().bold());
-        }
-    }
-
-    Ok(())
-}
-
-/// Apply decisions from a proposal file
-fn apply_link_decisions(
-    decisions_path: &Path,
-    dry_run: bool,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let content = fs::read_to_string(decisions_path)?;
-    let proposal_file: LinkFixProposalFile = serde_yaml::from_str(&content)?;
-
-    let mut fixes: Vec<LinkFix> = Vec::new();
-
-    for proposal in &proposal_file.proposals {
-        if let Some(decision_idx) = proposal.decision {
-            if decision_idx < proposal.candidates.len() {
-                let mut new_target = proposal.candidates[decision_idx].clone();
-                // Preserve anchor if present in broken_target
-                if let Some(idx) = proposal.broken_target.find('#') {
-                    new_target.push_str(&proposal.broken_target[idx..]);
-                }
-                fixes.push(LinkFix {
-                    file: proposal.source.clone(),
-                    old_target: proposal.broken_target.clone(),
-                    new_target,
-                });
-            }
-        }
-    }
-
-    if fixes.is_empty() {
-        if json {
-            println!(
-                r#"{{"fixes": [], "applied": false, "message": "No decisions made in proposal file"}}"#
-            );
-        } else {
-            println!(
-                "{} No decisions found in {}. Set 'decision' field to candidate index.",
-                "Note:".yellow(),
-                decisions_path.display()
-            );
-        }
-        return Ok(());
-    }
-
-    // Group and apply
-    let mut fixes_by_file: HashMap<String, Vec<LinkFix>> = HashMap::new();
-    for fix in &fixes {
-        fixes_by_file
-            .entry(fix.file.clone())
-            .or_default()
-            .push(fix.clone());
-    }
-
-    if json {
-        let result = serde_json::json!({
-            "fixes": fixes.iter().map(|f| {
-                serde_json::json!({
-                    "file": f.file,
-                    "old_target": f.old_target,
-                    "new_target": f.new_target
-                })
-            }).collect::<Vec<_>>(),
-            "applied": !dry_run
-        });
-        println!("{}", serde_json::to_string_pretty(&result)?);
-    } else {
-        println!(
-            "{} {} link fixes from decisions:",
-            if dry_run { "Would apply" } else { "Applying" },
-            fixes.len()
-        );
-        for (file, file_fixes) in &fixes_by_file {
-            println!("{}", file.white().bold());
-            for f in file_fixes {
-                println!("  {} -> {}", f.old_target.red(), f.new_target.green());
-            }
-        }
-    }
-
-    if !dry_run {
-        for (file, file_fixes) in &fixes_by_file {
-            let content = fs::read_to_string(file)?;
-            let mut new_content = content.clone();
-            for f in file_fixes {
-                let old = format!("]({})", f.old_target);
-                let new = format!("]({})", f.new_target);
-                new_content = new_content.replace(&old, &new);
-            }
-            if new_content != content {
-                fs::write(file, new_content)?;
-            }
-        }
-        if !json {
-            println!("{}", "Link fixes applied.".green().bold());
-        }
-    }
-
-    Ok(())
-}
-
-fn apply_reference_mapping_to_content(content: &str, from: &str, to: &str) -> String {
-    let old = format!("]({from})");
-    let new = format!("]({to})");
-    content.replace(&old, &new)
-}
-
-fn load_reference_mappings(
-    path: &Path,
-) -> Result<ReferenceMappingConfig, Box<dyn std::error::Error>> {
-    let content = fs::read_to_string(path)?;
-    let cfg: ReferenceMappingConfig = serde_yaml::from_str(&content)?;
-    Ok(cfg)
-}
-
-fn cmd_fix_references(
-    index_dir: &Path,
-    mapping_path: &Path,
-    dry_run: bool,
-    apply: bool,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    if !dry_run && !apply {
-        return Err("Specify either --dry-run or --apply".into());
-    }
-    if !mapping_path.exists() {
-        return Err(format!("Mapping file not found: {}", mapping_path.display()).into());
-    }
-
-    let mappings_cfg = load_reference_mappings(mapping_path)?;
-    if mappings_cfg.mappings.is_empty() {
-        if json {
-            let result = FixReferencesResult {
-                mapping_file: mapping_path.to_string_lossy().to_string(),
-                mappings_count: 0,
-                updated_files: vec![],
-                applied: apply,
-            };
-            println!("{}", serde_json::to_string_pretty(&result)?);
-        } else {
-            println!(
-                "{} No mappings defined in {}",
-                "Note:".yellow(),
-                mapping_path.display()
-            );
-        }
-        return Ok(());
-    }
-
-    let forward_index = load_forward_index(index_dir)?;
-
-    let mut changed_files: Vec<String> = Vec::new();
-
-    for file_path in forward_index.files.keys() {
-        let content = fs::read_to_string(file_path)?;
-        let mut new_content = content.clone();
-
-        for m in &mappings_cfg.mappings {
-            new_content = apply_reference_mapping_to_content(&new_content, &m.from, &m.to);
-        }
-
-        if new_content != content {
-            if dry_run {
-                changed_files.push(file_path.clone());
-            } else if apply {
-                fs::write(file_path, new_content)?;
-                changed_files.push(file_path.clone());
-            }
-        }
-    }
-
-    changed_files.sort();
-
-    if json {
-        let result = FixReferencesResult {
-            mapping_file: mapping_path.to_string_lossy().to_string(),
-            mappings_count: mappings_cfg.mappings.len(),
-            updated_files: changed_files,
-            applied: apply,
-        };
-        println!("{}", serde_json::to_string_pretty(&result)?);
-        return Ok(());
-    }
-
-    if changed_files.is_empty() {
-        println!(
-            "{} No references needed updating based on {}",
-            "Note:".yellow(),
-            mapping_path.display()
-        );
-    } else {
-        println!(
-            "{} Updated references in {} file(s) using mapping {}",
-            if dry_run { "Would update" } else { "Updated" },
-            changed_files.len(),
-            mapping_path.display()
-        );
-        for f in changed_files {
-            println!("  {f}");
-        }
-    }
-
-    Ok(())
-}
-
-fn cmd_mv(
-    from: &Path,
-    to: &Path,
-    index_dir: &Path,
-    update_refs: bool,
-    dry_run: bool,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let from_str = from.to_string_lossy().to_string();
-    let to_str = to.to_string_lossy().to_string();
-
-    let mut updated_files: Vec<String> = Vec::new();
-
-    if !dry_run {
-        if let Some(parent) = to.parent() {
-            fs::create_dir_all(parent)?;
-        }
-        fs::rename(from, to)?;
-    }
-
-    if update_refs {
-        let forward_index = load_forward_index(index_dir)?;
-
-        // Group by file for rewrites
-        let mut files_to_update: HashSet<String> = HashSet::new();
-        for (file_path, entry) in &forward_index.files {
-            for link in &entry.links {
-                if link.target == from_str {
-                    files_to_update.insert(file_path.clone());
-                }
-            }
-        }
-
-        for file in &files_to_update {
-            let content = fs::read_to_string(file)?;
-            let new_content = apply_reference_mapping_to_content(&content, &from_str, &to_str);
-            if content != new_content {
-                if !dry_run {
-                    fs::write(file, &new_content)?;
-                }
-                updated_files.push(file.clone());
-            }
-        }
-    }
-
-    updated_files.sort();
-
-    if json {
-        let result = MvResult {
-            from: from_str,
-            to: to_str,
-            moved: !dry_run,
-            updated_files,
-        };
-        println!("{}", serde_json::to_string_pretty(&result)?);
-        return Ok(());
-    }
-
-    // Human-readable output
-    if dry_run {
-        println!("{}", "Dry run:".cyan().bold());
-    }
-
-    println!(
-        "{} {} -> {}",
-        if dry_run { "Would move" } else { "Moving" },
-        from_str,
-        to_str
-    );
-
-    if update_refs {
-        if updated_files.is_empty() {
-            println!(
-                "{} No inbound links found for {} in index {}",
-                "Note:".yellow(),
-                from_str,
-                index_dir.display()
-            );
-        } else {
-            println!(
-                "{} Updating references in {} file(s)",
-                if dry_run { "Would update" } else { "Updating" },
-                updated_files.len()
-            );
-            for file in updated_files {
-                if dry_run {
-                    println!("  {file} (references would change)");
-                } else {
-                    println!("  {file}");
-                }
-            }
-        }
-    }
-
-    Ok(())
-}
-
-fn compute_inbound_link_counts(forward_index: &ForwardIndex) -> HashMap<String, usize> {
-    let mut counts: HashMap<String, usize> = HashMap::new();
-
-    for (source_path, entry) in &forward_index.files {
-        let source_base = Path::new(source_path);
-        for link in &entry.links {
-            let target = &link.target;
-            if target.starts_with("http://")
-                || target.starts_with("https://")
-                || target.starts_with("mailto:")
-                || target.starts_with("ftp://")
-            {
-                continue;
-            }
-
-            let (link_path, _) = if let Some(idx) = target.find('#') {
-                (
-                    target[..idx].to_string(),
-                    Some(target[idx + 1..].to_string()),
-                )
-            } else {
-                (target.clone(), None)
-            };
-
-            if link_path.is_empty() {
-                continue;
-            }
-
-            let resolved = if let Some(parent) = source_base.parent() {
-                parent.join(&link_path).to_string_lossy().to_string()
-            } else {
-                link_path.clone()
-            };
-            let normalized = normalize_path(Path::new(&resolved));
-            *counts.entry(normalized).or_insert(0) += 1;
-        }
-    }
-
-    counts
-}
-
-/// Show relation paths from a source document via the persisted relation graph.
-fn cmd_paths(
-    source: &str,
-    depth: usize,
-    kind_filter: Option<&str>,
-    json: bool,
-    index_dir: &Path,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let relation_index = load_relation_index(index_dir);
-    if relation_index.edges.is_empty() {
-        if json {
-            println!("{{\"source\":\"{source}\",\"paths\":[]}}");
-        } else {
-            println!(
-                "{} No relations found. Run 'yore build' first.",
-                "Info:".yellow()
-            );
-        }
-        return Ok(());
-    }
-
-    let depth = depth.clamp(1, 3);
-
-    // Normalize source: try exact match, then suffix match
-    let all_sources: HashSet<&str> = relation_index
-        .edges
-        .iter()
-        .flat_map(|e| [e.source.as_str(), e.target.as_str()])
-        .collect();
-
-    let resolved_source = if all_sources.contains(source) {
-        source.to_string()
-    } else {
-        // Try suffix match
-        if let Some(s) = all_sources
-            .iter()
-            .find(|s| s.ends_with(source) || source.ends_with(*s))
-        {
-            (*s).to_string()
-        } else {
-            if json {
-                println!("{{\"source\":\"{source}\",\"paths\":[]}}");
-            } else {
-                println!(
-                    "{} '{}' not found in relation graph.",
-                    "Info:".yellow(),
-                    source
-                );
-            }
-            return Ok(());
-        }
-    };
-
-    // BFS traversal up to depth
-    let mut visited: HashSet<String> = HashSet::new();
-    visited.insert(resolved_source.clone());
-    let mut frontier: Vec<String> = vec![resolved_source.clone()];
-    let mut result_edges: Vec<&RelationEdge> = Vec::new();
-
-    for _ in 0..depth {
-        let mut next_frontier: Vec<String> = Vec::new();
-        for node in &frontier {
-            for edge in &relation_index.edges {
-                if &edge.source != node {
-                    continue;
-                }
-                // Apply kind filter
-                if let Some(kf) = kind_filter {
-                    let edge_kind = match &edge.kind {
-                        RelationKind::LinksTo => "links_to",
-                        RelationKind::SectionLinksTo => "section_links_to",
-                        RelationKind::AdrReference => "adr_reference",
-                    };
-                    if edge_kind != kf {
-                        continue;
-                    }
-                }
-                result_edges.push(edge);
-                if !visited.contains(&edge.target) {
-                    visited.insert(edge.target.clone());
-                    next_frontier.push(edge.target.clone());
-                }
-            }
-        }
-        frontier = next_frontier;
-    }
-
-    if json {
-        #[derive(Serialize)]
-        struct PathsResult<'a> {
-            source: &'a str,
-            depth: usize,
-            total_edges: usize,
-            edges: &'a [&'a RelationEdge],
-        }
-        let result = PathsResult {
-            source: &resolved_source,
-            depth,
-            total_edges: result_edges.len(),
-            edges: &result_edges,
-        };
-        println!("{}", serde_json::to_string_pretty(&result)?);
-    } else {
-        println!(
-            "{} {} (depth {})",
-            "Paths from".green().bold(),
-            resolved_source.cyan(),
-            depth
-        );
-        println!();
-
-        if result_edges.is_empty() {
-            println!("  No outgoing edges found.");
-        } else {
-            for edge in &result_edges {
-                let kind_label = match &edge.kind {
-                    RelationKind::LinksTo => "links_to",
-                    RelationKind::SectionLinksTo => "section_links_to",
-                    RelationKind::AdrReference => "adr_reference",
-                };
-                let mut detail = String::new();
-                if let Some(anchor) = &edge.anchor {
-                    use std::fmt::Write;
-                    let _ = write!(detail, " #{anchor}");
-                }
-                if let Some(src_sec) = &edge.source_section {
-                    use std::fmt::Write;
-                    let _ = write!(detail, " [from: {}]", src_sec.heading);
-                }
-                if let Some(tgt_sec) = &edge.target_section {
-                    use std::fmt::Write;
-                    let _ = write!(detail, " [to: {}]", tgt_sec.heading);
-                }
-                if let Some(raw) = &edge.raw_text {
-                    use std::fmt::Write;
-                    let _ = write!(detail, " ({raw})");
-                }
-                println!(
-                    "  {} {} -> {}{}",
-                    kind_label.yellow(),
-                    edge.source,
-                    edge.target.cyan(),
-                    detail
-                );
-            }
-            println!();
-            println!("  {} edges total", result_edges.len());
-        }
-    }
-
-    Ok(())
-}
-
-fn cmd_export_graph(index_dir: &Path, format: &str) -> Result<(), Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-
-    // Map normalized paths to canonical file keys
-    let mut norm_to_key: HashMap<String, String> = HashMap::new();
-    for path in forward_index.files.keys() {
-        let normalized = normalize_path(Path::new(path));
-        norm_to_key
-            .entry(normalized)
-            .or_insert_with(|| path.clone());
-    }
-
-    let mut nodes: Vec<GraphNode> = forward_index
-        .files
-        .keys()
-        .cloned()
-        .map(|id| GraphNode { id })
-        .collect();
-    nodes.sort_by(|a, b| a.id.cmp(&b.id));
-
-    let mut edges: Vec<GraphEdge> = Vec::new();
-
-    for (source_path, entry) in &forward_index.files {
-        let source_base = Path::new(source_path);
-
-        for link in &entry.links {
-            let target = &link.target;
-
-            // Skip external links
-            if target.starts_with("http://")
-                || target.starts_with("https://")
-                || target.starts_with("mailto:")
-                || target.starts_with("ftp://")
-            {
-                continue;
-            }
-
-            // Split off anchor
-            let (link_path, anchor) = if let Some(idx) = target.find('#') {
-                (
-                    target[..idx].to_string(),
-                    Some(target[idx + 1..].to_string()),
-                )
-            } else {
-                (target.clone(), None)
-            };
-
-            if link_path.is_empty() {
-                continue;
-            }
-
-            let resolved = if let Some(parent) = source_base.parent() {
-                parent.join(&link_path).to_string_lossy().to_string()
-            } else {
-                link_path.clone()
-            };
-            let normalized = normalize_path(Path::new(&resolved));
-
-            if let Some(target_key) = norm_to_key.get(&normalized) {
-                edges.push(GraphEdge {
-                    source: source_path.clone(),
-                    target: target_key.clone(),
-                    anchor,
-                });
-            }
-        }
-    }
-
-    if edges.is_empty() {
-        println!(
-            "{} No internal documentation links found to export.",
-            "Info:".yellow()
-        );
-        return Ok(());
-    }
-
-    match format {
-        "json" => {
-            let export = GraphExport { nodes, edges };
-            println!("{}", serde_json::to_string_pretty(&export)?);
-        }
-        "dot" => {
-            println!("digraph yore_docs {{");
-            for edge in &edges {
-                let src = edge.source.replace('"', "\\\"");
-                let dst = edge.target.replace('"', "\\\"");
-                if let Some(anchor) = &edge.anchor {
-                    let label = anchor.replace('"', "\\\"");
-                    println!("  \"{src}\" -> \"{dst}\" [label=\"{label}\"];");
-                } else {
-                    println!("  \"{src}\" -> \"{dst}\";");
-                }
-            }
-            println!("}}");
-        }
-        other => {
-            return Err(format!("Unsupported format: {other}").into());
-        }
-    }
-
-    Ok(())
-}
-
-fn run_stale_check(
-    index_dir: &Path,
-    days: u64,
-    min_inlinks: usize,
-) -> Result<StaleResult, Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-    let inbound_counts = compute_inbound_link_counts(&forward_index);
-
-    let now = std::time::SystemTime::now();
-    let mut files = Vec::new();
-
-    for file_path in forward_index.files.keys() {
-        let meta = fs::metadata(file_path);
-        if meta.is_err() {
-            continue;
-        }
-        let meta = meta?;
-        let modified = meta.modified().unwrap_or(now);
-        let age = now.duration_since(modified).unwrap_or_default().as_secs() / 86_400;
-
-        let inlinks = *inbound_counts.get(file_path).unwrap_or(&0);
-
-        if age >= days && inlinks >= min_inlinks {
-            files.push(StaleFile {
-                file: file_path.clone(),
-                days_since_modified: age,
-                inbound_links: inlinks,
-            });
-        }
-    }
-
-    files.sort_by(|a, b| b.days_since_modified.cmp(&a.days_since_modified));
-
-    Ok(StaleResult {
-        total_stale: files.len(),
-        files,
-    })
-}
-
-fn cmd_stale(
-    index_dir: &Path,
-    days: u64,
-    min_inlinks: usize,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let result = run_stale_check(index_dir, days, min_inlinks)?;
-
-    if json {
-        println!("{}", serde_json::to_string_pretty(&result)?);
-        return Ok(());
-    }
-
-    if result.files.is_empty() {
-        println!(
-            "{} No stale files found (threshold: {} days, min_inlinks: {}).",
-            "✓".green().bold(),
-            days,
-            min_inlinks
-        );
-        return Ok(());
-    }
-
-    println!(
-        "{} Stale files (>= {} days old, inbound_links >= {}):",
-        "Stale".yellow().bold(),
-        days,
-        min_inlinks
-    );
-    println!("{}", "=".repeat(60));
-    for f in &result.files {
-        println!(
-            "{} ({} days, {} inbound links)",
-            f.file, f.days_since_modified, f.inbound_links
-        );
-    }
-
-    Ok(())
-}
-
-fn resolve_health_target_key(
-    file: &Path,
-    index_dir: &Path,
-    metrics_index: &DocumentMetricsIndex,
-) -> Option<String> {
-    let input = normalize_path(file);
-    let without_dot = input.trim_start_matches("./").to_string();
-    let with_dot = format!("./{without_dot}");
-
-    for candidate in [&input, &without_dot, &with_dot] {
-        if metrics_index.files.contains_key(candidate) {
-            return Some(candidate.clone());
-        }
-    }
-
-    let absolute = canonicalize_existing_path(file);
-    let absolute_normalized = normalize_path(&absolute);
-    if metrics_index.files.contains_key(&absolute_normalized) {
-        return Some(absolute_normalized);
-    }
-
-    if let Ok(forward_index) = load_forward_index(index_dir) {
-        if let Some(source_root) = forward_index_source_root(&forward_index) {
-            let derived = build_indexed_doc_key(&absolute, &source_root);
-            if metrics_index.files.contains_key(&derived) {
-                return Some(derived);
-            }
-        }
-    }
-
-    None
-}
-
-fn evaluate_document_health(
-    metrics: &DocumentMetrics,
-    options: &HealthOptions,
-) -> HealthFileResult {
-    let mut issues = Vec::new();
-
-    if metrics.line_count > options.max_lines {
-        issues.push(HealthIssue {
-            kind: "bloated-file".to_string(),
-            severity: "error".to_string(),
-            message: format!(
-                "{} lines exceeds the configured threshold",
-                metrics.line_count
-            ),
-            value: metrics.line_count,
-            threshold: options.max_lines,
-        });
-    }
-
-    if metrics.part_heading_count >= options.max_part_sections {
-        issues.push(HealthIssue {
-            kind: "accumulator-pattern".to_string(),
-            severity: "error".to_string(),
-            message: format!(
-                "{} \"Part N\" headings suggest an accumulating narrative doc",
-                metrics.part_heading_count
-            ),
-            value: metrics.part_heading_count,
-            threshold: options.max_part_sections,
-        });
-    }
-
-    let completed_section_lines: usize = metrics
-        .sections
-        .iter()
-        .filter(|section| section.has_completion_marker)
-        .map(|section| section.line_count)
-        .sum();
-    if completed_section_lines > options.max_completed_lines {
-        issues.push(HealthIssue {
-            kind: "stale-completed".to_string(),
-            severity: "warning".to_string(),
-            message: format!(
-                "{completed_section_lines} retained lines sit under completion-marked sections"
-            ),
-            value: completed_section_lines,
-            threshold: options.max_completed_lines,
-        });
-    }
-
-    if metrics.changelog_entry_count > options.max_changelog_entries {
-        issues.push(HealthIssue {
-            kind: "changelog-bloat".to_string(),
-            severity: "warning".to_string(),
-            message: format!(
-                "{} changelog-style entries exceed the configured threshold",
-                metrics.changelog_entry_count
-            ),
-            value: metrics.changelog_entry_count,
-            threshold: options.max_changelog_entries,
-        });
-    }
-
-    let status = if issues.iter().any(|issue| issue.severity == "error") {
-        "unhealthy"
-    } else if issues.iter().any(|issue| issue.severity == "warning") {
-        "warning"
-    } else {
-        "healthy"
-    };
-
-    HealthFileResult {
-        file: metrics.path.clone(),
-        status: status.to_string(),
-        issues,
-    }
-}
-
-fn cmd_health(
-    file: Option<&Path>,
-    all: bool,
-    index_dir: &Path,
-    options: &HealthOptions,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    if all == file.is_some() {
-        return Err("pass either a file path or --all".into());
-    }
-
-    let metrics_index = load_document_metrics(index_dir)?;
-    let total_files = metrics_index.files.len();
-    let mut files = Vec::new();
-
-    if let Some(file_path) = file {
-        let key =
-            resolve_health_target_key(file_path, index_dir, &metrics_index).ok_or_else(|| {
-                format!(
-                    "File not found in document metrics index: {}",
-                    file_path.display()
-                )
-            })?;
-        let metrics = metrics_index.files.get(&key).ok_or_else(|| {
-            format!(
-                "File not found in document metrics index: {}",
-                file_path.display()
-            )
-        })?;
-        files.push(evaluate_document_health(metrics, options));
-    } else {
-        let mut all_results: Vec<HealthFileResult> = metrics_index
-            .files
-            .values()
-            .map(|metrics| evaluate_document_health(metrics, options))
-            .filter(|result| !result.issues.is_empty())
-            .collect();
-        all_results.sort_by(|a, b| a.file.cmp(&b.file).then_with(|| a.status.cmp(&b.status)));
-        files = all_results;
-    }
-
-    let unhealthy_files = files
-        .iter()
-        .filter(|file| file.status == "unhealthy")
-        .count();
-    let warning_files = files.iter().filter(|file| file.status == "warning").count();
-    let result = HealthResult {
-        total_files,
-        unhealthy_files,
-        warning_files,
-        files,
-    };
-
-    if json {
-        println!("{}", serde_json::to_string_pretty(&result)?);
-        return Ok(());
-    }
-
-    if result.files.is_empty() {
-        println!("{}", "✓ No health issues detected.".green().bold());
-        return Ok(());
-    }
-
-    for file_result in &result.files {
-        let label = match file_result.status.as_str() {
-            "unhealthy" => "UNHEALTHY".red().bold(),
-            "warning" => "WARNING".yellow().bold(),
-            _ => "HEALTHY".green().bold(),
-        };
-        println!(
-            "{}: {} ({} issue{})",
-            file_result.file,
-            label,
-            file_result.issues.len(),
-            if file_result.issues.len() == 1 {
-                ""
-            } else {
-                "s"
-            }
-        );
-        for issue in &file_result.issues {
-            println!(
-                "  {:<20} {:<7} {} (value: {}, threshold: {})",
-                issue.kind,
-                issue.severity.to_uppercase(),
-                issue.message,
-                issue.value,
-                issue.threshold
-            );
-        }
-        println!();
-    }
-
-    Ok(())
-}
-
-fn is_placeholder_target(target: &str) -> bool {
-    let lower = target.to_ascii_lowercase();
-
-    matches!(lower.as_str(), "url" | "text" | "todo" | "link" | "tbd")
-        || lower.starts_with("/path/to/")
-        || lower.starts_with("../path/to/")
-        || lower.contains("replace-me")
-}
-
-fn is_code_extension(ext: &str) -> bool {
-    matches!(
-        ext,
-        "py" | "ts" | "tsx" | "json" | "yaml" | "yml" | "png" | "svg"
-    )
-}
-
-fn file_extension(path: &str) -> String {
-    std::path::Path::new(path)
-        .extension()
-        .and_then(|e| e.to_str())
-        .unwrap_or_default()
-        .to_lowercase()
-}
-
-fn record_link_kind(
-    by_file: &mut HashMap<String, HashMap<String, usize>>,
-    by_kind: &mut HashMap<String, usize>,
-    file: &str,
-    kind: &LinkKind,
-) {
-    let kind_name = match kind {
-        LinkKind::DocMissing => "doc_missing",
-        LinkKind::CodeMissing => "code_missing",
-        LinkKind::Placeholder => "placeholder",
-        LinkKind::CodeReference => "code_reference",
-        LinkKind::DirectoryReference => "directory_reference",
-        LinkKind::ExternalReference => "external_reference",
-        LinkKind::AnchorMissing => "anchor_missing",
-        LinkKind::AnchorUnverified => "anchor_unverified",
-    }
-    .to_string();
-
-    by_kind
-        .entry(kind_name.clone())
-        .and_modify(|c| *c += 1)
-        .or_insert(1);
-
-    let entry = by_file.entry(file.to_string()).or_default();
-    entry.entry(kind_name).and_modify(|c| *c += 1).or_insert(1);
-}
-
-/// Find all files that link to a specific file
-fn cmd_backlinks(
-    target_file: &str,
-    index_dir: &Path,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    // Load the forward index
-    let forward_index = load_forward_index(index_dir)?;
-
-    // Normalize the target file path for comparison
-    let normalized_target = normalize_path(Path::new(target_file));
-
-    if !json {
-        println!(
-            "{} {}",
-            "Finding backlinks for".cyan().bold(),
-            normalized_target.white().bold()
-        );
-        println!();
-    }
-
-    let mut backlinks = Vec::new();
-
-    // Iterate through all files and check if they link to the target
-    for (source_path, entry) in &forward_index.files {
-        for link in &entry.links {
-            let target = &link.target;
-
-            // Skip external links
-            if target.starts_with("http://")
-                || target.starts_with("https://")
-                || target.starts_with("mailto:")
-                || target.starts_with("ftp://")
-            {
-                continue;
-            }
-
-            // Parse link to separate file path and anchor
-            let (link_path, anchor) = if let Some(idx) = target.find('#') {
-                (
-                    target[..idx].to_string(),
-                    Some(target[idx + 1..].to_string()),
-                )
-            } else {
-                (target.clone(), None)
-            };
-
-            // Resolve relative path from source file
-            let resolved_path = if link_path.is_empty() {
-                // Just an anchor in the current file
-                source_path.clone()
-            } else if let Some(stripped) = link_path.strip_prefix('/') {
-                // Absolute path - strip leading / and use as-is
-                stripped.to_string()
-            } else {
-                // Relative path
-                let source_file_path = Path::new(source_path);
-                if let Some(parent) = source_file_path.parent() {
-                    parent.join(&link_path).to_string_lossy().to_string()
-                } else {
-                    link_path.clone()
-                }
-            };
-
-            // Normalize the resolved path
-            let normalized_link = normalize_path(Path::new(&resolved_path));
-
-            // Check if this link points to our target file
-            if normalized_link == normalized_target {
-                backlinks.push(Backlink {
-                    source_file: source_path.clone(),
-                    link_text: link.text.clone(),
-                    link_target: target.clone(),
-                    anchor,
-                });
-            }
-        }
-    }
-
-    // Sort backlinks by source file for consistent output
-    backlinks.sort_by(|a, b| a.source_file.cmp(&b.source_file));
-
-    let result = BacklinksResult {
-        target_file: normalized_target.clone(),
-        total_backlinks: backlinks.len(),
-        backlinks: backlinks.clone(),
-    };
-
-    if json {
-        println!("{}", serde_json::to_string_pretty(&result)?);
-    } else {
-        println!("{}", "Backlinks Found".cyan().bold());
-        println!("{}", "=".repeat(60));
-        println!();
-        println!("Total backlinks: {}", backlinks.len());
-        println!();
-
-        if backlinks.is_empty() {
-            println!(
-                "{}",
-                "No backlinks found. This file is not referenced by any other file.".yellow()
-            );
-            println!();
-            println!("{}", "This may indicate:".yellow());
-            println!("  - An orphaned document (consider reviewing for deletion)");
-            println!("  - A new document that needs linking");
-            println!("  - An entry point document (like README.md)");
-        } else {
-            for (idx, backlink) in backlinks.iter().enumerate() {
-                println!("[{}] {}", idx + 1, backlink.source_file.white().bold());
-                println!(
-                    "    Link: [{}]({})",
-                    backlink.link_text, backlink.link_target
-                );
-                if let Some(anchor) = &backlink.anchor {
-                    println!("    Anchor: #{anchor}");
-                }
-                println!();
-            }
-
-            println!("{}", "Safe to delete?".yellow().bold());
-            println!(
-                "  {} These {} file(s) link to this document.",
-                "⚠".yellow(),
-                backlinks.len()
-            );
-            println!("  Review and update references before deletion.");
-        }
-    }
-
-    Ok(())
-}
-
-/// Find orphaned files with no inbound links
-fn cmd_orphans(
-    index_dir: &Path,
-    json: bool,
-    exclude_patterns: &[String],
-) -> Result<(), Box<dyn std::error::Error>> {
-    // Load the forward index
-    let forward_index = load_forward_index(index_dir)?;
-
-    if !json {
-        println!("{}", "Finding orphaned files...".cyan().bold());
-        println!();
-    }
-
-    // Build a set of all files that are linked to
-    let mut linked_files: HashSet<String> = HashSet::new();
-
-    for (source_path, entry) in &forward_index.files {
-        for link in &entry.links {
-            let target = &link.target;
-
-            // Skip external links
-            if target.starts_with("http://")
-                || target.starts_with("https://")
-                || target.starts_with("mailto:")
-                || target.starts_with("ftp://")
-            {
-                continue;
-            }
-
-            // Parse link to separate file path and anchor
-            let (link_path, _) = if let Some(idx) = target.find('#') {
-                (
-                    target[..idx].to_string(),
-                    Some(target[idx + 1..].to_string()),
-                )
-            } else {
-                (target.clone(), None)
-            };
-
-            // Skip anchor-only links
-            if link_path.is_empty() {
-                continue;
-            }
-
-            // Resolve relative path from source file
-            let resolved_path = if let Some(stripped) = link_path.strip_prefix('/') {
-                // Absolute path - strip leading / and use as-is
-                stripped.to_string()
-            } else {
-                // Relative path
-                let source_file_path = Path::new(source_path);
-                if let Some(parent) = source_file_path.parent() {
-                    parent.join(&link_path).to_string_lossy().to_string()
-                } else {
-                    link_path.clone()
-                }
-            };
-
-            // Normalize the resolved path
-            let normalized_link = normalize_path(Path::new(&resolved_path));
-            linked_files.insert(normalized_link);
-        }
-    }
-
-    // Find files that are NOT in the linked set
-    let mut orphans = Vec::new();
-
-    for (file_path, entry) in &forward_index.files {
-        // Check if this file has any inbound links
-        if !linked_files.contains(file_path) {
-            // Check exclude patterns
-            let mut excluded = false;
-            for pattern in exclude_patterns {
-                if file_path.contains(pattern) {
-                    excluded = true;
-                    break;
-                }
-            }
-
-            if excluded {
-                continue;
-            }
-
-            orphans.push(OrphanFile {
-                file: file_path.clone(),
-                size_bytes: entry.size_bytes,
-                line_count: entry.line_count,
-            });
-        }
-    }
-
-    // Sort orphans by file path
-    orphans.sort_by(|a, b| a.file.cmp(&b.file));
-
-    let result = OrphansResult {
-        total_orphans: orphans.len(),
-        orphans: orphans.clone(),
-    };
-
-    if json {
-        println!("{}", serde_json::to_string_pretty(&result)?);
-    } else {
-        println!("{}", "Orphaned Files".cyan().bold());
-        println!("{}", "=".repeat(60));
-        println!();
-        println!("Total orphans: {}", orphans.len());
-        println!();
-
-        if orphans.is_empty() {
-            println!(
-                "{}",
-                "No orphaned files found. All documents are linked!".green()
-            );
-            println!();
-        } else {
-            for (idx, orphan) in orphans.iter().enumerate() {
-                println!("[{}] {}", idx + 1, orphan.file.white().bold());
-                println!(
-                    "    Size: {} bytes, Lines: {}",
-                    orphan.size_bytes, orphan.line_count
-                );
-                println!();
-            }
-
-            println!("{}", "Cleanup suggestions:".yellow().bold());
-            println!("  1. Review each file to determine if it's still needed");
-            println!("  2. Add links from relevant documents if the content is valuable");
-            println!("  3. Delete or archive files that are no longer relevant");
-            println!("  4. Entry point files (README.md) may intentionally have no backlinks");
-            println!();
-            println!("{}", "To exclude patterns:".cyan());
-            println!("  yore orphans --exclude README --exclude INDEX");
-        }
-    }
-
-    Ok(())
-}
-
-fn build_inbound_link_counts(forward_index: &ForwardIndex) -> HashMap<String, usize> {
-    let mut inbound_counts: HashMap<String, usize> = HashMap::new();
-
-    for (source_path, entry) in &forward_index.files {
-        for link in &entry.links {
-            let target = &link.target;
-
-            if target.starts_with("http://")
-                || target.starts_with("https://")
-                || target.starts_with("mailto:")
-                || target.starts_with("ftp://")
-            {
-                continue;
-            }
-
-            let (link_path, _) = if let Some(idx) = target.find('#') {
-                (
-                    target[..idx].to_string(),
-                    Some(target[idx + 1..].to_string()),
-                )
-            } else {
-                (target.clone(), None)
-            };
-
-            if link_path.is_empty() {
-                continue;
-            }
-
-            let resolved_path = if let Some(stripped) = link_path.strip_prefix('/') {
-                stripped.to_string()
-            } else {
-                let source_file_path = Path::new(source_path);
-                if let Some(parent) = source_file_path.parent() {
-                    parent.join(&link_path).to_string_lossy().to_string()
-                } else {
-                    link_path.clone()
-                }
-            };
-
-            let normalized_link = normalize_path(Path::new(&resolved_path));
-            *inbound_counts.entry(normalized_link).or_insert(0) += 1;
-        }
-    }
-
-    inbound_counts
-}
-
-fn cmd_canonical_orphans(
-    index_dir: &Path,
-    threshold: f64,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-    let inbound_counts = build_inbound_link_counts(&forward_index);
-
-    let mut orphans = Vec::new();
-
-    for (file_path, entry) in &forward_index.files {
-        let inbound_links = *inbound_counts.get(file_path).unwrap_or(&0);
-        if inbound_links > 0 {
-            continue;
-        }
-
-        let score = score_canonicality(file_path, entry);
-        if score >= threshold {
-            orphans.push(CanonicalOrphan {
-                file: file_path.clone(),
-                canonicality: score,
-                inbound_links,
-            });
-        }
-    }
-
-    orphans.sort_by(|a, b| {
-        b.canonicality
-            .partial_cmp(&a.canonicality)
-            .unwrap_or(std::cmp::Ordering::Equal)
-            .then_with(|| a.file.cmp(&b.file))
-    });
-
-    let result = CanonicalOrphansResult {
-        total_orphans: orphans.len(),
-        threshold,
-        orphans: orphans.clone(),
-    };
-
-    if json {
-        println!("{}", serde_json::to_string_pretty(&result)?);
-        return Ok(());
-    }
-
-    println!("{}", "Canonical Orphans".cyan().bold());
-    println!("{}", "=".repeat(60));
-    println!();
-    println!("Threshold: {threshold}");
-    println!("Total canonical orphans: {}", orphans.len());
-    println!();
-
-    if orphans.is_empty() {
-        println!(
-            "{}",
-            "No canonical documents without inbound links found.".green()
-        );
-        return Ok(());
-    }
-
-    for (idx, orphan) in orphans.iter().enumerate() {
-        println!("[{}] {}", idx + 1, orphan.file.white().bold());
-        println!(
-            "    Canonicality: {:.2}, Inbound links: {}",
-            orphan.canonicality, orphan.inbound_links
-        );
-        println!();
-    }
-
-    Ok(())
-}
-
-/// Score canonicality with reasons
-fn score_canonicality_with_reasons(doc_path: &str, _entry: &FileEntry) -> (f64, Vec<String>) {
-    let mut score: f64 = 0.5; // baseline
-    let mut reasons = Vec::new();
-
-    let path_lower = doc_path.to_lowercase();
-
-    // Path-based boosts
-    if path_lower.contains("docs/adr/") || path_lower.contains("docs/architecture/") {
-        score += 0.2;
-        reasons.push("Architecture/ADR document (+0.2)".to_string());
-    }
-    if path_lower.contains("docs/index/") {
-        score += 0.15;
-        reasons.push("Index document (+0.15)".to_string());
-    }
-    if path_lower.contains("scratch")
-        || path_lower.contains("archive")
-        || path_lower.contains("old")
-    {
-        score -= 0.3;
-        reasons.push("Scratch/archive/old location (-0.3)".to_string());
-    }
-    if path_lower.contains("deprecated") || path_lower.contains("backup") {
-        score -= 0.25;
-        reasons.push("Deprecated/backup location (-0.25)".to_string());
-    }
-
-    // Filename patterns
-    let filename = Path::new(doc_path)
-        .file_name()
-        .and_then(|s| s.to_str())
-        .unwrap_or("")
-        .to_lowercase();
-
-    if filename.contains("readme") || filename.contains("index") {
-        score += 0.1;
-        reasons.push("README/INDEX file (+0.1)".to_string());
-    }
-    if filename.contains("guide") || filename.contains("runbook") || filename.contains("plan") {
-        score += 0.1;
-        reasons.push("Guide/runbook/plan document (+0.1)".to_string());
-    }
-
-    // Clamp to [0.0, 1.0]
-    let final_score = score.clamp(0.0, 1.0);
-
-    if reasons.is_empty() {
-        reasons.push("Baseline score (0.5)".to_string());
-    }
-
-    (final_score, reasons)
-}
-
-/// Show canonicality scores for all documents
-fn cmd_canonicality(
-    index_dir: &Path,
-    json: bool,
-    threshold: f64,
-) -> Result<(), Box<dyn std::error::Error>> {
-    // Load the forward index
-    let forward_index = load_forward_index(index_dir)?;
-
-    if !json {
-        println!("{}", "Computing canonicality scores...".cyan().bold());
-        println!();
-    }
-
-    let mut scored_files = Vec::new();
-
-    for (file_path, entry) in &forward_index.files {
-        let (score, reasons) = score_canonicality_with_reasons(file_path, entry);
-
-        if score >= threshold {
-            scored_files.push(CanonicalityScore {
-                file: file_path.clone(),
-                score,
-                reasons,
-            });
-        }
-    }
-
-    // Sort by score descending
-    scored_files.sort_by(|a, b| {
-        b.score
-            .partial_cmp(&a.score)
-            .unwrap_or(std::cmp::Ordering::Equal)
-    });
-
-    let result = CanonicalityResult {
-        total_files: scored_files.len(),
-        files: scored_files.clone(),
-    };
-
-    if json {
-        println!("{}", serde_json::to_string_pretty(&result)?);
-    } else {
-        println!("{}", "Canonicality Scores".cyan().bold());
-        println!("{}", "=".repeat(60));
-        println!();
-        println!(
-            "Total files: {} (threshold: {})",
-            scored_files.len(),
-            threshold
-        );
-        println!();
-
-        // Group by score ranges
-        let high_canon: Vec<_> = scored_files.iter().filter(|s| s.score >= 0.7).collect();
-        let medium_canon: Vec<_> = scored_files
-            .iter()
-            .filter(|s| s.score >= 0.5 && s.score < 0.7)
-            .collect();
-        let low_canon: Vec<_> = scored_files.iter().filter(|s| s.score < 0.5).collect();
-
-        println!(
-            "{} High canonicality (≥0.7): {} files",
-            "📚".green(),
-            high_canon.len()
-        );
-        for file in high_canon.iter().take(10) {
-            println!("  [{:.2}] {}", file.score, file.file.white().bold());
-            for reason in &file.reasons {
-                println!("         - {reason}");
-            }
-        }
-        if high_canon.len() > 10 {
-            println!("  ... and {} more", high_canon.len() - 10);
-        }
-        println!();
-
-        println!(
-            "{} Medium canonicality (0.5-0.7): {} files",
-            "📄".yellow(),
-            medium_canon.len()
-        );
-        for file in medium_canon.iter().take(5) {
-            println!("  [{:.2}] {}", file.score, file.file);
-        }
-        if medium_canon.len() > 5 {
-            println!("  ... and {} more", medium_canon.len() - 5);
-        }
-        println!();
-
-        println!(
-            "{} Low canonicality (<0.5): {} files",
-            "📋".red(),
-            low_canon.len()
-        );
-        for file in low_canon.iter().take(5) {
-            println!("  [{:.2}] {}", file.score, file.file);
-            for reason in &file.reasons {
-                println!("         - {reason}");
-            }
-        }
-        if low_canon.len() > 5 {
-            println!("  ... and {} more", low_canon.len() - 5);
-        }
-        println!();
-
-        println!("{}", "What does this mean?".yellow().bold());
-        println!("  - High scores: Authoritative, well-placed documents");
-        println!("  - Medium scores: Standard documentation");
-        println!("  - Low scores: Scratch work, archived, or deprecated content");
-        println!();
-        println!("{}", "For decision support:".cyan());
-        println!("  - Trust high-canon docs when resolving conflicts");
-        println!("  - Review low-canon docs for potential archival");
-        println!("  - Use threshold flag to filter: --threshold 0.6");
-    }
-
-    Ok(())
-}
-
-fn cmd_suggest_consolidation(
-    index_dir: &Path,
-    threshold: f64,
-    json: bool,
-) -> Result<(), Box<dyn std::error::Error>> {
-    let forward_index = load_forward_index(index_dir)?;
-
-    let pairs = compute_duplicate_pairs(&forward_index, threshold);
-    if pairs.is_empty() {
-        println!(
-            "{} No consolidation candidates found above threshold {}.",
-            "Info:".yellow(),
-            threshold
-        );
-        return Ok(());
-    }
-
-    let result = build_consolidation_groups(&forward_index, &pairs);
-
-    if json {
-        println!("{}", serde_json::to_string_pretty(&result)?);
-        return Ok(());
-    }
-
-    if result.groups.is_empty() {
-        println!(
-            "{} Duplicate pairs found but no multi-file groups to consolidate.",
-            "Info:".yellow()
-        );
-        return Ok(());
-    }
-
-    println!("{}", "Consolidation Suggestions".cyan().bold());
-    println!("{}", "=".repeat(60));
-    println!(
-        "Total groups: {} (threshold: {:.2})",
-        result.total_groups, threshold
-    );
-    println!();
-
-    for group in &result.groups {
-        println!("{}", group.canonical.white().bold());
-        println!(
-            "  Canonical score: {:.2}, Avg similarity: {:.2}",
-            group.canonical_score, group.avg_similarity
-        );
-        println!("  Merge into canonical:");
-        for m in &group.merge_into {
-            println!("    - {m}");
-        }
-        println!("  Note: {}", group.note);
-        println!();
-    }
-
-    Ok(())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_jaccard_similarity() {
-        let set1: HashSet<String> = ["foo", "bar", "baz"]
-            .iter()
-            .map(|s| (*s).to_string())
-            .collect();
-        let set2: HashSet<String> = ["bar", "baz", "qux"]
-            .iter()
-            .map(|s| (*s).to_string())
-            .collect();
-
-        let sim = jaccard_similarity(&set1, &set2);
-        // Intersection: {bar, baz} = 2
-        // Union: {foo, bar, baz, qux} = 4
-        // Jaccard: 2/4 = 0.5
-        assert_eq!(sim, 0.5);
-
-        // Empty sets
-        let empty1: HashSet<String> = HashSet::new();
-        let empty2: HashSet<String> = HashSet::new();
-        assert_eq!(jaccard_similarity(&empty1, &empty2), 0.0);
-
-        // Identical sets
-        assert_eq!(jaccard_similarity(&set1, &set1), 1.0);
-    }
-
-    #[test]
-    fn test_simhash_similarity() {
-        // Identical hashes
-        assert_eq!(simhash_similarity(0x123456, 0x123456), 1.0);
-
-        // Completely different (all bits flipped)
-        let hash1 = 0x0000000000000000u64;
-        let hash2 = 0xFFFFFFFFFFFFFFFFu64;
-        assert_eq!(simhash_similarity(hash1, hash2), 0.0);
-
-        // 1 bit different out of 64
-        let hash_a = 0b0000000000000000u64;
-        let hash_b = 0b0000000000000001u64;
-        let sim = simhash_similarity(hash_a, hash_b);
-        assert!((sim - (63.0 / 64.0)).abs() < 0.01);
-    }
-
-    #[test]
-    fn test_hamming_distance() {
-        assert_eq!(hamming_distance(0b1010, 0b1010), 0);
-        assert_eq!(hamming_distance(0b1010, 0b0101), 4);
-        assert_eq!(hamming_distance(0b1111, 0b0000), 4);
-        assert_eq!(hamming_distance(0b1100, 0b1010), 2);
-    }
-
-    #[test]
-    fn test_compute_simhash_stability() {
-        let text1 = "The quick brown fox jumps over the lazy dog";
-        let text2 = "The quick brown fox jumps over the lazy dog";
-
-        let hash1 = compute_simhash(text1);
-        let hash2 = compute_simhash(text2);
-
-        // Identical text should produce identical hashes
-        assert_eq!(hash1, hash2);
-    }
-
-    #[test]
-    fn test_compute_simhash_similarity() {
-        let text1 = "machine learning algorithms";
-        let text2 = "machine learning systems";
-        let text3 = "completely different topic about cooking";
-
-        let hash1 = compute_simhash(text1);
-        let hash2 = compute_simhash(text2);
-        let hash3 = compute_simhash(text3);
-
-        // Similar texts should have high similarity
-        let sim_similar = simhash_similarity(hash1, hash2);
-        // Different texts should have lower similarity
-        let sim_different = simhash_similarity(hash1, hash3);
-
-        assert!(sim_similar > sim_different);
-        assert!(sim_similar > 0.5); // Similar texts should be > 50% similar
-    }
-
-    #[test]
-    fn test_minhash_basic() {
-        let keywords1 = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
-        let keywords2 = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
-
-        let mh1 = compute_minhash(&keywords1, 128);
-        let mh2 = compute_minhash(&keywords2, 128);
-
-        // Same keywords should produce same MinHash
-        assert_eq!(mh1, mh2);
-        assert_eq!(mh1.len(), 128);
-
-        // Similarity should be 1.0
-        assert_eq!(minhash_similarity(&mh1, &mh2), 1.0);
-    }
-
-    #[test]
-    fn test_minhash_similarity_estimation() {
-        let keywords1 = vec!["a".to_string(), "b".to_string(), "c".to_string()];
-        let keywords2 = vec!["b".to_string(), "c".to_string(), "d".to_string()];
-        let keywords3 = vec!["x".to_string(), "y".to_string(), "z".to_string()];
-
-        let mh1 = compute_minhash(&keywords1, 128);
-        let mh2 = compute_minhash(&keywords2, 128);
-        let mh3 = compute_minhash(&keywords3, 128);
-
-        // keywords1 and keywords2 share 2 out of 4 unique items = 0.5 Jaccard
-        let sim_similar = minhash_similarity(&mh1, &mh2);
-        // keywords1 and keywords3 share 0 items
-        let sim_different = minhash_similarity(&mh1, &mh3);
-
-        // Similar sets should have higher MinHash similarity
-        assert!(sim_similar > sim_different);
-        // MinHash should approximate Jaccard (within reasonable error)
-        assert!(sim_similar > 0.3 && sim_similar < 0.7); // Approximately 0.5
-    }
-
-    #[test]
-    fn test_lsh_buckets() {
-        let mut files = HashMap::new();
-
-        // Create 3 files with MinHash signatures
-        let keywords1 = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
-        let keywords2 = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
-        let keywords3 = vec!["completely".to_string(), "different".to_string()];
-
-        files.insert(
-            "file1.md".to_string(),
-            FileEntry {
-                path: "file1.md".to_string(),
-                size_bytes: 100,
-                line_count: 10,
-                headings: vec![],
-                keywords: keywords1.clone(),
-                body_keywords: vec![],
-                links: vec![],
-                simhash: 0,
-                term_frequencies: HashMap::new(),
-                doc_length: 0,
-                minhash: compute_minhash(&keywords1, 128),
-                section_fingerprints: vec![],
-                adr_references: vec![],
-            },
-        );
-
-        files.insert(
-            "file2.md".to_string(),
-            FileEntry {
-                path: "file2.md".to_string(),
-                size_bytes: 100,
-                line_count: 10,
-                headings: vec![],
-                keywords: keywords2.clone(),
-                body_keywords: vec![],
-                links: vec![],
-                simhash: 0,
-                term_frequencies: HashMap::new(),
-                doc_length: 0,
-                minhash: compute_minhash(&keywords2, 128),
-                section_fingerprints: vec![],
-                adr_references: vec![],
-            },
-        );
-
-        files.insert(
-            "file3.md".to_string(),
-            FileEntry {
-                path: "file3.md".to_string(),
-                size_bytes: 100,
-                line_count: 10,
-                headings: vec![],
-                keywords: keywords3.clone(),
-                body_keywords: vec![],
-                links: vec![],
-                simhash: 0,
-                term_frequencies: HashMap::new(),
-                doc_length: 0,
-                minhash: compute_minhash(&keywords3, 128),
-                section_fingerprints: vec![],
-                adr_references: vec![],
-            },
-        );
-
-        let buckets = lsh_buckets(&files, 16);
-
-        // Should create some buckets
-        assert!(!buckets.is_empty());
-
-        // file1 and file2 should likely be in the same bucket (identical MinHash)
-        // Check if they appear together in any bucket
-        let mut file1_file2_together = false;
-        for paths in buckets.values() {
-            if paths.contains(&"file1.md".to_string()) && paths.contains(&"file2.md".to_string()) {
-                file1_file2_together = true;
-                break;
-            }
-        }
-        assert!(
-            file1_file2_together,
-            "Identical files should be in same LSH bucket"
-        );
-    }
-
-    #[test]
-    fn test_bm25_score_basic() {
-        let mut term_freq = HashMap::new();
-        term_freq.insert("test".to_string(), 5);
-        term_freq.insert("word".to_string(), 2);
-
-        let doc = FileEntry {
-            path: "test.md".to_string(),
-            size_bytes: 100,
-            line_count: 10,
-            headings: vec![],
-            keywords: vec![],
-            body_keywords: vec![],
-            links: vec![],
-            simhash: 0,
-            term_frequencies: term_freq,
-            doc_length: 100,
-            minhash: vec![],
-            section_fingerprints: vec![],
-            adr_references: vec![],
-        };
-
-        let mut idf_map = HashMap::new();
-        idf_map.insert("test".to_string(), 2.5);
-        idf_map.insert("word".to_string(), 1.8);
-
-        let query = vec!["test".to_string()];
-        let score = bm25_score(&query, &doc, 100.0, &idf_map);
-
-        // Score should be > 0 for matching term
-        assert!(score > 0.0);
-
-        // Query with no matching terms should score 0
-        let empty_query = vec!["nonexistent".to_string()];
-        let zero_score = bm25_score(&empty_query, &doc, 100.0, &idf_map);
-        assert_eq!(zero_score, 0.0);
-    }
-
-    #[test]
-    fn test_bm25_score_ordering() {
-        // Document with high term frequency
-        let mut tf_high = HashMap::new();
-        tf_high.insert("test".to_string(), 10);
-
-        let doc_high_tf = FileEntry {
-            path: "high.md".to_string(),
-            size_bytes: 100,
-            line_count: 10,
-            headings: vec![],
-            keywords: vec![],
-            body_keywords: vec![],
-            links: vec![],
-            simhash: 0,
-            term_frequencies: tf_high,
-            doc_length: 50,
-            minhash: vec![],
-            section_fingerprints: vec![],
-            adr_references: vec![],
-        };
-
-        // Document with low term frequency
-        let mut tf_low = HashMap::new();
-        tf_low.insert("test".to_string(), 1);
-
-        let doc_low_tf = FileEntry {
-            path: "low.md".to_string(),
-            size_bytes: 100,
-            line_count: 10,
-            headings: vec![],
-            keywords: vec![],
-            body_keywords: vec![],
-            links: vec![],
-            simhash: 0,
-            term_frequencies: tf_low,
-            doc_length: 50,
-            minhash: vec![],
-            section_fingerprints: vec![],
-            adr_references: vec![],
-        };
-
-        let mut idf_map = HashMap::new();
-        idf_map.insert("test".to_string(), 2.0);
-
-        let query = vec!["test".to_string()];
-        let score_high = bm25_score(&query, &doc_high_tf, 50.0, &idf_map);
-        let score_low = bm25_score(&query, &doc_low_tf, 50.0, &idf_map);
-
-        // Higher term frequency should yield higher BM25 score
-        assert!(score_high > score_low);
-    }
-
-    #[test]
-    fn test_policy_rule_matching_and_violations() {
-        // Build a simple policy with one rule
-        let rule = PolicyRule {
-            pattern: "agents/plans/*.md".to_string(),
-            must_contain: vec!["## Objective".to_string()],
-            must_not_contain: vec![],
-            name: Some("plans-must-have-objective".to_string()),
-            severity: Some("error".to_string()),
-            ..Default::default()
-        };
-
-        let policy = PolicyConfig { rules: vec![rule] };
-
-        // Compile glob and check that it matches only the agents/plans file
-        let glob = Glob::new(&policy.rules[0].pattern).unwrap();
-        let matcher = glob.compile_matcher();
-        assert!(matcher.is_match("agents/plans/plan.md"));
-        assert!(!matcher.is_match("docs/architecture/auth.md"));
-
-        // Simulate a violation: empty content should trigger missing "## Objective"
-        let rule_ref = &policy.rules[0];
-        let file_path = "agents/plans/plan.md";
-        let content = String::new();
-        let violations = collect_policy_violations_for_content(rule_ref, file_path, &content);
-
-        assert_eq!(violations.len(), 1);
-        let v = &violations[0];
-        assert_eq!(v.file, "agents/plans/plan.md");
-        assert_eq!(v.rule, "plans-must-have-objective");
-        assert_eq!(v.severity, "error");
-        assert_eq!(v.kind, "policy_violation");
-    }
-
-    #[test]
-    fn test_policy_min_max_length_violations() {
-        // Require 10–20 lines
-        let rule = PolicyRule {
-            pattern: "docs/*.md".to_string(),
-            min_length: Some(10),
-            max_length: Some(20),
-            name: Some("length-bounds".to_string()),
-            severity: Some("error".to_string()),
-            ..Default::default()
-        };
-
-        // Too short: 3 lines
-        let short_content = "line1\nline2\nline3\n";
-        let short_violations =
-            collect_policy_violations_for_content(&rule, "docs/short.md", short_content);
-        assert!(
-            short_violations
-                .iter()
-                .any(|v| v.message.contains("Document too short")),
-            "Expected a 'Document too short' violation"
-        );
-
-        // Too long: 25 lines
-        let long_content: String = (0..25).map(|i| format!("line{i}\n")).collect();
-        let long_violations =
-            collect_policy_violations_for_content(&rule, "docs/long.md", &long_content);
-        assert!(
-            long_violations
-                .iter()
-                .any(|v| v.message.contains("Document too long")),
-            "Expected a 'Document too long' violation"
-        );
-    }
-
-    #[test]
-    fn test_policy_required_and_forbidden_headings() {
-        let rule = PolicyRule {
-            pattern: "docs/*.md".to_string(),
-            required_headings: vec!["Objective".to_string()],
-            forbidden_headings: vec!["Deprecated".to_string()],
-            name: Some("heading-rules".to_string()),
-            severity: Some("error".to_string()),
-            ..Default::default()
-        };
-
-        let content = r"
-# Title
-
-## Objective
-
-Some content here.
-
-## Deprecated
-";
-
-        let violations = collect_policy_violations_for_content(&rule, "docs/example.md", content);
-
-        // Should not flag missing Objective (it exists)
-        assert!(
-            !violations
-                .iter()
-                .any(|v| v.message.contains("Missing required heading")),
-            "Did not expect a missing required heading violation"
-        );
-
-        // Should flag forbidden Deprecated heading
-        assert!(
-            violations
-                .iter()
-                .any(|v| v.message.contains("Forbidden heading present")),
-            "Expected a forbidden heading violation"
-        );
-    }
-
-    #[test]
-    fn test_policy_section_length_violation() {
-        let rule = PolicyRule {
-            pattern: "docs/*.md".to_string(),
-            max_section_length: Some(3),
-            section_heading_regex: Some("^Async".to_string()),
-            name: Some("status-section-length".to_string()),
-            severity: Some("warn".to_string()),
-            ..Default::default()
-        };
-
-        let content = r"
-# Status
-
-## Async Migration
-line1
-line2
-line3
-line4
-
-## Other
-ok
-";
-
-        let violations =
-            collect_policy_violations_for_content(&rule, "docs/IMPLEMENTATION_STATUS.md", content);
-
-        assert!(
-            violations
-                .iter()
-                .any(|v| v.message.contains("Section too long")),
-            "Expected a section-length violation"
-        );
-    }
-
-    #[test]
-    fn test_policy_required_link() {
-        let rule = PolicyRule {
-            pattern: "docs/*.md".to_string(),
-            must_link_to: vec!["docs/ASYNC_MIGRATION_COMPLETE_SUMMARY.md".to_string()],
-            name: Some("status-requires-summary-link".to_string()),
-            severity: Some("error".to_string()),
-            ..Default::default()
-        };
-
-        let missing_link = r"
-# Status
-No links here.
-";
-        let violations = collect_policy_violations_for_content(
-            &rule,
-            "docs/IMPLEMENTATION_STATUS.md",
-            missing_link,
-        );
-        assert!(
-            violations
-                .iter()
-                .any(|v| v.message.contains("Missing required link")),
-            "Expected a missing required link violation"
-        );
-
-        let with_link = r"
-# Status
-See [summary](ASYNC_MIGRATION_COMPLETE_SUMMARY.md).
-";
-        let ok_violations = collect_policy_violations_for_content(
-            &rule,
-            "docs/IMPLEMENTATION_STATUS.md",
-            with_link,
-        );
-        assert!(
-            ok_violations.is_empty(),
-            "Did not expect violations when required link is present"
-        );
-    }
-
-    #[test]
-    fn test_suggest_new_link_target_same_dir() {
-        let mut available = HashSet::new();
-        available.insert("docs/guide/auth.md".to_string());
-        available.insert("docs/guide/other.md".to_string());
-
-        // Source and target are in the same parent; filename matches exactly one file
-        let suggested = suggest_new_link_target("docs/guide/README.md", "auth.md", &available);
-        // Expect a simple relative path suggestion
-        assert_eq!(suggested.as_deref(), Some("auth.md"));
-    }
-
-    #[test]
-    fn test_apply_reference_mapping_to_content() {
-        let content = "See [auth](docs/old/auth.md) for details.";
-        let updated = apply_reference_mapping_to_content(
-            content,
-            "docs/old/auth.md",
-            "docs/architecture/AUTH.md",
-        );
-        assert_eq!(
-            updated,
-            "See [auth](docs/architecture/AUTH.md) for details."
-        );
-    }
-
-    #[test]
-    fn test_build_consolidation_groups_basic() {
-        // Minimal forward index with two files; we create a single duplicate pair
-        let mut files = HashMap::new();
-
-        files.insert(
-            "docs/a.md".to_string(),
-            FileEntry {
-                path: "docs/a.md".to_string(),
-                size_bytes: 0,
-                line_count: 1,
-                headings: vec![],
-                keywords: vec!["foo".to_string()],
-                body_keywords: vec![],
-                links: vec![],
-                simhash: 0,
-                term_frequencies: HashMap::new(),
-                doc_length: 0,
-                minhash: vec![],
-                section_fingerprints: vec![],
-                adr_references: vec![],
-            },
-        );
-        files.insert(
-            "docs/b.md".to_string(),
-            FileEntry {
-                path: "docs/b.md".to_string(),
-                size_bytes: 0,
-                line_count: 1,
-                headings: vec![],
-                keywords: vec!["foo".to_string()],
-                body_keywords: vec![],
-                links: vec![],
-                simhash: 0,
-                term_frequencies: HashMap::new(),
-                doc_length: 0,
-                minhash: vec![],
-                section_fingerprints: vec![],
-                adr_references: vec![],
-            },
-        );
-
-        let forward_index = ForwardIndex {
-            files,
-            indexed_at: chrono_now(),
-            version: 3,
-            source_root: String::new(),
-            avg_doc_length: 0.0,
-            idf_map: HashMap::new(),
-        };
-
-        let pairs = vec![("docs/a.md".to_string(), "docs/b.md".to_string(), 0.9_f64)];
-
-        let result = build_consolidation_groups(&forward_index, &pairs);
-        assert_eq!(result.total_groups, 1);
-        let group = &result.groups[0];
-        assert!(group.canonical == "docs/a.md" || group.canonical == "docs/b.md");
-        assert_eq!(group.merge_into.len(), 1);
-    }
-
-    #[test]
-    fn test_compute_inbound_link_counts() {
-        let mut files = HashMap::new();
-
-        files.insert(
-            "docs/a.md".to_string(),
-            FileEntry {
-                path: "docs/a.md".to_string(),
-                size_bytes: 0,
-                line_count: 1,
-                headings: vec![],
-                keywords: vec![],
-                body_keywords: vec![],
-                links: vec![Link {
-                    line: 1,
-                    text: "b".to_string(),
-                    target: "b.md".to_string(),
-                }],
-                simhash: 0,
-                term_frequencies: HashMap::new(),
-                doc_length: 0,
-                minhash: vec![],
-                section_fingerprints: vec![],
-                adr_references: vec![],
-            },
-        );
-        files.insert(
-            "docs/b.md".to_string(),
-            FileEntry {
-                path: "docs/b.md".to_string(),
-                size_bytes: 0,
-                line_count: 1,
-                headings: vec![],
-                keywords: vec![],
-                body_keywords: vec![],
-                links: vec![],
-                simhash: 0,
-                term_frequencies: HashMap::new(),
-                doc_length: 0,
-                minhash: vec![],
-                section_fingerprints: vec![],
-                adr_references: vec![],
-            },
-        );
-
-        let forward_index = ForwardIndex {
-            files,
-            indexed_at: "0".to_string(),
-            version: 3,
-            source_root: String::new(),
-            avg_doc_length: 0.0,
-            idf_map: HashMap::new(),
-        };
-
-        let counts = compute_inbound_link_counts(&forward_index);
-        // a.md links to b.md, so b.md should have 1 inbound link
-        assert_eq!(counts.get("docs/b.md"), Some(&1));
-    }
-
-    #[test]
-    fn test_index_sections() {
-        let content = "# Introduction\nThis is the intro.\n\n## Details\nMore details here.\n\n## Summary\nFinal thoughts.";
-        let headings = vec![
-            Heading {
-                line: 1,
-                level: 1,
-                text: "Introduction".to_string(),
-            },
-            Heading {
-                line: 4,
-                level: 2,
-                text: "Details".to_string(),
-            },
-            Heading {
-                line: 7,
-                level: 2,
-                text: "Summary".to_string(),
-            },
-        ];
-
-        let sections = index_sections(content, &headings);
-
-        assert_eq!(sections.len(), 3);
-        assert_eq!(sections[0].heading, "Introduction");
-        assert_eq!(sections[0].level, 1);
-        assert_eq!(sections[0].line_start, 1);
-
-        assert_eq!(sections[1].heading, "Details");
-        assert_eq!(sections[1].level, 2);
-        assert_eq!(sections[1].line_start, 4);
-
-        assert_eq!(sections[2].heading, "Summary");
-        assert_eq!(sections[2].level, 2);
-    }
-
-    #[test]
-    fn test_index_sections_similar_content() {
-        let content1 = "## Testing\nRun the tests with:\n```\npytest\n```";
-        let content2 = "## Testing\nRun the tests with:\n```\npytest\n```";
-        let content3 = "## Testing\nCompletely different content about testing";
-
-        let headings1 = vec![Heading {
-            line: 1,
-            level: 2,
-            text: "Testing".to_string(),
-        }];
-        let headings2 = vec![Heading {
-            line: 1,
-            level: 2,
-            text: "Testing".to_string(),
-        }];
-        let headings3 = vec![Heading {
-            line: 1,
-            level: 2,
-            text: "Testing".to_string(),
-        }];
-
-        let sections1 = index_sections(content1, &headings1);
-        let sections2 = index_sections(content2, &headings2);
-        let sections3 = index_sections(content3, &headings3);
-
-        // Identical content should produce identical SimHash
-        assert_eq!(sections1[0].simhash, sections2[0].simhash);
-
-        // Different content should produce different SimHash
-        assert_ne!(sections1[0].simhash, sections3[0].simhash);
-
-        // Identical sections should have 100% similarity
-        let sim_identical = simhash_similarity(sections1[0].simhash, sections2[0].simhash);
-        assert_eq!(sim_identical, 1.0);
-
-        // Different sections should have < 100% similarity
-        let sim_different = simhash_similarity(sections1[0].simhash, sections3[0].simhash);
-        assert!(sim_different < 1.0);
-    }
-
-    #[test]
-    fn test_compute_document_metrics_captures_structure_signals() {
-        let content = r"---
-title: Demo
-owner: Docs
----
-
-# Overview
-Intro paragraph.
-
-## Part 1
-- first
-- second
-
-## Changelog
-- Added feature
-- Fixed bug
-
-## Completed Work
-```rust
-fn main() {}
-```
-";
-        let lines: Vec<&str> = content.lines().collect();
-        let headings = vec![
-            Heading {
-                line: 6,
-                level: 1,
-                text: "Overview".to_string(),
-            },
-            Heading {
-                line: 9,
-                level: 2,
-                text: "Part 1".to_string(),
-            },
-            Heading {
-                line: 13,
-                level: 2,
-                text: "Changelog".to_string(),
-            },
-            Heading {
-                line: 17,
-                level: 2,
-                text: "Completed Work".to_string(),
-            },
-        ];
-        let links = vec![Link {
-            line: 7,
-            text: "readme".to_string(),
-            target: "README.md".to_string(),
-        }];
-
-        let metrics = compute_document_metrics("docs/demo.md", content, &lines, &headings, &links);
-
-        assert_eq!(metrics.path, "docs/demo.md");
-        assert_eq!(metrics.frontmatter_key_count, 2);
-        assert_eq!(metrics.heading_count, 4);
-        assert_eq!(metrics.section_count, 4);
-        assert_eq!(metrics.h1_count, 1);
-        assert_eq!(metrics.h2_count, 3);
-        assert_eq!(metrics.part_heading_count, 1);
-        assert_eq!(metrics.changelog_heading_count, 1);
-        assert_eq!(metrics.completion_heading_count, 1);
-        assert_eq!(metrics.changelog_entry_count, 2);
-        assert_eq!(metrics.list_item_count, 4);
-        assert_eq!(metrics.code_block_count, 1);
-        assert!(metrics.longest_section_lines >= 3);
-        assert!(metrics
-            .sections
-            .iter()
-            .any(|section| section.looks_like_part));
-        assert!(metrics
-            .sections
-            .iter()
-            .any(|section| section.looks_like_changelog && section.list_item_count == 2));
-    }
-
-    #[test]
-    fn test_cmd_build_writes_document_metrics_index() {
-        let unique = std::time::SystemTime::now()
-            .duration_since(std::time::UNIX_EPOCH)
-            .unwrap()
-            .as_nanos();
-        let root = std::env::temp_dir().join(format!("yore-build-metrics-{unique}"));
-        let docs_dir = root.join("docs");
-        let index_dir = root.join(".yore");
-
-        fs::create_dir_all(&docs_dir).unwrap();
-        fs::write(
-            docs_dir.join("guide.md"),
-            "# Guide\n\n## Part 1\n- step one\n- step two\n",
-        )
-        .unwrap();
-
-        cmd_build(&docs_dir, &index_dir, "md", &[], true, None, false, false).unwrap();
-
-        let metrics_path = index_dir.join("document_metrics.json");
-        assert!(metrics_path.exists());
-
-        let metrics_index: DocumentMetricsIndex =
-            serde_json::from_str(&fs::read_to_string(metrics_path).unwrap()).unwrap();
-        assert_eq!(metrics_index.version, 1);
-        assert_eq!(metrics_index.files.len(), 1);
-
-        let metrics = metrics_index.files.values().next().unwrap();
-        assert_eq!(metrics.heading_count, 2);
-        assert_eq!(metrics.part_heading_count, 1);
-        assert_eq!(metrics.list_item_count, 2);
-        assert_eq!(metrics.section_count, 2);
-
-        fs::remove_dir_all(root).unwrap();
-    }
-
-    #[test]
-    fn test_extract_keywords() {
-        let text = "This is a TEST document with some KEYWORDS";
-        let keywords = extract_keywords(text);
-
-        // Should lowercase (but not stem - extract_keywords doesn't stem)
-        assert!(keywords.contains(&"test".to_string()));
-        assert!(keywords.contains(&"document".to_string()));
-        assert!(keywords.contains(&"keywords".to_string())); // Note: not stemmed
-
-        // Should not contain stop words
-        assert!(!keywords.contains(&"this".to_string()));
-        assert!(!keywords.contains(&"is".to_string()));
-        // "a" and "with" are too short or stop words
-        assert!(!keywords.contains(&"with".to_string()));
-    }
-
-    #[test]
-    fn test_stem_word() {
-        // Test actual stemming behavior
-        assert_eq!(stem_word("running"), "runn"); // Simple stemmer removes "ing"
-        assert_eq!(stem_word("tests"), "test"); // Removes "s"
-        assert_eq!(stem_word("testing"), "test"); // Removes "ing"
-        assert_eq!(stem_word("keywords"), "keyword"); // Removes "s"
-
-        // Short words should not be stemmed
-        assert_eq!(stem_word("go"), "go");
-        assert_eq!(stem_word("it"), "it");
-    }
-
-    #[test]
-    fn test_get_link_context_basic() {
-        let path = "test_get_link_context_basic.md";
-        fs::write(path, "first line\nsecond line with a link\nthird line\n").unwrap();
-
-        let mut cache: HashMap<String, Vec<String>> = HashMap::new();
-        let ctx = get_link_context(&mut cache, path, 2).unwrap();
-        assert_eq!(ctx.as_deref(), Some("second line with a link"));
-
-        // Out-of-range line number should yield None
-        let ctx_out = get_link_context(&mut cache, path, 10).unwrap();
-        assert!(ctx_out.is_none());
-
-        fs::remove_file(path).unwrap();
-    }
-
-    #[test]
-    fn test_get_link_context_truncates_long_lines() {
-        let path = "test_get_link_context_truncate.md";
-        let long_line = "a".repeat(200);
-        fs::write(path, format!("{long_line}\n")).unwrap();
-
-        let mut cache: HashMap<String, Vec<String>> = HashMap::new();
-        let ctx = get_link_context(&mut cache, path, 1)
-            .unwrap()
-            .expect("expected context");
-
-        assert!(ctx.len() <= 160);
-        assert!(ctx.ends_with("..."));
-
-        fs::remove_file(path).unwrap();
-    }
-
-    #[test]
-    fn test_get_top_doc_terms_basic() {
-        // Setup: doc with term frequencies and IDF map
-        // Note: term_frequencies and idf_map use STEMMED keys
-        // "docker" -> "dock", "nginx" -> "nginx", "helm" -> "helm"
-        let mut term_frequencies = HashMap::new();
-        term_frequencies.insert("dock".to_string(), 10); // stem of "docker"
-        term_frequencies.insert("nginx".to_string(), 5);
-        term_frequencies.insert("helm".to_string(), 3);
-
-        let entry = FileEntry {
-            path: "test.md".to_string(),
-            size_bytes: 100,
-            line_count: 10,
-            headings: vec![],
-            keywords: vec![],
-            body_keywords: vec![
-                "docker".to_string(),
-                "nginx".to_string(),
-                "helm".to_string(),
-                "container".to_string(), // not in tf, will be excluded
-            ],
-            links: vec![],
-            simhash: 0,
-            term_frequencies,
-            doc_length: 100,
-            minhash: vec![],
-            section_fingerprints: vec![],
-            adr_references: vec![],
-        };
-
-        let mut idf_map = HashMap::new();
-        idf_map.insert("dock".to_string(), 2.0); // stemmed
-        idf_map.insert("nginx".to_string(), 1.5);
-        idf_map.insert("helm".to_string(), 3.0);
-
-        // Test: get top 2 terms, excluding nothing
-        let terms = get_top_doc_terms(&entry, &idf_map, &[], 2);
-
-        // docker: 10 * 2.0 = 20
-        // helm: 3 * 3.0 = 9
-        // nginx: 5 * 1.5 = 7.5
-        assert_eq!(terms.len(), 2);
-        assert_eq!(terms[0], "docker");
-        assert_eq!(terms[1], "helm");
-    }
-
-    #[test]
-    fn test_get_top_doc_terms_excludes_query_terms() {
-        // Note: term_frequencies and idf_map use STEMMED keys
-        let mut term_frequencies = HashMap::new();
-        term_frequencies.insert("kubernete".to_string(), 10); // stem of "kubernetes"
-        term_frequencies.insert("dock".to_string(), 5); // stem of "docker"
-        term_frequencies.insert("nginx".to_string(), 3);
-
-        let entry = FileEntry {
-            path: "test.md".to_string(),
-            size_bytes: 100,
-            line_count: 10,
-            headings: vec![],
-            keywords: vec![],
-            body_keywords: vec![
-                "kubernetes".to_string(),
-                "docker".to_string(),
-                "nginx".to_string(),
-            ],
-            links: vec![],
-            simhash: 0,
-            term_frequencies,
-            doc_length: 100,
-            minhash: vec![],
-            section_fingerprints: vec![],
-            adr_references: vec![],
-        };
-
-        let mut idf_map = HashMap::new();
-        idf_map.insert("kubernete".to_string(), 2.0); // stemmed
-        idf_map.insert("dock".to_string(), 1.5); // stemmed
-        idf_map.insert("nginx".to_string(), 3.0);
-
-        // Exclude "kubernetes" from results (different case, should still match after stemming)
-        let exclude = vec!["Kubernetes".to_string()];
-        let terms = get_top_doc_terms(&entry, &idf_map, &exclude, 3);
-
-        assert_eq!(terms.len(), 2);
-        assert!(!terms.contains(&"kubernetes".to_string()));
-        assert_eq!(terms[0], "nginx"); // 3 * 3.0 = 9
-        assert_eq!(terms[1], "docker"); // 5 * 1.5 = 7.5
-    }
-
-    #[test]
-    fn test_get_top_doc_terms_deduplicates_stems() {
-        let mut term_frequencies = HashMap::new();
-        term_frequencies.insert("run".to_string(), 10); // stem of running, runs, run
-
-        let entry = FileEntry {
-            path: "test.md".to_string(),
-            size_bytes: 100,
-            line_count: 10,
-            headings: vec![],
-            keywords: vec![],
-            body_keywords: vec!["running".to_string(), "runs".to_string(), "run".to_string()],
-            links: vec![],
-            simhash: 0,
-            term_frequencies,
-            doc_length: 100,
-            minhash: vec![],
-            section_fingerprints: vec![],
-            adr_references: vec![],
-        };
-
-        let mut idf_map = HashMap::new();
-        idf_map.insert("run".to_string(), 1.0);
-
-        let terms = get_top_doc_terms(&entry, &idf_map, &[], 5);
-
-        // Should only return one term (first occurrence), not all three
-        assert_eq!(terms.len(), 1);
-    }
-
-    #[test]
-    fn test_get_top_doc_terms_zero_returns_empty() {
-        let entry = FileEntry {
-            path: "test.md".to_string(),
-            size_bytes: 100,
-            line_count: 10,
-            headings: vec![],
-            keywords: vec!["test".to_string()],
-            body_keywords: vec!["test".to_string()],
-            links: vec![],
-            simhash: 0,
-            term_frequencies: HashMap::new(),
-            doc_length: 100,
-            minhash: vec![],
-            section_fingerprints: vec![],
-            adr_references: vec![],
-        };
-
-        let idf_map = HashMap::new();
-        let terms = get_top_doc_terms(&entry, &idf_map, &[], 0);
-
-        assert!(terms.is_empty());
-    }
-
-    #[test]
-    fn test_find_link_candidates_single_match() {
-        let mut available = HashSet::new();
-        available.insert("docs/guide/auth.md".to_string());
-        available.insert("docs/guide/other.md".to_string());
-
-        // Source and target are in the same parent; filename matches exactly one file
-        let candidates = find_link_candidates("docs/guide/README.md", "auth.md", &available);
-        assert_eq!(candidates.len(), 1);
-        assert_eq!(candidates[0], "auth.md");
-    }
-
-    #[test]
-    fn test_find_link_candidates_multiple_matches() {
-        let mut available = HashSet::new();
-        available.insert("docs/v1/auth.md".to_string());
-        available.insert("docs/v2/auth.md".to_string());
-        available.insert("docs/archive/auth.md".to_string());
-
-        // Multiple files with same name - should return all
-        let candidates = find_link_candidates("docs/README.md", "auth.md", &available);
-        assert!(candidates.len() >= 2);
-    }
-
-    #[test]
-    fn test_find_link_candidates_no_match() {
-        let mut available = HashSet::new();
-        available.insert("docs/guide/other.md".to_string());
-
-        // No file matches
-        let candidates = find_link_candidates("docs/README.md", "nonexistent.md", &available);
-        assert!(candidates.is_empty());
-    }
-
-    #[test]
-    fn test_link_fix_proposal_serialization() {
-        let proposal = LinkFixProposal {
-            source: "docs/README.md".to_string(),
-            line: 42,
-            broken_target: "../old/auth.md".to_string(),
-            candidates: vec![
-                "../archive/auth.md".to_string(),
-                "../v2/auth.md".to_string(),
-            ],
-            decision: None,
-        };
-
-        let yaml = serde_yaml::to_string(&proposal).unwrap();
-        assert!(yaml.contains("source: docs/README.md"));
-        assert!(yaml.contains("line: 42"));
-        assert!(yaml.contains("broken_target:"));
-        assert!(yaml.contains("candidates:"));
-
-        // Test deserialization
-        let parsed: LinkFixProposal = serde_yaml::from_str(&yaml).unwrap();
-        assert_eq!(parsed.source, "docs/README.md");
-        assert_eq!(parsed.line, 42);
-        assert_eq!(parsed.candidates.len(), 2);
-    }
-
-    #[test]
-    fn test_link_fix_proposal_with_decision() {
-        let yaml = r#"
-source: docs/README.md
-line: 42
-broken_target: "../old/auth.md"
-candidates:
-  - "../archive/auth.md"
-  - "../v2/auth.md"
-decision: 1
-"#;
-        let proposal: LinkFixProposal = serde_yaml::from_str(yaml).unwrap();
-        assert_eq!(proposal.decision, Some(1));
-        assert_eq!(proposal.candidates[1], "../v2/auth.md");
-    }
-
-    #[test]
-    fn test_diff_result_serialization() {
-        let result = DiffResult {
-            file1: "docs/a.md".to_string(),
-            file2: "docs/b.md".to_string(),
-            similarity: DiffSimilarity {
-                combined: 0.75,
-                jaccard: 0.6,
-                simhash: 0.9,
-            },
-            shared_keywords: vec!["auth".to_string(), "login".to_string()],
-            only_in_file1: vec!["oauth".to_string()],
-            only_in_file2: vec!["jwt".to_string()],
-            shared_headings: vec!["Introduction".to_string()],
-        };
-
-        let json = serde_json::to_string_pretty(&result).unwrap();
-        assert!(json.contains("\"file1\": \"docs/a.md\""));
-        assert!(json.contains("\"combined\": 0.75"));
-        assert!(json.contains("\"shared_keywords\""));
-    }
-
-    #[test]
-    fn test_stats_result_serialization() {
-        let result = StatsResult {
-            total_files: 100,
-            unique_keywords: 500,
-            total_headings: 250,
-            body_keywords: 1000,
-            total_links: 300,
-            index_version: 3,
-            indexed_at: "2024-01-01T00:00:00Z".to_string(),
-            top_keywords: vec![
-                KeywordCount {
-                    keyword: "authentication".to_string(),
-                    count: 50,
-                },
-                KeywordCount {
-                    keyword: "kubernetes".to_string(),
-                    count: 40,
-                },
-            ],
-        };
-
-        let json = serde_json::to_string_pretty(&result).unwrap();
-        assert!(json.contains("\"total_files\": 100"));
-        assert!(json.contains("\"top_keywords\""));
-        assert!(json.contains("\"authentication\""));
-    }
-
-    #[test]
-    fn test_mv_result_serialization() {
-        let result = MvResult {
-            from: "docs/old.md".to_string(),
-            to: "docs/new.md".to_string(),
-            moved: true,
-            updated_files: vec!["docs/index.md".to_string(), "docs/guide.md".to_string()],
-        };
-
-        let json = serde_json::to_string_pretty(&result).unwrap();
-        assert!(json.contains("\"from\": \"docs/old.md\""));
-        assert!(json.contains("\"moved\": true"));
-        assert!(json.contains("\"updated_files\""));
-    }
-
-    #[test]
-    fn test_fix_references_result_serialization() {
-        let result = FixReferencesResult {
-            mapping_file: "mappings.yaml".to_string(),
-            mappings_count: 5,
-            updated_files: vec!["docs/a.md".to_string()],
-            applied: false,
-        };
-
-        let json = serde_json::to_string_pretty(&result).unwrap();
-        assert!(json.contains("\"mapping_file\": \"mappings.yaml\""));
-        assert!(json.contains("\"mappings_count\": 5"));
-        assert!(json.contains("\"applied\": false"));
-    }
-
-    #[test]
-    fn test_yore_config_basic_parsing() {
-        let toml = r#"
-[index.docs]
-roots = ["docs/"]
-types = ["md"]
-output = ".yore"
-"#;
-        let config: YoreConfig = toml::from_str(toml).unwrap();
-        assert!(config.index.contains_key("docs"));
-        let docs = config.index.get("docs").unwrap();
-        assert_eq!(docs.roots, vec!["docs/"]);
-        assert_eq!(docs.types, vec!["md"]);
-    }
-
-    #[test]
-    fn test_yore_config_link_check_section() {
-        let toml = r#"
-[link-check]
-exclude = ["archive/**", "deprecated/**"]
-
-[[link-check.severity-overrides]]
-pattern = "archive/**"
-severity = "warn"
-
-[[link-check.severity-overrides]]
-pattern = "deprecated/**"
-severity = "info"
-"#;
-        let config: YoreConfig = toml::from_str(toml).unwrap();
-        let link_check = config.link_check.unwrap();
-        assert_eq!(link_check.exclude.len(), 2);
-        assert_eq!(link_check.severity_overrides.len(), 2);
-        assert_eq!(link_check.severity_overrides[0].pattern, "archive/**");
-        assert_eq!(link_check.severity_overrides[0].severity, "warn");
-    }
-
-    #[test]
-    fn test_yore_config_external_repos() {
-        let toml = r#"
-[[external.repos]]
-path = "../runtime/docs"
-prefix = "runtime"
-
-[[external.repos]]
-path = "../api-docs"
-"#;
-        let config: YoreConfig = toml::from_str(toml).unwrap();
-        let external = config.external.unwrap();
-        assert_eq!(external.repos.len(), 2);
-        assert_eq!(external.repos[0].path, "../runtime/docs");
-        assert_eq!(external.repos[0].prefix, Some("runtime".to_string()));
-        assert_eq!(external.repos[1].prefix, None);
-    }
-
-    #[test]
-    fn test_yore_config_policy_section() {
-        let toml = r#"
-[policy]
-rules-file = ".yore-policy.yaml"
-"#;
-        let config: YoreConfig = toml::from_str(toml).unwrap();
-        let policy = config.policy.unwrap();
-        assert_eq!(policy.rules_file, Some(".yore-policy.yaml".to_string()));
-    }
-
-    #[test]
-    fn test_yore_config_full_example() {
-        let toml = r#"
-[index.docs]
-roots = ["docs/"]
-types = ["md", "txt"]
-output = ".yore"
-
-[index.all]
-roots = ["docs/", "specs/"]
-types = ["md"]
-
-[link-check]
-exclude = ["archive/**"]
-
-[[link-check.severity-overrides]]
-pattern = "deprecated/**"
-severity = "info"
-
-[policy]
-rules-file = ".yore-policy.yaml"
-
-[[external.repos]]
-path = "../runtime/docs"
-prefix = "runtime"
-"#;
-        let config: YoreConfig = toml::from_str(toml).unwrap();
-
-        // Index profiles
-        assert_eq!(config.index.len(), 2);
-        assert!(config.index.contains_key("docs"));
-        assert!(config.index.contains_key("all"));
-
-        // Link check
-        let link_check = config.link_check.unwrap();
-        assert_eq!(link_check.exclude.len(), 1);
-        assert_eq!(link_check.severity_overrides.len(), 1);
-
-        // Policy
-        let policy = config.policy.unwrap();
-        assert!(policy.rules_file.is_some());
-
-        // External
-        let external = config.external.unwrap();
-        assert_eq!(external.repos.len(), 1);
-    }
-
-    #[test]
-    fn test_yore_config_empty_is_valid() {
-        let toml = "";
-        let config: YoreConfig = toml::from_str(toml).unwrap();
-        assert!(config.index.is_empty());
-        assert!(config.link_check.is_none());
-        assert!(config.policy.is_none());
-        assert!(config.external.is_none());
-    }
-
-    #[test]
-    fn test_build_result_serialization() {
-        let result = BuildResult {
-            index_path: ".yore".to_string(),
-            files_indexed: 150,
-            total_headings: 450,
-            total_links: 200,
-            unique_keywords: 800,
-            duration_ms: 1234,
-            renames_tracked: None,
-            total_relations: None,
-        };
-
-        let json = serde_json::to_string_pretty(&result).unwrap();
-        assert!(json.contains("\"index_path\": \".yore\""));
-        // renames_tracked should be absent when None due to skip_serializing_if
-        assert!(!json.contains("renames_tracked"));
-        assert!(json.contains("\"files_indexed\": 150"));
-        assert!(json.contains("\"total_headings\": 450"));
-        assert!(json.contains("\"total_links\": 200"));
-        assert!(json.contains("\"unique_keywords\": 800"));
-        assert!(json.contains("\"duration_ms\": 1234"));
-    }
-
-    #[test]
-    fn test_eval_json_result_serialization() {
-        let result = EvalJsonResult {
-            questions_file: "questions.jsonl".to_string(),
-            total_questions: 10,
-            passed: 8,
-            failed: 2,
-            pass_rate: 80.0,
-            results: vec![
-                EvalQuestionResult {
-                    question: "How do I authenticate?".to_string(),
-                    passed: true,
-                    expected: vec!["auth.md".to_string()],
-                    found: vec!["auth.md".to_string()],
-                    missing: vec![],
-                },
-                EvalQuestionResult {
-                    question: "What is the API endpoint?".to_string(),
-                    passed: false,
-                    expected: vec!["api.md".to_string()],
-                    found: vec![],
-                    missing: vec!["api.md".to_string()],
-                },
-            ],
-        };
-
-        let json = serde_json::to_string_pretty(&result).unwrap();
-        assert!(json.contains("\"questions_file\": \"questions.jsonl\""));
-        assert!(json.contains("\"total_questions\": 10"));
-        assert!(json.contains("\"passed\": 8"));
-        assert!(json.contains("\"failed\": 2"));
-        assert!(json.contains("\"pass_rate\": 80.0"));
-        assert!(json.contains("\"results\""));
-        assert!(json.contains("How do I authenticate?"));
-        assert!(json.contains("\"missing\": []"));
-    }
-
-    #[test]
-    fn test_rename_history_serialization() {
-        let history = RenameHistory {
-            renames: vec![
-                RenameEntry {
-                    old_path: "docs/old/auth.md".to_string(),
-                    new_path: "docs/v2/auth.md".to_string(),
-                    commit: "abc123".to_string(),
-                },
-                RenameEntry {
-                    old_path: "docs/v2/auth.md".to_string(),
-                    new_path: "docs/current/auth.md".to_string(),
-                    commit: "def456".to_string(),
-                },
-            ],
-            indexed_at: "1234567890".to_string(),
-        };
-
-        let json = serde_json::to_string_pretty(&history).unwrap();
-        assert!(json.contains("\"old_path\": \"docs/old/auth.md\""));
-        assert!(json.contains("\"new_path\": \"docs/v2/auth.md\""));
-        assert!(json.contains("\"commit\": \"abc123\""));
-
-        // Verify roundtrip
-        let parsed: RenameHistory = serde_json::from_str(&json).unwrap();
-        assert_eq!(parsed.renames.len(), 2);
-    }
-
-    #[test]
-    fn test_resolve_renamed_path_single_rename() {
-        let history = RenameHistory {
-            renames: vec![RenameEntry {
-                old_path: "docs/old.md".to_string(),
-                new_path: "docs/new.md".to_string(),
-                commit: "abc123".to_string(),
-            }],
-            indexed_at: "0".to_string(),
-        };
-
-        assert_eq!(
-            resolve_renamed_path("docs/old.md", &history),
-            Some("docs/new.md".to_string())
-        );
-        assert_eq!(resolve_renamed_path("docs/other.md", &history), None);
-    }
-
-    #[test]
-    fn test_resolve_renamed_path_chain() {
-        let history = RenameHistory {
-            renames: vec![
-                RenameEntry {
-                    old_path: "a.md".to_string(),
-                    new_path: "b.md".to_string(),
-                    commit: "1".to_string(),
-                },
-                RenameEntry {
-                    old_path: "b.md".to_string(),
-                    new_path: "c.md".to_string(),
-                    commit: "2".to_string(),
-                },
-                RenameEntry {
-                    old_path: "c.md".to_string(),
-                    new_path: "d.md".to_string(),
-                    commit: "3".to_string(),
-                },
-            ],
-            indexed_at: "0".to_string(),
-        };
-
-        // Should follow the chain from a.md -> b.md -> c.md -> d.md
-        assert_eq!(
-            resolve_renamed_path("a.md", &history),
-            Some("d.md".to_string())
-        );
-        // Starting from middle should also work
-        assert_eq!(
-            resolve_renamed_path("b.md", &history),
-            Some("d.md".to_string())
-        );
-    }
-
-    #[test]
-    fn test_compute_relative_path_same_dir() {
-        let files: HashSet<String> = HashSet::new();
-        assert_eq!(
-            compute_relative_path("docs/foo.md", "docs/bar.md", &files),
-            Some("bar.md".to_string())
-        );
-    }
-
-    #[test]
-    fn test_compute_relative_path_subdirectory() {
-        let files: HashSet<String> = HashSet::new();
-        assert_eq!(
-            compute_relative_path("docs/index.md", "docs/guides/auth.md", &files),
-            Some("guides/auth.md".to_string())
-        );
-    }
-
-    #[test]
-    fn test_compute_relative_path_parent_directory() {
-        let files: HashSet<String> = HashSet::new();
-        let result = compute_relative_path("docs/guides/auth.md", "docs/index.md", &files);
-        assert!(result.is_some());
-        assert!(result.unwrap().starts_with("../"));
-    }
-
-    #[test]
-    fn test_build_result_with_renames() {
-        let result = BuildResult {
-            index_path: ".yore".to_string(),
-            files_indexed: 100,
-            total_headings: 200,
-            total_links: 50,
-            unique_keywords: 500,
-            duration_ms: 1000,
-            renames_tracked: Some(25),
-            total_relations: None,
-        };
-
-        let json = serde_json::to_string_pretty(&result).unwrap();
-        assert!(json.contains("\"renames_tracked\": 25"));
-    }
-
-    #[test]
-    fn test_external_repos_path_extraction() {
-        let toml = r#"
-[[external.repos]]
-path = "../runtime/docs"
-prefix = "runtime"
-
-[[external.repos]]
-path = "../api-docs"
-"#;
-        let config: YoreConfig = toml::from_str(toml).unwrap();
-        let external = config.external.unwrap();
-
-        // Extract paths like the cmd_check_links dispatch does
-        let paths: Vec<String> = external.repos.iter().map(|r| r.path.clone()).collect();
-
-        assert_eq!(paths.len(), 2);
-        assert_eq!(paths[0], "../runtime/docs");
-        assert_eq!(paths[1], "../api-docs");
-    }
-
-    fn make_file_entry(path: &str) -> FileEntry {
-        FileEntry {
-            path: path.to_string(),
-            size_bytes: 0,
-            line_count: 0,
-            headings: Vec::new(),
-            keywords: Vec::new(),
-            body_keywords: Vec::new(),
-            links: Vec::new(),
-            simhash: 0,
-            term_frequencies: HashMap::new(),
-            doc_length: 0,
-            minhash: Vec::new(),
-            section_fingerprints: Vec::new(),
-            adr_references: Vec::new(),
-        }
-    }
-
-    fn make_forward_index(files: Vec<FileEntry>) -> ForwardIndex {
-        let map = files
-            .into_iter()
-            .map(|entry| (entry.path.clone(), entry))
-            .collect();
-        ForwardIndex {
-            files: map,
-            indexed_at: "now".to_string(),
-            version: 1,
-            source_root: String::new(),
-            avg_doc_length: 0.0,
-            idf_map: HashMap::new(),
+            Ok(())
         }
-    }
-
-    #[test]
-    fn test_parse_query_terms_punctuation_hyphen_case() {
-        let terms = parse_query_terms("Hello, async-migration!", true);
-        assert!(terms.contains(&"hello".to_string()));
-        assert!(terms.contains(&"async-migration".to_string()));
-    }
-
-    #[test]
-    fn test_parse_query_terms_stopwords_only() {
-        let terms = parse_query_terms("the and of", true);
-        assert!(terms.is_empty());
-    }
-
-    #[test]
-    fn test_load_vocabulary_stopwords_merges_defaults_and_custom() {
-        let default_words = load_vocabulary_stopwords(None, true).unwrap();
-        assert!(default_words.contains("the"));
-        assert!(default_words.contains("using"));
-
-        let custom_path = "tmp-vocabulary-stopwords.txt";
-        fs::write(custom_path, "custom\nThe\nvocab-test\n").unwrap();
-        let merged_words = load_vocabulary_stopwords(Some(Path::new(custom_path)), true).unwrap();
-
-        fs::remove_file(custom_path).unwrap();
-        assert!(merged_words.contains("custom"));
-        assert!(merged_words.contains("the"));
-        assert!(merged_words.contains("vocab-test"));
-    }
-
-    #[test]
-    fn test_load_vocabulary_stopwords_can_disable_defaults() {
-        let stopwords = load_vocabulary_stopwords(None, false).unwrap();
-        assert!(!stopwords.contains("the"));
-        assert!(!stopwords.contains("and"));
-        assert!(stopwords.is_empty());
-    }
-
-    #[test]
-    fn test_build_auto_common_vocabulary_stopwords() {
-        let candidates = vec![
-            VocabularyCandidateTerm {
-                term: "build".into(),
-                surface: None,
-                term_freq: 12,
-                doc_freq: 2,
-                first_file: "a".into(),
-                first_line: 1,
-                first_heading: "Build".into(),
-            },
-            VocabularyCandidateTerm {
-                term: "yore".into(),
-                surface: None,
-                term_freq: 9,
-                doc_freq: 3,
-                first_file: "a".into(),
-                first_line: 1,
-                first_heading: "Yore".into(),
-            },
-            VocabularyCandidateTerm {
-                term: "indexer".into(),
-                surface: None,
-                term_freq: 8,
-                doc_freq: 5,
-                first_file: "a".into(),
-                first_line: 1,
-                first_heading: "Index".into(),
-            },
-        ];
-
-        let common = build_auto_common_vocabulary_stopwords(&candidates, 2);
-        assert!(common.contains("build"));
-        assert!(common.contains("yore"));
-        assert_eq!(common.len(), 2);
-    }
-
-    #[test]
-    fn test_is_hygienic_vocabulary_term() {
-        assert!(!is_hygienic_vocabulary_term("th"));
-        assert!(is_hygienic_vocabulary_term("yore"));
-        assert!(!is_hygienic_vocabulary_term("a1234567890"));
-        assert!(!is_hygienic_vocabulary_term("12345"));
-        assert!(!is_hygienic_vocabulary_term("v2.0"));
-        assert!(!is_hygienic_vocabulary_term("x"));
-    }
-
-    #[test]
-    fn test_apply_vocabulary_limit_preserves_total_and_truncates_terms() {
-        let terms = vec![
-            VocabularyTerm {
-                term: "alpha".into(),
-                score: 3.0,
-                count: 4,
-            },
-            VocabularyTerm {
-                term: "beta".into(),
-                score: 2.0,
-                count: 3,
-            },
-            VocabularyTerm {
-                term: "gamma".into(),
-                score: 1.0,
-                count: 2,
-            },
-        ];
-        let (clipped, total) = apply_vocabulary_limit(terms, 2);
-        assert_eq!(total, 3);
-        assert_eq!(clipped.len(), 2);
-        assert_eq!(clipped[0].term, "alpha");
-        assert_eq!(clipped[1].term, "beta");
-    }
-
-    #[test]
-    fn test_render_vocabulary_lines() {
-        let terms = vec![
-            VocabularyTerm {
-                term: "alpha".into(),
-                score: 1.2,
-                count: 7,
-            },
-            VocabularyTerm {
-                term: "beta".into(),
-                score: 0.9,
-                count: 5,
-            },
-        ];
-        assert_eq!(render_vocabulary_lines(&terms), "alpha\nbeta");
-    }
-
-    #[test]
-    fn test_render_vocabulary_prompt_normalizes_terms() {
-        let terms = vec![
-            VocabularyTerm {
-                term: "alpha beta".into(),
-                score: 1.0,
-                count: 2,
-            },
-            VocabularyTerm {
-                term: "gamma\x00delta".into(),
-                score: 1.0,
-                count: 2,
+        Commands::Health {
+            file,
+            all,
+            index,
+            max_lines,
+            max_part_sections,
+            max_completed_lines,
+            max_changelog_entries,
+            json,
+        } => cmd_health(
+            file.as_deref(),
+            all,
+            &index,
+            &HealthOptions {
+                max_lines,
+                max_part_sections,
+                max_completed_lines,
+                max_changelog_entries,
             },
-            VocabularyTerm {
-                term: "  spaced   out  ".into(),
-                score: 1.0,
-                count: 2,
+            json,
+        ),
+        Commands::Build {
+            path,
+            output,
+            types,
+            exclude,
+            json,
+            track_renames,
+        } => {
+            let (path, output, types, roots) =
+                resolve_build_params(path, output, types, cli.profile.as_deref(), &config);
+            cmd_build(
+                &path,
+                &output,
+                &types,
+                &exclude,
+                cli.quiet,
+                roots.as_deref(),
+                json,
+                track_renames,
+            )
+        }
+        Commands::Query {
+            terms,
+            query,
+            limit,
+            files_only,
+            json,
+            doc_terms,
+            explain,
+            no_stopwords,
+            phrase,
+            index,
+        } => {
+            let query_text = query.unwrap_or_else(|| terms.join(" "));
+            let options = QueryOptions {
+                limit,
+                files_only,
+                json,
+                doc_terms,
+                explain,
+                require_phrases: phrase,
+                filter_stopwords: !no_stopwords,
+            };
+            cmd_query(&query_text, &index, &options)
+        }
+        Commands::Similar {
+            file,
+            limit,
+            threshold,
+            json,
+            doc_terms,
+            index,
+        } => cmd_similar(&file, limit, threshold, json, doc_terms, &index),
+        Commands::Dupes {
+            threshold,
+            group,
+            json,
+            index,
+        } => cmd_dupes(threshold, group, json, &index),
+        Commands::DupesSections {
+            threshold,
+            min_files,
+            json,
+            index,
+        } => cmd_dupes_sections(threshold, min_files, json, &index),
+        Commands::Diff {
+            file1,
+            file2,
+            index,
+            json,
+        } => cmd_diff(&file1, &file2, &index, json),
+        Commands::Stats {
+            top_keywords,
+            index,
+            json,
+        } => cmd_stats(top_keywords, &index, json),
+        Commands::Repl { index } => cmd_repl(&index),
+        Commands::Assemble {
+            query,
+            max_tokens,
+            max_sections,
+            depth,
+            format,
+            doc_terms,
+            from_files,
+            use_relations,
+            index,
+        } => cmd_assemble(
+            &query.join(" "),
+            &from_files,
+            &AssembleOptions {
+                max_tokens,
+                max_sections,
+                depth,
+                format,
+                doc_terms,
+                use_relations,
             },
-        ];
-        assert_eq!(
-            render_vocabulary_prompt(&terms),
-            "alpha beta, gammadelta, spaced out"
-        );
-    }
-
-    #[test]
-    fn test_vocabulary_term_json_shape() {
-        let result = VocabularyResult {
-            format: "json".into(),
-            limit: 2,
-            total: 3,
-            terms: vec![
-                VocabularyTerm {
-                    term: "alpha".into(),
-                    score: 2.0,
-                    count: 7,
+            &index,
+        ),
+        Commands::Mcp { command } => match command {
+            McpCommands::SearchContext {
+                query,
+                max_results,
+                max_tokens,
+                max_bytes,
+                from_files,
+                index,
+            } => cmd_mcp_search_context(
+                &query.join(" "),
+                &from_files,
+                &index,
+                McpSearchOptions {
+                    max_results,
+                    max_tokens,
+                    max_bytes,
                 },
-                VocabularyTerm {
-                    term: "beta".into(),
-                    score: 1.1,
-                    count: 4,
+            ),
+            McpCommands::FetchContext {
+                handle,
+                max_tokens,
+                max_bytes,
+                index,
+            } => cmd_mcp_fetch_context(
+                &handle,
+                &index,
+                McpFetchOptions {
+                    max_tokens,
+                    max_bytes,
                 },
-            ],
-            stopwords: None,
-            used_default_stopwords: true,
-            auto_common_terms: None,
-            include_stemming: false,
-        };
-        let json_value: serde_json::Value = serde_json::to_value(&result).unwrap();
-        assert_eq!(json_value["terms"][0]["term"], "alpha");
-        assert_eq!(json_value["terms"][0]["score"], 2.0);
-        assert_eq!(json_value["terms"][0]["count"], 7);
-        assert_eq!(json_value["terms"].as_array().unwrap().len(), 2);
-    }
-
-    #[test]
-    fn test_resolve_vocabulary_surface_prefers_heading_surface() {
-        let postings = vec![
-            ReverseEntry {
-                file: "notes.md".to_string(),
-                line: Some(10),
-                heading: Some("alpha term".to_string()),
-                level: None,
-            },
-            ReverseEntry {
-                file: "guide.md".to_string(),
-                line: Some(2),
-                heading: None,
-                level: None,
-            },
-        ];
-        let forward = make_forward_index(vec![
-            make_file_entry("notes.md"),
-            FileEntry {
-                path: "guide.md".to_string(),
-                size_bytes: 0,
-                line_count: 0,
-                headings: Vec::new(),
-                keywords: vec!["term".to_string(), "other".to_string()],
-                body_keywords: vec!["term".to_string()],
-                links: Vec::new(),
-                simhash: 0,
-                term_frequencies: HashMap::new(),
-                doc_length: 0,
-                minhash: Vec::new(),
-                section_fingerprints: Vec::new(),
-                adr_references: Vec::new(),
-            },
-        ]);
-        let resolved = resolve_vocabulary_surface("term", &postings, Some(&forward)).unwrap();
-        assert_eq!(resolved, "term");
-    }
-
-    #[test]
-    fn test_resolve_vocabulary_surface_fallbacks_to_forward_index() {
-        let postings = vec![
-            ReverseEntry {
-                file: "notes.md".to_string(),
-                line: Some(10),
-                heading: None,
-                level: None,
-            },
-            ReverseEntry {
-                file: "guide.md".to_string(),
-                line: Some(2),
-                heading: None,
-                level: None,
-            },
-        ];
-        let forward = make_forward_index(vec![
-            FileEntry {
-                path: "notes.md".to_string(),
-                size_bytes: 0,
-                line_count: 0,
-                headings: Vec::new(),
-                keywords: vec!["word".to_string()],
-                body_keywords: vec![],
-                links: Vec::new(),
-                simhash: 0,
-                term_frequencies: HashMap::new(),
-                doc_length: 0,
-                minhash: Vec::new(),
-                section_fingerprints: Vec::new(),
-                adr_references: Vec::new(),
-            },
-            FileEntry {
-                path: "guide.md".to_string(),
-                size_bytes: 0,
-                line_count: 0,
-                headings: Vec::new(),
-                keywords: vec!["word".to_string()],
-                body_keywords: vec![],
-                links: Vec::new(),
-                simhash: 0,
-                term_frequencies: HashMap::new(),
-                doc_length: 0,
-                minhash: Vec::new(),
-                section_fingerprints: Vec::new(),
-                adr_references: Vec::new(),
+            ),
+            McpCommands::Serve { index } => cmd_mcp_serve(&index),
+        },
+        Commands::Eval {
+            questions,
+            index,
+            json,
+        } => cmd_eval(&questions, &index, json),
+        Commands::Vocabulary {
+            index,
+            limit,
+            format,
+            json,
+            stopwords,
+            include_stemming,
+            no_default_stopwords,
+            common_terms,
+        } => cmd_vocabulary(
+            &index,
+            limit,
+            &format,
+            json,
+            VocabularyOptions {
+                stopwords: stopwords.as_deref(),
+                include_stemming,
+                no_default_stopwords,
+                common_terms,
             },
-        ]);
-        let resolved = resolve_vocabulary_surface("word", &postings, Some(&forward)).unwrap();
-        assert_eq!(resolved, "word");
-    }
-
-    #[test]
-    fn test_parse_query_terms_mixed_case() {
-        let terms = parse_query_terms("TeSt CaSe", true);
-        assert_eq!(terms, vec!["test".to_string(), "case".to_string()]);
-    }
-
-    #[test]
-    fn test_parse_query_phrases() {
-        let parsed = parse_query("\"async migration\" plan", true);
-        assert_eq!(
-            parsed.terms,
-            vec![
-                "async".to_string(),
-                "migration".to_string(),
-                "plan".to_string()
-            ]
-        );
-        assert_eq!(parsed.phrases.len(), 1);
-        assert_eq!(
-            parsed.phrases[0].terms,
-            vec!["async".to_string(), "migration".to_string()]
-        );
-    }
-
-    #[test]
-    fn test_expand_from_files_args_supports_list() {
-        let dir = std::env::temp_dir().join(format!(
-            "yore-test-{}",
-            std::time::SystemTime::now()
-                .duration_since(std::time::UNIX_EPOCH)
-                .unwrap()
-                .as_nanos()
-        ));
-        fs::create_dir_all(&dir).unwrap();
-        let list_path = dir.join("files.txt");
-        fs::write(&list_path, "docs/a.md\n\n docs/b.md\n").unwrap();
-
-        let args = vec![
-            format!("@{}", list_path.to_string_lossy()),
-            "docs/c.md".to_string(),
-        ];
-        let expanded = expand_from_files_args(&args).unwrap();
-
-        assert_eq!(
-            expanded,
-            vec![
-                "docs/a.md".to_string(),
-                "docs/b.md".to_string(),
-                "docs/c.md".to_string()
-            ]
-        );
-    }
-
-    #[test]
-    fn test_resolve_from_files_reports_missing() {
-        let index = make_forward_index(vec![make_file_entry("docs/a.md")]);
-        let inputs = vec!["./docs/a.md".to_string(), "docs/missing.md".to_string()];
-        let (resolved, missing) = resolve_from_files(&inputs, &index);
-        assert_eq!(resolved, vec!["docs/a.md".to_string()]);
-        assert_eq!(missing, vec!["docs/missing.md".to_string()]);
-    }
-
-    #[test]
-    fn test_collect_sections_for_files_max_sections() {
-        let dir = std::env::temp_dir().join(format!(
-            "yore-test-{}",
-            std::time::SystemTime::now()
-                .duration_since(std::time::UNIX_EPOCH)
-                .unwrap()
-                .as_nanos()
-        ));
-        fs::create_dir_all(&dir).unwrap();
-        let file_path = dir.join("doc.md");
-        fs::write(&file_path, "# Title\n\nBody\n\n## Sub\n\nMore").unwrap();
-        let file_path_str = file_path.to_string_lossy().to_string();
-
-        let entry = FileEntry {
-            path: file_path_str.clone(),
-            size_bytes: 0,
-            line_count: 0,
-            headings: Vec::new(),
-            keywords: Vec::new(),
-            body_keywords: Vec::new(),
-            links: Vec::new(),
-            simhash: 0,
-            term_frequencies: HashMap::new(),
-            doc_length: 0,
-            minhash: Vec::new(),
-            section_fingerprints: vec![
-                SectionFingerprint {
-                    heading: "Title".to_string(),
-                    level: 1,
-                    line_start: 1,
-                    line_end: 3,
-                    simhash: 0,
-                },
-                SectionFingerprint {
-                    heading: "Sub".to_string(),
-                    level: 2,
-                    line_start: 5,
-                    line_end: 6,
-                    simhash: 0,
-                },
-            ],
-            adr_references: vec![],
-        };
-        let index = make_forward_index(vec![entry]);
-        let sections = collect_sections_for_files(&[file_path_str], &index, "", 1);
-        assert_eq!(sections.len(), 1);
-    }
-
-    #[test]
-    fn test_build_mcp_handle_is_stable() {
-        let section = SectionMatch {
-            doc_path: "docs/aa-auth.md".to_string(),
-            heading: "Authentication Overview".to_string(),
-            line_start: 1,
-            line_end: 11,
-            bm25_score: 0.25,
-            content: "# Authentication Overview\n\nAuthentication flow".to_string(),
-            canonicality: 0.5,
-        };
-
-        let left = build_mcp_handle("authentication", &section);
-        let right = build_mcp_handle("authentication", &section);
-
-        assert_eq!(left, right);
-        assert!(left.starts_with("ctx_"));
-    }
+        ),
+        Commands::CheckLinks {
+            index,
+            json,
+            root,
+            summary,
+            summary_only,
+        } => {
+            let index_path = resolve_index_path(index, cli.profile.as_deref(), &config);
+            let external_paths: Vec<String> = config
+                .as_ref()
+                .and_then(|c| c.external.as_ref())
+                .map(|e| e.repos.iter().map(|r| r.path.clone()).collect())
+                .unwrap_or_default();
+            cmd_check_links(
+                &index_path,
+                json,
+                root.as_deref(),
+                summary,
+                summary_only,
+                &external_paths,
+            )
+        }
+        Commands::Backlinks { file, index, json } => cmd_backlinks(&file, &index, json),
+        Commands::Orphans {
+            index,
+            json,
+            exclude,
+        } => cmd_orphans(&index, json, &exclude),
+        Commands::Canonicality {
+            index,
+            json,
+            threshold,
+        } => cmd_canonicality(&index, json, threshold),
+        Commands::CanonicalOrphans {
+            index,
+            json,
+            threshold,
+        } => cmd_canonical_orphans(&index, threshold, json),
+        Commands::ExportGraph { format, index } => cmd_export_graph(&index, &format),
+        Commands::Paths {
+            source,
+            depth,
+            kind,
+            json,
+            index,
+        } => cmd_paths(&source, depth, kind.as_deref(), json, &index),
+        Commands::SuggestConsolidation {
+            threshold,
+            json,
+            index,
+        } => cmd_suggest_consolidation(&index, threshold, json),
+        Commands::Policy {
+            config,
+            index,
+            json,
+        } => cmd_policy(&config, &index, json),
+        Commands::FixLinks {
+            index,
+            dry_run,
+            apply,
+            propose,
+            apply_decisions,
+            json,
+            use_git_history,
+        } => cmd_fix_links(
+            &index,
+            dry_run,
+            apply,
+            propose,
+            apply_decisions,
+            json,
+            use_git_history,
+        ),
+        Commands::FixReferences {
+            mapping,
+            index,
+            dry_run,
+            apply,
+            json,
+        } => cmd_fix_references(&index, &mapping, dry_run, apply, json),
+        Commands::Mv {
+            from,
+            to,
+            index,
+            update_refs,
+            dry_run,
+            json,
+        } => cmd_mv(&from, &to, &index, update_refs, dry_run, json),
+        Commands::Stale {
+            index,
+            days,
+            min_inlinks,
+            json,
+        } => cmd_stale(&index, days, min_inlinks, json),
+    };
+    result
 }
+
+#[cfg(test)]
+#[path = "tests_main.rs"]
+mod tests;
diff --git a/src/mcp.rs b/src/mcp.rs
new file mode 100644
index 0000000..8e745f5
--- /dev/null
+++ b/src/mcp.rs
@@ -0,0 +1,767 @@
+use serde::Serialize;
+use std::fs;
+use std::io::{self, BufRead, Write};
+use std::path::{Path, PathBuf};
+
+use crate::assemble::*;
+use crate::types::*;
+use crate::util::*;
+
+/// Returns the primary handle store: the `mcp_handles` directory inside `index_dir`.
+pub(crate) fn mcp_handle_dir(index_dir: &Path) -> PathBuf {
+    let mut handle_dir = index_dir.to_path_buf();
+    handle_dir.push("mcp_handles");
+    handle_dir
+}
+
+/// Derives a 16-hex-digit namespace for `index_dir`.
+///
+/// The lossy string form of `canonicalize_existing_path(index_dir)` is fed
+/// through `stable_mcp_hash_update`, and the resulting state is rendered as
+/// zero-padded lowercase hex.
+pub(crate) fn build_mcp_store_namespace(index_dir: &Path) -> String {
+    // Seed for the running hash.
+    const FNV_OFFSET_BASIS: u64 = 14_695_981_039_346_656_037;
+
+    let resolved = canonicalize_existing_path(index_dir);
+    let resolved_text = resolved.to_string_lossy();
+    let mut digest = FNV_OFFSET_BASIS;
+    stable_mcp_hash_update(&mut digest, resolved_text.as_bytes());
+    format!("{digest:016x}")
+}
+
+/// Returns the fallback handle store for `index_dir`:
+/// `<temp dir>/yore/mcp_handles/<namespace>`, where the namespace comes from
+/// `build_mcp_store_namespace`.
+pub(crate) fn fallback_mcp_handle_dir(index_dir: &Path) -> PathBuf {
+    let mut fallback = std::env::temp_dir();
+    fallback.push("yore");
+    fallback.push("mcp_handles");
+    fallback.push(build_mcp_store_namespace(index_dir));
+    fallback
+}
+
+/// Lists the handle stores in the order they are tried: the directory inside
+/// the index first, then the temp-dir fallback.
+pub(crate) fn candidate_mcp_handle_dirs(index_dir: &Path) -> Vec<PathBuf> {
+    let mut candidates = Vec::with_capacity(2);
+    candidates.push(mcp_handle_dir(index_dir));
+    candidates.push(fallback_mcp_handle_dir(index_dir));
+    candidates
+}
+
+/// Folds `bytes` into the running hash `state`.
+///
+/// Each byte is XOR-ed into the state, which is then multiplied (wrapping) by
+/// `FNV_PRIME`. The state is updated in place, so a caller can feed several
+/// slices one after another.
+pub(crate) fn stable_mcp_hash_update(state: &mut u64, bytes: &[u8]) {
+    const FNV_PRIME: u64 = 1_099_511_628_211;
+
+    *state = bytes.iter().fold(*state, |running, &byte| {
+        (running ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
+    });
+}
+
+/// Builds the `ctx_<16 hex digits>` handle for one section of a query result.
+///
+/// The hash covers, in order: the query, the section's document path, heading,
+/// `line_start`, `line_end` (both as little-endian bytes) and content. A single
+/// `0xff` byte is hashed between consecutive fields.
+pub(crate) fn build_mcp_handle(query: &str, section: &SectionMatch) -> String {
+    const FNV_OFFSET_BASIS: u64 = 14_695_981_039_346_656_037;
+    const FIELD_SEPARATOR: [u8; 1] = [0xff];
+
+    let start_bytes = section.line_start.to_le_bytes();
+    let end_bytes = section.line_end.to_le_bytes();
+    let fields: [&[u8]; 6] = [
+        query.as_bytes(),
+        section.doc_path.as_bytes(),
+        section.heading.as_bytes(),
+        &start_bytes,
+        &end_bytes,
+        section.content.as_bytes(),
+    ];
+
+    let mut digest = FNV_OFFSET_BASIS;
+    for (position, field) in fields.iter().enumerate() {
+        // The separator goes between fields only, never before the first one.
+        if position > 0 {
+            stable_mcp_hash_update(&mut digest, &FIELD_SEPARATOR);
+        }
+        stable_mcp_hash_update(&mut digest, field);
+    }
+
+    format!("ctx_{digest:016x}")
+}
+
+/// Copies the location fields of `section` (document path, heading, start and
+/// end line) into a standalone `McpSourceRef`.
+pub(crate) fn build_mcp_source_ref(section: &SectionMatch) -> McpSourceRef {
+    let SectionMatch {
+        doc_path,
+        heading,
+        line_start,
+        line_end,
+        ..
+    } = section;
+
+    McpSourceRef {
+        path: doc_path.clone(),
+        heading: heading.clone(),
+        line_start: *line_start,
+        line_end: *line_end,
+    }
+}
+
+/// Persists `artifact` as pretty-printed JSON in `<handle>.json`.
+///
+/// The candidate directories from `candidate_mcp_handle_dirs` are tried in
+/// order; the first one that can be created and written wins.
+///
+/// # Errors
+///
+/// Returns the serialization error if the artifact cannot be encoded, or the
+/// last I/O error seen when no candidate directory accepted the file.
+pub(crate) fn store_mcp_artifact(
+    index_dir: &Path,
+    artifact: &McpArtifact,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let payload = serde_json::to_vec_pretty(artifact)?;
+    let file_name = format!("{}.json", artifact.handle);
+    let mut last_error: Option<io::Error> = None;
+
+    for handle_dir in candidate_mcp_handle_dirs(index_dir) {
+        let attempt = fs::create_dir_all(&handle_dir)
+            .and_then(|()| fs::write(handle_dir.join(&file_name), &payload));
+
+        match attempt {
+            Ok(()) => return Ok(()),
+            // Remember the failure and move on to the next candidate.
+            Err(err) => last_error = Some(err),
+        }
+    }
+
+    let failure =
+        last_error.unwrap_or_else(|| io::Error::other("unable to store MCP artifact"));
+    Err(failure.into())
+}
+
+/// Loads the artifact stored under `handle` from the first candidate directory
+/// that contains it.
+///
+/// `handle` may come straight from an MCP client, so it is only accepted when
+/// `<handle>.json` is a single plain file name. Anything that would add a path
+/// component (`..`, a separator, an absolute path or a drive prefix) is
+/// reported as an unknown handle instead of being joined onto the store path.
+///
+/// Every candidate directory is consulted even if an earlier one fails with an
+/// error other than "not found". `store_mcp_artifact` falls back to the next
+/// directory when the primary one is unusable, so the lookup has to do the same.
+///
+/// # Errors
+///
+/// Returns a JSON error when the stored file cannot be parsed. Otherwise returns
+/// the first non-`NotFound` I/O error if one occurred, or a `NotFound` error when
+/// the handle is not present (or not a valid handle name).
+pub(crate) fn load_mcp_artifact(
+    index_dir: &Path,
+    handle: &str,
+) -> Result<McpArtifact, Box<dyn std::error::Error>> {
+    let file_name = format!("{handle}.json");
+
+    // Exactly one `Normal` component means the name cannot leave the store directory.
+    let mut components = Path::new(&file_name).components();
+    let is_plain_file_name = matches!(
+        (components.next(), components.next()),
+        (Some(std::path::Component::Normal(_)), None)
+    );
+    if !is_plain_file_name {
+        return Err(io::Error::new(io::ErrorKind::NotFound, "unknown handle").into());
+    }
+
+    let mut not_found: Option<io::Error> = None;
+    let mut other_failure: Option<io::Error> = None;
+
+    for handle_dir in candidate_mcp_handle_dirs(index_dir) {
+        match fs::read_to_string(handle_dir.join(&file_name)) {
+            Ok(content) => return Ok(serde_json::from_str(&content)?),
+            Err(err) if err.kind() == io::ErrorKind::NotFound => {
+                not_found = Some(err);
+            }
+            Err(err) => {
+                // Keep the first unexpected error but still try the remaining stores.
+                other_failure.get_or_insert(err);
+            }
+        }
+    }
+
+    Err(other_failure
+        .or(not_found)
+        .unwrap_or_else(|| io::Error::new(io::ErrorKind::NotFound, "unknown handle"))
+        .into())
+}
+
+/// Builds the `search_context` response for a query or an explicit file list.
+///
+/// Sections are selected with `collect_context_selection`, de-duplicated,
+/// refined into previews and then admitted one by one while the result, token
+/// and byte budgets in `options` allow. Each admitted result is also persisted
+/// as an `McpArtifact` so that its handle can be fetched later.
+///
+/// Selection problems and artifact-store failures are not returned as `Err`:
+/// they produce an `Ok` response whose `error`/`message` fields are set.
+///
+/// # Errors
+///
+/// Returns an error when the forward index cannot be loaded from `index_dir`.
+pub(crate) fn build_mcp_search_response(
+    query: &str,
+    from_files: &[String],
+    index_dir: &Path,
+    options: McpSearchOptions,
+) -> Result<McpSearchResponse, Box<dyn std::error::Error>> {
+    let forward_index = load_forward_index(index_dir)?;
+    let selection_mode = if from_files.is_empty() {
+        "query".to_string()
+    } else {
+        "from_files".to_string()
+    };
+    // Label used in error responses when the caller supplied no query text.
+    let requested_query = if query.trim().is_empty() {
+        "selected files".to_string()
+    } else {
+        query.to_string()
+    };
+
+    // Ask for four times the requested results (and at least 8 candidates).
+    let selection_limit = options.max_results.max(1).saturating_mul(4).max(8);
+    let selection = match collect_context_selection(
+        query,
+        from_files,
+        &forward_index,
+        selection_limit,
+    ) {
+        Ok(selection) => selection,
+        Err(issue) => {
+            // Map each selection issue to an error code, a message and the
+            // list of missing files (empty unless files were missing).
+            let (error, message, missing_files) = match issue {
+                ContextSelectionIssue::NoSearchableTerms => (
+                    Some("no_query_terms".to_string()),
+                    Some("No searchable terms in query. Try different keywords.".to_string()),
+                    Vec::new(),
+                ),
+                ContextSelectionIssue::MissingFiles(missing) => (
+                    Some("missing_files".to_string()),
+                    Some(
+                        "Some files were not found in the index; search-context requires explicit indexed files."
+                            .to_string(),
+                    ),
+                    missing,
+                ),
+                ContextSelectionIssue::NoIndexedFilesMatched => (
+                    Some("no_indexed_files".to_string()),
+                    Some("No indexed files matched the provided inputs.".to_string()),
+                    Vec::new(),
+                ),
+                ContextSelectionIssue::NoRelevantSections(label) => (
+                    Some("no_relevant_sections".to_string()),
+                    Some(format!("No relevant sections found for query: \"{label}\"")),
+                    Vec::new(),
+                ),
+            };
+
+            return Ok(McpSearchResponse {
+                schema_version: MCP_SCHEMA_VERSION,
+                tool: "search_context".to_string(),
+                query: requested_query,
+                selection_mode,
+                budget: McpSearchBudget {
+                    max_results: options.max_results,
+                    max_tokens: options.max_tokens,
+                    max_bytes: options.max_bytes,
+                    ..McpSearchBudget::default()
+                },
+                pressure: McpPressure::default(),
+                results: Vec::new(),
+                error,
+                message,
+                missing_files,
+            });
+        }
+    };
+
+    let (unique_sections, deduped_hits) = dedupe_section_matches(selection.sections.clone());
+    let max_results = options.max_results.max(1);
+    // Per-result budgets: an even share of the totals, floored at 40 tokens / 160 bytes.
+    let per_result_tokens = (options.max_tokens / max_results).max(40);
+    let per_result_bytes = (options.max_bytes / max_results).max(160);
+    let preview_sections = apply_extractive_refiner(
+        unique_sections.clone(),
+        &selection.query_for_refiner,
+        per_result_tokens,
+    );
+
+    let mut pressure = McpPressure::default();
+    let mut budget = McpSearchBudget {
+        max_results: options.max_results,
+        max_tokens: options.max_tokens,
+        max_bytes: options.max_bytes,
+        candidate_hits: selection.sections.len(),
+        deduped_hits,
+        ..McpSearchBudget::default()
+    };
+    let mut results = Vec::new();
+    let mut used_tokens = 0usize;
+    let mut used_bytes = 0usize;
+
+    // Raw sections and their refined previews are walked in lockstep.
+    for (rank, (raw_section, preview_section)) in unique_sections
+        .iter()
+        .zip(preview_sections.iter())
+        .enumerate()
+    {
+        if results.len() >= max_results {
+            pressure.truncated = true;
+            pressure.reasons.push("result_cap".to_string());
+            break;
+        }
+
+        let (preview, truncated, truncation_reasons) = truncate_text_to_budget(
+            &preview_section.section.content,
+            per_result_tokens,
+            per_result_bytes,
+        );
+        let preview_tokens = estimate_tokens(&preview);
+        let preview_bytes = preview.len();
+        let mut result_truncated = preview_section.truncated || truncated;
+        let mut result_reasons = preview_section.truncation_reasons.clone();
+        result_reasons.extend(truncation_reasons.clone());
+
+        // Stop before admitting a preview that would exceed a total budget.
+        if used_tokens + preview_tokens > options.max_tokens {
+            pressure.truncated = true;
+            pressure.reasons.push("token_cap".to_string());
+            break;
+        }
+        if used_bytes + preview_bytes > options.max_bytes {
+            pressure.truncated = true;
+            pressure.reasons.push("byte_cap".to_string());
+            break;
+        }
+
+        result_reasons.sort();
+        result_reasons.dedup();
+        result_truncated = result_truncated || !result_reasons.is_empty();
+
+        if result_truncated {
+            pressure.truncated = true;
+            pressure.reasons.extend(result_reasons.clone());
+        }
+
+        // The artifact keeps the full raw content; the result carries only the preview.
+        let handle = build_mcp_handle(&selection.query_label, raw_section);
+        let artifact = McpArtifact {
+            schema_version: MCP_SCHEMA_VERSION,
+            handle: handle.clone(),
+            query: selection.query_label.clone(),
+            source: build_mcp_source_ref(raw_section),
+            scores: McpScoreBreakdown {
+                bm25: raw_section.bm25_score,
+                canonicality: raw_section.canonicality,
+                combined: combined_section_score(raw_section),
+            },
+            preview: preview.clone(),
+            content: raw_section.content.clone(),
+            created_at: chrono_now(),
+        };
+        // If the artifact cannot be stored, return the results gathered so far
+        // together with an `artifact_store_unavailable` error.
+        if let Err(err) = store_mcp_artifact(index_dir, &artifact) {
+            return Ok(McpSearchResponse {
+                schema_version: MCP_SCHEMA_VERSION,
+                tool: "search_context".to_string(),
+                query: selection.query_label.clone(),
+                selection_mode: selection_mode.clone(),
+                budget: McpSearchBudget {
+                    returned_results: results.len(),
+                    estimated_tokens: used_tokens,
+                    bytes: used_bytes,
+                    ..budget
+                },
+                pressure,
+                results,
+                error: Some("artifact_store_unavailable".to_string()),
+                message: Some(format!(
+                    "Unable to persist MCP handles for follow-up fetches: {err}"
+                )),
+                missing_files: Vec::new(),
+            });
+        }
+
+        results.push(McpSearchResult {
+            handle,
+            rank: rank + 1,
+            source: artifact.source.clone(),
+            scores: artifact.scores.clone(),
+            preview,
+            preview_tokens,
+            preview_bytes,
+            truncated: result_truncated,
+            truncation_reasons: result_reasons,
+        });
+
+        used_tokens += preview_tokens;
+        used_bytes += preview_bytes;
+    }
+
+    budget.returned_results = results.len();
+    budget.omitted_hits = unique_sections.len().saturating_sub(results.len());
+    budget.estimated_tokens = used_tokens;
+    budget.bytes = used_bytes;
+
+    // Any omitted hit is reported as `result_cap` unless that reason is already present.
+    if budget.omitted_hits > 0 && !pressure.reasons.iter().any(|reason| reason == "result_cap") {
+        pressure.truncated = true;
+        pressure.reasons.push("result_cap".to_string());
+    }
+    pressure.reasons.sort();
+    pressure.reasons.dedup();
+
+    Ok(McpSearchResponse {
+        schema_version: MCP_SCHEMA_VERSION,
+        tool: "search_context".to_string(),
+        query: selection.query_label,
+        selection_mode,
+        budget,
+        pressure,
+        results,
+        error: None,
+        message: None,
+        missing_files: Vec::new(),
+    })
+}
+
+/// CLI entry point for `search-context`: builds the search response and prints
+/// it to stdout as pretty-printed JSON.
+///
+/// # Errors
+///
+/// Propagates failures from `build_mcp_search_response` and from JSON encoding.
+pub(crate) fn cmd_mcp_search_context(
+    query: &str,
+    from_files: &[String],
+    index_dir: &Path,
+    options: McpSearchOptions,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let search = build_mcp_search_response(query, from_files, index_dir, options)?;
+    let rendered = serde_json::to_string_pretty(&search)?;
+    println!("{rendered}");
+    Ok(())
+}
+
+/// Builds the `fetch_context` response for a previously issued handle.
+///
+/// When the artifact cannot be loaded (for any reason) the response carries the
+/// `unknown_handle` error and no result. Otherwise the stored content is cut
+/// down to `options.max_tokens` / `options.max_bytes` and returned together
+/// with the stored source, scores and preview.
+pub(crate) fn build_mcp_fetch_response(
+    handle: &str,
+    index_dir: &Path,
+    options: McpFetchOptions,
+) -> Result<McpFetchResponse, Box<dyn std::error::Error>> {
+    let stored = match load_mcp_artifact(index_dir, handle) {
+        Ok(stored) => stored,
+        Err(_) => {
+            let empty_budget = McpFetchBudget {
+                max_tokens: options.max_tokens,
+                max_bytes: options.max_bytes,
+                ..McpFetchBudget::default()
+            };
+            let hint = format!(
+                "No stored MCP artifact found for handle '{handle}'. Run `yore mcp search-context` first."
+            );
+            return Ok(McpFetchResponse {
+                schema_version: MCP_SCHEMA_VERSION,
+                tool: "fetch_context".to_string(),
+                handle: handle.to_string(),
+                budget: empty_budget,
+                pressure: McpPressure::default(),
+                query: None,
+                result: None,
+                error: Some("unknown_handle".to_string()),
+                message: Some(hint),
+            });
+        }
+    };
+
+    let (content, truncated, reasons) =
+        truncate_text_to_budget(&stored.content, options.max_tokens, options.max_bytes);
+    let content_tokens = estimate_tokens(&content);
+    let content_bytes = content.len();
+
+    let budget = McpFetchBudget {
+        max_tokens: options.max_tokens,
+        max_bytes: options.max_bytes,
+        estimated_tokens: content_tokens,
+        bytes: content_bytes,
+    };
+    let pressure = McpPressure { truncated, reasons };
+    let fetched = McpFetchResult {
+        source: stored.source,
+        scores: stored.scores,
+        preview: stored.preview,
+        content,
+        content_tokens,
+        content_bytes,
+    };
+
+    Ok(McpFetchResponse {
+        schema_version: MCP_SCHEMA_VERSION,
+        tool: "fetch_context".to_string(),
+        handle: handle.to_string(),
+        budget,
+        pressure,
+        query: Some(stored.query),
+        result: Some(fetched),
+        error: None,
+        message: None,
+    })
+}
+
+/// CLI entry point for `fetch-context`: builds the fetch response and prints it
+/// to stdout as pretty-printed JSON.
+///
+/// # Errors
+///
+/// Propagates failures from `build_mcp_fetch_response` and from JSON encoding.
+pub(crate) fn cmd_mcp_fetch_context(
+    handle: &str,
+    index_dir: &Path,
+    options: McpFetchOptions,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let fetched = build_mcp_fetch_response(handle, index_dir, options)?;
+    let rendered = serde_json::to_string_pretty(&fetched)?;
+    println!("{rendered}");
+    Ok(())
+}
+
+/// Reads one `Content-Length`-framed JSON message from `reader`.
+///
+/// Header lines are consumed up to the first blank line; only the
+/// `Content-Length` header (matched case-insensitively) is interpreted. The
+/// body is then read and parsed as JSON.
+///
+/// Returns `Ok(None)` when the stream ends before a `Content-Length` header has
+/// been seen.
+///
+/// # Errors
+///
+/// * `UnexpectedEof` when the stream ends after a `Content-Length` header but
+///   before the blank line, or before the announced number of body bytes arrived.
+/// * `InvalidData` when `Content-Length` is missing or not a number, or when the
+///   body is not valid JSON.
+/// * Any I/O error reported by `reader`.
+pub(crate) fn read_mcp_stdio_message<R: BufRead>(
+    reader: &mut R,
+) -> Result<Option<serde_json::Value>, io::Error> {
+    let mut content_length: Option<usize> = None;
+    let mut line = String::new();
+
+    loop {
+        line.clear();
+        let bytes_read = reader.read_line(&mut line)?;
+        if bytes_read == 0 {
+            if content_length.is_none() {
+                return Ok(None);
+            }
+            return Err(io::Error::new(
+                io::ErrorKind::UnexpectedEof,
+                "unexpected EOF while reading MCP message headers",
+            ));
+        }
+
+        // A blank line terminates the header section.
+        if line == "\r\n" || line == "\n" {
+            break;
+        }
+
+        let header = line.trim_end_matches(['\r', '\n']);
+        if let Some((name, value)) = header.split_once(':') {
+            if name.eq_ignore_ascii_case("content-length") {
+                content_length = Some(value.trim().parse().map_err(|err| {
+                    io::Error::new(
+                        io::ErrorKind::InvalidData,
+                        format!("invalid Content-Length header: {err}"),
+                    )
+                })?);
+            }
+        }
+    }
+
+    let content_length = content_length.ok_or_else(|| {
+        io::Error::new(
+            io::ErrorKind::InvalidData,
+            "missing Content-Length header in MCP message",
+        )
+    })?;
+
+    // The announced length comes from the peer, so it is not used to size a
+    // buffer up front. Reading through a `take` limit grows the buffer only as
+    // bytes actually arrive; a bogus huge header can no longer force a giant
+    // allocation.
+    let limit = u64::try_from(content_length).unwrap_or(u64::MAX);
+    let mut payload = Vec::new();
+    let received =
+        io::Read::read_to_end(&mut io::Read::take(&mut *reader, limit), &mut payload)?;
+    if received != content_length {
+        return Err(io::Error::new(
+            io::ErrorKind::UnexpectedEof,
+            "unexpected EOF while reading MCP message body",
+        ));
+    }
+
+    serde_json::from_slice(&payload)
+        .map(Some)
+        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
+}
+
+/// Serializes `payload` to JSON and writes it to `writer` as one framed
+/// message: a `Content-Length` header, a blank line, then the body. The writer
+/// is flushed before returning.
+///
+/// # Errors
+///
+/// Returns the serialization error or any I/O error from `writer`.
+pub(crate) fn write_mcp_stdio_message<W: Write, T: Serialize>(
+    writer: &mut W,
+    payload: &T,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let encoded = serde_json::to_vec(payload)?;
+    let header = format!("Content-Length: {}\r\n\r\n", encoded.len());
+
+    writer.write_all(header.as_bytes())?;
+    writer.write_all(&encoded)?;
+    writer.flush()?;
+    Ok(())
+}
+
+/// Picks the index directory for a tool call: the per-call override when one
+/// was supplied, otherwise a copy of `default_index`.
+pub(crate) fn resolve_mcp_tool_index(
+    default_index: &Path,
+    requested_index: Option<PathBuf>,
+) -> PathBuf {
+    match requested_index {
+        Some(override_dir) => override_dir,
+        None => default_index.to_path_buf(),
+    }
+}
+
+/// Returns the JSON array of tool descriptors served for `tools/list`.
+///
+/// Two tools are described, each with a name, a description and a JSON Schema
+/// for its arguments:
+///
+/// * `search_context` takes `query` or `from_files`, plus optional
+///   `max_results`, `max_tokens`, `max_bytes` and `index`.
+/// * `fetch_context` takes a required `handle`, plus optional `max_tokens`,
+///   `max_bytes` and `index`.
+pub(crate) fn mcp_tool_definitions() -> serde_json::Value {
+    serde_json::json!([
+        {
+            "name": "search_context",
+            "description": "Return bounded previews, source references, pressure metadata, and opaque handles for explicit follow-up fetches.",
+            "inputSchema": {
+                "type": "object",
+                "additionalProperties": false,
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "Natural language query or question. Optional when from_files is provided."
+                    },
+                    "from_files": {
+                        "type": "array",
+                        "description": "Explicit indexed files to preview instead of a query. Supports @list.txt expansion.",
+                        "items": {
+                            "type": "string"
+                        },
+                        "minItems": 1
+                    },
+                    "max_results": {
+                        "type": "integer",
+                        "description": "Maximum preview results to return.",
+                        "minimum": 1,
+                        "default": 5
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "description": "Approximate maximum total tokens across previews.",
+                        "minimum": 1,
+                        "default": 1200
+                    },
+                    "max_bytes": {
+                        "type": "integer",
+                        "description": "Maximum total bytes across previews.",
+                        "minimum": 1,
+                        "default": 12000
+                    },
+                    "index": {
+                        "type": "string",
+                        "description": "Optional override for the index directory. Defaults to the server's configured index."
+                    }
+                },
+                // The schema asks for either `query` or `from_files`.
+                "oneOf": [
+                    {
+                        "required": ["query"]
+                    },
+                    {
+                        "required": ["from_files"]
+                    }
+                ]
+            }
+        },
+        {
+            "name": "fetch_context",
+            "description": "Expand a previously returned opaque handle with its own token and byte caps.",
+            "inputSchema": {
+                "type": "object",
+                "additionalProperties": false,
+                "properties": {
+                    "handle": {
+                        "type": "string",
+                        "description": "Opaque ctx_... handle returned by search_context."
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "description": "Approximate maximum tokens for fetched content.",
+                        "minimum": 1,
+                        "default": 4000
+                    },
+                    "max_bytes": {
+                        "type": "integer",
+                        "description": "Maximum bytes for fetched content.",
+                        "minimum": 1,
+                        "default": 20000
+                    },
+                    "index": {
+                        "type": "string",
+                        "description": "Optional override for the index directory. Defaults to the server's configured index."
+                    }
+                },
+                "required": ["handle"]
+            }
+        }
+    ])
+}
+
+/// Wraps `payload` in a tool-call result object.
+///
+/// The payload appears twice: as a compact JSON string inside a single `text`
+/// content item, and as a JSON value under `structuredContent`. `is_error` is
+/// copied into `isError`.
+///
+/// # Errors
+///
+/// Returns the serialization error if `payload` cannot be encoded.
+pub(crate) fn build_mcp_tool_result<T: Serialize>(
+    payload: &T,
+    is_error: bool,
+) -> Result<serde_json::Value, Box<dyn std::error::Error>> {
+    let text = serde_json::to_string(payload)?;
+    let structured = serde_json::to_value(payload)?;
+    let text_item = serde_json::json!({
+        "type": "text",
+        "text": text,
+    });
+
+    Ok(serde_json::json!({
+        "content": [text_item],
+        "structuredContent": structured,
+        "isError": is_error,
+    }))
+}
+
+/// Builds a JSON-RPC 2.0 success envelope carrying `result` for request `id`.
+pub(crate) fn json_rpc_success(
+    id: serde_json::Value,
+    result: serde_json::Value,
+) -> serde_json::Value {
+    let mut envelope = serde_json::Map::new();
+    envelope.insert("jsonrpc".to_string(), serde_json::Value::from("2.0"));
+    envelope.insert("id".to_string(), id);
+    envelope.insert("result".to_string(), result);
+    serde_json::Value::Object(envelope)
+}
+
+pub(crate) fn json_rpc_error(
+    id: Option<serde_json::Value>,
+    code: i64,
+    message: &str,
+) -> serde_json::Value {
+    serde_json::json!({
+        "jsonrpc": "2.0",
+        "id": id.unwrap_or(serde_json::Value::Null),
+        "error": {
+            "code": code,
+            "message": message,
+        }
+    })
+}
+
+/// Turns the outcome of one tool invocation into a JSON-RPC reply.
+///
+/// Returns `None` when the request carried no `id` (nothing to reply to). A
+/// failed invocation becomes a `-32603` error reply instead of an `Err`, so a
+/// single bad call cannot take the server down.
+fn mcp_tool_call_reply(
+    id: Option<serde_json::Value>,
+    tool: &str,
+    outcome: Result<serde_json::Value, Box<dyn std::error::Error>>,
+) -> Option<serde_json::Value> {
+    let id = id?;
+    Some(match outcome {
+        Ok(result) => json_rpc_success(id, result),
+        // -32603 is the JSON-RPC 2.0 "Internal error" code.
+        Err(err) => json_rpc_error(Some(id), -32603, &format!("{tool} failed: {err}")),
+    })
+}
+
+/// Runs the MCP server loop over stdin/stdout until stdin reaches EOF.
+///
+/// Each framed message is parsed as a JSON-RPC 2.0 request and dispatched:
+///
+/// * `initialize`, `ping`, `tools/list` are answered directly.
+/// * `notifications/initialized`, `notifications/cancelled` produce no reply.
+/// * `tools/call` runs `search_context` or `fetch_context` against
+///   `index_dir`, or the per-call `index` override.
+/// * Any other method gets a `-32601` error.
+///
+/// Requests without an `id` never receive a reply.
+///
+/// A failure while executing a tool (for example an index that cannot be
+/// loaded) is reported to the client as a `-32603` error for that request; the
+/// server keeps running.
+///
+/// # Errors
+///
+/// Returns an error when a message cannot be read from stdin or a reply cannot
+/// be written to stdout.
+pub(crate) fn cmd_mcp_serve(index_dir: &Path) -> Result<(), Box<dyn std::error::Error>> {
+    let stdin = io::stdin();
+    let stdout = io::stdout();
+    let mut reader = stdin.lock();
+    let mut writer = stdout.lock();
+
+    loop {
+        let Some(message) = read_mcp_stdio_message(&mut reader)? else {
+            break;
+        };
+        let request: JsonRpcRequest = match serde_json::from_value(message) {
+            Ok(request) => request,
+            Err(err) => {
+                let response = json_rpc_error(None, -32600, &format!("Invalid request: {err}"));
+                write_mcp_stdio_message(&mut writer, &response)?;
+                continue;
+            }
+        };
+
+        if request.jsonrpc.as_deref() != Some("2.0") {
+            if let Some(id) = request.id {
+                let response = json_rpc_error(Some(id), -32600, "Only JSON-RPC 2.0 is supported.");
+                write_mcp_stdio_message(&mut writer, &response)?;
+            }
+            continue;
+        }
+
+        let response = match request.method.as_str() {
+            "initialize" => {
+                // Echo the client's protocol version when it sent one.
+                let params: McpInitializeParams = serde_json::from_value(request.params)
+                    .unwrap_or_else(|_| McpInitializeParams::default());
+                let protocol_version = params
+                    .protocol_version
+                    .unwrap_or_else(|| DEFAULT_MCP_PROTOCOL_VERSION.to_string());
+                request.id.map(|id| {
+                    json_rpc_success(
+                        id,
+                        serde_json::json!({
+                            "protocolVersion": protocol_version,
+                            "capabilities": {
+                                "tools": {
+                                    "listChanged": false
+                                }
+                            },
+                            "serverInfo": {
+                                "name": "yore",
+                                "version": env!("CARGO_PKG_VERSION")
+                            },
+                            "instructions": "Use search_context for bounded previews and fetch_context only for explicit follow-up expansion.",
+                        }),
+                    )
+                })
+            }
+            "notifications/initialized" | "notifications/cancelled" => None,
+            "ping" => request
+                .id
+                .map(|id| json_rpc_success(id, serde_json::json!({}))),
+            "tools/list" => request.id.map(|id| {
+                json_rpc_success(
+                    id,
+                    serde_json::json!({
+                        "tools": mcp_tool_definitions(),
+                    }),
+                )
+            }),
+            "tools/call" => {
+                let id = request.id.clone();
+                match serde_json::from_value::<McpToolCallParams>(request.params) {
+                    Ok(McpToolCallParams { name, arguments }) => match name.as_str() {
+                        "search_context" => {
+                            match serde_json::from_value::<McpSearchToolArgs>(arguments) {
+                                Ok(args) => {
+                                    let tool_index = resolve_mcp_tool_index(index_dir, args.index);
+                                    let outcome = build_mcp_search_response(
+                                        args.query.trim(),
+                                        &args.from_files,
+                                        &tool_index,
+                                        McpSearchOptions {
+                                            max_results: args.max_results,
+                                            max_tokens: args.max_tokens,
+                                            max_bytes: args.max_bytes,
+                                        },
+                                    )
+                                    .and_then(|response| {
+                                        build_mcp_tool_result(&response, response.error.is_some())
+                                    });
+                                    mcp_tool_call_reply(id, "search_context", outcome)
+                                }
+                                Err(err) => id.map(|id| {
+                                    json_rpc_error(
+                                        Some(id),
+                                        -32602,
+                                        &format!("Invalid search_context arguments: {err}"),
+                                    )
+                                }),
+                            }
+                        }
+                        "fetch_context" => {
+                            match serde_json::from_value::<McpFetchToolArgs>(arguments) {
+                                Ok(args) => {
+                                    let tool_index = resolve_mcp_tool_index(index_dir, args.index);
+                                    let outcome = build_mcp_fetch_response(
+                                        args.handle.trim(),
+                                        &tool_index,
+                                        McpFetchOptions {
+                                            max_tokens: args.max_tokens,
+                                            max_bytes: args.max_bytes,
+                                        },
+                                    )
+                                    .and_then(|response| {
+                                        build_mcp_tool_result(&response, response.error.is_some())
+                                    });
+                                    mcp_tool_call_reply(id, "fetch_context", outcome)
+                                }
+                                Err(err) => id.map(|id| {
+                                    json_rpc_error(
+                                        Some(id),
+                                        -32602,
+                                        &format!("Invalid fetch_context arguments: {err}"),
+                                    )
+                                }),
+                            }
+                        }
+                        _ => id.map(|id| {
+                            json_rpc_error(Some(id), -32602, &format!("Unknown tool '{name}'."))
+                        }),
+                    },
+                    Err(err) => id.map(|id| {
+                        json_rpc_error(
+                            Some(id),
+                            -32602,
+                            &format!("Invalid tools/call params: {err}"),
+                        )
+                    }),
+                }
+            }
+            _ => request.id.map(|id| {
+                json_rpc_error(
+                    Some(id),
+                    -32601,
+                    &format!("Method '{}' is not supported.", request.method),
+                )
+            }),
+        };
+
+        if let Some(response) = response {
+            write_mcp_stdio_message(&mut writer, &response)?;
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/search.rs b/src/search.rs
new file mode 100644
index 0000000..869a427
--- /dev/null
+++ b/src/search.rs
@@ -0,0 +1,317 @@
+use ahash::AHasher;
+use regex::Regex;
+use std::collections::{HashMap, HashSet};
+use std::hash::{Hash, Hasher};
+
+use crate::types::*;
+use crate::util::default_query_stop_words;
+
+// BM25 tuning: K1 scales term-frequency saturation, B weights document-length normalization
+pub const BM25_K1: f64 = 1.5;
+pub const BM25_B: f64 = 0.75;
+
+pub fn extract_keywords(text: &str) -> Vec<String> {
+    extract_keywords_with_options(text, true) // `true` = drop the default query stop words
+}
+
+/// Lowercased word tokens of `text` (3+ bytes), minus stop words when `filter_stopwords` is set.
+pub fn extract_keywords_with_options(text: &str, filter_stopwords: bool) -> Vec<String> {
+    static WORD_RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new(); // compiled once
+    let word_re = WORD_RE.get_or_init(|| Regex::new(r"[a-zA-Z][a-zA-Z0-9_-]*").unwrap());
+    let stop_words: HashSet<&str> = default_query_stop_words().iter().copied().collect();
+    word_re
+        .find_iter(text)
+        .map(|m| m.as_str().to_lowercase())
+        .filter(|w| w.len() >= 3 && (!filter_stopwords || !stop_words.contains(w.as_str())))
+        .collect()
+}
+
+pub fn parse_query_terms(query: &str, filter_stopwords: bool) -> Vec<String> {
+    extract_keywords_with_options(query, filter_stopwords) // thin alias: same tokenizer
+}
+
+/// Split a raw query into search terms and quoted phrases.
+///
+/// Double quotes toggle phrase mode: every odd-numbered segment produced by
+/// splitting on `"` lies inside quotes. A quote that is never closed still
+/// marks the remainder of the query as a phrase. Segments that are blank
+/// after trimming are ignored.
+///
+/// Every segment, quoted or not, contributes to `terms` (honouring
+/// `filter_stopwords`). A quoted segment additionally yields a `PhraseGroup`
+/// built without stop-word filtering, provided it tokenizes to something.
+pub fn parse_query(query: &str, filter_stopwords: bool) -> ParsedQuery {
+    let mut terms = Vec::new();
+    let mut phrases = Vec::new();
+
+    for (position, segment) in query.split('"').enumerate() {
+        let text = segment.trim();
+        if text.is_empty() {
+            continue;
+        }
+
+        // Segments alternate outside/inside quotes, starting outside.
+        let quoted = position % 2 == 1;
+        terms.extend(parse_query_terms(text, filter_stopwords));
+
+        if !quoted {
+            continue;
+        }
+
+        // Phrases keep their stop words.
+        let phrase_terms = extract_keywords_with_options(text, false);
+        if phrase_terms.is_empty() {
+            continue;
+        }
+        phrases.push(PhraseGroup {
+            terms: phrase_terms,
+        });
+    }
+
+    ParsedQuery { terms, phrases }
+}
+
+/// Strip the first matching suffix from the lowercased `word`.
+///
+/// Suffixes are tried in the listed order, and a suffix is only removed when
+/// more than two bytes of stem remain; otherwise the lowercased word is
+/// returned untouched.
+pub fn stem_word(word: &str) -> String {
+    const SUFFIXES: &[&str] = &[
+        "ization", "ational", "iveness", "fulness", "ousness", "ation", "ement", "ment", "able",
+        "ible", "ness", "ical", "ings", "ing", "ies", "ive", "ful", "ous", "ity", "ed", "ly", "er",
+        "es", "s",
+    ];
+
+    let lowered = word.to_lowercase();
+    let stem = SUFFIXES
+        .iter()
+        .find_map(|suffix| lowered.strip_suffix(*suffix).filter(|rest| rest.len() > 2))
+        .map(str::to_string);
+    stem.unwrap_or(lowered)
+}
+
+/// Pick up to `n` distinctive keywords of `entry`, ranked by TF-IDF.
+///
+/// Keywords are reported lowercased but unstemmed. Each stem is considered
+/// once (first occurrence wins, body keywords before file-level keywords),
+/// and any keyword whose stem matches a stem of `exclude_terms` is skipped.
+/// Keywords whose TF-IDF score is not positive are dropped.
+pub fn get_top_doc_terms(
+    entry: &FileEntry,
+    idf_map: &HashMap<String, f64>,
+    exclude_terms: &[String],
+    n: usize,
+) -> Vec<String> {
+    if n == 0 {
+        return Vec::new();
+    }
+
+    // Exclusions are compared on stems, so stem them once up front.
+    let excluded: HashSet<String> = exclude_terms
+        .iter()
+        .map(|term| stem_word(&term.to_lowercase()))
+        .collect();
+
+    let mut visited: HashSet<String> = HashSet::new();
+    let mut ranked: Vec<(String, f64)> = Vec::new();
+
+    let candidates = entry.body_keywords.iter().chain(entry.keywords.iter());
+    for keyword in candidates {
+        let lowered = keyword.to_lowercase();
+        let stem = stem_word(&lowered);
+        // An excluded stem is never recorded; a repeated stem is ignored.
+        if excluded.contains(&stem) || !visited.insert(stem.clone()) {
+            continue;
+        }
+
+        // Term frequency and IDF are both looked up by stem.
+        let tf = entry.term_frequencies.get(&stem).copied().unwrap_or(0) as f64;
+        let idf = idf_map.get(&stem).copied().unwrap_or(0.0);
+        let score = tf * idf;
+        if score > 0.0 {
+            ranked.push((lowered, score));
+        }
+    }
+
+    // Highest score first; the stable sort keeps encounter order on ties.
+    ranked.sort_by(|lhs, rhs| {
+        rhs.1
+            .partial_cmp(&lhs.1)
+            .unwrap_or(std::cmp::Ordering::Equal)
+    });
+
+    ranked.into_iter().take(n).map(|(term, _)| term).collect()
+}
+
+/// Compute a 64-bit SimHash fingerprint of `content`.
+///
+/// Features are overlapping three-word shingles of the whitespace-split text.
+/// Every shingle hash votes +1 or -1 on each of the 64 bit positions, and the
+/// fingerprint has a bit set wherever the votes sum to a positive value.
+/// Content with fewer than three words has no shingles and hashes to 0.
+pub fn compute_simhash(content: &str) -> u64 {
+    let tokens: Vec<&str> = content.split_whitespace().collect();
+    let mut votes = [0i32; 64];
+
+    // Tally one vote per bit position for every shingle.
+    for shingle in tokens.windows(3) {
+        // `join` rebuilds the shingle as its words separated by single spaces.
+        let digest = hash_string(&shingle.join(" "));
+        for (bit, tally) in votes.iter_mut().enumerate() {
+            // A set bit votes for 1, a clear bit votes against.
+            *tally += match (digest >> bit) & 1 {
+                1 => 1,
+                _ => -1,
+            };
+        }
+    }
+
+    // Fold the per-bit tallies into the final fingerprint.
+    votes
+        .iter()
+        .enumerate()
+        .filter(|(_, tally)| **tally > 0)
+        .fold(0u64, |fingerprint, (bit, _)| fingerprint | (1u64 << bit))
+}
+
+/// Hash `s` with a freshly constructed `DefaultHasher` from the standard library.
+pub fn hash_string(s: &str) -> u64 {
+    let mut state = std::collections::hash_map::DefaultHasher::new();
+    s.hash(&mut state);
+    state.finish()
+}
+
+/// Hamming distance between two simhashes: how many of the 64 bit positions differ (0..=64)
+pub fn hamming_distance(a: u64, b: u64) -> u32 {
+    (a ^ b).count_ones()
+}
+
+/// Map the Hamming distance of two simhashes onto 0.0..=1.0, where 1.0 means identical
+pub fn simhash_similarity(a: u64, b: u64) -> f64 {
+    let differing_bits = f64::from((a ^ b).count_ones());
+    1.0 - differing_bits / 64.0
+}
+
+/// Fingerprint every heading-delimited section of `content` with SimHash.
+///
+/// A section runs from its heading's line (1-based) up to the line before the
+/// next heading, or to the end of the document for the last heading. Bounds
+/// are clamped to the document so stale or out-of-order heading line numbers
+/// yield an empty section instead of a slice-index panic.
+pub fn index_sections(content: &str, headings: &[Heading]) -> Vec<SectionFingerprint> {
+    let lines: Vec<&str> = content.lines().collect();
+
+    headings
+        .iter()
+        .enumerate()
+        .map(|(i, heading)| {
+            // Clamp so that `start <= end <= lines.len()` always holds.
+            let next = headings.get(i + 1);
+            let end = next.map_or(lines.len(), |h| h.line.saturating_sub(1).min(lines.len()));
+            let start = heading.line.saturating_sub(1).min(end);
+
+            SectionFingerprint {
+                heading: heading.text.clone(),
+                level: heading.level,
+                // 1-based first line; `end` doubles as the 1-based last line.
+                line_start: start + 1,
+                line_end: end,
+                simhash: compute_simhash(&lines[start..end].join("\n")),
+            }
+        })
+        .collect()
+}
+
+/// Build a MinHash signature with `num_hashes` slots for a bag of keywords.
+///
+/// Slot `i` holds the smallest hash of any keyword when the slot index is
+/// mixed into the hasher as a seed; with no keywords every slot stays at
+/// `u64::MAX`.
+pub fn compute_minhash(keywords: &[String], num_hashes: usize) -> Vec<u64> {
+    let seeded_hash = |keyword: &String, seed: usize| {
+        let mut state = AHasher::default();
+        keyword.hash(&mut state);
+        seed.hash(&mut state); // Use index as seed
+        state.finish()
+    };
+
+    (0..num_hashes)
+        .map(|seed| keywords.iter().map(|kw| seeded_hash(kw, seed)).min().unwrap_or(u64::MAX))
+        .collect()
+}
+
+/// Estimate Jaccard similarity as the fraction of signature positions that agree.
+/// Returns 0.0 for empty signatures or signatures of different lengths.
+pub fn minhash_similarity(a: &[u64], b: &[u64]) -> f64 {
+    let slots = a.len();
+    if slots == 0 || slots != b.len() {
+        return 0.0;
+    }
+    let agreeing = (0..slots).filter(|&i| a[i] == b[i]).count();
+    agreeing as f64 / slots as f64
+}
+
+/// Score `doc` against `query_terms` with BM25; terms are lowercased and stemmed before lookup.
+/// Terms absent from the document add nothing, and an empty document always scores 0.0.
+pub fn bm25_score(
+    query_terms: &[String],
+    doc: &FileEntry,
+    avg_doc_length: f64,
+    idf_map: &HashMap<String, f64>,
+) -> f64 {
+    if doc.doc_length == 0 {
+        return 0.0;
+    }
+
+    let length_ratio = doc.doc_length as f64 / avg_doc_length;
+    let norm_factor = 1.0 - BM25_B + BM25_B * length_ratio;
+
+    query_terms.iter().fold(0.0, |score, term| {
+        let stem = stem_word(&term.to_lowercase());
+        match doc.term_frequencies.get(&stem).map(|&count| count as f64) {
+            Some(tf) if tf > 0.0 => {
+                let idf = idf_map.get(&stem).copied().unwrap_or(0.0);
+                score + idf * (tf * (BM25_K1 + 1.0)) / (tf + BM25_K1 * norm_factor)
+            }
+            _ => score,
+        }
+    })
+}
+
+/// Group file paths into LSH buckets keyed by the hash of each MinHash band.
+///
+/// Assumes 128-value signatures split evenly across `bands`. `bands == 0` yields no buckets and
+/// an empty or short signature only fills the bands it covers, rather than panicking.
+pub fn lsh_buckets(files: &HashMap<String, FileEntry>, bands: usize) -> HashMap<u64, Vec<String>> {
+    let mut buckets: HashMap<u64, Vec<String>> = HashMap::new();
+    if bands == 0 {
+        return buckets;
+    }
+    let rows_per_band = 128 / bands; // Assuming 128 hashes
+
+    for (path, entry) in files {
+        let signature = &entry.minhash;
+        for band in 0..bands {
+            let start = band * rows_per_band;
+            if start >= signature.len() {
+                break; // empty or short signature: nothing left to hash
+            }
+            let end = (start + rows_per_band).min(signature.len());
+            let mut hasher = AHasher::default();
+            signature[start..end].iter().for_each(|val| val.hash(&mut hasher));
+            buckets.entry(hasher.finish()).or_default().push(path.clone());
+        }
+    }
+
+    buckets
+}
+
+/// True when `needle` occurs as a contiguous run in `haystack`; an empty needle never matches.
+pub fn contains_phrase_tokens(haystack: &[String], needle: &[String]) -> bool {
+    let width = needle.len();
+    if width == 0 || width > haystack.len() {
+        return false;
+    }
+    (0..=haystack.len() - width).any(|at| haystack[at..at + width] == *needle)
+}
diff --git a/src/tests_main.rs b/src/tests_main.rs
new file mode 100644
index 0000000..5182f5e
--- /dev/null
+++ b/src/tests_main.rs
@@ -0,0 +1,1939 @@
+use super::*;
+
+#[test]
+fn test_jaccard_similarity() {
+    let left: HashSet<String> = ["foo", "bar", "baz"]
+        .iter()
+        .map(|word| String::from(*word))
+        .collect();
+    let right: HashSet<String> = ["bar", "baz", "qux"]
+        .iter()
+        .map(|word| String::from(*word))
+        .collect();
+
+    // Shared members: {bar, baz} -> 2
+    // Combined members: {foo, bar, baz, qux} -> 4
+    // Expected Jaccard index: 2 / 4 = 0.5
+    let overlap = jaccard_similarity(&left, &right);
+    assert_eq!(overlap, 0.5);
+
+    // Two empty sets are expected to report 0.0
+    let no_items_a: HashSet<String> = HashSet::new();
+    let no_items_b: HashSet<String> = HashSet::new();
+    assert_eq!(jaccard_similarity(&no_items_a, &no_items_b), 0.0);
+
+    // A set compared with itself is a perfect match
+    assert_eq!(jaccard_similarity(&left, &left), 1.0);
+}
+
+#[test]
+fn test_simhash_similarity() {
+    // A fingerprint is fully similar to itself
+    assert_eq!(simhash_similarity(0x123456, 0x123456), 1.0);
+
+    // All 64 bits differ -> similarity bottoms out at 0.0
+    let all_clear = u64::MIN;
+    let all_set = u64::MAX;
+    assert_eq!(simhash_similarity(all_clear, all_set), 0.0);
+
+    // A single differing bit costs 1/64 of the score
+    let base = 0u64;
+    let one_bit_off = 1u64;
+    let score = simhash_similarity(base, one_bit_off);
+    assert!((score - (63.0 / 64.0)).abs() < 0.01);
+}
+
+#[test]
+fn test_hamming_distance() {
+    let cases = [(0b1010, 0b1010, 0), (0b1010, 0b0101, 4), (0b1111, 0, 4), (0b1100, 0b1010, 2)];
+    for (a, b, expected) in cases {
+        assert_eq!(hamming_distance(a, b), expected);
+    }
+}
+
+#[test]
+fn test_compute_simhash_stability() {
+    let sentence = "The quick brown fox jumps over the lazy dog";
+    let same_sentence = "The quick brown fox jumps over the lazy dog";
+
+    let first = compute_simhash(sentence);
+    let second = compute_simhash(same_sentence);
+
+    // Hashing the same text twice must give the same fingerprint
+    assert_eq!(first, second);
+}
+
+#[test]
+fn test_compute_simhash_similarity() {
+    let text1 = "machine learning algorithms";
+    let text2 = "machine learning systems";
+    let text3 = "completely different topic about cooking";
+
+    let hash1 = compute_simhash(text1);
+    let hash2 = compute_simhash(text2);
+    let hash3 = compute_simhash(text3);
+
+    // text1 and text2 differ only in their last word; expected to score high
+    let sim_similar = simhash_similarity(hash1, hash2);
+    // text3 shares no words with text1; expected to score lower
+    let sim_different = simhash_similarity(hash1, hash3);
+
+    assert!(sim_similar > sim_different);
+    assert!(sim_similar > 0.5); // NOTE(review): text1/text2 are one 3-word shingle each, none shared
+}
+
+#[test]
+fn test_minhash_basic() {
+    let words = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
+    let same_words = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
+
+    let signature = compute_minhash(&words, 128);
+    let same_signature = compute_minhash(&same_words, 128);
+
+    // Equal keyword lists must hash to equal, full-length signatures
+    assert_eq!(signature, same_signature);
+    assert_eq!(signature.len(), 128);
+
+    // ...which in turn estimate a similarity of exactly 1.0
+    assert_eq!(minhash_similarity(&signature, &same_signature), 1.0);
+}
+
+#[test]
+fn test_minhash_similarity_estimation() {
+    let abc = vec!["a".to_string(), "b".to_string(), "c".to_string()];
+    let bcd = vec!["b".to_string(), "c".to_string(), "d".to_string()];
+    let xyz = vec!["x".to_string(), "y".to_string(), "z".to_string()];
+
+    let sig_abc = compute_minhash(&abc, 128);
+    let sig_bcd = compute_minhash(&bcd, 128);
+    let sig_xyz = compute_minhash(&xyz, 128);
+
+    // {a,b,c} vs {b,c,d}: 2 shared out of 4 distinct -> true Jaccard 0.5
+    let overlapping = minhash_similarity(&sig_abc, &sig_bcd);
+    // {a,b,c} vs {x,y,z}: nothing shared -> true Jaccard 0.0
+    let disjoint = minhash_similarity(&sig_abc, &sig_xyz);
+
+    // The estimate must rank the overlapping pair above the disjoint pair
+    assert!(overlapping > disjoint);
+    // ...and land near the true value of 0.5 (tolerance +/- 0.2)
+    assert!(0.3 < overlapping && overlapping < 0.7);
+}
+
+#[test]
+fn test_lsh_buckets() {
+    let mut corpus = HashMap::new();
+
+    // Three documents: two with the same keywords, one unrelated
+    let twin_a = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
+    let twin_b = vec!["foo".to_string(), "bar".to_string(), "baz".to_string()];
+    let outlier = vec!["completely".to_string(), "different".to_string()];
+
+    corpus.insert(
+        "file1.md".to_string(),
+        FileEntry {
+            path: "file1.md".to_string(),
+            keywords: twin_a.clone(),
+            minhash: compute_minhash(&twin_a, 128),
+            size_bytes: 100,
+            line_count: 10,
+            doc_length: 0,
+            simhash: 0,
+            headings: vec![],
+            body_keywords: vec![],
+            links: vec![],
+            term_frequencies: HashMap::new(),
+            section_fingerprints: vec![],
+            adr_references: vec![],
+        },
+    );
+
+    corpus.insert(
+        "file2.md".to_string(),
+        FileEntry {
+            path: "file2.md".to_string(),
+            keywords: twin_b.clone(),
+            minhash: compute_minhash(&twin_b, 128),
+            size_bytes: 100,
+            line_count: 10,
+            doc_length: 0,
+            simhash: 0,
+            headings: vec![],
+            body_keywords: vec![],
+            links: vec![],
+            term_frequencies: HashMap::new(),
+            section_fingerprints: vec![],
+            adr_references: vec![],
+        },
+    );
+
+    corpus.insert(
+        "file3.md".to_string(),
+        FileEntry {
+            path: "file3.md".to_string(),
+            keywords: outlier.clone(),
+            minhash: compute_minhash(&outlier, 128),
+            size_bytes: 100,
+            line_count: 10,
+            doc_length: 0,
+            simhash: 0,
+            headings: vec![],
+            body_keywords: vec![],
+            links: vec![],
+            term_frequencies: HashMap::new(),
+            section_fingerprints: vec![],
+            adr_references: vec![],
+        },
+    );
+
+    let buckets = lsh_buckets(&corpus, 16);
+
+    // Banding must have produced at least one bucket
+    assert!(!buckets.is_empty());
+
+    // file1 and file2 carry identical MinHash signatures, so their band
+    // hashes are expected to coincide and the two paths should meet in a
+    // bucket. Look for any bucket that lists both of them.
+    let first = "file1.md".to_string();
+    let second = "file2.md".to_string();
+    let co_located = buckets
+        .values()
+        .any(|members| members.contains(&first) && members.contains(&second));
+
+    assert!(
+        co_located,
+        "Identical files should be in same LSH bucket"
+    );
+}
+
+#[test]
+fn test_bm25_score_basic() {
+    let mut frequencies = HashMap::new();
+    frequencies.insert("test".to_string(), 5);
+    frequencies.insert("word".to_string(), 2);
+
+    let document = FileEntry {
+        path: "test.md".to_string(),
+        term_frequencies: frequencies,
+        doc_length: 100,
+        size_bytes: 100,
+        line_count: 10,
+        simhash: 0,
+        headings: vec![],
+        keywords: vec![],
+        body_keywords: vec![],
+        links: vec![],
+        minhash: vec![],
+        section_fingerprints: vec![],
+        adr_references: vec![],
+    };
+
+    let mut idf_by_stem = HashMap::new();
+    idf_by_stem.insert("test".to_string(), 2.5);
+    idf_by_stem.insert("word".to_string(), 1.8);
+
+    let matching_query = vec!["test".to_string()];
+    let hit = bm25_score(&matching_query, &document, 100.0, &idf_by_stem);
+
+    // A term that occurs in the document contributes a positive score
+    assert!(hit > 0.0);
+
+    // A term the document never mentions contributes nothing
+    let unknown_query = vec!["nonexistent".to_string()];
+    let miss = bm25_score(&unknown_query, &document, 100.0, &idf_by_stem);
+    assert_eq!(miss, 0.0);
+}
+
+#[test]
+fn test_bm25_score_ordering() {
+    // First document: the query term occurs ten times
+    let mut frequent = HashMap::new();
+    frequent.insert("test".to_string(), 10);
+
+    let dense_doc = FileEntry {
+        path: "high.md".to_string(),
+        term_frequencies: frequent,
+        doc_length: 50,
+        size_bytes: 100,
+        line_count: 10,
+        simhash: 0,
+        headings: vec![],
+        keywords: vec![],
+        body_keywords: vec![],
+        links: vec![],
+        minhash: vec![],
+        section_fingerprints: vec![],
+        adr_references: vec![],
+    };
+
+    // Second document: same length, but the term occurs only once
+    let mut rare = HashMap::new();
+    rare.insert("test".to_string(), 1);
+
+    let sparse_doc = FileEntry {
+        path: "low.md".to_string(),
+        term_frequencies: rare,
+        doc_length: 50,
+        size_bytes: 100,
+        line_count: 10,
+        simhash: 0,
+        headings: vec![],
+        keywords: vec![],
+        body_keywords: vec![],
+        links: vec![],
+        minhash: vec![],
+        section_fingerprints: vec![],
+        adr_references: vec![],
+    };
+
+    let mut idf_by_stem = HashMap::new();
+    idf_by_stem.insert("test".to_string(), 2.0);
+
+    let query = vec!["test".to_string()];
+    let dense_score = bm25_score(&query, &dense_doc, 50.0, &idf_by_stem);
+    let sparse_score = bm25_score(&query, &sparse_doc, 50.0, &idf_by_stem);
+
+    // With everything else equal, more occurrences must rank higher
+    assert!(dense_score > sparse_score);
+}
+
+#[test]
+fn test_policy_rule_matching_and_violations() {
+    // One rule: plan documents must contain the text "## Objective"
+    let objective_rule = PolicyRule {
+        name: Some("plans-must-have-objective".to_string()),
+        severity: Some("error".to_string()),
+        pattern: "agents/plans/*.md".to_string(),
+        must_contain: vec!["## Objective".to_string()],
+        must_not_contain: vec![],
+        ..Default::default()
+    };
+
+    let policy = PolicyConfig { rules: vec![objective_rule] };
+
+    // The rule's glob must accept a plan file and reject a file elsewhere
+    let pattern = Glob::new(&policy.rules[0].pattern).unwrap();
+    let plan_matcher = pattern.compile_matcher();
+    assert!(plan_matcher.is_match("agents/plans/plan.md"));
+    assert!(!plan_matcher.is_match("docs/architecture/auth.md"));
+
+    // An empty document lacks "## Objective", so one violation is expected
+    let plan_path = "agents/plans/plan.md";
+    let empty_document = String::new();
+    let active_rule = &policy.rules[0];
+    let found = collect_policy_violations_for_content(active_rule, plan_path, &empty_document);
+
+    assert_eq!(found.len(), 1);
+    let violation = &found[0];
+    assert_eq!(violation.file, "agents/plans/plan.md");
+    assert_eq!(violation.rule, "plans-must-have-objective");
+    assert_eq!(violation.severity, "error");
+    assert_eq!(violation.kind, "policy_violation");
+}
+
+#[test]
+fn test_policy_min_max_length_violations() {
+    // Documents must be between 10 and 20 lines long
+    let length_rule = PolicyRule {
+        name: Some("length-bounds".to_string()),
+        severity: Some("error".to_string()),
+        pattern: "docs/*.md".to_string(),
+        min_length: Some(10),
+        max_length: Some(20),
+        ..Default::default()
+    };
+
+    // Three lines: under the minimum
+    let too_short_content = "line1\nline2\nline3\n";
+    let too_short_violations =
+        collect_policy_violations_for_content(&length_rule, "docs/short.md", too_short_content);
+    assert!(
+        too_short_violations
+            .iter()
+            .any(|violation| violation.message.contains("Document too short")),
+        "Expected a 'Document too short' violation"
+    );
+
+    // Twenty-five lines: over the maximum
+    let too_long_content: String = (0..25).map(|i| format!("line{i}\n")).collect();
+    let too_long_violations =
+        collect_policy_violations_for_content(&length_rule, "docs/long.md", &too_long_content);
+    assert!(
+        too_long_violations
+            .iter()
+            .any(|violation| violation.message.contains("Document too long")),
+        "Expected a 'Document too long' violation"
+    );
+}
+
+#[test]
+fn test_policy_required_and_forbidden_headings() {
+    let heading_rule = PolicyRule {
+        name: Some("heading-rules".to_string()),
+        severity: Some("error".to_string()),
+        pattern: "docs/*.md".to_string(),
+        required_headings: vec!["Objective".to_string()],
+        forbidden_headings: vec!["Deprecated".to_string()],
+        ..Default::default()
+    };
+
+    let document = r"
+# Title
+
+## Objective
+
+Some content here.
+
+## Deprecated
+";
+
+    let found = collect_policy_violations_for_content(&heading_rule, "docs/example.md", document);
+
+    // "Objective" is present, so no missing-heading violation may appear
+    assert!(
+        !found
+            .iter()
+            .any(|violation| violation.message.contains("Missing required heading")),
+        "Did not expect a missing required heading violation"
+    );
+
+    // "Deprecated" is present and forbidden, so it must be reported
+    assert!(
+        found
+            .iter()
+            .any(|violation| violation.message.contains("Forbidden heading present")),
+        "Expected a forbidden heading violation"
+    );
+}
+
+#[test]
+fn test_policy_section_length_violation() {
+    let section_rule = PolicyRule {
+        name: Some("status-section-length".to_string()),
+        severity: Some("warn".to_string()),
+        pattern: "docs/*.md".to_string(),
+        max_section_length: Some(3),
+        section_heading_regex: Some("^Async".to_string()),
+        ..Default::default()
+    };
+
+    let document = r"
+# Status
+
+## Async Migration
+line1
+line2
+line3
+line4
+
+## Other
+ok
+";
+
+    let found =
+        collect_policy_violations_for_content(&section_rule, "docs/IMPLEMENTATION_STATUS.md", document);
+
+    assert!(
+        found
+            .iter()
+            .any(|violation| violation.message.contains("Section too long")),
+        "Expected a section-length violation"
+    );
+}
+
+#[test]
+fn test_policy_required_link() {
+    let link_rule = PolicyRule {
+        name: Some("status-requires-summary-link".to_string()),
+        severity: Some("error".to_string()),
+        pattern: "docs/*.md".to_string(),
+        must_link_to: vec!["docs/ASYNC_MIGRATION_COMPLETE_SUMMARY.md".to_string()],
+        ..Default::default()
+    };
+
+    let without_link = r"
+# Status
+No links here.
+";
+    let found =
+        collect_policy_violations_for_content(&link_rule, "docs/IMPLEMENTATION_STATUS.md", without_link);
+    assert!(
+        found
+            .iter()
+            .any(|violation| violation.message.contains("Missing required link")),
+        "Expected a missing required link violation"
+    );
+
+    let with_relative_link = r"
+# Status
+See [summary](ASYNC_MIGRATION_COMPLETE_SUMMARY.md).
+";
+    let none_expected =
+        collect_policy_violations_for_content(&link_rule, "docs/IMPLEMENTATION_STATUS.md", with_relative_link);
+    assert!(
+        none_expected.is_empty(),
+        "Did not expect violations when required link is present"
+    );
+}
+
+#[test]
+fn test_suggest_new_link_target_same_dir() {
+    let mut known_files = HashSet::new();
+    known_files.insert("docs/guide/auth.md".to_string());
+    known_files.insert("docs/guide/other.md".to_string());
+
+    // The link names a file that exists exactly once, next to the source document
+    let suggestion = suggest_new_link_target("docs/guide/README.md", "auth.md", &known_files);
+    // so the suggestion is expected to be the bare file name
+    assert_eq!(suggestion.as_deref(), Some("auth.md"));
+}
+
+#[test]
+fn test_apply_reference_mapping_to_content() {
+    let before = "See [auth](docs/old/auth.md) for details.";
+    let after = apply_reference_mapping_to_content(
+        before,
+        "docs/old/auth.md",
+        "docs/architecture/AUTH.md",
+    );
+    assert_eq!(
+        after,
+        "See [auth](docs/architecture/AUTH.md) for details."
+    );
+}
+
+#[test]
+fn test_build_consolidation_groups_basic() {
+    // Two indexed documents that are reported as a single duplicate pair
+    let mut corpus = HashMap::new();
+
+    corpus.insert(
+        "docs/a.md".to_string(),
+        FileEntry {
+            path: "docs/a.md".to_string(),
+            keywords: vec!["foo".to_string()],
+            size_bytes: 0,
+            line_count: 1,
+            doc_length: 0,
+            simhash: 0,
+            headings: vec![],
+            body_keywords: vec![],
+            links: vec![],
+            minhash: vec![],
+            term_frequencies: HashMap::new(),
+            section_fingerprints: vec![],
+            adr_references: vec![],
+        },
+    );
+    corpus.insert(
+        "docs/b.md".to_string(),
+        FileEntry {
+            path: "docs/b.md".to_string(),
+            keywords: vec!["foo".to_string()],
+            size_bytes: 0,
+            line_count: 1,
+            doc_length: 0,
+            simhash: 0,
+            headings: vec![],
+            body_keywords: vec![],
+            links: vec![],
+            minhash: vec![],
+            term_frequencies: HashMap::new(),
+            section_fingerprints: vec![],
+            adr_references: vec![],
+        },
+    );
+
+    let index = ForwardIndex {
+        version: 3,
+        indexed_at: chrono_now(),
+        source_root: String::new(),
+        avg_doc_length: 0.0,
+        idf_map: HashMap::new(),
+        files: corpus,
+    };
+
+    let duplicate_pairs = vec![("docs/a.md".to_string(), "docs/b.md".to_string(), 0.9_f64)];
+
+    let plan = build_consolidation_groups(&index, &duplicate_pairs);
+    assert_eq!(plan.total_groups, 1);
+    let only_group = &plan.groups[0];
+    assert!(only_group.canonical == "docs/a.md" || only_group.canonical == "docs/b.md");
+    assert_eq!(only_group.merge_into.len(), 1);
+}
+
+#[test]
+fn test_compute_inbound_link_counts() {
+    let mut corpus = HashMap::new();
+
+    corpus.insert(
+        "docs/a.md".to_string(),
+        FileEntry {
+            path: "docs/a.md".to_string(),
+            links: vec![Link {
+                target: "b.md".to_string(),
+                text: "b".to_string(),
+                line: 1,
+            }],
+            size_bytes: 0,
+            line_count: 1,
+            doc_length: 0,
+            simhash: 0,
+            headings: vec![],
+            keywords: vec![],
+            body_keywords: vec![],
+            minhash: vec![],
+            term_frequencies: HashMap::new(),
+            section_fingerprints: vec![],
+            adr_references: vec![],
+        },
+    );
+    corpus.insert(
+        "docs/b.md".to_string(),
+        FileEntry {
+            path: "docs/b.md".to_string(),
+            links: vec![],
+            size_bytes: 0,
+            line_count: 1,
+            doc_length: 0,
+            simhash: 0,
+            headings: vec![],
+            keywords: vec![],
+            body_keywords: vec![],
+            minhash: vec![],
+            term_frequencies: HashMap::new(),
+            section_fingerprints: vec![],
+            adr_references: vec![],
+        },
+    );
+
+    let index = ForwardIndex {
+        version: 3,
+        indexed_at: "0".to_string(),
+        source_root: String::new(),
+        avg_doc_length: 0.0,
+        idf_map: HashMap::new(),
+        files: corpus,
+    };
+
+    let inbound = compute_inbound_link_counts(&index);
+    // docs/a.md links to "b.md", so docs/b.md must report one inbound link
+    assert_eq!(inbound.get("docs/b.md"), Some(&1));
+}
+
+#[test]
+fn test_index_sections() {
+    let document = "# Introduction\nThis is the intro.\n\n## Details\nMore details here.\n\n## Summary\nFinal thoughts.";
+    let outline = vec![
+        Heading {
+            text: "Introduction".to_string(),
+            level: 1,
+            line: 1,
+        },
+        Heading {
+            text: "Details".to_string(),
+            level: 2,
+            line: 4,
+        },
+        Heading {
+            text: "Summary".to_string(),
+            level: 2,
+            line: 7,
+        },
+    ];
+
+    let fingerprints = index_sections(document, &outline);
+
+    assert_eq!(fingerprints.len(), 3);
+    assert_eq!(fingerprints[0].heading, "Introduction");
+    assert_eq!(fingerprints[0].level, 1);
+    assert_eq!(fingerprints[0].line_start, 1);
+
+    assert_eq!(fingerprints[1].heading, "Details");
+    assert_eq!(fingerprints[1].level, 2);
+    assert_eq!(fingerprints[1].line_start, 4);
+
+    assert_eq!(fingerprints[2].heading, "Summary");
+    assert_eq!(fingerprints[2].level, 2);
+}
+
+#[test]
+fn test_index_sections_similar_content() {
+    let original = "## Testing\nRun the tests with:\n```\npytest\n```";
+    let verbatim_copy = "## Testing\nRun the tests with:\n```\npytest\n```";
+    let rewritten = "## Testing\nCompletely different content about testing";
+
+    let original_outline = vec![Heading {
+        text: "Testing".to_string(),
+        level: 2,
+        line: 1,
+    }];
+    let copy_outline = vec![Heading {
+        text: "Testing".to_string(),
+        level: 2,
+        line: 1,
+    }];
+    let rewritten_outline = vec![Heading {
+        text: "Testing".to_string(),
+        level: 2,
+        line: 1,
+    }];
+
+    let original_sections = index_sections(original, &original_outline);
+    let copy_sections = index_sections(verbatim_copy, &copy_outline);
+    let rewritten_sections = index_sections(rewritten, &rewritten_outline);
+
+    // The verbatim copy must fingerprint exactly like the original
+    assert_eq!(original_sections[0].simhash, copy_sections[0].simhash);
+
+    // The rewritten section is expected to fingerprint differently
+    assert_ne!(original_sections[0].simhash, rewritten_sections[0].simhash);
+
+    // Equal fingerprints translate to a similarity of 1.0
+    let copy_score = simhash_similarity(original_sections[0].simhash, copy_sections[0].simhash);
+    assert_eq!(copy_score, 1.0);
+
+    // Differing fingerprints must score strictly below 1.0
+    let rewrite_score = simhash_similarity(original_sections[0].simhash, rewritten_sections[0].simhash);
+    assert!(rewrite_score < 1.0);
+}
+
+#[test]
+fn test_compute_document_metrics_captures_structure_signals() {
+    let document = r"---
+title: Demo
+owner: Docs
+---
+
+# Overview
+Intro paragraph.
+
+## Part 1
+- first
+- second
+
+## Changelog
+- Added feature
+- Fixed bug
+
+## Completed Work
+```rust
+fn main() {}
+```
+";
+    let document_lines: Vec<&str> = document.lines().collect();
+    let outline = vec![
+        Heading {
+            text: "Overview".to_string(),
+            level: 1,
+            line: 6,
+        },
+        Heading {
+            text: "Part 1".to_string(),
+            level: 2,
+            line: 9,
+        },
+        Heading {
+            text: "Changelog".to_string(),
+            level: 2,
+            line: 13,
+        },
+        Heading {
+            text: "Completed Work".to_string(),
+            level: 2,
+            line: 17,
+        },
+    ];
+    let outbound_links = vec![Link {
+        target: "README.md".to_string(),
+        text: "readme".to_string(),
+        line: 7,
+    }];
+
+    let measured = compute_document_metrics("docs/demo.md", document, &document_lines, &outline, &outbound_links);
+
+    assert_eq!(measured.path, "docs/demo.md");
+    assert_eq!(measured.frontmatter_key_count, 2);
+    assert_eq!(measured.heading_count, 4);
+    assert_eq!(measured.section_count, 4);
+    assert_eq!(measured.h1_count, 1);
+    assert_eq!(measured.h2_count, 3);
+    assert_eq!(measured.part_heading_count, 1);
+    assert_eq!(measured.changelog_heading_count, 1);
+    assert_eq!(measured.completion_heading_count, 1);
+    assert_eq!(measured.changelog_entry_count, 2);
+    assert_eq!(measured.list_item_count, 4);
+    assert_eq!(measured.code_block_count, 1);
+    assert!(measured.longest_section_lines >= 3);
+    assert!(measured
+        .sections
+        .iter()
+        .any(|candidate| candidate.looks_like_part));
+    assert!(measured
+        .sections
+        .iter()
+        .any(|candidate| candidate.looks_like_changelog && candidate.list_item_count == 2));
+}
+
+/// Runs `cmd_build` against a one-file docs tree in a scratch directory and checks the
+/// `document_metrics.json` file it writes into the index directory.
+#[test]
+fn test_cmd_build_writes_document_metrics_index() {
+    // A nanosecond timestamp keeps the scratch directory name distinct between runs.
+    let stamp = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .unwrap()
+        .as_nanos();
+    let scratch = std::env::temp_dir().join(format!("yore-build-metrics-{stamp}"));
+    let docs_dir = scratch.join("docs");
+    let index_dir = scratch.join(".yore");
+    let guide = docs_dir.join("guide.md");
+
+    fs::create_dir_all(&docs_dir).unwrap();
+    fs::write(&guide, "# Guide\n\n## Part 1\n- step one\n- step two\n").unwrap();
+
+    cmd_build(&docs_dir, &index_dir, "md", &[], true, None, false, false).unwrap();
+
+    let metrics_path = index_dir.join("document_metrics.json");
+    assert!(metrics_path.exists());
+
+    let raw_json = fs::read_to_string(metrics_path).unwrap();
+    let metrics_index: DocumentMetricsIndex = serde_json::from_str(&raw_json).unwrap();
+    assert_eq!(metrics_index.version, 1);
+    assert_eq!(metrics_index.files.len(), 1);
+
+    // Only one file was indexed, so the first map value is the guide's metrics.
+    let guide_metrics = metrics_index.files.values().next().unwrap();
+    assert_eq!(guide_metrics.heading_count, 2);
+    assert_eq!(guide_metrics.part_heading_count, 1);
+    assert_eq!(guide_metrics.list_item_count, 2);
+    assert_eq!(guide_metrics.section_count, 2);
+
+    fs::remove_dir_all(scratch).unwrap();
+}
+
+/// `extract_keywords` is expected to lowercase tokens, leave them unstemmed, and omit
+/// the stop words checked below.
+#[test]
+fn test_extract_keywords() {
+    let keywords = extract_keywords("This is a TEST document with some KEYWORDS");
+    let has = |word: &str| keywords.contains(&word.to_string());
+
+    // Lowercased but not stemmed: "keywords" keeps its trailing "s".
+    assert!(has("test"));
+    assert!(has("document"));
+    assert!(has("keywords"));
+
+    // Stop words must not survive extraction.
+    assert!(!has("this"));
+    assert!(!has("is"));
+    assert!(!has("with"));
+}
+
+/// Table-driven check of `stem_word` against the stems the simple stemmer produces.
+#[test]
+fn test_stem_word() {
+    // (input, expected stem)
+    let cases = [
+        ("running", "runn"), // "ing" is stripped; the doubled consonant stays
+        ("tests", "test"),   // trailing "s" removed
+        ("testing", "test"), // trailing "ing" removed
+        ("keywords", "keyword"),
+        // Short words should not be stemmed.
+        ("go", "go"),
+        ("it", "it"),
+    ];
+
+    for (word, expected) in cases {
+        assert_eq!(stem_word(word), expected, "stem of {word:?}");
+    }
+}
+
+/// `get_link_context` returns the text of the requested 1-based line, and `None` when
+/// the line number lies beyond the end of the file.
+#[test]
+fn test_get_link_context_basic() {
+    // The fixture lives in the OS temp dir (not the current working directory) so the
+    // test never litters the repository; the process id keeps the name run-specific.
+    let file = std::env::temp_dir().join(format!("yore-ctx-basic-{}.md", std::process::id()));
+    let path = file.to_str().expect("temp path should be valid UTF-8");
+    fs::write(path, "first line\nsecond line with a link\nthird line\n").unwrap();
+
+    let mut cache: HashMap<String, Vec<String>> = HashMap::new();
+    let in_range = get_link_context(&mut cache, path, 2);
+    let out_of_range = get_link_context(&mut cache, path, 10);
+
+    // Remove the fixture before asserting so a failed assertion leaves nothing behind.
+    fs::remove_file(path).unwrap();
+
+    assert_eq!(
+        in_range.unwrap().as_deref(),
+        Some("second line with a link")
+    );
+    // Out-of-range line number should yield None
+    assert!(out_of_range.unwrap().is_none());
+}
+
+/// A 200-character line must come back as context no longer than 160 bytes and ending
+/// in "...".
+#[test]
+fn test_get_link_context_truncates_long_lines() {
+    // The fixture lives in the OS temp dir (not the current working directory) so the
+    // test never litters the repository; the process id keeps the name run-specific.
+    let file = std::env::temp_dir().join(format!("yore-ctx-truncate-{}.md", std::process::id()));
+    let path = file.to_str().expect("temp path should be valid UTF-8");
+    let long_line = "a".repeat(200);
+    fs::write(path, format!("{long_line}\n")).unwrap();
+
+    let mut cache: HashMap<String, Vec<String>> = HashMap::new();
+    let lookup = get_link_context(&mut cache, path, 1);
+
+    // Remove the fixture before asserting so a failed assertion leaves nothing behind.
+    fs::remove_file(path).unwrap();
+
+    let ctx = lookup.unwrap().expect("expected context");
+    assert!(ctx.len() <= 160);
+    assert!(ctx.ends_with("..."));
+}
+
+/// Requests the top two terms of a document and expects them ordered by the
+/// frequency-times-IDF products worked out in the comments below.
+#[test]
+fn test_get_top_doc_terms_basic() {
+    // `term_frequencies` and `idf_map` are keyed by STEMMED terms, while
+    // `body_keywords` holds the surface forms ("docker" -> "dock").
+    let term_frequencies = HashMap::from([
+        ("dock".to_string(), 10), // stem of "docker"
+        ("nginx".to_string(), 5),
+        ("helm".to_string(), 3),
+    ]);
+    let entry = FileEntry {
+        size_bytes: 100,
+        line_count: 10,
+        doc_length: 100,
+        body_keywords: vec![
+            "docker".to_owned(),
+            "nginx".to_owned(),
+            "helm".to_owned(),
+            "container".to_owned(), // not in tf, will be excluded
+        ],
+        term_frequencies,
+        // Every remaining field takes the helper's empty/zero value.
+        ..make_file_entry("test.md")
+    };
+    let idf_map = HashMap::from([
+        ("dock".to_string(), 2.0), // stemmed
+        ("nginx".to_string(), 1.5),
+        ("helm".to_string(), 3.0),
+    ]);
+
+    // Ask for the top 2 terms with nothing excluded.
+    let terms = get_top_doc_terms(&entry, &idf_map, &[], 2);
+
+    // docker: 10 * 2.0 = 20
+    // helm: 3 * 3.0 = 9
+    // nginx: 5 * 1.5 = 7.5
+    assert_eq!(terms.len(), 2);
+    assert_eq!(terms[0], "docker");
+    assert_eq!(terms[1], "helm");
+}
+
+/// A term passed in the exclude slice must not be returned, even when the exclude
+/// entry differs in case from the stored keyword.
+#[test]
+fn test_get_top_doc_terms_excludes_query_terms() {
+    // `term_frequencies` and `idf_map` use STEMMED keys.
+    let term_frequencies = HashMap::from([
+        ("kubernete".to_string(), 10), // stem of "kubernetes"
+        ("dock".to_string(), 5),       // stem of "docker"
+        ("nginx".to_string(), 3),
+    ]);
+    let entry = FileEntry {
+        size_bytes: 100,
+        line_count: 10,
+        doc_length: 100,
+        body_keywords: vec![
+            "kubernetes".to_owned(),
+            "docker".to_owned(),
+            "nginx".to_owned(),
+        ],
+        term_frequencies,
+        // Every remaining field takes the helper's empty/zero value.
+        ..make_file_entry("test.md")
+    };
+    let idf_map = HashMap::from([
+        ("kubernete".to_string(), 2.0), // stemmed
+        ("dock".to_string(), 1.5),      // stemmed
+        ("nginx".to_string(), 3.0),
+    ]);
+
+    // "Kubernetes" differs in case from the keyword and should still match after stemming.
+    let exclude = vec!["Kubernetes".to_string()];
+    let terms = get_top_doc_terms(&entry, &idf_map, &exclude, 3);
+
+    assert_eq!(terms.len(), 2);
+    assert!(!terms.contains(&"kubernetes".to_string()));
+    assert_eq!(terms[0], "nginx"); // 3 * 3.0 = 9
+    assert_eq!(terms[1], "docker"); // 5 * 1.5 = 7.5
+}
+
+/// Three keywords that share the stem "run" must collapse into a single returned term.
+#[test]
+fn test_get_top_doc_terms_deduplicates_stems() {
+    // "run" is the stem of running, runs and run.
+    let term_frequencies = HashMap::from([("run".to_string(), 10)]);
+    let entry = FileEntry {
+        size_bytes: 100,
+        line_count: 10,
+        doc_length: 100,
+        body_keywords: vec!["running".to_owned(), "runs".to_owned(), "run".to_owned()],
+        term_frequencies,
+        // Every remaining field takes the helper's empty/zero value.
+        ..make_file_entry("test.md")
+    };
+    let idf_map = HashMap::from([("run".to_string(), 1.0)]);
+
+    let terms = get_top_doc_terms(&entry, &idf_map, &[], 5);
+
+    // Should only return one term (first occurrence), not all three
+    assert_eq!(terms.len(), 1);
+}
+
+/// Asking for zero terms yields an empty result even though the entry has keywords.
+#[test]
+fn test_get_top_doc_terms_zero_returns_empty() {
+    let entry = FileEntry {
+        size_bytes: 100,
+        line_count: 10,
+        doc_length: 100,
+        keywords: vec!["test".to_owned()],
+        body_keywords: vec!["test".to_owned()],
+        // Remaining fields, including the empty `term_frequencies`, come from the helper.
+        ..make_file_entry("test.md")
+    };
+    let idf_map = HashMap::new();
+
+    let terms = get_top_doc_terms(&entry, &idf_map, &[], 0);
+
+    assert!(terms.is_empty());
+}
+
+/// With exactly one available file named like the target, that file is the only candidate.
+#[test]
+fn test_find_link_candidates_single_match() {
+    let available = HashSet::from([
+        "docs/guide/auth.md".to_string(),
+        "docs/guide/other.md".to_string(),
+    ]);
+
+    // Source and target share a parent directory; the filename matches exactly one file.
+    let candidates = find_link_candidates("docs/guide/README.md", "auth.md", &available);
+
+    assert_eq!(candidates.len(), 1);
+    assert_eq!(candidates[0], "auth.md");
+}
+
+/// Several available files share the target's filename; at least two must be offered.
+#[test]
+fn test_find_link_candidates_multiple_matches() {
+    let available = HashSet::from([
+        "docs/v1/auth.md".to_string(),
+        "docs/v2/auth.md".to_string(),
+        "docs/archive/auth.md".to_string(),
+    ]);
+
+    // Three files are named auth.md, so more than one candidate is expected.
+    let candidates = find_link_candidates("docs/README.md", "auth.md", &available);
+
+    assert!(candidates.len() >= 2);
+}
+
+/// A target whose filename matches no available file produces no candidates.
+#[test]
+fn test_find_link_candidates_no_match() {
+    let available = HashSet::from(["docs/guide/other.md".to_string()]);
+
+    let candidates = find_link_candidates("docs/README.md", "nonexistent.md", &available);
+
+    assert!(candidates.is_empty());
+}
+
+/// Round-trips a `LinkFixProposal` through YAML: checks the emitted keys, then parses
+/// the text back and compares a few fields.
+#[test]
+fn test_link_fix_proposal_serialization() {
+    let original = LinkFixProposal {
+        source: String::from("docs/README.md"),
+        line: 42,
+        broken_target: String::from("../old/auth.md"),
+        candidates: vec![
+            String::from("../archive/auth.md"),
+            String::from("../v2/auth.md"),
+        ],
+        decision: None,
+    };
+
+    let yaml = serde_yaml::to_string(&original).unwrap();
+    for fragment in [
+        "source: docs/README.md",
+        "line: 42",
+        "broken_target:",
+        "candidates:",
+    ] {
+        assert!(yaml.contains(fragment), "missing {fragment:?} in:\n{yaml}");
+    }
+
+    // Parse the YAML back and confirm the fields survived.
+    let reparsed: LinkFixProposal = serde_yaml::from_str(&yaml).unwrap();
+    assert_eq!(reparsed.source, "docs/README.md");
+    assert_eq!(reparsed.line, 42);
+    assert_eq!(reparsed.candidates.len(), 2);
+}
+
+/// A hand-written YAML proposal with `decision: 1` parses into `Some(1)`, and that
+/// index addresses the second candidate.
+#[test]
+fn test_link_fix_proposal_with_decision() {
+    let document = r#"
+source: docs/README.md
+line: 42
+broken_target: "../old/auth.md"
+candidates:
+  - "../archive/auth.md"
+  - "../v2/auth.md"
+decision: 1
+"#;
+
+    let parsed: LinkFixProposal = serde_yaml::from_str(document).unwrap();
+
+    assert_eq!(parsed.decision, Some(1));
+    assert_eq!(parsed.candidates[1], "../v2/auth.md");
+}
+
+/// Serializes a `DiffResult` to pretty JSON and spot-checks a few rendered fields.
+#[test]
+fn test_diff_result_serialization() {
+    let similarity = DiffSimilarity {
+        combined: 0.75,
+        jaccard: 0.6,
+        simhash: 0.9,
+    };
+    let result = DiffResult {
+        file1: String::from("docs/a.md"),
+        file2: String::from("docs/b.md"),
+        similarity,
+        shared_keywords: vec![String::from("auth"), String::from("login")],
+        only_in_file1: vec![String::from("oauth")],
+        only_in_file2: vec![String::from("jwt")],
+        shared_headings: vec![String::from("Introduction")],
+    };
+
+    let json = serde_json::to_string_pretty(&result).unwrap();
+    for fragment in [
+        "\"file1\": \"docs/a.md\"",
+        "\"combined\": 0.75",
+        "\"shared_keywords\"",
+    ] {
+        assert!(json.contains(fragment), "missing {fragment} in:\n{json}");
+    }
+}
+
+/// Serializes a `StatsResult` with two top keywords and spot-checks the pretty JSON.
+#[test]
+fn test_stats_result_serialization() {
+    let top_keywords = vec![
+        KeywordCount {
+            keyword: String::from("authentication"),
+            count: 50,
+        },
+        KeywordCount {
+            keyword: String::from("kubernetes"),
+            count: 40,
+        },
+    ];
+    let result = StatsResult {
+        total_files: 100,
+        unique_keywords: 500,
+        total_headings: 250,
+        body_keywords: 1000,
+        total_links: 300,
+        index_version: 3,
+        indexed_at: String::from("2024-01-01T00:00:00Z"),
+        top_keywords,
+    };
+
+    let json = serde_json::to_string_pretty(&result).unwrap();
+    for fragment in [
+        "\"total_files\": 100",
+        "\"top_keywords\"",
+        "\"authentication\"",
+    ] {
+        assert!(json.contains(fragment), "missing {fragment} in:\n{json}");
+    }
+}
+
+/// Serializes an `MvResult` and spot-checks the pretty JSON output.
+#[test]
+fn test_mv_result_serialization() {
+    let result = MvResult {
+        from: String::from("docs/old.md"),
+        to: String::from("docs/new.md"),
+        moved: true,
+        updated_files: vec![String::from("docs/index.md"), String::from("docs/guide.md")],
+    };
+
+    let json = serde_json::to_string_pretty(&result).unwrap();
+    for fragment in [
+        "\"from\": \"docs/old.md\"",
+        "\"moved\": true",
+        "\"updated_files\"",
+    ] {
+        assert!(json.contains(fragment), "missing {fragment} in:\n{json}");
+    }
+}
+
+/// Serializes a `FixReferencesResult` and spot-checks the pretty JSON output.
+#[test]
+fn test_fix_references_result_serialization() {
+    let result = FixReferencesResult {
+        mapping_file: String::from("mappings.yaml"),
+        mappings_count: 5,
+        updated_files: vec![String::from("docs/a.md")],
+        applied: false,
+    };
+
+    let json = serde_json::to_string_pretty(&result).unwrap();
+    for fragment in [
+        "\"mapping_file\": \"mappings.yaml\"",
+        "\"mappings_count\": 5",
+        "\"applied\": false",
+    ] {
+        assert!(json.contains(fragment), "missing {fragment} in:\n{json}");
+    }
+}
+
+/// A single `[index.docs]` profile parses into `YoreConfig` with its roots and types.
+#[test]
+fn test_yore_config_basic_parsing() {
+    let input = r#"
+[index.docs]
+roots = ["docs/"]
+types = ["md"]
+output = ".yore"
+"#;
+
+    let config: YoreConfig = toml::from_str(input).unwrap();
+
+    assert!(config.index.contains_key("docs"));
+    let profile = config
+        .index
+        .get("docs")
+        .expect("docs profile should exist");
+    assert_eq!(profile.roots, vec!["docs/"]);
+    assert_eq!(profile.types, vec!["md"]);
+}
+
+/// The `[link-check]` table parses its exclude list and both severity overrides.
+#[test]
+fn test_yore_config_link_check_section() {
+    let input = r#"
+[link-check]
+exclude = ["archive/**", "deprecated/**"]
+
+[[link-check.severity-overrides]]
+pattern = "archive/**"
+severity = "warn"
+
+[[link-check.severity-overrides]]
+pattern = "deprecated/**"
+severity = "info"
+"#;
+
+    let config: YoreConfig = toml::from_str(input).unwrap();
+    let link_check = config.link_check.unwrap();
+
+    assert_eq!(link_check.exclude.len(), 2);
+    assert_eq!(link_check.severity_overrides.len(), 2);
+
+    // The first override in the file is the "archive/**" one.
+    let first_override = &link_check.severity_overrides[0];
+    assert_eq!(first_override.pattern, "archive/**");
+    assert_eq!(first_override.severity, "warn");
+}
+
+/// Two `[[external.repos]]` entries parse in order; `prefix` is optional per entry.
+#[test]
+fn test_yore_config_external_repos() {
+    let input = r#"
+[[external.repos]]
+path = "../runtime/docs"
+prefix = "runtime"
+
+[[external.repos]]
+path = "../api-docs"
+"#;
+
+    let config: YoreConfig = toml::from_str(input).unwrap();
+    let repos = config.external.unwrap().repos;
+
+    assert_eq!(repos.len(), 2);
+    assert_eq!(repos[0].path, "../runtime/docs");
+    assert_eq!(repos[0].prefix.as_deref(), Some("runtime"));
+    // The second entry omits `prefix`.
+    assert!(repos[1].prefix.is_none());
+}
+
+/// The `[policy]` table exposes its `rules-file` key as `rules_file`.
+#[test]
+fn test_yore_config_policy_section() {
+    let input = r#"
+[policy]
+rules-file = ".yore-policy.yaml"
+"#;
+
+    let config: YoreConfig = toml::from_str(input).unwrap();
+    let policy = config.policy.unwrap();
+
+    assert_eq!(policy.rules_file.as_deref(), Some(".yore-policy.yaml"));
+}
+
+/// Parses a config that uses every section at once and checks each section's size.
+#[test]
+fn test_yore_config_full_example() {
+    let input = r#"
+[index.docs]
+roots = ["docs/"]
+types = ["md", "txt"]
+output = ".yore"
+
+[index.all]
+roots = ["docs/", "specs/"]
+types = ["md"]
+
+[link-check]
+exclude = ["archive/**"]
+
+[[link-check.severity-overrides]]
+pattern = "deprecated/**"
+severity = "info"
+
+[policy]
+rules-file = ".yore-policy.yaml"
+
+[[external.repos]]
+path = "../runtime/docs"
+prefix = "runtime"
+"#;
+    let config: YoreConfig = toml::from_str(input).unwrap();
+
+    // Index profiles: "docs" and "all".
+    assert_eq!(config.index.len(), 2);
+    for profile in ["docs", "all"] {
+        assert!(config.index.contains_key(profile));
+    }
+
+    // Link check: one exclude glob, one severity override.
+    let link_check = config.link_check.unwrap();
+    assert_eq!(link_check.exclude.len(), 1);
+    assert_eq!(link_check.severity_overrides.len(), 1);
+
+    // Policy: a rules file is configured.
+    assert!(config.policy.unwrap().rules_file.is_some());
+
+    // External: a single repository.
+    assert_eq!(config.external.unwrap().repos.len(), 1);
+}
+
+/// An empty TOML document is accepted: no index profiles, every optional section absent.
+#[test]
+fn test_yore_config_empty_is_valid() {
+    let config: YoreConfig = toml::from_str("").unwrap();
+
+    assert!(config.index.is_empty());
+    assert!(config.link_check.is_none());
+    assert!(config.policy.is_none());
+    assert!(config.external.is_none());
+}
+
+/// Serializes a `BuildResult` without rename tracking and checks which keys appear.
+#[test]
+fn test_build_result_serialization() {
+    let result = BuildResult {
+        index_path: String::from(".yore"),
+        files_indexed: 150,
+        total_headings: 450,
+        total_links: 200,
+        unique_keywords: 800,
+        duration_ms: 1234,
+        renames_tracked: None,
+        total_relations: None,
+    };
+
+    let json = serde_json::to_string_pretty(&result).unwrap();
+
+    // renames_tracked should be absent when None due to skip_serializing_if
+    assert!(!json.contains("renames_tracked"));
+    for fragment in [
+        "\"index_path\": \".yore\"",
+        "\"files_indexed\": 150",
+        "\"total_headings\": 450",
+        "\"total_links\": 200",
+        "\"unique_keywords\": 800",
+        "\"duration_ms\": 1234",
+    ] {
+        assert!(json.contains(fragment), "missing {fragment} in:\n{json}");
+    }
+}
+
+/// Serializes an `EvalJsonResult` holding one passing and one failing question and
+/// spot-checks the pretty JSON output.
+#[test]
+fn test_eval_json_result_serialization() {
+    let passing = EvalQuestionResult {
+        question: String::from("How do I authenticate?"),
+        passed: true,
+        expected: vec![String::from("auth.md")],
+        found: vec![String::from("auth.md")],
+        missing: Vec::new(),
+    };
+    let failing = EvalQuestionResult {
+        question: String::from("What is the API endpoint?"),
+        passed: false,
+        expected: vec![String::from("api.md")],
+        found: Vec::new(),
+        missing: vec![String::from("api.md")],
+    };
+    let result = EvalJsonResult {
+        questions_file: String::from("questions.jsonl"),
+        total_questions: 10,
+        passed: 8,
+        failed: 2,
+        pass_rate: 80.0,
+        results: vec![passing, failing],
+    };
+
+    let json = serde_json::to_string_pretty(&result).unwrap();
+    for fragment in [
+        "\"questions_file\": \"questions.jsonl\"",
+        "\"total_questions\": 10",
+        "\"passed\": 8",
+        "\"failed\": 2",
+        "\"pass_rate\": 80.0",
+        "\"results\"",
+        "How do I authenticate?",
+        "\"missing\": []",
+    ] {
+        assert!(json.contains(fragment), "missing {fragment} in:\n{json}");
+    }
+}
+
+/// Serializes a two-entry `RenameHistory` to JSON, checks the first entry's fields in
+/// the text, and parses the JSON back.
+#[test]
+fn test_rename_history_serialization() {
+    let rename = |old: &str, new: &str, commit: &str| RenameEntry {
+        old_path: old.to_string(),
+        new_path: new.to_string(),
+        commit: commit.to_string(),
+    };
+    let history = RenameHistory {
+        renames: vec![
+            rename("docs/old/auth.md", "docs/v2/auth.md", "abc123"),
+            rename("docs/v2/auth.md", "docs/current/auth.md", "def456"),
+        ],
+        indexed_at: String::from("1234567890"),
+    };
+
+    let json = serde_json::to_string_pretty(&history).unwrap();
+    for fragment in [
+        "\"old_path\": \"docs/old/auth.md\"",
+        "\"new_path\": \"docs/v2/auth.md\"",
+        "\"commit\": \"abc123\"",
+    ] {
+        assert!(json.contains(fragment), "missing {fragment} in:\n{json}");
+    }
+
+    // Verify roundtrip
+    let reparsed: RenameHistory = serde_json::from_str(&json).unwrap();
+    assert_eq!(reparsed.renames.len(), 2);
+}
+
+/// A path with one recorded rename resolves to its new location; an unrecorded path
+/// resolves to `None`.
+#[test]
+fn test_resolve_renamed_path_single_rename() {
+    let only_rename = RenameEntry {
+        old_path: String::from("docs/old.md"),
+        new_path: String::from("docs/new.md"),
+        commit: String::from("abc123"),
+    };
+    let history = RenameHistory {
+        renames: vec![only_rename],
+        indexed_at: String::from("0"),
+    };
+
+    let renamed = resolve_renamed_path("docs/old.md", &history);
+    let untouched = resolve_renamed_path("docs/other.md", &history);
+
+    assert_eq!(renamed, Some("docs/new.md".to_string()));
+    assert_eq!(untouched, None);
+}
+
+/// A chain of renames (a -> b -> c -> d) resolves to the final path from any
+/// starting point checked below.
+#[test]
+fn test_resolve_renamed_path_chain() {
+    let hop = |old: &str, new: &str, commit: &str| RenameEntry {
+        old_path: old.to_string(),
+        new_path: new.to_string(),
+        commit: commit.to_string(),
+    };
+    let history = RenameHistory {
+        renames: vec![
+            hop("a.md", "b.md", "1"),
+            hop("b.md", "c.md", "2"),
+            hop("c.md", "d.md", "3"),
+        ],
+        indexed_at: String::from("0"),
+    };
+    let final_path = Some("d.md".to_string());
+
+    // Should follow the chain from a.md -> b.md -> c.md -> d.md
+    assert_eq!(resolve_renamed_path("a.md", &history), final_path);
+    // Starting from the middle of the chain should also work
+    assert_eq!(resolve_renamed_path("b.md", &history), final_path);
+}
+
+/// Source and target in the same directory: the relative path is the bare filename.
+#[test]
+fn test_compute_relative_path_same_dir() {
+    let files = HashSet::<String>::new();
+
+    let relative = compute_relative_path("docs/foo.md", "docs/bar.md", &files);
+
+    assert_eq!(relative, Some("bar.md".to_string()));
+}
+
+/// Target one directory below the source: the relative path descends into it.
+#[test]
+fn test_compute_relative_path_subdirectory() {
+    let files = HashSet::<String>::new();
+
+    let relative = compute_relative_path("docs/index.md", "docs/guides/auth.md", &files);
+
+    assert_eq!(relative, Some("guides/auth.md".to_string()));
+}
+
+/// Target one directory above the source: the relative path must start with "../".
+#[test]
+fn test_compute_relative_path_parent_directory() {
+    let files = HashSet::<String>::new();
+
+    let relative = compute_relative_path("docs/guides/auth.md", "docs/index.md", &files)
+        .expect("a relative path should be produced");
+
+    assert!(relative.starts_with("../"));
+}
+
+/// When `renames_tracked` is `Some`, its value appears in the serialized JSON.
+#[test]
+fn test_build_result_with_renames() {
+    let with_renames = BuildResult {
+        index_path: String::from(".yore"),
+        files_indexed: 100,
+        total_headings: 200,
+        total_links: 50,
+        unique_keywords: 500,
+        duration_ms: 1000,
+        renames_tracked: Some(25),
+        total_relations: None,
+    };
+
+    let rendered = serde_json::to_string_pretty(&with_renames).unwrap();
+
+    assert!(rendered.contains("\"renames_tracked\": 25"));
+}
+
+/// Collecting `path` from every external repo preserves the order of the config file.
+#[test]
+fn test_external_repos_path_extraction() {
+    let input = r#"
+[[external.repos]]
+path = "../runtime/docs"
+prefix = "runtime"
+
+[[external.repos]]
+path = "../api-docs"
+"#;
+    let config: YoreConfig = toml::from_str(input).unwrap();
+    let external = config.external.unwrap();
+
+    // Extract paths like the cmd_check_links dispatch does
+    let mut paths: Vec<String> = Vec::new();
+    for repo in &external.repos {
+        paths.push(repo.path.clone());
+    }
+
+    assert_eq!(paths.len(), 2);
+    assert_eq!(paths[0], "../runtime/docs");
+    assert_eq!(paths[1], "../api-docs");
+}
+
+/// Test helper: builds a `FileEntry` for `path` with every other field empty or zero.
+fn make_file_entry(path: &str) -> FileEntry {
+    FileEntry {
+        path: path.to_owned(),
+        // Size and length figures are all zero.
+        size_bytes: 0,
+        line_count: 0,
+        doc_length: 0,
+        simhash: 0,
+        // Every collection starts out empty.
+        headings: vec![],
+        keywords: vec![],
+        body_keywords: vec![],
+        links: vec![],
+        term_frequencies: HashMap::new(),
+        minhash: vec![],
+        section_fingerprints: vec![],
+        adr_references: vec![],
+    }
+}
+
+/// Test helper: wraps `files` in a `ForwardIndex`, keyed by each entry's `path`, with
+/// placeholder metadata (version 1, empty source root, no IDF data).
+fn make_forward_index(files: Vec<FileEntry>) -> ForwardIndex {
+    ForwardIndex {
+        // Each entry is stored under a clone of its own path.
+        files: files
+            .into_iter()
+            .map(|entry| (entry.path.clone(), entry))
+            .collect(),
+        indexed_at: String::from("now"),
+        version: 1,
+        source_root: String::new(),
+        avg_doc_length: 0.0,
+        idf_map: HashMap::new(),
+    }
+}
+
+/// Punctuation is dropped and case folded, while a hyphenated word stays one term.
+#[test]
+fn test_parse_query_terms_punctuation_hyphen_case() {
+    let terms = parse_query_terms("Hello, async-migration!", true);
+
+    for expected in ["hello", "async-migration"] {
+        assert!(terms.contains(&expected.to_string()));
+    }
+}
+
+/// A query made solely of stop words must yield no terms.
+#[test]
+fn test_parse_query_terms_stopwords_only() {
+    assert!(parse_query_terms("the and of", true).is_empty());
+}
+
+/// The default stop words are present on their own, and a custom stop-word file is
+/// merged on top of them (the custom entry "The" is found as "the").
+#[test]
+fn test_load_vocabulary_stopwords_merges_defaults_and_custom() {
+    let default_words = load_vocabulary_stopwords(None, true).unwrap();
+    assert!(default_words.contains("the"));
+    assert!(default_words.contains("using"));
+
+    // The fixture lives in the OS temp dir (not the current working directory) so the
+    // test never litters the repository; the process id keeps the name run-specific.
+    let file_name = format!("yore-vocabulary-stopwords-{}.txt", std::process::id());
+    let custom_path = std::env::temp_dir().join(file_name);
+    fs::write(&custom_path, "custom\nThe\nvocab-test\n").unwrap();
+    let loaded = load_vocabulary_stopwords(Some(Path::new(&custom_path)), true);
+
+    // Remove the fixture before asserting so a failed assertion leaves nothing behind.
+    fs::remove_file(&custom_path).unwrap();
+
+    let merged_words = loaded.unwrap();
+    assert!(merged_words.contains("custom"));
+    assert!(merged_words.contains("the"));
+    assert!(merged_words.contains("vocab-test"));
+}
+
+/// With defaults disabled and no custom file, the stop-word set must be empty.
+#[test]
+fn test_load_vocabulary_stopwords_can_disable_defaults() {
+    let stopwords = load_vocabulary_stopwords(None, false).unwrap();
+
+    for word in ["the", "and"] {
+        assert!(!stopwords.contains(word));
+    }
+    assert!(stopwords.is_empty());
+}
+
+/// Of three candidate terms, a limit of 2 is expected to select "build" and "yore"
+/// (the two with the highest `term_freq`) and nothing else.
+#[test]
+fn test_build_auto_common_vocabulary_stopwords() {
+    // All candidates share the same placeholder location ("a", line 1).
+    let candidate = |term: &'static str, term_freq, doc_freq, heading: &'static str| {
+        VocabularyCandidateTerm {
+            term: term.into(),
+            surface: None,
+            term_freq,
+            doc_freq,
+            first_file: "a".into(),
+            first_line: 1,
+            first_heading: heading.into(),
+        }
+    };
+    let candidates = vec![
+        candidate("build", 12, 2, "Build"),
+        candidate("yore", 9, 3, "Yore"),
+        candidate("indexer", 8, 5, "Index"),
+    ];
+
+    let common = build_auto_common_vocabulary_stopwords(&candidates, 2);
+
+    assert!(common.contains("build"));
+    assert!(common.contains("yore"));
+    assert_eq!(common.len(), 2);
+}
+
+/// Table-driven check of which sample terms `is_hygienic_vocabulary_term` accepts.
+#[test]
+fn test_is_hygienic_vocabulary_term() {
+    // (term, expected verdict)
+    let cases = [
+        ("th", false),
+        ("yore", true),
+        ("a1234567890", false),
+        ("12345", false),
+        ("v2.0", false),
+        ("x", false),
+    ];
+
+    for (term, expected) in cases {
+        assert_eq!(is_hygienic_vocabulary_term(term), expected, "term {term:?}");
+    }
+}
+
+/// Limiting three terms to two keeps the first two in order and still reports the
+/// original total of three.
+#[test]
+fn test_apply_vocabulary_limit_preserves_total_and_truncates_terms() {
+    let term = |name: &'static str, score, count| VocabularyTerm {
+        term: name.into(),
+        score,
+        count,
+    };
+    let terms = vec![
+        term("alpha", 3.0, 4),
+        term("beta", 2.0, 3),
+        term("gamma", 1.0, 2),
+    ];
+
+    let (clipped, total) = apply_vocabulary_limit(terms, 2);
+
+    // `total` counts the terms before clipping.
+    assert_eq!(total, 3);
+    assert_eq!(clipped.len(), 2);
+    assert_eq!(clipped[0].term, "alpha");
+    assert_eq!(clipped[1].term, "beta");
+}
+
+/// The line renderer emits one term per line, without scores or counts.
+#[test]
+fn test_render_vocabulary_lines() {
+    let term = |name: &'static str, score, count| VocabularyTerm {
+        term: name.into(),
+        score,
+        count,
+    };
+    let terms = vec![term("alpha", 1.2, 7), term("beta", 0.9, 5)];
+
+    let rendered = render_vocabulary_lines(&terms);
+
+    assert_eq!(rendered, "alpha\nbeta");
+}
+
+/// The prompt renderer joins terms with ", " after dropping the NUL byte and
+/// collapsing surplus whitespace, as the expected string below shows.
+#[test]
+fn test_render_vocabulary_prompt_normalizes_terms() {
+    // Score and count are the same for every term; only the text varies.
+    let term = |text: &'static str| VocabularyTerm {
+        term: text.into(),
+        score: 1.0,
+        count: 2,
+    };
+    let terms = vec![
+        term("alpha beta"),
+        term("gamma\x00delta"),
+        term("  spaced   out  "),
+    ];
+
+    let prompt = render_vocabulary_prompt(&terms);
+
+    assert_eq!(prompt, "alpha beta, gammadelta, spaced out");
+}
+
+/// Each serialized term exposes `term`, `score` and `count` keys inside the `terms`
+/// array of a `VocabularyResult`.
+#[test]
+fn test_vocabulary_term_json_shape() {
+    let term = |name: &'static str, score, count| VocabularyTerm {
+        term: name.into(),
+        score,
+        count,
+    };
+    let result = VocabularyResult {
+        format: "json".into(),
+        limit: 2,
+        total: 3,
+        terms: vec![term("alpha", 2.0, 7), term("beta", 1.1, 4)],
+        stopwords: None,
+        used_default_stopwords: true,
+        auto_common_terms: None,
+        include_stemming: false,
+    };
+
+    let json_value: serde_json::Value = serde_json::to_value(&result).unwrap();
+    let first_term = &json_value["terms"][0];
+
+    assert_eq!(first_term["term"], "alpha");
+    assert_eq!(first_term["score"], 2.0);
+    assert_eq!(first_term["count"], 7);
+    assert_eq!(json_value["terms"].as_array().unwrap().len(), 2);
+}
+
+/// One posting carries the heading "alpha term" and the other file lists "term" among
+/// its keywords; the resolved surface for "term" is expected to be "term".
+#[test]
+fn test_resolve_vocabulary_surface_prefers_heading_surface() {
+    let posting = |file: &str, line, heading: Option<&str>| ReverseEntry {
+        file: file.to_string(),
+        line: Some(line),
+        heading: heading.map(String::from),
+        level: None,
+    };
+    let postings = vec![
+        posting("notes.md", 10, Some("alpha term")),
+        posting("guide.md", 2, None),
+    ];
+
+    // Only guide.md has keywords; its other fields keep the helper's empty/zero values.
+    let guide = FileEntry {
+        keywords: vec!["term".to_owned(), "other".to_owned()],
+        body_keywords: vec!["term".to_owned()],
+        ..make_file_entry("guide.md")
+    };
+    let forward = make_forward_index(vec![make_file_entry("notes.md"), guide]);
+
+    let resolved = resolve_vocabulary_surface("term", &postings, Some(&forward)).unwrap();
+
+    assert_eq!(resolved, "term");
+}
+
+/// Neither posting has a heading; the expected surface "word" appears only among the
+/// forward-index keywords of the two files.
+#[test]
+fn test_resolve_vocabulary_surface_fallbacks_to_forward_index() {
+    let posting = |file: &str, line| ReverseEntry {
+        file: file.to_string(),
+        line: Some(line),
+        heading: None,
+        level: None,
+    };
+    let postings = vec![posting("notes.md", 10), posting("guide.md", 2)];
+
+    // Both files list "word" as their only keyword; every other field keeps the
+    // helper's empty/zero value.
+    let entry_with_word = |path: &str| FileEntry {
+        keywords: vec!["word".to_owned()],
+        ..make_file_entry(path)
+    };
+    let forward = make_forward_index(vec![
+        entry_with_word("notes.md"),
+        entry_with_word("guide.md"),
+    ]);
+
+    let resolved = resolve_vocabulary_surface("word", &postings, Some(&forward)).unwrap();
+
+    assert_eq!(resolved, "word");
+}
+
+#[test]
+fn test_parse_query_terms_mixed_case() {
+    let expected: Vec<String> = vec!["test".into(), "case".into()]; // lower-cased terms
+    assert_eq!(parse_query_terms("TeSt CaSe", true), expected);
+}
+
+#[test]
+fn test_parse_query_phrases() {
+    // The quoted words must show up in `terms` alongside the unquoted one ...
+    let query = parse_query("\"async migration\" plan", true);
+    let expected_terms: Vec<String> = ["async", "migration", "plan"]
+        .iter()
+        .map(|word| word.to_string())
+        .collect();
+    assert_eq!(query.terms, expected_terms);
+    // ... and additionally form exactly one phrase group of their own.
+    assert_eq!(query.phrases.len(), 1);
+    let expected_phrase: Vec<String> = ["async", "migration"]
+        .iter()
+        .map(|word| word.to_string())
+        .collect();
+    assert_eq!(query.phrases[0].terms, expected_phrase);
+}
+
+#[test]
+fn test_expand_from_files_args_supports_list() {
+    let dir = std::env::temp_dir().join(format!(
+        "yore-test-expand-{}",
+        std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap()
+            .as_nanos()
+    ));
+    fs::create_dir_all(&dir).unwrap();
+    let list_path = dir.join("files.txt");
+    fs::write(&list_path, "docs/a.md\n\n docs/b.md\n").unwrap();
+    let args = vec![
+        format!("@{}", list_path.to_string_lossy()),
+        "docs/c.md".to_string(),
+    ];
+    let expanded = expand_from_files_args(&args);
+    // Best-effort cleanup before asserting, so a failing assertion leaves nothing behind.
+    let _ = fs::remove_dir_all(&dir);
+    assert_eq!(
+        expanded.unwrap(),
+        vec![
+            "docs/a.md".to_string(),
+            "docs/b.md".to_string(),
+            "docs/c.md".to_string()
+        ]
+    );
+}
+
+#[test]
+fn test_resolve_from_files_reports_missing() {
+    let forward_index = make_forward_index(vec![make_file_entry("docs/a.md")]);
+    let requested = vec![String::from("./docs/a.md"), String::from("docs/missing.md")];
+    let (found, not_found) = resolve_from_files(&requested, &forward_index);
+    assert_eq!(found, vec![String::from("docs/a.md")]); // "./" prefix resolved to the indexed path
+    assert_eq!(not_found, vec![String::from("docs/missing.md")]);
+}
+
+#[test]
+fn test_collect_sections_for_files_max_sections() {
+    let dir = std::env::temp_dir().join(format!(
+        "yore-test-sections-{}",
+        std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap()
+            .as_nanos()
+    ));
+    fs::create_dir_all(&dir).unwrap();
+    let file_path = dir.join("doc.md");
+    fs::write(&file_path, "# Title\n\nBody\n\n## Sub\n\nMore").unwrap();
+    let file_path_str = file_path.to_string_lossy().to_string();
+    let entry = FileEntry {
+        path: file_path_str.clone(),
+        size_bytes: 0,
+        line_count: 0,
+        headings: Vec::new(),
+        keywords: Vec::new(),
+        body_keywords: Vec::new(),
+        links: Vec::new(),
+        simhash: 0,
+        term_frequencies: HashMap::new(),
+        doc_length: 0,
+        minhash: Vec::new(),
+        section_fingerprints: vec![
+            SectionFingerprint {
+                heading: "Title".to_string(),
+                level: 1,
+                line_start: 1,
+                line_end: 3,
+                simhash: 0,
+            },
+            SectionFingerprint {
+                heading: "Sub".to_string(),
+                level: 2,
+                line_start: 5,
+                line_end: 6,
+                simhash: 0,
+            },
+        ],
+        adr_references: vec![],
+    };
+    let index = make_forward_index(vec![entry]);
+    let sections = collect_sections_for_files(&[file_path_str], &index, "", 1);
+    let _ = fs::remove_dir_all(&dir); // best-effort cleanup of the scratch directory
+    assert_eq!(sections.len(), 1); // two sections are indexed, but the cap passed above is 1
+}
+
+#[test]
+fn test_build_mcp_handle_is_stable() {
+    let auth_section = SectionMatch {
+        doc_path: String::from("docs/aa-auth.md"),
+        heading: String::from("Authentication Overview"),
+        line_start: 1,
+        line_end: 11,
+        bm25_score: 0.25,
+        content: String::from("# Authentication Overview\n\nAuthentication flow"),
+        canonicality: 0.5,
+    };
+    // Building the handle twice from identical inputs must give identical results.
+    let first = build_mcp_handle("authentication", &auth_section);
+    let second = build_mcp_handle("authentication", &auth_section);
+    assert_eq!(first, second);
+    // Handles carry the "ctx_" prefix.
+    assert!(first.starts_with("ctx_"));
+}
diff --git a/src/types.rs b/src/types.rs
new file mode 100644
index 0000000..15ccebc
--- /dev/null
+++ b/src/types.rs
@@ -0,0 +1,978 @@
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct Question { // serde round-trippable question record (presumably one eval question — confirm)
+    pub id: usize,
+    pub q: String,
+    pub expect: Vec<String>,
+    #[serde(default)] // `min_hits` may be absent from the input; it then deserializes as None
+    pub min_hits: Option<usize>,
+}
+
+#[derive(Debug, Clone)]
+pub struct EvalResult { // in-memory only (no serde derives); presumably the outcome for one Question
+    pub id: usize,
+    pub question: String,
+    pub hits: usize,
+    pub total: usize,
+    pub passed: bool,
+    pub tokens: usize,
+}
+
+// Link checking structures (serialize-only report types)
+#[derive(Serialize, Debug, Clone)]
+pub struct BrokenLink { // one reported link: where it was found, its text/target, and an error string
+    pub source_file: String,
+    pub line_number: usize,
+    pub link_text: String,
+    pub link_target: String,
+    pub error: String,
+    pub anchor: Option<String>, // no skip attribute: serialized as null when None
+    #[serde(skip_serializing_if = "Option::is_none")] // `context` is left out of the output when None
+    pub context: Option<String>,
+}
+
+#[derive(Serialize, Debug, Clone, PartialEq, Eq, Hash)]
+#[serde(rename_all = "snake_case")] // variants serialize as "doc_missing", "code_missing", ...
+pub enum LinkKind { // link classification; Eq + Hash, so usable as a map/set key
+    DocMissing,
+    CodeMissing,
+    Placeholder,
+    CodeReference,
+    DirectoryReference,
+    ExternalReference,
+    AnchorMissing,
+    AnchorUnverified,
+}
+
+#[derive(Serialize, Debug)]
+pub struct LinkSummaryByFile { // per-file summary row (serialize-only)
+    pub file: String,
+    pub counts: HashMap<String, usize>, // string key -> count; keys presumably link-kind names — confirm
+}
+
+#[derive(Serialize, Debug)]
+pub struct LinkSummaryByKind { // per-kind summary row (serialize-only)
+    pub kind: String, // kind as a plain string, not the LinkKind enum
+    pub count: usize,
+}
+
+#[derive(Serialize, Debug)]
+pub struct LinkCheckSummary { // bundles the per-file and per-kind summary rows
+    pub by_file: Vec<LinkSummaryByFile>,
+    pub by_kind: Vec<LinkSummaryByKind>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct LinkCheckResult { // top-level link-check report (serialize-only)
+    pub total_links: usize,
+    pub valid_links: usize,
+    pub broken_links: usize, // a count; the individual records are in `broken`
+    pub broken: Vec<BrokenLink>,
+    #[serde(skip_serializing_if = "Option::is_none")] // `summary` is left out of the output when None
+    pub summary: Option<LinkCheckSummary>,
+}
+
+// Diff output structure: comparison of two files (serialize-only)
+#[derive(Serialize, Debug)]
+pub struct DiffResult {
+    pub file1: String,
+    pub file2: String,
+    pub similarity: DiffSimilarity,
+    pub shared_keywords: Vec<String>,
+    pub only_in_file1: Vec<String>, // presumably keywords unique to file1 — confirm against producer
+    pub only_in_file2: Vec<String>, // presumably keywords unique to file2 — confirm against producer
+    pub shared_headings: Vec<String>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct DiffSimilarity { // three f64 similarity figures; their scale/weighting is not defined here
+    pub combined: f64,
+    pub jaccard: f64,
+    pub simhash: f64,
+}
+
+// Stats output structure: index-wide counters (serialize-only)
+#[derive(Serialize, Debug)]
+pub struct StatsResult {
+    pub total_files: usize,
+    pub unique_keywords: usize,
+    pub total_headings: usize,
+    pub body_keywords: usize,
+    pub total_links: usize,
+    pub index_version: u32, // same type as ForwardIndex::version
+    pub indexed_at: String, // timestamp kept as a string; format not defined in this file
+    pub top_keywords: Vec<KeywordCount>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct KeywordCount { // keyword/count pair, element type of StatsResult::top_keywords
+    pub keyword: String,
+    pub count: usize,
+}
+
+#[derive(Serialize, Debug)]
+pub struct VocabularyResult { // vocabulary report (serialize-only)
+    pub format: String,
+    pub limit: usize,
+    pub total: usize,
+    pub terms: Vec<VocabularyTerm>,
+    #[serde(skip_serializing_if = "Option::is_none")] // `stopwords` is left out of the output when None
+    pub stopwords: Option<String>,
+    pub used_default_stopwords: bool,
+    #[serde(skip_serializing_if = "Option::is_none")] // `auto_common_terms` is left out when None
+    pub auto_common_terms: Option<usize>,
+    pub include_stemming: bool,
+}
+
+#[derive(Serialize, Debug)]
+pub struct VocabularyTerm { // one row of VocabularyResult::terms
+    pub term: String,
+    pub score: f64,
+    pub count: usize,
+}
+
+#[derive(Debug, Clone)]
+pub struct VocabularyCandidateTerm { // in-memory only (no serde derives)
+    pub term: String,
+    pub surface: Option<String>, // optional surface form; cf. resolve_vocabulary_surface in the tests
+    pub term_freq: usize,
+    pub doc_freq: usize,
+    pub first_file: String,
+    pub first_line: usize,
+    pub first_heading: String,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct VocabularyOptions<'a> { // Copy option bundle; borrows the stopwords path for 'a
+    pub stopwords: Option<&'a Path>,
+    pub include_stemming: bool,
+    pub no_default_stopwords: bool,
+    pub common_terms: usize,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct McpScoreBreakdown { // round-trippable score triple; how `combined` is derived is not shown here
+    pub bm25: f64,
+    pub canonicality: f64,
+    pub combined: f64,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct McpSourceRef { // round-trippable pointer to a section: path, heading and a line range
+    pub path: String,
+    pub heading: String,
+    pub line_start: usize,
+    pub line_end: usize,
+}
+
+#[derive(Serialize, Debug, Default, Clone)]
+pub struct McpPressure { // Default value: truncated = false, reasons = []
+    pub truncated: bool,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")] // omitted from output when empty
+    pub reasons: Vec<String>,
+}
+
+#[derive(Serialize, Debug, Default)]
+pub struct McpSearchBudget { // all-usize counters; Default zeroes every field
+    pub max_results: usize,
+    pub max_tokens: usize,
+    pub max_bytes: usize,
+    pub returned_results: usize,
+    pub candidate_hits: usize,
+    pub deduped_hits: usize,
+    pub omitted_hits: usize,
+    pub estimated_tokens: usize,
+    pub bytes: usize,
+}
+
+#[derive(Serialize, Debug, Default)]
+pub struct McpFetchBudget { // all-usize counters; Default zeroes every field
+    pub max_tokens: usize,
+    pub max_bytes: usize,
+    pub estimated_tokens: usize,
+    pub bytes: usize,
+}
+
+#[derive(Serialize, Debug)]
+pub struct McpSearchResult { // one entry of McpSearchResponse::results (serialize-only)
+    pub handle: String, // cf. build_mcp_handle in the tests, whose output starts with "ctx_"
+    pub rank: usize,
+    pub source: McpSourceRef,
+    pub scores: McpScoreBreakdown,
+    pub preview: String,
+    pub preview_tokens: usize,
+    pub preview_bytes: usize,
+    pub truncated: bool,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")] // omitted from output when empty
+    pub truncation_reasons: Vec<String>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct McpFetchResult { // payload of McpFetchResponse::result (serialize-only)
+    pub source: McpSourceRef,
+    pub scores: McpScoreBreakdown,
+    pub preview: String,
+    pub content: String,
+    pub content_tokens: usize,
+    pub content_bytes: usize,
+}
+
+#[derive(Serialize, Debug)]
+pub struct McpSearchResponse { // search response envelope (serialize-only)
+    pub schema_version: u32, // same type as the MCP_SCHEMA_VERSION constant
+    pub tool: String,
+    pub query: String,
+    pub selection_mode: String,
+    pub budget: McpSearchBudget,
+    pub pressure: McpPressure,
+    pub results: Vec<McpSearchResult>,
+    #[serde(skip_serializing_if = "Option::is_none")] // `error` is left out of the output when None
+    pub error: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")] // `message` is left out of the output when None
+    pub message: Option<String>,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")] // omitted from output when empty
+    pub missing_files: Vec<String>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct McpFetchResponse { // fetch response envelope (serialize-only); every Option below is skipped when None
+    pub schema_version: u32, // same type as the MCP_SCHEMA_VERSION constant
+    pub tool: String,
+    pub handle: String,
+    pub budget: McpFetchBudget,
+    pub pressure: McpPressure,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub query: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub result: Option<McpFetchResult>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub message: Option<String>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct McpArtifact { // round-trippable (Serialize + Deserialize) record; every field is required on read
+    pub schema_version: u32,
+    pub handle: String,
+    pub query: String,
+    pub source: McpSourceRef,
+    pub scores: McpScoreBreakdown,
+    pub preview: String,
+    pub content: String,
+    pub created_at: String, // timestamp kept as a string; format not defined in this file
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct McpSearchOptions { // Copy bundle of the three search limits (no serde derives)
+    pub max_results: usize,
+    pub max_tokens: usize,
+    pub max_bytes: usize,
+}
+
+#[derive(Debug, Clone, Copy)]
+pub struct McpFetchOptions { // Copy bundle of the two fetch limits (no serde derives)
+    pub max_tokens: usize,
+    pub max_bytes: usize,
+}
+
+pub const DEFAULT_MCP_PROTOCOL_VERSION: &str = "2025-11-25"; // date-style version string; presumably the fallback when a client names none — confirm
+
+#[derive(Debug, Default, Deserialize)]
+#[serde(default, rename_all = "camelCase")] // field is read from "protocolVersion"; absent -> None
+pub struct McpInitializeParams {
+    pub protocol_version: Option<String>,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct JsonRpcRequest { // incoming request; only `method` is mandatory when deserializing
+    #[serde(default)] // absent -> None
+    pub jsonrpc: Option<String>,
+    #[serde(default)] // absent -> None
+    pub id: Option<serde_json::Value>,
+    pub method: String,
+    #[serde(default)] // absent -> serde_json::Value's Default, i.e. Null
+    pub params: serde_json::Value,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct McpToolCallParams { // tool-call parameters; `name` is mandatory when deserializing
+    pub name: String,
+    #[serde(default)] // absent -> serde_json::Value's Default, i.e. Null
+    pub arguments: serde_json::Value,
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(default)] // any field missing from the input takes its value from the Default impl
+pub struct McpSearchToolArgs {
+    pub query: String,
+    pub from_files: Vec<String>,
+    pub max_results: usize,
+    pub max_tokens: usize,
+    pub max_bytes: usize,
+    pub index: Option<PathBuf>,
+}
+
+impl Default for McpSearchToolArgs {
+    fn default() -> Self {
+        McpSearchToolArgs {
+            index: None,
+            from_files: vec![],
+            query: String::default(), // empty query
+            max_results: 5, // default result cap
+            max_tokens: 1200, // default token budget
+            max_bytes: 12000, // default byte budget
+        }
+    }
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(default)] // any field missing from the input takes its value from the Default impl
+pub struct McpFetchToolArgs {
+    pub handle: String,
+    pub max_tokens: usize,
+    pub max_bytes: usize,
+    pub index: Option<PathBuf>,
+}
+
+impl Default for McpFetchToolArgs {
+    fn default() -> Self {
+        McpFetchToolArgs {
+            index: None,
+            handle: String::default(), // empty handle
+            max_tokens: 4000, // default token budget
+            max_bytes: 20000, // default byte budget
+        }
+    }
+}
+
+// Mv output structure (serialize-only)
+#[derive(Serialize, Debug)]
+pub struct MvResult {
+    pub from: String,
+    pub to: String,
+    pub moved: bool,
+    pub updated_files: Vec<String>, // presumably files whose references were rewritten — confirm
+}
+
+// FixReferences output structure (serialize-only)
+#[derive(Serialize, Debug)]
+pub struct FixReferencesResult {
+    pub mapping_file: String,
+    pub mappings_count: usize,
+    pub updated_files: Vec<String>,
+    pub applied: bool, // presumably false for a dry run — confirm against the command
+}
+
+// Build output structure (serialize-only)
+#[derive(Serialize, Debug)]
+pub struct BuildResult {
+    pub index_path: String,
+    pub files_indexed: usize,
+    pub total_headings: usize,
+    pub total_links: usize,
+    pub unique_keywords: usize,
+    pub duration_ms: u128, // u128 matches the return type of std::time::Duration::as_millis
+    #[serde(skip_serializing_if = "Option::is_none")] // `renames_tracked` is left out when None
+    pub renames_tracked: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")] // `total_relations` is left out when None
+    pub total_relations: Option<usize>,
+}
+
+// Eval JSON output structure (serialize-only)
+#[derive(Serialize, Debug)]
+pub struct EvalJsonResult {
+    pub questions_file: String,
+    pub total_questions: usize,
+    pub passed: usize,
+    pub failed: usize,
+    pub pass_rate: f64, // scale (fraction vs. percent) is not defined in this file — confirm
+    pub results: Vec<EvalQuestionResult>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct EvalQuestionResult { // one entry of EvalJsonResult::results
+    pub question: String,
+    pub passed: bool,
+    pub expected: Vec<String>,
+    pub found: Vec<String>,
+    pub missing: Vec<String>,
+}
+
+// Policy / taxonomy structures
+#[derive(Debug, Deserialize, Default)]
+pub struct PolicyRule { // deserialize-only; `pattern` is the one field without #[serde(default)], so it is mandatory
+    /// Glob pattern to match files (e.g., "agents/plans/*.md")
+    pub pattern: String,
+    /// Required substrings that must appear in matching files
+    #[serde(default)]
+    pub must_contain: Vec<String>,
+    /// Substrings that must NOT appear in matching files
+    #[serde(default)]
+    pub must_not_contain: Vec<String>,
+    /// Optional rule name (for clearer reporting)
+    #[serde(default)]
+    pub name: Option<String>,
+    /// Optional severity ("error" or "warn"), defaults to "error"
+    #[serde(default)] // absent -> None; the "error" fallback is not applied by this struct
+    pub severity: Option<String>,
+    /// Optional minimum document length in lines
+    #[serde(default)]
+    pub min_length: Option<usize>,
+    /// Optional maximum document length in lines
+    #[serde(default)]
+    pub max_length: Option<usize>,
+    /// Optional maximum section length in lines
+    #[serde(default)]
+    pub max_section_length: Option<usize>,
+    /// Optional regex to scope section-length rules to matching headings
+    #[serde(default)]
+    pub section_heading_regex: Option<String>,
+    /// Required markdown headings (by text, without leading '#')
+    #[serde(default)]
+    pub required_headings: Vec<String>,
+    /// Forbidden markdown headings (by text, without leading '#')
+    #[serde(default)]
+    pub forbidden_headings: Vec<String>,
+    /// Required markdown link targets (resolved relative to file)
+    #[serde(default)]
+    pub must_link_to: Vec<String>,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct PolicyConfig { // deserialized policy document: a list of rules
+    #[serde(default)] // a document without `rules` yields an empty list
+    pub rules: Vec<PolicyRule>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct PolicyViolation { // one reported violation (serialize-only)
+    pub file: String,
+    pub rule: String,
+    pub message: String,
+    pub severity: String,
+    /// Always "policy_violation" so agents can key off kind
+    pub kind: String,
+}
+
+#[derive(Serialize, Debug)]
+pub struct PolicyCheckResult { // policy-check report (serialize-only)
+    pub policy_file: String,
+    pub total_violations: usize,
+    pub violations: Vec<PolicyViolation>,
+}
+
+#[derive(Serialize, Debug, Default)]
+pub struct CombinedCheckResult { // Default = all None; each sub-report is left out of the output when None
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub links: Option<LinkCheckResult>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub policy: Option<PolicyCheckResult>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stale: Option<StaleResult>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct StaleFile { // one entry of StaleResult::files
+    pub file: String,
+    pub days_since_modified: u64,
+    pub inbound_links: usize,
+}
+
+#[derive(Serialize, Debug)]
+pub struct StaleResult { // stale-file report (serialize-only)
+    pub total_stale: usize,
+    pub files: Vec<StaleFile>,
+}
+
+#[derive(Serialize, Debug, Clone)]
+pub struct HealthIssue { // one finding: a measured `value` reported together with a `threshold`
+    pub kind: String,
+    pub severity: String,
+    pub message: String,
+    pub value: usize,
+    pub threshold: usize,
+}
+
+#[derive(Serialize, Debug, Clone)]
+pub struct HealthFileResult { // per-file health entry
+    pub file: String,
+    pub status: String, // free-form string; the allowed values are not defined in this file
+    pub issues: Vec<HealthIssue>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct HealthResult { // health report (serialize-only)
+    pub total_files: usize,
+    pub unhealthy_files: usize,
+    pub warning_files: usize,
+    pub files: Vec<HealthFileResult>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct GraphNode { // graph export node
+    pub id: String, // presumably a document path — confirm against the exporter
+}
+
+#[derive(Serialize, Debug)]
+pub struct GraphEdge { // graph export edge from `source` to `target`
+    pub source: String,
+    pub target: String,
+    #[serde(skip_serializing_if = "Option::is_none")] // `anchor` is left out of the output when None
+    pub anchor: Option<String>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct GraphExport { // complete graph export: node list plus edge list
+    pub nodes: Vec<GraphNode>,
+    pub edges: Vec<GraphEdge>,
+}
+
+// Relation extraction structs (YEH-004)
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct SectionRef { // derived ordering compares `heading` first, then `line_start`
+    pub heading: String,
+    pub line_start: usize,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+#[serde(rename_all = "snake_case")] // "links_to", "section_links_to", "adr_reference"
+pub enum RelationKind { // derived ordering follows declaration order: LinksTo < SectionLinksTo < AdrReference
+    LinksTo,
+    SectionLinksTo,
+    AdrReference,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct RelationEdge { // derived ordering is lexicographic over the fields in declaration order
+    pub source: String,
+    pub target: String,
+    pub kind: RelationKind,
+    #[serde(skip_serializing_if = "Option::is_none")] // each Option below is left out of the output when None
+    pub anchor: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub source_section: Option<SectionRef>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub target_section: Option<SectionRef>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub raw_text: Option<String>,
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct RelationIndex { // persisted edge list; read from relations.json by util::load_relation_index
+    pub version: u32,
+    pub indexed_at: String,
+    pub total_edges: usize, // stored separately from `edges`; nothing in this type keeps the two in sync
+    pub edges: Vec<RelationEdge>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct AdrRef { // element type of FileEntry::adr_references
+    pub line: usize,
+    pub raw_text: String,
+    pub normalized_id: String,
+}
+
+#[derive(Serialize, Debug)]
+pub struct ConsolidationGroup { // one entry of ConsolidationResult::groups
+    pub canonical: String,
+    pub merge_into: Vec<String>, // NOTE(review): merge direction relative to `canonical` is unclear from the name — confirm
+    pub canonical_score: f64,
+    pub avg_similarity: f64,
+    pub note: String,
+}
+
+#[derive(Serialize, Debug)]
+pub struct ConsolidationResult { // consolidation report (serialize-only)
+    pub total_groups: usize,
+    pub groups: Vec<ConsolidationGroup>,
+}
+
+#[derive(Serialize, Debug, Clone)]
+pub struct LinkFix { // one link-target replacement within `file`
+    pub file: String,
+    pub old_target: String,
+    pub new_target: String,
+}
+
+// Proposal structures for agent-friendly fix-links
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct LinkFixProposal { // round-trippable: written out, then read back with `decision` filled in
+    pub source: String,
+    pub line: usize,
+    pub broken_target: String,
+    pub candidates: Vec<String>,
+    #[serde(skip_serializing_if = "Option::is_none")] // left out when None; an absent key reads back as None
+    pub decision: Option<usize>, // Index into candidates, or None to skip
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct LinkFixProposalFile { // on-disk container for proposals; both fields are mandatory on read
+    /// Schema version for forward compatibility
+    pub version: u32,
+    /// Proposals for ambiguous link fixes
+    pub proposals: Vec<LinkFixProposal>,
+}
+
+// Backlinks structures
+#[derive(Serialize, Debug, Clone)]
+pub struct Backlink { // one entry of BacklinksResult::backlinks
+    pub source_file: String,
+    pub link_text: String,
+    pub link_target: String,
+    pub anchor: Option<String>, // no skip attribute: serialized as null when None
+}
+
+#[derive(Debug, Deserialize)]
+pub struct ReferenceMapping { // one from -> to rewrite; both fields are mandatory when deserializing
+    pub from: String,
+    pub to: String,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct ReferenceMappingConfig { // deserialized mapping document
+    #[serde(default)] // a document without `mappings` yields an empty list
+    pub mappings: Vec<ReferenceMapping>,
+}
+
+#[derive(Serialize, Debug)]
+pub struct BacklinksResult { // backlinks report for `target_file` (serialize-only)
+    pub target_file: String,
+    pub total_backlinks: usize,
+    pub backlinks: Vec<Backlink>,
+}
+
+// Orphans structures
+#[derive(Serialize, Debug, Clone)]
+pub struct OrphanFile { // one entry of OrphansResult::orphans
+    pub file: String,
+    pub size_bytes: u64, // same type as FileEntry::size_bytes
+    pub line_count: usize, // same type as FileEntry::line_count
+}
+
+#[derive(Serialize, Debug)]
+pub struct OrphansResult { // orphan report (serialize-only)
+    pub total_orphans: usize,
+    pub orphans: Vec<OrphanFile>,
+}
+
+#[derive(Serialize, Debug, Clone)]
+pub struct CanonicalOrphan { // one entry of CanonicalOrphansResult::orphans
+    pub file: String,
+    pub canonicality: f64,
+    pub inbound_links: usize,
+}
+
+#[derive(Serialize, Debug)]
+pub struct CanonicalOrphansResult { // canonical-orphan report (serialize-only)
+    pub total_orphans: usize,
+    pub threshold: f64, // presumably the canonicality cut-off that was applied — confirm
+    pub orphans: Vec<CanonicalOrphan>,
+}
+
+// Canonicality structures
+#[derive(Serialize, Debug, Clone)]
+pub struct CanonicalityScore { // one entry of CanonicalityResult::files
+    pub file: String,
+    pub score: f64,
+    pub reasons: Vec<String>, // textual reasons accompanying the score
+}
+
+#[derive(Serialize, Debug)]
+pub struct CanonicalityResult { // canonicality report (serialize-only)
+    pub total_files: usize,
+    pub files: Vec<CanonicalityScore>,
+}
+
+// Index structures
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct FileEntry { // per-file index record; #[serde(default)] fields may be absent and then read as empty/zero
+    pub path: String,
+    pub size_bytes: u64,
+    pub line_count: usize,
+    pub headings: Vec<Heading>,
+    pub keywords: Vec<String>,
+    pub body_keywords: Vec<String>, // keywords from full text
+    pub links: Vec<Link>,
+    pub simhash: u64, // content fingerprint
+    #[serde(default)]
+    pub term_frequencies: HashMap<String, usize>, // term counts for BM25
+    #[serde(default)]
+    pub doc_length: usize, // total terms for BM25
+    #[serde(default)]
+    pub minhash: Vec<u64>, // MinHash signature for LSH
+    #[serde(default)]
+    pub section_fingerprints: Vec<SectionFingerprint>, // section-level SimHash fingerprints
+    #[serde(default)]
+    pub adr_references: Vec<AdrRef>, // ADR references recorded for this file
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct Heading { // element type of FileEntry::headings
+    pub line: usize,
+    pub level: usize,
+    pub text: String,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct Link { // element type of FileEntry::links
+    pub line: usize,
+    pub text: String,
+    pub target: String,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct SectionFingerprint { // element type of FileEntry::section_fingerprints
+    pub heading: String,
+    pub level: usize,
+    pub line_start: usize, // the tests use 1-based values here (first section starts at 1)
+    pub line_end: usize,
+    pub simhash: u64,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct ReverseEntry { // one posting in ReverseIndex::keywords; only `file` is always present
+    pub file: String,
+    pub line: Option<usize>,
+    pub heading: Option<String>,
+    pub level: Option<usize>,
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct ForwardIndex { // read from forward_index.json by util::load_forward_index
+    pub files: HashMap<String, FileEntry>, // presumably keyed by file path — confirm against the builder
+    pub indexed_at: String,
+    pub version: u32, // index version for compatibility
+    #[serde(default)] // absent -> empty string
+    pub source_root: String,
+    #[serde(default)]
+    pub avg_doc_length: f64, // average document length for BM25
+    #[serde(default)]
+    pub idf_map: HashMap<String, f64>, // IDF scores for BM25
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct ReverseIndex { // read from reverse_index.json by util::load_reverse_index
+    pub keywords: HashMap<String, Vec<ReverseEntry>>, // keyword -> postings
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
+pub struct SectionMetrics { // element type of DocumentMetrics::sections; Default zeroes/empties every field
+    pub heading: String,
+    pub level: usize,
+    pub line_start: usize,
+    pub line_end: usize,
+    pub line_count: usize,
+    pub word_count: usize,
+    pub link_count: usize,
+    pub list_item_count: usize,
+    pub code_block_count: usize,
+    pub has_completion_marker: bool, // heuristic flag; its detection rule is not defined in this file
+    pub looks_like_part: bool, // heuristic flag; its detection rule is not defined in this file
+    pub looks_like_changelog: bool, // heuristic flag; its detection rule is not defined in this file
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
+pub struct DocumentMetrics { // value type of DocumentMetricsIndex::files; Default zeroes/empties every field
+    pub path: String,
+    pub line_count: usize,
+    pub word_count: usize,
+    pub heading_count: usize,
+    pub section_count: usize,
+    pub link_count: usize,
+    pub h1_count: usize,
+    pub h2_count: usize,
+    pub h3_count: usize,
+    pub h4_plus_count: usize, // presumably headings of level 4 and deeper — confirm
+    pub code_block_count: usize,
+    pub list_item_count: usize,
+    pub table_row_count: usize,
+    pub frontmatter_key_count: usize,
+    pub metadata_line_count: usize,
+    pub part_heading_count: usize,
+    pub completion_heading_count: usize,
+    pub changelog_heading_count: usize,
+    pub changelog_entry_count: usize,
+    pub longest_section_lines: usize,
+    pub sections: Vec<SectionMetrics>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, Default)]
+pub struct DocumentMetricsIndex { // read from document_metrics.json by util::load_document_metrics
+    pub indexed_at: String,
+    pub version: u32,
+    pub files: HashMap<String, DocumentMetrics>, // presumably keyed by file path — confirm
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+pub struct IndexStats { // round-trippable index totals; every field is mandatory on read
+    pub total_files: usize,
+    pub total_keywords: usize,
+    pub total_headings: usize,
+    pub total_links: usize,
+    pub indexed_at: String,
+}
+
+/// A single file rename event from git history
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct RenameEntry { // element type of RenameHistory::renames
+    /// The old path before the rename
+    pub old_path: String,
+    /// The new path after the rename
+    pub new_path: String,
+    /// Git commit hash where the rename occurred
+    pub commit: String,
+}
+
+/// Git rename history for tracking file moves
+#[derive(Serialize, Deserialize, Debug, Default)]
+pub struct RenameHistory { // Default = no renames and an empty timestamp
+    /// All rename events, ordered from oldest to newest
+    pub renames: Vec<RenameEntry>,
+    /// Indexed at timestamp
+    pub indexed_at: String,
+}
+
+#[derive(Deserialize, Debug, Clone)]
+pub struct IndexProfileConfig { // value type of YoreConfig::index (one named profile)
+    #[serde(default)] // absent -> empty list
+    pub roots: Vec<String>,
+    #[serde(default)] // absent -> empty list
+    pub types: Vec<String>,
+    pub output: Option<String>, // serde reads a missing Option field as None
+}
+
+/// Severity override for link checking based on path patterns
+#[derive(Deserialize, Debug, Clone)]
+#[allow(dead_code)] // Config scaffolding for future severity filtering
+pub struct SeverityOverride { // both fields are mandatory when deserializing
+    pub pattern: String,
+    pub severity: String,
+}
+
+/// Link checking configuration
+#[derive(Deserialize, Debug, Clone, Default)]
+#[allow(dead_code)] // Config scaffolding for future exclude patterns
+pub struct LinkCheckConfig {
+    #[serde(default)] // absent -> empty list
+    pub exclude: Vec<String>,
+    #[serde(default, rename = "severity-overrides")] // read from the hyphenated key; absent -> empty list
+    pub severity_overrides: Vec<SeverityOverride>,
+}
+
+/// External repository configuration for cross-repo link validation
+#[derive(Deserialize, Debug, Clone)]
+pub struct ExternalRepo { // `path` is mandatory when deserializing
+    pub path: String,
+    #[serde(default)] // absent -> None
+    #[allow(dead_code)] // Config scaffolding for future prefix support
+    pub prefix: Option<String>,
+}
+
+/// External repositories configuration
+#[derive(Deserialize, Debug, Clone, Default)]
+pub struct ExternalConfig {
+    #[serde(default)] // absent -> empty list
+    pub repos: Vec<ExternalRepo>,
+}
+
+/// Policy configuration
+#[derive(Deserialize, Debug, Clone, Default)]
+#[allow(dead_code)] // Config scaffolding for future policy file reference
+pub struct PolicyConfigRef {
+    #[serde(default, rename = "rules-file")] // read from the hyphenated key "rules-file"; absent -> None
+    pub rules_file: Option<String>,
+}
+
+#[derive(Deserialize, Debug, Clone, Default)]
+pub struct YoreConfig { // top-level config; every section has #[serde(default)], so an empty document deserializes
+    #[serde(default)]
+    pub index: HashMap<String, IndexProfileConfig>, // keyed by a string, presumably the profile name — confirm
+    #[serde(default, rename = "link-check")] // read from the hyphenated key "link-check"
+    #[allow(dead_code)] // Config scaffolding
+    pub link_check: Option<LinkCheckConfig>,
+    #[serde(default)]
+    #[allow(dead_code)] // Config scaffolding
+    pub policy: Option<PolicyConfigRef>,
+    #[serde(default)]
+    pub external: Option<ExternalConfig>,
+}
+
+// Assembly / context selection types (in-memory only, no serde derives)
+
+#[derive(Debug, Clone)]
+pub struct SectionMatch { // a section together with its text and two scores
+    pub doc_path: String,
+    pub heading: String,
+    pub line_start: usize,
+    pub line_end: usize,
+    pub bm25_score: f64,
+    pub content: String,
+    pub canonicality: f64,
+}
+
+pub const MCP_SCHEMA_VERSION: u32 = 1; // same type as the `schema_version` fields of the Mcp* response types
+
+#[derive(Debug, Clone)]
+pub struct ContextSelection { // selected sections plus two query strings (in-memory only)
+    pub query_label: String,
+    pub query_for_refiner: String,
+    pub sections: Vec<SectionMatch>,
+}
+
+#[derive(Debug, Clone)]
+pub enum ContextSelectionIssue { // ways a context selection can come back without sections
+    NoSearchableTerms,
+    MissingFiles(Vec<String>), // carries a list of strings, presumably the missing paths — confirm
+    NoIndexedFilesMatched,
+    NoRelevantSections(String), // carries one string; its meaning is not defined in this file
+}
+
+#[derive(Debug, Clone)]
+pub struct RefinedSection { // a SectionMatch plus truncation bookkeeping (in-memory only)
+    pub section: SectionMatch,
+    pub truncated: bool,
+    pub truncation_reasons: Vec<String>,
+}
+
+// Search / query types
+
+#[derive(Debug, Clone)]
+pub struct ParsedQuery { // per the tests, words of a quoted phrase appear in `terms` and in a phrase group
+    pub terms: Vec<String>,
+    pub phrases: Vec<PhraseGroup>,
+}
+
+#[derive(Debug, Clone)]
+pub struct PhraseGroup { // element type of ParsedQuery::phrases
+    pub terms: Vec<String>, // the words of one phrase, in order
+}
+
+// Cross-reference / assembly types
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum RefType { // kind tag stored in CrossRef::ref_type; hashable
+    MarkdownLink,
+    AdrId,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct CrossRef { // Eq + Hash over all fields, so usable in a HashSet or as a HashMap key
+    pub ref_type: RefType,
+    pub origin_doc_path: String,
+    pub target_doc_path: String,
+    pub target_anchor: Option<String>,
+    pub raw_text: String,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub enum DocType { // derived ordering follows declaration order: Adr < Design < Ops < Other
+    Adr,
+    Design,
+    Ops,
+    Other,
+}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644
index 0000000..60d5a3a
--- /dev/null
+++ b/src/util.rs
@@ -0,0 +1,579 @@
+use std::collections::HashSet;
+use std::fs;
+use std::io;
+use std::path::{Path, PathBuf};
+
+use crate::types::*;
+
+// Helper functions
+
+/// Loads `forward_index.json` from `index_dir`; any read failure maps to "Index not found".
+pub fn load_forward_index(index_dir: &Path) -> Result<ForwardIndex, Box<dyn std::error::Error>> {
+    let raw = fs::read_to_string(index_dir.join("forward_index.json"))
+        .map_err(|_| "Index not found. Run 'yore build' first.")?;
+    serde_json::from_str(&raw).map_err(Into::into)
+}
+
+/// Load the relation index from `relations.json` inside `index_dir`.
+///
+/// Best-effort for backward compatibility: a missing or unreadable file and invalid JSON both
+/// yield an empty index (version 1, blank timestamp, zero edges) instead of an error.
+#[allow(dead_code)] // Used by upcoming YEH-005/006
+pub fn load_relation_index(index_dir: &Path) -> RelationIndex {
+    // Either failure mode (I/O or parse) collapses to `None`.
+    let parsed = fs::read_to_string(index_dir.join("relations.json"))
+        .ok()
+        .and_then(|raw| serde_json::from_str(&raw).ok());
+
+    // Fallback: the empty index.
+    parsed.unwrap_or_else(|| RelationIndex {
+        version: 1,
+        indexed_at: String::new(),
+        total_edges: 0,
+        edges: vec![],
+    })
+}
+
+/// Loads `document_metrics.json` from `index_dir`; a read failure yields the "Health metrics
+/// not found" hint, and malformed JSON propagates the serde_json error.
+pub fn load_document_metrics(
+    index_dir: &Path,
+) -> Result<DocumentMetricsIndex, Box<dyn std::error::Error>> {
+    let raw = fs::read_to_string(index_dir.join("document_metrics.json"))
+        .map_err(|_| "Health metrics not found. Re-run 'yore build' to persist document metrics.")?;
+    serde_json::from_str(&raw).map_err(Into::into)
+}
+
+/// Reads `reverse_index.json` under `index_dir`; read errors become the "Index not found" hint.
+pub fn load_reverse_index(index_dir: &Path) -> Result<ReverseIndex, Box<dyn std::error::Error>> {
+    let json_text = fs::read_to_string(index_dir.join("reverse_index.json"))
+        .map_err(|_| "Index not found. Run 'yore build' first.")?;
+    serde_json::from_str(&json_text).map_err(Into::into)
+}
+
+pub fn default_query_stop_words() -> &'static [&'static str] { // Static list of common English filler words.
+    &[ // default_vocabulary_stop_words begins with these same words in the same order.
+        "a", "an", "and", "are", "as", "at", "be", "by", "for", "from", "had", "has", "have", "he",
+        "in", "is", "it", "not", "of", "on", "or", "that", "the", "their", "there", "these",
+        "they", "this", "to", "was", "we", "were", "what", "when", "where", "which", "who", "will",
+        "with", "would", "you", "your", "did", "do", "does", "can", "could", "must", "shall",
+        "should", "may", "might", "new", "using", "used", "use", "add", "set", "run", "get", "see",
+        "only", "no", "so", "than", "then", "them", "all", "any", "both", "each", "more", "most",
+        "some", "such", "own", "same", "just", "also", "now", "other", "into", "about", "up",
+        "over",
+    ]
+}
+
+pub fn default_vocabulary_stop_words() -> &'static [&'static str] { // Static stop-word list for vocabulary terms.
+    &[
+        "a", // First of the entries that also appear, in this order, in default_query_stop_words.
+        "an",
+        "and",
+        "are",
+        "as",
+        "at",
+        "be",
+        "by",
+        "for",
+        "from",
+        "had",
+        "has",
+        "have",
+        "he",
+        "in",
+        "is",
+        "it",
+        "not",
+        "of",
+        "on",
+        "or",
+        "that",
+        "the",
+        "their",
+        "there",
+        "these",
+        "they",
+        "this",
+        "to",
+        "was",
+        "we",
+        "were",
+        "what",
+        "when",
+        "where",
+        "which",
+        "who",
+        "will",
+        "with",
+        "would",
+        "you",
+        "your",
+        "did",
+        "do",
+        "does",
+        "can",
+        "could",
+        "must",
+        "shall",
+        "should",
+        "may",
+        "might",
+        "new",
+        "using",
+        "used",
+        "use",
+        "add",
+        "set",
+        "run",
+        "get",
+        "see",
+        "only",
+        "no",
+        "so",
+        "than",
+        "then",
+        "them",
+        "all",
+        "any",
+        "both",
+        "each",
+        "more",
+        "most",
+        "some",
+        "such",
+        "own",
+        "same",
+        "just",
+        "also",
+        "now",
+        "other",
+        "into",
+        "about",
+        "up",
+        "over", // Last entry shared with default_query_stop_words.
+        "document", // Vocabulary-only additions start here (documentation/tooling words).
+        "documents",
+        "docs",
+        "json",
+        "changes",
+        "change",
+        "build",
+        "output",
+        "validation",
+        "command",
+        "commands",
+        "prompting",
+        "workflow",
+        "core",
+        "keep",
+        "apply",
+        "file",
+        "files",
+        "reporting",
+        "pattern",
+        "examples",
+        "help",
+        "format",
+        "index",
+        "indexes",
+        "indexer",
+        "indexing",
+    ]
+}
+
+/// Builds the vocabulary stop-word set.
+///
+/// Starts from the built-in list when `include_default` is true (otherwise empty), then adds every
+/// whitespace-separated token of the optional `stopwords` file, lowercased; read errors name the file.
+pub fn load_vocabulary_stopwords(
+    stopwords: Option<&Path>,
+    include_default: bool,
+) -> Result<HashSet<String>, Box<dyn std::error::Error>> {
+    let mut words: HashSet<String> = if include_default {
+        default_vocabulary_stop_words()
+            .iter()
+            .map(|word| (*word).to_string())
+            .collect()
+    } else {
+        HashSet::new()
+    };
+
+    if let Some(path) = stopwords {
+        let path_value = path.to_string_lossy().to_string();
+        let content = fs::read_to_string(path)
+            .map_err(|err| format!("Unable to read stop-word file '{path_value}': {err}"))?;
+        // `split_whitespace` never yields empty tokens, so every token is inserted.
+        words.extend(content.split_whitespace().map(str::to_lowercase));
+    }
+
+    Ok(words)
+}
+
+/// Reports whether `term` passes the vocabulary hygiene filter.
+///
+/// A term passes when all of the following hold:
+/// - its byte length is between 3 and 48 inclusive;
+/// - it contains only ASCII letters, ASCII digits, `-` and `_`;
+/// - it contains at least one ASCII letter;
+/// - its digits make up less than 60% of its byte length.
+pub fn is_hygienic_vocabulary_term(term: &str) -> bool {
+    let byte_len = term.len();
+    if !(3..=48).contains(&byte_len) {
+        return false;
+    }
+
+    let (mut digit_count, mut letter_count) = (0usize, 0usize);
+    for symbol in term.chars() {
+        match symbol {
+            c if c.is_ascii_digit() => digit_count += 1,
+            c if c.is_ascii_alphabetic() => letter_count += 1,
+            '-' | '_' => {}
+            _ => return false,
+        }
+    }
+
+    // Digit-heavy terms (digits >= 60% of the byte length) are rejected.
+    let digit_heavy =
+        digit_count > 0 && digit_count.saturating_mul(10) >= byte_len.saturating_mul(6);
+    letter_count > 0 && !digit_heavy
+}
+
+/// Jaccard index of two string sets: shared members divided by distinct members overall.
+/// Defined as 0.0 when both sets are empty.
+pub fn jaccard_similarity(a: &HashSet<String>, b: &HashSet<String>) -> f64 {
+    let shared = a.intersection(b).count();
+    // Size of the union by inclusion-exclusion; zero only when both sets are empty.
+    let combined = a.len() + b.len() - shared;
+    match combined {
+        0 => 0.0,
+        total => shared as f64 / total as f64,
+    }
+}
+
+/// Current Unix time in whole seconds as a decimal string ("0" if the clock predates the epoch).
+pub fn chrono_now() -> String {
+    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
+    since_epoch.map_or(0, |elapsed| elapsed.as_secs()).to_string()
+}
+
+/// Extract file rename history from git.
+///
+/// Runs `git log --name-status --diff-filter=R --pretty=format:%H -M --` in `path` and parses
+/// each `R<score>\told\tnew` line, attributing it to the most recently seen commit id.
+/// Commit ids are recognised as all-hex lines of 40 (SHA-1) or 64 (SHA-256) characters.
+/// Renames are returned oldest-first. Returns an empty history if `path` is not inside a git
+/// repository, git is unavailable, or the command exits unsuccessfully.
+pub fn extract_git_renames(path: &Path) -> RenameHistory {
+    let run = std::process::Command::new("git")
+        .args([
+            "log",
+            "--name-status",
+            "--diff-filter=R",
+            "--pretty=format:%H",
+            "-M",
+            "--",
+        ])
+        .current_dir(path)
+        .output();
+
+    let stdout = match run {
+        Ok(done) if done.status.success() => String::from_utf8_lossy(&done.stdout).into_owned(),
+        // Best-effort: spawn failures and non-zero exits both mean "no history available".
+        _ => {
+            return RenameHistory {
+                renames: vec![],
+                indexed_at: chrono_now(),
+            };
+        }
+    };
+
+    let mut renames = Vec::new();
+    let mut current_commit = String::new();
+
+    for line in stdout.lines().map(str::trim).filter(|line| !line.is_empty()) {
+        let is_commit_id =
+            matches!(line.len(), 40 | 64) && line.bytes().all(|b| b.is_ascii_hexdigit());
+        if is_commit_id {
+            current_commit = line.to_string();
+            continue;
+        }
+
+        // Rename line: R<score>\told_path\tnew_path
+        if line.starts_with('R') {
+            let fields: Vec<&str> = line.splitn(3, '\t').collect();
+            if let [_status, old, new] = fields.as_slice() {
+                renames.push(RenameEntry {
+                    old_path: (*old).to_string(),
+                    new_path: (*new).to_string(),
+                    commit: current_commit.clone(),
+                });
+            }
+        }
+    }
+
+    // git log lists newest commits first; reverse to get oldest-first order.
+    renames.reverse();
+
+    RenameHistory {
+        renames,
+        indexed_at: chrono_now(),
+    }
+}
+
+/// Look up the current path for a file that may have been renamed.
+///
+/// Entries are visited in stored order; each entry whose `old_path` equals the name tracked so
+/// far advances the tracked name to that entry's `new_path`, so a chain of renames is followed
+/// when the history is ordered oldest-first.
+/// Returns the final name, or `None` when no entry ever matched.
+pub fn resolve_renamed_path(old_path: &str, history: &RenameHistory) -> Option<String> {
+    // `None` until the first matching rename is found.
+    let mut latest: Option<&str> = None;
+
+    for rename in &history.renames {
+        // Compare against the most recent name found so far (initially `old_path`).
+        if rename.old_path == latest.unwrap_or(old_path) {
+            latest = Some(rename.new_path.as_str());
+        }
+    }
+
+    latest.map(str::to_string)
+}
+
+/// Compute the relative path from source file to target file.
+///
+/// Returns the relative link path as it would appear in markdown:
+/// - if `target` lies under `source`'s directory, the remainder of `target` is returned;
+/// - otherwise the result is one `../` per source-directory component past the common prefix,
+///   followed by the remaining `target` components joined with `/`.
+///
+/// Returns `None` when `source` has no parent directory or the computed path is empty.
+/// `_available_files` is not consulted.
+pub fn compute_relative_path(
+    source: &str,
+    target: &str,
+    _available_files: &HashSet<String>,
+) -> Option<String> {
+    let target_path = Path::new(target);
+    // Directory containing the source file; bail out if there is none.
+    let source_dir = Path::new(source).parent()?;
+
+    // Fast path: the target sits inside the source file's directory.
+    if let Ok(below) = target_path.strip_prefix(source_dir) {
+        return Some(below.to_string_lossy().to_string());
+    }
+
+    // Otherwise walk up to the common ancestor and back down.
+    let from: Vec<_> = source_dir.components().collect();
+    let to: Vec<_> = target_path.components().collect();
+
+    // Number of leading components both paths share.
+    let shared = from
+        .iter()
+        .zip(to.iter())
+        .take_while(|(lhs, rhs)| lhs == rhs)
+        .count();
+
+    // One "../" for every source-directory component below the common ancestor.
+    let mut link = "../".repeat(from.len() - shared);
+
+    // Then the target components that follow the common ancestor.
+    let tail: Vec<String> = to[shared..]
+        .iter()
+        .map(|part| part.as_os_str().to_string_lossy().into_owned())
+        .collect();
+    link.push_str(&tail.join("/"));
+
+    // An empty result is reported as "no path".
+    if link.is_empty() {
+        None
+    } else {
+        Some(link)
+    }
+}
+
+// ============================================================================
+// Context Assembly for LLMs (Phase 2)
+// ============================================================================
+
+pub fn combined_section_score(section: &SectionMatch) -> f64 {
+    section.bm25_score * 0.7 + section.canonicality * 0.3 // Blend: 70% BM25 score, 30% canonicality.
+}
+
+/// Orders sections best-first: higher combined score first (incomparable scores count as equal),
+/// then ascending doc path, line start, line end and heading as deterministic tie-breakers.
+pub fn compare_sections_by_relevance(a: &SectionMatch, b: &SectionMatch) -> std::cmp::Ordering {
+    let by_score = combined_section_score(b).partial_cmp(&combined_section_score(a));
+    by_score.unwrap_or(std::cmp::Ordering::Equal).then_with(|| {
+        (&a.doc_path, a.line_start, a.line_end, &a.heading)
+            .cmp(&(&b.doc_path, b.line_start, b.line_end, &b.heading))
+    })
+}
+
+pub fn normalize_content_for_dedupe(text: &str) -> String {
+    text.split_whitespace().collect::<Vec<_>>().join(" ") // Collapse each whitespace run to one space; trims both ends.
+}
+
+/// Returns the index's `source_root` (with surrounding whitespace trimmed) as a path,
+/// or `None` when the trimmed value is empty.
+pub fn forward_index_source_root(index: &ForwardIndex) -> Option<PathBuf> {
+    match index.source_root.trim() {
+        "" => None,
+        root => Some(PathBuf::from(root)),
+    }
+}
+
+/// Canonicalizes `path` when the filesystem can resolve it; otherwise relative paths are joined
+/// onto the current directory when it is available, and anything else is returned unchanged.
+pub fn canonicalize_existing_path(path: &Path) -> PathBuf {
+    if let Ok(resolved) = fs::canonicalize(path) {
+        return resolved;
+    }
+    match std::env::current_dir() {
+        Ok(cwd) if !path.is_absolute() => cwd.join(path),
+        _ => path.to_path_buf(),
+    }
+}
+
+/// Derives the index key for `path`, trying in order:
+/// 1. the normalized path relative to `source_root`, when stripping succeeds and it is non-empty;
+/// 2. the normalized form of the whole `path`, when non-empty;
+/// 3. the raw path rendered lossily as a string.
+pub fn build_indexed_doc_key(path: &Path, source_root: &Path) -> String {
+    let relative_key = path
+        .strip_prefix(source_root)
+        .ok()
+        .map(normalize_path)
+        .filter(|key| !key.is_empty());
+
+    relative_key
+        .or_else(|| Some(normalize_path(path)).filter(|key| !key.is_empty()))
+        .unwrap_or_else(|| path.to_string_lossy().to_string())
+}
+
+/// Picks the filesystem path to use for an indexed document.
+///
+/// An absolute `entry.path` is returned as-is. Otherwise, when the index has a source root, the
+/// first existing candidate of `root/entry.path` then `root/doc_path` is returned. If neither
+/// exists (or there is no source root) the bare `doc_path` is returned.
+pub fn resolve_doc_fs_path(index: &ForwardIndex, doc_path: &str, entry: &FileEntry) -> PathBuf {
+    let stored_path = Path::new(&entry.path);
+    if stored_path.is_absolute() {
+        return stored_path.to_path_buf();
+    }
+
+    // Candidates are probed lazily and in order; the first one that exists wins.
+    forward_index_source_root(index)
+        .and_then(|root| {
+            Some(root.join(stored_path))
+                .filter(|candidate| candidate.exists())
+                .or_else(|| Some(root.join(doc_path)).filter(|candidate| candidate.exists()))
+        })
+        .unwrap_or_else(|| PathBuf::from(doc_path))
+}
+
+pub fn read_indexed_doc(
+    index: &ForwardIndex,
+    doc_path: &str,
+    entry: &FileEntry,
+) -> Result<String, io::Error> {
+    fs::read_to_string(resolve_doc_fs_path(index, doc_path, entry)) // Read the file at the path resolve_doc_fs_path selects.
+}
+
+/// Drops redundant matches, keeping first occurrences in input order: a section is dropped when
+/// its inclusive line range overlaps a kept section of the same document, or when its non-empty
+/// whitespace-normalized content was already seen. Returns the kept sections and the drop count.
+pub fn dedupe_section_matches(sections: Vec<SectionMatch>) -> (Vec<SectionMatch>, usize) {
+    let total = sections.len();
+    let mut kept: Vec<SectionMatch> = Vec::new();
+    let mut seen_content = HashSet::new();
+
+    for candidate in sections {
+        let overlaps_kept = kept.iter().any(|prior| {
+            prior.doc_path == candidate.doc_path
+                && prior.line_start <= candidate.line_end
+                && candidate.line_start <= prior.line_end
+        });
+        // Evaluated unconditionally so the content is remembered even when the range overlaps.
+        let content_key = normalize_content_for_dedupe(&candidate.content);
+        let repeated_content = !content_key.is_empty() && !seen_content.insert(content_key);
+        if !(overlaps_kept || repeated_content) {
+            kept.push(candidate);
+        }
+    }
+
+    let dropped = total - kept.len();
+    (kept, dropped)
+}
+
+/// Largest byte index that is `<= limit` (clamped to `text.len()`) and lies on a UTF-8
+/// character boundary of `text`.
+pub fn floor_char_boundary(text: &str, limit: usize) -> usize {
+    let capped = limit.min(text.len());
+    // Index 0 is always a boundary, so the search cannot fail.
+    (0..=capped).rev().find(|&idx| text.is_char_boundary(idx)).unwrap_or(0)
+}
+
+/// Truncates `text` so it fits both a token budget and a byte budget.
+///
+/// Tokens are approximated as 4 bytes each (`max_tokens * 4`); a cap of 0 disables that limit.
+/// When truncation happens and the tightest active cap can hold it, the marker
+/// `" ...[truncated]"` is appended and counted against the budget, so the result never exceeds
+/// any active cap. If that cap is too small for the marker, the text is cut without a marker.
+/// The cut lands on a character boundary and trailing whitespace before it is removed.
+/// Returns `(text, was_truncated, reasons)` where `reasons` lists `"token_cap"` and/or
+/// `"byte_cap"` for each cap the input exceeded.
+pub fn truncate_text_to_budget(
+    text: &str,
+    max_tokens: usize,
+    max_bytes: usize,
+) -> (String, bool, Vec<String>) {
+    const TRUNCATION_MARKER: &str = " ...[truncated]";
+
+    let token_char_limit = max_tokens.saturating_mul(4);
+    let mut reasons = Vec::new();
+    if token_char_limit > 0 && text.len() > token_char_limit {
+        reasons.push("token_cap".to_string());
+    }
+    if max_bytes > 0 && text.len() > max_bytes {
+        reasons.push("byte_cap".to_string());
+    }
+    if reasons.is_empty() {
+        return (text.to_string(), false, reasons);
+    }
+
+    // Tightest active cap; at least one is active here because a cap was exceeded.
+    let budget = [token_char_limit, max_bytes]
+        .iter()
+        .copied()
+        .filter(|&cap| cap > 0)
+        .min()
+        .unwrap_or(text.len());
+
+    // Reserve room for the marker only when the budget can actually hold it.
+    let marker_fits = budget > TRUNCATION_MARKER.len();
+    let limit = if marker_fits { budget - TRUNCATION_MARKER.len() } else { budget };
+    let boundary = floor_char_boundary(text, limit);
+    let mut truncated = text[..boundary].trim_end().to_string();
+    if marker_fits {
+        truncated.push_str(TRUNCATION_MARKER);
+    }
+    (truncated, true, reasons)
+}
+
+/// Renders `path` as its normal components joined with `/`. `.`, root and prefix components
+/// are dropped; each `..` removes the previously collected component, if there is one.
+pub fn normalize_path(path: &Path) -> String {
+    use std::path::Component;
+    let mut parts: Vec<String> = Vec::new();
+    for piece in path.components() {
+        match piece {
+            Component::Normal(name) => parts.push(name.to_string_lossy().into_owned()),
+            Component::ParentDir => {
+                parts.pop();
+            }
+            Component::CurDir | Component::RootDir | Component::Prefix(_) => {}
+        }
+    }
+    parts.join("/")
+}