diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
index f05c290..91036f6 100644
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -3,7 +3,7 @@
   "name": "codebase-index",
   "displayName": "Codebase Index",
   "description": "Local-first hybrid codebase index. Auto-provisions its Python CLI on first session start; the skill searches the index so Claude reads only the most relevant files.",
-  "version": "1.2.1",
+  "version": "1.2.2",
   "author": {
     "name": "codebase-index contributors"
   },
diff --git a/.claude/skills/codebase-index/.skill_version b/.claude/skills/codebase-index/.skill_version
index cb174d5..23aa839 100644
--- a/.claude/skills/codebase-index/.skill_version
+++ b/.claude/skills/codebase-index/.skill_version
@@ -1 +1 @@
-1.2.1
\ No newline at end of file
+1.2.2
diff --git a/.claude/skills/codebase-index/SKILL.md b/.claude/skills/codebase-index/SKILL.md
index b738921..2105974 100644
--- a/.claude/skills/codebase-index/SKILL.md
+++ b/.claude/skills/codebase-index/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: codebase-index
 description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project.
-allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
+allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
 ---
 
 # Codebase Index
@@ -44,7 +44,12 @@ Pick the subcommand by intent:
 | a specific symbol name | `codebase-index symbol "<name>" --json` |
 | "who calls / references" | `codebase-index refs "<name>" --json` |
 | "what breaks if I change" | `codebase-index impact "<file-or-symbol>" --json` |
-| visual graph / "open graph" | `codebase-index graph "<file-or-symbol>" --open` |
+| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "<file-or-symbol>" --open` |
+
+The `graph` command renders an HTML dependency graph for a person to look at —
+it is not a retrieval packet. Use it only when the user explicitly wants a visual
+graph; for "what depends on X" questions, answer from `impact`/`refs` instead. In a
+headless session, prefer `--out <path>` over `--open`.
 
 `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow.
 
@@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear:
 - `--mode symbol` — pure symbol lookups (faster, no FTS noise)
 - `--mode fts` — text/keyword queries where symbol names don't matter
 - `--mode hybrid` — default; best for mixed queries
+- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit
+  requests" without the exact words). Requires opt-in embeddings; falls back with
+  a clear message when they are not enabled. `hybrid` already blends vectors in
+  when embeddings are on, so reach for `vector` only for pure-semantic recall.
 
 Natural-language kind words such as `method`, `function`, `class`, `interface`,
 `enum`, and `type` constrain the symbol retriever inside `search`.
@@ -89,6 +98,20 @@ Top-level fields:
 - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan.
 - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback).
 - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak.
+- `intent` / `mode` — how the query was classified and which retrievers ran;
+  useful to sanity-check a weak result (e.g. a "how does X work" question that
+  resolved to a bare symbol lookup may need `explain` instead).
+- `pagination` — present only when more results exist than fit the page. It
+  reports `has_more` and `next_offset`. To page, re-run `search` with
+  `--offset <next_offset>` (e.g. `codebase-index search "query" --limit 10 --offset 10`). Prefer
+  refining with a more specific subcommand or raising `--token-budget` first —
+  page only when the top results genuinely miss the answer.
+- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency
+  edges (imports/inheritance) are extracted only for fully supported languages.
+  When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such
+  as Lua), an **empty or short `refs`/`impact` result is inconclusive** — the edges
+  may be unextracted rather than absent. Confirm with a Grep before concluding "nothing
+  references this". `coverage.languages` lists the affected languages.
 
 ## Token efficiency rules
 
@@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is `
 
 0. If confidence is consistently low across queries, run diagnostics first:
    ```bash
-   codebase-index stats --json    # check coverage and symbol counts per language
+   codebase-index stats --json    # per-language file/symbol counts + graph tier
    codebase-index doctor          # surface config or security issues
    ```
    Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`.
+   In `stats`, each language carries `graph: full|partial` (and `doctor` reports a
+   `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack
+   import/inheritance edges for that language — treat empty results there as
+   inconclusive.
 
 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep.
 2. If still nothing, Glob for likely paths, then Grep within them.
diff --git a/.claude/skills/codebase-index/scripts/cbx b/.claude/skills/codebase-index/scripts/cbx
index a33c2bd..5666358 100644
--- a/.claude/skills/codebase-index/scripts/cbx
+++ b/.claude/skills/codebase-index/scripts/cbx
@@ -4,7 +4,7 @@
 # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch).
 set -euo pipefail
 
-ALLOWED="search explain symbol refs impact graph stats update index"
+ALLOWED="search explain symbol refs impact graph stats doctor update index"
 
 sub="${1:-}"
 case " $ALLOWED " in
diff --git a/.claude/skills/codebase-index/scripts/cbx.ps1 b/.claude/skills/codebase-index/scripts/cbx.ps1
index fe10bd9..bb8e05d 100644
--- a/.claude/skills/codebase-index/scripts/cbx.ps1
+++ b/.claude/skills/codebase-index/scripts/cbx.ps1
@@ -8,7 +8,7 @@ param(
 )
 
 $ErrorActionPreference = "Stop"
-$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index")
+$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index")
 
 if ($allowed -notcontains $Subcommand) {
     Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')"
diff --git a/.codex/skills/codebase-index/.skill_version b/.codex/skills/codebase-index/.skill_version
index cb174d5..23aa839 100644
--- a/.codex/skills/codebase-index/.skill_version
+++ b/.codex/skills/codebase-index/.skill_version
@@ -1 +1 @@
-1.2.1
\ No newline at end of file
+1.2.2
diff --git a/.codex/skills/codebase-index/SKILL.md b/.codex/skills/codebase-index/SKILL.md
index b738921..2105974 100644
--- a/.codex/skills/codebase-index/SKILL.md
+++ b/.codex/skills/codebase-index/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: codebase-index
 description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project.
-allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
+allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
 ---
 
 # Codebase Index
@@ -44,7 +44,12 @@ Pick the subcommand by intent:
 | a specific symbol name | `codebase-index symbol "<name>" --json` |
 | "who calls / references" | `codebase-index refs "<name>" --json` |
 | "what breaks if I change" | `codebase-index impact "<file-or-symbol>" --json` |
-| visual graph / "open graph" | `codebase-index graph "<file-or-symbol>" --open` |
+| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "<file-or-symbol>" --open` |
+
+The `graph` command renders an HTML dependency graph for a person to look at —
+it is not a retrieval packet. Use it only when the user explicitly wants a visual
+graph; for "what depends on X" questions, answer from `impact`/`refs` instead. In a
+headless session, prefer `--out <path>` over `--open`.
 
 `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow.
 
@@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear:
 - `--mode symbol` — pure symbol lookups (faster, no FTS noise)
 - `--mode fts` — text/keyword queries where symbol names don't matter
 - `--mode hybrid` — default; best for mixed queries
+- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit
+  requests" without the exact words). Requires opt-in embeddings; falls back with
+  a clear message when they are not enabled. `hybrid` already blends vectors in
+  when embeddings are on, so reach for `vector` only for pure-semantic recall.
 
 Natural-language kind words such as `method`, `function`, `class`, `interface`,
 `enum`, and `type` constrain the symbol retriever inside `search`.
@@ -89,6 +98,20 @@ Top-level fields:
 - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan.
 - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback).
 - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak.
+- `intent` / `mode` — how the query was classified and which retrievers ran;
+  useful to sanity-check a weak result (e.g. a "how does X work" question that
+  resolved to a bare symbol lookup may need `explain` instead).
+- `pagination` — present only when more results exist than fit the page. It
+  reports `has_more` and `next_offset`. To page, re-run `search` with
+  `--offset <next_offset>` (e.g. `codebase-index search "query" --limit 10 --offset 10`). Prefer
+  refining with a more specific subcommand or raising `--token-budget` first —
+  page only when the top results genuinely miss the answer.
+- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency
+  edges (imports/inheritance) are extracted only for fully supported languages.
+  When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such
+  as Lua), an **empty or short `refs`/`impact` result is inconclusive** — the edges
+  may be unextracted rather than absent. Confirm with a Grep before concluding "nothing
+  references this". `coverage.languages` lists the affected languages.
 
 ## Token efficiency rules
 
@@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is `
 
 0. If confidence is consistently low across queries, run diagnostics first:
    ```bash
-   codebase-index stats --json    # check coverage and symbol counts per language
+   codebase-index stats --json    # per-language file/symbol counts + graph tier
    codebase-index doctor          # surface config or security issues
    ```
    Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`.
+   In `stats`, each language carries `graph: full|partial` (and `doctor` reports a
+   `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack
+   import/inheritance edges for that language — treat empty results there as
+   inconclusive.
 
 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep.
 2. If still nothing, Glob for likely paths, then Grep within them.
diff --git a/.codex/skills/codebase-index/scripts/cbx b/.codex/skills/codebase-index/scripts/cbx
index a33c2bd..5666358 100644
--- a/.codex/skills/codebase-index/scripts/cbx
+++ b/.codex/skills/codebase-index/scripts/cbx
@@ -4,7 +4,7 @@
 # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch).
 set -euo pipefail
 
-ALLOWED="search explain symbol refs impact graph stats update index"
+ALLOWED="search explain symbol refs impact graph stats doctor update index"
 
 sub="${1:-}"
 case " $ALLOWED " in
diff --git a/.codex/skills/codebase-index/scripts/cbx.ps1 b/.codex/skills/codebase-index/scripts/cbx.ps1
index fe10bd9..bb8e05d 100644
--- a/.codex/skills/codebase-index/scripts/cbx.ps1
+++ b/.codex/skills/codebase-index/scripts/cbx.ps1
@@ -8,7 +8,7 @@ param(
 )
 
 $ErrorActionPreference = "Stop"
-$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index")
+$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index")
 
 if ($allowed -notcontains $Subcommand) {
     Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')"
diff --git a/.opencode/skills/codebase-index/.skill_version b/.opencode/skills/codebase-index/.skill_version
index cb174d5..23aa839 100644
--- a/.opencode/skills/codebase-index/.skill_version
+++ b/.opencode/skills/codebase-index/.skill_version
@@ -1 +1 @@
-1.2.1
\ No newline at end of file
+1.2.2
diff --git a/.opencode/skills/codebase-index/SKILL.md b/.opencode/skills/codebase-index/SKILL.md
index b738921..2105974 100644
--- a/.opencode/skills/codebase-index/SKILL.md
+++ b/.opencode/skills/codebase-index/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: codebase-index
 description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project.
-allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
+allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
 ---
 
 # Codebase Index
@@ -44,7 +44,12 @@ Pick the subcommand by intent:
 | a specific symbol name | `codebase-index symbol "<name>" --json` |
 | "who calls / references" | `codebase-index refs "<name>" --json` |
 | "what breaks if I change" | `codebase-index impact "<file-or-symbol>" --json` |
-| visual graph / "open graph" | `codebase-index graph "<file-or-symbol>" --open` |
+| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "<file-or-symbol>" --open` |
+
+The `graph` command renders an HTML dependency graph for a person to look at —
+it is not a retrieval packet. Use it only when the user explicitly wants a visual
+graph; for "what depends on X" questions, answer from `impact`/`refs` instead. In a
+headless session, prefer `--out <path>` over `--open`.
 
 `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow.
 
@@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear:
 - `--mode symbol` — pure symbol lookups (faster, no FTS noise)
 - `--mode fts` — text/keyword queries where symbol names don't matter
 - `--mode hybrid` — default; best for mixed queries
+- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit
+  requests" without the exact words). Requires opt-in embeddings; falls back with
+  a clear message when they are not enabled. `hybrid` already blends vectors in
+  when embeddings are on, so reach for `vector` only for pure-semantic recall.
 
 Natural-language kind words such as `method`, `function`, `class`, `interface`,
 `enum`, and `type` constrain the symbol retriever inside `search`.
@@ -89,6 +98,20 @@ Top-level fields:
 - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan.
 - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback).
 - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak.
+- `intent` / `mode` — how the query was classified and which retrievers ran;
+  useful to sanity-check a weak result (e.g. a "how does X work" question that
+  resolved to a bare symbol lookup may need `explain` instead).
+- `pagination` — present only when more results exist than fit the page. It
+  reports `has_more` and `next_offset`. To page, re-run `search` with
+  `--offset <next_offset>` (e.g. `codebase-index search "query" --limit 10 --offset 10`). Prefer
+  refining with a more specific subcommand or raising `--token-budget` first —
+  page only when the top results genuinely miss the answer.
+- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency
+  edges (imports/inheritance) are extracted only for fully supported languages.
+  When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such
+  as Lua), an **empty or short `refs`/`impact` result is inconclusive** — the edges
+  may be unextracted rather than absent. Confirm with a Grep before concluding "nothing
+  references this". `coverage.languages` lists the affected languages.
 
 ## Token efficiency rules
 
@@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is `
 
 0. If confidence is consistently low across queries, run diagnostics first:
    ```bash
-   codebase-index stats --json    # check coverage and symbol counts per language
+   codebase-index stats --json    # per-language file/symbol counts + graph tier
    codebase-index doctor          # surface config or security issues
    ```
    Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`.
+   In `stats`, each language carries `graph: full|partial` (and `doctor` reports a
+   `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack
+   import/inheritance edges for that language — treat empty results there as
+   inconclusive.
 
 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep.
 2. If still nothing, Glob for likely paths, then Grep within them.
diff --git a/.opencode/skills/codebase-index/scripts/cbx b/.opencode/skills/codebase-index/scripts/cbx
index a33c2bd..5666358 100644
--- a/.opencode/skills/codebase-index/scripts/cbx
+++ b/.opencode/skills/codebase-index/scripts/cbx
@@ -4,7 +4,7 @@
 # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch).
 set -euo pipefail
 
-ALLOWED="search explain symbol refs impact graph stats update index"
+ALLOWED="search explain symbol refs impact graph stats doctor update index"
 
 sub="${1:-}"
 case " $ALLOWED " in
diff --git a/.opencode/skills/codebase-index/scripts/cbx.ps1 b/.opencode/skills/codebase-index/scripts/cbx.ps1
index fe10bd9..bb8e05d 100644
--- a/.opencode/skills/codebase-index/scripts/cbx.ps1
+++ b/.opencode/skills/codebase-index/scripts/cbx.ps1
@@ -8,7 +8,7 @@ param(
 )
 
 $ErrorActionPreference = "Stop"
-$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index")
+$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index")
 
 if ($allowed -notcontains $Subcommand) {
     Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d85eb8c..cb9e12c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,53 @@ All notable changes to this project are documented here. The format is based on
 
 ## [Unreleased]
 
+### Added
+- **Content-addressed embedding cache**: a new `vec_cache` table (keyed by `(model, content_sha)`)
+  persists chunk embeddings across rebuilds. Because chunk ids churn on every full rebuild, the
+  embedding pass now hashes chunk content and only calls the (potentially slow or paid) backend for
+  text never embedded under the active model — unchanged content reuses its cached vector for free.
+- **Repo-wide graph tier in diagnostics**: `stats` now tags each tree-sitter language with
+  `graph: full|partial`, and `doctor` adds a `graph_coverage` finding listing Tier-B languages
+  present in the index. This surfaces up front which languages have partial `refs`/`impact`
+  coverage (symbols but no import/inheritance edges) instead of signaling it only per query.
+- **Graph coverage signal**: `refs` and `impact` now report a `coverage` block
+  (`partial`, `languages`, `reason`). Import/inheritance edges are only extracted
+  for the hand-tuned (Tier-A) languages, so a symbol or file in a Tier-B language
+  (generic tree-sitter walk, e.g. Lua) can produce an empty/short result that is
+  inconclusive rather than authoritative. `coverage.partial` flags this so agents
+  fall back to Grep instead of reading "no references" as proof. Markdown output
+  prints a matching warning; the skill documents the field.
+
+### Changed
+- The embedding pass reports cache **misses** (vectors actually computed) as its "embedded" count.
+- `prune_orphan_vectors` now deletes stale `vec_chunks` rows in a single batched `executemany`.
+- **Skill**: documented the `--mode vector` semantic-search path, the `intent`/`mode`/`pagination`
+  response fields, and clarified that `graph --open` renders an HTML view for a human (use
+  `impact`/`refs` for agent-readable dependency answers).
+- **Skill**: narrowed the skill's `allowed-tools` from `Bash(python *)`/`Bash(python3 *)` to
+  `Bash(python -m codebase_index *)`/`Bash(python3 -m codebase_index *)`, so the skill can no longer
+  run arbitrary Python.
+
+### Fixed
+- `search` now exposes `--offset`, so the pagination contract is reachable from the CLI/skill.
+  The retrieval pipeline and MCP already supported paging, but the CLI command never surfaced the
+  flag — every call silently returned page one and the advertised `pagination.next_offset` was a
+  dead end. Markdown output now also notes when more results are available. `--offset` rejects
+  negative values.
+- `explain` now honors the index freshness contract: it passes `root`/`config` into the retrieval
+  pipeline, so `index.stale` / `files_changed_since_build` reflect reality instead of a hardcoded
+  "fresh" block. Previously the skill's freshness check silently never triggered for
+  "how does X work" questions. `explain` also blends in vector results when embeddings are enabled,
+  matching `search --mode hybrid`.
+- The `cbx` wrapper whitelist (skill + plugin `bin/`) now includes `doctor`, which the skill's
+  fallback diagnostics already invoke; previously `cbx doctor` was refused.
+
+## [1.2.2] - 2026-06-05
+
+### Changed
+- Synced the version to `1.2.2` across the package, plugin manifest, and lockfile.
+- Documentation cleanup: removed stale prompt files and screenshots, refreshed the README.
+
 ## [1.2.1] - 2026-06-05
 
 ### Added
diff --git a/bin/cbx b/bin/cbx
index 2b7294d..cfe60b7 100644
--- a/bin/cbx
+++ b/bin/cbx
@@ -3,7 +3,7 @@
 # from the venv provisioned by scripts/bootstrap.sh (located via the .venv-path pointer).
 set -euo pipefail
 
-ALLOWED="search explain symbol refs impact graph stats update index"
+ALLOWED="search explain symbol refs impact graph stats doctor update index"
 sub="${1:-}"
 case " $ALLOWED " in
   *" ${sub} "*) : ;;
diff --git a/bin/cbx.ps1 b/bin/cbx.ps1
index cf33e6b..85face7 100644
--- a/bin/cbx.ps1
+++ b/bin/cbx.ps1
@@ -5,7 +5,7 @@ param(
     [Parameter(ValueFromRemainingArguments = $true)] [string[]]$Rest
 )
 $ErrorActionPreference = "Stop"
-$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index")
+$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index")
 if ($allowed -notcontains $Subcommand) {
     Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')"
     exit 2
diff --git a/docs/DATABASE_SCHEMA.md b/docs/DATABASE_SCHEMA.md
index c2e3cd9..e0d0d7a 100644
--- a/docs/DATABASE_SCHEMA.md
+++ b/docs/DATABASE_SCHEMA.md
@@ -85,16 +85,39 @@ FTS5 virtual table for full-text search (auto-managed by triggers).
 | `text` | TEXT | Chunk text (indexed by FTS5) |
 | `chunk_id` | INTEGER | References chunks(id) |
 
-### embeddings (optional)
+### vec_chunks (optional)
 
-Stores vector embeddings for semantic search.
+Vector embeddings for semantic search. Created **only** when `embeddings.enabled = true`, via the
+`sqlite-vec` extension (a `vec0` virtual table).
+
+| Column | Type | Description |
+|---|---|---|
+| `chunk_id` | INTEGER PRIMARY KEY | References chunks(id) |
+| `embedding` | FLOAT[dim] | Embedding vector; `dim` is fixed per build by the configured model |
+
+### vec_meta (optional)
+
+Records which embedding model/dimension produced the vectors currently in `vec_chunks`.
 
 | Column | Type | Description |
 |---|---|---|
-| `chunk_id` | INTEGER PRIMARY KEY REFERENCES chunks(id) | Associated chunk |
-| `vector` | BLOB | Serialized embedding vector |
 | `model` | TEXT | Embedding model identifier |
-| `created_at` | TEXT | Creation timestamp |
+| `dim` | INTEGER | Vector dimension |
+| `built_at` | TEXT | ISO 8601 timestamp of the embedding pass |
+
+### vec_cache (optional)
+
+Content-addressed embedding cache. `chunk_id`s churn on every full rebuild (chunks are deleted and
+re-inserted), so this cache is keyed by `(model, content_sha)` instead — letting unchanged content
+reuse its vector for free across rebuilds, so only new or changed text hits the backend.
+
+| Column | Type | Description |
+|---|---|---|
+| `model` | TEXT NOT NULL | Embedding model identifier |
+| `content_sha` | TEXT NOT NULL | SHA-256 of the chunk content |
+| `embedding` | BLOB NOT NULL | Pre-serialized float32 vector |
+
+Primary key: `(model, content_sha)`.
 
 ### summaries
 
diff --git a/docs/RETRIEVAL_PIPELINE.md b/docs/RETRIEVAL_PIPELINE.md
index 86b2014..4e2901d 100644
--- a/docs/RETRIEVAL_PIPELINE.md
+++ b/docs/RETRIEVAL_PIPELINE.md
@@ -74,6 +74,10 @@ Ranked retrieval packet with confidence score
 
 **Score:** Cosine similarity (0.0 to 1.0).
 
+> **Indexing note:** chunk embeddings are reused across rebuilds via a content-addressed
+> `vec_cache` (keyed by model + content SHA-256), so only new or changed chunks are re-embedded.
+> See [DATABASE_SCHEMA.md](DATABASE_SCHEMA.md) and [SCHEMA.md](SCHEMA.md) for details.
+
 ## 5. Graph Expansion
 
 **Trigger:** After initial results are found.
diff --git a/docs/SCHEMA.md b/docs/SCHEMA.md
index 15b31e9..f5c21bd 100644
--- a/docs/SCHEMA.md
+++ b/docs/SCHEMA.md
@@ -129,9 +129,27 @@ CREATE VIRTUAL TABLE vec_chunks USING vec0(
 );
 -- A side table records which embedding model/dim produced these vectors:
 CREATE TABLE vec_meta (model TEXT, dim INTEGER, built_at TEXT);
+-- Content-addressed embedding cache, keyed by (model, content SHA-256):
+CREATE TABLE vec_cache (
+    model       TEXT NOT NULL,
+    content_sha TEXT NOT NULL,
+    embedding   BLOB NOT NULL,  -- pre-serialized float32 vector
+    PRIMARY KEY (model, content_sha)
+);
 ```
 
-If embeddings are disabled, `vec_chunks` does not exist and the vector searcher is skipped.
+If embeddings are disabled, none of `vec_chunks`, `vec_meta`, or `vec_cache` exist and the vector
+searcher is skipped.
+
+### Embedding reuse via `vec_cache`
+
+`chunk_id`s churn on every full rebuild because `replace_chunks` deletes and re-inserts rows, so a
+`chunk_id`-keyed store alone would re-embed the entire repository each time. The embedding pass
+therefore hashes each chunk's content (SHA-256) and looks it up in `vec_cache` under the active
+model name. Only content never embedded under that model is sent to the (potentially slow or paid)
+backend; everything else is copied straight from the cache into `vec_chunks`. Newly computed vectors
+are written back to `vec_cache` so subsequent rebuilds reuse them. The reported "embedded" count
+reflects cache **misses** — i.e. the work actually performed.
 
 ## Migrations
 
diff --git a/requirements.lock b/requirements.lock
index 1c565ad..f87af0a 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -1,3 +1,3 @@
-codebase-index @ https://github.com/denfry/codebase-index/archive/refs/tags/v1.2.0.tar.gz
+codebase-index @ https://github.com/denfry/codebase-index/archive/refs/tags/v1.2.2.tar.gz
 tree-sitter==0.25.2
 tree-sitter-language-pack==1.8.1
diff --git a/skill/SKILL.md b/skill/SKILL.md
index b738921..2105974 100644
--- a/skill/SKILL.md
+++ b/skill/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: codebase-index
 description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project.
-allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
+allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
 ---
 
 # Codebase Index
@@ -44,7 +44,12 @@ Pick the subcommand by intent:
 | a specific symbol name | `codebase-index symbol "<name>" --json` |
 | "who calls / references" | `codebase-index refs "<name>" --json` |
 | "what breaks if I change" | `codebase-index impact "<file-or-symbol>" --json` |
-| visual graph / "open graph" | `codebase-index graph "<file-or-symbol>" --open` |
+| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "<file-or-symbol>" --open` |
+
+The `graph` command renders an HTML dependency graph for a person to look at —
+it is not a retrieval packet. Use it only when the user explicitly wants a visual
+graph; for "what depends on X" questions, answer from `impact`/`refs` instead. In a
+headless session, prefer `--out <path>` over `--open`.
 
 `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow.
 
@@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear:
 - `--mode symbol` — pure symbol lookups (faster, no FTS noise)
 - `--mode fts` — text/keyword queries where symbol names don't matter
 - `--mode hybrid` — default; best for mixed queries
+- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit
+  requests" without the exact words). Requires opt-in embeddings; falls back with
+  a clear message when they are not enabled. `hybrid` already blends vectors in
+  when embeddings are on, so reach for `vector` only for pure-semantic recall.
 
 Natural-language kind words such as `method`, `function`, `class`, `interface`,
 `enum`, and `type` constrain the symbol retriever inside `search`.
@@ -89,6 +98,20 @@ Top-level fields:
 - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan.
 - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback).
 - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak.
+- `intent` / `mode` — how the query was classified and which retrievers ran;
+  useful to sanity-check a weak result (e.g. a "how does X work" question that
+  resolved to a bare symbol lookup may need `explain` instead).
+- `pagination` — present only when results are paged (omitted when the first page holds
+  everything). It reports `offset`, `has_more`, and `next_offset`. While `has_more` is
+  true, re-run `search` with `--offset <next_offset>` (e.g. `search "query" --limit 10 --offset 10`).
+  Prefer refining with a more specific subcommand or raising `--token-budget` first —
+  page only when the top results genuinely miss the answer.
+- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency
+  edges (imports/inheritance) are extracted only for fully supported languages.
+  When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such
+  as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may
+  just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing
+  references this". `coverage.languages` lists the affected languages.
 
 ## Token efficiency rules
 
@@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is `
 
 0. If confidence is consistently low across queries, run diagnostics first:
    ```bash
-   codebase-index stats --json    # check coverage and symbol counts per language
+   codebase-index stats --json    # per-language file/symbol counts + graph tier
    codebase-index doctor          # surface config or security issues
    ```
    Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`.
+   In `stats`, each language carries `graph: full|partial` (and `doctor` reports a
+   `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack
+   import/inheritance edges for that language — treat empty results there as
+   inconclusive.
 
 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep.
 2. If still nothing, Glob for likely paths, then Grep within them.
diff --git a/skill/scripts/cbx b/skill/scripts/cbx
index a33c2bd..5666358 100644
--- a/skill/scripts/cbx
+++ b/skill/scripts/cbx
@@ -4,7 +4,7 @@
 # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch).
 set -euo pipefail
 
-ALLOWED="search explain symbol refs impact graph stats update index"
+ALLOWED="search explain symbol refs impact graph stats doctor update index"
 
 sub="${1:-}"
 case " $ALLOWED " in
diff --git a/skill/scripts/cbx.ps1 b/skill/scripts/cbx.ps1
index fe10bd9..bb8e05d 100644
--- a/skill/scripts/cbx.ps1
+++ b/skill/scripts/cbx.ps1
@@ -8,7 +8,7 @@ param(
 )
 
 $ErrorActionPreference = "Stop"
-$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index")
+$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index")
 
 if ($allowed -notcontains $Subcommand) {
     Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')"
diff --git a/skills/codebase-index/SKILL.md b/skills/codebase-index/SKILL.md
index b738921..2105974 100644
--- a/skills/codebase-index/SKILL.md
+++ b/skills/codebase-index/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: codebase-index
 description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project.
-allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
+allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
 ---
 
 # Codebase Index
@@ -44,7 +44,12 @@ Pick the subcommand by intent:
 | a specific symbol name | `codebase-index symbol "<name>" --json` |
 | "who calls / references" | `codebase-index refs "<name>" --json` |
 | "what breaks if I change" | `codebase-index impact "<file-or-symbol>" --json` |
-| visual graph / "open graph" | `codebase-index graph "<file-or-symbol>" --open` |
+| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "<file-or-symbol>" --open` |
+
+The `graph` command renders an HTML dependency graph for a person to look at —
+it is not a retrieval packet. Use it only when the user explicitly wants a visual
+graph; for "what depends on X" questions, answer from `impact`/`refs` instead. In a
+headless session, prefer `--out <path>` over `--open`.
 
 `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow.
 
@@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear:
 - `--mode symbol` — pure symbol lookups (faster, no FTS noise)
 - `--mode fts` — text/keyword queries where symbol names don't matter
 - `--mode hybrid` — default; best for mixed queries
+- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit
+  requests" without the exact words). Requires opt-in embeddings; falls back with
+  a clear message when they are not enabled. `hybrid` already blends vectors in
+  when embeddings are on, so reach for `vector` only for pure-semantic recall.
 
 Natural-language kind words such as `method`, `function`, `class`, `interface`,
 `enum`, and `type` constrain the symbol retriever inside `search`.
@@ -89,6 +98,20 @@ Top-level fields:
 - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan.
 - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback).
 - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak.
+- `intent` / `mode` — how the query was classified and which retrievers ran;
+  useful to sanity-check a weak result (e.g. a "how does X work" question that
+  resolved to a bare symbol lookup may need `explain` instead).
+- `pagination` — present only when results are paged (omitted when the first page holds
+  everything). It reports `offset`, `has_more`, and `next_offset`. While `has_more` is
+  true, re-run `search` with `--offset <next_offset>` (e.g. `search "query" --limit 10 --offset 10`).
+  Prefer refining with a more specific subcommand or raising `--token-budget` first —
+  page only when the top results genuinely miss the answer.
+- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency
+  edges (imports/inheritance) are extracted only for fully supported languages.
+  When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such
+  as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may
+  just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing
+  references this". `coverage.languages` lists the affected languages.
 
 ## Token efficiency rules
 
@@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is `
 
 0. If confidence is consistently low across queries, run diagnostics first:
    ```bash
-   codebase-index stats --json    # check coverage and symbol counts per language
+   codebase-index stats --json    # per-language file/symbol counts + graph tier
    codebase-index doctor          # surface config or security issues
    ```
    Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`.
+   In `stats`, each language carries `graph: full|partial` (and `doctor` reports a
+   `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack
+   import/inheritance edges for that language — treat empty results there as
+   inconclusive.
 
 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep.
 2. If still nothing, Glob for likely paths, then Grep within them.
diff --git a/src/codebase_index/cli.py b/src/codebase_index/cli.py
index a1c6efa..4126fcf 100644
--- a/src/codebase_index/cli.py
+++ b/src/codebase_index/cli.py
@@ -375,6 +375,9 @@ def search(
     ctx: typer.Context,
     query: str = typer.Argument(..., help="Search query."),
     limit: int = typer.Option(10, "--limit"),
+    offset: int = typer.Option(
+        0, "--offset", help="Skip the first N results (use pagination.next_offset to page)."
+    ),
     token_budget: int = typer.Option(1500, "--token-budget"),
     mode: str = typer.Option("hybrid", "--mode", help="hybrid|fts|symbol|vector"),
     no_fallback: bool = typer.Option(False, "--no-fallback"),
@@ -386,6 +389,10 @@ def search(
     from .retrieval.pipeline import search as run_search
     from .storage.db import Database
 
+    if offset < 0:
+        typer.echo("[codebase-index] --offset must be >= 0.")
+        raise typer.Exit(code=2)
+
     backend = None
     if mode in ("vector", "hybrid"):
         backend = _resolve_backend_for_search(ctx)
@@ -402,7 +409,7 @@ def search(
         if backend is not None and getattr(backend, "enabled", False):
             db.enable_vectors()
         payload = run_search(
-            db.conn, query, mode=mode, limit=limit,
+            db.conn, query, mode=mode, limit=limit, offset=offset,
             token_budget=token_budget, no_fallback=no_fallback, backend=backend,
             root=Path(cfg.root), config=cfg,
         )
@@ -489,12 +496,18 @@ def explain(
     from .retrieval.pipeline import search as run_search
     from .storage.db import Database
 
-    db_path, _cfg = _ensure_index(ctx)
+    backend = _resolve_backend_for_search(ctx)
+    db_path, cfg = _ensure_index(ctx)
 
     q = query if any(w in query.lower() for w in ("how", "architecture", "overview")) else f"how does {query} work"
     with Database(db_path) as db:
-        payload = run_search(db.conn, q, mode="hybrid", limit=10,
-                             token_budget=token_budget, no_fallback=False)
+        if getattr(backend, "enabled", False):
+            db.enable_vectors()
+        payload = run_search(
+            db.conn, q, mode="hybrid", limit=10,
+            token_budget=token_budget, no_fallback=False, backend=backend,
+            root=Path(cfg.root), config=cfg,
+        )
 
     want_json = json_out or (ctx.obj and ctx.obj.get("json"))
     typer.echo(json_renderer.render(payload) if want_json else md_renderer.render(payload))
@@ -556,6 +569,7 @@ def stats(
     import json as _json
 
     from .config import load
+    from .parsers.languages import has_full_graph
     from .storage import repo
     from .storage.db import Database
 
@@ -578,7 +592,14 @@ def stats(
         built_at = repo.get_meta(db.conn, "built_at")
         head = repo.get_meta(db.conn, "head_commit")
         coverage = [
-            {"lang": r["lang"], "files": r["files"], "symbols": r["symbols"]}
+            {
+                "lang": r["lang"],
+                "files": r["files"],
+                "symbols": r["symbols"],
+                # Tier-A languages get import/inheritance edges; Tier-B is symbols-only,
+                # so refs/impact are partial for them.
+                "graph": "full" if has_full_graph(r["lang"]) else "partial",
+            }
             for r in repo.treesitter_coverage(db.conn)
         ]
 
@@ -599,7 +620,8 @@ def stats(
         typer.echo(f"files={files}  symbols={symbols}  built_at={built_at}  head={head}")
         for r in coverage:
             flag = "  ⚠ 0 symbols" if (r["symbols"] or 0) == 0 and r["files"] >= 3 else ""
-            typer.echo(f"  {r['lang']}: {r['files']} files, {r['symbols']} symbols{flag}")
+            tier = "  · partial graph (Tier-B)" if r["graph"] == "partial" else ""
+            typer.echo(f"  {r['lang']}: {r['files']} files, {r['symbols']} symbols{flag}{tier}")
 
 
 @app.command()
diff --git a/src/codebase_index/doctor.py b/src/codebase_index/doctor.py
index 6eac676..0770cae 100644
--- a/src/codebase_index/doctor.py
+++ b/src/codebase_index/doctor.py
@@ -107,6 +107,25 @@ def run_doctor(root: Path, config: Config) -> list[Finding]:
             )
         )
 
+        # 5. Dependency-graph coverage: Tier-B languages (grammar but no hand-tuned spec)
+        #    yield symbols but no import/inheritance edges, so refs/impact undercount.
+        from .parsers.languages import has_full_graph
+
+        tier_b = sorted({r["lang"] for r in coverage if not has_full_graph(r["lang"])})
+        findings.append(
+            Finding(
+                id="graph_coverage",
+                ok=True,
+                severity="info",
+                detail=(
+                    "all indexed languages have full dependency-graph support"
+                    if not tier_b
+                    else f"partial dependency graph for Tier-B language(s): {', '.join(tier_b)} "
+                    "— refs/impact may undercount (confirm with Grep)"
+                ),
+            )
+        )
+
     return findings
 
 
diff --git a/src/codebase_index/graph/expand.py b/src/codebase_index/graph/expand.py
index 6552ba2..deebed1 100644
--- a/src/codebase_index/graph/expand.py
+++ b/src/codebase_index/graph/expand.py
@@ -16,7 +16,7 @@
 from collections import deque
 from typing import Optional
 
-from ..models import ImpactNode, ImpactResponse, IndexFreshness
+from ..models import GraphCoverage, ImpactNode, ImpactResponse, IndexFreshness
 from ..storage import repo
 
 
@@ -106,6 +106,19 @@ def walk_impact(
     return out
 
 
+def _target_paths(conn: sqlite3.Connection, target: str) -> list[str]:
+    """The file path(s) the target resolves to, for coverage classification."""
+    if repo.file_by_path(conn, target) is not None:
+        return [target]
+    sym_rows = repo.symbols_by_name(conn, target, exact=True)
+    if sym_rows:
+        return [r["path"] for r in sym_rows]
+    suffix = repo.files_with_suffix(conn, target)
+    if len(suffix) == 1:
+        return [suffix[0]["path"]]
+    return []
+
+
 def impact_lookup(
     conn: sqlite3.Connection, target: str, *, depth: int, direction: str
 ) -> ImpactResponse:
@@ -118,4 +131,5 @@ def impact_lookup(
     return ImpactResponse(
         target=target, direction=direction, depth=depth,
         index=_freshness(conn), nodes=nodes, files=files,
+        coverage=GraphCoverage.for_paths(_target_paths(conn, target)),
     )
diff --git a/src/codebase_index/indexer/pipeline.py b/src/codebase_index/indexer/pipeline.py
index 48c63a3..6f8237d 100644
--- a/src/codebase_index/indexer/pipeline.py
+++ b/src/codebase_index/indexer/pipeline.py
@@ -85,6 +85,7 @@ def _pool_init(config: Config) -> None:
 def _parse_one(cand) -> _ParseResult:
     """Parse a single file. Top-level for ProcessPoolExecutor pickling; uses _PARSE_CONFIG."""
     config = _PARSE_CONFIG
+    assert config is not None, "_pool_init must set _PARSE_CONFIG before any worker parses"
     try:
         sha256 = _sha256_file(cand.path)
     except OSError:
@@ -201,6 +202,8 @@ def _embed_chunks(cfg, db, conn) -> int:
     backend = resolve_backend(cfg, warn=lambda m: print(m))
     if not getattr(backend, "enabled", False):
         return 0
+    import sqlite_vec  # type: ignore[import-untyped]
+
     db.enable_vectors()
     repo.ensure_vec_tables(conn, dim=backend.dim)
     repo.prune_orphan_vectors(conn)
@@ -208,13 +211,29 @@ def _embed_chunks(cfg, db, conn) -> int:
     rows = [r for r in repo.chunks_for_embedding(conn) if int(r["id"]) not in existing]
     if not rows:
         return 0
-    texts = [r["content"] for r in rows]
-    vectors = backend.embed(texts)
-    for row, vec in zip(rows, vectors):
-        repo.upsert_chunk_vector(conn, int(row["id"]), vec)
+
+    # Content-addressed reuse: chunk ids churn on every full rebuild (replace_chunks),
+    # so a chunk-id keyed skip alone re-embeds the whole repo each time. Hash the content
+    # and only call the (potentially slow / paid) backend for text never embedded under
+    # this model; everything else is copied straight from the cache.
+    shas = [hashlib.sha256(r["content"].encode("utf-8")).hexdigest() for r in rows]
+    cached = repo.cached_embeddings(conn, model=backend.name, shas=shas)
+    misses = [(r, sha) for r, sha in zip(rows, shas) if sha not in cached]
+
+    fresh: dict[str, bytes] = {}
+    if misses:
+        vectors = backend.embed([r["content"] for r, _ in misses])
+        for (_row, sha), vec in zip(misses, vectors):
+            fresh[sha] = sqlite_vec.serialize_float32(vec)
+        repo.store_cached_embeddings(conn, model=backend.name, items=list(fresh.items()))
+
+    for row, sha in zip(rows, shas):
+        blob = cached.get(sha) or fresh[sha]
+        repo.upsert_chunk_vector_blob(conn, int(row["id"]), blob)
+
     built_at = datetime.now(timezone.utc).isoformat()
     repo.set_vec_meta(conn, model=backend.name, dim=backend.dim, built_at=built_at)
-    return len(rows)
+    return len(misses)
 
 
 def _sha256_file(path: Path) -> str:
diff --git a/src/codebase_index/models.py b/src/codebase_index/models.py
index 126988a..b35d959 100644
--- a/src/codebase_index/models.py
+++ b/src/codebase_index/models.py
@@ -5,9 +5,9 @@
 
 from __future__ import annotations
 
-from typing import Literal, Optional
+from typing import Iterable, Literal, Optional
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 Intent = Literal[
     "locate_impl", "how_it_works", "impact", "find_refs",
@@ -67,6 +67,48 @@ class SymbolResponse(BaseModel):
     symbols: list[SymbolDef] = []
 
 
+class GraphCoverage(BaseModel):
+    """Honesty signal for graph-derived answers (refs/impact).
+
+    Dependency edges (imports / inheritance) are only extracted for the fully
+    supported (Tier-A) languages. A symbol or file in a Tier-B language (generic
+    tree-sitter walk) yields symbols and best-effort call sites but no
+    import/extends/implements edges, so refs/impact can undercount. When
+    ``partial`` is true an *empty or short* result does not prove there are no
+    references — it may just be unanalyzed; confirm with Grep.
+    """
+
+    partial: bool = False
+    languages: list[str] = []
+    reason: Optional[str] = None
+
+    @classmethod
+    def for_paths(cls, paths: Iterable[str]) -> "GraphCoverage":
+        from .discovery.classify import detect_language, parser_for
+        from .parsers.languages import spec_for
+
+        tier_b = sorted(
+            {
+                lang
+                for p in paths
+                if (lang := detect_language(p)) is not None
+                and parser_for(lang) == "treesitter"
+                and spec_for(lang) is None
+            }
+        )
+        if not tier_b:
+            return cls()
+        return cls(
+            partial=True,
+            languages=tier_b,
+            reason=(
+                "Import/inheritance edges are not extracted for "
+                f"{', '.join(tier_b)} (best-effort symbols only). An empty or short "
+                "result is inconclusive — confirm with a Grep over the codebase."
+            ),
+        )
+
+
 class RefSite(BaseModel):
     path: str
     line: int
@@ -77,6 +119,7 @@ class RefsResponse(BaseModel):
     query: str
     index: IndexFreshness
     sites: list[RefSite] = []
+    coverage: GraphCoverage = Field(default_factory=GraphCoverage)
 
 
 class ImpactNode(BaseModel):
@@ -95,3 +138,4 @@ class ImpactResponse(BaseModel):
     index: IndexFreshness
     nodes: list[ImpactNode] = []
     files: list[str] = []           # distinct affected files, ranked
+    coverage: GraphCoverage = Field(default_factory=GraphCoverage)
diff --git a/src/codebase_index/output/markdown.py b/src/codebase_index/output/markdown.py
index 5768215..afd6a76 100644
--- a/src/codebase_index/output/markdown.py
+++ b/src/codebase_index/output/markdown.py
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+from typing import Optional
+
 from ..models import ImpactResponse, RefsResponse, SearchResponse, SymbolResponse
 
 
@@ -45,6 +47,14 @@ def _render_dict(payload: dict) -> str:
         for cmd in fb:
             lines.append(f"- `{cmd}`")
 
+    pg = payload.get("pagination")
+    if pg:
+        shown = f"results {pg['offset'] + 1}–{pg['offset'] + len(payload['results'])}"
+        if pg.get("has_more"):
+            lines.append(f"\n_Showing {shown}; more available — `--offset {pg['next_offset']}`._")
+        else:
+            lines.append(f"\n_Showing {shown} (end of results)._")
+
     return "\n".join(lines)
 
 
@@ -114,17 +124,28 @@ def render_symbols(resp: SymbolResponse) -> str:
     return "\n".join(lines).rstrip() + "\n"
 
 
+def _coverage_line(coverage) -> Optional[str]:
+    if coverage is not None and getattr(coverage, "partial", False):
+        return f"\n> ⚠️ Partial graph coverage: {coverage.reason}"
+    return None
+
+
 def render_refs(resp: RefsResponse) -> str:
     lines = [_header(resp.query, resp.index.exists, resp.index.stale)]
     lines.append("")
+    note = _coverage_line(resp.coverage)
     if not resp.sites:
         lines.append("_No references found._")
+        if note:
+            lines.append(note)
         return "\n".join(lines).rstrip() + "\n"
 
     lines.append("| kind | path | line |")
     lines.append("|------|------|------|")
     for site in resp.sites:
         lines.append(f"| {site.kind} | `{site.path}` | {site.line} |")
+    if note:
+        lines.append(note)
     return "\n".join(lines).rstrip() + "\n"
 
 
@@ -139,12 +160,18 @@ def render_impact(resp: ImpactResponse) -> str:
     header = (f"**impact:** `{resp.target}`  ·  **direction:** {resp.direction}  ·  "
               f"**depth:** {resp.depth}  ·  **affected files:** {len(resp.files)}")
     lines = [header, ""]
+    note = _coverage_line(resp.coverage)
     if not resp.nodes:
-        return "\n".join(lines + ["_No impact found (target unknown or no edges)._", ""]).rstrip() + "\n"
+        body = ["_No impact found (target unknown or no edges)._"]
+        if note:
+            body.append(note)
+        return "\n".join(lines + body + [""]).rstrip() + "\n"
     lines.append("| dist | via | kind | node | location |")
     lines.append("|------|-----|------|------|----------|")
     for n in sorted(resp.nodes, key=lambda x: (x.distance, x.path, x.line_start or 0)):
         loc = f"{n.path}:{n.line_start}" if n.line_start else n.path
         node_name = f"`{n.name}`" if n.name else "—"
         lines.append(f"| {n.distance} | {n.via_edge or ''} | {n.kind} | {node_name} | `{loc}` |")
+    if note:
+        lines.append(note)
     return "\n".join(lines).rstrip() + "\n"
diff --git a/src/codebase_index/parsers/languages.py b/src/codebase_index/parsers/languages.py
index 90cd325..a772fad 100644
--- a/src/codebase_index/parsers/languages.py
+++ b/src/codebase_index/parsers/languages.py
@@ -279,3 +279,12 @@ def is_supported(lang: Optional[str]) -> bool:
 
 def spec_for(lang: Optional[str]) -> Optional[LangSpec]:
     return LANGS.get(lang) if lang else None
+
+
+def has_full_graph(lang: Optional[str]) -> bool:
+    """True if `lang` has a Tier-A spec (full import/inheritance edges for refs/impact).
+
+    Tier-B languages (a loadable grammar but no hand-tuned spec) yield symbols and
+    best-effort call sites only, so their dependency graph is partial.
+    """
+    return spec_for(lang) is not None
diff --git a/src/codebase_index/retrieval/searchers.py b/src/codebase_index/retrieval/searchers.py
index 954ffa2..0cb0d00 100644
--- a/src/codebase_index/retrieval/searchers.py
+++ b/src/codebase_index/retrieval/searchers.py
@@ -16,6 +16,7 @@
 from ..indexer.freshness import compute_freshness
 from ..models import (
     Confidence,
+    GraphCoverage,
     IndexFreshness,
     ReadRange,
     RefSite,
@@ -326,6 +327,7 @@ def symbol_lookup(
 
 
 def refs_lookup(conn: sqlite3.Connection, name: str, *, kind: str) -> RefsResponse:
+    defs = repo.symbols_by_name(conn, name, exact=True)
     sites = [
         RefSite(path=row["path"], line=row["line"], kind="call")
         for row in repo.refs_for_name(conn, name)
@@ -333,10 +335,18 @@ def refs_lookup(conn: sqlite3.Connection, name: str, *, kind: str) -> RefsRespon
     if kind == "all":
         sites.extend(
             RefSite(path=row["path"], line=row["line_start"], kind="definition")
-            for row in repo.symbols_by_name(conn, name, exact=True)
+            for row in defs
         )
     sites.sort(key=lambda site: (site.path, site.line, site.kind))
-    return RefsResponse(query=name, index=_freshness(conn), sites=sites)
+    # Coverage is judged by the symbol's defining language(s); fall back to the
+    # call-site files when the symbol has no indexed definition.
+    coverage_paths = [row["path"] for row in defs] or [s.path for s in sites]
+    return RefsResponse(
+        query=name,
+        index=_freshness(conn),
+        sites=sites,
+        coverage=GraphCoverage.for_paths(coverage_paths),
+    )
 
 
 def vector_candidates(
diff --git a/src/codebase_index/skill_template/SKILL.md b/src/codebase_index/skill_template/SKILL.md
index b738921..2105974 100644
--- a/src/codebase_index/skill_template/SKILL.md
+++ b/src/codebase_index/skill_template/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: codebase-index
 description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project.
-allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
+allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob
 ---
 
 # Codebase Index
@@ -44,7 +44,12 @@ Pick the subcommand by intent:
 | a specific symbol name | `codebase-index symbol "<name>" --json` |
 | "who calls / references" | `codebase-index refs "<name>" --json` |
 | "what breaks if I change" | `codebase-index impact "<file-or-symbol>" --json` |
-| visual graph / "open graph" | `codebase-index graph "<file-or-symbol>" --open` |
+| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "<file-or-symbol>" --open` |
+
+The `graph` command renders an HTML dependency graph for a person to look at —
+it is not a retrieval packet. Use it only when the user explicitly wants a visual
+graph; for "what depends on X" questions, answer from `impact`/`refs` instead. In a
+headless session, prefer `--out <path>` over `--open`.
 
 `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow.
 
@@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear:
 - `--mode symbol` — pure symbol lookups (faster, no FTS noise)
 - `--mode fts` — text/keyword queries where symbol names don't matter
 - `--mode hybrid` — default; best for mixed queries
+- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit
+  requests" without the exact words). Requires opt-in embeddings; falls back with
+  a clear message when they are not enabled. `hybrid` already blends vectors in
+  when embeddings are on, so reach for `vector` only for pure-semantic recall.
 
 Natural-language kind words such as `method`, `function`, `class`, `interface`,
 `enum`, and `type` constrain the symbol retriever inside `search`.
@@ -89,6 +98,20 @@ Top-level fields:
 - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan.
 - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback).
 - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak.
+- `intent` / `mode` — how the query was classified and which retrievers ran;
+  useful to sanity-check a weak result (e.g. a "how does X work" question that
+  resolved to a bare symbol lookup may need `explain` instead).
+- `pagination` — present only when results are paged (omitted when the first page holds
+  everything). It reports `offset`, `has_more`, and `next_offset`. While `has_more` is
+  true, re-run `search` with `--offset <next_offset>` (e.g. `search "query" --limit 10 --offset 10`).
+  Prefer refining with a more specific subcommand or raising `--token-budget` first —
+  page only when the top results genuinely miss the answer.
+- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency
+  edges (imports/inheritance) are extracted only for fully supported languages.
+  When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such
+  as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may
+  just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing
+  references this". `coverage.languages` lists the affected languages.
 
 ## Token efficiency rules
 
@@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is `
 
 0. If confidence is consistently low across queries, run diagnostics first:
    ```bash
-   codebase-index stats --json    # check coverage and symbol counts per language
+   codebase-index stats --json    # per-language file/symbol counts + graph tier
    codebase-index doctor          # surface config or security issues
    ```
    Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`.
+   In `stats`, each language carries `graph: full|partial` (and `doctor` reports a
+   `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack
+   import/inheritance edges for that language — treat empty results there as
+   inconclusive.
 
 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep.
 2. If still nothing, Glob for likely paths, then Grep within them.
diff --git a/src/codebase_index/skill_template/scripts/cbx b/src/codebase_index/skill_template/scripts/cbx
index a33c2bd..5666358 100644
--- a/src/codebase_index/skill_template/scripts/cbx
+++ b/src/codebase_index/skill_template/scripts/cbx
@@ -4,7 +4,7 @@
 # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch).
 set -euo pipefail
 
-ALLOWED="search explain symbol refs impact graph stats update index"
+ALLOWED="search explain symbol refs impact graph stats doctor update index"
 
 sub="${1:-}"
 case " $ALLOWED " in
diff --git a/src/codebase_index/skill_template/scripts/cbx.ps1 b/src/codebase_index/skill_template/scripts/cbx.ps1
index fe10bd9..bb8e05d 100644
--- a/src/codebase_index/skill_template/scripts/cbx.ps1
+++ b/src/codebase_index/skill_template/scripts/cbx.ps1
@@ -8,7 +8,7 @@ param(
 )
 
 $ErrorActionPreference = "Stop"
-$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index")
+$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index")
 
 if ($allowed -notcontains $Subcommand) {
     Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')"
diff --git a/src/codebase_index/storage/repo.py b/src/codebase_index/storage/repo.py
index d87c995..338a12b 100644
--- a/src/codebase_index/storage/repo.py
+++ b/src/codebase_index/storage/repo.py
@@ -438,13 +438,22 @@ def count_resolved_edges(conn: sqlite3.Connection) -> int:
 
 
 def ensure_vec_tables(conn: sqlite3.Connection, *, dim: int) -> None:
-    """Create vec_chunks (sqlite-vec) + vec_meta if absent. dim is fixed per build."""
+    """Create vec_chunks (sqlite-vec) + vec_meta + vec_cache if absent. dim is fixed per build."""
     dim = int(dim)
     conn.execute(
         f"CREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0("
         f"chunk_id INTEGER PRIMARY KEY, embedding FLOAT[{dim}])"
     )
     conn.execute("CREATE TABLE IF NOT EXISTS vec_meta (model TEXT, dim INTEGER, built_at TEXT)")
+    # Content-addressed embedding cache: chunk ids churn on every full rebuild
+    # (replace_chunks deletes + re-inserts), so a chunk-id keyed store alone would
+    # re-embed the whole repo each time. Keyed by (model, content_sha) the cache
+    # survives that churn and lets unchanged content reuse its vector for free.
+    conn.execute(
+        "CREATE TABLE IF NOT EXISTS vec_cache ("
+        "model TEXT NOT NULL, content_sha TEXT NOT NULL, embedding BLOB NOT NULL, "
+        "PRIMARY KEY (model, content_sha))"
+    )
 
 
 def set_vec_meta(conn: sqlite3.Connection, *, model: str, dim: int, built_at: str) -> None:
@@ -467,10 +476,49 @@ def upsert_chunk_vector(
 ) -> None:
     import sqlite_vec  # type: ignore[import-untyped]
 
+    upsert_chunk_vector_blob(conn, chunk_id, sqlite_vec.serialize_float32(embedding))
+
+
+def upsert_chunk_vector_blob(conn: sqlite3.Connection, chunk_id: int, blob: bytes) -> None:
+    """Replace chunk_id's vec_chunks row with a pre-serialized float32 blob (cache-reuse path)."""
     conn.execute("DELETE FROM vec_chunks WHERE chunk_id = ?", (int(chunk_id),))
     conn.execute(
         "INSERT INTO vec_chunks (chunk_id, embedding) VALUES (?, ?)",
-        (int(chunk_id), sqlite_vec.serialize_float32(embedding)),
+        (int(chunk_id), blob),
+    )
+
+
+def cached_embeddings(
+    conn: sqlite3.Connection, *, model: str, shas: Iterable[str]
+) -> dict[str, bytes]:
+    """Map each requested content_sha that ``model`` has cached to its stored embedding blob.
+
+    Duplicate shas are queried once; shas with no cache row are simply absent from the result.
+    """
+    unique = list(dict.fromkeys(shas))
+    found: dict[str, bytes] = {}
+    # Query in slices of 500 so the bound-parameter count stays far below SQLite's limit.
+    for offset in range(0, len(unique), 500):
+        window = unique[offset : offset + 500]
+        marks = ",".join("?" for _ in window)
+        cursor = conn.execute(
+            f"SELECT content_sha, embedding FROM vec_cache "
+            f"WHERE model = ? AND content_sha IN ({marks})",
+            (model, *window),
+        )
+        found.update((row[0], row[1]) for row in cursor.fetchall())
+    return found
+
+
+def store_cached_embeddings(
+    conn: sqlite3.Connection, *, model: str, items: Sequence[tuple[str, bytes]]
+) -> None:
+    """Store (content_sha, blob) pairs for ``model`` in vec_cache, overwriting existing rows."""
+    if not items:
+        return  # empty batch: nothing to write
+    conn.executemany(
+        "INSERT OR REPLACE INTO vec_cache (model, content_sha, embedding) VALUES (?, ?, ?)",
+        [(model, sha, blob) for sha, blob in items],
     )
 
 
@@ -496,12 +544,12 @@ def prune_orphan_vectors(conn: sqlite3.Connection) -> int:
     try:
         current_ids = {r[0] for r in conn.execute("SELECT id FROM chunks").fetchall()}
         orphan_ids = [
-            r[0]
+            (r[0],)
             for r in conn.execute("SELECT chunk_id FROM vec_chunks").fetchall()
             if r[0] not in current_ids
         ]
-        for oid in orphan_ids:
-            conn.execute("DELETE FROM vec_chunks WHERE chunk_id = ?", (oid,))
+        if orphan_ids:
+            conn.executemany("DELETE FROM vec_chunks WHERE chunk_id = ?", orphan_ids)
         return len(orphan_ids)
     except Exception:
         return 0
diff --git a/tests/golden/impact_user_model.json b/tests/golden/impact_user_model.json
index 898f952..9ec0c77 100644
--- a/tests/golden/impact_user_model.json
+++ b/tests/golden/impact_user_model.json
@@ -1,4 +1,9 @@
 {
+  "coverage": {
+    "languages": [],
+    "partial": false,
+    "reason": null
+  },
   "depth": 2,
   "direction": "up",
   "files": [
diff --git a/tests/golden/refs_refresh_access_token.json b/tests/golden/refs_refresh_access_token.json
index 04dc0ca..f5693df 100644
--- a/tests/golden/refs_refresh_access_token.json
+++ b/tests/golden/refs_refresh_access_token.json
@@ -1,4 +1,9 @@
 {
+  "coverage": {
+    "languages": [],
+    "partial": false,
+    "reason": null
+  },
   "index": {
     "built_at": "<TS>",
     "exists": true,
diff --git a/tests/golden/stats.json b/tests/golden/stats.json
index 032e118..bf445b5 100644
--- a/tests/golden/stats.json
+++ b/tests/golden/stats.json
@@ -7,11 +7,13 @@
   "treesitter_coverage": [
     {
       "files": 3,
+      "graph": "full",
       "lang": "python",
       "symbols": 6
     },
     {
       "files": 2,
+      "graph": "full",
       "lang": "typescript",
       "symbols": 1
     }
diff --git a/tests/test_doctor.py b/tests/test_doctor.py
index 373616e..943c96a 100644
--- a/tests/test_doctor.py
+++ b/tests/test_doctor.py
@@ -43,6 +43,30 @@ def test_doctor_cli_json(tmp_path):
     assert any(f["id"] == "cache_gitignored" for f in data["findings"])
 
 
+def test_doctor_flags_tier_b_partial_graph(tmp_path):
+    """Indexing a Lua (Tier-B) file makes doctor emit an ok/info ``graph_coverage`` finding."""
+    lua_source = "local function greet()\n  return 1\nend\n"
+    (tmp_path / "mod.lua").write_text(lua_source, encoding="utf-8")
+    assert runner.invoke(app, ["--root", str(tmp_path), "index"]).exit_code == 0
+
+    cfg = Config()
+    cfg.root = str(tmp_path)
+    coverage = {f.id: f for f in run_doctor(tmp_path, cfg)}["graph_coverage"]
+    assert coverage.ok is True and coverage.severity == "info"
+    assert "lua" in coverage.detail
+
+
+def test_doctor_full_graph_when_only_tier_a(tmp_path):
+    """With only a Python file indexed, ``graph_coverage`` reports full graph support."""
+    (tmp_path / "mod.py").write_text("def f():\n    return 1\n", encoding="utf-8")
+    assert runner.invoke(app, ["--root", str(tmp_path), "index"]).exit_code == 0
+
+    cfg = Config()
+    cfg.root = str(tmp_path)
+    detail = {f.id: f for f in run_doctor(tmp_path, cfg)}["graph_coverage"].detail
+    assert "lua" not in detail and "full dependency-graph support" in detail
+
+
 def test_doctor_strict_exits_nonzero_on_high_severity(tmp_path):
     # uncovered cache is a high-severity finding → --strict must fail
     res = runner.invoke(app, ["--root", str(tmp_path), "doctor", "--strict"])
diff --git a/tests/test_graph_coverage.py b/tests/test_graph_coverage.py
new file mode 100644
index 0000000..872726c
--- /dev/null
+++ b/tests/test_graph_coverage.py
@@ -0,0 +1,66 @@
+"""Regression: refs/impact must flag partial graph coverage for Tier-B languages.
+
+Import/inheritance edges are only extracted for the hand-tuned (Tier-A) languages.
+A symbol or file in a Tier-B language (generic tree-sitter walk, e.g. Lua) gets
+symbols and best-effort call sites but no dependency edges, so an empty/short
+refs or impact result is inconclusive — the response must say so rather than let
+an agent read "no references" as proof.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from codebase_index.config import load
+from codebase_index.graph.expand import impact_lookup
+from codebase_index.indexer.pipeline import build_index
+from codebase_index.models import GraphCoverage
+from codebase_index.retrieval.searchers import refs_lookup
+from codebase_index.storage.db import Database
+
+_LUA = (
+    "local function greet(name)\n  return name\nend\n\n"
+    "local function main()\n  return greet('x')\nend\n"
+)
+_PY = "def helper():\n    return 1\n\n\ndef caller():\n    return helper()\n"
+
+
+def _index(repo: Path) -> Path:
+    """Write the Lua and Python fixtures into ``repo``, build the index, return its path."""
+    for filename, source in (("mod.lua", _LUA), ("mod.py", _PY)):
+        (repo / filename).write_text(source, encoding="utf-8")
+    cfg = load(root=str(repo))
+    with Database(repo / "index.sqlite") as db:
+        build_index(cfg, db, root=Path(cfg.root))
+    return repo / "index.sqlite"
+
+
+def test_coverage_for_paths_unit():
+    """Python/Go paths yield full coverage; a Lua path yields partial coverage naming ``lua``."""
+    assert GraphCoverage.for_paths(["a.py", "b.go"]).partial is False
+    lua_cov = GraphCoverage.for_paths(["x.lua"])
+    assert lua_cov.partial is True and lua_cov.languages == ["lua"]
+    assert lua_cov.reason and "lua" in lua_cov.reason
+
+
+def test_refs_flags_partial_for_tier_b_symbol(tmp_path):
+    """refs on a Lua symbol reports partial coverage; refs on a Python symbol does not."""
+    with Database(_index(tmp_path)) as db:
+        tier_b = refs_lookup(db.conn, "greet", kind="all").coverage
+        tier_a = refs_lookup(db.conn, "helper", kind="all").coverage
+
+    assert tier_b.partial is True
+    assert "lua" in tier_b.languages
+    # Tier-A symbol: fully analyzed, no warning.
+    assert tier_a.partial is False
+
+
+def test_impact_flags_partial_for_tier_b_file(tmp_path):
+    """impact on the Lua file reports partial coverage; impact on the Python file does not."""
+    with Database(_index(tmp_path)) as db:
+        tier_b = impact_lookup(db.conn, "mod.lua", depth=2, direction="both").coverage
+        tier_a = impact_lookup(db.conn, "mod.py", depth=2, direction="both").coverage
+
+    assert tier_b.partial is True
+    assert "lua" in tier_b.languages
+    assert tier_a.partial is False
diff --git a/tests/test_packaging.py b/tests/test_packaging.py
index 362fa06..ec76406 100644
--- a/tests/test_packaging.py
+++ b/tests/test_packaging.py
@@ -25,6 +25,6 @@ def test_packaged_skill_matches_dev_copy():
 
 def test_packaged_cbx_whitelists_safe_subcommands_only():
     cbx = (_template() / "scripts" / "cbx").read_text(encoding="utf-8")
-    assert 'ALLOWED="search explain symbol refs impact graph stats update index"' in cbx
+    assert 'ALLOWED="search explain symbol refs impact graph stats doctor update index"' in cbx
     for forbidden in ("clean", "init", "watch"):
         assert f" {forbidden} " not in f' {cbx.split("ALLOWED=")[1].splitlines()[0]} '
diff --git a/tests/test_pipeline_vectors.py b/tests/test_pipeline_vectors.py
index 115a1b2..4cd805d 100644
--- a/tests/test_pipeline_vectors.py
+++ b/tests/test_pipeline_vectors.py
@@ -56,3 +56,79 @@ def test_reindex_vectors_idempotent(sample_repo, tmp_path, fake_backend, monkeyp
     assert s2.vectors == 0
     assert repo.count_vectors(db.conn) == s1.vectors
     db.close()
+
+
+class _CountingBackend:
+    """Embedding-backend proxy that tallies ``embed`` calls and the texts passed to them."""
+
+    name = "fake"
+    enabled = True
+
+    def __init__(self, inner):
+        self.calls = 0  # number of embed() invocations
+        self.embedded = 0  # total texts handed to embed()
+        self._inner = inner
+        self.dim = inner.dim
+
+    def embed(self, texts):
+        self.calls += 1
+        self.embedded += len(texts)
+        return self._inner.embed(texts)
+
+
+def test_reindex_does_not_recompute_unchanged_embeddings(
+    sample_repo, tmp_path, fake_backend, monkeypatch
+):
+    """A full rebuild must reuse cached vectors for unchanged content, never re-embed it."""
+    import codebase_index.indexer.pipeline as pipe
+
+    backend = _CountingBackend(fake_backend)
+    monkeypatch.setattr(pipe, "resolve_backend", lambda cfg, warn=None: backend)
+    cfg = Config()
+    cfg.root = str(sample_repo)
+    cfg.embeddings.enabled = True
+
+    # Context manager: the index is closed even when an assertion below fails.
+    with Database(tmp_path / "index.sqlite") as db:
+        build_index(cfg, db, root=sample_repo)
+        first_pass = backend.embedded
+        assert first_pass > 0
+
+        build_index(cfg, db, root=sample_repo)
+        # Chunk ids churn across rebuilds, but content is identical -> cache hit, no backend work.
+        assert backend.embedded == first_pass
+
+
+def test_changed_file_only_embeds_new_content(
+    sample_repo, tmp_path, fake_backend, monkeypatch
+):
+    """Editing one file embeds only its new chunks; the rest come from the cache."""
+    import shutil
+
+    import codebase_index.indexer.pipeline as pipe
+
+    # Copy the fixture so the edit below never mutates the shared, committed sample repo.
+    repo_copy = tmp_path / "repo"
+    shutil.copytree(sample_repo, repo_copy)
+
+    backend = _CountingBackend(fake_backend)
+    monkeypatch.setattr(pipe, "resolve_backend", lambda cfg, warn=None: backend)
+    cfg = Config()
+    cfg.root = str(repo_copy)
+    cfg.embeddings.enabled = True
+
+    # Context manager: the index is closed even when an assertion below fails.
+    with Database(tmp_path / "index.sqlite") as db:
+        build_index(cfg, db, root=repo_copy)
+        baseline = backend.embedded
+        assert baseline > 0  # sanity: the first build must have embedded something
+
+        # Append a brand-new function so token.py gains content the cache has never seen.
+        target = repo_copy / "src" / "auth" / "token.py"
+        appended = "\n\ndef brand_new_helper():\n    return 42\n"
+        target.write_text(target.read_text(encoding="utf-8") + appended, encoding="utf-8")
+        s2 = build_index(cfg, db, root=repo_copy)
+
+        # Some new chunks were embedded, but far fewer than a full re-embed of the repo.
+        assert s2.vectors > 0
+        assert backend.embedded - baseline < baseline
diff --git a/tests/test_search_cli.py b/tests/test_search_cli.py
index 35bf309..d0896a4 100644
--- a/tests/test_search_cli.py
+++ b/tests/test_search_cli.py
@@ -126,6 +126,40 @@ def test_search_reports_stale_after_edit(sample_repo, tmp_path, monkeypatch):
     assert stale["index"]["files_changed_since_build"] >= 1
 
 
+def test_explain_reports_stale_after_edit(sample_repo):
+    """Regression: explain must honor the freshness contract like search.
+
+    Before the fix, explain called the retrieval pipeline without root/config, so
+    it always fell back to a hardcoded ``stale=False, files_changed_since_build=0``
+    block — silently breaking the skill's freshness check for "how does X work".
+    """
+    import sqlite3
+
+    assert runner.invoke(app, ["--root", str(sample_repo), "index"]).exit_code == 0
+    argv = ["--root", str(sample_repo), "--json", "explain", "how does token refresh work"]
+
+    res = runner.invoke(app, argv)
+    assert res.exit_code == 0, res.output
+    fresh = _json.loads(res.output)
+    assert fresh["index"]["exists"] is True
+    assert fresh["index"]["stale"] is False
+
+    db_path = sample_repo / ".claude" / "cache" / "codebase-index" / "index.sqlite"
+    conn = sqlite3.connect(str(db_path))
+    try:
+        conn.execute("UPDATE files SET mtime_ns = 1")  # stored mtimes no longer match disk
+        conn.execute("DELETE FROM meta WHERE key = 'head_commit'")
+        conn.commit()
+    finally:
+        conn.close()  # runs even when the UPDATE/DELETE raises
+
+    res2 = runner.invoke(app, argv)
+    assert res2.exit_code == 0, res2.output  # fail with the CLI output, not a JSON decode error
+    stale = _json.loads(res2.output)
+    assert stale["index"]["stale"] is True
+    assert stale["index"]["files_changed_since_build"] >= 1
+
+
 def test_search_kind_words_filter_symbol_kind(sample_repo):
     assert runner.invoke(app, ["--root", str(sample_repo), "index"]).exit_code == 0
 
@@ -145,3 +179,52 @@ def test_search_kind_words_filter_symbol_kind(sample_repo):
     assert result.exit_code == 0, result.output
     payload = _json.loads(result.output)
     assert payload["results"][0]["symbols"] == ["refresh_access_token"]
+
+
+def test_search_offset_paginates_through_cli(sample_repo):
+    """Regression: --offset must reach the retrieval pipeline.
+
+    Before the fix, the CLI search command never exposed --offset, so the
+    pipeline's pagination contract (advertised via the JSON ``pagination`` block
+    and MCP's ``next_offset``) was unreachable from the CLI/skill — every call
+    silently returned page one.
+
+    Every path through this test passes --offset at least once (``--offset 0``
+    when the fixture yields a single page), so it cannot succeed against a CLI
+    that rejects the flag.
+    """
+    assert runner.invoke(app, ["--root", str(sample_repo), "index"]).exit_code == 0
+
+    def search(*extra):
+        """Run ``search token --limit 1`` (plus ``extra`` args) as JSON; return the payload."""
+        res = runner.invoke(
+            app, ["--root", str(sample_repo), "--json", "search", "token", "--limit", "1", *extra]
+        )
+        assert res.exit_code == 0, res.output
+        return _json.loads(res.output)
+
+    def top_hit(payload):
+        """Identify a page's first result by (path, line_start)."""
+        first = payload["results"][0]
+        return first["path"], first["line_start"]
+
+    p1 = search()
+    pag = p1.get("pagination")
+    if not pag or not pag.get("has_more"):
+        # Fixture too small to page; the flag must still be accepted. Returning
+        # without this call would let the test pass with --offset never used.
+        search("--offset", "0")
+        return
+
+    # Resume exactly where page one says the next page starts.
+    p2 = search("--offset", str(pag["next_offset"]))
+    assert p2["pagination"]["offset"] == pag["next_offset"]
+
+    # The second page must not repeat the first page's top hit.
+    assert top_hit(p1) != top_hit(p2)
+
+
+def test_search_negative_offset_rejected(sample_repo):
+    """``search --offset -1`` must exit with status 2 and mention ``offset`` in its output."""
+    res = runner.invoke(app, ["--root", str(sample_repo), "search", "token", "--offset", "-1"])
+    assert res.exit_code == 2 and "offset" in res.output.lower()