diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index f05c290..91036f6 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -3,7 +3,7 @@ "name": "codebase-index", "displayName": "Codebase Index", "description": "Local-first hybrid codebase index. Auto-provisions its Python CLI on first session start; the skill searches the index so Claude reads only the most relevant files.", - "version": "1.2.1", + "version": "1.2.2", "author": { "name": "codebase-index contributors" }, diff --git a/.claude/skills/codebase-index/.skill_version b/.claude/skills/codebase-index/.skill_version index cb174d5..23aa839 100644 --- a/.claude/skills/codebase-index/.skill_version +++ b/.claude/skills/codebase-index/.skill_version @@ -1 +1 @@ -1.2.1 \ No newline at end of file +1.2.2 diff --git a/.claude/skills/codebase-index/SKILL.md b/.claude/skills/codebase-index/SKILL.md index b738921..2105974 100644 --- a/.claude/skills/codebase-index/SKILL.md +++ b/.claude/skills/codebase-index/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. 
-allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out <path>` over `--open`. `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,20 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan.
- `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset <next_offset>` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages. + When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules @@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`.
+ In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/.claude/skills/codebase-index/scripts/cbx b/.claude/skills/codebase-index/scripts/cbx index a33c2bd..5666358 100644 --- a/.claude/skills/codebase-index/scripts/cbx +++ b/.claude/skills/codebase-index/scripts/cbx @@ -4,7 +4,7 @@ # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch). set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in diff --git a/.claude/skills/codebase-index/scripts/cbx.ps1 b/.claude/skills/codebase-index/scripts/cbx.ps1 index fe10bd9..bb8e05d 100644 --- a/.claude/skills/codebase-index/scripts/cbx.ps1 +++ b/.claude/skills/codebase-index/scripts/cbx.ps1 @@ -8,7 +8,7 @@ param( ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. 
Allowed: $($allowed -join ', ')" diff --git a/.codex/skills/codebase-index/.skill_version b/.codex/skills/codebase-index/.skill_version index cb174d5..23aa839 100644 --- a/.codex/skills/codebase-index/.skill_version +++ b/.codex/skills/codebase-index/.skill_version @@ -1 +1 @@ -1.2.1 \ No newline at end of file +1.2.2 diff --git a/.codex/skills/codebase-index/SKILL.md b/.codex/skills/codebase-index/SKILL.md index b738921..2105974 100644 --- a/.codex/skills/codebase-index/SKILL.md +++ b/.codex/skills/codebase-index/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. -allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out <path>` over `--open`.
`explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,20 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan. - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset <next_offset>` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages.
+ When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules @@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`. + In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/.codex/skills/codebase-index/scripts/cbx b/.codex/skills/codebase-index/scripts/cbx index a33c2bd..5666358 100644 --- a/.codex/skills/codebase-index/scripts/cbx +++ b/.codex/skills/codebase-index/scripts/cbx @@ -4,7 +4,7 @@ # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch). 
set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in diff --git a/.codex/skills/codebase-index/scripts/cbx.ps1 b/.codex/skills/codebase-index/scripts/cbx.ps1 index fe10bd9..bb8e05d 100644 --- a/.codex/skills/codebase-index/scripts/cbx.ps1 +++ b/.codex/skills/codebase-index/scripts/cbx.ps1 @@ -8,7 +8,7 @@ param( ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" diff --git a/.opencode/skills/codebase-index/.skill_version b/.opencode/skills/codebase-index/.skill_version index cb174d5..23aa839 100644 --- a/.opencode/skills/codebase-index/.skill_version +++ b/.opencode/skills/codebase-index/.skill_version @@ -1 +1 @@ -1.2.1 \ No newline at end of file +1.2.2 diff --git a/.opencode/skills/codebase-index/SKILL.md b/.opencode/skills/codebase-index/SKILL.md index b738921..2105974 100644 --- a/.opencode/skills/codebase-index/SKILL.md +++ b/.opencode/skills/codebase-index/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. 
-allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out <path>` over `--open`. `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,20 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan.
- `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset <next_offset>` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages. + When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules @@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`.
+ In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/.opencode/skills/codebase-index/scripts/cbx b/.opencode/skills/codebase-index/scripts/cbx index a33c2bd..5666358 100644 --- a/.opencode/skills/codebase-index/scripts/cbx +++ b/.opencode/skills/codebase-index/scripts/cbx @@ -4,7 +4,7 @@ # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch). set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in diff --git a/.opencode/skills/codebase-index/scripts/cbx.ps1 b/.opencode/skills/codebase-index/scripts/cbx.ps1 index fe10bd9..bb8e05d 100644 --- a/.opencode/skills/codebase-index/scripts/cbx.ps1 +++ b/.opencode/skills/codebase-index/scripts/cbx.ps1 @@ -8,7 +8,7 @@ param( ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" diff --git a/CHANGELOG.md b/CHANGELOG.md index d85eb8c..cb9e12c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,55 @@ All notable changes to this project are documented here. The format is based on ## [Unreleased] +### Added +- **Content-addressed embedding cache**: a new `vec_cache` table (keyed by `(model, content_sha)`) + persists chunk embeddings across rebuilds. 
Because chunk ids churn on every full rebuild, the + embedding pass now hashes chunk content and only calls the (potentially slow or paid) backend for + text never embedded under the active model — unchanged content reuses its cached vector for free. + +### Added +- **Repo-wide graph tier in diagnostics**: `stats` now tags each tree-sitter language with + `graph: full|partial`, and `doctor` adds a `graph_coverage` finding listing Tier-B languages + present in the index. Surfaces upfront which languages have partial `refs`/`impact` (symbols but + no import/inheritance edges) instead of only signaling per-query. +- **Graph coverage signal**: `refs` and `impact` now report a `coverage` block + (`partial`, `languages`, `reason`). Import/inheritance edges are only extracted + for the hand-tuned (Tier-A) languages, so a symbol or file in a Tier-B language + (generic tree-sitter walk, e.g. Lua) can produce an empty/short result that is + inconclusive rather than authoritative. `coverage.partial` flags this so agents + fall back to Grep instead of reading "no references" as proof. Markdown output + prints a matching warning; the skill documents the field. + +### Changed +- The embedding pass reports cache **misses** (vectors actually computed) as its "embedded" count. +- `prune_orphan_vectors` now deletes stale `vec_chunks` rows in a single batched `executemany`. +- **Skill**: documented the `--mode vector` semantic-search path, the `intent`/`mode`/`pagination` + response fields, and clarified that `graph --open` renders an HTML view for a human (use + `impact`/`refs` for agent-readable dependency answers). +- **Skill**: narrowed the skill's `allowed-tools` from `Bash(python *)`/`Bash(python3 *)` to + `Bash(python -m codebase_index *)`/`Bash(python3 -m codebase_index *)`, so the skill can no longer + run arbitrary Python. + +### Fixed +- `search` now exposes `--offset`, so the pagination contract is reachable from the CLI/skill. 
+ The retrieval pipeline and MCP already supported paging, but the CLI command never surfaced the + flag — every call silently returned page one and the advertised `pagination.next_offset` was a + dead end. Markdown output now also notes when more results are available. `--offset` rejects + negative values. +- `explain` now honors the index freshness contract: it passes `root`/`config` into the retrieval + pipeline, so `index.stale` / `files_changed_since_build` reflect reality instead of a hardcoded + "fresh" block. Previously the skill's freshness check silently never triggered for + "how does X work" questions. `explain` also blends in vector results when embeddings are enabled, + matching `search --mode hybrid`. +- The `cbx` wrapper whitelist (skill + plugin `bin/`) now includes `doctor`, which the skill's + fallback diagnostics already invoke; previously `cbx doctor` was refused. + +## [1.2.2] - 2026-06-05 + +### Changed +- Synced the version to `1.2.2` across the package, plugin manifest, and lockfile. +- Documentation cleanup: removed stale prompt files and screenshots, refreshed the README. + ## [1.2.1] - 2026-06-05 ### Added diff --git a/bin/cbx b/bin/cbx index 2b7294d..cfe60b7 100644 --- a/bin/cbx +++ b/bin/cbx @@ -3,7 +3,7 @@ # from the venv provisioned by scripts/bootstrap.sh (located via the .venv-path pointer). 
set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in *" ${sub} "*) : ;; diff --git a/bin/cbx.ps1 b/bin/cbx.ps1 index cf33e6b..85face7 100644 --- a/bin/cbx.ps1 +++ b/bin/cbx.ps1 @@ -5,7 +5,7 @@ param( [Parameter(ValueFromRemainingArguments = $true)] [string[]]$Rest ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" exit 2 diff --git a/docs/DATABASE_SCHEMA.md b/docs/DATABASE_SCHEMA.md index c2e3cd9..e0d0d7a 100644 --- a/docs/DATABASE_SCHEMA.md +++ b/docs/DATABASE_SCHEMA.md @@ -85,16 +85,39 @@ FTS5 virtual table for full-text search (auto-managed by triggers). | `text` | TEXT | Chunk text (indexed by FTS5) | | `chunk_id` | INTEGER | References chunks(id) | -### embeddings (optional) +### vec_chunks (optional) -Stores vector embeddings for semantic search. +Vector embeddings for semantic search. Created **only** when `embeddings.enabled = true`, via the +`sqlite-vec` extension (a `vec0` virtual table). + +| Column | Type | Description | +|---|---|---| +| `chunk_id` | INTEGER PRIMARY KEY | References chunks(id) | +| `embedding` | FLOAT[dim] | Embedding vector; `dim` is fixed per build by the configured model | + +### vec_meta (optional) + +Records which embedding model/dimension produced the vectors currently in `vec_chunks`. 
| Column | Type | Description | |---|---|---| -| `chunk_id` | INTEGER PRIMARY KEY REFERENCES chunks(id) | Associated chunk | -| `vector` | BLOB | Serialized embedding vector | | `model` | TEXT | Embedding model identifier | -| `created_at` | TEXT | Creation timestamp | +| `dim` | INTEGER | Vector dimension | +| `built_at` | TEXT | ISO 8601 timestamp of the embedding pass | + +### vec_cache (optional) + +Content-addressed embedding cache. `chunk_id`s churn on every full rebuild (chunks are deleted and +re-inserted), so this cache is keyed by `(model, content_sha)` instead — letting unchanged content +reuse its vector for free across rebuilds, so only new or changed text hits the backend. + +| Column | Type | Description | +|---|---|---| +| `model` | TEXT NOT NULL | Embedding model identifier | +| `content_sha` | TEXT NOT NULL | SHA-256 of the chunk content | +| `embedding` | BLOB NOT NULL | Pre-serialized float32 vector | + +Primary key: `(model, content_sha)`. ### summaries diff --git a/docs/RETRIEVAL_PIPELINE.md b/docs/RETRIEVAL_PIPELINE.md index 86b2014..4e2901d 100644 --- a/docs/RETRIEVAL_PIPELINE.md +++ b/docs/RETRIEVAL_PIPELINE.md @@ -74,6 +74,10 @@ Ranked retrieval packet with confidence score **Score:** Cosine similarity (0.0 to 1.0). +> **Indexing note:** chunk embeddings are reused across rebuilds via a content-addressed +> `vec_cache` (keyed by model + content SHA-256), so only new or changed chunks are re-embedded. +> See [DATABASE_SCHEMA.md](DATABASE_SCHEMA.md) and [SCHEMA.md](SCHEMA.md) for details. + ## 5. Graph Expansion **Trigger:** After initial results are found. 
diff --git a/docs/SCHEMA.md b/docs/SCHEMA.md index 15b31e9..f5c21bd 100644 --- a/docs/SCHEMA.md +++ b/docs/SCHEMA.md @@ -129,9 +129,27 @@ CREATE VIRTUAL TABLE vec_chunks USING vec0( ); -- A side table records which embedding model/dim produced these vectors: CREATE TABLE vec_meta (model TEXT, dim INTEGER, built_at TEXT); +-- Content-addressed embedding cache, keyed by (model, content SHA-256): +CREATE TABLE vec_cache ( + model TEXT NOT NULL, + content_sha TEXT NOT NULL, + embedding BLOB NOT NULL, -- pre-serialized float32 vector + PRIMARY KEY (model, content_sha) +); ``` -If embeddings are disabled, `vec_chunks` does not exist and the vector searcher is skipped. +If embeddings are disabled, none of `vec_chunks`, `vec_meta`, or `vec_cache` exist and the vector +searcher is skipped. + +### Embedding reuse via `vec_cache` + +`chunk_id`s churn on every full rebuild because `replace_chunks` deletes and re-inserts rows, so a +`chunk_id`-keyed store alone would re-embed the entire repository each time. The embedding pass +therefore hashes each chunk's content (SHA-256) and looks it up in `vec_cache` under the active +model name. Only content never embedded under that model is sent to the (potentially slow or paid) +backend; everything else is copied straight from the cache into `vec_chunks`. Newly computed vectors +are written back to `vec_cache` so subsequent rebuilds reuse them. The reported "embedded" count +reflects cache **misses** — i.e. the work actually performed. 
## Migrations diff --git a/requirements.lock b/requirements.lock index 1c565ad..f87af0a 100644 --- a/requirements.lock +++ b/requirements.lock @@ -1,3 +1,3 @@ -codebase-index @ https://github.com/denfry/codebase-index/archive/refs/tags/v1.2.0.tar.gz +codebase-index @ https://github.com/denfry/codebase-index/archive/refs/tags/v1.2.2.tar.gz tree-sitter==0.25.2 tree-sitter-language-pack==1.8.1 diff --git a/skill/SKILL.md b/skill/SKILL.md index b738921..2105974 100644 --- a/skill/SKILL.md +++ b/skill/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. -allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out <path>` over `--open`.
`explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,20 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan. - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset <next_offset>` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages.
+ When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules @@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`. + In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/skill/scripts/cbx b/skill/scripts/cbx index a33c2bd..5666358 100644 --- a/skill/scripts/cbx +++ b/skill/scripts/cbx @@ -4,7 +4,7 @@ # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch). 
set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in diff --git a/skill/scripts/cbx.ps1 b/skill/scripts/cbx.ps1 index fe10bd9..bb8e05d 100644 --- a/skill/scripts/cbx.ps1 +++ b/skill/scripts/cbx.ps1 @@ -8,7 +8,7 @@ param( ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" diff --git a/skills/codebase-index/SKILL.md b/skills/codebase-index/SKILL.md index b738921..2105974 100644 --- a/skills/codebase-index/SKILL.md +++ b/skills/codebase-index/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. 
-allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out ` over `--open`. `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,20 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan. 
- `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages. + When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules @@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`. 
+ In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/src/codebase_index/cli.py b/src/codebase_index/cli.py index a1c6efa..4126fcf 100644 --- a/src/codebase_index/cli.py +++ b/src/codebase_index/cli.py @@ -375,6 +375,9 @@ def search( ctx: typer.Context, query: str = typer.Argument(..., help="Search query."), limit: int = typer.Option(10, "--limit"), + offset: int = typer.Option( + 0, "--offset", help="Skip the first N results (use pagination.next_offset to page)." + ), token_budget: int = typer.Option(1500, "--token-budget"), mode: str = typer.Option("hybrid", "--mode", help="hybrid|fts|symbol|vector"), no_fallback: bool = typer.Option(False, "--no-fallback"), @@ -386,6 +389,10 @@ def search( from .retrieval.pipeline import search as run_search from .storage.db import Database + if offset < 0: + typer.echo("[codebase-index] --offset must be >= 0.") + raise typer.Exit(code=2) + backend = None if mode in ("vector", "hybrid"): backend = _resolve_backend_for_search(ctx) @@ -402,7 +409,7 @@ def search( if backend is not None and getattr(backend, "enabled", False): db.enable_vectors() payload = run_search( - db.conn, query, mode=mode, limit=limit, + db.conn, query, mode=mode, limit=limit, offset=offset, token_budget=token_budget, no_fallback=no_fallback, backend=backend, root=Path(cfg.root), config=cfg, ) @@ -489,12 +496,18 @@ def explain( from .retrieval.pipeline import search as run_search from .storage.db import Database - db_path, _cfg = _ensure_index(ctx) + backend = _resolve_backend_for_search(ctx) + db_path, cfg = _ensure_index(ctx) q = query if any(w in query.lower() for w in ("how", "architecture", "overview")) else 
f"how does {query} work" with Database(db_path) as db: - payload = run_search(db.conn, q, mode="hybrid", limit=10, - token_budget=token_budget, no_fallback=False) + if getattr(backend, "enabled", False): + db.enable_vectors() + payload = run_search( + db.conn, q, mode="hybrid", limit=10, + token_budget=token_budget, no_fallback=False, backend=backend, + root=Path(cfg.root), config=cfg, + ) want_json = json_out or (ctx.obj and ctx.obj.get("json")) typer.echo(json_renderer.render(payload) if want_json else md_renderer.render(payload)) @@ -556,6 +569,7 @@ def stats( import json as _json from .config import load + from .parsers.languages import has_full_graph from .storage import repo from .storage.db import Database @@ -578,7 +592,14 @@ def stats( built_at = repo.get_meta(db.conn, "built_at") head = repo.get_meta(db.conn, "head_commit") coverage = [ - {"lang": r["lang"], "files": r["files"], "symbols": r["symbols"]} + { + "lang": r["lang"], + "files": r["files"], + "symbols": r["symbols"], + # Tier-A languages get import/inheritance edges; Tier-B is symbols-only, + # so refs/impact are partial for them. + "graph": "full" if has_full_graph(r["lang"]) else "partial", + } for r in repo.treesitter_coverage(db.conn) ] @@ -599,7 +620,8 @@ def stats( typer.echo(f"files={files} symbols={symbols} built_at={built_at} head={head}") for r in coverage: flag = " ⚠ 0 symbols" if (r["symbols"] or 0) == 0 and r["files"] >= 3 else "" - typer.echo(f" {r['lang']}: {r['files']} files, {r['symbols']} symbols{flag}") + tier = " · partial graph (Tier-B)" if r["graph"] == "partial" else "" + typer.echo(f" {r['lang']}: {r['files']} files, {r['symbols']} symbols{flag}{tier}") @app.command() diff --git a/src/codebase_index/doctor.py b/src/codebase_index/doctor.py index 6eac676..0770cae 100644 --- a/src/codebase_index/doctor.py +++ b/src/codebase_index/doctor.py @@ -107,6 +107,25 @@ def run_doctor(root: Path, config: Config) -> list[Finding]: ) ) + # 5. 
Dependency-graph coverage: Tier-B languages (grammar but no hand-tuned spec) + # yield symbols but no import/inheritance edges, so refs/impact undercount. + from .parsers.languages import has_full_graph + + tier_b = sorted({r["lang"] for r in coverage if not has_full_graph(r["lang"])}) + findings.append( + Finding( + id="graph_coverage", + ok=True, + severity="info", + detail=( + "all indexed languages have full dependency-graph support" + if not tier_b + else f"partial dependency graph for Tier-B language(s): {', '.join(tier_b)} " + "— refs/impact may undercount (confirm with Grep)" + ), + ) + ) + return findings diff --git a/src/codebase_index/graph/expand.py b/src/codebase_index/graph/expand.py index 6552ba2..deebed1 100644 --- a/src/codebase_index/graph/expand.py +++ b/src/codebase_index/graph/expand.py @@ -16,7 +16,7 @@ from collections import deque from typing import Optional -from ..models import ImpactNode, ImpactResponse, IndexFreshness +from ..models import GraphCoverage, ImpactNode, ImpactResponse, IndexFreshness from ..storage import repo @@ -106,6 +106,19 @@ def walk_impact( return out +def _target_paths(conn: sqlite3.Connection, target: str) -> list[str]: + """The file path(s) the target resolves to, for coverage classification.""" + if repo.file_by_path(conn, target) is not None: + return [target] + sym_rows = repo.symbols_by_name(conn, target, exact=True) + if sym_rows: + return [r["path"] for r in sym_rows] + suffix = repo.files_with_suffix(conn, target) + if len(suffix) == 1: + return [suffix[0]["path"]] + return [] + + def impact_lookup( conn: sqlite3.Connection, target: str, *, depth: int, direction: str ) -> ImpactResponse: @@ -118,4 +131,5 @@ def impact_lookup( return ImpactResponse( target=target, direction=direction, depth=depth, index=_freshness(conn), nodes=nodes, files=files, + coverage=GraphCoverage.for_paths(_target_paths(conn, target)), ) diff --git a/src/codebase_index/indexer/pipeline.py b/src/codebase_index/indexer/pipeline.py index 
48c63a3..6f8237d 100644 --- a/src/codebase_index/indexer/pipeline.py +++ b/src/codebase_index/indexer/pipeline.py @@ -85,6 +85,7 @@ def _pool_init(config: Config) -> None: def _parse_one(cand) -> _ParseResult: """Parse a single file. Top-level for ProcessPoolExecutor pickling; uses _PARSE_CONFIG.""" config = _PARSE_CONFIG + assert config is not None, "_pool_init must set _PARSE_CONFIG before any worker parses" try: sha256 = _sha256_file(cand.path) except OSError: @@ -201,6 +202,8 @@ def _embed_chunks(cfg, db, conn) -> int: backend = resolve_backend(cfg, warn=lambda m: print(m)) if not getattr(backend, "enabled", False): return 0 + import sqlite_vec # type: ignore[import-untyped] + db.enable_vectors() repo.ensure_vec_tables(conn, dim=backend.dim) repo.prune_orphan_vectors(conn) @@ -208,13 +211,29 @@ def _embed_chunks(cfg, db, conn) -> int: rows = [r for r in repo.chunks_for_embedding(conn) if int(r["id"]) not in existing] if not rows: return 0 - texts = [r["content"] for r in rows] - vectors = backend.embed(texts) - for row, vec in zip(rows, vectors): - repo.upsert_chunk_vector(conn, int(row["id"]), vec) + + # Content-addressed reuse: chunk ids churn on every full rebuild (replace_chunks), + # so a chunk-id keyed skip alone re-embeds the whole repo each time. Hash the content + # and only call the (potentially slow / paid) backend for text never embedded under + # this model; everything else is copied straight from the cache. 
+ shas = [hashlib.sha256(r["content"].encode("utf-8")).hexdigest() for r in rows] + cached = repo.cached_embeddings(conn, model=backend.name, shas=shas) + misses = [(r, sha) for r, sha in zip(rows, shas) if sha not in cached] + + fresh: dict[str, bytes] = {} + if misses: + vectors = backend.embed([r["content"] for r, _ in misses]) + for (_row, sha), vec in zip(misses, vectors): + fresh[sha] = sqlite_vec.serialize_float32(vec) + repo.store_cached_embeddings(conn, model=backend.name, items=list(fresh.items())) + + for row, sha in zip(rows, shas): + blob = cached.get(sha) or fresh[sha] + repo.upsert_chunk_vector_blob(conn, int(row["id"]), blob) + built_at = datetime.now(timezone.utc).isoformat() repo.set_vec_meta(conn, model=backend.name, dim=backend.dim, built_at=built_at) - return len(rows) + return len(misses) def _sha256_file(path: Path) -> str: diff --git a/src/codebase_index/models.py b/src/codebase_index/models.py index 126988a..b35d959 100644 --- a/src/codebase_index/models.py +++ b/src/codebase_index/models.py @@ -5,9 +5,9 @@ from __future__ import annotations -from typing import Literal, Optional +from typing import Iterable, Literal, Optional -from pydantic import BaseModel +from pydantic import BaseModel, Field Intent = Literal[ "locate_impl", "how_it_works", "impact", "find_refs", @@ -67,6 +67,48 @@ class SymbolResponse(BaseModel): symbols: list[SymbolDef] = [] +class GraphCoverage(BaseModel): + """Honesty signal for graph-derived answers (refs/impact). + + Dependency edges (imports / inheritance) are only extracted for the fully + supported (Tier-A) languages. A symbol or file in a Tier-B language (generic + tree-sitter walk) yields symbols and best-effort call sites but no + import/extends/implements edges, so refs/impact can undercount. When + ``partial`` is true an *empty or short* result does not prove there are no + references — it may just be unanalyzed; confirm with Grep. 
+ """ + + partial: bool = False + languages: list[str] = [] + reason: Optional[str] = None + + @classmethod + def for_paths(cls, paths: Iterable[str]) -> "GraphCoverage": + from .discovery.classify import detect_language, parser_for + from .parsers.languages import spec_for + + tier_b = sorted( + { + lang + for p in paths + if (lang := detect_language(p)) is not None + and parser_for(lang) == "treesitter" + and spec_for(lang) is None + } + ) + if not tier_b: + return cls() + return cls( + partial=True, + languages=tier_b, + reason=( + "Import/inheritance edges are not extracted for " + f"{', '.join(tier_b)} (best-effort symbols only). An empty or short " + "result is inconclusive — confirm with a Grep over the codebase." + ), + ) + + class RefSite(BaseModel): path: str line: int @@ -77,6 +119,7 @@ class RefsResponse(BaseModel): query: str index: IndexFreshness sites: list[RefSite] = [] + coverage: GraphCoverage = Field(default_factory=GraphCoverage) class ImpactNode(BaseModel): @@ -95,3 +138,4 @@ class ImpactResponse(BaseModel): index: IndexFreshness nodes: list[ImpactNode] = [] files: list[str] = [] # distinct affected files, ranked + coverage: GraphCoverage = Field(default_factory=GraphCoverage) diff --git a/src/codebase_index/output/markdown.py b/src/codebase_index/output/markdown.py index 5768215..afd6a76 100644 --- a/src/codebase_index/output/markdown.py +++ b/src/codebase_index/output/markdown.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import Optional + from ..models import ImpactResponse, RefsResponse, SearchResponse, SymbolResponse @@ -45,6 +47,14 @@ def _render_dict(payload: dict) -> str: for cmd in fb: lines.append(f"- `{cmd}`") + pg = payload.get("pagination") + if pg: + shown = f"results {pg['offset'] + 1}–{pg['offset'] + len(payload['results'])}" + if pg.get("has_more"): + lines.append(f"\n_Showing {shown}; more available — `--offset {pg['next_offset']}`._") + else: + lines.append(f"\n_Showing {shown} (end of results)._") + 
return "\n".join(lines) @@ -114,17 +124,28 @@ def render_symbols(resp: SymbolResponse) -> str: return "\n".join(lines).rstrip() + "\n" +def _coverage_line(coverage) -> Optional[str]: + if coverage is not None and getattr(coverage, "partial", False): + return f"\n> ⚠️ Partial graph coverage: {coverage.reason}" + return None + + def render_refs(resp: RefsResponse) -> str: lines = [_header(resp.query, resp.index.exists, resp.index.stale)] lines.append("") + note = _coverage_line(resp.coverage) if not resp.sites: lines.append("_No references found._") + if note: + lines.append(note) return "\n".join(lines).rstrip() + "\n" lines.append("| kind | path | line |") lines.append("|------|------|------|") for site in resp.sites: lines.append(f"| {site.kind} | `{site.path}` | {site.line} |") + if note: + lines.append(note) return "\n".join(lines).rstrip() + "\n" @@ -139,12 +160,18 @@ def render_impact(resp: ImpactResponse) -> str: header = (f"**impact:** `{resp.target}` · **direction:** {resp.direction} · " f"**depth:** {resp.depth} · **affected files:** {len(resp.files)}") lines = [header, ""] + note = _coverage_line(resp.coverage) if not resp.nodes: - return "\n".join(lines + ["_No impact found (target unknown or no edges)._", ""]).rstrip() + "\n" + body = ["_No impact found (target unknown or no edges)._"] + if note: + body.append(note) + return "\n".join(lines + body + [""]).rstrip() + "\n" lines.append("| dist | via | kind | node | location |") lines.append("|------|-----|------|------|----------|") for n in sorted(resp.nodes, key=lambda x: (x.distance, x.path, x.line_start or 0)): loc = f"{n.path}:{n.line_start}" if n.line_start else n.path node_name = f"`{n.name}`" if n.name else "—" lines.append(f"| {n.distance} | {n.via_edge or ''} | {n.kind} | {node_name} | `{loc}` |") + if note: + lines.append(note) return "\n".join(lines).rstrip() + "\n" diff --git a/src/codebase_index/parsers/languages.py b/src/codebase_index/parsers/languages.py index 90cd325..a772fad 100644 --- 
a/src/codebase_index/parsers/languages.py +++ b/src/codebase_index/parsers/languages.py @@ -279,3 +279,12 @@ def is_supported(lang: Optional[str]) -> bool: def spec_for(lang: Optional[str]) -> Optional[LangSpec]: return LANGS.get(lang) if lang else None + + +def has_full_graph(lang: Optional[str]) -> bool: + """True if `lang` has a Tier-A spec (full import/inheritance edges for refs/impact). + + Tier-B languages (a loadable grammar but no hand-tuned spec) yield symbols and + best-effort call sites only, so their dependency graph is partial. + """ + return spec_for(lang) is not None diff --git a/src/codebase_index/retrieval/searchers.py b/src/codebase_index/retrieval/searchers.py index 954ffa2..0cb0d00 100644 --- a/src/codebase_index/retrieval/searchers.py +++ b/src/codebase_index/retrieval/searchers.py @@ -16,6 +16,7 @@ from ..indexer.freshness import compute_freshness from ..models import ( Confidence, + GraphCoverage, IndexFreshness, ReadRange, RefSite, @@ -326,6 +327,7 @@ def symbol_lookup( def refs_lookup(conn: sqlite3.Connection, name: str, *, kind: str) -> RefsResponse: + defs = repo.symbols_by_name(conn, name, exact=True) sites = [ RefSite(path=row["path"], line=row["line"], kind="call") for row in repo.refs_for_name(conn, name) @@ -333,10 +335,18 @@ def refs_lookup(conn: sqlite3.Connection, name: str, *, kind: str) -> RefsRespon if kind == "all": sites.extend( RefSite(path=row["path"], line=row["line_start"], kind="definition") - for row in repo.symbols_by_name(conn, name, exact=True) + for row in defs ) sites.sort(key=lambda site: (site.path, site.line, site.kind)) - return RefsResponse(query=name, index=_freshness(conn), sites=sites) + # Coverage is judged by the symbol's defining language(s); fall back to the + # call-site files when the symbol has no indexed definition. 
+ coverage_paths = [row["path"] for row in defs] or [s.path for s in sites] + return RefsResponse( + query=name, + index=_freshness(conn), + sites=sites, + coverage=GraphCoverage.for_paths(coverage_paths), + ) def vector_candidates( diff --git a/src/codebase_index/skill_template/SKILL.md b/src/codebase_index/skill_template/SKILL.md index b738921..2105974 100644 --- a/src/codebase_index/skill_template/SKILL.md +++ b/src/codebase_index/skill_template/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. -allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out ` over `--open`. `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. 
@@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,20 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan. - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages. 
+ When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules @@ -106,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`. + In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/src/codebase_index/skill_template/scripts/cbx b/src/codebase_index/skill_template/scripts/cbx index a33c2bd..5666358 100644 --- a/src/codebase_index/skill_template/scripts/cbx +++ b/src/codebase_index/skill_template/scripts/cbx @@ -4,7 +4,7 @@ # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch). 
set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in diff --git a/src/codebase_index/skill_template/scripts/cbx.ps1 b/src/codebase_index/skill_template/scripts/cbx.ps1 index fe10bd9..bb8e05d 100644 --- a/src/codebase_index/skill_template/scripts/cbx.ps1 +++ b/src/codebase_index/skill_template/scripts/cbx.ps1 @@ -8,7 +8,7 @@ param( ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" diff --git a/src/codebase_index/storage/repo.py b/src/codebase_index/storage/repo.py index d87c995..338a12b 100644 --- a/src/codebase_index/storage/repo.py +++ b/src/codebase_index/storage/repo.py @@ -438,13 +438,22 @@ def count_resolved_edges(conn: sqlite3.Connection) -> int: def ensure_vec_tables(conn: sqlite3.Connection, *, dim: int) -> None: - """Create vec_chunks (sqlite-vec) + vec_meta if absent. dim is fixed per build.""" + """Create vec_chunks (sqlite-vec) + vec_meta + vec_cache if absent. dim is fixed per build.""" dim = int(dim) conn.execute( f"CREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(" f"chunk_id INTEGER PRIMARY KEY, embedding FLOAT[{dim}])" ) conn.execute("CREATE TABLE IF NOT EXISTS vec_meta (model TEXT, dim INTEGER, built_at TEXT)") + # Content-addressed embedding cache: chunk ids churn on every full rebuild + # (replace_chunks deletes + re-inserts), so a chunk-id keyed store alone would + # re-embed the whole repo each time. Keyed by (model, content_sha) the cache + # survives that churn and lets unchanged content reuse its vector for free. 
+ conn.execute( + "CREATE TABLE IF NOT EXISTS vec_cache (" + "model TEXT NOT NULL, content_sha TEXT NOT NULL, embedding BLOB NOT NULL, " + "PRIMARY KEY (model, content_sha))" + ) def set_vec_meta(conn: sqlite3.Connection, *, model: str, dim: int, built_at: str) -> None: @@ -467,10 +476,49 @@ def upsert_chunk_vector( ) -> None: import sqlite_vec # type: ignore[import-untyped] + upsert_chunk_vector_blob(conn, chunk_id, sqlite_vec.serialize_float32(embedding)) + + +def upsert_chunk_vector_blob(conn: sqlite3.Connection, chunk_id: int, blob: bytes) -> None: + """Write a pre-serialized float32 embedding blob for a chunk (cache-reuse path).""" conn.execute("DELETE FROM vec_chunks WHERE chunk_id = ?", (int(chunk_id),)) conn.execute( "INSERT INTO vec_chunks (chunk_id, embedding) VALUES (?, ?)", - (int(chunk_id), sqlite_vec.serialize_float32(embedding)), + (int(chunk_id), blob), + ) + + +def cached_embeddings( + conn: sqlite3.Connection, *, model: str, shas: Iterable[str] +) -> dict[str, bytes]: + """Return {content_sha: serialized embedding blob} already cached for this model.""" + shas = list(dict.fromkeys(shas)) + if not shas: + return {} + out: dict[str, bytes] = {} + # Chunk the IN list to stay well under SQLite's variable limit on huge repos. + for start in range(0, len(shas), 500): + batch = shas[start : start + 500] + placeholders = ",".join("?" * len(batch)) + rows = conn.execute( + f"SELECT content_sha, embedding FROM vec_cache " + f"WHERE model = ? 
AND content_sha IN ({placeholders})", + (model, *batch), + ).fetchall() + for r in rows: + out[r[0]] = r[1] + return out + + +def store_cached_embeddings( + conn: sqlite3.Connection, *, model: str, items: Sequence[tuple[str, bytes]] +) -> None: + """Insert (content_sha, blob) pairs into the content-addressed embedding cache.""" + if not items: + return + conn.executemany( + "INSERT OR REPLACE INTO vec_cache (model, content_sha, embedding) VALUES (?, ?, ?)", + [(model, sha, blob) for sha, blob in items], ) @@ -496,12 +544,12 @@ def prune_orphan_vectors(conn: sqlite3.Connection) -> int: try: current_ids = {r[0] for r in conn.execute("SELECT id FROM chunks").fetchall()} orphan_ids = [ - r[0] + (r[0],) for r in conn.execute("SELECT chunk_id FROM vec_chunks").fetchall() if r[0] not in current_ids ] - for oid in orphan_ids: - conn.execute("DELETE FROM vec_chunks WHERE chunk_id = ?", (oid,)) + if orphan_ids: + conn.executemany("DELETE FROM vec_chunks WHERE chunk_id = ?", orphan_ids) return len(orphan_ids) except Exception: return 0 diff --git a/tests/golden/impact_user_model.json b/tests/golden/impact_user_model.json index 898f952..9ec0c77 100644 --- a/tests/golden/impact_user_model.json +++ b/tests/golden/impact_user_model.json @@ -1,4 +1,9 @@ { + "coverage": { + "languages": [], + "partial": false, + "reason": null + }, "depth": 2, "direction": "up", "files": [ diff --git a/tests/golden/refs_refresh_access_token.json b/tests/golden/refs_refresh_access_token.json index 04dc0ca..f5693df 100644 --- a/tests/golden/refs_refresh_access_token.json +++ b/tests/golden/refs_refresh_access_token.json @@ -1,4 +1,9 @@ { + "coverage": { + "languages": [], + "partial": false, + "reason": null + }, "index": { "built_at": "", "exists": true, diff --git a/tests/golden/stats.json b/tests/golden/stats.json index 032e118..bf445b5 100644 --- a/tests/golden/stats.json +++ b/tests/golden/stats.json @@ -7,11 +7,13 @@ "treesitter_coverage": [ { "files": 3, + "graph": "full", "lang": "python", 
"symbols": 6 }, { "files": 2, + "graph": "full", "lang": "typescript", "symbols": 1 } diff --git a/tests/test_doctor.py b/tests/test_doctor.py index 373616e..943c96a 100644 --- a/tests/test_doctor.py +++ b/tests/test_doctor.py @@ -43,6 +43,30 @@ def test_doctor_cli_json(tmp_path): assert any(f["id"] == "cache_gitignored" for f in data["findings"]) +def test_doctor_flags_tier_b_partial_graph(tmp_path): + """A Tier-B language (Lua) in the index must surface a partial-graph info finding.""" + (tmp_path / "mod.lua").write_text("local function greet()\n return 1\nend\n", encoding="utf-8") + assert runner.invoke(app, ["--root", str(tmp_path), "index"]).exit_code == 0 + + cfg = Config() + cfg.root = str(tmp_path) + findings = {f.id: f for f in run_doctor(tmp_path, cfg)} + gc = findings["graph_coverage"] + assert gc.ok is True and gc.severity == "info" + assert "lua" in gc.detail + + +def test_doctor_full_graph_when_only_tier_a(tmp_path): + (tmp_path / "mod.py").write_text("def f():\n return 1\n", encoding="utf-8") + assert runner.invoke(app, ["--root", str(tmp_path), "index"]).exit_code == 0 + + cfg = Config() + cfg.root = str(tmp_path) + findings = {f.id: f for f in run_doctor(tmp_path, cfg)} + assert "lua" not in findings["graph_coverage"].detail + assert "full dependency-graph support" in findings["graph_coverage"].detail + + def test_doctor_strict_exits_nonzero_on_high_severity(tmp_path): # uncovered cache is a high-severity finding → --strict must fail res = runner.invoke(app, ["--root", str(tmp_path), "doctor", "--strict"]) diff --git a/tests/test_graph_coverage.py b/tests/test_graph_coverage.py new file mode 100644 index 0000000..872726c --- /dev/null +++ b/tests/test_graph_coverage.py @@ -0,0 +1,66 @@ +"""Regression: refs/impact must flag partial graph coverage for Tier-B languages. + +Import/inheritance edges are only extracted for the hand-tuned (Tier-A) languages. +A symbol or file in a Tier-B language (generic tree-sitter walk, e.g. 
Lua) gets +symbols and best-effort call sites but no dependency edges, so an empty/short +refs or impact result is inconclusive — the response must say so rather than let +an agent read "no references" as proof. +""" + +from __future__ import annotations + +from pathlib import Path + +from codebase_index.config import load +from codebase_index.graph.expand import impact_lookup +from codebase_index.indexer.pipeline import build_index +from codebase_index.models import GraphCoverage +from codebase_index.retrieval.searchers import refs_lookup +from codebase_index.storage.db import Database + +_LUA = ( + "local function greet(name)\n return name\nend\n\n" + "local function main()\n return greet('x')\nend\n" +) +_PY = "def helper():\n return 1\n\n\ndef caller():\n return helper()\n" + + +def _index(repo: Path) -> Path: + (repo / "mod.lua").write_text(_LUA, encoding="utf-8") + (repo / "mod.py").write_text(_PY, encoding="utf-8") + cfg = load(root=str(repo)) + db_path = repo / "index.sqlite" + with Database(db_path) as db: + build_index(cfg, db, root=Path(cfg.root)) + return db_path + + +def test_coverage_for_paths_unit(): + assert GraphCoverage.for_paths(["a.py", "b.go"]).partial is False + partial = GraphCoverage.for_paths(["x.lua"]) + assert partial.partial is True + assert partial.languages == ["lua"] + assert partial.reason and "lua" in partial.reason + + +def test_refs_flags_partial_for_tier_b_symbol(tmp_path): + db_path = _index(tmp_path) + with Database(db_path) as db: + lua_refs = refs_lookup(db.conn, "greet", kind="all") + py_refs = refs_lookup(db.conn, "helper", kind="all") + + assert lua_refs.coverage.partial is True + assert "lua" in lua_refs.coverage.languages + # Tier-A symbol: fully analyzed, no warning. 
+ assert py_refs.coverage.partial is False + + +def test_impact_flags_partial_for_tier_b_file(tmp_path): + db_path = _index(tmp_path) + with Database(db_path) as db: + lua_impact = impact_lookup(db.conn, "mod.lua", depth=2, direction="both") + py_impact = impact_lookup(db.conn, "mod.py", depth=2, direction="both") + + assert lua_impact.coverage.partial is True + assert "lua" in lua_impact.coverage.languages + assert py_impact.coverage.partial is False diff --git a/tests/test_packaging.py b/tests/test_packaging.py index 362fa06..ec76406 100644 --- a/tests/test_packaging.py +++ b/tests/test_packaging.py @@ -25,6 +25,6 @@ def test_packaged_skill_matches_dev_copy(): def test_packaged_cbx_whitelists_safe_subcommands_only(): cbx = (_template() / "scripts" / "cbx").read_text(encoding="utf-8") - assert 'ALLOWED="search explain symbol refs impact graph stats update index"' in cbx + assert 'ALLOWED="search explain symbol refs impact graph stats doctor update index"' in cbx for forbidden in ("clean", "init", "watch"): assert f" {forbidden} " not in f' {cbx.split("ALLOWED=")[1].splitlines()[0]} ' diff --git a/tests/test_pipeline_vectors.py b/tests/test_pipeline_vectors.py index 115a1b2..4cd805d 100644 --- a/tests/test_pipeline_vectors.py +++ b/tests/test_pipeline_vectors.py @@ -56,3 +56,79 @@ def test_reindex_vectors_idempotent(sample_repo, tmp_path, fake_backend, monkeyp assert s2.vectors == 0 assert repo.count_vectors(db.conn) == s1.vectors db.close() + + +class _CountingBackend: + """Wraps an embedding backend to record how many texts it is asked to embed.""" + + enabled = True + name = "fake" + + def __init__(self, inner): + self._inner = inner + self.dim = inner.dim + self.calls = 0 + self.embedded = 0 + + def embed(self, texts): + self.calls += 1 + self.embedded += len(texts) + return self._inner.embed(texts) + + +def test_reindex_does_not_recompute_unchanged_embeddings( + sample_repo, tmp_path, fake_backend, monkeypatch +): + """A full rebuild must reuse cached vectors 
for unchanged content, never re-embed it.""" + import codebase_index.indexer.pipeline as pipe + + backend = _CountingBackend(fake_backend) + monkeypatch.setattr(pipe, "resolve_backend", lambda cfg, warn=None: backend) + cfg = Config() + cfg.root = str(sample_repo) + cfg.embeddings.enabled = True + db = Database(tmp_path / "index.sqlite").open() + + build_index(cfg, db, root=sample_repo) + first_pass = backend.embedded + assert first_pass > 0 + + build_index(cfg, db, root=sample_repo) + # Chunk ids churn across rebuilds, but content is identical -> cache hit, no backend work. + assert backend.embedded == first_pass + db.close() + + +def test_changed_file_only_embeds_new_content( + sample_repo, tmp_path, fake_backend, monkeypatch +): + """Editing one file embeds only its new chunks; the rest come from the cache.""" + import shutil + + import codebase_index.indexer.pipeline as pipe + + # Copy the fixture so the edit below never mutates the shared, committed sample repo. + repo_copy = tmp_path / "repo" + shutil.copytree(sample_repo, repo_copy) + + backend = _CountingBackend(fake_backend) + monkeypatch.setattr(pipe, "resolve_backend", lambda cfg, warn=None: backend) + cfg = Config() + cfg.root = str(repo_copy) + cfg.embeddings.enabled = True + db = Database(tmp_path / "index.sqlite").open() + + build_index(cfg, db, root=repo_copy) + baseline = backend.embedded + + target = repo_copy / "src" / "auth" / "token.py" + target.write_text( + target.read_text(encoding="utf-8") + "\n\ndef brand_new_helper():\n return 42\n", + encoding="utf-8", + ) + s2 = build_index(cfg, db, root=repo_copy) + + # Some new chunks were embedded, but far fewer than a full re-embed of the repo. 
+ assert s2.vectors > 0 + assert backend.embedded - baseline < baseline + db.close() diff --git a/tests/test_search_cli.py b/tests/test_search_cli.py index 35bf309..d0896a4 100644 --- a/tests/test_search_cli.py +++ b/tests/test_search_cli.py @@ -126,6 +126,40 @@ def test_search_reports_stale_after_edit(sample_repo, tmp_path, monkeypatch): assert stale["index"]["files_changed_since_build"] >= 1 +def test_explain_reports_stale_after_edit(sample_repo): + """Regression: explain must honor the freshness contract like search. + + Before the fix, explain called the retrieval pipeline without root/config, so + it always fell back to a hardcoded ``stale=False, files_changed_since_build=0`` + block — silently breaking the skill's freshness check for "how does X work". + """ + import sqlite3 + + assert runner.invoke(app, ["--root", str(sample_repo), "index"]).exit_code == 0 + + res = runner.invoke( + app, ["--root", str(sample_repo), "--json", "explain", "how does token refresh work"] + ) + assert res.exit_code == 0, res.output + fresh = _json.loads(res.output) + assert fresh["index"]["exists"] is True + assert fresh["index"]["stale"] is False + + db_path = sample_repo / ".claude" / "cache" / "codebase-index" / "index.sqlite" + conn = sqlite3.connect(str(db_path)) + conn.execute("UPDATE files SET mtime_ns = 1") + conn.execute("DELETE FROM meta WHERE key = 'head_commit'") + conn.commit() + conn.close() + + res2 = runner.invoke( + app, ["--root", str(sample_repo), "--json", "explain", "how does token refresh work"] + ) + stale = _json.loads(res2.output) + assert stale["index"]["stale"] is True + assert stale["index"]["files_changed_since_build"] >= 1 + + def test_search_kind_words_filter_symbol_kind(sample_repo): assert runner.invoke(app, ["--root", str(sample_repo), "index"]).exit_code == 0 @@ -145,3 +179,52 @@ def test_search_kind_words_filter_symbol_kind(sample_repo): assert result.exit_code == 0, result.output payload = _json.loads(result.output) assert 
payload["results"][0]["symbols"] == ["refresh_access_token"] + + +def test_search_offset_paginates_through_cli(sample_repo): + """Regression: --offset must reach the retrieval pipeline. + + Before the fix, the CLI search command never exposed --offset, so the + pipeline's pagination contract (advertised via the JSON ``pagination`` block + and MCP's ``next_offset``) was unreachable from the CLI/skill — every call + silently returned page one. + """ + assert runner.invoke(app, ["--root", str(sample_repo), "index"]).exit_code == 0 + + page1 = runner.invoke( + app, ["--root", str(sample_repo), "--json", "search", "token", "--limit", "1"] + ) + assert page1.exit_code == 0, page1.output + p1 = _json.loads(page1.output) + pag = p1.get("pagination") + if not pag or not pag.get("has_more"): + # Fixture too small to page; the flag must still be accepted. + return + + page2 = runner.invoke( + app, + [ + "--root", + str(sample_repo), + "--json", + "search", + "token", + "--limit", + "1", + "--offset", + str(pag["next_offset"]), + ], + ) + assert page2.exit_code == 0, page2.output + p2 = _json.loads(page2.output) + assert p2["pagination"]["offset"] == pag["next_offset"] + # The second page must not repeat the first page's top hit. + k1 = (p1["results"][0]["path"], p1["results"][0]["line_start"]) + k2 = (p2["results"][0]["path"], p2["results"][0]["line_start"]) + assert k1 != k2 + + +def test_search_negative_offset_rejected(sample_repo): + result = runner.invoke(app, ["--root", str(sample_repo), "search", "token", "--offset", "-1"]) + assert result.exit_code == 2 + assert "offset" in result.output.lower()