From 2da60f2eca81b595baa3e6bd582a69694ed15279 Mon Sep 17 00:00:00 2001 From: denfry Date: Sun, 7 Jun 2026 16:33:51 +0300 Subject: [PATCH 1/8] fix(release): sync version to 1.2.2 across plugin manifest and lock The 1.2.2 version bump (e8714a2) left .claude-plugin/plugin.json at 1.2.1 and requirements.lock pinned to the v1.2.0 release tarball, breaking the version-consistency contract enforced by test_plugin_manifest.py. Co-Authored-By: Claude Sonnet 4.6 --- .claude-plugin/plugin.json | 2 +- requirements.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index f05c290..91036f6 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -3,7 +3,7 @@ "name": "codebase-index", "displayName": "Codebase Index", "description": "Local-first hybrid codebase index. Auto-provisions its Python CLI on first session start; the skill searches the index so Claude reads only the most relevant files.", - "version": "1.2.1", + "version": "1.2.2", "author": { "name": "codebase-index contributors" }, diff --git a/requirements.lock b/requirements.lock index 1c565ad..f87af0a 100644 --- a/requirements.lock +++ b/requirements.lock @@ -1,3 +1,3 @@ -codebase-index @ https://github.com/denfry/codebase-index/archive/refs/tags/v1.2.0.tar.gz +codebase-index @ https://github.com/denfry/codebase-index/archive/refs/tags/v1.2.2.tar.gz tree-sitter==0.25.2 tree-sitter-language-pack==1.8.1 From eccfcac8379199e8327c5cfdb44becd155fa7205 Mon Sep 17 00:00:00 2001 From: denfry Date: Sun, 7 Jun 2026 19:26:49 +0300 Subject: [PATCH 2/8] fix(skill): honor freshness in explain, sync skill contract, narrow tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit explain() now passes root/config into the retrieval pipeline so the index freshness block (stale / files_changed_since_build) is real instead of a hardcoded "fresh" fallback — the skill freshness check silently never fired for "how does X work" questions. explain also blends vectors when embeddings are enabled, matching search --mode hybrid. Skill (all targets: claude/codex/opencode + plugin skills/ + bin/ wrappers): - add `doctor` to the cbx whitelist (the skill fallback already invokes it) - narrow allowed-tools python to `-m codebase_index` so the skill cannot run arbitrary Python - document the --mode vector path, the intent/mode/pagination response fields, and clarify graph --open is a human-facing HTML view (use impact/refs for agent-readable dependency answers) Regenerated the three installed skill copies via skill-update so they match the authored skill/ and wheel-bundled skill_template/ sources. Tests: regression test that explain reports staleness after an edit; update the packaging whitelist assertion for the new `doctor` entry. Co-Authored-By: Claude Opus 4.8 --- .claude/skills/codebase-index/SKILL.md | 21 ++++++++++-- .claude/skills/codebase-index/scripts/cbx | 2 +- .claude/skills/codebase-index/scripts/cbx.ps1 | 2 +- .codex/skills/codebase-index/SKILL.md | 21 ++++++++++-- .codex/skills/codebase-index/scripts/cbx | 2 +- .codex/skills/codebase-index/scripts/cbx.ps1 | 2 +- .opencode/skills/codebase-index/SKILL.md | 21 ++++++++++-- .opencode/skills/codebase-index/scripts/cbx | 2 +- .../skills/codebase-index/scripts/cbx.ps1 | 2 +- CHANGELOG.md | 31 +++++++++++++++++ bin/cbx | 2 +- bin/cbx.ps1 | 2 +- skill/SKILL.md | 21 ++++++++++-- skill/scripts/cbx | 2 +- skill/scripts/cbx.ps1 | 2 +- skills/codebase-index/SKILL.md | 21 ++++++++++-- src/codebase_index/cli.py | 12 +++++-- src/codebase_index/skill_template/SKILL.md | 21 ++++++++++-- src/codebase_index/skill_template/scripts/cbx | 2 +- .../skill_template/scripts/cbx.ps1 | 2 +- tests/test_packaging.py | 2 +- tests/test_search_cli.py | 34 +++++++++++++++++++ 22 files changed, 201 insertions(+), 28 deletions(-) diff --git a/.claude/skills/codebase-index/SKILL.md b/.claude/skills/codebase-index/SKILL.md index b738921..16b7753 100644 --- a/.claude/skills/codebase-index/SKILL.md +++ b/.claude/skills/codebase-index/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. -allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out ` over `--open`. `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,14 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan. - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; + if `has_more` is true and you still lack context, raise `--token-budget` or + refine the query with a more specific subcommand rather than expecting a second + page (CLI search is single-page). ## Token efficiency rules diff --git a/.claude/skills/codebase-index/scripts/cbx b/.claude/skills/codebase-index/scripts/cbx index a33c2bd..5666358 100644 --- a/.claude/skills/codebase-index/scripts/cbx +++ b/.claude/skills/codebase-index/scripts/cbx @@ -4,7 +4,7 @@ # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch). set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in diff --git a/.claude/skills/codebase-index/scripts/cbx.ps1 b/.claude/skills/codebase-index/scripts/cbx.ps1 index fe10bd9..bb8e05d 100644 --- a/.claude/skills/codebase-index/scripts/cbx.ps1 +++ b/.claude/skills/codebase-index/scripts/cbx.ps1 @@ -8,7 +8,7 @@ param( ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" diff --git a/.codex/skills/codebase-index/SKILL.md b/.codex/skills/codebase-index/SKILL.md index b738921..16b7753 100644 --- a/.codex/skills/codebase-index/SKILL.md +++ b/.codex/skills/codebase-index/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. -allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out ` over `--open`. `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,14 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan. - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; + if `has_more` is true and you still lack context, raise `--token-budget` or + refine the query with a more specific subcommand rather than expecting a second + page (CLI search is single-page). ## Token efficiency rules diff --git a/.codex/skills/codebase-index/scripts/cbx b/.codex/skills/codebase-index/scripts/cbx index a33c2bd..5666358 100644 --- a/.codex/skills/codebase-index/scripts/cbx +++ b/.codex/skills/codebase-index/scripts/cbx @@ -4,7 +4,7 @@ # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch). set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in diff --git a/.codex/skills/codebase-index/scripts/cbx.ps1 b/.codex/skills/codebase-index/scripts/cbx.ps1 index fe10bd9..bb8e05d 100644 --- a/.codex/skills/codebase-index/scripts/cbx.ps1 +++ b/.codex/skills/codebase-index/scripts/cbx.ps1 @@ -8,7 +8,7 @@ param( ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" diff --git a/.opencode/skills/codebase-index/SKILL.md b/.opencode/skills/codebase-index/SKILL.md index b738921..16b7753 100644 --- a/.opencode/skills/codebase-index/SKILL.md +++ b/.opencode/skills/codebase-index/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. -allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out ` over `--open`. `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,14 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan. - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; + if `has_more` is true and you still lack context, raise `--token-budget` or + refine the query with a more specific subcommand rather than expecting a second + page (CLI search is single-page). ## Token efficiency rules diff --git a/.opencode/skills/codebase-index/scripts/cbx b/.opencode/skills/codebase-index/scripts/cbx index a33c2bd..5666358 100644 --- a/.opencode/skills/codebase-index/scripts/cbx +++ b/.opencode/skills/codebase-index/scripts/cbx @@ -4,7 +4,7 @@ # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch). set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in diff --git a/.opencode/skills/codebase-index/scripts/cbx.ps1 b/.opencode/skills/codebase-index/scripts/cbx.ps1 index fe10bd9..bb8e05d 100644 --- a/.opencode/skills/codebase-index/scripts/cbx.ps1 +++ b/.opencode/skills/codebase-index/scripts/cbx.ps1 @@ -8,7 +8,7 @@ param( ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" diff --git a/CHANGELOG.md b/CHANGELOG.md index d85eb8c..8b4573d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,37 @@ All notable changes to this project are documented here. The format is based on ## [Unreleased] +### Added +- **Content-addressed embedding cache**: a new `vec_cache` table (keyed by `(model, content_sha)`) + persists chunk embeddings across rebuilds. Because chunk ids churn on every full rebuild, the + embedding pass now hashes chunk content and only calls the (potentially slow or paid) backend for + text never embedded under the active model — unchanged content reuses its cached vector for free. + +### Changed +- The embedding pass reports cache **misses** (vectors actually computed) as its "embedded" count. +- `prune_orphan_vectors` now deletes stale `vec_chunks` rows in a single batched `executemany`. +- **Skill**: documented the `--mode vector` semantic-search path, the `intent`/`mode`/`pagination` + response fields, and clarified that `graph --open` renders an HTML view for a human (use + `impact`/`refs` for agent-readable dependency answers). +- **Skill**: narrowed the skill's `allowed-tools` from `Bash(python *)`/`Bash(python3 *)` to + `Bash(python -m codebase_index *)`/`Bash(python3 -m codebase_index *)`, so the skill can no longer + run arbitrary Python. + +### Fixed +- `explain` now honors the index freshness contract: it passes `root`/`config` into the retrieval + pipeline, so `index.stale` / `files_changed_since_build` reflect reality instead of a hardcoded + "fresh" block. Previously the skill's freshness check silently never triggered for + "how does X work" questions. `explain` also blends in vector results when embeddings are enabled, + matching `search --mode hybrid`. +- The `cbx` wrapper whitelist (skill + plugin `bin/`) now includes `doctor`, which the skill's + fallback diagnostics already invoke; previously `cbx doctor` was refused. + +## [1.2.2] - 2026-06-05 + +### Changed +- Synced the version to `1.2.2` across the package, plugin manifest, and lockfile. +- Documentation cleanup: removed stale prompt files and screenshots, refreshed the README. + ## [1.2.1] - 2026-06-05 ### Added diff --git a/bin/cbx b/bin/cbx index 2b7294d..cfe60b7 100644 --- a/bin/cbx +++ b/bin/cbx @@ -3,7 +3,7 @@ # from the venv provisioned by scripts/bootstrap.sh (located via the .venv-path pointer). set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in *" ${sub} "*) : ;; diff --git a/bin/cbx.ps1 b/bin/cbx.ps1 index cf33e6b..85face7 100644 --- a/bin/cbx.ps1 +++ b/bin/cbx.ps1 @@ -5,7 +5,7 @@ param( [Parameter(ValueFromRemainingArguments = $true)] [string[]]$Rest ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" exit 2 diff --git a/skill/SKILL.md b/skill/SKILL.md index b738921..16b7753 100644 --- a/skill/SKILL.md +++ b/skill/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. -allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out ` over `--open`. `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,14 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan. - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; + if `has_more` is true and you still lack context, raise `--token-budget` or + refine the query with a more specific subcommand rather than expecting a second + page (CLI search is single-page). ## Token efficiency rules diff --git a/skill/scripts/cbx b/skill/scripts/cbx index a33c2bd..5666358 100644 --- a/skill/scripts/cbx +++ b/skill/scripts/cbx @@ -4,7 +4,7 @@ # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch). set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in diff --git a/skill/scripts/cbx.ps1 b/skill/scripts/cbx.ps1 index fe10bd9..bb8e05d 100644 --- a/skill/scripts/cbx.ps1 +++ b/skill/scripts/cbx.ps1 @@ -8,7 +8,7 @@ param( ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" diff --git a/skills/codebase-index/SKILL.md b/skills/codebase-index/SKILL.md index b738921..16b7753 100644 --- a/skills/codebase-index/SKILL.md +++ b/skills/codebase-index/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. -allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out ` over `--open`. `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,14 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan. - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; + if `has_more` is true and you still lack context, raise `--token-budget` or + refine the query with a more specific subcommand rather than expecting a second + page (CLI search is single-page). ## Token efficiency rules diff --git a/src/codebase_index/cli.py b/src/codebase_index/cli.py index a1c6efa..609c824 100644 --- a/src/codebase_index/cli.py +++ b/src/codebase_index/cli.py @@ -489,12 +489,18 @@ def explain( from .retrieval.pipeline import search as run_search from .storage.db import Database - db_path, _cfg = _ensure_index(ctx) + backend = _resolve_backend_for_search(ctx) + db_path, cfg = _ensure_index(ctx) q = query if any(w in query.lower() for w in ("how", "architecture", "overview")) else f"how does {query} work" with Database(db_path) as db: - payload = run_search(db.conn, q, mode="hybrid", limit=10, - token_budget=token_budget, no_fallback=False) + if getattr(backend, "enabled", False): + db.enable_vectors() + payload = run_search( + db.conn, q, mode="hybrid", limit=10, + token_budget=token_budget, no_fallback=False, backend=backend, + root=Path(cfg.root), config=cfg, + ) want_json = json_out or (ctx.obj and ctx.obj.get("json")) typer.echo(json_renderer.render(payload) if want_json else md_renderer.render(payload)) diff --git a/src/codebase_index/skill_template/SKILL.md b/src/codebase_index/skill_template/SKILL.md index b738921..16b7753 100644 --- a/src/codebase_index/skill_template/SKILL.md +++ b/src/codebase_index/skill_template/SKILL.md @@ -1,7 +1,7 @@ --- name: codebase-index description: Use this skill before answering questions about a repository's architecture, implementation locations, symbols, references, dependencies, refactoring impact, data flow, bugs, or where something is implemented. It searches a local hybrid codebase index so Claude reads only the most relevant files instead of scanning the entire project. -allowed-tools: Bash(python *), Bash(python3 *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob +allowed-tools: Bash(python -m codebase_index *), Bash(python3 -m codebase_index *), Bash(codebase-index *), Bash(cbx *), Read, Grep, Glob --- # Codebase Index @@ -44,7 +44,12 @@ Pick the subcommand by intent: | a specific symbol name | `codebase-index symbol "" --json` | | "who calls / references" | `codebase-index refs "" --json` | | "what breaks if I change" | `codebase-index impact "" --json` | -| visual graph / "open graph" | `codebase-index graph "" --open` | +| visual graph / "open graph" (for the human, not for you to read) | `codebase-index graph "" --open` | + +The `graph` command renders an HTML dependency graph for a person to look at — +it is not a retrieval packet. Use it only when the user explicitly wants a visual +graph; for "what depends on X" answer from `impact`/`refs` instead. In a headless +session prefer `--out ` over `--open`. `explain` has a higher default token budget (2200) and HOW_IT_WORKS intent weights — use it whenever the question is about understanding behavior or flow. @@ -52,6 +57,10 @@ For `search`, pick a `--mode` when the intent is clear: - `--mode symbol` — pure symbol lookups (faster, no FTS noise) - `--mode fts` — text/keyword queries where symbol names don't matter - `--mode hybrid` — default; best for mixed queries +- `--mode vector` — semantic / near-synonym queries ("where do we rate-limit + requests" without the exact words). Requires opt-in embeddings; falls back with + a clear message when they are not enabled. `hybrid` already blends vectors in + when embeddings are on, so reach for `vector` only for pure-semantic recall. Natural-language kind words such as `method`, `function`, `class`, `interface`, `enum`, and `type` constrain the symbol retriever inside `search`. @@ -89,6 +98,14 @@ Top-level fields: - `recommended_reads` — the precise `{path, line_start, line_end}` list to open next. This is your read plan. - `confidence` — `high` (answer directly), `medium` (read + optionally confirm with one Grep), `low` (use fallback). - `fallback_suggestions` — ripgrep patterns and paths to try if the index is weak. +- `intent` / `mode` — how the query was classified and which retrievers ran; + useful to sanity-check a weak result (e.g. a "how does X work" question that + resolved to a bare symbol lookup may need `explain` instead). +- `pagination` — present only when more results exist than fit the page. It + reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; + if `has_more` is true and you still lack context, raise `--token-budget` or + refine the query with a more specific subcommand rather than expecting a second + page (CLI search is single-page). ## Token efficiency rules diff --git a/src/codebase_index/skill_template/scripts/cbx b/src/codebase_index/skill_template/scripts/cbx index a33c2bd..5666358 100644 --- a/src/codebase_index/skill_template/scripts/cbx +++ b/src/codebase_index/skill_template/scripts/cbx @@ -4,7 +4,7 @@ # - Whitelists subcommands so the skill can never invoke destructive ones (clean/init/watch). set -euo pipefail -ALLOWED="search explain symbol refs impact graph stats update index" +ALLOWED="search explain symbol refs impact graph stats doctor update index" sub="${1:-}" case " $ALLOWED " in diff --git a/src/codebase_index/skill_template/scripts/cbx.ps1 b/src/codebase_index/skill_template/scripts/cbx.ps1 index fe10bd9..bb8e05d 100644 --- a/src/codebase_index/skill_template/scripts/cbx.ps1 +++ b/src/codebase_index/skill_template/scripts/cbx.ps1 @@ -8,7 +8,7 @@ param( ) $ErrorActionPreference = "Stop" -$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "update", "index") +$allowed = @("search", "explain", "symbol", "refs", "impact", "graph", "stats", "doctor", "update", "index") if ($allowed -notcontains $Subcommand) { Write-Error "cbx: refusing subcommand '$Subcommand'. Allowed: $($allowed -join ', ')" diff --git a/tests/test_packaging.py b/tests/test_packaging.py index 362fa06..ec76406 100644 --- a/tests/test_packaging.py +++ b/tests/test_packaging.py @@ -25,6 +25,6 @@ def test_packaged_skill_matches_dev_copy(): def test_packaged_cbx_whitelists_safe_subcommands_only(): cbx = (_template() / "scripts" / "cbx").read_text(encoding="utf-8") - assert 'ALLOWED="search explain symbol refs impact graph stats update index"' in cbx + assert 'ALLOWED="search explain symbol refs impact graph stats doctor update index"' in cbx for forbidden in ("clean", "init", "watch"): assert f" {forbidden} " not in f' {cbx.split("ALLOWED=")[1].splitlines()[0]} ' diff --git a/tests/test_search_cli.py b/tests/test_search_cli.py index 35bf309..0efccae 100644 --- a/tests/test_search_cli.py +++ b/tests/test_search_cli.py @@ -126,6 +126,40 @@ def test_search_reports_stale_after_edit(sample_repo, tmp_path, monkeypatch): assert stale["index"]["files_changed_since_build"] >= 1 +def test_explain_reports_stale_after_edit(sample_repo): + """Regression: explain must honor the freshness contract like search. + + Before the fix, explain called the retrieval pipeline without root/config, so + it always fell back to a hardcoded ``stale=False, files_changed_since_build=0`` + block — silently breaking the skill's freshness check for "how does X work". + """ + import sqlite3 + + assert runner.invoke(app, ["--root", str(sample_repo), "index"]).exit_code == 0 + + res = runner.invoke( + app, ["--root", str(sample_repo), "--json", "explain", "how does token refresh work"] + ) + assert res.exit_code == 0, res.output + fresh = _json.loads(res.output) + assert fresh["index"]["exists"] is True + assert fresh["index"]["stale"] is False + + db_path = sample_repo / ".claude" / "cache" / "codebase-index" / "index.sqlite" + conn = sqlite3.connect(str(db_path)) + conn.execute("UPDATE files SET mtime_ns = 1") + conn.execute("DELETE FROM meta WHERE key = 'head_commit'") + conn.commit() + conn.close() + + res2 = runner.invoke( + app, ["--root", str(sample_repo), "--json", "explain", "how does token refresh work"] + ) + stale = _json.loads(res2.output) + assert stale["index"]["stale"] is True + assert stale["index"]["files_changed_since_build"] >= 1 + + def test_search_kind_words_filter_symbol_kind(sample_repo): assert runner.invoke(app, ["--root", str(sample_repo), "index"]).exit_code == 0 From ace0f9b509af4bb5569f257cd1a491f8c57d2f02 Mon Sep 17 00:00:00 2001 From: denfry Date: Sun, 7 Jun 2026 19:41:00 +0300 Subject: [PATCH 3/8] feat(search): expose --offset so CLI pagination works end-to-end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The retrieval pipeline and MCP server already supported result paging (offset -> pagination.next_offset), but the CLI `search` command never surfaced an --offset flag. Every invocation silently returned page one and the advertised `pagination.next_offset` was a dead end for the skill. - Add `--offset` to `search` (rejects negative values with exit 2). - Surface a "more available — --offset N" note in markdown output. - Update SKILL.md (authored + packaged template + the three installed copies) to document paging via --offset, replacing the stale "CLI search is single-page" guidance. - Regression tests at the CLI layer for paging and negative-offset rejection. Co-Authored-By: Claude Opus 4.8 --- .claude/skills/codebase-index/SKILL.md | 8 ++-- .codex/skills/codebase-index/SKILL.md | 8 ++-- .opencode/skills/codebase-index/SKILL.md | 8 ++-- CHANGELOG.md | 5 +++ skill/SKILL.md | 8 ++-- src/codebase_index/cli.py | 9 +++- src/codebase_index/output/markdown.py | 8 ++++ src/codebase_index/skill_template/SKILL.md | 8 ++-- tests/test_search_cli.py | 49 ++++++++++++++++++++++ 9 files changed, 90 insertions(+), 21 deletions(-) diff --git a/.claude/skills/codebase-index/SKILL.md b/.claude/skills/codebase-index/SKILL.md index 16b7753..5950c41 100644 --- a/.claude/skills/codebase-index/SKILL.md +++ b/.claude/skills/codebase-index/SKILL.md @@ -102,10 +102,10 @@ Top-level fields: useful to sanity-check a weak result (e.g. a "how does X work" question that resolved to a bare symbol lookup may need `explain` instead). - `pagination` — present only when more results exist than fit the page. It - reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; - if `has_more` is true and you still lack context, raise `--token-budget` or - refine the query with a more specific subcommand rather than expecting a second - page (CLI search is single-page). + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. ## Token efficiency rules diff --git a/.codex/skills/codebase-index/SKILL.md b/.codex/skills/codebase-index/SKILL.md index 16b7753..5950c41 100644 --- a/.codex/skills/codebase-index/SKILL.md +++ b/.codex/skills/codebase-index/SKILL.md @@ -102,10 +102,10 @@ Top-level fields: useful to sanity-check a weak result (e.g. a "how does X work" question that resolved to a bare symbol lookup may need `explain` instead). - `pagination` — present only when more results exist than fit the page. It - reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; - if `has_more` is true and you still lack context, raise `--token-budget` or - refine the query with a more specific subcommand rather than expecting a second - page (CLI search is single-page). + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. ## Token efficiency rules diff --git a/.opencode/skills/codebase-index/SKILL.md b/.opencode/skills/codebase-index/SKILL.md index 16b7753..5950c41 100644 --- a/.opencode/skills/codebase-index/SKILL.md +++ b/.opencode/skills/codebase-index/SKILL.md @@ -102,10 +102,10 @@ Top-level fields: useful to sanity-check a weak result (e.g. a "how does X work" question that resolved to a bare symbol lookup may need `explain` instead). - `pagination` — present only when more results exist than fit the page. It - reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; - if `has_more` is true and you still lack context, raise `--token-budget` or - refine the query with a more specific subcommand rather than expecting a second - page (CLI search is single-page). + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. ## Token efficiency rules diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b4573d..278b283 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,11 @@ All notable changes to this project are documented here. The format is based on run arbitrary Python. ### Fixed +- `search` now exposes `--offset`, so the pagination contract is reachable from the CLI/skill. + The retrieval pipeline and MCP already supported paging, but the CLI command never surfaced the + flag — every call silently returned page one and the advertised `pagination.next_offset` was a + dead end. Markdown output now also notes when more results are available. `--offset` rejects + negative values. - `explain` now honors the index freshness contract: it passes `root`/`config` into the retrieval pipeline, so `index.stale` / `files_changed_since_build` reflect reality instead of a hardcoded "fresh" block. Previously the skill's freshness check silently never triggered for diff --git a/skill/SKILL.md b/skill/SKILL.md index 16b7753..5950c41 100644 --- a/skill/SKILL.md +++ b/skill/SKILL.md @@ -102,10 +102,10 @@ Top-level fields: useful to sanity-check a weak result (e.g. a "how does X work" question that resolved to a bare symbol lookup may need `explain` instead). - `pagination` — present only when more results exist than fit the page. It - reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; - if `has_more` is true and you still lack context, raise `--token-budget` or - refine the query with a more specific subcommand rather than expecting a second - page (CLI search is single-page). + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. ## Token efficiency rules diff --git a/src/codebase_index/cli.py b/src/codebase_index/cli.py index 609c824..57867b7 100644 --- a/src/codebase_index/cli.py +++ b/src/codebase_index/cli.py @@ -375,6 +375,9 @@ def search( ctx: typer.Context, query: str = typer.Argument(..., help="Search query."), limit: int = typer.Option(10, "--limit"), + offset: int = typer.Option( + 0, "--offset", help="Skip the first N results (use pagination.next_offset to page)." + ), token_budget: int = typer.Option(1500, "--token-budget"), mode: str = typer.Option("hybrid", "--mode", help="hybrid|fts|symbol|vector"), no_fallback: bool = typer.Option(False, "--no-fallback"), @@ -386,6 +389,10 @@ def search( from .retrieval.pipeline import search as run_search from .storage.db import Database + if offset < 0: + typer.echo("[codebase-index] --offset must be >= 0.") + raise typer.Exit(code=2) + backend = None if mode in ("vector", "hybrid"): backend = _resolve_backend_for_search(ctx) @@ -402,7 +409,7 @@ def search( if backend is not None and getattr(backend, "enabled", False): db.enable_vectors() payload = run_search( - db.conn, query, mode=mode, limit=limit, + db.conn, query, mode=mode, limit=limit, offset=offset, token_budget=token_budget, no_fallback=no_fallback, backend=backend, root=Path(cfg.root), config=cfg, ) diff --git a/src/codebase_index/output/markdown.py b/src/codebase_index/output/markdown.py index 5768215..558fc6a 100644 --- a/src/codebase_index/output/markdown.py +++ b/src/codebase_index/output/markdown.py @@ -45,6 +45,14 @@ def _render_dict(payload: dict) -> str: for cmd in fb: lines.append(f"- `{cmd}`") + pg = payload.get("pagination") + if pg: + shown = f"results {pg['offset'] + 1}–{pg['offset'] + len(payload['results'])}" + if pg.get("has_more"): + lines.append(f"\n_Showing {shown}; more available — `--offset {pg['next_offset']}`._") + else: + lines.append(f"\n_Showing {shown} (end of results)._") + return "\n".join(lines) diff --git a/src/codebase_index/skill_template/SKILL.md b/src/codebase_index/skill_template/SKILL.md index 16b7753..5950c41 100644 --- a/src/codebase_index/skill_template/SKILL.md +++ b/src/codebase_index/skill_template/SKILL.md @@ -102,10 +102,10 @@ Top-level fields: useful to sanity-check a weak result (e.g. a "how does X work" question that resolved to a bare symbol lookup may need `explain` instead). - `pagination` — present only when more results exist than fit the page. It - reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; - if `has_more` is true and you still lack context, raise `--token-budget` or - refine the query with a more specific subcommand rather than expecting a second - page (CLI search is single-page). + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. ## Token efficiency rules diff --git a/tests/test_search_cli.py b/tests/test_search_cli.py index 0efccae..d0896a4 100644 --- a/tests/test_search_cli.py +++ b/tests/test_search_cli.py @@ -179,3 +179,52 @@ def test_search_kind_words_filter_symbol_kind(sample_repo): assert result.exit_code == 0, result.output payload = _json.loads(result.output) assert payload["results"][0]["symbols"] == ["refresh_access_token"] + + +def test_search_offset_paginates_through_cli(sample_repo): + """Regression: --offset must reach the retrieval pipeline. + + Before the fix, the CLI search command never exposed --offset, so the + pipeline's pagination contract (advertised via the JSON ``pagination`` block + and MCP's ``next_offset``) was unreachable from the CLI/skill — every call + silently returned page one. + """ + assert runner.invoke(app, ["--root", str(sample_repo), "index"]).exit_code == 0 + + page1 = runner.invoke( + app, ["--root", str(sample_repo), "--json", "search", "token", "--limit", "1"] + ) + assert page1.exit_code == 0, page1.output + p1 = _json.loads(page1.output) + pag = p1.get("pagination") + if not pag or not pag.get("has_more"): + # Fixture too small to page; the flag must still be accepted. + return + + page2 = runner.invoke( + app, + [ + "--root", + str(sample_repo), + "--json", + "search", + "token", + "--limit", + "1", + "--offset", + str(pag["next_offset"]), + ], + ) + assert page2.exit_code == 0, page2.output + p2 = _json.loads(page2.output) + assert p2["pagination"]["offset"] == pag["next_offset"] + # The second page must not repeat the first page's top hit. + k1 = (p1["results"][0]["path"], p1["results"][0]["line_start"]) + k2 = (p2["results"][0]["path"], p2["results"][0]["line_start"]) + assert k1 != k2 + + +def test_search_negative_offset_rejected(sample_repo): + result = runner.invoke(app, ["--root", str(sample_repo), "search", "token", "--offset", "-1"]) + assert result.exit_code == 2 + assert "offset" in result.output.lower() From 3d86326c091f5c59d3db928fcc053f7201298ef7 Mon Sep 17 00:00:00 2001 From: denfry Date: Sun, 7 Jun 2026 19:58:53 +0300 Subject: [PATCH 4/8] feat(graph): flag partial refs/impact coverage for Tier-B languages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Import/inheritance edges are only extracted for the hand-tuned (Tier-A) languages. A symbol or file in a Tier-B language (generic tree-sitter walk, e.g. Lua) yields symbols and best-effort call sites but no import/extends/ implements edges, so `refs`/`impact` can silently undercount — an empty result read as "nothing references this" is a footgun for an agent. - Add a `GraphCoverage` model (`partial`, `languages`, `reason`) and attach it to RefsResponse / ImpactResponse. `for_paths` classifies a symbol/target's defining language(s): Tier-B (tree-sitter routed, no LangSpec) -> partial. - refs_lookup judges coverage by the symbol's definition language; impact_lookup by the resolved target file('s) language. - Markdown output prints a "Partial graph coverage" warning (including on the empty-result path, where it matters most). - Document the `coverage` field in SKILL.md (authored + packaged template + the four installed/plugin copies); regenerate the refs/impact goldens (Tier-A Python = partial:false). - Regression test over a mixed Lua/Python repo asserts partial for the Tier-B symbol/file and full coverage for the Tier-A one. Also syncs the plugin skill copy with the prior --offset doc change. Co-Authored-By: Claude Opus 4.8 --- .claude/skills/codebase-index/SKILL.md | 6 ++ .codex/skills/codebase-index/SKILL.md | 6 ++ .opencode/skills/codebase-index/SKILL.md | 6 ++ CHANGELOG.md | 9 +++ skill/SKILL.md | 6 ++ skills/codebase-index/SKILL.md | 14 +++-- src/codebase_index/graph/expand.py | 16 ++++- src/codebase_index/models.py | 48 ++++++++++++++- src/codebase_index/output/markdown.py | 21 ++++++- src/codebase_index/retrieval/searchers.py | 14 ++++- src/codebase_index/skill_template/SKILL.md | 6 ++ tests/golden/impact_user_model.json | 5 ++ tests/golden/refs_refresh_access_token.json | 5 ++ tests/test_graph_coverage.py | 66 +++++++++++++++++++++ 14 files changed, 218 insertions(+), 10 deletions(-) create mode 100644 tests/test_graph_coverage.py diff --git a/.claude/skills/codebase-index/SKILL.md b/.claude/skills/codebase-index/SKILL.md index 5950c41..561cac3 100644 --- a/.claude/skills/codebase-index/SKILL.md +++ b/.claude/skills/codebase-index/SKILL.md @@ -106,6 +106,12 @@ Top-level fields: `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer refining with a more specific subcommand or raising `--token-budget` first — page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages. + When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules diff --git a/.codex/skills/codebase-index/SKILL.md b/.codex/skills/codebase-index/SKILL.md index 5950c41..561cac3 100644 --- a/.codex/skills/codebase-index/SKILL.md +++ b/.codex/skills/codebase-index/SKILL.md @@ -106,6 +106,12 @@ Top-level fields: `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer refining with a more specific subcommand or raising `--token-budget` first — page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages. + When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules diff --git a/.opencode/skills/codebase-index/SKILL.md b/.opencode/skills/codebase-index/SKILL.md index 5950c41..561cac3 100644 --- a/.opencode/skills/codebase-index/SKILL.md +++ b/.opencode/skills/codebase-index/SKILL.md @@ -106,6 +106,12 @@ Top-level fields: `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer refining with a more specific subcommand or raising `--token-budget` first — page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages. + When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules diff --git a/CHANGELOG.md b/CHANGELOG.md index 278b283..39b425c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,15 @@ All notable changes to this project are documented here. The format is based on embedding pass now hashes chunk content and only calls the (potentially slow or paid) backend for text never embedded under the active model — unchanged content reuses its cached vector for free. +### Added +- **Graph coverage signal**: `refs` and `impact` now report a `coverage` block + (`partial`, `languages`, `reason`). Import/inheritance edges are only extracted + for the hand-tuned (Tier-A) languages, so a symbol or file in a Tier-B language + (generic tree-sitter walk, e.g. Lua) can produce an empty/short result that is + inconclusive rather than authoritative. `coverage.partial` flags this so agents + fall back to Grep instead of reading "no references" as proof. Markdown output + prints a matching warning; the skill documents the field. + ### Changed - The embedding pass reports cache **misses** (vectors actually computed) as its "embedded" count. - `prune_orphan_vectors` now deletes stale `vec_chunks` rows in a single batched `executemany`. diff --git a/skill/SKILL.md b/skill/SKILL.md index 5950c41..561cac3 100644 --- a/skill/SKILL.md +++ b/skill/SKILL.md @@ -106,6 +106,12 @@ Top-level fields: `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer refining with a more specific subcommand or raising `--token-budget` first — page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages. + When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules diff --git a/skills/codebase-index/SKILL.md b/skills/codebase-index/SKILL.md index 16b7753..561cac3 100644 --- a/skills/codebase-index/SKILL.md +++ b/skills/codebase-index/SKILL.md @@ -102,10 +102,16 @@ Top-level fields: useful to sanity-check a weak result (e.g. a "how does X work" question that resolved to a bare symbol lookup may need `explain` instead). - `pagination` — present only when more results exist than fit the page. It - reports `has_more` and `next_offset`. The CLI returns the highest-ranked page; - if `has_more` is true and you still lack context, raise `--token-budget` or - refine the query with a more specific subcommand rather than expecting a second - page (CLI search is single-page). + reports `has_more` and `next_offset`. To page, re-run `search` with + `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer + refining with a more specific subcommand or raising `--token-budget` first — + page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages. + When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules diff --git a/src/codebase_index/graph/expand.py b/src/codebase_index/graph/expand.py index 6552ba2..deebed1 100644 --- a/src/codebase_index/graph/expand.py +++ b/src/codebase_index/graph/expand.py @@ -16,7 +16,7 @@ from collections import deque from typing import Optional -from ..models import ImpactNode, ImpactResponse, IndexFreshness +from ..models import GraphCoverage, ImpactNode, ImpactResponse, IndexFreshness from ..storage import repo @@ -106,6 +106,19 @@ def walk_impact( return out +def _target_paths(conn: sqlite3.Connection, target: str) -> list[str]: + """The file path(s) the target resolves to, for coverage classification.""" + if repo.file_by_path(conn, target) is not None: + return [target] + sym_rows = repo.symbols_by_name(conn, target, exact=True) + if sym_rows: + return [r["path"] for r in sym_rows] + suffix = repo.files_with_suffix(conn, target) + if len(suffix) == 1: + return [suffix[0]["path"]] + return [] + + def impact_lookup( conn: sqlite3.Connection, target: str, *, depth: int, direction: str ) -> ImpactResponse: @@ -118,4 +131,5 @@ def impact_lookup( return ImpactResponse( target=target, direction=direction, depth=depth, index=_freshness(conn), nodes=nodes, files=files, + coverage=GraphCoverage.for_paths(_target_paths(conn, target)), ) diff --git a/src/codebase_index/models.py b/src/codebase_index/models.py index 126988a..b35d959 100644 --- a/src/codebase_index/models.py +++ b/src/codebase_index/models.py @@ -5,9 +5,9 @@ from __future__ import annotations -from typing import Literal, Optional +from typing import Iterable, Literal, Optional -from pydantic import BaseModel +from pydantic import BaseModel, Field Intent = Literal[ "locate_impl", "how_it_works", "impact", "find_refs", @@ -67,6 +67,48 @@ class SymbolResponse(BaseModel): symbols: list[SymbolDef] = [] +class GraphCoverage(BaseModel): + """Honesty signal for graph-derived answers (refs/impact). + + Dependency edges (imports / inheritance) are only extracted for the fully + supported (Tier-A) languages. A symbol or file in a Tier-B language (generic + tree-sitter walk) yields symbols and best-effort call sites but no + import/extends/implements edges, so refs/impact can undercount. When + ``partial`` is true an *empty or short* result does not prove there are no + references — it may just be unanalyzed; confirm with Grep. + """ + + partial: bool = False + languages: list[str] = [] + reason: Optional[str] = None + + @classmethod + def for_paths(cls, paths: Iterable[str]) -> "GraphCoverage": + from .discovery.classify import detect_language, parser_for + from .parsers.languages import spec_for + + tier_b = sorted( + { + lang + for p in paths + if (lang := detect_language(p)) is not None + and parser_for(lang) == "treesitter" + and spec_for(lang) is None + } + ) + if not tier_b: + return cls() + return cls( + partial=True, + languages=tier_b, + reason=( + "Import/inheritance edges are not extracted for " + f"{', '.join(tier_b)} (best-effort symbols only). An empty or short " + "result is inconclusive — confirm with a Grep over the codebase." + ), + ) + + class RefSite(BaseModel): path: str line: int @@ -77,6 +119,7 @@ class RefsResponse(BaseModel): query: str index: IndexFreshness sites: list[RefSite] = [] + coverage: GraphCoverage = Field(default_factory=GraphCoverage) class ImpactNode(BaseModel): @@ -95,3 +138,4 @@ class ImpactResponse(BaseModel): index: IndexFreshness nodes: list[ImpactNode] = [] files: list[str] = [] # distinct affected files, ranked + coverage: GraphCoverage = Field(default_factory=GraphCoverage) diff --git a/src/codebase_index/output/markdown.py b/src/codebase_index/output/markdown.py index 558fc6a..afd6a76 100644 --- a/src/codebase_index/output/markdown.py +++ b/src/codebase_index/output/markdown.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing import Optional + from ..models import ImpactResponse, RefsResponse, SearchResponse, SymbolResponse @@ -122,17 +124,28 @@ def render_symbols(resp: SymbolResponse) -> str: return "\n".join(lines).rstrip() + "\n" +def _coverage_line(coverage) -> Optional[str]: + if coverage is not None and getattr(coverage, "partial", False): + return f"\n> ⚠️ Partial graph coverage: {coverage.reason}" + return None + + def render_refs(resp: RefsResponse) -> str: lines = [_header(resp.query, resp.index.exists, resp.index.stale)] lines.append("") + note = _coverage_line(resp.coverage) if not resp.sites: lines.append("_No references found._") + if note: + lines.append(note) return "\n".join(lines).rstrip() + "\n" lines.append("| kind | path | line |") lines.append("|------|------|------|") for site in resp.sites: lines.append(f"| {site.kind} | `{site.path}` | {site.line} |") + if note: + lines.append(note) return "\n".join(lines).rstrip() + "\n" @@ -147,12 +160,18 @@ def render_impact(resp: ImpactResponse) -> str: header = (f"**impact:** `{resp.target}` · **direction:** {resp.direction} · " f"**depth:** {resp.depth} · **affected files:** {len(resp.files)}") lines = [header, ""] + note = _coverage_line(resp.coverage) if not resp.nodes: - return "\n".join(lines + ["_No impact found (target unknown or no edges)._", ""]).rstrip() + "\n" + body = ["_No impact found (target unknown or no edges)._"] + if note: + body.append(note) + return "\n".join(lines + body + [""]).rstrip() + "\n" lines.append("| dist | via | kind | node | location |") lines.append("|------|-----|------|------|----------|") for n in sorted(resp.nodes, key=lambda x: (x.distance, x.path, x.line_start or 0)): loc = f"{n.path}:{n.line_start}" if n.line_start else n.path node_name = f"`{n.name}`" if n.name else "—" lines.append(f"| {n.distance} | {n.via_edge or ''} | {n.kind} | {node_name} | `{loc}` |") + if note: + lines.append(note) return "\n".join(lines).rstrip() + "\n" diff --git a/src/codebase_index/retrieval/searchers.py b/src/codebase_index/retrieval/searchers.py index 954ffa2..0cb0d00 100644 --- a/src/codebase_index/retrieval/searchers.py +++ b/src/codebase_index/retrieval/searchers.py @@ -16,6 +16,7 @@ from ..indexer.freshness import compute_freshness from ..models import ( Confidence, + GraphCoverage, IndexFreshness, ReadRange, RefSite, @@ -326,6 +327,7 @@ def symbol_lookup( def refs_lookup(conn: sqlite3.Connection, name: str, *, kind: str) -> RefsResponse: + defs = repo.symbols_by_name(conn, name, exact=True) sites = [ RefSite(path=row["path"], line=row["line"], kind="call") for row in repo.refs_for_name(conn, name) @@ -333,10 +335,18 @@ def refs_lookup(conn: sqlite3.Connection, name: str, *, kind: str) -> RefsRespon if kind == "all": sites.extend( RefSite(path=row["path"], line=row["line_start"], kind="definition") - for row in repo.symbols_by_name(conn, name, exact=True) + for row in defs ) sites.sort(key=lambda site: (site.path, site.line, site.kind)) - return RefsResponse(query=name, index=_freshness(conn), sites=sites) + # Coverage is judged by the symbol's defining language(s); fall back to the + # call-site files when the symbol has no indexed definition. + coverage_paths = [row["path"] for row in defs] or [s.path for s in sites] + return RefsResponse( + query=name, + index=_freshness(conn), + sites=sites, + coverage=GraphCoverage.for_paths(coverage_paths), + ) def vector_candidates( diff --git a/src/codebase_index/skill_template/SKILL.md b/src/codebase_index/skill_template/SKILL.md index 5950c41..561cac3 100644 --- a/src/codebase_index/skill_template/SKILL.md +++ b/src/codebase_index/skill_template/SKILL.md @@ -106,6 +106,12 @@ Top-level fields: `--offset ` (e.g. `search "query" --limit 10 --offset 10`). Prefer refining with a more specific subcommand or raising `--token-budget` first — page only when the top results genuinely miss the answer. +- `coverage` (on `refs`/`impact` only) — graph-completeness signal. Dependency + edges (imports/inheritance) are extracted only for fully supported languages. + When `coverage.partial` is `true` (the symbol/file is in a Tier-B language such + as Lua), an **empty or short `refs`/`impact` result is inconclusive** — it may + just be unanalyzed, not absent. Confirm with a Grep before concluding "nothing + references this". `coverage.languages` lists the affected languages. ## Token efficiency rules diff --git a/tests/golden/impact_user_model.json b/tests/golden/impact_user_model.json index 898f952..9ec0c77 100644 --- a/tests/golden/impact_user_model.json +++ b/tests/golden/impact_user_model.json @@ -1,4 +1,9 @@ { + "coverage": { + "languages": [], + "partial": false, + "reason": null + }, "depth": 2, "direction": "up", "files": [ diff --git a/tests/golden/refs_refresh_access_token.json b/tests/golden/refs_refresh_access_token.json index 04dc0ca..f5693df 100644 --- a/tests/golden/refs_refresh_access_token.json +++ b/tests/golden/refs_refresh_access_token.json @@ -1,4 +1,9 @@ { + "coverage": { + "languages": [], + "partial": false, + "reason": null + }, "index": { "built_at": "", "exists": true, diff --git a/tests/test_graph_coverage.py b/tests/test_graph_coverage.py new file mode 100644 index 0000000..872726c --- /dev/null +++ b/tests/test_graph_coverage.py @@ -0,0 +1,66 @@ +"""Regression: refs/impact must flag partial graph coverage for Tier-B languages. + +Import/inheritance edges are only extracted for the hand-tuned (Tier-A) languages. +A symbol or file in a Tier-B language (generic tree-sitter walk, e.g. Lua) gets +symbols and best-effort call sites but no dependency edges, so an empty/short +refs or impact result is inconclusive — the response must say so rather than let +an agent read "no references" as proof. +""" + +from __future__ import annotations + +from pathlib import Path + +from codebase_index.config import load +from codebase_index.graph.expand import impact_lookup +from codebase_index.indexer.pipeline import build_index +from codebase_index.models import GraphCoverage +from codebase_index.retrieval.searchers import refs_lookup +from codebase_index.storage.db import Database + +_LUA = ( + "local function greet(name)\n return name\nend\n\n" + "local function main()\n return greet('x')\nend\n" +) +_PY = "def helper():\n return 1\n\n\ndef caller():\n return helper()\n" + + +def _index(repo: Path) -> Path: + (repo / "mod.lua").write_text(_LUA, encoding="utf-8") + (repo / "mod.py").write_text(_PY, encoding="utf-8") + cfg = load(root=str(repo)) + db_path = repo / "index.sqlite" + with Database(db_path) as db: + build_index(cfg, db, root=Path(cfg.root)) + return db_path + + +def test_coverage_for_paths_unit(): + assert GraphCoverage.for_paths(["a.py", "b.go"]).partial is False + partial = GraphCoverage.for_paths(["x.lua"]) + assert partial.partial is True + assert partial.languages == ["lua"] + assert partial.reason and "lua" in partial.reason + + +def test_refs_flags_partial_for_tier_b_symbol(tmp_path): + db_path = _index(tmp_path) + with Database(db_path) as db: + lua_refs = refs_lookup(db.conn, "greet", kind="all") + py_refs = refs_lookup(db.conn, "helper", kind="all") + + assert lua_refs.coverage.partial is True + assert "lua" in lua_refs.coverage.languages + # Tier-A symbol: fully analyzed, no warning. + assert py_refs.coverage.partial is False + + +def test_impact_flags_partial_for_tier_b_file(tmp_path): + db_path = _index(tmp_path) + with Database(db_path) as db: + lua_impact = impact_lookup(db.conn, "mod.lua", depth=2, direction="both") + py_impact = impact_lookup(db.conn, "mod.py", depth=2, direction="both") + + assert lua_impact.coverage.partial is True + assert "lua" in lua_impact.coverage.languages + assert py_impact.coverage.partial is False From 9262cedfae876a6d4a5fe96097f2b9a532de7560 Mon Sep 17 00:00:00 2001 From: denfry Date: Sun, 7 Jun 2026 20:07:20 +0300 Subject: [PATCH 5/8] feat(diagnostics): report graph tier per language in stats and doctor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends the Tier-B graph-coverage honesty from per-query (refs/impact `coverage`) to a repo-wide, upfront signal. - `stats`: each tree-sitter language now carries `graph: full|partial` (`full` = Tier-A spec with import/inheritance edges; `partial` = Tier-B, symbols only). Human output appends "· partial graph (Tier-B)". - `doctor`: new informational `graph_coverage` finding listing Tier-B languages present in the index, so refs/impact undercounting is visible during diagnostics rather than only when an answer comes back empty. - Add `languages.has_full_graph(lang)` as the single source of truth for the Tier-A/Tier-B distinction, shared by stats and doctor. - Document the field in SKILL.md (all copies); regenerate the stats golden. - Regression tests: Tier-B (Lua) index flags partial; Tier-A-only is full. Co-Authored-By: Claude Opus 4.8 --- .claude/skills/codebase-index/SKILL.md | 6 +++++- .codex/skills/codebase-index/SKILL.md | 6 +++++- .opencode/skills/codebase-index/SKILL.md | 6 +++++- CHANGELOG.md | 4 ++++ skill/SKILL.md | 6 +++++- skills/codebase-index/SKILL.md | 6 +++++- src/codebase_index/cli.py | 13 ++++++++++-- src/codebase_index/doctor.py | 19 +++++++++++++++++ src/codebase_index/parsers/languages.py | 9 ++++++++ src/codebase_index/skill_template/SKILL.md | 6 +++++- tests/golden/stats.json | 2 ++ tests/test_doctor.py | 24 ++++++++++++++++++++++ 12 files changed, 99 insertions(+), 8 deletions(-) diff --git a/.claude/skills/codebase-index/SKILL.md b/.claude/skills/codebase-index/SKILL.md index 561cac3..2105974 100644 --- a/.claude/skills/codebase-index/SKILL.md +++ b/.claude/skills/codebase-index/SKILL.md @@ -129,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`. + In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/.codex/skills/codebase-index/SKILL.md b/.codex/skills/codebase-index/SKILL.md index 561cac3..2105974 100644 --- a/.codex/skills/codebase-index/SKILL.md +++ b/.codex/skills/codebase-index/SKILL.md @@ -129,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`. + In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/.opencode/skills/codebase-index/SKILL.md b/.opencode/skills/codebase-index/SKILL.md index 561cac3..2105974 100644 --- a/.opencode/skills/codebase-index/SKILL.md +++ b/.opencode/skills/codebase-index/SKILL.md @@ -129,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`. + In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/CHANGELOG.md b/CHANGELOG.md index 39b425c..cb9e12c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ All notable changes to this project are documented here. The format is based on text never embedded under the active model — unchanged content reuses its cached vector for free. ### Added +- **Repo-wide graph tier in diagnostics**: `stats` now tags each tree-sitter language with + `graph: full|partial`, and `doctor` adds a `graph_coverage` finding listing Tier-B languages + present in the index. Surfaces upfront which languages have partial `refs`/`impact` (symbols but + no import/inheritance edges) instead of only signaling per-query. - **Graph coverage signal**: `refs` and `impact` now report a `coverage` block (`partial`, `languages`, `reason`). Import/inheritance edges are only extracted for the hand-tuned (Tier-A) languages, so a symbol or file in a Tier-B language diff --git a/skill/SKILL.md b/skill/SKILL.md index 561cac3..2105974 100644 --- a/skill/SKILL.md +++ b/skill/SKILL.md @@ -129,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`. + In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/skills/codebase-index/SKILL.md b/skills/codebase-index/SKILL.md index 561cac3..2105974 100644 --- a/skills/codebase-index/SKILL.md +++ b/skills/codebase-index/SKILL.md @@ -129,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`. + In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/src/codebase_index/cli.py b/src/codebase_index/cli.py index 57867b7..4126fcf 100644 --- a/src/codebase_index/cli.py +++ b/src/codebase_index/cli.py @@ -569,6 +569,7 @@ def stats( import json as _json from .config import load + from .parsers.languages import has_full_graph from .storage import repo from .storage.db import Database @@ -591,7 +592,14 @@ def stats( built_at = repo.get_meta(db.conn, "built_at") head = repo.get_meta(db.conn, "head_commit") coverage = [ - {"lang": r["lang"], "files": r["files"], "symbols": r["symbols"]} + { + "lang": r["lang"], + "files": r["files"], + "symbols": r["symbols"], + # Tier-A languages get import/inheritance edges; Tier-B is symbols-only, + # so refs/impact are partial for them. + "graph": "full" if has_full_graph(r["lang"]) else "partial", + } for r in repo.treesitter_coverage(db.conn) ] @@ -612,7 +620,8 @@ def stats( typer.echo(f"files={files} symbols={symbols} built_at={built_at} head={head}") for r in coverage: flag = " ⚠ 0 symbols" if (r["symbols"] or 0) == 0 and r["files"] >= 3 else "" - typer.echo(f" {r['lang']}: {r['files']} files, {r['symbols']} symbols{flag}") + tier = " · partial graph (Tier-B)" if r["graph"] == "partial" else "" + typer.echo(f" {r['lang']}: {r['files']} files, {r['symbols']} symbols{flag}{tier}") @app.command() diff --git a/src/codebase_index/doctor.py b/src/codebase_index/doctor.py index 6eac676..0770cae 100644 --- a/src/codebase_index/doctor.py +++ b/src/codebase_index/doctor.py @@ -107,6 +107,25 @@ def run_doctor(root: Path, config: Config) -> list[Finding]: ) ) + # 5. Dependency-graph coverage: Tier-B languages (grammar but no hand-tuned spec) + # yield symbols but no import/inheritance edges, so refs/impact undercount. + from .parsers.languages import has_full_graph + + tier_b = sorted({r["lang"] for r in coverage if not has_full_graph(r["lang"])}) + findings.append( + Finding( + id="graph_coverage", + ok=True, + severity="info", + detail=( + "all indexed languages have full dependency-graph support" + if not tier_b + else f"partial dependency graph for Tier-B language(s): {', '.join(tier_b)} " + "— refs/impact may undercount (confirm with Grep)" + ), + ) + ) + return findings diff --git a/src/codebase_index/parsers/languages.py b/src/codebase_index/parsers/languages.py index 90cd325..a772fad 100644 --- a/src/codebase_index/parsers/languages.py +++ b/src/codebase_index/parsers/languages.py @@ -279,3 +279,12 @@ def is_supported(lang: Optional[str]) -> bool: def spec_for(lang: Optional[str]) -> Optional[LangSpec]: return LANGS.get(lang) if lang else None + + +def has_full_graph(lang: Optional[str]) -> bool: + """True if `lang` has a Tier-A spec (full import/inheritance edges for refs/impact). + + Tier-B languages (a loadable grammar but no hand-tuned spec) yield symbols and + best-effort call sites only, so their dependency graph is partial. + """ + return spec_for(lang) is not None diff --git a/src/codebase_index/skill_template/SKILL.md b/src/codebase_index/skill_template/SKILL.md index 561cac3..2105974 100644 --- a/src/codebase_index/skill_template/SKILL.md +++ b/src/codebase_index/skill_template/SKILL.md @@ -129,10 +129,14 @@ Fall back to built-in search **only** when: results are empty, `confidence` is ` 0. If confidence is consistently low across queries, run diagnostics first: ```bash - codebase-index stats --json # check coverage and symbol counts per language + codebase-index stats --json # per-language file/symbol counts + graph tier codebase-index doctor # surface config or security issues ``` Low symbol counts for a language may mean the index needs a full rebuild: `codebase-index index`. + In `stats`, each language carries `graph: full|partial` (and `doctor` reports a + `graph_coverage` finding): `partial` (Tier-B) means `refs`/`impact` lack + import/inheritance edges for that language — treat empty results there as + inconclusive. 1. Use `fallback_suggestions.ripgrep` patterns from the response via Grep. 2. If still nothing, Glob for likely paths, then Grep within them. diff --git a/tests/golden/stats.json b/tests/golden/stats.json index 032e118..bf445b5 100644 --- a/tests/golden/stats.json +++ b/tests/golden/stats.json @@ -7,11 +7,13 @@ "treesitter_coverage": [ { "files": 3, + "graph": "full", "lang": "python", "symbols": 6 }, { "files": 2, + "graph": "full", "lang": "typescript", "symbols": 1 } diff --git a/tests/test_doctor.py b/tests/test_doctor.py index 373616e..943c96a 100644 --- a/tests/test_doctor.py +++ b/tests/test_doctor.py @@ -43,6 +43,30 @@ def test_doctor_cli_json(tmp_path): assert any(f["id"] == "cache_gitignored" for f in data["findings"]) +def test_doctor_flags_tier_b_partial_graph(tmp_path): + """A Tier-B language (Lua) in the index must surface a partial-graph info finding.""" + (tmp_path / "mod.lua").write_text("local function greet()\n return 1\nend\n", encoding="utf-8") + assert runner.invoke(app, ["--root", str(tmp_path), "index"]).exit_code == 0 + + cfg = Config() + cfg.root = str(tmp_path) + findings = {f.id: f for f in run_doctor(tmp_path, cfg)} + gc = findings["graph_coverage"] + assert gc.ok is True and gc.severity == "info" + assert "lua" in gc.detail + + +def test_doctor_full_graph_when_only_tier_a(tmp_path): + (tmp_path / "mod.py").write_text("def f():\n return 1\n", encoding="utf-8") + assert runner.invoke(app, ["--root", str(tmp_path), "index"]).exit_code == 0 + + cfg = Config() + cfg.root = str(tmp_path) + findings = {f.id: f for f in run_doctor(tmp_path, cfg)} + assert "lua" not in findings["graph_coverage"].detail + assert "full dependency-graph support" in findings["graph_coverage"].detail + + def test_doctor_strict_exits_nonzero_on_high_severity(tmp_path): # uncovered cache is a high-severity finding → --strict must fail res = runner.invoke(app, ["--root", str(tmp_path), "doctor", "--strict"]) From 19df9d45b0f2d2d0d10181ba7e471434242f5fb6 Mon Sep 17 00:00:00 2001 From: denfry Date: Mon, 8 Jun 2026 07:23:54 +0300 Subject: [PATCH 6/8] feat(embeddings): content-addressed vec_cache to skip re-embedding unchanged chunks Chunk ids churn on every full rebuild (replace_chunks), so a chunk-id-keyed skip alone re-embeds the entire repo each time. The embedding pass now hashes each chunk's content (sha256) and consults a `vec_cache` table keyed by (model, content_sha): only text never embedded under the active model hits the (potentially slow or paid) backend; unchanged content reuses its cached vector. - New `vec_cache` table + repo helpers (cached_embeddings, store_cached_embeddings, upsert_chunk_vector_blob); orphan vectors pruned in a single batched executemany. - `_embed_chunks` reports cache misses (vectors actually computed) as its count. - Schema/pipeline docs updated for the cache table and reuse flow. Co-Authored-By: Claude Opus 4.8 --- docs/DATABASE_SCHEMA.md | 33 +++++++++-- docs/RETRIEVAL_PIPELINE.md | 4 ++ docs/SCHEMA.md | 20 ++++++- src/codebase_index/indexer/pipeline.py | 28 ++++++++-- src/codebase_index/storage/repo.py | 58 ++++++++++++++++++-- tests/test_pipeline_vectors.py | 76 ++++++++++++++++++++++++++ 6 files changed, 203 insertions(+), 16 deletions(-) diff --git a/docs/DATABASE_SCHEMA.md b/docs/DATABASE_SCHEMA.md index c2e3cd9..e0d0d7a 100644 --- a/docs/DATABASE_SCHEMA.md +++ b/docs/DATABASE_SCHEMA.md @@ -85,16 +85,39 @@ FTS5 virtual table for full-text search (auto-managed by triggers). | `text` | TEXT | Chunk text (indexed by FTS5) | | `chunk_id` | INTEGER | References chunks(id) | -### embeddings (optional) +### vec_chunks (optional) -Stores vector embeddings for semantic search. +Vector embeddings for semantic search. Created **only** when `embeddings.enabled = true`, via the +`sqlite-vec` extension (a `vec0` virtual table). + +| Column | Type | Description | +|---|---|---| +| `chunk_id` | INTEGER PRIMARY KEY | References chunks(id) | +| `embedding` | FLOAT[dim] | Embedding vector; `dim` is fixed per build by the configured model | + +### vec_meta (optional) + +Records which embedding model/dimension produced the vectors currently in `vec_chunks`. | Column | Type | Description | |---|---|---| -| `chunk_id` | INTEGER PRIMARY KEY REFERENCES chunks(id) | Associated chunk | -| `vector` | BLOB | Serialized embedding vector | | `model` | TEXT | Embedding model identifier | -| `created_at` | TEXT | Creation timestamp | +| `dim` | INTEGER | Vector dimension | +| `built_at` | TEXT | ISO 8601 timestamp of the embedding pass | + +### vec_cache (optional) + +Content-addressed embedding cache. `chunk_id`s churn on every full rebuild (chunks are deleted and +re-inserted), so this cache is keyed by `(model, content_sha)` instead — letting unchanged content +reuse its vector for free across rebuilds, so only new or changed text hits the backend. + +| Column | Type | Description | +|---|---|---| +| `model` | TEXT NOT NULL | Embedding model identifier | +| `content_sha` | TEXT NOT NULL | SHA-256 of the chunk content | +| `embedding` | BLOB NOT NULL | Pre-serialized float32 vector | + +Primary key: `(model, content_sha)`. ### summaries diff --git a/docs/RETRIEVAL_PIPELINE.md b/docs/RETRIEVAL_PIPELINE.md index 86b2014..4e2901d 100644 --- a/docs/RETRIEVAL_PIPELINE.md +++ b/docs/RETRIEVAL_PIPELINE.md @@ -74,6 +74,10 @@ Ranked retrieval packet with confidence score **Score:** Cosine similarity (0.0 to 1.0). +> **Indexing note:** chunk embeddings are reused across rebuilds via a content-addressed +> `vec_cache` (keyed by model + content SHA-256), so only new or changed chunks are re-embedded. +> See [DATABASE_SCHEMA.md](DATABASE_SCHEMA.md) and [SCHEMA.md](SCHEMA.md) for details. + ## 5. Graph Expansion **Trigger:** After initial results are found. diff --git a/docs/SCHEMA.md b/docs/SCHEMA.md index 15b31e9..f5c21bd 100644 --- a/docs/SCHEMA.md +++ b/docs/SCHEMA.md @@ -129,9 +129,27 @@ CREATE VIRTUAL TABLE vec_chunks USING vec0( ); -- A side table records which embedding model/dim produced these vectors: CREATE TABLE vec_meta (model TEXT, dim INTEGER, built_at TEXT); +-- Content-addressed embedding cache, keyed by (model, content SHA-256): +CREATE TABLE vec_cache ( + model TEXT NOT NULL, + content_sha TEXT NOT NULL, + embedding BLOB NOT NULL, -- pre-serialized float32 vector + PRIMARY KEY (model, content_sha) +); ``` -If embeddings are disabled, `vec_chunks` does not exist and the vector searcher is skipped. +If embeddings are disabled, none of `vec_chunks`, `vec_meta`, or `vec_cache` exist and the vector +searcher is skipped. + +### Embedding reuse via `vec_cache` + +`chunk_id`s churn on every full rebuild because `replace_chunks` deletes and re-inserts rows, so a +`chunk_id`-keyed store alone would re-embed the entire repository each time. The embedding pass +therefore hashes each chunk's content (SHA-256) and looks it up in `vec_cache` under the active +model name. Only content never embedded under that model is sent to the (potentially slow or paid) +backend; everything else is copied straight from the cache into `vec_chunks`. Newly computed vectors +are written back to `vec_cache` so subsequent rebuilds reuse them. The reported "embedded" count +reflects cache **misses** — i.e. the work actually performed. ## Migrations diff --git a/src/codebase_index/indexer/pipeline.py b/src/codebase_index/indexer/pipeline.py index 48c63a3..b4a8e80 100644 --- a/src/codebase_index/indexer/pipeline.py +++ b/src/codebase_index/indexer/pipeline.py @@ -201,6 +201,8 @@ def _embed_chunks(cfg, db, conn) -> int: backend = resolve_backend(cfg, warn=lambda m: print(m)) if not getattr(backend, "enabled", False): return 0 + import sqlite_vec # type: ignore[import-untyped] + db.enable_vectors() repo.ensure_vec_tables(conn, dim=backend.dim) repo.prune_orphan_vectors(conn) @@ -208,13 +210,29 @@ def _embed_chunks(cfg, db, conn) -> int: rows = [r for r in repo.chunks_for_embedding(conn) if int(r["id"]) not in existing] if not rows: return 0 - texts = [r["content"] for r in rows] - vectors = backend.embed(texts) - for row, vec in zip(rows, vectors): - repo.upsert_chunk_vector(conn, int(row["id"]), vec) + + # Content-addressed reuse: chunk ids churn on every full rebuild (replace_chunks), + # so a chunk-id keyed skip alone re-embeds the whole repo each time. Hash the content + # and only call the (potentially slow / paid) backend for text never embedded under + # this model; everything else is copied straight from the cache. + shas = [hashlib.sha256(r["content"].encode("utf-8")).hexdigest() for r in rows] + cached = repo.cached_embeddings(conn, model=backend.name, shas=shas) + misses = [(r, sha) for r, sha in zip(rows, shas) if sha not in cached] + + fresh: dict[str, bytes] = {} + if misses: + vectors = backend.embed([r["content"] for r, _ in misses]) + for (_row, sha), vec in zip(misses, vectors): + fresh[sha] = sqlite_vec.serialize_float32(vec) + repo.store_cached_embeddings(conn, model=backend.name, items=list(fresh.items())) + + for row, sha in zip(rows, shas): + blob = cached.get(sha) or fresh[sha] + repo.upsert_chunk_vector_blob(conn, int(row["id"]), blob) + built_at = datetime.now(timezone.utc).isoformat() repo.set_vec_meta(conn, model=backend.name, dim=backend.dim, built_at=built_at) - return len(rows) + return len(misses) def _sha256_file(path: Path) -> str: diff --git a/src/codebase_index/storage/repo.py b/src/codebase_index/storage/repo.py index d87c995..338a12b 100644 --- a/src/codebase_index/storage/repo.py +++ b/src/codebase_index/storage/repo.py @@ -438,13 +438,22 @@ def count_resolved_edges(conn: sqlite3.Connection) -> int: def ensure_vec_tables(conn: sqlite3.Connection, *, dim: int) -> None: - """Create vec_chunks (sqlite-vec) + vec_meta if absent. dim is fixed per build.""" + """Create vec_chunks (sqlite-vec) + vec_meta + vec_cache if absent. dim is fixed per build.""" dim = int(dim) conn.execute( f"CREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(" f"chunk_id INTEGER PRIMARY KEY, embedding FLOAT[{dim}])" ) conn.execute("CREATE TABLE IF NOT EXISTS vec_meta (model TEXT, dim INTEGER, built_at TEXT)") + # Content-addressed embedding cache: chunk ids churn on every full rebuild + # (replace_chunks deletes + re-inserts), so a chunk-id keyed store alone would + # re-embed the whole repo each time. Keyed by (model, content_sha) the cache + # survives that churn and lets unchanged content reuse its vector for free. + conn.execute( + "CREATE TABLE IF NOT EXISTS vec_cache (" + "model TEXT NOT NULL, content_sha TEXT NOT NULL, embedding BLOB NOT NULL, " + "PRIMARY KEY (model, content_sha))" + ) def set_vec_meta(conn: sqlite3.Connection, *, model: str, dim: int, built_at: str) -> None: @@ -467,10 +476,49 @@ def upsert_chunk_vector( ) -> None: import sqlite_vec # type: ignore[import-untyped] + upsert_chunk_vector_blob(conn, chunk_id, sqlite_vec.serialize_float32(embedding)) + + +def upsert_chunk_vector_blob(conn: sqlite3.Connection, chunk_id: int, blob: bytes) -> None: + """Write a pre-serialized float32 embedding blob for a chunk (cache-reuse path).""" conn.execute("DELETE FROM vec_chunks WHERE chunk_id = ?", (int(chunk_id),)) conn.execute( "INSERT INTO vec_chunks (chunk_id, embedding) VALUES (?, ?)", - (int(chunk_id), sqlite_vec.serialize_float32(embedding)), + (int(chunk_id), blob), + ) + + +def cached_embeddings( + conn: sqlite3.Connection, *, model: str, shas: Iterable[str] +) -> dict[str, bytes]: + """Return {content_sha: serialized embedding blob} already cached for this model.""" + shas = list(dict.fromkeys(shas)) + if not shas: + return {} + out: dict[str, bytes] = {} + # Chunk the IN list to stay well under SQLite's variable limit on huge repos. + for start in range(0, len(shas), 500): + batch = shas[start : start + 500] + placeholders = ",".join("?" * len(batch)) + rows = conn.execute( + f"SELECT content_sha, embedding FROM vec_cache " + f"WHERE model = ? AND content_sha IN ({placeholders})", + (model, *batch), + ).fetchall() + for r in rows: + out[r[0]] = r[1] + return out + + +def store_cached_embeddings( + conn: sqlite3.Connection, *, model: str, items: Sequence[tuple[str, bytes]] +) -> None: + """Insert (content_sha, blob) pairs into the content-addressed embedding cache.""" + if not items: + return + conn.executemany( + "INSERT OR REPLACE INTO vec_cache (model, content_sha, embedding) VALUES (?, ?, ?)", + [(model, sha, blob) for sha, blob in items], ) @@ -496,12 +544,12 @@ def prune_orphan_vectors(conn: sqlite3.Connection) -> int: try: current_ids = {r[0] for r in conn.execute("SELECT id FROM chunks").fetchall()} orphan_ids = [ - r[0] + (r[0],) for r in conn.execute("SELECT chunk_id FROM vec_chunks").fetchall() if r[0] not in current_ids ] - for oid in orphan_ids: - conn.execute("DELETE FROM vec_chunks WHERE chunk_id = ?", (oid,)) + if orphan_ids: + conn.executemany("DELETE FROM vec_chunks WHERE chunk_id = ?", orphan_ids) return len(orphan_ids) except Exception: return 0 diff --git a/tests/test_pipeline_vectors.py b/tests/test_pipeline_vectors.py index 115a1b2..4cd805d 100644 --- a/tests/test_pipeline_vectors.py +++ b/tests/test_pipeline_vectors.py @@ -56,3 +56,79 @@ def test_reindex_vectors_idempotent(sample_repo, tmp_path, fake_backend, monkeyp assert s2.vectors == 0 assert repo.count_vectors(db.conn) == s1.vectors db.close() + + +class _CountingBackend: + """Wraps an embedding backend to record how many texts it is asked to embed.""" + + enabled = True + name = "fake" + + def __init__(self, inner): + self._inner = inner + self.dim = inner.dim + self.calls = 0 + self.embedded = 0 + + def embed(self, texts): + self.calls += 1 + self.embedded += len(texts) + return self._inner.embed(texts) + + +def test_reindex_does_not_recompute_unchanged_embeddings( + sample_repo, tmp_path, fake_backend, monkeypatch +): + """A full rebuild must reuse cached vectors for unchanged content, never re-embed it.""" + import codebase_index.indexer.pipeline as pipe + + backend = _CountingBackend(fake_backend) + monkeypatch.setattr(pipe, "resolve_backend", lambda cfg, warn=None: backend) + cfg = Config() + cfg.root = str(sample_repo) + cfg.embeddings.enabled = True + db = Database(tmp_path / "index.sqlite").open() + + build_index(cfg, db, root=sample_repo) + first_pass = backend.embedded + assert first_pass > 0 + + build_index(cfg, db, root=sample_repo) + # Chunk ids churn across rebuilds, but content is identical -> cache hit, no backend work. + assert backend.embedded == first_pass + db.close() + + +def test_changed_file_only_embeds_new_content( + sample_repo, tmp_path, fake_backend, monkeypatch +): + """Editing one file embeds only its new chunks; the rest come from the cache.""" + import shutil + + import codebase_index.indexer.pipeline as pipe + + # Copy the fixture so the edit below never mutates the shared, committed sample repo. + repo_copy = tmp_path / "repo" + shutil.copytree(sample_repo, repo_copy) + + backend = _CountingBackend(fake_backend) + monkeypatch.setattr(pipe, "resolve_backend", lambda cfg, warn=None: backend) + cfg = Config() + cfg.root = str(repo_copy) + cfg.embeddings.enabled = True + db = Database(tmp_path / "index.sqlite").open() + + build_index(cfg, db, root=repo_copy) + baseline = backend.embedded + + target = repo_copy / "src" / "auth" / "token.py" + target.write_text( + target.read_text(encoding="utf-8") + "\n\ndef brand_new_helper():\n return 42\n", + encoding="utf-8", + ) + s2 = build_index(cfg, db, root=repo_copy) + + # Some new chunks were embedded, but far fewer than a full re-embed of the repo. + assert s2.vectors > 0 + assert backend.embedded - baseline < baseline + db.close() From e92afe40dd6f6f1246c8cbc398a9d816a8816ca6 Mon Sep 17 00:00:00 2001 From: denfry Date: Mon, 8 Jun 2026 07:24:10 +0300 Subject: [PATCH 7/8] chore(release): stamp installed skill copies to 1.2.2 The package is at 1.2.2 (pyproject/__init__), but the installed skill `.skill_version` stamps still read 1.2.1. Sync them so the auto-update check doesn't see phantom drift. Co-Authored-By: Claude Opus 4.8 --- .claude/skills/codebase-index/.skill_version | 2 +- .codex/skills/codebase-index/.skill_version | 2 +- .opencode/skills/codebase-index/.skill_version | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.claude/skills/codebase-index/.skill_version b/.claude/skills/codebase-index/.skill_version index cb174d5..23aa839 100644 --- a/.claude/skills/codebase-index/.skill_version +++ b/.claude/skills/codebase-index/.skill_version @@ -1 +1 @@ -1.2.1 \ No newline at end of file +1.2.2 diff --git a/.codex/skills/codebase-index/.skill_version b/.codex/skills/codebase-index/.skill_version index cb174d5..23aa839 100644 --- a/.codex/skills/codebase-index/.skill_version +++ b/.codex/skills/codebase-index/.skill_version @@ -1 +1 @@ -1.2.1 \ No newline at end of file +1.2.2 diff --git a/.opencode/skills/codebase-index/.skill_version b/.opencode/skills/codebase-index/.skill_version index cb174d5..23aa839 100644 --- a/.opencode/skills/codebase-index/.skill_version +++ b/.opencode/skills/codebase-index/.skill_version @@ -1 +1 @@ -1.2.1 \ No newline at end of file +1.2.2 From e1c0850bc27213d20ec0207475aa1380f73eb7c4 Mon Sep 17 00:00:00 2001 From: denfry Date: Mon, 8 Jun 2026 07:28:12 +0300 Subject: [PATCH 8/8] fix(types): assert _PARSE_CONFIG is set before use in worker parse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_parse_one` runs in ProcessPoolExecutor workers and reads the module global `_PARSE_CONFIG` (typed `Optional[Config]`, set by the pool initializer `_pool_init`). Passing it straight to `_parse`, which expects `Config`, tripped mypy (`Config | None` vs `Config`). The global is always set before any worker parses, so assert that invariant — documents the contract and satisfies the type checker. Restores a clean `mypy src/codebase_index`. Co-Authored-By: Claude Opus 4.8 --- src/codebase_index/indexer/pipeline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/codebase_index/indexer/pipeline.py b/src/codebase_index/indexer/pipeline.py index b4a8e80..6f8237d 100644 --- a/src/codebase_index/indexer/pipeline.py +++ b/src/codebase_index/indexer/pipeline.py @@ -85,6 +85,7 @@ def _pool_init(config: Config) -> None: def _parse_one(cand) -> _ParseResult: """Parse a single file. Top-level for ProcessPoolExecutor pickling; uses _PARSE_CONFIG.""" config = _PARSE_CONFIG + assert config is not None, "_pool_init must set _PARSE_CONFIG before any worker parses" try: sha256 = _sha256_file(cand.path) except OSError: