diff --git a/.codebase-context/memory.json b/.codebase-context/memory.json
index ea57645..8d2a55e 100644
--- a/.codebase-context/memory.json
+++ b/.codebase-context/memory.json
@@ -129,5 +129,122 @@
     "memory": "Never commit .planning/** or use gsd-tools commit; always use plain git commits with explicit messages",
     "reason": "We accidentally committed ignored .planning files and created pushed placeholder commits (e.g., --help). This is explicitly disallowed in this repo.",
     "date": "2026-02-20T19:08:22.195Z"
+  },
+  {
+    "id": "d4d3b072ea53",
+    "type": "gotcha",
+    "category": "conventions",
+    "memory": "fix(watcher-tests): await ready + harden Windows cleanup (#55)",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-03-01T15:52:27.000Z",
+    "source": "git"
+  },
+  {
+    "id": "8821f0a1affe",
+    "type": "gotcha",
+    "category": "conventions",
+    "memory": "fix(watcher): allow debounce 0 and harden test",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-28T17:16:59.000Z",
+    "source": "git"
+  },
+  {
+    "id": "c06d7e79009f",
+    "type": "gotcha",
+    "category": "conventions",
+    "memory": "fix(watcher): queue refresh during indexing",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-28T17:12:35.000Z",
+    "source": "git"
+  },
+  {
+    "id": "73638343a916",
+    "type": "gotcha",
+    "category": "conventions",
+    "memory": "fix(refs): prevent out-of-root file reads from index",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-28T15:58:03.000Z",
+    "source": "git"
+  },
+  {
+    "id": "8a0f5410d2e2",
+    "type": "decision",
+    "category": "architecture",
+    "memory": "refactor: eliminate all any types and consolidate type definitions (#46)",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-22T19:45:41.000Z",
+    "source": "git"
+  },
+  {
+    "id": "6a5bf4f56124",
+    "type": "gotcha",
+    "category": "conventions",
+    "memory": "fix: close v1.8 post-merge integration gaps (#44)",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-22T17:58:51.000Z",
+    "source": "git"
+  },
+  {
+    "id": "8e014f2b09cd",
+    "type": "decision",
+    "category": "architecture",
+    "memory": "refactor: clean up formatting and improve readability in multiple files",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-21T12:50:44.000Z",
+    "source": "git"
+  },
+  {
+    "id": "3125c037fc40",
+    "type": "decision",
+    "category": "architecture",
+    "memory": "refactor: extract 11 MCP tool handlers into src/tools/ (#37)",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-20T22:21:55.000Z",
+    "source": "git"
+  },
+  {
+    "id": "6ae00519485a",
+    "type": "gotcha",
+    "category": "conventions",
+    "memory": "fix(03-02): add regression guardrails for extraction and large-file safety",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-20T18:35:47.000Z",
+    "source": "git"
+  },
+  {
+    "id": "0080c6e64d64",
+    "type": "gotcha",
+    "category": "conventions",
+    "memory": "fix(03-02): harden tree-sitter extraction against byte-offset and parser failures",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-20T18:33:19.000Z",
+    "source": "git"
+  },
+  {
+    "id": "92493e34e3e1",
+    "type": "gotcha",
+    "category": "conventions",
+    "memory": "fix(02-tree-sitter-02): prevent symbol-aware chunk merging",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-20T14:41:29.000Z",
+    "source": "git"
+  },
+  {
+    "id": "32c95757f1b3",
+    "type": "gotcha",
+    "category": "conventions",
+    "memory": "fix(02-01): fall back when tree-sitter parse has errors",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-20T14:38:35.000Z",
+    "source": "git"
+  },
+  {
+    "id": "a597568f48c2",
+    "type": "gotcha",
+    "category": "conventions",
+    "memory": "fix: guard null chunk.content crash + docs rewrite for v1.6.1",
+    "reason": "Auto-extracted from git commit history",
+    "date": "2026-02-15T13:04:10.000Z",
+    "source": "git"
   }
 ]
\ No newline at end of file
diff --git a/.npmignore b/.npmignore
new file mode 100644
index 0000000..ea17c86
--- /dev/null
+++ b/.npmignore
@@ -0,0 +1,2 @@
+docs/TODO.md
+docs/visuals.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0eba259..d787849 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,43 +2,41 @@
 
 ## [1.7.0](https://github.com/PatrickSys/codebase-context/compare/v1.6.1...v1.7.0) (2026-02-21)
 
-
 ### Features
 
-* **02-03:** implement keyword-index symbol reference lookup ([ccfc564](https://github.com/PatrickSys/codebase-context/commit/ccfc5649a3f4e321bbd3770e5945f83213e103a6))
-* **02-03:** register get_symbol_references MCP tool ([6f6bc3a](https://github.com/PatrickSys/codebase-context/commit/6f6bc3ae3bfa9af13c404028c1307d774b69291c))
-* **03-01:** add frozen controlled eval fixture and local codebase ([46736ed](https://github.com/PatrickSys/codebase-context/commit/46736ed4c4681767164682a774e1ddf08ee81768))
-* **03-03:** add multi-codebase eval runner command ([b065042](https://github.com/PatrickSys/codebase-context/commit/b065042f9a689d82485532872009af571d22db44))
-* **03-03:** centralize eval harness scoring logic ([5c5319b](https://github.com/PatrickSys/codebase-context/commit/5c5319b4a3c9caf30f7b31de3ee210bc153ee58c))
-* **04-01:** add curated grammar manifest, sync script, and publish inclusion ([908f39a](https://github.com/PatrickSys/codebase-context/commit/908f39a2c82a9630150262299ec8ae1f25c269ab))
-* **04-01:** update tree-sitter loader to resolve packaged grammars and fail closed ([458520f](https://github.com/PatrickSys/codebase-context/commit/458520ff3d24bd9ff6399b6bedfe1b6776fc6579))
-* **04-02:** add manifest-driven grammar CI test with fail-closed fallback ([2559405](https://github.com/PatrickSys/codebase-context/commit/2559405007e17bad6fffcf6ea61b97475f0da1e6))
-* **05-01:** create AST-aligned chunking engine with symbol tree builder ([f865abc](https://github.com/PatrickSys/codebase-context/commit/f865abc0a3877441b492695c02ddca12fe9b36c6))
-* **05-01:** wire AST-aligned chunker into GenericAnalyzer with 21 unit tests ([68a2d6d](https://github.com/PatrickSys/codebase-context/commit/68a2d6da844a9ffdb6104670c565f338487d2199))
-* **05-02:** add scope-aware prefix generation to AST chunks ([3dbd43e](https://github.com/PatrickSys/codebase-context/commit/3dbd43eec1d6cdf63ec4d5094c870bf2ee6b164d))
-* **06-01:** add index format metadata and headers ([a216c6d](https://github.com/PatrickSys/codebase-context/commit/a216c6dd2c7614b705525bc30ba8fddf918c7cf3))
-* **06-01:** gate index consumers on IndexMeta validation ([6a52c0d](https://github.com/PatrickSys/codebase-context/commit/6a52c0d33d408a7463e036eac8a650c461c86a43))
-* **06-02:** implement staging directory build and atomic swap for full rebuild ([d719801](https://github.com/PatrickSys/codebase-context/commit/d71980128795bdf8e7c7ab16beb350729a85e306))
-* **AST indexing:** Implement relationship index  ([#38](https://github.com/PatrickSys/codebase-context/issues/38)) ([5b05092](https://github.com/PatrickSys/codebase-context/commit/5b05092b4d5a4a08b117fdc06a3292afdcc8764e))
-* expose all 10 MCP tools via CLI + document them ([#42](https://github.com/PatrickSys/codebase-context/issues/42)) ([7581fba](https://github.com/PatrickSys/codebase-context/commit/7581fbac5b4fd5bc52abc56d946bf55962870566))
-* references confidence, remove get_component_usage, ranked search hints ([#39](https://github.com/PatrickSys/codebase-context/issues/39)) ([33616aa](https://github.com/PatrickSys/codebase-context/commit/33616aa48b165d5cfd95c44bc416cb74c4fd5cbf))
-* rework decision-card to make it based on AST parsing ([#41](https://github.com/PatrickSys/codebase-context/issues/41)) ([ac4389d](https://github.com/PatrickSys/codebase-context/commit/ac4389d6cc55b7f8efc310a6e020bcd184a70adc))
-* symbol ranking, smart snippets, and edit decision card ([#40](https://github.com/PatrickSys/codebase-context/issues/40)) ([03964b3](https://github.com/PatrickSys/codebase-context/commit/03964b3f40cc0fa0caf9768747a39fb559daaa8e))
-* use tree-sitter symbols in generic analyzer ([b470709](https://github.com/PatrickSys/codebase-context/commit/b470709aa77f02325ed5a4e2b0710017020565da))
-
+- **02-03:** implement keyword-index symbol reference lookup ([ccfc564](https://github.com/PatrickSys/codebase-context/commit/ccfc5649a3f4e321bbd3770e5945f83213e103a6))
+- **02-03:** register get_symbol_references MCP tool ([6f6bc3a](https://github.com/PatrickSys/codebase-context/commit/6f6bc3ae3bfa9af13c404028c1307d774b69291c))
+- **03-01:** add frozen controlled eval fixture and local codebase ([46736ed](https://github.com/PatrickSys/codebase-context/commit/46736ed4c4681767164682a774e1ddf08ee81768))
+- **03-03:** add multi-codebase eval runner command ([b065042](https://github.com/PatrickSys/codebase-context/commit/b065042f9a689d82485532872009af571d22db44))
+- **03-03:** centralize eval harness scoring logic ([5c5319b](https://github.com/PatrickSys/codebase-context/commit/5c5319b4a3c9caf30f7b31de3ee210bc153ee58c))
+- **04-01:** add curated grammar manifest, sync script, and publish inclusion ([908f39a](https://github.com/PatrickSys/codebase-context/commit/908f39a2c82a9630150262299ec8ae1f25c269ab))
+- **04-01:** update tree-sitter loader to resolve packaged grammars and fail closed ([458520f](https://github.com/PatrickSys/codebase-context/commit/458520ff3d24bd9ff6399b6bedfe1b6776fc6579))
+- **04-02:** add manifest-driven grammar CI test with fail-closed fallback ([2559405](https://github.com/PatrickSys/codebase-context/commit/2559405007e17bad6fffcf6ea61b97475f0da1e6))
+- **05-01:** create AST-aligned chunking engine with symbol tree builder ([f865abc](https://github.com/PatrickSys/codebase-context/commit/f865abc0a3877441b492695c02ddca12fe9b36c6))
+- **05-01:** wire AST-aligned chunker into GenericAnalyzer with 21 unit tests ([68a2d6d](https://github.com/PatrickSys/codebase-context/commit/68a2d6da844a9ffdb6104670c565f338487d2199))
+- **05-02:** add scope-aware prefix generation to AST chunks ([3dbd43e](https://github.com/PatrickSys/codebase-context/commit/3dbd43eec1d6cdf63ec4d5094c870bf2ee6b164d))
+- **06-01:** add index format metadata and headers ([a216c6d](https://github.com/PatrickSys/codebase-context/commit/a216c6dd2c7614b705525bc30ba8fddf918c7cf3))
+- **06-01:** gate index consumers on IndexMeta validation ([6a52c0d](https://github.com/PatrickSys/codebase-context/commit/6a52c0d33d408a7463e036eac8a650c461c86a43))
+- **06-02:** implement staging directory build and atomic swap for full rebuild ([d719801](https://github.com/PatrickSys/codebase-context/commit/d71980128795bdf8e7c7ab16beb350729a85e306))
+- **AST indexing:** Implement relationship index ([#38](https://github.com/PatrickSys/codebase-context/issues/38)) ([5b05092](https://github.com/PatrickSys/codebase-context/commit/5b05092b4d5a4a08b117fdc06a3292afdcc8764e))
+- expose all 10 MCP tools via CLI + document them ([#42](https://github.com/PatrickSys/codebase-context/issues/42)) ([7581fba](https://github.com/PatrickSys/codebase-context/commit/7581fbac5b4fd5bc52abc56d946bf55962870566))
+- references confidence, remove get_component_usage, ranked search hints ([#39](https://github.com/PatrickSys/codebase-context/issues/39)) ([33616aa](https://github.com/PatrickSys/codebase-context/commit/33616aa48b165d5cfd95c44bc416cb74c4fd5cbf))
+- rework decision-card to make it based on AST parsing ([#41](https://github.com/PatrickSys/codebase-context/issues/41)) ([ac4389d](https://github.com/PatrickSys/codebase-context/commit/ac4389d6cc55b7f8efc310a6e020bcd184a70adc))
+- symbol ranking, smart snippets, and edit decision card ([#40](https://github.com/PatrickSys/codebase-context/issues/40)) ([03964b3](https://github.com/PatrickSys/codebase-context/commit/03964b3f40cc0fa0caf9768747a39fb559daaa8e))
+- use tree-sitter symbols in generic analyzer ([b470709](https://github.com/PatrickSys/codebase-context/commit/b470709aa77f02325ed5a4e2b0710017020565da))
 
 ### Bug Fixes
 
-* **02-01:** fall back when tree-sitter parse has errors ([8a7cd92](https://github.com/PatrickSys/codebase-context/commit/8a7cd92cab25b045b5108b1cba04773f644eab10))
-* **02-tree-sitter-02:** prevent symbol-aware chunk merging ([fd02625](https://github.com/PatrickSys/codebase-context/commit/fd0262516e262eff0c17646eaca021d6288c6647))
-* **03-02:** add regression guardrails for extraction and large-file safety ([a1c71de](https://github.com/PatrickSys/codebase-context/commit/a1c71de070b434f326dc80e627964c1540eea93f))
-* **03-02:** harden tree-sitter extraction against byte-offset and parser failures ([375a48f](https://github.com/PatrickSys/codebase-context/commit/375a48f231c85d72157aa74ea964db27bf9a983e))
+- **02-01:** fall back when tree-sitter parse has errors ([8a7cd92](https://github.com/PatrickSys/codebase-context/commit/8a7cd92cab25b045b5108b1cba04773f644eab10))
+- **02-tree-sitter-02:** prevent symbol-aware chunk merging ([fd02625](https://github.com/PatrickSys/codebase-context/commit/fd0262516e262eff0c17646eaca021d6288c6647))
+- **03-02:** add regression guardrails for extraction and large-file safety ([a1c71de](https://github.com/PatrickSys/codebase-context/commit/a1c71de070b434f326dc80e627964c1540eea93f))
+- **03-02:** harden tree-sitter extraction against byte-offset and parser failures ([375a48f](https://github.com/PatrickSys/codebase-context/commit/375a48f231c85d72157aa74ea964db27bf9a983e))
 
 ## [Unreleased]
 
 ### Added
 
-- **Definition-first ranking**: Exact-name searches now show the file that *defines* a symbol before files that use it. For example, searching `parseConfig` shows the function definition first, then callers.
+- **Definition-first ranking**: Exact-name searches now show the file that _defines_ a symbol before files that use it. For example, searching `parseConfig` shows the function definition first, then callers.
 
 ### Refactored
 
@@ -63,16 +61,26 @@
 - Shared eval scoring/reporting module (`src/eval/*`) used by both the CLI runner and the test suite.
 - Second frozen eval fixture plus an in-repo controlled TypeScript codebase for fully-offline eval runs.
 - Regression tests covering Tree-sitter Unicode slicing, parser cleanup/reset behavior, and large/generated file skipping.
+- **Tree-sitter symbol references** (PR #49): identifier scan excludes comment/string nodes; `confidence: "syntactic"` returned; `usageCount` reflects real AST occurrences, not regex matches.
+- **Import edge details** (PR #50): `importDetails` per edge (line number + imported symbols) persisted in `relationships.json`. Backward-compatible with existing `imports` field.
+- **2-hop transitive impact** (PR #50): `search --intent edit` impact now shows direct importers (hop 1) and their importers (hop 2), each labeled with distance. Capped at 20 files.
+- **Chokidar file watcher** (PR #52): index auto-refreshes in MCP server mode on file save (2 s debounce). No manual `reindex` needed during active editing sessions.
+- **CLI human formatters** (PR #48): all 9 commands now render as structured human-readable output. `--json` flag on every command for agent/pipe consumption.
+- **`status` + `reindex` formatters** (PR #56): status box with index health, progress, and last-built time. ASCII fallback via `CODEBASE_CONTEXT_ASCII=1`.
+- **`docs/cli.md` gallery** (PR #56): command reference with output previews for all 9 CLI commands.
 
 ### Changed
 
 - **Preflight response shape**: Renamed `reason` to `nextAction` for clarity. Removed internal fields (`evidenceLock`, `riskLevel`, `confidence`) so the output is stable and doesn't change shape unexpectedly.
- 
+
 ### Fixed
 
 - Null-pointer crash in GenericAnalyzer when chunk content is undefined.
 - Tree-sitter symbol extraction now treats node offsets as UTF-8 byte ranges and evicts cached parsers on failures/timeouts.
 - **Post-merge integration gaps** (v1.8 audit): Removed orphaned `get_component_usage` source file, deleted phantom allowlist entry, removed dead guidance strings referencing the deleted tool. Added fallback decision card when `intelligence.json` is absent during edit-intent searches, now returns `ready: false` with actionable guidance instead of silently skipping.
+- Watcher initialization race: `onReady` hook ensures tests wait for chokidar readiness before asserting (PR #55).
+- Windows temp dir cleanup hardened with retry/backoff to fix `ENOTEMPTY`/`EPERM` test flakes (PR #55).
+- `--json` output now always pure JSON on stdout; status lines go to stderr (PR #48).
 
 ## [1.6.2] - 2026-02-17
 
diff --git a/README.md b/README.md
index 8508591..3a34854 100644
--- a/README.md
+++ b/README.md
@@ -16,9 +16,11 @@ Here's what codebase-context does:
 
 **Remembers across sessions** - Decisions, failures, workarounds that look wrong but exist for a reason - the battle scars that aren't in the comments. Recorded once, surfaced automatically so the agent doesn't "clean up" something you spent a week getting right. Conventional git commits (`refactor:`, `migrate:`, `fix:`) auto-extract into memory with zero effort. Stale memories decay and get flagged instead of blindly trusted.
 
-**Checks before editing** - Before editing something, you get a decision card showing whether there's enough evidence to proceed. If a symbol has four callers and only two appear in your search results, the card shows that coverage gap. If coverage is low, `whatWouldHelp` lists the specific searches to run before you touch anything. When code, team memories, and patterns contradict each other, it tells you to look deeper instead of guessing.
+**Checks before editing** - Before editing something, you get a decision card showing whether there's enough evidence to proceed. If a symbol has four callers (files that import or reference it) and only two appear in your search results, the card shows that coverage gap. If coverage is low, `whatWouldHelp` lists the specific searches to run before you touch anything. When code, team memories, and patterns contradict each other, it tells you to look deeper instead of guessing.
 
-One tool call returns all of it. Local-first - your code never leaves your machine.
+One tool call returns all of it. Local-first - by default your code never leaves your machine. If you opt into `EMBEDDING_PROVIDER=openai` for cloud speed, your code is sent to an external API for embedding.
+
+The index auto-refreshes as you edit - a file watcher triggers incremental reindex in the background when the MCP server is running. No stale context between tool calls.
 
 <!-- TODO: Add demo GIF: search_codebase("How does this app attach the auth token to outgoing API calls?") -> AuthInterceptor top result + preflight + agent proceeds or asks -->
 <!-- ![Demo](./docs/assets/demo.gif) -->
@@ -302,13 +304,13 @@ Structured filters available: `framework`, `language`, `componentType`, `layer`
 
 ## Configuration
 
-| Variable                 | Default        | Description                                               |
-| ------------------------ | -------------- | --------------------------------------------------------- |
-| `EMBEDDING_PROVIDER`     | `transformers` | `openai` (fast, cloud) or `transformers` (local, private) |
-| `OPENAI_API_KEY`         | -              | Required only if using `openai` provider                  |
-| `CODEBASE_ROOT`          | -              | Project root (CLI arg takes precedence)                   |
-| `CODEBASE_CONTEXT_DEBUG` | -              | Set to `1` for verbose logging                            |
-| `EMBEDDING_MODEL`        | `Xenova/bge-small-en-v1.5` | Local embedding model override (e.g. `ibm-granite/granite-embedding-30m-english` for Granite) |
+| Variable                 | Default                    | Description                                                                                   |
+| ------------------------ | -------------------------- | --------------------------------------------------------------------------------------------- |
+| `EMBEDDING_PROVIDER`     | `transformers`             | `openai` (fast, cloud) or `transformers` (local, private)                                     |
+| `OPENAI_API_KEY`         | -                          | Required only if using `openai` provider                                                      |
+| `CODEBASE_ROOT`          | -                          | Project root (CLI arg takes precedence)                                                       |
+| `CODEBASE_CONTEXT_DEBUG` | -                          | Set to `1` for verbose logging                                                                |
+| `EMBEDDING_MODEL`        | `Xenova/bge-small-en-v1.5` | Local embedding model override (e.g. `onnx-community/granite-embedding-small-english-r2-ONNX` for Granite) |
 
 ## Performance
 
diff --git a/docs/capabilities.md b/docs/capabilities.md
index 58016a6..30ed9c3 100644
--- a/docs/capabilities.md
+++ b/docs/capabilities.md
@@ -42,7 +42,7 @@ npx codebase-context reindex --incremental
 | ----------------------- | ----------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `search_codebase`       | `query`, optional `intent`, `limit`, `filters`, `includeSnippets` | Ranked results (`file`, `summary`, `score`, `type`, `trend`, `patternWarning`, `relationships`, `hints`) + `searchQuality` + decision card (`ready`, `nextAction`, `patterns`, `bestExample`, `impact`, `whatWouldHelp`) when `intent="edit"`. Hints capped at 3 per category. |
 | `get_team_patterns`     | optional `category`                                               | Pattern frequencies, trends, golden files, conflicts                                                                                                                                 |
-| `get_symbol_references` | `symbol`, optional `limit`                                        | Concrete symbol usage evidence: `usageCount` + top usage snippets + `confidence` + `isComplete`. `confidence: "syntactic"` means static/source-based only (no runtime or dynamic dispatch). Replaces the removed `get_component_usage`. |
+| `get_symbol_references` | `symbol`, optional `limit`                                        | Concrete symbol usage evidence: `usageCount` + top usage snippets + `confidence` + `isComplete`. `confidence: "syntactic"` means static/source-based only (no runtime or dynamic dispatch). When Tree-sitter + file content are available, comments and string literals are excluded from the scan — the count reflects real identifier nodes only. Replaces the removed `get_component_usage`. |
 | `remember`              | `type`, `category`, `memory`, `reason`                            | Persists to `.codebase-context/memory.json`                                                                                                                                          |
 | `get_memory`            | optional `category`, `type`, `query`, `limit`                     | Memories with confidence decay scoring                                                                                                                                               |
 
@@ -76,7 +76,7 @@ Ordered by execution:
 
 - **Chunk size**: 50 lines, 0 overlap
 - **Reranker trigger**: activates when top-3 results are within 0.08 score of each other
-- **Embedding model**: `Xenova/bge-small-en-v1.5` (512 token context, fast, local-first) via `@xenova/transformers`. Override: `EMBEDDING_MODEL=ibm-granite/granite-embedding-30m-english` for Granite (8192 ctx, slower).
+- **Embedding model**: `Xenova/bge-small-en-v1.5` (512 token context, fast, local-first) via `@huggingface/transformers`. Override: `EMBEDDING_MODEL=onnx-community/granite-embedding-small-english-r2-ONNX` for Granite (8192 ctx, slower).
 - **Vector DB**: LanceDB with cosine distance
 
 ## Decision Card (Edit Intent)
@@ -104,6 +104,8 @@ Returned as `preflight` when search `intent` is `edit`, `refactor`, or `migrate`
 }
 ```
 
+Impact is 2-hop transitive: direct importers (hop 1) and their importers (hop 2), each labeled with distance. Capped at 20 files to avoid noise.
+
 **Fields explained:**
 
 - `ready`: boolean, whether evidence is sufficient to proceed
@@ -144,10 +146,11 @@ Returned as `preflight` when search `intent` is `edit`, `refactor`, or `migrate`
 
 - Initial: full scan → chunking (50 lines, 0 overlap) → embedding → vector DB (LanceDB) + keyword index (Fuse.js)
 - Incremental: SHA-256 manifest diffing, selective embed/delete, full intelligence regeneration
+- Auto-refresh (MCP server mode only): chokidar file watcher triggers incremental reindex after a debounce on any source file change — `node_modules/`, `.git/`, `dist/`, and `.codebase-context/` are excluded. One-shot CLI runs skip the watcher entirely.
 - Version gating: `index-meta.json` tracks format version; mismatches trigger automatic rebuild
 - Crash-safe rebuilds: full rebuilds write to `.staging/` and swap atomically only on success
 - Auto-heal: corrupted index triggers automatic full re-index on next search
-- Relationships sidecar: `relationships.json` contains file import graph and symbol export index
+- Relationships sidecar: `relationships.json` contains file import graph, symbol export index, and per-edge import details (`importDetails`: line number + imported symbol names where available)
 - Storage: `.codebase-context/` directory (memory.json + generated files)
 
 ## Analyzers
@@ -170,3 +173,11 @@ Reproducible evaluation is shipped as a CLI entrypoint backed by shared scoring/
   - `tests/fixtures/eval-angular-spotify.json` (real-world)
   - `tests/fixtures/eval-controlled.json` + `tests/fixtures/codebases/eval-controlled/` (offline controlled)
 - **Reported metrics:** Top-1 accuracy, Top-3 recall, spec contamination rate, and a gate pass/fail
+
+## Limitations
+
+- **Symbol refs are not a call-graph.** `get_symbol_references` counts identifier-node occurrences in the AST (comments/strings excluded via Tree-sitter). It does not distinguish call sites from type annotations, variable assignments, or imports. Full call-site-specific analysis (`call_expression` nodes only) is a roadmap item.
+- **Impact is 2-hop max.** `computeImpactCandidates` walks direct importers then their importers. Full BFS reachability is on the roadmap.
+- **Angular is the only framework with a rich dedicated analyzer.** All other languages go through the Generic analyzer (30+ languages, chunking + import graph, no framework-specific signal extraction).
+- **Default embedding model is `bge-small-en-v1.5` (512-token context).** Granite (8192-token context) is opt-in via `EMBEDDING_MODEL`. OpenAI is opt-in via `EMBEDDING_PROVIDER=openai`, which sends code to an external API.
+- **Patterns are file-level frequency counts.** Not semantic clustering. Rising/Declining trend is derived from git commit recency for files using each pattern, not from usage semantics.
diff --git a/docs/cli.md b/docs/cli.md
index ff7ce8c..4a0897c 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -153,7 +153,7 @@ Example output:
 │ State: ready                                                         │
 │ Root:  /path/to/repo                                                 │
 │                                                                      │
-│ → Use refresh_index to manually trigger re-indexing when needed.     │
+│ → Run `reindex` to re-index (`--incremental` skips unchanged).       │
 │                                                                      │
 └──────────────────────────────────────────────────────────────────────┘
 ```
@@ -165,6 +165,8 @@ npx -y codebase-context reindex
 npx -y codebase-context reindex --incremental --reason "changed watcher logic"
 ```
 
+> **MCP server mode**: if you're running codebase-context as an MCP server (long-running process), the index auto-refreshes via a file watcher — you don't need to call `reindex` between edits. Use `reindex` for one-shot CLI runs or to force a full rebuild.
+
 ## `style-guide`
 
 ```bash
diff --git a/package.json b/package.json
index f4f22be..7293f2e 100644
--- a/package.json
+++ b/package.json
@@ -39,7 +39,8 @@
     "grammars",
     "README.md",
     "LICENSE",
-    "docs"
+    "docs/cli.md",
+    "docs/capabilities.md"
   ],
   "packageManager": "pnpm@10.27.0",
   "engines": {
diff --git a/scripts/sync-grammars.mjs b/scripts/sync-grammars.mjs
index cecfd0f..1bd9aa1 100644
--- a/scripts/sync-grammars.mjs
+++ b/scripts/sync-grammars.mjs
@@ -20,7 +20,8 @@ const CURATED_WASMS = [
   'tree-sitter-java.wasm',
   'tree-sitter-c.wasm',
   'tree-sitter-cpp.wasm',
-  'tree-sitter-c_sharp.wasm'
+  'tree-sitter-c_sharp.wasm',
+  'tree-sitter-kotlin.wasm'
 ];
 
 const sourceDir = path.join(path.dirname(require.resolve('tree-sitter-wasms/package.json')), 'out');
diff --git a/src/core/index-meta.ts b/src/core/index-meta.ts
index 758009c..8353dba 100644
--- a/src/core/index-meta.ts
+++ b/src/core/index-meta.ts
@@ -54,7 +54,9 @@ export const IndexMetaSchema = z.object({
       }),
       vectorDb: z.object({
         path: z.string().min(1),
-        provider: z.string().min(1)
+        provider: z.string().min(1),
+        embeddingProvider: z.string().optional(),
+        embeddingModel: z.string().optional()
       }),
       intelligence: z
         .object({
@@ -67,6 +69,16 @@ export const IndexMetaSchema = z.object({
 
 export type IndexMeta = z.infer<typeof IndexMetaSchema>;
 
+export function checkEmbeddingMismatch(
+  meta: IndexMeta,
+  currentProvider: string,
+  currentModel: string
+): boolean {
+  const stored = meta.artifacts.vectorDb;
+  if (!stored.embeddingProvider || !stored.embeddingModel) return false; // legacy index: provider/model were not recorded, so there is nothing to compare and no mismatch is reported
+  return stored.embeddingProvider !== currentProvider || stored.embeddingModel !== currentModel; // true when either the stored provider or the stored model differs from the current one
+}
+
 async function pathExists(targetPath: string): Promise<boolean> {
   try {
     await fs.access(targetPath);
diff --git a/src/core/indexer.ts b/src/core/indexer.ts
index 4b586c6..afef69b 100644
--- a/src/core/indexer.ts
+++ b/src/core/indexer.ts
@@ -21,7 +21,12 @@ import {
 } from '../types/index.js';
 import { analyzerRegistry } from './analyzer-registry.js';
 import { isCodeFile, isBinaryFile } from '../utils/language-detection.js';
-import { getEmbeddingProvider, DEFAULT_MODEL } from '../embeddings/index.js';
+import {
+  getEmbeddingProvider,
+  getConfiguredDimensions,
+  DEFAULT_MODEL,
+  parseEmbeddingProviderName
+} from '../embeddings/index.js';
 import { getStorageProvider, CodeChunkWithEmbedding } from '../storage/index.js';
 import {
   LibraryUsageTracker,
@@ -56,6 +61,7 @@ import {
   type FileManifest,
   type ManifestDiff
 } from './manifest.js';
+import { readIndexMeta, checkEmbeddingMismatch } from './index-meta.js';
 
 let cachedToolVersion: string | null = null;
 
@@ -239,6 +245,17 @@ export class CodebaseIndexer {
   }
 
   private mergeConfig(userConfig?: Partial<CodebaseConfig>): CodebaseConfig {
+    const defaultEmbeddingProvider =
+      parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers';
+
+    // When provider=openai and EMBEDDING_MODEL is not set, DEFAULT_MODEL resolves to the
+    // transformers fallback (Xenova/bge-small-en-v1.5), which the OpenAI API rejects.
+    // Use a sane OpenAI default instead.
+    const defaultModel =
+      defaultEmbeddingProvider === 'openai' && !process.env.EMBEDDING_MODEL
+        ? 'text-embedding-3-small'
+        : DEFAULT_MODEL;
+
     const defaultConfig: CodebaseConfig = {
       analyzers: {
         angular: { enabled: true, priority: 100 },
@@ -246,8 +263,25 @@ export class CodebaseIndexer {
         vue: { enabled: false, priority: 90 },
         generic: { enabled: true, priority: 10 }
       },
-      include: ['**/*.{ts,tsx,js,jsx,html,css,scss,sass,less}'],
-      exclude: ['node_modules/**', 'dist/**', 'build/**', '.git/**', 'coverage/**'],
+      include: [
+        '**/*.{ts,tsx,js,jsx,mjs,cjs,mts,cts}',
+        '**/*.{html,htm,css,scss,sass,less}',
+        '**/*.{py,pyi,rb,php}',
+        '**/*.{java,kt,kts,scala,swift,cs}',
+        '**/*.{go,rs}',
+        '**/*.{c,cpp,cc,cxx,h,hpp}',
+        '**/*.{sh,bash,zsh,ps1}',
+        '**/*.{sql,graphql,gql}',
+        '**/*.{json,jsonc,yaml,yml,toml,xml}'
+      ],
+      exclude: [
+        'node_modules/**',
+        'dist/**',
+        'build/**',
+        '.git/**',
+        'coverage/**',
+        '.codebase-context/**'
+      ],
       respectGitignore: true,
       parsing: {
         maxFileSize: 1048576,
@@ -267,8 +301,8 @@ export class CodebaseIndexer {
         includeChangelogs: false
       },
       embedding: {
-        provider: 'transformers',
-        model: DEFAULT_MODEL,
+        provider: defaultEmbeddingProvider,
+        model: defaultModel,
         batchSize: 32
       },
       skipEmbedding: false,
@@ -382,8 +416,30 @@ export class CodebaseIndexer {
           unchanged: diff.unchanged.length
         };
 
+        // Check for embedding provider/model mismatch — forces full rebuild to avoid
+        // silent vector dimension mismatch when switching providers or models.
+        try {
+          const existingMeta = await readIndexMeta(this.rootPath);
+          const currentProvider = this.config.embedding?.provider ?? 'transformers';
+          const currentModel = this.config.embedding?.model ?? DEFAULT_MODEL;
+          if (checkEmbeddingMismatch(existingMeta, currentProvider, currentModel)) {
+            const stored = existingMeta.artifacts.vectorDb;
+            console.error(
+              `Embedding provider/model changed (stored: ${stored.embeddingProvider}:${stored.embeddingModel}, current: ${currentProvider}:${currentModel}) — forcing full rebuild`
+            );
+            diff = null;
+          }
+        } catch {
+          // No meta yet or legacy index without embedding fields — proceed with incremental
+        }
+
         // Short-circuit: nothing changed
-        if (diff.added.length === 0 && diff.changed.length === 0 && diff.deleted.length === 0) {
+        if (
+          diff &&
+          diff.added.length === 0 &&
+          diff.changed.length === 0 &&
+          diff.deleted.length === 0
+        ) {
           console.error('No files changed - skipping re-index.');
           this.updateProgress('complete', 100);
           stats.duration = Date.now() - startTime;
@@ -731,7 +787,10 @@ export class CodebaseIndexer {
 
       if (!this.config.skipEmbedding) {
         const storagePath = path.join(activeContextDir, VECTOR_DB_DIRNAME);
-        const storageProvider = await getStorageProvider({ path: storagePath });
+        const storageProvider = await getStorageProvider(
+          { path: storagePath },
+          diff ? { expectedDimensions: getConfiguredDimensions(this.config.embedding) } : undefined
+        );
 
         if (diff) {
           // Incremental: delete old chunks for changed + deleted files, then add new
@@ -899,7 +958,12 @@ export class CodebaseIndexer {
             toolVersion,
             artifacts: {
               keywordIndex: { path: KEYWORD_INDEX_FILENAME },
-              vectorDb: { path: VECTOR_DB_DIRNAME, provider: 'lancedb' },
+              vectorDb: {
+                path: VECTOR_DB_DIRNAME,
+                provider: 'lancedb',
+                embeddingProvider: this.config.embedding?.provider ?? 'transformers',
+                embeddingModel: this.config.embedding?.model ?? DEFAULT_MODEL
+              },
               intelligence: { path: INTELLIGENCE_FILENAME },
               manifest: { path: MANIFEST_FILENAME },
               indexingStats: { path: INDEXING_STATS_FILENAME },
diff --git a/src/core/search.ts b/src/core/search.ts
index 09cc6e5..bb01da7 100644
--- a/src/core/search.ts
+++ b/src/core/search.ts
@@ -6,7 +6,7 @@ import Fuse from 'fuse.js';
 import path from 'path';
 import { promises as fs } from 'fs';
 import { CodeChunk, SearchResult, SearchFilters, IntelligenceData } from '../types/index.js';
-import { EmbeddingProvider, getEmbeddingProvider } from '../embeddings/index.js';
+import { EmbeddingProvider, EmbeddingConfig, getEmbeddingProvider } from '../embeddings/index.js';
 import { VectorStorageProvider, getStorageProvider } from '../storage/index.js';
 import { analyzerRegistry } from './analyzer-registry.js';
 import { IndexCorruptedError } from '../errors/index.js';
@@ -147,7 +147,15 @@ export class CodebaseSearcher {
       await this.loadKeywordIndex();
       await this.loadPatternIntelligence();
 
-      this.embeddingProvider = await getEmbeddingProvider();
+      // Use the embedding config the index was built with, not the current env-var defaults.
+      // This ensures query vectors are in the same space as stored vectors.
+      // Legacy indexes (no embeddingProvider stored) fall back to env-var defaults.
+      const storedProvider = this.indexMeta.artifacts.vectorDb.embeddingProvider;
+      const storedModel = this.indexMeta.artifacts.vectorDb.embeddingModel;
+      const embeddingConfig: Partial<EmbeddingConfig> = storedProvider
+        ? { provider: storedProvider as EmbeddingConfig['provider'], model: storedModel }
+        : {};
+      this.embeddingProvider = await getEmbeddingProvider(embeddingConfig);
       this.storageProvider = await getStorageProvider({
         path: this.storagePath
       });
diff --git a/src/embeddings/index.ts b/src/embeddings/index.ts
index c7ed0c8..aabb3fa 100644
--- a/src/embeddings/index.ts
+++ b/src/embeddings/index.ts
@@ -1,8 +1,30 @@
 export * from './types.js';
 export * from './transformers.js';
 
-import { EmbeddingProvider, EmbeddingConfig, DEFAULT_EMBEDDING_CONFIG } from './types.js';
-import { TransformersEmbeddingProvider } from './transformers.js';
+import {
+  EmbeddingProvider,
+  EmbeddingConfig,
+  DEFAULT_EMBEDDING_CONFIG,
+  DEFAULT_MODEL,
+  parseEmbeddingProviderName
+} from './types.js';
+import { TransformersEmbeddingProvider, MODEL_CONFIGS } from './transformers.js';
+
+/**
+ * Returns the expected embedding vector size for `config` without initializing any provider;
+ * callers use it to validate stored LanceDB dimensions before an incremental update.
+ * Unset fields default to the EMBEDDING_PROVIDER / EMBEDDING_MODEL env vars, then built-ins.
+ * OpenAI: 3072 when the model name contains "large", otherwise 1536. All other providers are
+ * looked up in MODEL_CONFIGS; NOTE(review): unlisted models yield a guessed 384 — confirm.
+ */
+export function getConfiguredDimensions(config: Partial<EmbeddingConfig> = {}): number {
+  const provider =
+    config.provider ?? parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers';
+  const model = config.model ?? process.env.EMBEDDING_MODEL ?? DEFAULT_MODEL;
+  if (provider === 'openai') return model.includes('large') ? 3072 : 1536; // text-embedding-3-large: 3072, all others: 1536
+  // Every non-OpenAI provider shares this MODEL_CONFIGS lookup; 384 is the last-resort default
+  return MODEL_CONFIGS[model]?.dimensions ?? 384;
+}
 
 let cachedProvider: EmbeddingProvider | null = null;
 let cachedProviderType: string | null = null;
diff --git a/src/embeddings/openai.ts b/src/embeddings/openai.ts
index 089e110..61c9ee9 100644
--- a/src/embeddings/openai.ts
+++ b/src/embeddings/openai.ts
@@ -11,7 +11,9 @@ interface OpenAIEmbeddingResponse {
  */
 export class OpenAIEmbeddingProvider implements EmbeddingProvider {
   readonly name = 'openai';
-  readonly dimensions = 1536; // Default for text-embedding-3-small
+  get dimensions(): number {
+    return this.modelName.includes('large') ? 3072 : 1536; // "large" in name: 3072, else 1536
+  }
 
   constructor(
     readonly modelName: string = 'text-embedding-3-small',
diff --git a/src/embeddings/transformers.ts b/src/embeddings/transformers.ts
index c2340ba..549cb89 100644
--- a/src/embeddings/transformers.ts
+++ b/src/embeddings/transformers.ts
@@ -6,7 +6,7 @@ interface ModelConfig {
   maxContext: number; // token context window — used to auto-scale batch size
 }
 
-const MODEL_CONFIGS: Record<string, ModelConfig> = {
+export const MODEL_CONFIGS: Record<string, ModelConfig> = {
   'Xenova/bge-small-en-v1.5': { dimensions: 384, maxContext: 512 },
   'Xenova/all-MiniLM-L6-v2': { dimensions: 384, maxContext: 512 },
   'Xenova/bge-base-en-v1.5': { dimensions: 768, maxContext: 512 },
diff --git a/src/embeddings/types.ts b/src/embeddings/types.ts
index 0406209..66e5a7a 100644
--- a/src/embeddings/types.ts
+++ b/src/embeddings/types.ts
@@ -18,13 +18,22 @@ export interface EmbeddingConfig {
   apiEndpoint?: string;
 }
 
+/** Returns `value` if it names a supported embedding provider; otherwise `undefined`. */
+export function parseEmbeddingProviderName(
+  value: unknown
+): EmbeddingConfig['provider'] | undefined {
+  const isKnown =
+    value === 'transformers' || value === 'ollama' || value === 'openai' || value === 'custom';
+  return isKnown ? value : undefined;
+}
+
 // Default: bge-small (fast, ~2min indexing, consumer-hardware safe)
 // Opt-in: set EMBEDDING_MODEL=onnx-community/granite-embedding-small-english-r2-ONNX for
 // better conceptual search at the cost of 5-10x slower indexing and higher RAM usage
 export const DEFAULT_MODEL = process.env.EMBEDDING_MODEL || 'Xenova/bge-small-en-v1.5';
 
 export const DEFAULT_EMBEDDING_CONFIG: EmbeddingConfig = {
-  provider: (process.env.EMBEDDING_PROVIDER as EmbeddingConfig['provider']) || 'transformers',
+  provider: parseEmbeddingProviderName(process.env.EMBEDDING_PROVIDER) ?? 'transformers',
   model: DEFAULT_MODEL,
   batchSize: 32,
   maxRetries: 3,
diff --git a/src/grammars/manifest.ts b/src/grammars/manifest.ts
index 7c7a9ff..87962b4 100644
--- a/src/grammars/manifest.ts
+++ b/src/grammars/manifest.ts
@@ -15,7 +15,8 @@ export const CURATED_LANGUAGE_TO_WASM: Record<string, string> = {
   java: 'tree-sitter-java.wasm',
   c: 'tree-sitter-c.wasm',
   cpp: 'tree-sitter-cpp.wasm',
-  csharp: 'tree-sitter-c_sharp.wasm'
+  csharp: 'tree-sitter-c_sharp.wasm',
+  kotlin: 'tree-sitter-kotlin.wasm'
 };
 
 /**
diff --git a/src/storage/index.ts b/src/storage/index.ts
index 017ce98..fb9d9e2 100644
--- a/src/storage/index.ts
+++ b/src/storage/index.ts
@@ -13,12 +13,13 @@ import { LanceDBStorageProvider } from './lancedb.js';
  * Get a storage provider based on configuration
  */
 export async function getStorageProvider(
-  config: Partial<StorageConfig> = {}
+  config: Partial<StorageConfig> = {},
+  options?: { expectExisting?: boolean; expectedDimensions?: number }
 ): Promise<VectorStorageProvider> {
   const mergedConfig = { ...DEFAULT_STORAGE_CONFIG, ...config };
 
   const provider = new LanceDBStorageProvider();
-  await provider.initialize(mergedConfig.path);
+  await provider.initialize(mergedConfig.path, options);
 
   return provider;
 }
diff --git a/src/storage/lancedb.ts b/src/storage/lancedb.ts
index de6f413..9351d94 100644
--- a/src/storage/lancedb.ts
+++ b/src/storage/lancedb.ts
@@ -41,8 +41,12 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
    * @param storagePath - Directory path for LanceDB storage
    * @param options - Optional configuration
    * @param options.expectExisting - If true, throws IndexCorruptedError if table doesn't exist
+   * @param options.expectedDimensions - If set, verifies the stored vector column has this many dimensions
    */
-  async initialize(storagePath: string, options?: { expectExisting?: boolean }): Promise<void> {
+  async initialize(
+    storagePath: string,
+    options?: { expectExisting?: boolean; expectedDimensions?: number }
+  ): Promise<void> {
     if (this.initialized) return;
 
     try {
@@ -63,6 +67,18 @@ export class LanceDBStorageProvider implements VectorStorageProvider {
         if (!hasVectorColumn) {
           throw new IndexCorruptedError('LanceDB index corrupted: missing vector column');
         }
+
+        // Check vector dimensions if caller specifies expected dims (e.g. switching providers)
+        if (options?.expectedDimensions !== undefined) {
+          const vectorField = schema.fields.find((f: { name: string }) => f.name === 'vector');
+          const storedDims = (vectorField?.type as { listSize?: number } | undefined)?.listSize;
+          if (storedDims !== undefined && storedDims !== options.expectedDimensions) {
+            throw new IndexCorruptedError(
+              `LanceDB dimension mismatch: stored=${storedDims}, expected=${options.expectedDimensions} (rebuild required)`
+            );
+          }
+        }
+
         if (process.env.CODEBASE_CONTEXT_DEBUG) console.error('Opened existing LanceDB table');
       } else if (options?.expectExisting) {
         throw new IndexCorruptedError(
diff --git a/src/storage/types.ts b/src/storage/types.ts
index 7498a2b..75a553f 100644
--- a/src/storage/types.ts
+++ b/src/storage/types.ts
@@ -11,7 +11,10 @@ export interface VectorStorageProvider {
   /**
    * Initialize the storage (create database, tables, etc.)
    */
-  initialize(storagePath: string): Promise<void>;
+  initialize(
+    storagePath: string,
+    options?: { expectExisting?: boolean; expectedDimensions?: number }
+  ): Promise<void>;
 
   /**
    * Store code chunks with their embeddings
diff --git a/src/utils/language-detection.ts b/src/utils/language-detection.ts
index ae87daa..48d906a 100644
--- a/src/utils/language-detection.ts
+++ b/src/utils/language-detection.ts
@@ -149,7 +149,12 @@ const codeExtensions = new Set([
   '.cc',
   '.cxx',
   '.h',
-  '.hpp'
+  '.hpp',
+  '.sh',
+  '.bash',
+  '.zsh',
+  '.ps1',
+  '.sql'
 ]);
 
 /**
diff --git a/tests/embedding-mismatch.test.ts b/tests/embedding-mismatch.test.ts
new file mode 100644
index 0000000..67e78b3
--- /dev/null
+++ b/tests/embedding-mismatch.test.ts
@@ -0,0 +1,98 @@
+import { describe, it, expect } from 'vitest';
+import { checkEmbeddingMismatch } from '../src/core/index-meta.js';
+import type { IndexMeta } from '../src/core/index-meta.js';
+import { getConfiguredDimensions } from '../src/embeddings/index.js';
+
+/** Builds a minimal IndexMeta fixture; `overrides` are spread into `artifacts.vectorDb`. */
+function makeMeta(overrides: { embeddingProvider?: string; embeddingModel?: string }): IndexMeta {
+  const generatedAt = new Date().toISOString();
+  return {
+    metaVersion: 1,
+    formatVersion: 1,
+    buildId: 'test-build',
+    generatedAt,
+    toolVersion: '1.8.0',
+    artifacts: {
+      keywordIndex: { path: 'keyword-index.json' },
+      vectorDb: {
+        path: 'vector-db',
+        provider: 'lancedb',
+        // Spread last so a test can supply embeddingProvider / embeddingModel.
+        ...overrides
+      }
+    }
+  };
+}
+
+describe('checkEmbeddingMismatch', () => {
+  // Model identifiers reused across the cases below.
+  const BGE_SMALL = 'Xenova/bge-small-en-v1.5';
+  const GRANITE_SMALL = 'onnx-community/granite-embedding-small-english-r2-ONNX';
+
+  // Fresh meta for an index recorded as built with transformers + bge-small.
+  const bgeSmallMeta = (): IndexMeta =>
+    makeMeta({ embeddingProvider: 'transformers', embeddingModel: BGE_SMALL });
+
+  it('returns false for legacy meta with no embeddingProvider or embeddingModel', () => {
+    // Legacy meta: neither embedding field is present.
+    const legacyMeta = makeMeta({});
+    const mismatch = checkEmbeddingMismatch(legacyMeta, 'transformers', BGE_SMALL);
+    expect(mismatch).toBe(false);
+  });
+
+  it('returns false when provider and model match', () => {
+    const meta = bgeSmallMeta();
+    const mismatch = checkEmbeddingMismatch(meta, 'transformers', BGE_SMALL);
+    expect(mismatch).toBe(false);
+  });
+
+  it('returns true when provider differs', () => {
+    const meta = bgeSmallMeta();
+    const mismatch = checkEmbeddingMismatch(meta, 'openai', BGE_SMALL);
+    expect(mismatch).toBe(true);
+  });
+
+  it('returns true when model differs', () => {
+    const meta = bgeSmallMeta();
+    const mismatch = checkEmbeddingMismatch(meta, 'transformers', GRANITE_SMALL);
+    expect(mismatch).toBe(true);
+  });
+
+  it('returns true when both provider and model differ', () => {
+    const meta = bgeSmallMeta();
+    const mismatch = checkEmbeddingMismatch(meta, 'openai', 'text-embedding-3-small');
+    expect(mismatch).toBe(true);
+  });
+});
+
+describe('getConfiguredDimensions', () => {
+  // Looks up the dimension count for an explicit provider/model pair.
+  const dimsFor = (provider: 'transformers' | 'openai', model: string): number =>
+    getConfiguredDimensions({ provider, model });
+
+  it('returns 384 for default bge-small model', () => {
+    expect(dimsFor('transformers', 'Xenova/bge-small-en-v1.5')).toBe(384);
+  });
+
+  it('returns 768 for bge-base-en-v1.5 (not 384)', () => {
+    // Regression guard: bge-base is 768 dims, so a blanket 384 answer would be wrong.
+    expect(dimsFor('transformers', 'Xenova/bge-base-en-v1.5')).toBe(768);
+  });
+
+  it('returns 384 for Granite small model', () => {
+    const graniteSmall = 'onnx-community/granite-embedding-small-english-r2-ONNX';
+    expect(dimsFor('transformers', graniteSmall)).toBe(384);
+  });
+
+  it('returns 1536 for text-embedding-3-small', () => {
+    expect(dimsFor('openai', 'text-embedding-3-small')).toBe(1536);
+  });
+
+  it('returns 3072 for text-embedding-3-large', () => {
+    expect(dimsFor('openai', 'text-embedding-3-large')).toBe(3072);
+  });
+
+  it('returns 384 as fallback for unknown transformers model', () => {
+    expect(dimsFor('transformers', 'some/unknown-model')).toBe(384);
+  });
+});
diff --git a/tests/fixtures/grammars/kotlin.kt b/tests/fixtures/grammars/kotlin.kt
new file mode 100644
index 0000000..12d285e
--- /dev/null
+++ b/tests/fixtures/grammars/kotlin.kt
@@ -0,0 +1,11 @@
+class UserRepository(private val db: Database) {
+    fun findById(id: String): User? {
+        return db.query("SELECT * FROM users WHERE id = ?", id)
+    }
+
+    fun save(user: User): Boolean {
+        return db.execute("INSERT INTO users VALUES (?)", user.id)
+    }
+}
+
+fun greet(name: String): String = "Hello, $name"
diff --git a/tests/incremental-indexing.test.ts b/tests/incremental-indexing.test.ts
index 78fa69f..09fe132 100644
--- a/tests/incremental-indexing.test.ts
+++ b/tests/incremental-indexing.test.ts
@@ -16,11 +16,6 @@ describe('Incremental Indexing', () => {
 
   beforeEach(async () => {
     tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'incremental-test-'));
-    // Create a minimal project
-    await fs.writeFile(
-      path.join(tempDir, 'package.json'),
-      JSON.stringify({ name: 'test-project', dependencies: {} })
-    );
   });
 
   afterEach(async () => {
diff --git a/tests/openai-embeddings.test.ts b/tests/openai-embeddings.test.ts
new file mode 100644
index 0000000..8763efd
--- /dev/null
+++ b/tests/openai-embeddings.test.ts
@@ -0,0 +1,112 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { OpenAIEmbeddingProvider } from '../src/embeddings/openai.js';
+
+/** Resolves to a minimal stand-in for a fetch `Response` (`ok`, `status`, `json()`, `text()`). */
+function makeFetchResponse(body: unknown, ok = true, status = 200) {
+  // Both readers resolve with the same payload; text() serializes it as JSON on demand.
+  const json = () => Promise.resolve(body);
+  const text = () => Promise.resolve(JSON.stringify(body));
+  const stub = { ok, status, json, text };
+  return Promise.resolve(stub as Response);
+}
+
+describe('OpenAIEmbeddingProvider', () => { // fetch is swapped for vi.fn() stubs where a test needs it
+  afterEach(() => {
+    vi.unstubAllGlobals(); // undo any vi.stubGlobal('fetch', …) made by the test
+  });
+
+  it('has correct dimensions for text-embedding-3-small', () => {
+    const provider = new OpenAIEmbeddingProvider('text-embedding-3-small', 'key');
+    expect(provider.dimensions).toBe(1536);
+  });
+
+  it('initialize() throws when API key is missing', async () => { // NOTE(review): presumes no env-var key fallback — confirm
+    const provider = new OpenAIEmbeddingProvider('text-embedding-3-small', undefined);
+    await expect(provider.initialize()).rejects.toThrow('OpenAI API key');
+  });
+
+  it('initialize() resolves when API key is present', async () => {
+    const provider = new OpenAIEmbeddingProvider('text-embedding-3-small', 'sk-test');
+    await expect(provider.initialize()).resolves.toBeUndefined();
+  });
+
+  it('isReady() returns false without API key', () => { // NOTE(review): presumes no env-var key fallback — confirm
+    const provider = new OpenAIEmbeddingProvider();
+    expect(provider.isReady()).toBe(false);
+  });
+
+  it('isReady() returns true with API key', () => {
+    const provider = new OpenAIEmbeddingProvider('text-embedding-3-small', 'sk-test');
+    expect(provider.isReady()).toBe(true);
+  });
+
+  it('embedBatch() returns [] for empty input', async () => {
+    const provider = new OpenAIEmbeddingProvider('text-embedding-3-small', 'sk-test');
+    const result = await provider.embedBatch([]);
+    expect(result).toEqual([]);
+  });
+
+  it('embedBatch() sends correct Authorization header, model, and encoding_format', async () => {
+    const mockFetch = vi.fn().mockReturnValue(
+      makeFetchResponse({
+        data: [{ embedding: [0.1, 0.2, 0.3] }]
+      })
+    );
+    vi.stubGlobal('fetch', mockFetch);
+
+    const provider = new OpenAIEmbeddingProvider('text-embedding-3-small', 'sk-abc123');
+    await provider.embedBatch(['hello world']);
+
+    expect(mockFetch).toHaveBeenCalledOnce();
+    const [url, init] = mockFetch.mock.calls[0] as [string, RequestInit]; // arguments of the single fetch call
+    expect(url).toContain('/embeddings');
+    expect((init.headers as Record<string, string>)['Authorization']).toBe('Bearer sk-abc123');
+
+    const body = JSON.parse(init.body as string); // request payload as sent to fetch
+    expect(body.model).toBe('text-embedding-3-small');
+    expect(body.encoding_format).toBe('float');
+    expect(body.input).toEqual(['hello world']);
+  });
+
+  it('embedBatch() returns parsed embeddings in input order', async () => {
+    const vec1 = [0.1, 0.2];
+    const vec2 = [0.3, 0.4];
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockReturnValue(
+        makeFetchResponse({
+          data: [{ embedding: vec1 }, { embedding: vec2 }]
+        })
+      )
+    );
+
+    const provider = new OpenAIEmbeddingProvider('text-embedding-3-small', 'sk-test');
+    const result = await provider.embedBatch(['a', 'b']);
+
+    expect(result).toHaveLength(2);
+    expect(result[0]).toEqual(vec1);
+    expect(result[1]).toEqual(vec2);
+  });
+
+  it('embedBatch() throws on non-ok API response with status code', async () => {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockReturnValue(makeFetchResponse({ error: 'Unauthorized' }, false, 401))
+    );
+
+    const provider = new OpenAIEmbeddingProvider('text-embedding-3-small', 'sk-bad');
+    await expect(provider.embedBatch(['test'])).rejects.toThrow('401'); // message must mention the HTTP status
+  });
+
+  it('embed() delegates to embedBatch and returns first element', async () => {
+    const vec = [0.5, 0.6, 0.7];
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockReturnValue(makeFetchResponse({ data: [{ embedding: vec }] }))
+    );
+
+    const provider = new OpenAIEmbeddingProvider('text-embedding-3-small', 'sk-test');
+    const result = await provider.embed('hello');
+    expect(result).toEqual(vec); // only the returned vector is checked here
+  });
+});
diff --git a/tests/tree-sitter-grammar-assets.test.ts b/tests/tree-sitter-grammar-assets.test.ts
index 126bcfb..c4654dc 100644
--- a/tests/tree-sitter-grammar-assets.test.ts
+++ b/tests/tree-sitter-grammar-assets.test.ts
@@ -32,7 +32,8 @@ const LANGUAGE_FIXTURE_FILE: Record<string, string> = {
   java: 'java.java',
   c: 'c.c',
   cpp: 'cpp.cpp',
-  csharp: 'csharp.cs'
+  csharp: 'csharp.cs',
+  kotlin: 'kotlin.kt'
 };
 
 const fixturesDir = path.join(__dirname, 'fixtures', 'grammars');