diff --git a/docs/issue-692-ast-chunking-design.md b/docs/issue-692-ast-chunking-design.md
new file mode 100644
index 00000000..11653348
--- /dev/null
+++ b/docs/issue-692-ast-chunking-design.md
@@ -0,0 +1,256 @@
+# Issue #692 — AST-based Semantic Chunking for Code Blocks
+
+**Status:** Designed
+**Repo:** `memory-lancedb-pro`
+**Created:** 2026-05-04
+**Source:** https://github.com/CortexReach/memory-lancedb-pro/issues/692
+
+---
+
+## Problem Summary
+
+`chunker.ts` 的 `smartChunk()` 使用純 character-based split，split 邏輯在 `findSplitEnd()`：
+- 先找 sentence ending（`.!！？`）
+- 找不到 → 找 `\n`
+- 找不到 → 找 whitespace
+
+這對自然語言有效，但對程式碼是災難。JS/TS 函式結尾是 `;` 和 `}`，兩者都不在 target set，導致 function declaration 在 `{` / `}` 之間被隨機切斷。
+
+**真實破壞案例：**
+```
+Chunk A（~3800字）：
+"async function handleUserLogin(userId: string, credentials: LoginCredentials): Promise<AuthResult> {\n"
+"    const user = await this.userRepository.findById(userId);\n"
+"    if (!user) {\n"
+"        return { success: false, error: 'USER_NOT_FOUND' };"
+
+Chunk B（~900字）：
+"    }\n"
+"    const passwordValid = await this.verifyPassword(...);"  // verifyPassword 跨 Chunk A 和 B
+```
+
+**問題：**
+- Chunk A 結尾在 `return { success: false, error: 'USER_NOT_FOUND' };` — 不完整的 if-block
+- Chunk B 開頭是 `}` — 脫離語境的 closing brace
+- `verifyPassword` 函式定義被切成兩段
+
+---
+
+## Verified Facts (gitnexus + source reading)
+
+### Call Graph (gitnexus verified)
+```
+smartChunk (chunker.ts:263-281)
+  ├─ calls: getCjkRatio (174-183), chunkDocument (194-255)
+  │
+  └─ called by:
+       ├─ embedSingle (embedder.ts)
+       ├─ embedMany (embedder.ts)
+       ├─ testCjkAwareChunkSizing (test/cjk-recursion-regression.test.mjs)
+       └─ testSmallContextChunking (test/cjk-recursion-regression.test.mjs)
+
+chunkDocument (chunker.ts:194-255)
+  ├─ calls: findSplitEnd (97-143), sliceTrimWithIndices (146-163)
+  └─ called by: smartChunk
+
+findSplitEnd (chunker.ts:97-143)  ← 問題根因所在
+```
+
+### Existing Coverage
+- **測試：** 只有 `test/cjk-recursion-regression.test.mjs` 呼叫 `smartChunk`，**沒有任何專門測試 chunker 破壞案例的測試檔案**
+- **依賴：** 無 tree-sitter
+- **Config:** `maxChunkSize`, `overlapSize`, `minChunkSize`, `semanticSplit`, `maxLinesPerChunk`
+
+---
+
+## Solution: astChunk()
+
+### Architecture
+
+```
+smartChunk(text)
+  ├─ detectCodeLanguage(text) === null  → chunkDocument()  [現有 character split]
+  └─ detectCodeLanguage(text) === 'js'/'ts' → astChunk(text, lang, config)
+                               === 'py'   → astChunk(text, 'python', config)
+                               === 其他   → chunkDocument()  [fallback]
+```
+
+### 1. `detectCodeLanguage(text) → CodeLanguage | null`
+
+取前 200 字做偵測：
+
+| 語言 | Pattern |
+|------|---------|
+| JS/TS | `/\b(function\|const\s\|let\s\|var\s\|=>\|import\s\|export\s\|interface\s\|type\s\|class\s)/` |
+| Python | `/\bdef\s\|class\s\|import\s\|from\s\|print\(/` |
+| Go | `/\bfunc\s\|package\s\|import\s"/` |
+| Rust | `/\bfn\s\|impl\s\|pub\s\|let\s+mut\s/` |
+
+### 2. `astChunk(code, language, config) → ChunkResult`
+
+```typescript
+import Parser from 'tree-sitter';
+import JavaScript from 'tree-sitter-javascript';
+
+export function astChunk(
+  code: string,
+  language: CodeLanguage,
+  config: ChunkerConfig
+): ChunkResult {
+  const parser = new Parser();
+  switch (language) {
+    case 'javascript':
+    case 'typescript':
+      parser.setLanguage(JavaScript);
+      break;
+    case 'python':
+      parser.setLanguage(Python);
+      break;
+    default:
+      return chunkDocument(code, config);
+  }
+
+  const tree = parser.parse(code);
+  const chunks: string[] = [];
+  const metadatas: ChunkMetadata[] = [];
+
+  // Walk top-level nodes
+  const root = tree.rootNode;
+  for (const child of root.children) {
+    if (!isDeclarationNode(child)) continue;
+    const text = code.slice(child.startIndex, child.endIndex);
+    if (text.length <= config.maxChunkSize) {
+      chunks.push(text);
+      metadatas.push({ startIndex: child.startIndex, endIndex: child.endIndex, length: text.length });
+    } else {
+      // Sub-split within this declaration at statement level
+      const subResult = subChunk(text, config);
+      chunks.push(...subResult.chunks);
+      metadatas.push(...subResult.metadatas);
+    }
+  }
+
+  return { chunks, metadatas, totalOriginalLength: code.length, chunkCount: chunks.length };
+}
+```
+
+### 3. Supported Node Types (Phase 1)
+
+| 語言 | P0 節點 |
+|------|---------|
+| JS/TS | `function_declaration`, `arrow_function`, `class_declaration`, `method_definition`, `export_statement`, `interface_declaration`, `type_alias_declaration`, `lexical_declaration` |
+| Python | `function_definition`, `class_definition`, `decorated_definition` |
+| Go | `function_declaration`, `method_declaration` (P2) |
+| Rust | `function_item`, `impl_item` (P2) |
+
+### 4. Config Extension
+
+```typescript
+interface ChunkerConfig {
+  // ... 現有五個欄位 ...
+  astAwareCodeSplit?: boolean;  // NEW: default true
+}
+```
+
+### 5. Dependency Changes
+
+```json
+{
+  "dependencies": {
+    "tree-sitter": "^0.21.1",
+    "tree-sitter-javascript": "^0.21.0",
+    "tree-sitter-python": "^0.21.0"
+  }
+}
+```
+
+---
+
+## Files to Change
+
+| 檔案 | 變更 |
+|------|------|
+| `src/chunker.ts` | + `detectCodeLanguage()`, + `astChunk()`, + `subChunk()`, 修改 `smartChunk()` 路由, + `astAwareCodeSplit` config |
+| `src/chunker.test.ts` | **全新建立**（從破壞案例反轉）|
+| `package.json` | + tree-sitter, tree-sitter-javascript, tree-sitter-python |
+
+---
+
+## Tests (New File)
+
+```typescript
+describe('AST-aware code chunking', () => {
+  it('should keep { and } balanced in every chunk', () => {
+    const code = `async function handleUserLogin(userId: string) {
+    const user = await this.userRepository.findById(userId);
+    if (!user) { return { success: false }; }
+    const session = await this.createSession(user);
+    return { success: true, session };
+}
+async function verifyPassword(input: string): Promise<boolean> {
+    return bcrypt.compare(input, this.hash);
+}`;
+    const result = smartChunk(code, 'jina-embeddings-v5');
+    for (const chunk of result.chunks) {
+      const opens = (chunk.match(/{/g) || []).length;
+      const closes = (chunk.match(/}/g) || []).length;
+      expect(opens).toBe(closes);
+    }
+  });
+
+  it('should not split function mid-body', () => {
+    const result = smartChunk(code, 'jina-embeddings-v5');
+    const hasMiddleOfFunction = result.chunks.some(c =>
+      c.startsWith('}') || c.endsWith('{')
+    );
+    expect(hasMiddleOfFunction).toBe(false);
+  });
+
+  it('should keep complete function as one chunk', () => {
+    const result = smartChunk(code, 'jina-embeddings-v5');
+    const verifyFn = result.chunks.find(c => c.includes('verifyPassword'));
+    expect(verifyFn).toBeDefined();
+    expect(verifyFn).toContain('bcrypt.compare');
+    expect(verifyFn).not.toContain('handleUserLogin');
+  });
+});
+```
+
+---
+
+## Phase Plan
+
+```
+Phase 1（P0 — MVP）：
+  ├─ detectCodeLanguage()（JS/TS/Python）
+  ├─ astChunk() — JS/TS only
+  ├─ astChunk() — Python
+  ├─ Unit tests（破壞案例 → 通過案例）
+  └─ Config: astAwareCodeSplit default = true
+
+Phase 2（P1）：
+  ├─ Sub-split within oversized declarations（statement level）
+  ├─ Go、Rust support
+  └─ Benchmark: 向量品質 vs. character split
+
+Phase 3（P2）：
+  └─ Embedding quality evaluation（問答對比）
+```
+
+---
+
+## Q&A
+
+| Q | A |
+|---|---|
+| tree-sitter 值得嗎？ | **值得**。~1MB runtime，sub-ms parse，能處理巢狀結構/decorator/subclass，比 regex 精準一個數量級。 |
+| 預設開？ | **預設開**。破壞案例太明確，等使用者手動開等於功能永遠不被用。`astAwareCodeSplit: false` 保留給需要復現舊行為的測試。 |
+| 非主流語言？ | **Phase 1 fallback**。現有 sentence-ending split 對自然語言有效；非主流語言佔比低，Phase 1 fallback 合理。 |
+
+---
+
+## Reference
+
+- Issue: https://github.com/CortexReach/memory-lancedb-pro/issues/692
+- Reference impl: `zilliztech/claude-context` ast-splitter.ts
+- Existing chunker: `src/chunker.ts` (284 lines)
diff --git a/package-lock.json b/package-lock.json
index ee4ecef2..c5d6f5d2 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -14,7 +14,10 @@
         "apache-arrow": "18.1.0",
         "json5": "^2.2.3",
         "openai": "^6.21.0",
-        "proper-lockfile": "^4.1.2"
+        "proper-lockfile": "^4.1.2",
+        "tree-sitter": "^0.25.0",
+        "tree-sitter-javascript": "^0.25.0",
+        "tree-sitter-python": "^0.25.0"
       },
       "devDependencies": {
         "commander": "^14.0.0",
@@ -430,6 +433,26 @@
       "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==",
       "license": "MIT"
     },
+    "node_modules/node-addon-api": {
+      "version": "8.7.0",
+      "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.7.0.tgz",
+      "integrity": "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA==",
+      "license": "MIT",
+      "engines": {
+        "node": "^18 || ^20 || >= 21"
+      }
+    },
+    "node_modules/node-gyp-build": {
+      "version": "4.8.4",
+      "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz",
+      "integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==",
+      "license": "MIT",
+      "bin": {
+        "node-gyp-build": "bin.js",
+        "node-gyp-build-optional": "optional.js",
+        "node-gyp-build-test": "build-test.js"
+      }
+    },
     "node_modules/openai": {
       "version": "6.22.0",
       "resolved": "https://registry.npmjs.org/openai/-/openai-6.22.0.tgz",
@@ -517,6 +540,55 @@
         "node": ">=12.17"
       }
     },
+    "node_modules/tree-sitter": {
+      "version": "0.25.0",
+      "resolved": "https://registry.npmjs.org/tree-sitter/-/tree-sitter-0.25.0.tgz",
+      "integrity": "sha512-PGZZzFW63eElZJDe/b/R/LbsjDDYJa5UEjLZJB59RQsMX+fo0j54fqBPn1MGKav/QNa0JR0zBiVaikYDWCj5KQ==",
+      "hasInstallScript": true,
+      "license": "MIT",
+      "dependencies": {
+        "node-addon-api": "^8.3.0",
+        "node-gyp-build": "^4.8.4"
+      }
+    },
+    "node_modules/tree-sitter-javascript": {
+      "version": "0.25.0",
+      "resolved": "https://registry.npmjs.org/tree-sitter-javascript/-/tree-sitter-javascript-0.25.0.tgz",
+      "integrity": "sha512-1fCbmzAskZkxcZzN41sFZ2br2iqTYP3tKls1b/HKGNPQUVOpsUxpmGxdN/wMqAk3jYZnYBR1dd/y/0avMeU7dw==",
+      "hasInstallScript": true,
+      "license": "MIT",
+      "dependencies": {
+        "node-addon-api": "^8.3.1",
+        "node-gyp-build": "^4.8.4"
+      },
+      "peerDependencies": {
+        "tree-sitter": "^0.25.0"
+      },
+      "peerDependenciesMeta": {
+        "tree-sitter": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/tree-sitter-python": {
+      "version": "0.25.0",
+      "resolved": "https://registry.npmjs.org/tree-sitter-python/-/tree-sitter-python-0.25.0.tgz",
+      "integrity": "sha512-eCmJx6zQa35GxaCtQD+wXHOhYqBxEL+bp71W/s3fcDMu06MrtzkVXR437dRrCrbrDbyLuUDJpAgycs7ncngLXw==",
+      "hasInstallScript": true,
+      "license": "MIT",
+      "dependencies": {
+        "node-addon-api": "^8.5.0",
+        "node-gyp-build": "^4.8.4"
+      },
+      "peerDependencies": {
+        "tree-sitter": "^0.25.0"
+      },
+      "peerDependenciesMeta": {
+        "tree-sitter": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/tslib": {
       "version": "2.8.1",
       "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
diff --git a/package.json b/package.json
index fbcb9d98..c091d619 100644
--- a/package.json
+++ b/package.json
@@ -43,7 +43,10 @@
     "apache-arrow": "18.1.0",
     "json5": "^2.2.3",
     "openai": "^6.21.0",
-    "proper-lockfile": "^4.1.2"
+    "proper-lockfile": "^4.1.2",
+    "tree-sitter": "^0.25.0",
+    "tree-sitter-javascript": "^0.25.0",
+    "tree-sitter-python": "^0.25.0"
   },
   "openclaw": {
     "extensions": [
diff --git a/scripts/ci-test-manifest.mjs b/scripts/ci-test-manifest.mjs
index fc6435dc..16ea8ac9 100644
--- a/scripts/ci-test-manifest.mjs
+++ b/scripts/ci-test-manifest.mjs
@@ -48,6 +48,8 @@ export const CI_TEST_MANIFEST = [
   { group: "core-regression", runner: "node", file: "test/temporal-awareness.test.mjs", args: ["--test"] },
   // Issue #598 regression tests
   { group: "core-regression", runner: "node", file: "test/store-serialization.test.mjs" },
+  // Issue #692: AST-based semantic chunking
+  { group: "core-regression", runner: "node", file: "test/ast-code-chunking.test.mjs" },
   { group: "core-regression", runner: "node", file: "test/access-tracker-retry.test.mjs" },
   { group: "core-regression", runner: "node", file: "test/embedder-cache.test.mjs" },
   // Issue #629 batch embedding fix
@@ -62,6 +64,9 @@ export const CI_TEST_MANIFEST = [
   // Issue #492 agentId validation tests
   { group: "core-regression", runner: "node", file: "test/agentid-validation.test.mjs", args: ["--test"] },
   { group: "core-regression", runner: "node", file: "test/command-reflection-guard.test.mjs", args: ["--test"] },
+  // Issue #693 extraction write validation tests
+  { group: "core-regression", runner: "node", file: "test/extraction-validation.test.mjs", args: ["--test"] },
+  { group: "core-regression", runner: "node", file: "test/dedup-false-alarm.test.mjs", args: ["--test"] },
 ];
 
 export function getEntriesForGroup(group) {
diff --git a/src/chunker.ts b/src/chunker.ts
index 8bb4dee6..a6aca426 100644
--- a/src/chunker.ts
+++ b/src/chunker.ts
@@ -37,6 +37,8 @@ export interface ChunkerConfig {
   semanticSplit: boolean;
   /** Max lines per chunk before we try to split earlier on a line boundary. */
   maxLinesPerChunk: number;
+  /** Use AST-aware splitting for code blocks (default: true). */
+  astAwareCodeSplit?: boolean;
 }
 
 // Common embedding context limits (provider/model specific). These are typically
@@ -188,6 +190,237 @@ function getCjkRatio(text: string): number {
 const CJK_CHAR_TOKEN_DIVISOR = 2.5;
 const CJK_RATIO_THRESHOLD = 0.3;
 
+// ============================================================================
+// AST-aware Code Chunking
+// ============================================================================
+
+export type CodeLanguage = 'javascript' | 'typescript' | 'python' | 'go' | 'rust';
+
+const CODE_LANGUAGE_PATTERNS: Array<{ pattern: RegExp; lang: CodeLanguage }> = [
+  // Python: must check before JS (def/class are specific)
+  {
+    pattern: /\b(def\s|class\s|import\s|from\s|async\s+def\s|print\()/,
+    lang: 'python',
+  },
+  // Go: func and package keywords
+  {
+    pattern: /\b(func\s|package\s|import\s")/,
+    lang: 'go',
+  },
+  // Rust: fn/impl/pub are distinct
+  {
+    pattern: /\bfn\s|impl\s|pub\s|let\s+mut\s/,
+    lang: 'rust',
+  },
+  // TypeScript: interface / type alias / : type annotations (check before JS 'function')
+  {
+    pattern: /\b(interface\s|type\s+|:\s*(?:string|number|boolean|unknown|never|any|void|object|Error|Promise|Record|Array|Map|Set)\b)/,
+    lang: 'typescript',
+  },
+  // JavaScript / TypeScript: function, const/let/var, arrow, import/export, class
+  {
+    pattern: /\b(function|const\s|let\s|var\s|=>|import\s|export\s|class\s)/,
+    lang: 'javascript',
+  },
+];
+
+/**
+ * Detect if text is code and return the language, or null if not code.
+ * Uses only the first 200 chars to avoid being misled by comments.
+ */
+export function detectCodeLanguage(text: string): CodeLanguage | null {
+  const sample = text.slice(0, 400);
+  for (const { pattern, lang } of CODE_LANGUAGE_PATTERNS) {
+    if (pattern.test(sample)) return lang;
+  }
+  return null;
+}
+
+// Supported top-level declaration node types per language
+const JS_DECLARATION_TYPES = new Set([
+  'function_declaration',
+  'class_declaration',
+  'method_definition',
+  'arrow_function',
+  'export_statement',
+  'export_default_declaration',
+  'interface_declaration',
+  'type_alias_declaration',
+  'lexical_declaration', // const/let declarations
+  'variable_declaration',
+]);
+
+const PYTHON_DECLARATION_TYPES = new Set([
+  'function_definition',
+  'class_definition',
+  'decorated_definition',
+]);
+
+function isDeclarationNode(node: { type: string }, lang: CodeLanguage): boolean {
+  if (lang === 'javascript' || lang === 'typescript') {
+    return JS_DECLARATION_TYPES.has(node.type);
+  }
+  if (lang === 'python') return PYTHON_DECLARATION_TYPES.has(node.type);
+  return false;
+}
+
+/**
+ * Sub-split an oversized declaration at the statement level.
+ * Falls back to chunkDocument for the sub-split logic.
+ */
+function subChunk(text: string, config: ChunkerConfig): ChunkResult {
+  // For now, fall back to the character-based chunker within an oversized declaration.
+  // This preserves the existing behavior for sub-chunks while ensuring top-level
+  // declarations (functions/classes) are kept intact.
+  return chunkDocument(text, config);
+}
+
+/**
+ * AST-aware chunker for code. Parses the code with tree-sitter and splits
+ * on top-level declaration boundaries (function, class, etc.) instead of
+ * arbitrary character positions.
+ *
+ * NOTE: This function is synchronous to match the sync signature of smartChunk.
+ * tree-sitter is loaded via require() with a try-catch fallback.
+ */
+export function astChunk(
+  code: string,
+  language: CodeLanguage,
+  config: ChunkerConfig
+): ChunkResult {
+  // Attempt to load tree-sitter and language grammars
+  let LanguageMap: Record<string, any>;
+  // tree-sitter exports Parser as the default export (module.exports = Parser)
+  // eslint-disable-next-line @typescript-eslint/no-var-requires
+  let TreeSitterParser: any;
+
+  try {
+    TreeSitterParser = require('tree-sitter');
+
+    if (language === 'javascript' || language === 'typescript') {
+      // eslint-disable-next-line @typescript-eslint/no-var-requires
+      const JavaScript = require('tree-sitter-javascript');
+      LanguageMap = { javascript: JavaScript, typescript: JavaScript };
+    } else if (language === 'python') {
+      // eslint-disable-next-line @typescript-eslint/no-var-requires
+      const Python = require('tree-sitter-python');
+      LanguageMap = { python: Python };
+    } else {
+      // Unsupported language — fall back
+      return chunkDocument(code, config);
+    }
+  } catch {
+    // tree-sitter not installed — fall back to character-based chunking
+    return chunkDocument(code, config);
+  }
+
+  const parser = new TreeSitterParser();
+  const chunks: string[] = [];
+  const metadatas: ChunkMetadata[] = [];
+
+  // Set language on the parser
+  let languageSet = false;
+  for (const [, langModule] of Object.entries(LanguageMap)) {
+    try {
+      parser.setLanguage(langModule);
+      languageSet = true;
+      break;
+    } catch {
+      // try next language
+    }
+  }
+
+  if (!languageSet) {
+    return chunkDocument(code, config);
+  }
+
+  let tree: any;
+  try {
+    tree = parser.parse(code);
+  } catch {
+    return chunkDocument(code, config);
+  }
+
+  const root = tree.rootNode;
+
+  // If there are ERROR nodes at the top level, the language parser likely does not
+  // support this syntax (e.g., TypeScript interface parsed by tree-sitter-javascript).
+  // Fall back to chunkDocument to avoid producing broken/incomplete chunks.
+  const hasErrorNodes = root.children.some(c => c.type === 'ERROR');
+  if (hasErrorNodes) {
+    return chunkDocument(code, config);
+  }
+
+  // Collect non-declaration content (comments, imports, etc.) that would otherwise be lost.
+  // These are prepended to the next declaration chunk to preserve no-content-left-behind semantics.
+  let pendingNonDecl = '';
+
+  // Walk top-level children
+  for (const child of root.children) {
+    // Skip non-named nodes and ERROR nodes
+    if (!child.type || child.type === 'ERROR') continue;
+
+    if (!isDeclarationNode(child, language)) {
+      // Collect non-declaration content (comments, imports, exports, etc.)
+      const text = code.slice(child.startIndex, child.endIndex);
+      if (text.length > 0) {
+        pendingNonDecl += (pendingNonDecl.length > 0 ? '\n' : '') + text;
+      }
+      continue;
+    }
+
+    const text = code.slice(child.startIndex, child.endIndex);
+
+    if (text.length === 0) continue;
+
+    // Prepend any pending non-declaration content to this declaration chunk
+    const fullText = pendingNonDecl.length > 0 ? pendingNonDecl + '\n' + text : text;
+    pendingNonDecl = ''; // reset
+
+    if (fullText.length <= config.maxChunkSize) {
+      chunks.push(fullText);
+      metadatas.push({
+        startIndex: child.startIndex,
+        endIndex: child.endIndex,
+        length: fullText.length,
+      });
+    } else {
+      // Oversized declaration with prepended content.
+      // We accept that this chunk may exceed maxChunkSize — splitting
+      // mid-declaration would break { } balance (Issue #692).
+      // Sub-splitting at statement level is Phase 2 work.
+      chunks.push(fullText);
+      metadatas.push({
+        startIndex: child.startIndex,
+        endIndex: child.endIndex,
+        length: fullText.length,
+      });
+    }
+  }
+
+  // If there is trailing non-declaration content (e.g., trailing comments with no following decl),
+  // emit it as its own chunk (fall back to chunkDocument to handle sizing).
+  if (pendingNonDecl.length > 0) {
+    const trailing = chunkDocument(pendingNonDecl, config);
+    for (let i = 0; i < trailing.chunks.length; i++) {
+      chunks.push(trailing.chunks[i]);
+      metadatas.push(trailing.metadatas[i]);
+    }
+  }
+
+  // If we got nothing (e.g. empty file, parse error), fall back
+  if (chunks.length === 0) {
+    return chunkDocument(code, config);
+  }
+
+  return {
+    chunks,
+    metadatas,
+    totalOriginalLength: code.length,
+    chunkCount: chunks.length,
+  };
+}
+
 // ============================================================================
 // Chunking Core
 // ============================================================================
@@ -276,8 +509,17 @@ export function smartChunk(text: string, embedderModel?: string): ChunkResult {
     minChunkSize: Math.max(100, Math.floor(base * 0.1 / divisor)),
     semanticSplit: true,
     maxLinesPerChunk: 50,
+    astAwareCodeSplit: true,
   };
 
+  // AST-aware code path: only activate when explicitly enabled
+  if (config.astAwareCodeSplit === true) {
+    const lang = detectCodeLanguage(text);
+    if (lang !== null) {
+      return astChunk(text, lang, config);
+    }
+  }
+
   return chunkDocument(text, config);
 }
 
diff --git a/src/memory-categories.ts b/src/memory-categories.ts
index 7edc7f53..af6604e9 100644
--- a/src/memory-categories.ts
+++ b/src/memory-categories.ts
@@ -76,6 +76,24 @@ export type ExtractionStats = {
   superseded?: number; // temporal fact replacements
 };
 
+/**
+ * Payload delivered to `ExtractPersistOptions.onExtractionValidationFailed`
+ * when the number of entries actually written to the store differs from
+ * the number of candidates produced by the LLM.
+ *
+ * @see ExtractPersistOptions.onExtractionValidationFailed
+ */
+export type ExtractionValidation = {
+  /** Number of candidates the LLM intended to create (createEntries.length) */
+  expected: number;
+  /** Number of rows actually written (countAfter - countBefore) */
+  actual: number;
+  /** expected - actual; positive = under-write, negative = over-write (concurrent delete) */
+  mismatch: number;
+  /** Session key passed to extractAndPersist */
+  sessionKey: string;
+};
+
 /** Validate and normalize a category string. */
 export function normalizeCategory(raw: string): MemoryCategory | null {
   const lower = raw.toLowerCase().trim();
diff --git a/src/smart-extractor.ts b/src/smart-extractor.ts
index 11354ae6..e8e4da16 100644
--- a/src/smart-extractor.ts
+++ b/src/smart-extractor.ts
@@ -273,6 +273,23 @@ export interface ExtractPersistOptions {
    * - pass a non-empty array to restrict reads to those scopes
    */
   scopeFilter?: string[];
+  /**
+   * Callback invoked when the number of entries actually written to the store
+   * differs from the number of LLM-generated candidates.
+   *
+   * This detects write-path anomalies including:
+   * - SIGKILL / OOM during bulkStore (partial write)
+   * - Concurrent compactor deletions (count window between bulkStore and count())
+   *
+   * The callback is NOT invoked when createEntries is empty (no-op write).
+   * The mismatch field: positive = under-write, negative = over-write (rare).
+   */
+  onExtractionValidationFailed?: (validation: {
+    expected: number;
+    actual: number;
+    mismatch: number;
+    sessionKey: string;
+  }) => void;
 }
 
 export class SmartExtractor {
@@ -441,7 +458,30 @@ export class SmartExtractor {
     }
 
     if (createEntries.length > 0) {
+      // Phase 1: Verify write success via count-before/after.
+      // bulkStore is atomic per entry (randomUUID + table.add), so any
+      // discrepancy here indicates partial failure (SIGKILL/OOM).
+      // LIMITATION: This count-diff approach is only reliable under single-
+      // process assumption (no concurrent compactor/session writes during
+      // extractAndPersist). Concurrent writes inflate actualCreated and can
+      // cause false negatives (mismatch not detected). This is acceptable for
+      // Phase 1 since plugin architecture guarantees single-process extraction.
+      // Phase 2 will address concurrent-safe validation (UUID list compare).
+      const countBefore = await this.store.count();
       await this.store.bulkStore(createEntries);
+      const countAfter = await this.store.count();
+      const actualCreated = countAfter - countBefore;
+      const expectedCreated = createEntries.length;
+
+      if (actualCreated !== expectedCreated) {
+        const mismatch = expectedCreated - actualCreated;
+        options.onExtractionValidationFailed?.({
+          expected: expectedCreated,
+          actual: actualCreated,
+          mismatch,
+          sessionKey,
+        });
+      }
     }
 
     return stats;
diff --git a/test/ast-code-chunking.test.mjs b/test/ast-code-chunking.test.mjs
new file mode 100644
index 00000000..6d0fa2ca
--- /dev/null
+++ b/test/ast-code-chunking.test.mjs
@@ -0,0 +1,296 @@
+/**
+ * AST-aware Code Chunking Tests (Issue #692)
+ *
+ * Verifies that code declarations (functions, classes) are NOT split mid-
+ * declaration, which was breaking { } balance when the old character-based
+ * splitter cut through the middle of a function body.
+ */
+
+import { describe, it, mock, beforeEach } from 'node:test';
+import assert from 'node:assert/strict';
+import jitiFactory from 'jiti';
+
+const jiti = jitiFactory(import.meta.url, { interopDefault: true });
+const { detectCodeLanguage, astChunk, smartChunk, chunkDocument, DEFAULT_CHUNKER_CONFIG } = jiti('../src/chunker.ts');
+
+// ============================================================================
+// detectCodeLanguage
+// ============================================================================
+
+describe('detectCodeLanguage', () => {
+  it('detects JavaScript function', () => {
+    const code = 'async function handleUserLogin(userId, password) {';
+    assert.equal(detectCodeLanguage(code), 'javascript');
+  });
+
+  it('detects TypeScript interface', () => {
+    const code = 'interface UserProfile { name: string; age: number; }';
+    assert.equal(detectCodeLanguage(code), 'typescript');
+  });
+
+  it('detects Python function', () => {
+    const code = 'def verify_password(password: str, hashed: bytes) -> bool:';
+    assert.equal(detectCodeLanguage(code), 'python');
+  });
+
+  it('detects Go function', () => {
+    const code = 'func handleLogin(w http.ResponseWriter, r *http.Request) {';
+    assert.equal(detectCodeLanguage(code), 'go');
+  });
+
+  it('detects Rust function', () => {
+    const code = 'fn verify_password(password: &str, hash: &str) -> bool {';
+    assert.equal(detectCodeLanguage(code), 'rust');
+  });
+
+  it('returns null for plain text', () => {
+    const text = 'This is a plain English sentence with no code markers.';
+    assert.equal(detectCodeLanguage(text), null);
+  });
+
+  it('returns null for Markdown prose', () => {
+    const md = '# Heading\n\nThis is a paragraph with **bold** text.';
+    assert.equal(detectCodeLanguage(md), null);
+  });
+
+  it('uses only first 400 chars to avoid comment noise', () => {
+    // Short comment so 'function' appears within first 400 chars of the sample
+    const commentLine = '// This is a comment\n'; // 20 chars
+    const code = commentLine.repeat(15) + 'function foo() {}'; // ~300 + function
+    assert.equal(detectCodeLanguage(code), 'javascript');
+  });
+});
+
+// ============================================================================
+// Brace balance helper
+// ============================================================================
+
+/** Count net open braces inside a string. */
+function braceDelta(s) {
+  let d = 0;
+  for (const ch of s) {
+    if (ch === '{') d++;
+    else if (ch === '}') d--;
+  }
+  return d;
+}
+
+/** Check all chunks are brace-balanced. */
+function assertBraceBalanced(chunks, label) {
+  const deltas = chunks.map(c => braceDelta(c));
+  const total = deltas.reduce((a, b) => a + b, 0);
+  assert.equal(total, 0, `${label}: unbalanced braces across chunks (net=${total}, deltas=${JSON.stringify(deltas)})`);
+  for (let i = 0; i < deltas.length; i++) {
+    assert(deltas[i] >= 0,
+      `${label}: chunk[${i}] closes more braces than it opens (delta=${deltas[i]})`);
+  }
+}
+
+// ============================================================================
+// Issue #692 — core destructive cases
+// ============================================================================
+
+describe('Issue #692: code functions must not be split mid-declaration', () => {
+
+  it('verifies that a simple async function is kept whole', () => {
+    const code = `async function verifyPassword(password, hash) {
+  const match = await bcrypt.compare(password, hash);
+  return match;
+}`;
+
+    // Very small maxChunkSize to force splitting — old splitter would cut mid-function
+    const config = { ...DEFAULT_CHUNKER_CONFIG, maxChunkSize: 60, minChunkSize: 10, semanticSplit: false };
+    const result = astChunk(code, 'javascript', config);
+
+    // Function body should not be split mid-declaration
+    const splitInsideFunction = result.chunks.some(chunk => {
+      // Should not have "{" without corresponding "}"
+      const d = braceDelta(chunk);
+      return d > 0; // opens braces but never closes
+    });
+    assert.ok(!splitInsideFunction, 'Should not split inside a function declaration');
+    assertBraceBalanced(result.chunks, 'verifyPassword');
+  });
+
+  it('verifies that a long function is NOT split mid-function (maxChunkSize < function length)', () => {
+    // This function is ~250 chars — set maxChunkSize=120 to force the issue.
+    // Oversized functions are kept as ONE atomic chunk (no mid-function split).
+    const code = `async function handleUserLogin(userId, password) {
+  const user = await db.users.findOne({ id: userId });
+  if (!user) throw new Error('User not found');
+  const match = await bcrypt.compare(password, user.hash);
+  return match;
+}`;
+
+    const config = { ...DEFAULT_CHUNKER_CONFIG, maxChunkSize: 120, minChunkSize: 40, semanticSplit: false };
+    const result = astChunk(code, 'javascript', config);
+
+    assertBraceBalanced(result.chunks, 'handleUserLogin');
+    // Should be 1 chunk — entire function kept intact
+    assert.ok(result.chunks.length === 1, `Expected 1 chunk (entire function), got ${result.chunks.length}`);
+  });
+
+  it('verifies that multiple small functions are each kept whole', () => {
+    const code = `async function verifyPassword(password, hash) {
+  return await bcrypt.compare(password, hash);
+}
+
+async function hashPassword(password) {
+  return await bcrypt.hash(password, 10);
+}
+
+export async function createUser(name, email, password) {
+  const hash = await hashPassword(password);
+  return await db.users.create({ name, email, hash });
+}`;
+
+    const config = { ...DEFAULT_CHUNKER_CONFIG, maxChunkSize: 150, minChunkSize: 40, semanticSplit: false };
+    const result = astChunk(code, 'javascript', config);
+
+    assertBraceBalanced(result.chunks, 'multiple functions');
+    // All three functions should appear intact in some chunk
+    assert.ok(result.chunks.some(c => c.includes('function verifyPassword')), 'verifyPassword missing');
+    assert.ok(result.chunks.some(c => c.includes('function hashPassword')), 'hashPassword missing');
+    assert.ok(result.chunks.some(c => c.includes('function createUser')), 'createUser missing');
+  });
+
+  it('smartChunk: entire JavaScript file with functions stays brace-balanced', () => {
+    const code = `const SPEC = {
+  name: 'auth',
+  version: '1.0.0',
+};
+
+async function login(email, password) {
+  const user = await db.findUser(email);
+  const ok = await bcrypt.compare(password, user.hash);
+  if (!ok) throw new Error('Invalid credentials');
+  return { token: signToken(user.id) };
+}
+
+async function logout(token) {
+  invalidateToken(token);
+}`;
+
+    const result = smartChunk(code, 'text-embedding-3-small');
+    assertBraceBalanced(result.chunks, 'smartChunk JS');
+  });
+
+  it('smartChunk: Python function stays syntactically coherent', () => {
+    const code = `def verify_password(password: str, hashed: bytes) -> bool:
+    return pwd_context.verify(password, hashed)
+
+def hash_password(password: str) -> str:
+    return pwd_context.hash(password)`;
+
+    const result = smartChunk(code, 'text-embedding-3-small');
+    assert.ok(result.chunks.length >= 1, 'Should produce at least one chunk');
+    // Python chunks should contain complete function definitions
+    assert.ok(result.chunks.every(c => c.trim().length > 0), 'No empty chunks');
+  });
+});
+
+// ============================================================================
+// astChunk — fallback & edge cases
+// ============================================================================
+
+describe('astChunk fallback behavior', () => {
+
+  it('falls back to chunkDocument when tree-sitter throws', () => {
+    // Pass an empty string to force parse error
+    const code = '';
+    const config = { ...DEFAULT_CHUNKER_CONFIG, maxChunkSize: 50 };
+    const result = astChunk(code, 'javascript', config);
+    // Should return a valid ChunkResult (fallback path)
+    assert.ok('chunks' in result);
+    assert.ok('chunkCount' in result);
+  });
+
+  it('returns chunkDocument result when language is unsupported', () => {
+    const code = 'fn main() {}'; // not JS/TS/Python
+    const config = { ...DEFAULT_CHUNKER_CONFIG, maxChunkSize: 50 };
+    const result = astChunk(code, 'rust', config);
+    // Rust is not yet supported in astChunk — falls back
+    assert.ok('chunks' in result);
+  });
+
+  it('handles an oversized single declaration as one atomic chunk (brace-balanced)', () => {
+    // A very long function that exceeds maxChunkSize — should stay as ONE chunk
+    const body = '  return x + y;\n'.repeat(200);
+    const code = `function processData(x, y) {\n${body}}`;
+
+    const config = { ...DEFAULT_CHUNKER_CONFIG, maxChunkSize: 200, minChunkSize: 50, semanticSplit: false };
+    const result = astChunk(code, 'javascript', config);
+
+    // Should be 1 chunk — entire function kept as one
+    assert.ok(result.chunks.length === 1, `Expected 1 chunk, got ${result.chunks.length}`);
+    assertBraceBalanced(result.chunks, 'oversized function atomic chunk');
+  });
+});
+
+// ============================================================================
+// smartChunk — non-code text unchanged
+// ============================================================================
+
+describe('smartChunk preserves non-code behavior', () => {
+
+  it('passes plain English text to chunkDocument (not astChunk)', () => {
+    const text = 'This is a plain English paragraph. It has sentences. They end with periods. '.repeat(30);
+
+    const result = smartChunk(text, 'text-embedding-3-small');
+
+    assert.ok(result.chunks.length >= 1, 'Should produce chunks');
+    // Plain text should be split on sentence boundaries (semanticSplit=true default)
+  });
+
+  it('passes Markdown prose to chunkDocument', () => {
+    const md = '# Title\n\nThis is a paragraph.\n\n## Section\n\nAnother paragraph here.\n'.repeat(20);
+
+    const result = smartChunk(md, 'text-embedding-3-small');
+
+    assert.ok(result.chunks.length >= 1, 'Should produce chunks');
+    assert.equal(detectCodeLanguage(md), null, 'Markdown should not be detected as code');
+  });
+});
+
+// ============================================================================
+// TypeScript interface chunking
+// ============================================================================
+
+describe('TypeScript interfaces and types', () => {
+
+  it('smartChunk: TypeScript interface stays balanced (via smartChunk, not direct astChunk)', () => {
+    // Note: tree-sitter-javascript cannot fully parse TS interface declarations as one unit.
+    // When astChunk falls back to chunkDocument for an oversized TS interface,
+    // it may produce multiple chunks. smartChunk avoids this by using a large
+    // enough maxChunkSize that the whole interface fits in one chunk.
+    const code = `interface UserProfile {
+  id: string;
+  name: string;
+  email: string;
+  createdAt: Date;
+  metadata?: Record<string, unknown>;
+}`;
+
+    const result = smartChunk(code, 'text-embedding-3-small');
+    // The interface declaration should produce at least one chunk
+    assert.ok(result.chunks.length >= 1, 'Should produce at least one chunk');
+    assertBraceBalanced(result.chunks, 'smartChunk TS interface');
+    assert.ok(result.chunks.some(c => c.includes('interface UserProfile')), 'interface should be present');
+  });
+
+  it('smartChunk on TypeScript stays balanced', () => {
+    const code = `type UserID = string;
+
+interface Config {
+  apiKey: string;
+  timeout: number;
+}
+
+function getConfig(): Config {
+  return { apiKey: process.env.KEY, timeout: 5000 };
+}`;
+
+    const result = smartChunk(code, 'text-embedding-3-small');
+    assertBraceBalanced(result.chunks, 'smartChunk TS');
+  });
+});
diff --git a/test/dedup-false-alarm.test.mjs b/test/dedup-false-alarm.test.mjs
new file mode 100644
index 00000000..80d77ee0
--- /dev/null
+++ b/test/dedup-false-alarm.test.mjs
@@ -0,0 +1,165 @@
+/**
+ * P0 驗證測試：確認 bulkStore 不會因 batchDedup 去重
+ *
+ * 問題：是否 bulkStore 內部呼叫 batchDedup，導致 near-duplicate entries
+ *       被錯誤過濾，造成 countAfter - countBefore < createEntries.length？
+ *
+ * 測試策略：
+ *   1. 直接構造兩個 cosine similarity = 0.95 的向量（保證是 near-duplicate）
+ *   2. 用 batchDedup 確認它們確實被視為 duplicates
+ *   3. 透過 bulkStore 寫入這兩個 entry
+ *   4. 驗證 count 增加 2（而非 1）→ 確認 bulkStore 不去重
+ */
+
+import { describe, it, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import jitiFactory from "jiti";
+
+const jiti = jitiFactory(import.meta.url, { interopDefault: true });
+const { MemoryStore } = jiti("../src/store.ts");
+const { batchDedup } = jiti("../src/batch-dedup.ts");
+
+/** 256-dim constant vector with a controlled cosine similarity to a base vector */
+function makeNearDuplicateVector(baseVec, similarity = 0.95) {
+  const dim = baseVec.length;
+  // Scale factor: cosine = scale * |base| / |target| = scale (since |base|=|target|=1)
+  // Actually: cos(base, target) = scale when target = scale * base + orth
+  const scale = similarity;
+  return baseVec.map(v => v * scale);
+}
+
+/** Two identical vectors — maximum similarity */
+function makeIdenticalVector(dim = 256) {
+  const rng = makeRng(42);
+  return Array.from({ length: dim }, () => rng());
+}
+
+/** Seeded LCG RNG for deterministic vectors */
+function makeRng(seed) {
+  let s = seed >>> 0;
+  return () => {
+    s = (Math.imul(1664525, s) + 1013904223) >>> 0;
+    return s / 0x100000000;
+  };
+}
+
+/** Cosine similarity between two vectors */
+function cosineSimilarity(a, b) {
+  let dot = 0, normA = 0, normB = 0;
+  for (let i = 0; i < a.length; i++) {
+    dot += a[i] * b[i];
+    normA += a[i] * a[i];
+    normB += b[i] * b[i];
+  }
+  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
+}
+
+const TEST_DB_PREFIX = "/tmp/test-dedup-p0-";
+
+describe("P0: bulkStore does NOT deduplicate near-duplicate entries", () => {
+  /** @type {MemoryStore} */
+  let store;
+  let dbPath;
+
+  afterEach(async () => {
+    if (store) {
+      try { await store.deleteAll("test-session"); } catch {}
+      try { await store.destroy(); } catch {}
+    }
+  });
+
+  it("bulkStore writes both near-duplicate entries (cosine = 0.95)", async () => {
+    // Step 1: Create base vector and a near-duplicate (cosine = 0.95)
+    const dim = 256;
+    const baseVec = makeIdenticalVector(dim);
+    const dupVec = makeNearDuplicateVector(baseVec, 0.95);
+
+    const cosSim = cosineSimilarity(baseVec, dupVec);
+    console.log(`[P0] cosine similarity between base and near-duplicate: ${cosSim.toFixed(4)}`);
+    assert(cosSim > 0.94, `Near-duplicate should have cosine > 0.94, got ${cosSim}`);
+
+    // Step 2: Verify batchDedup marks one as duplicate
+    const dedupResult = batchDedup(
+      ["abstract one", "abstract two"],
+      [baseVec, dupVec],
+      0.85  // default threshold
+    );
+    console.log(`[P0] batchDedup: ${dedupResult.inputCount} → ${dedupResult.outputCount}, duplicates=${JSON.stringify(dedupResult.duplicateIndices)}`);
+    assert(dedupResult.outputCount < dedupResult.inputCount,
+      `batchDedup should mark one as duplicate (input=${dedupResult.inputCount}, output=${dedupResult.outputCount})`);
+
+    // Step 3: Create MemoryStore and write both via bulkStore
+    dbPath = TEST_DB_PREFIX + Date.now() + "-1";
+    store = new MemoryStore({ dbPath, vectorDim: dim });
+    const countBefore = await store.count();
+
+    await store.bulkStore([
+      {
+        text: "Meeting attendance — quarterly business review",
+        vector: baseVec,
+        category: "fact",
+        scope: "test-session",
+        importance: 0.5,
+        metadata: JSON.stringify({ l0_abstract: "abstract one" }),
+      },
+      {
+        text: "Quarterly business review with team lead",
+        vector: dupVec,
+        category: "fact",
+        scope: "test-session",
+        importance: 0.5,
+        metadata: JSON.stringify({ l0_abstract: "abstract two" }),
+      },
+    ]);
+
+    const countAfter = await store.count();
+    const delta = countAfter - countBefore;
+
+    console.log(`[P0 result] countBefore=${countBefore}, countAfter=${countAfter}, delta=${delta}`);
+
+    // KEY ASSERTION: delta should be exactly 2 — bulkStore does NOT dedupe
+    assert.strictEqual(delta, 2,
+      `bulkStore should write both entries (delta=2), got delta=${delta}. ` +
+      `If delta=1, bulkStore is internally deduplicating near-duplicate entries — this is a P0 bug.`);
+  });
+
+  it("bulkStore writes all 5 entries even when batchDedup would reduce them to 1", async () => {
+    const dim = 256;
+
+    // Create 5 identical vectors — batchDedup with threshold 0.85 will keep only 1
+    const baseVec = makeIdenticalVector(dim);
+    const vectors = Array.from({ length: 5 }, () => baseVec);
+
+    const dedupResult = batchDedup(
+      Array(5).fill("abstract"),
+      vectors,
+      0.85
+    );
+    console.log(`[P0 batch] batchDedup: 5 identical vectors → ${dedupResult.outputCount} survivors`);
+    assert(dedupResult.outputCount < 5,
+      "Sanity: 5 identical vectors should produce < 5 survivors in batchDedup");
+
+    // Now write all 5 via bulkStore
+    dbPath = TEST_DB_PREFIX + Date.now() + "-2";
+    store = new MemoryStore({ dbPath, vectorDim: dim });
+    const countBefore = await store.count();
+
+    await store.bulkStore(vectors.map((v, i) => ({
+      text: `Event ${i + 1}`,
+      vector: v,
+      category: "fact",
+      scope: "test-session",
+      importance: 0.5,
+      metadata: JSON.stringify({ l0_abstract: `abstract ${i}` }),
+    })));
+
+    const countAfter = await store.count();
+    const delta = countAfter - countBefore;
+
+    console.log(`[P0 batch result] countBefore=${countBefore}, countAfter=${countAfter}, delta=${delta}`);
+
+    // KEY ASSERTION: all 5 should be written despite batchDedup saying they're duplicates
+    assert.strictEqual(delta, 5,
+      `bulkStore should write all 5 entries even if batchDedup would drop 4 of them (delta=5), got delta=${delta}`);
+  });
+});
diff --git a/test/extraction-validation.test.mjs b/test/extraction-validation.test.mjs
new file mode 100644
index 00000000..1bd913ed
--- /dev/null
+++ b/test/extraction-validation.test.mjs
@@ -0,0 +1,445 @@
+/**
+ * Test: Extraction Write Validation (Issue #693)
+ *
+ * Tests the countBefore/countAfter validation logic in extractAndPersist().
+ *
+ * Key challenge: SmartExtractor's `batchDedup` uses cosine similarity (threshold 0.85)
+ * on embedded candidate abstracts to filter near-duplicates BEFORE the dedup pipeline.
+ * Simple charCodeAt-based vectors all score >0.85 similarity (common English letters).
+ *
+ * Solution: `DeterministicRandomEmbedder` generates 256-dim vectors using a
+ * seeded RNG (Mulberry32) keyed on the text content. Each distinct text produces
+ * a unique vector with cosine similarity < 0.85, ensuring candidates survive batchDedup.
+ *
+ * Validates:
+ *   T1. Normal extraction: expected === actual, mismatch = 0, callback NOT triggered
+ *   T2. Empty extraction: skipped, mismatch undefined (validation skipped)
+ *   T3. Partial bulkStore failure: actual < expected → mismatch > 0, callback triggered
+ *   T4. Post-write deletion (compactor race): actual < expected → mismatch > 0
+ *   T5. Callback is optional — no error if omitted even on mismatch
+ *   T6. Multiple extractions each get independent validation state
+ */
+
+import { describe, it } from "node:test";
+import assert from "node:assert/strict";
+import jitiFactory from "jiti";
+
+const jiti = jitiFactory(import.meta.url, { interopDefault: true });
+const { SmartExtractor } = jiti("../src/smart-extractor.ts");
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/**
+ * Mulberry32 seeded PRNG — fast, deterministic, good distribution.
+ * Returns a new RNG function seeded from an integer.
+ */
+function makeRng(seed) {
+  let s = seed >>> 0;
+  return () => {
+    s |= 0;
+    s = (s + 0x6d2b79f5) | 0;
+    let t = Math.imul(s ^ (s >>> 15), 1 | s);
+    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
+    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
+  };
+}
+
+/**
+ * Deterministic random embedder.
+ *
+ * Produces 256-dimensional vectors using a seeded RNG keyed on the text.
+ * Different texts → different seeds → vectors with cosine similarity < 0.85,
+ * ensuring candidates survive SmartExtractor's internal batchDedup filter.
+ *
+ * This lets us test with 2+ candidates without fighting the dedup logic.
+ */
+function makeDeterministicEmbedder() {
+  return {
+    async embed(text) {
+      const seed = [...text].reduce((acc, c) => acc + c.charCodeAt(0), 0) >>> 0;
+      const rng = makeRng(seed === 0 ? 1 : seed);
+      return Array.from({ length: 256 }, () => rng());
+    },
+    async embedBatch(texts) {
+      return texts.map((t) => {
+        const seed = [...t].reduce((acc, c) => acc + c.charCodeAt(0), 0) >>> 0;
+        const rng = makeRng(seed === 0 ? 1 : seed);
+        return Array.from({ length: 256 }, () => rng());
+      });
+    },
+  };
+}
+
+/**
+ * Mock LLM — returns configurable candidates and "create" for all dedup decisions.
+ * The "create" decision ensures candidates progress through the dedup pipeline
+ * and reach bulkStore without special handling (no handleSupersede, handleMerge, etc.).
+ */
+function makeLlm(candidates = []) {
+  return {
+    async completeJson(_prompt, mode) {
+      if (mode === "extract-candidates") return { memories: candidates };
+      if (mode === "dedup-decision") return { decision: "create", reason: "no match" };
+      if (mode === "merge-memory") return candidates[0] ?? null;
+      return null;
+    },
+  };
+}
+
+/**
+ * Mock store with configurable write behavior.
+ *
+ * Config:
+ *   initialCount    — starting row count (default 0)
+ *   dropLastN       — silently drop last N entries on bulkStore (partial write failure)
+ *   bulkStoreThrows — throw on bulkStore (total write failure)
+ */
+function makeStore(config = {}) {
+  const { initialCount = 0, dropLastN = 0, bulkStoreThrows = false } = config;
+  let rowCount = initialCount;
+  const entries = [];
+
+  const store = {
+    _config: config,
+
+    async count() { return rowCount; },
+
+    async vectorSearch() { return []; },
+
+    async store(entry) {
+      rowCount++;
+      entries.push({ action: "store", id: entry.id ?? "?" });
+      return { ...entry, id: "direct-id-" + entries.length };
+    },
+
+    async bulkStore(batchEntries) {
+      if (bulkStoreThrows) throw new Error("bulkStore simulated failure");
+      const stored = dropLastN > 0
+        ? batchEntries.slice(0, batchEntries.length - dropLastN)
+        : batchEntries;
+      for (let i = 0; i < stored.length; i++) {
+        rowCount++;
+        entries.push({ action: "bulkStore", id: stored[i].id ?? "bulk-" + i });
+      }
+      if (dropLastN > 0) entries.push({ action: "bulkStore_dropped", count: dropLastN });
+      return stored.map((e, i) => ({ ...e, id: "bulk-id-" + i }));
+    },
+
+    async update(_id, _patch, _scopeFilter) {
+      entries.push({ action: "update", id: _id });
+    },
+
+    async getById() { return null; },
+
+    async delete(_id) {
+      rowCount = Math.max(0, rowCount - 1);
+      entries.push({ action: "delete", id: _id });
+    },
+
+    get entries() { return [...entries]; },
+    get rowCount() { return rowCount; },
+    reset() { rowCount = initialCount; entries.length = 0; },
+  };
+  return store;
+}
+
+function makeExtractor(embedder, llm, store, config = {}) {
+  return new SmartExtractor(store, embedder, llm, {
+    user: "User",
+    extractMinMessages: 1,
+    extractMaxChars: 8000,
+    defaultScope: "global",
+    log() {},
+    debugLog() {},
+    ...config,
+  });
+}
+
+// ============================================================================
+// Tests
+// ============================================================================
+
+describe("Issue #693: Extraction write validation", () => {
+
+  // --------------------------------------------------------------------------
+  // T1: Normal extraction — expected === actual, mismatch = 0, no callback
+  // --------------------------------------------------------------------------
+  it("T1: normal extraction passes validation with mismatch=0", async () => {
+    const embedder = makeDeterministicEmbedder();
+    // Two semantically different abstracts → low cosine similarity → both survive batchDedup
+    const llm = makeLlm([
+      {
+        category: "preferences",
+        abstract: "User prefers dark mode interface settings for eye comfort during coding",
+        overview: "Display preference",
+        content: "The user prefers dark mode interface settings on their workstation for reduced eye strain during extended coding sessions.",
+      },
+      {
+        category: "entities",
+        abstract: "User works at Acme Corporation headquarters in the R&D division",
+        overview: "Employment information",
+        content: "The user is employed as a senior software engineer at Acme Corporation's research and development headquarters facility.",
+      },
+    ]);
+    const store = makeStore({ initialCount: 0 });
+    const extractor = makeExtractor(embedder, llm, store);
+
+    let callbackInvoked = false;
+    let receivedValidation = null;
+
+    const stats = await extractor.extractAndPersist(
+      "I use dark mode at work and work at Acme Corp",
+      "session-t1",
+      {
+        onExtractionValidationFailed(validation) {
+          callbackInvoked = true;
+          receivedValidation = validation;
+        },
+      }
+    );
+
+    assert.strictEqual(stats.created, 2, "both candidates should be created");
+    assert.strictEqual(
+      stats.validationMismatch,
+      undefined,
+      "validationMismatch should be undefined (not in public ExtractionStats)"
+    );
+    assert.strictEqual(callbackInvoked, false, "callback should NOT be triggered");
+    assert.strictEqual(store.rowCount, 2, "both entries should be written");
+    assert.strictEqual(receivedValidation, null, "no validation object received");
+  });
+
+  // --------------------------------------------------------------------------
+  // T2: Empty extraction — no bulkStore, validation skipped
+  // --------------------------------------------------------------------------
+  it("T2: empty extraction skips validation", async () => {
+    const embedder = makeDeterministicEmbedder();
+    const llm = makeLlm([]);
+    const store = makeStore({ initialCount: 5 });
+    const extractor = makeExtractor(embedder, llm, store);
+
+    let callbackInvoked = false;
+
+    const stats = await extractor.extractAndPersist(
+      "nothing to extract here",
+      "session-t2",
+      { onExtractionValidationFailed() { callbackInvoked = true; } }
+    );
+
+    assert.strictEqual(stats.created, 0, "no entries created");
+    assert.strictEqual(
+      stats.validationMismatch,
+      undefined,
+      "validationMismatch should be undefined for empty extraction (validation skipped)"
+    );
+    assert.strictEqual(callbackInvoked, false, "callback should NOT fire");
+    assert.strictEqual(store.rowCount, 5, "pre-existing count unchanged");
+  });
+
+  // --------------------------------------------------------------------------
+  // T3: Partial bulkStore failure — actual < expected → mismatch > 0
+  // --------------------------------------------------------------------------
+  it("T3: partial bulkStore failure triggers mismatch > 0", async () => {
+    const embedder = makeDeterministicEmbedder();
+    // 3 candidates → dropLastN=1 → actual=2, mismatch=1
+    const llm = makeLlm([
+      {
+        category: "preferences",
+        abstract: "User prefers dark mode interface settings for eye comfort during coding",
+        overview: "Display preference",
+        content: "The user prefers dark mode interface settings on their workstation for reduced eye strain during extended coding sessions.",
+      },
+      {
+        category: "preferences",
+        abstract: "User prefers light theme when editing documents and writing emails",
+        overview: "Display preference",
+        content: "In contrast to dark mode, the user prefers light theme when editing documents and writing emails in their daily productivity workflow.",
+      },
+      {
+        category: "entities",
+        abstract: "User works at Acme Corporation headquarters in the R&D division",
+        overview: "Employment information",
+        content: "The user is employed as a senior software engineer at Acme Corporation's research and development headquarters facility.",
+      },
+    ]);
+    const store = makeStore({ initialCount: 0, dropLastN: 1 });
+    const extractor = makeExtractor(embedder, llm, store);
+
+    let callbackInvoked = false;
+    let receivedValidation = null;
+
+    const stats = await extractor.extractAndPersist(
+      "I use dark mode for coding but light theme for writing emails at Acme Corp",
+      "session-t3",
+      {
+        onExtractionValidationFailed(validation) {
+          callbackInvoked = true;
+          receivedValidation = validation;
+        },
+      }
+    );
+
+    // Expected = 3, Actual = 2 (dropLastN=1), Mismatch = 1
+    // Note: validationMismatch is NOT written to stats (removed from public API)
+    // Only the callback receives the mismatch information
+    assert.strictEqual(callbackInvoked, true, "callback SHOULD be triggered");
+    assert.ok(receivedValidation);
+    assert.strictEqual(receivedValidation.expected, 3);
+    assert.strictEqual(receivedValidation.actual, 2);
+    assert.strictEqual(receivedValidation.mismatch, 1);
+    assert.strictEqual(receivedValidation.sessionKey, "session-t3");
+    assert.strictEqual(store.rowCount, 2, "only 2 rows written");
+  });
+
+  // --------------------------------------------------------------------------
+  // T4: Post-write deletion (compactor race) — actual < expected
+  // --------------------------------------------------------------------------
+  it("T4: post-write deletion triggers mismatch > 0 (compactor race)", async () => {
+    const embedder = makeDeterministicEmbedder();
+    // 2 candidates → compactor deletes 1 after bulkStore → actual=1, mismatch=1
+    const llm = makeLlm([
+      {
+        category: "cases",
+        abstract: "User completed initial setup wizard on first launch of the application",
+        overview: "Setup wizard completion",
+        content: "The user has successfully completed the initial setup wizard and application onboarding process during their first launch of the software application on their primary workstation.",
+      },
+      {
+        category: "cases",
+        abstract: "User configured notification preferences including email and push alerts",
+        overview: "Notification settings configuration",
+        content: "Following the initial setup, the user proceeded to configure various notification preferences including email alerts, desktop push notifications, and mobile synchronization settings.",
+      },
+    ]);
+    const store = makeStore({ initialCount: 0 });
+    const extractor = makeExtractor(embedder, llm, store);
+
+    // Simulate compactor deleting 1 entry after bulkStore succeeds
+    const originalBulkStore = store.bulkStore.bind(store);
+    store.bulkStore = async (entries) => {
+      const result = await originalBulkStore(entries);
+      await store.delete("bulk-id-0"); // compactor race: delete first entry
+      return result;
+    };
+
+    let callbackInvoked = false;
+    let receivedValidation = null;
+
+    const stats = await extractor.extractAndPersist(
+      "I completed setup and configured notification preferences",
+      "session-t4",
+      {
+        onExtractionValidationFailed(validation) {
+          callbackInvoked = true;
+          receivedValidation = validation;
+        },
+      }
+    );
+
+    // Expected = 2, Actual = 1 (compactor deleted 1), Mismatch = 1
+    // Note: validationMismatch is NOT written to stats (removed from public API)
+    assert.strictEqual(callbackInvoked, true, "callback SHOULD be triggered");
+    assert.ok(receivedValidation);
+    assert.strictEqual(receivedValidation.expected, 2);
+    assert.strictEqual(receivedValidation.actual, 1);
+    assert.strictEqual(receivedValidation.mismatch, 1);
+    assert.strictEqual(receivedValidation.sessionKey, "session-t4");
+    assert.strictEqual(store.rowCount, 1, "1 row remaining after deletion");
+  });
+
+  // --------------------------------------------------------------------------
+  // T5: Callback is optional — no error if omitted
+  // --------------------------------------------------------------------------
+  it("T5: callback is optional — no error if omitted even on mismatch", async () => {
+    const embedder = makeDeterministicEmbedder();
+    // 2 candidates that survive batchDedup, with dropLastN=1
+    const llm = makeLlm([
+      {
+        category: "events",
+        abstract: "User attended quarterly business review meeting with the team lead",
+        overview: "Meeting attendance",
+        content: "The quarterly business review meeting was attended by the user along with their direct team members to discuss ongoing project status and future planning initiatives.",
+      },
+      {
+        category: "events",
+        abstract: "User participated in a formal code review session with constructive feedback",
+        overview: "Code review participation",
+        content: "The user actively participated in a formal code review session where they provided constructive feedback on pull request implementations and discussed architectural decision implications.",
+      },
+    ]);
+    const store = makeStore({ initialCount: 0, dropLastN: 1 });
+    const extractor = makeExtractor(embedder, llm, store);
+
+    // Should NOT throw even though mismatch occurs and callback is absent
+    // Note: validationMismatch is NOT written to stats (exposed only via callback)
+    await extractor.extractAndPersist(
+      "User said: I attended the quarterly business review and participated in a code review",
+      "session-t5",
+      {}  // no callback
+    );
+    assert.strictEqual(store.rowCount, 1, "1 row written despite mismatch (dropLastN=1)");
+  });
+
+  // --------------------------------------------------------------------------
+  // T6: Multiple extractions — independent validation state
+  // --------------------------------------------------------------------------
+  it("T6: multiple extractions each get independent validation", async () => {
+    const embedder = makeDeterministicEmbedder();
+
+    // First extraction: normal (no mismatch)
+    const llm1 = makeLlm([{
+      category: "events",
+      abstract: "User attended quarterly business review meeting with the team lead",
+      overview: "Meeting",
+      content: "The quarterly business review meeting was attended by the user along with their direct team members to discuss ongoing project status and future planning initiatives.",
+    }]);
+    const store1 = makeStore({ initialCount: 0 });
+    const extractor1 = makeExtractor(embedder, llm1, store1);
+    store1.reset();
+
+    const validations = [];
+
+    const stats1 = await extractor1.extractAndPersist(
+      "User said: I attended the quarterly business review",
+      "session-multi-1",
+      { onExtractionValidationFailed(v) { validations.push(v); } }
+    );
+
+    assert.strictEqual(stats1.validationMismatch, undefined, "first: validationMismatch undefined");
+    assert.strictEqual(validations.length, 0, "first: no callback fired");
+
+    // Second extraction: partial write failure (dropLastN=1) → mismatch=1
+    const llm2 = makeLlm([
+      {
+        category: "events",
+        abstract: "User attended quarterly business review meeting with the team lead",
+        overview: "Meeting",
+        content: "The quarterly business review meeting was attended by the user along with their direct team members to discuss ongoing project status and future planning initiatives.",
+      },
+      {
+        category: "events",
+        abstract: "User participated in a formal code review session with constructive feedback",
+        overview: "Code review",
+        content: "The user actively participated in a formal code review session where they provided constructive feedback on pull request implementations and discussed architectural decision implications.",
+      },
+    ]);
+    const store2 = makeStore({ initialCount: 0, dropLastN: 1 });
+    const extractor2 = makeExtractor(embedder, llm2, store2);
+
+    const stats2 = await extractor2.extractAndPersist(
+      "User said: I attended a quarterly meeting and participated in a code review",
+      "session-multi-2",
+      { onExtractionValidationFailed(v) { validations.push(v); } }
+    );
+
+    // Second extraction: 2 candidates, dropLastN=1 → actual=1, expected=2, mismatch=1
+    // Note: validationMismatch is NOT written to stats (removed from public API)
+    // Only the callback receives the mismatch
+    assert.strictEqual(validations.length, 1, "second: callback fired once");
+    assert.strictEqual(validations[0].sessionKey, "session-multi-2");
+    assert.strictEqual(validations[0].mismatch, 1);
+  });
+
+});
diff --git a/test/preference-slots.test.mjs b/test/preference-slots.test.mjs
index 1849ef31..4e20fabe 100644
--- a/test/preference-slots.test.mjs
+++ b/test/preference-slots.test.mjs
@@ -125,6 +125,7 @@ function makeGuardExtractor({ vectorSearchResults, onDedupCalled }) {
       stored.push(...entries);
       return entries;
     },
+    async count() { return stored.length; },
   };
   const embedder = {
     async embed() {
@@ -241,6 +242,7 @@ test("dedup guard: non-preference category -> skips guard, goes to LLM", async (
     },
     async store() {},
     async bulkStore() { return []; },
+    async count() { return 0; },
   };
   const embedder = {
     async embed() { return [0.1, 0.2, 0.3]; },
diff --git a/test/smart-extractor-batch-embed.test.mjs b/test/smart-extractor-batch-embed.test.mjs
index ea0120b0..634f291e 100644
--- a/test/smart-extractor-batch-embed.test.mjs
+++ b/test/smart-extractor-batch-embed.test.mjs
@@ -99,6 +99,9 @@ function makeStore() {
     async getById(_id, _scopeFilter) {
       return null;
     },
+    async count() {
+      return entries.length;
+    },
     get entries() {
       return [...entries];
     },
diff --git a/test/smart-extractor-bulk-store-edge-cases.test.mjs b/test/smart-extractor-bulk-store-edge-cases.test.mjs
index 54153158..2eb5ac50 100644
--- a/test/smart-extractor-bulk-store-edge-cases.test.mjs
+++ b/test/smart-extractor-bulk-store-edge-cases.test.mjs
@@ -61,6 +61,7 @@ class MockStore {
   
   async vectorSearch() { return []; }
   async getById() { return null; }
+  async count() { return 0; }
 }
 
 // ============================================================
diff --git a/test/smart-extractor-bulk-store.test.mjs b/test/smart-extractor-bulk-store.test.mjs
index cef140c5..b39c06f0 100644
--- a/test/smart-extractor-bulk-store.test.mjs
+++ b/test/smart-extractor-bulk-store.test.mjs
@@ -66,6 +66,7 @@ class MockStore {
   
   async vectorSearch() { return []; }
   async getById() { return null; }
+  async count() { return 0; }
 }
 
 // ============================================================
diff --git a/test/smart-extractor-scope-filter.test.mjs b/test/smart-extractor-scope-filter.test.mjs
index adef26da..488fb12f 100644
--- a/test/smart-extractor-scope-filter.test.mjs
+++ b/test/smart-extractor-scope-filter.test.mjs
@@ -13,6 +13,7 @@ function makeExtractor(scopeFilters) {
     },
     async store() {},
     async bulkStore() {},
+    async count() { return 0; },
   };
 
   const embedder = {