From 1da87ac32a9157065ef43bc36bd8d2eb870e95df Mon Sep 17 00:00:00 2001 From: prosdev Date: Tue, 31 Mar 2026 01:41:13 -0700 Subject: [PATCH 1/8] =?UTF-8?q?docs:=20revise=201.5=20plan=20=E2=80=94=20t?= =?UTF-8?q?ree-sitter=20WASM=20queries,=20not=20ast-grep=20CLI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace ast-grep CLI approach with tree-sitter queries using WASM we already bundle. Key changes: PatternMatcher interface for future NAPI swap, PatternRule DSL translated to S-expressions, zero new dependencies. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../1.5-ast-pattern-analysis.md | 632 +++++++++++------- .claude/scratchpad.md | 5 +- 2 files changed, 407 insertions(+), 230 deletions(-) diff --git a/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/1.5-ast-pattern-analysis.md b/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/1.5-ast-pattern-analysis.md index 9e4a1a9..d4e08c3 100644 --- a/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/1.5-ast-pattern-analysis.md +++ b/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/1.5-ast-pattern-analysis.md @@ -1,292 +1,468 @@ -# Part 1.5: AST-Based Pattern Analysis via ast-grep +# Part 1.5: AST-Based Pattern Analysis via Tree-Sitter Queries -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. +**Status:** Draft (revised — addressed review findings) -**Goal:** Replace regex-based pattern detection in `dev_patterns` with ast-grep's tree-sitter AST matching for significantly more accurate analysis. +**Goal:** Replace regex-based pattern detection in `dev_patterns` with tree-sitter +query-based AST matching for more accurate analysis. Use the WASM tree-sitter +runtime we already bundle. Design for future swap to `@ast-grep/napi` if needed. **User stories:** US-11 -**Inspiration:** [ast-grep](https://github.com/ast-grep/ast-grep) — MIT licensed, structural code search using tree-sitter AST patterns. Has its own [MCP server](https://github.com/ast-grep/ast-grep-mcp) validating the approach. - -**Files:** -- Create: `packages/core/src/services/ast-patterns.ts` -- Create: `packages/core/src/services/__tests__/ast-patterns.test.ts` -- Modify: `packages/core/src/services/pattern-analysis-service.ts` +**Inspiration:** Studied [ast-grep](https://github.com/ast-grep/ast-grep) architecture +(custom matcher on tree-sitter AST). Our approach is simpler: use tree-sitter's +built-in query API with S-expression constants. No query builder — YAGNI. --- -## Why ast-grep over regex - -Current regex approach misses common patterns: +## Key design decisions (from two review rounds) -| Pattern | Regex detects | ast-grep detects | -|---------|--------------|-----------------| -| `try { } catch (e) { }` | No | Yes — structural match | -| `promise.catch(handler)` | No | Yes — method call pattern | -| Custom error classes | No | Yes — class extends Error | -| Async error handling | No | Yes — await in try/catch | -| Type annotations on arrow functions | Fragile | Yes — AST node type | +**Round 1 (query builder removal):** +Dropped the `PatternRule` DSL and `query-builder.ts` (~150 lines of template code). +S-expressions are string constants — simpler, less error-prone, directly testable. -ast-grep is a CLI tool (`npm install -g @ast-grep/cli`) that we shell out to. No need to import its internals — just call it with YAML rules and parse JSON output. +**Round 2 (WASM/grammar concerns):** +- Bundle both TypeScript (~2.3MB) and TSX (~2.3MB) WASM grammars. React codebases + are mostly `.tsx` — skipping them would miss the majority of files. Same queries + work on both grammars. Total ~4.9MB WASM bundle (including Go). +- `WasmPatternMatcher` caches a parser instance and calls `tree.delete()` after each + `match()` to prevent WASM heap memory leaks. +- `await-in-try` query is intentionally narrow (const/let declarations only). Documented. +- `error-class` query matches any `extends_clause`, not just `extends Error` literally. + Custom error hierarchies (`extends BaseError`, `extends HttpError`) are common. --- -## Task 1: Add ast-grep as optional dependency +## Verified S-expressions -- [ ] **Step 1: Check if ast-grep is available** +All queries validated against `web-tree-sitter@0.25.10` + `tree-sitter-wasms@0.1.13` +TypeScript grammar by parsing real snippets and inspecting AST output. + +### Error handling (5 rules) ```typescript -// In packages/core/src/services/ast-patterns.ts - -import { execFileSync, execSync } from 'node:child_process'; -import * as fs from 'node:fs'; -import * as os from 'node:os'; -import * as path from 'node:path'; - -let _astGrepAvailable: boolean | null = null; - -/** - * Check if ast-grep CLI is available. Memoized per process. - */ -export function hasAstGrep(): boolean { - if (_astGrepAvailable !== null) return _astGrepAvailable; - try { - execSync('ast-grep --version', { stdio: 'pipe', timeout: 5000 }); - _astGrepAvailable = true; - } catch { - _astGrepAvailable = false; - } - return _astGrepAvailable; -} +const ERROR_HANDLING_QUERIES = { + // Matches any try { } catch { } block + 'try-catch': '(try_statement) @match', + + // Matches throw statements: throw new Error(...), throw err, etc. + 'throw': '(throw_statement) @match', + + // Matches .catch() method calls: promise.catch(handler) + 'promise-catch': '(call_expression function: (member_expression property: (property_identifier) @method (#eq? @method "catch"))) @match', + + // Matches await inside try blocks — intentionally narrow: only catches + // const/let declarations with await (e.g., `const data = await fetch(...)`) + // Does NOT catch bare `await foo()` as expression statements or await + // nested inside if/for within try. This is a known limitation. + 'await-in-try': '(try_statement body: (statement_block (lexical_declaration (variable_declarator value: (await_expression))))) @match', + + // Matches class declarations with any extends clause. + // Catches: class AppError extends Error, class HttpError extends BaseError, etc. + // May have false positives for non-error classes with extends, but in practice + // this is rare in error-handling analysis context. + 'error-class': '(class_declaration (class_heritage (extends_clause))) @match', +}; ``` -- [ ] **Step 2: Write test** +### Import style (3 rules) ```typescript -describe('ast-grep integration', () => { - it('should detect if ast-grep is available', () => { - // This test passes regardless — just checks the function works - const available = hasAstGrep(); - expect(typeof available).toBe('boolean'); - }); -}); +const IMPORT_STYLE_QUERIES = { + // Matches dynamic import(): await import('./module') + 'dynamic-import': '(call_expression function: (import)) @match', + + // Matches re-exports: export { foo } from './bar' + 're-export': '(export_statement source: (string)) @match', + + // Matches require() calls: const fs = require('fs') + 'require': '(call_expression function: (identifier) @fn (#eq? @fn "require")) @match', +}; ``` -- [ ] **Step 3: Commit** +### Type coverage (2 rules) -```bash -git add packages/core/src/services/ast-patterns.ts packages/core/src/services/__tests__/ast-patterns.test.ts -git commit -m "feat(core): add ast-grep availability check" +```typescript +const TYPE_COVERAGE_QUERIES = { + // Matches arrow functions WITH return type annotation + // This is the main win over regex — regex is fragile on (): Type => ... + 'arrow-return-type': '(arrow_function return_type: (type_annotation)) @match', + + // Matches function declarations WITH return type annotation + 'function-return-type': '(function_declaration return_type: (type_annotation)) @match', +}; ``` --- -## Task 2: Define AST pattern rules for error handling +## Architecture -ast-grep uses YAML rules. We define patterns for the 5 categories we analyze. +``` +PatternAnalysisService + │ + │ calls (optional — regex fallback if not configured) + ▼ +PatternMatcher (interface) + │ + │ match(source, language, queries): Map + │ (returns match counts per query ID) + │ + └── WasmPatternMatcher (now) + ├── Caches parser instance (reused across match() calls) + ├── Calls tree.delete() after each match() (WASM heap cleanup) + ├── Detects .tsx → returns empty map (regex fallback handles it) + └── Uses parseCode() + query() from scanner/tree-sitter.ts + +Rules are plain objects: { id: string, category: string, query: string } +No query builder. S-expressions are string constants. +``` -- [ ] **Step 1: Create error handling patterns** +### Language routing -```typescript -// In ast-patterns.ts - -export interface AstPatternResult { - file: string; - matches: Array<{ - rule: string; - line: number; - text: string; - }>; -} - -/** - * Run ast-grep with a pattern rule on a file. Returns matches. - * Pure function over CLI — no state. - */ -/** - * Run ast-grep with a YAML rule on a file. Returns matches. - * - * IMPORTANT: `--rule` expects a file path, not inline YAML. - * We write a temp file for each rule invocation. - * Uses execFileSync (array args) to avoid shell injection via file paths. - */ -export function runAstGrepRule( - filePath: string, - ruleYaml: string, - repositoryPath: string -): AstPatternResult { - try { - const tmpRule = path.join(os.tmpdir(), `ast-grep-rule-${Date.now()}.yml`); - fs.writeFileSync(tmpRule, ruleYaml); - try { - const output = execFileSync( - 'ast-grep', ['scan', '--rule', tmpRule, '--json', filePath], - { cwd: repositoryPath, encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 } - ); - const matches = JSON.parse(output || '[]'); - return { file: filePath, matches }; - } finally { - fs.unlinkSync(tmpRule); - } - } catch { - return { file: filePath, matches: [] }; - } -} - -/** Built-in rules for error handling detection */ -export const ERROR_HANDLING_RULES = { - tryCatch: ` -rule: - kind: try_statement -`, - promiseCatch: ` -rule: - pattern: $PROMISE.catch($HANDLER) -`, - throwStatement: ` -rule: - kind: throw_statement -`, - asyncAwaitTryCatch: ` -rule: - kind: try_statement - has: - kind: await_expression -`, -}; -``` +| Extension | Grammar | Size | AST available | +|-----------|---------|------|---------------| +| `.ts` | tree-sitter-typescript | 2.3MB | Yes — all 10 queries | +| `.tsx` | tree-sitter-tsx | 2.3MB | Yes — same queries (TSX is a TS superset) | +| `.js` | tree-sitter-javascript | 647KB | Yes — 8 queries (type coverage queries return 0, correct) | +| `.jsx` | tree-sitter-javascript | (shared) | Yes — same as `.js` | +| `.go` | tree-sitter-go (already bundled) | 236KB | Future — no Go rules yet | +| `.vue`, `.svelte`, `.md`, other | Not supported | — | No → regex fallback | -- [ ] **Step 2: Write test for error handling detection** +All 4 JS/TS grammars bundled. Covers the entire Node/frontend ecosystem: +React, Next.js, Angular, NestJS, Vue (script blocks), Svelte (script blocks), +Express, etc. Type coverage queries naturally return 0 on `.js`/`.jsx` (no type +annotations in JS — correct behavior, not a bug). -```typescript -describe('AST error handling detection', () => { - // Mock execFileSync to avoid requiring ast-grep binary in CI. - // Tests verify parsing and classification logic, not the binary. +Total WASM bundle: ~5.5MB (TS + TSX + JS + Go). - it('should detect try/catch blocks via mock', () => { - const mockOutput = JSON.stringify([{ rule: 'try_statement', line: 2, text: 'try {' }]); - vi.spyOn(child_process, 'execFileSync').mockReturnValue(mockOutput); +### WASM memory management - const result = runAstGrepRule('src/test.ts', ERROR_HANDLING_RULES.tryCatch, tempDir); - expect(result.matches.length).toBe(1); - expect(result.matches[0].rule).toBe('try_statement'); +web-tree-sitter allocates `Parser` and `Tree` objects on the WASM heap. JavaScript's +GC does not collect these — they must be explicitly `.delete()`-ed. - vi.restoreAllMocks(); - }); +- **Parser:** Cached in `WasmPatternMatcher` instance. Created once, reused across + `match()` calls. Deleted when `WasmPatternMatcher` is disposed (if needed). +- **Tree:** Created per `match()` call. Deleted in a `finally` block after queries run. - it('should detect throw statements via mock', () => { - const mockOutput = JSON.stringify([{ rule: 'throw_statement', line: 1, text: 'throw new Error' }]); - vi.spyOn(child_process, 'execFileSync').mockReturnValue(mockOutput); +### NAPI swap path (future) - const result = runAstGrepRule('src/test.ts', ERROR_HANDLING_RULES.throwStatement, tempDir); - expect(result.matches.length).toBe(1); +When/if needed: +1. Add `NapiPatternMatcher` implementing same interface +2. Translate our S-expressions to ast-grep patterns (or keep S-expressions — NAPI supports them) +3. Update factory function +4. All consumers unchanged - vi.restoreAllMocks(); - }); +--- - it('should return empty matches on failure', () => { - vi.spyOn(child_process, 'execFileSync').mockImplementation(() => { throw new Error('not found'); }); +## What changes - const result = runAstGrepRule('src/test.ts', ERROR_HANDLING_RULES.tryCatch, tempDir); - expect(result.matches.length).toBe(0); +### New files - vi.restoreAllMocks(); - }); +| File | What | +|------|------| +| `packages/core/src/pattern-matcher/index.ts` | `PatternMatcher` interface, `PatternMatchRule` type, rule constants, factory | +| `packages/core/src/pattern-matcher/wasm-matcher.ts` | WASM implementation with cached parser + tree cleanup | +| `packages/core/src/pattern-matcher/__tests__/wasm-matcher.test.ts` | 10 positive, 6 negative, 3 edge case, 1 perf test | - it('should detect promise.catch via mock', () => { - const mockOutput = JSON.stringify([{ rule: 'promiseCatch', line: 3, text: '.catch(handler)' }]); - vi.spyOn(child_process, 'execFileSync').mockReturnValue(mockOutput); +### Modified files - const result = runAstGrepRule('src/test.ts', ERROR_HANDLING_RULES.promiseCatch, tempDir); - expect(result.matches.length).toBe(1); +| File | What changes | +|------|-------------| +| `packages/core/src/scanner/tree-sitter.ts` | Add `'typescript'` to `TreeSitterLanguage` | +| `packages/dev-agent/scripts/copy-wasm.js` | Add `'typescript'` to `SUPPORTED_LANGUAGES` | +| `packages/core/src/services/pattern-analysis-service.ts` | AST-enhanced extractors with regex fallback | +| `packages/core/src/services/pattern-analysis-types.ts` | Add optional `PatternMatcher` to config | +| `packages/mcp-server/src/adapters/built-in/inspect-adapter.ts` | Pass `PatternMatcher` through | +| `packages/mcp-server/bin/dev-agent-mcp.ts` | Wire `PatternMatcher` at startup | - vi.restoreAllMocks(); - }); -}); -``` +--- -- [ ] **Step 3: Commit** +## Tasks -```bash -git add packages/core/src/services/ast-patterns.ts packages/core/src/services/__tests__/ast-patterns.test.ts -git commit -m "feat(core): add ast-grep based error handling detection" -``` +### Task 1: Bundle TS/TSX/JS WASM + register languages ---- +- Add `'typescript' | 'tsx' | 'javascript'` to `TreeSitterLanguage` type in `scanner/tree-sitter.ts` +- Add `'typescript'`, `'tsx'`, `'javascript'` to `SUPPORTED_LANGUAGES` in `copy-wasm.js` +- Verify `parseCode()` works for all 3 grammars +- Test: parse a TS, TSX, and JS file, query for `(function_declaration) @match` +- `.jsx` routes to `'javascript'` grammar (JSX is a subset of the JS grammar) -## Task 3: Integrate AST patterns into PatternAnalysisService +**Risk:** WASM files add ~5.3MB to bundle (TS 2.3MB + TSX 2.3MB + JS 647KB). +**Mitigation:** Acceptable for a CLI tool installed globally. `node_modules` is already +hundreds of MB. We're not a browser bundle — install size is not a primary constraint. -Replace the regex-based `extractErrorHandlingFromContent` with AST detection when ast-grep is available, falling back to regex otherwise. +**Commit:** `feat(core): bundle tree-sitter TS/TSX/JS WASM for AST analysis` -- [ ] **Step 1: Add AST-enhanced error handling analyzer** +### Task 2: PatternMatcher interface + WASM implementation + +- `PatternMatcher` interface: `match(source, language, queries): Map` +- `PatternMatchRule` type: `{ id: string; category: string; query: string }` +- All 10 rule constants with documented limitations in code comments +- `WasmPatternMatcher`: + - Caches parser instance (created on first `match()`, reused thereafter) + - Calls `tree.delete()` in `finally` after running queries + - Routes `.ts` → `'typescript'` grammar, `.tsx` → `'tsx'` grammar + - Invalid query or parse failure → returns empty map (never crash) +- `createPatternMatcher()` factory + +**Commit:** `feat(core): add PatternMatcher with 10 verified tree-sitter queries` + +### Task 3: Tests for WasmPatternMatcher + +**Test setup:** +- `beforeAll`: create one `WasmPatternMatcher` instance for the suite (reuses cached parser) +- Module-level WASM caching in `tree-sitter.ts` means init happens once per process +- Each test is independent — inline source strings, no shared mutable state + +**Positive cases (10 tests — assert exact match counts):** ```typescript -// In pattern-analysis-service.ts - -import { ERROR_HANDLING_RULES, hasAstGrep, runAstGrepRule } from './ast-patterns.js'; - -/** - * Extract error handling pattern using AST (preferred) or regex (fallback). - */ -export function extractErrorHandling( - content: string, - filePath?: string, - repositoryPath?: string -): ErrorHandlingPattern { - // Try AST-based detection first (more accurate) - if (filePath && repositoryPath && hasAstGrep()) { - const tryCatch = runAstGrepRule(filePath, ERROR_HANDLING_RULES.tryCatch, repositoryPath); - const throwStmt = runAstGrepRule(filePath, ERROR_HANDLING_RULES.throwStatement, repositoryPath); - const promiseCatch = runAstGrepRule(filePath, ERROR_HANDLING_RULES.promiseCatch, repositoryPath); - - const counts = { - tryCatch: tryCatch.matches.length, - throw: throwStmt.matches.length, - promiseCatch: promiseCatch.matches.length, - }; - - const total = counts.tryCatch + counts.throw + counts.promiseCatch; - if (total === 0) return { style: 'unknown', examples: [] }; - - const activePatterns = Object.values(counts).filter((c) => c > 0).length; - if (activePatterns > 1) return { style: 'mixed', examples: [] }; - if (counts.throw > 0) return { style: 'throw', examples: [] }; - // try/catch and .catch() are both catch-based — map to 'result' - // (closest existing style for structured error handling) - if (counts.tryCatch > 0 || counts.promiseCatch > 0) return { style: 'result', examples: [] }; - - return { style: 'unknown', examples: [] }; - } - - // Fallback to regex - return extractErrorHandlingFromContent(content); -} +// try-catch — expect count === 1 +'try { x(); } catch (e) { }' + +// throw — expect count === 1 +'throw new Error("bad");' + +// promise-catch — expect count === 1 +'fetch("/api").catch(handleError);' + +// await-in-try — expect count === 1 +'async function f() { try { const x = await fetch("/api"); } catch (e) {} }' + +// error-class — expect count === 1 +'class HttpError extends BaseError { constructor(m: string) { super(m); } }' + +// dynamic-import — expect count === 1 +'const m = await import("./mod");' + +// re-export — expect count === 1 +'export { foo } from "./bar";' + +// require — expect count === 1 +'const fs = require("fs");' + +// arrow-return-type — expect count === 1 +'const add = (a: number, b: number): number => a + b;' + +// function-return-type — expect count === 1 +'function greet(name: string): string { return name; }' ``` -- [ ] **Step 2: Update analyzeFileFromIndex and analyzeFileWithDocs to use new function** +**Negative cases (10 tests — one per query, assert count === 0):** + +```typescript +// try-catch: no try/catch +'const x = 1;' + +// throw: no throw +'function safe() { return 1; }' + +// promise-catch: .then() but not .catch() +'fetch("/api").then(handle);' + +// await-in-try: bare await (not in const/let) — documents narrowness +'async function f() { try { await fetch("/api"); } catch (e) {} }' + +// error-class: class without extends +'class AppService { run() {} }' + +// dynamic-import: static import only +'import { foo } from "./bar";' -Replace `extractErrorHandlingFromContent(content)` with `extractErrorHandling(content, filePath, this.config.repositoryPath)` in both methods. +// re-export: named export without from +'export { foo };' -- [ ] **Step 3: Run tests** +// require: ESM only +'import fs from "fs";' -Run: `pnpm build && pnpm test` -Expected: ALL PASS (ast-grep detection used when available, regex fallback otherwise) +// arrow-return-type: arrow without return type +'const add = (a: number, b: number) => a + b;' -- [ ] **Step 4: Commit** +// function-return-type: function without return type +'function greet(name: string) { return name; }' +``` + +**Language routing tests (3 tests):** +- `.tsx`: use existing fixture `__fixtures__/react-component.tsx` — assert try-catch count > 0 +- `.jsx`: `'const App = () =>
;'` routed to `'javascript'` grammar — parses without crash +- Unknown extension (`.py`) → returns empty map + +**Edge cases (3 tests):** +- Empty source string → no matches, no crash +- Malformed TypeScript → parser handles gracefully, no crash +- Invalid S-expression → returns empty map (exercises try/finally cleanup) + +**Performance sanity (1 test — P1, soft assertion):** +- Parse ~500-line TypeScript file + all 10 queries +- Log timing. Assert < 500ms (generous threshold to avoid CI flakiness) +- Catches gross regressions, not micro-benchmarking + +**WASM cleanup note:** +`WasmPatternMatcher` will use `createParser()` + `parser.parse()` directly (not +`parseCode()`) to access the raw tree for `tree.delete()` in a `finally` block. +`parseCode()` returns a `ParsedTree` that doesn't expose the tree for cleanup. +Cleanup is verified indirectly: malformed source exercises the finally path, and +the performance test would show obvious memory growth if trees leaked. + +**Commit:** `test(core): comprehensive tests for all 10 AST patterns` + +### Task 4: Wire into PatternAnalysisService + +- Add optional `PatternMatcher` to `PatternAnalysisConfig` +- New AST-enhanced extractors (pure functions, testable): + - `extractErrorHandlingWithAst(content, matcher?, language?)` → enriches regex with AST + - `extractImportStyleWithAst(content, matcher?, language?)` → detects dynamic imports + - `extractTypeCoverageWithAst(content, matcher?, language?, signatures?)` → catches arrow types +- Replace calls in `analyzeFileFromIndex` and `analyzeFileWithDocs` +- Wire `PatternMatcher` from MCP entry point through `InspectAdapter` +- Map file extensions to languages: `.ts` → `'typescript'`, `.tsx` → `'tsx'`, `.js`/`.jsx` → `'javascript'`, `.go` → future, others → regex fallback + +Classification logic (error handling): +- throw + try-catch → `'throw'` (try/catch is mechanism, throw is style) +- try-catch without throw → augment regex result (if regex found Result, keep it) +- promise-catch → count as additional error handling signal +- error-class → count as additional signal (file defines custom errors) +- No AST matches → regex only (unchanged behavior) + +Classification logic (imports): +- dynamic-import detected → counts toward ESM +- re-export detected → counts toward ESM (confirms regex detection) +- require detected → counts toward CJS (more precise than regex) + +Classification logic (type coverage): +- arrow-return-type count + function-return-type count = AST-detected annotated functions +- Merged with signature-based detection from indexed metadata +- Arrow functions are the main win — regex is fragile on `(): Type => ...` + +**Integration tests (concrete cases):** -```bash -git add packages/core/src/services/pattern-analysis-service.ts packages/core/src/services/ast-patterns.ts -git commit -m "feat(core): use ast-grep for pattern analysis with regex fallback" +```typescript +// Error handling: try/catch + throw → 'throw' (try/catch is mechanism, throw is style) +source: 'try { validate(x); } catch (e) { throw new Error("failed"); }' +expect: extractErrorHandlingWithAst(source, matcher, 'typescript').style === 'throw' + +// Error handling: Result + try/catch → 'mixed' (AST adds try-catch, regex found Result) +source: 'function f(): Result { try { return ok(); } catch { return err(); } }' +expect: extractErrorHandlingWithAst(source, matcher, 'typescript').style === 'mixed' + +// Error handling: only promise.catch, no throw → 'unknown' (promise.catch alone +// doesn't map to our categories — regex didn't find throw/Result either) +source: 'fetch("/api").catch(log);' +expect: extractErrorHandlingWithAst(source, matcher, 'typescript').style === 'unknown' + +// Imports: dynamic import → counts as ESM +source: 'import { foo } from "./bar"; const m = await import("./baz");' +expect: extractImportStyleWithAst(source, matcher, 'typescript').style === 'esm' + +// Type coverage: arrow with return type detected by AST +source: 'const add = (a: number, b: number): number => a + b;' +signatures: [] // no indexed signatures (arrow not in index) +expect: extractTypeCoverageWithAst(source, matcher, 'typescript', signatures).annotatedCount === 1 + +// Regex fallback: all extractors with matcher=undefined produce identical output +// to current behavior (regression test) +expect: extractErrorHandlingWithAst(source, undefined, undefined) === + extractErrorHandlingFromContent(source) ``` +**Risk:** Behavior change for files where AST detects patterns regex missed. +**Mitigation:** AST only adds detection, never removes. Changes are strictly more accurate. + +**Commit:** `feat(core,mcp): use AST pattern matching for all 3 analysis categories` + +--- + +## Decisions + +| Decision | Rationale | Alternatives | +|----------|-----------|-------------| +| tree-sitter WASM, not ast-grep | Zero new deps, already bundled, universal | ast-grep NAPI: faster but new dep + platform builds | +| S-expression constants, not query builder | YAGNI — builder was riskiest component | Query builder: ~150 lines of template code for speculative NAPI swap | +| PatternMatcher interface (1 method) | Minimal cost, enables NAPI swap | Direct calls: simpler but locks to WASM | +| Cache parser, delete trees | WASM heap not GC'd by JS. Parser reuse avoids repeated init. | New parser per call: ~50ms overhead per match, slow memory leak | +| Bundle both TS and TSX grammars | React codebases are mostly .tsx — can't skip them. Same queries work on both. | Skip TSX: saves 2.3MB but misses most files in React repos | +| `error-class` matches any extends | Custom error hierarchies are common. `extends BaseError` matters. | `#eq? "Error"` only: misses most real error classes | +| `await-in-try` stays narrow | Exhaustive matching requires recursive descent. Diminishing returns. | Broader query: complex, fragile, marginal value | +| Regex fallback preserved | Not all languages supported; Result is regex-only | AST-only: would lose detection for unsupported languages | +| No new error handling categories | try/catch + throw = 'throw' style. Adding categories has ripple effects. | 'try-catch' category: more precise but changes types/formatters/tests | +| Optional PatternMatcher in config | Tests run without it, production wires it in | Required: breaks existing tests | + --- -## Notes +## Risk register + +| Risk | Likelihood | Impact | Mitigation | +|------|-----------|--------|------------| +| S-expressions wrong for some grammar versions | Low | Medium | Verified against actual grammar. Tests assert match count > 0 with real source. Pin tree-sitter-wasms version. | +| WASM heap memory leak | Medium | Medium | `tree.delete()` in finally block. Parser cached, not recreated. | +| Bundle size +5.3MB (TS + TSX + JS WASM) | Low | Low | CLI tool, not browser. Total ~5.5MB with Go WASM. Acceptable for global install. | +| `await-in-try` misses bare await statements | Medium | Low | Documented limitation. Regex fallback catches throw/Result regardless. | +| `error-class` false positives (non-error extends) | Low | Low | In error-handling context, non-error classes with extends are rare. | +| WASM init latency on first call | Low | Low | Lazy init, cached. ~50ms first call, <1ms subsequent. | +| PatternMatcher abstraction unnecessary | Low | Low | 1 method interface, ~50 line impl. Cost is minimal. | +| tree-sitter-typescript grammar changes | Very Low | Medium | Pin tree-sitter-wasms. Tests catch regressions. | + +--- + +## Test strategy + +| Test | Priority | What it verifies | +|------|----------|-----------------| +| **Positive (10 — exact counts)** | | | +| WasmPatternMatcher — try/catch | P0 | count === 1 | +| WasmPatternMatcher — throw | P0 | count === 1 | +| WasmPatternMatcher — promise.catch | P0 | count === 1 | +| WasmPatternMatcher — await-in-try | P0 | count === 1 | +| WasmPatternMatcher — error-class | P0 | count === 1 | +| WasmPatternMatcher — dynamic-import | P0 | count === 1 | +| WasmPatternMatcher — re-export | P0 | count === 1 | +| WasmPatternMatcher — require | P0 | count === 1 | +| WasmPatternMatcher — arrow-return-type | P0 | count === 1 | +| WasmPatternMatcher — function-return-type | P0 | count === 1 | +| **Negative (10 — one per query)** | | | +| no try/catch | P0 | count === 0 | +| no throw | P0 | count === 0 | +| .then() not .catch() | P0 | count === 0 | +| bare await in try (documents narrowness) | P0 | count === 0 | +| class without extends | P0 | count === 0 | +| static import only | P0 | count === 0 | +| export without from | P0 | count === 0 | +| ESM only (no require) | P0 | count === 0 | +| arrow without return type | P0 | count === 0 | +| function without return type | P0 | count === 0 | +| **Language routing (3)** | | | +| TSX: fixture react-component.tsx | P0 | try-catch count > 0 | +| JSX: routed to javascript grammar | P0 | parses without crash | +| Unknown extension (.py) | P0 | returns empty map | +| **Edge cases (3)** | | | +| Empty source | P0 | no crash, empty map | +| Malformed source | P0 | no crash | +| Invalid S-expression | P0 | empty map (exercises finally) | +| **Performance (1)** | | | +| 500-line file + 10 queries | P1 | < 500ms (soft, logged) | +| **Integration (6)** | | | +| try/catch + throw → 'throw' | P0 | AST + regex classification | +| Result + try/catch → 'mixed' | P0 | AST augments regex | +| promise.catch only → 'unknown' | P0 | No category mapping | +| dynamic import → ESM | P0 | AST enriches import style | +| arrow return type detected | P0 | AST enriches type coverage | +| All extractors with matcher=undefined | P0 | Identical to regex-only (regression) | +| **Regression** | | | +| Existing 49 tests unchanged | P0 | No behavioral change on fallback path | + +--- -- ast-grep is an **optional** dependency — all functionality works without it via regex fallback -- `hasAstGrep()` is memoized per process — single execSync on first call -- Uses `execFileSync` (array args) instead of `execSync` (string) to prevent shell injection via file paths -- `--rule` requires a file path — we write temp YAML files and clean up after -- Tests mock `execFileSync` so they run in CI without ast-grep installed -- Future: expand AST rules for import style, type coverage, naming conventions -- Attribution: add to ARCHITECTURE.md: "AST pattern matching inspired by [ast-grep](https://github.com/ast-grep/ast-grep)" +## Verification checklist + +- [ ] `tree-sitter-typescript.wasm`, `tree-sitter-tsx.wasm`, `tree-sitter-javascript.wasm` bundled in dist +- [ ] `parseCode('const x: number = 1;', 'typescript')` works +- [ ] `parseCode('const App = () =>
;', 'tsx')` works +- [ ] `parseCode('const x = 1;', 'javascript')` works +- [ ] All 10 positive tests assert exact match count === 1 +- [ ] All 10 negative tests assert count === 0 +- [ ] Empty/malformed source doesn't crash +- [ ] `tree.delete()` called after every `match()` (WASM cleanup) +- [ ] Parser cached in WasmPatternMatcher (not recreated per call) +- [ ] Regex fallback works when PatternMatcher not configured +- [ ] `pnpm build && pnpm test` passes (all existing + new) +- [ ] `dev_patterns` returns results via MCP (manual test) +- [ ] Performance: 500-line file + 10 queries < 200ms diff --git a/.claude/scratchpad.md b/.claude/scratchpad.md index 8bbea4b..6d3cfb0 100644 --- a/.claude/scratchpad.md +++ b/.claude/scratchpad.md @@ -11,10 +11,11 @@ ## Future Work - Antfly SDK: server-side path filter for `getDocsByFilePath` (eliminates 5k cap) -- `dev_patterns format: "json"` for token-efficient agent output (MCP Phase 1, Part 1.4) -- ast-grep as optional dep for pattern analysis (MCP Phase 1, Part 1.5) - PageRank for `dev_map` hot paths (MCP Phase 1, Part 1.6) - E2E tests in CI — blocked on Antfly memory requirements vs GitHub runner limits (7GB) +- **Python language support** — tree-sitter-python WASM is ~300KB, already in tree-sitter-wasms. Needs a Python scanner (document extraction) + Python-specific pattern rules. High demand — large ecosystem. Worth a standalone plan covering: scanner, pattern rules, test fixtures, indexer integration. The PatternMatcher interface from 1.5 is language-agnostic so pattern rules slot right in; the scanner is the real work. +- Vue/Svelte SFC support — `.vue`/`.svelte` files have embedded `