From cdc0c5cec74455d067fdf0d225b16773dcb3fe46 Mon Sep 17 00:00:00 2001 From: gdilla Date: Fri, 20 Mar 2026 13:22:07 -0700 Subject: [PATCH] Consolidate 4 skills into 1 with progressive disclosure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge codebase-memory-exploring, codebase-memory-tracing, codebase-memory-quality, and codebase-memory-reference into a single codebase-memory skill with a references/ directory. Motivation: - Each skill registered a separate description entry that Claude scans at session startup. 4 skills = 4 description entries consuming listing space when 1 would suffice with a broader trigger phrase set. - The 4 skills had overlapping content (gotchas, tips) scattered across files with no single source of truth. - Per Anthropic's Agent Skills best practices and official spec (agentskills.io), skills should use progressive disclosure: a concise SKILL.md (<500 lines, <5000 tokens) loaded on activation, with detailed reference material in separate files loaded on demand. 
What changed: - assets/skills/: 4 separate skill directories replaced by 1 codebase-memory/ directory with references/ subdirectory - SKILL.md (54 lines): decision matrix, quick workflows, consolidated gotchas section — everything an agent needs to pick the right tool - references/exploring.md: codebase exploration workflows - references/tracing.md: call chain tracing, impact analysis, cross-service calls - references/quality.md: dead code, fan-out/fan-in, change coupling - references/tool-reference.md: all 14 tools, edge types, Cypher syntax, regex patterns - cli.c: embedded skill content consolidated into single string with decision matrix, all workflows, and gotchas - cli.h: CBM_SKILL_COUNT 4 → 1 - test_cli.c: updated to test single consolidated skill covering all capabilities The consolidated gotchas section surfaces the 7 most common mistakes (search_graph vs query_graph for edges, 200-row cap, exact name requirement, direction="both" for cross-service, pagination, entry-point exclusion for dead code detection, uncapped degree-filter counting via search_graph) in the main SKILL.md where the agent reads them before encountering issues. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../skills/codebase-memory-exploring/SKILL.md | 90 ---------- .../skills/codebase-memory-tracing/SKILL.md | 125 -------------- .../assets/skills/codebase-memory/SKILL.md | 54 ++++++ .../codebase-memory/references/exploring.md | 61 +++++++ .../references/quality.md} | 40 ++--- .../references/tool-reference.md} | 48 +----- .../codebase-memory/references/tracing.md | 99 +++++++++++ src/cli/cli.c | 154 +++++++----------- src/cli/cli.h | 4 +- tests/test_cli.c | 59 +++---- 10 files changed, 321 insertions(+), 413 deletions(-) delete mode 100644 cmd/codebase-memory-mcp/assets/skills/codebase-memory-exploring/SKILL.md delete mode 100644 cmd/codebase-memory-mcp/assets/skills/codebase-memory-tracing/SKILL.md create mode 100644 cmd/codebase-memory-mcp/assets/skills/codebase-memory/SKILL.md create mode 100644 cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/exploring.md rename cmd/codebase-memory-mcp/assets/skills/{codebase-memory-quality/SKILL.md => codebase-memory/references/quality.md} (58%) rename cmd/codebase-memory-mcp/assets/skills/{codebase-memory-reference/SKILL.md => codebase-memory/references/tool-reference.md} (68%) create mode 100644 cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/tracing.md diff --git a/cmd/codebase-memory-mcp/assets/skills/codebase-memory-exploring/SKILL.md b/cmd/codebase-memory-mcp/assets/skills/codebase-memory-exploring/SKILL.md deleted file mode 100644 index cc45a8b..0000000 --- a/cmd/codebase-memory-mcp/assets/skills/codebase-memory-exploring/SKILL.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -name: codebase-memory-exploring -description: > - This skill should be used when the user asks to "explore the codebase", - "understand the architecture", "what functions exist", "show me the structure", - "how is the code organized", "find functions matching", "search for classes", - "list all routes", "show API endpoints", or needs codebase orientation. 
---- - -# Codebase Exploration via Knowledge Graph - -Use graph tools for structural code questions. They return precise results in ~500 tokens vs ~80K for grep-based exploration. - -## Workflow - -### Step 1: Check if project is indexed - -``` -list_projects -``` - -If the project is missing from the list: - -``` -index_repository(repo_path="/path/to/project") -``` - -If already indexed, skip — auto-sync keeps the graph fresh. - -### Step 2: Get a structural overview - -``` -get_graph_schema -``` - -This returns node label counts (functions, classes, routes, etc.), edge type counts, and relationship patterns. Use it to understand what's in the graph before querying. - -### Step 3: Find specific code elements - -Find functions by name pattern: -``` -search_graph(label="Function", name_pattern=".*Handler.*") -``` - -Find classes: -``` -search_graph(label="Class", name_pattern=".*Service.*") -``` - -Find all REST routes: -``` -search_graph(label="Route") -``` - -Find modules/packages: -``` -search_graph(label="Module") -``` - -Scope to a specific directory: -``` -search_graph(label="Function", qn_pattern=".*services\\.order\\..*") -``` - -### Step 4: Read source code - -After finding a function via search, read its source: -``` -get_code_snippet(qualified_name="project.path.to.FunctionName") -``` - -### Step 5: Understand structure - -For file/directory exploration within the indexed project: -``` -list_directory(path="src/services") -``` - -## When to Use Grep Instead - -- Searching for **string literals** or error messages → `search_code` or Grep -- Finding a file by exact name → Glob -- The graph doesn't index text content, only structural elements - -## Key Tips - -- Results default to 10 per page. Check `has_more` and use `offset` to paginate. -- Use `project` parameter when multiple repos are indexed. -- Route nodes have a `properties.handler` field with the actual handler function name. 
-- `exclude_labels` removes noise (e.g., `exclude_labels=["Route"]` when searching by name pattern). diff --git a/cmd/codebase-memory-mcp/assets/skills/codebase-memory-tracing/SKILL.md b/cmd/codebase-memory-mcp/assets/skills/codebase-memory-tracing/SKILL.md deleted file mode 100644 index bc14abe..0000000 --- a/cmd/codebase-memory-mcp/assets/skills/codebase-memory-tracing/SKILL.md +++ /dev/null @@ -1,125 +0,0 @@ ---- -name: codebase-memory-tracing -description: > - This skill should be used when the user asks "who calls this function", - "what does X call", "trace the call chain", "find callers of", - "show dependencies", "what depends on", "trace call path", - "find all references to", "impact analysis", or needs to understand - function call relationships and dependency chains. ---- - -# Call Chain Tracing via Knowledge Graph - -Use graph tools to trace function call relationships. One `trace_call_path` call replaces dozens of grep searches across files. - -## Workflow - -### Step 1: Discover the exact function name - -`trace_call_path` requires an **exact** name match. If you don't know the exact name, discover it first with regex: - -``` -search_graph(name_pattern=".*Order.*", label="Function") -``` - -Use full regex for precise discovery — no full-text search needed: -- `(?i)order` — case-insensitive -- `^(Get|Set|Delete)Order` — CRUD variants -- `.*Order.*Handler$` — handlers only -- `qn_pattern=".*services\\.order\\..*"` — scope to order service directory - -This returns matching functions with their qualified names and file locations. - -### Step 2: Trace callers (who calls this function?) - -``` -trace_call_path(function_name="ProcessOrder", direction="inbound", depth=3) -``` - -Returns a hop-by-hop list of all functions that call `ProcessOrder`, up to 3 levels deep. - -### Step 3: Trace callees (what does this function call?) 
- -``` -trace_call_path(function_name="ProcessOrder", direction="outbound", depth=3) -``` - -### Step 4: Full context (both callers and callees) - -``` -trace_call_path(function_name="ProcessOrder", direction="both", depth=3) -``` - -**Always use `direction="both"` for complete context.** Cross-service HTTP_CALLS edges from other services appear as inbound edges — `direction="outbound"` alone misses them. - -### Step 5: Read suspicious code - -After finding interesting callers/callees, read their source: - -``` -get_code_snippet(qualified_name="project.path.module.FunctionName") -``` - -## Cross-Service HTTP Calls - -To see all HTTP links between services with URLs and confidence scores: - -``` -query_graph(query="MATCH (a)-[r:HTTP_CALLS]->(b) RETURN a.name, b.name, r.url_path, r.confidence ORDER BY r.confidence DESC LIMIT 20") -``` - -Filter by URL path: -``` -query_graph(query="MATCH (a)-[r:HTTP_CALLS]->(b) WHERE r.url_path CONTAINS '/orders' RETURN a.name, b.name, r.url_path") -``` - -## Async Dispatch (Cloud Tasks, Pub/Sub, etc.) - -Find dispatch functions by name pattern, then trace: -``` -search_graph(name_pattern=".*CreateTask.*|.*send_to_pubsub.*") -trace_call_path(function_name="CreateMultidataTask", direction="both") -``` - -## Interface Implementations - -Find which structs implement an interface method: -``` -query_graph(query="MATCH (s)-[r:OVERRIDE]->(i) WHERE i.name = 'Read' RETURN s.name, i.name LIMIT 20") -``` - -## Read References (callbacks, variable assignments) - -``` -query_graph(query="MATCH (a)-[r:USAGE]->(b) WHERE b.name = 'ProcessOrder' RETURN a.name, a.file_path LIMIT 20") -``` - -## Risk-Classified Impact Analysis - -Add `risk_labels=true` to get risk classification on each node: - -``` -trace_call_path(function_name="ProcessOrder", direction="inbound", depth=3, risk_labels=true) -``` - -Returns nodes with `risk` (CRITICAL/HIGH/MEDIUM/LOW) based on hop depth, plus an `impact_summary` with counts. 
Risk mapping: hop 1=CRITICAL, 2=HIGH, 3=MEDIUM, 4+=LOW. - -## Detect Changes (Git Diff Impact) - -Map uncommitted changes to affected symbols and their blast radius: - -``` -detect_changes() -detect_changes(scope="staged") -detect_changes(scope="branch", base_branch="main") -``` - -Returns changed files, changed symbols, and impacted callers with risk classification. Scopes: `unstaged`, `staged`, `all` (default), `branch`. - -## Key Tips - -- Start with `depth=1` for quick answers, increase only if needed (max 5). -- Edge types in trace results: `CALLS` (direct), `HTTP_CALLS` (cross-service), `ASYNC_CALLS` (async dispatch), `USAGE` (read reference), `OVERRIDE` (interface implementation). -- `search_graph(relationship="HTTP_CALLS")` filters nodes by degree — it does NOT return edges. Use `query_graph` with Cypher to see actual edges with properties. -- Results are capped at 200 nodes per trace. -- `detect_changes` requires git in PATH. diff --git a/cmd/codebase-memory-mcp/assets/skills/codebase-memory/SKILL.md b/cmd/codebase-memory-mcp/assets/skills/codebase-memory/SKILL.md new file mode 100644 index 0000000..b7837e4 --- /dev/null +++ b/cmd/codebase-memory-mcp/assets/skills/codebase-memory/SKILL.md @@ -0,0 +1,54 @@ +--- +name: codebase-memory +description: > + Use the codebase knowledge graph for structural code queries. Triggers on: "explore the codebase", + "understand the architecture", "what functions exist", "show me the structure", "who calls this function", + "what does X call", "trace the call chain", "find callers of", "show dependencies", "impact analysis", + "dead code", "unused functions", "high fan-out", "refactor candidates", "code quality audit", + "graph query syntax", "Cypher query examples", "edge types", "how to use search_graph". +--- + +# Codebase Memory — Knowledge Graph Tools + +Graph tools return precise structural results in ~500 tokens vs ~80K for grep-based exploration. 
+ +## Step 1: Check if project is indexed + +``` +list_projects +``` + +If the project is missing: `index_repository(repo_path="/path/to/project")`. If already indexed, skip — auto-sync keeps the graph fresh. + +## What do you want to do? + +| Goal | Read | +|------|------| +| **Explore codebase structure** — find functions, classes, routes, understand architecture | [references/exploring.md](references/exploring.md) | +| **Trace call chains** — who calls X, what does X call, impact analysis, cross-service calls | [references/tracing.md](references/tracing.md) | +| **Code quality analysis** — dead code, high fan-out, hidden coupling, refactor candidates | [references/quality.md](references/quality.md) | +| **Tool reference** — all 14 tools, edge types, node labels, Cypher syntax, regex patterns | [references/tool-reference.md](references/tool-reference.md) | + +## Quick Decision Matrix + +| Question | Tool call | +|----------|-----------| +| Who calls X? | `trace_call_path(direction="inbound")` | +| What does X call? | `trace_call_path(direction="outbound")` | +| Full call context | `trace_call_path(direction="both")` | +| Find by name pattern | `search_graph(name_pattern="...")` | +| Dead code | `search_graph(max_degree=0, exclude_entry_points=true)` | +| Cross-service edges | `query_graph` with Cypher | +| Impact of local changes | `detect_changes()` | +| Risk-classified trace | `trace_call_path(risk_labels=true)` | +| Text search | `search_code` or Grep | + +## Gotchas + +1. **`search_graph(relationship="HTTP_CALLS")` does NOT return edges** — it filters nodes by degree. Use `query_graph` with Cypher to see actual edge properties (url_path, confidence). +2. **`query_graph` has a 200-row cap** before aggregation — COUNT queries silently undercount on large codebases. Use `search_graph` with `min_degree`/`max_degree` for counting. +3. **`trace_call_path` needs exact names** — use `search_graph(name_pattern=".*Partial.*")` first to discover the exact function name. 
+4. **`direction="outbound"` misses cross-service callers** — always use `direction="both"` for complete context. Cross-service HTTP_CALLS appear as inbound edges. +5. **Results default to 10 per page** — check `has_more` and use `offset` to paginate. +6. **Dead code detection requires entry point exclusion** — without `exclude_entry_points=true`, route handlers and `main()` show as false positives. +7. **`search_graph` with degree filters has no row cap** (unlike `query_graph`). Use it for counting, not `query_graph`. diff --git a/cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/exploring.md b/cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/exploring.md new file mode 100644 index 0000000..bd3ff10 --- /dev/null +++ b/cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/exploring.md @@ -0,0 +1,61 @@ +# Codebase Exploration + +## Get a structural overview + +``` +get_graph_schema +``` + +Returns node label counts (functions, classes, routes), edge type counts, and relationship patterns. Use it to understand what's in the graph before querying. 
+ +## Find specific code elements + +Find functions by name pattern: +``` +search_graph(label="Function", name_pattern=".*Handler.*") +``` + +Find classes: +``` +search_graph(label="Class", name_pattern=".*Service.*") +``` + +Find all REST routes: +``` +search_graph(label="Route") +``` + +Find modules/packages: +``` +search_graph(label="Module") +``` + +Scope to a specific directory: +``` +search_graph(label="Function", qn_pattern=".*services\\.order\\..*") +``` + +## Read source code + +After finding a function via search, read its source: +``` +get_code_snippet(qualified_name="project.path.to.FunctionName") +``` + +## File/directory exploration + +``` +list_directory(path="src/services") +``` + +## When to Use Grep Instead + +- Searching for **string literals** or error messages → `search_code` or Grep +- Finding a file by exact name → Glob +- The graph indexes structural elements, not text content + +## Tips + +- Use `project` parameter when multiple repos are indexed. +- Route nodes have a `properties.handler` field with the handler function name. +- `exclude_labels` removes noise (e.g., `exclude_labels=["Route"]` when searching by name pattern). 
diff --git a/cmd/codebase-memory-mcp/assets/skills/codebase-memory-quality/SKILL.md b/cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/quality.md similarity index 58% rename from cmd/codebase-memory-mcp/assets/skills/codebase-memory-quality/SKILL.md rename to cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/quality.md index 1542eee..d72d142 100644 --- a/cmd/codebase-memory-mcp/assets/skills/codebase-memory-quality/SKILL.md +++ b/cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/quality.md @@ -1,22 +1,8 @@ ---- -name: codebase-memory-quality -description: > - This skill should be used when the user asks about "dead code", - "find dead code", "detect dead code", "show dead code", "dead code analysis", - "unused functions", "find unused functions", "unreachable code", - "identify high fan-out functions", "find complex functions", - "code quality audit", "find functions nobody calls", - "reduce codebase size", "refactor candidates", "cleanup candidates", - or needs code quality analysis. ---- - -# Code Quality Analysis via Knowledge Graph +# Code Quality Analysis Use graph degree filtering to find dead code, high-complexity functions, and refactor candidates — all in single tool calls. 
-## Workflow - -### Dead Code Detection +## Dead Code Detection Find functions with zero inbound CALLS edges, excluding entry points: @@ -46,9 +32,9 @@ Also check for read references (callbacks, stored in variables): query_graph(query="MATCH (a)-[r:USAGE]->(b) WHERE b.name = 'SuspectFunction' RETURN a.name, a.file_path LIMIT 10") ``` -### High Fan-Out Functions (calling 10+ others) +## High Fan-Out Functions (calling 10+ others) -These are often doing too much and are refactor candidates: +Refactor candidates — often doing too much: ``` search_graph( @@ -59,9 +45,9 @@ search_graph( ) ``` -### High Fan-In Functions (called by 10+ others) +## High Fan-In Functions (called by 10+ others) -These are critical functions — changes have wide impact: +Critical functions — changes have wide impact: ``` search_graph( @@ -72,9 +58,7 @@ search_graph( ) ``` -### Files That Change Together (Hidden Coupling) - -Find files with high git change coupling: +## Files That Change Together (Hidden Coupling) ``` query_graph(query="MATCH (a)-[r:FILE_CHANGES_WITH]->(b) WHERE r.coupling_score >= 0.5 RETURN a.name, b.name, r.coupling_score, r.co_change_count ORDER BY r.coupling_score DESC LIMIT 20") @@ -82,7 +66,7 @@ query_graph(query="MATCH (a)-[r:FILE_CHANGES_WITH]->(b) WHERE r.coupling_score > High coupling between unrelated files suggests hidden dependencies. -### Unused Imports +## Unused Imports ``` search_graph( @@ -93,9 +77,9 @@ search_graph( ) ``` -## Key Tips +## Tips - `search_graph` with degree filters has no row cap (unlike `query_graph` which caps at 200). -- Use `file_pattern` to scope analysis to specific directories: `file_pattern="**/services/**"`. -- Dead code detection works best after a full index — run `index_repository` if the project was recently set up. -- Paginate results with `limit` and `offset` — check `has_more` in the response. +- Use `file_pattern` to scope analysis: `file_pattern="**/services/**"`. +- Dead code detection works best after a full index. 
+- Paginate results with `limit` and `offset` — check `has_more`. diff --git a/cmd/codebase-memory-mcp/assets/skills/codebase-memory-reference/SKILL.md b/cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/tool-reference.md similarity index 68% rename from cmd/codebase-memory-mcp/assets/skills/codebase-memory-reference/SKILL.md rename to cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/tool-reference.md index 97dbfd6..8210c17 100644 --- a/cmd/codebase-memory-mcp/assets/skills/codebase-memory-reference/SKILL.md +++ b/cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/tool-reference.md @@ -1,12 +1,3 @@ ---- -name: codebase-memory-reference -description: > - This skill should be used when the user asks about "codebase-memory-mcp tools", - "graph query syntax", "Cypher query examples", "edge types", - "how to use search_graph", "query_graph examples", or needs reference - documentation for the codebase knowledge graph tools. ---- - # Codebase Memory MCP — Tool Reference ## Tools (14 total) @@ -19,7 +10,7 @@ description: > | `delete_project` | Remove a project from the graph | | `search_graph` | Structured search with filters (name, label, degree, file pattern) | | `search_code` | Grep-like text search within indexed project files | -| `trace_call_path` | BFS call chain traversal (exact name match required). Supports `risk_labels=true` for impact classification. | +| `trace_call_path` | BFS call chain traversal (exact name match required). Supports `risk_labels=true`. 
| | `detect_changes` | Map git diff to affected symbols + blast radius with risk scoring | | `query_graph` | Cypher-like graph queries (200-row cap) | | `get_graph_schema` | Node/edge counts, relationship patterns | @@ -92,11 +83,7 @@ MATCH (a)-[r:FILE_CHANGES_WITH]->(b) WHERE r.coupling_score >= 0.5 RETURN a.name MATCH (f:Function)-[:CALLS]->(g:Function) WHERE g.name = 'ProcessOrder' RETURN f.name LIMIT 20 ``` -## Regex-Powered Search (No Full-Text Index Needed) - -`search_graph` and `search_code` support full Go regex, making full-text search indexes unnecessary. Regex patterns provide precise, composable queries that cover all common discovery scenarios: - -### search_graph — name_pattern / qn_pattern +## Regex Patterns for search_graph | Pattern | Matches | Use case | |---------|---------|----------| @@ -106,10 +93,9 @@ MATCH (f:Function)-[:CALLS]->(g:Function) WHERE g.name = 'ProcessOrder' RETURN f | `^on[A-Z]` | names starting with on + uppercase | Find event handlers | | `.*Service.*Impl` | Service...Impl pattern | Find service implementations | | `^(Get\|Set\|Delete)` | CRUD prefixes | Find CRUD operations | -| `.*_test$` | names ending in _test | Find test functions | | `.*\\.controllers\\..*` | qn_pattern for directory scoping | Scope to controllers dir | -### search_code — regex=true +## Regex Patterns for search_code | Pattern | Matches | Use case | |---------|---------|----------| @@ -117,38 +103,16 @@ MATCH (f:Function)-[:CALLS]->(g:Function) WHERE g.name = 'ProcessOrder' RETURN f | `(?i)password\|secret\|token` | case-insensitive secrets | Security scan | | `func\\s+Test` | Go test functions | Find test entry points | | `api[._/]v[0-9]` | API version references | Find versioned API usage | -| `import.*from ['"]@` | scoped npm imports | Find package imports | -### Combining Filters for Surgical Queries +## Combining Filters ``` # Find unused auth handlers search_graph(name_pattern="(?i).*auth.*handler.*", max_degree=0, exclude_entry_points=true) -# 
Find high fan-out functions in the services directory +# Find high fan-out functions in services directory search_graph(qn_pattern=".*\\.services\\..*", min_degree=10, relationship="CALLS", direction="outbound") -# Find all route handlers matching a URL pattern +# Find route handlers matching a URL pattern search_code(pattern="(?i)(POST|PUT).*\\/api\\/v[0-9]\\/orders", regex=true) ``` - -## Critical Pitfalls - -1. **`search_graph(relationship="HTTP_CALLS")` does NOT return edges** — it filters nodes by degree. Use `query_graph` with Cypher to see actual edges. -2. **`query_graph` has a 200-row cap** before aggregation — COUNT queries silently undercount on large codebases. Use `search_graph` with `min_degree`/`max_degree` for counting. -3. **`trace_call_path` needs exact names** — use `search_graph(name_pattern=".*Partial.*")` first to discover names. -4. **`direction="outbound"` misses cross-service callers** — use `direction="both"` for full context. - -## Decision Matrix - -| Question | Use | -|----------|-----| -| Who calls X? | `trace_call_path(direction="inbound")` | -| What does X call? | `trace_call_path(direction="outbound")` | -| Full call context | `trace_call_path(direction="both")` | -| Find by name pattern | `search_graph(name_pattern="...")` | -| Dead code | `search_graph(max_degree=0, exclude_entry_points=true)` | -| Cross-service edges | `query_graph` with Cypher | -| Impact of local changes | `detect_changes()` | -| Risk-classified trace | `trace_call_path(risk_labels=true)` | -| Text search | `search_code` or Grep | diff --git a/cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/tracing.md b/cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/tracing.md new file mode 100644 index 0000000..e6791ae --- /dev/null +++ b/cmd/codebase-memory-mcp/assets/skills/codebase-memory/references/tracing.md @@ -0,0 +1,99 @@ +# Call Chain Tracing + +One `trace_call_path` call replaces dozens of grep searches across files. 
+ +## Step 1: Discover the exact function name + +`trace_call_path` requires an **exact** name match. Discover it first with regex: + +``` +search_graph(name_pattern=".*Order.*", label="Function") +``` + +Useful regex patterns: +- `(?i)order` — case-insensitive +- `^(Get|Set|Delete)Order` — CRUD variants +- `.*Order.*Handler$` — handlers only +- `qn_pattern=".*services\\.order\\..*"` — scope to directory + +## Step 2: Trace callers (who calls this?) + +``` +trace_call_path(function_name="ProcessOrder", direction="inbound", depth=3) +``` + +## Step 3: Trace callees (what does this call?) + +``` +trace_call_path(function_name="ProcessOrder", direction="outbound", depth=3) +``` + +## Step 4: Full context (both) + +``` +trace_call_path(function_name="ProcessOrder", direction="both", depth=3) +``` + +**Always use `direction="both"` for complete context.** Cross-service HTTP_CALLS edges appear as inbound edges — `direction="outbound"` alone misses them. + +## Read suspicious code + +``` +get_code_snippet(qualified_name="project.path.module.FunctionName") +``` + +## Cross-Service HTTP Calls + +See all HTTP links with URLs and confidence: +``` +query_graph(query="MATCH (a)-[r:HTTP_CALLS]->(b) RETURN a.name, b.name, r.url_path, r.confidence ORDER BY r.confidence DESC LIMIT 20") +``` + +Filter by URL: +``` +query_graph(query="MATCH (a)-[r:HTTP_CALLS]->(b) WHERE r.url_path CONTAINS '/orders' RETURN a.name, b.name, r.url_path") +``` + +## Async Dispatch (Cloud Tasks, Pub/Sub, etc.) 
+ +``` +search_graph(name_pattern=".*CreateTask.*|.*send_to_pubsub.*") +trace_call_path(function_name="CreateMultidataTask", direction="both") +``` + +## Interface Implementations + +``` +query_graph(query="MATCH (s)-[r:OVERRIDE]->(i) WHERE i.name = 'Read' RETURN s.name, i.name LIMIT 20") +``` + +## Read References (callbacks, variable assignments) + +``` +query_graph(query="MATCH (a)-[r:USAGE]->(b) WHERE b.name = 'ProcessOrder' RETURN a.name, a.file_path LIMIT 20") +``` + +## Risk-Classified Impact Analysis + +``` +trace_call_path(function_name="ProcessOrder", direction="inbound", depth=3, risk_labels=true) +``` + +Returns nodes with `risk` (CRITICAL/HIGH/MEDIUM/LOW) based on hop depth. Hop 1=CRITICAL, 2=HIGH, 3=MEDIUM, 4+=LOW. + +## Detect Changes (Git Diff Impact) + +``` +detect_changes() +detect_changes(scope="staged") +detect_changes(scope="branch", base_branch="main") +``` + +Returns changed files, changed symbols, and impacted callers with risk classification. Scopes: `unstaged`, `staged`, `all` (default), `branch`. + +## Tips + +- Start with `depth=1` for quick answers, increase only if needed (max 5). +- Edge types in traces: `CALLS` (direct), `HTTP_CALLS` (cross-service), `ASYNC_CALLS` (async), `USAGE` (read reference), `OVERRIDE` (interface implementation). +- Results are capped at 200 nodes per trace. +- `detect_changes` requires git in PATH. diff --git a/src/cli/cli.c b/src/cli/cli.c index 7db8e67..4553b2b 100644 --- a/src/cli/cli.c +++ b/src/cli/cli.c @@ -256,112 +256,83 @@ int cbm_copy_file(const char *src, const char *dst) { } /* ── Skill file content (embedded) ────────────────────────────── */ +/* Consolidated from 4 separate skills into 1 with progressive disclosure. + * The full skill with references/ directory is in assets/skills/codebase-memory/. + * This embedded version is a self-contained fallback for the CLI installer. 
*/ -static const char skill_exploring_content[] = +static const char skill_content[] = "---\n" - "name: codebase-memory-exploring\n" - "description: Codebase knowledge graph expert. ALWAYS invoke this skill when the user " - "explores code, searches for functions/classes/routes, asks about architecture, or needs " - "codebase orientation. Do not use Grep, Glob, or file search directly — use " - "codebase-memory-mcp search_graph and get_architecture first.\n" + "name: codebase-memory\n" + "description: Use the codebase knowledge graph for structural code queries. " + "Triggers on: explore the codebase, understand the architecture, what functions exist, " + "show me the structure, who calls this function, what does X call, trace the call chain, " + "find callers of, show dependencies, impact analysis, dead code, unused functions, " + "high fan-out, refactor candidates, code quality audit, graph query syntax, " + "Cypher query examples, edge types, how to use search_graph.\n" "---\n" "\n" - "# Codebase Exploration\n" + "# Codebase Memory — Knowledge Graph Tools\n" "\n" - "Use codebase-memory-mcp tools to explore the codebase:\n" + "Graph tools return precise structural results in ~500 tokens vs ~80K for grep.\n" "\n" - "## Workflow\n" - "1. `get_graph_schema` — understand what node/edge types exist\n" - "2. `search_graph` — find functions, classes, routes by pattern\n" - "3. `get_code_snippet` — read specific function implementations\n" - "4. `get_architecture` — get high-level project summary\n" + "## Quick Decision Matrix\n" "\n" - "## Tips\n" - "- Use `search_graph(name_pattern=\".*Pattern.*\")` for fuzzy matching\n" - "- Use `search_graph(label=\"Route\")` to find HTTP routes\n" - "- Use `search_graph(label=\"Function\", file_pattern=\"*.go\")` to scope by language\n"; - -static const char skill_tracing_content[] = - "---\n" - "name: codebase-memory-tracing\n" - "description: Call chain and dependency expert. 
ALWAYS invoke this skill when the user " - "asks who calls a function, what a function calls, needs impact analysis, or traces " - "dependencies. Do not grep for function names directly — use codebase-memory-mcp " - "trace_call_path first.\n" - "---\n" - "\n" - "# Call Tracing & Impact Analysis\n" - "\n" - "Use codebase-memory-mcp tools to trace call paths:\n" + "| Question | Tool call |\n" + "|----------|----------|\n" + "| Who calls X? | `trace_call_path(direction=\"inbound\")` |\n" + "| What does X call? | `trace_call_path(direction=\"outbound\")` |\n" + "| Full call context | `trace_call_path(direction=\"both\")` |\n" + "| Find by name pattern | `search_graph(name_pattern=\"...\")` |\n" + "| Dead code | `search_graph(max_degree=0, exclude_entry_points=true)` |\n" + "| Cross-service edges | `query_graph` with Cypher |\n" + "| Impact of local changes | `detect_changes()` |\n" + "| Risk-classified trace | `trace_call_path(risk_labels=true)` |\n" + "| Text search | `search_code` or Grep |\n" "\n" - "## Workflow\n" - "1. `search_graph(name_pattern=\".*FuncName.*\")` — find exact function name\n" - "2. `trace_call_path(function_name=\"FuncName\", direction=\"both\")` — trace callers + " - "callees\n" - "3. `detect_changes` — find what changed and assess risk_labels\n" - "\n" - "## Direction Options\n" - "- `inbound` — who calls this function?\n" - "- `outbound` — what does this function call?\n" - "- `both` — full context (callers + callees)\n"; - -static const char skill_quality_content[] = - "---\n" - "name: codebase-memory-quality\n" - "description: Code quality analysis expert. ALWAYS invoke this skill when the user asks " - "about dead code, unused functions, complexity, refactor candidates, or cleanup " - "opportunities. Do not search files manually — use codebase-memory-mcp search_graph " - "with degree filters first.\n" - "---\n" + "## Exploration Workflow\n" + "1. `list_projects` — check if project is indexed\n" + "2. 
`get_graph_schema` — understand node/edge types\n" + "3. `search_graph(label=\"Function\", name_pattern=\".*Pattern.*\")` — find code\n" + "4. `get_code_snippet(qualified_name=\"project.path.FuncName\")` — read source\n" "\n" - "# Code Quality Analysis\n" + "## Tracing Workflow\n" + "1. `search_graph(name_pattern=\".*FuncName.*\")` — discover exact name\n" + "2. `trace_call_path(function_name=\"FuncName\", direction=\"both\", depth=3)` — trace\n" + "3. `detect_changes()` — map git diff to affected symbols\n" "\n" - "Use codebase-memory-mcp tools for quality analysis:\n" + "## Quality Analysis\n" + "- Dead code: `search_graph(max_degree=0, exclude_entry_points=true)`\n" + "- High fan-out: `search_graph(min_degree=10, relationship=\"CALLS\", direction=\"outbound\")`\n" + "- High fan-in: `search_graph(min_degree=10, relationship=\"CALLS\", direction=\"inbound\")`\n" + "- Change coupling: `query_graph(query=\"MATCH (a)-[r:FILE_CHANGES_WITH]->(b) " + "WHERE r.coupling_score >= 0.5 RETURN a.name, b.name, r.coupling_score\")`\n" "\n" - "## Dead Code Detection\n" - "- `search_graph(max_degree=0, exclude_entry_points=true)` — find unreferenced functions\n" - "- `search_graph(max_degree=0, label=\"Function\")` — unreferenced functions only\n" - "\n" - "## Complexity Analysis\n" - "- `search_graph(min_degree=10)` — high fan-out functions\n" - "- `search_graph(label=\"Function\", sort_by=\"degree\")` — most-connected functions\n"; - -static const char skill_reference_content[] = - "---\n" - "name: codebase-memory-reference\n" - "description: Codebase-memory-mcp reference guide. ALWAYS invoke this skill when the user " - "asks about MCP tools, graph queries, Cypher syntax, edge types, or how to use the " - "knowledge graph. 
Do not guess tool parameters — load this reference first.\n" - "---\n" - "\n" - "# Codebase Memory MCP Reference\n" - "\n" - "## 14 total MCP Tools\n" - "- `index_repository` — index a project\n" - "- `index_status` — check indexing progress\n" - "- `detect_changes` — find what changed since last index\n" - "- `search_graph` — find nodes by pattern\n" - "- `search_code` — text search in source\n" - "- `query_graph` — Cypher query language\n" - "- `trace_call_path` — call chain traversal\n" - "- `get_code_snippet` — read function source\n" - "- `get_graph_schema` — node/edge type catalog\n" - "- `get_architecture` — high-level summary\n" - "- `list_projects` — indexed projects\n" - "- `delete_project` — remove a project\n" - "- `manage_adr` — architecture decision records\n" - "- `ingest_traces` — import runtime traces\n" + "## 14 MCP Tools\n" + "`index_repository`, `index_status`, `list_projects`, `delete_project`,\n" + "`search_graph`, `search_code`, `trace_call_path`, `detect_changes`,\n" + "`query_graph`, `get_graph_schema`, `get_code_snippet`, `get_architecture`,\n" + "`manage_adr`, `ingest_traces`\n" "\n" "## Edge Types\n" "CALLS, HTTP_CALLS, ASYNC_CALLS, IMPORTS, DEFINES, DEFINES_METHOD,\n" - "HANDLES, IMPLEMENTS, CONTAINS_FILE, CONTAINS_FOLDER, CONTAINS_PACKAGE\n" + "HANDLES, IMPLEMENTS, OVERRIDE, USAGE, FILE_CHANGES_WITH,\n" + "CONTAINS_FILE, CONTAINS_FOLDER, CONTAINS_PACKAGE\n" "\n" - "## Cypher Examples\n" + "## Cypher Examples (for query_graph)\n" "```\n" + "MATCH (a)-[r:HTTP_CALLS]->(b) RETURN a.name, b.name, r.url_path, r.confidence LIMIT 20\n" "MATCH (f:Function) WHERE f.name =~ '.*Handler.*' RETURN f.name, f.file_path\n" - "MATCH (a)-[r:CALLS]->(b) WHERE a.name = 'main' RETURN b.name\n" - "MATCH (a)-[r:HTTP_CALLS]->(b) RETURN a.name, b.name, r.url_path\n" - "```\n"; + "MATCH (a)-[r:FILE_CHANGES_WITH]->(b) WHERE r.coupling_score >= 0.5 " + "RETURN a.name, b.name\n" + "```\n" + "\n" + "## Gotchas\n" + "1. 
`search_graph(relationship=\"HTTP_CALLS\")` filters nodes by degree — " + "use `query_graph` with Cypher to see actual edges.\n" + "2. `query_graph` has a 200-row cap — use `search_graph` with degree filters for counting.\n" + "3. `trace_call_path` needs exact names — use `search_graph(name_pattern=...)` first.\n" + "4. `direction=\"outbound\"` misses cross-service callers — use `direction=\"both\"`.\n" + "5. Results default to 10 per page — check `has_more` and use `offset`.\n"; static const char codex_instructions_content[] = "# Codebase Knowledge Graph\n" @@ -378,10 +349,7 @@ static const char codex_instructions_content[] = "Always prefer graph tools over grep for code discovery.\n"; static const cbm_skill_t skills[CBM_SKILL_COUNT] = { - {"codebase-memory-exploring", skill_exploring_content}, - {"codebase-memory-tracing", skill_tracing_content}, - {"codebase-memory-quality", skill_quality_content}, - {"codebase-memory-reference", skill_reference_content}, + {"codebase-memory", skill_content}, }; const cbm_skill_t *cbm_get_skills(void) { diff --git a/src/cli/cli.h b/src/cli/cli.h index 1c3a1f8..e69e898 100644 --- a/src/cli/cli.h +++ b/src/cli/cli.h @@ -48,11 +48,11 @@ int cbm_copy_file(const char *src, const char *dst); /* ── Skill file management ────────────────────────────────────── */ /* Number of skill files. */ -#define CBM_SKILL_COUNT 4 +#define CBM_SKILL_COUNT 1 /* Skill name/content pair. */ typedef struct { - const char *name; /* e.g. "codebase-memory-exploring" */ + const char *name; /* e.g. 
"codebase-memory" */ const char *content; /* full SKILL.md content */ } cbm_skill_t; diff --git a/tests/test_cli.c b/tests/test_cli.c index 19ccdec..7524686 100644 --- a/tests/test_cli.c +++ b/tests/test_cli.c @@ -530,41 +530,34 @@ TEST(cli_remove_old_monolithic_skill) { } TEST(cli_skill_files_content) { - /* Port of TestSkillFilesContent */ + /* Consolidated skill: all 4 former skills merged into one */ const cbm_skill_t *sk = cbm_get_skills(); - ASSERT_EQ(CBM_SKILL_COUNT, 4); + ASSERT_EQ(CBM_SKILL_COUNT, 1); + + /* The single consolidated skill must cover all use cases */ + ASSERT(strcmp(sk[0].name, "codebase-memory") == 0); + + /* Exploring capabilities */ + ASSERT(strstr(sk[0].content, "search_graph") != NULL); + ASSERT(strstr(sk[0].content, "get_graph_schema") != NULL); + + /* Tracing capabilities */ + ASSERT(strstr(sk[0].content, "trace_call_path") != NULL); + ASSERT(strstr(sk[0].content, "direction") != NULL); + ASSERT(strstr(sk[0].content, "detect_changes") != NULL); + + /* Quality capabilities */ + ASSERT(strstr(sk[0].content, "max_degree=0") != NULL); + ASSERT(strstr(sk[0].content, "exclude_entry_points") != NULL); + + /* Reference capabilities */ + ASSERT(strstr(sk[0].content, "query_graph") != NULL); + ASSERT(strstr(sk[0].content, "Cypher") != NULL); + ASSERT(strstr(sk[0].content, "14 MCP Tools") != NULL); + + /* Gotchas section (new — highest-value content per Anthropic best practices) */ + ASSERT(strstr(sk[0].content, "Gotchas") != NULL); - /* Check exploring skill */ - bool found_exploring = false, found_tracing = false; - bool found_quality = false, found_reference = false; - for (int i = 0; i < CBM_SKILL_COUNT; i++) { - if (strcmp(sk[i].name, "codebase-memory-exploring") == 0) { - found_exploring = true; - ASSERT(strstr(sk[i].content, "search_graph") != NULL); - ASSERT(strstr(sk[i].content, "get_graph_schema") != NULL); - } - if (strcmp(sk[i].name, "codebase-memory-tracing") == 0) { - found_tracing = true; - ASSERT(strstr(sk[i].content, 
"trace_call_path") != NULL); - ASSERT(strstr(sk[i].content, "direction") != NULL); - ASSERT(strstr(sk[i].content, "detect_changes") != NULL); - } - if (strcmp(sk[i].name, "codebase-memory-quality") == 0) { - found_quality = true; - ASSERT(strstr(sk[i].content, "max_degree=0") != NULL); - ASSERT(strstr(sk[i].content, "exclude_entry_points") != NULL); - } - if (strcmp(sk[i].name, "codebase-memory-reference") == 0) { - found_reference = true; - ASSERT(strstr(sk[i].content, "query_graph") != NULL); - ASSERT(strstr(sk[i].content, "Cypher") != NULL); - ASSERT(strstr(sk[i].content, "14 total") != NULL); - } - } - ASSERT_TRUE(found_exploring); - ASSERT_TRUE(found_tracing); - ASSERT_TRUE(found_quality); - ASSERT_TRUE(found_reference); PASS(); }