From 1b24985cf439695b22003539a7a3a6cbf3bda741 Mon Sep 17 00:00:00 2001
From: prosdev <prosdevlab@gmail.com>
Date: Tue, 31 Mar 2026 11:50:04 -0700
Subject: [PATCH 1/8] =?UTF-8?q?docs:=20update=201.6=20plan=20=E2=80=94=20a?=
 =?UTF-8?q?ddress=20review=20findings=20before=20implementation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Key findings:
- callers metadata is dead code (not stored in index) — callees-only is correct
- Keep incomingRefs as real count, add score field for PageRank
- Existing tests need callees data + relative ordering assertions
- Add performance test for 2k-node graph

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../1.6-pagerank-map.md                       | 621 ++++++++++++++----
 .../phase-1-mcp-tools-improvement/overview.md |  16 +-
 .claude/scratchpad.md                         |   4 +-
 3 files changed, 527 insertions(+), 114 deletions(-)

diff --git a/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/1.6-pagerank-map.md b/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/1.6-pagerank-map.md
index 005d038..4ea4875 100644
--- a/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/1.6-pagerank-map.md
+++ b/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/1.6-pagerank-map.md
@@ -1,18 +1,24 @@
-# Part 1.6: PageRank File Ranking for dev_map
+# Part 1.6: Graph Algorithms for dev_map and dev_refs
 
 > **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task.
 
-**Goal:** Replace simple reference counting in `dev_map` hot paths with PageRank over the call graph for more meaningful file ranking.
+**Goal:** Replace simple reference counting in `dev_map` hot paths with weighted PageRank
+over the call graph for more meaningful file ranking.
 
-**User stories:** US-12 (meaningful file importance in codebase map)
+Connected components and shortest path are implemented alongside PageRank in the
+same `graph.ts` module but wired into consumers (formatCodebaseMap, dev_refs) in a
+follow-up PR.
+
+**User stories:** US-12 (meaningful file importance)
 
 **Inspiration:** [aider's repo map](https://aider.chat/docs/repomap.html) — Apache 2.0. Uses PageRank over dependency graph to identify architecturally central files. We already have the call graph data from the scanner (callees metadata in Antfly).
 
 **Files:**
-- Create: `packages/core/src/map/pagerank.ts`
-- Create: `packages/core/src/map/__tests__/pagerank.test.ts`
-- Modify: `packages/core/src/map/index.ts`
-- Modify: `packages/core/src/map/types.ts`
+- Create: `packages/core/src/map/graph.ts` (PageRank, graph builder, connected components, shortest path)
+- Create: `packages/core/src/map/__tests__/graph.test.ts`
+- Modify: `packages/core/src/map/index.ts` (replace computeHotPaths with PageRank)
+- Modify: `packages/core/src/map/types.ts` (add `score` to HotPath)
+- Modify: `packages/core/src/map/__tests__/map.test.ts` (rewrite callers→callees tests)
 
 ---
 
@@ -30,6 +36,24 @@ This is a good proxy but misses graph structure. A file could have few direct re
 
 We already have the data: every indexed document has `callees: [{ name, file, line }]` metadata. This is the dependency graph.
 
+**Important finding:** `callers` metadata is NOT stored in the index — the scanner
+comment says "callers are computed at query time via reverse lookup." The current
+`computeHotPaths` reads `callers` from metadata (line 455 of map/index.ts) but this
+field is always empty for real indexed docs. Only the `callees` path (lines 469-480)
+produces results in production. This means switching to callees-only `buildDependencyGraph`
+is not a regression — it matches what actually works.
+
+---
+
+## Review findings (addressed before implementation)
+
+| Finding | Fix | Risk mitigation |
+|---------|-----|-----------------|
+| Plan only uses `callees`, current code uses both | `callers` is dead code in production (not stored in index). Use `callees` only. | Update tests from callers to callees mock data. Add comment explaining why. |
+| `incomingRefs` changes meaning | Keep `incomingRefs` as actual incoming edge count. Add `score` field for PageRank value. Sort by `score`. | Backward compatible — `incomingRefs` is still a real count. Display stays "refs". |
+| Existing tests will break | Update to use callees data + relative ordering assertions. | PageRank unit tests have exact assertions; integration tests verify behavior. |
+| Performance claim unverified | Add perf test: 2k-node graph, assert <50ms. | Generous threshold avoids CI flakiness. |
+
 ---
 
 ## Task 1: Implement PageRank algorithm
@@ -41,59 +65,102 @@ Pure function — takes a graph, returns ranked nodes. No I/O.
 ```typescript
 // In packages/core/src/map/__tests__/pagerank.test.ts
 
-import { pageRank } from '../pagerank';
+import { pageRank, buildDependencyGraph, type WeightedEdge } from '../graph';
+
+function edge(target: string, weight = 1): WeightedEdge {
+  return { target, weight };
+}
 
 describe('pageRank', () => {
   it('should rank nodes by importance', () => {
     // A -> B -> C, A -> C
     // C should rank highest (most incoming from important nodes)
-    const graph = new Map<string, string[]>();
-    graph.set('A', ['B', 'C']);
-    graph.set('B', ['C']);
-    graph.set('C', []);
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B'), edge('C')]);
+    graph.set('B', [edge('C')]);
 
     const ranks = pageRank(graph);
-    expect(ranks.get('C')).toBeGreaterThan(ranks.get('A')!);
-    expect(ranks.get('C')).toBeGreaterThan(ranks.get('B')!);
+    expect(ranks.get('C')!).toBeGreaterThan(ranks.get('A')!);
+    expect(ranks.get('C')!).toBeGreaterThan(ranks.get('B')!);
   });
 
   it('should handle cycles', () => {
-    // A -> B -> A (mutual dependency)
-    const graph = new Map<string, string[]>();
-    graph.set('A', ['B']);
-    graph.set('B', ['A']);
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+    graph.set('B', [edge('A')]);
 
     const ranks = pageRank(graph);
-    // Both should have similar rank
-    expect(Math.abs(ranks.get('A')! - ranks.get('B')!)).toBeLessThan(0.1);
+    expect(Math.abs(ranks.get('A')! - ranks.get('B')!)).toBeLessThan(0.01);
   });
 
   it('should handle disconnected nodes', () => {
-    const graph = new Map<string, string[]>();
-    graph.set('A', ['B']);
-    graph.set('B', []);
-    graph.set('C', []); // No connections
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+    // B has no outgoing edges (dangling); C is absent from the graph, hence `|| 0` below
+    // B should rank higher — it has an incoming edge from A
+
+    const ranks = pageRank(graph);
+    expect(ranks.get('B')!).toBeGreaterThan(ranks.get('C') || 0);
+  });
+
+  it('should handle dangling nodes (no outgoing edges)', () => {
+    // types.ts is referenced by many files but calls nothing itself
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('a.ts', [edge('types.ts'), edge('b.ts')]);
+    graph.set('b.ts', [edge('types.ts')]);
+    // types.ts has no outgoing edges — dangling node
+
+    const ranks = pageRank(graph);
+    // types.ts should rank highest (most incoming)
+    expect(ranks.get('types.ts')!).toBeGreaterThan(ranks.get('a.ts')!);
+    // Dangling node's rank should be distributed, not lost
+    const totalRank = Array.from(ranks.values()).reduce((a, b) => a + b, 0);
+    expect(totalRank).toBeCloseTo(1.0, 2);
+  });
+
+  it('should respect edge weights', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    // A depends heavily on B (weight 10), lightly on C (weight 1)
+    graph.set('A', [edge('B', 10), edge('C', 1)]);
 
     const ranks = pageRank(graph);
-    expect(ranks.get('A')).toBeDefined();
-    expect(ranks.get('C')).toBeDefined();
-    // Connected nodes should rank higher than isolated
     expect(ranks.get('B')!).toBeGreaterThan(ranks.get('C')!);
   });
 
   it('should return empty map for empty graph', () => {
-    const ranks = pageRank(new Map());
-    expect(ranks.size).toBe(0);
+    expect(pageRank(new Map()).size).toBe(0);
   });
 
-  it('should converge within iterations', () => {
-    // Large-ish graph
-    const graph = new Map<string, string[]>();
+  it('should converge for large ring graph', () => {
+    const graph = new Map<string, WeightedEdge[]>();
     for (let i = 0; i < 100; i++) {
-      graph.set(`node${i}`, [`node${(i + 1) % 100}`]);
+      graph.set(`node${i}`, [edge(`node${(i + 1) % 100}`)]);
     }
     const ranks = pageRank(graph);
     expect(ranks.size).toBe(100);
+    // All nodes in a ring should have equal rank
+    const values = Array.from(ranks.values());
+    const avg = values.reduce((a, b) => a + b, 0) / values.length;
+    for (const v of values) {
+      expect(v).toBeCloseTo(avg, 4);
+    }
+  });
+
+  it('should complete 2k-node graph in under 50ms', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    for (let i = 0; i < 2000; i++) {
+      const edges: WeightedEdge[] = [];
+      for (let j = 0; j < 5; j++) {
+        edges.push(edge(`node${(i + j + 1) % 2000}`, Math.random() * 5));
+      }
+      graph.set(`node${i}`, edges);
+    }
+    const start = Date.now();
+    const ranks = pageRank(graph);
+    const duration = Date.now() - start;
+    console.log(`PageRank: 2000 nodes, 10000 edges, ${duration}ms`);
+    expect(ranks.size).toBe(2000);
+    expect(duration).toBeLessThan(50);
   });
 });
 ```
@@ -103,71 +170,102 @@ describe('pageRank', () => {
 Run: `pnpm test -- packages/core/src/map/__tests__/pagerank.test.ts`
 Expected: FAIL — module not found
 
-- [ ] **Step 3: Implement PageRank**
+- [ ] **Step 3: Implement weighted PageRank with dangling node handling**
+
+Learnings from studying aider's implementation (Apache 2.0, uses NetworkX):
+- Weighted edges (sqrt-dampened reference counts)
+- Dangling node handling (files with no outgoing edges distribute rank to all)
+- Convergence check (stop early if delta < 1e-6)
+- Standard damping 0.85, max 100 iterations (matches NetworkX defaults)
+
+No external dependency — hand-rolled (~60 lines). If we ever need more
+graph algorithms, graphology (MIT, TS types, 1.6k stars) is the upgrade path.
 
 ```typescript
 // packages/core/src/map/pagerank.ts
 
+export interface WeightedEdge {
+  target: string;
+  weight: number;
+}
+
 /**
- * PageRank algorithm for ranking nodes in a directed graph.
+ * Weighted PageRank with dangling node handling and convergence.
  * Pure function — no I/O.
  *
- * Inspired by aider's repo map (https://github.com/Aider-AI/aider).
- *
- * @param graph - Map of node -> outgoing edges (dependencies)
- * @param damping - Damping factor (default 0.85, standard for PageRank)
- * @param iterations - Number of iterations (default 20, sufficient for convergence)
- * @returns Map of node -> rank score (higher = more important)
+ * Inspired by aider's repo map (https://github.com/Aider-AI/aider)
+ * which uses NetworkX PageRank over a weighted dependency graph.
  */
 export function pageRank(
-  graph: Map<string, string[]>,
+  graph: Map<string, WeightedEdge[]>,
   damping = 0.85,
-  iterations = 20
+  maxIterations = 100,
+  tolerance = 1e-6
 ): Map<string, number> {
+  // Collect all nodes (sources + targets)
   const nodes = new Set<string>();
-  for (const [src, targets] of graph) {
+  for (const [src, edges] of graph) {
     nodes.add(src);
-    for (const t of targets) nodes.add(t);
+    for (const e of edges) nodes.add(e.target);
   }
 
   if (nodes.size === 0) return new Map();
 
   const n = nodes.size;
-  const ranks = new Map<string, number>();
-  const initial = 1 / n;
+  let ranks = new Map<string, number>();
 
   // Initialize equal rank
-  for (const node of nodes) {
-    ranks.set(node, initial);
-  }
+  for (const node of nodes) ranks.set(node, 1 / n);
 
-  // Build reverse graph (who points to me?)
-  const inbound = new Map<string, string[]>();
+  // Build inbound map: target → [{ source, weight }]
+  const inbound = new Map<string, Array<{ source: string; weight: number }>>();
   for (const node of nodes) inbound.set(node, []);
-  for (const [src, targets] of graph) {
-    for (const t of targets) {
-      inbound.get(t)?.push(src);
+
+  // Build outgoing weight sums for normalization
+  const outWeightSum = new Map<string, number>();
+  for (const [src, edges] of graph) {
+    let sum = 0;
+    for (const e of edges) {
+      inbound.get(e.target)?.push({ source: src, weight: e.weight });
+      sum += e.weight;
     }
+    outWeightSum.set(src, sum);
   }
 
-  // Iterate
-  for (let i = 0; i < iterations; i++) {
+  // Identify dangling nodes (no outgoing edges)
+  const danglingNodes: string[] = [];
+  for (const node of nodes) {
+    if (!outWeightSum.has(node) || outWeightSum.get(node) === 0) {
+      danglingNodes.push(node);
+    }
+  }
+
+  // Iterate until convergence or max iterations
+  for (let iter = 0; iter < maxIterations; iter++) {
     const newRanks = new Map<string, number>();
 
+    // Dangling rank: sum of dangling nodes' ranks, distributed to all
+    let danglingRank = 0;
+    for (const d of danglingNodes) danglingRank += ranks.get(d) || 0;
+
     for (const node of nodes) {
       let sum = 0;
-      const sources = inbound.get(node) || [];
-      for (const src of sources) {
-        const outDegree = graph.get(src)?.length || 1;
-        sum += (ranks.get(src) || 0) / outDegree;
+      for (const { source, weight } of inbound.get(node) || []) {
+        const srcOutWeight = outWeightSum.get(source) || 1;
+        sum += ((ranks.get(source) || 0) * weight) / srcOutWeight;
       }
-      newRanks.set(node, (1 - damping) / n + damping * sum);
+      // Standard PageRank formula with dangling node contribution
+      newRanks.set(node, (1 - damping) / n + damping * (sum + danglingRank / n));
     }
 
-    // Update ranks
-    for (const [node, rank] of newRanks) {
-      ranks.set(node, rank);
+    // Check convergence
+    let delta = 0;
+    for (const node of nodes) {
+      delta += Math.abs((newRanks.get(node) || 0) - (ranks.get(node) || 0));
     }
+
+    ranks = newRanks;
+    if (delta < tolerance) break;
   }
 
   return ranks;
@@ -198,32 +296,40 @@ git commit -m "feat(core): add PageRank algorithm for file importance ranking"
 import type { SearchResult } from '../vector/types.js';
 
 /**
- * Build a file dependency graph from indexed documents.
- * Uses callees metadata to create edges: file A calls something in file B → A depends on B.
+ * Build a weighted file dependency graph from indexed documents.
+ * Uses callees metadata: file A calls N things in file B → edge weight = sqrt(N).
+ * sqrt dampening (from aider) prevents high-frequency references from dominating.
  * Pure function.
  */
-export function buildDependencyGraph(docs: SearchResult[]): Map<string, string[]> {
-  const graph = new Map<string, string[]>();
+export function buildDependencyGraph(docs: SearchResult[]): Map<string, WeightedEdge[]> {
+  // Count raw references per (source, target) pair
+  const rawCounts = new Map<string, Map<string, number>>();
 
   for (const doc of docs) {
     const sourceFile = doc.metadata.path as string;
     if (!sourceFile) continue;
 
-    if (!graph.has(sourceFile)) graph.set(sourceFile, []);
+    if (!rawCounts.has(sourceFile)) rawCounts.set(sourceFile, new Map());
 
     const callees = doc.metadata.callees as Array<{ file?: string }> | undefined;
     if (callees && Array.isArray(callees)) {
       for (const callee of callees) {
         if (callee.file && callee.file !== sourceFile) {
-          graph.get(sourceFile)!.push(callee.file);
+          const targets = rawCounts.get(sourceFile)!;
+          targets.set(callee.file, (targets.get(callee.file) || 0) + 1);
         }
       }
     }
   }
 
-  // Deduplicate edges
-  for (const [node, edges] of graph) {
-    graph.set(node, [...new Set(edges)]);
+  // Convert to weighted edges with sqrt dampening
+  const graph = new Map<string, WeightedEdge[]>();
+  for (const [source, targets] of rawCounts) {
+    const edges: WeightedEdge[] = [];
+    for (const [target, count] of targets) {
+      edges.push({ target, weight: Math.sqrt(count) });
+    }
+    graph.set(source, edges);
   }
 
   return graph;
@@ -234,8 +340,8 @@ export function buildDependencyGraph(docs: SearchResult[]): Map<string, string[]
 
 ```typescript
 describe('buildDependencyGraph', () => {
-  it('should build graph from callees metadata', () => {
-    const docs: SearchResult[] = [
+  it('should build weighted graph from callees metadata', () => {
+    const docs = [
       { id: '1', score: 0.9, metadata: {
         path: 'src/a.ts',
         callees: [{ name: 'foo', file: 'src/b.ts', line: 10 }],
@@ -246,38 +352,45 @@ describe('buildDependencyGraph', () => {
       }},
     ];
 
-    const graph = buildDependencyGraph(docs);
-    expect(graph.get('src/a.ts')).toContain('src/b.ts');
-    expect(graph.get('src/b.ts')).toContain('src/c.ts');
+    const graph = buildDependencyGraph(docs as any);
+    const aEdges = graph.get('src/a.ts')!;
+    expect(aEdges.some(e => e.target === 'src/b.ts')).toBe(true);
+    expect(aEdges[0].weight).toBe(1); // sqrt(1) = 1
   });
 
-  it('should handle docs without callees metadata', () => {
-    const docs: SearchResult[] = [
-      { id: '1', score: 0.9, metadata: { path: 'src/types.ts', type: 'interface' } },
-      { id: '2', score: 0.9, metadata: {
+  it('should sqrt-dampen weights for multiple references', () => {
+    const docs = [
+      { id: '1', score: 0.9, metadata: {
         path: 'src/a.ts',
-        callees: [{ name: 'MyType', file: 'src/types.ts', line: 1 }],
+        callees: [
+          { name: 'foo', file: 'src/b.ts', line: 10 },
+          { name: 'bar', file: 'src/b.ts', line: 20 },
+          { name: 'baz', file: 'src/b.ts', line: 30 },
+          { name: 'qux', file: 'src/b.ts', line: 40 },
+        ],
       }},
     ];
 
-    const graph = buildDependencyGraph(docs);
-    expect(graph.get('src/a.ts')).toContain('src/types.ts');
-    expect(graph.get('src/types.ts')).toEqual([]);
+    const graph = buildDependencyGraph(docs as any);
+    const aEdges = graph.get('src/a.ts')!;
+    expect(aEdges.length).toBe(1); // deduplicated to one edge
+    expect(aEdges[0].target).toBe('src/b.ts');
+    expect(aEdges[0].weight).toBe(2); // sqrt(4) = 2
   });
 
-  it('should deduplicate edges', () => {
-    const docs: SearchResult[] = [
-      { id: '1', score: 0.9, metadata: {
+  it('should handle docs without callees metadata', () => {
+    const docs = [
+      { id: '1', score: 0.9, metadata: { path: 'src/types.ts', type: 'interface' } },
+      { id: '2', score: 0.9, metadata: {
         path: 'src/a.ts',
-        callees: [
-          { name: 'foo', file: 'src/b.ts', line: 10 },
-          { name: 'bar', file: 'src/b.ts', line: 20 },
-        ],
+        callees: [{ name: 'MyType', file: 'src/types.ts', line: 1 }],
       }},
     ];
 
-    const graph = buildDependencyGraph(docs);
-    expect(graph.get('src/a.ts')).toEqual(['src/b.ts']);
+    const graph = buildDependencyGraph(docs as any);
+    expect(graph.get('src/a.ts')!.some(e => e.target === 'src/types.ts')).toBe(true);
+    // types.ts has no outgoing edges (not even in the graph as a source)
+    expect(graph.has('src/types.ts')).toBe(false);
   });
 });
 ```
@@ -298,12 +411,21 @@ git commit -m "feat(core): add dependency graph builder from indexed callees"
 Replace the current `computeHotPaths` function (simple reference count) with PageRank-based ranking:
 
 ```typescript
-import { buildDependencyGraph, pageRank } from './pagerank.js';
+import { buildDependencyGraph, pageRank } from './graph.js';
 
 function computeHotPaths(docs: SearchResult[], maxPaths: number): HotPath[] {
   const graph = buildDependencyGraph(docs);
   const ranks = pageRank(graph);
 
+  // Count real incoming edges per file (distinct source files)
+  const incomingCounts = new Map<string, Set<string>>();
+  for (const [src, edges] of graph) {
+    for (const e of edges) {
+      if (!incomingCounts.has(e.target)) incomingCounts.set(e.target, new Set());
+      incomingCounts.get(e.target)!.add(src);
+    }
+  }
+
   // Build a lookup for primary component name per file
   const componentByFile = new Map<string, string>();
   for (const doc of docs) {
@@ -313,48 +435,327 @@ function computeHotPaths(docs: SearchResult[], maxPaths: number): HotPath[] {
     }
   }
 
-  // Sort by PageRank score and take top N
+  // Sort by PageRank score, display real incoming ref count
   return Array.from(ranks.entries())
     .sort((a, b) => b[1] - a[1])
     .slice(0, maxPaths)
     .map(([file, score]) => ({
       file,
-      incomingRefs: Math.round(score * 1000), // Normalized PageRank for display
+      incomingRefs: incomingCounts.get(file)?.size ?? 0,
+      score,
       primaryComponent: componentByFile.get(file),
     }));
 }
 ```
 
-- [ ] **Step 2: Update HotPath type if needed**
+- [ ] **Step 2: Update HotPath type**
 
-In `packages/core/src/map/types.ts`, consider adding a `pageRankScore` field:
+In `packages/core/src/map/types.ts`, add `score` field:
 
 ```typescript
 export interface HotPath {
   file: string;
-  incomingRefs: number;
+  incomingRefs: number;    // actual count of files that depend on this file
+  score: number;           // PageRank score (used for sorting)
   primaryComponent?: string;
-  pageRankScore?: number; // Optional, for debugging/verbose output
 }
 ```
 
-- [ ] **Step 3: Run full test suite**
+Sort by `score` (PageRank), display `incomingRefs` (real count) — keeps display meaningful.
+
+- [ ] **Step 3: Rewrite existing hot paths tests (callers → callees)**
+
+Three tests in `map.test.ts` use `callers` mock data which is dead code in production.
+Rewrite them to use `callees` data and assert relative ordering (not exact counts):
+
+**Test 1: "should compute hot paths from callers data" (line 288)**
+→ Rewrite as "should compute hot paths from callees data"
+   - Mock docs with `callees` pointing to a common target file
+   - Assert the target file ranks first (PageRank should rank it highest)
+   - Assert `incomingRefs` is the real incoming edge count
+   - Assert `score` is a positive number
+
+**Test 2: "should limit hot paths to maxHotPaths" (line 365)**
+→ Keep structure, change mock data from `callers` to `callees`
+   - Assert `hotPaths.length <= maxHotPaths`
+   - Assert sorted by score descending
+
+**Test 3: "should format hot paths in output" (line 411)**
+→ Keep structure, change mock data from `callers` to `callees`
+   - Assert output contains "refs" label
+   - Assert file names appear in output
+
+**Test 4 (existing, unchanged): "should not include hot paths when disabled" (line 390)**
+→ No change needed — doesn't use callers data
+
+- [ ] **Step 4: Add test for empty callees array**
+
+```typescript
+it('should handle docs with empty callees array', () => {
+  const docs = [
+    { id: '1', score: 0.9, metadata: { path: 'src/types.ts', callees: [] } },
+  ];
+  const graph = buildDependencyGraph(docs as any);
+  expect(graph.get('src/types.ts')).toEqual([]);
+});
+```
+
+- [ ] **Step 5: Run full test suite**
 
 Run: `pnpm build && pnpm test`
-Expected: ALL PASS (hot paths test may need updating for new ranking order)
+Expected: ALL PASS
 
 - [ ] **Step 4: Commit**
 
 ```bash
-git add packages/core/src/map/index.ts packages/core/src/map/types.ts packages/core/src/map/pagerank.ts packages/core/src/map/__tests__/
+git add packages/core/src/map/
 git commit -m "feat(core): use PageRank for dev_map hot paths ranking"
 ```
 
 ---
 
+## Task 4 (deferred): Connected components for subsystem identification
+
+**Wiring deferred to follow-up PR.** Implement the algorithm and tests in this PR
+(it's in graph.ts alongside PageRank), but don't wire it into CodebaseMap or
+formatCodebaseMap. Wire it when there's a consumer.
+
+Identifies clusters of files that form independent subsystems. Uses the
+undirected version of the dependency graph (A depends on B = A and B are connected).
+
+**Value for agents:** "This codebase has 3 isolated subsystems: core (45 files),
+CLI (12 files), and MCP server (18 files)." Helps agents scope their work.
+
+- [ ] **Step 1: Implement connected components (BFS)**
+
+```typescript
+// In graph.ts
+
+/**
+ * Find connected components in the dependency graph (undirected).
+ * Returns groups of files sorted by size (largest first).
+ * Pure function — no I/O.
+ */
+export function connectedComponents(
+  graph: Map<string, WeightedEdge[]>
+): string[][] {
+  // Build undirected adjacency list
+  const adj = new Map<string, Set<string>>();
+  const allNodes = new Set<string>();
+  for (const [src, edges] of graph) {
+    allNodes.add(src);
+    if (!adj.has(src)) adj.set(src, new Set());
+    for (const e of edges) {
+      allNodes.add(e.target);
+      if (!adj.has(e.target)) adj.set(e.target, new Set());
+      adj.get(src)!.add(e.target);
+      adj.get(e.target)!.add(src);
+    }
+  }
+
+  const visited = new Set<string>();
+  const components: string[][] = [];
+
+  for (const node of allNodes) {
+    if (visited.has(node)) continue;
+    // BFS from this node
+    const component: string[] = [];
+    const queue = [node];
+    visited.add(node);
+    while (queue.length > 0) {
+      const current = queue.shift()!;
+      component.push(current);
+      for (const neighbor of adj.get(current) || []) {
+        if (!visited.has(neighbor)) {
+          visited.add(neighbor);
+          queue.push(neighbor);
+        }
+      }
+    }
+    components.push(component);
+  }
+
+  // Sort by size (largest first)
+  return components.sort((a, b) => b.length - a.length);
+}
+```
+
+- [ ] **Step 2: Write tests**
+
+```typescript
+describe('connectedComponents', () => {
+  it('should identify separate clusters', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    // Cluster 1: A -> B -> C
+    graph.set('A', [edge('B')]);
+    graph.set('B', [edge('C')]);
+    // Cluster 2: D -> E
+    graph.set('D', [edge('E')]);
+
+    const components = connectedComponents(graph);
+    expect(components.length).toBe(2);
+    expect(components[0].length).toBe(3); // A, B, C
+    expect(components[1].length).toBe(2); // D, E
+  });
+
+  it('should treat the graph as undirected', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    // A -> B, C -> B (B connects A and C even though edges point inward)
+    graph.set('A', [edge('B')]);
+    graph.set('C', [edge('B')]);
+
+    const components = connectedComponents(graph);
+    expect(components.length).toBe(1); // All connected
+    expect(components[0].length).toBe(3);
+  });
+
+  it('should handle single-node components', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+    graph.set('lonely', []); // Isolated node
+
+    const components = connectedComponents(graph);
+    expect(components.length).toBe(2);
+  });
+
+  it('should return empty for empty graph', () => {
+    expect(connectedComponents(new Map()).length).toBe(0);
+  });
+});
+```
+
+- [ ] **Step 3: Commit (algorithm + tests only, no wiring)**
+
+```bash
+git add packages/core/src/map/graph.ts packages/core/src/map/__tests__/graph.test.ts
+git commit -m "feat(core): add connected components algorithm (consumer wired in follow-up)"
+```
+
+---
+
+## Task 5 (deferred): Shortest path for call chain tracing
+
+**Wiring deferred to follow-up PR.** Implement algorithm and tests in this PR.
+Wire into dev_refs as "trace path" feature in a separate PR.
+
+Answers "how does file A reach file B?" — BFS for hop count on the
+unweighted dependency graph. Not Dijkstra — agents care about hops, not weights.
+
+**Value for agents:** Instead of multiple `dev_refs` calls, one query shows:
+"auth.ts → user-service.ts → repository.ts → database.ts (3 hops)"
+
+- [ ] **Step 1: Implement shortest path (BFS)**
+
+```typescript
+// In graph.ts
+
+/**
+ * Find shortest path between two files in the dependency graph.
+ * Uses BFS (unweighted — hop count, not edge weight).
+ * Returns the path as an array of files, or null if unreachable.
+ * Pure function — no I/O.
+ */
+export function shortestPath(
+  graph: Map<string, WeightedEdge[]>,
+  from: string,
+  to: string
+): string[] | null {
+  if (from === to) return [from];
+  if (!graph.has(from)) return null;
+
+  const visited = new Set<string>([from]);
+  const parent = new Map<string, string>();
+  const queue = [from];
+
+  while (queue.length > 0) {
+    const current = queue.shift()!;
+    for (const { target } of graph.get(current) || []) {
+      if (visited.has(target)) continue;
+      visited.add(target);
+      parent.set(target, current);
+      if (target === to) {
+        // Reconstruct path
+        const path = [to];
+        let node = to;
+        while (parent.has(node)) {
+          node = parent.get(node)!;
+          path.unshift(node);
+        }
+        return path;
+      }
+      queue.push(target);
+    }
+  }
+
+  return null; // Unreachable
+}
+```
+
+- [ ] **Step 2: Write tests**
+
+```typescript
+describe('shortestPath', () => {
+  it('should find direct path', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+
+    expect(shortestPath(graph, 'A', 'B')).toEqual(['A', 'B']);
+  });
+
+  it('should find multi-hop path', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+    graph.set('B', [edge('C')]);
+    graph.set('C', [edge('D')]);
+
+    expect(shortestPath(graph, 'A', 'D')).toEqual(['A', 'B', 'C', 'D']);
+  });
+
+  it('should find shortest among multiple paths', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B'), edge('C')]);
+    graph.set('B', [edge('D')]);
+    graph.set('C', [edge('D')]); // A->C->D is same length as A->B->D
+
+    const path = shortestPath(graph, 'A', 'D');
+    expect(path?.length).toBe(3); // 3 nodes = 2 hops
+  });
+
+  it('should return null for unreachable target', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+    graph.set('C', [edge('D')]); // Disconnected
+
+    expect(shortestPath(graph, 'A', 'D')).toBeNull();
+  });
+
+  it('should return single-node path for self', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+
+    expect(shortestPath(graph, 'A', 'A')).toEqual(['A']);
+  });
+
+  it('should return null for unknown source', () => {
+    expect(shortestPath(new Map(), 'X', 'Y')).toBeNull();
+  });
+});
+```
+
+- [ ] **Step 3: Commit (algorithm + tests only, no wiring)**
+
+```bash
+git add packages/core/src/map/graph.ts packages/core/src/map/__tests__/graph.test.ts
+git commit -m "feat(core): add shortest path algorithm (consumer wired in follow-up)"
+```
+
+---
+
 ## Notes
 
-- **Existing map tests need updating:** Tests in `map.test.ts` mock `callers` data. After this change, `computeHotPaths` uses `callees` via `buildDependencyGraph`. Update mock data to use `callees` instead of `callers`, and adjust expected ranking order since PageRank differs from simple ref counting.
-- PageRank is ~O(V + E) per iteration × 20 iterations. For a 2k-file repo with 10k edges, this is <10ms. Negligible.
-- The `incomingRefs` field now shows normalized PageRank score, not raw count. Display label in `formatCodebaseMap` could change from "refs" to "importance" or keep "refs" for familiarity.
-- Attribution: add to ARCHITECTURE.md: "File importance ranking inspired by [aider's repo map](https://github.com/Aider-AI/aider)"
+- **Existing map tests:** 3 tests use `callers` mock data (dead code). Concrete rewrites specified in Task 3 Step 3.
+- **Performance:** PageRank is ~O(V + E) per iteration, capped at 100 iterations (stops early on convergence). Expected <10ms for 2k files; the perf test asserts a looser <50ms bound to avoid CI flakiness.
+- **Display:** `incomingRefs` = real incoming edge count. `score` = PageRank value for sorting. Label stays "refs".
+- **Deferred consumers:** Connected components → `formatCodebaseMap`. Shortest path → `dev_refs`. Both in follow-up PRs.
+- **All algorithms in one file:** `graph.ts` contains pageRank, buildDependencyGraph, connectedComponents, shortestPath. ~115 lines total.
+- **Attribution:** "File importance ranking inspired by [aider's repo map](https://github.com/Aider-AI/aider)"
diff --git a/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/overview.md b/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/overview.md
index a482a85..1a8bb36 100644
--- a/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/overview.md
+++ b/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/overview.md
@@ -1,6 +1,6 @@
 # Phase 1: MCP Tools Improvement
 
-**Status:** Draft
+**Status:** In progress (Parts 1.1–1.5 merged, 1.6 in progress)
 
 ## Context
 
@@ -67,8 +67,18 @@ This is an acceptable trade-off: line count is a cheap stat call.
 | [1.2](./1.2-index-based-analysis.md) | Add `getDocsByFilePath`, index analysis path, wire VectorStorage | Medium — new code path |
 | [1.3](./1.3-cleanup.md) | Consolidate reads, remove dead code, remove GitHub from health | Low — cleanup |
 | [1.4](./1.4-agent-usability.md) | Merge status/health, add error suggestions, rename params, JSON output | Medium — tool surface change |
-| [1.5](./1.5-ast-pattern-analysis.md) | AST-based pattern analysis via ast-grep (optional dep) | Low — additive, regex fallback |
-| [1.6](./1.6-pagerank-map.md) | PageRank file ranking for dev_map hot paths | Low — replaces simple counting |
+| [1.5](./1.5-ast-pattern-analysis.md) | AST-based pattern analysis via tree-sitter queries | Low — additive, regex fallback |
+| [1.6](./1.6-pagerank-map.md) | Graph algorithms: PageRank, connected components, shortest path | Low — replaces simple counting |
+
+### Part 1.6 Commit Plan
+
+| # | Commit | What changes |
+|---|--------|-------------|
+| 1 | `feat(core): add graph algorithms — PageRank, connected components, shortest path` | New `graph.ts` with pure functions + `graph.test.ts` (~20 tests). No wiring. |
+| 2 | `feat(core): replace ref counting with PageRank in dev_map` | Wire PageRank into `computeHotPaths`. Add `score` to `HotPath`. Rewrite 3 callers→callees tests. |
+| 3 | `feat(core): wire connected components into dev_map output` | Add `components` to `CodebaseMap` + `formatCodebaseMap`. |
+| 4 | `feat(mcp): add path tracing to dev_refs` | New `trace` param on RefsAdapter. Schema + tests. |
+| 5 | `docs: complete MCP Phase 1, attribution, plan status` | Plan updates, aider attribution, mark Phase 1 complete. |
 
 ---
 
diff --git a/.claude/scratchpad.md b/.claude/scratchpad.md
index b00ea8f..2067b41 100644
--- a/.claude/scratchpad.md
+++ b/.claude/scratchpad.md
@@ -11,7 +11,9 @@
 ## Future Work
 
 - Antfly SDK: server-side path filter for `getDocsByFilePath` (eliminates 5k cap)
-- PageRank for `dev_map` hot paths (MCP Phase 1, Part 1.6)
+- Wire `shortestPath` into `dev_refs` as a "trace path" feature (graph.ts is ready, adapter wiring is separate scope)
+- Wire `connectedComponents` into `dev_map` verbose output (graph.ts is ready)
+- Betweenness centrality — identifies bridge files between subsystems. Worth adding if agents need refactoring guidance. graphology (MIT, 1.6k stars) is the upgrade path if we need more than 3 hand-rolled algorithms.
 - E2E tests in CI — blocked on Antfly memory requirements vs GitHub runner limits (7GB)
 - **Python language support** — tree-sitter-python WASM is ~300KB, already in tree-sitter-wasms. Needs a Python scanner (document extraction) + Python-specific pattern rules. High demand — large ecosystem. Worth a standalone plan covering: scanner, pattern rules, test fixtures, indexer integration. The PatternMatcher interface from 1.5 is language-agnostic so pattern rules slot right in; the scanner is the real work.
 - Vue/Svelte SFC support — `.vue`/`.svelte` files have embedded `<script lang="ts">` blocks. Would need script block extraction before tree-sitter parsing. Lower priority — co-located `.ts` files in those projects already get full analysis.

From 0543991aabd9e8bf602d2bc6d8e38962c729f466 Mon Sep 17 00:00:00 2001
From: prosdev <prosdevlab@gmail.com>
Date: Tue, 31 Mar 2026 13:01:39 -0700
Subject: [PATCH 2/8] =?UTF-8?q?feat(core):=20add=20graph=20algorithms=20?=
 =?UTF-8?q?=E2=80=94=20PageRank,=20connected=20components,=20shortest=20pa?=
 =?UTF-8?q?th?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pure functions over the file dependency graph:
- pageRank: weighted with dangling node handling and convergence (4ms for 2k nodes)
- buildDependencyGraph: callees → weighted edges with sqrt dampening
- connectedComponents: BFS on undirected graph for subsystem identification
- shortestPath: BFS for call chain tracing (hop count)

24 tests covering ranking, cycles, dangling nodes, weights, components,
path finding, edge cases, and performance.

Inspired by aider's repo map (https://github.com/Aider-AI/aider).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 packages/core/src/map/__tests__/graph.test.ts | 335 ++++++++++++++++++
 packages/core/src/map/graph.ts                | 259 ++++++++++++++
 2 files changed, 594 insertions(+)
 create mode 100644 packages/core/src/map/__tests__/graph.test.ts
 create mode 100644 packages/core/src/map/graph.ts

diff --git a/packages/core/src/map/__tests__/graph.test.ts b/packages/core/src/map/__tests__/graph.test.ts
new file mode 100644
index 0000000..7798031
--- /dev/null
+++ b/packages/core/src/map/__tests__/graph.test.ts
@@ -0,0 +1,335 @@
+/**
+ * Graph Algorithm Tests
+ *
+ * Tests for PageRank, connected components, shortest path, and graph builder.
+ * All pure functions — no I/O, no mocks needed.
+ */
+
+import { describe, expect, it } from 'vitest';
+import {
+  buildDependencyGraph,
+  connectedComponents,
+  pageRank,
+  shortestPath,
+  type WeightedEdge,
+} from '../graph';
+
+function edge(target: string, weight = 1): WeightedEdge {
+  return { target, weight };
+}
+
+// ============================================================================
+// PageRank
+// ============================================================================
+
+describe('pageRank', () => {
+  it('should rank nodes by importance', () => {
+    // A -> B -> C, A -> C
+    // C should rank highest (most incoming from important nodes)
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B'), edge('C')]);
+    graph.set('B', [edge('C')]);
+
+    const ranks = pageRank(graph);
+    expect(ranks.get('C')!).toBeGreaterThan(ranks.get('A')!);
+    expect(ranks.get('C')!).toBeGreaterThan(ranks.get('B')!);
+  });
+
+  it('should handle cycles', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+    graph.set('B', [edge('A')]);
+
+    const ranks = pageRank(graph);
+    expect(Math.abs(ranks.get('A')! - ranks.get('B')!)).toBeLessThan(0.01);
+  });
+
+  it('should handle disconnected nodes', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+    // B has incoming edge, C does not — B should rank higher
+    graph.set('C', []);
+
+    const ranks = pageRank(graph);
+    expect(ranks.get('B')!).toBeGreaterThan(ranks.get('C')!);
+  });
+
+  it('should handle dangling nodes (no outgoing edges)', () => {
+    // types.ts is depended on by many files but calls nothing itself
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('a.ts', [edge('types.ts'), edge('b.ts')]);
+    graph.set('b.ts', [edge('types.ts')]);
+    // types.ts has no outgoing edges — dangling node
+
+    const ranks = pageRank(graph);
+    // types.ts should rank highest (most incoming)
+    expect(ranks.get('types.ts')!).toBeGreaterThan(ranks.get('a.ts')!);
+    // Dangling node's rank should be distributed, not lost
+    const totalRank = Array.from(ranks.values()).reduce((a, b) => a + b, 0);
+    expect(totalRank).toBeCloseTo(1.0, 2);
+  });
+
+  it('should respect edge weights', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    // A depends heavily on B (weight 10), lightly on C (weight 1)
+    graph.set('A', [edge('B', 10), edge('C', 1)]);
+
+    const ranks = pageRank(graph);
+    expect(ranks.get('B')!).toBeGreaterThan(ranks.get('C')!);
+  });
+
+  it('should return empty map for empty graph', () => {
+    expect(pageRank(new Map()).size).toBe(0);
+  });
+
+  it('should converge for large ring graph (all equal rank)', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    for (let i = 0; i < 100; i++) {
+      graph.set(`node${i}`, [edge(`node${(i + 1) % 100}`)]);
+    }
+    const ranks = pageRank(graph);
+    expect(ranks.size).toBe(100);
+
+    // All nodes in a ring should have equal rank
+    const values = Array.from(ranks.values());
+    const avg = values.reduce((a, b) => a + b, 0) / values.length;
+    for (const v of values) {
+      expect(v).toBeCloseTo(avg, 4);
+    }
+  });
+
+  it('should complete 2k-node graph in under 50ms', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    for (let i = 0; i < 2000; i++) {
+      const edges: WeightedEdge[] = [];
+      for (let j = 0; j < 5; j++) {
+        edges.push(edge(`node${(i + j + 1) % 2000}`, 1 + (j % 3)));
+      }
+      graph.set(`node${i}`, edges);
+    }
+
+    const start = Date.now();
+    const ranks = pageRank(graph);
+    const duration = Date.now() - start;
+
+    console.log(`PageRank: 2000 nodes, 10000 edges, ${duration}ms`);
+    expect(ranks.size).toBe(2000);
+    expect(duration).toBeLessThan(50);
+  });
+});
+
+// ============================================================================
+// buildDependencyGraph
+// ============================================================================
+
+describe('buildDependencyGraph', () => {
+  it('should build weighted graph from callees metadata', () => {
+    const docs = [
+      {
+        id: '1',
+        score: 0.9,
+        metadata: {
+          path: 'src/a.ts',
+          callees: [{ name: 'foo', file: 'src/b.ts', line: 10 }],
+        },
+      },
+      {
+        id: '2',
+        score: 0.9,
+        metadata: {
+          path: 'src/b.ts',
+          callees: [{ name: 'bar', file: 'src/c.ts', line: 5 }],
+        },
+      },
+    ];
+
+    const graph = buildDependencyGraph(docs);
+    const aEdges = graph.get('src/a.ts')!;
+    expect(aEdges.some((e) => e.target === 'src/b.ts')).toBe(true);
+    expect(aEdges[0].weight).toBe(1); // sqrt(1) = 1
+  });
+
+  it('should sqrt-dampen weights for multiple references', () => {
+    const docs = [
+      {
+        id: '1',
+        score: 0.9,
+        metadata: {
+          path: 'src/a.ts',
+          callees: [
+            { name: 'foo', file: 'src/b.ts', line: 10 },
+            { name: 'bar', file: 'src/b.ts', line: 20 },
+            { name: 'baz', file: 'src/b.ts', line: 30 },
+            { name: 'qux', file: 'src/b.ts', line: 40 },
+          ],
+        },
+      },
+    ];
+
+    const graph = buildDependencyGraph(docs);
+    const aEdges = graph.get('src/a.ts')!;
+    expect(aEdges.length).toBe(1); // deduplicated to one edge
+    expect(aEdges[0].target).toBe('src/b.ts');
+    expect(aEdges[0].weight).toBe(2); // sqrt(4) = 2
+  });
+
+  it('should handle docs without callees metadata', () => {
+    const docs = [
+      { id: '1', score: 0.9, metadata: { path: 'src/types.ts', type: 'interface' } },
+      {
+        id: '2',
+        score: 0.9,
+        metadata: {
+          path: 'src/a.ts',
+          callees: [{ name: 'MyType', file: 'src/types.ts', line: 1 }],
+        },
+      },
+    ];
+
+    const graph = buildDependencyGraph(docs);
+    expect(graph.get('src/a.ts')!.some((e) => e.target === 'src/types.ts')).toBe(true);
+    // types.ts has no callees — it's in the graph as a source (from the doc) but with no edges
+    expect(graph.get('src/types.ts')).toEqual([]);
+  });
+
+  it('should handle docs with empty callees array', () => {
+    const docs = [{ id: '1', score: 0.9, metadata: { path: 'src/types.ts', callees: [] } }];
+
+    const graph = buildDependencyGraph(docs);
+    // Empty callees → no edges, but source is in the graph
+    expect(graph.get('src/types.ts')).toEqual([]);
+  });
+
+  it('should exclude self-references', () => {
+    const docs = [
+      {
+        id: '1',
+        score: 0.9,
+        metadata: {
+          path: 'src/a.ts',
+          callees: [
+            { name: 'foo', file: 'src/a.ts', line: 10 }, // self-reference
+            { name: 'bar', file: 'src/b.ts', line: 20 },
+          ],
+        },
+      },
+    ];
+
+    const graph = buildDependencyGraph(docs);
+    const aEdges = graph.get('src/a.ts')!;
+    expect(aEdges.length).toBe(1);
+    expect(aEdges[0].target).toBe('src/b.ts');
+  });
+});
+
+// ============================================================================
+// connectedComponents
+// ============================================================================
+
+describe('connectedComponents', () => {
+  it('should identify separate clusters', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    // Cluster 1: A -> B -> C
+    graph.set('A', [edge('B')]);
+    graph.set('B', [edge('C')]);
+    // Cluster 2: D -> E
+    graph.set('D', [edge('E')]);
+
+    const components = connectedComponents(graph);
+    expect(components.length).toBe(2);
+    expect(components[0].length).toBe(3); // A, B, C (largest first)
+    expect(components[1].length).toBe(2); // D, E
+  });
+
+  it('should treat the graph as undirected', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    // A -> B, C -> B (B connects A and C even though edges point inward)
+    graph.set('A', [edge('B')]);
+    graph.set('C', [edge('B')]);
+
+    const components = connectedComponents(graph);
+    expect(components.length).toBe(1);
+    expect(components[0].length).toBe(3);
+  });
+
+  it('should include target-only nodes', () => {
+    // types.ts only appears as a target, never as a source key
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('a.ts', [edge('types.ts')]);
+    graph.set('b.ts', [edge('types.ts')]);
+
+    const components = connectedComponents(graph);
+    expect(components.length).toBe(1); // All connected through types.ts
+    expect(components[0]).toContain('types.ts');
+    expect(components[0].length).toBe(3);
+  });
+
+  it('should handle isolated source nodes', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+    graph.set('lonely', []); // Source with no edges
+
+    const components = connectedComponents(graph);
+    expect(components.length).toBe(2);
+    // Largest first: A+B (2), then lonely (1)
+    expect(components[0].length).toBe(2);
+    expect(components[1]).toEqual(['lonely']);
+  });
+
+  it('should return empty for empty graph', () => {
+    expect(connectedComponents(new Map()).length).toBe(0);
+  });
+});
+
+// ============================================================================
+// shortestPath
+// ============================================================================
+
+describe('shortestPath', () => {
+  it('should find direct path', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+
+    expect(shortestPath(graph, 'A', 'B')).toEqual(['A', 'B']);
+  });
+
+  it('should find multi-hop path', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+    graph.set('B', [edge('C')]);
+    graph.set('C', [edge('D')]);
+
+    expect(shortestPath(graph, 'A', 'D')).toEqual(['A', 'B', 'C', 'D']);
+  });
+
+  it('should find shortest among multiple paths', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    // A -> B -> D (2 hops) and A -> C -> X -> D (3 hops)
+    graph.set('A', [edge('B'), edge('C')]);
+    graph.set('B', [edge('D')]);
+    graph.set('C', [edge('X')]);
+    graph.set('X', [edge('D')]);
+
+    const path = shortestPath(graph, 'A', 'D');
+    expect(path).toEqual(['A', 'B', 'D']); // Shortest: 2 hops
+  });
+
+  it('should return null for unreachable target', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+    graph.set('C', [edge('D')]); // Disconnected
+
+    expect(shortestPath(graph, 'A', 'D')).toBeNull();
+  });
+
+  it('should return single-node path for self', () => {
+    const graph = new Map<string, WeightedEdge[]>();
+    graph.set('A', [edge('B')]);
+
+    expect(shortestPath(graph, 'A', 'A')).toEqual(['A']);
+  });
+
+  it('should return null for unknown source', () => {
+    expect(shortestPath(new Map(), 'X', 'Y')).toBeNull();
+  });
+});
diff --git a/packages/core/src/map/graph.ts b/packages/core/src/map/graph.ts
new file mode 100644
index 0000000..b435826
--- /dev/null
+++ b/packages/core/src/map/graph.ts
@@ -0,0 +1,259 @@
+/**
+ * Graph Algorithms for Codebase Analysis
+ *
+ * Pure functions over the file dependency graph:
+ * - PageRank: file importance ranking (replaces simple ref counting)
+ * - Connected components: subsystem identification
+ * - Shortest path: call chain tracing
+ * - Graph builder: constructs weighted graph from indexed callees
+ *
+ * Inspired by aider's repo map (https://github.com/Aider-AI/aider)
+ * which uses NetworkX PageRank over a weighted dependency graph.
+ */
+
+import type { SearchResult } from '../vector/types';
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export interface WeightedEdge {
+  target: string; // node the edge points to (a file path in the dependency graph)
+  weight: number; // relative strength; buildDependencyGraph uses sqrt(reference count)
+}
+
+// ============================================================================
+// Graph Builder
+// ============================================================================
+
+/**
+ * Derive the weighted file-level dependency graph from indexed documents.
+ *
+ * Every callee that lives in a different file counts as one reference from
+ * the document's file to the callee's file; N references between a pair
+ * collapse into a single edge of weight sqrt(N) (dampening borrowed from aider).
+ * Only `callees` is read — `callers` is not stored in the index.
+ */
+export function buildDependencyGraph(docs: SearchResult[]): Map<string, WeightedEdge[]> {
+  // source file → (target file → raw reference count)
+  const tally = new Map<string, Map<string, number>>();
+
+  for (const { metadata } of docs) {
+    const origin = (metadata.path as string) || (metadata.file as string);
+    if (!origin) continue;
+
+    // Register the file even when it ends up with no outgoing edges
+    let perTarget = tally.get(origin);
+    if (!perTarget) {
+      perTarget = new Map();
+      tally.set(origin, perTarget);
+    }
+
+    const callees = metadata.callees as Array<{ file?: string }> | undefined;
+    if (!callees || !Array.isArray(callees)) continue;
+
+    for (const { file } of callees) {
+      // Skip unresolved callees and calls within the same file
+      if (!file || file === origin) continue;
+      perTarget.set(file, (perTarget.get(file) || 0) + 1);
+    }
+  }
+
+  // Collapse counts into sqrt-dampened edges
+  const graph = new Map<string, WeightedEdge[]>();
+  for (const [origin, perTarget] of tally) {
+    const edges = Array.from(perTarget, ([target, n]) => ({ target, weight: Math.sqrt(n) }));
+    graph.set(origin, edges);
+  }
+  return graph;
+}
+
+// ============================================================================
+// PageRank
+// ============================================================================
+
+/**
+ * Weighted PageRank. Defaults (0.85, 100, 1e-6) mirror NetworkX, which aider uses.
+ * Dangling nodes (no outgoing weight) spread their rank evenly across all nodes.
+ *
+ * @param graph source node → outgoing weighted edges; target-only nodes are ranked too.
+ * @param damping share of a node's rank passed along its edges each iteration.
+ * @param maxIterations upper bound on iterations if convergence is not reached first.
+ * @param tolerance stop once the summed absolute rank change drops below this.
+ * @returns node → rank; an empty map for an empty graph.
+ */
+export function pageRank(
+  graph: Map<string, WeightedEdge[]>,
+  damping = 0.85,
+  maxIterations = 100,
+  tolerance = 1e-6
+): Map<string, number> {
+  // Collect every node: map keys (sources) plus anything that appears as a target
+  const nodes = new Set<string>();
+  for (const [src, edges] of graph) {
+    nodes.add(src);
+    for (const e of edges) nodes.add(e.target);
+  }
+
+  if (nodes.size === 0) return new Map();
+
+  const n = nodes.size;
+  let ranks = new Map<string, number>();
+
+  // Start from a uniform distribution (ranks sum to 1)
+  for (const node of nodes) ranks.set(node, 1 / n);
+
+  // Build inbound map: target → [{ source, weight }]
+  const inbound = new Map<string, Array<{ source: string; weight: number }>>();
+  for (const node of nodes) inbound.set(node, []);
+
+  // Total outgoing weight per source, used to normalize each edge's share
+  const outWeightSum = new Map<string, number>();
+  for (const [src, edges] of graph) {
+    let sum = 0;
+    for (const e of edges) {
+      inbound.get(e.target)?.push({ source: src, weight: e.weight });
+      sum += e.weight;
+    }
+    outWeightSum.set(src, sum);
+  }
+
+  // Dangling nodes: target-only nodes, or sources whose outgoing weights sum to 0
+  const danglingNodes: string[] = [];
+  for (const node of nodes) {
+    if (!outWeightSum.has(node) || outWeightSum.get(node) === 0) {
+      danglingNodes.push(node);
+    }
+  }
+
+  // Iterate until convergence or max iterations
+  for (let iter = 0; iter < maxIterations; iter++) {
+    const newRanks = new Map<string, number>();
+
+    // Dangling rank: sum of dangling nodes' ranks, distributed to all
+    let danglingRank = 0;
+    for (const d of danglingNodes) danglingRank += ranks.get(d) || 0;
+
+    for (const node of nodes) {
+      let sum = 0;
+      for (const { source, weight } of inbound.get(node) || []) {
+        const srcOutWeight = outWeightSum.get(source) || 1; // || 1 avoids dividing by zero
+        sum += ((ranks.get(source) || 0) * weight) / srcOutWeight;
+      }
+      // Uniform teleport share + damped (inbound share + even split of dangling rank)
+      newRanks.set(node, (1 - damping) / n + damping * (sum + danglingRank / n));
+    }
+
+    // Convergence check: L1 distance between consecutive rank vectors
+    let delta = 0;
+    for (const node of nodes) {
+      delta += Math.abs((newRanks.get(node) || 0) - (ranks.get(node) || 0));
+    }
+
+    ranks = newRanks; // adopt the new iterate before testing, so it is what gets returned
+    if (delta < tolerance) break;
+  }
+
+  return ranks;
+}
+
+// ============================================================================
+// Connected Components
+// ============================================================================
+
+/**
+ * Group the graph's nodes into connected components, ignoring edge direction:
+ * if A depends on B, both land in the same component.
+ *
+ * @param graph source node → outgoing edges; target-only nodes are included.
+ * @returns one array of nodes per component (in BFS visit order), sorted so
+ * the largest component comes first.
+ */
+export function connectedComponents(graph: Map<string, WeightedEdge[]>): string[][] {
+  // Build undirected adjacency list from all nodes
+  const adj = new Map<string, Set<string>>();
+  const allNodes = new Set<string>();
+
+  for (const [src, edges] of graph) {
+    allNodes.add(src);
+    if (!adj.has(src)) adj.set(src, new Set());
+    for (const e of edges) {
+      allNodes.add(e.target);
+      if (!adj.has(e.target)) adj.set(e.target, new Set());
+      adj.get(src)!.add(e.target);
+      adj.get(e.target)!.add(src);
+    }
+  }
+
+  const visited = new Set<string>();
+  const components: string[][] = [];
+
+  for (const node of allNodes) {
+    if (visited.has(node)) continue;
+
+    // BFS from this node. The array is both the queue and the result:
+    // for...of also visits entries appended mid-loop, so nothing is ever
+    // dequeued and the O(n)-per-call Array#shift is avoided.
+    const component = [node];
+    visited.add(node);
+
+    for (const current of component) {
+      for (const neighbor of adj.get(current) || []) {
+        if (!visited.has(neighbor)) {
+          visited.add(neighbor);
+          component.push(neighbor);
+        }
+      }
+    }
+
+    components.push(component);
+  }
+
+  // Sort by size (largest first)
+  return components.sort((a, b) => b.length - a.length);
+}
+
+// ============================================================================
+// Shortest Path
+// ============================================================================
+
+/**
+ * Fewest-hop path from `from` to `to` along edge direction (BFS; weights ignored).
+ * @returns nodes on the path including both endpoints (`[from]` if from === to),
+ * or null when `from` is not a source key in `graph` or `to` cannot be reached.
+ */
+export function shortestPath(
+  graph: Map<string, WeightedEdge[]>,
+  from: string,
+  to: string
+): string[] | null {
+  if (from === to) return [from];
+  if (!graph.has(from)) return null;
+
+  const visited = new Set<string>([from]);
+  const parent = new Map<string, string>();
+  const queue = [from];
+
+  // for...of also visits entries pushed mid-loop, so no O(n) shift() per dequeue
+  for (const current of queue) {
+    for (const { target } of graph.get(current) || []) {
+      if (visited.has(target)) continue;
+      visited.add(target);
+      parent.set(target, current);
+
+      if (target === to) {
+        // Walk parents back to `from`, then flip once (repeated unshift is O(L²))
+        const path = [to];
+        let node = to;
+        while (parent.has(node)) {
+          node = parent.get(node)!;
+          path.push(node);
+        }
+        return path.reverse();
+      }
+
+      queue.push(target);
+    }
+  }
+
+  return null;
+}

From 098849f192ff95bc97239259629baf8ed6d58399 Mon Sep 17 00:00:00 2001
From: prosdev <prosdevlab@gmail.com>
Date: Tue, 31 Mar 2026 13:08:25 -0700
Subject: [PATCH 3/8] feat(core): replace ref counting with PageRank in dev_map

Replace computeHotPaths simple reference counting with PageRank over
the weighted dependency graph. Files depended on by other important
files now rank higher.

- HotPath.score: PageRank value (used for sorting)
- HotPath.incomingRefs: real incoming edge count (used for display)
- Rewrite 4 hot paths tests from dead callers data to callees data
- Assert relative ordering, not exact counts

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 packages/core/src/map/__tests__/map.test.ts | 127 +++++++++-----------
 packages/core/src/map/index.ts              |  65 +++++-----
 packages/core/src/map/types.ts              |   4 +-
 3 files changed, 88 insertions(+), 108 deletions(-)

diff --git a/packages/core/src/map/__tests__/map.test.ts b/packages/core/src/map/__tests__/map.test.ts
index c6b0303..40784f1 100644
--- a/packages/core/src/map/__tests__/map.test.ts
+++ b/packages/core/src/map/__tests__/map.test.ts
@@ -285,85 +285,57 @@ describe('Codebase Map', () => {
   });
 
   describe('Hot Paths', () => {
-    it('should compute hot paths from callers data', async () => {
-      const resultsWithCallers: SearchResult[] = [
+    it('should compute hot paths via PageRank from callees', async () => {
+      // 3 files depend on core.ts via callees — it should rank highest
+      const docs: SearchResult[] = [
         {
-          id: 'src/core.ts:coreFunction:1',
+          id: 'src/a.ts:fnA:1',
           score: 0.9,
           metadata: {
-            path: 'src/core.ts',
+            path: 'src/a.ts',
             type: 'function',
-            name: 'coreFunction',
+            name: 'fnA',
             exported: true,
-            callers: [
-              { name: 'caller1', file: 'src/a.ts', startLine: 10 },
-              { name: 'caller2', file: 'src/b.ts', startLine: 20 },
-              { name: 'caller3', file: 'src/c.ts', startLine: 30 },
-            ],
+            callees: [{ name: 'coreFunction', file: 'src/core.ts', line: 10 }],
           },
         },
         {
-          id: 'src/utils.ts:utilFunction:1',
-          score: 0.8,
-          metadata: {
-            path: 'src/utils.ts',
-            type: 'function',
-            name: 'utilFunction',
-            exported: true,
-            callers: [{ name: 'caller1', file: 'src/a.ts', startLine: 15 }],
-          },
-        },
-      ];
-
-      const indexer = createMockIndexer(resultsWithCallers);
-      const map = await generateCodebaseMap(indexer, { includeHotPaths: true });
-
-      expect(map.hotPaths.length).toBeGreaterThan(0);
-      // coreFunction should be first (more callers)
-      expect(map.hotPaths[0].file).toBe('src/core.ts');
-      expect(map.hotPaths[0].incomingRefs).toBe(3);
-    });
-
-    it('should compute hot paths from callees data', async () => {
-      const resultsWithCallees: SearchResult[] = [
-        {
-          id: 'src/main.ts:main:1',
+          id: 'src/b.ts:fnB:1',
           score: 0.9,
           metadata: {
-            path: 'src/main.ts',
+            path: 'src/b.ts',
             type: 'function',
-            name: 'main',
+            name: 'fnB',
             exported: true,
-            callees: [
-              { name: 'helper', file: 'src/helpers.ts', line: 10 },
-              { name: 'helper', file: 'src/helpers.ts', line: 10 },
-            ],
+            callees: [{ name: 'coreFunction', file: 'src/core.ts', line: 10 }],
           },
         },
         {
-          id: 'src/other.ts:other:1',
-          score: 0.8,
+          id: 'src/c.ts:fnC:1',
+          score: 0.9,
           metadata: {
-            path: 'src/other.ts',
+            path: 'src/c.ts',
             type: 'function',
-            name: 'other',
+            name: 'fnC',
             exported: true,
-            callees: [{ name: 'helper', file: 'src/helpers.ts', line: 10 }],
+            callees: [{ name: 'coreFunction', file: 'src/core.ts', line: 10 }],
           },
         },
       ];
 
-      const indexer = createMockIndexer(resultsWithCallees);
+      const indexer = createMockIndexer(docs);
       const map = await generateCodebaseMap(indexer, { includeHotPaths: true });
 
       expect(map.hotPaths.length).toBeGreaterThan(0);
-      // helpers.ts should be referenced most
-      expect(map.hotPaths[0].file).toBe('src/helpers.ts');
+      // core.ts has 3 incoming deps — should rank first
+      expect(map.hotPaths[0].file).toBe('src/core.ts');
       expect(map.hotPaths[0].incomingRefs).toBe(3);
+      expect(map.hotPaths[0].score).toBeGreaterThan(0);
     });
 
     it('should limit hot paths to maxHotPaths', async () => {
-      const manyRefs: SearchResult[] = Array.from({ length: 20 }, (_, i) => ({
+      // Create many files that all call the same five shared deps
+      const docs: SearchResult[] = Array.from({ length: 20 }, (_, i) => ({
         id: `src/file${i}.ts:fn:1`,
         score: 0.9,
         metadata: {
@@ -371,68 +343,79 @@ describe('Codebase Map', () => {
           type: 'function',
           name: `fn${i}`,
           exported: true,
-          callers: Array.from({ length: 20 - i }, (_, j) => ({
-            name: `caller${j}`,
-            file: `src/other${j}.ts`,
-            startLine: j * 10,
+          callees: Array.from({ length: 5 }, (_, j) => ({
+            name: `dep${j}`,
+            file: `src/dep${j}.ts`,
+            line: j * 10,
           })),
         },
       }));
 
-      const indexer = createMockIndexer(manyRefs);
+      const indexer = createMockIndexer(docs);
       const map = await generateCodebaseMap(indexer, { includeHotPaths: true, maxHotPaths: 3 });
 
       expect(map.hotPaths.length).toBe(3);
-      // Should be sorted by refs descending
-      expect(map.hotPaths[0].incomingRefs).toBeGreaterThanOrEqual(map.hotPaths[1].incomingRefs);
+      // Should be sorted by score descending
+      expect(map.hotPaths[0].score).toBeGreaterThanOrEqual(map.hotPaths[1].score);
     });
 
     it('should not include hot paths when disabled', async () => {
-      const resultsWithCallers: SearchResult[] = [
+      const docs: SearchResult[] = [
         {
-          id: 'src/core.ts:coreFunction:1',
+          id: 'src/a.ts:fn:1',
           score: 0.9,
           metadata: {
-            path: 'src/core.ts',
+            path: 'src/a.ts',
             type: 'function',
-            name: 'coreFunction',
+            name: 'fn',
             exported: true,
-            callers: [{ name: 'caller1', file: 'src/a.ts', startLine: 10 }],
+            callees: [{ name: 'dep', file: 'src/dep.ts', line: 1 }],
           },
         },
       ];
 
-      const indexer = createMockIndexer(resultsWithCallers);
+      const indexer = createMockIndexer(docs);
       const map = await generateCodebaseMap(indexer, { includeHotPaths: false });
 
       expect(map.hotPaths.length).toBe(0);
     });
 
     it('should format hot paths in output', async () => {
-      const resultsWithCallers: SearchResult[] = [
+      const docs: SearchResult[] = [
         {
-          id: 'src/core.ts:coreFunction:1',
+          id: 'src/a.ts:fnA:1',
           score: 0.9,
           metadata: {
-            path: 'src/core.ts',
+            path: 'src/a.ts',
             type: 'function',
-            name: 'coreFunction',
+            name: 'fnA',
             exported: true,
-            callers: [
-              { name: 'caller1', file: 'src/a.ts', startLine: 10 },
-              { name: 'caller2', file: 'src/b.ts', startLine: 20 },
+            callees: [
+              { name: 'core', file: 'src/core.ts', line: 10 },
+              { name: 'core2', file: 'src/core.ts', line: 20 },
             ],
           },
         },
+        {
+          id: 'src/b.ts:fnB:1',
+          score: 0.9,
+          metadata: {
+            path: 'src/b.ts',
+            type: 'function',
+            name: 'fnB',
+            exported: true,
+            callees: [{ name: 'core', file: 'src/core.ts', line: 10 }],
+          },
+        },
       ];
 
-      const indexer = createMockIndexer(resultsWithCallers);
+      const indexer = createMockIndexer(docs);
       const map = await generateCodebaseMap(indexer, { includeHotPaths: true });
       const output = formatCodebaseMap(map, { includeHotPaths: true });
 
       expect(output).toContain('Hot paths:');
       expect(output).toContain('core.ts');
-      expect(output).toContain('2 refs');
+      expect(output).toContain('refs');
       expect(output).toContain('src');
     });
   });
diff --git a/packages/core/src/map/index.ts b/packages/core/src/map/index.ts
index e09851b..5fc9394 100644
--- a/packages/core/src/map/index.ts
+++ b/packages/core/src/map/index.ts
@@ -10,6 +10,7 @@ import { stripFocusPrefix } from '../indexer/utils/change-frequency.js';
 import { getFileIcon } from '../utils/icons';
 import type { SearchResult } from '../vector/types';
 import type { LocalGitExtractor } from './git-extractor';
+import { buildDependencyGraph, pageRank } from './graph';
 import type {
   ChangeFrequency,
   CodebaseMap,
@@ -21,6 +22,7 @@ import type {
 
 export { GitExtractor, LocalGitExtractor } from './git-extractor';
 export * from './git-types';
+export * from './graph';
 export * from './types';
 
 /** Default options for map generation */
@@ -445,51 +447,44 @@ function applyChangeFrequency(node: MapNode, frequencyMap: Map<string, ChangeFre
 }
 
 /**
- * Compute hot paths - files with the most incoming references
+ * Compute hot paths using PageRank over the dependency graph.
+ *
+ * Replaces simple reference counting with graph-aware ranking.
+ * Files that are depended on by other important files rank higher.
+ * Sort by PageRank score, display real incoming edge count.
  */
 function computeHotPaths(docs: SearchResult[], maxPaths: number): HotPath[] {
-  // Count incoming references per file
-  const refCounts = new Map<string, { count: number; component?: string }>();
-
-  for (const doc of docs) {
-    const callers = doc.metadata.callers as Array<{ file: string }> | undefined;
-    if (callers && Array.isArray(callers)) {
-      // This document is called by others - count it
-      const filePath = (doc.metadata.path as string) || (doc.metadata.file as string) || '';
-      if (filePath) {
-        const existing = refCounts.get(filePath) || { count: 0 };
-        existing.count += callers.length;
-        existing.component = existing.component || (doc.metadata.name as string);
-        refCounts.set(filePath, existing);
-      }
+  const graph = buildDependencyGraph(docs);
+  const ranks = pageRank(graph);
+
+  // Count real incoming edges per file (distinct source files)
+  const incomingCounts = new Map<string, Set<string>>();
+  for (const [src, edges] of graph) {
+    for (const e of edges) {
+      if (!incomingCounts.has(e.target)) incomingCounts.set(e.target, new Set());
+      incomingCounts.get(e.target)?.add(src);
     }
   }
 
-  // Also count based on callees pointing to files
+  // Build a lookup for primary component name per file
+  const componentByFile = new Map<string, string>();
   for (const doc of docs) {
-    const callees = doc.metadata.callees as Array<{ file: string; name: string }> | undefined;
-    if (callees && Array.isArray(callees)) {
-      for (const callee of callees) {
-        if (callee.file) {
-          const existing = refCounts.get(callee.file) || { count: 0 };
-          existing.count += 1;
-          refCounts.set(callee.file, existing);
-        }
-      }
+    const filePath = (doc.metadata.path as string) || (doc.metadata.file as string) || '';
+    if (filePath && doc.metadata.name && !componentByFile.has(filePath)) {
+      componentByFile.set(filePath, doc.metadata.name as string);
     }
   }
 
-  // Sort by count and take top N
-  const sorted = Array.from(refCounts.entries())
-    .map(([file, data]) => ({
+  // Sort by PageRank score, display real incoming ref count
+  return Array.from(ranks.entries())
+    .sort((a, b) => b[1] - a[1])
+    .slice(0, maxPaths)
+    .map(([file, score]) => ({
       file,
-      incomingRefs: data.count,
-      primaryComponent: data.component,
-    }))
-    .sort((a, b) => b.incomingRefs - a.incomingRefs)
-    .slice(0, maxPaths);
-
-  return sorted;
+      incomingRefs: incomingCounts.get(file)?.size ?? 0,
+      score,
+      primaryComponent: componentByFile.get(file),
+    }));
 }
 
 /**
diff --git a/packages/core/src/map/types.ts b/packages/core/src/map/types.ts
index 648316e..97b5448 100644
--- a/packages/core/src/map/types.ts
+++ b/packages/core/src/map/types.ts
@@ -83,8 +83,10 @@ export interface MapOptions {
 export interface HotPath {
   /** File path */
   file: string;
-  /** Number of incoming references (callers) */
+  /** Number of distinct files that depend on this file */
   incomingRefs: number;
+  /** PageRank score (used for sorting — higher = more architecturally central) */
+  score: number;
   /** Primary component name in this file */
   primaryComponent?: string;
 }

From e77890d9dbd21a4ec027bce3b9e5b583cd141560 Mon Sep 17 00:00:00 2001
From: prosdev <prosdevlab@gmail.com>
Date: Tue, 31 Mar 2026 13:11:14 -0700
Subject: [PATCH 4/8] feat(core): wire connected components into dev_map output

Add components field to CodebaseMap. Compute connected components from
the dependency graph and display as "Subsystems" section in formatted
output. Only shows multi-file components (singles filtered out).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 packages/core/src/map/index.ts | 48 +++++++++++++++++++++++++++++++---
 packages/core/src/map/types.ts |  2 ++
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/packages/core/src/map/index.ts b/packages/core/src/map/index.ts
index 5fc9394..33cf430 100644
--- a/packages/core/src/map/index.ts
+++ b/packages/core/src/map/index.ts
@@ -10,7 +10,7 @@ import { stripFocusPrefix } from '../indexer/utils/change-frequency.js';
 import { getFileIcon } from '../utils/icons';
 import type { SearchResult } from '../vector/types';
 import type { LocalGitExtractor } from './git-extractor';
-import { buildDependencyGraph, pageRank } from './graph';
+import { buildDependencyGraph, connectedComponents, pageRank } from './graph';
 import type {
   ChangeFrequency,
   CodebaseMap,
@@ -117,11 +117,19 @@ export async function generateCodebaseMap(
     'Counted components'
   );
 
-  // Compute hot paths (most referenced files)
+  // Compute hot paths and connected components (share the dependency graph)
   const t7 = Date.now();
   const hotPaths = opts.includeHotPaths ? computeHotPaths(allDocs, opts.maxHotPaths) : [];
+  const graph = buildDependencyGraph(allDocs);
+  const rawComponents = connectedComponents(graph);
+  const components = rawComponents
+    .filter((c) => c.length > 1) // Only show multi-file subsystems
+    .map((files) => ({ files, size: files.length }));
   const t8 = Date.now();
-  logger?.debug({ duration_ms: t8 - t7, hotPathCount: hotPaths.length }, 'Computed hot paths');
+  logger?.debug(
+    { duration_ms: t8 - t7, hotPathCount: hotPaths.length, componentCount: components.length },
+    'Computed hot paths and components'
+  );
 
   // Compute change frequency if requested and git extractor is available
   if (opts.includeChangeFrequency && context.gitExtractor) {
@@ -147,6 +155,7 @@ export async function generateCodebaseMap(
     totalComponents,
     totalDirectories,
     hotPaths,
+    components: components.length > 0 ? components : undefined,
     generatedAt: new Date().toISOString(),
   };
 }
@@ -515,6 +524,22 @@ export function formatCodebaseMap(map: CodebaseMap, options: MapOptions = {}): s
     lines.push('');
   }
 
+  // Format connected components if present
+  if (map.components && map.components.length > 1) {
+    lines.push(`Subsystems (${map.components.length} connected):`);
+    for (let i = 0; i < Math.min(5, map.components.length); i++) {
+      const comp = map.components[i];
+      // Show the common directory prefix for the component
+      const prefix = findCommonPrefix(comp.files);
+      const label = prefix || 'mixed';
+      lines.push(`  ${i + 1}. ${label} (${comp.size} files)`);
+    }
+    if (map.components.length > 5) {
+      lines.push(`  ...${map.components.length - 5} more`);
+    }
+    lines.push('');
+  }
+
   // Format tree
   lines.push('Structure:');
   formatNode(map.root, lines, '  ', true, opts, true);
@@ -550,3 +575,20 @@ function formatNode(
     formatNode(child, lines, childPrefix, isChildLast, opts);
   }
 }
+
+/**
+ * Find the longest directory shared by all files, cut only at '/' boundaries ('' if none)
+ */
+function findCommonPrefix(files: string[]): string {
+  if (files.length === 0) return '';
+  const dirs = files.map((f) => f.substring(0, f.lastIndexOf('/')));
+
+  let prefix = dirs[0];
+  for (const dir of dirs) {
+    // Compare whole segments: "pkg/core" is not a parent of "pkg/core-utils"
+    while (prefix && dir !== prefix && !dir.startsWith(`${prefix}/`)) {
+      prefix = prefix.substring(0, prefix.lastIndexOf('/'));
+    }
+  }
+  return prefix;
+}
diff --git a/packages/core/src/map/types.ts b/packages/core/src/map/types.ts
index 97b5448..f038faf 100644
--- a/packages/core/src/map/types.ts
+++ b/packages/core/src/map/types.ts
@@ -103,6 +103,8 @@ export interface CodebaseMap {
   totalDirectories: number;
   /** Most referenced files (hot paths) */
   hotPaths: HotPath[];
+  /** Connected subsystems (groups of interdependent files) */
+  components?: Array<{ files: string[]; size: number }>;
   /** Generation timestamp */
   generatedAt: string;
 }

From 3c10d72984f3386d6de35e04c408e960c358c7bc Mon Sep 17 00:00:00 2001
From: prosdev <prosdevlab@gmail.com>
Date: Tue, 31 Mar 2026 13:15:04 -0700
Subject: [PATCH 5/8] feat(mcp): add path tracing to dev_refs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New traceTo parameter on dev_refs: traces the dependency chain from
a function's file to a target file through the call graph.

Example: dev_refs { name: "authenticate", traceTo: "src/database.ts" }
→ "src/auth.ts → src/user-service.ts → src/repository.ts → src/database.ts (3 hops)"

Uses shortestPath BFS from graph.ts. Requires indexer for graph building
(optional — traceTo is ignored without it).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 packages/mcp-server/bin/dev-agent-mcp.ts      |  1 +
 .../src/adapters/built-in/refs-adapter.ts     | 52 +++++++++++++++++--
 packages/mcp-server/src/schemas/index.ts      |  1 +
 3 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/packages/mcp-server/bin/dev-agent-mcp.ts b/packages/mcp-server/bin/dev-agent-mcp.ts
index 5b5fe16..47144e1 100644
--- a/packages/mcp-server/bin/dev-agent-mcp.ts
+++ b/packages/mcp-server/bin/dev-agent-mcp.ts
@@ -277,6 +277,7 @@ async function main() {
 
     const refsAdapter = new RefsAdapter({
       searchService,
+      indexer,
       defaultLimit: 20,
     });
 
diff --git a/packages/mcp-server/src/adapters/built-in/refs-adapter.ts b/packages/mcp-server/src/adapters/built-in/refs-adapter.ts
index aeb0cfe..19f6d24 100644
--- a/packages/mcp-server/src/adapters/built-in/refs-adapter.ts
+++ b/packages/mcp-server/src/adapters/built-in/refs-adapter.ts
@@ -3,7 +3,13 @@
  * Provides call graph queries via the dev_refs tool
  */
 
-import type { CalleeInfo, SearchResult, SearchService } from '@prosdevlab/dev-agent-core';
+import type {
+  CalleeInfo,
+  RepositoryIndexer,
+  SearchResult,
+  SearchService,
+} from '@prosdevlab/dev-agent-core';
+import { buildDependencyGraph, shortestPath } from '@prosdevlab/dev-agent-core';
 import { estimateTokensForText, startTimer } from '../../formatters/utils';
 import { RefsArgsSchema } from '../../schemas/index.js';
 import { ToolAdapter } from '../tool-adapter';
@@ -24,6 +30,11 @@ export interface RefsAdapterConfig {
    */
   searchService: SearchService;
 
+  /**
+   * Repository indexer — needed for path tracing (optional)
+   */
+  indexer?: RepositoryIndexer;
+
   /**
    * Default result limit
    */
@@ -54,13 +65,17 @@ export class RefsAdapter extends ToolAdapter {
   };
 
   private searchService: SearchService;
-  private config: Required<Omit<RefsAdapterConfig, 'searchService'>> & {
+  private config: {
     searchService: SearchService;
+    defaultLimit: number;
   };
 
+  private indexer?: RepositoryIndexer;
+
   constructor(config: RefsAdapterConfig) {
     super();
     this.searchService = config.searchService;
+    this.indexer = config.indexer;
     this.config = {
       searchService: config.searchService,
       defaultLimit: config.defaultLimit ?? 20,
@@ -101,6 +116,12 @@ export class RefsAdapter extends ToolAdapter {
             maximum: 50,
             default: this.config.defaultLimit,
           },
+          traceTo: {
+            type: 'string',
+            description:
+              "Trace the dependency chain from this function's file to a target file " +
+              '(e.g., "src/database.ts"). Shows the shortest path through the call graph.',
+          },
         },
         required: ['name'],
       },
@@ -114,11 +135,11 @@ export class RefsAdapter extends ToolAdapter {
       return validation.error;
     }
 
-    const { name, direction, limit } = validation.data;
+    const { name, direction, limit, traceTo } = validation.data;
 
     try {
       const timer = startTimer();
-      context.logger.debug('Executing refs query', { name, direction, limit });
+      context.logger.debug('Executing refs query', { name, direction, limit, traceTo });
 
       // First, find the target component
       const searchResults = await this.searchService.search(name, { limit: 10 });
@@ -136,6 +157,29 @@ export class RefsAdapter extends ToolAdapter {
         };
       }
 
+      // Handle traceTo — find shortest dependency path
+      if (traceTo && this.indexer) {
+        const sourceFile = (target.metadata.path as string) || '';
+        const allDocs = await this.indexer.getAll({ limit: 10000 });
+        const graph = buildDependencyGraph(allDocs);
+        const path = shortestPath(graph, sourceFile, traceTo);
+
+        const content = path
+          ? `## Dependency Path: ${sourceFile} → ${traceTo}\n\n${path.join(' → ')}\n\n**${path.length - 1} hop${path.length - 1 === 1 ? '' : 's'}**`
+          : `## No Path Found\n\nNo dependency chain from \`${sourceFile}\` to \`${traceTo}\`.\nThese files may be in separate subsystems.`;
+
+        return {
+          success: true,
+          data: content,
+          metadata: {
+            tokens: estimateTokensForText(content),
+            duration_ms: timer.elapsed(),
+            timestamp: new Date().toISOString(),
+            cached: false,
+          },
+        };
+      }
+
       const result: {
         target: {
           name: string;
diff --git a/packages/mcp-server/src/schemas/index.ts b/packages/mcp-server/src/schemas/index.ts
index 936bf84..15de295 100644
--- a/packages/mcp-server/src/schemas/index.ts
+++ b/packages/mcp-server/src/schemas/index.ts
@@ -63,6 +63,7 @@ export const RefsArgsSchema = z
     name: z.string().min(1, 'Name must be a non-empty string'),
     direction: z.enum(['callees', 'callers', 'both']).default('both'),
     limit: z.number().int().min(1).max(50).default(20),
+    traceTo: z.string().optional(),
   })
   .strict();
 

From 471bfcdd388381e7e537883245792e5176b3d06c Mon Sep 17 00:00:00 2001
From: prosdev <prosdevlab@gmail.com>
Date: Tue, 31 Mar 2026 13:15:50 -0700
Subject: [PATCH 6/8] docs: complete MCP Phase 1, mark all parts merged

All 6 parts of MCP Phase 1 Tools Improvement are complete:
- 1.1: Pure pattern extractors
- 1.2: Index-based analysis (10-30x faster)
- 1.3: Dead code cleanup
- 1.4: Agent usability (merge health, rename params, JSON, suggestions)
- 1.5: AST pattern analysis (tree-sitter queries)
- 1.6: Graph algorithms (PageRank, components, shortest path)

File importance ranking inspired by aider's repo map
(https://github.com/Aider-AI/aider).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .claude/da-plans/README.md                                     | 2 +-
 .claude/da-plans/mcp/phase-1-mcp-tools-improvement/overview.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.claude/da-plans/README.md b/.claude/da-plans/README.md
index 77b9ff1..f3332b5 100644
--- a/.claude/da-plans/README.md
+++ b/.claude/da-plans/README.md
@@ -11,7 +11,7 @@ Implementation deviations are logged at the bottom of each plan file.
 |-------|-------------|--------|
 | [Core](core/) | Scanner, vector storage, services, indexer | Phase 1: Merged, Phase 2: Merged (indexing rethink) |
 | [CLI](cli/) | Command-line interface | Not started |
-| [MCP Server](mcp/) | Model Context Protocol server + adapters | Phase 1: Draft (tools improvement) |
+| [MCP Server](mcp/) | Model Context Protocol server + adapters | Phase 1: Merged (tools improvement) |
 | [Subagents](subagents/) | Coordinator, explorer, planner, GitHub agents | Not started |
 | [Integrations](integrations/) | Claude Code, VS Code, Cursor | Not started |
 | [Logger](logger/) | @prosdevlab/kero centralized logging | Not started |
diff --git a/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/overview.md b/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/overview.md
index 1a8bb36..ddc4ec2 100644
--- a/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/overview.md
+++ b/.claude/da-plans/mcp/phase-1-mcp-tools-improvement/overview.md
@@ -1,6 +1,6 @@
 # Phase 1: MCP Tools Improvement
 
-**Status:** In progress (Parts 1.1–1.5 merged, 1.6 in progress)
+**Status:** Complete (all parts merged)
 
 ## Context
 

From 31e678b747ceea7b48609282e3eb6e0b58ba5ba7 Mon Sep 17 00:00:00 2001
From: prosdev <prosdevlab@gmail.com>
Date: Tue, 31 Mar 2026 13:35:18 -0700
Subject: [PATCH 7/8] =?UTF-8?q?fix(core,mcp):=20address=20code=20review=20?=
 =?UTF-8?q?=E2=80=94=20single=20graph=20build,=20BFS=20perf,=20traceTo=20e?=
 =?UTF-8?q?rror?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Build dependency graph once, pass to both computeHotPaths and connectedComponents
- Return explicit error when traceTo is set but indexer is missing
- Document directed-only limitation in traceTo description
- Switch BFS from queue.shift() (O(n)) to index-based (O(1))
- Require 2+ path segments for subsystem labels (avoid "packages" alone)
- Track traceTo adapter test gap in scratchpad

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .changeset/graph-algorithms.md                | 11 +++++++++
 .claude/scratchpad.md                         |  1 +
 packages/core/src/map/graph.ts                | 12 ++++++----
 packages/core/src/map/index.ts                | 16 +++++++++----
 .../src/adapters/built-in/refs-adapter.ts     | 13 ++++++++++-
 website/content/docs/tools/dev-map.mdx        | 23 ++++++++++++++-----
 website/content/docs/tools/dev-refs.mdx       | 23 +++++++++++++++++++
 website/content/latest-version.ts             |  8 +++----
 website/content/updates/index.mdx             | 15 ++++++++++++
 9 files changed, 101 insertions(+), 21 deletions(-)
 create mode 100644 .changeset/graph-algorithms.md

diff --git a/.changeset/graph-algorithms.md b/.changeset/graph-algorithms.md
new file mode 100644
index 0000000..84001fe
--- /dev/null
+++ b/.changeset/graph-algorithms.md
@@ -0,0 +1,11 @@
+---
+'@prosdevlab/dev-agent': patch
+---
+
+Graph algorithms for dev_map and dev_refs
+
+- `dev_map` hot paths now use PageRank over the weighted dependency graph — files depended on by other important files rank higher
+- `dev_map` shows connected subsystems ("Subsystems: packages/core (45 files), packages/cli (12 files)")
+- `dev_refs` new `traceTo` parameter traces the dependency chain between files through the call graph
+- All algorithms are hand-rolled pure functions (~230 lines), no new dependencies
+- Inspired by aider's repo map (PageRank over dependency graphs)
diff --git a/.claude/scratchpad.md b/.claude/scratchpad.md
index 2067b41..8587a1d 100644
--- a/.claude/scratchpad.md
+++ b/.claude/scratchpad.md
@@ -25,6 +25,7 @@
 
 ## Test Gaps
 
+- **RefsAdapter integration test with `traceTo`.** The `traceTo` path tracing feature is tested at the algorithm level (shortestPath in graph.test.ts) but not at the adapter level. Needs a test that constructs RefsAdapter with a mock indexer, calls `execute()` with `traceTo`, and verifies the path output format. Also needs a test for the error case when indexer is missing.
 - **InspectAdapter integration test with PatternMatcher.** The InspectAdapter test constructs without a `patternMatcher` — the AST path is never exercised through the MCP layer. Needs a test that constructs `InspectAdapter` with `createPatternMatcher()`, mocks the search service, calls `execute()`, and verifies AST-enhanced results flow through. Requires mock search service setup — larger integration test scope.
 
 ## Notes
diff --git a/packages/core/src/map/graph.ts b/packages/core/src/map/graph.ts
index b435826..1c62c44 100644
--- a/packages/core/src/map/graph.ts
+++ b/packages/core/src/map/graph.ts
@@ -189,13 +189,14 @@ export function connectedComponents(graph: Map<string, WeightedEdge[]>): string[
   for (const node of allNodes) {
     if (visited.has(node)) continue;
 
-    // BFS from this node
+    // BFS from this node (index-based to avoid O(n) shift)
     const component: string[] = [];
     const queue = [node];
+    let qi = 0;
     visited.add(node);
 
-    while (queue.length > 0) {
-      const current = queue.shift()!;
+    while (qi < queue.length) {
+      const current = queue[qi++];
       component.push(current);
       for (const neighbor of adj.get(current) || []) {
         if (!visited.has(neighbor)) {
@@ -232,9 +233,10 @@ export function shortestPath(
   const visited = new Set<string>([from]);
   const parent = new Map<string, string>();
   const queue = [from];
+  let qi = 0;
 
-  while (queue.length > 0) {
-    const current = queue.shift()!;
+  while (qi < queue.length) {
+    const current = queue[qi++];
     for (const { target } of graph.get(current) || []) {
       if (visited.has(target)) continue;
       visited.add(target);
diff --git a/packages/core/src/map/index.ts b/packages/core/src/map/index.ts
index 33cf430..4021187 100644
--- a/packages/core/src/map/index.ts
+++ b/packages/core/src/map/index.ts
@@ -117,10 +117,10 @@ export async function generateCodebaseMap(
     'Counted components'
   );
 
-  // Compute hot paths and connected components (share the dependency graph)
+  // Build dependency graph once, share between hot paths and components
   const t7 = Date.now();
-  const hotPaths = opts.includeHotPaths ? computeHotPaths(allDocs, opts.maxHotPaths) : [];
   const graph = buildDependencyGraph(allDocs);
+  const hotPaths = opts.includeHotPaths ? computeHotPaths(allDocs, graph, opts.maxHotPaths) : [];
   const rawComponents = connectedComponents(graph);
   const components = rawComponents
     .filter((c) => c.length > 1) // Only show multi-file subsystems
@@ -462,8 +462,11 @@ function applyChangeFrequency(node: MapNode, frequencyMap: Map<string, ChangeFre
  * Files that are depended on by other important files rank higher.
  * Sort by PageRank score, display real incoming edge count.
  */
-function computeHotPaths(docs: SearchResult[], maxPaths: number): HotPath[] {
-  const graph = buildDependencyGraph(docs);
+function computeHotPaths(
+  docs: SearchResult[],
+  graph: Map<string, import('./graph').WeightedEdge[]>,
+  maxPaths: number
+): HotPath[] {
   const ranks = pageRank(graph);
 
   // Count real incoming edges per file (distinct source files)
@@ -590,5 +593,8 @@ function findCommonPrefix(files: string[]): string {
       prefix = prefix.substring(0, prefix.lastIndexOf('/'));
     }
   }
-  return prefix;
+  // Require at least 2 path segments for a meaningful label
+  // "packages" alone is too generic; "packages/core" is useful
+  const segments = prefix.split('/').filter(Boolean);
+  return segments.length >= 2 ? prefix : '';
 }
diff --git a/packages/mcp-server/src/adapters/built-in/refs-adapter.ts b/packages/mcp-server/src/adapters/built-in/refs-adapter.ts
index 19f6d24..f1eac03 100644
--- a/packages/mcp-server/src/adapters/built-in/refs-adapter.ts
+++ b/packages/mcp-server/src/adapters/built-in/refs-adapter.ts
@@ -120,7 +120,7 @@ export class RefsAdapter extends ToolAdapter {
             type: 'string',
             description:
               "Trace the dependency chain from this function's file to a target file " +
-              '(e.g., "src/database.ts"). Shows the shortest path through the call graph.',
+              '(e.g., "src/database.ts"). Follows directed call graph edges (A calls B, not B calls A).',
           },
         },
         required: ['name'],
@@ -158,6 +158,17 @@ export class RefsAdapter extends ToolAdapter {
       }
 
       // Handle traceTo — find shortest dependency path
+      if (traceTo && !this.indexer) {
+        return {
+          success: false,
+          error: {
+            code: 'INDEX_REQUIRED',
+            message: 'Path tracing requires a repository index.',
+            suggestion: 'Run "dev index" to index the repository first.',
+          },
+        };
+      }
+
       if (traceTo && this.indexer) {
         const sourceFile = (target.metadata.path as string) || '';
         const allDocs = await this.indexer.getAll({ limit: 10000 });
diff --git a/website/content/docs/tools/dev-map.mdx b/website/content/docs/tools/dev-map.mdx
index c08795c..145c51b 100644
--- a/website/content/docs/tools/dev-map.mdx
+++ b/website/content/docs/tools/dev-map.mdx
@@ -63,14 +63,25 @@ This helps AI assistants quickly understand codebase organization and activity.
 
 ## Features
 
-### Hot Paths
+### Hot Paths (PageRank)
 
-Shows the most referenced files in your codebase. These are typically:
-- Core utilities used everywhere
-- Base classes/interfaces
-- Central orchestrators
+Shows the most architecturally important files in your codebase, ranked by PageRank
+over the weighted dependency graph. Unlike simple reference counting, PageRank identifies
+files that are depended on by other important files — not just files with many direct imports.
 
-Useful for understanding which code is most critical.
+Inspired by [aider's repo map](https://github.com/Aider-AI/aider).
+
+### Subsystems
+
+Shows connected components in the dependency graph — groups of files linked to each other by
+dependencies, forming independent subsystems. Single-file groups are omitted; up to five are listed.
+
+```
+Subsystems (3 connected):
+  1. packages/core (45 files)
+  2. packages/cli (12 files)
+  3. packages/mcp-server (18 files)
+```
 
 ### Change Frequency ✨ v0.4
 
diff --git a/website/content/docs/tools/dev-refs.mdx b/website/content/docs/tools/dev-refs.mdx
index 3f58aef..4b02db0 100644
--- a/website/content/docs/tools/dev-refs.mdx
+++ b/website/content/docs/tools/dev-refs.mdx
@@ -17,6 +17,7 @@ This is invaluable for understanding impact of changes and navigating unfamiliar
 | `name` | string | required | Symbol name to query |
 | `direction` | string | `"both"` | `"callers"`, `"callees"`, or `"both"` |
 | `limit` | number | `10` | Maximum results |
+| `traceTo` | string | — | Trace dependency chain to a target file (e.g., `"src/database.ts"`) |
 | `tokenBudget` | number | `2000` | Max tokens for output |
 
 ## Examples
@@ -84,6 +85,28 @@ What does the validateToken function call?
 }
 ```
 
+### Trace Dependency Path
+
+> "How does the auth module reach the database?"
+
+```json
+{
+  "name": "authenticate",
+  "traceTo": "src/database.ts"
+}
+```
+
+**Output:**
+```
+## Dependency Path: src/auth.ts → src/database.ts
+
+src/auth.ts → src/user-service.ts → src/repository.ts → src/database.ts
+
+**3 hops**
+```
+
+Follows directed call graph edges (A calls B), so a path from A to B does not imply a path from B to A. Returns "No Path Found" when the target file is not reachable from the source file.
+
 ## Use Cases
 
 ### Impact Analysis
diff --git a/website/content/latest-version.ts b/website/content/latest-version.ts
index 578e841..4235d9c 100644
--- a/website/content/latest-version.ts
+++ b/website/content/latest-version.ts
@@ -4,10 +4,10 @@
  */
 
 export const latestVersion = {
-  version: '0.10.5',
-  title: 'AST-Based Pattern Analysis',
+  version: '0.10.6',
+  title: 'Graph Algorithms for dev_map and dev_refs',
   date: 'March 31, 2026',
   summary:
-    'dev_patterns uses tree-sitter AST queries for more accurate detection of error handling, imports, and type coverage across .ts, .tsx, .js, .jsx files.',
-  link: '/updates#v0105--ast-based-pattern-analysis',
+    'PageRank-based file ranking, subsystem detection, and dependency path tracing via traceTo.',
+  link: '/updates#v0106--graph-algorithms-for-dev_map-and-dev_refs',
 } as const;
diff --git a/website/content/updates/index.mdx b/website/content/updates/index.mdx
index ada56d8..7038748 100644
--- a/website/content/updates/index.mdx
+++ b/website/content/updates/index.mdx
@@ -9,6 +9,21 @@ What's new in dev-agent. We ship improvements regularly to help AI assistants un
 
 ---
 
+## v0.10.6 — Graph Algorithms for dev_map and dev_refs
+
+*March 31, 2026*
+
+**PageRank-based file ranking, subsystem detection, and dependency path tracing.**
+
+- `dev_map` hot paths now use PageRank over the weighted dependency graph — files depended on by other important files rank higher than files with many shallow references
+- `dev_map` shows connected subsystems: groups of interdependent files identified via graph analysis
+- `dev_refs` new `traceTo` parameter: `dev_refs { name: "authenticate", traceTo: "src/database.ts" }` → traces the shortest dependency chain between files
+- Weighted edges with sqrt dampening (inspired by [aider's repo map](https://github.com/Aider-AI/aider))
+- PageRank: 2,000 nodes + 10,000 edges in 4ms — no performance impact
+- All algorithms hand-rolled (~230 lines), no new dependencies
+
+---
+
 ## v0.10.5 — AST-Based Pattern Analysis
 
 *March 31, 2026*

From 8d56e25be6f355a642eef616b94cd0421d7c86b4 Mon Sep 17 00:00:00 2001
From: prosdev <prosdevlab@gmail.com>
Date: Tue, 31 Mar 2026 13:50:09 -0700
Subject: [PATCH 8/8] fix(core,mcp): address graph architect review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename traceTo → dependsOn (makes call direction unambiguous)
- Cache dependency graph on RefsAdapter (60s TTL, avoids rebuilding per request)
- Log warning when 10k doc limit is hit in both dev_map and dev_refs
- Remove overly strict shortestPath early return for unknown source nodes
- Update docs for dependsOn parameter name
- Track hub filtering, 10k scaling, and perf concerns in scratchpad

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .claude/da-plans/README.md                    |   2 +-
 .../3.1-index-time-graph.md                   | 106 ++++++++
 .../phase-3-graph-cache/3.2-load-on-demand.md | 109 ++++++++
 .../3.3-incremental-graph.md                  |  89 +++++++
 .../core/phase-3-graph-cache/overview.md      | 239 ++++++++++++++++++
 .claude/scratchpad.md                         |   6 +-
 packages/core/src/map/graph.ts                |   1 -
 packages/core/src/map/index.ts                |   3 +
 .../src/adapters/built-in/refs-adapter.ts     |  54 +++-
 packages/mcp-server/src/schemas/index.ts      |   2 +-
 website/content/docs/tools/dev-refs.mdx       |   4 +-
 11 files changed, 595 insertions(+), 20 deletions(-)
 create mode 100644 .claude/da-plans/core/phase-3-graph-cache/3.1-index-time-graph.md
 create mode 100644 .claude/da-plans/core/phase-3-graph-cache/3.2-load-on-demand.md
 create mode 100644 .claude/da-plans/core/phase-3-graph-cache/3.3-incremental-graph.md
 create mode 100644 .claude/da-plans/core/phase-3-graph-cache/overview.md

diff --git a/.claude/da-plans/README.md b/.claude/da-plans/README.md
index f3332b5..1385aa7 100644
--- a/.claude/da-plans/README.md
+++ b/.claude/da-plans/README.md
@@ -9,7 +9,7 @@ Implementation deviations are logged at the bottom of each plan file.
 
 | Track | Description | Status |
 |-------|-------------|--------|
-| [Core](core/) | Scanner, vector storage, services, indexer | Phase 1: Merged, Phase 2: Merged (indexing rethink) |
+| [Core](core/) | Scanner, vector storage, services, indexer | Phase 1: Merged, Phase 2: Merged, Phase 3: Draft (graph cache) |
 | [CLI](cli/) | Command-line interface | Not started |
 | [MCP Server](mcp/) | Model Context Protocol server + adapters | Phase 1: Merged (tools improvement) |
 | [Subagents](subagents/) | Coordinator, explorer, planner, GitHub agents | Not started |
diff --git a/.claude/da-plans/core/phase-3-graph-cache/3.1-index-time-graph.md b/.claude/da-plans/core/phase-3-graph-cache/3.1-index-time-graph.md
new file mode 100644
index 0000000..d8d8bd8
--- /dev/null
+++ b/.claude/da-plans/core/phase-3-graph-cache/3.1-index-time-graph.md
@@ -0,0 +1,106 @@
+# Part 3.1: Build and Save Dependency Graph at Index Time
+
+See [overview.md](overview.md) for architecture context.
+
+## Goal
+
+After `linearMerge` completes (full index), build the dependency graph from the scan
+results and save it as JSON. The incremental path (`batchUpsertAndDelete`) is Part 3.3.
+
+## What changes
+
+### `packages/core/src/storage/path.ts`
+
+Add `dependencyGraph` to `getStorageFilePaths`:
+
+```typescript
+export function getStorageFilePaths(storagePath: string): {
+  vectors: string;
+  metadata: string;
+  watcherSnapshot: string;
+  dependencyGraph: string;  // NEW
+  // ... deprecated paths
+} {
+  return {
+    // ... existing
+    dependencyGraph: path.join(storagePath, 'dependency-graph.json'),
+  };
+}
+```
+
+### `packages/core/src/map/graph.ts`
+
+Add serialization/deserialization:
+
+```typescript
+export interface CachedGraph {
+  version: 1;
+  generatedAt: string;
+  nodeCount: number;
+  edgeCount: number;
+  graph: Record<string, WeightedEdge[]>;
+}
+
+export function serializeGraph(graph: Map<string, WeightedEdge[]>): string {
+  let edgeCount = 0;
+  const obj: Record<string, WeightedEdge[]> = {};
+  for (const [key, edges] of graph) {
+    obj[key] = edges;
+    edgeCount += edges.length;
+  }
+  return JSON.stringify({
+    version: 1,
+    generatedAt: new Date().toISOString(),
+    nodeCount: graph.size,
+    edgeCount,
+    graph: obj,
+  });
+}
+
+export function deserializeGraph(json: string): Map<string, WeightedEdge[]> | null {
+  try {
+    const data = JSON.parse(json);
+    if (data.version !== 1) return null;
+    // Validate shape before trusting the cache: every value must be an edge array.
+    // Anything else is treated as corruption so callers fall back to a rebuild.
+    const entries = Object.entries(data.graph);
+    if (!entries.every(([, edges]) => Array.isArray(edges))) return null;
+    return new Map(entries as [string, WeightedEdge[]][]);
+  } catch {
+    return null;
+  }
+}
+```
+
+### `packages/core/src/indexer/index.ts`
+
+After `linearMerge` completes in `index()`, build and save the graph:
+
+```typescript
+// After linearMerge (line ~180)
+const documents = prepareDocumentsForEmbedding(scanResult.documents);
+// ... linearMerge call ...
+
+// Build and cache dependency graph
+const graph = buildDependencyGraph(
+  documents.map(d => ({ id: d.id, score: 0, metadata: d.metadata }))
+);
+const graphJson = serializeGraph(graph);
+await fs.writeFile(filePaths.dependencyGraph, graphJson, 'utf-8');
+```
+
+## Tests
+
+| Test | What it verifies |
+|------|-----------------|
+| `serializeGraph` round-trips correctly | Serialize → deserialize → same graph |
+| `deserializeGraph` returns null for invalid JSON | Error handling |
+| `deserializeGraph` returns null for wrong version | Schema evolution |
+| `getStorageFilePaths` includes `dependencyGraph` | Path registration |
+| After `index()`, graph file exists | Integration |
+
+## Commit
+
+```
+feat(core): build and save dependency graph at index time
+```
diff --git a/.claude/da-plans/core/phase-3-graph-cache/3.2-load-on-demand.md b/.claude/da-plans/core/phase-3-graph-cache/3.2-load-on-demand.md
new file mode 100644
index 0000000..d36795d
--- /dev/null
+++ b/.claude/da-plans/core/phase-3-graph-cache/3.2-load-on-demand.md
@@ -0,0 +1,109 @@
+# Part 3.2: Load Cached Graph in dev_map and dev_refs
+
+See [overview.md](overview.md) for architecture context.
+
+## Goal
+
+Replace `getAll(limit: 10000)` → `buildDependencyGraph()` in `dev_map` and `dev_refs`
+with loading the cached graph from disk. Falls back to current approach if graph file
+is missing or corrupted.
+
+## What changes
+
+### `packages/core/src/map/graph.ts`
+
+Add a loader that reads from disk with fallback:
+
+```typescript
+import * as fs from 'node:fs/promises';
+
+/**
+ * Load dependency graph from cache, or build from docs as fallback.
+ */
+export async function loadOrBuildGraph(
+  graphPath: string | undefined,
+  fallbackDocs: () => Promise<SearchResult[]>
+): Promise<Map<string, WeightedEdge[]>> {
+  // Try cached graph first
+  if (graphPath) {
+    try {
+      const json = await fs.readFile(graphPath, 'utf-8');
+      const graph = deserializeGraph(json);
+      if (graph) return graph;
+    } catch {
+      // File missing or unreadable — fall through to build
+    }
+  }
+
+  // Fallback: build from docs (current approach)
+  const docs = await fallbackDocs();
+  return buildDependencyGraph(docs);
+}
+```
+
+### `packages/core/src/map/index.ts`
+
+Replace the graph build in `generateCodebaseMap`:
+
+```typescript
+// Before (current):
+const graph = buildDependencyGraph(allDocs);
+
+// After:
+const graph = await loadOrBuildGraph(
+  context.graphPath,  // new optional field on MapGenerationContext
+  async () => allDocs  // fallback uses already-fetched docs
+);
+```
+
+Add `graphPath` to `MapGenerationContext`:
+
+```typescript
+export interface MapGenerationContext {
+  indexer: RepositoryIndexer;
+  gitExtractor?: LocalGitExtractor;
+  logger?: Logger;
+  graphPath?: string;  // NEW — path to cached dependency-graph.json
+}
+```
+
+### `packages/mcp-server/src/adapters/built-in/refs-adapter.ts`
+
+Replace the `getDependencyGraph` method:
+
+```typescript
+private async getDependencyGraph() {
+  const CACHE_TTL_MS = 60_000;
+  if (this.cachedGraph && Date.now() - this.cachedGraphTime < CACHE_TTL_MS) {
+    return this.cachedGraph;
+  }
+
+  // Try loading from disk first (no getAll needed)
+  this.cachedGraph = await loadOrBuildGraph(
+    this.graphPath,
+    async () => this.indexer!.getAll({ limit: 50000 })  // raised limit as fallback
+  );
+  this.cachedGraphTime = Date.now();
+  return this.cachedGraph;
+}
+```
+
+### `packages/mcp-server/bin/dev-agent-mcp.ts`
+
+Pass `graphPath` to both MapAdapter and RefsAdapter from `getStorageFilePaths`.
+
+## Tests
+
+| Test | What it verifies |
+|------|-----------------|
+| `loadOrBuildGraph` with valid cached file | Loads from disk, doesn't call fallback |
+| `loadOrBuildGraph` with missing file | Calls fallback, builds from docs |
+| `loadOrBuildGraph` with corrupted file | Calls fallback, doesn't crash |
+| `generateCodebaseMap` uses cached graph when available | Integration |
+| `dev_refs dependsOn` uses cached graph | Integration |
+
+## Commit
+
+```
+feat(core,mcp): load cached dependency graph in dev_map and dev_refs
+```
diff --git a/.claude/da-plans/core/phase-3-graph-cache/3.3-incremental-graph.md b/.claude/da-plans/core/phase-3-graph-cache/3.3-incremental-graph.md
new file mode 100644
index 0000000..02f6408
--- /dev/null
+++ b/.claude/da-plans/core/phase-3-graph-cache/3.3-incremental-graph.md
@@ -0,0 +1,89 @@
+# Part 3.3: Incremental Graph Updates via File Watcher
+
+See [overview.md](overview.md) for architecture context.
+
+## Goal
+
+When the file watcher detects changes and calls `applyIncremental`, update the
+cached dependency graph without a full rebuild. This keeps the graph fresh as
+files are edited.
+
+## What changes
+
+### `packages/core/src/map/graph.ts`
+
+Add an incremental update function:
+
+```typescript
+/**
+ * Update a dependency graph incrementally.
+ *
+ * For changed/new files: drop their old outgoing edges, then add the new ones.
+ * For deleted files: drop their outgoing edges (edges pointing at them remain).
+ * Non-mutating — returns a new Map; `existing` is left untouched.
+ */
+export function updateGraphIncremental(
+  existing: Map<string, WeightedEdge[]>,
+  changedDocs: SearchResult[],
+  deletedFiles: string[]
+): Map<string, WeightedEdge[]> {
+  const updated = new Map(existing);
+
+  // Drop old edges for deleted and re-scanned files, keyed by file path
+  for (const file of deletedFiles) {
+    updated.delete(file);
+  }
+  for (const doc of changedDocs) {
+    // Also clears files whose new version has no callees (may be absent from changedGraph)
+    if (typeof doc.metadata.path === 'string') updated.delete(doc.metadata.path);
+  }
+
+  // Add the freshly computed edges for the re-scanned files
+  const changedGraph = buildDependencyGraph(changedDocs);
+  for (const [file, edges] of changedGraph) {
+    updated.set(file, edges);
+  }
+
+  return updated;
+}
+```
+
+### `packages/core/src/indexer/index.ts`
+
+In `applyIncremental`, update the cached graph:
+
+```typescript
+async applyIncremental(upserts: EmbeddingDocument[], deleteIds: string[]): Promise<void> {
+  await this.vectorStorage.batchUpsertAndDelete(upserts, deleteIds);
+
+  // Update cached graph (best-effort); on failure drop it so readers fall back to getAll + build
+  const graphPath = getStorageFilePaths(this.config.vectorStorePath).dependencyGraph;
+  try {
+    const existing = await loadGraphFromDisk(graphPath);
+    if (existing) {
+      const deletedFiles = extractFilesFromDeleteIds(deleteIds);
+      const changedDocs = upserts.map(d => ({ id: d.id, score: 0, metadata: d.metadata }));
+      const updated = updateGraphIncremental(existing, changedDocs, deletedFiles);
+      await fs.writeFile(graphPath, serializeGraph(updated), 'utf-8');
+    }
+  } catch {
+    await fs.rm(graphPath, { force: true }).catch(() => undefined);
+  }
+}
+```
+
+## Tests
+
+| Test | What it verifies |
+|------|-----------------|
+| `updateGraphIncremental` adds edges for new files | New file → new edges appear |
+| `updateGraphIncremental` removes edges for deleted files | Deleted file → edges gone |
+| `updateGraphIncremental` replaces edges for changed files | Changed file → old edges removed, new edges added |
+| `updateGraphIncremental` with empty existing graph | Handles first incremental gracefully |
+| Incremental update failure doesn't crash indexer | Error resilience |
+
+## Commit
+
+```
+feat(core): incremental dependency graph updates via file watcher
+```
diff --git a/.claude/da-plans/core/phase-3-graph-cache/overview.md b/.claude/da-plans/core/phase-3-graph-cache/overview.md
new file mode 100644
index 0000000..e08ec5b
--- /dev/null
+++ b/.claude/da-plans/core/phase-3-graph-cache/overview.md
@@ -0,0 +1,239 @@
+# Phase 3: Cached Dependency Graph for Scale
+
+**Status:** Draft
+
+## Context
+
+Phase 2 established the indexing pipeline (scan → Linear Merge → Antfly). MCP Phase 1
+added graph algorithms (PageRank, connected components, shortest path) that operate
+over the dependency graph built from indexed `callees` metadata.
+
+The current approach rebuilds the dependency graph from scratch on every `dev_map` and
+`dev_refs dependsOn` call by fetching all documents via `getAll(limit: 10000)`. This
+works at our current scale (~2,200 docs) but breaks at medium-to-large repos:
+
+| Repo size | Docs | Current behavior |
+|-----------|------|-----------------|
+| Small (dev-agent) | ~2k | Works. Graph build <1ms, PageRank 4ms. |
+| Medium (product monorepo) | 10-15k | **Silently truncated** at 10k. Graph is incomplete. |
+| Large (platform monorepo) | 20-50k | Completely broken. Missing most of the graph. |
+
+### What breaks
+
+1. **`getAll(limit: 10000)` hard wall** — docs beyond 10k are silently dropped.
+   The graph is incomplete with no indication. PageRank scores are wrong.
+
+2. **Memory** — 50k docs × ~5KB each = ~250MB just for raw data. The graph itself
+   is much smaller (~50k nodes × ~5 edges × 16 bytes = ~4MB).
+
+3. **Latency per request** — `dev_refs dependsOn` fetches all docs and rebuilds the
+   graph on every call. For a 10k-doc repo, that's ~50ms fetch + ~5ms graph build
+   on every MCP request. The RefsAdapter has a 60s cache but it still rebuilds from
+   scratch after expiry.
+
+### What we already have
+
+- `buildDependencyGraph(docs)` — pure function, returns `Map<string, WeightedEdge[]>`
+- `pageRank(graph)` — pure function, weighted with dangling nodes
+- `connectedComponents(graph)` — pure function, BFS on undirected graph
+- `shortestPath(graph, from, to)` — pure function, BFS on directed graph
+- File watcher that detects changes and triggers incremental re-indexing
+- Storage paths at `~/.dev-agent/indexes/{hash}/` with `metadata.json` and `watcher-snapshot`
+
+---
+
+## Proposed architecture
+
+### Current flow (what we're fixing)
+
+```
+┌──────────────────────────────────────────────────────────┐
+│                  dev_map / dev_refs                       │
+│                                                          │
+│   getAll(limit: 10000)  ──────────►  Antfly              │
+│         │                            (fetch ALL docs)    │
+│         │ ~250MB for 50k docs                            │
+│         ▼                                                │
+│   buildDependencyGraph()                                 │
+│         │ rebuild from scratch every time                │
+│         ▼                                                │
+│   pageRank() / shortestPath()                            │
+└──────────────────────────────────────────────────────────┘
+
+Problem: fetches ALL docs (truncated at 10k), rebuilds graph every call
+```
+
+### Proposed flow
+
+```
+┌──────────────────────────────────────────────────────────┐
+│                  Index time (dev index)                   │
+│                                                          │
+│   scan ──► prepareDocuments ──► linearMerge ──► Antfly   │
+│                    │                                     │
+│                    │ NEW: also build graph               │
+│                    ▼                                     │
+│            buildDependencyGraph()                        │
+│                    │                                     │
+│                    ▼                                     │
+│            dependency-graph.json  (~1-5MB)               │
+│            ~/.dev-agent/indexes/{hash}/                   │
+└──────────────────────────────────────────────────────────┘
+
+┌──────────────────────────────────────────────────────────┐
+│              dev_map / dev_refs (query time)              │
+│                                                          │
+│   Load dependency-graph.json  ──► Map<string, Edge[]>    │
+│         │ ~50ms for 5MB                                  │
+│         │ (no getAll, no Antfly fetch)                   │
+│         ▼                                                │
+│   pageRank() / shortestPath() / connectedComponents()    │
+└──────────────────────────────────────────────────────────┘
+
+Fix: graph built once at index time, loaded from disk at query time
+```
+
+### Incremental updates (file watcher)
+
+```
+┌──────────────────────────────────────────────────────────┐
+│              File change detected                        │
+│                                                          │
+│   @parcel/watcher: files A, B changed; file C deleted    │
+│         │                                                │
+│         ▼                                                │
+│   scan changed files ──► batchUpsertAndDelete ──► Antfly │
+│         │                                                │
+│         │ NEW: also update graph                         │
+│         ▼                                                │
+│   Load existing graph                                    │
+│   Remove edges for changed/deleted files                 │
+│   Add edges from re-scanned callees                      │
+│   Save updated graph                                     │
+│                                                          │
+│   O(changed files), not O(all files)                     │
+└──────────────────────────────────────────────────────────┘
+```
+
+### Storage layout
+
+```
+~/.dev-agent/indexes/{hash}/
+    ├── metadata.json            (existing — index config)
+    ├── watcher-snapshot         (existing — @parcel/watcher state)
+    └── dependency-graph.json    (NEW — ~1-5MB, serialized graph)
+```
+
+### Graph JSON format
+
+```json
+{
+  "version": 1,
+  "generatedAt": "2026-03-31T20:00:00Z",
+  "nodeCount": 2214,
+  "edgeCount": 8456,
+  "graph": {
+    "src/services/search.ts": [
+      { "target": "src/vector/index.ts", "weight": 1.414 },
+      { "target": "src/scanner/types.ts", "weight": 1.0 }
+    ]
+  }
+}
+```
+
+### Consumer changes
+
+| Consumer | Before | After |
+|----------|--------|-------|
+| `dev_map` (generateCodebaseMap) | `getAll(10000)` → build graph → PageRank | Load cached graph → PageRank |
+| `dev_refs dependsOn` | `getAll(10000)` → build graph → shortestPath | Load cached graph → shortestPath |
+| `dev_map` (directory tree) | Still needs `getAll` for component counts + exports | Unchanged — separate concern |
+
+**Important:** `generateCodebaseMap` still needs `getAll` for the directory tree
+(component counts, exports). But the graph algorithms no longer depend on it.
+The directory tree already has its own limit handling. Only the graph operations
+are decoupled.
+
+### Incremental updates
+
+When the file watcher detects changes and calls `applyIncremental`:
+1. Load existing graph JSON
+2. Remove edges from changed/deleted files
+3. Add edges from newly scanned files' callees
+4. Save updated graph JSON
+
+This is O(changed files), not O(all files). The graph stays up to date without
+a full rebuild.
+
+---
+
+## Parts
+
+| Part | Description | Risk |
+|------|-------------|------|
+| [3.1](./3.1-index-time-graph.md) | Build and save dependency graph at index time | Low — additive |
+| [3.2](./3.2-load-on-demand.md) | Load cached graph in dev_map + dev_refs, remove getAll dependency | Medium — changes data flow |
+| [3.3](./3.3-incremental-graph.md) | Incremental graph updates via file watcher | Medium — new update path |
+
+---
+
+## Decisions
+
+| Decision | Rationale | Alternatives |
+|----------|-----------|-------------|
+| JSON file, not DB | Graph is small (~1-5MB), read-only between updates, JSON is debuggable | SQLite: overkill. Antfly: no server-side graph API. |
+| Build at index time | Amortizes cost. Graph only changes when index changes. | Build on demand: current approach, doesn't scale. |
+| Incremental updates | Watcher already knows which files changed. Graph update is O(changed). | Full rebuild on every change: wasteful at scale. |
+| Keep getAll for directory tree | Directory tree needs component counts and exports which aren't in the graph. | Index component counts separately: premature optimization. |
+| Version field in JSON | Allows schema evolution without migration headaches. | No version: breaks silently on format change. |
+
+---
+
+## Risk register
+
+| Risk | Likelihood | Impact | Mitigation |
+|------|-----------|--------|------------|
+| Graph JSON out of sync with index | Medium | Medium | Rebuild graph on `dev index --force`. Watcher keeps it updated incrementally. |
+| Graph file corrupted or missing | Low | Low | Fallback to current approach (getAll + build). Never crash. |
+| Graph file too large for huge repos | Low | Low | 50k nodes × 5 edges × ~50 bytes = ~12MB. Acceptable. |
+| Incremental update misses edge cases | Medium | Medium | Full rebuild always available via `dev index --force`. Incremental is best-effort. |
+| JSON parse performance | Low | Low | 5MB JSON parses in <50ms. Not a bottleneck. |
+
+---
+
+## Test strategy
+
+| Test | Priority | What it verifies |
+|------|----------|-----------------|
+| Build graph from scan results and save JSON | P0 | Index time graph generation |
+| Load graph JSON and run PageRank | P0 | Cached graph → algorithms work |
+| Missing graph file → fallback to getAll | P0 | Graceful degradation |
+| Corrupted graph file → fallback to getAll | P0 | Error handling |
+| Incremental: add file → graph updated | P0 | Watcher integration |
+| Incremental: delete file → edges removed | P0 | Watcher integration |
+| Graph version mismatch → fallback to getAll + build | P1 | Schema evolution |
+| 10k+ node graph serialization round-trip | P1 | Scale |
+| dev_map uses cached graph (not getAll) | P1 | Integration |
+| dev_refs dependsOn uses cached graph | P1 | Integration |
+
+---
+
+## Verification checklist
+
+- [ ] `dev index` produces `dependency-graph.json` alongside `metadata.json`
+- [ ] `dev_map` loads cached graph instead of calling `getAll` for PageRank
+- [ ] `dev_refs dependsOn` loads cached graph
+- [ ] Missing graph file → falls back to getAll (current behavior)
+- [ ] `dev index --force` rebuilds graph from scratch
+- [ ] File watcher change → graph incrementally updated
+- [ ] Graph JSON < 15MB for 50k-node repo
+- [ ] PageRank on cached 50k-node graph < 500ms
+- [ ] `pnpm build && pnpm test` passes
+
+---
+
+## Dependencies
+
+- Phase 2 (indexing rethink) — merged
+- MCP Phase 1 Part 1.6 (graph algorithms) — merged (pending PR #19)
+- `getStorageFilePaths` in `packages/core/src/storage/path.ts` — add `dependencyGraph` path
diff --git a/.claude/scratchpad.md b/.claude/scratchpad.md
index 8587a1d..dbdea2d 100644
--- a/.claude/scratchpad.md
+++ b/.claude/scratchpad.md
@@ -3,6 +3,7 @@
 ## Known Limitations
 
 - **`getDocsByFilePath` fetches all docs client-side (capped at 5k).** Uses `getAll(limit: 5000)` + exact path filter. Fine for single repos (dev-agent has ~2,200 docs). Won't scale to monorepos with 50k+ files. Future fix: server-side path filter in Antfly SDK.
+- **Two clones of the same repo share one index.** Storage path is hashed from git remote URL (`prosdevlab/dev-agent` → `a1b2c3d4`). Two local clones on different branches share the same index, graph cache, and watcher snapshot. Stale data possible if branches diverge significantly. Pre-existing design — not introduced by graph cache. Fix would be to include branch or worktree path in the hash.
 
 ## Open Questions
 
@@ -14,6 +15,9 @@
 - Wire `shortestPath` into `dev_refs` as a "trace path" feature (graph.ts is ready, adapter wiring is separate scope)
 - Wire `connectedComponents` into `dev_map` verbose output (graph.ts is ready)
 - Betweenness centrality — identifies bridge files between subsystems. Worth adding if agents need refactoring guidance. graphology (MIT, 1.6k stars) is the upgrade path if we need more than 3 hand-rolled algorithms.
+- **Connected components hub filtering** — widely-shared utility files (e.g., logger.ts imported by 50+ files) merge separate subsystems into one component. Filter out hub nodes (high in-degree) before computing components for better subsystem identification.
+- **PageRank at 10k+ nodes** — convergence tolerance 1e-6 may require all 100 iterations for large sparse graphs. Monitor performance. Consider reducing maxIterations or loosening tolerance for dev_map where approximate ranks are fine.
+- **getAll(limit: 10000) truncation** — medium-large monorepos may exceed 10k docs. A warning is logged, but the tool output itself does not tell the caller that results are incomplete. Long-term: paginate or make limit configurable.
 - E2E tests in CI — blocked on Antfly memory requirements vs GitHub runner limits (7GB)
 - **Python language support** — tree-sitter-python WASM is ~300KB, already in tree-sitter-wasms. Needs a Python scanner (document extraction) + Python-specific pattern rules. High demand — large ecosystem. Worth a standalone plan covering: scanner, pattern rules, test fixtures, indexer integration. The PatternMatcher interface from 1.5 is language-agnostic so pattern rules slot right in; the scanner is the real work.
 - Vue/Svelte SFC support — `.vue`/`.svelte` files have embedded `<script lang="ts">` blocks. Would need script block extraction before tree-sitter parsing. Lower priority — co-located `.ts` files in those projects already get full analysis.
@@ -25,7 +29,7 @@
 
 ## Test Gaps
 
-- **RefsAdapter integration test with `traceTo`.** The `traceTo` path tracing feature is tested at the algorithm level (shortestPath in graph.test.ts) but not at the adapter level. Needs a test that constructs RefsAdapter with a mock indexer, calls `execute()` with `traceTo`, and verifies the path output format. Also needs a test for the error case when indexer is missing.
+- **RefsAdapter integration test with `dependsOn`.** The `dependsOn` path tracing feature is tested at the algorithm level (shortestPath in graph.test.ts) but not at the adapter level. Needs a test that constructs RefsAdapter with a mock indexer, calls `execute()` with `dependsOn`, and verifies the path output format. Also needs a test for the error case when indexer is missing.
 - **InspectAdapter integration test with PatternMatcher.** The InspectAdapter test constructs without a `patternMatcher` — the AST path is never exercised through the MCP layer. Needs a test that constructs `InspectAdapter` with `createPatternMatcher()`, mocks the search service, calls `execute()`, and verifies AST-enhanced results flow through. Requires mock search service setup — larger integration test scope.
 
 ## Notes
diff --git a/packages/core/src/map/graph.ts b/packages/core/src/map/graph.ts
index 1c62c44..45f7461 100644
--- a/packages/core/src/map/graph.ts
+++ b/packages/core/src/map/graph.ts
@@ -228,7 +228,6 @@ export function shortestPath(
   to: string
 ): string[] | null {
   if (from === to) return [from];
-  if (!graph.has(from)) return null;
 
   const visited = new Set<string>([from]);
   const parent = new Map<string, string>();
diff --git a/packages/core/src/map/index.ts b/packages/core/src/map/index.ts
index 4021187..1a35057 100644
--- a/packages/core/src/map/index.ts
+++ b/packages/core/src/map/index.ts
@@ -96,6 +96,9 @@ export async function generateCodebaseMap(
   });
   const t2 = Date.now();
   logger?.debug({ duration_ms: t2 - t1, docCount: allDocs.length }, 'Retrieved all documents');
+  if (allDocs.length >= 10000) {
+    logger?.warn('Document limit (10000) reached — map and graph results may be incomplete');
+  }
 
   // Build directory tree from documents
   const t3 = Date.now();
diff --git a/packages/mcp-server/src/adapters/built-in/refs-adapter.ts b/packages/mcp-server/src/adapters/built-in/refs-adapter.ts
index f1eac03..5eab119 100644
--- a/packages/mcp-server/src/adapters/built-in/refs-adapter.ts
+++ b/packages/mcp-server/src/adapters/built-in/refs-adapter.ts
@@ -71,6 +71,8 @@ export class RefsAdapter extends ToolAdapter {
   };
 
   private indexer?: RepositoryIndexer;
+  private cachedGraph?: Map<string, import('@prosdevlab/dev-agent-core').WeightedEdge[]>;
+  private cachedGraphTime = 0;
 
   constructor(config: RefsAdapterConfig) {
     super();
@@ -88,6 +90,31 @@ export class RefsAdapter extends ToolAdapter {
     });
   }
 
+  /**
+   * Get dependency graph, cached for 60 seconds.
+   * Avoids rebuilding the full graph on every dependsOn call.
+   */
+  private async getDependencyGraph(): Promise<
+    Map<string, import('@prosdevlab/dev-agent-core').WeightedEdge[]>
+  > {
+    const CACHE_TTL_MS = 60_000;
+    if (this.cachedGraph && Date.now() - this.cachedGraphTime < CACHE_TTL_MS) {
+      return this.cachedGraph;
+    }
+
+    const DOC_LIMIT = 10_000;
+    const allDocs = await this.indexer!.getAll({ limit: DOC_LIMIT });
+    if (allDocs.length >= DOC_LIMIT) {
+      console.error(
+        `[dev-agent] Warning: dependency graph hit ${DOC_LIMIT} doc limit. Results may be incomplete.`
+      );
+    }
+
+    this.cachedGraph = buildDependencyGraph(allDocs);
+    this.cachedGraphTime = Date.now();
+    return this.cachedGraph;
+  }
+
   getToolDefinition(): ToolDefinition {
     return {
       name: 'dev_refs',
@@ -116,11 +143,11 @@ export class RefsAdapter extends ToolAdapter {
             maximum: 50,
             default: this.config.defaultLimit,
           },
-          traceTo: {
+          dependsOn: {
             type: 'string',
             description:
-              "Trace the dependency chain from this function's file to a target file " +
-              '(e.g., "src/database.ts"). Follows directed call graph edges (A calls B, not B calls A).',
+              "Trace the call chain from this function's file to a target it depends on " +
+              '(e.g., "src/database.ts"). Follows call direction: A calls B, B calls C.',
           },
         },
         required: ['name'],
@@ -135,11 +162,11 @@ export class RefsAdapter extends ToolAdapter {
       return validation.error;
     }
 
-    const { name, direction, limit, traceTo } = validation.data;
+    const { name, direction, limit, dependsOn } = validation.data;
 
     try {
       const timer = startTimer();
-      context.logger.debug('Executing refs query', { name, direction, limit, traceTo });
+      context.logger.debug('Executing refs query', { name, direction, limit, dependsOn });
 
       // First, find the target component
       const searchResults = await this.searchService.search(name, { limit: 10 });
@@ -157,27 +184,26 @@ export class RefsAdapter extends ToolAdapter {
         };
       }
 
-      // Handle traceTo — find shortest dependency path
-      if (traceTo && !this.indexer) {
+      // Handle dependsOn — find shortest dependency path
+      if (dependsOn && !this.indexer) {
         return {
           success: false,
           error: {
             code: 'INDEX_REQUIRED',
-            message: 'Path tracing requires a repository index.',
+            message: 'Dependency path tracing requires a repository index.',
             suggestion: 'Run "dev index" to index the repository first.',
           },
         };
       }
 
-      if (traceTo && this.indexer) {
+      if (dependsOn && this.indexer) {
         const sourceFile = (target.metadata.path as string) || '';
-        const allDocs = await this.indexer.getAll({ limit: 10000 });
-        const graph = buildDependencyGraph(allDocs);
-        const path = shortestPath(graph, sourceFile, traceTo);
+        const graph = await this.getDependencyGraph();
+        const path = shortestPath(graph, sourceFile, dependsOn);
 
         const content = path
-          ? `## Dependency Path: ${sourceFile} → ${traceTo}\n\n${path.join(' → ')}\n\n**${path.length - 1} hop${path.length - 1 === 1 ? '' : 's'}**`
-          : `## No Path Found\n\nNo dependency chain from \`${sourceFile}\` to \`${traceTo}\`.\nThese files may be in separate subsystems.`;
+          ? `## Dependency Path: ${sourceFile} → ${dependsOn}\n\n${path.join(' → ')}\n\n**${path.length - 1} hop${path.length - 1 === 1 ? '' : 's'}**`
+          : `## No Path Found\n\nNo dependency chain from \`${sourceFile}\` to \`${dependsOn}\`.\nThese files may be in separate subsystems.`;
 
         return {
           success: true,
diff --git a/packages/mcp-server/src/schemas/index.ts b/packages/mcp-server/src/schemas/index.ts
index 15de295..ca5e993 100644
--- a/packages/mcp-server/src/schemas/index.ts
+++ b/packages/mcp-server/src/schemas/index.ts
@@ -63,7 +63,7 @@ export const RefsArgsSchema = z
     name: z.string().min(1, 'Name must be a non-empty string'),
     direction: z.enum(['callees', 'callers', 'both']).default('both'),
     limit: z.number().int().min(1).max(50).default(20),
-    traceTo: z.string().optional(),
+    dependsOn: z.string().optional(),
   })
   .strict();
 
diff --git a/website/content/docs/tools/dev-refs.mdx b/website/content/docs/tools/dev-refs.mdx
index 4b02db0..1b2a51d 100644
--- a/website/content/docs/tools/dev-refs.mdx
+++ b/website/content/docs/tools/dev-refs.mdx
@@ -17,7 +17,7 @@ This is invaluable for understanding impact of changes and navigating unfamiliar
 | `name` | string | required | Symbol name to query |
 | `direction` | string | `"both"` | `"callers"`, `"callees"`, or `"both"` |
 | `limit` | number | `10` | Maximum results |
-| `traceTo` | string | — | Trace dependency chain to a target file (e.g., `"src/database.ts"`) |
+| `dependsOn` | string | — | Trace the call chain to a file this function depends on (e.g., `"src/database.ts"`) |
 | `tokenBudget` | number | `2000` | Max tokens for output |
 
 ## Examples
@@ -92,7 +92,7 @@ What does the validateToken function call?
 ```json
 {
   "name": "authenticate",
-  "traceTo": "src/database.ts"
+  "dependsOn": "src/database.ts"
 }
 ```