Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion graphify/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,8 @@ def main() -> None:
print(" watch <path> watch a folder and rebuild the graph on code changes")
print(" update <path> re-extract code files and update the graph (no LLM needed)")
print(" cluster-only <path> rerun clustering on an existing graph.json and regenerate report")
print(" --hierarchical run multi-resolution Leiden (3 levels) + community summaries")
print(" --summary-backend B summary backend: extractive (default), ollama, claude")
print(" query \"<question>\" BFS traversal of graph.json for a question")
print(" --dfs use depth-first instead of breadth-first")
print(" --budget N cap output at N tokens (default 2000)")
Expand Down Expand Up @@ -1199,6 +1201,11 @@ def main() -> None:
if not graph_json.exists():
print(f"error: no graph found at {graph_json} — run /graphify first", file=sys.stderr)
sys.exit(1)
use_hierarchical = "--hierarchical" in sys.argv
summary_backend = "extractive"
for i, a in enumerate(sys.argv):
if a == "--summary-backend" and i + 1 < len(sys.argv):
summary_backend = sys.argv[i + 1]
from networkx.readwrite import json_graph as _jg
from graphify.build import build_from_json
from graphify.cluster import cluster, score_all
Expand All @@ -1221,7 +1228,19 @@ def main() -> None:
{}, tokens, str(watch_path), suggested_questions=questions)
out = watch_path / "graphify-out"
(out / "GRAPH_REPORT.md").write_text(report, encoding="utf-8")
to_json(G, communities, str(out / "graph.json"))
# Hierarchical clustering + summaries (opt-in)
hierarchy = None
summaries = None
if use_hierarchical:
from graphify.cluster import hierarchical_cluster
from graphify.summarize import summarize_all_communities
print(f"Running hierarchical clustering (3 levels)...")
hierarchy = hierarchical_cluster(G)
print(f"Generating community summaries (backend={summary_backend})...")
summaries = summarize_all_communities(G, communities, backend=summary_backend)
print(f" {len(hierarchy)} hierarchy levels, {len(summaries)} summaries generated")
to_json(G, communities, str(out / "graph.json"),
community_hierarchy=hierarchy, community_summaries=summaries)
print(f"Done — {len(communities)} communities. GRAPH_REPORT.md and graph.json updated.")

elif cmd == "update":
Expand Down
81 changes: 81 additions & 0 deletions graphify/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,84 @@ def cohesion_score(G: nx.Graph, community_nodes: list[str]) -> float:

def score_all(G: nx.Graph, communities: dict[int, list[str]]) -> dict[int, float]:
    """Compute a cohesion score for every community.

    Returns a mapping of community id -> cohesion score (one entry per
    community in *communities*), delegating to :func:`cohesion_score`.
    """
    scores: dict[int, float] = {}
    for community_id, member_nodes in communities.items():
        scores[community_id] = cohesion_score(G, member_nodes)
    return scores


def _partition_at_resolution(G: nx.Graph, resolution: float) -> dict[str, int]:
    """Partition *G* into communities at one resolution parameter.

    Higher resolution yields more, smaller communities; lower resolution
    yields fewer, larger ones.  Prefers Leiden (graspologic) when that
    package is importable, otherwise falls back to networkx Louvain.

    Returns a {node_id: community_id} mapping.
    """
    try:
        from graspologic.partition import leiden

        # graspologic can be noisy; silence stderr for the duration of the
        # call and restore it no matter what.
        saved_stderr = sys.stderr
        try:
            sys.stderr = io.StringIO()
            with _suppress_output():
                return leiden(G, resolution=resolution)
        finally:
            sys.stderr = saved_stderr
    except ImportError:
        pass

    # Fallback: networkx Louvain.  max_level only exists on newer networkx
    # versions, so probe the signature before passing it.
    louvain_kwargs: dict = {"seed": 42, "threshold": 1e-4, "resolution": resolution}
    louvain_params = inspect.signature(nx.community.louvain_communities).parameters
    if "max_level" in louvain_params:
        louvain_kwargs["max_level"] = 10
    node_sets = nx.community.louvain_communities(G, **louvain_kwargs)
    return {node: cid for cid, members in enumerate(node_sets) for node in members}


def hierarchical_cluster(
    G: nx.Graph,
    resolutions: list[float] | None = None,
) -> dict[int, dict[int, list[str]]]:
    """Run Leiden/Louvain community detection at multiple resolution levels.

    Parameters
    ----------
    G:
        Input graph.  Directed graphs are converted to undirected
        internally, since Louvain/Leiden require undirected input.
    resolutions:
        One resolution parameter per hierarchy level.  Defaults to
        [0.5, 1.0, 2.0] — coarse, medium, fine.  Levels are numbered in
        ascending resolution order, so level 0 is always the coarsest
        (fewest communities) and higher levels are progressively finer.

    Returns
    -------
    {level: {community_id: [node_ids]}} with community ids re-indexed by
    descending community size for deterministic ordering.
    """
    if resolutions is None:
        resolutions = [0.5, 1.0, 2.0]

    if G.number_of_nodes() == 0:
        return {level: {} for level in range(len(resolutions))}
    if G.is_directed():
        G = G.to_undirected()
    if G.number_of_edges() == 0:
        # No edges: every node is a singleton community at every level.
        # Build fresh lists per level — a shallow dict copy would alias the
        # same node lists across levels, so a caller mutating one level's
        # lists would silently corrupt the others.
        ordered = sorted(G.nodes)
        return {
            level: {i: [n] for i, n in enumerate(ordered)}
            for level in range(len(resolutions))
        }

    # Handle isolates separately (same as cluster()): they would otherwise
    # distort the partitioning of the connected portion.
    isolates = [n for n in G.nodes() if G.degree(n) == 0]
    connected_nodes = [n for n in G.nodes() if G.degree(n) > 0]
    connected = G.subgraph(connected_nodes)

    hierarchy: dict[int, dict[int, list[str]]] = {}

    for level, res in enumerate(sorted(resolutions)):
        raw: dict[int, list[str]] = {}
        if connected.number_of_nodes() > 0:
            partition = _partition_at_resolution(connected, res)
            for node, cid in partition.items():
                raw.setdefault(cid, []).append(node)

        # Each isolate becomes its own single-node community.
        next_cid = max(raw.keys(), default=-1) + 1
        for node in isolates:
            raw[next_cid] = [node]
            next_cid += 1

        # Re-index by size descending for deterministic ordering.
        communities_list = sorted(raw.values(), key=len, reverse=True)
        hierarchy[level] = {i: sorted(nodes) for i, nodes in enumerate(communities_list)}

    return hierarchy
14 changes: 13 additions & 1 deletion graphify/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,14 @@ def attach_hyperedges(G: nx.Graph, hyperedges: list) -> None:
G.graph["hyperedges"] = existing


def to_json(G: nx.Graph, communities: dict[int, list[str]], output_path: str) -> None:
def to_json(
G: nx.Graph,
communities: dict[int, list[str]],
output_path: str,
*,
community_hierarchy: dict[int, dict[int, list[str]]] | None = None,
community_summaries: dict[int, str] | None = None,
) -> None:
node_community = _node_community_map(communities)
try:
data = json_graph.node_link_data(G, edges="links")
Expand All @@ -302,6 +309,11 @@ def to_json(G: nx.Graph, communities: dict[int, list[str]], output_path: str) ->
conf = link.get("confidence", "EXTRACTED")
link["confidence_score"] = _CONFIDENCE_SCORE_DEFAULTS.get(conf, 1.0)
data["hyperedges"] = getattr(G, "graph", {}).get("hyperedges", [])
# Hierarchical community data (opt-in via --hierarchical flag)
if community_hierarchy is not None:
data["community_hierarchy"] = {str(k): v for k, v in community_hierarchy.items()}
if community_summaries is not None:
data["community_summaries"] = {str(k): v for k, v in community_summaries.items()}
with open(output_path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2)

Expand Down
120 changes: 120 additions & 0 deletions graphify/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,39 @@ def _strip_diacritics(text: str) -> str:
return "".join(c for c in nfkd if not unicodedata.combining(c))


def _community_summaries_from_data(graph_path: str) -> dict[int, str]:
"""Load community_summaries from graph.json if present."""
try:
data = json.loads(Path(graph_path).resolve().read_text(encoding="utf-8"))
raw = data.get("community_summaries", {})
return {int(k): v for k, v in raw.items()}
except Exception:
return {}


def _community_hierarchy_from_data(graph_path: str) -> dict[int, dict[int, list[str]]]:
"""Load community_hierarchy from graph.json if present."""
try:
data = json.loads(Path(graph_path).resolve().read_text(encoding="utf-8"))
raw = data.get("community_hierarchy", {})
return {int(level): {int(cid): nodes for cid, nodes in comms.items()} for level, comms in raw.items()}
except Exception:
return {}


def _community_relevance_score(summary: str, terms: list[str]) -> float:
"""Score a community summary against query terms using simple term overlap.

Returns a float in [0, 1] based on the fraction of query terms found in
the summary text (case-insensitive).
"""
if not terms or not summary:
return 0.0
summary_lower = summary.lower()
matches = sum(1 for t in terms if t in summary_lower)
return matches / len(terms)


def _score_nodes(G: nx.Graph, terms: list[str]) -> list[tuple[float, str]]:
scored = []
norm_terms = [_strip_diacritics(t).lower() for t in terms]
Expand Down Expand Up @@ -158,6 +191,8 @@ def serve(graph_path: str = "graphify-out/graph.json") -> None:

G = _load_graph(graph_path)
communities = _communities_from_graph(G)
community_summaries = _community_summaries_from_data(graph_path)
community_hierarchy = _community_hierarchy_from_data(graph_path)

server = Server("graphify")

Expand Down Expand Up @@ -232,6 +267,19 @@ async def list_tools() -> list[types.Tool]:
"required": ["source", "target"],
},
),
types.Tool(
name="list_communities",
description="List all communities with their summaries. Use to browse the graph structure before querying.",
inputSchema={
"type": "object",
"properties": {
"level": {
"type": "integer",
"description": "Hierarchy level (0=coarse, higher=finer). Omit for flat communities.",
},
},
},
),
]

def _tool_query_graph(arguments: dict) -> str:
Expand All @@ -240,6 +288,53 @@ def _tool_query_graph(arguments: dict) -> str:
depth = min(int(arguments.get("depth", 3)), 6)
budget = int(arguments.get("token_budget", 2000))
terms = [t.lower() for t in question.split() if len(t) > 2]

# Community-pruned query: if summaries exist, score communities first
# and restrict traversal to nodes in top-K relevant communities
if community_summaries and terms:
scored_communities = [
(_community_relevance_score(summary, terms), cid)
for cid, summary in community_summaries.items()
]
scored_communities.sort(reverse=True)
# Take communities with score > 0, up to top 3
relevant_cids = [
cid for score, cid in scored_communities
if score > 0
][:3]

if relevant_cids:
# Restrict to nodes in relevant communities
relevant_nodes = set()
for cid in relevant_cids:
relevant_nodes.update(communities.get(cid, []))
# Score only within relevant community nodes
subgraph = G.subgraph(relevant_nodes)
scored = _score_nodes(subgraph, terms)
start_nodes = [nid for _, nid in scored[:3]]
if not start_nodes:
# Fall back to community hub nodes (highest degree in each community)
for cid in relevant_cids:
c_nodes = communities.get(cid, [])
if c_nodes:
hub = max(c_nodes, key=lambda n: G.degree(n))
start_nodes.append(hub)
if len(start_nodes) >= 3:
break
if start_nodes:
nodes, edges = _dfs(G, start_nodes, depth) if mode == "dfs" else _bfs(G, start_nodes, depth)
# Constrain output to relevant community scope
nodes = nodes & relevant_nodes
matched_summaries = [f" Community {cid}: {community_summaries[cid]}" for cid in relevant_cids if cid in community_summaries]
header = (
f"Traversal: {mode.upper()} depth={depth} | "
f"Pruned to {len(relevant_cids)} communities | "
f"{len(nodes)} nodes found\n"
f"Matched communities:\n" + "\n".join(matched_summaries) + "\n\n"
)
return header + _subgraph_to_text(G, nodes, edges, budget)

# Fallback: original behavior (no community summaries or no matches)
scored = _score_nodes(G, terms)
start_nodes = [nid for _, nid in scored[:3]]
if not start_nodes:
Expand Down Expand Up @@ -338,6 +433,30 @@ def _tool_shortest_path(arguments: dict) -> str:
segments.append(f"--{rel}{conf_str}--> {G.nodes[v].get('label', v)}")
return f"Shortest path ({hops} hops):\n " + " ".join(segments)

def _tool_list_communities(arguments: dict) -> str:
    """MCP tool: render the graph's community structure as plain text.

    With an integer "level" argument (and hierarchy data present), lists
    the communities at that hierarchy level; otherwise lists the flat
    communities with their summaries.  Reads G, communities,
    community_summaries and community_hierarchy from the enclosing
    serve() scope.
    """
    level = arguments.get("level")
    if level is not None and community_hierarchy:
        # level may arrive as a string over the wire; normalize to int.
        level = int(level)
        level_data = community_hierarchy.get(level)
        if level_data is None:
            # Unknown level: tell the caller which levels actually exist.
            available = sorted(community_hierarchy.keys())
            return f"Hierarchy level {level} not found. Available levels: {available}"
        lines = [f"Communities at hierarchy level {level} ({len(level_data)} communities):"]
        # Largest communities first; preview up to three member labels each
        # (skipping ids no longer present in G).
        for cid, nodes in sorted(level_data.items(), key=lambda x: -len(x[1])):
            top_labels = [G.nodes[n].get("label", n) for n in nodes[:3] if n in G]
            lines.append(f" Community {cid} ({len(nodes)} nodes): {', '.join(top_labels)}...")
        return "\n".join(lines)

    # Flat communities with summaries
    lines = [f"Communities ({len(communities)} total):"]
    for cid, nodes in sorted(communities.items(), key=lambda x: -len(x[1])):
        # Summaries exist only when the graph was built with --hierarchical
        # plus a summary backend; omit the suffix otherwise.
        summary = community_summaries.get(cid, "")
        summary_str = f" — {summary}" if summary else ""
        lines.append(f" Community {cid} ({len(nodes)} nodes){summary_str}")
    if community_hierarchy:
        lines.append(f"\nHierarchy available ({len(community_hierarchy)} levels). Use level=0..{len(community_hierarchy)-1} to browse.")
    return "\n".join(lines)

_handlers = {
"query_graph": _tool_query_graph,
"get_node": _tool_get_node,
Expand All @@ -346,6 +465,7 @@ def _tool_shortest_path(arguments: dict) -> str:
"god_nodes": _tool_god_nodes,
"graph_stats": _tool_graph_stats,
"shortest_path": _tool_shortest_path,
"list_communities": _tool_list_communities,
}

@server.call_tool()
Expand Down
Loading