Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .opencode/plugins/graphify.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// graphify OpenCode plugin
// Injects a knowledge graph reminder before bash tool calls when the graph exists.
import { existsSync } from "fs";
import { join } from "path";

// OpenCode plugin factory. Receives the workspace directory and returns the
// hook map OpenCode wires into tool execution.
export const GraphifyPlugin = async ({ directory }) => {
  // Remind only once per session so every subsequent bash call runs untouched.
  let reminded = false;

  return {
    /**
     * Prepend a one-time knowledge-graph reminder to the first bash command,
     * but only when the generated graph artifact exists in the workspace.
     *
     * @param {{ tool: string }} input - tool invocation metadata.
     * @param {{ args: { command?: string } }} output - mutable tool arguments.
     */
    "tool.execute.before": async (input, output) => {
      if (reminded) return;
      // No graph generated yet — nothing to point the agent at.
      if (!existsSync(join(directory, "graphify-out", "graph.json"))) return;
      if (input.tool !== "bash") return;

      const command = output?.args?.command;
      // Guard: only prefix an actual command string; concatenating onto a
      // missing value would yield "… && undefined".
      if (typeof command !== "string") return;

      output.args.command =
        'echo "[graphify] Knowledge graph available. Read graphify-out/GRAPH_REPORT.md for god nodes and architecture context before searching files." && ' +
        command;
      reminded = true;
    },
  };
};
44 changes: 32 additions & 12 deletions graphify/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,7 @@ def main() -> None:
print("Commands:")
print(" install [--platform P] copy skill to platform config dir (claude|windows|codex|opencode|aider|claw|droid|trae|trae-cn|gemini|cursor|antigravity|hermes|kiro)")
print(" path \"A\" \"B\" shortest path between two nodes in graph.json")
print(" --weighted use Dijkstra with confidence-based edge weights")
print(" --graph <path> path to graph.json (default graphify-out/graph.json)")
print(" explain \"X\" plain-language explanation of a node and its neighbors")
print(" --graph <path> path to graph.json (default graphify-out/graph.json)")
Expand All @@ -911,6 +912,7 @@ def main() -> None:
print(" cluster-only <path> rerun clustering on an existing graph.json and regenerate report")
print(" query \"<question>\" BFS traversal of graph.json for a question")
print(" --dfs use depth-first instead of breadth-first")
print(" --weighted priority-queue BFS preferring high-confidence edges")
print(" --budget N cap output at N tokens (default 2000)")
print(" --graph <path> path to graph.json (default graphify-out/graph.json)")
print(" save-result save a Q&A result to graphify-out/memory/ for graph feedback loop")
Expand Down Expand Up @@ -1074,13 +1076,14 @@ def main() -> None:
sys.exit(1)
elif cmd == "query":
if len(sys.argv) < 3:
print("Usage: graphify query \"<question>\" [--dfs] [--budget N] [--graph path]", file=sys.stderr)
print("Usage: graphify query \"<question>\" [--dfs] [--weighted] [--budget N] [--graph path]", file=sys.stderr)
sys.exit(1)
from graphify.serve import _score_nodes, _bfs, _dfs, _subgraph_to_text
from graphify.serve import _score_nodes, _bfs, _dfs, _weighted_bfs, _subgraph_to_text
from graphify.security import sanitize_label
from networkx.readwrite import json_graph
question = sys.argv[2]
use_dfs = "--dfs" in sys.argv
use_weighted = "--weighted" in sys.argv
budget = 2000
graph_path = "graphify-out/graph.json"
args = sys.argv[3:]
Expand Down Expand Up @@ -1128,7 +1131,12 @@ def main() -> None:
print("No matching nodes found.")
sys.exit(0)
start = [nid for _, nid in scored[:5]]
nodes, edges = (_dfs if use_dfs else _bfs)(G, start, depth=2)
if use_weighted:
nodes, edges = _weighted_bfs(G, start, depth=2)
elif use_dfs:
nodes, edges = _dfs(G, start, depth=2)
else:
nodes, edges = _bfs(G, start, depth=2)
print(_subgraph_to_text(G, nodes, edges, token_budget=budget))
elif cmd == "save-result":
# graphify save-result --question Q --answer A --type T [--nodes N1 N2 ...]
Expand All @@ -1151,13 +1159,14 @@ def main() -> None:
print(f"Saved to {out}")
elif cmd == "path":
if len(sys.argv) < 4:
print("Usage: graphify path \"<source>\" \"<target>\" [--graph path]", file=sys.stderr)
print("Usage: graphify path \"<source>\" \"<target>\" [--weighted] [--graph path]", file=sys.stderr)
sys.exit(1)
from graphify.serve import _score_nodes
from graphify.serve import _score_nodes, _weighted_shortest_path
from networkx.readwrite import json_graph
import networkx as _nx
source_label = sys.argv[2]
target_label = sys.argv[3]
use_weighted = "--weighted" in sys.argv
graph_path = "graphify-out/graph.json"
args = sys.argv[4:]
for i, a in enumerate(args):
Expand All @@ -1181,23 +1190,34 @@ def main() -> None:
print(f"No node matching '{target_label}' found.", file=sys.stderr)
sys.exit(1)
src_nid, tgt_nid = src_scored[0][1], tgt_scored[0][1]
try:
path_nodes = _nx.shortest_path(G, src_nid, tgt_nid)
except (_nx.NetworkXNoPath, _nx.NodeNotFound):
print(f"No path found between '{source_label}' and '{target_label}'.")
sys.exit(0)
if use_weighted:
path_nodes = _weighted_shortest_path(G, src_nid, tgt_nid)
if path_nodes is None:
print(f"No path found between '{source_label}' and '{target_label}'.")
sys.exit(0)
else:
try:
path_nodes = _nx.shortest_path(G, src_nid, tgt_nid)
except (_nx.NetworkXNoPath, _nx.NodeNotFound):
print(f"No path found between '{source_label}' and '{target_label}'.")
sys.exit(0)
hops = len(path_nodes) - 1
mode_label = "Weighted shortest path (Dijkstra)" if use_weighted else "Shortest path"
segments = []
for i in range(len(path_nodes) - 1):
u, v = path_nodes[i], path_nodes[i + 1]
edata = G.edges[u, v]
rel = edata.get("relation", "")
conf = edata.get("confidence", "")
conf_str = f" [{conf}]" if conf else ""
cost = edata.get("cost", 1.0)
conf_str = f" [{conf}"
if use_weighted:
conf_str += f" cost={cost:.2f}"
conf_str += "]"
if i == 0:
segments.append(G.nodes[u].get("label", u))
segments.append(f"--{rel}{conf_str}--> {G.nodes[v].get('label', v)}")
print(f"Shortest path ({hops} hops):\n " + " ".join(segments))
print(f"{mode_label} ({hops} hops):\n " + " ".join(segments))

elif cmd == "explain":
if len(sys.argv) < 3:
Expand Down
4 changes: 4 additions & 0 deletions graphify/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ def build_from_json(extraction: dict, *, directed: bool = False) -> nx.Graph:
if src not in node_set or tgt not in node_set:
continue # skip edges to external/stdlib nodes - expected, not an error
attrs = {k: v for k, v in edge.items() if k not in ("source", "target")}
# Derive traversal cost from confidence_score: high confidence = low cost.
# Used by weighted Dijkstra and priority-queue BFS.
cs = float(attrs.get("confidence_score", 1.0) or 1.0)
attrs["cost"] = 1.0 / max(cs, 0.01)
# Preserve original edge direction - undirected graphs lose it otherwise,
# causing display functions to show edges backwards.
attrs["_src"] = src
Expand Down
39 changes: 31 additions & 8 deletions graphify/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@ def _suppress_output():
return contextlib.redirect_stdout(io.StringIO())


def _partition(G: nx.Graph) -> dict[str, int]:
def _partition(G: nx.Graph, *, weighted: bool = False) -> dict[str, int]:
"""Run community detection. Returns {node_id: community_id}.

Tries Leiden (graspologic) first — best quality.
Falls back to Louvain (built into networkx) if graspologic is not installed.

When weighted=True, uses the ``confidence_score`` edge attribute so that
EXTRACTED edges (1.0) bind communities more tightly than INFERRED (0.6-0.9)
or AMBIGUOUS (0.1-0.3) edges.

Output from graspologic is suppressed to prevent ANSI escape codes
from corrupting terminal scroll buffers on Windows PowerShell 5.1.
"""
Expand All @@ -35,7 +39,15 @@ def _partition(G: nx.Graph) -> dict[str, int]:
try:
sys.stderr = io.StringIO()
with _suppress_output():
result = leiden(G)
# graspologic leiden reads edge weights from the 'weight' attr
# by default. Copy confidence_score into 'weight' if weighted.
if weighted:
Gw = G.copy()
for u, v, d in Gw.edges(data=True):
d["weight"] = float(d.get("confidence_score", 1.0) or 1.0)
result = leiden(Gw)
else:
result = leiden(G)
finally:
sys.stderr = old_stderr
return result
Expand All @@ -48,21 +60,32 @@ def _partition(G: nx.Graph) -> dict[str, int]:
kwargs: dict = {"seed": 42, "threshold": 1e-4}
if "max_level" in inspect.signature(nx.community.louvain_communities).parameters:
kwargs["max_level"] = 10
communities = nx.community.louvain_communities(G, **kwargs)
if weighted:
# Copy confidence_score into 'weight' attr for louvain
Gw = G.copy()
for u, v, d in Gw.edges(data=True):
d["weight"] = float(d.get("confidence_score", 1.0) or 1.0)
kwargs["weight"] = "weight"
communities = nx.community.louvain_communities(Gw, **kwargs)
else:
communities = nx.community.louvain_communities(G, **kwargs)
return {node: cid for cid, nodes in enumerate(communities) for node in nodes}


_MAX_COMMUNITY_FRACTION = 0.25 # communities larger than 25% of graph get split
_MIN_SPLIT_SIZE = 10 # only split if community has at least this many nodes


def cluster(G: nx.Graph) -> dict[int, list[str]]:
def cluster(G: nx.Graph, *, weighted: bool = False) -> dict[int, list[str]]:
"""Run Leiden community detection. Returns {community_id: [node_ids]}.

Community IDs are stable across runs: 0 = largest community after splitting.
Oversized communities (> 25% of graph nodes, min 10) are split by running
a second Leiden pass on the subgraph.

When weighted=True, uses confidence_score as edge weight so EXTRACTED edges
bind communities more tightly than INFERRED or AMBIGUOUS edges.

Accepts directed or undirected graphs. DiGraphs are converted to undirected
internally since Louvain/Leiden require undirected input.
"""
Expand All @@ -80,7 +103,7 @@ def cluster(G: nx.Graph) -> dict[int, list[str]]:

raw: dict[int, list[str]] = {}
if connected.number_of_nodes() > 0:
partition = _partition(connected)
partition = _partition(connected, weighted=weighted)
for node, cid in partition.items():
raw.setdefault(cid, []).append(node)

Expand All @@ -95,7 +118,7 @@ def cluster(G: nx.Graph) -> dict[int, list[str]]:
final_communities: list[list[str]] = []
for nodes in raw.values():
if len(nodes) > max_size:
final_communities.extend(_split_community(G, nodes))
final_communities.extend(_split_community(G, nodes, weighted=weighted))
else:
final_communities.append(nodes)

Expand All @@ -104,14 +127,14 @@ def cluster(G: nx.Graph) -> dict[int, list[str]]:
return {i: sorted(nodes) for i, nodes in enumerate(final_communities)}


def _split_community(G: nx.Graph, nodes: list[str]) -> list[list[str]]:
def _split_community(G: nx.Graph, nodes: list[str], *, weighted: bool = False) -> list[list[str]]:
"""Run a second Leiden pass on a community subgraph to split it further."""
subgraph = G.subgraph(nodes)
if subgraph.number_of_edges() == 0:
# No edges - split into individual nodes
return [[n] for n in sorted(nodes)]
try:
sub_partition = _partition(subgraph)
sub_partition = _partition(subgraph, weighted=weighted)
sub_communities: dict[int, list[str]] = {}
for node, cid in sub_partition.items():
sub_communities.setdefault(cid, []).append(node)
Expand Down
77 changes: 66 additions & 11 deletions graphify/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,44 @@ def _dfs(G: nx.Graph, start_nodes: list[str], depth: int) -> tuple[set[str], lis
return visited, edges_seen


def _weighted_bfs(G: nx.Graph, start_nodes: list[str], depth: int) -> tuple[set[str], list[tuple]]:
    """Priority-queue BFS: explores low-cost (high-confidence) edges first.

    Uses the ``cost`` attribute set by build.py (1/confidence_score).
    Falls back to cost=1.0 for edges without the attribute.

    Returns ``(visited, edges_seen)``. Note that ``edges_seen`` may include
    fringe edges whose far endpoint lies beyond ``depth`` hops and is
    therefore absent from ``visited``.
    """
    import heapq
    visited: set[str] = set()
    edges_seen: list[tuple] = []
    # Heap entries are (cumulative_cost, hop_count, node_id).
    heap: list[tuple[float, int, str]] = [(0.0, 0, n) for n in start_nodes]
    # Bug fix: heappush/heappop assume the list already satisfies the heap
    # invariant, which a plain list comprehension does not guarantee.
    # Without this, pops are not guaranteed to be cost-minimal.
    heapq.heapify(heap)
    while heap:
        cost, hops, node = heapq.heappop(heap)
        if node in visited or hops > depth:
            continue
        visited.add(node)
        for neighbor in G.neighbors(node):
            if neighbor in visited:
                continue
            edata = G[node][neighbor]
            # MultiGraph adjacency maps edge keys to data dicts; use the
            # first parallel edge's attributes.
            if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
                edata = next(iter(edata.values()), {})
            edge_cost = float(edata.get("cost", 1.0))
            heapq.heappush(heap, (cost + edge_cost, hops + 1, neighbor))
            edges_seen.append((node, neighbor))
    return visited, edges_seen


def _weighted_shortest_path(G: nx.Graph, src: str, tgt: str, max_hops: int = 8) -> list[str] | None:
    """Dijkstra shortest path using the ``cost`` edge attribute.

    Returns the list of node ids from ``src`` to ``tgt``, or ``None`` when
    the endpoints are disconnected/unknown or the cheapest path requires
    more than ``max_hops`` edges.
    """
    try:
        found = nx.dijkstra_path(G, src, tgt, weight="cost")
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        # Either no route exists or a node id is not in the graph.
        return None
    hop_count = len(found) - 1
    return found if hop_count <= max_hops else None


def _subgraph_to_text(G: nx.Graph, nodes: set[str], edges: list[tuple], token_budget: int = 2000) -> str:
"""Render subgraph as text, cutting at token_budget (approx 3 chars/token)."""
char_budget = token_budget * 3
Expand Down Expand Up @@ -171,8 +209,8 @@ async def list_tools() -> list[types.Tool]:
"type": "object",
"properties": {
"question": {"type": "string", "description": "Natural language question or keyword search"},
"mode": {"type": "string", "enum": ["bfs", "dfs"], "default": "bfs",
"description": "bfs=broad context, dfs=trace a specific path"},
"mode": {"type": "string", "enum": ["bfs", "dfs", "weighted"], "default": "bfs",
"description": "bfs=broad context, dfs=trace a specific path, weighted=priority-queue BFS preferring high-confidence edges"},
"depth": {"type": "integer", "default": 3, "description": "Traversal depth (1-6)"},
"token_budget": {"type": "integer", "default": 2000, "description": "Max output tokens"},
},
Expand Down Expand Up @@ -228,6 +266,7 @@ async def list_tools() -> list[types.Tool]:
"source": {"type": "string", "description": "Source concept label or keyword"},
"target": {"type": "string", "description": "Target concept label or keyword"},
"max_hops": {"type": "integer", "default": 8, "description": "Maximum hops to consider"},
"weighted": {"type": "boolean", "default": False, "description": "Use Dijkstra with confidence-based weights (high confidence = low cost)"},
},
"required": ["source", "target"],
},
Expand All @@ -244,7 +283,12 @@ def _tool_query_graph(arguments: dict) -> str:
start_nodes = [nid for _, nid in scored[:3]]
if not start_nodes:
return "No matching nodes found."
nodes, edges = _dfs(G, start_nodes, depth) if mode == "dfs" else _bfs(G, start_nodes, depth)
if mode == "weighted":
nodes, edges = _weighted_bfs(G, start_nodes, depth)
elif mode == "dfs":
nodes, edges = _dfs(G, start_nodes, depth)
else:
nodes, edges = _bfs(G, start_nodes, depth)
header = f"Traversal: {mode.upper()} depth={depth} | Start: {[G.nodes[n].get('label', n) for n in start_nodes]} | {len(nodes)} nodes found\n\n"
return header + _subgraph_to_text(G, nodes, edges, budget)

Expand Down Expand Up @@ -319,24 +363,35 @@ def _tool_shortest_path(arguments: dict) -> str:
return f"No node matching target '{arguments['target']}' found."
src_nid, tgt_nid = src_scored[0][1], tgt_scored[0][1]
max_hops = int(arguments.get("max_hops", 8))
try:
path_nodes = nx.shortest_path(G, src_nid, tgt_nid)
except (nx.NetworkXNoPath, nx.NodeNotFound):
return f"No path found between '{G.nodes[src_nid].get('label', src_nid)}' and '{G.nodes[tgt_nid].get('label', tgt_nid)}'."
use_weighted = arguments.get("weighted", False)
if use_weighted:
path_nodes = _weighted_shortest_path(G, src_nid, tgt_nid, max_hops)
if path_nodes is None:
return f"No path found between '{G.nodes[src_nid].get('label', src_nid)}' and '{G.nodes[tgt_nid].get('label', tgt_nid)}'."
else:
try:
path_nodes = nx.shortest_path(G, src_nid, tgt_nid)
except (nx.NetworkXNoPath, nx.NodeNotFound):
return f"No path found between '{G.nodes[src_nid].get('label', src_nid)}' and '{G.nodes[tgt_nid].get('label', tgt_nid)}'."
if len(path_nodes) - 1 > max_hops:
return f"Path exceeds max_hops={max_hops} ({len(path_nodes) - 1} hops found)."
hops = len(path_nodes) - 1
if hops > max_hops:
return f"Path exceeds max_hops={max_hops} ({hops} hops found)."
mode_label = "Weighted shortest path (Dijkstra)" if use_weighted else "Shortest path"
segments = []
for i in range(len(path_nodes) - 1):
u, v = path_nodes[i], path_nodes[i + 1]
edata = G.edges[u, v]
rel = edata.get("relation", "")
conf = edata.get("confidence", "")
conf_str = f" [{conf}]" if conf else ""
cost = edata.get("cost", 1.0)
conf_str = f" [{conf}"
if use_weighted:
conf_str += f" cost={cost:.2f}"
conf_str += "]"
if i == 0:
segments.append(G.nodes[u].get("label", u))
segments.append(f"--{rel}{conf_str}--> {G.nodes[v].get('label', v)}")
return f"Shortest path ({hops} hops):\n " + " ".join(segments)
return f"{mode_label} ({hops} hops):\n " + " ".join(segments)

_handlers = {
"query_graph": _tool_query_graph,
Expand Down
5 changes: 5 additions & 0 deletions opencode.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"plugin": [
".opencode/plugins/graphify.js"
]
}