Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .opencode/plugins/graphify.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// graphify OpenCode plugin
// Injects a knowledge graph reminder before bash tool calls when the graph exists.
import { existsSync } from "fs";
import { join } from "path";

// OpenCode plugin factory. Receives the workspace directory and returns the
// hook map OpenCode wires into tool execution.
export const GraphifyPlugin = async ({ directory }) => {
  // Remind only once per session so every subsequent bash call runs untouched.
  let reminded = false;

  return {
    /**
     * Prepend a one-time knowledge-graph reminder to the first bash command,
     * but only when the generated graph artifact exists in the workspace.
     *
     * @param {{ tool: string }} input - tool invocation metadata.
     * @param {{ args: { command?: string } }} output - mutable tool arguments.
     */
    "tool.execute.before": async (input, output) => {
      if (reminded) return;
      // No graph generated yet — nothing to point the agent at.
      if (!existsSync(join(directory, "graphify-out", "graph.json"))) return;
      if (input.tool !== "bash") return;

      const command = output?.args?.command;
      // Guard: only prefix an actual command string; concatenating onto a
      // missing value would yield "… && undefined".
      if (typeof command !== "string") return;

      output.args.command =
        'echo "[graphify] Knowledge graph available. Read graphify-out/GRAPH_REPORT.md for god nodes and architecture context before searching files." && ' +
        command;
      reminded = true;
    },
  };
};
44 changes: 32 additions & 12 deletions graphify/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,7 @@ def main() -> None:
print("Commands:")
print(" install [--platform P] copy skill to platform config dir (claude|windows|codex|opencode|aider|claw|droid|trae|trae-cn|gemini|cursor|antigravity|hermes|kiro)")
print(" path \"A\" \"B\" shortest path between two nodes in graph.json")
print(" --weighted use Dijkstra with confidence-based edge weights")
print(" --graph <path> path to graph.json (default graphify-out/graph.json)")
print(" explain \"X\" plain-language explanation of a node and its neighbors")
print(" --graph <path> path to graph.json (default graphify-out/graph.json)")
Expand All @@ -911,6 +912,7 @@ def main() -> None:
print(" cluster-only <path> rerun clustering on an existing graph.json and regenerate report")
print(" query \"<question>\" BFS traversal of graph.json for a question")
print(" --dfs use depth-first instead of breadth-first")
print(" --weighted priority-queue BFS preferring high-confidence edges")
print(" --budget N cap output at N tokens (default 2000)")
print(" --graph <path> path to graph.json (default graphify-out/graph.json)")
print(" save-result save a Q&A result to graphify-out/memory/ for graph feedback loop")
Expand Down Expand Up @@ -1074,13 +1076,14 @@ def main() -> None:
sys.exit(1)
elif cmd == "query":
if len(sys.argv) < 3:
print("Usage: graphify query \"<question>\" [--dfs] [--budget N] [--graph path]", file=sys.stderr)
print("Usage: graphify query \"<question>\" [--dfs] [--weighted] [--budget N] [--graph path]", file=sys.stderr)
sys.exit(1)
from graphify.serve import _score_nodes, _bfs, _dfs, _subgraph_to_text
from graphify.serve import _score_nodes, _bfs, _dfs, _weighted_bfs, _subgraph_to_text
from graphify.security import sanitize_label
from networkx.readwrite import json_graph
question = sys.argv[2]
use_dfs = "--dfs" in sys.argv
use_weighted = "--weighted" in sys.argv
budget = 2000
graph_path = "graphify-out/graph.json"
args = sys.argv[3:]
Expand Down Expand Up @@ -1128,7 +1131,12 @@ def main() -> None:
print("No matching nodes found.")
sys.exit(0)
start = [nid for _, nid in scored[:5]]
nodes, edges = (_dfs if use_dfs else _bfs)(G, start, depth=2)
if use_weighted:
nodes, edges = _weighted_bfs(G, start, depth=2)
elif use_dfs:
nodes, edges = _dfs(G, start, depth=2)
else:
nodes, edges = _bfs(G, start, depth=2)
print(_subgraph_to_text(G, nodes, edges, token_budget=budget))
elif cmd == "save-result":
# graphify save-result --question Q --answer A --type T [--nodes N1 N2 ...]
Expand All @@ -1151,13 +1159,14 @@ def main() -> None:
print(f"Saved to {out}")
elif cmd == "path":
if len(sys.argv) < 4:
print("Usage: graphify path \"<source>\" \"<target>\" [--graph path]", file=sys.stderr)
print("Usage: graphify path \"<source>\" \"<target>\" [--weighted] [--graph path]", file=sys.stderr)
sys.exit(1)
from graphify.serve import _score_nodes
from graphify.serve import _score_nodes, _weighted_shortest_path
from networkx.readwrite import json_graph
import networkx as _nx
source_label = sys.argv[2]
target_label = sys.argv[3]
use_weighted = "--weighted" in sys.argv
graph_path = "graphify-out/graph.json"
args = sys.argv[4:]
for i, a in enumerate(args):
Expand All @@ -1181,23 +1190,34 @@ def main() -> None:
print(f"No node matching '{target_label}' found.", file=sys.stderr)
sys.exit(1)
src_nid, tgt_nid = src_scored[0][1], tgt_scored[0][1]
try:
path_nodes = _nx.shortest_path(G, src_nid, tgt_nid)
except (_nx.NetworkXNoPath, _nx.NodeNotFound):
print(f"No path found between '{source_label}' and '{target_label}'.")
sys.exit(0)
if use_weighted:
path_nodes = _weighted_shortest_path(G, src_nid, tgt_nid)
if path_nodes is None:
print(f"No path found between '{source_label}' and '{target_label}'.")
sys.exit(0)
else:
try:
path_nodes = _nx.shortest_path(G, src_nid, tgt_nid)
except (_nx.NetworkXNoPath, _nx.NodeNotFound):
print(f"No path found between '{source_label}' and '{target_label}'.")
sys.exit(0)
hops = len(path_nodes) - 1
mode_label = "Weighted shortest path (Dijkstra)" if use_weighted else "Shortest path"
segments = []
for i in range(len(path_nodes) - 1):
u, v = path_nodes[i], path_nodes[i + 1]
edata = G.edges[u, v]
rel = edata.get("relation", "")
conf = edata.get("confidence", "")
conf_str = f" [{conf}]" if conf else ""
cost = edata.get("cost", 1.0)
conf_str = f" [{conf}"
if use_weighted:
conf_str += f" cost={cost:.2f}"
conf_str += "]"
if i == 0:
segments.append(G.nodes[u].get("label", u))
segments.append(f"--{rel}{conf_str}--> {G.nodes[v].get('label', v)}")
print(f"Shortest path ({hops} hops):\n " + " ".join(segments))
print(f"{mode_label} ({hops} hops):\n " + " ".join(segments))

elif cmd == "explain":
if len(sys.argv) < 3:
Expand Down
4 changes: 4 additions & 0 deletions graphify/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ def build_from_json(extraction: dict, *, directed: bool = False) -> nx.Graph:
if src not in node_set or tgt not in node_set:
continue # skip edges to external/stdlib nodes - expected, not an error
attrs = {k: v for k, v in edge.items() if k not in ("source", "target")}
# Derive traversal cost from confidence_score: high confidence = low cost.
# Used by weighted Dijkstra and priority-queue BFS.
cs = float(attrs.get("confidence_score", 1.0) or 1.0)
attrs["cost"] = 1.0 / max(cs, 0.01)
# Preserve original edge direction - undirected graphs lose it otherwise,
# causing display functions to show edges backwards.
attrs["_src"] = src
Expand Down
39 changes: 31 additions & 8 deletions graphify/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,16 @@ def _suppress_output():
return contextlib.redirect_stdout(io.StringIO())


def _partition(G: nx.Graph) -> dict[str, int]:
def _partition(G: nx.Graph, *, weighted: bool = False) -> dict[str, int]:
"""Run community detection. Returns {node_id: community_id}.

Tries Leiden (graspologic) first — best quality.
Falls back to Louvain (built into networkx) if graspologic is not installed.

When weighted=True, uses the ``confidence_score`` edge attribute so that
EXTRACTED edges (1.0) bind communities more tightly than INFERRED (0.6-0.9)
or AMBIGUOUS (0.1-0.3) edges.

Output from graspologic is suppressed to prevent ANSI escape codes
from corrupting terminal scroll buffers on Windows PowerShell 5.1.
"""
Expand All @@ -35,7 +39,15 @@ def _partition(G: nx.Graph) -> dict[str, int]:
try:
sys.stderr = io.StringIO()
with _suppress_output():
result = leiden(G)
# graspologic leiden reads edge weights from the 'weight' attr
# by default. Copy confidence_score into 'weight' if weighted.
if weighted:
Gw = G.copy()
for u, v, d in Gw.edges(data=True):
d["weight"] = float(d.get("confidence_score", 1.0) or 1.0)
result = leiden(Gw)
else:
result = leiden(G)
finally:
sys.stderr = old_stderr
return result
Expand All @@ -48,21 +60,32 @@ def _partition(G: nx.Graph) -> dict[str, int]:
kwargs: dict = {"seed": 42, "threshold": 1e-4}
if "max_level" in inspect.signature(nx.community.louvain_communities).parameters:
kwargs["max_level"] = 10
communities = nx.community.louvain_communities(G, **kwargs)
if weighted:
# Copy confidence_score into 'weight' attr for louvain
Gw = G.copy()
for u, v, d in Gw.edges(data=True):
d["weight"] = float(d.get("confidence_score", 1.0) or 1.0)
kwargs["weight"] = "weight"
communities = nx.community.louvain_communities(Gw, **kwargs)
else:
communities = nx.community.louvain_communities(G, **kwargs)
return {node: cid for cid, nodes in enumerate(communities) for node in nodes}


_MAX_COMMUNITY_FRACTION = 0.25 # communities larger than 25% of graph get split
_MIN_SPLIT_SIZE = 10 # only split if community has at least this many nodes


def cluster(G: nx.Graph) -> dict[int, list[str]]:
def cluster(G: nx.Graph, *, weighted: bool = False) -> dict[int, list[str]]:
"""Run Leiden community detection. Returns {community_id: [node_ids]}.

Community IDs are stable across runs: 0 = largest community after splitting.
Oversized communities (> 25% of graph nodes, min 10) are split by running
a second Leiden pass on the subgraph.

When weighted=True, uses confidence_score as edge weight so EXTRACTED edges
bind communities more tightly than INFERRED or AMBIGUOUS edges.

Accepts directed or undirected graphs. DiGraphs are converted to undirected
internally since Louvain/Leiden require undirected input.
"""
Expand All @@ -80,7 +103,7 @@ def cluster(G: nx.Graph) -> dict[int, list[str]]:

raw: dict[int, list[str]] = {}
if connected.number_of_nodes() > 0:
partition = _partition(connected)
partition = _partition(connected, weighted=weighted)
for node, cid in partition.items():
raw.setdefault(cid, []).append(node)

Expand All @@ -95,7 +118,7 @@ def cluster(G: nx.Graph) -> dict[int, list[str]]:
final_communities: list[list[str]] = []
for nodes in raw.values():
if len(nodes) > max_size:
final_communities.extend(_split_community(G, nodes))
final_communities.extend(_split_community(G, nodes, weighted=weighted))
else:
final_communities.append(nodes)

Expand All @@ -104,14 +127,14 @@ def cluster(G: nx.Graph) -> dict[int, list[str]]:
return {i: sorted(nodes) for i, nodes in enumerate(final_communities)}


def _split_community(G: nx.Graph, nodes: list[str]) -> list[list[str]]:
def _split_community(G: nx.Graph, nodes: list[str], *, weighted: bool = False) -> list[list[str]]:
"""Run a second Leiden pass on a community subgraph to split it further."""
subgraph = G.subgraph(nodes)
if subgraph.number_of_edges() == 0:
# No edges - split into individual nodes
return [[n] for n in sorted(nodes)]
try:
sub_partition = _partition(subgraph)
sub_partition = _partition(subgraph, weighted=weighted)
sub_communities: dict[int, list[str]] = {}
for node, cid in sub_partition.items():
sub_communities.setdefault(cid, []).append(node)
Expand Down
77 changes: 66 additions & 11 deletions graphify/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,44 @@ def _dfs(G: nx.Graph, start_nodes: list[str], depth: int) -> tuple[set[str], lis
return visited, edges_seen


def _weighted_bfs(G: nx.Graph, start_nodes: list[str], depth: int) -> tuple[set[str], list[tuple]]:
    """Priority-queue BFS: explores low-cost (high-confidence) edges first.

    Uses the ``cost`` attribute set by build.py (1/confidence_score).
    Falls back to cost=1.0 for edges without the attribute.

    Returns ``(visited, edges_seen)``. Note that ``edges_seen`` may include
    fringe edges whose far endpoint lies beyond ``depth`` hops and is
    therefore absent from ``visited``.
    """
    import heapq
    visited: set[str] = set()
    edges_seen: list[tuple] = []
    # Heap entries are (cumulative_cost, hop_count, node_id).
    heap: list[tuple[float, int, str]] = [(0.0, 0, n) for n in start_nodes]
    # Bug fix: heappush/heappop assume the list already satisfies the heap
    # invariant, which a plain list comprehension does not guarantee.
    # Without this, pops are not guaranteed to be cost-minimal.
    heapq.heapify(heap)
    while heap:
        cost, hops, node = heapq.heappop(heap)
        if node in visited or hops > depth:
            continue
        visited.add(node)
        for neighbor in G.neighbors(node):
            if neighbor in visited:
                continue
            edata = G[node][neighbor]
            # MultiGraph adjacency maps edge keys to data dicts; use the
            # first parallel edge's attributes.
            if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
                edata = next(iter(edata.values()), {})
            edge_cost = float(edata.get("cost", 1.0))
            heapq.heappush(heap, (cost + edge_cost, hops + 1, neighbor))
            edges_seen.append((node, neighbor))
    return visited, edges_seen


def _weighted_shortest_path(G: nx.Graph, src: str, tgt: str, max_hops: int = 8) -> list[str] | None:
    """Dijkstra shortest path using the ``cost`` edge attribute.

    Returns the list of node ids from ``src`` to ``tgt``, or ``None`` when
    the endpoints are disconnected/unknown or the cheapest path requires
    more than ``max_hops`` edges.
    """
    try:
        found = nx.dijkstra_path(G, src, tgt, weight="cost")
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        # Either no route exists or a node id is not in the graph.
        return None
    hop_count = len(found) - 1
    return found if hop_count <= max_hops else None


def _subgraph_to_text(G: nx.Graph, nodes: set[str], edges: list[tuple], token_budget: int = 2000) -> str:
"""Render subgraph as text, cutting at token_budget (approx 3 chars/token)."""
char_budget = token_budget * 3
Expand Down Expand Up @@ -171,8 +209,8 @@ async def list_tools() -> list[types.Tool]:
"type": "object",
"properties": {
"question": {"type": "string", "description": "Natural language question or keyword search"},
"mode": {"type": "string", "enum": ["bfs", "dfs"], "default": "bfs",
"description": "bfs=broad context, dfs=trace a specific path"},
"mode": {"type": "string", "enum": ["bfs", "dfs", "weighted"], "default": "bfs",
"description": "bfs=broad context, dfs=trace a specific path, weighted=priority-queue BFS preferring high-confidence edges"},
"depth": {"type": "integer", "default": 3, "description": "Traversal depth (1-6)"},
"token_budget": {"type": "integer", "default": 2000, "description": "Max output tokens"},
},
Expand Down Expand Up @@ -228,6 +266,7 @@ async def list_tools() -> list[types.Tool]:
"source": {"type": "string", "description": "Source concept label or keyword"},
"target": {"type": "string", "description": "Target concept label or keyword"},
"max_hops": {"type": "integer", "default": 8, "description": "Maximum hops to consider"},
"weighted": {"type": "boolean", "default": False, "description": "Use Dijkstra with confidence-based weights (high confidence = low cost)"},
},
"required": ["source", "target"],
},
Expand All @@ -244,7 +283,12 @@ def _tool_query_graph(arguments: dict) -> str:
start_nodes = [nid for _, nid in scored[:3]]
if not start_nodes:
return "No matching nodes found."
nodes, edges = _dfs(G, start_nodes, depth) if mode == "dfs" else _bfs(G, start_nodes, depth)
if mode == "weighted":
nodes, edges = _weighted_bfs(G, start_nodes, depth)
elif mode == "dfs":
nodes, edges = _dfs(G, start_nodes, depth)
else:
nodes, edges = _bfs(G, start_nodes, depth)
header = f"Traversal: {mode.upper()} depth={depth} | Start: {[G.nodes[n].get('label', n) for n in start_nodes]} | {len(nodes)} nodes found\n\n"
return header + _subgraph_to_text(G, nodes, edges, budget)

Expand Down Expand Up @@ -319,24 +363,35 @@ def _tool_shortest_path(arguments: dict) -> str:
return f"No node matching target '{arguments['target']}' found."
src_nid, tgt_nid = src_scored[0][1], tgt_scored[0][1]
max_hops = int(arguments.get("max_hops", 8))
try:
path_nodes = nx.shortest_path(G, src_nid, tgt_nid)
except (nx.NetworkXNoPath, nx.NodeNotFound):
return f"No path found between '{G.nodes[src_nid].get('label', src_nid)}' and '{G.nodes[tgt_nid].get('label', tgt_nid)}'."
use_weighted = arguments.get("weighted", False)
if use_weighted:
path_nodes = _weighted_shortest_path(G, src_nid, tgt_nid, max_hops)
if path_nodes is None:
return f"No path found between '{G.nodes[src_nid].get('label', src_nid)}' and '{G.nodes[tgt_nid].get('label', tgt_nid)}'."
else:
try:
path_nodes = nx.shortest_path(G, src_nid, tgt_nid)
except (nx.NetworkXNoPath, nx.NodeNotFound):
return f"No path found between '{G.nodes[src_nid].get('label', src_nid)}' and '{G.nodes[tgt_nid].get('label', tgt_nid)}'."
if len(path_nodes) - 1 > max_hops:
return f"Path exceeds max_hops={max_hops} ({len(path_nodes) - 1} hops found)."
hops = len(path_nodes) - 1
if hops > max_hops:
return f"Path exceeds max_hops={max_hops} ({hops} hops found)."
mode_label = "Weighted shortest path (Dijkstra)" if use_weighted else "Shortest path"
segments = []
for i in range(len(path_nodes) - 1):
u, v = path_nodes[i], path_nodes[i + 1]
edata = G.edges[u, v]
rel = edata.get("relation", "")
conf = edata.get("confidence", "")
conf_str = f" [{conf}]" if conf else ""
cost = edata.get("cost", 1.0)
conf_str = f" [{conf}"
if use_weighted:
conf_str += f" cost={cost:.2f}"
conf_str += "]"
if i == 0:
segments.append(G.nodes[u].get("label", u))
segments.append(f"--{rel}{conf_str}--> {G.nodes[v].get('label', v)}")
return f"Shortest path ({hops} hops):\n " + " ".join(segments)
return f"{mode_label} ({hops} hops):\n " + " ".join(segments)

_handlers = {
"query_graph": _tool_query_graph,
Expand Down
5 changes: 5 additions & 0 deletions opencode.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"plugin": [
".opencode/plugins/graphify.js"
]
}