From 44e31964fd40a8e9296b34da78d03aaa3744efba Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 21:03:16 +0000 Subject: [PATCH 1/2] feat: expose analysis and indexing tools over MCP The MCP server previously exposed only search_code, analyze_impact, get_symbol_context, and query_graph. Wire up the remaining read-only analysis use cases plus repository indexing so AI clients can reach the same capabilities as the CLI: - list_repositories (also serves as stats) - list_features / get_feature / get_impacted_features - file_uses (cross-repo / file dependency edges) - list_clusters / get_file_cluster / architecture_overview - index_repository Each tool delegates to the existing container use cases and serialises the domain result as JSON (architecture_overview returns Markdown). Update the server instructions and README tool table to match. --- README.md | 9 + src/connector/adapter/mcp/server.rs | 462 +++++++++++++++++++++++++++- 2 files changed, 457 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 59fb36d..2d9d773 100644 --- a/README.md +++ b/README.md @@ -388,6 +388,15 @@ The HTTP server exposes the MCP endpoint at `/mcp`. | `analyze_impact` | Blast-radius analysis for a symbol. Accepts `symbol`, `repository_id`, and `regex`. | | `get_symbol_context` | 360° caller/callee context for a symbol. Accepts `symbol`, `repository_id`, and `regex`. | | `query_graph` | Precise relationship queries over the call graph. Accepts `pattern`, `target`, `repository_id`, and `limit`. | +| `list_repositories` | List indexed repositories with file/chunk counts and language breakdown (also serves as stats). Takes no arguments. | +| `list_features` | Entry-point execution features scored by criticality. Accepts `repository_id` and `limit`. | +| `get_feature` | A single execution feature by entry-point symbol. Accepts `symbol` and `repository_id`. | +| `get_impacted_features` | Features whose call chain includes any changed symbol. 
Accepts `symbols` and `repository_id`. | +| `file_uses` | Files in one repository that depend on files in another. Accepts `from` and `to` (repository name or ID). | +| `list_clusters` | Architectural clusters via Leiden community detection. Accepts `repository_id`. | +| `get_file_cluster` | The cluster a given file belongs to. Accepts `file_path` and `repository_id`. | +| `architecture_overview` | Markdown table summarising clusters and inter-cluster dependencies. Accepts `repository_id`. | +| `index_repository` | Index or re-index a repository at a filesystem path. Accepts `path`, `name`, and `force`. | The `query_graph` tool supports eight intention-named relationship `pattern`s, returning only the requested edge type instead of every relationship at once: diff --git a/src/connector/adapter/mcp/server.rs b/src/connector/adapter/mcp/server.rs index 9609cc6..5c41849 100644 --- a/src/connector/adapter/mcp/server.rs +++ b/src/connector/adapter/mcp/server.rs @@ -17,7 +17,7 @@ use serde::{Deserialize, Serialize}; use crate::application::CallGraphQuery; use crate::connector::api::Container; -use crate::domain::SearchQuery; +use crate::domain::{FileEdge, SearchQuery, VectorStore}; use super::tools::SearchResultOutput; @@ -154,6 +154,126 @@ pub struct GraphQueryResult { pub total: usize, } +fn default_features_limit() -> usize { + 20 +} + +/// Input parameters for the list_repositories tool (takes no arguments). +#[derive(Debug, Deserialize, JsonSchema)] +pub struct ListRepositoriesInput {} + +/// Input parameters for the list_features tool +#[derive(Debug, Deserialize, JsonSchema)] +pub struct ListFeaturesInput { + /// Repository ID to discover execution features (entry-point call chains) in. + pub repository_id: String, + + /// Maximum number of features to return, sorted by descending criticality + /// (default: 20). 
+ #[serde(default = "default_features_limit")] + pub limit: usize, +} + +/// Input parameters for the get_feature tool +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GetFeatureInput { + /// Entry-point symbol name (exact or substring) to retrieve the feature for. + pub symbol: String, + + /// Restrict the lookup to a specific repository ID. + pub repository_id: Option<String>, +} + +/// Input parameters for the get_impacted_features tool +#[derive(Debug, Deserialize, JsonSchema)] +pub struct ImpactedFeaturesInput { + /// Changed symbols. Every feature whose forward call chain includes at least + /// one of these symbols is returned, sorted by descending criticality. + pub symbols: Vec<String>, + + /// Restrict the analysis to a specific repository ID. + pub repository_id: Option<String>, +} + +/// Input parameters for the file_uses tool +#[derive(Debug, Deserialize, JsonSchema)] +pub struct FileUsesInput { + /// Source repository (name or ID): the dependent side of the relationship. + pub from: String, + + /// Target repository (name or ID): the dependency side of the relationship. + pub to: String, +} + +/// A file-level dependency relationship returned by the file_uses tool. +#[derive(Debug, Serialize)] +pub struct FileUsesResult { + /// Resolved name of the source ("from") repository. + pub from_repository: String, + /// Resolved name of the target ("to") repository. + pub to_repository: String, + /// Directed file→file edges from the source repository into the target. + pub edges: Vec<FileEdge>, + /// Total number of edges returned. + pub total: usize, +} + +/// Input parameters for the list_clusters tool +#[derive(Debug, Deserialize, JsonSchema)] +pub struct ListClustersInput { + /// Repository ID to detect architectural clusters in. + pub repository_id: String, +} + +/// Input parameters for the get_file_cluster tool +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GetFileClusterInput { + /// File path to locate within the repository's cluster graph. 
+ pub file_path: String, + + /// Repository ID the file belongs to. + pub repository_id: String, +} + +/// Input parameters for the architecture_overview tool +#[derive(Debug, Deserialize, JsonSchema)] +pub struct ArchitectureOverviewInput { + /// Repository ID to summarise as a Markdown architecture table. + pub repository_id: String, +} + +/// Input parameters for the index_repository tool +#[derive(Debug, Deserialize, JsonSchema)] +pub struct IndexRepositoryInput { + /// Filesystem path to the repository to index. + pub path: String, + + /// Optional human-readable name (defaults to the directory name). + pub name: Option<String>, + + /// When true, delete any existing index for this path and re-index from + /// scratch. Defaults to false (incremental indexing). + #[serde(default)] + pub force: bool, +} + +/// Result returned by the index_repository tool. +#[derive(Debug, Serialize)] +pub struct IndexRepositoryResult { + /// Stable repository ID. + pub id: String, + /// Repository name. + pub name: String, + /// Absolute path that was indexed. + pub path: String, + /// Number of files indexed. + pub file_count: u64, + /// Number of code chunks produced. + pub chunk_count: u64, + /// Per-language file counts. 
+ pub languages: std::collections::HashMap, +} + // ── MCP Server ─────────────────────────────────────────────────────────────── /// MCP Server that exposes codesearch functionality @@ -376,12 +496,13 @@ impl CodesearchMcpServer { .map_err(|e| { McpError::internal_error(format!("query_graph failed: {}", e), None) })?; - let mut refs2 = use_case - .find_callers(&input.target, &q_imp) - .await - .map_err(|e| { - McpError::internal_error(format!("query_graph failed: {}", e), None) - })?; + let mut refs2 = + use_case + .find_callers(&input.target, &q_imp) + .await + .map_err(|e| { + McpError::internal_error(format!("query_graph failed: {}", e), None) + })?; refs.append(&mut refs2); (refs, true) } @@ -407,12 +528,13 @@ impl CodesearchMcpServer { .map_err(|e| { McpError::internal_error(format!("query_graph failed: {}", e), None) })?; - let mut refs2 = use_case - .find_callees(&input.target, &q_imp) - .await - .map_err(|e| { - McpError::internal_error(format!("query_graph failed: {}", e), None) - })?; + let mut refs2 = + use_case + .find_callees(&input.target, &q_imp) + .await + .map_err(|e| { + McpError::internal_error(format!("query_graph failed: {}", e), None) + })?; refs.append(&mut refs2); (refs, false) } @@ -513,6 +635,309 @@ impl CodesearchMcpServer { Ok(CallToolResult::success(vec![Content::text(json)])) } + + /// List every indexed repository together with its file/chunk counts and + /// per-language breakdown. Doubles as the "stats" view: sum the `file_count` + /// and `chunk_count` fields across the returned repositories for aggregate + /// totals. Use the returned repository IDs as the `repository_id` argument + /// for the other tools. 
+ #[tool(name = "list_repositories")] + async fn list_repositories( + &self, + _params: Parameters<ListRepositoriesInput>, + ) -> Result<CallToolResult, McpError> { + let use_case = self.container.list_use_case(); + let repos = use_case.execute().await.map_err(|e| { + McpError::internal_error(format!("Failed to list repositories: {}", e), None) + })?; + + let json = serde_json::to_string_pretty(&repos).map_err(|e| { + McpError::internal_error(format!("Failed to serialize repositories: {}", e), None) + })?; + + Ok(CallToolResult::success(vec![Content::text(json)])) + } + + /// Discover execution features — named forward call chains rooted at + /// entry-point symbols (symbols that call others but are never called within + /// the repository) — and score each for criticality. Returns up to `limit` + /// features sorted by descending criticality. + /// Requires the repository to have been indexed with call-graph support. + #[tool(name = "list_features")] + async fn list_features( + &self, + params: Parameters<ListFeaturesInput>, + ) -> Result<CallToolResult, McpError> { + let input = params.0; + + let use_case = self.container.execution_features_use_case(); + let features = use_case + .list_features(&input.repository_id, input.limit) + .await + .map_err(|e| { + McpError::internal_error(format!("Listing features failed: {}", e), None) + })?; + + let json = serde_json::to_string_pretty(&features).map_err(|e| { + McpError::internal_error(format!("Failed to serialize features: {}", e), None) + })?; + + Ok(CallToolResult::success(vec![Content::text(json)])) + } + + /// Retrieve a single execution feature by entry-point symbol name (exact or + /// substring match). Returns `null` when the symbol cannot be resolved to an + /// entry point in the call graph. + /// Requires the repository to have been indexed with call-graph support. 
+ #[tool(name = "get_feature")] + async fn get_feature( + &self, + params: Parameters<GetFeatureInput>, + ) -> Result<CallToolResult, McpError> { + let input = params.0; + + let use_case = self.container.execution_features_use_case(); + let feature = use_case + .get_feature(&input.symbol, input.repository_id.as_deref()) + .await + .map_err(|e| McpError::internal_error(format!("Feature lookup failed: {}", e), None))?; + + let json = serde_json::to_string_pretty(&feature).map_err(|e| { + McpError::internal_error(format!("Failed to serialize feature: {}", e), None) + })?; + + Ok(CallToolResult::success(vec![Content::text(json)])) + } + + /// Given a set of changed symbols, return every execution feature whose + /// forward call chain includes at least one of them, sorted by descending + /// criticality. Use this to assess which user-facing flows a change touches. + /// Requires the repository to have been indexed with call-graph support. + #[tool(name = "get_impacted_features")] + async fn get_impacted_features( + &self, + params: Parameters<ImpactedFeaturesInput>, + ) -> Result<CallToolResult, McpError> { + let input = params.0; + + let use_case = self.container.execution_features_use_case(); + let features = use_case + .get_impacted_features(&input.symbols, input.repository_id.as_deref()) + .await + .map_err(|e| { + McpError::internal_error(format!("Impacted features lookup failed: {}", e), None) + })?; + + let json = serde_json::to_string_pretty(&features).map_err(|e| { + McpError::internal_error(format!("Failed to serialize features: {}", e), None) + })?; + + Ok(CallToolResult::success(vec![Content::text(json)])) + } + + /// Show which files in one repository depend on files in another (or the same) + /// repository. Resolves both `from` and `to` by repository name or ID, builds + /// the cross-repository file-dependency graph, and returns the directed + /// file→file edges flowing from the source into the target, each annotated + /// with the referenced symbols and reference kinds. 
+ /// Requires the repositories to have been indexed with call-graph support. + #[tool(name = "file_uses")] + async fn file_uses( + &self, + params: Parameters<FileUsesInput>, + ) -> Result<CallToolResult, McpError> { + let input = params.0; + + let repos = self + .container + .list_use_case() + .execute() + .await + .map_err(|e| { + McpError::internal_error(format!("Failed to list repositories: {}", e), None) + })?; + + let resolve = |name_or_id: &str| -> Option<(String, String)> { + repos + .iter() + .find(|r| r.id() == name_or_id) + .or_else(|| { + repos + .iter() + .find(|r| r.name().eq_ignore_ascii_case(name_or_id)) + }) + .map(|r| (r.id().to_string(), r.name().to_string())) + }; + + let (from_id, from_name) = resolve(&input.from).ok_or_else(|| { + McpError::invalid_params(format!("Repository not found: '{}'", input.from), None) + })?; + let (to_id, to_name) = resolve(&input.to).ok_or_else(|| { + McpError::invalid_params(format!("Repository not found: '{}'", input.to), None) + })?; + + let graph = self + .container + .file_graph_use_case() + .build_graph(Some(&[from_id.clone(), to_id.clone()]), 1, true) + .await + .map_err(|e| { + McpError::internal_error(format!("Failed to build file graph: {}", e), None) + })?; + + let mut edges: Vec<FileEdge> = graph + .edges + .into_iter() + .filter(|e| e.from_repo_id == from_id && e.to_repo_id == to_id) + .collect(); + edges.sort_by(|a, b| { + a.to_file + .cmp(&b.to_file) + .then(a.from_file.cmp(&b.from_file)) + }); + + let total = edges.len(); + let result = FileUsesResult { + from_repository: from_name, + to_repository: to_name, + edges, + total, + }; + + let json = serde_json::to_string_pretty(&result).map_err(|e| { + McpError::internal_error(format!("Failed to serialize file uses: {}", e), None) + })?; + + Ok(CallToolResult::success(vec![Content::text(json)])) + } + + /// Detect architectural clusters in a repository by running Leiden community + /// detection over its file-dependency graph. 
Returns the clusters with their + /// names, dominant language, cohesion score, and member files. + /// Requires the repository to have been indexed with call-graph support. + #[tool(name = "list_clusters")] + async fn list_clusters( + &self, + params: Parameters<ListClustersInput>, + ) -> Result<CallToolResult, McpError> { + let input = params.0; + + let use_case = self.container.cluster_detection_use_case(); + let cluster_graph = use_case + .create_clusters(&input.repository_id) + .await + .map_err(|e| { + McpError::internal_error(format!("Cluster detection failed: {}", e), None) + })?; + + let json = serde_json::to_string_pretty(&cluster_graph).map_err(|e| { + McpError::internal_error(format!("Failed to serialize clusters: {}", e), None) + })?; + + Ok(CallToolResult::success(vec![Content::text(json)])) + } + + /// Return the architectural cluster a specific file belongs to. Returns + /// `null` when the file is not part of any detected cluster. + /// Requires the repository to have been indexed with call-graph support. + #[tool(name = "get_file_cluster")] + async fn get_file_cluster( + &self, + params: Parameters<GetFileClusterInput>, + ) -> Result<CallToolResult, McpError> { + let input = params.0; + + let use_case = self.container.cluster_detection_use_case(); + let cluster = use_case + .cluster_for_file(&input.file_path, &input.repository_id) + .await + .map_err(|e| McpError::internal_error(format!("Cluster lookup failed: {}", e), None))?; + + let json = serde_json::to_string_pretty(&cluster).map_err(|e| { + McpError::internal_error(format!("Failed to serialize cluster: {}", e), None) + })?; + + Ok(CallToolResult::success(vec![Content::text(json)])) + } + + /// Produce a high-level architecture overview of a repository as a Markdown + /// table: one row per cluster with its file count, dominant language, and top + /// inter-cluster dependencies. + /// Requires the repository to have been indexed with call-graph support. 
+ #[tool(name = "architecture_overview")] + async fn architecture_overview( + &self, + params: Parameters<ArchitectureOverviewInput>, + ) -> Result<CallToolResult, McpError> { + let input = params.0; + + let use_case = self.container.cluster_detection_use_case(); + let overview = use_case + .architecture_overview(&input.repository_id) + .await + .map_err(|e| { + McpError::internal_error(format!("Architecture overview failed: {}", e), None) + })?; + + Ok(CallToolResult::success(vec![Content::text(overview)])) + } + + /// Index (or incrementally re-index) a repository at the given filesystem + /// path so its code becomes searchable and its call graph is built. Set + /// `force=true` to delete any existing index for the path and re-index from + /// scratch. Returns the resulting repository's ID, file/chunk counts, and + /// language breakdown. This is a heavy, long-running operation. + #[tool(name = "index_repository")] + async fn index_repository( + &self, + params: Parameters<IndexRepositoryInput>, + ) -> Result<CallToolResult, McpError> { + let input = params.0; + + // Mirror the CLI IndexController: pick the vector store and namespace + // based on how the container was configured. 
+ let (store, namespace) = if self.container.memory_storage() { + (VectorStore::InMemory, None) + } else { + ( + VectorStore::DuckDb, + Some(self.container.namespace().to_string()), + ) + }; + + let use_case = self.container.index_use_case(); + let repo = use_case + .execute( + &input.path, + input.name.as_deref(), + store, + namespace, + input.force, + ) + .await + .map_err(|e| McpError::internal_error(format!("Indexing failed: {}", e), None))?; + + let languages = repo + .languages() + .iter() + .map(|(lang, stats)| (lang.clone(), stats.file_count)) + .collect(); + + let result = IndexRepositoryResult { + id: repo.id().to_string(), + name: repo.name().to_string(), + path: repo.path().to_string(), + file_count: repo.file_count(), + chunk_count: repo.chunk_count(), + languages, + }; + + let json = serde_json::to_string_pretty(&result).map_err(|e| { + McpError::internal_error(format!("Failed to serialize index result: {}", e), None) + })?; + + Ok(CallToolResult::success(vec![Content::text(json)])) + } } #[tool_handler] @@ -529,7 +954,16 @@ impl ServerHandler for CodesearchMcpServer { • analyze_impact — blast-radius analysis: what breaks if symbol X changes?\n\ • get_symbol_context — 360° view of a symbol's callers and callees\n\ • query_graph — precise relationship queries: callers_of, callees_of, \ - imports_of, importers_of, inheritors_of, children_of, tests_for, file_summary" + imports_of, importers_of, inheritors_of, children_of, tests_for, file_summary\n\ + • list_repositories — list indexed repositories with file/chunk counts (stats)\n\ + • list_features — entry-point call chains scored by criticality\n\ + • get_feature — a single execution feature by entry-point symbol\n\ + • get_impacted_features — features whose call chain includes changed symbols\n\ + • file_uses — which files in one repository depend on files in another\n\ + • list_clusters — architectural clusters via Leiden community detection\n\ + • get_file_cluster — the cluster a given file belongs 
to\n\ + • architecture_overview — Markdown table summarising clusters and dependencies\n\ + • index_repository — index or re-index a repository at a filesystem path" .into(), ), } From 83f231fe64092590dcfcbeeceb59c39826df7faa Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 5 Jun 2026 22:47:23 +0000 Subject: [PATCH 2/2] refactor: cap list_features limit and drop index_repository MCP tool Address PR review feedback: - Clamp the caller-supplied list_features `limit` to a server-side MAX_FEATURES_LIMIT (100), mirroring search_code's MAX_LIMIT, so a huge value cannot trigger unbounded call-graph traversal and serialization. - Remove the index_repository tool. Indexing an arbitrary filesystem path over MCP is a data-exfiltration risk in public HTTP deployments; keep indexing a CLI-only operation. --- README.md | 1 - src/connector/adapter/mcp/server.rs | 102 +++------------------------- 2 files changed, 9 insertions(+), 94 deletions(-) diff --git a/README.md b/README.md index 2d9d773..0c16cc1 100644 --- a/README.md +++ b/README.md @@ -396,7 +396,6 @@ The HTTP server exposes the MCP endpoint at `/mcp`. | `list_clusters` | Architectural clusters via Leiden community detection. Accepts `repository_id`. | | `get_file_cluster` | The cluster a given file belongs to. Accepts `file_path` and `repository_id`. | | `architecture_overview` | Markdown table summarising clusters and inter-cluster dependencies. Accepts `repository_id`. | -| `index_repository` | Index or re-index a repository at a filesystem path. Accepts `path`, `name`, and `force`. 
| The `query_graph` tool supports eight intention-named relationship `pattern`s, returning only the requested edge type instead of every relationship at once: diff --git a/src/connector/adapter/mcp/server.rs b/src/connector/adapter/mcp/server.rs index 5c41849..1f676bd 100644 --- a/src/connector/adapter/mcp/server.rs +++ b/src/connector/adapter/mcp/server.rs @@ -17,13 +17,18 @@ use serde::{Deserialize, Serialize}; use crate::application::CallGraphQuery; use crate::connector::api::Container; -use crate::domain::{FileEdge, SearchQuery, VectorStore}; +use crate::domain::{FileEdge, SearchQuery}; use super::tools::SearchResultOutput; /// Server-side maximum for the number of results a single search can return. const MAX_LIMIT: usize = 100; +/// Server-side maximum for the number of execution features `list_features` can +/// return. Caps caller-supplied limits so a huge value cannot trigger unbounded +/// call-graph traversal and serialization. +const MAX_FEATURES_LIMIT: usize = 100; + fn default_limit() -> usize { 10 } @@ -242,38 +247,6 @@ pub struct ArchitectureOverviewInput { pub repository_id: String, } -/// Input parameters for the index_repository tool -#[derive(Debug, Deserialize, JsonSchema)] -pub struct IndexRepositoryInput { - /// Filesystem path to the repository to index. - pub path: String, - - /// Optional human-readable name (defaults to the directory name). - pub name: Option, - - /// When true, delete any existing index for this path and re-index from - /// scratch. Defaults to false (incremental indexing). - #[serde(default)] - pub force: bool, -} - -/// Result returned by the index_repository tool. -#[derive(Debug, Serialize)] -pub struct IndexRepositoryResult { - /// Stable repository ID. - pub id: String, - /// Repository name. - pub name: String, - /// Absolute path that was indexed. - pub path: String, - /// Number of files indexed. - pub file_count: u64, - /// Number of code chunks produced. - pub chunk_count: u64, - /// Per-language file counts. 
- pub languages: std::collections::HashMap, -} - // ── MCP Server ─────────────────────────────────────────────────────────────── /// MCP Server that exposes codesearch functionality @@ -669,10 +642,11 @@ impl CodesearchMcpServer { params: Parameters<ListFeaturesInput>, ) -> Result<CallToolResult, McpError> { let input = params.0; + let limit = input.limit.min(MAX_FEATURES_LIMIT); let use_case = self.container.execution_features_use_case(); let features = use_case - .list_features(&input.repository_id, input.limit) + .list_features(&input.repository_id, limit) .await .map_err(|e| { McpError::internal_error(format!("Listing features failed: {}", e), None) @@ -881,63 +855,6 @@ impl CodesearchMcpServer { Ok(CallToolResult::success(vec![Content::text(overview)])) } - - /// Index (or incrementally re-index) a repository at the given filesystem - /// path so its code becomes searchable and its call graph is built. Set - /// `force=true` to delete any existing index for the path and re-index from - /// scratch. Returns the resulting repository's ID, file/chunk counts, and - /// language breakdown. This is a heavy, long-running operation. - #[tool(name = "index_repository")] - async fn index_repository( - &self, - params: Parameters<IndexRepositoryInput>, - ) -> Result<CallToolResult, McpError> { - let input = params.0; - - // Mirror the CLI IndexController: pick the vector store and namespace - // based on how the container was configured. 
- let (store, namespace) = if self.container.memory_storage() { - (VectorStore::InMemory, None) - } else { - ( - VectorStore::DuckDb, - Some(self.container.namespace().to_string()), - ) - }; - - let use_case = self.container.index_use_case(); - let repo = use_case - .execute( - &input.path, - input.name.as_deref(), - store, - namespace, - input.force, - ) - .await - .map_err(|e| McpError::internal_error(format!("Indexing failed: {}", e), None))?; - - let languages = repo - .languages() - .iter() - .map(|(lang, stats)| (lang.clone(), stats.file_count)) - .collect(); - - let result = IndexRepositoryResult { - id: repo.id().to_string(), - name: repo.name().to_string(), - path: repo.path().to_string(), - file_count: repo.file_count(), - chunk_count: repo.chunk_count(), - languages, - }; - - let json = serde_json::to_string_pretty(&result).map_err(|e| { - McpError::internal_error(format!("Failed to serialize index result: {}", e), None) - })?; - - Ok(CallToolResult::success(vec![Content::text(json)])) - } } #[tool_handler] @@ -962,8 +879,7 @@ impl ServerHandler for CodesearchMcpServer { • file_uses — which files in one repository depend on files in another\n\ • list_clusters — architectural clusters via Leiden community detection\n\ • get_file_cluster — the cluster a given file belongs to\n\ - • architecture_overview — Markdown table summarising clusters and dependencies\n\ - • index_repository — index or re-index a repository at a filesystem path" + • architecture_overview — Markdown table summarising clusters and dependencies" .into(), ), }