diff --git a/backend/app/database/falkor/code-graph-backend/api/analyzers/java/analyzer.py b/backend/app/database/falkor/code-graph-backend/api/analyzers/java/analyzer.py
index 4ae01d50..18f3eb0e 100644
--- a/backend/app/database/falkor/code-graph-backend/api/analyzers/java/analyzer.py
+++ b/backend/app/database/falkor/code-graph-backend/api/analyzers/java/analyzer.py
@@ -8,7 +8,7 @@
 from multilspy import SyncLanguageServer
 
 import tree_sitter_java as tsjava
-from tree_sitter import Language, Node
+from tree_sitter import Language, QueryCursor, Node
 
 from xml.etree import ElementTree
 
@@ -19,23 +19,35 @@ class JavaAnalyzer(AbstractAnalyzer):
     def __init__(self) -> None:
         super().__init__(Language(tsjava.language()))
 
+    def _run_query(self, query, node):
+        cursor = QueryCursor(query)
+        captures = {}
+
+        for _, match_captures in cursor.matches(node):
+            for name, nodes in match_captures.items():
+                captures.setdefault(name, []).extend(nodes)
+
+        return captures
+
     def add_dependencies(self, path: Path, files: list[Path]):
-        # if not Path("java-decompiler-engine-243.23654.153.jar").is_file():
-        #     subprocess.run(["wget", "https://www.jetbrains.com/intellij-repository/releases/com/jetbrains/intellij/java/java-decompiler-engine/243.23654.153/java-decompiler-engine-243.23654.153.jar"])
-        subprocess.run(["rm", "-rf", f"{path}/temp_deps"])
-        pom = ElementTree.parse(str(path) + '/pom.xml')
-        for dependency in pom.findall('.//{http://maven.apache.org/POM/4.0.0}dependency'):
-            groupId = dependency.find('{http://maven.apache.org/POM/4.0.0}groupId').text.replace('.', '/')
-            artifactId = dependency.find('{http://maven.apache.org/POM/4.0.0}artifactId').text
-            version = dependency.find('{http://maven.apache.org/POM/4.0.0}version').text
-            # jar_path = f"{Path.home()}/.m2/repository/{groupId}/{artifactId}/{version}/{artifactId}-{version}.jar"
-            jar_path = f"{Path.home()}/.m2/repository/{groupId}/{artifactId}/{version}/{artifactId}-{version}-sources.jar"
-
-            os.makedirs(f"{path}/temp_deps/{artifactId}-{version}", exist_ok=True)
-            # subprocess.run(["java", "-jar", "java-decompiler-engine-243.23654.153.jar", "-hdc=0 -iib=1 -rsy=1 -rbr=1 -dgs=1 -din=1 -den=1 -asc=1 -bsm=1", jar_path, f"{path}/temp_deps/{artifactId}-{version}"])
-            subprocess.run(["cp", jar_path, f"{artifactId}-{version}.jar"], cwd=f"{path}/temp_deps/{artifactId}-{version}")
-            subprocess.run(["unzip", f"{artifactId}-{version}.jar"], cwd=f"{path}/temp_deps/{artifactId}-{version}")
-        files.extend(Path(f"{path}/temp_deps").rglob("*.java"))
+        # ----- Keeping read-only -----
+        # # if not Path("java-decompiler-engine-243.23654.153.jar").is_file():
+        # #     subprocess.run(["wget", "https://www.jetbrains.com/intellij-repository/releases/com/jetbrains/intellij/java/java-decompiler-engine/243.23654.153/java-decompiler-engine-243.23654.153.jar"])
+        # subprocess.run(["rm", "-rf", f"{path}/temp_deps"])
+        # pom = ElementTree.parse(str(path) + '/pom.xml')
+        # for dependency in pom.findall('.//{http://maven.apache.org/POM/4.0.0}dependency'):
+        #     groupId = dependency.find('{http://maven.apache.org/POM/4.0.0}groupId').text.replace('.', '/')
+        #     artifactId = dependency.find('{http://maven.apache.org/POM/4.0.0}artifactId').text
+        #     version = dependency.find('{http://maven.apache.org/POM/4.0.0}version').text
+        #     # jar_path = f"{Path.home()}/.m2/repository/{groupId}/{artifactId}/{version}/{artifactId}-{version}.jar"
+        #     jar_path = f"{Path.home()}/.m2/repository/{groupId}/{artifactId}/{version}/{artifactId}-{version}-sources.jar"
+
+        #     os.makedirs(f"{path}/temp_deps/{artifactId}-{version}", exist_ok=True)
+        #     # subprocess.run(["java", "-jar", "java-decompiler-engine-243.23654.153.jar", "-hdc=0 -iib=1 -rsy=1 -rbr=1 -dgs=1 -din=1 -den=1 -asc=1 -bsm=1", jar_path, f"{path}/temp_deps/{artifactId}-{version}"])
+        #     subprocess.run(["cp", jar_path, f"{artifactId}-{version}.jar"], cwd=f"{path}/temp_deps/{artifactId}-{version}")
+        #     subprocess.run(["unzip", f"{artifactId}-{version}.jar"], cwd=f"{path}/temp_deps/{artifactId}-{version}")
+        #     files.extend(Path(f"{path}/temp_deps").rglob("*.java"))
+        return
 
     def get_entity_label(self, node: Node) -> str:
         if node.type == 'class_declaration':
@@ -68,34 +80,39 @@ def get_entity_types(self) -> list[str]:
     def add_symbols(self, entity: Entity) -> None:
         if entity.node.type == 'class_declaration':
             interfaces_query = self.language.query("(super_interfaces (type_list (type_identifier) @interface))")
-            interfaces_captures = interfaces_query.captures(entity.node)
+
+            interfaces_captures = self._run_query(interfaces_query, entity.node)
+
             if 'interface' in interfaces_captures:
                 for interface in interfaces_captures['interface']:
                     entity.add_symbol("implement_interface", interface)
             base_class_query = self.language.query("(superclass (type_identifier) @base_class)")
-            base_class_captures = base_class_query.captures(entity.node)
+            base_class_captures = self._run_query(base_class_query, entity.node)
+
             if 'base_class' in base_class_captures:
                 base_class = base_class_captures['base_class'][0]
                 entity.add_symbol("base_class", base_class)
         elif entity.node.type == 'interface_declaration':
-            query = self.language.query("(extends_interfaces (type_list (type_identifier) @type))?")
-            extends_captures = query.captures(entity.node)
+            extends_query = self.language.query("(extends_interfaces (type_list (type_identifier) @type))")
+            extends_captures = self._run_query(extends_query, entity.node)
+
            if 'type' in extends_captures:
                 for interface in extends_captures['type']:
                     entity.add_symbol("extend_interface", interface)
         elif entity.node.type in ['method_declaration', 'constructor_declaration']:
-            query = self.language.query("(method_invocation) @reference.call")
-            captures = query.captures(entity.node)
+            call_query = self.language.query("(method_invocation) @reference.call")
+            captures = self._run_query(call_query, entity.node)
+
             if 'reference.call' in captures:
                 for caller in captures['reference.call']:
                     entity.add_symbol("call", caller)
             if entity.node.type == 'method_declaration':
-                query = self.language.query("(formal_parameters (formal_parameter type: (_) @parameter))")
-                captures = query.captures(entity.node)
+                param_query = self.language.query("(formal_parameters (formal_parameter type: (_) @parameter))")
+                captures = self._run_query(param_query, entity.node)
+
                 if 'parameter' in captures:
                     for parameter in captures['parameter']:
                         entity.add_symbol("parameters", parameter)
-                entity.add_symbol("return_type", entity.node.child_by_field_name('type'))
 
     def is_dependency(self, file_path: str) -> bool:
         return ".jar" in file_path
diff --git a/backend/app/database/falkor/code-graph-backend/api/analyzers/python/analyzer.py b/backend/app/database/falkor/code-graph-backend/api/analyzers/python/analyzer.py
index 07b051ad..6469b266 100644
--- a/backend/app/database/falkor/code-graph-backend/api/analyzers/python/analyzer.py
+++ b/backend/app/database/falkor/code-graph-backend/api/analyzers/python/analyzer.py
@@ -9,7 +9,7 @@
 from ..analyzer import AbstractAnalyzer
 
 import tree_sitter_python as tspython
-from tree_sitter import Language, Node
+from tree_sitter import Language, QueryCursor, Node
 
 import logging
 logger = logging.getLogger('code_graph')
@@ -18,30 +18,42 @@ class PythonAnalyzer(AbstractAnalyzer):
     def __init__(self) -> None:
         super().__init__(Language(tspython.language()))
 
+    def _run_query(self, query, node):
+        cursor = QueryCursor(query)
+        captures = {}
+
+        for _, match_captures in cursor.matches(node):
+            for name, nodes in match_captures.items():
+                captures.setdefault(name, []).extend(nodes)
+
+        return captures
+
     def add_dependencies(self, path: Path, files: list[Path]):
-        if Path(f"{path}/venv").is_dir():
-            return
-        subprocess.run(["python3", "-m", "venv", "venv"], cwd=str(path))
-        if Path(f"{path}/pyproject.toml").is_file():
-            subprocess.run(["pip", "install", "poetry"], cwd=str(path), env={
-                "VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
-            subprocess.run(["poetry", "install"], cwd=str(path), env={
-                "VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
-            with open(f"{path}/pyproject.toml", 'r') as file:
-                pyproject_data = toml.load(file)
-            try:
-                for requirement in pyproject_data.get("tool").get("poetry").get("dependencies"):
-                    files.extend(Path(f"{path}/venv/lib").rglob(f"**/site-packages/{requirement}/*.py"))
-            except Exception as e:
-                logger.error(f"Error adding dependencies: {e}")
-                pass
-        elif Path(f"{path}/requirements.txt").is_file():
-            subprocess.run(["pip", "install", "-r", "requirements.txt"], cwd=str(path),
-                           env={"VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
-            with open(f"{path}/requirements.txt", 'r') as file:
-                requirements = [line.strip().split("==") for line in file if line.strip()]
-            for requirement in requirements:
-                files.extend(Path(f"{path}/venv/lib/").rglob(f"**/site-packages/{requirement}/*.py"))
+        ## ------- Read only -----
+        # if Path(f"{path}/venv").is_dir():
+        #     return
+        # subprocess.run(["python3", "-m", "venv", "venv"], cwd=str(path))
+        # if Path(f"{path}/pyproject.toml").is_file():
+        #     subprocess.run(["pip", "install", "poetry"], cwd=str(path), env={
+        #         "VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
+        #     subprocess.run(["poetry", "install"], cwd=str(path), env={
+        #         "VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
+        #     with open(f"{path}/pyproject.toml", 'r') as file:
+        #         pyproject_data = toml.load(file)
+        #     try:
+        #         for requirement in pyproject_data.get("tool").get("poetry").get("dependencies"):
+        #             files.extend(Path(f"{path}/venv/lib").rglob(f"**/site-packages/{requirement}/*.py"))
+        #     except Exception as e:
+        #         logger.error(f"Error adding dependencies: {e}")
+        #         pass
+        # elif Path(f"{path}/requirements.txt").is_file():
+        #     subprocess.run(["pip", "install", "-r", "requirements.txt"], cwd=str(path),
+        #                    env={"VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
+        #     with open(f"{path}/requirements.txt", 'r') as file:
+        #         requirements = [line.strip().split("==") for line in file if line.strip()]
+        #     for requirement in requirements:
+        #         files.extend(Path(f"{path}/venv/lib/").rglob(f"**/site-packages/{requirement}/*.py"))
+        return
 
     def get_entity_label(self, node: Node) -> str:
         if node.type == 'class_definition':
@@ -72,18 +84,23 @@ def add_symbols(self, entity: Entity) -> None:
         superclasses = entity.node.child_by_field_name("superclasses")
         if superclasses:
             base_classes_query = self.language.query("(argument_list (_) @base_class)")
-            base_classes_captures = base_classes_query.captures(superclasses)
-            if 'base_class' in base_classes_captures:
-                for base_class in base_classes_captures['base_class']:
+            captures = self._run_query(base_classes_query, superclasses)
+
+            if 'base_class' in captures:
+                for base_class in captures['base_class']:
                     entity.add_symbol("base_class", base_class)
+
         elif entity.node.type == 'function_definition':
-            query = self.language.query("(call) @reference.call")
-            captures = query.captures(entity.node)
+            call_query = self.language.query("(call) @reference.call")
+            captures = self._run_query(call_query, entity.node)
+
             if 'reference.call' in captures:
                 for caller in captures['reference.call']:
                     entity.add_symbol("call", caller)
-            query = self.language.query("(typed_parameter type: (_) @parameter)")
-            captures = query.captures(entity.node)
+
+            param_query = self.language.query("(typed_parameter type: (_) @parameter)")
+            captures = self._run_query(param_query, entity.node)
+
             if 'parameter' in captures:
                 for parameter in captures['parameter']:
                     entity.add_symbol("parameters", parameter)
diff --git a/backend/app/database/falkor/code-graph-backend/api/analyzers/source_analyzer.py b/backend/app/database/falkor/code-graph-backend/api/analyzers/source_analyzer.py
index 12502ab9..20191900 100644
--- a/backend/app/database/falkor/code-graph-backend/api/analyzers/source_analyzer.py
+++ b/backend/app/database/falkor/code-graph-backend/api/analyzers/source_analyzer.py
@@ -31,8 +31,9 @@ def start_server(self):
         return nullcontext()
 
 class SourceAnalyzer():
-    def __init__(self) -> None:
+    def __init__(self, static_only: bool = True) -> None:
         self.files: dict[Path, File] = {}
+        self.static_only = static_only
 
     def supported_types(self) -> list[str]:
         """
@@ -82,8 +83,9 @@ def first_pass(self, path: Path, files: list[Path], ignore: list[str], graph: Gr
         """
 
         supoorted_types = self.supported_types()
-        for ext in set([file.suffix for file in files if file.suffix in supoorted_types]):
-            analyzers[ext].add_dependencies(path, files)
+        if not self.static_only:
+            for ext in set([file.suffix for file in files if file.suffix in supoorted_types]):
+                analyzers[ext].add_dependencies(path, files)
 
         files_len = len(files)
         for i, file_path in enumerate(files):
@@ -166,9 +168,10 @@ def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
         files = list(path.rglob("*.java")) + list(path.rglob("*.py"))
         # First pass analysis of the source code
         self.first_pass(path, files, ignore, graph)
-
-        # Second pass analysis of the source code
-        self.second_pass(graph, files, path)
+
+        if not self.static_only:
+            # Second pass analysis of the source code
+            self.second_pass(graph, files, path)
 
     def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] = []) -> None:
         """
diff --git a/backend/app/database/falkor/code-graph-backend/api/index.py b/backend/app/database/falkor/code-graph-backend/api/index.py
index 0a87dd5e..64b61128 100644
--- a/backend/app/database/falkor/code-graph-backend/api/index.py
+++ b/backend/app/database/falkor/code-graph-backend/api/index.py
@@ -405,20 +405,27 @@ def analyze_repo():
     logger.debug('Received repo_url: %s', url)
 
     ignore = data.get('ignore', [])
+
+    try:
+        proj = Project.from_git_repository(url)
+        proj.analyze_sources(ignore)
+        proj.process_git_history(ignore)
 
-    proj = Project.from_git_repository(url)
-    proj.analyze_sources(ignore)
-    proj.process_git_history(ignore)
-
-    stats = proj.graph.stats()
+        stats = proj.graph.stats()
 
-    response = {
-        'status': 'success',
-        'node_count': stats.get('node_count', 0),
-        'edge_count': stats.get('edge_count', 0)
-    }
+        response = {
+            'status': 'success',
+            'node_count': stats.get('node_count', 0),
+            'edge_count': stats.get('edge_count', 0)
+        }
 
-    return jsonify(response), 200
+        return jsonify(response), 200
+    except Exception as e:
+        logger.exception("Repository analysis failed")
+        return jsonify({
+            "status": "error",
+            "error": str(e)
+        }), 500
 
 @app.route('/switch_commit', methods=['POST'])
 @public_access # Apply public access decorator
diff --git a/backend/app/services/codegraph/repo_service.py b/backend/app/services/codegraph/repo_service.py
index eba4fca8..b719e784 100644
--- a/backend/app/services/codegraph/repo_service.py
+++ b/backend/app/services/codegraph/repo_service.py
@@ -114,8 +114,14 @@ async def index_repo(self, repo_input: str, discord_id: str) -> Dict[str, Any]:
                 }
             ) as response:
                 if response.status == 200:
-                    data = await response.json() if await response.text() else {}
-
+                    raw_text = await response.text()
+                    try:
+                        import json
+                        data = json.loads(raw_text) if raw_text else {}
+                    except Exception:
+                        logger.debug("Failed to parse backend JSON")
+                        data = {}
+
                     await self.supabase.table("indexed_repositories").update({
                         "indexing_status": "completed",
                         "indexed_at": datetime.now().isoformat(),
@@ -134,7 +140,7 @@ async def index_repo(self, repo_input: str, discord_id: str) -> Dict[str, Any]:
                         "edges": data.get("edge_count", 0)
                     }
                 else:
-                    error_msg = (await response.text())[:500]
+                    error_msg = f"Backend returned status {response.status}"
 
                     await self.supabase.table("indexed_repositories").update({
                         "indexing_status": "failed",