@@ -8,7 +8,7 @@
from multilspy import SyncLanguageServer

import tree_sitter_java as tsjava
from tree_sitter import Language, Node
from tree_sitter import Language, QueryCursor, Node

from xml.etree import ElementTree

@@ -19,23 +19,35 @@ class JavaAnalyzer(AbstractAnalyzer):
def __init__(self) -> None:
super().__init__(Language(tsjava.language()))

def _run_query(self, query, node):
cursor = QueryCursor(query)
captures = {}

for _, match_captures in cursor.matches(node):
for name, nodes in match_captures.items():
captures.setdefault(name, []).extend(nodes)

return captures

def add_dependencies(self, path: Path, files: list[Path]):
# if not Path("java-decompiler-engine-243.23654.153.jar").is_file():
# subprocess.run(["wget", "https://www.jetbrains.com/intellij-repository/releases/com/jetbrains/intellij/java/java-decompiler-engine/243.23654.153/java-decompiler-engine-243.23654.153.jar"])
subprocess.run(["rm", "-rf", f"{path}/temp_deps"])
pom = ElementTree.parse(str(path) + '/pom.xml')
for dependency in pom.findall('.//{http://maven.apache.org/POM/4.0.0}dependency'):
groupId = dependency.find('{http://maven.apache.org/POM/4.0.0}groupId').text.replace('.', '/')
artifactId = dependency.find('{http://maven.apache.org/POM/4.0.0}artifactId').text
version = dependency.find('{http://maven.apache.org/POM/4.0.0}version').text
# jar_path = f"{Path.home()}/.m2/repository/{groupId}/{artifactId}/{version}/{artifactId}-{version}.jar"
jar_path = f"{Path.home()}/.m2/repository/{groupId}/{artifactId}/{version}/{artifactId}-{version}-sources.jar"

os.makedirs(f"{path}/temp_deps/{artifactId}-{version}", exist_ok=True)
# subprocess.run(["java", "-jar", "java-decompiler-engine-243.23654.153.jar", "-hdc=0 -iib=1 -rsy=1 -rbr=1 -dgs=1 -din=1 -den=1 -asc=1 -bsm=1", jar_path, f"{path}/temp_deps/{artifactId}-{version}"])
subprocess.run(["cp", jar_path, f"{artifactId}-{version}.jar"], cwd=f"{path}/temp_deps/{artifactId}-{version}")
subprocess.run(["unzip", f"{artifactId}-{version}.jar"], cwd=f"{path}/temp_deps/{artifactId}-{version}")
files.extend(Path(f"{path}/temp_deps").rglob("*.java"))
# ----- Kept read-only -----
# # if not Path("java-decompiler-engine-243.23654.153.jar").is_file():
# # subprocess.run(["wget", "https://www.jetbrains.com/intellij-repository/releases/com/jetbrains/intellij/java/java-decompiler-engine/243.23654.153/java-decompiler-engine-243.23654.153.jar"])
# subprocess.run(["rm", "-rf", f"{path}/temp_deps"])
# pom = ElementTree.parse(str(path) + '/pom.xml')
# for dependency in pom.findall('.//{http://maven.apache.org/POM/4.0.0}dependency'):
# groupId = dependency.find('{http://maven.apache.org/POM/4.0.0}groupId').text.replace('.', '/')
# artifactId = dependency.find('{http://maven.apache.org/POM/4.0.0}artifactId').text
# version = dependency.find('{http://maven.apache.org/POM/4.0.0}version').text
# # jar_path = f"{Path.home()}/.m2/repository/{groupId}/{artifactId}/{version}/{artifactId}-{version}.jar"
# jar_path = f"{Path.home()}/.m2/repository/{groupId}/{artifactId}/{version}/{artifactId}-{version}-sources.jar"

# os.makedirs(f"{path}/temp_deps/{artifactId}-{version}", exist_ok=True)
# # subprocess.run(["java", "-jar", "java-decompiler-engine-243.23654.153.jar", "-hdc=0 -iib=1 -rsy=1 -rbr=1 -dgs=1 -din=1 -den=1 -asc=1 -bsm=1", jar_path, f"{path}/temp_deps/{artifactId}-{version}"])
# subprocess.run(["cp", jar_path, f"{artifactId}-{version}.jar"], cwd=f"{path}/temp_deps/{artifactId}-{version}")
# subprocess.run(["unzip", f"{artifactId}-{version}.jar"], cwd=f"{path}/temp_deps/{artifactId}-{version}")
# files.extend(Path(f"{path}/temp_deps").rglob("*.java"))
return

def get_entity_label(self, node: Node) -> str:
if node.type == 'class_declaration':
@@ -68,34 +80,39 @@ def get_entity_types(self) -> list[str]:
def add_symbols(self, entity: Entity) -> None:
if entity.node.type == 'class_declaration':
interfaces_query = self.language.query("(super_interfaces (type_list (type_identifier) @interface))")
interfaces_captures = interfaces_query.captures(entity.node)

interfaces_captures = self._run_query(interfaces_query, entity.node)

if 'interface' in interfaces_captures:
for interface in interfaces_captures['interface']:
entity.add_symbol("implement_interface", interface)
base_class_query = self.language.query("(superclass (type_identifier) @base_class)")
base_class_captures = base_class_query.captures(entity.node)
base_class_captures = self._run_query(base_class_query, entity.node)

if 'base_class' in base_class_captures:
base_class = base_class_captures['base_class'][0]
entity.add_symbol("base_class", base_class)
elif entity.node.type == 'interface_declaration':
query = self.language.query("(extends_interfaces (type_list (type_identifier) @type))?")
extends_captures = query.captures(entity.node)
extends_query = self.language.query("(extends_interfaces (type_list (type_identifier) @type))")
extends_captures = self._run_query(extends_query, entity.node)

if 'type' in extends_captures:
for interface in extends_captures['type']:
entity.add_symbol("extend_interface", interface)
elif entity.node.type in ['method_declaration', 'constructor_declaration']:
query = self.language.query("(method_invocation) @reference.call")
captures = query.captures(entity.node)
call_query = self.language.query("(method_invocation) @reference.call")
captures = self._run_query(call_query, entity.node)

if 'reference.call' in captures:
for caller in captures['reference.call']:
entity.add_symbol("call", caller)
if entity.node.type == 'method_declaration':
query = self.language.query("(formal_parameters (formal_parameter type: (_) @parameter))")
captures = query.captures(entity.node)
param_query = self.language.query("(formal_parameters (formal_parameter type: (_) @parameter))")
captures = self._run_query(param_query, entity.node)

if 'parameter' in captures:
for parameter in captures['parameter']:
entity.add_symbol("parameters", parameter)
entity.add_symbol("return_type", entity.node.child_by_field_name('type'))

def is_dependency(self, file_path: str) -> bool:
return ".jar" in file_path
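The `_run_query` helper introduced above adapts the analyzer to the newer py-tree-sitter query API, in which execution moved off `Query` onto a `QueryCursor`. A minimal standalone sketch of the shift, assuming py-tree-sitter >= 0.25 (the version the `QueryCursor` import implies) and the `tree_sitter_java` grammar package already used by this file:

```python
import tree_sitter_java as tsjava
from tree_sitter import Language, Parser, QueryCursor

language = Language(tsjava.language())
parser = Parser(language)
tree = parser.parse(b"class Foo extends Base implements Bar, Baz {}")

query = language.query("(super_interfaces (type_list (type_identifier) @interface))")

# Old API (removed upstream): captures were read straight off the Query.
#   captures = query.captures(tree.root_node)

# New API: a QueryCursor executes the query; matches() yields
# (pattern_index, {capture_name: [nodes]}) tuples, which _run_query
# flattens back into the old {name: [nodes]} shape.
captures: dict = {}
for _, match_captures in QueryCursor(query).matches(tree.root_node):
    for name, nodes in match_captures.items():
        captures.setdefault(name, []).extend(nodes)

for node in captures.get("interface", []):
    print(node.text)  # b'Bar', b'Baz'
```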
@@ -9,7 +9,7 @@
from ..analyzer import AbstractAnalyzer

import tree_sitter_python as tspython
from tree_sitter import Language, Node
from tree_sitter import Language, QueryCursor, Node

import logging
logger = logging.getLogger('code_graph')
@@ -18,30 +18,42 @@ class PythonAnalyzer(AbstractAnalyzer):
def __init__(self) -> None:
super().__init__(Language(tspython.language()))

def _run_query(self, query, node):
cursor = QueryCursor(query)
captures = {}

for _, match_captures in cursor.matches(node):
for name, nodes in match_captures.items():
captures.setdefault(name, []).extend(nodes)

return captures

def add_dependencies(self, path: Path, files: list[Path]):
if Path(f"{path}/venv").is_dir():
return
subprocess.run(["python3", "-m", "venv", "venv"], cwd=str(path))
if Path(f"{path}/pyproject.toml").is_file():
subprocess.run(["pip", "install", "poetry"], cwd=str(path), env={
"VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
subprocess.run(["poetry", "install"], cwd=str(path), env={
"VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
with open(f"{path}/pyproject.toml", 'r') as file:
pyproject_data = toml.load(file)
try:
for requirement in pyproject_data.get("tool").get("poetry").get("dependencies"):
files.extend(Path(f"{path}/venv/lib").rglob(f"**/site-packages/{requirement}/*.py"))
except Exception as e:
logger.error(f"Error adding dependencies: {e}")
pass
elif Path(f"{path}/requirements.txt").is_file():
subprocess.run(["pip", "install", "-r", "requirements.txt"], cwd=str(path),
env={"VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
with open(f"{path}/requirements.txt", 'r') as file:
requirements = [line.strip().split("==") for line in file if line.strip()]
for requirement in requirements:
files.extend(Path(f"{path}/venv/lib/").rglob(f"**/site-packages/{requirement}/*.py"))
## ------- Read-only -------
# if Path(f"{path}/venv").is_dir():
# return
# subprocess.run(["python3", "-m", "venv", "venv"], cwd=str(path))
# if Path(f"{path}/pyproject.toml").is_file():
# subprocess.run(["pip", "install", "poetry"], cwd=str(path), env={
# "VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
# subprocess.run(["poetry", "install"], cwd=str(path), env={
# "VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
# with open(f"{path}/pyproject.toml", 'r') as file:
# pyproject_data = toml.load(file)
# try:
# for requirement in pyproject_data.get("tool").get("poetry").get("dependencies"):
# files.extend(Path(f"{path}/venv/lib").rglob(f"**/site-packages/{requirement}/*.py"))
# except Exception as e:
# logger.error(f"Error adding dependencies: {e}")
# pass
# elif Path(f"{path}/requirements.txt").is_file():
# subprocess.run(["pip", "install", "-r", "requirements.txt"], cwd=str(path),
# env={"VIRTUAL_ENV": f"{path}/venv", "PATH": f"{path}/venv/bin:{os.environ['PATH']}"})
# with open(f"{path}/requirements.txt", 'r') as file:
# requirements = [line.strip().split("==") for line in file if line.strip()]
# for requirement in requirements:
# files.extend(Path(f"{path}/venv/lib/").rglob(f"**/site-packages/{requirement}/*.py"))
return

def get_entity_label(self, node: Node) -> str:
if node.type == 'class_definition':
@@ -72,18 +84,23 @@ def add_symbols(self, entity: Entity) -> None:
superclasses = entity.node.child_by_field_name("superclasses")
if superclasses:
base_classes_query = self.language.query("(argument_list (_) @base_class)")
base_classes_captures = base_classes_query.captures(superclasses)
if 'base_class' in base_classes_captures:
for base_class in base_classes_captures['base_class']:
captures = self._run_query(base_classes_query, superclasses)

if 'base_class' in captures:
for base_class in captures['base_class']:
entity.add_symbol("base_class", base_class)

elif entity.node.type == 'function_definition':
query = self.language.query("(call) @reference.call")
captures = query.captures(entity.node)
call_query = self.language.query("(call) @reference.call")
captures = self._run_query(call_query, entity.node)

if 'reference.call' in captures:
for caller in captures['reference.call']:
entity.add_symbol("call", caller)
query = self.language.query("(typed_parameter type: (_) @parameter)")
captures = query.captures(entity.node)

param_query = self.language.query("(typed_parameter type: (_) @parameter)")
captures = self._run_query(param_query, entity.node)

if 'parameter' in captures:
for parameter in captures['parameter']:
entity.add_symbol("parameters", parameter)
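Note that `_run_query` above is duplicated verbatim from `JavaAnalyzer`. A possible follow-up, sketched here under the assumption that `AbstractAnalyzer` is the shared base class that already receives the `Language` in `__init__`, would be to hoist the helper so both analyzers inherit it:

```python
from tree_sitter import Node, Query, QueryCursor

class AbstractAnalyzer:
    def _run_query(self, query: Query, node: Node) -> dict[str, list[Node]]:
        # Flatten QueryCursor.matches() output into {capture_name: [nodes]},
        # preserving the shape the pre-0.25 Query.captures() API returned.
        captures: dict[str, list[Node]] = {}
        for _, match_captures in QueryCursor(query).matches(node):
            for name, nodes in match_captures.items():
                captures.setdefault(name, []).extend(nodes)
        return captures
```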
@@ -31,8 +31,9 @@ def start_server(self):
return nullcontext()

class SourceAnalyzer():
def __init__(self) -> None:
def __init__(self, static_only: bool = True) -> None:
self.files: dict[Path, File] = {}
self.static_only = static_only

def supported_types(self) -> list[str]:
"""
@@ -82,8 +83,9 @@ def first_pass(self, path: Path, files: list[Path], ignore: list[str], graph: Gr
"""

supported_types = self.supported_types()
for ext in set([file.suffix for file in files if file.suffix in supported_types]):
analyzers[ext].add_dependencies(path, files)
if not self.static_only:
for ext in set([file.suffix for file in files if file.suffix in supported_types]):
analyzers[ext].add_dependencies(path, files)

files_len = len(files)
for i, file_path in enumerate(files):
@@ -166,9 +168,10 @@ def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
files = list(path.rglob("*.java")) + list(path.rglob("*.py"))
# First pass analysis of the source code
self.first_pass(path, files, ignore, graph)

# Second pass analysis of the source code
self.second_pass(graph, files, path)

if not self.static_only:
# Second pass analysis of the source code
self.second_pass(graph, files, path)

def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] = []) -> None:
"""
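The new `static_only` flag defaults to the safer behavior: `first_pass` skips dependency installation and `analyze_sources` skips the second pass. A usage sketch (the `Graph` construction here is hypothetical, shown only to illustrate the flag):

```python
graph = Graph("demo")  # hypothetical graph setup

static = SourceAnalyzer()                    # static_only=True by default
static.analyze_local_folder("/repo", graph)  # no dependency install, no second pass

full = SourceAnalyzer(static_only=False)     # opt back in to dependency
full.analyze_local_folder("/repo", graph)    # resolution and the second pass
```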
backend/app/database/falkor/code-graph-backend/api/index.py (29 changes: 18 additions & 11 deletions)
@@ -405,20 +405,27 @@ def analyze_repo():
logger.debug('Received repo_url: %s', url)

ignore = data.get('ignore', [])

try:
proj = Project.from_git_repository(url)
proj.analyze_sources(ignore)
proj.process_git_history(ignore)

stats = proj.graph.stats()

response = {
'status': 'success',
'node_count': stats.get('node_count', 0),
'edge_count': stats.get('edge_count', 0)
}

return jsonify(response), 200
except Exception as e:
logger.exception("Repository analysis failed")
return jsonify({
"status": "error",
"error": str(e)
}), 500

@app.route('/switch_commit', methods=['POST'])
@public_access # Apply public access decorator
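With the `try`/`except` wrapper in place, `analyze_repo` now returns a structured error payload instead of letting the exception propagate. A client-side sketch (the route path and port are assumptions based on the function name):

```python
import requests

resp = requests.post(
    "http://localhost:5000/analyze_repo",  # assumed mount point
    json={"repo_url": "https://github.com/org/repo", "ignore": []},
)
body = resp.json()
if resp.status_code == 200:
    print(body["node_count"], body["edge_count"])  # success payload
else:
    print(body["status"], body["error"])           # 500: {'status': 'error', ...}
```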
backend/app/services/codegraph/repo_service.py (12 changes: 9 additions & 3 deletions)
@@ -114,8 +114,14 @@ async def index_repo(self, repo_input: str, discord_id: str) -> Dict[str, Any]:
}
) as response:
if response.status == 200:
data = await response.json() if await response.text() else {}

raw_text = await response.text()
try:
import json
data = json.loads(raw_text) if raw_text else {}
except Exception:
logger.debug("Failed to parse backend JSON")
data = {}

await self.supabase.table("indexed_repositories").update({
"indexing_status": "completed",
"indexed_at": datetime.now().isoformat(),
Expand All @@ -134,7 +140,7 @@ async def index_repo(self, repo_input: str, discord_id: str) -> Dict[str, Any]:
"edges": data.get("edge_count", 0)
}
else:
error_msg = (await response.text())[:500]
error_msg = f"Backend returned status {response.status}"

await self.supabase.table("indexed_repositories").update({
"indexing_status": "failed",
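The parsing change reads the response body once as text and degrades to an empty dict when the backend returns malformed JSON. The same pattern factored into a helper, as a sketch that assumes the aiohttp response object used above and narrows the broad `except Exception` to `json.JSONDecodeError`:

```python
import json

async def safe_json(response) -> dict:
    """Leniently parse an aiohttp response body: {} on empty or invalid JSON."""
    raw_text = await response.text()  # read the body once, as text
    try:
        return json.loads(raw_text) if raw_text else {}
    except json.JSONDecodeError:
        return {}                     # malformed backend reply -> empty dict
```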