Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions crates/aegis-core/src/ast/imports.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//! Per-file import extraction — Layer 1 fact derivation.
//!
//! Every language adapter declares a tree-sitter `import_query` that
//! captures imported / required module names as `@import`. Multiple
//! downstream consumers (signals, workspace, security) each used to
//! re-run this query independently. This module pulls the work into
//! Layer 1 so the query runs **at most once per file** regardless of
//! how many consumers ask.
//!
//! `ParsedFile::imports()` lazily populates the cache on first read.
//! All cached `Import` values are normalized: leading/trailing quotes
//! and backticks already stripped via `LanguageAdapter::normalize_import`.

use tree_sitter::{Query, QueryCursor};

use crate::ast::parsed_file::ParsedFile;

/// A single module reference found by the import query of a parsed file.
///
/// The value is purely **textual**: it records what the source says
/// (`"math/rand"` in Go, `"react"` in JS/TS, `".helpers"` for a Python
/// relative import) and where it says it. Aliases such as
/// `import myrand "math/rand"` are deliberately not resolved at this
/// layer; that needs language-specific AST handling and will be added
/// when a consumer requires it.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Import {
    /// The module path exactly as written, after the adapter's
    /// `normalize_import` has removed surrounding quotes / backticks.
    pub module: String,
    /// Line number of the import, counted from 1.
    pub line: usize,
}

/// Run the language adapter's `import_query` over the parse tree and
/// collect the normalized text of every capture it yields.
///
/// Results are returned in the order the query cursor produces them.
/// Captures whose text is not valid UTF-8, or whose normalized text is
/// empty, are skipped. An identical `(module, line)` pair is emitted
/// only once, even if the query reports it several times.
///
/// If the adapter's query fails to compile the function returns an
/// empty list rather than an error.
///
/// Caller is responsible for caching; `ParsedFile::imports()` is the
/// supported entry point.
pub fn extract_imports(parsed: &ParsedFile<'_>) -> Vec<Import> {
    let adapter = parsed.adapter();
    let lang = adapter.tree_sitter_language();
    let Ok(query) = Query::new(lang, adapter.import_query()) else {
        return Vec::new();
    };
    let src = parsed.source_bytes();
    let mut qc = QueryCursor::new();
    let mut out: Vec<Import> = Vec::new();
    // Hash-based membership keeps de-duplication linear in the number of
    // captures; `out` alone preserves first-seen order.
    let mut seen: std::collections::HashSet<Import> = std::collections::HashSet::new();
    for m in qc.matches(&query, parsed.root_node(), src) {
        // NOTE(review): every capture of the match is collected, not only
        // those named `@import`. Confirm no adapter query declares helper
        // captures (e.g. for predicates) that would leak in here.
        for cap in m.captures {
            let Ok(raw) = cap.node.utf8_text(src) else { continue };
            let module = adapter.normalize_import(raw);
            if module.is_empty() {
                continue;
            }
            // `row + 1` turns the node's start row into the 1-indexed
            // line that `Import::line` documents.
            let line = cap.node.start_position().row + 1;
            let import = Import { module, line };
            if seen.insert(import.clone()) {
                out.push(import);
            }
        }
    }
    out
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::parsed_file::parse;

    /// Parse `src` as the language implied by `path` and return the
    /// extracted module names, in extraction order.
    fn module_names(path: &str, src: &str) -> Vec<String> {
        let parsed = parse(path, src).unwrap();
        extract_imports(&parsed)
            .into_iter()
            .map(|imp| imp.module)
            .collect()
    }

    #[test]
    fn python_imports_extracted() {
        // Plain, `from … import …` and aliased imports must all surface
        // their module name.
        let modules = module_names(
            "foo.py",
            "import os\nfrom collections import OrderedDict\nimport secrets as s\n",
        );
        for wanted in ["os", "collections", "secrets"] {
            assert!(modules.iter().any(|m| m == wanted), "got {modules:?}");
        }
    }

    #[test]
    fn go_imports_strip_quotes() {
        // Grouped Go imports: the surrounding double quotes must be gone.
        let modules = module_names(
            "main.go",
            "package main\n\nimport (\n \"math/rand\"\n \"crypto/rand\"\n)\n",
        );
        for wanted in ["math/rand", "crypto/rand"] {
            assert!(modules.iter().any(|m| m == wanted), "got {modules:?}");
        }
    }

    #[test]
    fn js_imports_strip_quotes() {
        // Both single- and double-quoted specifiers are normalized.
        let modules = module_names(
            "a.js",
            "import express from 'express';\nimport { ok } from \"./util\";\n",
        );
        for wanted in ["express", "./util"] {
            assert!(modules.iter().any(|m| m == wanted), "got {modules:?}");
        }
    }

    #[test]
    fn unsupported_extension_returns_empty() {
        // An unknown extension never yields a `ParsedFile`, so
        // `extract_imports` can only ever be handed a file that parsed
        // successfully — that is the contract pinned here.
        let outcome = parse("notes.xyz", "anything");
        assert!(outcome.is_none());
    }

    #[test]
    fn captured_line_is_one_indexed() {
        // The import sits on the second physical line of the source.
        let parsed = parse("foo.py", "x = 1\nimport os\n").unwrap();
        let line = extract_imports(&parsed)
            .into_iter()
            .find(|imp| imp.module == "os")
            .map(|imp| imp.line)
            .unwrap();
        assert_eq!(line, 2);
    }
}
2 changes: 2 additions & 0 deletions crates/aegis-core/src/ast/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
pub mod adapter;
pub mod imports;
pub mod languages;
pub mod parsed_file;
pub mod registry;

pub use adapter::{default_max_chain_depth, LanguageAdapter};
pub use imports::{extract_imports, Import};
pub use parsed_file::{parse, ParsedFile};
pub use registry::LanguageRegistry;
97 changes: 97 additions & 0 deletions crates/aegis-core/src/ast/parsed_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,25 @@
//! file should be flagged is a *finding* decision left to consumers,
//! not a hard short-circuit baked into the parse layer.

use std::cell::OnceCell;

use crate::ast::adapter::LanguageAdapter;
use crate::ast::imports::{extract_imports, Import};
use crate::ast::registry::LanguageRegistry;

/// Output of a successful parse — tree + source + the adapter that
/// produced it. Cheap to pass by reference; nothing here is cloned.
///
/// Per-file derived facts (imports, …) are stored as lazy
/// `OnceCell` caches: extraction runs at most once per file
/// regardless of how many consumers ask. Single-threaded by design —
/// `ParsedFile` is owned by the caller of `gather_findings` and not
/// shared across threads.
pub struct ParsedFile<'src> {
/// The tree-sitter parse tree; its root node is what queries and
/// `has_syntax_errors` inspect.
tree: tree_sitter::Tree,
/// The source text the tree was built from, borrowed for `'src`.
source: &'src str,
/// Name reported by the adapter that parsed the file (`adapter.name()`
/// at construction time).
language_name: &'static str,
/// Lazily filled by `imports()` on its first call; empty until then.
imports_cache: OnceCell<Vec<Import>>,
}

impl<'src> ParsedFile<'src> {
Expand Down Expand Up @@ -66,6 +76,46 @@ impl<'src> ParsedFile<'src> {
pub fn has_syntax_errors(&self) -> bool {
    // Delegate to the error flag reported by the tree's root node.
    let root = self.tree.root_node();
    root.has_error()
}

/// The file's imports, computed on the first call and cached after.
///
/// This slice is the single source of truth for import data:
/// consumers that previously each built their own
/// `Query::new(lang, adapter.import_query())` should read it instead,
/// so the query runs at most once per file.
pub fn imports(&self) -> &[Import] {
    let cached: &Vec<Import> = self.imports_cache.get_or_init(|| extract_imports(self));
    cached.as_slice()
}

/// Best-effort receiver-to-import lookup: given a call receiver
/// like `rand` or `myrand`, return the matching `Import` if one
/// of the file's imports plausibly produced that name.
///
/// Matching is coarse and happens in two passes over `imports()`:
///
/// 1. an import whose module text equals `receiver` exactly
///    (Python-style `import os` → `os`);
/// 2. otherwise, the first import whose last path segment — the text
///    after the final `/` or `.` — equals `receiver`
///    (`"math/rand"` → `rand`; `"react-dom"` has no separator and is
///    its own last segment).
///
/// An exact match always wins over a last-segment match, regardless
/// of import order, so `path` resolves to `import path` even when
/// `import os.path` appears earlier in the file.
///
/// Returns `None` for an empty `receiver` or when nothing matches.
/// Alias-aware resolution (`import myrand "math/rand"` → `myrand`)
/// requires language-specific AST shapes; this function will be
/// tightened per-language as the security layer demands.
pub fn resolve_receiver(&self, receiver: &str) -> Option<&Import> {
    if receiver.is_empty() {
        return None;
    }
    let imports = self.imports();
    imports
        .iter()
        .find(|imp| imp.module == receiver)
        .or_else(|| {
            imports.iter().find(|imp| {
                // `rsplit` always yields at least one piece, so a module
                // without separators compares as a whole.
                imp.module
                    .rsplit(|c: char| matches!(c, '/' | '.'))
                    .next()
                    == Some(receiver)
            })
        })
}
}

/// Parse `source` as the language inferred from `path`'s extension.
Expand All @@ -89,6 +139,7 @@ pub fn parse<'src>(path: &str, source: &'src str) -> Option<ParsedFile<'src>> {
tree,
source,
language_name: adapter.name(),
imports_cache: OnceCell::new(),
})
}

Expand Down Expand Up @@ -133,4 +184,50 @@ mod tests {
// Sanity: TS program root has named children.
assert!(root.named_child_count() > 0);
}

#[test]
fn imports_cache_is_populated_on_first_call() {
    // The very first read of `imports()` must already contain both
    // modules named in the source.
    let parsed = parse("foo.py", "import os\nimport secrets\n").unwrap();
    let found = parsed.imports();
    for wanted in ["os", "secrets"] {
        assert!(found.iter().any(|imp| imp.module == wanted));
    }
}

#[test]
fn imports_cache_is_stable_across_calls() {
    // Two reads must hand back the same backing storage; a differing
    // data pointer would mean the list was extracted again.
    let parsed = parse("foo.py", "import os\n").unwrap();
    let addresses = [parsed.imports().as_ptr(), parsed.imports().as_ptr()];
    assert_eq!(
        addresses[0], addresses[1],
        "cache should return identical slice"
    );
}

#[test]
fn resolve_receiver_matches_python_module_name() {
    let parsed = parse("foo.py", "import secrets\nimport os\n").unwrap();
    // Both imported modules resolve by their bare name.
    for imported in ["secrets", "os"] {
        assert!(parsed.resolve_receiver(imported).is_some());
    }
    // A module that never appears in the file must not resolve.
    let missing = parsed.resolve_receiver("random");
    assert!(
        missing.is_none(),
        "random was not imported — receiver lookup must miss"
    );
}

#[test]
fn resolve_receiver_picks_last_segment_for_paths() {
    // With Go's `import "math/rand"`, the call `rand.Intn(...)` uses
    // only the final path segment as its receiver; the lookup has to
    // lead back to the full module path.
    let go_src = "package main\n\nimport \"math/rand\"\n\nfunc f() { rand.Intn(10) }\n";
    let parsed = parse("main.go", go_src).unwrap();
    let hit = parsed.resolve_receiver("rand").expect("rand resolves");
    assert_eq!(hit.module, "math/rand");
}

#[test]
fn resolve_receiver_empty_string_misses() {
    // An empty receiver can never name an import, whatever the file holds.
    let parsed = parse("foo.py", "import os\n").unwrap();
    let outcome = parsed.resolve_receiver("");
    assert!(outcome.is_none());
}
}
Loading
Loading