diff --git a/crates/aegis-core/src/security.rs b/crates/aegis-core/src/security.rs index 87eca44..f0b728b 100644 --- a/crates/aegis-core/src/security.rs +++ b/crates/aegis-core/src/security.rs @@ -66,12 +66,13 @@ fn walk(parsed: &ParsedFile<'_>, node: Node, out: &mut Vec) { kind, "call" | "call_expression" | "invocation_expression" | "method_invocation" | "function_call_expression" + | "scoped_call_expression" | "member_call_expression" ) { if let Some(name) = call_name(node, src) { check_eval(&name, node, src, out); check_tls_off(&name, node, src, out); check_shell_concat(&name, node, src, out); - check_jwt_unsafe(&name, node, src, out); + check_jwt_unsafe(&name, parsed, node, out); check_insecure_deserialization(&name, node, src, out); check_sql_concat(&name, node, src, out); check_weak_crypto(&name, parsed, node, out); @@ -124,6 +125,29 @@ fn call_name(node: Node, src: &[u8]) -> Option { } return Some(name_text.to_string()); } + // PHP's `scoped_call_expression` for `Class::method(...)`: scope + // (the class name) + name (the method) live in separate fields. + // Compose with `::` to match how PHP source spells it. + if node.kind() == "scoped_call_expression" { + let name_node = node.child_by_field_name("name")?; + let name_text = name_node.utf8_text(src).ok()?; + if let Some(scope) = node.child_by_field_name("scope") { + let scope_text = scope.utf8_text(src).ok()?; + return Some(format!("{scope_text}::{name_text}")); + } + return Some(name_text.to_string()); + } + // PHP's `member_call_expression` for `$obj->method(...)`. Same + // shape as Java's method_invocation but with `->` separator. + if node.kind() == "member_call_expression" { + let name_node = node.child_by_field_name("name")?; + let name_text = name_node.utf8_text(src).ok()?; + if let Some(obj) = node.child_by_field_name("object") { + let obj_text = obj.utf8_text(src).ok()?; + return Some(format!("{obj_text}->{name_text}")); + } + return Some(name_text.to_string()); + } let func = node .child_by_field_name("function") .or_else(|| node.named_child(0))?; @@ -337,20 +361,96 @@ fn normalize_kv(s: &str) -> String { } // ─── Rule SEC004: shell command with interpolation ─────────────── +// +// PR #18 audit: previously this rule required `shell=True` literal, +// which is a Python-specific gating. Other languages have shell +// invocation without that flag (Node.js child_process.exec, Java +// Runtime.exec(String), PHP shell_exec, Go exec.Command("sh", ...)). +// Now matches the receiver+method per language and asks for +// interpolation in the first arg. fn check_shell_concat(name: &str, node: Node, src: &[u8], out: &mut Vec) { + let Ok(text) = node.utf8_text(src) else { return }; let last = name.rsplit('.').next().unwrap_or(name); - if !matches!(last, "system" | "popen" | "call" | "run" | "Popen" | "exec" | "execSync" | "spawn") { - return; + + // Python: shell=True + interpolation. The shell=True kwarg is a + // strong signal — without it `subprocess.run([...])` is safe. + let python_shell_call = matches!( + last, + "system" | "popen" | "call" | "run" | "Popen" + ); + if python_shell_call { + let has_shell_true = text.contains("shell=True") || text.contains("shell: true"); + if has_shell_true && text_has_interp(text) { + push(out, node, "SEC004", "block", + "shell command with `shell=True` and string interpolation — command-injection risk".into()); + return; + } } - let Ok(text) = node.utf8_text(src) else { return }; - let has_shell_true = text.contains("shell=True") || text.contains("shell: true"); - let has_interp = text.contains("${") - || text.contains("f\"") || text.contains("f'") - || (text.contains("+") && text.contains("\"")); - if has_shell_true && has_interp { - push(out, node, "SEC004", "block", - "shell command with `shell=True` and string interpolation — command-injection risk".into()); + + // Node.js: child_process.exec / execSync always run via shell. + // Any interp in the first arg is a problem. + if matches!(last, "exec" | "execSync") && name.contains("child_process") { + if text_has_interp(text) { + push(out, node, "SEC004", "block", + "child_process.exec with string interpolation — command-injection risk; use execFile or pass an array".into()); + return; + } } + + // PHP: global functions that always run via shell. + if matches!(name, "shell_exec" | "exec" | "passthru" | "system" | "proc_open") { + if text_has_interp(text) { + push(out, node, "SEC004", "block", + "PHP shell-running function with string interpolation — command-injection risk; escape via escapeshellarg() or use safer APIs".into()); + return; + } + } + + // Java: Runtime.getRuntime().exec(String) — single-string overload + // splits on whitespace via StringTokenizer (no shell, but still + // dangerous when concatenated). Only flag the String overload, not + // exec(String[]). + if last == "exec" && name.contains("Runtime") { + if text_has_interp(text) && !text.contains("new String[") { + push(out, node, "SEC004", "block", + "Runtime.exec(String) with concatenation — pass a String[] array of args instead".into()); + return; + } + } + + // Go: exec.Command with `sh -c` is the dangerous shape; bare + // exec.Command("ls", arg) is fine because it doesn't shell-out. + if last == "Command" && name.ends_with("exec.Command") { + if (text.contains("\"sh\"") || text.contains("\"bash\"") + || text.contains("\"/bin/sh\"") || text.contains("\"/bin/bash\"")) + && text.contains("\"-c\"") + && text_has_interp(text) + { + push(out, node, "SEC004", "block", + "exec.Command(\"sh\", \"-c\", ...) with interpolation — command-injection risk; use exec.Command with separate args".into()); + } + } +} + +/// Last segment of a (possibly polyglot-shaped) call name, splitting +/// on `.` (Python / JS / Java composed by call_name) **and** `::` +/// (PHP scoped) **and** `->` (PHP member). +fn leaf_method_name(name: &str) -> &str { + let after_dot = name.rsplit('.').next().unwrap_or(name); + let after_arrow = after_dot.rsplit("->").next().unwrap_or(after_dot); + after_arrow.rsplit("::").next().unwrap_or(after_arrow) +} + +fn text_has_interp(text: &str) -> bool { + text.contains("${") + || text.contains("f\"") + || text.contains("f'") + || text.contains(".format(") + || (text.contains(" + ") && text.contains("\"")) + // PHP uses `.` for string concat. Only count it when the + // text also has a `$` (a PHP variable) to avoid matching + // floating-point literals like `1.5`. + || (text.contains(" . ") && text.contains('$')) } // ─── Rule SEC005: SQL string concat ────────────────────────────── @@ -404,11 +504,58 @@ fn contains_sql_in_string_literal(node: Node, src: &[u8]) -> bool { } // ─── Rule SEC007: JWT decoded without verification ─────────────── -fn check_jwt_unsafe(name: &str, node: Node, src: &[u8], out: &mut Vec) { - let last = name.rsplit('.').next().unwrap_or(name); +// +// PR #18 audit: previously gated on `name.contains("jwt"|"JWT")` +// which caught Python's `import jwt; jwt.decode(...)` but missed +// Node `jsonwebtoken.decode(...)`, Java `JWT.decode()`, PHP +// `JWT::decode`. Now matches the JWT-shaped call APIs across +// languages. +fn check_jwt_unsafe( + name: &str, + parsed: &ParsedFile<'_>, + node: Node, + out: &mut Vec, +) { + let last = leaf_method_name(name); + let src = parsed.source_bytes(); let Ok(text) = node.utf8_text(src) else { return }; - if last == "decode" && (name.contains("jwt") || name.contains("JWT")) { - if text.contains("verify=False") || text.contains("\"verify\": false") || text.contains("'verify': false") { + let lang = parsed.language_name(); + + // Algorithm "none" is unambiguous across languages and library + // styles. Catch first (independent of language dispatch). + if text.contains("algorithms=['none']") + || text.contains("algorithms=[\"none\"]") + || text.contains("algorithms: ['none']") + || text.contains("algorithms: [\"none\"]") + || text.contains("algorithms:['none']") + || text.contains("\"alg\": \"none\"") + || text.contains("'alg': 'none'") + { + push(out, node, "SEC007", "block", + "JWT `algorithms` list contains 'none' — disables signature verification".into()); + return; + } + + let is_python_jwt = lang == "python" + && last == "decode" + && (name.contains("jwt") || name.contains("JWT")); + let is_node_jwt = matches!(lang, "javascript" | "typescript") + && matches!(last, "decode" | "verify") + && (name.contains("jsonwebtoken") || name == "jwt.decode" || name == "jwt.verify"); + // Java: JWT.decode(token) — Auth0 lib's "decode without verify". + let is_java_jwt_decode = matches!(lang, "java" | "kotlin") + && last == "decode" + && (name.contains("JWT") || name.contains("Jwts")); + // PHP: firebase/php-jwt's JWT::decode($token, $key, $algos). + let is_php_jwt = lang == "php" + && last == "decode" + && (name == "JWT::decode" || name.ends_with("\\JWT::decode")); + + if is_python_jwt { + if text.contains("verify=False") + || text.contains("\"verify\": false") + || text.contains("'verify': false") + { push(out, node, "SEC007", "block", "JWT decoded with `verify=False` — accepts forged tokens".into()); return; @@ -419,13 +566,47 @@ fn check_jwt_unsafe(name: &str, node: Node, src: &[u8], out: &mut Vec