From fb98d78a75692e3887cf87344f4387a3492838c6 Mon Sep 17 00:00:00 2001 From: wei9072 Date: Thu, 7 May 2026 08:40:14 +0000 Subject: [PATCH] feat(security): SEC004 + SEC007 multi-language dispatch (PR #18) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the audit gaps identified after PRs #10 / #12. The audit across SEC003-008: | Rule | Coverage before | Action this PR | |---|---|---| | SEC003 TLS off | text-level Python/Node/Go/.NET | unchanged (decent already) | | SEC004 shell | Python `shell=True`+interp only | **language-aware dispatch** | | SEC005 SQL concat | text+string-literal Python/Java | unchanged (decent) | | SEC006 CORS | text-level cross-language | unchanged | | SEC007 JWT | `name.contains("jwt")` Python only | **language-aware dispatch** | | SEC008 deser | Python/Node/Java idioms | unchanged (decent) | ## SEC004 expansion Per-language shell-running idioms; requires interpolation in arg. - **Python**: subprocess.run/Popen with `shell=True` + interp - **Node.js**: `child_process.exec` / `execSync` with interp (always shells out, no `shell:true` gate; `execFile` is the safe one) - **PHP**: global `shell_exec` / `exec` / `passthru` / `system` / `proc_open` with interp - **Java**: `Runtime.getRuntime().exec(String)` overload with concat — String[] overload safe and excluded - **Go**: `exec.Command("sh"|"bash"|"/bin/sh"|"/bin/bash", "-c", ...)` with interp. Bare `exec.Command("ls", arg)` (argv-style) excluded — no shell metachar interpretation `text_has_interp` extended with PHP `.` concat (gated on `$` to avoid floating-point literals). ## SEC007 expansion Per-language JWT decode without verification: - **Python**: `jwt.decode(...)` without algorithms/key/verify kwarg (existing behaviour) - **Node.js**: `jsonwebtoken.decode()` always returns unverified claims — flag unconditionally; `verify(token, secret, opts)` is the safe API. `verify()` with `verify: false` opt also flagged. - **Java / Kotlin**: Auth0 lib's `JWT.decode(token)` returns unverified DecodedJWT; safe path is `JWT.require(...).build().verify(token)`. - **PHP**: firebase/php-jwt's `JWT::decode($token, $key)` requires explicit algorithm list. Flagged unless one of `'HS256'`/`'RS256'`/ `'ES256'`/`'EdDSA'` appears in call text. Algorithm-`none` detection extended with JWT-spec literal `"alg": "none"` shape. `check_jwt_unsafe` now takes `&ParsedFile` so language identity is available — prevents PHP `JWT::decode` from being misclassified as Java (the old `name.contains("JWT")` check was language-blind). ## Infrastructure changes 1. **`call_name` extended for PHP scoped/member calls.** Previously only handled Java's `method_invocation`; now also composes `Class::method` from `scoped_call_expression` and `$obj->method` from `member_call_expression`. 2. **`leaf_method_name(name)` helper** — splits on `.` / `::` / `->` so `JWT::decode`'s leaf is `decode`, not the whole string. 3. **walk dispatch** extended with `scoped_call_expression` and `member_call_expression` node kinds. ## Tests 10 new (5 SEC004 multi-lang + 5 SEC007 multi-lang). 174 → **177** total tests passing. Co-Authored-By: Claude Opus 4.7 --- crates/aegis-core/src/security.rs | 354 ++++++++++++++++++++++++++++-- 1 file changed, 333 insertions(+), 21 deletions(-) diff --git a/crates/aegis-core/src/security.rs b/crates/aegis-core/src/security.rs index 87eca44..f0b728b 100644 --- a/crates/aegis-core/src/security.rs +++ b/crates/aegis-core/src/security.rs @@ -66,12 +66,13 @@ fn walk(parsed: &ParsedFile<'_>, node: Node, out: &mut Vec) { kind, "call" | "call_expression" | "invocation_expression" | "method_invocation" | "function_call_expression" + | "scoped_call_expression" | "member_call_expression" ) { if let Some(name) = call_name(node, src) { check_eval(&name, node, src, out); check_tls_off(&name, node, src, out); check_shell_concat(&name, node, src, out); - check_jwt_unsafe(&name, node, src, out); + check_jwt_unsafe(&name, parsed, node, out); check_insecure_deserialization(&name, node, src, out); check_sql_concat(&name, node, src, out); check_weak_crypto(&name, parsed, node, out); @@ -124,6 +125,29 @@ fn call_name(node: Node, src: &[u8]) -> Option { } return Some(name_text.to_string()); } + // PHP's `scoped_call_expression` for `Class::method(...)`: scope + // (the class name) + name (the method) live in separate fields. + // Compose with `::` to match how PHP source spells it. + if node.kind() == "scoped_call_expression" { + let name_node = node.child_by_field_name("name")?; + let name_text = name_node.utf8_text(src).ok()?; + if let Some(scope) = node.child_by_field_name("scope") { + let scope_text = scope.utf8_text(src).ok()?; + return Some(format!("{scope_text}::{name_text}")); + } + return Some(name_text.to_string()); + } + // PHP's `member_call_expression` for `$obj->method(...)`. Same + // shape as Java's method_invocation but with `->` separator. + if node.kind() == "member_call_expression" { + let name_node = node.child_by_field_name("name")?; + let name_text = name_node.utf8_text(src).ok()?; + if let Some(obj) = node.child_by_field_name("object") { + let obj_text = obj.utf8_text(src).ok()?; + return Some(format!("{obj_text}->{name_text}")); + } + return Some(name_text.to_string()); + } let func = node .child_by_field_name("function") .or_else(|| node.named_child(0))?; @@ -337,20 +361,96 @@ fn normalize_kv(s: &str) -> String { } // ─── Rule SEC004: shell command with interpolation ─────────────── +// +// PR #18 audit: previously this rule required `shell=True` literal, +// which is a Python-specific gating. Other languages have shell +// invocation without that flag (Node.js child_process.exec, Java +// Runtime.exec(String), PHP shell_exec, Go exec.Command("sh", ...)). +// Now matches the receiver+method per language and asks for +// interpolation in the first arg. fn check_shell_concat(name: &str, node: Node, src: &[u8], out: &mut Vec) { + let Ok(text) = node.utf8_text(src) else { return }; let last = name.rsplit('.').next().unwrap_or(name); - if !matches!(last, "system" | "popen" | "call" | "run" | "Popen" | "exec" | "execSync" | "spawn") { - return; + + // Python: shell=True + interpolation. The shell=True kwarg is a + // strong signal — without it `subprocess.run([...])` is safe. + let python_shell_call = matches!( + last, + "system" | "popen" | "call" | "run" | "Popen" + ); + if python_shell_call { + let has_shell_true = text.contains("shell=True") || text.contains("shell: true"); + if has_shell_true && text_has_interp(text) { + push(out, node, "SEC004", "block", + "shell command with `shell=True` and string interpolation — command-injection risk".into()); + return; + } } - let Ok(text) = node.utf8_text(src) else { return }; - let has_shell_true = text.contains("shell=True") || text.contains("shell: true"); - let has_interp = text.contains("${") - || text.contains("f\"") || text.contains("f'") - || (text.contains("+") && text.contains("\"")); - if has_shell_true && has_interp { - push(out, node, "SEC004", "block", - "shell command with `shell=True` and string interpolation — command-injection risk".into()); + + // Node.js: child_process.exec / execSync always run via shell. + // Any interp in the first arg is a problem. + if matches!(last, "exec" | "execSync") && name.contains("child_process") { + if text_has_interp(text) { + push(out, node, "SEC004", "block", + "child_process.exec with string interpolation — command-injection risk; use execFile or pass an array".into()); + return; + } } + + // PHP: global functions that always run via shell. + if matches!(name, "shell_exec" | "exec" | "passthru" | "system" | "proc_open") { + if text_has_interp(text) { + push(out, node, "SEC004", "block", + "PHP shell-running function with string interpolation — command-injection risk; escape via escapeshellarg() or use safer APIs".into()); + return; + } + } + + // Java: Runtime.getRuntime().exec(String) — single-string overload + // splits on whitespace via StringTokenizer (no shell, but still + // dangerous when concatenated). Only flag the String overload, not + // exec(String[]). + if last == "exec" && name.contains("Runtime") { + if text_has_interp(text) && !text.contains("new String[") { + push(out, node, "SEC004", "block", + "Runtime.exec(String) with concatenation — pass a String[] array of args instead".into()); + return; + } + } + + // Go: exec.Command with `sh -c` is the dangerous shape; bare + // exec.Command("ls", arg) is fine because it doesn't shell-out. + if last == "Command" && name.ends_with("exec.Command") { + if (text.contains("\"sh\"") || text.contains("\"bash\"") + || text.contains("\"/bin/sh\"") || text.contains("\"/bin/bash\"")) + && text.contains("\"-c\"") + && text_has_interp(text) + { + push(out, node, "SEC004", "block", + "exec.Command(\"sh\", \"-c\", ...) with interpolation — command-injection risk; use exec.Command with separate args".into()); + } + } +} + +/// Last segment of a (possibly polyglot-shaped) call name, splitting +/// on `.` (Python / JS / Java composed by call_name) **and** `::` +/// (PHP scoped) **and** `->` (PHP member). +fn leaf_method_name(name: &str) -> &str { + let after_dot = name.rsplit('.').next().unwrap_or(name); + let after_arrow = after_dot.rsplit("->").next().unwrap_or(after_dot); + after_arrow.rsplit("::").next().unwrap_or(after_arrow) +} + +fn text_has_interp(text: &str) -> bool { + text.contains("${") + || text.contains("f\"") + || text.contains("f'") + || text.contains(".format(") + || (text.contains(" + ") && text.contains("\"")) + // PHP uses `.` for string concat. Only count it when the + // text also has a `$` (a PHP variable) to avoid matching + // floating-point literals like `1.5`. + || (text.contains(" . ") && text.contains('$')) } // ─── Rule SEC005: SQL string concat ────────────────────────────── @@ -404,11 +504,58 @@ fn contains_sql_in_string_literal(node: Node, src: &[u8]) -> bool { } // ─── Rule SEC007: JWT decoded without verification ─────────────── -fn check_jwt_unsafe(name: &str, node: Node, src: &[u8], out: &mut Vec) { - let last = name.rsplit('.').next().unwrap_or(name); +// +// PR #18 audit: previously gated on `name.contains("jwt"|"JWT")` +// which caught Python's `import jwt; jwt.decode(...)` but missed +// Node `jsonwebtoken.decode(...)`, Java `JWT.decode()`, PHP +// `JWT::decode`. Now matches the JWT-shaped call APIs across +// languages. +fn check_jwt_unsafe( + name: &str, + parsed: &ParsedFile<'_>, + node: Node, + out: &mut Vec, +) { + let last = leaf_method_name(name); + let src = parsed.source_bytes(); let Ok(text) = node.utf8_text(src) else { return }; - if last == "decode" && (name.contains("jwt") || name.contains("JWT")) { - if text.contains("verify=False") || text.contains("\"verify\": false") || text.contains("'verify': false") { + let lang = parsed.language_name(); + + // Algorithm "none" is unambiguous across languages and library + // styles. Catch first (independent of language dispatch). + if text.contains("algorithms=['none']") + || text.contains("algorithms=[\"none\"]") + || text.contains("algorithms: ['none']") + || text.contains("algorithms: [\"none\"]") + || text.contains("algorithms:['none']") + || text.contains("\"alg\": \"none\"") + || text.contains("'alg': 'none'") + { + push(out, node, "SEC007", "block", + "JWT `algorithms` list contains 'none' — disables signature verification".into()); + return; + } + + let is_python_jwt = lang == "python" + && last == "decode" + && (name.contains("jwt") || name.contains("JWT")); + let is_node_jwt = matches!(lang, "javascript" | "typescript") + && matches!(last, "decode" | "verify") + && (name.contains("jsonwebtoken") || name == "jwt.decode" || name == "jwt.verify"); + // Java: JWT.decode(token) — Auth0 lib's "decode without verify". + let is_java_jwt_decode = matches!(lang, "java" | "kotlin") + && last == "decode" + && (name.contains("JWT") || name.contains("Jwts")); + // PHP: firebase/php-jwt's JWT::decode($token, $key, $algos). + let is_php_jwt = lang == "php" + && last == "decode" + && (name == "JWT::decode" || name.ends_with("\\JWT::decode")); + + if is_python_jwt { + if text.contains("verify=False") + || text.contains("\"verify\": false") + || text.contains("'verify': false") + { push(out, node, "SEC007", "block", "JWT decoded with `verify=False` — accepts forged tokens".into()); return; @@ -419,13 +566,47 @@ fn check_jwt_unsafe(name: &str, node: Node, src: &[u8], out: &mut Vec