diff --git a/crates/aegis-core/src/security.rs b/crates/aegis-core/src/security.rs index 839406f..87eca44 100644 --- a/crates/aegis-core/src/security.rs +++ b/crates/aegis-core/src/security.rs @@ -74,7 +74,7 @@ fn walk(parsed: &ParsedFile<'_>, node: Node, out: &mut Vec) { check_jwt_unsafe(&name, node, src, out); check_insecure_deserialization(&name, node, src, out); check_sql_concat(&name, node, src, out); - check_weak_crypto(&name, node, src, out); + check_weak_crypto(&name, parsed, node, out); check_weak_random_for_token(&name, parsed, node, out); check_ssrf_user_input_url(&name, node, src, out); } @@ -458,21 +458,33 @@ fn check_insecure_deserialization(name: &str, node: Node, src: &[u8], out: &mut } // ─── Rule SEC009: weak crypto for security context ─────────────── -fn check_weak_crypto(name: &str, node: Node, src: &[u8], out: &mut Vec) { - let last = name.rsplit('.').next().unwrap_or(name); - // Common hash entry points across languages. - let weak = matches!( - last, - "md5" | "sha1" | "MD5" | "SHA1" | "createHash" - ) || name.ends_with(".md5") - || name.ends_with(".sha1") - || name.ends_with("hashlib.md5") - || name.ends_with("hashlib.sha1"); - if !weak { +// +// Language-aware dispatch. Each language's matcher hard-codes the +// weak-hash call shape — sometimes in the receiver path (Python +// `hashlib.md5`, Go `md5.Sum`), sometimes in a string argument +// (Java `MessageDigest.getInstance("MD5")`, Node +// `createHash('md5')`, PHP `hash('md5', ...)`), sometimes in the +// class name (C# `MD5.Create()`). Round 9 surfaced the Go and Java +// cases as previously-uncovered gaps in this rule. +fn check_weak_crypto( + name: &str, + parsed: &ParsedFile<'_>, + node: Node, + out: &mut Vec, +) { + let src = parsed.source_bytes(); + let is_weak = match parsed.language_name() { + "python" => is_python_weak_hash(name), + "javascript" | "typescript" => is_js_weak_hash(name, node, src), + "go" => is_go_weak_hash(name, parsed), + "java" | "kotlin" => is_java_weak_hash(name, node, src), + "csharp" => is_csharp_weak_hash(name), + "php" => is_php_weak_hash(name, node, src), + _ => false, + }; + if !is_weak { return; } - // Look at the *enclosing assignment* for a security-context - // identifier. Walk up a few parents. if let Some(ctx) = enclosing_security_context(node, src) { push( out, @@ -487,27 +499,142 @@ fn check_weak_crypto(name: &str, node: Node, src: &[u8], out: &mut Vec bool { + // hashlib.md5 / hashlib.sha1 (direct module call). `hashlib.new(name)` + // with a string algo arg is rarer; covered conservatively. + let last = name.rsplit('.').next().unwrap_or(name); + matches!(last, "md5" | "sha1") + && (name == "md5" || name == "sha1" || name.starts_with("hashlib.")) +} + +fn is_js_weak_hash(name: &str, node: Node, src: &[u8]) -> bool { + // Node.js: `crypto.createHash('md5'|'sha1')`. The function name + // is `createHash`; the algorithm lives in the first string arg. + let last = name.rsplit('.').next().unwrap_or(name); + if last == "createHash" { + return first_arg_is_weak_alg_string(node, src); + } + false +} + +fn is_go_weak_hash(name: &str, parsed: &ParsedFile<'_>) -> bool { + // `md5.Sum`/`md5.New` / `sha1.Sum`/`sha1.New` — when imported + // `crypto/md5` or `crypto/sha1`. Layer 1 import resolution + // confirms the receiver came from the right package. + let receiver = name.split('.').next().unwrap_or(""); + let last = name.rsplit('.').next().unwrap_or(name); + if !matches!(receiver, "md5" | "sha1") { + return false; + } + if !matches!(last, "New" | "Sum") { + return false; + } + match parsed.resolve_receiver(receiver) { + Some(imp) => imp.module == "crypto/md5" || imp.module == "crypto/sha1", + None => true, + } +} + +fn is_java_weak_hash(name: &str, node: Node, src: &[u8]) -> bool { + // MessageDigest.getInstance("MD5"|"SHA-1"|"SHA1") + if name == "MessageDigest.getInstance" || name.ends_with(".MessageDigest.getInstance") { + return first_arg_is_weak_alg_string(node, src); + } + // Apache Commons Codec: DigestUtils.md5Hex, DigestUtils.sha1Hex, + // and the unsuffixed md5 / sha1 helpers. + let last = name.rsplit('.').next().unwrap_or(name); + if matches!(last, "md5Hex" | "sha1Hex" | "md5" | "sha1") + && name.contains("DigestUtils") + { + return true; + } + false +} + +fn is_csharp_weak_hash(name: &str) -> bool { + // MD5.Create(), SHA1.Create(), MD5CryptoServiceProvider. + // Receiver-anchored to avoid catching `SomeMD5Field` / `SHA1024`. + name.starts_with("MD5.") + || name.starts_with("SHA1.") + || name.contains(".MD5.") + || name.contains(".SHA1.") + || name.contains("MD5CryptoServiceProvider") + || name.contains("SHA1CryptoServiceProvider") + || name.contains("MD5Managed") + || name.contains("SHA1Managed") +} + +fn is_php_weak_hash(name: &str, node: Node, src: &[u8]) -> bool { + if matches!(name, "md5" | "sha1") { + return true; + } + if name == "hash" { + return first_arg_is_weak_alg_string(node, src); + } + false +} + +fn first_arg_is_weak_alg_string(node: Node, src: &[u8]) -> bool { + let args = node.child_by_field_name("arguments").or_else(|| { + let mut cursor = node.walk(); + for ch in node.children(&mut cursor) { + if matches!(ch.kind(), "arguments" | "argument_list") { + return Some(ch); + } + } + None + }); + let Some(args) = args else { return false }; + let mut cursor = args.walk(); + let Some(first) = args.named_children(&mut cursor).next() else { return false }; + if !matches!( + first.kind(), + "string" | "string_literal" | "interpreted_string_literal" + ) { + return false; + } + let Ok(text) = first.utf8_text(src) else { return false }; + let stripped = text.trim_matches(|c: char| matches!(c, '"' | '\'' | '`')); + let lower = stripped.to_ascii_lowercase(); + matches!(lower.as_str(), "md5" | "sha1" | "sha-1") +} + fn enclosing_security_context(node: Node, src: &[u8]) -> Option<&'static str> { - let mut cur = node.parent(); - // Deliberately conservative: only identifiers that strongly - // imply a security context. Avoid generic words like "digest" - // (matches hashlib's `.hexdigest()` method on an etag), or - // "key" (matches dictionary keys in unrelated contexts). + // Deliberately conservative needle set: only identifiers that + // strongly imply a hash-bearing security context. Avoids generic + // words like "digest" (matches hashlib's `.hexdigest()` method + // on an etag) or "key" (dictionary keys in unrelated contexts). let names: &[(&str, &str)] = &[ ("password", "password"), ("passwd", "password"), + ("passphrase", "password"), ("signature", "signature"), ("hmac", "hmac"), ("token", "token"), ("secret", "secret"), ]; - for _ in 0..6 { + let mut cur = node.parent(); + let mut saw_assignment = false; + for _ in 0..10 { let Some(n) = cur else { break }; if matches!( n.kind(), + // JS / TS / Python idioms "assignment" | "assignment_expression" | "variable_declarator" - | "lexical_declaration" | "let_declaration" + | "augmented_assignment_expression" + | "lexical_declaration" | "return_statement" + // Go + | "var_declaration" | "var_spec" | "short_var_declaration" + | "assignment_statement" | "const_declaration" | "const_spec" + // Java / Kotlin / C# + | "local_variable_declaration" | "field_declaration" + | "property_declaration" + // Rust + | "let_declaration" ) { + saw_assignment = true; if let Ok(text) = n.utf8_text(src) { let lower = text.to_ascii_lowercase(); for (needle, label) in names { @@ -517,6 +644,30 @@ fn enclosing_security_context(node: Node, src: &[u8]) -> Option<&'static str> { } } } + let is_function_shape = matches!( + n.kind(), + "function_definition" | "function_declaration" + | "method_definition" | "method_declaration" + | "function_expression" | "lambda_expression" | "function_item" + | "constructor_declaration" + ); + if saw_assignment && is_function_shape { + // Function name carries the hash-context signal when the + // local assignment used a generic variable. Round 9 + // production case: `h := md5.Sum(...)` inside `func + // HashPassword(...)`. + if let Some(name_node) = n.child_by_field_name("name") { + if let Ok(name_text) = name_node.utf8_text(src) { + let lower = name_text.to_ascii_lowercase(); + for (needle, label) in names { + if lower.contains(needle) { + return Some(label); + } + } + } + } + break; + } cur = n.parent(); } None @@ -1322,6 +1473,127 @@ mod tests { assert!(!v.iter().any(|v| v.rule_id == "SEC009"), "got {v:?}"); } + // ─── SEC009 multi-language dispatch (PR #12) ───────────────── + #[test] + fn sec009_node_create_hash_md5_for_password_blocks() { + let v = check( + ".js", + "const crypto = require('crypto');\n\ + function hashPassword(pw) {\n \ + const h = crypto.createHash('md5');\n \ + h.update(pw);\n \ + return h.digest('hex');\n\ + }\n", + ); + assert!(v.iter().any(|v| v.rule_id == "SEC009"), "got {v:?}"); + } + + #[test] + fn sec009_node_create_hash_sha256_does_not_block() { + let v = check( + ".js", + "const crypto = require('crypto');\n\ + function hashPassword(pw) {\n \ + const h = crypto.createHash('sha256');\n \ + h.update(pw);\n \ + return h.digest('hex');\n\ + }\n", + ); + assert!(!v.iter().any(|v| v.rule_id == "SEC009"), "got {v:?}"); + } + + #[test] + fn sec009_go_md5_sum_for_password_blocks() { + // Round 9 case. Layer 1 import resolution sees `crypto/md5`. + let v = check( + ".go", + "package main\n\n\ + import (\n \ + \"crypto/md5\"\n \ + \"encoding/hex\"\n\ + )\n\n\ + func HashPassword(password string) string {\n \ + h := md5.Sum([]byte(password))\n \ + return hex.EncodeToString(h[:])\n\ + }\n", + ); + assert!(v.iter().any(|v| v.rule_id == "SEC009"), "got {v:?}"); + } + + #[test] + fn sec009_java_message_digest_md5_for_password_blocks() { + // Round 9 case. Algo lives in the string arg. + let v = check( + ".java", + "import java.security.MessageDigest;\n\n\ + public class Auth {\n \ + public static String hashPassword(String password) throws Exception {\n \ + MessageDigest md = MessageDigest.getInstance(\"MD5\");\n \ + byte[] digest = md.digest(password.getBytes());\n \ + return digest.toString();\n \ + }\n\ + }\n", + ); + assert!(v.iter().any(|v| v.rule_id == "SEC009"), "got {v:?}"); + } + + #[test] + fn sec009_java_sha256_does_not_block() { + let v = check( + ".java", + "import java.security.MessageDigest;\n\n\ + public class Auth {\n \ + public static String hashPassword(String password) throws Exception {\n \ + MessageDigest md = MessageDigest.getInstance(\"SHA-256\");\n \ + return md.digest(password.getBytes()).toString();\n \ + }\n\ + }\n", + ); + assert!(!v.iter().any(|v| v.rule_id == "SEC009"), "got {v:?}"); + } + + #[test] + fn sec009_php_md5_for_password_blocks() { + let v = check( + ".php", + "