Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
314 changes: 293 additions & 21 deletions crates/aegis-core/src/security.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ fn walk(parsed: &ParsedFile<'_>, node: Node, out: &mut Vec<SecurityViolation>) {
check_jwt_unsafe(&name, node, src, out);
check_insecure_deserialization(&name, node, src, out);
check_sql_concat(&name, node, src, out);
check_weak_crypto(&name, node, src, out);
check_weak_crypto(&name, parsed, node, out);
check_weak_random_for_token(&name, parsed, node, out);
check_ssrf_user_input_url(&name, node, src, out);
}
Expand Down Expand Up @@ -458,21 +458,33 @@ fn check_insecure_deserialization(name: &str, node: Node, src: &[u8], out: &mut
}

// ─── Rule SEC009: weak crypto for security context ───────────────
fn check_weak_crypto(name: &str, node: Node, src: &[u8], out: &mut Vec<SecurityViolation>) {
let last = name.rsplit('.').next().unwrap_or(name);
// Common hash entry points across languages.
let weak = matches!(
last,
"md5" | "sha1" | "MD5" | "SHA1" | "createHash"
) || name.ends_with(".md5")
|| name.ends_with(".sha1")
|| name.ends_with("hashlib.md5")
|| name.ends_with("hashlib.sha1");
if !weak {
//
// Language-aware dispatch. Each language's matcher hard-codes the
// weak-hash call shape — sometimes in the receiver path (Python
// `hashlib.md5`, Go `md5.Sum`), sometimes in a string argument
// (Java `MessageDigest.getInstance("MD5")`, Node
// `createHash('md5')`, PHP `hash('md5', ...)`), sometimes in the
// class name (C# `MD5.Create()`). Round 9 surfaced the Go and Java
// cases as previously-uncovered gaps in this rule.
fn check_weak_crypto(
name: &str,
parsed: &ParsedFile<'_>,
node: Node,
out: &mut Vec<SecurityViolation>,
) {
let src = parsed.source_bytes();
let is_weak = match parsed.language_name() {
"python" => is_python_weak_hash(name),
"javascript" | "typescript" => is_js_weak_hash(name, node, src),
"go" => is_go_weak_hash(name, parsed),
"java" | "kotlin" => is_java_weak_hash(name, node, src),
"csharp" => is_csharp_weak_hash(name),
"php" => is_php_weak_hash(name, node, src),
_ => false,
};
if !is_weak {
return;
}
// Look at the *enclosing assignment* for a security-context
// identifier. Walk up a few parents.
if let Some(ctx) = enclosing_security_context(node, src) {
push(
out,
Expand All @@ -487,27 +499,142 @@ fn check_weak_crypto(name: &str, node: Node, src: &[u8], out: &mut Vec<SecurityV
}
}

// ── Per-language weak-hash matchers ──────────────────────────────

/// Reports whether `name` is a Python weak-hash constructor call path.
///
/// Accepts a bare `md5` / `sha1` (no dotted receiver) and any dotted path
/// that is rooted at `hashlib.` and whose final segment is `md5` or `sha1`.
/// The algorithm-by-string form `hashlib.new("md5")` is not recognised by
/// this matcher, because its final segment is `new`.
fn is_python_weak_hash(name: &str) -> bool {
    let (qualified, tail) = match name.rsplit_once('.') {
        Some((_, tail)) => (true, tail),
        None => (false, name),
    };
    if tail != "md5" && tail != "sha1" {
        return false;
    }
    // A bare call is taken at face value; a qualified one must come from
    // `hashlib` so that `foo.md5(...)` on an unrelated object is ignored.
    !qualified || name.starts_with("hashlib.")
}

/// JavaScript / TypeScript matcher for `crypto.createHash('md5' | 'sha1')`.
///
/// Only the final dotted segment of `name` is inspected; when it is
/// `createHash`, the verdict is whatever `first_arg_is_weak_alg_string`
/// reports for the call's first argument. Every other callee is not weak.
fn is_js_weak_hash(name: &str, node: Node, src: &[u8]) -> bool {
    match name.rsplit('.').next() {
        // The algorithm is not in the callee name; it is the first argument.
        Some("createHash") => first_arg_is_weak_alg_string(node, src),
        _ => false,
    }
}

/// Go matcher for `md5.Sum` / `md5.New` / `sha1.Sum` / `sha1.New`.
///
/// The first dotted segment of `name` must be `md5` or `sha1` and the last
/// segment must be `New` or `Sum`. If the file's import table can resolve
/// that first segment, it has to map to `crypto/md5` or `crypto/sha1`; if it
/// cannot be resolved at all, the call is still treated as weak.
fn is_go_weak_hash(name: &str, parsed: &ParsedFile<'_>) -> bool {
    let package = name.split('.').next().unwrap_or("");
    let callee = name.rsplit('.').next().unwrap_or(name);

    let shape_matches = matches!(package, "md5" | "sha1") && matches!(callee, "New" | "Sum");
    if !shape_matches {
        return false;
    }

    // Unresolved receiver => assume the standard-library package.
    parsed
        .resolve_receiver(package)
        .map_or(true, |imp| imp.module == "crypto/md5" || imp.module == "crypto/sha1")
}

/// Java / Kotlin matcher.
///
/// Two call shapes are recognised:
/// * `MessageDigest.getInstance(<alg>)`, bare or behind a dotted qualifier,
///   where the verdict comes from `first_arg_is_weak_alg_string`;
/// * a callee whose path mentions `DigestUtils` and whose final segment is
///   `md5Hex`, `sha1Hex`, `md5` or `sha1` (Apache Commons Codec helpers).
fn is_java_weak_hash(name: &str, node: Node, src: &[u8]) -> bool {
    let targets_get_instance =
        name == "MessageDigest.getInstance" || name.ends_with(".MessageDigest.getInstance");

    if targets_get_instance {
        // Algorithm lives in the string argument, not in the callee name.
        first_arg_is_weak_alg_string(node, src)
    } else {
        let method = name.rsplit('.').next().unwrap_or(name);
        name.contains("DigestUtils") && matches!(method, "md5Hex" | "sha1Hex" | "md5" | "sha1")
    }
}

/// C# matcher for weak-hash types appearing in a call path.
///
/// `MD5` / `SHA1` only count when they are a complete dotted segment followed
/// by a member access (`MD5.Create`, `System.…​.SHA1.Create`), which keeps
/// identifiers such as `SomeMD5Field` or `SHA1024` from matching. The legacy
/// `*CryptoServiceProvider` / `*Managed` type names match anywhere in `name`.
fn is_csharp_weak_hash(name: &str) -> bool {
    const LEADING_RECEIVERS: [&str; 2] = ["MD5.", "SHA1."];
    const ANYWHERE: [&str; 6] = [
        ".MD5.",
        ".SHA1.",
        "MD5CryptoServiceProvider",
        "SHA1CryptoServiceProvider",
        "MD5Managed",
        "SHA1Managed",
    ];

    LEADING_RECEIVERS
        .iter()
        .any(|prefix| name.starts_with(*prefix))
        || ANYWHERE.iter().any(|fragment| name.contains(*fragment))
}

/// PHP matcher for `md5(...)`, `sha1(...)` and `hash('md5' | 'sha1', ...)`.
///
/// PHP resolves function names case-insensitively, and a leading `\` merely
/// selects the global namespace, so `MD5($pw)`, `Sha1($pw)` and `\md5($pw)`
/// all invoke the same weak built-ins as the lowercase spelling. The callee
/// is therefore normalised before it is compared.
///
/// * `name` – callee text of the call being inspected.
/// * `node` / `src` – the call node and file bytes, consulted only for the
///   `hash(<alg>, ...)` form, where the algorithm is the first argument.
fn is_php_weak_hash(name: &str, node: Node, src: &[u8]) -> bool {
    // NOTE(review): assumes `name` may carry the source's leading `\`;
    // stripping it is a no-op when the caller has already removed it.
    let callee = name.strip_prefix('\\').unwrap_or(name);

    if callee.eq_ignore_ascii_case("md5") || callee.eq_ignore_ascii_case("sha1") {
        return true;
    }
    // `hash()` is algorithm-agnostic; only weak algorithm strings count.
    callee.eq_ignore_ascii_case("hash") && first_arg_is_weak_alg_string(node, src)
}

/// Returns `true` when the first argument of the call `node` is a string
/// literal naming a weak hash algorithm (`md5`, `sha1` or `sha-1`, compared
/// ASCII-case-insensitively after stripping surrounding quote characters).
///
/// The argument list is located via the `arguments` field, falling back to a
/// direct child of kind `arguments` / `argument_list`. Anything that is not
/// a plain string literal (identifier, concatenation, missing argument, text
/// that is not valid UTF-8) yields `false`.
///
/// Grammar differences handled here:
/// * Some grammars (tree-sitter-php, tree-sitter-c-sharp) wrap every call
///   argument in an `argument` node. The wrapper is unwrapped to its value
///   expression; otherwise a call such as PHP `hash('md5', $x)` could never
///   match because the first named child would be the wrapper.
/// * Backtick-delimited literals (`template_string`, `raw_string_literal`)
///   and PHP double-quoted `encapsed_string` are accepted alongside the
///   plain string kinds. The backtick is already one of the stripped
///   delimiters, and an interpolated template simply fails the comparison.
fn first_arg_is_weak_alg_string(node: Node, src: &[u8]) -> bool {
    let args = node.child_by_field_name("arguments").or_else(|| {
        let mut cursor = node.walk();
        for ch in node.children(&mut cursor) {
            if matches!(ch.kind(), "arguments" | "argument_list") {
                return Some(ch);
            }
        }
        None
    });
    let Some(args) = args else { return false };

    let mut cursor = args.walk();
    let Some(first) = args.named_children(&mut cursor).next() else { return false };

    // In a wrapper the value expression comes last, after any optional
    // argument name / modifier, so take the final named child.
    let literal = if first.kind() == "argument" {
        let mut inner = first.walk();
        let value = first.named_children(&mut inner).last();
        match value {
            Some(value) => value,
            None => return false,
        }
    } else {
        first
    };

    if !matches!(
        literal.kind(),
        "string"
            | "string_literal"
            | "interpreted_string_literal"
            | "encapsed_string"
            | "raw_string_literal"
            | "template_string"
    ) {
        return false;
    }

    let Ok(text) = literal.utf8_text(src) else { return false };
    let alg = text.trim_matches(|c: char| matches!(c, '"' | '\'' | '`'));
    // Case-insensitive comparison without allocating a lowered copy.
    ["md5", "sha1", "sha-1"]
        .iter()
        .any(|weak| alg.eq_ignore_ascii_case(weak))
}

fn enclosing_security_context(node: Node, src: &[u8]) -> Option<&'static str> {
let mut cur = node.parent();
// Deliberately conservative: only identifiers that strongly
// imply a security context. Avoid generic words like "digest"
// (matches hashlib's `.hexdigest()` method on an etag), or
// "key" (matches dictionary keys in unrelated contexts).
// Deliberately conservative needle set: only identifiers that
// strongly imply a hash-bearing security context. Avoids generic
// words like "digest" (matches hashlib's `.hexdigest()` method
// on an etag) or "key" (dictionary keys in unrelated contexts).
let names: &[(&str, &str)] = &[
("password", "password"),
("passwd", "password"),
("passphrase", "password"),
("signature", "signature"),
("hmac", "hmac"),
("token", "token"),
("secret", "secret"),
];
for _ in 0..6 {
let mut cur = node.parent();
let mut saw_assignment = false;
for _ in 0..10 {
let Some(n) = cur else { break };
if matches!(
n.kind(),
// JS / TS / Python idioms
"assignment" | "assignment_expression" | "variable_declarator"
| "lexical_declaration" | "let_declaration"
| "augmented_assignment_expression"
| "lexical_declaration" | "return_statement"
// Go
| "var_declaration" | "var_spec" | "short_var_declaration"
| "assignment_statement" | "const_declaration" | "const_spec"
// Java / Kotlin / C#
| "local_variable_declaration" | "field_declaration"
| "property_declaration"
// Rust
| "let_declaration"
) {
saw_assignment = true;
if let Ok(text) = n.utf8_text(src) {
let lower = text.to_ascii_lowercase();
for (needle, label) in names {
Expand All @@ -517,6 +644,30 @@ fn enclosing_security_context(node: Node, src: &[u8]) -> Option<&'static str> {
}
}
}
let is_function_shape = matches!(
n.kind(),
"function_definition" | "function_declaration"
| "method_definition" | "method_declaration"
| "function_expression" | "lambda_expression" | "function_item"
| "constructor_declaration"
);
if saw_assignment && is_function_shape {
// Function name carries the hash-context signal when the
// local assignment used a generic variable. Round 9
// production case: `h := md5.Sum(...)` inside `func
// HashPassword(...)`.
if let Some(name_node) = n.child_by_field_name("name") {
if let Ok(name_text) = name_node.utf8_text(src) {
let lower = name_text.to_ascii_lowercase();
for (needle, label) in names {
if lower.contains(needle) {
return Some(label);
}
}
}
}
break;
}
cur = n.parent();
}
None
Expand Down Expand Up @@ -1322,6 +1473,127 @@ mod tests {
assert!(!v.iter().any(|v| v.rule_id == "SEC009"), "got {v:?}");
}

// ─── SEC009 multi-language dispatch (PR #12) ─────────────────
#[test]
fn sec009_node_create_hash_md5_for_password_blocks() {
    // Node.js: MD5 is requested through the string argument of `createHash`
    // inside a function named `hashPassword`; SEC009 must be reported.
    let source = "const crypto = require('crypto');\n\
        function hashPassword(pw) {\n \
        const h = crypto.createHash('md5');\n \
        h.update(pw);\n \
        return h.digest('hex');\n\
        }\n";
    let v = check(".js", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC009");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec009_node_create_hash_sha256_does_not_block() {
    // Same shape as the MD5 case, but with SHA-256: no SEC009 expected.
    let source = "const crypto = require('crypto');\n\
        function hashPassword(pw) {\n \
        const h = crypto.createHash('sha256');\n \
        h.update(pw);\n \
        return h.digest('hex');\n\
        }\n";
    let v = check(".js", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC009");
    assert!(!flagged, "got {v:?}");
}

#[test]
fn sec009_go_md5_sum_for_password_blocks() {
    // Round 9 case: `md5.Sum` with `crypto/md5` imported, assigned to a
    // generically named local inside `HashPassword`.
    let source = "package main\n\n\
        import (\n \
        \"crypto/md5\"\n \
        \"encoding/hex\"\n\
        )\n\n\
        func HashPassword(password string) string {\n \
        h := md5.Sum([]byte(password))\n \
        return hex.EncodeToString(h[:])\n\
        }\n";
    let v = check(".go", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC009");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec009_java_message_digest_md5_for_password_blocks() {
    // Round 9 case: the algorithm is the string passed to
    // `MessageDigest.getInstance`, not part of the callee name.
    let source = "import java.security.MessageDigest;\n\n\
        public class Auth {\n \
        public static String hashPassword(String password) throws Exception {\n \
        MessageDigest md = MessageDigest.getInstance(\"MD5\");\n \
        byte[] digest = md.digest(password.getBytes());\n \
        return digest.toString();\n \
        }\n\
        }\n";
    let v = check(".java", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC009");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec009_java_sha256_does_not_block() {
    // `MessageDigest.getInstance("SHA-256")` is not a weak algorithm, so
    // SEC009 must stay silent even inside `hashPassword`.
    let source = "import java.security.MessageDigest;\n\n\
        public class Auth {\n \
        public static String hashPassword(String password) throws Exception {\n \
        MessageDigest md = MessageDigest.getInstance(\"SHA-256\");\n \
        return md.digest(password.getBytes()).toString();\n \
        }\n\
        }\n";
    let v = check(".java", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC009");
    assert!(!flagged, "got {v:?}");
}

#[test]
fn sec009_php_md5_for_password_blocks() {
    // PHP built-in `md5()` assigned to `$passwordHash`: SEC009 expected.
    let source = "<?php\n\
        function hashPassword($password) {\n \
        $passwordHash = md5($password);\n \
        return $passwordHash;\n\
        }\n";
    let v = check(".php", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC009");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec009_php_hash_sha256_does_not_block() {
    // `hash('sha256', ...)` names a strong algorithm: no SEC009 expected.
    let source = "<?php\n\
        function hashPassword($password) {\n \
        $passwordHash = hash('sha256', $password);\n \
        return $passwordHash;\n\
        }\n";
    let v = check(".php", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC009");
    assert!(!flagged, "got {v:?}");
}

#[test]
fn sec009_csharp_md5_create_for_password_blocks() {
    // C#: the weak algorithm is the receiver type in `MD5.Create()`, called
    // inside a method named `HashPassword`; SEC009 expected.
    let source = "using System.Security.Cryptography;\n\n\
        public class Auth {\n \
        public static string HashPassword(string password) {\n \
        var md5 = MD5.Create();\n \
        var passwordHash = md5.ComputeHash(System.Text.Encoding.UTF8.GetBytes(password));\n \
        return passwordHash.ToString();\n \
        }\n\
        }\n";
    let v = check(".cs", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC009");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec010_random_for_token_blocks() {
let v = check(
Expand Down
Loading