From 4cbe0c17f70b1dce8b18761fa84e65a3ebb273a0 Mon Sep 17 00:00:00 2001 From: wei9072 Date: Wed, 6 May 2026 01:54:23 +0000 Subject: [PATCH 1/4] =?UTF-8?q?feat(security):=20SEC014=20(=CE=B1)=20?= =?UTF-8?q?=E2=80=94=20hardcoded=20PEM=20private=20key?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-language text scan for `-----BEGIN ... PRIVATE KEY-----` in any source file. α-class — language-agnostic, runs on every file regardless of grammar. Precision is essentially perfect: the PEM header is unique enough that no legitimate code embeds it as a string literal. Public keys (`-----BEGIN PUBLIC KEY-----`) are intentionally excluded — they aren't credentials. Tests: 3 — Python triple-string positive, JS escaped-string positive, public-key negative. Co-Authored-By: Claude Opus 4.7 --- crates/aegis-core/src/security.rs | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/crates/aegis-core/src/security.rs b/crates/aegis-core/src/security.rs index 2d0ea19..b17824a 100644 --- a/crates/aegis-core/src/security.rs +++ b/crates/aegis-core/src/security.rs @@ -714,6 +714,29 @@ fn scan_text_rules(code: &str) -> Vec { severity: "block".into(), }); } + // Rule SEC014: hardcoded PEM private key block in source. The + // BEGIN PRIVATE KEY header is unique enough to scan for as a + // raw substring — no LLM-generated source legitimately needs + // a private key inlined as a literal. + for (idx, line) in code.lines().enumerate() { + if line.contains("-----BEGIN ") + && line.contains(" PRIVATE KEY-----") + { + out.push(SecurityViolation { + rule_id: "SEC014".into(), + message: "hardcoded PEM private key — \ + load from a secret store / KMS / env file, never inline a key in source".into(), + start_line: idx + 1, + start_col: 1, + end_line: idx + 1, + end_col: 1, + severity: "block".into(), + }); + // First match only — multi-line PEM blocks naturally + // contain the BEGIN line exactly once. 
+ break; + } + } // Rule SEC011: hardcoded `Authorization: Bearer ` token. // Cross-language text scan — bearer tokens appear in code as raw // header strings regardless of source language. @@ -936,6 +959,35 @@ mod tests { assert!(!v.iter().any(|v| v.rule_id == "SEC010"), "got {v:?}"); } + // ─── SEC014 (α): hardcoded PEM private key ───────────────────── + #[test] + fn sec014_hardcoded_pem_private_key_blocks() { + let v = check( + ".py", + "PRIVATE_KEY = \"\"\"-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA...\n-----END RSA PRIVATE KEY-----\"\"\"\n", + ); + assert!(v.iter().any(|v| v.rule_id == "SEC014"), "got {v:?}"); + } + + #[test] + fn sec014_pem_in_js_blocks() { + let v = check( + ".js", + "const key = \"-----BEGIN PRIVATE KEY-----\\nMIIEvQ...\";\n", + ); + assert!(v.iter().any(|v| v.rule_id == "SEC014"), "got {v:?}"); + } + + #[test] + fn sec014_only_public_key_does_not_block() { + // Public keys in source are fine — they're not credentials. + let v = check( + ".py", + "PUBLIC_KEY = \"-----BEGIN PUBLIC KEY-----\\nMIIB...\\n-----END PUBLIC KEY-----\"\n", + ); + assert!(!v.iter().any(|v| v.rule_id == "SEC014"), "got {v:?}"); + } + // ─── SEC011 (α): hardcoded Authorization Bearer ────────────── #[test] fn sec011_hardcoded_bearer_token_in_python_dict_blocks() { From 45f4d1ad3f7075402856c244f33aecd2dc381a65 Mon Sep 17 00:00:00 2001 From: wei9072 Date: Wed, 6 May 2026 01:55:50 +0000 Subject: [PATCH 2/4] =?UTF-8?q?feat(security):=20SEC015=20(=CE=B2)=20?= =?UTF-8?q?=E2=80=94=20silent=20broad=20except=20/=20empty=20catch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Catches the slightly more sophisticated form of the bare-except antipattern: agent picked an exception type but chose the broadest one and then silently swallowed it. Two language shapes: - **Python**: `except Exception: pass` or `except BaseException: pass` — broad type, body is `pass` only / empty / single comment / a bare `return` / `return None` (the shared body check also treats a lone `;`, `return null` and `return false` as silent).
- **JavaScript / TypeScript**: `catch (e) {}` or `catch (e) { /* nothing */ }` — catch is implicitly broad in JS, so any catch with an empty / comment-only body fires. Excludes (FP control): - Bare `except:` — already SEC013's territory - Specific types: `except ValueError: pass` — developer expected this case - Body that logs / re-raises / does anything substantive Body-emptiness logic mirrors `signals/smells.rs::handler_body_is_empty` so behaviour stays consistent with the existing `empty_handler_count` signal. Tests: 6 — Python broad+pass / Python broad+log negative / Python specific-type negative / Python BaseException positive / JS empty positive / JS log+throw negative. Co-Authored-By: Claude Opus 4.7 --- crates/aegis-core/src/security.rs | 165 ++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) diff --git a/crates/aegis-core/src/security.rs b/crates/aegis-core/src/security.rs index b17824a..77ab548 100644 --- a/crates/aegis-core/src/security.rs +++ b/crates/aegis-core/src/security.rs @@ -99,6 +99,11 @@ fn walk(node: Node, src: &[u8], out: &mut Vec) { check_bare_except(node, src, out); } + // Rule SEC015: silent broad except / empty catch. 
+ if matches!(kind, "except_clause" | "catch_clause") { + check_silent_broad_except(node, src, out); + } + let mut cursor = node.walk(); for child in node.children(&mut cursor) { walk(child, src, out); @@ -674,6 +679,109 @@ fn check_bare_except(node: Node, src: &[u8], out: &mut Vec) { catch the specific exception type, or use `except Exception:` if truly necessary".into()); } +// ─── Rule SEC015: silent broad except / empty catch ────────────── +fn check_silent_broad_except(node: Node, src: &[u8], out: &mut Vec) { + // Two shapes we flag: + // + // Python: except Exception: pass (broad type, silent body) + // except BaseException: pass + // JS/TS: catch (e) {} (catches everything, empty body) + // catch (e) { /* no log/no rethrow */ } + // + // Bare `except:` is SEC013's job — this rule covers the slightly + // more sophisticated form where the agent did pick a type but + // chose the broadest one and then silently swallowed it. + let kind = node.kind(); + let is_python = kind == "except_clause"; + let is_js = kind == "catch_clause"; + + if is_python { + // Confirm broad type. Walk named children for an identifier / + // attribute / tuple whose text starts with `Exception` or + // `BaseException`. Bare except has no such child — that's + // SEC013's territory, skip here. + let mut cursor = node.walk(); + let mut broad = false; + for child in node.named_children(&mut cursor) { + let ck = child.kind(); + if matches!(ck, "identifier" | "attribute" | "tuple" + | "as_pattern" | "expression_list") + { + if let Ok(t) = child.utf8_text(src) { + let trimmed = t.trim(); + if trimmed == "Exception" + || trimmed == "BaseException" + || trimmed.starts_with("Exception ") + || trimmed.starts_with("BaseException ") + || trimmed.starts_with("Exception\n") + || trimmed.starts_with("BaseException\n") + { + broad = true; + break; + } + } + } + } + if !broad { + return; + } + } + // JS catch_clause is always broad — there's no typed catch. 
+ + if !handler_body_is_silent(node, src) { + return; + } + + let _ = is_js; + push(out, node, "SEC015", "warn", + "broad exception caught and silently swallowed — \ + either log + re-raise, or catch a specific exception type that you actually expect".into()); +} + +fn handler_body_is_silent(node: Node, src: &[u8]) -> bool { + let body = node + .child_by_field_name("body") + .or_else(|| node.child_by_field_name("block")) + .or_else(|| node.child_by_field_name("handler")) + .or_else(|| { + let mut cursor = node.walk(); + let mut found: Option = None; + for child in node.named_children(&mut cursor) { + if matches!( + child.kind(), + "block" | "compound_statement" | "statement_block" + | "function_body" | "case_block" + ) { + found = Some(child); + } + } + found + }); + let Some(body) = body else { return false }; + let count = body.named_child_count(); + if count == 0 { + return true; + } + if count == 1 { + if let Some(only) = body.named_child(0) { + let k = only.kind(); + if matches!(k, "pass_statement" | "comment" | "empty_statement") { + return true; + } + if let Ok(text) = only.utf8_text(src) { + let trimmed = text.trim(); + if matches!( + trimmed, + "pass" | ";" | "" | "return" | "return null" | "return None" | "return false" + ) { + return true; + } + } + } + } + false +} + // ─── Text-level rule(s) ────────────────────────────────────────── fn scan_text_rules(code: &str) -> Vec { let mut out = Vec::new(); @@ -959,6 +1067,63 @@ mod tests { assert!(!v.iter().any(|v| v.rule_id == "SEC010"), "got {v:?}"); } + // ─── SEC015 (β): silent broad except / empty catch ──────────── + #[test] + fn sec015_python_except_exception_pass_warns() { + let v = check( + ".py", + "try:\n do_thing()\nexcept Exception:\n pass\n", + ); + assert!(v.iter().any(|v| v.rule_id == "SEC015"), "got {v:?}"); + } + + #[test] + fn sec015_python_except_baseexception_pass_warns() { + let v = check( + ".py", + "try:\n do_thing()\nexcept BaseException:\n pass\n", + ); + assert!(v.iter().any(|v| 
v.rule_id == "SEC015"), "got {v:?}"); + } + + #[test] + fn sec015_python_except_exception_with_log_does_not_warn() { + let v = check( + ".py", + "try:\n do_thing()\nexcept Exception as e:\n log.error(e)\n raise\n", + ); + assert!(!v.iter().any(|v| v.rule_id == "SEC015"), "got {v:?}"); + } + + #[test] + fn sec015_python_specific_except_does_not_warn() { + // Catching a specific type with empty body is fine — the + // developer expected this case and chose to ignore it. + let v = check( + ".py", + "try:\n do_thing()\nexcept ValueError:\n pass\n", + ); + assert!(!v.iter().any(|v| v.rule_id == "SEC015"), "got {v:?}"); + } + + #[test] + fn sec015_js_empty_catch_warns() { + let v = check( + ".js", + "try { doThing(); } catch (e) {}\n", + ); + assert!(v.iter().any(|v| v.rule_id == "SEC015"), "got {v:?}"); + } + + #[test] + fn sec015_js_catch_with_log_does_not_warn() { + let v = check( + ".js", + "try { doThing(); } catch (e) { console.error(e); throw e; }\n", + ); + assert!(!v.iter().any(|v| v.rule_id == "SEC015"), "got {v:?}"); + } + // ─── SEC014 (α): hardcoded PEM private key ───────────────────── #[test] fn sec014_hardcoded_pem_private_key_blocks() { From fa099be485ab52404e822a7d63c498343c63badd Mon Sep 17 00:00:00 2001 From: wei9072 Date: Wed, 6 May 2026 01:56:51 +0000 Subject: [PATCH 3/4] =?UTF-8?q?feat(security):=20SEC016=20(=CE=B2)=20?= =?UTF-8?q?=E2=80=94=20SSRF=20marker=20on=20HTTP=20call=20with=20user-inpu?= =?UTF-8?q?t=20URL?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AST rule on call expressions. Fires when an outbound HTTP call (`requests.get`, `httpx.X`, `aiohttp.X`, `urllib.urlopen`, `fetch`, `axios.X`, `http.Get`) is given a URL argument that obviously came from request input. Per-language receiver gating keeps `dict.get()` / `cache.get()` / `Map.get()` from firing — only http-shaped receivers count. 
User-input shapes (conservative needle list): - Flask / Django / FastAPI: `request.args`, `request.form`, `request.json`, `request.values`, `request.params` - Express / Koa: `req.body`, `req.params`, `req.query`, `req.headers`, `ctx.request`, `ctx.params` - Subscript access: `params[...]`, `query[...]`, `body[...]` - Generic: `user_input`, `user_url`, `input(...)` Tests: 5 — Python `requests.get(req.params.url)` positive, Python `request.args.get('url')` positive, JS `fetch(req.query.url)` positive, static URL negative, `dict.get(req.params.url)` negative (receiver-name disambiguator). Co-Authored-By: Claude Opus 4.7 --- crates/aegis-core/src/security.rs | 125 ++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/crates/aegis-core/src/security.rs b/crates/aegis-core/src/security.rs index 77ab548..94d9f48 100644 --- a/crates/aegis-core/src/security.rs +++ b/crates/aegis-core/src/security.rs @@ -76,6 +76,7 @@ fn walk(node: Node, src: &[u8], out: &mut Vec) { check_sql_concat(&name, node, src, out); check_weak_crypto(&name, node, src, out); check_weak_random_for_token(&name, node, src, out); + check_ssrf_user_input_url(&name, node, src, out); } } @@ -679,6 +680,82 @@ fn check_bare_except(node: Node, src: &[u8], out: &mut Vec) { catch the specific exception type, or use `except Exception:` if truly necessary".into()); } +// ─── Rule SEC016: SSRF — HTTP call with user-input URL ─────────── +fn check_ssrf_user_input_url(name: &str, node: Node, src: &[u8], out: &mut Vec) { + // Match calls likely to issue an outbound HTTP request. Each + // language has its own canonical names; one polyglot needle list + // keeps the rule β-class (single rule, multi-language). + let last = name.rsplit('.').next().unwrap_or(name); + let is_http_call = matches!( + last, + "get" | "post" | "put" | "delete" | "patch" | "head" | "options" | "request" + ) && ( + // Filter out non-http `.get()` (dict/Map/cache lookups). 
The + // receiver name disambiguates: only flag when the call site + // text suggests an HTTP client. + name_suggests_http_client(name) + ) + || matches!(last, "fetch" | "urlopen" | "urlretrieve") + || name.ends_with("axios.get") || name.ends_with("axios.post") + || name == "fetch" + || name == "http.Get" || name == "http.Post"; + if !is_http_call { + return; + } + // Inspect first argument (the URL). If its text looks like it + // came directly from request input — no sanitization, no host + // allowlist — flag it. + let args_node = node.child_by_field_name("arguments").or_else(|| { + let mut cursor = node.walk(); + let mut found = None; + for child in node.children(&mut cursor) { + if matches!(child.kind(), "arguments" | "argument_list") { + found = Some(child); + break; + } + } + found + }); + let Some(args) = args_node else { return }; + let mut cursor = args.walk(); + let Some(first_arg) = args.named_children(&mut cursor).next() else { return }; + let Ok(arg_text) = first_arg.utf8_text(src) else { return }; + let lower = arg_text.to_ascii_lowercase(); + // User-input shapes. Conservative — must be obviously + // request-derived to keep FP rate low. 
+ let user_input_patterns = [ + "request.get(", "request.args", "request.form", "request.json", + "request.values", "request.params", "request.body", "request.query", + "req.body", "req.params", "req.query", "req.headers", + "req.url", "req.input", "req.data", + "params[", "query[", "body[", "headers[", + "input(", "user_input", "user_url", + ".body.url", ".query.url", ".params.url", + "ctx.request", "ctx.params", "ctx.query", + ]; + if !user_input_patterns.iter().any(|p| lower.contains(p)) { + return; + } + push(out, node, "SEC016", "block", + "outbound HTTP call with URL derived from request input — \ + SSRF risk; validate against an allowlist of permitted hosts \ + or use a dedicated outbound proxy".into()); +} + +fn name_suggests_http_client(name: &str) -> bool { + let lower = name.to_ascii_lowercase(); + lower.starts_with("requests.") + || lower.starts_with("httpx.") + || lower.starts_with("aiohttp.") + || lower.starts_with("urllib.") + || lower.starts_with("http.client.") + || lower.starts_with("axios.") + || lower.contains(".http.") + || lower.starts_with("client.") // common when client = httpx.Client() + || lower.starts_with("session.") // requests.Session() + || lower == "fetch" +} + // ─── Rule SEC015: silent broad except / empty catch ────────────── fn check_silent_broad_except(node: Node, src: &[u8], out: &mut Vec) { // Two shapes we flag: @@ -1067,6 +1144,54 @@ mod tests { assert!(!v.iter().any(|v| v.rule_id == "SEC010"), "got {v:?}"); } + // ─── SEC016 (β): SSRF — HTTP call with user-input URL ───────── + #[test] + fn sec016_requests_get_user_url_blocks() { + let v = check( + ".py", + "import requests\ndef proxy(req):\n return requests.get(req.params.url)\n", + ); + assert!(v.iter().any(|v| v.rule_id == "SEC016"), "got {v:?}"); + } + + #[test] + fn sec016_requests_get_request_args_blocks() { + let v = check( + ".py", + "import requests\ndef fetch_remote():\n return requests.get(request.args.get('url'))\n", + ); + assert!(v.iter().any(|v| v.rule_id == 
"SEC016"), "got {v:?}"); + } + + #[test] + fn sec016_node_fetch_user_url_blocks() { + let v = check( + ".js", + "app.get('/proxy', (req, res) => { fetch(req.query.url).then(r => r.text()); });\n", + ); + assert!(v.iter().any(|v| v.rule_id == "SEC016"), "got {v:?}"); + } + + #[test] + fn sec016_requests_get_static_url_does_not_block() { + let v = check( + ".py", + "import requests\ndef fetch():\n return requests.get(\"https://api.example.com/data\")\n", + ); + assert!(!v.iter().any(|v| v.rule_id == "SEC016"), "got {v:?}"); + } + + #[test] + fn sec016_dict_get_does_not_block() { + // `.get()` on a dict is not an HTTP call; receiver name + // disambiguator must filter this out. + let v = check( + ".py", + "def f(req):\n return data.get(req.params.url)\n", + ); + assert!(!v.iter().any(|v| v.rule_id == "SEC016"), "got {v:?}"); + } + // ─── SEC015 (β): silent broad except / empty catch ──────────── #[test] fn sec015_python_except_exception_pass_warns() { From 55527ed4888d2d9fa62fb044ebd4df890ee7e47c Mon Sep 17 00:00:00 2001 From: wei9072 Date: Wed, 6 May 2026 01:57:31 +0000 Subject: [PATCH 4/4] =?UTF-8?q?docs(readme):=20bump=20SEC=20rule=20count?= =?UTF-8?q?=2013=20=E2=86=92=2016=20+=20name=20new=20rules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.7 --- README.md | 2 +- README.zh-TW.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 10004df..8d09d42 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,7 @@ described below. Every finding carries `file`, optional `range` and | :--- | :--- | :--- | | **Syntax** | Tree-sitter found ERROR / MISSING nodes. | `ring0_violation` | | **Signal** | A structural counter (14 of them). When `old_content` is supplied, `context` carries `value_before` / `value_after` / `delta`. 
| `fan_out`, `max_chain_depth`, `cyclomatic_complexity`, `nesting_depth`, `empty_handler_count`, `unfinished_marker_count`, `unreachable_stmt_count`, `mutable_default_arg_count`, `shadowed_local_count`, `suspicious_literal_count`, `unresolved_local_import_count`, `member_access_count`, `type_leakage_count`, `cross_module_chain_count`, `import_usage_count`, `test_count_lost` | -| **Security** | A specific anti-pattern matched (13 rules). `context.severity_hint` is a hint, not a verdict. | `SEC001`–`SEC013` (eval/exec, hardcoded secret, TLS-off, shell injection, SQL concat, CORS wildcard+credentials, JWT unsafe, insecure deserialization, weak crypto, weak RNG, hardcoded Bearer token, timing-unsafe credential compare, Python bare `except:`) | +| **Security** | A specific anti-pattern matched (16 rules). `context.severity_hint` is a hint, not a verdict. | `SEC001`–`SEC016` (eval/exec, hardcoded secret, TLS-off, shell injection, SQL concat, CORS wildcard+credentials, JWT unsafe, insecure deserialization, weak crypto, weak RNG, hardcoded Bearer token, timing-unsafe credential compare, Python bare `except:`, hardcoded PEM private key, silent broad except, SSRF on user-input URL) | | **Workspace** | Cross-file finding. Only emitted when `workspace_root` is supplied. 
| `cycle_introduced`, `public_symbol_removed`, `file_role` | `aegis-allow: ` (or `aegis-allow: all`) on the same or diff --git a/README.zh-TW.md b/README.zh-TW.md index a510ab0..6d5059e 100644 --- a/README.zh-TW.md +++ b/README.zh-TW.md @@ -112,7 +112,7 @@ symbols、per-file signals),用 mtime cache 確保重複呼叫只重 parse | :--- | :--- | :--- | | **Syntax** | tree-sitter 找到 ERROR / MISSING 節點。 | `ring0_violation` | | **Signal** | 結構性計數器(14 個)。傳了 `old_content` 時,`context` 帶 `value_before` / `value_after` / `delta`。 | `fan_out`、`max_chain_depth`、`cyclomatic_complexity`、`nesting_depth`、`empty_handler_count`、`unfinished_marker_count`、`unreachable_stmt_count`、`mutable_default_arg_count`、`shadowed_local_count`、`suspicious_literal_count`、`unresolved_local_import_count`、`member_access_count`、`type_leakage_count`、`cross_module_chain_count`、`import_usage_count`、`test_count_lost` | -| **Security** | 命中具體的反模式(13 條規則)。`context.severity_hint` 是建議不是判決。 | `SEC001`–`SEC013`(eval/exec、寫死的 secret、關 TLS、shell injection、SQL 拼接、CORS 萬用字元+credentials、JWT 不驗證、危險反序列化、弱 hash、弱 RNG、寫死 Bearer token、timing-unsafe 憑證比對、Python 裸 `except:`)| +| **Security** | 命中具體的反模式(16 條規則)。`context.severity_hint` 是建議不是判決。 | `SEC001`–`SEC016`(eval/exec、寫死的 secret、關 TLS、shell injection、SQL 拼接、CORS 萬用字元+credentials、JWT 不驗證、危險反序列化、弱 hash、弱 RNG、寫死 Bearer token、timing-unsafe 憑證比對、Python 裸 `except:`、寫死 PEM private key、靜默 broad except、SSRF user-input URL)| | **Workspace** | 跨檔 finding。只有傳 `workspace_root` 才會出現。 | `cycle_introduced`、`public_symbol_removed`、`file_role` | `aegis-allow: `(或 `aegis-allow: all`)寫在同一行或前一行