Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ described below. Every finding carries `file`, optional `range` and
| :--- | :--- | :--- |
| **Syntax** | Tree-sitter found ERROR / MISSING nodes. | `ring0_violation` |
| **Signal** | A structural counter (14 of them). When `old_content` is supplied, `context` carries `value_before` / `value_after` / `delta`. | `fan_out`, `max_chain_depth`, `cyclomatic_complexity`, `nesting_depth`, `empty_handler_count`, `unfinished_marker_count`, `unreachable_stmt_count`, `mutable_default_arg_count`, `shadowed_local_count`, `suspicious_literal_count`, `unresolved_local_import_count`, `member_access_count`, `type_leakage_count`, `cross_module_chain_count`, `import_usage_count`, `test_count_lost` |
| **Security** | A specific anti-pattern matched (13 rules). `context.severity_hint` is a hint, not a verdict. | `SEC001`–`SEC013` (eval/exec, hardcoded secret, TLS-off, shell injection, SQL concat, CORS wildcard+credentials, JWT unsafe, insecure deserialization, weak crypto, weak RNG, hardcoded Bearer token, timing-unsafe credential compare, Python bare `except:`) |
| **Security** | A specific anti-pattern matched (16 rules). `context.severity_hint` is a hint, not a verdict. | `SEC001`–`SEC016` (eval/exec, hardcoded secret, TLS-off, shell injection, SQL concat, CORS wildcard+credentials, JWT unsafe, insecure deserialization, weak crypto, weak RNG, hardcoded Bearer token, timing-unsafe credential compare, Python bare `except:`, hardcoded PEM private key, silent broad except, SSRF on user-input URL) |
| **Workspace** | Cross-file finding. Only emitted when `workspace_root` is supplied. | `cycle_introduced`, `public_symbol_removed`, `file_role` |

`aegis-allow: <rule_id>` (or `aegis-allow: all`) on the same or
Expand Down
2 changes: 1 addition & 1 deletion README.zh-TW.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ symbols、per-file signals),用 mtime cache 確保重複呼叫只重 parse
| :--- | :--- | :--- |
| **Syntax** | tree-sitter 找到 ERROR / MISSING 節點。 | `ring0_violation` |
| **Signal** | 結構性計數器(14 個)。傳了 `old_content` 時,`context` 帶 `value_before` / `value_after` / `delta`。 | `fan_out`、`max_chain_depth`、`cyclomatic_complexity`、`nesting_depth`、`empty_handler_count`、`unfinished_marker_count`、`unreachable_stmt_count`、`mutable_default_arg_count`、`shadowed_local_count`、`suspicious_literal_count`、`unresolved_local_import_count`、`member_access_count`、`type_leakage_count`、`cross_module_chain_count`、`import_usage_count`、`test_count_lost` |
| **Security** | 命中具體的反模式(13 條規則)。`context.severity_hint` 是建議不是判決。 | `SEC001`–`SEC013`(eval/exec、寫死的 secret、關 TLS、shell injection、SQL 拼接、CORS 萬用字元+credentials、JWT 不驗證、危險反序列化、弱 hash、弱 RNG、寫死 Bearer token、timing-unsafe 憑證比對、Python 裸 `except:`)|
| **Security** | 命中具體的反模式(16 條規則)。`context.severity_hint` 是建議不是判決。 | `SEC001`–`SEC016`(eval/exec、寫死的 secret、關 TLS、shell injection、SQL 拼接、CORS 萬用字元+credentials、JWT 不驗證、危險反序列化、弱 hash、弱 RNG、寫死 Bearer token、timing-unsafe 憑證比對、Python 裸 `except:`、寫死 PEM private key、靜默 broad except、SSRF user-input URL)|
| **Workspace** | 跨檔 finding。只有傳 `workspace_root` 才會出現。 | `cycle_introduced`、`public_symbol_removed`、`file_role` |

`aegis-allow: <rule_id>`(或 `aegis-allow: all`)寫在同一行或前一行
Expand Down
342 changes: 342 additions & 0 deletions crates/aegis-core/src/security.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ fn walk(node: Node, src: &[u8], out: &mut Vec<SecurityViolation>) {
check_sql_concat(&name, node, src, out);
check_weak_crypto(&name, node, src, out);
check_weak_random_for_token(&name, node, src, out);
check_ssrf_user_input_url(&name, node, src, out);
}
}

Expand All @@ -99,6 +100,11 @@ fn walk(node: Node, src: &[u8], out: &mut Vec<SecurityViolation>) {
check_bare_except(node, src, out);
}

// Rule SEC015: silent broad except / empty catch.
if matches!(kind, "except_clause" | "catch_clause") {
check_silent_broad_except(node, src, out);
}

let mut cursor = node.walk();
for child in node.children(&mut cursor) {
walk(child, src, out);
Expand Down Expand Up @@ -674,6 +680,185 @@ fn check_bare_except(node: Node, src: &[u8], out: &mut Vec<SecurityViolation>) {
catch the specific exception type, or use `except Exception:` if truly necessary".into());
}

// ─── Rule SEC016: SSRF — HTTP call with user-input URL ───────────
/// Rule SEC016: flag an outbound HTTP call whose URL argument textually looks
/// like it was taken straight from request input (SSRF risk).
///
/// * `name` — text identifying the callee; presumably the dotted call path
///   such as `requests.get` (confirm against the caller in `walk`).
/// * `node` — the call node; its argument list is located via the
///   `arguments` field or an `arguments` / `argument_list` child.
/// * `src`  — source bytes used to read argument text.
/// * `out`  — receives a `SEC016` / `block` finding through `push`.
///
/// Detection is purely textual: no data-flow tracking is performed, so a URL
/// that was assigned to a neutral local name first is not seen.
fn check_ssrf_user_input_url(name: &str, node: Node, src: &[u8], out: &mut Vec<SecurityViolation>) {
    // Lower-case substrings that mark an expression as request-derived.
    // Conservative — must be obviously request-derived to keep FP rate low.
    const USER_INPUT_PATTERNS: &[&str] = &[
        "request.get(", "request.args", "request.form", "request.json",
        "request.values", "request.params", "request.body", "request.query",
        "req.body", "req.params", "req.query", "req.headers",
        "req.url", "req.input", "req.data",
        "params[", "query[", "body[", "headers[",
        "input(", "user_input", "user_url",
        ".body.url", ".query.url", ".params.url",
        "ctx.request", "ctx.params", "ctx.query",
    ];

    // Match calls likely to issue an outbound HTTP request. Each language has
    // its own canonical names; one polyglot needle list keeps this a single
    // multi-language rule.
    let last = name.rsplit('.').next().unwrap_or(name);
    let is_verb = matches!(
        last,
        "get" | "post" | "put" | "delete" | "patch" | "head" | "options" | "request"
    );
    // Verb-named methods also exist on dicts/maps/caches, so they only count
    // when the receiver looks like an HTTP client. A bare `fetch` is already
    // covered by the `last` match below.
    let is_http_call = (is_verb && name_suggests_http_client(name))
        || matches!(last, "fetch" | "urlopen" | "urlretrieve")
        || name.ends_with("axios.get")
        || name.ends_with("axios.post")
        || name == "http.Get"
        || name == "http.Post";
    if !is_http_call {
        return;
    }

    let args_node = node.child_by_field_name("arguments").or_else(|| {
        let mut cursor = node.walk();
        // Bound to a local so the iterator is dropped before `cursor`.
        let found = node
            .children(&mut cursor)
            .find(|child| matches!(child.kind(), "arguments" | "argument_list"));
        found
    });
    let Some(args) = args_node else { return };

    // `request(method, url, …)` style APIs carry the URL in the second
    // position, so inspect the first two arguments for those; every other
    // matched call takes the URL first.
    let url_slots = if last == "request" { 2 } else { 1 };
    let mut cursor = args.walk();
    let url_from_request_input = args
        .named_children(&mut cursor)
        // A comment inside the argument list is not an argument.
        .filter(|arg| arg.kind() != "comment")
        .take(url_slots)
        .any(|arg| {
            arg.utf8_text(src)
                .map(|text| {
                    let lower = text.to_ascii_lowercase();
                    USER_INPUT_PATTERNS.iter().any(|p| lower.contains(p))
                })
                .unwrap_or(false)
        });
    if !url_from_request_input {
        return;
    }

    push(out, node, "SEC016", "block",
        "outbound HTTP call with URL derived from request input — \
        SSRF risk; validate against an allowlist of permitted hosts \
        or use a dedicated outbound proxy".into());
}

/// Returns `true` when the callee text `name` looks like a call on an HTTP
/// client rather than on a dict/map/cache. Matching is ASCII
/// case-insensitive.
fn name_suggests_http_client(name: &str) -> bool {
    // Receiver prefixes treated as HTTP clients. `client.` and `session.`
    // cover the common `client = httpx.Client()` / `requests.Session()`
    // variable names.
    const CLIENT_PREFIXES: [&str; 8] = [
        "requests.",
        "httpx.",
        "aiohttp.",
        "urllib.",
        "http.client.",
        "axios.",
        "client.",
        "session.",
    ];

    let lowered = name.to_ascii_lowercase();
    if lowered == "fetch" || lowered.contains(".http.") {
        return true;
    }
    CLIENT_PREFIXES
        .iter()
        .any(|prefix| lowered.starts_with(*prefix))
}

// ─── Rule SEC015: silent broad except / empty catch ──────────────
/// Rule SEC015: warn when a broad exception handler silently swallows the
/// error.
///
/// Shapes flagged:
///
/// ```text
/// Python: except Exception: pass            (broad type, silent body)
///         except BaseException: pass
///         except (Exception, OSError): pass (broad type inside a tuple)
/// JS/TS:  catch (e) {}                      (catches everything, empty body)
///         catch (e) { /* no log/no rethrow */ }
/// ```
///
/// Bare `except:` has no type child and is therefore skipped here; the
/// rule's original notes assign that shape to SEC013.
///
/// * `node` — an `except_clause` or `catch_clause` node.
/// * `src`  — source bytes used to read the exception-type text.
/// * `out`  — receives a `SEC015` / `warn` finding through `push`.
fn check_silent_broad_except(node: Node, src: &[u8], out: &mut Vec<SecurityViolation>) {
    if node.kind() == "except_clause" {
        // Python: only continue when one of the clause's type-position
        // children names `Exception` / `BaseException`.
        let mut cursor = node.walk();
        let broad = node.named_children(&mut cursor).any(|child| {
            matches!(
                child.kind(),
                "identifier" | "attribute" | "tuple" | "as_pattern" | "expression_list"
            ) && child
                .utf8_text(src)
                .map(except_types_are_broad)
                .unwrap_or(false)
        });
        if !broad {
            return;
        }
    }
    // Any other kind (i.e. `catch_clause`) is treated as always broad — JS has
    // no typed catch.
    // NOTE(review): grammars for typed-catch languages may also emit
    // `catch_clause`; confirm which languages reach this rule.

    if !handler_body_is_silent(node, src) {
        return;
    }

    push(out, node, "SEC015", "warn",
        "broad exception caught and silently swallowed — \
        either log + re-raise, or catch a specific exception type that you actually expect".into());
}

/// Returns `true` when the text of an `except` type expression names
/// `Exception` or `BaseException`, either alone or as a tuple member.
///
/// Anything from an `as` alias onward is ignored, and dotted names are kept
/// whole, so `builtins.Exception` and `MyException` do not count as broad.
fn except_types_are_broad(type_text: &str) -> bool {
    type_text
        .split(|c: char| !(c.is_alphanumeric() || c == '_' || c == '.'))
        .filter(|token| !token.is_empty())
        .take_while(|token| *token != "as")
        .any(|token| matches!(token, "Exception" | "BaseException"))
}

fn handler_body_is_silent(node: Node, src: &[u8]) -> bool {
let body = node
.child_by_field_name("body")
.or_else(|| node.child_by_field_name("block"))
.or_else(|| node.child_by_field_name("handler"))
.or_else(|| {
let mut cursor = node.walk();
let mut found: Option<Node> = None;
for child in node.named_children(&mut cursor) {
if matches!(
child.kind(),
"block" | "compound_statement" | "statement_block"
| "function_body" | "case_block"
) {
found = Some(child);
}
}
found
});
let Some(body) = body else { return false };
let count = body.named_child_count();
if count == 0 {
return true;
}
if count == 1 {
if let Some(only) = body.named_child(0) {
let k = only.kind();
if matches!(k, "pass_statement" | "comment" | "empty_statement") {
return true;
}
if let Ok(text) = only.utf8_text(src) {
let trimmed = text.trim();
if matches!(
trimmed,
"pass" | ";" | "" | "return" | "return null" | "return None" | "return false"
) {
return true;
}
}
}
}
false
}

// ─── Text-level rule(s) ──────────────────────────────────────────
fn scan_text_rules(code: &str) -> Vec<SecurityViolation> {
let mut out = Vec::new();
Expand Down Expand Up @@ -714,6 +899,29 @@ fn scan_text_rules(code: &str) -> Vec<SecurityViolation> {
severity: "block".into(),
});
}
// Rule SEC014: hardcoded PEM private key block in source. The
// BEGIN PRIVATE KEY header is unique enough to scan for as a
// raw substring — no LLM-generated source legitimately needs
// a private key inlined as a literal.
for (idx, line) in code.lines().enumerate() {
if line.contains("-----BEGIN ")
&& line.contains(" PRIVATE KEY-----")
{
out.push(SecurityViolation {
rule_id: "SEC014".into(),
message: "hardcoded PEM private key — \
load from a secret store / KMS / env file, never inline a key in source".into(),
start_line: idx + 1,
start_col: 1,
end_line: idx + 1,
end_col: 1,
severity: "block".into(),
});
// First match only — multi-line PEM blocks naturally
// contain the BEGIN line exactly once.
break;
}
}
// Rule SEC011: hardcoded `Authorization: Bearer <literal>` token.
// Cross-language text scan — bearer tokens appear in code as raw
// header strings regardless of source language.
Expand Down Expand Up @@ -936,6 +1144,140 @@ mod tests {
assert!(!v.iter().any(|v| v.rule_id == "SEC010"), "got {v:?}");
}

// ─── SEC016 (β): SSRF — HTTP call with user-input URL ─────────
#[test]
fn sec016_requests_get_user_url_blocks() {
    // A request parameter passed straight to `requests.get` must raise SEC016.
    let source = "import requests\ndef proxy(req):\n return requests.get(req.params.url)\n";
    let v = check(".py", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC016");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec016_requests_get_request_args_blocks() {
    // `request.args.get('url')` used as the URL must raise SEC016.
    let source =
        "import requests\ndef fetch_remote():\n return requests.get(request.args.get('url'))\n";
    let v = check(".py", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC016");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec016_node_fetch_user_url_blocks() {
    // JS `fetch` called with `req.query.url` must raise SEC016.
    let source =
        "app.get('/proxy', (req, res) => { fetch(req.query.url).then(r => r.text()); });\n";
    let v = check(".js", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC016");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec016_requests_get_static_url_does_not_block() {
    // A literal URL is not request-derived, so SEC016 must stay quiet.
    let source =
        "import requests\ndef fetch():\n return requests.get(\"https://api.example.com/data\")\n";
    let v = check(".py", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC016");
    assert!(!flagged, "got {v:?}");
}

#[test]
fn sec016_dict_get_does_not_block() {
    // `data.get(...)` is a dictionary lookup, not an HTTP request: the
    // receiver-name check has to keep SEC016 from firing here.
    let source = "def f(req):\n return data.get(req.params.url)\n";
    let v = check(".py", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC016");
    assert!(!flagged, "got {v:?}");
}

// ─── SEC015 (β): silent broad except / empty catch ────────────
#[test]
fn sec015_python_except_exception_pass_warns() {
    // `except Exception: pass` is the canonical silent broad handler.
    let source = "try:\n do_thing()\nexcept Exception:\n pass\n";
    let v = check(".py", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC015");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec015_python_except_baseexception_pass_warns() {
    // `BaseException` is just as broad as `Exception` and must also warn.
    let source = "try:\n do_thing()\nexcept BaseException:\n pass\n";
    let v = check(".py", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC015");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec015_python_except_exception_with_log_does_not_warn() {
    // A handler that logs and re-raises is not silent, so no SEC015.
    let source = "try:\n do_thing()\nexcept Exception as e:\n log.error(e)\n raise\n";
    let v = check(".py", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC015");
    assert!(!flagged, "got {v:?}");
}

#[test]
fn sec015_python_specific_except_does_not_warn() {
    // Swallowing a specific exception type is acceptable: the developer
    // anticipated exactly this failure and chose to ignore it.
    let source = "try:\n do_thing()\nexcept ValueError:\n pass\n";
    let v = check(".py", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC015");
    assert!(!flagged, "got {v:?}");
}

#[test]
fn sec015_js_empty_catch_warns() {
    // An empty JS `catch` block swallows every error and must warn.
    let source = "try { doThing(); } catch (e) {}\n";
    let v = check(".js", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC015");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec015_js_catch_with_log_does_not_warn() {
    // Logging and rethrowing inside `catch` is proper handling — no SEC015.
    let source = "try { doThing(); } catch (e) { console.error(e); throw e; }\n";
    let v = check(".js", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC015");
    assert!(!flagged, "got {v:?}");
}

// ─── SEC014 (α): hardcoded PEM private key ─────────────────────
#[test]
fn sec014_hardcoded_pem_private_key_blocks() {
    // A multi-line RSA private key embedded in a Python string must raise SEC014.
    let source = "PRIVATE_KEY = \"\"\"-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA...\n-----END RSA PRIVATE KEY-----\"\"\"\n";
    let v = check(".py", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC014");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec014_pem_in_js_blocks() {
    // The PEM header alone, inside a JS string literal, is enough for SEC014.
    let source = "const key = \"-----BEGIN PRIVATE KEY-----\\nMIIEvQ...\";\n";
    let v = check(".js", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC014");
    assert!(flagged, "got {v:?}");
}

#[test]
fn sec014_only_public_key_does_not_block() {
    // Public keys are not credentials, so embedding one must not raise SEC014.
    let source =
        "PUBLIC_KEY = \"-----BEGIN PUBLIC KEY-----\\nMIIB...\\n-----END PUBLIC KEY-----\"\n";
    let v = check(".py", source);
    let flagged = v.iter().any(|hit| hit.rule_id == "SEC014");
    assert!(!flagged, "got {v:?}");
}

// ─── SEC011 (α): hardcoded Authorization Bearer ──────────────
#[test]
fn sec011_hardcoded_bearer_token_in_python_dict_blocks() {
Expand Down
Loading