From 17a4a9df7c32ebf65629b756c95c3c647638c91a Mon Sep 17 00:00:00 2001 From: Lalit Shrotriya Date: Thu, 18 Jun 2026 10:20:52 +0000 Subject: [PATCH] fix(P2): add bidi control character detection (CVE-2021-42574 / Trojan Source) P2_PATTERNS was missing Unicode bidi override/embedding/isolate characters (U+202A-U+202E, U+2066-U+2069) that can be used to hide malicious instructions from human code review while the LLM sees and executes them. Add the range with confidence 0.85 (higher than plain zero-width chars because bidi controls have almost no legitimate use in AI skill content). Closes #39 Signed-off-by: Lalit Shrotriya --- .../static_patterns_prompt_injection.py | 1 + tests/nodes/analyzers/test_static_patterns.py | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py b/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py index c062ee6..a43ffb8 100644 --- a/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py +++ b/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py @@ -51,6 +51,7 @@ (r"", 0.7), (r"\[//\]:\s*#\s*\(.*?(?:system|instructions?|ignore|POST|GET|send|transmit).*?\)", 0.8), (r"[\u200b\u200c\u200d\u2060\ufeff]", 0.6), + (r"[\u202a-\u202e\u2066-\u2069]", 0.85), (r"data:text/plain;base64,[A-Za-z0-9+/=]{50,}", 0.7), ] # P3: Exfiltration Commands diff --git a/tests/nodes/analyzers/test_static_patterns.py b/tests/nodes/analyzers/test_static_patterns.py index fbcac38..84da8b4 100644 --- a/tests/nodes/analyzers/test_static_patterns.py +++ b/tests/nodes/analyzers/test_static_patterns.py @@ -66,6 +66,31 @@ def test_p2_hidden_comment_produces_finding(self): assert len(findings) >= 1 assert any(f.rule_id == "P2" for f in findings) + def test_p2_bidi_control_chars_produce_finding(self): + """Bidi control characters (Trojan Source CVE-2021-42574) yield P2.""" + # U+202E is RIGHT-TO-LEFT OVERRIDE — a bidi control character + state = { + 
"components": ["SKILL.md"], + "file_cache": { + "SKILL.md": "Normal text‮ evil hidden content‬", + }, + } + findings = static_runner.run_static_patterns(state, [prompt_injection_module]) + assert len(findings) >= 1 + assert any(f.rule_id == "P2" for f in findings) + + def test_p2_bidi_rlo_edge_cases(self): + """Bidi controls U+202A-U+202E and U+2066-U+2069 all yield P2.""" + bidi_chars = ["‪", "‫", "‬", "‭", "‮", "⁦", "⁧", "⁨", "⁩"] + for ch in bidi_chars: + state = { + "components": ["skill.md"], + "file_cache": {"skill.md": f"text{ch}more"}, + } + findings = static_runner.run_static_patterns(state, [prompt_injection_module]) + p2 = [f for f in findings if f.rule_id == "P2"] + assert len(p2) >= 1, f"Expected P2 for bidi char U+{ord(ch):04X}" + def test_safe_content_no_p1_p2(self): """Safe content does not produce P1/P2.""" state = {