From 17a4a9df7c32ebf65629b756c95c3c647638c91a Mon Sep 17 00:00:00 2001
From: Lalit Shrotriya <shrotriya.lalit@outlook.com>
Date: Thu, 18 Jun 2026 10:20:52 +0000
Subject: [PATCH] fix(P2): add bidi control character detection (CVE-2021-42574
 / Trojan Source)

P2_PATTERNS was missing Unicode bidi override/embedding/isolate characters
(U+202A-U+202E, U+2066-U+2069) that can be used to hide malicious
instructions from human code review while the LLM sees and executes them.
Add both ranges as a single pattern with confidence 0.85 (higher than the
0.6 used for plain zero-width chars, because bidi controls have almost no
legitimate use in AI skill content).

Closes #39

Signed-off-by: Lalit Shrotriya <shrotriya.lalit@outlook.com>
---
 .../static_patterns_prompt_injection.py       |  1 +
 tests/nodes/analyzers/test_static_patterns.py | 25 +++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py b/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py
index c062ee6..a43ffb8 100644
--- a/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py
+++ b/src/skillspector/nodes/analyzers/static_patterns_prompt_injection.py
@@ -51,6 +51,7 @@
     (r"<!--.*?(?:system|instructions?|ignore|POST|GET|send|transmit).*?-->", 0.7),
     (r"\[//\]:\s*#\s*\(.*?(?:system|instructions?|ignore|POST|GET|send|transmit).*?\)", 0.8),
     (r"[\u200b\u200c\u200d\u2060\ufeff]", 0.6),
+    (r"[\u202a-\u202e\u2066-\u2069]", 0.85),
     (r"data:text/plain;base64,[A-Za-z0-9+/=]{50,}", 0.7),
 ]
 # P3: Exfiltration Commands
diff --git a/tests/nodes/analyzers/test_static_patterns.py b/tests/nodes/analyzers/test_static_patterns.py
index fbcac38..84da8b4 100644
--- a/tests/nodes/analyzers/test_static_patterns.py
+++ b/tests/nodes/analyzers/test_static_patterns.py
@@ -66,6 +66,31 @@ def test_p2_hidden_comment_produces_finding(self):
         assert len(findings) >= 1
         assert any(f.rule_id == "P2" for f in findings)
 
+    def test_p2_bidi_control_chars_produce_finding(self):
+        """Bidi control characters (Trojan Source CVE-2021-42574) yield P2."""
+        # Escaped, not literal: U+202E (RLO) / U+202C (PDF) stay visible in review.
+        state = {
+            "components": ["SKILL.md"],
+            "file_cache": {
+                "SKILL.md": "Normal text\u202e evil hidden content\u202c",
+            },
+        }
+        findings = static_runner.run_static_patterns(state, [prompt_injection_module])
+        assert len(findings) >= 1
+        assert any(f.rule_id == "P2" for f in findings)
+
+    def test_p2_bidi_rlo_edge_cases(self):
+        """Every bidi control in U+202A-U+202E and U+2066-U+2069 yields P2."""
+        bidi_chars = list("\u202a\u202b\u202c\u202d\u202e\u2066\u2067\u2068\u2069")
+        for ch in bidi_chars:
+            state = {
+                "components": ["skill.md"],
+                "file_cache": {"skill.md": f"text{ch}more"},
+            }
+            findings = static_runner.run_static_patterns(state, [prompt_injection_module])
+            p2 = [f for f in findings if f.rule_id == "P2"]
+            assert len(p2) >= 1, f"Expected P2 for bidi char U+{ord(ch):04X}"
+
     def test_safe_content_no_p1_p2(self):
         """Safe content does not produce P1/P2."""
         state = {