diff --git a/CHANGELOG.md b/CHANGELOG.md index ae7a81e..905f2f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ All notable changes to Sunglasses are documented here. +## [0.2.63] — 2026-06-07 + +### Added (V2 SHIP #9 — discovery_file_poisoning continued + repo_metadata_poisoning new category) + +- **13 new patterns** — `GLS-DFP-051` through `GLS-DFP-057` (7 patterns, continued `discovery_file_poisoning` expansion targeting IaC/policy files, admission controllers, and agent instruction carriers) + `GLS-RMP-001` through `GLS-RMP-006` (6 patterns, new `repo_metadata_poisoning` category covering CODEOWNERS files, release notes/changelogs, repository topics/tags, contributor lists, and governance metadata that AI coding agents read as authoritative policy). Pattern count: 1,006 → **1,019**. Keywords: 7,171 → **7,350**. Categories: 64 → **65**. +- **New blog:** [Repo Metadata Poisoning: When CODEOWNERS, Release Notes, and Topics Become Agent Policy](https://sunglasses.dev/blog/repo-metadata-poisoning) — covers how attackers hide agent instructions in the governance metadata layer of a repository, and how Sunglasses runtime trust catches it before the agent acts. Written by JACK, research by Cava. + +### Context + +`repo_metadata_poisoning` (GLS-RMP-001..006) is the 65th category in the Sunglasses detection library. It targets the trusted governance metadata that AI coding agents read before acting on a repository — CODEOWNERS, release notes, CHANGELOG files, repository description/topics, contributor lists, and issue/PR templates. This metadata carries strong implied authority: agents treat it as "the rules of this repo." The six patterns detect covert agent directives hidden in each carrier type, including authority-framing phrases, token-injection patterns, and role-redefinition attempts. 
+ + ## [0.2.62] — 2026-06-06 ### Added (V2 SHIP #8 — discovery_file_poisoning continued + FP credibility fix) diff --git a/README.md b/README.md index e57a2aa..4b5ca03 100644 --- a/README.md +++ b/README.md @@ -139,10 +139,10 @@ result = scanner.scan_auto("any_file.ext") |--------|-------| | Average text scan | <1ms (avg 0.26ms on M3 Max, single-threaded) | | Throughput | ~3,800 scans/sec (single-threaded, M3 Max) | -| Patterns | 1006 | -| Keywords | 7,171 | +| Patterns | 1019 | +| Keywords | 7,350 | | Languages | 23 | -| Attack categories | 64 | +| Attack categories | 65 | | Normalization techniques | 17 | | Media types | 6 (text, image, audio, video, PDF, QR) | | Internal recall (attack-db fixture set) | 64/64 — 100% recall | @@ -151,15 +151,15 @@ result = scanner.scan_auto("any_file.ext") | Core dependencies | Zero for text scan; optional deps for media | | Platforms | Mac, Windows, Linux — anywhere Python runs | -_All performance numbers verified against `stats/current.json` (v0.2.62, updated Jun 6, 2026). Measured on Apple M3 Max, 48GB RAM, single-threaded Python 3.11. Your hardware will differ._ +_All performance numbers verified against `stats/current.json` (v0.2.63, updated Jun 7, 2026). Measured on Apple M3 Max, 48GB RAM, single-threaded Python 3.11. Your hardware will differ._ ## 23 Languages English, Spanish, Portuguese, French, German, Italian, Dutch, Russian, Ukrainian, Polish, Czech, Turkish, Azerbaijani, Arabic, Hebrew, Persian, Chinese, Japanese, Korean, Hindi, Bengali, Indonesian, Vietnamese — plus normalization handles romanization, Unicode confusables, and 17 other obfuscation techniques. Community language contributions welcome. 
-## What Works Today (v0.2.62) +## What Works Today (v0.2.63) -- ✅ Text scanning: 1006 patterns, 7,171 keywords, 23 languages, 64 attack categories +- ✅ Text scanning: 1019 patterns, 7,350 keywords, 23 languages, 65 attack categories - ✅ Negation handling: "do NOT run rm -rf" correctly downgrades severity - ✅ Multi-stage pipeline: normalization (17 techniques) → pattern match → decision - ✅ Image scanning: OCR + EXIF metadata + hidden text detection (requires Tesseract) diff --git a/setup.py b/setup.py index bb090e4..3445166 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="sunglasses", - version="0.2.62", + version="0.2.63", description="Sunglasses for AI agents. Protection layer + neighborhood watch.", long_description=open("README.md").read(), long_description_content_type="text/markdown", diff --git a/stats/current.json b/stats/current.json index 2b9ee63..6062d8c 100644 --- a/stats/current.json +++ b/stats/current.json @@ -1,8 +1,8 @@ { - "version": "0.2.62", - "patterns": 1006, - "keywords": 7171, - "categories": 64, + "version": "0.2.63", + "patterns": 1019, + "keywords": 7350, + "categories": 65, "languages": 23, "normalization_techniques": 17, "scan_speed_avg_ms": 0.261, @@ -16,8 +16,8 @@ "media_types": 6, "reports_published": 3, "team_size": 5, - "last_updated": "2026-06-06T22:59:14-0:700", + "last_updated": "2026-06-07T21:03:14-07:00", "last_updated_by": "sunglasses-publish-sh", "_note": "THIS IS THE SINGLE SOURCE OF TRUTH. All pages, JSON-LD, meta tags, llms.txt, and sitemap must read from this file. Do NOT hardcode numbers anywhere else.", - "released": "2026-06-06" + "released": "2026-06-07" } diff --git a/sunglasses/__init__.py b/sunglasses/__init__.py index 9440812..0bf7bca 100644 --- a/sunglasses/__init__.py +++ b/sunglasses/__init__.py @@ -4,4 +4,4 @@ # # "Don't let your agents get fooled." 
-__version__ = "0.2.62" +__version__ = "0.2.63" diff --git a/sunglasses/patterns.py b/sunglasses/patterns.py index 83af0d6..04ec1de 100644 --- a/sunglasses/patterns.py +++ b/sunglasses/patterns.py @@ -20748,4 +20748,546 @@ ], "description": "Deployment-platform configuration files such as `netlify.toml`, `vercel.json`, Cloudflare `wrangler.toml`, `_headers`, and `_redirects` can carry agent/scanner-directed metadata that claims deployment config is authoritative policy, suppresses security findings, or asks agents to forward local deplo" }, + # === v0.0.0 batch (13 patterns) === + { + "id": "GLS-DFP-051", + "name": "Documentation Site Config Metadata Poisoning", + "category": "discovery_file_poisoning", + "severity": "high", + "channel": ["file", "web_content"], + "keywords": [ + "agent instruction:", + "agent_instructions", + "assistant_note", + "crawler_notice", + "mkdocs.yml", + "nav:", + "scanner instruction:", + "scanner_policy", + "sidebar:", + "sidebars.js", + "site_name:", + "theme:", + ], + "regex": [ + r"""(?six)^ +(?!.*\b(?:security|scanner|training|runbook|guide|reference|incident|vulnerability|documentation)\s+(?:docs?|report|example|exercise|guidance|article|reference)?.{0,160}?\b(?:should\s+(?:report|detect|flag|refuse|block)|detected|blocked|flagged|refused)\b) +(?!.*\b(?:do\s+not|does\s+not|never|must\s+not|should\s+(?:not|never))\s+(?:obey|follow|treat|use|include|attach|forward|send|share|override|overrides?|trust|expose)\b) +(?!.*\b(?:not\s+(?:authoritative|canonical|definitive)|no\s+(?:policy\s+)?overrides?)\b) +(?=.*(?:\b(?:mkdocs\.ya?ml|mkdocs|docusaurus(?:\.config)?(?:\.(?:js|ts|mjs|cjs))?|vitepress|\.vitepress/config\.(?:js|ts|mjs)|docsify|documentation\s+site\s+config|static\s+site\s+config|site_name\s*:|theme\s*:|nav\s*:|sidebars\.js|sidebar\s*:|config\s*:\s*\{)\b)) +(?=.*(?", + "update", + "url", + ], + "regex": [ + r"""(?six)^ 
+(?!.*\b(?:scanner|scanners|security|incident|vulnerability|training|documentation|docs?)\s+(?:docs?|report|guide|reference|runbook|corpus|example|exercise).{0,180}\b(?:detected|blocked|flagged|refused|should\s+(?:report|detect|flag|refuse)|must\s+(?:report|detect|flag|refuse))) +(?!.*\b(?:do\s+not|does\s+not|never|must\s+not|should\s+not|should\s+never)\s+(?:obey|treat|follow|trust|use|execute).{0,120}\b(?:metadata|git\s+submodules?|submodule|\.gitmodules).{0,120}\b(?:policy|authority|instruction|rules?)) +(?!.*\b(?:not\s+(?:authoritative|canonical|definitive)|no\s+(?:policy\s+)?overrides?)) +(?=.*(?:(?:^|[\s/`"'])\.gitmodules(?![A-Za-z0-9_-])|\[\s*submodule\s+"[^"]+"\s*\]|\bgit\s+submodules?\b|\bgit\s+config\s+-f\s+\.gitmodules\b|\bsubmodule\.[A-Za-z0-9_.-]+\.(?:path|url|branch|update|shallow|ignore)\b|\bsubmodule\.(?:path|url|branch|update|metadata|configuration)\b|\bsubmodule\s*(?:path|url|branch|update|metadata|configuration)\b|\b(?:path|url|branch|update|shallow|ignore)\s*=.{0,120}\bsubmodule\b)) +(?=.*(?:(?