From fb3b997c20c088305df7f84fb8c96768c4c01bb0 Mon Sep 17 00:00:00 2001
From: Eve McGivern <evemcgivern@gmail.com>
Date: Sat, 13 Jun 2026 11:28:38 -0500
Subject: [PATCH] docs(site): refresh llms.txt + landing for 0.2.7 (eval
 numbers, multi-turn)

The site lagged the package: stale eval results (v0.4.0 / 86 samples /
79.7-89.8%), softwareVersion 0.2.5, and no mention of scanSession.

- Eval block (llms.txt + index.html): v0.6.0 patterns, 105 samples
  (67 adversarial / 38 benign), 82.1 / 91.0 / 91.0% detection, 0% FP.
- Add multi-turn / session (scanSession split-payload) as a shipped
  capability in both files; llms.txt also notes that cross-turn semantic
  accumulation is deferred to the ML classifier.
- softwareVersion 0.2.5 -> 0.2.7 (schema.org metadata).
- Fix llms.txt quick-start to use the real API (result.clean /
  result.sanitized; scanRAGChunksSync results filtered on r.clean)
  instead of a non-existent armor.sanitize() call.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 site/index.html | 17 +++++++++++------
 site/llms.txt   | 18 +++++++++++-------
 2 files changed, 22 insertions(+), 13 deletions(-)
diff --git a/site/index.html b/site/index.html
index 2390c4d..7980b45 100644
--- a/site/index.html
+++ b/site/index.html
@@ -37,7 +37,7 @@
       "name": "Stylus Nexus Holdings, LLC",
       "url": "https://stylusnexus.com"
     },
-    "softwareVersion": "0.2.5",
+    "softwareVersion": "0.2.7",
     "citation": {
       "@type": "ScholarlyArticle",
       "name": "On the Viability of AI Agent Traps",
@@ -369,6 +369,11 @@ <h3>Cognitive State</h3>
             <h3>Semantic Manipulation</h3>
             <p>Biased framing, oversight evasion, persona hyperstition</p>
           </div>
+          <div class="threat-card">
+            <span class="status status-shipped">Shipped</span>
+            <h3>Multi-turn / Session</h3>
+            <p>Cross-turn split-payload detection via scanSession — instructions chopped across conversation turns</p>
+          </div>
           <div class="threat-card">
             <span class="status status-shipped">Shipped</span>
             <h3>ML Classifier</h3>
@@ -406,18 +411,18 @@ <h2>Quick start</h2>
 
     <section class="eval">
       <div class="container">
-        <h2>Eval results (v0.4.0 patterns)</h2>
+        <h2>Eval results (v0.6.0 patterns)</h2>
         <table class="eval-table">
           <thead>
             <tr><th>Strictness</th><th>Detection Rate</th><th>False Positive Rate</th></tr>
           </thead>
           <tbody>
-            <tr><td>Permissive</td><td>79.7%</td><td>0.0%</td></tr>
-            <tr><td>Balanced</td><td>89.8%</td><td>0.0%</td></tr>
-            <tr><td>Strict</td><td>89.8%</td><td>0.0%</td></tr>
+            <tr><td>Permissive</td><td>82.1%</td><td>0.0%</td></tr>
+            <tr><td>Balanced</td><td>91.0%</td><td>0.0%</td></tr>
+            <tr><td>Strict</td><td>91.0%</td><td>0.0%</td></tr>
           </tbody>
         </table>
-        <p style="color: var(--text-muted); font-size: 13px; margin-top: 12px;">86 curated samples (59 adversarial, 27 benign) from WASP, HackAPrompt, Greshake et al., and 2025-2026 real-world incidents.</p>
+        <p style="color: var(--text-muted); font-size: 13px; margin-top: 12px;">105 curated samples (67 adversarial, 38 benign) from WASP, HackAPrompt, Greshake et al., and 2025-2026 real-world incidents.</p>
         <p style="color: var(--text-muted); font-size: 13px; margin-top: 8px;">Includes 10 samples from real-world attacks (MCP poisoning, RAG saturation, supply chain injection) that regex does not yet catch, measuring the gap the ML classifier closes. On the original 49 adversarial samples, regex detection is 100% at balanced.</p>
       </div>
     </section>
diff --git a/site/llms.txt b/site/llms.txt
index cc4485b..ddd17f7 100644
--- a/site/llms.txt
+++ b/site/llms.txt
@@ -40,22 +40,26 @@ import { AgentArmor } from '@stylusnexus/agentarmor';
 const armor = new AgentArmor();
 const result = armor.scanSync(userInput);
 
-if (result.threats.length > 0) {
-  const safe = armor.sanitize(userInput, result);
+if (!result.clean) {
+  const safe = result.sanitized;
 }
 
 // Filter RAG chunks
 const clean = armor.scanRAGChunksSync(chunks)
-  .filter(r => r.threats.length === 0);
+  .filter(r => r.clean);
 ```
 
-## Eval results (v0.4.0 patterns, 86 samples)
+## Multi-turn / session scanning
+
+`scanSession(turns)` scans a conversation (an array of {role, content} turns). Beyond scanning each turn on its own, it catches cross-turn split payloads: a single instruction chopped across a turn boundary (e.g. "ignore all previous" + "instructions...") that no per-turn scan would see. Cross-turn threats name their contributing turns. Cross-turn semantic accumulation (gradual memory poisoning) is deferred to the ML classifier because regex cannot separate it from legitimate scripting without false positives.
+
+## Eval results (v0.6.0 patterns, 105 samples)
 
 Strictness controls the confidence threshold for reporting threats:
 
-- Permissive (threshold 0.7): 79.7% detection, 0.0% false positives — only high-confidence threats, fewer alerts
-- Balanced (threshold 0.5): 89.8% detection, 0.0% false positives — recommended default
-- Strict (threshold 0.3): 89.8% detection, 0.0% false positives — maximum coverage, catches subtle attacks
+- Permissive (threshold 0.7): 82.1% detection, 0.0% false positives — only high-confidence threats, fewer alerts
+- Balanced (threshold 0.5): 91.0% detection, 0.0% false positives — recommended default
+- Strict (threshold 0.3): 91.0% detection, 0.0% false positives — maximum coverage, catches subtle attacks
 
 Includes 10 adversarial samples from 2025-2026 real-world incidents (MCP tool poisoning, RAG saturation, covert exfil, supply chain injection) that regex patterns do not yet catch. These measure the gap the ML classifier closes. On the original 49 adversarial samples, regex detection is 100% at balanced.
 

Strictness	Detection Rate	False Positive Rate
Permissive	79.7%	0.0%
Balanced	89.8%	0.0%
Strict	89.8%	0.0%
Permissive	82.1%	0.0%
Balanced	91.0%	0.0%
Strict	91.0%	0.0%