From 7f8a57671e4a4f140423edaa99f593bad12a3a34 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Thu, 25 Jun 2026 01:23:14 +0200
Subject: [PATCH] =?UTF-8?q?test(vcr-ra):=20shadow=20allocator=20vs=20recov?=
 =?UTF-8?q?ery=20ladder=20=E2=80=94=20spurious=20vs=20genuine=20spill=20(#?=
 =?UTF-8?q?242)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Measure-only (SYNTH_SHADOW_ALLOC, byte-identical) — answers the VCR-RA acceptance
question empirically: do the recovery ladder's spills survive under a verified
(virtual-register) allocator? Running the graph-colouring shadow allocator on the
ladder-firing functions splits them:
  control_step.wasm              spill -> shadow peak 8 <= 9  SPURIOUS (VCR-RA subsumes)
  high_pressure_i32              spill -> shadow peak 10 > 9  GENUINE  (VCR-RA spills too)
  promotion_exhaustion_fallback  spill -> shadow peak 10 > 9  GENUINE
  high_pressure_i64 / msgq_put   -> shadow declines (i64/calls — model scope gap)

Key result: control_step — a SHIPPED frozen fixture — spills only as a
physical-register artifact (its 8 live values fit the 9-wide pool once allocated
virtually), exactly the case a verified allocator removes. The genuine floor is the
peak-10 functions, where VCR-RA's bar is "spill no worse," not "no spill."

Adds the measured comparison to the recovery-ladder map + the CI test
shadow_alloc_spurious_vs_genuine_spill_242 (pins the ≤9 / >9 threshold, not exact
peaks, so codegen drift won't flap it). No production code changed; frozen gate green.

Refs #242

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 crates/synth-cli/tests/recovery_stats_242.rs  | 54 +++++++++++++++++++
 .../register_exhaustion_recovery_ladder.md    | 26 +++++++++
 2 files changed, 80 insertions(+)
diff --git a/crates/synth-cli/tests/recovery_stats_242.rs b/crates/synth-cli/tests/recovery_stats_242.rs
index 26819f3..4cacc10 100644
--- a/crates/synth-cli/tests/recovery_stats_242.rs
+++ b/crates/synth-cli/tests/recovery_stats_242.rs
@@ -88,3 +88,57 @@ fn recovery_stats_rung_classification_242() {
         "local_promote_i32 should compile on the base attempt; got:\n{base}"
     );
 }
+
+/// Run a fixture with `SYNTH_SHADOW_ALLOC=1` and return the parsed peak
+/// value-pressure from the `[shadow-alloc]` line (None if the shadow allocator
+/// declined the function — calls/i64/fp it does not yet model).
+fn shadow_peak(wasm: &str) -> Option<u32> {
+    let out = Command::new(synth())
+        .env("SYNTH_SHADOW_ALLOC", "1")
+        .args([
+            "compile",
+            fixture(wasm).to_str().unwrap(),
+            "-o",
+            &format!("/tmp/shadow_{wasm}.elf"),
+            "--target",
+            "cortex-m4",
+            "--relocatable",
+            "--all-exports",
+        ])
+        .output()
+        .expect("run synth");
+    assert!(out.status.success(), "compile {wasm} failed");
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    // "... peak value-pressure is 8 (..." or "..., peak value-pressure 8, ..."
+    stderr.lines().find_map(|l| {
+        let l = l.strip_prefix("[shadow-alloc]")?;
+        let after = l.split("value-pressure").nth(1)?;
+        after
+            .split(|c: char| !c.is_ascii_digit())
+            .find(|s| !s.is_empty())
+            .and_then(|n| n.parse().ok())
+    })
+}
+
+/// The VCR-RA acceptance question, made executable: the recovery ladder's spills
+/// split into SPURIOUS (a physical-register artifact the verified/virtual-register
+/// allocator removes — peak value-pressure ≤ the 9-wide pool) and GENUINE (peak >
+/// 9, real pressure the verified allocator must spill too). `control_step` — a
+/// SHIPPED frozen fixture — spills only spuriously; `high_pressure_i32` is genuine.
+/// (Pins the threshold, not the exact peak, so minor codegen drift won't flap.)
+#[test]
+fn shadow_alloc_spurious_vs_genuine_spill_242() {
+    // control_step: single-pass spills, but the shadow allocator fits it in the
+    // 9-wide pool ⇒ the verified allocator subsumes this spill.
+    let cs = shadow_peak("control_step.wasm").expect("control_step is shadow-modelled");
+    assert!(
+        cs <= 9,
+        "control_step spill should be SPURIOUS (peak ≤ 9); got peak {cs}"
+    );
+    // high_pressure_i32: genuinely exceeds the pool ⇒ a real spill VCR-RA also incurs.
+    let hp = shadow_peak("high_pressure_i32.wat").expect("high_pressure_i32 is shadow-modelled");
+    assert!(
+        hp > 9,
+        "high_pressure_i32 spill should be GENUINE (peak > 9); got peak {hp}"
+    );
+}
diff --git a/scripts/repro/register_exhaustion_recovery_ladder.md b/scripts/repro/register_exhaustion_recovery_ladder.md
index c820700..23ff922 100644
--- a/scripts/repro/register_exhaustion_recovery_ladder.md
+++ b/scripts/repro/register_exhaustion_recovery_ladder.md
@@ -76,3 +76,29 @@ the surface is ~5 functions (incl. the shipped `control_step` fixture); the same
 counter run over a real dissolved workload measures the production surface VCR-RA
 must absorb. (`SYNTH_REALLOC_STATS` is the analogous counter for the separate
 range-realloc pass on the optimized path.)
+
+## Does the shadow allocator subsume the ladder? — MEASURED
+
+`SYNTH_SHADOW_ALLOC=1` runs the graph-colouring allocator (`allocate_function`, the
+VCR-RA prototype) on the selected stream and logs whether it colours within the
+R0-R8 pool, the **true value-pressure** (one node per value, not per reused physical
+register), and remat opportunities — measure-only, byte-identical. Running it on the
+ladder-firing functions answers the acceptance question directly:
+
+| function | recovery rung | shadow allocator | verdict |
+|---|---|---|---|
+| `control_step.wasm` | spill | would spill R3, but **peak value-pressure 8 ≤ 9** | **spurious spill — VCR-RA subsumes it** |
+| `high_pressure_i32` | spill | would spill R1, **peak 10 > 9** | genuine — VCR-RA spills too |
+| `promotion_exhaustion_fallback` | promotion-off→spill | would spill R5, **peak 10 > 9** | genuine — VCR-RA spills too |
+| `high_pressure_i64` | param-backing | declined (i64 unmodeled) | Track-A model gap |
+| `msgq_put_359.wasm` | spill | declined (calls/i64) | Track-A model gap |
+
+So the single-pass allocator's spill is **not always real pressure**: `control_step`
+— a *shipped* fixture — spills only as a physical-register artifact (peak 8 fits the
+9-wide pool once values are allocated virtually), exactly the case a verified
+allocator removes. The genuine floor in this corpus is the **peak-10** functions
+(`high_pressure_i32`, `promotion_exhaustion_fallback`): there VCR-RA must spill too,
+so its acceptance bar there is "spill no *worse* than the ladder," not "no spill."
+The i64/call functions are the shadow model's current scope gap (it declines them) —
+a `VCR-RA` modelling TODO, not an allocation result. The spurious-vs-genuine split is
+asserted by `shadow_alloc_spurious_vs_genuine_spill_242`.