From 7f8a57671e4a4f140423edaa99f593bad12a3a34 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Thu, 25 Jun 2026 01:23:14 +0200 Subject: [PATCH] =?UTF-8?q?test(vcr-ra):=20shadow=20allocator=20vs=20recov?= =?UTF-8?q?ery=20ladder=20=E2=80=94=20spurious=20vs=20genuine=20spill=20(#?= =?UTF-8?q?242)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Measure-only (SYNTH_SHADOW_ALLOC, byte-identical) — answers the VCR-RA acceptance question empirically: do the recovery ladder's spills survive under a verified (virtual-register) allocator? Running the graph-colouring shadow allocator on the ladder-firing functions splits them: control_step.wasm spill -> shadow peak 8 <= 9 SPURIOUS (VCR-RA subsumes) high_pressure_i32 spill -> shadow peak 10 > 9 GENUINE (VCR-RA spills too) promotion_exhaustion_fallback spill -> shadow peak 10 > 9 GENUINE high_pressure_i64 / msgq_put -> shadow declines (i64/calls — model scope gap) Key result: control_step — a SHIPPED frozen fixture — spills only as a physical-register artifact (its 8 live values fit the 9-wide pool once allocated virtually), exactly the case a verified allocator removes. The genuine floor is the peak-10 functions, where VCR-RA's bar is "spill no worse," not "no spill." Adds the measured comparison to the recovery-ladder map + the CI test shadow_alloc_spurious_vs_genuine_spill_242 (pins the ≤9 / >9 threshold, not exact peaks, so codegen drift won't flap it). No production code changed; frozen gate green. Refs #242 Co-Authored-By: Claude Opus 4.8 --- crates/synth-cli/tests/recovery_stats_242.rs | 54 +++++++++++++++++++ .../register_exhaustion_recovery_ladder.md | 26 +++++++++ 2 files changed, 80 insertions(+) diff --git a/crates/synth-cli/tests/recovery_stats_242.rs b/crates/synth-cli/tests/recovery_stats_242.rs index 26819f3..4cacc10 100644 --- a/crates/synth-cli/tests/recovery_stats_242.rs +++ b/crates/synth-cli/tests/recovery_stats_242.rs @@ -88,3 +88,57 @@ fn recovery_stats_rung_classification_242() { "local_promote_i32 should compile on the base attempt; got:\n{base}" ); } + +/// Run a fixture with `SYNTH_SHADOW_ALLOC=1` and return the parsed peak +/// value-pressure from the `[shadow-alloc]` line (None if the shadow allocator +/// declined the function — calls/i64/fp it does not yet model). +fn shadow_peak(wasm: &str) -> Option { + let out = Command::new(synth()) + .env("SYNTH_SHADOW_ALLOC", "1") + .args([ + "compile", + fixture(wasm).to_str().unwrap(), + "-o", + &format!("/tmp/shadow_{wasm}.elf"), + "--target", + "cortex-m4", + "--relocatable", + "--all-exports", + ]) + .output() + .expect("run synth"); + assert!(out.status.success(), "compile {wasm} failed"); + let stderr = String::from_utf8_lossy(&out.stderr); + // "... peak value-pressure is 8 (..." or "..., peak value-pressure 8, ..." + stderr.lines().find_map(|l| { + let l = l.strip_prefix("[shadow-alloc]")?; + let after = l.split("value-pressure").nth(1)?; + after + .split(|c: char| !c.is_ascii_digit()) + .find(|s| !s.is_empty()) + .and_then(|n| n.parse().ok()) + }) +} + +/// The VCR-RA acceptance question, made executable: the recovery ladder's spills +/// split into SPURIOUS (a physical-register artifact the verified/virtual-register +/// allocator removes — peak value-pressure ≤ the 9-wide pool) and GENUINE (peak > +/// 9, real pressure the verified allocator must spill too). `control_step` — a +/// SHIPPED frozen fixture — spills only spuriously; `high_pressure_i32` is genuine. +/// (Pins the threshold, not the exact peak, so minor codegen drift won't flap.) +#[test] +fn shadow_alloc_spurious_vs_genuine_spill_242() { + // control_step: single-pass spills, but the shadow allocator fits it in the + // 9-wide pool ⇒ the verified allocator subsumes this spill. + let cs = shadow_peak("control_step.wasm").expect("control_step is shadow-modelled"); + assert!( + cs <= 9, + "control_step spill should be SPURIOUS (peak ≤ 9); got peak {cs}" + ); + // high_pressure_i32: genuinely exceeds the pool ⇒ a real spill VCR-RA also incurs. + let hp = shadow_peak("high_pressure_i32.wat").expect("high_pressure_i32 is shadow-modelled"); + assert!( + hp > 9, + "high_pressure_i32 spill should be GENUINE (peak > 9); got peak {hp}" + ); +} diff --git a/scripts/repro/register_exhaustion_recovery_ladder.md b/scripts/repro/register_exhaustion_recovery_ladder.md index c820700..23ff922 100644 --- a/scripts/repro/register_exhaustion_recovery_ladder.md +++ b/scripts/repro/register_exhaustion_recovery_ladder.md @@ -76,3 +76,29 @@ the surface is ~5 functions (incl. the shipped `control_step` fixture); the same counter run over a real dissolved workload measures the production surface VCR-RA must absorb. (`SYNTH_REALLOC_STATS` is the analogous counter for the separate range-realloc pass on the optimized path.) + +## Does the shadow allocator subsume the ladder? — MEASURED + +`SYNTH_SHADOW_ALLOC=1` runs the graph-colouring allocator (`allocate_function`, the +VCR-RA prototype) on the selected stream and logs whether it colours within the +R0-R8 pool, the **true value-pressure** (one node per value, not per reused physical +register), and remat opportunities — measure-only, byte-identical. Running it on the +ladder-firing functions answers the acceptance question directly: + +| function | recovery rung | shadow allocator | verdict | +|---|---|---|---| +| `control_step.wasm` | spill | would spill R3, but **peak value-pressure 8 ≤ 9** | **spurious spill — VCR-RA subsumes it** | +| `high_pressure_i32` | spill | would spill R1, **peak 10 > 9** | genuine — VCR-RA spills too | +| `promotion_exhaustion_fallback` | promotion-off→spill | would spill R5, **peak 10 > 9** | genuine — VCR-RA spills too | +| `high_pressure_i64` | param-backing | declined (i64 unmodeled) | Track-A model gap | +| `msgq_put_359.wasm` | spill | declined (calls/i64) | Track-A model gap | + +So the single-pass allocator's spill is **not always real pressure**: `control_step` +— a *shipped* fixture — spills only as a physical-register artifact (peak 8 fits the +9-wide pool once values are allocated virtually), exactly the case a verified +allocator removes. The genuine floor in this corpus is the **peak-10** functions +(`high_pressure_i32`, `promotion_exhaustion_fallback`): there VCR-RA must spill too, +so its acceptance bar there is "spill no *worse* than the ladder," not "no spill." +The i64/call functions are the shadow model's current scope gap (it declines them) — +a `VCR-RA` modelling TODO, not an allocation result. The spurious-vs-genuine split is +asserted by `shadow_alloc_spurious_vs_genuine_spill_242`.