From 76b2ccb494e9dddd87f046db56a25783cf726074 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Thu, 25 Jun 2026 00:28:05 +0200 Subject: [PATCH] feat(vcr-ra): per-rung recovery-stats counter + measured ladder map (#242) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SYNTH_RECOVERY_STATS=1 makes select_direct log which exhaustion-recovery rung produced each function's code ([recovery-stats] rung=… result=…). Logging only — emitted bytes are unchanged, frozen byte gate green (verified). This wires the per-rung counter the recovery-ladder map (#242) named as its next measurement, and turns that doc's design-intent fixture→rung claims into measured facts. Measured across the full scripts/repro corpus (cortex-m4 --relocatable --all-exports): the recovery ladder fires for ~5 functions — spill: high_pressure_i32, control_step.wasm, msgq_put_359 (1 of 5 fns) param-back: high_pressure_i64 promotion-off: promotion_exhaustion_fallback (#474, then spill) Everything else compiles on the base attempt. Notable: control_step (the canonical 0x00210A55 frozen fixture) is produced via the SPILL rung — it sits at the register-pressure edge, so the spill rung is load-bearing for a shipped fixture, not just stress tests. That is the size of the failure surface a verified allocator (#242 north star) must subsume. New CI test recovery_stats_rung_classification_242 pins the classification. Refs #242 Co-Authored-By: Claude Opus 4.8 --- crates/synth-backend/src/arm_backend.rs | 84 +++++++++++------ crates/synth-cli/tests/recovery_stats_242.rs | 90 +++++++++++++++++++ .../register_exhaustion_recovery_ladder.md | 46 ++++++---- 3 files changed, 174 insertions(+), 46 deletions(-) create mode 100644 crates/synth-cli/tests/recovery_stats_242.rs diff --git a/crates/synth-backend/src/arm_backend.rs b/crates/synth-backend/src/arm_backend.rs index 33eeeb5..710d1f5 100644 --- a/crates/synth-backend/src/arm_backend.rs +++ b/crates/synth-backend/src/arm_backend.rs @@ -233,29 +233,34 @@ fn compile_wasm_to_arm( // The full exhaustion-recovery ladder, parameterized on whether local // promotion is enabled. Each rung is reached only when the previous one // returned a recoverable register-exhaustion Err, so a function that - // compiles on the first attempt is untouched by the later rungs. - let recovery_ladder = |promote: bool| -> Result, synth_core::Error> { - let mut attempt = select_direct_attempt(false, false, promote); - // VCR-RA-001 step 3b-lite (#242): the i32 register-exhaustion - // hard-fail is recoverable — retry with spill-on-exhaustion, which - // reserves the spill area and spills the deepest stack value when the - // pool is full. - if let Err(e) = &attempt - && e.to_string().contains(SINGLE_EXHAUSTION) - { - attempt = select_direct_attempt(true, false, promote); - } - // VCR-RA-001 acceptance increment (#242): the i64 consecutive-PAIR - // exhaustion is recoverable too — not by stack spilling (the pair - // allocator already spills stack values, #171) but by frame-backing - // the params (#204) so they stop pinning R0-R3, with spill kept on. - if let Err(e) = &attempt - && e.to_string().contains(PAIR_EXHAUSTION) - { - attempt = select_direct_attempt(true, true, promote); - } - attempt - }; + // compiles on the first attempt is untouched by the later rungs. Returns + // the result AND which rung produced it (for the #242 measurement below). + let recovery_ladder = + |promote: bool| -> (Result, synth_core::Error>, &'static str) { + let mut attempt = select_direct_attempt(false, false, promote); + let mut rung = "base"; + // VCR-RA-001 step 3b-lite (#242): the i32 register-exhaustion + // hard-fail is recoverable — retry with spill-on-exhaustion, which + // reserves the spill area and spills the deepest stack value when + // the pool is full. + if let Err(e) = &attempt + && e.to_string().contains(SINGLE_EXHAUSTION) + { + attempt = select_direct_attempt(true, false, promote); + rung = "spill"; + } + // VCR-RA-001 acceptance increment (#242): the i64 consecutive-PAIR + // exhaustion is recoverable too — not by stack spilling (the pair + // allocator already spills stack values, #171) but by frame-backing + // the params (#204) so they stop pinning R0-R3, with spill kept on. + if let Err(e) = &attempt + && e.to_string().contains(PAIR_EXHAUSTION) + { + attempt = select_direct_attempt(true, true, promote); + rung = "param-backing"; + } + (attempt, rung) + }; // #474: local promotion (default-on since v0.14.0) is an OPTIMIZATION — it // must never be the reason a function fails to compile. Run the full ladder // with promotion first (so every function that compiles today is @@ -267,13 +272,36 @@ fn compile_wasm_to_arm( // is reached ONLY by functions that exhaust WITH promotion, so promotion-on // output is untouched by construction (frozen byte gate stays green). let promote = std::env::var("SYNTH_NO_LOCAL_PROMOTE").is_err(); - let mut attempt = recovery_ladder(promote); + let (mut attempt, mut rung) = recovery_ladder(promote); + let mut promotion_dropped = false; if promote - && let Err(e) = &attempt - && e.to_string().contains("register exhaustion") - && let Ok(rescued) = recovery_ladder(false) + && attempt + .as_ref() + .err() + .is_some_and(|e| e.to_string().contains("register exhaustion")) { - attempt = Ok(rescued); + let (rescued, off_rung) = recovery_ladder(false); + if rescued.is_ok() { + attempt = rescued; + rung = off_rung; + promotion_dropped = true; + } + } + // VCR-RA measurement (#242): log which recovery rung produced the result, + // so the per-rung distribution across a corpus can be measured — the size + // of the failure surface a verified allocator must subsume (see + // scripts/repro/register_exhaustion_recovery_ladder.md). Logging only: + // emitted bytes are unchanged, so the frozen byte gate is unaffected. + if std::env::var("SYNTH_RECOVERY_STATS").is_ok() { + eprintln!( + "[recovery-stats] rung={rung}{} result={}", + if promotion_dropped { + " promotion-off" + } else { + "" + }, + if attempt.is_ok() { "ok" } else { "exhausted" }, + ); } attempt.map_err(|e| format!("instruction selection failed: {}", e)) }; diff --git a/crates/synth-cli/tests/recovery_stats_242.rs b/crates/synth-cli/tests/recovery_stats_242.rs new file mode 100644 index 0000000..26819f3 --- /dev/null +++ b/crates/synth-cli/tests/recovery_stats_242.rs @@ -0,0 +1,90 @@ +//! VCR-RA recovery-rung measurement (#242) — assert which exhaustion-recovery +//! rung `select_direct` uses for the corpus fixtures that exercise the ladder. +//! +//! `SYNTH_RECOVERY_STATS=1` makes the backend log `[recovery-stats] rung=…` per +//! function (logging only — emitted bytes are unchanged, the frozen byte gate is +//! unaffected). This test pins the rung classification documented in +//! `scripts/repro/register_exhaustion_recovery_ladder.md`: the recovery ladder is +//! the failure surface the verified allocator (#242) must subsume, and this is the +//! executable record of how big that surface is on the in-repo corpus (3 fixtures). + +use std::process::Command; + +fn synth() -> &'static str { + env!("CARGO_BIN_EXE_synth") +} + +fn fixture(name: &str) -> std::path::PathBuf { + std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .join("../..") + .join("scripts/repro") + .join(name) +} + +/// Compile a fixture with `SYNTH_RECOVERY_STATS=1` and return the captured +/// `[recovery-stats]` stderr lines joined. +fn recovery_stats(wasm: &str) -> String { + let out = Command::new(synth()) + .env("SYNTH_RECOVERY_STATS", "1") + .args([ + "compile", + fixture(wasm).to_str().unwrap(), + "-o", + &format!("/tmp/recstats_{wasm}.elf"), + "--target", + "cortex-m4", + "--relocatable", + "--all-exports", + ]) + .output() + .expect("run synth"); + assert!( + out.status.success(), + "compile {wasm} failed: {}", + String::from_utf8_lossy(&out.stderr) + ); + String::from_utf8_lossy(&out.stderr) + .lines() + .filter(|l| l.contains("[recovery-stats]")) + .collect::>() + .join("\n") +} + +#[test] +fn recovery_stats_rung_classification_242() { + // The three corpus fixtures that exercise the recovery ladder. + let hp32 = recovery_stats("high_pressure_i32.wat"); + assert!( + hp32.contains("rung=spill"), + "high_pressure_i32 should hit the spill rung; got:\n{hp32}" + ); + + let hp64 = recovery_stats("high_pressure_i64.wat"); + assert!( + hp64.contains("rung=param-backing"), + "high_pressure_i64 should hit the param-backing rung; got:\n{hp64}" + ); + + // #474 fallback: promotion is dropped, then the promotion-off ladder spills. + let pef = recovery_stats("promotion_exhaustion_fallback.wat"); + assert!( + pef.contains("promotion-off"), + "promotion_exhaustion_fallback should hit the promotion-off fallback; got:\n{pef}" + ); + + // control_step (the canonical frozen fixture, 0x00210A55) sits right at the + // register-pressure edge — it is produced via the SPILL rung, not the base + // attempt. Pinning this guards that its frozen bytes are the spill-rung output. + let cs = recovery_stats("control_step.wasm"); + assert!( + cs.contains("rung=spill") && !cs.contains("promotion-off"), + "control_step should compile via the spill rung; got:\n{cs}" + ); + + // A representative base-rung fixture compiles on the first attempt. + let base = recovery_stats("local_promote_i32.wat"); + assert!( + base.contains("rung=base") && !base.contains("spill"), + "local_promote_i32 should compile on the base attempt; got:\n{base}" + ); +} diff --git a/scripts/repro/register_exhaustion_recovery_ladder.md b/scripts/repro/register_exhaustion_recovery_ladder.md index 9fa583f..c820700 100644 --- a/scripts/repro/register_exhaustion_recovery_ladder.md +++ b/scripts/repro/register_exhaustion_recovery_ladder.md @@ -36,20 +36,30 @@ need it: it would either color the function with the locals in registers, or spi them — never fail. Rung 4 existing at all is a direct symptom of the single-pass allocator the north star replaces. -## Which fixtures exercise which rung +## Which fixtures exercise which rung — MEASURED -Only rung 4 is **confirmed load-bearing** here (the `promotion_exhaustion_fallback` -fixture fails to compile without its rung and compiles with it — verified in the -#475 regression test). The rest are by *design intent* (the fixture/name + the -issue that introduced the rung); attributing a rung precisely needs the per-rung -counter noted below — this table is the hypothesis that counter would confirm. +`SYNTH_RECOVERY_STATS=1` makes `select_direct` log the rung that produced each +function's code (`[recovery-stats] rung=… result=…`). Logging only — emitted bytes +are unchanged (frozen gate green). Measured across the whole `scripts/repro/*.wat` +corpus (`--target cortex-m4 --relocatable --all-exports`): -| fixture | rung (status) | +| fixture (function) | rung (measured) | |---|---| -| `control_step`, `flight_seam*`, most repros | 0 base (compile cleanly) | -| `high_pressure_i32` | 2 spill — *by design, unconfirmed* | -| `high_pressure_i64` | 3 param frame-backing — *by design, unconfirmed* | -| `promotion_exhaustion_fallback` (#475) | 4 promotion-off — **confirmed** | +| most of the ~49-fixture corpus | **base** — compile on the first attempt | +| `high_pressure_i32` | **spill** (rung 2) | +| `control_step.wasm` (the 0x00210A55 frozen fixture) | **spill** (rung 2) | +| `msgq_put_359.wasm` (1 of its 5 functions) | **spill** (rung 2) | +| `high_pressure_i64` | **param-backing** (rung 3) | +| `promotion_exhaustion_fallback` (#475) | **promotion-off** then spill (rung 4 → 2) | + +So across the corpus the recovery ladder fires for **~5 functions** — that is the +entire local failure surface. The notable one is `control_step`: the canonical +frozen fixture is itself produced via the **spill** rung, i.e. it sits right at the +register-pressure edge (a base-attempt allocator would have *failed* it — the spill +rung is load-bearing for a shipped fixture, not just stress tests). `local_promote_*`, +`mutex_pressure`, the `flight_seam*` / `native_pointer_*` families, etc. all compile +on the base attempt. The classification is asserted by the CI test +`recovery_stats_rung_classification_242`. ## North-star implication @@ -59,10 +69,10 @@ Rung 1 (decline on unlowerable IR) is a *coverage* gap, not an allocation failur and stays until the selector DSL (`VCR-SEL-001`) covers those constructs. So the VCR-RA acceptance bar includes: **every function in this corpus that today needs rungs 2-4 must allocate on the first pass under the verified allocator**, with the -ladder retained only as a defense-in-depth assertion that should never fire. -`SYNTH_REALLOC_STATS` already reports per-function counters for the *range-realloc* -pass (segments / reallocated / declined / needs-spill) on the optimized path; -the analogous next measurement is a per-rung counter on **this** `select_direct` -ladder (how often each rung fires across a real dissolved corpus = the size of the -surface VCR-RA must absorb). That instrumentation is env-gated logging only -(byte-identical output), so it stays frozen-safe. +ladder retained only as a defense-in-depth assertion that should never fire. The +per-rung counter that quantifies this surface is now wired (`SYNTH_RECOVERY_STATS`, +above) — env-gated logging only (byte-identical, frozen-safe). On the local corpus +the surface is ~5 functions (incl. the shipped `control_step` fixture); the same +counter run over a real dissolved workload measures the production surface VCR-RA +must absorb. (`SYNTH_REALLOC_STATS` is the analogous counter for the separate +range-realloc pass on the optimized path.)