From b53d4a15180d02f32bdd88cb5a32a588c6b235e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ralph=20K=C3=BCpper?= Date: Sun, 14 Jun 2026 09:30:53 +0200 Subject: [PATCH] test(gc): make the gc_write_barrier_stress tests optional (off the blocking gate) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two GC write-barrier stress tests run compiled binaries under the slowest GC configuration (PERRY_GC_FORCE_EVACUATE + PERRY_GC_VERIFY_EVACUATION) to hunt a *rare* corruption window (#5029). They're ~200s each and nondeterministic by nature, which makes them a poor fit for the blocking per-PR `cargo-test` gate — one flake blocked every unrelated PR (e.g. #5115, a one-line test-only change, failed `tenured_mutation_stress`). - `#[ignore]` both tests so the per-PR `cargo test -p perry` skips them. The gate stays meaningful (all the fast, deterministic unit/integration tests still block) and runs ~6-7 min faster. - Add an opt-in, non-blocking `gc-stress` CI job (`continue-on-error`, gated by the `run-extended-tests` label / `workflow_dispatch` / tag push, like the existing parity/compile-smoke jobs) that runs them with `--ignored`. The signal is preserved without blocking PRs. Run locally with: cargo test -p perry --test gc_write_barrier_stress -- --ignored The underlying corruption (#5029) is real (verify-evacuation only aborts on a genuine un-rewritten live slot) and should stay tracked / reopened; this change just stops a nondeterministic stress test from gating every PR. --- .github/workflows/test.yml | 45 +++++++++++++++++++ crates/perry/tests/gc_write_barrier_stress.rs | 13 ++++++ 2 files changed, 58 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7ef302796a..19601046d0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -258,6 +258,51 @@ jobs: find target/debug/deps -maxdepth 1 -type f -perm -111 ! -name '*.so' -delete done + # --------------------------------------------------------------------------- + # GC write-barrier stress (optional / non-blocking) + # + # `crates/perry/tests/gc_write_barrier_stress.rs` runs compiled binaries + # under the slowest GC configuration (PERRY_GC_FORCE_EVACUATE + + # PERRY_GC_VERIFY_EVACUATION) to hunt a *rare* corruption window (#5029). + # Those tests are ~200s each and nondeterministic by nature, so they are a + # poor fit for the blocking per-PR `cargo-test` gate (one flake blocked + # every unrelated PR). They are `#[ignore]`d there and run here instead. + # + # Opt-in + informational: `continue-on-error` so a flake never fails the + # workflow; triggered by the `run-extended-tests` PR label, a + # `workflow_dispatch` with `run_extended_tests=true`, or a tag push. + # --------------------------------------------------------------------------- + gc-stress: + continue-on-error: true + if: >- + github.event_name == 'push' || + (github.event_name == 'workflow_dispatch' && inputs.run_extended_tests) || + (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-extended-tests')) + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@v6 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + with: + shared-key: "${{ runner.os }}-perry" + save-if: ${{ github.ref == 'refs/heads/main' }} + + - name: Install clang + run: | + sudo apt-get update + sudo apt-get install -y clang + + - name: Run GC write-barrier stress tests + env: + # Match the cargo-test gate's linker workaround (lld SIGBUS on the + # shared runner during large test links). + CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUSTFLAGS: "-C linker-features=-lld" + run: cargo test -p perry --test gc_write_barrier_stress -- --ignored + # --------------------------------------------------------------------------- # Compiler-output regression gate # diff --git a/crates/perry/tests/gc_write_barrier_stress.rs b/crates/perry/tests/gc_write_barrier_stress.rs index f2618e72b8..3c7f5f5898 100644 --- a/crates/perry/tests/gc_write_barrier_stress.rs +++ b/crates/perry/tests/gc_write_barrier_stress.rs @@ -151,7 +151,17 @@ fn assert_ok_output(run: &std::process::Output, expected: &str) { /// globals) to point at freshly allocated nursery values, GC again, and /// verify every value survived. Any missed barrier on those store paths /// shows up as corrupt reads, an evacuation-verify panic, or a crash. +/// +/// Optional / non-blocking (#5029): runs under the slowest GC config +/// (force-evacuate + verify-evacuation) and hunts a *rare* corruption +/// window, so it's nondeterministic and ~200s long — a poor fit for the +/// blocking per-PR `cargo-test` gate (one flake blocks every unrelated PR). +/// `#[ignore]`d by default; the `gc-stress` CI job runs it with `--ignored` +/// (opt-in via the `run-extended-tests` label or `workflow_dispatch`), and +/// you can run it locally with `cargo test -p perry --test +/// gc_write_barrier_stress -- --ignored`. #[test] +#[ignore = "#5029: nondeterministic GC-corruption stress test; runs in the opt-in gc-stress CI job, not the blocking gate"] fn tenured_mutation_stress() { let run = compile_and_run( r#" @@ -221,7 +231,10 @@ console.log(bad === 0 ? "BARRIER_STRESS_OK" : "BARRIER_STRESS_CORRUPT " + bad); /// deep-clone loop in `js_structured_clone` now routes through the shared /// barriered store). Uses a 300-key literal (all fields inline) plus a deep /// nested chain so the clone itself allocates enough to run GCs mid-clone. +/// +/// Optional / non-blocking (#5029) — see `tenured_mutation_stress` above. #[test] +#[ignore = "#5029: nondeterministic GC-corruption stress test; runs in the opt-in gc-stress CI job, not the blocking gate"] fn structured_clone_gc_churn_stress() { let mut fields = String::new(); for i in 0..300 {