From 931e2a20692adf77b865957b01436f40f7b86e93 Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Thu, 25 Jun 2026 13:25:22 +0200 Subject: [PATCH 1/2] ci(vcr-oracle): CI-gate the RV32 immediate-shift-fold execution oracle (#472, #242) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VCR-ORACLE-001's deliverable is CI-gating the differential oracles, not just shipping them as dev-time scripts. The RV32 immediate-shift-fold lever (#487, PR landed flag-off behind SYNTH_RV_SHIFT_FOLD) came with a unicorn UC_ARCH_RISCV differential (shift_fold_riscv_differential.py) but it only ran by hand. Since the lever sits flag-off awaiting the on-silicon flip, nothing else exercises the flag-on path — exactly the gap the cmp-select two-move oracle was added to close. Adds an isolated `rv32-shift-fold-oracle` CI job mirroring the existing `cmp-select-oracle` job: build synth, pip-install wasmtime+unicorn+pyelftools in that job ONLY (the main `cargo test` gate is not taxed with the C-library build graph), and run the differential. It executes every fixture function in BOTH flag states under unicorn and asserts bit-identical-to-wasmtime — continuously validating the slli/srli/srai folds, the `& 31` mask on >=32 and negative shift amounts, and the variable-shift non-fold, plus non-vacuity (.text 168B->148B, 5 folds). The differential now honors a SYNTH env override (default release for local dev; CI points it at the debug build for speed, like cmp-select). Frozen-safe: no codegen change, no emitted bytes change — wires an already-written, already-passing oracle into CI. Verified locally with the exact CI invocation (debug binary via SYNTH=./target/debug/synth): ORACLE PASS. ci.yml parses; new job well-formed. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/ci.yml | 35 +++++++++++++++++++ .../repro/shift_fold_riscv_differential.py | 4 ++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8a524c4..aa0a949 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -268,3 +268,38 @@ jobs: run: pip install wasmtime unicorn pyelftools - name: Run two-move execution oracle run: python scripts/repro/cmp_select_two_move_differential.py + + rv32-shift-fold-oracle: + name: rv32 immediate-shift-fold execution oracle + # VCR-ORACLE-001 (#242, #472): EXECUTE the RV32 immediate-shift-fold lever + # under unicorn (UC_ARCH_RISCV) in BOTH flag states and diff vs wasmtime. The + # lever ships flag-off (SYNTH_RV_SHIFT_FOLD) awaiting the on-silicon flip, so + # nothing else exercises the flag-on path; this continuously validates it + # (slli/srli/srai folds incl. the &31 mask on >=32 and negative amounts, and + # the variable-shift non-fold) against regression — the RV32 analogue of the + # cmp-select oracle above. Isolated job: emulation deps pip-installed here ONLY. + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v7 + - uses: dtolnay/rust-toolchain@stable + - name: Cache Cargo dependencies + uses: actions/cache@v5 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + - name: Build synth + run: cargo build -p synth-cli + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install emulation deps + run: pip install wasmtime unicorn pyelftools + - name: Run RV32 shift-fold execution oracle + env: + SYNTH: ./target/debug/synth + run: python scripts/repro/shift_fold_riscv_differential.py diff --git a/scripts/repro/shift_fold_riscv_differential.py b/scripts/repro/shift_fold_riscv_differential.py index 90bb90f..1f0deb9 100644 --- a/scripts/repro/shift_fold_riscv_differential.py +++ b/scripts/repro/shift_fold_riscv_differential.py @@ -39,7 +39,9 @@ ) WAT = "scripts/repro/shift_fold.wat" -SYNTH = "./target/release/synth" +# Default to the release binary for local dev; CI sets SYNTH=./target/debug/synth +# (it builds `cargo build -p synth-cli`, debug) so the isolated oracle job stays fast. +SYNTH = os.environ.get("SYNTH", "./target/release/synth") CODE, LIN, STK, RET = 0x100000, 0x40000, 0x90000, 0x200000 # (export name, list of argument tuples). Each function returns its result in a0. From a5ceb763e0b9331e0f8c9f2409e7e18995e0f65b Mon Sep 17 00:00:00 2001 From: Ralf Anton Beier Date: Thu, 25 Jun 2026 13:55:29 +0200 Subject: [PATCH 2/2] fix(oracle): read RV32 fixture symbols from the ELF symtab, not `synth disasm` text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CI oracle job failed with `SYMBOL MISSING` on the fresh runner while passing locally: the harness scraped function addresses out of `synth disasm` stdout with a regex, and that text is host-dependent (the disasm backend even decodes RISC-V bytes with an ARM decoder, and on the bare runner the symbol-line format differs so the regex matched nothing). Read the addresses straight from the ELF symbol table via pyelftools instead — the same backend-independent approach base_cse_differential.py uses. synth emits the symtab with an empty section name, so it's found by sh_type (SHT_SYMTAB), and addresses are made .text-relative by subtracting sh_addr. Re-verified with the exact CI invocation (debug binary via SYNTH env): ORACLE PASS, 5 folds, all 6 functions matched. Co-Authored-By: Claude Opus 4.8 --- .../repro/shift_fold_riscv_differential.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/scripts/repro/shift_fold_riscv_differential.py b/scripts/repro/shift_fold_riscv_differential.py index 1f0deb9..9b0886b 100644 --- a/scripts/repro/shift_fold_riscv_differential.py +++ b/scripts/repro/shift_fold_riscv_differential.py @@ -23,7 +23,6 @@ Exits nonzero on any mismatch or vacuity failure. """ import os -import re import subprocess import sys @@ -69,11 +68,25 @@ def compile_elf(out, fold): def syms_and_code(elf): - dis = subprocess.run([SYNTH, "disasm", elf], capture_output=True, text=True).stdout - syms = {m.group(2): int(m.group(1), 16) - for m in re.finditer(r'^([0-9a-f]{8}) <(\w+)>:', dis, re.M)} - code = ELFFile(open(elf, "rb")).get_section_by_name(".text").data() - return syms, code + """Return ({export_name: text_relative_offset}, text_bytes). + + Read function addresses straight from the ELF symbol table, NOT by scraping + `synth disasm` text — the disasm backend/format is host-dependent (on a bare + CI runner it decodes RISC-V bytes with the wrong decoder and the symbol-line + regex matches nothing). synth emits the symtab with an EMPTY section name, so + find it by sh_type, and make addresses .text-relative by subtracting the + section's sh_addr (0 for a relocatable object, but subtract defensively). + """ + f = ELFFile(open(elf, "rb")) + text = f.get_section_by_name(".text") + base = text["sh_addr"] + syms = {} + for s in f.iter_sections(): + if s.header.sh_type == "SHT_SYMTAB": + for sym in s.iter_symbols(): + if sym.name: + syms[sym.name] = sym["st_value"] - base + return syms, text.data() def text_len(elf):