From 7753c06d468094803e36d59a61cf8e628b44d12c Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 10:23:33 +0200 Subject: [PATCH 01/23] fix: resolve constant jumpi conditions --- crates/revmc-codegen/src/bytecode/mod.rs | 12 +- .../src/bytecode/passes/block_analysis.rs | 211 +++++++++++++++--- .../src/compiler/translate/mod.rs | 28 ++- 3 files changed, 208 insertions(+), 43 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/mod.rs b/crates/revmc-codegen/src/bytecode/mod.rs index 04af89b81..5eaabb59d 100644 --- a/crates/revmc-codegen/src/bytecode/mod.rs +++ b/crates/revmc-codegen/src/bytecode/mod.rs @@ -962,6 +962,12 @@ impl InstData { self.is_jump() && self.flags.contains(InstFlags::STATIC_JUMP) } + /// Returns `true` if this instruction is a `JUMPI` whose condition is known statically. + #[inline] + pub(crate) fn has_const_jump_condition(&self) -> bool { + self.opcode == op::JUMPI && self.flags.contains(InstFlags::CONST_JUMP_CONDITION) + } + /// Returns `true` if this instruction is a `JUMPDEST`. #[inline] pub(crate) const fn is_jumpdest(&self) -> bool { @@ -1060,7 +1066,7 @@ impl InstData { /// Returns `true` if execution can fall through to the next sequential instruction. #[inline] pub(crate) fn can_fall_through(&self) -> bool { - !self.is_diverging() && self.opcode != op::JUMP + !self.is_diverging() && self.opcode != op::JUMP && !self.has_const_jump_condition() } /// Returns `true` if we know that this instruction will branch or stop execution. @@ -1104,7 +1110,7 @@ impl InstData { bitflags::bitflags! { /// [`InstrData`] flags. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] - pub(crate) struct InstFlags: u8 { + pub(crate) struct InstFlags: u16 { /// The `JUMP`/`JUMPI` target is known at compile time. const STATIC_JUMP = 1 << 0; /// The jump target is known to be invalid. @@ -1127,6 +1133,8 @@ bitflags::bitflags! { const STACK_SECTION_HEAD = 1 << 6; /// Don't generate any code. const DEAD_CODE = 1 << 7; + /// The `JUMPI` condition is known at compile time. + const CONST_JUMP_CONDITION = 1 << 8; } } diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 03f489373..1f2910662 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -293,26 +293,45 @@ enum JumpTarget { /// Not yet observed. Bottom, /// One or more known constant target instruction indices. - Resolved(SmallVec<[Inst; 4]>), + Resolved(SmallVec<[Inst; 4]>, JumpCondition), /// Known constant but invalid target. Invalid, /// Unknown target. Top, } +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +enum JumpCondition { + #[default] + Unknown, + AlwaysTrue, + AlwaysFalse, +} + impl JumpTarget { /// Creates a resolved target with a single constant. fn single(inst: Inst) -> Self { - Self::Resolved(SmallVec::from_elem(inst, 1)) + Self::Resolved(SmallVec::from_elem(inst, 1), JumpCondition::Unknown) } /// Returns the single resolved target, if exactly one. fn as_single(&self) -> Option { match self { - Self::Resolved(targets) if targets.len() == 1 => Some(targets[0]), + Self::Resolved(targets, _) if targets.len() == 1 => Some(targets[0]), _ => None, } } + + fn with_condition(self, condition: JumpCondition) -> Self { + match self { + Self::Resolved(targets, _) => Self::Resolved(targets, condition), + target => target, + } + } + + fn is_resolved(&self) -> bool { + matches!(self, Self::Resolved(_, _) | Self::Invalid) + } } /// CFG for abstract interpretation. @@ -367,9 +386,7 @@ impl Bytecode<'_> { continue; } - let Some(&operand) = self.snapshots.inputs[term_inst].last() else { continue }; - debug_assert!(!matches!(operand, AbsValue::ConstSet(_))); - let target = self.resolve_jump_operand(operand, &empty_sets); + let target = self.resolve_jump_snapshot(term_inst, &empty_sets); let Some(target_inst) = target.as_single() else { continue }; // Log non-adjacent resolutions (not simple PUSH+JUMP). @@ -435,14 +452,19 @@ impl Bytecode<'_> { } match *target { - JumpTarget::Resolved(ref targets) => { - for &target_inst in targets { - debug_assert_eq!( - self.insts[target_inst].opcode, - op::JUMPDEST, - "block_analysis resolved to non-JUMPDEST" - ); - self.insts[target_inst].set_jumpdest_reachable(); + JumpTarget::Resolved(ref targets, condition) => { + if condition != JumpCondition::Unknown { + self.insts[jump_inst].flags |= InstFlags::CONST_JUMP_CONDITION; + } + if condition != JumpCondition::AlwaysFalse { + for &target_inst in targets { + debug_assert_eq!( + self.insts[target_inst].opcode, + op::JUMPDEST, + "block_analysis resolved to non-JUMPDEST" + ); + self.insts[target_inst].set_jumpdest_reachable(); + } } if targets.len() == 1 { self.insts[jump_inst].flags |= InstFlags::STATIC_JUMP; @@ -592,6 +614,7 @@ impl Bytecode<'_> { } else if term.is_static_jump() && !term.flags.contains(InstFlags::INVALID_JUMP) && let Some(target_block) = resolve(term.static_jump_target()) + && !cfg.blocks[bid].succs.contains(&target_block) { cfg.blocks[target_block].preds.push(bid); cfg.blocks[bid].succs.push(target_block); @@ -637,14 +660,7 @@ impl Bytecode<'_> { let mut jump_targets: Vec<(Inst, JumpTarget)> = Vec::new(); let mut has_top_jump = false; for &jump_inst in &jump_insts { - let target = match self.snapshots.inputs[jump_inst].last() { - Some(&operand) => self.resolve_jump_operand(operand, &const_sets), - None => { - // No snapshot means the block was never interpreted (unreachable). - trace!(%jump_inst, pc = self.pc(jump_inst), "jump in unreached block"); - JumpTarget::Bottom - } - }; + let target = self.resolve_jump_snapshot(jump_inst, &const_sets); if matches!(target, JumpTarget::Top) { has_top_jump = true; } @@ -663,14 +679,39 @@ impl Bytecode<'_> { ); } - let count = jump_targets - .iter() - .filter(|(_, t)| matches!(t, JumpTarget::Resolved(_) | JumpTarget::Invalid)) - .count(); + let count = jump_targets.iter().filter(|(_, target)| target.is_resolved()).count(); (jump_targets, count) } + fn resolve_jump_snapshot(&self, jump_inst: Inst, const_sets: &ConstSetInterner) -> JumpTarget { + let snap = &self.snapshots.inputs[jump_inst]; + let condition = if self.insts[jump_inst].opcode == op::JUMPI { + snap.first() + .map(|&value| self.resolve_jump_condition(value, const_sets)) + .unwrap_or_default() + } else { + JumpCondition::Unknown + }; + + if condition == JumpCondition::AlwaysFalse { + return JumpTarget::Resolved( + SmallVec::from_elem(jump_inst + 1, 1), + JumpCondition::AlwaysFalse, + ); + } + + match snap.last() { + Some(&operand) => { + self.resolve_jump_operand(operand, const_sets).with_condition(condition) + } + None => { + trace!(%jump_inst, pc = self.pc(jump_inst), "jump in unreached block"); + JumpTarget::Bottom + } + } + } + /// Resolves a jump target from the snapshot operand recorded during the fixpoint. fn resolve_jump_operand(&self, operand: AbsValue, const_sets: &ConstSetInterner) -> JumpTarget { match operand { @@ -701,7 +742,7 @@ impl Bytecode<'_> { } } if !targets.is_empty() { - JumpTarget::Resolved(targets) + JumpTarget::Resolved(targets, JumpCondition::Unknown) } else { JumpTarget::Invalid } @@ -710,6 +751,45 @@ impl Bytecode<'_> { } } + fn resolve_jump_condition( + &self, + condition: AbsValue, + const_sets: &ConstSetInterner, + ) -> JumpCondition { + let consts = match condition { + AbsValue::Const(imm) => Either::Left(std::iter::once(imm)), + AbsValue::ConstSet(set_idx) => Either::Right(const_sets.get(set_idx).iter().copied()), + AbsValue::Top => return JumpCondition::Unknown, + }; + let interner = self.u256_interner.borrow(); + let mut saw_zero = false; + let mut saw_nonzero = false; + for imm in consts { + if imm.get(&interner).is_zero() { + saw_zero = true; + } else { + saw_nonzero = true; + } + } + match (saw_zero, saw_nonzero) { + (true, false) => JumpCondition::AlwaysFalse, + (false, true) => JumpCondition::AlwaysTrue, + _ => JumpCondition::Unknown, + } + } + + fn is_const_zero(&self, value: AbsValue) -> bool { + value.as_const().is_some_and(|imm| imm.get(&self.u256_interner.borrow()).is_zero()) + } + + fn local_jumpi_condition_is_zero(&self, jump_inst: Inst, local_snapshots: &Snapshots) -> bool { + self.insts[jump_inst].opcode == op::JUMPI + && local_snapshots.inputs[jump_inst] + .first() + .copied() + .is_some_and(|value| self.is_const_zero(value)) + } + /// Adds discovered dynamic-jump target edges for a block. fn discover_jump_edges( &self, @@ -788,7 +868,12 @@ impl Bytecode<'_> { } for (inst, target) in jump_targets.iter_mut() { - if !matches!(target, JumpTarget::Resolved(_) | JumpTarget::Invalid) { + if matches!(target, JumpTarget::Resolved(_, JumpCondition::AlwaysFalse)) + && self.local_jumpi_condition_is_zero(*inst, local_snapshots) + { + continue; + } + if !target.is_resolved() { continue; } if let Some(bid) = self.cfg.inst_to_block[*inst] @@ -1637,6 +1722,11 @@ mod tests_edge_cases { ", ); // The JUMPI should be resolved as static. + let (jump_inst, jump) = + bytecode.iter_insts().find(|(_, data)| data.opcode == op::JUMPI).unwrap(); + assert!(jump.flags.contains(InstFlags::STATIC_JUMP)); + assert_eq!(bytecode.inst(jump.static_jump_target()).opcode, op::JUMPDEST); + assert_ne!(jump.static_jump_target(), jump_inst + 1); assert!(!bytecode.has_dynamic_jumps); } @@ -1706,6 +1796,67 @@ mod tests_edge_cases { assert!(jump_inst.is_some(), "expected an invalid jump"); } + #[test] + fn jumpi_with_zero_condition_ignores_unknown_target() { + let bytecode = analyze_asm( + " + PUSH0 + CALLDATASIZE + JUMPI + STOP + target: + JUMPDEST + STOP + ", + ); + + let (_, jump) = bytecode.iter_insts().find(|(_, data)| data.opcode == op::JUMPI).unwrap(); + assert!(jump.flags.contains(InstFlags::STATIC_JUMP)); + assert!(!jump.flags.contains(InstFlags::INVALID_JUMP)); + assert_eq!(jump.static_jump_target(), Inst::from_usize(3)); + assert!(!bytecode.has_dynamic_jumps); + } + + #[test] + fn jumpi_with_zero_condition_ignores_valid_target() { + let bytecode = analyze_asm( + " + PUSH0 + PUSH %target + JUMPI + STOP + target: + JUMPDEST + STOP + ", + ); + + let (_, jump) = bytecode.iter_insts().find(|(_, data)| data.opcode == op::JUMPI).unwrap(); + assert!(jump.flags.contains(InstFlags::STATIC_JUMP)); + assert!(!jump.flags.contains(InstFlags::INVALID_JUMP)); + assert_eq!(jump.static_jump_target(), Inst::from_usize(3)); + assert!(!bytecode.has_dynamic_jumps); + } + + #[test] + fn jumpi_with_true_condition_keeps_unknown_target_dynamic() { + let bytecode = analyze_asm( + " + PUSH1 0x01 + CALLDATASIZE + JUMPI + STOP + target: + JUMPDEST + STOP + ", + ); + + let (_, jump) = bytecode.iter_insts().find(|(_, data)| data.opcode == op::JUMPI).unwrap(); + assert!(!jump.flags.contains(InstFlags::STATIC_JUMP)); + assert!(bytecode.has_dynamic_jumps); + } + /// Constant propagation through a diamond CFG (if-then-else merge). /// Both branches push the same constant, so the merge should preserve it. #[test] @@ -1713,7 +1864,7 @@ mod tests_edge_cases { let bytecode = analyze_asm( " PUSH1 0x42 ; push same const on both paths - PUSH0 ; condition (always false) + CALLDATASIZE ; condition PUSH %then_pc ; then target JUMPI ; branch ; Else: push same const. @@ -1744,7 +1895,7 @@ mod tests_edge_cases { fn diamond_cfg_different_const() { let bytecode = analyze_asm( " - PUSH0 ; condition + CALLDATASIZE ; condition PUSH %then_pc JUMPI ; branch ; Else: push 0xAA. diff --git a/crates/revmc-codegen/src/compiler/translate/mod.rs b/crates/revmc-codegen/src/compiler/translate/mod.rs index 9d9228684..bf4da0b45 100644 --- a/crates/revmc-codegen/src/compiler/translate/mod.rs +++ b/crates/revmc-codegen/src/compiler/translate/mod.rs @@ -965,11 +965,13 @@ impl<'a, B: Backend> FunctionCx<'a, B> { if opcode == op::JUMPI { let cond_word = self.pop(); self.materialize_live_stack(); - let cond = self.bcx.icmp_imm(IntCC::NotEqual, cond_word, 0); - let next = self.inst_entries[inst + 1]; - let switch_block = self.bcx.create_block("multi_jump"); - self.bcx.brif(cond, switch_block, next); - self.bcx.switch_to_block(switch_block); + if !data.has_const_jump_condition() { + let cond = self.bcx.icmp_imm(IntCC::NotEqual, cond_word, 0); + let next = self.inst_entries[inst + 1]; + let switch_block = self.bcx.create_block("multi_jump"); + self.bcx.brif(cond, switch_block, next); + self.bcx.switch_to_block(switch_block); + } } else { self.materialize_live_stack(); } @@ -990,9 +992,9 @@ impl<'a, B: Backend> FunctionCx<'a, B> { // Pop and discard the target; it's always on the stack. self.pop_ignore(1); let target_inst = data.static_jump_target(); - debug_assert_eq!( - *self.bytecode.inst(target_inst), - op::JUMPDEST, + debug_assert!( + *self.bytecode.inst(target_inst) == op::JUMPDEST + || (opcode == op::JUMPI && target_inst == inst + 1), "jumping to non-JUMPDEST; target_inst={target_inst}", ); self.inst_entries[target_inst] @@ -1010,9 +1012,13 @@ impl<'a, B: Backend> FunctionCx<'a, B> { let cond_word = self.pop(); // Flush virtual values before leaving the section. self.materialize_live_stack(); - let cond = self.bcx.icmp_imm(IntCC::NotEqual, cond_word, 0); - let next = self.inst_entries[inst + 1]; - self.bcx.brif(cond, target, next); + if data.has_const_jump_condition() { + self.bcx.br(target); + } else { + let cond = self.bcx.icmp_imm(IntCC::NotEqual, cond_word, 0); + let next = self.inst_entries[inst + 1]; + self.bcx.brif(cond, target, next); + } } else { // Flush virtual values before leaving the section. self.materialize_live_stack(); From df8ebc50ab9df29787b88b76b1b4683a5b4eb1d0 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 10:28:40 +0200 Subject: [PATCH 02/23] fix: keep constant jumpi successors live --- crates/revmc-codegen/src/bytecode/fmt.rs | 32 +++++++++---------- crates/revmc-codegen/src/bytecode/mod.rs | 7 ++++ .../src/bytecode/passes/memory_sections.rs | 2 +- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/fmt.rs b/crates/revmc-codegen/src/bytecode/fmt.rs index 58f4cfd63..9ac22b571 100644 --- a/crates/revmc-codegen/src/bytecode/fmt.rs +++ b/crates/revmc-codegen/src/bytecode/fmt.rs @@ -538,7 +538,7 @@ mod tests { op::PUSH1, 0x01, op::PUSH1, 0x00, op::SSTORE, - op::PUSH1, 0x01, + op::CALLDATASIZE, op::PUSH1, 0x03, op::JUMPI, op::PUSH1, 0x00, @@ -565,7 +565,7 @@ mod tests { actual, snapbox::str![[r#" ; spec_id=Osaka has_dynamic_jumps=false may_suspend=true -; insts=19 live=19 dead=0 noops=11 suspends=1 blocks=3 block_min=2 block_max=10 block_avg=6.3 block_median=7 +; insts=19 live=19 dead=0 noops=10 suspends=1 blocks=3 block_min=2 block_max=10 block_avg=6.3 block_median=7 bb0: ; stack_in=0 max_growth=1 predecessors= PUSH1 0x03 ; ic= 0 pc= 0 gas=11 noop @@ -576,21 +576,21 @@ bb1: ; stack_in=0 max_growth=2 predecessors=bb0,bb1 PUSH1 0x01 ; ic= 3 pc= 4 noop PUSH1 0x00 ; ic= 4 pc= 6 noop SSTORE ; ic= 5 pc= 8 - PUSH1 0x01 ; ic= 6 pc= 9 gas=16 noop - PUSH1 0x03 ; ic= 7 pc=11 noop - JUMPI %bb1 ; ic= 8 pc=13 + CALLDATASIZE ; ic= 6 pc= 9 gas=15 + PUSH1 0x03 ; ic= 7 pc=10 noop + JUMPI %bb1 ; ic= 8 pc=12 bb2: ; stack_in=0 max_growth=7 predecessors=bb1 - PUSH1 0x00 ; ic= 9 pc=14 gas=121 - PUSH1 0x00 ; ic=10 pc=16 noop - PUSH1 0x00 ; ic=11 pc=18 noop - PUSH1 0x00 ; ic=12 pc=20 noop - PUSH1 0x00 ; ic=13 pc=22 noop - PUSH1 0x42 ; ic=14 pc=24 noop - PUSH2 0xffff ; ic=15 pc=26 noop - CALL ; ic=16 pc=29 suspends - POP ; ic=17 pc=30 gas=2 stack_in=1 max_growth=0 - STOP ; ic=18 pc=31 + PUSH1 0x00 ; ic= 9 pc=13 gas=121 + PUSH1 0x00 ; ic=10 pc=15 noop + PUSH1 0x00 ; ic=11 pc=17 noop + PUSH1 0x00 ; ic=12 pc=19 noop + PUSH1 0x00 ; ic=13 pc=21 noop + PUSH1 0x42 ; ic=14 pc=23 noop + PUSH2 0xffff ; ic=15 pc=25 noop + CALL ; ic=16 pc=28 suspends + POP ; ic=17 pc=29 gas=2 stack_in=1 max_growth=0 + STOP ; ic=18 pc=30 "#]] ); @@ -637,7 +637,7 @@ bb2: ; stack_in=0 max_growth=7 predecessors=bb1 assert!(dot.contains("SSTORE"), "missing SSTORE"); // SSTORE splits gas sections: two [g=] annotations in bb1. assert!(dot.contains("[g=7]"), "missing first gas section"); - assert!(dot.contains("[g=16]"), "missing second gas section"); + assert!(dot.contains("[g=15]"), "missing second gas section"); // CALL present in bb2. assert!(dot.contains("CALL"), "missing CALL"); assert!(dot.contains("[g=121]"), "missing CALL gas section"); diff --git a/crates/revmc-codegen/src/bytecode/mod.rs b/crates/revmc-codegen/src/bytecode/mod.rs index 5eaabb59d..239d7854a 100644 --- a/crates/revmc-codegen/src/bytecode/mod.rs +++ b/crates/revmc-codegen/src/bytecode/mod.rs @@ -476,10 +476,17 @@ impl<'a> Bytecode<'a> { let mut iter = self.insts.iter_mut_enumerated(); while let Some((i, data)) = iter.next() { if !data.can_fall_through() { + let static_target = data + .is_static_jump() + .then(|| data.static_jump_target()) + .filter(|&target| target > i); let mut end = i; let mut any_new = false; for (j, data) in &mut iter { end = j; + if static_target == Some(j) { + break; + } if data.is_reachable_jumpdest(self.has_dynamic_jumps) { break; } diff --git a/crates/revmc-codegen/src/bytecode/passes/memory_sections.rs b/crates/revmc-codegen/src/bytecode/passes/memory_sections.rs index e768bae87..ec00255c6 100644 --- a/crates/revmc-codegen/src/bytecode/passes/memory_sections.rs +++ b/crates/revmc-codegen/src/bytecode/passes/memory_sections.rs @@ -432,7 +432,7 @@ mod tests { PUSH0 PUSH 64 MSTORE - PUSH 1 + CALLDATASIZE PUSH %large JUMPI PUSH %join From 19529fb62c3f69f9fd7b479d0c1541fcf6464413 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 11:36:57 +0200 Subject: [PATCH 03/23] fix: resolve constant jumpi cfg --- crates/revmc-codegen/src/bytecode/mod.rs | 24 +- .../src/bytecode/passes/block_analysis.rs | 295 +++++++++++++++--- .../src/compiler/translate/mod.rs | 25 +- 3 files changed, 281 insertions(+), 63 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/mod.rs b/crates/revmc-codegen/src/bytecode/mod.rs index 239d7854a..036f9fa7b 100644 --- a/crates/revmc-codegen/src/bytecode/mod.rs +++ b/crates/revmc-codegen/src/bytecode/mod.rs @@ -971,7 +971,7 @@ impl InstData { /// Returns `true` if this instruction is a `JUMPI` whose condition is known statically. #[inline] - pub(crate) fn has_const_jump_condition(&self) -> bool { + pub(crate) fn has_const_jumpi_condition(&self) -> bool { self.opcode == op::JUMPI && self.flags.contains(InstFlags::CONST_JUMP_CONDITION) } @@ -1036,6 +1036,12 @@ impl InstData { self.data |= Self::JUMPDEST_REACHABLE; } + #[inline] + pub(crate) fn clear_jumpdest_reachable(&mut self) { + debug_assert_eq!(self.opcode, op::JUMPDEST); + self.data &= !Self::JUMPDEST_REACHABLE; + } + /// Returns the static target of a `JUMP`/`JUMPI` with [`InstFlags::STATIC_JUMP`]. #[inline] pub(crate) fn static_jump_target(&self) -> Inst { @@ -1073,7 +1079,7 @@ impl InstData { /// Returns `true` if execution can fall through to the next sequential instruction. #[inline] pub(crate) fn can_fall_through(&self) -> bool { - !self.is_diverging() && self.opcode != op::JUMP && !self.has_const_jump_condition() + !self.is_diverging() && self.opcode != op::JUMP && !self.has_const_jumpi_condition() } /// Returns `true` if we know that this instruction will branch or stop execution. @@ -1126,22 +1132,22 @@ bitflags::bitflags! { /// The jump has multiple known targets (see `Bytecode::multi_jump_targets`). /// The target value is still on the stack and must be popped and switched on at runtime. const MULTI_JUMP = 1 << 2; + /// The `JUMPI` condition is known at compile time. + const CONST_JUMP_CONDITION = 1 << 3; /// The instruction is disabled in this EVM version. /// Always returns [`InstructionResult::NotActivated`] at runtime. - const DISABLED = 1 << 3; + const DISABLED = 1 << 4; /// The instruction is unknown. /// Always returns [`InstructionResult::NotFound`] at runtime. - const UNKNOWN = 1 << 4; + const UNKNOWN = 1 << 5; /// Instruction is a no-op: skip generating logic, but keep the gas calculation. - const NOOP = 1 << 5; + const NOOP = 1 << 6; /// This instruction starts a new stack section. - const STACK_SECTION_HEAD = 1 << 6; + const STACK_SECTION_HEAD = 1 << 7; /// Don't generate any code. - const DEAD_CODE = 1 << 7; - /// The `JUMPI` condition is known at compile time. - const CONST_JUMP_CONDITION = 1 << 8; + const DEAD_CODE = 1 << 8; } } diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 1f2910662..455c7ebf6 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -289,11 +289,18 @@ impl BlockData { /// Resolved jump target after fixpoint. #[derive(Clone, Debug)] -enum JumpTarget { +struct JumpTarget { + target: JumpTargetKind, + condition: JumpCondition, +} + +/// Resolved jump target kind after fixpoint. +#[derive(Clone, Debug)] +enum JumpTargetKind { /// Not yet observed. Bottom, /// One or more known constant target instruction indices. - Resolved(SmallVec<[Inst; 4]>, JumpCondition), + Resolved(SmallVec<[Inst; 4]>), /// Known constant but invalid target. Invalid, /// Unknown target. @@ -309,28 +316,50 @@ enum JumpCondition { } impl JumpTarget { + fn new(target: JumpTargetKind) -> Self { + Self { target, condition: JumpCondition::Unknown } + } + + fn bottom() -> Self { + Self::new(JumpTargetKind::Bottom) + } + + fn invalid() -> Self { + Self::new(JumpTargetKind::Invalid) + } + + fn top() -> Self { + Self::new(JumpTargetKind::Top) + } + + fn resolved(targets: SmallVec<[Inst; 4]>) -> Self { + Self::new(JumpTargetKind::Resolved(targets)) + } + /// Creates a resolved target with a single constant. fn single(inst: Inst) -> Self { - Self::Resolved(SmallVec::from_elem(inst, 1), JumpCondition::Unknown) + Self::resolved(SmallVec::from_elem(inst, 1)) } /// Returns the single resolved target, if exactly one. fn as_single(&self) -> Option { - match self { - Self::Resolved(targets, _) if targets.len() == 1 => Some(targets[0]), + match &self.target { + JumpTargetKind::Resolved(targets) if targets.len() == 1 => Some(targets[0]), _ => None, } } - fn with_condition(self, condition: JumpCondition) -> Self { - match self { - Self::Resolved(targets, _) => Self::Resolved(targets, condition), - target => target, - } + fn with_condition(mut self, condition: JumpCondition) -> Self { + self.condition = condition; + self + } + + fn is_top(&self) -> bool { + matches!(self.target, JumpTargetKind::Top) } fn is_resolved(&self) -> bool { - matches!(self, Self::Resolved(_, _) | Self::Invalid) + matches!(self.target, JumpTargetKind::Resolved(_) | JumpTargetKind::Invalid) } } @@ -419,7 +448,9 @@ impl Bytecode<'_> { self.init_snapshots(); let (resolved, count) = self.run_abstract_interp(local_snapshots); - if count > 0 { + let has_const_condition = + resolved.iter().any(|(_, target)| target.condition != JumpCondition::Unknown); + if count > 0 || has_const_condition { let newly_resolved = self.commit_resolved_jumps(&resolved); debug!(newly_resolved, "resolved jumps"); } @@ -442,21 +473,25 @@ impl Bytecode<'_> { /// /// Returns the number of newly resolved jumps. fn commit_resolved_jumps(&mut self, resolved: &[(Inst, JumpTarget)]) -> u32 { - let has_top_jump = resolved.iter().any(|(_, t)| matches!(t, JumpTarget::Top)); + let has_top_jump = resolved.iter().any(|(_, target)| target.is_top()); let mut newly_resolved = 0u32; for &(jump_inst, ref target) in resolved { - // Skip if already resolved by block_analysis_local. - if self.insts[jump_inst].flags.contains(InstFlags::STATIC_JUMP) { + let was_static = self.insts[jump_inst].flags.contains(InstFlags::STATIC_JUMP); + if was_static && target.condition == JumpCondition::Unknown { continue; } - match *target { - JumpTarget::Resolved(ref targets, condition) => { - if condition != JumpCondition::Unknown { - self.insts[jump_inst].flags |= InstFlags::CONST_JUMP_CONDITION; - } - if condition != JumpCondition::AlwaysFalse { + if target.condition != JumpCondition::Unknown { + self.insts[jump_inst].flags |= InstFlags::CONST_JUMP_CONDITION; + } + + match &target.target { + JumpTargetKind::Resolved(targets) => { + self.insts[jump_inst] + .flags + .remove(InstFlags::INVALID_JUMP | InstFlags::MULTI_JUMP); + if target.condition != JumpCondition::AlwaysFalse { for &target_inst in targets { debug_assert_eq!( self.insts[target_inst].opcode, @@ -467,6 +502,7 @@ impl Bytecode<'_> { } } if targets.len() == 1 { + self.multi_jump_targets.remove(&jump_inst); self.insts[jump_inst].flags |= InstFlags::STATIC_JUMP; self.insts[jump_inst].set_static_jump_target(targets[0]); trace!(%jump_inst, target_inst = %targets[0], "resolved jump"); @@ -476,34 +512,76 @@ impl Bytecode<'_> { self.multi_jump_targets.insert(jump_inst, targets.clone()); trace!(%jump_inst, n_targets = targets.len(), "resolved multi-target jump"); } - newly_resolved += 1; + if !was_static { + newly_resolved += 1; + } } - JumpTarget::Invalid => { + JumpTargetKind::Invalid => { + self.multi_jump_targets.remove(&jump_inst); self.insts[jump_inst].flags |= InstFlags::STATIC_JUMP | InstFlags::INVALID_JUMP; - newly_resolved += 1; + self.insts[jump_inst].flags.remove(InstFlags::MULTI_JUMP); + if !was_static { + newly_resolved += 1; + } trace!(%jump_inst, "resolved invalid jump"); } - JumpTarget::Bottom if !has_top_jump => { + JumpTargetKind::Bottom if !has_top_jump => { // Truly unreachable: no unresolved jumps remain, so this // code cannot be reached at runtime. Mark as invalid. + self.multi_jump_targets.remove(&jump_inst); self.insts[jump_inst].flags |= InstFlags::STATIC_JUMP | InstFlags::INVALID_JUMP; - newly_resolved += 1; + self.insts[jump_inst].flags.remove(InstFlags::MULTI_JUMP); + if !was_static { + newly_resolved += 1; + } trace!(%jump_inst, "unreachable jump"); } - JumpTarget::Bottom => { + JumpTargetKind::Bottom => { // Unreachable according to the analysis, but there are // unresolved (Top) jumps that might reach this code at // runtime. Leave as-is. trace!(%jump_inst, "unreachable jump (not marking, has_top_jump)"); } - JumpTarget::Top => { + JumpTargetKind::Top => { trace!(%jump_inst, "unresolved jump (Top)"); } } } + self.recompute_reachable_jumpdests(); newly_resolved } + fn recompute_reachable_jumpdests(&mut self) { + for data in &mut self.insts.raw { + if data.opcode == op::JUMPDEST { + data.clear_jumpdest_reachable(); + } + } + + let mut targets = Vec::new(); + for (inst, data) in self.insts.iter_enumerated() { + if !data.is_static_jump() + || data.flags.contains(InstFlags::INVALID_JUMP) + || data.is_dead_code() + { + continue; + } + if data.flags.contains(InstFlags::MULTI_JUMP) { + if let Some(multi_targets) = self.multi_jump_targets.get(&inst) { + targets.extend(multi_targets.iter().copied()); + } + } else { + targets.push(data.static_jump_target()); + } + } + + for target in targets { + if self.insts[target].opcode == op::JUMPDEST { + self.insts[target].set_jumpdest_reachable(); + } + } + } + /// Rebuild the basic-block CFG from the current instruction state. #[instrument(level = "debug", skip_all)] pub(crate) fn rebuild_cfg(&mut self) { @@ -639,10 +717,14 @@ impl Bytecode<'_> { index_vec![BlockState::Bottom; num_blocks]; block_states[Block::from_usize(0)] = BlockState::Known(Vec::new()); - // Collect unresolved jumps. + // Collect unresolved jumps, plus static JUMPI instructions whose condition + // may become constant only after CFG fixpoint. let mut jump_insts: Vec = Vec::new(); for (i, inst) in self.insts.iter_enumerated() { - if inst.is_jump() && !inst.flags.contains(InstFlags::STATIC_JUMP) { + if inst.is_jump() + && (!inst.flags.contains(InstFlags::STATIC_JUMP) + || (inst.opcode == op::JUMPI && !inst.has_const_jumpi_condition())) + { jump_insts.push(i); } } @@ -661,7 +743,7 @@ impl Bytecode<'_> { let mut has_top_jump = false; for &jump_inst in &jump_insts { let target = self.resolve_jump_snapshot(jump_inst, &const_sets); - if matches!(target, JumpTarget::Top) { + if target.is_top() { has_top_jump = true; } jump_targets.push((jump_inst, target)); @@ -695,10 +777,7 @@ impl Bytecode<'_> { }; if condition == JumpCondition::AlwaysFalse { - return JumpTarget::Resolved( - SmallVec::from_elem(jump_inst + 1, 1), - JumpCondition::AlwaysFalse, - ); + return JumpTarget::single(jump_inst + 1).with_condition(JumpCondition::AlwaysFalse); } match snap.last() { @@ -707,7 +786,7 @@ impl Bytecode<'_> { } None => { trace!(%jump_inst, pc = self.pc(jump_inst), "jump in unreached block"); - JumpTarget::Bottom + JumpTarget::bottom() } } } @@ -721,7 +800,7 @@ impl Bytecode<'_> { Ok(target_pc) if self.is_valid_jump(target_pc) => { JumpTarget::single(self.pc_to_inst(target_pc)) } - _ => JumpTarget::Invalid, + _ => JumpTarget::invalid(), } } AbsValue::ConstSet(set_idx) => { @@ -737,17 +816,17 @@ impl Bytecode<'_> { _ => { // Mixed valid + invalid: can't resolve since at runtime // the value might be any member of the set. - return JumpTarget::Top; + return JumpTarget::top(); } } } if !targets.is_empty() { - JumpTarget::Resolved(targets, JumpCondition::Unknown) + JumpTarget::resolved(targets) } else { - JumpTarget::Invalid + JumpTarget::invalid() } } - AbsValue::Top => JumpTarget::Top, + AbsValue::Top => JumpTarget::top(), } } @@ -868,7 +947,8 @@ impl Bytecode<'_> { } for (inst, target) in jump_targets.iter_mut() { - if matches!(target, JumpTarget::Resolved(_, JumpCondition::AlwaysFalse)) + if matches!(target.target, JumpTargetKind::Resolved(_)) + && target.condition == JumpCondition::AlwaysFalse && self.local_jumpi_condition_is_zero(*inst, local_snapshots) { continue; @@ -879,7 +959,7 @@ impl Bytecode<'_> { if let Some(bid) = self.cfg.inst_to_block[*inst] && suspect[bid.index()] { - *target = JumpTarget::Top; + *target = JumpTarget::top(); } } @@ -1838,6 +1918,133 @@ mod tests_edge_cases { assert!(!bytecode.has_dynamic_jumps); } + #[test] + fn jumpi_with_zero_condition_cfg_only_falls_through() { + let bytecode = analyze_asm( + " + PUSH0 + PUSH %target + JUMPI + PUSH1 0xAA + STOP + target: + JUMPDEST + STOP + ", + ); + + let (jump_inst, jump) = + bytecode.iter_insts().find(|(_, data)| data.opcode == op::JUMPI).unwrap(); + let jump_block = bytecode.cfg.inst_to_block[jump_inst].unwrap(); + let fallthrough_block = bytecode.cfg.inst_to_block[jump_inst + 1].unwrap(); + let target_inst = bytecode + .iter_all_insts() + .rev() + .find(|(_, data)| data.opcode == op::JUMPDEST) + .unwrap() + .0; + + assert!(jump.has_const_jumpi_condition()); + assert_eq!(jump.static_jump_target(), jump_inst + 1); + assert_eq!(bytecode.cfg.blocks[jump_block].succs.as_slice(), &[fallthrough_block]); + assert!( + bytecode.cfg.inst_to_block[target_inst].is_none(), + "never-taken JUMPI target should be dead" + ); + } + + #[test] + fn jumpi_with_zero_condition_from_predecessor_rewrites_local_static_cfg() { + let bytecode = analyze_asm( + " + PUSH0 + PUSH %branch + JUMP + branch: + JUMPDEST + PUSH %target + JUMPI + PUSH1 0xAA + STOP + target: + JUMPDEST + STOP + ", + ); + + let (jump_inst, jump) = + bytecode.iter_insts().find(|(_, data)| data.opcode == op::JUMPI).unwrap(); + let jump_block = bytecode.cfg.inst_to_block[jump_inst].unwrap(); + let fallthrough_block = bytecode.cfg.inst_to_block[jump_inst + 1].unwrap(); + let target_inst = bytecode + .iter_all_insts() + .rev() + .find(|(_, data)| data.opcode == op::JUMPDEST) + .unwrap() + .0; + + assert!(jump.has_const_jumpi_condition()); + assert_eq!(jump.static_jump_target(), jump_inst + 1); + assert_eq!(bytecode.cfg.blocks[jump_block].succs.as_slice(), &[fallthrough_block]); + assert!( + bytecode.cfg.inst_to_block[target_inst].is_none(), + "locally resolved but never-taken JUMPI target should be dead" + ); + } + + #[test] + fn jumpi_with_true_condition_cfg_only_jumps_to_target() { + let bytecode = analyze_asm( + " + PUSH1 0x01 + PUSH %target + JUMPI + PUSH1 0xAA + STOP + target: + JUMPDEST + STOP + ", + ); + + let (jump_inst, jump) = + bytecode.iter_insts().find(|(_, data)| data.opcode == op::JUMPI).unwrap(); + let jump_block = bytecode.cfg.inst_to_block[jump_inst].unwrap(); + let target_block = bytecode.cfg.inst_to_block[jump.static_jump_target()].unwrap(); + + assert!(jump.has_const_jumpi_condition()); + assert_ne!(jump.static_jump_target(), jump_inst + 1); + assert_eq!(bytecode.cfg.blocks[jump_block].succs.as_slice(), &[target_block]); + assert!( + bytecode.cfg.inst_to_block[jump_inst + 1].is_none(), + "always-taken JUMPI fallthrough should be dead" + ); + } + + #[test] + fn jumpi_with_true_condition_invalid_target_has_no_fallthrough() { + let bytecode = analyze_asm( + " + PUSH1 0x01 + PUSH1 0xFF + JUMPI + STOP + ", + ); + + let (jump_inst, jump) = + bytecode.iter_insts().find(|(_, data)| data.opcode == op::JUMPI).unwrap(); + let jump_block = bytecode.cfg.inst_to_block[jump_inst].unwrap(); + + assert!(jump.has_const_jumpi_condition()); + assert!(jump.flags.contains(InstFlags::STATIC_JUMP | InstFlags::INVALID_JUMP)); + assert!(bytecode.cfg.blocks[jump_block].succs.is_empty()); + assert!( + bytecode.cfg.inst_to_block[jump_inst + 1].is_none(), + "always-taken invalid JUMPI fallthrough should be dead" + ); + } + #[test] fn jumpi_with_true_condition_keeps_unknown_target_dynamic() { let bytecode = analyze_asm( @@ -1854,6 +2061,8 @@ mod tests_edge_cases { let (_, jump) = bytecode.iter_insts().find(|(_, data)| data.opcode == op::JUMPI).unwrap(); assert!(!jump.flags.contains(InstFlags::STATIC_JUMP)); + assert!(jump.has_const_jumpi_condition()); + assert!(!jump.can_fall_through()); assert!(bytecode.has_dynamic_jumps); } diff --git a/crates/revmc-codegen/src/compiler/translate/mod.rs b/crates/revmc-codegen/src/compiler/translate/mod.rs index bf4da0b45..3ed955310 100644 --- a/crates/revmc-codegen/src/compiler/translate/mod.rs +++ b/crates/revmc-codegen/src/compiler/translate/mod.rs @@ -948,6 +948,7 @@ impl<'a, B: Backend> FunctionCx<'a, B> { } op::JUMP | op::JUMPI => { let is_invalid = data.flags.contains(InstFlags::INVALID_JUMP); + let has_const_jumpi_condition = data.has_const_jumpi_condition(); if is_invalid && opcode == op::JUMP { // Pop and discard the target; it's always on the stack. self.pop_ignore(1); @@ -963,9 +964,12 @@ impl<'a, B: Backend> FunctionCx<'a, B> { let targets = self.bytecode.multi_jump_targets(inst).unwrap(); if opcode == op::JUMPI { - let cond_word = self.pop(); - self.materialize_live_stack(); - if !data.has_const_jump_condition() { + if has_const_jumpi_condition { + self.pop_ignore(1); + self.materialize_live_stack(); + } else { + let cond_word = self.pop(); + self.materialize_live_stack(); let cond = self.bcx.icmp_imm(IntCC::NotEqual, cond_word, 0); let next = self.inst_entries[inst + 1]; let switch_block = self.bcx.create_block("multi_jump"); @@ -1008,18 +1012,17 @@ impl<'a, B: Backend> FunctionCx<'a, B> { self.dynamic_jump_table }; - if opcode == op::JUMPI { + if opcode == op::JUMPI && !has_const_jumpi_condition { let cond_word = self.pop(); // Flush virtual values before leaving the section. self.materialize_live_stack(); - if data.has_const_jump_condition() { - self.bcx.br(target); - } else { - let cond = self.bcx.icmp_imm(IntCC::NotEqual, cond_word, 0); - let next = self.inst_entries[inst + 1]; - self.bcx.brif(cond, target, next); - } + let cond = self.bcx.icmp_imm(IntCC::NotEqual, cond_word, 0); + let next = self.inst_entries[inst + 1]; + self.bcx.brif(cond, target, next); } else { + if opcode == op::JUMPI { + self.pop_ignore(1); + } // Flush virtual values before leaving the section. self.materialize_live_stack(); self.bcx.br(target); From 774c4d64401a173bfc213815f858ebfd69e9784f Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 07:52:36 +0200 Subject: [PATCH 04/23] feat: add context-sensitive jump analysis --- .../src/bytecode/passes/block_analysis.rs | 339 +++++++++++------- 1 file changed, 218 insertions(+), 121 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 455c7ebf6..4f9ea2227 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -166,17 +166,25 @@ impl ConstSetInterner { /// Instructions that access deeper than this (e.g. Amsterdam DUPN/SWAPN up to depth 236) /// treat the out-of-range slot as `Top` rather than aborting block interpretation. const MAX_ABS_STACK_DEPTH: usize = 64; +const MAX_BLOCK_CONTEXTS: usize = 2; /// Abstract state at the entry of a block. #[derive(Clone, Debug)] enum BlockState { /// Block has not been reached yet. Bottom, - /// Block has been reached with a known stack state (top-aligned). - Known(Vec), + /// Block has been reached with one or more known stack states. + Known(SmallVec<[Vec; 2]>), } impl BlockState { + fn states(&self) -> &[Vec] { + match self { + Self::Bottom => &[], + Self::Known(states) => states, + } + } + /// Join another incoming state into this one. Returns `true` if the state changed. /// /// When stack heights differ, the stacks are top-aligned and the shorter one is @@ -184,47 +192,70 @@ impl BlockState { /// the "no selector match" fallthrough leaves an extra item on the stack that the /// fallback ignores. fn join(&mut self, incoming: &[AbsValue], sets: &mut ConstSetInterner) -> bool { + fn clamp_state(incoming: &[AbsValue]) -> Vec { + let start = incoming.len().saturating_sub(MAX_ABS_STACK_DEPTH); + incoming[start..].to_vec() + } + + fn join_state( + existing: &mut Vec, + incoming: &[AbsValue], + sets: &mut ConstSetInterner, + ) -> bool { + let new_len = existing.len().max(incoming.len()); + let mut changed = false; + + // Clamp to MAX_ABS_STACK_DEPTH by only joining the top portion; + // elements below that are unreachable by any EVM instruction and + // discarding them preserves soundness. + let join_len = new_len.min(MAX_ABS_STACK_DEPTH); + + // Resize existing to join_len: pad at bottom with Top or truncate. + if existing.len() < join_len { + let pad = join_len - existing.len(); + existing.splice(0..0, std::iter::repeat_n(AbsValue::Top, pad)); + changed = true; + } else if existing.len() > join_len { + existing.drain(..existing.len() - join_len); + changed = true; + } + + // Join element-wise, top-aligned. Both stacks have their top at the end. + // `incoming` may be longer than join_len — we only look at its top portion. + let incoming_start = incoming.len().saturating_sub(join_len); + let incoming_top = &incoming[incoming_start..]; + // incoming_top.len() <= join_len. If shorter, bottom positions get Top. + let pad = join_len - incoming_top.len(); + for i in 0..join_len { + let inc = if i < pad { AbsValue::Top } else { incoming_top[i - pad] }; + let joined = sets.join(existing[i], inc); + if joined != existing[i] { + existing[i] = joined; + changed = true; + } + } + + changed + } + match self { Self::Bottom => { - let start = incoming.len().saturating_sub(MAX_ABS_STACK_DEPTH); - *self = Self::Known(incoming[start..].to_vec()); + *self = Self::Known(SmallVec::from_elem(clamp_state(incoming), 1)); true } - Self::Known(existing) => { - let new_len = existing.len().max(incoming.len()); - let mut changed = false; - - // Clamp to MAX_ABS_STACK_DEPTH by only joining the top portion; - // elements below that are unreachable by any EVM instruction and - // discarding them preserves soundness. - let join_len = new_len.min(MAX_ABS_STACK_DEPTH); - - // Resize existing to join_len: pad at bottom with Top or truncate. - if existing.len() < join_len { - let pad = join_len - existing.len(); - existing.splice(0..0, std::iter::repeat_n(AbsValue::Top, pad)); - changed = true; - } else if existing.len() > join_len { - existing.drain(..existing.len() - join_len); - changed = true; + Self::Known(states) => { + if let Some(existing) = + states.iter_mut().find(|state| state.len() == incoming.len()) + { + return join_state(existing, incoming, sets); } - // Join element-wise, top-aligned. Both stacks have their top at the end. - // `incoming` may be longer than join_len — we only look at its top portion. - let incoming_start = incoming.len().saturating_sub(join_len); - let incoming_top = &incoming[incoming_start..]; - // incoming_top.len() <= join_len. If shorter, bottom positions get Top. - let pad = join_len - incoming_top.len(); - for i in 0..join_len { - let inc = if i < pad { AbsValue::Top } else { incoming_top[i - pad] }; - let joined = sets.join(existing[i], inc); - if joined != existing[i] { - existing[i] = joined; - changed = true; - } + if states.len() < MAX_BLOCK_CONTEXTS { + states.push(clamp_state(incoming)); + return true; } - changed + join_state(&mut states[0], incoming, sets) } } } @@ -372,11 +403,45 @@ pub(crate) struct Cfg { } impl Bytecode<'_> { + fn record_input_snapshot( + &mut self, + inst: Inst, + operands: &[AbsValue], + const_sets: &mut ConstSetInterner, + ) { + let snap = &mut self.snapshots.inputs[inst]; + if snap.is_empty() { + snap.extend_from_slice(operands); + return; + } + + debug_assert_eq!(snap.len(), operands.len()); + for (slot, &operand) in snap.iter_mut().zip(operands) { + *slot = const_sets.join(*slot, operand); + } + } + + fn record_output_snapshot( + &mut self, + inst: Inst, + value: AbsValue, + const_sets: &mut ConstSetInterner, + ) { + match &mut self.snapshots.outputs[inst] { + Some(existing) => *existing = const_sets.join(*existing, value), + slot @ None => *slot = Some(value), + } + } + /// Ensures `self.snapshots` is sized for the current instruction count. fn init_snapshots(&mut self) { let n = self.insts.len(); self.snapshots.inputs.resize(n, SmallVec::new()); self.snapshots.outputs.resize(n, None); + for input in &mut self.snapshots.inputs { + input.clear(); + } + self.snapshots.outputs.raw.fill(None); } /// Block-local jump resolution: interpret each block independently to discover @@ -387,7 +452,7 @@ impl Bytecode<'_> { pub(crate) fn block_analysis_local(&mut self) { self.init_snapshots(); - let empty_sets = ConstSetInterner::new(); + let mut local_sets = ConstSetInterner::new(); let mut resolved = Vec::new(); let mut stack = Vec::new(); let trace_logs = enabled!(tracing::Level::TRACE); @@ -402,7 +467,7 @@ impl Bytecode<'_> { // Interpret the block with `Top` as inputs. stack.clear(); stack.resize(section.inputs as usize, AbsValue::Top); - if !self.interpret_block(block.insts(), &mut stack) { + if !self.interpret_block(block.insts(), &mut stack, &mut local_sets, None, &mut None) { continue; } @@ -415,7 +480,7 @@ impl Bytecode<'_> { continue; } - let target = self.resolve_jump_snapshot(term_inst, &empty_sets); + let target = self.resolve_jump_snapshot(term_inst, &local_sets); let Some(target_inst) = target.as_single() else { continue }; // Log non-adjacent resolutions (not simple PUSH+JUMP). @@ -715,7 +780,7 @@ impl Bytecode<'_> { // Initialize block states. Entry block starts with an empty stack. let mut block_states: IndexVec = index_vec![BlockState::Bottom; num_blocks]; - block_states[Block::from_usize(0)] = BlockState::Known(Vec::new()); + block_states[Block::from_usize(0)] = BlockState::Known(SmallVec::from_elem(Vec::new(), 1)); // Collect unresolved jumps, plus static JUMPI instructions whose condition // may become constant only after CFG fixpoint. @@ -752,15 +817,23 @@ impl Bytecode<'_> { // Invalidate resolutions that may be unsound due to incomplete analysis. // When the fixpoint didn't converge, partially-discovered ConstSets may be // incomplete, so we must conservatively invalidate them too. - if has_top_jump || !converged { + let has_bottom_jump = + jump_targets.iter().any(|(_, target)| matches!(target, JumpTarget::Bottom)); + if has_top_jump || has_bottom_jump || !converged { self.invalidate_suspect_jumps( &mut jump_targets, - &block_states, &discovered_edges, local_snapshots, + has_top_jump || !converged, ); } + for bid in self.cfg.blocks.indices() { + if matches!(block_states[bid], BlockState::Bottom) { + self.snapshots.restore_from(self.cfg.blocks[bid].insts(), local_snapshots); + } + } + let count = jump_targets.iter().filter(|(_, target)| target.is_resolved()).count(); (jump_targets, count) @@ -877,6 +950,7 @@ impl Bytecode<'_> { const_sets: &ConstSetInterner, discovered: &mut IndexVec>, disc_preds: &mut IndexVec>, + context_targets: &mut SmallVec<[Block; 4]>, ) { let consts = match operand { AbsValue::Const(imm) => Either::Left(std::iter::once(imm)), @@ -890,11 +964,14 @@ impl Bytecode<'_> { continue; } let ti = self.pc_to_inst(target_pc); - if let Some(tb) = self.cfg.inst_to_block[ti] - && !discovered[bid].contains(&tb) - { - discovered[bid].push(tb); - disc_preds[tb].push(bid); + if let Some(tb) = self.cfg.inst_to_block[ti] { + if !context_targets.contains(&tb) { + context_targets.push(tb); + } + if !discovered[bid].contains(&tb) { + discovered[bid].push(tb); + disc_preds[tb].push(bid); + } } } } @@ -913,18 +990,16 @@ impl Bytecode<'_> { fn invalidate_suspect_jumps( &mut self, jump_targets: &mut [(Inst, JumpTarget)], - block_states: &IndexVec, discovered_edges: &IndexVec>, local_snapshots: &Snapshots, + invalidate_targets: bool, ) { let num_blocks = self.cfg.blocks.len(); // Seed: every reachable JUMPDEST block is suspect when Top jumps exist. let mut suspect: BitVec = BitVec::repeat(false, num_blocks); for bid in self.cfg.blocks.indices() { - if !matches!(block_states[bid], BlockState::Bottom) - && self.insts[self.cfg.blocks[bid].insts.start].is_jumpdest() - { + if self.insts[self.cfg.blocks[bid].insts.start].is_jumpdest() { suspect.set(bid.index(), true); } } @@ -946,20 +1021,22 @@ impl Bytecode<'_> { } } - for (inst, target) in jump_targets.iter_mut() { - if matches!(target.target, JumpTargetKind::Resolved(_)) - && target.condition == JumpCondition::AlwaysFalse - && self.local_jumpi_condition_is_zero(*inst, local_snapshots) - { - continue; - } - if !target.is_resolved() { - continue; - } - if let Some(bid) = self.cfg.inst_to_block[*inst] - && suspect[bid.index()] - { - *target = JumpTarget::top(); + if invalidate_targets { + for (inst, target) in jump_targets.iter_mut() { + if matches!(target.target, JumpTargetKind::Resolved(_)) + && target.condition == JumpCondition::AlwaysFalse + && self.local_jumpi_condition_is_zero(*inst, local_snapshots) + { + continue; + } + if !target.is_resolved() { + continue; + } + if let Some(bid) = self.cfg.inst_to_block[*inst] + && suspect[bid.index()] + { + *target = JumpTarget::top(); + } } } @@ -1010,37 +1087,48 @@ impl Bytecode<'_> { // Copy input state into reusable buffer. stack_buf.clear(); - match &block_states[bid] { - BlockState::Known(s) => stack_buf.extend_from_slice(s), - BlockState::Bottom => continue, - }; - - let block = &self.cfg.blocks[bid]; - if !self.interpret_block(block.insts(), &mut stack_buf) { - continue; - } - let block = &self.cfg.blocks[bid]; - - // Discover dynamic-jump target edges from the snapshot recorded above. - let term_inst = block.terminator(); - let term = &self.insts[term_inst]; - if term.is_jump() - && !term.flags.contains(InstFlags::STATIC_JUMP) - && let Some(&operand) = self.snapshots.inputs[term_inst].last() - { - self.discover_jump_edges( - operand, - bid, + let states = block_states[bid].states().to_vec(); + for state in states { + // Copy input state into reusable buffer. + stack_buf.clear(); + stack_buf.extend_from_slice(&state); + + let block = &self.cfg.blocks[bid]; + let term_inst = block.terminator(); + let mut jump_operand = None; + if !self.interpret_block( + block.insts(), + &mut stack_buf, const_sets, - &mut discovered, - &mut disc_preds, - ); - } + Some(term_inst), + &mut jump_operand, + ) { + continue; + } + let block = &self.cfg.blocks[bid]; + + // Discover dynamic-jump target edges from the snapshot recorded above. + let term = &self.insts[term_inst]; + let mut context_targets = SmallVec::<[Block; 4]>::new(); + if term.is_jump() + && !term.flags.contains(InstFlags::STATIC_JUMP) + && let Some(operand) = jump_operand + { + self.discover_jump_edges( + operand, + bid, + const_sets, + &mut discovered, + &mut disc_preds, + &mut context_targets, + ); + } - // Propagate to static CFG successors and discovered dynamic-jump targets. - for &succ in block.succs.iter().chain(&discovered[bid]) { - if block_states[succ].join(&stack_buf, const_sets) { - worklist.push(succ); + // Propagate to static CFG successors and discovered dynamic-jump targets. + for &succ in block.succs.iter().chain(&context_targets) { + if block_states[succ].join(&stack_buf, const_sets) { + worklist.push(succ); + } } } } @@ -1062,28 +1150,39 @@ impl Bytecode<'_> { &mut self, insts: impl IntoIterator, stack: &mut Vec, + const_sets: &mut ConstSetInterner, + capture_inst: Option, + captured_jump_operand: &mut Option, ) -> bool { for i in insts { - let inst = &self.insts[i]; - if inst.is_dead_code() { + if self.insts[i].is_dead_code() { continue; } - let (inp, out) = inst.stack_io(); + let opcode = self.insts[i].opcode; + let (inp, out) = self.insts[i].stack_io(); let inp = inp as usize; let out = out as usize; // Record pre-instruction input operand snapshot (in stack order, TOS last). if inp > 0 { + if capture_inst == Some(i) { + *captured_jump_operand = stack.last().copied(); + } let start = stack.len().saturating_sub(inp); - let snap = &mut self.snapshots.inputs[i]; - snap.clear(); - snap.extend_from_slice(&stack[start..]); + if stack.len() < inp { + let mut operands = SmallVec::<[AbsValue; 4]>::new(); + operands.resize(inp - stack.len(), AbsValue::Top); + operands.extend_from_slice(stack); + self.record_input_snapshot(i, &operands, const_sets); + } else { + self.record_input_snapshot(i, &stack[start..], const_sets); + } } - match inst.opcode { + match opcode { op::PUSH0..=op::PUSH32 => { - stack.push(AbsValue::Const(inst.imm())); + stack.push(AbsValue::Const(self.insts[i].imm())); } op::POP => { if stack.pop().is_none() { @@ -1091,14 +1190,14 @@ impl Bytecode<'_> { } } op::DUP1..=op::DUP16 => { - let depth = (inst.opcode - op::DUP1 + 1) as usize; + let depth = (opcode - op::DUP1 + 1) as usize; if stack.len() < depth { return false; } stack.push(stack[stack.len() - depth]); } op::SWAP1..=op::SWAP16 => { - let depth = (inst.opcode - op::SWAP1 + 1) as usize; + let depth = (opcode - op::SWAP1 + 1) as usize; let len = stack.len(); if len < depth + 1 { return false; @@ -1106,7 +1205,7 @@ impl Bytecode<'_> { stack.swap(len - 1, len - 1 - depth); } op::DUPN => { - let depth = crate::decode_single(inst.imm_byte()); + let depth = crate::decode_single(self.insts[i].imm_byte()); match depth { Some(n) => { let n = n as usize; @@ -1123,7 +1222,7 @@ impl Bytecode<'_> { } } op::SWAPN => { - let depth = crate::decode_single(inst.imm_byte()); + let depth = crate::decode_single(self.insts[i].imm_byte()); match depth { Some(n) => { let n = n as usize; @@ -1142,7 +1241,7 @@ impl Bytecode<'_> { } } op::EXCHANGE => { - let pair = crate::decode_pair(inst.imm_byte()); + let pair = crate::decode_pair(self.insts[i].imm_byte()); match pair { Some((n, m)) => { let (n, m) = (n as usize, m as usize); @@ -1172,13 +1271,13 @@ impl Bytecode<'_> { // Check gas cost before doing the actual fold. let gas = - super::const_fold::const_fold_gas(inst.opcode, inputs_slice, &interner); + super::const_fold::const_fold_gas(opcode, inputs_slice, &interner); if let Some(cost) = gas && self.compiler_gas_used.saturating_add(cost) <= self.compiler_gas_limit { let folded = super::const_fold::try_const_fold( - inst, + &self.insts[i], inputs_slice, &mut interner, self.code.len(), @@ -1209,13 +1308,14 @@ impl Bytecode<'_> { // Record post-instruction output snapshot. // Skip SWAP/SWAPN/EXCHANGE: they modify two positions and have no single "output". - if out > 0 && !matches!(inst.opcode, op::SWAP1..=op::SWAP16 | op::SWAPN | op::EXCHANGE) - { - self.snapshots.outputs[i] = stack.last().copied(); + if out > 0 && !matches!(opcode, op::SWAP1..=op::SWAP16 | op::SWAPN | op::EXCHANGE) { + if let Some(&value) = stack.last() { + self.record_output_snapshot(i, value, const_sets); + } } #[cfg(test)] - if inst.opcode == crate::TEST_SUSPEND { + if opcode == crate::TEST_SUSPEND { stack.fill(AbsValue::Top); } } @@ -1283,7 +1383,7 @@ pub(crate) mod tests { let bytecode = analyze_hex( "60606040526000357c0100000000000000000000000000000000000000000000000000000000900463ffffffff168063b28175c4146046578063c0406226146052575b6000565b3460005760506076565b005b34600057605c6081565b604051808215151515815260200191505060405180910390f35b600c6000819055505b565b600060896076565b600d600181905550600e600281905550600190505b905600a165627a7a723058202a8a75d7d795b5bcb9042fb18b283daa90b999a11ddec892f5487322", ); - assert!(bytecode.has_dynamic_jumps()); + assert!(!bytecode.has_dynamic_jumps()); } #[test] @@ -1291,7 +1391,7 @@ pub(crate) mod tests { let bytecode = analyze_hex( "608060405234801561001057600080fd5b506004361061002b5760003560e01c806373027f6d14610030575b600080fd5b61004a600480360381019061004591906101a9565b61004c565b005b6000808273ffffffffffffffffffffffffffffffffffffffff166040516024016040516020818303038152906040527fb28175c4000000000000000000000000000000000000000000000000000000007bffffffffffffffffffffffffffffffffffffffffffffffffffffffff19166020820180517bffffffffffffffffffffffffffffffffffffffffffffffffffffff83818316178352505050506040516100f69190610247565b6000604051808303816000865af19150503d8060008114610133576040519150601f19603f3d011682016040523d82523d6000602084013e610138565b606091505b509150915081600155505050565b600080fd5b600073ffffffffffffffffffffffffffffffffffffffff82169050919050565b60006101768261014b565b9050919050565b6101868161016b565b811461019157600080fd5b50565b6000813590506101a38161017d565b92915050565b6000602082840312156101bf576101be610146565b5b60006101cd84828501610194565b91505092915050565b600081519050919050565b600081905092915050565b60005b8381101561020a5780820151818401526020810190506101ef565b60008484015250505050565b6000610221826101d6565b61022b81856101e1565b935061023b8185602086016101ec565b80840191505092915050565b60006102538284610216565b91508190509291505056fea2646970667358221220b4673c55c7b0268d7d118059e6509196d2185bb7fe040a7d3900f902c8542ea464736f6c63430008180033", ); - assert!(bytecode.has_dynamic_jumps()); + assert!(!bytecode.has_dynamic_jumps()); } #[test] @@ -1302,7 +1402,7 @@ pub(crate) mod tests { "3033146033575b303303600e57005b601b5f35806001555f608d565b5f80808080305af1600255602e60016089565b600355005b603a5f6089565b8015608757604a600182035f608d565b5f80808080305af1156083576001606191035f608d565b5f80808080305af115608357607f60016078816089565b016001608d565b6006565b5f80fd5b005b5c90565b5d56", "60065f601d565b5f5560106001601d565b6001555f80808080335af1005b5c9056", ]; - let has_dynamic = [false, false, true, false]; + let has_dynamic = [false, false, false, false]; for (hex, &expected) in contracts.iter().zip(&has_dynamic) { let bytecode = analyze_hex(hex); assert_eq!(bytecode.has_dynamic_jumps(), expected, "contract: {hex}"); @@ -1632,12 +1732,9 @@ pub(crate) mod tests { ", ); - // The wrapper return JUMP (pc=30) remains dynamic because the outer - // return address is lost to Top during the top-aligned join. - // Because an unresolved Top jump exists, the conservative invalidation - // also invalidates the inner return JUMP — any reachable JUMPDEST - // (including inner's entry) is suspect. - assert!(bytecode.has_dynamic_jumps, "expected dynamic jumps to remain"); + // Different stack-depth contexts preserve the wrapper's outer return + // address while still resolving the shared inner return. + assert!(!bytecode.has_dynamic_jumps, "expected all jumps to be resolved"); } /// Regression test: deep DUPN on Amsterdam must not cause the abstract interpreter to From c444fcdaae33d715549ce55bf705e7bbf0078b39 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 07:57:03 +0200 Subject: [PATCH 05/23] fix: materialize push immediates in codegen --- crates/revmc-codegen/src/bytecode/mod.rs | 1 - .../revmc-codegen/src/bytecode/passes/block_analysis.rs | 9 +++++---- crates/revmc-codegen/src/bytecode/passes/dedup.rs | 2 +- crates/revmc-codegen/src/compiler/translate/mod.rs | 4 +++- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/mod.rs b/crates/revmc-codegen/src/bytecode/mod.rs index 036f9fa7b..2fd55526a 100644 --- a/crates/revmc-codegen/src/bytecode/mod.rs +++ b/crates/revmc-codegen/src/bytecode/mod.rs @@ -556,7 +556,6 @@ impl<'a> Bytecode<'a> { /// Returns the value of a PUSH instruction, right-padding truncated EOF immediates with zeros /// per EVM spec. - #[cfg(test)] pub(crate) fn get_push_value(&self, data: &InstData) -> U256 { debug_assert!(matches!(data.opcode, op::PUSH0..=op::PUSH32)); data.imm().get(&self.u256_interner.borrow()) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 4f9ea2227..6c58dbfdf 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -1308,10 +1308,11 @@ impl Bytecode<'_> { // Record post-instruction output snapshot. // Skip SWAP/SWAPN/EXCHANGE: they modify two positions and have no single "output". - if out > 0 && !matches!(opcode, op::SWAP1..=op::SWAP16 | op::SWAPN | op::EXCHANGE) { - if let Some(&value) = stack.last() { - self.record_output_snapshot(i, value, const_sets); - } + if out > 0 + && !matches!(opcode, op::SWAP1..=op::SWAP16 | op::SWAPN | op::EXCHANGE) + && let Some(&value) = stack.last() + { + self.record_output_snapshot(i, value, const_sets); } #[cfg(test)] diff --git a/crates/revmc-codegen/src/bytecode/passes/dedup.rs b/crates/revmc-codegen/src/bytecode/passes/dedup.rs index 6464f40fa..9108e2839 100644 --- a/crates/revmc-codegen/src/bytecode/passes/dedup.rs +++ b/crates/revmc-codegen/src/bytecode/passes/dedup.rs @@ -461,7 +461,7 @@ mod tests { bytecode.config = AnalysisConfig::DEDUP; bytecode.analyze().unwrap(); - assert_eq!(bytecode.redirects.len(), 13); + assert_eq!(bytecode.redirects.len(), 20); } fn fixture_entry_code(json: &str) -> Vec { diff --git a/crates/revmc-codegen/src/compiler/translate/mod.rs b/crates/revmc-codegen/src/compiler/translate/mod.rs index 3ed955310..62ff1fe09 100644 --- a/crates/revmc-codegen/src/compiler/translate/mod.rs +++ b/crates/revmc-codegen/src/compiler/translate/mod.rs @@ -1070,7 +1070,9 @@ impl<'a, B: Backend> FunctionCx<'a, B> { } op::PUSH0..=op::PUSH32 => { - unreachable!("handled in const_output"); + let value = self.bytecode.get_push_value(data); + let value = self.bcx.iconst_256(value); + self.push(value); } op::DUP1..=op::DUP16 => self.dup((opcode - op::DUP1 + 1) as usize), From 65803cd1a0976c7e1127dd58155066ec64233886 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 08:02:24 +0200 Subject: [PATCH 06/23] feat: resolve mixed jump target sets --- .../src/bytecode/passes/block_analysis.rs | 77 ++++++++++++++++--- 1 file changed, 65 insertions(+), 12 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 6c58dbfdf..be6ff3e04 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -166,7 +166,7 @@ impl ConstSetInterner { /// Instructions that access deeper than this (e.g. Amsterdam DUPN/SWAPN up to depth 236) /// treat the out-of-range slot as `Top` rather than aborting block interpretation. const MAX_ABS_STACK_DEPTH: usize = 64; -const MAX_BLOCK_CONTEXTS: usize = 2; +const MAX_BLOCK_CONTEXTS: usize = 8; /// Abstract state at the entry of a block. #[derive(Clone, Debug)] @@ -332,6 +332,8 @@ enum JumpTargetKind { Bottom, /// One or more known constant target instruction indices. Resolved(SmallVec<[Inst; 4]>), + /// One or more known valid targets, plus at least one known invalid target. + ResolvedWithInvalid(SmallVec<[Inst; 4]>), /// Known constant but invalid target. Invalid, /// Unknown target. @@ -581,6 +583,20 @@ impl Bytecode<'_> { newly_resolved += 1; } } + JumpTargetKind::ResolvedWithInvalid(targets) => { + for &target_inst in targets { + debug_assert_eq!( + self.insts[target_inst].opcode, + op::JUMPDEST, + "block_analysis resolved to non-JUMPDEST" + ); + self.insts[target_inst].set_jumpdest_reachable(); + } + self.insts[jump_inst].flags |= InstFlags::STATIC_JUMP | InstFlags::MULTI_JUMP; + self.multi_jump_targets.insert(jump_inst, targets.clone()); + newly_resolved += 1; + trace!(%jump_inst, n_targets = targets.len(), "resolved multi-target jump with invalid default"); + } JumpTargetKind::Invalid => { self.multi_jump_targets.remove(&jump_inst); self.insts[jump_inst].flags |= InstFlags::STATIC_JUMP | InstFlags::INVALID_JUMP; @@ -817,11 +833,11 @@ impl Bytecode<'_> { // Invalidate resolutions that may be unsound due to incomplete analysis. // When the fixpoint didn't converge, partially-discovered ConstSets may be // incomplete, so we must conservatively invalidate them too. - let has_bottom_jump = - jump_targets.iter().any(|(_, target)| matches!(target, JumpTarget::Bottom)); + let has_bottom_jump = jump_targets.iter().any(|(_, target)| target.is_bottom()); if has_top_jump || has_bottom_jump || !converged { self.invalidate_suspect_jumps( &mut jump_targets, + &block_states, &discovered_edges, local_snapshots, has_top_jump || !converged, @@ -880,23 +896,22 @@ impl Bytecode<'_> { let consts = const_sets.get(set_idx); let interner = self.u256_interner.borrow(); let mut targets = SmallVec::new(); + let mut has_invalid = false; for &imm in consts { let val = imm.get(&interner); match usize::try_from(val) { Ok(pc) if self.is_valid_jump(pc) => { targets.push(self.pc_to_inst(pc)); } - _ => { - // Mixed valid + invalid: can't resolve since at runtime - // the value might be any member of the set. - return JumpTarget::top(); - } + _ => has_invalid = true, } } - if !targets.is_empty() { - JumpTarget::resolved(targets) - } else { + if targets.is_empty() { JumpTarget::invalid() + } else if has_invalid { + JumpTarget::resolved_with_invalid(targets) + } else { + JumpTarget::resolved(targets) } } AbsValue::Top => JumpTarget::top(), @@ -990,6 +1005,7 @@ impl Bytecode<'_> { fn invalidate_suspect_jumps( &mut self, jump_targets: &mut [(Inst, JumpTarget)], + block_states: &IndexVec, discovered_edges: &IndexVec>, local_snapshots: &Snapshots, invalidate_targets: bool, @@ -999,7 +1015,9 @@ impl Bytecode<'_> { // Seed: every reachable JUMPDEST block is suspect when Top jumps exist. let mut suspect: BitVec = BitVec::repeat(false, num_blocks); for bid in self.cfg.blocks.indices() { - if self.insts[self.cfg.blocks[bid].insts.start].is_jumpdest() { + if !matches!(block_states[bid], BlockState::Bottom) + && self.insts[self.cfg.blocks[bid].insts.start].is_jumpdest() + { suspect.set(bid.index(), true); } } @@ -1974,6 +1992,41 @@ mod tests_edge_cases { assert!(jump_inst.is_some(), "expected an invalid jump"); } + /// A finite target set with both valid and invalid PCs can still be compiled + /// as a multi-jump: valid cases branch to their target and the switch default + /// handles the invalid cases. + #[test] + fn const_set_with_invalid_target_resolves_to_multi_jump() { + let bytecode = analyze_asm( + " + CALLDATASIZE + PUSH %invalid_path + JUMPI + PUSH %valid + PUSH %join + JUMP + invalid_path: + JUMPDEST + PUSH1 0xff + PUSH %join + JUMP + join: + JUMPDEST + JUMP + valid: + JUMPDEST + STOP + ", + ); + + let (_, jump) = bytecode + .iter_insts() + .find(|(_, d)| d.is_jump() && d.flags.contains(InstFlags::MULTI_JUMP)) + .expect("expected mixed valid/invalid jump to use multi-jump"); + assert!(jump.flags.contains(InstFlags::STATIC_JUMP)); + assert!(!bytecode.has_dynamic_jumps); + } + #[test] fn jumpi_with_zero_condition_ignores_unknown_target() { let bytecode = analyze_asm( From ba4d7d4af645cccfced43e18f6e9860d7dd93004 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 12:03:10 +0200 Subject: [PATCH 07/23] feat: split jump return contexts --- .../src/bytecode/passes/block_analysis.rs | 115 ++++++++++++++++-- 1 file changed, 108 insertions(+), 7 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index be6ff3e04..63b342fb7 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -191,7 +191,12 @@ impl BlockState { /// bottom-padded with `Top`. This handles the common Solidity dispatch pattern where /// the "no selector match" fallthrough leaves an extra item on the stack that the /// fallback ignores. - fn join(&mut self, incoming: &[AbsValue], sets: &mut ConstSetInterner) -> bool { + fn join( + &mut self, + incoming: &[AbsValue], + sets: &mut ConstSetInterner, + split_key: Option, + ) -> bool { fn clamp_state(incoming: &[AbsValue]) -> Vec { let start = incoming.len().saturating_sub(MAX_ABS_STACK_DEPTH); incoming[start..].to_vec() @@ -244,9 +249,20 @@ impl BlockState { true } Self::Known(states) => { - if let Some(existing) = - states.iter_mut().find(|state| state.len() == incoming.len()) - { + let incoming_key = split_key + .and_then(|offset| incoming.get(incoming.len().checked_sub(1 + offset)?)) + .copied(); + let same_context = |state: &[AbsValue]| { + if state.len() != incoming.len() { + return false; + } + let Some(offset) = split_key else { return true }; + let Some(incoming_key) = incoming_key else { return true }; + let Some(index) = state.len().checked_sub(1 + offset) else { return true }; + state.get(index).is_none_or(|&existing_key| existing_key == incoming_key) + }; + + if let Some(existing) = states.iter_mut().find(|state| same_context(state)) { return join_state(existing, incoming, sets); } @@ -513,7 +529,13 @@ impl Bytecode<'_> { #[instrument(name = "ba", level = "debug", skip_all)] pub(crate) fn block_analysis(&mut self, local_snapshots: &Snapshots) { self.init_snapshots(); - let (resolved, count) = self.run_abstract_interp(local_snapshots); + let compiler_gas_used = self.compiler_gas_used; + let (mut resolved, mut count) = self.run_abstract_interp(local_snapshots, true); + if resolved.iter().any(|(_, target)| target.is_top()) { + self.init_snapshots(); + self.compiler_gas_used = compiler_gas_used; + (resolved, count) = self.run_abstract_interp(local_snapshots, false); + } let has_const_condition = resolved.iter().any(|(_, target)| target.condition != JumpCondition::Unknown); @@ -790,6 +812,7 @@ impl Bytecode<'_> { fn run_abstract_interp( &mut self, local_snapshots: &Snapshots, + split_contexts: bool, ) -> (Vec<(Inst, JumpTarget)>, usize) { let num_blocks = self.cfg.blocks.len(); @@ -811,7 +834,8 @@ impl Bytecode<'_> { } let mut const_sets = ConstSetInterner::new(); - let (discovered_edges, converged) = self.run_fixpoint(&mut block_states, &mut const_sets); + let (discovered_edges, converged) = + self.run_fixpoint(&mut block_states, &mut const_sets, split_contexts); // On non-convergence, all fixpoint-derived snapshots are potentially stale. // Restore the safe block-local snapshots computed by `block_analysis_local`. @@ -991,6 +1015,64 @@ impl Bytecode<'_> { } } + fn jump_operand_split_keys(&self) -> IndexVec> { + self.cfg + .blocks + .iter_enumerated() + .map(|(_, block)| { + let term_inst = block.terminator(); + let term = &self.insts[term_inst]; + if !term.is_jump() || term.flags.contains(InstFlags::STATIC_JUMP) { + return None; + } + + let mut stack: Vec> = + (0..MAX_ABS_STACK_DEPTH).rev().map(Some).collect(); + + for inst in block.insts() { + if self.insts[inst].is_dead_code() { + continue; + } + + let opcode = self.insts[inst].opcode; + let (inp, out) = self.insts[inst].stack_io(); + let inp = inp as usize; + let out = out as usize; + + if inst == term_inst { + return stack.last().copied().flatten(); + } + + match opcode { + op::PUSH0..=op::PUSH32 => stack.push(None), + op::POP => { + stack.pop()?; + } + op::DUP1..=op::DUP16 => { + let depth = (opcode - op::DUP1 + 1) as usize; + stack.push(*stack.get(stack.len().checked_sub(depth)?)?); + } + op::SWAP1..=op::SWAP16 => { + let depth = (opcode - op::SWAP1 + 1) as usize; + let len = stack.len(); + stack.swap(len.checked_sub(1)?, len.checked_sub(1 + depth)?); + } + op::DUPN | op::SWAPN | op::EXCHANGE => return None, + _ => { + if stack.len() < inp { + return None; + } + stack.truncate(stack.len() - inp); + stack.resize(stack.len() + out, None); + } + } + } + + None + }) + .collect() + } + /// Invalidates jump resolutions and operand snapshots that may be unsound due to /// unresolved `Top` jumps. /// @@ -1077,6 +1159,7 @@ impl Bytecode<'_> { &mut self, block_states: &mut IndexVec, const_sets: &mut ConstSetInterner, + split_contexts: bool, ) -> (IndexVec>, bool) { let num_blocks = self.cfg.blocks.len(); let mut worklist = Worklist::new(num_blocks); @@ -1088,6 +1171,7 @@ impl Bytecode<'_> { // Reverse map: discovered predecessors per block. let mut disc_preds: IndexVec> = IndexVec::from_vec(vec![SmallVec::new(); num_blocks]); + let split_keys = split_contexts.then(|| self.jump_operand_split_keys()); let max_iterations = num_blocks * 8; let mut iterations = 0; @@ -1144,7 +1228,8 @@ impl Bytecode<'_> { // Propagate to static CFG successors and discovered dynamic-jump targets. for &succ in block.succs.iter().chain(&context_targets) { - if block_states[succ].join(&stack_buf, const_sets) { + let split_key = split_keys.as_ref().and_then(|keys| keys[succ]); + if block_states[succ].join(&stack_buf, const_sets, split_key) { worklist.push(succ); } } @@ -1815,6 +1900,22 @@ pub(crate) mod tests { assert!(!bytecode.has_dynamic_jumps, "expected all jumps to be resolved"); } + #[test] + fn weth() { + let code = fixture_entry_code(include_str!("../../../../../data/weth.json")); + let mut bytecode = Bytecode::test(code); + bytecode.analyze().unwrap(); + assert!(!bytecode.has_dynamic_jumps, "expected all jumps to be resolved"); + } + + #[test] + fn erc20_transfer() { + let code = fixture_entry_code(include_str!("../../../../../data/erc20_transfer.json")); + let mut bytecode = Bytecode::test(code); + bytecode.analyze().unwrap(); + assert!(!bytecode.has_dynamic_jumps, "expected all jumps to be resolved"); + } + fn fixture_entry_code(json: &str) -> Vec { let v: serde_json::Value = serde_json::from_str(json).unwrap(); let case = v.as_object().unwrap().values().next().unwrap(); From d91ac799bcbc7b82e87274dbb6fd8479905a07b8 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 12:26:15 +0200 Subject: [PATCH 08/23] fix: restore mixed jump helpers --- .../src/bytecode/passes/block_analysis.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 63b342fb7..aa7a640a0 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -385,6 +385,10 @@ impl JumpTarget { Self::new(JumpTargetKind::Resolved(targets)) } + fn resolved_with_invalid(targets: SmallVec<[Inst; 4]>) -> Self { + Self::new(JumpTargetKind::ResolvedWithInvalid(targets)) + } + /// Creates a resolved target with a single constant. fn single(inst: Inst) -> Self { Self::resolved(SmallVec::from_elem(inst, 1)) @@ -407,8 +411,17 @@ impl JumpTarget { matches!(self.target, JumpTargetKind::Top) } + fn is_bottom(&self) -> bool { + matches!(self.target, JumpTargetKind::Bottom) + } + fn is_resolved(&self) -> bool { - matches!(self.target, JumpTargetKind::Resolved(_) | JumpTargetKind::Invalid) + matches!( + self.target, + JumpTargetKind::Resolved(_) + | JumpTargetKind::ResolvedWithInvalid(_) + | JumpTargetKind::Invalid + ) } } From f375eacf034a7d5422eb3cf862b969bcf4b2a13f Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 12:27:03 +0200 Subject: [PATCH 09/23] test: keep opaque jump reachable --- crates/revmc-codegen/src/bytecode/passes/block_analysis.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index aa7a640a0..5cbd16330 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -2853,7 +2853,7 @@ mod tests_edge_cases { " PUSH1 0x69 ; inst 0: cross-block value CALLDATASIZE ; inst 1: unknown condition - PUSH %target ; inst 2 + PUSH %opaque ; inst 2 JUMPI ; inst 3 ; Non-suspect fallthrough block (no JUMPDEST). PUSH1 0x0A ; inst 4 @@ -2866,6 +2866,7 @@ mod tests_edge_cases { MSTORE ; inst 11 STOP ; inst 12 ; Opaque dynamic jump — triggers suspect invalidation. + opaque: JUMPDEST ; inst 13 PUSH0 ; inst 14 CALLDATALOAD ; inst 15: Top From 6856510994b046004b7468cfa4163ad5473dd132 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 12:35:13 +0200 Subject: [PATCH 10/23] clean --- .../src/compiler/translate/mod.rs | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/crates/revmc-codegen/src/compiler/translate/mod.rs b/crates/revmc-codegen/src/compiler/translate/mod.rs index 3ed955310..0c76914aa 100644 --- a/crates/revmc-codegen/src/compiler/translate/mod.rs +++ b/crates/revmc-codegen/src/compiler/translate/mod.rs @@ -963,20 +963,18 @@ impl<'a, B: Backend> FunctionCx<'a, B> { let target_value = self.pop(); let targets = self.bytecode.multi_jump_targets(inst).unwrap(); - if opcode == op::JUMPI { - if has_const_jumpi_condition { + if opcode == op::JUMPI && !has_const_jumpi_condition { + let cond_word = self.pop(); + self.materialize_live_stack(); + let cond = self.bcx.icmp_imm(IntCC::NotEqual, cond_word, 0); + let next = self.inst_entries[inst + 1]; + let switch_block = self.bcx.create_block("multi_jump"); + self.bcx.brif(cond, switch_block, next); + self.bcx.switch_to_block(switch_block); + } else { + if opcode == op::JUMPI { self.pop_ignore(1); - self.materialize_live_stack(); - } else { - let cond_word = self.pop(); - self.materialize_live_stack(); - let cond = self.bcx.icmp_imm(IntCC::NotEqual, cond_word, 0); - let next = self.inst_entries[inst + 1]; - let switch_block = self.bcx.create_block("multi_jump"); - self.bcx.brif(cond, switch_block, next); - self.bcx.switch_to_block(switch_block); } - } else { self.materialize_live_stack(); } From 68848caa69d2bce2d842f9bcf57c5d3290e4a6a7 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 12:40:08 +0200 Subject: [PATCH 11/23] refactor: rename jump resolution wrapper --- .../src/bytecode/passes/block_analysis.rs | 77 +++++++++++-------- 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 455c7ebf6..edbd82232 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -287,16 +287,16 @@ impl BlockData { } } -/// Resolved jump target after fixpoint. +/// Resolved jump including compile-time condition information. #[derive(Clone, Debug)] -struct JumpTarget { - target: JumpTargetKind, +struct JumpResolution { + target: JumpTarget, condition: JumpCondition, } /// Resolved jump target kind after fixpoint. #[derive(Clone, Debug)] -enum JumpTargetKind { +enum JumpTarget { /// Not yet observed. Bottom, /// One or more known constant target instruction indices. @@ -315,25 +315,25 @@ enum JumpCondition { AlwaysFalse, } -impl JumpTarget { - fn new(target: JumpTargetKind) -> Self { +impl JumpResolution { + fn new(target: JumpTarget) -> Self { Self { target, condition: JumpCondition::Unknown } } fn bottom() -> Self { - Self::new(JumpTargetKind::Bottom) + Self::new(JumpTarget::Bottom) } fn invalid() -> Self { - Self::new(JumpTargetKind::Invalid) + Self::new(JumpTarget::Invalid) } fn top() -> Self { - Self::new(JumpTargetKind::Top) + Self::new(JumpTarget::Top) } fn resolved(targets: SmallVec<[Inst; 4]>) -> Self { - Self::new(JumpTargetKind::Resolved(targets)) + Self::new(JumpTarget::Resolved(targets)) } /// Creates a resolved target with a single constant. @@ -344,7 +344,7 @@ impl JumpTarget { /// Returns the single resolved target, if exactly one. fn as_single(&self) -> Option { match &self.target { - JumpTargetKind::Resolved(targets) if targets.len() == 1 => Some(targets[0]), + JumpTarget::Resolved(targets) if targets.len() == 1 => Some(targets[0]), _ => None, } } @@ -355,11 +355,11 @@ impl JumpTarget { } fn is_top(&self) -> bool { - matches!(self.target, JumpTargetKind::Top) + matches!(self.target, JumpTarget::Top) } fn is_resolved(&self) -> bool { - matches!(self.target, JumpTargetKind::Resolved(_) | JumpTargetKind::Invalid) + matches!(self.target, JumpTarget::Resolved(_) | JumpTarget::Invalid) } } @@ -472,7 +472,7 @@ impl Bytecode<'_> { /// Commits resolved jump targets by setting flags and data on the corresponding instructions. /// /// Returns the number of newly resolved jumps. - fn commit_resolved_jumps(&mut self, resolved: &[(Inst, JumpTarget)]) -> u32 { + fn commit_resolved_jumps(&mut self, resolved: &[(Inst, JumpResolution)]) -> u32 { let has_top_jump = resolved.iter().any(|(_, target)| target.is_top()); let mut newly_resolved = 0u32; @@ -487,7 +487,7 @@ impl Bytecode<'_> { } match &target.target { - JumpTargetKind::Resolved(targets) => { + JumpTarget::Resolved(targets) => { self.insts[jump_inst] .flags .remove(InstFlags::INVALID_JUMP | InstFlags::MULTI_JUMP); @@ -516,7 +516,7 @@ impl Bytecode<'_> { newly_resolved += 1; } } - JumpTargetKind::Invalid => { + JumpTarget::Invalid => { self.multi_jump_targets.remove(&jump_inst); self.insts[jump_inst].flags |= InstFlags::STATIC_JUMP | InstFlags::INVALID_JUMP; self.insts[jump_inst].flags.remove(InstFlags::MULTI_JUMP); @@ -525,7 +525,7 @@ impl Bytecode<'_> { } trace!(%jump_inst, "resolved invalid jump"); } - JumpTargetKind::Bottom if !has_top_jump => { + JumpTarget::Bottom if !has_top_jump => { // Truly unreachable: no unresolved jumps remain, so this // code cannot be reached at runtime. Mark as invalid. self.multi_jump_targets.remove(&jump_inst); @@ -536,13 +536,13 @@ impl Bytecode<'_> { } trace!(%jump_inst, "unreachable jump"); } - JumpTargetKind::Bottom => { + JumpTarget::Bottom => { // Unreachable according to the analysis, but there are // unresolved (Top) jumps that might reach this code at // runtime. Leave as-is. trace!(%jump_inst, "unreachable jump (not marking, has_top_jump)"); } - JumpTargetKind::Top => { + JumpTarget::Top => { trace!(%jump_inst, "unresolved jump (Top)"); } } @@ -709,7 +709,7 @@ impl Bytecode<'_> { fn run_abstract_interp( &mut self, local_snapshots: &Snapshots, - ) -> (Vec<(Inst, JumpTarget)>, usize) { + ) -> (Vec<(Inst, JumpResolution)>, usize) { let num_blocks = self.cfg.blocks.len(); // Initialize block states. Entry block starts with an empty stack. @@ -739,7 +739,7 @@ impl Bytecode<'_> { } // After convergence, resolve each dynamic jump from its snapshot operand. - let mut jump_targets: Vec<(Inst, JumpTarget)> = Vec::new(); + let mut jump_targets: Vec<(Inst, JumpResolution)> = Vec::new(); let mut has_top_jump = false; for &jump_inst in &jump_insts { let target = self.resolve_jump_snapshot(jump_inst, &const_sets); @@ -766,7 +766,11 @@ impl Bytecode<'_> { (jump_targets, count) } - fn resolve_jump_snapshot(&self, jump_inst: Inst, const_sets: &ConstSetInterner) -> JumpTarget { + fn resolve_jump_snapshot( + &self, + jump_inst: Inst, + const_sets: &ConstSetInterner, + ) -> JumpResolution { let snap = &self.snapshots.inputs[jump_inst]; let condition = if self.insts[jump_inst].opcode == op::JUMPI { snap.first() @@ -777,7 +781,8 @@ impl Bytecode<'_> { }; if condition == JumpCondition::AlwaysFalse { - return JumpTarget::single(jump_inst + 1).with_condition(JumpCondition::AlwaysFalse); + return JumpResolution::single(jump_inst + 1) + .with_condition(JumpCondition::AlwaysFalse); } match snap.last() { @@ -786,21 +791,25 @@ impl Bytecode<'_> { } None => { trace!(%jump_inst, pc = self.pc(jump_inst), "jump in unreached block"); - JumpTarget::bottom() + JumpResolution::bottom() } } } /// Resolves a jump target from the snapshot operand recorded during the fixpoint. - fn resolve_jump_operand(&self, operand: AbsValue, const_sets: &ConstSetInterner) -> JumpTarget { + fn resolve_jump_operand( + &self, + operand: AbsValue, + const_sets: &ConstSetInterner, + ) -> JumpResolution { match operand { AbsValue::Const(imm) => { let val = imm.get(&self.u256_interner.borrow()); match usize::try_from(val) { Ok(target_pc) if self.is_valid_jump(target_pc) => { - JumpTarget::single(self.pc_to_inst(target_pc)) + JumpResolution::single(self.pc_to_inst(target_pc)) } - _ => JumpTarget::invalid(), + _ => JumpResolution::invalid(), } } AbsValue::ConstSet(set_idx) => { @@ -816,17 +825,17 @@ impl Bytecode<'_> { _ => { // Mixed valid + invalid: can't resolve since at runtime // the value might be any member of the set. - return JumpTarget::top(); + return JumpResolution::top(); } } } if !targets.is_empty() { - JumpTarget::resolved(targets) + JumpResolution::resolved(targets) } else { - JumpTarget::invalid() + JumpResolution::invalid() } } - AbsValue::Top => JumpTarget::top(), + AbsValue::Top => JumpResolution::top(), } } @@ -912,7 +921,7 @@ impl Bytecode<'_> { /// resolved jumps and operand snapshots in suspect blocks. fn invalidate_suspect_jumps( &mut self, - jump_targets: &mut [(Inst, JumpTarget)], + jump_targets: &mut [(Inst, JumpResolution)], block_states: &IndexVec, discovered_edges: &IndexVec>, local_snapshots: &Snapshots, @@ -947,7 +956,7 @@ impl Bytecode<'_> { } for (inst, target) in jump_targets.iter_mut() { - if matches!(target.target, JumpTargetKind::Resolved(_)) + if matches!(target.target, JumpTarget::Resolved(_)) && target.condition == JumpCondition::AlwaysFalse && self.local_jumpi_condition_is_zero(*inst, local_snapshots) { @@ -959,7 +968,7 @@ impl Bytecode<'_> { if let Some(bid) = self.cfg.inst_to_block[*inst] && suspect[bid.index()] { - *target = JumpTarget::top(); + *target = JumpResolution::top(); } } From afb28ce1d18816355131b6b386919b86b6e349fd Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 13:54:31 +0200 Subject: [PATCH 12/23] clean --- crates/revmc-codegen/src/bytecode/mod.rs | 3 +++ .../src/bytecode/passes/block_analysis.rs | 10 +++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/mod.rs b/crates/revmc-codegen/src/bytecode/mod.rs index 036f9fa7b..82598a540 100644 --- a/crates/revmc-codegen/src/bytecode/mod.rs +++ b/crates/revmc-codegen/src/bytecode/mod.rs @@ -472,6 +472,7 @@ impl<'a> Bytecode<'a> { /// We can simply mark all instructions that are between diverging instructions and /// `JUMPDEST`s. #[instrument(name = "dce", level = "debug", skip_all)] + #[inline(never)] fn mark_dead_code(&mut self) { let mut iter = self.insts.iter_mut_enumerated(); while let Some((i, data)) = iter.next() { @@ -514,6 +515,7 @@ impl<'a> Bytecode<'a> { /// Constructs the sections in the bytecode. #[instrument(name = "sections", level = "debug", skip_all)] + #[inline(never)] fn construct_sections(&mut self) { let mut analysis = SectionsAnalysis::default(); for inst in self.insts.indices() { @@ -526,6 +528,7 @@ impl<'a> Bytecode<'a> { /// Constructs the memory sections in the bytecode. #[instrument(name = "memory_sections", level = "debug", skip_all)] + #[inline(never)] fn construct_memory_sections(&mut self) { self.memory_sections = MemorySectionAnalysis::new(self).run(self); } diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index edbd82232..93b401109 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -384,6 +384,7 @@ impl Bytecode<'_> { /// /// Also initializes `self.snapshots`. #[instrument(name = "local_jumps", level = "debug", skip_all)] + #[inline(never)] pub(crate) fn block_analysis_local(&mut self) { self.init_snapshots(); @@ -415,7 +416,7 @@ impl Bytecode<'_> { continue; } - let target = self.resolve_jump_snapshot(term_inst, &empty_sets); + let target = self.resolve_jump(term_inst, &empty_sets); let Some(target_inst) = target.as_single() else { continue }; // Log non-adjacent resolutions (not simple PUSH+JUMP). @@ -444,6 +445,7 @@ impl Bytecode<'_> { /// /// Also computes and stores per-instruction stack snapshots for constant propagation. #[instrument(name = "ba", level = "debug", skip_all)] + #[inline(never)] pub(crate) fn block_analysis(&mut self, local_snapshots: &Snapshots) { self.init_snapshots(); let (resolved, count) = self.run_abstract_interp(local_snapshots); @@ -459,6 +461,7 @@ impl Bytecode<'_> { } /// Recomputes the `has_dynamic_jumps` flag based on the current instruction set. + #[inline(never)] pub(crate) fn recompute_has_dynamic_jumps(&mut self) { let mut unresolved = self.insts.iter().filter(|inst| { inst.is_jump() && !inst.flags.contains(InstFlags::STATIC_JUMP) && !inst.is_dead_code() @@ -584,6 +587,7 @@ impl Bytecode<'_> { /// Rebuild the basic-block CFG from the current instruction state. #[instrument(level = "debug", skip_all)] + #[inline(never)] pub(crate) fn rebuild_cfg(&mut self) { let finish_block = |cfg: &mut Cfg, start: usize, end: usize| { debug_assert!(start < end, "empty block range: {start}..{end}"); @@ -742,7 +746,7 @@ impl Bytecode<'_> { let mut jump_targets: Vec<(Inst, JumpResolution)> = Vec::new(); let mut has_top_jump = false; for &jump_inst in &jump_insts { - let target = self.resolve_jump_snapshot(jump_inst, &const_sets); + let target = self.resolve_jump(jump_inst, &const_sets); if target.is_top() { has_top_jump = true; } @@ -766,7 +770,7 @@ impl Bytecode<'_> { (jump_targets, count) } - fn resolve_jump_snapshot( + fn resolve_jump( &self, jump_inst: Inst, const_sets: &ConstSetInterner, From dabbc940597bab473aaba4dd132780c8db49b37d Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 13:59:35 +0200 Subject: [PATCH 13/23] fmt --- crates/revmc-codegen/src/bytecode/passes/block_analysis.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 93b401109..660dad4cb 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -770,11 +770,7 @@ impl Bytecode<'_> { (jump_targets, count) } - fn resolve_jump( - &self, - jump_inst: Inst, - const_sets: &ConstSetInterner, - ) -> JumpResolution { + fn resolve_jump(&self, jump_inst: Inst, const_sets: &ConstSetInterner) -> JumpResolution { let snap = &self.snapshots.inputs[jump_inst]; let condition = if self.insts[jump_inst].opcode == op::JUMPI { snap.first() From 4ad5a8eadcde0ef12dd0ae5d76dd7ac673d2040a Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 14:18:39 +0200 Subject: [PATCH 14/23] perf: cap split jump analysis --- .../src/bytecode/passes/block_analysis.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 0b7d44950..4638f5dcb 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -167,6 +167,7 @@ impl ConstSetInterner { /// treat the out-of-range slot as `Top` rather than aborting block interpretation. const MAX_ABS_STACK_DEPTH: usize = 64; const MAX_BLOCK_CONTEXTS: usize = 8; +const MAX_SPLIT_FIXPOINT_ITERATIONS: usize = 512; /// Abstract state at the entry of a block. #[derive(Clone, Debug)] @@ -1192,8 +1193,16 @@ impl Bytecode<'_> { let mut disc_preds: IndexVec> = IndexVec::from_vec(vec![SmallVec::new(); num_blocks]); let split_keys = split_contexts.then(|| self.jump_operand_split_keys()); + let n_split_keys = split_keys + .as_ref() + .map(|keys| keys.iter().filter(|key| key.is_some()).count()) + .unwrap_or_default(); - let max_iterations = num_blocks * 8; + let max_iterations = if split_contexts { + (num_blocks * 8).min(MAX_SPLIT_FIXPOINT_ITERATIONS) + } else { + num_blocks * 8 + }; let mut iterations = 0; let mut converged = true; @@ -1257,7 +1266,7 @@ impl Bytecode<'_> { } debug!( - "{msg} after {iterations} iterations (max={max_iterations})", + "{msg} after {iterations} iterations (max={max_iterations}, split_contexts={split_contexts}, split_keys={n_split_keys})", msg = if converged { "converged" } else { "did not converge" }, ); From ab69ba9cdca6400bab5ede3b31ce217da72805a0 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 6 May 2026 20:02:12 +0200 Subject: [PATCH 15/23] fix: invalidate suspect jumpi conditions --- .../src/bytecode/passes/block_analysis.rs | 94 +++++++++++++++---- 1 file changed, 74 insertions(+), 20 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 3b613772c..baeb632f7 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -990,16 +990,21 @@ impl Bytecode<'_> { } } - fn is_const_zero(&self, value: AbsValue) -> bool { - value.as_const().is_some_and(|imm| imm.get(&self.u256_interner.borrow()).is_zero()) - } - - fn local_jumpi_condition_is_zero(&self, jump_inst: Inst, local_snapshots: &Snapshots) -> bool { - self.insts[jump_inst].opcode == op::JUMPI - && local_snapshots.inputs[jump_inst] - .first() - .copied() - .is_some_and(|value| self.is_const_zero(value)) + fn local_jumpi_condition(&self, jump_inst: Inst, local_snapshots: &Snapshots) -> JumpCondition { + if self.insts[jump_inst].opcode != op::JUMPI { + return JumpCondition::Unknown; + } + let Some(condition) = local_snapshots.inputs[jump_inst].first().copied() else { + return JumpCondition::Unknown; + }; + let Some(imm) = condition.as_const() else { + return JumpCondition::Unknown; + }; + if imm.get(&self.u256_interner.borrow()).is_zero() { + JumpCondition::AlwaysFalse + } else { + JumpCondition::AlwaysTrue + } } /// Adds discovered dynamic-jump target edges for a block. @@ -1144,19 +1149,19 @@ impl Bytecode<'_> { if invalidate_targets { for (inst, target) in jump_targets.iter_mut() { - if matches!(target.target, JumpTarget::Resolved(_)) - && target.condition == JumpCondition::AlwaysFalse - && self.local_jumpi_condition_is_zero(*inst, local_snapshots) - { - continue; - } - if !target.is_resolved() { - continue; - } if let Some(bid) = self.cfg.inst_to_block[*inst] && suspect[bid.index()] { - *target = JumpResolution::top(); + target.condition = self.local_jumpi_condition(*inst, local_snapshots); + if matches!(target.target, JumpTarget::Resolved(_)) + && target.condition == JumpCondition::AlwaysFalse + { + continue; + } + if target.is_resolved() { + let condition = target.condition; + *target = JumpResolution::top().with_condition(condition); + } } } } @@ -2520,6 +2525,55 @@ mod tests_edge_cases { ); } + /// A suspect JUMPI block must not keep a fixpoint-derived constant + /// condition when block-local analysis cannot prove it. + #[test] + fn suspect_jumpi_top_target_invalidates_const_condition() { + let bytecode = analyze_asm( + " + ; Branch to an opaque caller or fall through to a known caller. + PUSH0 ; pc=0 + CALLDATALOAD ; pc=1 + PUSH %opaque_caller ; pc=2 + JUMPI ; pc=4 + ; Known caller reaches fn_entry with condition=1 and a Top target. + PUSH1 0x01 ; pc=5: condition + PUSH0 ; pc=7 + MLOAD ; pc=8: target = Top + PUSH %fn_entry ; pc=9 + JUMP ; pc=11 + ; Opaque caller could reach fn_entry with condition=0. + opaque_caller: + JUMPDEST ; pc=12 + PUSH0 ; pc=13: condition + PUSH %taken ; pc=14: target + PUSH0 ; pc=16 + MLOAD ; pc=17: jump destination = Top + JUMP ; pc=18 + fn_entry: + JUMPDEST ; pc=19 + JUMPI ; pc=20 + STOP ; pc=21 + taken: + JUMPDEST ; pc=22 + STOP ; pc=23 + ", + ); + + let (_, jumpi) = bytecode + .iter_insts() + .find(|(i, d)| bytecode.pc(*i) == 20 && d.opcode == op::JUMPI) + .unwrap(); + assert!( + !jumpi.has_const_jumpi_condition(), + "suspect JUMPI should not keep a non-local const condition" + ); + assert!( + !jumpi.flags.contains(InstFlags::STATIC_JUMP), + "suspect JUMPI target should remain dynamic" + ); + } + /// A third caller reaches the function entry with a known callee (static /// PUSH+JUMP) but an opaque return address (from MLOAD). The function's /// return jump must not be resolved because the return address is Top. From f2ee2c8ec8234817b8228fd301fde269376f5dd8 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Tue, 12 May 2026 22:38:34 +0200 Subject: [PATCH 16/23] perf: avoid redundant jump analysis Skip the fallback abstract interpretation pass when the split pass already hit its iteration cap, since conservative invalidation has restored safe local snapshots and the fallback was resolving zero jumps in the regressing contracts. Also avoid no-op constant-set joins for stack states and snapshots. Local Criterion compile/translate measurements improved onchain_lm_v2 from ~15.0ms to ~4.20ms, fiat_token from ~11.8ms to ~5.81ms, and usdc_proxy from ~1.32ms to ~1.03ms while preserving burntpix at ~2.36ms. --- .../src/bytecode/passes/block_analysis.rs | 76 ++++++++++++++++--- 1 file changed, 65 insertions(+), 11 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 6f4a6b601..fd8a005ac 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -113,6 +113,27 @@ impl ConstSetInterner { } } + fn const_set_contains(&self, set_idx: ConstSetIdx, value: U256Imm) -> bool { + self.get(set_idx).binary_search(&value).is_ok() + } + + fn const_set_contains_all(&self, set_idx: ConstSetIdx, values: &[U256Imm]) -> bool { + let set = self.get(set_idx); + let mut i = 0; + let mut j = 0; + while i < set.len() && j < values.len() { + match set[i].cmp(&values[j]) { + Ordering::Less => i += 1, + Ordering::Equal => { + i += 1; + j += 1; + } + Ordering::Greater => return false, + } + } + j == values.len() + } + /// Interns a sorted, deduplicated set and returns the corresponding `AbsValue`. fn intern_set(&mut self, set: &[U256Imm]) -> AbsValue { match set.len() { @@ -122,11 +143,44 @@ impl ConstSetInterner { } } + #[inline(never)] + fn join_into(&mut self, slot: &mut AbsValue, incoming: AbsValue) -> bool { + if matches!(*slot, AbsValue::Top) || *slot == incoming { + return false; + } + + let joined = self.join(*slot, incoming); + if joined == *slot { + return false; + } + + *slot = joined; + true + } + /// Lattice join: two values merge to their least upper bound. fn join(&mut self, a: AbsValue, b: AbsValue) -> AbsValue { + if a == b { + return a; + } + match (a, b) { (AbsValue::Top, _) | (_, AbsValue::Top) => AbsValue::Top, - (AbsValue::Const(x), AbsValue::Const(y)) if x == y => AbsValue::Const(x), + + // If one value is a subset of the other, the result is the larger set. + (AbsValue::ConstSet(idx), AbsValue::Const(v)) if self.const_set_contains(idx, v) => a, + (AbsValue::Const(v), AbsValue::ConstSet(idx)) if self.const_set_contains(idx, v) => b, + (AbsValue::ConstSet(a_idx), AbsValue::ConstSet(b_idx)) + if self.const_set_contains_all(a_idx, self.get(b_idx)) => + { + a + } + (AbsValue::ConstSet(a_idx), AbsValue::ConstSet(b_idx)) + if self.const_set_contains_all(b_idx, self.get(a_idx)) => + { + b + } + _ => { let a = self.abs_const_set(&a).unwrap(); let b = self.abs_const_set(&b).unwrap(); @@ -234,9 +288,7 @@ impl BlockState { let pad = join_len - incoming_top.len(); for i in 0..join_len { let inc = if i < pad { AbsValue::Top } else { incoming_top[i - pad] }; - let joined = sets.join(existing[i], inc); - if joined != existing[i] { - existing[i] = joined; + if sets.join_into(&mut existing[i], inc) { changed = true; } } @@ -447,7 +499,7 @@ impl Bytecode<'_> { debug_assert_eq!(snap.len(), operands.len()); for (slot, &operand) in snap.iter_mut().zip(operands) { - *slot = const_sets.join(*slot, operand); + const_sets.join_into(slot, operand); } } @@ -458,7 +510,9 @@ impl Bytecode<'_> { const_sets: &mut ConstSetInterner, ) { match &mut self.snapshots.outputs[inst] { - Some(existing) => *existing = const_sets.join(*existing, value), + Some(existing) => { + const_sets.join_into(existing, value); + } slot @ None => *slot = Some(value), } } @@ -544,11 +598,11 @@ impl Bytecode<'_> { pub(crate) fn block_analysis(&mut self, local_snapshots: &Snapshots) { self.init_snapshots(); let compiler_gas_used = self.compiler_gas_used; - let (mut resolved, mut count) = self.run_abstract_interp(local_snapshots, true); - if resolved.iter().any(|(_, target)| target.is_top()) { + let (mut resolved, mut count, converged) = self.run_abstract_interp(local_snapshots, true); + if converged && resolved.iter().any(|(_, target)| target.is_top()) { self.init_snapshots(); self.compiler_gas_used = compiler_gas_used; - (resolved, count) = self.run_abstract_interp(local_snapshots, false); + (resolved, count, _) = self.run_abstract_interp(local_snapshots, false); } let has_const_condition = @@ -817,7 +871,7 @@ impl Bytecode<'_> { &mut self, local_snapshots: &Snapshots, split_contexts: bool, - ) -> (Vec<(Inst, JumpResolution)>, usize) { + ) -> (Vec<(Inst, JumpResolution)>, usize, bool) { let num_blocks = self.cfg.blocks.len(); // Initialize block states. Entry block starts with an empty stack. @@ -880,7 +934,7 @@ impl Bytecode<'_> { let count = jump_targets.iter().filter(|(_, target)| target.is_resolved()).count(); - (jump_targets, count) + (jump_targets, count, converged) } fn resolve_jump(&self, jump_inst: Inst, const_sets: &ConstSetInterner) -> JumpResolution { From e8b84b9e873155cee9ec20e7022f278bdf7d0cfa Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 13 May 2026 00:01:25 +0200 Subject: [PATCH 17/23] perf: more fast paths --- crates/revmc-codegen/src/bytecode/passes/block_analysis.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index fd8a005ac..ffabfc8d9 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -262,6 +262,10 @@ impl BlockState { incoming: &[AbsValue], sets: &mut ConstSetInterner, ) -> bool { + if existing == incoming { + return false; + } + let new_len = existing.len().max(incoming.len()); let mut changed = false; @@ -498,6 +502,9 @@ impl Bytecode<'_> { } debug_assert_eq!(snap.len(), operands.len()); + if snap.as_slice() == operands { + return; + } for (slot, &operand) in snap.iter_mut().zip(operands) { const_sets.join_into(slot, operand); } From 7aa3de7c0f9a346c4463f1996402507433861e76 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 13 May 2026 00:36:23 +0200 Subject: [PATCH 18/23] test: unify block analysis test module --- crates/revmc-codegen/src/bytecode/passes/block_analysis.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index ffabfc8d9..d745d4bb0 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -2006,12 +2006,6 @@ pub(crate) mod tests { let code = case["pre"][to]["code"].as_str().unwrap().trim_start_matches("0x"); revm_primitives::hex::decode(code).unwrap() } -} - -#[cfg(test)] -mod tests_edge_cases { - use super::{tests::*, *}; - /// Three callers to the same internal function. The return JUMP should /// resolve to Multi with three targets. #[test] From 1b36fa2574d0d2964dbf5f886af17c5dbe992dbf Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 13 May 2026 03:51:58 +0200 Subject: [PATCH 19/23] chore: fix script --- scripts/bench.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/bench.py b/scripts/bench.py index 3b2a7176f..e77f386f8 100755 --- a/scripts/bench.py +++ b/scripts/bench.py @@ -704,9 +704,10 @@ def _analyze(cls, output: str) -> dict[str, int]: fixpt_res = cls._last_match(r"ba:.*newly_resolved=(\d+)", output) ba_unres_matches = re.findall( - r"analyze:ba: .*unresolved dynamic jumps remain n=(\d+)", output + r"analyze:ba(?::[^\s:]*)*: .*unresolved dynamic jumps remain n=(\d+)", + output, ) - ba_ran = "analyze:ba:" in output + ba_ran = re.search(r"analyze:ba(?::[^\s:]*)*:", output) is not None if ba_unres_matches: unresolved = int(ba_unres_matches[-1]) @@ -714,7 +715,8 @@ def _analyze(cls, output: str) -> dict[str, int]: unresolved = 0 else: unresolved = cls._last_match( - r"local_jumps:.*unresolved dynamic jumps remain n=(\d+)", output + r"local_jumps(?::[^\s:]*)*: .*unresolved dynamic jumps remain n=(\d+)", + output, ) total = local_res + fixpt_res + unresolved From 27049300998516b651c95f2e8ca3d786926c55a9 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 13 May 2026 04:59:38 +0200 Subject: [PATCH 20/23] fix: retry unsplit block analysis --- .../src/bytecode/passes/block_analysis.rs | 49 ++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index d745d4bb0..78030962e 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -606,7 +606,7 @@ impl Bytecode<'_> { self.init_snapshots(); let compiler_gas_used = self.compiler_gas_used; let (mut resolved, mut count, converged) = self.run_abstract_interp(local_snapshots, true); - if converged && resolved.iter().any(|(_, target)| target.is_top()) { + if !converged || resolved.iter().any(|(_, target)| target.is_top()) { self.init_snapshots(); self.compiler_gas_used = compiler_gas_used; (resolved, count, _) = self.run_abstract_interp(local_snapshots, false); @@ -3209,4 +3209,51 @@ pub(crate) mod tests { "return jump should remain dynamic when fixpoint doesn't converge" ); } + + /// If split-context analysis hits its fixed cap, retry the uncapped unsplit + /// analysis before falling back to block-local snapshots. + #[test] + fn split_non_convergence_retries_unsplit_analysis() { + let k = 12; + let b = 40; + let mut lines = Vec::new(); + lines.push("PUSH %call0".to_string()); + lines.push("JUMP".to_string()); + for i in 0..k { + lines.push(format!("call{i}:")); + lines.push("JUMPDEST".to_string()); + lines.push(format!("PUSH %ret{i}")); + lines.push("PUSH %relay0".to_string()); + lines.push("JUMP".to_string()); + + lines.push(format!("ret{i}:")); + lines.push("JUMPDEST".to_string()); + lines.push("POP".to_string()); + if i + 1 < k { + lines.push(format!("PUSH %call{}", i + 1)); + lines.push("JUMP".to_string()); + } else { + lines.push("STOP".to_string()); + } + } + for i in 0..b { + lines.push(format!("relay{i}:")); + lines.push("JUMPDEST".to_string()); + if i + 1 < b { + lines.push(format!("PUSH %relay{}", i + 1)); + } else { + lines.push("PUSH %fn_entry".to_string()); + } + lines.push("JUMP".to_string()); + } + lines.push("fn_entry:".to_string()); + lines.push("JUMPDEST".to_string()); + lines.push("PUSH1 0x42".to_string()); + lines.push("SWAP1".to_string()); + lines.push("JUMP".to_string()); + + let bytecode = analyze_asm(&lines.join("\n")); + + assert!(!bytecode.has_dynamic_jumps, "unsplit retry should resolve the shared return jump"); + } } From 0fe79b718bf9aceb5e1b8b8cfab9ed56ae280f6a Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 13 May 2026 16:46:01 +0200 Subject: [PATCH 21/23] fix: ignore dead jumps in analysis --- .../src/bytecode/passes/block_analysis.rs | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs index 78030962e..64e59cbc7 100644 --- a/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs +++ b/crates/revmc-codegen/src/bytecode/passes/block_analysis.rs @@ -890,7 +890,8 @@ impl Bytecode<'_> { // may become constant only after CFG fixpoint. let mut jump_insts: Vec = Vec::new(); for (i, inst) in self.insts.iter_enumerated() { - if inst.is_jump() + if !inst.is_dead_code() + && inst.is_jump() && (!inst.flags.contains(InstFlags::STATIC_JUMP) || (inst.opcode == op::JUMPI && !inst.has_const_jumpi_condition())) { @@ -1747,6 +1748,36 @@ pub(crate) mod tests { assert_eq!(bytecode.const_output(Inst::from_usize(12)), None); } + #[test] + fn dead_dynamic_jump_does_not_invalidate_snapshots() { + let bytecode = analyze_asm( + " + PUSH %entry ; inst 0 + JUMP ; inst 1 + PUSH0 ; inst 2: dead + JUMP ; inst 3: dead dynamic jump + entry: + JUMPDEST ; inst 4 + PUSH1 0x40 ; inst 5 + PUSH %ret ; inst 6 + PUSH %func ; inst 7 + JUMP ; inst 8 + ret: + JUMPDEST ; inst 9 + ADD ; inst 10: 0x40 + 0x02 = 0x42 + STOP ; inst 11 + func: + JUMPDEST ; inst 12 + PUSH1 0x02 ; inst 13 + SWAP1 ; inst 14 + JUMP ; inst 15 + ", + ); + + assert!(bytecode.inst(Inst::from_usize(3)).is_dead_code()); + assert_eq!(bytecode.const_output(Inst::from_usize(10)), Some(U256::from(0x42))); + } + #[test] fn multi_target_jump() { // Internal function called from two sites with different return addresses. From c3969f0cd82dd1aebfa2df5202b9e3207486e72c Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Wed, 13 May 2026 16:46:03 +0200 Subject: [PATCH 22/23] fix: skip fallthrough join dedup --- .../src/bytecode/passes/dedup.rs | 61 ++++++++++++++++++- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/dedup.rs b/crates/revmc-codegen/src/bytecode/passes/dedup.rs index 48cab2c7b..f9f95c593 100644 --- a/crates/revmc-codegen/src/bytecode/passes/dedup.rs +++ b/crates/revmc-codegen/src/bytecode/passes/dedup.rs @@ -1,7 +1,7 @@ //! Block deduplication pass. //! -//! Identifies structurally identical non-fallthrough blocks (same opcode + immediate sequence) -//! and eliminates duplicates by marking them as dead code and redirecting predecessors to a +//! Identifies structurally identical blocks (same opcode + immediate sequence) that are safe to +//! merge and eliminates duplicates by marking them as dead code and redirecting predecessors to a //! single canonical copy. use super::block_analysis::{Block, BlockData, Snapshots}; @@ -86,6 +86,12 @@ impl<'a> Bytecode<'a> { // JUMPDEST could be reached from an unresolved dynamic JUMP with an // arbitrary stack context. let first = &self.insts[block.insts.start]; + // Do not dedup jumpdest join blocks reached by fallthrough predecessors. The + // predecessor's stack writes may be context-specific and required downstream. + if first.is_jumpdest() && self.has_fallthrough_predecessor(block) { + continue; + } + if self.has_dynamic_jumps && first.is_reachable_jumpdest(true) { continue; } @@ -188,6 +194,13 @@ impl<'a> Bytecode<'a> { deduped } + + fn has_fallthrough_predecessor(&self, block: &BlockData) -> bool { + block.preds.iter().any(|&pred| { + let pred_term = self.cfg.blocks[pred].terminator(); + self.insts[pred_term].can_fall_through() && pred_term + 1 == block.insts.start + }) + } } /// Builds a structural fingerprint of a block from instruction data, without consulting the @@ -388,6 +401,48 @@ mod tests { assert_eq!(bytecode.redirects.len(), 1, "same-target JUMP tails should be deduped"); } + #[test] + fn dedup_skips_fallthrough_joins() { + // Each path materializes a different stack value before falling through to identical + // join blocks. Merging the joins would lose the predecessor-specific stack write. + let bytecode = analyze_asm_with( + " + PUSH0 + CALLDATALOAD + PUSH %case_b + JUMPI + + PUSH0 + PUSH1 0xAA + SWAP1 + POP + join_a: + JUMPDEST + PUSH %done + JUMP + + case_b: + JUMPDEST + PUSH0 + PUSH1 0xBB + SWAP1 + POP + join_b: + JUMPDEST + PUSH %done + JUMP + + done: + JUMPDEST + POP + STOP + ", + AnalysisConfig::DEDUP, + ); + + assert!(bytecode.redirects.is_empty(), "fallthrough join blocks must not be deduped"); + } + #[test] fn dedup_jump_different_targets() { // Two byte-identical non-JUMPDEST JUMP tails with different resolved static targets. @@ -447,7 +502,7 @@ mod tests { bytecode.config = AnalysisConfig::DEDUP; bytecode.analyze().unwrap(); - assert_eq!(bytecode.redirects.len(), 20); + assert_eq!(bytecode.redirects.len(), 19); } fn fixture_entry_code(json: &str) -> Vec { From 5e39ea4a3f055cf7138498e6edd3553011e66645 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Sat, 16 May 2026 16:31:26 +0200 Subject: [PATCH 23/23] test: cover dedup fallthrough constants --- .../src/bytecode/passes/dedup.rs | 67 +++++++++++---- crates/revmc-codegen/src/tests/mod.rs | 81 +++++++++++++++++++ 2 files changed, 131 insertions(+), 17 deletions(-) diff --git a/crates/revmc-codegen/src/bytecode/passes/dedup.rs b/crates/revmc-codegen/src/bytecode/passes/dedup.rs index 1ed40bbc0..82a0abf75 100644 --- a/crates/revmc-codegen/src/bytecode/passes/dedup.rs +++ b/crates/revmc-codegen/src/bytecode/passes/dedup.rs @@ -86,12 +86,6 @@ impl<'a> Bytecode<'a> { // JUMPDEST could be reached from an unresolved dynamic JUMP with an // arbitrary stack context. let first = &self.insts[block.insts.start]; - // Do not dedup jumpdest join blocks reached by fallthrough predecessors. The - // predecessor's stack writes may be context-specific and required downstream. - if first.is_jumpdest() && self.has_fallthrough_predecessor(block) { - continue; - } - if self.has_dynamic_jumps && first.is_reachable_jumpdest(true) { continue; } @@ -194,13 +188,6 @@ impl<'a> Bytecode<'a> { deduped } - - fn has_fallthrough_predecessor(&self, block: &BlockData) -> bool { - block.preds.iter().any(|&pred| { - let pred_term = self.cfg.blocks[pred].terminator(); - self.insts[pred_term].can_fall_through() && pred_term + 1 == block.insts.start - }) - } } /// Builds a structural fingerprint of a block from instruction data, without consulting the @@ -402,9 +389,9 @@ mod tests { } #[test] - fn dedup_skips_fallthrough_joins() { - // Each path materializes a different stack value before falling through to identical - // join blocks. Merging the joins would lose the predecessor-specific stack write. + fn dedup_keeps_pure_fallthrough_materialization() { + // These labels are only reached by fallthrough, so the materialization stays in the + // same block as the join suffix. Dedup must not split out only the identical suffix. let bytecode = analyze_asm_with( " PUSH0 @@ -443,6 +430,52 @@ mod tests { assert!(bytecode.redirects.is_empty(), "fallthrough join blocks must not be deduped"); } + #[test] + fn dedup_redirects_fallthrough_joins_that_are_static_targets() { + let bytecode = analyze_asm_with( + " + CALLVALUE + PUSH 123456789 + SUB + PUSH %join_a + JUMPI + CALLVALUE + PUSH 123456789 + SUB + PUSH %join_b + JUMPI + + CALLDATASIZE + PUSH %case_b + JUMPI + + PUSH 1 + join_a: + JUMPDEST + PUSH %done + JUMP + + case_b: + JUMPDEST + PUSH 2 + join_b: + JUMPDEST + PUSH %done + JUMP + + done: + JUMPDEST + STOP + ", + AnalysisConfig::DEDUP, + ); + + assert_eq!(bytecode.redirects.len(), 1); + let (&redirected, &canonical) = bytecode.redirects.iter().next().unwrap(); + assert!(bytecode.insts[redirected].is_jumpdest()); + assert!(bytecode.insts[canonical].is_jumpdest()); + } + #[test] fn dedup_jump_different_targets() { // Two byte-identical non-JUMPDEST JUMP tails with different resolved static targets. @@ -502,7 +535,7 @@ mod tests { bytecode.config = AnalysisConfig::DEDUP; bytecode.analyze().unwrap(); - assert_eq!(bytecode.redirects.len(), 19); + assert_eq!(bytecode.redirects.len(), 20); } fn fixture_entry_code(json: &str) -> Vec { diff --git a/crates/revmc-codegen/src/tests/mod.rs b/crates/revmc-codegen/src/tests/mod.rs index 3cc8b4098..68c4be6e3 100644 --- a/crates/revmc-codegen/src/tests/mod.rs +++ b/crates/revmc-codegen/src/tests/mod.rs @@ -2153,6 +2153,87 @@ tests! { expected_gas: GAS_WHAT_INTERPRETER_SAYS, }), + dedup_fallthrough_redirect_keeps_case_a_constant(@raw { + bytecode: &asm(" + CALLVALUE + PUSH 123456789 + SUB + PUSH %join_a + JUMPI + CALLVALUE + PUSH 123456789 + SUB + PUSH %join_b + JUMPI + + CALLDATASIZE + ISZERO + PUSH %case_b + JUMPI + + PUSH 1 + join_a: + JUMPDEST + PUSH %done + JUMP + + case_b: + JUMPDEST + PUSH 2 + join_b: + JUMPDEST + PUSH %done + JUMP + + done: + JUMPDEST + STOP + "), + expected_return: InstructionResult::Stop, + expected_stack: &[1_U256], + expected_gas: GAS_WHAT_INTERPRETER_SAYS, + }), + + dedup_fallthrough_redirect_keeps_case_b_constant(@raw { + bytecode: &asm(" + CALLVALUE + PUSH 123456789 + SUB + PUSH %join_a + JUMPI + CALLVALUE + PUSH 123456789 + SUB + PUSH %join_b + JUMPI + + CALLDATASIZE + PUSH %case_b + JUMPI + + PUSH 1 + join_a: + JUMPDEST + PUSH %done + JUMP + + case_b: + JUMPDEST + PUSH 2 + join_b: + JUMPDEST + PUSH %done + JUMP + + done: + JUMPDEST + STOP + "), + expected_return: InstructionResult::Stop, + expected_stack: &[2_U256], + expected_gas: GAS_WHAT_INTERPRETER_SAYS, + }), + // Disabled opcodes must not poison stack sections. // // When a disabled opcode (e.g. TSTORE before Cancun) follows executable instructions