From 1909f01d7e72e0ad31422d58de8ef2dd05c7190d Mon Sep 17 00:00:00 2001 From: Erick Cestari Date: Sun, 3 May 2026 12:11:35 -0300 Subject: [PATCH 1/6] smite-ir: add Operation::has_side_effects predicate `has_side_effects` returns `true` for operations that have I/O side effects (`SendMessage`, `RecvAcceptChannel` and `MineBlocks`) and therefore cannot be dropped by DCE or deduplicated by CSE. Used by both minimizers introduced in the next commit. Also derive `Hash` on `Operation`, `ShutdownScriptVariant`, `ChannelTypeVariant` and `AcceptChannelField` so CSE can key its canonical map on `(operation, canonicalized_inputs)`. --- smite-ir/src/operation.rs | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/smite-ir/src/operation.rs b/smite-ir/src/operation.rs index e1e1c2a..bbd1d70 100644 --- a/smite-ir/src/operation.rs +++ b/smite-ir/src/operation.rs @@ -18,7 +18,7 @@ use super::VariableType; /// An IR operation. Each instruction in a program contains one operation plus /// input variable indices. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum Operation { // -- Load: produce a variable from an embedded literal or the context -- /// Load a satoshi or millisatoshi amount. @@ -106,7 +106,7 @@ pub enum Operation { /// Each variant encodes to a script matching one of the formats required by /// BOLT 2 for the upfront shutdown TLV. `Empty` opts out of upfront shutdown /// entirely and is accepted regardless of feature negotiation. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum ShutdownScriptVariant { /// Zero-length script. Opts out of upfront shutdown. 
Empty, @@ -294,7 +294,7 @@ impl fmt::Display for ShutdownScriptVariant { /// Additionally, the following bits can be added to any channel type: /// - `option_scid_alias` (bit 46) /// - `option_zeroconf` (bit 50) -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum ChannelTypeVariant { /// bit 12 StaticRemoteKey, @@ -437,7 +437,7 @@ impl fmt::Display for ChannelTypeVariant { } /// Fields that can be extracted from an `AcceptChannel` compound variable. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum AcceptChannelField { TemporaryChannelId, DustLimitSatoshis, @@ -640,6 +640,31 @@ impl Operation { } } + /// Returns `true` if this operation has I/O side effects and therefore + /// cannot be dropped by DCE or deduplicated by CSE. + #[must_use] + pub fn has_side_effects(&self) -> bool { + match self { + Self::SendMessage | Self::RecvAcceptChannel | Self::MineBlocks(_) => true, + Self::LoadAmount(_) + | Self::LoadFeeratePerKw(_) + | Self::LoadBlockHeight(_) + | Self::LoadU16(_) + | Self::LoadU8(_) + | Self::LoadBytes(_) + | Self::LoadFeatures(_) + | Self::LoadPrivateKey(_) + | Self::LoadChannelId(_) + | Self::LoadShutdownScript(_) + | Self::LoadChannelType(_) + | Self::LoadTargetPubkeyFromContext + | Self::LoadChainHashFromContext + | Self::DerivePoint + | Self::ExtractAcceptChannel(_) + | Self::BuildOpenChannel => false, + } + } + /// Returns true if this operation has parameters that can be mutated /// by `OperationParamMutator`. 
#[must_use] From 46193042475227b691d4830cb9021f0b802e21b1 Mon Sep 17 00:00:00 2001 From: Erick Cestari Date: Sun, 3 May 2026 12:11:50 -0300 Subject: [PATCH 2/6] smite-ir: add dead-code and common-subexpression minimizers Introduces the `Minimizer` trait (mirroring the `Mutator` trait shape) and two implementations that shrink an IR program in place: fn minimize(&self, program: &mut Program) -> bool; The bool reports whether the program was modified, so callers can skip an `==` walk over every instruction. - `DeadCodeEliminator` keeps an instruction if it has side effects or is referenced by a later kept instruction. A reverse pass marks liveness; a forward pass consumes the program and rewrites the surviving instructions' inputs to their new indices. - `CommonSubexpressionEliminator` merges instructions that compute the same expression. A single forward pass canonicalizes inputs as it goes and dedupes via a `HashMap` keyed on `(operation, canonicalized_inputs)`. SSA guarantees inputs are already canonicalized by the time we reach each instruction, so the merge is transitive: two compute ops whose inputs collapsed to the same canonical loads are themselves recognized as equivalent. Both transforms are safe in IR semantics (don't change observable behaviour modulo `SendMessage`/`RecvAcceptChannel` side-effects), so they don't take an oracle. 
--- smite-ir/src/instruction.rs | 2 +- smite-ir/src/lib.rs | 3 + smite-ir/src/minimizers.rs | 24 ++ smite-ir/src/minimizers/cse.rs | 52 +++++ smite-ir/src/minimizers/dead_code.rs | 49 ++++ smite-ir/src/tests.rs | 337 +++++++++++++++++++++++++++ 6 files changed, 466 insertions(+), 1 deletion(-) create mode 100644 smite-ir/src/minimizers.rs create mode 100644 smite-ir/src/minimizers/cse.rs create mode 100644 smite-ir/src/minimizers/dead_code.rs diff --git a/smite-ir/src/instruction.rs b/smite-ir/src/instruction.rs index 8c5b9c3..c4a18af 100644 --- a/smite-ir/src/instruction.rs +++ b/smite-ir/src/instruction.rs @@ -12,7 +12,7 @@ use super::Operation; /// /// In SSA form, each instruction produces at most one variable (at the index /// equal to the instruction's position in the program). -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct Instruction { /// The operation to perform. pub operation: Operation, diff --git a/smite-ir/src/lib.rs b/smite-ir/src/lib.rs index 72d0874..70992c4 100644 --- a/smite-ir/src/lib.rs +++ b/smite-ir/src/lib.rs @@ -7,6 +7,7 @@ //! //! # Modules //! - [`instruction`] - Single IR instruction (operation + input references). +//! - [`minimizers`] - Shrink a program while preserving interesting behaviour. //! - [`operation`] - Operations that load, compute, build or act. //! - [`program`] - Ordered list of instructions. //! - [`variable`] - Typed runtime values and lightweight type tags. 
@@ -14,6 +15,7 @@ pub mod builder; pub mod generators; pub mod instruction; +pub mod minimizers; pub mod mutators; pub mod operation; pub mod program; @@ -22,6 +24,7 @@ pub mod variable; pub use builder::ProgramBuilder; pub use generators::Generator; pub use instruction::Instruction; +pub use minimizers::Minimizer; pub use mutators::Mutator; pub use operation::Operation; pub use program::Program; diff --git a/smite-ir/src/minimizers.rs b/smite-ir/src/minimizers.rs new file mode 100644 index 0000000..7cf7cd7 --- /dev/null +++ b/smite-ir/src/minimizers.rs @@ -0,0 +1,24 @@ +//! IR program minimizers. +//! +//! A [`Minimizer`] reduces a [`Program`] to a smaller, behaviourally +//! equivalent version in a single pass. Both transforms are safe in IR +//! semantics, so they don't need an oracle to drive the search. +//! +//! Run them in pipeline order for best results: +//! 1. [`DeadCodeEliminator`] — drop dead instructions and reindex +//! 2. [`CommonSubexpressionEliminator`] — merge equivalent pure expressions + +mod cse; +mod dead_code; + +pub use cse::CommonSubexpressionEliminator; +pub use dead_code::DeadCodeEliminator; + +use super::Program; + +/// A minimizer that reduces an IR program in one call. +pub trait Minimizer { + /// Reduces `program` in place to a smaller, behaviourally equivalent + /// version. Returns `true` if the program was modified. + fn minimize(&self, program: &mut Program) -> bool; +} diff --git a/smite-ir/src/minimizers/cse.rs b/smite-ir/src/minimizers/cse.rs new file mode 100644 index 0000000..e900a6d --- /dev/null +++ b/smite-ir/src/minimizers/cse.rs @@ -0,0 +1,52 @@ +//! Common-subexpression elimination minimizer. + +use std::collections::HashMap; +use std::collections::hash_map::Entry; + +use super::Minimizer; +use crate::{Instruction, Program}; + +/// Merges instructions that compute the same pure expression. +/// +/// Two pure instructions are equivalent when they share the same operation +/// and the same canonicalized inputs. 
Walking the program in order makes +/// the merge transitive: by the time we reach instruction `i`, SSA +/// guarantees every input it references is already canonicalized, so two +/// compute ops whose inputs collapsed to the same canonical loads are +/// themselves recognized as equivalent. +pub struct CommonSubexpressionEliminator; + +impl Minimizer for CommonSubexpressionEliminator { + fn minimize(&self, program: &mut Program) -> bool { + let n = program.instructions.len(); + let mut canonical: HashMap<Instruction, usize> = HashMap::new(); + let mut new_idx = vec![0usize; n]; + let mut instructions = Vec::with_capacity(n); + + for (i, mut instr) in std::mem::take(&mut program.instructions) + .into_iter() + .enumerate() + { + for input in &mut instr.inputs { + *input = new_idx[*input]; + } + if instr.operation.has_side_effects() { + new_idx[i] = instructions.len(); + instructions.push(instr); + continue; + } + match canonical.entry(instr.clone()) { + Entry::Occupied(e) => new_idx[i] = *e.get(), + Entry::Vacant(e) => { + e.insert(instructions.len()); + new_idx[i] = instructions.len(); + instructions.push(instr); + } + } + } + + let changed = instructions.len() < n; + program.instructions = instructions; + changed + } +} diff --git a/smite-ir/src/minimizers/dead_code.rs b/smite-ir/src/minimizers/dead_code.rs new file mode 100644 index 0000000..be2709f --- /dev/null +++ b/smite-ir/src/minimizers/dead_code.rs @@ -0,0 +1,49 @@ +//! Dead-code elimination minimizer. + +use super::Minimizer; +use crate::Program; + +/// Removes unreferenced instructions and reindexes the remaining inputs. +/// +/// An instruction is removed when (a) its operation has no side effects +/// and (b) no later kept instruction references its output. The reverse +/// traversal lets a chain of dead instructions collapse: once we skip the +/// user of some load, nothing marks that load as kept, so the +/// load itself becomes eligible. 
+pub struct DeadCodeEliminator; + +impl Minimizer for DeadCodeEliminator { + fn minimize(&self, program: &mut Program) -> bool { + let n = program.instructions.len(); + let mut keep = vec![false; n]; + for idx in (0..n).rev() { + if !keep[idx] && !program.instructions[idx].operation.has_side_effects() { + continue; + } + keep[idx] = true; + for &input in &program.instructions[idx].inputs { + keep[input] = true; + } + } + + let mut remap = vec![0usize; n]; + let mut instructions = Vec::with_capacity(n); + for (old, mut instr) in std::mem::take(&mut program.instructions) + .into_iter() + .enumerate() + { + if !keep[old] { + continue; + } + for input in &mut instr.inputs { + *input = remap[*input]; + } + remap[old] = instructions.len(); + instructions.push(instr); + } + + let changed = instructions.len() < n; + program.instructions = instructions; + changed + } +} diff --git a/smite-ir/src/tests.rs b/smite-ir/src/tests.rs index 9e907b3..9762c5c 100644 --- a/smite-ir/src/tests.rs +++ b/smite-ir/src/tests.rs @@ -7,6 +7,7 @@ use smite::bolt::MAX_MESSAGE_SIZE; use super::*; use generators::OpenChannelGenerator; +use minimizers::{CommonSubexpressionEliminator, DeadCodeEliminator, Minimizer}; use mutators::{InputSwapMutator, OperationParamMutator}; use operation::{AcceptChannelField, ChannelTypeVariant, ShutdownScriptVariant}; use program::ValidateError; @@ -1164,3 +1165,339 @@ fn input_swap_preserves_types() { } } } + +// -- DeadCodeEliminator tests -- + +#[test] +fn dead_code_removes_dead_instructions() { + // All three LoadAmount instructions are unreferenced; all three are dropped. 
+ let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadAmount(1), + inputs: vec![], + }, + Instruction { + operation: Operation::LoadAmount(2), + inputs: vec![], + }, + Instruction { + operation: Operation::LoadAmount(3), + inputs: vec![], + }, + ], + }; + assert!(DeadCodeEliminator.minimize(&mut program)); + assert!( + program.instructions.is_empty(), + "all dead instructions should be removed" + ); + program.validate().expect("trimmed program should validate"); +} + +#[test] +fn dead_code_returns_false_on_empty_program() { + let mut program = Program { + instructions: vec![], + }; + assert!(!DeadCodeEliminator.minimize(&mut program)); + assert!(program.instructions.is_empty()); +} + +/// Build a program with a dead load appended after the generated program. +/// This gives the `DeadCodeEliminator` at least one candidate to try. +fn program_with_dead_load() -> Program { + let mut p = generate_program(0); + p.instructions.push(Instruction { + operation: Operation::LoadAmount(42), + inputs: vec![], + }); + p +} + +#[test] +fn dead_code_keeps_send_message() { + let mut program = program_with_dead_load(); + DeadCodeEliminator.minimize(&mut program); + let has_send = program + .instructions + .iter() + .any(|i| matches!(i.operation, Operation::SendMessage)); + assert!(has_send, "DeadCodeEliminator must not remove SendMessage"); +} + +#[test] +fn dead_code_keeps_recv_accept_channel() { + let mut program = program_with_dead_load(); + DeadCodeEliminator.minimize(&mut program); + let has_recv = program + .instructions + .iter() + .any(|i| matches!(i.operation, Operation::RecvAcceptChannel)); + assert!( + has_recv, + "DeadCodeEliminator must not remove RecvAcceptChannel" + ); +} + +#[test] +fn dead_code_result_validates() { + let mut program = program_with_dead_load(); + DeadCodeEliminator.minimize(&mut program); + program.validate().expect("final program should validate"); +} + +#[test] +fn dead_code_reindexes_remaining_inputs() { + 
// Indexes 0 and 1 are dead loads; 2 is a referenced load; 3 references 2. + // After dropping 0 and 1, the surviving load shifts to index 0 and the + // SendMessage must be rewritten to reference it. + let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadAmount(1), + inputs: vec![], + }, + Instruction { + operation: Operation::LoadAmount(2), + inputs: vec![], + }, + Instruction { + operation: Operation::LoadPrivateKey(key(1)), + inputs: vec![], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![2], + }, + ], + }; + assert!(DeadCodeEliminator.minimize(&mut program)); + assert_eq!(program.instructions.len(), 2); + assert!(matches!( + program.instructions[0].operation, + Operation::LoadPrivateKey(_) + )); + assert!(matches!( + program.instructions[1].operation, + Operation::SendMessage + )); + assert_eq!(program.instructions[1].inputs, vec![0]); +} + +#[test] +fn dead_code_chains_collapse() { + // Two chains share a root LoadPrivateKey. One DerivePoint feeds an + // impure SendMessage (alive); the other is unreferenced (dead). DCE + // drops the dead DerivePoint, but the shared root must survive because + // the alive chain still references it. + // + // Note: this program is type-invalid (SendMessage expects Message, not + // Point), but the minimizer doesn't typecheck so it's fine for the test. 
+ let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadPrivateKey(key(1)), + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, // alive + inputs: vec![0], + }, + Instruction { + operation: Operation::DerivePoint, // dead + inputs: vec![0], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![1], + }, + ], + }; + let expected = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadPrivateKey(key(1)), + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, + inputs: vec![0], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![1], + }, + ], + }; + assert!(DeadCodeEliminator.minimize(&mut program)); + assert_eq!(program, expected); +} + +#[test] +fn dead_code_idempotent() { + let mut once = program_with_dead_load(); + DeadCodeEliminator.minimize(&mut once); + let mut twice = once.clone(); + assert!( + !DeadCodeEliminator.minimize(&mut twice), + "second pass must report unchanged" + ); + assert_eq!(once, twice, "elimination is idempotent"); +} + +// -- CommonSubexpressionEliminator tests -- + +#[test] +fn cse_returns_false_on_empty_program() { + let mut program = Program { + instructions: vec![], + }; + assert!(!CommonSubexpressionEliminator.minimize(&mut program)); + assert!(program.instructions.is_empty()); +} + +#[test] +fn cse_rewires_references() { + // A downstream DerivePoint consumes the duplicate load. After CSE, its + // input must be rewired from the dropped duplicate (index 1) to the + // surviving canonical load (index 0). 
+ let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadPrivateKey(key(7)), + inputs: vec![], + }, + Instruction { + operation: Operation::LoadPrivateKey(key(7)), // duplicate of index 0 + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, + inputs: vec![1], // must be rewired to 0 + }, + ], + }; + let expected = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadPrivateKey(key(7)), + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, + inputs: vec![0], + }, + ], + }; + assert!(CommonSubexpressionEliminator.minimize(&mut program)); + assert_eq!(program, expected); + program.validate().expect("program should still validate"); +} + +#[test] +fn cse_result_validates() { + let mut program = generate_program(0); + CommonSubexpressionEliminator.minimize(&mut program); + program.validate().expect("merged program should validate"); +} + +#[test] +fn cse_idempotent() { + let mut once = generate_program(0); + CommonSubexpressionEliminator.minimize(&mut once); + let mut twice = once.clone(); + assert!( + !CommonSubexpressionEliminator.minimize(&mut twice), + "second pass must report unchanged" + ); + assert_eq!(once, twice, "merging is idempotent"); +} + +#[test] +fn cse_merges_compute_ops_through_canonicalized_inputs() { + // Two LoadPrivateKey duplicates feed two DerivePoint instructions. + // CSE first merges the loads, which canonicalizes the DerivePoint + // inputs to the same index, which in turn lets CSE merge the + // DerivePoints themselves. 
+ let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadPrivateKey(key(7)), + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, + inputs: vec![0], + }, + Instruction { + operation: Operation::LoadPrivateKey(key(7)), // duplicate of 0 + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, + inputs: vec![2], // canonicalizes to 0 -> matches index 1 + }, + ], + }; + program.validate().expect("input program should validate"); + assert!(CommonSubexpressionEliminator.minimize(&mut program)); + assert_eq!(program.instructions.len(), 2); + assert!(matches!( + program.instructions[0].operation, + Operation::LoadPrivateKey(_) + )); + assert!(matches!( + program.instructions[1].operation, + Operation::DerivePoint + )); + assert_eq!(program.instructions[1].inputs, vec![0]); +} + +#[test] +fn cse_does_not_merge_send_message() { + // SendMessage is not pure (network side-effect): two with the same + // input must both survive. The duplicate LoadBytes upstream should + // be merged, and both SendMessages remapped to the surviving load. + // + // Note: this program is type-invalid (SendMessage expects Message, not + // Bytes), but the minimizer doesn't typecheck so it's fine for the test. 
+ let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadBytes(vec![0xab]), + inputs: vec![], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![0], + }, + Instruction { + operation: Operation::LoadBytes(vec![0xab]), // duplicate of 0 + inputs: vec![], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![2], // canonicalizes to 0 + }, + ], + }; + let expected = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadBytes(vec![0xab]), + inputs: vec![], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![0], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![0], + }, + ], + }; + assert!(CommonSubexpressionEliminator.minimize(&mut program)); + assert_eq!(program, expected, "SendMessage must not be deduplicated"); +} From f8d97a177e895b41afaa94f73f6bb9b932a713f9 Mon Sep 17 00:00:00 2001 From: Erick Cestari Date: Sun, 3 May 2026 13:02:25 -0300 Subject: [PATCH 3/6] smite-ir-mutator: implement afl_custom_trim with minimizer pipeline Wires the `DeadCodeEliminator` and `CommonSubexpressionEliminator` minimizers into AFL++'s custom-mutator trim ABI as a single composed pass. Both are deterministic in-process transforms safe in IR semantics, so we run them once during `afl_custom_init_trim`, serialize the result into `out_buf`, and offer it to AFL as a single candidate. `afl_custom_init_trim` returns `1` if either minimizer reports a change (or `0` if both no-op'd; AFL skips trim entirely). `afl_custom_trim` hands back the pre-serialized buffer. `afl_custom_post_trim` returns `1` unconditionally to terminate AFL's `while (stage_cur < stage_max)` loop after the single iteration. AFL itself decides whether to persist the trimmed bytes based on its coverage-cksum check; we don't need to track partial state across iterations because there's only one. 
--- README.md | 4 +- smite-ir-mutator/src/lib.rs | 217 +++++++++++++++++++++++++++++++++++- 2 files changed, 216 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 89bcbd5..a0a056b 100644 --- a/README.md +++ b/README.md @@ -65,13 +65,11 @@ printf '\x00' > /tmp/smite-seeds/empty # Start fuzzing with the custom mutator AFL_CUSTOM_MUTATOR_LIBRARY=target/release/libsmite_ir_mutator.so \ AFL_CUSTOM_MUTATOR_ONLY=1 \ -AFL_DISABLE_TRIM=1 \ ~/AFLplusplus/afl-fuzz -X -i /tmp/smite-seeds -o /tmp/smite-out -- /tmp/smite-nyx ``` `AFL_CUSTOM_MUTATOR_ONLY=1` disables AFL++'s built-in mutators (which would -corrupt the postcard encoding). `AFL_DISABLE_TRIM=1` prevents AFL++ from -trimming inputs (which would also corrupt the encoding). +corrupt the postcard encoding). ## Running Modes diff --git a/smite-ir-mutator/src/lib.rs b/smite-ir-mutator/src/lib.rs index 0903ee2..f258f62 100644 --- a/smite-ir-mutator/src/lib.rs +++ b/smite-ir-mutator/src/lib.rs @@ -13,8 +13,6 @@ //! - `AFL_CUSTOM_MUTATOR_ONLY=1` -- disable AFL++'s byte mutators. This also //! disables the havoc stage entirely, so we deliberately do not implement //! `afl_custom_havoc_mutation`. -//! - `AFL_DISABLE_TRIM=1` -- this library does not implement custom trim and -//! AFL++'s default byte-level trim would corrupt our structured programs. //! //! # Buffer ownership //! @@ -30,6 +28,7 @@ use rand::rngs::SmallRng; use rand::{RngExt, SeedableRng}; use smite_ir::generators::OpenChannelGenerator; +use smite_ir::minimizers::{CommonSubexpressionEliminator, DeadCodeEliminator, Minimizer}; use smite_ir::mutators::{InputSwapMutator, OperationParamMutator}; use smite_ir::{Generator, Mutator, Program, ProgramBuilder}; @@ -190,6 +189,105 @@ pub unsafe extern "C" fn afl_custom_fuzz( len } +/// Runs the full minimizer pipeline (`DeadCodeEliminator` then +/// `CommonSubexpressionEliminator`) on the corpus entry and stages the +/// resulting candidate for [`afl_custom_trim`] to hand back. 
+/// +/// Both minimizers are deterministic in-process transforms safe in IR +/// semantics, so we don't need iterative AFL feedback. We compose them +/// once and offer a single candidate. AFL still gets to verify it (its +/// coverage cksum is the source of truth); on rejection AFL silently +/// discards the candidate and keeps the original corpus entry. +/// +/// AFL drives the trim loop with `while (stage_cur < stage_max)`, where +/// `stage_max` is this function's return value and `stage_cur` is updated +/// from [`afl_custom_post_trim`]'s return. +/// +/// # Returns +/// +/// - `1` if there's a candidate to offer (decode succeeded, validate +/// passed, and the trim actually shrank the program). AFL enters the +/// trim loop for one iteration. +/// - `0` if there's nothing to do (decode/validate failed, the trim was a +/// no-op, or serializing the result failed). AFL skips trim entirely. +/// - Negative would signal a fatal error to AFL; we never produce one. +/// +/// # Safety +/// +/// - `data` must be a pointer returned by [`afl_custom_init`]. +/// - `buf` must point to `buf_size` readable bytes. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn afl_custom_init_trim( + data: *mut c_void, + buf: *mut u8, + buf_size: usize, +) -> i32 { + let state = unsafe { &mut *data.cast::<MutatorState>() }; + + let input = unsafe { slice::from_raw_parts(buf, buf_size) }; + let Some(program) = decode_and_validate(input) else { + return 0; + }; + + let mut trimmed = program; + let dce_changed = DeadCodeEliminator.minimize(&mut trimmed); + let cse_changed = CommonSubexpressionEliminator.minimize(&mut trimmed); + if (!dce_changed && !cse_changed) || !state.serialize(&trimmed, buf_size) { + return 0; + } + + 1 +} + +/// Hands the pre-serialized trimmed candidate back to AFL. +/// +/// The pointer written into `*out_buf` borrows from `MutatorState::out_buf` +/// and is valid until the next call into this library; AFL copies the +/// bytes before re-entering us. 
We always write a non-null pointer (even +/// on the zero-length path) to satisfy AFL's `if (unlikely(!retbuf)) +/// FATAL(...)` check. +/// +/// # Returns +/// +/// - The byte length of the candidate at `*out_buf`, as staged by +/// [`afl_custom_init_trim`]; the buffer is not consumed by this call. +/// - A `0` return would be treated by AFL as "skip this iteration" rather +/// than a stop signal; the loop terminates via [`afl_custom_post_trim`]'s +/// return. +/// +/// # Safety +/// +/// - `data` must be a pointer returned by [`afl_custom_init`]. +/// - `out_buf` must be a valid, writable pointer to a `*const u8` slot. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn afl_custom_trim(data: *mut c_void, out_buf: *mut *const u8) -> usize { + let state = unsafe { &mut *data.cast::<MutatorState>() }; + unsafe { *out_buf = state.out_buf.as_ptr() }; + state.out_buf.len() +} + +/// Always returns `1` to terminate AFL's trim loop after a single +/// iteration. +/// +/// AFL drives trim with `while (stage_cur < stage_max)` and assigns +/// `stage_cur` from this function's return value. With `stage_max = 1` +/// (set by [`afl_custom_init_trim`]), returning `1` makes the condition +/// `1 < 1` false and breaks the loop. +/// +/// `success` indicates whether the candidate's coverage cksum matched the +/// original. We don't need to act on it: AFL itself either persists the +/// trimmed buffer (on success) or keeps the original corpus entry (on +/// failure), and we don't track partial state across iterations because +/// there's only one. +/// +/// # Safety +/// +/// - `data` must be a pointer returned by [`afl_custom_init`]. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn afl_custom_post_trim(_data: *mut c_void, _success: u8) -> i32 { + 1 +} + +/// Marker symbol that tells AFL++ not to populate `add_buf` for +/// [`afl_custom_fuzz`]. 
AFL++ never actually calls this function -- it only /// checks for the symbol's presence via `dlsym` and, if found, skips picking a @@ -300,6 +398,19 @@ mod tests { postcard::to_allocvec(&builder.build()).expect("postcard serialization") } + /// `seed_program_bytes()` plus an unreferenced `LoadAmount`, so the + /// pipeline has something for DCE to drop (and thus `init_trim` + /// returns `1`). + fn reducible_seed_bytes() -> Vec<u8> { + let bytes = seed_program_bytes(); + let mut program: Program = postcard::from_bytes(&bytes).expect("decode"); + program.instructions.push(smite_ir::Instruction { + operation: smite_ir::Operation::LoadAmount(0xdead_beef), + inputs: vec![], + }); + postcard::to_allocvec(&program).expect("encode") + } + #[test] fn init_returns_nonnull() { let state = State::new(0); @@ -397,4 +508,106 @@ mod tests { // crash either. unsafe { afl_custom_splice_optout(ptr::null_mut()) }; } + + // -- Trim tests -- + + fn init_trim_via_ffi(state: &State, mut input: Vec<u8>) -> i32 { + unsafe { afl_custom_init_trim(state.0, input.as_mut_ptr(), input.len()) } + } + + fn trim_via_ffi(state: &State) -> (*const u8, usize) { + let mut out: *const u8 = ptr::null(); + let len = unsafe { afl_custom_trim(state.0, &raw mut out) }; + (out, len) + } + + fn post_trim_via_ffi(state: &State, success: bool) -> i32 { + unsafe { afl_custom_post_trim(state.0, u8::from(success)) } + } + + #[test] + fn trim_init_returns_1_when_reduction_possible() { + let state = State::new(0); + let rv = init_trim_via_ffi(&state, reducible_seed_bytes()); + assert_eq!(rv, 1); + } + + #[test] + fn trim_init_returns_0_when_no_reduction_possible() { + // Generator output has no dead code or duplicate loads; the + // pipeline is a no-op, so we tell AFL to skip trim entirely. 
+ let state = State::new(0); + let rv = init_trim_via_ffi(&state, seed_program_bytes()); + assert_eq!(rv, 0); + } + + #[test] + fn trim_init_returns_0_for_garbage() { + let state = State::new(0); + let rv = init_trim_via_ffi(&state, vec![0xFF; 16]); + assert_eq!(rv, 0); + } + + #[test] + fn trim_yields_candidate_after_init() { + let state = State::new(0); + init_trim_via_ffi(&state, reducible_seed_bytes()); + let (out, len) = trim_via_ffi(&state); + assert!(len > 0); + decode_and_validate(out, len); + } + + #[test] + fn trim_post_trim_returns_1_to_terminate_loop() { + let state = State::new(0); + init_trim_via_ffi(&state, reducible_seed_bytes()); + let _ = trim_via_ffi(&state); + // post_trim returns 1 unconditionally — it's the load-bearing + // termination signal that pushes AFL's `stage_cur` to `stage_max`. + assert_eq!(post_trim_via_ffi(&state, true), 1); + assert_eq!(post_trim_via_ffi(&state, false), 1); + } + + #[test] + fn trim_init_does_not_overwrite_sequence() { + // Trim is not a mutation; `last_sequence` (used by `describe` to + // name queue entries from fuzz) must survive both the no-op and + // successful trim paths. + for (label, input, expected_rv) in [ + ("no-op", seed_program_bytes(), 0), + ("success", reducible_seed_bytes(), 1), + ] { + let state = State::new(0); + // Run a fuzz call so last_sequence has known contents. 
+ let _ = fuzz_via_ffi(&state, Vec::new(), 1 << 16); + let before = unsafe { CStr::from_ptr(afl_custom_describe(state.0, 256)) } + .to_str() + .expect("valid utf-8") + .to_string(); + let rv = init_trim_via_ffi(&state, input); + assert_eq!(rv, expected_rv, "{label}"); + let after = unsafe { CStr::from_ptr(afl_custom_describe(state.0, 256)) } + .to_str() + .expect("valid utf-8") + .to_string(); + assert_eq!(before, after, "{label}"); + } + } + + #[test] + fn trim_candidate_is_smaller_than_input() { + let original_bytes = reducible_seed_bytes(); + let original_program: Program = postcard::from_bytes(&original_bytes).expect("decode"); + + let state = State::new(0); + init_trim_via_ffi(&state, original_bytes); + + let (out, len) = trim_via_ffi(&state); + assert!(len > 0, "trim should yield a candidate"); + let trimmed = decode_and_validate(out, len); + assert!( + trimmed.instructions.len() < original_program.instructions.len(), + "trim should shrink instruction count" + ); + } } From c8dc8f2fc3525dc3e265f30404a3cb9e37ce4ae0 Mon Sep 17 00:00:00 2001 From: Erick Cestari Date: Thu, 14 May 2026 09:23:17 -0300 Subject: [PATCH 4/6] smite-ir-e2e-test: add AFL harness with DCE/CSE-stable coverage Minimal AFL++ harness binary that decodes a postcard-encoded `Program`, validates it, and publishes coverage manually to `__afl_area_ptr`. The e2e test for the custom mutator's trim pipeline drives `afl-fuzz` against this binary. The bitmap must be bit-identical across DCE/CSE-trimmed variants of the same program (so AFL's trim cksum accepts shrunk candidates) yet vary under our mutators (so AFL queues new entries). Any compiler- inserted edge whose hit count tracks `program.instructions.len()` fails the first half: DCE/CSE move the count across AFL's hit-count buckets and the cksum mismatches. `postcard::from_bytes` and `Program::validate` both contain such loops, and rustc doesn't expose a SanitizerCoverage allowlist to exclude them. 
So the harness is built with `RUSTFLAGS=-Cllvm-args=-sanitizer-coverage-level=0` and publishes coverage manually: for each instruction reachable (via `inputs`) from a side-effect root (`SendMessage`, `RecvAcceptChannel`), mark a slot derived from a content hash of `(operation, hashes of inputs)`. Because the hash folds input content (not indices), DCE renumbering doesn't change it; CSE merges duplicates whose hashes were already equal; `OperationParamMutator` shifts an operation's hash; `InputSwapMutator` rewires an edge and shifts the consumer's hash. This also encodes a broader smite design principle: coverage is driven only by side-effecting work. Pure setup instructions that never feed a Send/Recv produce zero coverage and AFL never queues them. The fuzzing signal lines up with the minimizer's notion of "useful work", the same reachability DCE uses, so trimming can't change coverage. The crate is workspace-excluded so AFL's link-arg insertions don't leak into the rest of the workspace. We deliberately don't use the `afl` crate: its `fuzz!` macro forces persistent + shmem delivery, which hangs during AFL's calibration when SanitizerCoverage is off. The harness calls `__afl_manual_init` and reads stdin instead. 
--- Cargo.toml | 2 +- smite-ir-e2e-test/Cargo.lock | 502 ++++++++++++++++++++++++++++++++++ smite-ir-e2e-test/Cargo.toml | 13 + smite-ir-e2e-test/src/main.rs | 116 ++++++++ 4 files changed, 632 insertions(+), 1 deletion(-) create mode 100644 smite-ir-e2e-test/Cargo.lock create mode 100644 smite-ir-e2e-test/Cargo.toml create mode 100644 smite-ir-e2e-test/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 3c01e0b..a309c07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ "smite-nyx-sys", "smite-scenarios", ] -exclude = ["workloads/ldk"] +exclude = ["workloads/ldk", "smite-ir-e2e-test"] [workspace.package] version = "0.0.0" diff --git a/smite-ir-e2e-test/Cargo.lock b/smite-ir-e2e-test/Cargo.lock new file mode 100644 index 0000000..061adc2 --- /dev/null +++ b/smite-ir-e2e-test/Cargo.lock @@ -0,0 +1,502 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "base58ck" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c8d66485a3a2ea485c1913c4572ce0256067a5377ac8c75c4960e1cda98605f" +dependencies = [ + "bitcoin-internals", + "bitcoin_hashes", +] + +[[package]] +name = "bech32" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32637268377fc7b10a8c6d51de3e7fba1ce5dd371a96e342b34e6078db558e7f" + +[[package]] +name = "bitcoin" +version = "0.32.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9cf93e61f2dbc3e3c41234ca26a65e2c0b0975c52e0f069ab9893ebbede584d3" +dependencies = [ + "base58ck", + "bech32", + "bitcoin-internals", + "bitcoin-io", + "bitcoin-units", + "bitcoin_hashes", + "hex-conservative", + "hex_lit", + "secp256k1", +] + +[[package]] +name = "bitcoin-internals" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30bdbe14aa07b06e6cfeffc529a1f099e5fbe249524f8125358604df99a4bed2" + +[[package]] +name = "bitcoin-io" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dee39a0ee5b4095224a0cfc6bf4cc1baf0f9624b96b367e53b66d974e51d953" + +[[package]] +name = "bitcoin-units" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "346568ebaab2918487cea76dd55dae13c27bb618cdb737c952e69eb2017c4118" +dependencies = [ + "bitcoin-internals", +] + +[[package]] +name = "bitcoin_hashes" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26ec84b80c482df901772e931a9a681e26a1b9ee2302edeff23cb30328745c8b" +dependencies = [ + "bitcoin-io", + "hex-conservative", +] + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "cc" +version = "1.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chacha20" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + +[[package]] +name = "cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror", +] + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" 
+ +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hex-conservative" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fda06d18ac606267c40c04e41b9947729bf8b9efe74bd4e82b61a5f26a510b9f" +dependencies = [ + "arrayvec", +] + +[[package]] +name = "hex_lit" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3011d1213f159867b13cfd6ac92d2cd5f1345762c63be3554e84092d85a50bbd" + +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + 
+[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + +[[package]] +name = "postcard" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "serde", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + +[[package]] +name = "secp256k1" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9465315bc9d4566e1724f0fffcbcc446268cb522e60f9a27bcded6b19c108113" +dependencies = [ + "bitcoin_hashes", + "secp256k1-sys", +] + +[[package]] +name = "secp256k1-sys" +version = "0.10.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4387882333d3aa8cb20530a17c69a3752e97837832f34f6dccc760e715001d9" +dependencies = [ + "cc", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simple_logger" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7038d0e96661bf9ce647e1a6f6ef6d6f3663f66d9bf741abf14ba4876071c17" +dependencies = [ + "log", + "windows-sys", +] + +[[package]] +name = "smite" +version = "0.0.0" +dependencies = [ + "bitcoin", + "chacha20poly1305", + "hex", + "log", + "nix", + "simple_logger", + "thiserror", +] + +[[package]] +name = "smite-ir" +version = "0.0.0" +dependencies = [ + "bitcoin", + "postcard", + "rand", + "serde", + "smite", + "thiserror", +] + +[[package]] +name = "smite-ir-e2e-test" +version = "0.0.0" +dependencies = [ + "postcard", + "smite-ir", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + 
+[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" diff --git a/smite-ir-e2e-test/Cargo.toml b/smite-ir-e2e-test/Cargo.toml new file mode 100644 index 0000000..c89c835 --- /dev/null +++ b/smite-ir-e2e-test/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "smite-ir-e2e-test" +version = "0.0.0" +edition = "2024" +license = "MIT" + +[[bin]] +name = "smite-ir-e2e-test" +path = "src/main.rs" + +[dependencies] +smite-ir = { path = "../smite-ir" } +postcard = { version = "1.1", default-features = false, features = ["alloc"] } diff --git a/smite-ir-e2e-test/src/main.rs b/smite-ir-e2e-test/src/main.rs new file mode 100644 index 0000000..1bfbd1e --- /dev/null +++ b/smite-ir-e2e-test/src/main.rs @@ -0,0 +1,116 @@ +//! Minimal AFL++ harness for the smite IR custom-mutator e2e test. +//! +//! ## Design principle: coverage comes only from side-effecting work +//! +//! Smite only cares about what the IR program *does* against the +//! target -- bytes it sends, responses it receives. Pure setup +//! instructions (load a literal, derive a point, extract a field) +//! are means to that end and aren't fuzzing signal by themselves. +//! This is true for any smite workload, not just this e2e test: the +//! harness emits coverage feedback only for instructions +//! transitively feeding a side-effect root. +//! Programs that load and compute but never act produce *zero* +//! coverage and AFL never queues them. +//! +//! ## Why we hand-roll the bitmap +//! +//! The bitmap must be *bit-identical* across DCE/CSE-trimmed variants +//! of the same program (so AFL's trim cksum accepts the shrunk +//! candidate) yet *vary* under our mutators (so AFL queues new +//! entries). Any compiler-inserted edge whose hit count tracks +//! 
`program.instructions.len()` fails the first half: DCE/CSE move +//! the count across AFL's hit-count buckets and the cksum mismatches. +//! `postcard::from_bytes` and `Program::validate` both contain such +//! loops, and rustc doesn't expose a SanitizerCoverage allowlist to +//! exclude them. +//! +//! So we disable SanitizerCoverage entirely (build with +//! `RUSTFLAGS=-Cllvm-args=-sanitizer-coverage-level=0`) and publish +//! coverage manually. The signal: for each instruction reachable from +//! a side-effect root, mark a slot derived from a content hash of +//! `(operation, hashes of inputs)`. Because the hash folds *input +//! content* (not indices), DCE renumbering doesn't change it; CSE +//! merges duplicates whose hashes were already equal; +//! `OperationParamMutator` shifts an operation's hash (and its +//! consumers'); `InputSwapMutator` rewires an edge and shifts the +//! consumer's hash. +//! +//! We don't use `afl::fuzz!`: it forces persistent + shmem delivery, +//! which hangs during calibration when SanitizerCoverage is off. We +//! call `__afl_manual_init` and read each test case from stdin. + +use std::hash::{DefaultHasher, Hash, Hasher}; +use std::io::Read; + +use smite_ir::{Operation, Program}; + +unsafe extern "C" { + static __afl_area_ptr: *mut u8; + fn __afl_manual_init(); +} + +/// Overrides afl-compiler-rt's weak symbol to keep test cases on +/// stdin instead of shared memory (see module docs). +#[unsafe(no_mangle)] +pub static mut __afl_sharedmem_fuzzing: i32 = 0; + +/// Matches `AFL_MAP_SIZE=65536` set by the test driver. +const MAP_MASK: u32 = (1 << 16) - 1; + +fn main() { + unsafe { __afl_manual_init() }; + + let mut data = Vec::new(); + if std::io::stdin().lock().read_to_end(&mut data).is_err() { + return; + } + let Ok(program) = postcard::from_bytes::<Program>(&data) else { + return; + }; + if program.validate().is_err() { + return; + } + + // Content hash per instruction.
SSA order means an instruction's + // inputs are already hashed by the time we reach it, so one + // forward pass is enough -- no recursion or memoization needed. + let n = program.instructions.len(); + let mut hashes = vec![0u64; n]; + for (i, instr) in program.instructions.iter().enumerate() { + let mut h = DefaultHasher::new(); + instr.operation.hash(&mut h); + for &inp in &instr.inputs { + if inp < i { + hashes[inp].hash(&mut h); + } + } + hashes[i] = h.finish(); + } + + // Mark slots for instructions reachable from side-effect roots. + // Pure instructions that never feed a SendMessage/RecvAcceptChannel + // contribute no coverage. Walk in reverse so a marked instruction + // propagates to its (earlier) inputs in one pass. + let mut reachable = vec![false; n]; + let ptr = unsafe { __afl_area_ptr }; + for i in (0..n).rev() { + let instr = &program.instructions[i]; + let is_root = matches!( + instr.operation, + Operation::SendMessage | Operation::RecvAcceptChannel + ); + if !(is_root || reachable[i]) { + continue; + } + reachable[i] = true; + for &inp in &instr.inputs { + if inp < n { + reachable[inp] = true; + } + } + if !ptr.is_null() { + let slot = (hashes[i] as u32) & MAP_MASK; + unsafe { *ptr.add(slot as usize) = 1 }; + } + } +} From 2b50e7622846a9a9f0747b00bd68950540ea5c35 Mon Sep 17 00:00:00 2001 From: Erick Cestari Date: Thu, 14 May 2026 09:23:36 -0300 Subject: [PATCH 5/6] smite-ir-mutator: add e2e test for AFL custom mutator hooks Drives the real `afl-fuzz` binary against the `smite-ir-e2e-test` harness with our cdylib loaded as `AFL_CUSTOM_MUTATOR_LIBRARY`, then asserts every hook we export is actually used in a real fuzzing run. All signals come from AFL's own `AFL_DEBUG=1` output, so the cdylib stays instrumentation-free. Five signals checked: 1. `Found 'afl_custom_'` lines at startup for all six hooks we export (init/fuzz/deinit bundled as `afl_custom_mutator`, plus describe, init_trim, trim, post_trim, splice_optout). 2. 
Queue filenames carry `smite-ir:` from `afl_custom_describe`. Both branches of `mutate_stacked` must surface: `fresh` (regenerate) and one of `op-param` / `input-swap` (stacked mutation). 3. `[Custom Trimming] START` lines confirm `afl_custom_init_trim` is invoked. 4. `START: Max 1` confirms the DCE+CSE pipeline shrank at least one input. The seed corpus mixes a DCE-reducible program (dead `LoadAmount` appended) and a CSE-reducible one (duplicate `LoadPrivateKey` injected mid-program) so both minimizer paths can fire. 5. `[Custom Trimming] SUCCESS` confirms AFL persisted at least one trimmed candidate, i.e. the trimmed bytes' coverage cksum matched the original. Verifies DCE+CSE preserve coverage end-to-end -- relies on the harness's DCE/CSE-invariant signal. The harness is built with `RUSTFLAGS=-Cllvm-args=-sanitizer-coverage-level=0` from this test (cargo-afl appends user RUSTFLAGS to its own, and LLVM honors the last `-Cllvm-args=` seen). `AFL_MAP_SIZE` + `AFL_SKIP_BIN_CHECK` are set because sancov is off so `__afl_final_loc` is 0 and AFL wouldn't otherwise know the binary is fuzzable. Marked `#[ignore]` so `cargo test` skips it by default; spawns afl-fuzz for ~30s. Skips cleanly if `cargo-afl` isn't on `PATH`. Working files land in `/tmp/smite-e2e/` so they survive a panic for post-mortem. --- .../tests/afl_custom_mutator_e2e.rs | 253 ++++++++++++++++++ 1 file changed, 253 insertions(+) create mode 100644 smite-ir-mutator/tests/afl_custom_mutator_e2e.rs diff --git a/smite-ir-mutator/tests/afl_custom_mutator_e2e.rs b/smite-ir-mutator/tests/afl_custom_mutator_e2e.rs new file mode 100644 index 0000000..0785117 --- /dev/null +++ b/smite-ir-mutator/tests/afl_custom_mutator_e2e.rs @@ -0,0 +1,253 @@ +//! End-to-end test for the smite IR custom mutator. Drives the real +//! `afl-fuzz` binary against our harness with the cdylib loaded as +//! `AFL_CUSTOM_MUTATOR_LIBRARY`, and asserts every hook we export is +//! actually used in a real fuzzing run. +//! +//! 
Marked `#[ignore]`; run with: +//! +//! ``` +//! cargo test -p smite-ir-mutator --test afl_custom_mutator_e2e -- \ +//! --ignored --nocapture +//! ``` +//! +//! Skips cleanly if `cargo-afl` isn't on `PATH`. Working files (seeds, +//! queue, AFL stdout/stderr) live in `/tmp/smite-e2e/` so they survive +//! a panic for post-mortem. +//! +//! ## Signals (all from AFL's own output with `AFL_DEBUG=1`) +//! +//! 1. **Hooks resolved.** AFL prints `Found 'afl_custom_'` per +//! `dlsym` hit at startup; we assert all six. +//! 2. **fuzz + describe produced queue entries.** Queue filenames carry +//! `smite-ir:` from `afl_custom_describe`. We require +//! both branches of `mutate_stacked`: `fresh` and one of +//! `op-param` / `input-swap`. +//! 3. **Trim was invoked** (`[Custom Trimming] START`). +//! 4. **Trim produced a smaller candidate** (`START: Max 1`). The +//! seed corpus mixes one DCE-reducible program (dead `LoadAmount` +//! appended) and one CSE-reducible program (duplicate +//! `LoadPrivateKey` injected) so both minimizers can fire. +//! 5. **AFL accepted a trimmed candidate** (`[Custom Trimming] +//! SUCCESS`). Only emitted when the trimmed bytes' coverage cksum +//! matches the original. Verifies DCE+CSE preserve coverage +//! end-to-end -- relies on the harness publishing a DCE/CSE-invariant +//! signal, see `smite-ir-e2e-test/src/main.rs`. + +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; + +use rand::SeedableRng; +use rand::rngs::SmallRng; +use smite_ir::generators::OpenChannelGenerator; +use smite_ir::{Generator, Instruction, Operation, Program, ProgramBuilder}; + +const AFL_RUN_SECONDS: u64 = 30; + +/// `true` when `bin` isn't on `PATH`. +fn missing(bin: &str) -> bool { + Command::new(bin) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .is_err() +} + +/// Builds the cdylib and the harness, returning their paths. 
+/// +/// The harness is built with `SanitizerCoverage` disabled +/// (`-Cllvm-args=-sanitizer-coverage-level=0`) because postcard's +/// decoder and `Program::validate` emit length-dependent edges that +/// bucket-shift under trim and break AFL's trim cksum. The harness +/// publishes coverage manually instead (see its module docs). +/// cargo-afl appends user RUSTFLAGS to its own and LLVM honors the +/// last `-Cllvm-args=` seen, so level=0 overrides cargo-afl's level=3. +fn build_artifacts(workspace: &Path) -> (PathBuf, PathBuf) { + let cargo = env!("CARGO"); + let run = |args: &[&str], dir: &Path, env: &[(&str, &str)]| { + let mut cmd = Command::new(cargo); + cmd.args(args).current_dir(dir); + for (k, v) in env { + cmd.env(k, v); + } + assert!( + cmd.status().expect("spawn cargo").success(), + "{args:?} failed" + ); + }; + run( + &["build", "--release", "-p", "smite-ir-mutator"], + workspace, + &[], + ); + let harness_dir = workspace.join("smite-ir-e2e-test"); + run( + &["afl", "build", "--release"], + &harness_dir, + &[("RUSTFLAGS", "-Cllvm-args=-sanitizer-coverage-level=0")], + ); + ( + workspace.join("target/release/libsmite_ir_mutator.so"), + harness_dir.join("target/release/smite-ir-e2e-test"), + ) +} + +/// Generator output, mutated by `f`, postcard-encoded. +fn build_seed(seed: u64, f: impl FnOnce(&mut Program)) -> Vec<u8> { + let mut rng = SmallRng::seed_from_u64(seed); + let mut builder = ProgramBuilder::new(); + OpenChannelGenerator.generate(&mut builder, &mut rng); + let mut program = builder.build(); + f(&mut program); + program.validate().expect("seed validates"); + postcard::to_allocvec(&program).expect("encode seed") +} + +/// Writes one DCE-reducible and one CSE-reducible seed into `in_dir`.
+fn write_seeds(in_dir: &Path) { + let dce = build_seed(0, |p| { + p.instructions.push(Instruction { + operation: Operation::LoadAmount(0xdead_beef), + inputs: vec![], + }); + }); + let cse = build_seed(1, |p| { + let keys: Vec<usize> = p + .instructions + .iter() + .enumerate() + .filter_map(|(i, instr)| { + matches!(instr.operation, Operation::LoadPrivateKey(_)).then_some(i) + }) + .collect(); + assert!( + keys.len() >= 2, + "CSE seed needs >=2 LoadPrivateKey instructions to inject a duplicate; got {}", + keys.len(), + ); + p.instructions[keys[1]] = p.instructions[keys[0]].clone(); + }); + fs::write(in_dir.join("dce.bin"), dce).expect("write dce seed"); + fs::write(in_dir.join("cse.bin"), cse).expect("write cse seed"); +} + +/// Spawns `cargo afl fuzz`, blocks until self-termination, returns +/// the combined stdout+stderr. +/// +/// `AFL_MAP_SIZE`+`AFL_SKIP_BIN_CHECK` are needed because the harness +/// has sancov disabled, so `__afl_final_loc` is 0 and AFL wouldn't +/// otherwise know the binary is fuzzable.
+fn run_afl(cdylib: &Path, harness: &Path, work: &Path) -> String { + let in_dir = work.join("in"); + let out_dir = work.join("out"); + let stdout = work.join("afl.stdout"); + let stderr = work.join("afl.stderr"); + let status = Command::new(env!("CARGO")) + .args(["afl", "fuzz"]) + .env("AFL_CUSTOM_MUTATOR_LIBRARY", cdylib) + .env("AFL_CUSTOM_MUTATOR_ONLY", "1") + .env("AFL_SKIP_CPUFREQ", "1") + .env("AFL_NO_AFFINITY", "1") + .env("AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES", "1") + .env("AFL_DEBUG", "1") + .env("AFL_MAP_SIZE", "65536") + .env("AFL_SKIP_BIN_CHECK", "1") + .args([ + "-V", + &AFL_RUN_SECONDS.to_string(), + "-i", + in_dir.to_str().unwrap(), + "-o", + out_dir.to_str().unwrap(), + "--", + harness.to_str().unwrap(), + ]) + .stdout(Stdio::from(fs::File::create(&stdout).unwrap())) + .stderr(Stdio::from(fs::File::create(&stderr).unwrap())) + .status() + .expect("spawn cargo afl fuzz"); + assert!(status.code().is_some(), "afl-fuzz killed by signal"); + format!( + "{}{}", + fs::read_to_string(&stdout).unwrap_or_default(), + fs::read_to_string(&stderr).unwrap_or_default(), + ) +} + +#[test] +#[ignore = "spawns afl-fuzz for ~30s; run with --ignored"] +fn afl_drives_custom_mutator() { + if missing("cargo-afl") { + eprintln!("SKIP: cargo-afl not on PATH (install with `cargo install cargo-afl`)"); + return; + } + + let workspace = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("workspace root") + .to_path_buf(); + let (cdylib, harness) = build_artifacts(&workspace); + + let work = std::env::temp_dir().join("smite-e2e"); + let _ = fs::remove_dir_all(&work); + fs::create_dir_all(work.join("in")).expect("mkdir in"); + write_seeds(&work.join("in")); + + let logs = run_afl(&cdylib, &harness, &work); + let hint = format!("see {}", work.display()); + + // 1. Every exported hook was resolved by AFL at startup. 
+ for hook in [ + "afl_custom_mutator", + "afl_custom_describe", + "afl_custom_init_trim", + "afl_custom_trim", + "afl_custom_post_trim", + "afl_custom_splice_optout", + ] { + assert!( + logs.contains(&format!("Found '{hook}'")), + "AFL did not log \"Found '{hook}'\"; {hint}", + ); + } + + // 2. fuzz + describe surfaced both mutate_stacked branches. + let names: Vec<String> = fs::read_dir(work.join("out/default/queue")) + .expect("read queue") + .filter_map(Result::ok) + .map(|e| e.file_name().to_string_lossy().into_owned()) + .collect(); + assert!( + names.iter().any(|n| n.contains("smite-ir:fresh")), + "no 'smite-ir:fresh' queue entry; {hint}", + ); + assert!( + names + .iter() + .any(|n| n.contains("op-param") || n.contains("input-swap")), + "no stacked-mutation queue entry; {hint}", + ); + + // 3. Trim was invoked. + let starts = logs.matches("[Custom Trimming] START").count(); + assert!(starts > 0, "init_trim was never invoked; {hint}"); + + // 4. Trim produced a smaller candidate (DCE or CSE fired). + let useful = logs.matches("[Custom Trimming] START: Max 1").count(); + assert!( + useful > 0, + "init_trim ran {starts} times but never returned a smaller candidate; {hint}", + ); + + // 5. AFL accepted a trimmed candidate (coverage cksum matched). + let success = logs.matches("[Custom Trimming] SUCCESS").count(); + assert!( + success > 0, + "init_trim offered {useful} candidate(s) but AFL accepted none (coverage mismatch); {hint}", + ); + + eprintln!( + "e2e summary: queue={} entries, trim starts={starts}, useful={useful}, success={success}", + names.len(), + ); +} From 8611863cdbc46d04a3fa30b676ef5c995bdc5978 Mon Sep 17 00:00:00 2001 From: Erick Cestari Date: Thu, 14 May 2026 09:23:48 -0300 Subject: [PATCH 6/6] ci: add AFL custom mutator e2e workflow Runs the smite-ir-mutator e2e test on PRs and pushes to master that touch the AFL-relevant crates (smite-ir, smite-ir-mutator, smite-ir-e2e-test, workspace manifests, or the workflow itself).
Installs `cargo-afl` (cached across runs), then runs the `#[ignore]` test with `--ignored`. Kept as a separate workflow rather than a step in `rust.yml` because the AFL toolchain install + harness build adds several minutes; the fast Rust gate stays fast. On failure, tars `/tmp/smite-e2e/` (seeds, queue, AFL stdout/stderr) and uploads it as an artifact -- AFL queue filenames contain colons, which actions/upload-artifact rejects, so the tarball is required. --- .github/workflows/afl-e2e.yml | 77 +++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .github/workflows/afl-e2e.yml diff --git a/.github/workflows/afl-e2e.yml b/.github/workflows/afl-e2e.yml new file mode 100644 index 0000000..58354ca --- /dev/null +++ b/.github/workflows/afl-e2e.yml @@ -0,0 +1,77 @@ +name: AFL e2e + +on: + push: + branches: ["master"] + paths: + - "smite-ir/**" + - "smite-ir-mutator/**" + - "smite-ir-e2e-test/**" + - "Cargo.toml" + - "Cargo.lock" + - ".github/workflows/afl-e2e.yml" + pull_request: + branches: ["master"] + paths: + - "smite-ir/**" + - "smite-ir-mutator/**" + - "smite-ir-e2e-test/**" + - "Cargo.toml" + - "Cargo.lock" + - ".github/workflows/afl-e2e.yml" + +env: + CARGO_TERM_COLOR: always + # Bump to invalidate the cargo-afl cache and pull a newer cargo-afl + # (and the bundled AFL++ runtime). + CARGO_AFL_VERSION: "0.18.1" + +jobs: + afl-custom-mutator-e2e: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + id: rust + uses: dtolnay/rust-toolchain@stable + + - name: Rust Cache + uses: Swatinem/rust-cache@v2 + with: + workspaces: | + . 
+ smite-ir-e2e-test + + - name: Cache cargo-afl + id: cargo-afl-cache + uses: actions/cache@v4 + with: + path: | + ~/.cargo/bin/cargo-afl + ~/.local/share/afl.rs + key: cargo-afl-${{ runner.os }}-${{ steps.rust.outputs.cachekey }}-${{ env.CARGO_AFL_VERSION }} + + - name: Install cargo-afl + if: steps.cargo-afl-cache.outputs.cache-hit != 'true' + run: cargo install cargo-afl --locked --force --version ${{ env.CARGO_AFL_VERSION }} + + - name: Run AFL custom mutator e2e + run: | + cargo test -p smite-ir-mutator --test afl_custom_mutator_e2e \ + -- --ignored --nocapture + + # AFL's queue filenames contain colons (e.g. `id:000000,...`), + # which actions/upload-artifact rejects. Tar the directory first. + - name: Archive AFL output + if: failure() + run: tar -czf /tmp/smite-e2e-logs.tar.gz -C /tmp smite-e2e + + - name: Upload AFL output on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: afl-e2e-logs + path: /tmp/smite-e2e-logs.tar.gz + if-no-files-found: ignore