diff --git a/.github/workflows/afl-e2e.yml b/.github/workflows/afl-e2e.yml new file mode 100644 index 0000000..58354ca --- /dev/null +++ b/.github/workflows/afl-e2e.yml @@ -0,0 +1,77 @@ +name: AFL e2e + +on: + push: + branches: ["master"] + paths: + - "smite-ir/**" + - "smite-ir-mutator/**" + - "smite-ir-e2e-test/**" + - "Cargo.toml" + - "Cargo.lock" + - ".github/workflows/afl-e2e.yml" + pull_request: + branches: ["master"] + paths: + - "smite-ir/**" + - "smite-ir-mutator/**" + - "smite-ir-e2e-test/**" + - "Cargo.toml" + - "Cargo.lock" + - ".github/workflows/afl-e2e.yml" + +env: + CARGO_TERM_COLOR: always + # Bump to invalidate the cargo-afl cache and pull a newer cargo-afl + # (and the bundled AFL++ runtime). + CARGO_AFL_VERSION: "0.18.1" + +jobs: + afl-custom-mutator-e2e: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust toolchain + id: rust + uses: dtolnay/rust-toolchain@stable + + - name: Rust Cache + uses: Swatinem/rust-cache@v2 + with: + workspaces: | + . + smite-ir-e2e-test + + - name: Cache cargo-afl + id: cargo-afl-cache + uses: actions/cache@v4 + with: + path: | + ~/.cargo/bin/cargo-afl + ~/.local/share/afl.rs + key: cargo-afl-${{ runner.os }}-${{ steps.rust.outputs.cachekey }}-${{ env.CARGO_AFL_VERSION }} + + - name: Install cargo-afl + if: steps.cargo-afl-cache.outputs.cache-hit != 'true' + run: cargo install cargo-afl --locked --force --version ${{ env.CARGO_AFL_VERSION }} + + - name: Run AFL custom mutator e2e + run: | + cargo test -p smite-ir-mutator --test afl_custom_mutator_e2e \ + -- --ignored --nocapture + + # AFL's queue filenames contain colons (e.g. `id:000000,...`), + # which actions/upload-artifact rejects. Tar the directory first. 
+ - name: Archive AFL output + if: failure() + run: tar -czf /tmp/smite-e2e-logs.tar.gz -C /tmp smite-e2e + + - name: Upload AFL output on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: afl-e2e-logs + path: /tmp/smite-e2e-logs.tar.gz + if-no-files-found: ignore diff --git a/Cargo.toml b/Cargo.toml index 3c01e0b..a309c07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ "smite-nyx-sys", "smite-scenarios", ] -exclude = ["workloads/ldk"] +exclude = ["workloads/ldk", "smite-ir-e2e-test"] [workspace.package] version = "0.0.0" diff --git a/README.md b/README.md index 89bcbd5..a0a056b 100644 --- a/README.md +++ b/README.md @@ -65,13 +65,11 @@ printf '\x00' > /tmp/smite-seeds/empty # Start fuzzing with the custom mutator AFL_CUSTOM_MUTATOR_LIBRARY=target/release/libsmite_ir_mutator.so \ AFL_CUSTOM_MUTATOR_ONLY=1 \ -AFL_DISABLE_TRIM=1 \ ~/AFLplusplus/afl-fuzz -X -i /tmp/smite-seeds -o /tmp/smite-out -- /tmp/smite-nyx ``` `AFL_CUSTOM_MUTATOR_ONLY=1` disables AFL++'s built-in mutators (which would -corrupt the postcard encoding). `AFL_DISABLE_TRIM=1` prevents AFL++ from -trimming inputs (which would also corrupt the encoding). +corrupt the postcard encoding). ## Running Modes diff --git a/smite-ir-e2e-test/Cargo.lock b/smite-ir-e2e-test/Cargo.lock new file mode 100644 index 0000000..061adc2 --- /dev/null +++ b/smite-ir-e2e-test/Cargo.lock @@ -0,0 +1,502 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common", + "generic-array", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "base58ck" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c8d66485a3a2ea485c1913c4572ce0256067a5377ac8c75c4960e1cda98605f" +dependencies = [ + "bitcoin-internals", + "bitcoin_hashes", +] + +[[package]] +name = "bech32" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32637268377fc7b10a8c6d51de3e7fba1ce5dd371a96e342b34e6078db558e7f" + +[[package]] +name = "bitcoin" +version = "0.32.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf93e61f2dbc3e3c41234ca26a65e2c0b0975c52e0f069ab9893ebbede584d3" +dependencies = [ + "base58ck", + "bech32", + "bitcoin-internals", + "bitcoin-io", + "bitcoin-units", + "bitcoin_hashes", + "hex-conservative", + "hex_lit", + "secp256k1", +] + +[[package]] +name = "bitcoin-internals" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30bdbe14aa07b06e6cfeffc529a1f099e5fbe249524f8125358604df99a4bed2" + +[[package]] +name = "bitcoin-io" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dee39a0ee5b4095224a0cfc6bf4cc1baf0f9624b96b367e53b66d974e51d953" + +[[package]] +name = "bitcoin-units" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "346568ebaab2918487cea76dd55dae13c27bb618cdb737c952e69eb2017c4118" +dependencies = [ + "bitcoin-internals", +] + +[[package]] +name = "bitcoin_hashes" +version 
= "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26ec84b80c482df901772e931a9a681e26a1b9ee2302edeff23cb30328745c8b" +dependencies = [ + "bitcoin-io", + "hex-conservative", +] + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "cc" +version = "1.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chacha20" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "chacha20poly1305" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10cd79432192d1c0f4e1a0fef9527696cc039165d729fb41b3f4f4f354c2dc35" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", + "zeroize", +] + +[[package]] +name = "cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror", +] + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hex-conservative" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fda06d18ac606267c40c04e41b9947729bf8b9efe74bd4e82b61a5f26a510b9f" +dependencies = [ + "arrayvec", +] + +[[package]] +name = "hex_lit" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3011d1213f159867b13cfd6ac92d2cd5f1345762c63be3554e84092d85a50bbd" + +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + +[[package]] +name = "poly1305" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" +dependencies = [ + "cpufeatures", + "opaque-debug", + "universal-hash", +] + +[[package]] +name = "postcard" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + "serde", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" 
+dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + +[[package]] +name = "secp256k1" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9465315bc9d4566e1724f0fffcbcc446268cb522e60f9a27bcded6b19c108113" +dependencies = [ + "bitcoin_hashes", + "secp256k1-sys", +] + +[[package]] +name = "secp256k1-sys" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4387882333d3aa8cb20530a17c69a3752e97837832f34f6dccc760e715001d9" +dependencies = [ + "cc", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = 
"1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simple_logger" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7038d0e96661bf9ce647e1a6f6ef6d6f3663f66d9bf741abf14ba4876071c17" +dependencies = [ + "log", + "windows-sys", +] + +[[package]] +name = "smite" +version = "0.0.0" +dependencies = [ + "bitcoin", + "chacha20poly1305", + "hex", + "log", + "nix", + "simple_logger", + "thiserror", +] + +[[package]] +name = "smite-ir" +version = "0.0.0" +dependencies = [ + "bitcoin", + "postcard", + "rand", + "serde", + "smite", + "thiserror", +] + +[[package]] +name = "smite-ir-e2e-test" +version = "0.0.0" +dependencies = [ + "postcard", + "smite-ir", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "typenum" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" + 
+[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common", + "subtle", +] + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" diff --git a/smite-ir-e2e-test/Cargo.toml b/smite-ir-e2e-test/Cargo.toml new file mode 100644 index 0000000..c89c835 --- /dev/null +++ b/smite-ir-e2e-test/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "smite-ir-e2e-test" +version = "0.0.0" +edition = "2024" +license = "MIT" + +[[bin]] +name = "smite-ir-e2e-test" +path = "src/main.rs" + +[dependencies] +smite-ir = { path = "../smite-ir" } +postcard = { version = "1.1", default-features = false, features = ["alloc"] } diff --git a/smite-ir-e2e-test/src/main.rs b/smite-ir-e2e-test/src/main.rs new file mode 100644 index 0000000..1bfbd1e --- /dev/null +++ b/smite-ir-e2e-test/src/main.rs @@ -0,0 +1,116 @@ +//! 
Minimal AFL++ harness for the smite IR custom-mutator e2e test. +//! +//! ## Design principle: coverage comes only from side-effecting work +//! +//! Smite only cares about what the IR program *does* against the +//! target -- bytes it sends, responses it receives. Pure setup +//! instructions (load a literal, derive a point, extract a field) +//! are means to that end and aren't fuzzing signal by themselves. +//! This is true for any smite workload, not just this e2e test: the +//! harness emits coverage feedback only for instructions +//! transitively feeding a side-effect root. +//! Programs that load and compute but never act produce *zero* +//! coverage and AFL never queues them. +//! +//! ## Why we hand-roll the bitmap +//! +//! The bitmap must be *bit-identical* across DCE/CSE-trimmed variants +//! of the same program (so AFL's trim cksum accepts the shrunk +//! candidate) yet *vary* under our mutators (so AFL queues new +//! entries). Any compiler-inserted edge whose hit count tracks +//! `program.instructions.len()` fails the first half: DCE/CSE move +//! the count across AFL's hit-count buckets and the cksum mismatches. +//! `postcard::from_bytes` and `Program::validate` both contain such +//! loops, and rustc doesn't expose a SanitizerCoverage allowlist to +//! exclude them. +//! +//! So we disable SanitizerCoverage entirely (build with +//! `RUSTFLAGS=-Cllvm-args=-sanitizer-coverage-level=0`) and publish +//! coverage manually. The signal: for each instruction reachable from +//! a side-effect root, mark a slot derived from a content hash of +//! `(operation, hashes of inputs)`. Because the hash folds *input +//! content* (not indices), DCE renumbering doesn't change it; CSE +//! merges duplicates whose hashes were already equal; +//! `OperationParamMutator` shifts an operation's hash (and its +//! consumers'); `InputSwapMutator` rewires an edge and shifts the +//! consumer's hash. +//! +//! 
We don't use `afl::fuzz!`: it forces persistent + shmem delivery,
+//! which hangs during calibration when SanitizerCoverage is off. We
+//! call `__afl_manual_init` and read each test case from stdin.
+
+use std::hash::{DefaultHasher, Hash, Hasher};
+use std::io::Read;
+
+use smite_ir::{Operation, Program};
+
+unsafe extern "C" {
+    static __afl_area_ptr: *mut u8;
+    fn __afl_manual_init();
+}
+
+/// Overrides afl-compiler-rt's weak symbol to keep test cases on
+/// stdin instead of shared memory (see module docs).
+#[unsafe(no_mangle)]
+pub static mut __afl_sharedmem_fuzzing: i32 = 0;
+
+/// Matches `AFL_MAP_SIZE=65536` set by the test driver.
+const MAP_MASK: u32 = (1 << 16) - 1;
+
+fn main() {
+    unsafe { __afl_manual_init() };
+
+    let mut data = Vec::new();
+    if std::io::stdin().lock().read_to_end(&mut data).is_err() {
+        return;
+    }
+    let Ok(program) = postcard::from_bytes::<Program>(&data) else {
+        return;
+    };
+    if program.validate().is_err() {
+        return;
+    }
+
+    // Content hash per instruction. SSA order means an instruction's
+    // inputs are already hashed by the time we reach it, so one
+    // forward pass is enough -- no recursion or memoization needed.
+    let n = program.instructions.len();
+    let mut hashes = vec![0u64; n];
+    for (i, instr) in program.instructions.iter().enumerate() {
+        let mut h = DefaultHasher::new();
+        instr.operation.hash(&mut h);
+        for &inp in &instr.inputs {
+            if inp < i {
+                hashes[inp].hash(&mut h);
+            }
+        }
+        hashes[i] = h.finish();
+    }
+
+    // Mark slots for instructions reachable from side-effect roots.
+    // Pure instructions that never feed a SendMessage/RecvAcceptChannel
+    // contribute no coverage. Walk in reverse so a marked instruction
+    // propagates to its (earlier) inputs in one pass.
+    let mut reachable = vec![false; n];
+    let ptr = unsafe { __afl_area_ptr };
+    for i in (0..n).rev() {
+        let instr = &program.instructions[i];
+        let is_root = matches!(
+            instr.operation,
+            Operation::SendMessage | Operation::RecvAcceptChannel
+        );
+        if !(is_root || reachable[i]) {
+            continue;
+        }
+        reachable[i] = true;
+        for &inp in &instr.inputs {
+            if inp < n {
+                reachable[inp] = true;
+            }
+        }
+        if !ptr.is_null() {
+            let slot = (hashes[i] as u32) & MAP_MASK;
+            unsafe { *ptr.add(slot as usize) = 1 };
+        }
+    }
+}
diff --git a/smite-ir-mutator/src/lib.rs b/smite-ir-mutator/src/lib.rs
index 0903ee2..f258f62 100644
--- a/smite-ir-mutator/src/lib.rs
+++ b/smite-ir-mutator/src/lib.rs
@@ -13,8 +13,6 @@
 //! - `AFL_CUSTOM_MUTATOR_ONLY=1` -- disable AFL++'s byte mutators. This also
 //!   disables the havoc stage entirely, so we deliberately do not implement
 //!   `afl_custom_havoc_mutation`.
-//! - `AFL_DISABLE_TRIM=1` -- this library does not implement custom trim and
-//!   AFL++'s default byte-level trim would corrupt our structured programs.
 //!
 //! # Buffer ownership
 //!
@@ -30,6 +28,7 @@
 use rand::rngs::SmallRng;
 use rand::{RngExt, SeedableRng};
 use smite_ir::generators::OpenChannelGenerator;
+use smite_ir::minimizers::{CommonSubexpressionEliminator, DeadCodeEliminator, Minimizer};
 use smite_ir::mutators::{InputSwapMutator, OperationParamMutator};
 use smite_ir::{Generator, Mutator, Program, ProgramBuilder};
 
@@ -190,6 +189,105 @@ pub unsafe extern "C" fn afl_custom_fuzz(
     len
 }
 
+/// Runs the full minimizer pipeline (`DeadCodeEliminator` then
+/// `CommonSubexpressionEliminator`) on the corpus entry and stages the
+/// resulting candidate for [`afl_custom_trim`] to hand back.
+///
+/// Both minimizers are deterministic in-process transforms safe in IR
+/// semantics, so we don't need iterative AFL feedback. We compose them
+/// once and offer a single candidate.
AFL still gets to verify it (its
+/// coverage cksum is the source of truth); on rejection AFL silently
+/// discards the candidate and keeps the original corpus entry.
+///
+/// AFL drives the trim loop with `while (stage_cur < stage_max)`, where
+/// `stage_max` is this function's return value and `stage_cur` is updated
+/// from [`afl_custom_post_trim`]'s return.
+///
+/// # Returns
+///
+/// - `1` if there's a candidate to offer (decode succeeded, validate
+///   passed, and the trim actually shrank the program). AFL enters the
+///   trim loop for one iteration.
+/// - `0` if there's nothing to do (decode/validate failed, the trim was a
+///   no-op, or `state.serialize` rejected the candidate). AFL skips trim.
+/// - Negative would signal a fatal error to AFL; we never produce one.
+///
+/// # Safety
+///
+/// - `data` must be a pointer returned by [`afl_custom_init`].
+/// - `buf` must point to `buf_size` readable bytes.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn afl_custom_init_trim(
+    data: *mut c_void,
+    buf: *mut u8,
+    buf_size: usize,
+) -> i32 {
+    let state = unsafe { &mut *data.cast::<MutatorState>() };
+
+    let input = unsafe { slice::from_raw_parts(buf, buf_size) };
+    let Some(program) = decode_and_validate(input) else {
+        return 0;
+    };
+
+    let mut trimmed = program;
+    let dce_changed = DeadCodeEliminator.minimize(&mut trimmed);
+    let cse_changed = CommonSubexpressionEliminator.minimize(&mut trimmed);
+    if (!dce_changed && !cse_changed) || !state.serialize(&trimmed, buf_size) {
+        return 0;
+    }
+
+    1
+}
+
+/// Hands the pre-serialized trimmed candidate back to AFL.
+///
+/// The pointer written into `*out_buf` borrows from `MutatorState::out_buf`
+/// and is valid until the next call into this library; AFL copies the
+/// bytes before re-entering us. We always write a non-null pointer (even
+/// on the zero-length path) to satisfy AFL's `if (unlikely(!retbuf))
+/// FATAL(...)` check.
+///
+/// # Returns
+///
+/// - The byte length of the staged candidate at `*out_buf` (expected `> 0`
+///   for the single call that follows [`afl_custom_init_trim`] returning `1`).
+/// - This function never clears the staged bytes, so it does not itself signal
+///   "done" with a `0`; the loop terminates via [`afl_custom_post_trim`]'s
+///   return.
+///
+/// # Safety
+///
+/// - `data` must be a pointer returned by [`afl_custom_init`].
+/// - `out_buf` must be a valid, writable pointer to a `*const u8` slot.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn afl_custom_trim(data: *mut c_void, out_buf: *mut *const u8) -> usize {
+    let state = unsafe { &mut *data.cast::<MutatorState>() };
+    unsafe { *out_buf = state.out_buf.as_ptr() };
+    state.out_buf.len()
+}
+
+/// Always returns `1` to terminate AFL's trim loop after a single
+/// iteration.
+///
+/// AFL drives trim with `while (stage_cur < stage_max)` and assigns
+/// `stage_cur` from this function's return value. With `stage_max = 1`
+/// (set by [`afl_custom_init_trim`]), returning `1` makes the condition
+/// `1 < 1` false and breaks the loop.
+///
+/// `success` indicates whether the candidate's coverage cksum matched the
+/// original. We don't need to act on it: AFL itself either persists the
+/// trimmed buffer (on success) or keeps the original corpus entry (on
+/// failure), and we don't track partial state across iterations because
+/// there's only one.
+///
+/// # Safety
+///
+/// - `data` must be a pointer returned by [`afl_custom_init`].
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn afl_custom_post_trim(_data: *mut c_void, _success: u8) -> i32 {
+    1
+}
+
 /// Marker symbol that tells AFL++ not to populate `add_buf` for
 /// [`afl_custom_fuzz`].
AFL++ never actually calls this function -- it only
 /// checks for the symbol's presence via `dlsym` and, if found, skips picking a
@@ -300,6 +398,19 @@ mod tests {
         postcard::to_allocvec(&builder.build()).expect("postcard serialization")
     }
 
+    /// `seed_program_bytes()` plus an unreferenced `LoadAmount`, so the
+    /// pipeline has something for DCE to drop (and thus `init_trim`
+    /// returns `1`).
+    fn reducible_seed_bytes() -> Vec<u8> {
+        let bytes = seed_program_bytes();
+        let mut program: Program = postcard::from_bytes(&bytes).expect("decode");
+        program.instructions.push(smite_ir::Instruction {
+            operation: smite_ir::Operation::LoadAmount(0xdead_beef),
+            inputs: vec![],
+        });
+        postcard::to_allocvec(&program).expect("encode")
+    }
+
     #[test]
     fn init_returns_nonnull() {
         let state = State::new(0);
@@ -397,4 +508,106 @@ mod tests {
         // crash either.
         unsafe { afl_custom_splice_optout(ptr::null_mut()) };
     }
+
+    // -- Trim tests --
+
+    fn init_trim_via_ffi(state: &State, mut input: Vec<u8>) -> i32 {
+        unsafe { afl_custom_init_trim(state.0, input.as_mut_ptr(), input.len()) }
+    }
+
+    fn trim_via_ffi(state: &State) -> (*const u8, usize) {
+        let mut out: *const u8 = ptr::null();
+        let len = unsafe { afl_custom_trim(state.0, &raw mut out) };
+        (out, len)
+    }
+
+    fn post_trim_via_ffi(state: &State, success: bool) -> i32 {
+        unsafe { afl_custom_post_trim(state.0, u8::from(success)) }
+    }
+
+    #[test]
+    fn trim_init_returns_1_when_reduction_possible() {
+        let state = State::new(0);
+        let rv = init_trim_via_ffi(&state, reducible_seed_bytes());
+        assert_eq!(rv, 1);
+    }
+
+    #[test]
+    fn trim_init_returns_0_when_no_reduction_possible() {
+        // Generator output has no dead code or duplicate loads; the
+        // pipeline is a no-op, so we tell AFL to skip trim entirely.
+        let state = State::new(0);
+        let rv = init_trim_via_ffi(&state, seed_program_bytes());
+        assert_eq!(rv, 0);
+    }
+
+    #[test]
+    fn trim_init_returns_0_for_garbage() {
+        let state = State::new(0);
+        let rv = init_trim_via_ffi(&state, vec![0xFF; 16]);
+        assert_eq!(rv, 0);
+    }
+
+    #[test]
+    fn trim_yields_candidate_after_init() {
+        let state = State::new(0);
+        init_trim_via_ffi(&state, reducible_seed_bytes());
+        let (out, len) = trim_via_ffi(&state);
+        assert!(len > 0);
+        decode_and_validate(out, len);
+    }
+
+    #[test]
+    fn trim_post_trim_returns_1_to_terminate_loop() {
+        let state = State::new(0);
+        init_trim_via_ffi(&state, reducible_seed_bytes());
+        let _ = trim_via_ffi(&state);
+        // post_trim returns 1 unconditionally — it's the load-bearing
+        // termination signal that pushes AFL's `stage_cur` to `stage_max`.
+        assert_eq!(post_trim_via_ffi(&state, true), 1);
+        assert_eq!(post_trim_via_ffi(&state, false), 1);
+    }
+
+    #[test]
+    fn trim_init_does_not_overwrite_sequence() {
+        // Trim is not a mutation; `last_sequence` (used by `describe` to
+        // name queue entries from fuzz) must survive both the no-op and
+        // successful trim paths.
+        for (label, input, expected_rv) in [
+            ("no-op", seed_program_bytes(), 0),
+            ("success", reducible_seed_bytes(), 1),
+        ] {
+            let state = State::new(0);
+            // Run a fuzz call so last_sequence has known contents.
+            let _ = fuzz_via_ffi(&state, Vec::new(), 1 << 16);
+            let before = unsafe { CStr::from_ptr(afl_custom_describe(state.0, 256)) }
+                .to_str()
+                .expect("valid utf-8")
+                .to_string();
+            let rv = init_trim_via_ffi(&state, input);
+            assert_eq!(rv, expected_rv, "{label}");
+            let after = unsafe { CStr::from_ptr(afl_custom_describe(state.0, 256)) }
+                .to_str()
+                .expect("valid utf-8")
+                .to_string();
+            assert_eq!(before, after, "{label}");
+        }
+    }
+
+    #[test]
+    fn trim_candidate_is_smaller_than_input() {
+        let original_bytes = reducible_seed_bytes();
+        let original_program: Program = postcard::from_bytes(&original_bytes).expect("decode");
+
+        let state = State::new(0);
+        init_trim_via_ffi(&state, original_bytes);
+
+        let (out, len) = trim_via_ffi(&state);
+        assert!(len > 0, "trim should yield a candidate");
+        let trimmed = decode_and_validate(out, len);
+        assert!(
+            trimmed.instructions.len() < original_program.instructions.len(),
+            "trim should shrink instruction count"
+        );
+    }
 }
diff --git a/smite-ir-mutator/tests/afl_custom_mutator_e2e.rs b/smite-ir-mutator/tests/afl_custom_mutator_e2e.rs
new file mode 100644
index 0000000..0785117
--- /dev/null
+++ b/smite-ir-mutator/tests/afl_custom_mutator_e2e.rs
@@ -0,0 +1,253 @@
+//! End-to-end test for the smite IR custom mutator. Drives the real
+//! `afl-fuzz` binary against our harness with the cdylib loaded as
+//! `AFL_CUSTOM_MUTATOR_LIBRARY`, and asserts every hook we export is
+//! actually used in a real fuzzing run.
+//!
+//! Marked `#[ignore]`; run with:
+//!
+//! ```
+//! cargo test -p smite-ir-mutator --test afl_custom_mutator_e2e -- \
+//!     --ignored --nocapture
+//! ```
+//!
+//! Skips cleanly if `cargo-afl` isn't on `PATH`. Working files (seeds,
+//! queue, AFL stdout/stderr) live in `/tmp/smite-e2e/` so they survive
+//! a panic for post-mortem.
+//!
+//! ## Signals (all from AFL's own output with `AFL_DEBUG=1`)
+//!
+//! 1. **Hooks resolved.** AFL prints `Found 'afl_custom_'` per
+//!
`dlsym` hit at startup; we assert all six.
+//! 2. **fuzz + describe produced queue entries.** Queue filenames carry
+//!    `smite-ir:` from `afl_custom_describe`. We require
+//!    both branches of `mutate_stacked`: `fresh` and one of
+//!    `op-param` / `input-swap`.
+//! 3. **Trim was invoked** (`[Custom Trimming] START`).
+//! 4. **Trim produced a smaller candidate** (`START: Max 1`). The
+//!    seed corpus mixes one DCE-reducible program (dead `LoadAmount`
+//!    appended) and one CSE-reducible program (duplicate
+//!    `LoadPrivateKey` injected) so both minimizers can fire.
+//! 5. **AFL accepted a trimmed candidate** (`[Custom Trimming]
+//!    SUCCESS`). Only emitted when the trimmed bytes' coverage cksum
+//!    matches the original. Verifies DCE+CSE preserve coverage
+//!    end-to-end -- relies on the harness publishing a DCE/CSE-invariant
+//!    signal, see `smite-ir-e2e-test/src/main.rs`.
+
+use std::fs;
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+
+use rand::SeedableRng;
+use rand::rngs::SmallRng;
+use smite_ir::generators::OpenChannelGenerator;
+use smite_ir::{Generator, Instruction, Operation, Program, ProgramBuilder};
+
+const AFL_RUN_SECONDS: u64 = 30;
+
+/// `true` when `bin` can't be spawned (e.g. it isn't on `PATH`).
+fn missing(bin: &str) -> bool {
+    Command::new(bin)
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .status()
+        .is_err()
+}
+
+/// Builds the cdylib and the harness, returning their paths.
+///
+/// The harness is built with `SanitizerCoverage` disabled
+/// (`-Cllvm-args=-sanitizer-coverage-level=0`) because postcard's
+/// decoder and `Program::validate` emit length-dependent edges that
+/// bucket-shift under trim and break AFL's trim cksum. The harness
+/// publishes coverage manually instead (see its module docs).
+/// cargo-afl appends user RUSTFLAGS to its own and LLVM honors the
+/// last `-Cllvm-args=` seen, so level=0 overrides cargo-afl's level=3.
+fn build_artifacts(workspace: &Path) -> (PathBuf, PathBuf) {
+    let cargo = env!("CARGO");
+    let run = |args: &[&str], dir: &Path, env: &[(&str, &str)]| {
+        let mut cmd = Command::new(cargo);
+        cmd.args(args).current_dir(dir);
+        for (k, v) in env {
+            cmd.env(k, v);
+        }
+        assert!(
+            cmd.status().expect("spawn cargo").success(),
+            "{args:?} failed"
+        );
+    };
+    run(
+        &["build", "--release", "-p", "smite-ir-mutator"],
+        workspace,
+        &[],
+    );
+    let harness_dir = workspace.join("smite-ir-e2e-test");
+    run(
+        &["afl", "build", "--release"],
+        &harness_dir,
+        &[("RUSTFLAGS", "-Cllvm-args=-sanitizer-coverage-level=0")],
+    );
+    (
+        workspace.join("target/release/libsmite_ir_mutator.so"),
+        harness_dir.join("target/release/smite-ir-e2e-test"),
+    )
+}
+
+/// Generator output, mutated by `f`, postcard-encoded.
+fn build_seed(seed: u64, f: impl FnOnce(&mut Program)) -> Vec<u8> {
+    let mut rng = SmallRng::seed_from_u64(seed);
+    let mut builder = ProgramBuilder::new();
+    OpenChannelGenerator.generate(&mut builder, &mut rng);
+    let mut program = builder.build();
+    f(&mut program);
+    program.validate().expect("seed validates");
+    postcard::to_allocvec(&program).expect("encode seed")
+}
+
+/// Writes one DCE-reducible and one CSE-reducible seed into `in_dir`.
+fn write_seeds(in_dir: &Path) {
+    let dce = build_seed(0, |p| {
+        p.instructions.push(Instruction {
+            operation: Operation::LoadAmount(0xdead_beef),
+            inputs: vec![],
+        });
+    });
+    let cse = build_seed(1, |p| {
+        let keys: Vec<usize> = p
+            .instructions
+            .iter()
+            .enumerate()
+            .filter_map(|(i, instr)| {
+                matches!(instr.operation, Operation::LoadPrivateKey(_)).then_some(i)
+            })
+            .collect();
+        assert!(
+            keys.len() >= 2,
+            "CSE seed needs >=2 LoadPrivateKey instructions to inject a duplicate; got {}",
+            keys.len(),
+        );
+        p.instructions[keys[1]] = p.instructions[keys[0]].clone();
+    });
+    fs::write(in_dir.join("dce.bin"), dce).expect("write dce seed");
+    fs::write(in_dir.join("cse.bin"), cse).expect("write cse seed");
+}
+
+/// Spawns `cargo afl fuzz`, blocks until self-termination, returns
+/// the combined stdout+stderr.
+///
+/// `AFL_MAP_SIZE`+`AFL_SKIP_BIN_CHECK` are needed because the harness
+/// has sancov disabled, so `__afl_final_loc` is 0 and AFL wouldn't
+/// otherwise know the binary is fuzzable.
+fn run_afl(cdylib: &Path, harness: &Path, work: &Path) -> String { + let in_dir = work.join("in"); + let out_dir = work.join("out"); + let stdout = work.join("afl.stdout"); + let stderr = work.join("afl.stderr"); + let status = Command::new(env!("CARGO")) + .args(["afl", "fuzz"]) + .env("AFL_CUSTOM_MUTATOR_LIBRARY", cdylib) + .env("AFL_CUSTOM_MUTATOR_ONLY", "1") + .env("AFL_SKIP_CPUFREQ", "1") + .env("AFL_NO_AFFINITY", "1") + .env("AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES", "1") + .env("AFL_DEBUG", "1") + .env("AFL_MAP_SIZE", "65536") + .env("AFL_SKIP_BIN_CHECK", "1") + .args([ + "-V", + &AFL_RUN_SECONDS.to_string(), + "-i", + in_dir.to_str().unwrap(), + "-o", + out_dir.to_str().unwrap(), + "--", + harness.to_str().unwrap(), + ]) + .stdout(Stdio::from(fs::File::create(&stdout).unwrap())) + .stderr(Stdio::from(fs::File::create(&stderr).unwrap())) + .status() + .expect("spawn cargo afl fuzz"); + assert!(status.code().is_some(), "afl-fuzz killed by signal"); + format!( + "{}{}", + fs::read_to_string(&stdout).unwrap_or_default(), + fs::read_to_string(&stderr).unwrap_or_default(), + ) +} + +#[test] +#[ignore = "spawns afl-fuzz for ~30s; run with --ignored"] +fn afl_drives_custom_mutator() { + if missing("cargo-afl") { + eprintln!("SKIP: cargo-afl not on PATH (install with `cargo install cargo-afl`)"); + return; + } + + let workspace = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .expect("workspace root") + .to_path_buf(); + let (cdylib, harness) = build_artifacts(&workspace); + + let work = std::env::temp_dir().join("smite-e2e"); + let _ = fs::remove_dir_all(&work); + fs::create_dir_all(work.join("in")).expect("mkdir in"); + write_seeds(&work.join("in")); + + let logs = run_afl(&cdylib, &harness, &work); + let hint = format!("see {}", work.display()); + + // 1. Every exported hook was resolved by AFL at startup. 
+ for hook in [ + "afl_custom_mutator", + "afl_custom_describe", + "afl_custom_init_trim", + "afl_custom_trim", + "afl_custom_post_trim", + "afl_custom_splice_optout", + ] { + assert!( + logs.contains(&format!("Found '{hook}'")), + "AFL did not log \"Found '{hook}'\"; {hint}", + ); + } + + // 2. fuzz + describe surfaced both mutate_stacked branches. + let names: Vec = fs::read_dir(work.join("out/default/queue")) + .expect("read queue") + .filter_map(Result::ok) + .map(|e| e.file_name().to_string_lossy().into_owned()) + .collect(); + assert!( + names.iter().any(|n| n.contains("smite-ir:fresh")), + "no 'smite-ir:fresh' queue entry; {hint}", + ); + assert!( + names + .iter() + .any(|n| n.contains("op-param") || n.contains("input-swap")), + "no stacked-mutation queue entry; {hint}", + ); + + // 3. Trim was invoked. + let starts = logs.matches("[Custom Trimming] START").count(); + assert!(starts > 0, "init_trim was never invoked; {hint}"); + + // 4. Trim produced a smaller candidate (DCE or CSE fired). + let useful = logs.matches("[Custom Trimming] START: Max 1").count(); + assert!( + useful > 0, + "init_trim ran {starts} times but never returned a smaller candidate; {hint}", + ); + + // 5. AFL accepted a trimmed candidate (coverage cksum matched). + let success = logs.matches("[Custom Trimming] SUCCESS").count(); + assert!( + success > 0, + "init_trim offered {useful} candidate(s) but AFL accepted none (coverage mismatch); {hint}", + ); + + eprintln!( + "e2e summary: queue={} entries, trim starts={starts}, useful={useful}, success={success}", + names.len(), + ); +} diff --git a/smite-ir/src/instruction.rs b/smite-ir/src/instruction.rs index 8c5b9c3..c4a18af 100644 --- a/smite-ir/src/instruction.rs +++ b/smite-ir/src/instruction.rs @@ -12,7 +12,7 @@ use super::Operation; /// /// In SSA form, each instruction produces at most one variable (at the index /// equal to the instruction's position in the program). 
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct Instruction { /// The operation to perform. pub operation: Operation, diff --git a/smite-ir/src/lib.rs b/smite-ir/src/lib.rs index 72d0874..70992c4 100644 --- a/smite-ir/src/lib.rs +++ b/smite-ir/src/lib.rs @@ -7,6 +7,7 @@ //! //! # Modules //! - [`instruction`] - Single IR instruction (operation + input references). +//! - [`minimizers`] - Shrink a program while preserving interesting behaviour. //! - [`operation`] - Operations that load, compute, build or act. //! - [`program`] - Ordered list of instructions. //! - [`variable`] - Typed runtime values and lightweight type tags. @@ -14,6 +15,7 @@ pub mod builder; pub mod generators; pub mod instruction; +pub mod minimizers; pub mod mutators; pub mod operation; pub mod program; @@ -22,6 +24,7 @@ pub mod variable; pub use builder::ProgramBuilder; pub use generators::Generator; pub use instruction::Instruction; +pub use minimizers::Minimizer; pub use mutators::Mutator; pub use operation::Operation; pub use program::Program; diff --git a/smite-ir/src/minimizers.rs b/smite-ir/src/minimizers.rs new file mode 100644 index 0000000..7cf7cd7 --- /dev/null +++ b/smite-ir/src/minimizers.rs @@ -0,0 +1,24 @@ +//! IR program minimizers. +//! +//! A [`Minimizer`] reduces a [`Program`] to a smaller, behaviourally +//! equivalent version in a single pass. Both transforms are safe in IR +//! semantics, so they don't need an oracle to drive the search. +//! +//! Run them in pipeline order for best results: +//! 1. [`DeadCodeEliminator`] — drop dead instructions and reindex +//! 2. [`CommonSubexpressionEliminator`] — merge equivalent pure expressions + +mod cse; +mod dead_code; + +pub use cse::CommonSubexpressionEliminator; +pub use dead_code::DeadCodeEliminator; + +use super::Program; + +/// A minimizer that reduces an IR program in one call. 
+pub trait Minimizer { + /// Reduces `program` in place to a smaller, behaviourally equivalent + /// version. Returns `true` if the program was modified. + fn minimize(&self, program: &mut Program) -> bool; +} diff --git a/smite-ir/src/minimizers/cse.rs b/smite-ir/src/minimizers/cse.rs new file mode 100644 index 0000000..e900a6d --- /dev/null +++ b/smite-ir/src/minimizers/cse.rs @@ -0,0 +1,52 @@ +//! Common-subexpression elimination minimizer. + +use std::collections::HashMap; +use std::collections::hash_map::Entry; + +use super::Minimizer; +use crate::{Instruction, Program}; + +/// Merges instructions that compute the same pure expression. +/// +/// Two pure instructions are equivalent when they share the same operation +/// and the same canonicalized inputs. Walking the program in order makes +/// the merge transitive: by the time we reach instruction `i`, SSA +/// guarantees every input it references is already canonicalized, so two +/// compute ops whose inputs collapsed to the same canonical loads are +/// themselves recognized as equivalent. 
+pub struct CommonSubexpressionEliminator; + +impl Minimizer for CommonSubexpressionEliminator { + fn minimize(&self, program: &mut Program) -> bool { + let n = program.instructions.len(); + let mut canonical: HashMap = HashMap::new(); + let mut new_idx = vec![0usize; n]; + let mut instructions = Vec::with_capacity(n); + + for (i, mut instr) in std::mem::take(&mut program.instructions) + .into_iter() + .enumerate() + { + for input in &mut instr.inputs { + *input = new_idx[*input]; + } + if instr.operation.has_side_effects() { + new_idx[i] = instructions.len(); + instructions.push(instr); + continue; + } + match canonical.entry(instr.clone()) { + Entry::Occupied(e) => new_idx[i] = *e.get(), + Entry::Vacant(e) => { + e.insert(instructions.len()); + new_idx[i] = instructions.len(); + instructions.push(instr); + } + } + } + + let changed = instructions.len() < n; + program.instructions = instructions; + changed + } +} diff --git a/smite-ir/src/minimizers/dead_code.rs b/smite-ir/src/minimizers/dead_code.rs new file mode 100644 index 0000000..be2709f --- /dev/null +++ b/smite-ir/src/minimizers/dead_code.rs @@ -0,0 +1,49 @@ +//! Dead-code elimination minimizer. + +use super::Minimizer; +use crate::Program; + +/// Removes unreferenced instructions and reindexes the remaining inputs. +/// +/// An instruction is removed when (a) its operation has no side effects +/// and (b) no later instruction references its output. The reverse +/// traversal lets a chain of dead instructions collapse, once we drop the +/// user of some load, that load's reference count falls to zero and the +/// load itself becomes eligible. 
+pub struct DeadCodeEliminator; + +impl Minimizer for DeadCodeEliminator { + fn minimize(&self, program: &mut Program) -> bool { + let n = program.instructions.len(); + let mut keep = vec![false; n]; + for idx in (0..n).rev() { + if !keep[idx] && !program.instructions[idx].operation.has_side_effects() { + continue; + } + keep[idx] = true; + for &input in &program.instructions[idx].inputs { + keep[input] = true; + } + } + + let mut remap = vec![0usize; n]; + let mut instructions = Vec::with_capacity(n); + for (old, mut instr) in std::mem::take(&mut program.instructions) + .into_iter() + .enumerate() + { + if !keep[old] { + continue; + } + for input in &mut instr.inputs { + *input = remap[*input]; + } + remap[old] = instructions.len(); + instructions.push(instr); + } + + let changed = instructions.len() < n; + program.instructions = instructions; + changed + } +} diff --git a/smite-ir/src/operation.rs b/smite-ir/src/operation.rs index e1e1c2a..bbd1d70 100644 --- a/smite-ir/src/operation.rs +++ b/smite-ir/src/operation.rs @@ -18,7 +18,7 @@ use super::VariableType; /// An IR operation. Each instruction in a program contains one operation plus /// input variable indices. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum Operation { // -- Load: produce a variable from an embedded literal or the context -- /// Load a satoshi or millisatoshi amount. @@ -106,7 +106,7 @@ pub enum Operation { /// Each variant encodes to a script matching one of the formats required by /// BOLT 2 for the upfront shutdown TLV. `Empty` opts out of upfront shutdown /// entirely and is accepted regardless of feature negotiation. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum ShutdownScriptVariant { /// Zero-length script. Opts out of upfront shutdown. 
Empty, @@ -294,7 +294,7 @@ impl fmt::Display for ShutdownScriptVariant { /// Additionally, the following bits can be added to any channel type: /// - `option_scid_alias` (bit 46) /// - `option_zeroconf` (bit 50) -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum ChannelTypeVariant { /// bit 12 StaticRemoteKey, @@ -437,7 +437,7 @@ impl fmt::Display for ChannelTypeVariant { } /// Fields that can be extracted from an `AcceptChannel` compound variable. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum AcceptChannelField { TemporaryChannelId, DustLimitSatoshis, @@ -640,6 +640,31 @@ impl Operation { } } + /// Returns `true` if this operation has I/O side effects and therefore + /// cannot be dropped by DCE or deduplicated by CSE. + #[must_use] + pub fn has_side_effects(&self) -> bool { + match self { + Self::SendMessage | Self::RecvAcceptChannel | Self::MineBlocks(_) => true, + Self::LoadAmount(_) + | Self::LoadFeeratePerKw(_) + | Self::LoadBlockHeight(_) + | Self::LoadU16(_) + | Self::LoadU8(_) + | Self::LoadBytes(_) + | Self::LoadFeatures(_) + | Self::LoadPrivateKey(_) + | Self::LoadChannelId(_) + | Self::LoadShutdownScript(_) + | Self::LoadChannelType(_) + | Self::LoadTargetPubkeyFromContext + | Self::LoadChainHashFromContext + | Self::DerivePoint + | Self::ExtractAcceptChannel(_) + | Self::BuildOpenChannel => false, + } + } + /// Returns true if this operation has parameters that can be mutated /// by `OperationParamMutator`. 
#[must_use] diff --git a/smite-ir/src/tests.rs b/smite-ir/src/tests.rs index 9e907b3..9762c5c 100644 --- a/smite-ir/src/tests.rs +++ b/smite-ir/src/tests.rs @@ -7,6 +7,7 @@ use smite::bolt::MAX_MESSAGE_SIZE; use super::*; use generators::OpenChannelGenerator; +use minimizers::{CommonSubexpressionEliminator, DeadCodeEliminator, Minimizer}; use mutators::{InputSwapMutator, OperationParamMutator}; use operation::{AcceptChannelField, ChannelTypeVariant, ShutdownScriptVariant}; use program::ValidateError; @@ -1164,3 +1165,339 @@ fn input_swap_preserves_types() { } } } + +// -- DeadCodeEliminator tests -- + +#[test] +fn dead_code_removes_dead_instructions() { + // All three LoadAmount instructions are unreferenced; all three are dropped. + let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadAmount(1), + inputs: vec![], + }, + Instruction { + operation: Operation::LoadAmount(2), + inputs: vec![], + }, + Instruction { + operation: Operation::LoadAmount(3), + inputs: vec![], + }, + ], + }; + assert!(DeadCodeEliminator.minimize(&mut program)); + assert!( + program.instructions.is_empty(), + "all dead instructions should be removed" + ); + program.validate().expect("trimmed program should validate"); +} + +#[test] +fn dead_code_returns_false_on_empty_program() { + let mut program = Program { + instructions: vec![], + }; + assert!(!DeadCodeEliminator.minimize(&mut program)); + assert!(program.instructions.is_empty()); +} + +/// Build a program with a dead load appended after the generated program. +/// This gives the `DeadCodeEliminator` at least one candidate to try. 
+fn program_with_dead_load() -> Program { + let mut p = generate_program(0); + p.instructions.push(Instruction { + operation: Operation::LoadAmount(42), + inputs: vec![], + }); + p +} + +#[test] +fn dead_code_keeps_send_message() { + let mut program = program_with_dead_load(); + DeadCodeEliminator.minimize(&mut program); + let has_send = program + .instructions + .iter() + .any(|i| matches!(i.operation, Operation::SendMessage)); + assert!(has_send, "DeadCodeEliminator must not remove SendMessage"); +} + +#[test] +fn dead_code_keeps_recv_accept_channel() { + let mut program = program_with_dead_load(); + DeadCodeEliminator.minimize(&mut program); + let has_recv = program + .instructions + .iter() + .any(|i| matches!(i.operation, Operation::RecvAcceptChannel)); + assert!( + has_recv, + "DeadCodeEliminator must not remove RecvAcceptChannel" + ); +} + +#[test] +fn dead_code_result_validates() { + let mut program = program_with_dead_load(); + DeadCodeEliminator.minimize(&mut program); + program.validate().expect("final program should validate"); +} + +#[test] +fn dead_code_reindexes_remaining_inputs() { + // Indexes 0 and 1 are dead loads; 2 is a referenced load; 3 references 2. + // After dropping 0 and 1, the surviving load shifts to index 0 and the + // DerivePoint must be rewritten to reference it. 
+ let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadAmount(1), + inputs: vec![], + }, + Instruction { + operation: Operation::LoadAmount(2), + inputs: vec![], + }, + Instruction { + operation: Operation::LoadPrivateKey(key(1)), + inputs: vec![], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![2], + }, + ], + }; + assert!(DeadCodeEliminator.minimize(&mut program)); + assert_eq!(program.instructions.len(), 2); + assert!(matches!( + program.instructions[0].operation, + Operation::LoadPrivateKey(_) + )); + assert!(matches!( + program.instructions[1].operation, + Operation::SendMessage + )); + assert_eq!(program.instructions[1].inputs, vec![0]); +} + +#[test] +fn dead_code_chains_collapse() { + // Two chains share a root LoadPrivateKey. One DerivePoint feeds an + // impure SendMessage (alive); the other is unreferenced (dead). DCE + // drops the dead DerivePoint, but the shared root must survive because + // the alive chain still references it. + // + // Note: this program is type-invalid (SendMessage expects Message, not + // Point), but the minimizer doesn't typecheck so it's fine for the test. 
+ let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadPrivateKey(key(1)), + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, // alive + inputs: vec![0], + }, + Instruction { + operation: Operation::DerivePoint, // dead + inputs: vec![0], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![1], + }, + ], + }; + let expected = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadPrivateKey(key(1)), + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, + inputs: vec![0], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![1], + }, + ], + }; + assert!(DeadCodeEliminator.minimize(&mut program)); + assert_eq!(program, expected); +} + +#[test] +fn dead_code_idempotent() { + let mut once = program_with_dead_load(); + DeadCodeEliminator.minimize(&mut once); + let mut twice = once.clone(); + assert!( + !DeadCodeEliminator.minimize(&mut twice), + "second pass must report unchanged" + ); + assert_eq!(once, twice, "elimination is idempotent"); +} + +// -- CommonSubexpressionEliminator tests -- + +#[test] +fn cse_returns_false_on_empty_program() { + let mut program = Program { + instructions: vec![], + }; + assert!(!CommonSubexpressionEliminator.minimize(&mut program)); + assert!(program.instructions.is_empty()); +} + +#[test] +fn cse_rewires_references() { + // A downstream DerivePoint consumes the duplicate load. After CSE, its + // input must be rewired from the dropped duplicate (index 1) to the + // surviving canonical load (index 0). 
+ let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadPrivateKey(key(7)), + inputs: vec![], + }, + Instruction { + operation: Operation::LoadPrivateKey(key(7)), // duplicate of index 0 + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, + inputs: vec![1], // must be rewired to 0 + }, + ], + }; + let expected = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadPrivateKey(key(7)), + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, + inputs: vec![0], + }, + ], + }; + assert!(CommonSubexpressionEliminator.minimize(&mut program)); + assert_eq!(program, expected); + program.validate().expect("program should still validate"); +} + +#[test] +fn cse_result_validates() { + let mut program = generate_program(0); + CommonSubexpressionEliminator.minimize(&mut program); + program.validate().expect("merged program should validate"); +} + +#[test] +fn cse_idempotent() { + let mut once = generate_program(0); + CommonSubexpressionEliminator.minimize(&mut once); + let mut twice = once.clone(); + assert!( + !CommonSubexpressionEliminator.minimize(&mut twice), + "second pass must report unchanged" + ); + assert_eq!(once, twice, "merging is idempotent"); +} + +#[test] +fn cse_merges_compute_ops_through_canonicalized_inputs() { + // Two LoadPrivateKey duplicates feed two DerivePoint instructions. + // CSE first merges the loads, which canonicalizes the DerivePoint + // inputs to the same index, which in turn lets CSE merge the + // DerivePoints themselves. 
+ let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadPrivateKey(key(7)), + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, + inputs: vec![0], + }, + Instruction { + operation: Operation::LoadPrivateKey(key(7)), // duplicate of 0 + inputs: vec![], + }, + Instruction { + operation: Operation::DerivePoint, + inputs: vec![2], // canonicalizes to 0 -> matches index 1 + }, + ], + }; + program.validate().expect("input program should validate"); + assert!(CommonSubexpressionEliminator.minimize(&mut program)); + assert_eq!(program.instructions.len(), 2); + assert!(matches!( + program.instructions[0].operation, + Operation::LoadPrivateKey(_) + )); + assert!(matches!( + program.instructions[1].operation, + Operation::DerivePoint + )); + assert_eq!(program.instructions[1].inputs, vec![0]); +} + +#[test] +fn cse_does_not_merge_send_message() { + // SendMessage is not pure (network side-effect): two with the same + // input must both survive. The duplicate LoadBytes upstream should + // be merged, and both SendMessages remapped to the surviving load. + // + // Note: this program is type-invalid (SendMessage expects Message, not + // Bytes), but the minimizer doesn't typecheck so it's fine for the test. 
+ let mut program = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadBytes(vec![0xab]), + inputs: vec![], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![0], + }, + Instruction { + operation: Operation::LoadBytes(vec![0xab]), // duplicate of 0 + inputs: vec![], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![2], // canonicalizes to 0 + }, + ], + }; + let expected = Program { + instructions: vec![ + Instruction { + operation: Operation::LoadBytes(vec![0xab]), + inputs: vec![], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![0], + }, + Instruction { + operation: Operation::SendMessage, + inputs: vec![0], + }, + ], + }; + assert!(CommonSubexpressionEliminator.minimize(&mut program)); + assert_eq!(program, expected, "SendMessage must not be deduplicated"); +}