From 25b65378d8e171c6c4a62adb44f5d5aaeb7ba28a Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Wed, 20 May 2026 16:29:45 -0500 Subject: [PATCH 1/4] smite: write marker file before getting first input The presence of the marker file indicates that startup of the target is complete and that fuzzing is about to begin. This will be used by the JVM crash handler to filter out exits that occur during Eclair startup. --- smite/src/runners.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/smite/src/runners.rs b/smite/src/runners.rs index 33411ca..23d9ecb 100644 --- a/smite/src/runners.rs +++ b/smite/src/runners.rs @@ -10,6 +10,15 @@ #[cfg(feature = "nyx")] use smite_nyx_sys::{nyx_fail, nyx_get_fuzz_input, nyx_init, nyx_release, nyx_skip}; +/// Marker file created right before the first fuzz input is delivered, so +/// crash handlers can filter out expected subprocess exits that occur during +/// node startup. +const STARTUP_COMPLETE_MARKER: &str = "/tmp/smite-startup-complete"; + +fn create_startup_complete_marker() { + let _ = std::fs::File::create(STARTUP_COMPLETE_MARKER); +} + /// `Runner` provides an abstraction for a smite test case runner (e.g. run under nyx, /// local system for reproduction, etc.) pub trait Runner { @@ -36,6 +45,9 @@ impl Runner for LocalRunner { fn get_fuzz_input(&self) -> Vec { use std::io::Read; + + create_startup_complete_marker(); + if let Ok(path) = std::env::var("SMITE_INPUT") { log::info!("Reading input from {path:?}"); std::fs::read(&path).unwrap_or_default() @@ -74,6 +86,8 @@ impl Runner for NyxRunner { } fn get_fuzz_input(&self) -> Vec { + create_startup_complete_marker(); + let mut data = vec![0u8; self.max_input_size]; // SAFETY: We pass a valid pointer to an allocated buffer of exactly // max_input_size bytes. 
The C code will write at most max_input_size From e2ce997d84839e93be1a49ee74ee1bba46662b46 Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Wed, 20 May 2026 16:33:10 -0500 Subject: [PATCH 2/4] smite-nyx-sys: ignore exits during startup Eclair runs various commands as subprocesses during node startup that are expected to call exit(0). We don't want to generate crash reports for those. --- smite-nyx-sys/src/jvm-crash-handler.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/smite-nyx-sys/src/jvm-crash-handler.c b/smite-nyx-sys/src/jvm-crash-handler.c index 5c4fbde..272c8a4 100644 --- a/smite-nyx-sys/src/jvm-crash-handler.c +++ b/smite-nyx-sys/src/jvm-crash-handler.c @@ -15,7 +15,9 @@ /// /// Crashes are reported for ALL exit codes (including code 0). Any exit /// triggered by a peer message is a bug, since an offline LN node cannot -/// enforce contracts on chain. +/// enforce contracts on chain. Exits that occur during node startup are not +/// reported since Eclair runs various external binaries during startup that +/// are expected to exit. /// /// Note that our fuzz scenarios send SIGTERM to shut down LN nodes when done /// running in local mode. The JVM calls exit(143) in this case, which triggers @@ -42,6 +44,13 @@ #include "nyx.h" #endif +// Must match STARTUP_COMPLETE_MARKER in smite/src/runners.rs. +#define STARTUP_COMPLETE_MARKER "/tmp/smite-startup-complete" + +static int startup_complete(void) { + return access(STARTUP_COMPLETE_MARKER, F_OK) == 0; +} + static void report_crash(const char *reason, int code) { char buf[256]; int len = snprintf(buf, sizeof(buf), "%s (code %d)\n", reason, code); @@ -62,14 +71,16 @@ static void report_crash(const char *reason, int code) { // Override exit(). The JVM routes all normal termination through exit(). 
void exit(int status) { - report_crash("exit", status); + if (startup_complete()) + report_crash("exit", status); syscall(SYS_exit_group, status); __builtin_unreachable(); } // Override abort(). The JVM calls abort() for crash dumps (SIGSEGV, etc.). void abort(void) { - report_crash("abort", 134); + if (startup_complete()) + report_crash("abort", 134); syscall(SYS_exit_group, 134); __builtin_unreachable(); } From 94e81d9a85e1dadded199d00fc1d7fa0f7554bd4 Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Wed, 20 May 2026 16:39:38 -0500 Subject: [PATCH 3/4] smite-scenarios: remove -no-version-check flag The flag was added to prevent a false-positive error report when starting Eclair and is no longer needed now that we use a marker file to detect when startup is complete. --- smite-scenarios/src/targets/eclair.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/smite-scenarios/src/targets/eclair.rs b/smite-scenarios/src/targets/eclair.rs index e4c1931..a5f626c 100644 --- a/smite-scenarios/src/targets/eclair.rs +++ b/smite-scenarios/src/targets/eclair.rs @@ -118,11 +118,7 @@ impl EclairTarget { Self::write_config(config, &eclair_dir)?; let mut cmd = Command::new("eclair-node.sh"); - // Skip java_version_check() in eclair-node.sh. It runs `java -version`, - // which inherits our crash handler wrapper and could trigger a false - // crash report on exit(). - cmd.arg("-no-version-check") - .arg(format!("-Declair.datadir={}", eclair_dir.display())) + cmd.arg(format!("-Declair.datadir={}", eclair_dir.display())) .stdout(Stdio::null()) .stderr(Stdio::null()); From 9037627aeb8449ee299ec637b593b50bb0901aca Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Wed, 20 May 2026 17:04:45 -0500 Subject: [PATCH 4/4] workloads: remove java binary wrapper Now that we filter out exits during node startup, we don't need to have a hacky java wrapper to limit the scope of our crash handler.
--- smite-scenarios/src/targets/eclair.rs | 7 +++++++ workloads/eclair/Dockerfile | 9 --------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/smite-scenarios/src/targets/eclair.rs b/smite-scenarios/src/targets/eclair.rs index a5f626c..8a0a561 100644 --- a/smite-scenarios/src/targets/eclair.rs +++ b/smite-scenarios/src/targets/eclair.rs @@ -118,6 +118,13 @@ impl EclairTarget { Self::write_config(config, &eclair_dir)?; let mut cmd = Command::new("eclair-node.sh"); + + // LD_PRELOAD the crash handler to report crashes immediately (before + // process teardown closes TCP sockets). + if let Ok(handler) = std::env::var("SMITE_CRASH_HANDLER") { + cmd.env("LD_PRELOAD", handler); + } + cmd.arg(format!("-Declair.datadir={}", eclair_dir.display())) .stdout(Stdio::null()) .stderr(Stdio::null()); diff --git a/workloads/eclair/Dockerfile b/workloads/eclair/Dockerfile index 90f9186..4b0fd33 100644 --- a/workloads/eclair/Dockerfile +++ b/workloads/eclair/Dockerfile @@ -109,15 +109,6 @@ COPY --from=builder /smite/target/release/eclair_${SCENARIO} /eclair-scenario # Default to the local crash handler; init.sh overrides with the Nyx version. ENV SMITE_CRASH_HANDLER=/jvm-crash-handler.so -# Install a java wrapper that LD_PRELOADs the crash handler on the JVM only. -# eclair-node.sh runs shell commands (ls, grep, awk) that would inherit -# LD_PRELOAD and trigger false crash reports when they exit(0). The wrapper -# ensures only the JVM process gets the crash handler. -RUN mv "$JAVA_HOME/bin/java" "$JAVA_HOME/bin/java.real" && \ - printf '#!/bin/sh\nLD_PRELOAD=$SMITE_CRASH_HANDLER exec %s/bin/java.real "$@"\n' \ - "$JAVA_HOME" > "$JAVA_HOME/bin/java" && \ - chmod +x "$JAVA_HOME/bin/java" - # Copy init script COPY workloads/eclair/init.sh /init.sh RUN chmod +x /init.sh /eclair-scenario