diff --git a/smite-nyx-sys/src/jvm-crash-handler.c b/smite-nyx-sys/src/jvm-crash-handler.c
index 5c4fbde..272c8a4 100644
--- a/smite-nyx-sys/src/jvm-crash-handler.c
+++ b/smite-nyx-sys/src/jvm-crash-handler.c
@@ -15,7 +15,9 @@
 ///
 /// Crashes are reported for ALL exit codes (including code 0). Any exit
 /// triggered by a peer message is a bug, since an offline LN node cannot
-/// enforce contracts on chain.
+/// enforce contracts on chain. Exits that occur during node startup are not
+/// reported since Eclair runs various external binaries during startup that
+/// are expected to exit.
 ///
 /// Note that our fuzz scenarios send SIGTERM to shut down LN nodes when done
 /// running in local mode. The JVM calls exit(143) in this case, which triggers
@@ -42,6 +44,13 @@
 #include "nyx.h"
 #endif
 
+// Must match STARTUP_COMPLETE_MARKER in smite/src/runners.rs.
+#define STARTUP_COMPLETE_MARKER "/tmp/smite-startup-complete"
+
+static int startup_complete(void) {
+  return access(STARTUP_COMPLETE_MARKER, F_OK) == 0;
+}
+
 static void report_crash(const char *reason, int code) {
   char buf[256];
   int len = snprintf(buf, sizeof(buf), "%s (code %d)\n", reason, code);
@@ -62,14 +71,16 @@ static void report_crash(const char *reason, int code) {
 
 // Override exit(). The JVM routes all normal termination through exit().
 void exit(int status) {
-  report_crash("exit", status);
+  if (startup_complete())
+    report_crash("exit", status);
   syscall(SYS_exit_group, status);
   __builtin_unreachable();
 }
 
 // Override abort(). The JVM calls abort() for crash dumps (SIGSEGV, etc.).
 void abort(void) {
-  report_crash("abort", 134);
+  if (startup_complete())
+    report_crash("abort", 134);
   syscall(SYS_exit_group, 134);
   __builtin_unreachable();
 }
diff --git a/smite-scenarios/src/targets/eclair.rs b/smite-scenarios/src/targets/eclair.rs
index e4c1931..8a0a561 100644
--- a/smite-scenarios/src/targets/eclair.rs
+++ b/smite-scenarios/src/targets/eclair.rs
@@ -118,11 +118,14 @@ impl EclairTarget {
         Self::write_config(config, &eclair_dir)?;
 
         let mut cmd = Command::new("eclair-node.sh");
-        // Skip java_version_check() in eclair-node.sh. It runs `java -version`,
-        // which inherits our crash handler wrapper and could trigger a false
-        // crash report on exit().
-        cmd.arg("-no-version-check")
-            .arg(format!("-Declair.datadir={}", eclair_dir.display()))
+
+        // LD_PRELOAD the crash handler to report crashes immediately (before
+        // process teardown closes TCP sockets).
+        if let Ok(handler) = std::env::var("SMITE_CRASH_HANDLER") {
+            cmd.env("LD_PRELOAD", handler);
+        }
+
+        cmd.arg(format!("-Declair.datadir={}", eclair_dir.display()))
             .stdout(Stdio::null())
             .stderr(Stdio::null());
 
diff --git a/smite/src/runners.rs b/smite/src/runners.rs
index 33411ca..23d9ecb 100644
--- a/smite/src/runners.rs
+++ b/smite/src/runners.rs
@@ -10,6 +10,15 @@
 #[cfg(feature = "nyx")]
 use smite_nyx_sys::{nyx_fail, nyx_get_fuzz_input, nyx_init, nyx_release, nyx_skip};
 
+/// Marker file created right before the first fuzz input is delivered, so
+/// crash handlers can filter out expected subprocess exits that occur during
+/// node startup.
+const STARTUP_COMPLETE_MARKER: &str = "/tmp/smite-startup-complete";
+
+fn create_startup_complete_marker() {
+    let _ = std::fs::File::create(STARTUP_COMPLETE_MARKER);
+}
+
 /// `Runner` provides an abstraction for a smite test case runner (e.g. run under nyx,
 /// local system for reproduction, etc.)
 pub trait Runner {
@@ -36,6 +45,9 @@ impl Runner for LocalRunner {
 
     fn get_fuzz_input(&self) -> Vec<u8> {
         use std::io::Read;
+
+        create_startup_complete_marker();
+
         if let Ok(path) = std::env::var("SMITE_INPUT") {
             log::info!("Reading input from {path:?}");
             std::fs::read(&path).unwrap_or_default()
@@ -74,6 +86,8 @@ impl Runner for NyxRunner {
     }
 
     fn get_fuzz_input(&self) -> Vec<u8> {
+        create_startup_complete_marker();
+
         let mut data = vec![0u8; self.max_input_size];
         // SAFETY: We pass a valid pointer to an allocated buffer of exactly
         // max_input_size bytes. The C code will write at most max_input_size
diff --git a/workloads/eclair/Dockerfile b/workloads/eclair/Dockerfile
index 90f9186..4b0fd33 100644
--- a/workloads/eclair/Dockerfile
+++ b/workloads/eclair/Dockerfile
@@ -109,15 +109,6 @@ COPY --from=builder /smite/target/release/eclair_${SCENARIO} /eclair-scenario
 # Default to the local crash handler; init.sh overrides with the Nyx version.
 ENV SMITE_CRASH_HANDLER=/jvm-crash-handler.so
 
-# Install a java wrapper that LD_PRELOADs the crash handler on the JVM only.
-# eclair-node.sh runs shell commands (ls, grep, awk) that would inherit
-# LD_PRELOAD and trigger false crash reports when they exit(0). The wrapper
-# ensures only the JVM process gets the crash handler.
-RUN mv "$JAVA_HOME/bin/java" "$JAVA_HOME/bin/java.real" && \
-    printf '#!/bin/sh\nLD_PRELOAD=$SMITE_CRASH_HANDLER exec %s/bin/java.real "$@"\n' \
-        "$JAVA_HOME" > "$JAVA_HOME/bin/java" && \
-    chmod +x "$JAVA_HOME/bin/java"
-
 # Copy init script
 COPY workloads/eclair/init.sh /init.sh
 RUN chmod +x /init.sh /eclair-scenario