diff --git a/.github/workflows/backend-regression.yml b/.github/workflows/backend-regression.yml new file mode 100644 index 00000000..d0c7efe7 --- /dev/null +++ b/.github/workflows/backend-regression.yml @@ -0,0 +1,42 @@ +name: Backend performance regression + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + backend-regression: + name: Backend regression (${{ matrix.backend.name }}) + runs-on: ubuntu-latest + permissions: + contents: read + strategy: + fail-fast: false + matrix: + backend: + - name: rust_backend + command: cargo test --release --test backend-regression-bench -- --ignored --nocapture + - name: zlib-rs + command: cargo test --release --test backend-regression-bench --features zlib-rs --no-default-features -- --ignored --nocapture + - name: zlib + command: cargo test --release --test backend-regression-bench --features zlib --no-default-features -- --ignored --nocapture + - name: zlib-ng + command: cargo test --release --test backend-regression-bench --features zlib-ng --no-default-features -- --ignored --nocapture + - name: zlib-ng-compat + command: cargo test --release --test backend-regression-bench --features zlib-ng-compat --no-default-features -- --ignored --nocapture + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install Rust + run: rustup update stable --no-self-update && rustup default stable + shell: bash + - name: Run backend regression benchmark + run: ${{ matrix.backend.command }} + - name: Upload backend benchmark CSV + if: always() + uses: actions/upload-artifact@v4 + with: + name: backend-regression-${{ matrix.backend.name }} + path: target/backend-bench/${{ matrix.backend.name }}*.csv diff --git a/Cargo.toml b/Cargo.toml index 6d413511..bfeeea31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ exclude = [".*"] libz-sys = { version = "1.1.20", optional = true, default-features = false } libz-ng-sys = { version = "1.1.16", optional = true } # this matches the default features, but we 
don't want to depend on the default features staying the same -zlib-rs = { version = "0.6.0", optional = true, default-features = false, features = ["std", "rust-allocator"] } +zlib-rs = { version = "0.6.0", optional = true, default-features = false, features = ["std", "rust-allocator", "__internal-api"] } ## This implementation uses only safe Rust code and doesn't require a C compiler. ## It provides good performance for most use cases while being completely portable. miniz_oxide = { version = "0.9.0", optional = true, features = ["simd"] } diff --git a/src/ffi/zlib_rs.rs b/src/ffi/zlib_rs.rs index f5524bbe..fd928fd8 100644 --- a/src/ffi/zlib_rs.rs +++ b/src/ffi/zlib_rs.rs @@ -17,9 +17,14 @@ //! With zlib_rs the state is not self-referential and hence no boxing is needed. The `new` methods //! internally do allocate space for the (de)compression state. -use std::fmt; +use std::{ffi::CStr, fmt, mem::MaybeUninit}; -use ::zlib_rs::{DeflateFlush, InflateError, InflateFlush}; +use ::zlib_rs::{ + c_api::z_stream, + deflate::{self, DeflateStream}, + inflate::{self, InflateStream}, + DeflateConfig, DeflateFlush, InflateConfig, InflateFlush, ReturnCode, +}; pub const MZ_NO_FLUSH: isize = DeflateFlush::NoFlush as isize; pub const MZ_PARTIAL_FLUSH: isize = DeflateFlush::PartialFlush as isize; @@ -32,16 +37,6 @@ pub const MZ_DEFAULT_WINDOW_BITS: core::ffi::c_int = 15; use super::*; use crate::mem::{compress_failed, decompress_failed}; -impl From<::zlib_rs::Status> for crate::mem::Status { - fn from(value: ::zlib_rs::Status) -> Self { - match value { - ::zlib_rs::Status::Ok => crate::mem::Status::Ok, - ::zlib_rs::Status::BufError => crate::mem::Status::BufError, - ::zlib_rs::Status::StreamEnd => crate::mem::Status::StreamEnd, - } - } -} - #[derive(Clone, Default)] pub struct ErrorMessage(Option<&'static str>); @@ -52,12 +47,15 @@ impl ErrorMessage { } pub struct Inflate { - pub(crate) inner: ::zlib_rs::Inflate, + pub(crate) inner: z_stream, // NOTE: these counts do not count 
the dictionary. total_in: u64, total_out: u64, } +unsafe impl Send for Inflate {} +unsafe impl Sync for Inflate {} + impl fmt::Debug for Inflate { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!( @@ -69,20 +67,18 @@ impl fmt::Debug for Inflate { } } -impl From for DeflateFlush { - fn from(value: FlushDecompress) -> Self { - match value { - FlushDecompress::None => Self::NoFlush, - FlushDecompress::Sync => Self::SyncFlush, - FlushDecompress::Finish => Self::Finish, - } - } -} - impl InflateBackend for Inflate { fn make(zlib_header: bool, window_bits: u8) -> Self { + let config = InflateConfig { + window_bits: if zlib_header { + i32::from(window_bits) + } else { + -i32::from(window_bits) + }, + }; + Inflate { - inner: ::zlib_rs::Inflate::new(zlib_header, window_bits), + inner: stream_with_inflate_config(config), total_in: 0, total_out: 0, } @@ -94,31 +90,28 @@ impl InflateBackend for Inflate { output: &mut [u8], flush: FlushDecompress, ) -> Result { - let flush = match flush { - FlushDecompress::None => InflateFlush::NoFlush, - FlushDecompress::Sync => InflateFlush::SyncFlush, - FlushDecompress::Finish => InflateFlush::Finish, - }; - - let total_in_start = self.inner.total_in(); - let total_out_start = self.inner.total_out(); - - let result = self.inner.decompress(input, output, flush); - - self.total_in += self.inner.total_in() - total_in_start; - self.total_out += self.inner.total_out() - total_out_start; + self.decompress_impl(input, output.as_mut_ptr(), output.len(), flush) + } - match result { - Ok(status) => Ok(status.into()), - Err(InflateError::NeedDict { dict_id }) => crate::mem::decompress_need_dict(dict_id), - Err(_) => self.decompress_error(), - } + fn decompress_uninit( + &mut self, + input: &[u8], + output: &mut [MaybeUninit], + flush: FlushDecompress, + ) -> Result { + self.decompress_impl(input, output.as_mut_ptr().cast::(), output.len(), flush) } fn reset(&mut self, zlib_header: bool) { self.total_in = 0; self.total_out = 0; 
- self.inner.reset(zlib_header); + + let mut config = InflateConfig::default(); + if !zlib_header { + config.window_bits = -config.window_bits; + } + + let _ = inflate::reset_with_config(self.stream_mut(), config); } } @@ -135,25 +128,84 @@ impl Backend for Inflate { } impl Inflate { + fn decompress_impl( + &mut self, + input: &[u8], + output_ptr: *mut u8, + output_len: usize, + flush: FlushDecompress, + ) -> Result { + let flush = match flush { + FlushDecompress::None => InflateFlush::NoFlush, + FlushDecompress::Sync => InflateFlush::SyncFlush, + FlushDecompress::Finish => InflateFlush::Finish, + }; + + let total_in_start = self.inner.total_in; + let total_out_start = self.inner.total_out; + + self.inner.avail_in = Ord::min(input.len(), u32::MAX as usize) as u32; + self.inner.avail_out = Ord::min(output_len, u32::MAX as usize) as u32; + self.inner.next_in = input.as_ptr(); + self.inner.next_out = output_ptr; + + let result = unsafe { inflate::inflate(self.stream_mut(), flush) }; + + self.accumulate_totals(total_in_start, total_out_start); + + match result { + ReturnCode::Ok => Ok(Status::Ok), + ReturnCode::StreamEnd => Ok(Status::StreamEnd), + ReturnCode::BufError => Ok(Status::BufError), + ReturnCode::NeedDict => crate::mem::decompress_need_dict(self.inner.adler as u32), + ReturnCode::ErrNo | ReturnCode::VersionError => unreachable!(), + ReturnCode::StreamError | ReturnCode::DataError | ReturnCode::MemError => { + self.decompress_error() + } + } + } + + fn accumulate_totals(&mut self, total_in_start: u64, total_out_start: u64) { + self.total_in += self.inner.total_in - total_in_start; + self.total_out += self.inner.total_out - total_out_start; + } + + fn stream_mut(&mut self) -> &mut InflateStream<'static> { + unsafe { InflateStream::from_stream_mut(&mut self.inner) } + .expect("zlib-rs inflate stream is initialized") + } + fn decompress_error(&self) -> Result { - decompress_failed(ErrorMessage(self.inner.error_message())) + 
decompress_failed(ErrorMessage(error_message(self.inner.msg))) } pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result { - match self.inner.set_dictionary(dictionary) { - Ok(v) => Ok(v), - Err(_) => self.decompress_error(), + match inflate::set_dictionary(self.stream_mut(), dictionary) { + ReturnCode::Ok => Ok(self.inner.adler as u32), + ReturnCode::StreamError | ReturnCode::DataError => self.decompress_error(), + _other => unreachable!("set_dictionary does not return {:?}", _other), + } + } +} + +impl Drop for Inflate { + fn drop(&mut self) { + if let Some(stream) = unsafe { InflateStream::from_stream_mut(&mut self.inner) } { + let _ = inflate::end(stream); } } } pub struct Deflate { - pub(crate) inner: ::zlib_rs::Deflate, + pub(crate) inner: z_stream, // NOTE: these counts do not count the dictionary. total_in: u64, total_out: u64, } +unsafe impl Send for Deflate {} +unsafe impl Sync for Deflate {} + impl fmt::Debug for Deflate { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!( @@ -167,11 +219,20 @@ impl fmt::Debug for Deflate { impl DeflateBackend for Deflate { fn make(level: Compression, zlib_header: bool, window_bits: u8) -> Self { - // Check in case the integer value changes at some point. 
debug_assert!(level.level() <= 9); + let config = DeflateConfig { + window_bits: if zlib_header { + i32::from(window_bits) + } else { + -i32::from(window_bits) + }, + level: level.level() as i32, + ..DeflateConfig::default() + }; + Deflate { - inner: ::zlib_rs::Deflate::new(level.level() as i32, zlib_header, window_bits), + inner: stream_with_deflate_config(config), total_in: 0, total_out: 0, } @@ -183,32 +244,22 @@ impl DeflateBackend for Deflate { output: &mut [u8], flush: FlushCompress, ) -> Result { - let flush = match flush { - FlushCompress::None => DeflateFlush::NoFlush, - FlushCompress::Partial => DeflateFlush::PartialFlush, - FlushCompress::Sync => DeflateFlush::SyncFlush, - FlushCompress::Full => DeflateFlush::FullFlush, - FlushCompress::Finish => DeflateFlush::Finish, - }; - - let total_in_start = self.inner.total_in(); - let total_out_start = self.inner.total_out(); - - let result = self.inner.compress(input, output, flush); - - self.total_in += self.inner.total_in() - total_in_start; - self.total_out += self.inner.total_out() - total_out_start; + self.compress_impl(input, output.as_mut_ptr(), output.len(), flush) + } - match result { - Ok(status) => Ok(status.into()), - Err(_) => self.compress_error(), - } + fn compress_uninit( + &mut self, + input: &[u8], + output: &mut [MaybeUninit], + flush: FlushCompress, + ) -> Result { + self.compress_impl(input, output.as_mut_ptr().cast::(), output.len(), flush) } fn reset(&mut self) { self.total_in = 0; self.total_out = 0; - self.inner.reset(); + let _ = deflate::reset(self.stream_mut()); } } @@ -225,29 +276,102 @@ impl Backend for Deflate { } impl Deflate { + fn compress_impl( + &mut self, + input: &[u8], + output_ptr: *mut u8, + output_len: usize, + flush: FlushCompress, + ) -> Result { + let flush = match flush { + FlushCompress::None => DeflateFlush::NoFlush, + FlushCompress::Partial => DeflateFlush::PartialFlush, + FlushCompress::Sync => DeflateFlush::SyncFlush, + FlushCompress::Full => 
DeflateFlush::FullFlush, + FlushCompress::Finish => DeflateFlush::Finish, + }; + + let total_in_start = self.inner.total_in; + let total_out_start = self.inner.total_out; + + self.inner.avail_in = Ord::min(input.len(), u32::MAX as usize) as u32; + self.inner.avail_out = Ord::min(output_len, u32::MAX as usize) as u32; + self.inner.next_in = input.as_ptr(); + self.inner.next_out = output_ptr; + + let result = deflate::deflate(self.stream_mut(), flush); + + self.accumulate_totals(total_in_start, total_out_start); + + match result { + ReturnCode::Ok => Ok(Status::Ok), + ReturnCode::StreamEnd => Ok(Status::StreamEnd), + ReturnCode::BufError => Ok(Status::BufError), + ReturnCode::NeedDict | ReturnCode::ErrNo | ReturnCode::VersionError => unreachable!(), + ReturnCode::StreamError | ReturnCode::DataError | ReturnCode::MemError => { + self.compress_error() + } + } + } + + fn accumulate_totals(&mut self, total_in_start: u64, total_out_start: u64) { + self.total_in += self.inner.total_in - total_in_start; + self.total_out += self.inner.total_out - total_out_start; + } + + fn stream_mut(&mut self) -> &mut DeflateStream<'static> { + unsafe { DeflateStream::from_stream_mut(&mut self.inner) } + .expect("zlib-rs deflate stream is initialized") + } + fn compress_error(&self) -> Result { - compress_failed(ErrorMessage(self.inner.error_message())) + compress_failed(ErrorMessage(error_message(self.inner.msg))) } pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result { - match self.inner.set_dictionary(dictionary) { - Ok(v) => Ok(v), - Err(_) => self.compress_error(), + match deflate::set_dictionary(self.stream_mut(), dictionary) { + ReturnCode::Ok => Ok(self.inner.adler as u32), + ReturnCode::StreamError => self.compress_error(), + _other => unreachable!("set_dictionary does not return {:?}", _other), } } pub fn set_level(&mut self, level: Compression) -> Result<(), CompressError> { - use ::zlib_rs::Status; - - match self.inner.set_level(level.level() as i32) { - Ok(status) => 
match status { - Status::Ok => Ok(()), - Status::BufError => compress_failed(ErrorMessage(Some("insufficient space"))), - Status::StreamEnd => { - unreachable!("zlib-rs is known to never return the StreamEnd status") - } - }, - Err(_) => self.compress_error(), + match deflate::params(self.stream_mut(), level.level() as i32, Default::default()) { + ReturnCode::Ok => Ok(()), + ReturnCode::BufError => compress_failed(ErrorMessage(Some("insufficient space"))), + ReturnCode::StreamError => self.compress_error(), + _other => unreachable!("set_level does not return {:?}", _other), + } + } +} + +impl Drop for Deflate { + fn drop(&mut self) { + if let Some(stream) = unsafe { DeflateStream::from_stream_mut(&mut self.inner) } { + let _ = deflate::end(stream); } } } + +fn error_message(msg: *const core::ffi::c_char) -> Option<&'static str> { + if msg.is_null() { + None + } else { + unsafe { CStr::from_ptr(msg).to_str() }.ok() + } +} + +fn stream_with_inflate_config(config: InflateConfig) -> z_stream { + let mut stream = z_stream::default(); + let result = inflate::init(&mut stream, config); + assert_eq!(result, ReturnCode::Ok); + stream +} + +fn stream_with_deflate_config(config: DeflateConfig) -> z_stream { + let mut stream = z_stream::default(); + let result = deflate::init(&mut stream, config); + assert_eq!(result, ReturnCode::Ok); + stream +} diff --git a/tests/backend-regression-bench.rs b/tests/backend-regression-bench.rs new file mode 100644 index 00000000..71486ca6 --- /dev/null +++ b/tests/backend-regression-bench.rs @@ -0,0 +1,660 @@ +use std::env; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::{self, Command, Output, Stdio}; +use std::time::{SystemTime, UNIX_EPOCH}; + +const KNOWN_GOOD_COMMIT: &str = "b9afa93d70e19a213a3594190e31fb39d83aba0e"; +const KNOWN_GOOD_COMMIT_ENV: &str = "FLATE2_BACKEND_BENCH_KNOWN_GOOD_COMMIT"; +const DRIVER_LABEL_ENV: &str = "FLATE2_BACKEND_BENCH_LABEL"; +const DRIVER_COMPARE_UNINIT_CFG: &str = "--cfg 
flate2_compare_uninit_cases"; +const DRIVER_UNINIT_CFG: &str = "--cfg flate2_has_uninit_api"; +const MIN_MEASUREMENT_SLACK_FACTOR: f64 = 0.10; + +#[derive(Clone)] +struct MeasurementRecord { + backend: String, + case: String, + iterations_per_sample: usize, + samples: usize, + ns_per_byte: f64, + measurement_uncertainty: f64, +} + +struct BenchmarkResult { + case: String, + iterations_per_sample: usize, + samples: usize, + ns_per_byte: f64, + measurement_uncertainty: f64, + baseline_iterations_per_sample: usize, + baseline_samples: usize, + baseline_ns_per_byte: f64, + baseline_measurement_uncertainty: f64, +} + +#[derive(Clone, Copy)] +struct BackendConfig { + name: &'static str, + driver_feature: &'static str, + compare_uninit_against_legacy_baseline: bool, +} + +fn known_good_commit() -> String { + env::var(KNOWN_GOOD_COMMIT_ENV).unwrap_or_else(|_| KNOWN_GOOD_COMMIT.to_owned()) +} + +fn parse_measurement_record(line: &str) -> MeasurementRecord { + let mut fields = line.split(','); + let backend = fields + .next() + .expect("missing backend field in benchmark CSV") + .trim() + .to_owned(); + let case = fields + .next() + .expect("missing case field in benchmark CSV") + .trim() + .to_owned(); + let iterations_per_sample = fields + .next() + .expect("missing iterations_per_sample field in benchmark CSV") + .trim() + .parse() + .expect("invalid iterations_per_sample field in benchmark CSV"); + let samples = fields + .next() + .expect("missing samples field in benchmark CSV") + .trim() + .parse() + .expect("invalid samples field in benchmark CSV"); + let ns_per_byte = fields + .next() + .expect("missing ns_per_byte field in benchmark CSV") + .trim() + .parse() + .expect("invalid ns_per_byte field in benchmark CSV"); + let measurement_uncertainty = fields + .next() + .expect("missing measurement_uncertainty field in benchmark CSV") + .trim() + .parse() + .expect("invalid measurement_uncertainty field in benchmark CSV"); + assert!( + fields.next().is_none(), + 
"unexpected trailing benchmark CSV fields" + ); + MeasurementRecord { + backend, + case, + iterations_per_sample, + samples, + ns_per_byte, + measurement_uncertainty, + } +} + +fn merge_measurements( + backend: &str, + current: &[MeasurementRecord], + baseline: &[MeasurementRecord], +) -> Vec { + current + .iter() + .map(|current| { + let baseline = baseline + .iter() + .find(|baseline| baseline.backend == backend && baseline.case == current.case) + .unwrap_or_else(|| { + panic!( + "missing baseline for backend={backend}, case={}", + current.case + ) + }); + BenchmarkResult { + case: current.case.clone(), + iterations_per_sample: current.iterations_per_sample, + samples: current.samples, + ns_per_byte: current.ns_per_byte, + measurement_uncertainty: current.measurement_uncertainty, + baseline_iterations_per_sample: baseline.iterations_per_sample, + baseline_samples: baseline.samples, + baseline_ns_per_byte: baseline.ns_per_byte, + baseline_measurement_uncertainty: baseline.measurement_uncertainty, + } + }) + .collect() +} + +fn allowed_slowdown_factor(result: &BenchmarkResult) -> f64 { + 1.0 + measurement_slack_factor(result) +} + +fn allowed_ns_per_byte(result: &BenchmarkResult) -> f64 { + result.baseline_ns_per_byte * allowed_slowdown_factor(result) +} + +fn slowdown_factor(result: &BenchmarkResult) -> f64 { + result.ns_per_byte / result.baseline_ns_per_byte +} + +fn measurement_slack_factor(result: &BenchmarkResult) -> f64 { + (result.measurement_uncertainty + result.baseline_measurement_uncertainty) + .max(MIN_MEASUREMENT_SLACK_FACTOR) +} + +fn failure_summary(result: &BenchmarkResult) -> String { + format!( + "{}: {:.2}x slowdown of {:.2}x allowed, measured {:.3} ns/byte, baseline {:.3} ns/byte", + result.case, + slowdown_factor(result), + allowed_slowdown_factor(result), + result.ns_per_byte, + result.baseline_ns_per_byte, + ) +} + +fn status_for(result: &BenchmarkResult) -> &'static str { + if result.ns_per_byte <= allowed_ns_per_byte(result) { + "pass" + } 
else { + "fail" + } +} + +fn results_dir() -> PathBuf { + let target_dir = env::var_os("CARGO_TARGET_DIR") + .map(PathBuf::from) + .unwrap_or_else(|| Path::new(env!("CARGO_MANIFEST_DIR")).join("target")); + target_dir.join("backend-bench") +} + +fn results_csv_path(backend: &str) -> PathBuf { + results_dir().join(format!("{backend}.csv")) +} + +fn repo_relative_display_path(path: &Path) -> String { + path.strip_prefix(Path::new(env!("CARGO_MANIFEST_DIR"))) + .unwrap_or(path) + .display() + .to_string() +} + +fn write_results_csv(backend: &str, results: &[BenchmarkResult]) -> PathBuf { + let dir = results_dir(); + fs::create_dir_all(&dir).unwrap(); + + let mut csv = String::from( + "backend,case,iterations_per_sample,samples,ns_per_byte,measurement_uncertainty,baseline_iterations_per_sample,baseline_samples,baseline_ns_per_byte,baseline_measurement_uncertainty,allowed_slowdown_factor,allowed_ns_per_byte,slowdown_factor,measurement_slack_factor,status\n", + ); + for result in results { + csv.push_str(&format!( + "{backend},{},{},{},{:.9},{:.6},{},{},{:.9},{:.6},{:.6},{:.9},{:.6},{:.6},{}\n", + result.case, + result.iterations_per_sample, + result.samples, + result.ns_per_byte, + result.measurement_uncertainty, + result.baseline_iterations_per_sample, + result.baseline_samples, + result.baseline_ns_per_byte, + result.baseline_measurement_uncertainty, + allowed_slowdown_factor(result), + allowed_ns_per_byte(result), + slowdown_factor(result), + measurement_slack_factor(result), + status_for(result), + )); + } + + let path = results_csv_path(backend); + fs::write(&path, csv).unwrap(); + path +} + +fn write_measurement_csv(path: &Path, records: &[MeasurementRecord], comment: Option<&str>) { + let parent = path + .parent() + .expect("measurement CSV output must have a parent directory"); + fs::create_dir_all(parent).unwrap(); + + let mut csv = String::new(); + if let Some(comment) = comment { + csv.push('#'); + csv.push(' '); + csv.push_str(comment); + csv.push('\n'); + } 
+ csv.push_str( + "backend,case,iterations_per_sample,samples,ns_per_byte,measurement_uncertainty\n", + ); + for record in records { + csv.push_str(&format!( + "{},{},{},{},{:.9},{:.6}\n", + record.backend, + record.case, + record.iterations_per_sample, + record.samples, + record.ns_per_byte, + record.measurement_uncertainty, + )); + } + + fs::write(path, csv).unwrap(); +} + +fn status(message: impl AsRef) { + eprintln!("[backend-regression] {}", message.as_ref()); +} + +fn unique_temp_dir(backend: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + env::temp_dir().join(format!( + "flate2-backend-regression-{backend}-{}-{nanos}", + process::id() + )) +} + +fn checked_output(command: &mut Command, context: &str) -> Output { + let output = command + .output() + .unwrap_or_else(|err| panic!("failed to {}: {}", context, err)); + assert!( + output.status.success(), + "failed to {context} (status {}):\nstdout:\n{}\nstderr:\n{}", + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + ); + output +} + +fn checked_output_with_inherited_stderr(command: &mut Command, context: &str) -> Output { + let child = command + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .spawn() + .unwrap_or_else(|err| panic!("failed to {}: {}", context, err)); + let output = child + .wait_with_output() + .unwrap_or_else(|err| panic!("failed to {}: {}", context, err)); + assert!( + output.status.success(), + "failed to {context} (status {}):\nstdout:\n{}", + output.status, + String::from_utf8_lossy(&output.stdout), + ); + output +} + +struct KnownGoodWorktree { + repo_root: PathBuf, + path: PathBuf, +} + +impl KnownGoodWorktree { + fn create(repo_root: &Path, commit: &str, path: &Path) -> Self { + checked_output( + Command::new("git") + .arg("-C") + .arg(repo_root) + .arg("cat-file") + .arg("-e") + .arg(format!("{commit}^{{commit}}")), + &format!( + "verify known-good commit {commit} is 
available locally; fetch more history if needed" + ), + ); + checked_output( + Command::new("git") + .arg("-C") + .arg(repo_root) + .arg("worktree") + .arg("add") + .arg("--detach") + .arg("--force") + .arg(path) + .arg(commit), + &format!("create worktree for known-good commit {commit}"), + ); + Self { + repo_root: repo_root.to_path_buf(), + path: path.to_path_buf(), + } + } +} + +impl Drop for KnownGoodWorktree { + fn drop(&mut self) { + if let Err(err) = Command::new("git") + .arg("-C") + .arg(&self.repo_root) + .arg("worktree") + .arg("remove") + .arg("--force") + .arg(&self.path) + .output() + { + eprintln!( + "failed to remove temporary worktree {}: {}", + self.path.display(), + err + ); + } + } +} + +fn escaped_toml_path(path: &Path) -> String { + path.display().to_string().replace('\\', "\\\\") +} + +fn driver_manifest(crate_path: &Path) -> String { + format!( + r#"[package] +name = "flate2-backend-driver" +version = "0.1.0" +edition = "2021" + +[dependencies] +flate2 = {{ path = "{}", default-features = false }} + +[features] +default = [] +"rust_backend" = ["flate2/rust_backend"] +"zlib-rs" = ["flate2/zlib-rs"] +"zlib" = ["flate2/zlib"] +"zlib-ng" = ["flate2/zlib-ng"] +"zlib-ng-compat" = ["flate2/zlib-ng-compat"] +"#, + escaped_toml_path(crate_path) + ) +} + +fn driver_source_path() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("support") + .join("backend-regression-driver.rs") +} + +fn crate_has_uninit_api(crate_path: &Path) -> bool { + fs::read_to_string(crate_path.join("src").join("mem.rs")) + .map(|mem_rs| { + mem_rs.contains("pub fn compress_uninit(") + && mem_rs.contains("pub fn decompress_uninit(") + }) + .unwrap_or(false) +} + +fn commit_has_uninit_api(repo_root: &Path, commit: &str) -> bool { + Command::new("git") + .arg("-C") + .arg(repo_root) + .arg("show") + .arg(format!("{commit}:src/mem.rs")) + .output() + .ok() + .filter(|output| output.status.success()) + .and_then(|output| 
String::from_utf8(output.stdout).ok()) + .map(|mem_rs| { + mem_rs.contains("pub fn compress_uninit(") + && mem_rs.contains("pub fn decompress_uninit(") + }) + .unwrap_or(false) +} + +fn driver_rustflags(include_uninit_cases: bool, has_uninit_api: bool) -> Option { + if !include_uninit_cases && !has_uninit_api { + return None; + } + + let mut extra_flags = Vec::new(); + if include_uninit_cases { + extra_flags.push(DRIVER_COMPARE_UNINIT_CFG); + } + if has_uninit_api { + extra_flags.push(DRIVER_UNINIT_CFG); + } + let extra_flags = extra_flags.join(" "); + + match env::var("RUSTFLAGS") { + Ok(existing) if !existing.trim().is_empty() => Some(format!("{existing} {extra_flags}")), + _ => Some(extra_flags), + } +} + +fn run_driver( + backend: BackendConfig, + crate_path: &Path, + driver_path: &Path, + cargo_target_dir: &Path, + label: &str, + compare_uninit: bool, + context: &str, +) -> Vec { + fs::create_dir_all(driver_path.join("src")).unwrap(); + fs::write(driver_path.join("Cargo.toml"), driver_manifest(crate_path)).unwrap(); + fs::copy( + driver_source_path(), + driver_path.join("src").join("main.rs"), + ) + .unwrap(); + + let mut command = Command::new("cargo"); + command + .arg("run") + .arg("--release") + .arg("--features") + .arg(backend.driver_feature) + .current_dir(driver_path) + .env("CARGO_TARGET_DIR", cargo_target_dir) + .env(DRIVER_LABEL_ENV, label); + if let Some(rustflags) = driver_rustflags(compare_uninit, crate_has_uninit_api(crate_path)) { + command.env("RUSTFLAGS", rustflags); + } + + let output = checked_output_with_inherited_stderr(&mut command, context); + + String::from_utf8(output.stdout) + .expect("driver output must be valid UTF-8") + .lines() + .skip(1) + .filter(|line| !line.trim().is_empty()) + .map(parse_measurement_record) + .collect() +} + +fn measure_current( + backend: BackendConfig, + temp_root: &Path, + compare_uninit: bool, +) -> Vec { + let repo_root = Path::new(env!("CARGO_MANIFEST_DIR")); + status(format!( + "running driver for 
current {} checkout at {}", + backend.name, + repo_root.display() + )); + run_driver( + backend, + repo_root, + &temp_root.join("current-driver"), + &temp_root.join("current-target"), + "current checkout", + compare_uninit, + &format!("run current driver for {}", backend.name), + ) +} + +fn measure_known_good( + backend: BackendConfig, + temp_root: &Path, + commit: &str, + compare_uninit: bool, +) -> Vec { + let repo_root = Path::new(env!("CARGO_MANIFEST_DIR")); + let worktree_path = temp_root.join("known-good"); + status(format!( + "creating detached worktree for known-good commit {} at {}", + commit, + worktree_path.display() + )); + let _worktree = KnownGoodWorktree::create(repo_root, commit, &worktree_path); + status(format!( + "running driver for {} at known-good commit {}", + backend.name, commit + )); + run_driver( + backend, + &worktree_path, + &temp_root.join("baseline-driver"), + &temp_root.join("baseline-target"), + &format!("known-good commit {commit}"), + compare_uninit, + &format!( + "run baseline driver for {} at commit {commit}", + backend.name + ), + ) +} + +#[cfg(feature = "zlib-ng")] +fn backend_config() -> BackendConfig { + BackendConfig { + name: "zlib-ng", + driver_feature: "zlib-ng", + compare_uninit_against_legacy_baseline: true, + } +} + +#[cfg(all(not(feature = "zlib-ng"), feature = "zlib-ng-compat"))] +fn backend_config() -> BackendConfig { + BackendConfig { + name: "zlib-ng-compat", + driver_feature: "zlib-ng-compat", + compare_uninit_against_legacy_baseline: true, + } +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + feature = "zlib-rs" +))] +fn backend_config() -> BackendConfig { + BackendConfig { + name: "zlib-rs", + driver_feature: "zlib-rs", + compare_uninit_against_legacy_baseline: true, + } +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + not(feature = "zlib-rs"), + any( + feature = "zlib", + feature = "zlib-default", + feature = "cloudflare_zlib" + ) +))] +fn 
backend_config() -> BackendConfig { + BackendConfig { + name: "zlib", + driver_feature: "zlib", + compare_uninit_against_legacy_baseline: true, + } +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + not(feature = "zlib-rs"), + not(feature = "zlib"), + not(feature = "zlib-default"), + not(feature = "cloudflare_zlib") +))] +fn backend_config() -> BackendConfig { + BackendConfig { + name: "rust_backend", + driver_feature: "rust_backend", + compare_uninit_against_legacy_baseline: false, + } +} + +#[test] +#[ignore] +fn backend_regression_bench() { + let backend = backend_config(); + let commit = known_good_commit(); + let temp_root = unique_temp_dir(backend.name); + let repo_root = Path::new(env!("CARGO_MANIFEST_DIR")); + let compare_uninit = crate_has_uninit_api(repo_root) + && (commit_has_uninit_api(repo_root, &commit) + || backend.compare_uninit_against_legacy_baseline); + + status(format!( + "starting backend regression benchmark for {} against known-good commit {}", + backend.name, commit + )); + status("generating known-good baseline measurements"); + if !compare_uninit { + status( + "skipping uninit benchmark cases because they are not supported by both current and known-good revisions", + ); + } + let baselines = measure_known_good(backend, &temp_root, &commit, compare_uninit); + status(format!( + "benchmarking current {} backend via driver", + backend.name + )); + let current = measure_current(backend, &temp_root, compare_uninit); + let results = merge_measurements(backend.name, ¤t, &baselines); + + status("writing benchmark artifacts"); + let dir = results_dir(); + write_measurement_csv( + &dir.join(format!("{}-baseline.csv", backend.name)), + &baselines, + Some(&format!("Generated on the fly from commit {commit}.")), + ); + write_measurement_csv( + &dir.join(format!("{}-current.csv", backend.name)), + ¤t, + Some("Generated from the current checkout."), + ); + let results_csv_path = write_results_csv(backend.name, &results); + 
status(format!( + "wrote benchmark results to {}", + repo_relative_display_path(&results_csv_path) + )); + + fs::remove_dir_all(&temp_root).unwrap_or_else(|err| { + panic!( + "failed to remove temporary benchmark directory {}: {}", + temp_root.display(), + err + ) + }); + + let failures: Vec<_> = results + .iter() + .filter(|result| result.ns_per_byte > allowed_ns_per_byte(result)) + .collect(); + assert!( + failures.is_empty(), + "backend regression benchmark failures for {} against known-good commit {}:\n {}", + backend.name, + commit, + failures + .iter() + .map(|result| failure_summary(result)) + .collect::>() + .join("\n ") + ); + status("benchmark completed without threshold failures"); +} diff --git a/tests/support/backend-regression-driver.rs b/tests/support/backend-regression-driver.rs new file mode 100644 index 00000000..eb17fda4 --- /dev/null +++ b/tests/support/backend-regression-driver.rs @@ -0,0 +1,316 @@ +#![allow(unexpected_cfgs)] + +use flate2::write::ZlibEncoder; +use flate2::{ + read::ZlibDecoder, Compress, Compression, Decompress, FlushCompress, FlushDecompress, Status, +}; +use std::env; +use std::hint::black_box; +use std::io::{Read, Write}; +use std::time::{Duration, Instant}; +#[cfg(flate2_has_uninit_api)] +use std::mem::MaybeUninit; + +const DRIVER_LABEL_ENV: &str = "FLATE2_BACKEND_BENCH_LABEL"; +const CHUNK_IN: usize = 2 * 1024; +const CHUNK_OUT: usize = 2 * 1024 * 1024; +const PLAIN_LEN: usize = 16 * 1024 * 1024; +const BENCH_TARGET_SAMPLE_TIME: Duration = Duration::from_millis(200); +const BENCH_SAMPLES: usize = 5; +const BENCH_MAX_ITERS_PER_SAMPLE: usize = 12; + +struct BenchmarkData { + plain: Vec, + zlib: Vec, +} + +struct DriverResult { + iterations_per_sample: usize, + samples: usize, + ns_per_byte: f64, + measurement_uncertainty: f64, +} + +fn benchmark_data() -> BenchmarkData { + let line = + b"The quick brown fox jumps over the lazy dog. 
0123456789 abcdefghijklmnopqrstuvwxyz\n"; + let mut plain = Vec::with_capacity(PLAIN_LEN); + while plain.len() < PLAIN_LEN { + plain.extend_from_slice(line); + } + plain.truncate(PLAIN_LEN); + + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast()); + encoder.write_all(&plain).unwrap(); + let zlib = encoder.finish().unwrap(); + + BenchmarkData { plain, zlib } +} + +fn run_decompress_chunked_large_output_buf(data: &BenchmarkData) { + let mut decoder = Decompress::new(true); + let mut chunk = vec![0u8; CHUNK_OUT].into_boxed_slice(); + let mut result = Vec::with_capacity(data.plain.len()); + loop { + let prior_out = decoder.total_out(); + let in_start = decoder.total_in() as usize; + let in_end = (in_start + CHUNK_IN).min(data.zlib.len()); + let status = decoder + .decompress( + &data.zlib[in_start..in_end], + &mut chunk, + FlushDecompress::None, + ) + .unwrap(); + let bytes_written = (decoder.total_out() - prior_out) as usize; + result.extend_from_slice(&chunk[..bytes_written]); + if status == Status::StreamEnd { + break; + } + } + assert_eq!(result, data.plain); +} + +#[cfg_attr(not(flate2_compare_uninit_cases), allow(dead_code))] +#[cfg(flate2_has_uninit_api)] +fn initialized_prefix(output: &[MaybeUninit<u8>], bytes_written: usize) -> &[u8] { + unsafe { std::slice::from_raw_parts(output.as_ptr() as *const u8, bytes_written) } +} + +#[cfg_attr(not(flate2_compare_uninit_cases), allow(dead_code))] +#[cfg(flate2_has_uninit_api)] +fn run_decompress_uninit_chunked_large_output_buf(data: &BenchmarkData) { + let mut decoder = Decompress::new(true); + let mut chunk = vec![MaybeUninit::<u8>::uninit(); CHUNK_OUT].into_boxed_slice(); + let mut result = Vec::with_capacity(data.plain.len()); + loop { + let prior_out = decoder.total_out(); + let in_start = decoder.total_in() as usize; + let in_end = (in_start + CHUNK_IN).min(data.zlib.len()); + let status = decoder + .decompress_uninit( + &data.zlib[in_start..in_end], + &mut chunk, + FlushDecompress::None, + ) + .unwrap(); 
+ let bytes_written = (decoder.total_out() - prior_out) as usize; + result.extend_from_slice(initialized_prefix(&chunk, bytes_written)); + if status == Status::StreamEnd { + break; + } + } + assert_eq!(result, data.plain); +} + +#[cfg_attr(not(flate2_compare_uninit_cases), allow(dead_code))] +#[cfg(not(flate2_has_uninit_api))] +fn run_decompress_uninit_chunked_large_output_buf(data: &BenchmarkData) { + run_decompress_chunked_large_output_buf(data); +} + +fn run_compress_chunked_large_output_buf(data: &BenchmarkData) { + let mut encoder = Compress::new(Compression::fast(), true); + let mut chunk = vec![0u8; CHUNK_OUT].into_boxed_slice(); + let mut result = Vec::with_capacity(data.zlib.len() * 2); + loop { + let prior_out = encoder.total_out(); + let in_start = encoder.total_in() as usize; + let in_end = (in_start + CHUNK_IN).min(data.plain.len()); + let flush = if in_end == data.plain.len() { + FlushCompress::Finish + } else { + FlushCompress::None + }; + let status = encoder + .compress(&data.plain[in_start..in_end], &mut chunk, flush) + .unwrap(); + let bytes_written = (encoder.total_out() - prior_out) as usize; + result.extend_from_slice(&chunk[..bytes_written]); + if status == Status::StreamEnd { + break; + } + } + + let mut decoder = ZlibDecoder::new(result.as_slice()); + let mut decoded = Vec::with_capacity(data.plain.len()); + decoder.read_to_end(&mut decoded).unwrap(); + assert_eq!(decoded, data.plain); +} + +#[cfg_attr(not(flate2_compare_uninit_cases), allow(dead_code))] +#[cfg(flate2_has_uninit_api)] +fn run_compress_uninit_chunked_large_output_buf(data: &BenchmarkData) { + let mut encoder = Compress::new(Compression::fast(), true); + let mut chunk = vec![MaybeUninit::<u8>::uninit(); CHUNK_OUT].into_boxed_slice(); + let mut result = Vec::with_capacity(data.zlib.len() * 2); + loop { + let prior_out = encoder.total_out(); + let in_start = encoder.total_in() as usize; + let in_end = (in_start + CHUNK_IN).min(data.plain.len()); + let flush = if in_end == 
data.plain.len() { + FlushCompress::Finish + } else { + FlushCompress::None + }; + let status = encoder + .compress_uninit(&data.plain[in_start..in_end], &mut chunk, flush) + .unwrap(); + let bytes_written = (encoder.total_out() - prior_out) as usize; + result.extend_from_slice(initialized_prefix(&chunk, bytes_written)); + if status == Status::StreamEnd { + break; + } + } + + let mut decoder = ZlibDecoder::new(result.as_slice()); + let mut decoded = Vec::with_capacity(data.plain.len()); + decoder.read_to_end(&mut decoded).unwrap(); + assert_eq!(decoded, data.plain); +} + +#[cfg_attr(not(flate2_compare_uninit_cases), allow(dead_code))] +#[cfg(not(flate2_has_uninit_api))] +fn run_compress_uninit_chunked_large_output_buf(data: &BenchmarkData) { + run_compress_chunked_large_output_buf(data); +} + +fn benchmark_case(data: &BenchmarkData, run: fn(&BenchmarkData)) -> DriverResult { + let warmup_started = Instant::now(); + run(data); + let warmup_elapsed = warmup_started.elapsed(); + let warmup_nanos = warmup_elapsed.as_nanos(); + let target_nanos = BENCH_TARGET_SAMPLE_TIME.as_nanos(); + let iterations_per_sample = if warmup_nanos == 0 { + BENCH_MAX_ITERS_PER_SAMPLE + } else { + (target_nanos / warmup_nanos) as usize + } + .clamp(1, BENCH_MAX_ITERS_PER_SAMPLE); + + let mut samples = Vec::with_capacity(BENCH_SAMPLES); + for _ in 0..BENCH_SAMPLES { + let started = Instant::now(); + for _ in 0..iterations_per_sample { + run(black_box(data)); + } + samples.push(started.elapsed()); + } + samples.sort_unstable(); + + let iterations = iterations_per_sample as f64; + let bytes = data.plain.len() as f64; + let median = samples[samples.len() / 2]; + let ns_per_byte = duration_ns_per_byte(median, iterations, bytes); + let measurement_uncertainty = + relative_measurement_uncertainty(ns_per_byte, &samples, iterations, bytes); + + DriverResult { + iterations_per_sample, + samples: BENCH_SAMPLES, + ns_per_byte, + measurement_uncertainty, + } +} + +fn duration_ns_per_byte(duration: 
Duration, iterations: f64, bytes: f64) -> f64 { + duration.as_nanos() as f64 / (iterations * bytes) +} + +fn relative_measurement_uncertainty( + median_ns_per_byte: f64, + samples: &[Duration], + iterations: f64, + bytes: f64, +) -> f64 { + samples + .iter() + .map(|sample| { + let sample_ns_per_byte = duration_ns_per_byte(*sample, iterations, bytes); + ((sample_ns_per_byte - median_ns_per_byte) / median_ns_per_byte).abs() + }) + .fold(0.0_f64, f64::max) +} + +#[cfg(feature = "zlib-ng")] +fn backend_name() -> &'static str { + "zlib-ng" +} + +#[cfg(all(not(feature = "zlib-ng"), feature = "zlib-ng-compat"))] +fn backend_name() -> &'static str { + "zlib-ng-compat" +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + feature = "zlib-rs" +))] +fn backend_name() -> &'static str { + "zlib-rs" +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + not(feature = "zlib-rs"), + feature = "zlib" +))] +fn backend_name() -> &'static str { + "zlib" +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + not(feature = "zlib-rs"), + not(feature = "zlib") +))] +fn backend_name() -> &'static str { + "rust_backend" +} + +fn driver_label() -> String { + env::var(DRIVER_LABEL_ENV).unwrap_or_else(|_| "benchmark target".to_owned()) +} + +fn print_record(backend: &str, case: &str, result: &DriverResult) { + println!( + "{backend},{case},{},{},{:.9},{:.6}", + result.iterations_per_sample, + result.samples, + result.ns_per_byte, + result.measurement_uncertainty, + ); +} + +fn main() { + let backend = backend_name(); + let label = driver_label(); + eprintln!("[backend-driver] measuring {label} for {backend}"); + let data = benchmark_data(); + let compress = benchmark_case(&data, run_compress_chunked_large_output_buf); + let decompress = benchmark_case(&data, run_decompress_chunked_large_output_buf); + #[cfg(flate2_compare_uninit_cases)] + let compress_uninit = benchmark_case(&data, 
run_compress_uninit_chunked_large_output_buf); + #[cfg(flate2_compare_uninit_cases)] + let decompress_uninit = benchmark_case(&data, run_decompress_uninit_chunked_large_output_buf); + + eprintln!("[backend-driver] emitting benchmark CSV for {label} and {backend}"); + println!("backend,case,iterations_per_sample,samples,ns_per_byte,measurement_uncertainty"); + print_record(backend, "compress_chunked_large_output_buf", &compress); + print_record(backend, "decompress_chunked_large_output_buf", &decompress); + #[cfg(flate2_compare_uninit_cases)] + print_record( + backend, + "compress_uninit_chunked_large_output_buf", + &compress_uninit, + ); + #[cfg(flate2_compare_uninit_cases)] + print_record( + backend, + "decompress_uninit_chunked_large_output_buf", + &decompress_uninit, + ); +}