From 04931befd7d3c727a646c49bd96b7fd7c831bb0d Mon Sep 17 00:00:00 2001 From: "GPT 5.4" Date: Thu, 16 Apr 2026 12:21:47 +0800 Subject: [PATCH 1/2] Add backend regression benchmark harness Add known-good backend regression benchmark harness Introduce an ignored integration test that measures backend-sensitive compress/decompress cases against a known-good commit. - the test benchmarks the current checkout in release mode - it creates a detached worktree at KNOWN_GOOD_COMMIT on the same runner - it generates a tiny temporary Cargo project whose manifest points `flate2` at that worktree - it copies `tests/support/backend-regression-driver.rs` into that project and runs it to produce baseline CSV data - it compares the current results against those baseline measurements and fails when the observed slowdown exceeds the combined measurement uncertainty of the current run and the baseline run - run a single backend locally with: `cargo test --release --test backend-regression-bench -- --ignored --exact backend_regression_bench --nocapture` - switch backends the same way the rest of the test matrix does, for example: `cargo test --release --test backend-regression-bench --features zlib-rs --no-default-features -- --ignored --exact backend_regression_bench --nocapture` - optionally, inspect `target/backend-bench/*.csv` for the raw baseline/current measurements after a run This is intended to be maintained as a regression guard: - update KNOWN_GOOD_COMMIT only when intentionally accepting a new performance baseline - keep the driver source reusable and branch-independent so the baseline setup stays lightweight and easy to reason about ``` > Add this benchmark (https://github.com/rust-lang/flate2-rs/issues/544#issuecomment-4256274786) to version of the library before https://github.com/rust-lang/flate2-rs/pull/502 was merged to get a baseline for all backends. Store that baseline, and add a CI job that checks against the baseline. 
Consider other uses of performance-critical low-level methods and see if it makes sense to add more kinds of tests to the baseline the most recent version. Success means that the baseline as run on main reproduces issue #544 as well, there is no need to attempt to fix it. Co-authored-by: Sebastian Thiel --- .github/workflows/backend-regression.yml | 42 ++ tests/backend-regression-bench.rs | 660 +++++++++++++++++++++ tests/support/backend-regression-driver.rs | 316 ++++++++++ 3 files changed, 1018 insertions(+) create mode 100644 .github/workflows/backend-regression.yml create mode 100644 tests/backend-regression-bench.rs create mode 100644 tests/support/backend-regression-driver.rs diff --git a/.github/workflows/backend-regression.yml b/.github/workflows/backend-regression.yml new file mode 100644 index 00000000..d0c7efe7 --- /dev/null +++ b/.github/workflows/backend-regression.yml @@ -0,0 +1,42 @@ +name: Backend performance regression + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + backend-regression: + name: Backend regression (${{ matrix.backend.name }}) + runs-on: ubuntu-latest + permissions: + contents: read + strategy: + fail-fast: false + matrix: + backend: + - name: rust_backend + command: cargo test --release --test backend-regression-bench -- --ignored --nocapture + - name: zlib-rs + command: cargo test --release --test backend-regression-bench --features zlib-rs --no-default-features -- --ignored --nocapture + - name: zlib + command: cargo test --release --test backend-regression-bench --features zlib --no-default-features -- --ignored --nocapture + - name: zlib-ng + command: cargo test --release --test backend-regression-bench --features zlib-ng --no-default-features -- --ignored --nocapture + - name: zlib-ng-compat + command: cargo test --release --test backend-regression-bench --features zlib-ng-compat --no-default-features -- --ignored --nocapture + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install Rust + 
run: rustup update stable --no-self-update && rustup default stable + shell: bash + - name: Run backend regression benchmark + run: ${{ matrix.backend.command }} + - name: Upload backend benchmark CSV + if: always() + uses: actions/upload-artifact@v4 + with: + name: backend-regression-${{ matrix.backend.name }} + path: target/backend-bench/${{ matrix.backend.name }}*.csv diff --git a/tests/backend-regression-bench.rs b/tests/backend-regression-bench.rs new file mode 100644 index 00000000..71486ca6 --- /dev/null +++ b/tests/backend-regression-bench.rs @@ -0,0 +1,660 @@ +use std::env; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::{self, Command, Output, Stdio}; +use std::time::{SystemTime, UNIX_EPOCH}; + +const KNOWN_GOOD_COMMIT: &str = "b9afa93d70e19a213a3594190e31fb39d83aba0e"; +const KNOWN_GOOD_COMMIT_ENV: &str = "FLATE2_BACKEND_BENCH_KNOWN_GOOD_COMMIT"; +const DRIVER_LABEL_ENV: &str = "FLATE2_BACKEND_BENCH_LABEL"; +const DRIVER_COMPARE_UNINIT_CFG: &str = "--cfg flate2_compare_uninit_cases"; +const DRIVER_UNINIT_CFG: &str = "--cfg flate2_has_uninit_api"; +const MIN_MEASUREMENT_SLACK_FACTOR: f64 = 0.10; + +#[derive(Clone)] +struct MeasurementRecord { + backend: String, + case: String, + iterations_per_sample: usize, + samples: usize, + ns_per_byte: f64, + measurement_uncertainty: f64, +} + +struct BenchmarkResult { + case: String, + iterations_per_sample: usize, + samples: usize, + ns_per_byte: f64, + measurement_uncertainty: f64, + baseline_iterations_per_sample: usize, + baseline_samples: usize, + baseline_ns_per_byte: f64, + baseline_measurement_uncertainty: f64, +} + +#[derive(Clone, Copy)] +struct BackendConfig { + name: &'static str, + driver_feature: &'static str, + compare_uninit_against_legacy_baseline: bool, +} + +fn known_good_commit() -> String { + env::var(KNOWN_GOOD_COMMIT_ENV).unwrap_or_else(|_| KNOWN_GOOD_COMMIT.to_owned()) +} + +fn parse_measurement_record(line: &str) -> MeasurementRecord { + let mut fields = line.split(','); + 
let backend = fields + .next() + .expect("missing backend field in benchmark CSV") + .trim() + .to_owned(); + let case = fields + .next() + .expect("missing case field in benchmark CSV") + .trim() + .to_owned(); + let iterations_per_sample = fields + .next() + .expect("missing iterations_per_sample field in benchmark CSV") + .trim() + .parse() + .expect("invalid iterations_per_sample field in benchmark CSV"); + let samples = fields + .next() + .expect("missing samples field in benchmark CSV") + .trim() + .parse() + .expect("invalid samples field in benchmark CSV"); + let ns_per_byte = fields + .next() + .expect("missing ns_per_byte field in benchmark CSV") + .trim() + .parse() + .expect("invalid ns_per_byte field in benchmark CSV"); + let measurement_uncertainty = fields + .next() + .expect("missing measurement_uncertainty field in benchmark CSV") + .trim() + .parse() + .expect("invalid measurement_uncertainty field in benchmark CSV"); + assert!( + fields.next().is_none(), + "unexpected trailing benchmark CSV fields" + ); + MeasurementRecord { + backend, + case, + iterations_per_sample, + samples, + ns_per_byte, + measurement_uncertainty, + } +} + +fn merge_measurements( + backend: &str, + current: &[MeasurementRecord], + baseline: &[MeasurementRecord], +) -> Vec<BenchmarkResult> { + current + .iter() + .map(|current| { + let baseline = baseline + .iter() + .find(|baseline| baseline.backend == backend && baseline.case == current.case) + .unwrap_or_else(|| { + panic!( + "missing baseline for backend={backend}, case={}", + current.case + ) + }); + BenchmarkResult { + case: current.case.clone(), + iterations_per_sample: current.iterations_per_sample, + samples: current.samples, + ns_per_byte: current.ns_per_byte, + measurement_uncertainty: current.measurement_uncertainty, + baseline_iterations_per_sample: baseline.iterations_per_sample, + baseline_samples: baseline.samples, + baseline_ns_per_byte: baseline.ns_per_byte, + baseline_measurement_uncertainty: 
baseline.measurement_uncertainty, + } + }) + .collect() +} + +fn allowed_slowdown_factor(result: &BenchmarkResult) -> f64 { + 1.0 + measurement_slack_factor(result) +} + +fn allowed_ns_per_byte(result: &BenchmarkResult) -> f64 { + result.baseline_ns_per_byte * allowed_slowdown_factor(result) +} + +fn slowdown_factor(result: &BenchmarkResult) -> f64 { + result.ns_per_byte / result.baseline_ns_per_byte +} + +fn measurement_slack_factor(result: &BenchmarkResult) -> f64 { + (result.measurement_uncertainty + result.baseline_measurement_uncertainty) + .max(MIN_MEASUREMENT_SLACK_FACTOR) +} + +fn failure_summary(result: &BenchmarkResult) -> String { + format!( + "{}: {:.2}x slowdown of {:.2}x allowed, measured {:.3} ns/byte, baseline {:.3} ns/byte", + result.case, + slowdown_factor(result), + allowed_slowdown_factor(result), + result.ns_per_byte, + result.baseline_ns_per_byte, + ) +} + +fn status_for(result: &BenchmarkResult) -> &'static str { + if result.ns_per_byte <= allowed_ns_per_byte(result) { + "pass" + } else { + "fail" + } +} + +fn results_dir() -> PathBuf { + let target_dir = env::var_os("CARGO_TARGET_DIR") + .map(PathBuf::from) + .unwrap_or_else(|| Path::new(env!("CARGO_MANIFEST_DIR")).join("target")); + target_dir.join("backend-bench") +} + +fn results_csv_path(backend: &str) -> PathBuf { + results_dir().join(format!("{backend}.csv")) +} + +fn repo_relative_display_path(path: &Path) -> String { + path.strip_prefix(Path::new(env!("CARGO_MANIFEST_DIR"))) + .unwrap_or(path) + .display() + .to_string() +} + +fn write_results_csv(backend: &str, results: &[BenchmarkResult]) -> PathBuf { + let dir = results_dir(); + fs::create_dir_all(&dir).unwrap(); + + let mut csv = String::from( + "backend,case,iterations_per_sample,samples,ns_per_byte,measurement_uncertainty,baseline_iterations_per_sample,baseline_samples,baseline_ns_per_byte,baseline_measurement_uncertainty,allowed_slowdown_factor,allowed_ns_per_byte,slowdown_factor,measurement_slack_factor,status\n", + ); + for 
result in results { + csv.push_str(&format!( + "{backend},{},{},{},{:.9},{:.6},{},{},{:.9},{:.6},{:.6},{:.9},{:.6},{:.6},{}\n", + result.case, + result.iterations_per_sample, + result.samples, + result.ns_per_byte, + result.measurement_uncertainty, + result.baseline_iterations_per_sample, + result.baseline_samples, + result.baseline_ns_per_byte, + result.baseline_measurement_uncertainty, + allowed_slowdown_factor(result), + allowed_ns_per_byte(result), + slowdown_factor(result), + measurement_slack_factor(result), + status_for(result), + )); + } + + let path = results_csv_path(backend); + fs::write(&path, csv).unwrap(); + path +} + +fn write_measurement_csv(path: &Path, records: &[MeasurementRecord], comment: Option<&str>) { + let parent = path + .parent() + .expect("measurement CSV output must have a parent directory"); + fs::create_dir_all(parent).unwrap(); + + let mut csv = String::new(); + if let Some(comment) = comment { + csv.push('#'); + csv.push(' '); + csv.push_str(comment); + csv.push('\n'); + } + csv.push_str( + "backend,case,iterations_per_sample,samples,ns_per_byte,measurement_uncertainty\n", + ); + for record in records { + csv.push_str(&format!( + "{},{},{},{},{:.9},{:.6}\n", + record.backend, + record.case, + record.iterations_per_sample, + record.samples, + record.ns_per_byte, + record.measurement_uncertainty, + )); + } + + fs::write(path, csv).unwrap(); +} + +fn status(message: impl AsRef<str>) { + eprintln!("[backend-regression] {}", message.as_ref()); +} + +fn unique_temp_dir(backend: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + env::temp_dir().join(format!( + "flate2-backend-regression-{backend}-{}-{nanos}", + process::id() + )) +} + +fn checked_output(command: &mut Command, context: &str) -> Output { + let output = command + .output() + .unwrap_or_else(|err| panic!("failed to {}: {}", context, err)); + assert!( + output.status.success(), + "failed to {context} (status 
{}):\nstdout:\n{}\nstderr:\n{}", + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + ); + output +} + +fn checked_output_with_inherited_stderr(command: &mut Command, context: &str) -> Output { + let child = command + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()) + .spawn() + .unwrap_or_else(|err| panic!("failed to {}: {}", context, err)); + let output = child + .wait_with_output() + .unwrap_or_else(|err| panic!("failed to {}: {}", context, err)); + assert!( + output.status.success(), + "failed to {context} (status {}):\nstdout:\n{}", + output.status, + String::from_utf8_lossy(&output.stdout), + ); + output +} + +struct KnownGoodWorktree { + repo_root: PathBuf, + path: PathBuf, +} + +impl KnownGoodWorktree { + fn create(repo_root: &Path, commit: &str, path: &Path) -> Self { + checked_output( + Command::new("git") + .arg("-C") + .arg(repo_root) + .arg("cat-file") + .arg("-e") + .arg(format!("{commit}^{{commit}}")), + &format!( + "verify known-good commit {commit} is available locally; fetch more history if needed" + ), + ); + checked_output( + Command::new("git") + .arg("-C") + .arg(repo_root) + .arg("worktree") + .arg("add") + .arg("--detach") + .arg("--force") + .arg(path) + .arg(commit), + &format!("create worktree for known-good commit {commit}"), + ); + Self { + repo_root: repo_root.to_path_buf(), + path: path.to_path_buf(), + } + } +} + +impl Drop for KnownGoodWorktree { + fn drop(&mut self) { + if let Err(err) = Command::new("git") + .arg("-C") + .arg(&self.repo_root) + .arg("worktree") + .arg("remove") + .arg("--force") + .arg(&self.path) + .output() + { + eprintln!( + "failed to remove temporary worktree {}: {}", + self.path.display(), + err + ); + } + } +} + +fn escaped_toml_path(path: &Path) -> String { + path.display().to_string().replace('\\', "\\\\") +} + +fn driver_manifest(crate_path: &Path) -> String { + format!( + r#"[package] +name = "flate2-backend-driver" +version = "0.1.0" +edition = 
"2021" + +[dependencies] +flate2 = {{ path = "{}", default-features = false }} + +[features] +default = [] +"rust_backend" = ["flate2/rust_backend"] +"zlib-rs" = ["flate2/zlib-rs"] +"zlib" = ["flate2/zlib"] +"zlib-ng" = ["flate2/zlib-ng"] +"zlib-ng-compat" = ["flate2/zlib-ng-compat"] +"#, + escaped_toml_path(crate_path) + ) +} + +fn driver_source_path() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("support") + .join("backend-regression-driver.rs") +} + +fn crate_has_uninit_api(crate_path: &Path) -> bool { + fs::read_to_string(crate_path.join("src").join("mem.rs")) + .map(|mem_rs| { + mem_rs.contains("pub fn compress_uninit(") + && mem_rs.contains("pub fn decompress_uninit(") + }) + .unwrap_or(false) +} + +fn commit_has_uninit_api(repo_root: &Path, commit: &str) -> bool { + Command::new("git") + .arg("-C") + .arg(repo_root) + .arg("show") + .arg(format!("{commit}:src/mem.rs")) + .output() + .ok() + .filter(|output| output.status.success()) + .and_then(|output| String::from_utf8(output.stdout).ok()) + .map(|mem_rs| { + mem_rs.contains("pub fn compress_uninit(") + && mem_rs.contains("pub fn decompress_uninit(") + }) + .unwrap_or(false) +} + +fn driver_rustflags(include_uninit_cases: bool, has_uninit_api: bool) -> Option<String> { + if !include_uninit_cases && !has_uninit_api { + return None; + } + + let mut extra_flags = Vec::new(); + if include_uninit_cases { + extra_flags.push(DRIVER_COMPARE_UNINIT_CFG); + } + if has_uninit_api { + extra_flags.push(DRIVER_UNINIT_CFG); + } + let extra_flags = extra_flags.join(" "); + + match env::var("RUSTFLAGS") { + Ok(existing) if !existing.trim().is_empty() => Some(format!("{existing} {extra_flags}")), + _ => Some(extra_flags), + } +} + +fn run_driver( + backend: BackendConfig, + crate_path: &Path, + driver_path: &Path, + cargo_target_dir: &Path, + label: &str, + compare_uninit: bool, + context: &str, +) -> Vec<MeasurementRecord> { + fs::create_dir_all(driver_path.join("src")).unwrap(); + 
fs::write(driver_path.join("Cargo.toml"), driver_manifest(crate_path)).unwrap(); + fs::copy( + driver_source_path(), + driver_path.join("src").join("main.rs"), + ) + .unwrap(); + + let mut command = Command::new("cargo"); + command + .arg("run") + .arg("--release") + .arg("--features") + .arg(backend.driver_feature) + .current_dir(driver_path) + .env("CARGO_TARGET_DIR", cargo_target_dir) + .env(DRIVER_LABEL_ENV, label); + if let Some(rustflags) = driver_rustflags(compare_uninit, crate_has_uninit_api(crate_path)) { + command.env("RUSTFLAGS", rustflags); + } + + let output = checked_output_with_inherited_stderr(&mut command, context); + + String::from_utf8(output.stdout) + .expect("driver output must be valid UTF-8") + .lines() + .skip(1) + .filter(|line| !line.trim().is_empty()) + .map(parse_measurement_record) + .collect() +} + +fn measure_current( + backend: BackendConfig, + temp_root: &Path, + compare_uninit: bool, +) -> Vec<MeasurementRecord> { + let repo_root = Path::new(env!("CARGO_MANIFEST_DIR")); + status(format!( + "running driver for current {} checkout at {}", + backend.name, + repo_root.display() + )); + run_driver( + backend, + repo_root, + &temp_root.join("current-driver"), + &temp_root.join("current-target"), + "current checkout", + compare_uninit, + &format!("run current driver for {}", backend.name), + ) +} + +fn measure_known_good( + backend: BackendConfig, + temp_root: &Path, + commit: &str, + compare_uninit: bool, +) -> Vec<MeasurementRecord> { + let repo_root = Path::new(env!("CARGO_MANIFEST_DIR")); + let worktree_path = temp_root.join("known-good"); + status(format!( + "creating detached worktree for known-good commit {} at {}", + commit, + worktree_path.display() + )); + let _worktree = KnownGoodWorktree::create(repo_root, commit, &worktree_path); + status(format!( + "running driver for {} at known-good commit {}", + backend.name, commit + )); + run_driver( + backend, + &worktree_path, + &temp_root.join("baseline-driver"), + &temp_root.join("baseline-target"), + 
&format!("known-good commit {commit}"), + compare_uninit, + &format!( + "run baseline driver for {} at commit {commit}", + backend.name + ), + ) +} + +#[cfg(feature = "zlib-ng")] +fn backend_config() -> BackendConfig { + BackendConfig { + name: "zlib-ng", + driver_feature: "zlib-ng", + compare_uninit_against_legacy_baseline: true, + } +} + +#[cfg(all(not(feature = "zlib-ng"), feature = "zlib-ng-compat"))] +fn backend_config() -> BackendConfig { + BackendConfig { + name: "zlib-ng-compat", + driver_feature: "zlib-ng-compat", + compare_uninit_against_legacy_baseline: true, + } +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + feature = "zlib-rs" +))] +fn backend_config() -> BackendConfig { + BackendConfig { + name: "zlib-rs", + driver_feature: "zlib-rs", + compare_uninit_against_legacy_baseline: true, + } +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + not(feature = "zlib-rs"), + any( + feature = "zlib", + feature = "zlib-default", + feature = "cloudflare_zlib" + ) +))] +fn backend_config() -> BackendConfig { + BackendConfig { + name: "zlib", + driver_feature: "zlib", + compare_uninit_against_legacy_baseline: true, + } +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + not(feature = "zlib-rs"), + not(feature = "zlib"), + not(feature = "zlib-default"), + not(feature = "cloudflare_zlib") +))] +fn backend_config() -> BackendConfig { + BackendConfig { + name: "rust_backend", + driver_feature: "rust_backend", + compare_uninit_against_legacy_baseline: false, + } +} + +#[test] +#[ignore] +fn backend_regression_bench() { + let backend = backend_config(); + let commit = known_good_commit(); + let temp_root = unique_temp_dir(backend.name); + let repo_root = Path::new(env!("CARGO_MANIFEST_DIR")); + let compare_uninit = crate_has_uninit_api(repo_root) + && (commit_has_uninit_api(repo_root, &commit) + || backend.compare_uninit_against_legacy_baseline); + + status(format!( + "starting 
backend regression benchmark for {} against known-good commit {}", + backend.name, commit + )); + status("generating known-good baseline measurements"); + if !compare_uninit { + status( + "skipping uninit benchmark cases because they are not supported by both current and known-good revisions", + ); + } + let baselines = measure_known_good(backend, &temp_root, &commit, compare_uninit); + status(format!( + "benchmarking current {} backend via driver", + backend.name + )); + let current = measure_current(backend, &temp_root, compare_uninit); + let results = merge_measurements(backend.name, &current, &baselines); + + status("writing benchmark artifacts"); + let dir = results_dir(); + write_measurement_csv( + &dir.join(format!("{}-baseline.csv", backend.name)), + &baselines, + Some(&format!("Generated on the fly from commit {commit}.")), + ); + write_measurement_csv( + &dir.join(format!("{}-current.csv", backend.name)), + &current, + Some("Generated from the current checkout."), + ); + let results_csv_path = write_results_csv(backend.name, &results); + status(format!( + "wrote benchmark results to {}", + repo_relative_display_path(&results_csv_path) + )); + + fs::remove_dir_all(&temp_root).unwrap_or_else(|err| { + panic!( + "failed to remove temporary benchmark directory {}: {}", + temp_root.display(), + err + ) + }); + + let failures: Vec<_> = results + .iter() + .filter(|result| result.ns_per_byte > allowed_ns_per_byte(result)) + .collect(); + assert!( + failures.is_empty(), + "backend regression benchmark failures for {} against known-good commit {}:\n {}", + backend.name, + commit, + failures + .iter() + .map(|result| failure_summary(result)) + .collect::<Vec<_>>() + .join("\n ") + ); + status("benchmark completed without threshold failures"); +} diff --git a/tests/support/backend-regression-driver.rs b/tests/support/backend-regression-driver.rs new file mode 100644 index 00000000..eb17fda4 --- /dev/null +++ b/tests/support/backend-regression-driver.rs @@ -0,0 +1,316 @@ 
+#![allow(unexpected_cfgs)] + +use flate2::write::ZlibEncoder; +use flate2::{ + read::ZlibDecoder, Compress, Compression, Decompress, FlushCompress, FlushDecompress, Status, +}; +use std::env; +use std::hint::black_box; +use std::io::{Read, Write}; +use std::time::{Duration, Instant}; +#[cfg(flate2_has_uninit_api)] +use std::mem::MaybeUninit; + +const DRIVER_LABEL_ENV: &str = "FLATE2_BACKEND_BENCH_LABEL"; +const CHUNK_IN: usize = 2 * 1024; +const CHUNK_OUT: usize = 2 * 1024 * 1024; +const PLAIN_LEN: usize = 16 * 1024 * 1024; +const BENCH_TARGET_SAMPLE_TIME: Duration = Duration::from_millis(200); +const BENCH_SAMPLES: usize = 5; +const BENCH_MAX_ITERS_PER_SAMPLE: usize = 12; + +struct BenchmarkData { + plain: Vec<u8>, + zlib: Vec<u8>, +} + +struct DriverResult { + iterations_per_sample: usize, + samples: usize, + ns_per_byte: f64, + measurement_uncertainty: f64, +} + +fn benchmark_data() -> BenchmarkData { + let line = + b"The quick brown fox jumps over the lazy dog. 0123456789 abcdefghijklmnopqrstuvwxyz\n"; + let mut plain = Vec::with_capacity(PLAIN_LEN); + while plain.len() < PLAIN_LEN { + plain.extend_from_slice(line); + } + plain.truncate(PLAIN_LEN); + + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast()); + encoder.write_all(&plain).unwrap(); + let zlib = encoder.finish().unwrap(); + + BenchmarkData { plain, zlib } +} + +fn run_decompress_chunked_large_output_buf(data: &BenchmarkData) { + let mut decoder = Decompress::new(true); + let mut chunk = vec![0u8; CHUNK_OUT].into_boxed_slice(); + let mut result = Vec::with_capacity(data.plain.len()); + loop { + let prior_out = decoder.total_out(); + let in_start = decoder.total_in() as usize; + let in_end = (in_start + CHUNK_IN).min(data.zlib.len()); + let status = decoder + .decompress( + &data.zlib[in_start..in_end], + &mut chunk, + FlushDecompress::None, + ) + .unwrap(); + let bytes_written = (decoder.total_out() - prior_out) as usize; + result.extend_from_slice(&chunk[..bytes_written]); + if status == 
Status::StreamEnd { + break; + } + } + assert_eq!(result, data.plain); +} + +#[cfg_attr(not(flate2_compare_uninit_cases), allow(dead_code))] +#[cfg(flate2_has_uninit_api)] +fn initialized_prefix(output: &[MaybeUninit<u8>], bytes_written: usize) -> &[u8] { + unsafe { std::slice::from_raw_parts(output.as_ptr() as *const u8, bytes_written) } +} + +#[cfg_attr(not(flate2_compare_uninit_cases), allow(dead_code))] +#[cfg(flate2_has_uninit_api)] +fn run_decompress_uninit_chunked_large_output_buf(data: &BenchmarkData) { + let mut decoder = Decompress::new(true); + let mut chunk = vec![MaybeUninit::<u8>::uninit(); CHUNK_OUT].into_boxed_slice(); + let mut result = Vec::with_capacity(data.plain.len()); + loop { + let prior_out = decoder.total_out(); + let in_start = decoder.total_in() as usize; + let in_end = (in_start + CHUNK_IN).min(data.zlib.len()); + let status = decoder + .decompress_uninit( + &data.zlib[in_start..in_end], + &mut chunk, + FlushDecompress::None, + ) + .unwrap(); + let bytes_written = (decoder.total_out() - prior_out) as usize; + result.extend_from_slice(initialized_prefix(&chunk, bytes_written)); + if status == Status::StreamEnd { + break; + } + } + assert_eq!(result, data.plain); +} + +#[cfg_attr(not(flate2_compare_uninit_cases), allow(dead_code))] +#[cfg(not(flate2_has_uninit_api))] +fn run_decompress_uninit_chunked_large_output_buf(data: &BenchmarkData) { + run_decompress_chunked_large_output_buf(data); +} + +fn run_compress_chunked_large_output_buf(data: &BenchmarkData) { + let mut encoder = Compress::new(Compression::fast(), true); + let mut chunk = vec![0u8; CHUNK_OUT].into_boxed_slice(); + let mut result = Vec::with_capacity(data.zlib.len() * 2); + loop { + let prior_out = encoder.total_out(); + let in_start = encoder.total_in() as usize; + let in_end = (in_start + CHUNK_IN).min(data.plain.len()); + let flush = if in_end == data.plain.len() { + FlushCompress::Finish + } else { + FlushCompress::None + }; + let status = encoder + 
.compress(&data.plain[in_start..in_end], &mut chunk, flush) + .unwrap(); + let bytes_written = (encoder.total_out() - prior_out) as usize; + result.extend_from_slice(&chunk[..bytes_written]); + if status == Status::StreamEnd { + break; + } + } + + let mut decoder = ZlibDecoder::new(result.as_slice()); + let mut decoded = Vec::with_capacity(data.plain.len()); + decoder.read_to_end(&mut decoded).unwrap(); + assert_eq!(decoded, data.plain); +} + +#[cfg_attr(not(flate2_compare_uninit_cases), allow(dead_code))] +#[cfg(flate2_has_uninit_api)] +fn run_compress_uninit_chunked_large_output_buf(data: &BenchmarkData) { + let mut encoder = Compress::new(Compression::fast(), true); + let mut chunk = vec![MaybeUninit::<u8>::uninit(); CHUNK_OUT].into_boxed_slice(); + let mut result = Vec::with_capacity(data.zlib.len() * 2); + loop { + let prior_out = encoder.total_out(); + let in_start = encoder.total_in() as usize; + let in_end = (in_start + CHUNK_IN).min(data.plain.len()); + let flush = if in_end == data.plain.len() { + FlushCompress::Finish + } else { + FlushCompress::None + }; + let status = encoder + .compress_uninit(&data.plain[in_start..in_end], &mut chunk, flush) + .unwrap(); + let bytes_written = (encoder.total_out() - prior_out) as usize; + result.extend_from_slice(initialized_prefix(&chunk, bytes_written)); + if status == Status::StreamEnd { + break; + } + } + + let mut decoder = ZlibDecoder::new(result.as_slice()); + let mut decoded = Vec::with_capacity(data.plain.len()); + decoder.read_to_end(&mut decoded).unwrap(); + assert_eq!(decoded, data.plain); +} + +#[cfg_attr(not(flate2_compare_uninit_cases), allow(dead_code))] +#[cfg(not(flate2_has_uninit_api))] +fn run_compress_uninit_chunked_large_output_buf(data: &BenchmarkData) { + run_compress_chunked_large_output_buf(data); +} + +fn benchmark_case(data: &BenchmarkData, run: fn(&BenchmarkData)) -> DriverResult { + let warmup_started = Instant::now(); + run(data); + let warmup_elapsed = warmup_started.elapsed(); + let 
warmup_nanos = warmup_elapsed.as_nanos(); + let target_nanos = BENCH_TARGET_SAMPLE_TIME.as_nanos(); + let iterations_per_sample = if warmup_nanos == 0 { + BENCH_MAX_ITERS_PER_SAMPLE + } else { + (target_nanos / warmup_nanos) as usize + } + .clamp(1, BENCH_MAX_ITERS_PER_SAMPLE); + + let mut samples = Vec::with_capacity(BENCH_SAMPLES); + for _ in 0..BENCH_SAMPLES { + let started = Instant::now(); + for _ in 0..iterations_per_sample { + run(black_box(data)); + } + samples.push(started.elapsed()); + } + samples.sort_unstable(); + + let iterations = iterations_per_sample as f64; + let bytes = data.plain.len() as f64; + let median = samples[samples.len() / 2]; + let ns_per_byte = duration_ns_per_byte(median, iterations, bytes); + let measurement_uncertainty = + relative_measurement_uncertainty(ns_per_byte, &samples, iterations, bytes); + + DriverResult { + iterations_per_sample, + samples: BENCH_SAMPLES, + ns_per_byte, + measurement_uncertainty, + } +} + +fn duration_ns_per_byte(duration: Duration, iterations: f64, bytes: f64) -> f64 { + duration.as_nanos() as f64 / (iterations * bytes) +} + +fn relative_measurement_uncertainty( + median_ns_per_byte: f64, + samples: &[Duration], + iterations: f64, + bytes: f64, +) -> f64 { + samples + .iter() + .map(|sample| { + let sample_ns_per_byte = duration_ns_per_byte(*sample, iterations, bytes); + ((sample_ns_per_byte - median_ns_per_byte) / median_ns_per_byte).abs() + }) + .fold(0.0_f64, f64::max) +} + +#[cfg(feature = "zlib-ng")] +fn backend_name() -> &'static str { + "zlib-ng" +} + +#[cfg(all(not(feature = "zlib-ng"), feature = "zlib-ng-compat"))] +fn backend_name() -> &'static str { + "zlib-ng-compat" +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + feature = "zlib-rs" +))] +fn backend_name() -> &'static str { + "zlib-rs" +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + not(feature = "zlib-rs"), + feature = "zlib" +))] +fn backend_name() -> &'static str { + 
"zlib" +} + +#[cfg(all( + not(feature = "zlib-ng"), + not(feature = "zlib-ng-compat"), + not(feature = "zlib-rs"), + not(feature = "zlib") +))] +fn backend_name() -> &'static str { + "rust_backend" +} + +fn driver_label() -> String { + env::var(DRIVER_LABEL_ENV).unwrap_or_else(|_| "benchmark target".to_owned()) +} + +fn print_record(backend: &str, case: &str, result: &DriverResult) { + println!( + "{backend},{case},{},{},{:.9},{:.6}", + result.iterations_per_sample, + result.samples, + result.ns_per_byte, + result.measurement_uncertainty, + ); +} + +fn main() { + let backend = backend_name(); + let label = driver_label(); + eprintln!("[backend-driver] measuring {label} for {backend}"); + let data = benchmark_data(); + let compress = benchmark_case(&data, run_compress_chunked_large_output_buf); + let decompress = benchmark_case(&data, run_decompress_chunked_large_output_buf); + #[cfg(flate2_compare_uninit_cases)] + let compress_uninit = benchmark_case(&data, run_compress_uninit_chunked_large_output_buf); + #[cfg(flate2_compare_uninit_cases)] + let decompress_uninit = benchmark_case(&data, run_decompress_uninit_chunked_large_output_buf); + + eprintln!("[backend-driver] emitting benchmark CSV for {label} and {backend}"); + println!("backend,case,iterations_per_sample,samples,ns_per_byte,measurement_uncertainty"); + print_record(backend, "compress_chunked_large_output_buf", &compress); + print_record(backend, "decompress_chunked_large_output_buf", &decompress); + #[cfg(flate2_compare_uninit_cases)] + print_record( + backend, + "compress_uninit_chunked_large_output_buf", + &compress_uninit, + ); + #[cfg(flate2_compare_uninit_cases)] + print_record( + backend, + "decompress_uninit_chunked_large_output_buf", + &decompress_uninit, + ); +} From a79547eacb9bb5c3ac25250250dd6b3cca0e6bcb Mon Sep 17 00:00:00 2001 From: "GPT 5.4" Date: Thu, 16 Apr 2026 13:25:02 +0800 Subject: [PATCH 2/2] Prototype fix for zlib-rs using a secret internal API via cargo feature toggle. 
We'd want to wait for a publicly available API though. --- Cargo.toml | 2 +- src/ffi/zlib_rs.rs | 294 ++++++++++++++++++++++++++++++++------------- 2 files changed, 210 insertions(+), 86 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6d413511..bfeeea31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ exclude = [".*"] libz-sys = { version = "1.1.20", optional = true, default-features = false } libz-ng-sys = { version = "1.1.16", optional = true } # this matches the default features, but we don't want to depend on the default features staying the same -zlib-rs = { version = "0.6.0", optional = true, default-features = false, features = ["std", "rust-allocator"] } +zlib-rs = { version = "0.6.0", optional = true, default-features = false, features = ["std", "rust-allocator", "__internal-api"] } ## This implementation uses only safe Rust code and doesn't require a C compiler. ## It provides good performance for most use cases while being completely portable. miniz_oxide = { version = "0.9.0", optional = true, features = ["simd"] } diff --git a/src/ffi/zlib_rs.rs b/src/ffi/zlib_rs.rs index f5524bbe..fd928fd8 100644 --- a/src/ffi/zlib_rs.rs +++ b/src/ffi/zlib_rs.rs @@ -17,9 +17,14 @@ //! With zlib_rs the state is not self-referential and hence no boxing is needed. The `new` methods //! internally do allocate space for the (de)compression state. 
-use std::fmt; +use std::{ffi::CStr, fmt, mem::MaybeUninit}; -use ::zlib_rs::{DeflateFlush, InflateError, InflateFlush}; +use ::zlib_rs::{ + c_api::z_stream, + deflate::{self, DeflateStream}, + inflate::{self, InflateStream}, + DeflateConfig, DeflateFlush, InflateConfig, InflateFlush, ReturnCode, +}; pub const MZ_NO_FLUSH: isize = DeflateFlush::NoFlush as isize; pub const MZ_PARTIAL_FLUSH: isize = DeflateFlush::PartialFlush as isize; @@ -32,16 +37,6 @@ pub const MZ_DEFAULT_WINDOW_BITS: core::ffi::c_int = 15; use super::*; use crate::mem::{compress_failed, decompress_failed}; -impl From<::zlib_rs::Status> for crate::mem::Status { - fn from(value: ::zlib_rs::Status) -> Self { - match value { - ::zlib_rs::Status::Ok => crate::mem::Status::Ok, - ::zlib_rs::Status::BufError => crate::mem::Status::BufError, - ::zlib_rs::Status::StreamEnd => crate::mem::Status::StreamEnd, - } - } -} - #[derive(Clone, Default)] pub struct ErrorMessage(Option<&'static str>); @@ -52,12 +47,15 @@ impl ErrorMessage { } pub struct Inflate { - pub(crate) inner: ::zlib_rs::Inflate, + pub(crate) inner: z_stream, // NOTE: these counts do not count the dictionary. 
total_in: u64, total_out: u64, } +unsafe impl Send for Inflate {} +unsafe impl Sync for Inflate {} + impl fmt::Debug for Inflate { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!( @@ -69,20 +67,18 @@ impl fmt::Debug for Inflate { } } -impl From for DeflateFlush { - fn from(value: FlushDecompress) -> Self { - match value { - FlushDecompress::None => Self::NoFlush, - FlushDecompress::Sync => Self::SyncFlush, - FlushDecompress::Finish => Self::Finish, - } - } -} - impl InflateBackend for Inflate { fn make(zlib_header: bool, window_bits: u8) -> Self { + let config = InflateConfig { + window_bits: if zlib_header { + i32::from(window_bits) + } else { + -i32::from(window_bits) + }, + }; + Inflate { - inner: ::zlib_rs::Inflate::new(zlib_header, window_bits), + inner: stream_with_inflate_config(config), total_in: 0, total_out: 0, } @@ -94,31 +90,28 @@ impl InflateBackend for Inflate { output: &mut [u8], flush: FlushDecompress, ) -> Result { - let flush = match flush { - FlushDecompress::None => InflateFlush::NoFlush, - FlushDecompress::Sync => InflateFlush::SyncFlush, - FlushDecompress::Finish => InflateFlush::Finish, - }; - - let total_in_start = self.inner.total_in(); - let total_out_start = self.inner.total_out(); - - let result = self.inner.decompress(input, output, flush); - - self.total_in += self.inner.total_in() - total_in_start; - self.total_out += self.inner.total_out() - total_out_start; + self.decompress_impl(input, output.as_mut_ptr(), output.len(), flush) + } - match result { - Ok(status) => Ok(status.into()), - Err(InflateError::NeedDict { dict_id }) => crate::mem::decompress_need_dict(dict_id), - Err(_) => self.decompress_error(), - } + fn decompress_uninit( + &mut self, + input: &[u8], + output: &mut [MaybeUninit], + flush: FlushDecompress, + ) -> Result { + self.decompress_impl(input, output.as_mut_ptr().cast::(), output.len(), flush) } fn reset(&mut self, zlib_header: bool) { self.total_in = 0; self.total_out = 0; - 
self.inner.reset(zlib_header); + + let mut config = InflateConfig::default(); + if !zlib_header { + config.window_bits = -config.window_bits; + } + + let _ = inflate::reset_with_config(self.stream_mut(), config); } } @@ -135,25 +128,84 @@ impl Backend for Inflate { } impl Inflate { + fn decompress_impl( + &mut self, + input: &[u8], + output_ptr: *mut u8, + output_len: usize, + flush: FlushDecompress, + ) -> Result { + let flush = match flush { + FlushDecompress::None => InflateFlush::NoFlush, + FlushDecompress::Sync => InflateFlush::SyncFlush, + FlushDecompress::Finish => InflateFlush::Finish, + }; + + let total_in_start = self.inner.total_in; + let total_out_start = self.inner.total_out; + + self.inner.avail_in = Ord::min(input.len(), u32::MAX as usize) as u32; + self.inner.avail_out = Ord::min(output_len, u32::MAX as usize) as u32; + self.inner.next_in = input.as_ptr(); + self.inner.next_out = output_ptr; + + let result = unsafe { inflate::inflate(self.stream_mut(), flush) }; + + self.accumulate_totals(total_in_start, total_out_start); + + match result { + ReturnCode::Ok => Ok(Status::Ok), + ReturnCode::StreamEnd => Ok(Status::StreamEnd), + ReturnCode::BufError => Ok(Status::BufError), + ReturnCode::NeedDict => crate::mem::decompress_need_dict(self.inner.adler as u32), + ReturnCode::ErrNo | ReturnCode::VersionError => unreachable!(), + ReturnCode::StreamError | ReturnCode::DataError | ReturnCode::MemError => { + self.decompress_error() + } + } + } + + fn accumulate_totals(&mut self, total_in_start: u64, total_out_start: u64) { + self.total_in += self.inner.total_in - total_in_start; + self.total_out += self.inner.total_out - total_out_start; + } + + fn stream_mut(&mut self) -> &mut InflateStream<'static> { + unsafe { InflateStream::from_stream_mut(&mut self.inner) } + .expect("zlib-rs inflate stream is initialized") + } + fn decompress_error(&self) -> Result { - decompress_failed(ErrorMessage(self.inner.error_message())) + 
decompress_failed(ErrorMessage(error_message(self.inner.msg))) } pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result { - match self.inner.set_dictionary(dictionary) { - Ok(v) => Ok(v), - Err(_) => self.decompress_error(), + match inflate::set_dictionary(self.stream_mut(), dictionary) { + ReturnCode::Ok => Ok(self.inner.adler as u32), + ReturnCode::StreamError | ReturnCode::DataError => self.decompress_error(), + _other => unreachable!("set_dictionary does not return {:?}", _other), + } + } +} + +impl Drop for Inflate { + fn drop(&mut self) { + if let Some(stream) = unsafe { InflateStream::from_stream_mut(&mut self.inner) } { + let _ = inflate::end(stream); } } } pub struct Deflate { - pub(crate) inner: ::zlib_rs::Deflate, + pub(crate) inner: z_stream, // NOTE: these counts do not count the dictionary. total_in: u64, total_out: u64, } +unsafe impl Send for Deflate {} +unsafe impl Sync for Deflate {} + impl fmt::Debug for Deflate { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!( @@ -167,11 +219,20 @@ impl fmt::Debug for Deflate { impl DeflateBackend for Deflate { fn make(level: Compression, zlib_header: bool, window_bits: u8) -> Self { - // Check in case the integer value changes at some point. 
debug_assert!(level.level() <= 9); + let config = DeflateConfig { + window_bits: if zlib_header { + i32::from(window_bits) + } else { + -i32::from(window_bits) + }, + level: level.level() as i32, + ..DeflateConfig::default() + }; + Deflate { - inner: ::zlib_rs::Deflate::new(level.level() as i32, zlib_header, window_bits), + inner: stream_with_deflate_config(config), total_in: 0, total_out: 0, } @@ -183,32 +244,22 @@ impl DeflateBackend for Deflate { output: &mut [u8], flush: FlushCompress, ) -> Result { - let flush = match flush { - FlushCompress::None => DeflateFlush::NoFlush, - FlushCompress::Partial => DeflateFlush::PartialFlush, - FlushCompress::Sync => DeflateFlush::SyncFlush, - FlushCompress::Full => DeflateFlush::FullFlush, - FlushCompress::Finish => DeflateFlush::Finish, - }; - - let total_in_start = self.inner.total_in(); - let total_out_start = self.inner.total_out(); - - let result = self.inner.compress(input, output, flush); - - self.total_in += self.inner.total_in() - total_in_start; - self.total_out += self.inner.total_out() - total_out_start; + self.compress_impl(input, output.as_mut_ptr(), output.len(), flush) + } - match result { - Ok(status) => Ok(status.into()), - Err(_) => self.compress_error(), - } + fn compress_uninit( + &mut self, + input: &[u8], + output: &mut [MaybeUninit], + flush: FlushCompress, + ) -> Result { + self.compress_impl(input, output.as_mut_ptr().cast::(), output.len(), flush) } fn reset(&mut self) { self.total_in = 0; self.total_out = 0; - self.inner.reset(); + let _ = deflate::reset(self.stream_mut()); } } @@ -225,29 +276,102 @@ impl Backend for Deflate { } impl Deflate { + fn compress_impl( + &mut self, + input: &[u8], + output_ptr: *mut u8, + output_len: usize, + flush: FlushCompress, + ) -> Result { + let flush = match flush { + FlushCompress::None => DeflateFlush::NoFlush, + FlushCompress::Partial => DeflateFlush::PartialFlush, + FlushCompress::Sync => DeflateFlush::SyncFlush, + FlushCompress::Full => 
DeflateFlush::FullFlush, + FlushCompress::Finish => DeflateFlush::Finish, + }; + + let total_in_start = self.inner.total_in; + let total_out_start = self.inner.total_out; + + self.inner.avail_in = Ord::min(input.len(), u32::MAX as usize) as u32; + self.inner.avail_out = Ord::min(output_len, u32::MAX as usize) as u32; + self.inner.next_in = input.as_ptr(); + self.inner.next_out = output_ptr; + + let result = deflate::deflate(self.stream_mut(), flush); + + self.accumulate_totals(total_in_start, total_out_start); + + match result { + ReturnCode::Ok => Ok(Status::Ok), + ReturnCode::StreamEnd => Ok(Status::StreamEnd), + ReturnCode::BufError => Ok(Status::BufError), + ReturnCode::NeedDict | ReturnCode::ErrNo | ReturnCode::VersionError => unreachable!(), + ReturnCode::StreamError | ReturnCode::DataError | ReturnCode::MemError => { + self.compress_error() + } + } + } + + fn accumulate_totals(&mut self, total_in_start: u64, total_out_start: u64) { + self.total_in += self.inner.total_in - total_in_start; + self.total_out += self.inner.total_out - total_out_start; + } + + fn stream_mut(&mut self) -> &mut DeflateStream<'static> { + unsafe { DeflateStream::from_stream_mut(&mut self.inner) } + .expect("zlib-rs deflate stream is initialized") + } + fn compress_error(&self) -> Result { - compress_failed(ErrorMessage(self.inner.error_message())) + compress_failed(ErrorMessage(error_message(self.inner.msg))) } pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result { - match self.inner.set_dictionary(dictionary) { - Ok(v) => Ok(v), - Err(_) => self.compress_error(), + match deflate::set_dictionary(self.stream_mut(), dictionary) { + ReturnCode::Ok => Ok(self.inner.adler as u32), + ReturnCode::StreamError => self.compress_error(), + _other => unreachable!("set_dictionary does not return {:?}", _other), } } pub fn set_level(&mut self, level: Compression) -> Result<(), CompressError> { - use ::zlib_rs::Status; - - match self.inner.set_level(level.level() as i32) { - Ok(status) => 
match status { - Status::Ok => Ok(()), - Status::BufError => compress_failed(ErrorMessage(Some("insufficient space"))), - Status::StreamEnd => { - unreachable!("zlib-rs is known to never return the StreamEnd status") - } - }, - Err(_) => self.compress_error(), + match deflate::params(self.stream_mut(), level.level() as i32, Default::default()) { + ReturnCode::Ok => Ok(()), + ReturnCode::BufError => compress_failed(ErrorMessage(Some("insufficient space"))), + ReturnCode::StreamError => self.compress_error(), + _other => unreachable!("set_level does not return {:?}", _other), + } + } +} + +impl Drop for Deflate { + fn drop(&mut self) { + if let Some(stream) = unsafe { DeflateStream::from_stream_mut(&mut self.inner) } { + let _ = deflate::end(stream); } } } + +fn error_message(msg: *const core::ffi::c_char) -> Option<&'static str> { + if msg.is_null() { + None + } else { + unsafe { CStr::from_ptr(msg).to_str() }.ok() + } +} + +fn stream_with_inflate_config(config: InflateConfig) -> z_stream { + let mut stream = z_stream::default(); + let result = inflate::init(&mut stream, config); + assert_eq!(result, ReturnCode::Ok); + stream +} + +fn stream_with_deflate_config(config: DeflateConfig) -> z_stream { + let mut stream = z_stream::default(); + let result = deflate::init(&mut stream, config); + assert_eq!(result, ReturnCode::Ok); + stream +}