diff --git a/.gitignore b/.gitignore index 73fab07..a69c6b7 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,9 @@ target/ # MSVC Windows builds of rustc generate these, which store debugging information *.pdb + +# Local Claude Code settings (machine-specific) +.claude/ + +# Unreferenced / scratch files +.unref/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..87e889d --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,73 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +> **Important:** This file (and all sub-project `CLAUDE.md` files) are checked into the +> repository. Only include information that is valid for **any** developer or machine: +> project conventions, architecture, commands, constraints. **Do not** add machine-specific +> paths, personal tool preferences, local environment settings, or anything that would +> not apply to every contributor. + +## Commands + +```bash +# Build +cargo build --all +cargo build --release --all + +# Test +cargo test --all +cargo test # Run a single test by name +cargo test -- --nocapture # Show test output + +# Lint and format +cargo fmt --check --all +cargo clippy +``` + +The CI runs on `windows-latest` and builds for multiple targets: `wasm32-wasip1`, `aarch64-unknown-linux-musl`, `x86_64-pc-windows-msvc`, `x86_64-unknown-linux-gnu`. + +The release build uses Spectre mitigations (`/Qspectre /sdl`) and produces `preflate_rs_0_7.dll` and `preflate_util.exe`. + +## Architecture + +**preflate-rs** analyzes DEFLATE-compressed streams, extracts the uncompressed data plus a compact set of reconstruction parameters, and later recreates the exact original DEFLATE bitstream. This enables re-compression with modern algorithms (Zstd, Brotli) while preserving binary-exact round-trip fidelity. 
The key insight is detecting which compressor (zlib, libdeflate, zlib-ng, miniz, Windows zlib) produced a stream and storing only the differences from what that compressor would predict. + +### Workspace layout + +| Crate | Output | Role | +|---|---|---| +| `preflate/` | library | Core DEFLATE analysis and reconstruction | +| `container/` | library | Scans binary files (ZIP, PNG, JPEG) for DEFLATE streams | +| `util/` | `preflate_util.exe` | CLI for testing on files/directories | +| `dll/` | `preflate_rs_0_7.dll` | C FFI wrapper for .NET interop | +| `fuzz/` | fuzz harnesses | libfuzzer targets | +| `tests/` | integration tests | End-to-end round-trip tests using `samples/` | + +### preflate crate (core) + +The processing pipeline in `preflate/src/stream_processor.rs`: +1. **`deflate/`** — Reads a DEFLATE bitstream into tokens (literals and length/distance back-references) and writes tokens back to DEFLATE with custom Huffman trees. +2. **`estimator/`** — Estimates the compressor's parameters (`TokenPredictorParameters`): hash algorithm, `nice_length`, `max_chain`, window bits, add policy, matching type. +3. **`token_predictor.rs`** — Replays the compression using estimated parameters and hash chains to predict what tokens the original compressor would have produced. +4. **`tree_predictor.rs`** — Predicts Huffman tree structure. +5. **`statistical_codec.rs` / `cabac_codec.rs`** — Encodes the *differences* from prediction using CABAC (Context Adaptive Binary Arithmetic Coding, shared with Lepton JPEG). +6. **`stream_processor.rs`** — Public API: `PreflateStreamProcessor::decompress()` and `RecreateStreamProcessor::recreate()`. + +Parameters are serialized via `bitcode`; corrections via CABAC. The format is chunked to bound memory use. + +### container crate + +- **`scan_deflate.rs`** — Scans raw bytes to locate DEFLATE stream boundaries, identifying stream type (raw deflate, zlib-wrapped, PNG IDAT, ZIP, JPEG, etc.). 
+- **`idat_parse.rs`** — Extracts and reassembles PNG IDAT chunks. +- **`container_processor.rs`** — Orchestrates scanning → preflate → Zstd (compress) and Zstd → recreate → reassembly (decompress). Zstd encode/decode is handled inline using a single persistent encoder. +- **`utils.rs`** — `process_limited_buffer()` and test helpers. +- **`scoped_read.rs`** — Bounded reader adapter. + +The optional `webp` feature (enabled by default) allows PNG images to be stored as WebP instead of losslessly. PDF streams are not scanned (pdf_parse was removed). + +### Code constraints + +- **No unsafe code** — enforced via `#![forbid(unsafe_code)]` in each crate. +- Minimum Rust version: **1.85**, Edition **2024**. +- `.cargo/config.toml` sets Windows MSVC linker flags (`/DYNAMICBASE`, `/CETCOMPAT`, `/guard:cf`). diff --git a/Cargo.lock b/Cargo.lock index 269ac3b..900124c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -551,7 +551,7 @@ dependencies = [ [[package]] name = "preflate-container" -version = "0.7.5" +version = "0.7.6" dependencies = [ "adler32", "byteorder", @@ -567,7 +567,7 @@ dependencies = [ [[package]] name = "preflate-rs" -version = "0.7.5" +version = "0.7.6" dependencies = [ "bitcode", "byteorder", @@ -589,7 +589,7 @@ dependencies = [ [[package]] name = "preflate-rs-root" -version = "0.0.0" +version = "0.7.6" dependencies = [ "libdeflate-sys", "libz-ng-sys", @@ -601,7 +601,7 @@ dependencies = [ [[package]] name = "preflate_rs_0_7" -version = "0.7.5" +version = "0.7.6" dependencies = [ "preflate-container", "preflate-rs", @@ -609,7 +609,7 @@ dependencies = [ [[package]] name = "preflate_util" -version = "0.0.0" +version = "0.7.6" dependencies = [ "clap", "cpu-time", diff --git a/Cargo.toml b/Cargo.toml index 0196a28..8b4cacc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,11 @@ # root project only exists to refer to the packages -# and run the end-to-end tests in the tests directory +# and run the end-to-end tests in the tests directory [package] name = 
"preflate-rs-root" -version = "0.0.0" -edition = "2024" -rust-version = "1.85" +version.workspace = true +edition.workspace = true +rust-version.workspace = true [profile.release] debug = true @@ -14,6 +14,14 @@ debug = true members = ["preflate", "container", "dll", "util", "fuzz"] resolver = "2" +[workspace.package] +version = "0.7.6" +edition = "2024" +authors = ["Kristof Roomp "] +license = "Apache-2.0" +rust-version = "1.85" +repository = "https://github.com/microsoft/preflate-rs" + [dev-dependencies] preflate-rs = { path = "preflate" } preflate-container = { path = "container" } diff --git a/container/CLAUDE.md b/container/CLAUDE.md new file mode 100644 index 0000000..a752e85 --- /dev/null +++ b/container/CLAUDE.md @@ -0,0 +1,203 @@ +# container (preflate-container) + +Scans binary files (ZIP, PNG, JPEG) for DEFLATE streams, orchestrates the +preflate + Zstd pipeline, and reassembles the output. Only format version 2 exists +(v1 was removed). + +## Public API (`lib.rs`) + +```rust +// Compress a file/buffer containing embedded DEFLATE streams +PreflateContainerProcessor::new(config: &PreflateContainerConfig, level: i32, test_baseline: bool) -> Self +impl ProcessBuffer for PreflateContainerProcessor { ... } + +// Decompress a preflate container back to the original file +RecreateContainerProcessor::new(capacity: usize) -> Self +impl ProcessBuffer for RecreateContainerProcessor { ... 
} + +// Core trait — both processors implement this +pub trait ProcessBuffer { + fn process_buffer(&mut self, input: &[u8], input_complete: bool, writer: &mut impl Write) -> Result<()>; + fn stats(&self) -> PreflateStats { PreflateStats::default() } // default no-op; overridden by Compress + fn copy_to_end(&mut self, input: &mut impl BufRead, output: &mut impl Write) -> Result<()>; + fn copy_to_end_size(&mut self, input: &mut impl BufRead, output: &mut impl Write, chunk: usize) -> Result<()>; +} + +// DLL helper: writes to a fixed output buffer, spills overflow into a VecDeque +fn process_limited_buffer( + process: &mut impl ProcessBuffer, + input: &[u8], + input_complete: bool, + output_buffer: &mut [u8], + output_extra: &mut VecDeque, +) -> Result<(bool, usize)>; // (all_output_drained, bytes_written_to_output_buffer) +``` + +`PreflateContainerConfig` holds knobs: `min_chunk_size`, `max_chunk_size`, +`total_plain_text_limit`, `chunk_plain_text_limit`, `validate_compression`, `max_chain_length`. + +## Wire Format (v2 only) + +### Outer framing (always raw / uncompressed) + +``` +[0x02] ← COMPRESSED_WRAPPER_VERSION_2 (1 byte, raw) + +Repeat for each block: + [type] ← block type byte (1 byte, raw) — see bit-field below + [varint(content_len)] ← byte count of what follows (1–5 bytes, raw) + [content_bytes × content_len] ← meaning depends on type (see below) +``` + +All framing bytes (`type`, `varint`) are written directly to the output stream — +they are **never** inside the Zstd encoder. 
+ +### Block type byte bit-field + +Each block type byte encodes two fields: + +``` +Bit 7-6 BLOCK_COMPRESSION_* 00 = none/raw 01 = Zstd 10-11 = reserved +Bit 5-0 BLOCK_TYPE_* block content kind (0–63) +``` + +Mask constants (defined in `container_processor.rs`): + +| Constant | Value | Meaning | +|---|---|---| +| `BLOCK_COMPRESSION_MASK` | `0xC0` | extracts bits 7–6 | +| `BLOCK_TYPE_MASK` | `0x3F` | extracts bits 5–0 | +| `BLOCK_COMPRESSION_NONE` | `0x00` | content is raw (not Zstd) | +| `BLOCK_COMPRESSION_ZSTD` | `0x40` | content is a Zstd flush segment | + +### Block content kinds and combined wire values + +| `BLOCK_TYPE_*` | Value | Combined wire byte | Description | +|---|---|---|---| +| `BLOCK_TYPE_LITERAL` | `0x00` | `0x40` | Raw input bytes with no detectable DEFLATE stream | +| `BLOCK_TYPE_DEFLATE` | `0x01` | `0x41` | A raw/zlib DEFLATE stream (start of a new stream) | +| `BLOCK_TYPE_PNG` | `0x02` | `0x42` | A PNG IDAT stream stored without WebP | +| `BLOCK_TYPE_DEFLATE_CONTINUE` | `0x03` | `0x43` | Continuation of a DEFLATE stream that spanned a chunk boundary | +| `BLOCK_TYPE_JPEG_LEPTON` | `0x04` | `0x04` | JPEG re-compressed with Lepton; bypasses Zstd entirely | +| `BLOCK_TYPE_WEBP` | `0x05` | `0x05` | PNG image stored as WebP lossless; bypasses Zstd entirely | + +### Zstd encoder/decoder lifecycle + +- A **single persistent `zstd::stream::write::Encoder`** is created once and shared across + all Zstd-compressed blocks (compression bits `0x40`). +- After writing each block's inner payload into the encoder, `encoder.flush()` is called, + which emits a Zstd `ZSTD_e_flush` segment. Those bytes are what get stored as + `content_bytes` in the outer framing. +- Each flush segment is decodable in sequence: the decoder is a persistent + `zstd::stream::raw::Decoder` that maintains cross-block history, so compression + quality benefits from all previously seen blocks. +- The stream is terminated by EOF — there is no explicit end-of-stream block. 
+ +### Inner payload layout (inside Zstd, after decompression) + +**`BLOCK_TYPE_LITERAL` (wire `0x40`)** +``` +varint(data_len) +data[data_len] ← verbatim bytes from the original input +``` + +**`BLOCK_TYPE_DEFLATE` (wire `0x41`) and `BLOCK_TYPE_DEFLATE_CONTINUE` (wire `0x43`)** +``` +varint(corrections_len) +varint(plaintext_len) +corrections[corrections_len] ← CABAC-encoded differences from predicted tokens +plaintext[plaintext_len] ← uncompressed data +``` +`BLOCK_TYPE_DEFLATE_CONTINUE` has the same layout; the decoder reuses the +`RecreateStreamProcessor` state from the preceding `BLOCK_TYPE_DEFLATE` block. + +**`BLOCK_TYPE_PNG` (wire `0x42`) — non-WebP path** +``` +varint(corrections_len) +varint(plaintext_len) +IdatContents metadata: + varint(chunk_size_1) … varint(chunk_size_N) varint(0) ← IDAT chunk size list (0-terminated) + zlib_header[2] + addler32[4] + 0xFF ← sentinel: no png_header present +corrections[corrections_len] +plaintext[plaintext_len] ← raw unfiltered pixel data +``` + +### Raw block payload layout (outside Zstd) + +**`BLOCK_TYPE_JPEG_LEPTON` (wire `0x04`)** +``` +lepton_bytes[content_len] ← Lepton-compressed JPEG; decoded by lepton_jpeg::decode_lepton() +``` + +**`BLOCK_TYPE_WEBP` (wire `0x05`)** +``` +varint(corrections_len) +varint(webp_data_len) +IdatContents metadata: + varint(chunk_size_1) … varint(chunk_size_N) varint(0) + zlib_header[2] + addler32[4] + color_type[1] ← PngColorType (RGB=2, RGBA=6) + varint(width) + varint(height) +filters[height] ← PNG row filter bytes (one per row) +corrections[corrections_len] +webp_data[webp_data_len] ← WebP lossless encoded pixel data +``` +On decode, the WebP bytes are decompressed back to pixels, PNG filters are re-applied, +and the result is re-deflated using the corrections to recreate the original IDAT stream. + +## Idempotent Finalization (important bug history) + +`process_buffer` may be called with `input_complete=true` multiple times (DLL pattern). 
+The finalization block must guard against double-finalization: + +```rust +if input_complete && !self.input_complete { // NOT just `if input_complete` + self.input_complete = true; + // ... encoder.take().unwrap() +} +``` + +## Module Layout + +``` +src/ + lib.rs ← public types and re-exports + container_processor.rs ← PreflateContainerProcessor, RecreateContainerProcessor, + ProcessBuffer trait, MeasureWriteSink, + block-type constants, emit_compressed_block(), + write_chunk_block_v2(), write_varint(), read_varint() + scan_deflate.rs ← locates DEFLATE stream boundaries in raw bytes + identifies: raw deflate, zlib-wrapped, PNG IDAT, ZIP, JPEG + idat_parse.rs ← extracts / reassembles PNG IDAT chunks; parses IHDR + scoped_read.rs ← bounded reader adapter + utils.rs ← process_limited_buffer(), TakeReader, test helpers +``` + +## Key Internal Types + +| Type | Purpose | +|---|---| +| `MeasureWriteSink` | `pub(crate)` sink that counts bytes; used for baseline Zstd measurement | +| `PreflateStats` | pub struct: `deflate_compressed_size`, `zstd_compressed_size`, `uncompressed_size`, `overhead_bytes`, `hash_algorithm`, `zstd_baseline_size` | +| `TakeReader` | `pub` BufRead wrapper that reads at most N bytes (used in utils.rs) | + +## Features + +- `webp` (default-enabled) — allows PNG images to be stored as WebP instead of lossless PNG, + using the `webp` crate. + +## Dependencies of Note + +- `lepton_jpeg` (0.5.1) — JPEG blocks are recompressed with Lepton, bypassing Zstd entirely. +- `zstd` (0.13) — single persistent encoder across all non-JPEG/WebP blocks. +- `preflate-rs` — core analysis/reconstruction (path dependency). +- `webp` (0.3, optional, default-enabled) — PNG images can be stored as WebP lossless. + +## Constraints + +- `#![forbid(unsafe_code)]` enforced. +- `main.rs` exists but is a stub; this crate is a library. 
diff --git a/container/Cargo.toml b/container/Cargo.toml index ef285df..11826a7 100644 --- a/container/Cargo.toml +++ b/container/Cargo.toml @@ -1,15 +1,15 @@ [package] name = "preflate-container" -version = "0.7.5" -edition = "2024" -authors = ["Kristof Roomp "] -license = "Apache-2.0" -rust-version = "1.85" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +rust-version.workspace = true +repository.workspace = true description = """ Scans binary files for zStd streams and uses Preflate-rs to decompress the stream and repack with -zStd compression. For PNG files, we use WEBP compression for RGB and RGBA to get better results. +zStd compression. For PNG files, we use WEBP compression for RGB and RGBA to get better results. """ -repository = "https://github.com/microsoft/preflate-rs" categories = ["compression"] keywords = ["gzip", "deflate", "zlib", "zip"] diff --git a/container/src/container_common.rs b/container/src/container_common.rs new file mode 100644 index 0000000..b41e771 --- /dev/null +++ b/container/src/container_common.rs @@ -0,0 +1,1085 @@ +use std::io::{BufRead, Read, Write}; + +use preflate_rs::{AddContext, HashAlgorithm, PreflateConfig, Result}; + +/// Configuration for the deflate process +#[derive(Debug, Clone)] +pub struct PreflateContainerConfig { + /// As we scan for deflate streams, we need to have a minimum memory + /// chunk to process. We scan this chunk for deflate streams and at least + /// deflate one block has to fit into a chunk for us to recognize it. + pub min_chunk_size: usize, + + /// The maximum size of a deflate or PNG compressed block we will consider. If + /// a deflate stream is larger than this, we will not decompress it and + /// just write it out as a literal block. + pub max_chunk_size: usize, + + /// The maximum overall size of plain text that we will compress. 
This is + /// global to the entire container and limits the amount of processing that + /// we will do to avoid running out of CPU time on a single file. Once we + /// hit this limit, we will stop looking for deflate streams and just write + /// out the rest of the data as literal blocks. + pub total_plain_text_limit: u64, + + /// The maximum size of a plain text chunk that we will decompress at a time. This limits + /// the memory usage of the decompression process. + pub chunk_plain_text_limit: usize, + + /// true if we should verify that the decompressed data can be recompressed to the same bytes. + /// This is important since there may be corner cases where the data may not yield the same bytes. + /// + /// If this is false, we will not verify the decompressed data and just write it out as is and it is + /// up to the caller to make sure the data is valid. In no case should you just assume that you + /// can get the same data back without verifying it. + pub validate_compression: bool, + + /// Maximum number of lookups we will do in the hash chain. This will limit the CPU time we spend + /// on deflate stream processing but also means that we won't be able to recompress deflate streams + /// that were compressed with a larger chain length (eg level 9 has 4096). 
+ pub max_chain_length: u32, +} + +impl Default for PreflateContainerConfig { + fn default() -> Self { + PreflateContainerConfig { + min_chunk_size: 1024 * 1024, + max_chunk_size: 64 * 1024 * 1024, + total_plain_text_limit: 512 * 1024 * 1024, + chunk_plain_text_limit: 128 * 1024 * 1024, + max_chain_length: 4096, + validate_compression: true, + } + } +} + +impl PreflateContainerConfig { + pub fn preflate_config(&self) -> PreflateConfig { + PreflateConfig { + max_chain_length: self.max_chain_length, + plain_text_limit: self.chunk_plain_text_limit, + verify_compression: self.validate_compression, + } + } +} + +pub(crate) const COMPRESSED_WRAPPER_VERSION_2: u8 = 2; + +// Bit-field masks for the block type byte +// Bits 7-6: compression algorithm Bits 5-0: block content kind +pub(crate) const BLOCK_COMPRESSION_MASK: u8 = 0xC0; +pub(crate) const BLOCK_TYPE_MASK: u8 = 0x3F; + +// Compression algorithms (top 2 bits) +pub(crate) const BLOCK_COMPRESSION_NONE: u8 = 0x00; +pub(crate) const BLOCK_COMPRESSION_ZSTD: u8 = 0x40; + +// Block content kinds (bottom 6 bits) +pub(crate) const BLOCK_TYPE_LITERAL: u8 = 0x00; +pub(crate) const BLOCK_TYPE_DEFLATE: u8 = 0x01; +pub(crate) const BLOCK_TYPE_PNG: u8 = 0x02; +pub(crate) const BLOCK_TYPE_DEFLATE_CONTINUE: u8 = 0x03; +pub(crate) const BLOCK_TYPE_JPEG_LEPTON: u8 = 0x04; +pub(crate) const BLOCK_TYPE_WEBP: u8 = 0x05; + +pub(crate) fn write_varint(destination: &mut impl Write, value: u32) -> std::io::Result<()> { + let mut value = value; + loop { + let mut byte = (value & 0x7F) as u8; + value >>= 7; + if value != 0 { + byte |= 0x80; + } + destination.write_all(&[byte])?; + if value == 0 { + break; + } + } + + Ok(()) +} + +pub(crate) fn read_varint(source: &mut impl Read) -> std::io::Result { + let mut result = 0; + let mut shift = 0; + loop { + let mut byte = [0u8; 1]; + source.read_exact(&mut byte)?; + let byte = byte[0]; + result |= ((byte & 0x7F) as u32) << shift; + shift += 7; + if byte & 0x80 == 0 { + break; + } + } + Ok(result) 
+} + +#[test] +fn test_variant_roundtrip() { + let values = [ + 0, 1, 127, 128, 255, 256, 16383, 16384, 2097151, 2097152, 268435455, 268435456, 4294967295, + ]; + + let mut buffer = Vec::new(); + for &v in values.iter() { + write_varint(&mut buffer, v).unwrap(); + } + + let mut buffer = &buffer[..]; + + for &v in values.iter() { + assert_eq!(v, read_varint(&mut buffer).unwrap()); + } +} + +/// Statistics about the preflate process +#[derive(Debug, Copy, Clone, Default)] +pub struct PreflateStats { + pub deflate_compressed_size: u64, + pub zstd_compressed_size: u64, + pub uncompressed_size: u64, + pub overhead_bytes: u64, + pub hash_algorithm: HashAlgorithm, + pub zstd_baseline_size: u64, +} + +/// Processes an input buffer and writes the output to a writer +pub trait ProcessBuffer { + fn process_buffer( + &mut self, + input: &[u8], + input_complete: bool, + writer: &mut impl Write, + ) -> Result<()>; + + #[cfg(test)] + fn process_vec(&mut self, input: &[u8]) -> Result> { + let mut writer = Vec::new(); + + self.copy_to_end(&mut std::io::Cursor::new(&input), &mut writer) + .context()?; + + Ok(writer) + } + + #[cfg(test)] + fn process_vec_size(&mut self, input: &[u8], read_chunk_size: usize) -> Result> { + let mut writer = Vec::new(); + + self.copy_to_end_size( + &mut std::io::Cursor::new(&input), + &mut writer, + read_chunk_size, + ) + .context()?; + + Ok(writer) + } + + /// Reads everything from input and writes it to the output. + /// Wraps calls to process buffer + fn copy_to_end(&mut self, input: &mut impl BufRead, output: &mut impl Write) -> Result<()> { + self.copy_to_end_size(input, output, 1024 * 1024) + } + + /// Reads everything from input and writes it to the output. 
+ /// Wraps calls to process buffer + fn copy_to_end_size( + &mut self, + input: &mut impl BufRead, + output: &mut impl Write, + read_chunk_size: usize, + ) -> Result<()> { + let mut input_complete = false; + loop { + let buffer: &[u8]; + if input_complete { + buffer = &[]; + } else { + buffer = input.fill_buf().context()?; + if buffer.len() == 0 { + input_complete = true + } + }; + + if input_complete { + self.process_buffer(&[], true, output).context()?; + break; + } else { + // process buffer a piece at a time to avoid overflowing memory + let mut amount_read = 0; + while amount_read < buffer.len() { + let chunk_size = (buffer.len() - amount_read).min(read_chunk_size); + + self.process_buffer( + &buffer[amount_read..amount_read + chunk_size], + false, + output, + ) + .context()?; + + amount_read += chunk_size; + } + + let buflen = buffer.len(); + input.consume(buflen); + } + } + + Ok(()) + } + + fn stats(&self) -> PreflateStats { + PreflateStats::default() + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::io::Write; + + use preflate_rs::{AddContext, Result}; + + use crate::container_common::{ + BLOCK_COMPRESSION_MASK, BLOCK_COMPRESSION_NONE, BLOCK_COMPRESSION_ZSTD, BLOCK_TYPE_DEFLATE, + BLOCK_TYPE_DEFLATE_CONTINUE, BLOCK_TYPE_JPEG_LEPTON, BLOCK_TYPE_LITERAL, BLOCK_TYPE_MASK, + BLOCK_TYPE_PNG, BLOCK_TYPE_WEBP, COMPRESSED_WRAPPER_VERSION_2, PreflateContainerConfig, + ProcessBuffer, read_varint, + }; + use crate::container_read::RecreateContainerProcessor; + use crate::container_write::PreflateContainerProcessor; + + pub struct NopProcessBuffer {} + + impl ProcessBuffer for NopProcessBuffer { + fn process_buffer( + &mut self, + input: &[u8], + _input_complete: bool, + writer: &mut impl Write, + ) -> Result<()> { + writer.write_all(input).context()?; + + Ok(()) + } + } + + fn roundtrip_deflate_chunks(filename: &str) { + use crate::utils::assert_eq_array; + + let f = crate::utils::read_file(filename); + + println!("Processing file: {}", filename); + + let mut 
expanded = Vec::new(); + let mut ctx = + PreflateContainerProcessor::new(&PreflateContainerConfig::default(), 1, false); + ctx.copy_to_end(&mut std::io::Cursor::new(&f), &mut expanded) + .unwrap(); + + println!("Recreating file: {}", filename); + + let mut destination = Vec::new(); + let mut ctx = RecreateContainerProcessor::new(usize::MAX); + ctx.copy_to_end(&mut std::io::Cursor::new(expanded), &mut destination) + .unwrap(); + + assert_eq_array(&destination, &f); + } + + #[test] + fn roundtrip_skip_length_crash() { + roundtrip_deflate_chunks("skiplengthcrash.bin"); + } + + #[test] + fn roundtrip_png_chunks() { + roundtrip_deflate_chunks("treegdi.png"); + } + + #[test] + fn roundtrip_zip_chunks() { + roundtrip_deflate_chunks("samplezip.zip"); + } + + #[test] + fn roundtrip_gz_chunks() { + roundtrip_deflate_chunks("sample1.bin.gz"); + } + + #[test] + fn roundtrip_png_chunks2() { + roundtrip_deflate_chunks("starcontrol.samplesave"); + } + + #[test] + fn roundtrip_small_chunk() { + use crate::utils::{assert_eq_array, read_file}; + + let original = read_file("pptxplaintext.zip"); + + let mut context = PreflateContainerProcessor::new( + &PreflateContainerConfig { + min_chunk_size: 100000, + max_chunk_size: 100000, + total_plain_text_limit: u64::MAX, + ..Default::default() + }, + 1, + false, + ); + + let compressed = context.process_vec_size(&original, 20001).unwrap(); + + let mut context = RecreateContainerProcessor::new(usize::MAX); + let recreated = context.process_vec_size(&compressed, 20001).unwrap(); + + assert_eq_array(&original, &recreated); + } + + #[test] + fn roundtrip_small_plain_text() { + use crate::utils::{assert_eq_array, read_file}; + + let original = read_file("pptxplaintext.zip"); + + let mut context = PreflateContainerProcessor::new( + &PreflateContainerConfig { + min_chunk_size: 100000, + max_chunk_size: 100000, + total_plain_text_limit: u64::MAX, + ..Default::default() + }, + 1, + false, + ); + + let compressed = context.process_vec_size(&original, 
2001).unwrap(); + + let mut context = RecreateContainerProcessor::new(usize::MAX); + let recreated = context.process_vec_size(&compressed, 2001).unwrap(); + + assert_eq_array(&original, &recreated); + } + + #[test] + fn roundtrip_zstd_per_block() { + use crate::utils::{assert_eq_array, read_file}; + + let original = read_file("samplezip.zip"); + + let mut context = + PreflateContainerProcessor::new(&PreflateContainerConfig::default(), 1, false); + + let compressed = context.process_vec(&original).unwrap(); + + let mut context = RecreateContainerProcessor::new(usize::MAX); + let recreated = context.process_vec(&compressed).unwrap(); + + assert_eq_array(&original, &recreated); + } + + // ── Block type bit-field tests ─────────────────────────────────────────────── + + /// Parse the outer framing of a v2 container and return each block's + /// (compression_bits, block_type_bits) in order, stopping at the 0xFF CRC end block. + fn parse_wire_block_types(data: &[u8]) -> Vec<(u8, u8)> { + use byteorder::ReadBytesExt; + let mut cursor = std::io::Cursor::new(data); + let version = cursor.read_u8().unwrap(); + assert_eq!(version, COMPRESSED_WRAPPER_VERSION_2); + let mut blocks = Vec::new(); + while (cursor.position() as usize) < data.len() { + let type_byte = cursor.read_u8().unwrap(); + if type_byte == 0xFF { + break; // CRC end block; 4 raw bytes follow but we stop here + } + let compression = type_byte & BLOCK_COMPRESSION_MASK; + let block_type = type_byte & BLOCK_TYPE_MASK; + blocks.push((compression, block_type)); + let size = read_varint(&mut cursor).unwrap() as u64; + cursor.set_position(cursor.position() + size); + } + blocks + } + + /// Feed `stream` to the decoder with input_complete=true and assert the + /// error exit code matches `expected`. 
+ fn assert_decoder_fails(stream: &[u8], expected: preflate_rs::ExitCode) { + let mut ctx = RecreateContainerProcessor::new(usize::MAX); + let mut out = Vec::new(); + let err = ctx + .process_buffer(stream, true, &mut out) + .expect_err("expected an error, but decoder returned Ok"); + assert_eq!( + err.exit_code(), + expected, + "wrong exit code for stream {stream:02X?}" + ); + } + + /// The two masks must partition the byte: non-overlapping and together covering all 8 bits. + /// Every content-kind constant must sit entirely within BLOCK_TYPE_MASK, and every + /// compression constant within BLOCK_COMPRESSION_MASK. + #[test] + fn test_bit_field_masks_partition_byte() { + assert_eq!( + BLOCK_COMPRESSION_MASK | BLOCK_TYPE_MASK, + 0xFF, + "masks do not cover all bits" + ); + assert_eq!( + BLOCK_COMPRESSION_MASK & BLOCK_TYPE_MASK, + 0x00, + "masks overlap" + ); + for kind in [ + BLOCK_TYPE_LITERAL, + BLOCK_TYPE_DEFLATE, + BLOCK_TYPE_PNG, + BLOCK_TYPE_DEFLATE_CONTINUE, + BLOCK_TYPE_JPEG_LEPTON, + BLOCK_TYPE_WEBP, + ] { + assert_eq!( + kind & BLOCK_COMPRESSION_MASK, + 0, + "BLOCK_TYPE 0x{kind:02X} bleeds into compression bits" + ); + } + for comp in [BLOCK_COMPRESSION_NONE, BLOCK_COMPRESSION_ZSTD] { + assert_eq!( + comp & BLOCK_TYPE_MASK, + 0, + "BLOCK_COMPRESSION 0x{comp:02X} bleeds into type bits" + ); + } + } + + /// The combined (compression | kind) wire bytes must match the expected values + /// documented in CLAUDE.md. This catches accidental constant drift. 
+ #[test] + fn test_combined_wire_values() { + assert_eq!(BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_LITERAL, 0x40); + assert_eq!(BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_DEFLATE, 0x41); + assert_eq!(BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_PNG, 0x42); + assert_eq!(BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_DEFLATE_CONTINUE, 0x43); + assert_eq!(BLOCK_COMPRESSION_NONE | BLOCK_TYPE_JPEG_LEPTON, 0x04); + assert_eq!(BLOCK_COMPRESSION_NONE | BLOCK_TYPE_WEBP, 0x05); + } + + /// Reserved compression bits 0x80 (10xx_xxxx) must be rejected by the decoder. + #[test] + fn test_decoder_rejects_reserved_compression_bits_10() { + assert_decoder_fails( + &[COMPRESSED_WRAPPER_VERSION_2, 0x80], + preflate_rs::ExitCode::InvalidCompressedWrapper, + ); + } + + /// Reserved compression bits 0xC0 (11xx_xxxx) must be rejected by the decoder. + #[test] + fn test_decoder_rejects_reserved_compression_bits_11() { + assert_decoder_fails( + &[COMPRESSED_WRAPPER_VERSION_2, 0xC0], + preflate_rs::ExitCode::InvalidCompressedWrapper, + ); + } + + /// BLOCK_COMPRESSION_NONE | BLOCK_TYPE_LITERAL (0x00) must be rejected: + /// literal blocks are Zstd-only; there is no raw literal block type. + #[test] + fn test_decoder_rejects_raw_literal_block_type() { + let byte = BLOCK_COMPRESSION_NONE | BLOCK_TYPE_LITERAL; // == 0x00 + assert_decoder_fails( + &[COMPRESSED_WRAPPER_VERSION_2, byte], + preflate_rs::ExitCode::InvalidCompressedWrapper, + ); + } + + /// Any BLOCK_COMPRESSION_NONE byte that is not JPEG_LEPTON or WEBP must be rejected. + #[test] + fn test_decoder_rejects_undefined_raw_block_types() { + // 0x10 is arbitrary: not 0x04 (JPEG) or 0x05 (WEBP) + let byte = BLOCK_COMPRESSION_NONE | 0x10; + assert_decoder_fails( + &[COMPRESSED_WRAPPER_VERSION_2, byte], + preflate_rs::ExitCode::InvalidCompressedWrapper, + ); + } + + /// Compressing plain bytes (no embedded DEFLATE streams) must produce a stream + /// whose first block carries BLOCK_COMPRESSION_ZSTD and BLOCK_TYPE_LITERAL. 
+ #[test] + fn test_encoder_literal_block_carries_zstd_compression_bit() { + let input = vec![0xABu8; 512]; + let mut ctx = + PreflateContainerProcessor::new(&PreflateContainerConfig::default(), 1, false); + let compressed = ctx.process_vec(&input).unwrap(); + + let blocks = parse_wire_block_types(&compressed); + assert!( + !blocks.is_empty(), + "expected at least one block in the output" + ); + assert_eq!( + blocks[0], + (BLOCK_COMPRESSION_ZSTD, BLOCK_TYPE_LITERAL), + "first block should be a Zstd literal block" + ); + } + + /// Every block type byte in a real compressed output must have compression bits + /// of either BLOCK_COMPRESSION_NONE or BLOCK_COMPRESSION_ZSTD — never the + /// reserved patterns 0x80 or 0xC0. + #[test] + fn test_encoder_never_emits_reserved_compression_bits() { + let input = crate::utils::read_file("samplezip.zip"); + let mut ctx = + PreflateContainerProcessor::new(&PreflateContainerConfig::default(), 1, false); + let compressed = ctx.process_vec(&input).unwrap(); + + for &(compression, _) in &parse_wire_block_types(&compressed) { + assert!( + compression == BLOCK_COMPRESSION_NONE || compression == BLOCK_COMPRESSION_ZSTD, + "found reserved compression bits 0x{compression:02X} in output" + ); + } + } + + /// Verify that the decoder extracts the lower 6 bits as block_type rather + /// than passing the full byte to process_compressed_block. If it passed the + /// full byte (0x41) instead of the kind bits (0x01), the match would fall + /// through to the error arm and the round-trip would fail. + #[test] + fn test_decoder_strips_compression_bits_before_dispatch() { + use crate::utils::{assert_eq_array, read_file}; + // A zip file exercises DEFLATE blocks (wire type 0x41 = ZSTD|DEFLATE). + // A successful round-trip proves the decoder is matching on 0x01, not 0x41. 
+ let original = read_file("samplezip.zip"); + let mut enc = + PreflateContainerProcessor::new(&PreflateContainerConfig::default(), 1, false); + let compressed = enc.process_vec(&original).unwrap(); + + // Confirm the stream actually contains DEFLATE blocks (type 0x41), + // so the test is meaningful and not trivially passing. + let has_deflate = parse_wire_block_types(&compressed) + .iter() + .any(|&(c, t)| c == BLOCK_COMPRESSION_ZSTD && t == BLOCK_TYPE_DEFLATE); + assert!( + has_deflate, + "test file produced no DEFLATE blocks — test is vacuous" + ); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + assert_eq_array(&original, &recreated); + } + + /// A PNG file must produce at least one PNG or WebP IDAT block (not merely a DEFLATE + /// block), and must round-trip to the original bytes. The PNG code path in the encoder + /// is distinct from the plain DEFLATE path: it reconstructs IDAT framing and, when the + /// `webp` feature is enabled, may store pixels as WebP lossless instead of raw. 
+ #[test] + fn test_png_produces_idat_block_and_roundtrips() { + use crate::utils::{assert_eq_array, read_file}; + let original = read_file("treegdi.png"); + let mut enc = + PreflateContainerProcessor::new(&PreflateContainerConfig::default(), 1, false); + let compressed = enc.process_vec(&original).unwrap(); + + let blocks = parse_wire_block_types(&compressed); + let has_png_block = blocks + .iter() + .any(|&(_, t)| t == BLOCK_TYPE_PNG || t == BLOCK_TYPE_WEBP); + assert!( + has_png_block, + "PNG input should produce at least one PNG (0x02) or WebP (0x05) block, \ + got: {blocks:?}" + ); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + assert_eq_array(&original, &recreated); + } + + /// A PDF containing embedded JPEG images must produce JPEG_LEPTON blocks (raw, + /// outside Zstd) as well as DEFLATE blocks for the PDF's own compressed object + /// streams. Both must survive a full round-trip. + #[test] + fn test_pdf_with_jpegs_produces_lepton_and_deflate_blocks_and_roundtrips() { + use crate::utils::{assert_eq_array, read_file}; + let original = read_file("embedded-images.pdf"); + let mut enc = + PreflateContainerProcessor::new(&PreflateContainerConfig::default(), 1, false); + let compressed = enc.process_vec(&original).unwrap(); + + let blocks = parse_wire_block_types(&compressed); + + let has_lepton = blocks + .iter() + .any(|&(c, t)| c == BLOCK_COMPRESSION_NONE && t == BLOCK_TYPE_JPEG_LEPTON); + assert!( + has_lepton, + "PDF with embedded JPEGs should produce at least one JPEG_LEPTON block" + ); + + let has_deflate = blocks.iter().any(|&(_, t)| t == BLOCK_TYPE_DEFLATE); + assert!( + has_deflate, + "PDF with embedded JPEGs should also produce DEFLATE blocks for compressed objects" + ); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + assert_eq_array(&original, &recreated); + } + + /// DEFLATE_CONTINUE blocks are produced 
 when the compressed-data buffer is
+ /// truncated mid-stream: `DeflateParser::parse` reads to EOF and returns
+ /// `Ok` with `is_done()=false`, the encoder emits a DEFLATE block for the
+ /// plaintext decoded so far, saves the mid-stream state, and resumes on
+ /// subsequent calls via DEFLATE_CONTINUE blocks.
+ ///
+ /// `sample1.bin.gz` is a single gzip stream with ~418 KiB of uncompressed
+ /// content. Feeding it in partial slices (with `min_chunk_size = 0` so the
+ /// processor starts immediately) means the scanner first sees only a
+ /// truncated window of the compressed stream, forcing DEFLATE_CONTINUE
+ /// blocks that must all round-trip correctly.
+ #[test]
+ fn test_deflate_continue_blocks_appear_and_roundtrip() {
+ use crate::utils::{assert_eq_array, read_file};
+ let original = read_file("sample1.bin.gz");
+ // min_chunk_size: 0 so the loop processes data immediately after Start,
+ // letting Searching run with the first truncated chunk rather than waiting
+ // for an additional min_chunk_size bytes before beginning.
+ let mut enc = PreflateContainerProcessor::new(
+ &PreflateContainerConfig {
+ min_chunk_size: 0,
+ ..PreflateContainerConfig::default()
+ },
+ 1,
+ false,
+ );
+ // Feed the 263 KiB file in two pieces. The first piece (200 KiB) truncates
+ // the DEFLATE stream mid-way; decompress() hits EOF with at least one
+ // complete block already parsed, so it returns Ok(partial) / is_done()=false,
+ // causing the encoder to emit a DEFLATE block and enter DeflateContinue.
+ // The second piece completes the stream → DEFLATE_CONTINUE block. 
+ let mut compressed = Vec::new(); + { + let chunk1 = &original[..200_000.min(original.len())]; + enc.process_buffer(chunk1, false, &mut compressed).unwrap(); + if original.len() > 200_000 { + let chunk2 = &original[200_000..]; + enc.process_buffer(chunk2, false, &mut compressed).unwrap(); + } + enc.process_buffer(&[], true, &mut compressed).unwrap(); + } + + let blocks = parse_wire_block_types(&compressed); + let n_continue = blocks + .iter() + .filter(|&&(_, t)| t == BLOCK_TYPE_DEFLATE_CONTINUE) + .count(); + assert!( + n_continue > 0, + "200 KiB chunks on a ~263 KiB gzip should force at least one DEFLATE_CONTINUE block; \ + blocks seen: {blocks:?}" + ); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec_size(&compressed, 10_000).unwrap(); + assert_eq_array(&original, &recreated); + } + + /// When `total_plain_text_limit` is exceeded the encoder stops analysing + /// deflate streams and writes the remaining bytes as LITERAL blocks. The + /// decoder must still reproduce the original bytes exactly, including the + /// unprocessed portion. + #[test] + fn test_total_plain_text_limit_forces_literal_fallback_and_roundtrips() { + use crate::utils::{assert_eq_array, read_file}; + // samplezip.zip has several DEFLATE entries; setting the limit to 1 byte + // ensures that after the first DEFLATE entry's plaintext is accumulated, + // every subsequent scan sees total_plain_text_seen > limit and falls back + // to writing remaining content as a single LITERAL block. + let original = read_file("samplezip.zip"); + let mut enc = PreflateContainerProcessor::new( + &PreflateContainerConfig { + total_plain_text_limit: 1, + ..PreflateContainerConfig::default() + }, + 1, + false, + ); + let compressed = enc.process_vec(&original).unwrap(); + + let blocks = parse_wire_block_types(&compressed); + + // At least one LITERAL block must appear (the fallback content). 
+ let has_literal = blocks.iter().any(|&(_, t)| t == BLOCK_TYPE_LITERAL);
+ assert!(
+ has_literal,
+ "after total_plain_text_limit is exceeded, remaining content must be LITERAL"
+ );
+
+ // The stream must still decode back to the original bytes.
+ let mut dec = RecreateContainerProcessor::new(usize::MAX);
+ let recreated = dec.process_vec(&compressed).unwrap();
+ assert_eq_array(&original, &recreated);
+ }
+
+ // ── Multi-scheme fixture tests ───────────────────────────────────────────────
+
+ /// Helper: compress `data` in one shot and return `(compressed, blocks)`.
+ fn compress_default(data: &[u8]) -> (Vec<u8>, Vec<(u8, u8)>) {
+ let mut enc =
+ PreflateContainerProcessor::new(&PreflateContainerConfig::default(), 1, false);
+ let compressed = enc.process_vec(data).unwrap();
+ let blocks = parse_wire_block_types(&compressed);
+ (compressed, blocks)
+ }
+
+ /// Helper: full roundtrip assertion — compress then decompress, check byte equality.
+ fn assert_roundtrip(original: &[u8]) {
+ let (compressed, _) = compress_default(original);
+ let mut dec = RecreateContainerProcessor::new(usize::MAX);
+ let recreated = dec.process_vec(&compressed).unwrap();
+ crate::utils::assert_eq_array(original, &recreated);
+ }
+
+ /// Count how many blocks have a given block-type kind.
+ fn count_block_type(blocks: &[(u8, u8)], kind: u8) -> usize {
+ blocks.iter().filter(|&&(_, t)| t == kind).count()
+ }
+
+ /// Two concatenated gzip streams — each contains plaintext well above
+ /// MIN_BLOCKSIZE=1024, so the scanner must emit exactly two DEFLATE blocks. 
+ ///
+ /// Fixture: `test_two_gzip_streams.bin`
+ /// Expected wire sequence: literal, deflate, literal, deflate, literal, EOS
+ #[test]
+ fn test_two_gzip_streams_produce_two_deflate_blocks_and_roundtrip() {
+ use crate::utils::read_file;
+ let original = read_file("test_two_gzip_streams.bin");
+ let (compressed, blocks) = compress_default(&original);
+
+ assert_eq!(
+ count_block_type(&blocks, BLOCK_TYPE_DEFLATE),
+ 2,
+ "two consecutive gzip streams should each produce one DEFLATE block; blocks={blocks:?}"
+ );
+
+ let mut dec = RecreateContainerProcessor::new(usize::MAX);
+ let recreated = dec.process_vec(&compressed).unwrap();
+ crate::utils::assert_eq_array(&original, &recreated);
+ }
+
+ /// A gzip stream whose plaintext is below MIN_BLOCKSIZE (500 < 1024) must NOT
+ /// be promoted to a DEFLATE block — the whole file becomes a single literal chunk.
+ ///
+ /// Fixture: `test_tiny_gzip.bin`
+ /// Expected wire sequence: literal, EOS (no DEFLATE blocks)
+ #[test]
+ fn test_tiny_gzip_below_min_blocksize_becomes_literal_and_roundtrip() {
+ use crate::utils::read_file;
+ let original = read_file("test_tiny_gzip.bin");
+ let (compressed, blocks) = compress_default(&original);
+
+ assert_eq!(
+ count_block_type(&blocks, BLOCK_TYPE_DEFLATE),
+ 0,
+ "gzip with 500-byte plaintext (< MIN_BLOCKSIZE) must not produce a DEFLATE block; \
+ blocks={blocks:?}"
+ );
+
+ let mut dec = RecreateContainerProcessor::new(usize::MAX);
+ let recreated = dec.process_vec(&compressed).unwrap();
+ crate::utils::assert_eq_array(&original, &recreated);
+ }
+
+ /// A large gzip stream (plaintext > MIN_BLOCKSIZE) immediately followed by
+ /// a tiny gzip (plaintext < MIN_BLOCKSIZE). Only the large stream must become a
+ /// DEFLATE block; the small one stays literal. 
+ /// + /// Fixture: `test_big_then_small_gzip.bin` + /// Expected wire sequence: literal, deflate, literal, EOS (exactly 1 DEFLATE block) + #[test] + fn test_big_gzip_deflate_small_gzip_literal_and_roundtrip() { + use crate::utils::read_file; + let original = read_file("test_big_then_small_gzip.bin"); + let (compressed, blocks) = compress_default(&original); + + assert_eq!( + count_block_type(&blocks, BLOCK_TYPE_DEFLATE), + 1, + "only the large gzip stream should become a DEFLATE block; blocks={blocks:?}" + ); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + crate::utils::assert_eq_array(&original, &recreated); + } + + /// A file with a valid gzip header but a deliberately corrupted DEFLATE body + /// (0xFF leading byte) must not crash. The scanner must gracefully abandon the + /// stream and encode the entire file as a literal block. + /// + /// Fixture: `test_corrupted_deflate.bin` + /// Expected wire sequence: literal, EOS (0 DEFLATE blocks) + #[test] + fn test_corrupted_deflate_body_falls_back_to_literal_and_roundtrip() { + use crate::utils::read_file; + let original = read_file("test_corrupted_deflate.bin"); + let (compressed, blocks) = compress_default(&original); + + assert_eq!( + count_block_type(&blocks, BLOCK_TYPE_DEFLATE), + 0, + "corrupted DEFLATE body must not produce a DEFLATE block; blocks={blocks:?}" + ); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + crate::utils::assert_eq_array(&original, &recreated); + } + + /// A file containing padding bytes, then two zlib streams (each with plaintext + /// > MIN_BLOCKSIZE), then more padding. The scanner must find both zlib headers + /// and emit exactly two DEFLATE blocks. 
+ /// + /// Fixture: `test_two_zlib_streams.bin` + /// layout: 100 × `\xDE\xAD` | zlib(EEEE×6000) | 100 × `\xDE\xAD` | zlib(FFFF×6000) | 100 × `\xDE\xAD` + /// Expected wire sequence: literal, deflate, literal, deflate, literal, EOS + #[test] + fn test_two_zlib_streams_produce_two_deflate_blocks_and_roundtrip() { + use crate::utils::read_file; + let original = read_file("test_two_zlib_streams.bin"); + let (compressed, blocks) = compress_default(&original); + + assert_eq!( + count_block_type(&blocks, BLOCK_TYPE_DEFLATE), + 2, + "two zlib streams surrounded by literal bytes should each produce a DEFLATE block; \ + blocks={blocks:?}" + ); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + crate::utils::assert_eq_array(&original, &recreated); + } + + /// A ZIP file containing three DEFLATE-compressed entries must produce exactly three + /// DEFLATE blocks — one per entry — and round-trip correctly. + /// + /// Fixture: `test_zip_3entries.zip` (entries G×20000, H×20000, I×20000 bytes) + /// Expected wire sequence: literal, deflate, literal, deflate, literal, deflate, literal, EOS + #[test] + fn test_zip_three_deflated_entries_produce_three_deflate_blocks_and_roundtrip() { + use crate::utils::read_file; + let original = read_file("test_zip_3entries.zip"); + let (compressed, blocks) = compress_default(&original); + + assert_eq!( + count_block_type(&blocks, BLOCK_TYPE_DEFLATE), + 3, + "ZIP with 3 DEFLATED entries should produce 3 DEFLATE blocks; blocks={blocks:?}" + ); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + crate::utils::assert_eq_array(&original, &recreated); + } + + /// A ZIP file with a STORED entry (method=0) followed by a DEFLATED entry (method=8). + /// `parse_zip_stream` returns `Err` for STORED entries so they become literal blocks; + /// only the DEFLATED entry is analysed and emitted as a DEFLATE block. 
+ /// + /// Fixture: `test_zip_stored_then_deflated.zip` (J×8000 STORED, K×20000 DEFLATED) + /// Expected wire sequence: literal, deflate, literal, EOS (exactly 1 DEFLATE block) + #[test] + fn test_zip_stored_entry_stays_literal_deflated_entry_becomes_deflate_and_roundtrip() { + use crate::utils::read_file; + let original = read_file("test_zip_stored_then_deflated.zip"); + let (compressed, blocks) = compress_default(&original); + + assert_eq!( + count_block_type(&blocks, BLOCK_TYPE_DEFLATE), + 1, + "only the DEFLATED entry should become a DEFLATE block; STORED stays literal; \ + blocks={blocks:?}" + ); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + crate::utils::assert_eq_array(&original, &recreated); + } + + /// A buffer filled with pseudo-random bytes contains no recognisable DEFLATE/zlib/gzip + /// signatures. The entire file must be emitted as a single literal block with no + /// DEFLATE analysis. + /// + /// Fixture: `test_random_bytes.bin` (32 KiB pseudo-random) + /// Expected wire sequence: literal, EOS + #[test] + fn test_random_bytes_produce_no_deflate_blocks_and_roundtrip() { + use crate::utils::read_file; + let original = read_file("test_random_bytes.bin"); + let (compressed, blocks) = compress_default(&original); + + assert_eq!( + count_block_type(&blocks, BLOCK_TYPE_DEFLATE), + 0, + "random bytes contain no DEFLATE streams; blocks={blocks:?}" + ); + + // The literal block must survive the round-trip. + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + crate::utils::assert_eq_array(&original, &recreated); + } + + /// Two gzip streams separated by a 1000-byte null gap. Both streams have + /// plaintext > MIN_BLOCKSIZE, so both must produce DEFLATE blocks, and the gap + /// must appear as a literal block between them. 
+ /// + /// Fixture: `test_gzip_with_gap.bin` + /// Expected wire sequence: literal, deflate, literal, deflate, literal, EOS + #[test] + fn test_two_gzip_streams_with_null_gap_produce_two_deflate_blocks_and_roundtrip() { + use crate::utils::read_file; + let original = read_file("test_gzip_with_gap.bin"); + let (compressed, blocks) = compress_default(&original); + + assert_eq!( + count_block_type(&blocks, BLOCK_TYPE_DEFLATE), + 2, + "both gzip streams should become DEFLATE blocks; null gap stays literal; \ + blocks={blocks:?}" + ); + // There should be at least one literal block (the gap between the two streams). + assert!( + count_block_type(&blocks, BLOCK_TYPE_LITERAL) >= 1, + "null gap between gzip streams should produce at least one literal block; \ + blocks={blocks:?}" + ); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + crate::utils::assert_eq_array(&original, &recreated); + } + + /// Feed a fixture containing two gzip streams in very small chunks (64 bytes at a + /// time) via the incremental `process_buffer` API to exercise boundary handling. + /// The round-trip result must be byte-exact regardless of where chunk boundaries fall. 
+ #[test] + fn test_two_gzip_streams_incremental_small_chunks_roundtrip() { + use crate::utils::{assert_eq_array, read_file}; + let original = read_file("test_two_gzip_streams.bin"); + + let mut enc = PreflateContainerProcessor::new( + &PreflateContainerConfig { + min_chunk_size: 0, + ..PreflateContainerConfig::default() + }, + 1, + false, + ); + let mut compressed = Vec::new(); + let chunk_size = 64; + let mut pos = 0; + while pos < original.len() { + let end = (pos + chunk_size).min(original.len()); + enc.process_buffer(&original[pos..end], false, &mut compressed) + .unwrap(); + pos = end; + } + enc.process_buffer(&[], true, &mut compressed).unwrap(); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + assert_eq_array(&original, &recreated); + } + + /// Feed `test_two_zlib_streams.bin` in small chunks (128 bytes) to confirm that + /// the incremental path handles mixed literal padding + zlib streams correctly. + #[test] + fn test_two_zlib_streams_incremental_small_chunks_roundtrip() { + use crate::utils::{assert_eq_array, read_file}; + let original = read_file("test_two_zlib_streams.bin"); + + let mut enc = PreflateContainerProcessor::new( + &PreflateContainerConfig { + min_chunk_size: 0, + ..PreflateContainerConfig::default() + }, + 1, + false, + ); + let mut compressed = Vec::new(); + let chunk_size = 128; + let mut pos = 0; + while pos < original.len() { + let end = (pos + chunk_size).min(original.len()); + enc.process_buffer(&original[pos..end], false, &mut compressed) + .unwrap(); + pos = end; + } + enc.process_buffer(&[], true, &mut compressed).unwrap(); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + assert_eq_array(&original, &recreated); + } + + /// Feed a ZIP fixture in small chunks (256 bytes) to check that chunk boundaries + /// inside the ZIP local-file headers and DEFLATE bodies are handled gracefully. 
+ #[test] + fn test_zip_three_entries_incremental_small_chunks_roundtrip() { + use crate::utils::{assert_eq_array, read_file}; + let original = read_file("test_zip_3entries.zip"); + + let mut enc = PreflateContainerProcessor::new( + &PreflateContainerConfig { + min_chunk_size: 0, + ..PreflateContainerConfig::default() + }, + 1, + false, + ); + let mut compressed = Vec::new(); + let chunk_size = 256; + let mut pos = 0; + while pos < original.len() { + let end = (pos + chunk_size).min(original.len()); + enc.process_buffer(&original[pos..end], false, &mut compressed) + .unwrap(); + pos = end; + } + enc.process_buffer(&[], true, &mut compressed).unwrap(); + + let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec(&compressed).unwrap(); + assert_eq_array(&original, &recreated); + } + + /// Verify that the decoder also handles the recreated stream correctly when fed in + /// small chunks, not just when given the entire buffer at once. + /// Uses `test_zip_stored_then_deflated.zip` (mixed STORED + DEFLATED entries). + #[test] + fn test_zip_stored_then_deflated_decoder_small_chunks_roundtrip() { + use crate::utils::{assert_eq_array, read_file}; + let original = read_file("test_zip_stored_then_deflated.zip"); + + let mut enc = + PreflateContainerProcessor::new(&PreflateContainerConfig::default(), 1, false); + let compressed = enc.process_vec(&original).unwrap(); + + // Decompress in 512-byte chunks to exercise the incremental decoder. 
+ let mut dec = RecreateContainerProcessor::new(usize::MAX); + let recreated = dec.process_vec_size(&compressed, 512).unwrap(); + assert_eq_array(&original, &recreated); + } +} diff --git a/container/src/container_processor.rs b/container/src/container_processor.rs deleted file mode 100644 index 1d2fe24..0000000 --- a/container/src/container_processor.rs +++ /dev/null @@ -1,1301 +0,0 @@ -use byteorder::ReadBytesExt; -use lepton_jpeg::{DEFAULT_THREAD_POOL, EnabledFeatures}; - -use std::{ - collections::VecDeque, - io::{BufRead, Cursor, Read, Write}, - usize, -}; - -use crate::{ - idat_parse::{IdatContents, PngHeader, recreate_idat}, - scan_deflate::{FindStreamResult, FoundStream, FoundStreamType, find_compressable_stream}, - scoped_read::ScopedRead, - utils::TakeReader, -}; - -use preflate_rs::{ - AddContext, ExitCode, HashAlgorithm, PreflateConfig, PreflateError, PreflateStreamProcessor, - RecreateStreamProcessor, Result, err_exit_code, recreate_whole_deflate_stream, -}; - -/// Configuration for the deflate process -#[derive(Debug, Clone)] -pub struct PreflateContainerConfig { - /// As we scan for deflate streams, we need to have a minimum memory - /// chunk to process. We scan this chunk for deflate streams and at least - /// deflate one block has to fit into a chunk for us to recognize it. - pub min_chunk_size: usize, - - /// The maximum size of a deflate or PNG compressed block we will consider. If - /// a deflate stream is larger than this, we will not decompress it and - /// just write it out as a literal block. - pub max_chunk_size: usize, - - /// The maximum overall size of plain text that we will compress. This is - /// global to the entire container and limits the amount of processing that - /// we will do to avoid running out of CPU time on a single file. Once we - /// hit this limit, we will stop looking for deflate streams and just write - /// out the rest of the data as literal blocks. 
- pub total_plain_text_limit: u64, - - /// The maximum size of a plain text chunk that we will decompress at a time. This limits - /// the memory usage of the decompression process. - pub chunk_plain_text_limit: usize, - - /// true if we should verify that the decompressed data can be recompressed to the same bytes. - /// This is important since there may be corner cases where the data may not yield the same bytes. - /// - /// If this is false, we will not verify the decompressed data and just write it out as is and it is - /// up to the caller to make sure the data is valid. In no case should you just assume that you - /// can get the same data back without verifying it. - pub validate_compression: bool, - - /// Maximum number of lookups we will do in the hash chain. This will limit the CPU time we spend - /// on deflate stream processing but also means that we won't be able to recompress deflate streams - /// that were compressed with a larger chain length (eg level 9 has 4096). - pub max_chain_length: u32, -} - -impl Default for PreflateContainerConfig { - fn default() -> Self { - PreflateContainerConfig { - min_chunk_size: 1024 * 1024, - max_chunk_size: 64 * 1024 * 1024, - total_plain_text_limit: 512 * 1024 * 1024, - chunk_plain_text_limit: 128 * 1024 * 1024, - max_chain_length: 4096, - validate_compression: true, - } - } -} - -impl PreflateContainerConfig { - pub fn preflate_config(&self) -> PreflateConfig { - PreflateConfig { - max_chain_length: self.max_chain_length, - plain_text_limit: self.chunk_plain_text_limit, - verify_compression: self.validate_compression, - } - } -} - -const COMPRESSED_WRAPPER_VERSION_1: u8 = 1; - -/// literal chunks are just copied to the output -const LITERAL_CHUNK: u8 = 0; - -/// zlib compressed chunks are zlib compressed -const DEFLATE_STREAM: u8 = 1; - -/// PNG chunks are IDAT chunks that are zlib compressed -const PNG_COMPRESSED: u8 = 2; - -/// deflate stream that continues the previous one with the same dictionary, bitstream 
etc -const DEFLATE_STREAM_CONTINUE: u8 = 3; - -/// JPEG Lepton compressed chunks are JPEG Lepton compressed -const JPEG_LEPTON_COMPRESSED: u8 = 4; - -pub(crate) fn write_varint(destination: &mut impl Write, value: u32) -> std::io::Result<()> { - let mut value = value; - loop { - let mut byte = (value & 0x7F) as u8; - value >>= 7; - if value != 0 { - byte |= 0x80; - } - destination.write_all(&[byte])?; - if value == 0 { - break; - } - } - - Ok(()) -} - -pub(crate) fn read_varint(source: &mut impl Read) -> std::io::Result { - let mut result = 0; - let mut shift = 0; - loop { - let mut byte = [0u8; 1]; - source.read_exact(&mut byte)?; - let byte = byte[0]; - result |= ((byte & 0x7F) as u32) << shift; - shift += 7; - if byte & 0x80 == 0 { - break; - } - } - Ok(result) -} - -#[test] -fn test_variant_roundtrip() { - let values = [ - 0, 1, 127, 128, 255, 256, 16383, 16384, 2097151, 2097152, 268435455, 268435456, 4294967295, - ]; - - let mut buffer = Vec::new(); - for &v in values.iter() { - write_varint(&mut buffer, v).unwrap(); - } - - let mut buffer = &buffer[..]; - - for &v in values.iter() { - assert_eq!(v, read_varint(&mut buffer).unwrap()); - } -} - -fn write_literal_block(content: &[u8], destination: &mut impl Write) -> Result<()> { - destination.write_all(&[LITERAL_CHUNK])?; - write_varint(destination, content.len() as u32)?; - destination.write_all(content)?; - Ok(()) -} - -fn write_chunk_block( - result: &mut impl Write, - chunk: FoundStream, - stats: &mut PreflateStats, -) -> Result> { - match chunk.chunk_type { - FoundStreamType::DeflateStream(parameters, state) => { - result.write_all(&[DEFLATE_STREAM])?; - - write_varint(result, chunk.corrections.len() as u32)?; - write_varint(result, state.plain_text().text().len() as u32)?; - - result.write_all(&chunk.corrections)?; - result.write_all(&state.plain_text().text())?; - - stats.overhead_bytes += chunk.corrections.len() as u64; - stats.uncompressed_size += state.plain_text().len() as u64; - stats.hash_algorithm 
= parameters.hash_algorithm; - - if !state.is_done() { - return Ok(Some(state)); - } - } - - FoundStreamType::IDATDeflate(parameters, mut idat, plain_text) => { - log::debug!( - "IDATDeflate param {:?} corrections {}", - parameters, - chunk.corrections.len() - ); - - if webp_compress(result, plain_text.text(), &chunk.corrections, &idat).is_err() { - log::debug!("non-Webp compressed {}", idat.total_chunk_length); - - result.write_all(&[PNG_COMPRESSED])?; - write_varint(result, chunk.corrections.len() as u32)?; - write_varint(result, plain_text.text().len() as u32)?; - - idat.png_header = None; - idat.write_to_bytestream(result)?; - - result.write_all(&chunk.corrections)?; - result.write_all(&plain_text.text())?; - } - - stats.uncompressed_size += plain_text.len() as u64; - stats.hash_algorithm = parameters.hash_algorithm; - stats.overhead_bytes += chunk.corrections.len() as u64; - } - FoundStreamType::JPEGLepton(data) => { - result.write_all(&[JPEG_LEPTON_COMPRESSED])?; - write_varint(result, data.len() as u32)?; - - result.write_all(&data)?; - - stats.uncompressed_size += data.len() as u64; - } - } - Ok(None) -} - -/// Scans for multiple deflate streams in an arbitrary binary file, decompresses the streams and -/// returns an uncompressed file that can then be recompressed using a better algorithm. -/// This can then be passed back into recreate_whole_from_container to recreate the exact original file. -/// -/// Note that the result is NOT compressed and has to be compressed by some other algorithm -/// in order to see any savings. -/// -/// This is a wrapper for PreflateContainerProcessor. 
-pub fn preflate_whole_into_container( - config: &PreflateContainerConfig, - compressed_data: &mut impl BufRead, - write: &mut impl Write, -) -> Result { - let mut context = PreflateContainerProcessor::new(&config); - context.copy_to_end(compressed_data, write).unwrap(); - - Ok(context.stats()) -} - -/// Takes the binary output of preflate_whole_into_container and recreates the original file. -/// -/// This is a wrapper for RecreateContainerProcessor. -pub fn recreate_whole_from_container( - source: &mut impl BufRead, - destination: &mut impl Write, -) -> Result<()> { - let mut recreate = RecreateContainerProcessor::new(usize::MAX); - recreate.copy_to_end(source, destination).context() -} - -#[cfg(test)] -fn read_chunk_block_slow( - source: &mut impl BufRead, - destination: &mut impl Write, -) -> std::result::Result<(), PreflateError> { - let mut p = RecreateContainerProcessor::new_single_chunk(usize::MAX); - p.copy_to_end_size(source, destination, 1).context() -} - -#[test] -fn roundtrip_chunk_block_literal() { - let mut buffer = Vec::new(); - - write_literal_block(b"hello", &mut buffer).unwrap(); - - let mut read_cursor = std::io::Cursor::new(buffer); - let mut destination = Vec::new(); - read_chunk_block_slow(&mut read_cursor, &mut destination).unwrap(); - - assert!(destination == b"hello"); -} - -#[test] -fn roundtrip_chunk_block_deflate() { - let contents = crate::utils::read_file("compressed_zlib_level1.deflate"); - - let mut stream_state = PreflateStreamProcessor::new(&PreflateConfig::default()); - let results = stream_state.decompress(&contents).unwrap(); - - let mut buffer = Vec::new(); - - let mut stats = PreflateStats::default(); - write_chunk_block( - &mut buffer, - FoundStream { - chunk_type: FoundStreamType::DeflateStream(results.parameters.unwrap(), stream_state), - corrections: results.corrections, - }, - &mut stats, - ) - .unwrap(); - - let mut read_cursor = std::io::Cursor::new(buffer); - let mut destination = Vec::new(); - 
read_chunk_block_slow(&mut read_cursor, &mut destination).unwrap(); - - assert!(destination == contents); -} - -#[test] -fn roundtrip_chunk_block_png() { - let f = crate::utils::read_file("treegdi.png"); - - // we know the first IDAT chunk starts at 83 (avoid testing the scan_deflate code in a unit teast) - let (idat_contents, deflate_stream) = crate::idat_parse::parse_idat(None, &f[83..]).unwrap(); - let mut stream = PreflateStreamProcessor::new(&PreflateConfig::default()); - let results = stream.decompress(&deflate_stream).unwrap(); - - let total_chunk_length = idat_contents.total_chunk_length; - - let mut buffer = Vec::new(); - - let mut stats = PreflateStats::default(); - write_chunk_block( - &mut buffer, - FoundStream { - chunk_type: FoundStreamType::IDATDeflate( - results.parameters.unwrap(), - idat_contents, - stream.detach_plain_text(), - ), - corrections: results.corrections, - }, - &mut stats, - ) - .unwrap(); - - let mut read_cursor = std::io::Cursor::new(buffer); - let mut destination = Vec::new(); - read_chunk_block_slow(&mut read_cursor, &mut destination).unwrap(); - - assert!(destination == &f[83..83 + total_chunk_length]); -} - -#[cfg(test)] -fn roundtrip_deflate_chunks(filename: &str) { - use crate::utils::assert_eq_array; - - let f = crate::utils::read_file(filename); - - println!("Processing file: {}", filename); - - let mut expanded = Vec::new(); - preflate_whole_into_container( - &PreflateContainerConfig::default(), - &mut std::io::Cursor::new(&f), - &mut expanded, - ) - .unwrap(); - - println!("Recreating file: {}", filename); - - let mut read_cursor = std::io::Cursor::new(expanded); - - let mut destination = Vec::new(); - recreate_whole_from_container(&mut read_cursor, &mut destination).unwrap(); - - assert_eq_array(&destination, &f); -} - -#[test] -fn roundtrip_skip_length_crash() { - roundtrip_deflate_chunks("skiplengthcrash.bin"); -} - -#[test] -fn roundtrip_png_chunks() { - roundtrip_deflate_chunks("treegdi.png"); -} - -#[test] -fn 
roundtrip_zip_chunks() { - roundtrip_deflate_chunks("samplezip.zip"); -} - -#[test] -fn roundtrip_gz_chunks() { - roundtrip_deflate_chunks("sample1.bin.gz"); -} - -#[test] -fn roundtrip_png_chunks2() { - roundtrip_deflate_chunks("starcontrol.samplesave"); -} - -#[test] -fn verify_zip_compress() { - use crate::utils::read_file; - let v = read_file("samplezip.zip"); - - let mut expanded = Vec::new(); - preflate_whole_into_container( - &PreflateContainerConfig::default(), - &mut std::io::Cursor::new(&v), - &mut expanded, - ) - .unwrap(); - - let mut recompressed = Vec::new(); - recreate_whole_from_container(&mut std::io::Cursor::new(expanded), &mut recompressed).unwrap(); - - assert!(v == recompressed); -} - -/// Statistics about the preflate process -#[derive(Debug, Copy, Clone, Default)] -pub struct PreflateStats { - pub deflate_compressed_size: u64, - pub zstd_compressed_size: u64, - pub uncompressed_size: u64, - pub overhead_bytes: u64, - pub hash_algorithm: HashAlgorithm, - pub zstd_baseline_size: u64, -} - -/// Processes an input buffer and writes the output to a writer -pub trait ProcessBuffer { - fn process_buffer( - &mut self, - input: &[u8], - input_complete: bool, - writer: &mut impl Write, - ) -> Result<()>; - - #[cfg(test)] - fn process_vec(&mut self, input: &[u8]) -> Result> { - let mut writer = Vec::new(); - - self.copy_to_end(&mut std::io::Cursor::new(&input), &mut writer) - .context()?; - - Ok(writer) - } - - #[cfg(test)] - fn process_vec_size(&mut self, input: &[u8], read_chunk_size: usize) -> Result> { - let mut writer = Vec::new(); - - self.copy_to_end_size( - &mut std::io::Cursor::new(&input), - &mut writer, - read_chunk_size, - ) - .context()?; - - Ok(writer) - } - - /// Reads everything from input and writes it to the output. 
- /// Wraps calls to process buffer - fn copy_to_end(&mut self, input: &mut impl BufRead, output: &mut impl Write) -> Result<()> { - self.copy_to_end_size(input, output, 1024 * 1024) - } - - /// Reads everything from input and writes it to the output. - /// Wraps calls to process buffer - fn copy_to_end_size( - &mut self, - input: &mut impl BufRead, - output: &mut impl Write, - read_chunk_size: usize, - ) -> Result<()> { - let mut input_complete = false; - loop { - let buffer: &[u8]; - if input_complete { - buffer = &[]; - } else { - buffer = input.fill_buf().context()?; - if buffer.len() == 0 { - input_complete = true - } - }; - - if input_complete { - self.process_buffer(&[], true, output).context()?; - break; - } else { - // process buffer a piece at a time to avoid overflowing memory - let mut amount_read = 0; - while amount_read < buffer.len() { - let chunk_size = (buffer.len() - amount_read).min(read_chunk_size); - - self.process_buffer( - &buffer[amount_read..amount_read + chunk_size], - false, - output, - ) - .context()?; - - amount_read += chunk_size; - } - - let buflen = buffer.len(); - input.consume(buflen); - } - } - - Ok(()) - } - - fn stats(&self) -> PreflateStats { - PreflateStats::default() - } -} - -#[derive(Debug)] -enum ChunkParseState { - Start, - /// we are looking for a deflate stream or PNG chunk. The data of the PNG file - /// is stored later than the IHDR chunk that will tell us the dimensions of the image, - /// so we need to keep track of the IHDR chunk so we can use it later to properly - /// compress the PNG data. - Searching(Option), - DeflateContinue(PreflateStreamProcessor), -} - -/// Takes a sequence of bytes that may contain deflate streams, find -/// the streams, and emits a new stream that containus the decompressed -/// streams along with the corrections needed to recreate the original. 
-/// -/// This output can then be compressed with a better algorithm, like Zstandard -/// and achieve much better compression than if we tried to compress the -/// deflate stream directlyh. -pub struct PreflateContainerProcessor { - content: Vec, - compression_stats: PreflateStats, - input_complete: bool, - total_plain_text_seen: u64, - - /// used to track the last attempted chunk size, in case we - /// need more input to continue, we will collect at least min_chunk_size - /// more input before trying to process again until we reach max_chunk_size - last_attempt_chunk_size: usize, - - state: ChunkParseState, - config: PreflateContainerConfig, -} - -impl PreflateContainerProcessor { - pub fn new(config: &PreflateContainerConfig) -> Self { - PreflateContainerProcessor { - content: Vec::new(), - compression_stats: PreflateStats::default(), - input_complete: false, - state: ChunkParseState::Start, - total_plain_text_seen: 0, - last_attempt_chunk_size: 0, - config: config.clone(), - } - } -} - -impl ProcessBuffer for PreflateContainerProcessor { - fn process_buffer( - &mut self, - input: &[u8], - input_complete: bool, - writer: &mut impl Write, - ) -> Result<()> { - if self.input_complete && (input.len() > 0 || !input_complete) { - return Err(PreflateError::new( - ExitCode::InvalidParameter, - "more data provided after input_complete signaled", - )); - } - - if input.len() > 0 { - self.compression_stats.deflate_compressed_size += input.len() as u64; - self.content.extend_from_slice(input); - } - - loop { - // wait until we have at least min_chunk_size before we start processing - if self.content.is_empty() - || (!input_complete - && (self.content.len() - self.last_attempt_chunk_size) - < self.config.min_chunk_size - && self.content.len() <= self.config.max_chunk_size) - { - break; - } - - self.last_attempt_chunk_size = self.content.len(); - - match &mut self.state { - ChunkParseState::Start => { - writer.write_all(&[COMPRESSED_WRAPPER_VERSION_1])?; - self.state = 
ChunkParseState::Searching(None); - } - ChunkParseState::Searching(prev_ihdr) => { - if self.total_plain_text_seen > self.config.total_plain_text_limit { - // once we've exceeded our limit, we don't do any more compression - // this is to ensure we don't suck the CPU time for too long on - // a single file - write_literal_block(&self.content, writer)?; - - self.last_attempt_chunk_size = 0; - self.content.clear(); - break; - } - - // here we are looking for a deflate stream or PNG chunk - match find_compressable_stream( - &self.content, - prev_ihdr, - input_complete, - &self.config, - ) { - FindStreamResult::Found(next, chunk) => { - // the gap between the start and the beginning of the deflate stream - // is written out as a literal block - if next.start != 0 { - write_literal_block(&self.content[..next.start], writer)?; - } - - if let Some(mut state) = - write_chunk_block(writer, chunk, &mut self.compression_stats) - .context()? - { - self.total_plain_text_seen += state.plain_text().len() as u64; - state.shrink_to_dictionary(); - - self.state = ChunkParseState::DeflateContinue(state); - } - - self.content.drain(0..next.end); - self.last_attempt_chunk_size = self.content.len(); - } - FindStreamResult::ShortRead => { - if input_complete || self.content.len() > self.config.max_chunk_size { - // if we have too much data or have no more data, - // we just write it out as a literal block with everything we have - write_literal_block(&self.content, writer)?; - - self.content.clear(); - self.last_attempt_chunk_size = 0; - } else { - // we don't have enough data to process the stream, so we just - // wait for more data - break; - } - } - FindStreamResult::None => { - // couldn't find anything, just write the rest as a literal block - write_literal_block(&self.content, writer)?; - - self.content.clear(); - self.last_attempt_chunk_size = 0; - } - } - } - ChunkParseState::DeflateContinue(state) => { - // here we have a deflate stream that we need to continue - // right now we 
error out if the continuation cannot be processed - match state.decompress(&self.content) { - Err(_e) => { - // indicate that we got an error while trying to continue - // the compression of a previous chunk, this happens - // when the stream significantly diverged from the behavior we estimated - // in the first chunk that we saw - self.state = ChunkParseState::Searching(None); - - log::debug!("Error while trying to continue compression {:?}", _e); - } - Ok(res) => { - log::debug!( - "Deflate continue: {} -> {}", - state.plain_text().len(), - res.compressed_size - ); - - writer.write_all(&[DEFLATE_STREAM_CONTINUE])?; - - write_varint(writer, res.corrections.len() as u32)?; - write_varint(writer, state.plain_text().len() as u32)?; - - writer.write_all(&res.corrections)?; - writer.write_all(&state.plain_text().text())?; - - self.total_plain_text_seen += state.plain_text().len() as u64; - self.compression_stats.overhead_bytes += res.corrections.len() as u64; - self.compression_stats.uncompressed_size += - state.plain_text().len() as u64; - - self.content.drain(0..res.compressed_size); - self.last_attempt_chunk_size = self.content.len(); - - if state.is_done() { - self.state = ChunkParseState::Searching(None); - } else { - state.shrink_to_dictionary(); - } - } - } - } - } - } - - if input_complete { - self.input_complete = true; - - if self.content.len() > 0 { - write_literal_block(&self.content, writer)?; - } - self.content.clear(); - } - - Ok(()) - } - - fn stats(&self) -> PreflateStats { - self.compression_stats - } -} - -#[cfg(test)] -pub struct NopProcessBuffer {} - -#[cfg(test)] -impl ProcessBuffer for NopProcessBuffer { - fn process_buffer( - &mut self, - input: &[u8], - _input_complete: bool, - writer: &mut impl Write, - ) -> Result<()> { - writer.write_all(input).context()?; - - Ok(()) - } -} - -enum DecompressionState { - Start, - StartSegment, - LiteralBlock(usize), - DeflateBlock(usize, usize), - PNGBlock { - correction_length: usize, - 
uncompressed_length: usize, - idat: IdatContents, - filters: Vec, - }, - JPEGBlock { - lepton_length: usize, - }, -} - -/// recreates the orignal content from the chunked data -pub struct RecreateContainerProcessor { - capacity: usize, - input: VecDeque, - input_complete: bool, - state: DecompressionState, - - /// state of the predictor and plain text if we need to contiune a deflate stream - /// if it was too big to complete in a single chunk - deflate_continue_state: Option, -} - -impl RecreateContainerProcessor { - pub fn new(capacity: usize) -> Self { - RecreateContainerProcessor { - input: VecDeque::new(), - capacity, - input_complete: false, - state: DecompressionState::Start, - deflate_continue_state: None, - } - } - - /// for testing reading a single chunk (skip header) - pub fn new_single_chunk(capacity: usize) -> Self { - RecreateContainerProcessor { - input: VecDeque::new(), - capacity, - input_complete: false, - state: DecompressionState::StartSegment, - deflate_continue_state: None, - } - } -} - -impl ProcessBuffer for RecreateContainerProcessor { - fn process_buffer( - &mut self, - input: &[u8], - input_complete: bool, - writer: &mut impl Write, - ) -> Result<()> { - if self.input_complete && (input.len() > 0 || !input_complete) { - return Err(PreflateError::new( - ExitCode::InvalidParameter, - "more data provided after input_complete signaled", - )); - } - - // we could have been passed a big buffer, so we need to process it in chunks - let mut amount_read = 0; - loop { - let amount_to_read = (input.len() - amount_read).min(self.capacity); - - // when we get to the end and we've read everything, we can signal that we are done - if amount_read + amount_to_read == input.len() && input_complete { - self.input_complete = true; - } - - self.input - .extend(&input[amount_read..amount_read + amount_to_read]); - - amount_read += amount_to_read; - - self.process_buffer_internal(writer)?; - - if amount_read == input.len() { - break; - } - } - - Ok(()) - } -} - 
-impl RecreateContainerProcessor { - fn process_buffer_internal(&mut self, writer: &mut impl Write) -> Result<()> { - loop { - match &mut self.state { - DecompressionState::Start => { - if !self.input_complete && self.input.len() == 0 { - break; - } - - let version = self.input.read_u8()?; - - if version != COMPRESSED_WRAPPER_VERSION_1 { - return err_exit_code( - ExitCode::InvalidCompressedWrapper, - format!("Invalid version {version}"), - ); - } - - self.state = DecompressionState::StartSegment; - } - DecompressionState::StartSegment => { - // here's a good place to stop if we run out of input - if self.input.len() == 0 { - break; - } - - // use scoped read so that if we run out of bytes we can undo the read and wait for more input - self.state = match self.input.scoped_read(|r| match r.read_u8()? { - LITERAL_CHUNK => { - let length = read_varint(r)? as usize; - - Ok(DecompressionState::LiteralBlock(length)) - } - DEFLATE_STREAM => { - let correction_length = read_varint(r)? as usize; - let uncompressed_length = read_varint(r)? as usize; - - // clear the deflate state if we are starting a new block - self.deflate_continue_state = None; - - Ok(DecompressionState::DeflateBlock( - correction_length, - uncompressed_length, - )) - } - DEFLATE_STREAM_CONTINUE => { - let correction_length = read_varint(r)? as usize; - let uncompressed_length = read_varint(r)? as usize; - - if self.deflate_continue_state.is_none() { - return err_exit_code( - ExitCode::InvalidCompressedWrapper, - "no deflate state to continue", - ); - } - - Ok(DecompressionState::DeflateBlock( - correction_length, - uncompressed_length, - )) - } - PNG_COMPRESSED => { - let correction_length = read_varint(r)? as usize; - let uncompressed_length = read_varint(r)? 
as usize; - let idat = IdatContents::read_from_bytestream(r)?; - - let mut filters = Vec::new(); - if let Some(png_header) = &idat.png_header { - filters.resize(png_header.height as usize, 0); - r.read_exact(&mut filters[..])?; - } - - Ok(DecompressionState::PNGBlock { - correction_length, - uncompressed_length, - idat, - filters, - }) - } - JPEG_LEPTON_COMPRESSED => { - let lepton_length = read_varint(r)? as usize; - - Ok(DecompressionState::JPEGBlock { lepton_length }) - } - - _ => Err(PreflateError::new( - ExitCode::InvalidCompressedWrapper, - "Invalid chunk", - )), - }) { - Ok(s) => s, - Err(e) => { - if !self.input_complete && e.exit_code() == ExitCode::ShortRead { - // wait for more input if we ran out of bytes here - break; - } else { - return Err(e); - } - } - } - } - - DecompressionState::LiteralBlock(length) => { - let source_size = self.input.len(); - if source_size < *length { - if self.input_complete { - return Err(PreflateError::new( - ExitCode::InvalidCompressedWrapper, - "unexpected end of input", - )); - } - - std::io::copy(&mut self.input, writer).context()?; - *length -= source_size; - break; - } - - std::io::copy(&mut (&mut self.input).take(*length as u64), writer).context()?; - self.state = DecompressionState::StartSegment; - } - - DecompressionState::DeflateBlock(correction_length, uncompressed_length) => { - let source_size = self.input.len(); - let total_length = *correction_length + *uncompressed_length; - - if source_size < total_length { - if self.input_complete { - return Err(PreflateError::new( - ExitCode::InvalidCompressedWrapper, - "unexpected end of input", - )); - } - break; - } - - let corrections: Vec = self.input.drain(0..*correction_length).collect(); - - if let Some(reconstruct) = &mut self.deflate_continue_state { - let (comp, _) = reconstruct - .recompress( - &mut TakeReader::new(&mut self.input, *uncompressed_length), - &corrections, - ) - .context()?; - - writer.write_all(&comp)?; - } else { - let mut reconstruct = 
RecreateStreamProcessor::new(); - let (comp, _) = reconstruct - .recompress( - &mut TakeReader::new(&mut self.input, *uncompressed_length), - &corrections, - ) - .context()?; - - writer.write_all(&comp)?; - - self.deflate_continue_state = Some(reconstruct); - } - - self.state = DecompressionState::StartSegment; - } - - DecompressionState::PNGBlock { - correction_length, - uncompressed_length, - idat, - filters, - } => { - let source_size = self.input.len(); - - let total_length = *correction_length + *uncompressed_length; - if source_size < total_length { - // wait till we have the full block - if self.input_complete { - return Err(PreflateError::new( - ExitCode::InvalidCompressedWrapper, - "unexpected end of input", - )); - } - break; - } - - let corrections: Vec = self.input.drain(0..*correction_length).collect(); - - let plain_text; - - if let Some(header) = &idat.png_header { - let webp: Vec = self.input.drain(0..*uncompressed_length).collect(); - - plain_text = webp_decompress(filters, webp, header).context()?; - } else { - plain_text = self.input.drain(0..*uncompressed_length).collect(); - } - - let recompressed = - recreate_whole_deflate_stream(&plain_text, &corrections).context()?; - - recreate_idat(&idat, &recompressed[..], writer).context()?; - - self.state = DecompressionState::StartSegment; - } - DecompressionState::JPEGBlock { lepton_length } => { - let source_size = self.input.len(); - if source_size < *lepton_length { - if self.input_complete { - return Err(PreflateError::new( - ExitCode::InvalidCompressedWrapper, - "unexpected end of input", - )); - } - break; - } - - let lepton_data: Vec = self.input.drain(0..*lepton_length).collect(); - - match lepton_jpeg::decode_lepton( - &mut Cursor::new(&lepton_data), - writer, - &EnabledFeatures::compat_lepton_vector_read(), - &DEFAULT_THREAD_POOL, - ) { - Err(e) => { - return Err(PreflateError::new( - ExitCode::InvalidCompressedWrapper, - format!("JPEG Lepton decode failed: {}", e), - )); - } - Ok(_) => {} - 
} - - self.state = DecompressionState::StartSegment; - } - } - } - - Ok(()) - } -} - -fn webp_compress( - result: &mut impl Write, - plain_text: &[u8], - corrections: &[u8], - idat: &IdatContents, -) -> Result<()> { - log::debug!("{:?}", idat); - - #[cfg(feature = "webp")] - if let Some(png_header) = idat.png_header { - use crate::idat_parse::{PngColorType, undo_png_filters}; - use std::ops::Deref; - - let bbp = png_header.color_type.bytes_per_pixel(); - let w = png_header.width as usize; - let h = png_header.height as usize; - - log::debug!( - "plain text compressing {} bytes ({}x{}x{})", - plain_text.len(), - w, - h, - bbp - ); - - // see if the bitmap looks like the way with think it should (bits per pixel map + 1 height worth of filter bytes) - if (bbp * w * h) + h == plain_text.len() { - let (bitmap, filters) = undo_png_filters(plain_text, w, h, bbp); - - let enc = webp::Encoder::new( - &bitmap, - match png_header.color_type { - PngColorType::RGB => webp::PixelLayout::Rgb, - PngColorType::RGBA => webp::PixelLayout::Rgba, - }, - png_header.width, - png_header.height, - ); - - let mut webpconfig = webp::WebPConfig::new().unwrap(); - webpconfig.lossless = 1; - webpconfig.alpha_compression = 0; - webpconfig.exact = 1; // undocumented option, but required to not throw away color if alpha channel is zero - - // this is the default quality setting for webp lossless, we could dial it up - // but the quality gains are marginal for the CPU cost, although the - // CPU decompression cost is the same. 
- webpconfig.quality = 75.0; // 0..100 higher is slower but better compression - webpconfig.method = 4; // 0..6 higher is slower but better compression - - let comp = match enc.encode_advanced(&webpconfig) { - Ok(c) => c, - Err(e) => { - return err_exit_code( - ExitCode::WebPDecodeError, - format!("Webp encode failed: {:?}", e), - ); - } - }; - - result.write_all(&[PNG_COMPRESSED])?; - - write_varint(result, corrections.len() as u32)?; - write_varint(result, comp.deref().len() as u32)?; - - log::debug!( - "Webp compressed {} bytes (vs {})", - comp.deref().len(), - idat.total_chunk_length - ); - - idat.write_to_bytestream(result)?; - result.write_all(&filters)?; - - result.write_all(&corrections)?; - result.write_all(comp.deref())?; - - return Ok(()); - } - } - - return err_exit_code( - ExitCode::InvalidCompressedWrapper, - "Webp compression not supported", - ); -} - -fn webp_decompress( - filters: &[u8], - webp: Vec, - header: &crate::idat_parse::PngHeader, -) -> Result> { - #[cfg(feature = "webp")] - match webp::Decoder::new(webp.as_slice()).decode() { - Some(result) => { - use crate::idat_parse::apply_png_filters_with_types; - use std::ops::Deref; - - let m = result.deref(); - - return Ok(apply_png_filters_with_types( - m, - header.width as usize, - header.height as usize, - if result.is_alpha() { 4 } else { 3 }, - header.color_type.bytes_per_pixel(), - &filters, - )); - } - _ => {} - } - return err_exit_code(ExitCode::InvalidCompressedWrapper, "Webp decode failed"); -} - -#[test] -fn test_baseline_calc() { - use crate::utils::read_file; - use crate::zstd_compression::ZstdCompressContext; - - let v = read_file("samplezip.zip"); - - let mut context = ZstdCompressContext::new( - PreflateContainerProcessor::new(&PreflateContainerConfig::default()), - 9, - true, - ); - - let _r = context.process_vec(&v).unwrap(); - - let stats = context.stats(); - - println!("stats: {:?}", stats); - - // these change if the compression algorithm is altered, update them - 
assert_eq!(stats.overhead_bytes, 463); - assert_eq!(stats.zstd_compressed_size, 12444); - assert_eq!(stats.uncompressed_size, 54871); - assert_eq!(stats.zstd_baseline_size, 13664); -} - -#[test] -fn roundtrip_small_chunk() { - use crate::utils::{assert_eq_array, read_file}; - - let original = read_file("pptxplaintext.zip"); - - let mut context = PreflateContainerProcessor::new(&PreflateContainerConfig { - min_chunk_size: 100000, - max_chunk_size: 100000, - total_plain_text_limit: u64::MAX, - ..Default::default() - }); - - let compressed = context.process_vec_size(&original, 20001).unwrap(); - - let mut context = RecreateContainerProcessor::new(usize::MAX); - let recreated = context.process_vec_size(&compressed, 20001).unwrap(); - - assert_eq_array(&original, &recreated); -} - -#[test] -fn roundtrip_small_plain_text() { - use crate::utils::{assert_eq_array, read_file}; - - let original = read_file("pptxplaintext.zip"); - - let mut context = PreflateContainerProcessor::new(&PreflateContainerConfig { - min_chunk_size: 100000, - max_chunk_size: 100000, - total_plain_text_limit: u64::MAX, - ..Default::default() - }); - - let compressed = context.process_vec_size(&original, 2001).unwrap(); - - let mut context = RecreateContainerProcessor::new(usize::MAX); - let recreated = context.process_vec_size(&compressed, 2001).unwrap(); - - assert_eq_array(&original, &recreated); -} - -#[test] -fn roundtrip_png_e2e() { - crate::init_logging(); - - use crate::utils::{assert_eq_array, read_file}; - - let original = read_file("figma.png"); - - println!("Compressing file"); - - let mut context = PreflateContainerProcessor::new(&PreflateContainerConfig { - min_chunk_size: 100000, - max_chunk_size: original.len(), - ..Default::default() - }); - - let compressed = context.process_vec_size(&original, 100100).unwrap(); - - println!("Recreating file"); - - let mut context = RecreateContainerProcessor::new(usize::MAX); - let recreated = context.process_vec_size(&compressed, 100100).unwrap(); 
- - assert_eq_array(&original, &recreated); -} - -#[test] -fn roundtrip_jpg() { - crate::init_logging(); - - use crate::utils::{assert_eq_array, read_file}; - - let original = read_file("embedded-images.pdf"); - - println!("Compressing file"); - - let mut context = PreflateContainerProcessor::new(&PreflateContainerConfig { - min_chunk_size: 1000000, - max_chunk_size: original.len(), - ..Default::default() - }); - - let compressed = context.process_vec_size(&original, usize::MAX).unwrap(); - - println!( - "Compressed size: {} vs {}", - compressed.len(), - original.len() - ); - - println!("Recreating file"); - - let mut context = RecreateContainerProcessor::new(usize::MAX); - let recreated = context.process_vec_size(&compressed, usize::MAX).unwrap(); - - assert_eq_array(&original, &recreated); -} diff --git a/container/src/container_read.rs b/container/src/container_read.rs new file mode 100644 index 0000000..795382a --- /dev/null +++ b/container/src/container_read.rs @@ -0,0 +1,535 @@ +use byteorder::ReadBytesExt; +use crc32fast::Hasher as CrcHasher; +use lepton_jpeg::{DEFAULT_THREAD_POOL, EnabledFeatures}; + +use std::{ + collections::VecDeque, + io::{Cursor, Read, Write}, +}; + +use crate::{ + container_common::{ + BLOCK_COMPRESSION_MASK, BLOCK_COMPRESSION_NONE, BLOCK_COMPRESSION_ZSTD, BLOCK_TYPE_DEFLATE, + BLOCK_TYPE_DEFLATE_CONTINUE, BLOCK_TYPE_JPEG_LEPTON, BLOCK_TYPE_LITERAL, BLOCK_TYPE_MASK, + BLOCK_TYPE_PNG, BLOCK_TYPE_WEBP, COMPRESSED_WRAPPER_VERSION_2, ProcessBuffer, read_varint, + }, + idat_parse::{IdatContents, recreate_idat}, + scoped_read::ScopedRead, +}; + +use preflate_rs::{ + AddContext, ExitCode, PreflateError, RecreateStreamProcessor, Result, err_exit_code, + recreate_whole_deflate_stream, +}; + +/// Write wrapper that computes a running CRC-32 of every byte written. 
+struct CrcWriter<'a, W: Write> { + inner: &'a mut W, + hasher: &'a mut CrcHasher, +} + +impl Write for CrcWriter<'_, W> { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + let n = self.inner.write(buf)?; + self.hasher.update(&buf[..n]); + Ok(n) + } + fn flush(&mut self) -> std::io::Result<()> { + self.inner.flush() + } +} + +enum DecompressionState { + Start, + StartSegment, + /// accumulate compressed_size bytes then decode and process the block immediately. + AccumulateBlock { + block_type: u8, + compressed_size: usize, + }, + /// accumulate lepton bytes then decode the JPEG block immediately. + JpegAccumulate { + lepton_length: usize, + }, + /// accumulate raw WebP-compressed PNG bytes then process the block immediately. + WebpAccumulate { + total_len: usize, + }, + /// 0xFF end block was parsed; CRC check deferred to process_buffer. + CrcCheck { + expected: u32, + }, +} + +/// recreates the orignal content from the chunked data +pub struct RecreateContainerProcessor { + capacity: usize, + input: VecDeque, + input_complete: bool, + state: DecompressionState, + + /// state of the predictor and plain text if we need to contiune a deflate stream + /// if it was too big to complete in a single chunk + deflate_continue_state: Option, + + /// persistent Zstd decoder — maintains the streaming context across blocks + zstd_decoder: zstd::stream::raw::Decoder<'static>, + + /// running CRC-32 of all output bytes, verified against the 0xFF end block + output_crc: CrcHasher, +} + +impl RecreateContainerProcessor { + pub fn new(capacity: usize) -> Self { + RecreateContainerProcessor { + input: VecDeque::new(), + capacity, + input_complete: false, + state: DecompressionState::Start, + deflate_continue_state: None, + zstd_decoder: zstd::stream::raw::Decoder::new().expect("failed to create zstd decoder"), + output_crc: CrcHasher::new(), + } + } +} + +impl ProcessBuffer for RecreateContainerProcessor { + fn process_buffer( + &mut self, + input: &[u8], + input_complete: 
bool, + writer: &mut impl Write, + ) -> Result<()> { + if self.input_complete && (input.len() > 0 || !input_complete) { + return Err(PreflateError::new( + ExitCode::InvalidParameter, + "more data provided after input_complete signaled", + )); + } + + // we could have been passed a big buffer, so we need to process it in chunks + let mut amount_read = 0; + loop { + let amount_to_read = (input.len() - amount_read).min(self.capacity); + + // when we get to the end and we've read everything, we can signal that we are done + if amount_read + amount_to_read == input.len() && input_complete { + self.input_complete = true; + } + + self.input + .extend(&input[amount_read..amount_read + amount_to_read]); + + amount_read += amount_to_read; + + self.with_crc_writer(writer, |this, crc_writer| { + this.process_buffer_internal(crc_writer) + })?; + + // If process_buffer_internal parsed the 0xFF end block, verify the CRC now + // that output_crc has been restored with all written bytes. + if let DecompressionState::CrcCheck { expected } = self.state { + let actual = self.output_crc.clone().finalize(); + if actual != expected { + return err_exit_code( + ExitCode::InvalidCompressedWrapper, + format!("CRC-32 mismatch: expected {expected:#010x}, got {actual:#010x}"), + ); + } + self.state = DecompressionState::StartSegment; + } + + if amount_read == input.len() { + break; + } + } + + Ok(()) + } +} + +impl RecreateContainerProcessor { + /// Runs `f` with a `CrcWriter` wrapping `writer`, then restores `self.output_crc`. + /// + /// `self.output_crc` must be borrowed mutably through the `CrcWriter`, but + /// `f` also needs `&mut self` to drive the state machine — a direct borrow + /// conflict. This helper resolves it by temporarily swapping `output_crc` + /// out of `self` with `mem::replace`, so the field is no longer part of the + /// active `&mut self` borrow while `f` runs. 
+ fn with_crc_writer(&mut self, writer: &mut W, f: F) -> Result<()> + where + F: FnOnce(&mut Self, &mut CrcWriter<'_, W>) -> Result<()>, + { + let mut hasher = std::mem::replace(&mut self.output_crc, CrcHasher::new()); + let result = { + let mut crc_writer = CrcWriter { + inner: writer, + hasher: &mut hasher, + }; + f(self, &mut crc_writer) + }; + self.output_crc = hasher; + result + } + + fn process_buffer_internal(&mut self, writer: &mut impl Write) -> Result<()> { + loop { + match &mut self.state { + DecompressionState::Start => { + if !self.input_complete && self.input.len() == 0 { + break; + } + + let version = self.input.read_u8()?; + + match version { + COMPRESSED_WRAPPER_VERSION_2 => { + self.state = DecompressionState::StartSegment; + } + _ => { + return err_exit_code( + ExitCode::InvalidCompressedWrapper, + format!("Invalid version {version}"), + ); + } + } + } + DecompressionState::StartSegment => { + // here's a good place to stop if we run out of input + if self.input.len() == 0 { + break; + } + + // read type byte, then dispatch + self.state = match self.input.scoped_read(|r| { + let type_byte = r.read_u8()?; + + // 0xFF is the CRC end block: 4 raw bytes, no varint. + if type_byte == 0xFF { + let mut buf = [0u8; 4]; + r.read_exact(&mut buf)?; + return Ok(DecompressionState::CrcCheck { + expected: u32::from_le_bytes(buf), + }); + } + + let compression = type_byte & BLOCK_COMPRESSION_MASK; + let block_type = type_byte & BLOCK_TYPE_MASK; + match compression { + BLOCK_COMPRESSION_NONE => match block_type { + BLOCK_TYPE_JPEG_LEPTON => { + let lepton_length = read_varint(r)? as usize; + Ok(DecompressionState::JpegAccumulate { lepton_length }) + } + BLOCK_TYPE_WEBP => { + let total_len = read_varint(r)? as usize; + Ok(DecompressionState::WebpAccumulate { total_len }) + } + _ => err_exit_code( + ExitCode::InvalidCompressedWrapper, + "unknown raw block type", + ), + }, + BLOCK_COMPRESSION_ZSTD => { + let compressed_size = read_varint(r)? 
as usize; + Ok(DecompressionState::AccumulateBlock { + block_type, + compressed_size, + }) + } + _ => err_exit_code( + ExitCode::InvalidCompressedWrapper, + "unknown compression algorithm", + ), + } + }) { + Ok(s) => s, + Err(e) => { + if !self.input_complete && e.exit_code() == ExitCode::ShortRead { + break; + } else { + return Err(e); + } + } + }; + } + + DecompressionState::AccumulateBlock { + block_type, + compressed_size, + } => { + if self.input.len() < *compressed_size { + if self.input_complete { + return Err(PreflateError::new( + ExitCode::InvalidCompressedWrapper, + "unexpected end of input in block", + )); + } + break; + } + + let block_type = *block_type; + let compressed_bytes: Vec = self.input.drain(0..*compressed_size).collect(); + let decoded = drain_zstd_block(&mut self.zstd_decoder, &compressed_bytes)?; + process_compressed_block( + block_type, + &mut Cursor::new(decoded), + &mut self.deflate_continue_state, + writer, + )?; + self.state = DecompressionState::StartSegment; + } + + DecompressionState::JpegAccumulate { lepton_length } => { + if self.input.len() < *lepton_length { + if self.input_complete { + return Err(PreflateError::new( + ExitCode::InvalidCompressedWrapper, + "unexpected end of input in jpeg block", + )); + } + break; + } + + let lepton_bytes: Vec = self.input.drain(0..*lepton_length).collect(); + match lepton_jpeg::decode_lepton( + &mut Cursor::new(&lepton_bytes), + writer, + &EnabledFeatures::compat_lepton_vector_read(), + &DEFAULT_THREAD_POOL, + ) { + Err(e) => { + return Err(PreflateError::new( + ExitCode::InvalidCompressedWrapper, + format!("JPEG Lepton decode failed: {}", e), + )); + } + Ok(_) => {} + } + self.state = DecompressionState::StartSegment; + } + + DecompressionState::WebpAccumulate { total_len } => { + if self.input.len() < *total_len { + if self.input_complete { + return Err(PreflateError::new( + ExitCode::InvalidCompressedWrapper, + "unexpected end of input in webp block", + )); + } + break; + } + + let 
webp_bytes: Vec = self.input.drain(0..*total_len).collect(); + // Payload is what webp_compress wrote after the BLOCK_TYPE_PNG type byte, + // so process_compressed_block can parse it directly. + process_compressed_block( + BLOCK_TYPE_PNG, + &mut Cursor::new(webp_bytes), + &mut self.deflate_continue_state, + writer, + )?; + self.state = DecompressionState::StartSegment; + } + + DecompressionState::CrcCheck { .. } => { + // CRC verification is handled in process_buffer after this returns. + break; + } + } + } + + Ok(()) + } +} + +/// Feeds `compressed` bytes into the persistent `decoder` and returns all decompressed output. +/// +/// Each call corresponds to one Zstd flush frame (written by the encoder via `flush()`). +/// After consuming all input bytes the decoder is drained until it produces no more output, +/// which is guaranteed because `ZSTD_e_flush` ensures all data is available to the decoder +/// before the next block starts. +fn drain_zstd_block( + decoder: &mut zstd::stream::raw::Decoder<'static>, + compressed: &[u8], +) -> Result> { + use zstd::stream::raw::{InBuffer, Operation, OutBuffer}; + + let mut output = Vec::new(); + let mut scratch = vec![0u8; 65536]; + let mut in_buf = InBuffer::around(compressed); + + loop { + let mut out_buf = OutBuffer::around(scratch.as_mut_slice()); + decoder.run(&mut in_buf, &mut out_buf).map_err(|e| { + PreflateError::new( + ExitCode::InvalidCompressedWrapper, + format!("zstd decode failed: {e}"), + ) + })?; + let produced = out_buf.pos(); + output.extend_from_slice(&scratch[..produced]); + + // Stop when all input has been consumed and the decoder produced no more output. + // zstd guarantees progress (either bytes_read > 0 or bytes_written > 0) so this + // loop always terminates. + if in_buf.pos() >= compressed.len() && produced == 0 { + break; + } + } + + Ok(output) +} + +/// Parses and processes a single non-JPEG/non-WebP block. 
+/// +/// `cursor` wraps the output of `drain_zstd_block` for compressed blocks, +/// or the raw WebP payload for `BLOCK_TYPE_PNG` blocks stored outside Zstd. +/// +/// Layout written by the encoder for each block type (block_type = lower 6 bits): +/// BLOCK_TYPE_LITERAL: varint(len) + data +/// BLOCK_TYPE_DEFLATE: varint(corrections_len) + varint(plaintext_len) + corrections + plaintext +/// BLOCK_TYPE_DEFLATE_CONTINUE: same as BLOCK_TYPE_DEFLATE +/// BLOCK_TYPE_PNG: varint(correction_length) + varint(uncompressed_length) + +/// IdatContents + [filters if png_header present] + +/// corrections + (webp_data or raw_plaintext) +fn process_compressed_block( + block_type: u8, + cursor: &mut Cursor>, + deflate_continue_state: &mut Option, + writer: &mut impl Write, +) -> Result<()> { + match block_type { + BLOCK_TYPE_LITERAL => { + let length = read_varint(cursor)? as usize; + let mut data = vec![0u8; length]; + cursor.read_exact(&mut data).map_err(|e| { + PreflateError::new(ExitCode::InvalidCompressedWrapper, e.to_string()) + })?; + writer.write_all(&data)?; + } + BLOCK_TYPE_DEFLATE => { + *deflate_continue_state = None; + + let correction_length = read_varint(cursor)? as usize; + let uncompressed_length = read_varint(cursor)? as usize; + + let mut corrections = vec![0u8; correction_length]; + cursor.read_exact(&mut corrections).map_err(|e| { + PreflateError::new(ExitCode::InvalidCompressedWrapper, e.to_string()) + })?; + + let mut plain_text_buf = vec![0u8; uncompressed_length]; + cursor.read_exact(&mut plain_text_buf).map_err(|e| { + PreflateError::new(ExitCode::InvalidCompressedWrapper, e.to_string()) + })?; + + let mut reconstruct = RecreateStreamProcessor::new(); + let (comp, _) = reconstruct + .recompress(&mut Cursor::new(&plain_text_buf), &corrections) + .context()?; + + writer.write_all(&comp)?; + *deflate_continue_state = Some(reconstruct); + } + BLOCK_TYPE_DEFLATE_CONTINUE => { + let correction_length = read_varint(cursor)? 
as usize; + let uncompressed_length = read_varint(cursor)? as usize; + + let mut corrections = vec![0u8; correction_length]; + cursor.read_exact(&mut corrections).map_err(|e| { + PreflateError::new(ExitCode::InvalidCompressedWrapper, e.to_string()) + })?; + + let mut plain_text_buf = vec![0u8; uncompressed_length]; + cursor.read_exact(&mut plain_text_buf).map_err(|e| { + PreflateError::new(ExitCode::InvalidCompressedWrapper, e.to_string()) + })?; + + let reconstruct = deflate_continue_state.as_mut().ok_or_else(|| { + PreflateError::new( + ExitCode::InvalidCompressedWrapper, + "no deflate state to continue", + ) + })?; + + let (comp, _) = reconstruct + .recompress(&mut Cursor::new(&plain_text_buf), &corrections) + .context()?; + + writer.write_all(&comp)?; + } + BLOCK_TYPE_PNG => { + let correction_length = read_varint(cursor)? as usize; + let uncompressed_length = read_varint(cursor)? as usize; + let idat = IdatContents::read_from_bytestream(cursor)?; + + let mut filters = Vec::new(); + if let Some(png_header) = &idat.png_header { + filters.resize(png_header.height as usize, 0); + cursor.read_exact(&mut filters[..]).map_err(|e| { + PreflateError::new(ExitCode::InvalidCompressedWrapper, e.to_string()) + })?; + } + + let mut corrections = vec![0u8; correction_length]; + cursor.read_exact(&mut corrections).map_err(|e| { + PreflateError::new(ExitCode::InvalidCompressedWrapper, e.to_string()) + })?; + + let plain_text; + if let Some(header) = &idat.png_header { + let mut webp = vec![0u8; uncompressed_length]; + cursor.read_exact(&mut webp).map_err(|e| { + PreflateError::new(ExitCode::InvalidCompressedWrapper, e.to_string()) + })?; + plain_text = webp_decompress(&filters, webp, header).context()?; + } else { + let mut raw = vec![0u8; uncompressed_length]; + cursor.read_exact(&mut raw).map_err(|e| { + PreflateError::new(ExitCode::InvalidCompressedWrapper, e.to_string()) + })?; + plain_text = raw; + } + + let recompressed = + recreate_whole_deflate_stream(&plain_text, 
&corrections).context()?; + + recreate_idat(&idat, &recompressed[..], writer).context()?; + } + _ => { + return err_exit_code( + ExitCode::InvalidCompressedWrapper, + format!("Unknown block type {block_type}"), + ); + } + } + Ok(()) +} + +fn webp_decompress( + filters: &[u8], + webp: Vec, + header: &crate::idat_parse::PngHeader, +) -> Result> { + #[cfg(feature = "webp")] + match webp::Decoder::new(webp.as_slice()).decode() { + Some(result) => { + use crate::idat_parse::apply_png_filters_with_types; + use std::ops::Deref; + + let m = result.deref(); + + return Ok(apply_png_filters_with_types( + m, + header.width as usize, + header.height as usize, + if result.is_alpha() { 4 } else { 3 }, + header.color_type.bytes_per_pixel(), + &filters, + )); + } + _ => {} + } + return err_exit_code(ExitCode::InvalidCompressedWrapper, "Webp decode failed"); +} diff --git a/container/src/container_write.rs b/container/src/container_write.rs new file mode 100644 index 0000000..2e9c5e5 --- /dev/null +++ b/container/src/container_write.rs @@ -0,0 +1,534 @@ +use crc32fast::Hasher as CrcHasher; + +use std::io::Write; + +use crate::{ + container_common::{ + BLOCK_COMPRESSION_NONE, BLOCK_COMPRESSION_ZSTD, BLOCK_TYPE_DEFLATE, + BLOCK_TYPE_DEFLATE_CONTINUE, BLOCK_TYPE_JPEG_LEPTON, BLOCK_TYPE_LITERAL, BLOCK_TYPE_PNG, + BLOCK_TYPE_WEBP, PreflateContainerConfig, PreflateStats, ProcessBuffer, write_varint, + }, + idat_parse::{IdatContents, PngHeader}, + scan_deflate::{FindStreamResult, FoundStream, FoundStreamType, find_compressable_stream}, +}; + +use preflate_rs::{AddContext, ExitCode, PreflateError, PreflateStreamProcessor, Result}; + +/// used to measure the length of the output without storing it +pub(crate) struct MeasureWriteSink { + pub length: usize, +} + +impl Write for MeasureWriteSink { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.length += buf.len(); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +#[derive(Debug)] +pub(crate) 
enum ChunkParseState { + Start, + /// we are looking for a deflate stream or PNG chunk. The data of the PNG file + /// is stored later than the IHDR chunk that will tell us the dimensions of the image, + /// so we need to keep track of the IHDR chunk so we can use it later to properly + /// compress the PNG data. + Searching(Option), + DeflateContinue(PreflateStreamProcessor), +} + +/// V2 variant of write_chunk_block: block content goes through the persistent Zstd encoder. +/// JPEG blocks are written raw to writer (bypass encoder). +/// Returns (total compressed bytes written, optional continue state). +pub(crate) fn write_chunk_block_v2( + encoder: &mut zstd::stream::write::Encoder<'static, Vec>, + writer: &mut impl Write, + chunk: FoundStream, + stats: &mut PreflateStats, +) -> Result<(usize, Option)> { + match chunk.chunk_type { + FoundStreamType::DeflateStream(parameters, state) => { + write_varint(encoder, chunk.corrections.len() as u32)?; + write_varint(encoder, state.plain_text().text().len() as u32)?; + encoder.write_all(&chunk.corrections)?; + encoder.write_all(&state.plain_text().text())?; + + let compressed_size = emit_compressed_block( + BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_DEFLATE, + encoder, + writer, + )?; + + stats.overhead_bytes += chunk.corrections.len() as u64; + stats.uncompressed_size += state.plain_text().len() as u64; + stats.hash_algorithm = parameters.hash_algorithm; + + if !state.is_done() { + return Ok((compressed_size, Some(state))); + } + Ok((compressed_size, None)) + } + + FoundStreamType::IDATDeflate(parameters, mut idat, plain_text) => { + log::debug!( + "IDATDeflate param {:?} corrections {}", + parameters, + chunk.corrections.len() + ); + + let mut temp_vec = Vec::new(); + + if webp_compress(&mut temp_vec, plain_text.text(), &chunk.corrections, &idat).is_ok() { + // WebP is already compressed — write raw, bypassing the Zstd encoder. + // temp_vec[0] is the BLOCK_TYPE_PNG placeholder byte; temp_vec[1..] is the payload. 
+ let payload = &temp_vec[1..]; + writer.write_all(&[BLOCK_COMPRESSION_NONE | BLOCK_TYPE_WEBP])?; + write_varint(writer, payload.len() as u32)?; + writer.write_all(payload)?; + + stats.uncompressed_size += plain_text.len() as u64; + stats.hash_algorithm = parameters.hash_algorithm; + stats.overhead_bytes += chunk.corrections.len() as u64; + + Ok((payload.len(), None)) + } else { + // Non-WebP PNG: corrections + plaintext are compressible, send through Zstd. + log::debug!("non-Webp compressed {}", idat.total_chunk_length); + write_varint(encoder, chunk.corrections.len() as u32)?; + write_varint(encoder, plain_text.text().len() as u32)?; + idat.png_header = None; + idat.write_to_bytestream(encoder)?; + encoder.write_all(&chunk.corrections)?; + encoder.write_all(plain_text.text())?; + + let compressed_size = emit_compressed_block( + BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_PNG, + encoder, + writer, + )?; + + stats.uncompressed_size += plain_text.len() as u64; + stats.hash_algorithm = parameters.hash_algorithm; + stats.overhead_bytes += chunk.corrections.len() as u64; + + Ok((compressed_size, None)) + } + } + + FoundStreamType::JPEGLepton(data) => { + // JPEG is written raw (bypasses the encoder entirely) + writer.write_all(&[BLOCK_COMPRESSION_NONE | BLOCK_TYPE_JPEG_LEPTON])?; + write_varint(writer, data.len() as u32)?; + writer.write_all(&data)?; + + stats.uncompressed_size += data.len() as u64; + Ok((0, None)) + } + } +} + +/// Takes a sequence of bytes that may contain deflate streams, find +/// the streams, and emits a new stream that containus the decompressed +/// streams along with the corrections needed to recreate the original. +/// +/// This output can then be compressed with a better algorithm, like Zstandard +/// and achieve much better compression than if we tried to compress the +/// deflate stream directlyh. 
+pub struct PreflateContainerProcessor { + content: Vec, + compression_stats: PreflateStats, + input_complete: bool, + total_plain_text_seen: u64, + + /// used to track the last attempted chunk size, in case we + /// need more input to continue, we will collect at least min_chunk_size + /// more input before trying to process again until we reach max_chunk_size + last_attempt_chunk_size: usize, + + state: ChunkParseState, + config: PreflateContainerConfig, + + /// running CRC-32 of all input bytes, written as the final block + input_crc: CrcHasher, + + /// each block is individually compressed with this encoder (v2 format) + encoder: Option>>, + + /// when present, all raw input is also fed to this encoder so we can measure + /// baseline Zstd compression (without preflate processing) + baseline_encoder: Option>, +} + +impl PreflateContainerProcessor { + /// Creates a processor that uses v2 format with a persistent Zstd encoder shared + /// across all non-JPEG blocks. JPEG blocks bypass the encoder entirely. 
+ pub fn new(config: &PreflateContainerConfig, level: i32, test_baseline: bool) -> Self { + PreflateContainerProcessor { + content: Vec::new(), + compression_stats: PreflateStats::default(), + input_complete: false, + state: ChunkParseState::Start, + total_plain_text_seen: 0, + last_attempt_chunk_size: 0, + config: config.clone(), + input_crc: CrcHasher::new(), + encoder: Some(zstd::stream::write::Encoder::new(Vec::new(), level).unwrap()), + baseline_encoder: if test_baseline { + Some( + zstd::stream::write::Encoder::new(MeasureWriteSink { length: 0 }, level) + .unwrap(), + ) + } else { + None + }, + } + } +} + +impl ProcessBuffer for PreflateContainerProcessor { + fn process_buffer( + &mut self, + input: &[u8], + input_complete: bool, + writer: &mut impl Write, + ) -> Result<()> { + use crate::container_common::COMPRESSED_WRAPPER_VERSION_2; + + if self.input_complete && (input.len() > 0 || !input_complete) { + return Err(PreflateError::new( + ExitCode::InvalidParameter, + "more data provided after input_complete signaled", + )); + } + + if input.len() > 0 { + self.compression_stats.deflate_compressed_size += input.len() as u64; + self.input_crc.update(input); + self.content.extend_from_slice(input); + + if let Some(encoder) = &mut self.baseline_encoder { + encoder.write_all(input).context()?; + } + } + + loop { + // wait until we have at least min_chunk_size before we start processing + if self.content.is_empty() + || (!input_complete + && (self.content.len() - self.last_attempt_chunk_size) + < self.config.min_chunk_size + && self.content.len() <= self.config.max_chunk_size) + { + break; + } + + self.last_attempt_chunk_size = self.content.len(); + + match &mut self.state { + ChunkParseState::Start => { + writer.write_all(&[COMPRESSED_WRAPPER_VERSION_2])?; + self.state = ChunkParseState::Searching(None); + } + ChunkParseState::Searching(prev_ihdr) => { + if self.total_plain_text_seen > self.config.total_plain_text_limit { + // once we've exceeded our limit, we 
don't do any more compression + let encoder = self.encoder.as_mut().unwrap(); + write_varint(encoder, self.content.len() as u32)?; + encoder.write_all(&self.content)?; + let sz = emit_compressed_block( + BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_LITERAL, + encoder, + writer, + )?; + self.compression_stats.zstd_compressed_size += sz as u64; + + self.last_attempt_chunk_size = 0; + self.content.clear(); + break; + } + + // here we are looking for a deflate stream or PNG chunk + match find_compressable_stream( + &self.content, + prev_ihdr, + input_complete, + &self.config, + ) { + FindStreamResult::Found(next, chunk) => { + // the gap between the start and the beginning of the deflate stream + // is written out as a literal block + if next.start != 0 { + let encoder = self.encoder.as_mut().unwrap(); + write_varint(encoder, next.start as u32)?; + encoder.write_all(&self.content[..next.start])?; + let sz = emit_compressed_block( + BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_LITERAL, + encoder, + writer, + )?; + self.compression_stats.zstd_compressed_size += sz as u64; + } + + let (compressed_size, next_state) = write_chunk_block_v2( + self.encoder.as_mut().unwrap(), + writer, + chunk, + &mut self.compression_stats, + ) + .context()?; + self.compression_stats.zstd_compressed_size += compressed_size as u64; + + if let Some(mut state) = next_state { + self.total_plain_text_seen += state.plain_text().len() as u64; + state.shrink_to_dictionary(); + self.state = ChunkParseState::DeflateContinue(state); + } + + self.content.drain(0..next.end); + self.last_attempt_chunk_size = self.content.len(); + } + FindStreamResult::ShortRead => { + if input_complete || self.content.len() > self.config.max_chunk_size { + // if we have too much data or have no more data, + // we just write it out as a literal block with everything we have + let encoder = self.encoder.as_mut().unwrap(); + write_varint(encoder, self.content.len() as u32)?; + encoder.write_all(&self.content)?; + let sz = 
emit_compressed_block( + BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_LITERAL, + encoder, + writer, + )?; + self.compression_stats.zstd_compressed_size += sz as u64; + + self.content.clear(); + self.last_attempt_chunk_size = 0; + } else { + // we don't have enough data to process the stream, so we just + // wait for more data + break; + } + } + FindStreamResult::None => { + // couldn't find anything, just write the rest as a literal block + let encoder = self.encoder.as_mut().unwrap(); + write_varint(encoder, self.content.len() as u32)?; + encoder.write_all(&self.content)?; + let sz = emit_compressed_block( + BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_LITERAL, + encoder, + writer, + )?; + self.compression_stats.zstd_compressed_size += sz as u64; + + self.content.clear(); + self.last_attempt_chunk_size = 0; + } + } + } + ChunkParseState::DeflateContinue(state) => { + // here we have a deflate stream that we need to continue + match state.decompress(&self.content) { + Err(ref e) + if e.exit_code() == ExitCode::ShortRead + && !input_complete + && self.content.len() <= self.config.max_chunk_size => + { + // Not enough data to complete the next block yet; wait for more. + break; + } + Err(_e) => { + // Stream analysis diverged or no more data is coming; give up on + // continuation and fall back to treating the remaining bytes as raw. 
+ self.state = ChunkParseState::Searching(None); + + log::debug!("Error while trying to continue compression {:?}", _e); + } + Ok(res) => { + log::debug!( + "Deflate continue: {} -> {}", + state.plain_text().len(), + res.compressed_size + ); + + let encoder = self.encoder.as_mut().unwrap(); + write_varint(encoder, res.corrections.len() as u32)?; + write_varint(encoder, state.plain_text().len() as u32)?; + encoder.write_all(&res.corrections)?; + encoder.write_all(&state.plain_text().text())?; + let sz = emit_compressed_block( + BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_DEFLATE_CONTINUE, + encoder, + writer, + )?; + self.compression_stats.zstd_compressed_size += sz as u64; + + self.total_plain_text_seen += state.plain_text().len() as u64; + self.compression_stats.overhead_bytes += res.corrections.len() as u64; + self.compression_stats.uncompressed_size += + state.plain_text().len() as u64; + + self.content.drain(0..res.compressed_size); + self.last_attempt_chunk_size = self.content.len(); + + if state.is_done() { + self.state = ChunkParseState::Searching(None); + } else { + state.shrink_to_dictionary(); + } + } + } + } + } + } + + if input_complete && !self.input_complete { + self.input_complete = true; + + if self.content.len() > 0 { + let encoder = self.encoder.as_mut().unwrap(); + write_varint(encoder, self.content.len() as u32)?; + encoder.write_all(&self.content)?; + let sz = emit_compressed_block( + BLOCK_COMPRESSION_ZSTD | BLOCK_TYPE_LITERAL, + encoder, + writer, + )?; + self.compression_stats.zstd_compressed_size += sz as u64; + } + self.content.clear(); + + // Finalize the Zstd encoder; finish bytes are discarded since each block + // was already flushed and the decoder relies on EOF as the stream terminator. + let encoder = self.encoder.take().unwrap(); + let _ = encoder.finish(); + + // Write the CRC-32 end block: 0xFF sentinel + 4-byte LE CRC of original input. 
+ let crc = self.input_crc.clone().finalize(); + writer.write_all(&[0xFF])?; + writer.write_all(&crc.to_le_bytes())?; + + // Finalize baseline encoder for stats + if let Some(mut encoder) = self.baseline_encoder.take() { + encoder.flush().context()?; + encoder.do_finish().context()?; + self.compression_stats.zstd_baseline_size = encoder.get_mut().length as u64; + } + } + + Ok(()) + } + + fn stats(&self) -> PreflateStats { + self.compression_stats + } +} + +/// Flushes the encoder, writes [block_type][varint(compressed_size)][compressed_bytes] to +/// destination, clears the encoder's inner buffer, and returns the compressed byte count. +fn emit_compressed_block( + block_type: u8, + encoder: &mut zstd::stream::write::Encoder<'static, Vec>, + destination: &mut impl Write, +) -> Result { + encoder.flush().context()?; + let compressed = encoder.get_mut(); + let len = compressed.len(); + destination.write_all(&[block_type])?; + write_varint(destination, len as u32)?; + destination.write_all(compressed)?; + compressed.clear(); + Ok(len) +} + +fn webp_compress( + result: &mut impl Write, + plain_text: &[u8], + corrections: &[u8], + idat: &IdatContents, +) -> Result<()> { + use crate::container_common::BLOCK_TYPE_PNG; + log::debug!("{:?}", idat); + + #[cfg(feature = "webp")] + if let Some(png_header) = idat.png_header { + use crate::idat_parse::{PngColorType, undo_png_filters}; + use std::ops::Deref; + + let bbp = png_header.color_type.bytes_per_pixel(); + let w = png_header.width as usize; + let h = png_header.height as usize; + + log::debug!( + "plain text compressing {} bytes ({}x{}x{})", + plain_text.len(), + w, + h, + bbp + ); + + // see if the bitmap looks like the way with think it should (bits per pixel map + 1 height worth of filter bytes) + if (bbp * w * h) + h == plain_text.len() { + let (bitmap, filters) = undo_png_filters(plain_text, w, h, bbp); + + let enc = webp::Encoder::new( + &bitmap, + match png_header.color_type { + PngColorType::RGB => 
webp::PixelLayout::Rgb, + PngColorType::RGBA => webp::PixelLayout::Rgba, + }, + png_header.width, + png_header.height, + ); + + let mut webpconfig = webp::WebPConfig::new().unwrap(); + webpconfig.lossless = 1; + webpconfig.alpha_compression = 0; + webpconfig.exact = 1; // undocumented option, but required to not throw away color if alpha channel is zero + + // this is the default quality setting for webp lossless, we could dial it up + // but the quality gains are marginal for the CPU cost, although the + // CPU decompression cost is the same. + webpconfig.quality = 75.0; // 0..100 higher is slower but better compression + webpconfig.method = 4; // 0..6 higher is slower but better compression + + let comp = match enc.encode_advanced(&webpconfig) { + Ok(c) => c, + Err(e) => { + return preflate_rs::err_exit_code( + ExitCode::WebPDecodeError, + format!("Webp encode failed: {:?}", e), + ); + } + }; + + result.write_all(&[BLOCK_TYPE_PNG])?; // placeholder — caller skips this byte + + write_varint(result, corrections.len() as u32)?; + write_varint(result, comp.deref().len() as u32)?; + + log::debug!( + "Webp compressed {} bytes (vs {})", + comp.deref().len(), + idat.total_chunk_length + ); + + idat.write_to_bytestream(result)?; + result.write_all(&filters)?; + + result.write_all(&corrections)?; + result.write_all(comp.deref())?; + + return Ok(()); + } + } + + return preflate_rs::err_exit_code( + ExitCode::InvalidCompressedWrapper, + "Webp compression not supported", + ); +} diff --git a/container/src/idat_parse.rs b/container/src/idat_parse.rs index cc56363..ed225b0 100644 --- a/container/src/idat_parse.rs +++ b/container/src/idat_parse.rs @@ -4,7 +4,7 @@ use byteorder::{ReadBytesExt, WriteBytesExt}; use preflate_rs::{ExitCode, Result, err_exit_code}; -use crate::container_processor::{read_varint, write_varint}; +use crate::container_common::{read_varint, write_varint}; /// The contents of a PNG IDat stream. 
These are treated specially since they /// contain a Zlib stream that is split into multiple chunks and would be diff --git a/container/src/lib.rs b/container/src/lib.rs index 5a1f638..9b0593e 100644 --- a/container/src/lib.rs +++ b/container/src/lib.rs @@ -20,23 +20,17 @@ #![forbid(macro_use_extern_crate)] #![forbid(missing_unsafe_on_extern)] -mod container_processor; +mod container_common; +mod container_read; +mod container_write; mod idat_parse; mod scan_deflate; mod scoped_read; mod utils; -mod zstd_compression; -pub use zstd_compression::{ - zstd_preflate_whole_deflate_stream, zstd_recreate_whole_deflate_stream, -}; -pub use container_processor::{PreflateContainerConfig, PreflateStats}; -pub use container_processor::{ - PreflateContainerProcessor, ProcessBuffer, RecreateContainerProcessor, - preflate_whole_into_container, recreate_whole_from_container, -}; - -pub use zstd_compression::{ZstdCompressContext, ZstdDecompressContext}; +pub use container_common::{PreflateContainerConfig, PreflateStats, ProcessBuffer}; +pub use container_read::RecreateContainerProcessor; +pub use container_write::PreflateContainerProcessor; pub use utils::process_limited_buffer; diff --git a/container/src/utils.rs b/container/src/utils.rs index 0278ad0..8d345f5 100644 --- a/container/src/utils.rs +++ b/container/src/utils.rs @@ -1,48 +1,12 @@ use std::{ collections::VecDeque, - io::{BufRead, Read, Write}, + io::{Read, Write}, }; use preflate_rs::Result; use crate::ProcessBuffer; -/// A BufRead implementation that reads at most `limit` bytes from the underlying reader. 
-pub struct TakeReader { - inner: T, - amount_left: usize, -} - -impl TakeReader { - pub fn new(inner: T, limit: usize) -> Self { - TakeReader { - inner, - amount_left: limit, - } - } -} - -impl BufRead for TakeReader { - fn fill_buf(&mut self) -> std::io::Result<&[u8]> { - let buf = self.inner.fill_buf()?; - Ok(&buf[..buf.len().min(self.amount_left)]) - } - - fn consume(&mut self, amt: usize) { - self.amount_left -= amt; - self.inner.consume(amt); - } -} - -impl Read for TakeReader { - fn read(&mut self, buf: &mut [u8]) -> std::io::Result { - let len = buf.len().min(self.amount_left); - let read = self.inner.read(&mut buf[..len])?; - self.amount_left -= read; - Ok(read) - } -} - #[allow(dead_code)] #[cfg(test)] pub fn write_file(filename: &str, data: &[u8]) { @@ -174,7 +138,7 @@ pub fn process_limited_buffer( #[test] fn test_process_limited_buffer() { - let mut p = crate::container_processor::NopProcessBuffer {}; + let mut p = crate::container_common::test::NopProcessBuffer {}; let input = b"Hello, world!"; let mut output = [0u8; 5]; diff --git a/container/src/zstd_compression.rs b/container/src/zstd_compression.rs deleted file mode 100644 index c0f1e69..0000000 --- a/container/src/zstd_compression.rs +++ /dev/null @@ -1,277 +0,0 @@ -//! Implements processors for Zstandard compression and decompression using -//! the ProcessBuffer model. These are designed to be chained together with -//! the other ProcessBuffer implementations to create a full compression or -//! decompression pipeline. - -use std::io::{BufRead, Write}; - -use crate::{ - PreflateContainerProcessor, PreflateStats, ProcessBuffer, RecreateContainerProcessor, - container_processor::PreflateContainerConfig, -}; - -use preflate_rs::{AddContext, ExitCode, PreflateError, Result}; - -/// processor that compresses the input using Zstandard -/// -/// Designed to wrap around the PreflateChunkProcessor. 
-pub struct ZstdCompressContext { - zstd_compress: zstd::stream::write::Encoder<'static, Vec>, - input_complete: bool, - internal: D, - - /// if set, the encoder will write all the input to a null zstd encoder to see how much - /// compression we would get if we just used Zstandard without any Preflate processing. - /// - /// This gives a fairer comparison of the compression ratio of Preflate + Zstandard vs. Zstandard - /// since Zstd does compress the data a bit, especially if there is a lot of non-Deflate streams - /// in the file. - test_baseline: Option>, - - zstd_baseline_size: u64, - zstd_compressed_size: u64, -} - -impl ZstdCompressContext { - pub fn new(internal: D, compression_level: i32, test_baseline: bool) -> Self { - ZstdCompressContext { - zstd_compress: zstd::stream::write::Encoder::new(Vec::new(), compression_level) - .unwrap(), - input_complete: false, - internal, - zstd_baseline_size: 0, - zstd_compressed_size: 0, - test_baseline: if test_baseline { - Some( - zstd::stream::write::Encoder::new( - MeasureWriteSink { length: 0 }, - compression_level, - ) - .unwrap(), - ) - } else { - None - }, - } - } -} - -impl ProcessBuffer for ZstdCompressContext { - fn process_buffer( - &mut self, - input: &[u8], - input_complete: bool, - writer: &mut impl Write, - ) -> Result<()> { - if self.input_complete && (input.len() > 0 || !input_complete) { - return Err(PreflateError::new( - ExitCode::InvalidParameter, - "more data provided after input_complete signaled", - )); - } - - if input.len() > 0 { - if let Some(encoder) = &mut self.test_baseline { - encoder.write_all(input).context()?; - } - } - - self.internal - .process_buffer(input, input_complete, &mut self.zstd_compress) - .context()?; - - if input_complete && !self.input_complete { - self.input_complete = true; - - self.zstd_compress.flush().context()?; - - if let Some(encoder) = &mut self.test_baseline { - encoder.flush()?; - encoder.do_finish()?; - self.zstd_baseline_size = encoder.get_mut().length as 
u64; - } - } - - let compressed = self.zstd_compress.get_mut(); - writer.write_all(compressed).context()?; - self.zstd_compressed_size += compressed.len() as u64; - compressed.drain(..); - - Ok(()) - } - - fn stats(&self) -> PreflateStats { - PreflateStats { - zstd_compressed_size: self.zstd_compressed_size, - zstd_baseline_size: self.zstd_baseline_size, - ..self.internal.stats() - } - } -} - -/// used to measure the length of the output without storing it anyway -struct MeasureWriteSink { - pub length: usize, -} - -impl Write for MeasureWriteSink { - fn write(&mut self, buf: &[u8]) -> std::io::Result { - self.length += buf.len(); - Ok(buf.len()) - } - - fn flush(&mut self) -> std::io::Result<()> { - Ok(()) - } -} - -/// Processor that decompresses the input using Zstandard -/// -/// Designed to wrap around the RecreateContainerProcessor. -pub struct ZstdDecompressContext { - zstd_decompress: zstd::stream::write::Decoder<'static, AcceptWrite>>, -} - -/// used to accept the output from the Zstandard decoder and write it to the output buffer. -/// Since the plain text is significantly larger than the compressed version, we want -/// to avoid buffering the output in memory, so we send it directly to the recreator. 
-struct AcceptWrite { - internal: D, - output: O, - input_complete: bool, -} - -impl Write for AcceptWrite { - fn write(&mut self, buf: &[u8]) -> std::io::Result { - self.internal - .process_buffer(buf, self.input_complete, &mut self.output)?; - Ok(buf.len()) - } - - fn flush(&mut self) -> std::io::Result<()> { - Ok(()) - } -} - -impl ZstdDecompressContext { - pub fn new(internal: D) -> Self { - ZstdDecompressContext { - zstd_decompress: zstd::stream::write::Decoder::new(AcceptWrite { - internal: internal, - output: Vec::new(), - input_complete: false, - }) - .unwrap(), - } - } -} - -impl ProcessBuffer for ZstdDecompressContext { - fn process_buffer( - &mut self, - input: &[u8], - input_complete: bool, - writer: &mut impl Write, - ) -> Result<()> { - if self.zstd_decompress.get_mut().input_complete && (input.len() > 0 || !input_complete) { - return Err(PreflateError::new( - ExitCode::InvalidParameter, - "more data provided after input_complete signaled", - )); - } - - if input.len() > 0 { - self.zstd_decompress.write_all(input).context()?; - } - - if input_complete && !self.zstd_decompress.get_mut().input_complete { - self.zstd_decompress.flush().context()?; - self.zstd_decompress.get_mut().input_complete = true; - } - - let a = self.zstd_decompress.get_mut(); - writer.write_all(&a.output).context()?; - a.output.clear(); - - Ok(()) - } -} - -/// Expands the Zlib compressed streams in the data and then recompresses the result -/// with Zstd with the given level. -pub fn zstd_preflate_whole_deflate_stream( - config: &PreflateContainerConfig, - input: &mut impl BufRead, - output: &mut impl Write, - compression_level: i32, -) -> Result { - let mut ctx = ZstdCompressContext::new( - PreflateContainerProcessor::new(config), - compression_level, - false, - ); - - ctx.copy_to_end(input, output).context()?; - - Ok(ctx.stats()) -} - -/// Decompresses the Zstd compressed data and then recompresses the result back -/// to the original Zlib compressed streams. 
-pub fn zstd_recreate_whole_deflate_stream( - input: &mut impl BufRead, - output: &mut impl Write, -) -> Result<()> { - let mut ctx = ZstdDecompressContext::::new( - RecreateContainerProcessor::new(1024 * 1024 * 128), - ); - - ctx.copy_to_end(input, output).context()?; - - Ok(()) -} - -#[test] -fn verify_zip_compress_zstd() { - use crate::utils::read_file; - let v = read_file("samplezip.zip"); - - let mut compressed = Vec::new(); - let stats = zstd_preflate_whole_deflate_stream( - &PreflateContainerConfig::default(), - &mut std::io::Cursor::new(&v), - &mut compressed, - 1, // for testing use a lower level to save CPU - ) - .unwrap(); - - let mut recreated = Vec::new(); - zstd_recreate_whole_deflate_stream(&mut std::io::Cursor::new(&compressed), &mut recreated) - .unwrap(); - - assert!(v == recreated); - println!( - "original zip = {} bytes, expanded = {} bytes recompressed zip = {} bytes", - v.len(), - stats.uncompressed_size, - compressed.len() - ); -} - -/// tests zstd compression buffer processing without involving preflate code -#[test] -fn roundtrip_zstd_only_contexts() { - use crate::container_processor::NopProcessBuffer; - use crate::utils::{assert_eq_array, read_file}; - use crate::zstd_compression::{ZstdCompressContext, ZstdDecompressContext}; - - let original = read_file("samplezip.zip"); - - let mut context = ZstdCompressContext::new(NopProcessBuffer {}, 9, false); - let compressed = context.process_vec_size(&original, 997).unwrap(); - - let mut context = ZstdDecompressContext::new(NopProcessBuffer {}); - let recreated = context.process_vec_size(&compressed, 997).unwrap(); - - assert_eq_array(&original, &recreated); -} diff --git a/dll/CLAUDE.md b/dll/CLAUDE.md new file mode 100644 index 0000000..28ca341 --- /dev/null +++ b/dll/CLAUDE.md @@ -0,0 +1,73 @@ +# dll (preflate_rs_0_7) + +C-compatible DLL for .NET interop. Exposes a streaming compress/decompress API as +`extern "C"` functions. 
The version number is baked into the crate name +(`preflate_rs_0_7`) for binary compatibility. + +## Exported C API (`src/unmanaged_api.rs`) + +### Compression + +```c +void* create_compression_context(uint32_t flags); +void free_compression_context(void* context); +int32_t compress_buffer( + void* context, + const uint8_t* input, size_t input_size, + bool input_complete, + uint8_t* output, size_t output_size, + size_t* result_size, + char* error_string, size_t error_string_buffer_len +); +void get_compression_stats(void* context, /* stat out-params */); +``` + +`flags` encoding: +- bits 0–4: Zstd compression level +- bit 5: `test_baseline` +- bit 6: `verify` + +Return value of `compress_buffer`: `0` = more output available, `1` = done, `<0` = error. + +### Decompression + +```c +void* create_decompression_context(uint32_t flags, size_t capacity); +void free_decompression_context(void* context); +int32_t decompress_buffer( + void* context, + const uint8_t* input, size_t input_size, + bool input_complete, + uint8_t* output, size_t output_size, + size_t* result_size, + char* error_string, size_t error_string_buffer_len +); +``` + +## Internal Structs + +```rust +struct CompressionContext { + magic: u32, // MAGIC_COMPRESSION_CONTEXT = 0x4B3CFF2E + internal: PreflateContainerProcessor, + output_extra: VecDeque<u8>, // buffers overflow when C buffer is too small +} +struct DecompressionContext { + magic: u32, // MAGIC_DECOMPRESSION_CONTEXT = 0x053D2AB1 + internal: RecreateContainerProcessor, + output_extra: VecDeque<u8>, +} +``` + +Magic numbers are validated on every call to catch dangling/wrong pointer bugs. + +## Safety Notes + +- Uses `#[unsafe(no_mangle)]` on exported functions — the only place in the workspace + where `unsafe` appears (required for C FFI entry points). +- `catch_unwind_result()` wraps every entry point to prevent panics crossing the FFI boundary. +- All other code in the crate remains safe Rust. 
+ +## Build Output + +`cdylib` — produces `preflate_rs_0_7.dll` on Windows. diff --git a/dll/Cargo.toml b/dll/Cargo.toml index e4f06a0..a4ef4ad 100644 --- a/dll/Cargo.toml +++ b/dll/Cargo.toml @@ -3,12 +3,12 @@ # this makes sure that we can keep old versions around to decode old encodings since the format # is complicated enough that maintaining backwards compat is hard (even minor changes in the # predictor will break the format) -version = "0.7.5" name = "preflate_rs_0_7" -edition = "2024" -authors = ["Kristof Roomp "] -license = "Apache-2.0" -rust-version = "1.85" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +rust-version.workspace = true [dependencies] preflate-rs = { path = "../preflate" } diff --git a/dll/src/unmanaged_api.rs b/dll/src/unmanaged_api.rs index 99b8701..4d117c7 100644 --- a/dll/src/unmanaged_api.rs +++ b/dll/src/unmanaged_api.rs @@ -6,7 +6,7 @@ use std::{ use preflate_container::{ PreflateContainerConfig, PreflateContainerProcessor, ProcessBuffer, RecreateContainerProcessor, - ZstdCompressContext, ZstdDecompressContext, process_limited_buffer, + process_limited_buffer, }; use preflate_rs::{ExitCode, PreflateError}; @@ -198,7 +198,7 @@ pub unsafe extern "C" fn get_compression_stats( struct CompressionContext { magic: u32, - internal: ZstdCompressContext, + internal: PreflateContainerProcessor, output_extra: VecDeque, } @@ -221,12 +221,12 @@ impl CompressionContext { fn new(verify: bool, compression_level: i32, test_baseline: bool) -> Self { CompressionContext { magic: MAGIC_COMPRESSION_CONTEXT, - internal: ZstdCompressContext::new( - PreflateContainerProcessor::new(&PreflateContainerConfig { + internal: PreflateContainerProcessor::new( + &PreflateContainerConfig { validate_compression: verify, max_chain_length: 1024, // lower max chain to avoid excessive CPU usage ..PreflateContainerConfig::default() - }), + }, compression_level, test_baseline, ), @@ -237,7 +237,7 @@ impl 
CompressionContext { struct DecompressionContext { magic: u32, - internal: ZstdDecompressContext, + internal: RecreateContainerProcessor, output_extra: VecDeque, } @@ -255,11 +255,9 @@ impl DecompressionContext { } fn new(capacity: usize) -> Self { - let internal = ZstdDecompressContext::new(RecreateContainerProcessor::new(capacity)); - DecompressionContext { magic: MAGIC_DECOMRESSION_CONTEXT, - internal, + internal: RecreateContainerProcessor::new(capacity), output_extra: VecDeque::new(), } } diff --git a/fuzz/CLAUDE.md b/fuzz/CLAUDE.md new file mode 100644 index 0000000..608fd4c --- /dev/null +++ b/fuzz/CLAUDE.md @@ -0,0 +1,30 @@ +# fuzz (preflate-rs-fuzz) + +libfuzzer harnesses for fuzzing the core and container APIs. Not published; requires +the `fuzzing` cargo feature. + +## Harnesses + +### `fuzz_target_1` — core round-trip + +Feeds arbitrary bytes to `preflate_whole_deflate_stream()` as a raw DEFLATE stream, +then attempts `recreate_whole_deflate_stream()` on the result. Verifies no crash or panic. + +### `fuzz_container` — container round-trip + +Feeds arbitrary bytes (minimum 1 byte) to `preflate_whole_into_container()`, then +`recreate_whole_from_container()`, and asserts the output matches the original input. + +## Running Fuzz Tests + +```bash +# Requires nightly and cargo-fuzz +cargo +nightly fuzz run fuzz_target_1 +cargo +nightly fuzz run fuzz_container +``` + +## Notes + +- Edition 2021 (older than the main workspace crates which use 2024). +- `libfuzzer-sys` (0.4) provides the fuzzing harness glue. +- Corpus and artifacts are stored under `fuzz/corpus/` and `fuzz/artifacts/` (gitignored). diff --git a/preflate/CLAUDE.md b/preflate/CLAUDE.md new file mode 100644 index 0000000..e02dddd --- /dev/null +++ b/preflate/CLAUDE.md @@ -0,0 +1,83 @@ +# preflate (core library) + +Core DEFLATE analysis and reconstruction. 
Analyzes a DEFLATE bitstream, extracts the +uncompressed plaintext plus a compact set of reconstruction parameters, and later recreates +the bit-exact original bitstream. + +## Public API (`lib.rs`) + +```rust +// Compress: analyze a DEFLATE stream and produce plaintext + correction data +PreflateStreamProcessor::new(config) -> Self +PreflateStreamProcessor::decompress(input: &[u8]) -> Result + +// Recreate: given plaintext + correction data, reproduce the original DEFLATE stream +RecreateStreamProcessor::new() -> Self +RecreateStreamProcessor::recreate(chunk: PreflateStreamChunkResult) -> Result> + +// One-shot helpers +preflate_whole_deflate_stream(input, config) -> Result<(Vec, Vec)> +recreate_whole_deflate_stream(plaintext, correction_data) -> Result> +``` + +`PreflateConfig` controls `max_chain_length`, `plain_text_limit`, and `verify_compression`. + +## Processing Pipeline + +``` +DEFLATE bytes + └─ deflate/deflate_reader.rs → tokens (literals + back-refs) + └─ estimator/ → TokenPredictorParameters (hash algo, nice_len, max_chain…) + └─ token_predictor.rs → predicted tokens (replaying the original compressor) + └─ tree_predictor.rs → predicted Huffman trees + └─ cabac_codec.rs → encode *differences* from prediction → correction bytes +``` + +Reconstruction runs the same pipeline in reverse. 
+ +## Key Types + +| Type | Where | Purpose | +|---|---|---| +| `PlainText` | `preflate_input.rs` | Wraps uncompressed data | +| `TokenPredictorParameters` | `token_predictor.rs` | Compressor fingerprint | +| `HashAlgorithm` | `hash_algorithm.rs` | Zlib / Miniz / Libdeflate / zlib-ng / … | +| `PreflateError` / `ExitCode` | `preflate_error.rs` | 29 error variants with context | +| `DeflateToken` | `deflate/deflate_token.rs` | Literal or length/distance match | + +## Module Layout + +``` +src/ + lib.rs ← public API, PreflateConfig + stream_processor.rs ← PreflateStreamProcessor, RecreateStreamProcessor + deflate/ + deflate_reader.rs ← DEFLATE bitstream → tokens + deflate_writer.rs ← tokens → DEFLATE bitstream + bit_reader.rs / bit_writer.rs + huffman_calc.rs / huffman_encoding.rs + deflate_token.rs / deflate_constants.rs + estimator/ + preflate_parameter_estimator.rs ← main estimator entry point + complevel_estimator.rs + depth_estimator.rs + add_policy_estimator.rs + preflate_parse_config.rs + preflate_stream_info.rs + token_predictor.rs + tree_predictor.rs + statistical_codec.rs + cabac_codec.rs + hash_algorithm.rs + hash_chain.rs / hash_chain_holder.rs + preflate_input.rs + preflate_error.rs + bit_helper.rs / utils.rs +``` + +## Constraints + +- `#![forbid(unsafe_code)]` — strictly enforced. +- Serialization: parameters via `bitcode`; correction data via CABAC (`cabac` crate). +- The format is chunked to bound peak memory use. +- `#![deny(trivial_casts, non_ascii_idents)]` also set. 
diff --git a/preflate/Cargo.toml b/preflate/Cargo.toml index cf89cf9..6310898 100644 --- a/preflate/Cargo.toml +++ b/preflate/Cargo.toml @@ -1,17 +1,17 @@ [package] name = "preflate-rs" -version = "0.7.5" -edition = "2024" -authors = ["Kristof Roomp "] -license = "Apache-2.0" -rust-version = "1.85" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +rust-version.workspace = true +repository.workspace = true description = """ Decompresses existing DEFLATE and PNG streams to allow for better with a more state-of-the-art compression -(eg with ZStandard, Brotli) while allowing the exact original binary DEFLATE stream to be recreated +(eg with ZStandard, Brotli) while allowing the exact original binary DEFLATE stream to be recreated by detecting the parameters used during compression. """ readme = "../README.md" -repository = "https://github.com/microsoft/preflate-rs" categories = ["compression"] keywords = ["gzip", "deflate", "zlib", "zip", "png"] diff --git a/preflate/src/stream_processor.rs b/preflate/src/stream_processor.rs index 813abe6..717ca49 100644 --- a/preflate/src/stream_processor.rs +++ b/preflate/src/stream_processor.rs @@ -876,3 +876,115 @@ fn verify_partial_blocks() { ); } } + +/// Replicates exactly what `scan_deflate::find_compressable_stream` does when it +/// encounters a gzip stream in a truncated 200 KiB content buffer: +/// +/// 1. strip the 10-byte gzip header (and 8-byte footer) to obtain the raw DEFLATE body +/// 2. call `decompress` with only the first ~190 KB of that body (what fits in the +/// 200 KiB content window after the header) +/// 3. assert the call returns `Ok` and `is_done() == false` ← DeflateContinue requires this +/// 4. call `decompress` again with the remainder of the body, assert `is_done() == true` +/// 5. 
reconstruct the original DEFLATE bytes and assert roundtrip identity +#[test] +fn verify_decompress_partial_gzip_deflate_body_roundtrip() { + crate::init_logging(); + + // sample1.bin.gz: 263 972 bytes total; gzip header = 10 bytes, footer = 8 bytes + // → raw DEFLATE body = 263 954 bytes + let gzip = crate::utils::read_file("sample1.bin.gz"); + assert!(gzip.len() > 18, "gzip file too short"); + + // Gzip header for this file has no extra flags, so it is exactly 10 bytes. + let deflate_start: usize = 10; + let deflate_end: usize = gzip.len() - 8; + let deflate_body = &gzip[deflate_start..deflate_end]; + + // Mimic what find_compressable_stream does: content buffer contains the first + // 200 000 bytes of the gzip file, and the DEFLATE body starts at offset 10, + // so the slice passed to decompress() is bytes [10..200000] = 199 990 bytes. + let content_window: usize = 200_000 - deflate_start; // 199 990 + assert!( + content_window < deflate_body.len(), + "content window must truncate the DEFLATE body (window={content_window}, body={})", + deflate_body.len() + ); + + let mut state = PreflateStreamProcessor::new(&PreflateConfig::default()); + + // ── First call: truncated slice ────────────────────────────────────────── + let r1 = state.decompress(&deflate_body[..content_window]); + let r1 = match r1 { + Ok(r) => r, + Err(e) => panic!( + "decompress on truncated DEFLATE body ({content_window} B of {} B) returned \ + Err({e:?}); expected Ok(partial) with is_done()=false", + deflate_body.len() + ), + }; + assert!( + !state.is_done(), + "is_done() must be false after truncated first call; compressed_size={}", + r1.compressed_size + ); + assert!( + r1.compressed_size > 0, + "at least one block must have been consumed" + ); + assert!( + r1.compressed_size <= content_window, + "compressed_size ({}) must not exceed the slice length ({content_window})", + r1.compressed_size + ); + println!( + "first call: compressed_size={} / {content_window} blocks={}", + 
r1.compressed_size, + r1.blocks.len() + ); + + let first_corrections = r1.corrections.clone(); + let first_plain_text = state.plain_text().text().to_vec(); + state.shrink_to_dictionary(); + + // ── Second call: remainder ──────────────────────────────────────────────── + let offset = r1.compressed_size; + let r2 = state.decompress(&deflate_body[offset..]); + let r2 = match r2 { + Ok(r) => r, + Err(e) => panic!( + "decompress on remainder ({} B) returned Err({e:?})", + deflate_body.len() - offset + ), + }; + assert!( + state.is_done(), + "is_done() must be true after consuming the full body" + ); + println!( + "second call: compressed_size={} blocks={}", + r2.compressed_size, + r2.blocks.len() + ); + + let second_corrections = r2.corrections.clone(); + let second_plain_text = state.plain_text().text().to_vec(); + + // ── Roundtrip: reconstruct the original bytes ───────────────────────────── + let mut reconstruct = RecreateStreamProcessor::new(); + let (mut recompressed, _) = reconstruct + .recompress( + &mut std::io::Cursor::new(&first_plain_text), + &first_corrections, + ) + .expect("recompress chunk 1 failed"); + + let (mut rest, _) = reconstruct + .recompress( + &mut std::io::Cursor::new(&second_plain_text), + &second_corrections, + ) + .expect("recompress chunk 2 failed"); + + recompressed.append(&mut rest); + crate::utils::assert_eq_array(deflate_body, &recompressed); +} diff --git a/samples/test_big_then_small_gzip.bin b/samples/test_big_then_small_gzip.bin new file mode 100644 index 0000000..c3618df Binary files /dev/null and b/samples/test_big_then_small_gzip.bin differ diff --git a/samples/test_corrupted_deflate.bin b/samples/test_corrupted_deflate.bin new file mode 100644 index 0000000..8ae9d48 Binary files /dev/null and b/samples/test_corrupted_deflate.bin differ diff --git a/samples/test_gzip_with_gap.bin b/samples/test_gzip_with_gap.bin new file mode 100644 index 0000000..1cdd72c Binary files /dev/null and b/samples/test_gzip_with_gap.bin differ diff 
--git a/samples/test_random_bytes.bin b/samples/test_random_bytes.bin new file mode 100644 index 0000000..f1ab9b1 Binary files /dev/null and b/samples/test_random_bytes.bin differ diff --git a/samples/test_tiny_gzip.bin b/samples/test_tiny_gzip.bin new file mode 100644 index 0000000..d3f8415 Binary files /dev/null and b/samples/test_tiny_gzip.bin differ diff --git a/samples/test_two_gzip_streams.bin b/samples/test_two_gzip_streams.bin new file mode 100644 index 0000000..aafddc4 Binary files /dev/null and b/samples/test_two_gzip_streams.bin differ diff --git a/samples/test_two_zlib_streams.bin b/samples/test_two_zlib_streams.bin new file mode 100644 index 0000000..ffd8032 Binary files /dev/null and b/samples/test_two_zlib_streams.bin differ diff --git a/samples/test_zip_3entries.zip b/samples/test_zip_3entries.zip new file mode 100644 index 0000000..e5965a2 Binary files /dev/null and b/samples/test_zip_3entries.zip differ diff --git a/samples/test_zip_stored_then_deflated.zip b/samples/test_zip_stored_then_deflated.zip new file mode 100644 index 0000000..bfc6f74 Binary files /dev/null and b/samples/test_zip_stored_then_deflated.zip differ diff --git a/tests/CLAUDE.md b/tests/CLAUDE.md new file mode 100644 index 0000000..e105e7f --- /dev/null +++ b/tests/CLAUDE.md @@ -0,0 +1,37 @@ +# tests (integration tests) + +End-to-end round-trip tests against real sample files in `samples/`. + +## What Is Tested + +- **Core round-trip**: decompress a `.deflate` file with `preflate_whole_deflate_stream`, + recompress with `recreate_whole_deflate_stream`, assert bitwise identical output. +- **Container round-trip**: compress a ZIP / PNG / DOCX / PDF through + `PreflateContainerProcessor`, decompress with `RecreateContainerProcessor`, + assert the output matches the original file byte-for-byte. 
+ +## Sample Files (`samples/`) + +The `samples/` directory contains real-world compressed files used as test fixtures: +deflate streams, zlib streams, PNGs, ZIPs, PDFs, DOCX, JPEG, WebP, and binary blobs +from various compressors (zlib, zlib-ng, libdeflate, miniz, Windows zlib). + +These files are checked into the repository. Do not remove or alter them without +updating the corresponding tests. + +## Running + +```bash +cargo test --all # all integration tests +cargo test --package preflate-rs # core tests only +cargo test --package preflate-container # container tests only +cargo test <test_name> # single test by name +cargo test -- --nocapture # show println! output +``` + +## Notes + +- Tests live in `tests/end_to_end.rs` at the workspace root. +- Some tests use `libdeflate-sys` to generate reference compressions on the fly. +- Test failures often mean a regression in the estimator or token predictor; check + those modules first. diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index a98010b..0458039 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -10,7 +10,9 @@ use std::path::Path; use std::{mem, ptr}; use libdeflate_sys::{libdeflate_alloc_compressor, libdeflate_deflate_compress}; -use preflate_container::{zstd_preflate_whole_deflate_stream, zstd_recreate_whole_deflate_stream}; +use preflate_container::{ + PreflateContainerConfig, PreflateContainerProcessor, ProcessBuffer, RecreateContainerProcessor, +}; use preflate_rs::{PreflateConfig, preflate_whole_deflate_stream, recreate_whole_deflate_stream}; #[cfg(test)] @@ -77,18 +79,15 @@ fn test_container(filename: &str) { let v = read_file(filename); let mut c = Vec::new(); - - let stats = zstd_preflate_whole_deflate_stream( - &preflate_container::PreflateContainerConfig::default(), - &mut std::io::Cursor::new(&v), - &mut c, - 4, // use lower level to save CPU on testing - ) - .unwrap(); + let mut ctx = PreflateContainerProcessor::new(&PreflateContainerConfig::default(), 4, false); + ctx.copy_to_end(&mut 
std::io::Cursor::new(&v), &mut c) + .unwrap(); + let stats = ctx.stats(); let mut r = Vec::new(); - - zstd_recreate_whole_deflate_stream(&mut std::io::Cursor::new(&c), &mut r).unwrap(); + let mut ctx = RecreateContainerProcessor::new(128 * 1024 * 1024); + ctx.copy_to_end(&mut std::io::Cursor::new(&c), &mut r) + .unwrap(); assert!(v == r); println!( diff --git a/util/CLAUDE.md b/util/CLAUDE.md new file mode 100644 index 0000000..bfa5ba6 --- /dev/null +++ b/util/CLAUDE.md @@ -0,0 +1,38 @@ +# util (preflate_util CLI) + +CLI tool for manually testing preflate compression on files and directories. + +## Usage + +``` +preflate_util [OPTIONS] <PATH> + +Options: + --max-chain <N> Hash chain depth limit (default: 4096) + -c, --level <LEVEL> Zstd compression level 0-14 (default: 9) + --loglevel <LOGLEVEL> Log level (default: Error) + --verify Round-trip verify after compress (default: true) + --baseline Measure baseline Zstd-only size (default: false) +``` + +`<PATH>` may be a single file or a directory (scanned recursively). + +## What It Does + +1. For each file, calls `PreflateContainerProcessor` to compress. +2. Optionally calls `RecreateContainerProcessor` to decompress and byte-compares the result. +3. Prints per-file and aggregate statistics: compressed size, baseline size, CPU time. + +## Source + +Single file: `src/main.rs` (~193 lines). + +Helper `assert_eq_array()` provides detailed positional diff output for debugging +mismatches during verification. 
+ +## Dependencies + +- `clap` (4, derive) — argument parsing +- `cpu-time` (1) — CPU time measurement +- `preflate-rs` and `preflate-container` — core logic +- `env_logger` / `log` — logging diff --git a/util/Cargo.toml b/util/Cargo.toml index 5c5b026..a8f4a4a 100644 --- a/util/Cargo.toml +++ b/util/Cargo.toml @@ -1,9 +1,10 @@ [package] name = "preflate_util" -edition = "2024" -authors = ["Kristof Roomp "] -license = "Apache-2.0" -rust-version = "1.85" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +rust-version.workspace = true [dependencies] preflate-rs = { path = "../preflate" } diff --git a/util/src/main.rs b/util/src/main.rs index 244daeb..5d8cca4 100644 --- a/util/src/main.rs +++ b/util/src/main.rs @@ -11,7 +11,6 @@ use std::{ use preflate_container::{ PreflateContainerConfig, PreflateContainerProcessor, ProcessBuffer, RecreateContainerProcessor, - ZstdCompressContext, ZstdDecompressContext, }; #[derive(Parser)] @@ -99,11 +98,7 @@ fn main() { // open file for reading let original = fs::read(&entry).unwrap(); - let mut ctx = ZstdCompressContext::new( - PreflateContainerProcessor::new(&config), - cli.level as i32, - cli.baseline, - ); + let mut ctx = PreflateContainerProcessor::new(&config, cli.level as i32, cli.baseline); let compress_start = ProcessTime::now(); @@ -140,9 +135,7 @@ fn main() { let start = ProcessTime::now(); let mut recreated = Vec::new(); - let mut decomp = ZstdDecompressContext::new(RecreateContainerProcessor::new( - config.chunk_plain_text_limit, - )); + let mut decomp = RecreateContainerProcessor::new(config.chunk_plain_text_limit); if let Err(e) = decomp.copy_to_end_size( &mut Cursor::new(&preflate_compressed),