From fb6604f8353d67e49d46ea63247d732d9c56350e Mon Sep 17 00:00:00 2001
From: mcrumpface <kristofr@gmail.com>
Date: Fri, 13 Mar 2026 17:14:21 +0100
Subject: [PATCH 1/2] update rust to 90

---
 azure-pipelines.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index f7428b6..d7b52ed 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -42,7 +42,7 @@ extends:
               workflow: Rust
               rust:
                 rustToolchain:
-                  version: ms-prod-1.88
+                  version: ms-prod-1.90
                   toolchainFeed: $(toolchainFeed)
                   cratesIoFeed: $(cratesIoFeed)
                 target: x86_64-pc-windows-msvc

From 2fb711af0440f5737043ff369c88090aa26ec49d Mon Sep 17 00:00:00 2001
From: mcrumpface <kristofr@gmail.com>
Date: Fri, 13 Mar 2026 18:30:46 +0100
Subject: [PATCH 2/2] update readmes, bump MSRV to 1.89, add tag-triggered publishing

---
 .github/workflows/publish.yml |  70 +++++++++++++++
 CLAUDE.md                     |   2 +-
 Cargo.toml                    |   2 +-
 README.md                     | 159 ++++++++++++++++++++++------------
 azure-pipelines.yml           |   9 +-
 container/README.md           | 158 +++++++++++++++++++++++++++++++++
 dll/README.md                 |  92 ++++++++++++++++++++
 fuzz/README.md                |  52 +++++++++++
 preflate/README.md            | 118 +++++++++++++++++++++++++
 util/README.md                |  54 ++++++++++++
 10 files changed, 656 insertions(+), 60 deletions(-)
 create mode 100644 .github/workflows/publish.yml
 create mode 100644 container/README.md
 create mode 100644 dll/README.md
 create mode 100644 fuzz/README.md
 create mode 100644 preflate/README.md
 create mode 100644 util/README.md

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..5e66fcd
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,70 @@
+name: Publish Crate
+
+permissions:
+  contents: read  # Least privilege: the job only reads the repository
+
+on:
+  push:
+    tags:
+      - "v*.*.*"  # Triggers only for version tag pushes
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code with full history
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0  # Needed to compare commits and access tag history
+
+    - name: Ensure tag is at tip of main
+      id: verify_tag_commit
+      run: |
+        echo "Verifying tag points to main branch tip..."
+        git fetch origin main
+        # Read the ref from the environment (no inline expression in the script) and peel annotated tags.
+        TAG_COMMIT=$(git rev-parse "${GITHUB_REF}^{commit}")
+        MAIN_COMMIT=$(git rev-parse origin/main)
+
+        echo "Tag commit:  $TAG_COMMIT"
+        echo "Main commit: $MAIN_COMMIT"
+
+        if [ "$TAG_COMMIT" != "$MAIN_COMMIT" ]; then
+          echo "Tag is not at tip of main. Aborting."
+          exit 1
+        fi
+        echo "Tag is at tip of main."
+
+    - name: Extract tag version
+      id: tag_version
+      run: |
+        echo "TAG_VERSION=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"
+
+    - name: Read version from Cargo.toml
+      id: cargo_version
+      run: |
+        CARGO_VERSION=$(grep '^version\s*=' Cargo.toml | head -1 | sed -E 's/version\s*=\s*"([^"]+)"/\1/')
+        echo "CARGO_VERSION=$CARGO_VERSION" >> "$GITHUB_OUTPUT"
+
+    - name: Check tag version matches Cargo.toml
+      run: |
+        echo "Comparing tag and Cargo.toml versions..."
+        echo "Tag:          ${{ steps.tag_version.outputs.TAG_VERSION }}"
+        echo "Cargo.toml:   ${{ steps.cargo_version.outputs.CARGO_VERSION }}"
+
+        if [ "${{ steps.tag_version.outputs.TAG_VERSION }}" != "${{ steps.cargo_version.outputs.CARGO_VERSION }}" ]; then
+          echo "Version mismatch: tag does not match Cargo.toml"
+          exit 1
+        fi
+        echo "Tag version matches Cargo.toml."
+
+    - name: Set up Rust
+      uses: dtolnay/rust-toolchain@stable
+      with:
+        toolchain: stable
+
+    - name: Publish preflate-rs to crates.io
+      env:
+        CARGO_REGISTRY_TOKEN: ${{ secrets.CRATE_PUBLISH }}  # crates.io API token read by cargo publish
+      run: cargo publish --verbose --package preflate-rs
diff --git a/CLAUDE.md b/CLAUDE.md
index 87e889d..e0b7c87 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -69,5 +69,5 @@ The optional `webp` feature (enabled by default) allows PNG images to be stored
 ### Code constraints
 
 - **No unsafe code** — enforced via `#![forbid(unsafe_code)]` in each crate.
-- Minimum Rust version: **1.85**, Edition **2024**.
+- Minimum Rust version: **1.89**, Edition **2024**.
 - `.cargo/config.toml` sets Windows MSVC linker flags (`/DYNAMICBASE`, `/CETCOMPAT`, `/guard:cf`).
diff --git a/Cargo.toml b/Cargo.toml
index eaaf6a0..de7d05e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,7 +19,7 @@ version = "0.7.6"
 edition = "2024"
 authors = ["Kristof Roomp <kristofr@microsoft.com>"]
 license = "Apache-2.0"
-rust-version = "1.85"
+rust-version = "1.89"
 repository = "https://github.com/microsoft/preflate-rs"
 
 [dev-dependencies]
diff --git a/README.md b/README.md
index bd0b6ba..db5ac15 100644
--- a/README.md
+++ b/README.md
@@ -1,93 +1,140 @@
 # preflate-rs
-Preflate-rs is a library initally based on a port of the C++ [preflate library](https://github.com/deus-libri/preflate/) with the purpose of splitting deflate streams into uncompressed data and reconstruction information, or reconstruct the original deflate stream from those two.
 
-Other similar libraries include precomp, reflate, grittibanzli, although this libary is probably the most feature rich and supports a lower overhead from more libraries.
+**preflate-rs** is a Rust library for lossless re-compression of DEFLATE-compressed data. It analyzes an existing DEFLATE bitstream, extracts the uncompressed plaintext along with a compact set of reconstruction parameters, and later recreates the **bit-exact** original DEFLATE stream from those two pieces. This makes it possible to re-compress the plaintext with a more modern algorithm (Zstd, Brotli, LZMA) while preserving perfect binary round-trip fidelity.
 
-IMPORTANT: This library is still in initial development, so there are probably breaking changes being done fairly frequently.
+The library is used in production cloud storage systems where content must be stored with bit-exact fidelity while still benefiting from better compression ratios.
 
-The resulting uncompressed content can then be recompressed by a more modern compression technique such as Zstd, lzma, etc. This library is designed to be used as part of a cloud
-storage system that requires exact binary storage of content, so the libary needs to make
-sure that the DEFLATE content is recreated exactly as it was written. This is not trivial, since
-DEFLATE has a large degree of freedom in choosing both how the distance/length pairs are chose
-and how the Huffman trees are created.
+[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
 
-The library tries to detect the following compressors to try to do a reasonable job:
-- [Zlib](https://github.com/madler/zlib): Zlib is more or less perfectly compressed.
-- [MiniZ](https://github.com/richgel999/miniz): The fastest mode uses a different hash function.
-- [Libdeflate](https://github.com/ebiggers/libdeflate): This library uses 4 byte hash-tables, which we try to detect.
-- [Libzng](https://github.com/zlib-ng/zlib-ng): Works well except level 9
-- Windows zlib implementation (used by the built-in PNG codec and shell ZIP compression) 
+---
 
-The general approach is as follows:
-1. Decompress stream into plaintext and a list of blocks containing tokens that are either literals (bytes) or distance, length pairs.
-2. Estimate the dictionary update strategy by looking at which strings are referenced by the compressed data. For example, zlib will only add the beginning of each compressed token for low compression levels.
-3. Estimate the maximum number times we execute the loop to look for matches (also called chains, as in walking the chain of the hash table). We also test with different hash functions to figure out which hash funciton was likely used. Given the chain length, we estimate the other parameters that were likely used.
-4. Rerun compression using the zlib algorithm using the parameters gathered above. A difference encoder is used to record each instance where the token predicted by our implementation of DEFLATE differs from what we found in the file. 
+## Why preflate-rs?
 
-The following differences are corrected:
-- Type of block (uncompressed, static huffman, dynamic huffman)
-- Number of tokens in block (normally 16385)
-- Dynamic huffman encoding (estimated using the zlib algorithm, but there are multiple ways to construct more or less optimal length limited Huffman codes)
-- Literal vs (distance, length) pair (corrected by a single bit)
-- Length or distance is incorrect (corrected by encoding the number of hops backwards until the correct one)
+DEFLATE streams are not uniquely determined by their plaintext. The same input can compress to many different valid bitstreams depending on the compressor, its version, and the parameters used. Simply decompressing and recompressing will produce a *different* bitstream — which is a problem for systems that need to verify or reproduce file hashes exactly.
 
-Note that the data formats of the recompression information are different and incompatible to the original preflate implementation, as this library uses a different arithmetic encoder (shared from the Lepton JPEG compression library).
+preflate-rs solves this by treating the original DEFLATE stream as the ground truth and recording only the *differences* from what a reference model would predict. Since well-tuned compressors are highly predictable, these corrections are very small — typically well under 1% of the uncompressed data size.
 
-[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
+---
+
+## How It Works
+
+### Analysis (compress direction)
+
+1. **Parse** — The DEFLATE bitstream is decoded into a sequence of tokens: literals (raw bytes) and length/distance back-references.
+2. **Estimate** — The token sequence is analyzed to fingerprint the original compressor: hash algorithm, chain depth, nice-length cutoff, window size, and block-splitting strategy.
+3. **Predict** — Compression is re-run using the estimated parameters. For each token, the model predicts what the original compressor would have chosen.
+4. **Encode differences** — Wherever the prediction differs from the actual token, a correction is recorded using CABAC (Context Adaptive Binary Arithmetic Coding, the same codec used in Lepton JPEG compression).
+
+The result is the uncompressed plaintext plus a small corrections blob. Both can be stored or re-compressed with any modern algorithm.
+
+### Reconstruction (decompress direction)
 
-### Overhead
+The plaintext and corrections are fed back into the predictor, which replays the original compression decisions step by step to recreate the exact original DEFLATE bitstream.
 
-In order to faithfully recreate the exact deflate stream, the library stores
-a stream of corrections to its predictive model. Depending on how good the
-predictive model is, the corrections can take up more or less space. If you
-want to improve the library, it's probably worth targetting the lower compression
-levels that currently have significant overhead.
+---
 
-The amount of overhead vs uncompressed data is approximately the following,
-depending on the compression level. If you want to benefit from using this
-library, whatever better compression algorithm you use needs to be at least
-that much better to make it worthwhile to recompress. 
+## Supported Compressors
 
-| Library            | 0      | 1      | 2      | 3      | 4      | 5      | 6      | 7      | 8      | 9     |
-|--------------------|--------|--------|--------|--------|--------|--------|--------|--------|--------|--------|
+The library detects and models the following DEFLATE implementations:
+
+| Compressor | Notes |
+|---|---|
+| [zlib](https://github.com/madler/zlib) | All levels; near-zero overhead |
+| [zlib-ng](https://github.com/zlib-ng/zlib-ng) | All levels except level 9 |
+| [libdeflate](https://github.com/ebiggers/libdeflate) | 4-byte hash table variant detected |
+| [miniz / miniz_oxide](https://github.com/richgel999/miniz) | Fastest mode uses distinct hash function |
+| Windows zlib | Built-in PNG codec and shell ZIP compression |
+
+Unrecognized compressors still round-trip correctly — the corrections overhead is simply higher.
+
+---
+
+## Reconstruction Overhead
+
+The table below shows overhead (corrections size as a percentage of uncompressed data) for each supported compressor at each compression level. To benefit from re-compression, your target algorithm needs to beat the original by at least this margin.
+
+| Compressor         | 0      | 1      | 2      | 3      | 4      | 5      | 6      | 7      | 8      | 9     |
+|--------------------|--------|--------|--------|--------|--------|--------|--------|--------|--------|-------|
 | **zlib**           | 0.01%  | 0.01%  | 0.01%  | 0.01%  | 0.01%  | 0.01%  | 0.01%  | 0.08%  | 0.03%  | 0.01%  |
-| **libngz**      | 0.01%  | 0.01%  | 0.01%  | 0.01%  | 0.97%  | 1.07%  | 0.90%  | 0.01%  | 0.01%  | NoCompressionCandidates |
+| **zlib-ng**        | 0.01%  | 0.01%  | 0.01%  | 0.01%  | 0.97%  | 1.07%  | 0.90%  | 0.01%  | 0.01%  | N/A    |
 | **libdeflate**     | 0.01%  | 0.25%  | 1.04%  | 0.91%  | 1.51%  | 1.04%  | 0.96%  | 0.87%  | 1.04%  | 1.03%  |
 | **miniz_oxide**    | 0.01%  | 0.06%  | 2.70%  | 1.78%  | 0.53%  | 0.30%  | 0.09%  | 0.06%  | 0.08%  | 0.07%  |
 
-## How to Use This Library
+---
+
+## Workspace Layout
 
-#### Building From Source
+| Crate | Output | Description |
+|---|---|---|
+| [`preflate/`](preflate/) | library | Core DEFLATE analysis and reconstruction engine |
+| [`container/`](container/) | library | Scans binary files (ZIP, PNG, JPEG) for DEFLATE streams and orchestrates the Zstd pipeline |
+| [`util/`](util/) | `preflate_util.exe` | CLI tool for testing and benchmarking |
+| [`dll/`](dll/) | `preflate_rs_0_7.dll` | C FFI wrapper for .NET interop |
+| [`fuzz/`](fuzz/) | fuzz harnesses | libfuzzer targets for the core and container APIs |
 
-- [Rust 1.70 or Above](https://www.rust-lang.org/tools/install)
+---
 
-```Shell
+## Getting Started
+
+### Requirements
+
+- [Rust 1.89 or above](https://www.rust-lang.org/tools/install)
+
+### Build from Source
+
+```shell
 git clone https://github.com/microsoft/preflate-rs
 cd preflate-rs
-cargo build
-cargo test
-cargo build --release
+cargo build --all
+cargo test --all
+cargo build --release --all
 ```
 
-#### Running
+### Using the CLI
+
+The `preflate_util` binary lets you test the library against any file or directory of files:
+
+```shell
+preflate_util [OPTIONS] <PATH>
+
+Options:
+  --max-chain <N>    Hash chain depth limit (default: 4096)
+  -c, --level <N>    Zstd compression level 0–14 (default: 9)
+  --loglevel <L>     Log verbosity (default: Error)
+  --verify <bool>    Round-trip verify after compression (default: true)
+  --baseline <bool>  Also measure raw Zstd-only size for comparison (default: false)
+```
+
+### Library Usage
+
+For direct use of the core DEFLATE analysis API, see the [`preflate` crate](preflate/). For processing full binary files containing embedded DEFLATE streams (ZIP, PNG, JPEG), see the [`container` crate](container/).
+
+---
+
+## Design Notes
+
+- **No unsafe code** — `#![forbid(unsafe_code)]` is enforced in every crate.
+- **Chunked processing** — memory use is bounded regardless of input size.
+- **Format versioning** — the DLL name encodes the format version (`preflate_rs_0_7.dll`) so old decoders can coexist with new ones during upgrades.
+- **CABAC coding** — the corrections codec is shared with the [Lepton](https://github.com/microsoft/lepton_jpeg_rust) JPEG re-compression library.
+- Parameters are serialized via [`bitcode`](https://crates.io/crates/bitcode); corrections via CABAC.
 
-There is an `preflate_util.exe` wrapper that is built as part of the project that can be used to
-test out the library against Deflate compressed content. 
+---
 
 ## Contributing
 
-There are many ways in which you can participate in this project, for example:
+* [Submit bugs and feature requests](https://github.com/microsoft/preflate-rs/issues)
+* [Review or submit pull requests](https://github.com/microsoft/preflate-rs/pulls)
+* The library uses only **stable Rust features**.
 
-* [Submit bugs and feature requests](https://github.com/microsoft/preflate-rs/issues), and help us verify as they are checked in
-* Review [source code changes](https://github.com/microsoft/preflate-rs/pulls) or submit your own features as pull requests.
-* The library uses only **stable features**. 
+---
 
 ## Code of Conduct
 
-This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
+This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). See the [FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions.
 
 ## License
 
 Copyright (c) Microsoft Corporation. All rights reserved.
 
-Licensed under the Apache 2.0 license.
\ No newline at end of file
+Licensed under the [Apache 2.0](LICENSE) license.
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index d7b52ed..22426c2 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -1,5 +1,10 @@
 trigger:
-- main
+  branches:
+    include:
+    - main
+  tags:
+    include:
+    - v*.*.*
 
 resources:
   repositories:
@@ -147,7 +152,7 @@ extends:
 
                 - task: 1ES.PublishNuGet@1
                   displayName: 'NuGet push'
-                  condition: and(succeeded(), in(variables['Build.Reason'], 'Manual'), eq(variables['Build.SourceBranch'], 'refs/heads/main'))
+                  condition: and(succeeded(), startsWith(variables['Build.SourceBranch'], 'refs/tags/v'))
                   inputs:
                     packageParentPath: '$(Pipeline.Workspace)'
                     packagesToPush: '$(Build.ArtifactStagingDirectory)\*.nupkg'
diff --git a/container/README.md b/container/README.md
new file mode 100644
index 0000000..687135a
--- /dev/null
+++ b/container/README.md
@@ -0,0 +1,158 @@
+# preflate-container
+
+File-level scanning and re-compression pipeline for the preflate-rs workspace.
+
+This crate handles real-world binary files that contain embedded DEFLATE streams — ZIP archives, PNG images, JPEG files, and arbitrary binary blobs. It scans input bytes for DEFLATE stream boundaries, passes each stream through the [`preflate`](../preflate/) core for analysis, and packages everything into a Zstd-compressed container format that can be fully reconstructed to the original byte-for-byte input.
+
+[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
+
+## Features
+
+- Detects and processes raw DEFLATE, zlib-wrapped, PNG IDAT, ZIP, and JPEG streams
+- Optionally re-encodes PNG images as lossless WebP for additional savings
+- JPEG files are re-compressed using [Lepton](https://github.com/microsoft/lepton_jpeg_rust)
+- Non-DEFLATE bytes are passed through as literal blocks
+- Streaming API with bounded memory use regardless of input size
+- Single persistent Zstd encoder for cross-block compression quality
+
+## API
+
+### Whole-file (simple)
+
+```rust
+use preflate_container::{
+    preflate_whole_into_container, recreate_whole_from_container, PreflateContainerConfig,
+};
+
+let config = PreflateContainerConfig::default();
+
+// Compress: scan file → extract DEFLATE streams → write container
+preflate_whole_into_container(&config, &mut input_reader, &mut output_writer)?;
+
+// Decompress: read container → recreate DEFLATE streams → original file
+recreate_whole_from_container(&mut container_reader, &mut output_writer)?;
+```
+
+### Streaming
+
+Both processors implement the `ProcessBuffer` trait for incremental use:
+
+```rust
+use preflate_container::{PreflateContainerProcessor, RecreateContainerProcessor, ProcessBuffer};
+
+// Compression
+let mut compressor = PreflateContainerProcessor::new(&config);
+compressor.process_buffer(&input_chunk, input_complete, &mut output)?;
+let stats = compressor.stats();
+
+// Decompression
+let mut decompressor = RecreateContainerProcessor::new();
+decompressor.process_buffer(&container_chunk, input_complete, &mut output)?;
+```
+
+### DLL / fixed-size output buffer
+
+For C FFI use where the output buffer size is fixed, `process_limited_buffer` handles overflow internally:
+
+```rust
+use preflate_container::process_limited_buffer;
+use std::collections::VecDeque;
+
+let mut overflow = VecDeque::new();
+let (done, written) = process_limited_buffer(
+    &mut compressor,
+    &input,
+    input_complete,
+    &mut output_buffer,
+    &mut overflow,
+)?;
+```
+
+## Configuration
+
+```rust
+pub struct PreflateContainerConfig {
+    /// Minimum input buffer size before scanning begins. Default: 1 MB.
+    pub min_chunk_size: usize,
+
+    /// Maximum DEFLATE stream size to process. Larger streams are passed through
+    /// as literals. Default: 64 MB.
+    pub max_chunk_size: usize,
+
+    /// Global cap on total plaintext held in memory. Default: 512 MB.
+    pub total_plain_text_limit: u64,
+
+    /// Per-chunk plaintext memory limit. Default: 128 MB.
+    pub chunk_plain_text_limit: usize,
+
+    /// Verify round-trip correctness after each stream. Default: true.
+    pub validate_compression: bool,
+
+    /// Hash chain traversal limit passed to the core engine. Default: 4096.
+    pub max_chain_length: u32,
+}
+```
+
+## Statistics
+
+After compression, `stats()` returns a `PreflateStats` struct:
+
+```rust
+pub struct PreflateStats {
+    pub deflate_compressed_size: u64,  // Original DEFLATE bytes
+    pub zstd_compressed_size: u64,     // Final container size
+    pub uncompressed_size: u64,        // Total plaintext bytes
+    pub overhead_bytes: u64,           // Corrections blob size
+    pub hash_algorithm: HashAlgorithm, // Detected compressor family
+    pub zstd_baseline_size: u64,       // Raw Zstd-only size (if measured)
+}
+```
+
+## Container Format
+
+The output is a self-describing binary format (version 2). Framing bytes are written raw; block payloads go through a single persistent Zstd encoder:
+
+```
+[0x02]                           ← format version byte
+
+For each block:
+  [type_byte]                    ← block kind + compression flag
+  [varint: content_length]
+  [content_bytes]                ← Zstd-compressed or raw, depending on type
+```
+
+Block types:
+
+| Block | Description |
+|---|---|
+| Literal | Raw input bytes that contain no DEFLATE stream |
+| Deflate | A DEFLATE stream: corrections + plaintext (Zstd) |
+| PNG | PNG IDAT stream with chunk metadata (Zstd, or WebP if enabled) |
+| Deflate-continue | Continuation of a previous DEFLATE stream |
+| JPEG/Lepton | Lepton-recompressed JPEG (raw, bypasses Zstd) |
+| WebP | PNG stored as lossless WebP (raw, bypasses Zstd) |
+
+The Zstd encoder is flushed after each block payload, so every block can be decoded as soon as it has been read in sequence; the persistent encoder context still carries cross-block history for better compression, which means blocks must be decoded in order rather than in isolation.
+
+## Stream Detection
+
+The scanner (`scan_deflate.rs`) identifies DEFLATE stream boundaries by looking for:
+
+- **zlib headers** — CMF/FLG byte pair validation
+- **gzip markers** — `\x1f\x8b` magic
+- **PNG signatures** — IHDR/IDAT chunk structure
+- **ZIP local file headers** — `PK\x03\x04` magic
+- **JPEG markers** — SOI/APP0 structure
+
+Non-DEFLATE regions between streams are emitted as literal blocks.
+
+## Feature Flags
+
+| Feature | Default | Description |
+|---|---|---|
+| `webp` | enabled | Re-encode PNG images as lossless WebP |
+
+## Constraints
+
+- **No unsafe code** — `#![forbid(unsafe_code)]`
+- Minimum Rust version: **1.89**, Edition **2024**
diff --git a/dll/README.md b/dll/README.md
new file mode 100644
index 0000000..db0bb18
--- /dev/null
+++ b/dll/README.md
@@ -0,0 +1,92 @@
+# preflate_rs_0_7
+
+C-compatible DLL exposing the preflate-rs container pipeline for .NET interop.
+
+The DLL name encodes the container format version (`0_7`). This allows multiple format versions to coexist side-by-side on the same machine — old decoders remain functional while new encoders produce newer formats.
+
+## Exported API
+
+The DLL exports a streaming compress/decompress API as `extern "C"` functions.
+
+### Compression
+
+```c
+// Create a compression context.
+// flags bits 0-4: Zstd compression level (0-14)
+// flags bit  5:   measure baseline Zstd-only size (for stats)
+// flags bit  6:   skip round-trip verification
+void* create_compression_context(uint32_t flags);
+void  free_compression_context(void* context);
+
+// Feed input data and receive compressed output.
+// Call repeatedly until the return value is 1 (done).
+// Returns: 1 = done, 0 = more data needed/available, <0 = error code.
+int32_t compress_buffer(
+    void*          context,
+    const uint8_t* input,            size_t input_size,
+    bool           input_complete,
+    uint8_t*       output,           size_t output_size,
+    size_t*        result_size,
+    char*          error_string,     size_t error_string_buffer_len
+);
+
+// Read compression statistics after processing completes.
+void get_compression_stats(
+    void*     context,
+    uint64_t* deflate_compressed_size,
+    uint64_t* zstd_compressed_size,
+    uint64_t* zstd_baseline_size,
+    uint64_t* uncompressed_size,
+    uint64_t* overhead_bytes,
+    uint32_t* hash_algorithm
+);
+```
+
+### Decompression
+
+```c
+// Create a decompression context.
+// capacity: initial output buffer hint (0 = use default).
+void* create_decompression_context(uint32_t flags, size_t capacity);
+void  free_decompression_context(void* context);
+
+// Feed compressed input and receive reconstructed output.
+// Call repeatedly until the return value is 1 (done).
+// Returns: 1 = done, 0 = more data needed/available, <0 = error code.
+int32_t decompress_buffer(
+    void*          context,
+    const uint8_t* input,            size_t input_size,
+    bool           input_complete,
+    uint8_t*       output,           size_t output_size,
+    size_t*        result_size,
+    char*          error_string,     size_t error_string_buffer_len
+);
+```
+
+## Streaming Pattern
+
+Both compress and decompress follow the same loop:
+
+1. Call `compress_buffer` / `decompress_buffer` with a chunk of input.
+2. If `result_size > 0`, consume the output bytes.
+3. If return value is `0`, call again — either with more input or with the same position if the output buffer was too small.
+4. When all input has been fed, set `input_complete = true`.
+5. Continue calling until return value is `1` (done).
+
+The DLL buffers any output that does not fit in the provided output buffer internally, so the output buffer can be any convenient size.
+
+## Safety
+
+- Each context carries a magic number (`0x053D2AB1` for decompression contexts, and a distinct value for compression contexts) that is validated on every call to catch dangling or mismatched pointers.
+- All entry points use `catch_unwind` to prevent Rust panics from crossing the FFI boundary.
+- Error messages are copied as UTF-8 into the caller-provided buffer with null termination.
+- All logic outside the FFI boundary layer is safe Rust (`#![forbid(unsafe_code)]` on all dependencies).
+
+## Building
+
+```shell
+cargo build --release -p preflate_rs_dll
+# Output: target/release/preflate_rs_0_7.dll (Windows)
+```
+
+The release build applies Spectre mitigations (`/Qspectre /sdl`) and Control Flow Guard (`/guard:cf`).
diff --git a/fuzz/README.md b/fuzz/README.md
new file mode 100644
index 0000000..e36df28
--- /dev/null
+++ b/fuzz/README.md
@@ -0,0 +1,52 @@
+# preflate-rs-fuzz
+
+libfuzzer harnesses for the preflate-rs workspace.
+
+These targets feed arbitrary bytes into the core and container APIs to find crashes, panics, or round-trip failures. Fuzzing requires nightly Rust.
+
+## Targets
+
+### `fuzz_target_1` — core round-trip
+
+Feeds arbitrary bytes to `preflate_whole_deflate_stream` as a raw DEFLATE stream. If analysis succeeds, the result is immediately fed to `recreate_whole_deflate_stream` and the output must match the input exactly.
+
+```rust
+fuzz_target!(|data: &[u8]| {
+    if let Ok((result, plain_text)) = preflate_whole_deflate_stream(data, &config) {
+        recreate_whole_deflate_stream(plain_text.text(), &result.corrections).unwrap();
+    }
+});
+```
+
+### `fuzz_container` — container round-trip
+
+Feeds arbitrary bytes through the full container pipeline. If compression succeeds, the compressed output is decompressed and compared byte-for-byte with the original input.
+
+```rust
+fuzz_target!(|data: &[u8]| {
+    if let Ok(_) = preflate_whole_into_container(&config, &mut Cursor::new(data), &mut output) {
+        recreate_whole_from_container(&mut Cursor::new(&output), &mut original).unwrap();
+        assert_eq!(data, &original[..]);
+    }
+});
+```
+
+## Running
+
+```shell
+# Run the core round-trip fuzzer
+cargo +nightly fuzz run fuzz_target_1
+
+# Run the container round-trip fuzzer
+cargo +nightly fuzz run fuzz_container
+
+# Run with a specific corpus directory
+cargo +nightly fuzz run fuzz_container fuzz/corpus/fuzz_container/
+
+# Limit to N iterations
+cargo +nightly fuzz run fuzz_target_1 -- -runs=1000000
+```
+
+## Corpus and Artifacts
+
+Crash inputs and corpus entries are stored under `fuzz/artifacts/` and `fuzz/corpus/` respectively (both gitignored). To seed the corpus with real files, copy them into the appropriate corpus directory before running.
diff --git a/preflate/README.md b/preflate/README.md
new file mode 100644
index 0000000..9bcba4a
--- /dev/null
+++ b/preflate/README.md
@@ -0,0 +1,118 @@
+# preflate
+
+Core DEFLATE analysis and reconstruction engine for the preflate-rs workspace.
+
+This crate takes an existing DEFLATE-compressed bitstream and splits it into two parts:
+- The **uncompressed plaintext**
+- A compact **corrections blob** that captures everything needed to recreate the original bitstream exactly
+
+Given those two parts, the original DEFLATE stream can be reconstructed bit-for-bit. This enables re-compressing the plaintext with a modern algorithm (Zstd, Brotli, LZMA) while preserving binary-exact fidelity.
+
+[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
+
+## How It Works
+
+### Analysis pipeline
+
+1. **Parse** (`deflate/`) — Decode the DEFLATE bitstream into a token sequence: literals and length/distance back-references.
+2. **Estimate** (`estimator/`) — Analyze the token sequence to fingerprint the original compressor. Identifies hash algorithm, chain depth, nice-length cutoff, window size, and add policy.
+3. **Predict** (`token_predictor.rs`) — Re-run compression using the estimated parameters, generating a predicted token sequence.
+4. **Encode** (`statistical_codec.rs`, `cabac_codec.rs`) — Record each position where the actual token differs from the prediction, encoded with CABAC (context-adaptive binary arithmetic coding).
+
+### Reconstruction pipeline
+
+Feed the plaintext and corrections back into the predictor. It replays every compression decision, applying corrections where recorded, and writes the resulting tokens back to a DEFLATE bitstream using the original Huffman trees.
+
+## API
+
+### Whole-stream (simple)
+
+```rust
+use preflate_rs::{preflate_whole_deflate_stream, recreate_whole_deflate_stream, PreflateConfig};
+
+let config = PreflateConfig::default();
+
+// Analysis: DEFLATE → plaintext + corrections
+let (result, plain_text) = preflate_whole_deflate_stream(&compressed_data, &config)?;
+
+// Reconstruction: plaintext + corrections → original DEFLATE
+let recreated = recreate_whole_deflate_stream(plain_text.text(), &result.corrections)?;
+
+assert_eq!(compressed_data, recreated);
+```
+
+### Streaming (chunked)
+
+For large streams where memory is a concern, use the streaming processors:
+
+```rust
+use preflate_rs::{PreflateStreamProcessor, RecreateStreamProcessor, PreflateConfig};
+
+// Analysis
+let mut processor = PreflateStreamProcessor::new(&config);
+let chunk_result = processor.decompress(&compressed_chunk)?;
+// chunk_result.corrections contains the encoded corrections for this chunk
+// processor.plain_text() gives access to the decompressed data
+
+// Reconstruction
+let mut recreator = RecreateStreamProcessor::new();
+let (deflate_output, _blocks) = recreator.recompress(&mut plain_text_reader, &corrections)?;
+```
+
+## Configuration
+
+```rust
+pub struct PreflateConfig {
+    /// Maximum hash chain traversal depth. Higher values improve prediction
+    /// accuracy for streams compressed with high chain limits, at the cost
+    /// of analysis time. Default: 4096.
+    pub max_chain_length: u32,
+
+    /// Maximum plaintext held in memory at once. Default: 128 MB.
+    pub plain_text_limit: usize,
+
+    /// Verify round-trip correctness after analysis. Default: true.
+    pub verify_compression: bool,
+}
+```
+
+## Supported Compressors
+
+The estimator detects the following DEFLATE implementations by their token patterns and hash algorithms:
+
+| Compressor | Detection method |
+|---|---|
+| zlib | Default hash, standard chain behavior |
+| zlib-ng | Distinct hash variant |
+| libdeflate | 4-byte hash tables |
+| miniz / miniz_oxide | Fastest-mode hash function |
+| Windows zlib | Built-in PNG/ZIP codec fingerprint |
+
+Unknown compressors still round-trip correctly, but with higher corrections overhead.
+
+## Key Types
+
+| Type | Description |
+|---|---|
+| `PreflateStreamChunkResult` | Output of analysis: corrections blob, compressed size, estimated parameters |
+| `TokenPredictorParameters` | Compressor fingerprint: hash algorithm, nice_length, max_chain, window_bits, add policy |
+| `HashAlgorithm` | Enum of detected compressor families |
+| `PlainText` | Decompressed data with sliding-window dictionary support |
+| `PreflateError` | Rich error type with detailed exit codes |
+
+## Encoding Format
+
+Parameters are serialized with [`bitcode`](https://crates.io/crates/bitcode). Corrections use CABAC (Context-Adaptive Binary Arithmetic Coding), the same codec used in [Lepton JPEG](https://github.com/microsoft/lepton_jpeg_rust) compression. The format is chunked so memory use is bounded regardless of input size.
+
+The following differences from the predicted stream are encoded:
+
+- Block type (uncompressed, static Huffman, dynamic Huffman)
+- Token count per block
+- Dynamic Huffman tree encoding
+- Literal vs. length/distance choice
+- Incorrect distance or length (encoded as a hop count back through the hash chain)
+
+## Constraints
+
+- **No unsafe code** — `#![forbid(unsafe_code)]`
+- Minimum Rust version: **1.89**, Edition **2024**
diff --git a/util/README.md b/util/README.md
new file mode 100644
index 0000000..c2a0dcd
--- /dev/null
+++ b/util/README.md
@@ -0,0 +1,54 @@
+# preflate_util
+
+Command-line tool for testing and benchmarking the preflate-rs workspace against real files.
+
+Given a file or directory, `preflate_util` compresses each file using the preflate container pipeline, optionally decompresses it back, and prints compression statistics. It is primarily a development and validation tool rather than a production compressor.
+
+## Usage
+
+```shell
+preflate_util [OPTIONS] <PATH>
+```
+
+`PATH` can be a single file or a directory. Directories are scanned recursively.
+
+### Options
+
+| Option | Default | Description |
+|---|---|---|
+| `--max-chain <N>` | 4096 | Hash chain depth limit for the DEFLATE predictor |
+| `-c, --level <N>` | 9 | Zstd compression level (0–14) |
+| `--loglevel <L>` | Error | Log verbosity (`Error`, `Warn`, `Info`, `Debug`, `Trace`) |
+| `--verify <bool>` | true | Round-trip decompress and verify each file after compression |
+| `--baseline <bool>` | false | Also measure raw Zstd-only size for comparison |
+
+### Output
+
+For each file, a line is printed showing:
+- Input file path
+- Original size, compressed size, and ratio
+- Detected compressor (hash algorithm)
+- CPU time for compression
+
+A summary line at the end shows aggregate totals across all processed files.
+
+If `--baseline true` is set, a second column shows what raw Zstd (without preflate) would produce, so you can see the benefit of DEFLATE-aware re-compression.
+
+If verification fails (the reconstructed output does not match the original), the tool prints the first differing byte position and exits with an error.
+
+## Building
+
+```shell
+cargo build --release -p preflate_util
+# Binary at: target/release/preflate_util.exe (Windows) or target/release/preflate_util (Linux)
+```
+
+## Example
+
+```shell
+# Test a single ZIP file
+preflate_util archive.zip
+
+# Benchmark a directory with verbose logging and baseline comparison
+preflate_util --loglevel Info --baseline true ./test-corpus/
+```