diff --git a/_typos.toml b/_typos.toml
index df533d6..b9800fd 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -45,3 +45,7 @@ Pagent = "Pagent"
 Flavour = "Flavour"
 flavours = "flavours"
 initialise = "initialise"
+# The PDF spec's own key name `DecodeParms` (and PDFBox's COSName.DECODE_PARMS
+# constant mirroring it) — third-party/spec identifiers we must name verbatim.
+Parms = "Parms"
+PARMS = "PARMS"
diff --git a/despeckle/application/build.gradle.kts b/despeckle/application/build.gradle.kts
index a6f1593..165103b 100644
--- a/despeckle/application/build.gradle.kts
+++ b/despeckle/application/build.gradle.kts
@@ -25,6 +25,10 @@ dependencies {
     // implementations are no longer duplicated here. Static calls returning java.base types only, so
     // implementation (not api) is the right scope.
     implementation(project(":shared:io"))
+    // Tasks.awaitAll(Workers...): the shared fail-fast page fan-out (batch-owned executor,
+    // sibling interruption, quiescence before the failure propagates) DespeckleService runs its
+    // per-page work on.
+    implementation(project(":shared:process"))
     implementation(libs.slf4j.api)
 }
 
diff --git a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/DespeckleService.java b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/DespeckleService.java
index 062e974..4bd632b 100644
--- a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/DespeckleService.java
+++ b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/DespeckleService.java
@@ -9,18 +9,13 @@
 import io.github.p4suta.shared.io.CorpusFiles;
 import io.github.p4suta.shared.io.OutputDirs;
 import io.github.p4suta.shared.kernel.PageProgressListener;
+import io.github.p4suta.shared.process.Tasks;
 import java.io.IOException;
-import java.io.UncheckedIOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.atomic.AtomicInteger;
 import org.jspecify.annotations.Nullable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -75,7 +70,8 @@ public record Config(
      * Aggregate outcome of a run.
      *
      * @param pages number of pages processed
-     * @param componentsRemoved total components removed across all pages
+     * @param componentsRemoved total components removed across all pages; {@code 0} when no report
+     *     consumed component stats (counting is skipped for speed without a report)
      * @param overRemovalWarnings number of pages flagged for possible over-removal
      */
     public record Summary(int pages, long componentsRemoved, int overRemovalWarnings) {}
@@ -88,8 +84,8 @@ public Summary run(Config config) throws IOException {
     /**
      * Execute a run, reporting each finished page to {@code progress}.
      *
-     * @param progress called once per page as it completes (1-based count, total page count); may
-     *     be invoked from the worker threads, so it must be thread-safe
+     * @param progress called once per page as it completes (1-based count, total page count),
+     *     always on the calling thread and in order
      */
     public Summary run(Config config, PageProgressListener progress) throws IOException {
         OutputDirs.prepare(config.outputDir(), config.force());
@@ -101,39 +97,45 @@ public Summary run(Config config, PageProgressListener progress) throws IOExcept
         }
         LOG.info("despeckling {} page(s) with {} thread(s)", files.size(), config.jobs());
 
+        @Nullable Path reportDir = config.reportDir();
+        boolean reporting = reportDir != null;
         Reporter report =
-                config.reportDir() == null
-                        ? Reporter.noOp()
-                        : reporterFactory.create(config.reportDir(), config.flipbook());
-
-        AtomicInteger done = new AtomicInteger();
-        List<PageOutcome> outcomes;
-        ExecutorService pool = Executors.newFixedThreadPool(config.jobs());
-        try {
-            List<Callable<PageOutcome>> tasks = new ArrayList<>(files.size());
-            for (Path src : files) {
-                tasks.add(
-                        () -> {
-                            PageOutcome outcome = processOne(src, config, report);
-                            int n = done.incrementAndGet();
-                            progress.onPage(n, files.size());
-                            if (n % PROGRESS_EVERY == 0 || n == files.size()) {
-                                LOG.info("{}/{}", n, files.size());
+                reportDir != null
+                        ? reporterFactory.create(reportDir, config.flipbook())
+                        : Reporter.noOp();
+        // The report is the only consumer of per-page component counts, and counting is a full
+        // connected-component labeling twice per page — skip it when no report will be written.
+        ProcessOptions options =
+                reporting ? config.options() : config.options().withoutComponentStats();
+
+        List<Callable<PageOutcome>> tasks = new ArrayList<>(files.size());
+        for (Path src : files) {
+            tasks.add(() -> processOne(src, config, options, report));
+        }
+        // Platform workers: each page is CPU-bound Leptonica work (FFM downcalls pin virtual
+        // threads' carriers). The fan-out fails fast and quiesces before throwing, so a failed
+        // run never leaves workers writing into the output directory. Progress and the human
+        // log arrive on this thread, ordered.
+        List<PageOutcome> outcomes =
+                Tasks.awaitAll(
+                        Tasks.Workers.platform(config.jobs()),
+                        tasks,
+                        "despeckle",
+                        (done, total) -> {
+                            progress.onPage(done, total);
+                            if (done % PROGRESS_EVERY == 0 || done == total) {
+                                LOG.info("{}/{}", done, total);
                             }
-                            return outcome;
                         });
-            }
-            outcomes = invokeAll(pool, tasks);
-        } finally {
-            pool.shutdown();
-        }
 
         report.finish();
 
         long totalRemoved = 0;
+        long blackRemoved = 0;
         int warnings = 0;
         for (PageOutcome outcome : outcomes) {
             totalRemoved += outcome.result().componentsRemoved();
+            blackRemoved += outcome.result().blackPixelsRemoved();
             if (outcome.result().isOverRemoval()) {
                 warnings++;
                 LOG.warn(
@@ -142,17 +144,28 @@ public Summary run(Config config, PageProgressListener progress) throws IOExcept
                         Math.round(outcome.result().removedBlackPixelRatio() * 100));
             }
         }
-        LOG.info(
-                "done: {} page(s), {} component(s) removed, {} over-removal warning(s)",
-                files.size(),
-                totalRemoved,
-                warnings);
+        if (reporting) {
+            LOG.info(
+                    "done: {} page(s), {} component(s) removed, {} over-removal warning(s)",
+                    files.size(),
+                    totalRemoved,
+                    warnings);
+        } else {
+            // Without a report nothing counted components (the counting passes are skipped for
+            // speed), so the summary speaks in the always-measured black-pixel terms.
+            LOG.info(
+                    "done: {} page(s), {} black pixel(s) removed, {} over-removal warning(s)",
+                    files.size(),
+                    blackRemoved,
+                    warnings);
+        }
         return new Summary(files.size(), totalRemoved, warnings);
     }
 
     private record PageOutcome(Path source, ProcessResult result) {}
 
-    private PageOutcome processOne(Path src, Config config, Reporter report) {
+    private PageOutcome processOne(Path src, Config config, ProcessOptions options, Reporter report)
+            throws IOException {
         Path dest =
                 CorpusFiles.mirrorDestination(
                         src, config.inputDir(), config.outputDir(), config.format().extension());
@@ -161,35 +174,14 @@ private PageOutcome processOne(Path src, Config config, Reporter report) {
             if (parent != null) {
                 Files.createDirectories(parent);
             }
-            ProcessResult result = pageCleaner.clean(src, dest, config.format(), config.options());
+            ProcessResult result = pageCleaner.clean(src, dest, config.format(), options);
             Path stem = config.inputDir().relativize(src);
             report.addPage(stem, src, dest, result);
             return new PageOutcome(src, result);
         } catch (IOException e) {
-            throw new UncheckedIOException("failed to process " + src, e);
-        }
-    }
-
-    private static List<PageOutcome> invokeAll(
-            ExecutorService pool, List<Callable<PageOutcome>> tasks) throws IOException {
-        List<Future<PageOutcome>> futures;
-        try {
-            futures = pool.invokeAll(tasks);
-        } catch (InterruptedException e) {
-            Thread.currentThread().interrupt();
-            throw new IOException("despeckle run interrupted", e);
-        }
-        List<PageOutcome> outcomes = new ArrayList<>(futures.size());
-        for (Future<PageOutcome> future : futures) {
-            try {
-                outcomes.add(future.get());
-            } catch (InterruptedException e) {
-                Thread.currentThread().interrupt();
-                throw new IOException("despeckle run interrupted", e);
-            } catch (ExecutionException e) {
-                throw new IOException("page processing failed", e.getCause());
-            }
+            // Re-throw with the page named: Tasks surfaces a task's IOException unchanged, so
+            // this context reaches the user instead of being lost in a generic wrapper.
+            throw new IOException("failed to process " + src, e);
         }
-        return outcomes;
     }
 }
diff --git a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/Jbig2PackService.java b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/Jbig2PackService.java
index c82f4cb..665badd 100644
--- a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/Jbig2PackService.java
+++ b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/Jbig2PackService.java
@@ -9,8 +9,6 @@
 import java.nio.file.Path;
 import java.util.Comparator;
 import java.util.OptionalInt;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
 import java.util.stream.Stream;
 import org.jspecify.annotations.Nullable;
 import org.slf4j.Logger;
@@ -77,17 +75,15 @@ public void run(Config config) throws IOException {
         }
 
         Path jb2Dir = createWorkDir(config.outPdf());
-        ExecutorService pool = Executors.newFixedThreadPool(config.jobs());
         try {
             assembler.assemble(
                     config.imageDir(),
                     config.outPdf(),
                     config.source(),
                     config.dpi(),
-                    pool,
+                    config.jobs(),
                     jb2Dir);
         } finally {
-            pool.shutdown();
             deleteRecursively(jb2Dir);
         }
         linearizer.linearize(config.outPdf());
diff --git a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfBatchService.java b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfBatchService.java
index c42cf03..d1c5da2 100644
--- a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfBatchService.java
+++ b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfBatchService.java
@@ -148,14 +148,23 @@ public Summary run(Config config) throws IOException {
 
         if (config.reportParent() != null) {
             batchReporter.write(config.reportParent(), books);
+            LOG.info(
+                    "done: {} ok, {} skipped, {} failed, {} page(s), {} component(s) removed",
+                    ok,
+                    skipped,
+                    failed,
+                    totalPages,
+                    totalComponentsRemoved);
+        } else {
+            // Without reports the runs skip component counting (an expensive labeling, twice per
+            // page), so a component total would always read 0 — leave it out of the line.
+            LOG.info(
+                    "done: {} ok, {} skipped, {} failed, {} page(s)",
+                    ok,
+                    skipped,
+                    failed,
+                    totalPages);
         }
-        LOG.info(
-                "done: {} ok, {} skipped, {} failed, {} page(s), {} component(s) removed",
-                ok,
-                skipped,
-                failed,
-                totalPages,
-                totalComponentsRemoved);
         return new Summary(ok, skipped, failed, totalPages, totalComponentsRemoved);
     }
 
diff --git a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfPipelineService.java b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfPipelineService.java
index e90a508..16357c6 100644
--- a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfPipelineService.java
+++ b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfPipelineService.java
@@ -12,8 +12,6 @@
 import java.nio.file.Path;
 import java.util.Comparator;
 import java.util.OptionalInt;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
 import java.util.stream.Stream;
 import org.jspecify.annotations.Nullable;
 import org.slf4j.Logger;
@@ -96,41 +94,37 @@ public DespeckleService.Summary run(Config config) throws IOException {
             Path extracted = Files.createDirectories(work.resolve("in"));
             Path cleaned = Files.createDirectories(work.resolve("clean"));
             Path jbig2Dir = Files.createDirectories(work.resolve("jb2"));
-            ExecutorService pool = Executors.newFixedThreadPool(config.jobs());
-            DespeckleService.Summary summary;
-            try {
-                int dpi =
-                        config.options().dpi().isPresent()
-                                ? config.options().dpi().getAsInt()
-                                : extractor.dominantDpi(config.inputPdf());
-                LOG.info(
-                        "pipeline: {} -> {} at {} dpi", config.inputPdf(), config.outputPdf(), dpi);
+            int dpi =
+                    config.options().dpi().isPresent()
+                            ? config.options().dpi().getAsInt()
+                            : extractor.dominantDpi(config.inputPdf());
+            LOG.info("pipeline: {} -> {} at {} dpi", config.inputPdf(), config.outputPdf(), dpi);
 
-                extractor.extract(config.inputPdf(), extracted, config.jobs(), pool);
+            // Each step fans out on its own batch-owned workers bounded by the same jobs budget,
+            // so the steps never hold idle threads for each other (the old shared outer pool sat
+            // idle through the whole despeckle step).
+            extractor.extract(config.inputPdf(), extracted, config.jobs());
 
-                DespeckleService.Config clean =
-                        new DespeckleService.Config(
-                                extracted,
-                                cleaned,
-                                OutputFormat.TIFF,
-                                "*.tif",
-                                config.jobs(),
-                                true,
-                                config.options().withDpi(dpi),
-                                config.reportDir(),
-                                config.flipbook());
-                summary = despeckleService.run(clean);
+            DespeckleService.Config clean =
+                    new DespeckleService.Config(
+                            extracted,
+                            cleaned,
+                            OutputFormat.TIFF,
+                            "*.tif",
+                            config.jobs(),
+                            true,
+                            config.options().withDpi(dpi),
+                            config.reportDir(),
+                            config.flipbook());
+            DespeckleService.Summary summary = despeckleService.run(clean);
 
-                assembler.assemble(
-                        cleaned,
-                        config.outputPdf(),
-                        config.inputPdf(),
-                        OptionalInt.of(dpi),
-                        pool,
-                        jbig2Dir);
-            } finally {
-                pool.shutdown();
-            }
+            assembler.assemble(
+                    cleaned,
+                    config.outputPdf(),
+                    config.inputPdf(),
+                    OptionalInt.of(dpi),
+                    config.jobs(),
+                    jbig2Dir);
             linearizer.linearize(config.outputPdf());
             LOG.info("wrote {}", config.outputPdf());
             return summary;
diff --git a/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/DespeckleServiceTest.java b/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/DespeckleServiceTest.java
index 4bb8187..ba543df 100644
--- a/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/DespeckleServiceTest.java
+++ b/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/DespeckleServiceTest.java
@@ -1,6 +1,7 @@
 package io.github.p4suta.despeckle.application;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -141,4 +142,30 @@ void drivesTheReporterWhenAReportDirIsGiven(@TempDir Path tmp) throws IOExceptio
         assertEquals(2, factory.reporter.pages.get(), "every page is reported");
         assertEquals(1, factory.reporter.finished.get(), "the report is finalized once");
     }
+
+    @Test
+    void skipsComponentCountingWithoutAReport(@TempDir Path tmp) throws IOException {
+        Path in = tmp.resolve("in");
+        writeInputs(in, 1);
+        FakePageCleaner cleaner = new FakePageCleaner(LIGHT);
+
+        new DespeckleService(cleaner, new RecordingReporterFactory())
+                .run(config(in, tmp.resolve("out"), false, null));
+
+        ProcessOptions seen = java.util.Objects.requireNonNull(cleaner.lastOptions);
+        assertFalse(seen.collectComponentStats(), "no report -> counting passes are skipped");
+    }
+
+    @Test
+    void keepsComponentCountingForTheReportPath(@TempDir Path tmp) throws IOException {
+        Path in = tmp.resolve("in");
+        writeInputs(in, 1);
+        FakePageCleaner cleaner = new FakePageCleaner(LIGHT);
+
+        new DespeckleService(cleaner, new RecordingReporterFactory())
+                .run(config(in, tmp.resolve("out"), false, tmp.resolve("report")));
+
+        ProcessOptions seen = java.util.Objects.requireNonNull(cleaner.lastOptions);
+        assertTrue(seen.collectComponentStats(), "the report consumes the counts");
+    }
 }
diff --git a/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/Fakes.java b/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/Fakes.java
index a970181..a061822 100644
--- a/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/Fakes.java
+++ b/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/Fakes.java
@@ -17,7 +17,6 @@
 import java.nio.file.Path;
 import java.util.List;
 import java.util.OptionalInt;
-import java.util.concurrent.ExecutorService;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.jspecify.annotations.Nullable;
 
@@ -34,6 +33,7 @@ private Fakes() {}
     /** A {@link PageCleaner} that writes a stub output file and returns a fixed result. */
     static final class FakePageCleaner implements PageCleaner {
         final AtomicInteger calls = new AtomicInteger();
+        volatile @Nullable ProcessOptions lastOptions;
         private final ProcessResult result;
 
         FakePageCleaner(ProcessResult result) {
@@ -49,6 +49,7 @@ public ProcessResult clean(
                 throw new java.io.UncheckedIOException(e);
             }
             calls.incrementAndGet();
+            lastOptions = options;
             return result;
         }
     }
@@ -111,8 +112,7 @@ public int dominantDpi(Path pdf) {
         }
 
         @Override
-        public void extract(Path pdf, Path outDir, int jobs, ExecutorService pool)
-                throws IOException {
+        public void extract(Path pdf, Path outDir, int jobs) throws IOException {
             if (failOn != null && pdf.getFileName().toString().contains(failOn)) {
                 throw new IOException("fake extract failed for " + pdf);
             }
@@ -135,7 +135,7 @@ public void assemble(
                 Path outPdf,
                 @Nullable Path source,
                 OptionalInt forcedDpi,
-                ExecutorService pool,
+                int jobs,
                 Path scratchDir)
                 throws IOException {
             Files.writeString(outPdf, "%PDF-fake", StandardCharsets.UTF_8);
diff --git a/despeckle/docs/cleaner-baseline.md b/despeckle/docs/cleaner-baseline.md
new file mode 100644
index 0000000..96c422a
--- /dev/null
+++ b/despeckle/docs/cleaner-baseline.md
@@ -0,0 +1,31 @@
+# despeckle cleaner op-level baseline
+
+Generated by `CleanerBenchmark` (`./gradlew :despeckle:infrastructure:benchCleaner`, dev container).
+Times each Leptonica primitive the page cleaner composes on a synthetic
+600-dpi A5 page (3496x4961 px, fixed seed). Re-run after any change to the
+cleaner or the imaging bindings and compare before merging.
+
+- Date (UTC): 2026-06-10 06:25:05
+- Host: Linux amd64, 8 CPUs
+- Samples: median of 10 reps after 2 warmups; single-threaded.
+
+| op | median (ms) | min (ms) | calls/clean() | est. share of clean() |
+|---|---:|---:|---:|---:|
+| read TIFF-G4 | 2.59 | 2.52 | 1 | 1.5% |
+| selectBySize k=6 (page) | 15.22 | 14.98 | 1 | 8.7% |
+| selectBySize 15 (page) | 15.19 | 14.80 | 1 | 8.7% |
+| selectBySize k=6 (inverted) | 22.16 | 21.89 | 2 | 25.3% |
+| dilate 43x43 (text mask) | 37.52 | 37.01 | 1 | 21.5% |
+| open 7x7 (page) | 12.18 | 12.04 | 1 | 7.0% |
+| invert | 0.26 | 0.25 | 2 | 0.3% |
+| subtract | 0.38 | 0.36 | 5 | 1.1% |
+| and | 0.40 | 0.38 | 1 | 0.2% |
+| or | 0.33 | 0.32 | 3 | 0.6% |
+| countConnComp | 11.82 | 11.46 | 2 | 13.5% |
+| countPixels | 0.41 | 0.41 | 2 | 0.5% |
+| write TIFF-G4 | 6.44 | 6.29 | 1 | 3.7% |
+| **Σ(median × calls)** | 161.74 | | | 92.5% |
+| **clean() end-to-end** | 174.91 | 173.28 | 1 | 100% |
+
+The Σ row landing near 100% means the table accounts for clean()'s real cost;
+a large gap points at untimed work (allocation churn, codec internals).
diff --git a/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessOptions.java b/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessOptions.java
index a565b1f..2051160 100644
--- a/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessOptions.java
+++ b/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessOptions.java
@@ -27,13 +27,16 @@
  * @param removeIsolatedDust whether to run the isolated-medium-dust pass
  * @param isolatedDustSizePx an explicit max size for an isolated speck, or empty to derive it; a
  *     present value also implies {@code removeIsolatedDust}
+ * @param collectComponentStats whether to count 8-connected components before and after — two of
+ *     the most expensive full-page scans, so off when nothing consumes the counts (no report)
  */
 public record ProcessOptions(
         OptionalInt dpi,
         OptionalInt speckSizePx,
         boolean fillHoles,
         boolean removeIsolatedDust,
-        OptionalInt isolatedDustSizePx) {
+        OptionalInt isolatedDustSizePx,
+        boolean collectComponentStats) {
 
     /** Resolution assumed for the speck filter when neither a flag nor the image supplies one. */
     public static final int DEFAULT_DPI = 300;
@@ -50,6 +53,16 @@ public record ProcessOptions(
         }
     }
 
+    /** The five-knob shape every direct caller uses; component counting defaults to on. */
+    public ProcessOptions(
+            OptionalInt dpi,
+            OptionalInt speckSizePx,
+            boolean fillHoles,
+            boolean removeIsolatedDust,
+            OptionalInt isolatedDustSizePx) {
+        this(dpi, speckSizePx, fillHoles, removeIsolatedDust, isolatedDustSizePx, true);
+    }
+
     /** Options with the isolated-dust pass off — the common case. */
     public static ProcessOptions of(OptionalInt dpi, OptionalInt speckSizePx, boolean fillHoles) {
         return new ProcessOptions(dpi, speckSizePx, fillHoles, false, OptionalInt.empty());
@@ -66,7 +79,17 @@ public ProcessOptions withDpi(int dpi) {
                 speckSizePx,
                 fillHoles,
                 removeIsolatedDust,
-                isolatedDustSizePx);
+                isolatedDustSizePx,
+                collectComponentStats);
+    }
+
+    /**
+     * A copy with component counting disabled — for runs where nothing consumes the counts (no
+     * report), saving two full connected-component labelings per page.
+     */
+    public ProcessOptions withoutComponentStats() {
+        return new ProcessOptions(
+                dpi, speckSizePx, fillHoles, removeIsolatedDust, isolatedDustSizePx, false);
     }
 
     /**
diff --git a/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessResult.java b/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessResult.java
index f6efbf9..8e6fd25 100644
--- a/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessResult.java
+++ b/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessResult.java
@@ -1,15 +1,26 @@
 package io.github.p4suta.despeckle.domain.model;
 
+import java.util.OptionalInt;
+
 /**
  * The outcome of despeckling one page.
  *
- * @param componentsBefore 8-connected component count of the input
- * @param componentsAfter 8-connected component count of the output
+ * <p>The black-pixel counts are always measured — they feed the over-removal guard. The component
+ * counts are measured only when something consumes them (the HTML report): counting 8-connected
+ * components is a full connected-component labeling of the page, one of the most expensive scans in
+ * the whole clean, so a run with no report skips both counting passes and carries empty components
+ * here.
+ *
+ * @param componentsBefore 8-connected component count of the input, when counted
+ * @param componentsAfter 8-connected component count of the output, when counted
  * @param blackPixelsBefore foreground pixel count of the input
  * @param blackPixelsAfter foreground pixel count of the output
  */
 public record ProcessResult(
-        int componentsBefore, int componentsAfter, long blackPixelsBefore, long blackPixelsAfter) {
+        OptionalInt componentsBefore,
+        OptionalInt componentsAfter,
+        long blackPixelsBefore,
+        long blackPixelsAfter) {
 
     /**
      * A black-pixel removal ratio above this flags a possibly over-cleaned page. The single domain
@@ -17,12 +28,38 @@ public record ProcessResult(
      */
     public static final double OVER_REMOVAL_WARN_RATIO = 0.03;
 
+    /** A counted result — the shape the report path consumes. */
+    public ProcessResult(
+            int componentsBefore,
+            int componentsAfter,
+            long blackPixelsBefore,
+            long blackPixelsAfter) {
+        this(
+                OptionalInt.of(componentsBefore),
+                OptionalInt.of(componentsAfter),
+                blackPixelsBefore,
+                blackPixelsAfter);
+    }
+
+    /** The result of a run that skipped component counting (nothing consumes the counts). */
+    public static ProcessResult withoutComponentStats(
+            long blackPixelsBefore, long blackPixelsAfter) {
+        return new ProcessResult(
+                OptionalInt.empty(), OptionalInt.empty(), blackPixelsBefore, blackPixelsAfter);
+    }
+
+    /** Whether component counting ran (true on the report path). */
+    public boolean hasComponentStats() {
+        return componentsBefore.isPresent();
+    }
+
     /**
      * Net drop in 8-connected components — dust removed minus any holes filled back in. Summed into
-     * the run total and plotted per page in the report.
+     * the run total and plotted per page in the report. {@code 0} when counting was skipped (see
+     * {@link #hasComponentStats()}).
      */
     public int componentsRemoved() {
-        return componentsBefore - componentsAfter;
+        return componentsBefore.orElse(0) - componentsAfter.orElse(0);
     }
 
     /**
@@ -36,6 +73,11 @@ public double removedBlackPixelRatio() {
         return (double) (blackPixelsBefore - blackPixelsAfter) / blackPixelsBefore;
     }
 
+    /** Black pixels removed from the page — the cost-free removal measure every run carries. */
+    public long blackPixelsRemoved() {
+        return blackPixelsBefore - blackPixelsAfter;
+    }
+
     /**
      * Whether {@link #removedBlackPixelRatio()} exceeds {@link #OVER_REMOVAL_WARN_RATIO}, the
      * threshold the runner logs on and the report flags.
diff --git a/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessOptionsTest.java b/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessOptionsTest.java
index 700035a..6d70e42 100644
--- a/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessOptionsTest.java
+++ b/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessOptionsTest.java
@@ -163,4 +163,21 @@ void isolatedDustSizeStaysAboveTheSpeckSizeAtVeryLowResolution() {
         assertEquals(
                 2, options.isolatedDustSize(noImg()), "isolated-dust size floors at speckSize + 1");
     }
+
+    @Test
+    void componentStatsDefaultOnAndWitherTurnsThemOff() {
+        ProcessOptions defaults = ProcessOptions.defaults();
+        assertTrue(defaults.collectComponentStats(), "counting is on unless turned off");
+
+        ProcessOptions off = defaults.withoutComponentStats();
+        assertFalse(off.collectComponentStats());
+        // Every other knob survives the wither.
+        assertEquals(defaults.dpi(), off.dpi());
+        assertEquals(defaults.speckSizePx(), off.speckSizePx());
+        assertEquals(defaults.fillHoles(), off.fillHoles());
+        assertEquals(defaults.removeIsolatedDust(), off.removeIsolatedDust());
+        assertEquals(defaults.isolatedDustSizePx(), off.isolatedDustSizePx());
+        // ...including through withDpi.
+        assertFalse(off.withDpi(600).collectComponentStats());
+    }
 }
diff --git a/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessResultTest.java b/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessResultTest.java
index fac55e0..7e2508b 100644
--- a/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessResultTest.java
+++ b/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessResultTest.java
@@ -60,4 +60,21 @@ void isOverRemovalIsFalseBelowTheThreshold() {
     void isOverRemovalIsFalseForABlankInputPage() {
         assertFalse(new ProcessResult(0, 0, 0, 0).isOverRemoval());
     }
+
+    @Test
+    void withoutComponentStatsCarriesNoCountsButFullPixelMath() {
+        ProcessResult result = ProcessResult.withoutComponentStats(1000, 950);
+        assertFalse(result.hasComponentStats());
+        assertEquals(0, result.componentsRemoved(), "absent counts read as zero, documented");
+        assertEquals(50L, result.blackPixelsRemoved());
+        assertTrue(result.isOverRemoval(), "the over-removal guard works without counts");
+    }
+
+    @Test
+    void countedConstructorHasComponentStats() {
+        ProcessResult result = new ProcessResult(120, 95, 1000, 990);
+        assertTrue(result.hasComponentStats());
+        assertEquals(25, result.componentsRemoved());
+        assertEquals(10L, result.blackPixelsRemoved());
+    }
 }
diff --git a/despeckle/infrastructure/build.gradle.kts b/despeckle/infrastructure/build.gradle.kts
index 90f7db5..c957b23 100644
--- a/despeckle/infrastructure/build.gradle.kts
+++ b/despeckle/infrastructure/build.gradle.kts
@@ -78,3 +78,21 @@ tasks.named<JacocoCoverageVerification>("jacocoTestCoverageVerification") {
 }
 
 tasks.named("check") { dependsOn("jacocoTestCoverageVerification") }
+
+// ---- Cleaner op-level micro-benchmark (see despeckle/docs/cleaner-baseline.md) ------------------
+// Times each Leptonica primitive the page cleaner composes, plus end-to-end clean(), on a
+// deterministic synthetic 600-dpi A5 page — the measurement every cleaner/imaging optimization is
+// judged against. Knob: -Preps=N (default 10). Dev-container only (needs the native Leptonica).
+tasks.register<JavaExec>("benchCleaner") {
+    group = "verification"
+    description = "Micro-benchmark cleaner ops + end-to-end clean(); writes despeckle/docs/cleaner-baseline.md"
+    dependsOn(tasks.named("testClasses"))
+    classpath = sourceSets["test"].runtimeClasspath
+    mainClass = "io.github.p4suta.despeckle.infrastructure.tools.CleanerBenchmark"
+    workingDir = rootDir
+    args =
+        listOf(
+            "despeckle/docs/cleaner-baseline.md",
+            providers.gradleProperty("reps").getOrElse("10"),
+        )
+}
diff --git a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleaner.java b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleaner.java
index 7f8434c..6fd7d77 100644
--- a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleaner.java
+++ b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleaner.java
@@ -52,7 +52,11 @@ public ProcessResult clean(
             int raw = source.resolution();
             Optional<Resolution> img = raw > 0 ? Optional.of(Resolution.of(raw)) : Optional.empty();
             int k = options.speckSize(img);
-            int componentsBefore = source.connectedComponents();
+            // Component counting is a full connected-component labeling — one of the most
+            // expensive scans here — so it runs only when something consumes the counts (the
+            // report). The black-pixel counts always run: they feed the over-removal guard.
+            boolean countComponents = options.collectComponentStats();
+            int componentsBefore = countComponents ? source.connectedComponents() : 0;
             long blackBefore = source.blackPixels();
             int sourceFormat = source.inputFormat();
 
@@ -78,14 +82,18 @@ public ProcessResult clean(
                     current = filled;
                 }
 
-                int componentsAfter = current.connectedComponents();
                 long blackAfter = current.blackPixels();
                 // Stamp the honored resolution so a TIFF/PNG output carries an accurate tag. Only a
                 // known resolution is written; an unknown one is left untouched.
                 options.resolution(img).map(Resolution::dpi).ifPresent(current::setResolution);
                 writeIn(current, output, format, sourceFormat);
-                return new ProcessResult(
-                        componentsBefore, componentsAfter, blackBefore, blackAfter);
+                return countComponents
+                        ? new ProcessResult(
+                                componentsBefore,
+                                current.connectedComponents(),
+                                blackBefore,
+                                blackAfter)
+                        : ProcessResult.withoutComponentStats(blackBefore, blackAfter);
             } finally {
                 current.close();
             }
diff --git a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/pdf/PdfBoxJbig2Assembler.java b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/pdf/PdfBoxJbig2Assembler.java
index 907e663..a1a7a7b 100644
--- a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/pdf/PdfBoxJbig2Assembler.java
+++ b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/pdf/PdfBoxJbig2Assembler.java
@@ -4,7 +4,6 @@
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.OptionalInt;
-import java.util.concurrent.ExecutorService;
 import org.jspecify.annotations.Nullable;
 
 /**
@@ -34,7 +33,7 @@ public PdfBoxJbig2Assembler() {}
      * @param outPdf the lossless-JBIG2 PDF to write
      * @param source a PDF whose Info dict, XMP and version are inherited, or {@code null} for none
      * @param forcedDpi a single DPI to size every page with, or empty to read each image's own
-     * @param pool the worker pool the per-page {@code jbig2} encodes run on
+     * @param jobs how many {@code jbig2} children may encode at once
      * @param scratchDir scratch directory for the intermediate per-page JBIG2 streams
      *     (caller-owned)
      * @throws IOException if the directory is empty, a tool fails, or the write fails
@@ -45,9 +44,9 @@ public void assemble(
             Path outPdf,
             @Nullable Path source,
             OptionalInt forcedDpi,
-            ExecutorService pool,
+            int jobs,
             Path scratchDir)
             throws IOException {
-        delegate.assemble(imageDir, outPdf, source, forcedDpi, pool, scratchDir);
+        delegate.assemble(imageDir, outPdf, source, forcedDpi, jobs, scratchDir);
     }
 }
diff --git a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/pdf/PdfImagesCliExtractor.java b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/pdf/PdfImagesCliExtractor.java
index 044a175..ab186b5 100644
--- a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/pdf/PdfImagesCliExtractor.java
+++ b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/pdf/PdfImagesCliExtractor.java
@@ -3,7 +3,6 @@
 import io.github.p4suta.despeckle.port.PdfImageExtractor;
 import java.io.IOException;
 import java.nio.file.Path;
-import java.util.concurrent.ExecutorService;
 
 /**
  * Despeckle's {@link PdfImageExtractor} adapter — a wrapper over the shared {@code
@@ -34,10 +33,10 @@ public int dominantDpi(Path pdf) throws IOException {
 
     /**
      * Extract all pages of {@code pdf} into {@code outDir} as TIFFs, parallelized over page-range
-     * chunks. {@code jobs} bounds both the chunk count and the pool slots used.
+     * chunks. at most {@code jobs} chunks run at once.
      */
     @Override
-    public void extract(Path pdf, Path outDir, int jobs, ExecutorService pool) throws IOException {
-        delegate.extract(pdf, outDir, jobs, pool);
+    public void extract(Path pdf, Path outDir, int jobs) throws IOException {
+        delegate.extract(pdf, outDir, jobs);
     }
 }
diff --git a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/process/NativeTools.java b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/process/NativeTools.java
index 5812478..dfdf3c5 100644
--- a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/process/NativeTools.java
+++ b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/process/NativeTools.java
@@ -6,12 +6,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.Path;
-import java.util.ArrayList;
 import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 
 /**
@@ -26,10 +21,10 @@
  * <p>The launch helpers stay local rather than routing through {@code :shared:process}'s {@code
  * ProcessRunner}/{@code Tasks}: {@link #capture} returns the raw {@code byte[]} a JBIG2 stream
  * needs (the shared runner decodes to a UTF-8 {@code String}, which is lossy for binary output),
- * and {@link #run}/{@link #awaitAll} raise/propagate the tagged {@link DespeckleException} kind
- * (the shared {@code Tasks} flattens any non-{@code IOException} cause, losing the kind and its
- * exit code). The {@code qpdf} call site, whose output is text and whose exit-3 tolerance the
- * shared runner models directly, uses {@code ProcessRunner} instead.
+ * and {@link #run} raises/propagates the tagged {@link DespeckleException} kind (the shared {@code
+ * Tasks} flattens any non-{@code IOException} cause, losing the kind and its exit code). The {@code
+ * qpdf} call site, whose output is text and whose exit-3 tolerance the shared runner models
+ * directly, uses {@code ProcessRunner} instead.
  *
  * <p>Public so the sibling {@code infrastructure.pdf} adapters can call it; it never leaves the
  * {@code :infrastructure} module (the application and CLI layers depend only on the {@code :port}
@@ -121,41 +116,4 @@ private static void awaitExit(Process process, List<String> command, long timeou
                     null);
         }
     }
-
-    /**
-     * Run every task on {@code pool}, in submission order, surfacing the first failure as
-     * IOException.
-     */
-    public static <T> List<T> awaitAll(ExecutorService pool, List<Callable<T>> tasks)
-            throws IOException {
-        List<Future<T>> futures;
-        try {
-            futures = pool.invokeAll(tasks);
-        } catch (InterruptedException e) {
-            Thread.currentThread().interrupt();
-            throw new IOException("pipeline interrupted", e);
-        }
-        List<T> results = new ArrayList<>(futures.size());
-        for (Future<T> future : futures) {
-            try {
-                results.add(future.get());
-            } catch (InterruptedException e) {
-                Thread.currentThread().interrupt();
-                throw new IOException("pipeline interrupted", e);
-            } catch (ExecutionException e) {
-                Throwable cause = e.getCause();
-                // Preserve a tagged domain failure (e.g. NATIVE_TOOL_FAILED / IMAGE_UNREADABLE
-                // raised inside a worker) so its kind — and exit code — survives the pool boundary
-                // instead of being flattened to a generic IOException.
-                if (cause instanceof DespeckleException de) {
-                    throw de;
-                }
-                if (cause instanceof IOException io) {
-                    throw io;
-                }
-                throw new IOException("pipeline task failed", cause);
-            }
-        }
-        return results;
-    }
 }
diff --git a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/report/HtmlReporter.java b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/report/HtmlReporter.java
index 2024be8..32d243f 100644
--- a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/report/HtmlReporter.java
+++ b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/report/HtmlReporter.java
@@ -88,11 +88,13 @@ public void addPage(Path relativeStem, Path inputImage, Path outputImage, Proces
         }
         writeOverlayAndAccumulate(beforeImg, afterImg, overlayWebp);
 
+        // The report path always runs with component counting on (DespeckleService enables it
+        // whenever a reportDir is set), so the counts are present here; orElse(0) is type-driven.
         stats.add(
                 new PageStat(
                         stem,
-                        result.componentsBefore(),
-                        result.componentsAfter(),
+                        result.componentsBefore().orElse(0),
+                        result.componentsAfter().orElse(0),
                         result.removedBlackPixelRatio()));
     }
 
diff --git a/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleanerTest.java b/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleanerTest.java
index 4bd4523..15d3cef 100644
--- a/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleanerTest.java
+++ b/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleanerTest.java
@@ -1,6 +1,7 @@
 package io.github.p4suta.despeckle.infrastructure.leptonica;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import io.github.p4suta.despeckle.domain.model.OutputFormat;
@@ -231,4 +232,37 @@ void fillHolesSparesTheGapInsideAThinWalledGlyph(@TempDir Path dir) throws Excep
                     ring, cleaned.blackPixels(), "the thin-walled gap is preserved, not filled");
         }
     }
+
+    @Test
+    void skippedComponentCountingStillCleansAndGuardsOverRemoval(@TempDir Path dir)
+            throws Exception {
+        // The same speck-removal scenario as removesSpecksButPreservesGlyph, with counting off:
+        // the output is identical, the result carries no component stats, and the always-measured
+        // black-pixel math still drives the over-removal guard.
+        Path src = dir.resolve("page.pbm");
+        Path out = dir.resolve("page-out.pbm");
+        boolean[][] img = TestImages.blank(40, 40);
+        TestImages.fillRect(img, 8, 8, 19, 25);
+        TestImages.dot(img, 2, 2);
+        TestImages.dot(img, 35, 30);
+        TestImages.dot(img, 30, 4);
+        TestImages.writePbm(src, img);
+
+        ProcessResult result =
+                cleaner.clean(
+                        src,
+                        out,
+                        OutputFormat.PBM,
+                        ProcessOptions.of(OptionalInt.of(300), OptionalInt.of(3), false)
+                                .withoutComponentStats());
+
+        assertFalse(result.hasComponentStats());
+        assertEquals(0, result.componentsRemoved(), "absent counts read as zero");
+        assertEquals(3L, result.blackPixelsRemoved(), "three 1px specks gone");
+        assertFalse(result.isOverRemoval(), "3 of 219 black pixels is under the 3% threshold");
+        try (Pix cleaned = Pix.read(out)) {
+            assertEquals(1, cleaned.connectedComponents());
+            assertEquals(12L * 18L, cleaned.blackPixels(), "the whole glyph survives intact");
+        }
+    }
 }
diff --git a/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/tools/CleanerBenchmark.java b/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/tools/CleanerBenchmark.java
new file mode 100644
index 0000000..6811533
--- /dev/null
+++ b/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/tools/CleanerBenchmark.java
@@ -0,0 +1,360 @@
+package io.github.p4suta.despeckle.infrastructure.tools;
+
+import io.github.p4suta.despeckle.domain.model.OutputFormat;
+import io.github.p4suta.despeckle.domain.model.ProcessOptions;
+import io.github.p4suta.despeckle.infrastructure.leptonica.LeptonicaPageCleaner;
+import io.github.p4suta.despeckle.testsupport.TestImages;
+import io.github.p4suta.shared.imaging.Pix;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.Random;
+
+/**
+ * Op-level micro-benchmark for the despeckle page cleaner: times each Leptonica primitive {@link
+ * LeptonicaPageCleaner} composes, plus the end-to-end {@code clean()}, on a deterministic synthetic
+ * 600-dpi A5 page (3496×4961 px — the pdfbook pipeline's real page geometry). Writes a Markdown
+ * table to {@code despeckle/docs/cleaner-baseline.md} so every optimization claim is judged against
+ * a committed measurement, mirroring the pipeline's {@code benchPipeline} convention.
+ *
+ * <p>Each op row reports the median and minimum of N timed reps (after warmup), the number of times
+ * {@code clean()} invokes that op per page with default options, and the estimated share of the
+ * end-to-end time the op therefore accounts for. The Σ(median × calls) sanity row should land near
+ * the {@code clean()} median; a large gap means an untimed cost (allocation, G4 codec internals).
+ *
+ * <p>Test-sources tool (driven by the {@code benchCleaner} Gradle task): never ships, and needs the
+ * dev container's Leptonica. Usage: {@code CleanerBenchmark <outDoc> [reps]}.
+ */
+public final class CleanerBenchmark {
+
+    private static final int DPI = 600;
+    private static final int WIDTH = Math.round(148f * DPI / 25.4f); // A5 portrait
+    private static final int HEIGHT = Math.round(210f * DPI / 25.4f);
+    private static final int WARMUP_REPS = 2;
+
+    // The cleaner's literal Leptonica selection flags (pix.h values, see LeptonicaPageCleaner).
+    private static final int CONN_8 = 8;
+    private static final int IF_EITHER = 5;
+    private static final int IF_GT = 2;
+
+    // Default-derived sizes at 600 dpi: speck k=6, isolated dust 15, proximity 21, stroke 3.
+    private static final int SPECK = 6;
+    private static final int DUST = 15;
+    private static final int PROXIMITY = 21;
+    private static final int STROKE = 3;
+
+    private CleanerBenchmark() {}
+
+    /** One measured op: its label, how often clean() runs it, and the timed reps. */
+    private record Row(String op, int callsPerClean, double medianMs, double minMs) {}
+
+    public static void main(String[] args) throws IOException {
+        Path outDoc = Path.of(args.length > 0 ? args[0] : "despeckle/docs/cleaner-baseline.md");
+        int reps = args.length > 1 ? Integer.parseInt(args[1]) : 10;
+
+        Path work = Files.createTempDirectory("cleaner-bench");
+        try {
+            Path pbm = work.resolve("page.pbm");
+            TestImages.writePbm(pbm, syntheticPage());
+            Path g4 = work.resolve("page.tif");
+            try (Pix page = Pix.read(pbm)) {
+                page.setResolution(DPI);
+                page.writeTiffG4(g4);
+            }
+
+            List<Row> rows = new ArrayList<>();
+            try (Pix page = Pix.read(g4);
+                    Pix inverted = page.inverted();
+                    Pix text6 = page.selectBySize(SPECK, SPECK, CONN_8, IF_EITHER, IF_GT);
+                    Pix text15 = page.selectBySize(DUST, DUST, CONN_8, IF_EITHER, IF_GT)) {
+
+                rows.add(timePix("read TIFF-G4", 1, reps, () -> Pix.read(g4)));
+                rows.add(
+                        timePix(
+                                "selectBySize k=6 (page)",
+                                1,
+                                reps,
+                                () -> page.selectBySize(SPECK, SPECK, CONN_8, IF_EITHER, IF_GT)));
+                rows.add(
+                        timePix(
+                                "selectBySize 15 (page)",
+                                1,
+                                reps,
+                                () -> page.selectBySize(DUST, DUST, CONN_8, IF_EITHER, IF_GT)));
+                rows.add(
+                        timePix(
+                                "selectBySize k=6 (inverted)",
+                                2,
+                                reps,
+                                () ->
+                                        inverted.selectBySize(
+                                                SPECK, SPECK, CONN_8, IF_EITHER, IF_GT)));
+                rows.add(
+                        timePix(
+                                "dilate 43x43 (text mask)",
+                                1,
+                                reps,
+                                () -> text15.dilated(PROXIMITY)));
+                rows.add(timePix("open 7x7 (page)", 1, reps, () -> page.opened(STROKE)));
+                rows.add(timePix("invert", 2, reps, page::inverted));
+                rows.add(timePix("subtract", 5, reps, () -> page.subtract(text6)));
+                rows.add(timePix("and", 1, reps, () -> page.and(text6)));
+                rows.add(timePix("or", 3, reps, () -> page.or(text6)));
+                rows.add(timeVoid("countConnComp", 2, reps, page::connectedComponents));
+                rows.add(timeVoid("countPixels", 2, reps, page::blackPixels));
+                Path scratchTif = work.resolve("write.tif");
+                rows.add(timeVoid("write TIFF-G4", 1, reps, () -> page.writeTiffG4(scratchTif)));
+            }
+
+            LeptonicaPageCleaner cleaner = new LeptonicaPageCleaner();
+            Path cleaned = work.resolve("cleaned.tif");
+            Row clean =
+                    timeVoid(
+                            "clean() end-to-end",
+                            1,
+                            reps,
+                            () -> cleaner.clean(g4, cleaned, OutputFormat.TIFF, options()));
+            // The pipeline's actual configuration (no report -> component counting skipped).
+            Row cleanNoStats =
+                    timeVoid(
+                            "clean() without component stats",
+                            1,
+                            reps,
+                            () ->
+                                    cleaner.clean(
+                                            g4,
+                                            cleaned,
+                                            OutputFormat.TIFF,
+                                            options().withoutComponentStats()));
+
+            String report = render(rows, clean, cleanNoStats, reps);
+            Path parent = outDoc.toAbsolutePath().getParent();
+            if (parent != null) {
+                Files.createDirectories(parent);
+            }
+            Files.writeString(outDoc, report, StandardCharsets.UTF_8);
+            System.out.print(report);
+            System.err.println();
+            System.err.println("→ wrote " + outDoc);
+        } finally {
+            deleteTree(work);
+        }
+    }
+
+    private static ProcessOptions options() {
+        // The pipeline's exact configuration: defaults (fillHoles + isolatedDust on) at a known
+        // dpi, so the derived sizes match the op rows above.
+        return ProcessOptions.defaults().withDpi(DPI);
+    }
+
+    // ---- timing ----
+
+    @FunctionalInterface
+    private interface PixOp {
+        Pix run() throws IOException;
+    }
+
+    @FunctionalInterface
+    private interface VoidOp {
+        void run() throws IOException;
+    }
+
+    /** Times an op returning a Pix; the result's close() happens outside the timed window. */
+    private static Row timePix(String op, int calls, int reps, PixOp body) throws IOException {
+        double[] samples = new double[reps];
+        for (int i = -WARMUP_REPS; i < reps; i++) {
+            long start = System.nanoTime();
+            Pix result = body.run();
+            long elapsed = System.nanoTime() - start;
+            result.close();
+            if (i >= 0) {
+                samples[i] = elapsed / 1e6;
+            }
+        }
+        return new Row(op, calls, median(samples), Arrays.stream(samples).min().orElse(0));
+    }
+
+    private static Row timeVoid(String op, int calls, int reps, VoidOp body) throws IOException {
+        double[] samples = new double[reps];
+        for (int i = -WARMUP_REPS; i < reps; i++) {
+            long start = System.nanoTime();
+            body.run();
+            long elapsed = System.nanoTime() - start;
+            if (i >= 0) {
+                samples[i] = elapsed / 1e6;
+            }
+        }
+        return new Row(op, calls, median(samples), Arrays.stream(samples).min().orElse(0));
+    }
+
+    private static double median(double[] values) {
+        double[] sorted = values.clone();
+        Arrays.sort(sorted);
+        int n = sorted.length;
+        return (n % 2 == 1) ? sorted[n / 2] : (sorted[n / 2 - 1] + sorted[n / 2]) / 2.0;
+    }
+
+    // ---- the synthetic page ----
+
+    /**
+     * A deterministic 600-dpi A5 "scan": right-to-left glyph columns (despeckle's protected text),
+     * 1–3 px dust everywhere (the speck filter's work), isolated 8–12 px blots in the margins (the
+     * isolated-dust pass's work), and pin-holes punched into glyph blocks (the fill-holes pass's
+     * work). Fixed seed, so the baseline is reproducible.
+     */
+    private static boolean[][] syntheticPage() {
+        boolean[][] img = TestImages.blank(WIDTH, HEIGHT);
+        Random random = new Random(42);
+
+        int margin = WIDTH / 10;
+        int glyph = Math.max(4, WIDTH / 60);
+        int leading = glyph / 2;
+        int top = HEIGHT / 12;
+        int bottom = HEIGHT - HEIGHT / 12;
+        int glyphIndex = 0;
+        for (int x = WIDTH - margin - glyph; x >= margin; x -= glyph + leading) {
+            int y = top;
+            while (y + glyph <= bottom) {
+                if (random.nextInt(100) < 8) {
+                    y += glyph * (2 + random.nextInt(4));
+                    continue;
+                }
+                TestImages.fillRect(img, x, y, x + glyph - 3, y + glyph - 3);
+                // Every ~40th glyph carries a pin-hole, so fillHoles has real work.
+                if (glyphIndex % 40 == 0) {
+                    int hx = x + glyph / 2;
+                    int hy = y + glyph / 2;
+                    img[hy][hx] = false;
+                    img[hy][hx + 1] = false;
+                    img[hy + 1][hx] = false;
+                    img[hy + 1][hx + 1] = false;
+                }
+                glyphIndex++;
+                y += glyph;
+            }
+        }
+
+        // Salt-and-pepper dust (1–3 px), what the main speck filter removes.
+        int specks = WIDTH * HEIGHT / 25_000;
+        for (int i = 0; i < specks; i++) {
+            int size = 1 + random.nextInt(3);
+            int x = random.nextInt(WIDTH - size);
+            int y = random.nextInt(HEIGHT - size);
+            TestImages.fillRect(img, x, y, x + size - 1, y + size - 1);
+        }
+
+        // Isolated 8–12 px blots out in the margins, what the isolated-dust pass removes.
+        for (int i = 0; i < 20; i++) {
+            int size = 8 + random.nextInt(5);
+            int x =
+                    random.nextBoolean()
+                            ? random.nextInt(margin - size)
+                            : WIDTH - margin + random.nextInt(margin - size);
+            int y = top + random.nextInt(bottom - top - size);
+            TestImages.fillRect(img, x, y, x + size - 1, y + size - 1);
+        }
+        return img;
+    }
+
+    // ---- rendering ----
+
+    private static String render(List<Row> rows, Row clean, Row cleanNoStats, int reps) {
+        var sb = new StringBuilder();
+        sb.append("# despeckle cleaner op-level baseline\n\n")
+                .append("Generated by `CleanerBenchmark`")
+                .append(" (`./gradlew :despeckle:infrastructure:benchCleaner`, dev container).\n")
+                .append("Times each Leptonica primitive the page cleaner composes on a synthetic\n")
+                .append("600-dpi A5 page (")
+                .append(WIDTH)
+                .append("x")
+                .append(HEIGHT)
+                .append(" px, fixed seed). Re-run after any change to the\n")
+                .append("cleaner or the imaging bindings and compare before merging.\n\n");
+        String date =
+                DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss", Locale.ROOT)
+                        .withZone(ZoneOffset.UTC)
+                        .format(Instant.now());
+        sb.append("- Date (UTC): ").append(date).append('\n');
+        sb.append("- Host: ")
+                .append(System.getProperty("os.name", "?"))
+                .append(' ')
+                .append(System.getProperty("os.arch", "?"))
+                .append(", ")
+                .append(Runtime.getRuntime().availableProcessors())
+                .append(" CPUs\n");
+        sb.append("- Samples: median of ")
+                .append(reps)
+                .append(" reps after ")
+                .append(WARMUP_REPS)
+                .append(" warmups; single-threaded.\n\n");
+
+        sb.append("| op | median (ms) | min (ms) | calls/clean() | est. share of clean() |\n");
+        sb.append("|---|---:|---:|---:|---:|\n");
+        double estimatedTotal = 0;
+        for (Row row : rows) {
+            double estimated = row.medianMs() * row.callsPerClean();
+            estimatedTotal += estimated;
+            sb.append(
+                    String.format(
+                            Locale.ROOT,
+                            "| %s | %.2f | %.2f | %d | %.1f%% |%n",
+                            row.op(),
+                            row.medianMs(),
+                            row.minMs(),
+                            row.callsPerClean(),
+                            estimated * 100.0 / clean.medianMs()));
+        }
+        sb.append(
+                String.format(
+                        Locale.ROOT,
+                        "| **Σ(median × calls)** | %.2f | | | %.1f%% |%n",
+                        estimatedTotal,
+                        estimatedTotal * 100.0 / clean.medianMs()));
+        sb.append(
+                String.format(
+                        Locale.ROOT,
+                        "| **clean() end-to-end** | %.2f | %.2f | 1 | 100%% |%n",
+                        clean.medianMs(),
+                        clean.minMs()));
+        sb.append(
+                String.format(
+                        Locale.ROOT,
+                        "| **clean() without component stats** | %.2f | %.2f | 1 | %.1f%% |%n",
+                        cleanNoStats.medianMs(),
+                        cleanNoStats.minMs(),
+                        cleanNoStats.medianMs() * 100.0 / clean.medianMs()));
+        sb.append(
+                        "\nThe Σ row landing near 100% means the table accounts for clean()'s real"
+                                + " cost;\n")
+                .append(
+                        "a large gap points at untimed work (allocation churn, codec"
+                                + " internals).\n");
+        return sb.toString();
+    }
+
+    private static void deleteTree(Path dir) throws IOException {
+        if (!Files.exists(dir)) {
+            return;
+        }
+        try (var paths = Files.walk(dir)) {
+            paths.sorted((a, b) -> b.getNameCount() - a.getNameCount())
+                    .forEach(
+                            p -> {
+                                try {
+                                    Files.deleteIfExists(p);
+                                } catch (IOException e) {
+                                    System.err.println(
+                                            "warn: could not delete " + p + ": " + e.getMessage());
+                                }
+                            });
+        }
+    }
+}
diff --git a/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/Jbig2Assembler.java b/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/Jbig2Assembler.java
index 7db4260..a7549e5 100644
--- a/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/Jbig2Assembler.java
+++ b/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/Jbig2Assembler.java
@@ -3,13 +3,12 @@
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.OptionalInt;
-import java.util.concurrent.ExecutorService;
 import org.jspecify.annotations.Nullable;
 
 /**
  * Packs a directory of cleaned bitonal pages into a lossless-JBIG2 PDF. The abstraction over the
  * {@code jbig2} encoder + PDFBox container; the implementation ({@code
- * infrastructure.pdf.PdfBoxJbig2Assembler}) runs the per-page encodes on the supplied pool.
+ * infrastructure.pdf.PdfBoxJbig2Assembler}) runs at most {@code jobs} per-page encodes at once.
  */
 public interface Jbig2Assembler {
 
@@ -20,7 +19,7 @@ public interface Jbig2Assembler {
      * @param outPdf the lossless-JBIG2 PDF to write
      * @param source a PDF whose Info dict, XMP and version are inherited, or {@code null} for none
      * @param forcedDpi a single DPI to size every page with, or empty to read each image's own
-     * @param pool the worker pool the per-page encodes run on
+     * @param jobs how many per-page encodes may run at once
      * @param scratchDir scratch directory for the intermediate per-page JBIG2 streams
      *     (caller-owned)
      * @throws IOException if the directory is empty, a tool fails, or the write fails
@@ -30,7 +29,7 @@ void assemble(
             Path outPdf,
             @Nullable Path source,
             OptionalInt forcedDpi,
-            ExecutorService pool,
+            int jobs,
             Path scratchDir)
             throws IOException;
 }
diff --git a/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/PdfImageExtractor.java b/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/PdfImageExtractor.java
index 77a4969..f80348b 100644
--- a/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/PdfImageExtractor.java
+++ b/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/PdfImageExtractor.java
@@ -2,13 +2,12 @@
 
 import java.io.IOException;
 import java.nio.file.Path;
-import java.util.concurrent.ExecutorService;
 
 /**
  * Extracts a PDF's embedded bitonal images, and reports its dominant scan resolution. The
  * abstraction over the {@code pdfimages}/{@code pdfinfo} drivers; the implementation ({@code
- * infrastructure.pdf.PdfImagesCliExtractor}) splits the page range across the supplied pool, one
- * {@code pdfimages} invocation per chunk.
+ * infrastructure.pdf.PdfImagesCliExtractor}) splits the page range into bounded chunks, one {@code
+ * pdfimages} invocation per chunk.
  */
 public interface PdfImageExtractor {
 
@@ -17,7 +16,7 @@ public interface PdfImageExtractor {
 
     /**
      * Extract all pages of {@code pdf} into {@code outDir} as TIFFs, parallelized over page-range
-     * chunks. {@code jobs} bounds both the chunk count and the pool slots used.
+     * chunks. at most {@code jobs} chunks run at once.
      */
-    void extract(Path pdf, Path outDir, int jobs, ExecutorService pool) throws IOException;
+    void extract(Path pdf, Path outDir, int jobs) throws IOException;
 }
diff --git a/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/package-info.java b/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/package-info.java
index 4c8bdf4..23f5510 100644
--- a/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/package-info.java
+++ b/despeckle/port/src/main/java/io/github/p4suta/despeckle/port/package-info.java
@@ -2,9 +2,9 @@
  * The despeckle ports: the narrow interfaces the application layer drives and the infrastructure
  * layer implements. Every signature is expressed purely in terms of {@code domain.model} value
  * types and JDK types ({@link java.nio.file.Path}, primitives, {@link java.util.OptionalInt},
- * {@link java.util.List}, {@link java.util.concurrent.ExecutorService}, {@link
- * java.io.IOException}); no Leptonica {@code Pix}, PDFBox or AWT type ever crosses a port boundary,
- * so the dependency is kept unidirectional and the adapters stay swappable.
+ * {@link java.util.List}, {@link java.io.IOException}); no Leptonica {@code Pix}, PDFBox or AWT
+ * type ever crosses a port boundary, so the dependency is kept unidirectional and the adapters stay
+ * swappable.
  */
 @NullMarked
 package io.github.p4suta.despeckle.port;
diff --git a/pipeline/README.md b/pipeline/README.md
index b8d47ef..12d15ef 100644
--- a/pipeline/README.md
+++ b/pipeline/README.md
@@ -33,6 +33,7 @@ never stops the rest; existing outputs are skipped unless `--force`).
 | `--pdf-a` | off | emit PDF/A-2b conformance |
 | `--force` | off | overwrite an existing output (batch: regenerate, don't skip) |
 | `--progress-file <path>` | — | write machine-readable JSONL progress events (single input only) |
+| `--timings` | off | print a per-stage wall-clock breakdown to stderr when each run ends |
 | `-i, --interactive` | off | guided mode: prompt for the input, options and output |
 | `-h, --help` | — | show help and exit |
 | `-V, --version` | — | print version and exit |
diff --git a/pipeline/app/build.gradle.kts b/pipeline/app/build.gradle.kts
index 9764cfe..5c2572d 100644
--- a/pipeline/app/build.gradle.kts
+++ b/pipeline/app/build.gradle.kts
@@ -34,6 +34,10 @@ dependencies {
     implementation(libs.commons.cli)
     implementation(libs.slf4j.api)
     runtimeOnly(libs.slf4j.simple)
+
+    // The benchmark fixture generator (test sources, never shipped — mirroring register's
+    // createSamplePdf) draws synthetic scan pages with PDFBox directly.
+    testImplementation(libs.pdfbox)
 }
 
 // The one place native access is granted to the launched app; run, test and JavaExec inherit it.
@@ -85,3 +89,46 @@ selfContainedApp {
     // jbig2 (its register stage writes TIFF-G4; the spread pack embeds CCITT G4).
     bundleQpdf(this, libs.versions.qpdf.get())
 }
+
+// ---- Stage-level benchmark (see pipeline/docs/perf-baseline.md) ---------------------------------
+
+// Deterministic synthetic scan book for the benchmark: an existing output is reused, so the
+// generation cost (a minute at 200 pages × 600 dpi) is paid once. Knob: -Ppages=N (default 200).
+tasks.register<JavaExec>("createSampleScan") {
+    group = "verification"
+    description = "Generate the synthetic bitonal scan book the benchmark converts (cached)"
+    dependsOn(tasks.named("testClasses"))
+    classpath = sourceSets["test"].runtimeClasspath
+    mainClass = "io.github.p4suta.pipeline.tools.SampleScanGenerator"
+    val pages = providers.gradleProperty("pages").getOrElse("200")
+    args = listOf("build/test-data/sample-scan-${pages}p.pdf", pages, "600")
+}
+
+// Stage-level runtime + memory benchmark (the pdfbook counterpart of tate's benchRuntime): runs the
+// installDist launcher in-container with --timings, parses the per-stage breakdown, samples peak
+// RSS from /proc, and writes pipeline/docs/perf-baseline.md. Knobs: -Pruns=N (warm runs, default
+// 3), -Pjobs=1,8 (comma-separated -j sweep; default auto = the launcher's CPU-count default),
+// -Ppages=N (fixture size, default 200), -Pinputs="a.pdf b.pdf" (real books instead of the
+// fixture; resolved against the repo root).
+tasks.register<JavaExec>("benchPipeline") {
+    group = "verification"
+    description = "Benchmark pdfbook stage timings + peak memory; writes pipeline/docs/perf-baseline.md"
+    dependsOn(tasks.named("installDist"), tasks.named("createSampleScan"))
+    classpath = sourceSets["test"].runtimeClasspath
+    mainClass = "io.github.p4suta.pipeline.tools.PipelineBenchmark"
+    workingDir = rootDir
+    val runs = providers.gradleProperty("runs").getOrElse("3")
+    val jobs = providers.gradleProperty("jobs").getOrElse("auto")
+    val pages = providers.gradleProperty("pages").getOrElse("200")
+    val extraInputs =
+        providers
+            .gradleProperty("inputs")
+            .orNull
+            ?.split(Regex("\\s+"))
+            ?.filter { it.isNotBlank() }
+            ?: emptyList()
+    val launcher = "pipeline/app/build/install/pdfbook/bin/pdfbook"
+    val inputs =
+        extraInputs.ifEmpty { listOf("pipeline/app/build/test-data/sample-scan-${pages}p.pdf") }
+    args = listOf(launcher, "qpdf", "pipeline/docs/perf-baseline.md", runs, jobs) + inputs
+}
diff --git a/pipeline/app/src/main/java/io/github/p4suta/pipeline/Main.java b/pipeline/app/src/main/java/io/github/p4suta/pipeline/Main.java
index 67b1db9..81ccc68 100644
--- a/pipeline/app/src/main/java/io/github/p4suta/pipeline/Main.java
+++ b/pipeline/app/src/main/java/io/github/p4suta/pipeline/Main.java
@@ -2,20 +2,62 @@
 
 import io.github.p4suta.pipeline.cli.PipelineCommand;
 import io.github.p4suta.shared.observability.FatalUncaughtHandler;
+import java.time.Duration;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 
 /** Process entry point for the unified pipeline tool ({@code pdfbook}). */
 public final class Main {
 
+    /**
+     * How long the shutdown hook waits for the interrupted run to unwind. Generous: the page
+     * fan-outs quiesce in at most one page's worth of native work per worker, and the extractor
+     * kills its children on interrupt.
+     */
+    private static final Duration SHUTDOWN_GRACE = Duration.ofSeconds(15);
+
     private Main() {}
 
     /**
-     * Installs the fatal uncaught-exception handler, runs the CLI, and translates the result into a
-     * process exit code. All argument parsing and terminal output live in {@link PipelineCommand}.
+     * Installs the fatal uncaught-exception handler and the Ctrl-C cleanup hook, runs the CLI, and
+     * translates the result into a process exit code. All argument parsing and terminal output live
+     * in {@link PipelineCommand}.
+     *
+     * <p>On Ctrl-C (or any normal-termination signal) the shutdown hook interrupts the main thread
+     * and waits — bounded by {@link #SHUTDOWN_GRACE} — for the run to unwind: the page fan-outs
+     * stop their workers and quiesce, subprocesses are killed, and the run's {@code finally} blocks
+     * delete the temp work area. A Ctrl-C therefore no longer leaks a {@code p4suta-pipeline-}
+     * directory full of intermediates. On a normal exit the latch is already down, so the hook
+     * returns immediately.
      *
      * @param args command-line arguments
      */
     public static void main(String[] args) {
         Thread.setDefaultUncaughtExceptionHandler(new FatalUncaughtHandler());
-        System.exit(new PipelineCommand().run(args));
+        CountDownLatch unwound = new CountDownLatch(1);
+        Thread main = Thread.currentThread();
+        Runtime.getRuntime()
+                .addShutdownHook(
+                        new Thread(
+                                () -> {
+                                    main.interrupt();
+                                    try {
+                                        boolean clean =
+                                                unwound.await(
+                                                        SHUTDOWN_GRACE.toMillis(),
+                                                        TimeUnit.MILLISECONDS);
+                                        if (!clean) {
+                                            System.err.println(
+                                                    "pdfbook: shutdown grace expired; temp files"
+                                                            + " may remain");
+                                        }
+                                    } catch (InterruptedException ignored) {
+                                        // The JVM is exiting anyway; stop waiting.
+                                    }
+                                },
+                                "pdfbook-shutdown"));
+        int code = new PipelineCommand().run(args);
+        unwound.countDown();
+        System.exit(code);
     }
 }
diff --git a/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/PipelineCommand.java b/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/PipelineCommand.java
index 99e453c..25f8125 100644
--- a/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/PipelineCommand.java
+++ b/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/PipelineCommand.java
@@ -153,6 +153,13 @@ private static Options buildOptions() {
                                 "Write machine-readable JSONL progress events to this file (single"
                                         + " input only); used by front ends to report progress.")
                         .get());
+        options.addOption(
+                Option.builder()
+                        .longOpt("timings")
+                        .desc(
+                                "Print a per-stage wall-clock breakdown to stderr when each run"
+                                        + " ends.")
+                        .get());
         CliDocs.options(options);
         return options;
     }
@@ -351,6 +358,7 @@ record Plan(Path input, Path output, Config config) {}
                         deskew,
                         scale,
                         pdfA,
+                        false,
                         force);
         return new Plan(input, output, config);
     }
@@ -380,14 +388,32 @@ private static String defaultOutput(Path input) {
     private static void runOne(Path input, Path output, Config config, @Nullable Path progressFile)
             throws IOException {
         if (progressFile == null) {
-            runWith(input, output, config, ProgressSink.NO_OP);
+            runWith(input, output, config, withTimings(config, ProgressSink.NO_OP));
         } else {
             try (JsonlFileProgressSink progress = new JsonlFileProgressSink(progressFile)) {
-                runWith(input, output, config, progress);
+                runWith(input, output, config, withTimings(config, progress));
             }
         }
     }
 
+    /**
+     * Wraps {@code sink} with a fresh {@link StageTimingSink} when {@code --timings} is set, so
+     * each run (every book of a batch separately) prints its own per-stage breakdown to stderr.
+     */
+    private static ProgressSink withTimings(Config config, ProgressSink sink) {
+        if (!config.timings()) {
+            return sink;
+        }
+        StageTimingSink timings = new StageTimingSink(System.err);
+        if (sink == ProgressSink.NO_OP) {
+            return timings;
+        }
+        return event -> {
+            sink.emit(event);
+            timings.emit(event);
+        };
+    }
+
     // Resolves the progress sink first so the stages and sink report page-level PageProcessed
     // events into the same sink PipelineRunner reports stage boundaries into. With no
     // --progress-file the sink is NO_OP and every emit is a no-op.
@@ -401,9 +427,11 @@ private static void runWith(Path input, Path output, Config config, ProgressSink
             stages.add(new RegisterStage(config.jobs(), config.deskew(), config.scale(), progress));
         }
         if (stages.isEmpty()) {
-            // --no-despeckle --no-register: the raw pdfimages TIFFs are not CCITT G4, which the
-            // spread sink's pass-through embedding requires; despeckle/register each re-encode G4
-            // themselves, so only the no-stage path needs this normalization.
+            // --no-despeckle --no-register: a non-CCITT source extracts as decoded TIFFs that are
+            // not the single-strip CCITT G4 the spread sink's pass-through embedding requires;
+            // despeckle/register each re-encode G4 themselves, so only the no-stage path needs
+            // this normalization (an all-CCITT source arrives already G4 — then this is a cheap
+            // lossless re-encode that keeps the path uniform).
             stages.add(new G4EncodeStage(config.jobs(), progress));
         }
         Source source = new PdfExtractSource(input, config.jobs());
@@ -447,6 +475,7 @@ private static Config parseConfig(CommandLine cmd) throws ParseException {
                 !cmd.hasOption("no-deskew"),
                 !cmd.hasOption("no-scale"),
                 cmd.hasOption("pdf-a"),
+                cmd.hasOption("timings"),
                 cmd.hasOption("force"));
     }
 
@@ -475,5 +504,6 @@ record Config(
             boolean deskew,
             boolean scale,
             boolean pdfA,
+            boolean timings,
             boolean force) {}
 }
diff --git a/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/StageTimingSink.java b/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/StageTimingSink.java
new file mode 100644
index 0000000..496b0e4
--- /dev/null
+++ b/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/StageTimingSink.java
@@ -0,0 +1,93 @@
+package io.github.p4suta.pipeline.cli;
+
+import io.github.p4suta.shared.progress.ProgressEvent;
+import io.github.p4suta.shared.progress.ProgressSink;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import org.jspecify.annotations.Nullable;
+
+/**
+ * Measures each stage's wall clock from its {@link ProgressEvent.StageStarted}/{@link
+ * ProgressEvent.StageCompleted} boundaries and prints a per-stage breakdown when the run ends — the
+ * {@code --timings} flag's implementation. One line per stage in completion order, then the
+ * run-wide total:
+ *
+ * <pre>{@code
+ * timing: extract = 4.21s (18.3%)
+ * timing: despeckle = 9.87s (42.9%)
+ * timing: total = 23.01s
+ * }</pre>
+ *
+ * <p>The {@code timing: <stage> = <seconds>s} shape is a stable contract the {@code benchPipeline}
+ * harness parses; keep it machine-readable. A stage still open when the run fails is reported with
+ * its elapsed-so-far, so a failed run still shows where the time went. Thread-safe like every
+ * {@link ProgressSink}: events are handled under one lock.
+ */
+final class StageTimingSink implements ProgressSink {
+
+    private final PrintStream out;
+    private final Object lock = new Object();
+    private final List<String> stages = new ArrayList<>();
+    private final List<Long> stageNanos = new ArrayList<>();
+    private @Nullable String openStage;
+    private long openedAtNanos;
+    private long runStartedAtNanos;
+    private boolean runStarted;
+
+    StageTimingSink(PrintStream out) {
+        this.out = out;
+    }
+
+    @Override
+    public void emit(ProgressEvent event) {
+        synchronized (lock) {
+            switch (event) {
+                case ProgressEvent.RunStarted ignored -> markRunStarted();
+                case ProgressEvent.StageStarted s -> {
+                    // Defensive: a sink wired mid-run still measures from the first boundary.
+                    markRunStarted();
+                    openStage = s.stage();
+                    openedAtNanos = System.nanoTime();
+                }
+                case ProgressEvent.StageCompleted ignored -> closeOpenStage();
+                case ProgressEvent.PageProcessed ignored -> {
+                    // Stage boundaries carry all the timing information.
+                }
+                case ProgressEvent.RunCompleted ignored -> report();
+                case ProgressEvent.RunFailed ignored -> report();
+            }
+        }
+    }
+
+    private void markRunStarted() {
+        if (!runStarted) {
+            runStartedAtNanos = System.nanoTime();
+            runStarted = true;
+        }
+    }
+
+    private void closeOpenStage() {
+        @Nullable String stage = openStage;
+        if (stage != null) {
+            stages.add(stage);
+            stageNanos.add(System.nanoTime() - openedAtNanos);
+            openStage = null;
+        }
+    }
+
+    private void report() {
+        closeOpenStage();
+        long totalNanos = runStarted ? System.nanoTime() - runStartedAtNanos : 0;
+        for (int i = 0; i < stages.size(); i++) {
+            out.printf(
+                    Locale.ROOT,
+                    "timing: %s = %.2fs (%.1f%%)%n",
+                    stages.get(i),
+                    stageNanos.get(i) / 1e9,
+                    totalNanos > 0 ? stageNanos.get(i) * 100.0 / totalNanos : 0.0);
+        }
+        out.printf(Locale.ROOT, "timing: total = %.2fs%n", totalNanos / 1e9);
+    }
+}
diff --git a/pipeline/app/src/test/java/io/github/p4suta/pipeline/cli/StageTimingSinkTest.java b/pipeline/app/src/test/java/io/github/p4suta/pipeline/cli/StageTimingSinkTest.java
new file mode 100644
index 0000000..3fbb3f7
--- /dev/null
+++ b/pipeline/app/src/test/java/io/github/p4suta/pipeline/cli/StageTimingSinkTest.java
@@ -0,0 +1,75 @@
+package io.github.p4suta.pipeline.cli;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import io.github.p4suta.shared.progress.ProgressEvent;
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Pins the {@code --timings} report: one machine-parseable {@code timing: <stage> = <seconds>s}
+ * line per completed stage (in completion order, percentages attached) plus a {@code timing: total}
+ * line, printed only when the run ends — and on failure, the still-open stage is reported with its
+ * elapsed-so-far. The line shape is the contract the {@code benchPipeline} harness parses.
+ */
+final class StageTimingSinkTest {
+
+    private final ByteArrayOutputStream buf = new ByteArrayOutputStream();
+    private final StageTimingSink sink =
+            new StageTimingSink(new PrintStream(buf, true, StandardCharsets.UTF_8));
+
+    private String output() {
+        return buf.toString(StandardCharsets.UTF_8);
+    }
+
+    @Test
+    void completedRunReportsEachStageInOrderAndATotal() {
+        sink.emit(new ProgressEvent.RunStarted(2));
+        sink.emit(new ProgressEvent.StageStarted("extract", 0, 2));
+        sink.emit(new ProgressEvent.PageProcessed("extract", 1, 2));
+        sink.emit(new ProgressEvent.StageCompleted("extract"));
+        sink.emit(new ProgressEvent.StageStarted("spread", 1, 2));
+        sink.emit(new ProgressEvent.StageCompleted("spread"));
+        sink.emit(new ProgressEvent.RunCompleted());
+
+        assertThat(output().lines())
+                .hasSize(3)
+                .satisfiesExactly(
+                        extract ->
+                                assertThat(extract)
+                                        .matches(
+                                                "timing: extract = \\d+\\.\\d{2}s"
+                                                        + " \\(\\d+\\.\\d%\\)"),
+                        spread ->
+                                assertThat(spread)
+                                        .matches(
+                                                "timing: spread = \\d+\\.\\d{2}s"
+                                                        + " \\(\\d+\\.\\d%\\)"),
+                        total -> assertThat(total).matches("timing: total = \\d+\\.\\d{2}s"));
+    }
+
+    @Test
+    void nothingIsPrintedBeforeTheRunEnds() {
+        sink.emit(new ProgressEvent.RunStarted(1));
+        sink.emit(new ProgressEvent.StageStarted("extract", 0, 1));
+        sink.emit(new ProgressEvent.StageCompleted("extract"));
+
+        assertThat(output()).isEmpty();
+    }
+
+    @Test
+    void failedRunReportsTheStillOpenStage() {
+        sink.emit(new ProgressEvent.RunStarted(2));
+        sink.emit(new ProgressEvent.StageStarted("extract", 0, 2));
+        sink.emit(new ProgressEvent.StageCompleted("extract"));
+        sink.emit(new ProgressEvent.StageStarted("register", 1, 2));
+        sink.emit(new ProgressEvent.RunFailed("INTERNAL", "boom"));
+
+        assertThat(output())
+                .contains("timing: extract = ")
+                .contains("timing: register = ")
+                .contains("timing: total = ");
+    }
+}
diff --git a/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/PipelineBenchmark.java b/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/PipelineBenchmark.java
new file mode 100644
index 0000000..1b59397
--- /dev/null
+++ b/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/PipelineBenchmark.java
@@ -0,0 +1,498 @@
+package io.github.p4suta.pipeline.tools;
+
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Stage-level runtime + memory benchmark for the installed {@code pdfbook} launcher — the pdfbook
+ * counterpart of tate's {@code RuntimeBenchmark}, with per-stage attribution as the addition.
+ *
+ * <p>Drives {@code pdfbook <in> -o <tmp>/out.pdf --force --timings [-j N]} as a child process,
+ * measuring end-to-end wall around the process ({@link System#nanoTime()}), peak RSS by sampling
+ * the child's {@code /proc/<pid>/status} {@code VmHWM} (Linux-only; {@code n/a} elsewhere), and the
+ * per-stage wall by parsing the stable {@code timing: <stage> = <seconds>s} lines {@code
+ * StageTimingSink} prints. Writes a Markdown report.
+ *
+ * <p>Test-sources tool (driven by the {@code benchPipeline} Gradle task): it never ships in the
+ * production launcher, and it expects the dev container's native toolchain (pdfimages, Leptonica,
+ * qpdf) on PATH — the installDist launcher, not the jpackage image, is what it measures.
+ *
+ * <p>Usage: {@code PipelineBenchmark <launcher> <qpdf> <outDoc> <runs> <jobsCsv|auto>
+ * <input.pdf>...} — {@code jobsCsv} is a comma-separated {@code -j} sweep ({@code auto} = omit
+ * {@code -j}, i.e. the launcher's CPU-count default).
+ */
+public final class PipelineBenchmark {
+
+    private static final Pattern TIMING =
+            Pattern.compile("^timing: (\\S+) = ([0-9.]+)s", Pattern.MULTILINE);
+    private static final Pattern VM_HWM = Pattern.compile("VmHWM:\\s*([0-9]+)");
+    private static final long POLL_MILLIS = 5;
+    private static final long PROCESS_TIMEOUT_NANOS = TimeUnit.MINUTES.toNanos(30);
+    private static final long MIB = 1024L * 1024L;
+
+    private final Path launcher;
+    private final String qpdf;
+    private final Path outDoc;
+    private final int runs;
+    private final List<String> jobsSweep;
+
+    private PipelineBenchmark(
+            Path launcher, String qpdf, Path outDoc, int runs, List<String> jobsSweep) {
+        this.launcher = launcher;
+        this.qpdf = qpdf;
+        this.outDoc = outDoc;
+        this.runs = runs;
+        this.jobsSweep = jobsSweep;
+    }
+
+    public static void main(String[] args) throws IOException, InterruptedException {
+        if (args.length < 6) {
+            System.err.println(
+                    "usage: PipelineBenchmark <launcher> <qpdf> <outDoc> <runs> <jobsCsv|auto>"
+                            + " <input.pdf>...");
+            System.exit(2);
+            return;
+        }
+        var benchmark =
+                new PipelineBenchmark(
+                        Path.of(args[0]),
+                        args[1],
+                        Path.of(args[2]),
+                        Integer.parseInt(args[3]),
+                        Arrays.stream(args[4].split(",")).map(String::trim).toList());
+        List<Path> inputs = Arrays.stream(args).skip(5).map(Path::of).toList();
+        benchmark.run(inputs);
+    }
+
+    // Result records
+
+    /** One measured child run: wall seconds, peak RSS (KiB, -1 if unavailable), merged output. */
+    private record Timed(double elapsedSeconds, long maxRssKib, String output) {}
+
+    /** A finished input × jobs measurement, ready to render. */
+    private record Row(
+            String name,
+            String jobs,
+            int pages,
+            long inputBytes,
+            double wallMedian,
+            double coldWall,
+            Map<String, Double> stageMedians,
+            long rssMedianKib,
+            long outputBytes) {}
+
+    // Orchestration
+
+    private void run(List<Path> inputs) throws IOException, InterruptedException {
+        requireExecutable(launcher, "pdfbook launcher", "build it first:  just pdfbook-install");
+
+        List<Row> rows = new ArrayList<>();
+        for (Path input : inputs) {
+            if (!Files.isRegularFile(input)) {
+                System.err.println("skip (not found): " + input);
+                continue;
+            }
+            for (String jobs : jobsSweep) {
+                rows.add(measure(input, jobs));
+            }
+        }
+
+        String report = render(rows);
+        Files.createDirectories(requireParent(outDoc));
+        Files.writeString(outDoc, report, StandardCharsets.UTF_8);
+        System.out.print(report);
+        System.err.println();
+        System.err.println("→ wrote " + outDoc);
+    }
+
+    private Row measure(Path input, String jobs) throws IOException, InterruptedException {
+        int pages = pageCount(input);
+        long inputBytes = Files.size(input);
+        System.err.printf(
+                Locale.ROOT,
+                "Measuring: %s (%dp, %s MiB, jobs=%s)…%n",
+                fileName(input),
+                pages,
+                mib(inputBytes),
+                jobs);
+
+        Path work = Files.createTempDirectory("pdfbook-bench");
+        try {
+            Path out = work.resolve("out.pdf");
+            List<String> convert = new ArrayList<>();
+            convert.add(launcher.toString());
+            convert.add(input.toString());
+            convert.add("-o");
+            convert.add(out.toString());
+            convert.add("--force");
+            convert.add("--timings");
+            if (!"auto".equals(jobs)) {
+                convert.add("-j");
+                convert.add(jobs);
+            }
+
+            // Cold run (fresh page cache for the input is not guaranteed, but a fresh JVM is) —
+            // recorded separately from the warm median.
+            Timed cold = timed(convert);
+
+            double[] walls = new double[runs];
+            long[] rsss = new long[runs];
+            Map<String, List<Double>> stages = new LinkedHashMap<>();
+            for (int r = 0; r < runs; r++) {
+                Timed t = timed(convert);
+                walls[r] = t.elapsedSeconds();
+                rsss[r] = t.maxRssKib();
+                parseTimings(t.output())
+                        .forEach(
+                                (stage, seconds) ->
+                                        stages.computeIfAbsent(stage, ignored -> new ArrayList<>())
+                                                .add(seconds));
+            }
+            Map<String, Double> stageMedians = new LinkedHashMap<>();
+            stages.forEach((stage, seconds) -> stageMedians.put(stage, median(seconds)));
+
+            long outputBytes = Files.isRegularFile(out) ? Files.size(out) : -1;
+            return new Row(
+                    fileName(input),
+                    jobs,
+                    pages,
+                    inputBytes,
+                    median(walls),
+                    cold.elapsedSeconds(),
+                    stageMedians,
+                    medianLong(rsss),
+                    outputBytes);
+        } finally {
+            deleteTree(work);
+        }
+    }
+
+    /**
+     * The per-stage seconds of one run, keyed by stage label in print order ({@code total}
+     * included). Repeated labels (a batch run) sum, though the harness always converts one book.
+     */
+    private static Map<String, Double> parseTimings(String output) {
+        Map<String, Double> timings = new LinkedHashMap<>();
+        Matcher m = TIMING.matcher(output);
+        while (m.find()) {
+            timings.merge(m.group(1), Double.parseDouble(m.group(2)), Double::sum);
+        }
+        return timings;
+    }
+
+    // Subprocess measurement
+
+    /**
+     * Runs {@code command}, returning its wall time, peak RSS (sampled from {@code
+     * /proc/<pid>/status} {@code VmHWM}; -1 where unavailable), and its merged stdout+stderr (so
+     * the {@code timing:} lines can be parsed). Output is drained on a separate thread so a chatty
+     * child cannot deadlock on a full pipe.
+     */
+    private static Timed timed(List<String> command) throws IOException, InterruptedException {
+        long start = System.nanoTime();
+        Process process = new ProcessBuilder(command).redirectErrorStream(true).start();
+
+        var captured = new AtomicReference<>("");
+        Thread drainer =
+                Thread.ofVirtual()
+                        .start(
+                                () -> {
+                                    try (var in = process.getInputStream()) {
+                                        captured.set(
+                                                new String(
+                                                        in.readAllBytes(), StandardCharsets.UTF_8));
+                                    } catch (IOException ignored) {
+                                        // Process gone; whatever was read is lost — acceptable for
+                                        // a benchmark.
+                                    }
+                                });
+
+        Path status = Path.of("/proc", Long.toString(process.pid()), "status");
+        long peakRssKib = -1;
+        while (process.isAlive()) {
+            if (System.nanoTime() - start > PROCESS_TIMEOUT_NANOS) {
+                process.destroyForcibly();
+                throw new IOException("timed command did not finish: " + command);
+            }
+            peakRssKib = Math.max(peakRssKib, readVmHwmKib(status));
+            Thread.sleep(POLL_MILLIS);
+        }
+        double elapsed = (System.nanoTime() - start) / 1.0e9;
+        int exit = process.waitFor();
+        drainer.join();
+        if (exit != 0) {
+            throw new IOException(
+                    "benchmark child failed with exit " + exit + ": " + captured.get());
+        }
+        return new Timed(elapsed, peakRssKib, captured.get());
+    }
+
+    /** Peak RSS (KiB) from {@code /proc/<pid>/status}, or -1 if unreadable / non-Linux. */
+    private static long readVmHwmKib(Path status) {
+        try {
+            Matcher m = VM_HWM.matcher(Files.readString(status, StandardCharsets.UTF_8));
+            return m.find() ? Long.parseLong(m.group(1)) : -1;
+        } catch (IOException | RuntimeException e) {
+            return -1; // process already exited, or /proc not present
+        }
+    }
+
+    /** Page count via {@code qpdf --show-npages} (PATH or absolute), or -1 when unavailable. */
+    private int pageCount(Path pdf) throws InterruptedException {
+        try {
+            Process process =
+                    new ProcessBuilder(qpdf, "--show-npages", pdf.toString())
+                            .redirectErrorStream(true)
+                            .start();
+            String output;
+            try (var in = process.getInputStream()) {
+                output = new String(in.readAllBytes(), StandardCharsets.UTF_8);
+            }
+            if (!process.waitFor(1, TimeUnit.MINUTES)) {
+                process.destroyForcibly();
+                return -1;
+            }
+            // The count is the one digits-only line; qpdf may surround it with warning lines.
+            return output.lines()
+                    .map(String::strip)
+                    .filter(line -> line.matches("\\d+"))
+                    .findFirst()
+                    .map(Integer::parseInt)
+                    .orElse(-1);
+        } catch (IOException e) {
+            return -1; // qpdf not installed — page count is cosmetic here
+        }
+    }
+
+    // Numeric helpers
+
+    private static double median(double[] values) {
+        double[] sorted = values.clone();
+        Arrays.sort(sorted);
+        int n = sorted.length;
+        if (n == 0) {
+            return 0;
+        }
+        return (n % 2 == 1) ? sorted[n / 2] : (sorted[n / 2 - 1] + sorted[n / 2]) / 2.0;
+    }
+
+    private static double median(List<Double> values) {
+        return median(values.stream().mapToDouble(Double::doubleValue).toArray());
+    }
+
+    private static long medianLong(long[] values) {
+        return Math.round(median(Arrays.stream(values).asDoubleStream().toArray()));
+    }
+
+    private static String mib(long bytes) {
+        return bytes < 0 ? "n/a" : String.format(Locale.ROOT, "%.1f", bytes / (double) MIB);
+    }
+
+    // Rendering
+
+    private String render(List<Row> rows) {
+        // Stage columns: the union of stage labels across rows in first-appearance order, with
+        // "total" (the launcher's in-process conversion time) pulled out as its own column.
+        Set<String> stageNames = new LinkedHashSet<>();
+        for (Row row : rows) {
+            stageNames.addAll(row.stageMedians().keySet());
+        }
+        stageNames.remove("total");
+
+        var sb = new StringBuilder();
+        sb.append("# pdfbook runtime baseline (stage-level)\n\n")
+                .append("Generated by `PipelineBenchmark`")
+                .append(" (`./gradlew :pipeline:app:benchPipeline`, in the dev container).\n")
+                .append(
+                        "Tracks the **per-stage wall-clock breakdown, end-to-end runtime and peak"
+                                + " memory**\n")
+                .append(
+                        "of the installDist `pdfbook` launcher. Re-run after any change to the"
+                                + " pipeline\n")
+                .append(
+                        "and compare against the previous run before merging (acceptance: ≥5%"
+                                + " median\n")
+                .append(
+                        "total-wall improvement, or an explicit RSS/disk win, with output"
+                                + " validated).\n\n");
+        appendHostInfo(sb);
+        sb.append("\n## Stage breakdown (warm median of ").append(runs).append(" runs)\n\n");
+        sb.append(
+                        "`conv` is the launcher's in-process total (`timing: total`);"
+                                + " `startup+init` = E2E wall − conv\n")
+                .append(
+                        "(JVM boot + first-touch PDFBox/AWT init). `jobs=auto` is the launcher's"
+                                + " CPU-count default.\n\n");
+        sb.append("| Input | Jobs | Pages | E2E wall | conv |");
+        for (String stage : stageNames) {
+            sb.append(' ').append(stage).append(" |");
+        }
+        sb.append(" startup+init | Cold wall | Peak RSS (MiB) | Output (MiB) |\n");
+        sb.append("|---|---|---:|---:|---:|");
+        sb.append("---:|".repeat(stageNames.size()));
+        sb.append("---:|---:|---:|---:|\n");
+        for (Row row : rows) {
+            double conv = row.stageMedians().getOrDefault("total", 0.0);
+            sb.append("| ")
+                    .append(row.name())
+                    .append(" | ")
+                    .append(row.jobs())
+                    .append(" | ")
+                    .append(pages(row.pages()))
+                    .append(" | ")
+                    .append(secs(row.wallMedian()))
+                    .append(" | ")
+                    .append(conv > 0 ? secs(conv) : "n/a")
+                    .append(" |");
+            for (String stage : stageNames) {
+                Double seconds = row.stageMedians().get(stage);
+                sb.append(' ').append(seconds == null ? "n/a" : secs(seconds)).append(" |");
+            }
+            sb.append(' ')
+                    .append(conv > 0 ? secs(Math.max(0, row.wallMedian() - conv)) : "n/a")
+                    .append(" | ")
+                    .append(secs(row.coldWall()))
+                    .append(" | ")
+                    .append(rssMib(row.rssMedianKib()))
+                    .append(" | ")
+                    .append(mib(row.outputBytes()))
+                    .append(" |\n");
+        }
+        sb.append("\n## Stage shares (of conv, warm median)\n\n")
+                .append(
+                        "The shares that decide where optimization effort goes: a stage that is"
+                                + " ~5% of conv\n")
+                .append("cannot pay for a parallelization rewrite no matter how elegant.\n\n");
+        sb.append("| Input | Jobs |");
+        for (String stage : stageNames) {
+            sb.append(' ').append(stage).append(" |");
+        }
+        sb.append('\n').append("|---|---|").append("---:|".repeat(stageNames.size())).append('\n');
+        for (Row row : rows) {
+            double conv = row.stageMedians().getOrDefault("total", 0.0);
+            sb.append("| ").append(row.name()).append(" | ").append(row.jobs()).append(" |");
+            for (String stage : stageNames) {
+                Double seconds = row.stageMedians().get(stage);
+                sb.append(' ')
+                        .append(
+                                seconds == null || conv <= 0
+                                        ? "n/a"
+                                        : String.format(
+                                                Locale.ROOT, "%.1f%%", seconds * 100.0 / conv))
+                        .append(" |");
+            }
+            sb.append('\n');
+        }
+        return sb.toString();
+    }
+
+    private void appendHostInfo(StringBuilder sb) {
+        String date =
+                DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss", Locale.ROOT)
+                        .withZone(ZoneOffset.UTC)
+                        .format(Instant.now());
+        long totalRamBytes = totalPhysicalMemoryBytes();
+        sb.append("- Date (UTC): ").append(date).append('\n');
+        sb.append("- Host: ")
+                .append(System.getProperty("os.name", "?"))
+                .append(' ')
+                .append(System.getProperty("os.version", "?"))
+                .append(' ')
+                .append(System.getProperty("os.arch", "?"))
+                .append(", ")
+                .append(Runtime.getRuntime().availableProcessors())
+                .append(" CPUs, RAM ")
+                .append(totalRamBytes > 0 ? Math.round(totalRamBytes / 1.073741824e9) + "Gi" : "?")
+                .append('\n');
+        sb.append("- Launcher: `").append(launcher).append("`\n");
+        sb.append("- Samples per measurement: cold (1st run) + warm median of ")
+                .append(runs)
+                .append(".\n");
+        sb.append(
+                        "- The default input is the deterministic synthetic fixture"
+                                + " (`createSampleScan`,\n")
+                .append(
+                        "  seeded, so identical across machines). Real books are pluggable via"
+                                + " `-Pinputs=\"…\"`;\n")
+                .append("  only their page count and byte size are reported.\n");
+    }
+
+    private static long totalPhysicalMemoryBytes() {
+        if (ManagementFactory.getOperatingSystemMXBean()
+                instanceof com.sun.management.OperatingSystemMXBean os) {
+            return os.getTotalMemorySize();
+        }
+        return -1;
+    }
+
+    private static String secs(double seconds) {
+        return String.format(Locale.ROOT, "%.2fs", seconds);
+    }
+
+    private static String pages(int pages) {
+        return pages < 0 ? "?" : Integer.toString(pages);
+    }
+
+    private static String rssMib(long rssKib) {
+        return rssKib < 0 ? "n/a" : Long.toString(Math.round(rssKib / 1024.0));
+    }
+
+    // Small utilities
+
+    private static String fileName(Path path) {
+        Path name = path.getFileName();
+        return name != null ? name.toString() : path.toString();
+    }
+
+    private static void requireExecutable(Path path, String what, String hint) {
+        if (!Files.isExecutable(path)) {
+            System.err.println("error: " + what + " not found at " + path);
+            System.err.println("       " + hint);
+            System.exit(1);
+        }
+    }
+
+    private static Path requireParent(Path path) {
+        Path parent = path.getParent();
+        if (parent == null) {
+            throw new IllegalArgumentException("output path has no parent: " + path);
+        }
+        return parent;
+    }
+
+    private static void deleteTree(Path dir) throws IOException {
+        if (!Files.exists(dir)) {
+            return;
+        }
+        try (var paths = Files.walk(dir)) {
+            paths.sorted((a, b) -> b.getNameCount() - a.getNameCount())
+                    .forEach(
+                            p -> {
+                                try {
+                                    Files.deleteIfExists(p);
+                                } catch (IOException e) {
+                                    System.err.println(
+                                            "warn: could not delete " + p + ": " + e.getMessage());
+                                }
+                            });
+        }
+    }
+}
diff --git a/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/SampleScanGenerator.java b/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/SampleScanGenerator.java
new file mode 100644
index 0000000..8b111a7
--- /dev/null
+++ b/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/SampleScanGenerator.java
@@ -0,0 +1,135 @@
+package io.github.p4suta.pipeline.tools;
+
+import java.awt.Color;
+import java.awt.Graphics2D;
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Locale;
+import java.util.Random;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.graphics.image.CCITTFactory;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+
+/**
+ * Generates the synthetic, copyright-free bitonal "scan" book the {@code benchPipeline} harness
+ * converts: A5 pages at the requested dpi carrying vertical text-like columns with per-page
+ * position jitter, a small per-page skew of up to ±0.5° (so the register stage's deskew has real
+ * work) and salt-and-pepper specks (so despeckle has real work), embedded as CCITT G4 so {@code
+ * pdfimages} extracts them exactly like a real scan. A fixed seed keeps every generation
+ * byte-identical, so benchmark runs stay comparable across machines and branches.
+ *
+ * <p>This deliberately lives in test sources (driven by the {@code createSampleScan} Gradle task),
+ * mirroring register's {@code SamplePdfGenerator}: the dev tool never ships in the production
+ * launcher. An existing output is reused, so repeated benchmark runs skip the generation cost.
+ *
+ * <p>Usage: {@code SampleScanGenerator <output.pdf> [pages] [dpi]}
+ */
+public final class SampleScanGenerator {
+
+    private SampleScanGenerator() {}
+
+    /** {@code SampleScanGenerator <output.pdf> [pages] [dpi]} — writes the synthetic scan book. */
+    public static void main(String[] args) throws IOException {
+        Path out = Path.of(args.length > 0 ? args[0] : "sample-scan.pdf");
+        int pages = args.length > 1 ? Integer.parseInt(args[1]) : 200;
+        int dpi = args.length > 2 ? Integer.parseInt(args[2]) : 600;
+        if (Files.isRegularFile(out)) {
+            System.out.println("reusing existing " + out + " (delete it to regenerate)");
+            return;
+        }
+        long start = System.nanoTime();
+        write(out, pages, dpi);
+        System.out.printf(
+                Locale.ROOT,
+                "wrote %s: %d page(s) at %d dpi, %.1f MiB in %.1fs%n",
+                out,
+                pages,
+                dpi,
+                Files.size(out) / (1024.0 * 1024.0),
+                (System.nanoTime() - start) / 1e9);
+    }
+
+    /** Writes a {@code pages}-page synthetic bitonal scan book to {@code out} (A5 geometry). */
+    public static void write(Path out, int pages, int dpi) throws IOException {
+        int width = Math.round(148f * dpi / 25.4f); // A5 portrait: 148 mm × 210 mm
+        int height = Math.round(210f * dpi / 25.4f);
+        Random random = new Random(42);
+        Path parent = out.toAbsolutePath().getParent();
+        if (parent != null) {
+            Files.createDirectories(parent);
+        }
+        try (PDDocument doc = new PDDocument()) {
+            float widthPt = width * 72f / dpi;
+            float heightPt = height * 72f / dpi;
+            for (int i = 0; i < pages; i++) {
+                PDImageXObject image =
+                        CCITTFactory.createFromImage(doc, page(width, height, random));
+                PDPage page = new PDPage(new PDRectangle(widthPt, heightPt));
+                doc.addPage(page);
+                try (PDPageContentStream content = new PDPageContentStream(doc, page)) {
+                    content.drawImage(image, 0, 0, widthPt, heightPt);
+                }
+            }
+            doc.save(out.toFile());
+        }
+    }
+
+    /** One page: slightly skewed text-like columns plus unrotated scanner-dust specks. */
+    private static BufferedImage page(int width, int height, Random random) {
+        BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY);
+        Graphics2D g = img.createGraphics();
+        try {
+            g.setColor(Color.WHITE);
+            g.fillRect(0, 0, width, height);
+            g.setColor(Color.BLACK);
+            double skew = Math.toRadians(random.nextDouble() - 0.5); // ±0.5°
+            g.rotate(skew, width / 2.0, height / 2.0);
+            drawColumns(g, width, height, random);
+            g.rotate(-skew, width / 2.0, height / 2.0);
+            drawSpecks(g, width, height, random);
+        } finally {
+            g.dispose();
+        }
+        return img;
+    }
+
+    /**
+     * Vertical "text" columns right-to-left (Japanese book layout): stacked glyph-sized blocks with
+     * per-page jitter so registration has a real column position to detect and correct, and random
+     * early line breaks so the texture resembles prose rather than a solid block.
+     */
+    private static void drawColumns(Graphics2D g, int width, int height, Random random) {
+        int margin = width / 10;
+        int glyph = Math.max(4, width / 60);
+        int leading = glyph / 2;
+        int jitterX = random.nextInt(glyph + 1) - glyph / 2;
+        int top = height / 12 + random.nextInt(glyph + 1);
+        int bottom = height - height / 12;
+        for (int x = width - margin - glyph + jitterX; x >= margin; x -= glyph + leading) {
+            int y = top;
+            while (y + glyph <= bottom) {
+                // ~8% of glyph slots end the "sentence" early, leaving prose-like white runs.
+                if (random.nextInt(100) < 8) {
+                    y += glyph * (2 + random.nextInt(4));
+                    continue;
+                }
+                g.fillRect(x, y, glyph - 2, glyph - 2);
+                y += glyph;
+            }
+        }
+    }
+
+    /** Salt-and-pepper dust: ~1 speck of 1–3 px per 25k pixels, what despeckle exists to remove. */
+    private static void drawSpecks(Graphics2D g, int width, int height, Random random) {
+        int specks = width * height / 25_000;
+        for (int i = 0; i < specks; i++) {
+            int size = 1 + random.nextInt(3);
+            g.fillRect(random.nextInt(width - size), random.nextInt(height - size), size, size);
+        }
+    }
+}
diff --git a/pipeline/application/src/main/java/io/github/p4suta/pipeline/application/PipelineRunner.java b/pipeline/application/src/main/java/io/github/p4suta/pipeline/application/PipelineRunner.java
index 0810505..8ae99b8 100644
--- a/pipeline/application/src/main/java/io/github/p4suta/pipeline/application/PipelineRunner.java
+++ b/pipeline/application/src/main/java/io/github/p4suta/pipeline/application/PipelineRunner.java
@@ -70,7 +70,11 @@ public void run(
 
             progress.emit(new ProgressEvent.StageStarted(source.name(), position, total));
             Corpus corpus = source.open(stageDir(work, 0, source.name()));
-            log.info("source: {} page(s) at {} dpi", corpus.pageCount(), corpus.dpi());
+            log.info(
+                    "source: {} page(s) at {} dpi, {}",
+                    corpus.pageCount(),
+                    corpus.dpi(),
+                    intermediatesSize(corpus.dir()));
             progress.emit(new ProgressEvent.StageCompleted(source.name()));
             position++;
 
@@ -78,7 +82,12 @@ public void run(
             for (Stage stage : stages) {
                 progress.emit(new ProgressEvent.StageStarted(stage.name(), position, total));
                 corpus = stage.apply(corpus, stageDir(work, dirIndex, stage.name()));
-                log.info("stage {} ({}): {} page(s)", dirIndex, stage.name(), corpus.pageCount());
+                log.info(
+                        "stage {} ({}): {} page(s), {}",
+                        dirIndex,
+                        stage.name(),
+                        corpus.pageCount(),
+                        intermediatesSize(corpus.dir()));
                 progress.emit(new ProgressEvent.StageCompleted(stage.name()));
                 position++;
                 dirIndex++;
@@ -111,6 +120,28 @@ private static Path stageDir(Path work, int index, String name) throws IOExcepti
                 work.resolve(String.format(Locale.ROOT, "%02d-%s", index, name)));
     }
 
+    /**
+     * The stage directory's total file bytes rendered as MiB — visibility into how much
+     * intermediate I/O each stage produces (best-effort: {@code ?} when the walk fails).
+     */
+    private static String intermediatesSize(Path dir) {
+        try (Stream<Path> files = Files.walk(dir)) {
+            long bytes =
+                    files.filter(Files::isRegularFile).mapToLong(PipelineRunner::sizeQuietly).sum();
+            return String.format(Locale.ROOT, "%.1f MiB", bytes / (1024.0 * 1024.0));
+        } catch (IOException e) {
+            return "? MiB";
+        }
+    }
+
+    private static long sizeQuietly(Path file) {
+        try {
+            return Files.size(file);
+        } catch (IOException e) {
+            return 0L;
+        }
+    }
+
     private static void deleteRecursively(Path dir) {
         try (Stream<Path> walk = Files.walk(dir)) {
             walk.sorted(Comparator.reverseOrder()).forEach(PipelineRunner::deleteQuietly);
diff --git a/pipeline/docs/perf-baseline.md b/pipeline/docs/perf-baseline.md
new file mode 100644
index 0000000..9727575
--- /dev/null
+++ b/pipeline/docs/perf-baseline.md
@@ -0,0 +1,35 @@
+# pdfbook runtime baseline (stage-level)
+
+Generated by `PipelineBenchmark` (`./gradlew :pipeline:app:benchPipeline`, in the dev container).
+Tracks the **per-stage wall-clock breakdown, end-to-end runtime and peak memory**
+of the installDist `pdfbook` launcher. Re-run after any change to the pipeline
+and compare against the previous run before merging (acceptance: ≥5% median
+total-wall improvement, or an explicit RSS/disk win, with output validated).
+
+- Date (UTC): 2026-06-10 04:46:07
+- Host: Linux 6.8.0-124-generic amd64, 8 CPUs, RAM 16Gi
+- Launcher: `pipeline/app/build/install/pdfbook/bin/pdfbook`
+- Samples per measurement: cold (1st run) + warm median of 3.
+- The default input is the deterministic synthetic fixture (`createSampleScan`,
+  seeded, so identical across machines). Real books are pluggable via `-Pinputs="…"`;
+  only their page count and byte size are reported.
+
+## Stage breakdown (warm median of 3 runs)
+
+`conv` is the launcher's in-process total (`timing: total`); `startup+init` = E2E wall − conv
+(JVM boot + first-touch PDFBox/AWT init). `jobs=auto` is the launcher's CPU-count default.
+
+| Input | Jobs | Pages | E2E wall | conv | extract | despeckle | register | spread | startup+init | Cold wall | Peak RSS (MiB) | Output (MiB) |
+|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
+| sample-scan-200p.pdf | 1 | 200 | 50.10s | 49.85s | 4.57s | 32.16s | 12.94s | 0.20s | 0.25s | 51.42s | 156 | 6.4 |
+| sample-scan-200p.pdf | 8 | 200 | 14.77s | 14.48s | 1.15s | 9.85s | 3.27s | 0.21s | 0.29s | 15.04s | 328 | 6.4 |
+
+## Stage shares (of conv, warm median)
+
+The shares that decide where optimization effort goes: a stage that is ~5% of conv
+cannot pay for a parallelization rewrite no matter how elegant.
+
+| Input | Jobs | extract | despeckle | register | spread |
+|---|---|---:|---:|---:|---:|
+| sample-scan-200p.pdf | 1 | 9.2% | 64.5% | 26.0% | 0.4% |
+| sample-scan-200p.pdf | 8 | 7.9% | 68.0% | 22.6% | 1.5% |
diff --git a/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStage.java b/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStage.java
index bcd1008..4fea0dc 100644
--- a/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStage.java
+++ b/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStage.java
@@ -12,17 +12,17 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.atomic.AtomicInteger;
 
 /**
  * The G4-normalization {@link Stage}: re-encodes each extracted page as single-strip CCITT G4 TIFF
- * via Leptonica, which {@link SpreadPackSink}'s pass-through CCITT embedding requires. {@code
- * pdfimages -tiff} writes poppler's default (non-G4) TIFF compression, so the raw extract output
- * cannot be embedded directly; despeckle and register each re-encode their output as G4 themselves,
- * so the composition root inserts this stage only when neither of them runs. The corpus dpi is
- * stamped on every page, since {@code pdfimages} tags the extracted TIFFs at a default 72 dpi.
+ * via Leptonica, which {@link SpreadPackSink}'s pass-through CCITT embedding requires. The
+ * extractor's decoded mode ({@code pdfimages -tiff}, used for any source that is not all-CCITT)
+ * writes poppler's default (non-G4) TIFF compression at a default 72 dpi, so that output cannot be
+ * embedded directly; despeckle and register each re-encode their output as G4 themselves, so the
+ * composition root inserts this stage only when neither of them runs. The corpus dpi is stamped on
+ * every page. (For an all-CCITT source the extractor's remux already produces stamped single-strip
+ * G4 — this stage then re-encodes losslessly, a small constant cost that keeps the no-stage path
+ * uniform.)
  */
 public final class G4EncodeStage implements Stage {
 
@@ -54,29 +54,26 @@ public String name() {
     @Override
     public Corpus apply(Corpus input, Path workDir) throws IOException {
         List<Path> pages = CorpusFiles.collect(input.dir(), input.glob());
-        AtomicInteger done = new AtomicInteger();
-        ExecutorService pool = Executors.newFixedThreadPool(jobs);
-        try {
-            List<Callable<Void>> tasks = new ArrayList<>(pages.size());
-            for (Path src : pages) {
-                tasks.add(
-                        () -> {
-                            Path out =
-                                    CorpusFiles.mirrorDestination(src, input.dir(), workDir, "tif");
-                            try (Pix page = Pix.read(src)) {
-                                page.setResolution(input.dpi());
-                                page.writeTiffG4(out);
-                            }
-                            progress.emit(
-                                    new ProgressEvent.PageProcessed(
-                                            name(), done.incrementAndGet(), pages.size()));
-                            return null;
-                        });
-            }
-            Tasks.awaitAll(pool, tasks, "G4 encode interrupted", "G4 encode failed");
-        } finally {
-            pool.shutdown();
+        List<Callable<Void>> tasks = new ArrayList<>(pages.size());
+        for (Path src : pages) {
+            tasks.add(
+                    () -> {
+                        Path out = CorpusFiles.mirrorDestination(src, input.dir(), workDir, "tif");
+                        try (Pix page = Pix.read(src)) {
+                            page.setResolution(input.dpi());
+                            page.writeTiffG4(out);
+                        }
+                        return null;
+                    });
         }
+        // Platform workers: each task is a Leptonica decode + G4 encode (CPU-bound FFM downcalls,
+        // which would pin virtual threads' carriers). Progress arrives on this thread, ordered.
+        Tasks.awaitAll(
+                Tasks.Workers.platform(jobs),
+                tasks,
+                "G4 encode",
+                (done, total) ->
+                        progress.emit(new ProgressEvent.PageProcessed(name(), done, total)));
         return input.movedTo(workDir, OUTPUT_GLOB);
     }
 }
diff --git a/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/PdfExtractSource.java b/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/PdfExtractSource.java
index 1a11456..c40694d 100644
--- a/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/PdfExtractSource.java
+++ b/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/PdfExtractSource.java
@@ -7,8 +7,6 @@
 import java.nio.file.DirectoryStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
 
 /**
  * The pipeline {@link Source}: extracts a scan PDF's bitonal pages once, via the shared {@code
@@ -45,12 +43,7 @@ public String name() {
     @Override
     public Corpus open(Path workDir) throws IOException {
         int dpi = extractor.dominantDpi(sourcePdf);
-        ExecutorService pool = Executors.newFixedThreadPool(jobs);
-        try {
-            extractor.extract(sourcePdf, workDir, jobs, pool);
-        } finally {
-            pool.shutdown();
-        }
+        extractor.extract(sourcePdf, workDir, jobs);
         return new Corpus(workDir, EXTRACT_GLOB, dpi, count(workDir, EXTRACT_GLOB));
     }
 
diff --git a/pipeline/infrastructure/src/test/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStageTest.java b/pipeline/infrastructure/src/test/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStageTest.java
index 4ee3222..16a149c 100644
--- a/pipeline/infrastructure/src/test/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStageTest.java
+++ b/pipeline/infrastructure/src/test/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStageTest.java
@@ -1,7 +1,7 @@
 package io.github.p4suta.pipeline.infrastructure;
 
 import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.Assertions.assertThatIOException;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
 
 import io.github.p4suta.pipeline.domain.Corpus;
 import io.github.p4suta.shared.imaging.Pix;
@@ -79,15 +79,17 @@ void reEncodesUncompressedTiffsAsCcittG4() throws Exception {
     }
 
     @Test
-    void surfacesAnUnreadablePageAsIoException() throws Exception {
+    void surfacesAnUnreadablePageWithItsOriginalException() throws Exception {
         Path inputDir = Files.createDirectories(tmp.resolve("in"));
         Path workDir = Files.createDirectories(tmp.resolve("out"));
         Files.writeString(inputDir.resolve("page-000.tif"), "not a tiff");
         Corpus input = new Corpus(inputDir, "*.tif", DPI, 1);
 
-        assertThatIOException()
-                .isThrownBy(() -> new G4EncodeStage(2).apply(input, workDir))
-                .withMessage("G4 encode failed");
+        // The fan-out preserves the failure's identity (no generic IOException wrapper), so the
+        // shared exception mapper still sees the original type and the message names the page.
+        assertThatThrownBy(() -> new G4EncodeStage(2).apply(input, workDir))
+                .isInstanceOf(IllegalStateException.class)
+                .hasMessageContaining("could not read image");
     }
 
     /**
diff --git a/register/application/build.gradle.kts b/register/application/build.gradle.kts
index 631d833..11a5a77 100644
--- a/register/application/build.gradle.kts
+++ b/register/application/build.gradle.kts
@@ -25,6 +25,10 @@ dependencies {
     // caller-supplied extension). Register passes its own glob/OutputFormat.extension() so this layer
     // imports neither the matcher nor the OutputFormat enum.
     implementation(project(":shared:io"))
+    // Tasks.awaitAll(Workers...): the shared fail-fast page fan-out (batch-owned executor,
+    // sibling interruption, quiescence before the failure propagates) both registration passes
+    // run their per-page work on.
+    implementation(project(":shared:process"))
     implementation(libs.slf4j.api)
 }
 
diff --git a/register/application/src/main/java/io/github/p4suta/register/application/PdfPipelineService.java b/register/application/src/main/java/io/github/p4suta/register/application/PdfPipelineService.java
index 0a826b4..6672142 100644
--- a/register/application/src/main/java/io/github/p4suta/register/application/PdfPipelineService.java
+++ b/register/application/src/main/java/io/github/p4suta/register/application/PdfPipelineService.java
@@ -11,8 +11,6 @@
 import java.nio.file.Path;
 import java.util.Comparator;
 import java.util.OptionalInt;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
 import java.util.stream.Stream;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -80,40 +78,37 @@ public void run(Config config) throws IOException {
             Path extracted = Files.createDirectories(work.resolve("in"));
             Path registered = Files.createDirectories(work.resolve("reg"));
             Path jbig2Dir = Files.createDirectories(work.resolve("jb2"));
-            ExecutorService pool = Executors.newFixedThreadPool(config.jobs());
-            try {
-                int dpi =
-                        config.options().dpi().isPresent()
-                                ? config.options().dpi().getAsInt()
-                                : extractor.dominantDpi(config.inputPdf());
-                LOG.info(
-                        "pipeline: {} -> {} at {} dpi", config.inputPdf(), config.outputPdf(), dpi);
+            int dpi =
+                    config.options().dpi().isPresent()
+                            ? config.options().dpi().getAsInt()
+                            : extractor.dominantDpi(config.inputPdf());
+            LOG.info("pipeline: {} -> {} at {} dpi", config.inputPdf(), config.outputPdf(), dpi);
 
-                extractor.extract(config.inputPdf(), extracted, config.jobs(), pool);
+            // Each step fans out on its own batch-owned workers bounded by the same jobs budget,
+            // so the steps never hold idle threads for each other (the old shared outer pool sat
+            // idle through the whole registration step).
+            extractor.extract(config.inputPdf(), extracted, config.jobs());
 
-                RegistrationService.Config registration =
-                        new RegistrationService.Config(
-                                extracted,
-                                registered,
-                                OutputFormat.TIFF,
-                                "*.tif",
-                                config.jobs(),
-                                true,
-                                withDpi(config.options(), dpi),
-                                null,
-                                false);
-                registrationService.run(registration);
+            RegistrationService.Config registration =
+                    new RegistrationService.Config(
+                            extracted,
+                            registered,
+                            OutputFormat.TIFF,
+                            "*.tif",
+                            config.jobs(),
+                            true,
+                            withDpi(config.options(), dpi),
+                            null,
+                            false);
+            registrationService.run(registration);
 
-                assembler.assemble(
-                        registered,
-                        config.outputPdf(),
-                        config.inputPdf(),
-                        OptionalInt.of(dpi),
-                        pool,
-                        jbig2Dir);
-            } finally {
-                pool.shutdown();
-            }
+            assembler.assemble(
+                    registered,
+                    config.outputPdf(),
+                    config.inputPdf(),
+                    OptionalInt.of(dpi),
+                    config.jobs(),
+                    jbig2Dir);
             LOG.info("wrote {}", config.outputPdf());
         } finally {
             deleteRecursively(work);
diff --git a/register/application/src/main/java/io/github/p4suta/register/application/RegistrationService.java b/register/application/src/main/java/io/github/p4suta/register/application/RegistrationService.java
index a6210c3..378a6a1 100644
--- a/register/application/src/main/java/io/github/p4suta/register/application/RegistrationService.java
+++ b/register/application/src/main/java/io/github/p4suta/register/application/RegistrationService.java
@@ -17,6 +17,7 @@
 import io.github.p4suta.shared.io.OutputDirs;
 import io.github.p4suta.shared.kernel.Medians;
 import io.github.p4suta.shared.kernel.PageProgressListener;
+import io.github.p4suta.shared.process.Tasks;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
@@ -26,11 +27,6 @@
 import java.util.Locale;
 import java.util.OptionalInt;
 import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Stream;
 import org.jspecify.annotations.Nullable;
 import org.slf4j.Logger;
@@ -109,8 +105,8 @@ public Summary run(Config requested) throws IOException {
      *
      * @param requested run configuration (its empty {@code --dpi} is resolved from the inputs
      *     before rendering)
-     * @param progress called once per finished page in each pass (1-based count over {@code 2N});
-     *     invoked from the worker threads, so it must be thread-safe
+     * @param progress called once per finished page in each pass (1-based count over {@code 2N}),
+     *     always on the calling thread and in order
      * @throws IOException on filesystem failure
      */
     public Summary run(Config requested, PageProgressListener progress) throws IOException {
@@ -130,7 +126,6 @@ public Summary run(Config requested, PageProgressListener progress) throws IOExc
         // Per-page progress spans both passes (analyze 1..N, render N+1..2N), so each PageProcessed
         // the composing stage emits carries this 2N denominator.
         int progressTotal = files.size() * 2;
-        AtomicInteger pageProgress = new AtomicInteger();
 
         Path diagDir = config.diagDir();
         boolean recordDiagnostics = diagDir != null;
@@ -140,21 +135,15 @@ public Summary run(Config requested, PageProgressListener progress) throws IOExc
                         : Reporter.noOp();
 
         // The analysis pass writes each deskewed page here; the render pass reads it back, so the
-        // costly deskew and detection run once per page, not again in pass two. Removed at the end.
+        // costly deskew and detection run once per page, not again in pass two. Removed at the
+        // end — safely: each pass's fan-out quiesces before returning or throwing, so no worker
+        // is still writing into the scratch when the finally-delete runs.
         Path scratchDir = createBeside(config.outputDir(), ".register-deskewed-");
-        ExecutorService pool = Executors.newFixedThreadPool(config.jobs());
         try {
             // Pass 1: deskew once, detect, cache the deskewed page for pass two
             List<AnalyzedPage> pages =
                     analyzePass(
-                            files,
-                            scratchDir,
-                            config,
-                            recordDiagnostics,
-                            pool,
-                            progress,
-                            pageProgress,
-                            progressTotal);
+                            files, scratchDir, config, recordDiagnostics, progress, progressTotal);
             List<PageObservation> observations = toObservations(pages);
             int analyzed = observations.size();
 
@@ -189,9 +178,7 @@ public Summary run(Config requested, PageProgressListener progress) throws IOExc
                             config,
                             recordDiagnostics,
                             reporter,
-                            pool,
                             progress,
-                            pageProgress,
                             progressTotal);
 
             if (recordDiagnostics) {
@@ -221,7 +208,6 @@ public Summary run(Config requested, PageProgressListener progress) throws IOExc
                     analyzed);
             return new Summary(files.size(), analyzed);
         } finally {
-            pool.shutdown();
             deleteRecursively(scratchDir);
         }
     }
@@ -235,9 +221,7 @@ private List<AnalyzedPage> analyzePass(
             Path scratchDir,
             Config config,
             boolean recordDiagnostics,
-            ExecutorService pool,
             PageProgressListener progress,
-            AtomicInteger pageProgress,
             int progressTotal)
             throws IOException {
         List<Callable<AnalyzedPage>> tasks = new ArrayList<>(files.size());
@@ -246,18 +230,23 @@ private List<AnalyzedPage> analyzePass(
             Path src = files.get(i);
             Path scratch = scratchDir.resolve(String.format(Locale.ROOT, "%06d.tif", index));
             tasks.add(
-                    () -> {
-                        PageAnalysis analysis =
-                                pageRegistrar.analyze(
-                                        src, scratch, config.options(), recordDiagnostics);
-                        AnalyzedPage analyzed =
-                                new AnalyzedPage(index, Parity.of(index), src, scratch, analysis);
-                        // The analyze pass occupies the first N of the 2N progress units.
-                        progress.onPage(pageProgress.incrementAndGet(), progressTotal);
-                        return analyzed;
-                    });
+                    () ->
+                            new AnalyzedPage(
+                                    index,
+                                    Parity.of(index),
+                                    src,
+                                    scratch,
+                                    pageRegistrar.analyze(
+                                            src, scratch, config.options(), recordDiagnostics)));
         }
-        return awaitAll(pool, tasks);
+        // Platform workers: deskew + detection are CPU-bound Leptonica work (FFM downcalls pin
+        // virtual threads' carriers). The analyze pass occupies the first N of the 2N progress
+        // units; the callback arrives on this thread, ordered.
+        return Tasks.awaitAll(
+                Tasks.Workers.platform(config.jobs()),
+                tasks,
+                "register analyze",
+                (done, total) -> progress.onPage(done, progressTotal));
     }
 
     /** The detected main-column boxes from pass 1, one per page that had a detectable column. */
@@ -286,36 +275,27 @@ private List<Path> renderPass(
             Config config,
             boolean recordDiagnostics,
             Reporter reporter,
-            ExecutorService pool,
             PageProgressListener progress,
-            AtomicInteger pageProgress,
             int progressTotal)
             throws IOException {
-        AtomicInteger done = new AtomicInteger();
-        int total = pages.size();
-        List<Callable<Path>> tasks = new ArrayList<>(total);
+        List<Callable<Path>> tasks = new ArrayList<>(pages.size());
         for (AnalyzedPage page : pages) {
             tasks.add(
-                    () -> {
-                        Path dest =
-                                renderOne(
-                                        page,
-                                        reference,
-                                        canvas,
-                                        config,
-                                        recordDiagnostics,
-                                        reporter);
-                        int n = done.incrementAndGet();
-                        // The render pass occupies the second N of the 2N progress units; the local
-                        // count drives the human log at its own N denominator.
-                        progress.onPage(pageProgress.incrementAndGet(), progressTotal);
-                        if (n % PROGRESS_EVERY == 0 || n == total) {
-                            LOG.info("{}/{}", n, total);
-                        }
-                        return dest;
-                    });
+                    () -> renderOne(page, reference, canvas, config, recordDiagnostics, reporter));
         }
-        return awaitAll(pool, tasks);
+        // Platform workers: placement is CPU-bound Leptonica work. The render pass occupies the
+        // second N of the 2N progress units; the local count drives the human log at its own N
+        // denominator. Both arrive on this thread, ordered.
+        return Tasks.awaitAll(
+                Tasks.Workers.platform(config.jobs()),
+                tasks,
+                "register render",
+                (done, total) -> {
+                    progress.onPage(pages.size() + done, progressTotal);
+                    if (done % PROGRESS_EVERY == 0 || done == total) {
+                        LOG.info("{}/{}", done, total);
+                    }
+                });
     }
 
     /**
@@ -431,38 +411,6 @@ private Path renderOne(
         return dest;
     }
 
-    /**
-     * Run every task on {@code pool} in submission order, surfacing the first failure: a task's own
-     * {@link IOException} is re-thrown unchanged, any other failure is wrapped, and an interruption
-     * restores the interrupt flag.
-     */
-    private static <T> List<T> awaitAll(ExecutorService pool, List<Callable<T>> tasks)
-            throws IOException {
-        List<Future<T>> futures;
-        try {
-            futures = pool.invokeAll(tasks);
-        } catch (InterruptedException e) {
-            Thread.currentThread().interrupt();
-            throw new IOException("register run interrupted", e);
-        }
-        List<T> results = new ArrayList<>(futures.size());
-        for (Future<T> future : futures) {
-            try {
-                results.add(future.get());
-            } catch (InterruptedException e) {
-                Thread.currentThread().interrupt();
-                throw new IOException("register run interrupted", e);
-            } catch (ExecutionException e) {
-                Throwable cause = e.getCause();
-                if (cause instanceof IOException io) {
-                    throw io;
-                }
-                throw new IOException("page processing failed", cause);
-            }
-        }
-        return results;
-    }
-
     /**
      * Create a temporary directory beside {@code sibling} (under its parent), so the scratch space
      * shares a filesystem with the output it serves. Falls back to the working directory when
diff --git a/register/application/src/test/java/io/github/p4suta/register/application/Fakes.java b/register/application/src/test/java/io/github/p4suta/register/application/Fakes.java
index 28d7ad4..08f2a4e 100644
--- a/register/application/src/test/java/io/github/p4suta/register/application/Fakes.java
+++ b/register/application/src/test/java/io/github/p4suta/register/application/Fakes.java
@@ -23,7 +23,6 @@
 import java.util.List;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.concurrent.ExecutorService;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.jspecify.annotations.Nullable;
 
@@ -194,8 +193,7 @@ public int dominantDpi(Path pdf) {
         }
 
         @Override
-        public void extract(Path pdf, Path outDir, int jobs, ExecutorService pool)
-                throws IOException {
+        public void extract(Path pdf, Path outDir, int jobs) throws IOException {
             extractCalls.incrementAndGet();
             if (failOn != null && pdf.getFileName().toString().contains(failOn)) {
                 throw new IOException("fake extract failed for " + pdf);
@@ -220,7 +218,7 @@ public void assemble(
                 Path outPdf,
                 @Nullable Path source,
                 OptionalInt forcedDpi,
-                ExecutorService pool,
+                int jobs,
                 Path scratchDir)
                 throws IOException {
             lastForcedDpi = forcedDpi;
diff --git a/register/application/src/test/java/io/github/p4suta/register/application/RegistrationServiceTest.java b/register/application/src/test/java/io/github/p4suta/register/application/RegistrationServiceTest.java
index cb028db..5b16165 100644
--- a/register/application/src/test/java/io/github/p4suta/register/application/RegistrationServiceTest.java
+++ b/register/application/src/test/java/io/github/p4suta/register/application/RegistrationServiceTest.java
@@ -181,11 +181,12 @@ void writesDiagnosticsWhenADiagDirIsGiven(@TempDir Path tmp) throws IOException
     }
 
     @Test
-    void propagatesAWorkerFailureAsAnIoException(@TempDir Path tmp) throws IOException {
+    void propagatesAWorkerFailureWithItsIdentity(@TempDir Path tmp) throws IOException {
         Path in = Files.createDirectories(tmp.resolve("in"));
         Path out = tmp.resolve("out");
         writePages(in, 2);
-        // A registrar whose analyze pass throws: the service must surface it, not swallow it.
+        // A registrar whose analyze pass throws: the service must surface the original exception
+        // unchanged (the fan-out preserves identity, so domain kinds reach the exception mapper).
         PageRegistrar failing =
                 new FakePageRegistrar(true, 600, 1000, 1500) {
                     @Override
@@ -201,7 +202,7 @@ public PageAnalysis analyze(
                 new RegistrationService(failing, new RecordingReporterFactory());
 
         assertThrows(
-                IOException.class,
+                IllegalStateException.class,
                 () ->
                         service.run(
                                 config(
diff --git a/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/pdf/PdfBoxJbig2Assembler.java b/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/pdf/PdfBoxJbig2Assembler.java
index a5a1411..85224b2 100644
--- a/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/pdf/PdfBoxJbig2Assembler.java
+++ b/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/pdf/PdfBoxJbig2Assembler.java
@@ -4,7 +4,6 @@
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.OptionalInt;
-import java.util.concurrent.ExecutorService;
 import org.jspecify.annotations.Nullable;
 
 /**
@@ -40,9 +39,9 @@ public void assemble(
             Path outPdf,
             @Nullable Path source,
             OptionalInt forcedDpi,
-            ExecutorService pool,
+            int jobs,
             Path scratchDir)
             throws IOException {
-        delegate.assemble(imageDir, outPdf, source, forcedDpi, pool, scratchDir);
+        delegate.assemble(imageDir, outPdf, source, forcedDpi, jobs, scratchDir);
     }
 }
diff --git a/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/pdf/PdfImagesCliExtractor.java b/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/pdf/PdfImagesCliExtractor.java
index 231229a..07c55e5 100644
--- a/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/pdf/PdfImagesCliExtractor.java
+++ b/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/pdf/PdfImagesCliExtractor.java
@@ -3,7 +3,6 @@
 import io.github.p4suta.register.port.PdfImageExtractor;
 import java.io.IOException;
 import java.nio.file.Path;
-import java.util.concurrent.ExecutorService;
 
 /**
  * Register's {@link PdfImageExtractor} adapter: a thin binding onto the cross-app {@link
@@ -43,10 +42,10 @@ public int dominantDpi(Path pdf) throws IOException {
 
     /**
      * Extract all pages of {@code pdf} into {@code outDir} as TIFFs, parallelized over page-range
-     * chunks. {@code jobs} bounds both the chunk count and the pool slots used.
+     * chunks. at most {@code jobs} chunks run at once.
      */
     @Override
-    public void extract(Path pdf, Path outDir, int jobs, ExecutorService pool) throws IOException {
-        delegate.extract(pdf, outDir, jobs, pool);
+    public void extract(Path pdf, Path outDir, int jobs) throws IOException {
+        delegate.extract(pdf, outDir, jobs);
     }
 }
diff --git a/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/process/NativeTools.java b/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/process/NativeTools.java
index c254061..347a21b 100644
--- a/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/process/NativeTools.java
+++ b/register/infrastructure/src/main/java/io/github/p4suta/register/infrastructure/process/NativeTools.java
@@ -3,7 +3,6 @@
 import io.github.p4suta.register.domain.exception.RegisterErrorKind;
 import io.github.p4suta.register.domain.exception.RegisterException;
 import io.github.p4suta.shared.process.ProcessRunner;
-import io.github.p4suta.shared.process.Tasks;
 import io.github.p4suta.shared.process.ToolPath;
 import java.io.IOException;
 import java.io.InputStream;
@@ -11,8 +10,6 @@
 import java.time.Duration;
 import java.util.List;
 import java.util.Objects;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
@@ -22,12 +19,11 @@
  * io.github.p4suta.shared.process} plumbing: discovery is delegated to {@link
  * io.github.p4suta.shared.process.ToolPath} (passing register's per-tool {@code
  * -Dregister.<tool>.path} override key — this is how the packaged app-image points at its bundled
- * binaries, else the tool is looked up on {@code PATH}), the discard-output run is delegated to
- * {@link io.github.p4suta.shared.process.ProcessRunner}, and the worker-pool fan-out to {@link
- * io.github.p4suta.shared.process.Tasks}. Unlike the optional flip-book, a missing pipeline tool is
- * fatal — the pipeline cannot proceed — so resolution throws, and the shared layer's neutral {@code
- * IOException}/{@code TimeoutException} failures are re-mapped to register's {@code
- * RegisterException} (NATIVE_TOOL_FAILED) so the pipeline keeps its own error wording.
+ * binaries, else the tool is looked up on {@code PATH}), and the discard-output run is delegated to
+ * {@link io.github.p4suta.shared.process.ProcessRunner}. Unlike the optional flip-book, a missing
+ * pipeline tool is fatal — the pipeline cannot proceed — so resolution throws, and the shared
+ * layer's neutral {@code IOException}/{@code TimeoutException} failures are re-mapped to register's
+ * {@code RegisterException} (NATIVE_TOOL_FAILED) so the pipeline keeps its own error wording.
  */
 public final class NativeTools {
 
@@ -128,15 +124,4 @@ private static void awaitExit(Process process, List<String> command, long timeou
                     null);
         }
     }
-
-    /**
-     * Run every task on {@code pool}, in submission order, surfacing the first failure as an {@link
-     * IOException}. Thin wrapper over the shared {@link
-     * io.github.p4suta.shared.process.Tasks#awaitAll} that supplies the pipeline's messages, so the
-     * await semantics live in one place while the pipeline keeps its own error wording.
-     */
-    public static <T> List<T> awaitAll(ExecutorService pool, List<Callable<T>> tasks)
-            throws IOException {
-        return Tasks.awaitAll(pool, tasks, "pipeline interrupted", "pipeline task failed");
-    }
 }
diff --git a/register/port/src/main/java/io/github/p4suta/register/port/Jbig2Assembler.java b/register/port/src/main/java/io/github/p4suta/register/port/Jbig2Assembler.java
index 466e95a..95902d0 100644
--- a/register/port/src/main/java/io/github/p4suta/register/port/Jbig2Assembler.java
+++ b/register/port/src/main/java/io/github/p4suta/register/port/Jbig2Assembler.java
@@ -3,7 +3,6 @@
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.OptionalInt;
-import java.util.concurrent.ExecutorService;
 import org.jspecify.annotations.Nullable;
 
 /**
@@ -28,7 +27,7 @@ void assemble(
             Path outPdf,
             @Nullable Path source,
             OptionalInt forcedDpi,
-            ExecutorService pool,
+            int jobs,
             Path scratchDir)
             throws IOException;
 }
diff --git a/register/port/src/main/java/io/github/p4suta/register/port/PdfImageExtractor.java b/register/port/src/main/java/io/github/p4suta/register/port/PdfImageExtractor.java
index 7114285..23089b4 100644
--- a/register/port/src/main/java/io/github/p4suta/register/port/PdfImageExtractor.java
+++ b/register/port/src/main/java/io/github/p4suta/register/port/PdfImageExtractor.java
@@ -2,7 +2,6 @@
 
 import java.io.IOException;
 import java.nio.file.Path;
-import java.util.concurrent.ExecutorService;
 
 /**
  * Extracts a scanned PDF's embedded bitonal images as TIFFs. The abstraction over {@code pdfimages}
@@ -20,10 +19,10 @@ public interface PdfImageExtractor {
 
     /**
      * Extract every page of {@code pdf} into {@code outDir} as TIFFs, parallelized over page-range
-     * chunks across the worker pool.
+     * chunks, at most {@code jobs} at once.
      *
-     * @param jobs the worker thread count (bounds the chunk count and pool slots used)
+     * @param jobs how many chunks may extract at once
      * @throws IOException if the external tool fails
      */
-    void extract(Path pdf, Path outDir, int jobs, ExecutorService pool) throws IOException;
+    void extract(Path pdf, Path outDir, int jobs) throws IOException;
 }
diff --git a/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Leptonica.java b/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Leptonica.java
index 9cf9945..1ca223d 100644
--- a/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Leptonica.java
+++ b/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Leptonica.java
@@ -296,6 +296,16 @@ private static MethodHandle handle(String name, FunctionDescriptor descriptor) {
             handle(
                     "pixOpenBrick",
                     FunctionDescriptor.of(ADDRESS, ADDRESS, ADDRESS, JAVA_INT, JAVA_INT));
+    private static final MethodHandle PIX_DILATE_BRICK_DWA =
+            handle(
+                    "pixDilateBrickDwa",
+                    FunctionDescriptor.of(ADDRESS, ADDRESS, ADDRESS, JAVA_INT, JAVA_INT));
+    private static final MethodHandle PIX_OPEN_BRICK_DWA =
+            handle(
+                    "pixOpenBrickDwa",
+                    FunctionDescriptor.of(ADDRESS, ADDRESS, ADDRESS, JAVA_INT, JAVA_INT));
+    private static final MethodHandle MAKE_PIXEL_SUM_TAB8 =
+            handle("makePixelSumTab8", FunctionDescriptor.of(ADDRESS));
     private static final MethodHandle PIX_AND =
             handle("pixAnd", FunctionDescriptor.of(ADDRESS, ADDRESS, ADDRESS, ADDRESS));
     private static final MethodHandle PIX_OR =
@@ -316,16 +326,38 @@ private static MethodHandle handle(String name, FunctionDescriptor descriptor) {
     private static final MethodHandle SET_MSG_SEVERITY =
             handle("setMsgSeverity", FunctionDescriptor.of(JAVA_INT, JAVA_INT));
 
+    /**
+     * Largest brick side routed to a single DWA kernel call. Leptonica generates DWA sels only for
+     * a fixed set of linear sizes; measured against this build's 1.82, {@code pixDilateBrickDwa}
+     * silently diverges from the generic brick for the missing sizes (every prime above 15, e.g.
+     * 17, 43), while sizes up to 15 are complete. Larger dilations are composed from safe passes in
+     * {@code Pix}; the equality sweep in {@code PixTest} pins all of this empirically.
+     */
+    static final int DWA_SAFE_BRICK = 15;
+
+    /**
+     * The process-lifetime 8-bit popcount table {@code pixCountPixels} consumes (~1 KiB native,
+     * deliberately never freed): without it Leptonica mallocs, fills and frees the same table on
+     * every call.
+     */
+    private static final MemorySegment PIXEL_SUM_TAB8;
+
     static {
         // Suppress Leptonica's stderr diagnostics once, at class load. The returned previous
         // severity is discarded.
         try {
             SET_MSG_SEVERITY.invoke(L_SEVERITY_NONE);
+            PIXEL_SUM_TAB8 = (MemorySegment) MAKE_PIXEL_SUM_TAB8.invoke();
         } catch (Throwable t) {
-            throw sneaky("setMsgSeverity", t);
+            throw sneaky("leptonica static init", t);
         }
     }
 
+    /** {@return the shared popcount table for {@code pixCountPixels}} */
+    static MemorySegment pixelSumTab8() {
+        return PIXEL_SUM_TAB8;
+    }
+
     // pix lifecycle / metadata
 
     /** Read an image file, returning the raw {@code PIX *} (0 on failure). */
@@ -613,6 +645,34 @@ static MemorySegment pixOpenBrick(MemorySegment src, int hsize, int vsize) {
         }
     }
 
+    /**
+     * Dilate {@code src} by a {@code hsize x vsize} brick via the word-accelerated DWA kernels into
+     * a fresh {@code PIX}. Only exact for sizes up to {@link #DWA_SAFE_BRICK} — see that constant;
+     * {@code Pix} owns the larger-size composition.
+     */
+    static MemorySegment pixDilateBrickDwa(MemorySegment src, int hsize, int vsize) {
+        try {
+            return (MemorySegment)
+                    PIX_DILATE_BRICK_DWA.invoke(MemorySegment.NULL, src, hsize, vsize);
+        } catch (Throwable t) {
+            throw sneaky("pixDilateBrickDwa", t);
+        }
+    }
+
+    /**
+     * Open (erode then dilate) {@code src} by a {@code hsize x vsize} brick via the
+     * word-accelerated DWA kernels into a fresh {@code PIX}. Routed only for sizes up to {@link
+     * #DWA_SAFE_BRICK}; pixel-identical there to {@link #pixOpenBrick} (pinned by {@code PixTest}'s
+     * sweep) and several times faster.
+     */
+    static MemorySegment pixOpenBrickDwa(MemorySegment src, int hsize, int vsize) {
+        try {
+            return (MemorySegment) PIX_OPEN_BRICK_DWA.invoke(MemorySegment.NULL, src, hsize, vsize);
+        } catch (Throwable t) {
+            throw sneaky("pixOpenBrickDwa", t);
+        }
+    }
+
     /** {@code s1 AND s2} into a fresh {@code PIX} (the {@code pixd == NULL} path). */
     static MemorySegment pixAnd(MemorySegment s1, MemorySegment s2) {
         try {
diff --git a/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Pix.java b/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Pix.java
index 036887b..659c1cb 100644
--- a/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Pix.java
+++ b/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Pix.java
@@ -312,7 +312,8 @@ public long blackPixels() {
         MemorySegment h = requireHandle();
         try (Arena arena = Arena.ofConfined()) {
             MemorySegment count = arena.allocate(JAVA_INT);
-            Leptonica.pixCountPixels(h, count, MemorySegment.NULL);
+            // The shared popcount table: without it Leptonica rebuilds (and frees) one per call.
+            Leptonica.pixCountPixels(h, count, Leptonica.pixelSumTab8());
             return Integer.toUnsignedLong(count.get(JAVA_INT, 0));
         }
     }
@@ -352,8 +353,41 @@ public Pix subtract(Pix other) {
     /**
      * Return a new {@code Pix} grown by {@code radius} pixels in every direction (dilation by a
      * {@code (2*radius+1)} square). A {@code radius} of 0 is the identity.
+     *
+     * <p>Runs on Leptonica's word-accelerated DWA kernels for every size: bricks up to {@link
+     * Leptonica#DWA_SAFE_BRICK} directly, larger ones as a chain of safe-size passes — exact,
+     * because dilating by {@code brick(a)} then {@code brick(b)} equals dilating by {@code
+     * brick(a+b-1)} (Minkowski sum; within the image rectangle the L∞ paths between in-bounds
+     * points stay in bounds, so per-pass clipping changes nothing). Pixel-identity against the
+     * generic rasterop path is pinned by {@code PixTest}'s full sweep.
      */
     public Pix dilated(int radius) {
+        int size = 2 * radius + 1;
+        if (size <= Leptonica.DWA_SAFE_BRICK) {
+            return wrap(
+                    Leptonica.pixDilateBrickDwa(requireHandle(), size, size), "pixDilateBrickDwa");
+        }
+        int covered = Leptonica.DWA_SAFE_BRICK;
+        Pix current =
+                wrap(
+                        Leptonica.pixDilateBrickDwa(requireHandle(), covered, covered),
+                        "pixDilateBrickDwa");
+        while (covered < size) {
+            // size and covered stay odd, so the step is odd and within the safe sel set.
+            int step = Math.min(Leptonica.DWA_SAFE_BRICK, size - covered + 1);
+            Pix next =
+                    wrap(
+                            Leptonica.pixDilateBrickDwa(current.requireHandle(), step, step),
+                            "pixDilateBrickDwa");
+            current.close();
+            current = next;
+            covered += step - 1;
+        }
+        return current;
+    }
+
+    /** The generic rasterop dilation — kept as the DWA equality oracle for {@code PixTest}. */
+    Pix dilatedGeneric(int radius) {
         int size = 2 * radius + 1;
         return wrap(Leptonica.pixDilateBrick(requireHandle(), size, size), "pixDilateBrick");
     }
@@ -361,8 +395,22 @@ public Pix dilated(int radius) {
     /**
      * Return a new {@code Pix} opened (eroded then dilated) by a {@code (2*radius+1)} square — i.e.
      * foreground thinner than the brick in either axis is erased, leaving only the solid parts.
+     *
+     * <p>Bricks up to {@link Leptonica#DWA_SAFE_BRICK} run on Leptonica's word-accelerated DWA
+     * kernels — pixel-identical to the generic rasterop path (pinned by {@code PixTest}'s sweep)
+     * and several times faster; larger bricks fall back to the generic path (an opening, unlike a
+     * dilation, does not compose from smaller passes).
      */
     public Pix opened(int radius) {
+        int size = 2 * radius + 1;
+        if (size <= Leptonica.DWA_SAFE_BRICK) {
+            return wrap(Leptonica.pixOpenBrickDwa(requireHandle(), size, size), "pixOpenBrickDwa");
+        }
+        return openedGeneric(radius);
+    }
+
+    /** The generic rasterop opening — the large-brick fallback and the DWA equality oracle. */
+    Pix openedGeneric(int radius) {
         int size = 2 * radius + 1;
         return wrap(Leptonica.pixOpenBrick(requireHandle(), size, size), "pixOpenBrick");
     }
diff --git a/shared/imaging/src/test/java/io/github/p4suta/shared/imaging/PixTest.java b/shared/imaging/src/test/java/io/github/p4suta/shared/imaging/PixTest.java
index 3baa473..09cb681 100644
--- a/shared/imaging/src/test/java/io/github/p4suta/shared/imaging/PixTest.java
+++ b/shared/imaging/src/test/java/io/github/p4suta/shared/imaging/PixTest.java
@@ -295,4 +295,83 @@ void useAfterCloseThrows(@TempDir Path dir) throws Exception {
         pix.close();
         assertThrows(IllegalStateException.class, pix::width);
     }
+
+    // DWA vs generic morphology: the empirical gate for the fast path
+
+    /**
+     * The load-bearing equality sweep: {@code dilated} runs on the DWA fast path for every size
+     * (single safe kernel up to 15, composed safe passes beyond — Leptonica's generated DWA sels
+     * are incomplete above 15 and silently diverge there, so the composition is the only exact
+     * route), and {@code opened} up to 15. Both must be pixel-identical to the generic rasterop
+     * morphology — including at the image borders, where DWA's internal bordering is the classic
+     * divergence trap. The fixture has ink touching all four borders, interior glyphs and isolated
+     * dots; the radii sweep covers the production sizes (7x7 open, 43x43 dilate) and the 63x63
+     * ceiling. A failure here means the fast path may not ship.
+     */
+    @Test
+    void dwaMorphologyMatchesGenericBrickIncludingBorders(@TempDir Path dir) throws Exception {
+        Path pbm = dir.resolve("border-ink.pbm");
+        boolean[][] img = TestImages.blank(200, 150);
+        // Ink on all four borders: full-width top/bottom lines, full-height left/right lines.
+        TestImages.fillRect(img, 0, 0, 199, 1);
+        TestImages.fillRect(img, 0, 148, 199, 149);
+        TestImages.fillRect(img, 0, 0, 1, 149);
+        TestImages.fillRect(img, 198, 0, 199, 149);
+        // Corner blocks, interior glyphs, isolated dots.
+        TestImages.fillRect(img, 0, 0, 8, 8);
+        TestImages.fillRect(img, 190, 140, 199, 149);
+        TestImages.fillRect(img, 40, 30, 80, 90);
+        TestImages.fillRect(img, 120, 50, 150, 60);
+        TestImages.fillRect(img, 100, 110, 100, 110);
+        TestImages.fillRect(img, 20, 130, 20, 130);
+        TestImages.writePbm(pbm, img);
+
+        int[] radii = {0, 1, 3, 7, 10, 15, 21, 31};
+        try (Pix page = Pix.read(pbm)) {
+            for (int radius : radii) {
+                try (Pix dwa = page.dilated(radius);
+                        Pix generic = page.dilatedGeneric(radius)) {
+                    assertTrue(
+                            dwa.pixelsEqual(generic),
+                            "dilate radius " + radius + " must be pixel-identical");
+                }
+                try (Pix dwa = page.opened(radius);
+                        Pix generic = page.openedGeneric(radius)) {
+                    assertTrue(
+                            dwa.pixelsEqual(generic),
+                            "open radius " + radius + " must be pixel-identical");
+                }
+            }
+        }
+    }
+
+    /** The sweep on degenerate pages: smaller than DWA's border, all-black, and all-white. */
+    @Test
+    void dwaMorphologyMatchesGenericOnDegeneratePages(@TempDir Path dir) throws Exception {
+        boolean[][] tiny = TestImages.blank(20, 20);
+        TestImages.fillRect(tiny, 0, 0, 19, 2);
+        TestImages.fillRect(tiny, 17, 0, 19, 19);
+        TestImages.fillRect(tiny, 9, 9, 9, 9);
+        boolean[][] black = TestImages.blank(50, 40);
+        TestImages.fillRect(black, 0, 0, 49, 39);
+        boolean[][] white = TestImages.blank(50, 40);
+
+        int page = 0;
+        for (boolean[][] img : java.util.List.of(tiny, black, white)) {
+            Path pbm = dir.resolve("degenerate-" + page++ + ".pbm");
+            TestImages.writePbm(pbm, img);
+            try (Pix pix = Pix.read(pbm)) {
+                for (int radius : new int[] {1, 3, 21}) {
+                    try (Pix dwa = pix.dilated(radius);
+                            Pix generic = pix.dilatedGeneric(radius)) {
+                        assertTrue(dwa.pixelsEqual(generic), "dilate r=" + radius);
+                    }
+                    try (Pix dwa = pix.opened(radius);
+                            Pix generic = pix.openedGeneric(radius)) {
+                        assertTrue(dwa.pixelsEqual(generic), "open r=" + radius);
+                    }
+                }
+            }
+        }
+    }
 }
diff --git a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/CcittTiffs.java b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/CcittTiffs.java
new file mode 100644
index 0000000..1bfd7c3
--- /dev/null
+++ b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/CcittTiffs.java
@@ -0,0 +1,159 @@
+package io.github.p4suta.shared.pdf;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import org.jspecify.annotations.Nullable;
+
+/**
+ * Wraps the raw CCITT G4 stream {@code pdfimages -ccitt} dumps into a single-strip TIFF — the
+ * pass-through half of the extractor's remux mode: the scan's embedded G4 bytes become a readable
+ * TIFF without ever being decoded and re-encoded.
+ *
+ * <p>poppler writes a fax2tiff-style {@code .params} file beside each dump ({@code -4} G4 / {@code
+ * -1 -2} G3, {@code -A} EOL markers / {@code -P} none, {@code -X <columns>}, {@code -W} BlackIs1 /
+ * {@code -B} not, {@code -M} MSB-first). Only the plain shape TIFF's T.6 compression can represent
+ * verbatim is {@linkplain #supported supported}: G4, no EOL markers, MSB-first. Crucially, PDF's
+ * {@code EncodedByteAlign} never reaches the params file, so a wrapped stream is only trusted after
+ * the caller decodes it back successfully (see the extractor's read-back verification).
+ */
+final class CcittTiffs {
+
+    private static final short TYPE_SHORT = 3;
+    private static final short TYPE_LONG = 4;
+    private static final short TYPE_RATIONAL = 5;
+
+    private CcittTiffs() {}
+
+    /**
+     * The decode parameters poppler records beside a {@code .ccitt} dump.
+     *
+     * @param kind the coding scheme flag: {@code -4} (G4), {@code -2} (G3 2D) or {@code -1} (G3 1D)
+     * @param endOfLine whether rows are prefixed with EOL markers ({@code -A})
+     * @param columns the row width in pixels ({@code -X})
+     * @param blackIs1 whether decoded 1-bits are black ({@code -W}) or 0-bits are ({@code -B})
+     */
+    record Params(String kind, boolean endOfLine, int columns, boolean blackIs1) {}
+
+    /** Parse a {@code .params} file's text, or {@code null} when any token is unrecognized. */
+    static @Nullable Params parseParams(String text) {
+        @Nullable String kind = null;
+        @Nullable Boolean endOfLine = null;
+        @Nullable Integer columns = null;
+        @Nullable Boolean blackIs1 = null;
+        boolean msbFirst = false;
+        String[] tokens = text.trim().split("\\s+", -1);
+        for (int i = 0; i < tokens.length; i++) {
+            switch (tokens[i]) {
+                case "-4", "-2", "-1" -> kind = tokens[i];
+                case "-A" -> endOfLine = true;
+                case "-P" -> endOfLine = false;
+                case "-W" -> blackIs1 = true;
+                case "-B" -> blackIs1 = false;
+                case "-M" -> msbFirst = true;
+                case "-X" -> {
+                    i++;
+                    if (i >= tokens.length) {
+                        return null;
+                    }
+                    try {
+                        columns = Integer.parseInt(tokens[i]);
+                    } catch (NumberFormatException e) {
+                        return null;
+                    }
+                }
+                default -> {
+                    return null;
+                }
+            }
+        }
+        if (kind == null || endOfLine == null || columns == null || blackIs1 == null || !msbFirst) {
+            return null;
+        }
+        return new Params(kind, endOfLine, columns, blackIs1);
+    }
+
+    /**
+     * Whether {@code params} describes a stream TIFF T.6 represents verbatim: Group 4, no EOL
+     * markers, and a width agreeing with the listing row the dump corresponds to.
+     */
+    static boolean supported(Params params, int expectedWidth) {
+        return "-4".equals(params.kind())
+                && !params.endOfLine()
+                && params.columns() == expectedWidth;
+    }
+
+    /**
+     * Write {@code g4} as a little-endian, single-strip CCITT-G4 TIFF — header, the verbatim stream
+     * as the one strip, then the IFD.
+     *
+     * @param out the TIFF to write
+     * @param g4 the raw G4 (T.6) stream, verbatim
+     * @param width the row width in pixels
+     * @param height the row count
+     * @param blackIs1 the params' photometric hint: decoded 1-bits are black ({@code -W})
+     * @param dpi the resolution to stamp, or {@code <= 0} to omit the resolution tags
+     */
+    static void writeSingleStripG4(
+            Path out, byte[] g4, int width, int height, boolean blackIs1, int dpi)
+            throws IOException {
+        boolean withResolution = dpi > 0;
+        int entryCount = withResolution ? 14 : 11;
+        int stripOffset = 8;
+        int padding = g4.length % 2; // IFD offsets must be word-aligned
+        int ifdOffset = stripOffset + g4.length + padding;
+        int rationalOffset = ifdOffset + 2 + entryCount * 12 + 4;
+        ByteBuffer buf =
+                ByteBuffer.allocate(rationalOffset + (withResolution ? 16 : 0))
+                        .order(ByteOrder.LITTLE_ENDIAN);
+
+        buf.put((byte) 'I').put((byte) 'I').putShort((short) 42).putInt(ifdOffset);
+        buf.put(g4);
+        if (padding == 1) {
+            buf.put((byte) 0);
+        }
+
+        buf.putShort((short) entryCount); // entries below stay sorted by tag id
+        entry(buf, 256, TYPE_LONG, width); // ImageWidth
+        entry(buf, 257, TYPE_LONG, height); // ImageLength
+        entryShort(buf, 258, 1); // BitsPerSample
+        entryShort(buf, 259, 4); // Compression: CCITT T.6 (Group 4)
+        // The G4 stream encodes white/black runs; this tag tells readers which sense to
+        // materialize them in. The PDF default (-B, BlackIs1=false) is the standard fax sense —
+        // TIFF WhiteIsZero (0); -W (BlackIs1=true) is the inverted sense, BlackIsZero (1).
+        // Pinned empirically by CcittTiffsTest's pixel-identical round trip.
+        entryShort(buf, 262, blackIs1 ? 1 : 0); // PhotometricInterpretation
+        entryShort(buf, 266, 1); // FillOrder: MSB first (params -M)
+        entry(buf, 273, TYPE_LONG, stripOffset); // StripOffsets
+        entryShort(buf, 277, 1); // SamplesPerPixel
+        entry(buf, 278, TYPE_LONG, height); // RowsPerStrip: the single strip
+        entry(buf, 279, TYPE_LONG, g4.length); // StripByteCounts
+        if (withResolution) {
+            entry(buf, 282, TYPE_RATIONAL, rationalOffset); // XResolution
+            entry(buf, 283, TYPE_RATIONAL, rationalOffset + 8); // YResolution
+        }
+        entry(buf, 293, TYPE_LONG, 0); // T6Options: none
+        if (withResolution) {
+            entryShort(buf, 296, 2); // ResolutionUnit: inch
+        }
+        buf.putInt(0); // no next IFD
+
+        if (withResolution) {
+            buf.putInt(dpi).putInt(1).putInt(dpi).putInt(1);
+        }
+        Files.write(out, buf.array());
+    }
+
+    /** One IFD entry holding an inline LONG (or a RATIONAL's value offset). */
+    private static void entry(ByteBuffer buf, int tag, short type, int value) {
+        buf.putShort((short) tag).putShort(type).putInt(1).putInt(value);
+    }
+
+    /** One IFD entry holding an inline SHORT (left-justified in the 4-byte value field). */
+    private static void entryShort(ByteBuffer buf, int tag, int value) {
+        buf.putShort((short) tag).putShort(TYPE_SHORT).putInt(1);
+        buf.putShort((short) value).putShort((short) 0);
+    }
+}
diff --git a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfBoxJbig2Assembler.java b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfBoxJbig2Assembler.java
index 8fc2306..3113420 100644
--- a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfBoxJbig2Assembler.java
+++ b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfBoxJbig2Assembler.java
@@ -14,7 +14,6 @@
 import java.util.Locale;
 import java.util.OptionalInt;
 import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Stream;
 import org.apache.pdfbox.Loader;
@@ -85,7 +84,7 @@ private record ImageInfo(int width, int height, int resolution) {}
      * @param outPdf the lossless-JBIG2 PDF to write
      * @param source a PDF whose Info dict, XMP and version are inherited, or {@code null} for none
      * @param forcedDpi a single DPI to size every page with, or empty to read each image's own
-     * @param pool the worker pool the per-page {@code jbig2} encodes run on
+     * @param jobs how many {@code jbig2} children may encode at once
      * @param scratchDir scratch directory for the intermediate per-page JBIG2 streams
      *     (caller-owned)
      * @throws IOException if the directory is empty, a tool fails, or the write fails
@@ -95,7 +94,7 @@ public void assemble(
             Path outPdf,
             @Nullable Path source,
             OptionalInt forcedDpi,
-            ExecutorService pool,
+            int jobs,
             Path scratchDir)
             throws IOException {
         List<Path> images = sortedImages(imageDir);
@@ -109,8 +108,9 @@ public void assemble(
             int index = i;
             tasks.add(() -> encode(jbig2, image, scratchDir, index, forcedDpi));
         }
-        List<Page> pages =
-                Tasks.awaitAll(pool, tasks, "jbig2 encode interrupted", "jbig2 encode failed");
+        // Virtual workers: each task is one subprocess wait plus a brief Pix header read; the
+        // semaphore-bounded admission keeps at most `jobs` jbig2 children alive at once.
+        List<Page> pages = Tasks.awaitAll(Tasks.Workers.virtual(jobs), tasks, "jbig2 encode");
 
         try (PDDocument doc = new PDDocument()) {
             for (Page page : pages) {
diff --git a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractor.java b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractor.java
index 35c8b77..4ec4eeb 100644
--- a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractor.java
+++ b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractor.java
@@ -1,24 +1,31 @@
 package io.github.p4suta.shared.pdf;
 
+import io.github.p4suta.shared.imaging.Pix;
 import io.github.p4suta.shared.process.ProcessRunner;
 import io.github.p4suta.shared.process.Tasks;
 import io.github.p4suta.shared.process.ToolPath;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.Duration;
 import java.util.ArrayList;
+import java.util.Comparator;
 import java.util.List;
 import java.util.Locale;
 import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeoutException;
+import java.util.stream.Stream;
+import org.jspecify.annotations.Nullable;
 
 /**
  * Extracts a PDF's embedded bitonal images as TIFFs by driving {@code pdfimages}. The page range is
  * split across the worker pool (one {@code pdfimages -f/-l} per chunk) with distinct zero-padded
- * {@code page-cNN-} prefixes, so a name sort yields reading order and chunks never collide. The
- * dominant scan DPI is read from {@code pdfimages -list} and passed to the clean step as an
- * explicit DPI, since {@code pdfimages} tags the extracted TIFFs at a default 72 dpi.
+ * {@code page-cNN-} prefixes, so a name sort yields reading order and chunks never collide. An
+ * all-CCITT source is remuxed — the raw embedded G4 streams pass through into single-strip TIFFs
+ * with their true ppi stamped (see {@link #extract}); any other source is extracted decoded, where
+ * {@code pdfimages} tags the TIFFs at a default 72 dpi, so the dominant scan DPI from {@code
+ * pdfimages -list} is passed downstream explicitly either way.
  *
  * <p>The textual {@code pdfinfo}/{@code pdfimages -list} reports are parsed by the pure {@link
  * PdfListingParser}; this adapter only drives the external processes via {@link
@@ -93,13 +100,34 @@ public int dominantDpi(Path pdf) throws IOException {
 
     /**
      * Extract all pages of {@code pdf} into {@code outDir} as TIFFs, parallelized over page-range
-     * chunks. {@code jobs} bounds both the chunk count and the pool slots used.
+     * chunks of about {@link #CHUNK_PAGES} pages, at most {@code jobs} running at once (and at most
+     * {@code 4 * jobs} chunks).
+     *
+     * <p>One {@code pdfimages -list} pass picks the mode: when every embedded image is 1-bpp CCITT
+     * (the usual self-scanned book), each chunk dumps the raw G4 streams ({@code -ccitt}) and wraps
+     * them into single-strip CCITT-G4 TIFFs — a pure remux: no decode/re-encode, intermediates tens
+     * of KB per page instead of the decoded megabytes, and the image's true ppi stamped instead of
+     * {@code pdfimages}' default 72 dpi. Every wrapped page is decoded back once as verification
+     * (PDF's {@code EncodedByteAlign} never reaches the dumped params, so trust requires a decode);
+     * a chunk whose dump or wrap deviates in any way is re-extracted decoded ({@code -tiff}), which
+     * is also the whole-run mode for any other source.
      */
-    public void extract(Path pdf, Path outDir, int jobs, ExecutorService pool) throws IOException {
+    public void extract(Path pdf, Path outDir, int jobs) throws IOException {
         int total = pageCount(pdf);
-        int chunks = Math.max(1, Math.min(jobs, total));
-        int per = (total + chunks - 1) / chunks;
         String pdfimages = resolve("pdfimages", pdfimagesPropertyKey);
+        List<PdfListingParser.ImageRow> rows =
+                PdfListingParser.parseImageRows(
+                        capture(List.of(pdfimages, "-list", pdf.toString()), INFO_TIMEOUT));
+        boolean rawCcitt =
+                !rows.isEmpty()
+                        && rows.stream().allMatch(r -> r.bpc() == 1 && "ccitt".equals(r.enc()));
+
+        // Chunks of ~CHUNK_PAGES rather than total/jobs: fast finishers free their pool slot early
+        // (the straggler tail shrinks from total/jobs to ~CHUNK_PAGES pages), and a streaming
+        // consumer can take pages chunk by chunk. Capped so a small book is not all process spawns.
+        int chunkCap = (int) Math.min(4L * jobs, total);
+        int chunks = Math.clamp(Math.ceilDiv(total, CHUNK_PAGES), 1, Math.max(1, chunkCap));
+        int per = Math.ceilDiv(total, chunks);
         List<Callable<Void>> tasks = new ArrayList<>();
         int chunk = 0;
         for (int first = 1; first <= total; first += per) {
@@ -108,23 +136,155 @@ public void extract(Path pdf, Path outDir, int jobs, ExecutorService pool) throw
                     outDir.resolve(String.format(Locale.ROOT, "page-c%03d-", chunk)).toString();
             int from = first;
             int to = last;
+            List<PdfListingParser.ImageRow> chunkRows =
+                    rawCcitt ? rowsInRange(rows, from, to) : List.of();
             tasks.add(
                     () -> {
-                        runDiscarding(
-                                List.of(
-                                        pdfimages,
-                                        "-tiff",
-                                        "-f",
-                                        Integer.toString(from),
-                                        "-l",
-                                        Integer.toString(to),
-                                        pdf.toString(),
-                                        prefix));
+                        extractChunk(pdfimages, pdf, from, to, prefix, chunkRows);
                         return null;
                     });
             chunk++;
         }
-        Tasks.awaitAll(pool, tasks, "pdfimages extract interrupted", "pdfimages extract failed");
+        // Virtual workers: a chunk is one subprocess wait plus (in remux mode) a brief wrap; the
+        // semaphore-bounded admission keeps at most `jobs` pdfimages children alive at once.
+        Tasks.awaitAll(Tasks.Workers.virtual(jobs), tasks, "pdfimages extract");
+    }
+
+    /** Pages per extraction chunk; see {@link #extract}. */
+    private static final int CHUNK_PAGES = 12;
+
+    /** The listing rows for pages {@code from..to}, in listing (= dump) order. */
+    private static List<PdfListingParser.ImageRow> rowsInRange(
+            List<PdfListingParser.ImageRow> rows, int from, int to) {
+        return rows.stream().filter(r -> r.page() >= from && r.page() <= to).toList();
+    }
+
+    /**
+     * Extract one page-range chunk: raw-CCITT remux when {@code ccittRows} describes it, decoded
+     * {@code -tiff} otherwise — and the {@code -tiff} rerun as the fallback when the dump deviates
+     * from the listing in any way (count, params shape, or a wrap that does not decode back).
+     */
+    private void extractChunk(
+            String pdfimages,
+            Path pdf,
+            int from,
+            int to,
+            String prefix,
+            List<PdfListingParser.ImageRow> ccittRows)
+            throws IOException {
+        if (ccittRows.isEmpty()) {
+            runDiscarding(extractCommand(pdfimages, "-tiff", from, to, pdf, prefix));
+            return;
+        }
+        runDiscarding(extractCommand(pdfimages, "-ccitt", from, to, pdf, prefix));
+        if (!wrapChunk(prefix, ccittRows)) {
+            deleteByPrefix(prefix);
+            runDiscarding(extractCommand(pdfimages, "-tiff", from, to, pdf, prefix));
+        }
+    }
+
+    private static List<String> extractCommand(
+            String pdfimages, String format, int from, int to, Path pdf, String prefix) {
+        return List.of(
+                pdfimages,
+                format,
+                "-f",
+                Integer.toString(from),
+                "-l",
+                Integer.toString(to),
+                pdf.toString(),
+                prefix);
+    }
+
+    /**
+     * Wrap every {@code .ccitt} dump under {@code prefix} into a single-strip G4 TIFF, verifying
+     * each by decoding it back. Returns {@code false} (without cleaning up) on any deviation; the
+     * caller then discards the chunk's artifacts and falls back to a decoded extract.
+     */
+    private static boolean wrapChunk(String prefix, List<PdfListingParser.ImageRow> rows)
+            throws IOException {
+        List<Path> dumps = filesByPrefix(prefix, ".ccitt");
+        if (dumps.size() != rows.size()) {
+            return false;
+        }
+        for (int i = 0; i < dumps.size(); i++) {
+            Path ccitt = dumps.get(i);
+            PdfListingParser.ImageRow row = rows.get(i);
+            Path paramsFile = withExtension(ccitt, ".params");
+            if (!Files.isRegularFile(paramsFile)) {
+                return false;
+            }
+            CcittTiffs.@Nullable Params params =
+                    CcittTiffs.parseParams(Files.readString(paramsFile, StandardCharsets.UTF_8));
+            if (params == null || !CcittTiffs.supported(params, row.width())) {
+                return false;
+            }
+            Path out = withExtension(ccitt, ".tif");
+            CcittTiffs.writeSingleStripG4(
+                    out,
+                    Files.readAllBytes(ccitt),
+                    row.width(),
+                    row.height(),
+                    params.blackIs1(),
+                    Math.max(row.xPpi(), 0));
+            if (!decodesBack(out, row)) {
+                return false;
+            }
+            Files.delete(ccitt);
+            Files.delete(paramsFile);
+        }
+        return true;
+    }
+
+    /**
+     * Whether the wrapped TIFF decodes to the listing row's dimensions — the read-back proof that
+     * the stream really was plain T.6 (an {@code EncodedByteAlign} stream, undetectable from the
+     * params file, fails to decode or comes back the wrong size here).
+     */
+    private static boolean decodesBack(Path tif, PdfListingParser.ImageRow row) {
+        try (Pix pix = Pix.read(tif)) {
+            return pix.width() == row.width() && pix.height() == row.height();
+        } catch (IllegalStateException e) {
+            return false;
+        }
+    }
+
+    /** The files starting with {@code prefix}'s file name and ending in {@code suffix}, sorted. */
+    private static List<Path> filesByPrefix(String prefix, String suffix) throws IOException {
+        Path prefixPath = Path.of(prefix);
+        Path dir = prefixPath.getParent();
+        String name = String.valueOf(prefixPath.getFileName());
+        if (dir == null) {
+            throw new IOException("extract prefix has no parent directory: " + prefix);
+        }
+        try (Stream<Path> entries = Files.list(dir)) {
+            return entries.filter(
+                            p -> {
+                                String fileName = String.valueOf(p.getFileName());
+                                return fileName.startsWith(name) && fileName.endsWith(suffix);
+                            })
+                    .sorted(Comparator.comparing(p -> String.valueOf(p.getFileName())))
+                    .toList();
+        }
+    }
+
+    /**
+     * Delete every artifact of one chunk ({@code .ccitt}, {@code .params}, partial {@code .tif}).
+     */
+    private static void deleteByPrefix(String prefix) throws IOException {
+        for (String suffix : List.of(".ccitt", ".params", ".tif")) {
+            for (Path file : filesByPrefix(prefix, suffix)) {
+                Files.deleteIfExists(file);
+            }
+        }
+    }
+
+    /** A sibling of {@code file} with its extension replaced by {@code extension}. */
+    private static Path withExtension(Path file, String extension) {
+        String name = String.valueOf(file.getFileName());
+        int dot = name.lastIndexOf('.');
+        String base = dot < 0 ? name : name.substring(0, dot);
+        return file.resolveSibling(base + extension);
     }
 
     /** Run an extraction command, discarding its (file-producing) output. */
diff --git a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfListingParser.java b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfListingParser.java
index efe12aa..0a2bb1b 100644
--- a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfListingParser.java
+++ b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfListingParser.java
@@ -1,6 +1,8 @@
 package io.github.p4suta.shared.pdf;
 
+import java.util.ArrayList;
 import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -36,27 +38,17 @@ public static int parsePageCount(String pdfinfoOutput) {
     }
 
     /**
-     * The most common rounded x-ppi (column 13, 0-based 12) across the {@code image} rows of a
-     * {@code pdfimages -list} report, skipping the two header rows. Ties resolve to the first value
-     * seen and a non-positive winner falls back to {@link #DEFAULT_DPI}.
+     * The most common rounded x-ppi across the {@code image} rows of a {@code pdfimages -list}
+     * report. Ties resolve to the first value seen and a non-positive winner falls back to {@link
+     * #DEFAULT_DPI}.
      *
      * @param listOutput the full text {@code pdfimages -list} printed
      * @return the dominant rounded x-ppi, or {@link #DEFAULT_DPI} when none is usable
      */
     public static int parseDominantDpi(String listOutput) {
-        String[] lines = listOutput.split("\n", -1);
         Map<Integer, Integer> counts = new LinkedHashMap<>();
-        for (int i = 2; i < lines.length; i++) {
-            String[] fields = lines[i].trim().split("\\s+", -1);
-            if (fields.length < 13 || !"image".equals(fields[2])) {
-                continue;
-            }
-            try {
-                int ppi = (int) Math.round(Double.parseDouble(fields[12]));
-                counts.merge(ppi, 1, Integer::sum);
-            } catch (NumberFormatException ignored) {
-                // Non-numeric x-ppi cell: skip this row.
-            }
+        for (ImageRow row : parseImageRows(listOutput)) {
+            counts.merge(row.xPpi(), 1, Integer::sum);
         }
         if (counts.isEmpty()) {
             return DEFAULT_DPI;
@@ -71,4 +63,50 @@ public static int parseDominantDpi(String listOutput) {
         }
         return best > 0 ? best : DEFAULT_DPI;
     }
+
+    /**
+     * One {@code image} row of a {@code pdfimages -list} report — the columns the extractor needs
+     * to pick its mode and to wrap raw CCITT dumps.
+     *
+     * @param page the 1-based page the image sits on
+     * @param width the image width in pixels
+     * @param height the image height in pixels
+     * @param bpc bits per component ({@code 1} for bitonal)
+     * @param enc the embedded encoding token ({@code ccitt}, {@code jbig2}, {@code jpeg}, {@code
+     *     image}, …)
+     * @param xPpi the rounded x-ppi the image is placed at (0 when the cell is unusable)
+     */
+    public record ImageRow(int page, int width, int height, int bpc, String enc, int xPpi) {}
+
+    /**
+     * Parse the {@code image} rows of a {@code pdfimages -list} report, in listing order (the same
+     * order {@code pdfimages} dumps the images in), skipping the two header rows and any row with
+     * unparsable numeric cells.
+     *
+     * @param listOutput the full text {@code pdfimages -list} printed
+     * @return the parsed rows, possibly empty
+     */
+    public static List<ImageRow> parseImageRows(String listOutput) {
+        String[] lines = listOutput.split("\n", -1);
+        List<ImageRow> rows = new ArrayList<>();
+        for (int i = 2; i < lines.length; i++) {
+            String[] fields = lines[i].trim().split("\\s+", -1);
+            if (fields.length < 13 || !"image".equals(fields[2])) {
+                continue;
+            }
+            try {
+                rows.add(
+                        new ImageRow(
+                                Integer.parseInt(fields[0]),
+                                Integer.parseInt(fields[3]),
+                                Integer.parseInt(fields[4]),
+                                Integer.parseInt(fields[7]),
+                                fields[8],
+                                (int) Math.round(Double.parseDouble(fields[12]))));
+            } catch (NumberFormatException ignored) {
+                // A non-numeric cell: skip this row.
+            }
+        }
+        return rows;
+    }
 }
diff --git a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/CcittTiffsTest.java b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/CcittTiffsTest.java
new file mode 100644
index 0000000..63e93d6
--- /dev/null
+++ b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/CcittTiffsTest.java
@@ -0,0 +1,160 @@
+package io.github.p4suta.shared.pdf;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import io.github.p4suta.shared.imaging.Pix;
+import java.awt.Color;
+import java.awt.Graphics2D;
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import javax.imageio.ImageIO;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.graphics.image.CCITTFactory;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * The CCITT remux building blocks: the fax2tiff-style params parser (pure) and the single-strip G4
+ * TIFF writer, verified by wrapping a real G4 stream (PDFBox's CCITT encoder, the same encoding a
+ * scanner PDF embeds) and decoding it back through Leptonica pixel-for-pixel.
+ */
+final class CcittTiffsTest {
+
+    // ---- params parsing ----
+
+    @Test
+    void parsesTheUsualScannerShape() {
+        CcittTiffs.Params params = parsed("-4 -P -X 3496 -B -M\n");
+        assertThat(params).isEqualTo(new CcittTiffs.Params("-4", false, 3496, false));
+        assertThat(CcittTiffs.supported(params, 3496)).isTrue();
+    }
+
+    @Test
+    void eolMarkersAreUnsupported() {
+        CcittTiffs.Params params = parsed("-4 -A -X 100 -W -M");
+        assertThat(params).isEqualTo(new CcittTiffs.Params("-4", true, 100, true));
+        // EOL markers are not representable in TIFF T.6.
+        assertThat(CcittTiffs.supported(params, 100)).isFalse();
+    }
+
+    @Test
+    void group3IsUnsupported() {
+        assertThat(CcittTiffs.supported(parsed("-2 -P -X 100 -B -M"), 100)).isFalse();
+    }
+
+    @Test
+    void widthMismatchIsUnsupported() {
+        assertThat(CcittTiffs.supported(parsed("-4 -P -X 100 -B -M"), 200)).isFalse();
+    }
+
+    /** Parse params the test asserts are well-formed, made non-null for NullAway. */
+    private static CcittTiffs.Params parsed(String text) {
+        return java.util.Objects.requireNonNull(CcittTiffs.parseParams(text));
+    }
+
+    @Test
+    void unknownTokensAndMissingFlagsAreUnparsable() {
+        assertThat(CcittTiffs.parseParams("-4 -P -X 100 -B -M -Z")).isNull(); // unknown flag
+        assertThat(CcittTiffs.parseParams("-4 -P -B -M")).isNull(); // no -X
+        assertThat(CcittTiffs.parseParams("-4 -P -X nope -B -M")).isNull(); // bad width
+        assertThat(CcittTiffs.parseParams("-4 -P -X 100 -B")).isNull(); // no -M
+        assertThat(CcittTiffs.parseParams("")).isNull();
+    }
+
+    // ---- TIFF wrapping ----
+
+    /**
+     * Round trip: draw a known bitonal pattern, encode it to a raw G4 stream with PDFBox's CCITT
+     * encoder (the very encoding a scanner PDF embeds and {@code pdfimages -ccitt} dumps), wrap the
+     * stream with {@link CcittTiffs#writeSingleStripG4}, and assert it decodes back through
+     * Leptonica pixel-identical to the original, with the stamped resolution.
+     */
+    @Test
+    void wrappedStreamDecodesBackPixelIdentical(@TempDir Path tmp) throws Exception {
+        int width = 200;
+        int height = 150;
+        BufferedImage img = pattern(width, height);
+        G4Stream g4 = encodeG4(img);
+
+        Path wrapped = tmp.resolve("wrapped.tif");
+        CcittTiffs.writeSingleStripG4(wrapped, g4.bytes, width, height, g4.blackIs1, 450);
+
+        Path referencePng = tmp.resolve("reference.png");
+        ImageIO.write(img, "png", referencePng.toFile());
+        try (Pix expected = Pix.read(referencePng);
+                Pix actual = Pix.read(wrapped)) {
+            assertThat(actual.width()).isEqualTo(width);
+            assertThat(actual.height()).isEqualTo(height);
+            assertThat(actual.resolution()).isEqualTo(450);
+            assertThat(actual.blackPixels()).isPositive();
+            assertThat(actual.pixelsEqual(expected)).isTrue();
+        }
+    }
+
+    @Test
+    void omitsResolutionTagsWhenDpiUnknown(@TempDir Path tmp) throws Exception {
+        int width = 64;
+        int height = 48;
+        G4Stream g4 = encodeG4(pattern(width, height));
+
+        Path wrapped = tmp.resolve("wrapped.tif");
+        CcittTiffs.writeSingleStripG4(wrapped, g4.bytes, width, height, g4.blackIs1, 0);
+
+        try (Pix actual = Pix.read(wrapped)) {
+            assertThat(actual.width()).isEqualTo(width);
+            assertThat(actual.resolution()).isZero();
+        }
+    }
+
+    /** A raw G4 (T.6) stream and the {@code BlackIs1} convention its encoder declared. */
+    private static final class G4Stream {
+        final byte[] bytes;
+        final boolean blackIs1;
+
+        G4Stream(byte[] bytes, boolean blackIs1) {
+            this.bytes = bytes;
+            this.blackIs1 = blackIs1;
+        }
+    }
+
+    /**
+     * The raw CCITT G4 stream PDFBox's {@link CCITTFactory} encodes {@code img} to (lifted verbatim
+     * from the image XObject, exactly the bytes {@code pdfimages -ccitt} would dump), along with
+     * the {@code BlackIs1} decode parameter it declared.
+     */
+    private static G4Stream encodeG4(BufferedImage img) throws IOException {
+        try (PDDocument doc = new PDDocument()) {
+            PDImageXObject image = CCITTFactory.createFromImage(doc, img);
+            COSDictionary decodeParms =
+                    (COSDictionary) image.getCOSObject().getDictionaryObject(COSName.DECODE_PARMS);
+            boolean blackIs1 =
+                    decodeParms != null && decodeParms.getBoolean(COSName.BLACK_IS_1, false);
+            try (InputStream in = image.getCOSObject().createRawInputStream()) {
+                return new G4Stream(in.readAllBytes(), blackIs1);
+            }
+        }
+    }
+
+    /** A deterministic bitonal pattern with structure (bars + a block). */
+    private static BufferedImage pattern(int width, int height) {
+        BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY);
+        Graphics2D g = img.createGraphics();
+        try {
+            g.setColor(Color.WHITE);
+            g.fillRect(0, 0, width, height);
+            g.setColor(Color.BLACK);
+            for (int x = 4; x < width - 8; x += 12) {
+                g.fillRect(x, 8, 6, height - 16);
+            }
+            g.fillRect(width / 3, height / 3, width / 3, height / 3);
+        } finally {
+            g.dispose();
+        }
+        return img;
+    }
+}
diff --git a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfBoxJbig2AssemblerTest.java b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfBoxJbig2AssemblerTest.java
index 3bd7edf..2bae0ec 100644
--- a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfBoxJbig2AssemblerTest.java
+++ b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfBoxJbig2AssemblerTest.java
@@ -8,8 +8,6 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.OptionalInt;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
 import javax.imageio.ImageIO;
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.cos.COSName;
@@ -57,13 +55,10 @@ void assemblesABitonalDirectoryIntoAJbig2Pdf(@TempDir Path tmp) throws Exception
         writeBitonalPage(imageDir.resolve("page-000.png"), 64, 48);
         writeBitonalPage(imageDir.resolve("page-001.png"), 80, 60);
         Path outPdf = tmp.resolve("out.pdf");
-
-        ExecutorService pool = Executors.newFixedThreadPool(2);
         try {
             new PdfBoxJbig2Assembler(JBIG2_KEY)
-                    .assemble(imageDir, outPdf, null, OptionalInt.of(300), pool, scratch);
+                    .assemble(imageDir, outPdf, null, OptionalInt.of(300), 2, scratch);
         } finally {
-            pool.shutdownNow();
         }
 
         assertThat(Files.exists(outPdf)).isTrue();
@@ -103,13 +98,10 @@ void inheritsInfoAndVersionFromTheSourcePdf(@TempDir Path tmp) throws Exception
         Path scratch = Files.createDirectory(tmp.resolve("scratch"));
         writeBitonalPage(imageDir.resolve("page-000.png"), 64, 48);
         Path outPdf = tmp.resolve("out.pdf");
-
-        ExecutorService pool = Executors.newSingleThreadExecutor();
         try {
             new PdfBoxJbig2Assembler(JBIG2_KEY)
-                    .assemble(imageDir, outPdf, source, OptionalInt.empty(), pool, scratch);
+                    .assemble(imageDir, outPdf, source, OptionalInt.empty(), 2, scratch);
         } finally {
-            pool.shutdownNow();
         }
 
         try (PDDocument doc = Loader.loadPDF(outPdf.toFile())) {
@@ -125,7 +117,6 @@ void emptyDirectoryFailsBeforeTouchingTheToolchain(@TempDir Path tmp) throws Exc
         // runs before tool resolution). Works on any host.
         Path imageDir = Files.createDirectory(tmp.resolve("empty"));
         Path scratch = Files.createDirectory(tmp.resolve("scratch"));
-        ExecutorService pool = Executors.newSingleThreadExecutor();
         try {
             PdfBoxJbig2Assembler assembler = new PdfBoxJbig2Assembler(JBIG2_KEY);
             assertThatThrownBy(
@@ -135,12 +126,11 @@ void emptyDirectoryFailsBeforeTouchingTheToolchain(@TempDir Path tmp) throws Exc
                                             tmp.resolve("out.pdf"),
                                             null,
                                             OptionalInt.empty(),
-                                            pool,
+                                            1,
                                             scratch))
                     .isInstanceOf(IOException.class)
                     .hasMessageContaining("no cleaned images");
         } finally {
-            pool.shutdownNow();
         }
     }
 
@@ -156,7 +146,6 @@ void missingJbig2BinaryFailsWithAClearMessage(@TempDir Path tmp) throws Exceptio
         String bogusKey = "shared.pdf.test.bogus.jbig2.path";
         System.setProperty(bogusKey, tmp.resolve("definitely-not-jbig2").toString());
         try {
-            ExecutorService pool = Executors.newSingleThreadExecutor();
             try {
                 PdfBoxJbig2Assembler assembler = new PdfBoxJbig2Assembler(bogusKey);
                 assertThatThrownBy(
@@ -166,11 +155,10 @@ void missingJbig2BinaryFailsWithAClearMessage(@TempDir Path tmp) throws Exceptio
                                                 tmp.resolve("out.pdf"),
                                                 null,
                                                 OptionalInt.of(300),
-                                                pool,
+                                                1,
                                                 scratch))
                         .isInstanceOf(IOException.class);
             } finally {
-                pool.shutdownNow();
             }
         } finally {
             System.clearProperty(bogusKey);
diff --git a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractorTest.java b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractorTest.java
index c9032d2..4d17e4d 100644
--- a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractorTest.java
+++ b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractorTest.java
@@ -10,8 +10,6 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.List;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
 import java.util.stream.Stream;
 import javax.imageio.ImageIO;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -68,12 +66,7 @@ void extractsEmbeddedImagesAsTiffs(@TempDir Path tmp) throws Exception {
         writePdfWithImage(pdf, 120, 90);
         Path outDir = Files.createDirectory(tmp.resolve("out"));
 
-        ExecutorService pool = Executors.newFixedThreadPool(2);
-        try {
-            new PdfImagesCliExtractor(PDFIMAGES_KEY, PDFINFO_KEY).extract(pdf, outDir, 2, pool);
-        } finally {
-            pool.shutdownNow();
-        }
+        new PdfImagesCliExtractor(PDFIMAGES_KEY, PDFINFO_KEY).extract(pdf, outDir, 2);
 
         try (Stream<Path> entries = Files.list(outDir)) {
             List<Path> tiffs =
@@ -94,6 +87,64 @@ void dominantDpiReturnsAPositiveResolution(@TempDir Path tmp) throws Exception {
         assertThat(dpi).isPositive();
     }
 
+    /** Build a multi-page PDF embedding CCITT-G4 bitonal images, one per page, at ~200 ppi. */
+    private static void writeCcittPdf(Path pdf, int pages, int imgW, int imgH) throws IOException {
+        try (PDDocument doc = new PDDocument()) {
+            for (int i = 0; i < pages; i++) {
+                BufferedImage bitonal =
+                        new BufferedImage(imgW, imgH, BufferedImage.TYPE_BYTE_BINARY);
+                java.awt.Graphics2D g = bitonal.createGraphics();
+                try {
+                    g.setColor(java.awt.Color.WHITE);
+                    g.fillRect(0, 0, imgW, imgH);
+                    g.setColor(java.awt.Color.BLACK);
+                    g.fillRect(10 + i, 10, imgW / 3, imgH / 2);
+                } finally {
+                    g.dispose();
+                }
+                float wPt = imgW * 72f / 200;
+                float hPt = imgH * 72f / 200;
+                PDPage page = new PDPage(new PDRectangle(wPt, hPt));
+                doc.addPage(page);
+                PDImageXObject image =
+                        org.apache.pdfbox.pdmodel.graphics.image.CCITTFactory.createFromImage(
+                                doc, bitonal);
+                try (PDPageContentStream content = new PDPageContentStream(doc, page)) {
+                    content.drawImage(image, 0, 0, wPt, hPt);
+                }
+            }
+            doc.save(pdf.toFile());
+        }
+    }
+
+    @Test
+    @EnabledIf("io.github.p4suta.shared.pdf.PdfImagesCliExtractorTest#toolsOnPath")
+    void remuxesAnAllCcittSourceIntoStampedG4Tiffs(@TempDir Path tmp) throws Exception {
+        Path pdf = tmp.resolve("scan.pdf");
+        writeCcittPdf(pdf, 3, 240, 180);
+        Path outDir = Files.createDirectory(tmp.resolve("out"));
+
+        new PdfImagesCliExtractor(PDFIMAGES_KEY, PDFINFO_KEY).extract(pdf, outDir, 2);
+
+        try (Stream<Path> entries = Files.list(outDir)) {
+            List<Path> files = entries.sorted().toList();
+            // The remux leaves exactly one .tif per page — no .ccitt/.params residue.
+            assertThat(files).hasSize(3);
+            assertThat(files).allSatisfy(p -> assertThat(p.toString()).endsWith(".tif"));
+            for (Path tif : files) {
+                try (io.github.p4suta.shared.imaging.Pix pix =
+                        io.github.p4suta.shared.imaging.Pix.read(tif)) {
+                    assertThat(pix.width()).isEqualTo(240);
+                    assertThat(pix.height()).isEqualTo(180);
+                    // The remux stamps the image's true ppi instead of pdfimages' default 72.
+                    assertThat(pix.resolution()).isEqualTo(200);
+                    // Black ink, not inverted: the drawn block is ~1/6 of the page.
+                    assertThat(pix.blackPixels()).isGreaterThan(0).isLessThan(240L * 180 / 2);
+                }
+            }
+        }
+    }
+
     @Test
     void missingToolFailsWithAClearMessage(@TempDir Path tmp) throws Exception {
         Path pdf = tmp.resolve("doc.pdf");
diff --git a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfListingParserTest.java b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfListingParserTest.java
index f228032..2bba8d0 100644
--- a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfListingParserTest.java
+++ b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfListingParserTest.java
@@ -120,4 +120,27 @@ void parseDominantDpiSkipsNonImageRowsWithTooFewFields() {
                 PdfListingParser.DEFAULT_DPI,
                 PdfListingParser.parseDominantDpi("hdr\n----\n   1   0   smask\n"));
     }
+
+    @Test
+    void parseImageRowsReadsTheColumnsTheExtractorNeeds() {
+        var rows = PdfListingParser.parseImageRows(LIST);
+        assertEquals(3, rows.size());
+        assertEquals(new PdfListingParser.ImageRow(1, 2480, 3508, 1, "ccitt", 300), rows.get(0));
+        assertEquals(new PdfListingParser.ImageRow(3, 1240, 1754, 1, "ccitt", 150), rows.get(2));
+    }
+
+    @Test
+    void parseImageRowsSkipsMalformedAndNonImageRows() {
+        String mixed =
+                """
+                page   num  type   width height color comp bpc  enc  interp object ID x-ppi y-ppi size ratio
+                --------------------------------------------------------------------------------------------
+                   1     0 smask    2480  3508  gray    1   1  ccitt  no      7  0   300   300  101K 1.2%
+                   2     1 image     bad  3508  gray    1   1  ccitt  no     11  0   300   300   99K 1.1%
+                   3     2 image    2480  3508  rgb     3   8  jpeg   no     14  0   150   150   40K 1.0%
+                """;
+        var rows = PdfListingParser.parseImageRows(mixed);
+        assertEquals(1, rows.size());
+        assertEquals(new PdfListingParser.ImageRow(3, 2480, 3508, 8, "jpeg", 150), rows.get(0));
+    }
 }
diff --git a/shared/process/build.gradle.kts b/shared/process/build.gradle.kts
index 5ca5418..f34fc9a 100644
--- a/shared/process/build.gradle.kts
+++ b/shared/process/build.gradle.kts
@@ -20,8 +20,11 @@ plugins {
 //     (exitCode + stdout + stderr + elapsed Duration; tate's typed Result is the shape donor) and
 //     throwing TimeoutException on timeout. The caller passes the set of acceptable non-zero exits
 //     (generalizing despeckle's hardcoded qpdf-exit-3 tolerance) — a non-acceptable exit throws.
-//   * Tasks — awaitAll: a parameterized parallel fan-out over a caller-owned pool (register's),
-//     collecting results in submission order and aggregating the first failure as an IOException.
+//   * Tasks — awaitAll: a parallel fan-out over a batch-owned executor (Workers.platform for
+//     CPU-bound work, Workers.virtual for subprocess waits), collecting results in submission
+//     order with fail-fast sibling interruption, quiescence before the failure propagates, and
+//     exception identity preserved (domain RuntimeExceptions keep their error kind). Mirrors
+//     StructuredTaskScope.join() semantics on final-Java features.
 //
 // No domain exceptions are reachable here (that is the point of generalizing): a launch failure or
 // an unacceptable exit surfaces as a plain IOException, a timeout as a TimeoutException — so the
@@ -37,13 +40,11 @@ dependencies {
 
 // Coverage floor: the same realistic infra-like 0.75 line / 0.60 branch the despeckle
 // :infrastructure and :shared:imaging modules use — this is process/exec plumbing, not branch-rich
-// domain logic, so the domain-grade 0.95/0.90 the kernel/observability use would be dishonest. The
-// only branches a unit test cannot drive are the InterruptedException catches (you cannot
-// deterministically interrupt the waiting thread mid-waitFor) and a stray launch-failure path; at
-// CLASS granularity the imaging-style exclusion would throw away ProcessRunner's genuinely covered
-// happy/timeout/exit paths too, so NO class is excluded — the lenient floor absorbs the few
-// untestable catch arms. The same self-contained block the other shared modules carry, since the
-// floor is not applied by any convention plugin.
+// domain logic, so the domain-grade 0.95/0.90 the kernel/observability use would be dishonest.
+// (The interrupt arms are now deterministically covered — TasksTest and ProcessRunnerTest drive
+// them with latch-coordinated interrupts — but a stray launch-failure path remains; the lenient
+// floor absorbs it.) The same self-contained block the other shared modules carry, since the floor
+// is not applied by any convention plugin.
 tasks.named<JacocoCoverageVerification>("jacocoTestCoverageVerification") {
     dependsOn(tasks.named("test"))
     violationRules {
diff --git a/shared/process/src/main/java/io/github/p4suta/shared/process/ProcessRunner.java b/shared/process/src/main/java/io/github/p4suta/shared/process/ProcessRunner.java
index f1349fa..8ecfefe 100644
--- a/shared/process/src/main/java/io/github/p4suta/shared/process/ProcessRunner.java
+++ b/shared/process/src/main/java/io/github/p4suta/shared/process/ProcessRunner.java
@@ -81,9 +81,17 @@ public static Result run(
         try (ExecutorService drainers = Executors.newVirtualThreadPerTaskExecutor()) {
             Future<byte[]> out = drainers.submit(() -> process.getInputStream().readAllBytes());
             Future<byte[]> err = drainers.submit(() -> process.getErrorStream().readAllBytes());
-            if (!process.waitFor(timeout.toMillis(), TimeUnit.MILLISECONDS)) {
+            try {
+                if (!process.waitFor(timeout.toMillis(), TimeUnit.MILLISECONDS)) {
+                    process.destroyForcibly();
+                    throw new TimeoutException(command.get(0) + " timed out after " + timeout);
+                }
+            } catch (InterruptedException e) {
+                // Kill the child before propagating, exactly like the timeout path: the drainers
+                // then reach EOF, so the try-with-resources close() returns instead of waiting on
+                // a live child's pipes — and no orphaned process outlives the interrupted caller.
                 process.destroyForcibly();
-                throw new TimeoutException(command.get(0) + " timed out after " + timeout);
+                throw e;
             }
             // Decode leniently (replace malformed bytes), matching the long-standing behavior.
             String stdout = new String(await(out), StandardCharsets.UTF_8);
diff --git a/shared/process/src/main/java/io/github/p4suta/shared/process/Tasks.java b/shared/process/src/main/java/io/github/p4suta/shared/process/Tasks.java
index 725d3aa..7212886 100644
--- a/shared/process/src/main/java/io/github/p4suta/shared/process/Tasks.java
+++ b/shared/process/src/main/java/io/github/p4suta/shared/process/Tasks.java
@@ -1,65 +1,236 @@
 package io.github.p4suta.shared.process;
 
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorCompletionService;
 import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
+import java.util.concurrent.Semaphore;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.jspecify.annotations.Nullable;
 
 /**
- * Runs a batch of {@link Callable}s on a caller-owned {@link ExecutorService} and collects their
- * results in submission order, turning the executor's checked machinery into a single {@link
- * IOException}.
+ * Runs a batch of {@link Callable}s on a batch-owned executor with fail-fast semantics: the first
+ * failure interrupts the remaining workers, the batch waits until every worker has actually stopped
+ * (quiescence), and only then does the failure propagate — so a caller's {@code finally} can safely
+ * delete the directories the workers were writing into.
+ *
+ * <p>The propagated failure keeps its identity: a task's own {@link IOException} or {@link
+ * RuntimeException} (the apps' domain exceptions are runtime exceptions) is re-thrown unchanged, so
+ * the shared exception mapper still sees the original kind and maps the right exit code and {@code
+ * RunFailed} token; an {@link UncheckedIOException} is unwrapped to its cause. This deliberately
+ * mirrors {@code StructuredTaskScope.join()}'s contract (fail-fast, sibling interruption,
+ * quiescence before propagation) on final-Java features, so a future swap to structured concurrency
+ * stays internal to this class.
  */
 public final class Tasks {
 
     private Tasks() {}
 
     /**
-     * Submit every task, wait for all of them, and return their results in the order {@code tasks}
-     * was given. The first failure stops collection and surfaces as an {@link IOException}: a
-     * task's own {@link IOException} is re-thrown unchanged (so callers see the real cause), any
-     * other failure is wrapped with {@code failureMessage}, and an interruption restores the
-     * thread's interrupt flag and is reported with {@code interruptedMessage}.
+     * How one batch schedules its workers.
      *
-     * @param pool the executor to run the tasks on (the caller owns its lifecycle)
+     * <p>{@link #platform(int)} is for CPU-bound work (Leptonica/FFM pixel ops, TIFF G4 encoding):
+     * a fixed pool of platform threads, because a long native downcall pins a virtual thread's
+     * carrier, which would silently cap concurrency at the carrier count. {@link #virtual(int)} is
+     * for subprocess-bound work (per-page {@code jbig2}, {@code pdfimages} chunks): one virtual
+     * thread per task, with at most {@code maxInFlight} admitted past an internal semaphore so the
+     * spawned child processes stay bounded.
+     */
+    public static final class Workers {
+
+        private final int limit;
+        private final boolean virtualThreads;
+
+        private Workers(int limit, boolean virtualThreads) {
+            if (limit < 1) {
+                throw new IllegalArgumentException("worker limit must be >= 1, got " + limit);
+            }
+            this.limit = limit;
+            this.virtualThreads = virtualThreads;
+        }
+
+        /** {@code jobs} platform threads — CPU-bound work. */
+        public static Workers platform(int jobs) {
+            return new Workers(jobs, false);
+        }
+
+        /**
+         * A virtual thread per task, at most {@code maxInFlight} running at once — work that mostly
+         * waits on a subprocess.
+         */
+        public static Workers virtual(int maxInFlight) {
+            return new Workers(maxInFlight, true);
+        }
+
+        private ExecutorService newExecutor() {
+            return virtualThreads
+                    ? Executors.newVirtualThreadPerTaskExecutor()
+                    : Executors.newFixedThreadPool(limit);
+        }
+
+        private @Nullable Semaphore newPermits() {
+            return virtualThreads ? new Semaphore(limit) : null;
+        }
+    }
+
+    /**
+     * Per-item completion callback, invoked on the orchestrating thread in completion order: {@code
+     * done} runs {@code 1..total}, strictly increasing, successes only — so progress events built
+     * on it are ordered without any thread-safety burden on the listener.
+     */
+    @FunctionalInterface
+    public interface ItemProgress {
+
+        /** The no-op listener. */
+        ItemProgress NONE = (done, total) -> {};
+
+        /**
+         * One more item finished.
+         *
+         * @param done how many items have finished so far (1-based, strictly increasing)
+         * @param total how many items the batch runs
+         */
+        void onItem(int done, int total);
+    }
+
+    /** {@link #awaitAll(Workers, List, String, ItemProgress)} without progress reporting. */
+    public static <T> List<T> awaitAll(Workers workers, List<Callable<T>> tasks, String label)
+            throws IOException {
+        return awaitAll(workers, tasks, label, ItemProgress.NONE);
+    }
+
+    /**
+     * Run every task on a batch-owned executor and return their results in submission order.
+     *
+     * <p>On the first failure the remaining workers are interrupted ({@code shutdownNow}) and the
+     * batch waits for full quiescence before the failure propagates with its identity intact:
+     * {@link IOException} and {@link RuntimeException} unchanged, {@link UncheckedIOException}
+     * unwrapped to its cause, anything else wrapped as {@code IOException(label + " failed")}.
+     * Caller interruption likewise stops and joins the workers, restores the interrupt flag, and
+     * surfaces as {@code IOException(label + " interrupted")}.
+     *
+     * @param workers the scheduling mode (see {@link Workers})
      * @param tasks the work to run, one result per task
-     * @param interruptedMessage the message for an {@link IOException} raised on interruption
-     * @param failureMessage the message wrapping a non-{@code IOException} task failure
+     * @param label the batch's name in failure/interruption messages (e.g. {@code "G4 encode"})
+     * @param progress called on this thread after each successful item
      * @param <T> the task result type
      * @return the task results, in submission order
      * @throws IOException if any task fails or the wait is interrupted
      */
     public static <T> List<T> awaitAll(
-            ExecutorService pool,
-            List<Callable<T>> tasks,
-            String interruptedMessage,
-            String failureMessage)
+            Workers workers, List<Callable<T>> tasks, String label, ItemProgress progress)
             throws IOException {
-        List<Future<T>> futures;
-        try {
-            futures = pool.invokeAll(tasks);
-        } catch (InterruptedException e) {
-            Thread.currentThread().interrupt();
-            throw new IOException(interruptedMessage, e);
-        }
-        List<T> results = new ArrayList<>(futures.size());
-        for (Future<T> future : futures) {
+        int total = tasks.size();
+        List<@Nullable T> results = new ArrayList<>(Collections.nCopies(total, null));
+        try (ExecutorService pool = workers.newExecutor()) {
+            ExecutorCompletionService<IndexedResult<T>> completion =
+                    new ExecutorCompletionService<>(pool);
+            @Nullable Semaphore permits = workers.newPermits();
+            // The fail-fast gate: set by the first worker whose task throws, checked by every
+            // worker before it starts a task. shutdownNow() alone cannot stop a queued task a
+            // newly-freed worker dequeues in the instant before the orchestrator reacts; the gate
+            // closes that window (a gated task completes with the BatchCancelled marker instead
+            // of running user code).
+            AtomicBoolean failed = new AtomicBoolean();
+            for (int i = 0; i < total; i++) {
+                int index = i;
+                Callable<T> task = tasks.get(i);
+                completion.submit(
+                        () -> {
+                            if (failed.get()) {
+                                throw new BatchCancelled();
+                            }
+                            // Acquired inside the worker so submission never blocks and an
+                            // interrupt during the wait cancels cleanly.
+                            if (permits != null) {
+                                permits.acquire();
+                            }
+                            try {
+                                return new IndexedResult<>(index, task.call());
+                            } catch (Throwable t) {
+                                failed.set(true);
+                                throw t;
+                            } finally {
+                                if (permits != null) {
+                                    permits.release();
+                                }
+                            }
+                        });
+            }
             try {
-                results.add(future.get());
+                int done = 0;
+                while (done < total) {
+                    Future<IndexedResult<T>> future = completion.take();
+                    try {
+                        IndexedResult<T> result = future.get();
+                        results.set(result.index(), result.value());
+                        done++;
+                        progress.onItem(done, total);
+                    } catch (ExecutionException e) {
+                        if (e.getCause() instanceof BatchCancelled) {
+                            // A gated sibling, not the root cause — keep draining: the failure
+                            // that set the gate already ran, so its completion is on its way.
+                            continue;
+                        }
+                        pool.shutdownNow();
+                        throw failure(e.getCause(), label);
+                    }
+                }
             } catch (InterruptedException e) {
+                pool.shutdownNow();
                 Thread.currentThread().interrupt();
-                throw new IOException(interruptedMessage, e);
-            } catch (ExecutionException e) {
-                Throwable cause = e.getCause();
-                if (cause instanceof IOException io) {
-                    throw io;
-                }
-                throw new IOException(failureMessage, cause);
+                throw new IOException(label + " interrupted", e);
             }
+            // The try-with-resources close() awaits full termination on every path, so by the
+            // time a result or an exception reaches the caller, no worker is still running (or
+            // still writing into directories the caller is about to delete).
         }
-        return results;
+        return castNonNull(results);
+    }
+
+    /** One task's result tagged with its submission index, for submission-order assembly. */
+    private record IndexedResult<T>(int index, @Nullable T value) {}
+
+    /** The internal marker a gated (skipped-after-failure) task completes with; never user code. */
+    private static final class BatchCancelled extends RuntimeException {
+        private static final long serialVersionUID = 1L;
+
+        BatchCancelled() {
+            super("cancelled by an earlier failure in the batch", null, false, false);
+        }
+    }
+
+    /** The failure to propagate for a task that threw {@code cause}; see {@link #awaitAll}. */
+    private static IOException failure(@Nullable Throwable cause, String label) {
+        switch (cause) {
+            case IOException io -> {
+                return io;
+            }
+            case UncheckedIOException unchecked -> {
+                // UncheckedIOException's constructor requires a non-null cause.
+                return java.util.Objects.requireNonNull(unchecked.getCause());
+            }
+            case RuntimeException runtime -> throw runtime;
+            case Error error -> throw error;
+            case null, default -> {
+                return new IOException(label + " failed", cause);
+            }
+        }
+    }
+
+    /**
+     * {@return the results list with every slot filled} Every slot was set exactly once (one
+     * completion per submitted task), so the nullable build-up list is safe to expose as non-null.
+     */
+    @SuppressWarnings({"unchecked", "NullAway"}) // each index 0..total-1 was set by its task
+    private static <T> List<T> castNonNull(List<@Nullable T> results) {
+        return (List<T>) (List<?>) results;
     }
 }
diff --git a/shared/process/src/test/java/io/github/p4suta/shared/process/ProcessRunnerTest.java b/shared/process/src/test/java/io/github/p4suta/shared/process/ProcessRunnerTest.java
index 5514e24..c826d9d 100644
--- a/shared/process/src/test/java/io/github/p4suta/shared/process/ProcessRunnerTest.java
+++ b/shared/process/src/test/java/io/github/p4suta/shared/process/ProcessRunnerTest.java
@@ -111,4 +111,38 @@ void missingBinaryFailsWithIoException() {
                                         Duration.ofSeconds(10)))
                 .isInstanceOf(IOException.class);
     }
+
+    @Test
+    @org.junit.jupiter.api.Timeout(15)
+    void interruptedWaiterKillsTheChildAndUnwindsPromptly() throws Exception {
+        // Regression: an interrupt during waitFor used to propagate WITHOUT killing the child, so
+        // the drainer close() hung on the live child's pipes (here: up to the sleep's 30s) and the
+        // child leaked. Now the child is killed first, so the call unwinds promptly — the @Timeout
+        // and the bounded join are the proof.
+        var thrown = new java.util.concurrent.atomic.AtomicReference<Throwable>();
+        var flagRestoredPathTaken = new java.util.concurrent.atomic.AtomicBoolean();
+        var started = new java.util.concurrent.CountDownLatch(1);
+        Thread caller =
+                new Thread(
+                        () -> {
+                            try {
+                                started.countDown();
+                                ProcessRunner.run(List.of("sleep", "30"), Duration.ofMinutes(5));
+                            } catch (InterruptedException e) {
+                                thrown.set(e);
+                                flagRestoredPathTaken.set(true);
+                            } catch (Exception e) {
+                                thrown.set(e);
+                            }
+                        });
+        caller.start();
+        assertThat(started.await(10, java.util.concurrent.TimeUnit.SECONDS)).isTrue();
+        // Give the child a beat to actually spawn before interrupting the waiter.
+        Thread.sleep(300);
+        caller.interrupt();
+        caller.join(10_000);
+        assertThat(caller.isAlive()).as("the interrupted run must unwind promptly").isFalse();
+        assertThat(thrown.get()).isInstanceOf(InterruptedException.class);
+        assertThat(flagRestoredPathTaken).isTrue();
+    }
 }
diff --git a/shared/process/src/test/java/io/github/p4suta/shared/process/TasksTest.java b/shared/process/src/test/java/io/github/p4suta/shared/process/TasksTest.java
index 0b689d1..3356f83 100644
--- a/shared/process/src/test/java/io/github/p4suta/shared/process/TasksTest.java
+++ b/shared/process/src/test/java/io/github/p4suta/shared/process/TasksTest.java
@@ -1,74 +1,344 @@
 package io.github.p4suta.shared.process;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertSame;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
 
+/**
+ * The batch contract of {@link Tasks#awaitAll(Tasks.Workers, List, String, Tasks.ItemProgress)}:
+ * submission-order results, fail-fast with sibling interruption, quiescence before the failure
+ * propagates, exception identity, orchestrator-thread progress, and the virtual mode's in-flight
+ * bound. All coordination is latch-based — no sleeps as logic.
+ */
+@Timeout(30)
 class TasksTest {
 
+    // ---- happy path ----
+
     @Test
     void returnsResultsInSubmissionOrder() throws IOException {
-        ExecutorService pool = Executors.newFixedThreadPool(4);
-        try {
-            List<Callable<Integer>> tasks = new ArrayList<>();
-            for (int i = 0; i < 16; i++) {
-                int value = i;
-                tasks.add(() -> value);
-            }
-            List<Integer> results = Tasks.awaitAll(pool, tasks, "interrupted", "failed");
-            for (int i = 0; i < 16; i++) {
-                assertEquals(i, results.get(i));
-            }
-        } finally {
-            pool.shutdown();
+        List<Callable<Integer>> tasks = new ArrayList<>();
+        for (int i = 0; i < 16; i++) {
+            int value = i;
+            tasks.add(() -> value);
+        }
+        List<Integer> results = Tasks.awaitAll(Tasks.Workers.platform(4), tasks, "batch");
+        for (int i = 0; i < 16; i++) {
+            assertEquals(i, results.get(i));
         }
     }
 
     @Test
-    void surfacesATasksIOExceptionUnchanged() {
-        ExecutorService pool = Executors.newFixedThreadPool(2);
-        try {
-            IOException boom = new IOException("boom");
-            Callable<Integer> task =
-                    () -> {
-                        throw boom;
-                    };
-            IOException thrown =
-                    assertThrows(
-                            IOException.class,
-                            () -> Tasks.awaitAll(pool, List.of(task), "interrupted", "failed"));
-            // The task's own IOException is re-thrown as-is, not wrapped with the failure message.
-            assertSame(boom, thrown);
-        } finally {
-            pool.shutdown();
+    void progressRunsOnTheCallingThreadInCompletionOrder() throws IOException {
+        Thread caller = Thread.currentThread();
+        List<int[]> seen = new ArrayList<>(); // safe: appended on the calling thread only
+        AtomicBoolean wrongThread = new AtomicBoolean();
+        List<Callable<Integer>> tasks = new ArrayList<>();
+        for (int i = 0; i < 8; i++) {
+            int value = i;
+            tasks.add(() -> value);
         }
+        Tasks.awaitAll(
+                Tasks.Workers.platform(4),
+                tasks,
+                "batch",
+                (done, total) -> {
+                    if (Thread.currentThread() != caller) {
+                        wrongThread.set(true);
+                    }
+                    seen.add(new int[] {done, total});
+                });
+        assertFalse(wrongThread.get(), "progress must run on the orchestrating thread");
+        assertEquals(8, seen.size());
+        for (int i = 0; i < 8; i++) {
+            assertEquals(i + 1, seen.get(i)[0], "done is strictly increasing from 1");
+            assertEquals(8, seen.get(i)[1]);
+        }
+    }
+
+    // ---- failure semantics ----
+
+    @Test
+    void surfacesATasksIOExceptionByIdentity() {
+        IOException boom = new IOException("boom");
+        IOException thrown =
+                assertThrows(
+                        IOException.class,
+                        () ->
+                                Tasks.awaitAll(
+                                        Tasks.Workers.platform(2),
+                                        List.of(failing(boom)),
+                                        "batch"));
+        assertSame(boom, thrown);
+    }
+
+    @Test
+    void surfacesARuntimeExceptionByIdentitySoErrorKindsSurvive() {
+        IllegalStateException boom = new IllegalStateException("domain kind carrier");
+        IllegalStateException thrown =
+                assertThrows(
+                        IllegalStateException.class,
+                        () ->
+                                Tasks.awaitAll(
+                                        Tasks.Workers.platform(2),
+                                        List.of(failing(boom)),
+                                        "batch"));
+        assertSame(boom, thrown);
+    }
+
+    @Test
+    void unwrapsAnUncheckedIOExceptionToItsCause() {
+        IOException cause = new IOException("disk");
+        IOException thrown =
+                assertThrows(
+                        IOException.class,
+                        () ->
+                                Tasks.awaitAll(
+                                        Tasks.Workers.platform(2),
+                                        List.of(failing(new UncheckedIOException(cause))),
+                                        "batch"));
+        assertSame(cause, thrown);
+    }
+
+    @Test
+    void wrapsAnUnknownCheckedFailureWithTheLabel() {
+        Exception checked = new Exception("odd");
+        IOException thrown =
+                assertThrows(
+                        IOException.class,
+                        () ->
+                                Tasks.awaitAll(
+                                        Tasks.Workers.platform(2),
+                                        List.of(failing(checked)),
+                                        "G4 encode"));
+        assertEquals("G4 encode failed", thrown.getMessage());
+        assertSame(checked, thrown.getCause());
+    }
+
+    // ---- fail-fast, sibling interruption, quiescence ----
+
+    @Test
+    void firstFailureInterruptsSiblingsAndSkipsQueuedWork() throws Exception {
+        CountDownLatch blockerStarted = new CountDownLatch(1);
+        AtomicBoolean blockerInterrupted = new AtomicBoolean();
+        AtomicBoolean queuedRan = new AtomicBoolean();
+        IllegalStateException boom = new IllegalStateException("first failure");
+
+        Callable<Void> blocker =
+                () -> {
+                    blockerStarted.countDown();
+                    try {
+                        new CountDownLatch(1).await(); // blocks until interrupted
+                    } catch (InterruptedException e) {
+                        blockerInterrupted.set(true);
+                    }
+                    return null;
+                };
+        Callable<Void> failing =
+                () -> {
+                    blockerStarted.await(); // fail only once the sibling is provably running
+                    throw boom;
+                };
+        Callable<Void> queued =
+                () -> {
+                    queuedRan.set(true);
+                    return null;
+                };
+
+        IllegalStateException thrown =
+                assertThrows(
+                        IllegalStateException.class,
+                        () ->
+                                Tasks.awaitAll(
+                                        Tasks.Workers.platform(2),
+                                        List.of(blocker, failing, queued),
+                                        "batch"));
+        assertSame(boom, thrown);
+        assertTrue(blockerInterrupted.get(), "the running sibling must be interrupted");
+        assertFalse(queuedRan.get(), "queued work must be discarded on failure");
     }
 
     @Test
-    void wrapsANonIoFailureWithTheFailureMessage() {
-        ExecutorService pool = Executors.newFixedThreadPool(2);
-        try {
-            Callable<Integer> task =
+    void quiescesBeforeTheFailurePropagates() {
+        AtomicInteger inFlight = new AtomicInteger();
+        CountDownLatch blockerStarted = new CountDownLatch(1);
+
+        Callable<Void> blocker =
+                () -> {
+                    inFlight.incrementAndGet();
+                    try {
+                        blockerStarted.countDown();
+                        new CountDownLatch(1).await(); // until interrupted
+                        return null;
+                    } catch (InterruptedException e) {
+                        return null;
+                    } finally {
+                        inFlight.decrementAndGet();
+                    }
+                };
+        Callable<Void> failing =
+                () -> {
+                    blockerStarted.await();
+                    throw new IllegalStateException("boom");
+                };
+
+        assertThrows(
+                IllegalStateException.class,
+                () ->
+                        Tasks.awaitAll(
+                                Tasks.Workers.platform(2), List.of(blocker, failing), "batch"));
+        // The contract callers' finally-blocks rely on: when awaitAll throws, no worker is still
+        // running (still writing into directories about to be deleted).
+        assertEquals(0, inFlight.get());
+    }
+
+    @Test
+    void progressCountsOnlySuccessesContiguously() {
+        // Released from the PROGRESS callback (the orchestrating thread), not from the ok tasks:
+        // a task-side latch would leave a window where the failing task's completion overtakes an
+        // ok task's still-being-enqueued completion, making the recorded sequence racy.
+        CountDownLatch twoConsumed = new CountDownLatch(2);
+        List<Integer> dones = new ArrayList<>(); // appended on the calling thread only
+        Callable<Void> ok = () -> null;
+        Callable<Void> failing =
+                () -> {
+                    twoConsumed.await(); // both successes are consumed first, provably
+                    throw new IllegalStateException("boom");
+                };
+        assertThrows(
+                IllegalStateException.class,
+                () ->
+                        Tasks.awaitAll(
+                                Tasks.Workers.platform(3),
+                                List.of(ok, ok, failing),
+                                "batch",
+                                (done, total) -> {
+                                    dones.add(done);
+                                    twoConsumed.countDown();
+                                }));
+        assertEquals(List.of(1, 2), dones);
+    }
+
+    // ---- caller interruption ----
+
+    @Test
+    void callerInterruptionStopsWorkersAndRestoresTheFlag() throws Exception {
+        CountDownLatch workerStarted = new CountDownLatch(1);
+        AtomicBoolean workerInterrupted = new AtomicBoolean();
+        AtomicInteger inFlight = new AtomicInteger();
+        AtomicReference<Throwable> thrown = new AtomicReference<>();
+        AtomicBoolean flagRestored = new AtomicBoolean();
+
+        Callable<Void> blocker =
+                () -> {
+                    inFlight.incrementAndGet();
+                    try {
+                        workerStarted.countDown();
+                        new CountDownLatch(1).await();
+                        return null;
+                    } catch (InterruptedException e) {
+                        workerInterrupted.set(true);
+                        return null;
+                    } finally {
+                        inFlight.decrementAndGet();
+                    }
+                };
+
+        Thread caller =
+                new Thread(
+                        () -> {
+                            try {
+                                Tasks.awaitAll(
+                                        Tasks.Workers.platform(1), List.of(blocker), "register");
+                            } catch (IOException e) {
+                                thrown.set(e);
+                                flagRestored.set(Thread.currentThread().isInterrupted());
+                            }
+                        });
+        caller.start();
+        assertTrue(workerStarted.await(10, TimeUnit.SECONDS));
+        caller.interrupt();
+        caller.join(10_000);
+        assertFalse(caller.isAlive(), "the interrupted batch must unwind promptly");
+
+        assertNotNull(thrown.get());
+        assertInstanceOf(IOException.class, thrown.get());
+        assertEquals("register interrupted", thrown.get().getMessage());
+        assertTrue(flagRestored.get(), "the interrupt flag must be restored");
+        assertTrue(workerInterrupted.get(), "the worker must be stopped");
+        assertEquals(0, inFlight.get(), "quiescent before the exception reached the caller");
+    }
+
+    // ---- virtual mode ----
+
+    @Test
+    void virtualModeBoundsInFlightWork() throws Exception {
+        int bound = 3;
+        AtomicInteger inFlight = new AtomicInteger();
+        AtomicInteger highWater = new AtomicInteger();
+        CountDownLatch gate = new CountDownLatch(1);
+        CountDownLatch boundRunning = new CountDownLatch(bound);
+
+        List<Callable<Void>> tasks = new ArrayList<>();
+        for (int i = 0; i < 12; i++) {
+            tasks.add(
                     () -> {
-                        throw new IllegalStateException("oops");
-                    };
-            IOException thrown =
-                    assertThrows(
-                            IOException.class,
-                            () -> Tasks.awaitAll(pool, List.of(task), "interrupted", "failed"));
-            assertEquals("failed", thrown.getMessage());
-            assertInstanceOf(IllegalStateException.class, thrown.getCause());
-        } finally {
-            pool.shutdown();
+                        int now = inFlight.incrementAndGet();
+                        highWater.accumulateAndGet(now, Math::max);
+                        boundRunning.countDown();
+                        try {
+                            gate.await();
+                            return null;
+                        } finally {
+                            inFlight.decrementAndGet();
+                        }
+                    });
         }
+        Thread opener =
+                new Thread(
+                        () -> {
+                            try {
+                                // Open the gate only once `bound` workers are provably in flight,
+                                // so the high-water assertion is meaningful, then let all pass.
+                                boundRunning.await();
+                                gate.countDown();
+                            } catch (InterruptedException e) {
+                                Thread.currentThread().interrupt();
+                            }
+                        });
+        opener.start();
+        Tasks.awaitAll(Tasks.Workers.virtual(bound), tasks, "jbig2 encode");
+        opener.join(10_000);
+        assertEquals(bound, highWater.get(), "no more than the bound may run at once");
+    }
+
+    @Test
+    void rejectsANonPositiveWorkerLimit() {
+        assertThrows(IllegalArgumentException.class, () -> Tasks.Workers.platform(0));
+        assertThrows(IllegalArgumentException.class, () -> Tasks.Workers.virtual(-1));
+    }
+
+    /** A task that throws {@code failure} once called. */
+    private static <T> Callable<T> failing(Exception failure) {
+        return () -> {
+            throw failure;
+        };
     }
 }