diff --git a/_typos.toml b/_typos.toml
index df533d6..b9800fd 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -45,3 +45,7 @@ Pagent = "Pagent"
 Flavour = "Flavour"
 flavours = "flavours"
 initialise = "initialise"
+# The PDF spec's own key name `DecodeParms` (and PDFBox's COSName.DECODE_PARMS
+# constant mirroring it) — third-party/spec identifiers we must name verbatim.
+Parms = "Parms"
+PARMS = "PARMS"
diff --git a/pipeline/README.md b/pipeline/README.md
index b8d47ef..12d15ef 100644
--- a/pipeline/README.md
+++ b/pipeline/README.md
@@ -33,6 +33,7 @@ never stops the rest; existing outputs are skipped unless `--force`).
 | `--pdf-a` | off | emit PDF/A-2b conformance |
 | `--force` | off | overwrite an existing output (batch: regenerate, don't skip) |
 | `--progress-file <path>` | — | write machine-readable JSONL progress events (single input only) |
+| `--timings` | off | print a per-stage wall-clock breakdown to stderr when each run ends |
 | `-i, --interactive` | off | guided mode: prompt for the input, options and output |
 | `-h, --help` | — | show help and exit |
 | `-V, --version` | — | print version and exit |
diff --git a/pipeline/app/build.gradle.kts b/pipeline/app/build.gradle.kts
index 9764cfe..5c2572d 100644
--- a/pipeline/app/build.gradle.kts
+++ b/pipeline/app/build.gradle.kts
@@ -34,6 +34,10 @@ dependencies {
     implementation(libs.commons.cli)
     implementation(libs.slf4j.api)
     runtimeOnly(libs.slf4j.simple)
+
+    // The benchmark fixture generator (test sources, never shipped — mirroring register's
+    // createSamplePdf) draws synthetic scan pages with PDFBox directly.
+    testImplementation(libs.pdfbox)
 }
 
 // The one place native access is granted to the launched app; run, test and JavaExec inherit it.
@@ -85,3 +89,46 @@ selfContainedApp {
     // jbig2 (its register stage writes TIFF-G4; the spread pack embeds CCITT G4).
     bundleQpdf(this, libs.versions.qpdf.get())
 }
+
+// ---- Stage-level benchmark (see pipeline/docs/perf-baseline.md) ---------------------------------
+
+// Deterministic synthetic scan book for the benchmark: an existing output is reused, so the
+// generation cost (a minute at 200 pages × 600 dpi) is paid once. Knob: -Ppages=N (default 200).
+tasks.register<JavaExec>("createSampleScan") {
+    group = "verification"
+    description = "Generate the synthetic bitonal scan book the benchmark converts (cached)"
+    dependsOn(tasks.named("testClasses"))
+    classpath = sourceSets["test"].runtimeClasspath
+    mainClass = "io.github.p4suta.pipeline.tools.SampleScanGenerator"
+    val pages = providers.gradleProperty("pages").getOrElse("200")
+    args = listOf("build/test-data/sample-scan-${pages}p.pdf", pages, "600")
+}
+
+// Stage-level runtime + memory benchmark (the pdfbook counterpart of tate's benchRuntime): runs the
+// installDist launcher in-container with --timings, parses the per-stage breakdown, samples peak
+// RSS from /proc, and writes pipeline/docs/perf-baseline.md. Knobs: -Pruns=N (warm runs, default
+// 3), -Pjobs=1,8 (comma-separated -j sweep; default auto = the launcher's CPU-count default),
+// -Ppages=N (fixture size, default 200), -Pinputs="a.pdf b.pdf" (real books instead of the
+// fixture; resolved against the repo root).
+tasks.register<JavaExec>("benchPipeline") {
+    group = "verification"
+    description = "Benchmark pdfbook stage timings + peak memory; writes pipeline/docs/perf-baseline.md"
+    dependsOn(tasks.named("installDist"), tasks.named("createSampleScan"))
+    classpath = sourceSets["test"].runtimeClasspath
+    mainClass = "io.github.p4suta.pipeline.tools.PipelineBenchmark"
+    workingDir = rootDir
+    val runs = providers.gradleProperty("runs").getOrElse("3")
+    val jobs = providers.gradleProperty("jobs").getOrElse("auto")
+    val pages = providers.gradleProperty("pages").getOrElse("200")
+    val extraInputs =
+        providers
+            .gradleProperty("inputs")
+            .orNull
+            ?.split(Regex("\\s+"))
+            ?.filter { it.isNotBlank() }
+            ?: emptyList()
+    val launcher = "pipeline/app/build/install/pdfbook/bin/pdfbook"
+    val inputs =
+        extraInputs.ifEmpty { listOf("pipeline/app/build/test-data/sample-scan-${pages}p.pdf") }
+    args = listOf(launcher, "qpdf", "pipeline/docs/perf-baseline.md", runs, jobs) + inputs
+}
diff --git a/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/PipelineCommand.java b/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/PipelineCommand.java
index 99e453c..25f8125 100644
--- a/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/PipelineCommand.java
+++ b/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/PipelineCommand.java
@@ -153,6 +153,13 @@ private static Options buildOptions() {
                                 "Write machine-readable JSONL progress events to this file (single"
                                         + " input only); used by front ends to report progress.")
                         .get());
+        options.addOption(
+                Option.builder()
+                        .longOpt("timings")
+                        .desc(
+                                "Print a per-stage wall-clock breakdown to stderr when each run"
+                                        + " ends.")
+                        .get());
         CliDocs.options(options);
         return options;
     }
@@ -351,6 +358,7 @@ record Plan(Path input, Path output, Config config) {}
                         deskew,
                         scale,
                         pdfA,
+                        false,
                         force);
         return new Plan(input, output, config);
     }
@@ -380,14 +388,32 @@ private static String defaultOutput(Path input) {
     private static void runOne(Path input, Path output, Config config, @Nullable Path progressFile)
             throws IOException {
         if (progressFile == null) {
-            runWith(input, output, config, ProgressSink.NO_OP);
+            runWith(input, output, config, withTimings(config, ProgressSink.NO_OP));
         } else {
             try (JsonlFileProgressSink progress = new JsonlFileProgressSink(progressFile)) {
-                runWith(input, output, config, progress);
+                runWith(input, output, config, withTimings(config, progress));
             }
         }
     }
 
+    /**
+     * Adds {@code --timings} reporting to {@code sink}: with the flag set, events also reach a new
+     * {@link StageTimingSink} writing to stderr (a NO_OP {@code sink} is simply replaced by it).
+     */
+    private static ProgressSink withTimings(Config config, ProgressSink sink) {
+        if (config.timings()) {
+            StageTimingSink stageTimer = new StageTimingSink(System.err);
+            return sink == ProgressSink.NO_OP
+                    ? stageTimer
+                    : event -> {
+                        sink.emit(event);
+                        stageTimer.emit(event);
+                    };
+        }
+        // Flag off: hand the caller's sink back untouched.
+        return sink;
+    }
+
     // Resolves the progress sink first so the stages and sink report page-level PageProcessed
     // events into the same sink PipelineRunner reports stage boundaries into. With no
     // --progress-file the sink is NO_OP and every emit is a no-op.
@@ -401,9 +427,11 @@ private static void runWith(Path input, Path output, Config config, ProgressSink
             stages.add(new RegisterStage(config.jobs(), config.deskew(), config.scale(), progress));
         }
         if (stages.isEmpty()) {
-            // --no-despeckle --no-register: the raw pdfimages TIFFs are not CCITT G4, which the
-            // spread sink's pass-through embedding requires; despeckle/register each re-encode G4
-            // themselves, so only the no-stage path needs this normalization.
+            // --no-despeckle --no-register: a non-CCITT source extracts as decoded TIFFs that are
+            // not the single-strip CCITT G4 the spread sink's pass-through embedding requires;
+            // despeckle/register each re-encode G4 themselves, so only the no-stage path needs
+            // this normalization (an all-CCITT source arrives already G4 — then this is a cheap
+            // lossless re-encode that keeps the path uniform).
             stages.add(new G4EncodeStage(config.jobs(), progress));
         }
         Source source = new PdfExtractSource(input, config.jobs());
@@ -447,6 +475,7 @@ private static Config parseConfig(CommandLine cmd) throws ParseException {
                 !cmd.hasOption("no-deskew"),
                 !cmd.hasOption("no-scale"),
                 cmd.hasOption("pdf-a"),
+                cmd.hasOption("timings"),
                 cmd.hasOption("force"));
     }
 
@@ -475,5 +504,6 @@ record Config(
             boolean deskew,
             boolean scale,
             boolean pdfA,
+            boolean timings,
             boolean force) {}
 }
diff --git a/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/StageTimingSink.java b/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/StageTimingSink.java
new file mode 100644
index 0000000..496b0e4
--- /dev/null
+++ b/pipeline/app/src/main/java/io/github/p4suta/pipeline/cli/StageTimingSink.java
@@ -0,0 +1,93 @@
+package io.github.p4suta.pipeline.cli;
+
+import io.github.p4suta.shared.progress.ProgressEvent;
+import io.github.p4suta.shared.progress.ProgressSink;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import org.jspecify.annotations.Nullable;
+
+/**
+ * Measures each stage's wall clock from its {@link ProgressEvent.StageStarted}/{@link
+ * ProgressEvent.StageCompleted} boundaries and prints a per-stage breakdown when the run ends — the
+ * {@code --timings} flag's implementation. One line per stage in completion order, then the
+ * run-wide total:
+ *
+ * <pre>{@code
+ * timing: extract = 4.21s (18.3%)
+ * timing: despeckle = 9.87s (42.9%)
+ * timing: total = 23.01s
+ * }</pre>
+ *
+ * <p>The {@code timing: <stage> = <seconds>s} shape is a stable contract the {@code benchPipeline}
+ * harness parses; keep it machine-readable. A stage still open when the run ends (or when the next
+ * stage starts) is reported with its elapsed-so-far, so a failed run still shows where the time
+ * went. Thread-safe like every {@link ProgressSink}: events are handled under one lock.
+ */
+final class StageTimingSink implements ProgressSink {
+
+    private final PrintStream out;
+    private final Object lock = new Object();
+    private final List<String> stages = new ArrayList<>();
+    private final List<Long> stageNanos = new ArrayList<>();
+    private @Nullable String openStage;
+    private long openedAtNanos;
+    private long runStartedAtNanos;
+    private boolean runStarted;
+
+    StageTimingSink(PrintStream out) {
+        this.out = out;
+    }
+
+    @Override
+    public void emit(ProgressEvent event) {
+        synchronized (lock) {
+            switch (event) {
+                case ProgressEvent.RunStarted ignored -> markRunStarted();
+                case ProgressEvent.StageStarted s -> {
+                    // Defensive: tolerate a missed RunStarted or a stage that never completed.
+                    markRunStarted();
+                    closeOpenStage();
+                    openStage = s.stage();
+                    openedAtNanos = System.nanoTime();
+                }
+                case ProgressEvent.StageCompleted ignored -> closeOpenStage();
+                // Page events are ignored: stage boundaries carry all the timing information.
+                case ProgressEvent.PageProcessed ignored -> {}
+                case ProgressEvent.RunCompleted ignored -> report();
+                case ProgressEvent.RunFailed ignored -> report();
+            }
+        }
+    }
+
+    private void markRunStarted() {
+        if (!runStarted) {
+            runStartedAtNanos = System.nanoTime();
+            runStarted = true;
+        }
+    }
+
+    private void closeOpenStage() {
+        @Nullable String stage = openStage;
+        if (stage != null) {
+            stages.add(stage);
+            stageNanos.add(System.nanoTime() - openedAtNanos);
+            openStage = null;
+        }
+    }
+
+    private void report() {
+        closeOpenStage();
+        long totalNanos = runStarted ? System.nanoTime() - runStartedAtNanos : 0;
+        for (int i = 0; i < stages.size(); i++) {
+            out.printf(
+                    Locale.ROOT,
+                    "timing: %s = %.2fs (%.1f%%)%n",
+                    stages.get(i),
+                    stageNanos.get(i) / 1e9,
+                    totalNanos > 0 ? stageNanos.get(i) * 100.0 / totalNanos : 0.0);
+        }
+        out.printf(Locale.ROOT, "timing: total = %.2fs%n", totalNanos / 1e9);
+    }
+}
diff --git a/pipeline/app/src/test/java/io/github/p4suta/pipeline/cli/StageTimingSinkTest.java b/pipeline/app/src/test/java/io/github/p4suta/pipeline/cli/StageTimingSinkTest.java
new file mode 100644
index 0000000..3fbb3f7
--- /dev/null
+++ b/pipeline/app/src/test/java/io/github/p4suta/pipeline/cli/StageTimingSinkTest.java
@@ -0,0 +1,75 @@
+package io.github.p4suta.pipeline.cli;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import io.github.p4suta.shared.progress.ProgressEvent;
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Pins the {@code --timings} report: one machine-parseable {@code timing: <stage> = <seconds>s}
+ * line per completed stage (in completion order, percentages attached) plus a {@code timing: total}
+ * line, printed only when the run ends — and on failure, the still-open stage is reported with its
+ * elapsed-so-far. The line shape is the contract the {@code benchPipeline} harness parses.
+ */
+final class StageTimingSinkTest {
+
+    private final ByteArrayOutputStream buf = new ByteArrayOutputStream();
+    private final StageTimingSink sink =
+            new StageTimingSink(new PrintStream(buf, true, StandardCharsets.UTF_8));
+
+    private String output() {
+        return buf.toString(StandardCharsets.UTF_8);
+    }
+
+    @Test
+    void completedRunReportsEachStageInOrderAndATotal() {
+        sink.emit(new ProgressEvent.RunStarted(2));
+        sink.emit(new ProgressEvent.StageStarted("extract", 0, 2));
+        sink.emit(new ProgressEvent.PageProcessed("extract", 1, 2));
+        sink.emit(new ProgressEvent.StageCompleted("extract"));
+        sink.emit(new ProgressEvent.StageStarted("spread", 1, 2));
+        sink.emit(new ProgressEvent.StageCompleted("spread"));
+        sink.emit(new ProgressEvent.RunCompleted());
+
+        assertThat(output().lines())
+                .hasSize(3)
+                .satisfiesExactly(
+                        extract ->
+                                assertThat(extract)
+                                        .matches(
+                                                "timing: extract = \\d+\\.\\d{2}s"
+                                                        + " \\(\\d+\\.\\d%\\)"),
+                        spread ->
+                                assertThat(spread)
+                                        .matches(
+                                                "timing: spread = \\d+\\.\\d{2}s"
+                                                        + " \\(\\d+\\.\\d%\\)"),
+                        total -> assertThat(total).matches("timing: total = \\d+\\.\\d{2}s"));
+    }
+
+    @Test
+    void nothingIsPrintedBeforeTheRunEnds() {
+        sink.emit(new ProgressEvent.RunStarted(1));
+        sink.emit(new ProgressEvent.StageStarted("extract", 0, 1));
+        sink.emit(new ProgressEvent.StageCompleted("extract"));
+
+        assertThat(output()).isEmpty();
+    }
+
+    @Test
+    void failedRunReportsTheStillOpenStage() {
+        sink.emit(new ProgressEvent.RunStarted(2));
+        sink.emit(new ProgressEvent.StageStarted("extract", 0, 2));
+        sink.emit(new ProgressEvent.StageCompleted("extract"));
+        sink.emit(new ProgressEvent.StageStarted("register", 1, 2));
+        sink.emit(new ProgressEvent.RunFailed("INTERNAL", "boom"));
+
+        assertThat(output())
+                .contains("timing: extract = ")
+                .contains("timing: register = ")
+                .contains("timing: total = ");
+    }
+}
diff --git a/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/PipelineBenchmark.java b/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/PipelineBenchmark.java
new file mode 100644
index 0000000..1b59397
--- /dev/null
+++ b/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/PipelineBenchmark.java
@@ -0,0 +1,498 @@
+package io.github.p4suta.pipeline.tools;
+
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Stage-level runtime + memory benchmark for the installed {@code pdfbook} launcher — the pdfbook
+ * counterpart of tate's {@code RuntimeBenchmark}, with per-stage attribution as the addition.
+ *
+ * <p>Drives {@code pdfbook <in> -o <tmp>/out.pdf --force --timings [-j N]} as a child process,
+ * measuring end-to-end wall around the process ({@link System#nanoTime()}), peak RSS by sampling
+ * the child's {@code /proc/<pid>/status} {@code VmHWM} (Linux-only; {@code n/a} elsewhere), and the
+ * per-stage wall by parsing the stable {@code timing: <stage> = <seconds>s} lines {@code
+ * StageTimingSink} prints. Writes a Markdown report.
+ *
+ * <p>Test-sources tool (driven by the {@code benchPipeline} Gradle task): it never ships in the
+ * production launcher, and it expects the dev container's native toolchain (pdfimages, Leptonica,
+ * qpdf) on PATH — the installDist launcher, not the jpackage image, is what it measures.
+ *
+ * <p>Usage: {@code PipelineBenchmark <launcher> <qpdf> <outDoc> <runs> <jobsCsv|auto>
+ * <input.pdf>...} — {@code jobsCsv} is a comma-separated {@code -j} sweep ({@code auto} = omit
+ * {@code -j}, i.e. the launcher's CPU-count default).
+ */
+public final class PipelineBenchmark {
+
+    private static final Pattern TIMING =
+            Pattern.compile("^timing: (\\S+) = ([0-9.]+)s", Pattern.MULTILINE);
+    private static final Pattern VM_HWM = Pattern.compile("VmHWM:\\s*([0-9]+)");
+    private static final long POLL_MILLIS = 5;
+    private static final long PROCESS_TIMEOUT_NANOS = TimeUnit.MINUTES.toNanos(30);
+    private static final long MIB = 1024L * 1024L;
+
+    private final Path launcher;
+    private final String qpdf;
+    private final Path outDoc;
+    private final int runs;
+    private final List<String> jobsSweep;
+
+    private PipelineBenchmark(
+            Path launcher, String qpdf, Path outDoc, int runs, List<String> jobsSweep) {
+        this.launcher = launcher;
+        this.qpdf = qpdf;
+        this.outDoc = outDoc;
+        this.runs = runs;
+        this.jobsSweep = jobsSweep;
+    }
+
+    public static void main(String[] args) throws IOException, InterruptedException {
+        if (args.length < 6) {
+            System.err.println(
+                    "usage: PipelineBenchmark <launcher> <qpdf> <outDoc> <runs> <jobsCsv|auto>"
+                            + " <input.pdf>...");
+            System.exit(2); // usage error
+            return;
+        }
+        var benchmark =
+                new PipelineBenchmark(
+                        Path.of(args[0]), // pdfbook launcher to measure
+                        args[1], // qpdf executable, used only for the page count
+                        Path.of(args[2]), // Markdown report destination
+                        Integer.parseInt(args[3]), // warm runs per input × jobs measurement
+                        Arrays.stream(args[4].split(",")).map(String::trim).toList());
+        List<Path> inputs = Arrays.stream(args).skip(5).map(Path::of).toList(); // remaining args
+        benchmark.run(inputs);
+    }
+
+    // Result records
+
+    /** One measured child run: wall seconds, peak RSS (KiB, -1 if unavailable), merged output. */
+    private record Timed(double elapsedSeconds, long maxRssKib, String output) {}
+
+    /** A finished input × jobs measurement, ready to render. */
+    private record Row(
+            String name,
+            String jobs,
+            int pages,
+            long inputBytes,
+            double wallMedian,
+            double coldWall,
+            Map<String, Double> stageMedians,
+            long rssMedianKib,
+            long outputBytes) {}
+
+    // Orchestration
+
+    private void run(List<Path> inputs) throws IOException, InterruptedException {
+        requireExecutable(launcher, "pdfbook launcher", "build it first:  just pdfbook-install");
+        List<Row> rows = new ArrayList<>();
+        for (Path input : inputs) {
+            if (!Files.isRegularFile(input)) {
+                System.err.println("skip (not found): " + input);
+                continue;
+            }
+            for (String jobs : jobsSweep) {
+                rows.add(measure(input, jobs));
+            }
+        }
+        if (rows.isEmpty()) {
+            throw new IOException("no benchmark input found; " + outDoc + " left untouched");
+        }
+        String report = render(rows);
+        Files.createDirectories(requireParent(outDoc));
+        Files.writeString(outDoc, report, StandardCharsets.UTF_8);
+        System.out.print(report);
+        System.err.printf("%n→ wrote %s%n", outDoc);
+    }
+
+    private Row measure(Path input, String jobs) throws IOException, InterruptedException {
+        int pages = pageCount(input); // -1 when qpdf cannot report it
+        long inputBytes = Files.size(input);
+        System.err.printf(
+                Locale.ROOT,
+                "Measuring: %s (%dp, %s MiB, jobs=%s)…%n",
+                fileName(input),
+                pages,
+                mib(inputBytes),
+                jobs);
+
+        Path work = Files.createTempDirectory("pdfbook-bench");
+        try {
+            Path out = work.resolve("out.pdf");
+            List<String> convert = new ArrayList<>();
+            convert.add(launcher.toString());
+            convert.add(input.toString());
+            convert.add("-o");
+            convert.add(out.toString());
+            convert.add("--force"); // each run after the first overwrites the same out.pdf
+            convert.add("--timings"); // prints the timing: lines parseTimings reads
+            if (!"auto".equals(jobs)) { // auto: leave -j off so the launcher picks its default
+                convert.add("-j");
+                convert.add(jobs);
+            }
+
+            // Cold run (fresh page cache for the input is not guaranteed, but a fresh JVM is) —
+            // recorded separately from the warm median.
+            Timed cold = timed(convert);
+
+            double[] walls = new double[runs];
+            long[] rsss = new long[runs];
+            Map<String, List<Double>> stages = new LinkedHashMap<>(); // label → per-run seconds
+            for (int r = 0; r < runs; r++) {
+                Timed t = timed(convert);
+                walls[r] = t.elapsedSeconds();
+                rsss[r] = t.maxRssKib();
+                parseTimings(t.output())
+                        .forEach(
+                                (stage, seconds) ->
+                                        stages.computeIfAbsent(stage, ignored -> new ArrayList<>())
+                                                .add(seconds));
+            }
+            Map<String, Double> stageMedians = new LinkedHashMap<>();
+            stages.forEach((stage, seconds) -> stageMedians.put(stage, median(seconds)));
+
+            long outputBytes = Files.isRegularFile(out) ? Files.size(out) : -1;
+            return new Row(
+                    fileName(input),
+                    jobs,
+                    pages,
+                    inputBytes,
+                    median(walls),
+                    cold.elapsedSeconds(),
+                    stageMedians,
+                    medianLong(rsss),
+                    outputBytes);
+        } finally {
+            deleteTree(work);
+        }
+    }
+
+    /**
+     * Extracts every {@code timing: <label> = <seconds>s} line of {@code output} into a map from
+     * label to seconds, in first-appearance order; a label printed more than once is summed.
+     */
+    private static Map<String, Double> parseTimings(String output) {
+        Map<String, Double> secondsByLabel = new LinkedHashMap<>();
+        for (Matcher line = TIMING.matcher(output); line.find(); ) {
+            double seconds = Double.parseDouble(line.group(2));
+            secondsByLabel.merge(line.group(1), seconds, Double::sum);
+        }
+        return secondsByLabel;
+    }
+
+    // Subprocess measurement
+
+    /**
+     * Runs {@code command}, returning its wall time, peak RSS (sampled from {@code
+     * /proc/<pid>/status} {@code VmHWM}; -1 where unavailable), and its merged stdout+stderr (so
+     * the {@code timing:} lines can be parsed). Output is drained on a separate thread so a chatty
+     * child cannot deadlock on a full pipe. A timeout or an interrupt kills the child.
+     */
+    private static Timed timed(List<String> command) throws IOException, InterruptedException {
+        long start = System.nanoTime();
+        Process process = new ProcessBuilder(command).redirectErrorStream(true).start();
+
+        var captured = new AtomicReference<>("");
+        Thread drainer =
+                Thread.ofVirtual()
+                        .start(
+                                () -> {
+                                    try (var in = process.getInputStream()) {
+                                        byte[] merged = in.readAllBytes();
+                                        captured.set(new String(merged, StandardCharsets.UTF_8));
+                                    } catch (IOException ignored) {
+                                        // Child gone: losing its output is fine for a benchmark.
+                                    }
+                                });
+
+        Path status = Path.of("/proc", Long.toString(process.pid()), "status");
+        long peakRssKib = -1;
+        try {
+            do {
+                peakRssKib = Math.max(peakRssKib, readVmHwmKib(status));
+                if (System.nanoTime() - start > PROCESS_TIMEOUT_NANOS) {
+                    throw new IOException("timed command did not finish: " + command);
+                }
+            } while (!process.waitFor(POLL_MILLIS, TimeUnit.MILLISECONDS)); // poll delay
+        } finally {
+            process.destroyForcibly(); // no-op after a normal exit; kills the child otherwise
+        }
+        double elapsed = (System.nanoTime() - start) / 1.0e9;
+        int exit = process.waitFor();
+        drainer.join();
+        if (exit != 0) {
+            throw new IOException(
+                    "benchmark child failed with exit " + exit + ": " + captured.get());
+        }
+        return new Timed(elapsed, peakRssKib, captured.get());
+    }
+
+    /** Peak RSS (KiB) from {@code /proc/<pid>/status}, or -1 if unreadable / non-Linux. */
+    private static long readVmHwmKib(Path status) {
+        try {
+            Matcher m = VM_HWM.matcher(Files.readString(status, StandardCharsets.UTF_8));
+            return m.find() ? Long.parseLong(m.group(1)) : -1;
+        } catch (IOException | RuntimeException e) {
+            return -1; // process already exited, or /proc not present
+        }
+    }
+
+    /** Page count via {@code qpdf --show-npages} (PATH or absolute), or -1 when unavailable. */
+    private int pageCount(Path pdf) throws InterruptedException {
+        try {
+            Process process =
+                    new ProcessBuilder(qpdf, "--show-npages", pdf.toString())
+                            .redirectErrorStream(true)
+                            .start();
+            String output;
+            try (var in = process.getInputStream()) {
+                output = new String(in.readAllBytes(), StandardCharsets.UTF_8); // blocks until EOF
+            }
+            if (!process.waitFor(1, TimeUnit.MINUTES)) { // bounds only the exit after EOF
+                process.destroyForcibly();
+                return -1;
+            }
+            // The count is the one digits-only line; qpdf may surround it with warning lines.
+            return output.lines()
+                    .map(String::strip)
+                    .filter(line -> line.matches("\\d+"))
+                    .findFirst()
+                    .map(Integer::parseInt)
+                    .orElse(-1);
+        } catch (IOException e) {
+            return -1; // qpdf not installed — page count is cosmetic here
+        }
+    }
+
+    // Numeric helpers
+
+    private static double median(double[] values) {
+        double[] ascending = Arrays.stream(values).sorted().toArray();
+        if (ascending.length == 0) {
+            return 0;
+        }
+        int mid = ascending.length / 2;
+        boolean odd = ascending.length % 2 == 1;
+        return odd ? ascending[mid] : (ascending[mid - 1] + ascending[mid]) / 2.0;
+    }
+
+    private static double median(List<Double> values) {
+        return median(values.stream().mapToDouble(Double::doubleValue).toArray());
+    }
+
+    private static long medianLong(long[] values) {
+        return Math.round(median(Arrays.stream(values).asDoubleStream().toArray()));
+    }
+
+    private static String mib(long bytes) {
+        return bytes < 0 ? "n/a" : String.format(Locale.ROOT, "%.1f", bytes / (double) MIB);
+    }
+
+    // Rendering
+
+    private String render(List<Row> rows) {
+        // Stage columns: the union of stage labels across rows in first-appearance order, with
+        // "total" (the launcher's in-process conversion time) pulled out as its own column.
+        Set<String> stageNames = new LinkedHashSet<>();
+        for (Row row : rows) {
+            stageNames.addAll(row.stageMedians().keySet());
+        }
+        stageNames.remove("total");
+
+        var sb = new StringBuilder();
+        sb.append("# pdfbook runtime baseline (stage-level)\n\n")
+                .append("Generated by `PipelineBenchmark`")
+                .append(" (`./gradlew :pipeline:app:benchPipeline`, in the dev container).\n")
+                .append(
+                        "Tracks the **per-stage wall-clock breakdown, end-to-end runtime and peak"
+                                + " memory**\n")
+                .append(
+                        "of the installDist `pdfbook` launcher. Re-run after any change to the"
+                                + " pipeline\n")
+                .append(
+                        "and compare against the previous run before merging (acceptance: ≥5%"
+                                + " median\n")
+                .append(
+                        "total-wall improvement, or an explicit RSS/disk win, with output"
+                                + " validated).\n\n");
+        appendHostInfo(sb);
+        sb.append("\n## Stage breakdown (warm median of ").append(runs).append(" runs)\n\n");
+        sb.append(
+                        "`conv` is the launcher's in-process total (`timing: total`);"
+                                + " `startup+init` = E2E wall − conv\n")
+                .append(
+                        "(JVM boot + first-touch PDFBox/AWT init). `jobs=auto` is the launcher's"
+                                + " CPU-count default.\n\n");
+        sb.append("| Input | Jobs | Pages | E2E wall | conv |");
+        for (String stage : stageNames) {
+            sb.append(' ').append(stage).append(" |");
+        }
+        sb.append(" startup+init | Cold wall | Peak RSS (MiB) | Output (MiB) |\n");
+        sb.append("|---|---|---:|---:|---:|");
+        sb.append("---:|".repeat(stageNames.size()));
+        sb.append("---:|---:|---:|---:|\n");
+        for (Row row : rows) {
+            double conv = row.stageMedians().getOrDefault("total", 0.0);
+            sb.append("| ")
+                    .append(row.name())
+                    .append(" | ")
+                    .append(row.jobs())
+                    .append(" | ")
+                    .append(pages(row.pages()))
+                    .append(" | ")
+                    .append(secs(row.wallMedian()))
+                    .append(" | ")
+                    .append(conv > 0 ? secs(conv) : "n/a")
+                    .append(" |");
+            for (String stage : stageNames) {
+                Double seconds = row.stageMedians().get(stage);
+                sb.append(' ').append(seconds == null ? "n/a" : secs(seconds)).append(" |");
+            }
+            sb.append(' ')
+                    .append(conv > 0 ? secs(Math.max(0, row.wallMedian() - conv)) : "n/a")
+                    .append(" | ")
+                    .append(secs(row.coldWall()))
+                    .append(" | ")
+                    .append(rssMib(row.rssMedianKib()))
+                    .append(" | ")
+                    .append(mib(row.outputBytes()))
+                    .append(" |\n");
+        }
+        sb.append("\n## Stage shares (of conv, warm median)\n\n")
+                .append(
+                        "The shares that decide where optimization effort goes: a stage that is"
+                                + " ~5% of conv\n")
+                .append("cannot pay for a parallelization rewrite no matter how elegant.\n\n");
+        sb.append("| Input | Jobs |");
+        for (String stage : stageNames) {
+            sb.append(' ').append(stage).append(" |");
+        }
+        sb.append('\n').append("|---|---|").append("---:|".repeat(stageNames.size())).append('\n');
+        for (Row row : rows) {
+            double conv = row.stageMedians().getOrDefault("total", 0.0);
+            sb.append("| ").append(row.name()).append(" | ").append(row.jobs()).append(" |");
+            for (String stage : stageNames) {
+                Double seconds = row.stageMedians().get(stage);
+                sb.append(' ')
+                        .append(
+                                seconds == null || conv <= 0
+                                        ? "n/a"
+                                        : String.format(
+                                                Locale.ROOT, "%.1f%%", seconds * 100.0 / conv))
+                        .append(" |");
+            }
+            sb.append('\n');
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Appends the environment bullet list of the report: UTC timestamp, host OS / CPU count / RAM,
+     * the launcher path, the sampling policy, and a note on where the default input comes from.
+     *
+     * @param sb the report buffer to append to
+     */
+    private void appendHostInfo(StringBuilder sb) {
+        DateTimeFormatter stamp =
+                DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss", Locale.ROOT)
+                        .withZone(ZoneOffset.UTC);
+        String date = stamp.format(Instant.now());
+
+        // Bytes -> GiB (2^30 = 1.073741824e9), rounded to a whole number; "?" when unknown.
+        long ramBytes = totalPhysicalMemoryBytes();
+        String ram = "?";
+        if (ramBytes > 0) {
+            ram = Math.round(ramBytes / 1.073741824e9) + "Gi";
+        }
+        String os =
+                String.join(
+                        " ",
+                        System.getProperty("os.name", "?"),
+                        System.getProperty("os.version", "?"),
+                        System.getProperty("os.arch", "?"));
+        int cpus = Runtime.getRuntime().availableProcessors();
+
+        sb.append("- Date (UTC): ").append(date).append('\n');
+        sb.append("- Host: ").append(os).append(", ").append(cpus);
+        sb.append(" CPUs, RAM ").append(ram).append('\n');
+        sb.append("- Launcher: `").append(launcher).append("`\n");
+        sb.append("- Samples per measurement: cold (1st run) + warm median of ");
+        sb.append(runs).append(".\n");
+        sb.append(
+                "- The default input is the deterministic synthetic fixture"
+                        + " (`createSampleScan`,\n");
+        sb.append(
+                "  seeded, so identical across machines). Real books are pluggable via"
+                        + " `-Pinputs=\"…\"`;\n");
+        sb.append("  only their page count and byte size are reported.\n");
+    }
+
+    /**
+     * The machine's total physical memory in bytes as reported by the JDK's extended
+     * operating-system bean, or {@code -1} when the platform bean is not of that extended type.
+     */
+    private static long totalPhysicalMemoryBytes() {
+        var bean = ManagementFactory.getOperatingSystemMXBean();
+        return bean instanceof com.sun.management.OperatingSystemMXBean extended
+                ? extended.getTotalMemorySize()
+                : -1;
+    }
+
+    /** Renders a duration in seconds with two decimals and an {@code s} suffix (root locale). */
+    private static String secs(double seconds) {
+        String twoDecimals = String.format(Locale.ROOT, "%.2f", seconds);
+        return twoDecimals + "s";
+    }
+
+    /** Renders a page count, or {@code ?} when the count is negative (i.e. unknown). */
+    private static String pages(int pages) {
+        if (pages < 0) {
+            return "?";
+        }
+        return String.valueOf(pages);
+    }
+
+    /** Renders a KiB figure as whole MiB (rounded), or {@code n/a} when it is negative. */
+    private static String rssMib(long rssKib) {
+        if (rssKib < 0) {
+            return "n/a";
+        }
+        long rounded = Math.round(rssKib / 1024.0);
+        return String.valueOf(rounded);
+    }
+
+    // Small utilities
+
+    /** The last name element of {@code path}, or the whole path's text when it has none. */
+    private static String fileName(Path path) {
+        Path last = path.getFileName();
+        if (last == null) {
+            return path.toString();
+        }
+        return last.toString();
+    }
+
+    /**
+     * Exits the JVM with status 1, after printing an error and a hint to stderr, unless {@code
+     * path} is executable.
+     *
+     * @param path the file that must be executable
+     * @param what a short label for the file, used in the error line
+     * @param hint the remediation advice printed beneath the error
+     */
+    private static void requireExecutable(Path path, String what, String hint) {
+        if (Files.isExecutable(path)) {
+            return;
+        }
+        System.err.println("error: " + what + " not found at " + path);
+        System.err.println("       " + hint);
+        System.exit(1);
+    }
+
+    /**
+     * The parent of {@code path}.
+     *
+     * @throws IllegalArgumentException if {@code path} has no parent element
+     */
+    private static Path requireParent(Path path) {
+        Path parent = path.getParent();
+        if (parent != null) {
+            return parent;
+        }
+        throw new IllegalArgumentException("output path has no parent: " + path);
+    }
+
+    /**
+     * Recursively deletes {@code dir} and everything beneath it; a missing {@code dir} is a no-op.
+     * Deeper paths are removed first so each directory is empty by the time it is reached. A path
+     * that cannot be deleted is reported on stderr and skipped (best-effort cleanup).
+     *
+     * @param dir the root of the tree to remove
+     * @throws IOException if the tree cannot be opened or traversed; traversal failures, which
+     *     {@link Files#walk} surfaces lazily as {@code UncheckedIOException}, are unwrapped so
+     *     callers only ever see the declared checked type
+     */
+    private static void deleteTree(Path dir) throws IOException {
+        if (!Files.exists(dir)) {
+            return;
+        }
+        try (var paths = Files.walk(dir)) {
+            // Descending name count = children before parents. Integer.compare rather than
+            // subtraction: the overflow-proof way to order ints.
+            paths.sorted((a, b) -> Integer.compare(b.getNameCount(), a.getNameCount()))
+                    .forEach(
+                            p -> {
+                                try {
+                                    Files.deleteIfExists(p);
+                                } catch (IOException e) {
+                                    System.err.println(
+                                            "warn: could not delete " + p + ": " + e.getMessage());
+                                }
+                            });
+        } catch (java.io.UncheckedIOException e) {
+            // The walk is lazy: a directory that becomes unreadable mid-traversal is reported
+            // here, wrapped. Rethrow the underlying IOException to honor the signature.
+            throw e.getCause();
+        }
+    }
+}
diff --git a/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/SampleScanGenerator.java b/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/SampleScanGenerator.java
new file mode 100644
index 0000000..8b111a7
--- /dev/null
+++ b/pipeline/app/src/test/java/io/github/p4suta/pipeline/tools/SampleScanGenerator.java
@@ -0,0 +1,135 @@
+package io.github.p4suta.pipeline.tools;
+
+import java.awt.Color;
+import java.awt.Graphics2D;
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Locale;
+import java.util.Random;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.graphics.image.CCITTFactory;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+
+/**
+ * Generates the synthetic, copyright-free bitonal "scan" book the {@code benchPipeline} harness
+ * converts: A5 pages at the requested dpi carrying vertical text-like columns with per-page
+ * position jitter, a small per-page skew of up to ±0.5° (so the register stage's deskew has real
+ * work) and salt-and-pepper specks (so despeckle has real work), embedded as CCITT G4 so {@code
+ * pdfimages} extracts them exactly like a real scan. A fixed seed keeps every generation
+ * byte-identical, so benchmark runs stay comparable across machines and branches.
+ *
+ * <p>This deliberately lives in test sources (driven by the {@code createSampleScan} Gradle task),
+ * mirroring register's {@code SamplePdfGenerator}: the dev tool never ships in the production
+ * launcher. An existing output is reused, so repeated benchmark runs skip the generation cost;
+ * because of that reuse the book is written to a sibling temporary file and only moved into place
+ * once complete, so an interrupted generation can never leave a truncated fixture behind.
+ *
+ * <p>Usage: {@code SampleScanGenerator <output.pdf> [pages] [dpi]}
+ */
+public final class SampleScanGenerator {
+
+    private SampleScanGenerator() {}
+
+    /**
+     * {@code SampleScanGenerator <output.pdf> [pages] [dpi]} — writes the synthetic scan book.
+     * Defaults: {@code sample-scan.pdf}, 200 pages, 600 dpi. An existing regular file at the output
+     * path is reused as-is, whatever page count or dpi it was generated with.
+     *
+     * @throws IOException if the book cannot be written
+     * @throws NumberFormatException if {@code pages} or {@code dpi} is not an integer
+     * @throws IllegalArgumentException if {@code pages} or {@code dpi} is not positive
+     */
+    public static void main(String[] args) throws IOException {
+        Path out = Path.of(args.length > 0 ? args[0] : "sample-scan.pdf");
+        int pages = args.length > 1 ? Integer.parseInt(args[1]) : 200;
+        int dpi = args.length > 2 ? Integer.parseInt(args[2]) : 600;
+        if (Files.isRegularFile(out)) {
+            System.out.println("reusing existing " + out + " (delete it to regenerate)");
+            return;
+        }
+        long start = System.nanoTime();
+        write(out, pages, dpi);
+        System.out.printf(
+                Locale.ROOT,
+                "wrote %s: %d page(s) at %d dpi, %.1f MiB in %.1fs%n",
+                out,
+                pages,
+                dpi,
+                Files.size(out) / (1024.0 * 1024.0),
+                (System.nanoTime() - start) / 1e9);
+    }
+
+    /**
+     * Writes a {@code pages}-page synthetic bitonal scan book to {@code out} (A5 geometry). The
+     * document is saved to a sibling {@code .part} file and moved over {@code out} only after the
+     * save succeeds; on failure the partial file is removed.
+     *
+     * @param out the PDF to create (an existing file is replaced); missing parents are created
+     * @param pages the number of pages, at least 1
+     * @param dpi the raster resolution, at least 1
+     * @throws IllegalArgumentException if {@code pages} or {@code dpi} is not positive
+     * @throws IOException if the directories or the PDF cannot be written
+     */
+    public static void write(Path out, int pages, int dpi) throws IOException {
+        if (pages < 1) {
+            throw new IllegalArgumentException("pages must be at least 1: " + pages);
+        }
+        if (dpi < 1) {
+            throw new IllegalArgumentException("dpi must be at least 1: " + dpi);
+        }
+        int width = Math.round(148f * dpi / 25.4f); // A5 portrait: 148 mm × 210 mm
+        int height = Math.round(210f * dpi / 25.4f);
+        Random random = new Random(42);
+        Path target = out.toAbsolutePath();
+        Path parent = target.getParent();
+        if (parent != null) {
+            Files.createDirectories(parent);
+        }
+        // main() reuses any regular file at the target, so the target must only ever appear
+        // complete: build the book beside it and move it into place as the last step.
+        Path partial = target.resolveSibling(target.getFileName() + ".part");
+        try {
+            try (PDDocument doc = new PDDocument()) {
+                float widthPt = width * 72f / dpi;
+                float heightPt = height * 72f / dpi;
+                for (int i = 0; i < pages; i++) {
+                    PDImageXObject image =
+                            CCITTFactory.createFromImage(doc, page(width, height, random));
+                    PDPage page = new PDPage(new PDRectangle(widthPt, heightPt));
+                    doc.addPage(page);
+                    try (PDPageContentStream content = new PDPageContentStream(doc, page)) {
+                        content.drawImage(image, 0, 0, widthPt, heightPt);
+                    }
+                }
+                doc.save(partial.toFile());
+            }
+            Files.move(partial, target, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+        } catch (IOException | RuntimeException e) {
+            try {
+                Files.deleteIfExists(partial);
+            } catch (IOException cleanup) {
+                e.addSuppressed(cleanup);
+            }
+            throw e;
+        }
+    }
+
+    /** One page: slightly skewed text-like columns plus unrotated scanner-dust specks. */
+    private static BufferedImage page(int width, int height, Random random) {
+        BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY);
+        Graphics2D g = img.createGraphics();
+        try {
+            g.setColor(Color.WHITE);
+            g.fillRect(0, 0, width, height);
+            g.setColor(Color.BLACK);
+            double skew = Math.toRadians(random.nextDouble() - 0.5); // ±0.5°
+            g.rotate(skew, width / 2.0, height / 2.0);
+            drawColumns(g, width, height, random);
+            // Undo the rotation so the specks land axis-aligned, like dust on the scanner glass.
+            g.rotate(-skew, width / 2.0, height / 2.0);
+            drawSpecks(g, width, height, random);
+        } finally {
+            g.dispose();
+        }
+        return img;
+    }
+
+    /**
+     * Vertical "text" columns right-to-left (Japanese book layout): stacked glyph-sized blocks with
+     * per-page jitter so registration has a real column position to detect and correct, and random
+     * early line breaks so the texture resembles prose rather than a solid block.
+     */
+    private static void drawColumns(Graphics2D g, int width, int height, Random random) {
+        int margin = width / 10;
+        int glyph = Math.max(4, width / 60);
+        int leading = glyph / 2;
+        int jitterX = random.nextInt(glyph + 1) - glyph / 2;
+        int top = height / 12 + random.nextInt(glyph + 1);
+        int bottom = height - height / 12;
+        for (int x = width - margin - glyph + jitterX; x >= margin; x -= glyph + leading) {
+            int y = top;
+            while (y + glyph <= bottom) {
+                // ~8% of glyph slots end the "sentence" early, leaving prose-like white runs.
+                if (random.nextInt(100) < 8) {
+                    y += glyph * (2 + random.nextInt(4));
+                    continue;
+                }
+                g.fillRect(x, y, glyph - 2, glyph - 2);
+                y += glyph;
+            }
+        }
+    }
+
+    /** Salt-and-pepper dust: ~1 speck of 1–3 px per 25k pixels, what despeckle exists to remove. */
+    private static void drawSpecks(Graphics2D g, int width, int height, Random random) {
+        // long arithmetic: width * height exceeds int range at very high dpi, which would turn
+        // the count negative and silently drop every speck.
+        int specks = (int) ((long) width * height / 25_000);
+        for (int i = 0; i < specks; i++) {
+            int size = 1 + random.nextInt(3);
+            g.fillRect(random.nextInt(width - size), random.nextInt(height - size), size, size);
+        }
+    }
+}
diff --git a/pipeline/application/src/main/java/io/github/p4suta/pipeline/application/PipelineRunner.java b/pipeline/application/src/main/java/io/github/p4suta/pipeline/application/PipelineRunner.java
index 0810505..8ae99b8 100644
--- a/pipeline/application/src/main/java/io/github/p4suta/pipeline/application/PipelineRunner.java
+++ b/pipeline/application/src/main/java/io/github/p4suta/pipeline/application/PipelineRunner.java
@@ -70,7 +70,11 @@ public void run(
 
             progress.emit(new ProgressEvent.StageStarted(source.name(), position, total));
             Corpus corpus = source.open(stageDir(work, 0, source.name()));
-            log.info("source: {} page(s) at {} dpi", corpus.pageCount(), corpus.dpi());
+            log.info(
+                    "source: {} page(s) at {} dpi, {}",
+                    corpus.pageCount(),
+                    corpus.dpi(),
+                    intermediatesSize(corpus.dir()));
             progress.emit(new ProgressEvent.StageCompleted(source.name()));
             position++;
 
@@ -78,7 +82,12 @@ public void run(
             for (Stage stage : stages) {
                 progress.emit(new ProgressEvent.StageStarted(stage.name(), position, total));
                 corpus = stage.apply(corpus, stageDir(work, dirIndex, stage.name()));
-                log.info("stage {} ({}): {} page(s)", dirIndex, stage.name(), corpus.pageCount());
+                log.info(
+                        "stage {} ({}): {} page(s), {}",
+                        dirIndex,
+                        stage.name(),
+                        corpus.pageCount(),
+                        intermediatesSize(corpus.dir()));
                 progress.emit(new ProgressEvent.StageCompleted(stage.name()));
                 position++;
                 dirIndex++;
@@ -111,6 +120,28 @@ private static Path stageDir(Path work, int index, String name) throws IOExcepti
                 work.resolve(String.format(Locale.ROOT, "%02d-%s", index, name)));
     }
 
+    /**
+     * The stage directory's total file bytes rendered as MiB — visibility into how much
+     * intermediate I/O each stage produces. Best-effort: {@code ?} when the walk fails, whether on
+     * open ({@link IOException}) or mid-traversal, where {@link Files#walk} reports the failure
+     * lazily as an {@code UncheckedIOException} — a log detail must never abort the run.
+     *
+     * @param dir the stage directory to total
+     * @return the size formatted like {@code 12.3 MiB}, or {@code ? MiB} when it cannot be walked
+     */
+    private static String intermediatesSize(Path dir) {
+        try (Stream<Path> files = Files.walk(dir)) {
+            long bytes =
+                    files.filter(Files::isRegularFile).mapToLong(PipelineRunner::sizeQuietly).sum();
+            return String.format(Locale.ROOT, "%.1f MiB", bytes / (1024.0 * 1024.0));
+        } catch (IOException | java.io.UncheckedIOException e) {
+            return "? MiB";
+        }
+    }
+
+    /** A file's size in bytes, counted as zero when it cannot be read (best-effort total). */
+    private static long sizeQuietly(Path file) {
+        long bytes = 0L;
+        try {
+            bytes = Files.size(file);
+        } catch (IOException ignored) {
+            // Unreadable or vanished between walk and stat: contributes nothing to the sum.
+        }
+        return bytes;
+    }
+
     private static void deleteRecursively(Path dir) {
         try (Stream<Path> walk = Files.walk(dir)) {
             walk.sorted(Comparator.reverseOrder()).forEach(PipelineRunner::deleteQuietly);
diff --git a/pipeline/docs/perf-baseline.md b/pipeline/docs/perf-baseline.md
new file mode 100644
index 0000000..9727575
--- /dev/null
+++ b/pipeline/docs/perf-baseline.md
@@ -0,0 +1,35 @@
+# pdfbook runtime baseline (stage-level)
+
+Generated by `PipelineBenchmark` (`./gradlew :pipeline:app:benchPipeline`, in the dev container).
+Tracks the **per-stage wall-clock breakdown, end-to-end runtime and peak memory**
+of the installDist `pdfbook` launcher. Re-run after any change to the pipeline
+and compare against the previous run before merging (acceptance: ≥5% median
+total-wall improvement, or an explicit RSS/disk win, with output validated).
+
+- Date (UTC): 2026-06-10 04:46:07
+- Host: Linux 6.8.0-124-generic amd64, 8 CPUs, RAM 16Gi
+- Launcher: `pipeline/app/build/install/pdfbook/bin/pdfbook`
+- Samples per measurement: cold (1st run) + warm median of 3.
+- The default input is the deterministic synthetic fixture (`createSampleScan`,
+  seeded, so identical across machines). Real books are pluggable via `-Pinputs="…"`;
+  only their page count and byte size are reported.
+
+## Stage breakdown (warm median of 3 runs)
+
+`conv` is the launcher's in-process total (`timing: total`); `startup+init` = E2E wall − conv
+(JVM boot + first-touch PDFBox/AWT init). `jobs=auto` is the launcher's CPU-count default.
+
+| Input | Jobs | Pages | E2E wall | conv | extract | despeckle | register | spread | startup+init | Cold wall | Peak RSS (MiB) | Output (MiB) |
+|---|---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
+| sample-scan-200p.pdf | 1 | 200 | 50.10s | 49.85s | 4.57s | 32.16s | 12.94s | 0.20s | 0.25s | 51.42s | 156 | 6.4 |
+| sample-scan-200p.pdf | 8 | 200 | 14.77s | 14.48s | 1.15s | 9.85s | 3.27s | 0.21s | 0.29s | 15.04s | 328 | 6.4 |
+
+## Stage shares (of conv, warm median)
+
+The shares that decide where optimization effort goes: a stage that is ~5% of conv
+cannot pay for a parallelization rewrite no matter how elegant.
+
+| Input | Jobs | extract | despeckle | register | spread |
+|---|---|---:|---:|---:|---:|
+| sample-scan-200p.pdf | 1 | 9.2% | 64.5% | 26.0% | 0.4% |
+| sample-scan-200p.pdf | 8 | 7.9% | 68.0% | 22.6% | 1.5% |
diff --git a/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStage.java b/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStage.java
index bcd1008..bb3ad78 100644
--- a/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStage.java
+++ b/pipeline/infrastructure/src/main/java/io/github/p4suta/pipeline/infrastructure/G4EncodeStage.java
@@ -18,11 +18,14 @@
 
 /**
  * The G4-normalization {@link Stage}: re-encodes each extracted page as single-strip CCITT G4 TIFF
- * via Leptonica, which {@link SpreadPackSink}'s pass-through CCITT embedding requires. {@code
- * pdfimages -tiff} writes poppler's default (non-G4) TIFF compression, so the raw extract output
- * cannot be embedded directly; despeckle and register each re-encode their output as G4 themselves,
- * so the composition root inserts this stage only when neither of them runs. The corpus dpi is
- * stamped on every page, since {@code pdfimages} tags the extracted TIFFs at a default 72 dpi.
+ * via Leptonica, which {@link SpreadPackSink}'s pass-through CCITT embedding requires. The
+ * extractor's decoded mode ({@code pdfimages -tiff}, used for any source that is not all-CCITT)
+ * writes poppler's default (non-G4) TIFF compression at a default 72 dpi, so that output cannot be
+ * embedded directly; despeckle and register each re-encode their output as G4 themselves, so the
+ * composition root inserts this stage only when neither of them runs. The corpus dpi is stamped on
+ * every page. (For an all-CCITT source the extractor's remux already produces stamped single-strip
+ * G4 — this stage then re-encodes losslessly, a small constant cost that keeps the no-stage path
+ * uniform.)
  */
 public final class G4EncodeStage implements Stage {
 
diff --git a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/CcittTiffs.java b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/CcittTiffs.java
new file mode 100644
index 0000000..1bfd7c3
--- /dev/null
+++ b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/CcittTiffs.java
@@ -0,0 +1,159 @@
+package io.github.p4suta.shared.pdf;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import org.jspecify.annotations.Nullable;
+
+/**
+ * Wraps the raw CCITT G4 stream {@code pdfimages -ccitt} dumps into a single-strip TIFF — the
+ * pass-through half of the extractor's remux mode: the scan's embedded G4 bytes become a readable
+ * TIFF without ever being decoded and re-encoded.
+ *
+ * <p>poppler writes a fax2tiff-style {@code .params} file beside each dump ({@code -4} G4 / {@code
+ * -1 -2} G3, {@code -A} EOL markers / {@code -P} none, {@code -X <columns>}, {@code -W} BlackIs1 /
+ * {@code -B} not, {@code -M} MSB-first). Only the plain shape TIFF's T.6 compression can represent
+ * verbatim is {@linkplain #supported supported}: G4, no EOL markers, MSB-first. Crucially, PDF's
+ * {@code EncodedByteAlign} never reaches the params file, so a wrapped stream is only trusted after
+ * the caller decodes it back successfully (see the extractor's read-back verification).
+ */
+final class CcittTiffs {
+
+    // TIFF field type codes used by the IFD entries written below.
+    private static final short TYPE_SHORT = 3;
+    private static final short TYPE_LONG = 4;
+    private static final short TYPE_RATIONAL = 5;
+
+    private CcittTiffs() {}
+
+    /**
+     * The decode parameters poppler records beside a {@code .ccitt} dump.
+     *
+     * @param kind the coding scheme flag: {@code -4} (G4), {@code -2} (G3 2D) or {@code -1} (G3 1D)
+     * @param endOfLine whether rows are prefixed with EOL markers ({@code -A})
+     * @param columns the row width in pixels ({@code -X})
+     * @param blackIs1 whether decoded 1-bits are black ({@code -W}) or 0-bits are ({@code -B})
+     */
+    record Params(String kind, boolean endOfLine, int columns, boolean blackIs1) {}
+
+    /** Parse a {@code .params} file's text, or {@code null} when any token is unrecognized. */
+    static @Nullable Params parseParams(String text) {
+        @Nullable String kind = null;
+        @Nullable Boolean endOfLine = null;
+        @Nullable Integer columns = null;
+        @Nullable Boolean blackIs1 = null;
+        boolean msbFirst = false;
+        String[] tokens = text.trim().split("\\s+", -1);
+        for (int i = 0; i < tokens.length; i++) {
+            switch (tokens[i]) {
+                case "-4", "-2", "-1" -> kind = tokens[i];
+                case "-A" -> endOfLine = true;
+                case "-P" -> endOfLine = false;
+                case "-W" -> blackIs1 = true;
+                case "-B" -> blackIs1 = false;
+                case "-M" -> msbFirst = true;
+                case "-X" -> {
+                    // -X carries its value in the following token; a missing or non-numeric
+                    // value rejects the whole file.
+                    i++;
+                    if (i >= tokens.length) {
+                        return null;
+                    }
+                    try {
+                        columns = Integer.parseInt(tokens[i]);
+                    } catch (NumberFormatException e) {
+                        return null;
+                    }
+                }
+                default -> {
+                    return null;
+                }
+            }
+        }
+        // Every field must have been seen, and -M (MSB-first) is mandatory: it is not stored in
+        // Params, so a file without it is rejected here rather than carried forward.
+        if (kind == null || endOfLine == null || columns == null || blackIs1 == null || !msbFirst) {
+            return null;
+        }
+        return new Params(kind, endOfLine, columns, blackIs1);
+    }
+
+    /**
+     * Whether {@code params} describes a stream TIFF T.6 represents verbatim: Group 4, no EOL
+     * markers, and a width agreeing with the listing row the dump corresponds to.
+     */
+    static boolean supported(Params params, int expectedWidth) {
+        return "-4".equals(params.kind())
+                && !params.endOfLine()
+                && params.columns() == expectedWidth;
+    }
+
+    /**
+     * Write {@code g4} as a little-endian, single-strip CCITT-G4 TIFF — header, the verbatim stream
+     * as the one strip, then the IFD.
+     *
+     * @param out the TIFF to write
+     * @param g4 the raw G4 (T.6) stream, verbatim
+     * @param width the row width in pixels
+     * @param height the row count
+     * @param blackIs1 the params' photometric hint: decoded 1-bits are black ({@code -W})
+     * @param dpi the resolution to stamp, or {@code <= 0} to omit the resolution tags
+     */
+    static void writeSingleStripG4(
+            Path out, byte[] g4, int width, int height, boolean blackIs1, int dpi)
+            throws IOException {
+        boolean withResolution = dpi > 0;
+        // 11 baseline entries; the three resolution tags (282, 283, 296) are written only when a
+        // dpi is given.
+        int entryCount = withResolution ? 14 : 11;
+        int stripOffset = 8; // the strip follows the 8-byte header directly
+        int padding = g4.length % 2; // IFD offsets must be word-aligned
+        int ifdOffset = stripOffset + g4.length + padding;
+        // Past the IFD: 2-byte entry count, 12 bytes per entry, 4-byte next-IFD offset.
+        int rationalOffset = ifdOffset + 2 + entryCount * 12 + 4;
+        ByteBuffer buf =
+                ByteBuffer.allocate(rationalOffset + (withResolution ? 16 : 0))
+                        .order(ByteOrder.LITTLE_ENDIAN);
+
+        // Header: "II" byte-order mark, magic 42, offset of the first (and only) IFD.
+        buf.put((byte) 'I').put((byte) 'I').putShort((short) 42).putInt(ifdOffset);
+        buf.put(g4);
+        if (padding == 1) {
+            buf.put((byte) 0);
+        }
+
+        buf.putShort((short) entryCount); // entries below stay sorted by tag id
+        entry(buf, 256, TYPE_LONG, width); // ImageWidth
+        entry(buf, 257, TYPE_LONG, height); // ImageLength
+        entryShort(buf, 258, 1); // BitsPerSample
+        entryShort(buf, 259, 4); // Compression: CCITT T.6 (Group 4)
+        // The G4 stream encodes white/black runs; this tag tells readers which sense to
+        // materialize them in. The PDF default (-B, BlackIs1=false) is the standard fax sense —
+        // TIFF WhiteIsZero (0); -W (BlackIs1=true) is the inverted sense, BlackIsZero (1).
+        // Pinned empirically by CcittTiffsTest's pixel-identical round trip.
+        entryShort(buf, 262, blackIs1 ? 1 : 0); // PhotometricInterpretation
+        entryShort(buf, 266, 1); // FillOrder: MSB first (params -M)
+        entry(buf, 273, TYPE_LONG, stripOffset); // StripOffsets
+        entryShort(buf, 277, 1); // SamplesPerPixel
+        entry(buf, 278, TYPE_LONG, height); // RowsPerStrip: the single strip
+        entry(buf, 279, TYPE_LONG, g4.length); // StripByteCounts
+        if (withResolution) {
+            entry(buf, 282, TYPE_RATIONAL, rationalOffset); // XResolution
+            entry(buf, 283, TYPE_RATIONAL, rationalOffset + 8); // YResolution
+        }
+        entry(buf, 293, TYPE_LONG, 0); // T6Options: none
+        if (withResolution) {
+            entryShort(buf, 296, 2); // ResolutionUnit: inch
+        }
+        buf.putInt(0); // no next IFD
+
+        if (withResolution) {
+            // Two RATIONALs (numerator, denominator): XResolution then YResolution, both dpi/1.
+            buf.putInt(dpi).putInt(1).putInt(dpi).putInt(1);
+        }
+        Files.write(out, buf.array());
+    }
+
+    /** One IFD entry holding an inline LONG (or a RATIONAL's value offset). */
+    private static void entry(ByteBuffer buf, int tag, short type, int value) {
+        // Layout: tag (2 bytes), type (2), count (4, always 1 here), value or offset (4).
+        buf.putShort((short) tag).putShort(type).putInt(1).putInt(value);
+    }
+
+    /** One IFD entry holding an inline SHORT (left-justified in the 4-byte value field). */
+    private static void entryShort(ByteBuffer buf, int tag, int value) {
+        buf.putShort((short) tag).putShort(TYPE_SHORT).putInt(1);
+        // The SHORT occupies the first two bytes of the value field; the rest is zero padding.
+        buf.putShort((short) value).putShort((short) 0);
+    }
+}
diff --git a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractor.java b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractor.java
index 35c8b77..77ed395 100644
--- a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractor.java
+++ b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractor.java
@@ -1,24 +1,32 @@
 package io.github.p4suta.shared.pdf;
 
+import io.github.p4suta.shared.imaging.Pix;
 import io.github.p4suta.shared.process.ProcessRunner;
 import io.github.p4suta.shared.process.Tasks;
 import io.github.p4suta.shared.process.ToolPath;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.Duration;
 import java.util.ArrayList;
+import java.util.Comparator;
 import java.util.List;
 import java.util.Locale;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeoutException;
+import java.util.stream.Stream;
+import org.jspecify.annotations.Nullable;
 
 /**
  * Extracts a PDF's embedded bitonal images as TIFFs by driving {@code pdfimages}. The page range is
  * split across the worker pool (one {@code pdfimages -f/-l} per chunk) with distinct zero-padded
- * {@code page-cNN-} prefixes, so a name sort yields reading order and chunks never collide. The
- * dominant scan DPI is read from {@code pdfimages -list} and passed to the clean step as an
- * explicit DPI, since {@code pdfimages} tags the extracted TIFFs at a default 72 dpi.
+ * {@code page-cNN-} prefixes, so a name sort yields reading order and chunks never collide. An
+ * all-CCITT source is remuxed — the raw embedded G4 streams pass through into single-strip TIFFs
+ * with their true ppi stamped (see {@link #extract}); any other source is extracted decoded, where
+ * {@code pdfimages} tags the TIFFs at a default 72 dpi, so the dominant scan DPI from {@code
+ * pdfimages -list} is passed downstream explicitly either way.
  *
  * <p>The textual {@code pdfinfo}/{@code pdfimages -list} reports are parsed by the pure {@link
  * PdfListingParser}; this adapter only drives the external processes via {@link
@@ -93,13 +101,33 @@ public int dominantDpi(Path pdf) throws IOException {
 
     /**
      * Extract all pages of {@code pdf} into {@code outDir} as TIFFs, parallelized over page-range
-     * chunks. {@code jobs} bounds both the chunk count and the pool slots used.
+     * chunks of about {@link #CHUNK_PAGES} pages on {@code pool} (at most {@code 4 * jobs} chunks).
+     *
+     * <p>One {@code pdfimages -list} pass picks the mode: when every embedded image is 1-bpp CCITT
+     * (the usual self-scanned book), each chunk dumps the raw G4 streams ({@code -ccitt}) and wraps
+     * them into single-strip CCITT-G4 TIFFs — a pure remux: no decode/re-encode, intermediates tens
+     * of KB per page instead of the decoded megabytes, and the image's true ppi stamped instead of
+     * {@code pdfimages}' default 72 dpi. Every wrapped page is decoded back once as verification
+     * (PDF's {@code EncodedByteAlign} never reaches the dumped params, so trust requires a decode);
+     * a chunk whose dump or wrap deviates in any way is re-extracted decoded ({@code -tiff}), which
+     * is also the whole-run mode for any other source.
      */
     public void extract(Path pdf, Path outDir, int jobs, ExecutorService pool) throws IOException {
         int total = pageCount(pdf);
-        int chunks = Math.max(1, Math.min(jobs, total));
-        int per = (total + chunks - 1) / chunks;
         String pdfimages = resolve("pdfimages", pdfimagesPropertyKey);
+        List<PdfListingParser.ImageRow> rows =
+                PdfListingParser.parseImageRows(
+                        capture(List.of(pdfimages, "-list", pdf.toString()), INFO_TIMEOUT));
+        boolean rawCcitt =
+                !rows.isEmpty()
+                        && rows.stream().allMatch(r -> r.bpc() == 1 && "ccitt".equals(r.enc()));
+
+        // Chunks of ~CHUNK_PAGES rather than total/jobs: fast finishers free their pool slot early
+        // (the straggler tail shrinks from total/jobs to ~CHUNK_PAGES pages), and a streaming
+        // consumer can take pages chunk by chunk. Capped so a small book is not all process spawns.
+        int chunkCap = (int) Math.min(4L * jobs, total);
+        int chunks = Math.clamp(Math.ceilDiv(total, CHUNK_PAGES), 1, Math.max(1, chunkCap));
+        int per = Math.ceilDiv(total, chunks);
         List<Callable<Void>> tasks = new ArrayList<>();
         int chunk = 0;
         for (int first = 1; first <= total; first += per) {
@@ -108,18 +136,11 @@ public void extract(Path pdf, Path outDir, int jobs, ExecutorService pool) throw
                     outDir.resolve(String.format(Locale.ROOT, "page-c%03d-", chunk)).toString();
             int from = first;
             int to = last;
+            List<PdfListingParser.ImageRow> chunkRows =
+                    rawCcitt ? rowsInRange(rows, from, to) : List.of();
             tasks.add(
                     () -> {
-                        runDiscarding(
-                                List.of(
-                                        pdfimages,
-                                        "-tiff",
-                                        "-f",
-                                        Integer.toString(from),
-                                        "-l",
-                                        Integer.toString(to),
-                                        pdf.toString(),
-                                        prefix));
+                        extractChunk(pdfimages, pdf, from, to, prefix, chunkRows);
                         return null;
                     });
             chunk++;
@@ -127,6 +148,143 @@ public void extract(Path pdf, Path outDir, int jobs, ExecutorService pool) throw
         Tasks.awaitAll(pool, tasks, "pdfimages extract interrupted", "pdfimages extract failed");
     }
 
+    /** Pages per extraction chunk; see {@link #extract}. */
+    private static final int CHUNK_PAGES = 12;
+
+    /** Rows on pages {@code from..to} (both ends inclusive), kept in listing (= dump) order. */
+    private static List<PdfListingParser.ImageRow> rowsInRange(
+            List<PdfListingParser.ImageRow> rows, int from, int to) {
+        return rows.stream().filter(row -> from <= row.page() && row.page() <= to).toList();
+    }
+
+    /**
+     * Extracts pages {@code from..to} under {@code prefix}. An empty {@code ccittRows} means a
+     * plain decoded {@code -tiff} extract. Otherwise the raw {@code -ccitt} dump is wrapped; if
+     * the wrap reports a deviation, the chunk's files are deleted and it is redone decoded.
+     */
+    private void extractChunk(
+            String pdfimages,
+            Path pdf,
+            int from,
+            int to,
+            String prefix,
+            List<PdfListingParser.ImageRow> ccittRows)
+            throws IOException {
+        String firstPass = ccittRows.isEmpty() ? "-tiff" : "-ccitt";
+        runDiscarding(extractCommand(pdfimages, firstPass, from, to, pdf, prefix));
+        if (ccittRows.isEmpty() || wrapChunk(prefix, ccittRows)) {
+            return;
+        }
+        // The dump did not match the listing (or did not decode back): discard everything the
+        // chunk wrote so far and extract it again the decoded way.
+        deleteByPrefix(prefix);
+        runDiscarding(extractCommand(pdfimages, "-tiff", from, to, pdf, prefix));
+    }
+
+    /**
+     * Builds the {@code pdfimages} argument list that extracts pages {@code from..to} of {@code
+     * pdf} in the given {@code format} mode, with {@code prefix} as the output file-name root.
+     */
+    private static List<String> extractCommand(
+            String pdfimages, String format, int from, int to, Path pdf, String prefix) {
+        // The page range is passed as -f <first> -l <last>.
+        String firstPage = Integer.toString(from);
+        String lastPage = Integer.toString(to);
+        String source = pdf.toString();
+        return List.of(pdfimages, format, "-f", firstPage, "-l", lastPage, source, prefix);
+    }
+
+    /**
+     * Turns each {@code .ccitt} dump under {@code prefix} into a single-strip G4 TIFF and decodes
+     * it back as proof. Any deviation returns {@code false} and leaves the files in place — the
+     * caller is expected to delete the chunk's artifacts and fall back to a decoded extract.
+     */
+    private static boolean wrapChunk(String prefix, List<PdfListingParser.ImageRow> rows)
+            throws IOException {
+        List<Path> streams = filesByPrefix(prefix, ".ccitt");
+        int count = streams.size();
+        if (count != rows.size()) {
+            return false;
+        }
+        for (int index = 0; index < count; index++) {
+            Path stream = streams.get(index);
+            Path sidecar = withExtension(stream, ".params");
+            if (!Files.isRegularFile(sidecar)) {
+                return false;
+            }
+            String sidecarText = Files.readString(sidecar, StandardCharsets.UTF_8);
+            CcittTiffs.@Nullable Params params = CcittTiffs.parseParams(sidecarText);
+            PdfListingParser.ImageRow row = rows.get(index);
+            int width = row.width();
+            if (params == null || !CcittTiffs.supported(params, width)) {
+                return false;
+            }
+            Path tiff = withExtension(stream, ".tif");
+            byte[] g4 = Files.readAllBytes(stream);
+            int ppi = Math.max(row.xPpi(), 0);
+            CcittTiffs.writeSingleStripG4(tiff, g4, width, row.height(), params.blackIs1(), ppi);
+            // Trust nothing until the wrapped file has been read back at the listed size.
+            if (!decodesBack(tiff, row)) {
+                return false;
+            }
+            // Wrapped and verified: the raw dump and its params file are no longer needed.
+            Files.delete(stream);
+            Files.delete(sidecar);
+        }
+        return true;
+    }
+
+    /**
+     * Reads the wrapped TIFF back and compares its size with the listing row — the check that
+     * catches what the params file cannot show, such as an {@code EncodedByteAlign} stream.
+     */
+    private static boolean decodesBack(Path tif, PdfListingParser.ImageRow row) {
+        try (Pix decoded = Pix.read(tif)) {
+            boolean sameWidth = decoded.width() == row.width();
+            return sameWidth && decoded.height() == row.height();
+        } catch (IllegalStateException unreadable) {
+            return false;
+        }
+    }
+
+    /** Name-sorted files that start with {@code prefix}'s file name and end in {@code suffix}. */
+    private static List<Path> filesByPrefix(String prefix, String suffix) throws IOException {
+        Path root = Path.of(prefix);
+        Path dir = root.getParent();
+        if (dir == null) {
+            throw new IOException("extract prefix has no parent directory: " + prefix);
+        }
+        String stem = String.valueOf(root.getFileName());
+        try (Stream<Path> listing = Files.list(dir)) {
+            return listing.filter(
+                            entry -> {
+                                String entryName = String.valueOf(entry.getFileName());
+                                return entryName.startsWith(stem) && entryName.endsWith(suffix);
+                            })
+                    .sorted(Comparator.comparing(entry -> String.valueOf(entry.getFileName())))
+                    .toList();
+        }
+    }
+
+    /**
+     * Removes whatever one chunk left behind: raw dumps, params files and any wrapped TIFFs.
+     */
+    private static void deleteByPrefix(String prefix) throws IOException {
+        for (String extension : new String[] {".ccitt", ".params", ".tif"}) {
+            for (Path artifact : filesByPrefix(prefix, extension)) {
+                Files.deleteIfExists(artifact);
+            }
+        }
+    }
+
+    /** {@code file}'s sibling whose extension (if any) is replaced by {@code extension}. */
+    private static Path withExtension(Path file, String extension) {
+        String fileName = String.valueOf(file.getFileName());
+        int lastDot = fileName.lastIndexOf('.');
+        int stemEnd = lastDot < 0 ? fileName.length() : lastDot;
+        return file.resolveSibling(fileName.substring(0, stemEnd) + extension);
+    }
+
     /** Run an extraction command, discarding its (file-producing) output. */
     private static void runDiscarding(List<String> command) throws IOException {
         try {
diff --git a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfListingParser.java b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfListingParser.java
index efe12aa..0a2bb1b 100644
--- a/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfListingParser.java
+++ b/shared/pdf/src/main/java/io/github/p4suta/shared/pdf/PdfListingParser.java
@@ -1,6 +1,8 @@
 package io.github.p4suta.shared.pdf;
 
+import java.util.ArrayList;
 import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -36,27 +38,17 @@ public static int parsePageCount(String pdfinfoOutput) {
     }
 
     /**
-     * The most common rounded x-ppi (column 13, 0-based 12) across the {@code image} rows of a
-     * {@code pdfimages -list} report, skipping the two header rows. Ties resolve to the first value
-     * seen and a non-positive winner falls back to {@link #DEFAULT_DPI}.
+     * The most common rounded x-ppi across the {@code image} rows of a {@code pdfimages -list}
+     * report. Ties resolve to the first value seen and a non-positive winner falls back to {@link
+     * #DEFAULT_DPI}.
      *
      * @param listOutput the full text {@code pdfimages -list} printed
      * @return the dominant rounded x-ppi, or {@link #DEFAULT_DPI} when none is usable
      */
     public static int parseDominantDpi(String listOutput) {
-        String[] lines = listOutput.split("\n", -1);
         Map<Integer, Integer> counts = new LinkedHashMap<>();
-        for (int i = 2; i < lines.length; i++) {
-            String[] fields = lines[i].trim().split("\\s+", -1);
-            if (fields.length < 13 || !"image".equals(fields[2])) {
-                continue;
-            }
-            try {
-                int ppi = (int) Math.round(Double.parseDouble(fields[12]));
-                counts.merge(ppi, 1, Integer::sum);
-            } catch (NumberFormatException ignored) {
-                // Non-numeric x-ppi cell: skip this row.
-            }
+        for (ImageRow row : parseImageRows(listOutput)) {
+            counts.merge(row.xPpi(), 1, Integer::sum);
         }
         if (counts.isEmpty()) {
             return DEFAULT_DPI;
@@ -71,4 +63,50 @@ public static int parseDominantDpi(String listOutput) {
         }
         return best > 0 ? best : DEFAULT_DPI;
     }
+
+    /**
+     * One {@code image} row of a {@code pdfimages -list} report — the columns the extractor needs
+     * to pick its mode and to wrap raw CCITT dumps.
+     *
+     * @param page the 1-based page the image sits on
+     * @param width the image width in pixels
+     * @param height the image height in pixels
+     * @param bpc bits per component ({@code 1} for bitonal)
+     * @param enc the embedded encoding token ({@code ccitt}, {@code jbig2}, {@code jpeg}, {@code
+     *     image}, …)
+     * @param xPpi the x-ppi cell, rounded; a non-positive value means the ppi is unknown
+     */
+    public record ImageRow(int page, int width, int height, int bpc, String enc, int xPpi) {}
+
+    /**
+     * Reads the {@code image} rows out of a {@code pdfimages -list} report, keeping listing order
+     * (which is also the order {@code pdfimages} dumps the images in). The two header lines are
+     * skipped, as is any row with fewer than 13 cells or a numeric cell that does not parse.
+     *
+     * @param listOutput the full text {@code pdfimages -list} printed
+     * @return the parsed rows, possibly empty
+     */
+    public static List<ImageRow> parseImageRows(String listOutput) {
+        List<ImageRow> parsed = new ArrayList<>();
+        String[] lines = listOutput.split("\n", -1);
+        // Index 0 and 1 are the header and its rule; data rows start at index 2.
+        for (int i = 2; i < lines.length; i++) {
+            String[] cells = lines[i].trim().split("\\s+", -1);
+            if (cells.length < 13 || !"image".equals(cells[2])) {
+                continue;
+            }
+            try {
+                // Columns: 0 page, 3 width, 4 height, 7 bpc, 8 enc, 12 x-ppi.
+                int page = Integer.parseInt(cells[0]);
+                int width = Integer.parseInt(cells[3]);
+                int height = Integer.parseInt(cells[4]);
+                int bpc = Integer.parseInt(cells[7]);
+                int xPpi = (int) Math.round(Double.parseDouble(cells[12]));
+                parsed.add(new ImageRow(page, width, height, bpc, cells[8], xPpi));
+            } catch (NumberFormatException ignored) {
+                // A non-numeric cell: skip this row.
+            }
+        }
+        return parsed;
+    }
 }
diff --git a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/CcittTiffsTest.java b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/CcittTiffsTest.java
new file mode 100644
index 0000000..63e93d6
--- /dev/null
+++ b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/CcittTiffsTest.java
@@ -0,0 +1,160 @@
+package io.github.p4suta.shared.pdf;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import io.github.p4suta.shared.imaging.Pix;
+import java.awt.Color;
+import java.awt.Graphics2D;
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import javax.imageio.ImageIO;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.graphics.image.CCITTFactory;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Covers the two CCITT remux building blocks: the fax2tiff-style params parser (pure), and the
+ * single-strip G4 TIFF writer — checked by wrapping a real G4 stream (PDFBox's CCITT encoder,
+ * the same encoding a scanner PDF embeds) and decoding it back through Leptonica pixel-for-pixel.
+ */
+final class CcittTiffsTest {
+
+    // ---- params parsing ----
+
+    @Test
+    void parsesTheUsualScannerShape() {
+        CcittTiffs.Params usual = parsed("-4 -P -X 3496 -B -M\n");
+        assertThat(usual).isEqualTo(new CcittTiffs.Params("-4", false, 3496, false));
+        assertThat(CcittTiffs.supported(usual, 3496)).isTrue();
+    }
+
+    @Test
+    void eolMarkersAreUnsupported() {
+        CcittTiffs.Params withEol = parsed("-4 -A -X 100 -W -M");
+        assertThat(withEol).isEqualTo(new CcittTiffs.Params("-4", true, 100, true));
+        // TIFF T.6 cannot represent EOL markers, so this shape must be rejected.
+        assertThat(CcittTiffs.supported(withEol, 100)).isFalse();
+    }
+
+    @Test
+    void group3IsUnsupported() {
+        assertThat(CcittTiffs.supported(parsed("-2 -P -X 100 -B -M"), 100)).isFalse();
+    }
+
+    @Test
+    void widthMismatchIsUnsupported() {
+        assertThat(CcittTiffs.supported(parsed("-4 -P -X 100 -B -M"), 200)).isFalse();
+    }
+
+    /** Parses params a test expects to be well-formed, non-null to satisfy NullAway. */
+    private static CcittTiffs.Params parsed(String text) {
+        return java.util.Objects.requireNonNull(CcittTiffs.parseParams(text));
+    }
+
+    @Test
+    void unknownTokensAndMissingFlagsAreUnparsable() {
+        assertThat(CcittTiffs.parseParams("-4 -P -X 100 -B -M -Z")).isNull(); // unknown flag
+        assertThat(CcittTiffs.parseParams("-4 -P -B -M")).isNull(); // no -X
+        assertThat(CcittTiffs.parseParams("-4 -P -X nope -B -M")).isNull(); // bad width
+        assertThat(CcittTiffs.parseParams("-4 -P -X 100 -B")).isNull(); // no -M
+        assertThat(CcittTiffs.parseParams("")).isNull();
+    }
+
+    // ---- TIFF wrapping ----
+
+    /**
+     * Round trip. A known bitonal pattern is encoded to a raw G4 stream by PDFBox's CCITT encoder
+     * (the encoding a scanner PDF embeds and {@code pdfimages -ccitt} dumps), then wrapped with
+     * {@link CcittTiffs#writeSingleStripG4}; reading the result back through Leptonica must give
+     * the original pixels and the stamped resolution.
+     */
+    @Test
+    void wrappedStreamDecodesBackPixelIdentical(@TempDir Path tmp) throws Exception {
+        int width = 200;
+        int height = 150;
+        BufferedImage source = pattern(width, height);
+        Path referencePng = tmp.resolve("reference.png");
+        ImageIO.write(source, "png", referencePng.toFile());
+
+        G4Stream encoded = encodeG4(source);
+        Path wrapped = tmp.resolve("wrapped.tif");
+        CcittTiffs.writeSingleStripG4(wrapped, encoded.bytes, width, height, encoded.blackIs1, 450);
+
+        try (Pix expected = Pix.read(referencePng);
+                Pix decoded = Pix.read(wrapped)) {
+            assertThat(decoded.width()).isEqualTo(width);
+            assertThat(decoded.height()).isEqualTo(height);
+            assertThat(decoded.resolution()).isEqualTo(450);
+            assertThat(decoded.blackPixels()).isPositive();
+            assertThat(decoded.pixelsEqual(expected)).isTrue();
+        }
+    }
+
+    @Test
+    void omitsResolutionTagsWhenDpiUnknown(@TempDir Path tmp) throws Exception {
+        int width = 64;
+        int height = 48;
+        G4Stream encoded = encodeG4(pattern(width, height));
+
+        Path unstamped = tmp.resolve("wrapped.tif");
+        CcittTiffs.writeSingleStripG4(unstamped, encoded.bytes, width, height, encoded.blackIs1, 0);
+
+        try (Pix decoded = Pix.read(unstamped)) {
+            assertThat(decoded.width()).isEqualTo(width);
+            assertThat(decoded.resolution()).isZero();
+        }
+    }
+
+    /** Holder for a raw G4 (T.6) stream plus the {@code BlackIs1} flag its encoder declared. */
+    private static final class G4Stream {
+        final byte[] bytes;
+        final boolean blackIs1;
+
+        G4Stream(byte[] bytes, boolean blackIs1) {
+            this.bytes = bytes;
+            this.blackIs1 = blackIs1;
+        }
+    }
+
+    /**
+     * Encodes {@code img} with PDFBox's {@link CCITTFactory} and lifts the raw CCITT G4 stream
+     * straight off the image XObject (the bytes {@code pdfimages -ccitt} would dump), together
+     * with the {@code BlackIs1} decode parameter the encoder declared ({@code false} if absent).
+     */
+    private static G4Stream encodeG4(BufferedImage img) throws IOException {
+        try (PDDocument doc = new PDDocument()) {
+            PDImageXObject image = CCITTFactory.createFromImage(doc, img);
+            var stream = image.getCOSObject();
+            var decodeParms = (COSDictionary) stream.getDictionaryObject(COSName.DECODE_PARMS);
+            boolean blackIs1 =
+                    decodeParms != null && decodeParms.getBoolean(COSName.BLACK_IS_1, false);
+            try (InputStream raw = stream.createRawInputStream()) {
+                return new G4Stream(raw.readAllBytes(), blackIs1);
+            }
+        }
+    }
+
+    /** Draws a fixed bitonal test pattern: vertical bars plus one solid block, black on white. */
+    private static BufferedImage pattern(int width, int height) {
+        BufferedImage canvas = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY);
+        Graphics2D pen = canvas.createGraphics();
+        try {
+            pen.setColor(Color.WHITE);
+            pen.fillRect(0, 0, width, height);
+            pen.setColor(Color.BLACK);
+            for (int barLeft = 4; barLeft < width - 8; barLeft += 12) {
+                pen.fillRect(barLeft, 8, 6, height - 16);
+            }
+            pen.fillRect(width / 3, height / 3, width / 3, height / 3);
+        } finally {
+            pen.dispose();
+        }
+        return canvas;
+    }
+}
diff --git a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractorTest.java b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractorTest.java
index c9032d2..9b7d1c5 100644
--- a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractorTest.java
+++ b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfImagesCliExtractorTest.java
@@ -94,6 +94,69 @@ void dominantDpiReturnsAPositiveResolution(@TempDir Path tmp) throws Exception {
         assertThat(dpi).isPositive();
     }
 
+    /** Writes a {@code pages}-page PDF; each page is one CCITT-G4 bitonal image at ~200 ppi. */
+    private static void writeCcittPdf(Path pdf, int pages, int imgW, int imgH) throws IOException {
+        // Page size in points (1/72 inch) that places an imgW x imgH image at 200 ppi.
+        float wPt = imgW * 72f / 200;
+        float hPt = imgH * 72f / 200;
+        try (PDDocument doc = new PDDocument()) {
+            for (int i = 0; i < pages; i++) {
+                BufferedImage scan = new BufferedImage(imgW, imgH, BufferedImage.TYPE_BYTE_BINARY);
+                java.awt.Graphics2D pen = scan.createGraphics();
+                try {
+                    pen.setColor(java.awt.Color.WHITE);
+                    pen.fillRect(0, 0, imgW, imgH);
+                    pen.setColor(java.awt.Color.BLACK);
+                    pen.fillRect(10 + i, 10, imgW / 3, imgH / 2);
+                } finally {
+                    pen.dispose();
+                }
+                PDPage page = new PDPage(new PDRectangle(wPt, hPt));
+                doc.addPage(page);
+                PDImageXObject embedded =
+                        org.apache.pdfbox.pdmodel.graphics.image.CCITTFactory.createFromImage(
+                                doc, scan);
+                try (PDPageContentStream content = new PDPageContentStream(doc, page)) {
+                    content.drawImage(embedded, 0, 0, wPt, hPt);
+                }
+            }
+            doc.save(pdf.toFile());
+        }
+    }
+
+    @Test
+    @EnabledIf("io.github.p4suta.shared.pdf.PdfImagesCliExtractorTest#toolsOnPath")
+    void remuxesAnAllCcittSourceIntoStampedG4Tiffs(@TempDir Path tmp) throws Exception {
+        Path pdf = tmp.resolve("scan.pdf");
+        writeCcittPdf(pdf, 3, 240, 180);
+        Path out = Files.createDirectory(tmp.resolve("out"));
+
+        ExecutorService workers = Executors.newFixedThreadPool(2);
+        try {
+            new PdfImagesCliExtractor(PDFIMAGES_KEY, PDFINFO_KEY).extract(pdf, out, 2, workers);
+        } finally {
+            workers.shutdownNow();
+        }
+
+        try (Stream<Path> listing = Files.list(out)) {
+            List<Path> produced = listing.sorted().toList();
+            // One .tif per page and nothing else: the .ccitt/.params intermediates are gone.
+            assertThat(produced).hasSize(3);
+            assertThat(produced).allSatisfy(f -> assertThat(f.toString()).endsWith(".tif"));
+            for (Path tif : produced) {
+                try (io.github.p4suta.shared.imaging.Pix page =
+                        io.github.p4suta.shared.imaging.Pix.read(tif)) {
+                    assertThat(page.width()).isEqualTo(240);
+                    assertThat(page.height()).isEqualTo(180);
+                    // Stamped with the image's real ppi, not the 72 pdfimages defaults to.
+                    assertThat(page.resolution()).isEqualTo(200);
+                    // Polarity check: the drawn block is ~1/6 of the page, so black is a minority.
+                    assertThat(page.blackPixels()).isGreaterThan(0).isLessThan(240L * 180 / 2);
+                }
+            }
+        }
+    }
+
     @Test
     void missingToolFailsWithAClearMessage(@TempDir Path tmp) throws Exception {
         Path pdf = tmp.resolve("doc.pdf");
diff --git a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfListingParserTest.java b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfListingParserTest.java
index f228032..2bba8d0 100644
--- a/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfListingParserTest.java
+++ b/shared/pdf/src/test/java/io/github/p4suta/shared/pdf/PdfListingParserTest.java
@@ -120,4 +120,27 @@ void parseDominantDpiSkipsNonImageRowsWithTooFewFields() {
                 PdfListingParser.DEFAULT_DPI,
                 PdfListingParser.parseDominantDpi("hdr\n----\n   1   0   smask\n"));
     }
+
+    @Test
+    void parseImageRowsReadsTheColumnsTheExtractorNeeds() {
+        var all = PdfListingParser.parseImageRows(LIST);
+        assertEquals(3, all.size());
+        assertEquals(new PdfListingParser.ImageRow(1, 2480, 3508, 1, "ccitt", 300), all.getFirst());
+        assertEquals(new PdfListingParser.ImageRow(3, 1240, 1754, 1, "ccitt", 150), all.getLast());
+    }
+
+    @Test
+    void parseImageRowsSkipsMalformedAndNonImageRows() {
+        String report =
+                """
+                page   num  type   width height color comp bpc  enc  interp object ID x-ppi y-ppi size ratio
+                --------------------------------------------------------------------------------------------
+                   1     0 smask    2480  3508  gray    1   1  ccitt  no      7  0   300   300  101K 1.2%
+                   2     1 image     bad  3508  gray    1   1  ccitt  no     11  0   300   300   99K 1.1%
+                   3     2 image    2480  3508  rgb     3   8  jpeg   no     14  0   150   150   40K 1.0%
+                """;
+        var kept = PdfListingParser.parseImageRows(report);
+        assertEquals(1, kept.size());
+        assertEquals(new PdfListingParser.ImageRow(3, 2480, 3508, 8, "jpeg", 150), kept.getFirst());
+    }
 }