diff --git a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/DespeckleService.java b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/DespeckleService.java index 034f2e2..4bd632b 100644 --- a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/DespeckleService.java +++ b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/DespeckleService.java @@ -70,7 +70,8 @@ public record Config( * Aggregate outcome of a run. * * @param pages number of pages processed - * @param componentsRemoved total components removed across all pages + * @param componentsRemoved total components removed across all pages; {@code 0} when no report + * consumed component stats (counting is skipped for speed without a report) * @param overRemovalWarnings number of pages flagged for possible over-removal */ public record Summary(int pages, long componentsRemoved, int overRemovalWarnings) {} @@ -96,14 +97,20 @@ public Summary run(Config config, PageProgressListener progress) throws IOExcept } LOG.info("despeckling {} page(s) with {} thread(s)", files.size(), config.jobs()); + @Nullable Path reportDir = config.reportDir(); + boolean reporting = reportDir != null; Reporter report = - config.reportDir() == null - ? Reporter.noOp() - : reporterFactory.create(config.reportDir(), config.flipbook()); + reportDir != null + ? reporterFactory.create(reportDir, config.flipbook()) + : Reporter.noOp(); + // The report is the only consumer of per-page component counts, and counting is a full + // connected-component labeling twice per page — skip it when no report will be written. + ProcessOptions options = + reporting ? 
config.options() : config.options().withoutComponentStats(); List> tasks = new ArrayList<>(files.size()); for (Path src : files) { - tasks.add(() -> processOne(src, config, report)); + tasks.add(() -> processOne(src, config, options, report)); } // Platform workers: each page is CPU-bound Leptonica work (FFM downcalls pin virtual // threads' carriers). The fan-out fails fast and quiesces before throwing, so a failed @@ -124,9 +131,11 @@ public Summary run(Config config, PageProgressListener progress) throws IOExcept report.finish(); long totalRemoved = 0; + long blackRemoved = 0; int warnings = 0; for (PageOutcome outcome : outcomes) { totalRemoved += outcome.result().componentsRemoved(); + blackRemoved += outcome.result().blackPixelsRemoved(); if (outcome.result().isOverRemoval()) { warnings++; LOG.warn( @@ -135,17 +144,28 @@ public Summary run(Config config, PageProgressListener progress) throws IOExcept Math.round(outcome.result().removedBlackPixelRatio() * 100)); } } - LOG.info( - "done: {} page(s), {} component(s) removed, {} over-removal warning(s)", - files.size(), - totalRemoved, - warnings); + if (reporting) { + LOG.info( + "done: {} page(s), {} component(s) removed, {} over-removal warning(s)", + files.size(), + totalRemoved, + warnings); + } else { + // Without a report nothing counted components (the counting passes are skipped for + // speed), so the summary speaks in the always-measured black-pixel terms. 
+ LOG.info( + "done: {} page(s), {} black pixel(s) removed, {} over-removal warning(s)", + files.size(), + blackRemoved, + warnings); + } return new Summary(files.size(), totalRemoved, warnings); } private record PageOutcome(Path source, ProcessResult result) {} - private PageOutcome processOne(Path src, Config config, Reporter report) throws IOException { + private PageOutcome processOne(Path src, Config config, ProcessOptions options, Reporter report) + throws IOException { Path dest = CorpusFiles.mirrorDestination( src, config.inputDir(), config.outputDir(), config.format().extension()); @@ -154,7 +174,7 @@ private PageOutcome processOne(Path src, Config config, Reporter report) throws if (parent != null) { Files.createDirectories(parent); } - ProcessResult result = pageCleaner.clean(src, dest, config.format(), config.options()); + ProcessResult result = pageCleaner.clean(src, dest, config.format(), options); Path stem = config.inputDir().relativize(src); report.addPage(stem, src, dest, result); return new PageOutcome(src, result); diff --git a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfBatchService.java b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfBatchService.java index c42cf03..d1c5da2 100644 --- a/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfBatchService.java +++ b/despeckle/application/src/main/java/io/github/p4suta/despeckle/application/PdfBatchService.java @@ -148,14 +148,23 @@ public Summary run(Config config) throws IOException { if (config.reportParent() != null) { batchReporter.write(config.reportParent(), books); + LOG.info( + "done: {} ok, {} skipped, {} failed, {} page(s), {} component(s) removed", + ok, + skipped, + failed, + totalPages, + totalComponentsRemoved); + } else { + // Without reports the runs skip component counting (an expensive labeling, twice per + // page), so a component total would always read 0 — leave it out of the line. 
+ LOG.info( + "done: {} ok, {} skipped, {} failed, {} page(s)", + ok, + skipped, + failed, + totalPages); } - LOG.info( - "done: {} ok, {} skipped, {} failed, {} page(s), {} component(s) removed", - ok, - skipped, - failed, - totalPages, - totalComponentsRemoved); return new Summary(ok, skipped, failed, totalPages, totalComponentsRemoved); } diff --git a/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/DespeckleServiceTest.java b/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/DespeckleServiceTest.java index 4bb8187..ba543df 100644 --- a/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/DespeckleServiceTest.java +++ b/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/DespeckleServiceTest.java @@ -1,6 +1,7 @@ package io.github.p4suta.despeckle.application; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -141,4 +142,30 @@ void drivesTheReporterWhenAReportDirIsGiven(@TempDir Path tmp) throws IOExceptio assertEquals(2, factory.reporter.pages.get(), "every page is reported"); assertEquals(1, factory.reporter.finished.get(), "the report is finalized once"); } + + @Test + void skipsComponentCountingWithoutAReport(@TempDir Path tmp) throws IOException { + Path in = tmp.resolve("in"); + writeInputs(in, 1); + FakePageCleaner cleaner = new FakePageCleaner(LIGHT); + + new DespeckleService(cleaner, new RecordingReporterFactory()) + .run(config(in, tmp.resolve("out"), false, null)); + + ProcessOptions seen = java.util.Objects.requireNonNull(cleaner.lastOptions); + assertFalse(seen.collectComponentStats(), "no report -> counting passes are skipped"); + } + + @Test + void keepsComponentCountingForTheReportPath(@TempDir Path tmp) throws IOException { + Path in = tmp.resolve("in"); + 
writeInputs(in, 1); + FakePageCleaner cleaner = new FakePageCleaner(LIGHT); + + new DespeckleService(cleaner, new RecordingReporterFactory()) + .run(config(in, tmp.resolve("out"), false, tmp.resolve("report"))); + + ProcessOptions seen = java.util.Objects.requireNonNull(cleaner.lastOptions); + assertTrue(seen.collectComponentStats(), "the report consumes the counts"); + } } diff --git a/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/Fakes.java b/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/Fakes.java index aaf9f49..a061822 100644 --- a/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/Fakes.java +++ b/despeckle/application/src/test/java/io/github/p4suta/despeckle/application/Fakes.java @@ -33,6 +33,7 @@ private Fakes() {} /** A {@link PageCleaner} that writes a stub output file and returns a fixed result. */ static final class FakePageCleaner implements PageCleaner { final AtomicInteger calls = new AtomicInteger(); + volatile @Nullable ProcessOptions lastOptions; private final ProcessResult result; FakePageCleaner(ProcessResult result) { @@ -48,6 +49,7 @@ public ProcessResult clean( throw new java.io.UncheckedIOException(e); } calls.incrementAndGet(); + lastOptions = options; return result; } } diff --git a/despeckle/docs/cleaner-baseline.md b/despeckle/docs/cleaner-baseline.md index 96c422a..1304db0 100644 --- a/despeckle/docs/cleaner-baseline.md +++ b/despeckle/docs/cleaner-baseline.md @@ -5,27 +5,28 @@ Times each Leptonica primitive the page cleaner composes on a synthetic 600-dpi A5 page (3496x4961 px, fixed seed). Re-run after any change to the cleaner or the imaging bindings and compare before merging. -- Date (UTC): 2026-06-10 06:25:05 +- Date (UTC): 2026-06-10 06:36:38 - Host: Linux amd64, 8 CPUs - Samples: median of 10 reps after 2 warmups; single-threaded. | op | median (ms) | min (ms) | calls/clean() | est. 
share of clean() | |---|---:|---:|---:|---:| -| read TIFF-G4 | 2.59 | 2.52 | 1 | 1.5% | -| selectBySize k=6 (page) | 15.22 | 14.98 | 1 | 8.7% | -| selectBySize 15 (page) | 15.19 | 14.80 | 1 | 8.7% | -| selectBySize k=6 (inverted) | 22.16 | 21.89 | 2 | 25.3% | -| dilate 43x43 (text mask) | 37.52 | 37.01 | 1 | 21.5% | -| open 7x7 (page) | 12.18 | 12.04 | 1 | 7.0% | -| invert | 0.26 | 0.25 | 2 | 0.3% | -| subtract | 0.38 | 0.36 | 5 | 1.1% | -| and | 0.40 | 0.38 | 1 | 0.2% | -| or | 0.33 | 0.32 | 3 | 0.6% | -| countConnComp | 11.82 | 11.46 | 2 | 13.5% | +| read TIFF-G4 | 2.53 | 2.52 | 1 | 1.6% | +| selectBySize k=6 (page) | 16.04 | 14.90 | 1 | 9.9% | +| selectBySize 15 (page) | 15.02 | 14.84 | 1 | 9.3% | +| selectBySize k=6 (inverted) | 22.14 | 21.84 | 2 | 27.4% | +| dilate 43x43 (text mask) | 38.37 | 36.97 | 1 | 23.7% | +| open 7x7 (page) | 12.15 | 12.01 | 1 | 7.5% | +| invert | 0.25 | 0.25 | 2 | 0.3% | +| subtract | 0.40 | 0.38 | 5 | 1.2% | +| and | 0.41 | 0.40 | 1 | 0.3% | +| or | 0.39 | 0.35 | 3 | 0.7% | +| countConnComp | 11.53 | 11.46 | 2 | 14.3% | | countPixels | 0.41 | 0.41 | 2 | 0.5% | -| write TIFF-G4 | 6.44 | 6.29 | 1 | 3.7% | -| **Σ(median × calls)** | 161.74 | | | 92.5% | -| **clean() end-to-end** | 174.91 | 173.28 | 1 | 100% | +| write TIFF-G4 | 6.34 | 6.26 | 1 | 3.9% | +| **Σ(median × calls)** | 162.69 | | | 100.6% | +| **clean() end-to-end** | 161.78 | 159.91 | 1 | 100% | +| **clean() without component stats** | 139.46 | 137.07 | 1 | 86.2% | The Σ row landing near 100% means the table accounts for clean()'s real cost; a large gap points at untimed work (allocation churn, codec internals). 
diff --git a/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessOptions.java b/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessOptions.java index a565b1f..2051160 100644 --- a/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessOptions.java +++ b/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessOptions.java @@ -27,13 +27,16 @@ * @param removeIsolatedDust whether to run the isolated-medium-dust pass * @param isolatedDustSizePx an explicit max size for an isolated speck, or empty to derive it; a * present value also implies {@code removeIsolatedDust} + * @param collectComponentStats whether to count 8-connected components before and after — two of + * the most expensive full-page scans, so off when nothing consumes the counts (no report) */ public record ProcessOptions( OptionalInt dpi, OptionalInt speckSizePx, boolean fillHoles, boolean removeIsolatedDust, - OptionalInt isolatedDustSizePx) { + OptionalInt isolatedDustSizePx, + boolean collectComponentStats) { /** Resolution assumed for the speck filter when neither a flag nor the image supplies one. */ public static final int DEFAULT_DPI = 300; @@ -50,6 +53,16 @@ public record ProcessOptions( } } + /** The five-knob shape every direct caller uses; component counting defaults to on. */ + public ProcessOptions( + OptionalInt dpi, + OptionalInt speckSizePx, + boolean fillHoles, + boolean removeIsolatedDust, + OptionalInt isolatedDustSizePx) { + this(dpi, speckSizePx, fillHoles, removeIsolatedDust, isolatedDustSizePx, true); + } + /** Options with the isolated-dust pass off — the common case. 
*/ public static ProcessOptions of(OptionalInt dpi, OptionalInt speckSizePx, boolean fillHoles) { return new ProcessOptions(dpi, speckSizePx, fillHoles, false, OptionalInt.empty()); @@ -66,7 +79,17 @@ public ProcessOptions withDpi(int dpi) { speckSizePx, fillHoles, removeIsolatedDust, - isolatedDustSizePx); + isolatedDustSizePx, + collectComponentStats); + } + + /** + * A copy with component counting disabled — for runs where nothing consumes the counts (no + * report), saving two full connected-component labelings per page. + */ + public ProcessOptions withoutComponentStats() { + return new ProcessOptions( + dpi, speckSizePx, fillHoles, removeIsolatedDust, isolatedDustSizePx, false); } /** diff --git a/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessResult.java b/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessResult.java index f6efbf9..8e6fd25 100644 --- a/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessResult.java +++ b/despeckle/domain/src/main/java/io/github/p4suta/despeckle/domain/model/ProcessResult.java @@ -1,15 +1,26 @@ package io.github.p4suta.despeckle.domain.model; +import java.util.OptionalInt; + /** * The outcome of despeckling one page. * - * @param componentsBefore 8-connected component count of the input - * @param componentsAfter 8-connected component count of the output + * <p>
The black-pixel counts are always measured — they feed the over-removal guard. The component + * counts are measured only when something consumes them (the HTML report): counting 8-connected + * components is a full connected-component labeling of the page, one of the most expensive scans in + * the whole clean, so a run with no report skips both counting passes and carries empty components + * here. + * + * @param componentsBefore 8-connected component count of the input, when counted + * @param componentsAfter 8-connected component count of the output, when counted * @param blackPixelsBefore foreground pixel count of the input * @param blackPixelsAfter foreground pixel count of the output */ public record ProcessResult( - int componentsBefore, int componentsAfter, long blackPixelsBefore, long blackPixelsAfter) { + OptionalInt componentsBefore, + OptionalInt componentsAfter, + long blackPixelsBefore, + long blackPixelsAfter) { /** * A black-pixel removal ratio above this flags a possibly over-cleaned page. The single domain @@ -17,12 +28,38 @@ public record ProcessResult( */ public static final double OVER_REMOVAL_WARN_RATIO = 0.03; + /** A counted result — the shape the report path consumes. */ + public ProcessResult( + int componentsBefore, + int componentsAfter, + long blackPixelsBefore, + long blackPixelsAfter) { + this( + OptionalInt.of(componentsBefore), + OptionalInt.of(componentsAfter), + blackPixelsBefore, + blackPixelsAfter); + } + + /** The result of a run that skipped component counting (nothing consumes the counts). */ + public static ProcessResult withoutComponentStats( + long blackPixelsBefore, long blackPixelsAfter) { + return new ProcessResult( + OptionalInt.empty(), OptionalInt.empty(), blackPixelsBefore, blackPixelsAfter); + } + + /** Whether component counting ran (true on the report path). 
*/ + public boolean hasComponentStats() { + return componentsBefore.isPresent(); + } + /** * Net drop in 8-connected components — dust removed minus any holes filled back in. Summed into - * the run total and plotted per page in the report. + * the run total and plotted per page in the report. {@code 0} when counting was skipped (see + * {@link #hasComponentStats()}). */ public int componentsRemoved() { - return componentsBefore - componentsAfter; + return componentsBefore.orElse(0) - componentsAfter.orElse(0); } /** @@ -36,6 +73,11 @@ public double removedBlackPixelRatio() { return (double) (blackPixelsBefore - blackPixelsAfter) / blackPixelsBefore; } + /** Black pixels removed from the page — the cost-free removal measure every run carries. */ + public long blackPixelsRemoved() { + return blackPixelsBefore - blackPixelsAfter; + } + /** * Whether {@link #removedBlackPixelRatio()} exceeds {@link #OVER_REMOVAL_WARN_RATIO}, the * threshold the runner logs on and the report flags. diff --git a/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessOptionsTest.java b/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessOptionsTest.java index 700035a..6d70e42 100644 --- a/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessOptionsTest.java +++ b/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessOptionsTest.java @@ -163,4 +163,21 @@ void isolatedDustSizeStaysAboveTheSpeckSizeAtVeryLowResolution() { assertEquals( 2, options.isolatedDustSize(noImg()), "isolated-dust size floors at speckSize + 1"); } + + @Test + void componentStatsDefaultOnAndWitherTurnsThemOff() { + ProcessOptions defaults = ProcessOptions.defaults(); + assertTrue(defaults.collectComponentStats(), "counting is on unless turned off"); + + ProcessOptions off = defaults.withoutComponentStats(); + assertFalse(off.collectComponentStats()); + // Every other knob survives the wither. 
+ assertEquals(defaults.dpi(), off.dpi()); + assertEquals(defaults.speckSizePx(), off.speckSizePx()); + assertEquals(defaults.fillHoles(), off.fillHoles()); + assertEquals(defaults.removeIsolatedDust(), off.removeIsolatedDust()); + assertEquals(defaults.isolatedDustSizePx(), off.isolatedDustSizePx()); + // ...including through withDpi. + assertFalse(off.withDpi(600).collectComponentStats()); + } } diff --git a/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessResultTest.java b/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessResultTest.java index fac55e0..7e2508b 100644 --- a/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessResultTest.java +++ b/despeckle/domain/src/test/java/io/github/p4suta/despeckle/domain/model/ProcessResultTest.java @@ -60,4 +60,21 @@ void isOverRemovalIsFalseBelowTheThreshold() { void isOverRemovalIsFalseForABlankInputPage() { assertFalse(new ProcessResult(0, 0, 0, 0).isOverRemoval()); } + + @Test + void withoutComponentStatsCarriesNoCountsButFullPixelMath() { + ProcessResult result = ProcessResult.withoutComponentStats(1000, 950); + assertFalse(result.hasComponentStats()); + assertEquals(0, result.componentsRemoved(), "absent counts read as zero, documented"); + assertEquals(50L, result.blackPixelsRemoved()); + assertTrue(result.isOverRemoval(), "the over-removal guard works without counts"); + } + + @Test + void countedConstructorHasComponentStats() { + ProcessResult result = new ProcessResult(120, 95, 1000, 990); + assertTrue(result.hasComponentStats()); + assertEquals(25, result.componentsRemoved()); + assertEquals(10L, result.blackPixelsRemoved()); + } } diff --git a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleaner.java b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleaner.java index 7f8434c..6fd7d77 100644 --- 
a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleaner.java +++ b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleaner.java @@ -52,7 +52,11 @@ public ProcessResult clean( int raw = source.resolution(); Optional img = raw > 0 ? Optional.of(Resolution.of(raw)) : Optional.empty(); int k = options.speckSize(img); - int componentsBefore = source.connectedComponents(); + // Component counting is a full connected-component labeling — one of the most + // expensive scans here — so it runs only when something consumes the counts (the + // report). The black-pixel counts always run: they feed the over-removal guard. + boolean countComponents = options.collectComponentStats(); + int componentsBefore = countComponents ? source.connectedComponents() : 0; long blackBefore = source.blackPixels(); int sourceFormat = source.inputFormat(); @@ -78,14 +82,18 @@ public ProcessResult clean( current = filled; } - int componentsAfter = current.connectedComponents(); long blackAfter = current.blackPixels(); // Stamp the honored resolution so a TIFF/PNG output carries an accurate tag. Only a // known resolution is written; an unknown one is left untouched. options.resolution(img).map(Resolution::dpi).ifPresent(current::setResolution); writeIn(current, output, format, sourceFormat); - return new ProcessResult( - componentsBefore, componentsAfter, blackBefore, blackAfter); + return countComponents + ? 
new ProcessResult( + componentsBefore, + current.connectedComponents(), + blackBefore, + blackAfter) + : ProcessResult.withoutComponentStats(blackBefore, blackAfter); } finally { current.close(); } diff --git a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/report/HtmlReporter.java b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/report/HtmlReporter.java index 2024be8..32d243f 100644 --- a/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/report/HtmlReporter.java +++ b/despeckle/infrastructure/src/main/java/io/github/p4suta/despeckle/infrastructure/report/HtmlReporter.java @@ -88,11 +88,13 @@ public void addPage(Path relativeStem, Path inputImage, Path outputImage, Proces } writeOverlayAndAccumulate(beforeImg, afterImg, overlayWebp); + // The report path always runs with component counting on (DespeckleService enables it + // whenever a reportDir is set), so the counts are present here; orElse(0) is type-driven. 
stats.add( new PageStat( stem, - result.componentsBefore(), - result.componentsAfter(), + result.componentsBefore().orElse(0), + result.componentsAfter().orElse(0), result.removedBlackPixelRatio())); } diff --git a/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleanerTest.java b/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleanerTest.java index 4bd4523..15d3cef 100644 --- a/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleanerTest.java +++ b/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/leptonica/LeptonicaPageCleanerTest.java @@ -1,6 +1,7 @@ package io.github.p4suta.despeckle.infrastructure.leptonica; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import io.github.p4suta.despeckle.domain.model.OutputFormat; @@ -231,4 +232,37 @@ void fillHolesSparesTheGapInsideAThinWalledGlyph(@TempDir Path dir) throws Excep ring, cleaned.blackPixels(), "the thin-walled gap is preserved, not filled"); } } + + @Test + void skippedComponentCountingStillCleansAndGuardsOverRemoval(@TempDir Path dir) + throws Exception { + // The same speck-removal scenario as removesSpecksButPreservesGlyph, with counting off: + // the output is identical, the result carries no component stats, and the always-measured + // black-pixel math still drives the over-removal guard. 
+ Path src = dir.resolve("page.pbm"); + Path out = dir.resolve("page-out.pbm"); + boolean[][] img = TestImages.blank(40, 40); + TestImages.fillRect(img, 8, 8, 19, 25); + TestImages.dot(img, 2, 2); + TestImages.dot(img, 35, 30); + TestImages.dot(img, 30, 4); + TestImages.writePbm(src, img); + + ProcessResult result = + cleaner.clean( + src, + out, + OutputFormat.PBM, + ProcessOptions.of(OptionalInt.of(300), OptionalInt.of(3), false) + .withoutComponentStats()); + + assertFalse(result.hasComponentStats()); + assertEquals(0, result.componentsRemoved(), "absent counts read as zero"); + assertEquals(3L, result.blackPixelsRemoved(), "three 1px specks gone"); + assertFalse(result.isOverRemoval(), "3 of 219 black pixels is under the 3% threshold"); + try (Pix cleaned = Pix.read(out)) { + assertEquals(1, cleaned.connectedComponents()); + assertEquals(12L * 18L, cleaned.blackPixels(), "the whole glyph survives intact"); + } + } } diff --git a/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/tools/CleanerBenchmark.java b/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/tools/CleanerBenchmark.java index d1ff679..6811533 100644 --- a/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/tools/CleanerBenchmark.java +++ b/despeckle/infrastructure/src/test/java/io/github/p4suta/despeckle/infrastructure/tools/CleanerBenchmark.java @@ -122,8 +122,20 @@ public static void main(String[] args) throws IOException { 1, reps, () -> cleaner.clean(g4, cleaned, OutputFormat.TIFF, options())); + // The pipeline's actual configuration (no report -> component counting skipped). 
+ Row cleanNoStats = + timeVoid( + "clean() without component stats", + 1, + reps, + () -> + cleaner.clean( + g4, + cleaned, + OutputFormat.TIFF, + options().withoutComponentStats())); - String report = render(rows, clean, reps); + String report = render(rows, clean, cleanNoStats, reps); Path parent = outDoc.toAbsolutePath().getParent(); if (parent != null) { Files.createDirectories(parent); @@ -254,7 +266,7 @@ private static boolean[][] syntheticPage() { // ---- rendering ---- - private static String render(List rows, Row clean, int reps) { + private static String render(List rows, Row clean, Row cleanNoStats, int reps) { var sb = new StringBuilder(); sb.append("# despeckle cleaner op-level baseline\n\n") .append("Generated by `CleanerBenchmark`") @@ -312,6 +324,13 @@ private static String render(List rows, Row clean, int reps) { "| **clean() end-to-end** | %.2f | %.2f | 1 | 100%% |%n", clean.medianMs(), clean.minMs())); + sb.append( + String.format( + Locale.ROOT, + "| **clean() without component stats** | %.2f | %.2f | 1 | %.1f%% |%n", + cleanNoStats.medianMs(), + cleanNoStats.minMs(), + cleanNoStats.medianMs() * 100.0 / clean.medianMs())); sb.append( "\nThe Σ row landing near 100% means the table accounts for clean()'s real" + " cost;\n")