diff --git a/despeckle/docs/cleaner-baseline.md b/despeckle/docs/cleaner-baseline.md index 1304db0..c244c1f 100644 --- a/despeckle/docs/cleaner-baseline.md +++ b/despeckle/docs/cleaner-baseline.md @@ -5,28 +5,28 @@ Times each Leptonica primitive the page cleaner composes on a synthetic 600-dpi A5 page (3496x4961 px, fixed seed). Re-run after any change to the cleaner or the imaging bindings and compare before merging. -- Date (UTC): 2026-06-10 06:36:38 +- Date (UTC): 2026-06-10 06:48:05 - Host: Linux amd64, 8 CPUs - Samples: median of 10 reps after 2 warmups; single-threaded. | op | median (ms) | min (ms) | calls/clean() | est. share of clean() | |---|---:|---:|---:|---:| -| read TIFF-G4 | 2.53 | 2.52 | 1 | 1.6% | -| selectBySize k=6 (page) | 16.04 | 14.90 | 1 | 9.9% | -| selectBySize 15 (page) | 15.02 | 14.84 | 1 | 9.3% | -| selectBySize k=6 (inverted) | 22.14 | 21.84 | 2 | 27.4% | -| dilate 43x43 (text mask) | 38.37 | 36.97 | 1 | 23.7% | -| open 7x7 (page) | 12.15 | 12.01 | 1 | 7.5% | -| invert | 0.25 | 0.25 | 2 | 0.3% | -| subtract | 0.40 | 0.38 | 5 | 1.2% | -| and | 0.41 | 0.40 | 1 | 0.3% | -| or | 0.39 | 0.35 | 3 | 0.7% | -| countConnComp | 11.53 | 11.46 | 2 | 14.3% | -| countPixels | 0.41 | 0.41 | 2 | 0.5% | -| write TIFF-G4 | 6.34 | 6.26 | 1 | 3.9% | -| **Σ(median × calls)** | 162.69 | | | 100.6% | -| **clean() end-to-end** | 161.78 | 159.91 | 1 | 100% | -| **clean() without component stats** | 139.46 | 137.07 | 1 | 86.2% | +| read TIFF-G4 | 2.58 | 2.54 | 1 | 2.0% | +| selectBySize k=6 (page) | 15.22 | 14.89 | 1 | 11.6% | +| selectBySize 15 (page) | 15.40 | 14.93 | 1 | 11.7% | +| selectBySize k=6 (inverted) | 22.36 | 22.04 | 2 | 34.1% | +| dilate 43x43 (text mask) | 14.08 | 13.74 | 1 | 10.7% | +| open 7x7 (page) | 4.01 | 3.85 | 1 | 3.1% | +| invert | 0.27 | 0.25 | 2 | 0.4% | +| subtract | 0.40 | 0.38 | 5 | 1.5% | +| and | 0.45 | 0.41 | 1 | 0.3% | +| or | 0.40 | 0.36 | 3 | 0.9% | +| countConnComp | 11.64 | 11.50 | 2 | 17.8% | +| countPixels | 0.42 | 0.41 | 2 
| 0.6% | +| write TIFF-G4 | 6.34 | 6.27 | 1 | 4.8% | +| **Σ(median × calls)** | 130.66 | | | 99.6% | +| **clean() end-to-end** | 131.15 | 129.19 | 1 | 100% | +| **clean() without component stats** | 107.60 | 106.36 | 1 | 82.0% | The Σ row landing near 100% means the table accounts for clean()'s real cost; a large gap points at untimed work (allocation churn, codec internals). diff --git a/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Leptonica.java b/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Leptonica.java index 9cf9945..1ca223d 100644 --- a/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Leptonica.java +++ b/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Leptonica.java @@ -296,6 +296,16 @@ private static MethodHandle handle(String name, FunctionDescriptor descriptor) { handle( "pixOpenBrick", FunctionDescriptor.of(ADDRESS, ADDRESS, ADDRESS, JAVA_INT, JAVA_INT)); + private static final MethodHandle PIX_DILATE_BRICK_DWA = + handle( + "pixDilateBrickDwa", + FunctionDescriptor.of(ADDRESS, ADDRESS, ADDRESS, JAVA_INT, JAVA_INT)); + private static final MethodHandle PIX_OPEN_BRICK_DWA = + handle( + "pixOpenBrickDwa", + FunctionDescriptor.of(ADDRESS, ADDRESS, ADDRESS, JAVA_INT, JAVA_INT)); + private static final MethodHandle MAKE_PIXEL_SUM_TAB8 = + handle("makePixelSumTab8", FunctionDescriptor.of(ADDRESS)); private static final MethodHandle PIX_AND = handle("pixAnd", FunctionDescriptor.of(ADDRESS, ADDRESS, ADDRESS, ADDRESS)); private static final MethodHandle PIX_OR = @@ -316,16 +326,38 @@ private static MethodHandle handle(String name, FunctionDescriptor descriptor) { private static final MethodHandle SET_MSG_SEVERITY = handle("setMsgSeverity", FunctionDescriptor.of(JAVA_INT, JAVA_INT)); + /** + * Largest brick side routed to a single DWA kernel call. 
Leptonica generates DWA sels only for + * a fixed set of linear sizes; measured against this build's 1.82, {@code pixDilateBrickDwa} + * silently diverges from the generic brick for the missing sizes (every prime above 15, e.g. + * 17, 43), while sizes up to 15 are complete. Larger dilations are composed from safe passes in + * {@code Pix}; the equality sweep in {@code PixTest} pins all of this empirically. + */ + static final int DWA_SAFE_BRICK = 15; + + /** + * The process-lifetime 8-bit popcount table {@code pixCountPixels} consumes (~1 KiB native, + * deliberately never freed): without it Leptonica mallocs, fills and frees the same table on + * every call. + */ + private static final MemorySegment PIXEL_SUM_TAB8; + static { // Suppress Leptonica's stderr diagnostics once, at class load. The returned previous // severity is discarded. try { SET_MSG_SEVERITY.invoke(L_SEVERITY_NONE); + PIXEL_SUM_TAB8 = (MemorySegment) MAKE_PIXEL_SUM_TAB8.invoke(); } catch (Throwable t) { - throw sneaky("setMsgSeverity", t); + throw sneaky("leptonica static init", t); } } + /** {@return the shared popcount table for {@code pixCountPixels}} */ + static MemorySegment pixelSumTab8() { + return PIXEL_SUM_TAB8; + } + // pix lifecycle / metadata /** Read an image file, returning the raw {@code PIX *} (0 on failure). */ @@ -613,6 +645,34 @@ static MemorySegment pixOpenBrick(MemorySegment src, int hsize, int vsize) { } } + /** + * Dilate {@code src} by a {@code hsize x vsize} brick via the word-accelerated DWA kernels into + * a fresh {@code PIX}. Only exact for sizes up to {@link #DWA_SAFE_BRICK} — see that constant; + * {@code Pix} owns the larger-size composition. 
+ */ + static MemorySegment pixDilateBrickDwa(MemorySegment src, int hsize, int vsize) { + try { + return (MemorySegment) + PIX_DILATE_BRICK_DWA.invoke(MemorySegment.NULL, src, hsize, vsize); + } catch (Throwable t) { + throw sneaky("pixDilateBrickDwa", t); + } + } + + /** + * Open (erode then dilate) {@code src} by a {@code hsize x vsize} brick via the + * word-accelerated DWA kernels into a fresh {@code PIX}. Routed only for sizes up to {@link + * #DWA_SAFE_BRICK}; pixel-identical there to {@link #pixOpenBrick} (pinned by {@code PixTest}'s + * sweep) and several times faster. + */ + static MemorySegment pixOpenBrickDwa(MemorySegment src, int hsize, int vsize) { + try { + return (MemorySegment) PIX_OPEN_BRICK_DWA.invoke(MemorySegment.NULL, src, hsize, vsize); + } catch (Throwable t) { + throw sneaky("pixOpenBrickDwa", t); + } + } + /** {@code s1 AND s2} into a fresh {@code PIX} (the {@code pixd == NULL} path). */ static MemorySegment pixAnd(MemorySegment s1, MemorySegment s2) { try { diff --git a/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Pix.java b/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Pix.java index 036887b..659c1cb 100644 --- a/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Pix.java +++ b/shared/imaging/src/main/java/io/github/p4suta/shared/imaging/Pix.java @@ -312,7 +312,8 @@ public long blackPixels() { MemorySegment h = requireHandle(); try (Arena arena = Arena.ofConfined()) { MemorySegment count = arena.allocate(JAVA_INT); - Leptonica.pixCountPixels(h, count, MemorySegment.NULL); + // The shared popcount table: without it Leptonica rebuilds (and frees) one per call. + Leptonica.pixCountPixels(h, count, Leptonica.pixelSumTab8()); return Integer.toUnsignedLong(count.get(JAVA_INT, 0)); } } @@ -352,8 +353,41 @@ public Pix subtract(Pix other) { /** * Return a new {@code Pix} grown by {@code radius} pixels in every direction (dilation by a * {@code (2*radius+1)} square). A {@code radius} of 0 is the identity. 
+ * + *
Runs on Leptonica's word-accelerated DWA kernels for every size: bricks up to {@link
+     * Leptonica#DWA_SAFE_BRICK} directly, larger ones as a chain of safe-size passes — exact,
+     * because dilating by {@code brick(a)} then {@code brick(b)} equals dilating by {@code
+     * brick(a+b-1)} (Minkowski sum; within the image rectangle the L∞ paths between in-bounds
+     * points stay in bounds, so per-pass clipping changes nothing). Pixel-identity against the
+     * generic rasterop path is pinned by {@code PixTest}'s full sweep.
+     *
+     * @param radius pixels to grow by in every direction; the brick side is {@code 2 * radius + 1}
+     * @return a new {@code Pix} holding the dilated image
      */
     public Pix dilated(int radius) {
+        int size = 2 * radius + 1;
+        int covered = Math.min(size, Leptonica.DWA_SAFE_BRICK);
+        Pix current =
+                wrap(
+                        Leptonica.pixDilateBrickDwa(requireHandle(), covered, covered),
+                        "pixDilateBrickDwa");
+        while (covered < size) {
+            // size and covered stay odd, so the step is odd and within the safe sel set.
+            int step = Math.min(Leptonica.DWA_SAFE_BRICK, size - covered + 1);
+            // Closes the previous pass even if the next one throws, so no intermediate PIX leaks.
+            try (Pix previous = current) {
+                current =
+                        wrap(
+                                Leptonica.pixDilateBrickDwa(previous.requireHandle(), step, step),
+                                "pixDilateBrickDwa");
+            }
+            covered += step - 1;
+        }
+        return current;
+    }
+
+    /** The generic rasterop dilation — kept as the DWA equality oracle for {@code PixTest}. */
+    Pix dilatedGeneric(int radius) {
         int size = 2 * radius + 1;
         return wrap(Leptonica.pixDilateBrick(requireHandle(), size, size), "pixDilateBrick");
     }
@@ -361,8 +395,22 @@ public Pix dilated(int radius) {
     /**
      * Return a new {@code Pix} opened (eroded then dilated) by a {@code (2*radius+1)} square — i.e.
      * foreground thinner than the brick in either axis is erased, leaving only the solid parts.
+     *
+     * <p>Bricks up to {@link Leptonica#DWA_SAFE_BRICK} run on Leptonica's word-accelerated DWA
+     * kernels — pixel-identical to the generic rasterop path (pinned by {@code PixTest}'s sweep)
+     * and several times faster; larger bricks fall back to the generic path (an opening, unlike a
+     * dilation, does not compose from smaller passes).
      */
     public Pix opened(int radius) {
+        int size = 2 * radius + 1;
+        if (size <= Leptonica.DWA_SAFE_BRICK) {
+            return wrap(Leptonica.pixOpenBrickDwa(requireHandle(), size, size), "pixOpenBrickDwa");
+        }
+        return openedGeneric(radius);
+    }
+
+    /** The generic rasterop opening — the large-brick fallback and the DWA equality oracle. */
+    Pix openedGeneric(int radius) {
         int size = 2 * radius + 1;
         return wrap(Leptonica.pixOpenBrick(requireHandle(), size, size), "pixOpenBrick");
     }
diff --git a/shared/imaging/src/test/java/io/github/p4suta/shared/imaging/PixTest.java b/shared/imaging/src/test/java/io/github/p4suta/shared/imaging/PixTest.java
index 3baa473..09cb681 100644
--- a/shared/imaging/src/test/java/io/github/p4suta/shared/imaging/PixTest.java
+++ b/shared/imaging/src/test/java/io/github/p4suta/shared/imaging/PixTest.java
@@ -295,4 +295,83 @@ void useAfterCloseThrows(@TempDir Path dir) throws Exception {
         pix.close();
         assertThrows(IllegalStateException.class, pix::width);
     }
+
+    // DWA vs generic morphology: the empirical gate for the fast path
+
+    /**
+     * The load-bearing equality sweep: {@code dilated} runs on the DWA fast path for every size
+     * (single safe kernel up to 15, composed safe passes beyond — Leptonica's generated DWA sels
+     * are incomplete above 15 and silently diverge there, so the composition is the only exact
+     * route), and {@code opened} up to 15. Both must be pixel-identical to the generic rasterop
+     * morphology — including at the image borders, where DWA's internal bordering is the classic
+     * divergence trap.
The fixture has ink touching all four borders, interior glyphs and isolated
+     * dots; every radius from 0 through 31 is swept, i.e. every odd brick up to the 63x63 ceiling
+     * (production uses 7x7 open and 43x43 dilate). A failure here means the fast path may not ship.
+     */
+    @Test
+    void dwaMorphologyMatchesGenericBrickIncludingBorders(@TempDir Path dir) throws Exception {
+        Path pbm = dir.resolve("border-ink.pbm");
+        boolean[][] img = TestImages.blank(200, 150);
+        // Ink on all four borders: full-width top/bottom lines, full-height left/right lines.
+        TestImages.fillRect(img, 0, 0, 199, 1);
+        TestImages.fillRect(img, 0, 148, 199, 149);
+        TestImages.fillRect(img, 0, 0, 1, 149);
+        TestImages.fillRect(img, 198, 0, 199, 149);
+        // Corner blocks, interior glyphs, isolated dots.
+        TestImages.fillRect(img, 0, 0, 8, 8);
+        TestImages.fillRect(img, 190, 140, 199, 149);
+        TestImages.fillRect(img, 40, 30, 80, 90);
+        TestImages.fillRect(img, 120, 50, 150, 60);
+        TestImages.fillRect(img, 100, 110, 100, 110);
+        TestImages.fillRect(img, 20, 130, 20, 130);
+        TestImages.writePbm(pbm, img);
+
+        int maxRadius = 31; // 63x63 ceiling; every radius up to it is checked, not a sample
+        try (Pix page = Pix.read(pbm)) {
+            for (int radius = 0; radius <= maxRadius; radius++) {
+                try (Pix dwa = page.dilated(radius);
+                        Pix generic = page.dilatedGeneric(radius)) {
+                    assertTrue(
+                            dwa.pixelsEqual(generic),
+                            "dilate radius " + radius + " must be pixel-identical");
+                }
+                try (Pix dwa = page.opened(radius);
+                        Pix generic = page.openedGeneric(radius)) {
+                    assertTrue(
+                            dwa.pixelsEqual(generic),
+                            "open radius " + radius + " must be pixel-identical");
+                }
+            }
+        }
+    }
+
+    /** The sweep on degenerate pages: smaller than DWA's border, all-black, and all-white.
*/ + @Test + void dwaMorphologyMatchesGenericOnDegeneratePages(@TempDir Path dir) throws Exception { + boolean[][] tiny = TestImages.blank(20, 20); + TestImages.fillRect(tiny, 0, 0, 19, 2); + TestImages.fillRect(tiny, 17, 0, 19, 19); + TestImages.fillRect(tiny, 9, 9, 9, 9); + boolean[][] black = TestImages.blank(50, 40); + TestImages.fillRect(black, 0, 0, 49, 39); + boolean[][] white = TestImages.blank(50, 40); + + int page = 0; + for (boolean[][] img : java.util.List.of(tiny, black, white)) { + Path pbm = dir.resolve("degenerate-" + page++ + ".pbm"); + TestImages.writePbm(pbm, img); + try (Pix pix = Pix.read(pbm)) { + for (int radius : new int[] {1, 3, 21}) { + try (Pix dwa = pix.dilated(radius); + Pix generic = pix.dilatedGeneric(radius)) { + assertTrue(dwa.pixelsEqual(generic), "dilate r=" + radius); + } + try (Pix dwa = pix.opened(radius); + Pix generic = pix.openedGeneric(radius)) { + assertTrue(dwa.pixelsEqual(generic), "open r=" + radius); + } + } + } + } + } }