From 825aea54873dd1322160a8a705834f7204de6f2f Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Fri, 26 Jun 2026 17:39:57 +0200 Subject: [PATCH 1/2] docs: ByteBuffer interop + pledged-size zero-copy decode Document the ByteBuffer gap in the zero-copy design: the segment API already bridges NIO both ways at zero copy (MemorySegment.ofBuffer in, asByteBuffer out), so no parallel ByteBuffer overload set is needed. Note the rough edges that a proposed toByteBuffer() sugar would smooth (byte order, borrowed arena lifetime). Explain why pledging the source size is a correctness gate for zero-copy decode, not a micro-optimization: a streamed frame omits the content size, so decompress(arena, frame) cannot size the output arena and throws; withPledgedSize stamps the header and unlocks one-shot zero-copy decode. Add two unit tests covering the contrast: - a plain stream frame cannot be sized for zero-copy decode - a pledged frame decodes zero-copy into an arena in one shot Co-Authored-By: Claude Opus 4.8 --- docs/zero-copy.md | 82 ++++++++++++++++++- .../io/github/dfa1/zstd/ZstdStreamTest.java | 45 ++++++++++ 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/docs/zero-copy.md b/docs/zero-copy.md index 048887a..a8de5c8 100644 --- a/docs/zero-copy.md +++ b/docs/zero-copy.md @@ -87,7 +87,44 @@ MemorySegment decoded = dctx.decompress(arena, frame); // header-sized, exact le | decompress | `decompress(dst, src)` → bytes written | `decompress(arena, frame)` → output segment | The arena form of `decompress` requires the frame to store its decompressed size -(frames this library produces do). For size-less frames, size `dst` yourself. +(one-shot `compress` always stamps it; a *streamed* frame only does so when you +pledge the size up front — see [Pledged size](#pledged-size-unlocks-zero-copy-decode)). +For size-less frames, size `dst` yourself. 
+ +## ByteBuffer interop + +Much of the Java ecosystem speaks `ByteBuffer`, not `MemorySegment` — NIO +channels, Netty, and `FileChannel.map`'s `MappedByteBuffer`. We deliberately do +**not** add a third set of `ByteBuffer` overloads: the segment API already +bridges both directions of the FFM↔NIO boundary at zero copy, because FFM defines +the conversions. + +- **`ByteBuffer` in** — wrap a *direct* buffer as a segment with + `MemorySegment.ofBuffer(buf)` (zero copy; a heap-backed buffer incurs a copy, the same + caveat as `byte[]`). Hand the segment to `compress` / `decompress`. +- **`MemorySegment` out to `ByteBuffer`** — `segment.asByteBuffer()` returns a + buffer view over the native bytes, no copy. The decompressed arena segment is + consumable by an existing `ByteBuffer` pipeline as-is. (Segments larger than `Integer.MAX_VALUE` bytes cannot be viewed this way: `asByteBuffer()` throws `UnsupportedOperationException`.) + +```java +// an mmap'd frame is already a direct ByteBuffer (FileChannel.map) +MemorySegment frame = MemorySegment.ofBuffer(mappedByteBuffer); +MemorySegment out = dctx.decompress(arena, frame); // zero-copy decode +ByteBuffer result = out.asByteBuffer(); // zero-copy hand-off +``` + +**Gap / proposed sugar.** The one-liner above is the supported path today, but it +leaks two FFM details onto the caller: `asByteBuffer()` returns a `BIG_ENDIAN` +buffer regardless of platform, and the segment's lifetime is the arena's, not the +buffer's. A thin `toByteBuffer()` convenience on the arena-returning results would +fix both in one place — set native byte order, document the borrowed lifetime: + +```java +ByteBuffer result = dctx.decompress(arena, frame).toByteBuffer(); // proposed +``` + +This keeps the API segment-first (no parallel `ByteBuffer` surface to maintain); +it is purely an output adapter for callers already living in NIO. ## Zero-copy streaming @@ -117,3 +154,46 @@ try (ZstdCompressStream cs = new ZstdCompressStream(level)) { Both drivers take an optional `ZstdDictionary`. 
Decompression mirrors the loop, calling `decompress(dst, src)` until a result `isComplete()` (frame fully decoded). + +## Pledged size unlocks zero-copy decode + +Streaming compression has a hidden cost the one-shot path does not: **a streamed +frame does not record its decompressed size.** zstd writes the content-size field +in the frame header only when the encoder knows the total up front — trivially +true for `ZSTD_compress`, but a streaming encoder is fed incrementally and closes +the frame without ever being told the total. + +That field is exactly what the zero-copy decode path reads to size the output +arena. So a plain `ZstdOutputStream` frame **cannot be decoded zero-copy**: + +```java +byte[] frame = streamCompress(data); // no pledged size +Zstd.decompressedSize(segmentOf(frame)); // throws: "decompressed size not stored in frame" +dctx.decompress(arena, segmentOf(frame)); // same — it can't size the arena +``` + +The consumer is forced back onto the bounded streaming decoder (allocate, decode a +chunk, grow, repeat) or a guessed `maxSize` — the very heap-bounce the segment API +exists to avoid. + +`ZstdOutputStream.withPledgedSize(out, level, total)` closes the loop. Tell the +encoder the total before the first byte and it stamps the content size into the +header, so a downstream reader can size the output arena exactly and decode in one +shot: + +```java +try (var zout = ZstdOutputStream.withPledgedSize(sink, 19, data.length)) { + zout.write(data); // pledge must match the bytes written +} +byte[] frame = sink.toByteArray(); + +// downstream, in a memory-mapped reader: +MemorySegment src = MemorySegment.ofBuffer(mmap); +MemorySegment out = dctx.decompress(arena, src); // one allocation, zero copies +``` + +This is the case where pledging is not a micro-optimization but a correctness +gate: it is the difference between a frame that participates in the zero-copy +decode path and one that does not. 
Pledge whenever the producer streams but the +total is known (file length, serialized record count, `Content-Length`). The pledge +must equal the bytes actually written — a mismatch raises an error on close. diff --git a/zstd/src/test/java/io/github/dfa1/zstd/ZstdStreamTest.java b/zstd/src/test/java/io/github/dfa1/zstd/ZstdStreamTest.java index b4d218c..a2c98ff 100644 --- a/zstd/src/test/java/io/github/dfa1/zstd/ZstdStreamTest.java +++ b/zstd/src/test/java/io/github/dfa1/zstd/ZstdStreamTest.java @@ -9,9 +9,12 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; import java.nio.charset.StandardCharsets; import java.util.Random; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -200,6 +203,48 @@ void recordsContentSizeInTheFrame() throws IOException { assertThat(ZstdFrame.header(frame).contentSize()).hasValue(original.length); assertThat(Zstd.decompress(frame)).isEqualTo(original); } + + @Test + void plainStreamFrameCannotBeSizedForZeroCopyDecode() throws IOException { + // Given a streamed frame with no pledged size + byte[] original = "no pledge ".repeat(500).getBytes(StandardCharsets.UTF_8); + byte[] frame = streamCompress(original, 6); + + // When the zero-copy decoder asks the frame how big the output is + try (Arena arena = Arena.ofConfined()) { + MemorySegment src = Zstd.copyIn(arena, frame); + ThrowingCallable result = () -> Zstd.decompressedSize(src); + + // Then it cannot answer — the content size was never recorded + assertThatThrownBy(result) + .isInstanceOf(ZstdException.class) + .hasMessageContaining("not stored"); + } + } + + @Test + void pledgedFrameDecodesZeroCopyIntoArenaInOneShot() throws IOException { + // Given a streamed frame that pledged its total up front + byte[] original = "pledge 
enables zero-copy ".repeat(500).getBytes(StandardCharsets.UTF_8); + ByteArrayOutputStream sink = new ByteArrayOutputStream(); + try (ZstdOutputStream zout = ZstdOutputStream.withPledgedSize(sink, 6, original.length)) { + zout.write(original); + } + byte[] frame = sink.toByteArray(); + + // When a memory-mapped-style reader decodes straight into its arena + byte[] restored; + try (Arena arena = Arena.ofConfined(); + ZstdDecompressCtx dctx = new ZstdDecompressCtx()) { + MemorySegment src = Zstd.copyIn(arena, frame); + MemorySegment out = dctx.decompress(arena, src); + + // Then the arena was sized exactly from the header and decode round-trips + assertThat(out.byteSize()).isEqualTo(original.length); + restored = out.toArray(JAVA_BYTE); + } + assertThat(restored).isEqualTo(original); + } } private static byte[] streamCompress(byte[] data, int level) throws IOException { From 73b97e0256f898f1630c31813d1fa71e6c04d009 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Fri, 26 Jun 2026 17:45:31 +0200 Subject: [PATCH 2/2] docs: real ByteBuffer conversion + true zero-copy pledged test asByteBuffer() on a native segment already returns a direct buffer aliasing the off-heap bytes; the only wart is byte order. Replace the nonexistent toByteBuffer() headline with the working one-liner (asByteBuffer().order(nativeOrder())) and demote the sugar to an optional wrapper. Align the pledged-size example to level 6. Rework pledgedFrameDecodesZeroCopyIntoArenaInOneShot to exercise both zero-copy directions: direct ByteBuffer -> MemorySegment.ofBuffer in, out.asByteBuffer() hand-off out, instead of the copyIn heap path. 
Co-Authored-By: Claude Opus 4.8 --- docs/zero-copy.md | 25 ++++++++++++------- .../io/github/dfa1/zstd/ZstdStreamTest.java | 15 +++++++---- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/docs/zero-copy.md b/docs/zero-copy.md index a8de5c8..d553107 100644 --- a/docs/zero-copy.md +++ b/docs/zero-copy.md @@ -113,18 +113,25 @@ MemorySegment out = dctx.decompress(arena, frame); // zero-copy decode ByteBuffer result = out.asByteBuffer(); // zero-copy hand-off ``` -**Gap / proposed sugar.** The one-liner above is the supported path today, but it -leaks two FFM details onto the caller: `asByteBuffer()` returns a `BIG_ENDIAN` -buffer regardless of platform, and the segment's lifetime is the arena's, not the -buffer's. A thin `toByteBuffer()` convenience on the arena-returning results would -fix both in one place — set native byte order, document the borrowed lifetime: +**Byte order.** `asByteBuffer()` on a *native* segment already returns a **direct** +buffer aliasing the same off-heap bytes — there is no copy and nothing to convert. +The one wart is byte order: it comes back `BIG_ENDIAN` regardless of platform, so a +caller doing multi-byte reads must restore the native order: ```java -ByteBuffer result = dctx.decompress(arena, frame).toByteBuffer(); // proposed +import java.nio.ByteOrder; + +ByteBuffer result = dctx.decompress(arena, frame) + .asByteBuffer() + .order(ByteOrder.nativeOrder()); // direct buffer, native order, zero copy ``` -This keeps the API segment-first (no parallel `ByteBuffer` surface to maintain); -it is purely an output adapter for callers already living in NIO. +(For a pure byte payload the order does not matter and even that is unneeded.) The +remaining caveat is lifetime: the buffer borrows the arena's scope, so it must not +outlive the `try`-with-resources. 
A thin `toByteBuffer()` convenience on the +arena-returning results could fold the `order(nativeOrder())` call into one place, but +it would be a one-line output adapter, not new capability — the conversion already +exists. We keep the API segment-first (no parallel `ByteBuffer` surface to maintain). ## Zero-copy streaming @@ -182,7 +189,7 @@ header, so a downstream reader can size the output arena exactly and decode in o shot: ```java -try (var zout = ZstdOutputStream.withPledgedSize(sink, 19, data.length)) { +try (var zout = ZstdOutputStream.withPledgedSize(sink, 6, data.length)) { zout.write(data); // pledge must match the bytes written } byte[] frame = sink.toByteArray(); diff --git a/zstd/src/test/java/io/github/dfa1/zstd/ZstdStreamTest.java b/zstd/src/test/java/io/github/dfa1/zstd/ZstdStreamTest.java index a2c98ff..f77a437 100644 --- a/zstd/src/test/java/io/github/dfa1/zstd/ZstdStreamTest.java +++ b/zstd/src/test/java/io/github/dfa1/zstd/ZstdStreamTest.java @@ -11,10 +11,10 @@ import java.io.IOException; import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.Random; -import static java.lang.foreign.ValueLayout.JAVA_BYTE; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -232,16 +232,21 @@ void pledgedFrameDecodesZeroCopyIntoArenaInOneShot() throws IOException { } byte[] frame = sink.toByteArray(); - // When a memory-mapped-style reader decodes straight into its arena + // a memory-mapped reader sees the frame as a direct ByteBuffer — no heap copy in + ByteBuffer mmap = ByteBuffer.allocateDirect(frame.length).put(frame).flip(); + + // When it decodes straight into its arena and hands the result back as a ByteBuffer byte[] restored; try (Arena arena = Arena.ofConfined(); ZstdDecompressCtx dctx = new ZstdDecompressCtx()) { - MemorySegment src = Zstd.copyIn(arena, frame); - 
MemorySegment out = dctx.decompress(arena, src); + MemorySegment src = MemorySegment.ofBuffer(mmap); // zero-copy input view + MemorySegment out = dctx.decompress(arena, src); // one allocation, zero copies + ByteBuffer result = out.asByteBuffer(); // zero-copy hand-off out // Then the arena was sized exactly from the header and decode round-trips assertThat(out.byteSize()).isEqualTo(original.length); - restored = out.toArray(JAVA_BYTE); + restored = new byte[result.remaining()]; + result.get(restored); } assertThat(restored).isEqualTo(original); }