diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ec3e41..06a2003 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,39 @@ All notable changes to this project are documented here. Format loosely follows [Keep a Changelog](https://keepachangelog.com/); versions are released as `v*` git tags, which trigger publication to Maven Central. +## [0.5] + +### Added +- `ZstdCompressCtx.reset(ZstdResetDirective)` / `ZstdDecompressCtx.reset(...)` — + recycle a context's native state between frames without freeing and recreating + it. `SESSION_ONLY` keeps the level, parameters, and dictionary; `PARAMETERS` / + `SESSION_AND_PARAMETERS` restore the defaults. Binds `ZSTD_CCtx_reset` / + `ZSTD_DCtx_reset`. +- `ZstdCompressCtx.loadDictionary(...)` / `ZstdDecompressCtx.loadDictionary(...)` + (a `ZstdDictionary` or a native `MemorySegment`) and `refDictionary(...)` (a + pre-digested `ZstdCompressDict` / `ZstdDecompressDict`, attached by reference, + no copy). A sticky dictionary on the context lets compression combine a + dictionary with the advanced parameters (checksum, window log, long-distance + matching) — impossible through the per-call `compress(src, dict)` overloads, + which route through the legacy dictionary path. A parameter `reset(...)` clears it. + Binds `ZSTD_CCtx_loadDictionary` / `ZSTD_DCtx_loadDictionary` (now on contexts, + not just streams), `ZSTD_CCtx_refCDict`, `ZSTD_DCtx_refDDict`. + +### Changed +- `NativeLibrary.classifier()` now throws a clear `UnsatisfiedLinkError` naming + the unsupported CPU arch instead of silently mapping it to x86_64 (which + deferred failure to a cryptic `dlopen` error). Added an explicit `amd64` + branch so Linux JVMs (which report `os.arch=amd64`) still resolve x86_64. + ([ea1ac84](https://github.com/dfa1/zstd-java/commit/ea1ac84)) + +### Fixed +- Native JARs are much smaller. 
The ELF shared library is now stripped at link + time (`-s`), dropping debug info (`libzstd.so` 4.0M -> ~650K), and the + multi-MB `.pdb` debug database and `.lib` import library that lld emits next + to the Windows `.dll` are no longer bundled (neither is needed at runtime). + Net: linux-x86_64 native jar 1.2M -> 285K, windows-x86_64 1.2M -> 372K. + ([ea1ac84](https://github.com/dfa1/zstd-java/commit/ea1ac84)) + ## [0.4] ### Added diff --git a/docs/how-to.md b/docs/how-to.md index 79e64f2..c8f1826 100644 --- a/docs/how-to.md +++ b/docs/how-to.md @@ -18,6 +18,64 @@ try (ZstdCompressCtx cctx = new ZstdCompressCtx().level(19); Pick the level explicitly with `Zstd.maxCompressionLevel()` / `minCompressionLevel()` when you need the extreme ends. +## Reset a context to recycle it + +A context is already reusable across whole `compress` / `decompress` calls. Reset +goes further: it recycles the *native state* of one context — for pooled contexts, +or to abort a half-written frame and start clean — without freeing and recreating +it. Pick what to clear with `ZstdResetDirective`: + +```java +try (ZstdCompressCtx cctx = new ZstdCompressCtx().level(19)) { + byte[] a = cctx.compress(first); + + // Cheap: drop any unflushed frame state, keep the level and parameters. + cctx.reset(ZstdResetDirective.SESSION_ONLY); + byte[] b = cctx.compress(second); + + // Full wipe: parameters back to default, dictionary cleared, level reset to + // Zstd.defaultCompressionLevel(). Only valid between frames, not mid-frame. + cctx.reset(ZstdResetDirective.SESSION_AND_PARAMETERS); +} +``` + +`ZstdDecompressCtx.reset(...)` works the same way. Reuse alone amortises +allocation; reset lets a long-lived or pooled context return to a known state +without churning native memory. 
+ +## Compress with a dictionary *and* advanced parameters + +The per-call `compress(src, dict)` overloads take the legacy dictionary path, +which ignores the advanced parameters (checksum, window log, long-distance +matching) set on the context. To combine the two, make the dictionary *sticky* +with `loadDictionary` — then the normal `compress` path honours both: + +```java +try (ZstdCompressCtx cctx = new ZstdCompressCtx().level(19).checksum(true)) { + cctx.loadDictionary(dict); // ZstdDictionary, or a native MemorySegment + byte[] frame = cctx.compress(record); // dictionary + checksum, together +} +``` + +For a dictionary reused across a pool of contexts, digest it once and attach it +by reference — no per-call digesting, no copy. It pairs with `reset` for a +pooled, recycled context: + +```java +try (ZstdCompressDict cdict = new ZstdCompressDict(dict, 19)) { + // one cctx per pooled worker, all sharing the one digested dictionary + try (ZstdCompressCtx cctx = new ZstdCompressCtx()) { + cctx.refDictionary(cdict); // borrowed; cdict must outlive cctx + byte[] a = cctx.compress(first); + cctx.reset(ZstdResetDirective.SESSION_ONLY); // recycle, keep the dictionary + byte[] b = cctx.compress(second); + } +} +``` + +A loaded or referenced dictionary stays until replaced, cleared with `null`, or +dropped by a parameter `reset`. `ZstdDecompressCtx` mirrors all of this. + ## Compress many small payloads with a dictionary For many small, similar payloads (log lines, JSON records, protobufs), a diff --git a/docs/supported.md b/docs/supported.md index f131936..f548f15 100644 --- a/docs/supported.md +++ b/docs/supported.md @@ -33,7 +33,7 @@ rather than the deprecated `ZSTD_getDecompressedSize`. 
| Dictionary training (ZDICT) | 8 / 12 | trainFromBuffer, cover/fastCover optimizers, finalizeDictionary, getDictHeaderSize | | Streaming — compress | 3 / 22 | `ZstdOutputStream` (compressStream2 + buffer sizes) | | Streaming — decompress | 3 / 15 | `ZstdInputStream` (decompressStream + buffer sizes) | -| Advanced parameters | 8 / 38 | all `ZSTD_cParameter` + `ZSTD_dParameter` via `ZstdCompressParameter`/`ZstdDecompressParameter`; `compress2`, `C/DCtx_setParameter`, `loadDictionary`, `c/dParam_getBounds`; MT inert on single-thread build | +| Advanced parameters | 12 / 38 | all `ZSTD_cParameter` + `ZSTD_dParameter` via `ZstdCompressParameter`/`ZstdDecompressParameter`; `compress2`, `C/DCtx_setParameter`, `C/DCtx_reset`, `C/DCtx_loadDictionary`, `CCtx_refCDict`/`DCtx_refDDict`, `c/dParam_getBounds`; MT inert on single-thread build | | Frame inspection | 10 / 13 | `ZstdFrame` + getFrameProgression; `_advanced` not bound | | Memory sizing | 8 / 14 | sizeof_C/DCtx, sizeof_C/DDict, estimate C/DCtx + C/DDict size | | Low-level block | 0 / 12 | expert block/continue API not bound | @@ -63,10 +63,12 @@ rather than the deprecated `ZSTD_getDecompressedSize`. 
| `ZSTD_compress2`, `ZSTD_CCtx_setParameter` | `ZstdCompressCtx.parameter` / `checksum` / `longDistanceMatching` / `windowLog` (all of `ZstdCompressParameter`) | | `ZSTD_DCtx_setParameter` | `ZstdDecompressCtx.parameter` / `windowLogMax` (`ZstdDecompressParameter`) | | `ZSTD_CCtx_setPledgedSrcSize` | `ZstdOutputStream.withPledgedSize` | +| `ZSTD_CCtx_reset`, `ZSTD_DCtx_reset` | `ZstdCompressCtx.reset` / `ZstdDecompressCtx.reset` (`ZstdResetDirective`) | | `ZSTD_getDictID_fromCDict`, `ZSTD_getDictID_fromDDict` | `ZstdCompressDict.id()` / `ZstdDecompressDict.id()` | | `ZSTD_getErrorString` | `ZstdErrorCode.description()` | | `ZSTD_cParam_getBounds`, `ZSTD_dParam_getBounds` | `ZstdCompressParameter.bounds()` / `ZstdDecompressParameter.bounds()` (`ZstdBounds`) | -| `ZSTD_CCtx_loadDictionary`, `ZSTD_DCtx_loadDictionary` | `ZstdOutputStream` / `ZstdInputStream` dictionary constructors | +| `ZSTD_CCtx_loadDictionary`, `ZSTD_DCtx_loadDictionary` | `ZstdCompressCtx.loadDictionary` / `ZstdDecompressCtx.loadDictionary`; `ZstdOutputStream` / `ZstdInputStream` dictionary constructors | +| `ZSTD_CCtx_refCDict`, `ZSTD_DCtx_refDDict` | `ZstdCompressCtx.refDictionary` / `ZstdDecompressCtx.refDictionary` | | `ZSTD_isFrame`, `ZSTD_findFrameCompressedSize`, `ZSTD_decompressBound`, `ZSTD_getDictID_fromFrame`, `ZSTD_getFrameHeader`, `ZSTD_isSkippableFrame`, `ZSTD_writeSkippableFrame`, `ZSTD_readSkippableFrame` | `ZstdFrame` (+ `ZstdFrameHeader`, `ZstdFrameType`, `ZstdSkippableContent`) | | `ZSTD_getErrorCode` | `ZstdException.code()` (+ `ZstdErrorCode`) | | `ZSTD_getFrameProgression` | `ZstdCompressStream.progress()` (`ZstdFrameProgression`) | @@ -90,7 +92,7 @@ zstd-jni's JNI sources (v1.5.7-11, `src/main/native/*.c`). The latter is symbol-exact, not functional equivalence: zstd-jni may expose an operation through a different symbol than this library — e.g. it routes one-shot compression through `ZSTD_compress2`, so `ZSTD_compress` reads `—` for it even though `Zstd.compress` -works. 
zstd-jni references 53 of these symbols; this library binds 55. They +works. zstd-jni references 53 of these symbols; this library binds 59. They overlap on the modern context/streaming API and diverge mainly on zstd-jni's sequence-producer hooks vs this library's frame-inspection and typed-error surface. @@ -231,7 +233,7 @@ sequence-producer hooks vs this library's frame-inspection and typed-error surfa | `ZSTD_resetDStream` | — ᵈ | — | | `ZSTD_sizeof_DStream` | — | — | -### Advanced parameters (8/38) +### Advanced parameters (12/38) | Symbol | Bound | zstd-jni | |---|:---:|:---:| @@ -245,11 +247,11 @@ sequence-producer hooks vs this library's frame-inspection and typed-error surfa | `ZSTD_CCtx_loadDictionary` | ✅ | ✅ | | `ZSTD_CCtx_loadDictionary_advanced` | — | — | | `ZSTD_CCtx_loadDictionary_byReference` | — | — | -| `ZSTD_CCtx_refCDict` | — | ✅ | +| `ZSTD_CCtx_refCDict` | ✅ | ✅ | | `ZSTD_CCtx_refPrefix` | — | — | | `ZSTD_CCtx_refPrefix_advanced` | — | — | | `ZSTD_CCtx_refThreadPool` | — | — | -| `ZSTD_CCtx_reset` | — | ✅ | +| `ZSTD_CCtx_reset` | ✅ | ✅ | | `ZSTD_CCtx_setCParams` | — | — | | `ZSTD_CCtx_setFParams` | — | — | | `ZSTD_CCtx_setParameter` | ✅ | ✅ | @@ -260,10 +262,10 @@ sequence-producer hooks vs this library's frame-inspection and typed-error surfa | `ZSTD_DCtx_loadDictionary` | ✅ | ✅ | | `ZSTD_DCtx_loadDictionary_advanced` | — | — | | `ZSTD_DCtx_loadDictionary_byReference` | — | — | -| `ZSTD_DCtx_refDDict` | — | ✅ | +| `ZSTD_DCtx_refDDict` | ✅ | ✅ | | `ZSTD_DCtx_refPrefix` | — | — | | `ZSTD_DCtx_refPrefix_advanced` | — | — | -| `ZSTD_DCtx_reset` | — | ✅ | +| `ZSTD_DCtx_reset` | ✅ | ✅ | | `ZSTD_DCtx_setFormat` | — ᵈ | — | | `ZSTD_DCtx_setMaxWindowSize` | — | — | | `ZSTD_DCtx_setParameter` | ✅ | ✅ | diff --git a/integration-tests/src/test/java/io/github/dfa1/zstd/it/ZstdJniInteropTest.java b/integration-tests/src/test/java/io/github/dfa1/zstd/it/ZstdJniInteropTest.java index a094fbe..9148a59 100644 --- 
a/integration-tests/src/test/java/io/github/dfa1/zstd/it/ZstdJniInteropTest.java +++ b/integration-tests/src/test/java/io/github/dfa1/zstd/it/ZstdJniInteropTest.java @@ -4,6 +4,7 @@ import com.github.luben.zstd.ZstdDictDecompress; import io.github.dfa1.zstd.Zstd; import io.github.dfa1.zstd.ZstdCompressCtx; +import io.github.dfa1.zstd.ZstdCompressDict; import io.github.dfa1.zstd.ZstdDecompressCtx; import io.github.dfa1.zstd.ZstdDictionary; import io.github.dfa1.zstd.ZstdInputStream; @@ -124,6 +125,39 @@ void jniDictCompressJavaDictDecompress() { assertThat(restored).isEqualTo(record); } + @Test + void javaLoadedDictWithChecksumJniDictDecompress() { + // A sticky loaded dictionary combined with an advanced parameter + // (checksum) — the COMPRESS2 path — must still produce a frame zstd-jni + // decodes against the same dictionary. + ZstdDictionary dict = trainDict(); + byte[] record = record(33); + + byte[] frame; + try (ZstdCompressCtx ctx = new ZstdCompressCtx().checksum(true)) { + ctx.loadDictionary(dict); + frame = ctx.compress(record); + } + ZstdDictDecompress jniDict = new ZstdDictDecompress(dict.toByteArray()); + assertThat(com.github.luben.zstd.Zstd.decompress(frame, jniDict, record.length)).isEqualTo(record); + } + + @Test + void javaReferencedDigestedDictJniDictDecompress() { + // A frame from a context referencing a digested CDict must decode in zstd-jni. 
+ ZstdDictionary dict = trainDict(); + byte[] record = record(44); + + byte[] frame; + try (ZstdCompressDict cdict = new ZstdCompressDict(dict, Zstd.defaultCompressionLevel()); + ZstdCompressCtx ctx = new ZstdCompressCtx()) { + ctx.refDictionary(cdict); + frame = ctx.compress(record); + } + ZstdDictDecompress jniDict = new ZstdDictDecompress(dict.toByteArray()); + assertThat(com.github.luben.zstd.Zstd.decompress(frame, jniDict, record.length)).isEqualTo(record); + } + private ZstdDictionary trainDict() { List samples = new ArrayList<>(); for (int i = 0; i < 3000; i++) { diff --git a/zstd/src/main/java/io/github/dfa1/zstd/Bindings.java b/zstd/src/main/java/io/github/dfa1/zstd/Bindings.java index 7ab7d66..268d033 100644 --- a/zstd/src/main/java/io/github/dfa1/zstd/Bindings.java +++ b/zstd/src/main/java/io/github/dfa1/zstd/Bindings.java @@ -138,6 +138,11 @@ final class Bindings { NativeLibrary.lookup("ZSTD_CCtx_setParameter", FunctionDescriptor.of(JAVA_LONG, ADDRESS, JAVA_INT, JAVA_INT)); + // size_t ZSTD_CCtx_reset(ZSTD_CCtx*, ZSTD_ResetDirective) + static final MethodHandle CCTX_RESET = + NativeLibrary.lookup("ZSTD_CCtx_reset", + FunctionDescriptor.of(JAVA_LONG, ADDRESS, JAVA_INT)); + // size_t ZSTD_compress2(ZSTD_CCtx*, void* dst, size_t dstCap, const void* src, size_t srcSize) // Uses the advanced parameters set on the context (unlike ZSTD_compressCCtx). 
static final MethodHandle COMPRESS2 = @@ -149,6 +154,11 @@ final class Bindings { NativeLibrary.lookup("ZSTD_DCtx_setParameter", FunctionDescriptor.of(JAVA_LONG, ADDRESS, JAVA_INT, JAVA_INT)); + // size_t ZSTD_DCtx_reset(ZSTD_DCtx*, ZSTD_ResetDirective) + static final MethodHandle DCTX_RESET = + NativeLibrary.lookup("ZSTD_DCtx_reset", + FunctionDescriptor.of(JAVA_LONG, ADDRESS, JAVA_INT)); + // size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx*, unsigned long long pledgedSrcSize) static final MethodHandle CCTX_SET_PLEDGED_SRC_SIZE = NativeLibrary.lookup("ZSTD_CCtx_setPledgedSrcSize", @@ -238,6 +248,10 @@ final class Bindings { static final MethodHandle COMPRESS_USING_CDICT = NativeLibrary.lookup("ZSTD_compress_usingCDict", FunctionDescriptor.of(JAVA_LONG, ADDRESS, ADDRESS, JAVA_LONG, ADDRESS, JAVA_LONG, ADDRESS)); + // size_t ZSTD_CCtx_refCDict(ZSTD_CCtx*, const ZSTD_CDict*) + static final MethodHandle CCTX_REF_CDICT = + NativeLibrary.lookup("ZSTD_CCtx_refCDict", + FunctionDescriptor.of(JAVA_LONG, ADDRESS, ADDRESS)); // ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) static final MethodHandle CREATE_DDICT = @@ -250,6 +264,10 @@ final class Bindings { static final MethodHandle DECOMPRESS_USING_DDICT = NativeLibrary.lookup("ZSTD_decompress_usingDDict", FunctionDescriptor.of(JAVA_LONG, ADDRESS, ADDRESS, JAVA_LONG, ADDRESS, JAVA_LONG, ADDRESS)); + // size_t ZSTD_DCtx_refDDict(ZSTD_DCtx*, const ZSTD_DDict*) + static final MethodHandle DCTX_REF_DDICT = + NativeLibrary.lookup("ZSTD_DCtx_refDDict", + FunctionDescriptor.of(JAVA_LONG, ADDRESS, ADDRESS)); // --- dictionary training (ZDICT, from dictBuilder) --- diff --git a/zstd/src/main/java/io/github/dfa1/zstd/NativeCall.java b/zstd/src/main/java/io/github/dfa1/zstd/NativeCall.java index 88c9d06..e486156 100644 --- a/zstd/src/main/java/io/github/dfa1/zstd/NativeCall.java +++ b/zstd/src/main/java/io/github/dfa1/zstd/NativeCall.java @@ -57,6 +57,13 @@ private static String errorName(long code) { } } + /// Whether 
`seg` denotes "no segment": either a Java `null` reference or the + /// [MemorySegment#NULL] zero-address sentinel. Both map to a null pointer in C, + /// which the dictionary entry points read as "clear". + static boolean isNull(MemorySegment seg) { + return seg == null || MemorySegment.NULL.equals(seg); + } + /// Guards a zero-copy entry point: the segment handed to zstd must be backed /// by native (off-heap) memory, since its address is dereferenced in C. Fails /// fast with a clear message instead of the FFM linker's cryptic error. diff --git a/zstd/src/main/java/io/github/dfa1/zstd/ZstdCompressCtx.java b/zstd/src/main/java/io/github/dfa1/zstd/ZstdCompressCtx.java index 50e40fb..cb617c1 100644 --- a/zstd/src/main/java/io/github/dfa1/zstd/ZstdCompressCtx.java +++ b/zstd/src/main/java/io/github/dfa1/zstd/ZstdCompressCtx.java @@ -90,6 +90,100 @@ public ZstdCompressCtx windowLog(int windowLog) { return parameter(ZstdCompressParameter.WINDOW_LOG, windowLog); } + /// Resets this context so it can be reused for the next frame without the + /// cost of freeing and recreating its native state. + /// + /// - [ZstdResetDirective#SESSION_ONLY] aborts the current frame and drops + /// unflushed data, keeping the level, parameters, and any dictionary. + /// - [ZstdResetDirective#PARAMETERS] and + /// [ZstdResetDirective#SESSION_AND_PARAMETERS] also restore every + /// parameter to its default and clear the dictionary; the level returns to + /// [Zstd#defaultCompressionLevel()]. A parameter reset is valid only + /// between frames — one-shot [#compress(byte[])] always finishes its frame, + /// so this constraint only bites advanced multi-frame reuse. 
+ /// + /// @param directive what to clear + /// @return `this`, for chaining + /// @throws ZstdException if the reset fails natively + public ZstdCompressCtx reset(ZstdResetDirective directive) { + Objects.requireNonNull(directive, "directive"); + NativeCall.checkReturnValue(() -> (long) Bindings.CCTX_RESET.invokeExact(ptr(), directive.value())); + if (directive != ZstdResetDirective.SESSION_ONLY) { + this.level = Zstd.defaultCompressionLevel(); + } + return this; + } + + /// Loads `dict` as the sticky dictionary for this context, so subsequent + /// [#compress(byte[])] / [#compress(MemorySegment, MemorySegment)] calls + /// compress against it **while still honouring the advanced parameters** + /// (checksum, window log, long-distance matching) set on this context — the + /// combination the per-call `compress(src, dict)` overloads cannot give you, + /// since they route through the legacy dictionary path. + /// + /// The dictionary is copied internally and digested at the next compression + /// using this context's level and parameters, so `dict` may be discarded + /// afterwards. + /// It stays loaded until replaced, cleared with [#loadDictionary(ZstdDictionary)] + /// passing `null`, or dropped by a parameter [#reset(ZstdResetDirective)]. For + /// a dictionary reused across many contexts, digest it once and attach it with + /// [#refDictionary(ZstdCompressDict)] instead. 
+ /// + /// @param dict the dictionary to load, or `null` to clear the loaded dictionary + /// @return `this`, for chaining + /// @throws ZstdException if the dictionary cannot be loaded + public ZstdCompressCtx loadDictionary(ZstdDictionary dict) { + if (dict == null) { + return loadDictionary(MemorySegment.NULL, 0L); + } + try (Arena arena = Arena.ofConfined()) { + byte[] raw = dict.raw(); + return loadDictionary(Zstd.copyIn(arena, raw), raw.length); + } + } + + /// Loads dictionary content straight from a native [MemorySegment], without a + /// heap copy — the zero-copy path when your dictionary is already off-heap + /// (e.g. an mmap slice). Otherwise identical to + /// [#loadDictionary(ZstdDictionary)]. + /// + /// @param dict native dictionary content (its bytes are copied into the + /// context), or `null` / [MemorySegment#NULL] to clear the loaded dictionary + /// @return `this`, for chaining + /// @throws ZstdException if the dictionary cannot be loaded + public ZstdCompressCtx loadDictionary(MemorySegment dict) { + if (NativeCall.isNull(dict)) { + return loadDictionary(MemorySegment.NULL, 0L); + } + NativeCall.requireNative(dict, "dict"); + return loadDictionary(dict, dict.byteSize()); + } + + private ZstdCompressCtx loadDictionary(MemorySegment dict, long size) { + NativeCall.checkReturnValue(() -> (long) Bindings.CCTX_LOAD_DICTIONARY.invokeExact(ptr(), dict, size)); + return this; + } + + /// Attaches a pre-digested `dict` to this context by reference — no per-call + /// digesting and no copy. Subsequent [#compress(byte[])] / + /// [#compress(MemorySegment, MemorySegment)] calls compress against it while + /// honouring this context's advanced parameters; the compression level comes + /// from the [ZstdCompressDict]. This is the hot path for a pooled context + /// recycled with [#reset(ZstdResetDirective)] between frames. + /// + /// The reference is borrowed: `dict` must stay open for as long as this + /// context uses it. 
The reference is dropped by a parameter + /// [#reset(ZstdResetDirective)] or by passing `null`. + /// + /// @param dict the digested dictionary to reference, or `null` to clear it + /// @return `this`, for chaining + /// @throws ZstdException if the dictionary cannot be referenced + public ZstdCompressCtx refDictionary(ZstdCompressDict dict) { + MemorySegment cdict = dict == null ? MemorySegment.NULL : dict.ptr(); + NativeCall.checkReturnValue(() -> (long) Bindings.CCTX_REF_CDICT.invokeExact(ptr(), cdict)); + return this; + } + /// Compresses `src` into a new zstd frame using this context and its /// advanced parameters. /// diff --git a/zstd/src/main/java/io/github/dfa1/zstd/ZstdDecompressCtx.java b/zstd/src/main/java/io/github/dfa1/zstd/ZstdDecompressCtx.java index f864695..2fbd36e 100644 --- a/zstd/src/main/java/io/github/dfa1/zstd/ZstdDecompressCtx.java +++ b/zstd/src/main/java/io/github/dfa1/zstd/ZstdDecompressCtx.java @@ -49,6 +49,91 @@ public ZstdDecompressCtx windowLogMax(int windowLogMax) { return parameter(ZstdDecompressParameter.WINDOW_LOG_MAX, windowLogMax); } + /// Resets this context so it can be reused for the next frame without the + /// cost of freeing and recreating its native state. + /// + /// - [ZstdResetDirective#SESSION_ONLY] aborts the current frame and drops + /// buffered state, keeping all parameters and any dictionary. + /// - [ZstdResetDirective#PARAMETERS] and + /// [ZstdResetDirective#SESSION_AND_PARAMETERS] also restore every + /// parameter to its default and clear the dictionary. A parameter reset is + /// valid only between frames — one-shot [#decompress(byte[], int)] always + /// consumes its frame, so this constraint only bites advanced multi-frame reuse. 
+ /// + /// @param directive what to clear + /// @return `this`, for chaining + /// @throws ZstdException if the reset fails natively + public ZstdDecompressCtx reset(ZstdResetDirective directive) { + Objects.requireNonNull(directive, "directive"); + NativeCall.checkReturnValue(() -> (long) Bindings.DCTX_RESET.invokeExact(ptr(), directive.value())); + return this; + } + + /// Loads `dict` as the sticky dictionary for this context, so subsequent + /// [#decompress(byte[], int)] / [#decompress(MemorySegment, MemorySegment)] + /// calls decode frames compressed against it while still honouring the + /// advanced parameters (e.g. window-log max) set on this context. + /// + /// The dictionary is copied internally, so `dict` may be discarded afterwards. + /// It stays loaded until replaced, cleared with [#loadDictionary(ZstdDictionary)] + /// passing `null`, or dropped by a parameter [#reset(ZstdResetDirective)]. For + /// a dictionary reused across many contexts, digest it once and attach it with + /// [#refDictionary(ZstdDecompressDict)] instead. + /// + /// @param dict the dictionary to load, or `null` to clear the loaded dictionary + /// @return `this`, for chaining + /// @throws ZstdException if the dictionary cannot be loaded + public ZstdDecompressCtx loadDictionary(ZstdDictionary dict) { + if (dict == null) { + return loadDictionary(MemorySegment.NULL, 0L); + } + try (Arena arena = Arena.ofConfined()) { + byte[] raw = dict.raw(); + return loadDictionary(Zstd.copyIn(arena, raw), raw.length); + } + } + + /// Loads dictionary content straight from a native [MemorySegment], without a + /// heap copy — the zero-copy path when your dictionary is already off-heap + /// (e.g. an mmap slice). Otherwise identical to + /// [#loadDictionary(ZstdDictionary)]. 
+ /// + /// @param dict native dictionary content (its bytes are copied into the + /// context), or `null` / [MemorySegment#NULL] to clear the loaded dictionary + /// @return `this`, for chaining + /// @throws ZstdException if the dictionary cannot be loaded + public ZstdDecompressCtx loadDictionary(MemorySegment dict) { + if (NativeCall.isNull(dict)) { + return loadDictionary(MemorySegment.NULL, 0L); + } + NativeCall.requireNative(dict, "dict"); + return loadDictionary(dict, dict.byteSize()); + } + + private ZstdDecompressCtx loadDictionary(MemorySegment dict, long size) { + NativeCall.checkReturnValue(() -> (long) Bindings.DCTX_LOAD_DICTIONARY.invokeExact(ptr(), dict, size)); + return this; + } + + /// Attaches a pre-digested `dict` to this context by reference — no per-call + /// digesting and no copy. Subsequent [#decompress(byte[], int)] / + /// [#decompress(MemorySegment, MemorySegment)] calls decode against it while + /// honouring this context's advanced parameters. This is the hot path for a + /// pooled context recycled with [#reset(ZstdResetDirective)] between frames. + /// + /// The reference is borrowed: `dict` must stay open for as long as this + /// context uses it. The reference is dropped by a parameter + /// [#reset(ZstdResetDirective)] or by passing `null`. + /// + /// @param dict the digested dictionary to reference, or `null` to clear it + /// @return `this`, for chaining + /// @throws ZstdException if the dictionary cannot be referenced + public ZstdDecompressCtx refDictionary(ZstdDecompressDict dict) { + MemorySegment ddict = dict == null ? MemorySegment.NULL : dict.ptr(); + NativeCall.checkReturnValue(() -> (long) Bindings.DCTX_REF_DDICT.invokeExact(ptr(), ddict)); + return this; + } + /// Decompresses a frame into a buffer of at most `maxSize` bytes. 
/// /// @param compressed a complete zstd frame diff --git a/zstd/src/main/java/io/github/dfa1/zstd/ZstdResetDirective.java b/zstd/src/main/java/io/github/dfa1/zstd/ZstdResetDirective.java new file mode 100644 index 0000000..591b48e --- /dev/null +++ b/zstd/src/main/java/io/github/dfa1/zstd/ZstdResetDirective.java @@ -0,0 +1,28 @@ +package io.github.dfa1.zstd; + +/// Selects what a context reset clears, mirroring `ZSTD_ResetDirective`. +/// +/// Use it with [ZstdCompressCtx#reset(ZstdResetDirective)] and +/// [ZstdDecompressCtx#reset(ZstdResetDirective)] to recycle a context for the +/// next frame without freeing and recreating its native state. +public enum ZstdResetDirective { + + /// Abort the current frame and discard any unflushed data, keeping all + /// parameters and the loaded dictionary. Cheap; use it between frames. + SESSION_ONLY(1), + /// Restore every parameter to its default and clear the dictionary. Only + /// valid when no frame is in progress. + PARAMETERS(2), + /// Do both: reset the session and the parameters in one call. 
+ SESSION_AND_PARAMETERS(3); + + private final int value; + + ZstdResetDirective(int value) { + this.value = value; + } + + int value() { + return value; + } +} diff --git a/zstd/src/test/java/io/github/dfa1/zstd/ZstdDictionaryTest.java b/zstd/src/test/java/io/github/dfa1/zstd/ZstdDictionaryTest.java index ae350bb..ced1984 100644 --- a/zstd/src/test/java/io/github/dfa1/zstd/ZstdDictionaryTest.java +++ b/zstd/src/test/java/io/github/dfa1/zstd/ZstdDictionaryTest.java @@ -336,6 +336,159 @@ private MemorySegment nativeDict(Arena arena, byte[] raw) { } } + @Nested + class StickyDictionary { + + @Test + void loadedDictionaryCombinesWithAdvancedParameters() { + // Given a context with both a loaded dictionary AND a checksum — the + // combination the per-call compress(src, dict) overloads cannot give + byte[] record = samples.get(123); + byte[] frame; + try (ZstdCompressCtx cctx = new ZstdCompressCtx().checksum(true)) { + cctx.loadDictionary(sut); + frame = cctx.compress(record); + } + byte[] plain; + try (ZstdCompressCtx ctx = new ZstdCompressCtx()) { + plain = ctx.compress(record); + } + + // Then the dictionary is honoured (smaller than dictionaryless) and decodes + assertThat(frame.length).isLessThan(plain.length); + byte[] restored; + try (ZstdDecompressCtx dctx = new ZstdDecompressCtx()) { + dctx.loadDictionary(sut); + restored = dctx.decompress(frame, record.length); + } + assertThat(restored).isEqualTo(record); + } + + @Test + void referencedDigestedDictionarySurvivesSessionReset() { + // Given a pooled context referencing a digested dictionary, recycled between frames + byte[] first = samples.get(1); + byte[] second = samples.get(2); + byte[] restoredFirst; + byte[] restoredSecond; + try (ZstdCompressDict cdict = new ZstdCompressDict(sut, 19); + ZstdDecompressDict ddict = new ZstdDecompressDict(sut); + ZstdCompressCtx cctx = new ZstdCompressCtx(); + ZstdDecompressCtx dctx = new ZstdDecompressCtx()) { + cctx.refDictionary(cdict); + byte[] frameFirst = 
cctx.compress(first); + cctx.reset(ZstdResetDirective.SESSION_ONLY); + byte[] frameSecond = cctx.compress(second); + + dctx.refDictionary(ddict); + restoredFirst = dctx.decompress(frameFirst, first.length); + dctx.reset(ZstdResetDirective.SESSION_ONLY); + restoredSecond = dctx.decompress(frameSecond, second.length); + } + + // Then both frames round-trip: the reference outlived the session reset + assertThat(restoredFirst).isEqualTo(first); + assertThat(restoredSecond).isEqualTo(second); + } + + @Test + void parameterResetClearsTheLoadedDictionary() { + // Given a context that loaded a dictionary, then cleared its parameters + byte[] record = samples.get(7); + byte[] afterReset; + try (ZstdCompressCtx cctx = new ZstdCompressCtx()) { + cctx.loadDictionary(sut); + cctx.compress(record); + cctx.reset(ZstdResetDirective.SESSION_AND_PARAMETERS); + afterReset = cctx.compress(record); + } + byte[] noDict; + try (ZstdCompressCtx ctx = new ZstdCompressCtx()) { + noDict = ctx.compress(record); + } + + // Then the dictionary is gone: the frame matches a fresh dictionaryless one + assertThat(afterReset).isEqualTo(noDict); + } + + @Test + void nullClearsTheLoadedDictionary() { + // Given a context whose loaded dictionary is then cleared with null + byte[] record = samples.get(7); + byte[] cleared; + try (ZstdCompressCtx cctx = new ZstdCompressCtx()) { + cctx.loadDictionary(sut); + cctx.loadDictionary((ZstdDictionary) null); + cleared = cctx.compress(record); + } + byte[] noDict; + try (ZstdCompressCtx ctx = new ZstdCompressCtx()) { + noDict = ctx.compress(record); + } + + // Then it compresses as if no dictionary was ever loaded + assertThat(cleared).isEqualTo(noDict); + } + + @Test + void loadsDictionaryFromNativeSegmentWithoutHeapCopy() { + // Given a dictionary loaded straight from native segments (zero-copy path) + byte[] record = samples.get(2048); + byte[] raw = sut.toByteArray(); + byte[] restored; + try (Arena arena = Arena.ofConfined(); + ZstdCompressCtx cctx = new 
ZstdCompressCtx(); + ZstdDecompressCtx dctx = new ZstdDecompressCtx()) { + cctx.loadDictionary(nativeDict(arena, raw)); + byte[] frame = cctx.compress(record); + dctx.loadDictionary(nativeDict(arena, raw)); + restored = dctx.decompress(frame, record.length); + } + + // Then the record round-trips through the segment-loaded dictionary + assertThat(restored).isEqualTo(record); + } + + @Test + void rejectsHeapDictionarySegment() { + // Given a heap-backed dictionary segment + MemorySegment heap = MemorySegment.ofArray(sut.toByteArray()); + + // When loaded into a context + try (ZstdCompressCtx cctx = new ZstdCompressCtx()) { + ThrowingCallable result = () -> cctx.loadDictionary(heap); + + // Then it fails fast rather than handing C a heap address + assertThatThrownBy(result).isInstanceOf(IllegalArgumentException.class); + } + } + + @Test + void nullNativeSegmentClearsTheLoadedDictionary() { + // Given a context whose dictionary is cleared through the native overload + byte[] record = samples.get(7); + byte[] cleared; + try (ZstdCompressCtx cctx = new ZstdCompressCtx()) { + cctx.loadDictionary(sut); + cctx.loadDictionary((MemorySegment) null); + cleared = cctx.compress(record); + } + byte[] noDict; + try (ZstdCompressCtx ctx = new ZstdCompressCtx()) { + noDict = ctx.compress(record); + } + + // Then it compresses as if no dictionary was ever loaded + assertThat(cleared).isEqualTo(noDict); + } + + private MemorySegment nativeDict(Arena arena, byte[] raw) { + MemorySegment seg = arena.allocate(raw.length); + MemorySegment.copy(raw, 0, seg, ValueLayout.JAVA_BYTE, 0, raw.length); + return seg; + } + } + private static byte[] record(int i) { return ("{\"id\":" + i + ",\"user\":\"user_" + (i % 50) diff --git a/zstd/src/test/java/io/github/dfa1/zstd/ZstdParameterTest.java b/zstd/src/test/java/io/github/dfa1/zstd/ZstdParameterTest.java index dc90527..087cf45 100644 --- a/zstd/src/test/java/io/github/dfa1/zstd/ZstdParameterTest.java +++ 
b/zstd/src/test/java/io/github/dfa1/zstd/ZstdParameterTest.java @@ -143,6 +143,92 @@ void rejectsOutOfRangeValue() { } } + @Nested + class Reset { + + @Test + void sessionOnlyKeepsLevelAndParameters() { + // Given a context used once, then reset for the session only + byte[] reused; + byte[] fresh; + try (ZstdCompressCtx sut = new ZstdCompressCtx().level(19)) { + sut.compress(PAYLOAD); + sut.reset(ZstdResetDirective.SESSION_ONLY); + reused = sut.compress(PAYLOAD); + } + try (ZstdCompressCtx ctx = new ZstdCompressCtx().level(19)) { + fresh = ctx.compress(PAYLOAD); + } + + // Then the level survives the reset: the next frame matches a fresh level-19 frame + assertThat(reused).isEqualTo(fresh); + } + + @ParameterizedTest + @EnumSource(value = ZstdResetDirective.class, names = {"PARAMETERS", "SESSION_AND_PARAMETERS"}) + void parameterResetRestoresTheDefaultLevel(ZstdResetDirective directive) { + // Given a level-19 context reset with parameters cleared + byte[] afterReset; + byte[] atDefault; + try (ZstdCompressCtx sut = new ZstdCompressCtx().level(19)) { + sut.compress(PAYLOAD); + sut.reset(directive); + afterReset = sut.compress(PAYLOAD); + } + try (ZstdCompressCtx ctx = new ZstdCompressCtx()) { + atDefault = ctx.compress(PAYLOAD); + } + + // Then the level falls back to the default, matching a fresh default-level frame + assertThat(afterReset).isEqualTo(atDefault); + } + + @Test + void dictionaryRoundTripsAfterParameterReset() { + // Given a context that compressed against a dictionary, then cleared its parameters + ZstdDictionary dict = + ZstdDictionary.of("dictionary sample payload ".repeat(64).getBytes(StandardCharsets.UTF_8)); + byte[] frame; + try (ZstdCompressCtx sut = new ZstdCompressCtx().level(19)) { + sut.compress(PAYLOAD, dict); + sut.reset(ZstdResetDirective.SESSION_AND_PARAMETERS); + + // When it compresses against the dictionary again after the reset + frame = sut.compress(PAYLOAD, dict); + } + + // Then the frame still round-trips through the same 
dictionary + try (ZstdDecompressCtx dctx = new ZstdDecompressCtx()) { + assertThat(dctx.decompress(frame, PAYLOAD.length, dict)).isEqualTo(PAYLOAD); + } + } + + @Test + void decompressContextStillDecodesAfterReset() { + // Given a decompression context reset between frames + byte[] frame = Zstd.compress(PAYLOAD); + try (ZstdDecompressCtx sut = new ZstdDecompressCtx()) { + sut.decompress(frame, PAYLOAD.length); + sut.reset(ZstdResetDirective.SESSION_AND_PARAMETERS); + + // Then the next frame still decodes + assertThat(sut.decompress(frame, PAYLOAD.length)).isEqualTo(PAYLOAD); + } + } + + @Test + void rejectsNullDirective() { + // Given a compression context + try (ZstdCompressCtx sut = new ZstdCompressCtx()) { + // When reset with a null directive + ThrowingCallable result = () -> sut.reset(null); + + // Then it fails fast + assertThatThrownBy(result).isInstanceOf(NullPointerException.class); + } + } + } + @Nested class GenericSetter {