From 45b10fe7e14ec6ba87d39993000b5b2287d2d378 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Fri, 26 Jun 2026 21:53:58 +0200 Subject: [PATCH] test(reader): close depth-guard mutation-coverage gaps PIT on the reader (-P pitest) flagged six survivors in the depth/bounds guards added by the hardening work. Each marks a real coverage hole, not an arbitrary edge: - convertDType recursion was only exercised through the List arm, so the depth+1 increment on the Struct / FixedSizeList / Extension arms could be mutated away undetected. Parameterize the dtype depth-bomb over all four NestKind variants. - Pin the convertDType and convertArrayNode depth caps at their exact boundary (limit parses, limit+1 throws), mirroring the existing layout-depth tests. - Pin parseBlobs treating a present-but-empty dtype blob as a null dtype. Reader PIT is now 0 survivors (112 killed, 1 timed-out, 100% test strength). Co-Authored-By: Claude Opus 4.8 --- .../ArrayNodeDepthBombSecurityTest.java | 48 +++++++++++---- .../reader/DTypeDepthBombSecurityTest.java | 23 ++++--- .../dfa1/vortex/reader/MalformedFiles.java | 47 ++++++++++++-- .../PostscriptParserParseBlobsBoundsTest.java | 61 +++++++++++++++++++ 4 files changed, 153 insertions(+), 26 deletions(-) diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/ArrayNodeDepthBombSecurityTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/ArrayNodeDepthBombSecurityTest.java index 3a12f63a..682d99c2 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/ArrayNodeDepthBombSecurityTest.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/ArrayNodeDepthBombSecurityTest.java @@ -14,13 +14,17 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; -/// Adversarial test for the encoded-array-tree recursion in +/// Adversarial tests for the encoded-array-tree recursion in /// [FlatSegmentDecoder]'s `convertArrayNode`. /// /// The decoder walks the array node tree recursively (validity, patches, run-ends, dictionary /// codes/values, …). Without the [FlatSegmentDecoder#MAX_ARRAY_TREE_DEPTH] cap a crafted segment /// with thousands of nested children produces a [StackOverflowError] — an `Error` that escapes the -/// "malformed input must surface as [VortexException]" contract (ADR 0003). This pins that contract. +/// "malformed input must surface as [VortexException]" contract (ADR 0003). +/// +/// The two cases bracket the exact `depth > MAX_ARRAY_TREE_DEPTH` boundary: a tree whose deepest +/// node sits at exactly the limit must clear the guard (and then fail later for an unrelated reason +/// — no decoder), while one node deeper must be rejected by the depth guard itself. class ArrayNodeDepthBombSecurityTest { private static final DType DTYPE = DType.I32; @@ -28,22 +32,44 @@ class ArrayNodeDepthBombSecurityTest { private final FlatSegmentDecoder sut = new FlatSegmentDecoder(ReadRegistry.empty()); @Test - void deeplyNestedArrayTree_throwsVortexException() { + void arrayTreeAtDepthLimit_clearsGuard() { + try (Arena arena = Arena.ofConfined()) { + // Given — deepest node at exactly MAX_ARRAY_TREE_DEPTH: convertArrayNode is entered with + // depth == limit there, and `limit > limit` is false. The walk completes and only then + // fails because the empty registry has no decoder. Kills `depth >` relaxed to `>=`, + // which would wrongly reject this legal max-depth tree with the depth message. + byte[] fb = deeplyNestedArrayFlatBuffer(FlatSegmentDecoder.MAX_ARRAY_TREE_DEPTH); + MemorySegment seg = wrapAsSegment(fb, arena); + + // When / Then + assertThatThrownBy(() -> sut.decode(seg, List.of("vortex.flat"), DTYPE, 1, arena)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("no decoder"); + } + } + + @Test + void arrayTreeOneOverDepthLimit_throwsVortexException() { try (Arena arena = Arena.ofConfined()) { - // Given — a flat segment whose FbsArray root nests 65536 levels of single-child nodes. - // Real encodings nest only a handful of levels; 65536 reliably blows the JVM stack on - // the recursive convertArrayNode walk if the depth cap is removed. - byte[] fb = deeplyNestedArrayFlatBuffer(65536); - MemorySegment seg = arena.allocate(fb.length + 4L); - MemorySegment.copy(MemorySegment.ofArray(fb), 0, seg, 0, fb.length); - seg.set(PTypeIO.LE_INT, fb.length, fb.length); + // Given — one level deeper: the deepest node reaches limit + 1, tripping the guard + // before any StackOverflowError can escape. + byte[] fb = deeplyNestedArrayFlatBuffer(FlatSegmentDecoder.MAX_ARRAY_TREE_DEPTH + 1); + MemorySegment seg = wrapAsSegment(fb, arena); // When / Then — must surface as VortexException, not StackOverflowError assertThatThrownBy(() -> sut.decode(seg, List.of("vortex.flat"), DTYPE, 1, arena)) - .isInstanceOf(VortexException.class); + .isInstanceOf(VortexException.class) + .hasMessageContaining("depth"); } } + private static MemorySegment wrapAsSegment(byte[] fb, Arena arena) { + MemorySegment seg = arena.allocate(fb.length + 4L); + MemorySegment.copy(MemorySegment.ofArray(fb), 0, seg, 0, fb.length); + seg.set(PTypeIO.LE_INT, fb.length, fb.length); + return seg; + } + /// Builds a minimal `FbsArray` whose root node has `depth` levels of single-child nesting, /// each level a buffer-less node referencing encoding index 0. private static byte[] deeplyNestedArrayFlatBuffer(int depth) { diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/DTypeDepthBombSecurityTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/DTypeDepthBombSecurityTest.java index b93f811f..f63afb88 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/DTypeDepthBombSecurityTest.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/DTypeDepthBombSecurityTest.java @@ -2,8 +2,10 @@ import io.github.dfa1.vortex.core.error.VortexException; import io.github.dfa1.vortex.core.io.VortexFormat; -import org.junit.jupiter.api.Test; +import io.github.dfa1.vortex.reader.MalformedFiles.NestKind; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -11,7 +13,7 @@ import java.nio.file.Files; import java.nio.file.Path; -import static io.github.dfa1.vortex.reader.MalformedFiles.buildDeeplyNestedListDtype; +import static io.github.dfa1.vortex.reader.MalformedFiles.buildDeeplyNestedDtype; import static io.github.dfa1.vortex.reader.MalformedFiles.buildFlatLayout; import static io.github.dfa1.vortex.reader.MalformedFiles.buildFooter; import static io.github.dfa1.vortex.reader.MalformedFiles.buildPostscript; @@ -29,25 +31,28 @@ class DTypeDepthBombSecurityTest { private static final ReadRegistry REGISTRY = ReadRegistry.empty(); - @Test - void deeplyNestedDtype_throwsVortexException(@TempDir Path tmp) throws Exception { - // Given — a file whose DType nests 65536 levels of List. Real schemas nest a handful of - // levels; 65536 reliably blows the JVM stack on the recursive convertDType walk. - Path file = buildDeeplyNestedDtypeFile(tmp, 65536); + @ParameterizedTest(name = "nested via {0}") + @EnumSource(NestKind.class) + void deeplyNestedDtype_throwsVortexException(NestKind kind, @TempDir Path tmp) throws Exception { + // Given — a file whose DType nests 65536 levels of `kind`. Real schemas nest a handful of + // levels; 65536 reliably blows the JVM stack on the recursive convertDType walk. Each kind + // drives a different recursion arm (List/Struct/FixedSizeList/Extension), so all four must + // increment the depth counter for the MAX_DTYPE_DEPTH guard to bound them. + Path file = buildDeeplyNestedDtypeFile(tmp, 65536, kind); // When / Then — must surface as VortexException, not StackOverflowError assertThatThrownBy(() -> VortexReader.open(file, REGISTRY)) .isInstanceOf(VortexException.class); } - private static Path buildDeeplyNestedDtypeFile(Path dir, int depth) throws Exception { + private static Path buildDeeplyNestedDtypeFile(Path dir, int depth, NestKind kind) throws Exception { byte[] body = new byte[8]; // unused placeholder ByteBuffer footerBuf = buildFooter( new String[]{"vortex.primitive"}, new String[]{"vortex.flat"}, new long[]{0L}, new long[]{(long) body.length}); - ByteBuffer dtypeBuf = buildDeeplyNestedListDtype(depth); + ByteBuffer dtypeBuf = buildDeeplyNestedDtype(depth, kind); ByteBuffer layoutBuf = buildFlatLayout(0, 1L, 0); long footerOff = body.length; diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/MalformedFiles.java b/reader/src/test/java/io/github/dfa1/vortex/reader/MalformedFiles.java index c1bf82b5..b39e6546 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/MalformedFiles.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/MalformedFiles.java @@ -5,7 +5,10 @@ import io.github.dfa1.vortex.core.fbs.FbsFooter; import io.github.dfa1.vortex.core.fbs.FbsLayout; import io.github.dfa1.vortex.core.fbs.FbsLayoutSpec; +import io.github.dfa1.vortex.core.fbs.FbsExtension; +import io.github.dfa1.vortex.core.fbs.FbsFixedSizeList; import io.github.dfa1.vortex.core.fbs.FbsList; +import io.github.dfa1.vortex.core.fbs.FbsStruct_; import io.github.dfa1.vortex.core.fbs.FbsPostscript; import io.github.dfa1.vortex.core.fbs.FbsPostscriptSegment; import io.github.dfa1.vortex.core.fbs.FbsPrimitive; @@ -35,18 +38,50 @@ static ByteBuffer buildI64Dtype() { return slice(fbb); } - /// Builds a DType blob nesting `depth` levels of `List` around an I64 primitive leaf. + /// The recursive DType variant used to wrap each level of a depth-bomb dtype. Each constant + /// exercises a different recursion arm of `PostscriptParser.convertDType`. + enum NestKind { + /// Nest via `List` element type. + LIST, + /// Nest via single-field `Struct` field type. + STRUCT, + /// Nest via `FixedSizeList` element type. + FIXED_SIZE_LIST, + /// Nest via `Extension` storage type. + EXTENSION + } + + /// Builds a DType blob nesting `depth` levels of `kind` around an I64 primitive leaf. /// - /// @param depth number of nested `List` wrappers around the leaf + /// @param depth number of nested wrappers around the leaf + /// @param kind the recursive DType variant to wrap each level with /// @return the finished DType FlatBuffer - static ByteBuffer buildDeeplyNestedListDtype(int depth) { - var fbb = new FbsBuilder(depth * 32); + static ByteBuffer buildDeeplyNestedDtype(int depth, NestKind kind) { + var fbb = new FbsBuilder(depth * 64); // Leaf first; FlatBuffer requires children be finished before parents. int prim = FbsPrimitive.createFbsPrimitive(fbb, io.github.dfa1.vortex.core.fbs.FbsPType.I64, false); int current = io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsPrimitive, prim); for (int i = 0; i < depth; i++) { - int list = FbsList.createFbsList(fbb, current, false); - current = io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsList, list); + current = switch (kind) { + case LIST -> { + int list = FbsList.createFbsList(fbb, current, false); + yield io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsList, list); + } + case STRUCT -> { + int names = FbsStruct_.createNamesVector(fbb, new int[]{fbb.createString("f")}); + int dtypes = FbsStruct_.createDtypesVector(fbb, new int[]{current}); + int struct = FbsStruct_.createFbsStruct_(fbb, names, dtypes, false); + yield io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsStruct_, struct); + } + case FIXED_SIZE_LIST -> { + int fsl = FbsFixedSizeList.createFbsFixedSizeList(fbb, current, 1L, false); + yield io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsFixedSizeList, fsl); + } + case EXTENSION -> { + int ext = FbsExtension.createFbsExtension(fbb, fbb.createString("x"), current, 0); + yield io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsExtension, ext); + } + }; } io.github.dfa1.vortex.core.fbs.FbsDType.finishFbsDTypeBuffer(fbb, current); return slice(fbb); diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java index 16bd9dd7..466a9269 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java @@ -10,6 +10,8 @@ import io.github.dfa1.vortex.core.fbs.FbsFooter; import io.github.dfa1.vortex.core.fbs.FbsLayout; import io.github.dfa1.vortex.core.fbs.FbsLayoutSpec; +import io.github.dfa1.vortex.core.fbs.FbsList; +import io.github.dfa1.vortex.core.fbs.FbsPrimitive; import io.github.dfa1.vortex.core.fbs.FbsType; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -158,6 +160,51 @@ void parseBlobs_layoutMetadata_oneOverLimit_throws() { .hasMessageContaining("metadata size"); } + // ── FbsDType depth bound: depth > MAX_DTYPE_DEPTH (64) ────────────────────────── + + @Test + void parseBlobs_dtypeDepth_atLimit_parses() { + // Given — a single-child List chain whose deepest node sits at exactly MAX_DTYPE_DEPTH. + // convertDType is entered with depth == 64 there, and `64 > 64` is false. Kills the + // `depth >` relaxed to `depth >=` mutant, which would reject the legal max-depth dtype. + MemorySegment dtype = nestedListDtype(PostscriptParser.MAX_DTYPE_DEPTH); + + // When + DType result = parseDtype(dtype); + + // Then + assertThat(result).isInstanceOf(DType.List.class); + } + + @Test + void parseBlobs_dtypeDepth_oneOverLimit_throws() { + // Given — one level deeper: the deepest node reaches depth 65, tripping `65 > 64` + MemorySegment dtype = nestedListDtype(PostscriptParser.MAX_DTYPE_DEPTH + 1); + + // When / Then + assertThatThrownBy(() -> parseDtype(dtype)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("depth"); + } + + // ── parseBlobs dtype-presence guard: dtypeBuf != null && byteSize() > 0 ────────── + + @Test + void parseBlobs_emptyDtypeBlob_yieldsNullDtype() { + // Given — a present but zero-length dtype blob. The guard treats an empty blob as "no dtype" + // (`byteSize() > 0`); relaxing `> 0` to `>= 0` would instead feed the empty buffer to + // getRootAsFbsDType and throw. Pins that an empty blob parses to a null dtype. + MemorySegment footer = footerWithLayoutSpecs("vortex.flat"); + MemorySegment layout = flatLayout(0); + MemorySegment emptyDtype = MemorySegment.ofArray(new byte[0]); + + // When + DType result = PostscriptParser.parseBlobs(footer, layout, emptyDtype).dtype(); + + // Then + assertThat(result).isNull(); + } + // ── helpers ────────────────────────────────────────────────────────────────── /// Parses a dtype blob through the full parseBlobs path, paired with a minimal valid @@ -214,6 +261,20 @@ private static MemorySegment nestedLayout(int depth) { return slice(fbb); } + private static MemorySegment nestedListDtype(int depth) { + var fbb = new FbsBuilder(depth * 32 + 64); + // Leaf first; FlatBuffer requires children be finished before parents. + int prim = FbsPrimitive.createFbsPrimitive(fbb, io.github.dfa1.vortex.core.fbs.FbsPType.I64, false); + int current = io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsPrimitive, prim); + // Wrap `depth` times: the innermost leaf ends up at recursion depth == `depth`. + for (int i = 0; i < depth; i++) { + int list = FbsList.createFbsList(fbb, current, false); + current = io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsList, list); + } + io.github.dfa1.vortex.core.fbs.FbsDType.finishFbsDTypeBuffer(fbb, current); + return slice(fbb); + } + private static MemorySegment decimalDtype(int precision, byte scale) { var fbb = new FbsBuilder(64); int dec = FbsDecimal.createFbsDecimal(fbb, precision, scale, false);