diff --git a/CLAUDE.md b/CLAUDE.md index b2120776..ffdb853a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -61,6 +61,27 @@ brew install flatbuffers # only for .fbs edits (any flatc version; is in-process via `proto-gen` (no `protoc`/`protobuf-java`): one record per message with static `decode(MemorySegment, long, long)` + `encode()` operating directly on a segment. +### Mutation testing + +Opt-in [PIT](https://pitest.org) profile in `core` and `reader` (`-P pitest`), bound to the +`verify` phase and scoped to the bounds/parse classes via `<targetClasses>` in each module POM. +Used to harden the security-critical bounds guards (ADR 0003 Phase E). + +```bash +./mvnw -pl reader -am -P pitest verify -DskipITs # reader run (-am builds core; -DskipITs skips ITs) +./mvnw -pl core -P pitest verify # core run (IoBounds) +``` + +Report: `<module>/target/pit-reports/index.html` (+ `mutations.xml` for scripting). Widen a run by +adding `<param>` entries under `<targetClasses>` in the module's `pitest` profile. + +Do not invoke the goal directly (`org.pitest:pitest-maven:mutationCoverage`) — it resolves the +latest plugin without the JUnit 5 engine and ignores the profile; always go through `-P pitest`. + +Read survivors as a **simplify-first** signal, not only a test-gap signal: an equivalent mutant +often marks a clause that can never change the outcome (dead code) — delete it rather than writing +an unkillable test. Only add a test when the mutated bound is a genuine, independent edge. 
+ ### Releasing ```bash diff --git a/core/pom.xml b/core/pom.xml index 8062842d..72342f1a 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -176,6 +176,27 @@ + + + + pitest + + + + org.pitest + pitest-maven + + + io.github.dfa1.vortex.core.IoBounds + + + + + + diff --git a/pom.xml b/pom.xml index e43ef828..c3592bc9 100644 --- a/pom.xml +++ b/pom.xml @@ -303,6 +303,11 @@ maven-dependency-plugin 3.7.0 + + org.apache.maven.plugins + maven-jar-plugin + 3.4.1 + org.codehaus.mojo exec-maven-plugin @@ -531,6 +536,46 @@ + + + pitest + + + + + org.pitest + pitest-maven + 1.20.0 + + + org.pitest + pitest-junit5-plugin + 1.2.3 + + + + + pit-report + verify + + mutationCoverage + + + + + + HTML + XML + + false + + + + + + diff --git a/reader/pom.xml b/reader/pom.xml index d1f9d4a9..ea488b34 100644 --- a/reader/pom.xml +++ b/reader/pom.xml @@ -73,4 +73,33 @@ + + + + + pitest + + + + org.pitest + pitest-maven + + + io.github.dfa1.vortex.reader.Footer + io.github.dfa1.vortex.reader.Trailer + io.github.dfa1.vortex.reader.PostscriptParser + io.github.dfa1.vortex.reader.SegmentSpec + io.github.dfa1.vortex.reader.Layout + io.github.dfa1.vortex.reader.FlatSegmentDecoder + + + + + + + diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java b/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java index d9e38b63..51549be4 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java @@ -78,7 +78,10 @@ static void validateSegmentSpecs(List specs, long fileSize) { SegmentSpec s = specs.get(i); long offset = s.offset(); long length = s.length(); - if (offset < 0 || length < 0 || offset > fileSize || length > fileSize - offset) { + // Overflow-safe containment in [0, fileSize], same shape as IoBounds.checkRange. 
An + // `offset > fileSize` clause would be redundant: with length >= 0 already guaranteed, + // offset > fileSize forces length > fileSize - offset, so the final clause covers it. + if (offset < 0 || length < 0 || length > fileSize - offset) { throw new VortexException( "footer segmentSpecs[" + i + "] out of bounds: offset=" + offset + " length=" + length + " fileSize=" + fileSize); @@ -87,7 +90,10 @@ static void validateSegmentSpecs(List specs, long fileSize) { } private static void checkBlobBounds(String name, long offset, long length, long fileSize) { - if (offset < 0 || length < 0 || offset > fileSize || length > fileSize - offset) { + // Same overflow-safe range form as IoBounds.checkRange (no redundant `offset > fileSize` + // clause: length >= 0 makes it implied by the final comparison). Keeps the blob-named + // message that checkRange's generic text would lose. + if (offset < 0 || length < 0 || length > fileSize - offset) { throw new VortexException( "postscript " + name + " blob out of bounds: offset=" + offset + " length=" + length + " fileSize=" + fileSize); diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/LayoutKindTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/LayoutKindTest.java new file mode 100644 index 00000000..7d49d9c1 --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/LayoutKindTest.java @@ -0,0 +1,61 @@ +package io.github.dfa1.vortex.reader; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.List; +import java.util.stream.Stream; + +import static org.assertj.core.api.Assertions.assertThat; + +/// Pins the [Layout] encoding-kind predicates (`isFlat`, `isChunked`, `isStruct`, `isZoned`, +/// `isDict`). 
[ScanIterator] dispatches layout-tree traversal on these, so a predicate that +/// silently returns a constant would route a whole layout family down the wrong decode path. +/// One layout per encoding id, asserting the matching predicate is `true` and every other is +/// `false` — which fixes each method's return to its `encodingId` rather than a constant. +class LayoutKindTest { + + private static Layout layout(String encodingId) { + return new Layout(encodingId, 0L, null, List.of(), List.of()); + } + + static Stream kinds() { + // (encodingId, isFlat, isChunked, isStruct, isZoned, isDict) + return Stream.of( + Arguments.of(Layout.FLAT, true, false, false, false, false), + Arguments.of(Layout.CHUNKED, false, true, false, false, false), + Arguments.of(Layout.STRUCT, false, false, true, false, false), + Arguments.of(Layout.ZONED, false, false, false, true, false), + Arguments.of(Layout.DICT, false, false, false, false, true)); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("kinds") + void predicates_matchOnlyOwnEncodingId( + String encodingId, boolean flat, boolean chunked, boolean struct, boolean zoned, boolean dict) { + // Given + Layout sut = layout(encodingId); + + // When / Then — exactly one predicate is true, the rest false + assertThat(sut.isFlat()).as("isFlat").isEqualTo(flat); + assertThat(sut.isChunked()).as("isChunked").isEqualTo(chunked); + assertThat(sut.isStruct()).as("isStruct").isEqualTo(struct); + assertThat(sut.isZoned()).as("isZoned").isEqualTo(zoned); + assertThat(sut.isDict()).as("isDict").isEqualTo(dict); + } + + @Test + void predicates_allFalse_forUnknownEncodingId() { + // Given — an id matching no known layout kind + Layout sut = layout("vortex.bogus"); + + // When / Then — no predicate claims it + assertThat(sut.isFlat()).isFalse(); + assertThat(sut.isChunked()).isFalse(); + assertThat(sut.isStruct()).isFalse(); + assertThat(sut.isZoned()).isFalse(); + assertThat(sut.isDict()).isFalse(); + } +} diff --git 
a/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserBlobBoundsTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserBlobBoundsTest.java new file mode 100644 index 00000000..9bed8b66 --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserBlobBoundsTest.java @@ -0,0 +1,205 @@ +package io.github.dfa1.vortex.reader; + +import com.google.flatbuffers.FlatBufferBuilder; +import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.fbs.ArraySpec; +import io.github.dfa1.vortex.fbs.Footer; +import io.github.dfa1.vortex.fbs.Layout; +import io.github.dfa1.vortex.fbs.LayoutSpec; +import io.github.dfa1.vortex.fbs.Postscript; +import io.github.dfa1.vortex.fbs.PostscriptSegment; +import io.github.dfa1.vortex.fbs.Primitive; +import io.github.dfa1.vortex.fbs.Type; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/// Bounds coverage for `PostscriptParser.checkBlobBounds` — the guard that rejects a postscript +/// whose footer / layout / dtype blob pointer escapes the mapped file, run before any blob is +/// sliced or parsed. Drives [PostscriptParser#parse] directly with an in-memory +/// [MemorySegment] (no file I/O): a valid file segment of `[layout | dtype | footer]`, then a +/// postscript whose one blob pointer is moved out of range. +/// +/// Two things must hold for every blob: +/// - an out-of-range pointer throws a `VortexException` naming *that blob* — not the generic +/// `IoBounds` slice message. Asserting the specific message is what proves the dedicated +/// `checkBlobBounds` call still runs (delete it and the failure shifts to the later slice, +/// with a different message). 
+/// - the footer blob is laid out last so it ends exactly at EOF: its `length == fileSize - +/// offset`, the largest legal range, which must still pass. +class PostscriptParserBlobBoundsTest { + + /// In-memory file: blobs concatenated as `[layout | dtype | footer]` with the footer last so + /// it ends at EOF (exercises the exact-fit upper bound). Offsets/lengths are recorded for the + /// postscript builder. + private record Fixture(MemorySegment segment, long fileSize, + long footerOff, int footerLen, + long dtypeOff, int dtypeLen, + long layoutOff, int layoutLen) { + } + + private static Fixture validFile() { + ByteBuffer layout = buildFlatLayout(); + ByteBuffer dtype = buildI64Dtype(); + ByteBuffer footer = buildFooter(); + int layoutLen = layout.remaining(); + int dtypeLen = dtype.remaining(); + int footerLen = footer.remaining(); + + int layoutOff = 0; + int dtypeOff = layoutOff + layoutLen; + int footerOff = dtypeOff + dtypeLen; + int fileSize = footerOff + footerLen; + + byte[] file = new byte[fileSize]; + copyInto(file, layoutOff, layout); + copyInto(file, dtypeOff, dtype); + copyInto(file, footerOff, footer); + + return new Fixture(MemorySegment.ofArray(file), fileSize, + footerOff, footerLen, dtypeOff, dtypeLen, layoutOff, layoutLen); + } + + @Test + void parse_validInBoundsBlobs_succeeds() { + // Given — every blob pointer fits; footer ends exactly at EOF (length == fileSize - offset) + Fixture f = validFile(); + ByteBuffer ps = buildPostscript(f.footerOff, f.footerLen, f.dtypeOff, f.dtypeLen, + f.layoutOff, f.layoutLen); + + // When / Then — the largest legal footer range must not be rejected + assertThatCode(() -> PostscriptParser.parse(ps, f.segment, f.fileSize)) + .doesNotThrowAnyException(); + } + + @Test + void parse_footerBlobPastEof_throwsNamingFooter() { + // Given — footer pointer one byte past EOF; everything else valid + Fixture f = validFile(); + ByteBuffer ps = buildPostscript(f.fileSize + 1, f.footerLen, f.dtypeOff, f.dtypeLen, + 
f.layoutOff, f.layoutLen); + + // When / Then — rejected by the footer-specific check, not the later slice + assertThatThrownBy(() -> PostscriptParser.parse(ps, f.segment, f.fileSize)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("footer blob out of bounds"); + } + + @Test + void parse_layoutBlobLengthOverrunsEof_throwsNamingLayout() { + // Given — layout length reaches one byte past EOF (offset valid, offset + length > fileSize) + Fixture f = validFile(); + ByteBuffer ps = buildPostscript(f.footerOff, f.footerLen, f.dtypeOff, f.dtypeLen, + f.layoutOff, (int) (f.fileSize - f.layoutOff + 1)); + + // When / Then + assertThatThrownBy(() -> PostscriptParser.parse(ps, f.segment, f.fileSize)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("layout blob out of bounds"); + } + + @Test + void parse_footerBlobLengthOverrunsEof_throwsNamingFooter() { + // Given — footer length one byte past EOF. The footer sits at a non-zero offset (after + // layout + dtype), so `length > fileSize - offset` and `length > fileSize + offset` give + // different answers — this is what kills the `fileSize - offset` → `+` math mutant that a + // zero-offset overrun (the layout case) cannot distinguish. 
+ Fixture f = validFile(); + ByteBuffer ps = buildPostscript(f.footerOff, (int) (f.fileSize - f.footerOff + 1), + f.dtypeOff, f.dtypeLen, f.layoutOff, f.layoutLen); + + // When / Then + assertThatThrownBy(() -> PostscriptParser.parse(ps, f.segment, f.fileSize)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("footer blob out of bounds"); + } + + @Test + void parse_dtypeBlobPastEof_throwsNamingDtype() { + // Given — dtype pointer past EOF with non-zero length, so the dtype check runs + Fixture f = validFile(); + ByteBuffer ps = buildPostscript(f.footerOff, f.footerLen, f.fileSize + 1, f.dtypeLen, + f.layoutOff, f.layoutLen); + + // When / Then + assertThatThrownBy(() -> PostscriptParser.parse(ps, f.segment, f.fileSize)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("dtype blob out of bounds"); + } + + @Test + void parse_dtypeBlobLengthZero_skipsDtypeCheckEvenWithBadOffset() { + // Given — dtype length 0 with a wildly out-of-range offset. The `length > 0` gate must + // skip both the bounds check and the slice, so a zero-length dtype never trips on its + // offset and the file still parses (dtype absent). Kills the `length > 0` boundary/negate + // mutants that would otherwise run the check on an empty dtype. 
+ Fixture f = validFile(); + ByteBuffer ps = buildPostscript(f.footerOff, f.footerLen, f.fileSize + 999, 0, + f.layoutOff, f.layoutLen); + + // When + PostscriptParser.ParsedFile result = PostscriptParser.parse(ps, f.segment, f.fileSize); + + // Then — parsed, with no dtype + assertThat(result.dtype()).isNull(); + } + + // ── FlatBuffer blob builders (minimal, just enough to parse) ──────────────── + + private static ByteBuffer buildFooter() { + var fbb = new FlatBufferBuilder(256); + int asv = Footer.createArraySpecsVector(fbb, new int[]{ + ArraySpec.createArraySpec(fbb, fbb.createString("vortex.primitive"))}); + int lsv = Footer.createLayoutSpecsVector(fbb, new int[]{ + LayoutSpec.createLayoutSpec(fbb, fbb.createString(io.github.dfa1.vortex.reader.Layout.FLAT))}); + // No segment_specs: validateSegmentSpecs has its own dedicated test; keep this fixture + // focused on the blob-pointer bounds. + Footer.startSegmentSpecsVector(fbb, 0); + int ssv = fbb.endVector(); + int footOff = Footer.createFooter(fbb, asv, lsv, ssv, 0, 0); + fbb.finish(footOff); + return slice(fbb); + } + + private static ByteBuffer buildI64Dtype() { + var fbb = new FlatBufferBuilder(64); + int prim = Primitive.createPrimitive(fbb, io.github.dfa1.vortex.fbs.PType.I64, false); + int off = io.github.dfa1.vortex.fbs.DType.createDType(fbb, Type.Primitive, prim); + io.github.dfa1.vortex.fbs.DType.finishDTypeBuffer(fbb, off); + return slice(fbb); + } + + private static ByteBuffer buildFlatLayout() { + var fbb = new FlatBufferBuilder(128); + int segV = Layout.createSegmentsVector(fbb, new long[]{0}); + int layoutOff = Layout.createLayout(fbb, 0, 1L, 0, 0, segV); + Layout.finishLayoutBuffer(fbb, layoutOff); + return slice(fbb); + } + + private static ByteBuffer buildPostscript( + long footerOff, int footerLen, long dtypeOff, int dtypeLen, long layoutOff, int layoutLen) { + var fbb = new FlatBufferBuilder(128); + int footSeg = PostscriptSegment.createPostscriptSegment(fbb, footerOff, footerLen, 0, 0, 0); 
+ int dtypeSeg = PostscriptSegment.createPostscriptSegment(fbb, dtypeOff, dtypeLen, 0, 0, 0); + int layoutSeg = PostscriptSegment.createPostscriptSegment(fbb, layoutOff, layoutLen, 0, 0, 0); + int psOff = Postscript.createPostscript(fbb, dtypeSeg, layoutSeg, 0, footSeg); + Postscript.finishPostscriptBuffer(fbb, psOff); + return slice(fbb); + } + + private static ByteBuffer slice(FlatBufferBuilder fbb) { + ByteBuffer data = fbb.dataBuffer(); + return data.slice(data.position(), data.remaining()); + } + + private static void copyInto(byte[] dst, int offset, ByteBuffer src) { + ByteBuffer dup = src.duplicate(); + dup.get(dst, offset, dup.remaining()); + } +} diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java new file mode 100644 index 00000000..2b2b1cfd --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserParseBlobsBoundsTest.java @@ -0,0 +1,193 @@ +package io.github.dfa1.vortex.reader; + +import com.google.flatbuffers.FlatBufferBuilder; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.fbs.ArraySpec; +import io.github.dfa1.vortex.fbs.Decimal; +import io.github.dfa1.vortex.fbs.Footer; +import io.github.dfa1.vortex.fbs.Layout; +import io.github.dfa1.vortex.fbs.LayoutSpec; +import io.github.dfa1.vortex.fbs.Type; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.nio.ByteBuffer; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/// Boundary coverage for the validation guards inside `PostscriptParser.convertLayout` (layout +/// depth, encoding index) and `convertDType` 
(decimal precision / scale). Drives the +/// package-private [PostscriptParser#parseBlobs] directly with crafted footer / layout / dtype +/// FlatBuffers — no file or segment needed — so each guard is hit at its exact edge: the largest +/// legal value must parse, the first illegal value must throw a [VortexException]. +class PostscriptParserParseBlobsBoundsTest { + + // ── Layout encoding-index bound: encIdx < 0 || encIdx >= layoutSpecs.size() ── + + @Test + void parseBlobs_layoutEncodingIndex_atLastSpec_parses() { + // Given — one layout spec, layout references index 0 (== size - 1, the last valid index) + ByteBuffer footer = footerWithLayoutSpecs("vortex.flat"); + ByteBuffer layout = flatLayout(0); + + // When / Then — the top valid index must be accepted (kills `>=` relaxed to `>`) + assertThatCode(() -> PostscriptParser.parseBlobs(footer, layout, null)) + .doesNotThrowAnyException(); + } + + @Test + void parseBlobs_layoutEncodingIndex_equalToSize_throws() { + // Given — one layout spec (size 1), layout references index 1 (first out-of-range index) + ByteBuffer footer = footerWithLayoutSpecs("vortex.flat"); + ByteBuffer layout = flatLayout(1); + + // When / Then + assertThatThrownBy(() -> PostscriptParser.parseBlobs(footer, layout, null)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("encoding index"); + } + + // ── Layout depth bound: depth > MAX_LAYOUT_DEPTH (64) ──────────────────────── + + @Test + void parseBlobs_layoutDepth_atLimit_parses() { + // Given — a single-child chain whose deepest node sits at exactly MAX_LAYOUT_DEPTH. + // convertLayout is called with depth == 64 there, and `64 > 64` is false. Kills the + // `depth >` relaxed to `depth >=` mutant, which would reject the legal max-depth tree. 
+ ByteBuffer footer = footerWithLayoutSpecs("vortex.flat"); + ByteBuffer layout = nestedLayout(PostscriptParser.MAX_LAYOUT_DEPTH); + + // When / Then + assertThatCode(() -> PostscriptParser.parseBlobs(footer, layout, null)) + .doesNotThrowAnyException(); + } + + @Test + void parseBlobs_layoutDepth_oneOverLimit_throws() { + // Given — one level deeper: the deepest node reaches depth 65, tripping `65 > 64` + ByteBuffer footer = footerWithLayoutSpecs("vortex.flat"); + ByteBuffer layout = nestedLayout(PostscriptParser.MAX_LAYOUT_DEPTH + 1); + + // When / Then + assertThatThrownBy(() -> PostscriptParser.parseBlobs(footer, layout, null)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("depth"); + } + + // ── Decimal precision bound: precision < 1 || precision > 38 ───────────────── + + @ParameterizedTest + @ValueSource(ints = {1, 38}) + void parseBlobs_decimalPrecision_atEdges_parses(int precision) { + // Given — precision at the inclusive edges 1 and 38 (scale 0 keeps the scale guard happy). + // Kills `precision < 1` -> `<= 1` (would reject precision 1) and `precision > 38` -> + // `>= 38` (would reject precision 38). + ByteBuffer dtype = decimalDtype(precision, (byte) 0); + + // When + DType result = parseDtype(dtype); + + // Then + assertThat(result).isInstanceOf(DType.Decimal.class); + } + + @ParameterizedTest + @ValueSource(ints = {0, 39}) + void parseBlobs_decimalPrecision_outOfRange_throws(int precision) { + // Given — precision just outside [1, 38] + ByteBuffer dtype = decimalDtype(precision, (byte) 0); + + // When / Then + assertThatThrownBy(() -> parseDtype(dtype)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("precision"); + } + + // ── Decimal scale bound: scale < 0 || scale > precision ────────────────────── + + @Test + void parseBlobs_decimalScale_atEdges_parses() { + // Given — scale 0 (lower edge) and scale == precision (upper edge) must both pass. 
+ // Kills `scale < 0` -> `<= 0` (would reject scale 0) and `scale > precision` -> + // `>= precision` (would reject scale == precision). + + // When / Then + assertThatCode(() -> parseDtype(decimalDtype(10, (byte) 0))).doesNotThrowAnyException(); + assertThatCode(() -> parseDtype(decimalDtype(10, (byte) 10))).doesNotThrowAnyException(); + } + + @Test + void parseBlobs_decimalScale_abovePrecision_throws() { + // Given — scale one past precision + ByteBuffer dtype = decimalDtype(10, (byte) 11); + + // When / Then + assertThatThrownBy(() -> parseDtype(dtype)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("scale"); + } + + // ── helpers ────────────────────────────────────────────────────────────────── + + /// Parses a dtype blob through the full parseBlobs path, paired with a minimal valid + /// footer + flat layout so convertDType is reached. Returns the decoded [DType]. + private static DType parseDtype(ByteBuffer dtype) { + ByteBuffer footer = footerWithLayoutSpecs("vortex.flat"); + ByteBuffer layout = flatLayout(0); + return PostscriptParser.parseBlobs(footer, layout, dtype).dtype(); + } + + private static ByteBuffer footerWithLayoutSpecs(String... 
layoutSpecs) { + var fbb = new FlatBufferBuilder(256); + int asv = Footer.createArraySpecsVector(fbb, new int[]{ + ArraySpec.createArraySpec(fbb, fbb.createString("vortex.primitive"))}); + int[] ls = new int[layoutSpecs.length]; + for (int i = 0; i < layoutSpecs.length; i++) { + ls[i] = LayoutSpec.createLayoutSpec(fbb, fbb.createString(layoutSpecs[i])); + } + int lsv = Footer.createLayoutSpecsVector(fbb, ls); + Footer.startSegmentSpecsVector(fbb, 0); + int ssv = fbb.endVector(); + int footOff = Footer.createFooter(fbb, asv, lsv, ssv, 0, 0); + fbb.finish(footOff); + return slice(fbb); + } + + private static ByteBuffer flatLayout(int encodingIdx) { + var fbb = new FlatBufferBuilder(128); + int segV = Layout.createSegmentsVector(fbb, new long[]{0}); + int off = Layout.createLayout(fbb, encodingIdx, 1L, 0, 0, segV); + Layout.finishLayoutBuffer(fbb, off); + return slice(fbb); + } + + private static ByteBuffer nestedLayout(int depth) { + var fbb = new FlatBufferBuilder(depth * 32 + 64); + int segV = Layout.createSegmentsVector(fbb, new long[]{0}); + int current = Layout.createLayout(fbb, 0, 1L, 0, 0, segV); + // Wrap `depth` times: the innermost leaf ends up at recursion depth == `depth`. 
+ for (int i = 0; i < depth; i++) { + int childV = Layout.createChildrenVector(fbb, new int[]{current}); + current = Layout.createLayout(fbb, 0, 1L, 0, childV, 0); + } + Layout.finishLayoutBuffer(fbb, current); + return slice(fbb); + } + + private static ByteBuffer decimalDtype(int precision, byte scale) { + var fbb = new FlatBufferBuilder(64); + int dec = Decimal.createDecimal(fbb, precision, scale, false); + int off = io.github.dfa1.vortex.fbs.DType.createDType(fbb, Type.Decimal, dec); + io.github.dfa1.vortex.fbs.DType.finishDTypeBuffer(fbb, off); + return slice(fbb); + } + + private static ByteBuffer slice(FlatBufferBuilder fbb) { + ByteBuffer data = fbb.dataBuffer(); + return data.slice(data.position(), data.remaining()); + } +} diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserSegmentSpecBoundsTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserSegmentSpecBoundsTest.java new file mode 100644 index 00000000..d1d86281 --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/PostscriptParserSegmentSpecBoundsTest.java @@ -0,0 +1,87 @@ +package io.github.dfa1.vortex.reader; + +import io.github.dfa1.vortex.core.VortexException; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.List; +import java.util.stream.Stream; + +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +/// Exact-boundary coverage for [PostscriptParser#validateSegmentSpecs(List, long)], the guard +/// that rejects a footer `SegmentSpec` whose declared byte range escapes the mapped file. +/// +/// The guard predicate is `offset < 0 || length < 0 || length > fileSize - offset` (a separate +/// `offset > fileSize` clause would be redundant). 
Each comparison is one off-by-one away from a +/// bug: relax any `<`/`>` to `<=`/`>=`, or swap the `fileSize - offset` subtraction for addition, +/// and a slice that overruns the mapping slips through to a raw `IndexOutOfBoundsException` at +/// scan time — breaking the "malformed input → VortexException" contract. These cases pin every +/// edge of that predicate: the largest range that must pass, the smallest overrun to reject. +class PostscriptParserSegmentSpecBoundsTest { + + private static SegmentSpec spec(long offset, long length) { + return new SegmentSpec(offset, length, (byte) 0, CompressionScheme.NONE); + } + + static Stream inBoundsRanges() { + long fileSize = 100; + return Stream.of( + // offset 0 / length 0: the lower edge — must pass, else `offset < 0` / `length < 0` + // were relaxed to `<= 0`. + Arguments.of("empty at start", spec(0, 0), fileSize), + // offset == fileSize with an empty segment: the range [100,100) touches EOF but reads + // nothing — valid. Catches `length > fileSize - offset` relaxed to `>=` (0 >= 0 rejects). + Arguments.of("empty at EOF", spec(fileSize, 0), fileSize), + // length == fileSize - offset: the segment ends exactly at EOF — the largest range + // that still fits. Catches `length > fileSize - offset` relaxed to `>=`. + Arguments.of("exact fit to EOF", spec(10, 90), fileSize), + // whole file in one segment — same exact-fit edge from offset 0. + Arguments.of("whole file", spec(0, fileSize), fileSize)); + } + + static Stream outOfBoundsRanges() { + long fileSize = 100; + return Stream.of( + Arguments.of("negative offset", spec(-1, 0), fileSize), + Arguments.of("negative length", spec(0, -1), fileSize), + Arguments.of("offset past EOF", spec(fileSize + 1, 0), fileSize), + // length one byte past the exact fit: overruns EOF by 1. This also kills the + // `fileSize - offset` → `fileSize + offset` math mutant — 91 > 90 rejects, but + // 91 > 110 (the mutated bound) would wrongly accept. 
+ Arguments.of("overrun by one byte", spec(10, 91), fileSize)); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("inBoundsRanges") + void validateSegmentSpecs_accepts_inBoundsRange(String name, SegmentSpec s, long fileSize) { + // Given — a spec whose range sits exactly on the in-bounds edge (see provider) + + // When / Then — the largest legal range must not be rejected + assertThatCode(() -> PostscriptParser.validateSegmentSpecs(List.of(s), fileSize)) + .doesNotThrowAnyException(); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("outOfBoundsRanges") + void validateSegmentSpecs_rejects_outOfBoundsRange(String name, SegmentSpec s, long fileSize) { + // Given — a spec one step past a bound (see provider) + + // When / Then — the smallest overrun must be rejected as a VortexException, not leak an IOOBE + assertThatThrownBy(() -> PostscriptParser.validateSegmentSpecs(List.of(s), fileSize)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("out of bounds"); + } + + @Test + void validateSegmentSpecs_acceptsEmptyList() { + // Given — no segments at all + + // When / Then — nothing to validate, nothing thrown + assertDoesNotThrow(() -> PostscriptParser.validateSegmentSpecs(List.of(), 100)); + } +}