From 9663b161e2e32beb1efe688fa98a7da646388b34 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Sat, 20 Jun 2026 07:49:37 +0200 Subject: [PATCH 1/5] feat(core): add IoBounds for untrusted-segment bounds typing (ADR 0003 Phase E) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parse-side offsets/lengths/counts from untrusted file bytes must surface as VortexException, not raw IndexOutOfBoundsException / ArithmeticException / NegativeArraySizeException. IoBounds wraps the four shapes: slice/checkRange (asSlice bounds), toIntSize (2 GB ByteBuffer/array cap, replaces Math.toIntExact), checkCount (new T[n] alloc guard). Uses the current VortexException(String) constructor — bounds messages carry only numeric offsets, no attacker strings — and migrates to the VortexError catalog when ADR 0003 Phase A lands. Extends ADR 0003 to cover the exception *type* axis alongside message sanitization; records why a static helper beats the PR #27 BoundedSegment wrapper (no new type on the zero-copy hot path). Call-site migration + the Objects.checkIndex consumer-access sweep + the checkstyle ban on raw asSlice follow in subsequent commits. 
Co-Authored-By: Claude Opus 4.8 --- .../io/github/dfa1/vortex/core/IoBounds.java | 77 ++++++++++ .../github/dfa1/vortex/core/IoBoundsTest.java | 135 ++++++++++++++++++ .../adr/0003-vortex-exception-sanitization.md | 132 ++++++++++++++++- 3 files changed, 342 insertions(+), 2 deletions(-) create mode 100644 core/src/main/java/io/github/dfa1/vortex/core/IoBounds.java create mode 100644 core/src/test/java/io/github/dfa1/vortex/core/IoBoundsTest.java diff --git a/core/src/main/java/io/github/dfa1/vortex/core/IoBounds.java b/core/src/main/java/io/github/dfa1/vortex/core/IoBounds.java new file mode 100644 index 00000000..6a7451bc --- /dev/null +++ b/core/src/main/java/io/github/dfa1/vortex/core/IoBounds.java @@ -0,0 +1,77 @@ +package io.github.dfa1.vortex.core; + +import java.lang.foreign.MemorySegment; + +/// Bounds-checked access to untrusted [MemorySegment] regions. +/// +/// The reader memory-maps and parses attacker-controlled binary input. Per the +/// [VortexException] contract, any malformed offset, length, or element count +/// must surface as a [VortexException] — never a raw JDK +/// [IndexOutOfBoundsException], [ArithmeticException], or +/// [NegativeArraySizeException]. Offsets and lengths drawn from parsed file +/// bytes flow through these helpers before they reach the JDK. +/// +/// This covers the *parse* side only. A caller's random-access index into a +/// decoded array (`array.getInt(5)`) is a different contract: that is consumer +/// misuse and should throw [IndexOutOfBoundsException] via +/// [java.util.Objects#checkIndex(long, long)], not a `VortexException`. +/// +/// See ADR 0003 (`docs/adr/0003-vortex-exception-sanitization.md`). +public final class IoBounds { + + private IoBounds() { + } + + /// Verifies that the range `[off, off + len)` lies within `[0, size]`. 
+ /// + /// @param off start offset into the region + /// @param len length of the range + /// @param size total size of the region the range must fit within + /// @throws VortexException if `off` or `len` is negative, or `off + len` + /// exceeds `size` (overflow-safe: checks `len > size - off`) + public static void checkRange(long off, long len, long size) { + if (off < 0 || len < 0 || len > size - off) { + throw new VortexException( + "slice out of bounds: off=" + off + " len=" + len + " size=" + size); + } + } + + /// Bounds-checked [MemorySegment#asSlice(long, long)] — the canonical + /// replacement for a raw `asSlice` on an untrusted offset or length. + /// + /// @param seg the segment to slice + /// @param off start offset into `seg` + /// @param len length of the slice + /// @return the slice `seg[off, off + len)` + /// @throws VortexException if the range falls outside `seg` + public static MemorySegment slice(MemorySegment seg, long off, long len) { + checkRange(off, len, seg.byteSize()); + return seg.asSlice(off, len); + } + + /// Narrows a `long` size or count to `int` for use as a [java.nio.ByteBuffer] + /// index or a Java array length. Replaces [Math#toIntExact(long)] (which + /// throws [ArithmeticException]) and guards the 2 GB `ByteBuffer` / array cap. + /// + /// @param n the size or count, drawn from parsed file metadata + /// @return `n` as an `int` + /// @throws VortexException if `n` is negative or exceeds [Integer#MAX_VALUE] + public static int toIntSize(long n) { + if (n < 0 || n > Integer.MAX_VALUE) { + throw new VortexException("size exceeds 2 GB limit: " + n); + } + return (int) n; + } + + /// Validates an element count before a `new T[count]` decode allocation. + /// + /// Same guard as [#toIntSize(long)], named for the allocation call sites; a + /// per-encoding resource cap (ADR 0004) plugs in here later. 
+ /// + /// @param n the element count, drawn from parsed file metadata + /// @return `n` as an `int` + /// @throws VortexException if `n` is negative or exceeds [Integer#MAX_VALUE] + public static int checkCount(long n) { + return toIntSize(n); + } +} diff --git a/core/src/test/java/io/github/dfa1/vortex/core/IoBoundsTest.java b/core/src/test/java/io/github/dfa1/vortex/core/IoBoundsTest.java new file mode 100644 index 00000000..eef254f7 --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/core/IoBoundsTest.java @@ -0,0 +1,135 @@ +package io.github.dfa1.vortex.core; + +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class IoBoundsTest { + + @Nested + class CheckRange { + + @ParameterizedTest + // off, len against a size-16 region — every in-bounds case, including the + // exact end (off+len == size) and a zero-length slice at the boundary. 
+ @CsvSource({"0,16", "0,0", "16,0", "4,8", "15,1"}) + void acceptsInBoundsRange(long off, long len) { + // Given a region of 16 bytes + // When / Then no exception + IoBounds.checkRange(off, len, 16); + } + + @Test + void rejectsNegativeOffset() { + // Given / When / Then + assertThatThrownBy(() -> IoBounds.checkRange(-1, 4, 16)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("out of bounds"); + } + + @Test + void rejectsNegativeLength() { + // Given / When / Then + assertThatThrownBy(() -> IoBounds.checkRange(0, -1, 16)) + .isInstanceOf(VortexException.class); + } + + @Test + void rejectsRangePastEnd() { + // Given off+len = 17 > size 16 + // When / Then + assertThatThrownBy(() -> IoBounds.checkRange(10, 7, 16)) + .isInstanceOf(VortexException.class); + } + + @Test + void rejectsOverflowingLengthWithoutWrapping() { + // Given a crafted huge length that would overflow off+len if added naively; + // the check uses len > size - off precisely to stay overflow-safe. + assertThatThrownBy(() -> IoBounds.checkRange(8, Long.MAX_VALUE, 16)) + .isInstanceOf(VortexException.class); + } + } + + @Nested + class Slice { + + @Test + void returnsRequestedSubRegion() { + try (Arena arena = Arena.ofConfined()) { + // Given a 16-byte segment + MemorySegment seg = arena.allocate(16); + + // When slicing the middle 8 bytes + MemorySegment result = IoBounds.slice(seg, 4, 8); + + // Then the slice has the requested size + assertThat(result.byteSize()).isEqualTo(8); + } + } + + @Test + void throwsVortexExceptionNotJdkOnOverflow() { + try (Arena arena = Arena.ofConfined()) { + // Given a 16-byte segment and an out-of-range request + MemorySegment seg = arena.allocate(16); + + // When / Then the contract holds — VortexException, never IndexOutOfBoundsException + assertThatThrownBy(() -> IoBounds.slice(seg, 0, 32)) + .isInstanceOf(VortexException.class); + } + } + } + + @Nested + class ToIntSize { + + @ParameterizedTest + @ValueSource(longs = {0, 1, 1024, Integer.MAX_VALUE}) 
+ void narrowsValuesWithinIntRange(long n) { + // Given / When / Then + assertThat(IoBounds.toIntSize(n)).isEqualTo((int) n); + } + + @Test + void rejectsValueAboveIntMax() { + // Given a length one past the 2 GB ByteBuffer/array cap + assertThatThrownBy(() -> IoBounds.toIntSize(Integer.MAX_VALUE + 1L)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("2 GB"); + } + + @Test + void rejectsNegativeValue() { + // Given a length that read back negative (e.g. a u32 stored as signed) + assertThatThrownBy(() -> IoBounds.toIntSize(-1)) + .isInstanceOf(VortexException.class); + } + } + + @Nested + class CheckCount { + + @Test + void delegatesToTheSameGuard() { + // Given a valid count + // When / Then it returns the narrowed value + assertThat(IoBounds.checkCount(42)).isEqualTo(42); + } + + @Test + void rejectsOversizedCount() { + // Given a crafted huge element count for a new T[n] allocation + assertThatThrownBy(() -> IoBounds.checkCount(Long.MAX_VALUE)) + .isInstanceOf(VortexException.class); + } + } +} diff --git a/docs/adr/0003-vortex-exception-sanitization.md b/docs/adr/0003-vortex-exception-sanitization.md index 68058622..148ce5c3 100644 --- a/docs/adr/0003-vortex-exception-sanitization.md +++ b/docs/adr/0003-vortex-exception-sanitization.md @@ -1,9 +1,10 @@ -# ADR 0003: Structured sanitization of `VortexException` messages +# ADR 0003: `VortexException` contract — message sanitization and bounds typing - **Status:** Accepted — implementation pending (see Phases below) -- **Date:** 2026-06-13 +- **Date:** 2026-06-13 (bounds-typing scope added 2026-06-20) - **Deciders:** project maintainer - **Related:** [ADR 0001 — Split read and write runtimes](0001-split-read-and-write-runtimes.md), + [ADR 0004 — Resource caps and `ReadOptions`](0004-resource-caps-read-options.md), [SECURITY.md](../../SECURITY.md) ## Context @@ -40,6 +41,37 @@ typed `EncodingId` for the attribution field but accepts a free-form `String` for the message body, which callers build via 
`+` or `.formatted()`. There is no sanitization contract. +### Second axis — exception *type*, not just message + +Message sanitization governs *what a `VortexException` says*. A separate, +orthogonal gap governs *whether a `VortexException` is thrown at all*. The +reader's contract (SECURITY.md) is: any malformed input throws +`VortexException`, never a raw JDK exception. But ~21 `MemorySegment.asSlice` +call sites take offsets/lengths straight from untrusted layout/footer +metadata and pass them to the JDK unguarded: + +```java +// ScanIterator — fbStart/fbLen decoded from an attacker FlatBuffer +ByteBuffer fbBuf = seg.asSlice(fbStart, fbLen).asByteBuffer()... // raw IndexOutOfBoundsException on overflow +``` + +Only `PostscriptParser` guards its slices (a private `slice()` + +`checkBlobBounds()`); its own comment warns that every *other* scan-time +`asSlice` would throw `IndexOutOfBoundsException` and break the contract. The +same leak appears in three more shapes: + +- `Math.toIntExact(storage.length())` (4 extension decoders) → raw + `ArithmeticException` on a > 2 GB declared length. +- `new byte[(int)(end - start)]` / `new long[(int) rowCount]` (VarBin, AlpRd, + Delta) → `NegativeArraySizeException` / `OutOfMemoryError` on crafted + non-monotonic offsets or huge counts. +- `ByteBuffer` is `int`-indexed (2 GB cap); a slice fed to `asByteBuffer()` + past that throws raw too. + +These are the same `VortexException`-contract violation as a leaked ANSI +escape — just on the type axis instead of the content axis — so they belong +in the same ADR. + ## Decision **Pick Option A (enum error catalog) as the structural shape.** Add a @@ -203,6 +235,73 @@ throw new VortexException(VortexError.UNKNOWN_LAYOUT_ENCODING, layout.encodingId The message format `[UNKNOWN_LAYOUT_ENCODING] vortex.flat\x0a` is machine-parseable, log-friendly, and injection-safe. 
+### Bounds typing: the `IoBounds` helper + +A public static utility in `io.github.dfa1.vortex.core` (must be reachable by +core itself — `ProtoReader` has a site — plus reader, `reader.array`, and +`reader.decode`; `reader → core`, so core is the only home that covers all +layers). It wraps the untrusted-offset operations and throws `VortexException` +(via the `VortexError` catalog above) instead of the raw JDK exception: + +```java +public final class IoBounds { + private IoBounds() {} + + /// off/len must lie within [0, size]. Throws VortexException otherwise. + public static void checkRange(long off, long len, long size) { + if (off < 0 || len < 0 || len > size - off) { + throw new VortexException(VortexError.SEGMENT_INDEX_OUT_OF_RANGE, off, len, size); + } + } + + /// Bounds-checked asSlice — the canonical replacement for raw seg.asSlice. + public static MemorySegment slice(MemorySegment seg, long off, long len) { + checkRange(off, len, seg.byteSize()); + return seg.asSlice(off, len); + } + + /// long → int for sizes/counts that index a ByteBuffer or back a Java array. + /// Replaces Math.toIntExact (ArithmeticException) and guards the 2 GB cap. + public static int toIntSize(long n) { + if (n < 0 || n > Integer.MAX_VALUE) { + throw new VortexException(VortexError.SEGMENT_INDEX_OUT_OF_RANGE, n); + } + return (int) n; + } + + /// Element count for a `new T[n]` decode buffer; same guard as toIntSize, + /// named for the alloc-count call sites (the per-encoding cap from ADR 0004 + /// plugs in here later). + public static int checkCount(long n) { + return toIntSize(n); + } +} +``` + +Why a static helper, not a `BoundedSegment` wrapper (the approach explored in +[PR #27](https://github.com/dfa1/vortex-java/pull/27)): + +- The hot path is `MemorySegment` zero-copy slices; a wrapper type would have + to be unwrapped at every typed accessor or it taxes per-element reads. 
A + static call slices once, off the per-element path, and returns a plain + `MemorySegment` — no new type crosses module boundaries. +- It mirrors the `Sanitize` decision: one small pure primitive in `core`, not + a new abstraction. `Sanitize` cleans the message; `IoBounds` types the + throw. Symmetric. + +#### The consumer-access carve-out + +The ~14 per-element guards in `Lazy*` / `Materialized*` / `Generic` accessors +(`getInt(i)` etc.) throw `IndexOutOfBoundsException` and **stay that way** — +they are *consumer* random-access (`array.getInt(5)`), where IOOBE is the +correct JDK-idiomatic signal (cf. `List.get`), not a malformed-file event. +These must **not** be routed through `IoBounds`. They are instead collapsed +onto the JDK built-in `Objects.checkIndex(i, length)` (Java 16+) — stdlib, no +custom helper. The dividing line: + +- offset/length/count from **parsed file bytes** → `IoBounds` → `VortexException` +- index from a **caller's accessor argument** → `Objects.checkIndex` → `IndexOutOfBoundsException` + ## Migration phases ### Phase A — Foundation (~1.5 h) @@ -244,6 +343,35 @@ approximate-fit existing ones. prevent regression. Also flag `+` inside the VortexException args to catch interpolation-before-sanitization. +### Phase E — Bounds typing via `IoBounds` (0.8.0) + +Independent of A–D. The `VortexError` catalog (Phase A) is not built yet, so +`IoBounds` ships using the current `VortexException(String)` constructor with a +message built only from numeric offsets/lengths (no attacker strings in the +bounds messages, so nothing needs sanitization). When Phase A lands, +`IoBounds` migrates to `VortexError.SEGMENT_INDEX_OUT_OF_RANGE` mechanically +with every other site. Lands in 0.8.0 before the release, since variant decode +widens the parse surface. + +1. Add `IoBounds` (`slice` / `checkRange` / `toIntSize` / `checkCount`) in + `core` with unit tests: negative offset, length overflow, off+len past end, + > 2 GB size, exact-boundary pass. 
+2. Route the ~21 raw `asSlice` sites through `IoBounds.slice`; fold + `PostscriptParser`'s private `slice()`/`checkBlobBounds` and `ProtoReader`'s + hand-rolled guard into it. +3. Replace `Math.toIntExact(...length())` (4 extension decoders) with + `IoBounds.toIntSize`; guard the `new T[(int) n]` alloc sites with + `IoBounds.checkCount`. +4. Collapse the ~14 consumer-access `getX(i)` guards onto + `Objects.checkIndex(i, length)` (separate commit — different error class, + no `IoBounds`). +5. Checkstyle `RegexpSingleline` rejecting raw `.asSlice(` in + `reader` / `reader.array` / `reader.decode` / `core.proto` packages + (mirrors the existing `

`-blocking rule), so new raw slices can't regress. +6. `BoundsTypingSecurityTest`: crafted file with out-of-range slice offset, + oversize declared length, and non-monotonic VarBin offsets each produce a + `VortexException`, never a raw JDK exception. + ## Alternative considered **Option B — Sealed `VortexException` hierarchy:** Make `VortexException` From 0415c1aea4cd489459f6d816fa47c35c9f447ea9 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Sat, 20 Jun 2026 07:58:02 +0200 Subject: [PATCH 2/5] feat(reader): route untrusted slices/sizes through IoBounds (ADR 0003 Phase E) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate the file-structure and decode-side bounds operations that take offsets/lengths from parsed file bytes so a malformed file throws VortexException, not a raw JDK exception: - asSlice → IoBounds.slice: VortexReader (trailer/postscript/stats/segment), VortexHttpReader, Trailer magic, ScanIterator stats, FlatSegmentDecoder buffer descriptors, PostscriptParser blob slice. - FlatSegmentDecoder and ScanIterator stats now guard the trailing u32 fbLen read (checkRange) and segLen narrowing (toIntSize) — both were previously unguarded; a crafted fbLen leaked IndexOutOfBoundsException. - Math.toIntExact(storage.length()) → IoBounds.toIntSize in the Date/Time/ Timestamp/Uuid extension decoders (ArithmeticException → VortexException on a > 2 GB declared length). reader.array .limited() re-slices are left raw: offset 0, rows < length, bounded by construction — not untrusted input (ADR 0003 Phase E item 5). Existing MalformedTrailer/Footer/ZipBomb security tests stay green (no behaviour change on already-validated paths). Follow-ups: checkstyle ban on raw asSlice, checkCount guards on new T[(int)n] alloc sites, and the Objects.checkIndex sweep of consumer-access getters. 
Co-Authored-By: Claude Opus 4.8 --- docs/adr/0003-vortex-exception-sanitization.md | 9 ++++++--- .../dfa1/vortex/reader/FlatSegmentDecoder.java | 12 ++++++++---- .../github/dfa1/vortex/reader/PostscriptParser.java | 3 ++- .../io/github/dfa1/vortex/reader/ScanIterator.java | 4 +++- .../java/io/github/dfa1/vortex/reader/Trailer.java | 3 ++- .../github/dfa1/vortex/reader/VortexHttpReader.java | 5 +++-- .../io/github/dfa1/vortex/reader/VortexReader.java | 13 +++++++------ .../reader/extension/DateExtensionDecoder.java | 3 ++- .../reader/extension/TimeExtensionDecoder.java | 3 ++- .../reader/extension/TimestampExtensionDecoder.java | 3 ++- .../reader/extension/UuidExtensionDecoder.java | 3 ++- 11 files changed, 39 insertions(+), 22 deletions(-) diff --git a/docs/adr/0003-vortex-exception-sanitization.md b/docs/adr/0003-vortex-exception-sanitization.md index 148ce5c3..543f9d8e 100644 --- a/docs/adr/0003-vortex-exception-sanitization.md +++ b/docs/adr/0003-vortex-exception-sanitization.md @@ -365,9 +365,12 @@ widens the parse surface. 4. Collapse the ~14 consumer-access `getX(i)` guards onto `Objects.checkIndex(i, length)` (separate commit — different error class, no `IoBounds`). -5. Checkstyle `RegexpSingleline` rejecting raw `.asSlice(` in - `reader` / `reader.array` / `reader.decode` / `core.proto` packages - (mirrors the existing `

`-blocking rule), so new raw slices can't regress. +5. Checkstyle `RegexpSingleline` rejecting raw `.asSlice(` in the + file-structure (`reader` root) and `reader.decode` packages — the layers + that slice on offsets parsed from file bytes. `reader.array` is excluded: + its only `asSlice` calls are inside `limited(rows)`, re-slicing an already + validated segment at offset 0 with `rows < length` (bounded by construction, + no untrusted offset), so wrapping them adds noise without closing a gap. 6. `BoundsTypingSecurityTest`: crafted file with out-of-range slice offset, oversize declared length, and non-monotonic VarBin offsets each produce a `VortexException`, never a raw JDK exception. diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/FlatSegmentDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/FlatSegmentDecoder.java index 3359b6f9..6106bfe8 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/FlatSegmentDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/FlatSegmentDecoder.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.reader; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.IoBounds; import io.github.dfa1.vortex.reader.array.Array; import io.github.dfa1.vortex.encoding.EncodingId; import io.github.dfa1.vortex.fbs.Buffer; @@ -44,12 +45,15 @@ public FlatSegmentDecoder(ReadRegistry registry) { /// @return the decoded [Array] for this segment public Array decode(MemorySegment seg, List encodingSpecs, DType dtype, long rowCount, SegmentAllocator arena) { - int segLen = (int) seg.byteSize(); + int segLen = IoBounds.toIntSize(seg.byteSize()); ByteBuffer bb = seg.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + // The trailing u32 length field must itself be in range before we read it. 
+ IoBounds.checkRange(segLen - 4L, 4, segLen); int fbLen = bb.getInt(segLen - 4); - int fbStart = segLen - 4 - fbLen; - ByteBuffer fbBuf = bb.slice(fbStart, fbLen).order(ByteOrder.LITTLE_ENDIAN); + long fbStart = segLen - 4L - fbLen; + IoBounds.checkRange(fbStart, fbLen, segLen); + ByteBuffer fbBuf = bb.slice((int) fbStart, fbLen).order(ByteOrder.LITTLE_ENDIAN); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); int numBuffers = fbArray.buffersLength(); @@ -58,7 +62,7 @@ public Array decode(MemorySegment seg, List encodingSpecs, for (int i = 0; i < numBuffers; i++) { Buffer bufDesc = fbArray.buffers(i); dataOffset += bufDesc.padding(); - bufs[i] = seg.asSlice(dataOffset, bufDesc.length()); + bufs[i] = IoBounds.slice(seg, dataOffset, bufDesc.length()); dataOffset += bufDesc.length(); } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java b/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java index 79d5f31d..d9e38b63 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/PostscriptParser.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.reader; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.IoBounds; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.fbs.Binary; @@ -115,7 +116,7 @@ static ParsedFile parseBlobs(ByteBuffer footerBuf, ByteBuffer layoutBuf, ByteBuf } private static ByteBuffer slice(MemorySegment seg, long offset, long length) { - return seg.asSlice(offset, length).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + return IoBounds.slice(seg, offset, length).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); } static Footer convertFooter(io.github.dfa1.vortex.fbs.Footer f) { diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java 
b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java index 5d790a07..c5480635 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.reader; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.IoBounds; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.encoding.EncodingId; @@ -788,9 +789,10 @@ private ArrayStats readFlatStats(Layout flat) { // Stats FlatBuffer lives in the segment's last 4+fbLen bytes; reading the whole // segment as a ByteBuffer would fail for segments larger than 2 GB (ByteBuffer cap). + IoBounds.checkRange(segLen - 4L, 4, segLen); int fbLen = seg.get(LE_INT, segLen - 4); long fbStart = segLen - 4L - fbLen; - ByteBuffer fbBuf = seg.asSlice(fbStart, fbLen).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer fbBuf = IoBounds.slice(seg, fbStart, fbLen).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); io.github.dfa1.vortex.fbs.ArrayNode root = fbArray.root(); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/Trailer.java b/reader/src/main/java/io/github/dfa1/vortex/reader/Trailer.java index 5f961f5b..562c74d1 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/Trailer.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/Trailer.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.reader; +import io.github.dfa1.vortex.core.IoBounds; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.core.VortexFormat; @@ -32,7 +33,7 @@ static Trailer parse(MemorySegment trailerSeg, long bodyBytes) { int version = Short.toUnsignedInt(trailerSeg.get(LE_SHORT, 0)); int postscriptLen = Short.toUnsignedInt(trailerSeg.get(LE_SHORT, 2)); - MemorySegment magicSlice = trailerSeg.asSlice(4, 
VortexFormat.MAGIC_SIZE); + MemorySegment magicSlice = IoBounds.slice(trailerSeg, 4, VortexFormat.MAGIC_SIZE); if (magicSlice.mismatch(VortexFormat.MAGIC) != -1) { throw new VortexException( "invalid magic bytes [%02x %02x %02x %02x]".formatted( diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/VortexHttpReader.java b/reader/src/main/java/io/github/dfa1/vortex/reader/VortexHttpReader.java index 941affd7..492d0e08 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/VortexHttpReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/VortexHttpReader.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.reader; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.IoBounds; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.core.VortexFormat; import io.github.dfa1.vortex.fbs.Postscript; @@ -90,7 +91,7 @@ public static VortexHttpReader open(URI uri, ReadRegistry registry, HttpClient c MemorySegment tailSeg = MemorySegment.ofArray(tail); long trailerOff = tailLen - VortexFormat.TRAILER_SIZE; long bodyBytes = fileSize - VortexFormat.TRAILER_SIZE; - Trailer trailer = Trailer.parse(tailSeg.asSlice(trailerOff, VortexFormat.TRAILER_SIZE), bodyBytes); + Trailer trailer = Trailer.parse(IoBounds.slice(tailSeg, trailerOff, VortexFormat.TRAILER_SIZE), bodyBytes); // HTTP-specific: postscript may extend past the prefetched tail and need a larger fetch. 
long psOffInTail = trailerOff - trailer.postscriptLen(); @@ -100,7 +101,7 @@ public static VortexHttpReader open(URI uri, ReadRegistry registry, HttpClient c .formatted(trailer.postscriptLen(), TAIL_SIZE)); } - ByteBuffer postscriptBuf = tailSeg.asSlice(psOffInTail, trailer.postscriptLen()) + ByteBuffer postscriptBuf = IoBounds.slice(tailSeg, psOffInTail, trailer.postscriptLen()) .asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); var ps = Postscript.getRootAsPostscript(postscriptBuf); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/VortexReader.java b/reader/src/main/java/io/github/dfa1/vortex/reader/VortexReader.java index 46d9f874..20cec67f 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/VortexReader.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/VortexReader.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.reader; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.IoBounds; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.core.VortexFormat; @@ -78,11 +79,11 @@ private static VortexReader parse( MemorySegment seg, long size, Arena arena, ReadRegistry registry ) { long bodyBytes = size - VortexFormat.TRAILER_SIZE; - var trailerSeg = seg.asSlice(bodyBytes, VortexFormat.TRAILER_SIZE); + var trailerSeg = IoBounds.slice(seg, bodyBytes, VortexFormat.TRAILER_SIZE); Trailer trailer = Trailer.parse(trailerSeg, bodyBytes); long postscriptOffset = bodyBytes - trailer.postscriptLen(); - var postscriptBuf = seg.asSlice(postscriptOffset, trailer.postscriptLen()) + var postscriptBuf = IoBounds.slice(seg, postscriptOffset, trailer.postscriptLen()) .asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); PostscriptParser.ParsedFile parsed; @@ -211,7 +212,7 @@ private ArrayStats readFlatStats(Layout flat) { if (segLen < 4) { return ArrayStats.empty(); } - MemorySegment seg = fileSegment.asSlice(spec.offset(), segLen); + MemorySegment seg = IoBounds.slice(fileSegment, spec.offset(), segLen); 
int fbLen = seg.get(LE_INT, segLen - 4); // Reject negative fbLen (signed int from untrusted bytes) or any value that would push // fbStart below 0 → asSlice(negative, ...) throws IndexOutOfBoundsException without this guard. @@ -219,7 +220,7 @@ private ArrayStats readFlatStats(Layout flat) { return ArrayStats.empty(); } long fbStart = segLen - 4L - fbLen; - var fbBuf = seg.asSlice(fbStart, fbLen).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); + var fbBuf = IoBounds.slice(seg, fbStart, fbLen).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); var root = fbArray.root(); if (root == null) { @@ -234,7 +235,7 @@ public io.github.dfa1.vortex.reader.array.Array decodeFlatSegment( DType dtype, long rowCount, java.lang.foreign.SegmentAllocator arena ) { - MemorySegment seg = fileSegment.asSlice(spec.offset(), spec.length()).asReadOnly(); + MemorySegment seg = IoBounds.slice(fileSegment, spec.offset(), spec.length()).asReadOnly(); return new FlatSegmentDecoder(registry) .decode(seg, footer.arraySpecs(), dtype, rowCount, arena); } @@ -242,7 +243,7 @@ public io.github.dfa1.vortex.reader.array.Array decodeFlatSegment( /// Zero-copy read-only slice of the memory-mapped file covering the given spec. 
@Override public MemorySegment rawSegment(SegmentSpec spec) { - return fileSegment.asSlice(spec.offset(), spec.length()).asReadOnly(); + return IoBounds.slice(fileSegment, spec.offset(), spec.length()).asReadOnly(); } @Override diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/DateExtensionDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/DateExtensionDecoder.java index da2adf37..63330516 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/DateExtensionDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/DateExtensionDecoder.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.reader.extension; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.IoBounds; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.encoding.TimeUnit; import io.github.dfa1.vortex.reader.array.Array; @@ -60,7 +61,7 @@ public LocalDate decode(Array storage, long i) { /// @param storage signed-integer storage array (optionally wrapped in `MaskedArray`) /// @return list of decoded dates in row order; `null` entries mark invalid rows public List decodeAll(Array storage) { - int n = Math.toIntExact(storage.length()); + int n = IoBounds.toIntSize(storage.length()); List out = new ArrayList<>(n); if (storage instanceof MaskedArray masked) { for (long i = 0; i < n; i++) { diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimeExtensionDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimeExtensionDecoder.java index e24b38a0..6306e3e8 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimeExtensionDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimeExtensionDecoder.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.reader.extension; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.IoBounds; import io.github.dfa1.vortex.core.VortexException; 
import io.github.dfa1.vortex.reader.array.Array; import io.github.dfa1.vortex.reader.array.MaskedArray; @@ -72,7 +73,7 @@ public LocalTime decode(DType.Extension ext, Array storage, long i) { /// @param storage signed-integer storage array (optionally wrapped in `MaskedArray`) /// @return list of decoded times in row order; `null` entries mark invalid rows public List decodeAll(DType.Extension ext, Array storage) { - int n = Math.toIntExact(storage.length()); + int n = IoBounds.toIntSize(storage.length()); List out = new ArrayList<>(n); if (storage instanceof MaskedArray masked) { for (long i = 0; i < n; i++) { diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimestampExtensionDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimestampExtensionDecoder.java index 6ce78ab1..70807995 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimestampExtensionDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimestampExtensionDecoder.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.reader.extension; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.IoBounds; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.reader.array.Array; import io.github.dfa1.vortex.reader.array.MaskedArray; @@ -95,7 +96,7 @@ public Optional timezone(DType.Extension ext) { /// @param storage signed-integer storage array (optionally wrapped in `MaskedArray`) /// @return list of decoded instants in row order; `null` entries mark invalid rows public List decodeAll(DType.Extension ext, Array storage) { - int n = Math.toIntExact(storage.length()); + int n = IoBounds.toIntSize(storage.length()); List out = new ArrayList<>(n); if (storage instanceof MaskedArray masked) { for (long i = 0; i < n; i++) { diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/UuidExtensionDecoder.java 
b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/UuidExtensionDecoder.java index cc751cc9..1f79dbaf 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/UuidExtensionDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/UuidExtensionDecoder.java @@ -1,6 +1,7 @@ package io.github.dfa1.vortex.reader.extension; import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.IoBounds; import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.reader.array.Array; @@ -87,7 +88,7 @@ public UUID decode(Array storage, long i) { /// @param storage UUID storage array (optionally wrapped in `MaskedArray`) /// @return list of decoded UUIDs in row order; `null` entries mark invalid rows public List decodeAll(Array storage) { - int n = Math.toIntExact(storage.length()); + int n = IoBounds.toIntSize(storage.length()); List out = new ArrayList<>(n); if (storage instanceof MaskedArray masked) { for (long i = 0; i < n; i++) { From 076133baaf9e9cb43ec03da33e1daa7728ec96de Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Sat, 20 Jun 2026 08:10:44 +0200 Subject: [PATCH 3/5] refactor(reader): replace hand-rolled index guards with Objects.checkIndex (ADR 0003 Phase E) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ~14 consumer-access getters in the Lazy*/Generic array families and ExtensionStorage hand-rolled the same bounds check: if (i < 0 || i >= length) { throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); } Collapse each onto the JDK built-in Objects.checkIndex(i, length). This is the consumer-access carve-out from ADR 0003 Phase E: a caller's bad accessor index is consumer misuse and correctly stays IndexOutOfBoundsException (cf. List.get), distinct from the untrusted-parse offsets that route through IoBounds and throw VortexException. 
Objects.checkIndex delegates to the JDK-internal Preconditions.checkIndex, which is the @IntrinsicCandidate, so the JIT compiles it to the same check — no regression on these scalar accessors. The exception message now comes from the JDK ("Index … out of bounds for length …", capital I) instead of the hand-rolled lowercase text. 12 files, 14 guards. reader 679 green; checkstyle + javadoc clean. Co-Authored-By: Claude Opus 4.8 --- .../io/github/dfa1/vortex/reader/array/GenericArray.java | 5 ++--- .../dfa1/vortex/reader/array/LazyConstantBoolArray.java | 6 +++--- .../dfa1/vortex/reader/array/LazyConstantByteArray.java | 9 +++------ .../vortex/reader/array/LazyConstantDecimalArray.java | 5 ++--- .../vortex/reader/array/LazyConstantDoubleArray.java | 5 ++--- .../dfa1/vortex/reader/array/LazyConstantFloatArray.java | 5 ++--- .../dfa1/vortex/reader/array/LazyConstantIntArray.java | 5 ++--- .../dfa1/vortex/reader/array/LazyConstantLongArray.java | 5 ++--- .../dfa1/vortex/reader/array/LazyConstantShortArray.java | 9 +++------ .../dfa1/vortex/reader/array/LazyDecimalArray.java | 5 ++--- .../vortex/reader/array/LazyDecimalBytePartsArray.java | 5 ++--- .../dfa1/vortex/reader/extension/ExtensionStorage.java | 5 ++--- 12 files changed, 27 insertions(+), 42 deletions(-) diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java index 94479f84..af787270 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java @@ -9,6 +9,7 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteOrder; +import java.util.Objects; import java.util.Optional; /// Fallback [Array] for dtypes that lack a dedicated concrete subtype. 
@@ -109,9 +110,7 @@ public Optional segmentIfPresent() { /// shape isn't the single-buffer layout /// @throws IndexOutOfBoundsException if `i` is outside `[0, length())` public BigDecimal getDecimal(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); if (!(dtype instanceof DType.Decimal d)) { throw new VortexException("getDecimal called on non-decimal dtype: " + dtype); } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantBoolArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantBoolArray.java index 3b3b68db..e5ce8c1d 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantBoolArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantBoolArray.java @@ -2,6 +2,8 @@ import io.github.dfa1.vortex.core.DType; +import java.util.Objects; + /// Metadata-only [BoolArray] for `vortex.constant` columns. /// /// Holds a single boolean value broadcast across `length` logical rows. 
No @@ -16,9 +18,7 @@ public record LazyConstantBoolArray(DType dtype, long length, boolean value) imp @Override public boolean getBoolean(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); return value; } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantByteArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantByteArray.java index 0d3aca56..48900789 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantByteArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantByteArray.java @@ -3,6 +3,7 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; +import java.util.Objects; import java.util.function.LongBinaryOperator; /// Metadata-only [ByteArray] for `vortex.constant` columns. @@ -19,17 +20,13 @@ public record LazyConstantByteArray(DType dtype, long length, byte value) implem @Override public byte getByte(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); return value; } @Override public int getInt(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); boolean unsigned = dtype instanceof DType.Primitive p && p.ptype() == PType.U8; return unsigned ? 
Byte.toUnsignedInt(value) : (int) value; } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDecimalArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDecimalArray.java index b48eb551..e530acad 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDecimalArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDecimalArray.java @@ -9,6 +9,7 @@ import java.lang.foreign.ValueLayout; import java.math.BigDecimal; import java.math.BigInteger; +import java.util.Objects; /// Metadata-only decimal array for `vortex.constant` columns. /// @@ -27,9 +28,7 @@ public record LazyConstantDecimalArray(DType dtype, long length, BigDecimal valu /// @param i row index, `0 <= i < length` /// @return the constant [java.math.BigDecimal] value public BigDecimal getDecimal(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); return value; } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDoubleArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDoubleArray.java index 555c7d77..97b5f4ac 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDoubleArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantDoubleArray.java @@ -2,6 +2,7 @@ import io.github.dfa1.vortex.core.DType; +import java.util.Objects; import java.util.function.DoubleBinaryOperator; import java.util.function.DoubleConsumer; @@ -17,9 +18,7 @@ public record LazyConstantDoubleArray(DType dtype, long length, double value) im @Override public double getDouble(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); return value; } diff --git 
a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantFloatArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantFloatArray.java index 57e89518..fd2c5503 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantFloatArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantFloatArray.java @@ -2,6 +2,7 @@ import io.github.dfa1.vortex.core.DType; +import java.util.Objects; import java.util.function.DoubleBinaryOperator; /// Metadata-only [FloatArray] for `vortex.constant` columns. @@ -16,9 +17,7 @@ public record LazyConstantFloatArray(DType dtype, long length, float value) impl @Override public float getFloat(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); return value; } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantIntArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantIntArray.java index 641b8c81..369bc9c5 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantIntArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantIntArray.java @@ -2,6 +2,7 @@ import io.github.dfa1.vortex.core.DType; +import java.util.Objects; import java.util.function.IntBinaryOperator; import java.util.function.IntConsumer; @@ -18,9 +19,7 @@ public record LazyConstantIntArray(DType dtype, long length, int value) implemen @Override public int getInt(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); return value; } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantLongArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantLongArray.java index 26cb9ce4..6cf6ef07 100644 --- 
a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantLongArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantLongArray.java @@ -2,6 +2,7 @@ import io.github.dfa1.vortex.core.DType; +import java.util.Objects; import java.util.function.LongBinaryOperator; import java.util.function.LongConsumer; @@ -22,9 +23,7 @@ public record LazyConstantLongArray(DType dtype, long length, long value) implem @Override public long getLong(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); return value; } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantShortArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantShortArray.java index a57a4538..0c6e08f4 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantShortArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyConstantShortArray.java @@ -3,6 +3,7 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; +import java.util.Objects; import java.util.function.LongBinaryOperator; /// Metadata-only [ShortArray] for `vortex.constant` columns. @@ -19,17 +20,13 @@ public record LazyConstantShortArray(DType dtype, long length, short value) impl @Override public short getShort(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); return value; } @Override public int getInt(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); boolean unsigned = dtype instanceof DType.Primitive p && p.ptype() == PType.U16; return unsigned ? 
Short.toUnsignedInt(value) : (int) value; } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java index f51725ba..e7ad1b65 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java @@ -9,6 +9,7 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteOrder; +import java.util.Objects; import java.util.Optional; /// Lazy `vortex.decimal` array. @@ -40,9 +41,7 @@ public record LazyDecimalArray(DType dtype, long length, MemorySegment buf, int /// @throws VortexException if the dtype isn't a [DType.Decimal] /// @throws IndexOutOfBoundsException if `i` is outside `[0, length())` public BigDecimal getDecimal(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); if (!(dtype instanceof DType.Decimal d)) { throw new VortexException("LazyDecimalArray: non-decimal dtype " + dtype); } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalBytePartsArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalBytePartsArray.java index d1c92786..0c479249 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalBytePartsArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalBytePartsArray.java @@ -6,6 +6,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; import java.math.BigDecimal; +import java.util.Objects; /// Lazy `vortex.decimal_byte_parts` reassembly. 
/// @@ -30,9 +31,7 @@ public record LazyDecimalBytePartsArray(DType dtype, long length, Array msp) imp /// mantissa cell is null /// @throws IndexOutOfBoundsException if `i` is outside `[0, length())` public BigDecimal getDecimal(long i) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); if (!(dtype instanceof DType.Decimal d)) { throw new VortexException("LazyDecimalBytePartsArray: non-decimal dtype " + dtype); } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/ExtensionStorage.java b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/ExtensionStorage.java index 5b09b309..4bb48bae 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/ExtensionStorage.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/ExtensionStorage.java @@ -12,6 +12,7 @@ import java.nio.ByteBuffer; import java.time.Instant; +import java.util.Objects; /// Low-level storage helpers for extension decode paths. public final class ExtensionStorage { @@ -87,8 +88,6 @@ public static Instant instantFromRaw(long raw, TimeUnit unit) { /// @param i row index to check /// @param length array length public static void checkBounds(long i, long length) { - if (i < 0 || i >= length) { - throw new IndexOutOfBoundsException("index " + i + " out of bounds for length " + length); - } + Objects.checkIndex(i, length); } } From 96ee6e9496280e6975f43800b562c9cc7f7f4149 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Sat, 20 Jun 2026 08:18:59 +0200 Subject: [PATCH 4/5] test(reader): crafted flat-segment bounds security test (ADR 0003 Phase E) End-to-end coverage of the FlatSegmentDecoder hardening: drives the public decode() with crafted segments and asserts VortexException, never a raw JDK exception. 
Four cases: - segment smaller than the trailing 4-byte length field - declared fbLen larger than the segment (fbStart goes negative) - negative fbLen (0xFFFFFFFF read as signed -1) - a well-formed Array FlatBuffer whose single buffer descriptor claims a 1 000 000-byte payload past the segment end (exercises IoBounds.slice in the buffer-collection loop) Builds the Array FlatBuffer with FlatBufferBuilder so the buffer-descriptor case reaches the real decode path. reader 683 green. Co-Authored-By: Claude Opus 4.8 --- .../reader/FlatSegmentBoundsSecurityTest.java | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentBoundsSecurityTest.java diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentBoundsSecurityTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentBoundsSecurityTest.java new file mode 100644 index 00000000..9a3ac9eb --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/FlatSegmentBoundsSecurityTest.java @@ -0,0 +1,106 @@ +package io.github.dfa1.vortex.reader; + +import com.google.flatbuffers.FlatBufferBuilder; +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.VortexException; +import io.github.dfa1.vortex.fbs.Array; +import io.github.dfa1.vortex.fbs.ArrayNode; +import io.github.dfa1.vortex.fbs.Buffer; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteOrder; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/// Adversarial tests for the flat-segment decode path's offset/length arithmetic. +/// +/// A flat segment is `buffer_data... | FlatBuffer(Array) | u32 LE = FlatBuffer byte length`. 
+/// Both the trailing length field and each buffer descriptor's offset/length come straight +/// from untrusted file bytes. After ADR 0003 Phase E, every malformed value must surface as +/// a [VortexException], never a raw `IndexOutOfBoundsException` from `MemorySegment.asSlice`. +class FlatSegmentBoundsSecurityTest { + + private static final ValueLayout.OfInt LE_INT = + ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN); + + private static final DType DTYPE = new DType.Primitive(PType.I32, false); + + private final FlatSegmentDecoder sut = new FlatSegmentDecoder(ReadRegistry.empty()); + + @Test + void segmentSmallerThanLengthField_throwsVortexException() { + try (Arena arena = Arena.ofConfined()) { + // Given a 2-byte segment — too small to even hold the trailing 4-byte u32 length + MemorySegment seg = arena.allocate(2); + + // When / Then the length-field read is bounds-checked, not left to crash get() + assertThatThrownBy(() -> sut.decode(seg, List.of("vortex.flat"), DTYPE, 1, arena)) + .isInstanceOf(VortexException.class); + } + } + + @Test + void declaredFbLenLargerThanSegment_throwsVortexException() { + try (Arena arena = Arena.ofConfined()) { + // Given a 16-byte segment whose trailing u32 claims a 1 000 000-byte FlatBuffer; + // fbStart = segLen - 4 - fbLen would go deeply negative. 
+ MemorySegment seg = arena.allocate(16); + seg.set(LE_INT, 12, 1_000_000); + + // When / Then + assertThatThrownBy(() -> sut.decode(seg, List.of("vortex.flat"), DTYPE, 1, arena)) + .isInstanceOf(VortexException.class); + } + } + + @Test + void negativeFbLen_throwsVortexException() { + try (Arena arena = Arena.ofConfined()) { + // Given a trailing u32 of 0xFFFFFFFF — reads back as a signed int of -1 + MemorySegment seg = arena.allocate(16); + seg.set(LE_INT, 12, -1); + + // When / Then the negative length is rejected (checkRange len < 0) + assertThatThrownBy(() -> sut.decode(seg, List.of("vortex.flat"), DTYPE, 1, arena)) + .isInstanceOf(VortexException.class); + } + } + + @Test + void bufferDescriptorLengthPastSegment_throwsVortexException() { + try (Arena arena = Arena.ofConfined()) { + // Given a well-formed Array FlatBuffer whose single buffer descriptor claims a + // 1 000 000-byte payload that cannot fit the actual segment. + byte[] fb = arrayFlatBufferWithOneBuffer(1_000_000L); + MemorySegment seg = arena.allocate(fb.length + 4L); + MemorySegment.copy(MemorySegment.ofArray(fb), 0, seg, 0, fb.length); + seg.set(LE_INT, fb.length, fb.length); + + // When / Then the buffer slice is bounds-checked before asSlice + assertThatThrownBy(() -> sut.decode(seg, List.of("vortex.flat"), DTYPE, 1, arena)) + .isInstanceOf(VortexException.class); + } + } + + /// Builds a minimal valid `Array` FlatBuffer with one buffer descriptor of the given length. 
+ private static byte[] arrayFlatBufferWithOneBuffer(long bufferLength) { + FlatBufferBuilder b = new FlatBufferBuilder(); + + int rootChildren = ArrayNode.createChildrenVector(b, new int[0]); + int rootBuffers = ArrayNode.createBuffersVector(b, new int[]{0}); + int root = ArrayNode.createArrayNode(b, 0, 0, rootChildren, rootBuffers, 0); + + Array.startBuffersVector(b, 1); + Buffer.createBuffer(b, 0, 0, 0, bufferLength); + int buffers = b.endVector(); + + int array = Array.createArray(b, root, buffers); + Array.finishArrayBuffer(b, array); + return b.sizedByteArray(); + } +} From 17aa2b20f984108976bce0241189a1b5eac2b3a8 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Sat, 20 Jun 2026 08:31:53 +0200 Subject: [PATCH 5/5] refactor(reader): address review of IoBounds bounds typing (ADR 0003 Phase E) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-ups from PR review: - ScanIterator.readFlatStats: degrade to ArrayStats.empty() on a malformed stats segment instead of throwing VortexException, matching VortexReader.readFlatStats. Stats are an optional zone-map pruning optimization — a corrupt stats segment must not abort the scan. Also adds the missing segIdx bounds guard the other reader already had. - FlatSegmentDecoder: route the buffer-count allocation through IoBounds.checkCount before new MemorySegment[numBuffers]. - Date/Time/Timestamp/Uuid extension decoders: the decodeAll loops counted with a long index over an int bound (n); narrow to int i. reader 705 green; checkstyle + javadoc clean. 
Co-Authored-By: Claude Opus 4.8 --- .../dfa1/vortex/reader/FlatSegmentDecoder.java | 2 +- .../dfa1/vortex/reader/ScanIterator.java | 18 ++++++++++++++---- .../reader/extension/DateExtensionDecoder.java | 4 ++-- .../reader/extension/TimeExtensionDecoder.java | 4 ++-- .../extension/TimestampExtensionDecoder.java | 4 ++-- .../reader/extension/UuidExtensionDecoder.java | 4 ++-- 6 files changed, 23 insertions(+), 13 deletions(-) diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/FlatSegmentDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/FlatSegmentDecoder.java index 6106bfe8..ee28272b 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/FlatSegmentDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/FlatSegmentDecoder.java @@ -56,7 +56,7 @@ public Array decode(MemorySegment seg, List encodingSpecs, ByteBuffer fbBuf = bb.slice((int) fbStart, fbLen).order(ByteOrder.LITTLE_ENDIAN); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); - int numBuffers = fbArray.buffersLength(); + int numBuffers = IoBounds.checkCount(fbArray.buffersLength()); MemorySegment[] bufs = new MemorySegment[numBuffers]; long dataOffset = 0; for (int i = 0; i < numBuffers; i++) { diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java index c5480635..2fc5ac5d 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java @@ -783,14 +783,24 @@ private ArrayStats readFlatStats(Layout flat) { return ArrayStats.empty(); } int segIdx = flat.segments().getFirst(); + if (segIdx < 0 || segIdx >= file.footer().segmentSpecs().size()) { + return ArrayStats.empty(); + } SegmentSpec spec = file.footer().segmentSpecs().get(segIdx); long segLen = spec.length(); - MemorySegment seg = file.rawSegment(spec); - - // Stats FlatBuffer lives in the segment's last 
4+fbLen bytes; reading the whole + // Stats are an optional zone-map pruning optimization: a bad segment index or trailing + // length field degrades to "no stats" (empty) rather than aborting the scan, mirroring + // VortexReader.readFlatStats. NOTE(review): rawSegment(spec) may still throw — confirm. + // The trailing 4-byte fbLen lives in the segment's last bytes; reading the whole // segment as a ByteBuffer would fail for segments larger than 2 GB (ByteBuffer cap). - IoBounds.checkRange(segLen - 4L, 4, segLen); + if (segLen < 4) { + return ArrayStats.empty(); + } + MemorySegment seg = file.rawSegment(spec); int fbLen = seg.get(LE_INT, segLen - 4); + if (fbLen < 0 || fbLen > segLen - 4) { + return ArrayStats.empty(); + } long fbStart = segLen - 4L - fbLen; ByteBuffer fbBuf = IoBounds.slice(seg, fbStart, fbLen).asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); var fbArray = io.github.dfa1.vortex.fbs.Array.getRootAsArray(fbBuf); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/DateExtensionDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/DateExtensionDecoder.java index 63330516..2ce1f2ad 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/DateExtensionDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/DateExtensionDecoder.java @@ -64,12 +64,12 @@ public List decodeAll(Array storage) { int n = IoBounds.toIntSize(storage.length()); List out = new ArrayList<>(n); if (storage instanceof MaskedArray masked) { - for (long i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { out.add(masked.isValid(i) ? 
decode(masked.inner(), i) : null); } return out; } - for (long i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { out.add(decode(storage, i)); } return out; diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimeExtensionDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimeExtensionDecoder.java index 6306e3e8..b42209ce 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimeExtensionDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimeExtensionDecoder.java @@ -76,12 +76,12 @@ public List decodeAll(DType.Extension ext, Array storage) { int n = IoBounds.toIntSize(storage.length()); List out = new ArrayList<>(n); if (storage instanceof MaskedArray masked) { - for (long i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { out.add(masked.isValid(i) ? decode(ext, masked.inner(), i) : null); } return out; } - for (long i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { out.add(decode(ext, storage, i)); } return out; diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimestampExtensionDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimestampExtensionDecoder.java index 70807995..d9153ffb 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimestampExtensionDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/TimestampExtensionDecoder.java @@ -99,12 +99,12 @@ public List decodeAll(DType.Extension ext, Array storage) { int n = IoBounds.toIntSize(storage.length()); List out = new ArrayList<>(n); if (storage instanceof MaskedArray masked) { - for (long i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { out.add(masked.isValid(i) ? 
instant(ext, masked.inner(), i) : null); } return out; } - for (long i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { out.add(instant(ext, storage, i)); } return out; diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/UuidExtensionDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/UuidExtensionDecoder.java index 1f79dbaf..37e6ccbc 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/extension/UuidExtensionDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/extension/UuidExtensionDecoder.java @@ -91,12 +91,12 @@ public List decodeAll(Array storage) { int n = IoBounds.toIntSize(storage.length()); List out = new ArrayList<>(n); if (storage instanceof MaskedArray masked) { - for (long i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { out.add(masked.isValid(i) ? decode(masked.inner(), i) : null); } return out; } - for (long i = 0; i < n; i++) { + for (int i = 0; i < n; i++) { out.add(decode(storage, i)); } return out;