From a38b1911a1382a07350a30834e529a1d80720c20 Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Sat, 20 Jun 2026 07:19:29 +0200 Subject: [PATCH] refactor(reader): replace ArraySegments.trySegment with Array.segmentIfPresent() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deprecated ArraySegments class held a single static probe (trySegment) behind the dict zip-bomb guard. Replace it with an instance method on the Array interface: default Optional<MemorySegment> segmentIfPresent() // empty by default overridden by the segment-backed types to return their existing buffer, and by MaskedArray to delegate to its inner data. ScanIterator calls codes.segmentIfPresent(). Because each override now reads its own field directly, the package-private accessors that existed only to feed the central switch are gone: Materialized*.buffer() (8) and GenericArray.buffer(int). bytesSegment() (5 decoder callers) and the LazyDecimalArray buf record component stay; their overrides reuse them. ArraySegments is deleted. Naming: segmentIfPresent() reads as the non-allocating counterpart to materialize(arena) — returns the buffer only when one already exists, empty otherwise (no allocate/decode). core 223, reader 638 green; full reactor compiles; javadoc + checkstyle clean. 
Co-Authored-By: Claude Opus 4.8 --- .../dfa1/vortex/reader/ScanIterator.java | 6 +- .../dfa1/vortex/reader/array/Array.java | 21 +++++++ .../vortex/reader/array/ArraySegments.java | 58 ------------------- .../vortex/reader/array/GenericArray.java | 13 +++-- .../vortex/reader/array/LazyDecimalArray.java | 9 +++ .../dfa1/vortex/reader/array/MaskedArray.java | 14 ++++- .../reader/array/MaterializedBoolArray.java | 10 ++-- .../reader/array/MaterializedByteArray.java | 10 ++-- .../reader/array/MaterializedDoubleArray.java | 10 ++-- .../array/MaterializedFloat16Array.java | 10 ++-- .../reader/array/MaterializedFloatArray.java | 10 ++-- .../reader/array/MaterializedIntArray.java | 10 ++-- .../reader/array/MaterializedLongArray.java | 10 ++-- .../reader/array/MaterializedShortArray.java | 10 ++-- .../dfa1/vortex/reader/array/VarBinArray.java | 34 +++++++++++ .../reader/array/ArrayMaterializeTest.java | 7 ++- .../reader/array/VarBinChunkedModeTest.java | 42 ++++++++++++++ 17 files changed, 179 insertions(+), 105 deletions(-) delete mode 100644 reader/src/main/java/io/github/dfa1/vortex/reader/array/ArraySegments.java diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java index 69986af8..5d790a07 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java @@ -5,7 +5,6 @@ import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.encoding.EncodingId; import io.github.dfa1.vortex.reader.array.Array; -import io.github.dfa1.vortex.reader.array.ArraySegments; import io.github.dfa1.vortex.reader.array.BoolArray; import io.github.dfa1.vortex.reader.array.ByteArray; import io.github.dfa1.vortex.reader.array.ChunkedBoolArray; @@ -641,11 +640,8 @@ private Array decodeDictLayout(Layout dictLayout, DType dtype, SegmentAllocator /// @param codes the decoded codes array /// @param 
codesPType code ptype reported by the dict layout metadata /// @param n claimed dict row count - // ArraySegments is deprecated-for-removal; this guard is its only caller and moves to - // the decode-limits layer with it. - @SuppressWarnings("removal") private static void validateDictCodesCapacity(Array codes, PType codesPType, long n) { - Optional maybeSeg = ArraySegments.trySegment(codes); + Optional maybeSeg = codes.segmentIfPresent(); if (maybeSeg.isEmpty()) { return; } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/Array.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/Array.java index 2b8b6622..ac3bb8e7 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/Array.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/Array.java @@ -4,6 +4,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; +import java.util.Optional; /// Decoded columnar data. Concrete subtypes specialise element access for the JIT; /// each covers a specific dtype family. @@ -77,4 +78,24 @@ static Array limited(Array arr, long rows) { } return arr.length() <= rows ? arr : arr.limited(rows); } + + /// Returns this array's primary backing segment if it is already segment-backed, + /// otherwise empty — a non-allocating probe. + /// + /// Unlike [#materialize(java.lang.foreign.SegmentAllocator)], this never allocates or + /// decodes: lazy and composite arrays return empty rather than being materialised. The + /// default is empty; segment-backed types (the `Materialized*` records, `VarBinArray`, + /// `GenericArray`, `LazyDecimalArray`) override to return their existing buffer, and + /// [MaskedArray] delegates to its inner data. The scan layer's dictionary zip-bomb guard + /// uses it to inspect a codes buffer's real size without expanding an oversized claimed + /// row count. 
+ /// + /// **Vortex-internal.** Application code should prefer the typed accessors on concrete + /// subtypes ([LongArray#getLong(long)], [IntArray#getInt(long)], …) or + /// [#materialize(java.lang.foreign.SegmentAllocator)]. + /// + /// @return the primary [MemorySegment], or empty if this array has no segment backing + default Optional<MemorySegment> segmentIfPresent() { + return Optional.empty(); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ArraySegments.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/ArraySegments.java deleted file mode 100644 index cfd8b91d..00000000 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/ArraySegments.java +++ /dev/null @@ -1,58 +0,0 @@ -package io.github.dfa1.vortex.reader.array; - -import java.lang.foreign.MemorySegment; -import java.util.Optional; - -/// Internal non-allocating probe for an [Array]'s primary [MemorySegment]. -/// -/// Unwraps a [MaskedArray] to its inner (data) array first; the validity mask is -/// not surfaced here — callers that need validity must read it from the -/// [MaskedArray] separately. To force a segment (materialising lazy variants), -/// call [Array#materialize(java.lang.foreign.SegmentAllocator)] directly. -/// -/// **Vortex-internal — not public API.** This class is `public` only because the reader, -/// writer, and encoding implementations live in separate Maven modules and need cross-package -/// access; its signatures may change without a deprecation cycle. It backs the scan layer's -/// dictionary zip-bomb validation, which needs to inspect a backing buffer only when one -/// already exists. Application code should prefer the typed accessors on concrete subtypes — -/// [LongArray#getLong(long)], [IntArray#getInt(long)], -/// [DoubleArray#getDouble(long)], and friends. 
-/// -/// @deprecated transitional — this class survives only as the home of -/// [#trySegment(Array)], the non-allocating probe behind the dictionary -/// zip-bomb guard in [io.github.dfa1.vortex.reader.ScanIterator]. Once the -/// decode-limits layer owns that bound, this class is removed; do not add -/// new callers. Use [Array#materialize(java.lang.foreign.SegmentAllocator)] -/// to obtain a column's segment. -@Deprecated(forRemoval = true) -public final class ArraySegments { - - private ArraySegments() { - } - - /// Returns the primary backing segment of `arr` if it is segment-backed, otherwise empty. - /// - /// Non-throwing probe for callers that want to operate on the raw buffer only when one - /// exists (e.g. zone-map / capacity validation) and skip lazy variants without allocating. - /// To force a segment for a lazy array, use [Array#materialize(java.lang.foreign.SegmentAllocator)]. - /// - /// @param arr the array whose segment is needed - /// @return the primary [MemorySegment], or empty if `arr` has no segment backing - public static Optional trySegment(Array arr) { - Array data = arr instanceof MaskedArray m ? 
m.inner() : arr; - return switch (data) { - case MaterializedIntArray a -> Optional.of(a.buffer()); - case MaterializedLongArray a -> Optional.of(a.buffer()); - case MaterializedDoubleArray a -> Optional.of(a.buffer()); - case MaterializedFloatArray a -> Optional.of(a.buffer()); - case MaterializedShortArray a -> Optional.of(a.buffer()); - case MaterializedByteArray a -> Optional.of(a.buffer()); - case MaterializedBoolArray a -> Optional.of(a.buffer()); - case MaterializedFloat16Array a -> Optional.of(a.buffer()); - case VarBinArray a -> Optional.of(a.bytesSegment()); - case GenericArray a -> Optional.of(a.buffer(0)); - case LazyDecimalArray a -> Optional.of(a.buf()); - default -> Optional.empty(); - }; - } -} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java index 6b2d5c41..94479f84 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/GenericArray.java @@ -9,6 +9,7 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteOrder; +import java.util.Optional; /// Fallback [Array] for dtypes that lack a dedicated concrete subtype. /// @@ -74,10 +75,6 @@ public GenericArray limited(long rows) { return new GenericArray(dtype, rows, buffers, children); } - MemorySegment buffer(int i) { - return buffers[i]; - } - /// Returns the primary (index 0) raw buffer directly — no copy or allocation. /// /// @param arena unused; the existing buffer is returned as-is @@ -87,6 +84,14 @@ public MemorySegment materialize(SegmentAllocator arena) { return buffers[0]; } + /// Returns the primary (index 0) raw buffer — already materialised, no allocation. 
+ /// + /// @return the first backing [MemorySegment] + @Override + public Optional segmentIfPresent() { + return Optional.of(buffers[0]); + } + /// Decodes the decimal value at row `i` from a single-buffer layout. /// /// The buffer holds one little-endian two's-complement integer per row. Element diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java index d8607133..f51725ba 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazyDecimalArray.java @@ -9,6 +9,7 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteOrder; +import java.util.Optional; /// Lazy `vortex.decimal` array. /// @@ -82,4 +83,12 @@ public Array limited(long rows) { public MemorySegment materialize(SegmentAllocator arena) { return buf; } + + /// Returns the backing buffer directly — already materialised, no allocation. + /// + /// @return the backing little-endian two's-complement segment + @Override + public Optional segmentIfPresent() { + return Optional.of(buf); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaskedArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaskedArray.java index 538b9780..e040ccbd 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaskedArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaskedArray.java @@ -4,6 +4,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; +import java.util.Optional; /// Decoded `vortex.masked` array: a non-nullable child paired with an optional validity bitmap. /// @@ -66,9 +67,8 @@ public Array limited(long rows) { } /// Materialises the inner (data) payload, ignoring the validity mask — the - /// segment returned is the data buffer only. 
This matches the prior - /// `ArraySegments` behaviour of unwrapping a masked array to its inner data; - /// callers that need validity must read [#validity()] separately. + /// segment returned is the data buffer only. Unwraps to the inner array's own + /// materialisation; callers that need validity must read [#validity()] separately. /// /// @param arena allocator used to materialise lazy inner variants /// @return the inner payload's primary [MemorySegment] @@ -76,4 +76,12 @@ public Array limited(long rows) { public MemorySegment materialize(SegmentAllocator arena) { return child.materialize(arena); } + + /// Probes the inner (data) payload's backing segment, ignoring the validity mask. + /// + /// @return the inner array's segment if segment-backed, otherwise empty + @Override + public Optional segmentIfPresent() { + return child.segmentIfPresent(); + } } diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java index d12ea6cb..409c9b92 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedBoolArray.java @@ -6,6 +6,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; +import java.util.Optional; /// Buffer-backed [BoolArray] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. @@ -36,10 +37,6 @@ public long length() { return length; } - MemorySegment buffer() { - return buffer; - } - /// Returns the backing buffer directly — already an LSB-first packed bitmap, /// matching the format produced by [BoolArray#materialize(SegmentAllocator)], /// so no copy or allocation is needed. 
@@ -51,6 +48,11 @@ public MemorySegment materialize(SegmentAllocator arena) { return buffer; } + @Override + public Optional segmentIfPresent() { + return Optional.of(buffer); + } + @Override public boolean getBoolean(long i) { byte b = buffer.get(ValueLayout.JAVA_BYTE, i >>> 3); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java index 55a73a6f..1483f81e 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedByteArray.java @@ -7,6 +7,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; +import java.util.Optional; import java.util.function.LongBinaryOperator; /// Buffer-backed [ByteArray] — the fallback used when an encoding decoder @@ -40,10 +41,6 @@ public long length() { return length; } - MemorySegment buffer() { - return buffer; - } - /// Returns the backing buffer directly — already a contiguous one-byte-per-element /// segment, so no copy or allocation is needed. /// @@ -54,6 +51,11 @@ public MemorySegment materialize(SegmentAllocator arena) { return buffer; } + @Override + public Optional segmentIfPresent() { + return Optional.of(buffer); + } + @Override public byte getByte(long i) { return buffer.get(ValueLayout.JAVA_BYTE, length == elementCount ? 
i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java index c3583287..9341e707 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedDoubleArray.java @@ -5,6 +5,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; +import java.util.Optional; import java.util.function.DoubleBinaryOperator; import java.util.function.DoubleConsumer; @@ -39,10 +40,6 @@ public long length() { return length; } - MemorySegment buffer() { - return buffer; - } - /// Returns the backing buffer directly — already a contiguous little-endian /// `f64` segment, so no copy or allocation is needed. /// @@ -53,6 +50,11 @@ public MemorySegment materialize(SegmentAllocator arena) { return buffer; } + @Override + public Optional segmentIfPresent() { + return Optional.of(buffer); + } + @Override public double getDouble(long i) { return buffer.getAtIndex(PTypeIO.LE_DOUBLE, length == elementCount ? i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java index 2ec5cbf8..e40c3690 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloat16Array.java @@ -6,6 +6,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; +import java.util.Optional; /// Buffer-backed [Float16Array] — the fallback used when an encoding decoder /// either materialises the output eagerly or has no lazy variant of its own. 
@@ -36,10 +37,6 @@ public long length() { return length; } - MemorySegment buffer() { - return buffer; - } - /// Returns the backing buffer directly — already a contiguous little-endian /// half-precision segment (2 bytes per element), so no copy or allocation is needed. /// @@ -50,6 +47,11 @@ public MemorySegment materialize(SegmentAllocator arena) { return buffer; } + @Override + public Optional segmentIfPresent() { + return Optional.of(buffer); + } + @Override public float getFloat(long i) { return Float.float16ToFloat(buffer.getAtIndex(PTypeIO.LE_SHORT, i)); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java index 27b46eba..b3b6d63d 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedFloatArray.java @@ -6,6 +6,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; +import java.util.Optional; import java.util.function.DoubleBinaryOperator; /// Buffer-backed [FloatArray] — the fallback used when an encoding decoder @@ -39,10 +40,6 @@ public long length() { return length; } - MemorySegment buffer() { - return buffer; - } - /// Returns the backing buffer directly — already a contiguous little-endian /// `f32` segment, so no copy or allocation is needed. /// @@ -53,6 +50,11 @@ public MemorySegment materialize(SegmentAllocator arena) { return buffer; } + @Override + public Optional segmentIfPresent() { + return Optional.of(buffer); + } + @Override public float getFloat(long i) { return buffer.getAtIndex(PTypeIO.LE_FLOAT, length == elementCount ? 
i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java index 9dfeb022..1786bc1a 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedIntArray.java @@ -6,6 +6,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; +import java.util.Optional; import java.util.function.IntBinaryOperator; import java.util.function.IntConsumer; @@ -40,10 +41,6 @@ public long length() { return length; } - MemorySegment buffer() { - return buffer; - } - /// Returns the backing buffer directly — already a contiguous little-endian /// `i32` segment, so no copy or allocation is needed. /// @@ -54,6 +51,11 @@ public MemorySegment materialize(SegmentAllocator arena) { return buffer; } + @Override + public Optional segmentIfPresent() { + return Optional.of(buffer); + } + @Override public int getInt(long i) { return buffer.getAtIndex(PTypeIO.LE_INT, length == elementCount ? 
i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java index 45add0bd..dbff416a 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedLongArray.java @@ -6,6 +6,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; +import java.util.Optional; import java.util.function.LongBinaryOperator; import java.util.function.LongConsumer; @@ -40,10 +41,6 @@ public long length() { return length; } - MemorySegment buffer() { - return buffer; - } - /// Returns the backing buffer directly — already a contiguous little-endian /// `i64` segment, so no copy or allocation is needed. /// @@ -54,6 +51,11 @@ public MemorySegment materialize(SegmentAllocator arena) { return buffer; } + @Override + public Optional segmentIfPresent() { + return Optional.of(buffer); + } + @Override public long getLong(long i) { return buffer.getAtIndex(PTypeIO.LE_LONG, length == elementCount ? 
i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java index ebb42106..250b0b32 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/MaterializedShortArray.java @@ -7,6 +7,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; +import java.util.Optional; import java.util.function.LongBinaryOperator; /// Buffer-backed [ShortArray] — the fallback used when an encoding decoder @@ -40,10 +41,6 @@ public long length() { return length; } - MemorySegment buffer() { - return buffer; - } - /// Returns the backing buffer directly — already a contiguous little-endian /// `i16` segment, so no copy or allocation is needed. /// @@ -54,6 +51,11 @@ public MemorySegment materialize(SegmentAllocator arena) { return buffer; } + @Override + public Optional segmentIfPresent() { + return Optional.of(buffer); + } + @Override public short getShort(long i) { return buffer.getAtIndex(PTypeIO.LE_SHORT, length == elementCount ? i : i % elementCount); diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/VarBinArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/VarBinArray.java index f97c8995..db2e1661 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/array/VarBinArray.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/VarBinArray.java @@ -9,6 +9,7 @@ import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; import java.nio.charset.StandardCharsets; +import java.util.Optional; import java.util.function.IntConsumer; /// Sealed interface for variable-length binary / UTF-8 string columns. 
@@ -41,6 +42,15 @@ public MemorySegment bytesSegment() { return inner.bytesSegment(); } + /// Delegates the probe to the wrapped array — empty if the inner is + /// itself composite (chunked / view). + /// + /// @return the inner array's segment if segment-backed, otherwise empty + @Override + public Optional segmentIfPresent() { + return inner.segmentIfPresent(); + } + @Override public byte[] getBytes(long i) { return inner.getBytes(i + offset); @@ -90,6 +100,14 @@ default MemorySegment materialize(SegmentAllocator arena) { return bytesSegment(); } + /// Returns the concatenated raw bytes segment — already materialised, no allocation. + /// + /// @return the bytes [MemorySegment] + @Override + default Optional segmentIfPresent() { + return Optional.of(bytesSegment()); + } + /// Returns a copy of the raw bytes for element `i`. /// /// @param i zero-based logical index (must be in `[0, length)`) @@ -382,6 +400,14 @@ public MemorySegment bytesSegment() { return MemorySegment.NULL; } + /// No single contiguous segment — chunked data lives across children. + /// + /// @return always empty + @Override + public Optional segmentIfPresent() { + return Optional.empty(); + } + @Override public byte[] getBytes(long i) { int c = findChunk(i); @@ -459,6 +485,14 @@ public MemorySegment bytesSegment() { return MemorySegment.NULL; } + /// No single contiguous segment — view rows reference shared data buffers. 
+ /// + /// @return always empty + @Override + public Optional segmentIfPresent() { + return Optional.empty(); + } + @Override public int getByteLength(long i) { return views.get(PTypeIO.LE_INT, i * VIEW_SIZE); diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/array/ArrayMaterializeTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/array/ArrayMaterializeTest.java index fe2564f6..bd053aac 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/array/ArrayMaterializeTest.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/array/ArrayMaterializeTest.java @@ -41,7 +41,8 @@ void materializedLongReturnsBackingBufferWithoutCopy() { MemorySegment result = sut.materialize(arena); // Then the exact backing segment is handed back — no allocation, no copy - assertThat(result).isSameAs(sut.buffer()); + // (segmentIfPresent() exposes the same zero-copy buffer). + assertThat(result).isSameAs(sut.segmentIfPresent().orElseThrow()); } @Test @@ -52,8 +53,8 @@ void materializedBoolReturnsBackingBitmapWithoutCopy() { // When MemorySegment result = sut.materialize(arena); - // Then - assertThat(result).isSameAs(sut.buffer()); + // Then (segmentIfPresent() exposes the same zero-copy buffer) + assertThat(result).isSameAs(sut.segmentIfPresent().orElseThrow()); } } diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/array/VarBinChunkedModeTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/array/VarBinChunkedModeTest.java index a0fc137f..c283917b 100644 --- a/reader/src/test/java/io/github/dfa1/vortex/reader/array/VarBinChunkedModeTest.java +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/array/VarBinChunkedModeTest.java @@ -142,6 +142,48 @@ void keepsPrefix() { } } + @Nested + class SegmentProbe { + + @Test + void chunkedHasNoContiguousSegment() { + try (Arena arena = Arena.ofConfined()) { + // Given a chunked array — bytes are spread across child segments + VarBinArray c0 = stringChunk(arena, "a", "b"); + VarBinArray c1 = 
stringChunk(arena, "c"); + VarBinArray.ChunkedMode sut = VarBinArray.ChunkedMode.of(UTF8, 3, List.of(c0, c1)); + + // When / Then the probe must not surface the NULL bytesSegment() sentinel + assertThat(sut.segmentIfPresent()).isEmpty(); + } + } + + @Test + void slicedDelegatesToInnerProbe() { + try (Arena arena = Arena.ofConfined()) { + // Given a slice over a chunked inner — still no single segment + VarBinArray c0 = stringChunk(arena, "a", "b"); + VarBinArray c1 = stringChunk(arena, "c"); + VarBinArray.ChunkedMode chunked = VarBinArray.ChunkedMode.of(UTF8, 3, List.of(c0, c1)); + VarBinArray.SlicedMode sut = new VarBinArray.SlicedMode(UTF8, 2, chunked, 1); + + // When / Then the probe follows the inner array, not the NULL bytesSegment() + assertThat(sut.segmentIfPresent()).isEmpty(); + } + } + + @Test + void offsetBackedSurfacesItsSegment() { + try (Arena arena = Arena.ofConfined()) { + // Given a single-segment offset-backed array + VarBinArray sut = stringChunk(arena, "a", "b"); + + // When / Then the probe returns the real backing bytes segment + assertThat(sut.segmentIfPresent()).containsSame(sut.bytesSegment()); + } + } + } + private static VarBinArray stringChunk(Arena arena, String... values) { int totalBytes = 0; for (String s : values) {