diff --git a/core/src/main/java/io/github/dfa1/vortex/core/PType.java b/core/src/main/java/io/github/dfa1/vortex/core/PType.java index a4917402..41052741 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/PType.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/PType.java @@ -56,6 +56,14 @@ public boolean isSigned() { || this == F16 || this == F32 || this == F64; } + /// Returns `true` for the unsigned integer types (`U8`–`U64`) — the complement of + /// [#isSigned()], since every non-unsigned ptype is either a signed integer or floating-point. + /// + /// @return `true` if this ptype is an unsigned integer + public boolean isUnsigned() { + return !isSigned(); + } + /// Returns the [PType] for the given enum ordinal — the integer value the wire format /// uses to identify a physical type. /// diff --git a/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveArrays.java b/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveArrays.java new file mode 100644 index 00000000..ec3ada59 --- /dev/null +++ b/core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveArrays.java @@ -0,0 +1,109 @@ +package io.github.dfa1.vortex.encoding; + +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.VortexException; + +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; +import java.lang.foreign.ValueLayout; + +/// Conversions between a boxed Java primitive value array and its wide / off-heap forms, +/// shared by the integer encodings on both the read and write sides. +/// +/// [#toLongs(Object, PType, EncodingId)] and [#fromLongs(long[], PType, SegmentAllocator)] are +/// inverses: the first widens any 8–64 bit integer array to a `long[]`, the second writes a +/// `long[]` back to a little-endian off-heap segment of the target width. Floating-point ptypes +/// are not handled here — they reinterpret to raw bits or take type-specific encode paths instead. +public final class PrimitiveArrays { + + private PrimitiveArrays() { + } + + /// Widens a boxed primitive integer array to `long[]`, zero-extending the unsigned ptypes and + /// sign-extending the signed ones. The I64/U64 case returns the input array directly (no copy). + /// + /// @param data the value array; its runtime type must match `ptype` + /// (`byte[]` for I8/U8, `short[]` for I16/U16, `int[]` for I32/U32, `long[]` for I64/U64) + /// @param ptype the logical primitive type of `data` + /// @param encoding the encoding requesting the widening, used for error attribution + /// @return a `long[]` holding every element of `data` widened to 64 bits + /// @throws VortexException if `ptype` is not an integer ptype + public static long[] toLongs(Object data, PType ptype, EncodingId encoding) { + return switch (ptype) { + case I8 -> { + byte[] arr = (byte[]) data; + long[] r = new long[arr.length]; + for (int i = 0; i < arr.length; i++) { + r[i] = arr[i]; + } + yield r; + } + case U8 -> { + byte[] arr = (byte[]) data; + long[] r = new long[arr.length]; + for (int i = 0; i < arr.length; i++) { + r[i] = Byte.toUnsignedLong(arr[i]); + } + yield r; + } + case I16 -> { + short[] arr = (short[]) data; + long[] r = new long[arr.length]; + for (int i = 0; i < arr.length; i++) { + r[i] = arr[i]; + } + yield r; + } + case U16 -> { + short[] arr = (short[]) data; + long[] r = new long[arr.length]; + for (int i = 0; i < arr.length; i++) { + r[i] = Short.toUnsignedLong(arr[i]); + } + yield r; + } + case I32 -> { + int[] arr = (int[]) data; + long[] r = new long[arr.length]; + for (int i = 0; i < arr.length; i++) { + r[i] = arr[i]; + } + yield r; + } + case U32 -> { + int[] arr = (int[]) data; + long[] r = new long[arr.length]; + for (int i = 0; i < arr.length; i++) { + r[i] = Integer.toUnsignedLong(arr[i]); + } + yield r; + } + case I64, U64 -> (long[]) data; + default -> throw new VortexException(encoding, "unsupported ptype: " + ptype); + }; + } + + /// Writes a `long[]` to a freshly allocated little-endian off-heap segment whose element width + /// is that of `ptype`, narrowing each element to the low bytes. Inverse of + /// [#toLongs(Object, PType, EncodingId)]. The I64/U64 case bulk-copies; narrower widths write + /// element by element through [PTypeIO#set(MemorySegment, long, PType, long)]. + /// + /// @param longs the wide values to write + /// @param ptype the target primitive width + /// @param arena allocator for the output segment + /// @return a little-endian segment of `longs.length` elements at `ptype`'s width + public static MemorySegment fromLongs(long[] longs, PType ptype, SegmentAllocator arena) { + if (ptype == PType.I64 || ptype == PType.U64) { + MemorySegment dst = arena.allocate((long) longs.length * 8); + MemorySegment.copy(MemorySegment.ofArray(longs), ValueLayout.JAVA_LONG, 0L, dst, PTypeIO.LE_LONG, 0L, longs.length); + return dst; + } + int n = longs.length; + long elemSize = ptype.byteSize(); + MemorySegment seg = arena.allocate(n * elemSize); + for (int i = 0; i < n; i++) { + PTypeIO.set(seg, i * elemSize, ptype, longs[i]); + } + return seg; + } +} diff --git a/core/src/test/java/io/github/dfa1/vortex/core/PTypeTest.java b/core/src/test/java/io/github/dfa1/vortex/core/PTypeTest.java index 74ce819e..8ed1338c 100644 --- a/core/src/test/java/io/github/dfa1/vortex/core/PTypeTest.java +++ b/core/src/test/java/io/github/dfa1/vortex/core/PTypeTest.java @@ -51,6 +51,27 @@ void isSigned_falseForUnsigned(PType ptype) { assertThat(ptype.isSigned()).isFalse(); } + @ParameterizedTest + @EnumSource(value = PType.class, names = {"U8", "U16", "U32", "U64"}) + void isUnsigned_trueForUnsigned(PType ptype) { + // Given / When / Then + assertThat(ptype.isUnsigned()).isTrue(); + } + + @ParameterizedTest + @EnumSource(value = PType.class, names = {"I8", "I16", "I32", "I64", "F16", "F32", "F64"}) + void isUnsigned_falseForSignedAndFloats(PType ptype) { + // Given / When / Then + assertThat(ptype.isUnsigned()).isFalse(); + } + + @ParameterizedTest + @EnumSource(PType.class) + void isUnsigned_isExactComplementOfIsSigned(PType ptype) { + // Given / When / Then — the two must partition every ptype; isUnsigned is defined as !isSigned + assertThat(ptype.isUnsigned()).isNotEqualTo(ptype.isSigned()); + } + @ParameterizedTest @EnumSource(PType.class) void fromOrdinal_roundTrips(PType ptype) { diff --git a/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveArraysTest.java b/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveArraysTest.java new file mode 100644 index 00000000..c2f3ccea --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/encoding/PrimitiveArraysTest.java @@ -0,0 +1,170 @@ +package io.github.dfa1.vortex.encoding; + +import io.github.dfa1.vortex.core.PType; +import io.github.dfa1.vortex.core.VortexException; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class PrimitiveArraysTest { + + @Test + void toLongs_i8_signExtends() { + // Given a byte array with a negative value + byte[] data = {0, 1, -1, Byte.MIN_VALUE, Byte.MAX_VALUE}; + + // When + long[] result = PrimitiveArrays.toLongs(data, PType.I8, EncodingId.FASTLANES_DELTA); + + // Then negatives sign-extend to 64 bits + assertThat(result).containsExactly(0L, 1L, -1L, -128L, 127L); + } + + @Test + void toLongs_u8_zeroExtends() { + // Given a byte array whose high bit is set (would be negative if signed) + byte[] data = {0, 1, -1, Byte.MIN_VALUE}; + + // When + long[] result = PrimitiveArrays.toLongs(data, PType.U8, EncodingId.FASTLANES_DELTA); + + // Then the raw byte is zero-extended into 0..255 + assertThat(result).containsExactly(0L, 1L, 255L, 128L); + } + + @Test + void toLongs_i16_signExtends() { + // Given + short[] data = {0, -1, Short.MIN_VALUE, Short.MAX_VALUE}; + + // When + long[] result = PrimitiveArrays.toLongs(data, PType.I16, EncodingId.FASTLANES_DELTA); + + // Then + assertThat(result).containsExactly(0L, -1L, -32768L, 32767L); + } + + @Test + void toLongs_u16_zeroExtends() { + // Given a value with the high bit set + short[] data = {-1, Short.MIN_VALUE}; + + // When + long[] result = PrimitiveArrays.toLongs(data, PType.U16, EncodingId.FASTLANES_DELTA); + + // Then zero-extended into 0..65535 + assertThat(result).containsExactly(65535L, 32768L); + } + + @Test + void toLongs_i32_signExtends() { + // Given + int[] data = {0, -1, Integer.MIN_VALUE, Integer.MAX_VALUE}; + + // When + long[] result = PrimitiveArrays.toLongs(data, PType.I32, EncodingId.FASTLANES_DELTA); + + // Then + assertThat(result).containsExactly(0L, -1L, (long) Integer.MIN_VALUE, (long) Integer.MAX_VALUE); + } + + @Test + void toLongs_u32_zeroExtends() { + // Given a value with the high bit set + int[] data = {-1, Integer.MIN_VALUE}; + + // When + long[] result = PrimitiveArrays.toLongs(data, PType.U32, EncodingId.FASTLANES_DELTA); + + // Then zero-extended into 0..2^32-1 + assertThat(result).containsExactly(0xFFFF_FFFFL, 0x8000_0000L); + } + + @Test + void toLongs_i64_returnsSameArrayNoCopy() { + // Given a long array + long[] data = {1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE}; + + // When + long[] result = PrimitiveArrays.toLongs(data, PType.I64, EncodingId.FASTLANES_DELTA); + + // Then the I64/U64 path is a passthrough — no copy + assertThat(result).isSameAs(data); + } + + @ParameterizedTest + @EnumSource(value = PType.class, names = {"F16", "F32", "F64"}) + void toLongs_floatingPtypes_throwWithSuppliedEncodingId(PType ptype) { + // Given floating ptypes are not integer-widen targets; When/Then it throws, attributed to + // the caller's encoding id (here FrameOfReference) rather than a hardcoded one + assertThatThrownBy(() -> PrimitiveArrays.toLongs(new float[1], ptype, EncodingId.FASTLANES_FOR)) + .isInstanceOf(VortexException.class) + .hasMessageContaining("unsupported ptype: " + ptype); + } + + @ParameterizedTest + @EnumSource(value = PType.class, names = {"I8", "U8", "I16", "U16", "I32", "U32", "I64", "U64"}) + void fromLongs_roundTripsThroughToLongs(PType ptype) { + // Given values that exercise the low bytes at each width + long[] original = {0L, 1L, 2L, 7L, 42L}; + + try (Arena arena = Arena.ofConfined()) { + // When written to a segment and read back at the ptype's width + MemorySegment seg = PrimitiveArrays.fromLongs(original, ptype, arena); + + // Then the segment has one element per value at the expected width... + assertThat(seg.byteSize()).isEqualTo((long) original.length * ptype.byteSize()); + // ...and each element round-trips (values are small + positive, so width-narrowing is lossless) + for (int i = 0; i < original.length; i++) { + assertThat(readElement(seg, ptype, i)).isEqualTo(original[i]); + } + } + } + + @Test + void fromLongs_i64_writesLittleEndian() { + // Given a single value with distinct bytes + long[] original = {0x0102_0304_0506_0708L}; + + try (Arena arena = Arena.ofConfined()) { + // When written via the bulk I64 path + MemorySegment seg = PrimitiveArrays.fromLongs(original, PType.I64, arena); + + // Then it is stored little-endian (lowest byte first) + assertThat(seg.get(ValueLayout.JAVA_BYTE, 0)).isEqualTo((byte) 0x08); + assertThat(seg.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(0x0102_0304_0506_0708L); + } + } + + @Test + void fromLongs_narrowWidth_keepsOnlyLowBytes() { + // Given a value whose high bytes exceed the target width + long[] original = {0x1234_5678L}; + + try (Arena arena = Arena.ofConfined()) { + // When narrowed to I8 (1 byte/elem) + MemorySegment seg = PrimitiveArrays.fromLongs(original, PType.I8, arena); + + // Then only the low byte survives + assertThat(seg.byteSize()).isEqualTo(1L); + assertThat(seg.get(ValueLayout.JAVA_BYTE, 0)).isEqualTo((byte) 0x78); + } + } + + private static long readElement(MemorySegment seg, PType ptype, int i) { + return switch (ptype) { + case I8, U8 -> seg.get(ValueLayout.JAVA_BYTE, i); + case I16, U16 -> seg.getAtIndex(PTypeIO.LE_SHORT, i); + case I32, U32 -> seg.getAtIndex(PTypeIO.LE_INT, i); + case I64, U64 -> seg.getAtIndex(PTypeIO.LE_LONG, i); + default -> throw new IllegalArgumentException("not an integer ptype: " + ptype); + }; + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/decode/DeltaEncodingDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/decode/DeltaEncodingDecoder.java index 3c1d9d7f..028cb999 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/decode/DeltaEncodingDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/decode/DeltaEncodingDecoder.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.encoding.EncodingId; +import io.github.dfa1.vortex.encoding.PrimitiveArrays; import io.github.dfa1.vortex.encoding.PTypeIO; import io.github.dfa1.vortex.proto.DeltaMetadata; import io.github.dfa1.vortex.reader.array.Array; @@ -14,7 +15,6 @@ import java.io.IOException; import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; import java.nio.ByteBuffer; @@ -107,7 +107,7 @@ public Array decode(DecodeContext ctx) { long[] result = new long[(int) rowCount]; System.arraycopy(decoded, offset, result, 0, (int) rowCount); - MemorySegment seg = fromLongs(result, ptype, ctx.arena()); + MemorySegment seg = PrimitiveArrays.fromLongs(result, ptype, ctx.arena()); return switch (ptype) { case I64, U64 -> new MaterializedLongArray(ctx.dtype(), rowCount, seg); case I32, U32 -> new MaterializedIntArray(ctx.dtype(), rowCount, seg); @@ -179,19 +179,4 @@ private static long typeMask(PType ptype) { return bits == 64 ? -1L : (1L << bits) - 1; } - private static MemorySegment fromLongs(long[] longs, PType ptype, SegmentAllocator arena) { - if (ptype == PType.I64 || ptype == PType.U64) { - MemorySegment dst = arena.allocate((long) longs.length * 8); - MemorySegment.copy(MemorySegment.ofArray(longs), ValueLayout.JAVA_LONG, 0L, dst, PTypeIO.LE_LONG, 0L, longs.length); - return dst; - } - int n = longs.length; - long elemSize = ptype.byteSize(); - MemorySegment seg = arena.allocate(n * elemSize); - for (int i = 0; i < n; i++) { - PTypeIO.set(seg, i * elemSize, ptype, longs[i]); - } - return seg; - } - } diff --git a/writer/src/main/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingEncoder.java b/writer/src/main/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingEncoder.java index 5e6a0ab7..b962126e 100644 --- a/writer/src/main/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingEncoder.java +++ b/writer/src/main/java/io/github/dfa1/vortex/writer/encode/BitpackedEncodingEncoder.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.encoding.EncodingId; +import io.github.dfa1.vortex.encoding.PrimitiveArrays; import io.github.dfa1.vortex.encoding.PTypeIO; import io.github.dfa1.vortex.proto.BitPackedMetadata; import io.github.dfa1.vortex.proto.PatchesMetadata; @@ -43,11 +44,11 @@ public boolean accepts(DType dtype) { @Override public EncodeResult encode(DType dtype, Object data, EncodeContext ctx) { PType ptype = ((DType.Primitive) dtype).ptype(); - long[] longs = toLongs(data, ptype); + long[] longs = PrimitiveArrays.toLongs(data, ptype, EncodingId.FASTLANES_BITPACKED); int n = longs.length; int typeBits = ptype.byteSize() * 8; long typeMask = typeMask(typeBits); - boolean unsign = isUnsigned(ptype); + boolean unsign = ptype.isUnsigned(); long signedMin = 0L; long signedMax = 0L; @@ -237,74 +238,13 @@ private static MemorySegment packFastLanes(long[] values, int n, int bitWidth, i return seg; } - private static long[] toLongs(Object data, PType ptype) { - return switch (ptype) { - case I8 -> { - byte[] arr = (byte[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = arr[i]; - } - yield r; - } - case U8 -> { - byte[] arr = (byte[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = Byte.toUnsignedLong(arr[i]); - } - yield r; - } - case I16 -> { - short[] arr = (short[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = arr[i]; - } - yield r; - } - case U16 -> { - short[] arr = (short[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = Short.toUnsignedLong(arr[i]); - } - yield r; - } - case I32 -> { - int[] arr = (int[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = arr[i]; - } - yield r; - } - case U32 -> { - int[] arr = (int[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = Integer.toUnsignedLong(arr[i]); - } - yield r; - } - case I64, U64 -> (long[]) data; - default -> throw new VortexException(EncodingId.FASTLANES_BITPACKED, "unsupported ptype: " + ptype); - }; - } private static long typeMask(int typeBits) { return typeBits == 64 ? -1L : (1L << typeBits) - 1L; } - private static boolean isUnsigned(PType ptype) { - return switch (ptype) { - case U8, U16, U32, U64 -> true; - default -> false; - }; - } - private static byte[] statsBytes(PType ptype, long value) { - if (isUnsigned(ptype)) { + if (ptype.isUnsigned()) { return ScalarValue.ofUint64Value(value).encode(); } return ScalarValue.ofInt64Value(value).encode(); diff --git a/writer/src/main/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoder.java b/writer/src/main/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoder.java index 88482a06..7c093b50 100644 --- a/writer/src/main/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoder.java +++ b/writer/src/main/java/io/github/dfa1/vortex/writer/encode/DeltaEncodingEncoder.java @@ -2,15 +2,12 @@ import io.github.dfa1.vortex.core.DType; import io.github.dfa1.vortex.core.PType; -import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.encoding.EncodingId; -import io.github.dfa1.vortex.encoding.PTypeIO; +import io.github.dfa1.vortex.encoding.PrimitiveArrays; import io.github.dfa1.vortex.proto.DeltaMetadata; import io.github.dfa1.vortex.proto.ScalarValue; import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; -import java.lang.foreign.ValueLayout; import java.nio.ByteBuffer; import java.util.List; @@ -40,12 +37,12 @@ public boolean accepts(DType dtype) { @Override public EncodeResult encode(DType dtype, Object data, EncodeContext ctx) { PType ptype = ((DType.Primitive) dtype).ptype(); - long[] longs = toLongs(data, ptype); + long[] longs = PrimitiveArrays.toLongs(data, ptype, EncodingId.FASTLANES_DELTA); int n = longs.length; int typeBits = typeBits(ptype); int lanes = lanes(ptype); long mask = typeMask(ptype); - boolean unsign = isUnsigned(ptype); + boolean unsign = ptype.isUnsigned(); long minVal = 0L; long maxVal = 0L; @@ -93,8 +90,8 @@ public EncodeResult encode(DType dtype, Object data, EncodeContext ctx) { System.arraycopy(chunkDelta, 0, deltasAll, chunk * FL_CHUNK_SIZE, FL_CHUNK_SIZE); } - MemorySegment basesSeg = fromLongs(basesAll, ptype, ctx.arena()); - MemorySegment deltasSeg = fromLongs(deltasAll, ptype, ctx.arena()); + MemorySegment basesSeg = PrimitiveArrays.fromLongs(basesAll, ptype, ctx.arena()); + MemorySegment deltasSeg = PrimitiveArrays.fromLongs(deltasAll, ptype, ctx.arena()); byte[] metaBytes = new DeltaMetadata(paddedLen, 0).encode(); @@ -120,70 +117,8 @@ private static void deltaChunk(long[] transposed, long[] bases, int lanes, int t } } - private static long[] toLongs(Object data, PType ptype) { - return switch (ptype) { - case I8 -> { - byte[] arr = (byte[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = arr[i]; - } - yield r; - } - case U8 -> { - byte[] arr = (byte[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = Byte.toUnsignedLong(arr[i]); - } - yield r; - } - case I16 -> { - short[] arr = (short[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = arr[i]; - } - yield r; - } - case U16 -> { - short[] arr = (short[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = Short.toUnsignedLong(arr[i]); - } - yield r; - } - case I32 -> { - int[] arr = (int[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = arr[i]; - } - yield r; - } - case U32 -> { - int[] arr = (int[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = Integer.toUnsignedLong(arr[i]); - } - yield r; - } - case I64, U64 -> (long[]) data; - default -> throw new VortexException(EncodingId.FASTLANES_DELTA, "unsupported ptype: " + ptype); - }; - } - - private static boolean isUnsigned(PType ptype) { - return switch (ptype) { - case U8, U16, U32, U64 -> true; - default -> false; - }; - } - private static byte[] statsBytes(PType ptype, long value) { - if (isUnsigned(ptype)) { + if (ptype.isUnsigned()) { return ScalarValue.ofUint64Value(value).encode(); } return ScalarValue.ofInt64Value(value).encode(); @@ -219,19 +154,5 @@ private static long typeMask(PType ptype) { return bits == 64 ? -1L : (1L << bits) - 1; } - private static MemorySegment fromLongs(long[] longs, PType ptype, SegmentAllocator arena) { - if (ptype == PType.I64 || ptype == PType.U64) { - MemorySegment dst = arena.allocate((long) longs.length * 8); - MemorySegment.copy(MemorySegment.ofArray(longs), ValueLayout.JAVA_LONG, 0L, dst, PTypeIO.LE_LONG, 0L, longs.length); - return dst; - } - int n = longs.length; - long elemSize = ptype.byteSize(); - MemorySegment seg = arena.allocate(n * elemSize); - for (int i = 0; i < n; i++) { - PTypeIO.set(seg, i * elemSize, ptype, longs[i]); - } - return seg; - } } diff --git a/writer/src/main/java/io/github/dfa1/vortex/writer/encode/FrameOfReferenceEncodingEncoder.java b/writer/src/main/java/io/github/dfa1/vortex/writer/encode/FrameOfReferenceEncodingEncoder.java index aaa8f15b..a1e89557 100644 --- a/writer/src/main/java/io/github/dfa1/vortex/writer/encode/FrameOfReferenceEncodingEncoder.java +++ b/writer/src/main/java/io/github/dfa1/vortex/writer/encode/FrameOfReferenceEncodingEncoder.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.encoding.EncodingId; +import io.github.dfa1.vortex.encoding.PrimitiveArrays; import io.github.dfa1.vortex.encoding.PTypeIO; import io.github.dfa1.vortex.proto.ScalarValue; @@ -34,7 +35,7 @@ public EncodeResult encode(DType dtype, Object data, EncodeContext ctx) { throw new VortexException(EncodingId.FASTLANES_FOR, "expected primitive dtype, got " + dtype); } PType ptype = p.ptype(); - long[] longs = toLongs(data, ptype); + long[] longs = PrimitiveArrays.toLongs(data, ptype, EncodingId.FASTLANES_FOR); int n = longs.length; long ref = computeRef(longs, n); @@ -52,14 +53,14 @@ public CascadeStep encodeCascade(DType dtype, Object data, EncodeContext encodeC throw new VortexException(EncodingId.FASTLANES_FOR, "expected primitive dtype, got " + dtype); } PType ptype = p.ptype(); - long[] longs = toLongs(data, ptype); + long[] longs = PrimitiveArrays.toLongs(data, ptype, EncodingId.FASTLANES_FOR); int n = longs.length; long ref = computeRef(longs, n); // Skip when ref == 0 and ptype is unsigned: residuals == input, so FOR adds metadata // overhead (ref scalar + extra node) for zero compression benefit over plain bitpack. // Matches Rust IntFoRScheme's skip estimate for this case. - if (ref == 0L && isUnsigned(ptype)) { + if (ref == 0L && ptype.isUnsigned()) { return CascadeStep.notApplicable(); } ByteBuffer meta = buildForMeta(ref, ptype); @@ -69,12 +70,6 @@ public CascadeStep encodeCascade(DType dtype, Object data, EncodeContext encodeC return new CascadeStep(partialRoot, List.of(), List.of(slot), null, null, true); } - private static boolean isUnsigned(PType ptype) { - return switch (ptype) { - case U8, U16, U32, U64 -> true; - default -> false; - }; - } private static long computeRef(long[] longs, int n) { long ref = n > 0 ? longs[0] : 0L; @@ -130,36 +125,6 @@ private static Object residualsAsNativeArray(long[] longs, long ref, PType ptype }; } - private static long[] toLongs(Object data, PType ptype) { - return switch (ptype) { - case I8, U8 -> { - byte[] arr = (byte[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = ptype == PType.U8 ? Byte.toUnsignedLong(arr[i]) : arr[i]; - } - yield r; - } - case I16, U16 -> { - short[] arr = (short[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = ptype == PType.U16 ? Short.toUnsignedLong(arr[i]) : arr[i]; - } - yield r; - } - case I32, U32 -> { - int[] arr = (int[]) data; - long[] r = new long[arr.length]; - for (int i = 0; i < arr.length; i++) { - r[i] = ptype == PType.U32 ? Integer.toUnsignedLong(arr[i]) : arr[i]; - } - yield r; - } - case I64, U64 -> (long[]) data; - default -> throw new VortexException(EncodingId.FASTLANES_FOR, "unsupported ptype: " + ptype); - }; - } private static MemorySegment toResidualBuffer(long[] longs, long ref, PType ptype, EncodeContext ctx) { int n = longs.length; diff --git a/writer/src/main/java/io/github/dfa1/vortex/writer/encode/PatchedEncodingEncoder.java b/writer/src/main/java/io/github/dfa1/vortex/writer/encode/PatchedEncodingEncoder.java index ad9dd2b0..532daffe 100644 --- a/writer/src/main/java/io/github/dfa1/vortex/writer/encode/PatchedEncodingEncoder.java +++ b/writer/src/main/java/io/github/dfa1/vortex/writer/encode/PatchedEncodingEncoder.java @@ -4,6 +4,7 @@ import io.github.dfa1.vortex.core.PType; import io.github.dfa1.vortex.core.VortexException; import io.github.dfa1.vortex.encoding.EncodingId; +import io.github.dfa1.vortex.encoding.PrimitiveArrays; import io.github.dfa1.vortex.encoding.PTypeIO; import io.github.dfa1.vortex.proto.PatchedMetadata; @@ -62,7 +63,7 @@ static CascadeStep encodeCascade(DType dtype, Object data) { return CascadeStep.notApplicable(); } PType ptype = p.ptype(); - long[] longs = toLongs(data, ptype); + long[] longs = PrimitiveArrays.toLongs(data, ptype, EncodingId.VORTEX_PATCHED); int n = longs.length; if (n == 0) { return CascadeStep.notApplicable(); @@ -96,7 +97,7 @@ static EncodeResult encode(DType dtype, Object data, EncodeContext ctx) { "expected primitive dtype, got " + dtype); } PType ptype = p.ptype(); - long[] longs = toLongs(data, ptype); + long[] longs = PrimitiveArrays.toLongs(data, ptype, EncodingId.VORTEX_PATCHED); int n = longs.length; PatchedData pd = computePatchedData(longs, ptype, n); @@ -269,60 +270,6 @@ private static Object fromLongs(long[] values, PType ptype) { }; } - private static long[] toLongs(Object data, PType ptype) { - return switch (ptype) { - case I8 -> { - byte[] a = (byte[]) data; - long[] r = new long[a.length]; - for (int i = 0; i < a.length; i++) { - r[i] = a[i]; - } - yield r; - } - case U8 -> { - byte[] a = (byte[]) data; - long[] r = new long[a.length]; - for (int i = 0; i < a.length; i++) { - r[i] = Byte.toUnsignedLong(a[i]); - } - yield r; - } - case I16 -> { - short[] a = (short[]) data; - long[] r = new long[a.length]; - for (int i = 0; i < a.length; i++) { - r[i] = a[i]; - } - yield r; - } - case U16 -> { - short[] a = (short[]) data; - long[] r = new long[a.length]; - for (int i = 0; i < a.length; i++) { - r[i] = Short.toUnsignedLong(a[i]); - } - yield r; - } - case I32 -> { - int[] a = (int[]) data; - long[] r = new long[a.length]; - for (int i = 0; i < a.length; i++) { - r[i] = a[i]; - } - yield r; - } - case U32 -> { - int[] a = (int[]) data; - long[] r = new long[a.length]; - for (int i = 0; i < a.length; i++) { - r[i] = Integer.toUnsignedLong(a[i]); - } - yield r; - } - case I64, U64 -> (long[]) data; - default -> throw new VortexException(EncodingId.VORTEX_PATCHED, "unsupported ptype: " + ptype); - }; - } } @SuppressWarnings("java:S6218") // internal data carrier; array fields are not compared for equality