Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions core/src/main/java/io/github/dfa1/vortex/core/PType.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ public boolean isSigned() {
|| this == F16 || this == F32 || this == F64;
}

/// Tells whether this ptype is one of the unsigned integer types (`U8`–`U64`).
///
/// Implemented as the negation of [#isSigned()]: that method answers `true` for the signed
/// integers and for the floating-point types, so whatever it rejects is an unsigned integer.
///
/// @return `true` if this ptype is an unsigned integer, `false` otherwise
public boolean isUnsigned() {
    boolean signedOrFloating = isSigned();
    return !signedOrFloating;
}

/// Returns the [PType] for the given enum ordinal — the integer value the wire format
/// uses to identify a physical type.
///
Expand Down
109 changes: 109 additions & 0 deletions core/src/main/java/io/github/dfa1/vortex/encoding/PrimitiveArrays.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package io.github.dfa1.vortex.encoding;

import io.github.dfa1.vortex.core.PType;
import io.github.dfa1.vortex.core.VortexException;

import java.lang.foreign.MemorySegment;
import java.lang.foreign.SegmentAllocator;
import java.lang.foreign.ValueLayout;

/// Helpers that move integer data between three shapes: a Java primitive array handed around as
/// an `Object`, a widened `long[]`, and a little-endian off-heap segment.
///
/// [#toLongs(Object, PType, EncodingId)] widens an 8–64 bit integer array to `long[]`, and
/// [#fromLongs(long[], PType, SegmentAllocator)] goes the other way, writing a `long[]` into a
/// newly allocated segment at the target element width. Any ptype other than the eight integer
/// ones is rejected by `toLongs`; floating-point data is not meant to pass through this class.
public final class PrimitiveArrays {

    /// Static utility holder; never instantiated.
    private PrimitiveArrays() {
    }

    /// Produces a `long[]` view of an integer array: signed ptypes are sign-extended, unsigned
    /// ptypes are zero-extended. For I64/U64 the input array itself is returned, not a copy.
    ///
    /// @param data the source array; its runtime type has to agree with `ptype`
    ///             (`byte[]` for I8/U8, `short[]` for I16/U16, `int[]` for I32/U32, `long[]` for I64/U64)
    /// @param ptype the primitive type describing the elements of `data`
    /// @param encoding the encoding on whose behalf the widening runs; passed to the exception
    ///                 raised for an unsupported ptype
    /// @return the elements of `data` as 64-bit values, in the same order
    /// @throws VortexException if `ptype` is not one of the eight integer ptypes
    public static long[] toLongs(Object data, PType ptype, EncodingId encoding) {
        switch (ptype) {
            case I8:
                return widen((byte[]) data, false);
            case U8:
                return widen((byte[]) data, true);
            case I16:
                return widen((short[]) data, false);
            case U16:
                return widen((short[]) data, true);
            case I32:
                return widen((int[]) data, false);
            case U32:
                return widen((int[]) data, true);
            case I64:
            case U64:
                // Already 64 bits wide: hand back the caller's array untouched.
                return (long[]) data;
            default:
                throw new VortexException(encoding, "unsupported ptype: " + ptype);
        }
    }

    /// Widens each byte to a `long`, zero-extending when `zeroExtend` is set and sign-extending otherwise.
    private static long[] widen(byte[] narrow, boolean zeroExtend) {
        long[] wide = new long[narrow.length];
        if (zeroExtend) {
            for (int k = 0; k < wide.length; k++) {
                wide[k] = Byte.toUnsignedLong(narrow[k]);
            }
        } else {
            for (int k = 0; k < wide.length; k++) {
                wide[k] = narrow[k];
            }
        }
        return wide;
    }

    /// Widens each short to a `long`, zero-extending when `zeroExtend` is set and sign-extending otherwise.
    private static long[] widen(short[] narrow, boolean zeroExtend) {
        long[] wide = new long[narrow.length];
        if (zeroExtend) {
            for (int k = 0; k < wide.length; k++) {
                wide[k] = Short.toUnsignedLong(narrow[k]);
            }
        } else {
            for (int k = 0; k < wide.length; k++) {
                wide[k] = narrow[k];
            }
        }
        return wide;
    }

    /// Widens each int to a `long`, zero-extending when `zeroExtend` is set and sign-extending otherwise.
    private static long[] widen(int[] narrow, boolean zeroExtend) {
        long[] wide = new long[narrow.length];
        if (zeroExtend) {
            for (int k = 0; k < wide.length; k++) {
                wide[k] = Integer.toUnsignedLong(narrow[k]);
            }
        } else {
            for (int k = 0; k < wide.length; k++) {
                wide[k] = narrow[k];
            }
        }
        return wide;
    }

    /// Copies a `long[]` into a newly allocated off-heap segment with one element of `ptype`'s
    /// byte width per input value. Counterpart of [#toLongs(Object, PType, EncodingId)].
    ///
    /// I64/U64 take a single bulk copy using the little-endian long layout. Every other ptype is
    /// written one element at a time through [PTypeIO#set(MemorySegment, long, PType, long)],
    /// which is what narrows each value to the target width.
    ///
    /// @param longs the 64-bit values to store
    /// @param ptype the primitive type that fixes the element width of the result
    /// @param arena allocator that provides the output segment
    /// @return a segment of `longs.length` elements, each `ptype.byteSize()` bytes wide
    public static MemorySegment fromLongs(long[] longs, PType ptype, SegmentAllocator arena) {
        boolean fullWidth = ptype == PType.I64 || ptype == PType.U64;
        if (!fullWidth) {
            int count = longs.length;
            long stride = ptype.byteSize();
            MemorySegment narrowed = arena.allocate(count * stride);
            // Walk the byte offset alongside the values instead of recomputing index * stride.
            long offset = 0L;
            for (long value : longs) {
                PTypeIO.set(narrowed, offset, ptype, value);
                offset += stride;
            }
            return narrowed;
        }
        MemorySegment bulk = arena.allocate((long) longs.length * Long.BYTES);
        MemorySegment heapView = MemorySegment.ofArray(longs);
        MemorySegment.copy(heapView, ValueLayout.JAVA_LONG, 0L, bulk, PTypeIO.LE_LONG, 0L, longs.length);
        return bulk;
    }
}
21 changes: 21 additions & 0 deletions core/src/test/java/io/github/dfa1/vortex/core/PTypeTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,27 @@ void isSigned_falseForUnsigned(PType ptype) {
assertThat(ptype.isSigned()).isFalse();
}

@ParameterizedTest
@EnumSource(value = PType.class, names = {"U8", "U16", "U32", "U64"})
void isUnsigned_trueForUnsigned(PType ptype) {
    // Given one of the four unsigned integer ptypes / When
    boolean unsigned = ptype.isUnsigned();

    // Then it reports itself as unsigned
    assertThat(unsigned).isTrue();
}

@ParameterizedTest
@EnumSource(value = PType.class, names = {"I8", "I16", "I32", "I64", "F16", "F32", "F64"})
void isUnsigned_falseForSignedAndFloats(PType ptype) {
    // Given a signed integer or floating-point ptype / When
    boolean unsigned = ptype.isUnsigned();

    // Then it does not report itself as unsigned
    assertThat(unsigned).isFalse();
}

@ParameterizedTest
@EnumSource(PType.class)
void isUnsigned_isExactComplementOfIsSigned(PType ptype) {
    // Given any ptype / When both predicates are evaluated
    boolean unsigned = ptype.isUnsigned();
    boolean signed = ptype.isSigned();

    // Then exactly one of them holds, so together they partition the enum
    assertThat(unsigned).isNotEqualTo(signed);
}

@ParameterizedTest
@EnumSource(PType.class)
void fromOrdinal_roundTrips(PType ptype) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
package io.github.dfa1.vortex.encoding;

import io.github.dfa1.vortex.core.PType;
import io.github.dfa1.vortex.core.VortexException;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

class PrimitiveArraysTest {

    @Test
    void toLongs_i8_signExtends() {
        // Given bytes covering zero, both signs and both extremes
        byte[] bytes = {0, 1, -1, Byte.MIN_VALUE, Byte.MAX_VALUE};
        long[] expected = {0L, 1L, -1L, -128L, 127L};

        // When
        long[] widened = PrimitiveArrays.toLongs(bytes, PType.I8, EncodingId.FASTLANES_DELTA);

        // Then each value keeps its sign at 64 bits
        assertThat(widened).containsExactly(expected);
    }

    @Test
    void toLongs_u8_zeroExtends() {
        // Given bytes with the top bit set, which Java itself would read as negative
        byte[] bytes = {0, 1, -1, Byte.MIN_VALUE};
        long[] expected = {0L, 1L, 255L, 128L};

        // When
        long[] widened = PrimitiveArrays.toLongs(bytes, PType.U8, EncodingId.FASTLANES_DELTA);

        // Then the raw bit patterns land in 0..255
        assertThat(widened).containsExactly(expected);
    }

    @Test
    void toLongs_i16_signExtends() {
        // Given shorts covering zero, -1 and both extremes
        short[] shorts = {0, -1, Short.MIN_VALUE, Short.MAX_VALUE};
        long[] expected = {0L, -1L, -32768L, 32767L};

        // When
        long[] widened = PrimitiveArrays.toLongs(shorts, PType.I16, EncodingId.FASTLANES_DELTA);

        // Then each value keeps its sign at 64 bits
        assertThat(widened).containsExactly(expected);
    }

    @Test
    void toLongs_u16_zeroExtends() {
        // Given shorts with the top bit set
        short[] shorts = {-1, Short.MIN_VALUE};
        long[] expected = {65535L, 32768L};

        // When
        long[] widened = PrimitiveArrays.toLongs(shorts, PType.U16, EncodingId.FASTLANES_DELTA);

        // Then the raw bit patterns land in 0..65535
        assertThat(widened).containsExactly(expected);
    }

    @Test
    void toLongs_i32_signExtends() {
        // Given ints covering zero, -1 and both extremes
        int[] ints = {0, -1, Integer.MIN_VALUE, Integer.MAX_VALUE};
        long[] expected = {0L, -1L, Integer.MIN_VALUE, Integer.MAX_VALUE};

        // When
        long[] widened = PrimitiveArrays.toLongs(ints, PType.I32, EncodingId.FASTLANES_DELTA);

        // Then each value keeps its sign at 64 bits
        assertThat(widened).containsExactly(expected);
    }

    @Test
    void toLongs_u32_zeroExtends() {
        // Given ints with the top bit set
        int[] ints = {-1, Integer.MIN_VALUE};
        long[] expected = {4294967295L, 2147483648L};

        // When
        long[] widened = PrimitiveArrays.toLongs(ints, PType.U32, EncodingId.FASTLANES_DELTA);

        // Then the raw bit patterns land in 0..2^32-1
        assertThat(widened).containsExactly(expected);
    }

    @Test
    void toLongs_i64_returnsSameArrayNoCopy() {
        // Given an array that is already 64 bits wide
        long[] longs = {1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE};

        // When
        long[] widened = PrimitiveArrays.toLongs(longs, PType.I64, EncodingId.FASTLANES_DELTA);

        // Then the very same array instance comes back; nothing is copied
        assertThat(widened).isSameAs(longs);
    }

    @ParameterizedTest
    @EnumSource(value = PType.class, names = {"F16", "F32", "F64"})
    void toLongs_floatingPtypes_throwWithSuppliedEncodingId(PType ptype) {
        // Given a floating-point ptype, which toLongs does not widen, and FASTLANES_FOR as the
        // caller-supplied encoding id
        String expectedFragment = "unsupported ptype: " + ptype;

        // When / Then a VortexException naming the rejected ptype is raised. Only the exception
        // type and message are asserted; the encoding attribution itself is not inspected.
        assertThatThrownBy(() -> PrimitiveArrays.toLongs(new float[1], ptype, EncodingId.FASTLANES_FOR))
                .isInstanceOf(VortexException.class)
                .hasMessageContaining(expectedFragment);
    }

    @ParameterizedTest
    @EnumSource(value = PType.class, names = {"I8", "U8", "I16", "U16", "I32", "U32", "I64", "U64"})
    void fromLongs_roundTripsThroughToLongs(PType ptype) {
        // Given small non-negative values, which fit every integer width without loss
        long[] values = {0L, 1L, 2L, 7L, 42L};

        try (Arena arena = Arena.ofConfined()) {
            // When the values are written out at the ptype's width
            MemorySegment written = PrimitiveArrays.fromLongs(values, ptype, arena);

            // Then the segment is exactly one element per value wide...
            long expectedBytes = (long) values.length * ptype.byteSize();
            assertThat(written.byteSize()).isEqualTo(expectedBytes);

            // ...and reading each slot back yields the value that was stored
            int index = 0;
            for (long expected : values) {
                assertThat(readElement(written, ptype, index)).isEqualTo(expected);
                index++;
            }
        }
    }

    @Test
    void fromLongs_i64_writesLittleEndian() {
        // Given one value in which every byte is different
        final long pattern = 0x0102_0304_0506_0708L;
        long[] values = {pattern};

        try (Arena arena = Arena.ofConfined()) {
            // When it goes through the bulk I64 path
            MemorySegment written = PrimitiveArrays.fromLongs(values, PType.I64, arena);

            // Then the lowest byte is stored first, and a little-endian read restores the value
            byte firstByte = written.get(ValueLayout.JAVA_BYTE, 0);
            assertThat(firstByte).isEqualTo((byte) 0x08);
            assertThat(written.getAtIndex(PTypeIO.LE_LONG, 0)).isEqualTo(pattern);
        }
    }

    @Test
    void fromLongs_narrowWidth_keepsOnlyLowBytes() {
        // Given a value that needs more than one byte
        long[] values = {0x1234_5678L};

        try (Arena arena = Arena.ofConfined()) {
            // When it is written at I8 width (one byte per element)
            MemorySegment written = PrimitiveArrays.fromLongs(values, PType.I8, arena);

            // Then a single byte is stored and it is the lowest byte of the input
            assertThat(written.byteSize()).isEqualTo(1L);
            byte stored = written.get(ValueLayout.JAVA_BYTE, 0);
            assertThat(stored).isEqualTo((byte) 0x78);
        }
    }

    /// Reads element `i` from `seg` at the width of `ptype` and widens it to `long` using plain
    /// Java widening of the value read (so byte/short/int reads are sign-extended).
    private static long readElement(MemorySegment seg, PType ptype, int i) {
        switch (ptype) {
            case I8:
            case U8:
                return seg.get(ValueLayout.JAVA_BYTE, i);
            case I16:
            case U16:
                return seg.getAtIndex(PTypeIO.LE_SHORT, i);
            case I32:
            case U32:
                return seg.getAtIndex(PTypeIO.LE_INT, i);
            case I64:
            case U64:
                return seg.getAtIndex(PTypeIO.LE_LONG, i);
            default:
                throw new IllegalArgumentException("not an integer ptype: " + ptype);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import io.github.dfa1.vortex.core.PType;
import io.github.dfa1.vortex.core.VortexException;
import io.github.dfa1.vortex.encoding.EncodingId;
import io.github.dfa1.vortex.encoding.PrimitiveArrays;
import io.github.dfa1.vortex.encoding.PTypeIO;
import io.github.dfa1.vortex.proto.DeltaMetadata;
import io.github.dfa1.vortex.reader.array.Array;
Expand All @@ -14,7 +15,6 @@

import java.io.IOException;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.SegmentAllocator;
import java.lang.foreign.ValueLayout;
import java.nio.ByteBuffer;

Expand Down Expand Up @@ -107,7 +107,7 @@ public Array decode(DecodeContext ctx) {
long[] result = new long[(int) rowCount];
System.arraycopy(decoded, offset, result, 0, (int) rowCount);

MemorySegment seg = fromLongs(result, ptype, ctx.arena());
MemorySegment seg = PrimitiveArrays.fromLongs(result, ptype, ctx.arena());
return switch (ptype) {
case I64, U64 -> new MaterializedLongArray(ctx.dtype(), rowCount, seg);
case I32, U32 -> new MaterializedIntArray(ctx.dtype(), rowCount, seg);
Expand Down Expand Up @@ -179,19 +179,4 @@ private static long typeMask(PType ptype) {
return bits == 64 ? -1L : (1L << bits) - 1;
}

/// Copies `longs` into a newly allocated segment with one element of `ptype`'s byte width per
/// value. I64/U64 are bulk-copied with the little-endian long layout; every other ptype is
/// written element by element through `PTypeIO.set`.
private static MemorySegment fromLongs(long[] longs, PType ptype, SegmentAllocator arena) {
    final boolean fullWidth = ptype == PType.I64 || ptype == PType.U64;
    if (fullWidth) {
        final MemorySegment bulk = arena.allocate((long) longs.length * Long.BYTES);
        final MemorySegment heapView = MemorySegment.ofArray(longs);
        MemorySegment.copy(heapView, ValueLayout.JAVA_LONG, 0L, bulk, PTypeIO.LE_LONG, 0L, longs.length);
        return bulk;
    }
    final int count = longs.length;
    final long stride = ptype.byteSize();
    final MemorySegment out = arena.allocate(count * stride);
    // Advance the byte offset by one element per value written.
    long offset = 0L;
    for (long value : longs) {
        PTypeIO.set(out, offset, ptype, value);
        offset += stride;
    }
    return out;
}

}
Loading