Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ All notable changes to **vortex-java** are documented here.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added

- `DType.isUnsigned()` — `true` for the unsigned integer primitives (`U8`–`U64`), `false` otherwise. ([#159](https://github.com/dfa1/vortex-java/issues/159))

### Fixed

- Zone-map pruning now compares filter values in the *column's* type domain rather than by the boxed value's type. A predicate whose value is boxed at a different width (e.g. `Integer` on an `I64` column) — or any value on a `U64` column — previously pruned nothing and silently degraded to a full scan; it now prunes correctly (unsigned columns by unsigned order). As part of this, a filter value genuinely incomparable to its column (e.g. a `String` against a numeric column) now raises `VortexException` during the scan instead of silently disabling pruning — a behaviour change for callers that relied on the previous silent full scan. ([#159](https://github.com/dfa1/vortex-java/issues/159))

## [0.9.0] — 2026-06-24

Two import-only breaking changes — the `vortex-core` types moved under `io.github.dfa1.vortex.core.*`, and the no-arg `DType` factories became constants. In return, Vortex now ships with **no FlatBuffers or Protobuf runtime dependency**: the `.fbs`/`.proto` schemas compile in-house to `MemorySegment`-native Java, dropping `com.google.flatbuffers:flatbuffers-java` — the last automatic-module dependency — so a named JPMS `module-info` is viable, and the generated wire classes are prefixed so they no longer collide on your classpath (ADR 0017).
Expand Down
14 changes: 14 additions & 0 deletions core/src/main/java/io/github/dfa1/vortex/core/model/DType.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,20 @@ public sealed interface DType
/// @return `true` if null values are permitted
boolean nullable();

/// Tells whether this type is one of the unsigned integer primitives (`U8`–`U64`).
///
/// Every other type answers `false`: signed integers, floats, and all composite/extension
/// types. Handy where an unsigned value is carried in a signed `long` (e.g. zone-map
/// comparisons) and the caller has to pick unsigned ordering.
///
/// @return `true` if this is an unsigned-integer [Primitive]
default boolean isUnsigned() {
    // The switch stays exhaustive (no `default` branch) so that a newly added DType variant
    // fails to compile here until it is classified explicitly.
    return switch (this) {
        case Primitive primitive -> primitive.ptype().isUnsigned();
        case Null _, Bool _, Decimal _, Utf8 _, Binary _ -> false;
        case Struct _, List _, FixedSizeList _, Extension _, Variant _ -> false;
    };
}

/// Returns a copy of this type marked nullable. Sugar over
/// [#withNullable(boolean)] so call sites read as a fluent adjective:
/// `DType.I64.asNullable()`.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package io.github.dfa1.vortex.core.model;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;

/// Checks `DType.isUnsigned()` for unsigned primitives, the remaining primitives, and a
/// sample of non-primitive types.
class DTypeIsUnsignedTest {

    @ParameterizedTest
    @EnumSource(value = PType.class, names = {"U8", "U16", "U32", "U64"})
    void unsignedPrimitives_areUnsigned(PType pt) {
        // Given
        DType dtype = new DType.Primitive(pt, false);

        // When
        boolean unsigned = dtype.isUnsigned();

        // Then
        assertThat(unsigned).isTrue();
    }

    @ParameterizedTest
    @EnumSource(value = PType.class, names = {"I8", "I16", "I32", "I64", "F16", "F32", "F64"})
    void signedAndFloatPrimitives_areNotUnsigned(PType pt) {
        // Given
        DType dtype = new DType.Primitive(pt, false);

        // When
        boolean unsigned = dtype.isUnsigned();

        // Then
        assertThat(unsigned).isFalse();
    }

    @Test
    void nonPrimitiveTypes_areNotUnsigned() {
        // Given — a non-primitive type is never "unsigned", not even a struct whose only
        // field is a U64 column
        DType decimal = new DType.Decimal((byte) 10, (byte) 2, false);
        DType structOfU64 = new DType.Struct(List.of("u"), List.of(DType.U64), false);
        List<DType> nonPrimitives = List.of(
                DType.BOOL, DType.UTF8, DType.BINARY, DType.NULL, DType.VARIANT,
                decimal, structOfU64);

        // When / Then
        assertThat(nonPrimitives).allSatisfy(dtype -> {
            assertThat(dtype.isUnsigned()).isFalse();
        });
    }
}
80 changes: 59 additions & 21 deletions reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import java.lang.foreign.SegmentAllocator;
import java.lang.foreign.ValueLayout;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
Expand Down Expand Up @@ -85,6 +86,7 @@ public final class ScanIterator implements Iterator<Chunk>, AutoCloseable {
private List<ChunkSpec> chunks;
private List<String> projectedNames;
private List<DType> projectedDtypes;
private Map<String, DType> columnDtypes;
private int chunkIndex;
private int peekedChunkIdx = -1;
private long rowsReturned;
Expand Down Expand Up @@ -182,14 +184,57 @@ private static ChunkSpec buildChunkSpec(String[] colNames, Map<String, List<Layo
// ── Layout tree traversal ─────────────────────────────────────────────────

@SuppressWarnings("unchecked")
private static int compareValues(Object a, Object b) {
private static int compareValues(Object a, Object b, DType column) {
// Key the compare mode off the *column* type, not the boxed operand type. Stats decode
// integers as Long and floats as Float/Double, and a caller may box a filter value at the
// column's natural width (Integer for I32) or in a different width entirely. Letting the
// column decide keeps pruning width-agnostic (issue #159) without ever routing an integer
// column through double-compare (which would lose precision past 2^53 and mis-prune).
if (a instanceof Number na && b instanceof Number nb) {
if (column instanceof DType.Primitive prim) {
if (prim.ptype().isFloating()) {
return Double.compare(na.doubleValue(), nb.doubleValue());
}
// U64 stats/values store the raw 64 bits, so a value >= 2^63 is a negative Long; an
// unsigned column must compare unsigned. U8/U16/U32 are zero-extended to a positive
// Long where signed == unsigned, so this stays correct for them too.
return column.isUnsigned()
? Long.compareUnsigned(na.longValue(), nb.longValue())
: Long.compare(na.longValue(), nb.longValue());
}
// Column type unresolved (not a struct field) — fall back to a width-agnostic compare
// keyed off the operands so two valid numbers never drop into the throwing path.
if (a instanceof Double || a instanceof Float || b instanceof Double || b instanceof Float) {
return Double.compare(na.doubleValue(), nb.doubleValue());
}
return Long.compare(na.longValue(), nb.longValue());
}
try {
return ((Comparable<Object>) a).compareTo(b);
} catch (ClassCastException _) {
return 0;
} catch (ClassCastException e) {
// A genuinely incomparable filter value (e.g. a String against a numeric column) is a
// caller error — surface it instead of swallowing it into a silent no-prune.
throw new VortexException("filter value of type " + b.getClass().getSimpleName()
+ " is not comparable to the column's zone-map statistic of type "
+ a.getClass().getSimpleName(), e);
}
}

/// Looks up the declared [DType] of the column named `col`.
///
/// The name-to-type map is built on the first call from the file's struct schema and reused
/// afterwards. It stays empty when the file's dtype is not a struct, so every lookup then
/// yields `null`. Zone-map comparisons use the result to compare by the column's declared
/// type instead of by how the filter value happens to be boxed.
///
/// @return the column's declared type, or `null` if the file is not a struct or has no
///         column with that name
private DType columnDType(String col) {
    if (columnDtypes != null) {
        return columnDtypes.get(col);
    }
    // First call: publish the (initially empty) cache, then fill it from the struct schema.
    columnDtypes = new HashMap<>();
    if (file.dtype() instanceof DType.Struct schema) {
        List<String> names = schema.fieldNames();
        var types = schema.fieldTypes();
        for (int idx = 0; idx < names.size(); idx++) {
            columnDtypes.put(names.get(idx), types.get(idx));
        }
    }
    return columnDtypes.get(col);
}

private static Map<String, Array> expandStruct(StructArray sa) {
DType.Struct sd = (DType.Struct) sa.dtype();
List<String> names = sd.fieldNames();
Expand Down Expand Up @@ -708,31 +753,31 @@ private boolean canPruneChunk(ChunkSpec chunk, RowFilter filter) {
yield false;
}
Object max = readFlatStats(flat).max();
yield max != null && compareValues(max, val) <= 0;
yield max != null && compareValues(max, val, columnDType(col)) <= 0;
}
case RowFilter.Gte(var col, var val) -> {
Layout flat = chunk.layoutFor(col);
if (flat == null) {
yield false;
}
Object max = readFlatStats(flat).max();
yield max != null && compareValues(max, val) < 0;
yield max != null && compareValues(max, val, columnDType(col)) < 0;
}
case RowFilter.Lt(var col, var val) -> {
Layout flat = chunk.layoutFor(col);
if (flat == null) {
yield false;
}
Object min = readFlatStats(flat).min();
yield min != null && compareValues(min, val) >= 0;
yield min != null && compareValues(min, val, columnDType(col)) >= 0;
}
case RowFilter.Lte(var col, var val) -> {
Layout flat = chunk.layoutFor(col);
if (flat == null) {
yield false;
}
Object min = readFlatStats(flat).min();
yield min != null && compareValues(min, val) > 0;
yield min != null && compareValues(min, val, columnDType(col)) > 0;
}
case RowFilter.Eq(var col, var val) -> {
Layout flat = chunk.layoutFor(col);
Expand All @@ -745,13 +790,10 @@ private boolean canPruneChunk(ChunkSpec chunk, RowFilter filter) {
if (min == null || max == null) {
yield false;
}
try {
@SuppressWarnings("unchecked")
Comparable<Object> cv = (Comparable<Object>) val;
yield cv.compareTo(min) < 0 || cv.compareTo(max) > 0;
} catch (ClassCastException _) {
yield false;
}
// val < min || val > max → no row in this chunk can equal val. Route through the
// shared comparator so this path is width-agnostic and unsigned-aware too (#159).
DType ct = columnDType(col);
yield compareValues(val, min, ct) < 0 || compareValues(val, max, ct) > 0;
}
case RowFilter.Neq(var col, var val) -> {
Layout flat = chunk.layoutFor(col);
Expand All @@ -764,13 +806,9 @@ private boolean canPruneChunk(ChunkSpec chunk, RowFilter filter) {
if (min == null || max == null) {
yield false;
}
try {
@SuppressWarnings("unchecked")
Comparable<Object> cv = (Comparable<Object>) val;
yield cv.compareTo(min) == 0 && cv.compareTo(max) == 0;
} catch (ClassCastException _) {
yield false;
}
// Every row equals val (min == max == val) → no row is != val.
DType ct = columnDType(col);
yield compareValues(val, min, ct) == 0 && compareValues(val, max, ct) == 0;
}
case RowFilter.IsNull(var col) -> {
Layout flat = chunk.layoutFor(col);
Expand Down
Loading
Loading