From acbaa0b87d688c109924a92e2134ad3a76ad82bc Mon Sep 17 00:00:00 2001 From: Davide Angelocola Date: Wed, 24 Jun 2026 23:38:10 +0200 Subject: [PATCH] fix(reader): zone-map pruning compares in the column's type domain (#159) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ScanIterator's comparator caught ClassCastException and returned 0, which canPruneChunk reads as "cannot prune". Stats decode integers as Long and floats as Float/Double, so a filter value boxed at a different width (Integer for I64, Float for F32) threw internally and silently disabled pruning — a valid, selective predicate degraded to a full scan with no signal. The comparison now keys off the *column* type, not the boxed operand: - floating column -> Double.compare; - unsigned int column -> Long.compareUnsigned (U64 stats/values store raw bits, so a value >= 2^63 is a negative Long; signed compare keeps/drops the wrong chunks). U8/U16/U32 zero-extend to a positive Long, unaffected; - signed int column -> Long.compare. Keying off the column also avoids routing an integer column through double-compare, which would lose precision past 2^53 and mis-prune. Eq/Neq previously had their own inline comparator with the same swallow; they now route through the shared one. A genuinely incomparable filter value (e.g. a String against a numeric column) now raises VortexException instead of a silent no-prune — a behaviour change, noted in the changelog. Adds DType.isUnsigned() (exhaustive over the sealed set) to classify the column. Coverage — ZoneMapPruningTest (27): BoxedWidth (Integer == Long, all six operators), Unsigned (U64 >= 2^63 keep/prune correctness), FloatWidths (F32 stat vs Double/Float filters), IntegerColumnFloatFilter (Double filter on I64 compares in the integer domain past 2^53), TypeMismatch (String throws). Plus DTypeIsUnsignedTest. Closes #159. 
Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 10 + .../github/dfa1/vortex/core/model/DType.java | 14 + .../core/model/DTypeIsUnsignedTest.java | 38 +++ .../dfa1/vortex/reader/ScanIterator.java | 80 ++++-- .../vortex/writer/ZoneMapPruningTest.java | 248 +++++++++++++++++- 5 files changed, 364 insertions(+), 26 deletions(-) create mode 100644 core/src/test/java/io/github/dfa1/vortex/core/model/DTypeIsUnsignedTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index d48b9241..593fcc24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,16 @@ All notable changes to **vortex-java** are documented here. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- `DType.isUnsigned()` — `true` for the unsigned integer primitives (`U8`–`U64`), `false` otherwise. ([#159](https://github.com/dfa1/vortex-java/issues/159)) + +### Fixed + +- Zone-map pruning now compares filter values in the *column's* type domain rather than by the boxed value's type. A predicate whose value is boxed at a different width (e.g. `Integer` on an `I64` column) previously pruned nothing and silently degraded to a full scan, and a predicate on a `U64` column was compared in signed order, so values `>= 2^63` could keep or drop the wrong chunks; both cases now prune correctly (unsigned columns by unsigned order). As part of this, a filter value genuinely incomparable to its column (e.g. a `String` against a numeric column) now raises `VortexException` during the scan instead of silently disabling pruning — a behaviour change for callers that relied on the previous silent full scan. ([#159](https://github.com/dfa1/vortex-java/issues/159)) + ## [0.9.0] — 2026-06-24 Two import-only breaking changes — the `vortex-core` types moved under `io.github.dfa1.vortex.core.*`, and the no-arg `DType` factories became constants. 
In return, Vortex now ships with **no FlatBuffers or Protobuf runtime dependency**: the `.fbs`/`.proto` schemas compile in-house to `MemorySegment`-native Java, dropping `com.google.flatbuffers:flatbuffers-java` — the last automatic-module dependency — so a named JPMS `module-info` is viable, and the generated wire classes are prefixed so they no longer collide on your classpath (ADR 0017). diff --git a/core/src/main/java/io/github/dfa1/vortex/core/model/DType.java b/core/src/main/java/io/github/dfa1/vortex/core/model/DType.java index ecfbe940..81277e5f 100644 --- a/core/src/main/java/io/github/dfa1/vortex/core/model/DType.java +++ b/core/src/main/java/io/github/dfa1/vortex/core/model/DType.java @@ -26,6 +26,20 @@ public sealed interface DType /// @return `true` if null values are permitted boolean nullable(); + /// Returns whether this is an unsigned integer type (`U8`–`U64`). `false` for every other + /// type, including signed integers, floats, and the composite/extension types. Useful where + /// unsigned values are stored in a signed `long` (e.g. zone-map comparisons), so the caller + /// knows to use unsigned ordering. + /// + /// @return `true` if this is an unsigned-integer [Primitive] + default boolean isUnsigned() { + return switch (this) { + case Primitive(var pt, _) -> pt.isUnsigned(); + case Null _, Bool _, Decimal _, Utf8 _, Binary _, Struct _, List _, + FixedSizeList _, Extension _, Variant _ -> false; + }; + } + /// Returns a copy of this type marked nullable. Sugar over /// [#withNullable(boolean)] so call sites read as a fluent adjective: /// `DType.I64.asNullable()`. 
diff --git a/core/src/test/java/io/github/dfa1/vortex/core/model/DTypeIsUnsignedTest.java b/core/src/test/java/io/github/dfa1/vortex/core/model/DTypeIsUnsignedTest.java new file mode 100644 index 00000000..07f75f35 --- /dev/null +++ b/core/src/test/java/io/github/dfa1/vortex/core/model/DTypeIsUnsignedTest.java @@ -0,0 +1,38 @@ +package io.github.dfa1.vortex.core.model; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +class DTypeIsUnsignedTest { + + @ParameterizedTest + @EnumSource(value = PType.class, names = {"U8", "U16", "U32", "U64"}) + void unsignedPrimitives_areUnsigned(PType pt) { + // Given / When / Then + assertThat(new DType.Primitive(pt, false).isUnsigned()).isTrue(); + } + + @ParameterizedTest + @EnumSource(value = PType.class, names = {"I8", "I16", "I32", "I64", "F16", "F32", "F64"}) + void signedAndFloatPrimitives_areNotUnsigned(PType pt) { + // Given / When / Then + assertThat(new DType.Primitive(pt, false).isUnsigned()).isFalse(); + } + + @Test + void nonPrimitiveTypes_areNotUnsigned() { + // Given — composite/extension types are never "unsigned", even one that wraps a U64 column + List types = List.of( + DType.BOOL, DType.UTF8, DType.BINARY, DType.NULL, DType.VARIANT, + new DType.Decimal((byte) 10, (byte) 2, false), + new DType.Struct(List.of("u"), List.of(DType.U64), false)); + + // When / Then + assertThat(types).allSatisfy(t -> assertThat(t.isUnsigned()).isFalse()); + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java index 10eed37c..7baeaf09 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/ScanIterator.java @@ -43,6 +43,7 @@ import java.lang.foreign.SegmentAllocator; import 
java.lang.foreign.ValueLayout; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; @@ -85,6 +86,7 @@ public final class ScanIterator implements Iterator, AutoCloseable { private List chunks; private List projectedNames; private List projectedDtypes; + private Map columnDtypes; private int chunkIndex; private int peekedChunkIdx = -1; private long rowsReturned; @@ -182,14 +184,57 @@ private static ChunkSpec buildChunkSpec(String[] colNames, Map= 2^63 is a negative Long; an + // unsigned column must compare unsigned. U8/U16/U32 are zero-extended to a positive + // Long where signed == unsigned, so this stays correct for them too. + return column.isUnsigned() + ? Long.compareUnsigned(na.longValue(), nb.longValue()) + : Long.compare(na.longValue(), nb.longValue()); + } + // Column type unresolved (not a struct field) — fall back to a width-agnostic compare + // keyed off the operands so two valid numbers never drop into the throwing path. + if (a instanceof Double || a instanceof Float || b instanceof Double || b instanceof Float) { + return Double.compare(na.doubleValue(), nb.doubleValue()); + } + return Long.compare(na.longValue(), nb.longValue()); + } try { return ((Comparable) a).compareTo(b); - } catch (ClassCastException _) { - return 0; + } catch (ClassCastException e) { + // A genuinely incomparable filter value (e.g. a String against a numeric column) is a + // caller error — surface it instead of swallowing it into a silent no-prune. + throw new VortexException("filter value of type " + b.getClass().getSimpleName() + + " is not comparable to the column's zone-map statistic of type " + + a.getClass().getSimpleName(), e); } } + /// Returns the declared [DType] of column `col`, or `null` if the file is not a struct or has + /// no such column. 
Resolved once from the file's struct schema and cached; used to drive + /// zone-map comparisons by the column's true type rather than the filter value's boxing. + private DType columnDType(String col) { + if (columnDtypes == null) { + columnDtypes = new HashMap<>(); + if (file.dtype() instanceof DType.Struct struct) { + for (int i = 0; i < struct.fieldNames().size(); i++) { + columnDtypes.put(struct.fieldNames().get(i), struct.fieldTypes().get(i)); + } + } + } + return columnDtypes.get(col); + } + private static Map expandStruct(StructArray sa) { DType.Struct sd = (DType.Struct) sa.dtype(); List names = sd.fieldNames(); @@ -708,7 +753,7 @@ private boolean canPruneChunk(ChunkSpec chunk, RowFilter filter) { yield false; } Object max = readFlatStats(flat).max(); - yield max != null && compareValues(max, val) <= 0; + yield max != null && compareValues(max, val, columnDType(col)) <= 0; } case RowFilter.Gte(var col, var val) -> { Layout flat = chunk.layoutFor(col); @@ -716,7 +761,7 @@ private boolean canPruneChunk(ChunkSpec chunk, RowFilter filter) { yield false; } Object max = readFlatStats(flat).max(); - yield max != null && compareValues(max, val) < 0; + yield max != null && compareValues(max, val, columnDType(col)) < 0; } case RowFilter.Lt(var col, var val) -> { Layout flat = chunk.layoutFor(col); @@ -724,7 +769,7 @@ private boolean canPruneChunk(ChunkSpec chunk, RowFilter filter) { yield false; } Object min = readFlatStats(flat).min(); - yield min != null && compareValues(min, val) >= 0; + yield min != null && compareValues(min, val, columnDType(col)) >= 0; } case RowFilter.Lte(var col, var val) -> { Layout flat = chunk.layoutFor(col); @@ -732,7 +777,7 @@ private boolean canPruneChunk(ChunkSpec chunk, RowFilter filter) { yield false; } Object min = readFlatStats(flat).min(); - yield min != null && compareValues(min, val) > 0; + yield min != null && compareValues(min, val, columnDType(col)) > 0; } case RowFilter.Eq(var col, var val) -> { Layout flat = 
chunk.layoutFor(col); @@ -745,13 +790,10 @@ private boolean canPruneChunk(ChunkSpec chunk, RowFilter filter) { if (min == null || max == null) { yield false; } - try { - @SuppressWarnings("unchecked") - Comparable cv = (Comparable) val; - yield cv.compareTo(min) < 0 || cv.compareTo(max) > 0; - } catch (ClassCastException _) { - yield false; - } + // val < min || val > max → no row in this chunk can equal val. Route through the + // shared comparator so this path is width-agnostic and unsigned-aware too (#159). + DType ct = columnDType(col); + yield compareValues(val, min, ct) < 0 || compareValues(val, max, ct) > 0; } case RowFilter.Neq(var col, var val) -> { Layout flat = chunk.layoutFor(col); @@ -764,13 +806,9 @@ private boolean canPruneChunk(ChunkSpec chunk, RowFilter filter) { if (min == null || max == null) { yield false; } - try { - @SuppressWarnings("unchecked") - Comparable cv = (Comparable) val; - yield cv.compareTo(min) == 0 && cv.compareTo(max) == 0; - } catch (ClassCastException _) { - yield false; - } + // Every row equals val (min == max == val) → no row is != val. 
+ DType ct = columnDType(col); + yield compareValues(val, min, ct) == 0 && compareValues(val, max, ct) == 0; } case RowFilter.IsNull(var col) -> { Layout flat = chunk.layoutFor(col); diff --git a/writer/src/test/java/io/github/dfa1/vortex/writer/ZoneMapPruningTest.java b/writer/src/test/java/io/github/dfa1/vortex/writer/ZoneMapPruningTest.java index 911e0810..4e6e7713 100644 --- a/writer/src/test/java/io/github/dfa1/vortex/writer/ZoneMapPruningTest.java +++ b/writer/src/test/java/io/github/dfa1/vortex/writer/ZoneMapPruningTest.java @@ -1,5 +1,6 @@ package io.github.dfa1.vortex.writer; +import io.github.dfa1.vortex.core.error.VortexException; import io.github.dfa1.vortex.core.model.DType; import io.github.dfa1.vortex.reader.Chunk; import io.github.dfa1.vortex.reader.ReadRegistry; @@ -7,10 +8,12 @@ import io.github.dfa1.vortex.reader.ScanOptions; import io.github.dfa1.vortex.reader.VortexReader; import io.github.dfa1.vortex.reader.array.LongArray; -import io.github.dfa1.vortex.reader.decode.PrimitiveEncodingDecoder; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; import java.nio.channels.FileChannel; @@ -19,8 +22,13 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.function.Function; +import java.util.stream.Stream; + +import static org.junit.jupiter.params.provider.Arguments.arguments; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; /// Round-trip tests verifying zone-map chunk pruning via RowFilter. 
class ZoneMapPruningTest { @@ -30,6 +38,11 @@ class ZoneMapPruningTest { List.of(DType.I64), false); + private static final DType.Struct U64_SCHEMA = new DType.Struct( + List.of("id"), + List.of(DType.U64), + false); + // Three chunks: id in [1..50], [51..100], [101..150] private static Path writeThreeChunks(Path tmp) throws IOException { Path file = tmp.resolve("three_chunks.vtx"); @@ -45,7 +58,7 @@ private static Path writeThreeChunks(Path tmp) throws IOException { /// Returns one entry per surviving chunk: its row count after filter pruning. private static List scanRowCounts(Path file, RowFilter filter) throws IOException { var opts = new ScanOptions(List.of(), filter, ScanOptions.NO_LIMIT); - var registry = primitiveRegistry(); + var registry = registry(); var rowCounts = new ArrayList(); try (var vf = VortexReader.open(file, registry); var iter = vf.scan(opts)) { @@ -62,8 +75,94 @@ private static long[] range(long from, long to) { return arr; } - private static ReadRegistry primitiveRegistry() { - return ReadRegistry.builder().register(new PrimitiveEncodingDecoder()).build(); + private static Path writeI64Chunk(Path tmp, long... values) throws IOException { + Path file = tmp.resolve("i64_one.vtx"); + try (var ch = FileChannel.open(file, StandardOpenOption.CREATE, StandardOpenOption.WRITE); + var sut = VortexWriter.create(ch, SCHEMA, WriteOptions.defaults())) { + sut.writeChunk(Map.of("id", values)); + } + return file; + } + + // Service-loaded so any encoding the writer picks (e.g. ALP for the F32 column) decodes; the + // tests exercise zone-map pruning, not a specific decoder. + private static ReadRegistry registry() { + return ReadRegistry.builder().registerServiceLoaded().build(); + } + + private static final DType.Struct F32_SCHEMA = new DType.Struct( + List.of("v"), + List.of(DType.F32), + false); + + /// 2^63 as raw bits: `Long.MIN_VALUE` is the smallest *signed* long but 2^63 *unsigned*. 
+ private static final long TWO_POW_63 = Long.MIN_VALUE; + + // Two U64 chunks: a small chunk [10..20] and a chunk of values >= 2^63 (negative as a signed + // long), so signed and unsigned ordering diverge. + private static Path writeU64Chunks(Path tmp) throws IOException { + Path file = tmp.resolve("u64_chunks.vtx"); + try (var ch = FileChannel.open(file, StandardOpenOption.CREATE, StandardOpenOption.WRITE); + var sut = VortexWriter.create(ch, U64_SCHEMA, WriteOptions.defaults())) { + sut.writeChunk(Map.of("id", range(10L, 20L))); // [10..20], 11 rows + sut.writeChunk(Map.of("id", urange(TWO_POW_63 + 10, 11))); // [2^63+10 .. 2^63+20], 11 rows + } + return file; + } + + private static long[] urange(long fromBits, int n) { + long[] arr = new long[n]; + for (int i = 0; i < n; i++) { + arr[i] = fromBits + i; + } + return arr; + } + + // Three F32 chunks: v in [1..50], [51..100], [101..150]. F32 zone stats decode as Float, so a + // Double or Float filter value must still compare via the shared width-agnostic path. + private static Path writeF32Chunks(Path tmp) throws IOException { + Path file = tmp.resolve("f32_chunks.vtx"); + try (var ch = FileChannel.open(file, StandardOpenOption.CREATE, StandardOpenOption.WRITE); + var sut = VortexWriter.create(ch, F32_SCHEMA, WriteOptions.defaults())) { + sut.writeChunk(Map.of("v", f32range(1, 50))); + sut.writeChunk(Map.of("v", f32range(51, 100))); + sut.writeChunk(Map.of("v", f32range(101, 150))); + } + return file; + } + + private static float[] f32range(int from, int to) { + float[] arr = new float[to - from + 1]; + for (int i = 0; i < arr.length; i++) { + arr[i] = from + i; + } + return arr; + } + + // Per-operator filter factories (value boxed by the caller), so the parameterized groups can + // drive every comparison operator through one shared body. 
+ private static Function gt(String col) { + return v -> RowFilter.gt(col, (Comparable) v); + } + + private static Function gte(String col) { + return v -> RowFilter.gte(col, (Comparable) v); + } + + private static Function lt(String col) { + return v -> RowFilter.lt(col, (Comparable) v); + } + + private static Function lte(String col) { + return v -> RowFilter.lte(col, (Comparable) v); + } + + private static Function eq(String col) { + return v -> RowFilter.eq(col, v); + } + + private static Function neq(String col) { + return v -> RowFilter.neq(col, v); } @Test @@ -182,7 +281,7 @@ void and_allCollectedValuesInsideRange(@TempDir Path tmp) throws IOException { private List collectMatching(Path file, ScanOptions opts, java.util.function.LongPredicate predicate) throws IOException { var result = new ArrayList(); - try (VortexReader vf = VortexReader.open(file, primitiveRegistry()); + try (VortexReader vf = VortexReader.open(file, registry()); var iter = vf.scan(opts)) { while (iter.hasNext()) { try (Chunk c = iter.next()) { @@ -199,4 +298,143 @@ private List collectMatching(Path file, ScanOptions opts, java.util.functi return result; } } + + // ── #159: width-agnostic + unsigned-aware zone-map comparator ────────────── + + /// Every operator must prune identically whether the filter value is boxed at the column's + /// natural width (`Integer`) or the stat's storage width (`Long`) — the core of #159. `Eq`/`Neq` + /// previously used their own swallow-on-mismatch comparator, so they are covered here too. 
+ @Nested + class BoxedWidth { + + static Stream ops() { + return Stream.of( + arguments("gt 75", gt("id"), List.of(50L, 50L)), + arguments("gte 75", gte("id"), List.of(50L, 50L)), + arguments("lt 75", lt("id"), List.of(50L, 50L)), + arguments("lte 75", lte("id"), List.of(50L, 50L)), + arguments("eq 75", eq("id"), List.of(50L)), + arguments("neq 75", neq("id"), List.of(50L, 50L, 50L))); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("ops") + void integerValuePrunesLikeLong(String name, Function make, List expected, + @TempDir Path tmp) throws IOException { + // Given — id in [1..50],[51..100],[101..150] + Path file = writeThreeChunks(tmp); + + // When — the same numeric value, boxed first as Long (stat width) then as Integer + List asLong = scanRowCounts(file, make.apply(75L)); + List asInt = scanRowCounts(file, make.apply(75)); + + // Then — both prune to the expected survivors (before #159 the Integer case pruned nothing) + assertThat(asLong).containsExactlyElementsOf(expected); + assertThat(asInt).containsExactlyElementsOf(expected); + } + } + + /// `U64` stats and values store the raw 64 bits, so a value `>= 2^63` is a negative `long`. + /// Pruning must use unsigned ordering, or it silently keeps/drops the wrong chunks (#159 follow-up). + @Nested + class Unsigned { + + static Stream cases() { + // Two U64 chunks: low [10..20], high [2^63+10 .. 2^63+20]. A signed compare reads the + // high chunk's bits as negative, so it would keep/drop the wrong chunk in each case. 
+ return Stream.of( + arguments("gte 15 keeps both", RowFilter.gte("id", 15L), List.of(11L, 11L)), + arguments("gt 2^63+15 keeps high only", RowFilter.gt("id", TWO_POW_63 + 15), List.of(11L)), + arguments("lte 15 keeps low only", RowFilter.lte("id", 15L), List.of(11L)), + arguments("eq 2^63+15 in high only", RowFilter.eq("id", TWO_POW_63 + 15), List.of(11L))); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("cases") + void prunesByUnsignedOrder(String name, RowFilter filter, List expected, @TempDir Path tmp) + throws IOException { + // Given + Path file = writeU64Chunks(tmp); + + // When + List rowCounts = scanRowCounts(file, filter); + + // Then — surviving chunks match unsigned semantics (signed compare would differ) + assertThat(rowCounts).containsExactlyElementsOf(expected); + } + } + + /// `F32` stats decode as `Float`; a `Double` or `Float` filter value must compare via the + /// width-agnostic float path rather than throw or fail to prune. + @Nested + class FloatWidths { + + static Stream values() { + // gte(75) against F32 chunks [1..50],[51..100],[101..150]: chunk 1 (max 50) pruned, + // regardless of whether the filter value is boxed as Double or Float. + return Stream.of( + arguments("Double value", 75.0), + arguments("Float value", 75.0f)); + } + + @ParameterizedTest(name = "{0} on F32 column") + @MethodSource("values") + void f32ColumnPrunesRegardlessOfFilterWidth(String name, Object value, @TempDir Path tmp) + throws IOException { + // Given + Path file = writeF32Chunks(tmp); + + // When + List rowCounts = scanRowCounts(file, RowFilter.gte("v", (Comparable) value)); + + // Then — chunk 1 (max 50) pruned + assertThat(rowCounts).containsExactly(50L, 50L); + } + } + + /// Comparison mode keys off the *column* type, not the filter value's boxing — so an integer + /// column never routes through double-compare, which would lose precision past 2^53. 
+ @Nested + class IntegerColumnFloatFilter { + + @Test + void doubleFilterOnI64ComparesInIntegerDomain(@TempDir Path tmp) throws IOException { + // Given — a chunk whose max (2^53 + 1) is NOT exactly representable as a double + long max = (1L << 53) + 1; + Path file = writeI64Chunk(tmp, max - 2, max - 1, max); + + // When — gt with a Double threshold of 2^53; only the row 2^53+1 satisfies it + List rowCounts = scanRowCounts(file, RowFilter.gt("id", (double) (1L << 53))); + + // Then — chunk kept. A double-domain compare would round 2^53+1 down to 2^53 and wrongly + // prune the chunk, dropping the one matching row. + assertThat(rowCounts).containsExactly(3L); + } + } + + /// A filter value genuinely incomparable to the column's stat (e.g. a `String` against a numeric + /// column) is a caller error: surface it, never swallow it into a silent no-prune (#159). + @Nested + class TypeMismatch { + + static Stream ops() { + return Stream.of( + arguments("gt", gt("id")), arguments("gte", gte("id")), + arguments("lt", lt("id")), arguments("lte", lte("id")), + arguments("eq", eq("id")), arguments("neq", neq("id"))); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("ops") + void stringValueOnNumericColumn_throws(String name, Function make, + @TempDir Path tmp) throws IOException { + // Given + Path file = writeThreeChunks(tmp); + + // When / Then + assertThatThrownBy(() -> scanRowCounts(file, make.apply("not-a-number"))) + .isInstanceOf(VortexException.class) + .hasMessageContaining("not comparable"); + } + } }