Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import io.github.dfa1.vortex.reader.decode.UnknownArrayNode;

import java.lang.foreign.MemorySegment;
import java.util.HashMap;
import java.util.Collections;
import java.util.Comparator;
import java.util.Map;
import java.util.ServiceLoader;
import java.util.TreeMap;

/// Read-side registry: maps [EncodingId] to [EncodingDecoder] implementations.
///
Expand All @@ -25,7 +27,12 @@ public final class ReadRegistry {
private final boolean allowUnknown;

private ReadRegistry(Map<EncodingId, EncodingDecoder> decoders, boolean allowUnknown) {
this.decoders = Map.copyOf(decoders);
// Order by encoding name, mirroring WriteRegistry (Enum.compareTo is final, so a Comparator
// is required to sort by id rather than ordinal). Decode dispatch is keyed, so order is not
// load-bearing here, but a stable order keeps the two registries consistent.
var sorted = new TreeMap<EncodingId, EncodingDecoder>(Comparator.comparing(EncodingId::id));
sorted.putAll(decoders);
this.decoders = Collections.unmodifiableMap(sorted);
this.allowUnknown = allowUnknown;
}

Expand Down Expand Up @@ -136,7 +143,7 @@ private static UnknownArray decodeUnknown(DecodeContext ctx, ArrayNode node) {
/// Not thread-safe. Build once, use everywhere — the produced [ReadRegistry] is immutable.
public static final class Builder {

private final Map<EncodingId, EncodingDecoder> decoders = new HashMap<>();
private final Map<EncodingId, EncodingDecoder> decoders = new TreeMap<>();
private boolean allowUnknown = false;

private Builder() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import io.github.dfa1.vortex.encoding.EncodingId;
import io.github.dfa1.vortex.reader.decode.ArrayNode;
import io.github.dfa1.vortex.reader.decode.DecodeContext;
import io.github.dfa1.vortex.reader.decode.EncodingDecoder;
import io.github.dfa1.vortex.reader.decode.UnknownArrayNode;
import org.junit.jupiter.api.Test;

Expand All @@ -16,6 +17,8 @@

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.BDDMockito.given;
import static org.mockito.Mockito.mock;

class ReadRegistryTest {

Expand Down Expand Up @@ -116,4 +119,25 @@ void decodeUnknownEncodingWrapsChildrenAsUnknown() {
assertThat(((UnknownArray) unknown.children()[0]).encodingId()).isEqualTo("vortex.primitive");
assertThat(sut.isAllowUnknown()).isTrue();
}

@Test
void builder_retainsAllRegisteredDecoders() {
// Given — decoders registered out of natural id order (the registry sorts into a TreeMap)
EncodingDecoder bool = decoder(EncodingId.VORTEX_BOOL);
EncodingDecoder primitive = decoder(EncodingId.VORTEX_PRIMITIVE);

// When
ReadRegistry result = ReadRegistry.builder().register(bool).register(primitive).build();

// Then — every registered decoder is retained, regardless of registration order
assertThat(result.hasDecoder(EncodingId.VORTEX_PRIMITIVE)).isTrue();
assertThat(result.hasDecoder(EncodingId.VORTEX_BOOL)).isTrue();
assertThat(result.hasDecoder(EncodingId.VORTEX_CONSTANT)).isFalse();
}

private static EncodingDecoder decoder(EncodingId id) {
EncodingDecoder decoder = mock(EncodingDecoder.class);
given(decoder.encodingId()).willReturn(id);
return decoder;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
import io.github.dfa1.vortex.extension.ExtensionId;
import io.github.dfa1.vortex.writer.encode.EncodingEncoder;

import java.util.HashMap;
import java.util.Collections;
import java.util.Comparator;
import java.util.Map;
import java.util.ServiceLoader;
import java.util.TreeMap;
import java.util.function.Function;

/// Write-side registry: maps [EncodingId] to [EncodingEncoder] implementations,
/// and [ExtensionId] to [ExtensionEncoder] implementations.
Expand All @@ -27,8 +30,21 @@ public final class WriteRegistry {

private WriteRegistry(Map<EncodingId, EncodingEncoder> encoders,
Map<ExtensionId, ExtensionEncoder> extensions) {
this.encoders = Map.copyOf(encoders);
this.extensions = Map.copyOf(extensions);
// Order by encoding name so it is stable regardless of enum declaration order. (The id
// enums sort by ordinal naturally — Enum.compareTo is final — so a Comparator is required.)
// VortexWriter.create(.., WriteRegistry) selects the first encoder whose accepts() matches
// the dtype, so iteration order is significant: a HashMap — or registration order, which
// depends on ServiceLoader's unspecified iteration and on how register* calls interleave —
// would make selection vary across runs and platforms. The order is now a pure function of
// the registered set, however it was assembled.
this.encoders = sortedByName(encoders, EncodingId::id);
this.extensions = sortedByName(extensions, ExtensionId::id);
}

private static <K, V> Map<K, V> sortedByName(Map<K, V> src, Function<K, String> name) {
var sorted = new TreeMap<K, V>(Comparator.comparing(name));
sorted.putAll(src);
return Collections.unmodifiableMap(sorted);
}

/// Loads all service-discovered [EncodingEncoder] and [ExtensionEncoder] implementations.
Expand Down Expand Up @@ -72,8 +88,8 @@ public ExtensionEncoder lookup(ExtensionId extensionId) {
/// Not thread-safe. Build once, use everywhere — the produced [WriteRegistry] is immutable.
public static final class Builder {

private final Map<EncodingId, EncodingEncoder> encoders = new HashMap<>();
private final Map<ExtensionId, ExtensionEncoder> extensions = new HashMap<>();
private final Map<EncodingId, EncodingEncoder> encoders = new TreeMap<>();
private final Map<ExtensionId, ExtensionEncoder> extensions = new TreeMap<>();

private Builder() {
}
Expand Down Expand Up @@ -104,8 +120,11 @@ public Builder register(ExtensionEncoder extension) {
return this;
}

/// Registers every [EncodingEncoder] and [ExtensionEncoder] discovered via
/// [ServiceLoader].
/// Registers every [EncodingEncoder] and [ExtensionEncoder] discovered via [ServiceLoader].
///
/// The order in which `ServiceLoader` yields providers is unspecified by the JDK, but it
/// does not matter here: [#build()] sorts the registered set by id, so the resulting
/// registry has the same deterministic order no matter how it was populated.
///
/// @return this builder, for chaining
/// @throws VortexException if a service-loaded entry collides with one already registered
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,12 @@ public EncodingId encodingId() {

@Override
public boolean accepts(DType dtype) {
return dtype instanceof DType.Primitive || dtype instanceof DType.Struct;
// Carrier-dispatched, not dtype-dispatched: encode() requires a ChunkedData wrapper, which
// the dtype-based write path never produces. Returning false keeps this encoder out of
// first-match selection (VortexWriter.findEncoder and the cascade), where it would
// otherwise be handed a raw primitive array and throw ClassCastException. It stays usable
// by explicit construction / keyed lookup for callers that already hold ChunkedData.
return false;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -474,9 +474,13 @@ void writeAndRead_columnProjection_returnsOnlyRequestedColumns(@TempDir Path tmp
}

@Test
void create_withWriteRegistry_roundTrips(@TempDir Path tmp) throws IOException {
// Given — the WriteRegistry factory overload (global dict forced off, encoders taken from
// the registry). A round-trip exercises this otherwise-untested create path end to end.
void create_withFullRegistry_roundTrips(@TempDir Path tmp) throws IOException {
// Given — create(WriteRegistry) over loadAll(), the exact combination that broke on Windows.
// With no cascade, writeSegment picks the first registered encoder whose accepts() matches
// the dtype. loadAll() pulls in every service encoder, so this is the end-to-end guard that
// a deterministic order (WriteRegistry's TreeMap) plus an honest accepts() (composite
// encoders like ChunkedEncodingEncoder no longer claim raw primitive dtypes) keep selection
// both stable across platforms and correct.
var schema = new DType.Struct(List.of("v"),
List.of(new DType.Primitive(PType.I64, false)), false);
Path file = tmp.resolve("registry.vortex");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import io.github.dfa1.vortex.writer.encode.EncodingEncoder;
import org.junit.jupiter.api.Test;

import java.util.List;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.BDDMockito.given;
Expand Down Expand Up @@ -125,4 +127,33 @@ void loadAll_discoversServiceLoadedEncoders() {
// Then
assertThat(sut.encoderMap()).isNotEmpty();
}

@Test
void build_ordersEncodersByEncodingNameRegardlessOfRegistrationSequence() {
// Given — three encoders registered out of name order ("vortex.bool" < "vortex.constant"
// < "vortex.primitive")
EncodingEncoder bool = encoder(EncodingId.VORTEX_BOOL);
EncodingEncoder primitive = encoder(EncodingId.VORTEX_PRIMITIVE);
EncodingEncoder constant = encoder(EncodingId.VORTEX_CONSTANT);

// When
WriteRegistry result = WriteRegistry.builder().register(primitive).register(bool).register(constant).build();

// Then — encoderMap iterates by encoding name no matter the registration sequence, so
// VortexWriter's first-match encoder selection is deterministic
assertThat(result.encoderMap().keySet())
.containsExactly(EncodingId.VORTEX_BOOL, EncodingId.VORTEX_CONSTANT, EncodingId.VORTEX_PRIMITIVE);
}

@Test
void loadAll_encoderOrderIsDeterministicallySortedByName() {
// Given — all service-loaded encoders
WriteRegistry registry = WriteRegistry.loadAll();

// When — the encoder names in iteration order
List<String> result = registry.encoderMap().keySet().stream().map(EncodingId::id).toList();

// Then — sorted by name, independent of ServiceLoader's unspecified iteration order
assertThat(result).isSorted();
}
}