ProjectedRow.java
@@ -188,32 +188,6 @@ public String toString() {
+ '}';
}

/**
* Like {@link #from(int[])}, but throws {@link IllegalArgumentException} if the provided {@code
* projection} array contains nested projections, which are not supported by {@link
* ProjectedRow}.
*
* <p>The array represents the mapping of the fields of the original {@link DataType}, including
* nested rows. For example, {@code [[0, 2, 1], ...]} specifies to include the 2nd field of the
* 3rd field of the 1st field in the top-level row.
*
* @see Projection
* @see ProjectedRow
*/
public static ProjectedRow from(int[][] projection) throws IllegalArgumentException {
return new ProjectedRow(
Arrays.stream(projection)
.mapToInt(
arr -> {
if (arr.length != 1) {
throw new IllegalArgumentException(
"ProjectedRowData doesn't support nested projections");
}
return arr[0];
})
.toArray());
}

/**
* Create an empty {@link ProjectedRow} starting from a {@code projection} array.
*
@@ -234,17 +208,4 @@ public static ProjectedRow from(RowType readType, RowType tableType) {
.mapToInt(field -> tableType.getFieldIndexByFieldId(field.id()))
.toArray());
}

/**
* Create an empty {@link ProjectedRow} starting from a {@link Projection}.
*
* <p>Throws {@link IllegalStateException} if the provided {@code projection} array contains
* nested projections, which are not supported by {@link ProjectedRow}.
*
* @see Projection
* @see ProjectedRow
*/
public static ProjectedRow from(Projection projection) {
return new ProjectedRow(projection.toTopLevelIndexes());
}
}
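Below is a minimal, hypothetical sketch of how the retained from(RowType readType, RowType tableType) factory resolves a read schema against a table schema by field id. The schema, field ids, sample values, and the org.apache.paimon.utils package for ProjectedRow are assumptions for illustration, not taken from this PR.

import org.apache.paimon.data.BinaryString;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.ProjectedRow; // package assumed

import java.util.Arrays;

public class ProjectedRowSketch {
    public static void main(String[] args) {
        // Hypothetical table schema: id (field id 0), name (field id 1), price (field id 2).
        RowType tableType =
                new RowType(
                        Arrays.asList(
                                new DataField(0, "id", DataTypes.INT()),
                                new DataField(1, "name", DataTypes.STRING()),
                                new DataField(2, "price", DataTypes.DOUBLE())));

        // Read schema keeps only price and id, in that order.
        RowType readType =
                new RowType(
                        Arrays.asList(
                                new DataField(2, "price", DataTypes.DOUBLE()),
                                new DataField(0, "id", DataTypes.INT())));

        // from(readType, tableType) maps each read field to its position in the table
        // row via getFieldIndexByFieldId, so projected positions {0, 1} -> table {2, 0}.
        ProjectedRow projected = ProjectedRow.from(readType, tableType);
        projected.replaceRow(GenericRow.of(42, BinaryString.fromString("apple"), 1.5d));

        System.out.println(projected.getDouble(0)); // 1.5
        System.out.println(projected.getInt(1)); // 42
    }
}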
DeduplicateMergeFunction.java
@@ -21,6 +21,7 @@
import org.apache.paimon.CoreOptions;
import org.apache.paimon.KeyValue;
import org.apache.paimon.options.Options;
import org.apache.paimon.types.RowType;

import javax.annotation.Nullable;

@@ -73,7 +74,7 @@ public static MergeFunctionFactory<KeyValue> factory(Options options) {

private static class Factory implements MergeFunctionFactory<KeyValue> {

private static final long serialVersionUID = 1L;
private static final long serialVersionUID = 2L;

private final boolean ignoreDelete;

@@ -82,7 +83,7 @@ private Factory(boolean ignoreDelete) {
}

@Override
public MergeFunction<KeyValue> create(@Nullable int[][] projection) {
public MergeFunction<KeyValue> create(@Nullable RowType readType) {
return new DeduplicateMergeFunction(ignoreDelete);
}
}
FirstRowMergeFunction.java
@@ -21,6 +21,7 @@
import org.apache.paimon.CoreOptions;
import org.apache.paimon.KeyValue;
import org.apache.paimon.options.Options;
import org.apache.paimon.types.RowType;

import javax.annotation.Nullable;

@@ -82,15 +83,15 @@ public static MergeFunctionFactory<KeyValue> factory(Options options) {

private static class Factory implements MergeFunctionFactory<KeyValue> {

private static final long serialVersionUID = 1L;
private static final long serialVersionUID = 2L;
private final boolean ignoreDelete;

public Factory(boolean ignoreDelete) {
this.ignoreDelete = ignoreDelete;
}

@Override
public MergeFunction<KeyValue> create(@Nullable int[][] projection) {
public MergeFunction<KeyValue> create(@Nullable RowType readType) {
return new FirstRowMergeFunction(ignoreDelete);
}
}
LookupMergeFunction.java
@@ -143,7 +143,7 @@ public static MergeFunctionFactory<KeyValue> wrap(
/** Factory to create {@link LookupMergeFunction}. */
public static class Factory implements MergeFunctionFactory<KeyValue> {

private static final long serialVersionUID = 1L;
private static final long serialVersionUID = 2L;

private final MergeFunctionFactory<KeyValue> wrapped;
private final CoreOptions options;
@@ -168,14 +168,14 @@ public void withIOManager(@Nullable IOManager ioManager) {
}

@Override
public MergeFunction<KeyValue> create(@Nullable int[][] projection) {
public MergeFunction<KeyValue> create(@Nullable RowType readType) {
return new LookupMergeFunction(
wrapped.create(projection), options, keyType, valueType, ioManager);
wrapped.create(readType), options, keyType, valueType, ioManager);
}

@Override
public AdjustedProjection adjustProjection(@Nullable int[][] projection) {
return wrapped.adjustProjection(projection);
public RowType adjustReadType(RowType readType) {
return wrapped.adjustReadType(readType);
}
}
}
MergeFunctionFactory.java
@@ -18,6 +18,8 @@

package org.apache.paimon.mergetree.compact;

import org.apache.paimon.types.RowType;

import javax.annotation.Nullable;

import java.io.Serializable;
@@ -30,23 +32,10 @@ default MergeFunction<T> create() {
return create(null);
}

MergeFunction<T> create(@Nullable int[][] projection);

// todo: replace projection with rowType
default AdjustedProjection adjustProjection(@Nullable int[][] projection) {
return new AdjustedProjection(projection, null);
}

/** Result of adjusted projection. */
class AdjustedProjection {

@Nullable public final int[][] pushdownProjection;

@Nullable public final int[][] outerProjection;
MergeFunction<T> create(@Nullable RowType readType);

public AdjustedProjection(int[][] pushdownProjection, int[][] outerProjection) {
this.pushdownProjection = pushdownProjection;
this.outerProjection = outerProjection;
}
/** Adjust read type, if no need to adjust, return the original read type. */
default RowType adjustReadType(RowType readType) {
return readType;
}
}
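For context, a minimal sketch of how a caller is expected to use the reworked contract: ask the factory to adjust the read type first, then build the merge function against the schema that will actually be read. It uses DeduplicateMergeFunction's factory from this PR; the read schema and the empty Options are assumptions for illustration.

import org.apache.paimon.KeyValue;
import org.apache.paimon.mergetree.compact.DeduplicateMergeFunction;
import org.apache.paimon.mergetree.compact.MergeFunction;
import org.apache.paimon.mergetree.compact.MergeFunctionFactory;
import org.apache.paimon.options.Options;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.RowType;

public class ReadTypeContractSketch {
    public static void main(String[] args) {
        MergeFunctionFactory<KeyValue> factory = DeduplicateMergeFunction.factory(new Options());

        // Columns requested by the query (names/types made up for this sketch).
        RowType readType = RowType.of(DataTypes.INT(), DataTypes.STRING());

        // Step 1: the factory may append columns it needs for merging (e.g. sequence-group
        // fields). Deduplicate needs none, so the default adjustReadType returns the input.
        RowType adjusted = factory.adjustReadType(readType);

        // Step 2: create the merge function against the (possibly extended) read schema.
        MergeFunction<KeyValue> mergeFunction = factory.create(adjusted);
        System.out.println(adjusted.getFieldCount() + " " + mergeFunction.getClass().getSimpleName());
    }
}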
PartialUpdateMergeFunction.java
@@ -34,7 +34,6 @@
import org.apache.paimon.utils.ArrayUtils;
import org.apache.paimon.utils.FieldsComparator;
import org.apache.paimon.utils.Preconditions;
import org.apache.paimon.utils.Projection;
import org.apache.paimon.utils.UserDefinedSeqComparator;

import javax.annotation.Nullable;
@@ -51,8 +50,6 @@
import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

import static org.apache.paimon.CoreOptions.FIELDS_PREFIX;
import static org.apache.paimon.CoreOptions.FIELDS_SEPARATOR;
@@ -379,13 +376,11 @@ public static MergeFunctionFactory<KeyValue> factory(

private static class Factory implements MergeFunctionFactory<KeyValue> {

private static final long serialVersionUID = 1L;
private static final long serialVersionUID = 2L;

private final boolean ignoreDelete;
private final RowType rowType;

private final List<DataType> tableTypes;

private final Map<Integer, Supplier<FieldsComparator>> fieldSeqComparators;

private final Map<Integer, Supplier<FieldAggregator>> fieldAggregators;
@@ -397,7 +392,6 @@ private static class Factory implements MergeFunctionFactory<KeyValue> {
private Factory(Options options, RowType rowType, List<String> primaryKeys) {
this.ignoreDelete = options.get(CoreOptions.IGNORE_DELETE);
this.rowType = rowType;
this.tableTypes = rowType.getFieldTypes();
this.removeRecordOnDelete = options.get(PARTIAL_UPDATE_REMOVE_RECORD_ON_DELETE);
String removeRecordOnSequenceGroup =
options.get(PARTIAL_UPDATE_REMOVE_RECORD_ON_SEQUENCE_GROUP);
@@ -498,26 +492,29 @@ private Factory(Options options, RowType rowType, List<String> primaryKeys) {
}

@Override
public MergeFunction<KeyValue> create(@Nullable int[][] projection) {
if (projection != null) {
Map<Integer, FieldsComparator> projectedSeqComparators = new HashMap<>();
Map<Integer, FieldAggregator> projectedAggregators = new HashMap<>();
int[] projects = Projection.of(projection).toTopLevelIndexes();
public MergeFunction<KeyValue> create(@Nullable RowType readType) {
RowType targetType = readType != null ? readType : rowType;
Map<Integer, FieldsComparator> projectedSeqComparators = new HashMap<>();
Map<Integer, FieldAggregator> projectedAggregators = new HashMap<>();

if (readType != null) {
// Build index mapping from table schema to read schema
List<String> readFieldNames = readType.getFieldNames();
Map<Integer, Integer> indexMap = new HashMap<>();
List<DataField> dataFields = rowType.getFields();
List<DataType> newDataTypes = new ArrayList<>();

for (int i = 0; i < projects.length; i++) {
indexMap.put(projects[i], i);
newDataTypes.add(dataFields.get(projects[i]).type());
for (int i = 0; i < readType.getFieldCount(); i++) {
String fieldName = readFieldNames.get(i);
int oldIndex = rowType.getFieldIndex(fieldName);
if (oldIndex >= 0) {
indexMap.put(oldIndex, i);
}
}
RowType newRowType = RowType.builder().fields(newDataTypes).build();

// Remap sequence comparators
fieldSeqComparators.forEach(
(field, comparatorSupplier) -> {
FieldsComparator comparator = comparatorSupplier.get();
int newField = indexMap.getOrDefault(field, -1);
if (newField != -1) {
FieldsComparator comparator = comparatorSupplier.get();
int[] newSequenceFields =
Arrays.stream(comparator.compareFields())
.map(
@@ -532,94 +529,76 @@ public MergeFunction<KeyValue> create(@Nullable int[][] projection) {
+ "for new field. new field "
+ "index is %s",
newField));
} else {
return newIndex;
}
return newIndex;
})
.toArray();
projectedSeqComparators.put(
newField,
UserDefinedSeqComparator.create(
newRowType, newSequenceFields, true));
readType, newSequenceFields, true));
}
});
for (int i = 0; i < projects.length; i++) {
if (fieldAggregators.containsKey(projects[i])) {
projectedAggregators.put(i, fieldAggregators.get(projects[i]).get());

// Remap field aggregators
for (int oldIndex : indexMap.keySet()) {
if (fieldAggregators.containsKey(oldIndex)) {
int newIndex = indexMap.get(oldIndex);
projectedAggregators.put(newIndex, fieldAggregators.get(oldIndex).get());
}
}

List<DataType> projectedTypes = Projection.of(projection).project(tableTypes);
return new PartialUpdateMergeFunction(
createFieldGetters(projectedTypes),
ignoreDelete,
projectedSeqComparators,
projectedAggregators,
!fieldSeqComparators.isEmpty(),
removeRecordOnDelete,
sequenceGroupPartialDelete,
ArrayUtils.toPrimitiveBoolean(
projectedTypes.stream()
.map(DataType::isNullable)
.toArray(Boolean[]::new)));
} else {
Map<Integer, FieldsComparator> fieldSeqComparators = new HashMap<>();
// Use original mappings
this.fieldSeqComparators.forEach(
(f, supplier) -> fieldSeqComparators.put(f, supplier.get()));
Map<Integer, FieldAggregator> fieldAggregators = new HashMap<>();
(f, supplier) -> projectedSeqComparators.put(f, supplier.get()));
this.fieldAggregators.forEach(
(f, supplier) -> fieldAggregators.put(f, supplier.get()));
return new PartialUpdateMergeFunction(
createFieldGetters(tableTypes),
ignoreDelete,
fieldSeqComparators,
fieldAggregators,
!fieldSeqComparators.isEmpty(),
removeRecordOnDelete,
sequenceGroupPartialDelete,
ArrayUtils.toPrimitiveBoolean(
rowType.getFieldTypes().stream()
.map(DataType::isNullable)
.toArray(Boolean[]::new)));
(f, supplier) -> projectedAggregators.put(f, supplier.get()));
}

List<DataType> fieldTypes = targetType.getFieldTypes();
return new PartialUpdateMergeFunction(
createFieldGetters(fieldTypes),
ignoreDelete,
projectedSeqComparators,
projectedAggregators,
!fieldSeqComparators.isEmpty(),
removeRecordOnDelete,
sequenceGroupPartialDelete,
ArrayUtils.toPrimitiveBoolean(
fieldTypes.stream().map(DataType::isNullable).toArray(Boolean[]::new)));
}

@Override
public AdjustedProjection adjustProjection(@Nullable int[][] projection) {
public RowType adjustReadType(RowType readType) {
if (fieldSeqComparators.isEmpty()) {
return new AdjustedProjection(projection, null);
return readType;
}

if (projection == null) {
return new AdjustedProjection(null, null);
}
LinkedHashSet<Integer> extraFields = new LinkedHashSet<>();
int[] topProjects = Projection.of(projection).toTopLevelIndexes();
Set<Integer> indexSet = Arrays.stream(topProjects).boxed().collect(Collectors.toSet());
for (int index : topProjects) {
LinkedHashSet<DataField> extraFields = new LinkedHashSet<>();
List<String> readFieldNames = readType.getFieldNames();
for (DataField readField : readType.getFields()) {
int index = rowType.getFieldIndex(readField.name());
Supplier<FieldsComparator> comparatorSupplier = fieldSeqComparators.get(index);
if (comparatorSupplier == null) {
continue;
}

FieldsComparator comparator = comparatorSupplier.get();
for (int field : comparator.compareFields()) {
if (!indexSet.contains(field)) {
for (int fieldIndex : comparator.compareFields()) {
DataField field = rowType.getFields().get(fieldIndex);
if (!readFieldNames.contains(field.name())) {
extraFields.add(field);
}
}
}

int[] allProjects =
Stream.concat(Arrays.stream(topProjects).boxed(), extraFields.stream())
.mapToInt(Integer::intValue)
.toArray();
if (extraFields.isEmpty()) {
return readType;
}

int[][] pushDown = Projection.of(allProjects).toNestedIndexes();
int[][] outer =
Projection.of(IntStream.range(0, topProjects.length).toArray())
.toNestedIndexes();
return new AdjustedProjection(pushDown, outer);
List<DataField> allFields = new ArrayList<>(readType.getFields());
allFields.addAll(extraFields);
return new RowType(allFields);
}

private int requireField(String fieldName, List<String> fieldNames) {
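To illustrate the two PartialUpdateMergeFunction changes above, here is a hypothetical sketch: adjustReadType appends sequence-group fields the query did not select, and create(readType) then remaps comparators and aggregators to the read schema's positions by field name. The table schema, option map, primary key, the Options.fromMap construction, and the exact factory(Options, RowType, List<String>) call are assumptions for illustration.

import org.apache.paimon.KeyValue;
import org.apache.paimon.mergetree.compact.MergeFunctionFactory;
import org.apache.paimon.mergetree.compact.PartialUpdateMergeFunction;
import org.apache.paimon.options.Options;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.RowType;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class AdjustReadTypeSketch {
    public static void main(String[] args) {
        // Hypothetical table schema: k, v1, v2, seq.
        RowType tableType =
                RowType.of(
                        new DataType[] {
                            DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING(), DataTypes.BIGINT()
                        },
                        new String[] {"k", "v1", "v2", "seq"});

        // "seq" drives the sequence group covering v1 and v2.
        Map<String, String> conf = new HashMap<>();
        conf.put("fields.seq.sequence-group", "v1,v2");
        Options options = Options.fromMap(conf);

        MergeFunctionFactory<KeyValue> factory =
                PartialUpdateMergeFunction.factory(
                        options, tableType, Collections.singletonList("k"));

        // The query selects only k and v1 ...
        RowType readType =
                RowType.of(
                        new DataType[] {DataTypes.INT(), DataTypes.STRING()},
                        new String[] {"k", "v1"});

        // ... but merging v1 needs "seq", so adjustReadType appends it by name.
        RowType adjusted = factory.adjustReadType(readType);
        System.out.println(adjusted.getFieldNames()); // expected: [k, v1, seq]

        // create(adjusted) rebuilds the comparator/aggregator maps against the
        // positions of the adjusted read schema instead of int[][] projections.
        factory.create(adjusted);
    }
}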