diff --git a/docker/compose/docker-compose_hadoop340_hive2310_spark402_amd64.yml b/docker/compose/docker-compose_hadoop340_hive2310_spark402_amd64.yml new file mode 100644 index 0000000000000..0cd441eef2c56 --- /dev/null +++ b/docker/compose/docker-compose_hadoop340_hive2310_spark402_amd64.yml @@ -0,0 +1,267 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +services: + + namenode: + image: apachehudi/hudi-hadoop_3.4.0-namenode:latest + hostname: namenode + container_name: namenode + environment: + - CLUSTER_NAME=hudi_hadoop340_hive2310_spark402 + ports: + - "8020:8020" # HDFS NameNode IPC + - "9000:9000" # HDFS NameNode Client + - "9870:9870" # HDFS NameNode Web UI (probed by the healthcheck below) + env_file: + - ./hadoop.env + healthcheck: + test: ["CMD", "curl", "-f", "http://namenode:9870"] + interval: 30s + timeout: 10s + retries: 3 + + datanode1: + image: apachehudi/hudi-hadoop_3.4.0-datanode:latest + container_name: datanode1 + hostname: datanode1 + environment: + - CLUSTER_NAME=hudi_hadoop340_hive2310_spark402 + env_file: + - ./hadoop.env + ports: + - "50075:50075" # NOTE(review): 50075/50010 look like Hadoop 2.x DataNode ports - confirm the 3.4.0 image still serves them + - "9864:9864" # DataNode HTTP endpoint (probed by the healthcheck below) + - "50010:50010" + links: + - "namenode" + - "historyserver" + healthcheck: + test: ["CMD", "curl", "-f", "http://datanode1:9864"] + interval: 30s + timeout: 10s + retries: 3 + depends_on: + - namenode + + historyserver: + image: apachehudi/hudi-hadoop_3.4.0-history:latest + hostname: historyserver + container_name: historyserver + environment: + - CLUSTER_NAME=hudi_hadoop340_hive2310_spark402 + depends_on: + - "namenode" + links: + - "namenode" + ports: + - "8188:8188" # HTTP endpoint probed by the healthcheck below + healthcheck: + test: ["CMD", "curl", "-f", "http://historyserver:8188"] + interval: 30s + timeout: 10s + retries: 3 + env_file: + - ./hadoop.env + volumes: + - historyserver:/hadoop/yarn/timeline + + # Pure Hive 2.3.10 stack (postgres 2.3 schema -> HMS 2.3.10 -> HS2 2.3.10). + # Matches hudi-spark-bundle's compile-time Hive 2.3 client, so Hudi hive-sync + # talks to HMS natively (no Thrift get_table incompatibility, no sharedPrefixes hack). + # Hadoop 3.4.0 HDFS is backward-compatible with the 2.8.4-based Hive client. 
+ hive-metastore-postgresql: + image: bde2020/hive-metastore-postgresql:2.3.0 + volumes: + - hive-metastore-postgresql:/var/lib/postgresql + hostname: hive-metastore-postgresql + container_name: hive-metastore-postgresql + + hivemetastore: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.10:latest + hostname: hivemetastore + container_name: hivemetastore + links: + - "hive-metastore-postgresql" + - "namenode" + env_file: + - ./hadoop.env + command: /opt/hive/bin/hive --service metastore + environment: + - "SERVICE_PRECONDITION=namenode:9870 hive-metastore-postgresql:5432" + ports: + - "9083:9083" + healthcheck: + test: ["CMD", "nc", "-z", "hivemetastore", "9083"] + interval: 30s + timeout: 10s + retries: 3 + depends_on: + - "hive-metastore-postgresql" + - "namenode" + + hiveserver: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.10:latest + hostname: hiveserver + container_name: hiveserver + env_file: + - ./hadoop.env + environment: + - SERVICE_PRECONDITION=hivemetastore:9083 + ports: + - "10000:10000" + depends_on: + - "hivemetastore" + links: + - "hivemetastore" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + + zookeeper: + image: 'bitnamilegacy/zookeeper:3.6.4' + hostname: zookeeper + container_name: zookeeper + ports: + - "2181:2181" + environment: + - ALLOW_ANONYMOUS_LOGIN=yes + + kafka: + image: 'bitnamilegacy/kafka:3.4.1' + hostname: kafkabroker + container_name: kafkabroker + ports: + - "9092:9092" + environment: + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - ALLOW_PLAINTEXT_LISTENER=yes + + sparkmaster: + image: apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkmaster_4.0.2:latest + hostname: sparkmaster + container_name: sparkmaster + env_file: + - ./hadoop.env + ports: + - "8080:8080" + - "7077:7077" + - "8888:8888" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + - ./notebooks:/opt/workspace/notebooks + environment: + - INIT_DAEMON_STEP=setup_spark + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" 
+ - "namenode" + + spark-worker-1: + image: apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkworker_4.0.2:latest + hostname: spark-worker-1 + container_name: spark-worker-1 + env_file: + - ./hadoop.env + depends_on: + - sparkmaster + ports: + - "8081:8081" + environment: + - SPARK_MASTER=spark://sparkmaster:7077 + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + + adhoc-1: + image: apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkadhoc_4.0.2:latest + hostname: adhoc-1 + container_name: adhoc-1 + env_file: + - ./hadoop.env + depends_on: + - sparkmaster + ports: + - "4040:4040" + environment: + - SPARK_MASTER=spark://sparkmaster:7077 + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + + adhoc-2: + image: apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkadhoc_4.0.2:latest + hostname: adhoc-2 + container_name: adhoc-2 + env_file: + - ./hadoop.env + depends_on: + - sparkmaster + environment: + - SPARK_MASTER=spark://sparkmaster:7077 + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + + minio: + image: 'minio/minio:latest' + hostname: minio + container_name: minio + ports: + - "9090:9090" # server address + - "9091:9091" # console address + volumes: + - minio-data:/data + environment: + - MINIO_ROOT_USER=minio # replaces the deprecated MINIO_ACCESS_KEY; must match the credentials used by the mc alias below + - MINIO_ROOT_PASSWORD=minio123 # replaces the deprecated MINIO_SECRET_KEY + - MINIO_DOMAIN=minio + command: server --address ":9090" --console-address ":9091" /data + + mc: + image: minio/mc + container_name: mc + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9090 minio minio123 --api S3v4) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc rm -r --force minio/warehouse; + /usr/bin/mc mb minio/warehouse; + /usr/bin/mc anonymous set public minio/warehouse; + tail -f /dev/null + " + depends_on: + - minio # start order only; the until-loop above waits until MinIO accepts the alias + +volumes: + namenode: # NOTE(review): no service in this file mounts this volume - confirm it is still needed + historyserver: + hive-metastore-postgresql: + minio-data: + +networks: + default: + name: hudi diff --git a/docker/compose/docker-compose_hadoop340_hive2310_spark402_arm64.yml b/docker/compose/docker-compose_hadoop340_hive2310_spark402_arm64.yml new file mode 100644 index 0000000000000..edc1a36bd28ac --- /dev/null +++ b/docker/compose/docker-compose_hadoop340_hive2310_spark402_arm64.yml @@ -0,0 +1,267 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +services: + + namenode: + image: apachehudi/hudi-hadoop_3.4.0-namenode:latest + hostname: namenode + container_name: namenode + environment: + - CLUSTER_NAME=hudi_hadoop340_hive2310_spark402 + ports: + - "8020:8020" # HDFS NameNode IPC + - "9000:9000" # HDFS NameNode Client + - "9870:9870" # HDFS NameNode Web UI (probed by the healthcheck below) + env_file: + - ./hadoop.env + healthcheck: + test: ["CMD", "curl", "-f", "http://namenode:9870"] + interval: 30s + timeout: 10s + retries: 3 + + datanode1: + image: apachehudi/hudi-hadoop_3.4.0-datanode:latest + container_name: datanode1 + hostname: datanode1 + environment: + - CLUSTER_NAME=hudi_hadoop340_hive2310_spark402 + env_file: + - ./hadoop.env + ports: + - "50075:50075" # NOTE(review): 50075/50010 look like Hadoop 2.x DataNode ports - confirm the 3.4.0 image still serves them + - "9864:9864" # DataNode HTTP endpoint (probed by the healthcheck below) + - "50010:50010" + links: + - "namenode" + - "historyserver" + healthcheck: + test: ["CMD", "curl", "-f", "http://datanode1:9864"] + interval: 30s + timeout: 10s + retries: 3 + depends_on: + - namenode + + historyserver: + image: apachehudi/hudi-hadoop_3.4.0-history:latest + hostname: historyserver + container_name: historyserver + environment: + - CLUSTER_NAME=hudi_hadoop340_hive2310_spark402 + depends_on: + - "namenode" + links: + - "namenode" + ports: + - "8188:8188" # HTTP endpoint probed by the healthcheck below + healthcheck: + test: ["CMD", "curl", "-f", "http://historyserver:8188"] + interval: 30s + timeout: 10s + retries: 3 + env_file: + - ./hadoop.env + volumes: + - historyserver:/hadoop/yarn/timeline + + # Pure Hive 2.3.10 stack (postgres 2.3 schema -> HMS 2.3.10 -> HS2 2.3.10). + # Matches hudi-spark-bundle's compile-time Hive 2.3 client, so Hudi hive-sync + # talks to HMS natively (no Thrift get_table incompatibility, no sharedPrefixes hack). + # Hadoop 3.4.0 HDFS is backward-compatible with the 2.8.4-based Hive client. 
+ hive-metastore-postgresql: + image: bde2020/hive-metastore-postgresql:2.3.0 + volumes: + - hive-metastore-postgresql:/var/lib/postgresql + hostname: hive-metastore-postgresql + container_name: hive-metastore-postgresql + + hivemetastore: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.10:latest + hostname: hivemetastore + container_name: hivemetastore + links: + - "hive-metastore-postgresql" + - "namenode" + env_file: + - ./hadoop.env + command: /opt/hive/bin/hive --service metastore + environment: + - "SERVICE_PRECONDITION=namenode:9870 hive-metastore-postgresql:5432" + ports: + - "9083:9083" + healthcheck: + test: ["CMD", "nc", "-z", "hivemetastore", "9083"] + interval: 30s + timeout: 10s + retries: 3 + depends_on: + - "hive-metastore-postgresql" + - "namenode" + + hiveserver: + image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.10:latest + hostname: hiveserver + container_name: hiveserver + env_file: + - ./hadoop.env + environment: + - SERVICE_PRECONDITION=hivemetastore:9083 + ports: + - "10000:10000" + depends_on: + - "hivemetastore" + links: + - "hivemetastore" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + + zookeeper: + image: 'bitnamilegacy/zookeeper:3.6.4' + hostname: zookeeper + container_name: zookeeper + ports: + - "2181:2181" + environment: + - ALLOW_ANONYMOUS_LOGIN=yes + + kafka: + image: 'bitnamilegacy/kafka:3.4.1' + hostname: kafkabroker + container_name: kafkabroker + ports: + - "9092:9092" + environment: + - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 + - ALLOW_PLAINTEXT_LISTENER=yes + + sparkmaster: + image: apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkmaster_4.0.2:latest + hostname: sparkmaster + container_name: sparkmaster + env_file: + - ./hadoop.env + ports: + - "8080:8080" + - "7077:7077" + - "8888:8888" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + - ./notebooks:/opt/workspace/notebooks + environment: + - INIT_DAEMON_STEP=setup_spark + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" 
+ - "namenode" + + spark-worker-1: + image: apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkworker_4.0.2:latest + hostname: spark-worker-1 + container_name: spark-worker-1 + env_file: + - ./hadoop.env + depends_on: + - sparkmaster + ports: + - "8081:8081" + environment: + - SPARK_MASTER=spark://sparkmaster:7077 + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + + adhoc-1: + image: apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkadhoc_4.0.2:latest + hostname: adhoc-1 + container_name: adhoc-1 + env_file: + - ./hadoop.env + depends_on: + - sparkmaster + ports: + - "4040:4040" + environment: + - SPARK_MASTER=spark://sparkmaster:7077 + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + + adhoc-2: + image: apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkadhoc_4.0.2:latest + hostname: adhoc-2 + container_name: adhoc-2 + env_file: + - ./hadoop.env + depends_on: + - sparkmaster + environment: + - SPARK_MASTER=spark://sparkmaster:7077 + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + + minio: + image: 'minio/minio:latest' + hostname: minio + container_name: minio + ports: + - "9090:9090" # server address + - "9091:9091" # console address + volumes: + - minio-data:/data + environment: + - MINIO_ROOT_USER=minio # replaces the deprecated MINIO_ACCESS_KEY; must match the credentials used by the mc alias below + - MINIO_ROOT_PASSWORD=minio123 # replaces the deprecated MINIO_SECRET_KEY + - MINIO_DOMAIN=minio + command: server --address ":9090" --console-address ":9091" /data + + mc: + image: minio/mc + container_name: mc + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc alias set minio http://minio:9090 minio minio123 --api S3v4) do echo '...waiting...' 
&& sleep 1; done; + /usr/bin/mc rm -r --force minio/warehouse; + /usr/bin/mc mb minio/warehouse; + /usr/bin/mc anonymous set public minio/warehouse; + tail -f /dev/null + " + depends_on: + - minio # start order only; the until-loop above waits until MinIO accepts the alias + +volumes: + namenode: # NOTE(review): no service in this file mounts this volume - confirm it is still needed + historyserver: + hive-metastore-postgresql: + minio-data: + +networks: + default: + name: hudi diff --git a/docker/hoodie/hadoop/base/entrypoint.sh b/docker/hoodie/hadoop/base/entrypoint.sh index 7c26f29f66886..b8e6aa164347d 100644 --- a/docker/hoodie/hadoop/base/entrypoint.sh +++ b/docker/hoodie/hadoop/base/entrypoint.sh @@ -74,7 +74,6 @@ if [ "$MULTIHOMED_NETWORK" = "1" ]; then # YARN addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0 addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0 - addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0 addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0 # MAPRED diff --git a/docker/hoodie/hadoop/base_java11/entrypoint.sh b/docker/hoodie/hadoop/base_java11/entrypoint.sh index 7c26f29f66886..b8e6aa164347d 100644 --- a/docker/hoodie/hadoop/base_java11/entrypoint.sh +++ b/docker/hoodie/hadoop/base_java11/entrypoint.sh @@ -74,7 +74,6 @@ if [ "$MULTIHOMED_NETWORK" = "1" ]; then # YARN addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0 addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0 - addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0 addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0 # MAPRED diff --git a/docker/hoodie/hadoop/base_java17/entrypoint.sh b/docker/hoodie/hadoop/base_java17/entrypoint.sh index 7c26f29f66886..b8e6aa164347d 100644 --- a/docker/hoodie/hadoop/base_java17/entrypoint.sh +++ b/docker/hoodie/hadoop/base_java17/entrypoint.sh @@ -74,7 +74,6 @@ if [ "$MULTIHOMED_NETWORK" = "1" ]; then # YARN addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0 addProperty /etc/hadoop/yarn-site.xml 
yarn.nodemanager.bind-host 0.0.0.0 - addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0 addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0 # MAPRED diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java index ccd09675fb51e..faf015a9aaf58 100644 --- a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java +++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java @@ -29,7 +29,6 @@ import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.CustomizedThreadFactory; import org.apache.hudi.common.util.HoodieTimer; -import org.apache.hudi.common.util.MapUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.config.GlueCatalogSyncClientConfig; import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; @@ -105,8 +104,8 @@ import java.util.stream.Collectors; import static org.apache.hudi.common.fs.FSUtils.s3aToS3; -import static org.apache.hudi.common.util.MapUtils.containsAll; -import static org.apache.hudi.common.util.MapUtils.isNullOrEmpty; +import static org.apache.hudi.common.util.CollectionUtils.containsAll; +import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty; import static org.apache.hudi.config.GlueCatalogSyncClientConfig.ALL_PARTITIONS_READ_PARALLELISM; import static org.apache.hudi.config.GlueCatalogSyncClientConfig.CHANGED_PARTITIONS_READ_PARALLELISM; import static org.apache.hudi.config.GlueCatalogSyncClientConfig.GLUE_METADATA_FILE_LISTING; @@ -1016,7 +1015,7 @@ public String generatePushDownFilter(List writtenPartitions, List serdeProperties, boolean useRealtimeFormat) { - if (MapUtils.isNullOrEmpty(serdeProperties)) { + if (isNullOrEmpty(serdeProperties)) { return false; } diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java index 2566812e05a7c..17106d8d940e5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java @@ -1253,46 +1253,51 @@ public boolean rollback(final String commitInstantTime, Option commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstantsAsStream() .filter(instant -> EQUALS.test(instant.requestedTime(), commitInstantTime)) .findFirst()); - Option rollbackPlanOption; - String rollbackInstantTime; - if (pendingRollbackInfo.isPresent()) { - rollbackPlanOption = Option.of(pendingRollbackInfo.get().getRollbackPlan()); - rollbackInstantTime = pendingRollbackInfo.get().getRollbackInstant().requestedTime(); - } else { - if (commitInstantOpt.isEmpty()) { - log.error("Cannot find instant {} in the timeline of table {} for rollback", commitInstantTime, config.getBasePath()); + + // ---- SCHEDULE PHASE ---- + // Determines which rollback plan to use and creates a new one if necessary. 
+ Option>> scheduleResult = + resolveOrScheduleRollback(table, commitInstantTime, commitInstantOpt, pendingRollbackInfo, suppliedRollbackInstantTime, skipLocking); + if (scheduleResult.isEmpty()) { + return false; + } + Option rollbackInstantOpt = Option.of(scheduleResult.get().getLeft()); + Option rollbackPlanOption = scheduleResult.get().getRight(); + + // ---- EXECUTION PHASE ---- + boolean isMultiWriter = config.getWriteConcurrencyMode().supportsMultiWriter(); + if (rollbackPlanOption.isPresent()) { + if (isMultiWriter && !acquireRollbackHeartbeatIfMultiWriter(table, rollbackInstantOpt)) { return false; } - if (!skipLocking) { - txnManager.beginStateChange(Option.empty(), Option.empty()); - } + try { - rollbackInstantTime = suppliedRollbackInstantTime.orElseGet(() -> createNewInstantTime(false)); - rollbackPlanOption = table.scheduleRollback(context, rollbackInstantTime, commitInstantOpt.get(), false, config.shouldRollbackUsingMarkers(), false); + // Execute rollback — no lock held during this operation. + + // There can be a case where the inflight rollback failed after the instant files + // are deleted for commitInstantTime, so that commitInstantOpt is empty as it is + // not present in the timeline. In such a case, the hoodie instant instance + // is reconstructed to allow the rollback to be reattempted, and the deleteInstants + // is set to false since they are already deleted. + HoodieRollbackMetadata rollbackMetadata = commitInstantOpt.isPresent() + ? 
table.rollback(context, rollbackInstantOpt.get().requestedTime(), commitInstantOpt.get(), true, skipLocking) + : table.rollback(context, rollbackInstantOpt.get().requestedTime(), table.getMetaClient().createNewInstant( + HoodieInstant.State.INFLIGHT, rollbackPlanOption.get().getInstantToRollback().getAction(), commitInstantTime), + false, skipLocking); + if (timerContext != null) { + long durationInMs = metrics.getDurationInMs(timerContext.stop()); + metrics.updateRollbackMetrics(durationInMs, rollbackMetadata.getTotalFilesDeleted()); + } + return true; } finally { - if (!skipLocking) { - txnManager.endStateChange(Option.empty()); + if (isMultiWriter) { + try { + heartbeatClient.stop(rollbackInstantOpt.get().requestedTime()); + } catch (Exception e) { + log.warn("Failed to stop heartbeat for rollback instant {}", rollbackInstantOpt.get().requestedTime(), e); + } } } - } - - if (rollbackPlanOption.isPresent()) { - // There can be a case where the inflight rollback failed after the instant files - // are deleted for commitInstantTime, so that commitInstantOpt is empty as it is - // not present in the timeline. In such a case, the hoodie instant instance - // is reconstructed to allow the rollback to be reattempted, and the deleteInstants - // is set to false since they are already deleted. - // Execute rollback - HoodieRollbackMetadata rollbackMetadata = commitInstantOpt.isPresent() - ? 
table.rollback(context, rollbackInstantTime, commitInstantOpt.get(), true, skipLocking) - : table.rollback(context, rollbackInstantTime, table.getMetaClient().createNewInstant( - HoodieInstant.State.INFLIGHT, rollbackPlanOption.get().getInstantToRollback().getAction(), commitInstantTime), - false, skipLocking); - if (timerContext != null) { - long durationInMs = metrics.getDurationInMs(timerContext.stop()); - metrics.updateRollbackMetrics(durationInMs, rollbackMetadata.getTotalFilesDeleted()); - } - return true; } else { throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitInstantTime); } @@ -1302,6 +1307,85 @@ public boolean rollback(final String commitInstantTime, Option rollbackInstantOpt) throws IOException { + try { + txnManager.beginStateChange(rollbackInstantOpt, txnManager.getLastCompletedTransactionOwner()); + if (!this.heartbeatClient.isHeartbeatExpired(rollbackInstantOpt.get().requestedTime())) { + LOG.error("Rollback heartbeat already exists for instant {}", rollbackInstantOpt.get().requestedTime()); + return false; + } + if (table.getMetaClient().reloadActiveTimeline().getRollbackTimeline().filterCompletedInstants().getInstantsAsStream() + .anyMatch(instant -> EQUALS.test(instant.requestedTime(), rollbackInstantOpt.get().requestedTime()))) { + LOG.info("Requested rollback instant {} is already completed in the active timeline", rollbackInstantOpt.get().requestedTime()); + return false; + } + + this.heartbeatClient.start(rollbackInstantOpt.get().requestedTime()); + return true; + } finally { + txnManager.endStateChange(rollbackInstantOpt); + } + } + + /** + * Resolves an existing pending rollback or schedules a new one for the given commit instant. + * + * @return Option containing the rollback instant and plan pair, or empty if the commit instant + * is no longer present on the timeline (indicating no rollback is needed). 
+ */ + private Option>> resolveOrScheduleRollback( + HoodieTable table, String commitInstantTime, Option commitInstantOpt, + Option pendingRollbackInfo, Option suppliedRollbackInstantTime, + boolean skipLocking) { + if (pendingRollbackInfo.isPresent()) { + // Case 1: caller already resolved a pending rollback — re-use it without taking a lock. + return Option.of(Pair.of(pendingRollbackInfo.get().getRollbackInstant(), + Option.of(pendingRollbackInfo.get().getRollbackPlan()))); + } + + // Case 2: no pending rollback supplied — resolve or schedule one, holding the table lock unless skipLocking is set. + if (!skipLocking) { + txnManager.beginStateChange(Option.empty(), Option.empty()); + } + try { + if (config.shouldAvoidDuplicateRollbackPlan()) { + // Check if another writer already scheduled a rollback for this instant to avoid duplicates. + table.getMetaClient().reloadActiveTimeline(); + Option pendingRollbackOpt = getPendingRollbackInfo(table.getMetaClient(), commitInstantTime); + if (pendingRollbackOpt.isPresent()) { + // Case 2a: a concurrent writer already scheduled the rollback — re-use it. + return Option.of(Pair.of(pendingRollbackOpt.get().getRollbackInstant(), + Option.of(pendingRollbackOpt.get().getRollbackPlan()))); + } + // Refresh commitInstantOpt from the reloaded timeline. + commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstantsAsStream() + .filter(instant -> EQUALS.test(instant.requestedTime(), commitInstantTime)) + .findFirst()); + } + if (commitInstantOpt.isEmpty()) { + log.error("Cannot find instant {} in the timeline of table {} for rollback", commitInstantTime, config.getBasePath()); + return Option.empty(); + } + // Case 2b: no pending rollback exists — schedule one now. 
+ String newRollbackInstantTime = suppliedRollbackInstantTime.orElseGet(() -> createNewInstantTime(false)); + HoodieInstant rollbackInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.ROLLBACK_ACTION, newRollbackInstantTime, + table.getMetaClient().getTimelineLayout().getInstantComparator().requestedTimeOrderedComparator()); + Option rollbackPlan = table.scheduleRollback(context, newRollbackInstantTime, commitInstantOpt.get(), + false, config.shouldRollbackUsingMarkers(), false); + return Option.of(Pair.of(rollbackInstant, rollbackPlan)); + } finally { + if (!skipLocking) { + txnManager.endStateChange(Option.empty()); + } + } + } + /** * Main API to rollback failed bootstrap. */ diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v1/TimelineArchiverV1.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v1/TimelineArchiverV1.java index e693a1c2e7fcf..d518ac5525dd6 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v1/TimelineArchiverV1.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/timeline/versioning/v1/TimelineArchiverV1.java @@ -20,6 +20,7 @@ package org.apache.hudi.client.timeline.versioning.v1; import org.apache.hudi.avro.model.HoodieArchivedMetaEntry; +import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.client.timeline.HoodieTimelineArchiver; import org.apache.hudi.client.transaction.TransactionManager; import org.apache.hudi.client.utils.ArchivalMetrics; @@ -53,6 +54,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieCommitException; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.storage.StoragePath; import org.apache.hudi.table.HoodieTable; @@ -271,7 +273,31 @@ private 
Stream getCommitInstantsToArchive() throws IOException { Option oldestInstantToRetainForClustering = ClusteringUtils.getEarliestInstantToRetainForClustering(table.getActiveTimeline(), table.getMetaClient(), config.getCleanerPolicy()); + // If enabled, block archival based on ECTR from the last completed clean to ensure we don't archive + // commits that have data files that haven't been cleaned yet. + Option oldestInstantToRetainForClean = Option.empty(); + if (config.shouldBlockArchivalOnCleanECTR()) { + Option lastCleanInstant = table.getCleanTimeline().filterCompletedInstants().lastInstant(); + if (lastCleanInstant.isPresent()) { + try { + HoodieCleanMetadata cleanMetadata = + table.getActiveTimeline().readCleanMetadata(lastCleanInstant.get()); + if (cleanMetadata.getEarliestCommitToRetain() != null + && !cleanMetadata.getEarliestCommitToRetain().trim().isEmpty()) { + oldestInstantToRetainForClean = commitTimeline.findInstantsAfterOrEquals( + cleanMetadata.getEarliestCommitToRetain()).firstInstant(); + log.info("Blocking archival based on earliest commit to retain {} from last clean {}. 
Oldest to retain is {}", + cleanMetadata.getEarliestCommitToRetain(), lastCleanInstant.get().requestedTime(), oldestInstantToRetainForClean.orElse(null)); + } + } catch (IOException e) { + log.warn("Failed to read clean metadata for {}", lastCleanInstant.get(), e); + throw new HoodieIOException("Failed to read clean metadata for " + lastCleanInstant.get(), e); + } + } + } + // Actually do the commits (the effectively-final copy below is needed for use inside the filter lambda) + Option finalOldestInstantToRetainForClean = oldestInstantToRetainForClean; Stream instantToArchiveStream = commitTimeline.getInstantsAsStream() .filter(s -> { if (config.shouldArchiveBeyondSavepoint()) { @@ -297,6 +323,10 @@ private Stream getCommitInstantsToArchive() throws IOException { oldestInstantToRetainForClustering.map(instantToRetain -> compareTimestamps(s.requestedTime(), LESSER_THAN, instantToRetain.requestedTime())) .orElse(true) + ).filter(s -> + finalOldestInstantToRetainForClean.map(instantToRetain -> + compareTimestamps(s.requestedTime(), LESSER_THAN, instantToRetain.requestedTime())) + .orElse(true) ); return instantToArchiveStream.limit(commitTimeline.countInstants() - minInstantsToKeep); } else { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java index 18186476aba7e..fa7fde5175083 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/FileSystemBasedLockProvider.java @@ -103,6 +103,17 @@ public void close() { storage.deleteFile(this.lockFile); } catch (IOException e) { throw new HoodieLockException(generateLogStatement(LockState.FAILED_TO_RELEASE), e); + } finally { + try { + // HoodieHadoopStorage.close() is currently a no-op since Hadoop FileSystem + // instances 
are shared within the JVM process lifecycle and cannot be + // individually closed. This call is retained for HoodieStorage interface + // contract correctness and to support future storage backends that may + // implement close(). + storage.close(); + } catch (IOException closeEx) { + log.warn("Failed to close HoodieStorage", closeEx); + } } } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieArchivalConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieArchivalConfig.java index 1b1e3ba5c3917..8854c87edeaba 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieArchivalConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieArchivalConfig.java @@ -114,6 +114,17 @@ public class HoodieArchivalConfig extends HoodieConfig { .markAdvanced() .withDocumentation("Number of timeline manifest versions to retain."); + public static final ConfigProperty BLOCK_ARCHIVAL_ON_LATEST_CLEAN_ECTR = ConfigProperty + .key("hoodie.archive.block.on.latest.clean.ectr") + .defaultValue(false) + .markAdvanced() + .sinceVersion("1.2.0") + .withDocumentation("If enabled, archival will not archive commits beyond the Earliest Commit To Retain (ECTR) " + + "from the last completed clean. ECTR represents the oldest commit whose data files are still needed by " + + "the table and have not yet been cleaned up. 
Blocking archival at this point ensures that timeline metadata " + + "is not removed for commits whose data files still exist on storage, preventing inconsistencies between " + + "the timeline and the actual data."); + /** * @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead */ @@ -205,6 +216,11 @@ public Builder withArchiveBeyondSavepoint(boolean archiveBeyondSavepoint) { return this; } + public Builder withBlockArchivalOnCleanECTR(boolean blockArchivalOnCleanECTR) { + archivalConfig.setValue(BLOCK_ARCHIVAL_ON_LATEST_CLEAN_ECTR, String.valueOf(blockArchivalOnCleanECTR)); + return this; + } + public HoodieArchivalConfig build() { archivalConfig.setDefaults(HoodieArchivalConfig.class.getName()); return archivalConfig; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java index 0565f8f9f44cb..ee090c880af4b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java @@ -250,6 +250,16 @@ public class HoodieCleanConfig extends HoodieConfig { .markAdvanced() .withDocumentation("Maximum number of commits to clean in one clean commit. Applicable only when the clean policy is based on KEEP_LATEST_COMMITS or KEEP_LATEST_HOURS"); + public static final ConfigProperty INTERVAL_TO_CREATE_EMPTY_CLEAN_HOURS = ConfigProperty + .key("hoodie.write.empty.clean.interval.hours") + .defaultValue(-1L) + .markAdvanced() + .withDocumentation("In some cases empty clean commit needs to be created to ensure the clean planner " + + "does not look through entire dataset if there are no clean plans. This is possible for append-only " + + "dataset. Also, for these datasets we cannot ignore clean completely since in the future there could " + + "be upsert or replace operations. 
By creating empty clean commit, earliest_commit_to_retain value " + "will be updated so that now clean planner can only check for partitions that are modified after the " + "last empty clean's earliest_commit_to_retain value thereby optimizing the clean planning"); /** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */ @Deprecated @@ -426,6 +436,11 @@ public HoodieCleanConfig.Builder withMaxCommitsToClean(long maxCommitsToClean) { return this; } + public HoodieCleanConfig.Builder withIntervalToCreateEmptyCleanHours(long emptyCleanIntervalHours) { + cleanConfig.setValue(INTERVAL_TO_CREATE_EMPTY_CLEAN_HOURS, String.valueOf(emptyCleanIntervalHours)); + return this; + } + public HoodieCleanConfig build() { cleanConfig.setDefaults(HoodieCleanConfig.class.getName()); HoodieCleaningPolicy.valueOf(cleanConfig.getString(CLEANER_POLICY)); @@ -434,6 +449,10 @@ public HoodieCleanConfig build() { if (maxCommitsToClean < 1) { throw new IllegalArgumentException(MAX_COMMITS_TO_CLEAN.key() + " must be >= 1, but was " + maxCommitsToClean); } + long emptyCleanIntervalHours = cleanConfig.getLong(INTERVAL_TO_CREATE_EMPTY_CLEAN_HOURS); + if (emptyCleanIntervalHours == 0 || emptyCleanIntervalHours < -1) { + throw new IllegalArgumentException(INTERVAL_TO_CREATE_EMPTY_CLEAN_HOURS.key() + " must be -1 (disabled) or >= 1, but was " + emptyCleanIntervalHours); + } return cleanConfig; } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index e0d2e1906fce8..5df834121bf90 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -300,6 +300,14 @@ public class HoodieWriteConfig extends HoodieConfig { .withDocumentation("Enables a more efficient mechanism for rollbacks based on the marker 
files generated " + "during the writes. Turned on by default."); + public static final ConfigProperty ROLLBACK_AVOID_DUPLICATE_PLAN = ConfigProperty + .key("hoodie.rollback.avoid.duplicate.plan") + .defaultValue("false") + .markAdvanced() + .withDocumentation("When enabled in multi-writer mode, before scheduling a new rollback plan, the writer reloads " + + "the timeline under lock to check if another writer already scheduled one for the same failed commit. " + + "This avoids duplicate rollback instants and uses heartbeats to ensure only one writer executes the rollback at a time."); + public static final ConfigProperty FAIL_JOB_ON_DUPLICATE_DATA_FILE_DETECTION = ConfigProperty .key("hoodie.fail.job.on.duplicate.data.file.detection") .defaultValue("false") @@ -1601,6 +1609,10 @@ public boolean shouldRollbackUsingMarkers() { return getBoolean(ROLLBACK_USING_MARKERS_ENABLE); } + public boolean shouldAvoidDuplicateRollbackPlan() { + return getBoolean(ROLLBACK_AVOID_DUPLICATE_PLAN) && getWriteConcurrencyMode().supportsMultiWriter(); + } + public boolean enableComplexKeygenValidation() { return getBoolean(ENABLE_COMPLEX_KEYGEN_VALIDATION); } @@ -1865,6 +1877,10 @@ public boolean isAutoClean() { return getBoolean(HoodieCleanConfig.AUTO_CLEAN); } + public long getIntervalToCreateEmptyCleanHours() { + return getLong(HoodieCleanConfig.INTERVAL_TO_CREATE_EMPTY_CLEAN_HOURS); + } + public boolean shouldArchiveBeyondSavepoint() { return getBooleanOrDefault(HoodieArchivalConfig.ARCHIVE_BEYOND_SAVEPOINT); } @@ -2002,6 +2018,10 @@ public int getCommitArchivalBatchSize() { return getInt(HoodieArchivalConfig.COMMITS_ARCHIVAL_BATCH_SIZE); } + public boolean shouldBlockArchivalOnCleanECTR() { + return getBoolean(HoodieArchivalConfig.BLOCK_ARCHIVAL_ON_LATEST_CLEAN_ECTR); + } + public Boolean shouldCleanBootstrapBaseFile() { return getBoolean(HoodieCleanConfig.CLEANER_BOOTSTRAP_BASE_FILE_ENABLE); } diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java index 64262ab673a29..3fad30b22bdf0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java @@ -45,6 +45,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -52,6 +53,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.util.CleanerUtils.CLEAN_METADATA_VERSION_2; import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty; @Slf4j @@ -134,9 +136,9 @@ private static Stream> deleteFilesFunc(Iterator * @throws IllegalArgumentException if unknown cleaning policy is provided */ List clean(HoodieEngineContext context, HoodieCleanerPlan cleanerPlan) { - int cleanerParallelism = Math.min( + int cleanerParallelism = Math.max(1, Math.min( cleanerPlan.getFilePathsToBeDeletedPerPartition().values().stream().mapToInt(List::size).sum(), - config.getCleanerParallelism()); + config.getCleanerParallelism())); log.info("Using cleanerParallelism: {}", cleanerParallelism); context.setJobStatus(this.getClass().getSimpleName(), "Perform cleaning of table: " + config.getTableName()); @@ -155,7 +157,7 @@ List clean(HoodieEngineContext context, HoodieCleanerPlan clean List partitionsToBeDeleted = table.getMetaClient().getTableConfig().isTablePartitioned() && cleanerPlan.getPartitionsToBeDeleted() != null ? 
cleanerPlan.getPartitionsToBeDeleted() - : new ArrayList<>(); + : Collections.emptyList(); partitionsToBeDeleted.forEach(entry -> { if (!isNullOrEmpty(entry)) { deleteFileAndGetResult(table.getStorage(), table.getMetaClient().getBasePath() + "/" + entry); @@ -213,17 +215,18 @@ private HoodieCleanMetadata runClean(HoodieTable table, HoodieInstan } List cleanStats = clean(context, cleanerPlan); + table.getMetaClient().reloadActiveTimeline(); + HoodieCleanMetadata metadata; if (cleanStats.isEmpty()) { - return HoodieCleanMetadata.newBuilder().build(); + metadata = createEmptyCleanMetadata(cleanerPlan, inflightInstant, timer.endTimer()); + } else { + metadata = CleanerUtils.convertCleanMetadata( + inflightInstant.requestedTime(), + Option.of(timer.endTimer()), + cleanStats, + cleanerPlan.getExtraMetadata() + ); } - - table.getMetaClient().reloadActiveTimeline(); - HoodieCleanMetadata metadata = CleanerUtils.convertCleanMetadata( - inflightInstant.requestedTime(), - Option.of(timer.endTimer()), - cleanStats, - cleanerPlan.getExtraMetadata() - ); this.txnManager.beginStateChange(Option.of(inflightInstant), Option.empty()); writeTableMetadata(metadata, inflightInstant.requestedTime()); table.getActiveTimeline().transitionCleanInflightToComplete( @@ -238,6 +241,23 @@ private HoodieCleanMetadata runClean(HoodieTable table, HoodieInstan } } + private static HoodieCleanMetadata createEmptyCleanMetadata(HoodieCleanerPlan cleanerPlan, HoodieInstant inflightInstant, long timeTakenMillis) { + ValidationUtils.checkArgument(cleanerPlan.getEarliestInstantToRetain() != null, "For empty cleans, earliest instant to retain can never be null"); + HoodieCleanMetadata.Builder cleanMetadataBuilder = HoodieCleanMetadata.newBuilder() + .setStartCleanTime(inflightInstant.requestedTime()) + .setTimeTakenInMillis(timeTakenMillis) + .setTotalFilesDeleted(0) + .setLastCompletedCommitTimestamp(cleanerPlan.getLastCompletedCommitTimestamp()) + .setVersion(CLEAN_METADATA_VERSION_2) + 
.setPartitionMetadata(Collections.emptyMap()) + .setExtraMetadata(cleanerPlan.getExtraMetadata()) + .setBootstrapPartitionMetadata(Collections.emptyMap()); + if (cleanerPlan.getEarliestInstantToRetain() != null) { + cleanMetadataBuilder.setEarliestCommitToRetain(cleanerPlan.getEarliestInstantToRetain().getTimestamp()); + } + return cleanMetadataBuilder.build(); + } + @Override public HoodieCleanMetadata execute() { List cleanMetadataList = new ArrayList<>(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java index 4c57cdd76b59a..d11c61d40c205 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanActionExecutor.java @@ -25,12 +25,13 @@ import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.engine.HoodieLocalEngineContext; import org.apache.hudi.common.model.CleanFileInfo; -import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.CleanerUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; @@ -42,15 +43,20 @@ import lombok.extern.slf4j.Slf4j; import java.io.IOException; +import java.text.ParseException; +import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.Collections; import 
java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import static org.apache.hudi.common.table.timeline.InstantComparison.LESSER_THAN; +import static org.apache.hudi.common.table.timeline.InstantComparison.compareTimestamps; import static org.apache.hudi.common.util.CleanerUtils.SAVEPOINTED_TIMESTAMPS; -import static org.apache.hudi.common.util.MapUtils.nonEmpty; +import static org.apache.hudi.common.util.CollectionUtils.nonEmpty; @Slf4j public class CleanPlanActionExecutor extends BaseActionExecutor> { @@ -94,6 +100,23 @@ private boolean needsCleaning(CleaningTriggerStrategy strategy) { } } + private HoodieCleanerPlan getEmptyCleanerPlan(Option earliestInstant, CleanPlanner planner) throws IOException { + HoodieCleanerPlan.Builder cleanBuilder = HoodieCleanerPlan.newBuilder() + .setFilePathsToBeDeletedPerPartition(Collections.emptyMap()) + .setExtraMetadata(prepareExtraMetadata(planner.getSavepointedTimestamps())); + if (earliestInstant.isPresent()) { + HoodieInstant hoodieInstant = earliestInstant.get(); + cleanBuilder.setPolicy(config.getCleanerPolicy().name()) + .setVersion(CleanPlanner.LATEST_CLEAN_PLAN_VERSION) + .setEarliestInstantToRetain(new HoodieActionInstant(hoodieInstant.requestedTime(), hoodieInstant.getAction(), hoodieInstant.getState().name())) + .setLastCompletedCommitTimestamp(planner.getLastCompletedCommitTimestamp()); + } else { + cleanBuilder.setPolicy(config.getCleanerPolicy().name()) + .setVersion(CleanPlanner.LATEST_CLEAN_PLAN_VERSION); + } + return cleanBuilder.build(); + } + /** * Generates List of files to be cleaned. * @@ -109,8 +132,8 @@ HoodieCleanerPlan requestClean(HoodieEngineContext context) { context.clearJobStatus(); if (partitionsToClean.isEmpty()) { - log.info("Nothing to clean here. 
It is already clean"); - return HoodieCleanerPlan.newBuilder().setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()).build(); + log.info("Partitions to clean returned empty. Checking to see if empty clean needs to be created."); + return getEmptyCleanerPlan(earliestInstant, planner); } log.info( "Earliest commit to retain for clean : {}", @@ -213,14 +236,61 @@ protected Option requestClean() { cleanerEngineContext = context; } final HoodieCleanerPlan cleanerPlan = requestClean(cleanerEngineContext); - Option option = Option.empty(); - if (nonEmpty(cleanerPlan.getFilePathsToBeDeletedPerPartition()) - && cleanerPlan.getFilePathsToBeDeletedPerPartition().values().stream().mapToInt(List::size).sum() > 0) { + Option cleanPlanOpt = Option.empty(); + if ((cleanerPlan.getPartitionsToBeDeleted() != null && !cleanerPlan.getPartitionsToBeDeleted().isEmpty()) + || (nonEmpty(cleanerPlan.getFilePathsToBeDeletedPerPartition()) + && cleanerPlan.getFilePathsToBeDeletedPerPartition().values().stream().mapToInt(List::size).sum() > 0)) { // Only create cleaner plan which does some work - option = Option.of(cleanerPlan); + cleanPlanOpt = Option.of(cleanerPlan); } + // If cleaner plan returned an empty list, incremental clean is enabled and there was no + // completed clean created in the last X hours configured in INTERVAL_TO_CREATE_EMPTY_CLEAN_HOURS, + // create a dummy clean to avoid full scan in the future. + // Note: For a dataset with incremental clean enabled, that does not receive any updates, cleaner plan always comes + // with an empty list of files to be cleaned. CleanActionExecutor would never be invoked for this dataset. + // To avoid full scan on the dataset with every ingestion run, empty clean commit is created here. 
+ if (cleanPlanOpt.isEmpty() && config.incrementalCleanerModeEnabled() && cleanerPlan.getEarliestInstantToRetain() != null && config.getIntervalToCreateEmptyCleanHours() > 0) { + // Only create an empty clean commit if earliestInstantToRetain is present in the plan + boolean eligibleForEmptyCleanCommit = true; + + // if there is no previous clean instant or the previous clean instant was before the configured max duration, schedule an empty clean commit + Option lastCleanInstant = table.getCleanTimeline().filterCompletedInstants().lastInstant(); + if (lastCleanInstant.isPresent()) { + try { + ZonedDateTime latestDateTime = ZonedDateTime.ofInstant(java.time.Instant.now(), table.getMetaClient().getTableConfig().getTimelineTimezone().getZoneId()); + long currentCleanTimeMs = latestDateTime.toInstant().toEpochMilli(); + long lastCleanTimeMs = HoodieInstantTimeGenerator.parseDateFromInstantTime(lastCleanInstant.get().requestedTime()).toInstant().toEpochMilli(); + eligibleForEmptyCleanCommit = currentCleanTimeMs - lastCleanTimeMs > (TimeUnit.HOURS.toMillis(config.getIntervalToCreateEmptyCleanHours())); + } catch (ParseException e) { + log.error("Unable to parse last clean commit time", e); + throw new HoodieException("Unable to parse last clean commit time", e); + } + } + if (eligibleForEmptyCleanCommit) { + // Ensure earliestCommitToRetain doesn't go backwards when user changes cleaner configuration + if (lastCleanInstant.isPresent()) { + try { + HoodieCleanMetadata lastCleanMetadata = table.getActiveTimeline().readCleanMetadata(lastCleanInstant.get()); + String previousEarliestCommitToRetain = lastCleanMetadata.getEarliestCommitToRetain(); + String currentEarliestCommitToRetain = cleanerPlan.getEarliestInstantToRetain().getTimestamp(); - return option; + if (!StringUtils.isNullOrEmpty(previousEarliestCommitToRetain) && !StringUtils.isNullOrEmpty(currentEarliestCommitToRetain) + && compareTimestamps(currentEarliestCommitToRetain, LESSER_THAN, 
previousEarliestCommitToRetain)) { + log.warn("Adjusting empty clean earliestCommitToRetain to previous value to avoid going backwards. " + + "Previous: {}, Current: {}. This can happen when cleaner configuration is changed.", + previousEarliestCommitToRetain, currentEarliestCommitToRetain); + cleanerPlan.getEarliestInstantToRetain().setTimestamp(previousEarliestCommitToRetain); + } + } catch (IOException e) { + log.error("Unable to read last clean metadata", e); + throw new HoodieException("Unable to read last clean metadata", e); + } + } + log.info("Creating an empty clean instant with earliestCommitToRetain of {}", cleanerPlan.getEarliestInstantToRetain().getTimestamp()); + return Option.of(cleanerPlan); + } + } + return cleanPlanOpt; } @Override diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java index cc53e67b74872..0b42837b35869 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/table/action/TestCleanPlanner.java @@ -513,6 +513,14 @@ static Stream keepLatestByHoursOrCommitsArgsIncrCleanPartitions() { Collections.singletonMap(savepoint2, Collections.singletonList(PARTITION1)), Option.empty(), activeInstantsPartitionsMap2, Collections.emptyList(), threePartitionsInActiveTimeline, true, Collections.emptyMap())); + // Empty cleaner plan case + arguments.add(Arguments.of(true, getCleanByHoursConfig(), earliestInstant, lastCompletedInLastClean, lastCleanInstant, + earliestInstantInLastClean, Collections.emptyList(), Collections.emptyMap(), Option.empty(), + activeInstantsPartitionsMap2, Collections.emptyList(), twoPartitionsInActiveTimeline, false, Collections.emptyMap())); + arguments.add(Arguments.of(false, getCleanByHoursConfig(), earliestInstant, lastCompletedInLastClean, 
lastCleanInstant, + earliestInstantInLastClean, Collections.emptyList(), Collections.emptyMap(), Option.empty(), + activeInstantsUnPartitionsMap, Collections.emptyList(), unPartitionsInActiveTimeline, false, Collections.emptyMap())); + return arguments.stream(); } @@ -598,8 +606,8 @@ private static HoodieCleanMetadata getCleanCommitMetadata(List partition Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), false))); Map extraMetadata = new HashMap<>(); extraMetadata.put(SAVEPOINTED_TIMESTAMPS, savepointsToTrack.stream().collect(Collectors.joining(","))); - return new HoodieCleanMetadata(instantTime, 100L, 10, earliestCommitToRetain, lastCompletedTime, partitionMetadata, - CLEAN_METADATA_VERSION_2, Collections.EMPTY_MAP, extraMetadata.isEmpty() ? null : extraMetadata); + return new HoodieCleanMetadata(instantTime, 100L, partitionMetadata.isEmpty() ? 0 : 10, earliestCommitToRetain, lastCompletedTime, + partitionMetadata, CLEAN_METADATA_VERSION_2, Collections.emptyMap(), extraMetadata.isEmpty() ? 
null : extraMetadata); } private static HoodieSavepointMetadata getSavepointMetadata(List partitions) { diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataFileWriterFactory.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataFileWriterFactory.java index 3dbc65c3290e1..be3242164a40e 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataFileWriterFactory.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataFileWriterFactory.java @@ -26,10 +26,13 @@ import org.apache.hudi.common.schema.HoodieSchema; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.io.HoodieParquetConfigInjector; import org.apache.hudi.io.storage.HoodieFileWriter; import org.apache.hudi.io.storage.HoodieFileWriterFactory; import org.apache.hudi.storage.HoodieStorage; +import org.apache.hudi.storage.StorageConfiguration; import org.apache.hudi.storage.StoragePath; import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration; import org.apache.hudi.util.RowDataQueryContexts; @@ -115,17 +118,21 @@ public HoodieFileWriter newParquetFileWriter( HoodieConfig config, RowType rowType, TaskContextSupplier taskContextSupplier) throws IOException { - Configuration conf = storage.getConf().unwrapAs(Configuration.class); boolean populateMetaFields = config.getBooleanOrDefault(HoodieTableConfig.POPULATE_META_FIELDS); boolean withOperation = config.getBooleanOrDefault(HoodieWriteConfig.ALLOW_OPERATION_METADATA_FIELD); - BloomFilter filter = createBloomFilter(config); + Pair injectedConfigs = HoodieParquetConfigInjector.applyConfigInjector(storagePath, storage.getConf(), config); + StorageConfiguration storageConfiguration = injectedConfigs.getLeft(); 
+ HoodieConfig hoodieConfig = injectedConfigs.getRight(); + + Configuration conf = (Configuration) storageConfiguration.unwrapAs(Configuration.class); + BloomFilter filter = createBloomFilter(hoodieConfig); HoodieRowDataParquetWriteSupport writeSupport = (HoodieRowDataParquetWriteSupport) ReflectionUtils.loadClass( - config.getStringOrDefault(HoodieStorageConfig.HOODIE_PARQUET_FLINK_ROW_DATA_WRITE_SUPPORT_CLASS), + hoodieConfig.getStringOrDefault(HoodieStorageConfig.HOODIE_PARQUET_FLINK_ROW_DATA_WRITE_SUPPORT_CLASS), new Class[] {Configuration.class, RowType.class, BloomFilter.class}, conf, rowType, filter); - return new HoodieRowDataParquetWriter(storagePath, getParquetConfig(config, writeSupport), + return new HoodieRowDataParquetWriter(storagePath, getParquetConfig(hoodieConfig, writeSupport), instantTime, taskContextSupplier, populateMetaFields, withOperation); } diff --git a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowDataParquetConfigInjector.java b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowDataParquetConfigInjector.java new file mode 100644 index 0000000000000..f65a2844649bd --- /dev/null +++ b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowDataParquetConfigInjector.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.io.storage.row; + +import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.config.HoodieStorageConfig; +import org.apache.hudi.common.engine.LocalTaskContextSupplier; +import org.apache.hudi.common.testutils.DisableDictionaryInjector; +import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.io.HoodieParquetConfigInjector; +import org.apache.hudi.io.storage.HoodieFileWriter; +import org.apache.hudi.storage.HoodieStorage; +import org.apache.hudi.storage.StorageConfiguration; +import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration; +import org.apache.hudi.testutils.HoodieFlinkClientTestHarness; + +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.column.Encoding; +import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.hadoop.metadata.BlockMetaData; +import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import 
java.io.IOException; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link HoodieParquetConfigInjector} functionality in {@link HoodieRowDataFileWriterFactory}. + */ +public class TestHoodieRowDataParquetConfigInjector extends HoodieFlinkClientTestHarness { + + @TempDir + java.nio.file.Path tmpDir; + + @BeforeEach + public void setUp() throws IOException { + initPath(); + initFileSystem(); + initMetaClient(); + } + + @AfterEach + public void tearDown() throws Exception { + cleanupResources(); + } + + /** + * Test implementation that modifies Hadoop configuration for Parquet bloom filters. + */ + public static class ParquetBloomFilterInjector implements HoodieParquetConfigInjector { + @Override + public Pair injectConfig(StoragePath path, + StorageConfiguration storageConf, + HoodieConfig hoodieConfig) { + // Enable native Parquet bloom filter on a specific column + Configuration hadoopConf = (Configuration) storageConf.unwrapAs(Configuration.class); + hadoopConf.set("parquet.bloom.filter.enabled#uuid", "true"); + hadoopConf.set("parquet.bloom.filter.expected.ndv#uuid", "1000"); + + return Pair.of(new HadoopStorageConfiguration(hadoopConf), hoodieConfig); + } + } + + private RowType getTestRowType() { + DataType dataType = DataTypes.ROW( + DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)), + DataTypes.FIELD("name", DataTypes.VARCHAR(20)), + DataTypes.FIELD("age", DataTypes.INT()), + DataTypes.FIELD("partition", DataTypes.VARCHAR(20)) + ).notNull(); + return (RowType) dataType.getLogicalType(); + } + + @Test + public void testDisableDictionaryEncodingViaInjector() throws Exception { + final String instantTime = "100"; + HoodieStorage storage = HoodieTestUtils.getStorage(tmpDir.toString()); + final StoragePath parquetPath = new StoragePath( + basePath + 
"/partition/path/test_dictionary_" + instantTime + ".parquet"); + + RowType rowType = getTestRowType(); + + // Create config with the custom injector + HoodieConfig config = new HoodieConfig(); + config.setValue(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED, "true"); // Start with dictionary enabled + config.setValue(HoodieStorageConfig.HOODIE_PARQUET_CONFIG_INJECTOR_CLASS, DisableDictionaryInjector.class.getName()); + + // Create writer and write some data + HoodieRowDataFileWriterFactory factory = new HoodieRowDataFileWriterFactory(storage); + HoodieFileWriter writer = factory.newParquetFileWriter( + instantTime, parquetPath, config, rowType, new LocalTaskContextSupplier()); + + assertTrue(writer instanceof HoodieRowDataParquetWriter); + + // Write test records + HoodieRowDataParquetWriter rowDataWriter = (HoodieRowDataParquetWriter) writer; + for (int i = 0; i < 100; i++) { + GenericRowData row = new GenericRowData(4); + row.setField(0, StringData.fromString("id" + i)); + row.setField(1, StringData.fromString("name" + i)); + row.setField(2, 20 + i); + row.setField(3, StringData.fromString("partition/path")); + rowDataWriter.write(row); + } + writer.close(); + + // Verify the parquet file was created + assertTrue(storage.exists(parquetPath)); + + // Read parquet metadata and verify dictionary encoding is disabled + Configuration hadoopConf = new Configuration(); + Path hadoopPath = new Path(parquetPath.toUri()); + ParquetFileReader reader = ParquetFileReader.open(hadoopConf, hadoopPath); + ParquetMetadata metadata = reader.getFooter(); + reader.close(); + + assertNotNull(metadata); + + // Verify that dictionary encoding is NOT used for any column + // When dictionary encoding is disabled, columns should use PLAIN or other encodings but not RLE_DICTIONARY + for (BlockMetaData block : metadata.getBlocks()) { + for (ColumnChunkMetaData column : block.getColumns()) { + // Check all encodings used for this column - should not include RLE_DICTIONARY or 
PLAIN_DICTIONARY + for (Encoding encoding : column.getEncodings()) { + assertFalse(encoding == Encoding.RLE_DICTIONARY || encoding == Encoding.PLAIN_DICTIONARY, + "Column " + column.getPath() + " should not use dictionary encoding, but found: " + encoding); + } + } + } + } + + @Test + public void testInvalidInjectorClassThrowsException() throws IOException { + final String instantTime = "102"; + HoodieStorage storage = HoodieTestUtils.getStorage(tmpDir.toString()); + final StoragePath parquetPath = new StoragePath( + basePath + "/partition/path/test_invalid_" + instantTime + ".parquet"); + + RowType rowType = getTestRowType(); + + // Create config with an invalid/non-existent injector class + HoodieConfig config = new HoodieConfig(); + config.setValue(HoodieStorageConfig.HOODIE_PARQUET_CONFIG_INJECTOR_CLASS, "org.apache.hudi.NonExistentInjector"); + + // Should throw an exception when trying to create the writer + HoodieRowDataFileWriterFactory factory = new HoodieRowDataFileWriterFactory(storage); + assertThrows(Exception.class, () -> { + factory.newParquetFileWriter(instantTime, parquetPath, config, rowType, new LocalTaskContextSupplier()); + }); + } + + @Test + public void testNoInjectorUsesDefaultConfig() throws Exception { + final String instantTime = "103"; + HoodieStorage storage = HoodieTestUtils.getStorage(tmpDir.toString()); + final StoragePath parquetPath = new StoragePath( + basePath + "/partition/path/test_no_injector_" + instantTime + ".parquet"); + + RowType rowType = getTestRowType(); + + // Create config WITHOUT injector - should use default settings + HoodieConfig config = new HoodieConfig(); + config.setValue(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED, "true"); + + // Create writer and write some data + HoodieRowDataFileWriterFactory factory = new HoodieRowDataFileWriterFactory(storage); + HoodieFileWriter writer = factory.newParquetFileWriter( + instantTime, parquetPath, config, rowType, new LocalTaskContextSupplier()); + + assertTrue(writer 
instanceof HoodieRowDataParquetWriter); + + // Write test records + HoodieRowDataParquetWriter rowDataWriter = (HoodieRowDataParquetWriter) writer; + for (int i = 0; i < 10; i++) { + GenericRowData row = new GenericRowData(4); + row.setField(0, StringData.fromString("id" + i)); + row.setField(1, StringData.fromString("name" + i)); + row.setField(2, 20 + i); + row.setField(3, StringData.fromString("partition/path")); + rowDataWriter.write(row); + } + writer.close(); + + // Verify the parquet file was created + assertTrue(storage.exists(parquetPath)); + } +} diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml index 9b557f85b137f..905c15ac82874 100644 --- a/hudi-client/hudi-spark-client/pom.xml +++ b/hudi-client/hudi-spark-client/pom.xml @@ -102,7 +102,7 @@ - com.lancedb + org.lance ${lance.spark.artifact} diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java index bb075470c5073..b3a63dbb67a6b 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkFileWriterFactory.java @@ -25,7 +25,9 @@ import org.apache.hudi.common.schema.HoodieSchema; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.io.HoodieParquetConfigInjector; import org.apache.hudi.io.storage.row.HoodieRowParquetConfig; import org.apache.hudi.io.storage.row.HoodieRowParquetWriteSupport; import org.apache.hudi.storage.HoodieStorage; @@ -52,21 +54,27 @@ protected HoodieFileWriter newParquetFileWriter( String instantTime, StoragePath path, HoodieConfig config, HoodieSchema schema, 
TaskContextSupplier taskContextSupplier) throws IOException { boolean populateMetaFields = config.getBooleanOrDefault(HoodieTableConfig.POPULATE_META_FIELDS); - String compressionCodecName = config.getStringOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME); + + Pair injectedConfigs = HoodieParquetConfigInjector.applyConfigInjector(path, storage.getConf(), config); + StorageConfiguration storageConfiguration = injectedConfigs.getLeft(); + HoodieConfig hoodieConfig = injectedConfigs.getRight(); + + String compressionCodecName = hoodieConfig.getStringOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME); // Support PARQUET_COMPRESSION_CODEC_NAME is "" if (compressionCodecName.isEmpty()) { compressionCodecName = null; } - HoodieRowParquetWriteSupport writeSupport = getHoodieRowParquetWriteSupport(storage.getConf(), schema, - config, enableBloomFilter(populateMetaFields, config)); + + HoodieRowParquetWriteSupport writeSupport = getHoodieRowParquetWriteSupport(storageConfiguration, schema, + hoodieConfig, enableBloomFilter(populateMetaFields, hoodieConfig)); HoodieRowParquetConfig parquetConfig = new HoodieRowParquetConfig(writeSupport, CompressionCodecName.fromConf(compressionCodecName), - config.getIntOrDefault(HoodieStorageConfig.PARQUET_BLOCK_SIZE), - config.getIntOrDefault(HoodieStorageConfig.PARQUET_PAGE_SIZE), - config.getLongOrDefault(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE), - storage.getConf().unwrapAs(Configuration.class), - config.getDoubleOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION), - config.getBooleanOrDefault(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED)); + hoodieConfig.getIntOrDefault(HoodieStorageConfig.PARQUET_BLOCK_SIZE), + hoodieConfig.getIntOrDefault(HoodieStorageConfig.PARQUET_PAGE_SIZE), + hoodieConfig.getLongOrDefault(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE), + (Configuration) storageConfiguration.unwrapAs(Configuration.class), + 
hoodieConfig.getDoubleOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION), + hoodieConfig.getBooleanOrDefault(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED)); parquetConfig.getHadoopConf().addResource(writeSupport.getHadoopConf()); return new HoodieSparkParquetWriter(path, parquetConfig, instantTime, taskContextSupplier, populateMetaFields); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkLanceReader.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkLanceReader.java index 3bf6625a4fd57..905bd74026ad7 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkLanceReader.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkLanceReader.java @@ -202,7 +202,11 @@ private ClosableIterator getUnsafeRowIterator(HoodieSchema requestedS @Override public HoodieSchema getSchema() { try { - StructType structType = LanceArrowUtils.fromArrowSchema(arrowSchema); + Map customMetadata = arrowSchema.getCustomMetadata(); + Set vectorColumnNames = HoodieSchema.parseVectorColumnNames( + customMetadata == null ? 
null : customMetadata.get(HoodieSchema.VECTOR_COLUMNS_METADATA_KEY)); + StructType structType = VectorConversionUtils.restoreVectorMetadata( + LanceArrowUtils.fromArrowSchema(arrowSchema), vectorColumnNames); return HoodieSchemaConversionUtils.convertStructTypeToHoodieSchema(structType, "record", "", false); } catch (Exception e) { throw new HoodieException("Failed to read schema from Lance file: " + path, e); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkLanceWriter.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkLanceWriter.java index 02f9c7e3b16b8..64cfb2322bcab 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkLanceWriter.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkLanceWriter.java @@ -23,24 +23,31 @@ import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.schema.HoodieSchema; import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.io.lance.HoodieBaseLanceWriter; import org.apache.hudi.io.storage.row.HoodieBloomFilterRowWriteSupport; import org.apache.hudi.io.storage.row.HoodieInternalRowFileWriter; import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.StoragePath; -import com.lancedb.lance.spark.arrow.LanceArrowWriter; +import org.lance.spark.arrow.LanceArrowWriter; import lombok.AllArgsConstructor; import lombok.Builder; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.MetadataBuilder; +import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; 
import org.apache.spark.sql.util.LanceArrowUtils; import org.apache.spark.unsafe.types.UTF8String; import java.io.IOException; +import java.util.Collections; +import java.util.Map; import java.util.function.Function; import static org.apache.hudi.common.model.HoodieRecord.HoodieMetadataField.COMMIT_SEQNO_METADATA_FIELD; @@ -120,8 +127,8 @@ private HoodieSparkLanceWriter(StoragePath file, Option bloomFilterOpt, long maxFileSize) { super(file, DEFAULT_BATCH_SIZE, bloomFilterOpt.map(HoodieBloomFilterRowWriteSupport::new)); - this.sparkSchema = sparkSchema; - this.arrowSchema = LanceArrowUtils.toArrowSchema(sparkSchema, DEFAULT_TIMEZONE, true, false); + this.sparkSchema = enrichSparkSchemaForLanceVectors(sparkSchema); + this.arrowSchema = LanceArrowUtils.toArrowSchema(this.sparkSchema, DEFAULT_TIMEZONE, true); this.fileName = UTF8String.fromString(file.getName()); this.instantTime = UTF8String.fromString(instantTime); this.populateMetaFields = populateMetaFields; @@ -132,6 +139,55 @@ private HoodieSparkLanceWriter(StoragePath file, }; } + /** + * For every field carrying a Hudi VECTOR logical type annotation + * (Spark metadata key {@link HoodieSchema#TYPE_METADATA_FIELD} starting with {@code "VECTOR"}), + * auto-attach the lance-spark metadata key {@link LanceArrowUtils#ARROW_FIXED_SIZE_LIST_SIZE_KEY()} + * with the vector's dimension so that {@link LanceArrowUtils#toArrowSchema} emits a native + * Arrow {@code FixedSizeList} (Lance's vector column encoding) and + * {@link LanceArrowWriter} selects its fixed-size-list field writer when serializing values. + * + *

Lance-spark keys vector columns off the per-field + * {@link LanceArrowUtils#ARROW_FIXED_SIZE_LIST_SIZE_KEY()} (literal: + * {@code arrow.fixed-size-list.size}) metadata entry (see Lance Spark CREATE TABLE docs); + * we derive it from the VECTOR dimension so users don't have to set it alongside the + * Hudi descriptor. + * + *

Currently only FLOAT and DOUBLE element vectors are supported on Lance, matching + * lance-spark's {@code VectorUtils.shouldBeFixedSizeList}. Other element types would + * silently fall through to a plain list write, so we fail fast instead. + */ + private static StructType enrichSparkSchemaForLanceVectors(StructType sparkSchema) { + Map vectorColumns = + VectorConversionUtils.detectVectorColumnsFromMetadata(sparkSchema); + if (vectorColumns.isEmpty()) { + return sparkSchema; + } + StructField[] fields = sparkSchema.fields(); + StructField[] newFields = new StructField[fields.length]; + for (int i = 0; i < fields.length; i++) { + StructField field = fields[i]; + HoodieSchema.Vector vec = vectorColumns.get(i); + if (vec == null) { + newFields[i] = field; + continue; + } + HoodieSchema.Vector.VectorElementType elemType = vec.getVectorElementType(); + if (elemType != HoodieSchema.Vector.VectorElementType.FLOAT + && elemType != HoodieSchema.Vector.VectorElementType.DOUBLE) { + throw new HoodieNotSupportedException( + "Lance base-file format currently supports FLOAT/DOUBLE VECTOR columns only; " + + "got element type " + elemType + " for field '" + field.name() + "'"); + } + Metadata enriched = new MetadataBuilder() + .withMetadata(field.metadata()) + .putLong(LanceArrowUtils.ARROW_FIXED_SIZE_LIST_SIZE_KEY(), vec.getDimension()) + .build(); + newFields[i] = new StructField(field.name(), field.dataType(), field.nullable(), enriched); + } + return new StructType(newFields); + } + @Override public void writeRowWithMetadata(HoodieKey key, InternalRow row) throws IOException { UTF8String recordKey = UTF8String.fromString(key.getRecordKey()); @@ -198,6 +254,27 @@ protected Schema getArrowSchema() { return arrowSchema; } + /** + * Emit Hudi's {@code hoodie.vector.columns} footer entry alongside any + * bloom-filter metadata. 
Mirrors the Parquet writer (see + * {@code HoodieRowParquetWriteSupport#init}) so Lance files carry the same + * self-describing VECTOR descriptor list that Parquet files do. + * + *

{@code HoodieSparkLanceReader#getSchema} reads this key from the Arrow schema's custom + * metadata to decide which {@code FixedSizeList} columns are restored as Hudi VECTORs; the + * descriptor itself is still re-derived from the converted Arrow field. Storing the full descriptor also lets + * future readers recover fields the Arrow type cannot express (e.g. quantization tags) + * without a writer bump. + */ + @Override + protected Map additionalSchemaMetadata() { + String value = VectorConversionUtils.buildVectorColumnsFooterValue(sparkSchema); + if (value.isEmpty()) { + return Collections.emptyMap(); + } + return Collections.singletonMap(HoodieSchema.VECTOR_COLUMNS_METADATA_KEY, value); + } + /** * Update Hudi metadata fields in the InternalRow. * diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/LanceRecordIterator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/LanceRecordIterator.java index dbf8693ae47f3..d75b35ac94753 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/LanceRecordIterator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/LanceRecordIterator.java @@ -23,19 +23,24 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.ipc.ArrowReader; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.UnsafeProjection; import org.apache.spark.sql.catalyst.expressions.UnsafeRow; +import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.vectorized.ColumnVector; import org.apache.spark.sql.vectorized.ColumnarBatch; -import org.apache.spark.sql.vectorized.LanceArrowColumnVector; +import org.lance.spark.vectorized.LanceArrowColumnVector; import org.lance.file.LanceFileReader; import java.io.IOException; +import java.util.HashMap; import java.util.Iterator; +import
java.util.List; +import java.util.Map; /** * Shared iterator implementation for reading Lance files and converting Arrow batches to Spark rows. @@ -56,6 +61,7 @@ public class LanceRecordIterator implements ClosableIterator { private final BufferAllocator allocator; private final LanceFileReader lanceReader; private final ArrowReader arrowReader; + private final StructType sparkSchema; private final UnsafeProjection projection; private final String path; @@ -81,6 +87,7 @@ public LanceRecordIterator(BufferAllocator allocator, this.allocator = allocator; this.lanceReader = lanceReader; this.arrowReader = arrowReader; + this.sparkSchema = schema; this.projection = UnsafeProjection.create(schema); this.path = path; } @@ -103,12 +110,31 @@ public boolean hasNext() { if (arrowReader.loadNextBatch()) { VectorSchemaRoot root = arrowReader.getVectorSchemaRoot(); - // Wrap each Arrow FieldVector in LanceArrowColumnVector for type-safe access - // Cache the column wrappers on first batch and reuse for all subsequent batches + // Build ColumnVector[] in Spark-schema order by looking each field up by name; + // lance-spark 0.4.0's VectorSchemaRoot may return the file's on-disk order, which + // would misalign the UnsafeProjection. Cached on the first batch and reused thereafter. 
if (columnVectors == null) { - columnVectors = root.getFieldVectors().stream() - .map(LanceArrowColumnVector::new) - .toArray(ColumnVector[]::new); + List fieldVectors = root.getFieldVectors(); + Map byName = new HashMap<>(fieldVectors.size() * 2); + for (FieldVector fv : fieldVectors) { + byName.put(fv.getName(), fv); + } + StructField[] sparkFields = sparkSchema.fields(); + if (sparkFields.length != fieldVectors.size()) { + throw new HoodieException("Lance batch column count " + fieldVectors.size() + + " does not match expected Spark schema size " + sparkFields.length + + " for file: " + path); + } + columnVectors = new ColumnVector[sparkFields.length]; + for (int i = 0; i < sparkFields.length; i++) { + String name = sparkFields[i].name(); + FieldVector fv = byName.get(name); + if (fv == null) { + throw new HoodieException("Lance batch missing expected column '" + name + + "' for file: " + path + "; available columns: " + byName.keySet()); + } + columnVectors[i] = new LanceArrowColumnVector(fv); + } } // Create ColumnarBatch and keep it alive while iterating diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/VectorConversionUtils.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/VectorConversionUtils.java index 2bf8e86b5d0fd..a8cc02f58dad3 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/VectorConversionUtils.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/VectorConversionUtils.java @@ -25,17 +25,23 @@ import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData; import org.apache.spark.sql.catalyst.util.ArrayData; import org.apache.spark.sql.catalyst.util.GenericArrayData; +import org.apache.spark.sql.types.ArrayType; import org.apache.spark.sql.types.BinaryType$; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.MetadataBuilder; import 
org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.LanceArrowUtils; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; import java.nio.ByteBuffer; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Function; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; @@ -61,7 +67,7 @@ private VectorConversionUtils() { * @return map from field index to Vector schema; empty map if schema is null or has no vectors */ public static Map detectVectorColumns(HoodieSchema schema) { - Map vectorColumnInfo = new HashMap<>(); + Map vectorColumnInfo = new LinkedHashMap<>(); if (schema == null) { return vectorColumnInfo; } @@ -75,6 +81,28 @@ public static Map detectVectorColumns(HoodieSchema return vectorColumnInfo; } + /** + * Builds the {@link HoodieSchema#VECTOR_COLUMNS_METADATA_KEY} footer value + * from a Spark {@link StructType} by detecting VECTOR metadata annotations and + * delegating to {@link HoodieSchema#serializeVectorColumnsMetadata}. + * + * @param schema Spark StructType (may be null) + * @return comma-separated descriptor list, or empty string if no VECTOR columns + * @see HoodieSchema#serializeVectorColumnsMetadata(java.util.Map) + */ + public static String buildVectorColumnsFooterValue(StructType schema) { + if (schema == null) { + return ""; + } + Map detected = detectVectorColumnsFromMetadata(schema); + StructField[] fields = schema.fields(); + LinkedHashMap named = new LinkedHashMap<>(); + for (Map.Entry entry : detected.entrySet()) { + named.put(fields[entry.getKey()].name(), entry.getValue()); + } + return HoodieSchema.serializeVectorColumnsMetadata(named); + } + /** * Detects VECTOR columns from Spark StructType metadata annotations. 
* Fields with metadata key {@link HoodieSchema#TYPE_METADATA_FIELD} starting with "VECTOR" @@ -84,7 +112,8 @@ public static Map detectVectorColumns(HoodieSchema * @return map from field index to Vector schema; empty map if no vectors found */ public static Map detectVectorColumnsFromMetadata(StructType schema) { - Map vectorColumnInfo = new HashMap<>(); + // Use LinkedHashMap so callers iterate in field-ordinal order (stable across JDKs). + Map vectorColumnInfo = new LinkedHashMap<>(); if (schema == null) { return vectorColumnInfo; } @@ -235,4 +264,80 @@ public static void convertRowVectorColumns(InternalRow row, GenericInternalRow r } } } + + /** + * Re-attaches {@link HoodieSchema#TYPE_METADATA_FIELD} to Spark fields that are + * Arrow {@code FixedSizeList} in the Lance file. + * {@code LanceArrowUtils.fromArrowSchema} strips Hudi's VECTOR descriptor during + * Arrow→Spark conversion but preserves the fixed-size-list dimension under the + * lance-spark metadata key {@link LanceArrowUtils#ARROW_FIXED_SIZE_LIST_SIZE_KEY()}. + * + *

A FixedSizeList alone does not prove the column is a Hudi VECTOR — a + * non-Hudi Lance file could contain one. Callers must pass {@code vectorColumnNames} + * (derived from the Hudi schema's VECTOR-tagged fields, e.g. via + * {@link #detectVectorColumnsFromMetadata(StructType)}) so that only fields known to + * be Hudi VECTORs are restored. Pass {@code null} or an empty set to skip the restore entirely. + * + *

Nested structs are not recursed. + */ + public static StructType restoreVectorMetadata(StructType convertedSpark, Set vectorColumnNames) { + if (convertedSpark == null) { + return null; + } + if (vectorColumnNames == null || vectorColumnNames.isEmpty()) { + return convertedSpark; + } + StructField[] sparkFields = convertedSpark.fields(); + StructField[] newFields = new StructField[sparkFields.length]; + boolean changed = false; + for (int i = 0; i < sparkFields.length; i++) { + StructField sf = sparkFields[i]; + String descriptor = vectorColumnNames.contains(sf.name()) ? deriveVectorDescriptor(sf) : null; + if (descriptor == null) { + newFields[i] = sf; + } else { + // VECTOR contract: elements are non-nullable. lance-spark's Arrow→Spark + // conversion produces ArrayType(containsNull=true); force containsNull=false + // so the field round-trips through HoodieSchema conversion. + DataType arrayType = DataTypes.createArrayType( + ((ArrayType) sf.dataType()).elementType(), false); + newFields[i] = new StructField( + sf.name(), + arrayType, + sf.nullable(), + new MetadataBuilder() + .withMetadata(sf.metadata()) + .putString(HoodieSchema.TYPE_METADATA_FIELD, descriptor) + .build()); + changed = true; + } + } + return changed ? new StructType(newFields) : convertedSpark; + } + + /** + * Derives Hudi's VECTOR type descriptor for a Spark field if lance-spark tagged it + * with {@link LanceArrowUtils#ARROW_FIXED_SIZE_LIST_SIZE_KEY()} and its data type is + * {@code ArrayType} with a Float or Double element type (element nullability is not checked); otherwise returns null.
+ */ + private static String deriveVectorDescriptor(StructField sf) { + String sizeKey = LanceArrowUtils.ARROW_FIXED_SIZE_LIST_SIZE_KEY(); + if (!sf.metadata().contains(sizeKey)) { + return null; + } + if (!(sf.dataType() instanceof ArrayType)) { + return null; + } + DataType elemType = ((ArrayType) sf.dataType()).elementType(); + HoodieSchema.Vector.VectorElementType elementType; + if (DataTypes.FloatType.equals(elemType)) { + elementType = HoodieSchema.Vector.VectorElementType.FLOAT; + } else if (DataTypes.DoubleType.equals(elemType)) { + elementType = HoodieSchema.Vector.VectorElementType.DOUBLE; + } else { + return null; + } + int dim = (int) sf.metadata().getLong(sizeKey); + return HoodieSchema.createVector(dim, elementType).toTypeDescriptor(); + } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java index 299c56213067a..646fb2330833f 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java @@ -322,7 +322,7 @@ public WriteContext init(Configuration configuration) { } String vectorMeta = HoodieSchema.buildVectorColumnsMetadataValue(schema); if (!vectorMeta.isEmpty()) { - metadata.put(HoodieSchema.PARQUET_VECTOR_COLUMNS_METADATA_KEY, vectorMeta); + metadata.put(HoodieSchema.VECTOR_COLUMNS_METADATA_KEY, vectorMeta); } Configuration configurationCopy = new Configuration(configuration); configurationCopy.set(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE, Boolean.toString(writeLegacyListFormat)); diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala 
b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala index 8c0980e009f05..92b963f683906 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala @@ -23,7 +23,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hudi.SparkFileFormatInternalRowReaderContext.{filterIsSafeForBootstrap, filterIsSafeForPrimaryKey, getAppliedRequiredSchema} import org.apache.hudi.common.engine.HoodieReaderContext import org.apache.hudi.common.fs.FSUtils -import org.apache.hudi.common.model.HoodieRecord +import org.apache.hudi.common.model.{HoodieFileFormat, HoodieRecord} import org.apache.hudi.common.schema.{HoodieSchema, HoodieSchemaUtils} import org.apache.hudi.common.table.HoodieTableConfig import org.apache.hudi.common.table.read.buffer.PositionBasedFileGroupRecordBuffer.ROW_INDEX_TEMPORARY_COLUMN_NAME @@ -81,9 +81,16 @@ class SparkFileFormatInternalRowReaderContext(baseFileReader: SparkColumnarFileR } val structType = HoodieInternalRowUtils.getCachedSchema(requiredSchema) - // Detect VECTOR columns and replace with BinaryType for the Parquet reader - // (Parquet stores VECTOR as FIXED_LEN_BYTE_ARRAY which Spark maps to BinaryType) - val vectorColumnInfo = SparkFileFormatInternalRowReaderContext.detectVectorColumns(requiredSchema) + // Parquet stores VECTOR as FIXED_LEN_BYTE_ARRAY, so the reader needs BinaryType + // and we decode back to ArrayType below. Lance returns ArrayType natively, so skip + // the rewrite only for Lance base files; log files always go through the rewrite path. 
+ val isLanceBaseFile = FSUtils.isBaseFile(filePath) && + tableConfig.getBaseFileFormat == HoodieFileFormat.LANCE + val vectorColumnInfo: Map[Int, HoodieSchema.Vector] = if (isLanceBaseFile) { + Map.empty + } else { + SparkFileFormatInternalRowReaderContext.detectVectorColumns(requiredSchema) + } val parquetReadStructType = if (vectorColumnInfo.nonEmpty) { SparkFileFormatInternalRowReaderContext.replaceVectorColumnsWithBinary(structType, vectorColumnInfo) } else { diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieSparkSchemaConverters.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieSparkSchemaConverters.scala index 1ab3c5f1994dc..80105e32013bb 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieSparkSchemaConverters.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieSparkSchemaConverters.scala @@ -141,6 +141,12 @@ object HoodieSparkSchemaConverters extends SparkAdapterSupport { // Validate blob structure before accepting validateBlobStructure(blobStruct) HoodieSchema.createBlob() + + case variantStruct: StructType if metadata.contains(HoodieSchema.TYPE_METADATA_FIELD) && + HoodieSchema.parseTypeDescriptor(metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)).getType == HoodieSchemaType.VARIANT => + validateVariantStructure(variantStruct) + HoodieSchema.createVariant(recordName, nameSpace, null) + case st: StructType => val childNameSpace = if (nameSpace != "") s"$nameSpace.$recordName" else recordName @@ -357,6 +363,33 @@ object HoodieSparkSchemaConverters extends SparkAdapterSupport { } } + private lazy val expectedVariantStructType: StructType = { + val metadataField = StructField(HoodieSchema.Variant.VARIANT_METADATA_FIELD, BinaryType, nullable = false) + val valueField = StructField(HoodieSchema.Variant.VARIANT_VALUE_FIELD, BinaryType, nullable = false) + StructType(Seq(metadataField, valueField)) + } + + /** + 
* Validates that a StructType matches the expected unshredded variant schema + * (two non-null {@code BinaryType} fields: {@code metadata} and {@code value}). + * + * @param structType the StructType to validate + * @throws IllegalArgumentException if the structure does not match the expected variant schema + */ + private def validateVariantStructure(structType: StructType): Unit = { + val fieldsByName = structType.fields.map(f => f.name -> f).toMap + val ok = structType.length == 2 && + fieldsByName.get(HoodieSchema.Variant.VARIANT_METADATA_FIELD).exists(f => f.dataType == BinaryType && !f.nullable) && + fieldsByName.get(HoodieSchema.Variant.VARIANT_VALUE_FIELD).exists(f => f.dataType == BinaryType && !f.nullable) + if (!ok) { + throw new IllegalArgumentException( + s"""Invalid variant schema structure. Expected schema: + |${expectedVariantStructType.toDDL} + |Got schema: + |${structType.toDDL}""".stripMargin) + } + } + private def canBeUnion(st: StructType): Boolean = { st.fields.length > 0 && st.forall { f => diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala index 0bf6611a911d4..85db88c935064 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala @@ -76,6 +76,17 @@ trait SparkAdapter extends Serializable { */ def injectTableFunctions(extensions: SparkSessionExtensions): Unit = {} + /** + * Inject scalar functions into Spark SQL function registry. + * These functions can be used in SQL SELECT clauses. + */ + def injectScalarFunctions(extensions: SparkSessionExtensions): Unit + + /** + * Inject planner strategies to SparkSessionExtensions for converting custom logical plans into physical plans. 
+ */ + def injectPlannerStrategies(extensions: SparkSessionExtensions): Unit + /** * Returns an instance of [[HoodieCatalystExpressionUtils]] providing for common utils operating * on Catalyst [[Expression]]s diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java index 82fe9591de270..7c39e75f01097 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java @@ -20,8 +20,10 @@ import org.apache.hudi.avro.model.HoodieInstantInfo; import org.apache.hudi.avro.model.HoodieRestorePlan; +import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.avro.model.HoodieRollbackPlan; import org.apache.hudi.avro.model.HoodieRollbackRequest; +import org.apache.hudi.client.heartbeat.HoodieHeartbeatClient; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieBaseFile; @@ -36,6 +38,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView; +import org.apache.hudi.common.testutils.FileCreateUtils; import org.apache.hudi.common.testutils.FileCreateUtilsLegacy; import org.apache.hudi.common.testutils.HoodieMetadataTestTable; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; @@ -66,6 +69,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Properties; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -825,6 +833,412 
@@ public void testRollbackWithRequestedRollbackPlan(boolean enableMetadataTable, b } } + /** + * Test exclusive rollback with multi-writer: when a pending rollback exists with an expired heartbeat + * (no heartbeat file present → returns 0L → always expired), the current writer should take ownership + * and execute the rollback. + */ + @Test + public void testExclusiveRollbackPendingRollbackHeartbeatExpired() throws Exception { + final String p1 = "2016/05/01"; + final String p2 = "2016/05/02"; + final String commitTime1 = "20160501010101"; + final String commitTime2 = "20160502020601"; + final String commitTime3 = "20160506030611"; + final String rollbackInstantTime = "20160506040611"; + + Map partitionAndFileId1 = new HashMap() { + { + put(p1, "id11"); + put(p2, "id12"); + } + }; + Map partitionAndFileId2 = new HashMap() { + { + put(p1, "id21"); + put(p2, "id22"); + } + }; + Map partitionAndFileId3 = new HashMap() { + { + put(p1, "id31"); + put(p2, "id32"); + } + }; + + HoodieWriteConfig config = buildExclusiveRollbackMultiWriterConfig(); + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + testTable.withPartitionMetaFiles(p1, p2) + .addCommit(commitTime1).withBaseFilesInPartitions(partitionAndFileId1).getLeft() + .addCommit(commitTime2).withBaseFilesInPartitions(partitionAndFileId2).getLeft() + .addInflightCommit(commitTime3).withBaseFilesInPartitions(partitionAndFileId3); + + // Create a valid pending rollback plan for commitTime3 + HoodieRollbackPlan rollbackPlan = new HoodieRollbackPlan(); + List rollbackRequestList = partitionAndFileId3.entrySet().stream() + .map(entry -> new HoodieRollbackRequest(entry.getKey(), EMPTY_STRING, EMPTY_STRING, + Collections.singletonList( + metaClient.getBasePath() + "/" + entry.getKey() + "/" + + FileCreateUtilsLegacy.baseFileName(commitTime3, entry.getValue())), + Collections.emptyMap())) + .collect(Collectors.toList()); + rollbackPlan.setRollbackRequests(rollbackRequestList); + 
rollbackPlan.setInstantToRollback(new HoodieInstantInfo(commitTime3, HoodieTimeline.COMMIT_ACTION)); + FileCreateUtilsLegacy.createRequestedRollbackFile(metaClient.getBasePath().toString(), rollbackInstantTime, rollbackPlan); + // No heartbeat file → getLastHeartbeatTime returns 0L → heartbeat is always expired + + try (SparkRDDWriteClient client = getHoodieWriteClient(config)) { + boolean result = client.rollback(commitTime3); + assertTrue(result, "Rollback should execute when pending rollback heartbeat is expired"); + + assertFalse(testTable.inflightCommitExists(commitTime3)); + assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3)); + assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2)); + + // Verify the pending rollback instant was reused and completed + metaClient.reloadActiveTimeline(); + List rollbackInstants = metaClient.getActiveTimeline().getRollbackTimeline().getInstants(); + assertEquals(1, rollbackInstants.size()); + assertTrue(rollbackInstants.get(0).isCompleted()); + assertEquals(rollbackInstantTime, rollbackInstants.get(0).requestedTime()); + + // Verify heartbeat was cleaned up after rollback completion + assertFalse(HoodieHeartbeatClient.heartbeatExists(storage, basePath, rollbackInstantTime)); + } + } + + /** + * Test exclusive rollback with multi-writer: when a pending rollback exists with an active heartbeat + * (another writer is currently executing the rollback), the current writer should skip it and return false. 
+ */ + @Test + public void testExclusiveRollbackPendingRollbackHeartbeatActive() throws Exception { + final String p1 = "2016/05/01"; + final String p2 = "2016/05/02"; + final String commitTime1 = "20160501010101"; + final String commitTime2 = "20160502020601"; + final String commitTime3 = "20160506030611"; + final String rollbackInstantTime = "20160506040611"; + + Map partitionAndFileId1 = new HashMap() { + { + put(p1, "id11"); + put(p2, "id12"); + } + }; + Map partitionAndFileId2 = new HashMap() { + { + put(p1, "id21"); + put(p2, "id22"); + } + }; + Map partitionAndFileId3 = new HashMap() { + { + put(p1, "id31"); + put(p2, "id32"); + } + }; + + HoodieWriteConfig config = buildExclusiveRollbackMultiWriterConfig(); + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + testTable.withPartitionMetaFiles(p1, p2) + .addCommit(commitTime1).withBaseFilesInPartitions(partitionAndFileId1).getLeft() + .addCommit(commitTime2).withBaseFilesInPartitions(partitionAndFileId2).getLeft() + .addInflightCommit(commitTime3).withBaseFilesInPartitions(partitionAndFileId3); + + // Create a pending rollback plan for commitTime3 + HoodieRollbackPlan rollbackPlan = new HoodieRollbackPlan(); + rollbackPlan.setRollbackRequests(Collections.emptyList()); + rollbackPlan.setInstantToRollback(new HoodieInstantInfo(commitTime3, HoodieTimeline.COMMIT_ACTION)); + FileCreateUtilsLegacy.createRequestedRollbackFile(metaClient.getBasePath().toString(), rollbackInstantTime, rollbackPlan); + + // Simulate an active heartbeat by another writer for the rollback instant + try (HoodieHeartbeatClient otherWriterHeartbeat = new HoodieHeartbeatClient( + storage, basePath, config.getHoodieClientHeartbeatIntervalInMs(), + config.getHoodieClientHeartbeatTolerableMisses())) { + otherWriterHeartbeat.start(rollbackInstantTime); + // The heartbeat file is fresh → isHeartbeatExpired returns false + + try (SparkRDDWriteClient client = getHoodieWriteClient(config)) { + boolean result = 
client.rollback(commitTime3); + assertFalse(result, "Rollback should be skipped when another writer holds an active heartbeat"); + + // Verify the inflight commit and data files are still present + assertTrue(testTable.inflightCommitExists(commitTime3)); + assertTrue(testTable.baseFilesExist(partitionAndFileId3, commitTime3)); + + // Verify no completed rollback was created + metaClient.reloadActiveTimeline(); + List completedRollbacks = metaClient.getActiveTimeline() + .getRollbackTimeline().filterCompletedInstants().getInstants(); + assertEquals(0, completedRollbacks.size()); + } + } + } + + /** + * Test exclusive rollback with multi-writer: when the commit is no longer in the timeline + * (already rolled back by another writer) and no pending rollback exists, rollback should return false. + */ + @Test + public void testExclusiveRollbackWhenCommitNotInTimeline() throws Exception { + final String p1 = "2016/05/01"; + final String commitTime1 = "20160501010101"; + final String nonExistentCommitTime = "20160506030611"; + + Map partitionAndFileId1 = new HashMap() { + { + put(p1, "id11"); + } + }; + + HoodieWriteConfig config = buildExclusiveRollbackMultiWriterConfig(); + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + testTable.withPartitionMetaFiles(p1) + .addCommit(commitTime1).withBaseFilesInPartitions(partitionAndFileId1); + + // nonExistentCommitTime is not in the timeline and no pending rollback exists for it + try (SparkRDDWriteClient client = getHoodieWriteClient(config)) { + boolean result = client.rollback(nonExistentCommitTime); + assertFalse(result, "Rollback should return false when commit is not in timeline (already rolled back)"); + + // Verify no rollback instant was created + metaClient.reloadActiveTimeline(); + assertTrue(metaClient.getActiveTimeline().getRollbackTimeline().empty()); + // Existing commit should be unaffected + assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1)); + } + } + + /** + * Test: config 
enabled, no pre-existing pending rollback, inflight commit exists. + * This is the "first writer to arrive" scenario — it schedules a fresh rollback plan under lock + * and then executes it (Case 2b in resolveOrScheduleRollback). + */ + @Test + public void testAvoidDuplicateRollbackFirstWriterSchedulesNewPlan() throws Exception { + final String p1 = "2016/05/01"; + final String p2 = "2016/05/02"; + final String commitTime1 = "20160501010101"; + final String commitTime2 = "20160502020601"; + final String commitTime3 = "20160506030611"; + + Map partitionAndFileId1 = new HashMap() { + { + put(p1, "id11"); + put(p2, "id12"); + } + }; + Map partitionAndFileId2 = new HashMap() { + { + put(p1, "id21"); + put(p2, "id22"); + } + }; + Map partitionAndFileId3 = new HashMap() { + { + put(p1, "id31"); + put(p2, "id32"); + } + }; + + HoodieWriteConfig config = buildExclusiveRollbackMultiWriterConfig(); + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + testTable.withPartitionMetaFiles(p1, p2) + .addCommit(commitTime1).withBaseFilesInPartitions(partitionAndFileId1).getLeft() + .addCommit(commitTime2).withBaseFilesInPartitions(partitionAndFileId2).getLeft() + .addInflightCommit(commitTime3).withBaseFilesInPartitions(partitionAndFileId3); + + // No pending rollback file exists — the writer must schedule one itself. 
+ try (SparkRDDWriteClient client = getHoodieWriteClient(config)) { + boolean result = client.rollback(commitTime3); + assertTrue(result, "Rollback should succeed when first writer schedules a new plan"); + + // Verify the inflight commit and its data files are cleaned up + assertFalse(testTable.inflightCommitExists(commitTime3)); + assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3)); + + // Verify earlier commits are unaffected + assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1)); + assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2)); + + // Verify exactly one rollback instant was created and completed + metaClient.reloadActiveTimeline(); + List rollbackInstants = metaClient.getActiveTimeline().getRollbackTimeline().getInstants(); + assertEquals(1, rollbackInstants.size()); + assertTrue(rollbackInstants.get(0).isCompleted()); + } + } + + /** + * Test: another writer has already fully completed the rollback — the inflight commit is removed + * from the timeline and a completed rollback instant exists. With avoid-duplicate-plan enabled, + * resolveOrScheduleRollback reloads the timeline, finds the commit absent, and returns empty. 
+ */ + @Test + public void testAvoidDuplicateRollbackAlreadyCompletedByAnotherWriter() throws Exception { + final String p1 = "2016/05/01"; + final String p2 = "2016/05/02"; + final String commitTime1 = "20160501010101"; + final String commitTime2 = "20160502020601"; + final String commitTime3 = "20160506030611"; + final String rollbackInstantTime = "20160506040611"; + + Map partitionAndFileId1 = new HashMap() { + { + put(p1, "id11"); + put(p2, "id12"); + } + }; + Map partitionAndFileId2 = new HashMap() { + { + put(p1, "id21"); + put(p2, "id22"); + } + }; + + HoodieWriteConfig config = buildExclusiveRollbackMultiWriterConfig(); + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + testTable.withPartitionMetaFiles(p1, p2) + .addCommit(commitTime1).withBaseFilesInPartitions(partitionAndFileId1).getLeft() + .addCommit(commitTime2).withBaseFilesInPartitions(partitionAndFileId2); + + // Simulate that another writer already completed the rollback of commitTime3: + // - The inflight commit for commitTime3 no longer exists on the timeline + // - A completed rollback instant exists for it + HoodieRollbackMetadata rollbackMetadata = new HoodieRollbackMetadata(); + rollbackMetadata.setCommitsRollback(Collections.singletonList(commitTime3)); + rollbackMetadata.setStartRollbackTime(rollbackInstantTime); + rollbackMetadata.setPartitionMetadata(new HashMap<>()); + rollbackMetadata.setInstantsRollback(Collections.singletonList( + new HoodieInstantInfo(commitTime3, HoodieTimeline.COMMIT_ACTION))); + FileCreateUtils.createRequestedRollbackFile(metaClient, rollbackInstantTime); + FileCreateUtils.createInflightRollbackFile(metaClient, rollbackInstantTime); + FileCreateUtils.createRollbackFile(metaClient, rollbackInstantTime, rollbackMetadata, false); + + try (SparkRDDWriteClient client = getHoodieWriteClient(config)) { + boolean result = client.rollback(commitTime3); + // commitTime3 is no longer on the timeline — the writer should detect this and skip + assertFalse(result, 
"Rollback should return false when already completed by another writer"); + + // Verify no additional rollback instants were created — only the pre-existing one + metaClient.reloadActiveTimeline(); + List completedRollbacks = metaClient.getActiveTimeline() + .getRollbackTimeline().filterCompletedInstants().getInstants(); + assertEquals(1, completedRollbacks.size()); + assertEquals(rollbackInstantTime, completedRollbacks.get(0).requestedTime()); + + // Verify earlier commits are unaffected + assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1)); + assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2)); + } + } + + /** + * Test: two writers concurrently attempt to rollback the same inflight commit. + * With avoid-duplicate-plan enabled, exactly one rollback should succeed. The other writer + * should either reuse the pending plan and skip (due to active heartbeat) or find the + * rollback already completed. + */ + @Test + public void testConcurrentWritersRollbackSameInflightCommit() throws Exception { + final String p1 = "2016/05/01"; + final String p2 = "2016/05/02"; + final String commitTime1 = "20160501010101"; + final String commitTime2 = "20160502020601"; + final String commitTime3 = "20160506030611"; + + Map partitionAndFileId1 = new HashMap() { + { + put(p1, "id11"); + put(p2, "id12"); + } + }; + Map partitionAndFileId2 = new HashMap() { + { + put(p1, "id21"); + put(p2, "id22"); + } + }; + Map partitionAndFileId3 = new HashMap() { + { + put(p1, "id31"); + put(p2, "id32"); + } + }; + + HoodieWriteConfig config = buildExclusiveRollbackMultiWriterConfig(); + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + testTable.withPartitionMetaFiles(p1, p2) + .addCommit(commitTime1).withBaseFilesInPartitions(partitionAndFileId1).getLeft() + .addCommit(commitTime2).withBaseFilesInPartitions(partitionAndFileId2).getLeft() + .addInflightCommit(commitTime3).withBaseFilesInPartitions(partitionAndFileId3); + + // No pending rollback — 
both writers will race to schedule/execute one. + ExecutorService executor = Executors.newFixedThreadPool(2); + CountDownLatch startLatch = new CountDownLatch(1); + + try { + Future writer1Future = executor.submit(() -> { + startLatch.await(); + try (SparkRDDWriteClient client1 = getHoodieWriteClient(config)) { + return client1.rollback(commitTime3); + } + }); + + Future writer2Future = executor.submit(() -> { + startLatch.await(); + try (SparkRDDWriteClient client2 = getHoodieWriteClient(config)) { + return client2.rollback(commitTime3); + } + }); + + // Release both writers simultaneously + startLatch.countDown(); + + boolean result1 = writer1Future.get(); + boolean result2 = writer2Future.get(); + + // At least one writer must succeed; both must not fail with an exception + assertTrue(result1 || result2, "At least one writer should successfully execute the rollback"); + + // Verify the inflight commit is rolled back + assertFalse(testTable.inflightCommitExists(commitTime3)); + assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3)); + + // Verify earlier commits are unaffected + assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1)); + assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2)); + + // Verify there is exactly one completed rollback (no duplicates) + metaClient.reloadActiveTimeline(); + List completedRollbacks = metaClient.getActiveTimeline() + .getRollbackTimeline().filterCompletedInstants().getInstants(); + assertEquals(1, completedRollbacks.size(), "Exactly one completed rollback should exist, not duplicates"); + } finally { + executor.shutdownNow(); + } + } + + private HoodieWriteConfig buildExclusiveRollbackMultiWriterConfig() { + Properties props = new Properties(); + props.setProperty(HoodieWriteConfig.ROLLBACK_AVOID_DUPLICATE_PLAN.key(), "true"); + return HoodieWriteConfig.newBuilder() + .withPath(basePath) + .withRollbackUsingMarkers(false) + 
.withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(InProcessLockProvider.class) + .build()) + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).build()) + .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withMetadataIndexColumnStats(false).enable(false).build()) + .withProperties(props) + .build(); + } + @Test public void testFallbackToListingBasedRollbackForCompletedInstant() throws Exception { // Let's create some commit files and base files diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java index c631ec00626c1..60db8b8a13b75 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java @@ -18,16 +18,21 @@ package org.apache.hudi.io; +import org.apache.hudi.avro.model.HoodieActionInstant; +import org.apache.hudi.avro.model.HoodieCleanMetadata; +import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.avro.model.HoodieSavepointMetadata; import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.client.timeline.TimelineArchivers; import org.apache.hudi.client.WriteClientTestUtils; +import org.apache.hudi.client.timeline.versioning.v1.TimelineArchiverV1; import org.apache.hudi.client.timeline.versioning.v2.LSMTimelineWriter; import org.apache.hudi.client.timeline.versioning.v2.TimelineArchiverV2; import 
org.apache.hudi.client.transaction.lock.InProcessLockProvider; import org.apache.hudi.client.utils.ArchivalMetrics; +import org.apache.hudi.common.HoodieCleanStat; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieCommitMetadata; @@ -47,6 +52,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.LSMTimeline; import org.apache.hudi.common.table.timeline.TimelineUtils; +import org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataV2MigrationHandler; import org.apache.hudi.common.table.timeline.versioning.v2.InstantComparatorV2; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.testutils.FileCreateUtilsLegacy; @@ -103,6 +109,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import java.util.UUID; import java.util.concurrent.CompletableFuture; @@ -2189,4 +2196,227 @@ public void testArchivalMetricsWithMixedActionTypes() throws Exception { // Verify archival status is success assertEquals(1L, metrics.get(ArchivalMetrics.ARCHIVAL_STATUS), "Archival should succeed"); } + + /** Prepares the fixture for the ECTR archival tests: initializes the path, Spark contexts and timeline service, then creates a COPY_ON_WRITE table with write table version "6" and auto-upgrade set to "false". The meta client is initialized once through initMetaClient(properties) and then re-created through HoodieTestUtils.init with the same properties. NOTE(review): the second init looks redundant with the first; confirm whether both are required. @throws IOException propagated from the setup calls */ private void initTableToTestECTRBlock() throws IOException { + HoodieTableType tableType = HoodieTableType.COPY_ON_WRITE; + initPath(); + initSparkContexts(); + initTimelineService(); + Properties properties = new Properties(); + properties.setProperty(HoodieWriteConfig.WRITE_TABLE_VERSION.key(), "6"); + properties.setProperty(HoodieWriteConfig.AUTO_UPGRADE_VERSION.key(), "false"); + initMetaClient(properties); + storage = metaClient.getStorage(); + metaClient.getStorage().createDirectory(new StoragePath(basePath)); + metaClient = HoodieTestUtils.init(storageConf, basePath, tableType, properties); + } + + /** + * Tests archival behavior with ECTR blocking enabled vs disabled. 
+ * When enabled: commits >= ECTR from last clean are not archived. + * When disabled: archival proceeds normally ignoring ECTR (backward compatible). + * Also validates that archival makes progress when ECTR is later than the archival window. + */ + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testArchivalWithECTRBlocking(boolean blockArchivalOnCleanECTR) throws Exception { + initTableToTestECTRBlock(); + + HoodieWriteConfig writeConfig = buildECTRTestConfig(2, 3, blockArchivalOnCleanECTR); + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + + // Given: 5 commits and a clean commit with ECTR pointing to commit 00000003 + for (int i = 1; i <= 5; i++) { + testTable.addCommit(String.format("%08d", i)); + } + addCleanCommitWithECTR(testTable, "00000006", "00000003", "00000005"); + metaClient = HoodieTableMetaClient.reload(metaClient); + + // When: trigger archival + HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); + TimelineArchiverV1 archiver = new TimelineArchiverV1(writeConfig, table); + archiver.archiveIfRequired(context); + + // Then: verify archival behavior + metaClient = HoodieTableMetaClient.reload(metaClient); + List activeCommitTimes = getActiveCommitTimes(); + + if (blockArchivalOnCleanECTR) { + // Commits >= ECTR should not be archived + assertTrue(activeCommitTimes.contains("00000003"), "Commit 00000003 (ECTR) should not be archived"); + assertTrue(activeCommitTimes.contains("00000004"), "Commit 00000004 (after ECTR) should not be archived"); + assertTrue(activeCommitTimes.contains("00000005"), "Commit 00000005 (after ECTR) should not be archived"); + } else { + // ECTR is ignored; archival proceeds based on min/max archival commits only + assertFalse(activeCommitTimes.contains("00000003"), + "Commit 00000003 (ECTR) should be archived when ECTR blocking is disabled"); + assertTrue(activeCommitTimes.contains("00000005"), + "Commit 00000005 should be retained (within min commits to 
keep)"); + } + + if (blockArchivalOnCleanECTR) { + // Additional step: validate archival makes progress when ECTR is later than the archival window. + // Add more commits and a new clean with ECTR at 00000008, so commits before ECTR can be archived. + for (int i = 7; i <= 10; i++) { + testTable.addCommit(String.format("%08d", i)); + } + addCleanCommitWithECTR(testTable, "00000011", "00000008", "00000010"); + metaClient = HoodieTableMetaClient.reload(metaClient); + + table = HoodieSparkTable.create(writeConfig, context, metaClient); + archiver = new TimelineArchiverV1(writeConfig, table); + archiver.archiveIfRequired(context); + + metaClient = HoodieTableMetaClient.reload(metaClient); + activeCommitTimes = getActiveCommitTimes(); + + // Commits before ECTR should be archived + for (int i = 1; i <= 7; i++) { + assertFalse(activeCommitTimes.contains(String.format("%08d", i)), + "Commit " + String.format("%08d", i) + " (before ECTR) should be archived"); + } + // Commits >= ECTR should be retained + assertTrue(activeCommitTimes.contains("00000008"), "Commit 00000008 (ECTR) should not be archived"); + assertTrue(activeCommitTimes.contains("00000009"), "Commit 00000009 (after ECTR) should not be archived"); + assertTrue(activeCommitTimes.contains("00000010"), "Commit 00000010 (after ECTR) should not be archived"); + assertEquals(3, activeCommitTimes.size(), "Exactly 3 commits (00000008-00000010) should remain active"); + } + } + + /** + * Tests graceful handling when clean metadata is missing or has empty ECTR. + * Archival should continue normally in both cases. 
+ */ + @Test + public void testArchivalContinuesWhenECTRIsAbsent() throws Exception { + initTableToTestECTRBlock(); + + HoodieWriteConfig writeConfig = buildECTRTestConfig(2, 3, true); + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + + // Step 1: No clean commit exists — archival should proceed without error + for (int i = 1; i <= 6; i++) { + testTable.addCommit(String.format("%08d", i)); + } + metaClient = HoodieTableMetaClient.reload(metaClient); + + HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); + TimelineArchiverV1 archiver = new TimelineArchiverV1(writeConfig, table); + + TimelineArchiverV1 finalArchiver = archiver; + assertDoesNotThrow(() -> finalArchiver.archiveIfRequired(context), + "Archival should continue gracefully when clean metadata is missing"); + + metaClient = HoodieTableMetaClient.reload(metaClient); + int commitsAfterFirstArchival = metaClient.getActiveTimeline().getCommitsTimeline() + .filterCompletedInstants().countInstants(); + assertTrue(commitsAfterFirstArchival <= 3, "Archival should proceed when clean metadata is missing"); + + // Step 2: Clean commit exists but with empty ECTR — archival should still proceed + for (int i = 7; i <= 12; i++) { + testTable.addCommit(String.format("%08d", i)); + } + addCleanCommitWithECTR(testTable, "00000013", "", "00000012"); + metaClient = HoodieTableMetaClient.reload(metaClient); + + table = HoodieSparkTable.create(writeConfig, context, metaClient); + archiver = new TimelineArchiverV1(writeConfig, table); + + TimelineArchiverV1 finalArchiver1 = archiver; + assertDoesNotThrow(() -> finalArchiver1.archiveIfRequired(context), + "Archival should handle empty ECTR gracefully"); + + metaClient = HoodieTableMetaClient.reload(metaClient); + int commitsAfterSecondArchival = metaClient.getActiveTimeline().getCommitsTimeline() + .filterCompletedInstants().countInstants(); + assertTrue(commitsAfterSecondArchival <= 3, "Archival should proceed normally with empty ECTR"); 
+ } + + /** Builds the write config shared by the ECTR archival tests. The archival window is archiveCommitsWith(minCommits, maxCommits), ECTR blocking is toggled by blockArchivalOnCleanECTR, the cleaner retains 1 commit, the metadata table is disabled and the remote file-system view uses timelineServicePort. */ private HoodieWriteConfig buildECTRTestConfig(int minCommits, int maxCommits, boolean blockArchivalOnCleanECTR) { + return HoodieWriteConfig.newBuilder() + .withPath(basePath) + .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) + .withParallelism(2, 2) + .withArchivalConfig(HoodieArchivalConfig.newBuilder() + .archiveCommitsWith(minCommits, maxCommits) + .withBlockArchivalOnCleanECTR(blockArchivalOnCleanECTR) + .build()) + .withCleanConfig(HoodieCleanConfig.newBuilder() + .retainCommits(1) + .build()) + .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() + .withRemoteServerPort(timelineServicePort) + .build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .enable(false) + .build()) + .forTable("test-trip-table") + .build(); + } + + /** Adds a clean instant named cleanInstant to testTable. Its metadata is converted from a single KEEP_LATEST_COMMITS HoodieCleanStat for partition "p1" with three empty file lists, and the plan is a HoodieCleanerPlan built with empty collections. The ectr and lastCompleted strings are passed as the last two HoodieCleanStat constructor arguments; presumably these are the earliest commit to retain and the last completed commit timestamp (confirm against HoodieCleanStat). Callers in this file pass an empty string for ectr to simulate a clean without ECTR. */ private void addCleanCommitWithECTR(HoodieTestTable testTable, String cleanInstant, String ectr, String lastCompleted) throws Exception { + List cleanStatsList = new ArrayList<>(); + cleanStatsList.add(new HoodieCleanStat( + HoodieCleaningPolicy.KEEP_LATEST_COMMITS, + "p1", + Collections.emptyList(), + Collections.emptyList(), + Collections.emptyList(), + ectr, + lastCompleted + )); + + HoodieCleanMetadata cleanMetadata = + CleanerUtils.convertCleanMetadata(cleanInstant, Option.of(0L), cleanStatsList, Collections.emptyMap()); + HoodieCleanerPlan cleanerPlan = + new HoodieCleanerPlan( + new HoodieActionInstant(cleanInstant, CLEAN_ACTION, ""), + "", "", new HashMap<>(), CleanMetadataV2MigrationHandler.VERSION, new HashMap<>(), new ArrayList<>(), Collections.emptyMap()); + + testTable.addClean(cleanInstant, cleanerPlan, cleanMetadata); + } + + /** Returns the requested times of all completed instants on the active commits timeline of the current metaClient. Callers reload metaClient before calling this so the result reflects the latest archival run. */ private List getActiveCommitTimes() { + return metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().getInstants().stream() + .map(HoodieInstant::requestedTime) + .collect(Collectors.toList()); + } + + /** + * Tests that TimelineArchiverV2 (LSM-based timeline, v9 tables) does NOT block archival on ECTR. 
+ * ECTR blocking is only for v6 tables using TimelineArchiverV1. + */ + @Test + public void testArchivalBlocksOnCleanECTRWithTimelineArchiverV2AndVersion9() throws Exception { + /* NOTE(review): the method name says "Blocks", but the assertions below verify the opposite: with blocking enabled in the config, TimelineArchiverV2 still archives commit 00000003 (the ECTR). Consider renaming to something like testArchivalDoesNotBlockOnCleanECTR... so the name matches the Javadoc and the assertions. */ init(); + + HoodieWriteConfig writeConfig = buildECTRTestConfig(2, 3, true); + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + + // Given: 5 commits and a clean commit with ECTR at 00000003 + for (int i = 1; i <= 5; i++) { + testTable.addCommit(String.format("%08d", i)); + } + addCleanCommitWithECTR(testTable, "00000006", "00000003", "00000005"); + metaClient = HoodieTableMetaClient.reload(metaClient); + + assertEquals(HoodieTableVersion.NINE, metaClient.getTableConfig().getTableVersion(), + "Table should be version 9"); + + // When: trigger archival using TimelineArchiverV2 + HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); + TimelineArchiverV2 archiver = new TimelineArchiverV2(writeConfig, table); + archiver.archiveIfRequired(context); + + // Then: TimelineArchiverV2 should NOT respect ECTR — commit 00000003 gets archived + metaClient = HoodieTableMetaClient.reload(metaClient); + List activeCommitTimes = getActiveCommitTimes(); + + assertFalse(activeCommitTimes.contains("00000003"), + "TimelineArchiverV2: Commit 00000003 (ECTR) should be archived"); + assertTrue(activeCommitTimes.contains("00000004"), + "TimelineArchiverV2: Commit 00000004 (after ECTR) should not be archived"); + assertTrue(activeCommitTimes.contains("00000005"), + "TimelineArchiverV2: Commit 00000005 (after ECTR) should not be archived"); + } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java index 6b4f4f7160196..f57ee82385f6c 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java @@ -64,7 +64,6 @@ import 
org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanV2MigrationHandler; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.testutils.HoodieMetadataTestTable; -import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.CleanerUtils; @@ -114,9 +113,11 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.DELTA_COMMIT_ACTION; import static org.apache.hudi.common.table.timeline.InstantComparison.GREATER_THAN; import static org.apache.hudi.common.table.timeline.InstantComparison.compareTimestamps; +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.NO_PARTITION_PATH; import static org.apache.hudi.common.testutils.HoodieTestUtils.INSTANT_FILE_NAME_GENERATOR; import static org.apache.hudi.common.testutils.HoodieTestUtils.INSTANT_GENERATOR; import static org.apache.hudi.common.testutils.HoodieTestUtils.TIMELINE_FACTORY; +import static org.apache.hudi.common.testutils.HoodieTestUtils.getDefaultStorageConf; import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors; import static org.awaitility.Awaitility.await; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -374,7 +375,7 @@ public void testCleanNonPartitionedTable() throws IOException { .build()) .withEmbeddedTimelineServerEnabled(false).build(); // datagen for non-partitioned table - initTestDataGenerator(new String[] {HoodieTestDataGenerator.NO_PARTITION_PATH}); + initTestDataGenerator(new String[] {NO_PARTITION_PATH}); // init non-partitioned table HoodieTestUtils.init(storageConf, basePath, HoodieTableType.COPY_ON_WRITE, HoodieFileFormat.PARQUET, true, "org.apache.hudi.keygen.NonpartitionedKeyGenerator", true); @@ -392,17 +393,17 @@ public void testCleanNonPartitionedTable() throws IOException { instantTime = cleanPlanPair.getLeft(); 
HoodieCleanerPlan cleanPlan = cleanPlanPair.getRight(); assertEquals(cleanPlan.getPartitionsToBeDeleted().size(), 0); - assertEquals(cleanPlan.getFilePathsToBeDeletedPerPartition().get(HoodieTestDataGenerator.NO_PARTITION_PATH).size(), 1); - String filePathToClean = cleanPlan.getFilePathsToBeDeletedPerPartition().get(HoodieTestDataGenerator.NO_PARTITION_PATH).get(0).getFilePath(); + assertEquals(cleanPlan.getFilePathsToBeDeletedPerPartition().get(NO_PARTITION_PATH).size(), 1); + String filePathToClean = cleanPlan.getFilePathsToBeDeletedPerPartition().get(NO_PARTITION_PATH).get(0).getFilePath(); // clean HoodieTable table = HoodieSparkTable.create(writeConfig, context); HoodieCleanMetadata cleanMetadata = table.clean(context, instantTime); // check the cleaned file - assertEquals(cleanMetadata.getPartitionMetadata().get(HoodieTestDataGenerator.NO_PARTITION_PATH).getSuccessDeleteFiles().size(), 1); - assertTrue(filePathToClean.contains(cleanMetadata.getPartitionMetadata().get(HoodieTestDataGenerator.NO_PARTITION_PATH).getSuccessDeleteFiles().get(0))); + assertEquals(cleanMetadata.getPartitionMetadata().get(NO_PARTITION_PATH).getSuccessDeleteFiles().size(), 1); + assertTrue(filePathToClean.contains(cleanMetadata.getPartitionMetadata().get(NO_PARTITION_PATH).getSuccessDeleteFiles().get(0))); // ensure table is not fully cleaned and has a file group assertTrue(FSUtils.isTableExists(basePath, storage)); - assertTrue(table.getFileSystemView().getAllFileGroups(HoodieTestDataGenerator.NO_PARTITION_PATH).findAny().isPresent()); + assertTrue(table.getFileSystemView().getAllFileGroups(NO_PARTITION_PATH).findAny().isPresent()); } } @@ -1695,4 +1696,58 @@ public void testPreWriteCleanPolicyDisabledWhenTableServicesDisabled(boolean com assertEquals(7, metaClient.reloadActiveTimeline().getWriteTimeline().countInstants()); assertEquals(0, metaClient.getActiveTimeline().getCleanerTimeline().countInstants()); } + + @Test + void testEmptyClean() throws IOException { + // validate that 
an empty cleaner plan does not throw any errors at execution time + HoodieWriteConfig writeConfig = getConfigBuilder().withPath(basePath) + .withFileSystemViewConfig(new FileSystemViewStorageConfig.Builder() + .withEnableBackupForRemoteFileSystemView(false) + .build()) + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withAutoClean(false) + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS) + .retainCommits(1) + .build()) + .withEmbeddedTimelineServerEnabled(false).build(); + // datagen for non-partitioned table + initTestDataGenerator(new String[] {NO_PARTITION_PATH}); + // init non-partitioned table + HoodieTableMetaClient metaClient = HoodieTestUtils.init(getDefaultStorageConf(), basePath, HoodieTableType.COPY_ON_WRITE, HoodieFileFormat.PARQUET, + true, "org.apache.hudi.keygen.NonpartitionedKeyGenerator", true); + + try (SparkRDDWriteClient client = new SparkRDDWriteClient(context, writeConfig)) { + String instantTime = client.startCommit(); + List records = dataGen.generateInserts(instantTime, 1); + client.commit(instantTime, client.insert(jsc.parallelize(records, 1), instantTime)); + + instantTime = metaClient.createNewInstantTime(false); + HoodieTable table = HoodieSparkTable.create(writeConfig, context); + + HoodieActiveTimeline timeline = metaClient.reloadActiveTimeline(); + HoodieInstant hoodieInstant = timeline.firstInstant().get(); + HoodieCleanerPlan cleanerPlan = HoodieCleanerPlan.newBuilder() + .setPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS.name()) + .setVersion(CleanPlanner.LATEST_CLEAN_PLAN_VERSION) + .setEarliestInstantToRetain(new HoodieActionInstant(hoodieInstant.requestedTime(), hoodieInstant.getAction(), hoodieInstant.getState().name())) + .setLastCompletedCommitTimestamp(timeline.lastInstant().get().requestedTime()) + .setFilePathsToBeDeletedPerPartition(Collections.emptyMap()) + .build(); + final HoodieInstant cleanInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.CLEAN_ACTION, instantTime, + 
metaClient.getTimelineLayout().getInstantComparator().completionTimeOrderedComparator()); + table.getActiveTimeline().saveToCleanRequested(cleanInstant, Option.of(cleanerPlan)); + + table.getMetaClient().reloadActiveTimeline(); + // clean + HoodieCleanMetadata cleanMetadata = table.clean(context, instantTime); + // validate all fields of the empty clean metadata + assertTrue(cleanMetadata.getPartitionMetadata().isEmpty()); + assertEquals(0, cleanMetadata.getTotalFilesDeleted()); + assertEquals(hoodieInstant.requestedTime(), cleanMetadata.getEarliestCommitToRetain()); + assertEquals(timeline.lastInstant().get().requestedTime(), cleanMetadata.getLastCompletedCommitTimestamp()); + assertEquals(instantTime, cleanMetadata.getStartCleanTime()); + assertTrue(cleanMetadata.getBootstrapPartitionMetadata().isEmpty()); + assertTrue(cleanMetadata.getTimeTakenInMillis() >= 0); + } + } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java index 66385e0952e96..32e6af2180b5d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java @@ -18,8 +18,10 @@ package org.apache.hudi.table.functional; +import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieFileStatus; import org.apache.hudi.client.WriteClientTestUtils; +import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.common.HoodieCleanStat; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.BootstrapFileMapping; @@ -30,11 +32,13 @@ import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.HoodieTableMetaClient; import 
org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.TimelineUtils; import org.apache.hudi.common.testutils.HoodieMetadataTestTable; import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.common.util.CleanerUtils; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; @@ -48,12 +52,14 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import java.nio.file.Files; import java.nio.file.Paths; import java.time.Instant; import java.time.ZoneId; import java.time.ZonedDateTime; +import java.time.temporal.ChronoUnit; import java.util.Arrays; import java.util.Collections; import java.util.Date; @@ -63,6 +69,7 @@ import java.util.UUID; import java.util.stream.Stream; +import static org.apache.hudi.common.testutils.HoodieTestUtils.INSTANT_COMPARATOR; import static org.apache.hudi.common.testutils.HoodieTestUtils.INSTANT_GENERATOR; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -813,4 +820,218 @@ public void testKeepXHoursWithCleaning( testTable.close(); } } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testEmptyCleansAddedAfterThreshold(boolean secondCommitAfterThreshold) throws Exception { + boolean enableIncrementalClean = true; + boolean enableBootstrapSourceClean = false; + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) + 
.withCleanConfig(HoodieCleanConfig.newBuilder() + .withIncrementalCleaningMode(enableIncrementalClean) + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) + .withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean) + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS).cleanerNumHoursRetained(2) + .withIntervalToCreateEmptyCleanHours(1) + .build()) + .build(); + + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + try { + String p0 = "2020/01/01"; + + String file1P0C0 = UUID.randomUUID().toString(); + Instant instant = Instant.now(); + ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, metaClient.getTableConfig().getTimelineTimezone().getZoneId()); + int minutesForFirstCommit = 180; + String firstCommitTs = HoodieInstantTimeGenerator.formatDate(Date.from(commitDateTime.minusMinutes(minutesForFirstCommit).toInstant())); + + commitToTestTable(testTable, firstCommitTs, p0, file1P0C0); + testTable = tearDownTestTableAndReinit(testTable, config); + + // make next commit, with 1 insert & 1 update per partition + String file2P0C1 = UUID.randomUUID().toString(); + int minutesForSecondCommit = 150; + String secondCommitTs = HoodieInstantTimeGenerator.formatDate(Date.from(commitDateTime.minusMinutes(minutesForSecondCommit).toInstant())); + testTable = tearDownTestTableAndReinit(testTable, config); + + commitToTestTable(testTable, secondCommitTs, p0, file2P0C1); + testTable = tearDownTestTableAndReinit(testTable, config); + metaClient = HoodieTableMetaClient.reload(metaClient); + + // make next commit, with 1 insert per partition + int minutesForThirdCommit = 90; + String thirdCommitTs = HoodieInstantTimeGenerator.formatDate(Date.from(commitDateTime.minusMinutes(minutesForThirdCommit).toInstant())); + String file3P0C2 = UUID.randomUUID().toString(); + + testTable = tearDownTestTableAndReinit(testTable, config); + + commitToTestTable(testTable, thirdCommitTs, p0, file3P0C2); + testTable = 
tearDownTestTableAndReinit(testTable, config); + metaClient = HoodieTableMetaClient.reload(metaClient); + + // first empty clean can be generated since earliest instant to retain will be the first commit (always keep last two instants at a minimum) + String firstCleanInstant = HoodieInstantTimeGenerator.formatDate(Date.from(commitDateTime.minus(secondCommitAfterThreshold ? 70 : 30, ChronoUnit.MINUTES).toInstant())); + + SparkRDDWriteClient writeClient = getHoodieWriteClient(config); + List hoodieCleanStatsThree = runCleaner(config, false, false, writeClient, firstCleanInstant); + assertEquals(0, hoodieCleanStatsThree.size(), "Must not scan any partitions and clean any files"); + assertEquals(1, metaClient.reloadActiveTimeline().getCleanerTimeline().filterCompletedInstants().countInstants()); + String actualFirst = metaClient.getActiveTimeline().getCleanerTimeline().lastInstant().get().requestedTime(); + writeClient.close(); + + String file4P0C1 = UUID.randomUUID().toString(); + int minutesForFourthCommit = 10; + String fourthCommitTs = HoodieInstantTimeGenerator.formatDate(Date.from(commitDateTime.minusMinutes(minutesForFourthCommit).toInstant())); + testTable = tearDownTestTableAndReinit(testTable, config); + + commitToTestTable(testTable, fourthCommitTs, p0, file4P0C1); + testTable = tearDownTestTableAndReinit(testTable, config); + + // add a savepoint + SparkRDDWriteClient writeClient1 = null; + try { + writeClient1 = getHoodieWriteClient(config); + writeClient1.savepoint(fourthCommitTs, "user", "comment"); + } finally { + writeClient1.close(); + } + + Date firstCleanDate = HoodieInstantTimeGenerator.parseDateFromInstantTime(firstCleanInstant); + int minutesBetweenCleans = secondCommitAfterThreshold ? 
70 : 30; + String secondCleanInstant = HoodieInstantTimeGenerator.formatDate(Date.from(firstCleanDate.toInstant().plus(minutesBetweenCleans, ChronoUnit.MINUTES))); + + writeClient = getHoodieWriteClient(config); + List hoodieCleanStatsFour = runCleaner(config, false, false, writeClient, secondCleanInstant); + HoodieTimeline finalCompletedCleanInstants = metaClient.reloadActiveTimeline().getCleanerTimeline().filterCompletedInstants(); + if (secondCommitAfterThreshold) { + // second empty clean is added + assertEquals(0, hoodieCleanStatsFour.size(), "Must not scan any partitions and clean any files"); + assertEquals(2, finalCompletedCleanInstants.countInstants()); + // Ensure that extra metadata is properly set for empty clean commits + HoodieCleanMetadata secondCleanMetadata = CleanerUtils.getCleanerMetadata(HoodieTableMetaClient.reload(metaClient), finalCompletedCleanInstants.lastInstant().get()); + // new clean should have the savepoint created + assertEquals(fourthCommitTs, secondCleanMetadata.getExtraMetadata().get(CleanerUtils.SAVEPOINTED_TIMESTAMPS)); + // assertEquals(thirdCommitTs, secondCleanMetadata.getExtraMetadata().get(CleanPlanner.EARLIEST_COMMIT_TO_NOT_ARCHIVE)); + } else { + // no cleaner commit should be added because the time since last clean threshold has not been met + assertEquals(1, finalCompletedCleanInstants.countInstants()); + // Ensure that extra metadata is properly set for empty clean commits + HoodieCleanMetadata firstCleanMetadata = CleanerUtils.getCleanerMetadata(HoodieTableMetaClient.reload(metaClient), finalCompletedCleanInstants.lastInstant().get()); + //assertEquals(thirdCommitTs, firstCleanMetadata.getExtraMetadata().get(CleanPlanner.EARLIEST_COMMIT_TO_NOT_ARCHIVE)); + // first clean commit happened before the savepoint so this field is expected to not be present in the map + assertFalse(firstCleanMetadata.getExtraMetadata().containsKey(CleanerUtils.SAVEPOINTED_TIMESTAMPS)); + } + writeClient.close(); + } finally { + 
testTable.close(); + } + } + + @Test + void testEmptyCleanDoesNotGoBackwardsOnConfigChange() throws Exception { + // Test that earliestCommitToRetain never goes backwards when user increases retention. + // Scenario: user starts with short retention (12h), then increases to long retention (72h). + // The longer retention would compute an older ECTR, but the code should adjust it to + // the previous ECTR and still create the empty clean. + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withIncrementalCleaningMode(true) + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) + .withCleanBootstrapBaseFileEnabled(false) + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS).cleanerNumHoursRetained(12) + .withIntervalToCreateEmptyCleanHours(1) + .build()) + .build(); + + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + try { + String p0 = "2020/01/01"; + Instant instant = Instant.now(); + ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, metaClient.getTableConfig().getTimelineTimezone().getZoneId()); + + // Create first commit 70 hours ago + String file1P0C0 = UUID.randomUUID().toString(); + String firstCommitTs = HoodieInstantTimeGenerator.formatDate(Date.from(commitDateTime.minusHours(70).toInstant())); + commitToTestTable(testTable, firstCommitTs, p0, file1P0C0); + testTable = tearDownTestTableAndReinit(testTable, config); + + // Create second commit 48 hours ago + String file2P0C1 = UUID.randomUUID().toString(); + String secondCommitTs = HoodieInstantTimeGenerator.formatDate(Date.from(commitDateTime.minusHours(48).toInstant())); + commitToTestTable(testTable, secondCommitTs, p0, file2P0C1); + testTable = tearDownTestTableAndReinit(testTable, config); + metaClient = HoodieTableMetaClient.reload(metaClient); + + // Create third commit 6 hours ago (well 
within 12h retention window) + String file3P0C2 = UUID.randomUUID().toString(); + String thirdCommitTs = HoodieInstantTimeGenerator.formatDate(Date.from(commitDateTime.minusHours(6).toInstant())); + commitToTestTable(testTable, thirdCommitTs, p0, file3P0C2); + testTable = tearDownTestTableAndReinit(testTable, config); + metaClient = HoodieTableMetaClient.reload(metaClient); + + // Run first empty clean with 12h retention - ECTR should be thirdCommitTs (6h ago) + String firstCleanInstant = HoodieInstantTimeGenerator.formatDate(Date.from(commitDateTime.minusHours(2).toInstant())); + SparkRDDWriteClient writeClient = getHoodieWriteClient(config); + List hoodieCleanStatsOne = runCleaner(config, false, false, writeClient, firstCleanInstant); + assertEquals(0, hoodieCleanStatsOne.size(), "Must not clean any files"); + assertEquals(1, metaClient.reloadActiveTimeline().getCleanerTimeline().filterCompletedInstants().countInstants()); + + // Get the earliestCommitToRetain from first clean + HoodieInstant firstCleanCompleted = metaClient.getActiveTimeline().getCleanerTimeline().filterCompletedInstants().lastInstant().get(); + HoodieCleanMetadata firstCleanMetadata = CleanerUtils.getCleanerMetadata(metaClient, firstCleanCompleted); + String firstEarliestCommitToRetain = firstCleanMetadata.getEarliestCommitToRetain(); + writeClient.close(); + + // Add a new commit so that needsCleaning() passes for the second clean attempt + String file4P0C3 = UUID.randomUUID().toString(); + String fourthCommitTs = HoodieInstantTimeGenerator.formatDate(Date.from(commitDateTime.minusHours(1).toInstant())); + commitToTestTable(testTable, fourthCommitTs, p0, file4P0C3); + testTable = tearDownTestTableAndReinit(testTable, config); + metaClient = HoodieTableMetaClient.reload(metaClient); + + // Now increase retention to 72 hours, which would make ECTR go backwards to firstCommitTs (70h ago) + HoodieWriteConfig newConfig = HoodieWriteConfig.newBuilder().withPath(basePath) + 
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withIncrementalCleaningMode(true) + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) + .withCleanBootstrapBaseFileEnabled(false) + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS).cleanerNumHoursRetained(72) + .withIntervalToCreateEmptyCleanHours(1) + .build()) + .build(); + + // Create second empty clean 61 minutes after first clean + String secondCleanInstant = HoodieInstantTimeGenerator.formatDate(Date.from( + HoodieInstantTimeGenerator.parseDateFromInstantTime(firstCleanInstant).toInstant().plus(61, ChronoUnit.MINUTES))); + + writeClient = getHoodieWriteClient(newConfig); + List hoodieCleanStatsTwo = runCleaner(newConfig, false, false, writeClient, secondCleanInstant); + + // The empty clean should still be created, but with ECTR adjusted to the previous value + metaClient = HoodieTableMetaClient.reload(metaClient); + HoodieTimeline cleanTimeline = metaClient.getActiveTimeline().getCleanerTimeline().filterCompletedInstants(); + assertEquals(2, cleanTimeline.countInstants(), "Second empty clean should be created with adjusted ECTR"); + + // Verify earliestCommitToRetain did not go backwards + HoodieCleanMetadata secondCleanMetadata = CleanerUtils.getCleanerMetadata(metaClient, cleanTimeline.lastInstant().get()); + assertEquals(firstEarliestCommitToRetain, secondCleanMetadata.getEarliestCommitToRetain(), + "earliestCommitToRetain should be adjusted to previous value, not go backwards"); + writeClient.close(); + } finally { + testTable.close(); + } + } + + private void commitToTestTable(HoodieTestTable testTable, String commitTimeTs, String partition, String fileId) throws Exception { + testTable.addInflightCommit(commitTimeTs); + testTable.withBaseFilesInPartition(partition, fileId); + HoodieCommitMetadata commitMeta = generateCommitMetadata(commitTimeTs, Collections.singletonMap(partition, 
Collections.singletonList(fileId))); + metaClient.getActiveTimeline().saveAsComplete( + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, commitTimeTs, INSTANT_COMPARATOR.completionTimeOrderedComparator()), + Option.of(commitMeta)); + } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieCleanerTestBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieCleanerTestBase.java index aa1a5ee6a9697..66f2f595ef4da 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieCleanerTestBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieCleanerTestBase.java @@ -100,6 +100,18 @@ protected List runCleaner( return runCleaner(config, simulateRetryFailure, simulateMetadataFailure, 1, false); } + protected List runCleaner( + HoodieWriteConfig config, boolean simulateRetryFailure, boolean simulateMetadataFailure, + Integer firstCommitSequence, boolean needInstantInHudiFormat) throws IOException { + SparkRDDWriteClient writeClient = getHoodieWriteClient(config); + try { + String cleanInstantTs = needInstantInHudiFormat ? makeNewCommitTime(firstCommitSequence, "%014d") : makeNewCommitTime(firstCommitSequence, "%09d"); + return runCleaner(config, simulateRetryFailure, simulateMetadataFailure, writeClient, cleanInstantTs); + } finally { + writeClient.close(); + } + } + /** * Helper to run cleaner and collect Clean Stats. * @@ -107,11 +119,8 @@ protected List runCleaner( */ protected List runCleaner( HoodieWriteConfig config, boolean simulateRetryFailure, boolean simulateMetadataFailure, - Integer firstCommitSequence, boolean needInstantInHudiFormat) throws IOException { - SparkRDDWriteClient writeClient = getHoodieWriteClient(config); - String cleanInstantTs = needInstantInHudiFormat ? 
makeNewCommitTime(firstCommitSequence, "%014d") : makeNewCommitTime(firstCommitSequence, "%09d"); + SparkRDDWriteClient writeClient, String cleanInstantTs) throws IOException { HoodieCleanMetadata cleanMetadata1 = writeClient.clean(cleanInstantTs); - if (null == cleanMetadata1) { return new ArrayList<>(); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java index 97bd0e1175d76..6f7530f27b39b 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieSparkClientTestHarness.java @@ -55,6 +55,7 @@ import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.index.HoodieIndex; +import org.apache.hudi.index.inmemory.HoodieInMemoryHashIndex; import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory; import org.apache.hudi.metadata.FileSystemBackedTableMetadata; import org.apache.hudi.metadata.HoodieBackedTableMetadataWriter; @@ -237,6 +238,12 @@ protected void initQueryIndexConf() { * Cleanups Spark contexts ({@link JavaSparkContext} and {@link SQLContext}). */ protected void cleanupSparkContexts() { + // HoodieInMemoryHashIndex holds a JVM-static record-location map that survives + // sparkSession.stop(), leaking record keys and locations across sequential tests + // in the same JVM. A stale entry causes tagLocation to demote a not-matched + // INSERT into a no-op UPDATE on a non-existent file group. 
+ HoodieInMemoryHashIndex.clear(); + if (sparkSession != null) { sparkSession.stop(); sparkSession = null; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java index 5f155e57a1eb7..a11cfaf36fbbd 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieStorageConfig.java @@ -284,6 +284,13 @@ public class HoodieStorageConfig extends HoodieConfig { .withDocumentation("The fully-qualified class name of the factory class to return readers and writers of files used " + "by Hudi. The provided class should implement `org.apache.hudi.io.storage.HoodieIOFactory`."); + public static final ConfigProperty HOODIE_PARQUET_CONFIG_INJECTOR_CLASS = ConfigProperty + .key("hoodie.parquet.write.config.injector.class") + .noDefaultValue() + .markAdvanced() + .sinceVersion("1.2.0") + .withDocumentation("Config injector implementation for HoodieParquetConfigInjector class, for users willing to inject some custom configs to parquet writers"); + /** * @deprecated Use {@link #PARQUET_MAX_FILE_SIZE} and its methods instead */ @@ -478,6 +485,11 @@ public Builder parquetCompressionCodec(String parquetCompressionCodec) { return this; } + public Builder parquetDictionaryEnabled(boolean enable) { + storageConfig.setValue(PARQUET_DICTIONARY_ENABLED, String.valueOf(enable)); + return this; + } + public Builder parquetWriteLegacyFormat(String parquetWriteLegacyFormat) { storageConfig.setValue(PARQUET_WRITE_LEGACY_FORMAT_ENABLED, parquetWriteLegacyFormat); return this; @@ -552,7 +564,7 @@ public Builder withWriteUtcTimezone(boolean writeUtcTimezone) { * Sets the bloom filter type for the configuration. 
* * @param bloomFilterType The bloom filter type (SIMPLE or DYNAMIC_V0) - * @return this builder instance for method chaining + */ public Builder withBloomFilterType(String bloomFilterType) { storageConfig.setValue(BLOOM_FILTER_TYPE, bloomFilterType); @@ -563,7 +575,7 @@ public Builder withBloomFilterType(String bloomFilterType) { * Sets the number of entries to be stored in the bloom filter. * * @param numEntries The number of entries for the bloom filter - * @return this builder instance for method chaining + */ public Builder withBloomFilterNumEntries(int numEntries) { storageConfig.setValue(BLOOM_FILTER_NUM_ENTRIES_VALUE, String.valueOf(numEntries)); @@ -574,7 +586,7 @@ public Builder withBloomFilterNumEntries(int numEntries) { * Sets the false positive probability (FPP) for the bloom filter. * * @param fpp The false positive probability as a double - * @return this builder instance for method chaining + */ public Builder withBloomFilterFpp(double fpp) { storageConfig.setValue(BLOOM_FILTER_FPP_VALUE, String.valueOf(fpp)); @@ -585,13 +597,24 @@ public Builder withBloomFilterFpp(double fpp) { * Sets the maximum number of entries for dynamic bloom filter. * * @param maxEntries The maximum number of entries for dynamic bloom filter - * @return this builder instance for method chaining + */ public Builder withBloomFilterDynamicMaxEntries(int maxEntries) { storageConfig.setValue(BLOOM_FILTER_DYNAMIC_MAX_ENTRIES, String.valueOf(maxEntries)); return this; } + /** + * Sets the parquet config injector class name. 
+ * + * @param parquetConfigInjectorClass The fully-qualified class name of the parquet config injector + + */ + public Builder withParquetConfigInjectorClass(String parquetConfigInjectorClass) { + storageConfig.setValue(HOODIE_PARQUET_CONFIG_INJECTOR_CLASS, parquetConfigInjectorClass); + return this; + } + public HoodieStorageConfig build() { storageConfig.setDefaults(HoodieStorageConfig.class.getName()); return storageConfig; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/FileSlice.java b/hudi-common/src/main/java/org/apache/hudi/common/model/FileSlice.java index 19565041f18f6..cf648b11fbfa6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/FileSlice.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/FileSlice.java @@ -129,6 +129,10 @@ public boolean hasLogFiles() { return !logFiles.isEmpty(); } + public int getLogFileCnt() { + return logFiles.size(); + } + /** * NOTE: equals and hashcode generated by Lombok uses getters by default. *

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java b/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java index b937b84495afb..dff06a4c0e0f0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java @@ -42,6 +42,8 @@ import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; @@ -98,8 +100,8 @@ public class HoodieSchema implements Serializable { public static final HoodieSchema NULL_SCHEMA = HoodieSchema.create(HoodieSchemaType.NULL); /** * Constant to use when attaching type metadata to external schema systems like Spark's StructType. - * Stores a parameterized type string for custom Hudi logical types such as VECTOR and BLOB. - * Examples: "VECTOR(128)", "VECTOR(512, DOUBLE)", "BLOB". + * Stores a parameterized type string for custom Hudi logical types such as VECTOR, BLOB, and VARIANT. + * Examples: "VECTOR(128)", "VECTOR(512, DOUBLE)", "BLOB", "VARIANT". */ public static final String TYPE_METADATA_FIELD = "hudi_type"; @@ -109,8 +111,8 @@ public class HoodieSchema implements Serializable { public static final String VARIANT_TYPE_NAME = VariantLogicalType.VARIANT_LOGICAL_TYPE_NAME; /** - * Parses a type descriptor string for custom Hudi logical types such as VECTOR and BLOB. - * Examples: "VECTOR(128)", "VECTOR(512, DOUBLE)", "BLOB". + * Parses a type descriptor string for custom Hudi logical types such as VECTOR, BLOB, and VARIANT. + * Examples: "VECTOR(128)", "VECTOR(512, DOUBLE)", "BLOB", "VARIANT". * Throws for non-custom logical type names. 
*/ public static HoodieSchema parseTypeDescriptor(String descriptor) { @@ -145,6 +147,12 @@ public static HoodieSchema parseTypeDescriptor(String descriptor) { "BLOB type descriptor does not support parameters, got: " + params); } return createBlob(); + case VARIANT: + if (!params.isEmpty()) { + throw new IllegalArgumentException( + "VARIANT type descriptor does not support parameters, got: " + params); + } + return createVariant(); default: throw new IllegalArgumentException( "parseTypeDescriptor only supports custom logical types, got: " + type); @@ -188,8 +196,7 @@ private static Pair> tokenizeTypeDescriptor(Strin } private static final Set CUSTOM_LOGICAL_TYPES = - EnumSet.of(HoodieSchemaType.VECTOR, HoodieSchemaType.BLOB); - + EnumSet.of(HoodieSchemaType.VECTOR, HoodieSchemaType.BLOB, HoodieSchemaType.VARIANT); /** * Constants for Parquet-style accessor patterns used in nested MAP and ARRAY navigation. @@ -210,41 +217,99 @@ private static Pair> tokenizeTypeDescriptor(Strin public static final String PARQUET_ARRAY_AVRO = "." + ARRAY_LIST_ELEMENT; /** - * Parquet file-footer metadata key under which VECTOR column names and type descriptors + * Base-file footer metadata key under which VECTOR column names and type descriptors * are recorded. The value is a comma-separated list of {@code colName:VECTOR(dim[,elemType])} * entries, e.g. {@code "embedding:VECTOR(128),tags:VECTOR(64,INT8)"}. * - *

Stored as file-level key-value metadata (Parquet footer) so that any reader can - * identify vector columns without needing the Hudi schema store. + *

Stored as file-level key-value metadata (Parquet footer, Lance schema metadata) + * so that any reader can identify vector columns without needing the Hudi schema store. + */ + public static final String VECTOR_COLUMNS_METADATA_KEY = "hoodie.vector.columns"; + + /** + * Serializes a name-to-Vector map into the comma-separated + * {@code colName:VECTOR(dim[,elemType])} format used for {@link #VECTOR_COLUMNS_METADATA_KEY}. + * + *

This is the single canonical serializer — all format-specific code (Parquet, Lance) + * should build the map from their respective schema representation and delegate here. + * + * @param vectorColumns ordered map of field name to Vector descriptor (iteration order is preserved) + * @return comma-separated descriptor list, or empty string if the map is null or empty */ - public static final String PARQUET_VECTOR_COLUMNS_METADATA_KEY = "hoodie.vector.columns"; + public static String serializeVectorColumnsMetadata(java.util.Map vectorColumns) { + if (vectorColumns == null || vectorColumns.isEmpty()) { + return ""; + } + StringBuilder sb = new StringBuilder(); + for (java.util.Map.Entry entry : vectorColumns.entrySet()) { + if (sb.length() > 0) { + sb.append(','); + } + sb.append(entry.getKey()).append(':').append(entry.getValue().toTypeDescriptor()); + } + return sb.toString(); + } /** - * Builds the value string for {@link #PARQUET_VECTOR_COLUMNS_METADATA_KEY}. + * Builds the value string for {@link #VECTOR_COLUMNS_METADATA_KEY} from a {@link HoodieSchema}. 
* * @param schema a HoodieSchema of type RECORD (or null) * @return comma-separated {@code colName:VECTOR(dim[,elemType])} entries, or empty string * if the schema is null or has no VECTOR columns + * @see #serializeVectorColumnsMetadata(java.util.Map) */ public static String buildVectorColumnsMetadataValue(HoodieSchema schema) { if (schema == null || schema.isSchemaNull()) { return ""; } - List fields = schema.getFields(); - StringBuilder sb = new StringBuilder(); - for (HoodieSchemaField field : fields) { + LinkedHashMap vectorColumns = new LinkedHashMap<>(); + for (HoodieSchemaField field : schema.getFields()) { HoodieSchema fieldSchema = field.schema().getNonNullType(); if (fieldSchema.getType() == HoodieSchemaType.VECTOR) { - Vector vectorSchema = (Vector) fieldSchema; - if (sb.length() > 0) { - sb.append(','); - } - sb.append(field.name()).append(':').append(vectorSchema.toTypeDescriptor()); + vectorColumns.put(field.name(), (Vector) fieldSchema); } } - return sb.toString(); + return serializeVectorColumnsMetadata(vectorColumns); + } + + /** + * Parses the comma-separated {@link #VECTOR_COLUMNS_METADATA_KEY} footer value and + * returns the set of vector column field names. Commas inside parentheses (e.g. inside + * the VECTOR descriptor {@code VECTOR(128, DOUBLE)}) are not treated as separators. 
+ * + * @param footerValue raw value from the file footer, or null / empty + * @return set of field names (preserves insertion order), or empty set if input is null / empty + */ + public static Set parseVectorColumnNames(String footerValue) { + if (footerValue == null || footerValue.isEmpty()) { + return Collections.emptySet(); + } + LinkedHashSet names = new LinkedHashSet<>(); + int depth = 0; + int start = 0; + for (int i = 0; i < footerValue.length(); i++) { + char c = footerValue.charAt(i); + if (c == '(') { + depth++; + } else if (c == ')') { + depth--; + } else if (c == ',' && depth == 0) { + addVectorColumnName(footerValue, start, i, names); + start = i + 1; + } + } + addVectorColumnName(footerValue, start, footerValue.length(), names); + return names; } + private static void addVectorColumnName(String s, int start, int end, Set names) { + int colon = s.indexOf(':', start); + if (colon > start && colon < end) { + names.add(s.substring(start, colon).trim()); + } + } + + private Schema avroSchema; private HoodieSchemaType type; private transient List fields; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java index 075a58bf7bd5d..9c4debe193659 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java @@ -601,9 +601,11 @@ protected Stream filterBaseFileAfterPendingCompaction(FileSlice fileS */ private Stream filterUncommittedFiles(FileSlice fileSlice, boolean includeEmptyFileSlice) { Option committedBaseFile = fileSlice.getBaseFile().isPresent() && completionTimeQueryView.isCompleted(fileSlice.getBaseInstantTime()) ? 
fileSlice.getBaseFile() : Option.empty(); - List committedLogFiles = fileSlice.getLogFiles().filter(logFile -> completionTimeQueryView.isCompleted(logFile.getDeltaCommitTime())).collect(Collectors.toList()); + List committedLogFiles = fileSlice.getLogFiles() + .filter(logFile -> completionTimeQueryView.isCompleted(logFile.getDeltaCommitTime())) + .collect(Collectors.toList()); if ((fileSlice.getBaseFile().isPresent() && !committedBaseFile.isPresent()) - || committedLogFiles.size() != fileSlice.getLogFiles().count()) { + || committedLogFiles.size() != fileSlice.getLogFileCnt()) { LOG.debug("File Slice ({}) has uncommitted files.", fileSlice); // A file is filtered out of the file-slice if the corresponding // instant has not completed yet. @@ -624,8 +626,10 @@ private Stream filterUncommittedFiles(FileSlice fileSlice, boolean in * @param fileSlice File Slice */ private FileSlice filterUncommittedLogs(FileSlice fileSlice) { - List committedLogFiles = fileSlice.getLogFiles().filter(logFile -> completionTimeQueryView.isCompleted(logFile.getDeltaCommitTime())).collect(Collectors.toList()); - if (committedLogFiles.size() != fileSlice.getLogFiles().count()) { + List committedLogFiles = fileSlice.getLogFiles() + .filter(logFile -> completionTimeQueryView.isCompleted(logFile.getDeltaCommitTime())) + .collect(Collectors.toList()); + if (committedLogFiles.size() != fileSlice.getLogFileCnt()) { LOG.debug("File Slice ({}) has uncommitted log files.", fileSlice); // A file is filtered out of the file-slice if the corresponding // instant has not completed yet. 
@@ -1628,7 +1632,7 @@ private FileSlice fetchMergedFileSlice(HoodieFileGroup fileGroup, */ private Option fetchAllLogsMergedFileSlice(HoodieFileGroup fileGroup, String maxInstantTime) { List fileSlices = fileGroup.getAllFileSlicesBeforeOn(maxInstantTime).collect(Collectors.toList()); - if (fileSlices.size() == 0) { + if (fileSlices.isEmpty()) { return Option.empty(); } if (fileSlices.size() == 1) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java index 6d2c1dfea4dd8..755cb7c5489eb 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java @@ -22,6 +22,7 @@ import java.lang.reflect.Array; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -69,6 +70,14 @@ public static boolean nonEmpty(Collection c) { return !isNullOrEmpty(c); } + public static boolean nonEmpty(Map m) { + return !isNullOrEmpty(m); + } + + public static boolean containsAll(Map m1, Map m2) { + return m1.entrySet().containsAll(m2.entrySet()); + } + /** * Reduces provided {@link Collection} using provided {@code reducer} applied to * every element of the collection like following @@ -252,7 +261,7 @@ public static boolean elementsEqual(Iterator iterator1, Iterator iterator2 @SafeVarargs public static List createImmutableList(final T... 
elements) { - return Collections.unmodifiableList(Stream.of(elements).collect(Collectors.toList())); + return Collections.unmodifiableList(new ArrayList<>(Arrays.asList(elements))); } public static List createImmutableList(final List list) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/MapUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/MapUtils.java deleted file mode 100644 index 937b4f873fa41..0000000000000 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/MapUtils.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hudi.common.util; - -import java.util.Map; -import java.util.Objects; - -/** - * Utils for Java Map. 
- */ -public class MapUtils { - - public static boolean isNullOrEmpty(Map m) { - return Objects.isNull(m) || m.isEmpty(); - } - - public static boolean nonEmpty(Map m) { - return !isNullOrEmpty(m); - } - - public static boolean containsAll(Map m1, Map m2) { - return m1.entrySet().containsAll(m2.entrySet()); - } -} diff --git a/hudi-common/src/main/java/org/apache/hudi/internal/schema/convert/InternalSchemaConverter.java b/hudi-common/src/main/java/org/apache/hudi/internal/schema/convert/InternalSchemaConverter.java index b887bbde550db..e9efd0ab86de4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/internal/schema/convert/InternalSchemaConverter.java +++ b/hudi-common/src/main/java/org/apache/hudi/internal/schema/convert/InternalSchemaConverter.java @@ -55,6 +55,38 @@ public class InternalSchemaConverter { static final int VARIANT_VALUE_FIELD_ID = -1; static final int VARIANT_METADATA_FIELD_ID = -2; + // Sentinel field IDs used to mark Blob sub-fields in the internal schema representation. + // The BLOB logical type is itself a fixed-shape RECORD; we round-trip it through InternalSchema by + // tagging its three child fields with negative IDs that the reverse path detects to reconstruct + // a HoodieSchema.Blob (preserving the `blob` logical type rather than degrading to a plain record). + static final int BLOB_TYPE_FIELD_ID = -10; + static final int BLOB_DATA_FIELD_ID = -11; + static final int BLOB_REFERENCE_FIELD_ID = -12; + + // The BLOB internal-schema record is fully determined by the BLOB type definition, so we build + // it once and reuse it on every HoodieSchema -> InternalSchema conversion. 
+ private static final Types.RecordType BLOB_INTERNAL_RECORD_TYPE = buildBlobInternalRecordType(); + + private static Types.RecordType buildBlobInternalRecordType() { + List referenceFields = new ArrayList<>(4); + referenceFields.add(Types.Field.get(0, false, + HoodieSchema.Blob.EXTERNAL_REFERENCE_PATH, Types.StringType.get())); + referenceFields.add(Types.Field.get(1, true, + HoodieSchema.Blob.EXTERNAL_REFERENCE_OFFSET, Types.LongType.get())); + referenceFields.add(Types.Field.get(2, true, + HoodieSchema.Blob.EXTERNAL_REFERENCE_LENGTH, Types.LongType.get())); + referenceFields.add(Types.Field.get(3, false, + HoodieSchema.Blob.EXTERNAL_REFERENCE_IS_MANAGED, Types.BooleanType.get())); + List blobFields = new ArrayList<>(3); + blobFields.add(Types.Field.get(BLOB_TYPE_FIELD_ID, false, + HoodieSchema.Blob.TYPE, Types.StringType.get(), "Blob storage type (INLINE | OUT_OF_LINE)")); + blobFields.add(Types.Field.get(BLOB_DATA_FIELD_ID, true, + HoodieSchema.Blob.INLINE_DATA_FIELD, Types.BinaryType.get(), "Inline blob bytes")); + blobFields.add(Types.Field.get(BLOB_REFERENCE_FIELD_ID, true, + HoodieSchema.Blob.EXTERNAL_REFERENCE, Types.RecordType.get(referenceFields), "Out-of-line blob reference")); + return Types.RecordType.get(blobFields); + } + /** * Convert internalSchema to HoodieSchema. 
* @@ -366,6 +398,8 @@ private static Type visitPrimitiveToBuildInternalType(HoodieSchema schema) { variantFields.add(Types.Field.get(VARIANT_VALUE_FIELD_ID, false, HoodieSchema.Variant.VARIANT_VALUE_FIELD, Types.BinaryType.get(), "Variant value component")); return Types.RecordType.get(variantFields); + case BLOB: + return BLOB_INTERNAL_RECORD_TYPE; default: throw new UnsupportedOperationException("Unsupported primitive type: " + schema.getType()); } @@ -478,6 +512,23 @@ private static HoodieSchema visitInternalRecordToBuildHoodieRecord(Types.RecordT } } + // Detect Blob round-trip: 3 sentinel-tagged children (type / data / reference) + if (fields.size() == 3) { + Types.Field f0 = fields.get(0); + Types.Field f1 = fields.get(1); + Types.Field f2 = fields.get(2); + boolean hasBlobIds = f0.fieldId() == BLOB_TYPE_FIELD_ID + && f1.fieldId() == BLOB_DATA_FIELD_ID + && f2.fieldId() == BLOB_REFERENCE_FIELD_ID; + boolean hasBlobNames = f0.name().equals(HoodieSchema.Blob.TYPE) + && f1.name().equals(HoodieSchema.Blob.INLINE_DATA_FIELD) + && f2.name().equals(HoodieSchema.Blob.EXTERNAL_REFERENCE); + if (hasBlobIds && hasBlobNames) { + // TODO: Schema evolution for Blob types follows the same path as Variant — fixed shape. + return HoodieSchema.createBlob(); + } + } + // Create regular record List schemaFields = new ArrayList<>(fields.size()); for (int i = 0; i < fields.size(); i++) { diff --git a/hudi-common/src/main/java/org/apache/hudi/io/HoodieParquetConfigInjector.java b/hudi-common/src/main/java/org/apache/hudi/io/HoodieParquetConfigInjector.java new file mode 100644 index 0000000000000..5ee9f67a70405 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/io/HoodieParquetConfigInjector.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.io; + +import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.config.HoodieStorageConfig; +import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.storage.StorageConfiguration; +import org.apache.hudi.storage.StoragePath; + +/** + * A pluggable interface that all parquet-based writers (Spark/Flink) will invoke before creating write support + * or parquet file writer objects. + *

+ * This allows users to inject custom configurations into the Parquet writer pipeline at runtime, enabling + * fine-grained control over Parquet file properties such as bloom filters, compression settings, encoding + * options, and other advanced Parquet configurations. + *

+ * Important: Implementations must NOT mutate the input {@code storageConf} or + * {@code hoodieConfig} objects directly. Instead, they should create copies, apply the desired + * modifications to the copies, and return them. The caller retains references to the original + * objects, so in-place mutations would have unintended side effects on other components sharing + * the same configuration instances. + *

+ * Example use cases: + *

    + *
  • Enabling column-specific Parquet bloom filters
  • + *
  • Setting custom compression codecs per file or partition
  • + *
  • Adjusting page sizes or row group sizes based on data characteristics
  • + *
  • Injecting custom metadata into Parquet files
  • + *
+ * + * @since 1.2.0 + */ +public interface HoodieParquetConfigInjector { + + /** + * Injects custom configurations into the Parquet writer pipeline. + *

+ * This method is invoked before creating the Parquet write support and writer objects, allowing + * implementations to modify both the storage-level and Hudi-level configurations. + *

+ * Implementations must not mutate the input parameters. Instead, create copies of {@code storageConf} + * and {@code hoodieConfig}, apply modifications to the copies, and return them in the result pair. + * + * @param path the file path where the Parquet file will be written + * @param storageConf the storage configuration (e.g., Hadoop Configuration) — must not be mutated + * @param hoodieConfig the Hudi configuration containing write settings and table properties — must not be mutated + * @return a pair containing new (or copied) storage configuration and Hudi configuration with the injected properties. + * Both configurations will be used to create the Parquet writer. + */ + Pair injectConfig(StoragePath path, StorageConfiguration storageConf, HoodieConfig hoodieConfig); + + /** + * Applies the configured {@link HoodieParquetConfigInjector} (if any) to the given storage and Hudi configurations. + * If no injector class is configured, returns the original configurations unchanged. + * + * @param path the file path where the Parquet file will be written + * @param storageConf the storage configuration + * @param hoodieConfig the Hudi configuration + * @return a pair containing the (potentially modified) storage configuration and Hudi configuration + */ + static Pair applyConfigInjector(StoragePath path, StorageConfiguration storageConf, HoodieConfig hoodieConfig) { + String configInjectorClass = hoodieConfig.getStringOrDefault(HoodieStorageConfig.HOODIE_PARQUET_CONFIG_INJECTOR_CLASS, StringUtils.EMPTY_STRING); + if (StringUtils.isNullOrEmpty(configInjectorClass)) { + return Pair.of(storageConf, hoodieConfig); + } + try { + HoodieParquetConfigInjector injector = (HoodieParquetConfigInjector) ReflectionUtils.loadClass(configInjectorClass); + return injector.injectConfig(path, storageConf, hoodieConfig); + } catch (Exception e) { + throw new HoodieException("Failed to instantiate or invoke parquet config injector class: " + configInjectorClass, e); + } + } +} diff 
--git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java index 78e9606235fb2..1020d1a8c9f02 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieNativeAvroHFileReader.java @@ -177,11 +177,7 @@ public String next() { @Override public void close() { - try { - reader.close(); - } catch (IOException e) { - throw new HoodieIOException("Error closing the HFile reader", e); - } + closeReader(reader, "Error closing the HFile reader"); } }; } @@ -345,6 +341,14 @@ private static BloomFilter readBloomFilter(HFileReader reader) throws HoodieExce } } + private static void closeReader(HFileReader reader, String errorMessage) { + try { + reader.close(); + } catch (IOException e) { + throw new HoodieIOException(errorMessage, e); + } + } + private static class RecordIterator implements ClosableIterator { private final HFileReader reader; private final GenericDatumReader datumReader; @@ -399,11 +403,7 @@ public IndexedRecord next() { @Override public void close() { - try { - reader.close(); - } catch (IOException e) { - throw new HoodieIOException("Error closing the HFile reader", e); - } + closeReader(reader, "Error closing the HFile reader"); } } @@ -476,11 +476,7 @@ public IndexedRecord next() { @Override public void close() { - try { - reader.close(); - } catch (IOException e) { - throw new HoodieIOException("Error closing the HFile reader", e); - } + closeReader(reader, "Error closing the HFile reader"); } } @@ -538,11 +534,7 @@ public IndexedRecord next() { @Override public void close() { - try { - reader.close(); - } catch (IOException e) { - throw new HoodieIOException("Error closing the HFile reader and scanner", e); - } + closeReader(reader, "Error closing the HFile reader and scanner"); } private static Iterator 
getRecordByKeyPrefixIteratorInternal(HFileReader reader, diff --git a/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchema.java b/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchema.java index 70330f3d9e2d6..48d825ac3d55f 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchema.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchema.java @@ -2854,6 +2854,25 @@ public void testBlobTypeDescriptorRoundTrip() { assertInstanceOf(HoodieSchema.Blob.class, parsed); } + @Test + public void testParseTypeDescriptorVariant() { + HoodieSchema parsed = HoodieSchema.parseTypeDescriptor("VARIANT"); + assertEquals(HoodieSchemaType.VARIANT, parsed.getType()); + assertInstanceOf(HoodieSchema.Variant.class, parsed); + } + + @Test + public void testParseTypeDescriptorVariantCaseInsensitive() { + HoodieSchema parsed = HoodieSchema.parseTypeDescriptor("variant"); + assertEquals(HoodieSchemaType.VARIANT, parsed.getType()); + } + + @Test + public void testParseTypeDescriptorVariantRejectsParameters() { + assertThrows(IllegalArgumentException.class, () -> HoodieSchema.parseTypeDescriptor("VARIANT(foo)")); + assertThrows(IllegalArgumentException.class, () -> HoodieSchema.parseTypeDescriptor("VARIANT(1, 2)")); + } + @Test public void testCreateArrayWithNullableVectorThrows() { HoodieSchema vectorSchema = HoodieSchema.createNullable(HoodieSchema.createVector(128)); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/DisableDictionaryInjector.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/DisableDictionaryInjector.java new file mode 100644 index 0000000000000..60b181e22af87 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/DisableDictionaryInjector.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.testutils; + +import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.config.HoodieStorageConfig; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.io.HoodieParquetConfigInjector; +import org.apache.hudi.storage.StorageConfiguration; +import org.apache.hudi.storage.StoragePath; + +/** + * Test implementation of {@link HoodieParquetConfigInjector} that disables dictionary encoding. 
+ */ +public class DisableDictionaryInjector implements HoodieParquetConfigInjector { + @Override + public Pair injectConfig(StoragePath path, + StorageConfiguration storageConf, + HoodieConfig hoodieConfig) { + hoodieConfig.setValue(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED, "false"); + return Pair.of(storageConf, hoodieConfig); + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestCollectionUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestCollectionUtils.java index 53ca9b2bebc1f..75829f112a38c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestCollectionUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestCollectionUtils.java @@ -20,17 +20,59 @@ package org.apache.hudi.common.util; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.stream.Stream; import static org.apache.hudi.common.util.CollectionUtils.batches; +import static org.apache.hudi.common.util.CollectionUtils.containsAll; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; class TestCollectionUtils { + private static Stream containsAllArgs() { + Map m0 = new HashMap<>(); + m0.put("k0", "v0"); + m0.put("k1", "v1"); + m0.put("k2", "v2"); + Map m1 = new HashMap<>(); + m1.put("k1", "v1"); + Map m2 = new HashMap<>(); + m2.put("k2", "v2"); + m2.put("k", "v"); + Map m3 = Collections.emptyMap(); + Map m4 = new HashMap<>(); + m4.put("k0", null); + Map m5 = new HashMap<>(); + m5.put("k0", 0); + + List argsList = new ArrayList<>(); + + argsList.add(Arguments.of(m0, m1, true)); + argsList.add(Arguments.of(m0, m3, true)); + 
argsList.add(Arguments.of(m5, m3, true)); + argsList.add(Arguments.of(m0, m4, false)); + argsList.add(Arguments.of(m0, m2, false)); + argsList.add(Arguments.of(m0, m5, false)); + + return argsList.stream(); + } + + @ParameterizedTest + @MethodSource("containsAllArgs") + void containsAllOnMaps(Map m1, Map m2, boolean expectedResult) { + assertEquals(expectedResult, containsAll(m1, m2)); + } + @Test void getBatchesFromList() { assertThrows(IllegalArgumentException.class, () -> { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestMapUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestMapUtils.java deleted file mode 100644 index 888ded5c49df0..0000000000000 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestMapUtils.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hudi.common.util; - -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Stream; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -class TestMapUtils { - - private static Stream containsAllArgs() { - Map m0 = new HashMap<>(); - m0.put("k0", "v0"); - m0.put("k1", "v1"); - m0.put("k2", "v2"); - Map m1 = new HashMap<>(); - m1.put("k1", "v1"); - Map m2 = new HashMap<>(); - m2.put("k2", "v2"); - m2.put("k", "v"); - Map m3 = Collections.emptyMap(); - Map m4 = new HashMap<>(); - m4.put("k0", null); - Map m5 = new HashMap<>(); - m5.put("k0", 0); - - List argsList = new ArrayList<>(); - - argsList.add(Arguments.of(m0, m1, true)); - argsList.add(Arguments.of(m0, m3, true)); - argsList.add(Arguments.of(m5, m3, true)); - argsList.add(Arguments.of(m0, m4, false)); - argsList.add(Arguments.of(m0, m2, false)); - argsList.add(Arguments.of(m0, m5, false)); - - return argsList.stream(); - } - - @ParameterizedTest - @MethodSource("containsAllArgs") - void containsAll(Map m1, Map m2, boolean expectedResult) { - assertEquals(expectedResult, MapUtils.containsAll(m1, m2)); - } -} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java index cf382e748fe82..767a7457e1e76 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java @@ -747,6 +747,16 @@ public class FlinkOptions extends HoodieConfig { + "Data is sorted within the buffer configured by number of records or buffer size. 
" + "The order of entire written file is not guaranteed."); + @AdvancedConfig + public static final ConfigOption WRITE_BUFFER_SORT_CONTINUOUS_DRAIN_SIZE = ConfigOptions + .key("write.buffer.sort.continuous.drain.size") + .intType() + .defaultValue(1) // default drain 1 record at a time + .withDescription("Number of records to drain each time the max capacity is reached when using continuous sorting. " + + "Default value of 1 provides smooth, incremental draining. " + + "Can be increased for batching if needed (e.g., 10, 100). " + + "Larger values reduce drain frequency but may cause latency spikes."); + @AdvancedConfig public static final ConfigOption WRITE_BUFFER_SIZE = ConfigOptions .key("write.buffer.size") @@ -764,7 +774,8 @@ public class FlinkOptions extends HoodieConfig { .withDescription("Buffer type for append write function: " + "NONE (no buffer sort, default), " + "BOUNDED_IN_MEMORY (double buffer with async write), " - + "DISRUPTOR (ring buffer with async write, recommended for better throughput)"); + + "DISRUPTOR (ring buffer with async write, recommended for better throughput), " + + "CONTINUOUS_SORT (TreeMap-based continuous sorting with incremental draining)"); @AdvancedConfig public static final ConfigOption WRITE_BUFFER_DISRUPTOR_RING_SIZE = ConfigOptions diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunctionWithContinuousSort.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunctionWithContinuousSort.java new file mode 100644 index 0000000000000..caffecbb34ba3 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunctionWithContinuousSort.java @@ -0,0 +1,332 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.sink.append; + +import org.apache.hudi.common.util.ObjectSizeCalculator; +import org.apache.hudi.configuration.FlinkOptions; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.sink.StreamWriteOperatorCoordinator; +import org.apache.hudi.sink.buffer.TotalSizeTracer; +import org.apache.hudi.sink.bulk.sort.SortOperatorGen; +import org.apache.hudi.utils.RuntimeContextUtils; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.core.memory.MemorySegment; +import org.apache.flink.core.memory.MemorySegmentFactory; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.planner.codegen.sort.SortCodeGenerator; +import org.apache.flink.table.runtime.typeutils.RowDataSerializer; +import org.apache.flink.table.runtime.generated.GeneratedNormalizedKeyComputer; +import org.apache.flink.table.runtime.generated.GeneratedRecordComparator; +import org.apache.flink.table.runtime.generated.NormalizedKeyComputer; +import org.apache.flink.table.runtime.generated.RecordComparator; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.util.Collector; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +/** + * Sink 
function to write data with continuous sorting for improved compression. + * + *

Unlike {@link AppendWriteFunctionWithBIMBufferSort} which uses batch sorting, + * this function maintains sorted order continuously using a TreeMap, providing: + *

    + *
  • Non-blocking inserts (O(log n) per insert vs. O(1) per insert + periodic O(n log n) batch sort)
  • + *
  • Incremental draining without re-sorting
  • + *
  • Predictable latency (no sort spikes)
  • + *
+ * + *

Strategy: + *

    + *
  1. Records are kept in sorted order as they are inserted (TreeMap)
  2. + *
  3. When buffer reaches max capacity, the first record(s) in sort order are drained synchronously
  4. + *
  5. Drain size is configurable to balance latency vs. throughput vs. compression ratio
  6. + *
+ * + * @param Type of the input record + * @see StreamWriteOperatorCoordinator + */ +public class AppendWriteFunctionWithContinuousSort extends AppendWriteFunction { + + private static final Logger LOG = LoggerFactory.getLogger(AppendWriteFunctionWithContinuousSort.class); + + private final long maxCapacity; + private final int drainSize; + + private transient TreeMap sortedRecords; + private transient long insertionSequence; + private transient TotalSizeTracer sizeTracer; + + // Sort key computation + private transient NormalizedKeyComputer normalizedKeyComputer; + private transient RecordComparator recordComparator; + private transient MemorySegment reusableKeySegment; + private transient int normalizedKeySize; + private transient boolean objectReuseEnabled; + private transient RowDataSerializer rowDataSerializer; + + // Metrics + private transient long totalDrainOperations; + private transient long totalDrainedRecords; + private transient long totalInserted; + private transient long estimatedRecordSize; + + public AppendWriteFunctionWithContinuousSort(Configuration config, RowType rowType) { + super(config, rowType); + + // Configuration + this.maxCapacity = config.get(FlinkOptions.WRITE_BUFFER_SIZE); + this.drainSize = config.get(FlinkOptions.WRITE_BUFFER_SORT_CONTINUOUS_DRAIN_SIZE); + + LOG.info("AppendWriteFunctionWithContinuousSort created: maxCapacity={}, drainSize={}", + maxCapacity, drainSize); + } + + @Override + public void open(Configuration parameters) throws Exception { + // Validate configuration before calling super.open() which requires Flink runtime context + if (maxCapacity <= 0) { + throw new IllegalArgumentException( + String.format("Buffer capacity must be positive, got: %d", maxCapacity)); + } + + if (drainSize <= 0) { + throw new IllegalArgumentException( + String.format("Drain size must be positive, got: %d", drainSize)); + } + + // Resolve sort keys, falling back to record key if not specified + List sortKeyList = 
AppendWriteFunctions.resolveSortKeys(config); + + super.open(parameters); + + LOG.info("Initializing continuous sort with keys: {}", sortKeyList); + + // Create sort code generator for normalized key computation and record comparison + SortOperatorGen sortOperatorGen = new SortOperatorGen(rowType, sortKeyList.toArray(new String[0])); + SortCodeGenerator codeGenerator = sortOperatorGen.createSortCodeGenerator(); + GeneratedNormalizedKeyComputer generatedKeyComputer = codeGenerator.generateNormalizedKeyComputer("ContinuousSortKeyComputer"); + GeneratedRecordComparator generatedComparator = codeGenerator.generateRecordComparator("ContinuousSortComparator"); + + // Instantiate code-generated components + ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + this.normalizedKeyComputer = generatedKeyComputer.newInstance(classLoader); + this.recordComparator = generatedComparator.newInstance(classLoader); + this.normalizedKeySize = normalizedKeyComputer.getNumKeyBytes(); + + // Initialize TreeMap with comparator that uses normalized keys for fast comparison + // and falls back to RecordComparator for full comparison when normalized keys are equal + this.sortedRecords = new TreeMap<>((k1, k2) -> { + int cmp = normalizedKeyComputer.compareKey(k1.keySegment, 0, k2.keySegment, 0); + if (cmp != 0) { + return cmp; + } + // Normalized keys are equal - use full record comparison for correct ordering + cmp = recordComparator.compare(k1.record, k2.record); + if (cmp != 0) { + return cmp; + } + // Records are equal by sort keys - use insertion order for stability + return Long.compare(k1.insertionOrder, k2.insertionOrder); + }); + this.insertionSequence = 0L; + + // Allocate reusable on-heap buffer for computing keys + byte[] reusableKeyBuffer = new byte[normalizedKeySize]; + this.reusableKeySegment = MemorySegmentFactory.wrap(reusableKeyBuffer); + + // Detect object reuse mode and create serializer for copying if needed + this.objectReuseEnabled = 
RuntimeContextUtils.isObjectReuseEnabled(getRuntimeContext()); + if (this.objectReuseEnabled) { + this.rowDataSerializer = new RowDataSerializer(rowType); + } + + // Initialize metrics + this.totalDrainOperations = 0; + this.totalDrainedRecords = 0; + this.totalInserted = 0; + + // Initialize memory size tracer for bounding buffer memory footprint + this.sizeTracer = new TotalSizeTracer(config); + + LOG.info("AppendWriteFunctionWithContinuousSort initialized successfully"); + } + + @Override + public void processElement(T value, Context ctx, Collector out) throws Exception { + RowData data = (RowData) value; + + // Check if buffer has reached max capacity (record count) or memory limit + if (sortedRecords.size() >= maxCapacity || sizeTracer.bufferSize > sizeTracer.maxBufferSize) { + drainRecords(drainSize); + + // Verify there's space after draining + if (sortedRecords.size() >= maxCapacity) { + throw new HoodieException( + String.format("Buffer cannot accept record after draining. " + + "Buffer size: %d, maxCapacity: %d, drainSize: %d", + sortedRecords.size(), maxCapacity, drainSize)); + } + } + + // Copy RowData when object reuse is enabled to prevent mutation after insertion + if (objectReuseEnabled) { + data = rowDataSerializer.copy(data); + } + + // Write to buffer (maintains sorted order) + // Compute normalized key into reusable segment + normalizedKeyComputer.putKey(data, reusableKeySegment, 0); + + // Create sort key (copies the normalized key from reusable segment) + SortKey key = new SortKey(reusableKeySegment, normalizedKeySize, data, insertionSequence++); + + // Store the RowData and track memory usage + sortedRecords.put(key, data); + if (estimatedRecordSize == 0) { + estimatedRecordSize = ObjectSizeCalculator.getObjectSize(data); + } + sizeTracer.trace(estimatedRecordSize); + + totalInserted++; + } + + /** + * Drain oldest records from buffer and write to storage. 
+ */ + private void drainRecords(int count) throws IOException { + if (sortedRecords.isEmpty()) { + return; + } + + // Initialize writer if needed + if (this.writerHelper == null) { + initWriterHelper(); + } + + // Drain records from TreeMap using pollFirstEntry() to avoid iterator allocation + int actualCount = Math.min(count, sortedRecords.size()); + int drained = 0; + + while (drained < actualCount && !sortedRecords.isEmpty()) { + Map.Entry entry = sortedRecords.pollFirstEntry(); + writerHelper.write(entry.getValue()); + drained++; + } + + totalDrainOperations++; + totalDrainedRecords += drained; + sizeTracer.countDown(drained * estimatedRecordSize); + } + + @Override + public void snapshotState() { + try { + // Drain all remaining records and reset for next checkpoint interval + if (!sortedRecords.isEmpty()) { + LOG.info("Snapshot: draining {} remaining records", sortedRecords.size()); + drainRecords(sortedRecords.size()); + sortedRecords.clear(); + insertionSequence = 0L; + sizeTracer.reset(); + } + + LOG.info("Snapshot complete: total drained={}, operations={}", + totalDrainedRecords, totalDrainOperations); + + } catch (IOException e) { + throw new HoodieIOException("Failed to drain buffer during snapshot", e); + } + super.snapshotState(); + } + + @Override + public void endInput() { + try { + // Drain all remaining records and clear buffer + if (!sortedRecords.isEmpty()) { + LOG.info("EndInput: draining {} remaining records", sortedRecords.size()); + drainRecords(sortedRecords.size()); + sortedRecords.clear(); + insertionSequence = 0L; + sizeTracer.reset(); + } + + } catch (IOException e) { + throw new HoodieIOException("Failed to drain buffer during endInput", e); + } + super.endInput(); + } + + @Override + public void close() throws Exception { + try { + LOG.info("AppendWriteFunctionWithContinuousSort closed: totalInserted={}, totalDrained={}, operations={}", + totalInserted, totalDrainedRecords, totalDrainOperations); + + } finally { + super.close(); + } 
+ } + + /** + * Sort key with normalized key stored as a pre-wrapped MemorySegment to avoid + * repeated allocation during TreeMap comparisons. + * Holds a reference to the original record for full comparison fallback. + * Comparison is done via TreeMap comparator. + */ + private static class SortKey { + final MemorySegment keySegment; + final RowData record; + final long insertionOrder; + + SortKey(MemorySegment sourceSegment, int keySize, RowData record, long insertionOrder) { + this.record = record; + this.insertionOrder = insertionOrder; + + // Copy normalized key and wrap as MemorySegment once to avoid per-comparison allocation + byte[] keyBytes = new byte[keySize]; + sourceSegment.get(0, keyBytes, 0, keySize); + this.keySegment = MemorySegmentFactory.wrap(keyBytes); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof SortKey)) { + return false; + } + return this.insertionOrder == ((SortKey) obj).insertionOrder; + } + + @Override + public int hashCode() { + return Long.hashCode(insertionOrder); + } + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunctions.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunctions.java index 39cda29ab4226..28325e4bfb349 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunctions.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/append/AppendWriteFunctions.java @@ -51,7 +51,9 @@ public static AppendWriteFunction create(Configuration conf, RowType rowT } String bufferType = resolveBufferType(conf); - if (BufferType.DISRUPTOR.name().equalsIgnoreCase(bufferType)) { + if (BufferType.CONTINUOUS_SORT.name().equalsIgnoreCase(bufferType)) { + return new AppendWriteFunctionWithContinuousSort<>(conf, rowType); + } else if (BufferType.DISRUPTOR.name().equalsIgnoreCase(bufferType)) { return new 
AppendWriteFunctionWithDisruptorBufferSort<>(conf, rowType); } else if (BufferType.BOUNDED_IN_MEMORY.name().equalsIgnoreCase(bufferType)) { return new AppendWriteFunctionWithBIMBufferSort<>(conf, rowType); @@ -97,8 +99,13 @@ public static List resolveSortKeys(Configuration conf) { ValidationUtils.checkArgument(StringUtils.nonEmpty(sortKeys), "Sort keys can't be null or empty for append write with buffer sort. " + "Either set write.buffer.sort.keys or ensure record key field is configured."); - return Arrays.stream(sortKeys.split(",")) + List sortKeyList = Arrays.stream(sortKeys.split(",")) .map(String::trim) + .filter(s -> !s.isEmpty()) .collect(Collectors.toList()); + ValidationUtils.checkArgument(!sortKeyList.isEmpty(), + "Sort keys can't be empty for append write with buffer sort. " + + "Either set write.buffer.sort.keys or ensure record key field is configured."); + return sortKeyList; } } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/buffer/BufferType.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/buffer/BufferType.java index cbc779cb11ca8..7253bd789468e 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/buffer/BufferType.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/buffer/BufferType.java @@ -36,5 +36,9 @@ public enum BufferType { @EnumFieldDescription("Lock-free ring buffer using LMAX Disruptor. Provides better throughput for high-volume " + "write operations by decoupling record ingestion from sorting and writing.") - DISRUPTOR + DISRUPTOR, + + @EnumFieldDescription("Continuous sorting using a TreeMap. 
Provides O(log n) inserts and incremental draining " + + "for predictable latency without sort spikes.") + CONTINUOUS_SORT } diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/CodedRecordGlobalLocationSerializer.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/CodedRecordGlobalLocationSerializer.java new file mode 100644 index 0000000000000..ebc3ad7cd0222 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/CodedRecordGlobalLocationSerializer.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.sink.partitioner.index; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Serializer specialization for partitioned tables that dictionary-encodes the partition paths. + * + *

Serialized bytes can only be deserialized correctly by the same serializer instance that encoded them. + */ +public class CodedRecordGlobalLocationSerializer extends RecordGlobalLocationSerializer { + private final Map partitionPathToDictId = new HashMap<>(); + private final List dictIdToPartitionPath = new ArrayList<>(); + + @Override + protected void writePartitionPath(String partitionPath) throws IOException { + outputSerializer.writeInt(getOrCreatePartitionPathId(partitionPath)); + } + + @Override + protected String readPartitionPath() throws IOException { + return getPartitionPath(inputDeserializer.readInt()); + } + + private int getOrCreatePartitionPathId(String partitionPath) { + Integer existingId = partitionPathToDictId.get(partitionPath); + if (existingId != null) { + return existingId; + } + + int newId = dictIdToPartitionPath.size(); + partitionPathToDictId.put(partitionPath, newId); + dictIdToPartitionPath.add(partitionPath); + return newId; + } + + private String getPartitionPath(int partitionPathId) { + if (partitionPathId < 0 || partitionPathId >= dictIdToPartitionPath.size()) { + throw new IllegalStateException("Unknown partition path dictionary id " + partitionPathId + + ", dictionary size is " + dictIdToPartitionPath.size()); + } + return dictIdToPartitionPath.get(partitionPathId); + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/IndexBackendFactory.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/IndexBackendFactory.java index f3bdaad5c1454..4e9df8d9fcbd2 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/IndexBackendFactory.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/IndexBackendFactory.java @@ -59,7 +59,7 @@ public static IndexBackend create(Configuration conf, FunctionInitializationCont return new FlinkStateIndexBackend(indexState); case 
GLOBAL_RECORD_LEVEL_INDEX: if (conf.get(FlinkOptions.INDEX_BOOTSTRAP_ENABLED)) { - return new RocksDBIndexBackend(conf.get(FlinkOptions.INDEX_BOOTSTRAP_ROCKSDB_PATH)); + return new RocksDBIndexBackend(conf.get(FlinkOptions.INDEX_BOOTSTRAP_ROCKSDB_PATH), OptionsResolver.isPartitionedTable(conf)); } else { ListState jobIdState = context.getOperatorStateStore().getListState( new ListStateDescriptor<>( diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/RecordGlobalLocationSerializer.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/RecordGlobalLocationSerializer.java index 0ba9b80a88002..18464f3f113f8 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/RecordGlobalLocationSerializer.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/index/RecordGlobalLocationSerializer.java @@ -42,8 +42,8 @@ public class RecordGlobalLocationSerializer implements CustomSerializer> serializers = new ConcurrentHashMap<>(); - serializers.put(COLUMN_FAMILY, new RecordGlobalLocationSerializer()); + serializers.put(COLUMN_FAMILY, isPartitionedTable + ? 
new CodedRecordGlobalLocationSerializer() + : new RecordGlobalLocationSerializer()); this.rocksDBDAO = new RocksDBDAO("hudi-index-backend", rocksDbBasePath, serializers, true); this.rocksDBDAO.addColumnFamily(COLUMN_FAMILY); diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/append/ITTestAppendWriteFunctionWithContinuousSort.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/append/ITTestAppendWriteFunctionWithContinuousSort.java new file mode 100644 index 0000000000000..dbe319c9b5b47 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/append/ITTestAppendWriteFunctionWithContinuousSort.java @@ -0,0 +1,425 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.sink.append; + +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.TimestampType; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.hudi.configuration.FlinkOptions; +import org.apache.hudi.sink.buffer.BufferType; +import org.apache.hudi.sink.utils.TestWriteBase; +import org.apache.hudi.utils.TestConfigurations; +import org.apache.hudi.utils.TestData; + +import org.apache.avro.generic.GenericRecord; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.io.File; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Test cases for {@link AppendWriteFunctionWithContinuousSort}. 
+ */ +public class ITTestAppendWriteFunctionWithContinuousSort extends TestWriteBase { + private Configuration conf; + private RowType rowType; + + @TempDir + protected File tempFile; + + @BeforeEach + public void before(@TempDir File tempDir) throws Exception { + this.conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath()); + this.conf.set(FlinkOptions.WRITE_BUFFER_TYPE, BufferType.CONTINUOUS_SORT.name()); + this.conf.set(FlinkOptions.OPERATION, "insert"); + this.conf.set(FlinkOptions.WRITE_BUFFER_SORT_KEYS, "name,age"); + this.conf.set(FlinkOptions.WRITE_BUFFER_SIZE, 100L); + this.conf.set(FlinkOptions.WRITE_BUFFER_SORT_CONTINUOUS_DRAIN_SIZE, 1); + + // Define the row type with fields: uuid (STRING), name (STRING), age (INT), ts (TIMESTAMP), partition (STRING) + List fields = new ArrayList<>(); + fields.add(new RowType.RowField("uuid", VarCharType.STRING_TYPE)); + fields.add(new RowType.RowField("name", VarCharType.STRING_TYPE)); + fields.add(new RowType.RowField("age", new IntType())); + fields.add(new RowType.RowField("ts", new TimestampType())); + fields.add(new RowType.RowField("partition", VarCharType.STRING_TYPE)); + this.rowType = new RowType(fields); + } + + @Test + public void testBufferFlushOnRecordNumberLimit() throws Exception { + // Create test data that exceeds the configured buffer record limit (150 records vs. WRITE_BUFFER_SIZE of 100) + List inputData = new ArrayList<>(); + for (int i = 0; i < 150; i++) { + inputData.add(createRowData("uuid" + i, "Name" + i, i, "1970-01-01 00:00:01.123", "p1")); + } + + // Write the data + TestWriteBase.TestHarness.instance() + .preparePipeline(tempFile, conf) + .consume(inputData) + .endInput(); + + // Verify all data was written + List actualData = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1); + assertEquals(150, actualData.size()); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testBufferFlush(boolean flushOnCheckpoint) throws Exception { + // Create test data + List inputData = Arrays.asList( + createRowData("uuid1", "Bob", 30, 
"1970-01-01 00:00:01.123", "p1"), + createRowData("uuid2", "Alice", 25, "1970-01-01 00:00:01.124", "p1") + ); + + // Write the data, then trigger a checkpoint or end the input depending on the flag + TestHarness testHarness = + TestWriteBase.TestHarness.instance() + .preparePipeline(tempFile, conf) + .consume(inputData); + if (flushOnCheckpoint) { + testHarness.checkpoint(1); + } else { + testHarness.endInput(); + } + + // Verify data was written + List actualData = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1); + assertEquals(2, actualData.size()); + } + + @Test + public void testBufferFlushOnBufferSizeLimit() throws Exception { + // enlarge the write buffer record size + this.conf.set(FlinkOptions.WRITE_BUFFER_SIZE, 10000L); + // use a very small buffer memory size here + this.conf.set(FlinkOptions.WRITE_TASK_MAX_SIZE, 200.1D); + + // Create test data that exceeds buffer size + List inputData = new ArrayList<>(); + for (int i = 0; i < 2000; i++) { + inputData.add(createRowData("uuid" + i, "Name" + i, i, "1970-01-01 00:00:01.123", "p1")); + } + + // Write the data + TestWriteBase.TestHarness.instance() + .preparePipeline(tempFile, conf) + .consume(inputData) + .endInput(); + + // Verify all data was written + List actualData = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1); + assertEquals(2000, actualData.size()); + } + + @Test + public void testSortedResult() throws Exception { + // Create test data in unsorted order + List inputData = Arrays.asList( + createRowData("uuid1", "Bob", 30, "1970-01-01 00:00:01.123", "p1"), + createRowData("uuid2", "Alice", 25, "1970-01-01 00:00:01.124", "p1"), + createRowData("uuid3", "Bob", 21, "1970-01-01 00:00:31.124", "p1") + ); + + // Expected result after sorting by name, then age + List expected = Arrays.asList( + "uuid2,Alice,25,1970-01-01 00:00:01.124,p1", + "uuid3,Bob,21,1970-01-01 00:00:31.124,p1", + "uuid1,Bob,30,1970-01-01 00:00:01.123,p1"); + + // Write the data, then trigger a checkpoint and end the input + 
TestWriteBase.TestHarness.instance() + .preparePipeline(tempFile, conf) + .consume(inputData) + .checkpoint(1) + .endInput(); + + // Verify data was written + List result = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1); + assertEquals(3, result.size()); + + List filteredResult = + result.stream().map(TestData::filterOutVariablesWithoutHudiMetadata).collect(Collectors.toList()); + + assertArrayEquals(expected.toArray(), filteredResult.toArray()); + } + + @Test + public void testContinuousDrainBehavior() throws Exception { + // Set buffer size to 10 records + this.conf.set(FlinkOptions.WRITE_BUFFER_SIZE, 10L); + this.conf.set(FlinkOptions.WRITE_BUFFER_SORT_CONTINUOUS_DRAIN_SIZE, 2); + + // Create test data that will trigger drain + List inputData = new ArrayList<>(); + for (int i = 0; i < 12; i++) { + inputData.add(createRowData("uuid" + i, "Name" + i, i, "1970-01-01 00:00:01.123", "p1")); + } + + // Write the data - should trigger continuous draining + TestWriteBase.TestHarness.instance() + .preparePipeline(tempFile, conf) + .consume(inputData) + .endInput(); + + // Verify all data was written despite buffer size limit + List actualData = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1); + assertEquals(12, actualData.size()); + } + + @Test + public void testDrainSizeConfiguration() throws Exception { + // Set buffer size to 10 and drain size to 5 + this.conf.set(FlinkOptions.WRITE_BUFFER_SIZE, 10L); + this.conf.set(FlinkOptions.WRITE_BUFFER_SORT_CONTINUOUS_DRAIN_SIZE, 5); + + // Create test data that will trigger multiple drains + List inputData = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + inputData.add(createRowData("uuid" + i, "Name" + i, i, "1970-01-01 00:00:01.123", "p1")); + } + + // Write the data - should trigger draining in batches of 5 + TestWriteBase.TestHarness.instance() + .preparePipeline(tempFile, conf) + .consume(inputData) + .endInput(); + + // Verify all data was written + List actualData = 
TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1); + assertEquals(20, actualData.size()); + } + + @Test + public void testSortedResultWithContinuousDrain() throws Exception { + // Set smaller buffer to force continuous draining + this.conf.set(FlinkOptions.WRITE_BUFFER_SIZE, 5L); + this.conf.set(FlinkOptions.WRITE_BUFFER_SORT_CONTINUOUS_DRAIN_SIZE, 1); + + // Create test data with various names and ages + List inputData = Arrays.asList( + createRowData("uuid1", "Charlie", 35, "1970-01-01 00:00:01.123", "p1"), + createRowData("uuid2", "Alice", 25, "1970-01-01 00:00:01.124", "p1"), + createRowData("uuid3", "Bob", 30, "1970-01-01 00:00:01.125", "p1"), + createRowData("uuid4", "Alice", 20, "1970-01-01 00:00:01.126", "p1"), + createRowData("uuid5", "Bob", 28, "1970-01-01 00:00:01.127", "p1"), + createRowData("uuid6", "Charlie", 40, "1970-01-01 00:00:01.128", "p1") + ); + + // Expected result after sorting by name, then age + List expected = Arrays.asList( + "uuid4,Alice,20,1970-01-01 00:00:01.126,p1", + "uuid2,Alice,25,1970-01-01 00:00:01.124,p1", + "uuid5,Bob,28,1970-01-01 00:00:01.127,p1", + "uuid3,Bob,30,1970-01-01 00:00:01.125,p1", + "uuid1,Charlie,35,1970-01-01 00:00:01.123,p1", + "uuid6,Charlie,40,1970-01-01 00:00:01.128,p1" + ); + + // Write the data + TestWriteBase.TestHarness.instance() + .preparePipeline(tempFile, conf) + .consume(inputData) + .checkpoint(1) + .endInput(); + + // Verify data was written in sorted order + List result = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1); + assertEquals(6, result.size()); + + List filteredResult = + result.stream().map(TestData::filterOutVariablesWithoutHudiMetadata).collect(Collectors.toList()); + + assertArrayEquals(expected.toArray(), filteredResult.toArray()); + } + + @Test + public void testLargeDrainSize() throws Exception { + // Set larger drain size to test batch draining + this.conf.set(FlinkOptions.WRITE_BUFFER_SIZE, 20L); + 
this.conf.set(FlinkOptions.WRITE_BUFFER_SORT_CONTINUOUS_DRAIN_SIZE, 5); + + // Create test data + List inputData = new ArrayList<>(); + for (int i = 0; i < 30; i++) { + inputData.add(createRowData("uuid" + i, "Name" + i, i, "1970-01-01 00:00:01.123", "p1")); + } + + // Write the data + TestWriteBase.TestHarness.instance() + .preparePipeline(tempFile, conf) + .consume(inputData) + .endInput(); + + // Verify all data was written + List actualData = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1); + assertEquals(30, actualData.size()); + } + + @Test + public void testInvalidDrainSizeZero() { + this.conf.set(FlinkOptions.WRITE_BUFFER_SORT_CONTINUOUS_DRAIN_SIZE, 0); + + AppendWriteFunctionWithContinuousSort function = + new AppendWriteFunctionWithContinuousSort<>(conf, rowType); + + assertThrows(IllegalArgumentException.class, () -> { + function.open(conf); + }); + } + + @Test + public void testInvalidDrainSizeNegative() { + this.conf.set(FlinkOptions.WRITE_BUFFER_SORT_CONTINUOUS_DRAIN_SIZE, -5); + + AppendWriteFunctionWithContinuousSort function = + new AppendWriteFunctionWithContinuousSort<>(conf, rowType); + + assertThrows(IllegalArgumentException.class, () -> { + function.open(conf); + }); + } + + @Test + public void testInvalidBufferSizeZero() { + this.conf.set(FlinkOptions.WRITE_BUFFER_SIZE, 0L); + + AppendWriteFunctionWithContinuousSort function = + new AppendWriteFunctionWithContinuousSort<>(conf, rowType); + + assertThrows(IllegalArgumentException.class, () -> { + function.open(conf); + }); + } + + @Test + public void testInvalidBufferSizeNegative() { + this.conf.set(FlinkOptions.WRITE_BUFFER_SIZE, -100L); + + AppendWriteFunctionWithContinuousSort function = + new AppendWriteFunctionWithContinuousSort<>(conf, rowType); + + assertThrows(IllegalArgumentException.class, () -> { + function.open(conf); + }); + } + + @Test + public void testInvalidSortKeysOnlyCommas() { + this.conf.set(FlinkOptions.WRITE_BUFFER_SORT_KEYS, " , , "); + + 
AppendWriteFunctionWithContinuousSort function = + new AppendWriteFunctionWithContinuousSort<>(conf, rowType); + + assertThrows(IllegalArgumentException.class, () -> { + function.open(conf); + }); + } + + @Test + public void testObjectReuseEnabled() throws Exception { + // All expected records (order may vary due to file read ordering) + List expected = Arrays.asList( + "uuid1,Bob,30,1970-01-01 00:00:01.123,p1", + "uuid2,Alice,25,1970-01-01 00:00:01.124,p1", + "uuid3,Bob,21,1970-01-01 00:00:31.124,p1"); + + // Create a reusable row to simulate Flink object reuse behavior + GenericRowData reusableRow = new GenericRowData(5); + + // Write data using a single reused RowData instance (mimicking object reuse) + TestWriteBase.TestHarness harness = TestWriteBase.TestHarness.instance() + .preparePipelineWithObjectReuse(tempFile, conf); + + // Record 1: Bob, 30 + reusableRow.setField(0, StringData.fromString("uuid1")); + reusableRow.setField(1, StringData.fromString("Bob")); + reusableRow.setField(2, 30); + reusableRow.setField(3, TimestampData.fromTimestamp(Timestamp.valueOf("1970-01-01 00:00:01.123"))); + reusableRow.setField(4, StringData.fromString("p1")); + harness.consume(Arrays.asList(reusableRow)); + + // Record 2: Alice, 25 (mutating the same row) + reusableRow.setField(0, StringData.fromString("uuid2")); + reusableRow.setField(1, StringData.fromString("Alice")); + reusableRow.setField(2, 25); + reusableRow.setField(3, TimestampData.fromTimestamp(Timestamp.valueOf("1970-01-01 00:00:01.124"))); + reusableRow.setField(4, StringData.fromString("p1")); + harness.consume(Arrays.asList(reusableRow)); + + // Record 3: Bob, 21 (mutating the same row again) + reusableRow.setField(0, StringData.fromString("uuid3")); + reusableRow.setField(1, StringData.fromString("Bob")); + reusableRow.setField(2, 21); + reusableRow.setField(3, TimestampData.fromTimestamp(Timestamp.valueOf("1970-01-01 00:00:31.124"))); + reusableRow.setField(4, StringData.fromString("p1")); + 
harness.consume(Arrays.asList(reusableRow)); + + harness.checkpoint(1).endInput(); + + // Verify all 3 records are distinct and not corrupted by object reuse + // (without object reuse safety, all records would be the same - the last mutation) + List result = TestData.readAllData(new File(conf.get(FlinkOptions.PATH)), rowType, 1); + assertEquals(3, result.size()); + + List filteredResult = + result.stream().map(TestData::filterOutVariablesWithoutHudiMetadata) + .sorted().collect(Collectors.toList()); + List sortedExpected = expected.stream().sorted().collect(Collectors.toList()); + + assertArrayEquals(sortedExpected.toArray(), filteredResult.toArray()); + } + + @Test + public void testInvalidSortKeysOnlyWhitespace() { + this.conf.set(FlinkOptions.WRITE_BUFFER_SORT_KEYS, " "); + + AppendWriteFunctionWithContinuousSort function = + new AppendWriteFunctionWithContinuousSort<>(conf, rowType); + + assertThrows(IllegalArgumentException.class, () -> { + function.open(conf); + }); + } + + private GenericRowData createRowData(String uuid, String name, int age, String timestamp, String partition) { + return GenericRowData.of(StringData.fromString(uuid), StringData.fromString(name), + age, TimestampData.fromTimestamp(Timestamp.valueOf(timestamp)), StringData.fromString(partition)); + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/index/TestCodedRecordGlobalLocationSerializer.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/index/TestCodedRecordGlobalLocationSerializer.java new file mode 100644 index 0000000000000..1059f39eac2fa --- /dev/null +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/index/TestCodedRecordGlobalLocationSerializer.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.sink.partitioner.index; + +import org.apache.hudi.common.model.HoodieRecordGlobalLocation; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.UUID; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test cases for {@link CodedRecordGlobalLocationSerializer}. 
+ */ +public class TestCodedRecordGlobalLocationSerializer { + + private CodedRecordGlobalLocationSerializer serializer; + + @BeforeEach + public void setUp() { + serializer = new CodedRecordGlobalLocationSerializer(); + } + + @Test + public void testHistoricalBytesRemainReadableAfterDictionaryGrowth() throws IOException { + String instantTime = "20240315120000"; + String firstFileId = UUID.randomUUID().toString(); + String secondFileId = UUID.randomUUID().toString(); + + HoodieRecordGlobalLocation first = new HoodieRecordGlobalLocation("partition/a", instantTime, firstFileId); + HoodieRecordGlobalLocation second = new HoodieRecordGlobalLocation("partition/b", instantTime, secondFileId); + + byte[] firstSerialized = serializer.serialize(first); + byte[] secondSerialized = serializer.serialize(second); + + assertEquals(first, serializer.deserialize(firstSerialized)); + assertEquals(second, serializer.deserialize(secondSerialized)); + } + + @Test + public void testSerializedSizeEfficiency() throws IOException { + String partitionPath = "partition/path/test/with/repeated/value"; + String instantTime = "20240315120000"; + String fileId = UUID.randomUUID().toString(); + + HoodieRecordGlobalLocation location = new HoodieRecordGlobalLocation(partitionPath, instantTime, fileId); + HoodieRecordGlobalLocation samePartitionLocation = new HoodieRecordGlobalLocation(partitionPath, instantTime, fileId); + + byte[] firstSerialized = serializer.serialize(location); + byte[] secondSerialized = serializer.serialize(samePartitionLocation); + + int expectedSize = 4 + 4 + instantTime.length() + 8 + 8 + 4; + int legacySize = 4 + partitionPath.getBytes(StandardCharsets.UTF_8).length + 4 + instantTime.length() + 8 + 8 + 4; + + assertEquals(expectedSize, firstSerialized.length); + assertEquals(expectedSize, secondSerialized.length); + assertEquals(firstSerialized.length, secondSerialized.length); + assertTrue(firstSerialized.length < legacySize); + } + + @Test + public void 
testRepeatedPartitionPathProducesSmallerPayloadThanLegacyFormat() throws IOException { + String partitionPath = "year=2024/month=03/day=15/hour=12"; + String instantTime = "20240315123045"; + String fileId = UUID.randomUUID().toString(); + + byte[] serialized = serializer.serialize(new HoodieRecordGlobalLocation(partitionPath, instantTime, fileId)); + + int dictionaryEncodedSize = 4 + 4 + instantTime.length() + 8 + 8 + 4; + int legacySize = 4 + partitionPath.getBytes(StandardCharsets.UTF_8).length + 4 + instantTime.length() + 8 + 8 + 4; + + assertEquals(dictionaryEncodedSize, serialized.length); + assertEquals(legacySize - partitionPath.getBytes(StandardCharsets.UTF_8).length, serialized.length); + } + + @Test + public void testDeserializeFailsForUnknownPartitionPathId() { + byte[] invalidBytes = new byte[] { + 0, 0, 0, 7, + 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1 + }; + + IllegalStateException exception = assertThrows(IllegalStateException.class, () -> serializer.deserialize(invalidBytes)); + assertEquals("Unknown partition path dictionary id 7, dictionary size is 0", exception.getMessage()); + } + + @Test + public void testFirstPartitionPathUsesFirstDictionaryId() throws IOException { + String instantTime = "20240315120000"; + String fileId = UUID.randomUUID().toString(); + + byte[] serialized = serializer.serialize(new HoodieRecordGlobalLocation("partition/a", instantTime, fileId)); + + assertEquals(0, readInt(serialized, 0)); + } + + private int readInt(byte[] bytes, int offset) { + return ((bytes[offset] & 0xFF) << 24) + | ((bytes[offset + 1] & 0xFF) << 16) + | ((bytes[offset + 2] & 0xFF) << 8) + | (bytes[offset + 3] & 0xFF); + } +} diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/index/TestRocksDBIndexBackend.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/index/TestRocksDBIndexBackend.java index 860be5bf9c70c..17209428fcb46 100644 --- 
a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/index/TestRocksDBIndexBackend.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/partitioner/index/TestRocksDBIndexBackend.java @@ -25,6 +25,8 @@ import org.apache.flink.metrics.MetricGroup; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import java.io.File; import java.util.HashMap; @@ -48,16 +50,17 @@ public class TestRocksDBIndexBackend { @TempDir File tempFile; - @Test - void testGetAndUpdate() throws Exception { - try (RocksDBIndexBackend rocksDBIndexBackend = new RocksDBIndexBackend(tempFile.getAbsolutePath())) { + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testGetAndUpdate(boolean isPartitionedTable) throws Exception { + try (RocksDBIndexBackend rocksDBIndexBackend = new RocksDBIndexBackend(tempFile.getAbsolutePath(), isPartitionedTable)) { assertNull(rocksDBIndexBackend.get("id1")); - HoodieRecordGlobalLocation location1 = new HoodieRecordGlobalLocation("par1", "001", UUID.randomUUID().toString()); + HoodieRecordGlobalLocation location1 = new HoodieRecordGlobalLocation(isPartitionedTable ? "par1" : "", "001", UUID.randomUUID().toString()); rocksDBIndexBackend.update("id1", location1); assertEquals(location1, rocksDBIndexBackend.get("id1")); - HoodieRecordGlobalLocation location2 = new HoodieRecordGlobalLocation("par2", "002", UUID.randomUUID().toString()); + HoodieRecordGlobalLocation location2 = new HoodieRecordGlobalLocation(isPartitionedTable ? 
"par2" : "", "002", UUID.randomUUID().toString()); rocksDBIndexBackend.update("id2", location2); assertEquals(location2, rocksDBIndexBackend.get("id2")); } @@ -65,7 +68,7 @@ void testGetAndUpdate() throws Exception { @Test void testMetricsRegistrationAndSnapshot() throws Exception { - try (RocksDBIndexBackend rocksDBIndexBackend = new RocksDBIndexBackend(tempFile.getAbsolutePath())) { + try (RocksDBIndexBackend rocksDBIndexBackend = new RocksDBIndexBackend(tempFile.getAbsolutePath(), false)) { MetricGroup metricGroup = mock(MetricGroup.class); Map> gauges = new HashMap<>(); doAnswer(invocation -> { @@ -101,7 +104,7 @@ void testMetricsRegistrationAndSnapshot() throws Exception { @Test void testMetricsReflectWritesReadsAndAutoFlush() throws Exception { - try (RocksDBIndexBackend rocksDBIndexBackend = new RocksDBIndexBackend(tempFile.getAbsolutePath())) { + try (RocksDBIndexBackend rocksDBIndexBackend = new RocksDBIndexBackend(tempFile.getAbsolutePath(), false)) { MetricGroup metricGroup = mock(MetricGroup.class); Map> gauges = new HashMap<>(); doAnswer(invocation -> { @@ -168,7 +171,7 @@ void testMetricsSnapshotAfterCloseReturnsDefaultValues() throws Exception { return gauge; }).when(metricGroup).gauge(anyString(), any(Gauge.class)); - RocksDBIndexBackend rocksDBIndexBackend = new RocksDBIndexBackend(tempFile.getAbsolutePath()); + RocksDBIndexBackend rocksDBIndexBackend = new RocksDBIndexBackend(tempFile.getAbsolutePath(), false); rocksDBIndexBackend.registerMetrics(metricGroup); rocksDBIndexBackend.close(); diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java index dcbceab936bb7..8c0369a889c77 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java +++ 
b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/InsertFunctionWrapper.java @@ -77,13 +77,18 @@ public class InsertFunctionWrapper implements TestFunctionWrapper { private AppendWriteFunction writeFunction; public InsertFunctionWrapper(String tablePath, Configuration conf) throws Exception { + this(tablePath, conf, new ExecutionConfig()); + } + + public InsertFunctionWrapper(String tablePath, Configuration conf, ExecutionConfig executionConfig) throws Exception { IOManager ioManager = new IOManagerAsync(); MockEnvironment environment = new MockEnvironmentBuilder() .setTaskName("mockTask") .setManagedMemorySize(4 * MemoryManager.DEFAULT_PAGE_SIZE) .setIOManager(ioManager) + .setExecutionConfig(executionConfig) .build(); - this.runtimeContext = new MockStreamingRuntimeContext(false, 1, 0, environment); + this.runtimeContext = new MockStreamingRuntimeContext(false, 1, 0, environment, executionConfig); this.gateway = new MockOperatorEventGateway(); this.subtaskGateway = new MockSubtaskGateway(); this.conf = conf; diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java index 4a87466814b7a..0c88ad04e59f0 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java @@ -70,6 +70,19 @@ public MockStreamingRuntimeContext( this.taskInfo = new MockTaskInfo(numParallelSubtasks, subtaskIndex, 0); } + public MockStreamingRuntimeContext( + boolean isCheckpointingEnabled, + int numParallelSubtasks, + int subtaskIndex, + MockEnvironment environment, + ExecutionConfig executionConfig) { + + super(new MockStreamOperator(executionConfig), environment, new HashMap<>()); + + this.isCheckpointingEnabled = 
isCheckpointingEnabled; + this.taskInfo = new MockTaskInfo(numParallelSubtasks, subtaskIndex, 0); + } + public int getIndexOfThisSubtask() { return taskInfo.getIndexOfThisSubtask(); } @@ -90,14 +103,23 @@ private static class MockStreamOperator extends AbstractStreamOperator private static final long serialVersionUID = -1153976702711944427L; private transient TestProcessingTimeService testProcessingTimeService; + private final transient ExecutionConfig executionConfig; @Setter private transient Object currentKey; private final transient Map mockKeyedStateStoreMap = new HashMap<>(); + MockStreamOperator() { + this(new ExecutionConfig()); + } + + MockStreamOperator(ExecutionConfig executionConfig) { + this.executionConfig = executionConfig; + } + @Override public ExecutionConfig getExecutionConfig() { - return new ExecutionConfig(); + return executionConfig; } @Override diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java index b4a265e2b252d..5318e4381bc35 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/utils/TestWriteBase.java @@ -186,6 +186,23 @@ public TestHarness preparePipeline(File basePath, Configuration conf) throws Exc return this; } + /** + * Prepares and opens a write pipeline created by {@link TestData#getWritePipelineWithObjectReuse}. + * + * @param basePath base directory of the table under test + * @param conf configuration handed to the pipeline + * @return this harness, to allow call chaining + */ + public TestHarness preparePipelineWithObjectReuse(File basePath, Configuration conf) throws Exception { + this.baseFile = basePath; + this.basePath = this.baseFile.getAbsolutePath(); + this.conf = conf; + this.pipeline = TestData.getWritePipelineWithObjectReuse(this.basePath, conf); + // open the function and ingest data + this.pipeline.openFunction(); + return this; + } + public TestHarness consume(List inputs) throws Exception { for (RowData rowData :
inputs) { this.pipeline.invoke(rowData); diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java index 57062b8882987..2c8f5c29640fe 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/utils/TestData.java @@ -660,6 +660,16 @@ public static TestFunctionWrapper getWritePipeline(String basePath, Con } } + /** + * Initializes a writing pipeline with object reuse enabled. + */ + public static TestFunctionWrapper getWritePipelineWithObjectReuse( + String basePath, Configuration conf) throws Exception { + org.apache.flink.api.common.ExecutionConfig execConfig = new org.apache.flink.api.common.ExecutionConfig(); + execConfig.enableObjectReuse(); + return new InsertFunctionWrapper<>(basePath, conf, execConfig); + } + private static String toStringSafely(Object obj) { return obj == null ? 
"null" : obj.toString(); } diff --git a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java index d76f37365d3d2..be75c77fd35a4 100644 --- a/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java +++ b/hudi-flink-datasource/hudi-flink1.17.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java @@ -49,4 +49,8 @@ public static int getNumberOfParallelSubtasks(RuntimeContext runtimeContext) { public static long getWatermarkInternal(RuntimeContext runtimeContext) { return runtimeContext.getExecutionConfig().getAutoWatermarkInterval(); } + + public static boolean isObjectReuseEnabled(RuntimeContext runtimeContext) { + return runtimeContext.getExecutionConfig().isObjectReuseEnabled(); + } } diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java index 98b5e61050898..a37b88352cf52 100644 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/ParquetDecimalVector.java @@ -18,21 +18,29 @@ package org.apache.hudi.table.format.cow.vector; +import org.apache.flink.formats.parquet.utils.ParquetSchemaConverter; import org.apache.flink.table.data.DecimalData; import org.apache.flink.table.data.columnar.vector.BytesColumnVector; import org.apache.flink.table.data.columnar.vector.ColumnVector; import org.apache.flink.table.data.columnar.vector.DecimalColumnVector; +import org.apache.flink.table.data.columnar.vector.Dictionary; +import org.apache.flink.table.data.columnar.vector.IntColumnVector; +import 
org.apache.flink.table.data.columnar.vector.LongColumnVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableBytesVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableIntVector; +import org.apache.flink.table.data.columnar.vector.writable.WritableLongVector; + +import static org.apache.flink.util.Preconditions.checkArgument; /** - * Parquet write decimal as int32 and int64 and binary, this class wrap the real vector to - * provide {@link DecimalColumnVector} interface. - * - *

Reference Flink release 1.11.2 {@link org.apache.flink.formats.parquet.vector.ParquetDecimalVector} - * because it is not public. + * Parquet write decimal as int32 and int64 and binary, this class wrap the real vector to provide + * {@link DecimalColumnVector} interface. */ -public class ParquetDecimalVector implements DecimalColumnVector { +public class ParquetDecimalVector + implements DecimalColumnVector, WritableLongVector, WritableIntVector, WritableBytesVector { - public final ColumnVector vector; + private final ColumnVector vector; public ParquetDecimalVector(ColumnVector vector) { this.vector = vector; @@ -40,15 +48,180 @@ public ParquetDecimalVector(ColumnVector vector) { @Override public DecimalData getDecimal(int i, int precision, int scale) { - return DecimalData.fromUnscaledBytes( - ((BytesColumnVector) vector).getBytes(i).getBytes(), - precision, - scale); + if (ParquetSchemaConverter.is32BitDecimal(precision) && vector instanceof IntColumnVector) { + return DecimalData.fromUnscaledLong(((IntColumnVector) vector).getInt(i), precision, scale); + } else if (ParquetSchemaConverter.is64BitDecimal(precision) + && vector instanceof LongColumnVector) { + return DecimalData.fromUnscaledLong(((LongColumnVector) vector).getLong(i), precision, scale); + } else { + checkArgument( + vector instanceof BytesColumnVector, + "Reading decimal type occur unsupported vector type: %s", + vector.getClass()); + return DecimalData.fromUnscaledBytes( + ((BytesColumnVector) vector).getBytes(i).getBytes(), precision, scale); + } + } + + public ColumnVector getVector() { + return vector; } @Override public boolean isNullAt(int i) { return vector.isNullAt(i); } -} + @Override + public void reset() { + if (vector instanceof WritableColumnVector) { + ((WritableColumnVector) vector).reset(); + } + } + + @Override + public void setNullAt(int rowId) { + if (vector instanceof WritableColumnVector) { + ((WritableColumnVector) vector).setNullAt(rowId); + } + } + + @Override + public 
void setNulls(int rowId, int count) { + if (vector instanceof WritableColumnVector) { + ((WritableColumnVector) vector).setNulls(rowId, count); + } + } + + @Override + public void fillWithNulls() { + if (vector instanceof WritableColumnVector) { + ((WritableColumnVector) vector).fillWithNulls(); + } + } + + @Override + public void setDictionary(Dictionary dictionary) { + if (vector instanceof WritableColumnVector) { + ((WritableColumnVector) vector).setDictionary(dictionary); + } + } + + @Override + public boolean hasDictionary() { + if (vector instanceof WritableColumnVector) { + return ((WritableColumnVector) vector).hasDictionary(); + } + return false; + } + + @Override + public WritableIntVector reserveDictionaryIds(int capacity) { + if (vector instanceof WritableColumnVector) { + return ((WritableColumnVector) vector).reserveDictionaryIds(capacity); + } + throw new RuntimeException("Child vector must be instance of WritableColumnVector"); + } + + @Override + public WritableIntVector getDictionaryIds() { + if (vector instanceof WritableColumnVector) { + return ((WritableColumnVector) vector).getDictionaryIds(); + } + throw new RuntimeException("Child vector must be instance of WritableColumnVector"); + } + + @Override + public Bytes getBytes(int i) { + if (vector instanceof WritableBytesVector) { + return ((WritableBytesVector) vector).getBytes(i); + } + throw new RuntimeException("Child vector must be instance of WritableColumnVector"); + } + + @Override + public void appendBytes(int rowId, byte[] value, int offset, int length) { + if (vector instanceof WritableBytesVector) { + ((WritableBytesVector) vector).appendBytes(rowId, value, offset, length); + } + } + + @Override + public void fill(byte[] value) { + if (vector instanceof WritableBytesVector) { + ((WritableBytesVector) vector).fill(value); + } + } + + @Override + public int getInt(int i) { + if (vector instanceof WritableIntVector) { + return ((WritableIntVector) vector).getInt(i); + } + throw new 
RuntimeException("Child vector must be instance of WritableColumnVector"); + } + + @Override + public void setInt(int rowId, int value) { + if (vector instanceof WritableIntVector) { + ((WritableIntVector) vector).setInt(rowId, value); + } + } + + @Override + public void setIntsFromBinary(int rowId, int count, byte[] src, int srcIndex) { + if (vector instanceof WritableIntVector) { + ((WritableIntVector) vector).setIntsFromBinary(rowId, count, src, srcIndex); + } + } + + @Override + public void setInts(int rowId, int count, int value) { + if (vector instanceof WritableIntVector) { + ((WritableIntVector) vector).setInts(rowId, count, value); + } + } + + @Override + public void setInts(int rowId, int count, int[] src, int srcIndex) { + if (vector instanceof WritableIntVector) { + ((WritableIntVector) vector).setInts(rowId, count, src, srcIndex); + } + } + + @Override + public void fill(int value) { + if (vector instanceof WritableIntVector) { + ((WritableIntVector) vector).fill(value); + } + } + + @Override + public long getLong(int i) { + if (vector instanceof WritableLongVector) { + return ((WritableLongVector) vector).getLong(i); + } + throw new RuntimeException("Child vector must be instance of WritableColumnVector"); + } + + @Override + public void setLong(int rowId, long value) { + if (vector instanceof WritableLongVector) { + ((WritableLongVector) vector).setLong(rowId, value); + } + } + + @Override + public void setLongsFromBinary(int rowId, int count, byte[] src, int srcIndex) { + if (vector instanceof WritableLongVector) { + ((WritableLongVector) vector).setLongsFromBinary(rowId, count, src, srcIndex); + } + } + + @Override + public void fill(long value) { + if (vector instanceof WritableLongVector) { + ((WritableLongVector) vector).fill(value); + } + } +} diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java 
b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java index 6a8a01b74946a..d758f35078d8f 100644 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/table/format/cow/vector/reader/ArrayColumnReader.java @@ -423,13 +423,13 @@ private void fillColumnVector( switch (primitiveTypeName) { case INT32: lcv.child = new ParquetDecimalVector(new HeapIntVector(total)); - ((HeapIntVector) ((ParquetDecimalVector) lcv.child).vector).reset(); + ((HeapIntVector) ((ParquetDecimalVector) lcv.child).getVector()).reset(); for (int i = 0; i < valueList.size(); i++) { if (valueList.get(i) == null) { - ((HeapIntVector) ((ParquetDecimalVector) lcv.child).vector) + ((HeapIntVector) ((ParquetDecimalVector) lcv.child).getVector()) .setNullAt(i); } else { - ((HeapIntVector) ((ParquetDecimalVector) lcv.child).vector) + ((HeapIntVector) ((ParquetDecimalVector) lcv.child).getVector()) .vector[i] = ((List) valueList).get(i); } @@ -437,13 +437,13 @@ private void fillColumnVector( break; case INT64: lcv.child = new ParquetDecimalVector(new HeapLongVector(total)); - ((HeapLongVector) ((ParquetDecimalVector) lcv.child).vector).reset(); + ((HeapLongVector) ((ParquetDecimalVector) lcv.child).getVector()).reset(); for (int i = 0; i < valueList.size(); i++) { if (valueList.get(i) == null) { - ((HeapLongVector) ((ParquetDecimalVector) lcv.child).vector) + ((HeapLongVector) ((ParquetDecimalVector) lcv.child).getVector()) .setNullAt(i); } else { - ((HeapLongVector) ((ParquetDecimalVector) lcv.child).vector) + ((HeapLongVector) ((ParquetDecimalVector) lcv.child).getVector()) .vector[i] = ((List) valueList).get(i); } @@ -451,14 +451,14 @@ private void fillColumnVector( break; default: lcv.child = new ParquetDecimalVector(new HeapBytesVector(total)); - ((HeapBytesVector) 
((ParquetDecimalVector) lcv.child).vector).reset(); + ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).getVector()).reset(); for (int i = 0; i < valueList.size(); i++) { byte[] src = ((List) valueList).get(i); if (valueList.get(i) == null) { - ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).vector) + ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).getVector()) .setNullAt(i); } else { - ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).vector) + ((HeapBytesVector) ((ParquetDecimalVector) lcv.child).getVector()) .appendBytes(i, src, 0, src.length); } } diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java index d76f37365d3d2..be75c77fd35a4 100644 --- a/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java @@ -49,4 +49,8 @@ public static int getNumberOfParallelSubtasks(RuntimeContext runtimeContext) { public static long getWatermarkInternal(RuntimeContext runtimeContext) { return runtimeContext.getExecutionConfig().getAutoWatermarkInterval(); } + + public static boolean isObjectReuseEnabled(RuntimeContext runtimeContext) { + return runtimeContext.getExecutionConfig().isObjectReuseEnabled(); + } } diff --git a/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/table/format/cow/vector/TestParquetDecimalVector.java b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/table/format/cow/vector/TestParquetDecimalVector.java new file mode 100644 index 0000000000000..02fe1e61ccb05 --- /dev/null +++ b/hudi-flink-datasource/hudi-flink1.18.x/src/test/java/org/apache/hudi/table/format/cow/vector/TestParquetDecimalVector.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.table.format.cow.vector; + +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.columnar.vector.BytesColumnVector; +import org.apache.flink.table.data.columnar.vector.ColumnVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapBytesVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapIntVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapLongVector; +import org.apache.flink.table.data.columnar.vector.heap.HeapShortVector; + +import org.junit.jupiter.api.Test; + +import java.math.BigDecimal; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link ParquetDecimalVector}. 
+ */ +public class TestParquetDecimalVector { + + @Test + void testGetDecimalFromInt32Vector() { + // precision <= 9 => ParquetSchemaConverter.is32BitDecimal(precision) == true + HeapIntVector intVector = new HeapIntVector(1); + intVector.vector[0] = 12345; + ParquetDecimalVector wrapped = new ParquetDecimalVector(intVector); + + DecimalData decoded = wrapped.getDecimal(0, 5, 2); + + assertEquals(new BigDecimal("123.45"), decoded.toBigDecimal()); + } + + @Test + void testGetDecimalFromInt64Vector() { + // 9 < precision <= 18 => ParquetSchemaConverter.is64BitDecimal(precision) == true + HeapLongVector longVector = new HeapLongVector(1); + longVector.vector[0] = 1234567890123456L; + ParquetDecimalVector wrapped = new ParquetDecimalVector(longVector); + + DecimalData decoded = wrapped.getDecimal(0, 18, 4); + + assertEquals(new BigDecimal("123456789012.3456"), decoded.toBigDecimal()); + } + + @Test + void testGetDecimalFromBytesVectorAtLargePrecision() { + // precision > 18 => BINARY / FIXED_LEN_BYTE_ARRAY path + BigDecimal original = new BigDecimal("12345678901234567890.1234567890"); + byte[] unscaled = original.unscaledValue().toByteArray(); + HeapBytesVector bytesVector = new HeapBytesVector(1); + bytesVector.appendBytes(0, unscaled, 0, unscaled.length); + ParquetDecimalVector wrapped = new ParquetDecimalVector(bytesVector); + + DecimalData decoded = wrapped.getDecimal(0, 30, 10); + + assertEquals(original, decoded.toBigDecimal()); + } + + @Test + void testGetDecimalFromBytesVectorAtSmallPrecision() { + // A Parquet file can legally encode a small-precision decimal as BINARY. In that case the + // dispatch must fall through to the bytes branch rather than require an IntColumnVector. 
+ BigDecimal original = new BigDecimal("123.45"); + byte[] unscaled = original.unscaledValue().toByteArray(); + HeapBytesVector bytesVector = new HeapBytesVector(1); + bytesVector.appendBytes(0, unscaled, 0, unscaled.length); + ParquetDecimalVector wrapped = new ParquetDecimalVector(bytesVector); + + DecimalData decoded = wrapped.getDecimal(0, 5, 2); + + assertEquals(original, decoded.toBigDecimal()); + } + + @Test + void testGetDecimalThrowsOnUnsupportedVectorType() { + // A large-precision request must have a bytes-backed child; any other writable child is an + // illegal combination and must be surfaced via Preconditions.checkArgument. + ColumnVector unsupported = new HeapShortVector(1); + ParquetDecimalVector wrapped = new ParquetDecimalVector(unsupported); + + assertThrows(IllegalArgumentException.class, () -> wrapped.getDecimal(0, 30, 10)); + } + + @Test + void testIsNullAtDelegatesToChild() { + HeapIntVector intVector = new HeapIntVector(2); + intVector.vector[0] = 1; + intVector.setNullAt(1); + ParquetDecimalVector wrapped = new ParquetDecimalVector(intVector); + + assertFalse(wrapped.isNullAt(0)); + assertTrue(wrapped.isNullAt(1)); + } + + @Test + void testWritableIntRoundTrip() { + HeapIntVector intVector = new HeapIntVector(1); + ParquetDecimalVector wrapped = new ParquetDecimalVector(intVector); + + wrapped.setInt(0, 42); + + assertEquals(42, wrapped.getInt(0)); + assertEquals(42, intVector.vector[0]); + } + + @Test + void testWritableLongRoundTrip() { + HeapLongVector longVector = new HeapLongVector(1); + ParquetDecimalVector wrapped = new ParquetDecimalVector(longVector); + + wrapped.setLong(0, 9876543210L); + + assertEquals(9876543210L, wrapped.getLong(0)); + assertEquals(9876543210L, longVector.vector[0]); + } + + @Test + void testWritableBytesRoundTrip() { + HeapBytesVector bytesVector = new HeapBytesVector(1); + ParquetDecimalVector wrapped = new ParquetDecimalVector(bytesVector); + byte[] payload = new byte[] {0x01, 0x02, 0x03}; + + 
wrapped.appendBytes(0, payload, 0, payload.length); + + BytesColumnVector.Bytes out = wrapped.getBytes(0); + assertEquals(payload.length, out.len); + assertEquals(0x01, out.data[out.offset]); + assertEquals(0x02, out.data[out.offset + 1]); + assertEquals(0x03, out.data[out.offset + 2]); + } + + @Test + void testResetDelegatesToChild() { + HeapIntVector intVector = new HeapIntVector(1); + intVector.setNullAt(0); + ParquetDecimalVector wrapped = new ParquetDecimalVector(intVector); + assertTrue(wrapped.isNullAt(0)); + + wrapped.reset(); + + assertFalse(wrapped.isNullAt(0)); + } + + @Test + void testFillWithNullsDelegatesToChild() { + HeapIntVector intVector = new HeapIntVector(2); + ParquetDecimalVector wrapped = new ParquetDecimalVector(intVector); + + wrapped.fillWithNulls(); + + assertTrue(wrapped.isNullAt(0)); + assertTrue(wrapped.isNullAt(1)); + } + + @Test + void testSetNullAtDelegatesToChild() { + HeapIntVector intVector = new HeapIntVector(2); + ParquetDecimalVector wrapped = new ParquetDecimalVector(intVector); + + wrapped.setNullAt(0); + wrapped.setNulls(1, 1); + + assertTrue(wrapped.isNullAt(0)); + assertTrue(wrapped.isNullAt(1)); + } +} diff --git a/hudi-flink-datasource/hudi-flink1.19.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java b/hudi-flink-datasource/hudi-flink1.19.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java index d76f37365d3d2..be75c77fd35a4 100644 --- a/hudi-flink-datasource/hudi-flink1.19.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java +++ b/hudi-flink-datasource/hudi-flink1.19.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java @@ -49,4 +49,8 @@ public static int getNumberOfParallelSubtasks(RuntimeContext runtimeContext) { public static long getWatermarkInternal(RuntimeContext runtimeContext) { return runtimeContext.getExecutionConfig().getAutoWatermarkInterval(); } + + public static boolean isObjectReuseEnabled(RuntimeContext runtimeContext) { + return 
runtimeContext.getExecutionConfig().isObjectReuseEnabled(); + } } diff --git a/hudi-flink-datasource/hudi-flink1.20.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java b/hudi-flink-datasource/hudi-flink1.20.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java index d76f37365d3d2..be75c77fd35a4 100644 --- a/hudi-flink-datasource/hudi-flink1.20.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java +++ b/hudi-flink-datasource/hudi-flink1.20.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java @@ -49,4 +49,8 @@ public static int getNumberOfParallelSubtasks(RuntimeContext runtimeContext) { public static long getWatermarkInternal(RuntimeContext runtimeContext) { return runtimeContext.getExecutionConfig().getAutoWatermarkInterval(); } + + public static boolean isObjectReuseEnabled(RuntimeContext runtimeContext) { + return runtimeContext.getExecutionConfig().isObjectReuseEnabled(); + } } diff --git a/hudi-flink-datasource/hudi-flink2.0.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java b/hudi-flink-datasource/hudi-flink2.0.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java index eb430e4c50127..f1bd7fdcc8e15 100644 --- a/hudi-flink-datasource/hudi-flink2.0.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java +++ b/hudi-flink-datasource/hudi-flink2.0.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java @@ -55,4 +55,8 @@ public static long getWatermarkInternal(RuntimeContext runtimeContext) { return Long.parseLong(jobParameters.getOrDefault(AUTO_WATERMARK_INTERVAL.key(), AUTO_WATERMARK_INTERVAL.defaultValue().toMillis() + "")); } + + public static boolean isObjectReuseEnabled(RuntimeContext runtimeContext) { + return runtimeContext.isObjectReuseEnabled(); + } } diff --git a/hudi-flink-datasource/hudi-flink2.1.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java b/hudi-flink-datasource/hudi-flink2.1.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java index 
eb430e4c50127..f1bd7fdcc8e15 100644 --- a/hudi-flink-datasource/hudi-flink2.1.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java +++ b/hudi-flink-datasource/hudi-flink2.1.x/src/main/java/org/apache/hudi/utils/RuntimeContextUtils.java @@ -55,4 +55,8 @@ public static long getWatermarkInternal(RuntimeContext runtimeContext) { return Long.parseLong(jobParameters.getOrDefault(AUTO_WATERMARK_INTERVAL.key(), AUTO_WATERMARK_INTERVAL.defaultValue().toMillis() + "")); } + + public static boolean isObjectReuseEnabled(RuntimeContext runtimeContext) { + return runtimeContext.isObjectReuseEnabled(); + } } diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java index 547b02787369f..df7930060fe93 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java @@ -50,7 +50,7 @@ public HoodieAvroWriteSupport(MessageType schema, HoodieSchema hoodieSchema, Opt this.properties = properties; String vectorMeta = HoodieSchema.buildVectorColumnsMetadataValue(hoodieSchema); if (!vectorMeta.isEmpty()) { - footerMetadata.put(HoodieSchema.PARQUET_VECTOR_COLUMNS_METADATA_KEY, vectorMeta); + footerMetadata.put(HoodieSchema.VECTOR_COLUMNS_METADATA_KEY, vectorMeta); } } diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/lance/HoodieBaseLanceWriter.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/lance/HoodieBaseLanceWriter.java index 8f76c61fa7249..f43c56625f592 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/lance/HoodieBaseLanceWriter.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/lance/HoodieBaseLanceWriter.java @@ -37,6 +37,7 @@ import java.io.Closeable; import java.io.IOException; +import java.util.Collections; import java.util.Map; /** @@ -105,6 +106,20 @@ protected 
HoodieBaseLanceWriter(StoragePath path, int batchSize, */ protected abstract Schema getArrowSchema(); + /** + * Subclass hook for emitting additional Lance file-footer key-value metadata + * alongside any bloom-filter entries. Called once during {@link #close()}. + * + *
<p>
Default implementation returns an empty map. Overriders should return a + * fresh map; the caller does not retain a reference. Colliding keys are + * overwritten per {@code LanceFileWriter.addSchemaMetadata} semantics. + * + * @return map of footer metadata key-value pairs, or empty map for none + */ + protected Map additionalSchemaMetadata() { + return Collections.emptyMap(); + } + /** * Write a single record. Records are buffered and flushed in batches. * @@ -163,11 +178,21 @@ public void close() throws IOException { writer.write(root); } - // Finalize and write bloom filter metadata - if (writer != null && bloomFilterWriteSupportOpt.isPresent()) { - Map metadata = bloomFilterWriteSupportOpt.get().finalizeMetadata(); - if (!metadata.isEmpty()) { - writer.addSchemaMetadata(metadata); + if (writer != null) { + // Finalize and write bloom filter metadata + if (bloomFilterWriteSupportOpt.isPresent()) { + Map metadata = bloomFilterWriteSupportOpt.get().finalizeMetadata(); + if (!metadata.isEmpty()) { + writer.addSchemaMetadata(metadata); + } + } + + // Allow subclasses to contribute additional footer key-value metadata + // (e.g. Spark writer emits `hoodie.vector.columns` for forward-compat read). + // Called unconditionally; returns an empty map when no VECTOR columns are present. 
+ Map extra = additionalSchemaMetadata(); + if (extra != null && !extra.isEmpty()) { + writer.addSchemaMetadata(extra); } } } catch (Exception e) { diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroFileWriterFactory.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroFileWriterFactory.java index 5be171d35c613..c474df450ad8b 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroFileWriterFactory.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroFileWriterFactory.java @@ -29,6 +29,10 @@ import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.io.HoodieParquetConfigInjector; import org.apache.hudi.io.compress.CompressionCodec; import org.apache.hudi.io.storage.HoodieAvroHFileReaderImplBase; import org.apache.hudi.io.storage.HoodieFileWriter; @@ -36,6 +40,7 @@ import org.apache.hudi.io.storage.HoodieHFileConfig; import org.apache.hudi.io.storage.HoodieOrcConfig; import org.apache.hudi.storage.HoodieStorage; +import org.apache.hudi.storage.StorageConfiguration; import org.apache.hudi.storage.StoragePath; import org.apache.hadoop.conf.Configuration; @@ -62,26 +67,35 @@ protected HoodieFileWriter newParquetFileWriter( String instantTime, StoragePath path, HoodieConfig config, HoodieSchema schema, TaskContextSupplier taskContextSupplier) throws IOException { boolean populateMetaFields = config.getBooleanOrDefault(HoodieTableConfig.POPULATE_META_FIELDS); - HoodieAvroWriteSupport writeSupport = getHoodieAvroWriteSupport(schema, config, enableBloomFilter(populateMetaFields, config)); - String compressionCodecName = 
config.getStringOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME); + Pair injectedConfigs = HoodieParquetConfigInjector.applyConfigInjector(path, storage.getConf(), config); + StorageConfiguration storageConfiguration = injectedConfigs.getLeft(); + HoodieConfig hoodieConfig = injectedConfigs.getRight(); + + HoodieAvroWriteSupport writeSupport = getHoodieAvroWriteSupport(schema, hoodieConfig, storageConfiguration, enableBloomFilter(populateMetaFields, hoodieConfig)); + + String compressionCodecName = hoodieConfig.getStringOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME); // Support PARQUET_COMPRESSION_CODEC_NAME is "" if (compressionCodecName.isEmpty()) { compressionCodecName = null; } HoodieParquetConfig parquetConfig = new HoodieParquetConfig<>(writeSupport, CompressionCodecName.fromConf(compressionCodecName), - config.getIntOrDefault(HoodieStorageConfig.PARQUET_BLOCK_SIZE), - config.getIntOrDefault(HoodieStorageConfig.PARQUET_PAGE_SIZE), - config.getLongOrDefault(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE), - storage.getConf(), config.getDoubleOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION), - config.getBooleanOrDefault(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED)); + hoodieConfig.getIntOrDefault(HoodieStorageConfig.PARQUET_BLOCK_SIZE), + hoodieConfig.getIntOrDefault(HoodieStorageConfig.PARQUET_PAGE_SIZE), + hoodieConfig.getLongOrDefault(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE), + storageConfiguration, hoodieConfig.getDoubleOrDefault(HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION), + hoodieConfig.getBooleanOrDefault(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED)); return new HoodieAvroParquetWriter(path, parquetConfig, instantTime, taskContextSupplier, populateMetaFields); } protected HoodieFileWriter newParquetFileWriter( OutputStream outputStream, HoodieConfig config, HoodieSchema schema) throws IOException { - HoodieAvroWriteSupport writeSupport = getHoodieAvroWriteSupport(schema, config, false); + String 
configInjectorClass = config.getStringOrDefault(HoodieStorageConfig.HOODIE_PARQUET_CONFIG_INJECTOR_CLASS, StringUtils.EMPTY_STRING); + if (!StringUtils.isNullOrEmpty(configInjectorClass)) { + throw new HoodieException("hoodie.parquet.write.config.injector.class is not supported with streaming writes with parquet"); + } + HoodieAvroWriteSupport writeSupport = getHoodieAvroWriteSupport(schema, config, storage.getConf(), false); HoodieParquetConfig parquetConfig = new HoodieParquetConfig<>(writeSupport, CompressionCodecName.fromConf(config.getString(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME)), config.getInt(HoodieStorageConfig.PARQUET_BLOCK_SIZE), @@ -121,11 +135,13 @@ protected HoodieFileWriter newOrcFileWriter( } private HoodieAvroWriteSupport getHoodieAvroWriteSupport(HoodieSchema schema, - HoodieConfig config, boolean enableBloomFilter) { + HoodieConfig config, + StorageConfiguration storageConf, + boolean enableBloomFilter) { Option filter = enableBloomFilter ? Option.of(createBloomFilter(config)) : Option.empty(); return (HoodieAvroWriteSupport) ReflectionUtils.loadClass( config.getStringOrDefault(HoodieStorageConfig.HOODIE_AVRO_WRITE_SUPPORT_CLASS), new Class[] {MessageType.class, HoodieSchema.class, Option.class, Properties.class}, - getAvroSchemaConverter(storage.getConf().unwrapAs(Configuration.class)).convert(schema), schema, filter, config.getProps()); + getAvroSchemaConverter((Configuration) storageConf.unwrapAs(Configuration.class)).convert(schema), schema, filter, config.getProps()); } } diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/hadoop/TestHoodieAvroParquetConfigInjector.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/hadoop/TestHoodieAvroParquetConfigInjector.java new file mode 100644 index 0000000000000..438c3da5b7172 --- /dev/null +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/io/storage/hadoop/TestHoodieAvroParquetConfigInjector.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.io.storage.hadoop; + +import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.config.HoodieStorageConfig; +import org.apache.hudi.common.engine.LocalTaskContextSupplier; +import org.apache.hudi.common.schema.HoodieSchema; +import org.apache.hudi.common.testutils.DisableDictionaryInjector; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.io.storage.HoodieFileWriter; +import org.apache.hudi.storage.HoodieStorage; +import org.apache.hudi.storage.StoragePath; + +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.column.Encoding; +import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.hadoop.metadata.BlockMetaData; +import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.util.List; + +import static 
org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link HoodieAvroParquetConfigInjector} functionality in {@link HoodieAvroFileWriterFactory}. + */ +public class TestHoodieAvroParquetConfigInjector { + + @TempDir + java.nio.file.Path tmpDir; + + @Test + public void testDisableDictionaryEncodingViaInjector() throws Exception { + final String instantTime = "100"; + HoodieStorage storage = HoodieTestUtils.getStorage(tmpDir.toString()); + final StoragePath parquetPath = new StoragePath( + tmpDir.resolve("test_dictionary_" + instantTime + ".parquet").toAbsolutePath().toString()); + + // Generate test data + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(0xDEED); + List records = dataGen.generateGenericRecords(100); + HoodieSchema schema = HoodieSchema.fromAvroSchema(records.get(0).getSchema()); + + // Create config with the custom injector + HoodieConfig config = new HoodieConfig(); + config.setValue(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED, "true"); // Start with dictionary enabled + config.setValue(HoodieStorageConfig.HOODIE_PARQUET_CONFIG_INJECTOR_CLASS, DisableDictionaryInjector.class.getName()); + + // Create writer and write some data + HoodieFileWriter writer = new HoodieAvroFileWriterFactory(storage) + .newParquetFileWriter(instantTime, parquetPath, config, schema, new LocalTaskContextSupplier()); + + assertTrue(writer instanceof HoodieAvroParquetWriter); + + // Write test records + HoodieAvroParquetWriter avroWriter = (HoodieAvroParquetWriter) writer; + for (GenericRecord record : records) { + avroWriter.writeAvro((String) record.get("_row_key"), record); + } + writer.close(); + + // Verify the parquet file was created + assertTrue(storage.exists(parquetPath)); + + // Read parquet metadata and verify dictionary encoding is disabled + 
Configuration hadoopConf = new Configuration(); + Path hadoopPath = new Path(parquetPath.toUri()); + ParquetFileReader reader = ParquetFileReader.open(hadoopConf, hadoopPath); + ParquetMetadata metadata = reader.getFooter(); + reader.close(); + + assertNotNull(metadata); + + // Verify that dictionary encoding is NOT used for any column + // When dictionary encoding is disabled, columns should use PLAIN or other encodings but not RLE_DICTIONARY + for (BlockMetaData block : metadata.getBlocks()) { + for (ColumnChunkMetaData column : block.getColumns()) { + // Check all encodings used for this column - should not include RLE_DICTIONARY or PLAIN_DICTIONARY + for (Encoding encoding : column.getEncodings()) { + assertFalse(encoding == Encoding.RLE_DICTIONARY || encoding == Encoding.PLAIN_DICTIONARY, + "Column " + column.getPath() + " should not use dictionary encoding, but found: " + encoding); + } + } + } + } + + @Test + public void testInvalidInjectorClassThrowsException() throws IOException { + final String instantTime = "102"; + HoodieStorage storage = HoodieTestUtils.getStorage(tmpDir.toString()); + final StoragePath parquetPath = new StoragePath( + tmpDir.resolve("test_invalid_" + instantTime + ".parquet").toAbsolutePath().toString()); + + // Generate test data + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(0xDEED); + List records = dataGen.generateGenericRecords(10); + HoodieSchema schema = HoodieSchema.fromAvroSchema(records.get(0).getSchema()); + + // Create config with an invalid/non-existent injector class + HoodieConfig config = new HoodieConfig(); + config.setValue(HoodieStorageConfig.HOODIE_PARQUET_CONFIG_INJECTOR_CLASS, "org.apache.hudi.NonExistentInjector"); + + // Should throw an exception when trying to create the writer + assertThrows(Exception.class, () -> { + new HoodieAvroFileWriterFactory(storage) + .newParquetFileWriter(instantTime, parquetPath, config, schema, new LocalTaskContextSupplier()); + }); + } + + @Test + public void 
testNoInjectorUsesDefaultConfig() throws Exception { + final String instantTime = "103"; + HoodieStorage storage = HoodieTestUtils.getStorage(tmpDir.toString()); + final StoragePath parquetPath = new StoragePath( + tmpDir.resolve("test_no_injector_" + instantTime + ".parquet").toAbsolutePath().toString()); + + // Generate test data + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(0xDEED); + List records = dataGen.generateGenericRecords(10); + HoodieSchema schema = HoodieSchema.fromAvroSchema(records.get(0).getSchema()); + + // Create config WITHOUT injector - should use default settings + HoodieConfig config = new HoodieConfig(); + config.setValue(HoodieStorageConfig.PARQUET_DICTIONARY_ENABLED, "true"); + + // Create writer and write some data + HoodieFileWriter writer = new HoodieAvroFileWriterFactory(storage) + .newParquetFileWriter(instantTime, parquetPath, config, schema, new LocalTaskContextSupplier()); + + assertTrue(writer instanceof HoodieAvroParquetWriter); + + // Write test records + HoodieAvroParquetWriter avroWriter = (HoodieAvroParquetWriter) writer; + for (GenericRecord record : records) { + avroWriter.writeAvro((String) record.get("_row_key"), record); + } + writer.close(); + + // Verify the parquet file was created + assertTrue(storage.exists(parquetPath)); + } +} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieLanceInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieLanceInputFormat.java new file mode 100644 index 0000000000000..9256158814ce6 --- /dev/null +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieLanceInputFormat.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.hadoop; + +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; + +import java.io.IOException; + +/** + * HoodieInputFormat for HUDI datasets which store data in Lance base file format. + *
<p>
+ * This class is required for catalog/metastore registration during CREATE TABLE operations. + *
<p>
+ * TODO(#18557): Lance reading through Hive InputFormat is not yet supported. When support is + * added, this should route through {@link HoodieFileGroupReaderBasedRecordReader} (like + * {@link HoodieParquetInputFormat} does) instead of a standalone record reader, + * to get MOR log merging, schema evolution, and bootstrap support for free. + * + * @see #18557 + */ +@UseFileSplitsFromInputFormat +public class HoodieLanceInputFormat extends HoodieCopyOnWriteTableInputFormat { + + protected HoodieTimeline filterInstantsTimeline(HoodieTimeline timeline) { + return HoodieInputFormatUtils.filterInstantsTimeline(timeline); + } + + @Override + public RecordReader getRecordReader(final InputSplit split, final JobConf job, + final Reporter reporter) throws IOException { + throw new UnsupportedOperationException( + "Lance reading through Hive InputFormat is not yet supported. " + + "Use the Spark datasource path (spark.read.format(\"hudi\")) to read Lance tables."); + } + + @Override + protected boolean isSplitable(FileSystem fs, Path filename) { + // Lance files are not splittable. + return false; + } +} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieLanceRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieLanceRealtimeInputFormat.java new file mode 100644 index 0000000000000..c3022872f4323 --- /dev/null +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieLanceRealtimeInputFormat.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.hadoop.realtime; + +import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat; +import org.apache.hudi.hadoop.UseRecordReaderFromInputFormat; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; + +import java.io.IOException; + +/** + * HoodieRealtimeInputFormat for HUDI datasets which store data in Lance base file format. + *
<p>
+ * This class is required for catalog/metastore registration during CREATE TABLE operations + * for MOR tables with Lance base file format. + *
<p>
+ * TODO(#18557): Lance reading through Hive InputFormat is not yet supported. When support is + * added, this should route through {@code HoodieFileGroupReaderBasedRecordReader} to get + * unified MOR log merging, schema evolution, and bootstrap support. + * + * @see #18557 + */ +@UseRecordReaderFromInputFormat +@UseFileSplitsFromInputFormat +public class HoodieLanceRealtimeInputFormat extends HoodieMergeOnReadTableInputFormat { + + @Override + public RecordReader getRecordReader(final InputSplit split, final JobConf jobConf, + final Reporter reporter) throws IOException { + throw new UnsupportedOperationException( + "Lance reading through Hive InputFormat is not yet supported. " + + "Use the Spark datasource path (spark.read.format(\"hudi\")) to read Lance tables."); + } + + @Override + protected boolean isSplitable(FileSystem fs, Path filename) { + // Lance files are not splittable. + return false; + } +} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieArrayWritableSchemaUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieArrayWritableSchemaUtils.java index 3f9b38dc65169..7e9c336846d82 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieArrayWritableSchemaUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieArrayWritableSchemaUtils.java @@ -304,6 +304,23 @@ private static Writable rewritePrimaryTypeWithDiffSchemaType(Writable writable, } } break; + case VECTOR: + // Parquet stores VECTOR as a bare FIXED_LEN_BYTE_ARRAY without a logical-type + // annotation (see AvroSchemaConverterWithTimestampNTZ#convertField VECTOR branch), + // so Hive's Parquet reader reconstructs the Avro schema as plain FIXED named after + // the column. When Hudi then projects that record to the canonical VECTOR schema + // (fixed named vector_<elementType>_<dimension> with logicalType=vector), oldSchema.getType() is + // FIXED while newSchema.getType() is VECTOR.
The byte layout is identical for + // StorageBacking.FIXED_BYTES as long as sizes match, so the rewrite is a pass-through. + if (oldSchema.getType() == HoodieSchemaType.FIXED + && newSchema instanceof HoodieSchema.Vector) { + HoodieSchema.Vector vector = (HoodieSchema.Vector) newSchema; + if (vector.getStorageBacking() == HoodieSchema.Vector.StorageBacking.FIXED_BYTES + && oldSchema.getFixedSize() == vector.getFixedSize()) { + return writable; + } + } + break; default: } throw new HoodieSchemaException(String.format("cannot support rewrite value for schema type: %s since the old schema type is: %s", newSchema, oldSchema)); diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java index 7fe0a15640b5d..0db2159269487 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java @@ -40,10 +40,12 @@ import org.apache.hudi.hadoop.BootstrapBaseFileSplit; import org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile; import org.apache.hudi.hadoop.HoodieHFileInputFormat; +import org.apache.hudi.hadoop.HoodieLanceInputFormat; import org.apache.hudi.hadoop.HoodieParquetInputFormat; import org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile; import org.apache.hudi.hadoop.fs.HadoopFSUtils; import org.apache.hudi.hadoop.realtime.HoodieHFileRealtimeInputFormat; +import org.apache.hudi.hadoop.realtime.HoodieLanceRealtimeInputFormat; import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; import org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit; import org.apache.hudi.hadoop.realtime.HoodieRealtimePath; @@ -123,6 +125,16 @@ public static FileInputFormat getInputFormat(HoodieFileFormat baseFileFormat, bo inputFormat.setConf(conf); return inputFormat; } + case LANCE: + if (realtime) { + 
HoodieLanceRealtimeInputFormat inputFormat = new HoodieLanceRealtimeInputFormat(); + inputFormat.setConf(conf); + return inputFormat; + } else { + HoodieLanceInputFormat inputFormat = new HoodieLanceInputFormat(); + inputFormat.setConf(conf); + return inputFormat; + } default: throw new HoodieIOException("Hoodie InputFormat not implemented for base file format " + baseFileFormat); } @@ -146,6 +158,12 @@ public static String getInputFormatClassName(HoodieFileFormat baseFileFormat, bo } else { return HoodieHFileInputFormat.class.getName(); } + case LANCE: + if (realtime) { + return HoodieLanceRealtimeInputFormat.class.getName(); + } else { + return HoodieLanceInputFormat.class.getName(); + } case ORC: return OrcInputFormat.class.getName(); default: @@ -157,6 +175,7 @@ public static String getOutputFormatClassName(HoodieFileFormat baseFileFormat) { switch (baseFileFormat) { case PARQUET: case HFILE: + case LANCE: return MapredParquetOutputFormat.class.getName(); case ORC: return OrcOutputFormat.class.getName(); @@ -169,6 +188,7 @@ public static String getSerDeClassName(HoodieFileFormat baseFileFormat) { switch (baseFileFormat) { case PARQUET: case HFILE: + case LANCE: return ParquetHiveSerDe.class.getName(); case ORC: return OrcSerde.class.getName(); @@ -185,6 +205,9 @@ public static FileInputFormat getInputFormat(String path, boolean realtime, Conf if (extension.equals(HoodieFileFormat.HFILE.getFileExtension())) { return getInputFormat(HoodieFileFormat.HFILE, realtime, conf); } + if (extension.equals(HoodieFileFormat.LANCE.getFileExtension())) { + return getInputFormat(HoodieFileFormat.LANCE, realtime, conf); + } // now we support read log file, try to find log file if (HadoopFSUtils.isLogFile(new Path(path)) && realtime) { return getInputFormat(HoodieFileFormat.PARQUET, realtime, conf); diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/utils/TestHoodieArrayWritableSchemaUtils.java 
b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/utils/TestHoodieArrayWritableSchemaUtils.java index 026a0cdbea61f..b6220a0caccd7 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/utils/TestHoodieArrayWritableSchemaUtils.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/utils/TestHoodieArrayWritableSchemaUtils.java @@ -314,6 +314,24 @@ void testRewriteDecimalBytesToFixed() throws AvroSerdeException { validateRewriteWithAvro(oldWritable, oldSchema, result, decimalSchema); } + @Test + void testRewritePlainFixedToVectorPassesThrough() { + // Pins the fix for the Hive vector-read path. Parquet stores VECTOR as bare + // FIXED_LEN_BYTE_ARRAY (AvroSchemaConverterWithTimestampNTZ#convertField), + // so Hive's Parquet reader reconstructs the Avro schema as plain FIXED using + // the column name; Hudi then projects to the canonical VECTOR schema + // (vector_float_3, size 12, logicalType=vector). Sizes match and VECTOR's + // FIXED_BYTES backing is byte-identical, so the rewrite must pass through. + // Before the fix this threw "cannot support rewrite value for schema type". 
+ HoodieSchema oldSchema = HoodieSchema.createFixed("embedding", null, null, 12); + HoodieSchema newSchema = HoodieSchema.createVector(3, HoodieSchema.Vector.VectorElementType.FLOAT); + BytesWritable bytes = new BytesWritable(new byte[12]); + + Writable rewritten = HoodieArrayWritableSchemaUtils.rewritePrimaryType(bytes, oldSchema, newSchema); + + assertSame(bytes, rewritten); + } + private void validateRewriteWithAvro( Writable oldWritable, HoodieSchema oldSchema, diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Index.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Index.scala index d083deb2616c1..09ef1b7307316 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Index.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Index.scala @@ -58,18 +58,22 @@ case class DropIndex(table: LogicalPlan, /** * The logical plan of the SHOW INDEXES command. + * + * NOTE: named `HoodieShowIndexes` to avoid an FQCN collision with + * `org.apache.spark.sql.catalyst.plans.logical.ShowIndexes` from `lance-spark-base` (>=0.4.0), + * which otherwise shadows this class on the `hudi-spark3.x`/`4.x` classpath and breaks pattern matching. 
*/ -case class ShowIndexes(table: LogicalPlan, - override val output: Seq[Attribute] = ShowIndexes.getOutputAttrs) extends Command { +case class HoodieShowIndexes(table: LogicalPlan, + override val output: Seq[Attribute] = HoodieShowIndexes.getOutputAttrs) extends Command { override def children: Seq[LogicalPlan] = Seq(table) - def withNewChildrenInternal(newChild: IndexedSeq[LogicalPlan]): ShowIndexes = { + def withNewChildrenInternal(newChild: IndexedSeq[LogicalPlan]): HoodieShowIndexes = { copy(table = newChild.head) } } -object ShowIndexes { +object HoodieShowIndexes { def getOutputAttrs: Seq[Attribute] = Seq( AttributeReference("index_name", StringType, nullable = false)(), AttributeReference("index_type", StringType, nullable = false)(), diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/lance/SparkLanceReaderBase.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/lance/SparkLanceReaderBase.scala index bc24051a633f3..183eea6fc5baf 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/lance/SparkLanceReaderBase.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/lance/SparkLanceReaderBase.scala @@ -23,7 +23,7 @@ import org.apache.hudi.SparkAdapterSupport.sparkAdapter import org.apache.hudi.common.util import org.apache.hudi.internal.schema.InternalSchema import org.apache.hudi.io.memory.HoodieArrowAllocator -import org.apache.hudi.io.storage.{HoodieSparkLanceReader, LanceRecordIterator} +import org.apache.hudi.io.storage.{HoodieSparkLanceReader, LanceRecordIterator, VectorConversionUtils} import org.apache.hudi.storage.StorageConfiguration import org.apache.hadoop.conf.Configuration @@ -90,9 +90,20 @@ class SparkLanceReaderBase(enableVectorizedReader: Boolean) extends SparkColumna // Open Lance file reader val lanceReader = 
LanceFileReader.open(filePath, allocator) - // Get schema from Lance file + // Get schema from Lance file. lance-spark strips Hudi's VECTOR descriptor during + // Arrow→Spark conversion but keeps the fixed-size-list dimension on the Spark + // field metadata; rebuild the descriptor from that, using requiredSchema + // as the source of truth for which columns are Hudi VECTORs — so non-Hudi fixed-size-lists aren't mis-tagged. val arrowSchema = lanceReader.schema() - val fileSchema = LanceArrowUtils.fromArrowSchema(arrowSchema) + val vectorColumnNames: java.util.Set[String] = VectorConversionUtils + .detectVectorColumnsFromMetadata(requiredSchema) + .keySet() + .asScala + .map(i => requiredSchema.fields(i).name) + .toSet + .asJava + val fileSchema = VectorConversionUtils.restoreVectorMetadata( + LanceArrowUtils.fromArrowSchema(arrowSchema), vectorColumnNames) // Build type change info for schema evolution val (implicitTypeChangeInfo, sparkRequestSchema) = diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedFileFormat.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedFileFormat.scala index 9ba25424a9ca8..7e0523ec70a98 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedFileFormat.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedFileFormat.scala @@ -251,7 +251,8 @@ class HoodieFileGroupReaderBasedFileFormat(tablePath: String, partitionSchema.fields.foreach(f => exclusionFields.add(f.name)) val requestedStructType = StructType(requiredSchema.fields ++ partitionSchema.fields.filter(f => mandatoryFields.contains(f.name))) val requestedSchema = HoodieSchemaUtils.pruneDataSchema(schema, 
HoodieSchemaConversionUtils.convertStructTypeToHoodieSchema(requestedStructType, sanitizedTableName), exclusionFields) - val dataSchema = HoodieSchemaUtils.pruneDataSchema(schema, HoodieSchemaConversionUtils.convertStructTypeToHoodieSchema(dataStructType, sanitizedTableName), exclusionFields) + val dataStructTypeWithMandatoryPartitionFields = StructType(dataStructType.fields ++ partitionSchema.fields.filter(f => mandatoryFields.contains(f.name))) + val dataSchema = HoodieSchemaUtils.pruneDataSchema(schema, HoodieSchemaConversionUtils.convertStructTypeToHoodieSchema(dataStructTypeWithMandatoryPartitionFields, sanitizedTableName), exclusionFields) spark.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", supportVectorizedRead.toString) @@ -431,11 +432,28 @@ class HoodieFileGroupReaderBasedFileFormat(tablePath: String, /** * Detects vector columns and replaces them with BinaryType in one step. + * + *

The BinaryType rewrite is Parquet-specific: Hudi stores VECTOR columns as + * FIXED_LEN_BYTE_ARRAY in Parquet, so the reader must see BinaryType and the raw + * bytes are post-converted back to ArrayType. Other formats (e.g. Lance) encode + * vectors natively as Arrow FixedSizeList and return ArrayType directly, so the + * rewrite would introduce a spurious ArrayType→BinaryType cast during schema + * evolution and break the read. Skip the rewrite for those formats. + * * @return (modified schema with BinaryType for vectors, vector column ordinal map) */ private def withVectorRewrite(schema: StructType): (StructType, Map[Int, HoodieSchema.Vector]) = { - val vecs = detectVectorColumns(schema) - if (vecs.nonEmpty) (replaceVectorFieldsWithBinary(schema, vecs), vecs) else (schema, vecs) + // Only Parquet needs the BinaryType rewrite; other formats (Lance) return ArrayType natively. + if (hoodieFileFormat != HoodieFileFormat.PARQUET) { + (schema, Map.empty[Int, HoodieSchema.Vector]) + } else { + val vecs = detectVectorColumns(schema) + if (vecs.isEmpty) { + (schema, vecs) + } else { + (replaceVectorFieldsWithBinary(schema, vecs), vecs) + } + } } /** diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobRead.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobRead.scala new file mode 100644 index 0000000000000..2d1b4f3da58a3 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobRead.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.hudi.blob + +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} +import org.apache.spark.sql.hudi.blob.BatchedBlobReader.DATA_COL +import org.apache.spark.sql.types.BinaryType + +/** + * Logical plan node for batched blob reading. + * + * Created by [[ReadBlobRule]] when `read_blob()` is detected in queries. + * Converted to [[BatchedBlobReadExec]] by [[BatchedBlobReaderStrategy]] during physical planning. 
+ * + * @param child Child logical plan + * @param blobAttr The blob column attribute to read from + * @param dataAttr The output attribute for resolved blob data + */ +case class BatchedBlobRead( + child: LogicalPlan, + blobAttr: Attribute, + dataAttr: AttributeReference = + AttributeReference(DATA_COL, BinaryType, nullable = true)()) + extends UnaryNode { + + override def output: Seq[Attribute] = child.output :+ dataAttr + + override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = { + copy(child = newChild) + } + + override def producedAttributes: AttributeSet = AttributeSet(dataAttr) + +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobReadExec.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobReadExec.scala new file mode 100644 index 0000000000000..d70063a1481e4 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobReadExec.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.spark.sql.hudi.blob + +import org.apache.hudi.storage.StorageConfiguration + +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} + +/** + * Physical plan node that executes batched blob reads. + * + * Reads blob data from storage using [[BatchedBlobReader]] to batch + * reads efficiently when data is sorted by file and position. + * + * Holds pre-extracted `blobAttrName` and `output` rather than the originating + * [[BatchedBlobRead]]: the logical tree reaches `HoodieFileIndex`, which + * isn't Serializable and would fail task dispatch on executor send. + * + * @param child Child physical plan + * @param maxGapBytes Maximum gap between reads to batch (from config) + * @param storageConf Storage configuration for file access + * @param lookaheadSize Number of rows to buffer for batch detection + * @param blobAttrName Name of the blob column resolved from the logical plan + * @param output Output attributes resolved from the logical plan + */ +case class BatchedBlobReadExec(child: SparkPlan, + maxGapBytes: Int, + storageConf: StorageConfiguration[_], + lookaheadSize: Int, + blobAttrName: String, + override val output: Seq[Attribute]) + extends UnaryExecNode { + + override protected def doExecute(): RDD[InternalRow] = { + val childRDD = child.execute() + // Broadcast storageConf to avoid per-task serialization + val broadcastConf: Broadcast[StorageConfiguration[_]] = childRDD.sparkContext.broadcast(storageConf) + // Use direct RDD processing - no DataFrame conversion! 
+ BatchedBlobReader.processRDD( + childRDD, + child.schema, + broadcastConf, + maxGapBytes, + lookaheadSize, + blobAttrName + ) + } + + override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = { + copy(child = newChild) + } +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobReader.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobReader.scala new file mode 100644 index 0000000000000..0328cdd0c5c22 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobReader.scala @@ -0,0 +1,783 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.spark.sql.hudi.blob + +import org.apache.hudi.HoodieSparkUtils.sparkAdapter +import org.apache.hudi.common.schema.{HoodieSchema, HoodieSchemaType} +import org.apache.hudi.io.SeekableDataInputStream +import org.apache.hudi.storage.{HoodieStorage, HoodieStorageUtils, StorageConfiguration, StoragePath} + +import org.apache.spark.TaskContext +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{Dataset, Row} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{GenericRowWithSchema, SpecificInternalRow} +import org.apache.spark.sql.types.{BinaryType, BlobType, DataType, StructField, StructType} +import org.slf4j.LoggerFactory + +import java.io.InputStream + +import scala.collection.mutable.ArrayBuffer + +/** + * Batched byte range reader that optimizes I/O by combining consecutive reads for out-of-line data. + * + * This reader analyzes sequences of read requests within a partition and merges + * consecutive or nearby reads into single I/O operations. This significantly reduces + * the number of seeks and reads when processing sorted data. + * + *

Schema Requirement:

+ * The blob column must match the schema defined in {@link HoodieSchema.Blob}: + *
+ * struct {
+ *   type: string                   // "inline" or "out_of_line"
+ *   data: binary (nullable)       // inline data (null for out_of_line)
+ *   reference: struct (nullable) { // file reference (null for inline)
+ *     external_path: string
+ *     offset: long
+ *     length: long
+ *     managed: boolean
+ *   }
+ * }
+ * 
+ * + *

Key Features:

+ *
    + *
  • Batches consecutive reads from the same file
  • + *
  • Configurable gap threshold for merging nearby reads
  • + *
  • Lookahead buffer to identify batch opportunities
  • + *
  • Preserves input row order in output
  • + *
+ * + *

Usage Example:

+ * {{{ + * import org.apache.spark.sql.hudi.blob.BatchedBlobReader + * import org.apache.spark.sql.functions._ + * // Read a table with a blob column (e.g. image_data) + * val df = spark.read.format("hudi").load("/my_path").select("image_data", "record_id") + * + * // Read with batching (best when data is sorted by external_path, offset); storageConf is the StorageConfiguration used to open the referenced files + * val result = BatchedBlobReader.readBatched(df, storageConf, columnName = Some("image_data")) + * + * // Result has: record_id, image_data (image_data now holds the resolved bytes) + * result.show() + * }}} + * + *

Performance Tips:

+ *
    + *
  • Sort input by (blob.reference.external_path, blob.reference.offset) for maximum batching effectiveness
  • + *
  • Increase lookaheadSize for better batch detection (at cost of memory)
  • + *
  • Tune maxGapBytes based on your data access patterns
  • + *
+ * + * @param storage HoodieStorage instance for file I/O + * @param maxGapBytes Maximum gap between ranges to consider for batching (default: 4KB) + * @param lookaheadRows Number of rows to buffer for batch detection (default: 50) + */ +class BatchedBlobReader( + storage: HoodieStorage, + maxGapBytes: Int = 4096, + lookaheadRows: Int = 50) { + + private val logger = LoggerFactory.getLogger(classOf[BatchedBlobReader]) + + /** + * Process a partition iterator, batching consecutive reads. + * + * This method consumes the input iterator and produces an output iterator + * with each row containing the original data plus a "data" column with the + * bytes read from the file. + * + * @param rows Iterator of input rows with struct column + * @param structColIdx Index of the struct column in the row + * @param outputSchema Schema for output rows + * @param accessor Type class for accessing row fields + * @param builder Type class for building output rows + * @tparam R Row type (Row or InternalRow) + * @return Iterator of output rows with data column added + */ + def processPartition[R]( + rows: Iterator[R], + structColIdx: Int, + outputSchema: StructType) + (implicit accessor: RowAccessor[R], builder: RowBuilder[R]): Iterator[R] = { + + // Create buffered iterator for lookahead + val bufferedRows = rows.buffered + + // Result buffer to maintain order + val resultIterator = new Iterator[R] { + private var currentBatch: Iterator[R] = Iterator.empty + private var rowIndex = 0L + + override def hasNext: Boolean = { + if (currentBatch.hasNext) { + true + } else if (bufferedRows.hasNext) { + // Process next batch + currentBatch = processNextBatch() + currentBatch.hasNext + } else { + false + } + } + + override def next(): R = { + if (!hasNext) { + throw new NoSuchElementException("No more rows") + } + currentBatch.next() + } + + /** + * Collect and process the next batch of rows. 
+ */ + private def processNextBatch(): Iterator[R] = { + // Collect up to lookaheadSize rows with their original indices + val batch = collectBatch() + + if (batch.isEmpty) { + Iterator.empty + } else { + // Partition the batch into three groups + val (inlineRows, outOfLineRows) = batch.partition(_.inlineBytes.isDefined) + val (wholeFileRows, rangeRows) = outOfLineRows.partition(_.length < 0) + + // Case 1: Inline — return bytes directly without I/O + val inlineResults = inlineRows.map { ri => + RowResult(builder.buildRow(ri.originalRow, ri.inlineBytes.get, outputSchema), ri.index) + } + + // Case 2: Whole-file reads + val wholeFileResults = wholeFileRows.map(readWholeFile(_, outputSchema)) + + // Case 3: Regular range reads — merge consecutive ranges and batch + val mergedRanges = identifyConsecutiveRanges(rangeRows) + val rangeResults = mergedRanges.flatMap(readAndSplitRange(_, outputSchema)) + + // Sort by original index to preserve input order + (inlineResults ++ wholeFileResults ++ rangeResults).sortBy(_.index).map(_.row).iterator + } + } + + /** + * Collect up to lookaheadSize rows from the input iterator. + */ + private def collectBatch(): Seq[RowInfo[R]] = { + val batch = ArrayBuffer[RowInfo[R]]() + var collected = 0 + + while (bufferedRows.hasNext && collected < lookaheadRows) { + // Spark's upstream InternalRow iterator can reuse a single buffer + // across next() calls; without copying, every buffered RowInfo + // aliases the last emitted row — non-blob columns (id, etc.) 
would + // collapse to the tail value by the time we emit the batch + val row = accessor.copy(bufferedRows.next()) + // Handle null struct column (null blob) + if (accessor.isNullAt(row, structColIdx)) { + batch += RowInfo[R]( + originalRow = row, + filePath = "", + offset = -1, + length = -1, + index = rowIndex, + inlineBytes = Some(null) + ) + rowIndex += 1 + collected += 1 + } else { + val blobStruct = accessor.getStruct(row, structColIdx, HoodieSchema.Blob.getFieldCount) + // Dispatch based on storage_type (field 0) + val storageType = accessor.getString(blobStruct, 0) + if (storageType == HoodieSchema.Blob.INLINE) { + // Case 1: Inline — bytes are in field 1 + val bytes = accessor.getBytes(blobStruct, 1) + batch += RowInfo[R]( + originalRow = row, + filePath = "", + offset = -1, + length = -1, + index = rowIndex, + inlineBytes = Some(bytes) + ) + } else if (storageType == HoodieSchema.Blob.OUT_OF_LINE) { + // Case 2 or 3: Out-of-line — get reference struct (field 2) + require(!accessor.isNullAt(blobStruct, 2), s"Out-of-line blob at row $rowIndex must set reference") + val referenceStruct = accessor.getStruct(blobStruct, 2, HoodieSchema.Blob.getReferenceFieldCount) + val filePath = accessor.getString(referenceStruct, 0) + require(filePath != null && filePath.nonEmpty, s"Blob reference must have non-empty external_path at row $rowIndex") + val offsetIsNull = accessor.isNullAt(referenceStruct, 1) + val lengthIsNull = accessor.isNullAt(referenceStruct, 2) + if (offsetIsNull && lengthIsNull) { + // Case 2: Whole-file read — no offset/length specified; sentinel length = -1 + batch += RowInfo[R]( + originalRow = row, + filePath = filePath, + offset = 0, + length = -1, + index = rowIndex + ) + } else if (offsetIsNull || lengthIsNull) { + throw new IllegalArgumentException(s"Blob reference for '$filePath' must set both offset and length, or neither") + } else { + // Case 3: Regular range read + val offset = accessor.getLong(referenceStruct, 1) + val length = 
accessor.getLong(referenceStruct, 2) + require(offset >= 0, s"Blob offset must be non-negative for '$filePath': $offset") + require(length >= 0, s"Blob length must be non-negative for '$filePath': $length") + batch += RowInfo[R]( + originalRow = row, + filePath = filePath, + offset = offset, + length = length, + index = rowIndex + ) + } + } else { + throw new IllegalArgumentException(s"Unsupported blob storage_type at row $rowIndex: $storageType") + } + rowIndex += 1 + collected += 1 + } + } + batch.toSeq + } + } + + resultIterator + } + + /** + * Identify consecutive ranges that can be batched together. + * + * This method groups rows by file path, sorts by offset, and merges + * ranges that are consecutive or within maxGapBytes of each other. + * + * @param rows Sequence of row information + * @return Sequence of merged ranges + */ + private def identifyConsecutiveRanges[R](rows: Seq[RowInfo[R]]): Seq[MergedRange[R]] = { + // Group by file path + val byFile = rows.groupBy(_.filePath) + + val allRanges = ArrayBuffer[MergedRange[R]]() + + byFile.foreach { case (filePath, fileRows) => + // Sort by offset + val sorted = fileRows.sortBy(_.offset) + + // Merge consecutive ranges + val merged = mergeRanges(sorted, maxGapBytes) + allRanges ++= merged + } + + allRanges.toSeq + } + + /** + * Merge consecutive ranges within the gap threshold. 
+ * + * @param rows Sorted rows from the same file + * @param maxGap Maximum gap to consider for merging + * @return Sequence of merged ranges + */ + private def mergeRanges[R](rows: Seq[RowInfo[R]], maxGap: Int): Seq[MergedRange[R]] = { + + val result = ArrayBuffer[MergedRange[R]]() + var currentFilePath: String = null + var currentStartOffset: Long = 0L + var currentEndOffset: Long = 0L + var currentRows: ArrayBuffer[RowInfo[R]] = null + + rows.foreach { row => + if (currentRows == null) { + // Start first range + currentFilePath = row.filePath + currentStartOffset = row.offset + currentEndOffset = row.offset + row.length + currentRows = ArrayBuffer(row) + } else { + val gap = row.offset - currentEndOffset + // Check for overlap + if (row.offset < currentEndOffset) { + throw new IllegalArgumentException( + s"Overlapping blob ranges detected: previous range [${currentStartOffset}, ${currentEndOffset}) and current row [${row.offset}, ${row.offset + row.length}) in file ${row.filePath}" + ) + } + if (gap >= 0 && gap <= maxGap) { + // Merge into current range + currentEndOffset = math.max(currentEndOffset, row.offset + row.length) + currentRows += row + } else { + // Save current range and start new one + result += MergedRange[R]( + filePath = currentFilePath, + startOffset = currentStartOffset, + endOffset = currentEndOffset, + rows = currentRows.toSeq + ) + currentFilePath = row.filePath + currentStartOffset = row.offset + currentEndOffset = row.offset + row.length + currentRows = ArrayBuffer(row) + } + } + } + + // Add final range + if (currentRows != null) { + result += MergedRange[R]( + filePath = currentFilePath, + startOffset = currentStartOffset, + endOffset = currentEndOffset, + rows = currentRows.toSeq + ) + } + + result.toSeq + } + + /** + * Read an entire file and return it as a single row result. + * + * Used for whole-file out-of-line blobs where no offset or length is specified. 
+ * + * @param rowInfo Row information with the file path + * @param outputSchema Schema for output rows + * @param builder Type class for building output rows + * @tparam R Row type (Row or InternalRow) + * @return Sequence containing a single row result + */ + private def readWholeFile[R]( + rowInfo: RowInfo[R], + outputSchema: StructType) + (implicit builder: RowBuilder[R]): RowResult[R] = { + + var inputStream: InputStream = null + try { + val path = new StoragePath(rowInfo.filePath) + inputStream = storage.open(path) + val buffer = inputStream.readAllBytes() + + logger.debug(s"Read entire file ${rowInfo.filePath} (${buffer.length} bytes)") + + RowResult(builder.buildRow(rowInfo.originalRow, buffer, outputSchema), rowInfo.index) + } finally { + if (inputStream != null) { + try { + inputStream.close() + } catch { + case e: Exception => + logger.warn(s"Error closing stream for ${rowInfo.filePath}", e) + } + } + } + } + + /** + * Read a merged range and split it back into individual row results. + * + * This method performs a single I/O operation to read the entire merged + * range, then splits the buffer into individual results for each original + * row. 
+ * + * @param range The merged range to read + * @param outputSchema Schema for output rows + * @param builder Type class for building output rows + * @tparam R Row type (Row or InternalRow) + * @return Sequence of row results with original indices + */ + private def readAndSplitRange[R]( + range: MergedRange[R], + outputSchema: StructType) + (implicit builder: RowBuilder[R]): Seq[RowResult[R]] = { + + var inputStream: SeekableDataInputStream = null + try { + // Get or open file handle + inputStream = storage.openSeekable(new StoragePath(range.filePath), false) + + // Seek to start offset + inputStream.seek(range.startOffset) + + // Read the entire merged range + val totalLengthLong = range.endOffset - range.startOffset + require(totalLengthLong >= 0 && totalLengthLong <= Int.MaxValue, s"Range too large: $totalLengthLong bytes exceeds Int.MaxValue") + val totalLength = totalLengthLong.toInt + val buffer = new Array[Byte](totalLength) + inputStream.readFully(buffer, 0, totalLength) + + logger.debug( + s"Read ${totalLength} bytes from ${range.filePath} at offset ${range.startOffset} " + + s"for ${range.rows.length} rows" + ) + + // Split buffer into individual results + range.rows.map { rowInfo => + val relativeOffset = (rowInfo.offset - range.startOffset).toInt + val data = buffer.slice(relativeOffset, relativeOffset + rowInfo.length.toInt) + + // Build output row using type class + val outputRow = builder.buildRow(rowInfo.originalRow, data, outputSchema) + + RowResult[R]( + row = outputRow, + index = rowInfo.index + ) + } + + } catch { + case e: Exception => + logger.error( + s"Failed to read range from ${range.filePath} " + + s"at offset ${range.startOffset}, length ${range.endOffset - range.startOffset}", + e + ) + throw e + } finally { + if (inputStream != null) { + try { + inputStream.close() + } catch { + case e: Exception => + logger.warn(s"Error closing input stream for ${range.filePath}", e) + } + } + } + } +} + +/** + * Type class for accessing row 
fields. + * Abstracts over Row and InternalRow API differences. + * + * @tparam R Row type (Row or InternalRow) + */ +private[blob] trait RowAccessor[R] { + def getStruct(row: R, structColIdx: Int, numFields: Int): R + def getString(struct: R, fieldIdx: Int): String + def getLong(struct: R, fieldIdx: Int): Long + def getBytes(row: R, fieldIdx: Int): Array[Byte] + def isNullAt(row: R, fieldIdx: Int): Boolean + // Return a deep copy safe to retain across iterator advances. Spark's child + // InternalRow iterator may hand out a single mutated buffer, so batching + // requires copying before stashing the row. + def copy(row: R): R +} + +/** + * Type class for building output rows. + * Abstracts over Row and InternalRow construction. + * + * @tparam R Row type (Row or InternalRow) + */ +private[blob] trait RowBuilder[R] { + def buildRow(originalRow: R, data: Array[Byte], outputSchema: StructType): R +} + +/** + * Type class instances for Row. + */ +private[blob] object RowAccessor { + implicit val rowAccessor: RowAccessor[Row] = new RowAccessor[Row] { + override def getStruct(row: Row, structColIdx: Int, numFields: Int): Row = row.getStruct(structColIdx) + override def getString(struct: Row, fieldIdx: Int): String = struct.getString(fieldIdx) + override def getLong(struct: Row, fieldIdx: Int): Long = struct.getLong(fieldIdx) + override def getBytes(row: Row, fieldIdx: Int): Array[Byte] = row.getAs[Array[Byte]](fieldIdx) + override def isNullAt(row: Row, fieldIdx: Int): Boolean = row.isNullAt(fieldIdx) + override def copy(row: Row): Row = row + } + + implicit val internalRowAccessor: RowAccessor[InternalRow] = new RowAccessor[InternalRow] { + override def getStruct(row: InternalRow, structColIdx: Int, numFields: Int): InternalRow = row.getStruct(structColIdx, numFields) + override def getString(struct: InternalRow, fieldIdx: Int): String = { + val utf8String = struct.getUTF8String(fieldIdx) + if (utf8String == null) null else utf8String.toString + } + override def 
getLong(struct: InternalRow, fieldIdx: Int): Long = struct.getLong(fieldIdx) + override def getBytes(row: InternalRow, fieldIdx: Int): Array[Byte] = row.getBinary(fieldIdx) + override def isNullAt(row: InternalRow, fieldIdx: Int): Boolean = row.isNullAt(fieldIdx) + override def copy(row: InternalRow): InternalRow = row.copy() + } +} + +/** + * Type class instances for Row builders. + */ +private[blob] object RowBuilder { + implicit val rowBuilder: RowBuilder[Row] = new RowBuilder[Row] { + override def buildRow(originalRow: Row, data: Array[Byte], outputSchema: StructType): Row = { + val outputValues = new Array[Any](originalRow.length + 1) + var i = 0 + while (i < originalRow.length) { + outputValues(i) = originalRow.get(i) + i += 1 + } + outputValues(originalRow.length) = data + new GenericRowWithSchema(outputValues, outputSchema) + } + } + + implicit val internalRowBuilder: RowBuilder[InternalRow] = new RowBuilder[InternalRow] { + override def buildRow(originalRow: InternalRow, data: Array[Byte], outputSchema: StructType): InternalRow = { + val outputRow = new SpecificInternalRow(outputSchema.fields.map(_.dataType)) + var i = 0 + while (i < originalRow.numFields) { + if (originalRow.isNullAt(i)) { + outputRow.setNullAt(i) + } else { + val dataType = outputSchema.fields(i).dataType + // Copy field using generic get/update for compatibility + outputRow.update(i, InternalRow.copyValue(originalRow.get(i, dataType))) + } + i += 1 + } + // Set the data field (last position) + outputRow.update(originalRow.numFields, data) + outputRow + } + } +} + +/** + * Information about a single row to be read. 
+ * + * @param originalRow Original input row + * @param filePath Path to the file + * @param offset Byte offset in file + * @param length Number of bytes to read + * @param index Original position in input (for ordering) + * @tparam R Row type (Row or InternalRow) + */ +private case class RowInfo[R]( + originalRow: R, + filePath: String, + offset: Long, + length: Long, + index: Long, + inlineBytes: Option[Array[Byte]] = None) + +/** + * A merged range combining multiple consecutive reads. + * + * @param filePath Path to the file + * @param startOffset Start byte offset of merged range + * @param endOffset End byte offset of merged range (exclusive) + * @param rows Individual rows included in this range + * @tparam R Row type (Row or InternalRow) + */ +private case class MergedRange[R]( + filePath: String, + startOffset: Long, + endOffset: Long, + rows: Seq[RowInfo[R]]) { +} + +/** + * Result row with its original index for ordering. + * + * @param row Output row with data + * @param index Original position in input + * @tparam R Row type (Row or InternalRow) + */ +private case class RowResult[R]( + row: R, + index: Long) + +/** + * Companion object providing the main API for batched byte range reading. + */ +object BatchedBlobReader { + + val MAX_GAP_BYTES_CONF = "hoodie.blob.batching.max.gap.bytes" + val LOOKAHEAD_SIZE_CONF = "hoodie.blob.batching.lookahead.size" + + /** Default maximum gap to consider for batching */ + val DEFAULT_MAX_GAP_BYTES = 4096 + + /** Default lookahead buffer size */ + val DEFAULT_LOOKAHEAD_SIZE = 50 + + val DATA_COL = "__temp__data" + + /** + * Read byte ranges from a DataFrame with a Blob column. 
+   *
+   * The struct column must contain the HoodieSchema.Blob structure:
+   * - storage_type (String): "out_of_line" or "inline"
+   * - bytes (Binary, nullable): inline blob data or null
+   * - reference (Struct, nullable): out-of-line reference with external_path, offset, length, and managed fields
+   * The resolved bytes are returned in a binary column; see keepTempColumn for the resulting column layout.
+   *
+   * For best performance, sort the input DataFrame by the reference fields (external_path, offset)
+   * before calling this method.
+   *
+   * @param df Input DataFrame with struct column
+   * @param storageConf Storage configuration for file access
+   * @param maxGapBytes Max gap to consider consecutive (default: 4096)
+   * @param lookaheadSize Rows to buffer for batching (default: 50)
+   * @param columnName Optional column name to resolve. If not provided, the first column whose Hudi type metadata marks it as a blob is used
+   * @param keepTempColumn If true, keeps __temp__data column; if false (default), renames it to original column name
+   * @return Input columns plus the binary data column when keepTempColumn is true; otherwise the struct column is dropped and the data column takes over its name
+   * @throws IllegalArgumentException if struct column is missing or has wrong schema
+   */
+  def readBatched(
+      df: Dataset[Row],
+      storageConf: StorageConfiguration[_],
+      maxGapBytes: Int = DEFAULT_MAX_GAP_BYTES,
+      lookaheadSize: Int = DEFAULT_LOOKAHEAD_SIZE,
+      columnName: Option[String] = None,
+      keepTempColumn: Boolean = false): Dataset[Row] = {
+
+    require(maxGapBytes >= 0, "maxGapBytes must be non-negative")
+    require(lookaheadSize > 0, "lookaheadSize must be positive")
+
+    val spark = df.sparkSession
+
+    // Get struct column index - use provided column name or fallback to metadata search
+    val (structColIdx, structColName) = columnName match {
+      case Some(name) =>
+        // Use provided column name directly
+        val idx = df.schema.fieldIndex(name)
+        (idx, name)
+      case None =>
+        // Fallback to metadata-based inference
+        getBlobColumn(df.schema)
+    }
+
+    val structField = df.schema(structColIdx)
+    require(isCompatibleBlobType(structField.dataType),
+      s"Blob column '$structColName' must be compatible with BlobType (type, data, reference struct), found: ${structField.dataType}")
+
+    // Create output schema (input + data column)
+    val outputSchema = df.schema.add(StructField(DATA_COL, BinaryType, nullable = true))
+
+    // Broadcast storage configuration
+    val broadcastConf = spark.sparkContext.broadcast(storageConf)
+
+    // Apply mapPartitions
+    val result = df.mapPartitions { partition =>
+      // Create the storage for this partition and register its cleanup right away, so it is
+      // closed at task completion even if building the reader or the iterator below throws
+      val storage = HoodieStorageUtils.getStorage(broadcastConf.value)
+      TaskContext.get().addTaskCompletionListener[Unit](_ => storage.close())
+      val reader = new BatchedBlobReader(storage, maxGapBytes, lookaheadSize)
+
+      // Import implicit instances for Row
+      import RowAccessor.rowAccessor
+      import RowBuilder.rowBuilder
+
+      // Process partition
+      reader.processPartition[Row](partition, structColIdx, outputSchema)
+    } (sparkAdapter.getCatalystExpressionUtils.getEncoder(outputSchema))
+
+    if (keepTempColumn) {
+      // Keep both columns for ReadBlobRule
+      result
+    } else {
+      // Backwards compatible behavior: rename __temp__data to original column name
+      result.drop(structColName).withColumnRenamed(DATA_COL, structColName)
+    }
+  }
+
+  /**
+   * Process RDD[InternalRow] directly without DataFrame conversion.
+   *
+   * This method provides optimized processing for physical plan execution,
+   * avoiding the overhead of RDD → DataFrame → RDD conversions.
+   *
+   * @param rdd Input RDD of InternalRows
+   * @param schema Schema of the input RDD
+   * @param broadcastConf Broadcast storage configuration, read inside each partition to open the storage
+   * @param maxGapBytes Max gap to consider consecutive (default: 4096)
+   * @param lookaheadSize Rows to buffer for batching (default: 50)
+   * @param columnName Name of the blob struct column to resolve (required; this method has no metadata-based fallback)
+   * @return RDD produced with the output schema: the input columns followed by the binary data column
+   * @throws IllegalArgumentException if struct column is missing or has wrong schema
+   */
+  def processRDD(
+      rdd: RDD[InternalRow],
+      schema: StructType,
+      broadcastConf: Broadcast[StorageConfiguration[_]],
+      maxGapBytes: Int = DEFAULT_MAX_GAP_BYTES,
+      lookaheadSize: Int = DEFAULT_LOOKAHEAD_SIZE,
+      columnName: String): RDD[InternalRow] = {
+
+    require(maxGapBytes >= 0, "maxGapBytes must be non-negative")
+    require(lookaheadSize > 0, "lookaheadSize must be positive")
+
+    // Get struct column index
+    val structColIdx = schema.fieldIndex(columnName)
+    val structField = schema(structColIdx)
+    require(isCompatibleBlobType(structField.dataType),
+      s"Blob column '$columnName' must be compatible with BlobType (type, data, reference struct), found: ${structField.dataType}")
+
+    // Create output schema (input + __temp__data column)
+    val outputSchema = schema.add(StructField(DATA_COL, BinaryType, nullable = true))
+
+    // Process partitions using InternalRow type classes
+    rdd.mapPartitions { partition =>
+      val storage = HoodieStorageUtils.getStorage(broadcastConf.value)
+      // Register cleanup first so the storage is closed even if the setup below throws
+      TaskContext.get().addTaskCompletionListener[Unit](_ => storage.close())
+      val reader = new BatchedBlobReader(storage, maxGapBytes, lookaheadSize)
+
+      // Import implicit instances for InternalRow
+      import RowAccessor.internalRowAccessor
+      import RowBuilder.internalRowBuilder
+
+      // Process partition
+      reader.processPartition[InternalRow](partition, structColIdx, outputSchema)
+    }
+  }
+
+  /**
+   * Find the blob column by inspecting the Hudi type metadata of each field.
+   *
+   * This is a fallback method used when no explicit column name is provided.
+   * Returns the first field whose metadata contains [[HoodieSchema.TYPE_METADATA_FIELD]]
+   * with a value equal (ignoring case) to the name of [[HoodieSchemaType.BLOB]].
+   *
+   * @param structType The schema to search
+   * @return The index of the blob column and the name of the column
+   * @throws IllegalArgumentException if no blob column is found
+   */
+  private def getBlobColumn(structType: StructType): (Int, String) = {
+    // Find the first field tagged as a blob through the Hudi type metadata key
+    val blobFieldIndex = structType.fields.zipWithIndex.find { case (field, _) =>
+      field.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD) &&
+        field.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD).equalsIgnoreCase(HoodieSchemaType.BLOB.name)
+    }.map(fieldAndIndex => (fieldAndIndex._2, fieldAndIndex._1.name))
+
+    blobFieldIndex.getOrElse {
+      throw new IllegalArgumentException(
+        s"No blob column found with metadata ${HoodieSchema.TYPE_METADATA_FIELD}=${HoodieSchemaType.BLOB.name}. " +
+          s"Available columns: ${structType.fieldNames.mkString(", ")}"
+      )
+    }
+  }
+
+  // Validate that the struct column is compatible with BlobType
+  private def isCompatibleBlobType(dt: DataType): Boolean = dt match {
+    case struct: StructType => DataType.equalsIgnoreCaseAndNullability(struct, BlobType.dataType)
+    case _ => false
+  }
+}
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobReaderStrategy.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobReaderStrategy.scala
new file mode 100644
index 0000000000000..07bb522210bd3
--- /dev/null
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/BatchedBlobReaderStrategy.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.hudi.blob
+
+import org.apache.hudi.HoodieSparkConfUtils
+import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.execution.{SparkPlan, SparkStrategy}
+
+/**
+ * Planner strategy that lowers [[BatchedBlobRead]] logical nodes to [[BatchedBlobReadExec]] physical nodes.
+ *
+ * The max-gap and lookahead settings are looked up through the session conf, passing the [[BatchedBlobReader]] defaults as fallback values.
+ *
+ * @param sparkSession SparkSession supplying the SQL conf and the Hadoop configuration
+ */
+case class BatchedBlobReaderStrategy(sparkSession: SparkSession) extends SparkStrategy {
+  def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
+    case read @ BatchedBlobRead(child, _, _) =>
+      // TODO find proper way to access these configs
+      val sqlConf = sparkSession.sessionState.conf
+      val defaultGap = String.valueOf(BatchedBlobReader.DEFAULT_MAX_GAP_BYTES)
+      val defaultLookahead = String.valueOf(BatchedBlobReader.DEFAULT_LOOKAHEAD_SIZE)
+      val maxGapBytes = HoodieSparkConfUtils
+        .getConfigValue(Map.empty, sqlConf, BatchedBlobReader.MAX_GAP_BYTES_CONF, defaultGap)
+        .toInt
+      val lookaheadSize = HoodieSparkConfUtils
+        .getConfigValue(Map.empty, sqlConf, BatchedBlobReader.LOOKAHEAD_SIZE_CONF, defaultLookahead)
+        .toInt
+
+      val storageConf = new HadoopStorageConfiguration(sparkSession.sparkContext.hadoopConfiguration)
+      val physicalRead = BatchedBlobReadExec(
+        planLater(child),
+        maxGapBytes,
+        storageConf,
+        lookaheadSize,
+        read.blobAttr.name,
+        read.output)
+      Seq(physicalRead)
+
+    case _ => Seq.empty
+  }
+}
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/ReadBlobExpression.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/ReadBlobExpression.scala
new file mode 100644
index 0000000000000..ffd00ff51313c
--- /dev/null
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/ReadBlobExpression.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.hudi.blob
+
+import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression, Unevaluable}
+import org.apache.spark.sql.types.{BinaryType, DataType}
+
+/**
+ * Placeholder expression behind the `read_blob()` SQL function.
+ *
+ * It is declared [[Unevaluable]]: [[ReadBlobRule]] is expected to find it and swap it for the
+ * output of a batched blob read, so the bytes are fetched during physical execution.
+ *
+ * Example: `SELECT id, read_blob(image_data) FROM table`
+ *
+ * @param child Expression representing the blob column (matching the definition in [[org.apache.hudi.common.schema.HoodieSchema.Blob]])
+ */
+case class ReadBlobExpression(child: Expression) extends UnaryExpression with Unevaluable {
+
+  // The function yields the raw blob bytes
+  override def dataType: DataType = BinaryType
+
+  // Always declared nullable, independent of the child's nullability
+  override def nullable: Boolean = true
+
+  override protected def withNewChildInternal(newChild: Expression): ReadBlobExpression =
+    copy(child = newChild)
+
+  // Rendered as read_blob(<child>)
+  override def toString: String = "read_blob(" + child + ")"
+}
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/ReadBlobRule.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/ReadBlobRule.scala
new file mode 100644
index 0000000000000..b91d08674cb95
--- /dev/null
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/ReadBlobRule.scala
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.hudi.blob
+
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Expression, ExprId, NamedExpression}
+import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.types.{DataType, StructType}
+
+import scala.collection.mutable
+import scala.collection.mutable.ArrayBuffer
+
+/**
+ * Analyzer rule that rewrites `read_blob()` calls into lazy, batched blob reads.
+ *
+ * Each [[ReadBlobExpression]] marker in a projection or filter condition is replaced by the data
+ * attribute of a [[BatchedBlobRead]] node that the rule inserts below that operator.
+ *
+ * Example: `SELECT id, read_blob(image_data) FROM table`
+ *
+ * @param spark SparkSession the rule is created with (not referenced by the methods below)
+ */
+case class ReadBlobRule(spark: SparkSession) extends Rule[LogicalPlan] {
+
+  override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp {
+    // read_blob() in a projection and in the filter directly below it: build one chain of reads
+    case Project(projectList, Filter(condition, child))
+      if !child.isInstanceOf[BatchedBlobRead]
+        && containsReadBlobExpression(projectList)
+        && containsReadBlobInExpression(condition) =>
+      // Projection columns first, then filter columns, each exprId kept once
+      val blobColumns = extractAllBlobColumns(projectList :+ condition)
+      val (readPlan, dataAttrs) = wrapWithBlobReads(blobColumns, child)
+      val rewrittenFilter = Filter(replaceReadBlobExpression(condition, dataAttrs), readPlan)
+      Project(transformNamedExpressions(projectList, dataAttrs), rewrittenFilter)
+
+    // read_blob() in a filter condition: re-project the child's output on top of the new filter
+    case Filter(condition, child)
+      if !child.isInstanceOf[BatchedBlobRead] && containsReadBlobInExpression(condition) =>
+      val filterColumns = extractBlobColumnsFromExpression(condition)
+      val (readPlan, dataAttrs) = wrapWithBlobReads(filterColumns, child)
+      Project(child.output, Filter(replaceReadBlobExpression(condition, dataAttrs), readPlan))
+
+    // Remaining projections that contain read_blob()
+    case Project(projectList, child)
+      if !child.isInstanceOf[BatchedBlobRead] && containsReadBlobExpression(projectList) =>
+      val projectColumns = extractAllBlobColumns(projectList)
+      val (readPlan, dataAttrs) = wrapWithBlobReads(projectColumns, child)
+      Project(transformNamedExpressions(projectList, dataAttrs), readPlan)
+
+    // Any other operator still carrying read_blob() is rejected
+    case node if containsReadBlobInAnyExpression(node) =>
+      val message =
+        s"read_blob() may only appear in SELECT or WHERE clauses. Found in unsupported logical plan node: ${node.nodeName}. " +
+          s"Move read_blob() to a SELECT or WHERE clause. Full plan: ${node.simpleStringWithNodeId()}"
+      throw new IllegalArgumentException(message)
+  }
+
+  /** True when any expression attached to the plan node contains a [[ReadBlobExpression]]. */
+  private def containsReadBlobInAnyExpression(plan: LogicalPlan): Boolean =
+    plan.expressions.exists(containsReadBlobInExpression)
+
+  /** Stacks one [[BatchedBlobRead]] per blob column on `child`; returns the plan and an exprId -> data attribute map. */
+  private def wrapWithBlobReads(
+      blobColumns: Seq[AttributeReference],
+      child: LogicalPlan): (LogicalPlan, Map[ExprId, Attribute]) = {
+    if (blobColumns.isEmpty) {
+      throw new IllegalStateException("read_blob() found but no valid blob column reference extracted.")
+    }
+    var currentPlan: LogicalPlan = child
+    var dataAttrByExprId = Map.empty[ExprId, Attribute]
+    blobColumns.foreach { blobAttr =>
+      // Type compatibility check (early fail for non-struct columns)
+      val compatible = blobAttr.dataType match {
+        case struct: StructType =>
+          DataType.equalsIgnoreCaseAndNullability(struct, org.apache.spark.sql.types.BlobType.dataType)
+        case _ => false
+      }
+      if (!compatible) {
+        throw new IllegalArgumentException(
+          s"Blob column '${blobAttr.name}' must be compatible with BlobType (type, data, reference struct), found: ${blobAttr.dataType}")
+      }
+      val blobRead = BatchedBlobRead(currentPlan, blobAttr)
+      currentPlan = blobRead
+      dataAttrByExprId += (blobAttr.exprId -> blobRead.dataAttr)
+    }
+    (currentPlan, dataAttrByExprId)
+  }
+
+  /** Blob columns referenced through `read_blob(column)` inside a single expression. */
+  private def extractBlobColumnsFromExpression(expr: Expression): Seq[AttributeReference] =
+    extractAllBlobColumns(Seq(expr))
+
+  /** True when at least one of the given expressions contains a [[ReadBlobExpression]]. */
+  private def containsReadBlobExpression(projectList: Seq[Expression]): Boolean =
+    projectList.exists(containsReadBlobInExpression)
+
+  /** True when the expression itself, or any of its descendants, is a [[ReadBlobExpression]]. */
+  private def containsReadBlobInExpression(expr: Expression): Boolean =
+    expr.isInstanceOf[ReadBlobExpression] || expr.children.exists(containsReadBlobInExpression)
+
+  /** Blob columns referenced through `read_blob(column)`, in first-seen order, one per exprId. */
+  private def extractAllBlobColumns(expressions: Seq[Expression]): Seq[AttributeReference] = {
+    val visitedIds = mutable.LinkedHashSet.empty[ExprId]
+    val collected = ArrayBuffer.empty[AttributeReference]
+    for (expression <- expressions) collectBlobColumns(expression, visitedIds, collected)
+    collected.toSeq
+  }
+
+  /** Recursive walk appending each not-yet-seen `read_blob(column)` argument to `result`. */
+  private def collectBlobColumns(
+      expr: Expression,
+      seen: mutable.Set[ExprId],
+      result: ArrayBuffer[AttributeReference]): Unit = expr match {
+    case ReadBlobExpression(attr: AttributeReference) =>
+      val firstOccurrence = seen.add(attr.exprId)
+      if (firstOccurrence) result += attr
+    case other =>
+      other.children.foreach(child => collectBlobColumns(child, seen, result))
+  }
+
+  /** Rewrites `read_blob()` inside every named expression, re-creating aliases with their exprId. */
+  private def transformNamedExpressions(
+      expressions: Seq[NamedExpression],
+      blobToDataAttr: Map[ExprId, Attribute]): Seq[NamedExpression] = {
+    expressions.map {
+      // Plain column references are returned as they are
+      case attr: AttributeReference => attr
+      case aliased: Alias =>
+        val newChild = replaceReadBlobExpression(aliased.child, blobToDataAttr)
+        Alias(newChild, aliased.name)(aliased.exprId, aliased.qualifier, aliased.explicitMetadata)
+      case other =>
+        replaceReadBlobExpression(other, blobToDataAttr).asInstanceOf[NamedExpression]
+    }
+  }
+
+  /** Substitutes each `read_blob(column)` with the data attribute registered for that column. */
+  private def replaceReadBlobExpression(
+      expr: Expression,
+      blobToDataAttr: Map[ExprId, Attribute]): Expression = expr match {
+    case ReadBlobExpression(attr: AttributeReference) =>
+      blobToDataAttr.get(attr.exprId) match {
+        case Some(dataAttr) => dataAttr
+        case None =>
+          throw new IllegalArgumentException(
+            s"read_blob() called on column '${attr.name}' (exprId=${attr.exprId}) which was not registered for blob reading. " +
+              s"Available blob columns: ${blobToDataAttr.keys.mkString(", ")}")
+      }
+    case _: ReadBlobExpression =>
+      throw new IllegalStateException("read_blob() must be called on a direct column reference")
+    case other =>
+      other.mapChildren(child => replaceReadBlobExpression(child, blobToDataAttr))
+  }
+}
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/ScalarFunctions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/ScalarFunctions.scala
new file mode 100644
index 0000000000000..bf94b4519c33b
--- /dev/null
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/blob/ScalarFunctions.scala
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.hudi.blob
+
+import org.apache.spark.sql.catalyst.FunctionIdentifier
+import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo}
+
+/**
+ * Registry of scalar functions for Hudi SQL integration.
+ *
+ * These functions are registered as built-in functions that can be used in SQL queries. They integrate
+ * with Spark's function registry and are available in both SQL and DataFrame API contexts.
+ *
+ * <h3>Function Registration:</h3>
+ * Functions are registered via [[SparkAdapter.injectScalarFunctions]] which is
+ * called during [[HoodieSparkSessionExtension]] initialization.
+ *
+ * <h3>Adding New Functions:</h3>
+ * To add a new scalar function:
+ * <ol>
+ *   <li>Create a marker expression class (extends Unevaluable)</li>
+ *   <li>Add function definition tuple to [[funcs]] below</li>
+ *   <li>Create a logical plan rule to handle the expression</li>
+ *   <li>Register the rule in [[HoodieAnalysis.customPostHocResolutionRules]]</li>
+ * </ol>
+ */
+object ScalarFunctions {
+  private val READ_BLOB_FUNC_NAME = "read_blob"
+
+  /**
+   * Function definitions as tuples of:
+   * <ul>
+   *   <li>FunctionIdentifier - function name</li>
+   *   <li>ExpressionInfo - metadata for DESCRIBE FUNCTION</li>
+   *   <li>Builder function - (Seq[Expression] => Expression)</li>
+   * </ul>
+   */
+  val funcs: Seq[(FunctionIdentifier, ExpressionInfo, Seq[Expression] => Expression)] = Seq(
+    (
+      FunctionIdentifier(READ_BLOB_FUNC_NAME),
+      new ExpressionInfo(
+        classOf[ReadBlobExpression].getCanonicalName,
+        null, // db: none. The 3-arg constructor is (className, db, name), so passing usage needs this 5-arg form
+        READ_BLOB_FUNC_NAME,
+        """
+          |Usage: read_blob(blob_column) - Reads blob data from storage
+          |
+          |Reads byte ranges from files referenced in a blob-compatible column.
+          |The column must be structurally compatible with HoodieSchema.Blob.
+          |
+          |This function uses batched I/O operations for optimal performance.
+          |For best results, ensure data is sorted by (reference.file, reference.offset).
+          |
+          |Example:
+          |  SELECT id, name, read_blob(file_ref) as data FROM table
+          |
+          |Arguments:
+          |  blob_column - Struct column with HoodieSchema.Blob structure:
+          |    - type (string): "out_of_line" or "inline"
+          |    - data (binary, nullable): inline blob data or null
+          |    - reference (struct, nullable): {file, offset, length, managed}
+          |
+          |Returns:
+          |  Binary data read from the file
+          |
+          |Performance:
+          |  - Configure batching: hoodie.blob.batching.max.gap.bytes (default 4096)
+          |  - Configure lookahead: hoodie.blob.batching.lookahead.size (default 50)
+        """.stripMargin,
+        "" // extended: left empty, everything is carried by the usage text above
+      ),
+      (args: Seq[Expression]) => {
+        require(args.length == 1, s"read_blob expects exactly 1 argument, got ${args.length}")
+        ReadBlobExpression(args.head)
+      }
+    )
+  )
+}
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala
index 59b2b444211bc..08906bab531fa 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableCommand.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hudi.command
 
 import
org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport} import org.apache.hudi.common.model.HoodieTableType +import org.apache.hudi.common.schema.{HoodieSchema, HoodieSchemaType} import org.apache.hudi.common.table.HoodieTableConfig import org.apache.hudi.common.util.ConfigUtils import org.apache.hudi.exception.{HoodieException, HoodieValidationException} @@ -40,7 +41,7 @@ import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{isUsingHiveCatalog, isUsi import org.apache.spark.sql.hudi.command.CreateHoodieTableCommand.validateTableSchema import org.apache.spark.sql.hudi.command.exception.HoodieAnalysisException import org.apache.spark.sql.internal.StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{ArrayType, BinaryType, DataType, MapType, Metadata, StructField, StructType} import scala.collection.JavaConverters._ import scala.collection.mutable @@ -93,7 +94,7 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean } } -object CreateHoodieTableCommand { +object CreateHoodieTableCommand extends SparkAdapterSupport { def validateTableSchema(userDefinedSchema: StructType, hoodieTableSchema: StructType): Boolean = { if (userDefinedSchema.fields.length != 0 && @@ -222,16 +223,78 @@ object CreateHoodieTableCommand { if (!dbExists) { throw new NoSuchDatabaseException(dbName) } - // append some table properties need for spark data source table. + // Store original schema (with VariantType) in properties so Spark can reconstruct it when reading; + // schema passed to Hive is converted to Hive-compatible types so Hive 2.x/3.x does not reject VARIANT. 
val dataSourceProps = tableMetaToTableProps(sparkSession.sparkContext.conf, table, table.schema) - - val tableWithDataSourceProps = table.copy(properties = dataSourceProps ++ table.properties) + val tableWithDataSourceProps = buildHiveCompatibleCatalogTable(table, dataSourceProps) val client = HiveClientUtils.getSingletonClientForMetadata(sparkSession) // create hive table. client.createTable(tableWithDataSourceProps, ignoreIfExists = true) } + /** + * Returns a copy of `table` with a Hive-compatible schema and data-source properties merged in. + * The schema passed to the Hive metastore has VariantType (and other Hive-incompatible types) + * replaced by their physical representations, while `dataSourceProps` carries the original + * schema JSON so Spark can restore the logical types on read. + */ + private[hudi] def buildHiveCompatibleCatalogTable( + table: CatalogTable, + dataSourceProps: Map[String, String]): CatalogTable = { + table.copy( + schema = toHiveCompatibleSchema(table.schema), + properties = dataSourceProps ++ table.properties) + } + + /** + * Converts Spark DataTypes that Hive doesn't support to their physical representations. + * Currently handles: + * - VariantType (Spark 4.0+) -> `struct` + * - VECTOR (Hudi custom logical type, exposed in Spark as ArrayType with `hudi_type` + * metadata) -> `BinaryType`, matching its on-disk fixed_len_byte_array layout + * (RFC-99). Field metadata is preserved so the full logical schema is still + * serialized into `spark.sql.sources.schema.*` TBLPROPERTIES by the caller. + * + * Recurses into nested StructType, ArrayType, and MapType so variants embedded in + * complex types (e.g. `STRUCT`, `ARRAY`, `MAP`) + * are also converted. 
+ */ + private[hudi] def toHiveCompatibleSchema(schema: StructType): StructType = { + toHiveCompatibleType(schema).asInstanceOf[StructType] + } + + private def toHiveCompatibleType(dataType: DataType, + metadata: Metadata = Metadata.empty): DataType = dataType match { + case dt if sparkAdapter.isVariantType(dt) => + // Canonical field order (metadata, value) matches the Parquet spec and Iceberg convention, + // mirroring HoodieSchema.createVariant(). + StructType(Seq( + StructField(HoodieSchema.Variant.VARIANT_METADATA_FIELD, BinaryType, nullable = false), + StructField(HoodieSchema.Variant.VARIANT_VALUE_FIELD, BinaryType, nullable = false) + )) + case _: ArrayType if isVectorType(metadata) => + // VECTOR is exposed in Spark as ArrayType(FloatType) with hudi_type metadata. + // HMS must store the column as BINARY matching the on-disk fixed_len_byte_array (RFC-99). + BinaryType + case st: StructType => + StructType(st.fields.map(f => f.copy(dataType = toHiveCompatibleType(f.dataType, f.metadata)))) + case at: ArrayType => + at.copy(elementType = toHiveCompatibleType(at.elementType)) + case mt: MapType => + mt.copy( + keyType = toHiveCompatibleType(mt.keyType), + valueType = toHiveCompatibleType(mt.valueType)) + case other => other + } + + /** Mirrors the detection in HoodieSparkSchemaConverters.toHoodieTypeNested. 
*/ + private def isVectorType(metadata: Metadata): Boolean = { + metadata.contains(HoodieSchema.TYPE_METADATA_FIELD) && + HoodieSchema.parseTypeDescriptor( + metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)).getType == HoodieSchemaType.VECTOR + } + // This code is forked from org.apache.spark.sql.hive.HiveExternalCatalog#tableMetaToTableProps private def tableMetaToTableProps(sparkConf: SparkConf, table: CatalogTable, schema: StructType): Map[String, String] = { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSparkSessionExtension.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSparkSessionExtension.scala index 2cbf479bdeadd..f01ad896c6575 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSparkSessionExtension.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSparkSessionExtension.scala @@ -53,5 +53,7 @@ class HoodieSparkSessionExtension extends (SparkSessionExtensions => Unit) */ sparkAdapter.injectTableFunctions(extensions) + sparkAdapter.injectScalarFunctions(extensions) + sparkAdapter.injectPlannerStrategies(extensions) } } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala index e7576359c2a49..0d6d4efe304cd 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, LogicalRelation} import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{isMetaField, removeMetaFields} import 
org.apache.spark.sql.hudi.analysis.HoodieAnalysis.{sparkAdapter, MatchCreateIndex, MatchCreateTableLike, MatchDropIndex, MatchInsertIntoStatement, MatchMergeIntoTable, MatchRefreshIndex, MatchShowIndexes, ResolvesToHudiTable} +import org.apache.spark.sql.hudi.blob.ReadBlobRule import org.apache.spark.sql.hudi.command._ import org.apache.spark.sql.hudi.command.HoodieLeafRunnableCommand.stripMetaFieldAttributes import org.apache.spark.sql.hudi.command.InsertIntoHoodieTableCommand.alignQueryOutput @@ -169,6 +170,8 @@ object HoodieAnalysis extends SparkAdapterSupport { } rules += (spark => instantiateKlass(pruneFileSourcePartitionsClass, spark)) + rules += (session => ReadBlobRule(session)) + rules.toSeq } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/IndexCommands.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/IndexCommands.scala index 5510a188d5617..aedb40d51585c 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/IndexCommands.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/IndexCommands.scala @@ -152,7 +152,7 @@ case class DropIndexCommand(table: CatalogTable, /** * Command to show available indexes in hudi. 
The corresponding logical plan is available at - * org.apache.spark.sql.catalyst.plans.logical.ShowIndexes + * org.apache.spark.sql.catalyst.plans.logical.HoodieShowIndexes */ case class ShowIndexesCommand(table: CatalogTable, override val output: Seq[Attribute]) extends IndexBaseCommand { diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/TestHoodieParquetConfigInjector.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/TestHoodieParquetConfigInjector.java new file mode 100644 index 0000000000000..6ef80ae89b5ae --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/TestHoodieParquetConfigInjector.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.io.storage; + +import org.apache.hudi.client.SparkTaskContextSupplier; +import org.apache.hudi.common.config.HoodieStorageConfig; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; +import org.apache.hudi.common.schema.HoodieSchema; +import org.apache.hudi.common.testutils.DisableDictionaryInjector; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.io.HoodieParquetConfigInjector; +import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.table.HoodieSparkTable; +import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.testutils.HoodieClientTestBase; +import org.apache.hudi.testutils.SparkDatasetTestUtils; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.column.Encoding; +import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.hadoop.metadata.BlockMetaData; +import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; +import org.apache.parquet.hadoop.metadata.ParquetMetadata; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link HoodieParquetConfigInjector} functionality in {@link HoodieSparkFileWriterFactory}. + */ +public class TestHoodieParquetConfigInjector extends HoodieClientTestBase { + + /** + * Get HoodieSchema that matches SparkDatasetTestUtils.STRUCT_TYPE. 
+ * This schema includes metadata fields and matches the structure of rows generated by getRandomRows(). + */ + private HoodieSchema getStructTypeCompatibleSchema() { + // Create Avro schema that matches SparkDatasetTestUtils.STRUCT_TYPE + // STRUCT_TYPE has: commit_time, commit_seqno, record_key, partition_path, filename, + // RECORD_KEY_FIELD_NAME, PARTITION_PATH_FIELD_NAME, randomInt, randomLong + String avroSchema = "{" + + "\"type\":\"record\"," + + "\"name\":\"test_schema\"," + + "\"namespace\":\"test.namespace\"," + + "\"fields\":[" + + "{\"name\":\"_hoodie_commit_time\",\"type\":\"string\"}," + + "{\"name\":\"_hoodie_commit_seqno\",\"type\":\"string\"}," + + "{\"name\":\"_hoodie_record_key\",\"type\":\"string\"}," + + "{\"name\":\"_hoodie_partition_path\",\"type\":\"string\"}," + + "{\"name\":\"_hoodie_file_name\",\"type\":\"string\"}," + + "{\"name\":\"key\",\"type\":\"string\"}," + + "{\"name\":\"partition\",\"type\":\"string\"}," + + "{\"name\":\"randomInt\",\"type\":\"int\"}," + + "{\"name\":\"randomLong\",\"type\":\"long\"}" + + "]}"; + return HoodieSchema.parse(avroSchema); + } + + /** + * Helper method to convert InternalRow (potentially UnsafeRow) to mutable GenericInternalRow. + * This is needed because writeRowWithMetadata() calls row.update() which doesn't work on UnsafeRow. 
+ */ + private List toMutableRows(List internalRows) { + List mutableRows = new ArrayList<>(); + for (InternalRow row : internalRows) { + Object[] values = new Object[row.numFields()]; + for (int i = 0; i < row.numFields(); i++) { + if (row.isNullAt(i)) { + values[i] = null; + } else { + values[i] = row.get(i, SparkDatasetTestUtils.STRUCT_TYPE.fields()[i].dataType()); + } + } + mutableRows.add(new GenericInternalRow(values)); + } + return mutableRows; + } + + @Test + public void testDisableDictionaryEncodingViaInjector() throws Exception { + final String instantTime = "100"; + final StoragePath parquetPath = new StoragePath( + basePath + "/partition/path/test_dictionary_" + instantTime + ".parquet"); + + HoodieSchema schema = getStructTypeCompatibleSchema(); + + // Create config with the custom injector + final HoodieWriteConfig cfg = getConfigBuilder() + .withStorageConfig(HoodieStorageConfig.newBuilder() + .withParquetConfigInjectorClass(DisableDictionaryInjector.class.getName()) + .parquetDictionaryEnabled(true) // Start with dictionary enabled + .build()) + .build(); + + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + SparkTaskContextSupplier supplier = new SparkTaskContextSupplier(); + + // Create writer and write some data + HoodieFileWriter writer = HoodieFileWriterFactory.getFileWriter( + instantTime, parquetPath, table.getStorage(), cfg.getStorageConfig(), + schema, supplier, HoodieRecordType.SPARK); + + assertTrue(writer instanceof HoodieSparkParquetWriter); + + // Generate test data using SparkDatasetTestUtils and convert to InternalRows + Dataset rowDataset = SparkDatasetTestUtils.getRandomRows(sqlContext, 100, "partition/path", false); + List internalRows = toMutableRows(SparkDatasetTestUtils.toInternalRows(rowDataset, SparkDatasetTestUtils.ENCODER)); + List rows = rowDataset.collectAsList(); + + // Write some test records + HoodieSparkParquetWriter sparkWriter = (HoodieSparkParquetWriter) writer; + for (int i = 0; i < 
internalRows.size(); i++) { + InternalRow row = internalRows.get(i); + String recordKey = rows.get(i).getString(2); // record_key is at index 2 (after commit_time and seq_no) + String partition = rows.get(i).getString(3); // partition is at index 3 + sparkWriter.writeRowWithMetadata(new HoodieKey(recordKey, partition), row); + } + writer.close(); + + // Verify the parquet file was created + assertTrue(table.getStorage().exists(parquetPath)); + + // Read parquet metadata and verify dictionary encoding is disabled + Configuration hadoopConf = new Configuration(); + Path hadoopPath = new Path(parquetPath.toUri()); + // try-with-resources closes the reader even if reading the footer throws + ParquetMetadata metadata; + try (ParquetFileReader reader = ParquetFileReader.open(hadoopConf, hadoopPath)) { + metadata = reader.getFooter(); + } + + assertNotNull(metadata); + + // Verify that dictionary encoding is NOT used for any column + // When dictionary encoding is disabled, columns should use PLAIN or other encodings but not RLE_DICTIONARY + for (BlockMetaData block : metadata.getBlocks()) { + for (ColumnChunkMetaData column : block.getColumns()) { + // Check all encodings used for this column - should not include RLE_DICTIONARY or PLAIN_DICTIONARY + for (Encoding encoding : column.getEncodings()) { + assertFalse(encoding == Encoding.RLE_DICTIONARY || encoding == Encoding.PLAIN_DICTIONARY, + "Column " + column.getPath() + " should not use dictionary encoding, but found: " + encoding); + } + } + } + } + + @Test + public void testInvalidInjectorClassThrowsException() throws IOException { + final String instantTime = "102"; + final StoragePath parquetPath = new StoragePath( + basePath + "/partition/path/test_invalid_" + instantTime + ".parquet"); + + HoodieSchema schema = getStructTypeCompatibleSchema(); + + // Create config with an invalid/non-existent injector class + final HoodieWriteConfig cfg = getConfigBuilder() + .withStorageConfig(HoodieStorageConfig.newBuilder() + .withParquetConfigInjectorClass("org.apache.hudi.NonExistentInjector") + .build())
+ .build(); + + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + SparkTaskContextSupplier supplier = new SparkTaskContextSupplier(); + + // Should throw an exception when trying to create the writer + assertThrows(Exception.class, () -> { + HoodieFileWriterFactory.getFileWriter( + instantTime, parquetPath, table.getStorage(), cfg.getStorageConfig(), + schema, supplier, HoodieRecordType.SPARK); + }); + } + + @Test + public void testNoInjectorUsesDefaultConfig() throws Exception { + final String instantTime = "103"; + final StoragePath parquetPath = new StoragePath( + basePath + "/partition/path/test_no_injector_" + instantTime + ".parquet"); + + HoodieSchema schema = getStructTypeCompatibleSchema(); + + // Create config WITHOUT injector - should use default settings + final HoodieWriteConfig cfg = getConfigBuilder() + .withStorageConfig(HoodieStorageConfig.newBuilder() + .parquetDictionaryEnabled(true) + .build()) + .build(); + + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + SparkTaskContextSupplier supplier = new SparkTaskContextSupplier(); + + // Create writer and write some data + HoodieFileWriter writer = HoodieFileWriterFactory.getFileWriter( + instantTime, parquetPath, table.getStorage(), cfg.getStorageConfig(), + schema, supplier, HoodieRecordType.SPARK); + + assertTrue(writer instanceof HoodieSparkParquetWriter); + + // Generate test data using SparkDatasetTestUtils and convert to InternalRows + Dataset rowDataset = SparkDatasetTestUtils.getRandomRows(sqlContext, 10, "partition/path", false); + List internalRows = toMutableRows(SparkDatasetTestUtils.toInternalRows(rowDataset, SparkDatasetTestUtils.ENCODER)); + List rows = rowDataset.collectAsList(); + + // Write some test records + HoodieSparkParquetWriter sparkWriter = (HoodieSparkParquetWriter) writer; + for (int i = 0; i < internalRows.size(); i++) { + InternalRow row = internalRows.get(i); + String recordKey = rows.get(i).getString(2); // record_key is 
at index 2 (after commit_time and seq_no) + String partition = rows.get(i).getString(3); // partition is at index 3 + sparkWriter.writeRowWithMetadata(new HoodieKey(recordKey, partition), row); + } + writer.close(); + + // Verify the parquet file was created + assertTrue(table.getStorage().exists(parquetPath)); + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/TestHoodieSparkLanceReader.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/TestHoodieSparkLanceReader.java index b2593fc908162..4aef3d8a16b0e 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/TestHoodieSparkLanceReader.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/TestHoodieSparkLanceReader.java @@ -27,6 +27,8 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.schema.HoodieSchema; +import org.apache.hudi.common.schema.HoodieSchemaField; +import org.apache.hudi.common.schema.HoodieSchemaType; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ClosableIterator; @@ -41,6 +43,9 @@ import org.apache.spark.sql.catalyst.util.GenericArrayData; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Decimal; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.MetadataBuilder; +import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -648,6 +653,38 @@ public void testReadWithRequestedSchema() throws Exception { } } + @Test + public void testGetSchemaRestoresVectorMetadata() throws Exception { + int dim = 4; + Metadata vectorFieldMetadata = new MetadataBuilder() + .putString(HoodieSchema.TYPE_METADATA_FIELD, "VECTOR(" + dim + ")") 
+ .build(); + StructType schema = new StructType() + .add(new StructField("id", DataTypes.IntegerType, false, Metadata.empty())) + .add(new StructField( + "embedding", + DataTypes.createArrayType(DataTypes.FloatType, false), + false, + vectorFieldMetadata)); + + List rows = new ArrayList<>(); + rows.add(createRow(1, new Object[] {1.0f, 2.0f, 3.0f, 4.0f})); + rows.add(createRow(2, new Object[] {5.0f, 6.0f, 7.0f, 8.0f})); + + StoragePath path = new StoragePath(tempDir.getAbsolutePath() + "/test_vector_schema.lance"); + try (HoodieSparkLanceReader reader = writeAndCreateReader(path, schema, rows)) { + HoodieSchema readSchema = reader.getSchema(); + HoodieSchemaField embedding = readSchema.getField("embedding") + .orElseThrow(() -> new AssertionError("embedding field missing on read schema")); + HoodieSchema fieldSchema = embedding.schema().getNonNullType(); + assertEquals(HoodieSchemaType.VECTOR, fieldSchema.getType(), + "embedding must be restored as VECTOR from the FixedSizeList encoding alone"); + HoodieSchema.Vector vec = (HoodieSchema.Vector) fieldSchema; + assertEquals(dim, vec.getDimension()); + assertEquals(HoodieSchema.Vector.VectorElementType.FLOAT, vec.getVectorElementType()); + } + } + private void assertBloomFilter(HoodieSparkLanceReader reader, Class clazz, String minKey, String maxKey, int keyCount) { BloomFilter bloomFilter = reader.readBloomFilter(); assertInstanceOf(clazz, bloomFilter); diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/BlobTestHelpers.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/BlobTestHelpers.scala index 8783422c51b4e..b94229aa478e6 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/BlobTestHelpers.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/BlobTestHelpers.scala @@ -39,8 +39,8 @@ object BlobTestHelpers { def inlineBlobStructCol(name: String, bytesCol: Column): Column = { struct( 
lit(HoodieSchema.Blob.INLINE).as(HoodieSchema.Blob.TYPE), - bytesCol.as(HoodieSchema.Blob.INLINE_DATA_FIELD), - lit(null).cast("struct") + bytesCol.cast("binary").as(HoodieSchema.Blob.INLINE_DATA_FIELD), + lit(null).cast("struct") .as(HoodieSchema.Blob.EXTERNAL_REFERENCE) ).as(name, blobMetadata) } @@ -65,7 +65,7 @@ object BlobTestHelpers { struct( filePathCol.as(HoodieSchema.Blob.EXTERNAL_REFERENCE_PATH), offsetCol.as(HoodieSchema.Blob.EXTERNAL_REFERENCE_OFFSET), - lengthCol.as(HoodieSchema.Blob.EXTERNAL_REFERENCE_LENGTH), + lengthCol.cast("bigint").as(HoodieSchema.Blob.EXTERNAL_REFERENCE_LENGTH), lit(false).as(HoodieSchema.Blob.EXTERNAL_REFERENCE_IS_MANAGED) ).as(HoodieSchema.Blob.EXTERNAL_REFERENCE) ).as(name, blobMetadata) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/TestBatchedBlobReader.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/TestBatchedBlobReader.scala new file mode 100644 index 0000000000000..12f4496cc3777 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/TestBatchedBlobReader.scala @@ -0,0 +1,457 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.blob + +import org.apache.hudi.blob.BlobTestHelpers._ +import org.apache.hudi.common.schema.HoodieSchema +import org.apache.hudi.testutils.HoodieClientTestBase + +import org.apache.spark.SparkException +import org.apache.spark.sql.Row +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.hudi.blob.BatchedBlobReader +import org.apache.spark.sql.types._ +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.Test + +import java.util.Collections + +/** + * Tests for BatchedBlobReader. + * + * These tests verify the batching behavior and effectiveness of the + * BatchedBlobReader compared to non-batched approaches. + */ +class TestBatchedBlobReader extends HoodieClientTestBase { + + @Test + def testBasicBatchedRead(): Unit = { + val filePath = createTestFile(tempDir, "basic.bin", 10000) + + // Create input with struct column + val inputDF = sparkSession.createDataFrame(Seq( + (filePath, 0L, 100L), + (filePath, 100L, 100L), + (filePath, 200L, 100L) + )).toDF("external_path", "offset", "length") + .withColumn("data", blobStructCol("data", col("external_path"), col("offset"), col("length"))) + .select("offset", "data") + + // Read with batching + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf) + + // Verify schema + assertTrue(resultDF.columns.contains("data")) + assertEquals(2, resultDF.columns.length) // offset, data + + // Verify results + val results = resultDF.orderBy("offset").collect() + assertEquals(3, results.length) + + // Check data content + results.zipWithIndex.foreach { case (row, i) => + val data = row.getAs[Array[Byte]]("data") + assertEquals(100, data.length) + + // Verify content matches expected pattern + assertBytesContent(data, expectedOffset = i * 100) + } + } + + @Test + def testNoBatchingDifferentFiles(): Unit = { + // Create different files + val file1 = createTestFile(tempDir, "file1.bin", 5000) + val file2 = createTestFile(tempDir, "file2.bin", 5000) +
val file3 = createTestFile(tempDir, "file3.bin", 5000) + + // Reads from different files (no batching possible) + val inputDF = sparkSession.createDataFrame(Seq( + (file1, 0L, 100L), + (file2, 0L, 100L), + (file3, 0L, 100L) + )).toDF("external_path", "offset", "length") + .withColumn("data", blobStructCol("data", col("external_path"), col("offset"), col("length"))) + .select("data") + + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf) + + val results = resultDF.collect() + assertEquals(3, results.length) + + // Verify all reads succeeded + results.foreach { row => + val data = row.getAs[Array[Byte]]("data") + assertEquals(100, data.length) + } + } + + @Test + def testGapThresholdSmallGaps(): Unit = { + + val filePath = createTestFile(tempDir, "small-gaps.bin", 10000) + + // Reads with small gaps (should batch with default threshold of 4KB) + val inputDF = sparkSession.createDataFrame(Seq( + (filePath, 0L, 100L), + (filePath, 120L, 100L), // 20 byte gap + (filePath, 240L, 100L), // 20 byte gap + (filePath, 360L, 100L) // 20 byte gap + )).toDF("external_path", "offset", "length") + .withColumn("data", blobStructCol("data", col("external_path"), col("offset"), col("length"))) + .select("data") + + // Use default maxGapBytes=4096 which should batch these + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf, maxGapBytes = 4096) + + val results = resultDF.collect() + assertEquals(4, results.length) + + results.foreach { row => + val data = row.getAs[Array[Byte]]("data") + assertEquals(100, data.length) + } + } + + @Test + def testGapThresholdLargeGaps(): Unit = { + + val filePath = createTestFile(tempDir, "large-gaps.bin", 50000) + + // Reads with large gaps (should NOT batch with small threshold) + val inputDF = sparkSession.createDataFrame(Seq( + (filePath, 0L, 100L), + (filePath, 10000L, 100L), // 9.9KB gap + (filePath, 20000L, 100L), // 9.9KB gap + (filePath, 30000L, 100L) // 9.9KB gap + )).toDF("external_path", "offset", "length") + 
.withColumn("data", blobStructCol("data", col("external_path"), col("offset"), col("length"))) + .select("data") + + // Use small maxGapBytes that won't batch these + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf, maxGapBytes = 1000) + + val results = resultDF.collect() + assertEquals(4, results.length) + + // Verify data correctness + results.zipWithIndex.foreach { case (row, i) => + val data = row.getAs[Array[Byte]]("data") + assertEquals(100, data.length) + + // Input row i was requested at offset i * 10000; this relies on readBatched + // returning rows in input order, as testPreserveInputOrder asserts + assertBytesContent(data, expectedOffset = i * 10000) + } + } + + @Test + def testPreserveInputOrder(): Unit = { + + val filePath = createTestFile(tempDir, "order.bin", 10000) + + // Create input in specific order with record IDs + val inputDF = sparkSession.createDataFrame(Seq( + (filePath, 0L, 100L, "rec1"), + (filePath, 100L, 100L, "rec2"), + (filePath, 200L, 100L, "rec3"), + (filePath, 300L, 100L, "rec4") + )).toDF("external_path", "offset", "length", "record_id") + .withColumn("data", blobStructCol("data", col("external_path"), col("offset"), col("length"))) + .select("data", "record_id") + + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf) + + val results = resultDF.collect() + assertEquals(4, results.length) + + // Verify order is preserved + assertEquals("rec1", results(0).getAs[String]("record_id")) + assertEquals("rec2", results(1).getAs[String]("record_id")) + assertEquals("rec3", results(2).getAs[String]("record_id")) + assertEquals("rec4", results(3).getAs[String]("record_id")) + } + + @Test + def testMixedScenario(): Unit = { + + val file1 = createTestFile(tempDir, "mixed1.bin", 10000) + val file2 = createTestFile(tempDir, "mixed2.bin", 10000) + + // Mix of batchable and non-batchable reads + val inputDF = sparkSession.createDataFrame(Seq( + // Batchable group from file1 + (file1, 0L, 100L), + (file1, 100L, 100L), + (file1, 200L, 100L), + // Single read from file2 + (file2, 0L, 100L), + // Another batchable group from file1 + (file1, 300L, 100L), + (file1, 400L, 100L), + // Large gap in file1 (may not batch
depending on threshold) + (file1, 5000L, 100L) + )).toDF("external_path", "offset", "length") + .withColumn("data", blobStructCol("data", col("external_path"), col("offset"), col("length"))) + .select("data") + + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf) + + val results = resultDF.collect() + assertEquals(7, results.length) + + // Verify all reads succeeded + results.foreach { row => + val data = row.getAs[Array[Byte]]("data") + assertEquals(100, data.length) + } + } + + @Test + def testEmptyDataset(): Unit = { + + val inputDF = sparkSession.createDataFrame(Seq.empty[(String, Long, Int)]) + .toDF("external_path", "offset", "length") + .withColumn("data", blobStructCol("data", col("external_path"), col("offset"), col("length"))) + .select("data") + + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf) + + assertEquals(0, resultDF.count()) + } + + @Test + def testPreserveAdditionalColumns(): Unit = { + + val filePath = createTestFile(tempDir, "preserve-cols.bin", 5000) + + // Input with multiple additional columns + val inputDF = sparkSession.createDataFrame(Seq( + (filePath, 0L, 100L, "rec1", 42, true, 3.14), + (filePath, 100L, 100L, "rec2", 43, false, 2.71) + )).toDF("external_path", "offset", "length", "record_id", "sequence", "flag", "value") + .withColumn("data", blobStructCol("data", col("external_path"), col("offset"), col("length"))) + .select("data", "record_id", "sequence", "flag", "value") + + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf) + + // Verify all columns are preserved + assertColumnsExist(resultDF, "data", "record_id", "sequence", "flag", "value") + + val results = resultDF.collect() + assertEquals(2, results.length) + + // Verify data integrity + assertEquals("rec1", results(0).getAs[String]("record_id")) + assertEquals(42, results(0).getAs[Int]("sequence")) + assertEquals(true, results(0).getAs[Boolean]("flag")) + assertEquals(3.14, results(0).getAs[Double]("value"), 0.001) + + 
assertEquals("rec2", results(1).getAs[String]("record_id")) + assertEquals(43, results(1).getAs[Int]("sequence")) + assertEquals(false, results(1).getAs[Boolean]("flag")) + assertEquals(2.71, results(1).getAs[Double]("value"), 0.001) + } + + @Test + def testExplicitColumnNameWithMultipleBlobColumns(): Unit = { + // Test that explicit column name resolves the correct column + // when multiple blob columns exist in the schema + + val file1 = createTestFile(tempDir, "blob1.bin", 5000) + val file2 = createTestFile(tempDir, "blob2.bin", 5000) + + // Create DataFrame with two blob columns + val inputDF = sparkSession.createDataFrame(Seq( + (1, file1, 0L, 100L, file2, 0L, 50L), + (2, file1, 100L, 100L, file2, 50L, 50L) + )).toDF("id", "path1", "offset1", "len1", "path2", "offset2", "len2") + .withColumn("blob1", blobStructCol("blob1", col("path1"), col("offset1"), col("len1"))) + .withColumn("blob2", blobStructCol("blob2", col("path2"), col("offset2"), col("len2"))) + .select("id", "blob1", "blob2") + + // Resolve blob1 explicitly + val result1 = BatchedBlobReader.readBatched( + inputDF, + storageConf, + columnName = Some("blob1") + ) + + val rows1 = result1.collect() + assertEquals(2, rows1.length) + + // Verify blob1 data was read (100 bytes) + rows1.foreach { row => + val data = row.getAs[Array[Byte]]("blob1") + assertEquals(100, data.length) + } + + // Resolve blob2 explicitly + val result2 = BatchedBlobReader.readBatched( + inputDF, + storageConf, + columnName = Some("blob2") + ) + + val rows2 = result2.collect() + assertEquals(2, rows2.length) + + // Verify blob2 data was read (50 bytes) + rows2.foreach { row => + val data = row.getAs[Array[Byte]]("blob2") + assertEquals(50, data.length) + } + } + + @Test + def testFallbackToMetadataWhenNoColumnNameProvided(): Unit = { + // Test that when no explicit column name is provided, + // it falls back to searching for hudi_blob=true metadata + + val filePath = createTestFile(tempDir, "fallback.bin", 5000) + + // Create 
DataFrame with blob metadata (the traditional way) + val inputDF = sparkSession.createDataFrame(Seq( + (filePath, 0L, 100L), + (filePath, 100L, 100L) + )).toDF("external_path", "offset", "length") + .withColumn("data", blobStructCol("data", col("external_path"), col("offset"), col("length"))) + .select("data") + + // Should work without explicit column name (uses metadata) + val resultDF = BatchedBlobReader.readBatched( + inputDF, + storageConf + // Note: columnName parameter is NOT provided + ) + + val results = resultDF.collect() + assertEquals(2, results.length) + + results.foreach { row => + val data = row.getAs[Array[Byte]]("data") + assertEquals(100, data.length) + } + } + + @Test + def testInlineBlobRead(): Unit = { + val inlineData = Array[Byte](10, 20, 30, 40, 50) + val inputDF = sparkSession.createDataFrame(Seq(Tuple1("rec1"))) + .toDF("record_id") + .withColumn("data", inlineBlobStructCol("data", lit(inlineData))) + .select("data", "record_id") + + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf) + val results = resultDF.collect() + assertEquals(1, results.length) + assertArrayEquals(inlineData, results(0).getAs[Array[Byte]]("data")) + assertEquals("rec1", results(0).getAs[String]("record_id")) + } + + @Test + def testWholeFileBlobRead(): Unit = { + val size = 500 + val filePath = createTestFile(tempDir, "whole.bin", size) + + val inputDF = sparkSession.createDataFrame(Seq(Tuple1("rec1"))) + .toDF("record_id") + .withColumn("data", wholeFileBlobStructCol("data", lit(filePath))) + .select("data", "record_id") + + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf) + val results = resultDF.collect() + assertEquals(1, results.length) + val data = results(0).getAs[Array[Byte]]("data") + assertEquals(size, data.length) + assertBytesContent(data, expectedOffset = 0) + } + + @Test + def testMixedBlobTypes(): Unit = { + val fileSize = 1000 + val filePath = createTestFile(tempDir, "mixed-types.bin", fileSize) + val inlineData = 
Array[Byte](7, 8, 9) + + val outerSchema = StructType(Seq( + StructField("record_id", StringType, nullable = false), + StructField("data", BlobType(), nullable = false, metadata = blobMetadata) + )) + + val rows = Seq( + Row("inline_row", Row(HoodieSchema.Blob.INLINE, inlineData, null)), + Row("wholefile_row", Row(HoodieSchema.Blob.OUT_OF_LINE, null, Row(filePath, null, null, false))), + Row("range_row_1", Row(HoodieSchema.Blob.OUT_OF_LINE, null, Row(filePath, 0L, 100L, false))), + Row("range_row_2", Row(HoodieSchema.Blob.OUT_OF_LINE, null, Row(filePath, 100L, 100L, false))) + ) + + val inputDF = sparkSession.createDataFrame( + sparkSession.sparkContext.parallelize(rows), outerSchema) + + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf, columnName = Some("data")) + val results = resultDF.orderBy("record_id").collect() + assertEquals(4, results.length) + + val inlineRow = results.find(_.getAs[String]("record_id") == "inline_row").get + assertArrayEquals(inlineData, inlineRow.getAs[Array[Byte]]("data")) + + val wholeFileRow = results.find(_.getAs[String]("record_id") == "wholefile_row").get + val wholeFileData = wholeFileRow.getAs[Array[Byte]]("data") + assertEquals(fileSize, wholeFileData.length) + + val range1 = results.find(_.getAs[String]("record_id") == "range_row_1").get + assertEquals(100, range1.getAs[Array[Byte]]("data").length) + assertBytesContent(range1.getAs[Array[Byte]]("data"), expectedOffset = 0) + + val range2 = results.find(_.getAs[String]("record_id") == "range_row_2").get + assertEquals(100, range2.getAs[Array[Byte]]("data").length) + assertBytesContent(range2.getAs[Array[Byte]]("data"), expectedOffset = 100) + } + + @Test + def testNullBlobStructColumnReturnsNull(): Unit = { + // Create a DataFrame with a null struct column + val schema = StructType(Seq(StructField("data", BlobType.dataType, nullable = true, metadata = blobMetadata))) + val rows = Collections.singletonList(Row(null)) + val inputDF = 
sparkSession.createDataFrame(rows, schema) + + val resultDF = BatchedBlobReader.readBatched(inputDF, storageConf) + val results = resultDF.collect() + assertEquals(1, results.length) + assertTrue(results(0).isNullAt(0)) + } + + @Test + def testOverlappingRangesThrowsException(): Unit = { + val filePath = createTestFile(tempDir, "overlap.bin", 1000) + // Overlapping: [0, 100) and [50, 150) + val inputDF = sparkSession.createDataFrame(Seq( + (filePath, 0L, 100L), + (filePath, 50L, 100L) + )).toDF("external_path", "offset", "length") + .withColumn("data", blobStructCol("data", col("external_path"), col("offset"), col("length"))) + .select("offset", "data") + .coalesce(1) + + val thrown = assertThrows(classOf[SparkException], () => { + val rows = BatchedBlobReader.readBatched(inputDF, storageConf).collect() + // Force access to the data column to trigger the batch read logic + rows.foreach(row => row.getAs[Array[Byte]]("data")) + }) + assertTrue(thrown.getCause.isInstanceOf[IllegalArgumentException]) + assertTrue(thrown.getCause.getMessage.contains("Overlapping blob ranges detected")) + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/TestBlobSupport.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/TestBlobSupport.scala index 22e5eee777736..5b3fb0ce0e5d6 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/TestBlobSupport.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/TestBlobSupport.scala @@ -29,11 +29,12 @@ import org.apache.hudi.config.HoodieIndexConfig import org.apache.hudi.index.HoodieIndex import org.apache.hudi.testutils.HoodieClientTestBase -import org.apache.avro.generic.{GenericData, GenericRecord, IndexedRecord} -import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.apache.avro.generic.{GenericData, IndexedRecord} +import org.junit.jupiter.api.Assertions.{assertArrayEquals, assertEquals, assertTrue} +import
org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.EnumSource +import java.nio.ByteBuffer import java.util.{Arrays, Properties} import scala.collection.JavaConverters._ @@ -106,10 +107,132 @@ class TestBlobSupport extends HoodieClientTestBase with SparkDatasetMixin { assertEquals(10, rows.size()) rows.asScala.foreach { row => + val i = row.getInt(row.fieldIndex("value")) val data = row.getStruct(row.fieldIndex("data")) + assertEquals(HoodieSchema.Blob.OUT_OF_LINE, + data.getString(data.fieldIndex(HoodieSchema.Blob.TYPE))) + assertTrue(data.isNullAt(data.fieldIndex(HoodieSchema.Blob.INLINE_DATA_FIELD))) val reference = data.getStruct(data.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE)) val filePath = reference.getString(reference.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE_PATH)) assertTrue(filePath.endsWith("file2.bin")) + assertEquals(i * 100L, + reference.getLong(reference.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE_OFFSET))) + assertEquals(100L, + reference.getLong(reference.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE_LENGTH))) + assertEquals(false, + reference.getBoolean(reference.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE_IS_MANAGED))) + } + + // Verify SQL read_blob() returns bytes matching the referenced file region. 
+ table.createOrReplaceTempView("hudi_table_view") + val sqlRows = sparkSession.sql( + "SELECT id, value, read_blob(data) as full_bytes from hudi_table_view ORDER BY value") + .collectAsList() + assertEquals(10, sqlRows.size()) + sqlRows.asScala.foreach { row => + val i = row.getInt(row.fieldIndex("value")) + val bytes = row.getAs[Array[Byte]]("full_bytes") + assertEquals(100, bytes.length) + assertBytesContent(bytes, expectedOffset = i * 100) + } + } + + @ParameterizedTest + @EnumSource(classOf[HoodieTableType]) + def testEndToEndInline(tableType: HoodieTableType): Unit = { + val properties = new Properties() + properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), "id") + properties.put(HoodieTableConfig.PARTITION_FIELDS.key(), "") + properties.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), HoodieFileFormat.PARQUET.toString) + + HoodieTableMetaClient.newTableBuilder() + .setTableName("test_inline_blob_table") + .setTableType(tableType) + .fromProperties(properties) + .initTable(storageConf, basePath) + + var client: SparkRDDWriteClient[IndexedRecord] = null + val config = getConfigBuilder(SCHEMA.toString()) + .withIndexConfig(HoodieIndexConfig.newBuilder.withIndexType(HoodieIndex.IndexType.SIMPLE).build) + .build() + try { + client = getHoodieWriteClient(config).asInstanceOf[SparkRDDWriteClient[IndexedRecord]] + + // First commit - insert ids 0..9 with payload prefix 0xA. + val commit1 = client.startCommit() + val firstBatch = createInlineTestRecords(0 until 10, payloadPrefix = 0xA.toByte) + val statuses1 = client.insert(jsc.parallelize(firstBatch.asJava, 1), commit1).collect() + client.commit(commit1, jsc.parallelize(statuses1, 1)) + + // Second commit - upsert only ids 5..9 with payload prefix 0xB. This leaves ids 0..4 + // untouched (still 0xA) and forces the read side to correctly merge updated and + // non-updated records — a pure full-overwrite upsert would not exercise that path. 
+ val commit2 = client.startCommit() + val secondBatch = createInlineTestRecords(5 until 10, payloadPrefix = 0xB.toByte) + val statuses2 = client.upsert(jsc.parallelize(secondBatch.asJava, 1), commit2).collect() + client.commit(commit2, jsc.parallelize(statuses2, 1)) + } finally { + if (client != null) client.close() + } + + val table = sparkSession.read.format("hudi").load(basePath) + val rows = table.collectAsList() + assertEquals(10, rows.size()) + + // Direct struct-field access — verifies INLINE bytes round-tripped + // through Parquet and the data field is populated (reference null). + // ids 0..4 retain the 0xA payload from commit1; ids 5..9 carry the + // 0xB payload produced by the partial upsert in commit2. + rows.asScala.foreach { row => + val data = row.getStruct(row.fieldIndex("data")) + assertEquals(HoodieSchema.Blob.INLINE, + data.getString(data.fieldIndex(HoodieSchema.Blob.TYPE))) + assertTrue(data.isNullAt(data.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE))) + val bytes = data.getAs[Array[Byte]](HoodieSchema.Blob.INLINE_DATA_FIELD) + val value = row.getInt(row.fieldIndex("value")) + val expectedPrefix: Byte = if (value < 5) 0xA.toByte else 0xB.toByte + assertArrayEquals(expectedInlinePayload(expectedPrefix, value), bytes) + } + + // SQL read_blob() — INLINE passthrough through the planner/exec path. 
+ table.createOrReplaceTempView("hudi_inline_table_view") + val sqlRows = sparkSession.sql( + "SELECT id, value, read_blob(data) as full_bytes from hudi_inline_table_view ORDER BY value") + .collectAsList() + assertEquals(10, sqlRows.size()) + sqlRows.asScala.foreach { row => + val value = row.getInt(row.fieldIndex("value")) + val bytes = row.getAs[Array[Byte]]("full_bytes") + val expectedPrefix: Byte = if (value < 5) 0xA.toByte else 0xB.toByte + assertArrayEquals(expectedInlinePayload(expectedPrefix, value), bytes) + } + } + + private def expectedInlinePayload(prefix: Byte, value: Int): Array[Byte] = { + // Distinct per-record payload so upserts can be verified by content. + Array[Byte](prefix, value.toByte, (value + 1).toByte, (value + 2).toByte) + } + + private def createInlineTestRecords(ids: Range, payloadPrefix: Byte): Seq[HoodieRecord[IndexedRecord]] = { + ids.map { i => + val id = s"id_$i" + val key = new HoodieKey(id, "") + + val dataSchema = SCHEMA.getField("data").get.schema + val blobRecord = new GenericData.Record(dataSchema.toAvroSchema) + blobRecord.put(HoodieSchema.Blob.TYPE, new GenericData.EnumSymbol( + dataSchema.getField(HoodieSchema.Blob.TYPE).get.schema.toAvroSchema, + HoodieSchema.Blob.INLINE)) + blobRecord.put(HoodieSchema.Blob.INLINE_DATA_FIELD, + ByteBuffer.wrap(expectedInlinePayload(payloadPrefix, i))) + // EXTERNAL_REFERENCE left null — the union default in HoodieSchema.Blob. 
+ + val record = new GenericData.Record(SCHEMA.toAvroSchema) + record.put("id", id) + record.put("value", i) + record.put("data", blobRecord) + + new HoodieAvroIndexedRecord(key, record) } } @@ -139,4 +262,91 @@ class TestBlobSupport extends HoodieClientTestBase with SparkDatasetMixin { new HoodieAvroIndexedRecord(key, record) } } + + @ParameterizedTest + @EnumSource(classOf[HoodieTableType]) + def testMixedInlineAndOutOfLine(tableType: HoodieTableType): Unit = { + val filePath = createTestFile(tempDir, "mixed_file.bin", 1000) + + val properties = new Properties() + properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), "id") + properties.put(HoodieTableConfig.PARTITION_FIELDS.key(), "") + properties.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), HoodieFileFormat.PARQUET.toString) + + HoodieTableMetaClient.newTableBuilder() + .setTableName("test_mixed_blob_table") + .setTableType(tableType) + .fromProperties(properties) + .initTable(storageConf, basePath) + + var client: SparkRDDWriteClient[IndexedRecord] = null + val config = getConfigBuilder(SCHEMA.toString()) + .withIndexConfig(HoodieIndexConfig.newBuilder.withIndexType(HoodieIndex.IndexType.SIMPLE).build) + .build() + try { + client = getHoodieWriteClient(config).asInstanceOf[SparkRDDWriteClient[IndexedRecord]] + val commit = client.startCommit() + val records = (0 until 10).map { i => + val storageType = if (i % 2 == 0) HoodieSchema.Blob.INLINE else HoodieSchema.Blob.OUT_OF_LINE + createMixedRecord(i, storageType, filePath, inlinePrefix = 0xC.toByte) + } + val statuses = client.insert(jsc.parallelize(records.asJava, 1), commit).collect() + client.commit(commit, jsc.parallelize(statuses, 1)) + } finally { + if (client != null) client.close() + } + + val table = sparkSession.read.format("hudi").load(basePath) + table.createOrReplaceTempView("hudi_mixed_table_view") + val sqlRows = sparkSession.sql( + "SELECT id, value, read_blob(data) as full_bytes from hudi_mixed_table_view ORDER BY value") + 
.collectAsList() + assertEquals(10, sqlRows.size()) + sqlRows.asScala.foreach { row => + val value = row.getInt(row.fieldIndex("value")) + val bytes = row.getAs[Array[Byte]]("full_bytes") + if (value % 2 == 0) { + assertArrayEquals(expectedInlinePayload(0xC.toByte, value), bytes) + } else { + assertEquals(100, bytes.length) + assertBytesContent(bytes, expectedOffset = value * 100) + } + } + } + + private def createMixedRecord( + value: Int, + storageType: String, + filePath: String, + inlinePrefix: Byte): HoodieRecord[IndexedRecord] = { + val id = s"id_$value" + val key = new HoodieKey(id, "") + val dataSchema = SCHEMA.getField("data").get.schema + val blobRecord = new GenericData.Record(dataSchema.toAvroSchema) + + if (storageType == HoodieSchema.Blob.INLINE) { + blobRecord.put(HoodieSchema.Blob.TYPE, new GenericData.EnumSymbol( + dataSchema.getField(HoodieSchema.Blob.TYPE).get.schema.toAvroSchema, + HoodieSchema.Blob.INLINE)) + blobRecord.put(HoodieSchema.Blob.INLINE_DATA_FIELD, + ByteBuffer.wrap(expectedInlinePayload(inlinePrefix, value))) + } else { + val fileReference = new GenericData.Record(dataSchema.getField(HoodieSchema.Blob.EXTERNAL_REFERENCE) + .get.getNonNullSchema.toAvroSchema) + fileReference.put(HoodieSchema.Blob.EXTERNAL_REFERENCE_PATH, filePath) + fileReference.put(HoodieSchema.Blob.EXTERNAL_REFERENCE_OFFSET, value * 100L) + fileReference.put(HoodieSchema.Blob.EXTERNAL_REFERENCE_LENGTH, 100L) + fileReference.put(HoodieSchema.Blob.EXTERNAL_REFERENCE_IS_MANAGED, false) + blobRecord.put(HoodieSchema.Blob.TYPE, new GenericData.EnumSymbol( + dataSchema.getField(HoodieSchema.Blob.TYPE).get.schema.toAvroSchema, + HoodieSchema.Blob.OUT_OF_LINE)) + blobRecord.put(HoodieSchema.Blob.EXTERNAL_REFERENCE, fileReference) + } + + val record = new GenericData.Record(SCHEMA.toAvroSchema) + record.put("id", id) + record.put("value", value) + record.put("data", blobRecord) + new HoodieAvroIndexedRecord(key, record) + } } diff --git 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/TestReadBlobSQL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/TestReadBlobSQL.scala new file mode 100644 index 0000000000000..533c9589e338c --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/blob/TestReadBlobSQL.scala @@ -0,0 +1,498 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.blob + +import org.apache.hudi.blob.BlobTestHelpers._ +import org.apache.hudi.common.schema.HoodieSchema +import org.apache.hudi.exception.HoodieIOException +import org.apache.hudi.testutils.HoodieClientTestBase + +import org.apache.spark.sql.Row +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ +import org.junit.jupiter.api.Assertions._ +import org.junit.jupiter.api.Test + +import java.util.Collections + +/** + * Tests for the read_blob() SQL function. + * + * This test suite verifies: + *
<ul>
+ *   <li>Basic SQL integration with read_blob()</li>
+ *   <li>Integration with WHERE clauses, JOINs</li>
+ *   <li>Configuration parameter handling</li>
+ *   <li>Error handling for invalid inputs</li>
+ * </ul>
+ */ +class TestReadBlobSQL extends HoodieClientTestBase { + + @Test + def testReadOutOfLineBlobOnHudiBackedTable(): Unit = { + // Verifies read_blob()'s logical plan is task-serializable over a + // HoodieFileIndex-backed relation (DataFrame write path). + val extFile = createTestFile(tempDir, "basic.bin", 10000) + val tablePath = s"$tempDir/hudi_blob_table" + + val rawDf = sparkSession.createDataFrame(Seq( + (1, "rec1", extFile, 0L, 100L), + (2, "rec2", extFile, 100L, 100L), + (3, "rec3", extFile, 200L, 100L) + )).toDF("id", "name", "external_path", "offset", "length") + .withColumn("file_info", + blobStructCol("file_info", col("external_path"), col("offset"), + col("length"))) + .select("id", "name", "file_info") + + // Coerce to the canonical BlobType schema. blobStructCol produces a + // non-null reference struct, but HoodieSparkSchemaConverters + // .validateBlobStructure rejects that on write — it demands the + // reference field be nullable. Rebuild the DataFrame on its RDD + // against the canonical shape so .save() doesn't fail early. + val canonicalSchema = StructType(Seq( + StructField("id", IntegerType, nullable = false), + StructField("name", StringType, nullable = true), + StructField("file_info", BlobType().asInstanceOf[StructType], + nullable = true, blobMetadata) + )) + val df = sparkSession.createDataFrame(rawDf.rdd, canonicalSchema) + + df.write.format("hudi") + .option("hoodie.table.name", "blob_test") + .option("hoodie.datasource.write.recordkey.field", "id") + .option("hoodie.datasource.write.operation", "bulk_insert") + .mode("overwrite") + .save(tablePath) + + // Hudi read pulls HoodieFileIndex into the plan that BatchedBlobReadExec + // serializes to executors — the scenario the exec-node fix must handle. 
+ sparkSession.read.format("hudi").load(tablePath) + .createOrReplaceTempView("hudi_blob_view") + + val result = sparkSession.sql(""" + SELECT id, read_blob(file_info) AS data + FROM hudi_blob_view + ORDER BY id + """).collect() + + assertEquals(3, result.length) + // Verify the bytes read from the external file match the recorded offsets. + result.zipWithIndex.foreach { case (row, idx) => + assertEquals(idx + 1, row.getInt(0)) + val bytes = row.getAs[Array[Byte]]("data") + assertEquals(100, bytes.length) + assertBytesContent(bytes, expectedOffset = idx * 100) + } + } + + @Test + def testBasicReadBlobSQL(): Unit = { + val filePath = createTestFile(tempDir, "basic.bin", 10000) + + // Main DataFrame with blobStructCol + val df = sparkSession.createDataFrame(Seq( + (1, "record1", filePath, 0L, 100L), + (3, "record3", filePath, 100L, 100L), + (4, "record4", filePath, 200L, 100L) + )).toDF("id", "name", "external_path", "offset", "length") + .withColumn("file_info", blobStructCol("file_info", col("external_path"), col("offset"), col("length"))) + .select("id", "name", "file_info") + + // Ensure file_info is nullable in the schema + val schema = StructType(df.schema.map { + case StructField("file_info", dt, _, md) => StructField("file_info", dt, nullable = true, md) + case other => other + }) + val dfWithNullable = sparkSession.createDataFrame(df.rdd, schema) + + // DataFrame with a null blob value + val nullRow = Row(2, "record2", null) + val nullDf = sparkSession.createDataFrame(Collections.singletonList(nullRow), schema) + + // Union the null row + val fullDf = dfWithNullable.unionByName(nullDf) + fullDf.createOrReplaceTempView("test_table") + + // Use SQL with read_blob + val result = sparkSession.sql(""" + SELECT id, name, read_blob(file_info) as data + FROM test_table + WHERE id <= 3 + ORDER BY id + """) + + val rows = result.collect() + assertEquals(3, rows.length) + + // Verify data is binary for non-null rows + val data1 = rows(0).getAs[Array[Byte]]("data") + 
assertEquals(100, data1.length) + assertBytesContent(data1) + + // The null_blob row should have null data + assertTrue(rows(1).isNullAt(2)) + + val data3 = rows(2).getAs[Array[Byte]]("data") + assertEquals(100, data3.length) + assertBytesContent(data3, expectedOffset = 100) + } + + @Test + def testReadBlobWithJoin(): Unit = { + val filePath = createTestFile(tempDir, "join.bin", 10000) + + // Create blob table + val blobDF = sparkSession.createDataFrame(Seq( + (1, filePath, 0L, 100L), + (2, filePath, 100L, 100L) + )).toDF("id", "external_path", "offset", "length") + .withColumn("file_info", + blobStructCol("file_info", col("external_path"), col("offset"), col("length"))) + .select("id", "file_info") + + blobDF.createOrReplaceTempView("blob_table") + + // Create metadata table + val metaDF = sparkSession.createDataFrame(Seq( + (1, "Alice"), + (2, "Bob") + )).toDF("id", "name") + + metaDF.createOrReplaceTempView("meta_table") + + // SQL with JOIN + val result = sparkSession.sql(""" + SELECT m.id, m.name, read_blob(b.file_info) as data + FROM meta_table m + JOIN blob_table b ON m.id = b.id + ORDER BY m.id + """) + + val rows = result.collect() + assertEquals(2, rows.length) + assertEquals("Alice", rows(0).getAs[String]("name")) + assertEquals(100, rows(0).getAs[Array[Byte]]("data").length) + assertEquals("Bob", rows(1).getAs[String]("name")) + assertEquals(100, rows(1).getAs[Array[Byte]]("data").length) + + // Verify data content + val data1 = rows(0).getAs[Array[Byte]]("data") + assertBytesContent(data1) + } + + @Test + def testReadBlobInSubquery(): Unit = { + val filePath = createTestFile(tempDir, "subquery.bin", 10000) + + val df = sparkSession.createDataFrame(Seq( + (1, "A", filePath, 0L, 100L), + (2, "A", filePath, 100L, 100L), + (3, "B", filePath, 200L, 100L) + )).toDF("id", "category", "external_path", "offset", "length") + .withColumn("file_info", + blobStructCol("file_info", col("external_path"), col("offset"), col("length"))) + .select("id", "category", 
"file_info") + + df.createOrReplaceTempView("subquery_table") + + // SQL with subquery + val result = sparkSession.sql(""" + SELECT * FROM ( + SELECT id, category, read_blob(file_info) as data + FROM subquery_table + ) WHERE category = 'A' + """) + + val rows = result.collect() + assertEquals(2, rows.length) + rows.foreach { row => + assertEquals("A", row.getAs[String]("category")) + assertEquals(100, row.getAs[Array[Byte]]("data").length) + } + } + + @Test + def testConfigurationParameters(): Unit = { + val filePath = createTestFile(tempDir, "config.bin", 50000) + + val df = sparkSession.createDataFrame(Seq( + (1, filePath, 0L, 100L), + (2, filePath, 5000L, 100L), // 4.9KB gap + (3, filePath, 10000L, 100L) + )).toDF("id", "external_path", "offset", "length") + .withColumn("file_info", + blobStructCol("file_info", col("external_path"), col("offset"), col("length"))) + .select("id", "file_info") + + df.createOrReplaceTempView("config_table") + + // Use withSparkConfig to automatically manage configuration + withSparkConfig(sparkSession, Map( + "hoodie.blob.batching.max.gap.bytes" -> "10000", + "hoodie.blob.batching.lookahead.size" -> "100" + )) { + val result = sparkSession.sql(""" + SELECT id, read_blob(file_info) as data + FROM config_table + """) + + val rows = result.collect() + assertEquals(3, rows.length) + + // Verify all reads completed successfully + rows.foreach { row => + assertEquals(100, row.getAs[Array[Byte]]("data").length) + } + } + } + + @Test + def testMultipleReadBlobInSameQuery(): Unit = { + val filePath1 = createTestFile(tempDir, "multi1.bin", 10000) + val filePath2 = createTestFile(tempDir, "multi2.bin", 10000) + + val df = sparkSession.createDataFrame(Seq( + (1, filePath1, 0L, 50L, filePath2, 500L, 50L), + (2, filePath1, 100L, 50L, filePath2, 600L, 50L) + )).toDF("id", "external_path1", "offset1", "length1", "external_path2", "offset2", "length2") + .withColumn("file_info1", + blobStructCol("file_info1", col("external_path1"), col("offset1"), 
col("length1"))) + .withColumn("file_info2", + blobStructCol("file_info2", col("external_path2"), col("offset2"), col("length2"))) + .select("id", "file_info1", "file_info2") + + df.createOrReplaceTempView("multi_table") + + // SQL with multiple read_blob calls + val result = sparkSession.sql(""" + SELECT + id, + read_blob(file_info1) as data1, + read_blob(file_info2) as data2 + FROM multi_table + """) + + val rows = result.collect() + assertEquals(2, rows.length) + + // Row 1: data1 = file1 at offset 0, data2 = file2 at offset 500 + val data1_row1 = rows(0).getAs[Array[Byte]]("data1") + val data2_row1 = rows(0).getAs[Array[Byte]]("data2") + assertEquals(50, data1_row1.length) + assertEquals(50, data2_row1.length) + assertBytesContent(data1_row1, expectedOffset = 0) + assertBytesContent(data2_row1, expectedOffset = 500) + + // Row 2: data1 = file1 at offset 100, data2 = file2 at offset 600 + val data1_row2 = rows(1).getAs[Array[Byte]]("data1") + val data2_row2 = rows(1).getAs[Array[Byte]]("data2") + assertBytesContent(data1_row2, expectedOffset = 100) + assertBytesContent(data2_row2, expectedOffset = 600) + } + + @Test + def testReadBlobWithEmptyResult(): Unit = { + val filePath = createTestFile(tempDir, "empty.bin", 10000) + + val df = sparkSession.createDataFrame(Seq( + (1, filePath, 0L, 100L), + (2, filePath, 100L, 100L) + )).toDF("id", "external_path", "offset", "length") + .withColumn("file_info", + blobStructCol("file_info", col("external_path"), col("offset"), col("length"))) + .select("id", "file_info") + + df.createOrReplaceTempView("empty_table") + + // SQL that returns no rows + val result = sparkSession.sql(""" + SELECT id, read_blob(file_info) as data + FROM empty_table + WHERE id > 100 + """) + + val rows = result.collect() + assertEquals(0, rows.length) + } + + @Test + def testReadBlobMultipleFiles(): Unit = { + val filePath1 = createTestFile(tempDir, "file1.bin", 10000) + val filePath2 = createTestFile(tempDir, "file2.bin", 10000) + + val df = 
sparkSession.createDataFrame(Seq( + (1, filePath1, 0L, 100L), + (2, filePath2, 0L, 100L), + (3, filePath1, 100L, 100L), + (4, filePath2, 100L, 100L) + )).toDF("id", "external_path", "offset", "length") + .withColumn("file_info", + blobStructCol("file_info", col("external_path"), col("offset"), col("length"))) + .select("id", "file_info") + + df.createOrReplaceTempView("multi_file_table") + + // SQL reading from multiple files + val result = sparkSession.sql(""" + SELECT id, read_blob(file_info) as data + FROM multi_file_table + ORDER BY id + """) + + val rows = result.collect() + assertEquals(4, rows.length) + + // Verify all data was read correctly + rows.foreach { row => + assertEquals(100, row.getAs[Array[Byte]]("data").length) + } + } + + @Test + def testReadBlobInWhereClause(): Unit = { + val filePath = createTestFile(tempDir, "where.bin", 10000) + val df = sparkSession.createDataFrame(Seq( + (1, filePath, 0L, 50L), // 50 bytes — filtered out + (2, filePath, 100L, 100L), // 100 bytes — passes filter + (3, filePath, 200L, 200L) // 200 bytes — passes filter + )).toDF("id", "external_path", "offset", "length") + .withColumn("file_info", blobStructCol("file_info", col("external_path"), col("offset"), col("length"))) + .select("id", "file_info") + df.createOrReplaceTempView("where_table") + + val result = sparkSession.sql(""" + SELECT id, read_blob(file_info) AS data + FROM where_table + WHERE length(read_blob(file_info)) > 50 + ORDER BY id + """) + + val rows = result.collect() + assertEquals(2, rows.length) + + // validate that rows with IDs 2 and 3 are returned + assertEquals(2, rows(0).getInt(0)) + assertEquals(3, rows(1).getInt(0)) + } + + @Test + def testReadBlobWithCaseWhen(): Unit = { + val filePath = createTestFile(tempDir, "case.bin", 10000) + + val df = sparkSession.createDataFrame(Seq( + (1, true, filePath, 0L, 100L), + (2, false, filePath, 100L, 100L), + (3, true, filePath, 200L, 100L) + )).toDF("id", "should_resolve", "external_path", "offset", 
"length") + .withColumn("file_info", + blobStructCol("file_info", col("external_path"), col("offset"), col("length"))) + .select("id", "should_resolve", "file_info") + + df.createOrReplaceTempView("case_table") + + // SQL with CASE WHEN - note: this tests that the expression is handled + // even in conditional contexts + val result = sparkSession.sql(""" + SELECT + id, + should_resolve, + CASE + WHEN should_resolve THEN read_blob(file_info) + ELSE NULL + END as data + FROM case_table + """) + + val rows = result.collect() + assertEquals(3, rows.length) + + // Row 1 should have data + assertTrue(rows(0).getAs[Boolean]("should_resolve")) + assertNotNull(rows(0).get(2)) + + // Row 2 should have null + assertFalse(rows(1).getAs[Boolean]("should_resolve")) + assertTrue(rows(1).isNullAt(2)) + + // Row 3 should have data + assertTrue(rows(2).getAs[Boolean]("should_resolve")) + assertNotNull(rows(2).get(2)) + } + + @Test + def testReadBlobWithMissingFile(): Unit = { + val missingPath = tempDir.resolve("does_not_exist.bin").toString + val df = sparkSession.createDataFrame(Seq( + (1, missingPath, 0L, 10L) + )).toDF("id", "external_path", "offset", "length") + .withColumn("file_info", blobStructCol("file_info", col("external_path"), col("offset"), col("length"))) + .select("id", "file_info") + df.createOrReplaceTempView("missing_file_table") + val thrown = assertThrows(classOf[Exception], () => { + sparkSession.sql("SELECT id, read_blob(file_info) as data FROM missing_file_table").collect() + }) + assertTrue(thrown.getCause.isInstanceOf[HoodieIOException]) + } + + @Test + def testReadBlobOnNonBlobColumn(): Unit = { + val df = sparkSession.createDataFrame(Seq( + (1, "not_a_blob") + )).toDF("id", "not_blob") + df.createOrReplaceTempView("non_blob_table") + val thrown = assertThrows(classOf[Exception], () => { + sparkSession.sql("SELECT id, read_blob(not_blob) as data FROM non_blob_table").collect() + }) + assertTrue(thrown.isInstanceOf[IllegalArgumentException]) + 
assertTrue(thrown.getMessage.contains("must be compatible with BlobType")) + } + + @Test + def testReadBlobInProjectAndFilter(): Unit = { + val filePath = createTestFile(tempDir, "project_and_filter.bin", 10000) + + // DataFrame with blobStructCol + val df = sparkSession.createDataFrame(Seq( + (1, "record1", filePath, 0L, 100L), + (2, "record2", filePath, 100L, 100L), + (3, "record3", filePath, 200L, 100L) + )).toDF("id", "name", "external_path", "offset", "length") + .withColumn("file_info", blobStructCol("file_info", col("external_path"), col("offset"), col("length"))) + .select("id", "name", "file_info") + + df.createOrReplaceTempView("project_and_filter_table") + + // Query with read_blob in both SELECT and WHERE + val result = sparkSession.sql(""" + SELECT id, name, read_blob(file_info) as data + FROM project_and_filter_table + WHERE length(read_blob(file_info)) = 100 + ORDER BY id + """) + + val rows = result.collect() + assertEquals(3, rows.length) + rows.zipWithIndex.foreach { case (row, idx) => + assertEquals(100, row.getAs[Array[Byte]]("data").length) + assertBytesContent(row.getAs[Array[Byte]]("data"), expectedOffset = idx * 100) + } + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestFileGroupReaderPartitionColumn.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestFileGroupReaderPartitionColumn.scala new file mode 100644 index 0000000000000..1311ceac448df --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestFileGroupReaderPartitionColumn.scala @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.functional + +import org.apache.hudi.testutils.SparkClientFunctionalTestHarness + +import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.types.{DoubleType, LongType, StringType, StructField, StructType} +import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertNotNull} +import org.junit.jupiter.api.Test + +/** + * Regression test for the FileGroupReader bug where mandatory partition columns + * were dropped from `dataSchema` before pruning, causing the column to read back + * as null for untouched rows in MOR file slices containing both a base file and + * a log file. + * + * Scenario: MOR + `CustomKeyGenerator` (`country:simple`) + `PostgresDebeziumAvroPayload` + * + `GLOBAL_SIMPLE` index with `update.partition.path=true`, then a round-2 write + * that moves two records out of the `country=IN` partition via a partition-key + * change. This produces a file slice in `country=IN` with both a base parquet + * (round 1) and a log file (round 2 delete markers for the moved records), + * while the two untouched records in that slice (id=12 and id=14) remain in + * the base file. + * + * Before the fix: `buildReaderWithPartitionValues` augmented only `requestedStructType` + * with mandatory partition fields before pruning, leaving `dataSchema` missing + * `country`. 
The FileGroupReader path then skipped reading the column from + * parquet, and the `FileGroupReaderSchemaHandler` (which sets `requiredSchema` to + * `this.tableSchema` for a non-projection-compatible CUSTOM merger like Postgres) + * propagated that through the output converter, which wrote `null` for every + * untouched row in the base+log slice. + * + * After the fix: `dataSchema` contains `country`, so id=12 and id=14 read back + * with the correct `country="IN"`. + * + * Partitions without log files (`country=US`, `country=CN`) hit the `readBaseFile` + * path that appends partition values from the directory and are unaffected either + * way — they're included here as negative controls. + */ +class TestFileGroupReaderPartitionColumn extends SparkClientFunctionalTestHarness { + + @Test + def testMandatoryPartitionColumnReadFromLogFileSlice(): Unit = { + val commonOpts = Map( + "hoodie.table.name" -> "test_fg_reader_partition_col", + "hoodie.datasource.write.table.type" -> "MERGE_ON_READ", + "hoodie.write.table.version" -> "6", + "hoodie.datasource.write.recordkey.field" -> "id", + "hoodie.datasource.write.precombine.field" -> "_event_lsn", + "hoodie.datasource.write.partitionpath.field" -> "country:simple", + "hoodie.datasource.write.keygenerator.class" -> "org.apache.hudi.keygen.CustomKeyGenerator", + "hoodie.datasource.write.hive_style_partitioning" -> "true", + "hoodie.datasource.write.partitionpath.urlencode" -> "true", + "hoodie.datasource.write.payload.class" -> + "org.apache.hudi.common.model.debezium.PostgresDebeziumAvroPayload", + "hoodie.index.type" -> "GLOBAL_SIMPLE", + "hoodie.simple.index.update.partition.path" -> "true", + "hoodie.datasource.write.operation" -> "upsert", + "hoodie.metadata.enable" -> "true", + "hoodie.compact.inline" -> "false", + "hoodie.clean.automatic" -> "false", + "hoodie.datasource.write.reconcile.schema" -> "false" + ) + + val schema = StructType(Array( + StructField("_change_operation_type", StringType, nullable = true), + 
StructField("_event_lsn", LongType, nullable = true), + StructField("id", LongType, nullable = true), + StructField("country", StringType, nullable = true), + StructField("device_id", StringType, nullable = true), + StructField("manufacturer", StringType, nullable = true), + StructField("event_type", StringType, nullable = true), + StructField("price", DoubleType, nullable = true) + )) + + // Round 1: 8 records across 3 partitions. + // country=IN: id=2,6,12,14 (4 rows in one base parquet) + // country=US: id=4,10 + // country=CN: id=8,16 + val round1Rows = Seq( + Row("c", 2550218872L, 2L, "IN", "0x100000021ce30", "Acme Corp", "plan change", 11.0), + Row("c", 2550219144L, 4L, "US", "0x100000052e763", "Delta corp", "telecoms activity", 12.0), + Row("c", 2550219424L, 6L, "IN", "0x10000008e92b8", "Xyzzy Inc.", "plan change", 13.0), + Row("c", 2550219696L, 8L, "CN", "0x10000008a5eba", "Xyzzy Inc.", "plan change", 14.0), + Row("c", 2550219968L, 10L, "US", "0x10000008df79c", "Lakehouse Ltd", "device error", 15.0), + Row("c", 2550220240L, 12L, "IN", "0x10000007ddfb1", "Embanks Devices", "plan change", 16.0), + Row("c", 2550220512L, 14L, "IN", "0x10000008d8892", "Acme Corp", "deactivation", 17.0), + Row("c", 2550220784L, 16L, "CN", "0x10000007352cd", "Acme Corp", "telecoms activity", 18.0) + ) + spark.createDataFrame(spark.sparkContext.parallelize(round1Rows, 1), schema) + .write.format("hudi") + .options(commonOpts) + .mode(SaveMode.Overwrite) + .save(basePath) + + // Round 2: partition-key changes for id=2 (IN->US), id=6 (IN->US), and a delete + // for id=4 (stays in US). With GLOBAL_SIMPLE + update.partition.path=true, the + // partition-key changes leave delete markers in the old country=IN partition — + // which gives us the base+log file layout in country=IN. id=12 and id=14 are + // untouched and must read back with country=IN after the fix. 
+ val round2Rows = Seq( + Row("u", 2650218872L, 2L, "US", "0x100000021ce30", "Acme Corp", "plan change", 11.0), + Row("d", 2650219144L, 4L, "US", "0x100000052e763", "Delta corp", "telecoms activity", 12.0), + Row("u", 2650219424L, 6L, "US", "0x10000008e92b8", "Xyzzy Inc.", "plan change", 13.0) + ) + spark.createDataFrame(spark.sparkContext.parallelize(round2Rows, 1), schema) + .write.format("hudi") + .options(commonOpts) + .mode(SaveMode.Append) + .save(basePath) + + val rows = spark.read.format("hudi").load(basePath) + .select("id", "country") + .collect() + .map(r => r.getLong(0) -> (if (r.isNullAt(1)) null else r.getString(1))) + .toMap + + // The moved records land in US. + assertEquals("US", rows(2L), "id=2 moved to US") + assertEquals("US", rows(6L), "id=6 moved to US") + + // id=4 was deleted. + assertFalse(rows.contains(4L), "id=4 was deleted") + + // Untouched rows whose slice has no log file — negative controls, these were + // never broken. + assertEquals("CN", rows(8L), "id=8 untouched in CN (no log file)") + assertEquals("US", rows(10L), "id=10 untouched in US (no log file)") + assertEquals("CN", rows(16L), "id=16 untouched in CN (no log file)") + + // The regression: untouched rows in the IN slice that now has base+log. Before + // the fix these read back as null. 
+ assertNotNull(rows(12L), + "id=12 (untouched, country=IN slice with base+log) must not be null") + assertNotNull(rows(14L), + "id=14 (untouched, country=IN slice with base+log) must not be null") + assertEquals("IN", rows(12L), "id=12 partition column must be IN") + assertEquals("IN", rows(14L), "id=14 partition column must be IN") + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLanceDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLanceDataSource.scala index 662a6885e3e7b..01172d58706e9 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLanceDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLanceDataSource.scala @@ -21,7 +21,8 @@ import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.DefaultSparkRecordMerger import org.apache.hudi.common.config.{HoodieCommonConfig, HoodieMetadataConfig} import org.apache.hudi.common.engine.HoodieLocalEngineContext -import org.apache.hudi.common.model.HoodieTableType +import org.apache.hudi.common.model.{HoodieFileFormat, HoodieTableType} +import org.apache.hudi.common.schema.HoodieSchema import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient} import org.apache.hudi.common.table.view.{FileSystemViewManager, FileSystemViewStorageConfig} import org.apache.hudi.common.testutils.HoodieTestUtils @@ -30,13 +31,18 @@ import org.apache.hudi.io.storage.HoodieSparkLanceReader import org.apache.hudi.storage.StoragePath import org.apache.hudi.testutils.HoodieSparkClientTestBase -import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} +import org.apache.arrow.memory.RootAllocator +import org.apache.arrow.vector.types.pojo.ArrowType +import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession} +import org.apache.spark.sql.types._ import org.junit.jupiter.api.{AfterEach, BeforeEach} import 
org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertNotNull, assertTrue}
 import org.junit.jupiter.api.condition.DisabledIfSystemProperty
 import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.EnumSource
+import org.lance.file.LanceFileReader
 
+import java.util.concurrent.atomic.AtomicInteger
 import java.util.stream.Collectors
 
 import scala.collection.JavaConverters._
@@ -61,6 +67,21 @@ class TestLanceDataSource extends HoodieSparkClientTestBase {
     spark = null
   }
 
+  // Counter backing generateTableName; it is a field of the test instance.
+  private val tableId = new AtomicInteger(0)
+
+  /**
+   * Returns a new SQL table name of the form `lance_sql_<n>`, bumping the counter on every call.
+   */
+  // NOTE(review): the counter is per test instance; presumably every test invocation gets a
+  // fresh instance and a fresh catalog, otherwise names could repeat -- confirm.
+  private def generateTableName: String = {
+    s"lance_sql_${tableId.incrementAndGet()}"
+  }
+
+  /**
+   * Runs `sql`, collects the result and compares it to `expects`.
+   *
+   * The row count must match, and rows are compared pairwise in the order given, so the
+   * query itself has to produce a deterministic order (callers use `order by`).
+   *
+   * @param sql     query to execute through the test SparkSession
+   * @param expects expected rows, one `Seq` of column values per row
+   */
+  private def checkAnswer(sql: String)(expects: Seq[Any]*): Unit = {
+    val result = spark.sql(sql).collect()
+    val expectedRows = expects.map(row => Row(row: _*)).toArray
+    assertEquals(expectedRows.length, result.length, "Row count mismatch")
+    expectedRows.zip(result).foreach { case (expected, actual) =>
+      assertEquals(expected, actual)
+    }
+  }
+
   @ParameterizedTest
   @EnumSource(value = classOf[HoodieTableType])
   def testBasicWriteAndRead(tableType: HoodieTableType): Unit = {
@@ -784,6 +805,370 @@ class TestLanceDataSource extends HoodieSparkClientTestBase {
     fsView.close()
   }
 
+  /**
+   * Runs the SQL command scenario below once with a partitioned table and once with a
+   * non-partitioned table, each under its own generated table name and path.
+   */
+  @ParameterizedTest
+  @EnumSource(value = classOf[HoodieTableType])
+  def testSqlCommands(tableType: HoodieTableType): Unit = {
+    Seq(true, false).foreach { isPartitioned =>
+      val tableName = generateTableName
+      val tablePath = s"$basePath/$tableName"
+      testSqlCommands(tableType, tableName, tablePath, isPartitioned)
+    }
+  }
+
+  /**
+   * Creates a Lance-backed Hudi table through SQL and exercises INSERT (full, reordered and
+   * partial column lists), UPDATE, DELETE (COW only) and static-partition INSERT, checking
+   * the full table content after every step.
+   *
+   * The steps are cumulative: each expected result builds on the rows left by the previous one.
+   *
+   * @param tableType     Hudi table type written into the `type` table property
+   * @param tableName     SQL table name to create
+   * @param tablePath     storage location of the table
+   * @param isPartitioned when true the table is created `partitioned by (dt)` and the
+   *                      static-partition insert (Test 6) is run
+   */
+  private def testSqlCommands(tableType: HoodieTableType,
+                              tableName: String,
+                              tablePath: String,
+                              isPartitioned: Boolean): Unit = {
+    val createTablePartitionClause = if (isPartitioned) "partitioned by (dt)" else ""
+
+    // CREATE TABLE with Lance configuration
+    // Lance format requires Spark record merger for writing
+    // NOTE(review): none of the SQL literals in this method use '|' margins, so the
+    // .stripMargin calls leave them unchanged.
+    spark.sql(s"""
+      create table $tableName (
+        id int,
+        dt string,
+        name string,
+        age int,
+        score double
+      ) using hudi
+      tblproperties (
+        hoodie.table.base.file.format = 'LANCE',
+        type = '${tableType.name()}',
+        primaryKey = 'id',
+        hoodie.datasource.write.record.merger.impls = '${classOf[DefaultSparkRecordMerger].getName}'
+      )
+      $createTablePartitionClause
+      location '$tablePath'
+    """.stripMargin)
+
+    // Test 1: INSERT with all columns in schema order
+    spark.sql(s"""
+      insert into $tableName (id, name, age, score, dt)
+      values (1, 'Alice', 30, 95.5, '2025-01-01'),
+             (2, 'Bob', 25, 87.3, '2025-01-02')
+    """.stripMargin)
+
+    checkAnswer(s"select id, name, age, score, dt from $tableName order by id")(
+      Seq(1, "Alice", 30, 95.5, "2025-01-01"),
+      Seq(2, "Bob", 25, 87.3, "2025-01-02")
+    )
+
+    // Test 2: INSERT with reordered columns
+    spark.sql(s"""
+      insert into $tableName (dt, name, id, age, score)
+      values ('2025-01-02', 'Charlie', 3, 35, 92.1)
+    """.stripMargin)
+
+    checkAnswer(s"select id, name, age, score, dt from $tableName order by id")(
+      Seq(1, "Alice", 30, 95.5, "2025-01-01"),
+      Seq(2, "Bob", 25, 87.3, "2025-01-02"),
+      Seq(3, "Charlie", 35, 92.1, "2025-01-02")
+    )
+
+    // Disable small file handling so the next insert creates a new file group
+    // and updates in MOR generate log file(s)
+    spark.sql(s"alter table $tableName set tblproperties ('hoodie.merge.small.file.group.candidates.limit' = '0')")
+
+    // Test 3: INSERT with subset of columns (null handling)
+    // `score` is omitted from the column list and is expected to read back as null.
+    spark.sql(s"""
+      insert into $tableName (dt, age, name, id)
+      values ('2025-01-01', 40, 'Diana', 4)
+    """.stripMargin)
+
+    checkAnswer(s"select id, name, age, score, dt from $tableName order by id")(
+      Seq(1, "Alice", 30, 95.5, "2025-01-01"),
+      Seq(2, "Bob", 25, 87.3, "2025-01-02"),
+      Seq(3, "Charlie", 35, 92.1, "2025-01-02"),
+      Seq(4, "Diana", 40, null, "2025-01-01")
+    )
+
+    // Test 4: UPDATE existing row
+    spark.sql(s"update $tableName set score = 99.9, age = 31 where id = 1")
+
+    checkAnswer(s"select id, name, age, score, dt from $tableName order by id")(
+      Seq(1, "Alice", 31, 99.9, "2025-01-01"),
+      Seq(2, "Bob", 25, 87.3, "2025-01-02"),
+      Seq(3, "Charlie", 35, 92.1, "2025-01-02"),
+      Seq(4, "Diana", 40, null, "2025-01-01")
+    )
+
+    // Test 5: DELETE a row
+    // TODO(#18558): test DELETE with MOR table type once the bug is fixed
+    if (tableType == HoodieTableType.COPY_ON_WRITE) {
+      spark.sql(s"delete from $tableName where id = 3")
+
+      checkAnswer(s"select id, name, age, score, dt from $tableName order by id")(
+        Seq(1, "Alice", 31, 99.9, "2025-01-01"),
+        Seq(2, "Bob", 25, 87.3, "2025-01-02"),
+        Seq(4, "Diana", 40, null, "2025-01-01")
+      )
+    }
+
+    // Test 6: INSERT with static partition (only for partitioned tables)
+    if (isPartitioned) {
+      spark.sql(s"""
+        insert into $tableName partition(dt='2025-01-05') (age, id, name)
+        values (28, 5, 'Eve')
+      """.stripMargin)
+
+      if (tableType == HoodieTableType.COPY_ON_WRITE) {
+        // COW ran the delete in Test 5, so id=3 is gone.
+        checkAnswer(s"select id, name, age, score, dt from $tableName order by id")(
+          Seq(1, "Alice", 31, 99.9, "2025-01-01"),
+          Seq(2, "Bob", 25, 87.3, "2025-01-02"),
+          Seq(4, "Diana", 40, null, "2025-01-01"),
+          Seq(5, "Eve", 28, null, "2025-01-05")
+        )
+      } else {
+        // The delete in Test 5 was skipped for this table type, so id=3 is still present.
+        checkAnswer(s"select id, name, age, score, dt from $tableName order by id")(
+          Seq(1, "Alice", 31, 99.9, "2025-01-01"),
+          Seq(2, "Bob", 25, 87.3, "2025-01-02"),
+          Seq(3, "Charlie", 35, 92.1, "2025-01-02"),
+          Seq(4, "Diana", 40, null, "2025-01-01"),
+          Seq(5, "Eve", 28, null, "2025-01-05")
+        )
+      }
+    }
+
+    // Verify the table config reports Lance as the base file format.
+    // This reads the table config only; it does not inspect the files on storage.
+    val metaClient = HoodieTableMetaClient.builder()
+      .setConf(HoodieTestUtils.getDefaultStorageConf)
+      .setBasePath(tablePath)
+      .build()
+
+    val baseFileFormat = metaClient.getTableConfig.getBaseFileFormat
+    assertEquals(HoodieFileFormat.LANCE, baseFileFormat,
+      "Table should use Lance base file format")
+  }
+
+  /**
+   * Vector round-trip test parameterized over COW + MOR. Covers two non-null VECTOR
+   * columns of different element types (FLOAT, DOUBLE) and dimensions, and exercises
+   * the upsert path (MOR log-merge on MOR, file rewrite on COW).
+   *

Nullable-vector coverage lives in {@code testNullableVectorRoundTrip} because
+   * merging a null-valued vector through the upsert path currently errors out in
+   * the Lance reader; tracked as a separate follow-up.
+   */
+  @ParameterizedTest
+  @EnumSource(value = classOf[HoodieTableType])
+  def testMultipleVectorColumns(tableType: HoodieTableType): Unit = {
+    val tableName = s"test_lance_vec_multi_${tableType.name().toLowerCase}"
+    val tablePath = s"$basePath/$tableName"
+
+    // Both vector columns are non-nullable arrays with non-null elements; the VECTOR
+    // descriptor string is attached as field metadata through vectorMetadata.
+    val embeddingDim = 3
+    val featuresDim = 2
+    val schema = StructType(Seq(
+      StructField("id", IntegerType, nullable = false),
+      StructField("name", StringType, nullable = false),
+      StructField("age", IntegerType, nullable = false),
+      StructField("embedding",
+        ArrayType(FloatType, containsNull = false),
+        nullable = false,
+        vectorMetadata(s"VECTOR($embeddingDim)")),
+      StructField("features",
+        ArrayType(DoubleType, containsNull = false),
+        nullable = false,
+        vectorMetadata(s"VECTOR($featuresDim, DOUBLE)"))
+    ))
+
+    // Initial insert.
+    val data1 = Seq(
+      Row(1, "Alice", 30, Array(1.0f, 2.0f, 3.0f), Array(10.0d, 20.0d)),
+      Row(2, "Bob", 25, Array(4.0f, 5.0f, 6.0f), Array(30.0d, 40.0d)),
+      Row(3, "Charlie", 35, Array(7.0f, 8.0f, 9.0f), Array(50.0d, 60.0d))
+    )
+    val df1 = spark.createDataFrame(spark.sparkContext.parallelize(data1), schema).coalesce(1)
+    writeDataframe(tableType, tableName, tablePath, df1,
+      saveMode = SaveMode.Overwrite, operation = Some("insert"))
+
+    // Upsert — update Bob's embedding, age, and features.
+    val data2 = Seq(
+      Row(2, "Bob", 40, Array(10.0f, 20.0f, 30.0f), Array(70.0d, 80.0d))
+    )
+    val df2 = spark.createDataFrame(spark.sparkContext.parallelize(data2), schema).coalesce(1)
+    writeDataframe(tableType, tableName, tablePath, df2, operation = Some("upsert"))
+
+    // Read back: the Spark types and the VECTOR descriptors must match what was written.
+    val readDf = spark.read.format("hudi").load(tablePath)
+      .select("id", "name", "age", "embedding", "features")
+    assertEquals(
+      ArrayType(FloatType, containsNull = false),
+      readDf.schema("embedding").dataType)
+    assertEquals(
+      ArrayType(DoubleType, containsNull = false),
+      readDf.schema("features").dataType)
+    assertHudiTypeMetadata(readDf.schema("embedding"), s"VECTOR($embeddingDim)")
+    assertHudiTypeMetadata(readDf.schema("features"), s"VECTOR($featuresDim, DOUBLE)")
+
+    // Rows are sorted by id, so index 0/1/2 is Alice/Bob/Charlie. Column positions follow
+    // the select above: 2 = age, 3 = embedding, 4 = features.
+    val rows = readDf.collect().sortBy(_.getInt(0))
+    assertEquals(3, rows.length)
+    // Alice unchanged.
+    assertEquals(Seq(1.0f, 2.0f, 3.0f), rows(0).getSeq[Float](3).toSeq)
+    assertEquals(Seq(10.0d, 20.0d), rows(0).getSeq[Double](4).toSeq)
+    // Bob upserted.
+    assertEquals(40, rows(1).getInt(2), "Bob's age should be updated to 40")
+    assertEquals(Seq(10.0f, 20.0f, 30.0f), rows(1).getSeq[Float](3).toSeq)
+    assertEquals(Seq(70.0d, 80.0d), rows(1).getSeq[Double](4).toSeq)
+    // Charlie unchanged.
+    assertEquals(Seq(7.0f, 8.0f, 9.0f), rows(2).getSeq[Float](3).toSeq)
+    assertEquals(Seq(50.0d, 60.0d), rows(2).getSeq[Double](4).toSeq)
+
+    // Validate Lance file physical schema + footer on the base files produced.
+    assertLanceFieldIsFixedSizeList(tablePath, "embedding", embeddingDim)
+    assertLanceFieldIsFixedSizeList(tablePath, "features", featuresDim)
+    assertLanceFooterHasVectorColumns(tablePath,
+      s"embedding:VECTOR($embeddingDim),features:VECTOR($featuresDim, DOUBLE)")
+  }
+
+  /**
+   * Nullable-vector coverage — kept separate from {@code testMultipleVectorColumns}
+   * because folding this case into the upsert/merge path errors out in the current
+   * Lance reader (null vector element read).
+   */
+  @ParameterizedTest
+  @EnumSource(value = classOf[HoodieTableType])
+  def testNullableVectorRoundTrip(tableType: HoodieTableType): Unit = {
+    val tableName = s"test_lance_vec_nullable_${tableType.name().toLowerCase}"
+    val tablePath = s"$basePath/$tableName"
+
+    // The column itself is nullable; the array elements are not.
+    val dim = 3
+    val schema = StructType(Seq(
+      StructField("id", IntegerType, nullable = false),
+      StructField("embedding",
+        ArrayType(FloatType, containsNull = false),
+        nullable = true,
+        vectorMetadata(s"VECTOR($dim)"))
+    ))
+    val data = Seq(
+      Row(1, Array(1.0f, 2.0f, 3.0f)),
+      Row(2, null),
+      Row(3, Array(7.0f, 8.0f, 9.0f))
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema).coalesce(1)
+
+    // Single write only: no upsert is issued in this test.
+    writeDataframe(tableType, tableName, tablePath, df, saveMode = SaveMode.Overwrite)
+
+    val readDf = spark.read.format("hudi").load(tablePath).select("id", "embedding")
+    assertHudiTypeMetadata(readDf.schema("embedding"), s"VECTOR($dim)")
+
+    // Sorted by id, so index 1 is the row written with a null embedding.
+    val rows = readDf.collect().sortBy(_.getInt(0))
+    assertEquals(3, rows.length)
+    assertEquals(Seq(1.0f, 2.0f, 3.0f), rows(0).getSeq[Float](1).toSeq)
+    assertTrue(rows(1).isNullAt(1), "Row with id=2 should have null embedding")
+    assertEquals(Seq(7.0f, 8.0f, 9.0f), rows(2).getSeq[Float](1).toSeq)
+  }
+
+  /**
+   * Checks that a VECTOR column keeps its values and its type-descriptor metadata when it is
+   * projected on its own and when it is projected next to a Hudi metadata column.
+   */
+  @ParameterizedTest
+  @EnumSource(value = classOf[HoodieTableType])
+  def testVectorProjection(tableType: HoodieTableType): Unit = {
+    val tableName = s"test_lance_vec_proj_${tableType.name().toLowerCase}"
+    val tablePath = s"$basePath/$tableName"
+
+    val dim = 4
+    val schema = StructType(Seq(
+      StructField("id", IntegerType, nullable = false),
+      StructField("name", StringType, nullable = false),
+      StructField("embedding",
+        ArrayType(FloatType, containsNull = false),
+        nullable = false,
+        vectorMetadata(s"VECTOR($dim)"))
+    ))
+    val data = Seq(
+      Row(1, "Alice", Array(1.0f, 2.0f, 3.0f, 4.0f)),
+      Row(2, "Bob", Array(5.0f, 6.0f, 7.0f, 8.0f))
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema).coalesce(1)
+    writeDataframe(tableType, tableName, tablePath, df, saveMode = SaveMode.Overwrite)
+
+    // Project only the vector column
+    // Compared as a Set because nothing orders the rows in this projection.
+    val vecOnly = spark.read.format("hudi").load(tablePath).select("embedding")
+    assertEquals(1, vecOnly.schema.fields.length)
+    assertHudiTypeMetadata(vecOnly.schema("embedding"), s"VECTOR($dim)")
+    val vecRows = vecOnly.collect().map(_.getSeq[Float](0).toSeq).toSet
+    assertEquals(Set(Seq(1.0f, 2.0f, 3.0f, 4.0f), Seq(5.0f, 6.0f, 7.0f, 8.0f)), vecRows)
+
+    // Project vector alongside Hudi metadata columns
+    // Rows are looked up by _hoodie_record_key; the assertions expect the keys "1" and "2".
+    val withMeta = spark.read.format("hudi").load(tablePath)
+      .select("_hoodie_record_key", "embedding")
+    assertEquals(2, withMeta.schema.fields.length)
+    assertHudiTypeMetadata(withMeta.schema("embedding"), s"VECTOR($dim)")
+    val metaRows = withMeta.collect().map(r =>
+      r.getString(0) -> r.getSeq[Float](1).toSeq).toMap
+    assertEquals(Seq(1.0f, 2.0f, 3.0f, 4.0f), metaRows("1"))
+    assertEquals(Seq(5.0f, 6.0f, 7.0f, 8.0f), metaRows("2"))
+  }
+
+  /**
+   * Asserts that `field` carries the `HoodieSchema.TYPE_METADATA_FIELD` metadata entry and
+   * that its string value equals `expectedDescriptor`.
+   */
+  private def assertHudiTypeMetadata(field: StructField, expectedDescriptor: String): Unit = {
+    assertTrue(field.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD),
+      s"Expected field ${field.name} to carry ${HoodieSchema.TYPE_METADATA_FIELD} metadata after read")
+    assertEquals(expectedDescriptor, field.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD),
+      s"Expected ${HoodieSchema.TYPE_METADATA_FIELD}=$expectedDescriptor on field ${field.name}")
+  }
+
+  /** Builds field metadata holding `descriptor` under `HoodieSchema.TYPE_METADATA_FIELD`. */
+  private def vectorMetadata(descriptor: String): Metadata =
+    new MetadataBuilder().putString(HoodieSchema.TYPE_METADATA_FIELD, descriptor).build()
+
+  /** Runs `check` against each Lance base file's Arrow schema.
*/
+  private def validateLanceFileSchema(tablePath: String)(check: (org.apache.arrow.vector.types.pojo.Schema, String) => Unit): Unit = {
+    // `check` receives the Arrow schema of one base file together with that file's path.
+    val metaClient = HoodieTableMetaClient.builder()
+      .setConf(HoodieTestUtils.getDefaultStorageConf)
+      .setBasePath(tablePath)
+      .build()
+    val engineContext = new HoodieLocalEngineContext(metaClient.getStorageConf)
+    val metadataConfig = HoodieMetadataConfig.newBuilder.build
+    val viewManager = FileSystemViewManager.createViewManager(
+      engineContext, metadataConfig, FileSystemViewStorageConfig.newBuilder.build,
+      HoodieCommonConfig.newBuilder.build,
+      (mc: HoodieTableMetaClient) => metaClient.getTableFormat
+        .getMetadataFactory.create(engineContext, mc.getStorage, metadataConfig, tablePath))
+    val fsView = viewManager.getFileSystemView(metaClient)
+    try {
+      // NOTE(review): only the partition path "" is listed, so this presumably expects a
+      // non-partitioned table -- confirm against the callers' write options.
+      val baseFiles = fsView.getLatestBaseFiles("")
+        .collect(Collectors.toList[org.apache.hudi.common.model.HoodieBaseFile])
+      assertTrue(baseFiles.size() > 0, "Expected at least one Lance base file")
+      // One allocator is shared by all readers; each reader is closed before the next file
+      // is opened and the allocator is closed last.
+      val allocator = new RootAllocator()
+      try {
+        baseFiles.asScala.foreach { bf =>
+          val reader = LanceFileReader.open(bf.getPath, allocator)
+          try {
+            check(reader.schema(), bf.getPath)
+          } finally {
+            reader.close()
+          }
+        }
+      } finally {
+        allocator.close()
+      }
+    } finally {
+      fsView.close()
+    }
+  }
+
+  /**
+   * Asserts that the schema-level custom metadata of every Lance base file under `tablePath`
+   * contains `HoodieSchema.VECTOR_COLUMNS_METADATA_KEY` with exactly the value `expected`.
+   */
+  private def assertLanceFooterHasVectorColumns(tablePath: String, expected: String): Unit = {
+    validateLanceFileSchema(tablePath) { (schema, path) =>
+      val meta = schema.getCustomMetadata
+      assertNotNull(meta, s"Lance footer metadata null for $path")
+      val key = HoodieSchema.VECTOR_COLUMNS_METADATA_KEY
+      assertTrue(meta.containsKey(key),
+        s"Lance file $path should have footer key $key, got keys ${meta.keySet()}")
+      assertEquals(expected, meta.get(key), s"Lance file $path footer $key mismatch")
+    }
+  }
+
+  /**
+   * Asserts that, in every Lance base file under `tablePath`, the top-level field `fieldName`
+   * exists and is an Arrow FixedSizeList whose list size equals `expectedDim`.
+   */
+  private def assertLanceFieldIsFixedSizeList(tablePath: String, fieldName: String, expectedDim: Int): Unit = {
+    validateLanceFileSchema(tablePath) { (schema, path) =>
+      // Schema.findField throws IllegalArgumentException for an unknown name instead of
+      // returning null, so a null check on its result can never fire. Look the field up
+      // explicitly so a missing field fails with a message that names the file.
+      val field = schema.getFields.asScala.find(_.getName == fieldName).getOrElse(
+        throw new AssertionError(s"Field $fieldName not found in Lance schema for $path"))
+      field.getType match {
+        case fsl: ArrowType.FixedSizeList =>
+          assertEquals(expectedDim, fsl.getListSize,
+            s"Lance field $fieldName in $path should be FixedSizeList of size $expectedDim")
+        case other =>
+          throw new AssertionError(
+            s"Lance field $fieldName in $path should be FixedSizeList but was $other")
+      }
+    }
+  }
+
   private def createDataFrame(records: Seq[(Int, String, Int, Double)]) = {
     spark.createDataFrame(records).toDF("id", "name", "age", "score").coalesce(1)
   }
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestVectorDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestVectorDataSource.scala
index 8ef97c167b9f6..3758a22442d29 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestVectorDataSource.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestVectorDataSource.scala
@@ -18,8 +18,10 @@
 package org.apache.hudi.functional
 
 import org.apache.hudi.DataSourceWriteOptions._
+import org.apache.hudi.common.model.HoodieFileFormat
 import org.apache.hudi.common.schema.{HoodieSchema, HoodieSchemaType}
-import org.apache.hudi.testutils.HoodieSparkClientTestBase
+import org.apache.hudi.common.table.HoodieTableMetaClient
+import org.apache.hudi.testutils.{DataSourceTestUtils, HoodieSparkClientTestBase}
 
 import org.apache.hadoop.fs.Path
 import org.apache.parquet.hadoop.ParquetFileReader
@@ -28,6 +30,7 @@ import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession}
 import org.apache.spark.sql.types._
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 import org.junit.jupiter.api.Assertions._
+import org.junit.jupiter.api.condition.DisabledIfSystemProperty
 
 import scala.collection.JavaConverters._
 
@@ -674,6 +677,263 @@ class TestVectorDataSource extends HoodieSparkClientTestBase {
assertTrue(r7.getSeq[Double](1).forall(_ == 1.0), "key_7 should have original value 1.0")
   }
 
+  /** MOR log-only to inline-compaction VECTOR scenario with the default base file format. */
+  @Test
+  def testMorLogOnlyCompactionPreservesVectorMetadata(): Unit = {
+    runMorLogOnlyCompactionVectorScenario(basePath + "/mor_log_only_vec", "mor_log_only_vec_test")
+  }
+
+  /**
+   * Same scenario with `hoodie.table.base.file.format` set to LANCE. Additionally checks that
+   * the table config reports LANCE and that a `.lance` file exists directly under the table path.
+   */
+  @Test
+  @DisabledIfSystemProperty(named = "lance.skip.tests", matches = "true")
+  def testMorLogOnlyCompactionPreservesVectorMetadataLance(): Unit = {
+    val path = basePath + "/mor_log_only_vec_lance"
+    runMorLogOnlyCompactionVectorScenario(
+      path,
+      "mor_log_only_vec_lance_test",
+      extraTblProperties = Seq("'hoodie.table.base.file.format' = 'LANCE'"),
+      formatSpecificChecks = metaClient => {
+        // The table is configured for Lance, and compaction must have produced a
+        // .lance base file (not parquet) — otherwise the Lance variant degenerates
+        // into a parquet test.
+        assertEquals(HoodieFileFormat.LANCE, metaClient.getTableConfig.getBaseFileFormat,
+          "Expected Lance base file format")
+        // File.listFiles returns null when the path is not a readable local directory;
+        // treat that as "no entries" so the assertion below fails with a message instead
+        // of a NullPointerException. The directory is listed once and reused.
+        // NOTE(review): assumes `path` is a plain local filesystem path -- confirm.
+        val tableDirEntries =
+          Option(new java.io.File(path).listFiles()).getOrElse(Array.empty[java.io.File])
+        val lanceBaseFiles = tableDirEntries
+          .filter(f => f.isFile && f.getName.endsWith(".lance"))
+        assertTrue(lanceBaseFiles.nonEmpty,
+          s"Expected at least one .lance base file under $path after compaction, " +
+            s"found: ${tableDirEntries.map(_.getName).mkString(", ")}")
+      })
+  }
+
+  /**
+   * Shared body of the two MOR log-only compaction tests.
+   *
+   * Creates a MOR table with an `embedding VECTOR(3)` column, inline compaction enabled and
+   * `hoodie.clean.commits.retained = '1'`, then issues six writes (three inserts and three
+   * merges) and checks after each stage that the embeddings read back correctly, that the
+   * table switches from log-only to having a base file on the 5th write, that the VECTOR
+   * type metadata is still on the column, and that a clean instant exists after the 6th write.
+   * The table is dropped afterwards even if an assertion fails.
+   *
+   * @param path                 table location
+   * @param tableName            SQL table name
+   * @param extraTblProperties   additional `key = value` entries appended to the tblproperties list
+   * @param formatSpecificChecks extra assertions run last, with the reloaded meta client
+   */
+  private def runMorLogOnlyCompactionVectorScenario(
+      path: String,
+      tableName: String,
+      extraTblProperties: Seq[String] = Seq.empty,
+      formatSpecificChecks: HoodieTableMetaClient => Unit = _ => ()): Unit = {
+    val tblProperties = (Seq(
+      "primaryKey = 'id'",
+      "type = 'mor'",
+      "preCombineField = 'ts'",
+      "hoodie.index.type = 'INMEMORY'",
+      "hoodie.compact.inline = 'true'",
+      "hoodie.clean.commits.retained = '1'"
+    ) ++ extraTblProperties).mkString(",\n  ")
+    try {
+      spark.sql(
+        s"""
+           |create table $tableName (
+           |  id int,
+           |  embedding VECTOR(3),
+           |  ts long
+           |) using hudi
+           | location '$path'
+           | tblproperties (
+           |  $tblProperties
+           | )
+       """.stripMargin)
+
+      def readOrdered(): Seq[Row] =
+        spark.sql(s"select id, embedding, ts from $tableName order by id").collect().toSeq
+
+      def embeddingOf(id: Int, rows: Seq[Row]): Seq[Float] =
+        rows.find(_.getInt(0) == id).get.getSeq[Float](1)
+
+      spark.sql(
+        s"insert into $tableName values " +
+          "(1, array(cast(0.1 as float), cast(0.2 as float), cast(0.3 as float)), 1000)")
+      spark.sql(
+        s"insert into $tableName values " +
+          "(2, array(cast(0.4 as float), cast(0.5 as float), cast(0.6 as float)), 1000)")
+      spark.sql(
+        s"insert into $tableName values " +
+          "(3, array(cast(0.7 as float), cast(0.8 as float), cast(0.9 as float)), 1000)")
+      // 3 commits will not trigger compaction, so it should be log only.
+      assertTrue(DataSourceTestUtils.isLogFileOnly(path))
+      val afterInserts = readOrdered()
+      assertEquals(3, afterInserts.size)
+      assertEquals(Seq(0.1f, 0.2f, 0.3f), embeddingOf(1, afterInserts))
+      assertEquals(Seq(0.4f, 0.5f, 0.6f), embeddingOf(2, afterInserts))
+      assertEquals(Seq(0.7f, 0.8f, 0.9f), embeddingOf(3, afterInserts))
+
+      spark.sql(
+        s"""
+           |merge into $tableName h0
+           |using (
+           |  select 1 as id,
+           |  array(cast(0.11 as float), cast(0.22 as float), cast(0.33 as float)) as embedding,
+           |  1001L as ts
+           |) s0
+           | on h0.id = s0.id
+           | when matched then update set *
+           |""".stripMargin)
+      // 4 commits will not trigger compaction, so it should be log only.
+      assertTrue(DataSourceTestUtils.isLogFileOnly(path))
+      val afterUpdate = readOrdered()
+      assertEquals(Seq(0.11f, 0.22f, 0.33f), embeddingOf(1, afterUpdate))
+      assertEquals(Seq(0.4f, 0.5f, 0.6f), embeddingOf(2, afterUpdate))
+      assertEquals(Seq(0.7f, 0.8f, 0.9f), embeddingOf(3, afterUpdate))
+
+      spark.sql(
+        s"""
+           |merge into $tableName h0
+           |using (
+           |  select 4 as id,
+           |  array(cast(0.44 as float), cast(0.55 as float), cast(0.66 as float)) as embedding,
+           |  1000L as ts
+           |) s0
+           | on h0.id = s0.id
+           | when not matched then insert *
+           |""".stripMargin)
+
+      // 5 commits will trigger compaction.
+      assertFalse(DataSourceTestUtils.isLogFileOnly(path))
+      val afterCompaction = readOrdered()
+      assertEquals(4, afterCompaction.size)
+      assertEquals(Seq(0.11f, 0.22f, 0.33f), embeddingOf(1, afterCompaction))
+      assertEquals(Seq(0.4f, 0.5f, 0.6f), embeddingOf(2, afterCompaction))
+      assertEquals(Seq(0.7f, 0.8f, 0.9f), embeddingOf(3, afterCompaction))
+      assertEquals(Seq(0.44f, 0.55f, 0.66f), embeddingOf(4, afterCompaction))
+
+      // VECTOR custom-type descriptor must survive the compacted base-file read path.
+      val embeddingField = spark.table(tableName).schema.find(_.name == "embedding").get
+      assertTrue(embeddingField.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD),
+        s"Expected VECTOR type metadata on embedding field after compaction, " +
+          s"got: ${embeddingField.metadata}")
+
+      // 6th commit drives an auto-clean that retires the now-superseded log-only slice.
+      // Inline compaction on commit 5 ran AFTER its own postCommit clean, so the prior
+      // slice was not yet superseded when that clean fired and no .clean instant was
+      // written. This deltacommit's postCommit clean sees the post-compaction base
+      // file and writes the .clean instant.
+      spark.sql(
+        s"""
+           |merge into $tableName h0
+           |using (
+           |  select 2 as id,
+           |  array(cast(0.222 as float), cast(0.555 as float), cast(0.888 as float)) as embedding,
+           |  1002L as ts
+           |) s0
+           | on h0.id = s0.id
+           | when matched then update set *
+           |""".stripMargin)
+      val afterCleanup = readOrdered()
+      assertEquals(Seq(0.11f, 0.22f, 0.33f), embeddingOf(1, afterCleanup))
+      assertEquals(Seq(0.222f, 0.555f, 0.888f), embeddingOf(2, afterCleanup))
+      assertEquals(Seq(0.7f, 0.8f, 0.9f), embeddingOf(3, afterCleanup))
+      assertEquals(Seq(0.44f, 0.55f, 0.66f), embeddingOf(4, afterCleanup))
+
+      val metaClient = HoodieTableMetaClient.builder()
+        .setBasePath(path).setConf(storageConf).build()
+      metaClient.reloadActiveTimeline()
+      assertTrue(metaClient.getActiveTimeline.getCleanerTimeline.countInstants() > 0,
+        "Expected at least one .clean instant on the timeline after compaction")
+
+      // Format-specific assertions run last, while the table still exists.
+      formatSpecificChecks(metaClient)
+    } finally {
+      spark.sql(s"drop table if exists $tableName")
+    }
+  }
+
   @Test
   def testDimensionMismatchOnWrite(): Unit = {
     // Schema declares VECTOR(8) but data has arrays of length 4
@@ -770,10 +1030,10 @@ class TestVectorDataSource extends HoodieSparkClientTestBase {
       val reader = ParquetFileReader.open(HadoopInputFile.fromPath(parquetFiles.head.getPath, conf))
       try {
         val footerMeta = reader.getFileMetaData.getKeyValueMetaData.asScala
-        assertTrue(footerMeta.contains(HoodieSchema.PARQUET_VECTOR_COLUMNS_METADATA_KEY),
-          s"Footer should contain ${HoodieSchema.PARQUET_VECTOR_COLUMNS_METADATA_KEY}, got keys: ${footerMeta.keys.mkString(", ")}")
+        assertTrue(footerMeta.contains(HoodieSchema.VECTOR_COLUMNS_METADATA_KEY),
+          s"Footer should contain ${HoodieSchema.VECTOR_COLUMNS_METADATA_KEY}, got keys: ${footerMeta.keys.mkString(", ")}")
 
-        val value = footerMeta(HoodieSchema.PARQUET_VECTOR_COLUMNS_METADATA_KEY)
+        val value = footerMeta(HoodieSchema.VECTOR_COLUMNS_METADATA_KEY)
         assertTrue(value.contains("embedding"), s"Footer value should reference 'embedding' column, got: $value")
         assertTrue(value.contains("VECTOR"), s"Footer value should contain 'VECTOR' descriptor, got: $value")
       } finally {
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/avro/TestSchemaConverters.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/avro/TestSchemaConverters.scala
index c27fa2d2878f9..36de434477d56 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/avro/TestSchemaConverters.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/avro/TestSchemaConverters.scala @@ -249,6 +249,51 @@ class TestSchemaConverters extends SparkAdapterSupport { assertEquals(HoodieSchemaType.VARIANT, nullableField.schema().getNonNullType.getType) } + @Test + def testInvalidVariantSchemaWrongFieldCount(): Unit = { + val invalidStruct = new StructType(Array[StructField]( + StructField(HoodieSchema.Variant.VARIANT_METADATA_FIELD, DataTypes.BinaryType, nullable = false) + )) + assertInvalidVariantSchema(invalidStruct) + } + + @Test + def testInvalidVariantSchemaNullableMetadataField(): Unit = { + val invalidStruct = new StructType(Array[StructField]( + StructField(HoodieSchema.Variant.VARIANT_METADATA_FIELD, DataTypes.BinaryType, nullable = true), + StructField(HoodieSchema.Variant.VARIANT_VALUE_FIELD, DataTypes.BinaryType, nullable = false) + )) + assertInvalidVariantSchema(invalidStruct) + } + + @Test + def testInvalidVariantSchemaWrongValueFieldType(): Unit = { + val invalidStruct = new StructType(Array[StructField]( + StructField(HoodieSchema.Variant.VARIANT_METADATA_FIELD, DataTypes.BinaryType, nullable = false), + StructField(HoodieSchema.Variant.VARIANT_VALUE_FIELD, DataTypes.StringType, nullable = false) + )) + assertInvalidVariantSchema(invalidStruct) + } + + @Test + def testInvalidVariantSchemaWrongFieldNames(): Unit = { + val invalidStruct = new StructType(Array[StructField]( + StructField("foo", DataTypes.BinaryType, nullable = false), + StructField("bar", DataTypes.BinaryType, nullable = false) + )) + assertInvalidVariantSchema(invalidStruct) + } + + private def assertInvalidVariantSchema(invalidStruct: StructType): Unit = { + val metadata = new MetadataBuilder() + .putString(HoodieSchema.TYPE_METADATA_FIELD, HoodieSchemaType.VARIANT.name()) + .build() + val exception = assertThrows(classOf[IllegalArgumentException], () => { + 
HoodieSparkSchemaConverters.toHoodieType(invalidStruct, metadata = metadata) + }) + assertTrue(exception.getMessage.startsWith("Invalid variant schema structure")) + } + @Test def testTopLevelVectorStillAllowed(): Unit = { val vectorMetadata = new MetadataBuilder() diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala index 5cd5dbb0d54bf..107b934832703 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala @@ -37,6 +37,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, HoodieCatalogTable} import org.apache.spark.sql.functions.{col, concat, expr, lit} import org.apache.spark.sql.hudi.HoodieSqlCommonUtils +import org.apache.spark.sql.hudi.command.CreateHoodieTableCommand import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase.{disableComplexKeygenValidation, getLastCommitMetadata} import org.apache.spark.sql.types._ @@ -2120,4 +2121,216 @@ class TestCreateTable extends HoodieSparkSqlTestBase { assertTrue(blobPathField.dataType.isInstanceOf[StringType]) } } + + test("test create table with VECTOR column") { + withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |CREATE TABLE $tableName ( + | id BIGINT, + | embedding VECTOR(128) COMMENT 'document embedding' + |) USING hudi + |LOCATION '${tmp.getCanonicalPath}' + |TBLPROPERTIES ( + | primaryKey = 'id' + |) + """.stripMargin) + + val schema = spark.table(tableName).schema + val embeddingField = schema.find(_.name == "embedding").get + assertTrue(embeddingField.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD)) + 
assertEquals("VECTOR(128)", embeddingField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals("document embedding", embeddingField.metadata.getString("comment")) + assertEquals(ArrayType(FloatType, containsNull = false), embeddingField.dataType) + } + } + + test("test create table with VECTOR column with element type") { + withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |CREATE TABLE $tableName ( + | id BIGINT, + | embedding VECTOR(64, DOUBLE) + |) USING hudi + |LOCATION '${tmp.getCanonicalPath}' + |TBLPROPERTIES ( + | primaryKey = 'id' + |) + """.stripMargin) + + val schema = spark.table(tableName).schema + val embeddingField = schema.find(_.name == "embedding").get + assertTrue(embeddingField.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals("VECTOR(64, DOUBLE)", embeddingField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals(ArrayType(DoubleType, containsNull = false), embeddingField.dataType) + } + } + + test("test create table with multiple VECTOR columns") { + withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |CREATE TABLE $tableName ( + | id BIGINT, + | float_vec VECTOR(128), + | float_vec_explicit VECTOR(128, FLOAT), + | double_vec VECTOR(64, DOUBLE), + | int8_vec VECTOR(256, INT8) NOT NULL + |) USING hudi + |LOCATION '${tmp.getCanonicalPath}' + |TBLPROPERTIES ( + | primaryKey = 'id' + |) + """.stripMargin) + + val schema = spark.table(tableName).schema + + val floatVecField = schema.find(_.name == "float_vec").get + assertEquals("VECTOR(128)", floatVecField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals(ArrayType(FloatType, containsNull = false), floatVecField.dataType) + assertTrue(floatVecField.nullable) + + // VECTOR(128, FLOAT) should be normalized to the canonical form "VECTOR(128)" + val floatVecExplicitField = schema.find(_.name == "float_vec_explicit").get + assertEquals("VECTOR(128)", 
floatVecExplicitField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals(ArrayType(FloatType, containsNull = false), floatVecExplicitField.dataType) + + val doubleVecField = schema.find(_.name == "double_vec").get + assertEquals("VECTOR(64, DOUBLE)", doubleVecField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals(ArrayType(DoubleType, containsNull = false), doubleVecField.dataType) + + val int8VecField = schema.find(_.name == "int8_vec").get + assertEquals("VECTOR(256, INT8)", int8VecField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals(ArrayType(ByteType, containsNull = false), int8VecField.dataType) + assertFalse(int8VecField.nullable) + } + } + + test("test create table with INT8 VECTOR column") { + withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |CREATE TABLE $tableName ( + | id BIGINT, + | embedding VECTOR(256, INT8) + |) USING hudi + |LOCATION '${tmp.getCanonicalPath}' + |TBLPROPERTIES ( + | primaryKey = 'id' + |) + """.stripMargin) + + val schema = spark.table(tableName).schema + val embeddingField = schema.find(_.name == "embedding").get + assertEquals("VECTOR(256, INT8)", embeddingField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals(ArrayType(ByteType, containsNull = false), embeddingField.dataType) + } + } + + test("test create table with VECTOR without dimension fails") { + withTempDir { tmp => + val tableName = generateTableName + checkExceptionContain( + s""" + |CREATE TABLE $tableName ( + | id BIGINT, + | v VECTOR + |) USING hudi + |LOCATION '${tmp.getCanonicalPath}' + |TBLPROPERTIES ( + | primaryKey = 'id' + |) + """.stripMargin)("vector is not supported") + } + } + + test("test create table with invalid VECTOR type surfaces ParseException") { + withTempDir { tmp => + val tableName = generateTableName + // Unsupported element type + checkExceptionContain( + s""" + |CREATE TABLE $tableName ( + | id BIGINT, + | embedding VECTOR(128, BOOLEAN) 
+ |) USING hudi + |LOCATION '${tmp.getCanonicalPath}' + |TBLPROPERTIES ( + | primaryKey = 'id' + |) + """.stripMargin)("Invalid VECTOR type") + } + } + + test("test create table with VECTOR column case insensitive") { + withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |CREATE TABLE $tableName ( + | id BIGINT, + | embedding vector(128) + |) USING hudi + |LOCATION '${tmp.getCanonicalPath}' + |TBLPROPERTIES ( + | primaryKey = 'id' + |) + """.stripMargin) + + val schema = spark.table(tableName).schema + val embeddingField = schema.find(_.name == "embedding").get + assertTrue(embeddingField.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals(ArrayType(FloatType, containsNull = false), embeddingField.dataType) + } + } + + test("toHiveCompatibleSchema rewrites VECTOR field to BinaryType and preserves metadata") { + // VECTOR's on-disk layout is Parquet fixed_len_byte_array (RFC-99); the HMS column must be + // BINARY so Hive-side Parquet reads match the physical type. The logical ArrayType(FloatType) + // view is preserved in spark.sql.sources.schema.* TBLPROPERTIES via the retained metadata. 
+ val vectorMetadata = new MetadataBuilder() + .putString(HoodieSchema.TYPE_METADATA_FIELD, "VECTOR(3)") + .putString("comment", "embedding") + .build() + + val schema = StructType(Seq( + StructField("id", LongType, nullable = false), + StructField("name", StringType), + StructField( + "embedding", + ArrayType(FloatType, containsNull = false), + nullable = true, + metadata = vectorMetadata), + StructField("dt", StringType))) + + val hiveSchema = CreateHoodieTableCommand.toHiveCompatibleSchema(schema) + + val embedding = hiveSchema("embedding") + assertEquals(BinaryType, embedding.dataType) + assertEquals("binary", embedding.dataType.catalogString) + assertTrue(embedding.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals("VECTOR(3)", embedding.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + assertEquals("embedding", embedding.metadata.getString("comment")) + + // Non-VECTOR fields must pass through unchanged. + assertEquals(LongType, hiveSchema("id").dataType) + assertEquals(StringType, hiveSchema("name").dataType) + assertEquals(StringType, hiveSchema("dt").dataType) + } + + test("toHiveCompatibleSchema leaves plain ArrayType(FloatType) without VECTOR marker alone") { + // Guard against accidentally rewriting any ArrayType(FloatType). Only fields carrying the + // VECTOR descriptor in hudi_type metadata should be converted to BinaryType. 
+ val schema = StructType(Seq( + StructField("id", LongType, nullable = false), + StructField("floats", ArrayType(FloatType, containsNull = false)))) + + val hiveSchema = CreateHoodieTableCommand.toHiveCompatibleSchema(schema) + assertEquals(ArrayType(FloatType, containsNull = false), hiveSchema("floats").dataType) + } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/schema/TestBlobDataType.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/schema/TestBlobDataType.scala new file mode 100644 index 0000000000000..f9dd9262c4b04 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/schema/TestBlobDataType.scala @@ -0,0 +1,518 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.spark.sql.hudi.dml.schema + +import org.apache.hudi.blob.BlobTestHelpers +import org.apache.hudi.common.model.HoodieFileFormat +import org.apache.hudi.common.schema.{HoodieSchema, HoodieSchemaType} +import org.apache.hudi.testutils.DataSourceTestUtils +import org.apache.hudi.testutils.HoodieClientTestUtils.createMetaClient + +import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase + +import java.io.File + +class TestBlobDataType extends HoodieSparkSqlTestBase { + + private val referenceStructType = + "struct<external_path:string,offset:bigint,length:bigint,managed:boolean>" + + private def inlineBlobLiteral(hex: String): String = + s"""named_struct( + | 'type', 'INLINE', + | 'data', cast(X'$hex' as binary), + | 'reference', cast(null as $referenceStructType) + |)""".stripMargin + + private def outOfLineBlobLiteral(path: String, offset: Long, length: Long): String = + s"""named_struct( + | 'type', 'OUT_OF_LINE', + | 'data', cast(null as binary), + | 'reference', named_struct( + | 'external_path', '$path', + | 'offset', cast($offset as bigint), + | 'length', cast($length as bigint), + | 'managed', false + | ) + |)""".stripMargin + + test("Test Query Log Only MOR Table With BLOB INLINE column triggers compaction") { + withRecordType()(withTempDir { tmp => + val tablePath = new File(tmp, "hudi").getCanonicalPath + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | data blob, + | ts long + |) using hudi + | location '$tablePath' + | tblproperties ( + | primaryKey = 'id', + | type = 'mor', + | preCombineField = 'ts', + | hoodie.index.type = 'INMEMORY', + | hoodie.compact.inline = 'true', + | hoodie.clean.commits.retained = '1' + | ) + """.stripMargin) + + spark.sql(s"insert into $tableName values (1, ${inlineBlobLiteral("01")}, 1000)") + spark.sql(s"insert into $tableName values (2, ${inlineBlobLiteral("02")}, 1000)") + spark.sql(s"insert into $tableName values (3, ${inlineBlobLiteral("03")}, 1000)") + // 3 commits will not trigger compaction, so it
should be log only. + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 1 as id, ${inlineBlobLiteral("11")} as data, 1001L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + // 4 commits will not trigger compaction, so it should be log only. + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 4 as id, ${inlineBlobLiteral("04")} as data, 1000L as ts + |) s0 + | on h0.id = s0.id + | when not matched then insert * + |""".stripMargin) + + // 5 commits will trigger compaction. + assertResult(false)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + // read_blob() on an INLINE column returns the inline bytes directly, verify the + // post-compaction bytes match what was written. + val bytesById = spark.sql( + s"select id, read_blob(data) as bytes from $tableName order by id" + ).collect().map(r => r.getInt(0) -> r.getAs[Array[Byte]]("bytes")).toMap + assertResult(4)(bytesById.size) + assert(bytesById(1).sameElements(Array(0x11.toByte))) + assert(bytesById(2).sameElements(Array(0x02.toByte))) + assert(bytesById(3).sameElements(Array(0x03.toByte))) + assert(bytesById(4).sameElements(Array(0x04.toByte))) + + // Verify inline shape: type='INLINE', data non-null, reference null. + spark.sql(s"select id, data from $tableName order by id").collect().foreach { row => + val blob = row.getStruct(1) + assertResult("INLINE")(blob.getString(blob.fieldIndex(HoodieSchema.Blob.TYPE))) + assert(!blob.isNullAt(blob.fieldIndex(HoodieSchema.Blob.INLINE_DATA_FIELD))) + assert(blob.isNullAt(blob.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE))) + } + + // BLOB custom-type descriptor must survive the compacted base-file read path. 
+ val blobField = spark.table(tableName).schema.find(_.name == "data").get + assert(blobField.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD), + s"Expected BLOB type metadata on data field after compaction, " + + s"got: ${blobField.metadata}") + assertResult(HoodieSchemaType.BLOB.name())( + blobField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + + // 6th commit drives an auto-clean that retires the now-superseded log-only slice. + // Inline compaction on commit 5 ran AFTER its own postCommit clean, so the prior + // slice was not yet superseded when that clean fired and no .clean instant was + // written. This deltacommit's postCommit clean writes the .clean instant. + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 2 as id, ${inlineBlobLiteral("22")} as data, 1002L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + val updatedBytesById = spark.sql( + s"select id, read_blob(data) as bytes from $tableName order by id" + ).collect().map(r => r.getInt(0) -> r.getAs[Array[Byte]]("bytes")).toMap + assert(updatedBytesById(2).sameElements(Array(0x22.toByte))) + + val metaClient = createMetaClient(spark, tablePath) + metaClient.reloadActiveTimeline() + assert(metaClient.getActiveTimeline.getCleanerTimeline.countInstants() > 0, + "Expected at least one .clean instant on the timeline after compaction") + }) + } + + test("Test Query Log Only MOR Table With BLOB OUT_OF_LINE column triggers compaction") { + withRecordType()(withTempDir { tmp => + val tablePath = new File(tmp, "hudi").getCanonicalPath + val blobDir = new File(tmp, "blobs") + blobDir.mkdirs() + // createTestFile writes bytes where byte[i] = i % 256, assertBytesContent + // checks round-trip against that pattern. 
+ val file1 = BlobTestHelpers.createTestFile(blobDir.toPath, "blob1.bin", 100) + val file2 = BlobTestHelpers.createTestFile(blobDir.toPath, "blob2.bin", 100) + val file3 = BlobTestHelpers.createTestFile(blobDir.toPath, "blob3.bin", 100) + val file4 = BlobTestHelpers.createTestFile(blobDir.toPath, "blob4.bin", 100) + val file1Updated = BlobTestHelpers.createTestFile(blobDir.toPath, "blob1_updated.bin", 80) + val file2Updated = BlobTestHelpers.createTestFile(blobDir.toPath, "blob2_updated.bin", 60) + + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | data blob, + | ts long + |) using hudi + | location '$tablePath' + | tblproperties ( + | primaryKey = 'id', + | type = 'mor', + | preCombineField = 'ts', + | hoodie.index.type = 'INMEMORY', + | hoodie.compact.inline = 'true', + | hoodie.clean.commits.retained = '1' + | ) + """.stripMargin) + + spark.sql( + s"insert into $tableName values (1, ${outOfLineBlobLiteral(file1, 0L, 100L)}, 1000)") + spark.sql( + s"insert into $tableName values (2, ${outOfLineBlobLiteral(file2, 0L, 100L)}, 1000)") + spark.sql( + s"insert into $tableName values (3, ${outOfLineBlobLiteral(file3, 0L, 100L)}, 1000)") + // 3 commits will not trigger compaction, so it should be log only. + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 1 as id, ${outOfLineBlobLiteral(file1Updated, 0L, 80L)} as data, 1001L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + // 4 commits will not trigger compaction, so it should be log only. + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 4 as id, ${outOfLineBlobLiteral(file4, 0L, 100L)} as data, 1000L as ts + |) s0 + | on h0.id = s0.id + | when not matched then insert * + |""".stripMargin) + + // 5 commits will trigger compaction. 
+ assertResult(false)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + // read_blob() on an OUT_OF_LINE column must dereference external_path and read + // the referenced byte range, verify bytes from the compacted base-file plan. + val bytesById = spark.sql( + s"select id, read_blob(data) as bytes from $tableName order by id" + ).collect().map(r => r.getInt(0) -> r.getAs[Array[Byte]]("bytes")).toMap + assertResult(4)(bytesById.size) + assertResult(80)(bytesById(1).length) + BlobTestHelpers.assertBytesContent(bytesById(1)) + assertResult(100)(bytesById(2).length) + BlobTestHelpers.assertBytesContent(bytesById(2)) + assertResult(100)(bytesById(3).length) + BlobTestHelpers.assertBytesContent(bytesById(3)) + assertResult(100)(bytesById(4).length) + BlobTestHelpers.assertBytesContent(bytesById(4)) + + // Verify out-of-line shape: type='OUT_OF_LINE', data null, reference non-null. + spark.sql(s"select id, data from $tableName order by id").collect().foreach { row => + val blob = row.getStruct(1) + assertResult("OUT_OF_LINE")(blob.getString(blob.fieldIndex(HoodieSchema.Blob.TYPE))) + assert(blob.isNullAt(blob.fieldIndex(HoodieSchema.Blob.INLINE_DATA_FIELD))) + assert(!blob.isNullAt(blob.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE))) + } + + // BLOB custom-type descriptor must survive the compacted base-file read path. + val blobField = spark.table(tableName).schema.find(_.name == "data").get + assert(blobField.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD), + s"Expected BLOB type metadata on data field after compaction, " + + s"got: ${blobField.metadata}") + assertResult(HoodieSchemaType.BLOB.name())( + blobField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + + // 6th commit drives an auto-clean that retires the now-superseded log-only slice. + // Inline compaction on commit 5 ran AFTER its own postCommit clean, so the prior + // slice was not yet superseded when that clean fired and no .clean instant was + // written. 
This deltacommit's postCommit clean writes the .clean instant. + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 2 as id, ${outOfLineBlobLiteral(file2Updated, 0L, 60L)} as data, 1002L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + val updatedBytesById = spark.sql( + s"select id, read_blob(data) as bytes from $tableName order by id" + ).collect().map(r => r.getInt(0) -> r.getAs[Array[Byte]]("bytes")).toMap + assertResult(60)(updatedBytesById(2).length) + BlobTestHelpers.assertBytesContent(updatedBytesById(2)) + + val metaClient = createMetaClient(spark, tablePath) + metaClient.reloadActiveTimeline() + assert(metaClient.getActiveTimeline.getCleanerTimeline.countInstants() > 0, + "Expected at least one .clean instant on the timeline after compaction") + }) + } + + test("Test Query Log Only MOR Table With BLOB INLINE column triggers compaction (Lance)") { + assume(System.getProperty("lance.skip.tests") != "true", + "Lance tests disabled via -Dlance.skip.tests=true") + // Covers log-only MOR ingest of INLINE blobs on a Lance base format, the inline + // compaction trigger, and the post-compaction inline read shape. + + withRecordType()(withTempDir { tmp => + val tablePath = new File(tmp, "hudi").getCanonicalPath + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | data blob, + | ts long + |) using hudi + | location '$tablePath' + | tblproperties ( + | primaryKey = 'id', + | type = 'mor', + | preCombineField = 'ts', + | hoodie.index.type = 'INMEMORY', + | hoodie.compact.inline = 'true', + | hoodie.clean.commits.retained = '1', + | 'hoodie.table.base.file.format' = 'LANCE' + | ) + """.stripMargin) + + // Verify the LANCE config was actually persisted to hoodie.properties. 
+ assertResult(HoodieFileFormat.LANCE)( + createMetaClient(spark, tablePath).getTableConfig.getBaseFileFormat) + + spark.sql(s"insert into $tableName values (1, ${inlineBlobLiteral("01")}, 1000)") + spark.sql(s"insert into $tableName values (2, ${inlineBlobLiteral("02")}, 1000)") + spark.sql(s"insert into $tableName values (3, ${inlineBlobLiteral("03")}, 1000)") + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 1 as id, ${inlineBlobLiteral("11")} as data, 1001L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 4 as id, ${inlineBlobLiteral("04")} as data, 1000L as ts + |) s0 + | on h0.id = s0.id + | when not matched then insert * + |""".stripMargin) + + assertResult(false)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + val bytesById = spark.sql( + s"select id, read_blob(data) as bytes from $tableName order by id" + ).collect().map(r => r.getInt(0) -> r.getAs[Array[Byte]]("bytes")).toMap + assertResult(4)(bytesById.size) + assert(bytesById(1).sameElements(Array(0x11.toByte))) + assert(bytesById(2).sameElements(Array(0x02.toByte))) + assert(bytesById(3).sameElements(Array(0x03.toByte))) + assert(bytesById(4).sameElements(Array(0x04.toByte))) + + spark.sql(s"select id, data from $tableName order by id").collect().foreach { row => + val blob = row.getStruct(1) + assertResult("INLINE")(blob.getString(blob.fieldIndex(HoodieSchema.Blob.TYPE))) + assert(!blob.isNullAt(blob.fieldIndex(HoodieSchema.Blob.INLINE_DATA_FIELD))) + // Lance materializes the `reference` struct as non-null with all-null leaves for + // INLINE rows (vs. a null struct on Parquet). `type` is the canonical INLINE + // discriminator per RFC-100; tolerate either shape and just check the leaves. 
+ val refIdx = blob.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE) + if (!blob.isNullAt(refIdx)) { + val ref = blob.getStruct(refIdx) + assert(ref.isNullAt(ref.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE_PATH))) + } + } + + val blobField = spark.table(tableName).schema.find(_.name == "data").get + assert(blobField.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD), + s"Expected BLOB type metadata on data field after compaction, " + + s"got: ${blobField.metadata}") + assertResult(HoodieSchemaType.BLOB.name())( + blobField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + + // 6th commit drives an auto-clean that retires the now-superseded log-only slice. + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 2 as id, ${inlineBlobLiteral("22")} as data, 1002L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + val updatedBytesById = spark.sql( + s"select id, read_blob(data) as bytes from $tableName order by id" + ).collect().map(r => r.getInt(0) -> r.getAs[Array[Byte]]("bytes")).toMap + assert(updatedBytesById(2).sameElements(Array(0x22.toByte))) + + val metaClient = createMetaClient(spark, tablePath) + metaClient.reloadActiveTimeline() + assert(metaClient.getActiveTimeline.getCleanerTimeline.countInstants() > 0, + "Expected at least one .clean instant on the timeline after compaction") + }) + } + + test("Test Query Log Only MOR Table With BLOB OUT_OF_LINE column triggers compaction (Lance)") { + assume(System.getProperty("lance.skip.tests") != "true", + "Lance tests disabled via -Dlance.skip.tests=true") + // Lance writer has no BLOB handling today (RFC-100 Phase 2). Expected to fail + // until support lands in HoodieSparkLanceWriter; this test pins the gap. 
+ + withRecordType()(withTempDir { tmp => + val tablePath = new File(tmp, "hudi").getCanonicalPath + val blobDir = new File(tmp, "blobs") + blobDir.mkdirs() + val file1 = BlobTestHelpers.createTestFile(blobDir.toPath, "blob1.bin", 100) + val file2 = BlobTestHelpers.createTestFile(blobDir.toPath, "blob2.bin", 100) + val file3 = BlobTestHelpers.createTestFile(blobDir.toPath, "blob3.bin", 100) + val file4 = BlobTestHelpers.createTestFile(blobDir.toPath, "blob4.bin", 100) + val file1Updated = BlobTestHelpers.createTestFile(blobDir.toPath, "blob1_updated.bin", 80) + val file2Updated = BlobTestHelpers.createTestFile(blobDir.toPath, "blob2_updated.bin", 60) + + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | data blob, + | ts long + |) using hudi + | location '$tablePath' + | tblproperties ( + | primaryKey = 'id', + | type = 'mor', + | preCombineField = 'ts', + | hoodie.index.type = 'INMEMORY', + | hoodie.compact.inline = 'true', + | hoodie.clean.commits.retained = '1', + | 'hoodie.table.base.file.format' = 'LANCE' + | ) + """.stripMargin) + + // Verify the LANCE config was actually persisted to hoodie.properties. 
+ assertResult(HoodieFileFormat.LANCE)( + createMetaClient(spark, tablePath).getTableConfig.getBaseFileFormat) + + spark.sql( + s"insert into $tableName values (1, ${outOfLineBlobLiteral(file1, 0L, 100L)}, 1000)") + spark.sql( + s"insert into $tableName values (2, ${outOfLineBlobLiteral(file2, 0L, 100L)}, 1000)") + spark.sql( + s"insert into $tableName values (3, ${outOfLineBlobLiteral(file3, 0L, 100L)}, 1000)") + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 1 as id, ${outOfLineBlobLiteral(file1Updated, 0L, 80L)} as data, 1001L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 4 as id, ${outOfLineBlobLiteral(file4, 0L, 100L)} as data, 1000L as ts + |) s0 + | on h0.id = s0.id + | when not matched then insert * + |""".stripMargin) + + assertResult(false)(DataSourceTestUtils.isLogFileOnly(tablePath)) + + val bytesById = spark.sql( + s"select id, read_blob(data) as bytes from $tableName order by id" + ).collect().map(r => r.getInt(0) -> r.getAs[Array[Byte]]("bytes")).toMap + assertResult(4)(bytesById.size) + assertResult(80)(bytesById(1).length) + BlobTestHelpers.assertBytesContent(bytesById(1)) + assertResult(100)(bytesById(2).length) + BlobTestHelpers.assertBytesContent(bytesById(2)) + assertResult(100)(bytesById(3).length) + BlobTestHelpers.assertBytesContent(bytesById(3)) + assertResult(100)(bytesById(4).length) + BlobTestHelpers.assertBytesContent(bytesById(4)) + + spark.sql(s"select id, data from $tableName order by id").collect().foreach { row => + val blob = row.getStruct(1) + assertResult("OUT_OF_LINE")(blob.getString(blob.fieldIndex(HoodieSchema.Blob.TYPE))) + assert(blob.isNullAt(blob.fieldIndex(HoodieSchema.Blob.INLINE_DATA_FIELD))) + 
assert(!blob.isNullAt(blob.fieldIndex(HoodieSchema.Blob.EXTERNAL_REFERENCE))) + } + + val blobField = spark.table(tableName).schema.find(_.name == "data").get + assert(blobField.metadata.contains(HoodieSchema.TYPE_METADATA_FIELD), + s"Expected BLOB type metadata on data field after compaction, " + + s"got: ${blobField.metadata}") + assertResult(HoodieSchemaType.BLOB.name())( + blobField.metadata.getString(HoodieSchema.TYPE_METADATA_FIELD)) + + // 6th commit drives an auto-clean that retires the now-superseded log-only slice. + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 2 as id, ${outOfLineBlobLiteral(file2Updated, 0L, 60L)} as data, 1002L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + val updatedBytesById = spark.sql( + s"select id, read_blob(data) as bytes from $tableName order by id" + ).collect().map(r => r.getInt(0) -> r.getAs[Array[Byte]]("bytes")).toMap + assertResult(60)(updatedBytesById(2).length) + BlobTestHelpers.assertBytesContent(updatedBytesById(2)) + + val metaClient = createMetaClient(spark, tablePath) + metaClient.reloadActiveTimeline() + assert(metaClient.getActiveTimeline.getCleanerTimeline.countInstants() > 0, + "Expected at least one .clean instant on the timeline after compaction") + }) + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/schema/TestVariantDataType.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/schema/TestVariantDataType.scala index a236592bc1d4f..f9c2dbc32c6c2 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/schema/TestVariantDataType.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/schema/TestVariantDataType.scala @@ -20,11 +20,20 @@ package org.apache.spark.sql.hudi.dml.schema import org.apache.hudi.HoodieSparkUtils +import org.apache.hudi.common.model.HoodieFileFormat +import 
org.apache.hudi.common.schema.HoodieSchema import org.apache.hudi.common.testutils.HoodieTestUtils import org.apache.hudi.common.util.StringUtils import org.apache.hudi.internal.schema.HoodieSchemaException +import org.apache.hudi.testutils.DataSourceTestUtils +import org.apache.hudi.testutils.HoodieClientTestUtils.createMetaClient +import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.hudi.command.CreateHoodieTableCommand import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase +import org.apache.spark.sql.types.{ArrayType, BinaryType, DataType, LongType, MapType, MetadataBuilder, StringType, StructField, StructType} class TestVariantDataType extends HoodieSparkSqlTestBase { @@ -87,10 +96,468 @@ class TestVariantDataType extends HoodieSparkSqlTestBase { checkAnswer(s"select id, name, cast(v as string), ts from $tableName order by id")( Seq(1, "row1", "{\"new_field\":123,\"updated\":true}", 1000) ) + + // Test MergeInto: exercises both MATCHED (UPDATE SET on the Variant + // column) and NOT MATCHED (INSERT of a new row carrying a Variant + // literal). 
+ spark.sql( + s""" + |merge into $tableName t + |using ( + | select 1 as id, 'row1' as name, parse_json('{"key":"v1-merged"}') as v, 2000L as ts + | union all + | select 3 as id, 'row3' as name, parse_json('{"key":"v3"}') as v, 2000L as ts + |) s + |on t.id = s.id + |when matched then update set t.v = s.v, t.ts = s.ts + |when not matched then insert (id, name, v, ts) values (s.id, s.name, s.v, s.ts) + """.stripMargin) + + checkAnswer(s"select id, name, cast(v as string), ts from $tableName order by id")( + Seq(1, "row1", "{\"key\":\"v1-merged\"}", 2000), + Seq(3, "row3", "{\"key\":\"v3\"}", 2000) + ) }) } } + test("Test Query Log Only MOR Table With VARIANT column triggers compaction") { + assume(HoodieSparkUtils.gteqSpark4_0, "Variant type requires Spark 4.0 or higher") + + withRecordType()(withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | v variant, + | ts long + |) using hudi + | location '${tmp.getCanonicalPath}' + | tblproperties ( + | primaryKey = 'id', + | type = 'mor', + | preCombineField = 'ts', + | hoodie.index.type = 'INMEMORY', + | hoodie.compact.inline = 'true', + | hoodie.clean.commits.retained = '1' + | ) + """.stripMargin) + + spark.sql( + s"insert into $tableName values " + + "(1, parse_json('{\"key\":\"value1\"}'), 1000)") + spark.sql( + s"insert into $tableName values " + + "(2, parse_json('{\"key\":\"value2\"}'), 1000)") + spark.sql( + s"insert into $tableName values " + + "(3, parse_json('{\"key\":\"value3\"}'), 1000)") + // 3 commits will not trigger compaction, so it should be log only. 
+ assertResult(true)(DataSourceTestUtils.isLogFileOnly(tmp.getCanonicalPath)) + checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")( + Seq(1, "{\"key\":\"value1\"}", 1000), + Seq(2, "{\"key\":\"value2\"}", 1000), + Seq(3, "{\"key\":\"value3\"}", 1000) + ) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 1 as id, + | parse_json('{"key":"v1-merged"}') as v, + | 1001L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + // 4 commits will not trigger compaction, so it should be log only. + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tmp.getCanonicalPath)) + checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")( + Seq(1, "{\"key\":\"v1-merged\"}", 1001), + Seq(2, "{\"key\":\"value2\"}", 1000), + Seq(3, "{\"key\":\"value3\"}", 1000) + ) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 4 as id, + | parse_json('{"key":"value4"}') as v, + | 1000L as ts + |) s0 + | on h0.id = s0.id + | when not matched then insert * + |""".stripMargin) + + // 5 commits will trigger compaction. + assertResult(false)(DataSourceTestUtils.isLogFileOnly(tmp.getCanonicalPath)) + checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")( + Seq(1, "{\"key\":\"v1-merged\"}", 1001), + Seq(2, "{\"key\":\"value2\"}", 1000), + Seq(3, "{\"key\":\"value3\"}", 1000), + Seq(4, "{\"key\":\"value4\"}", 1000) + ) + + // VARIANT must round-trip as native VariantType through the compacted base-file read path. + val variantField = spark.table(tableName).schema.find(_.name == "v").get + assertResult("variant")(variantField.dataType.typeName) + + // 6th commit drives an auto-clean that retires the now-superseded log-only slice. + // Inline compaction on commit 5 ran AFTER its own postCommit clean, so the prior + // slice was not yet superseded when that clean fired and no .clean instant was + // written. 
This deltacommit's postCommit clean writes the .clean instant. + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 2 as id, + | parse_json('{"key":"v2-merged"}') as v, + | 1002L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")( + Seq(1, "{\"key\":\"v1-merged\"}", 1001), + Seq(2, "{\"key\":\"v2-merged\"}", 1002), + Seq(3, "{\"key\":\"value3\"}", 1000), + Seq(4, "{\"key\":\"value4\"}", 1000) + ) + + val metaClient = createMetaClient(spark, tmp.getCanonicalPath) + metaClient.reloadActiveTimeline() + assert(metaClient.getActiveTimeline.getCleanerTimeline.countInstants() > 0, + "Expected at least one .clean instant on the timeline after compaction") + }) + } + + test("Test Query Log Only MOR Table With VARIANT column triggers compaction (Lance)") { + assume(HoodieSparkUtils.gteqSpark4_0, "Variant type requires Spark 4.0 or higher") + assume(System.getProperty("lance.skip.tests") != "true", + "Lance tests disabled via -Dlance.skip.tests=true") + // Lance writer has no VARIANT handling today (RFC-100 Phase 2). Expected to fail + // until support lands in HoodieSparkLanceWriter; this test pins the gap. + + withRecordType()(withTempDir { tmp => + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | v variant, + | ts long + |) using hudi + | location '${tmp.getCanonicalPath}' + | tblproperties ( + | primaryKey = 'id', + | type = 'mor', + | preCombineField = 'ts', + | hoodie.index.type = 'INMEMORY', + | hoodie.compact.inline = 'true', + | hoodie.clean.commits.retained = '1', + | 'hoodie.table.base.file.format' = 'LANCE' + | ) + """.stripMargin) + + // Verify the LANCE config was actually persisted to hoodie.properties. 
+ assertResult(HoodieFileFormat.LANCE)( + createMetaClient(spark, tmp.getCanonicalPath).getTableConfig.getBaseFileFormat) + + spark.sql( + s"insert into $tableName values " + + "(1, parse_json('{\"key\":\"value1\"}'), 1000)") + spark.sql( + s"insert into $tableName values " + + "(2, parse_json('{\"key\":\"value2\"}'), 1000)") + spark.sql( + s"insert into $tableName values " + + "(3, parse_json('{\"key\":\"value3\"}'), 1000)") + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tmp.getCanonicalPath)) + checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")( + Seq(1, "{\"key\":\"value1\"}", 1000), + Seq(2, "{\"key\":\"value2\"}", 1000), + Seq(3, "{\"key\":\"value3\"}", 1000) + ) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 1 as id, + | parse_json('{"key":"v1-merged"}') as v, + | 1001L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + assertResult(true)(DataSourceTestUtils.isLogFileOnly(tmp.getCanonicalPath)) + checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")( + Seq(1, "{\"key\":\"v1-merged\"}", 1001), + Seq(2, "{\"key\":\"value2\"}", 1000), + Seq(3, "{\"key\":\"value3\"}", 1000) + ) + + spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 4 as id, + | parse_json('{"key":"value4"}') as v, + | 1000L as ts + |) s0 + | on h0.id = s0.id + | when not matched then insert * + |""".stripMargin) + + assertResult(false)(DataSourceTestUtils.isLogFileOnly(tmp.getCanonicalPath)) + checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")( + Seq(1, "{\"key\":\"v1-merged\"}", 1001), + Seq(2, "{\"key\":\"value2\"}", 1000), + Seq(3, "{\"key\":\"value3\"}", 1000), + Seq(4, "{\"key\":\"value4\"}", 1000) + ) + + val variantField = spark.table(tableName).schema.find(_.name == "v").get + assertResult("variant")(variantField.dataType.typeName) + + // 6th commit drives an auto-clean that retires the now-superseded log-only slice. 
+ spark.sql( + s""" + |merge into $tableName h0 + |using ( + | select 2 as id, + | parse_json('{"key":"v2-merged"}') as v, + | 1002L as ts + |) s0 + | on h0.id = s0.id + | when matched then update set * + |""".stripMargin) + checkAnswer(s"select id, cast(v as string), ts from $tableName order by id")( + Seq(1, "{\"key\":\"v1-merged\"}", 1001), + Seq(2, "{\"key\":\"v2-merged\"}", 1002), + Seq(3, "{\"key\":\"value3\"}", 1000), + Seq(4, "{\"key\":\"value4\"}", 1000) + ) + + val metaClient = createMetaClient(spark, tmp.getCanonicalPath) + metaClient.reloadActiveTimeline() + assert(metaClient.getActiveTimeline.getCleanerTimeline.countInstants() > 0, + "Expected at least one .clean instant on the timeline after compaction") + }) + } + + test("Test toHiveCompatibleSchema converts VariantType to physical struct") { + assume(HoodieSparkUtils.gteqSpark4_0, "Variant type requires Spark 4.0 or higher") + + val variantType = DataType.fromDDL("variant") + val schema = StructType(Seq( + StructField("id", LongType, nullable = false), + StructField("name", StringType), + StructField("variant_col", variantType, nullable = true), + StructField("nested_struct", StructType(Seq( + StructField("inner_variant", variantType) + ))), + StructField("variant_array", ArrayType(variantType)), + StructField("variant_map", MapType(StringType, variantType)), + StructField("ts", LongType) + )) + + val hiveSchema = CreateHoodieTableCommand.toHiveCompatibleSchema(schema) + + // Non-variant fields should be unchanged + assert(hiveSchema("id").dataType == LongType) + assert(hiveSchema("name").dataType == StringType) + assert(hiveSchema("ts").dataType == LongType) + + // Top-level variant should be converted with canonical (metadata, value) field order. 
+ val variantStruct = assertVariantStruct(hiveSchema("variant_col").dataType) + assert(variantStruct.fields(0).name == HoodieSchema.Variant.VARIANT_METADATA_FIELD) + assert(variantStruct.fields(1).name == HoodieSchema.Variant.VARIANT_VALUE_FIELD) + + // Variant nested inside a StructType should be converted recursively. + val nestedStruct = hiveSchema("nested_struct").dataType.asInstanceOf[StructType] + assertVariantStruct(nestedStruct("inner_variant").dataType) + + // Variant as ArrayType element should be converted. + val arrayType = hiveSchema("variant_array").dataType.asInstanceOf[ArrayType] + assertVariantStruct(arrayType.elementType) + + // Variant as MapType value should be converted. + val mapType = hiveSchema("variant_map").dataType.asInstanceOf[MapType] + assert(mapType.keyType == StringType) + assertVariantStruct(mapType.valueType) + } + + private def assertVariantStruct(dataType: DataType): StructType = { + assert(dataType.isInstanceOf[StructType]) + val structType = dataType.asInstanceOf[StructType] + assert(structType.length == 2) + assert(structType(HoodieSchema.Variant.VARIANT_METADATA_FIELD).dataType == BinaryType) + assert(structType(HoodieSchema.Variant.VARIANT_VALUE_FIELD).dataType == BinaryType) + structType + } + + test("Test buildHiveCompatibleCatalogTable converts schema and merges properties") { + assume(HoodieSparkUtils.gteqSpark4_0, "Variant type requires Spark 4.0 or higher") + + val variantType = DataType.fromDDL("variant") + val table = CatalogTable( + identifier = TableIdentifier("test_table", Some("default")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty, + schema = StructType(Seq( + StructField("id", LongType, nullable = false), + StructField("variant_col", variantType, nullable = true) + )), + provider = Some("hudi"), + properties = Map("existing_key" -> "table_value", "shared_key" -> "table_value")) + + val dataSourceProps = Map( + "spark.sql.sources.provider" -> "hudi", + "shared_key" -> 
"datasource_value") + + val result = CreateHoodieTableCommand.buildHiveCompatibleCatalogTable(table, dataSourceProps) + + // VariantType replaced with the canonical (metadata, value) struct. + assertVariantStruct(result.schema("variant_col").dataType) + // Non-variant columns preserved. + assert(result.schema("id").dataType == LongType) + // Existing-only table properties survive. + assert(result.properties("existing_key") == "table_value") + // dataSource-only keys are merged in. + assert(result.properties("spark.sql.sources.provider") == "hudi") + // On conflict, CatalogTable.properties wins over dataSourceProps (right-biased `++`). + assert(result.properties("shared_key") == "table_value") + // Identity/provider fields pass through unchanged. + assert(result.identifier == table.identifier) + assert(result.provider == table.provider) + } + + test("Test DataFrame writer with native VariantType round-trips through the V1 save path") { + assume(HoodieSparkUtils.gteqSpark4_0, "Variant type requires Spark 4.0 or higher") + + withTempDir { tmp => + val df = spark.sql( + """ + |SELECT + | 1L AS id, + | 'row1' AS name, + | parse_json('{"key":"value1"}') AS variant_data, + | 1000L AS ts + |UNION ALL + |SELECT + | 2L AS id, + | 'row2' AS name, + | parse_json('{"key":"value2"}') AS variant_data, + | 1000L AS ts + |""".stripMargin) + + // Sanity: the DataFrame carries a native VariantType column, not a metadata-tagged struct. 
+ assert(df.schema("variant_data").dataType.typeName == "variant", + s"expected native VariantType, got ${df.schema("variant_data").dataType}") + + df.write.format("hudi") + .option("hoodie.table.name", "variant_native_df_test") + .option("hoodie.datasource.write.recordkey.field", "id") + .option("hoodie.datasource.write.precombine.field", "ts") + .mode(SaveMode.Overwrite) + .save(tmp.getCanonicalPath) + + val readDf = spark.read.format("hudi").load(tmp.getCanonicalPath) + assert(readDf.schema("variant_data").dataType.typeName == "variant", + s"variant_data should round-trip as native VariantType, got ${readDf.schema("variant_data").dataType}") + assert(readDf.count() == 2) + + val rows = readDf.selectExpr("id", "cast(variant_data as string) as v") + .orderBy("id").collect() + assert(rows(0).getString(1) == "{\"key\":\"value1\"}") + assert(rows(1).getString(1) == "{\"key\":\"value2\"}") + } + } + + test("Test StructType with hudi_type=VARIANT metadata is promoted to VARIANT logical type") { + // A StructType field in the DataFrame API tagged with hudi_type=VARIANT is treated as a first-class + // VARIANT (like BLOB/VECTOR), not a plain struct. On Spark 4.0+ the column round-trips as native VariantType. 
+ assume(HoodieSparkUtils.gteqSpark4_0, "Variant type requires Spark 4.0 or higher") + + withTempDir { tmp => + val variantMetadata = new MetadataBuilder() + .putString(HoodieSchema.TYPE_METADATA_FIELD, "VARIANT") + .build() + + val variantStruct = StructType(Seq( + StructField("metadata", BinaryType, nullable = false), + StructField("value", BinaryType, nullable = false) + )) + + val schema = StructType(Seq( + StructField("id", LongType, nullable = false), + StructField("name", StringType), + StructField("variant_data", variantStruct, nullable = false, metadata = variantMetadata), + StructField("ts", LongType) + )) + + val data = Seq( + Row(1L, "row1", Row(Array[Byte](1, 0), """{"key":"value1"}""".getBytes), 1000L) + ) + val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) + + df.write.format("hudi") + .option("hoodie.table.name", "variant_struct_test") + .option("hoodie.datasource.write.recordkey.field", "id") + .option("hoodie.datasource.write.precombine.field", "ts") + .mode(SaveMode.Overwrite) + .save(tmp.getCanonicalPath) + + val readDf = spark.read.format("hudi").load(tmp.getCanonicalPath) + val readFieldType = readDf.schema("variant_data").dataType + assert(readFieldType.typeName == "variant", + s"variant_data should round-trip as native VariantType on Spark 4.0+, got $readFieldType") + assert(readDf.count() == 1) + } + } + + test("Test StructType with hudi_type=VARIANT metadata rejects malformed struct") { + assume(HoodieSparkUtils.gteqSpark4_0, "Variant type requires Spark 4.0 or higher") + + withTempDir { tmp => + val variantMetadata = new MetadataBuilder() + .putString(HoodieSchema.TYPE_METADATA_FIELD, "VARIANT") + .build() + + // VARIANT structure must be {metadata: binary, value: binary}; a single string field is malformed. 
+ val malformedVariantStruct = StructType(Seq( + StructField("wrong_field", StringType, nullable = false) + )) + + val schema = StructType(Seq( + StructField("id", LongType, nullable = false), + StructField("variant_data", malformedVariantStruct, nullable = false, metadata = variantMetadata), + StructField("ts", LongType) + )) + + val data = Seq(Row(1L, Row("oops"), 1000L)) + val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) + + val ex = intercept[Exception] { + df.write.format("hudi") + .option("hoodie.table.name", "variant_malformed_test") + .option("hoodie.datasource.write.recordkey.field", "id") + .option("hoodie.datasource.write.precombine.field", "ts") + .mode(SaveMode.Overwrite) + .save(tmp.getCanonicalPath) + } + val causes = Iterator.iterate[Throwable](ex)(e => e.getCause).takeWhile(_ != null).toList + assert(causes.exists(c => c.isInstanceOf[IllegalArgumentException] + && c.getMessage != null + && c.getMessage.contains("Invalid variant schema structure")), + s"Expected IllegalArgumentException with 'Invalid variant schema structure', got: ${causes.map(_.getMessage)}") + } + } + test("Test Spark 3.x throws when auto-resolving Variant schema from commit metadata") { assume(HoodieSparkUtils.isSpark3, "This test verifies Spark 3.x rejects VARIANT type during schema resolution") diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/hudi-spark-datasource/hudi-spark3-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister similarity index 98% rename from hudi-spark-datasource/hudi-spark-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister rename to hudi-spark-datasource/hudi-spark3-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index df33a2d912404..a0b08f0377b9b 100644 --- 
a/hudi-spark-datasource/hudi-spark-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/hudi-spark-datasource/hudi-spark3-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -17,4 +17,4 @@ org.apache.hudi.BaseDefaultSource -org.apache.spark.sql.execution.datasources.parquet.LegacyHoodieParquetFileFormat \ No newline at end of file +org.apache.spark.sql.execution.datasources.parquet.LegacyHoodieParquetFileFormat diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/antlr4/imports/SqlBase.g4 b/hudi-spark-datasource/hudi-spark3.3.x/src/main/antlr4/imports/SqlBase.g4 index d7f87b4e5aa59..a5e31dadb2c48 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/main/antlr4/imports/SqlBase.g4 +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/antlr4/imports/SqlBase.g4 @@ -942,7 +942,7 @@ dataType | INTERVAL from=(YEAR | MONTH) (TO to=MONTH)? #yearMonthIntervalDataType | INTERVAL from=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))? #dayTimeIntervalDataType - | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType + | typeName=identifier ('(' (INTEGER_VALUE | identifier) (',' (INTEGER_VALUE | identifier))* ')')? 
#primitiveDataType ; qualifiedColTypeWithPositionList diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33CatalystPlanUtils.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33CatalystPlanUtils.scala index 9a434e90e2452..9c40f74ca2f94 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33CatalystPlanUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/HoodieSpark33CatalystPlanUtils.scala @@ -100,7 +100,7 @@ object HoodieSpark33CatalystPlanUtils extends BaseHoodieCatalystPlanUtils { override def unapplyShowIndexes(plan: LogicalPlan): Option[(LogicalPlan, Seq[Attribute])] = { plan match { - case ci @ ShowIndexes(table, output) => + case ci @ HoodieShowIndexes(table, output) => Some((table, output)) case _ => None diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_3Adapter.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_3Adapter.scala index a180ab3483dc5..06af2d3163714 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_3Adapter.scala +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_3Adapter.scala @@ -41,6 +41,7 @@ import org.apache.spark.sql.execution.datasources.orc.Spark33OrcReader import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetFilters, Spark33LegacyHoodieParquetFileFormat, Spark33ParquetReader} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.hudi.analysis.TableValuedFunctions +import org.apache.spark.sql.hudi.blob.{BatchedBlobReaderStrategy, ScalarFunctions} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy import 
org.apache.spark.sql.parser.{HoodieExtendedParserInterface, HoodieSpark3_3ExtendedSqlParser} @@ -114,6 +115,16 @@ class Spark3_3Adapter extends BaseSpark3Adapter { TableValuedFunctions.funcs.foreach(extensions.injectTableFunction) } + override def injectScalarFunctions(extensions: SparkSessionExtensions): Unit = { + ScalarFunctions.funcs.foreach(extensions.injectFunction) + } + + override def injectPlannerStrategies(extensions: SparkSessionExtensions): Unit = { + extensions.injectPlannerStrategy { session => + BatchedBlobReaderStrategy(session) + } + } + /** * Converts instance of [[StorageLevel]] to a corresponding string */ diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlAstBuilder.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlAstBuilder.scala index ec357ccb7b1a5..2b579db2c5b5b 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlAstBuilder.scala +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlAstBuilder.scala @@ -2587,7 +2587,7 @@ class HoodieSpark3_3ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa * Resolve/create a primitive type. 
*/ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = withOrigin(ctx) { - val dataType = ctx.identifier.getText.toLowerCase(Locale.ROOT) + val dataType = ctx.typeName.getText.toLowerCase(Locale.ROOT) (dataType, ctx.INTEGER_VALUE().asScala.toList) match { case ("boolean", Nil) => BooleanType case ("tinyint" | "byte", Nil) => ByteType @@ -2606,6 +2606,21 @@ class HoodieSpark3_3ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa case ("varchar", length :: Nil) => VarcharType(length.getText.toInt) case ("binary", Nil) => BinaryType case ("blob", Nil) => BlobType() + case ("vector", _ :: _) => + // Delegate validation to HoodieSchema.parseTypeDescriptor which handles dimension + // range checks, element type validation, and canonical normalization. + val vectorSchema = try { + HoodieSchema.parseTypeDescriptor(ctx.getText).asInstanceOf[HoodieSchema.Vector] + } catch { + case e: IllegalArgumentException => + throw new ParseException(s"Invalid VECTOR type: ${e.getMessage}", ctx) + } + val sparkElemType = vectorSchema.getVectorElementType match { + case HoodieSchema.Vector.VectorElementType.FLOAT => FloatType + case HoodieSchema.Vector.VectorElementType.DOUBLE => DoubleType + case HoodieSchema.Vector.VectorElementType.INT8 => ByteType + } + ArrayType(sparkElemType, containsNull = false) case ("decimal" | "dec" | "numeric", Nil) => DecimalType.USER_DEFAULT case ("decimal" | "dec" | "numeric", precision :: Nil) => DecimalType(precision.getText.toInt, 0) @@ -2702,8 +2717,13 @@ class HoodieSpark3_3ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa private def addMetadataForType(dataType: HoodieSqlBaseParser.DataTypeContext, builder: MetadataBuilder): Unit = { val typeText = dataType.getText - if (typeText.equalsIgnoreCase(HoodieSchemaType.BLOB.name())) { + val upperTypeText = typeText.toUpperCase(Locale.ROOT) + if (upperTypeText == HoodieSchemaType.BLOB.name()) { builder.putString(HoodieSchema.TYPE_METADATA_FIELD, 
HoodieSchemaType.BLOB.name()) + } else if (upperTypeText.startsWith("VECTOR(")) { + // Normalize to canonical form (e.g. "VECTOR(128,FLOAT)" -> "VECTOR(128)") + val vectorSchema = HoodieSchema.parseTypeDescriptor(typeText).asInstanceOf[HoodieSchema.Vector] + builder.putString(HoodieSchema.TYPE_METADATA_FIELD, vectorSchema.toTypeDescriptor) } } @@ -3400,14 +3420,14 @@ class HoodieSpark3_3ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa } /** - * Show indexes, returning a [[ShowIndexes]] logical plan. + * Show indexes, returning a [[HoodieShowIndexes]] logical plan. * For example: * {{{ * SHOW INDEXES (FROM | IN) [TABLE] table_name * }}} */ override def visitShowIndexes(ctx: ShowIndexesContext): LogicalPlan = withOrigin(ctx) { - ShowIndexes(UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier()))) + HoodieShowIndexes(UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier()))) } /** diff --git a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlParser.scala b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlParser.scala index e10df36d2fafb..688847a78cabf 100644 --- a/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlParser.scala +++ b/hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlParser.scala @@ -129,7 +129,8 @@ class HoodieSpark3_3ExtendedSqlParser(session: SparkSession, delegate: ParserInt normalized.contains("drop index") || normalized.contains("show indexes") || normalized.contains("refresh index") || - normalized.contains(" blob") + normalized.contains(" blob") || + normalized.contains(" vector") } } diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/antlr4/imports/SqlBase.g4 b/hudi-spark-datasource/hudi-spark3.4.x/src/main/antlr4/imports/SqlBase.g4 index d7f87b4e5aa59..a5e31dadb2c48 100644 --- 
a/hudi-spark-datasource/hudi-spark3.4.x/src/main/antlr4/imports/SqlBase.g4 +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/antlr4/imports/SqlBase.g4 @@ -942,7 +942,7 @@ dataType | INTERVAL from=(YEAR | MONTH) (TO to=MONTH)? #yearMonthIntervalDataType | INTERVAL from=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))? #dayTimeIntervalDataType - | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType + | typeName=identifier ('(' (INTEGER_VALUE | identifier) (',' (INTEGER_VALUE | identifier))* ')')? #primitiveDataType ; qualifiedColTypeWithPositionList diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34CatalystPlanUtils.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34CatalystPlanUtils.scala index 8fecd76a89cfc..ded8a735c6555 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34CatalystPlanUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/HoodieSpark34CatalystPlanUtils.scala @@ -107,7 +107,7 @@ object HoodieSpark34CatalystPlanUtils extends BaseHoodieCatalystPlanUtils { override def unapplyShowIndexes(plan: LogicalPlan): Option[(LogicalPlan, Seq[Attribute])] = { plan match { - case ci@ShowIndexes(table, output) => + case ci@HoodieShowIndexes(table, output) => Some((table, output)) case _ => None diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_4Adapter.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_4Adapter.scala index 32b21b936f09d..21a2294e134d8 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_4Adapter.scala +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_4Adapter.scala @@ -42,6 +42,7 @@ import 
org.apache.spark.sql.execution.datasources.orc.Spark34OrcReader import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetFilters, Spark34LegacyHoodieParquetFileFormat, Spark34ParquetReader} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.hudi.analysis.TableValuedFunctions +import org.apache.spark.sql.hudi.blob.{BatchedBlobReaderStrategy, ScalarFunctions} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy import org.apache.spark.sql.parser.{HoodieExtendedParserInterface, HoodieSpark3_4ExtendedSqlParser} @@ -115,6 +116,16 @@ class Spark3_4Adapter extends BaseSpark3Adapter { TableValuedFunctions.funcs.foreach(extensions.injectTableFunction) } + override def injectScalarFunctions(extensions: SparkSessionExtensions): Unit = { + ScalarFunctions.funcs.foreach(extensions.injectFunction) + } + + override def injectPlannerStrategies(extensions: SparkSessionExtensions): Unit = { + extensions.injectPlannerStrategy { session => + BatchedBlobReaderStrategy(session) + } + } + /** * Converts instance of [[StorageLevel]] to a corresponding string */ diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_4ExtendedSqlAstBuilder.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_4ExtendedSqlAstBuilder.scala index fb995babe70da..0b5d920bf5717 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_4ExtendedSqlAstBuilder.scala +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_4ExtendedSqlAstBuilder.scala @@ -2590,7 +2590,7 @@ class HoodieSpark3_4ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa * Resolve/create a primitive type. 
*/ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = withOrigin(ctx) { - val dataType = ctx.identifier.getText.toLowerCase(Locale.ROOT) + val dataType = ctx.typeName.getText.toLowerCase(Locale.ROOT) (dataType, ctx.INTEGER_VALUE().asScala.toList) match { case ("boolean", Nil) => BooleanType case ("tinyint" | "byte", Nil) => ByteType @@ -2609,6 +2609,21 @@ class HoodieSpark3_4ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa case ("varchar", length :: Nil) => VarcharType(length.getText.toInt) case ("binary", Nil) => BinaryType case ("blob", Nil) => BlobType() + case ("vector", _ :: _) => + // Delegate validation to HoodieSchema.parseTypeDescriptor which handles dimension + // range checks, element type validation, and canonical normalization. + val vectorSchema = try { + HoodieSchema.parseTypeDescriptor(ctx.getText).asInstanceOf[HoodieSchema.Vector] + } catch { + case e: IllegalArgumentException => + throw new ParseException(s"Invalid VECTOR type: ${e.getMessage}", ctx) + } + val sparkElemType = vectorSchema.getVectorElementType match { + case HoodieSchema.Vector.VectorElementType.FLOAT => FloatType + case HoodieSchema.Vector.VectorElementType.DOUBLE => DoubleType + case HoodieSchema.Vector.VectorElementType.INT8 => ByteType + } + ArrayType(sparkElemType, containsNull = false) case ("decimal" | "dec" | "numeric", Nil) => DecimalType.USER_DEFAULT case ("decimal" | "dec" | "numeric", precision :: Nil) => DecimalType(precision.getText.toInt, 0) @@ -2705,8 +2720,13 @@ class HoodieSpark3_4ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa private def addMetadataForType(dataType: HoodieSqlBaseParser.DataTypeContext, builder: MetadataBuilder): Unit = { val typeText = dataType.getText - if (typeText.equalsIgnoreCase(HoodieSchemaType.BLOB.name())) { + val upperTypeText = typeText.toUpperCase(Locale.ROOT) + if (upperTypeText == HoodieSchemaType.BLOB.name()) { builder.putString(HoodieSchema.TYPE_METADATA_FIELD, 
HoodieSchemaType.BLOB.name()) + } else if (upperTypeText.startsWith("VECTOR(")) { + // Normalize to canonical form (e.g. "VECTOR(128,FLOAT)" -> "VECTOR(128)") + val vectorSchema = HoodieSchema.parseTypeDescriptor(typeText).asInstanceOf[HoodieSchema.Vector] + builder.putString(HoodieSchema.TYPE_METADATA_FIELD, vectorSchema.toTypeDescriptor) } } @@ -3404,14 +3424,14 @@ class HoodieSpark3_4ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa } /** - * Show indexes, returning a [[ShowIndexes]] logical plan. + * Show indexes, returning a [[HoodieShowIndexes]] logical plan. * For example: * {{{ * SHOW INDEXES (FROM | IN) [TABLE] table_name * }}} */ override def visitShowIndexes(ctx: ShowIndexesContext): LogicalPlan = withOrigin(ctx) { - ShowIndexes(UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier()))) + HoodieShowIndexes(UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier()))) } /** diff --git a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_4ExtendedSqlParser.scala b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_4ExtendedSqlParser.scala index 267e582bf2a1d..687dc2b01b148 100644 --- a/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_4ExtendedSqlParser.scala +++ b/hudi-spark-datasource/hudi-spark3.4.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_4ExtendedSqlParser.scala @@ -129,7 +129,8 @@ class HoodieSpark3_4ExtendedSqlParser(session: SparkSession, delegate: ParserInt normalized.contains("drop index") || normalized.contains("show indexes") || normalized.contains("refresh index") || - normalized.contains(" blob") + normalized.contains(" blob") || + normalized.contains(" vector") } } diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/imports/SqlBase.g4 b/hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/imports/SqlBase.g4 index d7f87b4e5aa59..a5e31dadb2c48 100644 --- 
a/hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/imports/SqlBase.g4 +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/antlr4/imports/SqlBase.g4 @@ -942,7 +942,7 @@ dataType | INTERVAL from=(YEAR | MONTH) (TO to=MONTH)? #yearMonthIntervalDataType | INTERVAL from=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))? #dayTimeIntervalDataType - | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType + | typeName=identifier ('(' (INTEGER_VALUE | identifier) (',' (INTEGER_VALUE | identifier))* ')')? #primitiveDataType ; qualifiedColTypeWithPositionList diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystPlanUtils.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystPlanUtils.scala index 64b8a985559b7..4673a305711a7 100644 --- a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystPlanUtils.scala +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/HoodieSpark35CatalystPlanUtils.scala @@ -106,7 +106,7 @@ object HoodieSpark35CatalystPlanUtils extends BaseHoodieCatalystPlanUtils { override def unapplyShowIndexes(plan: LogicalPlan): Option[(LogicalPlan, Seq[Attribute])] = { plan match { - case ci@ShowIndexes(table, output) => + case ci@HoodieShowIndexes(table, output) => Some((table, output)) case _ => None diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_5Adapter.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_5Adapter.scala index 24492fdc358b9..ab5d18024e42f 100644 --- a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_5Adapter.scala +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/adapter/Spark3_5Adapter.scala @@ -21,7 +21,6 @@ import org.apache.hudi.Spark35HoodieFileScanRDD import 
org.apache.hudi.common.schema.HoodieSchema import org.apache.hudi.storage.StorageConfiguration -import org.apache.avro.Schema import org.apache.hadoop.conf.Configuration import org.apache.parquet.schema.MessageType import org.apache.spark.api.java.JavaSparkContext @@ -42,6 +41,7 @@ import org.apache.spark.sql.execution.datasources.orc.Spark35OrcReader import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, ParquetFilters, Spark35LegacyHoodieParquetFileFormat, Spark35ParquetReader} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.hudi.analysis.TableValuedFunctions +import org.apache.spark.sql.hudi.blob.{BatchedBlobReaderStrategy, ScalarFunctions} import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.parser.{HoodieExtendedParserInterface, HoodieSpark3_5ExtendedSqlParser} import org.apache.spark.sql.types.{DataType, DataTypes, Metadata, MetadataBuilder, StructType} @@ -118,6 +118,16 @@ class Spark3_5Adapter extends BaseSpark3Adapter { TableValuedFunctions.funcs.foreach(extensions.injectTableFunction) } + override def injectScalarFunctions(extensions: SparkSessionExtensions): Unit = { + ScalarFunctions.funcs.foreach(extensions.injectFunction) + } + + override def injectPlannerStrategies(extensions: SparkSessionExtensions): Unit = { + extensions.injectPlannerStrategy { session => + BatchedBlobReaderStrategy(session) + } + } + /** * Converts instance of [[StorageLevel]] to a corresponding string */ diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlAstBuilder.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlAstBuilder.scala index 653a19984490d..846d5c5ebab5e 100644 --- a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlAstBuilder.scala +++ 
b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlAstBuilder.scala @@ -2591,7 +2591,7 @@ class HoodieSpark3_5ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa * Resolve/create a primitive type. */ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = withOrigin(ctx) { - val dataType = ctx.identifier.getText.toLowerCase(Locale.ROOT) + val dataType = ctx.typeName.getText.toLowerCase(Locale.ROOT) (dataType, ctx.INTEGER_VALUE().asScala.toList) match { case ("boolean", Nil) => BooleanType case ("tinyint" | "byte", Nil) => ByteType @@ -2610,6 +2610,21 @@ class HoodieSpark3_5ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa case ("varchar", length :: Nil) => VarcharType(length.getText.toInt) case ("binary", Nil) => BinaryType case ("blob", Nil) => BlobType() + case ("vector", _ :: _) => + // Delegate validation to HoodieSchema.parseTypeDescriptor which handles dimension + // range checks, element type validation, and canonical normalization. 
+ val vectorSchema = try { + HoodieSchema.parseTypeDescriptor(ctx.getText).asInstanceOf[HoodieSchema.Vector] + } catch { + case e: IllegalArgumentException => + throw new ParseException(s"Invalid VECTOR type: ${e.getMessage}", ctx) + } + val sparkElemType = vectorSchema.getVectorElementType match { + case HoodieSchema.Vector.VectorElementType.FLOAT => FloatType + case HoodieSchema.Vector.VectorElementType.DOUBLE => DoubleType + case HoodieSchema.Vector.VectorElementType.INT8 => ByteType + } + ArrayType(sparkElemType, containsNull = false) case ("decimal" | "dec" | "numeric", Nil) => DecimalType.USER_DEFAULT case ("decimal" | "dec" | "numeric", precision :: Nil) => DecimalType(precision.getText.toInt, 0) @@ -2706,8 +2721,13 @@ class HoodieSpark3_5ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa private def addMetadataForType(dataType: HoodieSqlBaseParser.DataTypeContext, builder: MetadataBuilder): Unit = { val typeText = dataType.getText - if (typeText.equalsIgnoreCase(HoodieSchemaType.BLOB.name())) { + val upperTypeText = typeText.toUpperCase(Locale.ROOT) + if (upperTypeText == HoodieSchemaType.BLOB.name()) { builder.putString(HoodieSchema.TYPE_METADATA_FIELD, HoodieSchemaType.BLOB.name()) + } else if (upperTypeText.startsWith("VECTOR(")) { + // Normalize to canonical form (e.g. "VECTOR(128,FLOAT)" -> "VECTOR(128)") + val vectorSchema = HoodieSchema.parseTypeDescriptor(typeText).asInstanceOf[HoodieSchema.Vector] + builder.putString(HoodieSchema.TYPE_METADATA_FIELD, vectorSchema.toTypeDescriptor) } } @@ -3406,14 +3426,14 @@ class HoodieSpark3_5ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa } /** - * Show indexes, returning a [[ShowIndexes]] logical plan. + * Show indexes, returning a [[HoodieShowIndexes]] logical plan. 
* For example: * {{{ * SHOW INDEXES (FROM | IN) [TABLE] table_name * }}} */ override def visitShowIndexes(ctx: ShowIndexesContext): LogicalPlan = withOrigin(ctx) { - ShowIndexes(UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier()))) + HoodieShowIndexes(UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier()))) } /** diff --git a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlParser.scala b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlParser.scala index 3c3bc06736cb4..ef31536985e5e 100644 --- a/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlParser.scala +++ b/hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_5ExtendedSqlParser.scala @@ -129,7 +129,8 @@ class HoodieSpark3_5ExtendedSqlParser(session: SparkSession, delegate: ParserInt normalized.contains("drop index") || normalized.contains("show indexes") || normalized.contains("refresh index") || - normalized.contains(" blob") + normalized.contains(" blob") || + normalized.contains(" vector") } } diff --git a/hudi-spark-datasource/hudi-spark4-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/hudi-spark-datasource/hudi-spark4-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister new file mode 100644 index 0000000000000..4c8a74920ff8f --- /dev/null +++ b/hudi-spark-datasource/hudi-spark4-common/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -0,0 +1,20 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +org.apache.hudi.Spark4DefaultSource +org.apache.spark.sql.execution.datasources.parquet.LegacyHoodieParquetFileFormat diff --git a/hudi-spark-datasource/hudi-spark4-common/src/main/scala/org/apache/hudi/Spark4DefaultSource.scala b/hudi-spark-datasource/hudi-spark4-common/src/main/scala/org/apache/hudi/Spark4DefaultSource.scala new file mode 100644 index 0000000000000..738ce9b38b5df --- /dev/null +++ b/hudi-spark-datasource/hudi-spark4-common/src/main/scala/org/apache/hudi/Spark4DefaultSource.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi + +import org.apache.spark.sql.types.{DataType, VariantType} + +/** + * Spark-4-specific `DataSourceRegister` for Hudi. + * + * Spark 4.0 added `CreatableRelationProvider.supportsDataType` and gates the V1 write path + * (`DataSource.planForWriting`) on it. The default whitelist does not include `VariantType`, + * so without this override `df.write.format("hudi").save(path)` fails with + * `UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE` whenever the DataFrame carries a native + * VariantType column. The default implementation recurses through Array/Map/Struct + * element types via virtual dispatch, so nested VariantType is covered automatically. + */ +class Spark4DefaultSource extends BaseDefaultSource { + override def supportsDataType(dt: DataType): Boolean = dt match { + case _: VariantType => true + case _ => super.supportsDataType(dt) + } +} diff --git a/hudi-spark-datasource/hudi-spark4.0.x/src/main/antlr4/imports/SqlBase.g4 b/hudi-spark-datasource/hudi-spark4.0.x/src/main/antlr4/imports/SqlBase.g4 index d7f87b4e5aa59..a5e31dadb2c48 100644 --- a/hudi-spark-datasource/hudi-spark4.0.x/src/main/antlr4/imports/SqlBase.g4 +++ b/hudi-spark-datasource/hudi-spark4.0.x/src/main/antlr4/imports/SqlBase.g4 @@ -942,7 +942,7 @@ dataType | INTERVAL from=(YEAR | MONTH) (TO to=MONTH)? #yearMonthIntervalDataType | INTERVAL from=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE | SECOND))? #dayTimeIntervalDataType - | identifier ('(' INTEGER_VALUE (',' INTEGER_VALUE)* ')')? #primitiveDataType + | typeName=identifier ('(' (INTEGER_VALUE | identifier) (',' (INTEGER_VALUE | identifier))* ')')? 
#primitiveDataType ; qualifiedColTypeWithPositionList diff --git a/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark40CatalystPlanUtils.scala b/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark40CatalystPlanUtils.scala index 859ba13530bd4..3f7372bf22306 100644 --- a/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark40CatalystPlanUtils.scala +++ b/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/HoodieSpark40CatalystPlanUtils.scala @@ -106,7 +106,7 @@ object HoodieSpark40CatalystPlanUtils extends BaseHoodieCatalystPlanUtils { override def unapplyShowIndexes(plan: LogicalPlan): Option[(LogicalPlan, Seq[Attribute])] = { plan match { - case ci@ShowIndexes(table, output) => + case ci@HoodieShowIndexes(table, output) => Some((table, output)) case _ => None diff --git a/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/adapter/Spark4_0Adapter.scala b/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/adapter/Spark4_0Adapter.scala index 7cb449809c2ba..174f25b08b3d7 100644 --- a/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/adapter/Spark4_0Adapter.scala +++ b/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/adapter/Spark4_0Adapter.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.execution.datasources.orc.Spark40OrcReader import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, Spark40LegacyHoodieParquetFileFormat, Spark40ParquetReader} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.hudi.analysis.TableValuedFunctions +import org.apache.spark.sql.hudi.blob.{BatchedBlobReaderStrategy, ScalarFunctions} import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.parser.{HoodieExtendedParserInterface, HoodieSpark4_0ExtendedSqlParser} 
import org.apache.spark.sql.types.{DataType, DataTypes, Metadata, MetadataBuilder, StructType} @@ -115,6 +116,16 @@ class Spark4_0Adapter extends BaseSpark4Adapter { TableValuedFunctions.funcs.foreach(extensions.injectTableFunction) } + override def injectScalarFunctions(extensions: SparkSessionExtensions): Unit = { + ScalarFunctions.funcs.foreach(extensions.injectFunction) + } + + override def injectPlannerStrategies(extensions: SparkSessionExtensions): Unit = { + extensions.injectPlannerStrategy { session => + BatchedBlobReaderStrategy(session) + } + } + /** * Converts instance of [[StorageLevel]] to a corresponding string */ diff --git a/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark4_0ExtendedSqlAstBuilder.scala b/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark4_0ExtendedSqlAstBuilder.scala index d8d3faa0dfcad..501505849175f 100644 --- a/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark4_0ExtendedSqlAstBuilder.scala +++ b/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark4_0ExtendedSqlAstBuilder.scala @@ -2593,7 +2593,7 @@ class HoodieSpark4_0ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa * Resolve/create a primitive type. 
*/ override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = withOrigin(ctx) { - val dataType = ctx.identifier.getText.toLowerCase(Locale.ROOT) + val dataType = ctx.typeName.getText.toLowerCase(Locale.ROOT) (dataType, ctx.INTEGER_VALUE().asScala.toList) match { case ("boolean", Nil) => BooleanType case ("tinyint" | "byte", Nil) => ByteType @@ -2612,6 +2612,21 @@ class HoodieSpark4_0ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa case ("varchar", length :: Nil) => VarcharType(length.getText.toInt) case ("binary", Nil) => BinaryType case ("blob", Nil) => BlobType() + case ("vector", _ :: _) => + // Delegate validation to HoodieSchema.parseTypeDescriptor which handles dimension + // range checks, element type validation, and canonical normalization. + val vectorSchema = try { + HoodieSchema.parseTypeDescriptor(ctx.getText).asInstanceOf[HoodieSchema.Vector] + } catch { + case e: IllegalArgumentException => + throw new ParseException(s"Invalid VECTOR type: ${e.getMessage}", ctx) + } + val sparkElemType = vectorSchema.getVectorElementType match { + case HoodieSchema.Vector.VectorElementType.FLOAT => FloatType + case HoodieSchema.Vector.VectorElementType.DOUBLE => DoubleType + case HoodieSchema.Vector.VectorElementType.INT8 => ByteType + } + ArrayType(sparkElemType, containsNull = false) case ("decimal" | "dec" | "numeric", Nil) => DecimalType.USER_DEFAULT case ("decimal" | "dec" | "numeric", precision :: Nil) => DecimalType(precision.getText.toInt, 0) @@ -2708,8 +2723,13 @@ class HoodieSpark4_0ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa private def addMetadataForType(dataType: HoodieSqlBaseParser.DataTypeContext, builder: MetadataBuilder): Unit = { val typeText = dataType.getText - if (typeText.equalsIgnoreCase(HoodieSchemaType.BLOB.name())) { + val upperTypeText = typeText.toUpperCase(Locale.ROOT) + if (upperTypeText == HoodieSchemaType.BLOB.name()) { builder.putString(HoodieSchema.TYPE_METADATA_FIELD, 
HoodieSchemaType.BLOB.name()) + } else if (upperTypeText.startsWith("VECTOR(")) { + // Normalize to canonical form (e.g. "VECTOR(128,FLOAT)" -> "VECTOR(128)") + val vectorSchema = HoodieSchema.parseTypeDescriptor(typeText).asInstanceOf[HoodieSchema.Vector] + builder.putString(HoodieSchema.TYPE_METADATA_FIELD, vectorSchema.toTypeDescriptor) } } @@ -3408,14 +3428,14 @@ class HoodieSpark4_0ExtendedSqlAstBuilder(conf: SQLConf, delegate: ParserInterfa } /** - * Show indexes, returning a [[ShowIndexes]] logical plan. + * Show indexes, returning a [[HoodieShowIndexes]] logical plan. * For example: * {{{ * SHOW INDEXES (FROM | IN) [TABLE] table_name * }}} */ override def visitShowIndexes(ctx: ShowIndexesContext): LogicalPlan = withOrigin(ctx) { - ShowIndexes(UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier()))) + HoodieShowIndexes(UnresolvedRelation(visitTableIdentifier(ctx.tableIdentifier()))) } /** diff --git a/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark4_0ExtendedSqlParser.scala b/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark4_0ExtendedSqlParser.scala index f44002ab0265d..ccddd6e6d3f1b 100644 --- a/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark4_0ExtendedSqlParser.scala +++ b/hudi-spark-datasource/hudi-spark4.0.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark4_0ExtendedSqlParser.scala @@ -138,7 +138,8 @@ class HoodieSpark4_0ExtendedSqlParser(session: SparkSession, delegate: ParserInt normalized.contains("drop index") || normalized.contains("show indexes") || normalized.contains("refresh index") || - normalized.contains(" blob") + normalized.contains(" blob") || + normalized.contains(" vector") } override def parseRoutineParam(sqlText: String): StructType = throw new UnsupportedOperationException() diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java 
b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java index 892fe9350abe0..1bb34bf3d7840 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HoodieHiveSyncClient.java @@ -25,8 +25,8 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.ConfigUtils; -import org.apache.hudi.common.util.MapUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.StringUtils; @@ -233,7 +233,7 @@ public void dropPartitions(String tableName, List partitionsToDrop) { @Override public boolean updateTableProperties(String tableName, Map tableProperties) { - if (MapUtils.isNullOrEmpty(tableProperties)) { + if (CollectionUtils.isNullOrEmpty(tableProperties)) { return false; } @@ -247,7 +247,7 @@ public boolean updateTableProperties(String tableName, Map table try { Table table = client.getTable(databaseName, tableName); Map remoteTableProperties = table.getParameters(); - if (MapUtils.containsAll(remoteTableProperties, tableProperties)) { + if (CollectionUtils.containsAll(remoteTableProperties, tableProperties)) { return false; } @@ -268,7 +268,7 @@ public boolean updateTableProperties(String tableName, Map table @Override public boolean updateSerdeProperties(String tableName, Map serdeProperties, boolean useRealtimeFormat) { - if (MapUtils.isNullOrEmpty(serdeProperties)) { + if (CollectionUtils.isNullOrEmpty(serdeProperties)) { return false; } if (useJdbcFallback()) { @@ -289,7 +289,7 @@ public boolean updateSerdeProperties(String tableName, Map serde } else { serdeInfoName = remoteSerdeInfo.getName(); Map remoteSerdeProperties = 
remoteSerdeInfo.getParameters(); - shouldUpdate = !MapUtils.containsAll(remoteSerdeProperties, serdeProperties); + shouldUpdate = !CollectionUtils.containsAll(remoteSerdeProperties, serdeProperties); } // check if any change to input/output format diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java index 1ab5bb5224229..4d75ca0e6b420 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java @@ -193,11 +193,15 @@ public static void clear() throws IOException, HiveException, MetaException { .setPayloadClass(HoodieAvroPayload.class) .initTable(HadoopFSUtils.getStorageConfWithCopy(configuration), basePath); - for (String tableName : createdTablesSet) { - ddlExecutor.runSQL("drop table if exists " + tableName); + if (ddlExecutor != null) { + for (String tableName : createdTablesSet) { + ddlExecutor.runSQL("drop table if exists " + tableName); + } } createdTablesSet.clear(); - ddlExecutor.runSQL("drop database if exists " + DB_NAME + " cascade"); + if (ddlExecutor != null) { + ddlExecutor.runSQL("drop database if exists " + DB_NAME + " cascade"); + } } public static HiveConf getHiveConf() { @@ -226,6 +230,7 @@ public static void shutdown() { try { if (hiveServer != null) { hiveServer.stop(); + hiveServer = null; } } catch (Exception e) { e.printStackTrace(); @@ -235,6 +240,7 @@ public static void shutdown() { try { if (hiveTestService != null) { hiveTestService.stop(); + hiveTestService = null; } } catch (Exception e) { e.printStackTrace(); @@ -244,6 +250,7 @@ public static void shutdown() { try { if (zkServer != null) { zkServer.shutdown(true); + zkServer = null; } } catch (Exception e) { e.printStackTrace(); @@ -253,6 +260,7 @@ public static void shutdown() { try { if (zkService != null) { 
zkService.stop(); + zkService = null; } } catch (RuntimeException re) { re.printStackTrace(); @@ -262,6 +270,7 @@ public static void shutdown() { try { if (fileSystem != null) { fileSystem.close(); + fileSystem = null; } } catch (IOException ie) { ie.printStackTrace(); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java index 478d36a599217..c7aa46a0ae682 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HiveIncrementalPuller.java @@ -145,8 +145,7 @@ public void saveDelta() throws IOException { lastCommitTime = config.fromCommitTime; } - Connection conn = getConnection(); - stmt = conn.createStatement(); + stmt = getConnection().createStatement(); // drop the temp table if exists String tempDbTable = config.tmpDb + "." + config.targetTable + "__" + config.sourceTable; String tempDbTablePath = @@ -172,6 +171,15 @@ public void saveDelta() throws IOException { } catch (SQLException e) { LOG.error("Could not close the resultSet opened ", e); } + try { + if (this.connection != null) { + this.connection.close(); + } + } catch (SQLException e) { + LOG.error("Could not close the JDBC connection", e); + } finally { + this.connection = null; + } } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HudiHiveSyncJob.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HudiHiveSyncJob.java new file mode 100644 index 0000000000000..59d92658f4f81 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HudiHiveSyncJob.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.utilities; + +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.util.HoodieTimer; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.hive.HiveSyncTool; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH; + +/** + * Utility job for running Hive sync on-demand for Hudi tables. + *

+ * This tool allows you to synchronize Hudi table metadata with Hive metastore + * independently from ingestion workflows, useful for backfills, manual data + * corrections, or quick metadata reconciliation. + *

+ * Example usage: + * <pre>
+ * spark-submit \
+ *   --class org.apache.hudi.utilities.HudiHiveSyncJob \
+ *   hudi-utilities.jar \
+ *   --base-path /path/to/hudi/table \
+ *   --base-file-format PARQUET \
+ *   --props-file-path /path/to/hive-sync.properties \
+ *   --hoodie-conf hoodie.datasource.hive_sync.database=my_db \
+ *   --hoodie-conf hoodie.datasource.hive_sync.table=my_table
+ * </pre>
+ */ +public class HudiHiveSyncJob { + + private static final Logger LOG = LoggerFactory.getLogger(HudiHiveSyncJob.class); + + private final Config cfg; + private final Configuration hadoopConf; + private final TypedProperties props; + + public HudiHiveSyncJob(JavaSparkContext jsc, Config cfg) { + this.cfg = cfg; + this.hadoopConf = jsc.hadoopConfiguration(); + this.props = UtilHelpers.buildProperties(hadoopConf, cfg.propsFilePath, cfg.configs); + } + + public static void main(String[] args) throws IOException { + final Config cfg = new Config(); + new JCommander(cfg, null, args); + LOG.info("Cfg received: {}", cfg); + JavaSparkContext jsc; + if (StringUtils.isNullOrEmpty(cfg.sparkMaster)) { + jsc = UtilHelpers.buildSparkContext("HudiHiveSyncJob", "local[2]", true); + } else { + jsc = UtilHelpers.buildSparkContext("HudiHiveSyncJob", cfg.sparkMaster, true); + } + try { + new HudiHiveSyncJob(jsc, cfg).run(); + } finally { + jsc.stop(); + } + } + + public void run() throws IOException { + LOG.info("Starting hive sync for {}", cfg.basePath); + HoodieTimer timer = HoodieTimer.start(); + HiveSyncTool syncTool = null; + try { + props.put(META_SYNC_BASE_PATH.key(), cfg.basePath); + props.put(META_SYNC_BASE_FILE_FORMAT.key(), cfg.baseFileFormat); + + LOG.info("HiveSyncConfig props used to sync data {}", props); + syncTool = new HiveSyncTool(props, new HiveConf(hadoopConf, HiveConf.class)); + syncTool.syncHoodieTable(); + } catch (Exception e) { + LOG.error("Exception in running hive-sync", e); + throw new HoodieException("Hive sync failed", e); + } finally { + if (syncTool != null) { + syncTool.close(); + } + LOG.info("Hive-sync duration in ms {}", timer.endTimer()); + } + } + + public static class Config implements Serializable { + @Parameter(names = {"--base-path", "-sp"}, description = "Base path for the table", required = true) + public String basePath = null; + + @Parameter(names = {"--base-file-format", "-bff"}, description = "Base file format of the dataset") + 
public String baseFileFormat = "PARQUET"; + + @Parameter(names = {"--props-file-path"}, description = "Path to properties file on localfs or dfs.") + public String propsFilePath = null; + + @Parameter(names = {"--spark-master"}, + description = "spark master to use, if not defined inherits from your environment taking into " + + "account Spark Configuration priority rules (e.g. not using spark-submit command).") + public String sparkMaster = ""; + + @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file " + + "(using the CLI parameter \"--props\") can also be passed command line using this parameter. This can be repeated", + splitter = IdentitySplitter.class) + public List configs = new ArrayList<>(); + + @Override + public String toString() { + return "Config{" + + "basePath='" + basePath + '\'' + + ", baseFileFormat='" + baseFileFormat + '\'' + + ", propsFilePath='" + propsFilePath + '\'' + + ", configs=" + configs + + '}'; + } + } +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/ProtoConversionUtil.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/ProtoConversionUtil.java index 689f328da8c1a..64abbd4f192ee 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/ProtoConversionUtil.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/ProtoConversionUtil.java @@ -65,7 +65,6 @@ import static org.apache.hudi.common.util.ConfigUtils.getBooleanWithAltKeys; import static org.apache.hudi.common.util.ConfigUtils.getIntWithAltKeys; -import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; import static org.apache.hudi.utilities.config.ProtoClassBasedSchemaProviderConfig.PROTO_SCHEMA_MAX_RECURSION_DEPTH; import static org.apache.hudi.utilities.config.ProtoClassBasedSchemaProviderConfig.PROTO_SCHEMA_TIMESTAMPS_AS_RECORDS; import static 
org.apache.hudi.utilities.config.ProtoClassBasedSchemaProviderConfig.PROTO_SCHEMA_WRAPPED_PRIMITIVES_AS_RECORDS; @@ -142,7 +141,7 @@ private static class AvroSupport { private static final String OVERFLOW_BYTES_FIELD_NAME = "proto_bytes"; private static final HoodieSchema RECURSION_OVERFLOW_SCHEMA = HoodieSchema.createRecord("recursion_overflow", null, "org.apache.hudi.proto", false, Arrays.asList(HoodieSchemaField.of(OVERFLOW_DESCRIPTOR_FIELD_NAME, STRING_SCHEMA, null, ""), - HoodieSchemaField.of(OVERFLOW_BYTES_FIELD_NAME, HoodieSchema.create(HoodieSchemaType.BYTES), null, getUTF8Bytes("")))); + HoodieSchemaField.of(OVERFLOW_BYTES_FIELD_NAME, HoodieSchema.create(HoodieSchemaType.BYTES), null, ""))); // A cache of the proto class name paired with whether wrapped primitives should be flattened as the key and the generated avro schema as the value private static final Map SCHEMA_CACHE = new ConcurrentHashMap<>(); // A cache with a key as the pair target avro schema and the proto descriptor for the source and the value as an array of proto field descriptors where the order matches the avro ordering. diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHudiHiveSyncJob.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHudiHiveSyncJob.java new file mode 100644 index 0000000000000..64abaf68a8db8 --- /dev/null +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHudiHiveSyncJob.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.utilities; + +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.hive.HiveSyncConfig; +import org.apache.hudi.hive.HoodieHiveSyncClient; +import org.apache.hudi.hive.testutils.HiveTestUtil; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.api.java.JavaSparkContext; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.api.AfterAll; + +import java.nio.file.Path; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.Locale; +import java.util.UUID; + +import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_PASS; +import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_MODE; +import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_URL; +import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_USER; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME; +import static 
org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +
+/**
+ * Test cases for {@link HudiHiveSyncJob}.
+ */
+@Disabled("Hive set up in CI is failing")
+public class TestHudiHiveSyncJob {
+
+  // Scratch directory supplied by JUnit; the test table is created beneath it.
+  @TempDir
+  Path tempDir;
+
+  /** Invokes {@code HiveTestUtil.setUp} before each test. */
+  @BeforeEach
+  void setUp() throws Exception {
+    HiveTestUtil.setUp(Option.empty(), true);
+  }
+
+  /** Invokes {@code HiveTestUtil.clear} after each test; anything it throws is deliberately ignored. */
+  @AfterEach
+  void cleanUp() {
+    try {
+      HiveTestUtil.clear();
+    } catch (Throwable t) {
+      // no-op for cleanup failures in tests
+    }
+  }
+
+  /** Invokes {@code HiveTestUtil.shutdown} once after all tests of this class. */
+  @AfterAll
+  static void cleanUpClass() {
+    HiveTestUtil.shutdown();
+  }
+
+  /**
+   * Writes a two-row, non-partitioned Hudi table by path only, asserts the table is not yet
+   * registered, runs {@link HudiHiveSyncJob#run()} and asserts the table is registered afterwards.
+   */
+  @Test
+  void testRunRegistersUnregisteredHudiDatasetInMetastore() throws Exception {
+    String tableName = "hive_sync_job_" + UUID.randomUUID().toString().replace("-", "");
+    String basePath = Files.createDirectory(tempDir.resolve("hudi-table")).toUri().toString();
+    String databaseName = "default";
+    HudiHiveSyncJob.Config cfg = new HudiHiveSyncJob.Config();
+    cfg.basePath = basePath;
+    cfg.baseFileFormat = "PARQUET";
+    cfg.configs.add("hoodie.datasource.hive_sync.database=" + databaseName);
+    cfg.configs.add("hoodie.datasource.hive_sync.table=" + tableName);
+    cfg.configs.add("hoodie.datasource.meta.sync.database=" + databaseName);
+    cfg.configs.add("hoodie.datasource.meta.sync.table=" + tableName);
+    cfg.configs.add(HIVE_SYNC_MODE.key() + "=jdbc");
+    // Connection settings are taken from the fixture's own sync properties.
+    cfg.configs.add(HIVE_URL.key() + "=" + HiveTestUtil.hiveSyncProps.getString(HIVE_URL.key()));
+    cfg.configs.add(HIVE_USER.key() + "=" + HiveTestUtil.hiveSyncProps.getString(HIVE_USER.key()));
+    cfg.configs.add(HIVE_PASS.key() + "=" + HiveTestUtil.hiveSyncProps.getString(HIVE_PASS.key()));
+
+    JavaSparkContext jsc = null;
+    SparkSession spark = null;
+    try {
+      jsc = UtilHelpers.buildSparkContext("test-hudi-hive-sync-job", "local[2]", false);
+      spark = SparkSession.builder().sparkContext(jsc.sc()).getOrCreate();
+
+      StructType schema = new StructType()
+          .add("id", DataTypes.StringType, false)
+          .add("name", DataTypes.StringType, true)
+          .add("ts", DataTypes.LongType, false);
+      Dataset<Row> source = spark.createDataFrame(
+          Arrays.asList(
+              RowFactory.create("1", "a1", 1000L),
+              RowFactory.create("2", "a2", 1001L)),
+          schema);
+
+      // Write Hudi dataset by path only: this should create commits but not register a metastore table.
+      source.write().format("hudi")
+          .option("hoodie.table.name", tableName)
+          .option("hoodie.datasource.write.recordkey.field", "id")
+          .option("hoodie.datasource.write.precombine.field", "ts")
+          .option("hoodie.datasource.write.keygenerator.class", "org.apache.hudi.keygen.NonpartitionedKeyGenerator")
+          .option("hoodie.datasource.write.partitionpath.field", "")
+          .mode("overwrite")
+          .save(basePath);
+
+      assertFalse(tableExists(databaseName, tableName, basePath, spark));
+
+      new HudiHiveSyncJob(jsc, cfg).run();
+
+      assertTrue(tableExists(databaseName, tableName, basePath, spark));
+    } finally {
+      // Nested so the context is still stopped when closing the session throws.
+      try {
+        if (spark != null) {
+          spark.close();
+        }
+      } finally {
+        if (jsc != null) {
+          jsc.stop();
+        }
+      }
+    }
+  }
+
+  /**
+   * Checks whether the table is registered, asking Spark's catalog when the session uses the
+   * {@code hive} catalog implementation and the Hive sync client otherwise.
+   */
+  private boolean tableExists(String dbName, String tableName, String basePath, SparkSession spark) {
+    String catalogImpl = spark.conf().get("spark.sql.catalogImplementation", "in-memory")
+        .toLowerCase(Locale.ROOT);
+    if ("hive".equals(catalogImpl)) {
+      return spark.catalog().tableExists(dbName, tableName);
+    }
+    return tableExistsInMetastore(dbName, tableName, basePath);
+  }
+
+  /**
+   * Looks the table up through a {@link HoodieHiveSyncClient} built from a copy of the fixture's
+   * sync properties; the meta client passed to it is a Mockito mock.
+   */
+  private boolean tableExistsInMetastore(String dbName, String tableName, String basePath) {
+    TypedProperties props = TypedProperties.copy(HiveTestUtil.hiveSyncProps);
+    props.setProperty(META_SYNC_DATABASE_NAME.key(), dbName);
+    props.setProperty(META_SYNC_TABLE_NAME.key(), tableName);
+    props.setProperty(META_SYNC_BASE_PATH.key(), basePath);
+    try (HoodieHiveSyncClient client = new HoodieHiveSyncClient(
+        new HiveSyncConfig(props, HiveTestUtil.getHiveConf()),
+        mock(HoodieTableMetaClient.class))) {
+      return client.tableExists(tableName);
+    }
+  }
+}
diff --git a/pom.xml
b/pom.xml index d3e1fa45b1df9..6289ca7464e58 100644 --- a/pom.xml +++ b/pom.xml @@ -143,7 +143,7 @@ 4.5.14 ${spark3.version} ${spark35.version} - 4.0.1 + 4.0.2 2.1.1 2.0.0 @@ -173,7 +173,7 @@ 3.3.4 3.4.3 3.5.5 - 4.0.1 + 4.0.2 hudi-spark3.5.x hudi-spark3-common 1.11.4 @@ -211,7 +211,7 @@ provided - -Xmx3g -Xms128m -XX:-OmitStackTraceInFastThrow + -Xmx4g -Xms128m -XX:-OmitStackTraceInFastThrow 0.8.12 compile org.apache.hudi. @@ -243,8 +243,8 @@ 2.1.1 1.1.10.7 18.3.0 - 1.0.2 - 0.0.15 + 4.0.0 + 0.4.0 lance-spark-3.5_${scala.binary.version} false @@ -964,12 +964,12 @@ - com.lancedb + org.lance ${lance.spark.artifact} ${lance.spark.connector.version} - com.lancedb + org.lance lance-core @@ -2487,6 +2487,21 @@ *:*_2.11 *:*_2.12 + + + org.apache.flink:*_2.12 + org.scala-lang.modules:scala-xml_2.12 + com.twitter:chill_2.12 + @@ -2925,7 +2940,7 @@ java11 - -Xmx3g -Xms512m -XX:+UseParallelGC --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -XX:-OmitStackTraceInFastThrow + -Xmx4g -Xms512m -XX:+UseParallelGC --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED 
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -XX:-OmitStackTraceInFastThrow [11,17) @@ -2935,7 +2950,7 @@ java17 - -Xmx3g -Xms512m -XX:+UseParallelGC --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djol.magicFieldOffset=true -XX:-OmitStackTraceInFastThrow + -Xmx4g -Xms512m -XX:+UseParallelGC --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djol.magicFieldOffset=true 
-XX:-OmitStackTraceInFastThrow [17,)