Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
55bf91a
introduce static helper method to remove clones (#18533)
aaaZayne Apr 19, 2026
937a64a
fix: whitelist Flink _2.12 artifacts in scala-2.13 enforcer rule (#18…
voonhous Apr 20, 2026
cfb9833
chore(docker): Remove duplicate yarn.nodemanager.bind-host in entrypo…
voonhous Apr 20, 2026
91dba3e
chore(common): Consolidate MapUtils into CollectionUtils (#18529)
voonhous Apr 20, 2026
95199f0
perf(common): avoid stream allocation in CollectionUtils.createImmuta…
voonhous Apr 20, 2026
3a387da
feat(flink): Implement continuous sorting feature for append write (#…
prashantwason Apr 20, 2026
0fb4454
feat(utilities): add external HudiHiveSyncJob for on-demand Hive sync…
suryaprasanna Apr 20, 2026
f35b69c
feat(blob): Read Blobs in Spark SQL (#18098)
the-other-tim-brown Apr 21, 2026
adf29ac
fix: HoodieStorage resource leak in FileSystemBasedLockProvider.close…
mailtoboggavarapu-coder Apr 21, 2026
76a0a27
perf(common): Avoid double-iterating log files in file-system-view fi…
voonhous Apr 21, 2026
a834736
feat(vector): Add Spark SQL DDL CREATE TABLE support for VECTOR type …
voonhous Apr 21, 2026
59fee58
feat(lance): Bump lance to 4.0.0 and lance-spark to 0.4.0 (#18498)
rahil-c Apr 21, 2026
f9dead0
feat: Adding support to block archival on last known ECTR for v6 tabl…
nsivabalan Apr 21, 2026
e303579
fix: prevent parseTypeDescriptor crash for VARIANT (#18510)
voonhous Apr 21, 2026
0d57435
fix: VARIANT Hive sync error when performing CREATE table DDL (#18511)
voonhous Apr 22, 2026
e4904ba
feat: Add support for exclusive rollbacks with multi writer (#18448)
lokeshj1703 Apr 22, 2026
4ef56e4
feat(blob): followup fixes for blob reader (#18538)
rahil-c Apr 22, 2026
cd83cf4
chore(docker): add Hadoop 3.4.0 / Hive 2.3.10 / Spark 4.0.2 compose s…
voonhous Apr 22, 2026
4260914
fix: Parquet small-precision decimals decode ClassCastException (#18552)
skywalker0618 Apr 23, 2026
8623898
fix: JDBC connection leak in HiveIncrementalPuller.saveDelta() (#18460)
mailtoboggavarapu-coder Apr 23, 2026
ddbdbb9
chore(spark): bump spark4.version to 4.0.2 (#18549)
voonhous Apr 23, 2026
7f4dd31
fix(lance): Add Hive InputFormat stubs and fix Spark SQL for Lance fi…
rahil-c Apr 23, 2026
ace2871
feat(flink): Introduces dictionary encoding of payload partition path…
cshuo Apr 23, 2026
1e64662
feat(lance): round-trip Hudi VECTOR columns as native Lance fixed-siz…
rahil-c Apr 23, 2026
9d1f817
fix(vector): Register VECTOR HMS column as BINARY on Spark CREATE (#1…
voonhous Apr 24, 2026
110b9be
fix(variant): allow VariantType writes through Hudi's V1 DataSource o…
voonhous Apr 24, 2026
2092890
fix: ProtoConversionUtil$AvroSupport static init under Avro 1.12 (#18…
tiennguyen-onehouse Apr 24, 2026
edaa168
fix: FileGroupReader drops mandatory partition columns from dataSchem…
tiennguyen-onehouse Apr 24, 2026
217e2a7
feat: Adding support to inject custom configs to parquet writer (#18379)
nsivabalan Apr 24, 2026
2059c11
feat(clean): Adding empty clean support to hudi (#18337)
nsivabalan Apr 24, 2026
436bd66
fix(vector): Pass plain FIXED through to VECTOR projection on Hive re…
voonhous Apr 24, 2026
c1569db
fix(clean): address review comments on empty clean support (#18587)
yihua Apr 25, 2026
4f3e885
fix(ci): bump surefire test heap from 3g to 4g (#18589)
yihua Apr 25, 2026
0812168
test(schema): Add MOR log-only compaction tests for custom types
voonhous Apr 24, 2026
cf663ca
test(schema): Add Lance MOR log-only compaction tests for custom types
voonhous Apr 26, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 267 additions & 0 deletions docker/compose/docker-compose_hadoop340_hive2310_spark402_amd64.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

services:

namenode:
  # HDFS NameNode, built from the Hadoop 3.4.0 namenode image.
  container_name: namenode
  hostname: namenode
  image: "apachehudi/hudi-hadoop_3.4.0-namenode:latest"
  env_file:
    - ./hadoop.env
  environment:
    CLUSTER_NAME: hudi_hadoop340_hive2310_spark402
  ports:
    - "8020:8020"  # HDFS NameNode IPC
    - "9000:9000"  # HDFS NameNode Client
    - "9870:9870"  # HDFS NameNode Web UI
  healthcheck:
    # Healthy once the web UI port answers an HTTP request without error.
    test:
      - CMD
      - curl
      - "-f"
      - "http://namenode:9870"
    retries: 3
    timeout: 10s
    interval: 30s

datanode1:
  # HDFS DataNode; Compose starts it after the NameNode.
  image: "apachehudi/hudi-hadoop_3.4.0-datanode:latest"
  hostname: datanode1
  container_name: datanode1
  depends_on:
    - namenode
  links:
    - namenode
    - historyserver
  env_file:
    - ./hadoop.env
  environment:
    CLUSTER_NAME: hudi_hadoop340_hive2310_spark402
  ports:
    # NOTE(review): 50075/50010 look like the Hadoop 2.x DataNode defaults,
    # while the healthcheck below targets 9864 - confirm against hadoop.env
    # whether these two mappings are still needed.
    - "50075:50075"
    - "9864:9864"
    - "50010:50010"
  healthcheck:
    # Healthy once port 9864 answers an HTTP request without error.
    test:
      - CMD
      - curl
      - "-f"
      - "http://datanode1:9864"
    retries: 3
    timeout: 10s
    interval: 30s

historyserver:
  # Hadoop history server; /hadoop/yarn/timeline is kept on the
  # `historyserver` named volume.
  image: "apachehudi/hudi-hadoop_3.4.0-history:latest"
  container_name: historyserver
  hostname: historyserver
  links:
    - namenode
  depends_on:
    - namenode
  env_file:
    - ./hadoop.env
  environment:
    CLUSTER_NAME: hudi_hadoop340_hive2310_spark402
  volumes:
    - historyserver:/hadoop/yarn/timeline
  ports:
    - "8188:8188"
  healthcheck:
    # Healthy once port 8188 answers an HTTP request without error.
    test:
      - CMD
      - curl
      - "-f"
      - "http://historyserver:8188"
    retries: 3
    timeout: 10s
    interval: 30s

# Pure Hive 2.3.10 stack (postgres 2.3 schema -> HMS 2.3.10 -> HS2 2.3.10).
# This matches hudi-spark-bundle's compile-time Hive 2.3 client, so Hudi hive-sync
# talks to HMS natively (no Thrift get_table incompatibility, no sharedPrefixes hack).
# Hadoop 3.4.0 HDFS is backward-compatible with the 2.8.4-based Hive client.
hive-metastore-postgresql:
  # PostgreSQL database used by the Hive metastore; its data directory is
  # kept on the named volume of the same name.
  container_name: hive-metastore-postgresql
  hostname: hive-metastore-postgresql
  image: "bde2020/hive-metastore-postgresql:2.3.0"
  volumes:
    - "hive-metastore-postgresql:/var/lib/postgresql"

hivemetastore:
  # Hive Metastore service (Hive 2.3.10 image), published on port 9083.
  image: "apachehudi/hudi-hadoop_2.8.4-hive_2.3.10:latest"
  container_name: hivemetastore
  hostname: hivemetastore
  command: /opt/hive/bin/hive --service metastore
  depends_on:
    - hive-metastore-postgresql
    - namenode
  links:
    - hive-metastore-postgresql
    - namenode
  env_file:
    - ./hadoop.env
  environment:
    # Presumably read by the image entrypoint to wait for these host:port
    # pairs before starting - confirm against the image.
    SERVICE_PRECONDITION: "namenode:9870 hive-metastore-postgresql:5432"
  ports:
    - "9083:9083"
  healthcheck:
    # Healthy once port 9083 accepts a TCP connection.
    test:
      - CMD
      - nc
      - "-z"
      - hivemetastore
      - "9083"
    retries: 3
    timeout: 10s
    interval: 30s

hiveserver:
  # Second container from the same Hive 2.3.10 image as `hivemetastore`,
  # published on port 10000, with the Hudi workspace mounted.
  image: "apachehudi/hudi-hadoop_2.8.4-hive_2.3.10:latest"
  container_name: hiveserver
  hostname: hiveserver
  depends_on:
    - hivemetastore
  links:
    - hivemetastore
    - hive-metastore-postgresql
    - namenode
  env_file:
    - ./hadoop.env
  environment:
    # Presumably makes the entrypoint wait for the metastore port - confirm
    # against the image.
    SERVICE_PRECONDITION: "hivemetastore:9083"
  volumes:
    - "${HUDI_WS}:/var/hoodie/ws"
  ports:
    - "10000:10000"

zookeeper:
  # ZooKeeper node; the kafka service is configured to reach it at
  # zookeeper:2181.
  container_name: zookeeper
  hostname: zookeeper
  image: "bitnamilegacy/zookeeper:3.6.4"
  environment:
    # Quoted so the value stays the string "yes" rather than a YAML boolean.
    ALLOW_ANONYMOUS_LOGIN: "yes"
  ports:
    - "2181:2181"

kafka:
  # Kafka broker. The service key is `kafka`, but the container and its
  # hostname are both `kafkabroker`.
  image: 'bitnamilegacy/kafka:3.4.1'
  hostname: kafkabroker
  container_name: kafkabroker
  # The broker is configured to connect to zookeeper:2181, so make Compose
  # start the zookeeper container first.
  depends_on:
    - zookeeper
  ports:
    - "9092:9092"
  environment:
    - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
    - ALLOW_PLAINTEXT_LISTENER=yes

sparkmaster:
  # Spark master (Spark 4.0.2 image). Workers and adhoc containers in this
  # file point at it via spark://sparkmaster:7077.
  image: "apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkmaster_4.0.2:latest"
  container_name: sparkmaster
  hostname: sparkmaster
  links:
    - hivemetastore
    - hiveserver
    - hive-metastore-postgresql
    - namenode
  env_file:
    - ./hadoop.env
  environment:
    INIT_DAEMON_STEP: setup_spark
  volumes:
    # Hudi workspace from the host, plus the local notebooks directory.
    - "${HUDI_WS}:/var/hoodie/ws"
    - "./notebooks:/opt/workspace/notebooks"
  ports:
    - "8080:8080"
    - "7077:7077"
    - "8888:8888"

spark-worker-1:
  # Spark worker (Spark 4.0.2 image) attached to the sparkmaster service.
  image: "apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkworker_4.0.2:latest"
  container_name: spark-worker-1
  hostname: spark-worker-1
  depends_on:
    - sparkmaster
  links:
    - hivemetastore
    - hiveserver
    - hive-metastore-postgresql
    - namenode
  env_file:
    - ./hadoop.env
  environment:
    SPARK_MASTER: "spark://sparkmaster:7077"
  ports:
    - "8081:8081"

adhoc-1:
  # First adhoc Spark container; mounts the Hudi workspace and publishes
  # port 4040.
  image: "apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkadhoc_4.0.2:latest"
  container_name: adhoc-1
  hostname: adhoc-1
  depends_on:
    - sparkmaster
  links:
    - hivemetastore
    - hiveserver
    - hive-metastore-postgresql
    - namenode
  env_file:
    - ./hadoop.env
  environment:
    SPARK_MASTER: "spark://sparkmaster:7077"
  volumes:
    - "${HUDI_WS}:/var/hoodie/ws"
  ports:
    - "4040:4040"

adhoc-2:
  # Second adhoc Spark container; same image and mounts as adhoc-1 but
  # publishes no host ports.
  image: "apachehudi/hudi-hadoop_3.4.0-hive_2.3.10-sparkadhoc_4.0.2:latest"
  container_name: adhoc-2
  hostname: adhoc-2
  depends_on:
    - sparkmaster
  links:
    - hivemetastore
    - hiveserver
    - hive-metastore-postgresql
    - namenode
  env_file:
    - ./hadoop.env
  environment:
    SPARK_MASTER: "spark://sparkmaster:7077"
  volumes:
    - "${HUDI_WS}:/var/hoodie/ws"

minio:
  # MinIO object store. The server listens on :9090 and the console on
  # :9091 (set in `command`), with data kept on the `minio-data` volume.
  image: 'minio/minio:latest'
  hostname: minio
  container_name: minio
  ports:
    - "9090:9090"  # server address
    - "9091:9091"  # console address
  volumes:
    - minio-data:/data
  environment:
    # MINIO_ROOT_USER / MINIO_ROOT_PASSWORD replace the deprecated
    # MINIO_ACCESS_KEY / MINIO_SECRET_KEY pair. The values are unchanged, so
    # the `mc` service still logs in with minio / minio123.
    - MINIO_ROOT_USER=minio
    - MINIO_ROOT_PASSWORD=minio123
    - MINIO_DOMAIN=minio
  command: server --address ":9090" --console-address ":9091" /data

mc:
  # MinIO client container that prepares the `warehouse` bucket.
  image: minio/mc
  container_name: mc
  # The folded scalar joins the lines below into a single `/bin/sh -c "..."`
  # command. The script:
  #   1. retries `mc alias set` every second until the MinIO server accepts it;
  #   2. force-removes any existing `warehouse` bucket, then recreates it;
  #   3. makes the bucket public via `mc anonymous set` (the replacement for
  #      the older `mc policy set` subcommand);
  #   4. runs `tail -f /dev/null` so the container keeps running afterwards.
  entrypoint: >
    /bin/sh -c "
    until (/usr/bin/mc alias set minio http://minio:9090 minio minio123 --api S3v4) do echo '...waiting...' && sleep 1; done;
    /usr/bin/mc rm -r --force minio/warehouse;
    /usr/bin/mc mb minio/warehouse;
    /usr/bin/mc anonymous set public minio/warehouse;
    tail -f /dev/null
    "
  depends_on:
    - minio

volumes:
  # Named volumes with no options; `{}` spells out the empty configuration.
  # NOTE(review): no service visible in this file mounts `namenode` - confirm
  # whether it is still needed.
  namenode: {}
  historyserver: {}
  hive-metastore-postgresql: {}
  minio-data: {}

networks:
  # Give the project's default network the fixed name `hudi`.
  default:
    name: "hudi"
Loading
Loading