From d5865dff78b7a73b00d94968246186a700abcc28 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Thu, 5 Mar 2026 22:39:32 -0800 Subject: [PATCH 01/22] Allow concurrent execution of MDT table services --- .../HoodieBackedTableMetadataWriter.java | 107 +++++++++++------- .../metadata/HoodieMetadataWriteUtils.java | 56 ++++++++- .../TestHoodieMetadataWriteUtils.java | 78 +++++++++++++ .../common/config/HoodieMetadataConfig.java | 50 ++++++++ 4 files changed, 242 insertions(+), 49 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index 8da89553fcf9d..7683e4dd637c8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -28,6 +28,8 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; +import org.apache.hudi.common.model.ActionType; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.EngineType; import org.apache.hudi.common.engine.HoodieEngineContext; @@ -2150,15 +2152,24 @@ static HoodieActiveTimeline runPendingTableServicesOperationsAndRefreshTimeline( Option metricsOption) { try { HoodieActiveTimeline activeTimeline = initialTimelineRequiresRefresh ? metadataMetaClient.reloadActiveTimeline() : metadataMetaClient.getActiveTimeline(); + HoodieTableServiceManagerConfig tsmConfig = writeClient.getConfig().getTableServiceManagerConfig(); // finish off any pending log compaction or compactions operations if any from previous attempt. boolean ranServices = false; if (activeTimeline.filterPendingCompactionTimeline().countInstants() > 0) { - writeClient.runAnyPendingCompactions(); - ranServices = true; + if (tsmConfig.isEnabledAndActionSupported(ActionType.compaction)) { + LOG.info("Skipping pending compactions on MDT as they are delegated to table service manager."); + } else { + writeClient.runAnyPendingCompactions(); + ranServices = true; + } } if (activeTimeline.filterPendingLogCompactionTimeline().countInstants() > 0) { - writeClient.runAnyPendingLogCompactions(); - ranServices = true; + if (tsmConfig.isEnabledAndActionSupported(ActionType.logcompaction)) { + LOG.info("Skipping pending log compactions on MDT as they are delegated to table service manager."); + } else { + writeClient.runAnyPendingLogCompactions(); + ranServices = true; + } } return ranServices ? metadataMetaClient.reloadActiveTimeline() : activeTimeline; } catch (Exception e) { @@ -2179,51 +2190,59 @@ static HoodieActiveTimeline runPendingTableServicesOperationsAndRefreshTimeline( * deltacommit. */ void compactIfNecessary(BaseHoodieWriteClient writeClient, Option latestDeltaCommitTimeOpt) { - // IMPORTANT: Trigger compaction with max instant time that is smaller than(or equals) the earliest pending instant from DT. - // The compaction planner will manage to filter out the log files that finished with greater completion time. - // see BaseHoodieCompactionPlanGenerator.generateCompactionPlan for more details. - HoodieTimeline metadataCompletedTimeline = metadataMetaClient.getActiveTimeline().filterCompletedInstants(); - final String compactionInstantTime = dataMetaClient.reloadActiveTimeline() - // The filtering strategy is kept in line with the rollback premise, if an instant is pending on DT but completed on MDT, - // generates a compaction time smaller than it so that the instant could then been rolled back. - .filterInflightsAndRequested().filter(instant -> metadataCompletedTimeline.containsInstant(instant.requestedTime())).firstInstant() - // minus the pending instant time by 1 millisecond to avoid conflicts on the MDT. - .map(instant -> HoodieInstantTimeGenerator.instantTimeMinusMillis(instant.requestedTime(), 1L)) - .orElse(writeClient.createNewInstantTime(false)); - - // we need to avoid checking compaction w/ same instant again. - // let's say we trigger compaction after C5 in MDT and so compaction completes with C4001. but C5 crashed before completing in MDT. - // and again w/ C6, we will re-attempt compaction at which point latest delta commit is C4 in MDT. - // and so we try compaction w/ instant C4001. So, we can avoid compaction if we already have compaction w/ same instant time. - boolean skipCompactions = metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(compactionInstantTime); - try { - if (skipCompactions) { - LOG.info("Compaction with same {} time is already present in the timeline.", compactionInstantTime); - } else if (writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty())) { - LOG.info("Compaction is scheduled for timestamp {}", compactionInstantTime); - writeClient.compact(compactionInstantTime, true); + HoodieTableServiceManagerConfig tsmConfig = metadataWriteConfig.getTableServiceManagerConfig(); + + if (tsmConfig.isEnabledAndActionSupported(ActionType.compaction)) { + LOG.info("Skipping compaction on MDT as it is delegated to table service manager."); + } else { + // IMPORTANT: Trigger compaction with max instant time that is smaller than(or equals) the earliest pending instant from DT. + // The compaction planner will manage to filter out the log files that finished with greater completion time. + // see BaseHoodieCompactionPlanGenerator.generateCompactionPlan for more details. + HoodieTimeline metadataCompletedTimeline = metadataMetaClient.getActiveTimeline().filterCompletedInstants(); + final String compactionInstantTime = dataMetaClient.reloadActiveTimeline() + // The filtering strategy is kept in line with the rollback premise, if an instant is pending on DT but completed on MDT, + // generates a compaction time smaller than it so that the instant could then been rolled back. + .filterInflightsAndRequested().filter(instant -> metadataCompletedTimeline.containsInstant(instant.requestedTime())).firstInstant() + // minus the pending instant time by 1 millisecond to avoid conflicts on the MDT. + .map(instant -> HoodieInstantTimeGenerator.instantTimeMinusMillis(instant.requestedTime(), 1L)) + .orElse(writeClient.createNewInstantTime(false)); + + // we need to avoid checking compaction w/ same instant again. + // let's say we trigger compaction after C5 in MDT and so compaction completes with C4001. but C5 crashed before completing in MDT. + // and again w/ C6, we will re-attempt compaction at which point latest delta commit is C4 in MDT. + // and so we try compaction w/ instant C4001. So, we can avoid compaction if we already have compaction w/ same instant time. + boolean skipCompactions = metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(compactionInstantTime); + try { + if (skipCompactions) { + LOG.info("Compaction with same {} time is already present in the timeline.", compactionInstantTime); + } else if (writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty())) { + LOG.info("Compaction is scheduled for timestamp {}", compactionInstantTime); + writeClient.compact(compactionInstantTime, true); + } + } catch (Exception e) { + metrics.ifPresent(m -> m.incrementMetric(HoodieMetadataMetrics.COMPACTION_FAILURES, 1)); + LOG.error("Error in scheduling and executing compaction in metadata table", e); + throw e; } - } catch (Exception e) { - metrics.ifPresent(m -> m.incrementMetric(HoodieMetadataMetrics.COMPACTION_FAILURES, 1)); - LOG.error("Error in scheduling and executing compaction in metadata table", e); - throw e; } - try { - if (skipCompactions) { - LOG.info("Compaction with same {} time is already present in the timeline.", compactionInstantTime); - } else if (metadataWriteConfig.isLogCompactionEnabled()) { - // Schedule and execute log compaction with new instant time. - Option scheduledLogCompaction = writeClient.scheduleLogCompaction(Option.empty()); - if (scheduledLogCompaction.isPresent()) { - LOG.info("Log compaction is scheduled for timestamp {}", scheduledLogCompaction.get()); - writeClient.logCompact(scheduledLogCompaction.get(), true); + if (tsmConfig.isEnabledAndActionSupported(ActionType.logcompaction)) { + LOG.info("Skipping log compaction on MDT as it is delegated to table service manager."); + } else { + try { + if (metadataWriteConfig.isLogCompactionEnabled()) { + // Schedule and execute log compaction with new instant time. + Option scheduledLogCompaction = writeClient.scheduleLogCompaction(Option.empty()); + if (scheduledLogCompaction.isPresent()) { + LOG.info("Log compaction is scheduled for timestamp {}", scheduledLogCompaction.get()); + writeClient.logCompact(scheduledLogCompaction.get(), true); + } } + } catch (Exception e) { + metrics.ifPresent(m -> m.incrementMetric(HoodieMetadataMetrics.LOG_COMPACTION_FAILURES, 1)); + LOG.error("Error in scheduling and executing logcompaction in metadata table", e); + throw e; } - } catch (Exception e) { - metrics.ifPresent(m -> m.incrementMetric(HoodieMetadataMetrics.LOG_COMPACTION_FAILURES, 1)); - LOG.error("Error in scheduling and executing logcompaction in metadata table", e); - throw e; } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index bcabac2c516d8..6ec21aad15531 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -21,7 +21,10 @@ import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.client.FailOnFirstErrorWriteStatus; import org.apache.hudi.client.transaction.lock.InProcessLockProvider; +import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; +import org.apache.hudi.common.lock.LockProvider; import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.config.RecordMergeMode; @@ -147,14 +150,22 @@ public static HoodieWriteConfig createMetadataWriteConfig( HoodieTableVersion datatableVersion) { String tableName = writeConfig.getTableName() + METADATA_TABLE_NAME_SUFFIX; boolean isStreamingWritesToMetadataEnabled = writeConfig.isMetadataStreamingWritesEnabled(datatableVersion); - WriteConcurrencyMode concurrencyMode = isStreamingWritesToMetadataEnabled - ? WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL : WriteConcurrencyMode.SINGLE_WRITER; - HoodieLockConfig lockConfig = isStreamingWritesToMetadataEnabled - ? HoodieLockConfig.newBuilder().withLockProvider(InProcessLockProvider.class).build() : HoodieLockConfig.newBuilder().build(); - // HUDI-9407 tracks adding support for separate lock configuration for MDT. Until then, all writes to MDT will happen within data table lock. + WriteConcurrencyMode metadataWriteConcurrencyMode = + WriteConcurrencyMode.valueOf(writeConfig.getMetadataConfig().getWriteConcurrencyMode()); + + WriteConcurrencyMode concurrencyMode; + HoodieLockConfig lockConfig; if (isStreamingWritesToMetadataEnabled) { + concurrencyMode = WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL; + lockConfig = HoodieLockConfig.newBuilder().withLockProvider(InProcessLockProvider.class).build(); failedWritesCleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY; + } else if (metadataWriteConcurrencyMode.supportsMultiWriter()) { + concurrencyMode = metadataWriteConcurrencyMode; + lockConfig = buildMetadataLockConfig(writeConfig, metadataWriteConcurrencyMode); + } else { + concurrencyMode = WriteConcurrencyMode.SINGLE_WRITER; + lockConfig = HoodieLockConfig.newBuilder().build(); } final long maxLogFileSizeBytes = writeConfig.getMetadataConfig().getMaxLogFileSize(); @@ -261,6 +272,11 @@ public static HoodieWriteConfig createMetadataWriteConfig( properties.put(HoodieTableConfig.TYPE.key(), HoodieTableType.MERGE_ON_READ.name()); properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), RECORD_KEY_FIELD_NAME); properties.put("hoodie.datasource.write.recordkey.field", RECORD_KEY_FIELD_NAME); + // Pass table service manager config for MDT + properties.put(HoodieTableServiceManagerConfig.TABLE_SERVICE_MANAGER_ENABLED.key(), + String.valueOf(writeConfig.getMetadataConfig().isTableServiceManagerEnabled())); + properties.put(HoodieTableServiceManagerConfig.TABLE_SERVICE_MANAGER_ACTIONS.key(), + writeConfig.getMetadataConfig().getTableServiceManagerActions()); if (nonEmpty(writeConfig.getMetricReporterMetricsNamePrefix())) { properties.put(HoodieMetricsConfig.METRICS_REPORTER_PREFIX.key(), writeConfig.getMetricReporterMetricsNamePrefix() + METADATA_TABLE_NAME_SUFFIX); @@ -362,6 +378,36 @@ public static HoodieWriteConfig createMetadataWriteConfig( return metadataWriteConfig; } + @SuppressWarnings("unchecked") + private static HoodieLockConfig buildMetadataLockConfig(HoodieWriteConfig writeConfig, + WriteConcurrencyMode metadataWriteConcurrencyMode) { + if (!metadataWriteConcurrencyMode.supportsMultiWriter()) { + return HoodieLockConfig.newBuilder().build(); + } + HoodieLockConfig.Builder lockConfigBuilder = HoodieLockConfig.newBuilder() + .withClientNumRetries(writeConfig.getProps().getInteger(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES.key())) + .withClientRetryWaitTimeInMillis(writeConfig.getProps().getLong(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.key())) + .withLockWaitTimeInMillis(writeConfig.getProps().getLong(HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS.key())); + + try { + Class lockProviderClass = + (Class) Class.forName(writeConfig.getLockProviderClass()); + lockConfigBuilder.withLockProvider(lockProviderClass); + if (lockProviderClass.equals(ZookeeperBasedLockProvider.class)) { + lockConfigBuilder.withZkLockKey(writeConfig.getProps().getString(HoodieLockConfig.ZK_LOCK_KEY.key())); + if (writeConfig.getProps().containsKey(HoodieLockConfig.ZK_BASE_PATH.key())) { + lockConfigBuilder.withZkBasePath(writeConfig.getProps().getString(HoodieLockConfig.ZK_BASE_PATH.key())); + } + if (writeConfig.getProps().containsKey(HoodieLockConfig.ZK_CONNECT_URL.key())) { + lockConfigBuilder.withZkQuorum(writeConfig.getProps().getString(HoodieLockConfig.ZK_CONNECT_URL.key())); + } + } + } catch (ClassNotFoundException e) { + throw new HoodieException("Could not find class for " + writeConfig.getLockProviderClass(), e); + } + return lockConfigBuilder.build(); + } + /** * Convert commit action to metadata records for the enabled partition types. * diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index 0175cb60d663c..2165679385762 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -20,11 +20,13 @@ import org.apache.hudi.client.transaction.lock.InProcessLockProvider; import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.model.ActionType; import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.table.HoodieTableVersion; import org.apache.hudi.config.HoodieCleanConfig; +import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.junit.jupiter.api.Test; @@ -32,8 +34,10 @@ import java.util.Properties; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; public class TestHoodieMetadataWriteUtils { @@ -106,6 +110,80 @@ public void testCreateMetadataWriteConfigForNBCC() { WriteConcurrencyMode.SINGLE_WRITER, null); } + @Test + public void testCreateMetadataWriteConfigForOCC() { + String dataTableBasePath = "/tmp/base_path/"; + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath(dataTableBasePath) + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(5).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(InProcessLockProvider.class).build()) + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.EAGER, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, InProcessLockProvider.class.getCanonicalName()); + // MDT base path should NOT be overwritten to data table's base path + String expectedMdtBasePath = HoodieTableMetadata.getMetadataTableBasePath(dataTableBasePath); + assertEquals(expectedMdtBasePath, metadataWriteConfig.getBasePath()); + } + + @Test + public void testCreateMetadataWriteConfigNBCCTakesPrecedenceOverOCC() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(5).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(true) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(InProcessLockProvider.class).build()) + .build(); + + // Even with metadata OCC configured, streaming writes (NBCC) takes precedence + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, + WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL, InProcessLockProvider.class.getCanonicalName()); + } + + @Test + public void testCreateMetadataWriteConfigWithTableServiceManager() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withTableServiceManagerEnabled(true) + .withTableServiceManagerActions("compaction,logcompaction") + .build()) + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + assertTrue(metadataWriteConfig.getTableServiceManagerConfig().isTableServiceManagerEnabled()); + assertTrue(metadataWriteConfig.getTableServiceManagerConfig().isEnabledAndActionSupported(ActionType.compaction)); + assertTrue(metadataWriteConfig.getTableServiceManagerConfig().isEnabledAndActionSupported(ActionType.logcompaction)); + assertFalse(metadataWriteConfig.getTableServiceManagerConfig().isEnabledAndActionSupported(ActionType.clean)); + } + + @Test + public void testCreateMetadataWriteConfigWithTableServiceManagerDisabled() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + assertFalse(metadataWriteConfig.getTableServiceManagerConfig().isTableServiceManagerEnabled()); + assertFalse(metadataWriteConfig.getTableServiceManagerConfig().isEnabledAndActionSupported(ActionType.compaction)); + } + private void validateMetadataWriteConfig(HoodieWriteConfig metadataWriteConfig, HoodieFailedWritesCleaningPolicy expectedPolicy, WriteConcurrencyMode expectedWriteConcurrencyMode, String expectedLockProviderClass) { assertEquals(expectedPolicy, metadataWriteConfig.getFailedWritesCleanPolicy()); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java index 5fa3201d2ffea..f0df18779aa56 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java @@ -20,6 +20,7 @@ import org.apache.hudi.common.bloom.BloomFilterTypeCode; import org.apache.hudi.common.engine.EngineType; +import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; @@ -81,6 +82,28 @@ public final class HoodieMetadataConfig extends HoodieConfig { + "in streaming manner rather than two disjoint writes. By default " + "streaming writes to metadata table is enabled for SPARK engine for incremental operations and disabled for all other cases."); + public static final ConfigProperty METADATA_WRITE_CONCURRENCY_MODE = ConfigProperty + .key(METADATA_PREFIX + ".write.concurrency.mode") + .defaultValue(WriteConcurrencyMode.SINGLE_WRITER.name()) + .markAdvanced() + .withDocumentation("Change this to OPTIMISTIC_CONCURRENCY_CONTROL when MDT operations are being performed " + + "from an async pipeline so that appropriate locks are taken."); + + public static final ConfigProperty TABLE_SERVICE_MANAGER_ENABLED = ConfigProperty + .key(METADATA_PREFIX + ".table.service.manager.enabled") + .defaultValue(false) + .markAdvanced() + .withDocumentation("If true, delegate specified table service actions on the metadata table to the table service manager " + + "instead of executing them inline. This prevents the current writer from executing compaction/logcompaction " + + "on the metadata table, allowing a separate async pipeline to handle them."); + + public static final ConfigProperty TABLE_SERVICE_MANAGER_ACTIONS = ConfigProperty + .key(METADATA_PREFIX + ".table.service.manager.actions") + .defaultValue("") + .markAdvanced() + .withDocumentation("Comma-separated list of table service actions (e.g. compaction, logcompaction) on the metadata table " + + "that should be delegated to the table service manager."); + public static final ConfigProperty STREAMING_WRITE_DATATABLE_WRITE_STATUSES_COALESCE_DIVISOR = ConfigProperty .key(METADATA_PREFIX + ".streaming.write.datatable.write.statuses.coalesce.divisor") .defaultValue(5000) @@ -678,6 +701,18 @@ public boolean isStreamingWriteEnabled() { return getBoolean(STREAMING_WRITE_ENABLED); } + public String getWriteConcurrencyMode() { + return getString(METADATA_WRITE_CONCURRENCY_MODE); + } + + public boolean isTableServiceManagerEnabled() { + return getBoolean(TABLE_SERVICE_MANAGER_ENABLED); + } + + public String getTableServiceManagerActions() { + return getString(TABLE_SERVICE_MANAGER_ACTIONS); + } + public int getStreamingWritesCoalesceDivisorForDataTableWrites() { return getInt(HoodieMetadataConfig.STREAMING_WRITE_DATATABLE_WRITE_STATUSES_COALESCE_DIVISOR); } @@ -1006,6 +1041,21 @@ public Builder withStreamingWriteEnabled(boolean enabled) { return this; } + public Builder withWriteConcurrencyMode(WriteConcurrencyMode mode) { + metadataConfig.setValue(METADATA_WRITE_CONCURRENCY_MODE, mode.name()); + return this; + } + + public Builder withTableServiceManagerEnabled(boolean enabled) { + metadataConfig.setValue(TABLE_SERVICE_MANAGER_ENABLED, String.valueOf(enabled)); + return this; + } + + public Builder withTableServiceManagerActions(String actions) { + metadataConfig.setValue(TABLE_SERVICE_MANAGER_ACTIONS, actions); + return this; + } + public Builder withMetadataIndexBloomFilter(boolean enable) { metadataConfig.setValue(ENABLE_METADATA_INDEX_BLOOM_FILTER, String.valueOf(enable)); return this; From 5b6316fd1f09dc77c0c69e9c0cbc522d91760e32 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Sun, 8 Mar 2026 20:20:31 -0700 Subject: [PATCH 02/22] validate nbcc --- .../hudi/metadata/HoodieMetadataWriteUtils.java | 6 ++++++ .../hudi/metadata/TestHoodieMetadataWriteUtils.java | 12 ++++++------ .../hudi/common/config/HoodieMetadataConfig.java | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index 6ec21aad15531..083faf8b58253 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -156,6 +156,12 @@ public static HoodieWriteConfig createMetadataWriteConfig( WriteConcurrencyMode concurrencyMode; HoodieLockConfig lockConfig; + if (metadataWriteConcurrencyMode.supportsMultiWriter()) { + checkState(!isStreamingWritesToMetadataEnabled, + "Streaming writes to metadata table must be disabled when using multi-writer concurrency mode " + + metadataWriteConcurrencyMode + ". Disable " + HoodieMetadataConfig.STREAMING_WRITE_ENABLED.key()); + } + if (isStreamingWritesToMetadataEnabled) { concurrencyMode = WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL; lockConfig = HoodieLockConfig.newBuilder().withLockProvider(InProcessLockProvider.class).build(); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index 2165679385762..47584683175b9 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -37,6 +37,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; public class TestHoodieMetadataWriteUtils { @@ -134,7 +135,7 @@ public void testCreateMetadataWriteConfigForOCC() { } @Test - public void testCreateMetadataWriteConfigNBCCTakesPrecedenceOverOCC() { + public void testCreateMetadataWriteConfigRejectsStreamingWritesWithMultiWriter() { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() .withPath("/tmp/base_path/") .withCleanConfig(HoodieCleanConfig.newBuilder() @@ -147,11 +148,10 @@ public void testCreateMetadataWriteConfigNBCCTakesPrecedenceOverOCC() { .withLockProvider(InProcessLockProvider.class).build()) .build(); - // Even with metadata OCC configured, streaming writes (NBCC) takes precedence - HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( - writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); - validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, - WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL, InProcessLockProvider.class.getCanonicalName()); + IllegalStateException ex = assertThrows(IllegalStateException.class, () -> + HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT)); + assertTrue(ex.getMessage().contains("Streaming writes to metadata table must be disabled")); } @Test diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java index f0df18779aa56..2c183f25db190 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java @@ -87,7 +87,7 @@ public final class HoodieMetadataConfig extends HoodieConfig { .defaultValue(WriteConcurrencyMode.SINGLE_WRITER.name()) .markAdvanced() .withDocumentation("Change this to OPTIMISTIC_CONCURRENCY_CONTROL when MDT operations are being performed " - + "from an async pipeline so that appropriate locks are taken."); + + "from an external concurrent writer (such as a table service platform) so that appropriate locks are taken."); public static final ConfigProperty TABLE_SERVICE_MANAGER_ENABLED = ConfigProperty .key(METADATA_PREFIX + ".table.service.manager.enabled") From 9f187e0b57d866ac29788f45b09653da703da73b Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Sun, 8 Mar 2026 23:33:19 -0700 Subject: [PATCH 03/22] validate nbcc --- .../HoodieBackedTableMetadataWriter.java | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index 7683e4dd637c8..c6ee71717b064 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -2192,57 +2192,57 @@ static HoodieActiveTimeline runPendingTableServicesOperationsAndRefreshTimeline( void compactIfNecessary(BaseHoodieWriteClient writeClient, Option latestDeltaCommitTimeOpt) { HoodieTableServiceManagerConfig tsmConfig = metadataWriteConfig.getTableServiceManagerConfig(); - if (tsmConfig.isEnabledAndActionSupported(ActionType.compaction)) { - LOG.info("Skipping compaction on MDT as it is delegated to table service manager."); - } else { - // IMPORTANT: Trigger compaction with max instant time that is smaller than(or equals) the earliest pending instant from DT. - // The compaction planner will manage to filter out the log files that finished with greater completion time. - // see BaseHoodieCompactionPlanGenerator.generateCompactionPlan for more details. - HoodieTimeline metadataCompletedTimeline = metadataMetaClient.getActiveTimeline().filterCompletedInstants(); - final String compactionInstantTime = dataMetaClient.reloadActiveTimeline() - // The filtering strategy is kept in line with the rollback premise, if an instant is pending on DT but completed on MDT, - // generates a compaction time smaller than it so that the instant could then been rolled back. - .filterInflightsAndRequested().filter(instant -> metadataCompletedTimeline.containsInstant(instant.requestedTime())).firstInstant() - // minus the pending instant time by 1 millisecond to avoid conflicts on the MDT. - .map(instant -> HoodieInstantTimeGenerator.instantTimeMinusMillis(instant.requestedTime(), 1L)) - .orElse(writeClient.createNewInstantTime(false)); - - // we need to avoid checking compaction w/ same instant again. - // let's say we trigger compaction after C5 in MDT and so compaction completes with C4001. but C5 crashed before completing in MDT. - // and again w/ C6, we will re-attempt compaction at which point latest delta commit is C4 in MDT. - // and so we try compaction w/ instant C4001. So, we can avoid compaction if we already have compaction w/ same instant time. - boolean skipCompactions = metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(compactionInstantTime); - try { - if (skipCompactions) { - LOG.info("Compaction with same {} time is already present in the timeline.", compactionInstantTime); - } else if (writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty())) { - LOG.info("Compaction is scheduled for timestamp {}", compactionInstantTime); + // IMPORTANT: Trigger compaction with max instant time that is smaller than(or equals) the earliest pending instant from DT. + // The compaction planner will manage to filter out the log files that finished with greater completion time. + // see BaseHoodieCompactionPlanGenerator.generateCompactionPlan for more details. + HoodieTimeline metadataCompletedTimeline = metadataMetaClient.getActiveTimeline().filterCompletedInstants(); + final String compactionInstantTime = dataMetaClient.reloadActiveTimeline() + // The filtering strategy is kept in line with the rollback premise, if an instant is pending on DT but completed on MDT, + // generates a compaction time smaller than it so that the instant could then been rolled back. + .filterInflightsAndRequested().filter(instant -> metadataCompletedTimeline.containsInstant(instant.requestedTime())).firstInstant() + // minus the pending instant time by 1 millisecond to avoid conflicts on the MDT. + .map(instant -> HoodieInstantTimeGenerator.instantTimeMinusMillis(instant.requestedTime(), 1L)) + .orElse(writeClient.createNewInstantTime(false)); + + // we need to avoid checking compaction w/ same instant again. + // let's say we trigger compaction after C5 in MDT and so compaction completes with C4001. but C5 crashed before completing in MDT. + // and again w/ C6, we will re-attempt compaction at which point latest delta commit is C4 in MDT. + // and so we try compaction w/ instant C4001. So, we can avoid compaction if we already have compaction w/ same instant time. + boolean skipCompactions = metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(compactionInstantTime); + try { + if (skipCompactions) { + LOG.info("Compaction with same {} time is already present in the timeline.", compactionInstantTime); + } else if (writeClient.scheduleCompactionAtInstant(compactionInstantTime, Option.empty())) { + LOG.info("Compaction is scheduled for timestamp {}", compactionInstantTime); + if (tsmConfig.isEnabledAndActionSupported(ActionType.compaction)) { + LOG.info("Skipping execution of compaction on MDT as it is delegated to table service manager."); + } else { writeClient.compact(compactionInstantTime, true); } - } catch (Exception e) { - metrics.ifPresent(m -> m.incrementMetric(HoodieMetadataMetrics.COMPACTION_FAILURES, 1)); - LOG.error("Error in scheduling and executing compaction in metadata table", e); - throw e; } + } catch (Exception e) { + metrics.ifPresent(m -> m.incrementMetric(HoodieMetadataMetrics.COMPACTION_FAILURES, 1)); + LOG.error("Error in scheduling and executing compaction in metadata table", e); + throw e; } - if (tsmConfig.isEnabledAndActionSupported(ActionType.logcompaction)) { - LOG.info("Skipping log compaction on MDT as it is delegated to table service manager."); - } else { - try { - if (metadataWriteConfig.isLogCompactionEnabled()) { - // Schedule and execute log compaction with new instant time. - Option scheduledLogCompaction = writeClient.scheduleLogCompaction(Option.empty()); - if (scheduledLogCompaction.isPresent()) { - LOG.info("Log compaction is scheduled for timestamp {}", scheduledLogCompaction.get()); + try { + if (metadataWriteConfig.isLogCompactionEnabled()) { + // Schedule and execute log compaction with new instant time. + Option scheduledLogCompaction = writeClient.scheduleLogCompaction(Option.empty()); + if (scheduledLogCompaction.isPresent()) { + LOG.info("Log compaction is scheduled for timestamp {}", scheduledLogCompaction.get()); + if (tsmConfig.isEnabledAndActionSupported(ActionType.logcompaction)) { + LOG.info("Skipping execution of log compaction on MDT as it is delegated to table service manager."); + } else { writeClient.logCompact(scheduledLogCompaction.get(), true); } } - } catch (Exception e) { - metrics.ifPresent(m -> m.incrementMetric(HoodieMetadataMetrics.LOG_COMPACTION_FAILURES, 1)); - LOG.error("Error in scheduling and executing logcompaction in metadata table", e); - throw e; } + } catch (Exception e) { + metrics.ifPresent(m -> m.incrementMetric(HoodieMetadataMetrics.LOG_COMPACTION_FAILURES, 1)); + LOG.error("Error in scheduling and executing logcompaction in metadata table", e); + throw e; } } From dca344e20ddd0aa0b35ce4a8b36e82f18f1299e4 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Sun, 8 Mar 2026 23:47:00 -0700 Subject: [PATCH 04/22] validate nbcc --- .../HoodieBackedTableMetadataWriter.java | 4 +- ...kedTableMetadataWriterTableVersionSix.java | 15 ++++- .../TestHoodieBackedTableMetadataWriter.java | 61 +++++++++++++++++++ 3 files changed, 77 insertions(+), 3 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index c6ee71717b064..f7af8d339aa3f 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -2227,7 +2227,9 @@ void compactIfNecessary(BaseHoodieWriteClient writeClient, Option scheduledLogCompaction = writeClient.scheduleLogCompaction(Option.empty()); if (scheduledLogCompaction.isPresent()) { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriterTableVersionSix.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriterTableVersionSix.java index a7f51400300ec..9646e3781f2e8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriterTableVersionSix.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriterTableVersionSix.java @@ -21,6 +21,8 @@ import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; +import org.apache.hudi.common.model.ActionType; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -276,6 +278,7 @@ private void validateRollbackVersionSix( */ @Override void compactIfNecessary(BaseHoodieWriteClient writeClient, Option latestDeltaCommitTimeOpt) { + HoodieTableServiceManagerConfig tsmConfig = metadataWriteConfig.getTableServiceManagerConfig(); // Trigger compaction with suffixes based on the same instant time. This ensures that any future // delta commits synced over will not have an instant time lesser than the last completed instant on the // metadata table. @@ -289,7 +292,11 @@ void compactIfNecessary(BaseHoodieWriteClient writeClient, Option writeClient, Option Date: Mon, 9 Mar 2026 00:35:48 -0700 Subject: [PATCH 05/22] import --- .../hudi/metadata/TestHoodieBackedTableMetadataWriter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java index ae248db75c030..e57a8c4bd9659 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java @@ -23,7 +23,6 @@ import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.model.ActionType; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.HoodieTableMetaClient; From eb149865f0160a23945a4d5bf01917d050c66aae Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Mon, 9 Mar 2026 15:10:52 -0700 Subject: [PATCH 06/22] simplfy lock changes --- .../metadata/HoodieMetadataWriteUtils.java | 35 +----- .../TestHoodieMetadataWriteUtils.java | 117 ++++++++++++++++++ 2 files changed, 119 insertions(+), 33 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index 083faf8b58253..8f1127c26077b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -21,10 +21,8 @@ import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.client.FailOnFirstErrorWriteStatus; import org.apache.hudi.client.transaction.lock.InProcessLockProvider; -import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; -import org.apache.hudi.common.lock.LockProvider; import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.config.RecordMergeMode; @@ -157,6 +155,7 @@ public static HoodieWriteConfig createMetadataWriteConfig( HoodieLockConfig lockConfig; if (metadataWriteConcurrencyMode.supportsMultiWriter()) { + // Configuring Multi-writer directly on metadata table is intended for executing table service plans, not for writes. checkState(!isStreamingWritesToMetadataEnabled, "Streaming writes to metadata table must be disabled when using multi-writer concurrency mode " + metadataWriteConcurrencyMode + ". Disable " + HoodieMetadataConfig.STREAMING_WRITE_ENABLED.key()); @@ -168,7 +167,7 @@ public static HoodieWriteConfig createMetadataWriteConfig( failedWritesCleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY; } else if (metadataWriteConcurrencyMode.supportsMultiWriter()) { concurrencyMode = metadataWriteConcurrencyMode; - lockConfig = buildMetadataLockConfig(writeConfig, metadataWriteConcurrencyMode); + lockConfig = HoodieLockConfig.newBuilder().fromProperties(writeConfig.getProps()).build(); } else { concurrencyMode = WriteConcurrencyMode.SINGLE_WRITER; lockConfig = HoodieLockConfig.newBuilder().build(); @@ -384,36 +383,6 @@ public static HoodieWriteConfig createMetadataWriteConfig( return metadataWriteConfig; } - @SuppressWarnings("unchecked") - private static HoodieLockConfig buildMetadataLockConfig(HoodieWriteConfig writeConfig, - WriteConcurrencyMode metadataWriteConcurrencyMode) { - if (!metadataWriteConcurrencyMode.supportsMultiWriter()) { - return HoodieLockConfig.newBuilder().build(); - } - HoodieLockConfig.Builder lockConfigBuilder = HoodieLockConfig.newBuilder() - .withClientNumRetries(writeConfig.getProps().getInteger(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES.key())) - .withClientRetryWaitTimeInMillis(writeConfig.getProps().getLong(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.key())) - .withLockWaitTimeInMillis(writeConfig.getProps().getLong(HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS.key())); - - try { - Class lockProviderClass = - (Class) Class.forName(writeConfig.getLockProviderClass()); - lockConfigBuilder.withLockProvider(lockProviderClass); - if (lockProviderClass.equals(ZookeeperBasedLockProvider.class)) { - lockConfigBuilder.withZkLockKey(writeConfig.getProps().getString(HoodieLockConfig.ZK_LOCK_KEY.key())); - if (writeConfig.getProps().containsKey(HoodieLockConfig.ZK_BASE_PATH.key())) { - lockConfigBuilder.withZkBasePath(writeConfig.getProps().getString(HoodieLockConfig.ZK_BASE_PATH.key())); - } - if (writeConfig.getProps().containsKey(HoodieLockConfig.ZK_CONNECT_URL.key())) { - lockConfigBuilder.withZkQuorum(writeConfig.getProps().getString(HoodieLockConfig.ZK_CONNECT_URL.key())); - } - } - } catch (ClassNotFoundException e) { - throw new HoodieException("Could not find class for " + writeConfig.getLockProviderClass(), e); - } - return lockConfigBuilder.build(); - } - /** * Convert commit action to metadata records for the enabled partition types. * diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index 47584683175b9..cfcae773be198 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -18,7 +18,9 @@ package org.apache.hudi.metadata; +import org.apache.hudi.client.transaction.lock.FileSystemBasedLockProvider; import org.apache.hudi.client.transaction.lock.InProcessLockProvider; +import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.ActionType; import org.apache.hudi.common.model.HoodieCleaningPolicy; @@ -184,6 +186,121 @@ public void testCreateMetadataWriteConfigWithTableServiceManagerDisabled() { assertFalse(metadataWriteConfig.getTableServiceManagerConfig().isEnabledAndActionSupported(ActionType.compaction)); } + @Test + public void testCreateMetadataWriteConfigForOCCWithZookeeperLockProvider() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(5).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(ZookeeperBasedLockProvider.class) + .withZkQuorum("zk-host:2181") + .withZkBasePath("/hudi/locks") + .withZkLockKey("test_table") + .withZkPort("2181") + .withZkSessionTimeoutInMs(30000L) + .withZkConnectionTimeoutInMs(15000L) + .build()) + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.EAGER, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, ZookeeperBasedLockProvider.class.getCanonicalName()); + assertEquals("zk-host:2181", metadataWriteConfig.getProps().getString(HoodieLockConfig.ZK_CONNECT_URL.key())); + assertEquals("/hudi/locks", metadataWriteConfig.getProps().getString(HoodieLockConfig.ZK_BASE_PATH.key())); + assertEquals("test_table", metadataWriteConfig.getProps().getString(HoodieLockConfig.ZK_LOCK_KEY.key())); + assertEquals("2181", metadataWriteConfig.getProps().getString(HoodieLockConfig.ZK_PORT.key())); + assertEquals(30000, metadataWriteConfig.getProps().getInteger(HoodieLockConfig.ZK_SESSION_TIMEOUT_MS.key())); + assertEquals(15000, metadataWriteConfig.getProps().getInteger(HoodieLockConfig.ZK_CONNECTION_TIMEOUT_MS.key())); + } + + @Test + public void testCreateMetadataWriteConfigForOCCWithHiveMetastoreLockProvider() { + String hmsLockProviderClass = "org.apache.hudi.hive.transaction.lock.HiveMetastoreBasedLockProvider"; + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), hmsLockProviderClass); + lockProps.put(HoodieLockConfig.HIVE_DATABASE_NAME.key(), "my_database"); + lockProps.put(HoodieLockConfig.HIVE_TABLE_NAME.key(), "my_table"); + lockProps.put(HoodieLockConfig.HIVE_METASTORE_URI.key(), "thrift://hms-host:9083"); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(5).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withLockConfig(HoodieLockConfig.newBuilder() + .fromProperties(lockProps) + .build()) + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.EAGER, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, hmsLockProviderClass); + assertEquals("my_database", metadataWriteConfig.getProps().getString(HoodieLockConfig.HIVE_DATABASE_NAME.key())); + assertEquals("my_table", metadataWriteConfig.getProps().getString(HoodieLockConfig.HIVE_TABLE_NAME.key())); + assertEquals("thrift://hms-host:9083", metadataWriteConfig.getProps().getString(HoodieLockConfig.HIVE_METASTORE_URI.key())); + } + + @Test + public void testCreateMetadataWriteConfigForOCCWithFileSystemLockProvider() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(5).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(FileSystemBasedLockProvider.class) + .withFileSystemLockPath("/tmp/lock_dir") + .withFileSystemLockExpire(10) + .build()) + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.EAGER, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, FileSystemBasedLockProvider.class.getCanonicalName()); + assertEquals("/tmp/lock_dir", metadataWriteConfig.getProps().getString(HoodieLockConfig.FILESYSTEM_LOCK_PATH.key())); + assertEquals(10, metadataWriteConfig.getProps().getInteger(HoodieLockConfig.FILESYSTEM_LOCK_EXPIRE.key())); + } + + @Test + public void testCreateMetadataWriteConfigForOCCWithCustomLockProvider() { + String customLockProviderClass = "com.example.custom.MyCustomLockProvider"; + String customConfigKey = "hoodie.write.lock.custom.endpoint"; + String customConfigValue = "https://lock-service.example.com"; + + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), customLockProviderClass); + lockProps.put(customConfigKey, customConfigValue); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(5).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withLockConfig(HoodieLockConfig.newBuilder() + .fromProperties(lockProps) + .build()) + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.EAGER, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, customLockProviderClass); + assertEquals(customConfigValue, metadataWriteConfig.getProps().getString(customConfigKey)); + } + private void validateMetadataWriteConfig(HoodieWriteConfig metadataWriteConfig, HoodieFailedWritesCleaningPolicy expectedPolicy, WriteConcurrencyMode expectedWriteConcurrencyMode, String expectedLockProviderClass) { assertEquals(expectedPolicy, metadataWriteConfig.getFailedWritesCleanPolicy()); From 1d71d4ec6471c9c6a0ad1cbd6d2ed99cddaba1e0 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Mon, 9 Mar 2026 15:16:34 -0700 Subject: [PATCH 07/22] fix text --- .../apache/hudi/metadata/TestHoodieMetadataWriteUtils.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index cfcae773be198..7b0e2dc5c17da 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -122,6 +122,7 @@ public void testCreateMetadataWriteConfigForOCC() { .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) .retainCommits(5).build()) .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) .withLockConfig(HoodieLockConfig.newBuilder() .withLockProvider(InProcessLockProvider.class).build()) @@ -194,6 +195,7 @@ public void testCreateMetadataWriteConfigForOCCWithZookeeperLockProvider() { .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) .retainCommits(5).build()) .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) .withLockConfig(HoodieLockConfig.newBuilder() .withLockProvider(ZookeeperBasedLockProvider.class) @@ -233,6 +235,7 @@ public void testCreateMetadataWriteConfigForOCCWithHiveMetastoreLockProvider() { .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) .retainCommits(5).build()) .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) .withLockConfig(HoodieLockConfig.newBuilder() .fromProperties(lockProps) @@ -256,6 +259,7 @@ public void testCreateMetadataWriteConfigForOCCWithFileSystemLockProvider() { .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) .retainCommits(5).build()) .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) .withLockConfig(HoodieLockConfig.newBuilder() .withLockProvider(FileSystemBasedLockProvider.class) @@ -288,6 +292,7 @@ public void testCreateMetadataWriteConfigForOCCWithCustomLockProvider() { .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) .retainCommits(5).build()) .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) .withLockConfig(HoodieLockConfig.newBuilder() .fromProperties(lockProps) From 2b366629e66fb7831a45f2f66a5c68d5179279a9 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Tue, 10 Mar 2026 13:09:36 -0700 Subject: [PATCH 08/22] handle lock provider cases --- .../metadata/HoodieMetadataWriteUtils.java | 99 ++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index 8f1127c26077b..653195c3c53be 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -20,9 +20,13 @@ import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.client.FailOnFirstErrorWriteStatus; +import org.apache.hudi.client.transaction.lock.FileSystemBasedLockProvider; import org.apache.hudi.client.transaction.lock.InProcessLockProvider; +import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; +import org.apache.hudi.common.config.LockConfiguration; +import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.config.RecordMergeMode; @@ -167,7 +171,7 @@ public static HoodieWriteConfig createMetadataWriteConfig( failedWritesCleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY; } else if (metadataWriteConcurrencyMode.supportsMultiWriter()) { concurrencyMode = metadataWriteConcurrencyMode; - lockConfig = HoodieLockConfig.newBuilder().fromProperties(writeConfig.getProps()).build(); + lockConfig = buildMetadataLockConfig(writeConfig); } else { concurrencyMode = WriteConcurrencyMode.SINGLE_WRITER; lockConfig = HoodieLockConfig.newBuilder().build(); @@ -383,6 +387,99 @@ public static HoodieWriteConfig createMetadataWriteConfig( return metadataWriteConfig; } + /** + * Build the lock config for the metadata table by extracting lock-specific properties from the + * data table's write config. This avoids copying all properties (which would overwrite MDT-specific + * settings like base path and auto-clean). + */ + private static HoodieLockConfig buildMetadataLockConfig(HoodieWriteConfig writeConfig) { + TypedProperties props = writeConfig.getProps(); + HoodieLockConfig.Builder lockConfigBuilder = HoodieLockConfig.newBuilder() + .withClientNumRetries(Integer.parseInt(props.getString( + HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES.key(), + HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES.defaultValue()))) + .withClientRetryWaitTimeInMillis(Long.parseLong(props.getString( + HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.key(), + HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.defaultValue()))) + .withLockWaitTimeInMillis(Long.valueOf(props.getString( + HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS.key(), + String.valueOf(HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS.defaultValue())))) + .withNumRetries(Integer.parseInt(props.getString( + HoodieLockConfig.LOCK_ACQUIRE_NUM_RETRIES.key(), + HoodieLockConfig.LOCK_ACQUIRE_NUM_RETRIES.defaultValue()))) + .withRetryWaitTimeInMillis(Long.parseLong(props.getString( + HoodieLockConfig.LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS.key(), + HoodieLockConfig.LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS.defaultValue()))) + .withRetryMaxWaitTimeInMillis(Long.parseLong(props.getString( + HoodieLockConfig.LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS.key(), + HoodieLockConfig.LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS.defaultValue()))) + .withHeartbeatIntervalInMillis(Long.valueOf(props.getString( + HoodieLockConfig.LOCK_HEARTBEAT_INTERVAL_MS.key(), + String.valueOf(HoodieLockConfig.LOCK_HEARTBEAT_INTERVAL_MS.defaultValue())))); + + String lockProviderClass = writeConfig.getLockProviderClass(); + if (lockProviderClass == null) { + return lockConfigBuilder.build(); + } + + Properties providerProp = new Properties(); + providerProp.setProperty(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass); + lockConfigBuilder.fromProperties(providerProp); + + if (ZookeeperBasedLockProvider.class.getName().equals(lockProviderClass)) { + if (props.containsKey(HoodieLockConfig.ZK_CONNECT_URL.key())) { + lockConfigBuilder.withZkQuorum(props.getString(HoodieLockConfig.ZK_CONNECT_URL.key())); + } + if (props.containsKey(HoodieLockConfig.ZK_BASE_PATH.key())) { + lockConfigBuilder.withZkBasePath(props.getString(HoodieLockConfig.ZK_BASE_PATH.key())); + } + if (props.containsKey(HoodieLockConfig.ZK_LOCK_KEY.key())) { + lockConfigBuilder.withZkLockKey(props.getString(HoodieLockConfig.ZK_LOCK_KEY.key())); + } + if (props.containsKey(HoodieLockConfig.ZK_PORT.key())) { + lockConfigBuilder.withZkPort(props.getString(HoodieLockConfig.ZK_PORT.key())); + } + if (props.containsKey(HoodieLockConfig.ZK_SESSION_TIMEOUT_MS.key())) { + lockConfigBuilder.withZkSessionTimeoutInMs( + Long.valueOf(props.getString(HoodieLockConfig.ZK_SESSION_TIMEOUT_MS.key()))); + } + if (props.containsKey(HoodieLockConfig.ZK_CONNECTION_TIMEOUT_MS.key())) { + lockConfigBuilder.withZkConnectionTimeoutInMs( + Long.valueOf(props.getString(HoodieLockConfig.ZK_CONNECTION_TIMEOUT_MS.key()))); + } + } else if (FileSystemBasedLockProvider.class.getName().equals(lockProviderClass)) { + if (props.containsKey(HoodieLockConfig.FILESYSTEM_LOCK_PATH.key())) { + lockConfigBuilder.withFileSystemLockPath(props.getString(HoodieLockConfig.FILESYSTEM_LOCK_PATH.key())); + } + if (props.containsKey(HoodieLockConfig.FILESYSTEM_LOCK_EXPIRE.key())) { + lockConfigBuilder.withFileSystemLockExpire( + Integer.parseInt(props.getString(HoodieLockConfig.FILESYSTEM_LOCK_EXPIRE.key()))); + } + } else if (lockProviderClass.contains("HiveMetastoreBasedLockProvider")) { + if (props.containsKey(HoodieLockConfig.HIVE_DATABASE_NAME.key())) { + lockConfigBuilder.withHiveDatabaseName(props.getString(HoodieLockConfig.HIVE_DATABASE_NAME.key())); + } + if (props.containsKey(HoodieLockConfig.HIVE_TABLE_NAME.key())) { + lockConfigBuilder.withHiveTableName(props.getString(HoodieLockConfig.HIVE_TABLE_NAME.key())); + } + if (props.containsKey(HoodieLockConfig.HIVE_METASTORE_URI.key())) { + lockConfigBuilder.withHiveMetastoreURIs(props.getString(HoodieLockConfig.HIVE_METASTORE_URI.key())); + } + } else { + // For any custom lock provider, pass through all lock-prefixed properties + // so provider-specific configs are preserved. + Properties lockProps = new Properties(); + props.forEach((k, v) -> { + if (k.toString().startsWith(LockConfiguration.LOCK_PREFIX)) { + lockProps.put(k, v); + } + }); + lockConfigBuilder.fromProperties(lockProps); + } + + return lockConfigBuilder.build(); + } + /** * Convert commit action to metadata records for the enabled partition types. * From 63576bd9ffd9287a156ae12792b01e6bb1528436 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Tue, 10 Mar 2026 14:55:35 -0700 Subject: [PATCH 09/22] handle auto adjust concurretn --- .../hudi/metadata/TestHoodieMetadataWriteUtils.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index 7b0e2dc5c17da..d4be035fbcf47 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -130,7 +130,8 @@ public void testCreateMetadataWriteConfigForOCC() { HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); - validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.EAGER, + // HoodieWriteConfig builder auto-adjusts failed writes policy to LAZY for multi-writer modes + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, InProcessLockProvider.class.getCanonicalName()); // MDT base path should NOT be overwritten to data table's base path String expectedMdtBasePath = HoodieTableMetadata.getMetadataTableBasePath(dataTableBasePath); @@ -210,7 +211,7 @@ public void testCreateMetadataWriteConfigForOCCWithZookeeperLockProvider() { HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); - validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.EAGER, + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, ZookeeperBasedLockProvider.class.getCanonicalName()); assertEquals("zk-host:2181", metadataWriteConfig.getProps().getString(HoodieLockConfig.ZK_CONNECT_URL.key())); assertEquals("/hudi/locks", metadataWriteConfig.getProps().getString(HoodieLockConfig.ZK_BASE_PATH.key())); @@ -244,7 +245,7 @@ public void testCreateMetadataWriteConfigForOCCWithHiveMetastoreLockProvider() { HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); - validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.EAGER, + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, hmsLockProviderClass); assertEquals("my_database", metadataWriteConfig.getProps().getString(HoodieLockConfig.HIVE_DATABASE_NAME.key())); assertEquals("my_table", metadataWriteConfig.getProps().getString(HoodieLockConfig.HIVE_TABLE_NAME.key())); @@ -270,7 +271,7 @@ public void testCreateMetadataWriteConfigForOCCWithFileSystemLockProvider() { HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); - validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.EAGER, + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, FileSystemBasedLockProvider.class.getCanonicalName()); assertEquals("/tmp/lock_dir", metadataWriteConfig.getProps().getString(HoodieLockConfig.FILESYSTEM_LOCK_PATH.key())); assertEquals(10, metadataWriteConfig.getProps().getInteger(HoodieLockConfig.FILESYSTEM_LOCK_EXPIRE.key())); @@ -301,7 +302,7 @@ public void testCreateMetadataWriteConfigForOCCWithCustomLockProvider() { HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); - validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.EAGER, + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, customLockProviderClass); assertEquals(customConfigValue, metadataWriteConfig.getProps().getString(customConfigKey)); } From 3b62360a010ca5e5d180c731386bf244902ca4e0 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Mon, 16 Mar 2026 15:03:40 -0700 Subject: [PATCH 10/22] refactor lock config --- .../metadata/HoodieMetadataWriteUtils.java | 149 ++++++------------ .../TestHoodieMetadataWriteUtils.java | 97 ++++++++++++ .../common/config/HoodieMetadataConfig.java | 4 +- 3 files changed, 144 insertions(+), 106 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index 653195c3c53be..acd2baaf49d8d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -157,24 +157,34 @@ public static HoodieWriteConfig createMetadataWriteConfig( WriteConcurrencyMode concurrencyMode; HoodieLockConfig lockConfig; - + final boolean mergeMetdataLockConfigAtEnd; if (metadataWriteConcurrencyMode.supportsMultiWriter()) { // Configuring Multi-writer directly on metadata table is intended for executing table service plans, not for writes. checkState(!isStreamingWritesToMetadataEnabled, "Streaming writes to metadata table must be disabled when using multi-writer concurrency mode " + metadataWriteConcurrencyMode + ". Disable " + HoodieMetadataConfig.STREAMING_WRITE_ENABLED.key()); - } - - if (isStreamingWritesToMetadataEnabled) { - concurrencyMode = WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL; - lockConfig = HoodieLockConfig.newBuilder().withLockProvider(InProcessLockProvider.class).build(); - failedWritesCleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY; - } else if (metadataWriteConcurrencyMode.supportsMultiWriter()) { - concurrencyMode = metadataWriteConcurrencyMode; - lockConfig = buildMetadataLockConfig(writeConfig); - } else { + checkState(metadataWriteConcurrencyMode == writeConfig.getWriteConcurrencyMode(), + "If multiwriter is used on metadata table, its concurrency mode (" + metadataWriteConcurrencyMode + + ") must match the data table concurrency mode (" + writeConfig.getWriteConcurrencyMode() + ")"); + // First lets create te MDT write config with default single writer lock configs. + // Then, once all MDT-specific write configs are set, we can create a lock config + // containing all write config raw props in data table that aren't in the raw props + // for MDT write config. And then, re-build the MDT write config with this merged lock config. concurrencyMode = WriteConcurrencyMode.SINGLE_WRITER; lockConfig = HoodieLockConfig.newBuilder().build(); + failedWritesCleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY; + mergeMetdataLockConfigAtEnd = true; + + } else { + mergeMetdataLockConfigAtEnd = false; + if (isStreamingWritesToMetadataEnabled) { + concurrencyMode = WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL; + lockConfig = HoodieLockConfig.newBuilder().withLockProvider(InProcessLockProvider.class).build(); + failedWritesCleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY; + } else { + concurrencyMode = WriteConcurrencyMode.SINGLE_WRITER; + lockConfig = HoodieLockConfig.newBuilder().build(); + } } final long maxLogFileSizeBytes = writeConfig.getMetadataConfig().getMaxLogFileSize(); @@ -375,6 +385,30 @@ public static HoodieWriteConfig createMetadataWriteConfig( } HoodieWriteConfig metadataWriteConfig = builder.build(); + if (mergeMetdataLockConfigAtEnd) { + // We need to update the MDT write config to have the same lock related configs as the data table. + // Unfortunately, we cannot use infer which keys on data table are specific to lock config, as + // users may use custom lock providers and lock configs. As a workaround, we will add all write + // config props on data table that are not present/explicitly set on MDT write config to the MDT write config. + Properties dataTablePropsNotInMdt = new Properties(); + TypedProperties dataTableProps = writeConfig.getProps(); + TypedProperties mdtProps = metadataWriteConfig.getProps(); + for (String key : dataTableProps.stringPropertyNames()) { + if (!mdtProps.containsKey(key)) { + dataTablePropsNotInMdt.setProperty(key, dataTableProps.getProperty(key)); + } + } + Properties lockProps = new Properties(); + lockProps.putAll(dataTablePropsNotInMdt); + lockProps.setProperty(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), metadataWriteConcurrencyMode.name()); + String lockProviderClass = writeConfig.getLockProviderClass(); + checkState(lockProviderClass != null, "Lock provider class must be set for metadata table"); + lockProps.setProperty(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass); + metadataWriteConfig = HoodieWriteConfig.newBuilder() + .withProperties(metadataWriteConfig.getProps()) + .withProperties(lockProps) + .build(); + } // Inline compaction and auto clean is required as we do not expose this table outside ValidationUtils.checkArgument(!metadataWriteConfig.isAutoClean(), "Cleaning is controlled internally for Metadata table."); @@ -387,99 +421,6 @@ public static HoodieWriteConfig createMetadataWriteConfig( return metadataWriteConfig; } - /** - * Build the lock config for the metadata table by extracting lock-specific properties from the - * data table's write config. This avoids copying all properties (which would overwrite MDT-specific - * settings like base path and auto-clean). - */ - private static HoodieLockConfig buildMetadataLockConfig(HoodieWriteConfig writeConfig) { - TypedProperties props = writeConfig.getProps(); - HoodieLockConfig.Builder lockConfigBuilder = HoodieLockConfig.newBuilder() - .withClientNumRetries(Integer.parseInt(props.getString( - HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES.key(), - HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES.defaultValue()))) - .withClientRetryWaitTimeInMillis(Long.parseLong(props.getString( - HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.key(), - HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.defaultValue()))) - .withLockWaitTimeInMillis(Long.valueOf(props.getString( - HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS.key(), - String.valueOf(HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS.defaultValue())))) - .withNumRetries(Integer.parseInt(props.getString( - HoodieLockConfig.LOCK_ACQUIRE_NUM_RETRIES.key(), - HoodieLockConfig.LOCK_ACQUIRE_NUM_RETRIES.defaultValue()))) - .withRetryWaitTimeInMillis(Long.parseLong(props.getString( - HoodieLockConfig.LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS.key(), - HoodieLockConfig.LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS.defaultValue()))) - .withRetryMaxWaitTimeInMillis(Long.parseLong(props.getString( - HoodieLockConfig.LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS.key(), - HoodieLockConfig.LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS.defaultValue()))) - .withHeartbeatIntervalInMillis(Long.valueOf(props.getString( - HoodieLockConfig.LOCK_HEARTBEAT_INTERVAL_MS.key(), - String.valueOf(HoodieLockConfig.LOCK_HEARTBEAT_INTERVAL_MS.defaultValue())))); - - String lockProviderClass = writeConfig.getLockProviderClass(); - if (lockProviderClass == null) { - return lockConfigBuilder.build(); - } - - Properties providerProp = new Properties(); - providerProp.setProperty(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass); - lockConfigBuilder.fromProperties(providerProp); - - if (ZookeeperBasedLockProvider.class.getName().equals(lockProviderClass)) { - if (props.containsKey(HoodieLockConfig.ZK_CONNECT_URL.key())) { - lockConfigBuilder.withZkQuorum(props.getString(HoodieLockConfig.ZK_CONNECT_URL.key())); - } - if (props.containsKey(HoodieLockConfig.ZK_BASE_PATH.key())) { - lockConfigBuilder.withZkBasePath(props.getString(HoodieLockConfig.ZK_BASE_PATH.key())); - } - if (props.containsKey(HoodieLockConfig.ZK_LOCK_KEY.key())) { - lockConfigBuilder.withZkLockKey(props.getString(HoodieLockConfig.ZK_LOCK_KEY.key())); - } - if (props.containsKey(HoodieLockConfig.ZK_PORT.key())) { - lockConfigBuilder.withZkPort(props.getString(HoodieLockConfig.ZK_PORT.key())); - } - if (props.containsKey(HoodieLockConfig.ZK_SESSION_TIMEOUT_MS.key())) { - lockConfigBuilder.withZkSessionTimeoutInMs( - Long.valueOf(props.getString(HoodieLockConfig.ZK_SESSION_TIMEOUT_MS.key()))); - } - if (props.containsKey(HoodieLockConfig.ZK_CONNECTION_TIMEOUT_MS.key())) { - lockConfigBuilder.withZkConnectionTimeoutInMs( - Long.valueOf(props.getString(HoodieLockConfig.ZK_CONNECTION_TIMEOUT_MS.key()))); - } - } else if (FileSystemBasedLockProvider.class.getName().equals(lockProviderClass)) { - if (props.containsKey(HoodieLockConfig.FILESYSTEM_LOCK_PATH.key())) { - lockConfigBuilder.withFileSystemLockPath(props.getString(HoodieLockConfig.FILESYSTEM_LOCK_PATH.key())); - } - if (props.containsKey(HoodieLockConfig.FILESYSTEM_LOCK_EXPIRE.key())) { - lockConfigBuilder.withFileSystemLockExpire( - Integer.parseInt(props.getString(HoodieLockConfig.FILESYSTEM_LOCK_EXPIRE.key()))); - } - } else if (lockProviderClass.contains("HiveMetastoreBasedLockProvider")) { - if (props.containsKey(HoodieLockConfig.HIVE_DATABASE_NAME.key())) { - lockConfigBuilder.withHiveDatabaseName(props.getString(HoodieLockConfig.HIVE_DATABASE_NAME.key())); - } - if (props.containsKey(HoodieLockConfig.HIVE_TABLE_NAME.key())) { - lockConfigBuilder.withHiveTableName(props.getString(HoodieLockConfig.HIVE_TABLE_NAME.key())); - } - if (props.containsKey(HoodieLockConfig.HIVE_METASTORE_URI.key())) { - lockConfigBuilder.withHiveMetastoreURIs(props.getString(HoodieLockConfig.HIVE_METASTORE_URI.key())); - } - } else { - // For any custom lock provider, pass through all lock-prefixed properties - // so provider-specific configs are preserved. - Properties lockProps = new Properties(); - props.forEach((k, v) -> { - if (k.toString().startsWith(LockConfiguration.LOCK_PREFIX)) { - lockProps.put(k, v); - } - }); - lockConfigBuilder.fromProperties(lockProps); - } - - return lockConfigBuilder.build(); - } - /** * Convert commit action to metadata records for the enabled partition types. * diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index d4be035fbcf47..0e3f3cb8f37b5 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -124,6 +124,7 @@ public void testCreateMetadataWriteConfigForOCC() { .withMetadataConfig(HoodieMetadataConfig.newBuilder() .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() .withLockProvider(InProcessLockProvider.class).build()) .build(); @@ -148,6 +149,7 @@ public void testCreateMetadataWriteConfigRejectsStreamingWritesWithMultiWriter() .withMetadataConfig(HoodieMetadataConfig.newBuilder() .withStreamingWriteEnabled(true) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() .withLockProvider(InProcessLockProvider.class).build()) .build(); @@ -198,6 +200,7 @@ public void testCreateMetadataWriteConfigForOCCWithZookeeperLockProvider() { .withMetadataConfig(HoodieMetadataConfig.newBuilder() .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() .withLockProvider(ZookeeperBasedLockProvider.class) .withZkQuorum("zk-host:2181") @@ -238,6 +241,7 @@ public void testCreateMetadataWriteConfigForOCCWithHiveMetastoreLockProvider() { .withMetadataConfig(HoodieMetadataConfig.newBuilder() .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() .fromProperties(lockProps) .build()) @@ -262,6 +266,7 @@ public void testCreateMetadataWriteConfigForOCCWithFileSystemLockProvider() { .withMetadataConfig(HoodieMetadataConfig.newBuilder() .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() .withLockProvider(FileSystemBasedLockProvider.class) .withFileSystemLockPath("/tmp/lock_dir") @@ -295,6 +300,7 @@ public void testCreateMetadataWriteConfigForOCCWithCustomLockProvider() { .withMetadataConfig(HoodieMetadataConfig.newBuilder() .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() .fromProperties(lockProps) .build()) @@ -307,6 +313,97 @@ public void testCreateMetadataWriteConfigForOCCWithCustomLockProvider() { assertEquals(customConfigValue, metadataWriteConfig.getProps().getString(customConfigKey)); } + @Test + public void testCreateMetadataWriteConfigRejectsConcurrencyModeMismatch() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .build(); + + IllegalStateException ex = assertThrows(IllegalStateException.class, () -> + HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT)); + assertTrue(ex.getMessage().contains("must match the data table concurrency mode")); + } + + @Test + public void testCreateMetadataWriteConfigForOCCPreservesMdtSpecificValues() { + String dataTableBasePath = "/tmp/base_path/"; + String customLockProviderClass = "com.example.custom.MyCustomLockProvider"; + String customConfigKey = "hoodie.write.lock.custom.zk_url"; + String customConfigValue = "zk://custom-host:2181"; + + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), customLockProviderClass); + lockProps.put(customConfigKey, customConfigValue); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath(dataTableBasePath) + .withCleanConfig(HoodieCleanConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(5).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .fromProperties(lockProps) + .build()) + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + + // Lock config and concurrency mode should be carried over + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, customLockProviderClass); + assertEquals(customConfigValue, metadataWriteConfig.getProps().getString(customConfigKey)); + + // MDT-specific values must be preserved after lock config merge + String expectedMdtBasePath = HoodieTableMetadata.getMetadataTableBasePath(dataTableBasePath); + assertEquals(expectedMdtBasePath, metadataWriteConfig.getBasePath()); + assertFalse(metadataWriteConfig.isAutoClean(), "Auto clean should be disabled for MDT"); + assertFalse(metadataWriteConfig.inlineCompactionEnabled(), "Inline compaction should be disabled for MDT"); + assertFalse(metadataWriteConfig.isMetadataTableEnabled(), "Metadata listing should be disabled for MDT"); + assertNotEquals(dataTableBasePath, metadataWriteConfig.getBasePath(), + "MDT base path should not be overwritten to data table base path"); + } + + @Test + public void testCreateMetadataWriteConfigForOCCWithMultipleCustomKeys() { + String customLockProviderClass = "com.example.custom.DistributedLockProvider"; + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), customLockProviderClass); + lockProps.put("hoodie.write.lock.custom.endpoint", "https://lock-service.example.com"); + lockProps.put("hoodie.write.lock.custom.token", "my-secret-token"); + lockProps.put("hoodie.write.lock.custom.timeout_ms", "30000"); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .fromProperties(lockProps) + .build()) + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, customLockProviderClass); + assertEquals("https://lock-service.example.com", + metadataWriteConfig.getProps().getString("hoodie.write.lock.custom.endpoint")); + assertEquals("my-secret-token", + metadataWriteConfig.getProps().getString("hoodie.write.lock.custom.token")); + assertEquals("30000", + metadataWriteConfig.getProps().getString("hoodie.write.lock.custom.timeout_ms")); + } + private void validateMetadataWriteConfig(HoodieWriteConfig metadataWriteConfig, HoodieFailedWritesCleaningPolicy expectedPolicy, WriteConcurrencyMode expectedWriteConcurrencyMode, String expectedLockProviderClass) { assertEquals(expectedPolicy, metadataWriteConfig.getFailedWritesCleanPolicy()); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java index 2c183f25db190..79495ec99ca60 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java @@ -101,8 +101,8 @@ public final class HoodieMetadataConfig extends HoodieConfig { .key(METADATA_PREFIX + ".table.service.manager.actions") .defaultValue("") .markAdvanced() - .withDocumentation("Comma-separated list of table service actions (e.g. compaction, logcompaction) on the metadata table " - + "that should be delegated to the table service manager."); + .withDocumentation("Comma-separated list of table service actions on the metadata table " + + "that should be delegated to the table service manager. Currently supported actions are: compaction, logcompaction."); public static final ConfigProperty STREAMING_WRITE_DATATABLE_WRITE_STATUSES_COALESCE_DIVISOR = ConfigProperty .key(METADATA_PREFIX + ".streaming.write.datatable.write.statuses.coalesce.divisor") From b1f68c28bdfb162efa12d95ae9b33bd3c7351eea Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Mon, 16 Mar 2026 16:16:24 -0700 Subject: [PATCH 11/22] checkstyle --- .../org/apache/hudi/metadata/HoodieMetadataWriteUtils.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index acd2baaf49d8d..fdf92e71fa36a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -20,12 +20,9 @@ import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.client.FailOnFirstErrorWriteStatus; -import org.apache.hudi.client.transaction.lock.FileSystemBasedLockProvider; import org.apache.hudi.client.transaction.lock.InProcessLockProvider; -import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; -import org.apache.hudi.common.config.LockConfiguration; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.config.HoodieStorageConfig; From 7cbe14e9ac4804b195b5f57d5f6a5364f725d92b Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Mon, 16 Mar 2026 16:55:06 -0700 Subject: [PATCH 12/22] lock prefix --- .../hudi/metadata/HoodieMetadataWriteUtils.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index fdf92e71fa36a..e5a1262207fc6 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -23,6 +23,7 @@ import org.apache.hudi.client.transaction.lock.InProcessLockProvider; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; +import org.apache.hudi.common.config.LockConfiguration; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.config.HoodieStorageConfig; @@ -384,19 +385,17 @@ public static HoodieWriteConfig createMetadataWriteConfig( HoodieWriteConfig metadataWriteConfig = builder.build(); if (mergeMetdataLockConfigAtEnd) { // We need to update the MDT write config to have the same lock related configs as the data table. - // Unfortunately, we cannot use infer which keys on data table are specific to lock config, as - // users may use custom lock providers and lock configs. As a workaround, we will add all write - // config props on data table that are not present/explicitly set on MDT write config to the MDT write config. - Properties dataTablePropsNotInMdt = new Properties(); + // All data table props with the lock prefix are always copied (to override MDT defaults with + // user-configured values). Other data table props not present in MDT config are also copied to + // support custom lock providers that may use non-standard config keys. + Properties lockProps = new Properties(); TypedProperties dataTableProps = writeConfig.getProps(); TypedProperties mdtProps = metadataWriteConfig.getProps(); for (String key : dataTableProps.stringPropertyNames()) { - if (!mdtProps.containsKey(key)) { - dataTablePropsNotInMdt.setProperty(key, dataTableProps.getProperty(key)); + if (key.startsWith(LockConfiguration.LOCK_PREFIX) || !mdtProps.containsKey(key)) { + lockProps.setProperty(key, dataTableProps.getProperty(key)); } } - Properties lockProps = new Properties(); - lockProps.putAll(dataTablePropsNotInMdt); lockProps.setProperty(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), metadataWriteConcurrencyMode.name()); String lockProviderClass = writeConfig.getLockProviderClass(); checkState(lockProviderClass != null, "Lock provider class must be set for metadata table"); From fa3a1626aad474dcadfc9d924457ae5b3dc6490e Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Mon, 16 Mar 2026 20:25:12 -0700 Subject: [PATCH 13/22] reply --- .../config/TestHoodieMetadataConfig.java | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java b/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java index 028c69683ef22..50ff653c42711 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java @@ -19,6 +19,8 @@ package org.apache.hudi.common.config; +import org.apache.hudi.common.model.WriteConcurrencyMode; + import org.junit.jupiter.api.Test; import java.util.Properties; @@ -171,4 +173,57 @@ void testFailOnTableServiceFailures() { assertEquals("hoodie.metadata.write.fail.on.table.service.failures", HoodieMetadataConfig.FAIL_ON_TABLE_SERVICE_FAILURES.key()); } + + @Test + void testWriteConcurrencyMode() { + HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().build(); + assertEquals(WriteConcurrencyMode.SINGLE_WRITER.name(), config.getWriteConcurrencyMode()); + + HoodieMetadataConfig occConfig = HoodieMetadataConfig.newBuilder() + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .build(); + assertEquals(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name(), occConfig.getWriteConcurrencyMode()); + + Properties props = new Properties(); + props.put(HoodieMetadataConfig.METADATA_WRITE_CONCURRENCY_MODE.key(), + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name()); + HoodieMetadataConfig propsConfig = HoodieMetadataConfig.newBuilder() + .fromProperties(props) + .build(); + assertEquals(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name(), propsConfig.getWriteConcurrencyMode()); + } + + @Test + void testTableServiceManagerEnabled() { + HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().build(); + assertFalse(config.isTableServiceManagerEnabled()); + + HoodieMetadataConfig enabledConfig = HoodieMetadataConfig.newBuilder() + .withTableServiceManagerEnabled(true) + .build(); + assertTrue(enabledConfig.isTableServiceManagerEnabled()); + + HoodieMetadataConfig disabledConfig = HoodieMetadataConfig.newBuilder() + .withTableServiceManagerEnabled(false) + .build(); + assertFalse(disabledConfig.isTableServiceManagerEnabled()); + } + + @Test + void testTableServiceManagerActions() { + HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().build(); + assertEquals("", config.getTableServiceManagerActions()); + + HoodieMetadataConfig configWithActions = HoodieMetadataConfig.newBuilder() + .withTableServiceManagerActions("compaction,logcompaction") + .build(); + assertEquals("compaction,logcompaction", configWithActions.getTableServiceManagerActions()); + + Properties props = new Properties(); + props.put(HoodieMetadataConfig.TABLE_SERVICE_MANAGER_ACTIONS.key(), "compaction"); + HoodieMetadataConfig propsConfig = HoodieMetadataConfig.newBuilder() + .fromProperties(props) + .build(); + assertEquals("compaction", propsConfig.getTableServiceManagerActions()); + } } From 91a61ff83b4b3962f91016c1f31462a44c500106 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Thu, 19 Mar 2026 19:12:35 -0700 Subject: [PATCH 14/22] reply --- .../metadata/HoodieMetadataWriteUtils.java | 27 ++++++++++--------- .../TestHoodieMetadataWriteUtils.java | 22 +++++++++++++-- .../common/config/HoodieMetadataConfig.java | 26 ++++++++++++++++++ .../config/TestHoodieMetadataConfig.java | 24 +++++++++++++++++ 4 files changed, 85 insertions(+), 14 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index e5a1262207fc6..24ba16822e17b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -155,7 +155,7 @@ public static HoodieWriteConfig createMetadataWriteConfig( WriteConcurrencyMode concurrencyMode; HoodieLockConfig lockConfig; - final boolean mergeMetdataLockConfigAtEnd; + final boolean deriveMetadataLockConfigsFromDataTableConfigs; if (metadataWriteConcurrencyMode.supportsMultiWriter()) { // Configuring Multi-writer directly on metadata table is intended for executing table service plans, not for writes. checkState(!isStreamingWritesToMetadataEnabled, @@ -164,17 +164,22 @@ public static HoodieWriteConfig createMetadataWriteConfig( checkState(metadataWriteConcurrencyMode == writeConfig.getWriteConcurrencyMode(), "If multiwriter is used on metadata table, its concurrency mode (" + metadataWriteConcurrencyMode + ") must match the data table concurrency mode (" + writeConfig.getWriteConcurrencyMode() + ")"); - // First lets create te MDT write config with default single writer lock configs. - // Then, once all MDT-specific write configs are set, we can create a lock config - // containing all write config raw props in data table that aren't in the raw props - // for MDT write config. And then, re-build the MDT write config with this merged lock config. + String lockProviderClass = writeConfig.getLockProviderClass(); + checkState(lockProviderClass != null, + "Lock provider class must be set for data table to enable async executions of table services in metadata table"); + checkState(!InProcessLockProvider.class.getCanonicalName().equals(lockProviderClass), + "InProcessLockProvider cannot be used for metadata table multi-writer mode as it does not support cross-process locking. " + + "Configure a distributed lock provider on the data table."); + // First lets create the MDT write config with default single writer lock configs. + // Then, once all MDT-specific write configs are set, we can derive lock configs + // from the data table and re-build the MDT write config with the merged lock config. concurrencyMode = WriteConcurrencyMode.SINGLE_WRITER; lockConfig = HoodieLockConfig.newBuilder().build(); failedWritesCleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY; - mergeMetdataLockConfigAtEnd = true; + deriveMetadataLockConfigsFromDataTableConfigs = true; } else { - mergeMetdataLockConfigAtEnd = false; + deriveMetadataLockConfigsFromDataTableConfigs = false; if (isStreamingWritesToMetadataEnabled) { concurrencyMode = WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL; lockConfig = HoodieLockConfig.newBuilder().withLockProvider(InProcessLockProvider.class).build(); @@ -383,7 +388,7 @@ public static HoodieWriteConfig createMetadataWriteConfig( } HoodieWriteConfig metadataWriteConfig = builder.build(); - if (mergeMetdataLockConfigAtEnd) { + if (deriveMetadataLockConfigsFromDataTableConfigs) { // We need to update the MDT write config to have the same lock related configs as the data table. // All data table props with the lock prefix are always copied (to override MDT defaults with // user-configured values). Other data table props not present in MDT config are also copied to @@ -397,11 +402,9 @@ public static HoodieWriteConfig createMetadataWriteConfig( } } lockProps.setProperty(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), metadataWriteConcurrencyMode.name()); - String lockProviderClass = writeConfig.getLockProviderClass(); - checkState(lockProviderClass != null, "Lock provider class must be set for metadata table"); - lockProps.setProperty(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass); + lockProps.setProperty(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), writeConfig.getLockProviderClass()); metadataWriteConfig = HoodieWriteConfig.newBuilder() - .withProperties(metadataWriteConfig.getProps()) + .withProperties(mdtProps) .withProperties(lockProps) .build(); } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index 0e3f3cb8f37b5..66c5b42dfebe0 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -126,19 +126,37 @@ public void testCreateMetadataWriteConfigForOCC() { .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() - .withLockProvider(InProcessLockProvider.class).build()) + .withLockProvider(FileSystemBasedLockProvider.class).build()) .build(); HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); // HoodieWriteConfig builder auto-adjusts failed writes policy to LAZY for multi-writer modes validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, - WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, InProcessLockProvider.class.getCanonicalName()); + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, FileSystemBasedLockProvider.class.getCanonicalName()); // MDT base path should NOT be overwritten to data table's base path String expectedMdtBasePath = HoodieTableMetadata.getMetadataTableBasePath(dataTableBasePath); assertEquals(expectedMdtBasePath, metadataWriteConfig.getBasePath()); } + @Test + public void testCreateMetadataWriteConfigRejectsInProcessLockProvider() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(InProcessLockProvider.class).build()) + .build(); + + IllegalStateException ex = assertThrows(IllegalStateException.class, () -> + HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT)); + assertTrue(ex.getMessage().contains("InProcessLockProvider cannot be used")); + } + @Test public void testCreateMetadataWriteConfigRejectsStreamingWritesWithMultiWriter() { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java index 79495ec99ca60..d69fefbaf6b79 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java @@ -20,6 +20,7 @@ import org.apache.hudi.common.bloom.BloomFilterTypeCode; import org.apache.hudi.common.engine.EngineType; +import org.apache.hudi.common.model.ActionType; import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.util.Option; @@ -33,10 +34,12 @@ import java.io.FileReader; import java.io.IOException; import java.util.Arrays; +import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.Set; import java.util.stream.Collectors; import static org.apache.hudi.common.util.ValidationUtils.checkArgument; @@ -97,6 +100,9 @@ public final class HoodieMetadataConfig extends HoodieConfig { + "instead of executing them inline. This prevents the current writer from executing compaction/logcompaction " + "on the metadata table, allowing a separate async pipeline to handle them."); + public static final Set SUPPORTED_TABLE_SERVICE_MANAGER_ACTIONS = + EnumSet.of(ActionType.compaction, ActionType.logcompaction); + public static final ConfigProperty TABLE_SERVICE_MANAGER_ACTIONS = ConfigProperty .key(METADATA_PREFIX + ".table.service.manager.actions") .defaultValue("") @@ -1052,6 +1058,9 @@ public Builder withTableServiceManagerEnabled(boolean enabled) { } public Builder withTableServiceManagerActions(String actions) { + if (!actions.isEmpty()) { + validateTableServiceManagerActions(actions); + } metadataConfig.setValue(TABLE_SERVICE_MANAGER_ACTIONS, actions); return this; } @@ -1397,6 +1406,23 @@ private boolean getDefaultSecondaryIndexEnable(EngineType engineType) { throw new HoodieNotSupportedException("Unsupported engine " + engineType); } } + + private static void validateTableServiceManagerActions(String actions) { + for (String action : actions.split(",")) { + String trimmed = action.trim(); + ActionType actionType; + try { + actionType = ActionType.valueOf(trimmed); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Unknown metadata table service manager action: " + trimmed + + ". Supported actions are: " + SUPPORTED_TABLE_SERVICE_MANAGER_ACTIONS, e); + } + if (!SUPPORTED_TABLE_SERVICE_MANAGER_ACTIONS.contains(actionType)) { + throw new IllegalArgumentException("Unsupported metadata table service manager action: " + trimmed + + ". Supported actions are: " + SUPPORTED_TABLE_SERVICE_MANAGER_ACTIONS); + } + } + } } /** diff --git a/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java b/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java index 50ff653c42711..361a2661bdb2c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java @@ -27,6 +27,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -226,4 +227,27 @@ void testTableServiceManagerActions() { .build(); assertEquals("compaction", propsConfig.getTableServiceManagerActions()); } + + @Test + void testTableServiceManagerActionsRejectsUnsupportedActions() { + assertThrows(IllegalArgumentException.class, () -> + HoodieMetadataConfig.newBuilder() + .withTableServiceManagerActions("clean") + .build()); + + assertThrows(IllegalArgumentException.class, () -> + HoodieMetadataConfig.newBuilder() + .withTableServiceManagerActions("clustering") + .build()); + + assertThrows(IllegalArgumentException.class, () -> + HoodieMetadataConfig.newBuilder() + .withTableServiceManagerActions("compaction,clean") + .build()); + + assertThrows(IllegalArgumentException.class, () -> + HoodieMetadataConfig.newBuilder() + .withTableServiceManagerActions("nonexistent") + .build()); + } } From 84eb3307e1cc700f3fa1a4f22b2a4ed69431ef29 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Thu, 19 Mar 2026 22:46:04 -0700 Subject: [PATCH 15/22] comment --- .../metadata/HoodieBackedTableMetadataWriter.java | 15 ++++++--------- ...eBackedTableMetadataWriterTableVersionSix.java | 6 ++---- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index f7af8d339aa3f..4c1c90fe27911 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -25,10 +25,10 @@ import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.client.BaseHoodieWriteClient; +import org.apache.hudi.client.RunsTableService; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.HoodieMetadataConfig; -import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; import org.apache.hudi.common.model.ActionType; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.EngineType; @@ -158,7 +158,7 @@ * * @param Type of input for the write client */ -public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMetadataWriter { +public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMetadataWriter, RunsTableService { static final Logger LOG = LoggerFactory.getLogger(HoodieBackedTableMetadataWriter.class); @@ -2152,11 +2152,10 @@ static HoodieActiveTimeline runPendingTableServicesOperationsAndRefreshTimeline( Option metricsOption) { try { HoodieActiveTimeline activeTimeline = initialTimelineRequiresRefresh ? metadataMetaClient.reloadActiveTimeline() : metadataMetaClient.getActiveTimeline(); - HoodieTableServiceManagerConfig tsmConfig = writeClient.getConfig().getTableServiceManagerConfig(); // finish off any pending log compaction or compactions operations if any from previous attempt. boolean ranServices = false; if (activeTimeline.filterPendingCompactionTimeline().countInstants() > 0) { - if (tsmConfig.isEnabledAndActionSupported(ActionType.compaction)) { + if (writeClient.shouldDelegateToTableServiceManager(writeClient.getConfig(), ActionType.compaction)) { LOG.info("Skipping pending compactions on MDT as they are delegated to table service manager."); } else { writeClient.runAnyPendingCompactions(); @@ -2164,7 +2163,7 @@ static HoodieActiveTimeline runPendingTableServicesOperationsAndRefreshTimeline( } } if (activeTimeline.filterPendingLogCompactionTimeline().countInstants() > 0) { - if (tsmConfig.isEnabledAndActionSupported(ActionType.logcompaction)) { + if (writeClient.shouldDelegateToTableServiceManager(writeClient.getConfig(), ActionType.logcompaction)) { LOG.info("Skipping pending log compactions on MDT as they are delegated to table service manager."); } else { writeClient.runAnyPendingLogCompactions(); @@ -2190,8 +2189,6 @@ static HoodieActiveTimeline runPendingTableServicesOperationsAndRefreshTimeline( * deltacommit. */ void compactIfNecessary(BaseHoodieWriteClient writeClient, Option latestDeltaCommitTimeOpt) { - HoodieTableServiceManagerConfig tsmConfig = metadataWriteConfig.getTableServiceManagerConfig(); - // IMPORTANT: Trigger compaction with max instant time that is smaller than(or equals) the earliest pending instant from DT. // The compaction planner will manage to filter out the log files that finished with greater completion time. // see BaseHoodieCompactionPlanGenerator.generateCompactionPlan for more details. @@ -2214,7 +2211,7 @@ void compactIfNecessary(BaseHoodieWriteClient writeClient, Option writeClient, Option scheduledLogCompaction = writeClient.scheduleLogCompaction(Option.empty()); if (scheduledLogCompaction.isPresent()) { LOG.info("Log compaction is scheduled for timestamp {}", scheduledLogCompaction.get()); - if (tsmConfig.isEnabledAndActionSupported(ActionType.logcompaction)) { + if (shouldDelegateToTableServiceManager(metadataWriteConfig, ActionType.logcompaction)) { LOG.info("Skipping execution of log compaction on MDT as it is delegated to table service manager."); } else { writeClient.logCompact(scheduledLogCompaction.get(), true); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriterTableVersionSix.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriterTableVersionSix.java index 9646e3781f2e8..2d4717a80054b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriterTableVersionSix.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriterTableVersionSix.java @@ -21,7 +21,6 @@ import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.common.config.HoodieMetadataConfig; -import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; import org.apache.hudi.common.model.ActionType; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; @@ -278,7 +277,6 @@ private void validateRollbackVersionSix( */ @Override void compactIfNecessary(BaseHoodieWriteClient writeClient, Option latestDeltaCommitTimeOpt) { - HoodieTableServiceManagerConfig tsmConfig = metadataWriteConfig.getTableServiceManagerConfig(); // Trigger compaction with suffixes based on the same instant time. This ensures that any future // delta commits synced over will not have an instant time lesser than the last completed instant on the // metadata table. @@ -292,7 +290,7 @@ void compactIfNecessary(BaseHoodieWriteClient writeClient, Option writeClient, Option Date: Thu, 19 Mar 2026 23:47:42 -0700 Subject: [PATCH 16/22] comment --- .../hudi/metadata/TestHoodieBackedTableMetadataWriter.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java index e57a8c4bd9659..4065f113d518f 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieBackedTableMetadataWriter.java @@ -138,6 +138,7 @@ void runPendingTableServicesSkipsExecutionWhenTSMEnabled() { tsmProps.put(HoodieTableServiceManagerConfig.TABLE_SERVICE_MANAGER_ACTIONS.key(), "compaction,logcompaction"); HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp/").withProperties(tsmProps).build(); when(writeClient.getConfig()).thenReturn(writeConfig); + when(writeClient.shouldDelegateToTableServiceManager(any(), any())).thenCallRealMethod(); when(metaClient.getActiveTimeline()).thenReturn(initialTimeline); when(initialTimeline.filterPendingCompactionTimeline().countInstants()).thenReturn(1); @@ -165,6 +166,7 @@ void runPendingTableServicesPartialTSMDelegation() { tsmProps.put(HoodieTableServiceManagerConfig.TABLE_SERVICE_MANAGER_ACTIONS.key(), "compaction"); HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp/").withProperties(tsmProps).build(); when(writeClient.getConfig()).thenReturn(writeConfig); + when(writeClient.shouldDelegateToTableServiceManager(any(), any())).thenCallRealMethod(); when(metaClient.getActiveTimeline()).thenReturn(initialTimeline); when(initialTimeline.filterPendingCompactionTimeline().countInstants()).thenReturn(1); From 6745fd97ae43f57740eef63cca7f4f0ed359adc8 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Mon, 23 Mar 2026 20:11:45 -0700 Subject: [PATCH 17/22] different lock config approach --- .../metadata/HoodieMetadataWriteUtils.java | 104 +++++++++---- .../TestHoodieMetadataWriteUtils.java | 140 +++++++++++++----- 2 files changed, 177 insertions(+), 67 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index 24ba16822e17b..1e7ae75ab1642 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -20,7 +20,11 @@ import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.client.FailOnFirstErrorWriteStatus; +import org.apache.hudi.client.transaction.lock.FileSystemBasedLockProvider; import org.apache.hudi.client.transaction.lock.InProcessLockProvider; +import org.apache.hudi.client.transaction.lock.StorageBasedLockProvider; +import org.apache.hudi.client.transaction.lock.ZookeeperBasedImplicitBasePathLockProvider; +import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; import org.apache.hudi.common.config.LockConfiguration; @@ -133,6 +137,19 @@ public class HoodieMetadataWriteUtils { // any ways be limited by the executor counts. private static final int MDT_DEFAULT_PARALLELISM = 512; + // Lock provider class names from modules not directly accessible in hudi-client-common. + @VisibleForTesting + static final String HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS = + "org.apache.hudi.hive.transaction.lock.HiveMetastoreBasedLockProvider"; + @VisibleForTesting + static final String DYNAMODB_BASED_LOCK_PROVIDER_CLASS = + "org.apache.hudi.aws.transaction.lock.DynamoDBBasedLockProvider"; + @VisibleForTesting + static final String DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS = + "org.apache.hudi.aws.transaction.lock.DynamoDBBasedImplicitPartitionKeyLockProvider"; + private static final String DYNAMODB_BASED_LOCK_PROPERTY_PREFIX = LockConfiguration.LOCK_PREFIX + "dynamodb."; + private static final String STORAGE_BASED_LOCK_PROPERTY_PREFIX = LockConfiguration.LOCK_PREFIX + "storage."; + // File groups in each partition are fixed at creation time and we do not want them to be split into multiple files // ever. Hence, we use a very large basefile size in metadata table. The actual size of the HFiles created will // eventually depend on the number of file groups selected for each partition (See estimateFileGroupCount function) @@ -155,7 +172,6 @@ public static HoodieWriteConfig createMetadataWriteConfig( WriteConcurrencyMode concurrencyMode; HoodieLockConfig lockConfig; - final boolean deriveMetadataLockConfigsFromDataTableConfigs; if (metadataWriteConcurrencyMode.supportsMultiWriter()) { // Configuring Multi-writer directly on metadata table is intended for executing table service plans, not for writes. checkState(!isStreamingWritesToMetadataEnabled, @@ -170,16 +186,10 @@ public static HoodieWriteConfig createMetadataWriteConfig( checkState(!InProcessLockProvider.class.getCanonicalName().equals(lockProviderClass), "InProcessLockProvider cannot be used for metadata table multi-writer mode as it does not support cross-process locking. " + "Configure a distributed lock provider on the data table."); - // First lets create the MDT write config with default single writer lock configs. - // Then, once all MDT-specific write configs are set, we can derive lock configs - // from the data table and re-build the MDT write config with the merged lock config. - concurrencyMode = WriteConcurrencyMode.SINGLE_WRITER; - lockConfig = HoodieLockConfig.newBuilder().build(); + concurrencyMode = metadataWriteConcurrencyMode; failedWritesCleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY; - deriveMetadataLockConfigsFromDataTableConfigs = true; - + lockConfig = buildMdtLockConfig(lockProviderClass, writeConfig); } else { - deriveMetadataLockConfigsFromDataTableConfigs = false; if (isStreamingWritesToMetadataEnabled) { concurrencyMode = WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL; lockConfig = HoodieLockConfig.newBuilder().withLockProvider(InProcessLockProvider.class).build(); @@ -388,26 +398,6 @@ public static HoodieWriteConfig createMetadataWriteConfig( } HoodieWriteConfig metadataWriteConfig = builder.build(); - if (deriveMetadataLockConfigsFromDataTableConfigs) { - // We need to update the MDT write config to have the same lock related configs as the data table. - // All data table props with the lock prefix are always copied (to override MDT defaults with - // user-configured values). Other data table props not present in MDT config are also copied to - // support custom lock providers that may use non-standard config keys. - Properties lockProps = new Properties(); - TypedProperties dataTableProps = writeConfig.getProps(); - TypedProperties mdtProps = metadataWriteConfig.getProps(); - for (String key : dataTableProps.stringPropertyNames()) { - if (key.startsWith(LockConfiguration.LOCK_PREFIX) || !mdtProps.containsKey(key)) { - lockProps.setProperty(key, dataTableProps.getProperty(key)); - } - } - lockProps.setProperty(HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), metadataWriteConcurrencyMode.name()); - lockProps.setProperty(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), writeConfig.getLockProviderClass()); - metadataWriteConfig = HoodieWriteConfig.newBuilder() - .withProperties(mdtProps) - .withProperties(lockProps) - .build(); - } // Inline compaction and auto clean is required as we do not expose this table outside ValidationUtils.checkArgument(!metadataWriteConfig.isAutoClean(), "Cleaning is controlled internally for Metadata table."); @@ -420,6 +410,62 @@ public static HoodieWriteConfig createMetadataWriteConfig( return metadataWriteConfig; } + /** + * Build a {@link HoodieLockConfig} for the metadata table by copying lock-related configs + * from the data table's write config based on the configured lock provider. + * Only built-in lock providers are supported. + */ + @VisibleForTesting + static HoodieLockConfig buildMdtLockConfig(String lockProviderClass, HoodieWriteConfig writeConfig) { + TypedProperties dataProps = writeConfig.getProps(); + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass); + + // Common lock configs used by all providers + lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS.key(), + writeConfig.getStringOrDefault(HoodieLockConfig.LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS)); + lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS.key(), + writeConfig.getStringOrDefault(HoodieLockConfig.LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS)); + lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.key(), + writeConfig.getStringOrDefault(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS)); + lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_NUM_RETRIES.key(), + writeConfig.getStringOrDefault(HoodieLockConfig.LOCK_ACQUIRE_NUM_RETRIES)); + lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES.key(), + writeConfig.getStringOrDefault(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES)); + lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS.key(), + String.valueOf(writeConfig.getIntOrDefault(HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS))); + lockProps.put(HoodieLockConfig.LOCK_HEARTBEAT_INTERVAL_MS.key(), + String.valueOf(writeConfig.getIntOrDefault(HoodieLockConfig.LOCK_HEARTBEAT_INTERVAL_MS))); + + // Provider-specific configs + if (FileSystemBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.FILESYSTEM_BASED_LOCK_PROPERTY_PREFIX); + } else if (ZookeeperBasedLockProvider.class.getCanonicalName().equals(lockProviderClass) + || ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName().equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.ZOOKEEPER_BASED_LOCK_PROPERTY_PREFIX); + } else if (HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.HIVE_METASTORE_LOCK_PROPERTY_PREFIX); + } else if (DYNAMODB_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass) + || DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, DYNAMODB_BASED_LOCK_PROPERTY_PREFIX); + } else if (StorageBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, STORAGE_BASED_LOCK_PROPERTY_PREFIX); + } else { + throw new HoodieException(writeConfig.getWriteConcurrencyMode() + + " is only supported for built-in lock providers. Unsupported lock provider: " + lockProviderClass); + } + + return HoodieLockConfig.newBuilder().fromProperties(lockProps).build(); + } + + private static void copyPropsWithPrefix(TypedProperties source, Properties target, String prefix) { + for (String key : source.stringPropertyNames()) { + if (key.startsWith(prefix)) { + target.setProperty(key, source.getProperty(key)); + } + } + } + /** * Convert commit action to metadata records for the enabled partition types. * diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index 66c5b42dfebe0..dbd3687359418 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -20,6 +20,8 @@ import org.apache.hudi.client.transaction.lock.FileSystemBasedLockProvider; import org.apache.hudi.client.transaction.lock.InProcessLockProvider; +import org.apache.hudi.client.transaction.lock.StorageBasedLockProvider; +import org.apache.hudi.client.transaction.lock.ZookeeperBasedImplicitBasePathLockProvider; import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.ActionType; @@ -30,6 +32,7 @@ import org.apache.hudi.config.HoodieCleanConfig; import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieException; import org.junit.jupiter.api.Test; @@ -301,20 +304,13 @@ public void testCreateMetadataWriteConfigForOCCWithFileSystemLockProvider() { } @Test - public void testCreateMetadataWriteConfigForOCCWithCustomLockProvider() { + public void testCreateMetadataWriteConfigRejectsCustomLockProvider() { String customLockProviderClass = "com.example.custom.MyCustomLockProvider"; - String customConfigKey = "hoodie.write.lock.custom.endpoint"; - String customConfigValue = "https://lock-service.example.com"; - Properties lockProps = new Properties(); lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), customLockProviderClass); - lockProps.put(customConfigKey, customConfigValue); HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() .withPath("/tmp/base_path/") - .withCleanConfig(HoodieCleanConfig.newBuilder() - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) - .retainCommits(5).build()) .withMetadataConfig(HoodieMetadataConfig.newBuilder() .withStreamingWriteEnabled(false) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) @@ -324,11 +320,11 @@ public void testCreateMetadataWriteConfigForOCCWithCustomLockProvider() { .build()) .build(); - HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( - writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); - validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, - WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, customLockProviderClass); - assertEquals(customConfigValue, metadataWriteConfig.getProps().getString(customConfigKey)); + HoodieException ex = assertThrows(HoodieException.class, () -> + HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT)); + assertTrue(ex.getMessage().contains("only supported for built-in lock providers")); + assertTrue(ex.getMessage().contains(customLockProviderClass)); } @Test @@ -349,13 +345,6 @@ public void testCreateMetadataWriteConfigRejectsConcurrencyModeMismatch() { @Test public void testCreateMetadataWriteConfigForOCCPreservesMdtSpecificValues() { String dataTableBasePath = "/tmp/base_path/"; - String customLockProviderClass = "com.example.custom.MyCustomLockProvider"; - String customConfigKey = "hoodie.write.lock.custom.zk_url"; - String customConfigValue = "zk://custom-host:2181"; - - Properties lockProps = new Properties(); - lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), customLockProviderClass); - lockProps.put(customConfigKey, customConfigValue); HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() .withPath(dataTableBasePath) @@ -367,19 +356,19 @@ public void testCreateMetadataWriteConfigForOCCPreservesMdtSpecificValues() { .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() - .fromProperties(lockProps) + .withLockProvider(FileSystemBasedLockProvider.class) + .withFileSystemLockPath("/tmp/lock_dir") + .withFileSystemLockExpire(10) .build()) .build(); HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); - // Lock config and concurrency mode should be carried over validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, - WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, customLockProviderClass); - assertEquals(customConfigValue, metadataWriteConfig.getProps().getString(customConfigKey)); + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, FileSystemBasedLockProvider.class.getCanonicalName()); - // MDT-specific values must be preserved after lock config merge + // MDT-specific values must be preserved String expectedMdtBasePath = HoodieTableMetadata.getMetadataTableBasePath(dataTableBasePath); assertEquals(expectedMdtBasePath, metadataWriteConfig.getBasePath()); assertFalse(metadataWriteConfig.isAutoClean(), "Auto clean should be disabled for MDT"); @@ -390,13 +379,13 @@ public void testCreateMetadataWriteConfigForOCCPreservesMdtSpecificValues() { } @Test - public void testCreateMetadataWriteConfigForOCCWithMultipleCustomKeys() { - String customLockProviderClass = "com.example.custom.DistributedLockProvider"; + public void testCreateMetadataWriteConfigForOCCWithDynamoDBLockProvider() { Properties lockProps = new Properties(); - lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), customLockProviderClass); - lockProps.put("hoodie.write.lock.custom.endpoint", "https://lock-service.example.com"); - lockProps.put("hoodie.write.lock.custom.token", "my-secret-token"); - lockProps.put("hoodie.write.lock.custom.timeout_ms", "30000"); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + HoodieMetadataWriteUtils.DYNAMODB_BASED_LOCK_PROVIDER_CLASS); + lockProps.put("hoodie.write.lock.dynamodb.table", "my_lock_table"); + lockProps.put("hoodie.write.lock.dynamodb.region", "us-west-2"); + lockProps.put("hoodie.write.lock.dynamodb.partition_key", "test_table"); HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() .withPath("/tmp/base_path/") @@ -411,15 +400,90 @@ public void testCreateMetadataWriteConfigForOCCWithMultipleCustomKeys() { HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, + HoodieMetadataWriteUtils.DYNAMODB_BASED_LOCK_PROVIDER_CLASS); + assertEquals("my_lock_table", metadataWriteConfig.getProps().getString("hoodie.write.lock.dynamodb.table")); + assertEquals("us-west-2", metadataWriteConfig.getProps().getString("hoodie.write.lock.dynamodb.region")); + assertEquals("test_table", metadataWriteConfig.getProps().getString("hoodie.write.lock.dynamodb.partition_key")); + } + + @Test + public void testCreateMetadataWriteConfigForOCCWithStorageBasedLockProvider() { + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + StorageBasedLockProvider.class.getCanonicalName()); + lockProps.put("hoodie.write.lock.storage.validity.timeout.secs", "600"); + lockProps.put("hoodie.write.lock.storage.renew.interval.secs", "60"); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .fromProperties(lockProps) + .build()) + .build(); + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, - WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, customLockProviderClass); - assertEquals("https://lock-service.example.com", - metadataWriteConfig.getProps().getString("hoodie.write.lock.custom.endpoint")); - assertEquals("my-secret-token", - metadataWriteConfig.getProps().getString("hoodie.write.lock.custom.token")); - assertEquals("30000", - metadataWriteConfig.getProps().getString("hoodie.write.lock.custom.timeout_ms")); + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, + StorageBasedLockProvider.class.getCanonicalName()); + assertEquals("600", metadataWriteConfig.getProps().getString("hoodie.write.lock.storage.validity.timeout.secs")); + assertEquals("60", metadataWriteConfig.getProps().getString("hoodie.write.lock.storage.renew.interval.secs")); + } + + @Test + public void testCreateMetadataWriteConfigForOCCWithZookeeperImplicitBasePathLockProvider() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(ZookeeperBasedImplicitBasePathLockProvider.class) + .withZkQuorum("zk-host:2181") + .withZkPort("2181") + .withZkSessionTimeoutInMs(30000L) + .withZkConnectionTimeoutInMs(15000L) + .build()) + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, + ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName()); + assertEquals("zk-host:2181", metadataWriteConfig.getProps().getString(HoodieLockConfig.ZK_CONNECT_URL.key())); + assertEquals(30000, metadataWriteConfig.getProps().getInteger(HoodieLockConfig.ZK_SESSION_TIMEOUT_MS.key())); + assertEquals(15000, metadataWriteConfig.getProps().getInteger(HoodieLockConfig.ZK_CONNECTION_TIMEOUT_MS.key())); + } + + @Test + public void testCreateMetadataWriteConfigRejectsCustomLockProviders() { + String customLockProviderClass = "com.example.custom.DistributedLockProvider"; + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), customLockProviderClass); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withStreamingWriteEnabled(false) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .fromProperties(lockProps) + .build()) + .build(); + + HoodieException ex = assertThrows(HoodieException.class, () -> + HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT)); + assertTrue(ex.getMessage().contains("only supported for built-in lock providers")); } private void validateMetadataWriteConfig(HoodieWriteConfig metadataWriteConfig, HoodieFailedWritesCleaningPolicy expectedPolicy, From 9041c225fec4bd8f030c55563bd411071d518380 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Thu, 26 Mar 2026 18:41:33 -0700 Subject: [PATCH 18/22] store --- .../apache/hudi/config/HoodieLockConfig.java | 76 ++++++ .../metadata/HoodieMetadataWriteUtils.java | 77 +----- .../hudi/config/TestHoodieLockConfig.java | 220 ++++++++++++++++++ .../TestHoodieMetadataWriteUtils.java | 4 +- 4 files changed, 299 insertions(+), 78 deletions(-) create mode 100644 hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java index 08e240af98f54..4adda959c02d8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java @@ -20,12 +20,19 @@ import org.apache.hudi.client.transaction.BucketIndexConcurrentFileWritesConflictResolutionStrategy; import org.apache.hudi.client.transaction.ConflictResolutionStrategy; import org.apache.hudi.client.transaction.SimpleConcurrentFileWritesConflictResolutionStrategy; +import org.apache.hudi.client.transaction.lock.FileSystemBasedLockProvider; +import org.apache.hudi.client.transaction.lock.StorageBasedLockProvider; +import org.apache.hudi.client.transaction.lock.ZookeeperBasedImplicitBasePathLockProvider; +import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; import org.apache.hudi.common.config.ConfigClassProperty; import org.apache.hudi.common.config.ConfigGroups; import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.config.LockConfiguration; +import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.lock.LockProvider; import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieException; import org.apache.hudi.index.HoodieIndex; import java.io.File; @@ -241,6 +248,16 @@ public class HoodieLockConfig extends HoodieConfig { @Deprecated public static final String LOCK_PROVIDER_CLASS_PROP = LOCK_PROVIDER_CLASS_NAME.key(); + // Lock provider class names from modules not directly accessible in hudi-client-common. + public static final String HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS = + "org.apache.hudi.hive.transaction.lock.HiveMetastoreBasedLockProvider"; + public static final String DYNAMODB_BASED_LOCK_PROVIDER_CLASS = + "org.apache.hudi.aws.transaction.lock.DynamoDBBasedLockProvider"; + public static final String DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS = + "org.apache.hudi.aws.transaction.lock.DynamoDBBasedImplicitPartitionKeyLockProvider"; + private static final String DYNAMODB_BASED_LOCK_PROPERTY_PREFIX = LOCK_PREFIX + "dynamodb."; + private static final String STORAGE_BASED_LOCK_PROPERTY_PREFIX = LOCK_PREFIX + "storage."; + private HoodieLockConfig() { super(); } @@ -249,6 +266,65 @@ public static HoodieLockConfig.Builder newBuilder() { return new HoodieLockConfig.Builder(); } + /** + * Build a {@link HoodieLockConfig} by copying lock-related configs from the given write config + * based on the configured lock provider. Only built-in lock providers are supported. + * + * @param lockProviderClass the lock provider class name + * @param writeConfig the write config to copy lock properties from + * @return a new HoodieLockConfig with the relevant lock properties + * @throws HoodieException if the lock provider is not a built-in provider + */ + public static HoodieLockConfig getLockConfigForBuiltInLockProvider(String lockProviderClass, HoodieWriteConfig writeConfig) { + TypedProperties dataProps = writeConfig.getProps(); + Properties lockProps = new Properties(); + lockProps.put(LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass); + + // Common lock configs used by all providers + lockProps.put(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS.key(), + writeConfig.getStringOrDefault(LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS)); + lockProps.put(LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS.key(), + writeConfig.getStringOrDefault(LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS)); + lockProps.put(LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.key(), + writeConfig.getStringOrDefault(LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS)); + lockProps.put(LOCK_ACQUIRE_NUM_RETRIES.key(), + writeConfig.getStringOrDefault(LOCK_ACQUIRE_NUM_RETRIES)); + lockProps.put(LOCK_ACQUIRE_CLIENT_NUM_RETRIES.key(), + writeConfig.getStringOrDefault(LOCK_ACQUIRE_CLIENT_NUM_RETRIES)); + lockProps.put(LOCK_ACQUIRE_WAIT_TIMEOUT_MS.key(), + String.valueOf(writeConfig.getIntOrDefault(LOCK_ACQUIRE_WAIT_TIMEOUT_MS))); + lockProps.put(LOCK_HEARTBEAT_INTERVAL_MS.key(), + String.valueOf(writeConfig.getIntOrDefault(LOCK_HEARTBEAT_INTERVAL_MS))); + + // Provider-specific configs + if (FileSystemBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.FILESYSTEM_BASED_LOCK_PROPERTY_PREFIX); + } else if (ZookeeperBasedLockProvider.class.getCanonicalName().equals(lockProviderClass) + || ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName().equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.ZOOKEEPER_BASED_LOCK_PROPERTY_PREFIX); + } else if (HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.HIVE_METASTORE_LOCK_PROPERTY_PREFIX); + } else if (DYNAMODB_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass) + || DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, DYNAMODB_BASED_LOCK_PROPERTY_PREFIX); + } else if (StorageBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, STORAGE_BASED_LOCK_PROPERTY_PREFIX); + } else { + throw new HoodieException(writeConfig.getWriteConcurrencyMode() + + " is only supported for built-in lock providers. Unsupported lock provider: " + lockProviderClass); + } + + return newBuilder().fromProperties(lockProps).build(); + } + + private static void copyPropsWithPrefix(TypedProperties source, Properties target, String prefix) { + for (String key : source.stringPropertyNames()) { + if (key.startsWith(prefix)) { + target.setProperty(key, source.getProperty(key)); + } + } + } + public static class Builder { private final HoodieLockConfig lockConfig = new HoodieLockConfig(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index 1e7ae75ab1642..2aa3d497c9af0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -20,15 +20,9 @@ import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.client.FailOnFirstErrorWriteStatus; -import org.apache.hudi.client.transaction.lock.FileSystemBasedLockProvider; import org.apache.hudi.client.transaction.lock.InProcessLockProvider; -import org.apache.hudi.client.transaction.lock.StorageBasedLockProvider; -import org.apache.hudi.client.transaction.lock.ZookeeperBasedImplicitBasePathLockProvider; -import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.HoodieTableServiceManagerConfig; -import org.apache.hudi.common.config.LockConfiguration; -import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.config.HoodieReaderConfig; import org.apache.hudi.common.config.HoodieStorageConfig; import org.apache.hudi.common.config.RecordMergeMode; @@ -137,19 +131,6 @@ public class HoodieMetadataWriteUtils { // any ways be limited by the executor counts. private static final int MDT_DEFAULT_PARALLELISM = 512; - // Lock provider class names from modules not directly accessible in hudi-client-common. - @VisibleForTesting - static final String HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS = - "org.apache.hudi.hive.transaction.lock.HiveMetastoreBasedLockProvider"; - @VisibleForTesting - static final String DYNAMODB_BASED_LOCK_PROVIDER_CLASS = - "org.apache.hudi.aws.transaction.lock.DynamoDBBasedLockProvider"; - @VisibleForTesting - static final String DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS = - "org.apache.hudi.aws.transaction.lock.DynamoDBBasedImplicitPartitionKeyLockProvider"; - private static final String DYNAMODB_BASED_LOCK_PROPERTY_PREFIX = LockConfiguration.LOCK_PREFIX + "dynamodb."; - private static final String STORAGE_BASED_LOCK_PROPERTY_PREFIX = LockConfiguration.LOCK_PREFIX + "storage."; - // File groups in each partition are fixed at creation time and we do not want them to be split into multiple files // ever. Hence, we use a very large basefile size in metadata table. The actual size of the HFiles created will // eventually depend on the number of file groups selected for each partition (See estimateFileGroupCount function) @@ -188,7 +169,7 @@ public static HoodieWriteConfig createMetadataWriteConfig( + "Configure a distributed lock provider on the data table."); concurrencyMode = metadataWriteConcurrencyMode; failedWritesCleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY; - lockConfig = buildMdtLockConfig(lockProviderClass, writeConfig); + lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider(lockProviderClass, writeConfig); } else { if (isStreamingWritesToMetadataEnabled) { concurrencyMode = WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL; @@ -410,62 +391,6 @@ public static HoodieWriteConfig createMetadataWriteConfig( return metadataWriteConfig; } - /** - * Build a {@link HoodieLockConfig} for the metadata table by copying lock-related configs - * from the data table's write config based on the configured lock provider. - * Only built-in lock providers are supported. - */ - @VisibleForTesting - static HoodieLockConfig buildMdtLockConfig(String lockProviderClass, HoodieWriteConfig writeConfig) { - TypedProperties dataProps = writeConfig.getProps(); - Properties lockProps = new Properties(); - lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass); - - // Common lock configs used by all providers - lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS.key(), - writeConfig.getStringOrDefault(HoodieLockConfig.LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS)); - lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS.key(), - writeConfig.getStringOrDefault(HoodieLockConfig.LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS)); - lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS.key(), - writeConfig.getStringOrDefault(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS)); - lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_NUM_RETRIES.key(), - writeConfig.getStringOrDefault(HoodieLockConfig.LOCK_ACQUIRE_NUM_RETRIES)); - lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES.key(), - writeConfig.getStringOrDefault(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES)); - lockProps.put(HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS.key(), - String.valueOf(writeConfig.getIntOrDefault(HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS))); - lockProps.put(HoodieLockConfig.LOCK_HEARTBEAT_INTERVAL_MS.key(), - String.valueOf(writeConfig.getIntOrDefault(HoodieLockConfig.LOCK_HEARTBEAT_INTERVAL_MS))); - - // Provider-specific configs - if (FileSystemBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { - copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.FILESYSTEM_BASED_LOCK_PROPERTY_PREFIX); - } else if (ZookeeperBasedLockProvider.class.getCanonicalName().equals(lockProviderClass) - || ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName().equals(lockProviderClass)) { - copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.ZOOKEEPER_BASED_LOCK_PROPERTY_PREFIX); - } else if (HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { - copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.HIVE_METASTORE_LOCK_PROPERTY_PREFIX); - } else if (DYNAMODB_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass) - || DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { - copyPropsWithPrefix(dataProps, lockProps, DYNAMODB_BASED_LOCK_PROPERTY_PREFIX); - } else if (StorageBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { - copyPropsWithPrefix(dataProps, lockProps, STORAGE_BASED_LOCK_PROPERTY_PREFIX); - } else { - throw new HoodieException(writeConfig.getWriteConcurrencyMode() - + " is only supported for built-in lock providers. Unsupported lock provider: " + lockProviderClass); - } - - return HoodieLockConfig.newBuilder().fromProperties(lockProps).build(); - } - - private static void copyPropsWithPrefix(TypedProperties source, Properties target, String prefix) { - for (String key : source.stringPropertyNames()) { - if (key.startsWith(prefix)) { - target.setProperty(key, source.getProperty(key)); - } - } - } - /** * Convert commit action to metadata records for the enabled partition types. * diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java new file mode 100644 index 0000000000000..09a918951ca1d --- /dev/null +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.config; + +import org.apache.hudi.client.transaction.lock.FileSystemBasedLockProvider; +import org.apache.hudi.client.transaction.lock.StorageBasedLockProvider; +import org.apache.hudi.client.transaction.lock.ZookeeperBasedImplicitBasePathLockProvider; +import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; +import org.apache.hudi.common.model.WriteConcurrencyMode; +import org.apache.hudi.exception.HoodieException; + +import org.junit.jupiter.api.Test; + +import java.util.Properties; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestHoodieLockConfig { + + @Test + public void testGetLockConfigForFileSystemLockProvider() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(FileSystemBasedLockProvider.class) + .withFileSystemLockPath("/tmp/lock_dir") + .withFileSystemLockExpire(10) + .build()) + .build(); + + HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + FileSystemBasedLockProvider.class.getCanonicalName(), writeConfig); + assertEquals(FileSystemBasedLockProvider.class.getCanonicalName(), + lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); + assertEquals("/tmp/lock_dir", lockConfig.getString(HoodieLockConfig.FILESYSTEM_LOCK_PATH)); + assertEquals("10", lockConfig.getString(HoodieLockConfig.FILESYSTEM_LOCK_EXPIRE)); + } + + @Test + public void testGetLockConfigForZookeeperLockProvider() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(ZookeeperBasedLockProvider.class) + .withZkQuorum("zk-host:2181") + .withZkBasePath("/hudi/locks") + .withZkLockKey("test_table") + .withZkPort("2181") + .withZkSessionTimeoutInMs(30000L) + .withZkConnectionTimeoutInMs(15000L) + .build()) + .build(); + + HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + ZookeeperBasedLockProvider.class.getCanonicalName(), writeConfig); + assertEquals(ZookeeperBasedLockProvider.class.getCanonicalName(), + lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); + assertEquals("zk-host:2181", lockConfig.getString(HoodieLockConfig.ZK_CONNECT_URL)); + assertEquals("/hudi/locks", lockConfig.getString(HoodieLockConfig.ZK_BASE_PATH)); + assertEquals("test_table", lockConfig.getString(HoodieLockConfig.ZK_LOCK_KEY)); + assertEquals("2181", lockConfig.getString(HoodieLockConfig.ZK_PORT)); + assertEquals("30000", lockConfig.getString(HoodieLockConfig.ZK_SESSION_TIMEOUT_MS)); + assertEquals("15000", lockConfig.getString(HoodieLockConfig.ZK_CONNECTION_TIMEOUT_MS)); + } + + @Test + public void testGetLockConfigForZookeeperImplicitBasePathLockProvider() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(ZookeeperBasedImplicitBasePathLockProvider.class) + .withZkQuorum("zk-host:2181") + .withZkPort("2181") + .build()) + .build(); + + HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName(), writeConfig); + assertEquals(ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName(), + lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); + assertEquals("zk-host:2181", lockConfig.getString(HoodieLockConfig.ZK_CONNECT_URL)); + } + + @Test + public void testGetLockConfigForHiveMetastoreLockProvider() { + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS); + lockProps.put(HoodieLockConfig.HIVE_DATABASE_NAME.key(), "my_database"); + lockProps.put(HoodieLockConfig.HIVE_TABLE_NAME.key(), "my_table"); + lockProps.put(HoodieLockConfig.HIVE_METASTORE_URI.key(), "thrift://hms-host:9083"); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) + .build(); + + HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS, writeConfig); + assertEquals(HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS, + lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); + assertEquals("my_database", lockConfig.getString(HoodieLockConfig.HIVE_DATABASE_NAME)); + assertEquals("my_table", lockConfig.getString(HoodieLockConfig.HIVE_TABLE_NAME)); + assertEquals("thrift://hms-host:9083", lockConfig.getString(HoodieLockConfig.HIVE_METASTORE_URI)); + } + + @Test + public void testGetLockConfigForDynamoDBLockProvider() { + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS); + lockProps.put("hoodie.write.lock.dynamodb.table", "my_lock_table"); + lockProps.put("hoodie.write.lock.dynamodb.region", "us-west-2"); + lockProps.put("hoodie.write.lock.dynamodb.partition_key", "test_table"); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) + .build(); + + HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS, writeConfig); + assertEquals(HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS, + lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); + assertEquals("my_lock_table", lockConfig.getProps().getProperty("hoodie.write.lock.dynamodb.table")); + assertEquals("us-west-2", lockConfig.getProps().getProperty("hoodie.write.lock.dynamodb.region")); + assertEquals("test_table", lockConfig.getProps().getProperty("hoodie.write.lock.dynamodb.partition_key")); + } + + @Test + public void testGetLockConfigForStorageBasedLockProvider() { + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + StorageBasedLockProvider.class.getCanonicalName()); + lockProps.put("hoodie.write.lock.storage.validity.timeout.secs", "600"); + lockProps.put("hoodie.write.lock.storage.renew.interval.secs", "60"); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) + .build(); + + HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + StorageBasedLockProvider.class.getCanonicalName(), writeConfig); + assertEquals(StorageBasedLockProvider.class.getCanonicalName(), + lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); + assertEquals("600", lockConfig.getProps().getProperty("hoodie.write.lock.storage.validity.timeout.secs")); + assertEquals("60", lockConfig.getProps().getProperty("hoodie.write.lock.storage.renew.interval.secs")); + } + + @Test + public void testGetLockConfigCopiesCommonRetryConfigs() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(FileSystemBasedLockProvider.class) + .withNumRetries(5) + .withRetryWaitTimeInMillis(2000L) + .withRetryMaxWaitTimeInMillis(32000L) + .withClientNumRetries(10) + .withClientRetryWaitTimeInMillis(3000L) + .withLockWaitTimeInMillis(90000L) + .withHeartbeatIntervalInMillis(45000L) + .build()) + .build(); + + HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + FileSystemBasedLockProvider.class.getCanonicalName(), writeConfig); + assertEquals("5", lockConfig.getString(HoodieLockConfig.LOCK_ACQUIRE_NUM_RETRIES)); + assertEquals("2000", lockConfig.getString(HoodieLockConfig.LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS)); + assertEquals("32000", lockConfig.getString(HoodieLockConfig.LOCK_ACQUIRE_RETRY_MAX_WAIT_TIME_IN_MILLIS)); + assertEquals("10", lockConfig.getString(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_NUM_RETRIES)); + assertEquals("3000", lockConfig.getString(HoodieLockConfig.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS)); + assertEquals("90000", lockConfig.getString(HoodieLockConfig.LOCK_ACQUIRE_WAIT_TIMEOUT_MS)); + assertEquals("45000", lockConfig.getString(HoodieLockConfig.LOCK_HEARTBEAT_INTERVAL_MS)); + } + + @Test + public void testGetLockConfigRejectsCustomLockProvider() { + String customLockProviderClass = "com.example.custom.MyCustomLockProvider"; + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), customLockProviderClass); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) + .build(); + + HoodieException ex = assertThrows(HoodieException.class, () -> + HoodieLockConfig.getLockConfigForBuiltInLockProvider(customLockProviderClass, writeConfig)); + assertTrue(ex.getMessage().contains("only supported for built-in lock providers")); + assertTrue(ex.getMessage().contains(customLockProviderClass)); + } +} diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index dbd3687359418..06ce8521d41c2 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -382,7 +382,7 @@ public void testCreateMetadataWriteConfigForOCCPreservesMdtSpecificValues() { public void testCreateMetadataWriteConfigForOCCWithDynamoDBLockProvider() { Properties lockProps = new Properties(); lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), - HoodieMetadataWriteUtils.DYNAMODB_BASED_LOCK_PROVIDER_CLASS); + HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS); lockProps.put("hoodie.write.lock.dynamodb.table", "my_lock_table"); lockProps.put("hoodie.write.lock.dynamodb.region", "us-west-2"); lockProps.put("hoodie.write.lock.dynamodb.partition_key", "test_table"); @@ -402,7 +402,7 @@ public void testCreateMetadataWriteConfigForOCCWithDynamoDBLockProvider() { writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, - HoodieMetadataWriteUtils.DYNAMODB_BASED_LOCK_PROVIDER_CLASS); + HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS); assertEquals("my_lock_table", metadataWriteConfig.getProps().getString("hoodie.write.lock.dynamodb.table")); assertEquals("us-west-2", metadataWriteConfig.getProps().getString("hoodie.write.lock.dynamodb.region")); assertEquals("test_table", metadataWriteConfig.getProps().getString("hoodie.write.lock.dynamodb.partition_key")); From 90d4b1d6a7a44d5983fe87dff6e0b67547091b77 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Wed, 8 Apr 2026 20:52:11 -0700 Subject: [PATCH 19/22] Force streaming writes off when MDT multi-writer is enabled Instead of throwing an error when streaming writes and multi-writer are both configured on the metadata table, silently force streaming writes off since multi-writer mode is for separate table service execution and is not compatible with streaming writes. --- .../hudi/metadata/HoodieMetadataWriteUtils.java | 9 ++++----- .../hudi/metadata/TestHoodieMetadataWriteUtils.java | 13 +++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index 2aa3d497c9af0..2ac1a74c8447e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -147,17 +147,16 @@ public static HoodieWriteConfig createMetadataWriteConfig( HoodieWriteConfig writeConfig, HoodieFailedWritesCleaningPolicy failedWritesCleaningPolicy, HoodieTableVersion datatableVersion) { String tableName = writeConfig.getTableName() + METADATA_TABLE_NAME_SUFFIX; - boolean isStreamingWritesToMetadataEnabled = writeConfig.isMetadataStreamingWritesEnabled(datatableVersion); WriteConcurrencyMode metadataWriteConcurrencyMode = WriteConcurrencyMode.valueOf(writeConfig.getMetadataConfig().getWriteConcurrencyMode()); + // Multi-writer on MDT is for separate table service execution; streaming writes are not compatible, + // so force it off to avoid confusing config mismatch errors. + boolean isStreamingWritesToMetadataEnabled = !metadataWriteConcurrencyMode.supportsMultiWriter() + && writeConfig.isMetadataStreamingWritesEnabled(datatableVersion); WriteConcurrencyMode concurrencyMode; HoodieLockConfig lockConfig; if (metadataWriteConcurrencyMode.supportsMultiWriter()) { - // Configuring Multi-writer directly on metadata table is intended for executing table service plans, not for writes. - checkState(!isStreamingWritesToMetadataEnabled, - "Streaming writes to metadata table must be disabled when using multi-writer concurrency mode " - + metadataWriteConcurrencyMode + ". Disable " + HoodieMetadataConfig.STREAMING_WRITE_ENABLED.key()); checkState(metadataWriteConcurrencyMode == writeConfig.getWriteConcurrencyMode(), "If multiwriter is used on metadata table, its concurrency mode (" + metadataWriteConcurrencyMode + ") must match the data table concurrency mode (" + writeConfig.getWriteConcurrencyMode() + ")"); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index 06ce8521d41c2..8b41c0dd4441b 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -161,7 +161,7 @@ public void testCreateMetadataWriteConfigRejectsInProcessLockProvider() { } @Test - public void testCreateMetadataWriteConfigRejectsStreamingWritesWithMultiWriter() { + public void testCreateMetadataWriteConfigForcesStreamingWritesOffWithMultiWriter() { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() .withPath("/tmp/base_path/") .withCleanConfig(HoodieCleanConfig.newBuilder() @@ -172,13 +172,14 @@ public void testCreateMetadataWriteConfigRejectsStreamingWritesWithMultiWriter() .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() - .withLockProvider(InProcessLockProvider.class).build()) + .withLockProvider(FileSystemBasedLockProvider.class).build()) .build(); - IllegalStateException ex = assertThrows(IllegalStateException.class, () -> - HoodieMetadataWriteUtils.createMetadataWriteConfig( - writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT)); - assertTrue(ex.getMessage().contains("Streaming writes to metadata table must be disabled")); + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + // Multi-writer takes precedence over streaming writes; streaming writes are forced off + validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, FileSystemBasedLockProvider.class.getCanonicalName()); } @Test From 64a7c856409abbc29d4b57e58243dec0af623c46 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Tue, 14 Apr 2026 20:11:39 -0700 Subject: [PATCH 20/22] assert lock key --- .../apache/hudi/config/HoodieLockConfig.java | 33 ++++- .../hudi/config/TestHoodieLockConfig.java | 118 ++++++++++++++++++ 2 files changed, 146 insertions(+), 5 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java index 4adda959c02d8..a349665fb22a0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java @@ -256,6 +256,7 @@ public class HoodieLockConfig extends HoodieConfig { public static final String DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS = "org.apache.hudi.aws.transaction.lock.DynamoDBBasedImplicitPartitionKeyLockProvider"; private static final String DYNAMODB_BASED_LOCK_PROPERTY_PREFIX = LOCK_PREFIX + "dynamodb."; + private static final String DYNAMODB_LOCK_PARTITION_KEY_PROP_KEY = DYNAMODB_BASED_LOCK_PROPERTY_PREFIX + "partition_key"; private static final String STORAGE_BASED_LOCK_PROPERTY_PREFIX = LOCK_PREFIX + "storage."; private HoodieLockConfig() { @@ -296,16 +297,38 @@ public static HoodieLockConfig getLockConfigForBuiltInLockProvider(String lockPr lockProps.put(LOCK_HEARTBEAT_INTERVAL_MS.key(), String.valueOf(writeConfig.getIntOrDefault(LOCK_HEARTBEAT_INTERVAL_MS))); - // Provider-specific configs + // Provider-specific configs with lock key validation where applicable. + // Providers that infer lock keys (e.g. from TBL_NAME or HoodieTableConfig.NAME) at build time + // won't carry those inferred values into the rebuilt config, so we require them to be set explicitly. if (FileSystemBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.FILESYSTEM_BASED_LOCK_PROPERTY_PREFIX); - } else if (ZookeeperBasedLockProvider.class.getCanonicalName().equals(lockProviderClass) - || ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName().equals(lockProviderClass)) { + } else if (ZookeeperBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.ZOOKEEPER_BASED_LOCK_PROPERTY_PREFIX); + if (!lockProps.containsKey(LockConfiguration.ZK_LOCK_KEY_PROP_KEY)) { + throw new IllegalArgumentException(LockConfiguration.ZK_LOCK_KEY_PROP_KEY + + " must be explicitly set on the data table's lock config when using " + lockProviderClass + + ". The inferred default from table name is not propagated to the metadata table lock config."); + } + } else if (ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName().equals(lockProviderClass)) { copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.ZOOKEEPER_BASED_LOCK_PROPERTY_PREFIX); } else if (HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.HIVE_METASTORE_LOCK_PROPERTY_PREFIX); - } else if (DYNAMODB_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass) - || DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { + if (!lockProps.containsKey(LockConfiguration.HIVE_DATABASE_NAME_PROP_KEY)) { + throw new IllegalArgumentException(LockConfiguration.HIVE_DATABASE_NAME_PROP_KEY + + " must be explicitly set on the data table's lock config when using " + lockProviderClass); + } + if (!lockProps.containsKey(LockConfiguration.HIVE_TABLE_NAME_PROP_KEY)) { + throw new IllegalArgumentException(LockConfiguration.HIVE_TABLE_NAME_PROP_KEY + + " must be explicitly set on the data table's lock config when using " + lockProviderClass); + } + } else if (DYNAMODB_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { + copyPropsWithPrefix(dataProps, lockProps, DYNAMODB_BASED_LOCK_PROPERTY_PREFIX); + if (!lockProps.containsKey(DYNAMODB_LOCK_PARTITION_KEY_PROP_KEY)) { + throw new IllegalArgumentException(DYNAMODB_LOCK_PARTITION_KEY_PROP_KEY + + " must be explicitly set on the data table's lock config when using " + lockProviderClass + + ". The inferred default from table name is not propagated to the metadata table lock config."); + } + } else if (DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { copyPropsWithPrefix(dataProps, lockProps, DYNAMODB_BASED_LOCK_PROPERTY_PREFIX); } else if (StorageBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { copyPropsWithPrefix(dataProps, lockProps, STORAGE_BASED_LOCK_PROPERTY_PREFIX); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java index 09a918951ca1d..b4f1c8c98f56c 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java @@ -22,6 +22,7 @@ import org.apache.hudi.client.transaction.lock.StorageBasedLockProvider; import org.apache.hudi.client.transaction.lock.ZookeeperBasedImplicitBasePathLockProvider; import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider; +import org.apache.hudi.common.config.LockConfiguration; import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.exception.HoodieException; @@ -217,4 +218,121 @@ public void testGetLockConfigRejectsCustomLockProvider() { assertTrue(ex.getMessage().contains("only supported for built-in lock providers")); assertTrue(ex.getMessage().contains(customLockProviderClass)); } + + @Test + public void testGetLockConfigRejectsZookeeperProviderWithoutLockKey() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(ZookeeperBasedLockProvider.class) + .withZkQuorum("zk-host:2181") + .withZkBasePath("/hudi/locks") + .withZkPort("2181") + .build()) + .build(); + + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> + HoodieLockConfig.getLockConfigForBuiltInLockProvider( + ZookeeperBasedLockProvider.class.getCanonicalName(), writeConfig)); + assertTrue(ex.getMessage().contains(LockConfiguration.ZK_LOCK_KEY_PROP_KEY)); + } + + @Test + public void testGetLockConfigAcceptsZookeeperImplicitBasePathWithoutLockKey() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder() + .withLockProvider(ZookeeperBasedImplicitBasePathLockProvider.class) + .withZkQuorum("zk-host:2181") + .withZkPort("2181") + .build()) + .build(); + + HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName(), writeConfig); + assertEquals(ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName(), + lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); + } + + @Test + public void testGetLockConfigRejectsDynamoDBProviderWithoutPartitionKey() { + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS); + lockProps.put("hoodie.write.lock.dynamodb.table", "my_lock_table"); + lockProps.put("hoodie.write.lock.dynamodb.region", "us-west-2"); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) + .build(); + + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> + HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS, writeConfig)); + assertTrue(ex.getMessage().contains("hoodie.write.lock.dynamodb.partition_key")); + } + + @Test + public void testGetLockConfigAcceptsDynamoDBImplicitPartitionKeyWithoutPartitionKey() { + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + HoodieLockConfig.DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS); + lockProps.put("hoodie.write.lock.dynamodb.table", "my_lock_table"); + lockProps.put("hoodie.write.lock.dynamodb.region", "us-west-2"); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) + .build(); + + HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig.DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS, writeConfig); + assertEquals(HoodieLockConfig.DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS, + lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); + } + + @Test + public void testGetLockConfigRejectsHiveMetastoreProviderWithoutDatabase() { + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS); + lockProps.put(HoodieLockConfig.HIVE_TABLE_NAME.key(), "my_table"); + lockProps.put(HoodieLockConfig.HIVE_METASTORE_URI.key(), "thrift://hms-host:9083"); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) + .build(); + + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> + HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS, writeConfig)); + assertTrue(ex.getMessage().contains(LockConfiguration.HIVE_DATABASE_NAME_PROP_KEY)); + } + + @Test + public void testGetLockConfigRejectsHiveMetastoreProviderWithoutTable() { + Properties lockProps = new Properties(); + lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS); + lockProps.put(HoodieLockConfig.HIVE_DATABASE_NAME.key(), "my_database"); + lockProps.put(HoodieLockConfig.HIVE_METASTORE_URI.key(), "thrift://hms-host:9083"); + + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) + .build(); + + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> + HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS, writeConfig)); + assertTrue(ex.getMessage().contains(LockConfiguration.HIVE_TABLE_NAME_PROP_KEY)); + } } From 14879550352632ed36494da86ac707e7163c8320 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Tue, 14 Apr 2026 22:43:25 -0700 Subject: [PATCH 21/22] change name --- .../apache/hudi/config/HoodieLockConfig.java | 54 +++++++++----- .../metadata/HoodieMetadataWriteUtils.java | 2 +- .../hudi/config/TestHoodieLockConfig.java | 72 +++++++++---------- .../TestHoodieMetadataWriteUtils.java | 37 +++++----- 4 files changed, 91 insertions(+), 74 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java index a349665fb22a0..232068b3addf4 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieLockConfig.java @@ -268,15 +268,17 @@ public static HoodieLockConfig.Builder newBuilder() { } /** - * Build a {@link HoodieLockConfig} by copying lock-related configs from the given write config - * based on the configured lock provider. Only built-in lock providers are supported. + * Derive a {@link HoodieLockConfig} for a different table by copying lock-related configs from the + * given write config. Only built-in lock providers with explicitly set lock keys/paths are supported; + * providers that implicitly derive their lock identity from the table's base path are rejected. * * @param lockProviderClass the lock provider class name - * @param writeConfig the write config to copy lock properties from + * @param writeConfig the source write config to copy lock properties from * @return a new HoodieLockConfig with the relevant lock properties + * @throws IllegalArgumentException if required lock keys are missing or the provider derives its lock identity implicitly * @throws HoodieException if the lock provider is not a built-in provider */ - public static HoodieLockConfig getLockConfigForBuiltInLockProvider(String lockProviderClass, HoodieWriteConfig writeConfig) { + public static HoodieLockConfig deriveLockConfigForDifferentTable(String lockProviderClass, HoodieWriteConfig writeConfig) { TypedProperties dataProps = writeConfig.getProps(); Properties lockProps = new Properties(); lockProps.put(LOCK_PROVIDER_CLASS_NAME.key(), lockProviderClass); @@ -297,41 +299,61 @@ public static HoodieLockConfig getLockConfigForBuiltInLockProvider(String lockPr lockProps.put(LOCK_HEARTBEAT_INTERVAL_MS.key(), String.valueOf(writeConfig.getIntOrDefault(LOCK_HEARTBEAT_INTERVAL_MS))); - // Provider-specific configs with lock key validation where applicable. - // Providers that infer lock keys (e.g. from TBL_NAME or HoodieTableConfig.NAME) at build time - // won't carry those inferred values into the rebuilt config, so we require them to be set explicitly. + // Provider-specific configs with lock key validation. + // Providers whose lock identity (key/path) is inferred from table name or base path at build time + // won't carry those inferred values into a rebuilt config for a different table, so we either + // require them to be set explicitly or reject providers that offer no explicit override. if (FileSystemBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.FILESYSTEM_BASED_LOCK_PROPERTY_PREFIX); + if (!lockProps.containsKey(LockConfiguration.FILESYSTEM_LOCK_PATH_PROP_KEY)) { + throw new IllegalArgumentException(LockConfiguration.FILESYSTEM_LOCK_PATH_PROP_KEY + + " must be explicitly set when using " + lockProviderClass + + ". Without it, the lock path is derived from the table's base path," + + " which would resolve to a different location when building a lock config for a different table."); + } } else if (ZookeeperBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.ZOOKEEPER_BASED_LOCK_PROPERTY_PREFIX); if (!lockProps.containsKey(LockConfiguration.ZK_LOCK_KEY_PROP_KEY)) { throw new IllegalArgumentException(LockConfiguration.ZK_LOCK_KEY_PROP_KEY - + " must be explicitly set on the data table's lock config when using " + lockProviderClass - + ". The inferred default from table name is not propagated to the metadata table lock config."); + + " must be explicitly set when using " + lockProviderClass + + ". The inferred default from table name is not propagated" + + " when building a lock config for a different table."); } } else if (ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName().equals(lockProviderClass)) { - copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.ZOOKEEPER_BASED_LOCK_PROPERTY_PREFIX); + throw new IllegalArgumentException(lockProviderClass + + " is not supported because it derives its lock identity from the table's base path," + + " which would resolve to a different lock when building a lock config for a different table." + + " Use " + ZookeeperBasedLockProvider.class.getCanonicalName() + " with an explicit " + + LockConfiguration.ZK_LOCK_KEY_PROP_KEY + " instead."); } else if (HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { copyPropsWithPrefix(dataProps, lockProps, LockConfiguration.HIVE_METASTORE_LOCK_PROPERTY_PREFIX); if (!lockProps.containsKey(LockConfiguration.HIVE_DATABASE_NAME_PROP_KEY)) { throw new IllegalArgumentException(LockConfiguration.HIVE_DATABASE_NAME_PROP_KEY - + " must be explicitly set on the data table's lock config when using " + lockProviderClass); + + " must be explicitly set when using " + lockProviderClass); } if (!lockProps.containsKey(LockConfiguration.HIVE_TABLE_NAME_PROP_KEY)) { throw new IllegalArgumentException(LockConfiguration.HIVE_TABLE_NAME_PROP_KEY - + " must be explicitly set on the data table's lock config when using " + lockProviderClass); + + " must be explicitly set when using " + lockProviderClass); } } else if (DYNAMODB_BASED_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { copyPropsWithPrefix(dataProps, lockProps, DYNAMODB_BASED_LOCK_PROPERTY_PREFIX); if (!lockProps.containsKey(DYNAMODB_LOCK_PARTITION_KEY_PROP_KEY)) { throw new IllegalArgumentException(DYNAMODB_LOCK_PARTITION_KEY_PROP_KEY - + " must be explicitly set on the data table's lock config when using " + lockProviderClass - + ". The inferred default from table name is not propagated to the metadata table lock config."); + + " must be explicitly set when using " + lockProviderClass + + ". The inferred default from table name is not propagated" + + " when building a lock config for a different table."); } } else if (DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS.equals(lockProviderClass)) { - copyPropsWithPrefix(dataProps, lockProps, DYNAMODB_BASED_LOCK_PROPERTY_PREFIX); + throw new IllegalArgumentException(lockProviderClass + + " is not supported because it derives its lock identity from the table's base path," + + " which would resolve to a different lock when building a lock config for a different table." + + " Use " + DYNAMODB_BASED_LOCK_PROVIDER_CLASS + " with an explicit " + + DYNAMODB_LOCK_PARTITION_KEY_PROP_KEY + " instead."); } else if (StorageBasedLockProvider.class.getCanonicalName().equals(lockProviderClass)) { - copyPropsWithPrefix(dataProps, lockProps, STORAGE_BASED_LOCK_PROPERTY_PREFIX); + throw new IllegalArgumentException(lockProviderClass + + " is not supported because it derives its lock identity from the table's base path," + + " which would resolve to a different lock when building a lock config for a different table," + + " and does not provide an explicit lock path override."); } else { throw new HoodieException(writeConfig.getWriteConcurrencyMode() + " is only supported for built-in lock providers. Unsupported lock provider: " + lockProviderClass); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java index 2ac1a74c8447e..58f2bafc6e38a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataWriteUtils.java @@ -168,7 +168,7 @@ public static HoodieWriteConfig createMetadataWriteConfig( + "Configure a distributed lock provider on the data table."); concurrencyMode = metadataWriteConcurrencyMode; failedWritesCleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY; - lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider(lockProviderClass, writeConfig); + lockConfig = HoodieLockConfig.deriveLockConfigForDifferentTable(lockProviderClass, writeConfig); } else { if (isStreamingWritesToMetadataEnabled) { concurrencyMode = WriteConcurrencyMode.NON_BLOCKING_CONCURRENCY_CONTROL; diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java index b4f1c8c98f56c..283f830eb533a 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieLockConfig.java @@ -48,7 +48,7 @@ public void testGetLockConfigForFileSystemLockProvider() { .build()) .build(); - HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig lockConfig = HoodieLockConfig.deriveLockConfigForDifferentTable( FileSystemBasedLockProvider.class.getCanonicalName(), writeConfig); assertEquals(FileSystemBasedLockProvider.class.getCanonicalName(), lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); @@ -72,7 +72,7 @@ public void testGetLockConfigForZookeeperLockProvider() { .build()) .build(); - HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig lockConfig = HoodieLockConfig.deriveLockConfigForDifferentTable( ZookeeperBasedLockProvider.class.getCanonicalName(), writeConfig); assertEquals(ZookeeperBasedLockProvider.class.getCanonicalName(), lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); @@ -85,7 +85,7 @@ public void testGetLockConfigForZookeeperLockProvider() { } @Test - public void testGetLockConfigForZookeeperImplicitBasePathLockProvider() { + public void testGetLockConfigRejectsZookeeperImplicitBasePathLockProvider() { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() .withPath("/tmp/base_path/") .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) @@ -96,11 +96,11 @@ public void testGetLockConfigForZookeeperImplicitBasePathLockProvider() { .build()) .build(); - HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( - ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName(), writeConfig); - assertEquals(ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName(), - lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); - assertEquals("zk-host:2181", lockConfig.getString(HoodieLockConfig.ZK_CONNECT_URL)); + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> + HoodieLockConfig.deriveLockConfigForDifferentTable( + ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName(), writeConfig)); + assertTrue(ex.getMessage().contains("derives its lock identity from the table's base path")); + assertTrue(ex.getMessage().contains(ZookeeperBasedLockProvider.class.getCanonicalName())); } @Test @@ -118,7 +118,7 @@ public void testGetLockConfigForHiveMetastoreLockProvider() { .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) .build(); - HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig lockConfig = HoodieLockConfig.deriveLockConfigForDifferentTable( HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS, writeConfig); assertEquals(HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS, lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); @@ -142,7 +142,7 @@ public void testGetLockConfigForDynamoDBLockProvider() { .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) .build(); - HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig lockConfig = HoodieLockConfig.deriveLockConfigForDifferentTable( HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS, writeConfig); assertEquals(HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS, lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); @@ -152,7 +152,7 @@ public void testGetLockConfigForDynamoDBLockProvider() { } @Test - public void testGetLockConfigForStorageBasedLockProvider() { + public void testGetLockConfigRejectsStorageBasedLockProvider() { Properties lockProps = new Properties(); lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), StorageBasedLockProvider.class.getCanonicalName()); @@ -165,12 +165,11 @@ public void testGetLockConfigForStorageBasedLockProvider() { .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) .build(); - HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( - StorageBasedLockProvider.class.getCanonicalName(), writeConfig); - assertEquals(StorageBasedLockProvider.class.getCanonicalName(), - lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); - assertEquals("600", lockConfig.getProps().getProperty("hoodie.write.lock.storage.validity.timeout.secs")); - assertEquals("60", lockConfig.getProps().getProperty("hoodie.write.lock.storage.renew.interval.secs")); + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> + HoodieLockConfig.deriveLockConfigForDifferentTable( + StorageBasedLockProvider.class.getCanonicalName(), writeConfig)); + assertTrue(ex.getMessage().contains("derives its lock identity from the table's base path")); + assertTrue(ex.getMessage().contains("does not provide an explicit lock path override")); } @Test @@ -180,6 +179,7 @@ public void testGetLockConfigCopiesCommonRetryConfigs() { .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() .withLockProvider(FileSystemBasedLockProvider.class) + .withFileSystemLockPath("/tmp/lock_dir") .withNumRetries(5) .withRetryWaitTimeInMillis(2000L) .withRetryMaxWaitTimeInMillis(32000L) @@ -190,7 +190,7 @@ public void testGetLockConfigCopiesCommonRetryConfigs() { .build()) .build(); - HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig lockConfig = HoodieLockConfig.deriveLockConfigForDifferentTable( FileSystemBasedLockProvider.class.getCanonicalName(), writeConfig); assertEquals("5", lockConfig.getString(HoodieLockConfig.LOCK_ACQUIRE_NUM_RETRIES)); assertEquals("2000", lockConfig.getString(HoodieLockConfig.LOCK_ACQUIRE_RETRY_WAIT_TIME_IN_MILLIS)); @@ -214,7 +214,7 @@ public void testGetLockConfigRejectsCustomLockProvider() { .build(); HoodieException ex = assertThrows(HoodieException.class, () -> - HoodieLockConfig.getLockConfigForBuiltInLockProvider(customLockProviderClass, writeConfig)); + HoodieLockConfig.deriveLockConfigForDifferentTable(customLockProviderClass, writeConfig)); assertTrue(ex.getMessage().contains("only supported for built-in lock providers")); assertTrue(ex.getMessage().contains(customLockProviderClass)); } @@ -233,27 +233,26 @@ public void testGetLockConfigRejectsZookeeperProviderWithoutLockKey() { .build(); IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> - HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig.deriveLockConfigForDifferentTable( ZookeeperBasedLockProvider.class.getCanonicalName(), writeConfig)); assertTrue(ex.getMessage().contains(LockConfiguration.ZK_LOCK_KEY_PROP_KEY)); } @Test - public void testGetLockConfigAcceptsZookeeperImplicitBasePathWithoutLockKey() { + public void testGetLockConfigRejectsFileSystemProviderWithoutLockPath() { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() .withPath("/tmp/base_path/") .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() - .withLockProvider(ZookeeperBasedImplicitBasePathLockProvider.class) - .withZkQuorum("zk-host:2181") - .withZkPort("2181") + .withLockProvider(FileSystemBasedLockProvider.class) + .withFileSystemLockExpire(10) .build()) .build(); - HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( - ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName(), writeConfig); - assertEquals(ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName(), - lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> + HoodieLockConfig.deriveLockConfigForDifferentTable( + FileSystemBasedLockProvider.class.getCanonicalName(), writeConfig)); + assertTrue(ex.getMessage().contains(LockConfiguration.FILESYSTEM_LOCK_PATH_PROP_KEY)); } @Test @@ -271,13 +270,13 @@ public void testGetLockConfigRejectsDynamoDBProviderWithoutPartitionKey() { .build(); IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> - HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig.deriveLockConfigForDifferentTable( HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS, writeConfig)); assertTrue(ex.getMessage().contains("hoodie.write.lock.dynamodb.partition_key")); } @Test - public void testGetLockConfigAcceptsDynamoDBImplicitPartitionKeyWithoutPartitionKey() { + public void testGetLockConfigRejectsDynamoDBImplicitPartitionKeyLockProvider() { Properties lockProps = new Properties(); lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), HoodieLockConfig.DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS); @@ -290,10 +289,11 @@ public void testGetLockConfigAcceptsDynamoDBImplicitPartitionKeyWithoutPartition .withLockConfig(HoodieLockConfig.newBuilder().fromProperties(lockProps).build()) .build(); - HoodieLockConfig lockConfig = HoodieLockConfig.getLockConfigForBuiltInLockProvider( - HoodieLockConfig.DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS, writeConfig); - assertEquals(HoodieLockConfig.DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS, - lockConfig.getString(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME)); + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> + HoodieLockConfig.deriveLockConfigForDifferentTable( + HoodieLockConfig.DYNAMODB_BASED_IMPLICIT_PARTITION_KEY_LOCK_PROVIDER_CLASS, writeConfig)); + assertTrue(ex.getMessage().contains("derives its lock identity from the table's base path")); + assertTrue(ex.getMessage().contains(HoodieLockConfig.DYNAMODB_BASED_LOCK_PROVIDER_CLASS)); } @Test @@ -311,7 +311,7 @@ public void testGetLockConfigRejectsHiveMetastoreProviderWithoutDatabase() { .build(); IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> - HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig.deriveLockConfigForDifferentTable( HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS, writeConfig)); assertTrue(ex.getMessage().contains(LockConfiguration.HIVE_DATABASE_NAME_PROP_KEY)); } @@ -331,7 +331,7 @@ public void testGetLockConfigRejectsHiveMetastoreProviderWithoutTable() { .build(); IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> - HoodieLockConfig.getLockConfigForBuiltInLockProvider( + HoodieLockConfig.deriveLockConfigForDifferentTable( HoodieLockConfig.HIVE_METASTORE_BASED_LOCK_PROVIDER_CLASS, writeConfig)); assertTrue(ex.getMessage().contains(LockConfiguration.HIVE_TABLE_NAME_PROP_KEY)); } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index 8b41c0dd4441b..44c48368b6249 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -129,7 +129,9 @@ public void testCreateMetadataWriteConfigForOCC() { .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() - .withLockProvider(FileSystemBasedLockProvider.class).build()) + .withLockProvider(FileSystemBasedLockProvider.class) + .withFileSystemLockPath("/tmp/lock_dir") + .build()) .build(); HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( @@ -172,7 +174,9 @@ public void testCreateMetadataWriteConfigForcesStreamingWritesOffWithMultiWriter .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL).build()) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) .withLockConfig(HoodieLockConfig.newBuilder() - .withLockProvider(FileSystemBasedLockProvider.class).build()) + .withLockProvider(FileSystemBasedLockProvider.class) + .withFileSystemLockPath("/tmp/lock_dir") + .build()) .build(); HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( @@ -410,7 +414,7 @@ public void testCreateMetadataWriteConfigForOCCWithDynamoDBLockProvider() { } @Test - public void testCreateMetadataWriteConfigForOCCWithStorageBasedLockProvider() { + public void testCreateMetadataWriteConfigRejectsStorageBasedLockProvider() { Properties lockProps = new Properties(); lockProps.put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), StorageBasedLockProvider.class.getCanonicalName()); @@ -428,17 +432,14 @@ public void testCreateMetadataWriteConfigForOCCWithStorageBasedLockProvider() { .build()) .build(); - HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( - writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); - validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, - WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, - StorageBasedLockProvider.class.getCanonicalName()); - assertEquals("600", metadataWriteConfig.getProps().getString("hoodie.write.lock.storage.validity.timeout.secs")); - assertEquals("60", metadataWriteConfig.getProps().getString("hoodie.write.lock.storage.renew.interval.secs")); + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> + HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT)); + assertTrue(ex.getMessage().contains("derives its lock identity from the table's base path")); } @Test - public void testCreateMetadataWriteConfigForOCCWithZookeeperImplicitBasePathLockProvider() { + public void testCreateMetadataWriteConfigRejectsZookeeperImplicitBasePathLockProvider() { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() .withPath("/tmp/base_path/") .withMetadataConfig(HoodieMetadataConfig.newBuilder() @@ -449,19 +450,13 @@ public void testCreateMetadataWriteConfigForOCCWithZookeeperImplicitBasePathLock .withLockProvider(ZookeeperBasedImplicitBasePathLockProvider.class) .withZkQuorum("zk-host:2181") .withZkPort("2181") - .withZkSessionTimeoutInMs(30000L) - .withZkConnectionTimeoutInMs(15000L) .build()) .build(); - HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( - writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); - validateMetadataWriteConfig(metadataWriteConfig, HoodieFailedWritesCleaningPolicy.LAZY, - WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL, - ZookeeperBasedImplicitBasePathLockProvider.class.getCanonicalName()); - assertEquals("zk-host:2181", metadataWriteConfig.getProps().getString(HoodieLockConfig.ZK_CONNECT_URL.key())); - assertEquals(30000, metadataWriteConfig.getProps().getInteger(HoodieLockConfig.ZK_SESSION_TIMEOUT_MS.key())); - assertEquals(15000, metadataWriteConfig.getProps().getInteger(HoodieLockConfig.ZK_CONNECTION_TIMEOUT_MS.key())); + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> + HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT)); + assertTrue(ex.getMessage().contains("derives its lock identity from the table's base path")); } @Test From 7a92049d4165653702f5f0a03f509200ab712283 Mon Sep 17 00:00:00 2001 From: Krishen Bhan <“bkrishen@uber.com”> Date: Thu, 16 Apr 2026 17:02:51 -0700 Subject: [PATCH 22/22] reply --- .../TestHoodieMetadataWriteUtils.java | 18 ++++++++++++++++++ .../common/config/HoodieMetadataConfig.java | 11 +++++++++++ .../HoodieTableServiceManagerConfig.java | 11 +++++++++-- .../config/TestHoodieMetadataConfig.java | 19 +++++++++++++++++++ 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java index 44c48368b6249..06c116967e175 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metadata/TestHoodieMetadataWriteUtils.java @@ -204,6 +204,24 @@ public void testCreateMetadataWriteConfigWithTableServiceManager() { assertFalse(metadataWriteConfig.getTableServiceManagerConfig().isEnabledAndActionSupported(ActionType.clean)); } + @Test + public void testCreateMetadataWriteConfigWithTableServiceManagerLogCompactionOnly() { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .withPath("/tmp/base_path/") + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withTableServiceManagerEnabled(true) + .withTableServiceManagerActions("logcompaction") + .build()) + .build(); + + HoodieWriteConfig metadataWriteConfig = HoodieMetadataWriteUtils.createMetadataWriteConfig( + writeConfig, HoodieFailedWritesCleaningPolicy.EAGER, HoodieTableVersion.EIGHT); + assertTrue(metadataWriteConfig.getTableServiceManagerConfig().isTableServiceManagerEnabled()); + assertFalse(metadataWriteConfig.getTableServiceManagerConfig().isEnabledAndActionSupported(ActionType.compaction), + "compaction should not match when only logcompaction is configured"); + assertTrue(metadataWriteConfig.getTableServiceManagerConfig().isEnabledAndActionSupported(ActionType.logcompaction)); + } + @Test public void testCreateMetadataWriteConfigWithTableServiceManagerDisabled() { HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java index d69fefbaf6b79..cd4d03ee4ee70 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java @@ -1356,6 +1356,17 @@ public HoodieMetadataConfig build() { metadataConfig.setDefaultValue(STREAMING_WRITE_ENABLED, getDefaultForStreamingWriteEnabled(engineType)); // fix me: disable when schema on read is enabled. metadataConfig.setDefaults(HoodieMetadataConfig.class.getName()); + + String tsmActions = metadataConfig.getString(TABLE_SERVICE_MANAGER_ACTIONS); + if (tsmActions != null && !tsmActions.isEmpty()) { + validateTableServiceManagerActions(tsmActions); + } + if (metadataConfig.getBoolean(TABLE_SERVICE_MANAGER_ENABLED) + && (tsmActions == null || tsmActions.isEmpty())) { + throw new IllegalArgumentException(TABLE_SERVICE_MANAGER_ENABLED.key() + " is set to true but " + + TABLE_SERVICE_MANAGER_ACTIONS.key() + " is empty. Specify at least one action to delegate" + + " (supported: " + SUPPORTED_TABLE_SERVICE_MANAGER_ACTIONS + ")."); + } return metadataConfig; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieTableServiceManagerConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieTableServiceManagerConfig.java index 8e59ce1efa0e7..a2cef4558e1b6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieTableServiceManagerConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieTableServiceManagerConfig.java @@ -22,7 +22,10 @@ import javax.annotation.concurrent.Immutable; +import java.util.Arrays; import java.util.Properties; +import java.util.Set; +import java.util.stream.Collectors; /** * Configurations used by the Hudi Table Service Manager. @@ -171,9 +174,13 @@ public int getConnectionRetryDelay() { } public boolean isEnabledAndActionSupported(ActionType actionType) { - boolean isActionSupported = getTableServiceManagerActions().contains(actionType.name()); + Set actions = Arrays.stream(getTableServiceManagerActions().split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .collect(Collectors.toSet()); + boolean isActionSupported = actions.contains(actionType.name()); if (actionType.equals(ActionType.clustering)) { - isActionSupported = isActionSupported || getTableServiceManagerActions().contains(ActionType.replacecommit.name()); + isActionSupported = isActionSupported || actions.contains(ActionType.replacecommit.name()); } return isTableServiceManagerEnabled() && isActionSupported; } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java b/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java index 361a2661bdb2c..b66b432abc24d 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/config/TestHoodieMetadataConfig.java @@ -201,6 +201,7 @@ void testTableServiceManagerEnabled() { HoodieMetadataConfig enabledConfig = HoodieMetadataConfig.newBuilder() .withTableServiceManagerEnabled(true) + .withTableServiceManagerActions("compaction") .build(); assertTrue(enabledConfig.isTableServiceManagerEnabled()); @@ -250,4 +251,22 @@ void testTableServiceManagerActionsRejectsUnsupportedActions() { .withTableServiceManagerActions("nonexistent") .build()); } + + @Test + void testTableServiceManagerActionsValidatedInBuildFromProperties() { + Properties props = new Properties(); + props.put(HoodieMetadataConfig.TABLE_SERVICE_MANAGER_ACTIONS.key(), "clean"); + assertThrows(IllegalArgumentException.class, () -> + HoodieMetadataConfig.newBuilder() + .fromProperties(props) + .build()); + } + + @Test + void testTableServiceManagerEnabledWithEmptyActionsRejected() { + assertThrows(IllegalArgumentException.class, () -> + HoodieMetadataConfig.newBuilder() + .withTableServiceManagerEnabled(true) + .build()); + } }