From b317fbfa58be892f830858ddc6083c708a489fdb Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Tue, 21 Apr 2026 19:08:14 -0700 Subject: [PATCH 1/2] fix: Skip pre-compaction rollback metadata reads in getValidInstantTimestamps --- .../metadata/HoodieTableMetadataUtil.java | 18 +++- .../metadata/TestHoodieTableMetadataUtil.java | 93 +++++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index bfc16dbf55294..6bca38d034924 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -2098,9 +2098,25 @@ public static Set getValidInstantTimestamps(HoodieTableMetaClient dataMe // For any rollbacks and restores, we cannot neglect the instants that they are rolling back. // The rollback instant should be more recent than the start of the timeline for it to have rolled back any // instant which we have a log block for. + // + // Only read rollback metadata for rollbacks newer than the latest MDT compaction. + // After compaction, rolled-back log blocks are already merged into base files, so pre-compaction + // rollback timestamps are no longer needed for log block filtering. This avoids sequential storage + // reads for old rollback instants that can cause long latency during metadata table reading. final String earliestInstantTime = validInstantTimestamps.isEmpty() ? SOLO_COMMIT_TIMESTAMP : Collections.min(validInstantTimestamps); + final String latestMdtCompactionTime = metadataMetaClient.getActiveTimeline() + .getCommitTimeline() + .filterCompletedInstants() + .lastInstant() + .map(HoodieInstant::requestedTime) + .orElse(SOLO_COMMIT_TIMESTAMP); + // Only read rollback metadata for rollbacks newer than the later of: + // (a) the earliest completed instant (original filter), and + // (b) the latest MDT compaction instant (new optimization) + final String rollbackFilterThreshold = compareTimestamps(latestMdtCompactionTime, + GREATER_THAN, earliestInstantTime) ? latestMdtCompactionTime : earliestInstantTime; datasetTimeline.getRollbackAndRestoreTimeline().filterCompletedInstants().getInstantsAsStream() - .filter(instant -> compareTimestamps(instant.requestedTime(), GREATER_THAN, earliestInstantTime)) + .filter(instant -> compareTimestamps(instant.requestedTime(), GREATER_THAN, rollbackFilterThreshold)) .forEach(instant -> validInstantTimestamps.addAll(getRollbackedCommits(instant, datasetTimeline, dataMetaClient.getInstantGenerator()))); // add restore and rollback instants from MDT. diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java index 74589886a35a7..5f36e7daa8a4c 100644 --- a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java @@ -19,6 +19,9 @@ package org.apache.hudi.metadata; +import org.apache.hudi.avro.model.HoodieInstantInfo; +import org.apache.hudi.avro.model.HoodieRollbackMetadata; +import org.apache.hudi.avro.model.HoodieRollbackPlan; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieLocalEngineContext; @@ -28,6 +31,7 @@ import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.schema.HoodieSchema; import org.apache.hudi.common.schema.HoodieSchemaField; @@ -36,10 +40,13 @@ import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTableVersion; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.testutils.FileCreateUtilsLegacy; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestTable; +import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; @@ -61,6 +68,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Properties; @@ -869,4 +877,89 @@ private static Stream mapKeyNoSeparatorToFileGroupIndexTestCases() { ) ); } + + /** + * Tests getValidInstantTimestamps rollback handling: + * - Without MDT compaction, all rollback metadata is read (rolled-back commits appear in valid timestamps). + * - With MDT compaction, only post-compaction rollback metadata is read (pre-compaction rollbacks are skipped + * because those log blocks are already merged into base files). + */ + @Test + void testGetValidInstantTimestampsSkipsPreCompactionRollbacks() throws Exception { + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + + String commit1 = "20260101010101000"; + String commit2 = "20260201010101000"; + String commit3 = "20260301010101000"; + String commit4 = "20260501010101000"; + String commit5 = "20260601010101000"; + testTable.addCommit(commit1); + testTable.addCommit(commit2); + testTable.addCommit(commit3); + testTable.addCommit(commit4); + testTable.addCommit(commit5); + + // Rollbacks before MDT compaction time + addCompletedRollback(testTable, "20260202010101000", commit2); + addCompletedRollback(testTable, "20260302010101000", commit3); + // Rollback after MDT compaction time + addCompletedRollback(testTable, "20260502010101000", commit4); + + // Delete rolled-back commit instants from the timeline to simulate real rollback behavior. + // In a real system, the commit instant file is removed when a rollback completes, so the + // only way these timestamps appear in validInstantTimestamps is via rollback metadata reading. + metaClient = HoodieTableMetaClient.reload(metaClient); + for (String rolledBack : Arrays.asList(commit2, commit3, commit4)) { + HoodieInstant completedCommit = metaClient.getInstantGenerator() + .createNewInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, rolledBack); + metaClient.getActiveTimeline().deleteInstantFileIfExists(completedCommit); + } + + // Create MDT metaClient with NO compaction initially (only delta commits) + String mdtBasePath = HoodieTableMetadata.getMetadataTableBasePath(basePath); + HoodieTestUtils.init(mdtBasePath, HoodieTableType.MERGE_ON_READ); + HoodieTableMetaClient mdtMetaClient = HoodieTableMetaClient.builder() + .setBasePath(mdtBasePath) + .build(); + HoodieTestTable mdtTestTable = HoodieTestTable.of(mdtMetaClient); + mdtTestTable.addDeltaCommit("20260101020101000"); + + metaClient = HoodieTableMetaClient.reload(metaClient); + mdtMetaClient = HoodieTableMetaClient.reload(mdtMetaClient); + + // Without MDT compaction, all rollback metadata is read — rolled-back commits appear + Set validTimestamps = HoodieTableMetadataUtil.getValidInstantTimestamps(metaClient, mdtMetaClient); + assertTrue(validTimestamps.contains(commit1), "commit1 should be in valid timestamps"); + assertTrue(validTimestamps.contains(commit2), "commit2 should be in valid timestamps (from rollback metadata read)"); + assertTrue(validTimestamps.contains(commit3), "commit3 should be in valid timestamps (from rollback metadata read)"); + assertTrue(validTimestamps.contains(commit4), "commit4 should be in valid timestamps (from rollback metadata read)"); + assertTrue(validTimestamps.contains(commit5), "commit5 should be in valid timestamps"); + + // Now add a compaction commit to MDT at a time between rollback2 and rollback3 + mdtTestTable.addCommit("20260401010101000"); + mdtTestTable.addDeltaCommit("20260501020101000"); + + metaClient = HoodieTableMetaClient.reload(metaClient); + mdtMetaClient = HoodieTableMetaClient.reload(mdtMetaClient); + + // With MDT compaction, only post-compaction rollback (commit4) metadata is read; + // pre-compaction rollbacks for commit2 and commit3 are skipped + validTimestamps = HoodieTableMetadataUtil.getValidInstantTimestamps(metaClient, mdtMetaClient); + assertTrue(validTimestamps.contains(commit1), "commit1 should be in valid timestamps"); + assertTrue(validTimestamps.contains(commit5), "commit5 should be in valid timestamps"); + assertTrue(validTimestamps.contains(commit4), "commit4 should be in valid timestamps (from post-compaction rollback)"); + assertFalse(validTimestamps.contains(commit2), "commit2 should NOT be in valid timestamps (pre-compaction rollback skipped)"); + assertFalse(validTimestamps.contains(commit3), "commit3 should NOT be in valid timestamps (pre-compaction rollback skipped)"); + } + + private void addCompletedRollback(HoodieTestTable testTable, String rollbackTime, String rolledBackCommit) throws Exception { + Map> emptyPartitionFiles = new HashMap<>(); + emptyPartitionFiles.put("partition1", Collections.emptyList()); + HoodieRollbackMetadata rollbackMeta = testTable.getRollbackMetadata(rolledBackCommit, emptyPartitionFiles, false); + HoodieRollbackPlan rollbackPlan = new HoodieRollbackPlan(); + rollbackPlan.setInstantToRollback(new HoodieInstantInfo(rolledBackCommit, HoodieTimeline.COMMIT_ACTION)); + rollbackPlan.setRollbackRequests(Collections.emptyList()); + testTable.addRollback(rollbackTime, rollbackMeta, rollbackPlan); + testTable.addRollbackCompleted(rollbackTime, rollbackMeta, false); + } } From 67eac7ab767b5f7d059efd854b16f25a7b1241d0 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Fri, 8 May 2026 19:15:35 -0700 Subject: [PATCH 2/2] Fix import --- .../org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java index 5f36e7daa8a4c..00dd8b82e7b04 100644 --- a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java @@ -71,6 +71,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.UUID;