From 38e4d44e679d3c93a99bb5300834dc54d1b56c3e Mon Sep 17 00:00:00 2001 From: Revanth Chandupatla Date: Tue, 13 Jan 2026 15:19:18 -0500 Subject: [PATCH 01/11] Add hudi azure based storage lock --- hudi-azure/pom.xml | 240 ++++++++++++ .../lock/ADLSGen2StorageLockClient.java | 364 ++++++++++++++++++ .../ITADLSGen2StorageLockClientAzurite.java | 125 ++++++ .../TestADLSGen2StorageBasedLockProvider.java | 48 +++ .../lock/TestADLSGen2StorageLockClient.java | 315 +++++++++++++++ ...stADLSGen2StorageLockClientUriParsing.java | 44 +++ .../hudi/common/fs/TestStorageSchemes.java | 4 +- .../apache/hudi/storage/StorageSchemes.java | 4 +- 8 files changed, 1141 insertions(+), 3 deletions(-) create mode 100644 hudi-azure/pom.xml create mode 100644 hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSGen2StorageLockClient.java create mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSGen2StorageLockClientAzurite.java create mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageBasedLockProvider.java create mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClient.java create mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClientUriParsing.java diff --git a/hudi-azure/pom.xml b/hudi-azure/pom.xml new file mode 100644 index 0000000000000..b0b6e9151b71e --- /dev/null +++ b/hudi-azure/pom.xml @@ -0,0 +1,240 @@ + + + + + hudi + org.apache.hudi + 1.2.0-SNAPSHOT + ../pom.xml + + + 4.0.0 + hudi-azure + jar + + + + 12.26.0 + 1.12.2 + + 2.13.5 + + + + + + org.apache.logging.log4j + log4j-1.2-api + + + + + org.projectlombok + lombok + + + + + org.apache.hudi + hudi-client-common + ${project.version} + + + org.apache.hudi + hudi-common + ${project.version} + + + + + org.apache.hadoop + hadoop-common + test + + + + + com.azure + azure-storage-blob + ${azure.storage.blob.version} + + + com.azure + azure-identity + ${azure.identity.version} + + + + + org.apache.hudi + hudi-tests-common + ${project.version} + test + + + org.apache.hudi + hudi-client-common + ${project.version} + tests + test + + + org.apache.hudi + hudi-hadoop-common + ${project.version} + test + test-jar + + + org.apache.hudi + hudi-common + ${project.version} + tests + test + + + org.mockito + mockito-core + test + + + + com.fasterxml.jackson.core + jackson-core + ${azure.jackson.version} + test + + + com.fasterxml.jackson.core + jackson-annotations + ${azure.jackson.version} + test + + + com.fasterxml.jackson.core + jackson-databind + ${azure.jackson.version} + test + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + ${azure.jackson.version} + test + + + com.fasterxml.jackson.dataformat + jackson-dataformat-xml + ${azure.jackson.version} + test + + + org.testcontainers + testcontainers + ${testcontainers.version} + test + + + org.testcontainers + junit-jupiter + ${testcontainers.version} + test + + + com.esotericsoftware + kryo-shaded + test + + + + + + azure-integration-tests + + + + + org.apache.maven.plugins + maven-surefire-plugin + + true + + + + org.apache.maven.plugins + maven-failsafe-plugin + ${maven-failsafe-plugin.version} + + + **/IT*.java + + + + + integration-test + + integration-test + + + + verify-integration-test + verify + + verify + + + + + + + + + + + + + src/main/resources + + + + + org.apache.rat + apache-rat-plugin + + + org.apache.maven.plugins + maven-jar-plugin + ${maven-jar-plugin.version} + + + + test-jar + + + + + + org.jacoco + jacoco-maven-plugin + + + + + diff --git a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSGen2StorageLockClient.java b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSGen2StorageLockClient.java new file mode 100644 index 0000000000000..4740d142a94ec --- /dev/null +++ b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSGen2StorageLockClient.java @@ -0,0 +1,364 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.apache.hudi.client.transaction.lock.StorageLockClient; +import org.apache.hudi.client.transaction.lock.models.LockGetResult; +import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; +import org.apache.hudi.client.transaction.lock.models.StorageLockData; +import org.apache.hudi.client.transaction.lock.models.StorageLockFile; +import org.apache.hudi.common.util.Functions; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.VisibleForTesting; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieLockException; + +import com.azure.core.credential.AzureSasCredential; +import com.azure.core.util.BinaryData; +import com.azure.core.util.Context; +import com.azure.identity.DefaultAzureCredentialBuilder; +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.azure.storage.blob.models.BlobProperties; +import com.azure.storage.blob.models.BlobRequestConditions; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.options.BlobParallelUploadOptions; +import lombok.extern.slf4j.Slf4j; +import org.slf4j.Logger; + +import javax.annotation.Nonnull; +import javax.annotation.concurrent.ThreadSafe; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UncheckedIOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Properties; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * ADLS Gen2 (ABFS/ABFSS) implementation of {@link StorageLockClient} using Azure Blob conditional requests. + * + *

Lock semantics match S3/GCS storage lock clients: + * - Create: conditional write with If-None-Match: * + * - Update/Renew/Expire: conditional write with If-Match: <etag> + * + *

Expected lock URI format: + *

+ * + *

Authentication precedence (via {@link Properties}): + *

+ */ +@Slf4j +@ThreadSafe +public class ADLSGen2StorageLockClient implements StorageLockClient { + + private static final int PRECONDITION_FAILURE_ERROR_CODE = 412; + private static final int NOT_FOUND_ERROR_CODE = 404; + private static final int CONFLICT_ERROR_CODE = 409; + private static final int RATE_LIMIT_ERROR_CODE = 429; + private static final int INTERNAL_SERVER_ERROR_CODE_MIN = 500; + + public static final String AZURE_CONNECTION_STRING = "hoodie.write.lock.azure.connection.string"; + public static final String AZURE_SAS_TOKEN = "hoodie.write.lock.azure.sas.token"; + + private final Logger logger; + private final BlobServiceClient blobServiceClient; + private final BlobClient lockBlobClient; + private final Properties clientProperties; + private final String ownerId; + private final String lockFileUri; + + /** + * Constructor used by reflection by {@link org.apache.hudi.client.transaction.lock.StorageBasedLockProvider}. + * + * @param ownerId lock owner id + * @param lockFileUri lock file URI (ABFS/ABFSS) + * @param props properties used to customize/authenticate the Azure client + */ + public ADLSGen2StorageLockClient(String ownerId, String lockFileUri, Properties props) { + this(ownerId, lockFileUri, props, createDefaultBlobServiceClient(), log); + } + + @VisibleForTesting + ADLSGen2StorageLockClient( + String ownerId, + String lockFileUri, + Properties props, + Functions.Function1 blobServiceClientSupplier, + Logger logger) { + this.ownerId = ownerId; + this.lockFileUri = lockFileUri; + this.logger = logger; + this.clientProperties = props; + + AzureLocation location = parseAzureLocation(lockFileUri); + this.blobServiceClient = blobServiceClientSupplier.apply(location.withProperties(props)); + BlobContainerClient containerClient = blobServiceClient.getBlobContainerClient(location.container); + this.lockBlobClient = containerClient.getBlobClient(location.blobPath); + } + + private static Functions.Function1 createDefaultBlobServiceClient() { + return (location) -> { + Properties props = location.props; + BlobServiceClientBuilder builder = new BlobServiceClientBuilder(); + + String connectionString = props == null ? null : props.getProperty(AZURE_CONNECTION_STRING); + if (connectionString != null && !connectionString.trim().isEmpty()) { + return builder.connectionString(connectionString).buildClient(); + } + + builder.endpoint(location.blobEndpoint); + String sasToken = props == null ? null : props.getProperty(AZURE_SAS_TOKEN); + if (sasToken != null && !sasToken.trim().isEmpty()) { + String cleaned = sasToken.startsWith("?") ? sasToken.substring(1) : sasToken; + return builder.credential(new AzureSasCredential(cleaned)).buildClient(); + } + + return builder.credential(new DefaultAzureCredentialBuilder().build()).buildClient(); + }; + } + + @Override + public Pair> tryUpsertLockFile( + StorageLockData newLockData, + Option previousLockFile) { + String expectedEtag = previousLockFile.isPresent() ? previousLockFile.get().getVersionId() : null; + try { + StorageLockFile updated = createOrUpdateLockFileInternal(newLockData, expectedEtag); + return Pair.of(LockUpsertResult.SUCCESS, Option.of(updated)); + } catch (BlobStorageException e) { + int code = e.getStatusCode(); + if (code == PRECONDITION_FAILURE_ERROR_CODE || code == CONFLICT_ERROR_CODE) { + logger.info("OwnerId: {}, Unable to write new lock file. Another process has modified this lockfile {} already.", + ownerId, lockFileUri); + return Pair.of(LockUpsertResult.ACQUIRED_BY_OTHERS, Option.empty()); + } else if (code == RATE_LIMIT_ERROR_CODE) { + logger.warn("OwnerId: {}, Rate limit exceeded for lock file: {}", ownerId, lockFileUri); + } else if (code >= INTERNAL_SERVER_ERROR_CODE_MIN) { + logger.warn("OwnerId: {}, Azure returned internal server error code for lock file: {}", + ownerId, lockFileUri, e); + } else { + throw e; + } + return Pair.of(LockUpsertResult.UNKNOWN_ERROR, Option.empty()); + } catch (Exception e) { + logger.error("OwnerId: {}, Unexpected error while writing lock file: {}", ownerId, lockFileUri, e); + return Pair.of(LockUpsertResult.UNKNOWN_ERROR, Option.empty()); + } + } + + @Override + public Pair> readCurrentLockFile() { + try { + BlobProperties props = lockBlobClient.getProperties(); + String eTag = props.getETag(); + try { + return getLockFileFromBlob(lockBlobClient, eTag); + } catch (BlobStorageException e) { + // Blob can change/disappear after properties call; treat 404 during stream read as UNKNOWN_ERROR. + return Pair.of(handleGetStorageException(e, true), Option.empty()); + } + } catch (BlobStorageException e) { + return Pair.of(handleGetStorageException(e, false), Option.empty()); + } + } + + private LockGetResult handleGetStorageException(BlobStorageException e, boolean ignore404) { + int code = e.getStatusCode(); + if (code == NOT_FOUND_ERROR_CODE) { + if (ignore404) { + logger.info("OwnerId: {}, Azure stream read failure detected: {}", ownerId, lockFileUri); + } else { + logger.info("OwnerId: {}, Object not found in the path: {}", ownerId, lockFileUri); + return LockGetResult.NOT_EXISTS; + } + } else if (code == RATE_LIMIT_ERROR_CODE) { + logger.warn("OwnerId: {}, Rate limit exceeded for lock file: {}", ownerId, lockFileUri); + } else if (code >= INTERNAL_SERVER_ERROR_CODE_MIN) { + logger.warn("OwnerId: {}, Azure returned internal server error code for lock file: {}", ownerId, lockFileUri, e); + } else { + throw e; + } + return LockGetResult.UNKNOWN_ERROR; + } + + private @Nonnull Pair> getLockFileFromBlob(BlobClient blobClient, String eTag) { + try (InputStream inputStream = blobClient.openInputStream()) { + return Pair.of(LockGetResult.SUCCESS, Option.of(StorageLockFile.createFromStream(inputStream, eTag))); + } catch (IOException ioe) { + throw new UncheckedIOException("Failed reading blob: " + lockFileUri, ioe); + } + } + + private StorageLockFile createOrUpdateLockFileInternal(StorageLockData lockData, String expectedEtag) { + byte[] bytes = StorageLockFile.toByteArray(lockData); + BlobRequestConditions conditions = new BlobRequestConditions(); + if (expectedEtag == null) { + conditions.setIfNoneMatch("*"); + } else { + conditions.setIfMatch(expectedEtag); + } + + BlobParallelUploadOptions options = new BlobParallelUploadOptions(BinaryData.fromBytes(bytes)) + .setRequestConditions(conditions); + lockBlobClient.uploadWithResponse(options, null, Context.NONE); + String newEtag = lockBlobClient.getProperties().getETag(); + return new StorageLockFile(lockData, newEtag); + } + + @Override + public Option readObject(String filePath, boolean checkExistsFirst) { + try { + AzureLocation location = parseAzureLocation(filePath); + AzureLocation lockLocation = parseAzureLocation(lockFileUri); + BlobServiceClient svc = location.blobEndpoint.equals(lockLocation.blobEndpoint) + ? blobServiceClient + : createDefaultBlobServiceClient().apply(location.withProperties(clientProperties)); + BlobClient blobClient = svc.getBlobContainerClient(location.container).getBlobClient(location.blobPath); + + if (checkExistsFirst && !blobClient.exists()) { + logger.debug("JSON config file not found: {}", filePath); + return Option.empty(); + } + byte[] bytes = blobClient.downloadContent().toBytes(); + return Option.of(new String(bytes, UTF_8)); + } catch (BlobStorageException e) { + if (e.getStatusCode() == NOT_FOUND_ERROR_CODE) { + logger.debug("JSON config file not found: {}", filePath); + } else { + logger.warn("Error reading JSON config file: {}", filePath, e); + } + return Option.empty(); + } catch (Exception e) { + logger.warn("Error reading JSON config file: {}", filePath, e); + return Option.empty(); + } + } + + @Override + public boolean writeObject(String filePath, String content) { + try { + AzureLocation location = parseAzureLocation(filePath); + AzureLocation lockLocation = parseAzureLocation(lockFileUri); + BlobServiceClient svc = location.blobEndpoint.equals(lockLocation.blobEndpoint) + ? blobServiceClient + : createDefaultBlobServiceClient().apply(location.withProperties(clientProperties)); + BlobClient blobClient = svc.getBlobContainerClient(location.container).getBlobClient(location.blobPath); + blobClient.upload(BinaryData.fromString(content), true); + logger.debug("Successfully wrote object to: {}", filePath); + return true; + } catch (Exception e) { + logger.error("Error writing object to: {}", filePath, e); + return false; + } + } + + @Override + public void close() { + // BlobServiceClient does not require explicit close. No-op. + } + + private static AzureLocation parseAzureLocation(String uriString) { + try { + URI uri = new URI(uriString); + String scheme = uri.getScheme(); + if (scheme == null) { + throw new IllegalArgumentException("URI does not contain a valid scheme."); + } + + String authority = uri.getAuthority(); + String path = uri.getPath() == null ? "" : uri.getPath().replaceFirst("/", ""); + + if ("abfs".equalsIgnoreCase(scheme) || "abfss".equalsIgnoreCase(scheme)) { + // abfs[s]://@.dfs.core.windows.net/ + if (authority == null || !authority.contains("@")) { + throw new IllegalArgumentException("ABFS URI authority must be in the form '@': " + uriString); + } + String[] parts = authority.split("@", 2); + String container = parts[0]; + String host = parts[1]; + String endpointHost = dfsHostToBlobHost(host); + String endpoint = "https://" + endpointHost; + if (container.isEmpty() || path.isEmpty()) { + throw new IllegalArgumentException("ABFS URI must contain container and path: " + uriString); + } + return new AzureLocation(endpoint, container, path, null); + } + + if ("https".equalsIgnoreCase(scheme)) { + // https://.blob.core.windows.net// + if (authority == null || authority.isEmpty()) { + throw new IllegalArgumentException("HTTPS URI authority missing: " + uriString); + } + int slash = path.indexOf('/'); + if (slash <= 0 || slash == path.length() - 1) { + throw new IllegalArgumentException("HTTPS URI must be in the form 'https:////': " + uriString); + } + String container = path.substring(0, slash); + String blobPath = path.substring(slash + 1); + return new AzureLocation("https://" + authority, container, blobPath, null); + } + + throw new IllegalArgumentException("Unsupported scheme for Azure storage lock: " + scheme); + } catch (URISyntaxException e) { + throw new HoodieLockException("Failed to parse Azure URI: " + uriString, e); + } + } + + private static String dfsHostToBlobHost(String host) { + if (host == null) { + return null; + } + if (host.endsWith(".dfs.core.windows.net")) { + return host.replace(".dfs.core.windows.net", ".blob.core.windows.net"); + } + return host; + } + + @VisibleForTesting + static final class AzureLocation { + final String blobEndpoint; + final String container; + final String blobPath; + final Properties props; + + AzureLocation(String blobEndpoint, String container, String blobPath, Properties props) { + this.blobEndpoint = blobEndpoint; + this.container = container; + this.blobPath = blobPath; + this.props = props; + } + + AzureLocation withProperties(Properties props) { + return new AzureLocation(blobEndpoint, container, blobPath, props); + } + } +} diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSGen2StorageLockClientAzurite.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSGen2StorageLockClientAzurite.java new file mode 100644 index 0000000000000..8cef55f5100a9 --- /dev/null +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSGen2StorageLockClientAzurite.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.apache.hudi.client.transaction.lock.models.LockGetResult; +import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; +import org.apache.hudi.client.transaction.lock.models.StorageLockData; +import org.apache.hudi.client.transaction.lock.models.StorageLockFile; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; + +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; +import org.testcontainers.containers.wait.strategy.Wait; + +import java.time.Duration; +import java.util.Properties; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Integration tests for {@link ADLSGen2StorageLockClient} using Azurite (Azure Storage emulator). + * + *

Run with: {@code mvn -Pazure-integration-tests -pl hudi-azure verify} + */ +@Testcontainers(disabledWithoutDocker = true) +@DisabledIfEnvironmentVariable(named = "SKIP_AZURITE_IT", matches = "true") +public class ITADLSGen2StorageLockClientAzurite { + + private static final DockerImageName AZURITE_IMAGE = + DockerImageName.parse("mcr.microsoft.com/azure-storage/azurite"); + + // Standard Azurite defaults (documented by Microsoft) + private static final String ACCOUNT_NAME = "devstoreaccount1"; + // Standard Azurite dev account key (NOT a secret; used by the emulator by default) + // See: https://learn.microsoft.com/azure/storage/common/storage-use-azurite?tabs=visual-studio#connection-strings + private static final String ACCOUNT_KEY = + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="; + + @Container + public static final GenericContainer AZURITE = + new GenericContainer<>(AZURITE_IMAGE) + .withExposedPorts(10000) + .withCommand("azurite-blob", "--blobHost", "0.0.0.0", "--blobPort", "10000", "--loose") + .waitingFor(Wait.forListeningPort().withStartupTimeout(Duration.ofSeconds(60))); + + private static String blobEndpoint() { + // Azurite expects / in the endpoint URL path + return "http://" + AZURITE.getHost() + ":" + AZURITE.getMappedPort(10000) + "/" + ACCOUNT_NAME; + } + + private static String connectionString() { + String key = System.getenv("AZURITE_ACCOUNT_KEY"); + if (key == null || key.trim().isEmpty()) { + key = ACCOUNT_KEY; + } + return "DefaultEndpointsProtocol=http;" + + "AccountName=" + ACCOUNT_NAME + ";" + + "AccountKey=" + key + ";" + + "BlobEndpoint=" + blobEndpoint() + ";"; + } + + @Test + void testCreateUpdateAndReadLockFileWithAzurite() { + String container = "container"; + String blobPath = "locks/table_lock.json"; + + BlobServiceClient svc = new BlobServiceClientBuilder() + .connectionString(connectionString()) + .buildClient(); + BlobContainerClient containerClient = svc.getBlobContainerClient(container); + containerClient.createIfNotExists(); + + // NOTE: lockFileUri only needs to be parseable for container/blobPath extraction. + // The actual endpoint comes from the connection string. + String lockFileUri = "https://localhost:10000/" + container + "/" + blobPath; + Properties props = new Properties(); + props.setProperty(ADLSGen2StorageLockClient.AZURE_CONNECTION_STRING, connectionString()); + + ADLSGen2StorageLockClient owner1 = new ADLSGen2StorageLockClient("owner1", lockFileUri, props); + StorageLockData lockData1 = new StorageLockData(false, System.currentTimeMillis() + 60_000, "owner1"); + Pair> upsert1 = owner1.tryUpsertLockFile(lockData1, Option.empty()); + assertEquals(LockUpsertResult.SUCCESS, upsert1.getLeft()); + assertTrue(upsert1.getRight().isPresent()); + + // Read back + Pair> read = owner1.readCurrentLockFile(); + assertEquals(LockGetResult.SUCCESS, read.getLeft()); + assertTrue(read.getRight().isPresent()); + + // Wrong ETag should fail with precondition and be mapped to ACQUIRED_BY_OTHERS + ADLSGen2StorageLockClient owner2 = new ADLSGen2StorageLockClient("owner2", lockFileUri, props); + StorageLockFile wrongPrev = new StorageLockFile(lockData1, "\"etag-does-not-match\""); + StorageLockData lockData2 = new StorageLockData(false, System.currentTimeMillis() + 60_000, "owner2"); + Pair> upsert2 = + owner2.tryUpsertLockFile(lockData2, Option.of(wrongPrev)); + assertEquals(LockUpsertResult.ACQUIRED_BY_OTHERS, upsert2.getLeft()); + } +} + diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageBasedLockProvider.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageBasedLockProvider.java new file mode 100644 index 0000000000000..5481d45b2deb6 --- /dev/null +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageBasedLockProvider.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.apache.hudi.client.transaction.lock.StorageBasedLockProvider; +import org.apache.hudi.client.transaction.lock.StorageBasedLockProviderTestBase; +import org.apache.hudi.common.config.LockConfiguration; +import org.apache.hudi.common.testutils.HoodieTestUtils; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; + +import static org.apache.hudi.common.config.HoodieCommonConfig.BASE_PATH; + +@Disabled("Requires Azurite/Testcontainers-based integration environment (not enabled by default).") +public class TestADLSGen2StorageBasedLockProvider extends StorageBasedLockProviderTestBase { + + @BeforeEach + void setupLockProvider() { + providerProperties.put(BASE_PATH.key(), + "abfs://container@account.dfs.core.windows.net/lake/db/tbl-default"); + lockProvider = createLockProvider(); + } + + @Override + protected StorageBasedLockProvider createLockProvider() { + LockConfiguration lockConf = new LockConfiguration(providerProperties); + return new StorageBasedLockProvider(lockConf, HoodieTestUtils.getDefaultStorageConf()); + } +} + diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClient.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClient.java new file mode 100644 index 0000000000000..222be00a53603 --- /dev/null +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClient.java @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.apache.hudi.client.transaction.lock.models.LockGetResult; +import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; +import org.apache.hudi.client.transaction.lock.models.StorageLockData; +import org.apache.hudi.client.transaction.lock.models.StorageLockFile; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; + +import com.azure.core.util.Context; +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.models.BlobProperties; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.options.BlobParallelUploadOptions; +import com.azure.storage.blob.specialized.BlobInputStream; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.slf4j.Logger; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.Properties; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.contains; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.isNull; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +@ExtendWith(MockitoExtension.class) +public class TestADLSGen2StorageLockClient { + + private static final String OWNER_ID = "ownerId"; + private static final String LOCK_FILE_URI = + "abfs://container@account.dfs.core.windows.net/lockFilePath"; + private static final String LOCK_FILE_URI_WITH_NESTED_PATH = + "abfs://container@account.dfs.core.windows.net/lake/db/tbl-default/.hoodie/.locks/table_lock.json"; + + @Mock + private BlobServiceClient mockBlobServiceClient; + + @Mock + private BlobContainerClient mockContainerClient; + + @Mock + private BlobClient mockBlobClient; + + @Mock + private BlobProperties mockBlobProperties; + + @Mock + private Logger mockLogger; + + private ADLSGen2StorageLockClient lockClient; + + @BeforeEach + void setUp() { + setUp(LOCK_FILE_URI); + } + + private void setUp(String lockFileUri) { + when(mockBlobServiceClient.getBlobContainerClient(eq("container"))).thenReturn(mockContainerClient); + String expectedBlobPath = lockFileUri.replaceFirst("^abfss?://[^/]+/", ""); + when(mockContainerClient.getBlobClient(eq(expectedBlobPath))).thenReturn(mockBlobClient); + + lockClient = new ADLSGen2StorageLockClient( + OWNER_ID, + lockFileUri, + new Properties(), + (location) -> mockBlobServiceClient, + mockLogger); + } + + @Test + void testTryUpsertLockFile_noPreviousLock_success_setsIfNoneMatchStar() throws Exception { + StorageLockData lockData = new StorageLockData(false, 123L, "test-owner"); + when(mockBlobClient.getProperties()).thenReturn(mockBlobProperties); + when(mockBlobProperties.getETag()).thenReturn("\"etag-1\""); + + ArgumentCaptor optionsCaptor = ArgumentCaptor.forClass(BlobParallelUploadOptions.class); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))) + .thenReturn(null); + + Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); + + assertEquals(LockUpsertResult.SUCCESS, result.getLeft()); + assertTrue(result.getRight().isPresent()); + assertEquals("\"etag-1\"", result.getRight().get().getVersionId()); + + verify(mockBlobClient).uploadWithResponse(optionsCaptor.capture(), isNull(), eq(Context.NONE)); + BlobParallelUploadOptions options = optionsCaptor.getValue(); + assertRequestCondition(options, "ifNoneMatch", "*"); + verifyNoMoreInteractions(mockLogger); + } + + @Test + void testTryUpsertLockFile_withPreviousLock_success_setsIfMatch() throws Exception { + StorageLockData lockData = new StorageLockData(false, 999L, "existing-owner"); + StorageLockFile previousLockFile = new StorageLockFile(lockData, "\"etag-prev\""); + + when(mockBlobClient.getProperties()).thenReturn(mockBlobProperties); + when(mockBlobProperties.getETag()).thenReturn("\"etag-new\""); + + ArgumentCaptor optionsCaptor = ArgumentCaptor.forClass(BlobParallelUploadOptions.class); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))) + .thenReturn(null); + + Pair> result = + lockClient.tryUpsertLockFile(lockData, Option.of(previousLockFile)); + + assertEquals(LockUpsertResult.SUCCESS, result.getLeft()); + assertTrue(result.getRight().isPresent()); + assertEquals("\"etag-new\"", result.getRight().get().getVersionId()); + + verify(mockBlobClient).uploadWithResponse(optionsCaptor.capture(), isNull(), eq(Context.NONE)); + BlobParallelUploadOptions options = optionsCaptor.getValue(); + assertRequestCondition(options, "ifMatch", "\"etag-prev\""); + } + + @Test + void testTryUpsertLockFile_preconditionFailed_returnsAcquiredByOthers() { + StorageLockData lockData = new StorageLockData(false, 999L, "owner"); + BlobStorageException ex = mock(BlobStorageException.class); + when(ex.getStatusCode()).thenReturn(412); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); + + Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); + + assertEquals(LockUpsertResult.ACQUIRED_BY_OTHERS, result.getLeft()); + assertTrue(result.getRight().isEmpty()); + verify(mockLogger).info( + contains("Unable to write new lock file. Another process has modified this lockfile"), + eq(OWNER_ID), + eq(LOCK_FILE_URI)); + } + + @Test + void testTryUpsertLockFile_rateLimit_returnsUnknownError() { + StorageLockData lockData = new StorageLockData(false, 999L, "owner"); + BlobStorageException ex = mock(BlobStorageException.class); + when(ex.getStatusCode()).thenReturn(429); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); + + Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); + + assertEquals(LockUpsertResult.UNKNOWN_ERROR, result.getLeft()); + assertTrue(result.getRight().isEmpty()); + verify(mockLogger).warn(contains("Rate limit exceeded"), eq(OWNER_ID), eq(LOCK_FILE_URI)); + } + + @Test + void testTryUpsertLockFile_serverError_returnsUnknownError() { + StorageLockData lockData = new StorageLockData(false, 999L, "owner"); + BlobStorageException ex = mock(BlobStorageException.class); + when(ex.getStatusCode()).thenReturn(503); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); + + Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); + + assertEquals(LockUpsertResult.UNKNOWN_ERROR, result.getLeft()); + assertTrue(result.getRight().isEmpty()); + verify(mockLogger).warn(contains("Azure returned internal server error code"), eq(OWNER_ID), eq(LOCK_FILE_URI), eq(ex)); + } + + @Test + void testTryUpsertLockFile_unexpectedError_rethrows() { + StorageLockData lockData = new StorageLockData(false, 999L, "owner"); + BlobStorageException ex = mock(BlobStorageException.class); + when(ex.getStatusCode()).thenReturn(400); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); + + assertThrows(BlobStorageException.class, () -> lockClient.tryUpsertLockFile(lockData, Option.empty())); + } + + @Test + void testReadCurrentLockFile_notFound_returnsNotExists() { + BlobStorageException ex = mock(BlobStorageException.class); + when(ex.getStatusCode()).thenReturn(404); + when(mockBlobClient.getProperties()).thenThrow(ex); + + Pair> result = lockClient.readCurrentLockFile(); + assertEquals(LockGetResult.NOT_EXISTS, result.getLeft()); + assertTrue(result.getRight().isEmpty()); + verify(mockLogger).info(contains("Object not found"), eq(OWNER_ID), eq(LOCK_FILE_URI)); + } + + @Test + void testReadCurrentLockFile_blobFound_success() { + setUp(LOCK_FILE_URI_WITH_NESTED_PATH); + + StorageLockData data = new StorageLockData(false, 1700000000000L, "testOwner"); + byte[] json = StorageLockFile.toByteArray(data); + ByteArrayInputStream delegate = new ByteArrayInputStream(json); + BlobInputStream stream = mock(BlobInputStream.class); + try { + doAnswer(inv -> delegate.read(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2))) + .when(stream).read(any(byte[].class), anyInt(), anyInt()); + } catch (IOException ioe) { + throw new RuntimeException("Failed to stub BlobInputStream", ioe); + } + + when(mockBlobClient.getProperties()).thenReturn(mockBlobProperties); + when(mockBlobProperties.getETag()).thenReturn("\"etag-123\""); + when(mockBlobClient.openInputStream()).thenReturn(stream); + + Pair> result = lockClient.readCurrentLockFile(); + + assertEquals(LockGetResult.SUCCESS, result.getLeft()); + assertTrue(result.getRight().isPresent()); + assertEquals("\"etag-123\"", result.getRight().get().getVersionId()); + assertEquals("testOwner", result.getRight().get().getOwner()); + } + + @Test + void testReadCurrentLockFile_streamRead404_returnsUnknownError() { + when(mockBlobClient.getProperties()).thenReturn(mockBlobProperties); + when(mockBlobProperties.getETag()).thenReturn("\"etag-123\""); + + BlobStorageException ex404 = mock(BlobStorageException.class); + when(ex404.getStatusCode()).thenReturn(404); + when(mockBlobClient.openInputStream()).thenThrow(ex404); + + Pair> result = lockClient.readCurrentLockFile(); + assertEquals(LockGetResult.UNKNOWN_ERROR, result.getLeft()); + assertTrue(result.getRight().isEmpty()); + } + + @Test + void testClose_noop() { + lockClient.close(); + } + + private static void assertRequestCondition(Object blobParallelUploadOptions, String expectedField, String expectedValue) throws Exception { + Object requestConditions = tryInvoke(blobParallelUploadOptions, "getRequestConditions"); + if (requestConditions == null) { + Field f = blobParallelUploadOptions.getClass().getDeclaredField("requestConditions"); + f.setAccessible(true); + requestConditions = f.get(blobParallelUploadOptions); + } + assertNotNull(requestConditions, "requestConditions should be set on upload options"); + + Object actualIfMatch = tryInvoke(requestConditions, "getIfMatch"); + if (actualIfMatch == null) { + actualIfMatch = getFieldIfExists(requestConditions, "ifMatch"); + } + Object actualIfNoneMatch = tryInvoke(requestConditions, "getIfNoneMatch"); + if (actualIfNoneMatch == null) { + actualIfNoneMatch = getFieldIfExists(requestConditions, "ifNoneMatch"); + } + + if ("ifMatch".equals(expectedField)) { + assertEquals(expectedValue, actualIfMatch, "Expected If-Match to be set"); + } else if ("ifNoneMatch".equals(expectedField)) { + assertEquals(expectedValue, actualIfNoneMatch, "Expected If-None-Match to be set"); + } else { + throw new IllegalArgumentException("Unexpected expectedField: " + expectedField); + } + } + + private static Object tryInvoke(Object target, String methodName) { + try { + Method m = target.getClass().getMethod(methodName); + return m.invoke(target); + } catch (Exception ignored) { + return null; + } + } + + private static Object getFieldIfExists(Object target, String fieldName) { + try { + Field f = target.getClass().getDeclaredField(fieldName); + f.setAccessible(true); + return f.get(target); + } catch (Exception ignored) { + return null; + } + } +} + diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClientUriParsing.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClientUriParsing.java new file mode 100644 index 0000000000000..36867ae17e278 --- /dev/null +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClientUriParsing.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.junit.jupiter.api.Test; + +import java.lang.reflect.Method; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestADLSGen2StorageLockClientUriParsing { + + @Test + public void testParseAbfsUri() throws Exception { + String uri = "abfs://container@account.dfs.core.windows.net/table/.hoodie/.locks/table_lock.json"; + + Method m = ADLSGen2StorageLockClient.class.getDeclaredMethod("parseAzureLocation", String.class); + m.setAccessible(true); + Object location = m.invoke(null, uri); + + ADLSGen2StorageLockClient.AzureLocation l = (ADLSGen2StorageLockClient.AzureLocation) location; + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + } +} + diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java index ab9994aa36b5c..42a45bba31a47 100644 --- a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java @@ -41,7 +41,9 @@ public void testStorageSchemes() { for (StorageSchemes scheme : StorageSchemes.values()) { String schemeName = scheme.getScheme(); - if (scheme.getScheme().startsWith("s3") || scheme.getScheme().startsWith("gs")) { + if (scheme.getScheme().startsWith("s3") + || scheme.getScheme().startsWith("gs") + || scheme.getScheme().startsWith("abfs")) { assertTrue(StorageSchemes.getStorageLockImplementationIfExists(schemeName).isPresent()); } else { assertFalse(StorageSchemes.getStorageLockImplementationIfExists(schemeName).isPresent()); diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java b/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java index 77ecb536cf3ea..256ba3b9039c7 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java @@ -53,8 +53,8 @@ public enum StorageSchemes { // Azure ADLS ADL("adl", null, null, null), // Azure ADLS Gen2 - ABFS("abfs", null, null, null), - ABFSS("abfss", null, null, null), + ABFS("abfs", true, null, "org.apache.hudi.azure.transaction.lock.ADLSGen2StorageLockClient"), + ABFSS("abfss", true, null, "org.apache.hudi.azure.transaction.lock.ADLSGen2StorageLockClient"), // Aliyun OSS OSS("oss", null, null, null), // View FS for federated setups. If federating across cloud stores, then append From 318978795585cc0c8a52b5b367cb7fffd81ee949 Mon Sep 17 00:00:00 2001 From: Revanth Chandupatla Date: Tue, 20 Jan 2026 10:12:51 -0500 Subject: [PATCH 02/11] ADLS support for storage lock --- ...Client.java => ADLSStorageLockClient.java} | 53 ++++-- ...va => ITADLSStorageLockClientAzurite.java} | 11 +- ...stADLSGen2StorageLockClientUriParsing.java | 44 ----- ... => TestADLSStorageBasedLockProvider.java} | 3 +- ...nt.java => TestADLSStorageLockClient.java} | 7 +- .../TestADLSStorageLockClientUriParsing.java | 81 ++++++++ .../apache/hudi/storage/StorageSchemes.java | 10 +- packaging/hudi-azure-bundle/pom.xml | 179 ++++++++++++++++++ 8 files changed, 314 insertions(+), 74 deletions(-) rename hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/{ADLSGen2StorageLockClient.java => ADLSStorageLockClient.java} (87%) rename hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/{ITADLSGen2StorageLockClientAzurite.java => ITADLSStorageLockClientAzurite.java} (92%) delete mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClientUriParsing.java rename hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/{TestADLSGen2StorageBasedLockProvider.java => TestADLSStorageBasedLockProvider.java} (95%) rename hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/{TestADLSGen2StorageLockClient.java => TestADLSStorageLockClient.java} (98%) create mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClientUriParsing.java create mode 100644 packaging/hudi-azure-bundle/pom.xml diff --git a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSGen2StorageLockClient.java b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java similarity index 87% rename from hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSGen2StorageLockClient.java rename to hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java index 4740d142a94ec..353b0402da95b 100644 --- a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSGen2StorageLockClient.java +++ b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java @@ -57,16 +57,26 @@ import static java.nio.charset.StandardCharsets.UTF_8; /** - * ADLS Gen2 (ABFS/ABFSS) implementation of {@link StorageLockClient} using Azure Blob conditional requests. + * Azure Data Lake Storage (ADLS) implementation of {@link StorageLockClient} using Azure Blob conditional requests. * - *

Lock semantics match S3/GCS storage lock clients: - * - Create: conditional write with If-None-Match: * - * - Update/Renew/Expire: conditional write with If-Match: <etag> + *

Supports the following URI schemes: + *

    + *
  • ADLS Gen2: {@code abfs://} and {@code abfss://}
  • + *
  • Azure Blob Storage: {@code wasb://} and {@code wasbs://}
  • + *
+ * + * + *
    + *
  • Create: conditional write with If-None-Match: *
  • + *
  • Update/Renew/Expire: conditional write with If-Match: <etag>
  • + *
* - *

Expected lock URI format: + *

Expected lock URI formats: *

    *
  • {@code abfs://<container>@<account>.dfs.core.windows.net/<path>}
  • *
  • {@code abfss://<container>@<account>.dfs.core.windows.net/<path>}
  • + *
  • {@code wasb://<container>@<account>.blob.core.windows.net/<path>}
  • + *
  • {@code wasbs://<container>@<account>.blob.core.windows.net/<path>}
  • *
* *

Authentication precedence (via {@link Properties}): @@ -78,7 +88,7 @@ */ @Slf4j @ThreadSafe -public class ADLSGen2StorageLockClient implements StorageLockClient { +public class ADLSStorageLockClient implements StorageLockClient { private static final int PRECONDITION_FAILURE_ERROR_CODE = 412; private static final int NOT_FOUND_ERROR_CODE = 404; @@ -100,15 +110,15 @@ public class ADLSGen2StorageLockClient implements StorageLockClient { * Constructor used by reflection by {@link org.apache.hudi.client.transaction.lock.StorageBasedLockProvider}. * * @param ownerId lock owner id - * @param lockFileUri lock file URI (ABFS/ABFSS) + * @param lockFileUri lock file URI (abfs/abfss/wasb/wasbs) * @param props properties used to customize/authenticate the Azure client */ - public ADLSGen2StorageLockClient(String ownerId, String lockFileUri, Properties props) { + public ADLSStorageLockClient(String ownerId, String lockFileUri, Properties props) { this(ownerId, lockFileUri, props, createDefaultBlobServiceClient(), log); } @VisibleForTesting - ADLSGen2StorageLockClient( + ADLSStorageLockClient( String ownerId, String lockFileUri, Properties props, @@ -286,7 +296,8 @@ public void close() { // BlobServiceClient does not require explicit close. No-op. } - private static AzureLocation parseAzureLocation(String uriString) { + @VisibleForTesting + static AzureLocation parseAzureLocation(String uriString) { try { URI uri = new URI(uriString); String scheme = uri.getScheme(); @@ -297,8 +308,8 @@ private static AzureLocation parseAzureLocation(String uriString) { String authority = uri.getAuthority(); String path = uri.getPath() == null ? "" : uri.getPath().replaceFirst("/", ""); + // ADLS Gen2: abfs[s]://@.dfs.core.windows.net/ if ("abfs".equalsIgnoreCase(scheme) || "abfss".equalsIgnoreCase(scheme)) { - // abfs[s]://@.dfs.core.windows.net/ if (authority == null || !authority.contains("@")) { throw new IllegalArgumentException("ABFS URI authority must be in the form '@': " + uriString); } @@ -313,8 +324,23 @@ private static AzureLocation parseAzureLocation(String uriString) { return new AzureLocation(endpoint, container, path, null); } + // Azure Blob Storage: wasb[s]://@.blob.core.windows.net/ + if ("wasb".equalsIgnoreCase(scheme) || "wasbs".equalsIgnoreCase(scheme)) { + if (authority == null || !authority.contains("@")) { + throw new IllegalArgumentException("WASB URI authority must be in the form '@': " + uriString); + } + String[] parts = authority.split("@", 2); + String container = parts[0]; + String host = parts[1]; + String endpoint = "https://" + host; + if (container.isEmpty() || path.isEmpty()) { + throw new IllegalArgumentException("WASB URI must contain container and path: " + uriString); + } + return new AzureLocation(endpoint, container, path, null); + } + + // Direct HTTPS: https://.blob.core.windows.net// if ("https".equalsIgnoreCase(scheme)) { - // https://.blob.core.windows.net// if (authority == null || authority.isEmpty()) { throw new IllegalArgumentException("HTTPS URI authority missing: " + uriString); } @@ -327,7 +353,8 @@ private static AzureLocation parseAzureLocation(String uriString) { return new AzureLocation("https://" + authority, container, blobPath, null); } - throw new IllegalArgumentException("Unsupported scheme for Azure storage lock: " + scheme); + throw new IllegalArgumentException("Unsupported scheme for Azure storage lock: " + scheme + + ". Supported schemes: abfs, abfss, wasb, wasbs, https"); } catch (URISyntaxException e) { throw new HoodieLockException("Failed to parse Azure URI: " + uriString, e); } diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSGen2StorageLockClientAzurite.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSStorageLockClientAzurite.java similarity index 92% rename from hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSGen2StorageLockClientAzurite.java rename to hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSStorageLockClientAzurite.java index 8cef55f5100a9..9ca862a0ec855 100644 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSGen2StorageLockClientAzurite.java +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSStorageLockClientAzurite.java @@ -44,13 +44,13 @@ import static org.junit.jupiter.api.Assertions.assertTrue; /** - * Integration tests for {@link ADLSGen2StorageLockClient} using Azurite (Azure Storage emulator). + * Integration tests for {@link ADLSStorageLockClient} using Azurite (Azure Storage emulator). * *

Run with: {@code mvn -Pazure-integration-tests -pl hudi-azure verify} */ @Testcontainers(disabledWithoutDocker = true) @DisabledIfEnvironmentVariable(named = "SKIP_AZURITE_IT", matches = "true") -public class ITADLSGen2StorageLockClientAzurite { +public class ITADLSStorageLockClientAzurite { private static final DockerImageName AZURITE_IMAGE = DockerImageName.parse("mcr.microsoft.com/azure-storage/azurite"); @@ -100,9 +100,9 @@ void testCreateUpdateAndReadLockFileWithAzurite() { // The actual endpoint comes from the connection string. String lockFileUri = "https://localhost:10000/" + container + "/" + blobPath; Properties props = new Properties(); - props.setProperty(ADLSGen2StorageLockClient.AZURE_CONNECTION_STRING, connectionString()); + props.setProperty(ADLSStorageLockClient.AZURE_CONNECTION_STRING, connectionString()); - ADLSGen2StorageLockClient owner1 = new ADLSGen2StorageLockClient("owner1", lockFileUri, props); + ADLSStorageLockClient owner1 = new ADLSStorageLockClient("owner1", lockFileUri, props); StorageLockData lockData1 = new StorageLockData(false, System.currentTimeMillis() + 60_000, "owner1"); Pair> upsert1 = owner1.tryUpsertLockFile(lockData1, Option.empty()); assertEquals(LockUpsertResult.SUCCESS, upsert1.getLeft()); @@ -114,7 +114,7 @@ void testCreateUpdateAndReadLockFileWithAzurite() { assertTrue(read.getRight().isPresent()); // Wrong ETag should fail with precondition and be mapped to ACQUIRED_BY_OTHERS - ADLSGen2StorageLockClient owner2 = new ADLSGen2StorageLockClient("owner2", lockFileUri, props); + ADLSStorageLockClient owner2 = new ADLSStorageLockClient("owner2", lockFileUri, props); StorageLockFile wrongPrev = new StorageLockFile(lockData1, "\"etag-does-not-match\""); StorageLockData lockData2 = new StorageLockData(false, System.currentTimeMillis() + 60_000, "owner2"); Pair> upsert2 = @@ -122,4 +122,3 @@ void testCreateUpdateAndReadLockFileWithAzurite() { assertEquals(LockUpsertResult.ACQUIRED_BY_OTHERS, upsert2.getLeft()); } } - diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClientUriParsing.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClientUriParsing.java deleted file mode 100644 index 36867ae17e278..0000000000000 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClientUriParsing.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.azure.transaction.lock; - -import org.junit.jupiter.api.Test; - -import java.lang.reflect.Method; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -public class TestADLSGen2StorageLockClientUriParsing { - - @Test - public void testParseAbfsUri() throws Exception { - String uri = "abfs://container@account.dfs.core.windows.net/table/.hoodie/.locks/table_lock.json"; - - Method m = ADLSGen2StorageLockClient.class.getDeclaredMethod("parseAzureLocation", String.class); - m.setAccessible(true); - Object location = m.invoke(null, uri); - - ADLSGen2StorageLockClient.AzureLocation l = (ADLSGen2StorageLockClient.AzureLocation) location; - - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); - } -} - diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageBasedLockProvider.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageBasedLockProvider.java similarity index 95% rename from hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageBasedLockProvider.java rename to hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageBasedLockProvider.java index 5481d45b2deb6..4c9ea9b024999 100644 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageBasedLockProvider.java +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageBasedLockProvider.java @@ -30,7 +30,7 @@ import static org.apache.hudi.common.config.HoodieCommonConfig.BASE_PATH; @Disabled("Requires Azurite/Testcontainers-based integration environment (not enabled by default).") -public class TestADLSGen2StorageBasedLockProvider extends StorageBasedLockProviderTestBase { +public class TestADLSStorageBasedLockProvider extends StorageBasedLockProviderTestBase { @BeforeEach void setupLockProvider() { @@ -45,4 +45,3 @@ protected StorageBasedLockProvider createLockProvider() { return new StorageBasedLockProvider(lockConf, HoodieTestUtils.getDefaultStorageConf()); } } - diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClient.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClient.java similarity index 98% rename from hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClient.java rename to hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClient.java index 222be00a53603..3b04224602821 100644 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSGen2StorageLockClient.java +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClient.java @@ -64,7 +64,7 @@ import static org.mockito.Mockito.when; @ExtendWith(MockitoExtension.class) -public class TestADLSGen2StorageLockClient { +public class TestADLSStorageLockClient { private static final String OWNER_ID = "ownerId"; private static final String LOCK_FILE_URI = @@ -87,7 +87,7 @@ public class TestADLSGen2StorageLockClient { @Mock private Logger mockLogger; - private ADLSGen2StorageLockClient lockClient; + private ADLSStorageLockClient lockClient; @BeforeEach void setUp() { @@ -99,7 +99,7 @@ private void setUp(String lockFileUri) { String expectedBlobPath = lockFileUri.replaceFirst("^abfss?://[^/]+/", ""); when(mockContainerClient.getBlobClient(eq(expectedBlobPath))).thenReturn(mockBlobClient); - lockClient = new ADLSGen2StorageLockClient( + lockClient = new ADLSStorageLockClient( OWNER_ID, lockFileUri, new Properties(), @@ -312,4 +312,3 @@ private static Object getFieldIfExists(Object target, String fieldName) { } } } - diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClientUriParsing.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClientUriParsing.java new file mode 100644 index 0000000000000..9504b3cc6d8b5 --- /dev/null +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClientUriParsing.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestADLSStorageLockClientUriParsing { + + @Test + public void testParseAbfsUri() { + String uri = "abfs://container@account.dfs.core.windows.net/table/.hoodie/.locks/table_lock.json"; + + ADLSStorageLockClient.AzureLocation l = ADLSStorageLockClient.parseAzureLocation(uri); + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + } + + @Test + public void testParseAbfssUri() { + String uri = "abfss://container@account.dfs.core.windows.net/lake/db/tbl/.hoodie/.locks/table_lock.json"; + + ADLSStorageLockClient.AzureLocation l = ADLSStorageLockClient.parseAzureLocation(uri); + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("lake/db/tbl/.hoodie/.locks/table_lock.json", l.blobPath); + } + + @Test + public void testParseWasbUri() { + String uri = "wasb://container@account.blob.core.windows.net/table/.hoodie/.locks/table_lock.json"; + + ADLSStorageLockClient.AzureLocation l = ADLSStorageLockClient.parseAzureLocation(uri); + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + } + + @Test + public void testParseWasbsUri() { + String uri = "wasbs://container@account.blob.core.windows.net/lake/db/tbl/.hoodie/.locks/table_lock.json"; + + ADLSStorageLockClient.AzureLocation l = ADLSStorageLockClient.parseAzureLocation(uri); + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("lake/db/tbl/.hoodie/.locks/table_lock.json", l.blobPath); + } + + @Test + public void testParseHttpsUri() { + String uri = "https://account.blob.core.windows.net/container/table/.hoodie/.locks/table_lock.json"; + + ADLSStorageLockClient.AzureLocation l = ADLSStorageLockClient.parseAzureLocation(uri); + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java b/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java index 256ba3b9039c7..0450dbfa6db4b 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java @@ -47,14 +47,14 @@ public enum StorageSchemes { S3("s3", true, null, "org.apache.hudi.aws.transaction.lock.S3StorageLockClient"), // Google Cloud Storage GCS("gs", true, null, "org.apache.hudi.gcp.transaction.lock.GCSStorageLockClient"), - // Azure WASB - WASB("wasb", null, null, null), - WASBS("wasbs", null, null, null), + // Azure WASB (Azure Blob Storage) + WASB("wasb", true, null, "org.apache.hudi.azure.transaction.lock.ADLSStorageLockClient"), + WASBS("wasbs", true, null, "org.apache.hudi.azure.transaction.lock.ADLSStorageLockClient"), // Azure ADLS ADL("adl", null, null, null), // Azure ADLS Gen2 - ABFS("abfs", true, null, "org.apache.hudi.azure.transaction.lock.ADLSGen2StorageLockClient"), - ABFSS("abfss", true, null, "org.apache.hudi.azure.transaction.lock.ADLSGen2StorageLockClient"), + ABFS("abfs", true, null, "org.apache.hudi.azure.transaction.lock.ADLSStorageLockClient"), + ABFSS("abfss", true, null, "org.apache.hudi.azure.transaction.lock.ADLSStorageLockClient"), // Aliyun OSS OSS("oss", null, null, null), // View FS for federated setups. If federating across cloud stores, then append diff --git a/packaging/hudi-azure-bundle/pom.xml b/packaging/hudi-azure-bundle/pom.xml new file mode 100644 index 0000000000000..479a4bb6c93c0 --- /dev/null +++ b/packaging/hudi-azure-bundle/pom.xml @@ -0,0 +1,179 @@ + + + + + hudi + org.apache.hudi + 1.2.0-SNAPSHOT + ../../pom.xml + + 4.0.0 + + hudi-azure-bundle + jar + + + true + ${project.parent.basedir} + true + + + + + + org.apache.rat + apache-rat-plugin + + + org.apache.maven.plugins + maven-shade-plugin + ${maven-shade-plugin.version} + + + package + + shade + + + ${shadeSources} + ${project.build.directory}/dependency-reduced-pom.xml + + + + + true + + + META-INF/LICENSE + target/classes/META-INF/LICENSE + + + + + + + org.apache.hudi:hudi-hadoop-mr + org.apache.hudi:hudi-sync-common + org.apache.hudi:hudi-hive-sync + org.apache.hudi:hudi-azure + + + com.azure:* + com.microsoft.azure:* + io.projectreactor:* + io.netty:* + + + io.dropwizard.metrics:metrics-core + com.beust:jcommander + commons-io:commons-io + org.openjdk.jol:jol-core + org.apache.parquet:parquet-avro + + + + + com.beust.jcommander. + org.apache.hudi.com.beust.jcommander. + + + org.apache.commons.io. + org.apache.hudi.org.apache.commons.io. + + + com.codahale.metrics. + org.apache.hudi.com.codahale.metrics. + + + org.openjdk.jol. + org.apache.hudi.org.openjdk.jol. + + + false + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + META-INF/services/javax.* + **/*.proto + + + + ${project.artifactId}-${project.version} + + + + + + + + src/main/resources + + + + + + + + org.apache.hudi + hudi-common + ${project.version} + + + + org.apache.hadoop + * + + + com.fasterxml.jackson.module + jackson-module-afterburner + + + + + org.apache.hudi + hudi-hive-sync + ${project.version} + + + javax.servlet + servlet-api + + + + + org.apache.hudi + hudi-azure + ${project.version} + + + + + org.apache.parquet + parquet-avro + ${parquet.version} + compile + + + + From 43b35a0a65113c81463a237015cb99a18515d352 Mon Sep 17 00:00:00 2001 From: Revanth Chandupatla Date: Tue, 20 Jan 2026 10:15:04 -0500 Subject: [PATCH 03/11] Test schema add --- .../java/org/apache/hudi/common/fs/TestStorageSchemes.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java index 42a45bba31a47..faa7f52e12d68 100644 --- a/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/common/fs/TestStorageSchemes.java @@ -43,7 +43,8 @@ public void testStorageSchemes() { String schemeName = scheme.getScheme(); if (scheme.getScheme().startsWith("s3") || scheme.getScheme().startsWith("gs") - || scheme.getScheme().startsWith("abfs")) { + || scheme.getScheme().startsWith("abfs") + || scheme.getScheme().startsWith("wasb")) { assertTrue(StorageSchemes.getStorageLockImplementationIfExists(schemeName).isPresent()); } else { assertFalse(StorageSchemes.getStorageLockImplementationIfExists(schemeName).isPresent()); From 24d84063a6e9887312aedb0d1372d28c243a20f5 Mon Sep 17 00:00:00 2001 From: Revanth Chandupatla Date: Wed, 11 Feb 2026 11:14:37 -0500 Subject: [PATCH 04/11] Merge changes in two branches --- .../lock/ADLSStorageLockClient.java | 84 +++++++++++++++---- 1 file changed, 70 insertions(+), 14 deletions(-) diff --git a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java index 353b0402da95b..49a6febd5058f 100644 --- a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java +++ b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java @@ -28,18 +28,26 @@ import org.apache.hudi.common.util.VisibleForTesting; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieLockException; +import org.apache.hudi.config.StorageBasedLockConfig; +import com.azure.core.exception.HttpResponseException; +import com.azure.core.http.policy.ExponentialBackoffOptions; +import com.azure.core.http.policy.RetryOptions; +import com.azure.core.http.rest.Response; import com.azure.core.credential.AzureSasCredential; import com.azure.core.util.BinaryData; import com.azure.core.util.Context; +import com.azure.core.util.HttpClientOptions; import com.azure.identity.DefaultAzureCredentialBuilder; import com.azure.storage.blob.BlobClient; import com.azure.storage.blob.BlobContainerClient; import com.azure.storage.blob.BlobServiceClient; import com.azure.storage.blob.BlobServiceClientBuilder; +import com.azure.storage.blob.models.BlobErrorCode; import com.azure.storage.blob.models.BlobProperties; import com.azure.storage.blob.models.BlobRequestConditions; import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.models.BlockBlobItem; import com.azure.storage.blob.options.BlobParallelUploadOptions; import lombok.extern.slf4j.Slf4j; import org.slf4j.Logger; @@ -52,6 +60,7 @@ import java.io.UncheckedIOException; import java.net.URI; import java.net.URISyntaxException; +import java.time.Duration; import java.util.Properties; import static java.nio.charset.StandardCharsets.UTF_8; @@ -139,6 +148,7 @@ private static Functions.Function1 createDefau return (location) -> { Properties props = location.props; BlobServiceClientBuilder builder = new BlobServiceClientBuilder(); + configureAzureClientOptions(builder, props); String connectionString = props == null ? null : props.getProperty(AZURE_CONNECTION_STRING); if (connectionString != null && !connectionString.trim().isEmpty()) { @@ -156,6 +166,37 @@ private static Functions.Function1 createDefau }; } + private static void configureAzureClientOptions(BlobServiceClientBuilder builder, Properties props) { + // Set Azure SDK timeouts based on lock validity to avoid long-hanging calls. + long validityTimeoutSecs = getLongProperty(props, StorageBasedLockConfig.VALIDITY_TIMEOUT_SECONDS); + long azureCallTimeoutSecs = Math.max(1, validityTimeoutSecs / 5); + + // Disable automatic SDK retries; Hudi manages retries at the lock-provider level. + ExponentialBackoffOptions exponentialOptions = new ExponentialBackoffOptions().setMaxRetries(0); + RetryOptions retryOptions = new RetryOptions(exponentialOptions); + + HttpClientOptions clientOptions = new HttpClientOptions() + .setResponseTimeout(Duration.ofSeconds(azureCallTimeoutSecs)) + .setReadTimeout(Duration.ofSeconds(azureCallTimeoutSecs)); + + builder.retryOptions(retryOptions).clientOptions(clientOptions); + } + + private static long getLongProperty(Properties props, org.apache.hudi.common.config.ConfigProperty prop) { + if (props == null) { + return prop.defaultValue(); + } + Object value = props.get(prop.key()); + if (value == null) { + return prop.defaultValue(); + } + try { + return Long.parseLong(String.valueOf(value)); + } catch (NumberFormatException e) { + return prop.defaultValue(); + } + } + @Override public Pair> tryUpsertLockFile( StorageLockData newLockData, @@ -165,17 +206,12 @@ public Pair> tryUpsertLockFile( StorageLockFile updated = createOrUpdateLockFileInternal(newLockData, expectedEtag); return Pair.of(LockUpsertResult.SUCCESS, Option.of(updated)); } catch (BlobStorageException e) { - int code = e.getStatusCode(); - if (code == PRECONDITION_FAILURE_ERROR_CODE || code == CONFLICT_ERROR_CODE) { - logger.info("OwnerId: {}, Unable to write new lock file. Another process has modified this lockfile {} already.", - ownerId, lockFileUri); - return Pair.of(LockUpsertResult.ACQUIRED_BY_OTHERS, Option.empty()); - } else if (code == RATE_LIMIT_ERROR_CODE) { - logger.warn("OwnerId: {}, Rate limit exceeded for lock file: {}", ownerId, lockFileUri); - } else if (code >= INTERNAL_SERVER_ERROR_CODE_MIN) { - logger.warn("OwnerId: {}, Azure returned internal server error code for lock file: {}", - ownerId, lockFileUri, e); - } else { + return Pair.of(handleUpsertBlobStorageException(e), Option.empty()); + } catch (HttpResponseException e) { + logger.error("OwnerId: {}, Unexpected Azure SDK error while writing lock file: {}", + ownerId, lockFileUri, e); + if (!previousLockFile.isPresent()) { + // For create, fail fast since this indicates a larger issue. throw e; } return Pair.of(LockUpsertResult.UNKNOWN_ERROR, Option.empty()); @@ -203,7 +239,7 @@ public Pair> readCurrentLockFile() { private LockGetResult handleGetStorageException(BlobStorageException e, boolean ignore404) { int code = e.getStatusCode(); - if (code == NOT_FOUND_ERROR_CODE) { + if (code == NOT_FOUND_ERROR_CODE || e.getErrorCode() == BlobErrorCode.BLOB_NOT_FOUND) { if (ignore404) { logger.info("OwnerId: {}, Azure stream read failure detected: {}", ownerId, lockFileUri); } else { @@ -239,11 +275,31 @@ private StorageLockFile createOrUpdateLockFileInternal(StorageLockData lockData, BlobParallelUploadOptions options = new BlobParallelUploadOptions(BinaryData.fromBytes(bytes)) .setRequestConditions(conditions); - lockBlobClient.uploadWithResponse(options, null, Context.NONE); - String newEtag = lockBlobClient.getProperties().getETag(); + Response response = lockBlobClient.uploadWithResponse(options, null, Context.NONE); + String newEtag = response.getValue() != null ? response.getValue().getETag() : null; + if (newEtag == null) { + newEtag = lockBlobClient.getProperties().getETag(); + } return new StorageLockFile(lockData, newEtag); } + private LockUpsertResult handleUpsertBlobStorageException(BlobStorageException e) { + int code = e.getStatusCode(); + if (code == PRECONDITION_FAILURE_ERROR_CODE || e.getErrorCode() == BlobErrorCode.CONDITION_NOT_MET) { + logger.info("OwnerId: {}, Unable to write new lock file. Another process has modified this lockfile {} already.", + ownerId, lockFileUri); + return LockUpsertResult.ACQUIRED_BY_OTHERS; + } else if (code == CONFLICT_ERROR_CODE) { + logger.info("OwnerId: {}, Retriable conditional request conflict error: {}", ownerId, lockFileUri); + } else if (code == RATE_LIMIT_ERROR_CODE) { + logger.warn("OwnerId: {}, Rate limit exceeded for lock file: {}", ownerId, lockFileUri); + } else if (code >= INTERNAL_SERVER_ERROR_CODE_MIN) { + logger.warn("OwnerId: {}, Azure returned internal server error code for lock file: {}", ownerId, lockFileUri, e); + } else { + logger.warn("OwnerId: {}, Error writing lock file: {}", ownerId, lockFileUri, e); + } + return LockUpsertResult.UNKNOWN_ERROR; + } @Override public Option readObject(String filePath, boolean checkExistsFirst) { try { From df7054b59932d78d931e443bc3ecbd1e4b9b83b4 Mon Sep 17 00:00:00 2001 From: Revanth Chandupatla Date: Tue, 24 Feb 2026 09:57:00 -0500 Subject: [PATCH 05/11] Address review comments. Relable ADLS to Azure Storage. Fix etag fetch calls and retries --- .../lock/AzureStorageLockClient.java | 430 ++++++++++++++++++ .../hudi/config/AzureStorageLockConfig.java | 52 +++ .../lock/ITAzureStorageLockClientAzurite.java | 124 +++++ .../TestAzureStorageBasedLockProvider.java | 47 ++ .../lock/TestAzureStorageLockClient.java | 338 ++++++++++++++ .../TestAzureStorageLockClientUriParsing.java | 92 ++++ .../apache/hudi/storage/StorageSchemes.java | 8 +- pom.xml | 6 +- 8 files changed, 1091 insertions(+), 6 deletions(-) create mode 100644 hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java create mode 100644 hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java create mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITAzureStorageLockClientAzurite.java create mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageBasedLockProvider.java create mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClient.java create mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClientUriParsing.java diff --git a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java new file mode 100644 index 0000000000000..9f207d80cfb6b --- /dev/null +++ b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java @@ -0,0 +1,430 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.apache.hudi.client.transaction.lock.StorageLockClient; +import org.apache.hudi.client.transaction.lock.models.LockGetResult; +import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; +import org.apache.hudi.client.transaction.lock.models.StorageLockData; +import org.apache.hudi.client.transaction.lock.models.StorageLockFile; +import org.apache.hudi.common.util.Functions; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.VisibleForTesting; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieLockException; +import org.apache.hudi.config.AzureStorageLockConfig; +import org.apache.hudi.config.StorageBasedLockConfig; +import org.apache.hudi.common.config.TypedProperties; + +import com.azure.core.credential.AzureSasCredential; +import com.azure.core.exception.HttpResponseException; +import com.azure.core.http.policy.ExponentialBackoffOptions; +import com.azure.core.http.policy.RetryOptions; +import com.azure.core.http.rest.Response; +import com.azure.core.util.BinaryData; +import com.azure.core.util.Context; +import com.azure.core.util.HttpClientOptions; +import com.azure.identity.DefaultAzureCredentialBuilder; +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import com.azure.storage.blob.BlobUrlParts; +import com.azure.storage.blob.models.BlobErrorCode; +import com.azure.storage.blob.models.BlobRequestConditions; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.models.BlockBlobItem; +import com.azure.storage.blob.options.BlobParallelUploadOptions; +import lombok.AllArgsConstructor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.ThreadSafe; + +import java.net.URI; +import java.net.URISyntaxException; +import java.time.Duration; +import java.util.Properties; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hudi.common.util.ConfigUtils.getLongWithAltKeys; + +/** + * Azure Storage implementation of {@link StorageLockClient} using Azure Blob conditional requests. + * + *

Supports the following URI schemes: + *

    + *
  • ADLS Gen2: {@code abfs://} and {@code abfss://}
  • + *
  • Azure Blob Storage: {@code wasb://} and {@code wasbs://}
  • + *
+ * + * + *
    + *
  • Create: conditional write with If-None-Match: *
  • + *
  • Update/Renew/Expire: conditional write with If-Match: <etag>
  • + *
+ * + *

Expected lock URI formats: + *

    + *
  • {@code abfs://<container>@<account>.dfs.core.windows.net/<path>}
  • + *
  • {@code abfss://<container>@<account>.dfs.core.windows.net/<path>}
  • + *
  • {@code wasb://<container>@<account>.blob.core.windows.net/<path>}
  • + *
  • {@code wasbs://<container>@<account>.blob.core.windows.net/<path>}
  • + *
+ * + *

Authentication precedence (via {@link Properties}): + *

    + *
  • {@link AzureStorageLockConfig#AZURE_CONNECTION_STRING}
  • + *
  • {@link AzureStorageLockConfig#AZURE_SAS_TOKEN}
  • + *
  • DefaultAzureCredential
  • + *
+ */ +@ThreadSafe +public class AzureStorageLockClient implements StorageLockClient { + + private static final int PRECONDITION_FAILURE_ERROR_CODE = 412; + private static final int NOT_FOUND_ERROR_CODE = 404; + private static final int CONFLICT_ERROR_CODE = 409; + private static final int RATE_LIMIT_ERROR_CODE = 429; + private static final int INTERNAL_SERVER_ERROR_CODE_MIN = 500; + + public static final String AZURE_CONNECTION_STRING = AzureStorageLockConfig.AZURE_CONNECTION_STRING.key(); + public static final String AZURE_SAS_TOKEN = AzureStorageLockConfig.AZURE_SAS_TOKEN.key(); + + private final Logger logger; + private final BlobServiceClient blobServiceClient; + private final Functions.Function1 blobServiceClientSupplier; + private final ConcurrentMap secondaryBlobServiceClients; + private final BlobClient lockBlobClient; + private final Properties clientProperties; + private final String ownerId; + private final String lockFileUri; + private final String lockBlobEndpoint; + + /** + * Constructor used by reflection by {@link org.apache.hudi.client.transaction.lock.StorageBasedLockProvider}. + * + * @param ownerId lock owner id + * @param lockFileUri lock file URI (abfs/abfss/wasb/wasbs) + * @param props properties used to customize/authenticate the Azure client + */ + public AzureStorageLockClient(String ownerId, String lockFileUri, Properties props) { + this(ownerId, lockFileUri, props, createDefaultBlobServiceClient(), LoggerFactory.getLogger(AzureStorageLockClient.class)); + } + + @VisibleForTesting + AzureStorageLockClient( + String ownerId, + String lockFileUri, + Properties props, + Functions.Function1 blobServiceClientSupplier, + Logger logger) { + this.ownerId = ownerId; + this.lockFileUri = lockFileUri; + this.logger = logger; + this.clientProperties = props; + this.blobServiceClientSupplier = blobServiceClientSupplier; + this.secondaryBlobServiceClients = new ConcurrentHashMap<>(); + + AzureLocation location = parseAzureLocation(lockFileUri).withProperties(props); + this.lockBlobEndpoint = location.blobEndpoint; + this.blobServiceClient = blobServiceClientSupplier.apply(location); + BlobContainerClient containerClient = blobServiceClient.getBlobContainerClient(location.container); + this.lockBlobClient = containerClient.getBlobClient(location.blobPath); + } + + private static Functions.Function1 createDefaultBlobServiceClient() { + return (location) -> { + Properties props = location.props; + BlobServiceClientBuilder builder = new BlobServiceClientBuilder(); + configureAzureClientOptions(builder, props); + + String connectionString = props == null ? null : props.getProperty(AZURE_CONNECTION_STRING); + if (connectionString != null && !connectionString.trim().isEmpty()) { + return builder.connectionString(connectionString).buildClient(); + } + + builder.endpoint(location.blobEndpoint); + String sasToken = props == null ? null : props.getProperty(AZURE_SAS_TOKEN); + if (sasToken != null && !sasToken.trim().isEmpty()) { + String cleaned = sasToken.startsWith("?") ? sasToken.substring(1) : sasToken; + return builder.credential(new AzureSasCredential(cleaned)).buildClient(); + } + + return builder.credential(new DefaultAzureCredentialBuilder().build()).buildClient(); + }; + } + + private static void configureAzureClientOptions(BlobServiceClientBuilder builder, Properties props) { + // Set Azure SDK timeouts based on lock validity to avoid long-hanging calls. + TypedProperties typedProps = new TypedProperties(); + if (props != null) { + typedProps.putAll(props); + } + long validityTimeoutSecs; + try { + validityTimeoutSecs = getLongWithAltKeys(typedProps, StorageBasedLockConfig.VALIDITY_TIMEOUT_SECONDS); + } catch (NumberFormatException e) { + validityTimeoutSecs = StorageBasedLockConfig.VALIDITY_TIMEOUT_SECONDS.defaultValue(); + } + long azureCallTimeoutSecs = Math.max(1, validityTimeoutSecs / 5); + + // Disable automatic SDK retries; Hudi manages retries at the lock-provider level. + ExponentialBackoffOptions exponentialOptions = new ExponentialBackoffOptions().setMaxRetries(0); + RetryOptions retryOptions = new RetryOptions(exponentialOptions); + + HttpClientOptions clientOptions = new HttpClientOptions() + .setResponseTimeout(Duration.ofSeconds(azureCallTimeoutSecs)) + .setReadTimeout(Duration.ofSeconds(azureCallTimeoutSecs)); + + builder.retryOptions(retryOptions).clientOptions(clientOptions); + } + + @Override + public Pair> tryUpsertLockFile( + StorageLockData newLockData, + Option previousLockFile) { + String expectedEtag = previousLockFile.isPresent() ? previousLockFile.get().getVersionId() : null; + try { + StorageLockFile updated = createOrUpdateLockFileInternal(newLockData, expectedEtag); + return Pair.of(LockUpsertResult.SUCCESS, Option.of(updated)); + } catch (BlobStorageException e) { + return Pair.of(handleUpsertBlobStorageException(e), Option.empty()); + } catch (HttpResponseException e) { + logger.error("OwnerId: {}, Unexpected Azure SDK error while writing lock file: {}", + ownerId, lockFileUri, e); + if (!previousLockFile.isPresent()) { + // For create, fail fast since this indicates a larger issue. + throw e; + } + return Pair.of(LockUpsertResult.UNKNOWN_ERROR, Option.empty()); + } catch (Exception e) { + logger.error("OwnerId: {}, Unexpected error while writing lock file: {}", ownerId, lockFileUri, e); + return Pair.of(LockUpsertResult.UNKNOWN_ERROR, Option.empty()); + } + } + + @Override + public Pair> readCurrentLockFile() { + try { + Response response = lockBlobClient.downloadContentWithResponse(null, null, null, Context.NONE); + String eTag = response.getHeaders() != null ? response.getHeaders().getValue("ETag") : null; + StorageLockFile lockFile = StorageLockFile.createFromStream(response.getValue().toStream(), eTag); + return Pair.of(LockGetResult.SUCCESS, Option.of(lockFile)); + } catch (BlobStorageException e) { + return Pair.of(handleGetStorageException(e), Option.empty()); + } + } + + private LockGetResult handleGetStorageException(BlobStorageException e) { + int code = e.getStatusCode(); + if (code == NOT_FOUND_ERROR_CODE || e.getErrorCode() == BlobErrorCode.BLOB_NOT_FOUND) { + logger.info("OwnerId: {}, Object not found in the path: {}", ownerId, lockFileUri); + return LockGetResult.NOT_EXISTS; + } else if (code == RATE_LIMIT_ERROR_CODE) { + logger.warn("OwnerId: {}, Rate limit exceeded for lock file: {}", ownerId, lockFileUri); + } else if (code >= INTERNAL_SERVER_ERROR_CODE_MIN) { + logger.warn("OwnerId: {}, Azure returned internal server error code for lock file: {}", ownerId, lockFileUri, e); + } else { + throw e; + } + return LockGetResult.UNKNOWN_ERROR; + } + + private StorageLockFile createOrUpdateLockFileInternal(StorageLockData lockData, String expectedEtag) { + byte[] bytes = StorageLockFile.toByteArray(lockData); + BlobRequestConditions conditions = new BlobRequestConditions(); + if (expectedEtag == null) { + conditions.setIfNoneMatch("*"); + } else { + conditions.setIfMatch(expectedEtag); + } + + BlobParallelUploadOptions options = new BlobParallelUploadOptions(BinaryData.fromBytes(bytes)) + .setRequestConditions(conditions); + Response response = lockBlobClient.uploadWithResponse(options, null, Context.NONE); + String newEtag = response.getHeaders() != null ? response.getHeaders().getValue("ETag") : null; + if (newEtag == null && response.getValue() != null) { + newEtag = response.getValue().getETag(); + } + if (newEtag == null || newEtag.isEmpty()) { + throw new HoodieLockException("Missing ETag in Azure upload response for lock file: " + lockFileUri); + } + return new StorageLockFile(lockData, newEtag); + } + + private LockUpsertResult handleUpsertBlobStorageException(BlobStorageException e) { + int code = e.getStatusCode(); + if (code == PRECONDITION_FAILURE_ERROR_CODE || e.getErrorCode() == BlobErrorCode.CONDITION_NOT_MET) { + logger.info("OwnerId: {}, Unable to write new lock file. Another process has modified this lockfile {} already.", + ownerId, lockFileUri); + return LockUpsertResult.ACQUIRED_BY_OTHERS; + } else if (code == CONFLICT_ERROR_CODE) { + logger.info("OwnerId: {}, Retriable conditional request conflict error: {}", ownerId, lockFileUri); + } else if (code == RATE_LIMIT_ERROR_CODE) { + logger.warn("OwnerId: {}, Rate limit exceeded for lock file: {}", ownerId, lockFileUri); + } else if (code >= INTERNAL_SERVER_ERROR_CODE_MIN) { + logger.warn("OwnerId: {}, Azure returned internal server error code for lock file: {}", ownerId, lockFileUri, e); + } else { + logger.warn("OwnerId: {}, Error writing lock file: {}", ownerId, lockFileUri, e); + } + return LockUpsertResult.UNKNOWN_ERROR; + } + + @Override + public Option readObject(String filePath, boolean checkExistsFirst) { + try { + AzureLocation location = parseAzureLocation(filePath); + BlobServiceClient svc = getBlobServiceClient(location); + BlobClient blobClient = svc.getBlobContainerClient(location.container).getBlobClient(location.blobPath); + + if (checkExistsFirst && !blobClient.exists()) { + logger.debug("JSON config file not found: {}", filePath); + return Option.empty(); + } + byte[] bytes = blobClient.downloadContent().toBytes(); + return Option.of(new String(bytes, UTF_8)); + } catch (BlobStorageException e) { + if (e.getStatusCode() == NOT_FOUND_ERROR_CODE) { + logger.debug("JSON config file not found: {}", filePath); + } else { + logger.warn("Error reading JSON config file: {}", filePath, e); + } + return Option.empty(); + } catch (Exception e) { + logger.warn("Error reading JSON config file: {}", filePath, e); + return Option.empty(); + } + } + + @Override + public boolean writeObject(String filePath, String content) { + try { + AzureLocation location = parseAzureLocation(filePath); + BlobServiceClient svc = getBlobServiceClient(location); + BlobClient blobClient = svc.getBlobContainerClient(location.container).getBlobClient(location.blobPath); + blobClient.upload(BinaryData.fromString(content), true); + logger.debug("Successfully wrote object to: {}", filePath); + return true; + } catch (Exception e) { + logger.error("Error writing object to: {}", filePath, e); + return false; + } + } + + private BlobServiceClient getBlobServiceClient(AzureLocation location) { + if (location.blobEndpoint.equals(lockBlobEndpoint)) { + return blobServiceClient; + } + return secondaryBlobServiceClients.computeIfAbsent( + location.blobEndpoint, + endpoint -> blobServiceClientSupplier.apply(location.withProperties(clientProperties))); + } + + @Override + public void close() { + // BlobServiceClient does not require explicit close. No-op. + } + + @VisibleForTesting + static AzureLocation parseAzureLocation(String uriString) { + try { + URI uri = new URI(uriString); + String scheme = uri.getScheme(); + if (scheme == null) { + throw new IllegalArgumentException("URI does not contain a valid scheme."); + } + + String authority = uri.getAuthority(); + String path = uri.getPath() == null ? "" : uri.getPath().replaceFirst("/", ""); + + // ADLS Gen2: abfs[s]://@.dfs.core.windows.net/ + if ("abfs".equalsIgnoreCase(scheme) || "abfss".equalsIgnoreCase(scheme)) { + if (authority == null || !authority.contains("@")) { + throw new IllegalArgumentException("ABFS URI authority must be in the form '@': " + uriString); + } + String[] parts = authority.split("@", 2); + String container = parts[0]; + String host = parts[1]; + String endpointHost = dfsHostToBlobHost(host); + String endpoint = "https://" + endpointHost; + if (container.isEmpty() || path.isEmpty()) { + throw new IllegalArgumentException("ABFS URI must contain container and path: " + uriString); + } + return new AzureLocation(endpoint, container, path, null); + } + + // Azure Blob Storage: wasb[s]://@.blob.core.windows.net/ + if ("wasb".equalsIgnoreCase(scheme) || "wasbs".equalsIgnoreCase(scheme)) { + if (authority == null || !authority.contains("@")) { + throw new IllegalArgumentException("WASB URI authority must be in the form '@': " + uriString); + } + String[] parts = authority.split("@", 2); + String container = parts[0]; + String host = parts[1]; + String endpoint = "https://" + host; + if (container.isEmpty() || path.isEmpty()) { + throw new IllegalArgumentException("WASB URI must contain container and path: " + uriString); + } + return new AzureLocation(endpoint, container, path, null); + } + + // Direct HTTP(S) blob URL. + if ("https".equalsIgnoreCase(scheme) || "http".equalsIgnoreCase(scheme)) { + BlobUrlParts parts = BlobUrlParts.parse(uriString); + String container = parts.getBlobContainerName(); + String blobPath = parts.getBlobName(); + if (container == null || container.isEmpty() || blobPath == null || blobPath.isEmpty()) { + throw new IllegalArgumentException("HTTP(S) URI must contain container and path: " + uriString); + } + return new AzureLocation(parts.getScheme() + "://" + parts.getHost(), container, blobPath, null); + } + + throw new IllegalArgumentException("Unsupported scheme for Azure storage lock: " + scheme + + ". Supported schemes: abfs, abfss, wasb, wasbs, https, http"); + } catch (URISyntaxException e) { + throw new HoodieLockException("Failed to parse Azure URI: " + uriString, e); + } + } + + private static String dfsHostToBlobHost(String host) { + if (host == null) { + return null; + } + if (host.endsWith(".dfs.core.windows.net")) { + return host.replace(".dfs.core.windows.net", ".blob.core.windows.net"); + } + return host; + } + + @VisibleForTesting + @AllArgsConstructor + static final class AzureLocation { + final String blobEndpoint; + final String container; + final String blobPath; + final Properties props; + + AzureLocation withProperties(Properties props) { + return new AzureLocation(blobEndpoint, container, blobPath, props); + } + } +} diff --git a/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java b/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java new file mode 100644 index 0000000000000..11bf3074f3190 --- /dev/null +++ b/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.config; + +import org.apache.hudi.common.config.ConfigClassProperty; +import org.apache.hudi.common.config.ConfigGroups; +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.config.LockConfiguration; + +/** + * Hoodie Configs for Azure based storage locks. + */ +@ConfigClassProperty(name = "Azure based Locks Configurations", + groupName = ConfigGroups.Names.WRITE_CLIENT, + subGroupName = ConfigGroups.SubGroupNames.LOCK, + description = "Configs that control Azure Blob/ADLS based locking mechanisms " + + "required for concurrency control between writers to a Hudi table.") +public class AzureStorageLockConfig extends HoodieConfig { + + private static final String AZURE_BASED_LOCK_PROPERTY_PREFIX = LockConfiguration.LOCK_PREFIX + "azure."; + + public static final ConfigProperty AZURE_CONNECTION_STRING = ConfigProperty + .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "connection.string") + .noDefaultValue() + .markAdvanced() + .withDocumentation("For Azure based lock provider, optional Azure Storage connection string used " + + "for authenticating BlobServiceClient."); + + public static final ConfigProperty AZURE_SAS_TOKEN = ConfigProperty + .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "sas.token") + .noDefaultValue() + .markAdvanced() + .withDocumentation("For Azure based lock provider, optional SAS token used for " + + "authenticating BlobServiceClient when connection string is not provided."); +} diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITAzureStorageLockClientAzurite.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITAzureStorageLockClientAzurite.java new file mode 100644 index 0000000000000..8a27e2f6bf7e4 --- /dev/null +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITAzureStorageLockClientAzurite.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.apache.hudi.client.transaction.lock.models.LockGetResult; +import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; +import org.apache.hudi.client.transaction.lock.models.StorageLockData; +import org.apache.hudi.client.transaction.lock.models.StorageLockFile; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; + +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.BlobServiceClientBuilder; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; + +import java.time.Duration; +import java.util.Properties; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Integration tests for {@link AzureStorageLockClient} using Azurite (Azure Storage emulator). + * + *

Run with: {@code mvn -Pazure-integration-tests -pl hudi-azure verify} + */ +@Testcontainers(disabledWithoutDocker = true) +@DisabledIfEnvironmentVariable(named = "SKIP_AZURITE_IT", matches = "true") +public class ITAzureStorageLockClientAzurite { + + private static final DockerImageName AZURITE_IMAGE = + DockerImageName.parse("mcr.microsoft.com/azure-storage/azurite"); + + // Standard Azurite defaults (documented by Microsoft) + private static final String ACCOUNT_NAME = "devstoreaccount1"; + // Standard Azurite dev account key (NOT a secret; used by the emulator by default) + // See: https://learn.microsoft.com/azure/storage/common/storage-use-azurite?tabs=visual-studio#connection-strings + private static final String ACCOUNT_KEY = + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="; + + @Container + public static final GenericContainer AZURITE = + new GenericContainer<>(AZURITE_IMAGE) + .withExposedPorts(10000) + .withCommand("azurite-blob", "--blobHost", "0.0.0.0", "--blobPort", "10000", "--loose") + .waitingFor(Wait.forListeningPort().withStartupTimeout(Duration.ofSeconds(60))); + + private static String blobEndpoint() { + // Azurite expects / in the endpoint URL path + return "http://" + AZURITE.getHost() + ":" + AZURITE.getMappedPort(10000) + "/" + ACCOUNT_NAME; + } + + private static String connectionString() { + String key = System.getenv("AZURITE_ACCOUNT_KEY"); + if (key == null || key.trim().isEmpty()) { + key = ACCOUNT_KEY; + } + return "DefaultEndpointsProtocol=http;" + + "AccountName=" + ACCOUNT_NAME + ";" + + "AccountKey=" + key + ";" + + "BlobEndpoint=" + blobEndpoint() + ";"; + } + + @Test + void testCreateUpdateAndReadLockFileWithAzurite() { + String container = "container"; + String blobPath = "locks/table_lock.json"; + + BlobServiceClient svc = new BlobServiceClientBuilder() + .connectionString(connectionString()) + .buildClient(); + BlobContainerClient containerClient = svc.getBlobContainerClient(container); + containerClient.createIfNotExists(); + + // NOTE: lockFileUri only needs to be parseable for container/blobPath extraction. + // The actual endpoint comes from the connection string. + String lockFileUri = "https://localhost:10000/" + container + "/" + blobPath; + Properties props = new Properties(); + props.setProperty(AzureStorageLockClient.AZURE_CONNECTION_STRING, connectionString()); + + AzureStorageLockClient owner1 = new AzureStorageLockClient("owner1", lockFileUri, props); + StorageLockData lockData1 = new StorageLockData(false, System.currentTimeMillis() + 60_000, "owner1"); + Pair> upsert1 = owner1.tryUpsertLockFile(lockData1, Option.empty()); + assertEquals(LockUpsertResult.SUCCESS, upsert1.getLeft()); + assertTrue(upsert1.getRight().isPresent()); + + // Read back + Pair> read = owner1.readCurrentLockFile(); + assertEquals(LockGetResult.SUCCESS, read.getLeft()); + assertTrue(read.getRight().isPresent()); + + // Wrong ETag should fail with precondition and be mapped to ACQUIRED_BY_OTHERS + AzureStorageLockClient owner2 = new AzureStorageLockClient("owner2", lockFileUri, props); + StorageLockFile wrongPrev = new StorageLockFile(lockData1, "\"etag-does-not-match\""); + StorageLockData lockData2 = new StorageLockData(false, System.currentTimeMillis() + 60_000, "owner2"); + Pair> upsert2 = + owner2.tryUpsertLockFile(lockData2, Option.of(wrongPrev)); + assertEquals(LockUpsertResult.ACQUIRED_BY_OTHERS, upsert2.getLeft()); + } +} diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageBasedLockProvider.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageBasedLockProvider.java new file mode 100644 index 0000000000000..ecc1ee6c7c022 --- /dev/null +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageBasedLockProvider.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.apache.hudi.client.transaction.lock.StorageBasedLockProvider; +import org.apache.hudi.client.transaction.lock.StorageBasedLockProviderTestBase; +import org.apache.hudi.common.config.LockConfiguration; +import org.apache.hudi.common.testutils.HoodieTestUtils; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; + +import static org.apache.hudi.common.config.HoodieCommonConfig.BASE_PATH; + +@Disabled("Requires Azurite/Testcontainers-based integration environment (not enabled by default).") +public class TestAzureStorageBasedLockProvider extends StorageBasedLockProviderTestBase { + + @BeforeEach + void setupLockProvider() { + providerProperties.put(BASE_PATH.key(), + "abfs://container@account.dfs.core.windows.net/lake/db/tbl-default"); + lockProvider = createLockProvider(); + } + + @Override + protected StorageBasedLockProvider createLockProvider() { + LockConfiguration lockConf = new LockConfiguration(providerProperties); + return new StorageBasedLockProvider(lockConf, HoodieTestUtils.getDefaultStorageConf()); + } +} diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClient.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClient.java new file mode 100644 index 0000000000000..6a185809f3b7d --- /dev/null +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClient.java @@ -0,0 +1,338 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.apache.hudi.client.transaction.lock.models.LockGetResult; +import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; +import org.apache.hudi.client.transaction.lock.models.StorageLockData; +import org.apache.hudi.client.transaction.lock.models.StorageLockFile; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; + +import com.azure.core.http.HttpHeaders; +import com.azure.core.http.rest.Response; +import com.azure.core.util.BinaryData; +import com.azure.core.util.Context; +import com.azure.storage.blob.BlobClient; +import com.azure.storage.blob.BlobContainerClient; +import com.azure.storage.blob.BlobServiceClient; +import com.azure.storage.blob.models.BlobDownloadContentResponse; +import com.azure.storage.blob.models.BlobStorageException; +import com.azure.storage.blob.models.BlockBlobItem; +import com.azure.storage.blob.options.BlobParallelUploadOptions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.slf4j.Logger; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.Properties; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.contains; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.isNull; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +@ExtendWith(MockitoExtension.class) +public class TestAzureStorageLockClient { + + private static final String OWNER_ID = "ownerId"; + private static final String LOCK_FILE_URI = + "abfs://container@account.dfs.core.windows.net/lockFilePath"; + private static final String LOCK_FILE_URI_WITH_NESTED_PATH = + "abfs://container@account.dfs.core.windows.net/lake/db/tbl-default/.hoodie/.locks/table_lock.json"; + + @Mock + private BlobServiceClient mockBlobServiceClient; + + @Mock + private BlobContainerClient mockContainerClient; + + @Mock + private BlobClient mockBlobClient; + + @Mock + private Logger mockLogger; + + private AzureStorageLockClient lockClient; + + @BeforeEach + void setUp() { + setUp(LOCK_FILE_URI); + } + + private void setUp(String lockFileUri) { + when(mockBlobServiceClient.getBlobContainerClient(eq("container"))).thenReturn(mockContainerClient); + String expectedBlobPath = lockFileUri.replaceFirst("^abfss?://[^/]+/", ""); + when(mockContainerClient.getBlobClient(eq(expectedBlobPath))).thenReturn(mockBlobClient); + + lockClient = new AzureStorageLockClient( + OWNER_ID, + lockFileUri, + new Properties(), + (location) -> mockBlobServiceClient, + mockLogger); + } + + @Test + void testTryUpsertLockFile_noPreviousLock_success_setsIfNoneMatchStar() throws Exception { + StorageLockData lockData = new StorageLockData(false, 123L, "test-owner"); + @SuppressWarnings("unchecked") + Response response = (Response) mock(Response.class); + when(response.getHeaders()).thenReturn(new HttpHeaders().set("ETag", "\"etag-1\"")); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))) + .thenReturn(response); + + ArgumentCaptor optionsCaptor = ArgumentCaptor.forClass(BlobParallelUploadOptions.class); + + Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); + + assertEquals(LockUpsertResult.SUCCESS, result.getLeft()); + assertTrue(result.getRight().isPresent()); + assertEquals("\"etag-1\"", result.getRight().get().getVersionId()); + + verify(mockBlobClient).uploadWithResponse(optionsCaptor.capture(), isNull(), eq(Context.NONE)); + BlobParallelUploadOptions options = optionsCaptor.getValue(); + assertRequestCondition(options, "ifNoneMatch", "*"); + verifyNoMoreInteractions(mockLogger); + } + + @Test + void testTryUpsertLockFile_withPreviousLock_success_setsIfMatch() throws Exception { + StorageLockData lockData = new StorageLockData(false, 999L, "existing-owner"); + StorageLockFile previousLockFile = new StorageLockFile(lockData, "\"etag-prev\""); + + @SuppressWarnings("unchecked") + Response response = (Response) mock(Response.class); + when(response.getHeaders()).thenReturn(new HttpHeaders().set("ETag", "\"etag-new\"")); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))) + .thenReturn(response); + + ArgumentCaptor optionsCaptor = ArgumentCaptor.forClass(BlobParallelUploadOptions.class); + + Pair> result = + lockClient.tryUpsertLockFile(lockData, Option.of(previousLockFile)); + + assertEquals(LockUpsertResult.SUCCESS, result.getLeft()); + assertTrue(result.getRight().isPresent()); + assertEquals("\"etag-new\"", result.getRight().get().getVersionId()); + + verify(mockBlobClient).uploadWithResponse(optionsCaptor.capture(), isNull(), eq(Context.NONE)); + BlobParallelUploadOptions options = optionsCaptor.getValue(); + assertRequestCondition(options, "ifMatch", "\"etag-prev\""); + } + + @Test + void testTryUpsertLockFile_preconditionFailed_returnsAcquiredByOthers() { + StorageLockData lockData = new StorageLockData(false, 999L, "owner"); + BlobStorageException ex = mock(BlobStorageException.class); + when(ex.getStatusCode()).thenReturn(412); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); + + Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); + + assertEquals(LockUpsertResult.ACQUIRED_BY_OTHERS, result.getLeft()); + assertTrue(result.getRight().isEmpty()); + verify(mockLogger).info( + contains("Unable to write new lock file. Another process has modified this lockfile"), + eq(OWNER_ID), + eq(LOCK_FILE_URI)); + } + + @Test + void testTryUpsertLockFile_rateLimit_returnsUnknownError() { + StorageLockData lockData = new StorageLockData(false, 999L, "owner"); + BlobStorageException ex = mock(BlobStorageException.class); + when(ex.getStatusCode()).thenReturn(429); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); + + Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); + + assertEquals(LockUpsertResult.UNKNOWN_ERROR, result.getLeft()); + assertTrue(result.getRight().isEmpty()); + verify(mockLogger).warn(contains("Rate limit exceeded"), eq(OWNER_ID), eq(LOCK_FILE_URI)); + } + + @Test + void testTryUpsertLockFile_serverError_returnsUnknownError() { + StorageLockData lockData = new StorageLockData(false, 999L, "owner"); + BlobStorageException ex = mock(BlobStorageException.class); + when(ex.getStatusCode()).thenReturn(503); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); + + Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); + + assertEquals(LockUpsertResult.UNKNOWN_ERROR, result.getLeft()); + assertTrue(result.getRight().isEmpty()); + verify(mockLogger).warn(contains("Azure returned internal server error code"), eq(OWNER_ID), eq(LOCK_FILE_URI), eq(ex)); + } + + @Test + void testTryUpsertLockFile_unexpectedError_rethrows() { + StorageLockData lockData = new StorageLockData(false, 999L, "owner"); + BlobStorageException ex = mock(BlobStorageException.class); + when(ex.getStatusCode()).thenReturn(400); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); + + assertThrows(BlobStorageException.class, () -> lockClient.tryUpsertLockFile(lockData, Option.empty())); + } + + @Test + void testReadCurrentLockFile_notFound_returnsNotExists() { + BlobStorageException ex = mock(BlobStorageException.class); + when(ex.getStatusCode()).thenReturn(404); + when(mockBlobClient.downloadContentWithResponse(isNull(), isNull(), isNull(), eq(Context.NONE))).thenThrow(ex); + + Pair> result = lockClient.readCurrentLockFile(); + assertEquals(LockGetResult.NOT_EXISTS, result.getLeft()); + assertTrue(result.getRight().isEmpty()); + verify(mockLogger).info(contains("Object not found"), eq(OWNER_ID), eq(LOCK_FILE_URI)); + } + + @Test + void testReadCurrentLockFile_blobFound_success() { + setUp(LOCK_FILE_URI_WITH_NESTED_PATH); + + StorageLockData data = new StorageLockData(false, 1700000000000L, "testOwner"); + byte[] json = StorageLockFile.toByteArray(data); + BlobDownloadContentResponse response = mock(BlobDownloadContentResponse.class); + when(response.getValue()).thenReturn(BinaryData.fromBytes(json)); + when(response.getHeaders()).thenReturn(new HttpHeaders().set("ETag", "\"etag-123\"")); + when(mockBlobClient.downloadContentWithResponse(isNull(), isNull(), isNull(), eq(Context.NONE))).thenReturn(response); + + Pair> result = lockClient.readCurrentLockFile(); + + assertEquals(LockGetResult.SUCCESS, result.getLeft()); + assertTrue(result.getRight().isPresent()); + assertEquals("\"etag-123\"", result.getRight().get().getVersionId()); + assertEquals("testOwner", result.getRight().get().getOwner()); + } + + @Test + void testReadCurrentLockFile_download404_returnsNotExists() { + BlobStorageException ex404 = mock(BlobStorageException.class); + when(ex404.getStatusCode()).thenReturn(404); + when(mockBlobClient.downloadContentWithResponse(isNull(), isNull(), isNull(), eq(Context.NONE))).thenThrow(ex404); + + Pair> result = lockClient.readCurrentLockFile(); + assertEquals(LockGetResult.NOT_EXISTS, result.getLeft()); + assertTrue(result.getRight().isEmpty()); + } + + @Test + void testClose_noop() { + lockClient.close(); + } + + @Test + void testReadObject_reusesSecondaryBlobServiceClientForSameEndpoint() { + BlobServiceClient primaryServiceClient = mock(BlobServiceClient.class); + BlobContainerClient primaryContainerClient = mock(BlobContainerClient.class); + BlobClient primaryBlobClient = mock(BlobClient.class); + when(primaryServiceClient.getBlobContainerClient(any(String.class))).thenReturn(primaryContainerClient); + when(primaryContainerClient.getBlobClient(any(String.class))).thenReturn(primaryBlobClient); + when(primaryBlobClient.exists()).thenReturn(false); + + BlobServiceClient secondaryServiceClient = mock(BlobServiceClient.class); + BlobContainerClient secondaryContainerClient = mock(BlobContainerClient.class); + BlobClient secondaryBlobClient = mock(BlobClient.class); + when(secondaryServiceClient.getBlobContainerClient(any(String.class))).thenReturn(secondaryContainerClient); + when(secondaryContainerClient.getBlobClient(any(String.class))).thenReturn(secondaryBlobClient); + when(secondaryBlobClient.exists()).thenReturn(false); + + AtomicInteger secondarySupplierInvocations = new AtomicInteger(0); + AzureStorageLockClient client = new AzureStorageLockClient( + OWNER_ID, + LOCK_FILE_URI, + new Properties(), + location -> { + if ("https://secondary.blob.core.windows.net".equals(location.blobEndpoint)) { + secondarySupplierInvocations.incrementAndGet(); + return secondaryServiceClient; + } + return primaryServiceClient; + }, + mockLogger); + + client.readObject("abfs://container@secondary.dfs.core.windows.net/path1", true); + client.readObject("abfs://container@secondary.dfs.core.windows.net/path2", true); + + assertEquals(1, secondarySupplierInvocations.get()); + } + + private static void assertRequestCondition(Object blobParallelUploadOptions, String expectedField, String expectedValue) throws Exception { + Object requestConditions = tryInvoke(blobParallelUploadOptions, "getRequestConditions"); + if (requestConditions == null) { + Field f = blobParallelUploadOptions.getClass().getDeclaredField("requestConditions"); + f.setAccessible(true); + requestConditions = f.get(blobParallelUploadOptions); + } + assertNotNull(requestConditions, "requestConditions should be set on upload options"); + + Object actualIfMatch = tryInvoke(requestConditions, "getIfMatch"); + if (actualIfMatch == null) { + actualIfMatch = getFieldIfExists(requestConditions, "ifMatch"); + } + Object actualIfNoneMatch = tryInvoke(requestConditions, "getIfNoneMatch"); + if (actualIfNoneMatch == null) { + actualIfNoneMatch = getFieldIfExists(requestConditions, "ifNoneMatch"); + } + + if ("ifMatch".equals(expectedField)) { + assertEquals(expectedValue, actualIfMatch, "Expected If-Match to be set"); + } else if ("ifNoneMatch".equals(expectedField)) { + assertEquals(expectedValue, actualIfNoneMatch, "Expected If-None-Match to be set"); + } else { + throw new IllegalArgumentException("Unexpected expectedField: " + expectedField); + } + } + + private static Object tryInvoke(Object target, String methodName) { + try { + Method m = target.getClass().getMethod(methodName); + return m.invoke(target); + } catch (Exception ignored) { + return null; + } + } + + private static Object getFieldIfExists(Object target, String fieldName) { + try { + Field f = target.getClass().getDeclaredField(fieldName); + f.setAccessible(true); + return f.get(target); + } catch (Exception ignored) { + return null; + } + } +} diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClientUriParsing.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClientUriParsing.java new file mode 100644 index 0000000000000..b87e9fea2477f --- /dev/null +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClientUriParsing.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.azure.transaction.lock; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestAzureStorageLockClientUriParsing { + + @Test + public void testParseAbfsUri() { + String uri = "abfs://container@account.dfs.core.windows.net/table/.hoodie/.locks/table_lock.json"; + + AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + } + + @Test + public void testParseAbfssUri() { + String uri = "abfss://container@account.dfs.core.windows.net/lake/db/tbl/.hoodie/.locks/table_lock.json"; + + AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("lake/db/tbl/.hoodie/.locks/table_lock.json", l.blobPath); + } + + @Test + public void testParseWasbUri() { + String uri = "wasb://container@account.blob.core.windows.net/table/.hoodie/.locks/table_lock.json"; + + AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + } + + @Test + public void testParseWasbsUri() { + String uri = "wasbs://container@account.blob.core.windows.net/lake/db/tbl/.hoodie/.locks/table_lock.json"; + + AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("lake/db/tbl/.hoodie/.locks/table_lock.json", l.blobPath); + } + + @Test + public void testParseHttpsUri() { + String uri = "https://account.blob.core.windows.net/container/table/.hoodie/.locks/table_lock.json"; + + AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); + + assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + } + + @Test + public void testParseHttpUri() { + String uri = "http://127.0.0.1:10000/container/table/.hoodie/.locks/table_lock.json"; + + AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); + + assertEquals("http://127.0.0.1:10000", l.blobEndpoint); + assertEquals("container", l.container); + assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + } +} diff --git a/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java b/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java index 0450dbfa6db4b..a052a84f743a6 100644 --- a/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java +++ b/hudi-io/src/main/java/org/apache/hudi/storage/StorageSchemes.java @@ -48,13 +48,13 @@ public enum StorageSchemes { // Google Cloud Storage GCS("gs", true, null, "org.apache.hudi.gcp.transaction.lock.GCSStorageLockClient"), // Azure WASB (Azure Blob Storage) - WASB("wasb", true, null, "org.apache.hudi.azure.transaction.lock.ADLSStorageLockClient"), - WASBS("wasbs", true, null, "org.apache.hudi.azure.transaction.lock.ADLSStorageLockClient"), + WASB("wasb", null, null, "org.apache.hudi.azure.transaction.lock.AzureStorageLockClient"), + WASBS("wasbs", null, null, "org.apache.hudi.azure.transaction.lock.AzureStorageLockClient"), // Azure ADLS ADL("adl", null, null, null), // Azure ADLS Gen2 - ABFS("abfs", true, null, "org.apache.hudi.azure.transaction.lock.ADLSStorageLockClient"), - ABFSS("abfss", true, null, "org.apache.hudi.azure.transaction.lock.ADLSStorageLockClient"), + ABFS("abfs", null, null, "org.apache.hudi.azure.transaction.lock.AzureStorageLockClient"), + ABFSS("abfss", null, null, "org.apache.hudi.azure.transaction.lock.AzureStorageLockClient"), // Aliyun OSS OSS("oss", null, null, null), // View FS for federated setups. If federating across cloud stores, then append diff --git a/pom.xml b/pom.xml index 1dbfc302754fb..c369c87b79d27 100644 --- a/pom.xml +++ b/pom.xml @@ -40,6 +40,7 @@ hudi-client hudi-aws hudi-gcp + hudi-azure hudi-hadoop-common hudi-hadoop-mr hudi-io @@ -52,6 +53,7 @@ packaging/hudi-hive-sync-bundle packaging/hudi-aws-bundle packaging/hudi-gcp-bundle + packaging/hudi-azure-bundle packaging/hudi-spark-bundle packaging/hudi-presto-bundle packaging/hudi-utilities-bundle @@ -112,7 +114,7 @@ 5.5.0 2.17 3.0.1-b12 - 1.10.1 + 1.13.1 5.14.1 5.14.1 1.14.1 @@ -123,7 +125,7 @@ 2.10.2 org.apache.hive 2.3.10 - 1.10.1 + 1.13.1 1.11.4 0.273 390 From 2d956dc0beb807d556d51316f42d4728e41be82e Mon Sep 17 00:00:00 2001 From: Revanth Chandupatla Date: Wed, 25 Feb 2026 13:27:05 -0500 Subject: [PATCH 06/11] Revert hive parquet version and added comments for SAS token --- .../java/org/apache/hudi/config/AzureStorageLockConfig.java | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java b/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java index 11bf3074f3190..bb5899a99a582 100644 --- a/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java +++ b/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java @@ -48,5 +48,5 @@ public class AzureStorageLockConfig extends HoodieConfig { .noDefaultValue() .markAdvanced() .withDocumentation("For Azure based lock provider, optional SAS token used for " - + "authenticating BlobServiceClient when connection string is not provided."); + + "authenticating BlobServiceClient when connection string is not provided. SAS token is not recommended for production use by Azure."); } diff --git a/pom.xml b/pom.xml index c369c87b79d27..d7ccd29ad3024 100644 --- a/pom.xml +++ b/pom.xml @@ -125,7 +125,7 @@ 2.10.2 org.apache.hive 2.3.10 - 1.13.1 + 1.10.1 1.11.4 0.273 390 From 828646839bbe115c06e8a01e982e62ac4fc33292 Mon Sep 17 00:00:00 2001 From: Revanth Chandupatla Date: Mon, 9 Mar 2026 12:22:22 -0400 Subject: [PATCH 07/11] Address review comments. Add explicity configs for credential types. --- .../credentials/AzureCredentialFactory.java | 90 +++++++++++++++++++ .../lock/AzureStorageLockClient.java | 25 ++++-- .../hudi/config/AzureStorageLockConfig.java | 34 +++++++ 3 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 hudi-azure/src/main/java/org/apache/hudi/azure/credentials/AzureCredentialFactory.java diff --git a/hudi-azure/src/main/java/org/apache/hudi/azure/credentials/AzureCredentialFactory.java b/hudi-azure/src/main/java/org/apache/hudi/azure/credentials/AzureCredentialFactory.java new file mode 100644 index 0000000000000..c66e90891c16e --- /dev/null +++ b/hudi-azure/src/main/java/org/apache/hudi/azure/credentials/AzureCredentialFactory.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.azure.credentials; + +import org.apache.hudi.config.AzureStorageLockConfig; + +import com.azure.core.credential.TokenCredential; +import com.azure.identity.ClientSecretCredentialBuilder; +import com.azure.identity.DefaultAzureCredentialBuilder; +import com.azure.identity.ManagedIdentityCredentialBuilder; + +import java.util.Properties; + +/** + * Factory for resolving an Azure {@link TokenCredential} from Hudi properties. + * + *

Credential precedence: + *

    + *
  1. User-assigned managed identity ({@link AzureStorageLockConfig#AZURE_MANAGED_IDENTITY_CLIENT_ID}) + * — uses {@code ManagedIdentityCredential}
  2. + *
  3. Service principal ({@link AzureStorageLockConfig#AZURE_CLIENT_TENANT_ID} + + * {@link AzureStorageLockConfig#AZURE_CLIENT_ID} + + * {@link AzureStorageLockConfig#AZURE_CLIENT_SECRET}) + * — uses {@code ClientSecretCredential}
  4. + *
  5. {@code DefaultAzureCredential} — (system-assigned MI, + * workload identity, env-var SP, Azure CLI, etc.); suitable for dev and environments + * where auth is controlled externally
  6. + *
+ * + *

Note: connection string and SAS token auth are not {@link TokenCredential}-based and are + * handled directly by the caller before this factory is consulted. + */ +public class AzureCredentialFactory { + + // Shared instance so the token cache and IMDS probe are reused across all clients + // that fall through to the default chain. + private static final TokenCredential DEFAULT_AZURE_CREDENTIAL = + new DefaultAzureCredentialBuilder().build(); + + private AzureCredentialFactory() { + } + + /** + * Returns a {@link TokenCredential} resolved from the supplied properties. + * + * @param props Hudi lock properties + * @return resolved credential, never {@code null} + */ + public static TokenCredential getAzureCredential(Properties props) { + if (props != null) { + String miClientId = props.getProperty(AzureStorageLockConfig.AZURE_MANAGED_IDENTITY_CLIENT_ID.key()); + if (miClientId != null && !miClientId.trim().isEmpty()) { + return new ManagedIdentityCredentialBuilder() + .clientId(miClientId) + .build(); + } + + String tenantId = props.getProperty(AzureStorageLockConfig.AZURE_CLIENT_TENANT_ID.key()); + String clientId = props.getProperty(AzureStorageLockConfig.AZURE_CLIENT_ID.key()); + String clientSecret = props.getProperty(AzureStorageLockConfig.AZURE_CLIENT_SECRET.key()); + if (tenantId != null && !tenantId.trim().isEmpty() + && clientId != null && !clientId.trim().isEmpty() + && clientSecret != null && !clientSecret.trim().isEmpty()) { + return new ClientSecretCredentialBuilder() + .tenantId(tenantId) + .clientId(clientId) + .clientSecret(clientSecret) + .build(); + } + } + + return DEFAULT_AZURE_CREDENTIAL; + } +} diff --git a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java index 9f207d80cfb6b..beb3973ad6a0d 100644 --- a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java +++ b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java @@ -32,6 +32,8 @@ import org.apache.hudi.config.StorageBasedLockConfig; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.azure.credentials.AzureCredentialFactory; + import com.azure.core.credential.AzureSasCredential; import com.azure.core.exception.HttpResponseException; import com.azure.core.http.policy.ExponentialBackoffOptions; @@ -40,7 +42,6 @@ import com.azure.core.util.BinaryData; import com.azure.core.util.Context; import com.azure.core.util.HttpClientOptions; -import com.azure.identity.DefaultAzureCredentialBuilder; import com.azure.storage.blob.BlobClient; import com.azure.storage.blob.BlobContainerClient; import com.azure.storage.blob.BlobServiceClient; @@ -91,11 +92,17 @@ * * *

Authentication precedence (via {@link Properties}): - *

    - *
  • {@link AzureStorageLockConfig#AZURE_CONNECTION_STRING}
  • - *
  • {@link AzureStorageLockConfig#AZURE_SAS_TOKEN}
  • - *
  • DefaultAzureCredential
  • - *
+ *
    + *
  1. {@link AzureStorageLockConfig#AZURE_CONNECTION_STRING} — connection string (includes shared key)
  2. + *
  3. {@link AzureStorageLockConfig#AZURE_SAS_TOKEN} — shared access signature
  4. + *
  5. {@link AzureStorageLockConfig#AZURE_MANAGED_IDENTITY_CLIENT_ID} — user-assigned managed identity + * via {@code ManagedIdentityCredential}
  6. + *
  7. {@link AzureStorageLockConfig#AZURE_CLIENT_TENANT_ID} + + * {@link AzureStorageLockConfig#AZURE_CLIENT_ID} + + * {@link AzureStorageLockConfig#AZURE_CLIENT_SECRET} — service principal + * via {@code ClientSecretCredential}
  8. + *
  9. {@code DefaultAzureCredential} — probing chain; see {@link org.apache.hudi.azure.credentials.AzureCredentialFactory}
  10. + *
*/ @ThreadSafe public class AzureStorageLockClient implements StorageLockClient { @@ -157,19 +164,23 @@ private static Functions.Function1 createDefau BlobServiceClientBuilder builder = new BlobServiceClientBuilder(); configureAzureClientOptions(builder, props); + // 1. Connection string (includes shared-key auth). String connectionString = props == null ? null : props.getProperty(AZURE_CONNECTION_STRING); if (connectionString != null && !connectionString.trim().isEmpty()) { return builder.connectionString(connectionString).buildClient(); } builder.endpoint(location.blobEndpoint); + + // 2. SAS token. String sasToken = props == null ? null : props.getProperty(AZURE_SAS_TOKEN); if (sasToken != null && !sasToken.trim().isEmpty()) { String cleaned = sasToken.startsWith("?") ? sasToken.substring(1) : sasToken; return builder.credential(new AzureSasCredential(cleaned)).buildClient(); } - return builder.credential(new DefaultAzureCredentialBuilder().build()).buildClient(); + // 3. TokenCredential — MI, service principal, or DefaultAzureCredential fallback. + return builder.credential(AzureCredentialFactory.getAzureCredential(props)).buildClient(); }; } diff --git a/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java b/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java index bb5899a99a582..e051def6e828c 100644 --- a/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java +++ b/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java @@ -49,4 +49,38 @@ public class AzureStorageLockConfig extends HoodieConfig { .markAdvanced() .withDocumentation("For Azure based lock provider, optional SAS token used for " + "authenticating BlobServiceClient when connection string is not provided. SAS token is not recommended for production use by Azure."); + + public static final ConfigProperty AZURE_MANAGED_IDENTITY_CLIENT_ID = ConfigProperty + .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "managed.identity.client.id") + .noDefaultValue() + .markAdvanced() + .withDocumentation("For Azure based lock provider, client ID of a user-assigned managed identity to authenticate " + + "BlobServiceClient with ManagedIdentityCredential."); + + public static final ConfigProperty AZURE_CLIENT_TENANT_ID = ConfigProperty + .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.tenant.id") + .noDefaultValue() + .markAdvanced() + .withDocumentation("For Azure based lock provider, Azure AD tenant ID used together with " + + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.id' and " + + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.secret' to authenticate via service principal " + + "(ClientSecretCredential). All three must be set for this auth mode to activate."); + + public static final ConfigProperty AZURE_CLIENT_ID = ConfigProperty + .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.id") + .noDefaultValue() + .markAdvanced() + .withDocumentation("For Azure based lock provider, Azure AD application (client) ID used together with " + + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.tenant.id' and " + + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.secret' to authenticate via service principal " + + "(ClientSecretCredential). All three must be set for this auth mode to activate."); + + public static final ConfigProperty AZURE_CLIENT_SECRET = ConfigProperty + .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.secret") + .noDefaultValue() + .markAdvanced() + .withDocumentation("For Azure based lock provider, Azure AD client secret used together with " + + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.tenant.id' and " + + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.id' to authenticate via service principal " + + "(ClientSecretCredential). All three must be set for this auth mode to activate."); } From 598acc412f2dcbde52a06add8e314d67f80f3ee0 Mon Sep 17 00:00:00 2001 From: Revanth Chandupatla Date: Sat, 28 Mar 2026 16:14:05 -0400 Subject: [PATCH 08/11] Change default credentials to holder pattern for lazy init and config changes --- .../credentials/AzureCredentialFactory.java | 12 +- .../lock/ADLSStorageLockClient.java | 447 ------------------ .../hudi/config/AzureStorageLockConfig.java | 6 + .../lock/ITADLSStorageLockClientAzurite.java | 124 ----- .../TestADLSStorageBasedLockProvider.java | 47 -- .../lock/TestADLSStorageLockClient.java | 314 ------------ .../TestADLSStorageLockClientUriParsing.java | 81 ---- 7 files changed, 13 insertions(+), 1018 deletions(-) delete mode 100644 hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java delete mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSStorageLockClientAzurite.java delete mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageBasedLockProvider.java delete mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClient.java delete mode 100644 hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClientUriParsing.java diff --git a/hudi-azure/src/main/java/org/apache/hudi/azure/credentials/AzureCredentialFactory.java b/hudi-azure/src/main/java/org/apache/hudi/azure/credentials/AzureCredentialFactory.java index c66e90891c16e..147eae1fbc5af 100644 --- a/hudi-azure/src/main/java/org/apache/hudi/azure/credentials/AzureCredentialFactory.java +++ b/hudi-azure/src/main/java/org/apache/hudi/azure/credentials/AzureCredentialFactory.java @@ -48,10 +48,12 @@ */ public class AzureCredentialFactory { - // Shared instance so the token cache and IMDS probe are reused across all clients - // that fall through to the default chain. - private static final TokenCredential DEFAULT_AZURE_CREDENTIAL = - new DefaultAzureCredentialBuilder().build(); + /** + * Lazily initializes {@code DefaultAzureCredential} on first use of the default chain only. + */ + private static final class DefaultAzureCredentialHolder { + static final TokenCredential INSTANCE = new DefaultAzureCredentialBuilder().build(); + } private AzureCredentialFactory() { } @@ -85,6 +87,6 @@ public static TokenCredential getAzureCredential(Properties props) { } } - return DEFAULT_AZURE_CREDENTIAL; + return DefaultAzureCredentialHolder.INSTANCE; } } diff --git a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java deleted file mode 100644 index 49a6febd5058f..0000000000000 --- a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/ADLSStorageLockClient.java +++ /dev/null @@ -1,447 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.azure.transaction.lock; - -import org.apache.hudi.client.transaction.lock.StorageLockClient; -import org.apache.hudi.client.transaction.lock.models.LockGetResult; -import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; -import org.apache.hudi.client.transaction.lock.models.StorageLockData; -import org.apache.hudi.client.transaction.lock.models.StorageLockFile; -import org.apache.hudi.common.util.Functions; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.VisibleForTesting; -import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.exception.HoodieLockException; -import org.apache.hudi.config.StorageBasedLockConfig; - -import com.azure.core.exception.HttpResponseException; -import com.azure.core.http.policy.ExponentialBackoffOptions; -import com.azure.core.http.policy.RetryOptions; -import com.azure.core.http.rest.Response; -import com.azure.core.credential.AzureSasCredential; -import com.azure.core.util.BinaryData; -import com.azure.core.util.Context; -import com.azure.core.util.HttpClientOptions; -import com.azure.identity.DefaultAzureCredentialBuilder; -import com.azure.storage.blob.BlobClient; -import com.azure.storage.blob.BlobContainerClient; -import com.azure.storage.blob.BlobServiceClient; -import com.azure.storage.blob.BlobServiceClientBuilder; -import com.azure.storage.blob.models.BlobErrorCode; -import com.azure.storage.blob.models.BlobProperties; -import com.azure.storage.blob.models.BlobRequestConditions; -import com.azure.storage.blob.models.BlobStorageException; -import com.azure.storage.blob.models.BlockBlobItem; -import com.azure.storage.blob.options.BlobParallelUploadOptions; -import lombok.extern.slf4j.Slf4j; -import org.slf4j.Logger; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.ThreadSafe; - -import java.io.IOException; -import java.io.InputStream; -import java.io.UncheckedIOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.time.Duration; -import java.util.Properties; - -import static java.nio.charset.StandardCharsets.UTF_8; - -/** - * Azure Data Lake Storage (ADLS) implementation of {@link StorageLockClient} using Azure Blob conditional requests. - * - *

Supports the following URI schemes: - *

    - *
  • ADLS Gen2: {@code abfs://} and {@code abfss://}
  • - *
  • Azure Blob Storage: {@code wasb://} and {@code wasbs://}
  • - *
- * - * - *
    - *
  • Create: conditional write with If-None-Match: *
  • - *
  • Update/Renew/Expire: conditional write with If-Match: <etag>
  • - *
- * - *

Expected lock URI formats: - *

    - *
  • {@code abfs://<container>@<account>.dfs.core.windows.net/<path>}
  • - *
  • {@code abfss://<container>@<account>.dfs.core.windows.net/<path>}
  • - *
  • {@code wasb://<container>@<account>.blob.core.windows.net/<path>}
  • - *
  • {@code wasbs://<container>@<account>.blob.core.windows.net/<path>}
  • - *
- * - *

Authentication precedence (via {@link Properties}): - *

    - *
  • {@code hoodie.write.lock.azure.connection.string}
  • - *
  • {@code hoodie.write.lock.azure.sas.token}
  • - *
  • DefaultAzureCredential
  • - *
- */ -@Slf4j -@ThreadSafe -public class ADLSStorageLockClient implements StorageLockClient { - - private static final int PRECONDITION_FAILURE_ERROR_CODE = 412; - private static final int NOT_FOUND_ERROR_CODE = 404; - private static final int CONFLICT_ERROR_CODE = 409; - private static final int RATE_LIMIT_ERROR_CODE = 429; - private static final int INTERNAL_SERVER_ERROR_CODE_MIN = 500; - - public static final String AZURE_CONNECTION_STRING = "hoodie.write.lock.azure.connection.string"; - public static final String AZURE_SAS_TOKEN = "hoodie.write.lock.azure.sas.token"; - - private final Logger logger; - private final BlobServiceClient blobServiceClient; - private final BlobClient lockBlobClient; - private final Properties clientProperties; - private final String ownerId; - private final String lockFileUri; - - /** - * Constructor used by reflection by {@link org.apache.hudi.client.transaction.lock.StorageBasedLockProvider}. - * - * @param ownerId lock owner id - * @param lockFileUri lock file URI (abfs/abfss/wasb/wasbs) - * @param props properties used to customize/authenticate the Azure client - */ - public ADLSStorageLockClient(String ownerId, String lockFileUri, Properties props) { - this(ownerId, lockFileUri, props, createDefaultBlobServiceClient(), log); - } - - @VisibleForTesting - ADLSStorageLockClient( - String ownerId, - String lockFileUri, - Properties props, - Functions.Function1 blobServiceClientSupplier, - Logger logger) { - this.ownerId = ownerId; - this.lockFileUri = lockFileUri; - this.logger = logger; - this.clientProperties = props; - - AzureLocation location = parseAzureLocation(lockFileUri); - this.blobServiceClient = blobServiceClientSupplier.apply(location.withProperties(props)); - BlobContainerClient containerClient = blobServiceClient.getBlobContainerClient(location.container); - this.lockBlobClient = containerClient.getBlobClient(location.blobPath); - } - - private static Functions.Function1 createDefaultBlobServiceClient() { - return (location) -> { - Properties props = location.props; - BlobServiceClientBuilder builder = new BlobServiceClientBuilder(); - configureAzureClientOptions(builder, props); - - String connectionString = props == null ? null : props.getProperty(AZURE_CONNECTION_STRING); - if (connectionString != null && !connectionString.trim().isEmpty()) { - return builder.connectionString(connectionString).buildClient(); - } - - builder.endpoint(location.blobEndpoint); - String sasToken = props == null ? null : props.getProperty(AZURE_SAS_TOKEN); - if (sasToken != null && !sasToken.trim().isEmpty()) { - String cleaned = sasToken.startsWith("?") ? sasToken.substring(1) : sasToken; - return builder.credential(new AzureSasCredential(cleaned)).buildClient(); - } - - return builder.credential(new DefaultAzureCredentialBuilder().build()).buildClient(); - }; - } - - private static void configureAzureClientOptions(BlobServiceClientBuilder builder, Properties props) { - // Set Azure SDK timeouts based on lock validity to avoid long-hanging calls. - long validityTimeoutSecs = getLongProperty(props, StorageBasedLockConfig.VALIDITY_TIMEOUT_SECONDS); - long azureCallTimeoutSecs = Math.max(1, validityTimeoutSecs / 5); - - // Disable automatic SDK retries; Hudi manages retries at the lock-provider level. - ExponentialBackoffOptions exponentialOptions = new ExponentialBackoffOptions().setMaxRetries(0); - RetryOptions retryOptions = new RetryOptions(exponentialOptions); - - HttpClientOptions clientOptions = new HttpClientOptions() - .setResponseTimeout(Duration.ofSeconds(azureCallTimeoutSecs)) - .setReadTimeout(Duration.ofSeconds(azureCallTimeoutSecs)); - - builder.retryOptions(retryOptions).clientOptions(clientOptions); - } - - private static long getLongProperty(Properties props, org.apache.hudi.common.config.ConfigProperty prop) { - if (props == null) { - return prop.defaultValue(); - } - Object value = props.get(prop.key()); - if (value == null) { - return prop.defaultValue(); - } - try { - return Long.parseLong(String.valueOf(value)); - } catch (NumberFormatException e) { - return prop.defaultValue(); - } - } - - @Override - public Pair> tryUpsertLockFile( - StorageLockData newLockData, - Option previousLockFile) { - String expectedEtag = previousLockFile.isPresent() ? previousLockFile.get().getVersionId() : null; - try { - StorageLockFile updated = createOrUpdateLockFileInternal(newLockData, expectedEtag); - return Pair.of(LockUpsertResult.SUCCESS, Option.of(updated)); - } catch (BlobStorageException e) { - return Pair.of(handleUpsertBlobStorageException(e), Option.empty()); - } catch (HttpResponseException e) { - logger.error("OwnerId: {}, Unexpected Azure SDK error while writing lock file: {}", - ownerId, lockFileUri, e); - if (!previousLockFile.isPresent()) { - // For create, fail fast since this indicates a larger issue. - throw e; - } - return Pair.of(LockUpsertResult.UNKNOWN_ERROR, Option.empty()); - } catch (Exception e) { - logger.error("OwnerId: {}, Unexpected error while writing lock file: {}", ownerId, lockFileUri, e); - return Pair.of(LockUpsertResult.UNKNOWN_ERROR, Option.empty()); - } - } - - @Override - public Pair> readCurrentLockFile() { - try { - BlobProperties props = lockBlobClient.getProperties(); - String eTag = props.getETag(); - try { - return getLockFileFromBlob(lockBlobClient, eTag); - } catch (BlobStorageException e) { - // Blob can change/disappear after properties call; treat 404 during stream read as UNKNOWN_ERROR. - return Pair.of(handleGetStorageException(e, true), Option.empty()); - } - } catch (BlobStorageException e) { - return Pair.of(handleGetStorageException(e, false), Option.empty()); - } - } - - private LockGetResult handleGetStorageException(BlobStorageException e, boolean ignore404) { - int code = e.getStatusCode(); - if (code == NOT_FOUND_ERROR_CODE || e.getErrorCode() == BlobErrorCode.BLOB_NOT_FOUND) { - if (ignore404) { - logger.info("OwnerId: {}, Azure stream read failure detected: {}", ownerId, lockFileUri); - } else { - logger.info("OwnerId: {}, Object not found in the path: {}", ownerId, lockFileUri); - return LockGetResult.NOT_EXISTS; - } - } else if (code == RATE_LIMIT_ERROR_CODE) { - logger.warn("OwnerId: {}, Rate limit exceeded for lock file: {}", ownerId, lockFileUri); - } else if (code >= INTERNAL_SERVER_ERROR_CODE_MIN) { - logger.warn("OwnerId: {}, Azure returned internal server error code for lock file: {}", ownerId, lockFileUri, e); - } else { - throw e; - } - return LockGetResult.UNKNOWN_ERROR; - } - - private @Nonnull Pair> getLockFileFromBlob(BlobClient blobClient, String eTag) { - try (InputStream inputStream = blobClient.openInputStream()) { - return Pair.of(LockGetResult.SUCCESS, Option.of(StorageLockFile.createFromStream(inputStream, eTag))); - } catch (IOException ioe) { - throw new UncheckedIOException("Failed reading blob: " + lockFileUri, ioe); - } - } - - private StorageLockFile createOrUpdateLockFileInternal(StorageLockData lockData, String expectedEtag) { - byte[] bytes = StorageLockFile.toByteArray(lockData); - BlobRequestConditions conditions = new BlobRequestConditions(); - if (expectedEtag == null) { - conditions.setIfNoneMatch("*"); - } else { - conditions.setIfMatch(expectedEtag); - } - - BlobParallelUploadOptions options = new BlobParallelUploadOptions(BinaryData.fromBytes(bytes)) - .setRequestConditions(conditions); - Response response = lockBlobClient.uploadWithResponse(options, null, Context.NONE); - String newEtag = response.getValue() != null ? response.getValue().getETag() : null; - if (newEtag == null) { - newEtag = lockBlobClient.getProperties().getETag(); - } - return new StorageLockFile(lockData, newEtag); - } - - private LockUpsertResult handleUpsertBlobStorageException(BlobStorageException e) { - int code = e.getStatusCode(); - if (code == PRECONDITION_FAILURE_ERROR_CODE || e.getErrorCode() == BlobErrorCode.CONDITION_NOT_MET) { - logger.info("OwnerId: {}, Unable to write new lock file. Another process has modified this lockfile {} already.", - ownerId, lockFileUri); - return LockUpsertResult.ACQUIRED_BY_OTHERS; - } else if (code == CONFLICT_ERROR_CODE) { - logger.info("OwnerId: {}, Retriable conditional request conflict error: {}", ownerId, lockFileUri); - } else if (code == RATE_LIMIT_ERROR_CODE) { - logger.warn("OwnerId: {}, Rate limit exceeded for lock file: {}", ownerId, lockFileUri); - } else if (code >= INTERNAL_SERVER_ERROR_CODE_MIN) { - logger.warn("OwnerId: {}, Azure returned internal server error code for lock file: {}", ownerId, lockFileUri, e); - } else { - logger.warn("OwnerId: {}, Error writing lock file: {}", ownerId, lockFileUri, e); - } - return LockUpsertResult.UNKNOWN_ERROR; - } - @Override - public Option readObject(String filePath, boolean checkExistsFirst) { - try { - AzureLocation location = parseAzureLocation(filePath); - AzureLocation lockLocation = parseAzureLocation(lockFileUri); - BlobServiceClient svc = location.blobEndpoint.equals(lockLocation.blobEndpoint) - ? blobServiceClient - : createDefaultBlobServiceClient().apply(location.withProperties(clientProperties)); - BlobClient blobClient = svc.getBlobContainerClient(location.container).getBlobClient(location.blobPath); - - if (checkExistsFirst && !blobClient.exists()) { - logger.debug("JSON config file not found: {}", filePath); - return Option.empty(); - } - byte[] bytes = blobClient.downloadContent().toBytes(); - return Option.of(new String(bytes, UTF_8)); - } catch (BlobStorageException e) { - if (e.getStatusCode() == NOT_FOUND_ERROR_CODE) { - logger.debug("JSON config file not found: {}", filePath); - } else { - logger.warn("Error reading JSON config file: {}", filePath, e); - } - return Option.empty(); - } catch (Exception e) { - logger.warn("Error reading JSON config file: {}", filePath, e); - return Option.empty(); - } - } - - @Override - public boolean writeObject(String filePath, String content) { - try { - AzureLocation location = parseAzureLocation(filePath); - AzureLocation lockLocation = parseAzureLocation(lockFileUri); - BlobServiceClient svc = location.blobEndpoint.equals(lockLocation.blobEndpoint) - ? blobServiceClient - : createDefaultBlobServiceClient().apply(location.withProperties(clientProperties)); - BlobClient blobClient = svc.getBlobContainerClient(location.container).getBlobClient(location.blobPath); - blobClient.upload(BinaryData.fromString(content), true); - logger.debug("Successfully wrote object to: {}", filePath); - return true; - } catch (Exception e) { - logger.error("Error writing object to: {}", filePath, e); - return false; - } - } - - @Override - public void close() { - // BlobServiceClient does not require explicit close. No-op. - } - - @VisibleForTesting - static AzureLocation parseAzureLocation(String uriString) { - try { - URI uri = new URI(uriString); - String scheme = uri.getScheme(); - if (scheme == null) { - throw new IllegalArgumentException("URI does not contain a valid scheme."); - } - - String authority = uri.getAuthority(); - String path = uri.getPath() == null ? "" : uri.getPath().replaceFirst("/", ""); - - // ADLS Gen2: abfs[s]://@.dfs.core.windows.net/ - if ("abfs".equalsIgnoreCase(scheme) || "abfss".equalsIgnoreCase(scheme)) { - if (authority == null || !authority.contains("@")) { - throw new IllegalArgumentException("ABFS URI authority must be in the form '@': " + uriString); - } - String[] parts = authority.split("@", 2); - String container = parts[0]; - String host = parts[1]; - String endpointHost = dfsHostToBlobHost(host); - String endpoint = "https://" + endpointHost; - if (container.isEmpty() || path.isEmpty()) { - throw new IllegalArgumentException("ABFS URI must contain container and path: " + uriString); - } - return new AzureLocation(endpoint, container, path, null); - } - - // Azure Blob Storage: wasb[s]://@.blob.core.windows.net/ - if ("wasb".equalsIgnoreCase(scheme) || "wasbs".equalsIgnoreCase(scheme)) { - if (authority == null || !authority.contains("@")) { - throw new IllegalArgumentException("WASB URI authority must be in the form '@': " + uriString); - } - String[] parts = authority.split("@", 2); - String container = parts[0]; - String host = parts[1]; - String endpoint = "https://" + host; - if (container.isEmpty() || path.isEmpty()) { - throw new IllegalArgumentException("WASB URI must contain container and path: " + uriString); - } - return new AzureLocation(endpoint, container, path, null); - } - - // Direct HTTPS: https://.blob.core.windows.net// - if ("https".equalsIgnoreCase(scheme)) { - if (authority == null || authority.isEmpty()) { - throw new IllegalArgumentException("HTTPS URI authority missing: " + uriString); - } - int slash = path.indexOf('/'); - if (slash <= 0 || slash == path.length() - 1) { - throw new IllegalArgumentException("HTTPS URI must be in the form 'https:////': " + uriString); - } - String container = path.substring(0, slash); - String blobPath = path.substring(slash + 1); - return new AzureLocation("https://" + authority, container, blobPath, null); - } - - throw new IllegalArgumentException("Unsupported scheme for Azure storage lock: " + scheme - + ". Supported schemes: abfs, abfss, wasb, wasbs, https"); - } catch (URISyntaxException e) { - throw new HoodieLockException("Failed to parse Azure URI: " + uriString, e); - } - } - - private static String dfsHostToBlobHost(String host) { - if (host == null) { - return null; - } - if (host.endsWith(".dfs.core.windows.net")) { - return host.replace(".dfs.core.windows.net", ".blob.core.windows.net"); - } - return host; - } - - @VisibleForTesting - static final class AzureLocation { - final String blobEndpoint; - final String container; - final String blobPath; - final Properties props; - - AzureLocation(String blobEndpoint, String container, String blobPath, Properties props) { - this.blobEndpoint = blobEndpoint; - this.container = container; - this.blobPath = blobPath; - this.props = props; - } - - AzureLocation withProperties(Properties props) { - return new AzureLocation(blobEndpoint, container, blobPath, props); - } - } -} diff --git a/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java b/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java index e051def6e828c..4ab42d3d22b66 100644 --- a/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java +++ b/hudi-azure/src/main/java/org/apache/hudi/config/AzureStorageLockConfig.java @@ -40,6 +40,7 @@ public class AzureStorageLockConfig extends HoodieConfig { .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "connection.string") .noDefaultValue() .markAdvanced() + .sinceVersion("1.2.0") .withDocumentation("For Azure based lock provider, optional Azure Storage connection string used " + "for authenticating BlobServiceClient."); @@ -47,6 +48,7 @@ public class AzureStorageLockConfig extends HoodieConfig { .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "sas.token") .noDefaultValue() .markAdvanced() + .sinceVersion("1.2.0") .withDocumentation("For Azure based lock provider, optional SAS token used for " + "authenticating BlobServiceClient when connection string is not provided. SAS token is not recommended for production use by Azure."); @@ -54,6 +56,7 @@ public class AzureStorageLockConfig extends HoodieConfig { .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "managed.identity.client.id") .noDefaultValue() .markAdvanced() + .sinceVersion("1.2.0") .withDocumentation("For Azure based lock provider, client ID of a user-assigned managed identity to authenticate " + "BlobServiceClient with ManagedIdentityCredential."); @@ -61,6 +64,7 @@ public class AzureStorageLockConfig extends HoodieConfig { .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.tenant.id") .noDefaultValue() .markAdvanced() + .sinceVersion("1.2.0") .withDocumentation("For Azure based lock provider, Azure AD tenant ID used together with " + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.id' and " + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.secret' to authenticate via service principal " @@ -70,6 +74,7 @@ public class AzureStorageLockConfig extends HoodieConfig { .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.id") .noDefaultValue() .markAdvanced() + .sinceVersion("1.2.0") .withDocumentation("For Azure based lock provider, Azure AD application (client) ID used together with " + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.tenant.id' and " + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.secret' to authenticate via service principal " @@ -79,6 +84,7 @@ public class AzureStorageLockConfig extends HoodieConfig { .key(AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.secret") .noDefaultValue() .markAdvanced() + .sinceVersion("1.2.0") .withDocumentation("For Azure based lock provider, Azure AD client secret used together with " + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.tenant.id' and " + "'" + AZURE_BASED_LOCK_PROPERTY_PREFIX + "client.id' to authenticate via service principal " diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSStorageLockClientAzurite.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSStorageLockClientAzurite.java deleted file mode 100644 index 9ca862a0ec855..0000000000000 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITADLSStorageLockClientAzurite.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hudi.azure.transaction.lock; - -import org.apache.hudi.client.transaction.lock.models.LockGetResult; -import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; -import org.apache.hudi.client.transaction.lock.models.StorageLockData; -import org.apache.hudi.client.transaction.lock.models.StorageLockFile; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.collection.Pair; - -import com.azure.storage.blob.BlobContainerClient; -import com.azure.storage.blob.BlobServiceClient; -import com.azure.storage.blob.BlobServiceClientBuilder; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; -import org.testcontainers.containers.GenericContainer; -import org.testcontainers.junit.jupiter.Container; -import org.testcontainers.junit.jupiter.Testcontainers; -import org.testcontainers.utility.DockerImageName; -import org.testcontainers.containers.wait.strategy.Wait; - -import java.time.Duration; -import java.util.Properties; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Integration tests for {@link ADLSStorageLockClient} using Azurite (Azure Storage emulator). - * - *

Run with: {@code mvn -Pazure-integration-tests -pl hudi-azure verify} - */ -@Testcontainers(disabledWithoutDocker = true) -@DisabledIfEnvironmentVariable(named = "SKIP_AZURITE_IT", matches = "true") -public class ITADLSStorageLockClientAzurite { - - private static final DockerImageName AZURITE_IMAGE = - DockerImageName.parse("mcr.microsoft.com/azure-storage/azurite"); - - // Standard Azurite defaults (documented by Microsoft) - private static final String ACCOUNT_NAME = "devstoreaccount1"; - // Standard Azurite dev account key (NOT a secret; used by the emulator by default) - // See: https://learn.microsoft.com/azure/storage/common/storage-use-azurite?tabs=visual-studio#connection-strings - private static final String ACCOUNT_KEY = - "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="; - - @Container - public static final GenericContainer AZURITE = - new GenericContainer<>(AZURITE_IMAGE) - .withExposedPorts(10000) - .withCommand("azurite-blob", "--blobHost", "0.0.0.0", "--blobPort", "10000", "--loose") - .waitingFor(Wait.forListeningPort().withStartupTimeout(Duration.ofSeconds(60))); - - private static String blobEndpoint() { - // Azurite expects / in the endpoint URL path - return "http://" + AZURITE.getHost() + ":" + AZURITE.getMappedPort(10000) + "/" + ACCOUNT_NAME; - } - - private static String connectionString() { - String key = System.getenv("AZURITE_ACCOUNT_KEY"); - if (key == null || key.trim().isEmpty()) { - key = ACCOUNT_KEY; - } - return "DefaultEndpointsProtocol=http;" - + "AccountName=" + ACCOUNT_NAME + ";" - + "AccountKey=" + key + ";" - + "BlobEndpoint=" + blobEndpoint() + ";"; - } - - @Test - void testCreateUpdateAndReadLockFileWithAzurite() { - String container = "container"; - String blobPath = "locks/table_lock.json"; - - BlobServiceClient svc = new BlobServiceClientBuilder() - .connectionString(connectionString()) - .buildClient(); - BlobContainerClient containerClient = svc.getBlobContainerClient(container); - containerClient.createIfNotExists(); - - // NOTE: lockFileUri only needs to be parseable for container/blobPath extraction. - // The actual endpoint comes from the connection string. - String lockFileUri = "https://localhost:10000/" + container + "/" + blobPath; - Properties props = new Properties(); - props.setProperty(ADLSStorageLockClient.AZURE_CONNECTION_STRING, connectionString()); - - ADLSStorageLockClient owner1 = new ADLSStorageLockClient("owner1", lockFileUri, props); - StorageLockData lockData1 = new StorageLockData(false, System.currentTimeMillis() + 60_000, "owner1"); - Pair> upsert1 = owner1.tryUpsertLockFile(lockData1, Option.empty()); - assertEquals(LockUpsertResult.SUCCESS, upsert1.getLeft()); - assertTrue(upsert1.getRight().isPresent()); - - // Read back - Pair> read = owner1.readCurrentLockFile(); - assertEquals(LockGetResult.SUCCESS, read.getLeft()); - assertTrue(read.getRight().isPresent()); - - // Wrong ETag should fail with precondition and be mapped to ACQUIRED_BY_OTHERS - ADLSStorageLockClient owner2 = new ADLSStorageLockClient("owner2", lockFileUri, props); - StorageLockFile wrongPrev = new StorageLockFile(lockData1, "\"etag-does-not-match\""); - StorageLockData lockData2 = new StorageLockData(false, System.currentTimeMillis() + 60_000, "owner2"); - Pair> upsert2 = - owner2.tryUpsertLockFile(lockData2, Option.of(wrongPrev)); - assertEquals(LockUpsertResult.ACQUIRED_BY_OTHERS, upsert2.getLeft()); - } -} diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageBasedLockProvider.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageBasedLockProvider.java deleted file mode 100644 index 4c9ea9b024999..0000000000000 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageBasedLockProvider.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hudi.azure.transaction.lock; - -import org.apache.hudi.client.transaction.lock.StorageBasedLockProvider; -import org.apache.hudi.client.transaction.lock.StorageBasedLockProviderTestBase; -import org.apache.hudi.common.config.LockConfiguration; -import org.apache.hudi.common.testutils.HoodieTestUtils; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; - -import static org.apache.hudi.common.config.HoodieCommonConfig.BASE_PATH; - -@Disabled("Requires Azurite/Testcontainers-based integration environment (not enabled by default).") -public class TestADLSStorageBasedLockProvider extends StorageBasedLockProviderTestBase { - - @BeforeEach - void setupLockProvider() { - providerProperties.put(BASE_PATH.key(), - "abfs://container@account.dfs.core.windows.net/lake/db/tbl-default"); - lockProvider = createLockProvider(); - } - - @Override - protected StorageBasedLockProvider createLockProvider() { - LockConfiguration lockConf = new LockConfiguration(providerProperties); - return new StorageBasedLockProvider(lockConf, HoodieTestUtils.getDefaultStorageConf()); - } -} diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClient.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClient.java deleted file mode 100644 index 3b04224602821..0000000000000 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClient.java +++ /dev/null @@ -1,314 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hudi.azure.transaction.lock; - -import org.apache.hudi.client.transaction.lock.models.LockGetResult; -import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; -import org.apache.hudi.client.transaction.lock.models.StorageLockData; -import org.apache.hudi.client.transaction.lock.models.StorageLockFile; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.collection.Pair; - -import com.azure.core.util.Context; -import com.azure.storage.blob.BlobClient; -import com.azure.storage.blob.BlobContainerClient; -import com.azure.storage.blob.BlobServiceClient; -import com.azure.storage.blob.models.BlobProperties; -import com.azure.storage.blob.models.BlobStorageException; -import com.azure.storage.blob.options.BlobParallelUploadOptions; -import com.azure.storage.blob.specialized.BlobInputStream; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.ArgumentCaptor; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.slf4j.Logger; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.util.Properties; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.contains; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.ArgumentMatchers.isNull; -import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoMoreInteractions; -import static org.mockito.Mockito.when; - -@ExtendWith(MockitoExtension.class) -public class TestADLSStorageLockClient { - - private static final String OWNER_ID = "ownerId"; - private static final String LOCK_FILE_URI = - "abfs://container@account.dfs.core.windows.net/lockFilePath"; - private static final String LOCK_FILE_URI_WITH_NESTED_PATH = - "abfs://container@account.dfs.core.windows.net/lake/db/tbl-default/.hoodie/.locks/table_lock.json"; - - @Mock - private BlobServiceClient mockBlobServiceClient; - - @Mock - private BlobContainerClient mockContainerClient; - - @Mock - private BlobClient mockBlobClient; - - @Mock - private BlobProperties mockBlobProperties; - - @Mock - private Logger mockLogger; - - private ADLSStorageLockClient lockClient; - - @BeforeEach - void setUp() { - setUp(LOCK_FILE_URI); - } - - private void setUp(String lockFileUri) { - when(mockBlobServiceClient.getBlobContainerClient(eq("container"))).thenReturn(mockContainerClient); - String expectedBlobPath = lockFileUri.replaceFirst("^abfss?://[^/]+/", ""); - when(mockContainerClient.getBlobClient(eq(expectedBlobPath))).thenReturn(mockBlobClient); - - lockClient = new ADLSStorageLockClient( - OWNER_ID, - lockFileUri, - new Properties(), - (location) -> mockBlobServiceClient, - mockLogger); - } - - @Test - void testTryUpsertLockFile_noPreviousLock_success_setsIfNoneMatchStar() throws Exception { - StorageLockData lockData = new StorageLockData(false, 123L, "test-owner"); - when(mockBlobClient.getProperties()).thenReturn(mockBlobProperties); - when(mockBlobProperties.getETag()).thenReturn("\"etag-1\""); - - ArgumentCaptor optionsCaptor = ArgumentCaptor.forClass(BlobParallelUploadOptions.class); - when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))) - .thenReturn(null); - - Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); - - assertEquals(LockUpsertResult.SUCCESS, result.getLeft()); - assertTrue(result.getRight().isPresent()); - assertEquals("\"etag-1\"", result.getRight().get().getVersionId()); - - verify(mockBlobClient).uploadWithResponse(optionsCaptor.capture(), isNull(), eq(Context.NONE)); - BlobParallelUploadOptions options = optionsCaptor.getValue(); - assertRequestCondition(options, "ifNoneMatch", "*"); - verifyNoMoreInteractions(mockLogger); - } - - @Test - void testTryUpsertLockFile_withPreviousLock_success_setsIfMatch() throws Exception { - StorageLockData lockData = new StorageLockData(false, 999L, "existing-owner"); - StorageLockFile previousLockFile = new StorageLockFile(lockData, "\"etag-prev\""); - - when(mockBlobClient.getProperties()).thenReturn(mockBlobProperties); - when(mockBlobProperties.getETag()).thenReturn("\"etag-new\""); - - ArgumentCaptor optionsCaptor = ArgumentCaptor.forClass(BlobParallelUploadOptions.class); - when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))) - .thenReturn(null); - - Pair> result = - lockClient.tryUpsertLockFile(lockData, Option.of(previousLockFile)); - - assertEquals(LockUpsertResult.SUCCESS, result.getLeft()); - assertTrue(result.getRight().isPresent()); - assertEquals("\"etag-new\"", result.getRight().get().getVersionId()); - - verify(mockBlobClient).uploadWithResponse(optionsCaptor.capture(), isNull(), eq(Context.NONE)); - BlobParallelUploadOptions options = optionsCaptor.getValue(); - assertRequestCondition(options, "ifMatch", "\"etag-prev\""); - } - - @Test - void testTryUpsertLockFile_preconditionFailed_returnsAcquiredByOthers() { - StorageLockData lockData = new StorageLockData(false, 999L, "owner"); - BlobStorageException ex = mock(BlobStorageException.class); - when(ex.getStatusCode()).thenReturn(412); - when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); - - Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); - - assertEquals(LockUpsertResult.ACQUIRED_BY_OTHERS, result.getLeft()); - assertTrue(result.getRight().isEmpty()); - verify(mockLogger).info( - contains("Unable to write new lock file. Another process has modified this lockfile"), - eq(OWNER_ID), - eq(LOCK_FILE_URI)); - } - - @Test - void testTryUpsertLockFile_rateLimit_returnsUnknownError() { - StorageLockData lockData = new StorageLockData(false, 999L, "owner"); - BlobStorageException ex = mock(BlobStorageException.class); - when(ex.getStatusCode()).thenReturn(429); - when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); - - Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); - - assertEquals(LockUpsertResult.UNKNOWN_ERROR, result.getLeft()); - assertTrue(result.getRight().isEmpty()); - verify(mockLogger).warn(contains("Rate limit exceeded"), eq(OWNER_ID), eq(LOCK_FILE_URI)); - } - - @Test - void testTryUpsertLockFile_serverError_returnsUnknownError() { - StorageLockData lockData = new StorageLockData(false, 999L, "owner"); - BlobStorageException ex = mock(BlobStorageException.class); - when(ex.getStatusCode()).thenReturn(503); - when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); - - Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); - - assertEquals(LockUpsertResult.UNKNOWN_ERROR, result.getLeft()); - assertTrue(result.getRight().isEmpty()); - verify(mockLogger).warn(contains("Azure returned internal server error code"), eq(OWNER_ID), eq(LOCK_FILE_URI), eq(ex)); - } - - @Test - void testTryUpsertLockFile_unexpectedError_rethrows() { - StorageLockData lockData = new StorageLockData(false, 999L, "owner"); - BlobStorageException ex = mock(BlobStorageException.class); - when(ex.getStatusCode()).thenReturn(400); - when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); - - assertThrows(BlobStorageException.class, () -> lockClient.tryUpsertLockFile(lockData, Option.empty())); - } - - @Test - void testReadCurrentLockFile_notFound_returnsNotExists() { - BlobStorageException ex = mock(BlobStorageException.class); - when(ex.getStatusCode()).thenReturn(404); - when(mockBlobClient.getProperties()).thenThrow(ex); - - Pair> result = lockClient.readCurrentLockFile(); - assertEquals(LockGetResult.NOT_EXISTS, result.getLeft()); - assertTrue(result.getRight().isEmpty()); - verify(mockLogger).info(contains("Object not found"), eq(OWNER_ID), eq(LOCK_FILE_URI)); - } - - @Test - void testReadCurrentLockFile_blobFound_success() { - setUp(LOCK_FILE_URI_WITH_NESTED_PATH); - - StorageLockData data = new StorageLockData(false, 1700000000000L, "testOwner"); - byte[] json = StorageLockFile.toByteArray(data); - ByteArrayInputStream delegate = new ByteArrayInputStream(json); - BlobInputStream stream = mock(BlobInputStream.class); - try { - doAnswer(inv -> delegate.read(inv.getArgument(0), inv.getArgument(1), inv.getArgument(2))) - .when(stream).read(any(byte[].class), anyInt(), anyInt()); - } catch (IOException ioe) { - throw new RuntimeException("Failed to stub BlobInputStream", ioe); - } - - when(mockBlobClient.getProperties()).thenReturn(mockBlobProperties); - when(mockBlobProperties.getETag()).thenReturn("\"etag-123\""); - when(mockBlobClient.openInputStream()).thenReturn(stream); - - Pair> result = lockClient.readCurrentLockFile(); - - assertEquals(LockGetResult.SUCCESS, result.getLeft()); - assertTrue(result.getRight().isPresent()); - assertEquals("\"etag-123\"", result.getRight().get().getVersionId()); - assertEquals("testOwner", result.getRight().get().getOwner()); - } - - @Test - void testReadCurrentLockFile_streamRead404_returnsUnknownError() { - when(mockBlobClient.getProperties()).thenReturn(mockBlobProperties); - when(mockBlobProperties.getETag()).thenReturn("\"etag-123\""); - - BlobStorageException ex404 = mock(BlobStorageException.class); - when(ex404.getStatusCode()).thenReturn(404); - when(mockBlobClient.openInputStream()).thenThrow(ex404); - - Pair> result = lockClient.readCurrentLockFile(); - assertEquals(LockGetResult.UNKNOWN_ERROR, result.getLeft()); - assertTrue(result.getRight().isEmpty()); - } - - @Test - void testClose_noop() { - lockClient.close(); - } - - private static void assertRequestCondition(Object blobParallelUploadOptions, String expectedField, String expectedValue) throws Exception { - Object requestConditions = tryInvoke(blobParallelUploadOptions, "getRequestConditions"); - if (requestConditions == null) { - Field f = blobParallelUploadOptions.getClass().getDeclaredField("requestConditions"); - f.setAccessible(true); - requestConditions = f.get(blobParallelUploadOptions); - } - assertNotNull(requestConditions, "requestConditions should be set on upload options"); - - Object actualIfMatch = tryInvoke(requestConditions, "getIfMatch"); - if (actualIfMatch == null) { - actualIfMatch = getFieldIfExists(requestConditions, "ifMatch"); - } - Object actualIfNoneMatch = tryInvoke(requestConditions, "getIfNoneMatch"); - if (actualIfNoneMatch == null) { - actualIfNoneMatch = getFieldIfExists(requestConditions, "ifNoneMatch"); - } - - if ("ifMatch".equals(expectedField)) { - assertEquals(expectedValue, actualIfMatch, "Expected If-Match to be set"); - } else if ("ifNoneMatch".equals(expectedField)) { - assertEquals(expectedValue, actualIfNoneMatch, "Expected If-None-Match to be set"); - } else { - throw new IllegalArgumentException("Unexpected expectedField: " + expectedField); - } - } - - private static Object tryInvoke(Object target, String methodName) { - try { - Method m = target.getClass().getMethod(methodName); - return m.invoke(target); - } catch (Exception ignored) { - return null; - } - } - - private static Object getFieldIfExists(Object target, String fieldName) { - try { - Field f = target.getClass().getDeclaredField(fieldName); - f.setAccessible(true); - return f.get(target); - } catch (Exception ignored) { - return null; - } - } -} diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClientUriParsing.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClientUriParsing.java deleted file mode 100644 index 9504b3cc6d8b5..0000000000000 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestADLSStorageLockClientUriParsing.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.azure.transaction.lock; - -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -public class TestADLSStorageLockClientUriParsing { - - @Test - public void testParseAbfsUri() { - String uri = "abfs://container@account.dfs.core.windows.net/table/.hoodie/.locks/table_lock.json"; - - ADLSStorageLockClient.AzureLocation l = ADLSStorageLockClient.parseAzureLocation(uri); - - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); - } - - @Test - public void testParseAbfssUri() { - String uri = "abfss://container@account.dfs.core.windows.net/lake/db/tbl/.hoodie/.locks/table_lock.json"; - - ADLSStorageLockClient.AzureLocation l = ADLSStorageLockClient.parseAzureLocation(uri); - - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("lake/db/tbl/.hoodie/.locks/table_lock.json", l.blobPath); - } - - @Test - public void testParseWasbUri() { - String uri = "wasb://container@account.blob.core.windows.net/table/.hoodie/.locks/table_lock.json"; - - ADLSStorageLockClient.AzureLocation l = ADLSStorageLockClient.parseAzureLocation(uri); - - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); - } - - @Test - public void testParseWasbsUri() { - String uri = "wasbs://container@account.blob.core.windows.net/lake/db/tbl/.hoodie/.locks/table_lock.json"; - - ADLSStorageLockClient.AzureLocation l = ADLSStorageLockClient.parseAzureLocation(uri); - - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("lake/db/tbl/.hoodie/.locks/table_lock.json", l.blobPath); - } - - @Test - public void testParseHttpsUri() { - String uri = "https://account.blob.core.windows.net/container/table/.hoodie/.locks/table_lock.json"; - - ADLSStorageLockClient.AzureLocation l = ADLSStorageLockClient.parseAzureLocation(uri); - - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); - } -} From 637a857b02bfb874dcc5eb5bfec909f94d271326 Mon Sep 17 00:00:00 2001 From: Revanth Chandupatla Date: Mon, 30 Mar 2026 15:39:00 -0400 Subject: [PATCH 09/11] Add review comments. explicit exceptions and address incosistent quotes if present in etag --- .../lock/AzureStorageLockClient.java | 46 +++++++++--- .../lock/ITAzureStorageLockClientAzurite.java | 5 +- .../lock/TestAzureStorageLockClient.java | 70 +++++++++++++++++-- .../TestAzureStorageLockClientUriParsing.java | 36 +++++----- 4 files changed, 124 insertions(+), 33 deletions(-) diff --git a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java index beb3973ad6a0d..88a5399624745 100644 --- a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java +++ b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java @@ -53,6 +53,7 @@ import com.azure.storage.blob.models.BlockBlobItem; import com.azure.storage.blob.options.BlobParallelUploadOptions; import lombok.AllArgsConstructor; +import lombok.Getter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -67,6 +68,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hudi.common.util.ConfigUtils.getLongWithAltKeys; +import static org.apache.hudi.common.util.ConfigUtils.getStringWithAltKeys; /** * Azure Storage implementation of {@link StorageLockClient} using Azure Blob conditional requests. @@ -113,7 +115,6 @@ public class AzureStorageLockClient implements StorageLockClient { private static final int RATE_LIMIT_ERROR_CODE = 429; private static final int INTERNAL_SERVER_ERROR_CODE_MIN = 500; - public static final String AZURE_CONNECTION_STRING = AzureStorageLockConfig.AZURE_CONNECTION_STRING.key(); public static final String AZURE_SAS_TOKEN = AzureStorageLockConfig.AZURE_SAS_TOKEN.key(); private final Logger logger; @@ -165,7 +166,7 @@ private static Functions.Function1 createDefau configureAzureClientOptions(builder, props); // 1. Connection string (includes shared-key auth). - String connectionString = props == null ? null : props.getProperty(AZURE_CONNECTION_STRING); + String connectionString = getStringWithAltKeys(props, AzureStorageLockConfig.AZURE_CONNECTION_STRING, true); if (connectionString != null && !connectionString.trim().isEmpty()) { return builder.connectionString(connectionString).buildClient(); } @@ -173,7 +174,7 @@ private static Functions.Function1 createDefau builder.endpoint(location.blobEndpoint); // 2. SAS token. - String sasToken = props == null ? null : props.getProperty(AZURE_SAS_TOKEN); + String sasToken = getStringWithAltKeys(props, AzureStorageLockConfig.AZURE_SAS_TOKEN, true); if (sasToken != null && !sasToken.trim().isEmpty()) { String cleaned = sasToken.startsWith("?") ? sasToken.substring(1) : sasToken; return builder.credential(new AzureSasCredential(cleaned)).buildClient(); @@ -213,7 +214,9 @@ private static void configureAzureClientOptions(BlobServiceClientBuilder builder public Pair> tryUpsertLockFile( StorageLockData newLockData, Option previousLockFile) { - String expectedEtag = previousLockFile.isPresent() ? previousLockFile.get().getVersionId() : null; + String expectedEtag = previousLockFile.isPresent() + ? previousLockFile.get().getVersionId() + : null; try { StorageLockFile updated = createOrUpdateLockFileInternal(newLockData, expectedEtag); return Pair.of(LockUpsertResult.SUCCESS, Option.of(updated)); @@ -237,7 +240,10 @@ public Pair> tryUpsertLockFile( public Pair> readCurrentLockFile() { try { Response response = lockBlobClient.downloadContentWithResponse(null, null, null, Context.NONE); - String eTag = response.getHeaders() != null ? response.getHeaders().getValue("ETag") : null; + //Check for null or empty ETag and inconsistent quotes + String eTag = canonicalizeEtag( + response.getHeaders() != null ? response.getHeaders().getValue("ETag") : null, + "download"); StorageLockFile lockFile = StorageLockFile.createFromStream(response.getValue().toStream(), eTag); return Pair.of(LockGetResult.SUCCESS, Option.of(lockFile)); } catch (BlobStorageException e) { @@ -276,12 +282,33 @@ private StorageLockFile createOrUpdateLockFileInternal(StorageLockData lockData, if (newEtag == null && response.getValue() != null) { newEtag = response.getValue().getETag(); } - if (newEtag == null || newEtag.isEmpty()) { - throw new HoodieLockException("Missing ETag in Azure upload response for lock file: " + lockFileUri); - } + //Check for null or empty ETag and inconsistent quotes + newEtag = canonicalizeEtag(newEtag, "upload"); return new StorageLockFile(lockData, newEtag); } + private String canonicalizeEtag(String eTag, String operation) { + if (eTag == null) { + throw new HoodieLockException("Missing ETag in Azure " + operation + " response for lock file: " + lockFileUri); + } + + String normalized = eTag.trim(); + if (normalized.isEmpty()) { + throw new HoodieLockException("Missing ETag in Azure " + operation + " response for lock file: " + lockFileUri); + } + + boolean startsWithQuote = normalized.startsWith("\""); + boolean endsWithQuote = normalized.endsWith("\""); + if (startsWithQuote && endsWithQuote) { + return normalized; + } + if (!startsWithQuote && !endsWithQuote) { + return "\"" + normalized + "\""; + } + + throw new HoodieLockException("Malformed ETag in Azure " + operation + " response for lock file: " + lockFileUri); + } + private LockUpsertResult handleUpsertBlobStorageException(BlobStorageException e) { int code = e.getStatusCode(); if (code == PRECONDITION_FAILURE_ERROR_CODE || e.getErrorCode() == BlobErrorCode.CONDITION_NOT_MET) { @@ -425,8 +452,9 @@ private static String dfsHostToBlobHost(String host) { } return host; } - + @VisibleForTesting + @Getter @AllArgsConstructor static final class AzureLocation { final String blobEndpoint; diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITAzureStorageLockClientAzurite.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITAzureStorageLockClientAzurite.java index 8a27e2f6bf7e4..a7897a6ac3d7d 100644 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITAzureStorageLockClientAzurite.java +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/ITAzureStorageLockClientAzurite.java @@ -25,6 +25,7 @@ import org.apache.hudi.client.transaction.lock.models.StorageLockFile; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.config.AzureStorageLockConfig; import com.azure.storage.blob.BlobContainerClient; import com.azure.storage.blob.BlobServiceClient; @@ -98,9 +99,9 @@ void testCreateUpdateAndReadLockFileWithAzurite() { // NOTE: lockFileUri only needs to be parseable for container/blobPath extraction. // The actual endpoint comes from the connection string. - String lockFileUri = "https://localhost:10000/" + container + "/" + blobPath; + String lockFileUri = "https://localhost:10000/" + ACCOUNT_NAME + "/" + container + "/" + blobPath; Properties props = new Properties(); - props.setProperty(AzureStorageLockClient.AZURE_CONNECTION_STRING, connectionString()); + props.setProperty(AzureStorageLockConfig.AZURE_CONNECTION_STRING.key(), connectionString()); AzureStorageLockClient owner1 = new AzureStorageLockClient("owner1", lockFileUri, props); StorageLockData lockData1 = new StorageLockData(false, System.currentTimeMillis() + 60_000, "owner1"); diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClient.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClient.java index 6a185809f3b7d..801d20b91872b 100644 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClient.java +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClient.java @@ -20,6 +20,7 @@ package org.apache.hudi.azure.transaction.lock; import org.apache.hudi.client.transaction.lock.models.LockGetResult; +import org.apache.hudi.exception.HoodieLockException; import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; import org.apache.hudi.client.transaction.lock.models.StorageLockData; import org.apache.hudi.client.transaction.lock.models.StorageLockFile; @@ -152,6 +153,25 @@ void testTryUpsertLockFile_withPreviousLock_success_setsIfMatch() throws Excepti assertRequestCondition(options, "ifMatch", "\"etag-prev\""); } + @Test + void testTryUpsertLockFile_fallsBackToBlockBlobItemEtag() { + StorageLockData lockData = new StorageLockData(false, 123L, "test-owner"); + @SuppressWarnings("unchecked") + Response response = (Response) mock(Response.class); + BlockBlobItem blockBlobItem = mock(BlockBlobItem.class); + when(response.getHeaders()).thenReturn(null); + when(response.getValue()).thenReturn(blockBlobItem); + when(blockBlobItem.getETag()).thenReturn("0x8DABC123"); + when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))) + .thenReturn(response); + + Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); + + assertEquals(LockUpsertResult.SUCCESS, result.getLeft()); + assertTrue(result.getRight().isPresent()); + assertEquals("\"0x8DABC123\"", result.getRight().get().getVersionId()); + } + @Test void testTryUpsertLockFile_preconditionFailed_returnsAcquiredByOthers() { StorageLockData lockData = new StorageLockData(false, 999L, "owner"); @@ -198,13 +218,16 @@ void testTryUpsertLockFile_serverError_returnsUnknownError() { } @Test - void testTryUpsertLockFile_unexpectedError_rethrows() { + void testTryUpsertLockFile_unexpectedError_returnsUnknownError() { StorageLockData lockData = new StorageLockData(false, 999L, "owner"); BlobStorageException ex = mock(BlobStorageException.class); when(ex.getStatusCode()).thenReturn(400); when(mockBlobClient.uploadWithResponse(any(BlobParallelUploadOptions.class), isNull(), eq(Context.NONE))).thenThrow(ex); - assertThrows(BlobStorageException.class, () -> lockClient.tryUpsertLockFile(lockData, Option.empty())); + Pair> result = lockClient.tryUpsertLockFile(lockData, Option.empty()); + + assertEquals(LockUpsertResult.UNKNOWN_ERROR, result.getLeft()); + assertTrue(result.getRight().isEmpty()); } @Test @@ -238,6 +261,46 @@ void testReadCurrentLockFile_blobFound_success() { assertEquals("testOwner", result.getRight().get().getOwner()); } + @Test + void testReadCurrentLockFile_missingEtag_throwsHoodieLockException() { + StorageLockData data = new StorageLockData(false, 1700000000000L, "testOwner"); + byte[] json = StorageLockFile.toByteArray(data); + BlobDownloadContentResponse response = mock(BlobDownloadContentResponse.class); + when(response.getHeaders()).thenReturn(null); + when(mockBlobClient.downloadContentWithResponse(isNull(), isNull(), isNull(), eq(Context.NONE))).thenReturn(response); + + HoodieLockException exception = + assertThrows(HoodieLockException.class, () -> lockClient.readCurrentLockFile()); + + assertTrue(exception.getMessage().contains("Missing ETag in Azure download response for lock file")); + } + + @Test + void testReadCurrentLockFile_emptyEtag_throwsHoodieLockException() { + StorageLockData data = new StorageLockData(false, 1700000000000L, "testOwner"); + byte[] json = StorageLockFile.toByteArray(data); + BlobDownloadContentResponse response = mock(BlobDownloadContentResponse.class); + when(response.getHeaders()).thenReturn(new HttpHeaders().set("ETag", "")); + when(mockBlobClient.downloadContentWithResponse(isNull(), isNull(), isNull(), eq(Context.NONE))).thenReturn(response); + + HoodieLockException exception = + assertThrows(HoodieLockException.class, () -> lockClient.readCurrentLockFile()); + + assertTrue(exception.getMessage().contains("Missing ETag in Azure download response for lock file")); + } + + @Test + void testReadCurrentLockFile_malformedQuotedEtag_throwsHoodieLockException() { + BlobDownloadContentResponse response = mock(BlobDownloadContentResponse.class); + when(response.getHeaders()).thenReturn(new HttpHeaders().set("ETag", "\"etag-123")); + when(mockBlobClient.downloadContentWithResponse(isNull(), isNull(), isNull(), eq(Context.NONE))).thenReturn(response); + + HoodieLockException exception = + assertThrows(HoodieLockException.class, () -> lockClient.readCurrentLockFile()); + + assertTrue(exception.getMessage().contains("Malformed ETag in Azure download response for lock file")); + } + @Test void testReadCurrentLockFile_download404_returnsNotExists() { BlobStorageException ex404 = mock(BlobStorageException.class); @@ -261,7 +324,6 @@ void testReadObject_reusesSecondaryBlobServiceClientForSameEndpoint() { BlobClient primaryBlobClient = mock(BlobClient.class); when(primaryServiceClient.getBlobContainerClient(any(String.class))).thenReturn(primaryContainerClient); when(primaryContainerClient.getBlobClient(any(String.class))).thenReturn(primaryBlobClient); - when(primaryBlobClient.exists()).thenReturn(false); BlobServiceClient secondaryServiceClient = mock(BlobServiceClient.class); BlobContainerClient secondaryContainerClient = mock(BlobContainerClient.class); @@ -276,7 +338,7 @@ void testReadObject_reusesSecondaryBlobServiceClientForSameEndpoint() { LOCK_FILE_URI, new Properties(), location -> { - if ("https://secondary.blob.core.windows.net".equals(location.blobEndpoint)) { + if ("https://secondary.blob.core.windows.net".equals(location.getBlobEndpoint())) { secondarySupplierInvocations.incrementAndGet(); return secondaryServiceClient; } diff --git a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClientUriParsing.java b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClientUriParsing.java index b87e9fea2477f..ba1cde8047f71 100644 --- a/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClientUriParsing.java +++ b/hudi-azure/src/test/java/org/apache/hudi/azure/transaction/lock/TestAzureStorageLockClientUriParsing.java @@ -30,9 +30,9 @@ public void testParseAbfsUri() { AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + assertEquals("https://account.blob.core.windows.net", l.getBlobEndpoint()); + assertEquals("container", l.getContainer()); + assertEquals("table/.hoodie/.locks/table_lock.json", l.getBlobPath()); } @Test @@ -41,9 +41,9 @@ public void testParseAbfssUri() { AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("lake/db/tbl/.hoodie/.locks/table_lock.json", l.blobPath); + assertEquals("https://account.blob.core.windows.net", l.getBlobEndpoint()); + assertEquals("container", l.getContainer()); + assertEquals("lake/db/tbl/.hoodie/.locks/table_lock.json", l.getBlobPath()); } @Test @@ -52,9 +52,9 @@ public void testParseWasbUri() { AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + assertEquals("https://account.blob.core.windows.net", l.getBlobEndpoint()); + assertEquals("container", l.getContainer()); + assertEquals("table/.hoodie/.locks/table_lock.json", l.getBlobPath()); } @Test @@ -63,9 +63,9 @@ public void testParseWasbsUri() { AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("lake/db/tbl/.hoodie/.locks/table_lock.json", l.blobPath); + assertEquals("https://account.blob.core.windows.net", l.getBlobEndpoint()); + assertEquals("container", l.getContainer()); + assertEquals("lake/db/tbl/.hoodie/.locks/table_lock.json", l.getBlobPath()); } @Test @@ -74,9 +74,9 @@ public void testParseHttpsUri() { AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); - assertEquals("https://account.blob.core.windows.net", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + assertEquals("https://account.blob.core.windows.net", l.getBlobEndpoint()); + assertEquals("container", l.getContainer()); + assertEquals("table/.hoodie/.locks/table_lock.json", l.getBlobPath()); } @Test @@ -85,8 +85,8 @@ public void testParseHttpUri() { AzureStorageLockClient.AzureLocation l = AzureStorageLockClient.parseAzureLocation(uri); - assertEquals("http://127.0.0.1:10000", l.blobEndpoint); - assertEquals("container", l.container); - assertEquals("table/.hoodie/.locks/table_lock.json", l.blobPath); + assertEquals("http://127.0.0.1:10000", l.getBlobEndpoint()); + assertEquals("table", l.getContainer()); + assertEquals(".hoodie/.locks/table_lock.json", l.getBlobPath()); } } From 2ef504fe85cbb9a51e9f4f921843e7d37ec6a3f2 Mon Sep 17 00:00:00 2001 From: chrevanthreddy <27821245+chrevanthreddy@users.noreply.github.com> Date: Wed, 15 Apr 2026 10:18:36 -0400 Subject: [PATCH 10/11] Revert parquet version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d7ccd29ad3024..bc63c732f74ef 100644 --- a/pom.xml +++ b/pom.xml @@ -114,7 +114,7 @@ 5.5.0 2.17 3.0.1-b12 - 1.13.1 + 1.10.1 5.14.1 5.14.1 1.14.1 From 2992185d3b874b02c9c4ec77e6f96c8ee83770d3 Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Fri, 17 Apr 2026 15:49:29 -0700 Subject: [PATCH 11/11] Remove unused variable --- .../azure/transaction/lock/AzureStorageLockClient.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java index 88a5399624745..09cedf6b1faaa 100644 --- a/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java +++ b/hudi-azure/src/main/java/org/apache/hudi/azure/transaction/lock/AzureStorageLockClient.java @@ -18,21 +18,20 @@ package org.apache.hudi.azure.transaction.lock; +import org.apache.hudi.azure.credentials.AzureCredentialFactory; import org.apache.hudi.client.transaction.lock.StorageLockClient; import org.apache.hudi.client.transaction.lock.models.LockGetResult; import org.apache.hudi.client.transaction.lock.models.LockUpsertResult; import org.apache.hudi.client.transaction.lock.models.StorageLockData; import org.apache.hudi.client.transaction.lock.models.StorageLockFile; +import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.util.Functions; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.VisibleForTesting; import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.exception.HoodieLockException; import org.apache.hudi.config.AzureStorageLockConfig; import org.apache.hudi.config.StorageBasedLockConfig; -import org.apache.hudi.common.config.TypedProperties; - -import org.apache.hudi.azure.credentials.AzureCredentialFactory; +import org.apache.hudi.exception.HoodieLockException; import com.azure.core.credential.AzureSasCredential; import com.azure.core.exception.HttpResponseException; @@ -115,8 +114,6 @@ public class AzureStorageLockClient implements StorageLockClient { private static final int RATE_LIMIT_ERROR_CODE = 429; private static final int INTERNAL_SERVER_ERROR_CODE_MIN = 500; - public static final String AZURE_SAS_TOKEN = AzureStorageLockConfig.AZURE_SAS_TOKEN.key(); - private final Logger logger; private final BlobServiceClient blobServiceClient; private final Functions.Function1 blobServiceClientSupplier;