From df10424277eb3334f9bac32b89b9454a2883c5fe Mon Sep 17 00:00:00 2001 From: Marvin Froeder Date: Tue, 26 May 2026 06:04:08 -0300 Subject: [PATCH] feat: Load DuckDB aws extension and S3 credential-chain secret for IRSA Signed-off-by: Marvin Froeder --- Dockerfile.duckdb-extensions | 2 +- .../docs/configuration-engine/duckdb.md | 2 + .../resources/jsonSchema/packageSchema.json | 3 + .../datasqrl/graphql/config/JdbcConfig.java | 3 + .../com/datasqrl/util/DuckDbExtensions.java | 22 ++++- .../datasqrl/util/DuckDbExtensionsTest.java | 80 +++++++++++++++++++ 6 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 sqrl-server/sqrl-server-vertx-base/src/test/java/com/datasqrl/util/DuckDbExtensionsTest.java diff --git a/Dockerfile.duckdb-extensions b/Dockerfile.duckdb-extensions index 5c1e87cf8..cf8493b34 100644 --- a/Dockerfile.duckdb-extensions +++ b/Dockerfile.duckdb-extensions @@ -11,7 +11,7 @@ RUN set -eux; \ duckdb_cli_version="$(printf '%s' "${duckdb_jdbc_version}" | cut -d. -f1-3)"; \ export DUCKDB_VERSION="${duckdb_cli_version}"; \ curl https://install.duckdb.org | sh; \ - /root/.duckdb/cli/${duckdb_cli_version}/duckdb -c "INSTALL iceberg; INSTALL httpfs; INSTALL cache_httpfs FROM community;"; \ + /root/.duckdb/cli/${duckdb_cli_version}/duckdb -c "INSTALL iceberg; INSTALL httpfs; INSTALL aws; INSTALL cache_httpfs FROM community;"; \ mkdir -p "${DUCKDB_EXTENSIONS_DIR}"; \ cp -a /root/.duckdb/extensions/. "${DUCKDB_EXTENSIONS_DIR}/"; \ rm -rf /root/.duckdb diff --git a/documentation/docs/configuration-engine/duckdb.md b/documentation/docs/configuration-engine/duckdb.md index 8b23173f3..71999b099 100644 --- a/documentation/docs/configuration-engine/duckdb.md +++ b/documentation/docs/configuration-engine/duckdb.md @@ -9,6 +9,7 @@ DuckDB is a vectorized database query engine that excels at analytical queries a | `url` | **string** | `"jdbc:duckdb:"` | Full JDBC URL for the database connection | | `use-disk-cache` | **boolean** | `false` | Install and load `cache_httpfs` extension | | `use-version-guessing` | **boolean** | `false` | Sets `unsafe_enable_version_guessing` flag to be able to read uncommitted data | +| `use-credential-chain` | **boolean** | `false` | Load the `aws` extension and create an S3 secret backed by the AWS credential provider chain | ## Example Configuration @@ -29,6 +30,7 @@ DuckDB is a vectorized database query engine that excels at analytical queries a - Ideal for local development and testing of analytical workloads - Excellent performance on analytical queries with vectorized execution - Can read Iceberg tables directly without additional infrastructure +- Enable `use-credential-chain` when reading S3-backed Iceberg from an environment that supplies credentials through the AWS provider chain rather than static keys — e.g. EKS IRSA, where the pod only has a projected web-identity token. Plain `httpfs` does not perform the web-identity exchange, so without this flag S3 reads fail with `HTTP 403`. - Supports both in-memory and persistent database modes - Perfect for prototyping before deploying to cloud query engines like Snowflake - Lightweight alternative to larger analytical databases diff --git a/sqrl-planner/src/main/resources/jsonSchema/packageSchema.json b/sqrl-planner/src/main/resources/jsonSchema/packageSchema.json index 79b084d1f..c5e873780 100644 --- a/sqrl-planner/src/main/resources/jsonSchema/packageSchema.json +++ b/sqrl-planner/src/main/resources/jsonSchema/packageSchema.json @@ -177,6 +177,9 @@ }, "use-version-guessing": { "type": "boolean" + }, + "use-credential-chain": { + "type": "boolean" } }, "additionalProperties": false, diff --git a/sqrl-server/sqrl-server-vertx-base/src/main/java/com/datasqrl/graphql/config/JdbcConfig.java b/sqrl-server/sqrl-server-vertx-base/src/main/java/com/datasqrl/graphql/config/JdbcConfig.java index 89a613184..7e857dff1 100644 --- a/sqrl-server/sqrl-server-vertx-base/src/main/java/com/datasqrl/graphql/config/JdbcConfig.java +++ b/sqrl-server/sqrl-server-vertx-base/src/main/java/com/datasqrl/graphql/config/JdbcConfig.java @@ -45,5 +45,8 @@ public static class DuckDbConfig extends JdbcConfig { @JsonProperty("use-version-guessing") private boolean useVersionGuessing; + + @JsonProperty("use-credential-chain") + private boolean useCredentialChain; } } diff --git a/sqrl-server/sqrl-server-vertx-base/src/main/java/com/datasqrl/util/DuckDbExtensions.java b/sqrl-server/sqrl-server-vertx-base/src/main/java/com/datasqrl/util/DuckDbExtensions.java index baf379732..ac3ec9123 100644 --- a/sqrl-server/sqrl-server-vertx-base/src/main/java/com/datasqrl/util/DuckDbExtensions.java +++ b/sqrl-server/sqrl-server-vertx-base/src/main/java/com/datasqrl/util/DuckDbExtensions.java @@ -18,6 +18,7 @@ import static com.datasqrl.env.EnvVariableNames.DUCKDB_EXTENSIONS_DIR; import com.datasqrl.graphql.config.JdbcConfig; +import com.google.common.annotations.VisibleForTesting; import java.util.Optional; import java.util.StringJoiner; import lombok.RequiredArgsConstructor; @@ -27,8 +28,6 @@ @Slf4j public final class DuckDbExtensions { - private final StringJoiner joiner = new StringJoiner(";", "", ";"); - private final JdbcConfig.DuckDbConfig config; public Optional buildInitSql() { @@ -39,10 +38,27 @@ public Optional buildInitSql() { return Optional.empty(); } + return Optional.of(buildInitSql(extensionDir)); + } + + @VisibleForTesting + String buildInitSql(String extensionDir) { + var joiner = new StringJoiner(";", "", ";"); + joiner.add("SET extension_directory='" + extensionDir + "'"); joiner.add("LOAD iceberg"); joiner.add("LOAD httpfs"); + if (config.isUseCredentialChain()) { + // Let DuckDB use the AWS SDK default credential provider chain, which includes the + // web-identity provider that backs EKS IRSA. The default chain is required here: an explicit + // CHAIN containing 'sts' is rejected unless an ASSUME_ROLE_ARN is also supplied, since DuckDB + // maps 'sts' to AssumeRole, not to the web-identity token flow. + joiner.add("LOAD aws"); + joiner.add( + "CREATE OR REPLACE SECRET sqrl_s3_credential_chain (TYPE S3, PROVIDER credential_chain)"); + } + if (config.isUseDiskCache()) { joiner.add("LOAD cache_httpfs"); } @@ -51,6 +67,6 @@ public Optional buildInitSql() { joiner.add("SET unsafe_enable_version_guessing = true"); } - return Optional.of(joiner.toString()); + return joiner.toString(); } } diff --git a/sqrl-server/sqrl-server-vertx-base/src/test/java/com/datasqrl/util/DuckDbExtensionsTest.java b/sqrl-server/sqrl-server-vertx-base/src/test/java/com/datasqrl/util/DuckDbExtensionsTest.java new file mode 100644 index 000000000..9bb99fdc7 --- /dev/null +++ b/sqrl-server/sqrl-server-vertx-base/src/test/java/com/datasqrl/util/DuckDbExtensionsTest.java @@ -0,0 +1,80 @@ +/* + * Copyright © 2021 DataSQRL (contact@datasqrl.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.datasqrl.util; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.datasqrl.graphql.config.JdbcConfig; +import org.junit.jupiter.api.Test; + +class DuckDbExtensionsTest { + + private static final String EXTENSION_DIR = "/opt/duckdb_extensions"; + + @Test + void givenDefaultConfig_whenBuildInitSql_thenLoadsCoreExtensionsOnly() { + var sql = new DuckDbExtensions(new JdbcConfig.DuckDbConfig()).buildInitSql(EXTENSION_DIR); + + assertThat(sql) + .isEqualTo("SET extension_directory='" + EXTENSION_DIR + "';LOAD iceberg;LOAD httpfs;"); + assertThat(sql).doesNotContain("LOAD aws").doesNotContain("CREATE OR REPLACE SECRET"); + } + + @Test + void givenCredentialChainEnabled_whenBuildInitSql_thenLoadsAwsAndCreatesS3Secret() { + var config = new JdbcConfig.DuckDbConfig(); + config.setUseCredentialChain(true); + + var sql = new DuckDbExtensions(config).buildInitSql(EXTENSION_DIR); + + assertThat(sql) + .contains("LOAD aws;") + .contains( + "CREATE OR REPLACE SECRET sqrl_s3_credential_chain (TYPE S3, PROVIDER credential_chain);"); + assertThat(sql) + .as( + "must use the default chain; an explicit CHAIN with 'sts' is rejected without ASSUME_ROLE_ARN") + .doesNotContain("CHAIN '"); + } + + @Test + void givenCredentialChainEnabled_whenBuildInitSql_thenLoadsAwsAfterHttpfsAndBeforeSecret() { + var config = new JdbcConfig.DuckDbConfig(); + config.setUseCredentialChain(true); + + var sql = new DuckDbExtensions(config).buildInitSql(EXTENSION_DIR); + + assertThat(sql.indexOf("LOAD httpfs")) + .isLessThan(sql.indexOf("LOAD aws")) + .isGreaterThanOrEqualTo(0); + assertThat(sql.indexOf("LOAD aws")).isLessThan(sql.indexOf("CREATE OR REPLACE SECRET")); + } + + @Test + void givenAllFlagsEnabled_whenBuildInitSql_thenAppliesEveryStatement() { + var config = new JdbcConfig.DuckDbConfig(); + config.setUseCredentialChain(true); + config.setUseDiskCache(true); + config.setUseVersionGuessing(true); + + var sql = new DuckDbExtensions(config).buildInitSql(EXTENSION_DIR); + + assertThat(sql) + .contains("LOAD aws;") + .contains("LOAD cache_httpfs;") + .contains("SET unsafe_enable_version_guessing = true;"); + } +}