Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile.duckdb-extensions
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ RUN set -eux; \
duckdb_cli_version="$(printf '%s' "${duckdb_jdbc_version}" | cut -d. -f1-3)"; \
export DUCKDB_VERSION="${duckdb_cli_version}"; \
curl https://install.duckdb.org | sh; \
/root/.duckdb/cli/${duckdb_cli_version}/duckdb -c "INSTALL iceberg; INSTALL httpfs; INSTALL cache_httpfs FROM community;"; \
/root/.duckdb/cli/${duckdb_cli_version}/duckdb -c "INSTALL iceberg; INSTALL httpfs; INSTALL aws; INSTALL cache_httpfs FROM community;"; \
mkdir -p "${DUCKDB_EXTENSIONS_DIR}"; \
cp -a /root/.duckdb/extensions/. "${DUCKDB_EXTENSIONS_DIR}/"; \
rm -rf /root/.duckdb
2 changes: 2 additions & 0 deletions documentation/docs/configuration-engine/duckdb.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ DuckDB is a vectorized database query engine that excels at analytical queries a
| `url` | **string** | `"jdbc:duckdb:"` | Full JDBC URL for the database connection |
| `use-disk-cache` | **boolean** | `false` | Install and load `cache_httpfs` extension |
| `use-version-guessing` | **boolean** | `false` | Sets `unsafe_enable_version_guessing` flag to be able to read uncommitted data |
| `use-credential-chain` | **boolean** | `false` | Load the `aws` extension and create an S3 secret backed by the AWS credential provider chain |

## Example Configuration

Expand All @@ -29,6 +30,7 @@ DuckDB is a vectorized database query engine that excels at analytical queries a
- Ideal for local development and testing of analytical workloads
- Excellent performance on analytical queries with vectorized execution
- Can read Iceberg tables directly without additional infrastructure
- Enable `use-credential-chain` when reading S3-backed Iceberg tables from an environment that supplies credentials through the AWS credential provider chain rather than static keys — e.g. EKS IRSA, where the pod only has a projected web-identity token. Plain `httpfs` does not perform the web-identity token exchange, so without this flag S3 reads fail with `HTTP 403`.
- Supports both in-memory and persistent database modes
- Perfect for prototyping before deploying to cloud query engines like Snowflake
- Lightweight alternative to larger analytical databases
3 changes: 3 additions & 0 deletions sqrl-planner/src/main/resources/jsonSchema/packageSchema.json
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@
},
"use-version-guessing": {
"type": "boolean"
},
"use-credential-chain": {
"type": "boolean"
}
},
"additionalProperties": false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,8 @@ public static class DuckDbConfig extends JdbcConfig {

@JsonProperty("use-version-guessing")
private boolean useVersionGuessing;

@JsonProperty("use-credential-chain")
private boolean useCredentialChain;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import static com.datasqrl.env.EnvVariableNames.DUCKDB_EXTENSIONS_DIR;

import com.datasqrl.graphql.config.JdbcConfig;
import com.google.common.annotations.VisibleForTesting;
import java.util.Optional;
import java.util.StringJoiner;
import lombok.RequiredArgsConstructor;
Expand All @@ -27,8 +28,6 @@
@Slf4j
public final class DuckDbExtensions {

private final StringJoiner joiner = new StringJoiner(";", "", ";");

private final JdbcConfig.DuckDbConfig config;

public Optional<String> buildInitSql() {
Expand All @@ -39,10 +38,27 @@ public Optional<String> buildInitSql() {
return Optional.empty();
}

return Optional.of(buildInitSql(extensionDir));
}

@VisibleForTesting
String buildInitSql(String extensionDir) {
var joiner = new StringJoiner(";", "", ";");

joiner.add("SET extension_directory='" + extensionDir + "'");
joiner.add("LOAD iceberg");
joiner.add("LOAD httpfs");

if (config.isUseCredentialChain()) {
// Let DuckDB use the AWS SDK default credential provider chain, which includes the
// web-identity provider that backs EKS IRSA. The default chain is required here: an explicit
// CHAIN containing 'sts' is rejected unless an ASSUME_ROLE_ARN is also supplied, since DuckDB
// maps 'sts' to AssumeRole, not to the web-identity token flow.
joiner.add("LOAD aws");
joiner.add(
"CREATE OR REPLACE SECRET sqrl_s3_credential_chain (TYPE S3, PROVIDER credential_chain)");
}

if (config.isUseDiskCache()) {
joiner.add("LOAD cache_httpfs");
}
Expand All @@ -51,6 +67,6 @@ public Optional<String> buildInitSql() {
joiner.add("SET unsafe_enable_version_guessing = true");
}

return Optional.of(joiner.toString());
return joiner.toString();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright © 2021 DataSQRL (contact@datasqrl.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.datasqrl.util;

import static org.assertj.core.api.Assertions.assertThat;

import com.datasqrl.graphql.config.JdbcConfig;
import org.junit.jupiter.api.Test;

/**
 * Verifies the semicolon-separated init SQL produced by {@code DuckDbExtensions.buildInitSql(String)}
 * for the different {@link JdbcConfig.DuckDbConfig} flag combinations.
 */
class DuckDbExtensionsTest {

  private static final String EXTENSION_DIR = "/opt/duckdb_extensions";

  @Test
  void givenDefaultConfig_whenBuildInitSql_thenLoadsCoreExtensionsOnly() {
    String initSql = initSqlFor(new JdbcConfig.DuckDbConfig());

    // With no flag set, only the extension directory and the two core LOADs are expected.
    String expected = "SET extension_directory='" + EXTENSION_DIR + "';LOAD iceberg;LOAD httpfs;";
    assertThat(initSql).isEqualTo(expected);
    assertThat(initSql).doesNotContain("LOAD aws").doesNotContain("CREATE OR REPLACE SECRET");
  }

  @Test
  void givenCredentialChainEnabled_whenBuildInitSql_thenLoadsAwsAndCreatesS3Secret() {
    String initSql = initSqlFor(credentialChainConfig());

    assertThat(initSql)
        .contains("LOAD aws;")
        .contains(
            "CREATE OR REPLACE SECRET sqrl_s3_credential_chain (TYPE S3, PROVIDER credential_chain);");
    // The secret must not spell out an explicit CHAIN '...' option.
    assertThat(initSql)
        .as(
            "must use the default chain; an explicit CHAIN with 'sts' is rejected without ASSUME_ROLE_ARN")
        .doesNotContain("CHAIN '");
  }

  @Test
  void givenCredentialChainEnabled_whenBuildInitSql_thenLoadsAwsAfterHttpfsAndBeforeSecret() {
    String initSql = initSqlFor(credentialChainConfig());

    int httpfsAt = initSql.indexOf("LOAD httpfs");
    int awsAt = initSql.indexOf("LOAD aws");
    int secretAt = initSql.indexOf("CREATE OR REPLACE SECRET");

    // Expected statement order: LOAD httpfs -> LOAD aws -> CREATE OR REPLACE SECRET.
    assertThat(httpfsAt).isLessThan(awsAt).isGreaterThanOrEqualTo(0);
    assertThat(awsAt).isLessThan(secretAt);
  }

  @Test
  void givenAllFlagsEnabled_whenBuildInitSql_thenAppliesEveryStatement() {
    JdbcConfig.DuckDbConfig allFlags = credentialChainConfig();
    allFlags.setUseDiskCache(true);
    allFlags.setUseVersionGuessing(true);

    String initSql = initSqlFor(allFlags);

    assertThat(initSql)
        .contains("LOAD aws;")
        .contains("LOAD cache_httpfs;")
        .contains("SET unsafe_enable_version_guessing = true;");
  }

  /** Builds the init SQL for {@code config} against the fixed {@link #EXTENSION_DIR}. */
  private static String initSqlFor(JdbcConfig.DuckDbConfig config) {
    return new DuckDbExtensions(config).buildInitSql(EXTENSION_DIR);
  }

  /** Creates a fresh config with only {@code use-credential-chain} switched on. */
  private static JdbcConfig.DuckDbConfig credentialChainConfig() {
    JdbcConfig.DuckDbConfig config = new JdbcConfig.DuckDbConfig();
    config.setUseCredentialChain(true);
    return config;
  }
}
Loading