From 8e78a2384a91d7bedd87827d7f237bcbe12ef40b Mon Sep 17 00:00:00 2001 From: Max Halford Date: Tue, 14 Apr 2026 06:10:20 +0200 Subject: [PATCH 1/3] Fix MotherDuck hanging on concurrent scripts, bump to 0.18.0 MotherDuckClient used the module-level duckdb connection which isn't thread-safe. Give it a persistent connection with per-job cursors, matching the DuckLakeClient pattern. Also bump sqlglot to >=30.2 and update the motherduck example. Closes #55 Co-Authored-By: Claude Opus 4.6 (1M context) --- examples/motherduck/README.md | 24 +++++++++--------------- lea/conductor.py | 2 ++ lea/databases.py | 20 +++++++++++++++++++- pyproject.toml | 4 ++-- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/examples/motherduck/README.md b/examples/motherduck/README.md index 8c027c7..8c2533f 100644 --- a/examples/motherduck/README.md +++ b/examples/motherduck/README.md @@ -1,28 +1,22 @@ -# Using MotherDuck +# MotherDuck -lea works with DuckDB, and thus can be used with [MotherDuck](https://motherduck.com/) too. +This example runs the [jaffle shop](../jaffle_shop/) pipeline on [MotherDuck](https://motherduck.com/). Local CSV files are read via hybrid execution and materialized as tables in MotherDuck. -Here is an example `.env` file: +Create a token at [app.motherduck.com/settings/tokens](https://app.motherduck.com/settings/tokens). ```sh echo " LEA_USERNAME=max -LEA_WAREHOUSE=duckdb -LEA_DUCKDB_PATH=md:jaffle_shop -MOTHERDUCK_TOKEN= +LEA_WAREHOUSE=motherduck +LEA_MOTHERDUCK_DATABASE=jaffle_shop +MOTHERDUCK_TOKEN= " > .env ``` -The token can be obtained by logging into MotherDuck from the terminal, as documented [here](https://motherduck.com/docs/getting-started/connect-query-from-python/installation-authentication#authenticating-to-motherduck). 
- ```sh -lea run ../jaffle_shop/views +ln -s ../jaffle_shop/jaffle_shop jaffle_shop ``` +```sh +lea run --scripts ../jaffle_shop/scripts ``` -Created schema analytics -Created schema staging -Created schema core -``` - -You should see the views in your MotherDuck UI. diff --git a/lea/conductor.py b/lea/conductor.py index ad6daca..90a3261 100644 --- a/lea/conductor.py +++ b/lea/conductor.py @@ -253,6 +253,8 @@ def prepare_session( f"CREATE DATABASE IF NOT EXISTS {write_dataset};" ) database_client.connection.execute(f"USE {write_dataset};") + if isinstance(database_client, databases.MotherDuckClient): + database_client.set_active_database(write_dataset) elif self.warehouse == databases.Warehouse.DUCKLAKE: if secret := os.environ.get("LEA_DUCKLAKE_SECRET"): database_client.connection.execute(f"CREATE SECRET ({secret});") diff --git a/lea/databases.py b/lea/databases.py index 9f35a13..3c881d6 100644 --- a/lea/databases.py +++ b/lea/databases.py @@ -872,9 +872,27 @@ def make_job_config( class MotherDuckClient(DuckDBClient): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._conn = duckdb.connect() + self._active_database: str | None = None + @property def connection(self) -> duckdb.DuckDBPyConnection: - return duckdb # ty: ignore[invalid-return-type] + return self._conn + + def set_active_database(self, database_name: str): + self._active_database = database_name + + def make_job_config( + self, script: scripts.SQLScript, destination: str | None = None + ) -> DuckDBJob: + if self.print_mode: + rich.print(script) + cursor = self._conn.cursor() + if self._active_database: + cursor.execute(f"USE {self._active_database};") + return DuckDBJob(query=script.query, connection=cursor, destination=destination) @property def _tables_query(self) -> str: diff --git a/pyproject.toml b/pyproject.toml index cc9ac5a..436dcfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "lea-cli" -version = "0.17.0" +version = "0.18.0" 
description = "A minimalist alternative to dbt" authors = [{name = "Max Halford", email = "maxhalford25@gmail.com"}] requires-python = ">=3.11,<4" @@ -14,7 +14,7 @@ dependencies = [ "pandas>=2.1.3,<4", "python-dotenv>=1.0.0,<2", "rich>=13.5.3,<16", - "sqlglot>=27.8", + "sqlglot>=30.2", "rsa>=4.7,<5", "google-cloud-bigquery-storage>=2.27.0,<3", "requests>=2.32.3,<3", From acb7cdb9aad541904ae8da88af2f3695a2119bdd Mon Sep 17 00:00:00 2001 From: Max Halford Date: Tue, 14 Apr 2026 06:18:08 +0200 Subject: [PATCH 2/3] Update README.md --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 404d6e6..c348bdf 100644 --- a/README.md +++ b/README.md @@ -386,10 +386,12 @@ LEA_BQ_BIG_BLUE_PICK_API_REVERVATION_PROJECT_ID=reservation-compute-project-id ## Examples - [Jaffle shop](examples/jaffle_shop/) -- [Incremental](examples/incremental) -- [School](examples/school/) +- [R2 + DuckLake](examples/r2-ducklake/) +- [MotherDuck](examples/motherduck/) +- [Quack mode](examples/quack/) +- [Incremental](examples/incremental/) - [Compare development to production](examples/diff/) -- [Using MotherDuck](examples/motherduck/) +- [School](examples/school/) ## Contributing From 64c3fcca102f460ed87b02a8c32b2c921335e1aa Mon Sep 17 00:00:00 2001 From: Max Halford Date: Tue, 14 Apr 2026 06:18:32 +0200 Subject: [PATCH 3/3] Create CHANGELOG.md --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00bfbf8..0ea85ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,11 @@ # Changelog -## Unreleased +## v0.18.0 ### Breaking changes - Replaced provider-specific DuckLake secret env vars (`LEA_DUCKLAKE_R2_KEY_ID`, `LEA_DUCKLAKE_GCS_KEY_ID`, `LEA_DUCKLAKE_S3_ENDPOINT`, etc.) with a single `LEA_DUCKLAKE_SECRET` variable. The value is the body of a DuckDB [`CREATE SECRET`](https://duckdb.org/docs/current/configuration/secrets_manager) statement. 
Same for quack mode with `LEA_QUACK_DUCKLAKE_SECRET`. This supports any secret type DuckDB supports (S3, GCS, R2, Azure, etc.) without lea needing provider-specific code. + +### Bug fixes + +- Fixed MotherDuck hanging when running concurrent scripts. `MotherDuckClient` now holds one persistent connection and gives each job its own cursor, instead of sharing the module-level `duckdb` connection, which is not thread-safe. This matches the pattern used by `DuckLakeClient`.