Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
06d0c74
feat(duckdb): add @duckdb/node-api dependency for federation
kevinmessiaen Jun 12, 2026
b8a9a72
refactor(connectors): extract resolveStringReference to shared module
kevinmessiaen Jun 12, 2026
5223f1c
refactor(connectors): route all identical connectors through shared r…
kevinmessiaen Jun 12, 2026
1d2ccfa
feat(sl): reserve _ktx_ connection-id prefix for virtual connections
kevinmessiaen Jun 12, 2026
e051ecc
feat(connections): derive virtual federated connection from compatibl…
kevinmessiaen Jun 12, 2026
6d9fc9f
feat(duckdb): federated executor builds READ_ONLY attaches and runs SQL
kevinmessiaen Jun 12, 2026
ffe9f12
fix(duckdb): close federated DuckDB instance and escape quotes in att…
kevinmessiaen Jun 12, 2026
e3b4b2c
feat(sl): union member source directories for _ktx_federated
kevinmessiaen Jun 12, 2026
2520500
feat(query): route _ktx_federated through DuckDB executor
kevinmessiaen Jun 12, 2026
e1bca78
feat(sl): use duckdb dialect for federated query compilation
kevinmessiaen Jun 12, 2026
0e94000
test(duckdb): end-to-end cross-catalog federated join
kevinmessiaen Jun 12, 2026
5eb3557
test(duckdb): harden federated join test with multi-book join-key cov…
kevinmessiaen Jun 12, 2026
690a723
feat(ingest): keep declared cross-DB joins to federated siblings
kevinmessiaen Jun 12, 2026
7648c73
feat(setup): surface federated connection availability after adding a…
kevinmessiaen Jun 12, 2026
483eb52
chore(setup): mark federationNoticeFor @internal for dead-code gate
kevinmessiaen Jun 12, 2026
0214648
docs(concepts): document cross-database federation
kevinmessiaen Jun 12, 2026
66dac9c
docs(concepts): correct sqlite two-part naming in federation doc
kevinmessiaen Jun 12, 2026
3aaeefc
fix(duckdb): quote federated catalog alias so hyphenated connection i…
kevinmessiaen Jun 12, 2026
f50c6b9
refactor(duckdb): single-source federation driver list, dedup attach …
kevinmessiaen Jun 12, 2026
1aaf117
fix(duckdb): close federated DuckDB instance on connect failure; dedu…
kevinmessiaen Jun 12, 2026
ed65b9f
refactor(federation): carry member connection config and projectDir o…
kevinmessiaen Jun 13, 2026
b3ccdb3
feat(federation): resolve per-member attach targets via canonical con…
kevinmessiaen Jun 13, 2026
499c84f
fix(federation): quote mysql attach-string values like postgres
kevinmessiaen Jun 13, 2026
bf1a40b
fix(federation): resolve member attach targets via canonical resolver…
kevinmessiaen Jun 13, 2026
d9eae20
refactor(federation): thread projectDir through deriveFederatedConnec…
kevinmessiaen Jun 13, 2026
32338ff
feat(federation): add shared project read-only SQL executor that rout…
kevinmessiaen Jun 13, 2026
13bfc3d
test(federation): exercise shared executor default federated path wit…
kevinmessiaen Jun 13, 2026
eb49eb1
refactor(federation): route ingest query executor through shared exec…
kevinmessiaen Jun 13, 2026
1000d76
fix(federation): route MCP sql_execution _ktx_federated through share…
kevinmessiaen Jun 13, 2026
5ad4044
fix(federation): preserve cross-DB joins to federated siblings in man…
kevinmessiaen Jun 13, 2026
8d49974
fix(federation): preserve declared cross-DB joins through scan re-ingest
kevinmessiaen Jun 13, 2026
5431740
refactor(federation): document sibling-ref invariant, drop unsafe cas…
kevinmessiaen Jun 13, 2026
553a8c6
fix(federation): namespace federated source names by member to avoid …
kevinmessiaen Jun 13, 2026
0cbe3d4
docs(federation): document member-namespaced federated source names
kevinmessiaen Jun 13, 2026
b49b28c
fix(federation): preserve member SSL/search_path in attach, classify …
kevinmessiaen Jun 13, 2026
ed8ed25
refactor(federation): simplify federated dispatch and parallelize sib…
kevinmessiaen Jun 13, 2026
68c6290
Merge branch 'main' into feat/duckdb-federation
kevinmessiaen Jun 13, 2026
4a3ee67
feat(federation): carry headerTypes through shared SQL executor
kevinmessiaen Jun 13, 2026
c7a4dba
feat(federation): add shared federated connection listing builder
kevinmessiaen Jun 13, 2026
dc08225
fix(federation): route ktx sql through shared executor for _ktx_feder…
kevinmessiaen Jun 13, 2026
f1a489e
feat(federation): show _ktx_federated in ktx connection list
kevinmessiaen Jun 13, 2026
bf2014b
feat(federation): surface _ktx_federated in MCP connection_list
kevinmessiaen Jun 13, 2026
1cf35f4
test(federation): ktx sql federated cross-file join end-to-end
kevinmessiaen Jun 13, 2026
a75a93b
docs(federation): document direct _ktx_federated query surface
kevinmessiaen Jun 13, 2026
31aa906
fix(federation): coerce DuckDB bigint to number in shared federated e…
kevinmessiaen Jun 13, 2026
f9a6f0d
refactor(federation): simplify driver map and collapse forked MCP SQL…
kevinmessiaen Jun 13, 2026
a3b71f8
chore(federation): allowlist placeholder credentials for detect-secrets
kevinmessiaen Jun 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions docs-site/content/docs/concepts/cross-database-federation.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
---
title: Cross-database federation
description: How ktx federates postgres, mysql, and sqlite connections so a single semantic query can join across them without copying data.
---

Cross-database federation lets a single semantic-layer query join tables that
live in different databases. **ktx** achieves this by embedding DuckDB and
using its `ATTACH` mechanism to connect each member database read-only. The
join executes inside DuckDB at query time — live data, no ETL, no copy.

Federation activates automatically when a `ktx.yaml` file declares two or more
attach-compatible connections. There is nothing to configure and no federation
block to add. With zero or one compatible connection the behavior is unchanged.

## Which connections participate

The v1 federation engine supports three drivers:

| Driver | Participates in federation |
|--------|---------------------------|
| `postgres` | Yes |
| `mysql` | Yes |
| `sqlite` | Yes |
| `snowflake` | No — standalone connection |
| `bigquery` | No — standalone connection |
| `clickhouse` | No — standalone connection |
| `sqlserver` | No — standalone connection |

Non-participating connections continue to work exactly as they did. They are
queried independently; they do not appear as federation members.

## How it activates

**ktx** inspects the connections in `ktx.yaml` at startup. When it finds two or
more connections whose driver is `postgres`, `mysql`, or `sqlite`, it
instantiates the DuckDB federation engine and attaches each one read-only.
There is no `federation:` key, no opt-in flag, and no connection-level setting
to enable. The engine is derived entirely from what is already declared.

A minimal `ktx.yaml` that triggers federation:

```yaml
connections:
  - id: pg_books
    driver: postgres
    url: "postgres://user:pass@localhost:5432/books" # pragma: allowlist secret
  - id: sqlite_reviews
    driver: sqlite
    path: ./data/reviews.db
```

Two attach-compatible connections are present, so federation is active.

## Table naming in federated queries

Inside a federated query, postgres and mysql tables use a three-part name:
`connectionId.schema.table`. SQLite tables, which have no schema layer in
DuckDB, use the two-part form `connectionId.table`. In both cases the
connection's `id` field in `ktx.yaml` becomes the catalog name inside DuckDB.

For the example above:

- `pg_books.public.books` — the `books` table in the `public` schema of the
postgres connection
- `sqlite_reviews.reviews` — the `reviews` table in the sqlite connection

These fully qualified names are what you write in a source's `table:` field
and in any cross-database join's `to:` field.

## Source names in the federated view

When you list or search semantic-layer sources under the federated connection,
each source's `name` is prefixed with its member connection id — for example
`pg_books.books` and `sqlite_reviews.reviews`. The prefix keeps names unique
when two members own a table with the same name: a `users` table in each of
`pg_app` and `sqlite_app` surfaces as `pg_app.users` and `sqlite_app.users`
rather than colliding on a bare `users`.

Each source file's physical `table:` field is unchanged — it still uses the
fully qualified form from the previous section. The member prefix applies only
to the source `name` as seen through the federated connection, not to the
per-member view of the same source.

## Declaring a cross-database join

In v1, cross-database joins are declared explicitly in a source's `joins:`
block. **ktx** validates the join at ingest time, resolves both sides, and
executes it through the federation engine at query time.

The example below shows a `books` source (owned by the postgres connection)
declaring a one-to-many join to the `reviews` table in the sqlite connection:

```yaml
name: books
table: pg_books.public.books
joins:
  - to: sqlite_reviews.reviews
    on: "id = book_id"
    relationship: one_to_many
```

The `to:` value is the fully qualified federated table reference. The `on:`
value is the join predicate expressed in terms of the tables' own column names.
The `relationship:` value tells **ktx** how to aggregate safely across the join.

## Querying the federated connection directly

Beyond declared joins, the federated connection is addressable by its id,
`_ktx_federated`, anywhere **ktx** runs read-only SQL. The same id works for the
`ktx sql` command and for a data agent calling the `sql_execution` MCP tool, so
both surfaces can run a cross-database query without a source file:

```bash
ktx sql -c _ktx_federated \
"SELECT b.title, avg(r.rating) AS avg_rating
FROM pg_books.public.books b
JOIN sqlite_reviews.reviews r ON b.id = r.book_id
GROUP BY b.title"
```

Table names follow the rules from
[Table naming in federated queries](#table-naming-in-federated-queries):
three-part `connectionId.schema.table` for postgres and mysql, two-part
`connectionId.table` for sqlite. The `_ktx_federated` id is virtual — it is
never written to `ktx.yaml` and only exists when two or more attach-compatible
connections are declared. It surfaces in `ktx connection` and in the agent's
connection list so the id is discoverable. Querying a single member database
directly with its own connection id (`ktx sql -c pg_books ...`) is unchanged.

## Federated queries are read-only

DuckDB attaches every member database with read-only access. Federated queries
are `SELECT`/`WITH` only. No writes, no DDL, and no mutations reach any member
database through the federation engine.

## Current limitations

- **Declared joins only.** Automatic discovery of cross-database relationships
is not available in v1. Intra-database relationship discovery for each member
connection is unchanged.
- **postgres, mysql, and sqlite only.** Other drivers (snowflake, bigquery,
clickhouse, sqlserver) do not participate in federation in this version. They
remain usable as standalone connections.
2 changes: 1 addition & 1 deletion docs-site/content/docs/concepts/meta.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"title": "Concepts",
"defaultOpen": true,
"pages": ["the-context-layer", "semantic-layer-internals", "wiki-retrieval"]
"pages": ["the-context-layer", "semantic-layer-internals", "cross-database-federation", "wiki-retrieval"]
}
1 change: 1 addition & 0 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"@clack/prompts": "1.4.0",
"@clickhouse/client": "^1.18.5",
"@commander-js/extra-typings": "14.0.0",
"@duckdb/node-api": "1.5.3-r.3",
"@google-cloud/bigquery": "^8.3.1",
"@looker/sdk": "^26.8.0",
"@looker/sdk-node": "^26.8.0",
Expand Down
17 changes: 13 additions & 4 deletions packages/cli/src/connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { type NotionBotInfo, NotionClient } from './context/ingest/adapters/noti
import { createLocalLookerCredentialResolver } from './context/ingest/adapters/looker/local-looker.adapter.js';
import { metabaseRuntimeConfigFromLocalConnection } from './context/ingest/adapters/metabase/local-metabase.adapter.js';
import { testRepoConnection } from './context/ingest/repo-fetch.js';
import { federatedConnectionListing } from './context/connections/federation.js';
import { getDriverRegistration } from './context/connections/drivers.js';
import { parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from './context/connections/notion-config.js';
import { resolveKtxConfigReference } from './context/core/config-reference.js';
Expand Down Expand Up @@ -447,15 +448,23 @@ export async function runKtxConnection(
io.stdout.write('No connections configured. Run `ktx setup` to add one.\n');
return 0;
}
const idWidth = Math.max('ID'.length, ...entries.map(([id]) => id.length));
const driverWidth = Math.max(
'DRIVER'.length,
const federated = federatedConnectionListing(project.config.connections, args.projectDir);
const idCandidates = [...entries.map(([id]) => id), ...(federated ? [federated.id] : [])];
const driverLengths = [
...entries.map(([, c]) => (c.driver ?? 'unknown').length),
);
...(federated ? [federated.driver.length] : []),
];
const idWidth = Math.max('ID'.length, ...idCandidates.map((id) => id.length));
const driverWidth = Math.max('DRIVER'.length, ...driverLengths);
io.stdout.write(`${'ID'.padEnd(idWidth)} ${'DRIVER'.padEnd(driverWidth)}\n`);
for (const [id, connection] of entries) {
io.stdout.write(`${id.padEnd(idWidth)} ${(connection.driver ?? 'unknown').padEnd(driverWidth)}\n`);
}
if (federated) {
io.stdout.write(`${federated.id.padEnd(idWidth)} ${federated.driver.padEnd(driverWidth)}\n`);
io.stdout.write(` federates: ${federated.members.join(', ')}\n`);
io.stdout.write(` ${federated.hint}\n`);
}
return 0;
}

Expand Down
16 changes: 1 addition & 15 deletions packages/cli/src/connectors/bigquery/connector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ import {
type KtxTableSampleInput,
type KtxTableSampleResult,
} from '../../context/scan/types.js';
import { readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { resolve } from 'node:path';
import { resolveStringReference } from '../shared/string-reference.js';

export interface KtxBigQueryConnectionConfig {
driver?: string;
Expand Down Expand Up @@ -138,18 +136,6 @@ class DefaultBigQueryClientFactory implements KtxBigQueryClientFactory {
}
}

/**
 * Resolves a config value that may be an indirect reference.
 *
 * - `env:NAME` yields the value of `NAME` in `env`, or `''` when it is unset.
 * - `file:PATH` yields the trimmed UTF-8 contents of the file at `PATH`; a
 *   leading `~` is expanded to the current user's home directory.
 * - Any other value is returned unchanged.
 *
 * @param value - Raw config value, possibly prefixed with `env:` or `file:`.
 * @param env - Environment map consulted for `env:` references.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a `file:` target cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    return env[value.slice('env:'.length)] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // Strip the separator(s) that follow `~` before resolving: path.resolve
    // treats a segment starting with a separator as absolute, which would
    // discard the home directory and turn `~/key.json` into `/key.json`.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}

function stringConfigValue(
connection: KtxBigQueryConnectionConfig | undefined,
key: keyof KtxBigQueryConnectionConfig,
Expand Down
17 changes: 1 addition & 16 deletions packages/cli/src/connectors/clickhouse/connector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@ import { getDialectForDriver } from '../../context/connections/dialects.js';
import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js';
import { connectorTestFailure, createKtxConnectorCapabilities, type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js';
import { scopedTableNames } from '../../context/scan/table-ref.js';
import { readFileSync } from 'node:fs';
import { resolveStringReference } from '../shared/string-reference.js';
import { Agent as HttpsAgent } from 'node:https';
import { homedir } from 'node:os';
import { resolve } from 'node:path';

export interface KtxClickHouseConnectionConfig {
driver?: string;
Expand Down Expand Up @@ -142,19 +140,6 @@ function stringConfigValue(
return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined;
}

/**
 * Resolves a config value that may point at an environment variable or a file.
 *
 * - `env:NAME` returns `env[NAME]`, falling back to `''` when it is unset.
 * - `file:PATH` returns the trimmed UTF-8 contents of `PATH`; a leading `~`
 *   is expanded to the current user's home directory.
 * - Anything else is returned as-is.
 *
 * @param value - Raw config value, possibly prefixed with `env:` or `file:`.
 * @param env - Environment map consulted for `env:` references.
 * @returns The resolved string.
 * @throws Whatever `readFileSync` throws when a `file:` target cannot be read.
 */
function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string {
  if (value.startsWith('env:')) {
    const envName = value.slice('env:'.length);
    return env[envName] ?? '';
  }
  if (value.startsWith('file:')) {
    const rawPath = value.slice('file:'.length);
    // path.resolve restarts from any segment that begins with a separator, so
    // `~/key.pem` must lose the separator after `~` or the home directory is
    // dropped and the lookup lands on `/key.pem`.
    const path = rawPath.startsWith('~')
      ? resolve(homedir(), rawPath.slice(1).replace(/^[\\/]+/, ''))
      : rawPath;
    return readFileSync(path, 'utf-8').trim();
  }
  return value;
}

function maybeNumber(value: unknown): number | undefined {
return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
}
Expand Down
79 changes: 79 additions & 0 deletions packages/cli/src/connectors/duckdb/federated-attach.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import { sqliteDatabasePathFromConfig, type KtxSqliteConnectionConfig } from '../sqlite/connector.js';
import { postgresPoolConfigFromConfig, type KtxPostgresConnectionConfig } from '../postgres/connector.js';
import {
mysqlConnectionPoolConfigFromConfig,
type KtxMysqlConnectionConfig,
} from '../mysql/connector.js';
import type { FederatedMember } from '../../context/connections/federation.js';

/**
 * Formats one value for a space-separated `key=value` connection string.
 *
 * libpq/DuckDB key-value values quote with single quotes and backslash-escape.
 * Values that are non-empty and contain no whitespace, single quote, or
 * backslash are returned bare; everything else is wrapped in single quotes
 * with `\` and `'` backslash-escaped.
 *
 * @param value - Raw value to place after `key=`.
 * @returns The bare or quoted form, safe to join with other pairs by spaces.
 */
function kvKeyword(value: string): string {
  // An empty value has to be written as '' — with a bare `key=` a libpq-style
  // parser skips the following space and reads the next `key=value` token as
  // this key's value.
  if (value.length > 0 && !/[\s'\\]/.test(value)) {
    return value;
  }
  // Escape backslashes first so the backslashes added for quotes are not doubled.
  const escaped = value.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
  return `'${escaped}'`;
}

/**
 * Builds the ATTACH target string for a postgres federation member.
 *
 * The member's config is run through `postgresPoolConfigFromConfig`. When the
 * result carries a `connectionString`, that string is returned untouched.
 * Otherwise the individual fields that are set are emitted as space-separated
 * `key=value` pairs, in the order host, port, dbname, user, password,
 * sslmode, options.
 *
 * @param member - Federated member whose connection config is resolved.
 * @param env - Environment handed to the postgres config resolver.
 * @returns A connection string or a `key=value` list for the member.
 */
function postgresAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): string {
  const pool = postgresPoolConfigFromConfig({
    connectionId: member.connectionId,
    connection: member.connection as KtxPostgresConnectionConfig,
    env,
  });
  if (pool.connectionString) {
    return pool.connectionString;
  }

  // Each slot is either a finished pair or undefined when the field is unset.
  const pairs = [
    pool.host ? `host=${kvKeyword(pool.host)}` : undefined,
    pool.port ? `port=${pool.port}` : undefined,
    pool.database ? `dbname=${kvKeyword(pool.database)}` : undefined,
    pool.user ? `user=${kvKeyword(pool.user)}` : undefined,
    pool.password ? `password=${kvKeyword(pool.password)}` : undefined,
    // Any truthy ssl setting maps to the single flag `sslmode=require`.
    pool.ssl ? 'sslmode=require' : undefined,
    pool.options ? `options=${kvKeyword(pool.options)}` : undefined,
  ];
  return pairs.filter((pair): pair is string => pair !== undefined).join(' ');
}

/**
 * Builds the ATTACH target string for a mysql federation member.
 *
 * The member's config is run through `mysqlConnectionPoolConfigFromConfig`.
 * host, port, database and user are always emitted; password and
 * `ssl_mode=REQUIRED` are appended only when the resolved config sets them.
 *
 * @param member - Federated member whose connection config is resolved.
 * @param env - Environment handed to the mysql config resolver.
 * @returns Space-separated `key=value` pairs for the member.
 */
function mysqlAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): string {
  const pool = mysqlConnectionPoolConfigFromConfig({
    connectionId: member.connectionId,
    connection: member.connection as KtxMysqlConnectionConfig,
    env,
  });

  const segments: string[] = [
    `host=${kvKeyword(pool.host)}`,
    `port=${pool.port}`,
    `database=${kvKeyword(pool.database)}`,
    `user=${kvKeyword(pool.user)}`,
    // Optional pairs contribute zero or one element each.
    ...(pool.password ? [`password=${kvKeyword(pool.password)}`] : []),
    ...(pool.ssl ? ['ssl_mode=REQUIRED'] : []),
  ];
  return segments.join(' ');
}

/**
 * Turns a federated member's ktx.yaml config into the target string passed to
 * DuckDB's ATTACH for that member's driver. Each driver delegates to its own
 * connector's config resolver, so federation interprets the config the same
 * way the standalone connector does.
 *
 * The driver name is matched case-insensitively.
 *
 * @param member - Federated member to resolve.
 * @param env - Environment handed to the postgres/mysql config resolvers.
 * @returns The sqlite resolver's result for `sqlite`, or an attach string for
 *   `postgres` / `mysql`.
 * @throws Error when the member's driver is not sqlite, postgres, or mysql.
 */
export function federatedAttachTarget(member: FederatedMember, env: NodeJS.ProcessEnv): string {
  const driver = member.driver.toLowerCase();

  if (driver === 'sqlite') {
    return sqliteDatabasePathFromConfig({
      connectionId: member.connectionId,
      projectDir: member.projectDir,
      connection: member.connection as KtxSqliteConnectionConfig,
    });
  }
  if (driver === 'postgres') {
    return postgresAttachString(member, env);
  }
  if (driver === 'mysql') {
    return mysqlAttachString(member, env);
  }

  // Report the driver exactly as configured, not the lower-cased form.
  throw new Error(`Driver "${member.driver}" cannot be attached by DuckDB federation.`);
}
Loading
Loading