From 06d0c74626b82609aa7434d63270a0c37ac635a6 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 18:20:39 +0700 Subject: [PATCH 01/46] feat(duckdb): add @duckdb/node-api dependency for federation Co-Authored-By: Claude Sonnet 4.6 --- packages/cli/package.json | 1 + pnpm-lock.yaml | 94 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/packages/cli/package.json b/packages/cli/package.json index 9bb4c5a1..4e32e5fb 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -55,6 +55,7 @@ "@clack/prompts": "1.4.0", "@clickhouse/client": "^1.18.5", "@commander-js/extra-typings": "14.0.0", + "@duckdb/node-api": "1.5.3-r.3", "@google-cloud/bigquery": "^8.3.1", "@looker/sdk": "^26.8.0", "@looker/sdk-node": "^26.8.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 54401eff..6c1eae07 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -143,6 +143,9 @@ importers: '@commander-js/extra-typings': specifier: 14.0.0 version: 14.0.0(commander@14.0.3) + '@duckdb/node-api': + specifier: 1.5.3-r.3 + version: 1.5.3-r.3 '@google-cloud/bigquery': specifier: ^8.3.1 version: 8.3.1 @@ -747,6 +750,56 @@ packages: '@dabh/diagnostics@2.0.8': resolution: {integrity: sha512-R4MSXTVnuMzGD7bzHdW2ZhhdPC/igELENcq5IjEverBvq5hn1SXCWcsi6eSsdWP0/Ur+SItRRjAktmdoX/8R/Q==} + '@duckdb/node-api@1.5.3-r.3': + resolution: {integrity: sha512-FzuL6sevuFfEFwkgiUMRMUAj4TaVqV//L0oo2FVZ9s9oYpLpALF9qZyQv2ucclTNQZwDCkm8+e6yLMc6t8IjlA==} + + '@duckdb/node-bindings-darwin-arm64@1.5.3-r.3': + resolution: {integrity: sha512-ttD8QBesgzHu7Sc4qouuIGLM7PWedLW8GvFbnZEyMqk24mQz1HWFgaT0ivw6nDRaDPUQLB9QnAOq8MZUh1zWHQ==} + cpu: [arm64] + os: [darwin] + + '@duckdb/node-bindings-darwin-x64@1.5.3-r.3': + resolution: {integrity: sha512-Vp9MYtoYf6zUWHdCmHXwUcJlHq3YaaIeULWeSiPUM1hsDflLiZKXtz5i250Ulz03VsfWBjpO4wdM99sjjrYKkg==} + cpu: [x64] + os: [darwin] + + '@duckdb/node-bindings-linux-arm64-musl@1.5.3-r.3': + 
resolution: {integrity: sha512-IadRyx+98FEynKLXAk2MzReinFgduiDXgNd5Z8c5VKch+8FgBfqkEUYGOnBMMUPT8kuheKdLj23vpWXaCzOgoQ==} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@duckdb/node-bindings-linux-arm64@1.5.3-r.3': + resolution: {integrity: sha512-3HLcrzQE83947JS51UVR7C9qnXQMltCOk4Dnhiz1CD+9u32DGLMgPTIIxclk7O+Q7EwfqzD8JV86Ud+LT1crcQ==} + cpu: [arm64] + os: [linux] + libc: [glibc] + + '@duckdb/node-bindings-linux-x64-musl@1.5.3-r.3': + resolution: {integrity: sha512-5bulS16YhftXcarki4tvCufVslntpQDLOEF6RZ+FSMOGiv5d7SDXqklmVRy4DKW3C5ekgN7S2oYzuGL/ss9BuA==} + cpu: [x64] + os: [linux] + libc: [musl] + + '@duckdb/node-bindings-linux-x64@1.5.3-r.3': + resolution: {integrity: sha512-TXndAL0ZoETq17Df6wB+SUZjLGDmOsKuDSySxB+wy6sHfpRtbDgQibyXRlajVeUkRDwSzBFC5ymy16YG0Fl4iw==} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@duckdb/node-bindings-win32-arm64@1.5.3-r.3': + resolution: {integrity: sha512-55Vu13S0jUudiAGlNWJd7UvlW1iKjwWehD8s93jBCNm0AdE/EJN4nz5rQ0IuWzPWXpMjAYuKu00yE7NdtbTyug==} + cpu: [arm64] + os: [win32] + + '@duckdb/node-bindings-win32-x64@1.5.3-r.3': + resolution: {integrity: sha512-rlOc9ltWQNHuDq99Ah8XaD80nN1ucrSK5AcH/7ibSp9ogX/jswPYlRVE7ODFJAjnQNf8bVvs++Mp+wyGvuG7ag==} + cpu: [x64] + os: [win32] + + '@duckdb/node-bindings@1.5.3-r.3': + resolution: {integrity: sha512-Dphw1a9kKXZnCiWX1YCEAJsQ7WJQO2Ikgxy7m8jy0QVXqAwB9esr5NGsuEL3vMKL7velZHeZCjGOMnHZEcIsdg==} + '@electric-sql/pglite-socket@0.1.5': resolution: {integrity: sha512-/RAye+3EPKfO9nY4tljzxXmkT7yIpFDm0L3F+c28b+Z6uxPOjy/Zz/QEHYHXcrfuUC88/a9S72EO0+3E0j97wQ==} hasBin: true @@ -6755,6 +6808,47 @@ snapshots: enabled: 2.0.0 kuler: 2.0.0 + '@duckdb/node-api@1.5.3-r.3': + dependencies: + '@duckdb/node-bindings': 1.5.3-r.3 + + '@duckdb/node-bindings-darwin-arm64@1.5.3-r.3': + optional: true + + '@duckdb/node-bindings-darwin-x64@1.5.3-r.3': + optional: true + + '@duckdb/node-bindings-linux-arm64-musl@1.5.3-r.3': + optional: true + + '@duckdb/node-bindings-linux-arm64@1.5.3-r.3': + optional: true + + 
'@duckdb/node-bindings-linux-x64-musl@1.5.3-r.3': + optional: true + + '@duckdb/node-bindings-linux-x64@1.5.3-r.3': + optional: true + + '@duckdb/node-bindings-win32-arm64@1.5.3-r.3': + optional: true + + '@duckdb/node-bindings-win32-x64@1.5.3-r.3': + optional: true + + '@duckdb/node-bindings@1.5.3-r.3': + dependencies: + detect-libc: 2.1.2 + optionalDependencies: + '@duckdb/node-bindings-darwin-arm64': 1.5.3-r.3 + '@duckdb/node-bindings-darwin-x64': 1.5.3-r.3 + '@duckdb/node-bindings-linux-arm64': 1.5.3-r.3 + '@duckdb/node-bindings-linux-arm64-musl': 1.5.3-r.3 + '@duckdb/node-bindings-linux-x64': 1.5.3-r.3 + '@duckdb/node-bindings-linux-x64-musl': 1.5.3-r.3 + '@duckdb/node-bindings-win32-arm64': 1.5.3-r.3 + '@duckdb/node-bindings-win32-x64': 1.5.3-r.3 + '@electric-sql/pglite-socket@0.1.5(@electric-sql/pglite@0.4.5)': dependencies: '@electric-sql/pglite': 0.4.5 From b8a9a7292ce81a4b298fde275edb47bedcc0bf68 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 18:26:01 +0700 Subject: [PATCH 02/46] refactor(connectors): extract resolveStringReference to shared module Co-Authored-By: Claude Sonnet 4.6 --- .../cli/src/connectors/postgres/connector.ts | 15 +--------- .../src/connectors/shared/string-reference.ts | 20 +++++++++++++ .../shared/string-reference.test.ts | 30 +++++++++++++++++++ 3 files changed, 51 insertions(+), 14 deletions(-) create mode 100644 packages/cli/src/connectors/shared/string-reference.ts create mode 100644 packages/cli/test/connectors/shared/string-reference.test.ts diff --git a/packages/cli/src/connectors/postgres/connector.ts b/packages/cli/src/connectors/postgres/connector.ts index 1a956a3d..1a2fcd40 100644 --- a/packages/cli/src/connectors/postgres/connector.ts +++ b/packages/cli/src/connectors/postgres/connector.ts @@ -1,6 +1,4 @@ -import { readFileSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { resolve } from 'node:path'; +import { 
resolveStringReference } from '../shared/string-reference.js'; import { getDialectForDriver } from '../../context/connections/dialects.js'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; @@ -281,17 +279,6 @@ function stringConfigValue( return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined; } -function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { - if (value.startsWith('env:')) { - return env[value.slice('env:'.length)] ?? ''; - } - if (value.startsWith('file:')) { - const rawPath = value.slice('file:'.length); - const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath; - return readFileSync(path, 'utf-8').trim(); - } - return value; -} function numberValue(value: unknown): number | undefined { return typeof value === 'number' && Number.isFinite(value) ? value : undefined; diff --git a/packages/cli/src/connectors/shared/string-reference.ts b/packages/cli/src/connectors/shared/string-reference.ts new file mode 100644 index 00000000..2bae2e1a --- /dev/null +++ b/packages/cli/src/connectors/shared/string-reference.ts @@ -0,0 +1,20 @@ +import { readFileSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { resolve } from 'node:path'; + +/** + * Resolves a config string that may reference an environment variable + * (`env:NAME`) or a file (`file:/path`, `~` expands to the home dir). + * Plain values pass through unchanged. + */ +export function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { + if (value.startsWith('env:')) { + return env[value.slice('env:'.length)] ?? ''; + } + if (value.startsWith('file:')) { + const rawPath = value.slice('file:'.length); + const path = rawPath.startsWith('~') ? 
resolve(homedir(), rawPath.slice(1)) : rawPath; + return readFileSync(path, 'utf-8').trim(); + } + return value; +} diff --git a/packages/cli/test/connectors/shared/string-reference.test.ts b/packages/cli/test/connectors/shared/string-reference.test.ts new file mode 100644 index 00000000..620d4536 --- /dev/null +++ b/packages/cli/test/connectors/shared/string-reference.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, it } from 'vitest'; +import { mkdtempSync, writeFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { resolveStringReference } from '../../../src/connectors/shared/string-reference.js'; + +describe('resolveStringReference', () => { + it('returns plain values unchanged', () => { + expect(resolveStringReference('postgres://localhost/db', {})).toBe('postgres://localhost/db'); + }); + + it('resolves env: references from the provided env', () => { + expect(resolveStringReference('env:MY_URL', { MY_URL: 'resolved-url' })).toBe('resolved-url'); + }); + + it('returns empty string for a missing env var', () => { + expect(resolveStringReference('env:NOPE', {})).toBe(''); + }); + + it('resolves file: references and trims whitespace', () => { + const dir = mkdtempSync(join(tmpdir(), 'ktx-strref-')); + const file = join(dir, 'secret.txt'); + writeFileSync(file, ' hunter2\n'); + try { + expect(resolveStringReference(`file:${file}`, {})).toBe('hunter2'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); +}); From 5223f1cbeb16d3cac8f1ad1e038d9cf2f48f3e40 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 18:37:54 +0700 Subject: [PATCH 03/46] refactor(connectors): route all identical connectors through shared resolveStringReference Collapse the 5 remaining private copies in bigquery, clickhouse, mysql, snowflake, and sqlserver into the shared module. 
Fix a latent bug in the shared module where `~/path` was incorrectly sliced (dropping only `~`, leaving the leading `/` and making resolve() ignore homedir). Add a tilde-expansion test that caught the bug and now covers that branch. Co-Authored-By: Claude Sonnet 4.6 --- .../cli/src/connectors/bigquery/connector.ts | 16 +--------------- .../cli/src/connectors/clickhouse/connector.ts | 17 +---------------- packages/cli/src/connectors/mysql/connector.ts | 17 +---------------- .../src/connectors/shared/string-reference.ts | 2 +- .../cli/src/connectors/snowflake/connector.ts | 16 +--------------- .../cli/src/connectors/sqlserver/connector.ts | 16 +--------------- .../connectors/shared/string-reference.test.ts | 13 ++++++++++++- 7 files changed, 18 insertions(+), 79 deletions(-) diff --git a/packages/cli/src/connectors/bigquery/connector.ts b/packages/cli/src/connectors/bigquery/connector.ts index eae0f2ed..0b30c025 100644 --- a/packages/cli/src/connectors/bigquery/connector.ts +++ b/packages/cli/src/connectors/bigquery/connector.ts @@ -26,9 +26,7 @@ import { type KtxTableSampleInput, type KtxTableSampleResult, } from '../../context/scan/types.js'; -import { readFileSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { resolve } from 'node:path'; +import { resolveStringReference } from '../shared/string-reference.js'; export interface KtxBigQueryConnectionConfig { driver?: string; @@ -138,18 +136,6 @@ class DefaultBigQueryClientFactory implements KtxBigQueryClientFactory { } } -function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { - if (value.startsWith('env:')) { - return env[value.slice('env:'.length)] ?? ''; - } - if (value.startsWith('file:')) { - const rawPath = value.slice('file:'.length); - const path = rawPath.startsWith('~') ? 
resolve(homedir(), rawPath.slice(1)) : rawPath; - return readFileSync(path, 'utf-8').trim(); - } - return value; -} - function stringConfigValue( connection: KtxBigQueryConnectionConfig | undefined, key: keyof KtxBigQueryConnectionConfig, diff --git a/packages/cli/src/connectors/clickhouse/connector.ts b/packages/cli/src/connectors/clickhouse/connector.ts index c0d8c9a6..38a477e7 100644 --- a/packages/cli/src/connectors/clickhouse/connector.ts +++ b/packages/cli/src/connectors/clickhouse/connector.ts @@ -3,10 +3,8 @@ import { getDialectForDriver } from '../../context/connections/dialects.js'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; import { connectorTestFailure, createKtxConnectorCapabilities, type KtxConnectorTestResult, type KtxColumnSampleInput, type KtxColumnSampleResult, type KtxColumnStatsInput, type KtxColumnStatsResult, type KtxQueryResult, type KtxReadOnlyQueryInput, type KtxScanConnector, type KtxScanContext, type KtxScanInput, type KtxSchemaColumn, type KtxSchemaSnapshot, type KtxSchemaTable, type KtxTableRef, type KtxTableSampleInput, type KtxTableListEntry, type KtxTableSampleResult } from '../../context/scan/types.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; -import { readFileSync } from 'node:fs'; +import { resolveStringReference } from '../shared/string-reference.js'; import { Agent as HttpsAgent } from 'node:https'; -import { homedir } from 'node:os'; -import { resolve } from 'node:path'; export interface KtxClickHouseConnectionConfig { driver?: string; @@ -142,19 +140,6 @@ function stringConfigValue( return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined; } -function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { - if (value.startsWith('env:')) { - const envName = value.slice('env:'.length); - return env[envName] ?? 
''; - } - if (value.startsWith('file:')) { - const rawPath = value.slice('file:'.length); - const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath; - return readFileSync(path, 'utf-8').trim(); - } - return value; -} - function maybeNumber(value: unknown): number | undefined { return typeof value === 'number' && Number.isFinite(value) ? value : undefined; } diff --git a/packages/cli/src/connectors/mysql/connector.ts b/packages/cli/src/connectors/mysql/connector.ts index 2675fa2c..5bddec53 100644 --- a/packages/cli/src/connectors/mysql/connector.ts +++ b/packages/cli/src/connectors/mysql/connector.ts @@ -1,8 +1,6 @@ import mysql, { type FieldPacket, type Pool, type RowDataPacket } from 'mysql2/promise'; -import { readFileSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { resolve } from 'node:path'; import { getDialectForDriver } from '../../context/connections/dialects.js'; +import { resolveStringReference } from '../shared/string-reference.js'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; import { constraintDiscoveryWarning, @@ -183,19 +181,6 @@ function stringConfigValue( return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined; } -function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { - if (value.startsWith('env:')) { - const envName = value.slice('env:'.length); - return env[envName] ?? ''; - } - if (value.startsWith('file:')) { - const rawPath = value.slice('file:'.length); - const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath; - return readFileSync(path, 'utf-8').trim(); - } - return value; -} - function maybeNumber(value: unknown): number | undefined { return typeof value === 'number' && Number.isFinite(value) ? 
value : undefined; } diff --git a/packages/cli/src/connectors/shared/string-reference.ts b/packages/cli/src/connectors/shared/string-reference.ts index 2bae2e1a..7f83736d 100644 --- a/packages/cli/src/connectors/shared/string-reference.ts +++ b/packages/cli/src/connectors/shared/string-reference.ts @@ -13,7 +13,7 @@ export function resolveStringReference(value: string, env: NodeJS.ProcessEnv): s } if (value.startsWith('file:')) { const rawPath = value.slice('file:'.length); - const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath; + const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(rawPath[1] === '/' ? 2 : 1)) : rawPath; return readFileSync(path, 'utf-8').trim(); } return value; diff --git a/packages/cli/src/connectors/snowflake/connector.ts b/packages/cli/src/connectors/snowflake/connector.ts index 56c3b2f3..5f016675 100644 --- a/packages/cli/src/connectors/snowflake/connector.ts +++ b/packages/cli/src/connectors/snowflake/connector.ts @@ -1,8 +1,6 @@ import { createPrivateKey } from 'node:crypto'; -import { readFileSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { resolve } from 'node:path'; import { getDialectForDriver } from '../../context/connections/dialects.js'; +import { resolveStringReference } from '../shared/string-reference.js'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; import { tryConstraintQuery } from '../../context/scan/constraint-discovery.js'; import { scopedTableNames } from '../../context/scan/table-ref.js'; @@ -135,18 +133,6 @@ export interface KtxSnowflakeColumnDistinctValuesResult { const DATE_TYPES = ['DATE', 'TIMESTAMP', 'TIMESTAMP_LTZ', 'TIMESTAMP_NTZ', 'TIMESTAMP_TZ', 'TIME']; -function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { - if (value.startsWith('env:')) { - return env[value.slice('env:'.length)] ?? 
''; - } - if (value.startsWith('file:')) { - const rawPath = value.slice('file:'.length); - const path = rawPath.startsWith('~') ? resolve(homedir(), rawPath.slice(1)) : rawPath; - return readFileSync(path, 'utf-8').trim(); - } - return value; -} - function stringConfigValue( connection: KtxSnowflakeConnectionConfig | undefined, key: keyof KtxSnowflakeConnectionConfig, diff --git a/packages/cli/src/connectors/sqlserver/connector.ts b/packages/cli/src/connectors/sqlserver/connector.ts index 0d0136be..116fdea7 100644 --- a/packages/cli/src/connectors/sqlserver/connector.ts +++ b/packages/cli/src/connectors/sqlserver/connector.ts @@ -25,10 +25,8 @@ import { type KtxTableSampleInput, type KtxTableSampleResult, } from '../../context/scan/types.js'; -import { readFileSync } from 'node:fs'; -import { homedir } from 'node:os'; -import { resolve } from 'node:path'; import sql from 'mssql'; +import { resolveStringReference } from '../shared/string-reference.js'; export interface KtxSqlServerConnectionConfig { driver?: string; @@ -208,18 +206,6 @@ function stringConfigValue( return typeof value === 'string' && value.trim().length > 0 ? resolveStringReference(value.trim(), env) : undefined; } -function resolveStringReference(value: string, env: NodeJS.ProcessEnv): string { - if (value.startsWith('env:')) { - return env[value.slice('env:'.length)] ?? ''; - } - if (value.startsWith('file:')) { - const rawPath = value.slice('file:'.length); - const path = rawPath.startsWith('~') ? 
resolve(homedir(), rawPath.slice(1)) : rawPath; - return readFileSync(path, 'utf-8').trim(); - } - return value; -} - function parseSqlServerUrl(url: string): Partial { const parsed = new URL(url); return { diff --git a/packages/cli/test/connectors/shared/string-reference.test.ts b/packages/cli/test/connectors/shared/string-reference.test.ts index 620d4536..da2b6dc1 100644 --- a/packages/cli/test/connectors/shared/string-reference.test.ts +++ b/packages/cli/test/connectors/shared/string-reference.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'vitest'; import { mkdtempSync, writeFileSync, rmSync } from 'node:fs'; -import { tmpdir } from 'node:os'; +import { homedir, tmpdir } from 'node:os'; import { join } from 'node:path'; import { resolveStringReference } from '../../../src/connectors/shared/string-reference.js'; @@ -27,4 +27,15 @@ describe('resolveStringReference', () => { rmSync(dir, { recursive: true, force: true }); } }); + + it('expands ~ in file: references to the home directory', () => { + const name = `.ktx-strref-test-${process.pid}.txt`; + const abs = join(homedir(), name); + writeFileSync(abs, 'tilde-secret\n'); + try { + expect(resolveStringReference(`file:~/${name}`, {})).toBe('tilde-secret'); + } finally { + rmSync(abs, { force: true }); + } + }); }); From 1d2ccfa561d2343809262356042510a987badea2 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 19:18:28 +0700 Subject: [PATCH 04/46] feat(sl): reserve _ktx_ connection-id prefix for virtual connections Co-Authored-By: Claude Sonnet 4.6 --- packages/cli/src/context/sl/source-files.ts | 11 ++++++++++ .../context/sl/source-files-reserved.test.ts | 22 +++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 packages/cli/test/context/sl/source-files-reserved.test.ts diff --git a/packages/cli/src/context/sl/source-files.ts b/packages/cli/src/context/sl/source-files.ts index ae44c683..d21f59dc 100644 --- 
a/packages/cli/src/context/sl/source-files.ts +++ b/packages/cli/src/context/sl/source-files.ts @@ -23,7 +23,18 @@ function assertSafePathToken(kind: string, value: string): string { return value; } +/** + * The `_ktx_` prefix is ktx's reserved namespace for runtime-derived virtual + * connections (e.g. `_ktx_federated`). User connection ids may not use it. + */ +export function isReservedConnectionId(connectionId: string): boolean { + return connectionId.startsWith('_ktx_'); +} + export function assertSafeConnectionId(connectionId: string): string { + if (isReservedConnectionId(connectionId)) { + throw new Error(`Connection id "${connectionId}" uses the reserved "_ktx_" prefix.`); + } if (!isSafeConnectionId(connectionId)) { throw new Error(`Unsafe connection id: ${connectionId}`); } diff --git a/packages/cli/test/context/sl/source-files-reserved.test.ts b/packages/cli/test/context/sl/source-files-reserved.test.ts new file mode 100644 index 00000000..535549fd --- /dev/null +++ b/packages/cli/test/context/sl/source-files-reserved.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from 'vitest'; +import { assertSafeConnectionId, isReservedConnectionId } from '../../../src/context/sl/source-files.js'; + +describe('reserved connection ids', () => { + it('flags _ktx_ prefixed ids as reserved', () => { + expect(isReservedConnectionId('_ktx_federated')).toBe(true); + expect(isReservedConnectionId('_ktx_anything')).toBe(true); + }); + + it('does not flag normal ids', () => { + expect(isReservedConnectionId('pg_books')).toBe(false); + expect(isReservedConnectionId('sqlite_reviews')).toBe(false); + }); + + it('rejects a user-supplied reserved id', () => { + expect(() => assertSafeConnectionId('_ktx_federated')).toThrow(/reserved/i); + }); + + it('still accepts normal ids', () => { + expect(assertSafeConnectionId('pg_books')).toBe('pg_books'); + }); +}); From e051ecc4f09cd726af139e105174b966433e86bd Mon Sep 17 00:00:00 2001 From: Kevin Messiaen 
<114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 19:23:40 +0700 Subject: [PATCH 05/46] feat(connections): derive virtual federated connection from compatible members Co-Authored-By: Claude Sonnet 4.6 --- .../cli/src/context/connections/federation.ts | 40 ++++++++++++++++++ .../context/connections/federation.test.ts | 41 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 packages/cli/src/context/connections/federation.ts create mode 100644 packages/cli/test/context/connections/federation.test.ts diff --git a/packages/cli/src/context/connections/federation.ts b/packages/cli/src/context/connections/federation.ts new file mode 100644 index 00000000..3caa687e --- /dev/null +++ b/packages/cli/src/context/connections/federation.ts @@ -0,0 +1,40 @@ +import type { KtxProjectConnectionConfig } from '../project/config.js'; + +/** Stable id for the runtime-derived federated connection. Never written to ktx.yaml. */ +export const FEDERATED_CONNECTION_ID = '_ktx_federated'; + +/** Drivers DuckDB can ATTACH live with first-party extensions. */ +const ATTACH_COMPATIBLE_DRIVERS = new Set(['postgres', 'mysql', 'sqlite']); + +export interface FederatedMember { + connectionId: string; + driver: string; + config: KtxProjectConnectionConfig; +} + +export interface FederatedConnectionDescriptor { + id: typeof FEDERATED_CONNECTION_ID; + driver: 'duckdb'; + members: FederatedMember[]; +} + +/** + * Derives a virtual federated connection when a project declares 2+ + * attach-compatible databases. Returns null otherwise — single-DB and + * incompatible projects are unaffected. 
+ */ +export function deriveFederatedConnection( + connections: Record, +): FederatedConnectionDescriptor | null { + const members: FederatedMember[] = []; + for (const [connectionId, config] of Object.entries(connections)) { + const driver = config.driver.toLowerCase(); + if (ATTACH_COMPATIBLE_DRIVERS.has(driver)) { + members.push({ connectionId, driver, config }); + } + } + if (members.length < 2) { + return null; + } + return { id: FEDERATED_CONNECTION_ID, driver: 'duckdb', members }; +} diff --git a/packages/cli/test/context/connections/federation.test.ts b/packages/cli/test/context/connections/federation.test.ts new file mode 100644 index 00000000..c35fee59 --- /dev/null +++ b/packages/cli/test/context/connections/federation.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from 'vitest'; +import { + deriveFederatedConnection, + FEDERATED_CONNECTION_ID, +} from '../../../src/context/connections/federation.js'; + +const conns = (entries: Record) => entries as never; + +describe('deriveFederatedConnection', () => { + it('returns null with zero compatible members', () => { + expect(deriveFederatedConnection(conns({ snow: { driver: 'snowflake' } }))).toBeNull(); + }); + + it('returns null with exactly one compatible member', () => { + expect(deriveFederatedConnection(conns({ pg: { driver: 'postgres' } }))).toBeNull(); + }); + + it('derives a descriptor with two compatible members', () => { + const result = deriveFederatedConnection( + conns({ pg: { driver: 'postgres' }, lite: { driver: 'sqlite' } }), + ); + expect(result).not.toBeNull(); + expect(result?.id).toBe(FEDERATED_CONNECTION_ID); + expect(result?.driver).toBe('duckdb'); + expect(result?.members.map((m) => m.connectionId).sort()).toEqual(['lite', 'pg']); + }); + + it('excludes incompatible members from the group', () => { + const result = deriveFederatedConnection( + conns({ pg: { driver: 'postgres' }, my: { driver: 'mysql' }, snow: { driver: 'snowflake' } }), + ); + expect(result?.members.map((m) 
=> m.connectionId).sort()).toEqual(['my', 'pg']); + }); + + it('is case-insensitive on driver names', () => { + const result = deriveFederatedConnection( + conns({ pg: { driver: 'POSTGRES' }, lite: { driver: 'SQLite' } }), + ); + expect(result?.members).toHaveLength(2); + }); +}); From 6d9fc9f6be8392c1869e8499316d0d5e084ecf22 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 19:29:51 +0700 Subject: [PATCH 06/46] feat(duckdb): federated executor builds READ_ONLY attaches and runs SQL Co-Authored-By: Claude Opus 4.8 (1M context) --- .../connectors/duckdb/federated-executor.ts | 76 +++++++++++++++++++ .../src/context/connections/query-executor.ts | 2 +- .../duckdb/federated-executor.test.ts | 45 +++++++++++ 3 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 packages/cli/src/connectors/duckdb/federated-executor.ts create mode 100644 packages/cli/test/connectors/duckdb/federated-executor.test.ts diff --git a/packages/cli/src/connectors/duckdb/federated-executor.ts b/packages/cli/src/connectors/duckdb/federated-executor.ts new file mode 100644 index 00000000..4ac4930a --- /dev/null +++ b/packages/cli/src/connectors/duckdb/federated-executor.ts @@ -0,0 +1,76 @@ +import { DuckDBInstance } from '@duckdb/node-api'; +import { resolveStringReference } from '../shared/string-reference.js'; +import type { + KtxSqlQueryExecutionInput, + KtxSqlQueryExecutionResult, +} from '../../context/connections/query-executor.js'; +import { normalizeQueryRows } from '../../context/connections/query-executor.js'; +import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; +import type { FederatedMember } from '../../context/connections/federation.js'; + +const ATTACH_TYPE_BY_DRIVER: Record = { + postgres: 'postgres', + mysql: 'mysql', + sqlite: 'sqlite', +}; + +export function attachTypeForDriver(driver: string): string { + const type = 
ATTACH_TYPE_BY_DRIVER[driver.toLowerCase()]; + if (!type) { + throw new Error(`Driver "${driver}" cannot be attached by DuckDB federation.`); + } + return type; +} + +function memberUrl(member: FederatedMember, env: NodeJS.ProcessEnv): string { + const raw = (member.config as { url?: unknown }).url; + if (typeof raw !== 'string' || raw.length === 0) { + throw new Error(`Federated member "${member.connectionId}" has no url in ktx.yaml.`); + } + return resolveStringReference(raw, env); +} + +/** + * Builds INSTALL/LOAD + READ_ONLY ATTACH statements, one member per DuckDB + * catalog aliased by its connectionId. READ_ONLY makes the attach physically + * non-writable; assertReadOnlySql guards the query text itself. + */ +export function buildAttachStatements(members: FederatedMember[], env: NodeJS.ProcessEnv): string[] { + const statements: string[] = []; + for (const member of members) { + const type = attachTypeForDriver(member.driver); + const url = memberUrl(member, env); + statements.push(`INSTALL ${type}; LOAD ${type};`); + statements.push(`ATTACH '${url}' AS ${member.connectionId} (TYPE ${type}, READ_ONLY);`); + } + return statements; +} + +export async function executeFederatedQuery( + members: FederatedMember[], + input: KtxSqlQueryExecutionInput, + env: NodeJS.ProcessEnv = process.env, +): Promise { + const sql = limitSqlForExecution(assertReadOnlySql(input.sql), input.maxRows); + const attachStatements = buildAttachStatements(members, env); + + const instance = await DuckDBInstance.create(':memory:'); + const connection = await instance.connect(); + try { + for (const statement of attachStatements) { + await connection.run(statement); + } + const reader = await connection.runAndReadAll(sql); + const rows = normalizeQueryRows(reader.getRows()); + const headers = reader.columnNames(); + return { + headers, + rows, + totalRows: rows.length, + command: 'SELECT', + rowCount: rows.length, + }; + } finally { + connection.closeSync(); + } +} diff --git 
a/packages/cli/src/context/connections/query-executor.ts b/packages/cli/src/context/connections/query-executor.ts index a397dfc3..e169d164 100644 --- a/packages/cli/src/context/connections/query-executor.ts +++ b/packages/cli/src/context/connections/query-executor.ts @@ -8,7 +8,7 @@ export interface KtxSqlQueryExecutionInput { maxRows?: number; } -interface KtxSqlQueryExecutionResult { +export interface KtxSqlQueryExecutionResult { headers: string[]; rows: unknown[][]; totalRows: number; diff --git a/packages/cli/test/connectors/duckdb/federated-executor.test.ts b/packages/cli/test/connectors/duckdb/federated-executor.test.ts new file mode 100644 index 00000000..af84144e --- /dev/null +++ b/packages/cli/test/connectors/duckdb/federated-executor.test.ts @@ -0,0 +1,45 @@ +import { describe, expect, it } from 'vitest'; +import { + buildAttachStatements, + attachTypeForDriver, +} from '../../../src/connectors/duckdb/federated-executor.js'; +import type { FederatedMember } from '../../../src/context/connections/federation.js'; + +const member = (connectionId: string, driver: string, url: string): FederatedMember => + ({ connectionId, driver, config: { driver, url } as never }); + +describe('attachTypeForDriver', () => { + it('maps drivers to DuckDB attach extension types', () => { + expect(attachTypeForDriver('postgres')).toBe('postgres'); + expect(attachTypeForDriver('mysql')).toBe('mysql'); + expect(attachTypeForDriver('sqlite')).toBe('sqlite'); + }); + + it('throws for an unsupported driver', () => { + expect(() => attachTypeForDriver('snowflake')).toThrow(/cannot be attached/i); + }); +}); + +describe('buildAttachStatements', () => { + it('emits READ_ONLY ATTACH aliased by connectionId, resolving env refs', () => { + const stmts = buildAttachStatements( + [ + member('pg_books', 'postgres', 'env:PG_URL'), + member('sqlite_reviews', 'sqlite', '/data/reviews.db'), + ], + { PG_URL: 'postgresql://localhost/books' }, + ); + expect(stmts).toEqual([ + "INSTALL postgres; 
LOAD postgres;", + "ATTACH 'postgresql://localhost/books' AS pg_books (TYPE postgres, READ_ONLY);", + "INSTALL sqlite; LOAD sqlite;", + "ATTACH '/data/reviews.db' AS sqlite_reviews (TYPE sqlite, READ_ONLY);", + ]); + }); + + it('throws if a member url is missing', () => { + expect(() => + buildAttachStatements([{ connectionId: 'pg', driver: 'postgres', config: { driver: 'postgres' } as never }], {}), + ).toThrow(/no url/i); + }); +}); From ffe9f12704b8e481168183a3b25f96213d50505a Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 19:39:26 +0700 Subject: [PATCH 07/46] fix(duckdb): close federated DuckDB instance and escape quotes in attach url Co-Authored-By: Claude Sonnet 4.6 --- packages/cli/src/connectors/duckdb/federated-executor.ts | 4 +++- .../cli/test/connectors/duckdb/federated-executor.test.ts | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/connectors/duckdb/federated-executor.ts b/packages/cli/src/connectors/duckdb/federated-executor.ts index 4ac4930a..9f4152be 100644 --- a/packages/cli/src/connectors/duckdb/federated-executor.ts +++ b/packages/cli/src/connectors/duckdb/federated-executor.ts @@ -40,8 +40,9 @@ export function buildAttachStatements(members: FederatedMember[], env: NodeJS.Pr for (const member of members) { const type = attachTypeForDriver(member.driver); const url = memberUrl(member, env); + const safeUrl = url.replaceAll("'", "''"); statements.push(`INSTALL ${type}; LOAD ${type};`); - statements.push(`ATTACH '${url}' AS ${member.connectionId} (TYPE ${type}, READ_ONLY);`); + statements.push(`ATTACH '${safeUrl}' AS ${member.connectionId} (TYPE ${type}, READ_ONLY);`); } return statements; } @@ -72,5 +73,6 @@ export async function executeFederatedQuery( }; } finally { connection.closeSync(); + instance.closeSync(); } } diff --git a/packages/cli/test/connectors/duckdb/federated-executor.test.ts 
b/packages/cli/test/connectors/duckdb/federated-executor.test.ts index af84144e..c5a48abc 100644 --- a/packages/cli/test/connectors/duckdb/federated-executor.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-executor.test.ts @@ -42,4 +42,12 @@ describe('buildAttachStatements', () => { buildAttachStatements([{ connectionId: 'pg', driver: 'postgres', config: { driver: 'postgres' } as never }], {}), ).toThrow(/no url/i); }); + + it('escapes single quotes in a member url', () => { + const stmts = buildAttachStatements( + [member('pg', 'postgres', "postgresql://u:it's@h/db")], + {}, + ); + expect(stmts[1]).toBe("ATTACH 'postgresql://u:it''s@h/db' AS pg (TYPE postgres, READ_ONLY);"); + }); }); From e3b4b2cc75d5978395caf31356dc9af2b32057c1 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:07:23 +0700 Subject: [PATCH 08/46] feat(sl): union member source directories for _ktx_federated Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/cli/src/context/sl/local-sl.ts | 20 +++- .../context/sl/local-sl-federated.test.ts | 95 +++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 packages/cli/test/context/sl/local-sl-federated.test.ts diff --git a/packages/cli/src/context/sl/local-sl.ts b/packages/cli/src/context/sl/local-sl.ts index 1c12ef67..bcfa298c 100644 --- a/packages/cli/src/context/sl/local-sl.ts +++ b/packages/cli/src/context/sl/local-sl.ts @@ -2,6 +2,7 @@ import { join } from 'node:path'; import YAML from 'yaml'; import { z } from 'zod'; import type { KtxEmbeddingPort } from '../../context/core/embedding.js'; +import { deriveFederatedConnection, FEDERATED_CONNECTION_ID } from '../connections/federation.js'; import type { KtxLocalProject } from '../../context/project/project.js'; import { HybridSearchCore } from '../../context/search/hybrid-search-core.js'; import type { SearchCandidateGenerator } from '../../context/search/types.js'; @@ -169,7 
+170,24 @@ export async function loadLocalSlSourceRecords( project: KtxLocalProject, input: { connectionId: string }, ): Promise { - const connectionId = assertSafeConnectionId(input.connectionId); + if (input.connectionId === FEDERATED_CONNECTION_ID) { + const descriptor = deriveFederatedConnection(project.config.connections); + if (!descriptor) { + return []; + } + const perMember = await Promise.all( + descriptor.members.map((member) => loadSingleConnectionSourceRecords(project, member.connectionId)), + ); + return perMember.flat(); + } + return loadSingleConnectionSourceRecords(project, input.connectionId); +} + +async function loadSingleConnectionSourceRecords( + project: KtxLocalProject, + rawConnectionId: string, +): Promise { + const connectionId = assertSafeConnectionId(rawConnectionId); const dir = `semantic-layer/${connectionId}`; const schemaDir = `${dir}/_schema`; const listed = await project.fileStore.listFiles(dir); diff --git a/packages/cli/test/context/sl/local-sl-federated.test.ts b/packages/cli/test/context/sl/local-sl-federated.test.ts new file mode 100644 index 00000000..7c2b5585 --- /dev/null +++ b/packages/cli/test/context/sl/local-sl-federated.test.ts @@ -0,0 +1,95 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { buildDefaultKtxProjectConfig } from '../../../src/context/project/config.js'; +import type { GitService } from '../../../src/context/core/git.service.js'; +import { LocalGitFileStore } from '../../../src/context/project/local-git-file-store.js'; +import type { KtxLocalProject } from '../../../src/context/project/project.js'; +import { loadLocalSlSourceRecords } from '../../../src/context/sl/local-sl.js'; + +const BOOKS_MANIFEST = `tables: + books: + table: public.books + columns: + - name: book_id + type: number + pk: true + - name: title + type: string +`; + +const 
REVIEWS_MANIFEST = `tables: + reviews: + table: main.reviews + columns: + - name: review_id + type: number + pk: true + - name: rating + type: number +`; + +// Build a project backed only by an on-disk file store (no git init, no +// commit), so the fixture never hits the gpg-signing path during init. +function fakeProject(projectDir: string, connections: KtxLocalProject['config']['connections']): KtxLocalProject { + const fileStore = new LocalGitFileStore({ rootDir: projectDir, git: {} as GitService }); + const config = { ...buildDefaultKtxProjectConfig(), connections }; + return { + projectDir, + configPath: join(projectDir, 'ktx.yaml'), + config, + coreConfig: {} as KtxLocalProject['coreConfig'], + git: {} as GitService, + fileStore, + }; +} + +// `skipLock: true` writes the file to disk without committing, avoiding git. +async function seedManifest(project: KtxLocalProject, path: string, content: string): Promise { + await project.fileStore.writeFile(path, content, 'ktx', 'ktx@example.com', 'seed manifest', { skipLock: true }); +} + +describe('federated semantic-layer source loading', () => { + let tempDir: string; + let project: KtxLocalProject; + let singleMemberProject: KtxLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-local-sl-fed-')); + + project = fakeProject(join(tempDir, 'project'), { + pg_books: { driver: 'postgres' }, + sqlite_reviews: { driver: 'sqlite' }, + }); + await seedManifest(project, 'semantic-layer/pg_books/_schema/public.yaml', BOOKS_MANIFEST); + await seedManifest(project, 'semantic-layer/sqlite_reviews/_schema/main.yaml', REVIEWS_MANIFEST); + + singleMemberProject = fakeProject(join(tempDir, 'single'), { + pg_books: { driver: 'postgres' }, + }); + await seedManifest(singleMemberProject, 'semantic-layer/pg_books/_schema/public.yaml', BOOKS_MANIFEST); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('unions member source records for 
_ktx_federated', async () => { + const records = await loadLocalSlSourceRecords(project, { connectionId: '_ktx_federated' }); + const names = records.map((r) => r.source.name).sort(); + expect(names).toEqual(['books', 'reviews']); + }); + + it('reads from member dirs, never a literal _ktx_federated dir', async () => { + const records = await loadLocalSlSourceRecords(project, { connectionId: '_ktx_federated' }); + // The federated connection owns no directory; records carry their member + // connection ids, proving the union read from member dirs only. + expect(records.map((r) => r.connectionId).sort()).toEqual(['pg_books', 'sqlite_reviews']); + }); + + it('returns empty for _ktx_federated when fewer than 2 compatible members', async () => { + const records = await loadLocalSlSourceRecords(singleMemberProject, { connectionId: '_ktx_federated' }); + expect(records).toEqual([]); + }); +}); From 252050056293017ef8b3e0dd73c2aa49780a9d3a Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:13:48 +0700 Subject: [PATCH 09/46] feat(query): route _ktx_federated through DuckDB executor Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/cli/src/ingest-query-executor.ts | 12 +++++++ .../ingest-query-executor-federated.test.ts | 36 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 packages/cli/test/ingest-query-executor-federated.test.ts diff --git a/packages/cli/src/ingest-query-executor.ts b/packages/cli/src/ingest-query-executor.ts index f8b6880d..fcc27150 100644 --- a/packages/cli/src/ingest-query-executor.ts +++ b/packages/cli/src/ingest-query-executor.ts @@ -1,4 +1,6 @@ +import { executeFederatedQuery } from './connectors/duckdb/federated-executor.js'; import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutorPort } from './context/connections/query-executor.js'; +import { deriveFederatedConnection, FEDERATED_CONNECTION_ID } from './context/connections/federation.js'; 
import type { KtxLocalProject } from './context/project/project.js'; import type { KtxScanConnector, KtxScanContext } from './context/scan/types.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; @@ -7,6 +9,7 @@ type CreateConnector = typeof createKtxCliScanConnector; export interface KtxCliIngestQueryExecutorDeps { createConnector?: CreateConnector; + executeFederated?: typeof executeFederatedQuery; } async function cleanupConnector(connector: KtxScanConnector | null): Promise { @@ -20,6 +23,15 @@ export function createKtxCliIngestQueryExecutor( const createConnector = deps.createConnector ?? createKtxCliScanConnector; return { async execute(input: KtxSqlQueryExecutionInput) { + if (input.connectionId === FEDERATED_CONNECTION_ID) { + const descriptor = deriveFederatedConnection(project.config.connections); + if (!descriptor) { + throw new Error('Federated execution requested but fewer than 2 attach-compatible connections exist.'); + } + const runFederated = deps.executeFederated ?? 
executeFederatedQuery; + return runFederated(descriptor.members, input); + } + let connector: KtxScanConnector | null = null; try { connector = await createConnector(project, input.connectionId); diff --git a/packages/cli/test/ingest-query-executor-federated.test.ts b/packages/cli/test/ingest-query-executor-federated.test.ts new file mode 100644 index 00000000..cc7cb871 --- /dev/null +++ b/packages/cli/test/ingest-query-executor-federated.test.ts @@ -0,0 +1,36 @@ +import { describe, expect, it, vi } from 'vitest'; +import { createKtxCliIngestQueryExecutor } from '../src/ingest-query-executor.js'; + +describe('federated query executor routing', () => { + it('routes _ktx_federated to the DuckDB federated executor, not a single connector', async () => { + const project = { + projectDir: '/tmp/x', + config: { connections: { pg: { driver: 'postgres', url: 'env:PG' }, lite: { driver: 'sqlite', url: '/x.db' } } }, + } as never; + + const federatedSpy = vi.fn(async () => ({ + headers: ['n'], rows: [[1]], totalRows: 1, command: 'SELECT', rowCount: 1, + })); + + const executor = createKtxCliIngestQueryExecutor(project, { executeFederated: federatedSpy }); + const result = await executor.execute({ + connectionId: '_ktx_federated', + connection: undefined, + sql: 'select 1 as n', + }); + + expect(federatedSpy).toHaveBeenCalledOnce(); + expect(result.totalRows).toBe(1); + }); + + it('throws if _ktx_federated requested but fewer than 2 compatible members', async () => { + const project = { + projectDir: '/tmp/x', + config: { connections: { pg: { driver: 'postgres', url: 'env:PG' } } }, + } as never; + const executor = createKtxCliIngestQueryExecutor(project, { executeFederated: vi.fn() }); + await expect( + executor.execute({ connectionId: '_ktx_federated', connection: undefined, sql: 'select 1' }), + ).rejects.toThrow(/2 attach-compatible/i); + }); +}); From e1bca786c2445b56c6b226a09dd5bcf19607a671 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen 
<114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:20:43 +0700 Subject: [PATCH 10/46] feat(sl): use duckdb dialect for federated query compilation Bypass assertSafeConnectionId for _ktx_federated in resolveLocalConnectionId and loadComputableSources, and resolve the compute dialect to 'duckdb' when connectionId is FEDERATED_CONNECTION_ID instead of falling through to the default postgres lookup. Co-Authored-By: Claude Sonnet 4.6 --- packages/cli/src/context/sl/local-query.ts | 11 ++- .../context/sl/local-query-federated.test.ts | 82 +++++++++++++++++++ 2 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 packages/cli/test/context/sl/local-query-federated.test.ts diff --git a/packages/cli/src/context/sl/local-query.ts b/packages/cli/src/context/sl/local-query.ts index f6de3aab..41a46877 100644 --- a/packages/cli/src/context/sl/local-query.ts +++ b/packages/cli/src/context/sl/local-query.ts @@ -2,6 +2,7 @@ import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-ex import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js'; import type { KtxMcpProgressCallback } from '../mcp/types.js'; import type { KtxLocalProject } from '../../context/project/project.js'; +import { FEDERATED_CONNECTION_ID } from '../connections/federation.js'; import { sqlAnalysisDialectForDriver } from '../sql-analysis/dialect.js'; import { loadLocalSlSourceRecords } from './local-sl.js'; import { toResolvedWire } from './semantic-layer.service.js'; @@ -27,6 +28,9 @@ export interface CompileLocalSlQueryResult extends SemanticLayerQueryExecutionRe } function resolveLocalConnectionId(project: KtxLocalProject, requested: string | undefined): string { + if (requested === FEDERATED_CONNECTION_ID) { + return requested; + } if (requested) { return assertSafeConnectionId(requested); } @@ -41,7 +45,7 @@ async function loadComputableSources( project: KtxLocalProject, connectionId: string, ): Promise[]> { - 
return (await loadLocalSlSourceRecords(project, { connectionId: assertSafeConnectionId(connectionId) })) + return (await loadLocalSlSourceRecords(project, { connectionId })) .filter((record) => record.source.table || record.source.sql) .map((record) => toResolvedWire(record.source)); } @@ -58,7 +62,10 @@ export async function compileLocalSlQuery( ): Promise { await options.onProgress?.({ progress: 0, message: 'Compiling query' }); const connectionId = resolveLocalConnectionId(project, options.connectionId); - const dialect = sqlAnalysisDialectForDriver(project.config.connections[connectionId]?.driver); + const dialect = + connectionId === FEDERATED_CONNECTION_ID + ? 'duckdb' + : sqlAnalysisDialectForDriver(project.config.connections[connectionId]?.driver); const sources = await loadComputableSources(project, connectionId); await options.onProgress?.({ progress: 0.3, message: 'Generating SQL' }); diff --git a/packages/cli/test/context/sl/local-query-federated.test.ts b/packages/cli/test/context/sl/local-query-federated.test.ts new file mode 100644 index 00000000..f7e41d1c --- /dev/null +++ b/packages/cli/test/context/sl/local-query-federated.test.ts @@ -0,0 +1,82 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { KtxSemanticLayerComputePort } from '../../../src/context/daemon/semantic-layer-compute.js'; +import type { KtxLocalProject } from '../../../src/context/project/project.js'; +import { compileLocalSlQuery } from '../../../src/context/sl/local-query.js'; + +function makeFakeProject(): KtxLocalProject { + const fileStore = { + listFiles: vi.fn(async () => ({ files: [] })), + readFile: vi.fn(async () => ({ content: '' })), + writeFile: vi.fn(async () => ({})), + deleteFile: vi.fn(async () => ({})), + fileHistory: vi.fn(async () => []), + headCommit: vi.fn(async () => null), + } as unknown as KtxLocalProject['fileStore']; + + return { + projectDir: '/tmp/fake-ktx-project', + configPath: '/tmp/fake-ktx-project/ktx.yaml', + config: { + 
connections: { + pg_books: { driver: 'postgres' }, + sqlite_reviews: { driver: 'sqlite' }, + }, + storage: { state: 'sqlite', search: 'sqlite-fts5', git: {} }, + llm: {}, + ingest: {}, + agent: {}, + scan: {}, + } as unknown as KtxLocalProject['config'], + coreConfig: {} as KtxLocalProject['coreConfig'], + git: {} as KtxLocalProject['git'], + fileStore, + }; +} + +function makeFakeCompute(): KtxSemanticLayerComputePort & { lastDialect: string | undefined } { + const fake = { + lastDialect: undefined as string | undefined, + query: vi.fn(async (input: { dialect: string; query: unknown; sources: unknown[] }) => { + fake.lastDialect = input.dialect; + return { + sql: 'select 1', + dialect: input.dialect, + columns: [], + plan: { measures: [], dimensions: [] }, + }; + }), + validateSources: vi.fn(), + generateSources: vi.fn(), + }; + return fake; +} + +describe('compileLocalSlQuery — federated dialect', () => { + it('compiles federated queries with the duckdb dialect', async () => { + const project = makeFakeProject(); + const compute = makeFakeCompute(); + + await compileLocalSlQuery(project, { + connectionId: '_ktx_federated', + query: { measures: [], dimensions: [] }, + compute, + execute: false, + }); + + expect(compute.lastDialect).toBe('duckdb'); + }); + + it('still uses the driver dialect for a normal connection', async () => { + const project = makeFakeProject(); + const compute = makeFakeCompute(); + + await compileLocalSlQuery(project, { + connectionId: 'pg_books', + query: { measures: [], dimensions: [] }, + compute, + execute: false, + }); + + expect(compute.lastDialect).toBe('postgres'); + }); +}); From 0e9400059d7316c6d7180f6601b63cdd9f2637db Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:27:49 +0700 Subject: [PATCH 11/46] test(duckdb): end-to-end cross-catalog federated join Co-Authored-By: Claude Opus 4.8 (1M context) --- .../duckdb/federated-join.integration.test.ts | 49 
+++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 packages/cli/test/connectors/duckdb/federated-join.integration.test.ts diff --git a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts new file mode 100644 index 00000000..5f2bed50 --- /dev/null +++ b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts @@ -0,0 +1,49 @@ +import { describe, expect, it } from 'vitest'; +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import Database from 'better-sqlite3'; +import { executeFederatedQuery } from '../../../src/connectors/duckdb/federated-executor.js'; +import type { FederatedMember } from '../../../src/context/connections/federation.js'; + +describe('federated cross-catalog join (live DuckDB)', () => { + it('joins two sqlite catalogs and enforces read-only', async () => { + const dir = mkdtempSync(join(tmpdir(), 'ktx-fed-')); + const booksPath = join(dir, 'books.db'); + const reviewsPath = join(dir, 'reviews.db'); + + const books = new Database(booksPath); + books.exec("CREATE TABLE books (id INTEGER, title TEXT); INSERT INTO books VALUES (1, 'Dune');"); + books.close(); + + const reviews = new Database(reviewsPath); + reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 4);'); + reviews.close(); + + const members: FederatedMember[] = [ + { connectionId: 'books_db', driver: 'sqlite', config: { driver: 'sqlite', url: booksPath } as never }, + { connectionId: 'reviews_db', driver: 'sqlite', config: { driver: 'sqlite', url: reviewsPath } as never }, + ]; + + try { + const result = await executeFederatedQuery(members, { + connectionId: '_ktx_federated', + connection: undefined, + sql: 'SELECT b.title, AVG(r.stars) AS avg_stars FROM books_db.books b JOIN reviews_db.reviews r ON b.id = r.book_id GROUP BY b.title', + }); + 
expect(result.headers).toEqual(['title', 'avg_stars']); + expect(result.rows[0][0]).toBe('Dune'); + expect(Number(result.rows[0][1])).toBeCloseTo(4.5); + + await expect( + executeFederatedQuery(members, { + connectionId: '_ktx_federated', + connection: undefined, + sql: "INSERT INTO books_db.books VALUES (2, 'Hack')", + }), + ).rejects.toThrow(/read-only/i); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); +}); From 5eb355772ee1a853f5d1c68bddafc9384a922b7a Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:30:15 +0700 Subject: [PATCH 12/46] test(duckdb): harden federated join test with multi-book join-key coverage Co-Authored-By: Claude Sonnet 4.6 --- .../duckdb/federated-join.integration.test.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts index 5f2bed50..f429db32 100644 --- a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts @@ -13,11 +13,11 @@ describe('federated cross-catalog join (live DuckDB)', () => { const reviewsPath = join(dir, 'reviews.db'); const books = new Database(booksPath); - books.exec("CREATE TABLE books (id INTEGER, title TEXT); INSERT INTO books VALUES (1, 'Dune');"); + books.exec("CREATE TABLE books (id INTEGER, title TEXT); INSERT INTO books VALUES (1, 'Dune'), (2, 'Foundation');"); books.close(); const reviews = new Database(reviewsPath); - reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 4);'); + reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 4), (2, 2);'); reviews.close(); const members: FederatedMember[] = [ @@ -29,11 +29,13 @@ describe('federated 
cross-catalog join (live DuckDB)', () => { const result = await executeFederatedQuery(members, { connectionId: '_ktx_federated', connection: undefined, - sql: 'SELECT b.title, AVG(r.stars) AS avg_stars FROM books_db.books b JOIN reviews_db.reviews r ON b.id = r.book_id GROUP BY b.title', + sql: 'SELECT b.title, AVG(r.stars) AS avg_stars FROM books_db.books b JOIN reviews_db.reviews r ON b.id = r.book_id GROUP BY b.title ORDER BY b.title', }); expect(result.headers).toEqual(['title', 'avg_stars']); - expect(result.rows[0][0]).toBe('Dune'); - expect(Number(result.rows[0][1])).toBeCloseTo(4.5); + // ORDER BY title: Dune, Foundation + expect(result.rows.map((row) => row[0])).toEqual(['Dune', 'Foundation']); + expect(Number(result.rows[0][1])).toBeCloseTo(4.5); // Dune: (5+4)/2 + expect(Number(result.rows[1][1])).toBeCloseTo(2.0); // Foundation: 2/1 await expect( executeFederatedQuery(members, { From 690a7232e7266b30c3485705ef26abdb9577ebc8 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:35:04 +0700 Subject: [PATCH 13/46] feat(ingest): keep declared cross-DB joins to federated siblings Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ingest/adapters/live-database/manifest.ts | 37 ++++++++++++++----- .../ingest/manifest-federated-join.test.ts | 36 ++++++++++++++++++ 2 files changed, 63 insertions(+), 10 deletions(-) create mode 100644 packages/cli/test/context/ingest/manifest-federated-join.test.ts diff --git a/packages/cli/src/context/ingest/adapters/live-database/manifest.ts b/packages/cli/src/context/ingest/adapters/live-database/manifest.ts index 3c35b463..44b90707 100644 --- a/packages/cli/src/context/ingest/adapters/live-database/manifest.ts +++ b/packages/cli/src/context/ingest/adapters/live-database/manifest.ts @@ -86,6 +86,9 @@ export interface BuildLiveDatabaseManifestShardsInput { existingPreservedJoins?: Map; existingDescriptions?: Map; existingUsage?: Map; + // Table refs owned by 
other federated members; declared cross-DB joins to + // these survive even though the target has no shard in this snapshot. + federatedSiblingTargets?: Set; } export interface BuildLiveDatabaseManifestShardsResult { @@ -204,15 +207,20 @@ function joinCondition( .join(' AND '); } -function buildJoinsByTable( +/** @internal */ +export function buildJoinsByTable( tableNames: Set, joins: LiveDatabaseManifestJoinData[], preservedJoins: Map, + federatedSiblingTargets: Set = new Set(), ): Map { const joinsByTable = new Map(); for (const join of joins) { - if (!tableNames.has(join.fromTable) || !tableNames.has(join.toTable)) { + const fromLocal = tableNames.has(join.fromTable); + const toLocal = tableNames.has(join.toTable); + const toSibling = federatedSiblingTargets.has(join.toTable); + if (!fromLocal || (!toLocal && !toSibling)) { continue; } const relationship = RELATIONSHIP_MAP[join.relationship] ?? join.relationship; @@ -223,13 +231,17 @@ function buildJoinsByTable( source: join.source, }); - const reverseRelationship = RELATIONSHIP_INVERSE[relationship] ?? 'one_to_many'; - addJoinOnce(joinsByTable, join.toTable, { - to: join.fromTable, - on: joinCondition(join.toTable, join.toColumns, join.fromTable, join.fromColumns), - relationship: reverseRelationship, - source: join.source, - }); + // Reverse direction only when the target is a local table in THIS snapshot; + // a federated sibling has no shard here, so it gets no reverse entry. + if (toLocal) { + const reverseRelationship = RELATIONSHIP_INVERSE[relationship] ?? 
'one_to_many'; + addJoinOnce(joinsByTable, join.toTable, { + to: join.fromTable, + on: joinCondition(join.toTable, join.toColumns, join.fromTable, join.fromColumns), + relationship: reverseRelationship, + source: join.source, + }); + } } for (const [tableName, tableJoins] of preservedJoins) { @@ -250,7 +262,12 @@ export function buildLiveDatabaseManifestShards( input: BuildLiveDatabaseManifestShardsInput, ): BuildLiveDatabaseManifestShardsResult { const tableNames = new Set(input.tables.map((table) => table.name)); - const joinsByTable = buildJoinsByTable(tableNames, input.joins, input.existingPreservedJoins ?? new Map()); + const joinsByTable = buildJoinsByTable( + tableNames, + input.joins, + input.existingPreservedJoins ?? new Map(), + input.federatedSiblingTargets ?? new Set(), + ); const shards = new Map(); for (const table of input.tables) { diff --git a/packages/cli/test/context/ingest/manifest-federated-join.test.ts b/packages/cli/test/context/ingest/manifest-federated-join.test.ts new file mode 100644 index 00000000..e6dd3b24 --- /dev/null +++ b/packages/cli/test/context/ingest/manifest-federated-join.test.ts @@ -0,0 +1,36 @@ +import { describe, expect, it } from 'vitest'; +import { buildJoinsByTable } from '../../../src/context/ingest/adapters/live-database/manifest.js'; + +const joinData = (toTable: string) => ({ + fromTable: 'books', + fromColumns: ['id'], + toTable, + toColumns: ['book_id'], + relationship: 'one_to_many', + source: 'manual' as const, +}); + +describe('buildJoinsByTable federated siblings', () => { + it('keeps a forward join whose target is a federated sibling table', () => { + const result = buildJoinsByTable( + new Set(['books']), // current snapshot + [joinData('sqlite_reviews.reviews')], // target NOT local + new Map(), + new Set(['sqlite_reviews.reviews']), // federated sibling targets + ); + expect(result.get('books')?.map((j) => j.to)).toEqual(['sqlite_reviews.reviews']); + // The sibling target must NOT get a reverse entry (it 
has no shard in this snapshot) + expect(result.get('sqlite_reviews.reviews')).toBeUndefined(); + }); + + it('still drops a join whose target is neither local nor a sibling', () => { + const result = buildJoinsByTable(new Set(['books']), [joinData('ghost')], new Map(), new Set()); + expect(result.get('books')).toBeUndefined(); + }); + + it('keeps both directions for a fully-local join (unchanged behavior)', () => { + const result = buildJoinsByTable(new Set(['books', 'authors']), [joinData('authors')], new Map(), new Set()); + expect(result.get('books')?.map((j) => j.to)).toEqual(['authors']); + expect(result.get('authors')?.map((j) => j.to)).toEqual(['books']); // reverse still added for local joins + }); +}); From 7648c730f3d42bb2dce8837efc49071dd8360995 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:40:01 +0700 Subject: [PATCH 14/46] feat(setup): surface federated connection availability after adding a member Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/cli/src/setup-databases.ts | 20 ++++++++++++++++ .../setup-databases-federation-notice.test.ts | 24 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 packages/cli/test/setup-databases-federation-notice.test.ts diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index 9b7aa189..1a7a710e 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -3,6 +3,7 @@ import { readFile, writeFile } from 'node:fs/promises'; import { delimiter, dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; import { promisify } from 'node:util'; +import { deriveFederatedConnection } from './context/connections/federation.js'; import { getDriverRegistration } from './context/connections/drivers.js'; import { createLocalKtxLlmRuntimeFromConfig } from './context/llm/local-config.js'; import type { KtxLlmRuntimePort } from 
'./context/llm/runtime-port.js'; @@ -1171,6 +1172,25 @@ async function writeConnectionConfig(input: { if (queryHistory?.enabled === true) { await ensureHistoricSqlIngestDefaults(input.projectDir); } + + if (input.io) { + const federationNotice = federationNoticeFor(config.connections); + if (federationNotice) { + writeSetupSection(input.io, 'Federated connection available', [federationNotice]); + } + } +} + +/** Derived, never persisted: the federated connection is recomputed from declared connections. */ +export function federationNoticeFor( + connections: Record, +): string | null { + const descriptor = deriveFederatedConnection(connections); + if (!descriptor) { + return null; + } + const names = descriptor.members.map((m) => m.connectionId).join(', '); + return `Detected ${descriptor.members.length} attach-compatible databases (${names}). They're queryable together as one federated connection. Declare cross-database joins in a source's \`joins:\` list.`; } async function disableConnectionQueryHistory(projectDir: string, connectionId: string): Promise { diff --git a/packages/cli/test/setup-databases-federation-notice.test.ts b/packages/cli/test/setup-databases-federation-notice.test.ts new file mode 100644 index 00000000..2e0afcb4 --- /dev/null +++ b/packages/cli/test/setup-databases-federation-notice.test.ts @@ -0,0 +1,24 @@ +import { describe, expect, it } from 'vitest'; +import { federationNoticeFor } from '../src/setup-databases.js'; + +describe('federationNoticeFor', () => { + it('returns a notice naming members when 2+ compatible exist', () => { + const notice = federationNoticeFor({ + pg_books: { driver: 'postgres' }, + sqlite_reviews: { driver: 'sqlite' }, + } as never); + expect(notice).toMatch(/pg_books/); + expect(notice).toMatch(/sqlite_reviews/); + expect(notice).toMatch(/cross-database/i); + }); + + it('returns null with fewer than 2 compatible', () => { + expect(federationNoticeFor({ pg: { driver: 'postgres' } } as never)).toBeNull(); + }); + + 
it('returns null when the second db is incompatible', () => { + expect( + federationNoticeFor({ pg: { driver: 'postgres' }, snow: { driver: 'snowflake' } } as never), + ).toBeNull(); + }); +}); From 483eb5257cbb4b774f7e36078c894994369d8190 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:44:18 +0700 Subject: [PATCH 15/46] chore(setup): mark federationNoticeFor @internal for dead-code gate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also marks attachTypeForDriver, buildAttachStatements, and isReservedConnectionId @internal — all three are exported solely for unit-test access with no production cross-file consumer. Co-Authored-By: Claude Sonnet 4.6 --- packages/cli/src/connectors/duckdb/federated-executor.ts | 7 ++----- packages/cli/src/context/sl/source-files.ts | 5 +---- packages/cli/src/setup-databases.ts | 2 +- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/packages/cli/src/connectors/duckdb/federated-executor.ts b/packages/cli/src/connectors/duckdb/federated-executor.ts index 9f4152be..734f6e37 100644 --- a/packages/cli/src/connectors/duckdb/federated-executor.ts +++ b/packages/cli/src/connectors/duckdb/federated-executor.ts @@ -14,6 +14,7 @@ const ATTACH_TYPE_BY_DRIVER: Record = { sqlite: 'sqlite', }; +/** @internal */ export function attachTypeForDriver(driver: string): string { const type = ATTACH_TYPE_BY_DRIVER[driver.toLowerCase()]; if (!type) { @@ -30,11 +31,7 @@ function memberUrl(member: FederatedMember, env: NodeJS.ProcessEnv): string { return resolveStringReference(raw, env); } -/** - * Builds INSTALL/LOAD + READ_ONLY ATTACH statements, one member per DuckDB - * catalog aliased by its connectionId. READ_ONLY makes the attach physically - * non-writable; assertReadOnlySql guards the query text itself. 
- */ +/** @internal */ export function buildAttachStatements(members: FederatedMember[], env: NodeJS.ProcessEnv): string[] { const statements: string[] = []; for (const member of members) { diff --git a/packages/cli/src/context/sl/source-files.ts b/packages/cli/src/context/sl/source-files.ts index d21f59dc..6d2e361d 100644 --- a/packages/cli/src/context/sl/source-files.ts +++ b/packages/cli/src/context/sl/source-files.ts @@ -23,10 +23,7 @@ function assertSafePathToken(kind: string, value: string): string { return value; } -/** - * The `_ktx_` prefix is ktx's reserved namespace for runtime-derived virtual - * connections (e.g. `_ktx_federated`). User connection ids may not use it. - */ +/** @internal */ export function isReservedConnectionId(connectionId: string): boolean { return connectionId.startsWith('_ktx_'); } diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index 1a7a710e..735cc188 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -1181,7 +1181,7 @@ async function writeConnectionConfig(input: { } } -/** Derived, never persisted: the federated connection is recomputed from declared connections. 
*/ +/** @internal */ export function federationNoticeFor( connections: Record, ): string | null { From 02146489d1176ee0f06f0f404c38fc469b0e31f8 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:52:03 +0700 Subject: [PATCH 16/46] docs(concepts): document cross-database federation Co-Authored-By: Claude Sonnet 4.6 --- .../concepts/cross-database-federation.mdx | 105 ++++++++++++++++++ docs-site/content/docs/concepts/meta.json | 2 +- 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 docs-site/content/docs/concepts/cross-database-federation.mdx diff --git a/docs-site/content/docs/concepts/cross-database-federation.mdx b/docs-site/content/docs/concepts/cross-database-federation.mdx new file mode 100644 index 00000000..1a533cc0 --- /dev/null +++ b/docs-site/content/docs/concepts/cross-database-federation.mdx @@ -0,0 +1,105 @@ +--- +title: Cross-database federation +description: How ktx federates postgres, mysql, and sqlite connections so a single semantic query can join across them without copying data. +--- + +Cross-database federation lets a single semantic-layer query join tables that +live in different databases. **ktx** achieves this by embedding DuckDB and +using its `ATTACH` mechanism to connect each member database read-only. The +join executes inside DuckDB at query time — live data, no ETL, no copy. + +Federation activates automatically when a `ktx.yaml` file declares two or more +attach-compatible connections. There is nothing to configure and no federation +block to add. With zero or one compatible connection the behavior is unchanged. 
+ +## Which connections participate + +The v1 federation engine supports three drivers: + +| Driver | Participates in federation | +|--------|---------------------------| +| `postgres` | Yes | +| `mysql` | Yes | +| `sqlite` | Yes | +| `snowflake` | No — standalone connection | +| `bigquery` | No — standalone connection | +| `clickhouse` | No — standalone connection | +| `sqlserver` | No — standalone connection | + +Non-participating connections continue to work exactly as they did. They are +queried independently; they do not appear as federation members. + +## How it activates + +**ktx** inspects the connections in `ktx.yaml` at startup. When it finds two or +more connections whose driver is `postgres`, `mysql`, or `sqlite`, it +instantiates the DuckDB federation engine and attaches each one read-only. +There is no `federation:` key, no opt-in flag, and no connection-level setting +to enable. The engine is derived entirely from what is already declared. + +A minimal `ktx.yaml` that triggers federation: + +```yaml +connections: + - id: pg_books + driver: postgres + url: "postgres://user:pass@localhost:5432/books" + - id: sqlite_reviews + driver: sqlite + path: ./data/reviews.db +``` + +Two attach-compatible connections are present, so federation is active. + +## Table naming in federated queries + +Inside a federated query, every table is referenced with a three-part name: +`connectionId.schema.table`. The connection's `id` field in `ktx.yaml` becomes +the catalog name inside DuckDB. + +For the example above: + +- `pg_books.public.books` — the `books` table in the `public` schema of the + postgres connection +- `sqlite_reviews.reviews` — the `reviews` table in the sqlite connection + (SQLite has no schema layer, so the two-part form is used) + +These fully qualified names are what you write in a source's `table:` field +and in any cross-database join's `to:` field. 
+ +## Declaring a cross-database join + +In v1, cross-database joins are declared explicitly in a source's `joins:` +block. **ktx** validates the join at ingest time, resolves both sides, and +executes it through the federation engine at query time. + +The example below shows a `books` source (owned by the postgres connection) +declaring a one-to-many join to the `reviews` table in the sqlite connection: + +```yaml +name: books +table: pg_books.public.books +joins: + - to: sqlite_reviews.reviews + on: "id = book_id" + relationship: one_to_many +``` + +The `to:` value is the fully qualified federated table reference. The `on:` +value is the join predicate expressed in terms of the tables' own column names. +The `relationship:` value tells **ktx** how to aggregate safely across the join. + +## Federated queries are read-only + +DuckDB attaches every member database with read-only access. Federated queries +are `SELECT`/`WITH` only. No writes, no DDL, and no mutations reach any member +database through the federation engine. + +## Current limitations + +- **Declared joins only.** Automatic discovery of cross-database relationships + is not available in v1. Intra-database relationship discovery for each member + connection is unchanged. +- **postgres, mysql, and sqlite only.** Other drivers (snowflake, bigquery, + clickhouse, sqlserver) do not participate in federation in this version. They + remain usable as standalone connections. 
diff --git a/docs-site/content/docs/concepts/meta.json b/docs-site/content/docs/concepts/meta.json index bf4de9d6..3936328a 100644 --- a/docs-site/content/docs/concepts/meta.json +++ b/docs-site/content/docs/concepts/meta.json @@ -1,5 +1,5 @@ { "title": "Concepts", "defaultOpen": true, - "pages": ["the-context-layer", "semantic-layer-internals", "wiki-retrieval"] + "pages": ["the-context-layer", "semantic-layer-internals", "cross-database-federation", "wiki-retrieval"] } From 66dac9c389fa03daad6a25c6e33bd6a7d519b278 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 20:57:52 +0700 Subject: [PATCH 17/46] docs(concepts): correct sqlite two-part naming in federation doc Co-Authored-By: Claude Opus 4.8 (1M context) --- .../content/docs/concepts/cross-database-federation.mdx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs-site/content/docs/concepts/cross-database-federation.mdx b/docs-site/content/docs/concepts/cross-database-federation.mdx index 1a533cc0..77ff149a 100644 --- a/docs-site/content/docs/concepts/cross-database-federation.mdx +++ b/docs-site/content/docs/concepts/cross-database-federation.mdx @@ -53,16 +53,16 @@ Two attach-compatible connections are present, so federation is active. ## Table naming in federated queries -Inside a federated query, every table is referenced with a three-part name: -`connectionId.schema.table`. The connection's `id` field in `ktx.yaml` becomes -the catalog name inside DuckDB. +Inside a federated query, postgres and mysql tables use a three-part name: +`connectionId.schema.table`. SQLite tables, which have no schema layer in +DuckDB, use the two-part form `connectionId.table`. In both cases the +connection's `id` field in `ktx.yaml` becomes the catalog name inside DuckDB. 
For the example above: - `pg_books.public.books` — the `books` table in the `public` schema of the postgres connection - `sqlite_reviews.reviews` — the `reviews` table in the sqlite connection - (SQLite has no schema layer, so the two-part form is used) These fully qualified names are what you write in a source's `table:` field and in any cross-database join's `to:` field. From 3aaeefc73b9bbe62ebd9980516fdff8fb6a2cc81 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 21:12:30 +0700 Subject: [PATCH 18/46] fix(duckdb): quote federated catalog alias so hyphenated connection ids attach --- .../connectors/duckdb/federated-executor.ts | 8 +++++- .../duckdb/federated-executor.test.ts | 14 +++++++--- .../duckdb/federated-join.integration.test.ts | 27 +++++++++++++++++++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/connectors/duckdb/federated-executor.ts b/packages/cli/src/connectors/duckdb/federated-executor.ts index 734f6e37..332d8ea8 100644 --- a/packages/cli/src/connectors/duckdb/federated-executor.ts +++ b/packages/cli/src/connectors/duckdb/federated-executor.ts @@ -14,6 +14,10 @@ const ATTACH_TYPE_BY_DRIVER: Record = { sqlite: 'sqlite', }; +function quoteDuckdbIdentifier(id: string): string { + return `"${id.replaceAll('"', '""')}"`; +} + /** @internal */ export function attachTypeForDriver(driver: string): string { const type = ATTACH_TYPE_BY_DRIVER[driver.toLowerCase()]; @@ -39,7 +43,9 @@ export function buildAttachStatements(members: FederatedMember[], env: NodeJS.Pr const url = memberUrl(member, env); const safeUrl = url.replaceAll("'", "''"); statements.push(`INSTALL ${type}; LOAD ${type};`); - statements.push(`ATTACH '${safeUrl}' AS ${member.connectionId} (TYPE ${type}, READ_ONLY);`); + statements.push( + `ATTACH '${safeUrl}' AS ${quoteDuckdbIdentifier(member.connectionId)} (TYPE ${type}, READ_ONLY);`, + ); } return statements; } diff --git 
a/packages/cli/test/connectors/duckdb/federated-executor.test.ts b/packages/cli/test/connectors/duckdb/federated-executor.test.ts index c5a48abc..7d41be43 100644 --- a/packages/cli/test/connectors/duckdb/federated-executor.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-executor.test.ts @@ -31,12 +31,20 @@ describe('buildAttachStatements', () => { ); expect(stmts).toEqual([ "INSTALL postgres; LOAD postgres;", - "ATTACH 'postgresql://localhost/books' AS pg_books (TYPE postgres, READ_ONLY);", + 'ATTACH \'postgresql://localhost/books\' AS "pg_books" (TYPE postgres, READ_ONLY);', "INSTALL sqlite; LOAD sqlite;", - "ATTACH '/data/reviews.db' AS sqlite_reviews (TYPE sqlite, READ_ONLY);", + 'ATTACH \'/data/reviews.db\' AS "sqlite_reviews" (TYPE sqlite, READ_ONLY);', ]); }); + it('quotes a hyphenated connection id as a DuckDB identifier', () => { + const stmts = buildAttachStatements( + [member('postgres-warehouse', 'postgres', 'postgresql://h/db')], + {}, + ); + expect(stmts[1]).toBe(`ATTACH 'postgresql://h/db' AS "postgres-warehouse" (TYPE postgres, READ_ONLY);`); + }); + it('throws if a member url is missing', () => { expect(() => buildAttachStatements([{ connectionId: 'pg', driver: 'postgres', config: { driver: 'postgres' } as never }], {}), @@ -48,6 +56,6 @@ describe('buildAttachStatements', () => { [member('pg', 'postgres', "postgresql://u:it's@h/db")], {}, ); - expect(stmts[1]).toBe("ATTACH 'postgresql://u:it''s@h/db' AS pg (TYPE postgres, READ_ONLY);"); + expect(stmts[1]).toBe('ATTACH \'postgresql://u:it\'\'s@h/db\' AS "pg" (TYPE postgres, READ_ONLY);'); }); }); diff --git a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts index f429db32..a577b802 100644 --- a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts @@ -48,4 +48,31 @@ describe('federated cross-catalog 
join (live DuckDB)', () => { rmSync(dir, { recursive: true, force: true }); } }); + + it('joins catalogs whose connection ids contain hyphens', async () => { + const dir = mkdtempSync(join(tmpdir(), 'ktx-fed-hyphen-')); + const booksPath = join(dir, 'books.db'); + const reviewsPath = join(dir, 'reviews.db'); + const books = new Database(booksPath); + books.exec("CREATE TABLE books (id INTEGER, title TEXT); INSERT INTO books VALUES (1, 'Dune');"); + books.close(); + const reviews = new Database(reviewsPath); + reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 3);'); + reviews.close(); + const members: FederatedMember[] = [ + { connectionId: 'books-db', driver: 'sqlite', config: { driver: 'sqlite', url: booksPath } as never }, + { connectionId: 'reviews-db', driver: 'sqlite', config: { driver: 'sqlite', url: reviewsPath } as never }, + ]; + try { + const result = await executeFederatedQuery(members, { + connectionId: '_ktx_federated', + connection: undefined, + sql: 'SELECT b.title, AVG(r.stars) AS avg_stars FROM "books-db".books b JOIN "reviews-db".reviews r ON b.id = r.book_id GROUP BY b.title', + }); + expect(result.rows[0][0]).toBe('Dune'); + expect(Number(result.rows[0][1])).toBeCloseTo(4.0); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); }); From f50c6b9fed57d18815ee96ebb300890d2f94e493 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 21:37:15 +0700 Subject: [PATCH 19/46] refactor(duckdb): single-source federation driver list, dedup attach loads Collapse the parallel ATTACH_COMPATIBLE_DRIVERS set and ATTACH_TYPE_BY_DRIVER map into one map in federation.ts whose keys are the membership rule. Replace FederatedMember.config (read only via a type-erasing cast) with a typed url field extracted at derive time. Emit INSTALL/LOAD once per distinct driver type instead of once per member. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../connectors/duckdb/federated-executor.ts | 47 +++++++----------- .../cli/src/context/connections/federation.ts | 40 +++++++++++---- .../duckdb/federated-executor.test.ts | 49 +++++++++++-------- .../duckdb/federated-join.integration.test.ts | 8 +-- 4 files changed, 79 insertions(+), 65 deletions(-) diff --git a/packages/cli/src/connectors/duckdb/federated-executor.ts b/packages/cli/src/connectors/duckdb/federated-executor.ts index 332d8ea8..032329f2 100644 --- a/packages/cli/src/connectors/duckdb/federated-executor.ts +++ b/packages/cli/src/connectors/duckdb/federated-executor.ts @@ -6,48 +6,35 @@ import type { } from '../../context/connections/query-executor.js'; import { normalizeQueryRows } from '../../context/connections/query-executor.js'; import { assertReadOnlySql, limitSqlForExecution } from '../../context/connections/read-only-sql.js'; -import type { FederatedMember } from '../../context/connections/federation.js'; - -const ATTACH_TYPE_BY_DRIVER: Record = { - postgres: 'postgres', - mysql: 'mysql', - sqlite: 'sqlite', -}; +import { attachTypeForDriver, type FederatedMember } from '../../context/connections/federation.js'; function quoteDuckdbIdentifier(id: string): string { return `"${id.replaceAll('"', '""')}"`; } -/** @internal */ -export function attachTypeForDriver(driver: string): string { - const type = ATTACH_TYPE_BY_DRIVER[driver.toLowerCase()]; - if (!type) { - throw new Error(`Driver "${driver}" cannot be attached by DuckDB federation.`); - } - return type; -} - function memberUrl(member: FederatedMember, env: NodeJS.ProcessEnv): string { - const raw = (member.config as { url?: unknown }).url; - if (typeof raw !== 'string' || raw.length === 0) { + if (member.url === undefined || member.url.length === 0) { throw new Error(`Federated member "${member.connectionId}" has no url in ktx.yaml.`); } - return resolveStringReference(raw, env); + return resolveStringReference(member.url, env); } /** 
@internal */ export function buildAttachStatements(members: FederatedMember[], env: NodeJS.ProcessEnv): string[] { - const statements: string[] = []; - for (const member of members) { - const type = attachTypeForDriver(member.driver); - const url = memberUrl(member, env); - const safeUrl = url.replaceAll("'", "''"); - statements.push(`INSTALL ${type}; LOAD ${type};`); - statements.push( - `ATTACH '${safeUrl}' AS ${quoteDuckdbIdentifier(member.connectionId)} (TYPE ${type}, READ_ONLY);`, - ); - } - return statements; + const attachments = members.map((member) => ({ + type: attachTypeForDriver(member.driver), + url: memberUrl(member, env), + alias: member.connectionId, + })); + + const loadStatements = [...new Set(attachments.map((a) => a.type))].map( + (type) => `INSTALL ${type}; LOAD ${type};`, + ); + const attachStatements = attachments.map( + ({ type, url, alias }) => + `ATTACH '${url.replaceAll("'", "''")}' AS ${quoteDuckdbIdentifier(alias)} (TYPE ${type}, READ_ONLY);`, + ); + return [...loadStatements, ...attachStatements]; } export async function executeFederatedQuery( diff --git a/packages/cli/src/context/connections/federation.ts b/packages/cli/src/context/connections/federation.ts index 3caa687e..b3f5dee4 100644 --- a/packages/cli/src/context/connections/federation.ts +++ b/packages/cli/src/context/connections/federation.ts @@ -3,13 +3,30 @@ import type { KtxProjectConnectionConfig } from '../project/config.js'; /** Stable id for the runtime-derived federated connection. Never written to ktx.yaml. */ export const FEDERATED_CONNECTION_ID = '_ktx_federated'; -/** Drivers DuckDB can ATTACH live with first-party extensions. */ -const ATTACH_COMPATIBLE_DRIVERS = new Set(['postgres', 'mysql', 'sqlite']); +/** + * Maps each attach-compatible driver to the DuckDB extension that attaches it. + * The keys are the single source of truth for federation membership: a driver + * participates iff it appears here. 
+ */ +const ATTACH_TYPE_BY_DRIVER: Record = { + postgres: 'postgres', + mysql: 'mysql', + sqlite: 'sqlite', +}; + +export function attachTypeForDriver(driver: string): string { + const type = ATTACH_TYPE_BY_DRIVER[driver.toLowerCase()]; + if (!type) { + throw new Error(`Driver "${driver}" cannot be attached by DuckDB federation.`); + } + return type; +} export interface FederatedMember { connectionId: string; driver: string; - config: KtxProjectConnectionConfig; + /** Raw `url` from ktx.yaml; may carry an `env:`/`file:` reference, resolved at execution time. */ + url: string | undefined; } export interface FederatedConnectionDescriptor { @@ -26,13 +43,16 @@ export interface FederatedConnectionDescriptor { export function deriveFederatedConnection( connections: Record, ): FederatedConnectionDescriptor | null { - const members: FederatedMember[] = []; - for (const [connectionId, config] of Object.entries(connections)) { - const driver = config.driver.toLowerCase(); - if (ATTACH_COMPATIBLE_DRIVERS.has(driver)) { - members.push({ connectionId, driver, config }); - } - } + const members: FederatedMember[] = Object.entries(connections) + .filter(([, config]) => config.driver.toLowerCase() in ATTACH_TYPE_BY_DRIVER) + .map(([connectionId, config]) => { + const url = 'url' in config ? config.url : undefined; + return { + connectionId, + driver: config.driver.toLowerCase(), + url: typeof url === 'string' ? 
url : undefined, + }; + }); if (members.length < 2) { return null; } diff --git a/packages/cli/test/connectors/duckdb/federated-executor.test.ts b/packages/cli/test/connectors/duckdb/federated-executor.test.ts index 7d41be43..3c69dfa8 100644 --- a/packages/cli/test/connectors/duckdb/federated-executor.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-executor.test.ts @@ -1,12 +1,12 @@ import { describe, expect, it } from 'vitest'; -import { - buildAttachStatements, - attachTypeForDriver, -} from '../../../src/connectors/duckdb/federated-executor.js'; -import type { FederatedMember } from '../../../src/context/connections/federation.js'; +import { buildAttachStatements } from '../../../src/connectors/duckdb/federated-executor.js'; +import { attachTypeForDriver, type FederatedMember } from '../../../src/context/connections/federation.js'; -const member = (connectionId: string, driver: string, url: string): FederatedMember => - ({ connectionId, driver, config: { driver, url } as never }); +const member = (connectionId: string, driver: string, url: string | undefined): FederatedMember => ({ + connectionId, + driver, + url, +}); describe('attachTypeForDriver', () => { it('maps drivers to DuckDB attach extension types', () => { @@ -21,7 +21,7 @@ describe('attachTypeForDriver', () => { }); describe('buildAttachStatements', () => { - it('emits READ_ONLY ATTACH aliased by connectionId, resolving env refs', () => { + it('loads each driver type once, then emits READ_ONLY ATTACH aliased by connectionId, resolving env refs', () => { const stmts = buildAttachStatements( [ member('pg_books', 'postgres', 'env:PG_URL'), @@ -30,32 +30,39 @@ describe('buildAttachStatements', () => { { PG_URL: 'postgresql://localhost/books' }, ); expect(stmts).toEqual([ - "INSTALL postgres; LOAD postgres;", + 'INSTALL postgres; LOAD postgres;', + 'INSTALL sqlite; LOAD sqlite;', 'ATTACH \'postgresql://localhost/books\' AS "pg_books" (TYPE postgres, READ_ONLY);', - "INSTALL sqlite; LOAD 
sqlite;", 'ATTACH \'/data/reviews.db\' AS "sqlite_reviews" (TYPE sqlite, READ_ONLY);', ]); }); - it('quotes a hyphenated connection id as a DuckDB identifier', () => { + it('loads a shared driver type only once across members', () => { const stmts = buildAttachStatements( - [member('postgres-warehouse', 'postgres', 'postgresql://h/db')], + [ + member('pg_a', 'postgres', 'postgresql://h/a'), + member('pg_b', 'postgres', 'postgresql://h/b'), + ], {}, ); - expect(stmts[1]).toBe(`ATTACH 'postgresql://h/db' AS "postgres-warehouse" (TYPE postgres, READ_ONLY);`); + expect(stmts).toEqual([ + 'INSTALL postgres; LOAD postgres;', + 'ATTACH \'postgresql://h/a\' AS "pg_a" (TYPE postgres, READ_ONLY);', + 'ATTACH \'postgresql://h/b\' AS "pg_b" (TYPE postgres, READ_ONLY);', + ]); + }); + + it('quotes a hyphenated connection id as a DuckDB identifier', () => { + const stmts = buildAttachStatements([member('postgres-warehouse', 'postgres', 'postgresql://h/db')], {}); + expect(stmts.at(-1)).toBe(`ATTACH 'postgresql://h/db' AS "postgres-warehouse" (TYPE postgres, READ_ONLY);`); }); it('throws if a member url is missing', () => { - expect(() => - buildAttachStatements([{ connectionId: 'pg', driver: 'postgres', config: { driver: 'postgres' } as never }], {}), - ).toThrow(/no url/i); + expect(() => buildAttachStatements([member('pg', 'postgres', undefined)], {})).toThrow(/no url/i); }); it('escapes single quotes in a member url', () => { - const stmts = buildAttachStatements( - [member('pg', 'postgres', "postgresql://u:it's@h/db")], - {}, - ); - expect(stmts[1]).toBe('ATTACH \'postgresql://u:it\'\'s@h/db\' AS "pg" (TYPE postgres, READ_ONLY);'); + const stmts = buildAttachStatements([member('pg', 'postgres', "postgresql://u:it's@h/db")], {}); + expect(stmts.at(-1)).toBe('ATTACH \'postgresql://u:it\'\'s@h/db\' AS "pg" (TYPE postgres, READ_ONLY);'); }); }); diff --git a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts 
b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts index a577b802..cf2a7bd6 100644 --- a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts @@ -21,8 +21,8 @@ describe('federated cross-catalog join (live DuckDB)', () => { reviews.close(); const members: FederatedMember[] = [ - { connectionId: 'books_db', driver: 'sqlite', config: { driver: 'sqlite', url: booksPath } as never }, - { connectionId: 'reviews_db', driver: 'sqlite', config: { driver: 'sqlite', url: reviewsPath } as never }, + { connectionId: 'books_db', driver: 'sqlite', url: booksPath }, + { connectionId: 'reviews_db', driver: 'sqlite', url: reviewsPath }, ]; try { @@ -60,8 +60,8 @@ describe('federated cross-catalog join (live DuckDB)', () => { reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 3);'); reviews.close(); const members: FederatedMember[] = [ - { connectionId: 'books-db', driver: 'sqlite', config: { driver: 'sqlite', url: booksPath } as never }, - { connectionId: 'reviews-db', driver: 'sqlite', config: { driver: 'sqlite', url: reviewsPath } as never }, + { connectionId: 'books-db', driver: 'sqlite', url: booksPath }, + { connectionId: 'reviews-db', driver: 'sqlite', url: reviewsPath }, ]; try { const result = await executeFederatedQuery(members, { From 1aaf1177b8b57e4b7deae8dab8a182325911d8f8 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Fri, 12 Jun 2026 21:53:46 +0700 Subject: [PATCH 20/46] fix(duckdb): close federated DuckDB instance on connect failure; dedup id validation Wrap the federated DuckDB instance in its own try/finally so a failing connect() or a throwing connection.closeSync() no longer leaks the native instance. 
Route setup-sources connection-id validation through the canonical assertSafeConnectionId so the reserved _ktx_ prefix guard applies there too. Derive the federated dialect through sqlAnalysisDialectForDriver instead of a hardcoded literal. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../connectors/duckdb/federated-executor.ts | 31 ++++++++++--------- packages/cli/src/context/sl/local-query.ts | 7 ++--- packages/cli/src/setup-sources.ts | 7 +---- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/packages/cli/src/connectors/duckdb/federated-executor.ts b/packages/cli/src/connectors/duckdb/federated-executor.ts index 032329f2..3d7ab523 100644 --- a/packages/cli/src/connectors/duckdb/federated-executor.ts +++ b/packages/cli/src/connectors/duckdb/federated-executor.ts @@ -46,23 +46,26 @@ export async function executeFederatedQuery( const attachStatements = buildAttachStatements(members, env); const instance = await DuckDBInstance.create(':memory:'); - const connection = await instance.connect(); try { - for (const statement of attachStatements) { - await connection.run(statement); + const connection = await instance.connect(); + try { + for (const statement of attachStatements) { + await connection.run(statement); + } + const reader = await connection.runAndReadAll(sql); + const rows = normalizeQueryRows(reader.getRows()); + const headers = reader.columnNames(); + return { + headers, + rows, + totalRows: rows.length, + command: 'SELECT', + rowCount: rows.length, + }; + } finally { + connection.closeSync(); } - const reader = await connection.runAndReadAll(sql); - const rows = normalizeQueryRows(reader.getRows()); - const headers = reader.columnNames(); - return { - headers, - rows, - totalRows: rows.length, - command: 'SELECT', - rowCount: rows.length, - }; } finally { - connection.closeSync(); instance.closeSync(); } } diff --git a/packages/cli/src/context/sl/local-query.ts b/packages/cli/src/context/sl/local-query.ts index 41a46877..c4cfd7c1 100644 
--- a/packages/cli/src/context/sl/local-query.ts +++ b/packages/cli/src/context/sl/local-query.ts @@ -62,10 +62,9 @@ export async function compileLocalSlQuery( ): Promise { await options.onProgress?.({ progress: 0, message: 'Compiling query' }); const connectionId = resolveLocalConnectionId(project, options.connectionId); - const dialect = - connectionId === FEDERATED_CONNECTION_ID - ? 'duckdb' - : sqlAnalysisDialectForDriver(project.config.connections[connectionId]?.driver); + const driver = + connectionId === FEDERATED_CONNECTION_ID ? 'duckdb' : project.config.connections[connectionId]?.driver; + const dialect = sqlAnalysisDialectForDriver(driver); const sources = await loadComputableSources(project, connectionId); await options.onProgress?.({ progress: 0.3, message: 'Generating SQL' }); diff --git a/packages/cli/src/setup-sources.ts b/packages/cli/src/setup-sources.ts index c38ff113..3f66fe41 100644 --- a/packages/cli/src/setup-sources.ts +++ b/packages/cli/src/setup-sources.ts @@ -20,6 +20,7 @@ import type { KtxCliIo } from './cli-runtime.js'; import { errorMessage, writePrefixedLines } from './clack.js'; import { pickNotionRootPages } from './notion-page-picker.js'; import { runKtxSourceMapping } from './source-mapping.js'; +import { assertSafeConnectionId } from './context/sl/source-files.js'; import { runConnectionSetupWithRecovery, type ConfigureResult, @@ -206,12 +207,6 @@ async function promptText( return await prompts.text({ ...options, message: withTextInputNavigation(options.message) }); } -function assertSafeConnectionId(connectionId: string): void { - if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(connectionId)) { - throw new Error(`Unsafe connection id: ${connectionId}`); - } -} - function credentialRef(value: string | undefined, label: string): string { const ref = value?.trim(); if (!ref) { From ed65b9fcf09a0d08f2f939f0ce93f6249cf00cb9 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 
09:04:19 +0700 Subject: [PATCH 21/46] refactor(federation): carry member connection config and projectDir on FederatedMember Co-Authored-By: Claude Opus 4.8 (1M context) --- .../cli/src/context/connections/federation.ts | 19 +++++++++---------- .../context/connections/federation.test.ts | 19 ++++++++++++++++--- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/packages/cli/src/context/connections/federation.ts b/packages/cli/src/context/connections/federation.ts index b3f5dee4..10224397 100644 --- a/packages/cli/src/context/connections/federation.ts +++ b/packages/cli/src/context/connections/federation.ts @@ -25,8 +25,8 @@ export function attachTypeForDriver(driver: string): string { export interface FederatedMember { connectionId: string; driver: string; - /** Raw `url` from ktx.yaml; may carry an `env:`/`file:` reference, resolved at execution time. */ - url: string | undefined; + projectDir: string; + connection: KtxProjectConnectionConfig; } export interface FederatedConnectionDescriptor { @@ -42,17 +42,16 @@ export interface FederatedConnectionDescriptor { */ export function deriveFederatedConnection( connections: Record, + projectDir: string, ): FederatedConnectionDescriptor | null { const members: FederatedMember[] = Object.entries(connections) .filter(([, config]) => config.driver.toLowerCase() in ATTACH_TYPE_BY_DRIVER) - .map(([connectionId, config]) => { - const url = 'url' in config ? config.url : undefined; - return { - connectionId, - driver: config.driver.toLowerCase(), - url: typeof url === 'string' ? 
url : undefined, - }; - }); + .map(([connectionId, config]) => ({ + connectionId, + driver: config.driver.toLowerCase(), + projectDir, + connection: config, + })); if (members.length < 2) { return null; } diff --git a/packages/cli/test/context/connections/federation.test.ts b/packages/cli/test/context/connections/federation.test.ts index c35fee59..26f5b2bb 100644 --- a/packages/cli/test/context/connections/federation.test.ts +++ b/packages/cli/test/context/connections/federation.test.ts @@ -4,20 +4,21 @@ import { FEDERATED_CONNECTION_ID, } from '../../../src/context/connections/federation.js'; -const conns = (entries: Record) => entries as never; +const conns = (entries: Record) => entries as never; describe('deriveFederatedConnection', () => { it('returns null with zero compatible members', () => { - expect(deriveFederatedConnection(conns({ snow: { driver: 'snowflake' } }))).toBeNull(); + expect(deriveFederatedConnection(conns({ snow: { driver: 'snowflake' } }), '/proj')).toBeNull(); }); it('returns null with exactly one compatible member', () => { - expect(deriveFederatedConnection(conns({ pg: { driver: 'postgres' } }))).toBeNull(); + expect(deriveFederatedConnection(conns({ pg: { driver: 'postgres' } }), '/proj')).toBeNull(); }); it('derives a descriptor with two compatible members', () => { const result = deriveFederatedConnection( conns({ pg: { driver: 'postgres' }, lite: { driver: 'sqlite' } }), + '/proj', ); expect(result).not.toBeNull(); expect(result?.id).toBe(FEDERATED_CONNECTION_ID); @@ -25,9 +26,20 @@ describe('deriveFederatedConnection', () => { expect(result?.members.map((m) => m.connectionId).sort()).toEqual(['lite', 'pg']); }); + it('carries each member connection config and projectDir', () => { + const result = deriveFederatedConnection( + conns({ pg: { driver: 'postgres', host: 'h' }, lite: { driver: 'sqlite', path: './a.db' } }), + '/proj', + ); + const pg = result?.members.find((m) => m.connectionId === 'pg'); + expect(pg?.connection).toEqual({ 
driver: 'postgres', host: 'h' }); + expect(pg?.projectDir).toBe('/proj'); + }); + it('excludes incompatible members from the group', () => { const result = deriveFederatedConnection( conns({ pg: { driver: 'postgres' }, my: { driver: 'mysql' }, snow: { driver: 'snowflake' } }), + '/proj', ); expect(result?.members.map((m) => m.connectionId).sort()).toEqual(['my', 'pg']); }); @@ -35,6 +47,7 @@ describe('deriveFederatedConnection', () => { it('is case-insensitive on driver names', () => { const result = deriveFederatedConnection( conns({ pg: { driver: 'POSTGRES' }, lite: { driver: 'SQLite' } }), + '/proj', ); expect(result?.members).toHaveLength(2); }); From b3ccdb35bb17372980c36cc3b26830764c4a955d Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 09:09:02 +0700 Subject: [PATCH 22/46] feat(federation): resolve per-member attach targets via canonical connector resolvers Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/connectors/duckdb/federated-attach.ts | 71 ++++++++++++++++ .../duckdb/federated-attach.test.ts | 82 +++++++++++++++++++ 2 files changed, 153 insertions(+) create mode 100644 packages/cli/src/connectors/duckdb/federated-attach.ts create mode 100644 packages/cli/test/connectors/duckdb/federated-attach.test.ts diff --git a/packages/cli/src/connectors/duckdb/federated-attach.ts b/packages/cli/src/connectors/duckdb/federated-attach.ts new file mode 100644 index 00000000..1de2c204 --- /dev/null +++ b/packages/cli/src/connectors/duckdb/federated-attach.ts @@ -0,0 +1,71 @@ +import { sqliteDatabasePathFromConfig, type KtxSqliteConnectionConfig } from '../sqlite/connector.js'; +import { postgresPoolConfigFromConfig, type KtxPostgresConnectionConfig } from '../postgres/connector.js'; +import { + mysqlConnectionPoolConfigFromConfig, + type KtxMysqlConnectionConfig, +} from '../mysql/connector.js'; +import { attachTypeForDriver, type FederatedMember } from 
'../../context/connections/federation.js'; + +function pgKeyword(value: string): string { + // libpq keyword values quote with single quotes and backslash-escape. + return /[\s'\\]/.test(value) ? `'${value.replaceAll('\\', '\\\\').replaceAll("'", "\\'")}'` : value; +} + +function postgresAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): string { + const cfg = postgresPoolConfigFromConfig({ + connectionId: member.connectionId, + connection: member.connection as KtxPostgresConnectionConfig, + env, + }); + if (cfg.connectionString) { + return cfg.connectionString; + } + const parts: string[] = []; + if (cfg.host) parts.push(`host=${pgKeyword(cfg.host)}`); + if (cfg.port) parts.push(`port=${cfg.port}`); + if (cfg.database) parts.push(`dbname=${pgKeyword(cfg.database)}`); + if (cfg.user) parts.push(`user=${pgKeyword(cfg.user)}`); + if (cfg.password) parts.push(`password=${pgKeyword(cfg.password)}`); + return parts.join(' '); +} + +function mysqlAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): string { + const cfg = mysqlConnectionPoolConfigFromConfig({ + connectionId: member.connectionId, + connection: member.connection as KtxMysqlConnectionConfig, + env, + }); + const parts: string[] = [ + `host=${cfg.host}`, + `port=${cfg.port}`, + `database=${cfg.database}`, + `user=${cfg.user}`, + ]; + if (cfg.password) { + parts.push(`password=${cfg.password}`); + } + return parts.join(' '); +} + +/** + * Resolves a federated member's ktx.yaml config into the connection target + * DuckDB's ATTACH wants for that driver, reusing each connector's canonical + * resolver so federation and standalone scans agree on config interpretation. 
+ */ +export function federatedAttachTarget(member: FederatedMember, env: NodeJS.ProcessEnv): string { + const type = attachTypeForDriver(member.driver); + switch (type) { + case 'sqlite': + return sqliteDatabasePathFromConfig({ + connectionId: member.connectionId, + projectDir: member.projectDir, + connection: member.connection as KtxSqliteConnectionConfig, + }); + case 'postgres': + return postgresAttachString(member, env); + case 'mysql': + return mysqlAttachString(member, env); + default: + throw new Error(`Driver "${member.driver}" cannot be attached by DuckDB federation.`); + } +} diff --git a/packages/cli/test/connectors/duckdb/federated-attach.test.ts b/packages/cli/test/connectors/duckdb/federated-attach.test.ts new file mode 100644 index 00000000..bb3277a2 --- /dev/null +++ b/packages/cli/test/connectors/duckdb/federated-attach.test.ts @@ -0,0 +1,82 @@ +import { mkdtempSync, writeFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { federatedAttachTarget } from '../../../src/connectors/duckdb/federated-attach.js'; +import type { FederatedMember } from '../../../src/context/connections/federation.js'; + +const member = (over: Partial): FederatedMember => ({ + connectionId: 'm', + driver: 'sqlite', + projectDir: '/proj', + connection: { driver: 'sqlite' }, + ...over, +}); + +describe('federatedAttachTarget', () => { + it('resolves a sqlite path: config to an absolute filesystem path against projectDir', () => { + const dir = mkdtempSync(join(tmpdir(), 'ktx-attach-')); + writeFileSync(join(dir, 'reviews.db'), ''); + try { + const target = federatedAttachTarget( + member({ driver: 'sqlite', projectDir: dir, connection: { driver: 'sqlite', path: './reviews.db' } }), + {}, + ); + expect(target).toBe(join(dir, 'reviews.db')); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it('resolves a sqlite file:// url to a filesystem 
path', () => { + const target = federatedAttachTarget( + member({ driver: 'sqlite', connection: { driver: 'sqlite', url: 'file:///data/reviews.db' } }), + {}, + ); + expect(target).toBe('/data/reviews.db'); + }); + + it('builds a libpq connection string for postgres from host/database/user', () => { + const target = federatedAttachTarget( + member({ + driver: 'postgres', + connection: { driver: 'postgres', host: 'h', port: 5433, database: 'books', username: 'u', password: 'p' }, + }), + {}, + ); + expect(target).toContain('host=h'); + expect(target).toContain('port=5433'); + expect(target).toContain('dbname=books'); + expect(target).toContain('user=u'); + expect(target).toContain('password=p'); + }); + + it('passes a postgres url through as the connection string', () => { + const target = federatedAttachTarget( + member({ driver: 'postgres', connection: { driver: 'postgres', url: 'env:PG_URL' } }), + { PG_URL: 'postgresql://localhost/books' }, + ); + expect(target).toBe('postgresql://localhost/books'); + }); + + it('builds a mysql connection string from host/database/user', () => { + const target = federatedAttachTarget( + member({ + driver: 'mysql', + connection: { driver: 'mysql', host: 'h', port: 3307, database: 'app', username: 'u', password: 'p' }, + }), + {}, + ); + expect(target).toContain('host=h'); + expect(target).toContain('port=3307'); + expect(target).toContain('database=app'); + expect(target).toContain('user=u'); + expect(target).toContain('password=p'); + }); + + it('throws for an unsupported driver', () => { + expect(() => federatedAttachTarget(member({ driver: 'snowflake', connection: { driver: 'snowflake' } }), {})).toThrow( + /cannot be attached/i, + ); + }); +}); From 499c84ff0b93c2447da3dc72e87473d967c03757 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 09:12:55 +0700 Subject: [PATCH 23/46] fix(federation): quote mysql attach-string values like postgres Co-Authored-By: 
Claude Opus 4.8 (1M context) --- .../src/connectors/duckdb/federated-attach.ts | 20 +++++++++---------- .../duckdb/federated-attach.test.ts | 11 ++++++++++ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/packages/cli/src/connectors/duckdb/federated-attach.ts b/packages/cli/src/connectors/duckdb/federated-attach.ts index 1de2c204..710b4269 100644 --- a/packages/cli/src/connectors/duckdb/federated-attach.ts +++ b/packages/cli/src/connectors/duckdb/federated-attach.ts @@ -6,8 +6,8 @@ import { } from '../mysql/connector.js'; import { attachTypeForDriver, type FederatedMember } from '../../context/connections/federation.js'; -function pgKeyword(value: string): string { - // libpq keyword values quote with single quotes and backslash-escape. +function kvKeyword(value: string): string { + // libpq/DuckDB key-value values quote with single quotes and backslash-escape. return /[\s'\\]/.test(value) ? `'${value.replaceAll('\\', '\\\\').replaceAll("'", "\\'")}'` : value; } @@ -21,11 +21,11 @@ function postgresAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): return cfg.connectionString; } const parts: string[] = []; - if (cfg.host) parts.push(`host=${pgKeyword(cfg.host)}`); + if (cfg.host) parts.push(`host=${kvKeyword(cfg.host)}`); if (cfg.port) parts.push(`port=${cfg.port}`); - if (cfg.database) parts.push(`dbname=${pgKeyword(cfg.database)}`); - if (cfg.user) parts.push(`user=${pgKeyword(cfg.user)}`); - if (cfg.password) parts.push(`password=${pgKeyword(cfg.password)}`); + if (cfg.database) parts.push(`dbname=${kvKeyword(cfg.database)}`); + if (cfg.user) parts.push(`user=${kvKeyword(cfg.user)}`); + if (cfg.password) parts.push(`password=${kvKeyword(cfg.password)}`); return parts.join(' '); } @@ -36,13 +36,13 @@ function mysqlAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): str env, }); const parts: string[] = [ - `host=${cfg.host}`, + `host=${kvKeyword(cfg.host)}`, `port=${cfg.port}`, - `database=${cfg.database}`, - 
`user=${cfg.user}`, + `database=${kvKeyword(cfg.database)}`, + `user=${kvKeyword(cfg.user)}`, ]; if (cfg.password) { - parts.push(`password=${cfg.password}`); + parts.push(`password=${kvKeyword(cfg.password)}`); } return parts.join(' '); } diff --git a/packages/cli/test/connectors/duckdb/federated-attach.test.ts b/packages/cli/test/connectors/duckdb/federated-attach.test.ts index bb3277a2..bb21c03d 100644 --- a/packages/cli/test/connectors/duckdb/federated-attach.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-attach.test.ts @@ -74,6 +74,17 @@ describe('federatedAttachTarget', () => { expect(target).toContain('password=p'); }); + it('quotes mysql values containing spaces', () => { + const target = federatedAttachTarget( + member({ + driver: 'mysql', + connection: { driver: 'mysql', host: 'h', database: 'app', username: 'u', password: 'pass word' }, + }), + {}, + ); + expect(target).toContain("password='pass word'"); + }); + it('throws for an unsupported driver', () => { expect(() => federatedAttachTarget(member({ driver: 'snowflake', connection: { driver: 'snowflake' } }), {})).toThrow( /cannot be attached/i, From bf1a40b8e2e29f2f0abbb270ddbb7215752d67ef Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 09:16:07 +0700 Subject: [PATCH 24/46] fix(federation): resolve member attach targets via canonical resolvers, supporting sqlite path: Co-Authored-By: Claude Opus 4.8 (1M context) --- .../connectors/duckdb/federated-executor.ts | 11 ++---- .../duckdb/federated-executor.test.ts | 34 ++++++++++--------- .../duckdb/federated-join.integration.test.ts | 8 ++--- 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/packages/cli/src/connectors/duckdb/federated-executor.ts b/packages/cli/src/connectors/duckdb/federated-executor.ts index 3d7ab523..508f53aa 100644 --- a/packages/cli/src/connectors/duckdb/federated-executor.ts +++ 
b/packages/cli/src/connectors/duckdb/federated-executor.ts @@ -1,5 +1,5 @@ import { DuckDBInstance } from '@duckdb/node-api'; -import { resolveStringReference } from '../shared/string-reference.js'; +import { federatedAttachTarget } from './federated-attach.js'; import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutionResult, @@ -12,18 +12,11 @@ function quoteDuckdbIdentifier(id: string): string { return `"${id.replaceAll('"', '""')}"`; } -function memberUrl(member: FederatedMember, env: NodeJS.ProcessEnv): string { - if (member.url === undefined || member.url.length === 0) { - throw new Error(`Federated member "${member.connectionId}" has no url in ktx.yaml.`); - } - return resolveStringReference(member.url, env); -} - /** @internal */ export function buildAttachStatements(members: FederatedMember[], env: NodeJS.ProcessEnv): string[] { const attachments = members.map((member) => ({ type: attachTypeForDriver(member.driver), - url: memberUrl(member, env), + url: federatedAttachTarget(member, env), alias: member.connectionId, })); diff --git a/packages/cli/test/connectors/duckdb/federated-executor.test.ts b/packages/cli/test/connectors/duckdb/federated-executor.test.ts index 3c69dfa8..0cc07dc4 100644 --- a/packages/cli/test/connectors/duckdb/federated-executor.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-executor.test.ts @@ -2,11 +2,11 @@ import { describe, expect, it } from 'vitest'; import { buildAttachStatements } from '../../../src/connectors/duckdb/federated-executor.js'; import { attachTypeForDriver, type FederatedMember } from '../../../src/context/connections/federation.js'; -const member = (connectionId: string, driver: string, url: string | undefined): FederatedMember => ({ - connectionId, - driver, - url, -}); +const member = ( + connectionId: string, + driver: string, + connection: FederatedMember['connection'], +): FederatedMember => ({ connectionId, driver, projectDir: '/proj', connection }); describe('attachTypeForDriver', () => { 
it('maps drivers to DuckDB attach extension types', () => { @@ -24,8 +24,8 @@ describe('buildAttachStatements', () => { it('loads each driver type once, then emits READ_ONLY ATTACH aliased by connectionId, resolving env refs', () => { const stmts = buildAttachStatements( [ - member('pg_books', 'postgres', 'env:PG_URL'), - member('sqlite_reviews', 'sqlite', '/data/reviews.db'), + member('pg_books', 'postgres', { driver: 'postgres', url: 'env:PG_URL' }), + member('sqlite_reviews', 'sqlite', { driver: 'sqlite', path: '/data/reviews.db' }), ], { PG_URL: 'postgresql://localhost/books' }, ); @@ -40,8 +40,8 @@ describe('buildAttachStatements', () => { it('loads a shared driver type only once across members', () => { const stmts = buildAttachStatements( [ - member('pg_a', 'postgres', 'postgresql://h/a'), - member('pg_b', 'postgres', 'postgresql://h/b'), + member('pg_a', 'postgres', { driver: 'postgres', url: 'postgresql://h/a' }), + member('pg_b', 'postgres', { driver: 'postgres', url: 'postgresql://h/b' }), ], {}, ); @@ -53,16 +53,18 @@ describe('buildAttachStatements', () => { }); it('quotes a hyphenated connection id as a DuckDB identifier', () => { - const stmts = buildAttachStatements([member('postgres-warehouse', 'postgres', 'postgresql://h/db')], {}); + const stmts = buildAttachStatements( + [member('postgres-warehouse', 'postgres', { driver: 'postgres', url: 'postgresql://h/db' })], + {}, + ); expect(stmts.at(-1)).toBe(`ATTACH 'postgresql://h/db' AS "postgres-warehouse" (TYPE postgres, READ_ONLY);`); }); - it('throws if a member url is missing', () => { - expect(() => buildAttachStatements([member('pg', 'postgres', undefined)], {})).toThrow(/no url/i); - }); - - it('escapes single quotes in a member url', () => { - const stmts = buildAttachStatements([member('pg', 'postgres', "postgresql://u:it's@h/db")], {}); + it('escapes single quotes in a resolved attach target', () => { + const stmts = buildAttachStatements( + [member('pg', 'postgres', { driver: 'postgres', 
url: "postgresql://u:it's@h/db" })], + {}, + ); expect(stmts.at(-1)).toBe('ATTACH \'postgresql://u:it\'\'s@h/db\' AS "pg" (TYPE postgres, READ_ONLY);'); }); }); diff --git a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts index cf2a7bd6..087f6618 100644 --- a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts @@ -21,8 +21,8 @@ describe('federated cross-catalog join (live DuckDB)', () => { reviews.close(); const members: FederatedMember[] = [ - { connectionId: 'books_db', driver: 'sqlite', url: booksPath }, - { connectionId: 'reviews_db', driver: 'sqlite', url: reviewsPath }, + { connectionId: 'books_db', driver: 'sqlite', projectDir: dir, connection: { driver: 'sqlite', path: booksPath } }, + { connectionId: 'reviews_db', driver: 'sqlite', projectDir: dir, connection: { driver: 'sqlite', path: reviewsPath } }, ]; try { @@ -60,8 +60,8 @@ describe('federated cross-catalog join (live DuckDB)', () => { reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 3);'); reviews.close(); const members: FederatedMember[] = [ - { connectionId: 'books-db', driver: 'sqlite', url: booksPath }, - { connectionId: 'reviews-db', driver: 'sqlite', url: reviewsPath }, + { connectionId: 'books-db', driver: 'sqlite', projectDir: dir, connection: { driver: 'sqlite', path: booksPath } }, + { connectionId: 'reviews-db', driver: 'sqlite', projectDir: dir, connection: { driver: 'sqlite', path: reviewsPath } }, ]; try { const result = await executeFederatedQuery(members, { From d9eae205b37d2d9d4215c2adf6751e175440b3a1 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 09:20:46 +0700 Subject: [PATCH 25/46] refactor(federation): thread projectDir through deriveFederatedConnection callers 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/cli/src/context/sl/local-sl.ts | 2 +- packages/cli/src/ingest-query-executor.ts | 2 +- packages/cli/src/setup-databases.ts | 5 +++-- packages/cli/test/setup-databases-federation-notice.test.ts | 6 +++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/packages/cli/src/context/sl/local-sl.ts b/packages/cli/src/context/sl/local-sl.ts index bcfa298c..6af348e5 100644 --- a/packages/cli/src/context/sl/local-sl.ts +++ b/packages/cli/src/context/sl/local-sl.ts @@ -171,7 +171,7 @@ export async function loadLocalSlSourceRecords( input: { connectionId: string }, ): Promise { if (input.connectionId === FEDERATED_CONNECTION_ID) { - const descriptor = deriveFederatedConnection(project.config.connections); + const descriptor = deriveFederatedConnection(project.config.connections, project.projectDir); if (!descriptor) { return []; } diff --git a/packages/cli/src/ingest-query-executor.ts b/packages/cli/src/ingest-query-executor.ts index fcc27150..9f77f434 100644 --- a/packages/cli/src/ingest-query-executor.ts +++ b/packages/cli/src/ingest-query-executor.ts @@ -24,7 +24,7 @@ export function createKtxCliIngestQueryExecutor( return { async execute(input: KtxSqlQueryExecutionInput) { if (input.connectionId === FEDERATED_CONNECTION_ID) { - const descriptor = deriveFederatedConnection(project.config.connections); + const descriptor = deriveFederatedConnection(project.config.connections, project.projectDir); if (!descriptor) { throw new Error('Federated execution requested but fewer than 2 attach-compatible connections exist.'); } diff --git a/packages/cli/src/setup-databases.ts b/packages/cli/src/setup-databases.ts index 735cc188..5e26f4cd 100644 --- a/packages/cli/src/setup-databases.ts +++ b/packages/cli/src/setup-databases.ts @@ -1174,7 +1174,7 @@ async function writeConnectionConfig(input: { } if (input.io) { - const federationNotice = federationNoticeFor(config.connections); + const federationNotice = 
federationNoticeFor(config.connections, input.projectDir); if (federationNotice) { writeSetupSection(input.io, 'Federated connection available', [federationNotice]); } @@ -1184,8 +1184,9 @@ async function writeConnectionConfig(input: { /** @internal */ export function federationNoticeFor( connections: Record, + projectDir: string, ): string | null { - const descriptor = deriveFederatedConnection(connections); + const descriptor = deriveFederatedConnection(connections, projectDir); if (!descriptor) { return null; } diff --git a/packages/cli/test/setup-databases-federation-notice.test.ts b/packages/cli/test/setup-databases-federation-notice.test.ts index 2e0afcb4..218f436e 100644 --- a/packages/cli/test/setup-databases-federation-notice.test.ts +++ b/packages/cli/test/setup-databases-federation-notice.test.ts @@ -6,19 +6,19 @@ describe('federationNoticeFor', () => { const notice = federationNoticeFor({ pg_books: { driver: 'postgres' }, sqlite_reviews: { driver: 'sqlite' }, - } as never); + } as never, '/proj'); expect(notice).toMatch(/pg_books/); expect(notice).toMatch(/sqlite_reviews/); expect(notice).toMatch(/cross-database/i); }); it('returns null with fewer than 2 compatible', () => { - expect(federationNoticeFor({ pg: { driver: 'postgres' } } as never)).toBeNull(); + expect(federationNoticeFor({ pg: { driver: 'postgres' } } as never, '/proj')).toBeNull(); }); it('returns null when the second db is incompatible', () => { expect( - federationNoticeFor({ pg: { driver: 'postgres' }, snow: { driver: 'snowflake' } } as never), + federationNoticeFor({ pg: { driver: 'postgres' }, snow: { driver: 'snowflake' } } as never, '/proj'), ).toBeNull(); }); }); From 32338ff6123e6137a0b294d834d569551d25fc0e Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 09:24:53 +0700 Subject: [PATCH 26/46] feat(federation): add shared project read-only SQL executor that routes _ktx_federated Co-Authored-By: Claude Opus 4.8 
(1M context) --- .../connections/project-sql-executor.ts | 61 +++++++++++++++ .../project-sql-executor-federated.test.ts | 75 +++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 packages/cli/src/context/connections/project-sql-executor.ts create mode 100644 packages/cli/test/context/connections/project-sql-executor-federated.test.ts diff --git a/packages/cli/src/context/connections/project-sql-executor.ts b/packages/cli/src/context/connections/project-sql-executor.ts new file mode 100644 index 00000000..0bdf36a7 --- /dev/null +++ b/packages/cli/src/context/connections/project-sql-executor.ts @@ -0,0 +1,61 @@ +import { executeFederatedQuery } from '../../connectors/duckdb/federated-executor.js'; +import type { KtxLocalProject } from '../project/project.js'; +import type { KtxScanConnector, KtxScanContext } from '../scan/types.js'; +import { deriveFederatedConnection, FEDERATED_CONNECTION_ID } from './federation.js'; +import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutionResult } from './query-executor.js'; + +export interface ExecuteProjectReadOnlySqlDeps { + project: KtxLocalProject; + input: KtxSqlQueryExecutionInput; + createConnector: (connectionId: string) => Promise | KtxScanConnector; + executeFederated?: typeof executeFederatedQuery; + runId?: string; +} + +async function cleanupConnector(connector: KtxScanConnector | null): Promise { + await connector?.cleanup?.(); +} + +/** + * Single resolve-and-execute path for project read-only SQL. The federated + * connection is derived from declared state here so every executor entry point + * routes `_ktx_federated` identically; standard connections go through the + * scan connector. 
+ */ +export async function executeProjectReadOnlySql( + deps: ExecuteProjectReadOnlySqlDeps, +): Promise { + const { project, input } = deps; + if (input.connectionId === FEDERATED_CONNECTION_ID) { + const descriptor = deriveFederatedConnection(project.config.connections, project.projectDir); + if (!descriptor) { + throw new Error('Federated execution requested but fewer than 2 attach-compatible connections exist.'); + } + const runFederated = deps.executeFederated ?? executeFederatedQuery; + return runFederated(descriptor.members, input); + } + + let connector: KtxScanConnector | null = null; + try { + connector = await deps.createConnector(input.connectionId); + if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) { + throw new Error( + `Connection "${input.connectionId}" driver "${connector.driver}" does not support read-only SQL execution.`, + ); + } + const ctx: KtxScanContext = { runId: deps.runId ?? 'sql-execution' }; + const result = await connector.executeReadOnly( + { connectionId: input.connectionId, sql: input.sql, maxRows: input.maxRows }, + ctx, + ); + return { + headers: result.headers, + rows: result.rows, + totalRows: result.totalRows, + command: 'SELECT', + rowCount: result.rowCount, + }; + } finally { + await cleanupConnector(connector); + } +} diff --git a/packages/cli/test/context/connections/project-sql-executor-federated.test.ts b/packages/cli/test/context/connections/project-sql-executor-federated.test.ts new file mode 100644 index 00000000..cd1bdb62 --- /dev/null +++ b/packages/cli/test/context/connections/project-sql-executor-federated.test.ts @@ -0,0 +1,75 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { executeFederatedQuery } from '../../../src/connectors/duckdb/federated-executor.js'; +import { executeProjectReadOnlySql } from '../../../src/context/connections/project-sql-executor.js'; +import type { KtxLocalProject } from '../../../src/context/project/project.js'; + +function 
fakeProject(connections: Record): KtxLocalProject { + return { + projectDir: '/tmp/proj', + configPath: '/tmp/proj/ktx.yaml', + config: { connections } as unknown as KtxLocalProject['config'], + coreConfig: {} as KtxLocalProject['coreConfig'], + git: {} as KtxLocalProject['git'], + fileStore: {} as KtxLocalProject['fileStore'], + }; +} + +describe('executeProjectReadOnlySql — federated routing', () => { + it('routes _ktx_federated through the federated executor with derived members', async () => { + const project = fakeProject({ pg: { driver: 'postgres' }, lite: { driver: 'sqlite' } }); + const executeFederated = vi.fn(async () => ({ + headers: ['x'], + rows: [[1]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + })); + const createConnector = vi.fn(); + + const result = await executeProjectReadOnlySql({ + project, + input: { connectionId: '_ktx_federated', connection: undefined, sql: 'SELECT 1', maxRows: 100 }, + createConnector: createConnector as never, + executeFederated, + }); + + expect(result.rows).toEqual([[1]]); + expect(executeFederated).toHaveBeenCalledOnce(); + const members = executeFederated.mock.calls[0][0]; + expect(members.map((m) => m.connectionId).sort()).toEqual(['lite', 'pg']); + expect(createConnector).not.toHaveBeenCalled(); + }); + + it('throws when _ktx_federated requested but fewer than 2 compatible members', async () => { + const project = fakeProject({ pg: { driver: 'postgres' } }); + await expect( + executeProjectReadOnlySql({ + project, + input: { connectionId: '_ktx_federated', connection: undefined, sql: 'SELECT 1', maxRows: 100 }, + createConnector: (() => { + throw new Error('should not be called'); + }) as never, + executeFederated: vi.fn(), + }), + ).rejects.toThrow(/fewer than 2/i); + }); + + it('routes a normal connection through the scan connector', async () => { + const project = fakeProject({ pg: { driver: 'postgres' } }); + const connector = { + driver: 'postgres', + capabilities: { readOnlySql: true }, + 
executeReadOnly: vi.fn(async () => ({ headers: ['a'], rows: [['v']], totalRows: 1, rowCount: 1 })), + cleanup: vi.fn(async () => {}), + }; + const result = await executeProjectReadOnlySql({ + project, + input: { connectionId: 'pg', connection: { driver: 'postgres' }, sql: 'SELECT a', maxRows: 50 }, + createConnector: (async () => connector) as never, + executeFederated: vi.fn(), + }); + expect(result.rows).toEqual([['v']]); + expect(connector.executeReadOnly).toHaveBeenCalledOnce(); + expect(connector.cleanup).toHaveBeenCalledOnce(); + }); +}); From 13bfc3da452cba9f44432aaaf3f1f165c4bff3c6 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 09:33:53 +0700 Subject: [PATCH 27/46] test(federation): exercise shared executor default federated path with real DuckDB Co-Authored-By: Claude Opus 4.8 (1M context) --- .../project-sql-executor.integration.test.ts | 57 +++++++++++++++++++ ...d.test.ts => project-sql-executor.test.ts} | 0 2 files changed, 57 insertions(+) create mode 100644 packages/cli/test/context/connections/project-sql-executor.integration.test.ts rename packages/cli/test/context/connections/{project-sql-executor-federated.test.ts => project-sql-executor.test.ts} (100%) diff --git a/packages/cli/test/context/connections/project-sql-executor.integration.test.ts b/packages/cli/test/context/connections/project-sql-executor.integration.test.ts new file mode 100644 index 00000000..b6322f43 --- /dev/null +++ b/packages/cli/test/context/connections/project-sql-executor.integration.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, it } from 'vitest'; +import { mkdtempSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import Database from 'better-sqlite3'; +import { executeProjectReadOnlySql } from '../../../src/context/connections/project-sql-executor.js'; +import type { KtxLocalProject } from '../../../src/context/project/project.js'; + 
+function fakeProject(projectDir: string, connections: Record): KtxLocalProject { + return { + projectDir, + configPath: join(projectDir, 'ktx.yaml'), + config: { connections } as unknown as KtxLocalProject['config'], + coreConfig: {} as KtxLocalProject['coreConfig'], + git: {} as KtxLocalProject['git'], + fileStore: {} as KtxLocalProject['fileStore'], + }; +} + +describe('executeProjectReadOnlySql — federated integration (real DuckDB)', () => { + it('runs a federated cross-catalog join through the default executeFederatedQuery', async () => { + const dir = mkdtempSync(join(tmpdir(), 'ktx-fed-exec-')); + const booksPath = join(dir, 'books.db'); + const reviewsPath = join(dir, 'reviews.db'); + + const books = new Database(booksPath); + books.exec("CREATE TABLE books (id INTEGER, title TEXT); INSERT INTO books VALUES (1, 'Dune'), (2, 'Foundation');"); + books.close(); + const reviews = new Database(reviewsPath); + reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 4), (2, 2);'); + reviews.close(); + + const project = fakeProject(dir, { + books_db: { driver: 'sqlite', path: booksPath }, + reviews_db: { driver: 'sqlite', path: reviewsPath }, + }); + + try { + const result = await executeProjectReadOnlySql({ + project, + input: { + connectionId: '_ktx_federated', + connection: undefined, + sql: 'SELECT b.title, AVG(r.stars) AS avg_stars FROM books_db.books b JOIN reviews_db.reviews r ON b.id = r.book_id GROUP BY b.title ORDER BY b.title', + maxRows: 100, + }, + createConnector: () => { + throw new Error('federated path must not create a scan connector'); + }, + }); + expect(result.rows.map((row) => row[0])).toEqual(['Dune', 'Foundation']); + expect(Number(result.rows[0][1])).toBeCloseTo(4.5); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/cli/test/context/connections/project-sql-executor-federated.test.ts 
b/packages/cli/test/context/connections/project-sql-executor.test.ts similarity index 100% rename from packages/cli/test/context/connections/project-sql-executor-federated.test.ts rename to packages/cli/test/context/connections/project-sql-executor.test.ts From eb49eb1dfa8ce6600dae7fcb518aeda8716bc0bc Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 09:36:54 +0700 Subject: [PATCH 28/46] refactor(federation): route ingest query executor through shared executor Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/cli/src/ingest-query-executor.ts | 47 ++++------------------- 1 file changed, 8 insertions(+), 39 deletions(-) diff --git a/packages/cli/src/ingest-query-executor.ts b/packages/cli/src/ingest-query-executor.ts index 9f77f434..2671beee 100644 --- a/packages/cli/src/ingest-query-executor.ts +++ b/packages/cli/src/ingest-query-executor.ts @@ -1,8 +1,7 @@ import { executeFederatedQuery } from './connectors/duckdb/federated-executor.js'; import type { KtxSqlQueryExecutionInput, KtxSqlQueryExecutorPort } from './context/connections/query-executor.js'; -import { deriveFederatedConnection, FEDERATED_CONNECTION_ID } from './context/connections/federation.js'; +import { executeProjectReadOnlySql } from './context/connections/project-sql-executor.js'; import type { KtxLocalProject } from './context/project/project.js'; -import type { KtxScanConnector, KtxScanContext } from './context/scan/types.js'; import { createKtxCliScanConnector } from './local-scan-connectors.js'; type CreateConnector = typeof createKtxCliScanConnector; @@ -12,10 +11,6 @@ export interface KtxCliIngestQueryExecutorDeps { executeFederated?: typeof executeFederatedQuery; } -async function cleanupConnector(connector: KtxScanConnector | null): Promise { - await connector?.cleanup?.(); -} - export function createKtxCliIngestQueryExecutor( project: KtxLocalProject, deps: KtxCliIngestQueryExecutorDeps = {}, @@ -23,39 +18,13 @@ 
export function createKtxCliIngestQueryExecutor( const createConnector = deps.createConnector ?? createKtxCliScanConnector; return { async execute(input: KtxSqlQueryExecutionInput) { - if (input.connectionId === FEDERATED_CONNECTION_ID) { - const descriptor = deriveFederatedConnection(project.config.connections, project.projectDir); - if (!descriptor) { - throw new Error('Federated execution requested but fewer than 2 attach-compatible connections exist.'); - } - const runFederated = deps.executeFederated ?? executeFederatedQuery; - return runFederated(descriptor.members, input); - } - - let connector: KtxScanConnector | null = null; - try { - connector = await createConnector(project, input.connectionId); - if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) { - throw new Error( - `Connection "${input.connectionId}" driver "${connector.driver}" does not support read-only SQL execution.`, - ); - } - - const ctx: KtxScanContext = { runId: 'ingest-sql-execution' }; - const result = await connector.executeReadOnly( - { connectionId: input.connectionId, sql: input.sql, maxRows: input.maxRows }, - ctx, - ); - return { - headers: result.headers, - rows: result.rows, - totalRows: result.totalRows, - command: 'SELECT', - rowCount: result.rowCount, - }; - } finally { - await cleanupConnector(connector); - } + return executeProjectReadOnlySql({ + project, + input, + createConnector: (connectionId) => createConnector(project, connectionId), + executeFederated: deps.executeFederated, + runId: 'ingest-sql-execution', + }); }, }; } From 1000d764c9f8ff9ddca3e1389f0559ae09f7f67d Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 09:53:13 +0700 Subject: [PATCH 29/46] fix(federation): route MCP sql_execution _ktx_federated through shared executor Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/context/mcp/local-project-ports.ts | 40 +++++++++++--- 
...roject-ports-federated.integration.test.ts | 54 +++++++++++++++++++ 2 files changed, 87 insertions(+), 7 deletions(-) create mode 100644 packages/cli/test/context/mcp/local-project-ports-federated.integration.test.ts diff --git a/packages/cli/src/context/mcp/local-project-ports.ts b/packages/cli/src/context/mcp/local-project-ports.ts index 4bada831..76f347b8 100644 --- a/packages/cli/src/context/mcp/local-project-ports.ts +++ b/packages/cli/src/context/mcp/local-project-ports.ts @@ -1,5 +1,7 @@ import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js'; import { KtxQueryError, isNativeProgrammingFault } from '../../errors.js'; +import { executeProjectReadOnlySql } from '../../context/connections/project-sql-executor.js'; +import { FEDERATED_CONNECTION_ID } from '../../context/connections/federation.js'; import { localConnectionInfoFromConfig } from '../../context/connections/local-warehouse-descriptor.js'; import type { KtxEmbeddingPort } from '../../context/core/embedding.js'; import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js'; @@ -38,22 +40,46 @@ async function executeValidatedReadOnlySql( onProgress?: KtxMcpProgressCallback, ): Promise { await onProgress?.({ progress: 0, message: 'Validating SQL' }); + if (!options.sqlAnalysis) { + throw new Error('sql_execution requires parser-backed SQL validation.'); + } + const createConnector = options.localScan?.createConnector; + if (!createConnector) { + throw new Error('sql_execution requires a local scan connector factory.'); + } + + if (input.connectionId === FEDERATED_CONNECTION_ID) { + const validation = await options.sqlAnalysis.validateReadOnly(input.sql, sqlAnalysisDialectForDriver('duckdb')); + if (!validation.ok) { + throw new Error(validation.error ?? 
'SQL is not read-only.'); + } + await onProgress?.({ progress: 0.3, message: 'Executing' }); + const result = await executeProjectReadOnlySql({ + project, + input: { + connectionId: input.connectionId, + projectDir: project.projectDir, + connection: undefined, + sql: input.sql, + maxRows: input.maxRows, + }, + createConnector, + runId: 'mcp-sql-execution', + }); + const rowCount = result.rowCount ?? result.rows.length; + await onProgress?.({ progress: 1, message: `Fetched ${rowCount} rows` }); + return { headers: result.headers, rows: result.rows, rowCount }; + } + const connectionId = assertSafeConnectionId(input.connectionId); const connection = project.config.connections[connectionId]; if (!connection) { throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`); } - if (!options.sqlAnalysis) { - throw new Error('sql_execution requires parser-backed SQL validation.'); - } const validation = await options.sqlAnalysis.validateReadOnly(input.sql, sqlAnalysisDialectForDriver(connection.driver)); if (!validation.ok) { throw new Error(validation.error ?? 
'SQL is not read-only.'); } - const createConnector = options.localScan?.createConnector; - if (!createConnector) { - throw new Error('sql_execution requires a local scan connector factory.'); - } let connector: KtxScanConnector | null = null; try { diff --git a/packages/cli/test/context/mcp/local-project-ports-federated.integration.test.ts b/packages/cli/test/context/mcp/local-project-ports-federated.integration.test.ts new file mode 100644 index 00000000..5e56fd90 --- /dev/null +++ b/packages/cli/test/context/mcp/local-project-ports-federated.integration.test.ts @@ -0,0 +1,54 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import Database from 'better-sqlite3'; +import { describe, expect, it, vi } from 'vitest'; +import { createLocalProjectMcpContextPorts } from '../../../src/context/mcp/local-project-ports.js'; +import { initKtxProject } from '../../../src/context/project/project.js'; + +describe('MCP sql_execution — federated routing (live DuckDB)', () => { + it('routes _ktx_federated through the shared federated executor, validating with the duckdb dialect', async () => { + const dir = await mkdtemp(join(tmpdir(), 'ktx-mcp-fed-')); + try { + const booksPath = join(dir, 'books.db'); + const reviewsPath = join(dir, 'reviews.db'); + const books = new Database(booksPath); + books.exec("CREATE TABLE books (id INTEGER, title TEXT); INSERT INTO books VALUES (1, 'Dune');"); + books.close(); + const reviews = new Database(reviewsPath); + reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 3);'); + reviews.close(); + + const project = await initKtxProject({ projectDir: dir }); + project.config.connections.books_db = { driver: 'sqlite', path: booksPath }; + project.config.connections.reviews_db = { driver: 'sqlite', path: reviewsPath }; + + const validateReadOnly = vi.fn(async () => ({ ok: true, error: null })); + const ports = 
createLocalProjectMcpContextPorts(project, { + sqlAnalysis: { + analyzeForFingerprint: vi.fn(), + analyzeBatch: vi.fn(), + validateReadOnly, + } as never, + localScan: { + createConnector: () => { + throw new Error('federated path must not create a scan connector'); + }, + }, + embeddingService: null, + }); + + const result = await ports.sqlExecution?.execute({ + connectionId: '_ktx_federated', + sql: 'SELECT b.title, AVG(r.stars) AS avg_stars FROM books_db.books b JOIN reviews_db.reviews r ON b.id = r.book_id GROUP BY b.title', + maxRows: 100, + }); + + expect(result?.rows?.[0]?.[0]).toBe('Dune'); + // Federated validation uses the duckdb dialect, not a member driver. + expect(validateReadOnly).toHaveBeenCalledWith(expect.any(String), 'duckdb'); + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); +}); From 5ad4044728be218870d0e6ee345cffe71c80bc4d Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 12:47:24 +0700 Subject: [PATCH 30/46] fix(federation): preserve cross-DB joins to federated siblings in manifest re-emit Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ingest/adapters/live-database/manifest.ts | 2 +- .../ingest/manifest-federated-join.test.ts | 36 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/packages/cli/src/context/ingest/adapters/live-database/manifest.ts b/packages/cli/src/context/ingest/adapters/live-database/manifest.ts index 44b90707..2e864528 100644 --- a/packages/cli/src/context/ingest/adapters/live-database/manifest.ts +++ b/packages/cli/src/context/ingest/adapters/live-database/manifest.ts @@ -249,7 +249,7 @@ export function buildJoinsByTable( continue; } for (const join of tableJoins) { - if (tableNames.has(join.to)) { + if (tableNames.has(join.to) || federatedSiblingTargets.has(join.to)) { addJoinOnce(joinsByTable, tableName, join); } } diff --git 
a/packages/cli/test/context/ingest/manifest-federated-join.test.ts b/packages/cli/test/context/ingest/manifest-federated-join.test.ts index e6dd3b24..0595d321 100644 --- a/packages/cli/test/context/ingest/manifest-federated-join.test.ts +++ b/packages/cli/test/context/ingest/manifest-federated-join.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest'; -import { buildJoinsByTable } from '../../../src/context/ingest/adapters/live-database/manifest.js'; +import { buildJoinsByTable, buildLiveDatabaseManifestShards } from '../../../src/context/ingest/adapters/live-database/manifest.js'; const joinData = (toTable: string) => ({ fromTable: 'books', @@ -34,3 +34,37 @@ describe('buildJoinsByTable federated siblings', () => { expect(result.get('authors')?.map((j) => j.to)).toEqual(['books']); // reverse still added for local joins }); }); + +describe('buildLiveDatabaseManifestShards federated preserved joins', () => { + it('keeps a preserved manual join whose target is a federated sibling', () => { + const result = buildLiveDatabaseManifestShards({ + connectionType: 'POSTGRES', + tables: [{ name: 'books', catalog: null, db: 'public', columns: [{ name: 'id', type: 'int' }] }], + joins: [], + existingPreservedJoins: new Map([ + [ + 'books', + [{ to: 'sqlite_reviews.reviews', on: 'books.id = reviews.book_id', relationship: 'one_to_many', source: 'manual' }], + ], + ]), + federatedSiblingTargets: new Set(['sqlite_reviews.reviews']), + mapColumnType: (t) => t, + }); + const shard = result.shards.get('public'); + expect(shard?.tables.books?.joins?.map((j) => j.to)).toEqual(['sqlite_reviews.reviews']); + }); + + it('still drops a preserved join whose target is neither local nor a sibling', () => { + const result = buildLiveDatabaseManifestShards({ + connectionType: 'POSTGRES', + tables: [{ name: 'books', catalog: null, db: 'public', columns: [{ name: 'id', type: 'int' }] }], + joins: [], + existingPreservedJoins: new Map([ + ['books', [{ to: 'ghost', on: 'books.id 
= ghost.id', relationship: 'one_to_many', source: 'manual' }]], + ]), + federatedSiblingTargets: new Set(), + mapColumnType: (t) => t, + }); + expect(result.shards.get('public')?.tables.books?.joins).toBeUndefined(); + }); +}); From 8d499745ae9e7eb4331cf0757d258140e2ceabe7 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 12:59:05 +0700 Subject: [PATCH 31/46] fix(federation): preserve declared cross-DB joins through scan re-ingest Co-Authored-By: Claude Opus 4.8 (1M context) --- .../scan/local-enrichment-artifacts.ts | 48 +++++++- .../local-enrichment-federated-join.test.ts | 110 ++++++++++++++++++ 2 files changed, 156 insertions(+), 2 deletions(-) create mode 100644 packages/cli/test/context/scan/local-enrichment-federated-join.test.ts diff --git a/packages/cli/src/context/scan/local-enrichment-artifacts.ts b/packages/cli/src/context/scan/local-enrichment-artifacts.ts index e2072d6b..2ae18d91 100644 --- a/packages/cli/src/context/scan/local-enrichment-artifacts.ts +++ b/packages/cli/src/context/scan/local-enrichment-artifacts.ts @@ -4,6 +4,7 @@ import type { TableUsageOutput } from '../../context/ingest/adapters/historic-sq import type { KtxScanRelationshipConfig } from '../project/config.js'; import type { KtxLocalProject } from '../../context/project/project.js'; import { isSlYamlPath } from '../../context/sl/source-files.js'; +import { deriveFederatedConnection } from '../connections/federation.js'; import type { KtxLocalScanEnrichmentResult } from './local-enrichment.js'; import { buildKtxRelationshipArtifacts, @@ -193,10 +194,46 @@ function joinReferencesExistingColumns( return true; } +async function federatedSiblingTargets( + project: KtxLocalProject, + connectionId: string, +): Promise> { + const descriptor = deriveFederatedConnection(project.config.connections, project.projectDir); + if (!descriptor) { + return new Set(); + } + const siblings = descriptor.members.filter((member) => 
member.connectionId !== connectionId); + const targets = new Set(); + for (const sibling of siblings) { + let files: string[]; + try { + files = (await project.fileStore.listFiles(schemaDir(sibling.connectionId))).files.filter(isSlYamlPath); + } catch { + continue; + } + for (const file of files) { + try { + const { content } = await project.fileStore.readFile(file); + const shard = YAML.parse(content) as LiveDatabaseManifestShard | null; + for (const entry of Object.values(shard?.tables ?? {})) { + // entry.table is the member-local qualified ref (e.g. `public.books` + // or `reviews`); prefix with the member connection id to match the + // fully-qualified `to:` form authored in cross-DB joins. + targets.add(`${sibling.connectionId}.${entry.table}`); + } + } catch { + continue; + } + } + } + return targets; +} + async function loadExistingManifestState( project: KtxLocalProject, connectionId: string, snapshot: KtxSchemaSnapshot, + siblingTargets: Set, ): Promise { const descriptions = new Map(); const preservedJoins = new Map(); @@ -236,7 +273,7 @@ async function loadExistingManifestState( const joins = (entry.joins ?? 
[]).filter((join) => { return ( (join.source === 'manual' || join.source === 'inferred') && - validTableNames.has(join.to) && + (validTableNames.has(join.to) || siblingTargets.has(join.to)) && joinReferencesExistingColumns(join, columnsByTable) ); }); @@ -277,7 +314,13 @@ export async function writeLocalScanManifestShards( }; } - const existing = await loadExistingManifestState(input.project, input.connectionId, input.snapshot); + const siblingTargets = await federatedSiblingTargets(input.project, input.connectionId); + const existing = await loadExistingManifestState( + input.project, + input.connectionId, + input.snapshot, + siblingTargets, + ); const { shards } = buildLiveDatabaseManifestShards({ connectionType: input.driver.toUpperCase(), tables: snapshotTablesToManifestData(input.snapshot, input.descriptionUpdates), @@ -285,6 +328,7 @@ export async function writeLocalScanManifestShards( existingDescriptions: existing.descriptions, existingPreservedJoins: existing.preservedJoins, existingUsage: existing.usage, + federatedSiblingTargets: siblingTargets, mapColumnType: (dimensionType) => dimensionType, }); diff --git a/packages/cli/test/context/scan/local-enrichment-federated-join.test.ts b/packages/cli/test/context/scan/local-enrichment-federated-join.test.ts new file mode 100644 index 00000000..6ff1635a --- /dev/null +++ b/packages/cli/test/context/scan/local-enrichment-federated-join.test.ts @@ -0,0 +1,110 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import YAML from 'yaml'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { buildDefaultKtxProjectConfig } from '../../../src/context/project/config.js'; +import type { GitService } from '../../../src/context/core/git.service.js'; +import { LocalGitFileStore } from '../../../src/context/project/local-git-file-store.js'; +import type { KtxLocalProject } from '../../../src/context/project/project.js'; +import 
{ writeLocalScanManifestShards } from '../../../src/context/scan/local-enrichment-artifacts.js'; +import type { KtxSchemaSnapshot } from '../../../src/context/scan/types.js'; + +// `writeLocalScanManifestShards` commits its output via git; the file is +// already on disk before the commit call, so the stub only returns commit info. +const stubGit = { + commitFile: async () => ({ + commitHash: 'stub', + message: 'stub', + author: 'ktx', + authorEmail: 'ktx@example.com', + timestamp: new Date().toISOString(), + created: true, + }), +} as unknown as GitService; + +function fakeProject(projectDir: string, connections: KtxLocalProject['config']['connections']): KtxLocalProject { + const fileStore = new LocalGitFileStore({ rootDir: projectDir, git: stubGit }); + return { + projectDir, + configPath: join(projectDir, 'ktx.yaml'), + config: { ...buildDefaultKtxProjectConfig(), connections }, + coreConfig: {} as KtxLocalProject['coreConfig'], + git: stubGit, + fileStore, + }; +} + +const EXISTING_BOOKS_SHARD = `tables: + books: + table: public.books + columns: + - name: id + type: number + pk: true + joins: + - to: sqlite_reviews.reviews + on: books.id = reviews.book_id + relationship: one_to_many + source: manual +`; + +const booksSnapshot: KtxSchemaSnapshot = { + connectionId: 'pg_books', + tables: [ + { + name: 'books', + catalog: null, + db: 'public', + columns: [{ name: 'id', dimensionType: 'number', primaryKey: true }], + foreignKeys: [], + }, + ], +} as unknown as KtxSchemaSnapshot; + +describe('writeLocalScanManifestShards federated cross-DB joins', () => { + let tempDir: string; + let project: KtxLocalProject; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-enrich-fed-')); + project = fakeProject(join(tempDir, 'project'), { + pg_books: { driver: 'postgres' }, + sqlite_reviews: { driver: 'sqlite' }, + }); + await project.fileStore.writeFile( + 'semantic-layer/pg_books/_schema/public.yaml', + EXISTING_BOOKS_SHARD, + 'ktx', + 
'ktx@example.com', + 'seed', + { skipLock: true }, + ); + await project.fileStore.writeFile( + 'semantic-layer/sqlite_reviews/_schema/main.yaml', + 'tables:\n reviews:\n table: reviews\n columns:\n - name: book_id\n type: number\n', + 'ktx', + 'ktx@example.com', + 'seed', + { skipLock: true }, + ); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('preserves a manual cross-DB join to a sqlite sibling across a re-scan', async () => { + await writeLocalScanManifestShards({ + project, + connectionId: 'pg_books', + syncId: 'sync1', + driver: 'postgres', + snapshot: booksSnapshot, + dryRun: false, + }); + const { content } = await project.fileStore.readFile('semantic-layer/pg_books/_schema/public.yaml'); + const shard = YAML.parse(content) as { tables: Record }> }; + expect(shard.tables.books?.joins?.map((j) => j.to)).toEqual(['sqlite_reviews.reviews']); + }); +}); From 5431740b89444e5f1cb93e9342ee829a64a7b9be Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 13:04:34 +0700 Subject: [PATCH 32/46] refactor(federation): document sibling-ref invariant, drop unsafe casts in test Co-Authored-By: Claude Opus 4.8 (1M context) --- .../scan/local-enrichment-artifacts.ts | 7 +++-- .../local-enrichment-federated-join.test.ts | 28 ++++++++++++++++--- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/packages/cli/src/context/scan/local-enrichment-artifacts.ts b/packages/cli/src/context/scan/local-enrichment-artifacts.ts index 2ae18d91..4c59c0b6 100644 --- a/packages/cli/src/context/scan/local-enrichment-artifacts.ts +++ b/packages/cli/src/context/scan/local-enrichment-artifacts.ts @@ -216,9 +216,10 @@ async function federatedSiblingTargets( const { content } = await project.fileStore.readFile(file); const shard = YAML.parse(content) as LiveDatabaseManifestShard | null; for (const entry of Object.values(shard?.tables ?? 
{})) { - // entry.table is the member-local qualified ref (e.g. `public.books` - // or `reviews`); prefix with the member connection id to match the - // fully-qualified `to:` form authored in cross-DB joins. + // entry.table is buildTableRef's member-local form — `schema.table` + // (postgres/mysql) or `table` (sqlite), never connectionId-prefixed — + // so prefixing with the member id yields the fully-qualified `to:` + // form authored in cross-DB joins (connectionId.schema.table / connectionId.table). targets.add(`${sibling.connectionId}.${entry.table}`); } } catch { diff --git a/packages/cli/test/context/scan/local-enrichment-federated-join.test.ts b/packages/cli/test/context/scan/local-enrichment-federated-join.test.ts index 6ff1635a..ba1c6cae 100644 --- a/packages/cli/test/context/scan/local-enrichment-federated-join.test.ts +++ b/packages/cli/test/context/scan/local-enrichment-federated-join.test.ts @@ -12,16 +12,19 @@ import type { KtxSchemaSnapshot } from '../../../src/context/scan/types.js'; // `writeLocalScanManifestShards` commits its output via git; the file is // already on disk before the commit call, so the stub only returns commit info. 
-const stubGit = { +const stubGitCommitFile: Pick = { commitFile: async () => ({ commitHash: 'stub', + shortHash: 'stub', message: 'stub', author: 'ktx', authorEmail: 'ktx@example.com', timestamp: new Date().toISOString(), + committedDate: new Date().toISOString(), created: true, }), -} as unknown as GitService; +}; +const stubGit = stubGitCommitFile as GitService; function fakeProject(projectDir: string, connections: KtxLocalProject['config']['connections']): KtxLocalProject { const fileStore = new LocalGitFileStore({ rootDir: projectDir, git: stubGit }); @@ -51,16 +54,33 @@ const EXISTING_BOOKS_SHARD = `tables: const booksSnapshot: KtxSchemaSnapshot = { connectionId: 'pg_books', + driver: 'postgres', + extractedAt: new Date().toISOString(), + scope: {}, + metadata: {}, tables: [ { name: 'books', catalog: null, db: 'public', - columns: [{ name: 'id', dimensionType: 'number', primaryKey: true }], + kind: 'table', + comment: null, + estimatedRows: null, + columns: [ + { + name: 'id', + nativeType: 'integer', + normalizedType: 'integer', + dimensionType: 'number', + nullable: false, + primaryKey: true, + comment: null, + }, + ], foreignKeys: [], }, ], -} as unknown as KtxSchemaSnapshot; +}; describe('writeLocalScanManifestShards federated cross-DB joins', () => { let tempDir: string; From 553a8c6b3f59fb7b62be904a4b9fc05bed1acb7f Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 13:09:50 +0700 Subject: [PATCH 33/46] fix(federation): namespace federated source names by member to avoid collisions Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/cli/src/context/sl/local-sl.ts | 9 ++++++++- .../test/context/sl/local-sl-federated.test.ts | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/context/sl/local-sl.ts b/packages/cli/src/context/sl/local-sl.ts index 6af348e5..10583ab4 100644 --- a/packages/cli/src/context/sl/local-sl.ts +++ 
b/packages/cli/src/context/sl/local-sl.ts @@ -176,7 +176,14 @@ export async function loadLocalSlSourceRecords( return []; } const perMember = await Promise.all( - descriptor.members.map((member) => loadSingleConnectionSourceRecords(project, member.connectionId)), + descriptor.members.map(async (member) => { + const records = await loadSingleConnectionSourceRecords(project, member.connectionId); + return records.map((record) => ({ + ...record, + name: `${member.connectionId}.${record.name}`, + source: { ...record.source, name: `${member.connectionId}.${record.source.name}` }, + })); + }), ); return perMember.flat(); } diff --git a/packages/cli/test/context/sl/local-sl-federated.test.ts b/packages/cli/test/context/sl/local-sl-federated.test.ts index 7c2b5585..798aaa6a 100644 --- a/packages/cli/test/context/sl/local-sl-federated.test.ts +++ b/packages/cli/test/context/sl/local-sl-federated.test.ts @@ -75,10 +75,22 @@ describe('federated semantic-layer source loading', () => { await rm(tempDir, { recursive: true, force: true }); }); - it('unions member source records for _ktx_federated', async () => { + it('namespaces member source records by connection id for _ktx_federated', async () => { const records = await loadLocalSlSourceRecords(project, { connectionId: '_ktx_federated' }); const names = records.map((r) => r.source.name).sort(); - expect(names).toEqual(['books', 'reviews']); + expect(names).toEqual(['pg_books.books', 'sqlite_reviews.reviews']); + }); + + it('keeps colliding member table names distinct via namespacing', async () => { + const collide = fakeProject(join(tempDir, 'collide'), { + pg_a: { driver: 'postgres' }, + sqlite_b: { driver: 'sqlite' }, + }); + const usersManifest = `tables:\n users:\n table: public.users\n columns:\n - name: id\n type: number\n`; + await seedManifest(collide, 'semantic-layer/pg_a/_schema/public.yaml', usersManifest); + await seedManifest(collide, 'semantic-layer/sqlite_b/_schema/main.yaml', usersManifest); + const records = 
await loadLocalSlSourceRecords(collide, { connectionId: '_ktx_federated' }); + expect(records.map((r) => r.source.name).sort()).toEqual(['pg_a.users', 'sqlite_b.users']); }); it('reads from member dirs, never a literal _ktx_federated dir', async () => { From 0cbe3d456eb5c0679de6457264a15310bbb2313d Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 13:18:43 +0700 Subject: [PATCH 34/46] docs(federation): document member-namespaced federated source names Co-Authored-By: Claude Opus 4.8 (1M context) --- .../docs/concepts/cross-database-federation.mdx | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs-site/content/docs/concepts/cross-database-federation.mdx b/docs-site/content/docs/concepts/cross-database-federation.mdx index 77ff149a..e9a07dc5 100644 --- a/docs-site/content/docs/concepts/cross-database-federation.mdx +++ b/docs-site/content/docs/concepts/cross-database-federation.mdx @@ -67,6 +67,20 @@ For the example above: These fully qualified names are what you write in a source's `table:` field and in any cross-database join's `to:` field. +## Source names in the federated view + +When you list or search semantic-layer sources under the federated connection, +each source's `name` is prefixed with its member connection id — for example +`pg_books.books` and `sqlite_reviews.reviews`. The prefix keeps names unique +when two members own a table with the same name: a `users` table in each of +`pg_app` and `sqlite_app` surfaces as `pg_app.users` and `sqlite_app.users` +rather than colliding on a bare `users`. + +Each source file's physical `table:` field is unchanged — it still uses the +fully qualified form from the previous section. The member prefix applies only +to the source `name` as seen through the federated connection, not to the +per-member view of the same source. 
+ ## Declaring a cross-database join In v1, cross-database joins are declared explicitly in a source's `joins:` From b49b28c78ba9ac80ac4bebb1577d26525b6f0f52 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 13:31:09 +0700 Subject: [PATCH 35/46] fix(federation): preserve member SSL/search_path in attach, classify federated MCP errors Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/connectors/duckdb/federated-attach.ts | 9 ++ .../src/context/mcp/local-project-ports.ts | 5 + .../scan/local-enrichment-artifacts.ts | 8 +- packages/cli/src/context/sl/local-query.ts | 5 +- .../duckdb/federated-attach.test.ts | 33 ++++++ .../local-query-federated.integration.test.ts | 111 ++++++++++++++++++ 6 files changed, 166 insertions(+), 5 deletions(-) create mode 100644 packages/cli/test/context/sl/local-query-federated.integration.test.ts diff --git a/packages/cli/src/connectors/duckdb/federated-attach.ts b/packages/cli/src/connectors/duckdb/federated-attach.ts index 710b4269..3a675c94 100644 --- a/packages/cli/src/connectors/duckdb/federated-attach.ts +++ b/packages/cli/src/connectors/duckdb/federated-attach.ts @@ -26,6 +26,12 @@ function postgresAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): if (cfg.database) parts.push(`dbname=${kvKeyword(cfg.database)}`); if (cfg.user) parts.push(`user=${kvKeyword(cfg.user)}`); if (cfg.password) parts.push(`password=${kvKeyword(cfg.password)}`); + if (cfg.ssl) { + parts.push('sslmode=require'); + } + if (cfg.options) { + parts.push(`options=${kvKeyword(cfg.options)}`); + } return parts.join(' '); } @@ -44,6 +50,9 @@ function mysqlAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): str if (cfg.password) { parts.push(`password=${kvKeyword(cfg.password)}`); } + if (cfg.ssl) { + parts.push('ssl_mode=REQUIRED'); + } return parts.join(' '); } diff --git a/packages/cli/src/context/mcp/local-project-ports.ts 
b/packages/cli/src/context/mcp/local-project-ports.ts index 76f347b8..e88c5af4 100644 --- a/packages/cli/src/context/mcp/local-project-ports.ts +++ b/packages/cli/src/context/mcp/local-project-ports.ts @@ -65,6 +65,11 @@ async function executeValidatedReadOnlySql( }, createConnector, runId: 'mcp-sql-execution', + }).catch((error: unknown) => { + if (isNativeProgrammingFault(error)) { + throw error; + } + throw new KtxQueryError(error instanceof Error ? error.message : String(error), { cause: error }); }); const rowCount = result.rowCount ?? result.rows.length; await onProgress?.({ progress: 1, message: `Fetched ${rowCount} rows` }); diff --git a/packages/cli/src/context/scan/local-enrichment-artifacts.ts b/packages/cli/src/context/scan/local-enrichment-artifacts.ts index 4c59c0b6..c800f810 100644 --- a/packages/cli/src/context/scan/local-enrichment-artifacts.ts +++ b/packages/cli/src/context/scan/local-enrichment-artifacts.ts @@ -216,10 +216,10 @@ async function federatedSiblingTargets( const { content } = await project.fileStore.readFile(file); const shard = YAML.parse(content) as LiveDatabaseManifestShard | null; for (const entry of Object.values(shard?.tables ?? {})) { - // entry.table is buildTableRef's member-local form — `schema.table` - // (postgres/mysql) or `table` (sqlite), never connectionId-prefixed — - // so prefixing with the member id yields the fully-qualified `to:` - // form authored in cross-DB joins (connectionId.schema.table / connectionId.table). + // entry.table is buildTableRef's member-local ref (1-3 parts: + // table / schema.table / catalog.schema.table), never connectionId- + // prefixed — so prefixing with the member id yields the fully-qualified + // `to:` form authored in cross-DB joins. 
targets.add(`${sibling.connectionId}.${entry.table}`); } } catch { diff --git a/packages/cli/src/context/sl/local-query.ts b/packages/cli/src/context/sl/local-query.ts index c4cfd7c1..187d7e6c 100644 --- a/packages/cli/src/context/sl/local-query.ts +++ b/packages/cli/src/context/sl/local-query.ts @@ -119,7 +119,10 @@ export async function compileLocalSlQuery( ...response.plan, execution: { mode: 'executed', - driver: project.config.connections[connectionId]?.driver ?? 'unknown', + driver: + connectionId === FEDERATED_CONNECTION_ID + ? 'duckdb' + : (project.config.connections[connectionId]?.driver ?? 'unknown'), maxRows, rowCount: execution.rowCount, }, diff --git a/packages/cli/test/connectors/duckdb/federated-attach.test.ts b/packages/cli/test/connectors/duckdb/federated-attach.test.ts index bb21c03d..a3e492fa 100644 --- a/packages/cli/test/connectors/duckdb/federated-attach.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-attach.test.ts @@ -85,6 +85,39 @@ describe('federatedAttachTarget', () => { expect(target).toContain("password='pass word'"); }); + it('emits sslmode=require for a postgres member configured with discrete fields and ssl', () => { + const target = federatedAttachTarget( + member({ + driver: 'postgres', + connection: { driver: 'postgres', host: 'h', database: 'db', username: 'u', ssl: true }, + }), + {}, + ); + expect(target).toContain('sslmode=require'); + }); + + it('passes through the postgres search_path as options', () => { + const target = federatedAttachTarget( + member({ + driver: 'postgres', + connection: { driver: 'postgres', host: 'h', database: 'db', username: 'u', schema: 'analytics' }, + }), + {}, + ); + expect(target).toContain('search_path=analytics'); + }); + + it('emits ssl_mode=REQUIRED for a mysql member with ssl', () => { + const target = federatedAttachTarget( + member({ + driver: 'mysql', + connection: { driver: 'mysql', host: 'h', database: 'db', username: 'u', ssl: true }, + }), + {}, + ); + 
expect(target).toContain('ssl_mode=REQUIRED'); + }); + it('throws for an unsupported driver', () => { expect(() => federatedAttachTarget(member({ driver: 'snowflake', connection: { driver: 'snowflake' } }), {})).toThrow( /cannot be attached/i, diff --git a/packages/cli/test/context/sl/local-query-federated.integration.test.ts b/packages/cli/test/context/sl/local-query-federated.integration.test.ts new file mode 100644 index 00000000..b0ff1f5e --- /dev/null +++ b/packages/cli/test/context/sl/local-query-federated.integration.test.ts @@ -0,0 +1,111 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { buildDefaultKtxProjectConfig } from '../../../src/context/project/config.js'; +import { executeProjectReadOnlySql } from '../../../src/context/connections/project-sql-executor.js'; +import type { GitService } from '../../../src/context/core/git.service.js'; +import { LocalGitFileStore } from '../../../src/context/project/local-git-file-store.js'; +import type { KtxLocalProject } from '../../../src/context/project/project.js'; +import { loadLocalSlSourceRecords } from '../../../src/context/sl/local-sl.js'; + +const BOOKS_MANIFEST = `tables: + books: + table: main.books + columns: + - name: id + type: number + pk: true + - name: title + type: string +`; + +const REVIEWS_MANIFEST = `tables: + reviews: + table: main.reviews + columns: + - name: book_id + type: number + pk: true + - name: stars + type: number +`; + +// On-disk file store only (no git init/commit) so manifest seeding never hits +// the gpg-signing path; connections also carry real sqlite paths so the +// federated executor can attach them. 
+function fakeProject(projectDir: string, connections: KtxLocalProject['config']['connections']): KtxLocalProject { + const fileStore = new LocalGitFileStore({ rootDir: projectDir, git: {} as GitService }); + const config = { ...buildDefaultKtxProjectConfig(), connections }; + return { + projectDir, + configPath: join(projectDir, 'ktx.yaml'), + config, + coreConfig: {} as KtxLocalProject['coreConfig'], + git: {} as GitService, + fileStore, + }; +} + +async function seedManifest(project: KtxLocalProject, path: string, content: string): Promise { + await project.fileStore.writeFile(path, content, 'ktx', 'ktx@example.com', 'seed manifest', { skipLock: true }); +} + +describe('federated SL source loading and physical execution (real DuckDB)', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-local-query-fed-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('namespaces source names while keeping physical table refs, and executes against them', async () => { + const projectDir = join(tempDir, 'project'); + const booksPath = join(tempDir, 'books.db'); + const reviewsPath = join(tempDir, 'reviews.db'); + + const books = new Database(booksPath); + books.exec("CREATE TABLE books (id INTEGER, title TEXT); INSERT INTO books VALUES (1, 'Dune'), (2, 'Foundation');"); + books.close(); + const reviews = new Database(reviewsPath); + reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 4), (2, 2);'); + reviews.close(); + + const project = fakeProject(projectDir, { + sqlite_books: { driver: 'sqlite', path: booksPath }, + sqlite_reviews: { driver: 'sqlite', path: reviewsPath }, + }); + await seedManifest(project, 'semantic-layer/sqlite_books/_schema/main.yaml', BOOKS_MANIFEST); + await seedManifest(project, 'semantic-layer/sqlite_reviews/_schema/main.yaml', REVIEWS_MANIFEST); + + // (a) Name-vs-physical separation: 
federated loading namespaces source.name + // by member id while source.table stays the unprefixed physical ref. + const records = await loadLocalSlSourceRecords(project, { connectionId: '_ktx_federated' }); + const byName = new Map(records.map((record) => [record.source.name, record.source.table])); + expect([...byName.keys()].sort()).toEqual(['sqlite_books.books', 'sqlite_reviews.reviews']); + expect(byName.get('sqlite_books.books')).toBe('main.books'); + expect(byName.get('sqlite_reviews.reviews')).toBe('main.reviews'); + + // (b) Physical targeting end-to-end: a federated query joining the two + // attached catalogs by their connectionId-prefixed physical refs returns + // the correct joined rows through live DuckDB. + const result = await executeProjectReadOnlySql({ + project, + input: { + connectionId: '_ktx_federated', + connection: undefined, + sql: 'SELECT b.title, AVG(r.stars) AS avg_stars FROM sqlite_books.books b JOIN sqlite_reviews.reviews r ON b.id = r.book_id GROUP BY b.title ORDER BY b.title', + maxRows: 100, + }, + createConnector: () => { + throw new Error('federated path must not create a scan connector'); + }, + }); + expect(result.rows.map((row) => row[0])).toEqual(['Dune', 'Foundation']); + expect(Number(result.rows[0][1])).toBeCloseTo(4.5); + }); +}); From ed8ed25b96d3811da50674232f9bae827c95c267 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 13:54:08 +0700 Subject: [PATCH 36/46] refactor(federation): simplify federated dispatch and parallelize sibling reads Dedup the federated driver ternary in local-query, derive the prefixed source.name from the already-built name, drop the duplicated error in federatedAttachTarget's exhaustive switch, inline the one-line cleanupConnector wrapper, and parallelize federatedSiblingTargets' shard reads (was sequential await-in-for on the scan hot path). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/connectors/duckdb/federated-attach.ts | 8 ++-- .../connections/project-sql-executor.ts | 6 +-- .../scan/local-enrichment-artifacts.ts | 46 +++++++++---------- packages/cli/src/context/sl/local-query.ts | 5 +- packages/cli/src/context/sl/local-sl.ts | 9 ++-- 5 files changed, 31 insertions(+), 43 deletions(-) diff --git a/packages/cli/src/connectors/duckdb/federated-attach.ts b/packages/cli/src/connectors/duckdb/federated-attach.ts index 3a675c94..3a97d0e9 100644 --- a/packages/cli/src/connectors/duckdb/federated-attach.ts +++ b/packages/cli/src/connectors/duckdb/federated-attach.ts @@ -62,8 +62,9 @@ function mysqlAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): str * resolver so federation and standalone scans agree on config interpretation. */ export function federatedAttachTarget(member: FederatedMember, env: NodeJS.ProcessEnv): string { - const type = attachTypeForDriver(member.driver); - switch (type) { + // attachTypeForDriver throws on unsupported drivers, so the cases below are + // exhaustive; the trailing throw exists only to satisfy the string return type. 
+ switch (attachTypeForDriver(member.driver)) { case 'sqlite': return sqliteDatabasePathFromConfig({ connectionId: member.connectionId, @@ -74,7 +75,6 @@ export function federatedAttachTarget(member: FederatedMember, env: NodeJS.Proce return postgresAttachString(member, env); case 'mysql': return mysqlAttachString(member, env); - default: - throw new Error(`Driver "${member.driver}" cannot be attached by DuckDB federation.`); } + throw new Error(`Driver "${member.driver}" cannot be attached by DuckDB federation.`); } diff --git a/packages/cli/src/context/connections/project-sql-executor.ts b/packages/cli/src/context/connections/project-sql-executor.ts index 0bdf36a7..6ee4408a 100644 --- a/packages/cli/src/context/connections/project-sql-executor.ts +++ b/packages/cli/src/context/connections/project-sql-executor.ts @@ -12,10 +12,6 @@ export interface ExecuteProjectReadOnlySqlDeps { runId?: string; } -async function cleanupConnector(connector: KtxScanConnector | null): Promise { - await connector?.cleanup?.(); -} - /** * Single resolve-and-execute path for project read-only SQL. 
The federated * connection is derived from declared state here so every executor entry point @@ -56,6 +52,6 @@ export async function executeProjectReadOnlySql( rowCount: result.rowCount, }; } finally { - await cleanupConnector(connector); + await connector?.cleanup?.(); } } diff --git a/packages/cli/src/context/scan/local-enrichment-artifacts.ts b/packages/cli/src/context/scan/local-enrichment-artifacts.ts index c800f810..798107b8 100644 --- a/packages/cli/src/context/scan/local-enrichment-artifacts.ts +++ b/packages/cli/src/context/scan/local-enrichment-artifacts.ts @@ -203,31 +203,27 @@ async function federatedSiblingTargets( return new Set(); } const siblings = descriptor.members.filter((member) => member.connectionId !== connectionId); - const targets = new Set(); - for (const sibling of siblings) { - let files: string[]; - try { - files = (await project.fileStore.listFiles(schemaDir(sibling.connectionId))).files.filter(isSlYamlPath); - } catch { - continue; - } - for (const file of files) { - try { - const { content } = await project.fileStore.readFile(file); - const shard = YAML.parse(content) as LiveDatabaseManifestShard | null; - for (const entry of Object.values(shard?.tables ?? {})) { - // entry.table is buildTableRef's member-local ref (1-3 parts: - // table / schema.table / catalog.schema.table), never connectionId- - // prefixed — so prefixing with the member id yields the fully-qualified - // `to:` form authored in cross-DB joins. 
- targets.add(`${sibling.connectionId}.${entry.table}`); - } - } catch { - continue; - } - } - } - return targets; + const perSibling = await Promise.all(siblings.map((sibling) => siblingJoinTargets(project, sibling.connectionId))); + return new Set(perSibling.flat()); +} + +async function siblingJoinTargets(project: KtxLocalProject, connectionId: string): Promise { + const listed = await project.fileStore.listFiles(schemaDir(connectionId)).catch(() => ({ files: [] })); + const files = listed.files.filter(isSlYamlPath); + const perFile = await Promise.all( + files.map(async (file) => { + const shard = await project.fileStore + .readFile(file) + .then(({ content }) => YAML.parse(content) as LiveDatabaseManifestShard | null) + .catch(() => null); + // entry.table is buildTableRef's member-local ref (1-3 parts: + // table / schema.table / catalog.schema.table), never connectionId- + // prefixed — so prefixing with the member id yields the fully-qualified + // `to:` form authored in cross-DB joins. + return Object.values(shard?.tables ?? {}).map((entry) => `${connectionId}.${entry.table}`); + }), + ); + return perFile.flat(); } async function loadExistingManifestState( diff --git a/packages/cli/src/context/sl/local-query.ts b/packages/cli/src/context/sl/local-query.ts index 187d7e6c..2fc5629d 100644 --- a/packages/cli/src/context/sl/local-query.ts +++ b/packages/cli/src/context/sl/local-query.ts @@ -119,10 +119,7 @@ export async function compileLocalSlQuery( ...response.plan, execution: { mode: 'executed', - driver: - connectionId === FEDERATED_CONNECTION_ID - ? 'duckdb' - : (project.config.connections[connectionId]?.driver ?? 'unknown'), + driver: driver ?? 
'unknown', maxRows, rowCount: execution.rowCount, }, diff --git a/packages/cli/src/context/sl/local-sl.ts b/packages/cli/src/context/sl/local-sl.ts index 10583ab4..8aaa738a 100644 --- a/packages/cli/src/context/sl/local-sl.ts +++ b/packages/cli/src/context/sl/local-sl.ts @@ -178,11 +178,10 @@ export async function loadLocalSlSourceRecords( const perMember = await Promise.all( descriptor.members.map(async (member) => { const records = await loadSingleConnectionSourceRecords(project, member.connectionId); - return records.map((record) => ({ - ...record, - name: `${member.connectionId}.${record.name}`, - source: { ...record.source, name: `${member.connectionId}.${record.source.name}` }, - })); + return records.map((record) => { + const name = `${member.connectionId}.${record.name}`; + return { ...record, name, source: { ...record.source, name } }; + }); }), ); return perMember.flat(); From 4a3ee6721c4ab3fdba57fdc7fb8ffe9255352774 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 14:40:09 +0700 Subject: [PATCH 37/46] feat(federation): carry headerTypes through shared SQL executor Co-Authored-By: Claude Opus 4.8 (1M context) --- .../connections/project-sql-executor.ts | 1 + .../src/context/connections/query-executor.ts | 1 + .../connections/project-sql-executor.test.ts | 41 +++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/packages/cli/src/context/connections/project-sql-executor.ts b/packages/cli/src/context/connections/project-sql-executor.ts index 6ee4408a..0c2da04e 100644 --- a/packages/cli/src/context/connections/project-sql-executor.ts +++ b/packages/cli/src/context/connections/project-sql-executor.ts @@ -46,6 +46,7 @@ export async function executeProjectReadOnlySql( ); return { headers: result.headers, + ...(result.headerTypes ? 
{ headerTypes: result.headerTypes } : {}), rows: result.rows, totalRows: result.totalRows, command: 'SELECT', diff --git a/packages/cli/src/context/connections/query-executor.ts b/packages/cli/src/context/connections/query-executor.ts index e169d164..0f963c63 100644 --- a/packages/cli/src/context/connections/query-executor.ts +++ b/packages/cli/src/context/connections/query-executor.ts @@ -10,6 +10,7 @@ export interface KtxSqlQueryExecutionInput { export interface KtxSqlQueryExecutionResult { headers: string[]; + headerTypes?: string[]; rows: unknown[][]; totalRows: number; command: string; diff --git a/packages/cli/test/context/connections/project-sql-executor.test.ts b/packages/cli/test/context/connections/project-sql-executor.test.ts index cd1bdb62..899875a8 100644 --- a/packages/cli/test/context/connections/project-sql-executor.test.ts +++ b/packages/cli/test/context/connections/project-sql-executor.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it, vi } from 'vitest'; import type { executeFederatedQuery } from '../../../src/connectors/duckdb/federated-executor.js'; import { executeProjectReadOnlySql } from '../../../src/context/connections/project-sql-executor.js'; import type { KtxLocalProject } from '../../../src/context/project/project.js'; +import type { KtxScanConnector } from '../../../src/context/scan/types.js'; function fakeProject(connections: Record): KtxLocalProject { return { @@ -73,3 +74,43 @@ describe('executeProjectReadOnlySql — federated routing', () => { expect(connector.cleanup).toHaveBeenCalledOnce(); }); }); + +function connectorReturning(result: { + headers: string[]; + headerTypes?: string[]; + rows: unknown[][]; + totalRows: number; + rowCount: number | null; +}): KtxScanConnector { + return { + driver: 'sqlite', + capabilities: { readOnlySql: true }, + async executeReadOnly() { + return result; + }, + } as unknown as KtxScanConnector; +} + +describe('executeProjectReadOnlySql headerTypes', () => { + it('forwards connector 
headerTypes on the non-federated branch', async () => { + const project = { + projectDir: '/tmp/p', + config: { connections: { books_db: { driver: 'sqlite', path: './b.db' } } }, + } as never; + + const result = await executeProjectReadOnlySql({ + project, + input: { connectionId: 'books_db', connection: undefined, sql: 'SELECT 1', maxRows: 10 }, + createConnector: () => + connectorReturning({ + headers: ['id'], + headerTypes: ['INTEGER'], + rows: [[1]], + totalRows: 1, + rowCount: 1, + }), + }); + + expect(result.headerTypes).toEqual(['INTEGER']); + }); +}); From c7a4dba8c6981135304c41f0dfbbbeaa97b07454 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 14:43:52 +0700 Subject: [PATCH 38/46] feat(federation): add shared federated connection listing builder Co-Authored-By: Claude Opus 4.8 (1M context) --- .../cli/src/context/connections/federation.ts | 28 +++++++++++++++++++ .../context/connections/federation.test.ts | 26 +++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/packages/cli/src/context/connections/federation.ts b/packages/cli/src/context/connections/federation.ts index 10224397..7a712c69 100644 --- a/packages/cli/src/context/connections/federation.ts +++ b/packages/cli/src/context/connections/federation.ts @@ -57,3 +57,31 @@ export function deriveFederatedConnection( } return { id: FEDERATED_CONNECTION_ID, driver: 'duckdb', members }; } + +export interface FederatedConnectionListing { + id: typeof FEDERATED_CONNECTION_ID; + driver: 'duckdb'; + members: string[]; + hint: string; +} + +/** + * Listing-facing view of the virtual federated connection for `ktx connection` + * and MCP `connection_list`. Derived from the same declared state as + * deriveFederatedConnection, so both surfaces describe one connection. 
+ */ +export function federatedConnectionListing( + connections: Record, + projectDir: string, +): FederatedConnectionListing | null { + const descriptor = deriveFederatedConnection(connections, projectDir); + if (!descriptor) { + return null; + } + return { + id: FEDERATED_CONNECTION_ID, + driver: 'duckdb', + members: descriptor.members.map((member) => member.connectionId), + hint: 'Cross-database queries run here. Name tables connectionId.schema.table (or connectionId.table for sqlite).', + }; +} diff --git a/packages/cli/test/context/connections/federation.test.ts b/packages/cli/test/context/connections/federation.test.ts index 26f5b2bb..68d82898 100644 --- a/packages/cli/test/context/connections/federation.test.ts +++ b/packages/cli/test/context/connections/federation.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from 'vitest'; import { deriveFederatedConnection, + federatedConnectionListing, FEDERATED_CONNECTION_ID, } from '../../../src/context/connections/federation.js'; @@ -52,3 +53,28 @@ describe('deriveFederatedConnection', () => { expect(result?.members).toHaveLength(2); }); }); + +describe('federatedConnectionListing', () => { + it('returns null with fewer than 2 attach-compatible connections', () => { + expect( + federatedConnectionListing({ books_db: { driver: 'sqlite', path: './b.db' } }, '/tmp/p'), + ).toBeNull(); + }); + + it('returns id, driver, member ids and a usage hint with 2+ members', () => { + const listing = federatedConnectionListing( + { + books_db: { driver: 'sqlite', path: './b.db' }, + reviews_db: { driver: 'sqlite', path: './r.db' }, + snow: { driver: 'snowflake', account: 'x' }, + }, + '/tmp/p', + ); + expect(listing).not.toBeNull(); + expect(listing!.id).toBe(FEDERATED_CONNECTION_ID); + expect(listing!.driver).toBe('duckdb'); + expect(listing!.members).toEqual(['books_db', 'reviews_db']); + expect(listing!.hint).toContain('Cross-database'); + expect(listing!.hint).toContain('connectionId.table'); + }); +}); From 
dc08225ee83f75dd80b26f641847d7f285b4f64f Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 18:42:01 +0700 Subject: [PATCH 39/46] fix(federation): route ktx sql through shared executor for _ktx_federated parity Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/cli/src/sql.ts | 99 ++++++++++++++--------------------- packages/cli/test/sql.test.ts | 54 ++++++++++++++++++- 2 files changed, 92 insertions(+), 61 deletions(-) diff --git a/packages/cli/src/sql.ts b/packages/cli/src/sql.ts index d3eb6a81..556e3821 100644 --- a/packages/cli/src/sql.ts +++ b/packages/cli/src/sql.ts @@ -1,5 +1,9 @@ +import { executeFederatedQuery } from './connectors/duckdb/federated-executor.js'; +import { FEDERATED_CONNECTION_ID } from './context/connections/federation.js'; +import { executeProjectReadOnlySql } from './context/connections/project-sql-executor.js'; +import type { KtxSqlQueryExecutionResult } from './context/connections/query-executor.js'; import { loadKtxProject, type KtxLocalProject } from './context/project/project.js'; -import type { KtxQueryResult, KtxScanConnector } from './context/scan/types.js'; +import { sqlAnalysisDialectForDriver } from './context/sql-analysis/dialect.js'; import type { SqlAnalysisDialect, SqlAnalysisPort } from './context/sql-analysis/ports.js'; import type { KtxCliIo } from './cli-runtime.js'; import { type KtxOutputMode, resolveOutputMode } from './io/mode.js'; @@ -30,6 +34,7 @@ export interface KtxSqlDeps { loadProject?: typeof loadKtxProject; createSqlAnalysis?: () => SqlAnalysisPort; createScanConnector?: typeof createKtxCliScanConnector; + executeFederated?: typeof executeFederatedQuery; } interface SqlExecutionOutput { @@ -40,20 +45,6 @@ interface SqlExecutionOutput { rowCount: number; } -function sqlAnalysisDialectForDriver(driver: string | undefined): SqlAnalysisDialect { - const normalized = String(driver ?? 
'').trim().toLowerCase(); - const map: Record = { - postgres: 'postgres', - bigquery: 'bigquery', - snowflake: 'snowflake', - mysql: 'mysql', - sqlserver: 'tsql', - sqlite: 'sqlite', - clickhouse: 'clickhouse', - }; - return map[normalized] ?? 'postgres'; -} - function queryVerb(sql: string): 'select' | 'explain' | 'show' | 'with' | 'other' { const first = sql.trim().split(/\s+/, 1)[0]?.toLowerCase(); if (first === 'select' || first === 'explain' || first === 'show' || first === 'with') { @@ -123,13 +114,7 @@ function printSqlResult(output: SqlExecutionOutput, mode: KtxSqlOutputMode, io: printPretty(output, io); } -async function cleanupConnector(connector: KtxScanConnector | null): Promise { - if (connector?.cleanup) { - await connector.cleanup(); - } -} - -function resultOutput(connectionId: string, result: KtxQueryResult): SqlExecutionOutput { +function resultOutput(connectionId: string, result: KtxSqlQueryExecutionResult): SqlExecutionOutput { return { connectionId, headers: result.headers, @@ -146,12 +131,10 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: let project: KtxLocalProject | undefined; try { project = await (deps.loadProject ?? loadKtxProject)({ projectDir: args.projectDir }); + const isFederated = args.connectionId === FEDERATED_CONNECTION_ID; const connection = project.config.connections[args.connectionId]; - if (!connection) { - throw new Error(`Connection "${args.connectionId}" is not configured in ktx.yaml`); - } - driver = String(connection.driver ?? 'unknown').toLowerCase(); - demoConnection = isDemoConnection(args.connectionId, connection); + driver = isFederated ? 'duckdb' : String(connection?.driver ?? 'unknown').toLowerCase(); + demoConnection = isFederated ? false : isDemoConnection(args.connectionId, connection); const createSqlAnalysis = deps.createSqlAnalysis ?? 
@@ -163,7 +146,7 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: io, })); const analysisPort = createSqlAnalysis(); - const dialect = sqlAnalysisDialectForDriver(connection.driver); + const dialect: SqlAnalysisDialect = isFederated ? 'duckdb' : sqlAnalysisDialectForDriver(connection?.driver); const validation = await analysisPort.validateReadOnly(args.sql, dialect); if (!validation.ok) { throw new Error(validation.error ?? 'SQL is not read-only.'); @@ -171,39 +154,35 @@ export async function runKtxSql(args: KtxSqlArgs, io: KtxCliIo = process, deps: const referencedTableCount = await safeReferencedTableCount(analysisPort, args.sql, dialect); const createScanConnector = deps.createScanConnector ?? createKtxCliScanConnector; - let connector: KtxScanConnector | null = null; - try { - connector = await createScanConnector(project, args.connectionId); - if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) { - throw new Error(`Connection "${args.connectionId}" does not support read-only SQL execution.`); - } - const result = await connector.executeReadOnly( - { - connectionId: args.connectionId, - sql: args.sql, - maxRows: args.maxRows, - }, - { runId: 'cli-sql' }, - ); - const mode = resolveOutputMode({ explicit: args.output, json: args.json, io }); - printSqlResult(resultOutput(args.connectionId, result), mode, io); - await emitTelemetryEvent({ - name: 'sql_completed', + const result = await executeProjectReadOnlySql({ + project, + input: { + connectionId: args.connectionId, projectDir: args.projectDir, - io, - fields: { - driver, - isDemoConnection: demoConnection, - queryVerb: queryVerb(args.sql), - referencedTableCount, - durationMs: Math.max(0, performance.now() - startedAt), - outcome: 'ok', - }, - }); - return 0; - } finally { - await cleanupConnector(connector); - } + connection, + sql: args.sql, + maxRows: args.maxRows, + }, + createConnector: (connectionId) => createScanConnector(project!, connectionId), + 
executeFederated: deps.executeFederated, + runId: 'cli-sql', + }); + const mode = resolveOutputMode({ explicit: args.output, json: args.json, io }); + printSqlResult(resultOutput(args.connectionId, result), mode, io); + await emitTelemetryEvent({ + name: 'sql_completed', + projectDir: args.projectDir, + io, + fields: { + driver, + isDemoConnection: demoConnection, + queryVerb: queryVerb(args.sql), + referencedTableCount, + durationMs: Math.max(0, performance.now() - startedAt), + outcome: 'ok', + }, + }); + return 0; } catch (error) { const errorClass = scrubErrorClass(error); await emitTelemetryEvent({ diff --git a/packages/cli/test/sql.test.ts b/packages/cli/test/sql.test.ts index 5e297429..a95d6cd1 100644 --- a/packages/cli/test/sql.test.ts +++ b/packages/cli/test/sql.test.ts @@ -343,6 +343,58 @@ describe('runKtxSql', () => { expect(connector.executeReadOnly).not.toHaveBeenCalled(); expect(connector.cleanup).toHaveBeenCalledTimes(1); - expect(io.stderr()).toContain('Connection "warehouse" does not support read-only SQL execution.'); + expect(io.stderr()).toContain('does not support read-only SQL execution.'); + }); + + it('routes _ktx_federated through the shared federated executor', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir }); + await writeConnections(projectDir, { + books_db: { driver: 'sqlite', path: 'books.db' }, + reviews_db: { driver: 'sqlite', path: 'reviews.db' }, + }); + const executeFederated = vi.fn(async () => ({ + headers: ['title', 'rating'], + rows: [['Clean Code', 5]], + totalRows: 1, + command: 'SELECT', + rowCount: 1, + })); + const memberConnector = makeConnector({ + executeReadOnly: vi.fn(async () => { + throw new Error('member connector must not be used for federated id'); + }), + }); + const io = makeIo(); + + await expect( + runKtxSql( + { + command: 'execute', + projectDir, + connectionId: '_ktx_federated', + sql: 'select 1', + maxRows: 100, + output: 'json', + json: true, + 
cliVersion: '0.0.0-test', + }, + io.io, + { + createSqlAnalysis: () => makeSqlAnalysis({ ok: true, error: null }), + createScanConnector: vi.fn(async () => memberConnector), + executeFederated, + }, + ), + ).resolves.toBe(0); + + expect(executeFederated).toHaveBeenCalledTimes(1); + expect(memberConnector.executeReadOnly).not.toHaveBeenCalled(); + expect(JSON.parse(io.stdout())).toEqual({ + connectionId: '_ktx_federated', + headers: ['title', 'rating'], + rows: [['Clean Code', 5]], + rowCount: 1, + }); }); }); From f1a489ec4f72fce78a8e854c05528a22e209271f Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 18:59:39 +0700 Subject: [PATCH 40/46] feat(federation): show _ktx_federated in ktx connection list Surfaces the virtual federated connection in the output of `ktx connection list` so agents and users can discover cross-database querying when 2+ attach-compatible connections are configured. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/cli/src/connection.ts | 17 +++-- .../test/connection-list-federated.test.ts | 66 +++++++++++++++++++ 2 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 packages/cli/test/connection-list-federated.test.ts diff --git a/packages/cli/src/connection.ts b/packages/cli/src/connection.ts index 1e267833..d12dccb7 100644 --- a/packages/cli/src/connection.ts +++ b/packages/cli/src/connection.ts @@ -6,6 +6,7 @@ import { type NotionBotInfo, NotionClient } from './context/ingest/adapters/noti import { createLocalLookerCredentialResolver } from './context/ingest/adapters/looker/local-looker.adapter.js'; import { metabaseRuntimeConfigFromLocalConnection } from './context/ingest/adapters/metabase/local-metabase.adapter.js'; import { testRepoConnection } from './context/ingest/repo-fetch.js'; +import { federatedConnectionListing } from './context/connections/federation.js'; import { getDriverRegistration } from './context/connections/drivers.js'; import { 
parseNotionConnectionConfig, resolveNotionConnectionAuthToken } from './context/connections/notion-config.js'; import { resolveKtxConfigReference } from './context/core/config-reference.js'; @@ -447,15 +448,23 @@ export async function runKtxConnection( io.stdout.write('No connections configured. Run `ktx setup` to add one.\n'); return 0; } - const idWidth = Math.max('ID'.length, ...entries.map(([id]) => id.length)); - const driverWidth = Math.max( - 'DRIVER'.length, + const federated = federatedConnectionListing(project.config.connections, args.projectDir); + const idCandidates = [...entries.map(([id]) => id), ...(federated ? [federated.id] : [])]; + const driverLengths = [ ...entries.map(([, c]) => (c.driver ?? 'unknown').length), - ); + ...(federated ? [federated.driver.length] : []), + ]; + const idWidth = Math.max('ID'.length, ...idCandidates.map((id) => id.length)); + const driverWidth = Math.max('DRIVER'.length, ...driverLengths); io.stdout.write(`${'ID'.padEnd(idWidth)} ${'DRIVER'.padEnd(driverWidth)}\n`); for (const [id, connection] of entries) { io.stdout.write(`${id.padEnd(idWidth)} ${(connection.driver ?? 
'unknown').padEnd(driverWidth)}\n`); } + if (federated) { + io.stdout.write(`${federated.id.padEnd(idWidth)} ${federated.driver.padEnd(driverWidth)}\n`); + io.stdout.write(` federates: ${federated.members.join(', ')}\n`); + io.stdout.write(` ${federated.hint}\n`); + } return 0; } diff --git a/packages/cli/test/connection-list-federated.test.ts b/packages/cli/test/connection-list-federated.test.ts new file mode 100644 index 00000000..edb3c72b --- /dev/null +++ b/packages/cli/test/connection-list-federated.test.ts @@ -0,0 +1,66 @@ +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { runKtxConnection } from '../src/connection.js'; +import { initKtxProject } from '../src/context/project/project.js'; +import { parseKtxProjectConfig, serializeKtxProjectConfig } from '../src/context/project/config.js'; +import type { KtxProjectConnectionConfig } from '../src/context/project/config.js'; + +function makeIo() { + const out: string[] = []; + return { + io: { + stdout: { isTTY: false, write: (c: string) => { out.push(c); return true; } }, + stderr: { write: () => true }, + }, + stdout: () => out.join(''), + }; +} + +async function writeConnections( + projectDir: string, + connections: Record, +): Promise { + const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + await writeFile(join(projectDir, 'ktx.yaml'), serializeKtxProjectConfig({ ...config, connections }), 'utf-8'); +} + +describe('ktx connection list federated entry', () => { + let tempDir: string; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'ktx-conn-fed-')); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + it('shows _ktx_federated when 2+ attach-compatible connections exist', async () => { + const projectDir = join(tempDir, 'project'); + 
await initKtxProject({ projectDir }); + await writeConnections(projectDir, { + books_db: { driver: 'sqlite' }, + reviews_db: { driver: 'sqlite' }, + }); + const io = makeIo(); + const code = await runKtxConnection({ command: 'list', projectDir }, io.io); + const printed = io.stdout(); + expect(code).toBe(0); + expect(printed).toContain('_ktx_federated'); + expect(printed).toContain('books_db, reviews_db'); + expect(printed).toContain('Cross-database queries run here'); + }); + + it('omits _ktx_federated with a single connection', async () => { + const projectDir = join(tempDir, 'project'); + await initKtxProject({ projectDir }); + await writeConnections(projectDir, { + books_db: { driver: 'sqlite' }, + }); + const io = makeIo(); + await runKtxConnection({ command: 'list', projectDir }, io.io); + expect(io.stdout()).not.toContain('_ktx_federated'); + }); +}); From bf2014b875fe8a0f1fc7f85a4d20f9f4e3874576 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 19:07:41 +0700 Subject: [PATCH 41/46] feat(federation): surface _ktx_federated in MCP connection_list Co-Authored-By: Claude Opus 4.8 (1M context) --- .../connections/local-warehouse-descriptor.ts | 2 ++ packages/cli/src/context/mcp/context-tools.ts | 4 ++- .../src/context/mcp/local-project-ports.ts | 24 +++++++++---- packages/cli/src/context/mcp/types.ts | 2 ++ .../mcp/__snapshots__/mcp-tools-list.json | 11 +++++- .../mcp/connection-list-federated.test.ts | 34 +++++++++++++++++++ 6 files changed, 69 insertions(+), 8 deletions(-) create mode 100644 packages/cli/test/context/mcp/connection-list-federated.test.ts diff --git a/packages/cli/src/context/connections/local-warehouse-descriptor.ts b/packages/cli/src/context/connections/local-warehouse-descriptor.ts index 4ad926df..0e5d0b9d 100644 --- a/packages/cli/src/context/connections/local-warehouse-descriptor.ts +++ b/packages/cli/src/context/connections/local-warehouse-descriptor.ts @@ -16,6 +16,8 
@@ export interface LocalConnectionInfo { id: string; name: string; connectionType: string; + members?: string[]; + hint?: string; } const DRIVER_TO_CONNECTION_TYPE: Record = { diff --git a/packages/cli/src/context/mcp/context-tools.ts b/packages/cli/src/context/mcp/context-tools.ts index 71ed3a14..f2f64077 100644 --- a/packages/cli/src/context/mcp/context-tools.ts +++ b/packages/cli/src/context/mcp/context-tools.ts @@ -56,7 +56,7 @@ const toolAnnotations = { const toolDescriptions = { connection_list: - 'List configured read-only data connections available to this ktx project. Use this before connection-scoped tools when the project may have multiple warehouses.', + 'List configured read-only data connections available to this ktx project. Use this before connection-scoped tools when the project may have multiple warehouses. A "_ktx_federated" entry (when present) queries all its member databases together; use its id for cross-database joins.', discover_data: 'Search across ktx wiki pages, semantic-layer sources, measures, dimensions, raw tables, and columns. 
Example: discover_data({ query: "monthly orders by customer", connectionId: "warehouse", kinds: ["sl_source", "table"] }).', wiki_search: @@ -227,6 +227,8 @@ const connectionListOutputSchema = z.object({ id: z.string(), name: z.string(), connectionType: z.string(), + members: z.array(z.string()).optional(), + hint: z.string().optional(), }), ), }); diff --git a/packages/cli/src/context/mcp/local-project-ports.ts b/packages/cli/src/context/mcp/local-project-ports.ts index e88c5af4..3be1e0ff 100644 --- a/packages/cli/src/context/mcp/local-project-ports.ts +++ b/packages/cli/src/context/mcp/local-project-ports.ts @@ -1,8 +1,11 @@ import type { KtxSqlQueryExecutorPort } from '../../context/connections/query-executor.js'; import { KtxQueryError, isNativeProgrammingFault } from '../../errors.js'; import { executeProjectReadOnlySql } from '../../context/connections/project-sql-executor.js'; -import { FEDERATED_CONNECTION_ID } from '../../context/connections/federation.js'; -import { localConnectionInfoFromConfig } from '../../context/connections/local-warehouse-descriptor.js'; +import { FEDERATED_CONNECTION_ID, federatedConnectionListing } from '../../context/connections/federation.js'; +import { + type LocalConnectionInfo, + localConnectionInfoFromConfig, +} from '../../context/connections/local-warehouse-descriptor.js'; import type { KtxEmbeddingPort } from '../../context/core/embedding.js'; import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js'; import type { KtxLocalProject } from '../../context/project/project.js'; @@ -134,12 +137,21 @@ export function createLocalProjectMcpContextPorts( const ports: KtxMcpContextPorts = { connections: { async list() { - return Object.entries(project.config.connections) + const configured = Object.entries(project.config.connections) .map(([id, config]) => localConnectionInfoFromConfig(id, config)) - .filter( - (connection): connection is { id: string; name: string; connectionType: string } => 
connection !== null, - ) + .filter((connection): connection is LocalConnectionInfo => connection !== null) .sort((a, b) => a.id.localeCompare(b.id)); + const federated = federatedConnectionListing(project.config.connections, project.projectDir); + if (federated) { + configured.push({ + id: federated.id, + name: federated.id, + connectionType: 'DUCKDB', + members: federated.members, + hint: federated.hint, + }); + } + return configured; }, }, knowledge: { diff --git a/packages/cli/src/context/mcp/types.ts b/packages/cli/src/context/mcp/types.ts index 3694e3d6..e48d0975 100644 --- a/packages/cli/src/context/mcp/types.ts +++ b/packages/cli/src/context/mcp/types.ts @@ -78,6 +78,8 @@ interface KtxConnectionSummary { id: string; name: string; connectionType: string; + members?: string[]; + hint?: string; } interface KtxConnectionsMcpPort { diff --git a/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json b/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json index 3ffca96b..8a78009f 100644 --- a/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json +++ b/packages/cli/test/context/mcp/__snapshots__/mcp-tools-list.json @@ -2,7 +2,7 @@ { "name": "connection_list", "title": "Connection List", - "description": "List configured read-only data connections available to this ktx project. Use this before connection-scoped tools when the project may have multiple warehouses.", + "description": "List configured read-only data connections available to this ktx project. Use this before connection-scoped tools when the project may have multiple warehouses. 
A \"_ktx_federated\" entry (when present) queries all its member databases together; use its id for cross-database joins.", "inputSchema": { "type": "object", "properties": {}, @@ -24,6 +24,15 @@ }, "connectionType": { "type": "string" + }, + "members": { + "type": "array", + "items": { + "type": "string" + } + }, + "hint": { + "type": "string" } }, "required": [ diff --git a/packages/cli/test/context/mcp/connection-list-federated.test.ts b/packages/cli/test/context/mcp/connection-list-federated.test.ts new file mode 100644 index 00000000..f09b11c9 --- /dev/null +++ b/packages/cli/test/context/mcp/connection-list-federated.test.ts @@ -0,0 +1,34 @@ +import { describe, expect, it } from 'vitest'; +import { createLocalProjectMcpContextPorts } from '../../../src/context/mcp/local-project-ports.js'; + +const project = { + projectDir: '/tmp/p', + config: { + connections: { + books_db: { driver: 'sqlite', path: './b.db' }, + reviews_db: { driver: 'sqlite', path: './r.db' }, + }, + }, +} as never; + +describe('MCP connection_list federated entry', () => { + it('includes _ktx_federated with members and hint', async () => { + const ports = createLocalProjectMcpContextPorts(project, { embeddingService: null }); + const list = await ports.connections!.list(); + const federated = list.find((c) => c.id === '_ktx_federated'); + expect(federated).toBeDefined(); + expect(federated!.connectionType).toBe('DUCKDB'); + expect(federated!.members).toEqual(['books_db', 'reviews_db']); + expect(federated!.hint).toContain('Cross-database'); + }); + + it('omits _ktx_federated with a single connection', async () => { + const single = { + projectDir: '/tmp/p', + config: { connections: { books_db: { driver: 'sqlite', path: './b.db' } } }, + } as never; + const ports = createLocalProjectMcpContextPorts(single, { embeddingService: null }); + const list = await ports.connections!.list(); + expect(list.find((c) => c.id === '_ktx_federated')).toBeUndefined(); + }); +}); From 
1cf35f4875ac6baa97246c059a24a01b96aca62d Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 19:42:00 +0700 Subject: [PATCH 42/46] test(federation): ktx sql federated cross-file join end-to-end Drive runKtxSql with the real federated DuckDB executor against two on-disk sqlite files, stubbing only SQL validation. The test surfaced that the JSON output path could not serialize bigint values DuckDB returns for integer columns; printJson now coerces bigint to JSON numbers, matching the plain/pretty paths. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/cli/src/sql.ts | 5 +- .../test/sql-federated.integration.test.ts | 90 +++++++++++++++++++ 2 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 packages/cli/test/sql-federated.integration.test.ts diff --git a/packages/cli/src/sql.ts b/packages/cli/src/sql.ts index 556e3821..24833aa4 100644 --- a/packages/cli/src/sql.ts +++ b/packages/cli/src/sql.ts @@ -74,7 +74,10 @@ function formatValue(value: unknown): string { } function printJson(output: SqlExecutionOutput, io: KtxCliIo): void { - io.stdout.write(`${JSON.stringify(output, null, 2)}\n`); + // DuckDB-backed results carry integer columns as bigint, which JSON.stringify + // cannot serialize; emit them as JSON numbers to match the plain/pretty paths. + const json = JSON.stringify(output, (_key, value) => (typeof value === 'bigint' ? 
Number(value) : value), 2); + io.stdout.write(`${json}\n`); } function printPlain(output: SqlExecutionOutput, io: KtxCliIo): void { diff --git a/packages/cli/test/sql-federated.integration.test.ts b/packages/cli/test/sql-federated.integration.test.ts new file mode 100644 index 00000000..c9190a9b --- /dev/null +++ b/packages/cli/test/sql-federated.integration.test.ts @@ -0,0 +1,90 @@ +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import Database from 'better-sqlite3'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { parseKtxProjectConfig, serializeKtxProjectConfig } from '../src/context/project/config.js'; +import { initKtxProject } from '../src/context/project/project.js'; +import type { SqlAnalysisPort } from '../src/context/sql-analysis/ports.js'; +import type { KtxCliIo } from '../src/cli-runtime.js'; +import { runKtxSql } from '../src/sql.js'; + +function fakeIo(): { io: KtxCliIo; out: () => string; err: () => string } { + let out = ''; + let err = ''; + return { + io: { + stdout: { write: (chunk: string) => ((out += chunk), true) }, + stderr: { write: (chunk: string) => ((err += chunk), true) }, + } as unknown as KtxCliIo, + out: () => out, + err: () => err, + }; +} + +// Validation needs the Python daemon, unavailable in unit tests; execution is real. 
+const stubSqlAnalysis: SqlAnalysisPort = { + analyzeForFingerprint: async () => ({ fingerprint: '', normalizedSql: '', tablesTouched: [], literalSlots: [] }), + analyzeBatch: async () => new Map([['cli-sql', { tablesTouched: [], columnsByClause: {} }]]), + validateReadOnly: async () => ({ ok: true, error: null }), +}; + +describe('ktx sql federated integration', () => { + let dir: string; + + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'ktx-fed-int-')); + }); + + afterEach(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + it('joins books and reviews across two sqlite files', async () => { + const projectDir = join(dir, 'project'); + await initKtxProject({ projectDir }); + + const books = new Database(join(projectDir, 'books.db')); + books.exec("CREATE TABLE books (id INTEGER PRIMARY KEY, title TEXT); INSERT INTO books VALUES (1, 'Clean Code');"); + books.close(); + const reviews = new Database(join(projectDir, 'reviews.db')); + reviews.exec('CREATE TABLE reviews (id INTEGER PRIMARY KEY, book_id INTEGER, rating INTEGER); INSERT INTO reviews VALUES (1, 1, 5);'); + reviews.close(); + + const config = parseKtxProjectConfig(await readFile(join(projectDir, 'ktx.yaml'), 'utf-8')); + await writeFile( + join(projectDir, 'ktx.yaml'), + serializeKtxProjectConfig({ + ...config, + connections: { + books_db: { driver: 'sqlite', path: 'books.db' }, + reviews_db: { driver: 'sqlite', path: 'reviews.db' }, + }, + }), + 'utf-8', + ); + + const { io, out, err } = fakeIo(); + const code = await runKtxSql( + { + command: 'execute', + projectDir, + connectionId: '_ktx_federated', + sql: 'SELECT b.title, r.rating FROM books_db.books b JOIN reviews_db.reviews r ON b.id = r.book_id', + maxRows: 100, + json: true, + cliVersion: 'test', + }, + io, + { createSqlAnalysis: () => stubSqlAnalysis }, + ); + + expect(code, err()).toBe(0); + const payload = JSON.parse(out()) as { connectionId: string; headers: string[]; rows: unknown[][] }; + 
expect(payload.connectionId).toBe('_ktx_federated'); + expect(payload.headers).toEqual(['title', 'rating']); + expect(payload.rows).toHaveLength(1); + expect(payload.rows[0][0]).toBe('Clean Code'); + expect(Number(payload.rows[0][1])).toBe(5); + }); +}); From a75a93b3a62a7bcc4903c9555168ce801575dc64 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 20:17:36 +0700 Subject: [PATCH 43/46] docs(federation): document direct _ktx_federated query surface Co-Authored-By: Claude Opus 4.8 (1M context) --- .../concepts/cross-database-federation.mdx | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs-site/content/docs/concepts/cross-database-federation.mdx b/docs-site/content/docs/concepts/cross-database-federation.mdx index e9a07dc5..6ca396ff 100644 --- a/docs-site/content/docs/concepts/cross-database-federation.mdx +++ b/docs-site/content/docs/concepts/cross-database-federation.mdx @@ -103,6 +103,30 @@ The `to:` value is the fully qualified federated table reference. The `on:` value is the join predicate expressed in terms of the tables' own column names. The `relationship:` value tells **ktx** how to aggregate safely across the join. +## Querying the federated connection directly + +Beyond declared joins, the federated connection is addressable by its id, +`_ktx_federated`, anywhere **ktx** runs read-only SQL. 
The same id works for the +`ktx sql` command and for a data agent calling the `sql_execution` MCP tool, so +both surfaces can run a cross-database query without a source file: + +```bash +ktx sql -c _ktx_federated \ + "SELECT b.title, avg(r.rating) AS avg_rating + FROM pg_books.public.books b + JOIN sqlite_reviews.reviews r ON b.id = r.book_id + GROUP BY b.title" +``` + +Table names follow the rules from +[Table naming in federated queries](#table-naming-in-federated-queries): +three-part `connectionId.schema.table` for postgres and mysql, two-part +`connectionId.table` for sqlite. The `_ktx_federated` id is virtual — it is +never written to `ktx.yaml` and only exists when two or more attach-compatible +connections are declared. It surfaces in `ktx connection` and in the agent's +connection list so the id is discoverable. Querying a single member database +directly with its own connection id (`ktx sql -c pg_books ...`) is unchanged. + ## Federated queries are read-only DuckDB attaches every member database with read-only access. Federated queries From 31aa9069e4b4022b191e6ec47dad2c9250c7c3f1 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 20:30:32 +0700 Subject: [PATCH 44/46] fix(federation): coerce DuckDB bigint to number in shared federated executor DuckDB returns integer columns as JS bigint, which JSON.stringify cannot serialize. The CLI --json path worked around this with a replacer, but the MCP sql_execution tool serializes via plain JSON.stringify and crashed on any federated query selecting an integer column. Coerce bigint to Number once in executeFederatedQuery so every consumer (CLI, MCP, ingest, SL) gets a JSON-safe result, and remove the now-redundant CLI replacer. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../connectors/duckdb/federated-executor.ts | 9 +++- packages/cli/src/sql.ts | 5 +-- .../duckdb/federated-join.integration.test.ts | 37 +++++++++++++++ ...roject-ports-federated.integration.test.ts | 45 +++++++++++++++++++ 4 files changed, 91 insertions(+), 5 deletions(-) diff --git a/packages/cli/src/connectors/duckdb/federated-executor.ts b/packages/cli/src/connectors/duckdb/federated-executor.ts index 508f53aa..8dcddb6d 100644 --- a/packages/cli/src/connectors/duckdb/federated-executor.ts +++ b/packages/cli/src/connectors/duckdb/federated-executor.ts @@ -12,6 +12,13 @@ function quoteDuckdbIdentifier(id: string): string { return `"${id.replaceAll('"', '""')}"`; } +// DuckDB returns integer columns as JS bigint, which JSON.stringify cannot +// serialize. Coerce to Number so every federated-result consumer (CLI, MCP, +// ingest, SL) gets a JSON-safe value. Integers beyond 2^53 lose precision. +function toJsonSafeRows(rows: unknown[][]): unknown[][] { + return rows.map((row) => row.map((cell) => (typeof cell === 'bigint' ? 
Number(cell) : cell))); +} + /** @internal */ export function buildAttachStatements(members: FederatedMember[], env: NodeJS.ProcessEnv): string[] { const attachments = members.map((member) => ({ @@ -46,7 +53,7 @@ export async function executeFederatedQuery( await connection.run(statement); } const reader = await connection.runAndReadAll(sql); - const rows = normalizeQueryRows(reader.getRows()); + const rows = toJsonSafeRows(normalizeQueryRows(reader.getRows())); const headers = reader.columnNames(); return { headers, diff --git a/packages/cli/src/sql.ts b/packages/cli/src/sql.ts index 24833aa4..556e3821 100644 --- a/packages/cli/src/sql.ts +++ b/packages/cli/src/sql.ts @@ -74,10 +74,7 @@ function formatValue(value: unknown): string { } function printJson(output: SqlExecutionOutput, io: KtxCliIo): void { - // DuckDB-backed results carry integer columns as bigint, which JSON.stringify - // cannot serialize; emit them as JSON numbers to match the plain/pretty paths. - const json = JSON.stringify(output, (_key, value) => (typeof value === 'bigint' ? 
Number(value) : value), 2); - io.stdout.write(`${json}\n`); + io.stdout.write(`${JSON.stringify(output, null, 2)}\n`); } function printPlain(output: SqlExecutionOutput, io: KtxCliIo): void { diff --git a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts index 087f6618..7011cc92 100644 --- a/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-join.integration.test.ts @@ -49,6 +49,43 @@ describe('federated cross-catalog join (live DuckDB)', () => { } }); + it('returns integer columns as JSON-safe numbers, not bigint', async () => { + const dir = mkdtempSync(join(tmpdir(), 'ktx-fed-bigint-')); + const booksPath = join(dir, 'books.db'); + const reviewsPath = join(dir, 'reviews.db'); + + const books = new Database(booksPath); + books.exec("CREATE TABLE books (id INTEGER, title TEXT); INSERT INTO books VALUES (1, 'Dune'), (2, 'Foundation');"); + books.close(); + + const reviews = new Database(reviewsPath); + reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 4), (2, 2);'); + reviews.close(); + + const members: FederatedMember[] = [ + { connectionId: 'books_db', driver: 'sqlite', projectDir: dir, connection: { driver: 'sqlite', path: booksPath } }, + { connectionId: 'reviews_db', driver: 'sqlite', projectDir: dir, connection: { driver: 'sqlite', path: reviewsPath } }, + ]; + + try { + const result = await executeFederatedQuery(members, { + connectionId: '_ktx_federated', + connection: undefined, + sql: 'SELECT b.id, count(*) AS n FROM books_db.books b JOIN reviews_db.reviews r ON b.id = r.book_id GROUP BY b.id ORDER BY b.id', + }); + for (const row of result.rows) { + for (const cell of row) { + expect(typeof cell).not.toBe('bigint'); + } + } + expect(() => JSON.stringify(result)).not.toThrow(); + expect(result.rows[0][0]).toBe(1); + 
expect(Number(result.rows[0][1])).toBeGreaterThan(0); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + it('joins catalogs whose connection ids contain hyphens', async () => { const dir = mkdtempSync(join(tmpdir(), 'ktx-fed-hyphen-')); const booksPath = join(dir, 'books.db'); diff --git a/packages/cli/test/context/mcp/local-project-ports-federated.integration.test.ts b/packages/cli/test/context/mcp/local-project-ports-federated.integration.test.ts index 5e56fd90..362406df 100644 --- a/packages/cli/test/context/mcp/local-project-ports-federated.integration.test.ts +++ b/packages/cli/test/context/mcp/local-project-ports-federated.integration.test.ts @@ -51,4 +51,49 @@ describe('MCP sql_execution — federated routing (live DuckDB)', () => { await rm(dir, { recursive: true, force: true }); } }); + + it('serializes integer columns from a federated query without throwing on bigint', async () => { + const dir = await mkdtemp(join(tmpdir(), 'ktx-mcp-fed-int-')); + try { + const booksPath = join(dir, 'books.db'); + const reviewsPath = join(dir, 'reviews.db'); + const books = new Database(booksPath); + books.exec("CREATE TABLE books (id INTEGER, title TEXT); INSERT INTO books VALUES (1, 'Dune');"); + books.close(); + const reviews = new Database(reviewsPath); + reviews.exec('CREATE TABLE reviews (book_id INTEGER, stars INTEGER); INSERT INTO reviews VALUES (1, 5), (1, 3);'); + reviews.close(); + + const project = await initKtxProject({ projectDir: dir }); + project.config.connections.books_db = { driver: 'sqlite', path: booksPath }; + project.config.connections.reviews_db = { driver: 'sqlite', path: reviewsPath }; + + const validateReadOnly = vi.fn(async () => ({ ok: true, error: null })); + const ports = createLocalProjectMcpContextPorts(project, { + sqlAnalysis: { + analyzeForFingerprint: vi.fn(), + analyzeBatch: vi.fn(), + validateReadOnly, + } as never, + localScan: { + createConnector: () => { + throw new Error('federated path must not create a 
scan connector'); + }, + }, + embeddingService: null, + }); + + const result = await ports.sqlExecution?.execute({ + connectionId: '_ktx_federated', + sql: 'SELECT b.title, count(*) AS n FROM books_db.books b JOIN reviews_db.reviews r ON b.id = r.book_id GROUP BY b.title', + maxRows: 100, + }); + + expect(() => JSON.stringify(result)).not.toThrow(); + expect(result?.rows?.[0]?.[0]).toBe('Dune'); + expect(result?.rows?.[0]?.[1]).toBe(2); + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); }); From f9a6f0db4845935c4053b396b2778aae511d0e3f Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sat, 13 Jun 2026 21:30:48 +0700 Subject: [PATCH 45/46] refactor(federation): simplify driver map and collapse forked MCP SQL path - Replace the identity-valued ATTACH_TYPE_BY_DRIVER record with an ATTACH_COMPATIBLE_DRIVERS Set; the driver name doubles as the attach type, so the map encoded nothing beyond membership. - Switch federatedAttachTarget directly on the driver with a default throw, dropping the unreachable post-switch throw and its comment. - Route the MCP sql_execution standard-connection case through the shared executeProjectReadOnlySql instead of reimplementing the connector create/capability-check/execute/cleanup ceremony, so federated and standard connections share one execution path.
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/connectors/duckdb/federated-attach.ts | 9 +- .../cli/src/context/connections/federation.ts | 20 ++-- .../src/context/mcp/local-project-ports.ts | 113 ++++++------------ 3 files changed, 49 insertions(+), 93 deletions(-) diff --git a/packages/cli/src/connectors/duckdb/federated-attach.ts b/packages/cli/src/connectors/duckdb/federated-attach.ts index 3a97d0e9..1631952a 100644 --- a/packages/cli/src/connectors/duckdb/federated-attach.ts +++ b/packages/cli/src/connectors/duckdb/federated-attach.ts @@ -4,7 +4,7 @@ import { mysqlConnectionPoolConfigFromConfig, type KtxMysqlConnectionConfig, } from '../mysql/connector.js'; -import { attachTypeForDriver, type FederatedMember } from '../../context/connections/federation.js'; +import type { FederatedMember } from '../../context/connections/federation.js'; function kvKeyword(value: string): string { // libpq/DuckDB key-value values quote with single quotes and backslash-escape. @@ -62,9 +62,7 @@ function mysqlAttachString(member: FederatedMember, env: NodeJS.ProcessEnv): str * resolver so federation and standalone scans agree on config interpretation. */ export function federatedAttachTarget(member: FederatedMember, env: NodeJS.ProcessEnv): string { - // attachTypeForDriver throws on unsupported drivers, so the cases below are - // exhaustive; the trailing throw exists only to satisfy the string return type. 
- switch (attachTypeForDriver(member.driver)) { + switch (member.driver.toLowerCase()) { case 'sqlite': return sqliteDatabasePathFromConfig({ connectionId: member.connectionId, @@ -75,6 +73,7 @@ export function federatedAttachTarget(member: FederatedMember, env: NodeJS.Proce return postgresAttachString(member, env); case 'mysql': return mysqlAttachString(member, env); + default: + throw new Error(`Driver "${member.driver}" cannot be attached by DuckDB federation.`); } - throw new Error(`Driver "${member.driver}" cannot be attached by DuckDB federation.`); } diff --git a/packages/cli/src/context/connections/federation.ts b/packages/cli/src/context/connections/federation.ts index 7a712c69..7718e300 100644 --- a/packages/cli/src/context/connections/federation.ts +++ b/packages/cli/src/context/connections/federation.ts @@ -4,22 +4,18 @@ import type { KtxProjectConnectionConfig } from '../project/config.js'; export const FEDERATED_CONNECTION_ID = '_ktx_federated'; /** - * Maps each attach-compatible driver to the DuckDB extension that attaches it. - * The keys are the single source of truth for federation membership: a driver - * participates iff it appears here. + * Drivers DuckDB can ATTACH for federation. The driver name doubles as the + * DuckDB extension/TYPE name, so this set is the single source of truth for + * both membership (a driver participates iff it appears here) and attach type. 
*/ -const ATTACH_TYPE_BY_DRIVER: Record = { - postgres: 'postgres', - mysql: 'mysql', - sqlite: 'sqlite', -}; +const ATTACH_COMPATIBLE_DRIVERS = new Set(['postgres', 'mysql', 'sqlite']); export function attachTypeForDriver(driver: string): string { - const type = ATTACH_TYPE_BY_DRIVER[driver.toLowerCase()]; - if (!type) { + const normalized = driver.toLowerCase(); + if (!ATTACH_COMPATIBLE_DRIVERS.has(normalized)) { throw new Error(`Driver "${driver}" cannot be attached by DuckDB federation.`); } - return type; + return normalized; } export interface FederatedMember { @@ -45,7 +41,7 @@ export function deriveFederatedConnection( projectDir: string, ): FederatedConnectionDescriptor | null { const members: FederatedMember[] = Object.entries(connections) - .filter(([, config]) => config.driver.toLowerCase() in ATTACH_TYPE_BY_DRIVER) + .filter(([, config]) => ATTACH_COMPATIBLE_DRIVERS.has(config.driver.toLowerCase())) .map(([connectionId, config]) => ({ connectionId, driver: config.driver.toLowerCase(), diff --git a/packages/cli/src/context/mcp/local-project-ports.ts b/packages/cli/src/context/mcp/local-project-ports.ts index 3be1e0ff..99af73d4 100644 --- a/packages/cli/src/context/mcp/local-project-ports.ts +++ b/packages/cli/src/context/mcp/local-project-ports.ts @@ -10,7 +10,6 @@ import type { KtxEmbeddingPort } from '../../context/core/embedding.js'; import type { KtxSemanticLayerComputePort } from '../../context/daemon/semantic-layer-compute.js'; import type { KtxLocalProject } from '../../context/project/project.js'; import { createKtxEntityDetailsService } from '../../context/scan/entity-details.js'; -import type { KtxScanConnector } from '../../context/scan/types.js'; import type { LocalScanMcpOptions } from '../../context/scan/local-scan.js'; import { createKtxDiscoverDataService } from '../../context/search/discover.js'; import { sqlAnalysisDialectForDriver } from '../../context/sql-analysis/dialect.js'; @@ -30,12 +29,6 @@ interface 
CreateLocalProjectMcpContextPortsOptions { embeddingService: KtxEmbeddingPort | null; } -async function cleanupConnector(connector: KtxScanConnector | null): Promise { - if (connector?.cleanup) { - await connector.cleanup(); - } -} - async function executeValidatedReadOnlySql( project: KtxLocalProject, options: CreateLocalProjectMcpContextPortsOptions, @@ -51,82 +44,50 @@ async function executeValidatedReadOnlySql( throw new Error('sql_execution requires a local scan connector factory.'); } - if (input.connectionId === FEDERATED_CONNECTION_ID) { - const validation = await options.sqlAnalysis.validateReadOnly(input.sql, sqlAnalysisDialectForDriver('duckdb')); - if (!validation.ok) { - throw new Error(validation.error ?? 'SQL is not read-only.'); - } - await onProgress?.({ progress: 0.3, message: 'Executing' }); - const result = await executeProjectReadOnlySql({ - project, - input: { - connectionId: input.connectionId, - projectDir: project.projectDir, - connection: undefined, - sql: input.sql, - maxRows: input.maxRows, - }, - createConnector, - runId: 'mcp-sql-execution', - }).catch((error: unknown) => { - if (isNativeProgrammingFault(error)) { - throw error; - } - throw new KtxQueryError(error instanceof Error ? error.message : String(error), { cause: error }); - }); - const rowCount = result.rowCount ?? result.rows.length; - await onProgress?.({ progress: 1, message: `Fetched ${rowCount} rows` }); - return { headers: result.headers, rows: result.rows, rowCount }; - } - - const connectionId = assertSafeConnectionId(input.connectionId); - const connection = project.config.connections[connectionId]; - if (!connection) { + const isFederated = input.connectionId === FEDERATED_CONNECTION_ID; + const connectionId = isFederated ? input.connectionId : assertSafeConnectionId(input.connectionId); + const connection = isFederated ? 
undefined : project.config.connections[connectionId]; + if (!isFederated && !connection) { throw new Error(`Connection "${connectionId}" is not configured in ktx.yaml`); } - const validation = await options.sqlAnalysis.validateReadOnly(input.sql, sqlAnalysisDialectForDriver(connection.driver)); + + const dialect = sqlAnalysisDialectForDriver(isFederated ? 'duckdb' : connection!.driver); + const validation = await options.sqlAnalysis.validateReadOnly(input.sql, dialect); if (!validation.ok) { throw new Error(validation.error ?? 'SQL is not read-only.'); } - let connector: KtxScanConnector | null = null; - try { - connector = await createConnector(connectionId); - if (!connector.capabilities.readOnlySql || !connector.executeReadOnly) { - throw new Error(`Connection "${connectionId}" does not support read-only SQL execution.`); + await onProgress?.({ progress: 0.3, message: 'Executing' }); + const result = await executeProjectReadOnlySql({ + project, + input: { + connectionId, + projectDir: project.projectDir, + connection, + sql: input.sql, + maxRows: input.maxRows, + }, + createConnector, + runId: 'mcp-sql-execution', + }).catch((error: unknown) => { + // A warehouse/driver rejection (e.g. the agent's SQL failed to compile) is a + // surfaced operational outcome, not a ktx fault: mark it expected while + // preserving the warehouse's own diagnostics. A native JS error (TypeError, + // etc.) signals a bug in connector code — let it propagate unchanged so Error + // Tracking still sees it. + if (isNativeProgrammingFault(error)) { + throw error; } - await onProgress?.({ progress: 0.3, message: 'Executing' }); - const result = await connector - .executeReadOnly( - { - connectionId, - sql: input.sql, - maxRows: input.maxRows, - }, - { runId: 'mcp-sql-execution' }, - ) - .catch((error: unknown) => { - // A warehouse/driver rejection (e.g. 
the agent's SQL failed to compile) - // is a surfaced operational outcome, not a ktx fault: mark it expected - // while preserving the warehouse's own diagnostics. A native JS error - // (TypeError, etc.) signals a bug in connector code — let it propagate - // unchanged so Error Tracking still sees it. - if (isNativeProgrammingFault(error)) { - throw error; - } - throw new KtxQueryError(error instanceof Error ? error.message : String(error), { cause: error }); - }); - const response = { - headers: result.headers, - ...(result.headerTypes ? { headerTypes: result.headerTypes } : {}), - rows: result.rows, - rowCount: result.rowCount ?? result.rows.length, - }; - await onProgress?.({ progress: 1, message: `Fetched ${response.rowCount} rows` }); - return response; - } finally { - await cleanupConnector(connector); - } + throw new KtxQueryError(error instanceof Error ? error.message : String(error), { cause: error }); + }); + const response = { + headers: result.headers, + ...(result.headerTypes ? { headerTypes: result.headerTypes } : {}), + rows: result.rows, + rowCount: result.rowCount ?? result.rows.length, + }; + await onProgress?.({ progress: 1, message: `Fetched ${response.rowCount} rows` }); + return response; } export function createLocalProjectMcpContextPorts( From a3b71f863b59aabab7cee2897056faa1521267d8 Mon Sep 17 00:00:00 2001 From: Kevin Messiaen <114553769+kevinmessiaen@users.noreply.github.com> Date: Sun, 14 Jun 2026 13:35:03 +0700 Subject: [PATCH 46/46] chore(federation): allowlist placeholder credentials for detect-secrets The federation doc example URL and the federated-attach test fixtures use literal placeholder credentials that trip detect-secrets. Mark them with line-scoped pragma allowlist comments so a real secret added later is still caught. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs-site/content/docs/concepts/cross-database-federation.mdx | 2 +- packages/cli/test/connectors/duckdb/federated-attach.test.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs-site/content/docs/concepts/cross-database-federation.mdx b/docs-site/content/docs/concepts/cross-database-federation.mdx index 6ca396ff..b0627adc 100644 --- a/docs-site/content/docs/concepts/cross-database-federation.mdx +++ b/docs-site/content/docs/concepts/cross-database-federation.mdx @@ -43,7 +43,7 @@ A minimal `ktx.yaml` that triggers federation: connections: - id: pg_books driver: postgres - url: "postgres://user:pass@localhost:5432/books" + url: "postgres://user:pass@localhost:5432/books" # pragma: allowlist secret - id: sqlite_reviews driver: sqlite path: ./data/reviews.db diff --git a/packages/cli/test/connectors/duckdb/federated-attach.test.ts b/packages/cli/test/connectors/duckdb/federated-attach.test.ts index a3e492fa..98bfeb2e 100644 --- a/packages/cli/test/connectors/duckdb/federated-attach.test.ts +++ b/packages/cli/test/connectors/duckdb/federated-attach.test.ts @@ -78,11 +78,11 @@ describe('federatedAttachTarget', () => { const target = federatedAttachTarget( member({ driver: 'mysql', - connection: { driver: 'mysql', host: 'h', database: 'app', username: 'u', password: 'pass word' }, + connection: { driver: 'mysql', host: 'h', database: 'app', username: 'u', password: 'pass word' }, // pragma: allowlist secret }), {}, ); - expect(target).toContain("password='pass word'"); + expect(target).toContain("password='pass word'"); // pragma: allowlist secret }); it('emits sslmode=require for a postgres member configured with discrete fields and ssl', () => {