diff --git a/services/actions/src/rpc/smartGenSchemas.js b/services/actions/src/rpc/smartGenSchemas.js
index dc75c706..bcf5d6a2 100644
--- a/services/actions/src/rpc/smartGenSchemas.js
+++ b/services/actions/src/rpc/smartGenSchemas.js
@@ -1,5 +1,6 @@
 import apiError from "../utils/apiError.js";
 import cubejsApi from "../utils/cubejsApi.js";
+import { invalidateUserCache } from "../utils/cubeCache.js";
 
 export default async (session, input, headers) => {
   const {
@@ -44,6 +45,12 @@ export default async (session, input, headers) => {
       selected_columns: selectedColumns,
     });
 
+    // Ensure subsequent Explore/Meta requests resolve the latest branch version
+    // immediately after model generation instead of waiting for cache TTL expiry.
+    if (userId) {
+      invalidateUserCache(userId);
+    }
+
     return result;
   } catch (err) {
     return apiError(err);
diff --git a/services/cubejs/src/routes/smartGenerate.js b/services/cubejs/src/routes/smartGenerate.js
index 9dd12608..2c3fb659 100644
--- a/services/cubejs/src/routes/smartGenerate.js
+++ b/services/cubejs/src/routes/smartGenerate.js
@@ -14,6 +14,85 @@ import { deserializeProfile } from '../utils/smart-generation/profileSerializer.
 import { diffModels, parseCubesFromJs } from '../utils/smart-generation/diffModels.js';
 import { validateModelSyntax, smokeTestQuery } from '../utils/smart-generation/modelValidator.js';
 
+/**
+ * Return a copy of `profiledTable` whose `columns` Map is rebuilt so that names
+ * listed in `columnOrder` come first (in that order) and any remaining columns
+ * follow in their existing Map order.
+ *
+ * The input is not mutated: the returned `columnOrder` is a fresh array derived
+ * from the rebuilt Map, so it holds exactly the names present in `columns`.
+ *
+ * @param {object} profiledTable - Profile whose `columns` is expected to be a Map.
+ * @returns {object} Reordered copy, or the input as-is when `columns` is not a Map.
+ */
+function reorderProfileColumns(profiledTable) {
+  if (!profiledTable?.columns || !(profiledTable.columns instanceof Map)) return profiledTable;
+  const ordered = new Map();
+  const preferred = Array.isArray(profiledTable.columnOrder) ? profiledTable.columnOrder : [];
+
+  for (const colName of preferred) {
+    if (profiledTable.columns.has(colName)) {
+      ordered.set(colName, profiledTable.columns.get(colName));
+    }
+  }
+
+  for (const [colName, colData] of profiledTable.columns) {
+    if (ordered.has(colName)) continue;
+    ordered.set(colName, colData);
+  }
+
+  return {
+    ...profiledTable,
+    columns: ordered,
+    // Built from the Map keys instead of pushing onto the caller's array.
+    columnOrder: Array.from(ordered.keys()),
+  };
+}
+
+/**
+ * Make sure `profiledTable` carries a complete column order, reading it from
+ * ClickHouse `system.columns` when the profile's own `columnOrder` is missing
+ * or shorter than its `columns` Map.
+ *
+ * A failed lookup is logged and ignored; the profile is then reordered with
+ * whatever `columnOrder` it already has.
+ *
+ * @param {object} driver - Driver exposing an async `query(sql)` method.
+ * @param {object} profiledTable - Profile whose `columns` is expected to be a Map.
+ * @param {string} schema - Database name matched against `system.columns.database`.
+ * @param {string} table - Table name matched against `system.columns.table`.
+ * @returns {Promise<object>} The reordered profile, or the input as-is when there
+ *   is no driver or `columns` is not a Map.
+ */
+async function hydrateColumnOrderFromClickHouse(driver, profiledTable, schema, table) {
+  if (!driver || !profiledTable?.columns || !(profiledTable.columns instanceof Map)) {
+    return profiledTable;
+  }
+  const hasStableOrder =
+    Array.isArray(profiledTable.columnOrder)
+    && profiledTable.columnOrder.length > 0
+    && profiledTable.columnOrder.length >= profiledTable.columns.size;
+  if (hasStableOrder) {
+    return reorderProfileColumns(profiledTable);
+  }
+
+  try {
+    // NOTE(review): schema/table are interpolated into the SQL text unescaped —
+    // confirm they are validated before reaching this point.
+    const rows = await driver.query(
+      `SELECT name FROM system.columns WHERE database = '${schema}' AND table = '${table}' ORDER BY position`
+    );
+    const ddlOrder = rows.map((r) => r.name).filter((name) => profiledTable.columns.has(name));
+    if (ddlOrder.length > 0) {
+      return reorderProfileColumns({ ...profiledTable, columnOrder: ddlOrder });
+    }
+  } catch (err) {
+    console.warn(`[smartGenerate] Column order hydration failed (non-fatal): ${err.message}`);
+  }
+
+  return reorderProfileColumns(profiledTable);
+}
+
 export default async (req, res, cubejs) => {
   const { securityContext } = req;
   const {
@@ -75,6 +154,9 @@ export default async (req, res, cubejs) => {
       const deserialized = deserializeProfile(profileData);
       profiledTable = deserialized.profiledTable;
       primaryKeys = deserialized.primaryKeys;
+      // Ensure column order is stable even when profile_data transport reorders object keys.
+      driver = await cubejs.options.driverFactory({ securityContext });
+      profiledTable = await hydrateColumnOrderFromClickHouse(driver, profiledTable, schema, table);
     } else {
       // Legacy path: profile from scratch (two ClickHouse round-trips)
       driver = await cubejs.options.driverFactory({ securityContext });
@@ -89,6 +171,7 @@ export default async (req, res, cubejs) => {
 
       emitter.emit('primary_keys', 'Detecting primary keys...', 0.5);
       primaryKeys = await detectPrimaryKeys(driver, schema, table);
+      profiledTable = reorderProfileColumns(profiledTable);
     }
 
     // Filter columns if user selected a subset
@@ -100,7 +183,11 @@ export default async (req, res, cubejs) => {
           filtered.set(name, data);
         }
       }
-      profiledTable = { ...profiledTable, columns: filtered };
+      const existingOrder = Array.isArray(profiledTable.columnOrder) ? profiledTable.columnOrder : [];
+      const filteredOrder = existingOrder.length > 0
+        ? existingOrder.filter((name) => selectedColumns.has(name))
+        : Array.from(filtered.keys());
+      profiledTable = { ...profiledTable, columns: filtered, columnOrder: filteredOrder };
     }
 
     // Build cubes
diff --git a/services/cubejs/src/utils/smart-generation/cubeBuilder.js b/services/cubejs/src/utils/smart-generation/cubeBuilder.js
index 9ba32107..769fcfe6 100644
--- a/services/cubejs/src/utils/smart-generation/cubeBuilder.js
+++ b/services/cubejs/src/utils/smart-generation/cubeBuilder.js
@@ -285,7 +285,14 @@ function isInt8Boolean(rawType, profile) {
  * @returns {{ dimensions: object[], measures: object[], mapKeysDiscovered: number, columnsProfiled: number, columnsSkipped: number }}
  */
 function processColumns(columns, options) {
-  const { arrayJoinColumns = [], maxMapKeys = 500, primaryKeys = [], cubeName = 'cube', columnDescriptions = new Map() } = options;
+  const {
+    arrayJoinColumns = [],
+    maxMapKeys = 500,
+    primaryKeys = [],
+    cubeName = 'cube',
+    columnDescriptions = new Map(),
+    columnOrder = [],
+  } = options;
   const arrayJoinColumnNames = arrayJoinColumns.map((a) => a.column);
 
   const allFields = [];
@@ -562,6 +569,37 @@ function processColumns(columns, options) {
   // Deduplicate field names
   deduplicateFields(allFields);
 
+  // Final ordering guard: keep generated fields in DDL column order.
+  // This protects against accidental ordering drift in upstream payloads.
+  const columnIndex = new Map();
+  if (Array.isArray(columnOrder) && columnOrder.length > 0) {
+    for (let i = 0; i < columnOrder.length; i++) {
+      columnIndex.set(columnOrder[i], i);
+    }
+  } else {
+    let idx = 0;
+    for (const colName of columns.keys()) {
+      columnIndex.set(colName, idx++);
+    }
+  }
+
+  const fallbackIndex = Number.MAX_SAFE_INTEGER;
+  allFields
+    .map((field, idx) => ({
+      field,
+      idx,
+      order: columnIndex.has(field._sourceColumn)
+        ? columnIndex.get(field._sourceColumn)
+        : fallbackIndex,
+    }))
+    .sort((a, b) => {
+      if (a.order === b.order) return a.idx - b.idx;
+      return a.order - b.order;
+    })
+    .forEach((entry, i) => {
+      allFields[i] = entry.field;
+    });
+
   const dimensions = [];
   const measures = [];
 
@@ -619,6 +657,7 @@ function buildRawCube(profiledTable, options) {
     maxMapKeys,
     primaryKeys,
     cubeName,
+    columnOrder: profiledTable.columnOrder || [],
     columnDescriptions: profiledTable.columnDescriptions || new Map(),
   });
 
diff --git a/services/cubejs/src/utils/smart-generation/merger.js b/services/cubejs/src/utils/smart-generation/merger.js
index 63d45e7f..06890400 100644
--- a/services/cubejs/src/utils/smart-generation/merger.js
+++ b/services/cubejs/src/utils/smart-generation/merger.js
@@ -209,7 +209,27 @@ function mergeFields(existingFields, newFields, keepStale) {
     }
   }
 
-  return merged;
+  // Preserve DDL-based ordering from the incoming generation for all fields
+  // that exist in the incoming set. Fields not present in incoming (e.g. stale
+  // auto fields, preserved user/AI fields) keep relative order and are placed after.
+  const incomingOrder = new Map();
+  for (let i = 0; i < incoming.length; i++) {
+    incomingOrder.set(incoming[i].name, i);
+  }
+
+  return merged
+    .map((field, idx) => ({
+      field,
+      idx,
+      order: incomingOrder.has(field.name)
+        ? incomingOrder.get(field.name)
+        : Number.MAX_SAFE_INTEGER,
+    }))
+    .sort((a, b) => {
+      if (a.order === b.order) return a.idx - b.idx;
+      return a.order - b.order;
+    })
+    .map((entry) => entry.field);
 }
 
 // ---------------------------------------------------------------------------
diff --git a/services/cubejs/src/utils/smart-generation/profileSerializer.js b/services/cubejs/src/utils/smart-generation/profileSerializer.js
index e45dc149..b7e5bd29 100644
--- a/services/cubejs/src/utils/smart-generation/profileSerializer.js
+++ b/services/cubejs/src/utils/smart-generation/profileSerializer.js
@@ -21,13 +21,34 @@ export function serializeProfile(profiledTable, primaryKeys) {
   }
 
   const columns = {};
+  const columnOrder = [];
+  const preferredOrder = Array.isArray(profiledTable.columnOrder) ? profiledTable.columnOrder : null;
+
+  if (preferredOrder && profiledTable.columns && typeof profiledTable.columns === 'object') {
+    for (const key of preferredOrder) {
+      const value = profiledTable.columns instanceof Map
+        ? profiledTable.columns.get(key)
+        : profiledTable.columns[key];
+      if (value !== undefined && !Object.prototype.hasOwnProperty.call(columns, key)) {
+        columns[key] = value;
+        columnOrder.push(key);
+      }
+    }
+  }
+
   if (profiledTable.columns instanceof Map) {
     for (const [key, value] of profiledTable.columns) {
+      if (Object.prototype.hasOwnProperty.call(columns, key)) continue;
       columns[key] = value;
+      columnOrder.push(key);
     }
   } else if (profiledTable.columns && typeof profiledTable.columns === 'object') {
     // Already a plain object — pass through
-    Object.assign(columns, profiledTable.columns);
+    for (const [key, value] of Object.entries(profiledTable.columns)) {
+      if (Object.prototype.hasOwnProperty.call(columns, key)) continue;
+      columns[key] = value;
+      columnOrder.push(key);
+    }
   }
 
   const columnDescriptions = {};
@@ -47,6 +68,7 @@ export function serializeProfile(profiledTable, primaryKeys) {
       sampled: profiledTable.sampled,
       sample_size: profiledTable.sample_size,
       columns,
+      columnOrder,
       columnDescriptions,
     },
     primaryKeys: primaryKeys || [],
@@ -68,7 +90,20 @@ export function deserializeProfile(serialized) {
 
   const columns = new Map();
   if (src.columns && typeof src.columns === 'object') {
+    const preferredOrder = Array.isArray(src.columnOrder) ? src.columnOrder : [];
+    const seen = new Set();
+
+    // Reconstruct in preserved DDL order first.
+    for (const key of preferredOrder) {
+      if (Object.prototype.hasOwnProperty.call(src.columns, key)) {
+        columns.set(key, src.columns[key]);
+        seen.add(key);
+      }
+    }
+
+    // Include any unexpected keys not present in columnOrder.
     for (const [key, value] of Object.entries(src.columns)) {
+      if (seen.has(key)) continue;
       columns.set(key, value);
     }
   }
@@ -88,6 +123,7 @@ export function deserializeProfile(serialized) {
       sampled: src.sampled,
       sample_size: src.sample_size,
       columns,
+      columnOrder: Array.isArray(src.columnOrder) ? src.columnOrder : undefined,
       columnDescriptions,
     },
     primaryKeys: serialized.primaryKeys || [],
diff --git a/services/cubejs/src/utils/smart-generation/profiler.js b/services/cubejs/src/utils/smart-generation/profiler.js
index 5a1fcf3f..ce7170c6 100644
--- a/services/cubejs/src/utils/smart-generation/profiler.js
+++ b/services/cubejs/src/utils/smart-generation/profiler.js
@@ -483,11 +483,20 @@ export async function profileTable(driver, schema, table, options = {}) {
     return [];
   });
 
-  const [metaResult, describeRows, tableCommentRows, columnCommentRows] = await Promise.all([
+  // Fetch DDL column positions from system.columns to preserve true table order
+  const columnOrderPromise = driver.query(
+    `SELECT name, position FROM system.columns WHERE database = '${schema}' AND table = '${table}' ORDER BY position`
+  ).catch(err => {
+    console.warn(`[profiler] Column order fetch failed (non-fatal): ${err.message}`);
+    return [];
+  });
+
+  const [metaResult, describeRows, tableCommentRows, columnCommentRows, columnOrderRows] = await Promise.all([
     metaPromise,
     driver.query(`DESCRIBE TABLE ${schema}.\`${table}\``),
     tableCommentPromise,
     columnCommentsPromise,
+    columnOrderPromise,
   ]);
 
   // Build column descriptions map
@@ -514,9 +523,29 @@ export async function profileTable(driver, schema, table, options = {}) {
     tracker.emit('init', `Found ${emptyColumns.size} columns with zero bytes — will skip`, { empty_columns: emptyColumns.size });
   }
 
-  // Build columns map from DESCRIBE
+  // Build a stable DDL order map from system.columns.position
+  const ddlPositionByName = new Map();
+  for (const row of columnOrderRows) {
+    if (row?.name != null && row?.position != null) {
+      ddlPositionByName.set(row.name, Number(row.position));
+    }
+  }
+
+  // Order DESCRIBE rows by DDL position when available (fallback: original order)
+  const describeRowsWithIndex = describeRows.map((row, index) => ({ row, index }));
+  describeRowsWithIndex.sort((a, b) => {
+    const aPos = ddlPositionByName.get(a.row.name);
+    const bPos = ddlPositionByName.get(b.row.name);
+    const aOrder = Number.isFinite(aPos) ? aPos : Number.MAX_SAFE_INTEGER;
+    const bOrder = Number.isFinite(bPos) ? bPos : Number.MAX_SAFE_INTEGER;
+    if (aOrder !== bOrder) return aOrder - bOrder;
+    return a.index - b.index;
+  });
+  const columnOrder = describeRowsWithIndex.map(({ row }) => row.name);
+
+  // Build columns map from ordered DESCRIBE rows
   const columns = new Map();
-  for (const row of describeRows) {
+  for (const { row } of describeRowsWithIndex) {
     const colName = row.name;
     const parsed = parseType(row.type, colName);
     columns.set(colName, {
@@ -625,6 +654,7 @@ export async function profileTable(driver, schema, table, options = {}) {
       sample_size: null,
       sampling_method: 'none',
       columns,
+      columnOrder,
       tableDescription: null,
       columnDescriptions: new Map(),
       filters: normalizedFilters,
@@ -1023,6 +1053,7 @@ export async function profileTable(driver, schema, table, options = {}) {
     sample_size: needsSampling ? Math.min(Math.round(rowCount / SAMPLE_RATIO), SUBQUERY_LIMIT_MAX) : null,
     sampling_method: needsSampling ? 'subquery_limit' : 'none',
     columns,
+    columnOrder,
     tableDescription,
     columnDescriptions,
     filters: normalizedFilters.length > 0 ? normalizedFilters : undefined,
diff --git a/services/hasura/metadata/actions.yaml b/services/hasura/metadata/actions.yaml
index 0be514d9..8c76cd36 100644
--- a/services/hasura/metadata/actions.yaml
+++ b/services/hasura/metadata/actions.yaml
@@ -163,6 +163,7 @@ custom_types:
     - name: SourceTable
     - name: events_create_input
     - name: ArrayJoinInput
+    - name: FilterConditionInput
   objects:
     - name: CheckSourceOutput
     - name: GenSourceSchemaOutput