From ed3a4d1e654c40dac0e7efb00c4deb649cc67b3b Mon Sep 17 00:00:00 2001 From: akshaykumar2505 Date: Tue, 24 Mar 2026 13:05:23 +0530 Subject: [PATCH 1/2] Enhance smart generation and profiling with stable column ordering - Introduced column order preservation in smart generation processes, ensuring that generated fields maintain their DDL order. - Added functionality to hydrate column order from ClickHouse, improving the stability of column arrangements during profiling. - Updated serialization and deserialization processes to include column order, allowing for consistent handling of column arrangements across different operations. - Implemented cache invalidation for user sessions post-model generation to ensure immediate updates in Explore/Meta requests. This update improves the reliability and predictability of column handling in generated models. --- services/actions/src/rpc/smartGenSchemas.js | 7 +++ services/cubejs/src/routes/smartGenerate.js | 63 ++++++++++++++++++- .../src/utils/smart-generation/cubeBuilder.js | 41 +++++++++++- .../src/utils/smart-generation/merger.js | 22 ++++++- .../smart-generation/profileSerializer.js | 38 ++++++++++- .../src/utils/smart-generation/profiler.js | 37 ++++++++++- 6 files changed, 201 insertions(+), 7 deletions(-) diff --git a/services/actions/src/rpc/smartGenSchemas.js b/services/actions/src/rpc/smartGenSchemas.js index dc75c706..bcf5d6a2 100644 --- a/services/actions/src/rpc/smartGenSchemas.js +++ b/services/actions/src/rpc/smartGenSchemas.js @@ -1,5 +1,6 @@ import apiError from "../utils/apiError.js"; import cubejsApi from "../utils/cubejsApi.js"; +import { invalidateUserCache } from "../utils/cubeCache.js"; export default async (session, input, headers) => { const { @@ -44,6 +45,12 @@ export default async (session, input, headers) => { selected_columns: selectedColumns, }); + // Ensure subsequent Explore/Meta requests resolve the latest branch version + // immediately after model generation instead of waiting for cache 
TTL expiry. + if (userId) { + invalidateUserCache(userId); + } + return result; } catch (err) { return apiError(err); diff --git a/services/cubejs/src/routes/smartGenerate.js b/services/cubejs/src/routes/smartGenerate.js index 9dd12608..2c3fb659 100644 --- a/services/cubejs/src/routes/smartGenerate.js +++ b/services/cubejs/src/routes/smartGenerate.js @@ -14,6 +14,59 @@ import { deserializeProfile } from '../utils/smart-generation/profileSerializer. import { diffModels, parseCubesFromJs } from '../utils/smart-generation/diffModels.js'; import { validateModelSyntax, smokeTestQuery } from '../utils/smart-generation/modelValidator.js'; +function reorderProfileColumns(profiledTable) { + if (!profiledTable?.columns || !(profiledTable.columns instanceof Map)) return profiledTable; + const ordered = new Map(); + const seen = new Set(); + const preferred = Array.isArray(profiledTable.columnOrder) ? [...profiledTable.columnOrder] : []; // copy: the append loop below must not mutate the caller's columnOrder + + for (const colName of preferred) { + if (profiledTable.columns.has(colName)) { + ordered.set(colName, profiledTable.columns.get(colName)); + seen.add(colName); + } + } + + for (const [colName, colData] of profiledTable.columns) { + if (seen.has(colName)) continue; + ordered.set(colName, colData); + preferred.push(colName); + } + + return { + ...profiledTable, + columns: ordered, + columnOrder: preferred, + }; +} + +async function hydrateColumnOrderFromClickHouse(driver, profiledTable, schema, table) { + if (!driver || !profiledTable?.columns || !(profiledTable.columns instanceof Map)) { + return profiledTable; + } + const hasStableOrder = + Array.isArray(profiledTable.columnOrder) + && profiledTable.columnOrder.length > 0 + && profiledTable.columnOrder.length >= profiledTable.columns.size; + if (hasStableOrder) { + return reorderProfileColumns(profiledTable); + } + + try { + const rows = await driver.query( + `SELECT name FROM system.columns WHERE database = '${schema}' AND table = '${table}' ORDER BY position` + ); + const ddlOrder = 
rows.map((r) => r.name).filter((name) => profiledTable.columns.has(name)); + if (ddlOrder.length > 0) { + return reorderProfileColumns({ ...profiledTable, columnOrder: ddlOrder }); + } + } catch (err) { + console.warn(`[smartGenerate] Column order hydration failed (non-fatal): ${err.message}`); + } + + return reorderProfileColumns(profiledTable); +} + export default async (req, res, cubejs) => { const { securityContext } = req; const { @@ -75,6 +128,9 @@ export default async (req, res, cubejs) => { const deserialized = deserializeProfile(profileData); profiledTable = deserialized.profiledTable; primaryKeys = deserialized.primaryKeys; + // Ensure column order is stable even when profile_data transport reorders object keys. + driver = await cubejs.options.driverFactory({ securityContext }); + profiledTable = await hydrateColumnOrderFromClickHouse(driver, profiledTable, schema, table); } else { // Legacy path: profile from scratch (two ClickHouse round-trips) driver = await cubejs.options.driverFactory({ securityContext }); @@ -89,6 +145,7 @@ export default async (req, res, cubejs) => { emitter.emit('primary_keys', 'Detecting primary keys...', 0.5); primaryKeys = await detectPrimaryKeys(driver, schema, table); + profiledTable = reorderProfileColumns(profiledTable); } // Filter columns if user selected a subset @@ -100,7 +157,11 @@ export default async (req, res, cubejs) => { filtered.set(name, data); } } - profiledTable = { ...profiledTable, columns: filtered }; + const existingOrder = Array.isArray(profiledTable.columnOrder) ? profiledTable.columnOrder : []; + const filteredOrder = existingOrder.length > 0 + ? 
existingOrder.filter((name) => selectedColumns.has(name)) + : Array.from(filtered.keys()); + profiledTable = { ...profiledTable, columns: filtered, columnOrder: filteredOrder }; } // Build cubes diff --git a/services/cubejs/src/utils/smart-generation/cubeBuilder.js b/services/cubejs/src/utils/smart-generation/cubeBuilder.js index 9ba32107..769fcfe6 100644 --- a/services/cubejs/src/utils/smart-generation/cubeBuilder.js +++ b/services/cubejs/src/utils/smart-generation/cubeBuilder.js @@ -285,7 +285,14 @@ function isInt8Boolean(rawType, profile) { * @returns {{ dimensions: object[], measures: object[], mapKeysDiscovered: number, columnsProfiled: number, columnsSkipped: number }} */ function processColumns(columns, options) { - const { arrayJoinColumns = [], maxMapKeys = 500, primaryKeys = [], cubeName = 'cube', columnDescriptions = new Map() } = options; + const { + arrayJoinColumns = [], + maxMapKeys = 500, + primaryKeys = [], + cubeName = 'cube', + columnDescriptions = new Map(), + columnOrder = [], + } = options; const arrayJoinColumnNames = arrayJoinColumns.map((a) => a.column); const allFields = []; @@ -562,6 +569,37 @@ function processColumns(columns, options) { // Deduplicate field names deduplicateFields(allFields); + // Final ordering guard: keep generated fields in DDL column order. + // This protects against accidental ordering drift in upstream payloads. + const columnIndex = new Map(); + if (Array.isArray(columnOrder) && columnOrder.length > 0) { + for (let i = 0; i < columnOrder.length; i++) { + columnIndex.set(columnOrder[i], i); + } + } else { + let idx = 0; + for (const colName of columns.keys()) { + columnIndex.set(colName, idx++); + } + } + + const fallbackIndex = Number.MAX_SAFE_INTEGER; + allFields + .map((field, idx) => ({ + field, + idx, + order: columnIndex.has(field._sourceColumn) + ? 
columnIndex.get(field._sourceColumn) + : fallbackIndex, + })) + .sort((a, b) => { + if (a.order === b.order) return a.idx - b.idx; + return a.order - b.order; + }) + .forEach((entry, i) => { + allFields[i] = entry.field; + }); + const dimensions = []; const measures = []; @@ -619,6 +657,7 @@ function buildRawCube(profiledTable, options) { maxMapKeys, primaryKeys, cubeName, + columnOrder: profiledTable.columnOrder || [], columnDescriptions: profiledTable.columnDescriptions || new Map(), }); diff --git a/services/cubejs/src/utils/smart-generation/merger.js b/services/cubejs/src/utils/smart-generation/merger.js index 63d45e7f..06890400 100644 --- a/services/cubejs/src/utils/smart-generation/merger.js +++ b/services/cubejs/src/utils/smart-generation/merger.js @@ -209,7 +209,27 @@ function mergeFields(existingFields, newFields, keepStale) { } } - return merged; + // Preserve DDL-based ordering from the incoming generation for all fields + // that exist in the incoming set. Fields not present in incoming (e.g. stale + // auto fields, preserved user/AI fields) keep relative order and are placed after. + const incomingOrder = new Map(); + for (let i = 0; i < incoming.length; i++) { + incomingOrder.set(incoming[i].name, i); + } + + return merged + .map((field, idx) => ({ + field, + idx, + order: incomingOrder.has(field.name) + ? 
incomingOrder.get(field.name) + : Number.MAX_SAFE_INTEGER, + })) + .sort((a, b) => { + if (a.order === b.order) return a.idx - b.idx; + return a.order - b.order; + }) + .map((entry) => entry.field); } // --------------------------------------------------------------------------- diff --git a/services/cubejs/src/utils/smart-generation/profileSerializer.js b/services/cubejs/src/utils/smart-generation/profileSerializer.js index e45dc149..b7e5bd29 100644 --- a/services/cubejs/src/utils/smart-generation/profileSerializer.js +++ b/services/cubejs/src/utils/smart-generation/profileSerializer.js @@ -21,13 +21,34 @@ export function serializeProfile(profiledTable, primaryKeys) { } const columns = {}; + const columnOrder = []; + const preferredOrder = Array.isArray(profiledTable.columnOrder) ? profiledTable.columnOrder : null; + + if (preferredOrder && profiledTable.columns && typeof profiledTable.columns === 'object') { + for (const key of preferredOrder) { + const value = profiledTable.columns instanceof Map + ? 
profiledTable.columns.get(key) + : profiledTable.columns[key]; + if (value !== undefined) { + columns[key] = value; + columnOrder.push(key); + } + } + } + if (profiledTable.columns instanceof Map) { for (const [key, value] of profiledTable.columns) { + if (Object.prototype.hasOwnProperty.call(columns, key)) continue; columns[key] = value; + columnOrder.push(key); } } else if (profiledTable.columns && typeof profiledTable.columns === 'object') { // Already a plain object — pass through - Object.assign(columns, profiledTable.columns); + for (const [key, value] of Object.entries(profiledTable.columns)) { + if (Object.prototype.hasOwnProperty.call(columns, key)) continue; + columns[key] = value; + columnOrder.push(key); + } } const columnDescriptions = {}; @@ -47,6 +68,7 @@ export function serializeProfile(profiledTable, primaryKeys) { sampled: profiledTable.sampled, sample_size: profiledTable.sample_size, columns, + columnOrder, columnDescriptions, }, primaryKeys: primaryKeys || [], @@ -68,7 +90,20 @@ export function deserializeProfile(serialized) { const columns = new Map(); if (src.columns && typeof src.columns === 'object') { + const preferredOrder = Array.isArray(src.columnOrder) ? src.columnOrder : []; + const seen = new Set(); + + // Reconstruct in preserved DDL order first. + for (const key of preferredOrder) { + if (Object.prototype.hasOwnProperty.call(src.columns, key)) { + columns.set(key, src.columns[key]); + seen.add(key); + } + } + + // Include any unexpected keys not present in columnOrder. for (const [key, value] of Object.entries(src.columns)) { + if (seen.has(key)) continue; columns.set(key, value); } } @@ -88,6 +123,7 @@ export function deserializeProfile(serialized) { sampled: src.sampled, sample_size: src.sample_size, columns, + columnOrder: Array.isArray(src.columnOrder) ? 
src.columnOrder : undefined, columnDescriptions, }, primaryKeys: serialized.primaryKeys || [], diff --git a/services/cubejs/src/utils/smart-generation/profiler.js b/services/cubejs/src/utils/smart-generation/profiler.js index 5a1fcf3f..ce7170c6 100644 --- a/services/cubejs/src/utils/smart-generation/profiler.js +++ b/services/cubejs/src/utils/smart-generation/profiler.js @@ -483,11 +483,20 @@ export async function profileTable(driver, schema, table, options = {}) { return []; }); - const [metaResult, describeRows, tableCommentRows, columnCommentRows] = await Promise.all([ + // Fetch DDL column positions from system.columns to preserve true table order + const columnOrderPromise = driver.query( + `SELECT name, position FROM system.columns WHERE database = '${schema}' AND table = '${table}' ORDER BY position` + ).catch(err => { + console.warn(`[profiler] Column order fetch failed (non-fatal): ${err.message}`); + return []; + }); + + const [metaResult, describeRows, tableCommentRows, columnCommentRows, columnOrderRows] = await Promise.all([ metaPromise, driver.query(`DESCRIBE TABLE ${schema}.\`${table}\``), tableCommentPromise, columnCommentsPromise, + columnOrderPromise, ]); // Build column descriptions map @@ -514,9 +523,29 @@ export async function profileTable(driver, schema, table, options = {}) { tracker.emit('init', `Found ${emptyColumns.size} columns with zero bytes — will skip`, { empty_columns: emptyColumns.size }); } - // Build columns map from DESCRIBE + // Build a stable DDL order map from system.columns.position + const ddlPositionByName = new Map(); + for (const row of columnOrderRows) { + if (row?.name != null && row?.position != null) { + ddlPositionByName.set(row.name, Number(row.position)); + } + } + + // Order DESCRIBE rows by DDL position when available (fallback: original order) + const describeRowsWithIndex = describeRows.map((row, index) => ({ row, index })); + describeRowsWithIndex.sort((a, b) => { + const aPos = 
ddlPositionByName.get(a.row.name); + const bPos = ddlPositionByName.get(b.row.name); + const aOrder = Number.isFinite(aPos) ? aPos : Number.MAX_SAFE_INTEGER; + const bOrder = Number.isFinite(bPos) ? bPos : Number.MAX_SAFE_INTEGER; + if (aOrder !== bOrder) return aOrder - bOrder; + return a.index - b.index; + }); + const columnOrder = describeRowsWithIndex.map(({ row }) => row.name); + + // Build columns map from ordered DESCRIBE rows const columns = new Map(); - for (const row of describeRows) { + for (const { row } of describeRowsWithIndex) { const colName = row.name; const parsed = parseType(row.type, colName); columns.set(colName, { @@ -625,6 +654,7 @@ export async function profileTable(driver, schema, table, options = {}) { sample_size: null, sampling_method: 'none', columns, + columnOrder, tableDescription: null, columnDescriptions: new Map(), filters: normalizedFilters, @@ -1023,6 +1053,7 @@ export async function profileTable(driver, schema, table, options = {}) { sample_size: needsSampling ? Math.min(Math.round(rowCount / SAMPLE_RATIO), SUBQUERY_LIMIT_MAX) : null, sampling_method: needsSampling ? 'subquery_limit' : 'none', columns, + columnOrder, tableDescription, columnDescriptions, filters: normalizedFilters.length > 0 ? normalizedFilters : undefined, From 4d5cc71870e7e313554691c3eaa43299eb91b8ba Mon Sep 17 00:00:00 2001 From: akshaykumar2505 Date: Tue, 24 Mar 2026 19:33:23 +0530 Subject: [PATCH 2/2] Add FilterConditionInput to custom types in actions.yaml This update introduces the FilterConditionInput type to the custom types section of actions.yaml, enhancing the schema definition for better filtering capabilities in the Hasura metadata configuration. 
--- services/hasura/metadata/actions.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/services/hasura/metadata/actions.yaml b/services/hasura/metadata/actions.yaml index 0be514d9..8c76cd36 100644 --- a/services/hasura/metadata/actions.yaml +++ b/services/hasura/metadata/actions.yaml @@ -163,6 +163,7 @@ custom_types: - name: SourceTable - name: events_create_input - name: ArrayJoinInput + - name: FilterConditionInput objects: - name: CheckSourceOutput - name: GenSourceSchemaOutput