From cfb6afb92f26124d57f4d38ecbb2dac3d5a6ee7a Mon Sep 17 00:00:00 2001 From: Olfa Maslah Date: Mon, 15 Jun 2026 13:59:54 -0400 Subject: [PATCH 01/10] improve pagination and scroll to the file row that is currently processing the ingestion --- frontend/app/knowledge/page.tsx | 78 ++++++++++ frontend/lib/knowledge-grid-pagination.ts | 174 ++++++++++++++++++++++ frontend/lib/knowledge-table-state.ts | 6 +- 3 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 frontend/lib/knowledge-grid-pagination.ts diff --git a/frontend/app/knowledge/page.tsx b/frontend/app/knowledge/page.tsx index f70811f1d..fbc3aeeab 100644 --- a/frontend/app/knowledge/page.tsx +++ b/frontend/app/knowledge/page.tsx @@ -41,6 +41,11 @@ import { } from "@/components/ui/tooltip"; import { useIsCloudBrand } from "@/contexts/brand-context"; import { getConnectorDescriptor } from "@/lib/connectors/registry"; +import { + collectNewIngestFocusIdentities, + collectProcessingFocusIdentities, + focusPendingIngestRows, +} from "@/lib/knowledge-grid-pagination"; import { buildKnowledgeTableRows, getKnowledgeFileIdentity, @@ -453,6 +458,71 @@ function SearchPage() { const gridRows = fileResults; const gridRef = useRef(null); + const prevTaskFilesForPaginationRef = useRef([]); + const prevGridRowsForPaginationRef = useRef([]); + const hasInitializedPaginationRefsRef = useRef(false); + const pendingIngestFocusIdentitiesRef = useRef>(new Set()); + + const tryFocusPendingIngestRows = useCallback(() => { + const run = () => { + const api = gridRef.current?.api; + if (!api) { + return; + } + const resolved = focusPendingIngestRows( + api, + pendingIngestFocusIdentitiesRef.current, + ); + for (const identity of resolved) { + pendingIngestFocusIdentitiesRef.current.delete(identity); + } + }; + requestAnimationFrame(() => requestAnimationFrame(run)); + }, []); + + const queueIngestFocusIdentities = useCallback( + (identities: string[]) => { + if (identities.length === 0) { + return; + } + for (const identity of identities) { + pendingIngestFocusIdentitiesRef.current.add(identity); + } + tryFocusPendingIngestRows(); + }, + [tryFocusPendingIngestRows], + ); + + // Jump to the page where a file starts processing (new ingest or retry). + useEffect(() => { + if (!hasInitializedPaginationRefsRef.current) { + prevTaskFilesForPaginationRef.current = taskFiles; + prevGridRowsForPaginationRef.current = gridRows; + hasInitializedPaginationRefsRef.current = true; + return; + } + + const fromTasks = collectNewIngestFocusIdentities( + prevTaskFilesForPaginationRef.current, + taskFiles, + ); + const fromGrid = collectProcessingFocusIdentities( + prevGridRowsForPaginationRef.current, + gridRows, + ); + prevTaskFilesForPaginationRef.current = taskFiles; + prevGridRowsForPaginationRef.current = gridRows; + + queueIngestFocusIdentities([...new Set([...fromTasks, ...fromGrid])]); + }, [taskFiles, gridRows, queueIngestFocusIdentities]); + + const onKnowledgeGridReady = useCallback(() => { + tryFocusPendingIngestRows(); + }, [tryFocusPendingIngestRows]); + + const onKnowledgeRowDataUpdated = useCallback(() => { + tryFocusPendingIngestRows(); + }, [tryFocusPendingIngestRows]); // Re-run only when row identity/status changes, not on every list poll reference. const gridRowsSelectionKey = useMemo( @@ -477,6 +547,10 @@ function SearchPage() { ); }, [gridRowsSelectionKey, isDeletableKnowledgeRow]); + useEffect(() => { + tryFocusPendingIngestRows(); + }, [gridRowsSelectionKey, tryFocusPendingIngestRows]); + const columnDefs: ColDef[] = [ { field: "filename", @@ -1011,6 +1085,8 @@ function SearchPage() { } isRowSelectable={(params) => isDeletableKnowledgeRow(params.data)} domLayout="normal" + onGridReady={onKnowledgeGridReady} + onRowDataUpdated={onKnowledgeRowDataUpdated} onSelectionChanged={onSelectionChanged} pagination={pagination} paginationPageSize={paginationPageSize} @@ -1045,6 +1121,8 @@ function SearchPage() { } isRowSelectable={(params) => isDeletableKnowledgeRow(params.data)} domLayout="normal" + onGridReady={onKnowledgeGridReady} + onRowDataUpdated={onKnowledgeRowDataUpdated} onSelectionChanged={onSelectionChanged} pagination={pagination} paginationPageSize={paginationPageSize} diff --git a/frontend/lib/knowledge-grid-pagination.ts b/frontend/lib/knowledge-grid-pagination.ts new file mode 100644 index 000000000..c99b06ae6 --- /dev/null +++ b/frontend/lib/knowledge-grid-pagination.ts @@ -0,0 +1,174 @@ +import type { IRowNode } from "ag-grid-community"; +import type { AgGridReact } from "ag-grid-react"; +import type { TaskFile } from "@/contexts/task-context"; +import { getKnowledgeFileIdentity } from "@/lib/knowledge-table-state"; + +type GridApi = NonNullable["api"]>; + +type GridRowLike = { + filename?: string; + source_url?: string; + status?: string; +}; + +type VisibleRowTarget = { + displayIndex: number; + node: IRowNode; +}; + +/** Earliest visible row (after filter/sort) matching any identity. */ +function findFirstVisibleRowTarget( + api: GridApi, + identitySet: Set, +): VisibleRowTarget | null { + let target: VisibleRowTarget | null = null; + + api.forEachNodeAfterFilterAndSort((node, index) => { + const id = node.id; + if (id && identitySet.has(id)) { + if (!target || index < target.displayIndex) { + target = { displayIndex: index, node }; + } + } + }); + + return target; +} + +/** Identities visible in the grid after filter/sort. */ +function collectVisibleIdentities( + api: GridApi, + identities: Set, +): Set { + const visible = new Set(); + api.forEachNodeAfterFilterAndSort((node) => { + const id = node.id; + if (id && identities.has(id)) { + visible.add(id); + } + }); + return visible; +} + +/** Identities of task overlays that just started ingesting or were retried. */ +export function collectNewIngestFocusIdentities( + previous: TaskFile[], + current: TaskFile[], +): string[] { + const prevByIdentity = new Map(); + for (const file of previous) { + const identity = getKnowledgeFileIdentity(file); + if (identity) { + prevByIdentity.set(identity, file); + } + } + + const identities: string[] = []; + for (const file of current) { + const identity = getKnowledgeFileIdentity(file); + if (!identity) { + continue; + } + const prev = prevByIdentity.get(identity); + if (!prev) { + identities.push(identity); + continue; + } + if (prev.status !== "processing" && file.status === "processing") { + identities.push(identity); + } + } + return identities; +} + +/** Identities of rendered rows that entered the processing state. */ +export function collectProcessingFocusIdentities( + previous: GridRowLike[], + current: GridRowLike[], +): string[] { + const prevStatusByIdentity = new Map(); + for (const row of previous) { + const identity = getKnowledgeFileIdentity(row); + if (identity) { + prevStatusByIdentity.set(identity, row.status ?? "active"); + } + } + + const identities: string[] = []; + for (const row of current) { + const identity = getKnowledgeFileIdentity(row); + if (!identity) { + continue; + } + const status = row.status ?? "active"; + if (status !== "processing") { + continue; + } + const prevStatus = prevStatusByIdentity.get(identity); + if (prevStatus === undefined || prevStatus !== "processing") { + identities.push(identity); + } + } + return identities; +} + +function scrollToRowTarget( + api: GridApi, + target: VisibleRowTarget, + afterPageChange = false, +): void { + const scroll = () => api.ensureNodeVisible(target.node, "middle"); + if (afterPageChange) { + requestAnimationFrame(() => requestAnimationFrame(scroll)); + } else { + requestAnimationFrame(scroll); + } +} + +/** Jump to the first pagination page that contains any of the given row identities. */ +export function goToGridRowIdentities( + api: GridApi, + identities: Iterable, +): boolean { + const identitySet = new Set(identities); + if (identitySet.size === 0) { + return false; + } + + const target = findFirstVisibleRowTarget(api, identitySet); + if (!target) { + return false; + } + + let didChangePage = false; + const pageSize = api.paginationGetPageSize(); + if (pageSize && pageSize > 0) { + const targetPage = Math.floor(target.displayIndex / pageSize); + const currentPage = api.paginationGetCurrentPage(); + if (currentPage !== targetPage) { + api.paginationGoToPage(targetPage); + didChangePage = true; + } + } + + scrollToRowTarget(api, target, didChangePage); + return true; +} + +/** Focus pending ingest rows once they appear in the grid. Returns resolved identities. */ +export function focusPendingIngestRows( + api: GridApi, + pending: Set, +): string[] { + if (pending.size === 0) { + return []; + } + + const found = collectVisibleIdentities(api, pending); + if (found.size === 0) { + return []; + } + + const didJump = goToGridRowIdentities(api, found); + return didJump ? [...found] : []; +} diff --git a/frontend/lib/knowledge-table-state.ts b/frontend/lib/knowledge-table-state.ts index 82466f417..b8974f3b0 100644 --- a/frontend/lib/knowledge-table-state.ts +++ b/frontend/lib/knowledge-table-state.ts @@ -63,12 +63,16 @@ export function buildKnowledgeTableRows( const taskFile = taskFileMap.get(getKnowledgeFileIdentity(file)); if (taskFile) { const backendStatus = file.status ?? "active"; + const status = + taskFile.status === "processing" || taskFile.status === "failed" + ? taskFile.status + : backendStatus; return { ...file, filename: taskFile.filename, source_url: taskFile.source_url, connector_type: taskFile.connector_type, - status: backendStatus, + status, error: taskFile.error, embedding_model: taskFile.embedding_model ?? file.embedding_model, embedding_dimensions: From a1b7856a04bb884b153000b984e1ba38351ab959 Mon Sep 17 00:00:00 2001 From: Olfa Maslah Date: Mon, 15 Jun 2026 16:03:18 -0400 Subject: [PATCH 02/10] improve pagination and scroll to the file row that is currently processing the ingestion, make sure it works for connectors too --- frontend/app/knowledge/page.tsx | 47 ++- frontend/app/upload/[provider]/page.tsx | 2 + .../AgGrid/registerAgGridModules.ts | 6 + frontend/components/knowledge-dropdown.tsx | 2 + frontend/lib/knowledge-grid-pagination.ts | 352 ++++++++++++++++-- frontend/lib/knowledge-table-state.ts | 95 ++++- 6 files changed, 452 insertions(+), 52 deletions(-) diff --git a/frontend/app/knowledge/page.tsx b/frontend/app/knowledge/page.tsx index fbc3aeeab..f4fa5096b 100644 --- a/frontend/app/knowledge/page.tsx +++ b/frontend/app/knowledge/page.tsx @@ -44,7 +44,12 @@ import { getConnectorDescriptor } from "@/lib/connectors/registry"; import { collectNewIngestFocusIdentities, collectProcessingFocusIdentities, + consumePersistedKnowledgeIngestFocus, focusPendingIngestRows, + type IngestFocusMode, + inferIngestFocusMode, + ingestFocusModeFromReplace, + KNOWLEDGE_INGEST_FOCUS_EVENT, } from "@/lib/knowledge-grid-pagination"; import { buildKnowledgeTableRows, @@ -458,10 +463,15 @@ function SearchPage() { const gridRows = fileResults; const gridRef = useRef(null); + const gridRowsRef = useRef(gridRows); + gridRowsRef.current = gridRows; const prevTaskFilesForPaginationRef = useRef([]); const prevGridRowsForPaginationRef = useRef([]); const hasInitializedPaginationRefsRef = useRef(false); const pendingIngestFocusIdentitiesRef = useRef>(new Set()); + const pendingIngestFocusModesRef = useRef>( + new Map(), + ); const tryFocusPendingIngestRows = useCallback(() => { const run = () => { @@ -472,27 +482,62 @@ function SearchPage() { const resolved = focusPendingIngestRows( api, pendingIngestFocusIdentitiesRef.current, + gridRowsRef.current, + pendingIngestFocusModesRef.current, ); for (const identity of resolved) { pendingIngestFocusIdentitiesRef.current.delete(identity); + pendingIngestFocusModesRef.current.delete(identity); } }; requestAnimationFrame(() => requestAnimationFrame(run)); }, []); const queueIngestFocusIdentities = useCallback( - (identities: string[]) => { + (identities: string[], mode?: IngestFocusMode) => { if (identities.length === 0) { return; } for (const identity of identities) { pendingIngestFocusIdentitiesRef.current.add(identity); + pendingIngestFocusModesRef.current.set( + identity, + mode ?? inferIngestFocusMode(identity, gridRowsRef.current), + ); } tryFocusPendingIngestRows(); }, [tryFocusPendingIngestRows], ); + useEffect(() => { + const stored = consumePersistedKnowledgeIngestFocus(); + for (const target of stored) { + queueIngestFocusIdentities( + [target.filename], + ingestFocusModeFromReplace(target.replace), + ); + } + }, [queueIngestFocusIdentities]); + + useEffect(() => { + const handler = (event: Event) => { + const detail = ( + event as CustomEvent<{ filename: string; replace: boolean }> + ).detail; + if (!detail?.filename) { + return; + } + queueIngestFocusIdentities( + [detail.filename], + ingestFocusModeFromReplace(detail.replace), + ); + }; + window.addEventListener(KNOWLEDGE_INGEST_FOCUS_EVENT, handler); + return () => + window.removeEventListener(KNOWLEDGE_INGEST_FOCUS_EVENT, handler); + }, [queueIngestFocusIdentities]); + // Jump to the page where a file starts processing (new ingest or retry). useEffect(() => { if (!hasInitializedPaginationRefsRef.current) { diff --git a/frontend/app/upload/[provider]/page.tsx b/frontend/app/upload/[provider]/page.tsx index 480776377..7ace0e561 100644 --- a/frontend/app/upload/[provider]/page.tsx +++ b/frontend/app/upload/[provider]/page.tsx @@ -19,6 +19,7 @@ import { import { useTask } from "@/contexts/task-context"; import { useSessionIngestSettings } from "@/hooks/useSessionIngestSettings"; import { getConnectorDescriptor } from "@/lib/connectors/registry"; +import { queueKnowledgeIngestFocusForCloudFiles } from "@/lib/knowledge-grid-pagination"; // CloudFile interface is now imported from the unified cloud picker @@ -91,6 +92,7 @@ export default function UploadProviderPage() { files: CloudFile[], replaceDuplicates: boolean, ) => { + queueKnowledgeIngestFocusForCloudFiles(files, replaceDuplicates); syncMutation.mutate( { connectorType: connector.type, diff --git a/frontend/components/AgGrid/registerAgGridModules.ts b/frontend/components/AgGrid/registerAgGridModules.ts index 486a76ba7..764919d22 100644 --- a/frontend/components/AgGrid/registerAgGridModules.ts +++ b/frontend/components/AgGrid/registerAgGridModules.ts @@ -1,5 +1,6 @@ import { CellStyleModule, + ClientSideRowModelApiModule, ClientSideRowModelModule, ColumnApiModule, ColumnAutoSizeModule, @@ -9,7 +10,9 @@ import { ModuleRegistry, PaginationModule, QuickFilterModule, + RowApiModule, RowSelectionModule, + ScrollApiModule, TextFilterModule, ValidationModule, } from "ag-grid-community"; @@ -24,6 +27,9 @@ ModuleRegistry.registerModules([ CellStyleModule, QuickFilterModule, ClientSideRowModelModule, + ClientSideRowModelApiModule, + RowApiModule, + ScrollApiModule, TextFilterModule, DateFilterModule, EventApiModule, diff --git a/frontend/components/knowledge-dropdown.tsx b/frontend/components/knowledge-dropdown.tsx index c8338e8de..d9a250c84 100644 --- a/frontend/components/knowledge-dropdown.tsx +++ b/frontend/components/knowledge-dropdown.tsx @@ -39,6 +39,7 @@ import { getConnectorDescriptor, getConnectorDescriptors, } from "@/lib/connectors/registry"; +import { dispatchKnowledgeIngestFocus } from "@/lib/knowledge-grid-pagination"; import { duplicateCheck, uploadFiles, @@ -335,6 +336,7 @@ export function KnowledgeDropdown() { setFileUploading(true); try { + dispatchKnowledgeIngestFocus(file.name, replace); await uploadFileUtil(file, replace); refetchTasks(); } catch (error) { diff --git a/frontend/lib/knowledge-grid-pagination.ts b/frontend/lib/knowledge-grid-pagination.ts index c99b06ae6..18e06be13 100644 --- a/frontend/lib/knowledge-grid-pagination.ts +++ b/frontend/lib/knowledge-grid-pagination.ts @@ -1,7 +1,10 @@ import type { IRowNode } from "ag-grid-community"; import type { AgGridReact } from "ag-grid-react"; import type { TaskFile } from "@/contexts/task-context"; -import { getKnowledgeFileIdentity } from "@/lib/knowledge-table-state"; +import { + getKnowledgeFileAliasKeys, + getKnowledgeFileIdentity, +} from "@/lib/knowledge-table-state"; type GridApi = NonNullable["api"]>; @@ -11,43 +14,269 @@ type GridRowLike = { status?: string; }; +export type IngestFocusMode = "existing" | "new"; + +/** + * Maps an upload overwrite flag to grid pagination focus mode. + * + * Intentionally `replace ? "existing" : "new"` — not inverted: + * - Overwrite: file is already indexed; jump to its current row (first match). + * - New ingest: processing overlay is appended after indexed rows (last page). + */ +export function ingestFocusModeFromReplace(replace: boolean): IngestFocusMode { + return replace ? "existing" : "new"; +} + +export const KNOWLEDGE_INGEST_FOCUS_EVENT = "knowledgeIngestFocus"; + +const INGEST_FOCUS_STORAGE_KEY = "openrag:knowledge-ingest-focus"; + +export type KnowledgeIngestFocusTarget = { + filename: string; + replace: boolean; +}; + +function readPersistedIngestFocusTargets(): KnowledgeIngestFocusTarget[] { + if (typeof window === "undefined") { + return []; + } + try { + const raw = sessionStorage.getItem(INGEST_FOCUS_STORAGE_KEY); + if (!raw) { + return []; + } + const parsed: unknown = JSON.parse(raw); + if (!Array.isArray(parsed)) { + return []; + } + return parsed.filter( + (item): item is KnowledgeIngestFocusTarget => + typeof item === "object" && + item !== null && + typeof (item as KnowledgeIngestFocusTarget).filename === "string" && + typeof (item as KnowledgeIngestFocusTarget).replace === "boolean", + ); + } catch { + return []; + } +} + +/** Persist focus targets across route changes (e.g. cloud upload → /knowledge). */ +export function persistKnowledgeIngestFocus( + targets: KnowledgeIngestFocusTarget[], +): void { + if (typeof window === "undefined" || targets.length === 0) { + return; + } + const merged = [...readPersistedIngestFocusTargets(), ...targets]; + sessionStorage.setItem(INGEST_FOCUS_STORAGE_KEY, JSON.stringify(merged)); +} + +export function consumePersistedKnowledgeIngestFocus(): KnowledgeIngestFocusTarget[] { + if (typeof window === "undefined") { + return []; + } + const targets = readPersistedIngestFocusTargets(); + sessionStorage.removeItem(INGEST_FOCUS_STORAGE_KEY); + return targets; +} + +export function dispatchKnowledgeIngestFocus( + filename: string, + replace: boolean, +): void { + if (typeof window === "undefined") { + return; + } + window.dispatchEvent( + new CustomEvent(KNOWLEDGE_INGEST_FOCUS_EVENT, { + detail: { filename, replace }, + }), + ); +} + +export function queueKnowledgeIngestFocusForCloudFiles( + files: Array<{ name: string }>, + replace: boolean, +): void { + if (files.length === 0) { + return; + } + persistKnowledgeIngestFocus( + files.map((file) => ({ filename: file.name, replace })), + ); +} + type VisibleRowTarget = { displayIndex: number; - node: IRowNode; + node: IRowNode | null; }; -/** Earliest visible row (after filter/sort) matching any identity. */ -function findFirstVisibleRowTarget( +function rowMatchesIdentitySet( + row: GridRowLike, + identitySet: Set, +): boolean { + return getKnowledgeFileAliasKeys(row).some((key) => identitySet.has(key)); +} + +/** Infer focus mode from current grid rows (task-poll path). */ +export function inferIngestFocusMode( + identity: string, + rowData: GridRowLike[], +): IngestFocusMode { + const identitySet = new Set([identity]); + const hasMatch = rowData.some((row) => + rowMatchesIdentitySet(row, identitySet), + ); + return hasMatch ? "existing" : "new"; +} + +function hasActiveColumnSort(api: GridApi): boolean { + return api.getColumnState().some((column) => column.sort != null); +} + +function findRowDataIndex( + identitySet: Set, + rowData: GridRowLike[], + pick: "first" | "last", +): number { + let targetIndex = -1; + rowData.forEach((row, index) => { + if (!rowMatchesIdentitySet(row, identitySet)) { + return; + } + if (targetIndex < 0) { + targetIndex = index; + return; + } + if (pick === "first") { + targetIndex = Math.min(targetIndex, index); + } else { + targetIndex = Math.max(targetIndex, index); + } + }); + return targetIndex; +} + +/** Earliest or latest visible row (after filter/sort) matching any identity. */ +function findVisibleRowTarget( api: GridApi, identitySet: Set, + rowData: GridRowLike[] | undefined, + pick: "first" | "last", ): VisibleRowTarget | null { let target: VisibleRowTarget | null = null; api.forEachNodeAfterFilterAndSort((node, index) => { - const id = node.id; - if (id && identitySet.has(id)) { - if (!target || index < target.displayIndex) { - target = { displayIndex: index, node }; - } + const data = node.data as GridRowLike | undefined; + const nodeId = node.id; + const matches = + (nodeId && identitySet.has(nodeId)) || + (data != null && rowMatchesIdentitySet(data, identitySet)); + if (!matches) { + return; + } + if (!target) { + target = { displayIndex: index, node }; + return; + } + if ( + pick === "first" + ? index < target.displayIndex + : index > target.displayIndex + ) { + target = { displayIndex: index, node }; } }); - return target; + if (target) { + return target; + } + + for (const id of identitySet) { + const node = api.getRowNode(id); + if (!node) { + continue; + } + let displayIndex = -1; + api.forEachNodeAfterFilterAndSort((current, index) => { + if (current === node || current.id === id) { + displayIndex = index; + } + }); + if (displayIndex >= 0) { + return { displayIndex, node }; + } + } + + if (!rowData?.length || hasActiveColumnSort(api)) { + return null; + } + + const displayIndex = findRowDataIndex(identitySet, rowData, pick); + if (displayIndex < 0) { + return null; + } + + for (const id of identitySet) { + const node = api.getRowNode(id); + if (node) { + return { displayIndex, node }; + } + } + + return { displayIndex, node: null }; } -/** Identities visible in the grid after filter/sort. */ -function collectVisibleIdentities( +/** Identities from pending that appear in the grid model and/or current rowData. */ +function collectResolvableIdentities( api: GridApi, identities: Set, + rowData?: GridRowLike[], ): Set { - const visible = new Set(); + const found = new Set(); + api.forEachNodeAfterFilterAndSort((node) => { - const id = node.id; - if (id && identities.has(id)) { - visible.add(id); + const data = node.data as GridRowLike | undefined; + const nodeId = node.id; + if (nodeId && identities.has(nodeId)) { + found.add(nodeId); + } + if (data) { + for (const key of getKnowledgeFileAliasKeys(data)) { + if (identities.has(key)) { + found.add(key); + } + } } }); - return visible; + + if (!rowData?.length || hasActiveColumnSort(api)) { + return found; + } + + for (const row of rowData) { + for (const key of getKnowledgeFileAliasKeys(row)) { + if (identities.has(key)) { + found.add(key); + } + } + } + + return found; +} + +function resolveFocusMode( + identitySet: Set, + modes: Map | undefined, +): IngestFocusMode { + for (const id of identitySet) { + const mode = modes?.get(id); + if (mode) { + return mode; + } + } + return "existing"; } /** Identities of task overlays that just started ingesting or were retried. */ @@ -55,27 +284,32 @@ export function collectNewIngestFocusIdentities( previous: TaskFile[], current: TaskFile[], ): string[] { - const prevByIdentity = new Map(); + const prevByAlias = new Map(); for (const file of previous) { - const identity = getKnowledgeFileIdentity(file); - if (identity) { - prevByIdentity.set(identity, file); + for (const key of getKnowledgeFileAliasKeys(file)) { + prevByAlias.set(key, file); } } const identities: string[] = []; + const seen = new Set(); + for (const file of current) { - const identity = getKnowledgeFileIdentity(file); - if (!identity) { + const keys = getKnowledgeFileAliasKeys(file); + const identity = getKnowledgeFileIdentity(file) || keys[0]; + if (!identity || seen.has(identity)) { continue; } - const prev = prevByIdentity.get(identity); + + const prev = keys.map((key) => prevByAlias.get(key)).find(Boolean); if (!prev) { identities.push(identity); + seen.add(identity); continue; } if (prev.status !== "processing" && file.status === "processing") { identities.push(identity); + seen.add(identity); } } return identities; @@ -86,27 +320,33 @@ export function collectProcessingFocusIdentities( previous: GridRowLike[], current: GridRowLike[], ): string[] { - const prevStatusByIdentity = new Map(); + const prevStatusByAlias = new Map(); for (const row of previous) { - const identity = getKnowledgeFileIdentity(row); - if (identity) { - prevStatusByIdentity.set(identity, row.status ?? "active"); + const status = row.status ?? "active"; + for (const key of getKnowledgeFileAliasKeys(row)) { + prevStatusByAlias.set(key, status); } } const identities: string[] = []; + const seen = new Set(); + for (const row of current) { - const identity = getKnowledgeFileIdentity(row); - if (!identity) { - continue; - } const status = row.status ?? "active"; if (status !== "processing") { continue; } - const prevStatus = prevStatusByIdentity.get(identity); + const keys = getKnowledgeFileAliasKeys(row); + const identity = getKnowledgeFileIdentity(row) || keys[0]; + if (!identity || seen.has(identity)) { + continue; + } + const prevStatus = keys + .map((key) => prevStatusByAlias.get(key)) + .find((value) => value !== undefined); if (prevStatus === undefined || prevStatus !== "processing") { identities.push(identity); + seen.add(identity); } } return identities; @@ -117,7 +357,13 @@ function scrollToRowTarget( target: VisibleRowTarget, afterPageChange = false, ): void { - const scroll = () => api.ensureNodeVisible(target.node, "middle"); + const scroll = () => { + if (target.node) { + api.ensureNodeVisible(target.node, "middle"); + return; + } + api.ensureIndexVisible(target.displayIndex, "middle"); + }; if (afterPageChange) { requestAnimationFrame(() => requestAnimationFrame(scroll)); } else { @@ -125,17 +371,27 @@ function scrollToRowTarget( } } -/** Jump to the first pagination page that contains any of the given row identities. */ +/** Jump to the pagination page that contains the target ingest row. */ export function goToGridRowIdentities( api: GridApi, identities: Iterable, + rowData?: GridRowLike[], + modes?: Map, ): boolean { const identitySet = new Set(identities); if (identitySet.size === 0) { return false; } - const target = findFirstVisibleRowTarget(api, identitySet); + const mode = resolveFocusMode(identitySet, modes); + const pick = mode === "new" ? "last" : "first"; + const target = findVisibleRowTarget(api, identitySet, rowData, pick); + + if (!target && mode === "new") { + api.paginationGoToLastPage(); + return true; + } + if (!target) { return false; } @@ -159,16 +415,32 @@ export function goToGridRowIdentities( export function focusPendingIngestRows( api: GridApi, pending: Set, + rowData?: GridRowLike[], + modes?: Map, ): string[] { if (pending.size === 0) { return []; } - const found = collectVisibleIdentities(api, pending); - if (found.size === 0) { + const resolvable = collectResolvableIdentities(api, pending, rowData); + const hasExistingTarget = resolvable.size > 0; + const hasNewOnlyPending = [...pending].some( + (id) => modes?.get(id) === "new" && !resolvable.has(id), + ); + + if (!hasExistingTarget && hasNewOnlyPending) { + api.paginationGoToLastPage(); + return []; + } + + if (!hasExistingTarget) { + return []; + } + + const didJump = goToGridRowIdentities(api, pending, rowData, modes); + if (!didJump) { return []; } - const didJump = goToGridRowIdentities(api, found); - return didJump ? [...found] : []; + return [...pending].filter((id) => resolvable.has(id)); } diff --git a/frontend/lib/knowledge-table-state.ts b/frontend/lib/knowledge-table-state.ts index b8974f3b0..b8f048f0d 100644 --- a/frontend/lib/knowledge-table-state.ts +++ b/frontend/lib/knowledge-table-state.ts @@ -28,6 +28,75 @@ export function getKnowledgeFileIdentity(file?: { return ""; } +/** Lookup keys for matching task overlays to indexed rows (filename, path, basename). */ +export function getKnowledgeFileAliasKeys(file?: { + filename?: string; + source_url?: string; +}): string[] { + const keys = new Set(); + const identity = getKnowledgeFileIdentity(file); + if (identity) { + keys.add(identity); + } + const filename = file?.filename?.trim(); + if (filename) { + keys.add(filename); + } + const sourceUrl = file?.source_url?.trim(); + if (sourceUrl) { + keys.add(sourceUrl); + const basename = sourceUrl.split("/").pop()?.trim(); + if (basename) { + keys.add(basename); + } + } + return [...keys]; +} + +function taskOverlayPriority(status?: string): number { + switch (status) { + case "processing": + return 3; + case "failed": + return 2; + case "active": + return 1; + default: + return 0; + } +} + +function indexTaskFileOverlays( + taskFilesAsFiles: SearchFile[], +): Map { + const map = new Map(); + for (const file of taskFilesAsFiles) { + for (const key of getKnowledgeFileAliasKeys(file)) { + const existing = map.get(key); + if ( + !existing || + taskOverlayPriority(file.status) >= taskOverlayPriority(existing.status) + ) { + map.set(key, file); + } + } + } + return map; +} + +function lookupTaskFileOverlay( + map: Map, + file: SearchFile, +): SearchFile | undefined { + for (const key of getKnowledgeFileAliasKeys(file)) { + const match = map.get(key); + if (match) { + return match; + } + } + return undefined; +} + export function buildKnowledgeTableRows( searchData: SearchFile[], taskFiles: TaskFile[], @@ -52,15 +121,13 @@ export function buildKnowledgeTableRows( }; }); - const taskFileMap = new Map( - taskFilesAsFiles.map((file) => [getKnowledgeFileIdentity(file), file]), - ); + const taskFileMap = indexTaskFileOverlays(taskFilesAsFiles); const backendFiles = searchData.map((file) => { if (file.connector_type === "openrag_docs") { return file; } - const taskFile = taskFileMap.get(getKnowledgeFileIdentity(file)); + const taskFile = lookupTaskFileOverlay(taskFileMap, file); if (taskFile) { const backendStatus = file.status ?? "active"; const status = @@ -69,8 +136,8 @@ export function buildKnowledgeTableRows( : backendStatus; return { ...file, - filename: taskFile.filename, - source_url: taskFile.source_url, + filename: taskFile.filename || file.filename, + source_url: file.source_url || taskFile.source_url, connector_type: taskFile.connector_type, status, error: taskFile.error, @@ -82,9 +149,12 @@ export function buildKnowledgeTableRows( return file; }); - const backendIdentities = new Set( - backendFiles.map((f) => getKnowledgeFileIdentity(f)), - ); + const backendIdentityKeys = new Set(); + for (const file of backendFiles) { + for (const key of getKnowledgeFileAliasKeys(file)) { + backendIdentityKeys.add(key); + } + } const filteredTaskFiles = taskFilesAsFiles.filter((taskFile) => { if ( @@ -96,8 +166,11 @@ export function buildKnowledgeTableRows( if (taskFile.connector_type === "openrag_docs") { return false; } - const identity = getKnowledgeFileIdentity(taskFile); - if (backendIdentities.has(identity)) { + if ( + getKnowledgeFileAliasKeys(taskFile).some((key) => + backendIdentityKeys.has(key), + ) + ) { return false; } // Keep "active" overlays until the index lists the file (task drops key before refetch). From 05609aba906c902acc7e8aa159aa2b2cecec4a65 Mon Sep 17 00:00:00 2001 From: Olfa Maslah Date: Mon, 15 Jun 2026 16:23:09 -0400 Subject: [PATCH 03/10] adressed coderabbit comment1 --- frontend/lib/knowledge-grid-pagination.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/frontend/lib/knowledge-grid-pagination.ts b/frontend/lib/knowledge-grid-pagination.ts index 18e06be13..5d1cf4d11 100644 --- a/frontend/lib/knowledge-grid-pagination.ts +++ b/frontend/lib/knowledge-grid-pagination.ts @@ -303,8 +303,10 @@ export function collectNewIngestFocusIdentities( const prev = keys.map((key) => prevByAlias.get(key)).find(Boolean); if (!prev) { - identities.push(identity); - seen.add(identity); + if (file.status === "processing") { + identities.push(identity); + seen.add(identity); + } continue; } if (prev.status !== "processing" && file.status === "processing") { From 9af6032134b6b3ff3a5123261a812b964ba1ef91 Mon Sep 17 00:00:00 2001 From: Olfa Maslah Date: Mon, 15 Jun 2026 16:45:35 -0400 Subject: [PATCH 04/10] fix connector ingest error --- src/connectors/service.py | 2 ++ src/models/processors.py | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/connectors/service.py b/src/connectors/service.py index ade3f9e91..8d9267c46 100644 --- a/src/connectors/service.py +++ b/src/connectors/service.py @@ -421,6 +421,7 @@ async def sync_connector_files( ), models_service=self.models_service, replace_duplicates=replace_duplicates, + connector_type=getattr(connector, "CONNECTOR_TYPE", None) or "local", ) # Use file IDs as items (no more fake file paths!) @@ -603,6 +604,7 @@ async def sync_specific_files( models_service=self.models_service, ingest_settings=ingest_settings, replace_duplicates=replace_duplicates, + connector_type=getattr(connector, "CONNECTOR_TYPE", None) or "local", ) # Create custom task using TaskService diff --git a/src/models/processors.py b/src/models/processors.py index 5f50dd811..a9c7e01f3 100644 --- a/src/models/processors.py +++ b/src/models/processors.py @@ -635,6 +635,7 @@ def __init__( models_service=None, ingest_settings: dict[str, Any] | None = None, replace_duplicates: bool = False, + connector_type: str | None = None, ): super().__init__( document_service=document_service, @@ -650,6 +651,7 @@ def __init__( self.owner_email = owner_email self.ingest_settings = ingest_settings self.replace_duplicates = replace_duplicates + self.connector_type = connector_type async def process_item(self, upload_task: UploadTask, item: str, file_task: FileTask) -> None: """Process a connector file using unified methods""" @@ -667,6 +669,8 @@ async def process_item(self, upload_task: UploadTask, item: str, file_task: File if not connector or not connection: raise ValueError(f"Connection '{self.connection_id}' not found") + connector_type = self.connector_type or connection.connector_type + # Validate file extension early if filename is available VALID_EXTENSIONS = { "adoc", @@ -886,7 +890,7 @@ async def process_item(self, upload_task: UploadTask, item: str, file_task: File owner=self.user_id, owner_name=self.owner_name, owner_email=self.owner_email, - connector_type=connection.connector_type, + connector_type=connector_type, docling_polling_service=self.connector_service.task_service.docling_polling_service if self.connector_service.task_service else None, @@ -936,7 +940,7 @@ async def process_item(self, upload_task: UploadTask, item: str, file_task: File owner_name=self.owner_name, owner_email=self.owner_email, file_size=len(document.content), - connector_type=connection.connector_type, + connector_type=connector_type, acl=document.acl, connector_file_id=document.id, **standard_kwargs, @@ -947,7 +951,7 @@ async def process_item(self, upload_task: UploadTask, item: str, file_task: File await self.connector_service._update_connector_metadata( document, self.user_id, - connection.connector_type, + connector_type, self.jwt_token, id_field="connector_file_id", ) From 3c5396281678aaa1234e4f667f6e084dfac4bf95 Mon Sep 17 00:00:00 2001 From: Olfa Maslah Date: Mon, 15 Jun 2026 17:30:47 -0400 Subject: [PATCH 05/10] Build backendIdentityKeys from raw searchData instead of merged backendFiles, so dedupe only reflects authentic backend identities. --- frontend/lib/knowledge-grid-pagination.ts | 44 +++++++++++++++-------- frontend/lib/knowledge-table-state.ts | 2 +- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/frontend/lib/knowledge-grid-pagination.ts b/frontend/lib/knowledge-grid-pagination.ts index 5d1cf4d11..227e0b2fe 100644 --- a/frontend/lib/knowledge-grid-pagination.ts +++ b/frontend/lib/knowledge-grid-pagination.ts @@ -228,6 +228,26 @@ function findVisibleRowTarget( return { displayIndex, node: null }; } +/** Map row alias hits back to the canonical pending ids that were queued. */ +function addResolvablePendingIdsForRowKeys( + found: Set, + identities: Set, + rowKeys: Iterable, +): void { + const keys = new Set(rowKeys); + for (const pendingId of identities) { + for (const alias of getKnowledgeFileAliasKeys({ + filename: pendingId, + source_url: pendingId, + })) { + if (keys.has(alias)) { + found.add(pendingId); + break; + } + } + } +} + /** Identities from pending that appear in the grid model and/or current rowData. */ function collectResolvableIdentities( api: GridApi, @@ -238,17 +258,11 @@ function collectResolvableIdentities( api.forEachNodeAfterFilterAndSort((node) => { const data = node.data as GridRowLike | undefined; - const nodeId = node.id; - if (nodeId && identities.has(nodeId)) { - found.add(nodeId); - } - if (data) { - for (const key of getKnowledgeFileAliasKeys(data)) { - if (identities.has(key)) { - found.add(key); - } - } + const rowKeys = new Set(getKnowledgeFileAliasKeys(data)); + if (node.id) { + rowKeys.add(node.id); } + addResolvablePendingIdsForRowKeys(found, identities, rowKeys); }); if (!rowData?.length || hasActiveColumnSort(api)) { @@ -256,11 +270,11 @@ function collectResolvableIdentities( } for (const row of rowData) { - for (const key of getKnowledgeFileAliasKeys(row)) { - if (identities.has(key)) { - found.add(key); - } - } + addResolvablePendingIdsForRowKeys( + found, + identities, + getKnowledgeFileAliasKeys(row), + ); } return found; diff --git a/frontend/lib/knowledge-table-state.ts b/frontend/lib/knowledge-table-state.ts index b8f048f0d..23d4d6cef 100644 --- a/frontend/lib/knowledge-table-state.ts +++ b/frontend/lib/knowledge-table-state.ts @@ -150,7 +150,7 @@ export function buildKnowledgeTableRows( }); const backendIdentityKeys = new Set(); - for (const file of backendFiles) { + for (const file of searchData) { for (const key of getKnowledgeFileAliasKeys(file)) { backendIdentityKeys.add(key); } From abd286e8b5bdfddb3a1c16bc70d8bb679c88c05e Mon Sep 17 00:00:00 2001 From: Olfa Maslah Date: Mon, 15 Jun 2026 17:46:05 -0400 Subject: [PATCH 06/10] Refactor ingest-focus logic into useKnowledgeIngestFocus hook. Extract pagination/scroll-to-processing behavior from the knowledge page into a dedicated hook, share buildGridRowsSelectionKey, and align task overlay source_url merge priority with filename. --- frontend/app/knowledge/page.tsx | 134 +------------------ frontend/hooks/useKnowledgeIngestFocus.ts | 150 ++++++++++++++++++++++ frontend/lib/knowledge-grid-pagination.ts | 10 +- frontend/lib/knowledge-table-state.ts | 2 +- 4 files changed, 166 insertions(+), 130 deletions(-) create mode 100644 frontend/hooks/useKnowledgeIngestFocus.ts diff --git a/frontend/app/knowledge/page.tsx b/frontend/app/knowledge/page.tsx index f4fa5096b..e82d51703 100644 --- a/frontend/app/knowledge/page.tsx +++ b/frontend/app/knowledge/page.tsx @@ -40,17 +40,8 @@ import { TooltipTrigger, } from "@/components/ui/tooltip"; import { useIsCloudBrand } from "@/contexts/brand-context"; +import { useKnowledgeIngestFocus } from "@/hooks/useKnowledgeIngestFocus"; import { getConnectorDescriptor } from "@/lib/connectors/registry"; -import { - collectNewIngestFocusIdentities, - collectProcessingFocusIdentities, - consumePersistedKnowledgeIngestFocus, - focusPendingIngestRows, - type IngestFocusMode, - inferIngestFocusMode, - ingestFocusModeFromReplace, - KNOWLEDGE_INGEST_FOCUS_EVENT, -} from "@/lib/knowledge-grid-pagination"; import { buildKnowledgeTableRows, getKnowledgeFileIdentity, @@ -463,122 +454,13 @@ function SearchPage() { const gridRows = fileResults; const gridRef = useRef(null); - const gridRowsRef = useRef(gridRows); - gridRowsRef.current = gridRows; - const prevTaskFilesForPaginationRef = useRef([]); - const prevGridRowsForPaginationRef = useRef([]); - const hasInitializedPaginationRefsRef = useRef(false); - const pendingIngestFocusIdentitiesRef = useRef>(new Set()); - const pendingIngestFocusModesRef = useRef>( - new Map(), - ); - - const tryFocusPendingIngestRows = useCallback(() => { - const run = () => { - const api = gridRef.current?.api; - if (!api) { - return; - } - const resolved = focusPendingIngestRows( - api, - pendingIngestFocusIdentitiesRef.current, - gridRowsRef.current, - pendingIngestFocusModesRef.current, - ); - for (const identity of resolved) { - pendingIngestFocusIdentitiesRef.current.delete(identity); - pendingIngestFocusModesRef.current.delete(identity); - } - }; - requestAnimationFrame(() => requestAnimationFrame(run)); - }, []); - - const queueIngestFocusIdentities = useCallback( - (identities: string[], mode?: IngestFocusMode) => { - if (identities.length === 0) { - return; - } - for (const identity of identities) { - pendingIngestFocusIdentitiesRef.current.add(identity); - pendingIngestFocusModesRef.current.set( - identity, - mode ?? inferIngestFocusMode(identity, gridRowsRef.current), - ); - } - tryFocusPendingIngestRows(); - }, - [tryFocusPendingIngestRows], - ); - - useEffect(() => { - const stored = consumePersistedKnowledgeIngestFocus(); - for (const target of stored) { - queueIngestFocusIdentities( - [target.filename], - ingestFocusModeFromReplace(target.replace), - ); - } - }, [queueIngestFocusIdentities]); - - useEffect(() => { - const handler = (event: Event) => { - const detail = ( - event as CustomEvent<{ filename: string; replace: boolean }> - ).detail; - if (!detail?.filename) { - return; - } - queueIngestFocusIdentities( - [detail.filename], - ingestFocusModeFromReplace(detail.replace), - ); - }; - window.addEventListener(KNOWLEDGE_INGEST_FOCUS_EVENT, handler); - return () => - window.removeEventListener(KNOWLEDGE_INGEST_FOCUS_EVENT, handler); - }, [queueIngestFocusIdentities]); - - // Jump to the page where a file starts processing (new ingest or retry). - useEffect(() => { - if (!hasInitializedPaginationRefsRef.current) { - prevTaskFilesForPaginationRef.current = taskFiles; - prevGridRowsForPaginationRef.current = gridRows; - hasInitializedPaginationRefsRef.current = true; - return; - } - - const fromTasks = collectNewIngestFocusIdentities( - prevTaskFilesForPaginationRef.current, - taskFiles, - ); - const fromGrid = collectProcessingFocusIdentities( - prevGridRowsForPaginationRef.current, - gridRows, - ); - prevTaskFilesForPaginationRef.current = taskFiles; - prevGridRowsForPaginationRef.current = gridRows; - - queueIngestFocusIdentities([...new Set([...fromTasks, ...fromGrid])]); - }, [taskFiles, gridRows, queueIngestFocusIdentities]); - - const onKnowledgeGridReady = useCallback(() => { - tryFocusPendingIngestRows(); - }, [tryFocusPendingIngestRows]); - - const onKnowledgeRowDataUpdated = useCallback(() => { - tryFocusPendingIngestRows(); - }, [tryFocusPendingIngestRows]); + const { + gridRowsSelectionKey, + onKnowledgeGridReady, + onKnowledgeRowDataUpdated, + } = useKnowledgeIngestFocus(gridRef, gridRows, taskFiles); // Re-run only when row identity/status changes, not on every list poll reference. - const gridRowsSelectionKey = useMemo( - () => - gridRows - .map( - (row) => `${getKnowledgeFileIdentity(row)}:${row.status ?? "active"}`, - ) - .join("\0"), - [gridRows], - ); useEffect(() => { const api = gridRef.current?.api; @@ -592,10 +474,6 @@ function SearchPage() { ); }, [gridRowsSelectionKey, isDeletableKnowledgeRow]); - useEffect(() => { - tryFocusPendingIngestRows(); - }, [gridRowsSelectionKey, tryFocusPendingIngestRows]); - const columnDefs: ColDef[] = [ { field: "filename", diff --git a/frontend/hooks/useKnowledgeIngestFocus.ts b/frontend/hooks/useKnowledgeIngestFocus.ts new file mode 100644 index 000000000..83847270b --- /dev/null +++ b/frontend/hooks/useKnowledgeIngestFocus.ts @@ -0,0 +1,150 @@ +"use client"; + +import type { AgGridReact } from "ag-grid-react"; +import { type RefObject, useCallback, useEffect, useMemo, useRef } from "react"; +import type { File } from "@/app/api/queries/useGetSearchQuery"; +import type { TaskFile } from "@/contexts/task-context"; +import { + buildGridRowsSelectionKey, + collectNewIngestFocusIdentities, + collectProcessingFocusIdentities, + consumePersistedKnowledgeIngestFocus, + focusPendingIngestRows, + type IngestFocusMode, + inferIngestFocusMode, + ingestFocusModeFromReplace, + KNOWLEDGE_INGEST_FOCUS_EVENT, +} from "@/lib/knowledge-grid-pagination"; + +export function useKnowledgeIngestFocus( + gridRef: RefObject | null>, + gridRows: File[], + taskFiles: TaskFile[], +) { + const paginationSnapshotRef = useRef({ + initialized: false, + taskFiles: [] as TaskFile[], + gridRows: [] as File[], + }); + + const pendingFocusRef = useRef({ + identities: new Set(), + modes: new Map(), + }); + + const tryFocusPendingIngestRows = useCallback( + (rows: File[]) => { + const run = () => { + const api = gridRef.current?.api; + if (!api) { + return; + } + const pending = pendingFocusRef.current; + const resolved = focusPendingIngestRows( + api, + pending.identities, + rows, + pending.modes, + ); + for (const identity of resolved) { + pending.identities.delete(identity); + pending.modes.delete(identity); + } + }; + requestAnimationFrame(() => requestAnimationFrame(run)); + }, + [gridRef], + ); + + const queueIngestFocusIdentities = useCallback( + (identities: string[], mode?: IngestFocusMode, rows: File[] = gridRows) => { + if (identities.length === 0) { + return; + } + const pending = pendingFocusRef.current; + for (const identity of identities) { + pending.identities.add(identity); + pending.modes.set( + identity, + mode ?? inferIngestFocusMode(identity, rows), + ); + } + tryFocusPendingIngestRows(rows); + }, + [gridRows, tryFocusPendingIngestRows], + ); + + useEffect(() => { + const stored = consumePersistedKnowledgeIngestFocus(); + for (const target of stored) { + queueIngestFocusIdentities( + [target.filename], + ingestFocusModeFromReplace(target.replace), + ); + } + }, [queueIngestFocusIdentities]); + + useEffect(() => { + const handler = (event: Event) => { + const detail = ( + event as CustomEvent<{ filename: string; replace: boolean }> + ).detail; + if (!detail?.filename) { + return; + } + queueIngestFocusIdentities( + [detail.filename], + ingestFocusModeFromReplace(detail.replace), + ); + }; + window.addEventListener(KNOWLEDGE_INGEST_FOCUS_EVENT, handler); + return () => + window.removeEventListener(KNOWLEDGE_INGEST_FOCUS_EVENT, handler); + }, [queueIngestFocusIdentities]); + + useEffect(() => { + const snapshot = paginationSnapshotRef.current; + if (!snapshot.initialized) { + snapshot.taskFiles = taskFiles; + snapshot.gridRows = gridRows; + snapshot.initialized = true; + return; + } + + const fromTasks = collectNewIngestFocusIdentities( + snapshot.taskFiles, + taskFiles, + ); + const fromGrid = collectProcessingFocusIdentities( + snapshot.gridRows, + gridRows, + ); + snapshot.taskFiles = taskFiles; + snapshot.gridRows = gridRows; + + queueIngestFocusIdentities([...new Set([...fromTasks, ...fromGrid])]); + }, [taskFiles, gridRows, queueIngestFocusIdentities]); + + const gridRowsSelectionKey = useMemo( + () => buildGridRowsSelectionKey(gridRows), + [gridRows], + ); + + useEffect(() => { + tryFocusPendingIngestRows(gridRows); + }, [gridRows, gridRowsSelectionKey, tryFocusPendingIngestRows]); + + const onKnowledgeGridReady = useCallback(() => { + tryFocusPendingIngestRows(gridRows); + }, [gridRows, tryFocusPendingIngestRows]); + + const onKnowledgeRowDataUpdated = useCallback(() => { + tryFocusPendingIngestRows(gridRows); + }, [gridRows, tryFocusPendingIngestRows]); + + return { + gridRowsSelectionKey, + onKnowledgeGridReady, + onKnowledgeRowDataUpdated, + }; +} diff --git a/frontend/lib/knowledge-grid-pagination.ts b/frontend/lib/knowledge-grid-pagination.ts index 227e0b2fe..d075b6d95 100644 --- a/frontend/lib/knowledge-grid-pagination.ts +++ b/frontend/lib/knowledge-grid-pagination.ts @@ -1,12 +1,13 @@ import type { IRowNode } from "ag-grid-community"; import type { AgGridReact } from "ag-grid-react"; +import type { File } from "@/app/api/queries/useGetSearchQuery"; import type { TaskFile } from "@/contexts/task-context"; import { getKnowledgeFileAliasKeys, getKnowledgeFileIdentity, } from "@/lib/knowledge-table-state"; -type GridApi = NonNullable["api"]>; +type GridApi = NonNullable["api"]>; type GridRowLike = { filename?: string; @@ -14,6 +15,13 @@ type GridRowLike = { status?: string; }; +/** Stable key for row identity/status changes (avoids reacting to array reference churn). */ +export function buildGridRowsSelectionKey(rows: GridRowLike[]): string { + return rows + .map((row) => `${getKnowledgeFileIdentity(row)}:${row.status ?? "active"}`) + .join("\0"); +} + export type IngestFocusMode = "existing" | "new"; /** diff --git a/frontend/lib/knowledge-table-state.ts b/frontend/lib/knowledge-table-state.ts index 23d4d6cef..1e20fa9b4 100644 --- a/frontend/lib/knowledge-table-state.ts +++ b/frontend/lib/knowledge-table-state.ts @@ -137,7 +137,7 @@ export function buildKnowledgeTableRows( return { ...file, filename: taskFile.filename || file.filename, - source_url: file.source_url || taskFile.source_url, + source_url: taskFile.source_url || file.source_url, connector_type: taskFile.connector_type, status, error: taskFile.error, From 4517407ed065cfc45f669b3a4f1a6bfa0c4b1c23 Mon Sep 17 00:00:00 2001 From: Olfa Maslah Date: Mon, 15 Jun 2026 17:55:55 -0400 Subject: [PATCH 07/10] Preserve stored mode when no explicit mode is passed --- frontend/hooks/useKnowledgeIngestFocus.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/frontend/hooks/useKnowledgeIngestFocus.ts b/frontend/hooks/useKnowledgeIngestFocus.ts index 83847270b..39cc6755c 100644 --- a/frontend/hooks/useKnowledgeIngestFocus.ts +++ b/frontend/hooks/useKnowledgeIngestFocus.ts @@ -66,7 +66,9 @@ export function useKnowledgeIngestFocus( pending.identities.add(identity); pending.modes.set( identity, - mode ?? inferIngestFocusMode(identity, rows), + mode ?? + pending.modes.get(identity) ?? + inferIngestFocusMode(identity, rows), ); } tryFocusPendingIngestRows(rows); From 3fa49639c6679a3cecf6105ecffc04828c1230e3 Mon Sep 17 00:00:00 2001 From: Olfa Maslah Date: Mon, 15 Jun 2026 19:51:04 -0400 Subject: [PATCH 08/10] Wire connector type through task polling so processing rows show the correct source icon, and fix connector overwrite so re-ingests merge onto existing indexed rows instead of duplicating. - Pass connectorType from connector ingest call sites into addTask - Store per-task connector type in task-context and resolve overlays via inferTaskFileConnectorType - Restore main icon helpers (inferTaskFileConnectorType, resolveKnowledgeRowConnectorType) in knowledge-table-state - Match task overlays to backend rows via filename aliases and cleanConnectorFilename; prefer backend filename/source_url when merging onto indexed rows so identity fields stay paired - Use cleaned connector filenames for cloud ingest focus targets - Use file_task.filename for connector duplicate check/delete in the backend --- frontend/app/connectors/page.tsx | 2 +- frontend/app/upload/[provider]/page.tsx | 2 +- .../connectors/shared-bucket-view.tsx | 4 +- frontend/contexts/task-context.tsx | 87 +++++++-- frontend/lib/knowledge-grid-pagination.ts | 8 +- frontend/lib/knowledge-table-state.ts | 170 ++++++++++++++++-- src/models/processors.py | 4 +- 7 files changed, 233 insertions(+), 44 deletions(-) diff --git a/frontend/app/connectors/page.tsx b/frontend/app/connectors/page.tsx index bc114962b..6f79f6d7d 100644 --- a/frontend/app/connectors/page.tsx +++ b/frontend/app/connectors/page.tsx @@ -55,7 +55,7 @@ export default function ConnectorsPage() { if (response.status === 201) { const taskId = result.task_id; if (taskId) { - addTask(taskId); + addTask(taskId, { connectorType: connector.type }); setSyncResult({ processed: 0, total: selectedFiles.length, diff --git a/frontend/app/upload/[provider]/page.tsx b/frontend/app/upload/[provider]/page.tsx index 7ace0e561..fbfecc31a 100644 --- a/frontend/app/upload/[provider]/page.tsx +++ b/frontend/app/upload/[provider]/page.tsx @@ -114,7 +114,7 @@ export default function UploadProviderPage() { onSuccess: (result) => { const taskIds = result.task_ids; if (taskIds && taskIds.length > 0) { - addTask(taskIds[0]); + addTask(taskIds[0], { connectorType: connector.type }); router.push("/knowledge"); } }, diff --git a/frontend/components/connectors/shared-bucket-view.tsx b/frontend/components/connectors/shared-bucket-view.tsx index 86d5f23b0..49121cb8d 100644 --- a/frontend/components/connectors/shared-bucket-view.tsx +++ b/frontend/components/connectors/shared-bucket-view.tsx @@ -19,7 +19,7 @@ export interface SharedBucketViewProps { onRefetch: () => void; invalidateQueryKey: readonly unknown[]; syncMutation: ReturnType; - addTask: (id: string) => void; + addTask: (id: string, options?: { connectorType?: string }) => void; onBack: () => void; onDone: () => void; } @@ -82,7 +82,7 @@ export function SharedBucketView({ onSuccess: (result) => { invalidate(); if (result.task_ids?.length) { - addTask(result.task_ids[0]); + addTask(result.task_ids[0], { connectorType: connector.type }); onDone(); } else { toast.info("No files found in the selected buckets."); diff --git a/frontend/contexts/task-context.tsx b/frontend/contexts/task-context.tsx index bd7784be2..3eb83d1bf 100644 --- a/frontend/contexts/task-context.tsx +++ b/frontend/contexts/task-context.tsx @@ -24,7 +24,10 @@ import TaskDialog from "@/components/task-dialog"; import { useAuth } from "@/contexts/auth-context"; import { useOnboardingState } from "@/hooks/use-onboarding-state"; import { trackProcessFailure, trackProcessSuccess } from "@/lib/analytics"; -import { getKnowledgeFileIdentity } from "@/lib/knowledge-table-state"; +import { + getKnowledgeFileIdentity, + inferTaskFileConnectorType, +} from "@/lib/knowledge-table-state"; import { didTaskReachCompleted, finalizeProcessingOverlaysForEnhancedTask, @@ -57,7 +60,7 @@ export interface TaskFile { interface TaskContextType { tasks: Task[]; files: TaskFile[]; - addTask: (taskId: string) => void; + addTask: (taskId: string, options?: { connectorType?: string }) => void; addFiles: (files: Partial[], taskId: string) => void; /** Mark knowledge-table overlays as processing when a retry starts. */ markTaskFilesProcessing: (taskId: string, sourceUrls: string[]) => void; @@ -95,6 +98,24 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { const [taskDialogTaskId, setTaskDialogTaskId] = useState(null); const [isTaskDialogOpen, setIsTaskDialogOpen] = useState(false); const previousTasksRef = useRef([]); + const taskConnectorTypesRef = useRef>(new Map()); + + const clearTaskConnectorType = useCallback((taskId: string) => { + taskConnectorTypesRef.current.delete(taskId); + }, []); + + const clearTaskConnectorTypesWithoutOverlays = useCallback( + (prevFiles: TaskFile[], nextFiles: TaskFile[]) => { + const nextTaskIds = new Set(nextFiles.map((file) => file.task_id)); + for (const file of prevFiles) { + if (!nextTaskIds.has(file.task_id)) { + taskConnectorTypesRef.current.delete(file.task_id); + } + } + }, + [], + ); + const openTaskDialog = useCallback((taskId: string) => { setTaskDialogTaskId(taskId); setIsTaskDialogOpen(true); @@ -136,6 +157,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { }, ); + clearTaskConnectorType(variables.taskId); + // Update file to display as cancelled setFiles((prevFiles) => prevFiles.map((file) => { @@ -222,6 +245,13 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { // Handle task status changes and file updates useEffect(() => { + const currentTaskIds = new Set(tasks.map((task) => task.task_id)); + for (const previousTask of previousTasksRef.current) { + if (!currentTaskIds.has(previousTask.task_id)) { + clearTaskConnectorType(previousTask.task_id); + } + } + if (tasks.length === 0) { // Store current tasks as previous for next comparison previousTasksRef.current = tasks; @@ -308,18 +338,25 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { fileName, ); - // Detect connector type based on file path or other indicators - let connectorType = "local"; - if (filePath.includes("/") && !filePath.startsWith("/")) { - // Likely S3 key format (bucket/path/file.ext) - connectorType = "s3"; - } + const existingFile = + existingFileIndex >= 0 + ? prevFiles[existingFileIndex] + : undefined; + + const connectorType = inferTaskFileConnectorType( + filePath, + fileName, + existingFile?.connector_type && + existingFile.connector_type !== "local" + ? existingFile.connector_type + : taskConnectorTypesRef.current.get(currentTask.task_id), + ); const fileEntry: TaskFile = { filename: fileName, - mimetype: "", // We don't have this info from the task + mimetype: existingFile?.mimetype || "", source_url: filePath, - size: 0, // We don't have this info from the task + size: existingFile?.size || 0, connector_type: connectorType, status: mappedStatus, task_id: currentTask.task_id, @@ -498,6 +535,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { } } + clearTaskConnectorType(currentTask.task_id); + setFiles((prevFiles) => prevFiles.filter((file) => { if (file.task_id !== currentTask.task_id) { @@ -538,6 +577,8 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { !isTerminalFailedTask(previousTask) && isTerminalFailedTask(currentTask) ) { + clearTaskConnectorType(currentTask.task_id); + if (!isOnboardingActive) { selectTask(currentTask.task_id); setIsMenuOpen(true); @@ -579,10 +620,20 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { // Store current tasks as previous for next comparison previousTasksRef.current = tasks; - }, [tasks, refetchSearch, isOnboardingActive]); + }, [ + tasks, + refetchSearch, + isOnboardingActive, + clearTaskConnectorType, + queryClient, + ]); const addTask = useCallback( - (_taskId: string) => { + (taskId: string, options?: { connectorType?: string }) => { + const connectorType = options?.connectorType?.trim(); + if (connectorType) { + taskConnectorTypesRef.current.set(taskId, connectorType); + } // React Query will automatically handle polling when tasks are active // Just trigger a refetch to get the latest data setTimeout(() => { @@ -593,13 +644,15 @@ export function TaskProvider({ children }: { children: React.ReactNode }) { ); const refreshTasks = useCallback(async () => { - setFiles((prevFiles) => - prevFiles.filter( + setFiles((prevFiles) => { + const nextFiles = prevFiles.filter( (file) => file.status !== "active" && file.status !== "failed", - ), - ); + ); + clearTaskConnectorTypesWithoutOverlays(prevFiles, nextFiles); + return nextFiles; + }); await refetchTasks(); - }, [refetchTasks]); + }, [refetchTasks, clearTaskConnectorTypesWithoutOverlays]); const cancelTask = useCallback( async (taskId: string) => { diff --git a/frontend/lib/knowledge-grid-pagination.ts b/frontend/lib/knowledge-grid-pagination.ts index d075b6d95..cfaa72860 100644 --- a/frontend/lib/knowledge-grid-pagination.ts +++ b/frontend/lib/knowledge-grid-pagination.ts @@ -3,6 +3,7 @@ import type { AgGridReact } from "ag-grid-react"; import type { File } from "@/app/api/queries/useGetSearchQuery"; import type { TaskFile } from "@/contexts/task-context"; import { + cleanConnectorFilename, getKnowledgeFileAliasKeys, getKnowledgeFileIdentity, } from "@/lib/knowledge-table-state"; @@ -104,14 +105,17 @@ export function dispatchKnowledgeIngestFocus( } export function queueKnowledgeIngestFocusForCloudFiles( - files: Array<{ name: string }>, + files: Array<{ name: string; mimeType?: string }>, replace: boolean, ): void { if (files.length === 0) { return; } persistKnowledgeIngestFocus( - files.map((file) => ({ filename: file.name, replace })), + files.map((file) => ({ + filename: cleanConnectorFilename(file.name, file.mimeType), + replace, + })), ); } diff --git a/frontend/lib/knowledge-table-state.ts b/frontend/lib/knowledge-table-state.ts index 1e20fa9b4..03154e5a0 100644 --- a/frontend/lib/knowledge-table-state.ts +++ b/frontend/lib/knowledge-table-state.ts @@ -28,31 +28,155 @@ export function getKnowledgeFileIdentity(file?: { return ""; } +const MIME_TO_EXTENSION: Record = { + "application/pdf": ".pdf", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document": + ".docx", + "application/msword": ".doc", + "application/vnd.openxmlformats-officedocument.presentationml.presentation": + ".pptx", + "application/vnd.ms-powerpoint": ".ppt", + "text/plain": ".txt", + "text/markdown": ".md", + "text/x-markdown": ".md", + "text/html": ".html", + "text/csv": ".csv", + "application/json": ".json", + "application/xml": ".xml", + "text/xml": ".xml", + "application/rtf": ".rtf", + "application/vnd.google-apps.document": ".pdf", + "application/vnd.google-apps.presentation": ".pdf", + "application/vnd.google-apps.spreadsheet": ".pdf", +}; + +/** Mirror backend `clean_connector_filename` for overlay/focus matching. */ +export function cleanConnectorFilename( + filename: string, + mimetype?: string, +): string { + const trimmed = filename.trim(); + if (!trimmed || !mimetype) { + return trimmed; + } + const suffix = MIME_TO_EXTENSION[mimetype]; + if (!suffix) { + return trimmed; + } + if (!trimmed.toLowerCase().endsWith(suffix.toLowerCase())) { + return trimmed + suffix; + } + return trimmed; +} + +/** Mirror backend `get_filename_aliases` so overlays match indexed connector names. */ +function getKnowledgeFilenameAliases(filename?: string): string[] { + const normalized = filename?.trim() ?? ""; + if (!normalized) { + return []; + } + const aliases = [normalized]; + const lower = normalized.toLowerCase(); + if (lower.endsWith(".txt")) { + aliases.push(`${normalized.slice(0, -4)}.md`); + } else if (lower.endsWith(".md")) { + aliases.push(`${normalized.slice(0, -3)}.txt`); + } + for (const name of [...aliases]) { + aliases.push(name.replace(/ /g, "_").replace(/\//g, "_")); + } + return [...new Set(aliases)]; +} + +function addFilenameAliasKeys(keys: Set, filename?: string): void { + for (const alias of getKnowledgeFilenameAliases(filename)) { + keys.add(alias); + } +} + /** Lookup keys for matching task overlays to indexed rows (filename, path, basename). */ export function getKnowledgeFileAliasKeys(file?: { filename?: string; source_url?: string; + mimetype?: string; }): string[] { const keys = new Set(); - const identity = getKnowledgeFileIdentity(file); - if (identity) { - keys.add(identity); - } - const filename = file?.filename?.trim(); - if (filename) { - keys.add(filename); - } + addFilenameAliasKeys( + keys, + cleanConnectorFilename(file?.filename ?? "", file?.mimetype), + ); const sourceUrl = file?.source_url?.trim(); if (sourceUrl) { keys.add(sourceUrl); - const basename = sourceUrl.split("/").pop()?.trim(); - if (basename) { - keys.add(basename); - } + addFilenameAliasKeys(keys, sourceUrl.split("/").pop()); } + addFilenameAliasKeys(keys, getKnowledgeFileIdentity(file)); return [...keys]; } +function isMeaningfulConnectorType(connectorType?: string): boolean { + const normalized = connectorType?.trim(); + return Boolean(normalized && normalized !== "local"); +} + +function looksLikeHttpUrl(value: string): boolean { + return /^https?:\/\//i.test(value.trim()); +} + +/** Infer connector_type for task overlays when the API does not return it. */ +export function inferTaskFileConnectorType( + filePath: string, + fileName?: string, + taskConnectorType?: string, +): string { + if (isMeaningfulConnectorType(taskConnectorType)) { + return taskConnectorType!.trim(); + } + + for (const candidate of [filePath, fileName ?? ""]) { + const normalized = candidate.trim(); + if (!normalized) { + continue; + } + if (looksLikeHttpUrl(normalized)) { + return normalized.includes("openr.ag") ? "openrag_docs" : "url"; + } + } + + if (filePath.includes("/") && !filePath.startsWith("/")) { + return "aws_s3"; + } + + return "local"; +} + +/** Pick the connector icon source when merging backend rows with task overlays. */ +export function resolveKnowledgeRowConnectorType( + backendType?: string, + taskType?: string, + status: SearchFile["status"] = "active", +): string { + const rowStatus = status ?? "active"; + + if (rowStatus === "active") { + if (isMeaningfulConnectorType(backendType)) { + return backendType!.trim(); + } + if (isMeaningfulConnectorType(taskType)) { + return taskType!.trim(); + } + return backendType?.trim() || taskType?.trim() || "local"; + } + + if (isMeaningfulConnectorType(taskType)) { + return taskType!.trim(); + } + if (isMeaningfulConnectorType(backendType)) { + return backendType!.trim(); + } + return taskType?.trim() || backendType?.trim() || "local"; +} + function taskOverlayPriority(status?: string): number { switch (status) { case "processing": @@ -103,10 +227,11 @@ export function buildKnowledgeTableRows( hasActiveFilter = false, ): SearchFile[] { const taskFilesAsFiles: SearchFile[] = taskFiles.map((taskFile) => { - const normalizedFilename = - taskFile.filename?.trim() || - taskFile.source_url?.trim() || - "Untitled source"; + const rawFilename = + taskFile.filename?.trim() || taskFile.source_url?.trim() || ""; + const normalizedFilename = rawFilename + ? cleanConnectorFilename(rawFilename, taskFile.mimetype) + : "Untitled source"; return { filename: normalizedFilename, @@ -136,9 +261,16 @@ export function buildKnowledgeTableRows( : backendStatus; return { ...file, - filename: taskFile.filename || file.filename, - source_url: taskFile.source_url || file.source_url, - connector_type: taskFile.connector_type, + // Indexed row identity: prefer backend fields so filename and source_url stay paired. + filename: file.filename || taskFile.filename, + source_url: file.source_url || taskFile.source_url, + connector_type: isMeaningfulConnectorType(file.connector_type) + ? file.connector_type!.trim() + : resolveKnowledgeRowConnectorType( + file.connector_type, + taskFile.connector_type, + status, + ), status, error: taskFile.error, embedding_model: taskFile.embedding_model ?? file.embedding_model, diff --git a/src/models/processors.py b/src/models/processors.py index a9c7e01f3..206cbaa96 100644 --- a/src/models/processors.py +++ b/src/models/processors.py @@ -782,7 +782,7 @@ async def process_item(self, upload_task: UploadTask, item: str, file_task: File opensearch_client = self.document_service.session_manager.get_user_opensearch_client( self.user_id, self.jwt_token ) - if await self.check_filename_exists(document.filename, opensearch_client): + if await self.check_filename_exists(file_task.filename, opensearch_client): if not self.replace_duplicates: file_task.status = TaskStatus.SKIPPED file_task.error = None @@ -795,7 +795,7 @@ async def process_item(self, upload_task: UploadTask, item: str, file_task: File upload_task.successful_files += 1 return await self.delete_document_by_filename( - document.filename, + file_task.filename, opensearch_client, owner_user_id=self.user_id, ) From 29c523f0233c9723ab21f1b773896ccdc4194321 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 16 Jun 2026 00:05:44 +0000 Subject: [PATCH 09/10] style: ruff autofix (auto) --- tests/unit/test_connector_processor_filename_dedupe.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/unit/test_connector_processor_filename_dedupe.py b/tests/unit/test_connector_processor_filename_dedupe.py index 16e595992..36b231e35 100644 --- a/tests/unit/test_connector_processor_filename_dedupe.py +++ b/tests/unit/test_connector_processor_filename_dedupe.py @@ -251,9 +251,7 @@ async def test_langflow_connector_processor_uses_cleaned_filename(monkeypatch): await processor.process_item(upload_task, "file-id-1", file_task) - upload_call = ( - processor.connector_service.langflow_service.upload_and_ingest_file.await_args - ) + upload_call = processor.connector_service.langflow_service.upload_and_ingest_file.await_args assert upload_call.kwargs["file_tuple"][0] == "My Report.pdf" From f3ff8ee7b5b529729870c4695b687943545abad7 Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Mon, 15 Jun 2026 20:13:04 -0500 Subject: [PATCH 10/10] fix: don't mask connector_type with "local" in sync paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two connector sync entry points passed `getattr(connector, "CONNECTOR_TYPE", None) or "local"` into ConnectorFileProcessor. The `or "local"` made the value always truthy, so the processor's `self.connector_type or connection.connector_type` fallback never engaged — a connector with CONNECTOR_TYPE unset would be mislabeled "local" instead of falling through to the connection's real stored type. Pass `connector.CONNECTOR_TYPE` directly instead: `connector` is already guarded non-None at both sites and CONNECTOR_TYPE is a declared BaseConnector attribute, so the getattr default was unreachable. Result precedence is now connector type → connection type, with no "local" masking. Real connectors (SharePoint/OneDrive/Drive/S3) set CONNECTOR_TYPE so their resolved type is unchanged. --- src/connectors/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/connectors/service.py b/src/connectors/service.py index 8d9267c46..1e3ed859f 100644 --- a/src/connectors/service.py +++ b/src/connectors/service.py @@ -421,7 +421,7 @@ async def sync_connector_files( ), models_service=self.models_service, replace_duplicates=replace_duplicates, - connector_type=getattr(connector, "CONNECTOR_TYPE", None) or "local", + connector_type=connector.CONNECTOR_TYPE, ) # Use file IDs as items (no more fake file paths!) @@ -604,7 +604,7 @@ async def sync_specific_files( models_service=self.models_service, ingest_settings=ingest_settings, replace_duplicates=replace_duplicates, - connector_type=getattr(connector, "CONNECTOR_TYPE", None) or "local", + connector_type=connector.CONNECTOR_TYPE, ) # Create custom task using TaskService