diff --git a/ui/__tests__/research-rag.test.ts b/ui/__tests__/research-rag.test.ts
new file mode 100644
index 0000000..c2b3ac7
--- /dev/null
+++ b/ui/__tests__/research-rag.test.ts
@@ -0,0 +1,137 @@
+import {
+  buildEvidencePayload,
+  buildResearchMetadata,
+  buildResearchQueries,
+  detectScientificSection,
+  fuseChromaResults,
+  parseBoundedInteger,
+} from '@/utils/server/research-rag';
+
+import { describe, expect, it } from 'vitest';
+
+describe('research RAG helpers', () => {
+  it('builds deterministic research query variants', () => {
+    expect(
+      buildResearchQueries(
+        'What does "retrieval augmented generation" improve in scientific workflows?',
+      ),
+    ).toEqual([
+      'What does "retrieval augmented generation" improve in scientific workflows',
+      'retrieval augmented generation',
+      'retrieval augmented generation improve scientific workflows',
+    ]);
+  });
+
+  it('detects scientific sections near chunk starts', () => {
+    expect(detectScientificSection('Abstract\nThis paper studies RAG.')).toBe(
+      'abstract',
+    );
+    expect(
+      detectScientificSection('Materials and Methods\nWe used a benchmark.'),
+    ).toBe('materials and methods');
+    expect(detectScientificSection('A general paragraph.')).toBe('body');
+  });
+
+  it('builds citation metadata without leaking temporary upload paths', () => {
+    const metadata = buildResearchMetadata(
+      {
+        pageContent: 'Results\nThe method improves grounded answers.',
+        metadata: {
+          loc: { pageNumber: 7 },
+          pdf: { info: { Title: 'Grounded Scientific RAG' } },
+          source: '/tmp/uploads/private/source-paper.pdf',
+        },
+      },
+      'source-paper.pdf',
+      3,
+      1,
+    );
+
+    expect(metadata).toMatchObject({
+      citationKey: 'grounded-scientific-rag:p7:c2',
+      page: 7,
+      section: 'results',
+      source: 'source-paper.pdf',
+      title: 'Grounded Scientific RAG',
+    });
+    expect(metadata.source).not.toContain('/tmp/uploads');
+  });
+
+  it('bounds integer request parameters', () => {
+    expect(parseBoundedInteger('20', 8, 16)).toBe(16);
+    expect(parseBoundedInteger(0, 8, 16)).toBe(8);
+    expect(parseBoundedInteger('bad', 8, 16)).toBe(8);
+  });
+
+  it('fuses duplicate chunks across query variants', () => {
+    const fused = fuseChromaResults(
+      {
+        documents: [
+          ['The answer is grounded in chunk one.', 'A second chunk.'],
+          ['The answer is grounded in chunk one.'],
+        ],
+        metadatas: [
+          [
+            {
+              citationKey: 'paper:p1:c1',
+              page: 1,
+              source: 'paper.pdf',
+              title: 'Paper',
+            },
+            {
+              citationKey: 'paper:p2:c1',
+              page: 2,
+              source: 'paper.pdf',
+              title: 'Paper',
+            },
+          ],
+          [
+            {
+              citationKey: 'paper:p1:c1',
+              page: 1,
+              source: 'paper.pdf',
+              title: 'Paper',
+            },
+          ],
+        ],
+        distances: [[0.05, 0.4], [0.06]],
+        ids: [['a', 'b'], ['a']],
+      },
+      4,
+    );
+
+    expect(fused).toHaveLength(2);
+    expect(fused[0].citationKey).toBe('paper:p1:c1');
+    expect(fused[0].rankScore).toBeGreaterThan(fused[1].rankScore);
+  });
+
+  it('formats bounded evidence and source manifests', () => {
+    const payload = buildEvidencePayload(
+      {
+        documents: [['Chunk about scientific retrieval.'.repeat(20)]],
+        metadatas: [
+          [
+            {
+              citationKey: 'paper:p1:c1',
+              page: 1,
+              section: 'abstract',
+              source: 'paper.pdf',
+              title: 'Paper',
+            },
+          ],
+        ],
+        distances: [[0.1]],
+      },
+      { maxChunkChars: 40, maxEvidenceChars: 200, maxResults: 2 },
+    );
+
+    expect(payload.citations).toHaveLength(1);
+    expect(payload.citations[0].content.length).toBeLessThanOrEqual(40);
+    expect(payload.evidenceContext).toContain('[paper:p1:c1]');
+    expect(payload.sourceManifest[0]).toMatchObject({
+      citationKeys: ['paper:p1:c1'],
+      source: 'paper.pdf',
+      title: 'Paper',
+    });
+  });
+});
diff --git a/ui/pages/api/fetch-documents.ts b/ui/pages/api/fetch-documents.ts
index 9304e48..133fab3 100644
--- a/ui/pages/api/fetch-documents.ts
+++ b/ui/pages/api/fetch-documents.ts
@@ -1,25 +1,66 @@
-import type { NextApiRequest, NextApiResponse } from "next";
-import { ChromaClient, TransformersEmbeddingFunction } from "chromadb";
+import type { NextApiRequest, NextApiResponse } from 'next';
 
-export default async function handler(req: NextApiRequest, res: NextApiResponse) {
+import {
+  buildEvidencePayload,
+  buildResearchQueries,
+  parseBoundedInteger,
+} from '@/utils/server/research-rag';
+
+import { ChromaClient, TransformersEmbeddingFunction } from 'chromadb';
+
+export default async function handler(
+  req: NextApiRequest,
+  res: NextApiResponse,
+) {
   try {
+    if (req.method !== 'POST') {
+      res.setHeader('Allow', 'POST');
+      return res.status(405).json({ error: 'Method not allowed' });
+    }
+
     const client = new ChromaClient({
-      path: "http://chroma-server:8000",
+      path: process.env.CHROMA_PATH || 'http://chroma-server:8000',
     });
 
-    const query = req.body.input;
+    const query =
+      typeof req.body.input === 'string' ? req.body.input.trim() : '';
+
+    if (!query) {
+      return res.status(400).json({ error: 'Missing retrieval query' });
+    }
+
+    const nResults = parseBoundedInteger(req.body.nResults, 8, 16);
+    const maxEvidenceChars = parseBoundedInteger(
+      req.body.maxEvidenceChars,
+      12000,
+      30000,
+    );
+    const queryTexts = buildResearchQueries(query);
 
     const embedder = new TransformersEmbeddingFunction();
 
-    const collection = await client.getOrCreateCollection({ name: "default-collection", embeddingFunction: embedder });
+    const collection = await client.getOrCreateCollection({
+      name: 'default-collection',
+      embeddingFunction: embedder,
+    });
+
+    // Query deterministic research-focused variants, then fuse the result sets.
+    const results = await collection.query({
+      nResults,
+      queryTexts,
+      include: ['documents', 'metadatas', 'distances'] as any,
+    });
 
-  // query the collection
-  const results = await collection.query({
-      nResults: 4, 
-      queryTexts: [query]
-  }) 
+    const evidence = buildEvidencePayload(results, {
+      maxEvidenceChars,
+      maxResults: nResults,
+    });
 
-    res.status(200).json(results);
+    res.status(200).json({
+      ...results,
+      queryTexts,
+      ...evidence,
+    });
   } catch (error) {
     if (error instanceof Error) {
       console.error('Error message:', error.message);
@@ -29,4 +70,4 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
     }
     res.status(500).json({ error: 'An unexpected error occurred :(' });
   }
-}
\ No newline at end of file
+}
diff --git a/ui/pages/api/inject-documents.ts b/ui/pages/api/inject-documents.ts
index 532a635..df25c59 100644
--- a/ui/pages/api/inject-documents.ts
+++ b/ui/pages/api/inject-documents.ts
@@ -1,10 +1,16 @@
 import type { NextApiRequest, NextApiResponse } from 'next';
 
+import {
+  type LoadedDocument,
+  type PrimitiveMetadata,
+  RESEARCH_TEXT_SEPARATORS,
+  buildResearchMetadata,
+} from '@/utils/server/research-rag';
+
 import { ChromaClient, TransformersEmbeddingFunction } from 'chromadb';
 import { IncomingForm } from 'formidable';
 import { PDFLoader } from 'langchain/document_loaders/fs/pdf';
-import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
-
+import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
 import path from 'path';
 import { v4 as uuidv4 } from 'uuid';
 
@@ -33,22 +39,31 @@ export default async function handler(
         path: process.env.CHROMA_PATH || 'http://chroma-server:8000',
       });
 
-      const loader = new PDFLoader(files.pdf[0].filepath);
+      const pdfFile = Array.isArray(files.pdf) ? files.pdf[0] : files.pdf;
 
-      const originalDocs = await loader.load();
+      if (!pdfFile?.filepath) {
+        return res.status(400).json({ error: 'Missing PDF upload' });
+      }
 
-      console.log(JSON.stringify(originalDocs));
+      const fallbackSource =
+        pdfFile.originalFilename ?? path.basename(pdfFile.filepath);
+      const loader = new PDFLoader(pdfFile.filepath);
 
+      const originalDocs = await loader.load();
 
       const splitter = new RecursiveCharacterTextSplitter({
-        chunkSize: 500,
-        chunkOverlap: 100,
-      });      
+        chunkSize: 900,
+        chunkOverlap: 180,
+        separators: RESEARCH_TEXT_SEPARATORS,
+      });
 
       const docs = await splitter.splitDocuments(originalDocs);
- 
+
       // Process the documents and perform other logic
-      const { ids, metadatas, documentContents } = processDocuments(docs);
+      const { ids, metadatas, documentContents } = processDocuments(
+        docs,
+        fallbackSource,
+      );
 
       const embedder = new TransformersEmbeddingFunction();
       const collection = await client.getOrCreateCollection({
@@ -75,28 +90,41 @@ export default async function handler(
   }
 }
 
-function processDocuments(docs: any) {
-  const ids = [];
-  const metadatas = [];
-  const documentContents = [];
+function processDocuments(docs: LoadedDocument[], fallbackSource: string) {
+  const ids: string[] = [];
+  const metadatas: PrimitiveMetadata[] = [];
+  const documentContents: string[] = [];
+  const pageChunkCounts = new Map<string, number>();
 
-  for (const document of docs) {
+  for (let index = 0; index < docs.length; index += 1) {
+    const document = docs[index];
     // Generate an ID for each document, or use some existing unique identifier
     const id = uuidv4();
     ids.push(id);
 
-    const fallbackTitle = path.basename(document.metadata.source);
-    const titleFromMetadata = document.metadata.pdf.info.Title;
-
-    const title = titleFromMetadata && titleFromMetadata.length > 0 ? titleFromMetadata : fallbackTitle;
-
-  
-    const metadata = {
-      title: title,
-      page: document.metadata.loc.pageNumber, // Define this function to extract chapter info
-      source: document.metadata.source, // Define this function to extract verse info
-    };
-    metadatas.push(metadata);
+    const source =
+      typeof document.metadata === 'object' &&
+      document.metadata !== null &&
+      'source' in document.metadata &&
+      typeof document.metadata.source === 'string'
+        ? document.metadata.source
+        : fallbackSource;
+    const page =
+      typeof document.metadata === 'object' &&
+      document.metadata !== null &&
+      'loc' in document.metadata &&
+      typeof document.metadata.loc === 'object' &&
+      document.metadata.loc !== null &&
+      'pageNumber' in document.metadata.loc
+        ? document.metadata.loc.pageNumber
+        : 'unknown';
+    const pageKey = `${source}:${page}`;
+    const pageChunkIndex = pageChunkCounts.get(pageKey) ?? 0;
+    pageChunkCounts.set(pageKey, pageChunkIndex + 1);
+
+    metadatas.push(
+      buildResearchMetadata(document, fallbackSource, index, pageChunkIndex),
+    );
 
     // Add the page content to the documents array
     documentContents.push(document.pageContent);
diff --git a/ui/pages/api/rag-chat.ts b/ui/pages/api/rag-chat.ts
index ce84d67..184248a 100644
--- a/ui/pages/api/rag-chat.ts
+++ b/ui/pages/api/rag-chat.ts
@@ -1,6 +1,9 @@
 import { DEFAULT_SYSTEM_PROMPT, DEFAULT_TEMPERATURE } from '@/utils/app/const';
 import { OpenAIError, OpenAIStream } from '@/utils/server';
-import { codeBlock, oneLine } from 'common-tags'
+import type {
+  ResearchCitation,
+  SourceManifestEntry,
+} from '@/utils/server/research-rag';
 
 import { ChatBody, Message } from '@/types/chat';
 
@@ -9,46 +12,66 @@ import wasm from '../../node_modules/@dqbd/tiktoken/lite/tiktoken_bg.wasm?module
 
 import tiktokenModel from '@dqbd/tiktoken/encoders/cl100k_base.json';
 import { Tiktoken, init } from '@dqbd/tiktoken/lite/init';
+import { codeBlock, oneLine } from 'common-tags';
 
 export const config = {
   runtime: 'edge',
 };
 
-// Function to fetch and format documents
-async function fetchAndFormatDocuments(lastMessageContent: string) {
+type FetchDocumentsResponse = {
+  citations?: ResearchCitation[];
+  evidenceContext?: string;
+  sourceManifest?: SourceManifestEntry[];
+};
+
+function formatSourceManifest(sourceManifest: SourceManifestEntry[]) {
+  if (sourceManifest.length === 0) {
+    return 'No source manifest was produced.';
+  }
+
+  return sourceManifest
+    .map(
+      (source, index) =>
+        `${index + 1}. ${source.title} (${
+          source.source
+        }) -> ${source.citationKeys.join(', ')}`,
+    )
+    .join('\n');
+}
+
+async function fetchResearchEvidence(req: Request, lastMessageContent: string) {
   try {
-    console.log("fetching documents")
-    const response = await fetch('http://localhost:3000/api/fetch-documents', {
+    const response = await fetch(new URL('/api/fetch-documents', req.url), {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({ input: lastMessageContent }),
+      body: JSON.stringify({
+        input: lastMessageContent,
+        maxEvidenceChars: 12000,
+        nResults: 8,
+      }),
     });
-    
+
     if (!response.ok) {
       throw new Error(`Error fetching documents: ${response.statusText}`);
     }
 
-    const data = await response.json();
-    const result = data.metadatas[0].map((metadata: any, index: number) => {
-      return `Source ${index + 1}) Title: ${metadata.title}, Page: ${metadata.page}, Content: ${data.documents[0][index]}\n`;
-    }).join('');
-
-    console.log(result);
-
-    return result;
+    const data = (await response.json()) as FetchDocumentsResponse;
 
+    return {
+      citations: Array.isArray(data.citations) ? data.citations : [],
+      evidenceContext:
+        typeof data.evidenceContext === 'string' ? data.evidenceContext : '',
+      sourceManifest: Array.isArray(data.sourceManifest)
+        ? data.sourceManifest
+        : [],
+    };
   } catch (error) {
-    console.error('Error fetching and formatting documents:', error);
-    throw error; // You may want to throw a more specific error object here
+    console.error('Error fetching research evidence:', error);
+    throw error;
   }
 }
 
-
-
-
-
 const handler = async (req: Request): Promise<Response> => {
-
   try {
     const { model, messages, key, prompt, temperature } =
       (await req.json()) as ChatBody;
@@ -62,10 +85,8 @@ const handler = async (req: Request): Promise<Response> => {
 
     let promptToSend = codeBlock`
     ${oneLine`
-      You are a very enthusiastic AI assistant  who loves
-      to help people! Given the following information from
-      relevant documentation, answer the user's question using
-      only that information, outputted in markdown format.
+      You are a scientific research assistant. Given retrieved evidence from
+      uploaded documents, answer the user's question using only that evidence.
     `}
 
     ${oneLine`
@@ -75,7 +96,7 @@ const handler = async (req: Request): Promise<Response> => {
     `}
     
     ${oneLine`
-      Always include citations from the documentation.
+      Every factual claim must include the exact citation key from the evidence.
     `}
   `;
 
@@ -85,8 +106,9 @@ const handler = async (req: Request): Promise<Response> => {
 
     const lastMessage = messages[messages.length - 1];
 
-    const relevantDocuments = await fetchAndFormatDocuments(lastMessage.content);
-    
+    const { citations, evidenceContext, sourceManifest } =
+      await fetchResearchEvidence(req, lastMessage.content);
+
     let temperatureToUse = temperature;
     if (temperatureToUse == null) {
       temperatureToUse = DEFAULT_TEMPERATURE;
@@ -97,30 +119,43 @@ const handler = async (req: Request): Promise<Response> => {
     let tokenCount = prompt_tokens.length;
     let messagesToSend: Message[] = [];
 
-
     encoding.free();
 
-    console.log(model, promptToSend, temperatureToUse, key, messagesToSend);
-
-  
-  messagesToSend = [
+    messagesToSend = [
+      {
+        role: 'user',
+        content: codeBlock`
+          Here is the retrieved evidence:
+          ${evidenceContext || 'No matching evidence was retrieved.'}
+        `,
+      },
       {
-        role: "user",
+        role: 'user',
         content: codeBlock`
-          Here is the relevant documentation:
-          ${relevantDocuments}
+          Here is the source manifest:
+          ${formatSourceManifest(sourceManifest)}
         `,
       },
       {
-        role: "user",
+        role: 'user',
         content: codeBlock`
           ${oneLine`
-            Answer my next question using only the above documentation.
+            Answer my next question using only the above evidence.
             You must also follow the below rules when answering:
           `}
           ${oneLine`
             - Do not make up answers that are not provided in the documentation.
           `}
+          ${oneLine`
+            - Cite claims inline with the exact keys shown in square brackets,
+            for example [paper-title:p3:c2].
+          `}
+          ${oneLine`
+            - Only cite keys that appear in the source manifest or evidence.
+          `}
+          ${oneLine`
+            - Prefer the highest-ranked evidence when sources disagree.
+          `}
           ${oneLine`
             - If you are unsure and the answer is not explicitly written
             in the documentation context, say
@@ -135,19 +170,26 @@ const handler = async (req: Request): Promise<Response> => {
         `,
       },
       {
-        role: "user",
+        role: 'user',
         content: codeBlock`
           Here is my question:
           ${oneLine`${lastMessage.content}`}
       `,
       },
-    ]
-
+    ];
+
+    if (citations.length === 0) {
+      messagesToSend.splice(2, 0, {
+        role: 'user',
+        content:
+          'No citations were found. If the evidence is empty, say "Sorry, I don\'t know how to help with that."',
+      });
+    }
 
     const stream = await OpenAIStream(
       model,
       promptToSend,
-      0,
+      temperatureToUse,
       key,
       messagesToSend,
     );
diff --git a/ui/utils/server/research-rag.ts b/ui/utils/server/research-rag.ts
new file mode 100644
index 0000000..3a64f1f
--- /dev/null
+++ b/ui/utils/server/research-rag.ts
@@ -0,0 +1,590 @@
+type Primitive = string | number | boolean;
+
+type UnknownRecord = Record<string, unknown>;
+
+export type PrimitiveMetadata = Record<string, Primitive>;
+
+export type LoadedDocument = {
+  pageContent: string;
+  metadata?: unknown;
+};
+
+export type ResearchChunkMetadata = PrimitiveMetadata & {
+  citationKey: string;
+  chunkIndex: number;
+  page: number | string;
+  section: string;
+  source: string;
+  sourceId: string;
+  title: string;
+};
+
+export type ChromaQueryResults = {
+  documents?: unknown;
+  distances?: unknown;
+  ids?: unknown;
+  metadatas?: unknown;
+};
+
+export type FusedResearchResult = {
+  citationKey: string;
+  content: string;
+  distance?: number;
+  id?: string;
+  metadata: Partial<ResearchChunkMetadata> & PrimitiveMetadata;
+  rank: number;
+  rankScore: number;
+  sourceQueryIndex: number;
+};
+
+export type ResearchCitation = {
+  key: string;
+  title: string;
+  source: string;
+  page: number | string;
+  section: string;
+  distance?: number;
+  rankScore: number;
+  content: string;
+};
+
+export type SourceManifestEntry = {
+  sourceId: string;
+  title: string;
+  source: string;
+  citationKeys: string[];
+};
+
+export type EvidencePayload = {
+  citations: ResearchCitation[];
+  evidenceContext: string;
+  results: FusedResearchResult[];
+  sourceManifest: SourceManifestEntry[];
+};
+
+const SCIENTIFIC_SECTIONS = [
+  'abstract',
+  'introduction',
+  'background',
+  'related work',
+  'materials and methods',
+  'methodology',
+  'methods',
+  'experiment',
+  'experiments',
+  'evaluation',
+  'results',
+  'discussion',
+  'limitations',
+  'conclusion',
+  'references',
+];
+
+const STOP_WORDS = new Set([
+  'about',
+  'after',
+  'again',
+  'also',
+  'answer',
+  'based',
+  'before',
+  'between',
+  'could',
+  'describe',
+  'does',
+  'explain',
+  'from',
+  'have',
+  'how',
+  'into',
+  'paper',
+  'papers',
+  'please',
+  'research',
+  'should',
+  'show',
+  'that',
+  'their',
+  'there',
+  'these',
+  'this',
+  'using',
+  'what',
+  'when',
+  'where',
+  'which',
+  'with',
+]);
+
+export const RESEARCH_TEXT_SEPARATORS = [
+  '\nAbstract',
+  '\nABSTRACT',
+  '\nIntroduction',
+  '\nINTRODUCTION',
+  '\nBackground',
+  '\nBACKGROUND',
+  '\nRelated Work',
+  '\nRELATED WORK',
+  '\nMethods',
+  '\nMETHODS',
+  '\nMaterials and Methods',
+  '\nMATERIALS AND METHODS',
+  '\nMethodology',
+  '\nMETHODOLOGY',
+  '\nExperiments',
+  '\nEXPERIMENTS',
+  '\nResults',
+  '\nRESULTS',
+  '\nEvaluation',
+  '\nEVALUATION',
+  '\nDiscussion',
+  '\nDISCUSSION',
+  '\nLimitations',
+  '\nLIMITATIONS',
+  '\nConclusion',
+  '\nCONCLUSION',
+  '\nReferences',
+  '\nREFERENCES',
+  '\n\n',
+  '\n',
+  '. ',
+  ' ',
+  '',
+];
+
+function isRecord(value: unknown): value is UnknownRecord {
+  return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+
+function primitive(value: unknown): Primitive | undefined {
+  if (
+    typeof value === 'string' ||
+    typeof value === 'number' ||
+    typeof value === 'boolean'
+  ) {
+    return value;
+  }
+
+  return undefined;
+}
+
+function firstPrimitive(
+  record: UnknownRecord,
+  keys: string[],
+): Primitive | undefined {
+  for (const key of keys) {
+    const value = primitive(record[key]);
+    if (value !== undefined) {
+      return value;
+    }
+  }
+
+  return undefined;
+}
+
+function nestedPrimitive(
+  record: UnknownRecord,
+  path: string[],
+): Primitive | undefined {
+  let cursor: unknown = record;
+
+  for (const part of path) {
+    if (!isRecord(cursor)) {
+      return undefined;
+    }
+
+    cursor = cursor[part];
+  }
+
+  return primitive(cursor);
+}
+
+export function asCleanString(value: unknown): string | undefined {
+  if (typeof value === 'string') {
+    const trimmed = value.trim();
+    return trimmed.length > 0 ? trimmed : undefined;
+  }
+
+  if (typeof value === 'number' || typeof value === 'boolean') {
+    return String(value);
+  }
+
+  return undefined;
+}
+
+function asFiniteNumber(value: unknown): number | undefined {
+  if (typeof value === 'number' && Number.isFinite(value)) {
+    return value;
+  }
+
+  if (typeof value === 'string') {
+    const parsed = Number(value);
+    if (Number.isFinite(parsed)) {
+      return parsed;
+    }
+  }
+
+  return undefined;
+}
+
+function basename(value: string) {
+  const [withoutQuery] = value.split(/[?#]/);
+  const parts = withoutQuery.replace(/\\/g, '/').split('/').filter(Boolean);
+
+  return parts[parts.length - 1] ?? value;
+}
+
+function slugify(value: string, fallback: string) {
+  return (
+    value
+      .toLowerCase()
+      .replace(/[^a-z0-9]+/g, '-')
+      .replace(/^-+|-+$/g, '')
+      .slice(0, 48) || fallback
+  );
+}
+
+function hashString(value: string) {
+  let hash = 0x811c9dc5;
+
+  for (let index = 0; index < value.length; index += 1) {
+    hash ^= value.charCodeAt(index);
+    hash +=
+      (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24);
+  }
+
+  return (hash >>> 0).toString(16).toUpperCase().padStart(8, '0');
+}
+
+function collapseWhitespace(value: string) {
+  return value.replace(/\s+/g, ' ').trim();
+}
+
+export function parseBoundedInteger(
+  value: unknown,
+  fallback: number,
+  max: number,
+) {
+  // Floor before the positivity check so fractions below 1 use the fallback.
+  const parsed = Math.floor(asFiniteNumber(value) ?? 0);
+  if (parsed <= 0) {
+    return fallback;
+  }
+
+  return Math.min(parsed, max);
+}
+
+export function detectScientificSection(content: string) {
+  const sample = content.split('\n').slice(0, 10).join(' ').toLowerCase();
+
+  const matchedSection = SCIENTIFIC_SECTIONS.find((section) => {
+    const escaped = section.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+    return new RegExp(`\\b${escaped}\\b`).test(sample);
+  });
+
+  return matchedSection ?? 'body';
+}
+
+export function buildCitationKey({
+  chunkIndex,
+  page,
+  title,
+}: {
+  chunkIndex: number;
+  page: number | string;
+  title: string;
+}) {
+  return `${slugify(title, 'document')}:p${page}:c${chunkIndex + 1}`;
+}
+
+export function buildResearchMetadata(
+  document: LoadedDocument,
+  fallbackSource: string,
+  chunkIndex: number,
+  pageChunkIndex = chunkIndex,
+): ResearchChunkMetadata {
+  const metadata = isRecord(document.metadata) ? document.metadata : {};
+  const publicSource = basename(
+    asCleanString(
+      firstPrimitive(metadata, [
+        'originalFilename',
+        'filename',
+        'fileName',
+        'sourceLabel',
+        'source',
+      ]),
+    ) ?? fallbackSource,
+  );
+  const fallbackTitle = publicSource || fallbackSource || 'document';
+  const title =
+    asCleanString(firstPrimitive(metadata, ['title', 'documentTitle'])) ??
+    asCleanString(nestedPrimitive(metadata, ['pdf', 'info', 'Title'])) ??
+    fallbackTitle;
+  const page =
+    asFiniteNumber(firstPrimitive(metadata, ['page', 'pageNumber'])) ??
+    asFiniteNumber(nestedPrimitive(metadata, ['loc', 'pageNumber'])) ??
+    'unknown';
+  const section = detectScientificSection(document.pageContent);
+  const citationKey = buildCitationKey({
+    chunkIndex: pageChunkIndex,
+    page,
+    title,
+  });
+  const sourceId = `DOC-${hashString(
+    `${title.toLowerCase()}|${publicSource.toLowerCase()}`,
+  )}`;
+
+  return {
+    citationKey,
+    chunkIndex,
+    page,
+    section,
+    source: publicSource,
+    sourceId,
+    title,
+  };
+}
+
+export function buildResearchQueries(input: string, maxQueries = 3) {
+  const normalized = collapseWhitespace(input.trim().replace(/[?!.]+$/g, ''));
+  // Pure-punctuation input strips to ''; fall back to the raw text instead.
+  const queries = [normalized || collapseWhitespace(input)];
+  const quotedPhrases = Array.from(input.matchAll(/"([^"]{8,120})"/g))
+    .map((match) => collapseWhitespace(match[1]))
+    .filter(Boolean);
+  queries.push(...quotedPhrases);
+
+  const terms = normalized
+    .toLowerCase()
+    .replace(/[^a-z0-9\s-]/g, ' ')
+    .split(/\s+/)
+    .map((term) => term.trim())
+    .filter((term) => term.length >= 4 && !STOP_WORDS.has(term));
+  const uniqueTerms = Array.from(new Set(terms)).slice(0, 10);
+
+  if (uniqueTerms.length >= 2) {
+    queries.push(uniqueTerms.join(' '));
+  }
+
+  if (uniqueTerms.length >= 4) {
+    queries.push(uniqueTerms.slice(0, 6).join(' '));
+  }
+
+  return Array.from(new Set(queries.filter(Boolean))).slice(0, maxQueries);
+}
+
+function matrixRow(value: unknown, index: number) {
+  if (!Array.isArray(value)) {
+    return [];
+  }
+
+  const row = value[index];
+  return Array.isArray(row) ? row : [];
+}
+
+function normalizeResultMetadata(
+  rawMetadata: unknown,
+  fallbackIndex: number,
+): Partial<ResearchChunkMetadata> & PrimitiveMetadata {
+  const metadata = isRecord(rawMetadata) ? rawMetadata : {};
+  const title =
+    asCleanString(firstPrimitive(metadata, ['title', 'documentTitle'])) ??
+    'Untitled source';
+  const source = basename(
+    asCleanString(
+      firstPrimitive(metadata, [
+        'source',
+        'filename',
+        'fileName',
+        'sourcePath',
+      ]),
+    ) ?? 'unknown-source',
+  );
+  const page =
+    asFiniteNumber(firstPrimitive(metadata, ['page', 'pageNumber'])) ??
+    asCleanString(firstPrimitive(metadata, ['page', 'pageNumber'])) ??
+    'unknown';
+  const chunkIndex =
+    asFiniteNumber(firstPrimitive(metadata, ['chunkIndex', 'chunk_index'])) ??
+    fallbackIndex;
+  const section =
+    asCleanString(firstPrimitive(metadata, ['section'])) ?? 'body';
+  const citationKey =
+    asCleanString(firstPrimitive(metadata, ['citationKey', 'citation_key'])) ??
+    buildCitationKey({ chunkIndex, page, title });
+  const sourceId =
+    asCleanString(firstPrimitive(metadata, ['sourceId', 'source_id'])) ??
+    `DOC-${hashString(`${title.toLowerCase()}|${source.toLowerCase()}`)}`;
+
+  return {
+    citationKey,
+    chunkIndex,
+    page,
+    section,
+    source,
+    sourceId,
+    title,
+  };
+}
+
+export function fuseChromaResults(
+  results: ChromaQueryResults,
+  limit: number,
+): FusedResearchResult[] {
+  const documentsByQuery = Array.isArray(results.documents)
+    ? results.documents
+    : [];
+  const fused = new Map<string, FusedResearchResult>();
+
+  for (
+    let queryIndex = 0;
+    queryIndex < documentsByQuery.length;
+    queryIndex += 1
+  ) {
+    const documents = matrixRow(results.documents, queryIndex);
+    const metadatas = matrixRow(results.metadatas, queryIndex);
+    const distances = matrixRow(results.distances, queryIndex);
+    const ids = matrixRow(results.ids, queryIndex);
+
+    for (let rank = 0; rank < documents.length; rank += 1) {
+      const content = asCleanString(documents[rank]);
+      if (!content) {
+        continue;
+      }
+
+      const normalizedContent = collapseWhitespace(content);
+      const metadata = normalizeResultMetadata(metadatas[rank], rank);
+      const distance = asFiniteNumber(distances[rank]);
+      const citationKey =
+        metadata.citationKey ??
+        `SRC-${hashString(
+          `${metadata.source}|${metadata.page}|${normalizedContent}`,
+        )}`;
+      const dedupeKey = `${citationKey}|${hashString(
+        normalizedContent.slice(0, 400),
+      )}`;
+      const queryWeight = queryIndex === 0 ? 1 : 0.88;
+      const rankContribution = queryWeight / (rank + 1);
+      const distanceContribution =
+        distance === undefined ? 0 : 0.2 / (1 + Math.max(distance, 0));
+      const rankScore = rankContribution + distanceContribution;
+      const existing = fused.get(dedupeKey);
+
+      if (existing) {
+        existing.rankScore += rankScore;
+        if (
+          existing.distance === undefined ||
+          (distance !== undefined && distance < existing.distance)
+        ) {
+          existing.distance = distance;
+          existing.rank = rank;
+          existing.sourceQueryIndex = queryIndex;
+        }
+        continue;
+      }
+
+      fused.set(dedupeKey, {
+        citationKey,
+        content: normalizedContent,
+        distance,
+        id: asCleanString(ids[rank]),
+        metadata,
+        rank,
+        rankScore,
+        sourceQueryIndex: queryIndex,
+      });
+    }
+  }
+
+  return Array.from(fused.values())
+    .sort((left, right) => right.rankScore - left.rankScore)
+    .slice(0, limit);
+}
+
+function safeTruncate(value: string, maxChars: number) {
+  if (value.length <= maxChars) {
+    return value;
+  }
+
+  if (maxChars <= 3) {
+    return '.'.repeat(Math.max(maxChars, 0));
+  }
+
+  return `${value.slice(0, maxChars - 3).trimEnd()}...`;
+}
+
+export function buildEvidencePayload(
+  results: ChromaQueryResults,
+  options: {
+    maxChunkChars?: number;
+    maxEvidenceChars?: number;
+    maxResults?: number;
+  } = {},
+): EvidencePayload {
+  const maxResults = options.maxResults ?? 8;
+  const maxChunkChars = options.maxChunkChars ?? 1400;
+  const maxEvidenceChars = options.maxEvidenceChars ?? 12000;
+  const fusedResults = fuseChromaResults(results, maxResults);
+  const citations: ResearchCitation[] = fusedResults.map((result) => ({
+    key: result.citationKey,
+    title: asCleanString(result.metadata.title) ?? 'Untitled source',
+    source: asCleanString(result.metadata.source) ?? 'unknown-source',
+    page: result.metadata.page ?? 'unknown',
+    section: asCleanString(result.metadata.section) ?? 'body',
+    distance: result.distance,
+    rankScore: result.rankScore,
+    content: safeTruncate(result.content, maxChunkChars),
+  }));
+  const manifest = new Map<string, SourceManifestEntry>();
+  // Group citation keys per document so each source is listed only once.
+  for (const citation of citations) {
+    const sourceId = `DOC-${hashString(
+      `${citation.title.toLowerCase()}|${citation.source.toLowerCase()}`,
+    )}`;
+    const existing = manifest.get(sourceId);
+
+    if (existing) {
+      existing.citationKeys.push(citation.key);
+      continue;
+    }
+
+    manifest.set(sourceId, {
+      sourceId,
+      title: citation.title,
+      source: citation.source,
+      citationKeys: [citation.key],
+    });
+  }
+  // Fill the budget in rank order; joined output stays <= maxEvidenceChars.
+  let usedChars = 0;
+  const evidenceBlocks: string[] = [];
+
+  for (const citation of citations) {
+    const distance =
+      citation.distance === undefined
+        ? ''
+        : ` | Distance: ${citation.distance.toFixed(4)}`;
+    const block = [
+      `[${citation.key}] Title: ${citation.title} | Source: ${citation.source} | Page: ${citation.page} | Section: ${citation.section}${distance}`,
+      citation.content,
+    ].join('\n');
+    // Blocks after the first cost two extra chars for the '\n\n' joiner.
+    const separatorChars = evidenceBlocks.length === 0 ? 0 : 2;
+    const remaining = maxEvidenceChars - usedChars - separatorChars;
+    if (block.length > remaining) {
+      if (remaining > 0) {
+        evidenceBlocks.push(safeTruncate(block, remaining));
+      }
+      break;
+    }
+    evidenceBlocks.push(block);
+    usedChars += separatorChars + block.length;
+  }
+
+  return {
+    citations,
+    evidenceContext: evidenceBlocks.join('\n\n'),
+    results: fusedResults,
+    sourceManifest: Array.from(manifest.values()),
+  };
+}