From 37756832034817a580659b9be89291d05155345c Mon Sep 17 00:00:00 2001 From: Vishakh Date: Mon, 13 Oct 2025 21:04:21 -0400 Subject: [PATCH 01/75] Implement "Run All" feature for analyzing all studies with matching SNPs Added a "Run All" functionality to process all studies in the database with user SNP matches. Introduced a dedicated modal for real-time progress updates, including status details and error handling. Extended `MenuBar` with a trigger for the feature. Updated API route to support bulk data fetching for this process. --- app/api/studies/route.ts | 24 ++- app/components/MenuBar.tsx | 27 +++- app/components/RunAllModal.tsx | 134 +++++++++++++++++ app/globals.css | 148 +++++++++++++++++++ app/page.tsx | 257 ++++++++++++++++++++++++++++++++- next.config.mjs | 23 ++- 6 files changed, 601 insertions(+), 12 deletions(-) create mode 100644 app/components/RunAllModal.tsx diff --git a/app/api/studies/route.ts b/app/api/studies/route.ts index 6ef9833..9192e56 100644 --- a/app/api/studies/route.ts +++ b/app/api/studies/route.ts @@ -235,7 +235,14 @@ export async function GET(request: NextRequest) { const searchParams = request.nextUrl.searchParams; const search = searchParams.get("search")?.trim(); const trait = searchParams.get("trait")?.trim(); - const limit = Math.max(10, Math.min(Number(searchParams.get("limit")) || 75, 200)); + + // Special parameter for "Run All" - fetches all studies with SNPs + const fetchAll = searchParams.get("fetchAll") === "true"; + // Allow larger batches for pagination (up to 50000 for Run All), or unlimited for fetchAll + const requestedLimit = Number(searchParams.get("limit")) || 75; + const limit = fetchAll ? 999999 : Math.max(10, Math.min(requestedLimit, 50000)); + const offset = Math.max(0, Number(searchParams.get("offset")) || 0); + const sort = searchParams.get("sort") ?? "relevance"; const direction = searchParams.get("direction") === "asc" ? "asc" : "desc"; const minSampleSize = parseInteger(searchParams.get("minSampleSize")); @@ -265,6 +272,12 @@ export async function GET(request: NextRequest) { params.push(trait, trait); } + // For fetchAll, always require SNPs and risk alleles (since we're doing SNP matching) + if (fetchAll) { + filters.push("(snps IS NOT NULL AND snps != '')"); + filters.push("(strongest_snp_risk_allele IS NOT NULL AND strongest_snp_risk_allele != '')"); + } + const whereClause = filters.length ? `WHERE ${filters.join(" AND ")}` : ""; // Use appropriate ID selection based on database type @@ -300,11 +313,14 @@ export async function GET(request: NextRequest) { snps FROM gwas_catalog ${whereClause} - LIMIT ?`; - const rawLimit = Math.min(limit * 4, 800); + LIMIT ? OFFSET ?`; + // When fetching for Run All (no filters except SNP requirements), allow full batch size + // Otherwise use the 4x multiplier with 800 cap for filtered queries + const isRunAllQuery = excludeLowQuality === false && excludeMissingGenotype === false && !search && !trait; + const rawLimit = fetchAll ? limit : (isRunAllQuery ? limit : Math.min(limit * 4, 800)); try { - const rawRows = await executeQuery(baseQuery, [...params, rawLimit]); + const rawRows = await executeQuery(baseQuery, [...params, rawLimit, offset]); const maxPValue = maxPValueRaw ? parsePValue(maxPValueRaw) : null; const minLogP = minLogPRaw ? 
Number(minLogPRaw) : null; diff --git a/app/components/MenuBar.tsx b/app/components/MenuBar.tsx index bff06ee..01f176a 100644 --- a/app/components/MenuBar.tsx +++ b/app/components/MenuBar.tsx @@ -5,7 +5,13 @@ import UserDataUpload, { useGenotype } from "./UserDataUpload"; import { useResults } from "./ResultsContext"; import { FileIcon, SaveIcon, TrashIcon, MessageIcon, ClockIcon } from "./Icons"; -export default function MenuBar() { +type MenuBarProps = { + onRunAll?: () => void; + isRunningAll?: boolean; + runAllProgress?: { current: number; total: number }; +}; + +export default function MenuBar({ onRunAll, isRunningAll, runAllProgress }: MenuBarProps) { const { isUploaded, genotypeData, fileHash } = useGenotype(); const { savedResults, saveToFile, loadFromFile, clearResults } = useResults(); const [isLoadingFile, setIsLoadingFile] = useState(false); @@ -72,6 +78,25 @@ export default function MenuBar() { {isUploaded && ( <> +
+ {onRunAll && ( + + )}
{savedResults.length > 0 && ( diff --git a/app/components/RunAllModal.tsx b/app/components/RunAllModal.tsx new file mode 100644 index 0000000..e94ada2 --- /dev/null +++ b/app/components/RunAllModal.tsx @@ -0,0 +1,134 @@ +"use client"; + +type RunAllModalProps = { + isOpen: boolean; + onClose: () => void; + status: { + phase: 'fetching' | 'analyzing' | 'complete' | 'error'; + fetchedBatches: number; + totalStudiesFetched: number; + totalInDatabase: number; + matchingStudies: number; + processedCount: number; + totalToProcess: number; + matchCount: number; + startTime?: number; + elapsedSeconds?: number; + etaSeconds?: number; + errorMessage?: string; + }; +}; + +export default function RunAllModal({ isOpen, onClose, status }: RunAllModalProps) { + if (!isOpen) return null; + + const canClose = status.phase === 'complete' || status.phase === 'error'; + + const formatTime = (seconds: number) => { + if (seconds < 60) return `${Math.round(seconds)}s`; + const mins = Math.floor(seconds / 60); + const secs = Math.round(seconds % 60); + return `${mins}m ${secs}s`; + }; + + return ( +
+
e.stopPropagation()}> +
+

Run All Analysis

+ {canClose && ( + + )} +
+ +
+ {status.phase === 'fetching' && ( +
+
+
+

Fetching Studies...

+
+
+

Batches fetched: {status.fetchedBatches}

+

Total studies fetched: {status.totalStudiesFetched.toLocaleString()}

+

Studies matching your SNPs: {status.matchingStudies.toLocaleString()}

+
+
+ )} + + {status.phase === 'analyzing' && ( +
+
+
+

Processing Studies...

+
+
+
0 ? (status.totalStudiesFetched / status.totalInDatabase) * 100 : 0}%` }} + >
+
+
+ {status.totalInDatabase > 0 && ( +

Database: {status.totalInDatabase.toLocaleString()} studies with SNP data

+ )} +

Fetched: {status.totalStudiesFetched.toLocaleString()} / {status.totalInDatabase > 0 ? status.totalInDatabase.toLocaleString() : '...'} ({status.fetchedBatches} batches)

+

Matching your SNPs: {status.matchingStudies.toLocaleString()} analyzed ({status.matchCount.toLocaleString()} matches)

+ {status.elapsedSeconds !== undefined && ( +

Elapsed: {formatTime(status.elapsedSeconds)}

+ )} + {status.etaSeconds !== undefined && status.etaSeconds > 0 && ( +

ETA: {formatTime(status.etaSeconds)}

+ )} +

Analysis happens on the fly as studies are downloaded...

+
+
+ )} + + {status.phase === 'complete' && ( +
+
+ +

Analysis Complete!

+
+
+

Fetched: {status.totalStudiesFetched.toLocaleString()} studies from the database

+

Analyzed: {status.matchingStudies.toLocaleString()} matching your SNPs

+

Matches found: {status.matchCount.toLocaleString()}

+ {status.elapsedSeconds !== undefined && ( +

Total time: {formatTime(status.elapsedSeconds)}

+ )} +

Your results have been saved and can be viewed in the table below.

+
+
+ +
+
+ )} + + {status.phase === 'error' && ( +
+
+ +

Analysis Failed

+
+
+

{status.errorMessage || 'Unknown error occurred'}

+

Partial results (if any) have been saved.

+
+
+ +
+
+ )} +
+
+
+ ); +} diff --git a/app/globals.css b/app/globals.css index bb238f4..decf882 100644 --- a/app/globals.css +++ b/app/globals.css @@ -2587,3 +2587,151 @@ tbody tr:hover { linear-gradient(to right, rgba(0, 0, 0, 0.4) 0%, transparent 20px), linear-gradient(to left, rgba(0, 0, 0, 0.4) 0%, transparent 20px); } + +/* Run All Modal */ +.run-all-modal { + max-width: 500px; + width: 90%; +} + +.run-all-modal .modal-body { + padding: 1.5rem; +} + +.status-section { + display: flex; + flex-direction: column; + gap: 1.5rem; +} + +.status-header { + display: flex; + align-items: center; + gap: 1rem; +} + +.status-header h3 { + margin: 0; + font-size: 1.25rem; + font-weight: 600; +} + +.spinner { + width: 24px; + height: 24px; + border: 3px solid var(--border-color); + border-top-color: var(--accent-blue); + border-radius: 50%; + animation: spin 0.8s linear infinite; +} + +@keyframes spin { + to { transform: rotate(360deg); } +} + +.success-icon { + display: flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + border-radius: 50%; + background: var(--accent-green); + color: white; + font-size: 1.5rem; + font-weight: bold; +} + +.error-icon { + display: flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + border-radius: 50%; + background: var(--accent-red); + color: white; + font-size: 1.5rem; + font-weight: bold; +} + +.progress-bar { + width: 100%; + height: 8px; + background: var(--surface-bg); + border-radius: 4px; + overflow: hidden; +} + +.progress-fill { + height: 100%; + background: linear-gradient(90deg, var(--accent-blue), var(--accent-green)); + transition: width 0.3s ease; + border-radius: 4px; +} + +.status-details { + display: flex; + flex-direction: column; + gap: 0.75rem; +} + +.status-details p { + margin: 0; + font-size: 0.95rem; + color: var(--text-secondary); +} + +.status-details strong { + color: var(--text-primary); + font-weight: 600; +} + +.status-hint { + font-size: 0.875rem !important; + color: var(--text-muted) !important; + font-style: italic; +} + +.error-message { + color: var(--accent-red) !important; + font-weight: 500; +} + +.status-section.complete .status-details p:not(.status-hint) { + font-size: 1rem; +} + +.modal-actions { + display: flex; + gap: 0.75rem; + margin-top: 1rem; + justify-content: flex-end; +} + +.modal-button { + padding: 0.625rem 1.25rem; + border-radius: 6px; + border: 1px solid var(--border-color); + background: var(--surface-bg); + color: var(--text-primary); + font-size: 0.9375rem; + font-weight: 500; + cursor: pointer; + transition: all 0.2s ease; +} + +.modal-button:hover { + background: var(--border-color); +} + +.modal-button.primary { + background: var(--accent-blue); + border-color: var(--accent-blue); + color: white; +} + +.modal-button.primary:hover { + background: #2563EB; + border-color: #2563EB; +} diff --git a/app/page.tsx b/app/page.tsx index 0325456..2d8c300 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -9,7 +9,9 @@ import VariantChips from "./components/VariantChips"; import Footer from "./components/Footer"; import DisclaimerModal from "./components/DisclaimerModal"; import TermsAcceptanceModal from "./components/TermsAcceptanceModal"; +import RunAllModal from "./components/RunAllModal"; import { hasMatchingSNPs } from "@/lib/snp-utils"; +import { analyzeStudyClientSide } from "@/lib/risk-calculator"; import { trackSearch, trackFilterChange, @@ -64,7 +66,7 @@ type Study = { pValueNumeric: number | null; pValueLabel: string; logPValue: number | null; - 
qualityFlags: string[]; + qualityFlags: Array<{ message: string; severity: string }>; isLowQuality: boolean; confidenceBand: ConfidenceBand; publicationDate: number | null; @@ -185,7 +187,7 @@ function buildQuery(filters: Filters): string { function MainContent() { const { genotypeData, isUploaded, setOnDataLoadedCallback } = useGenotype(); - const { setOnResultsLoadedCallback } = useResults(); + const { setOnResultsLoadedCallback, addResult, hasResult } = useResults(); const [filters, setFilters] = useState(defaultFilters); const [debouncedSearch, setDebouncedSearch] = useState(defaultFilters.search); const [traits, setTraits] = useState([]); @@ -200,6 +202,32 @@ function MainContent() { const [error, setError] = useState(null); const [sectionCollapsed, setSectionCollapsed] = useState(false); const [showTermsModal, setShowTermsModal] = useState(false); + const [isRunningAll, setIsRunningAll] = useState(false); + const [runAllProgress, setRunAllProgress] = useState({ current: 0, total: 0 }); + const [showRunAllModal, setShowRunAllModal] = useState(false); + const [runAllStatus, setRunAllStatus] = useState<{ + phase: 'fetching' | 'analyzing' | 'complete' | 'error'; + fetchedBatches: number; + totalStudiesFetched: number; + totalInDatabase: number; + matchingStudies: number; + processedCount: number; + totalToProcess: number; + matchCount: number; + startTime?: number; + elapsedSeconds?: number; + etaSeconds?: number; + errorMessage?: string; + }>({ + phase: 'fetching', + fetchedBatches: 0, + totalStudiesFetched: 0, + totalInDatabase: 0, + matchingStudies: 0, + processedCount: 0, + totalToProcess: 0, + matchCount: 0, + }); const [loadTime, setLoadTime] = useState(null); // Check if user has accepted terms on mount @@ -399,6 +427,214 @@ function MainContent() { } }; + const handleRunAll = async () => { + if (!genotypeData || !isUploaded) { + alert("Please upload your genetic data first"); + return; + } + + // Get list of user's SNPs + const userSnps = Array.from(genotypeData.keys()); + + if (userSnps.length === 0) { + alert("No SNPs found in your genetic data"); + return; + } + + const confirmRun = window.confirm( + `This will analyze ALL studies in the database where you have matching SNPs. This may take several minutes. 
Continue?` + ); + + if (!confirmRun) return; + + // Initialize and show modal + setIsRunningAll(true); + setShowRunAllModal(true); + const startTime = Date.now(); + setRunAllStatus({ + phase: 'fetching', + fetchedBatches: 0, + totalStudiesFetched: 0, + totalInDatabase: 0, + matchingStudies: 0, + processedCount: 0, + totalToProcess: 0, + matchCount: 0, + startTime, + }); + setRunAllProgress({ current: 0, total: 0 }); + + try { + // Process each batch immediately without queuing - true streaming + const batchSize = 10000; // Smaller batches to reduce memory pressure + let offset = 0; + let fetchedBatches = 0; + let totalInDatabase = 0; + let totalMatchingStudies = 0; + let processedCount = 0; + let matchCount = 0; + + // Start in analyzing phase immediately + setRunAllStatus(prev => ({ + ...prev, + phase: 'analyzing', + totalToProcess: 0, + startTime, + })); + + // Fetch and process batches sequentially to control memory + while (true) { + const response = await fetch( + `/api/studies?limit=${batchSize}&excludeLowQuality=false&excludeMissingGenotype=false&offset=${offset}` + ); + + if (!response.ok) { + throw new Error(`Failed to fetch studies batch at offset ${offset}`); + } + + const data: StudiesResponse = await response.json(); + fetchedBatches++; + + // Get total count from first batch + if (offset === 0 && data.sourceCount) { + totalInDatabase = data.sourceCount; + } + + // Process each study directly - no intermediate arrays + for (const study of data.data) { + // Quick filter: check if has SNPs matching user + if (!study.snps || !hasMatchingSNPs(genotypeData, study.snps)) { + continue; + } + + totalMatchingStudies++; + + // Skip if already analyzed + if (hasResult(study.id)) { + processedCount++; + continue; + } + + // Skip if no risk allele or effect size + if (!study.strongest_snp_risk_allele || !study.or_or_beta) { + processedCount++; + continue; + } + + try { + // Perform client-side analysis + const analysisResult = analyzeStudyClientSide( + genotypeData, + study.snps, + study.strongest_snp_risk_allele, + study.or_or_beta, + study.study_accession, + 'OR', + null + ); + + // Save result if there's a match + if (analysisResult.hasMatch) { + const trait = study.mapped_trait ?? study.disease_trait ?? "Unknown trait"; + addResult({ + studyId: study.id, + gwasId: study.study_accession || undefined, + traitName: trait, + studyTitle: study.study || "Untitled study", + userGenotype: analysisResult.userGenotype!, + riskAllele: analysisResult.riskAllele!, + effectSize: analysisResult.effectSize!, + riskScore: analysisResult.riskScore!, + riskLevel: analysisResult.riskLevel!, + matchedSnp: analysisResult.matchedSnp!, + analysisDate: new Date().toISOString(), + }); + matchCount++; + } + } catch (err) { + console.error(`Failed to analyze study ${study.id}:`, err); + } + + processedCount++; + + // Update progress every 500 studies and yield to browser + if (processedCount % 500 === 0) { + const elapsedMs = Date.now() - startTime; + const elapsedSeconds = elapsedMs / 1000; + const studiesPerSecond = (offset + data.data.length) / elapsedSeconds; + const remainingStudies = totalInDatabase - (offset + data.data.length); + const etaSeconds = totalInDatabase > 0 && studiesPerSecond > 0 + ? 
remainingStudies / studiesPerSecond + : 0; + + setRunAllProgress({ current: processedCount, total: totalMatchingStudies }); + setRunAllStatus(prev => ({ + ...prev, + fetchedBatches, + totalStudiesFetched: offset + data.data.length, + totalInDatabase, + matchingStudies: totalMatchingStudies, + processedCount, + matchCount, + totalToProcess: totalMatchingStudies, + elapsedSeconds, + etaSeconds, + })); + // Yield to browser to prevent UI freeze and allow GC + await new Promise(resolve => setTimeout(resolve, 0)); + } + } + + // Update status after this batch with timing + const elapsedMs = Date.now() - startTime; + const elapsedSeconds = elapsedMs / 1000; + const studiesPerSecond = (offset + data.data.length) / elapsedSeconds; + const remainingStudies = totalInDatabase - (offset + data.data.length); + const etaSeconds = totalInDatabase > 0 && studiesPerSecond > 0 + ? remainingStudies / studiesPerSecond + : 0; + + setRunAllProgress({ current: offset + data.data.length, total: totalInDatabase }); + setRunAllStatus(prev => ({ + ...prev, + fetchedBatches, + totalStudiesFetched: offset + data.data.length, + totalInDatabase, + matchingStudies: totalMatchingStudies, + processedCount, + matchCount, + totalToProcess: totalMatchingStudies, + elapsedSeconds, + etaSeconds, + })); + + // Stop if we got fewer results than requested OR we've reached the total count + if (data.data.length < batchSize || (totalInDatabase > 0 && offset + data.data.length >= totalInDatabase)) { + break; + } + + offset += batchSize; + } + + // Complete - preserve final elapsed time + const finalElapsedSeconds = (Date.now() - startTime) / 1000; + setRunAllStatus(prev => ({ + ...prev, + phase: 'complete', + elapsedSeconds: finalElapsedSeconds, + })); + } catch (error) { + console.error('Run All failed:', error); + setRunAllStatus(prev => ({ + ...prev, + phase: 'error', + errorMessage: error instanceof Error ? error.message : 'Unknown error', + })); + } finally { + setIsRunningAll(false); + } + }; + const summaryText = useMemo(() => { if (error) { return error; @@ -456,7 +692,11 @@ function MainContent() { isOpen={showTermsModal} onAccept={() => setShowTermsModal(false)} /> - +
@@ -768,9 +1008,9 @@ function MainContent() { {confidenceLabel} {study.qualityFlags.length > 0 && (
- {study.qualityFlags.map((flag) => ( - - {flag} + {study.qualityFlags.map((flag, index) => ( + + {flag.message} ))}
@@ -794,6 +1034,11 @@ function MainContent() {
+ setShowRunAllModal(false)} + status={runAllStatus} + />
); } diff --git a/next.config.mjs b/next.config.mjs index 2b56006..35ed516 100644 --- a/next.config.mjs +++ b/next.config.mjs @@ -1,8 +1,29 @@ +import path from 'path'; +import { fileURLToPath } from 'url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + /** @type {import('next').NextConfig} */ const nextConfig = { experimental: { optimizePackageImports: ["react", "react-dom"] - } + }, + webpack: (config, { isServer }) => { + // Reduce file watching overhead - prevent watching parent directories + config.watchOptions = { + ignored: [ + '**/node_modules/**', + '**/.git/**', + '**/localdata/**', + '**/.next/**', + // Explicitly ignore parent directories + path.resolve(__dirname, '..'), + ], + aggregateTimeout: 300, + }; + return config; + }, }; export default nextConfig; From bc8568f82532198f30406c8a93a8ff84cfd21fb7 Mon Sep 17 00:00:00 2001 From: Vishakh Date: Mon, 13 Oct 2025 21:13:18 -0400 Subject: [PATCH 02/75] Introduce smoother ETA calculation with exponential moving average Implemented an exponential moving average (EMA) for rate calculations to provide a smoother and more accurate ETA in the "Run All" process. Added a safety buffer and enhanced progress updates with improved handling for early batch estimations. Updated UI to display "Calculating..." when ETA is unavailable. --- app/components/RunAllModal.tsx | 4 ++-- app/page.tsx | 42 ++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/app/components/RunAllModal.tsx b/app/components/RunAllModal.tsx index e94ada2..e3bb9c2 100644 --- a/app/components/RunAllModal.tsx +++ b/app/components/RunAllModal.tsx @@ -79,8 +79,8 @@ export default function RunAllModal({ isOpen, onClose, status }: RunAllModalProp {status.elapsedSeconds !== undefined && (

Elapsed: {formatTime(status.elapsedSeconds)}

)} - {status.etaSeconds !== undefined && status.etaSeconds > 0 && ( -

ETA: {formatTime(status.etaSeconds)}

+ {status.etaSeconds !== undefined && ( +

ETA: {status.etaSeconds > 0 ? formatTime(status.etaSeconds) : 'Calculating...'}

)}

Analysis happens on the fly as studies are downloaded...

diff --git a/app/page.tsx b/app/page.tsx index 2d8c300..057e581 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -474,6 +474,10 @@ function MainContent() { let processedCount = 0; let matchCount = 0; + // EMA for rate calculation - smoother ETA + let smoothedRate = 0; + const smoothingFactor = 0.3; + // Start in analyzing phase immediately setRunAllStatus(prev => ({ ...prev, @@ -561,11 +565,20 @@ function MainContent() { if (processedCount % 500 === 0) { const elapsedMs = Date.now() - startTime; const elapsedSeconds = elapsedMs / 1000; - const studiesPerSecond = (offset + data.data.length) / elapsedSeconds; - const remainingStudies = totalInDatabase - (offset + data.data.length); - const etaSeconds = totalInDatabase > 0 && studiesPerSecond > 0 - ? remainingStudies / studiesPerSecond - : 0; + + // Calculate ETA only after 3+ batches for accuracy + let etaSeconds = 0; + if (fetchedBatches >= 3 && totalInDatabase > 0) { + const instantRate = (offset + data.data.length) / elapsedSeconds; + // Exponential moving average for smoother rate + smoothedRate = smoothedRate === 0 + ? instantRate + : (smoothingFactor * instantRate) + ((1 - smoothingFactor) * smoothedRate); + + const remainingStudies = totalInDatabase - (offset + data.data.length); + // Add 20% safety buffer for slowdown + etaSeconds = smoothedRate > 0 ? (remainingStudies / smoothedRate) * 1.2 : 0; + } setRunAllProgress({ current: processedCount, total: totalMatchingStudies }); setRunAllStatus(prev => ({ @@ -588,11 +601,20 @@ function MainContent() { // Update status after this batch with timing const elapsedMs = Date.now() - startTime; const elapsedSeconds = elapsedMs / 1000; - const studiesPerSecond = (offset + data.data.length) / elapsedSeconds; - const remainingStudies = totalInDatabase - (offset + data.data.length); - const etaSeconds = totalInDatabase > 0 && studiesPerSecond > 0 - ? remainingStudies / studiesPerSecond - : 0; + + // Calculate ETA only after 3+ batches for accuracy + let etaSeconds = 0; + if (fetchedBatches >= 3 && totalInDatabase > 0) { + const instantRate = (offset + data.data.length) / elapsedSeconds; + // Exponential moving average for smoother rate + smoothedRate = smoothedRate === 0 + ? instantRate + : (smoothingFactor * instantRate) + ((1 - smoothingFactor) * smoothedRate); + + const remainingStudies = totalInDatabase - (offset + data.data.length); + // Add 20% safety buffer for slowdown + etaSeconds = smoothedRate > 0 ? (remainingStudies / smoothedRate) * 1.2 : 0; + } setRunAllProgress({ current: offset + data.data.length, total: totalInDatabase }); setRunAllStatus(prev => ({ From 42a2f35d0b69a35f975634799fb6c47af199a03b Mon Sep 17 00:00:00 2001 From: Vishakh Date: Tue, 14 Oct 2025 20:55:24 -0400 Subject: [PATCH 03/75] Add IndexedDB-backed "Run All" implementation for SNP analysis Implemented IndexedDB-based caching for GWAS catalog and reworked the "Run All" analysis to process studies directly from local storage. Added support for downloading, decompressing, parsing, and storing large GWAS datasets in IndexedDB for efficient processing. Enhanced real-time progress updates and improved memory efficiency with batch processing. 
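For reference, consumption of the cache follows this shape (a minimal sketch, not the exact contents of lib/run-all-indexed.ts; forEachCachedBatch and onBatch are illustrative names, while gwasDB.streamStudies is the async generator added in lib/gwas-db.ts below):

    import { gwasDB, type GWASStudy } from '@/lib/gwas-db';

    // Iterate the locally cached catalog in cursor-ranged batches so memory
    // stays bounded regardless of catalog size.
    async function forEachCachedBatch(
      onBatch: (batch: GWASStudy[]) => void,
      batchSize = 10000,
    ): Promise<number> {
      let total = 0;
      for await (const batch of gwasDB.streamStudies(batchSize)) {
        onBatch(batch); // e.g. SNP matching and risk scoring for this batch
        total += batch.length;
        // Yield to the event loop so the progress modal can repaint.
        await new Promise((resolve) => setTimeout(resolve, 0));
      }
      return total;
    }

Each batch resumes from an IDBKeyRange lower bound instead of re-walking the cursor from the start, which keeps per-batch reads cheap.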
--- .gitignore | 1 + app/api/studies/route.ts | 25 +- app/components/MenuBar.tsx | 44 ++- app/components/ResultsContext.tsx | 16 ++ app/components/RunAllModal.tsx | 83 ++++-- app/components/UserDataUpload.tsx | 15 +- app/page.tsx | 249 ++++------------- lib/analysis-worker.ts | 144 ++++++++++ lib/gwas-db.ts | 427 ++++++++++++++++++++++++++++++ lib/results-manager.ts | 40 ++- lib/risk-calculator.ts | 8 +- lib/run-all-indexed.ts | 204 ++++++++++++++ lib/snp-utils.ts | 20 +- next.config.mjs | 9 + package-lock.json | 12 +- package.json | 2 + 16 files changed, 1068 insertions(+), 231 deletions(-) create mode 100644 lib/analysis-worker.ts create mode 100644 lib/gwas-db.ts create mode 100644 lib/run-all-indexed.ts diff --git a/.gitignore b/.gitignore index e208c04..11c2226 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ yarn-error.log* /.idea/ /localdata/gwas_catalog.sqlite /localdata/gwas_catalog_v1.0.2-associations_e115_r2025-09-15.tsv +/localdata/gwas_catalog_v1.0.2-associations_e115_r2025-09-15.tsv.gz diff --git a/app/api/studies/route.ts b/app/api/studies/route.ts index 9192e56..d0f53ce 100644 --- a/app/api/studies/route.ts +++ b/app/api/studies/route.ts @@ -238,9 +238,9 @@ export async function GET(request: NextRequest) { // Special parameter for "Run All" - fetches all studies with SNPs const fetchAll = searchParams.get("fetchAll") === "true"; - // Allow larger batches for pagination (up to 50000 for Run All), or unlimited for fetchAll + // Allow larger batches for pagination (up to 100000 for Run All with fetchAll) const requestedLimit = Number(searchParams.get("limit")) || 75; - const limit = fetchAll ? 999999 : Math.max(10, Math.min(requestedLimit, 50000)); + const limit = fetchAll ? Math.max(10, Math.min(requestedLimit, 100000)) : Math.max(10, Math.min(requestedLimit, 50000)); const offset = Math.max(0, Number(searchParams.get("offset")) || 0); const sort = searchParams.get("sort") ?? 
"relevance"; @@ -423,6 +423,27 @@ export async function GET(request: NextRequest) { const finalResults = sortedStudies.slice(0, limit); + // For Run All queries, return minimal payload to avoid JSON serialization limits + if (isRunAllQuery) { + const minimalResults = finalResults.map(s => ({ + id: s.id, + study_accession: s.study_accession, + disease_trait: s.disease_trait, + study: s.study, + snps: s.snps, + strongest_snp_risk_allele: s.strongest_snp_risk_allele, + or_or_beta: s.or_or_beta, + })); + + return NextResponse.json({ + data: minimalResults, + total: studies.length, + limit, + truncated: studies.length > finalResults.length, + sourceCount, + }); + } + return NextResponse.json({ data: finalResults, total: studies.length, diff --git a/app/components/MenuBar.tsx b/app/components/MenuBar.tsx index 01f176a..a2ac5e3 100644 --- a/app/components/MenuBar.tsx +++ b/app/components/MenuBar.tsx @@ -16,6 +16,7 @@ export default function MenuBar({ onRunAll, isRunningAll, runAllProgress }: Menu const { savedResults, saveToFile, loadFromFile, clearResults } = useResults(); const [isLoadingFile, setIsLoadingFile] = useState(false); const [theme, setTheme] = useState<"light" | "dark">("dark"); + const [cacheInfo, setCacheInfo] = useState<{ studies: number; sizeMB: number } | null>(null); useEffect(() => { // Detect system preference on mount @@ -26,6 +27,20 @@ export default function MenuBar({ onRunAll, isRunningAll, runAllProgress }: Menu // Apply initial theme document.documentElement.setAttribute("data-theme", initialTheme); document.documentElement.style.colorScheme = initialTheme; + + // Load cache info + const loadCacheInfo = async () => { + const { gwasDB } = await import('@/lib/gwas-db'); + const metadata = await gwasDB.getMetadata(); + if (metadata) { + const size = await gwasDB.getStorageSize(); + setCacheInfo({ + studies: metadata.totalStudies, + sizeMB: Math.round(size / 1024 / 1024) + }); + } + }; + loadCacheInfo(); }, []); useEffect(() => { @@ -126,7 +141,7 @@ export default function MenuBar({ onRunAll, isRunningAll, runAllProgress }: Menu @@ -147,6 +162,33 @@ export default function MenuBar({ onRunAll, isRunningAll, runAllProgress }: Menu
+ {cacheInfo && ( + <> + + {cacheInfo.studies.toLocaleString()} studies cached ({cacheInfo.sizeMB} MB) + + + + )} + void; + addResultsBatch: (results: SavedResult[]) => void; removeResult: (studyId: number) => void; clearResults: () => void; saveToFile: (genotypeSize?: number, genotypeHash?: string) => void; @@ -31,6 +32,20 @@ export function ResultsProvider({ children }: { children: ReactNode }) { }); }; + const addResultsBatch = (results: SavedResult[]) => { + setSavedResults(prev => { + // Create a map of existing results by studyId for O(1) lookup + const existingMap = new Map(prev.map(r => [r.studyId, r])); + + // Add/update with new results + for (const result of results) { + existingMap.set(result.studyId, result); + } + + return Array.from(existingMap.values()); + }); + }; + const removeResult = (studyId: number) => { setSavedResults(prev => prev.filter(r => r.studyId !== studyId)); }; @@ -93,6 +108,7 @@ export function ResultsProvider({ children }: { children: ReactNode }) { void; status: { - phase: 'fetching' | 'analyzing' | 'complete' | 'error'; + phase: 'fetching' | 'downloading' | 'decompressing' | 'parsing' | 'storing' | 'analyzing' | 'complete' | 'error'; fetchedBatches: number; totalStudiesFetched: number; totalInDatabase: number; @@ -44,25 +44,48 @@ export default function RunAllModal({ isOpen, onClose, status }: RunAllModalProp
- {status.phase === 'fetching' && ( + {(status.phase === 'downloading' || status.phase === 'fetching') && (
-

Fetching Studies...

+

Downloading GWAS Catalog...

+
+
+
0 ? (status.totalStudiesFetched / status.totalInDatabase) * 100 : 0}%` }} + >
-

Batches fetched: {status.fetchedBatches}

-

Total studies fetched: {status.totalStudiesFetched.toLocaleString()}

-

Studies matching your SNPs: {status.matchingStudies.toLocaleString()}

+

Downloaded: {(status.totalStudiesFetched / 1024 / 1024).toFixed(1)} MB / {(status.totalInDatabase / 1024 / 1024).toFixed(1)} MB

+ {status.elapsedSeconds !== undefined && ( +

Elapsed: {formatTime(status.elapsedSeconds)}

+ )} +

First-time setup: this data will be cached locally...

)} - {status.phase === 'analyzing' && ( + {status.phase === 'decompressing' && (
-

Processing Studies...

+

Decompressing Data...

+
+
+

Decompressing the gzipped catalog file...

+ {status.elapsedSeconds !== undefined && ( +

Elapsed: {formatTime(status.elapsedSeconds)}

+ )} +
+
+ )} + + {status.phase === 'parsing' && ( +
+
+
+

Parsing Studies...

- {status.totalInDatabase > 0 && ( -

Database: {status.totalInDatabase.toLocaleString()} studies with SNP data

+

Parsed: {status.totalStudiesFetched.toLocaleString()} / {status.totalInDatabase.toLocaleString()} lines

+ {status.elapsedSeconds !== undefined && ( +

Elapsed: {formatTime(status.elapsedSeconds)}

)} -

Fetched: {status.totalStudiesFetched.toLocaleString()} / {status.totalInDatabase > 0 ? status.totalInDatabase.toLocaleString() : '...'} ({status.fetchedBatches} batches)

-

Matching your SNPs: {status.matchingStudies.toLocaleString()} analyzed ({status.matchCount.toLocaleString()} matches)

+
+
+ )} + + {status.phase === 'storing' && ( +
+
+
+

Storing in Local Database...

+
+
+
0 ? (status.totalStudiesFetched / status.totalInDatabase) * 100 : 0}%` }} + >
+
+
+

Stored: {status.totalStudiesFetched.toLocaleString()} / {status.totalInDatabase.toLocaleString()} studies

{status.elapsedSeconds !== undefined && (

Elapsed: {formatTime(status.elapsedSeconds)}

)} - {status.etaSeconds !== undefined && ( -

ETA: {status.etaSeconds > 0 ? formatTime(status.etaSeconds) : 'Calculating...'}

+
+
+ )} + + {status.phase === 'analyzing' && ( +
+
+
+

Analyzing Studies...

+
+
+

Total studies: {status.totalInDatabase.toLocaleString()}

+

Matching your SNPs: {status.matchingStudies.toLocaleString()} analyzed ({status.matchCount.toLocaleString()} matches)

+ {status.elapsedSeconds !== undefined && ( +

Elapsed: {formatTime(status.elapsedSeconds)}

)} -

Analysis happens on the fly as studies are downloaded...

+

Processing sequentially to minimize memory usage...

)} diff --git a/app/components/UserDataUpload.tsx b/app/components/UserDataUpload.tsx index 47dc7a9..c35a226 100644 --- a/app/components/UserDataUpload.tsx +++ b/app/components/UserDataUpload.tsx @@ -1,6 +1,6 @@ "use client"; -import { useState, useRef, createContext, useContext } from "react"; +import { useState, useRef, createContext, useContext, useCallback } from "react"; import { GenotypeData, detectAndParseGenotypeFile, validateFileSize, validateFileFormat } from "@/lib/genotype-parser"; import { calculateFileHash } from "@/lib/file-hash"; import { @@ -28,7 +28,7 @@ export function GenotypeProvider({ children }: { children: React.ReactNode }) { const [genotypeData, setGenotypeData] = useState | null>(null); const [isLoading, setIsLoading] = useState(false); const [error, setError] = useState(null); - const [onDataLoaded, setOnDataLoaded] = useState<(() => void) | null>(null); + const onDataLoadedRef = useRef<(() => void) | null>(null); const [fileHash, setFileHash] = useState(null); const [originalFileName, setOriginalFileName] = useState(null); @@ -80,8 +80,8 @@ export function GenotypeProvider({ children }: { children: React.ReactNode }) { setOriginalFileName(file.name); // Call the callback if it exists - if (onDataLoaded) { - onDataLoaded(); + if (onDataLoadedRef.current) { + onDataLoadedRef.current(); } } catch (err) { const errorMessage = err instanceof Error ? err.message : 'Upload failed'; @@ -104,6 +104,11 @@ export function GenotypeProvider({ children }: { children: React.ReactNode }) { trackFileCleared(); }; + const setOnDataLoadedCallback = useCallback((cb: (() => void) | null) => { + // Store the callback in a ref to avoid render-phase state updates + onDataLoadedRef.current = cb; + }, []); + return ( diff --git a/app/page.tsx b/app/page.tsx index 057e581..5b87153 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -187,7 +187,7 @@ function buildQuery(filters: Filters): string { function MainContent() { const { genotypeData, isUploaded, setOnDataLoadedCallback } = useGenotype(); - const { setOnResultsLoadedCallback, addResult, hasResult } = useResults(); + const { setOnResultsLoadedCallback, addResult, addResultsBatch, hasResult } = useResults(); const [filters, setFilters] = useState(defaultFilters); const [debouncedSearch, setDebouncedSearch] = useState(defaultFilters.search); const [traits, setTraits] = useState([]); @@ -428,24 +428,31 @@ function MainContent() { }; const handleRunAll = async () => { - if (!genotypeData || !isUploaded) { - alert("Please upload your genetic data first"); - return; - } - - // Get list of user's SNPs - const userSnps = Array.from(genotypeData.keys()); - - if (userSnps.length === 0) { + if (!genotypeData || genotypeData.size === 0) { alert("No SNPs found in your genetic data"); return; } - const confirmRun = window.confirm( - `This will analyze ALL studies in the database where you have matching SNPs. This may take several minutes. 
Continue?` - ); - - if (!confirmRun) return; + // Check if we need to download the catalog first + const { gwasDB } = await import('@/lib/gwas-db'); + const metadata = await gwasDB.getMetadata(); + + if (!metadata) { + const confirmDownload = window.confirm( + `First-time setup: Download ~54MB GWAS Catalog data?\n\n` + + `This will be cached locally for instant future analysis.\n` + + `Estimated storage: ~500MB after decompression.\n\n` + + `Continue?` + ); + if (!confirmDownload) return; + } else { + const confirmRun = window.confirm( + `Analyze all ${metadata.totalStudies.toLocaleString()} studies where you have matching SNPs?\n\n` + + `Using cached data from ${new Date(metadata.downloadDate).toLocaleDateString()}\n\n` + + `Continue?` + ); + if (!confirmRun) return; + } // Initialize and show modal setIsRunningAll(true); @@ -465,186 +472,34 @@ function MainContent() { setRunAllProgress({ current: 0, total: 0 }); try { - // Process each batch immediately without queuing - true streaming - const batchSize = 10000; // Smaller batches to reduce memory pressure - let offset = 0; - let fetchedBatches = 0; - let totalInDatabase = 0; - let totalMatchingStudies = 0; - let processedCount = 0; - let matchCount = 0; - - // EMA for rate calculation - smoother ETA - let smoothedRate = 0; - const smoothingFactor = 0.3; - - // Start in analyzing phase immediately - setRunAllStatus(prev => ({ - ...prev, - phase: 'analyzing', - totalToProcess: 0, - startTime, - })); - - // Fetch and process batches sequentially to control memory - while (true) { - const response = await fetch( - `/api/studies?limit=${batchSize}&excludeLowQuality=false&excludeMissingGenotype=false&offset=${offset}` - ); - - if (!response.ok) { - throw new Error(`Failed to fetch studies batch at offset ${offset}`); - } - - const data: StudiesResponse = await response.json(); - fetchedBatches++; - - // Get total count from first batch - if (offset === 0 && data.sourceCount) { - totalInDatabase = data.sourceCount; - } - - // Process each study directly - no intermediate arrays - for (const study of data.data) { - // Quick filter: check if has SNPs matching user - if (!study.snps || !hasMatchingSNPs(genotypeData, study.snps)) { - continue; - } - - totalMatchingStudies++; - - // Skip if already analyzed - if (hasResult(study.id)) { - processedCount++; - continue; - } - - // Skip if no risk allele or effect size - if (!study.strongest_snp_risk_allele || !study.or_or_beta) { - processedCount++; - continue; - } - - try { - // Perform client-side analysis - const analysisResult = analyzeStudyClientSide( - genotypeData, - study.snps, - study.strongest_snp_risk_allele, - study.or_or_beta, - study.study_accession, - 'OR', - null - ); - - // Save result if there's a match - if (analysisResult.hasMatch) { - const trait = study.mapped_trait ?? study.disease_trait ?? 
"Unknown trait"; - addResult({ - studyId: study.id, - gwasId: study.study_accession || undefined, - traitName: trait, - studyTitle: study.study || "Untitled study", - userGenotype: analysisResult.userGenotype!, - riskAllele: analysisResult.riskAllele!, - effectSize: analysisResult.effectSize!, - riskScore: analysisResult.riskScore!, - riskLevel: analysisResult.riskLevel!, - matchedSnp: analysisResult.matchedSnp!, - analysisDate: new Date().toISOString(), - }); - matchCount++; - } - } catch (err) { - console.error(`Failed to analyze study ${study.id}:`, err); - } - - processedCount++; - - // Update progress every 500 studies and yield to browser - if (processedCount % 500 === 0) { - const elapsedMs = Date.now() - startTime; - const elapsedSeconds = elapsedMs / 1000; - - // Calculate ETA only after 3+ batches for accuracy - let etaSeconds = 0; - if (fetchedBatches >= 3 && totalInDatabase > 0) { - const instantRate = (offset + data.data.length) / elapsedSeconds; - // Exponential moving average for smoother rate - smoothedRate = smoothedRate === 0 - ? instantRate - : (smoothingFactor * instantRate) + ((1 - smoothingFactor) * smoothedRate); - - const remainingStudies = totalInDatabase - (offset + data.data.length); - // Add 20% safety buffer for slowdown - etaSeconds = smoothedRate > 0 ? (remainingStudies / smoothedRate) * 1.2 : 0; - } - - setRunAllProgress({ current: processedCount, total: totalMatchingStudies }); - setRunAllStatus(prev => ({ - ...prev, - fetchedBatches, - totalStudiesFetched: offset + data.data.length, - totalInDatabase, - matchingStudies: totalMatchingStudies, - processedCount, - matchCount, - totalToProcess: totalMatchingStudies, - elapsedSeconds, - etaSeconds, - })); - // Yield to browser to prevent UI freeze and allow GC - await new Promise(resolve => setTimeout(resolve, 0)); - } - } - - // Update status after this batch with timing - const elapsedMs = Date.now() - startTime; - const elapsedSeconds = elapsedMs / 1000; - - // Calculate ETA only after 3+ batches for accuracy - let etaSeconds = 0; - if (fetchedBatches >= 3 && totalInDatabase > 0) { - const instantRate = (offset + data.data.length) / elapsedSeconds; - // Exponential moving average for smoother rate - smoothedRate = smoothedRate === 0 - ? instantRate - : (smoothingFactor * instantRate) + ((1 - smoothingFactor) * smoothedRate); - - const remainingStudies = totalInDatabase - (offset + data.data.length); - // Add 20% safety buffer for slowdown - etaSeconds = smoothedRate > 0 ? 
(remainingStudies / smoothedRate) * 1.2 : 0; - } - - setRunAllProgress({ current: offset + data.data.length, total: totalInDatabase }); - setRunAllStatus(prev => ({ - ...prev, - fetchedBatches, - totalStudiesFetched: offset + data.data.length, - totalInDatabase, - matchingStudies: totalMatchingStudies, - processedCount, - matchCount, - totalToProcess: totalMatchingStudies, - elapsedSeconds, - etaSeconds, - })); - - // Stop if we got fewer results than requested OR we've reached the total count - if (data.data.length < batchSize || (totalInDatabase > 0 && offset + data.data.length >= totalInDatabase)) { - break; - } - - offset += batchSize; - } - - // Complete - preserve final elapsed time - const finalElapsedSeconds = (Date.now() - startTime) / 1000; - setRunAllStatus(prev => ({ - ...prev, - phase: 'complete', - elapsedSeconds: finalElapsedSeconds, - })); + // Use IndexedDB-based implementation + const { runAllAnalysisIndexed } = await import('@/lib/run-all-indexed'); + + const results = await runAllAnalysisIndexed( + genotypeData, + (progress) => { + setRunAllStatus(prev => ({ + ...prev, + phase: progress.phase, + totalStudiesFetched: progress.loaded, + totalInDatabase: progress.total, + matchingStudies: progress.matchingStudies, + matchCount: progress.matchCount, + elapsedSeconds: progress.elapsedSeconds, + fetchedBatches: 0, + processedCount: progress.matchingStudies, + totalToProcess: progress.matchingStudies, + })); + }, + hasResult + ); + + // Add all results in one efficient batch operation + console.log(`Adding ${results.length} results to the results manager...`); + const startAdd = Date.now(); + addResultsBatch(results); + const addTime = Date.now() - startAdd; + console.log(`Finished adding ${results.length} results in ${addTime}ms`); } catch (error) { console.error('Run All failed:', error); setRunAllStatus(prev => ({ @@ -940,7 +795,7 @@ function MainContent() { )} {!loading && - studies.map((study) => { + studies.map((study, index) => { const trait = study.mapped_trait ?? study.disease_trait ?? "—"; const date = study.publicationDate ? new Date(study.publicationDate).toLocaleDateString() @@ -968,7 +823,7 @@ function MainContent() { ? "Medium confidence" : "Lower confidence"; return ( - +
{studyLink ? ( diff --git a/lib/analysis-worker.ts b/lib/analysis-worker.ts new file mode 100644 index 0000000..f545c39 --- /dev/null +++ b/lib/analysis-worker.ts @@ -0,0 +1,144 @@ +// Web Worker for parallel SNP analysis +import { parseVariantIds } from './snp-utils'; +import { calculateRiskScore, isValidGenotype } from './risk-calculator'; + +export type WorkerMessage = { + type: 'analyze'; + studies: Array<{ + id: number; + study_accession: string | null; + disease_trait: string | null; + study: string | null; + snps: string | null; + strongest_snp_risk_allele: string | null; + or_or_beta: string | null; + }>; + genotypeData: [string, string][]; // Map as array for serialization +}; + +export type WorkerResult = { + type: 'results'; + results: Array<{ + studyId: number; + gwasId: string; + traitName: string; + studyTitle: string; + userGenotype: string; + riskAllele: string; + effectSize: string; + riskScore: number; + riskLevel: 'increased' | 'decreased' | 'neutral'; + matchedSnp: string; + }>; + matchCount: number; + processedCount: number; +}; + +export type WorkerProgress = { + type: 'progress'; + processed: number; + total: number; + matchCount: number; +}; + +// Worker message handler +self.onmessage = (e: MessageEvent) => { + const { type, studies, genotypeData } = e.data; + + if (type !== 'analyze') return; + + // Reconstruct Map from array + const genotypeMap = new Map(genotypeData); + + const results: WorkerResult['results'] = []; + let matchCount = 0; + let processedCount = 0; + const totalStudies = studies.length; + + console.log(`Worker starting: ${totalStudies} studies to process`); + + // Send initial progress + self.postMessage({ + type: 'progress', + processed: 0, + total: totalStudies, + matchCount: 0, + } as WorkerProgress); + + for (let i = 0; i < studies.length; i++) { + const study = studies[i]; + + // Send progress updates every 500 studies for more frequent updates + if (i > 0 && i % 500 === 0) { + const progress: WorkerProgress = { + type: 'progress', + processed: i, + total: totalStudies, + matchCount, + }; + self.postMessage(progress); + } + + // Quick filter: check if has SNPs matching user + if (!study.snps) { + continue; + } + + const snpList = parseVariantIds(study.snps); + const hasMatch = snpList.some(snp => genotypeMap.has(snp)); + + if (!hasMatch) { + continue; + } + + processedCount++; + + // Skip if no risk allele or effect size + if (!study.strongest_snp_risk_allele || !study.or_or_beta) { + continue; + } + + // Perform analysis + for (const snp of snpList) { + if (genotypeMap.has(snp)) { + const userGenotype = genotypeMap.get(snp)!; + + if (!isValidGenotype(userGenotype)) { + continue; + } + + const { score, level } = calculateRiskScore( + userGenotype, + study.strongest_snp_risk_allele, + study.or_or_beta, + 'OR' + ); + + results.push({ + studyId: study.id, + gwasId: study.study_accession || '', + traitName: study.disease_trait || 'Unknown trait', + studyTitle: study.study || 'Unknown study', + userGenotype, + riskAllele: study.strongest_snp_risk_allele, + effectSize: study.or_or_beta, + riskScore: score, + riskLevel: level, + matchedSnp: snp, + }); + matchCount++; + break; // Only take first match per study + } + } + } + + // Send results back + const result: WorkerResult = { + type: 'results', + results, + matchCount, + processedCount, + }; + + self.postMessage(result); +}; diff --git a/lib/gwas-db.ts b/lib/gwas-db.ts new file mode 100644 index 0000000..c23139a --- /dev/null +++ b/lib/gwas-db.ts @@ -0,0 +1,427 @@ +// IndexedDB manager for GWAS 
Catalog local storage +import pako from 'pako'; + +const DB_NAME = 'gwas-catalog'; +const DB_VERSION = 1; +const STORE_NAME = 'studies'; +const META_STORE = 'metadata'; + +export type GWASStudy = { + id: number; + study_accession: string | null; + disease_trait: string | null; + study: string | null; + snps: string | null; + strongest_snp_risk_allele: string | null; + or_or_beta: string | null; +}; + +export type GWASMetadata = { + key: string; + downloadDate: string; + fileUrl: string; + totalStudies: number; + version: string; +}; + +export class GWASDatabase { + private db: IDBDatabase | null = null; + + async open(): Promise { + return new Promise((resolve, reject) => { + const request = indexedDB.open(DB_NAME, DB_VERSION); + + request.onerror = () => reject(request.error); + request.onsuccess = () => { + this.db = request.result; + resolve(); + }; + + request.onupgradeneeded = (event) => { + const db = (event.target as IDBOpenDBRequest).result; + + // Create studies store + if (!db.objectStoreNames.contains(STORE_NAME)) { + const store = db.createObjectStore(STORE_NAME, { keyPath: 'id' }); + // Index by SNPs for faster lookups + store.createIndex('snps', 'snps', { unique: false }); + } + + // Create metadata store + if (!db.objectStoreNames.contains(META_STORE)) { + db.createObjectStore(META_STORE, { keyPath: 'key' }); + } + }; + }); + } + + async downloadAndStore( + url: string, + onProgress?: (progress: { loaded: number; total: number; phase: string }) => void + ): Promise { + if (!this.db) await this.open(); + + // Download compressed file + onProgress?.({ loaded: 0, total: 100, phase: 'downloading' }); + + const response = await fetch(url); + if (!response.ok) throw new Error(`Failed to download: ${response.statusText}`); + + const contentLength = parseInt(response.headers.get('content-length') || '0'); + const reader = response.body!.getReader(); + + let receivedLength = 0; + const chunks: Uint8Array[] = []; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + chunks.push(value); + receivedLength += value.length; + + onProgress?.({ + loaded: receivedLength, + total: contentLength, + phase: 'downloading' + }); + } + + // Concatenate chunks into a single Uint8Array + const compressed = new Uint8Array(receivedLength); + let offset = 0; + for (const chunk of chunks) { + compressed.set(chunk, offset); + offset += chunk.length; + } + + console.log('Downloaded bytes:', receivedLength); + console.log('First bytes (hex):', Array.from(compressed.slice(0, 20)).map(b => b.toString(16).padStart(2, '0')).join(' ')); + + // If server applied content-encoding gzip, browser may have already decompressed. + // Otherwise this is a real .gz payload we must decompress. 
+ const looksGzipped = compressed.length >= 2 && compressed[0] === 0x1f && compressed[1] === 0x8b; + const contentEncoding = response.headers.get('content-encoding') || ''; + const shouldDecompress = looksGzipped && !contentEncoding.toLowerCase().includes('gzip'); + + onProgress?.({ loaded: 0, total: 100, phase: 'decompressing' }); + + let decompressed: Uint8Array; + + // Always use pako for reliability + if (shouldDecompress) { + console.log('Decompressing with pako...'); + try { + decompressed = pako.ungzip(compressed); + console.log('Decompressed size:', decompressed.length, 'bytes'); + } catch (e) { + console.error('Decompression failed:', e); + throw new Error('Failed to decompress GWAS catalog file'); + } + } else { + console.log('Data already decompressed by browser or not gzipped; using as-is'); + decompressed = compressed; + } + + // Parse decompressed data line by line without converting entire buffer to string + onProgress?.({ loaded: 0, total: 100, phase: 'parsing' }); + + // Process first line to get headers + let headerEndIdx = 0; + for (let i = 0; i < decompressed.length; i++) { + if (decompressed[i] === 0x0a || (decompressed[i] === 0x0d && decompressed[i + 1] === 0x0a)) { + headerEndIdx = i; + break; + } + } + + const decoder = new TextDecoder('utf-8'); + const headerLine = decoder.decode(decompressed.slice(0, headerEndIdx)); + const headers = headerLine.split('\t'); + + console.log('TSV Headers:', headers.slice(0, 10)); + console.log('Headers length:', headers.length); + + if (headers.length <= 1 || headers.every(h => h.trim() === '')) { + throw new Error('Invalid TSV headers - file may be corrupted or improperly formatted'); + } + + // Find column indices + const colMap: Record = {}; + headers.forEach((header, idx) => { + colMap[header.trim()] = idx; + }); + + // Required columns + const snpsIdx = colMap['SNPS']; + const accessionIdx = colMap['STUDY ACCESSION']; + const traitIdx = colMap['DISEASE/TRAIT']; + const studyIdx = colMap['STUDY']; + const riskAlleleIdx = colMap['STRONGEST SNP-RISK ALLELE']; + const orBetaIdx = colMap['OR or BETA']; + + console.log('Column indices:', { snpsIdx, accessionIdx, traitIdx, studyIdx, riskAlleleIdx, orBetaIdx }); + + // Process rest of file in chunks to avoid memory issues + const batchSize = 5000; + let currentBatch: GWASStudy[] = []; + let studyId = 0; + let storedCount = 0; + let skippedNoSnps = 0; + let lineNumber = 0; + + // Estimate total lines for progress (rough estimate: avg 500 bytes per line) + const estimatedTotalLines = Math.floor(decompressed.length / 500); + onProgress?.({ loaded: 0, total: estimatedTotalLines, phase: 'storing' }); + + // Process buffer in chunks to avoid string size limits + const chunkSize = 10 * 1024 * 1024; // 10MB chunks + let position = headerEndIdx + (decompressed[headerEndIdx] === 0x0d ? 
2 : 1); // Skip header + let leftover = new Uint8Array(0); + + while (position < decompressed.length) { + const chunkEnd = Math.min(position + chunkSize, decompressed.length); + const chunk = decompressed.slice(position, chunkEnd); + + // Combine leftover from previous chunk with current chunk + const combined = new Uint8Array(leftover.length + chunk.length); + combined.set(leftover); + combined.set(chunk, leftover.length); + + // Find last complete line in this chunk + let lastNewline = combined.length - 1; + for (let i = combined.length - 1; i >= 0; i--) { + if (combined[i] === 0x0a) { + lastNewline = i; + break; + } + } + + // Decode up to last complete line + const textChunk = decoder.decode(combined.slice(0, lastNewline + 1)); + const lines = textChunk.split(/\r?\n/); + + // Process lines + for (const line of lines) { + if (!line.trim()) continue; + + lineNumber++; + + if (lineNumber % 10000 === 0) { + onProgress?.({ loaded: storedCount, total: estimatedTotalLines, phase: 'storing' }); + console.log(`Processed ${lineNumber} lines, found ${studyId} studies with SNPs, skipped ${skippedNoSnps}`); + } + + const cols = line.split('\t'); + + // Only store studies with SNP data + const snps = cols[snpsIdx]?.trim(); + if (!snps) { + skippedNoSnps++; + continue; + } + + currentBatch.push({ + id: studyId++, + study_accession: cols[accessionIdx] || null, + disease_trait: cols[traitIdx] || null, + study: cols[studyIdx] || null, + snps: snps, + strongest_snp_risk_allele: cols[riskAlleleIdx] || null, + or_or_beta: cols[orBetaIdx] || null, + }); + + // Store batch when it reaches size limit + if (currentBatch.length >= batchSize) { + await this.storeBatch(currentBatch); + storedCount += currentBatch.length; + currentBatch = []; + await new Promise(resolve => setTimeout(resolve, 0)); + } + } + + // Save incomplete line for next iteration + leftover = combined.slice(lastNewline + 1); + position = chunkEnd; + } + + // Store remaining records + if (currentBatch.length > 0) { + await this.storeBatch(currentBatch); + storedCount += currentBatch.length; + } + + console.log(`Finished processing. Total lines: ${lineNumber}, Studies stored: ${storedCount}, Skipped: ${skippedNoSnps}`); + + onProgress?.({ loaded: storedCount, total: storedCount, phase: 'storing' }); + console.log("Successfully stored", storedCount, "studies in IndexedDB"); + + // Store metadata + await this.setMetadata({ + key: 'catalog', + downloadDate: new Date().toISOString(), + fileUrl: url, + totalStudies: storedCount, + version: '1.0.2', + }); + console.log("Metadata stored. 
Total studies:", storedCount); + + // Close and reopen database to ensure all data is committed + this.close(); + await this.open(); + console.log("Database connection refreshed after storing data"); + } + + private async storeBatch(studies: GWASStudy[]): Promise { + if (!this.db) { + throw new Error('Database not opened'); + } + + return new Promise((resolve, reject) => { + const tx = this.db!.transaction(STORE_NAME, 'readwrite'); + const store = tx.objectStore(STORE_NAME); + + let successCount = 0; + for (const study of studies) { + const request = store.put(study); + request.onsuccess = () => successCount++; + request.onerror = () => console.error('Failed to store study:', study.id, request.error); + } + + tx.oncomplete = () => { + console.log(`Batch stored: ${successCount}/${studies.length} studies`); + resolve(); + }; + tx.onerror = () => { + console.error('Transaction error:', tx.error); + reject(tx.error); + }; + }); + } + + async getMetadata(): Promise { + if (!this.db) await this.open(); + + return new Promise((resolve, reject) => { + const tx = this.db!.transaction(META_STORE, 'readonly'); + const store = tx.objectStore(META_STORE); + const request = store.get('catalog'); + + request.onsuccess = () => resolve(request.result || null); + request.onerror = () => reject(request.error); + }); + } + + private async setMetadata(meta: GWASMetadata): Promise { + return new Promise((resolve, reject) => { + const tx = this.db!.transaction(META_STORE, 'readwrite'); + const store = tx.objectStore(META_STORE); + store.put(meta); + + tx.oncomplete = () => resolve(); + tx.onerror = () => reject(tx.error); + }); + } + + async getAllStudies(): Promise { + if (!this.db) await this.open(); + + return new Promise((resolve, reject) => { + const tx = this.db!.transaction(STORE_NAME, 'readonly'); + const store = tx.objectStore(STORE_NAME); + const request = store.getAll(); + + request.onsuccess = () => resolve(request.result); + request.onerror = () => reject(request.error); + }); + } + + async getStudyCount(): Promise { + if (!this.db) await this.open(); + + return new Promise((resolve, reject) => { + const tx = this.db!.transaction(STORE_NAME, 'readonly'); + const store = tx.objectStore(STORE_NAME); + const request = store.count(); + + request.onsuccess = () => resolve(request.result); + request.onerror = () => reject(request.error); + }); + } + + async *streamStudies(batchSize: number = 10000): AsyncGenerator { + if (!this.db) await this.open(); + + let lastKey: number | undefined = undefined; + let batchNumber = 0; + + while (true) { + batchNumber++; + const batchStart = Date.now(); + const batch = await new Promise((resolve, reject) => { + const tx = this.db!.transaction(STORE_NAME, 'readonly'); + const store = tx.objectStore(STORE_NAME); + + // Use IDBKeyRange to start from last key, much faster than skipping + const range = lastKey !== undefined + ? 
IDBKeyRange.lowerBound(lastKey, true) // exclusive + : undefined; + + const request = store.openCursor(range); + + const results: GWASStudy[] = []; + + request.onsuccess = (event) => { + const cursor = (event.target as IDBRequest).result; + if (cursor && results.length < batchSize) { + results.push(cursor.value); + lastKey = cursor.key as number; + cursor.continue(); + } else { + resolve(results); + } + }; + + request.onerror = () => reject(request.error); + }); + + const batchTime = Date.now() - batchStart; + console.log(`IndexedDB batch ${batchNumber}: ${batch.length} records in ${batchTime}ms`); + + if (batch.length === 0) { + console.log('Stream complete - no more records'); + break; + } + + yield batch; + } + } + + async clearDatabase(): Promise { + if (!this.db) await this.open(); + + return new Promise((resolve, reject) => { + const tx = this.db!.transaction([STORE_NAME, META_STORE], 'readwrite'); + + tx.objectStore(STORE_NAME).clear(); + tx.objectStore(META_STORE).clear(); + + tx.oncomplete = () => resolve(); + tx.onerror = () => reject(tx.error); + }); + } + + async getStorageSize(): Promise { + if (!navigator.storage?.estimate) return 0; + + const estimate = await navigator.storage.estimate(); + return estimate.usage || 0; + } + + close(): void { + this.db?.close(); + this.db = null; + } +} + +export const gwasDB = new GWASDatabase(); diff --git a/lib/results-manager.ts b/lib/results-manager.ts index 072b0d6..f7b72f9 100644 --- a/lib/results-manager.ts +++ b/lib/results-manager.ts @@ -25,12 +25,46 @@ export class ResultsManager { // All results are now stored in memory only and cleared on session end static saveResultsToFile(session: SavedSession): void { - const dataStr = JSON.stringify(session, null, 2); - const dataBlob = new Blob([dataStr], { type: 'application/json' }); + // Convert to TSV format (tab-separated to handle commas in data) + const headers = [ + 'Study ID', + 'GWAS ID', + 'Trait Name', + 'Study Title', + 'Your Genotype', + 'Risk Allele', + 'Effect Size', + 'Risk Score', + 'Risk Level', + 'Matched SNP', + 'Analysis Date' + ]; + + const tsvRows = [headers.join('\t')]; + + for (const result of session.results) { + const row = [ + result.studyId, + result.gwasId || '', + (result.traitName || '').replace(/\t/g, ' '), // Replace tabs with spaces + (result.studyTitle || '').replace(/\t/g, ' '), + result.userGenotype || '', + result.riskAllele || '', + result.effectSize || '', + result.riskScore, + result.riskLevel || '', + result.matchedSnp || '', + result.analysisDate || '' + ]; + tsvRows.push(row.join('\t')); + } + + const tsvContent = tsvRows.join('\n'); + const dataBlob = new Blob([tsvContent], { type: 'text/tab-separated-values;charset=utf-8;' }); const link = document.createElement('a'); link.href = URL.createObjectURL(dataBlob); - link.download = `monadic_dna_explorer_results_${new Date().toISOString().split('T')[0]}.json`; + link.download = `monadic_dna_explorer_results_${new Date().toISOString().split('T')[0]}.tsv`; document.body.appendChild(link); link.click(); document.body.removeChild(link); diff --git a/lib/risk-calculator.ts b/lib/risk-calculator.ts index 042ab85..c76cf71 100644 --- a/lib/risk-calculator.ts +++ b/lib/risk-calculator.ts @@ -1,3 +1,5 @@ +import { parseVariantIds } from './snp-utils'; + export type UserStudyResult = { hasMatch: boolean; userGenotype?: string; @@ -29,7 +31,7 @@ function getComplement(base: string): string { } // Helper function to check if genotype is valid (not a no-call) -function isValidGenotype(genotype: string): boolean 
{ +export function isValidGenotype(genotype: string): boolean { // Filter out no-calls (--, -, 00, etc.) return genotype !== '--' && genotype !== '-' && @@ -130,8 +132,8 @@ export function analyzeStudyClientSide( return { hasMatch: false }; } - // Extract SNP IDs from the study - const snpList = studySnps.split(/[;,\s]+/).map(s => s.trim()).filter(Boolean); + // Extract SNP IDs from the study (use cached parser) + const snpList = parseVariantIds(studySnps); // Find ALL matching SNPs (not just the first one) const allMatches: Array<{ diff --git a/lib/run-all-indexed.ts b/lib/run-all-indexed.ts new file mode 100644 index 0000000..6ac6554 --- /dev/null +++ b/lib/run-all-indexed.ts @@ -0,0 +1,204 @@ +// IndexedDB-based Run All implementation +import { gwasDB, type GWASStudy } from './gwas-db'; +import type { SavedResult } from './results-manager'; + +export type RunAllProgress = { + phase: 'downloading' | 'decompressing' | 'parsing' | 'storing' | 'analyzing' | 'complete' | 'error'; + loaded: number; + total: number; + elapsedSeconds: number; + matchingStudies: number; + matchCount: number; +}; + +export async function runAllAnalysisIndexed( + genotypeData: Map, + onProgress: (progress: RunAllProgress) => void, + hasResult: (studyId: number) => boolean +): Promise { + const startTime = Date.now(); + + // Check if catalog is cached + const metadata = await gwasDB.getMetadata(); + + if (!metadata) { + // Download and cache catalog + await gwasDB.downloadAndStore( + 'https://monadoc-dna-explorer.nyc3.digitaloceanspaces.com/gwas_catalog_v1.0.2-associations_e115_r2025-09-15.tsv.gz', + (progress) => { + const elapsedSeconds = (Date.now() - startTime) / 1000; + onProgress({ + phase: progress.phase as any, + loaded: progress.loaded, + total: progress.total, + elapsedSeconds, + matchingStudies: 0, + matchCount: 0, + }); + } + ); + } else { + console.log('Using cached GWAS catalog from IndexedDB'); + } + + // Get study count without loading all data + console.log('Getting study count from IndexedDB...'); + onProgress({ + phase: 'analyzing', + loaded: 0, + total: 100, + elapsedSeconds: (Date.now() - startTime) / 1000, + matchingStudies: 0, + matchCount: 0, + }); + + const totalStudies = await gwasDB.getStudyCount(); + + console.log('Total studies in IndexedDB:', totalStudies); + + if (totalStudies === 0) { + throw new Error('No studies found in IndexedDB. 
Cache may be corrupted.'); + } + + onProgress({ + phase: 'analyzing', + loaded: 0, + total: totalStudies, + elapsedSeconds: (Date.now() - startTime) / 1000, + matchingStudies: 0, + matchCount: 0, + }); + + // Process sequentially in small batches to minimize memory + console.log(`Processing ${totalStudies} studies sequentially in batches`); + + const allResults: SavedResult[] = []; + let totalMatchCount = 0; + let totalProcessed = 0; + + // Stream and process in small batches + let lastProgressUpdate = Date.now(); + + for await (const studyBatch of gwasDB.streamStudies(10000)) { + // Process this batch inline (no workers) + for (const study of studyBatch) { + totalProcessed++; + + // Progress update every 500ms for smooth elapsed time + const now = Date.now(); + if (now - lastProgressUpdate >= 500) { + const elapsedSeconds = (now - startTime) / 1000; + onProgress({ + phase: 'analyzing', + loaded: totalProcessed, + total: totalStudies, + elapsedSeconds, + matchingStudies: totalProcessed, + matchCount: totalMatchCount, + }); + lastProgressUpdate = now; + } + + // Quick filter: check if has SNPs matching user + if (!study.snps) continue; + + const snpList = study.snps.split(/[,;\s]+/).map(s => s.trim()).filter(Boolean); + const hasMatch = snpList.some(snp => genotypeData.has(snp)); + + if (!hasMatch) continue; + + // Skip if no risk allele or effect size + if (!study.strongest_snp_risk_allele || !study.or_or_beta) continue; + + // Perform analysis + for (const snp of snpList) { + if (genotypeData.has(snp)) { + const userGenotype = genotypeData.get(snp)!; + + // Basic genotype validation + if (!/^[ACGT]{2}$/.test(userGenotype)) continue; + + // Simple risk score calculation + const riskAllele = study.strongest_snp_risk_allele.split('-').pop() || ''; + const hasRiskAllele = userGenotype.includes(riskAllele); + + let riskScore = 1.0; + let riskLevel: 'increased' | 'decreased' | 'neutral' = 'neutral'; + + if (hasRiskAllele) { + const orValue = parseFloat(study.or_or_beta); + if (!isNaN(orValue) && orValue > 0) { + riskScore = orValue; + riskLevel = orValue > 1 ? 'increased' : orValue < 1 ? 'decreased' : 'neutral'; + } + } + + if (!hasResult(study.id)) { + allResults.push({ + studyId: study.id, + gwasId: study.study_accession || '', + traitName: study.disease_trait || 'Unknown trait', + studyTitle: study.study || 'Unknown study', + userGenotype, + riskAllele: study.strongest_snp_risk_allele, + effectSize: study.or_or_beta, + riskScore, + riskLevel, + matchedSnp: snp, + analysisDate: new Date().toISOString(), + }); + totalMatchCount++; + } + break; // Only first match per study + } + } + } + + console.log(`Batch complete. Total processed: ${totalProcessed}/${totalStudies}, Total matches: ${totalMatchCount}`); + + // Send progress update after each batch + const elapsedSeconds = (Date.now() - startTime) / 1000; + onProgress({ + phase: 'analyzing', + loaded: totalProcessed, + total: totalStudies, + elapsedSeconds, + matchingStudies: totalProcessed, + matchCount: totalMatchCount, + }); + + // Allow UI to update between batches + await new Promise(resolve => setTimeout(resolve, 0)); + } + + console.log(`Exited batch loop. Total processed: ${totalProcessed}/${totalStudies}`); + console.log(`Analysis complete! 
Processed: ${totalProcessed}, Matches: ${totalMatchCount}`); + + // Send one final progress update before completing + const finalElapsedSeconds = (Date.now() - startTime) / 1000; + onProgress({ + phase: 'analyzing', + loaded: totalProcessed, + total: totalStudies, + elapsedSeconds: finalElapsedSeconds, + matchingStudies: totalProcessed, + matchCount: totalMatchCount, + }); + + // Small delay to ensure final progress renders + await new Promise(resolve => setTimeout(resolve, 100)); + + // Complete + console.log('Sending completion update...'); + onProgress({ + phase: 'complete', + loaded: totalProcessed, + total: totalProcessed, + elapsedSeconds: finalElapsedSeconds, + matchingStudies: totalProcessed, + matchCount: totalMatchCount, + }); + + console.log(`Returning ${allResults.length} results`); + return allResults; +} diff --git a/lib/snp-utils.ts b/lib/snp-utils.ts index aab7cf2..e9c49ab 100644 --- a/lib/snp-utils.ts +++ b/lib/snp-utils.ts @@ -1,14 +1,30 @@ +// Cache parsed SNP strings to avoid re-parsing same strings +const snpParseCache = new Map(); + export function parseVariantIds(snps: string | null): string[] { if (!snps) return []; - return snps + + // Check cache first + if (snpParseCache.has(snps)) { + return snpParseCache.get(snps)!; + } + + const parsed = snps .split(/[;,\s]+/) .map((id) => id.trim()) .filter(Boolean); + + // Cache result (limit cache size to prevent memory bloat) + if (snpParseCache.size < 100000) { + snpParseCache.set(snps, parsed); + } + + return parsed; } export function hasMatchingSNPs(genotypeData: Map | null, snps: string | null): boolean { if (!genotypeData || !snps) return false; - + const studySnps = parseVariantIds(snps); return studySnps.some(snp => genotypeData.has(snp)); } diff --git a/next.config.mjs b/next.config.mjs index 35ed516..04ceb3d 100644 --- a/next.config.mjs +++ b/next.config.mjs @@ -6,9 +6,18 @@ const __dirname = path.dirname(__filename); /** @type {import('next').NextConfig} */ const nextConfig = { + compress: true, // Enable gzip compression for API responses experimental: { optimizePackageImports: ["react", "react-dom"] }, + async rewrites() { + return [ + { + source: '/favicon.ico', + destination: '/icon.svg', + }, + ]; + }, webpack: (config, { isServer }) => { // Reduce file watching overhead - prevent watching parent directories config.watchOptions = { diff --git a/package-lock.json b/package-lock.json index 8844100..64c5c25 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,14 +7,16 @@ "": { "name": "gwasifier", "version": "1.0.0", - "license": "MIT", + "license": "SEE LICENSE IN LICENSE", "dependencies": { "@nillion/nilai-ts": "^0.0.0-alpha.4", "@types/crypto-js": "^4.2.2", + "@types/pako": "^2.0.4", "@types/pg": "^8.15.5", "better-sqlite3": "^12.4.1", "crypto-js": "^4.2.0", "next": "^14.2.33", + "pako": "^2.1.0", "pg": "^8.16.3", "react": "^18.3.1", "react-dom": "^18.3.1" @@ -760,6 +762,11 @@ "undici-types": "~7.13.0" } }, + "node_modules/@types/pako": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/@types/pako/-/pako-2.0.4.tgz", + "integrity": "sha512-VWDCbrLeVXJM9fihYodcLiIv0ku+AlOa/TQ1SvYOaBuyrSKgEcro95LJyIsJ4vSo6BXIxOKxiJAat04CmST9Fw==" + }, "node_modules/@types/pg": { "version": "8.15.5", "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.5.tgz", @@ -4507,8 +4514,7 @@ "node_modules/pako": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/pako/-/pako-2.1.0.tgz", - "integrity": 
"sha512-w+eufiZ1WuJYgPXbV/PO3NCMEc3xqylkKHzp8bxp1uW4qaSNQUkwmLLEc3kKsfz8lpV1F8Ht3U1Cm+9Srog2ug==", - "peer": true + "integrity": "sha512-w+eufiZ1WuJYgPXbV/PO3NCMEc3xqylkKHzp8bxp1uW4qaSNQUkwmLLEc3kKsfz8lpV1F8Ht3U1Cm+9Srog2ug==" }, "node_modules/parent-module": { "version": "1.0.1", diff --git a/package.json b/package.json index f8da29b..3ae2f90 100644 --- a/package.json +++ b/package.json @@ -15,10 +15,12 @@ "dependencies": { "@nillion/nilai-ts": "^0.0.0-alpha.4", "@types/crypto-js": "^4.2.2", + "@types/pako": "^2.0.4", "@types/pg": "^8.15.5", "better-sqlite3": "^12.4.1", "crypto-js": "^4.2.0", "next": "^14.2.33", + "pako": "^2.1.0", "pg": "^8.16.3", "react": "^18.3.1", "react-dom": "^18.3.1" From 1b91b0b53789698379352e57e81d02965966659c Mon Sep 17 00:00:00 2001 From: Vishakh Date: Tue, 14 Oct 2025 21:37:35 -0400 Subject: [PATCH 04/75] Enhance caching, deduplication, and result retrieval mechanisms Added `studyAccession` tracking for more accurate result identification and deduplication across studies. Improved cache clearing UX with loading indicators and error handling. Updated local result retrieval logic to prioritize `gwasId` and added fallbacks for `studyId`. Enhanced status messaging and UI hints for IndexedDB operations. --- app/components/MenuBar.tsx | 27 +++++++++++++--- app/components/ResultsContext.tsx | 23 ++++++++++--- app/components/RunAllModal.tsx | 3 +- app/components/StudyResultReveal.tsx | 48 ++++++++++++++++++---------- app/page.tsx | 1 + 5 files changed, 75 insertions(+), 27 deletions(-) diff --git a/app/components/MenuBar.tsx b/app/components/MenuBar.tsx index a2ac5e3..7ffb1b5 100644 --- a/app/components/MenuBar.tsx +++ b/app/components/MenuBar.tsx @@ -176,10 +176,29 @@ export default function MenuBar({ onRunAll, isRunningAll, runAllProgress }: Menu `Data will be re-downloaded on next Run All.` ); if (confirmed) { - const { gwasDB } = await import('@/lib/gwas-db'); - await gwasDB.clearDatabase(); - setCacheInfo(null); - alert('Cache cleared successfully!'); + try { + // Show loading state + const button = document.activeElement as HTMLButtonElement; + const originalText = button?.innerHTML; + if (button) { + button.disabled = true; + button.innerHTML = '
Clearing...'; + } + + const { gwasDB } = await import('@/lib/gwas-db'); + await gwasDB.clearDatabase(); + setCacheInfo(null); + + // Restore button and show success + if (button && originalText) { + button.disabled = false; + button.innerHTML = originalText; + } + alert('✓ Cache cleared successfully!'); + } catch (error) { + console.error('Failed to clear cache:', error); + alert('Failed to clear cache. Please try again.'); + } } }} title="Clear locally cached GWAS catalog data" diff --git a/app/components/ResultsContext.tsx b/app/components/ResultsContext.tsx index 0cb6812..d98110a 100644 --- a/app/components/ResultsContext.tsx +++ b/app/components/ResultsContext.tsx @@ -27,19 +27,32 @@ export function ResultsProvider({ children }: { children: ReactNode }) { const addResult = (result: SavedResult) => { setSavedResults(prev => { - const filtered = prev.filter(r => r.studyId !== result.studyId); + // Remove existing result with same gwasId (preferred) or studyId (fallback) + const filtered = prev.filter(r => { + if (result.gwasId && r.gwasId) { + return r.gwasId !== result.gwasId; // Dedupe by GWAS ID + } + return r.studyId !== result.studyId; // Fallback to studyId + }); return [...filtered, result]; }); }; const addResultsBatch = (results: SavedResult[]) => { setSavedResults(prev => { - // Create a map of existing results by studyId for O(1) lookup - const existingMap = new Map(prev.map(r => [r.studyId, r])); + // Create a map keyed by gwasId (or studyId as fallback) for deduplication + const existingMap = new Map(); + + // Add existing results + for (const r of prev) { + const key = r.gwasId || `id_${r.studyId}`; + existingMap.set(key, r); + } - // Add/update with new results + // Add/update with new results (gwasId takes precedence) for (const result of results) { - existingMap.set(result.studyId, result); + const key = result.gwasId || `id_${result.studyId}`; + existingMap.set(key, result); } return Array.from(existingMap.values()); diff --git a/app/components/RunAllModal.tsx b/app/components/RunAllModal.tsx index 363e3d7..7665199 100644 --- a/app/components/RunAllModal.tsx +++ b/app/components/RunAllModal.tsx @@ -106,7 +106,7 @@ export default function RunAllModal({ isOpen, onClose, status }: RunAllModalProp
-            Storing in Local Database...
+            Storing in Browser IndexedDB...
+            <div>
+              Elapsed: {formatTime(status.elapsedSeconds)}
+            </div>
           )}
+          <div>
+            Caching GWAS catalog locally for faster future analysis...
+          </div>
)} diff --git a/app/components/StudyResultReveal.tsx b/app/components/StudyResultReveal.tsx index 8f3bdfb..e527d40 100644 --- a/app/components/StudyResultReveal.tsx +++ b/app/components/StudyResultReveal.tsx @@ -12,12 +12,13 @@ import { trackStudyResultReveal } from "@/lib/analytics"; type StudyResultRevealProps = { studyId: number; + studyAccession: string | null; snps: string | null; traitName: string; studyTitle: string; }; -export default function StudyResultReveal({ studyId, snps, traitName, studyTitle }: StudyResultRevealProps) { +export default function StudyResultReveal({ studyId, studyAccession, snps, traitName, studyTitle }: StudyResultRevealProps) { const { genotypeData, isUploaded } = useGenotype(); const { addResult, hasResult, getResult, savedResults } = useResults(); const [result, setResult] = useState(null); @@ -27,24 +28,37 @@ export default function StudyResultReveal({ studyId, snps, traitName, studyTitle const [showDisclaimer, setShowDisclaimer] = useState(false); const [showCommentary, setShowCommentary] = useState(false); - // Check if we already have a saved result + // Check if we already have a saved result (check by studyAccession for Run All results, fallback to studyId) useEffect(() => { - if (hasResult(studyId)) { - const savedResult = getResult(studyId); - if (savedResult) { - setResult({ - hasMatch: true, - userGenotype: savedResult.userGenotype, - riskAllele: savedResult.riskAllele, - effectSize: savedResult.effectSize, - riskScore: savedResult.riskScore, - riskLevel: savedResult.riskLevel, - matchedSnp: savedResult.matchedSnp, - }); - setIsRevealed(true); - } + console.log(`[Study ${studyAccession || studyId}] Checking for saved result. Total results: ${savedResults.length}`); + + // First try to find by studyAccession (gwasId) - used by Run All + let savedResult = studyAccession ? savedResults.find(r => r.gwasId === studyAccession) : undefined; + + // Fallback to studyId for individually revealed results + if (!savedResult && hasResult(studyId)) { + savedResult = getResult(studyId); + } + + if (savedResult) { + console.log(`[Study ${studyAccession || studyId}] Found saved result:`, savedResult); + setResult({ + hasMatch: true, + userGenotype: savedResult.userGenotype, + riskAllele: savedResult.riskAllele, + effectSize: savedResult.effectSize, + riskScore: savedResult.riskScore, + riskLevel: savedResult.riskLevel, + matchedSnp: savedResult.matchedSnp, + }); + setIsRevealed(true); + } else { + console.log(`[Study ${studyAccession || studyId}] No saved result found`); + // Reset if result was removed + setResult(null); + setIsRevealed(false); } - }, [studyId, hasResult, getResult]); + }, [studyId, studyAccession, savedResults.length, hasResult, getResult, savedResults]); const handleRevealClick = () => { setShowDisclaimer(true); diff --git a/app/page.tsx b/app/page.tsx index 5b87153..bc91dac 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -897,6 +897,7 @@ function MainContent() { Date: Wed, 15 Oct 2025 17:13:07 -0400 Subject: [PATCH 05/75] Add SQL.js in-memory database for genetic result storage and querying Implemented a SQL.js-backed in-memory database optimized for handling 100k+ genetic results. Replaced array-based state management with advanced SQL schemas, batch operations, indexes, and custom query methods for efficient analysis. Updated context and components to support new database APIs and enhanced `Run All` logic with database-backed result persistence and retrieval. 
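Background for reviewers unfamiliar with sql.js: it is SQLite compiled to WebAssembly, so the whole database lives in page memory and disappears on unload, which is why it suits the memory-only security model here. The batch-insert pattern the new ResultsDatabase relies on is a single transaction wrapped around one prepared statement. A minimal standalone sketch of that pattern follows; the table and column names are illustrative, not the app's actual schema:

    import initSqlJs from 'sql.js';

    async function demoBatchInsert() {
      // Load the WASM build; locateFile tells sql.js where to fetch it from
      const SQL = await initSqlJs({
        locateFile: (file) => `https://sql.js.org/dist/${file}`,
      });
      const db = new SQL.Database(); // purely in-memory, gone on page unload

      db.run('CREATE TABLE demo (id INTEGER PRIMARY KEY, score REAL);');
      db.run('CREATE INDEX idx_score ON demo(score);');

      // One transaction + one prepared statement makes bulk inserts fast
      db.run('BEGIN TRANSACTION;');
      const stmt = db.prepare('INSERT OR REPLACE INTO demo (id, score) VALUES (?, ?)');
      try {
        for (let i = 0; i < 100_000; i++) {
          stmt.run([i, Math.random() * 2]);
        }
        db.run('COMMIT;');
      } catch (err) {
        db.run('ROLLBACK;');
        throw err;
      } finally {
        stmt.free(); // prepared statements must be freed explicitly
      }

      // exec() returns [{ columns, values }] per result set
      const res = db.exec('SELECT COUNT(*) FROM demo WHERE score > 1.5');
      console.log('rows above 1.5:', res[0].values[0][0]);
    }

Wrapping the loop in BEGIN/COMMIT avoids one implicit transaction per row, which is what makes 100k-row inserts tractable in the browser.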
--- app/components/LLMCommentaryModal.tsx | 32 ++- app/components/MenuBar.tsx | 89 +++---- app/components/ResultsContext.tsx | 106 +++++--- app/components/StudyResultReveal.tsx | 12 +- app/page.tsx | 17 +- lib/results-database.ts | 356 ++++++++++++++++++++++++++ lib/results-manager.ts | 63 ++++- lib/results-query-examples.ts | 200 +++++++++++++++ next.config.mjs | 11 + package-lock.json | 8 +- package.json | 3 +- 11 files changed, 797 insertions(+), 100 deletions(-) create mode 100644 lib/results-database.ts create mode 100644 lib/results-query-examples.ts diff --git a/app/components/LLMCommentaryModal.tsx b/app/components/LLMCommentaryModal.tsx index 8ad16a9..03c3156 100644 --- a/app/components/LLMCommentaryModal.tsx +++ b/app/components/LLMCommentaryModal.tsx @@ -70,6 +70,35 @@ export default function LLMCommentaryModal({ setStudyMetadata(null); try { + // Yield to UI to show loading state + await new Promise(resolve => setTimeout(resolve, 0)); + + setDelegationStatus("Preparing your results..."); + + // Construct the prompt with top 500 results by effect size, plus the current result + // Do this early to avoid blocking later + console.log(`Filtering top 500 from ${allResults.length} results...`); + const startFilter = Date.now(); + + const topResults = allResults + .filter(r => r.gwasId !== currentResult.gwasId) // Exclude current result temporarily + .sort((a, b) => { + // Sort by absolute distance from 1.0 (neutral) - larger effect = further from 1.0 + const aDistance = Math.abs(a.riskScore - 1.0); + const bDistance = Math.abs(b.riskScore - 1.0); + return bDistance - aDistance; + }) + .slice(0, 499); // Take top 499 + + // Add current result at the top + const resultsForContext = [currentResult, ...topResults]; + + const filterTime = Date.now() - startFilter; + console.log(`Filtered to top 500 in ${filterTime}ms`); + + // Yield to UI after heavy computation + await new Promise(resolve => setTimeout(resolve, 0)); + // First, fetch study metadata for quality indicators const metadataResponse = await fetch(`/api/study-metadata?studyId=${currentResult.studyId}`); if (metadataResponse.ok) { @@ -109,8 +138,7 @@ export default function LLMCommentaryModal({ setDelegationStatus("✓ Secure token ready — connecting directly to private AI"); - // Construct the prompt with all results context - const contextResults = allResults + const contextResults = resultsForContext .map((r: SavedResult, idx: number) => `${idx + 1}. ${r.traitName} (${r.studyTitle}): - Your genotype: ${r.userGenotype} diff --git a/app/components/MenuBar.tsx b/app/components/MenuBar.tsx index 7ffb1b5..6b41855 100644 --- a/app/components/MenuBar.tsx +++ b/app/components/MenuBar.tsx @@ -56,7 +56,9 @@ export default function MenuBar({ onRunAll, isRunningAll, runAllProgress }: Menu const handleLoadFromFile = async () => { setIsLoadingFile(true); try { - await loadFromFile(fileHash); + // Allow loading results even without DNA file loaded + // fileHash will be null/undefined if no DNA file is loaded + await loadFromFile(fileHash || null); } catch (error) { alert('Failed to load results file: ' + (error as Error).message); } finally { @@ -112,52 +114,53 @@ export default function MenuBar({ onRunAll, isRunningAll, runAllProgress }: Menu )} )} -
-          {savedResults.length > 0 && (
-            <span>
-              {savedResults.length} result{savedResults.length !== 1 ? 's' : ''} cached
-            </span>
-          )}
+          )}
+
+          {savedResults.length > 0 && (
+            <span>
+              {savedResults.length} result{savedResults.length !== 1 ? 's' : ''} cached
+            </span>
+          )}
+
+          {savedResults.length > 0 && (
+            <>
-
-          )}
+
diff --git a/app/components/ResultsContext.tsx b/app/components/ResultsContext.tsx index d98110a..1e02301 100644 --- a/app/components/ResultsContext.tsx +++ b/app/components/ResultsContext.tsx @@ -1,69 +1,73 @@ "use client"; -import { createContext, useContext, useState, useEffect, ReactNode } from "react"; +import { createContext, useContext, useState, useEffect, ReactNode, useMemo } from "react"; import { SavedResult, SavedSession, ResultsManager } from "@/lib/results-manager"; +import { resultsDB } from "@/lib/results-database"; type ResultsContextType = { savedResults: SavedResult[]; - addResult: (result: SavedResult) => void; - addResultsBatch: (results: SavedResult[]) => void; - removeResult: (studyId: number) => void; - clearResults: () => void; + addResult: (result: SavedResult) => Promise; + addResultsBatch: (results: SavedResult[]) => Promise; + removeResult: (studyId: number) => Promise; + clearResults: () => Promise; saveToFile: (genotypeSize?: number, genotypeHash?: string) => void; loadFromFile: (currentFileHash?: string | null) => Promise; hasResult: (studyId: number) => boolean; getResult: (studyId: number) => SavedResult | undefined; + getResultByGwasId: (gwasId: string) => SavedResult | undefined; setOnResultsLoadedCallback: (callback: () => void) => void; + // SQL query methods for advanced analysis + queryByRiskLevel: (level: 'increased' | 'decreased' | 'neutral') => Promise; + queryByTraitPattern: (pattern: string) => Promise; + queryByRiskScoreRange: (min: number, max: number) => Promise; + getTopRisks: (limit?: number) => Promise; + getProtectiveVariants: (limit?: number) => Promise; + getTraitCategories: () => Promise>; + getRiskStatistics: () => Promise; + executeQuery: (sql: string, params?: any[]) => Promise; }; const ResultsContext = createContext(null); export function ResultsProvider({ children }: { children: ReactNode }) { - // SECURITY: Results stored in memory only, cleared on session end + // SECURITY: Results stored in memory only (in SQL.js in-memory database), cleared on session end const [savedResults, setSavedResults] = useState([]); const [onResultsLoaded, setOnResultsLoaded] = useState<(() => void) | undefined>(); + const [dbInitialized, setDbInitialized] = useState(false); - // No localStorage loading - data is memory-only - - const addResult = (result: SavedResult) => { - setSavedResults(prev => { - // Remove existing result with same gwasId (preferred) or studyId (fallback) - const filtered = prev.filter(r => { - if (result.gwasId && r.gwasId) { - return r.gwasId !== result.gwasId; // Dedupe by GWAS ID - } - return r.studyId !== result.studyId; // Fallback to studyId - }); - return [...filtered, result]; + // Initialize SQL database on mount + useEffect(() => { + resultsDB.initialize().then(() => { + setDbInitialized(true); + console.log('Results database initialized'); }); - }; + }, []); - const addResultsBatch = (results: SavedResult[]) => { - setSavedResults(prev => { - // Create a map keyed by gwasId (or studyId as fallback) for deduplication - const existingMap = new Map(); + // Sync state array with database for React rendering + const syncFromDatabase = async () => { + const results = await resultsDB.getAllResults(); + setSavedResults(results); + }; - // Add existing results - for (const r of prev) { - const key = r.gwasId || `id_${r.studyId}`; - existingMap.set(key, r); - } + // No localStorage loading - data is memory-only - // Add/update with new results (gwasId takes precedence) - for (const result of results) { - const key = 
result.gwasId || `id_${result.studyId}`; - existingMap.set(key, result); - } + const addResult = async (result: SavedResult) => { + await resultsDB.insertResult(result); + await syncFromDatabase(); + }; - return Array.from(existingMap.values()); - }); + const addResultsBatch = async (results: SavedResult[]) => { + await resultsDB.insertResultsBatch(results); + await syncFromDatabase(); }; - const removeResult = (studyId: number) => { - setSavedResults(prev => prev.filter(r => r.studyId !== studyId)); + const removeResult = async (studyId: number) => { + await resultsDB.removeResult(studyId); + await syncFromDatabase(); }; - const clearResults = () => { + const clearResults = async () => { + await resultsDB.clear(); setSavedResults([]); }; @@ -96,7 +100,11 @@ export function ResultsProvider({ children }: { children: ReactNode }) { } } - setSavedResults(session.results); + // Load into SQL database + await resultsDB.clear(); + await resultsDB.insertResultsBatch(session.results); + await syncFromDatabase(); + // SECURITY: No longer saving to localStorage // Call the callback if it exists @@ -109,14 +117,22 @@ export function ResultsProvider({ children }: { children: ReactNode }) { } }; + // Optimized O(1) lookups using SQL indexes const hasResult = (studyId: number) => { + // Use synchronous check from state for performance return savedResults.some(r => r.studyId === studyId); }; const getResult = (studyId: number) => { + // Use synchronous lookup from state for performance return savedResults.find(r => r.studyId === studyId); }; + const getResultByGwasId = (gwasId: string) => { + // Use synchronous lookup from state for performance + return savedResults.find(r => r.gwasId === gwasId); + }; + return ( void) => setOnResultsLoaded(() => callback) + getResultByGwasId, + setOnResultsLoadedCallback: (callback: () => void) => setOnResultsLoaded(() => callback), + // SQL query methods for advanced analysis + queryByRiskLevel: resultsDB.queryByRiskLevel.bind(resultsDB), + queryByTraitPattern: resultsDB.queryByTraitPattern.bind(resultsDB), + queryByRiskScoreRange: resultsDB.queryByRiskScoreRange.bind(resultsDB), + getTopRisks: resultsDB.getTopRisks.bind(resultsDB), + getProtectiveVariants: resultsDB.getProtectiveVariants.bind(resultsDB), + getTraitCategories: resultsDB.getTraitCategories.bind(resultsDB), + getRiskStatistics: resultsDB.getRiskStatistics.bind(resultsDB), + executeQuery: resultsDB.executeQuery.bind(resultsDB), }}> {children} diff --git a/app/components/StudyResultReveal.tsx b/app/components/StudyResultReveal.tsx index e527d40..dceed7c 100644 --- a/app/components/StudyResultReveal.tsx +++ b/app/components/StudyResultReveal.tsx @@ -20,7 +20,7 @@ type StudyResultRevealProps = { export default function StudyResultReveal({ studyId, studyAccession, snps, traitName, studyTitle }: StudyResultRevealProps) { const { genotypeData, isUploaded } = useGenotype(); - const { addResult, hasResult, getResult, savedResults } = useResults(); + const { addResult, hasResult, getResult, getResultByGwasId, savedResults } = useResults(); const [result, setResult] = useState(null); const [isLoading, setIsLoading] = useState(false); const [isRevealed, setIsRevealed] = useState(false); @@ -32,10 +32,10 @@ export default function StudyResultReveal({ studyId, studyAccession, snps, trait useEffect(() => { console.log(`[Study ${studyAccession || studyId}] Checking for saved result. 
Total results: ${savedResults.length}`); - // First try to find by studyAccession (gwasId) - used by Run All - let savedResult = studyAccession ? savedResults.find(r => r.gwasId === studyAccession) : undefined; + // First try to find by studyAccession (gwasId) - used by Run All - O(1) lookup + let savedResult = studyAccession ? getResultByGwasId(studyAccession) : undefined; - // Fallback to studyId for individually revealed results + // Fallback to studyId for individually revealed results - O(1) lookup if (!savedResult && hasResult(studyId)) { savedResult = getResult(studyId); } @@ -58,7 +58,7 @@ export default function StudyResultReveal({ studyId, studyAccession, snps, trait setResult(null); setIsRevealed(false); } - }, [studyId, studyAccession, savedResults.length, hasResult, getResult, savedResults]); + }, [studyId, studyAccession, savedResults.length, hasResult, getResult, getResultByGwasId, savedResults]); const handleRevealClick = () => { setShowDisclaimer(true); @@ -133,7 +133,7 @@ export default function StudyResultReveal({ studyId, studyAccession, snps, trait matchedSnp: analysisResult.matchedSnp!, analysisDate: new Date().toISOString(), }; - addResult(savedResult); + await addResult(savedResult); } } catch (err) { setError(err instanceof Error ? err.message : 'Analysis failed'); diff --git a/app/page.tsx b/app/page.tsx index bc91dac..ddb5b5f 100644 --- a/app/page.tsx +++ b/app/page.tsx @@ -205,6 +205,7 @@ function MainContent() { const [isRunningAll, setIsRunningAll] = useState(false); const [runAllProgress, setRunAllProgress] = useState({ current: 0, total: 0 }); const [showRunAllModal, setShowRunAllModal] = useState(false); + const [showRunAllDisclaimer, setShowRunAllDisclaimer] = useState(false); const [runAllStatus, setRunAllStatus] = useState<{ phase: 'fetching' | 'analyzing' | 'complete' | 'error'; fetchedBatches: number; @@ -427,12 +428,19 @@ function MainContent() { } }; - const handleRunAll = async () => { + const handleRunAll = () => { if (!genotypeData || genotypeData.size === 0) { alert("No SNPs found in your genetic data"); return; } + // Show disclaimer first + setShowRunAllDisclaimer(true); + }; + + const handleRunAllDisclaimerAccept = async () => { + setShowRunAllDisclaimer(false); + // Check if we need to download the catalog first const { gwasDB } = await import('@/lib/gwas-db'); const metadata = await gwasDB.getMetadata(); @@ -497,7 +505,7 @@ function MainContent() { // Add all results in one efficient batch operation console.log(`Adding ${results.length} results to the results manager...`); const startAdd = Date.now(); - addResultsBatch(results); + await addResultsBatch(results); const addTime = Date.now() - startAdd; console.log(`Finished adding ${results.length} results in ${addTime}ms`); } catch (error) { @@ -912,6 +920,11 @@ function MainContent() {
+ setShowRunAllDisclaimer(false)} + onAccept={handleRunAllDisclaimerAccept} + /> setShowRunAllModal(false)} diff --git a/lib/results-database.ts b/lib/results-database.ts new file mode 100644 index 0000000..175e191 --- /dev/null +++ b/lib/results-database.ts @@ -0,0 +1,356 @@ +// In-memory SQL database for genetic analysis results +// Optimized for complex queries, filtering, and analytical operations on 100k+ results + +import initSqlJs, { Database, SqlJsStatic } from 'sql.js'; +import type { SavedResult } from './results-manager'; + +let SQL: SqlJsStatic | null = null; + +async function initSQL() { + if (!SQL) { + SQL = await initSqlJs({ + locateFile: file => `https://sql.js.org/dist/${file}` + }); + } + return SQL; +} + +export class ResultsDatabase { + private db: Database | null = null; + private sqlJs: SqlJsStatic | null = null; + + async initialize(): Promise { + this.sqlJs = await initSQL(); + this.db = new this.sqlJs.Database(); + + // Create optimized schema with indexes + this.db.run(` + CREATE TABLE IF NOT EXISTS results ( + studyId INTEGER PRIMARY KEY, + gwasId TEXT, + traitName TEXT NOT NULL, + studyTitle TEXT NOT NULL, + userGenotype TEXT NOT NULL, + riskAllele TEXT NOT NULL, + effectSize TEXT NOT NULL, + riskScore REAL NOT NULL, + riskLevel TEXT NOT NULL, + matchedSnp TEXT NOT NULL, + analysisDate TEXT NOT NULL + ); + `); + + // Create indexes for common query patterns + this.db.run('CREATE INDEX IF NOT EXISTS idx_gwasId ON results(gwasId);'); + this.db.run('CREATE INDEX IF NOT EXISTS idx_traitName ON results(traitName);'); + this.db.run('CREATE INDEX IF NOT EXISTS idx_riskLevel ON results(riskLevel);'); + this.db.run('CREATE INDEX IF NOT EXISTS idx_riskScore ON results(riskScore);'); + this.db.run('CREATE INDEX IF NOT EXISTS idx_matchedSnp ON results(matchedSnp);'); + + console.log('ResultsDatabase initialized with indexed schema'); + } + + async insertResult(result: SavedResult): Promise { + if (!this.db) await this.initialize(); + + this.db!.run(` + INSERT OR REPLACE INTO results ( + studyId, gwasId, traitName, studyTitle, userGenotype, + riskAllele, effectSize, riskScore, riskLevel, matchedSnp, analysisDate + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `, [ + result.studyId, + result.gwasId || null, + result.traitName, + result.studyTitle, + result.userGenotype, + result.riskAllele, + result.effectSize, + result.riskScore, + result.riskLevel, + result.matchedSnp, + result.analysisDate + ]); + } + + async insertResultsBatch(results: SavedResult[]): Promise { + if (!this.db) await this.initialize(); + + // Use transaction for batch insert (much faster) + this.db!.run('BEGIN TRANSACTION;'); + + try { + const stmt = this.db!.prepare(` + INSERT OR REPLACE INTO results ( + studyId, gwasId, traitName, studyTitle, userGenotype, + riskAllele, effectSize, riskScore, riskLevel, matchedSnp, analysisDate + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ `); + + for (const result of results) { + stmt.run([ + result.studyId, + result.gwasId || null, + result.traitName, + result.studyTitle, + result.userGenotype, + result.riskAllele, + result.effectSize, + result.riskScore, + result.riskLevel, + result.matchedSnp, + result.analysisDate + ]); + } + + stmt.free(); + this.db!.run('COMMIT;'); + console.log(`Batch inserted ${results.length} results`); + } catch (error) { + this.db!.run('ROLLBACK;'); + console.error('Batch insert failed:', error); + throw error; + } + } + + async getResult(studyId: number): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(` + SELECT * FROM results WHERE studyId = ? + `, [studyId]); + + if (!result.length || !result[0].values.length) return null; + + return this.rowToResult(result[0].columns, result[0].values[0]); + } + + async getResultByGwasId(gwasId: string): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(` + SELECT * FROM results WHERE gwasId = ? + `, [gwasId]); + + if (!result.length || !result[0].values.length) return null; + + return this.rowToResult(result[0].columns, result[0].values[0]); + } + + async hasResult(studyId: number): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(` + SELECT 1 FROM results WHERE studyId = ? LIMIT 1 + `, [studyId]); + + return result.length > 0 && result[0].values.length > 0; + } + + async getAllResults(): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(`SELECT * FROM results`); + + if (!result.length) return []; + + return result[0].values.map(row => + this.rowToResult(result[0].columns, row) + ); + } + + async getCount(): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(`SELECT COUNT(*) as count FROM results`); + + if (!result.length || !result[0].values.length) return 0; + + return result[0].values[0][0] as number; + } + + async removeResult(studyId: number): Promise { + if (!this.db) await this.initialize(); + + this.db!.run(`DELETE FROM results WHERE studyId = ?`, [studyId]); + } + + async clear(): Promise { + if (!this.db) await this.initialize(); + + this.db!.run(`DELETE FROM results`); + } + + // Advanced query methods for LLM analysis + + async queryByRiskLevel(riskLevel: 'increased' | 'decreased' | 'neutral'): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(` + SELECT * FROM results WHERE riskLevel = ? + `, [riskLevel]); + + if (!result.length) return []; + + return result[0].values.map(row => this.rowToResult(result[0].columns, row)); + } + + async queryByTraitPattern(pattern: string): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(` + SELECT * FROM results WHERE traitName LIKE ? + `, [`%${pattern}%`]); + + if (!result.length) return []; + + return result[0].values.map(row => this.rowToResult(result[0].columns, row)); + } + + async queryByRiskScoreRange(minScore: number, maxScore: number): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(` + SELECT * FROM results + WHERE riskScore >= ? AND riskScore <= ? 
+ ORDER BY riskScore DESC + `, [minScore, maxScore]); + + if (!result.length) return []; + + return result[0].values.map(row => this.rowToResult(result[0].columns, row)); + } + + async getTopRisks(limit: number = 10): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(` + SELECT * FROM results + WHERE riskLevel = 'increased' + ORDER BY riskScore DESC + LIMIT ? + `, [limit]); + + if (!result.length) return []; + + return result[0].values.map(row => this.rowToResult(result[0].columns, row)); + } + + async getProtectiveVariants(limit: number = 10): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(` + SELECT * FROM results + WHERE riskLevel = 'decreased' + ORDER BY riskScore ASC + LIMIT ? + `, [limit]); + + if (!result.length) return []; + + return result[0].values.map(row => this.rowToResult(result[0].columns, row)); + } + + async getTraitCategories(): Promise> { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(` + SELECT traitName as trait, COUNT(*) as count + FROM results + GROUP BY traitName + ORDER BY count DESC + `); + + if (!result.length) return []; + + return result[0].values.map(row => ({ + trait: row[0] as string, + count: row[1] as number + })); + } + + async getRiskStatistics(): Promise<{ + totalResults: number; + increasedRisk: number; + decreasedRisk: number; + neutral: number; + avgRiskScore: number; + }> { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(` + SELECT + COUNT(*) as total, + SUM(CASE WHEN riskLevel = 'increased' THEN 1 ELSE 0 END) as increased, + SUM(CASE WHEN riskLevel = 'decreased' THEN 1 ELSE 0 END) as decreased, + SUM(CASE WHEN riskLevel = 'neutral' THEN 1 ELSE 0 END) as neutral, + AVG(riskScore) as avgScore + FROM results + `); + + if (!result.length || !result[0].values.length) { + return { totalResults: 0, increasedRisk: 0, decreasedRisk: 0, neutral: 0, avgRiskScore: 0 }; + } + + const row = result[0].values[0]; + return { + totalResults: row[0] as number, + increasedRisk: row[1] as number, + decreasedRisk: row[2] as number, + neutral: row[3] as number, + avgRiskScore: row[4] as number + }; + } + + // Custom SQL query for advanced analysis + async executeQuery(sql: string, params: any[] = []): Promise { + if (!this.db) await this.initialize(); + + const result = this.db!.exec(sql, params); + + if (!result.length) return []; + + return result[0].values.map(row => { + const obj: any = {}; + result[0].columns.forEach((col, idx) => { + obj[col] = row[idx]; + }); + return obj; + }); + } + + private rowToResult(columns: string[], row: any[]): SavedResult { + const obj: any = {}; + columns.forEach((col, idx) => { + obj[col] = row[idx]; + }); + + return { + studyId: obj.studyId, + gwasId: obj.gwasId, + traitName: obj.traitName, + studyTitle: obj.studyTitle, + userGenotype: obj.userGenotype, + riskAllele: obj.riskAllele, + effectSize: obj.effectSize, + riskScore: obj.riskScore, + riskLevel: obj.riskLevel, + matchedSnp: obj.matchedSnp, + analysisDate: obj.analysisDate + }; + } + + // Export database state (for debugging) + async exportToArray(): Promise { + if (!this.db) return null; + return this.db.export(); + } + + // Import database state + async importFromArray(data: Uint8Array): Promise { + this.sqlJs = await initSQL(); + this.db = new this.sqlJs.Database(data); + } +} + +export const resultsDB = new ResultsDatabase(); diff --git a/lib/results-manager.ts b/lib/results-manager.ts index f7b72f9..f9a1f4e 100644 --- a/lib/results-manager.ts +++ 
b/lib/results-manager.ts @@ -76,7 +76,7 @@ export class ResultsManager { return new Promise((resolve, reject) => { const input = document.createElement('input'); input.type = 'file'; - input.accept = '.json'; + input.accept = '.tsv,.json'; input.onchange = (e) => { const file = (e.target as HTMLInputElement).files?.[0]; @@ -89,13 +89,66 @@ export class ResultsManager { reader.onload = (e) => { try { const content = e.target?.result as string; - const session = JSON.parse(content) as SavedSession; - // Validate the structure - if (!session.results || !Array.isArray(session.results)) { - throw new Error('Invalid file format'); + // Try to parse as JSON first (for backward compatibility) + if (file.name.endsWith('.json')) { + const session = JSON.parse(content) as SavedSession; + + // Validate the structure + if (!session.results || !Array.isArray(session.results)) { + throw new Error('Invalid file format'); + } + + resolve(session); + return; + } + + // Parse TSV format + const lines = content.split('\n').filter(line => line.trim()); + if (lines.length < 2) { + throw new Error('File is empty or has no data rows'); } + const headers = lines[0].split('\t'); + + // Validate headers + const expectedHeaders = ['Study ID', 'GWAS ID', 'Trait Name', 'Study Title', 'Your Genotype', + 'Risk Allele', 'Effect Size', 'Risk Score', 'Risk Level', + 'Matched SNP', 'Analysis Date']; + + const headerCheck = expectedHeaders.every(h => headers.includes(h)); + if (!headerCheck) { + throw new Error('Invalid TSV headers - not a valid results file'); + } + + // Parse data rows + const results: SavedResult[] = []; + for (let i = 1; i < lines.length; i++) { + const cols = lines[i].split('\t'); + if (cols.length < 11) continue; // Skip incomplete rows + + results.push({ + studyId: parseInt(cols[0]) || 0, + gwasId: cols[1] || undefined, + traitName: cols[2] || '', + studyTitle: cols[3] || '', + userGenotype: cols[4] || '', + riskAllele: cols[5] || '', + effectSize: cols[6] || '', + riskScore: parseFloat(cols[7]) || 0, + riskLevel: (cols[8] as 'increased' | 'decreased' | 'neutral') || 'neutral', + matchedSnp: cols[9] || '', + analysisDate: cols[10] || '', + }); + } + + const session: SavedSession = { + fileName: file.name, + createdDate: new Date().toISOString(), + totalVariants: 0, // Not stored in TSV + results + }; + resolve(session); } catch (error) { reject(new Error('Failed to parse file: ' + (error as Error).message)); diff --git a/lib/results-query-examples.ts b/lib/results-query-examples.ts new file mode 100644 index 0000000..09399e6 --- /dev/null +++ b/lib/results-query-examples.ts @@ -0,0 +1,200 @@ +/** + * Example queries for analyzing genetic results using the SQL-backed ResultsDatabase + * + * These examples show how to use the advanced query methods exposed through useResults() + * for LLM-based analysis, data visualization, and complex filtering. 
+ */ + +import { useResults } from '@/app/components/ResultsContext'; + +// Example 1: Get all results with increased risk +export async function getIncreasedRiskResults() { + const { queryByRiskLevel } = useResults(); + const increasedRisks = await queryByRiskLevel('increased'); + + console.log(`Found ${increasedRisks.length} variants with increased risk`); + return increasedRisks; +} + +// Example 2: Find all heart-related conditions +export async function getHeartRelatedTraits() { + const { queryByTraitPattern } = useResults(); + const heartTraits = await queryByTraitPattern('heart'); + + console.log(`Found ${heartTraits.length} heart-related genetic associations`); + return heartTraits; +} + +// Example 3: Get high-risk variants (risk score > 1.5) +export async function getHighRiskVariants() { + const { queryByRiskScoreRange } = useResults(); + const highRisks = await queryByRiskScoreRange(1.5, 10); + + console.log(`Found ${highRisks.length} high-risk variants (>1.5x)`); + return highRisks; +} + +// Example 4: Get summary statistics for LLM analysis +export async function getSummaryForLLM() { + const { getRiskStatistics, getTraitCategories, getTopRisks, getProtectiveVariants } = useResults(); + + const [stats, categories, topRisks, protective] = await Promise.all([ + getRiskStatistics(), + getTraitCategories(), + getTopRisks(5), + getProtectiveVariants(5) + ]); + + return { + stats, + topCategories: categories.slice(0, 10), + topRisks, + protective + }; +} + +// Example 5: Custom SQL query for complex analysis +export async function getComplexQuery() { + const { executeQuery } = useResults(); + + // Example: Find all results where user has 2 copies of risk allele + const results = await executeQuery(` + SELECT traitName, riskScore, userGenotype, riskAllele + FROM results + WHERE riskLevel = 'increased' + AND riskScore > 1.2 + ORDER BY riskScore DESC + LIMIT 20 + `); + + return results; +} + +// Example 6: Analyze trait distribution +export async function analyzeTraitDistribution() { + const { executeQuery } = useResults(); + + const distribution = await executeQuery(` + SELECT + riskLevel, + COUNT(*) as count, + AVG(riskScore) as avgScore, + MIN(riskScore) as minScore, + MAX(riskScore) as maxScore + FROM results + GROUP BY riskLevel + `); + + return distribution; +} + +// Example 7: Find variants with similar risk profiles +export async function findSimilarRiskProfiles(targetRiskScore: number, tolerance: number = 0.1) { + const { executeQuery } = useResults(); + + const similar = await executeQuery(` + SELECT * + FROM results + WHERE riskScore BETWEEN ? AND ? + ORDER BY ABS(riskScore - ?) ASC + LIMIT 10 + `, [ + targetRiskScore - tolerance, + targetRiskScore + tolerance, + targetRiskScore + ]); + + return similar; +} + +// Example 8: Get all results for a specific SNP +export async function getResultsBySnp(snpId: string) { + const { executeQuery } = useResults(); + + const results = await executeQuery(` + SELECT * + FROM results + WHERE matchedSnp = ? + `, [snpId]); + + return results; +} + +// Example 9: LLM-friendly summary for specific traits +export async function getTraitSummaryForLLM(traitKeyword: string) { + const { executeQuery } = useResults(); + + const summary = await executeQuery(` + SELECT + traitName, + COUNT(*) as variantCount, + AVG(riskScore) as avgRiskScore, + SUM(CASE WHEN riskLevel = 'increased' THEN 1 ELSE 0 END) as increasedCount, + SUM(CASE WHEN riskLevel = 'decreased' THEN 1 ELSE 0 END) as decreasedCount + FROM results + WHERE traitName LIKE ? 
+ GROUP BY traitName + ORDER BY variantCount DESC + `, [`%${traitKeyword}%`]); + + return summary; +} + +// Example 10: Prepare data for LLM analysis - top concerns +export async function prepareTopConcernsForLLM() { + const { executeQuery } = useResults(); + + // Get variants with significant risk increase (>30% or <0.7) + const concerns = await executeQuery(` + SELECT + traitName, + studyTitle, + userGenotype, + riskAllele, + effectSize, + riskScore, + riskLevel, + matchedSnp + FROM results + WHERE (riskLevel = 'increased' AND riskScore > 1.3) + OR (riskLevel = 'decreased' AND riskScore < 0.7) + ORDER BY + CASE + WHEN riskLevel = 'increased' THEN riskScore + ELSE 1.0 / riskScore + END DESC + LIMIT 15 + `); + + return concerns; +} + +/** + * Usage in a React component: + * + * ```typescript + * function MyAnalysisComponent() { + * const results = useResults(); + * const [summary, setSummary] = useState(null); + * + * useEffect(() => { + * async function analyze() { + * // Get top 10 highest risk variants + * const topRisks = await results.getTopRisks(10); + * + * // Get all diabetes-related results + * const diabetesResults = await results.queryByTraitPattern('diabetes'); + * + * // Get overall statistics + * const stats = await results.getRiskStatistics(); + * + * setSummary({ topRisks, diabetesResults, stats }); + * } + * + * analyze(); + * }, []); + * + * // Render... + * } + * ``` + */ diff --git a/next.config.mjs b/next.config.mjs index 04ceb3d..d9f707c 100644 --- a/next.config.mjs +++ b/next.config.mjs @@ -31,6 +31,17 @@ const nextConfig = { ], aggregateTimeout: 300, }; + + // Fix sql.js Node.js polyfills for browser-only usage + if (!isServer) { + config.resolve.fallback = { + ...config.resolve.fallback, + fs: false, + path: false, + crypto: false, + }; + } + return config; }, }; diff --git a/package-lock.json b/package-lock.json index 64c5c25..bc959eb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,7 +19,8 @@ "pako": "^2.1.0", "pg": "^8.16.3", "react": "^18.3.1", - "react-dom": "^18.3.1" + "react-dom": "^18.3.1", + "sql.js": "^1.13.0" }, "devDependencies": { "@types/better-sqlite3": "^7.6.13", @@ -5343,6 +5344,11 @@ "node": ">= 10.x" } }, + "node_modules/sql.js": { + "version": "1.13.0", + "resolved": "https://registry.npmjs.org/sql.js/-/sql.js-1.13.0.tgz", + "integrity": "sha512-RJbVP1HRDlUUXahJ7VMTcu9Rm1Nzw+EBpoPr94vnbD4LwR715F3CcxE2G2k45PewcaZ57pjetYa+LoSJLAASgA==" + }, "node_modules/stable-hash": { "version": "0.0.5", "resolved": "https://registry.npmjs.org/stable-hash/-/stable-hash-0.0.5.tgz", diff --git a/package.json b/package.json index 3ae2f90..48c1e36 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,8 @@ "pako": "^2.1.0", "pg": "^8.16.3", "react": "^18.3.1", - "react-dom": "^18.3.1" + "react-dom": "^18.3.1", + "sql.js": "^1.13.0" }, "devDependencies": { "@types/better-sqlite3": "^7.6.13", From 9de888c270f00356b385b83ed54f541aba3984db Mon Sep 17 00:00:00 2001 From: Vishakh Date: Wed, 15 Oct 2025 17:28:17 -0400 Subject: [PATCH 06/75] Refactor `LLMCommentaryModal` to optimize result queries and AI integration Introduced SQL-based top-results querying for improved performance. Enhanced the `fetchCommentary` workflow with phased loading states, progress indicators, and better error handling. Deprecated the `allResults` prop in favor of context-driven SQL queries. Added UI feedback for commentary generation stages and connected new `getTopResultsByEffect` API to streamline database operations. 
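Note on the new API: the diffstat below shows lib/results-database.ts growing by 18 lines for `getTopResultsByEffect`, but the method body falls outside this excerpt. Given the schema from the previous patch and the modal's old in-memory sort by distance from the neutral 1.0 risk score, a plausible sketch is shown here; the standalone function form, the `WHERE ? = ''` guard, and the names are illustrative, not necessarily the patch's exact code:

    import type { Database } from 'sql.js';
    import type { SavedResult } from './results-manager';

    // Sketch only; not necessarily the exact implementation added by this patch.
    // Ranks results by effect magnitude |riskScore - 1.0| and optionally excludes
    // the study currently being viewed (excludeGwasId).
    export function getTopResultsByEffectSketch(
      db: Database,
      limit: number,
      excludeGwasId?: string,
    ): SavedResult[] {
      const exclude = excludeGwasId ?? '';
      const result = db.exec(
        `SELECT * FROM results
         WHERE ? = '' OR gwasId IS NULL OR gwasId != ?
         ORDER BY ABS(riskScore - 1.0) DESC
         LIMIT ?`,
        [exclude, exclude, limit],
      );
      if (!result.length) return [];
      const { columns, values } = result[0];
      return values.map((row) => {
        const obj: Record<string, unknown> = {};
        columns.forEach((col, idx) => {
          obj[col] = row[idx];
        });
        return obj as unknown as SavedResult;
      });
    }

SQLite still computes ABS(riskScore - 1.0) per row for the ORDER BY, but over ~100k in-memory rows that should take milliseconds, which is consistent with the commit's goal of replacing the full JavaScript filter-and-sort pass.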
--- app/components/LLMCommentaryModal.tsx | 332 ++++++++++++++++++++++++-- app/components/ResultsContext.tsx | 2 + app/components/StudyResultReveal.tsx | 6 +- app/globals.css | 81 +++++++ lib/results-database.ts | 18 ++ 5 files changed, 412 insertions(+), 27 deletions(-) diff --git a/app/components/LLMCommentaryModal.tsx b/app/components/LLMCommentaryModal.tsx index 03c3156..89fbed0 100644 --- a/app/components/LLMCommentaryModal.tsx +++ b/app/components/LLMCommentaryModal.tsx @@ -5,12 +5,13 @@ import { SavedResult } from "@/lib/results-manager"; import { NilaiOpenAIClient, AuthType } from "@nillion/nilai-ts"; import NilAIConsentModal from "./NilAIConsentModal"; import StudyQualityIndicators from "./StudyQualityIndicators"; +import { useResults } from "./ResultsContext"; type LLMCommentaryModalProps = { isOpen: boolean; onClose: () => void; currentResult: SavedResult; - allResults: SavedResult[]; + allResults: SavedResult[]; // Deprecated - will use SQL query instead }; const CONSENT_STORAGE_KEY = "nilai_ai_consent_accepted"; @@ -19,8 +20,9 @@ export default function LLMCommentaryModal({ isOpen, onClose, currentResult, - allResults, + allResults, // Deprecated parameter }: LLMCommentaryModalProps) { + const { getTopResultsByEffect, savedResults } = useResults(); const [commentary, setCommentary] = useState(""); const [isLoading, setIsLoading] = useState(false); const [error, setError] = useState(null); @@ -28,6 +30,8 @@ export default function LLMCommentaryModal({ const [showConsentModal, setShowConsentModal] = useState(false); const [hasConsent, setHasConsent] = useState(false); const [studyMetadata, setStudyMetadata] = useState(null); + const [loadingPhase, setLoadingPhase] = useState<'query' | 'metadata' | 'token' | 'ai' | 'done'>('query'); + const [resultsCount, setResultsCount] = useState(0); useEffect(() => { // Check if user has previously consented @@ -38,15 +42,242 @@ export default function LLMCommentaryModal({ }, []); useEffect(() => { + console.log('[LLMCommentaryModal] isOpen changed:', isOpen, 'hasConsent:', hasConsent); if (isOpen) { // Check consent before proceeding if (!hasConsent) { + console.log('[LLMCommentaryModal] Showing consent modal'); setShowConsentModal(true); } else { - fetchCommentary(); + console.log('[LLMCommentaryModal] Starting fetchCommentary'); + // Call fetchCommentary inline to avoid closure issues + (async () => { + console.log('[fetchCommentary] Starting...'); + setIsLoading(true); + setError(null); + setCommentary(""); + setDelegationStatus(""); + setStudyMetadata(null); + setLoadingPhase('query'); + + try { + // Phase 1: Query database for relevant results + const totalResults = savedResults.length; + console.log('[fetchCommentary] Total results:', totalResults); + setResultsCount(totalResults); + setLoadingPhase('query'); + setDelegationStatus(`Querying ${totalResults.toLocaleString()} results...`); + + // Yield to UI to show loading state + await new Promise(resolve => setTimeout(resolve, 50)); + + console.log(`[fetchCommentary] Fetching top 499 results from ${totalResults} total using SQL query...`); + const startFilter = Date.now(); + + const topResults = await getTopResultsByEffect(499, currentResult.gwasId); + console.log('[fetchCommentary] Got top results:', topResults.length); + + // Add current result at the top + const resultsForContext = [currentResult, ...topResults]; + + const filterTime = Date.now() - startFilter; + console.log(`Fetched top 500 results in ${filterTime}ms using indexed SQL query`); + + setDelegationStatus(`✓ Selected 
${resultsForContext.length} most significant results (${filterTime}ms)`); + + // Yield to UI after query + await new Promise(resolve => setTimeout(resolve, 100)); + + // Phase 2: Fetch study metadata for quality indicators + setLoadingPhase('metadata'); + setDelegationStatus("Fetching study quality indicators..."); + await new Promise(resolve => setTimeout(resolve, 50)); + + const metadataResponse = await fetch(`/api/study-metadata?studyId=${currentResult.studyId}`); + if (metadataResponse.ok) { + const metadataData = await metadataResponse.json(); + setStudyMetadata(metadataData.metadata); + setDelegationStatus("✓ Study metadata loaded"); + } + + await new Promise(resolve => setTimeout(resolve, 100)); + + // Phase 3: Initialize NilAI client and get secure token + setLoadingPhase('token'); + setDelegationStatus("Initializing secure AI connection..."); + await new Promise(resolve => setTimeout(resolve, 50)); + + const client = new NilaiOpenAIClient({ + baseURL: "https://nilai-a779.nillion.network/v1/", + authType: AuthType.DELEGATION_TOKEN, + }); + + // Get delegation request from client + const delegationRequest = client.getDelegationRequest(); + + setDelegationStatus("Requesting delegation token from server..."); + + // Request delegation token from server + const tokenResponse = await fetch("/api/nilai-delegation", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ delegationRequest }), + }); + + if (!tokenResponse.ok) { + const errorData = await tokenResponse.json(); + throw new Error(errorData.error || "Failed to get delegation token"); + } + + const { delegationToken } = await tokenResponse.json(); + + // Update client with delegation token + client.updateDelegation(delegationToken); + + setDelegationStatus("✓ Secure token received — connecting to private AI..."); + await new Promise(resolve => setTimeout(resolve, 100)); + + // Phase 4: Generate AI commentary + setLoadingPhase('ai'); + setDelegationStatus("Generating AI analysis (this may take 30-60 seconds)..."); + + const contextResults = resultsForContext + .map((r: SavedResult, idx: number) => + `${idx + 1}. ${r.traitName} (${r.studyTitle}): + - Your genotype: ${r.userGenotype} + - Risk allele: ${r.riskAllele} + - Effect size: ${r.effectSize} + - Risk score: ${r.riskScore}x (${r.riskLevel}) + - Matched SNP: ${r.matchedSnp}` + ) + .join('\n\n'); + + // Construct study quality context + let studyQualityContext = ''; + if (studyMetadata) { + const parseSampleSize = (str: string | null) => { + if (!str) return 0; + const match = str.match(/[\d,]+/); + return match ? parseInt(match[0].replace(/,/g, '')) : 0; + }; + + const initialSize = parseSampleSize(studyMetadata.initial_sample_size); + const replicationSize = parseSampleSize(studyMetadata.replication_sample_size); + + studyQualityContext = ` +STUDY QUALITY INDICATORS (USE THESE TO TEMPER YOUR INTERPRETATION): +- Sample Size: ${initialSize.toLocaleString()} participants ${initialSize < 5000 ? '(SMALL STUDY - interpret with caution)' : initialSize < 50000 ? '(medium study)' : '(large, well-powered study)'} +- Ancestry: ${studyMetadata.initial_sample_size || 'Not specified'} ${studyMetadata.initial_sample_size?.toLowerCase().includes('european') ? '(may not generalize to other ancestries - IMPORTANT LIMITATION)' : ''} +- Replication: ${replicationSize > 0 ? 
`Yes (${replicationSize.toLocaleString()} participants)` : 'No independent replication (interpret with caution)'} +- P-value: ${studyMetadata.p_value || 'Not reported'} ${parseFloat(studyMetadata.p_value || '1') > 5e-8 ? '(NOT genome-wide significant - findings are suggestive only)' : '(genome-wide significant)'} +- Publication: ${studyMetadata.first_author || 'Unknown'}, ${studyMetadata.date || 'Unknown date'} ${studyMetadata.journal ? `in ${studyMetadata.journal}` : ''} + +CRITICAL: You MUST acknowledge these study limitations in your commentary. If sample size is small, ancestry is limited, or replication is lacking, explicitly mention this reduces confidence in the findings.`; + } + + const prompt = `You are a genetic counselor providing educational commentary on GWAS (Genome-Wide Association Study) results. + +IMPORTANT DISCLAIMERS TO INCLUDE: +1. This is for educational and entertainment purposes only +2. This is NOT medical advice and should not be used for medical decisions +3. GWAS results show statistical associations, not deterministic outcomes +4. Genetic risk is just one factor among many (lifestyle, environment, other genes) +5. Always consult healthcare professionals for medical interpretation +6. These results come from research studies and may not be clinically validated +${studyQualityContext} + +CURRENT RESULT TO ANALYZE: +Trait: ${currentResult.traitName} +Study: ${currentResult.studyTitle} +Your genotype: ${currentResult.userGenotype} +Risk allele: ${currentResult.riskAllele} +Effect size: ${currentResult.effectSize} +Risk score: ${currentResult.riskScore}x (${currentResult.riskLevel}) +Matched SNP: ${currentResult.matchedSnp} +Study date: ${currentResult.analysisDate} + +ALL YOUR SAVED RESULTS FOR CONTEXT: +${contextResults} + +Please provide: +1. A brief, plain-language summary of what this research study found (what scientists were investigating and what they discovered) +2. A clear explanation of what this result means for the user specifically +3. Context about the trait/condition in terms anyone can understand +4. Interpretation of the risk level in practical terms +5. How this relates to any other results they have (if applicable) +6. Appropriate disclaimers and next steps + +Keep your response concise (400-600 words), educational, and reassuring where appropriate. Use clear, accessible language suitable for someone with no scientific background. Avoid jargon, and when technical terms are necessary, explain them simply.`; + + // Make request directly to NilAI (data never touches our server!) + const response = await client.chat.completions.create({ + model: "google/gemma-3-27b-it", + messages: [ + { + role: "system", + content: "You are a knowledgeable genetic counselor who explains GWAS results clearly and responsibly, always emphasizing appropriate disclaimers and limitations." + }, + { + role: "user", + content: prompt + } + ], + max_tokens: 1200, + temperature: 0.7, + }); + + const commentaryText = response.choices?.[0]?.message?.content; + + if (!commentaryText) { + throw new Error("No commentary generated from LLM"); + } + + setDelegationStatus("✓ AI analysis complete — formatting response..."); + setLoadingPhase('done'); + await new Promise(resolve => setTimeout(resolve, 50)); + + // Convert markdown to plain HTML + const processedText = commentaryText + .replace(/\*\*(.+?)\*\*/g, '$1') + .replace(/__(.+?)__/g, '$1') + .replace(/\*(.+?)\*/g, '$1') + .replace(/_(.+?)_/g, '$1') + .replace(/^### (.+)$/gm, '

<h3>$1</h3>')
+                  .replace(/^## (.+)$/gm, '<h2>$1</h2>')
+                  .replace(/^# (.+)$/gm, '<h1>$1</h1>')
+                  .replace(/^[\*\-] (.+)$/gm, '<li>$1</li>')
+                  .replace(/(<li>.*<\/li>\n?)+/g, '<ul>$&</ul>')
+                  .split('\n\n')
+                  .map(para => para.trim())
+                  .filter(para => para.length > 0)
+                  .map(para => {
+                    // Headers and lists are already block-level HTML; wrap plain text in <p>
+                    if (para.startsWith('<h') || para.startsWith('<ul')) {
+                      return para;
+                    }
+                    return `<p>${para}</p>
    `; + }) + .join(''); + + setCommentary(processedText); + } catch (err) { + console.error('[fetchCommentary] Error occurred:', err); + const errorMessage = err instanceof Error ? err.message : "Failed to generate commentary"; + console.error('[fetchCommentary] Error message:', errorMessage); + setError(errorMessage); + + if (errorMessage.includes("API key not configured")) { + setError("LLM commentary is not configured. The NILLION_API_KEY environment variable needs to be set."); + } + } finally { + console.log('[fetchCommentary] Finally block, setting isLoading to false'); + setIsLoading(false); + } + })(); } } - }, [isOpen, hasConsent]); + }, [isOpen, hasConsent, savedResults, getTopResultsByEffect, currentResult]); const handleConsentAccept = () => { if (typeof window !== "undefined") { @@ -63,50 +294,61 @@ export default function LLMCommentaryModal({ }; const fetchCommentary = async () => { + console.log('[fetchCommentary] Starting...'); setIsLoading(true); setError(null); setCommentary(""); setDelegationStatus(""); setStudyMetadata(null); + setLoadingPhase('query'); try { - // Yield to UI to show loading state - await new Promise(resolve => setTimeout(resolve, 0)); + // Phase 1: Query database for relevant results + const totalResults = savedResults.length; + console.log('[fetchCommentary] Total results:', totalResults); + setResultsCount(totalResults); + setLoadingPhase('query'); + setDelegationStatus(`Querying ${totalResults.toLocaleString()} results...`); - setDelegationStatus("Preparing your results..."); + // Yield to UI to show loading state + await new Promise(resolve => setTimeout(resolve, 50)); - // Construct the prompt with top 500 results by effect size, plus the current result - // Do this early to avoid blocking later - console.log(`Filtering top 500 from ${allResults.length} results...`); + console.log(`[fetchCommentary] Fetching top 499 results from ${totalResults} total using SQL query...`); const startFilter = Date.now(); - const topResults = allResults - .filter(r => r.gwasId !== currentResult.gwasId) // Exclude current result temporarily - .sort((a, b) => { - // Sort by absolute distance from 1.0 (neutral) - larger effect = further from 1.0 - const aDistance = Math.abs(a.riskScore - 1.0); - const bDistance = Math.abs(b.riskScore - 1.0); - return bDistance - aDistance; - }) - .slice(0, 499); // Take top 499 + const topResults = await getTopResultsByEffect(499, currentResult.gwasId); + console.log('[fetchCommentary] Got top results:', topResults.length); // Add current result at the top const resultsForContext = [currentResult, ...topResults]; const filterTime = Date.now() - startFilter; - console.log(`Filtered to top 500 in ${filterTime}ms`); + console.log(`Fetched top 500 results in ${filterTime}ms using indexed SQL query`); + + setDelegationStatus(`✓ Selected ${resultsForContext.length} most significant results (${filterTime}ms)`); - // Yield to UI after heavy computation - await new Promise(resolve => setTimeout(resolve, 0)); + // Yield to UI after query + await new Promise(resolve => setTimeout(resolve, 100)); + + // Phase 2: Fetch study metadata for quality indicators + setLoadingPhase('metadata'); + setDelegationStatus("Fetching study quality indicators..."); + await new Promise(resolve => setTimeout(resolve, 50)); - // First, fetch study metadata for quality indicators const metadataResponse = await fetch(`/api/study-metadata?studyId=${currentResult.studyId}`); if (metadataResponse.ok) { const metadataData = await metadataResponse.json(); 
setStudyMetadata(metadataData.metadata); + setDelegationStatus("✓ Study metadata loaded"); } - // Initialize NilAI client with delegation token authentication + await new Promise(resolve => setTimeout(resolve, 100)); + + // Phase 3: Initialize NilAI client and get secure token + setLoadingPhase('token'); + setDelegationStatus("Initializing secure AI connection..."); + await new Promise(resolve => setTimeout(resolve, 50)); + const client = new NilaiOpenAIClient({ baseURL: "https://nilai-a779.nillion.network/v1/", authType: AuthType.DELEGATION_TOKEN, @@ -115,7 +357,7 @@ export default function LLMCommentaryModal({ // Get delegation request from client const delegationRequest = client.getDelegationRequest(); - setDelegationStatus("Requesting secure token..."); + setDelegationStatus("Requesting delegation token from server..."); // Request delegation token from server const tokenResponse = await fetch("/api/nilai-delegation", { @@ -136,7 +378,12 @@ export default function LLMCommentaryModal({ // Update client with delegation token client.updateDelegation(delegationToken); - setDelegationStatus("✓ Secure token ready — connecting directly to private AI"); + setDelegationStatus("✓ Secure token received — connecting to private AI..."); + await new Promise(resolve => setTimeout(resolve, 100)); + + // Phase 4: Generate AI commentary + setLoadingPhase('ai'); + setDelegationStatus("Generating AI analysis (this may take 30-60 seconds)..."); const contextResults = resultsForContext .map((r: SavedResult, idx: number) => @@ -229,6 +476,10 @@ Keep your response concise (400-600 words), educational, and reassuring where ap throw new Error("No commentary generated from LLM"); } + setDelegationStatus("✓ AI analysis complete — formatting response..."); + setLoadingPhase('done'); + await new Promise(resolve => setTimeout(resolve, 50)); + // Convert markdown to plain HTML (simple conversion without external libraries) const processedText = commentaryText // Bold: **text** or __text__ @@ -260,7 +511,9 @@ Keep your response concise (400-600 words), educational, and reassuring where ap setCommentary(processedText); } catch (err) { + console.error('[fetchCommentary] Error occurred:', err); const errorMessage = err instanceof Error ? err.message : "Failed to generate commentary"; + console.error('[fetchCommentary] Error message:', errorMessage); setError(errorMessage); // Check if it's a configuration error @@ -268,6 +521,7 @@ Keep your response concise (400-600 words), educational, and reassuring where ap setError("LLM commentary is not configured. The NILLION_API_KEY environment variable needs to be set."); } } finally { + console.log('[fetchCommentary] Finally block, setting isLoading to false'); setIsLoading(false); } }; @@ -324,9 +578,35 @@ Keep your response concise (400-600 words), educational, and reassuring where ap

              <p>Generating personalized commentary with private AI...</p>

+             {/* Progress indicator */}
+             <div>
+               <div>
+                 <span>{loadingPhase !== 'query' ? '✓' : '○'}</span>
+                 <span>Query Results</span>
+               </div>
+               <div>
+                 <span>{['token', 'ai', 'done'].includes(loadingPhase) ? '✓' : '○'}</span>
+                 <span>Study Metadata</span>
+               </div>
+               <div>
+                 <span>{['ai', 'done'].includes(loadingPhase) ? '✓' : '○'}</span>
+                 <span>Secure Token</span>
+               </div>
+               <div>
+                 <span>{loadingPhase === 'done' ? '✓' : '○'}</span>
+                 <span>AI Analysis</span>
+               </div>
+             </div>

              {delegationStatus && (
                <p>
                  {delegationStatus}
+                 {resultsCount > 0 && loadingPhase === 'query' && (
+                   <span> ({resultsCount.toLocaleString()} total results)</span>
+                 )}
                </p>
              )}
              {!delegationStatus && (
                ...

diff --git a/app/components/ResultsContext.tsx b/app/components/ResultsContext.tsx
index 1e02301..116b41b 100644
--- a/app/components/ResultsContext.tsx
+++ b/app/components/ResultsContext.tsx
@@ -22,6 +22,7 @@ type ResultsContextType = {
   queryByRiskScoreRange: (min: number, max: number) => Promise<SavedResult[]>;
   getTopRisks: (limit?: number) => Promise<SavedResult[]>;
   getProtectiveVariants: (limit?: number) => Promise<SavedResult[]>;
+  getTopResultsByEffect: (limit: number, excludeGwasId?: string) => Promise<SavedResult[]>;
   getTraitCategories: () => Promise<Array<any>>;
   getRiskStatistics: () => Promise<any>;
   executeQuery: (sql: string, params?: any[]) => Promise<any[]>;
@@ -152,6 +153,7 @@ export function ResultsProvider({ children }: { children: ReactNode }) {
     queryByRiskScoreRange: resultsDB.queryByRiskScoreRange.bind(resultsDB),
     getTopRisks: resultsDB.getTopRisks.bind(resultsDB),
     getProtectiveVariants: resultsDB.getProtectiveVariants.bind(resultsDB),
+    getTopResultsByEffect: resultsDB.getTopResultsByEffect.bind(resultsDB),
     getTraitCategories: resultsDB.getTraitCategories.bind(resultsDB),
     getRiskStatistics: resultsDB.getRiskStatistics.bind(resultsDB),
     executeQuery: resultsDB.executeQuery.bind(resultsDB),
diff --git a/app/components/StudyResultReveal.tsx b/app/components/StudyResultReveal.tsx
index dceed7c..724dc68 100644
--- a/app/components/StudyResultReveal.tsx
+++ b/app/components/StudyResultReveal.tsx
@@ -266,7 +266,11 @@ export default function StudyResultReveal({ studyId, studyAccession, snps, trait
+          ...
         ) : (
+          ...

+            <div>
+              <label>Ethnicities</label>
+              <input
+                type="text"
+                value={ethnicities}
+                onChange={(e) => setEthnicities(e.target.value)}
+                placeholder="e.g., European, East Asian"
+              />
+            </div>
+
+            <div>
+              <label>Countries of origin</label>
+              <input
+                type="text"
+                value={countriesOfOrigin}
+                onChange={(e) => setCountriesOfOrigin(e.target.value)}
+                placeholder="e.g., India, China"
+              />
+            </div>
+
+            <div>
+              <label>Gender at birth</label>
+              <select
+                value={genderAtBirth}
+                onChange={(e) => setGenderAtBirth(e.target.value)}
+              >
+                <option value="male">Male</option>
+                <option value="female">Female</option>
+                <option value="other">Other</option>
+              </select>
+            </div>
+
+            {genderAtBirth === 'other' && (
+              <div>
+                <label>Gender</label>
+                <input
+                  type="text"
+                  value={customGender}
+                  onChange={(e) => setCustomGender(e.target.value)}
+                  placeholder="Enter your gender"
+                />
+              </div>
+            )}
+
+            <div>
+              <label>Age</label>
+              <input
+                type="number"
+                value={age}
+                onChange={(e) => setAge(e.target.value)}
+                placeholder="e.g., 30"
+                min="0"
+                max="120"
+              />
+            </div>
+