From b26e185b2e6a4327973c36c248cb1dc566c4f085 Mon Sep 17 00:00:00 2001 From: rajarshidattapy Date: Sat, 25 Oct 2025 12:16:08 +0530 Subject: [PATCH 1/3] added CI --- .github/workflows/ci.yml | 21 +++++++++++++++++++++ exora/README.md | 20 ++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..3c96a57 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,21 @@ +name: CI + +on: [push, pull_request] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 18 + cache: 'npm' + - name: Install dependencies + run: npm ci + - name: Lint + run: npm run lint --if-present + - name: Build / Typecheck + run: npm run build --if-present + - name: Run tests + run: npm test --if-present diff --git a/exora/README.md b/exora/README.md index 02cee35..01c95a3 100644 --- a/exora/README.md +++ b/exora/README.md @@ -131,6 +131,25 @@ Canonical inference guides alias filtering, disambiguates homonyms, and drives e - No logging of raw keys - All enrichment transient per request +## Continuous Integration (CI) + +This repository includes a GitHub Actions workflow that runs on push and pull request to enforce quality checks. + +- What it runs: dependency install, linter (if present), build/typecheck (if present), and tests (if present). +- Workflow file: `.github/workflows/ci.yml`. + +To run the same checks locally use: + +```powershell +npm ci +npm run lint # if present +npm run build # if present (often performs TypeScript typechecking) +npm test # if present +``` + +If any of those scripts are missing the workflow currently uses `--if-present` so CI will not fail solely because a script is not defined. If you'd prefer stricter behavior (fail when scripts are missing), update the workflow to remove `--if-present`. + + --- ## šŸ–„ļø Local Development ```bash @@ -170,3 +189,4 @@ Consider further enhancement with a future compact mode (reduced padding + conde --- ## ļæ½šŸ“¬ Contact For questions or collaboration: open an issue or reach out via GitHub profile. + From dabee892f5451b55c804394622cf9ce8a9d25a0b Mon Sep 17 00:00:00 2001 From: rajarshidattapy Date: Sat, 25 Oct 2025 12:49:52 +0530 Subject: [PATCH 2/3] Improve resilience of the server-side SSE briefing stream --- exora/app/api/briefing/route.ts | 989 +++++++++++++------------------- exora/lib/exa-service.ts | 143 ++++- 2 files changed, 527 insertions(+), 605 deletions(-) diff --git a/exora/app/api/briefing/route.ts b/exora/app/api/briefing/route.ts index 1728770..92dd759 100644 --- a/exora/app/api/briefing/route.ts +++ b/exora/app/api/briefing/route.ts @@ -1,280 +1,176 @@ -// app/api/briefing/route.ts - -import { NextResponse } from 'next/server'; -import { safeGenerateText, safeGenerateJson, ProviderConfig } from '@/lib/llm-service'; -import { fetchExaData, fetchHistoricalData } from '@/lib/exa-service'; -import { calculateNarrativeMomentum, calculatePulseIndex, generateSentimentHistoricalData, generateEnhancedSentimentAnalysis } from '@/lib/analysis-service'; -import { normalizePublishedDate } from '@/lib/utils'; -import { TRUSTED_SOURCES as TRUSTED_SOURCES_SET, isTrustedSource } from '@/lib/constants'; -import { getCanonicalInfo } from '@/lib/canonical'; -import { BriefingResponse, EventType, CompanyProfile, FounderInfo } from '@/lib/types'; - -// --- HELPER FUNCTIONS --- - -function getIndustryDefaults(domain: string): string[] { - if (domain.includes('ai') || domain.includes('tech')) return ['openai.com', 'anthropic.com', 'google.com']; - if (domain.includes('shop') || domain.includes('store')) return ['shopify.com', 'amazon.com', 'ebay.com']; - if (domain.includes('pay') || domain.includes('bank')) return ['paypal.com', 'stripe.com', 'square.com']; - return ['google.com', 'microsoft.com', 'amazon.com']; +import { NextResponse } from 'next/server' +import { sharedLimiter } from '@/lib/limiter' +import { safeGenerateText, safeGenerateJson, ProviderConfig } from '@/lib/llm-service' +import { fetchExaData, scoreNewsItem, exaAdHocSearch } from '@/lib/exa-service' +import { TRUSTED_SOURCES as TRUSTED_SOURCES_SET, isTrustedSource } from '@/lib/constants' +import { getCanonicalInfo } from '@/lib/canonical' +import { getOrGenerateProfileSnapshot } from '@/lib/profile-snapshot' +import { calculateNarrativeMomentum, calculatePulseIndex, generateSentimentHistoricalData, generateEnhancedSentimentAnalysis } from '@/lib/analysis-service' +import { normalizePublishedDate } from '@/lib/utils' +import Sentry, { flush as sentryFlush } from '@/lib/sentry' + +// Emit SSE event helper +function sseChunk(event: string, data: unknown) { + return `event: ${event}\n` + `data: ${JSON.stringify(data)}\n\n` } -function parseCompetitorResponse(text: string): string[] { - const jsonMatch = text.match(/\[[\s\S]*?\]/); - if (jsonMatch) { - try { return JSON.parse(jsonMatch[0]); } catch { } - } - const domainMatches = text.match(/[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g); - return domainMatches ? domainMatches.slice(0, 3) : []; +// Tiny utility to normalize domains +function cleanDomain(raw: string) { + return raw.replace(/^https?:\/\//, '').replace(/^www\./, '').replace(/\/$/, '').trim() } -async function discoverCompetitors(domain: string, providers: ProviderConfig[]): Promise { - const prompt = `Analyze ONLY the company operating at the exact domain \"${domain}\" (ignore similarly named companies on different TLDs like .finance, .ai, etc.). Identify the top 3 direct competitors. Return ONLY a JSON array of competitor domains.\n\nExamples:\n- stripe.com → ["paypal.com", "square.com", "adyen.com"]\n\nFormat: ["competitor1.com", "competitor2.com", "competitor3.com"]`; - - try { - const result = await safeGenerateJson(prompt, providers); - if (result && Array.isArray(result) && result.length > 0) { - console.log('āœ… Competitor discovery via JSON succeeded.'); - return result; - } - } catch (error) { - console.warn(`Competitor discovery via JSON failed, falling back to text parsing...`); - } - +// Safe call wrapper with timeout, logging & metrics +async function safeCall(fn: () => Promise, opts?: { timeoutMs?: number; stage?: string; fallback?: T }) { + const timeoutMs = opts?.timeoutMs ?? 4000 + const stage = opts?.stage ?? 'external_call' + Sentry.addBreadcrumb({ category: 'sse', message: `call_start:${stage}` }) try { - const textResult = await safeGenerateText(prompt, providers); - const parsedCompetitors = parseCompetitorResponse(textResult); - if (parsedCompetitors.length > 0) { - console.log('āœ… Competitor discovery via text parsing succeeded.'); - return parsedCompetitors; - } - } catch (error) { - console.warn(`Competitor discovery via text parsing failed, falling back to industry defaults...`); + const res = await Promise.race([ + fn(), + new Promise((_, rej) => setTimeout(() => rej(new Error('timeout')), timeoutMs)) + ]) + Sentry.addBreadcrumb({ category: 'sse', message: `call_done:${stage}` }) + return res + } catch (err: any) { + Sentry.addBreadcrumb({ category: 'sse', message: `call_failed:${stage}`, data: { message: err?.message } }) + try { Sentry.captureException(err) } catch (e) {} + // return fallback when provided, otherwise throw + if (opts && Object.prototype.hasOwnProperty.call(opts, 'fallback')) return opts!.fallback as T + throw err } - - console.log('āš ļø Using industry default competitors.'); - return getIndustryDefaults(domain); } -// --- Profile Refinement Helper --- -async function refineIncompleteProfile(base: CompanyProfile, domain: string, providers: ProviderConfig[]): Promise> { - const missing: string[] = [] - if (!base.industry) missing.push('industry') - if (!base.foundedYear) missing.push('foundedYear') - if (!base.headquarters) missing.push('headquarters') - if (!base.headcountRange && !base.employeeCountApprox) missing.push('headcountRange','employeeCount') - if (!base.brief) missing.push('brief') - if (!base.description || base.description.length < 40) missing.push('description') - if (missing.length === 0) return {} - - const refinePrompt = `We have a partial company profile for domain "${domain}". -Existing JSON: -${JSON.stringify(base, null, 2)} - -Provide ONLY a JSON object containing ONLY the missing or weak fields from this list: ${missing.join(', ')}. -Rules: -- Do NOT include fields that are already sufficiently populated. -- If still unknown, omit the field entirely (do not output null). -- description <=320 chars; brief <=160 chars; headcountRange must be one of: "1-10","11-50","51-200","201-500","501-1000","1000-5000","5000+". -- Avoid marketing fluff; keep factual. -Return JSON now:` +// Competitor discovery +async function discoverCompetitors(domain: string, providers: ProviderConfig[]): Promise { + const prompt = `Identify top 3 direct competitors for the company operating at the exact domain ${domain}. Ignore similarly named companies on different TLDs. Return ONLY JSON array of domains.` try { - const patch = await safeGenerateJson(refinePrompt, providers) - return patch || {} + const list = await safeGenerateJson(prompt, providers) + return Array.isArray(list) ? list.slice(0, 3) : [] } catch { - return {} - } -} - -function validateAndCleanCompetitors(competitors: string[], primaryDomain: string): string[] { - return competitors - .map(d => d.replace(/https?:\/\//, '').replace(/^www\./, '').replace(/["']/g, '').trim()) - .filter(d => d && /^[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(d) && d.toLowerCase() !== primaryDomain.toLowerCase()) - .slice(0, 3); -} - -async function getSentimentScore(headlines: string[], groqKey?: string, openAiKey?: string): Promise { - if (headlines.length === 0) return 50; - const prompt = `Analyze the sentiment of these headlines about a company. You MUST respond with ONLY a number between 0-100. No words, no explanation.\n\n0 = Very Negative\n50 = Neutral\n100 = Very Positive\n\nHeadlines:\n${headlines.map((h) => `- ${h}`).join('\n')}\n\nResponse (number only):`; - - try { - const scoreText = await safeGenerateText(prompt, [ - { provider: 'groq', apiKey: groqKey }, - { provider: 'openai', apiKey: openAiKey } - ]); - const cleanText = scoreText.trim().replace(/[^0-9.]/g, ''); - let score = parseInt(cleanText); - if (isNaN(score)) { score = Math.round(parseFloat(cleanText)); } - if (isNaN(score)) { - const matches = scoreText.match(/\d+/); - if (matches) { score = parseInt(matches[0]); } + try { + const text = await safeGenerateText(prompt, providers) + const m = text.match(/\[[\s\S]*?\]/) + return m ? (JSON.parse(m[0]) as string[]).slice(0, 3) : [] + } catch { + return [] } - if (isNaN(score) || score < 0 || score > 100) { return 50; } - return score; - } catch (e) { return 50; } -} - -function isRelevantToCompany(headline: string, domain: string): boolean { - const companyName = domain.split('.')[0].toLowerCase(); - const headlineLower = headline.toLowerCase(); - const relevancePatterns = [ - new RegExp(`^${companyName}`, 'i'), - new RegExp(`${companyName} (announces|launches|raises|acquires|cuts|hires|reports)`, 'i'), - ]; - return relevancePatterns.some(pattern => pattern.test(headlineLower)); + } } -async function classifyEventType(title: string, groqKey?: string, openAiKey?: string): Promise { - if (!title) return "Other"; - const prompt = `Classify this headline into ONE category: Funding, Product Launch, Acquisition, Layoffs. Return ONLY the category name, no other text.\n\nHeadline: "${title}"`; +async function getSentimentScore(headlines: string[], providers: ProviderConfig[]): Promise { + if (!headlines?.length) return 50 + // 1) Try LLM fast path + const prompt = `Score the sentiment (0-100) of these headlines. Return ONLY the number.\n\n${headlines.map(h => `- ${h}`).join('\n')}` try { - const classification = await safeGenerateText(prompt, [ - { provider: 'groq', apiKey: groqKey }, - { provider: 'openai', apiKey: openAiKey } - ]); - const clean = classification.trim().split('\n')[0].replace(/[^a-zA-Z\s]/g, '').trim(); - if (["Funding", "Product Launch", "Acquisition", "Layoffs"].includes(clean)) { - return clean as EventType; - } - return "Other"; - } catch (e) { return "Other"; } -} - -// Optional NewsAPI fallback when Exa is unavailable or returns empty -async function fetchNewsApiFallback(domain: string, apiKey?: string) { - if (!apiKey) return [] as { title: string; url: string; publishedAt: string; source?: { name?: string } }[] - const companyName = domain.split('.')[0] - const params = new URLSearchParams({ - q: `${companyName} OR ${domain}`, - sortBy: 'publishedAt', - language: 'en', - pageSize: '12' - }) - const url = `https://newsapi.org/v2/everything?${params.toString()}` - try { - const res = await fetch(url, { headers: { 'X-Api-Key': apiKey } }) - if (!res.ok) return [] - const json = await res.json() - const articles = (json.articles || []) as any[] - // Filter homonyms: keep exact-domain links and trusted news; drop other brand TLDs - const trustedNews = TRUSTED_SOURCES_SET - const getHost = (u: string): string | null => { try { return new URL(u).hostname.toLowerCase() } catch { return null } } - const target = domain.toLowerCase() - const brand = companyName.toLowerCase() - return articles.filter(a => { - const host = getHost(a.url) - if (!host) return true - const isExact = host === target || host.endsWith(`.${target}`) - if (isExact) return true - const isTrusted = trustedNews.has(host) || Array.from(trustedNews).some(n => host === n || host.endsWith(`.${n}`)) - if (isTrusted) return true - if (host.includes(brand)) return false - return true - }) - } catch { - return [] + const txt = await safeGenerateText(prompt, providers) + const num = parseInt(txt.replace(/[^0-9]/g, '')) + if (Number.isFinite(num) && num >= 0 && num <= 100) return num + } catch {} + + // 2) Deterministic lexical fallback (ensures non-flat variation without LLM) + const POS = ['growth','record','surge','expand','expands','launch','launches','innovative','partnership','raises','funding','profit','profitable','beat','beats','up','acquire','acquires','strong','improve','improves'] + const NEG = ['layoff','layoffs','cut','cuts','lawsuit','decline','drop','plunge','breach','shutdown','loss','negative','down','fraud','issue','issues','risk','risks','weak','slow','slows'] + let pos = 0, neg = 0 + for (const h of headlines) { + const lower = (h||'').toLowerCase() + POS.forEach(w=> { if (lower.includes(w)) pos++ }) + NEG.forEach(w=> { if (lower.includes(w)) neg++ }) } + const total = pos + neg + if (total === 0) return 50 // truly neutral fallback + // Center at 50, scale polarity to ±35, add mild volume boost capped + const polarity = (pos - neg) / total // -1..1 + const volumeBoost = Math.min(10, Math.log2(headlines.length + 1) * 3) // 0..~10 + const score = Math.round(50 + polarity * 35 + volumeBoost * (polarity >= 0 ? 0.4 : -0.4)) + return Math.max(0, Math.min(100, score)) } -// Layer 2: AI "Bouncer" — quick, cheap validation to avoid nonsense inputs -async function validateDomainIsCompany(domain: string, providers: ProviderConfig[]): Promise { - const prompt = `Is the website at the domain "${domain}" the primary, official website for a single company, organization, or commercial product? Answer ONLY with a single word: YES or NO.` - try { - const response = await safeGenerateText(prompt, providers) - if (/YES/i.test(response)) { - console.log(`āœ… Domain validation PASSED for: ${domain}`) - return true - } - } catch (error) { - console.warn(`Domain validation LLM call failed for ${domain}:`, error) - // Graceful: if the validator fails, allow downstream best-effort - return true +export const runtime = 'nodejs' + +export async function GET(req: Request) { + const { searchParams } = new URL(req.url) + const raw = searchParams.get('domain') + if (!raw) return NextResponse.json({ error: 'Missing domain' }, { status: 400 }) + const domain = cleanDomain(raw) + Sentry.addBreadcrumb({ category: 'sse', message: 'briefing_stream_start', data: { domain } }) + // Dynamic canonical inference structure + let canonical: { canonicalName: string; aliases: string[]; industryHint?: string; brandTokens?: string[] } | null = null + const refreshProfile = searchParams.get('refreshProfile') === 'true' + + // BYOK: decode optional keys param (base64url JSON { exa, groq, gemini, openai }) + let userKeys: Partial> = {} + const keysParam = searchParams.get('keys') + if (keysParam) { + try { + const padded = keysParam.replace(/-/g,'+').replace(/_/g,'/') + const json = decodeURIComponent(Buffer.from(padded, 'base64').toString('utf8')) + } catch {} + try { + // Second attempt: direct base64url decode without decodeURIComponent + const normalized = keysParam.replace(/-/g,'+').replace(/_/g,'/') + const buf = Buffer.from(normalized, 'base64') + const raw = buf.toString('utf8') + const parsed = JSON.parse(raw) + if (parsed && typeof parsed === 'object') { + userKeys = parsed + } + } catch {} } - console.log(`āŒ Domain validation FAILED for: ${domain}`) - return false -} -// --- MAIN API ENDPOINT --- -export async function GET(request: Request) { - const { searchParams } = new URL(request.url); - const rawDomain = searchParams.get('domain'); + const exaKey = (userKeys.exa || process.env.EXA_API_KEY || '').trim() + const groqKey = (userKeys.groq || process.env.GROQ_API_KEY || '').trim() + const openAiKey = (userKeys.openai || process.env.OPENAI_API_KEY || '').trim() + const geminiKey = (userKeys.gemini || process.env.GEMINI_API_KEY || '').trim() - // šŸ”§ FIXED: Get API keys ONLY from the server's .env.local file. - const exaApiKey = process.env.EXA_API_KEY; - const groqApiKey = process.env.GROQ_API_KEY; - const openAiApiKey = process.env.OPENAI_API_KEY; - const geminiApiKey = process.env.GEMINI_API_KEY; - const newsApiKey = process.env.NEWS_API_KEY; + if (!exaKey) return NextResponse.json({ error: 'Missing Exa API key' }, { status: 400 }) - if (!rawDomain) { - return NextResponse.json({ error: 'Missing domain parameter' }, { status: 400 }); - } - // šŸ”§ FIXED: Check for server-side keys now. - if (!exaApiKey) { - return NextResponse.json({ error: 'Missing EXA_API_KEY in the server environment (.env.local)' }, { status: 500 }); - } - - const domain = rawDomain.replace(/^https?:\/\//, '').replace(/^www\./, '').replace(/\/$/, '').trim(); - console.log(`šŸ¢ Processing domain: "${rawDomain}" → cleaned: "${domain}"`); + const llmProviders: ProviderConfig[] = [] + if (groqKey) llmProviders.push({ provider: 'groq', apiKey: groqKey, model: 'llama-3.1-8b-instant' }) + if (geminiKey) llmProviders.push({ provider: 'gemini', apiKey: geminiKey, model: 'gemini-2.0-flash' }) + if (openAiKey) llmProviders.push({ provider: 'openai', apiKey: openAiKey, model: 'gpt-4o-mini' }) + // If user provided no LLM keys, llmProviders remains empty; downstream safeGenerate* functions should degrade gracefully - try { - const llmProviders: ProviderConfig[] = [ - { provider: 'gemini', apiKey: geminiApiKey }, - { provider: 'openai', apiKey: openAiApiKey } - ]; - const groqProviders: ProviderConfig[] = [ - { provider: 'groq', apiKey: groqApiKey }, - { provider: 'gemini', apiKey: geminiApiKey } - ]; - - // Canonical enrichment to disambiguate brand/entity before any LLM lookups (improves founders accuracy) - let canonical: { canonicalName?: string; aliases?: string[]; industryHint?: string } | null = null - try { - canonical = await getCanonicalInfo(domain, llmProviders) - } catch {} + const stream = new ReadableStream({ + async start(controller) { + const send = (event: string, data: unknown) => controller.enqueue(new TextEncoder().encode(sseChunk(event, data))) - // --- NEW: Layer 2 - The AI Bouncer --- - const isCompany = await validateDomainIsCompany(domain, groqProviders) - if (!isCompany) { - return NextResponse.json({ - error: 'Invalid Input', - details: `The provided URL "${domain}" does not appear to be a primary company website. Please try a different domain.` - }, { status: 400 }) - } + try { + // Stage 0: Canonical inference (disambiguation & alias surface) + try { + Sentry.addBreadcrumb({ category: 'sse', message: 'canonical_inference_start' }) + canonical = await getCanonicalInfo(domain, llmProviders) + Sentry.addBreadcrumb({ category: 'sse', message: 'canonical_inference_done', data: { canonical } }) + send('canonical', { canonical }) + } catch (e) { + Sentry.addBreadcrumb({ category: 'sse', message: 'canonical_inference_failed' }) + } + // Stage 1: Overview (fast LLM TL;DR) + Sentry.addBreadcrumb({ category: 'sse', message: 'profile_snapshot_start' }) + const snapshotProfile = await getOrGenerateProfileSnapshot(domain, llmProviders, refreshProfile) + Sentry.addBreadcrumb({ category: 'sse', message: 'profile_snapshot_done' }) + // Attach canonical enrichment: only override if snapshot name looks like raw domain stem or is very short (<4 chars) + if (canonical) { + const baseStem = domain.split('.')[0].toLowerCase() + const currentName = (snapshotProfile.name || '').trim() + const looksGeneric = !currentName || currentName.toLowerCase() === baseStem || currentName.length < 4 + if (looksGeneric && canonical.canonicalName) { + snapshotProfile.name = canonical.canonicalName + } + snapshotProfile.canonicalName = canonical.canonicalName + snapshotProfile.aliases = canonical.aliases + if (!snapshotProfile.industry && canonical.industryHint) { + snapshotProfile.industry = canonical.industryHint + } + } + const overview = snapshotProfile.brief || snapshotProfile.description?.split('.').slice(0,1).join('.') || `Company at ${domain}` + send('overview', { domain, overview }) + send('profile', { profile: snapshotProfile }) - // --- Step 1: Improved Groq TL;DR --- - const groqTlDrPrompt = `What is the company at the domain "${domain}"? Provide a brief, factual description of what they do in one sentence. If you're not familiar with this specific company, make a reasonable inference based on the domain name and provide a general business description.`; - const groqTlDrPromise = safeGenerateText(groqTlDrPrompt, groqProviders) - .catch(() => `Company analysis for ${domain}`); - - const competitorsPromise = discoverCompetitors(domain, llmProviders) - .then(raw => validateAndCleanCompetitors(raw, domain)); - - const [groqTlDr, competitors] = await Promise.all([groqTlDrPromise, competitorsPromise]); - const allDomains = [domain, ...competitors]; - - // --- Step 2: Additional LLM metadata (Company Profile, Founders) --- - const profilePrompt = `You are enriching a structured company profile for the domain "${domain}"${canonical?.canonicalName ? ` (canonical: ${canonical.canonicalName}${canonical?.aliases?.length ? `; aliases: ${canonical.aliases.join(', ')}` : ''})` : ''}. -Return ONLY valid JSON with the following keys (omit or null if genuinely unknown – do NOT hallucinate specifics): -{ - "name": string, // Official or widely used brand name - "description": string, // <=320 chars factual what the company does (no marketing fluff) - "brief": string, // <=160 chars crisp capsule summary - "ipoStatus": "Public"|"Private"|"Unknown", - "industry": string|null, // General industry vertical (e.g. Fintech, Cloud Infrastructure) - "foundedYear": number|null, - "headquarters": string|null, // City, Country or City, State (US) - "headcountRange": string|null, // e.g. "1-10", "11-50", "51-200", "201-500", "501-1000", "1000-5000", "5000+" - "employeeCount": number|null, // Approx current total employees worldwide (integer) - "socials": { "linkedin"?: string, "twitter"?: string, "facebook"?: string } -} -Rules: -- Prefer concise, factual wording. -- If the domain looks like an early-stage startup and no data is known, return minimal safe fields. -- Never invent precise employee counts – use null if not inferable. -- If name would just repeat domain verbatim (e.g. "google.com"), strip the TLD -> "Google".`; - const foundersPrompt = `Task: Identify founders for the exact company operating at domain: ${domain}${canonical?.canonicalName ? ` (canonical name: ${canonical.canonicalName})` : ''}${canonical?.aliases?.length ? `; known aliases: ${canonical.aliases.join(', ')}` : ''}. + // Stage 2: Founders + Socials (parallel, but limited) + // Strengthened founders prompt with strict JSON-only rules and ambiguity guard + const foundersPrompt = `Task: Identify founders for the exact company operating at domain: ${domain}${canonical ? ` (canonical name: ${canonical.canonicalName})` : ''}${canonical?.aliases?.length ? `; known aliases: ${canonical.aliases.join(', ')}` : ''}. Rules: - ONLY include founders or co-founders of this exact company (match the canonical brand, not similarly named orgs or different TLDs). @@ -282,69 +178,27 @@ Rules: - Return ONLY a JSON array. No prose. Output JSON schema (array of objects): -[{ "name": string, "linkedin"?: string|null, "twitter"?: string|null, "role"?: string|null }] -`; - - const companyProfilePromise = safeGenerateJson(profilePrompt, llmProviders) - .catch(() => ({ name: domain.split('.')[0], domain, description: `Company at ${domain}`, ipoStatus: 'Unknown', socials: {} } as CompanyProfile)) - .then(async (rawProfile) => { - // Normalize weak name - const bare = domain.split('.')[0] - if (rawProfile.name && /\.[a-z]{2,}$/.test(rawProfile.name)) { - rawProfile.name = rawProfile.name.replace(/\.[a-z]{2,}$/,'') - } - if (!rawProfile.name || rawProfile.name.toLowerCase() === domain.toLowerCase()) rawProfile.name = bare.charAt(0).toUpperCase() + bare.slice(1) - - // Run refinement pass for missing fields - const refinement = await refineIncompleteProfile(rawProfile as CompanyProfile, domain, llmProviders) - const merged: CompanyProfile = { ...rawProfile, ...refinement } - - // Heuristic employeeCountApprox from headcountRange - if (!merged.employeeCountApprox && merged.headcountRange) { - const range = merged.headcountRange - const midpoint = (r: string): number | undefined => { - if (r === '5000+') return 7500 - const m = r.match(/(\d+)-(\d+)/) - if (m) { return Math.round((parseInt(m[1]) + parseInt(m[2]))/2) } - return undefined - } - ;(merged as any).employeeCountApprox = merged.employeeCountApprox || midpoint(range) - } +[{ "name": string, "linkedin"?: string|null, "twitter"?: string|null, "role"?: string|null, "confidence"?: number }] +` + const profilePrompt = `Profile for ${domain}${canonical ? ` (canonical name: ${canonical.canonicalName})` : ''}. Return ONLY JSON with name, description, ipoStatus, socials{linkedin,twitter,facebook}` - // Weak description repair - if (!merged.description || merged.description.length < 40) { - merged.description = merged.brief || `Company operating at ${domain}` - } - if (!merged.brief) { - merged.brief = (merged.description.length <= 160) ? merged.description : merged.description.slice(0,157) + '…' - } - - // Data quality scoring - const filledCore = [merged.industry, merged.foundedYear, merged.headquarters, merged.headcountRange, merged.employeeCountApprox].filter(Boolean).length - let quality: 'high' | 'medium' | 'low' = 'low' - if (filledCore >= 4) quality = 'high' - else if (filledCore >= 2) quality = 'medium' - merged.profileDataQuality = quality - - return merged - }); - const founderInfoPromise = (async () => { - try { - // 1) Web search for founder info using Exa (actual web sources) - const { searchFounderInfo } = await import('@/lib/exa-service'); - const founderSearchResults = await searchFounderInfo( - domain, - canonical?.canonicalName || null, - exaApiKey - ); - - // 2) Extract founder names from web content using LLM - if (founderSearchResults.results.length > 0) { - const webContent = founderSearchResults.results - .map((r, i) => `Source ${i + 1} (${r.url}):\n${r.text || r.title || ''}`) - .join('\n\n'); - - const extractPrompt = `Based on these web sources about ${canonical?.canonicalName || domain}, extract ONLY the founders/co-founders and CEO. + const foundersPromise = (async () => { + try { + // 1) Web search for founder info using Exa (actual web sources) + const { searchFounderInfo } = await import('@/lib/exa-service'); + const founderSearchResults = await safeCall(() => searchFounderInfo( + domain, + canonical?.canonicalName || null, + exaKey + ), { timeoutMs: 5000, stage: 'searchFounderInfo', fallback: { results: [] } as any }) + + // 2) Extract founder names from web content using LLM + if (founderSearchResults.results.length > 0) { + const webContent = founderSearchResults.results + .map((r, i) => `Source ${i + 1} (${r.url}):\n${r.text || r.title || ''}`) + .join('\n\n'); + + const extractPrompt = `Based on these web sources about ${canonical?.canonicalName || domain}, extract ONLY the founders/co-founders and CEO. Web Sources: ${webContent} @@ -359,293 +213,256 @@ Rules: Return ONLY valid JSON array: [{ "name": string, "role": string, "linkedin"?: string, "twitter"?: string }]`; - const extracted = await safeGenerateJson(extractPrompt, llmProviders).catch(() => []); + const extracted = await safeGenerateJson(extractPrompt, llmProviders).catch(() => []); - if (Array.isArray(extracted) && extracted.length > 0) { - const cleaned = extracted - .map((p: any) => ({ - name: typeof p?.name === 'string' ? p.name.trim() : '', - linkedin: typeof p?.linkedin === 'string' ? p.linkedin : undefined, - twitter: typeof p?.twitter === 'string' ? p.twitter : undefined, - role: typeof p?.role === 'string' ? p.role : undefined, - })) - .filter(p => p.name && p.name.length < 120 && p.name.length > 2); - - if (cleaned.length > 0) { - return cleaned; - } - } - } - - // 3) Fallback: Try direct LLM if web search found nothing - console.log(`āš ļø Web search found no founder info for ${domain}, falling back to LLM`); - const fallbackPrompt = `Who are the founders and current CEO of ${canonical?.canonicalName || domain}? Return ONLY JSON array: [{"name": string, "role": string}]`; - const fallback = await safeGenerateJson(fallbackPrompt, llmProviders).catch(() => []); - - if (Array.isArray(fallback) && fallback.length > 0) { - return fallback + if (Array.isArray(extracted) && extracted.length > 0) { + const cleaned = extracted .map((p: any) => ({ name: typeof p?.name === 'string' ? p.name.trim() : '', - role: typeof p?.role === 'string' ? p.role : 'Founder', + linkedin: typeof p?.linkedin === 'string' ? p.linkedin : undefined, + twitter: typeof p?.twitter === 'string' ? p.twitter : undefined, + role: typeof p?.role === 'string' ? p.role : undefined, })) - .filter(p => p.name && p.name.length < 120) - .slice(0, 5); // Limit to top 5 - } + .filter(p => p.name && p.name.length < 120 && p.name.length > 2); - return []; - } catch (error) { - console.error('Error fetching founder info:', error); - return []; + if (cleaned.length > 0) { + return cleaned; + } + } } - })(); - // --- Step 3: Combined Data Fetch with Historical Data --- - console.log(`šŸ“‹ Fetching comprehensive data for domains: ${allDomains.join(', ')}`); + // 3) Fallback: Try direct LLM if web search found nothing + console.log(`āš ļø Web search found no founder info for ${domain}, falling back to LLM`); + const fallbackPrompt = `Who are the founders and current CEO of ${canonical?.canonicalName || domain}? Return ONLY JSON array: [{"name": string, "role": string}]`; + const fallback = await safeGenerateJson(fallbackPrompt, llmProviders).catch(() => []); + + if (Array.isArray(fallback) && fallback.length > 0) { + return fallback + .map((p: any) => ({ + name: typeof p?.name === 'string' ? p.name.trim() : '', + role: typeof p?.role === 'string' ? p.role : 'Founder', + })) + .filter(p => p.name && p.name.length < 120) + .slice(0, 5); // Limit to top 5 + } - const [mentionsResults, signalsResults, historicalDataResults, profileResult, foundersResult] = await Promise.allSettled([ - Promise.all( - allDomains.map((d, i) => new Promise(resolve => setTimeout(resolve, i * 250)).then(() => { - const limit = i === 0 ? 15 : 8; // main company richer, competitors lighter - return fetchExaData(d, 'mentions', exaApiKey, { numResults: limit }) - })) - ), - fetchExaData(domain, 'signals', exaApiKey), - Promise.all(allDomains.map(d => fetchHistoricalData(d, exaApiKey))), // This is slow, but necessary for V1 - companyProfilePromise, - founderInfoPromise - ]); - - const allMentions = (mentionsResults.status === 'fulfilled') ? mentionsResults.value : allDomains.map(() => ({ results: [] })); - const primarySignals = (signalsResults.status === 'fulfilled') ? signalsResults.value : { results: [] }; - const allHistoricalData = (historicalDataResults.status === 'fulfilled') ? historicalDataResults.value : allDomains.map(() => []); - const companyProfile = (profileResult.status === 'fulfilled') ? profileResult.value as CompanyProfile : { name: domain.split('.')[0], domain, description: `Company at ${domain}`, ipoStatus: 'Unknown', socials: {} } as CompanyProfile; - const founderInfo = (foundersResult.status === 'fulfilled') ? foundersResult.value as FounderInfo[] : []; - - // If Exa mentions are empty, try NewsAPI fallback per domain - const enhancedMentions = await Promise.all(allDomains.map(async (d, i) => { - const cur = allMentions[i]?.results || [] - if (cur.length > 0) return { results: cur } - if (!newsApiKey) return { results: [] } - const articles = await fetchNewsApiFallback(d, newsApiKey) - const mapped = articles.map((a: any) => ({ - title: a.title, - url: a.url, - domain: a.source?.name || 'news', - publishedDate: normalizePublishedDate(a.publishedAt) - })) - return { results: mapped } - })) - - console.log(`šŸ“‹ Final data summary: mentions=${enhancedMentions.map((m, i) => `${allDomains[i]}:${m.results?.length || 0}`).join(', ')}, historical=${allHistoricalData.map((h, i) => `${allDomains[i]}:${h?.length || 0}`).join(', ')}`); - - // --- Step 4a: Real Sentiment Analysis --- - const sentimentScores = await Promise.all( - enhancedMentions.map(mention => { - const headlines = (mention.results || []).slice(0, 5).map((m: any) => m.title || ''); - return getSentimentScore(headlines, groqApiKey ?? undefined, openAiApiKey ?? undefined); - }) - ); - - // --- Step 4b: Event Log Processing with fallback --- - const allRawSignals = (primarySignals.results || []); - // Start with strict relevance; if we got too few, relax the filter - let relevantSignals = allRawSignals.filter((s: any) => s.title && isRelevantToCompany(s.title, domain)); - if (relevantSignals.length < 3) { - relevantSignals = allRawSignals.filter((s: any) => !!s.title); + return []; + } catch (error) { + try { + Sentry.withScope((scope: any) => { + scope.setTag('sse_subtask', 'founders_fetch') + scope.setExtra('domain', domain) + Sentry.captureException(error) + }) + // flush so short-lived events have a chance to be delivered + await sentryFlush(1500) + } catch (e) {} + console.error('Error fetching founder info:', error); + return []; } + })() - let finalEventLog; - if (relevantSignals.length > 0) { - const classifiedEvents = await Promise.all( - relevantSignals.slice(0, 12).map(async (signal: any) => ({ - date: normalizePublishedDate(signal.publishedDate || new Date().toISOString()), - headline: signal.title || 'N/A', - type: await classifyEventType(signal.title, groqApiKey ?? undefined, openAiApiKey ?? undefined), - url: signal.url || '' - })) - ); - finalEventLog = classifiedEvents; - } else { - // Fallback: build news from primary mentions, prefer exact-domain matches first - const primaryMentions = (enhancedMentions[0]?.results || []) as any[]; - const target = domain.toLowerCase(); - const getHost = (u: string): string | null => { try { return new URL(u).hostname.toLowerCase() } catch { return null } } - const exact = primaryMentions.filter(m => { const h = getHost(m.url); return h && (h === target || h.endsWith(`.${target}`)) }) - const rest = primaryMentions.filter(m => !exact.includes(m)) - const ordered = [...exact, ...rest] - finalEventLog = ordered.slice(0, 12).map((m: any) => ({ - date: normalizePublishedDate(m.publishedDate || new Date().toISOString()), - headline: m.title || 'N/A', - type: 'Other' as const, - url: m.url || '#' - })); - } + const profilePromise = safeGenerateJson(profilePrompt, llmProviders).catch((err: any) => { + try { Sentry.addBreadcrumb({ category: 'sse', message: 'profile_prompt_failed' }); Sentry.captureException(err) } catch (e) {} + return ({ name: domain.split('.')[0], description: `Company at ${domain}`, ipoStatus: 'Unknown', socials: {} }) + }) - // --- Step 4c: Calculate Final Benchmark Matrix (WITH SENTIMENT DATA) --- - const enableEnhanced = process.env.ENABLE_ENHANCED_SENTIMENT === 'true' - - const benchmarkMatrix = allDomains.map((d, i) => { - const mentions = enhancedMentions[i]?.results || []; - const narrativeMomentum = calculateNarrativeMomentum(mentions); - const sentimentScore = sentimentScores[i]; - const pulseIndex = calculatePulseIndex(narrativeMomentum, sentimentScore); - const sentimentHistoricalData = generateSentimentHistoricalData(mentions, sentimentScore); - const eventsForDomain = i === 0 ? finalEventLog : [] // only primary has classified events right now - const enhancedSentiment = enableEnhanced ? generateEnhancedSentimentAnalysis( - mentions, - eventsForDomain, - sentimentScore, - narrativeMomentum, - { peerVolumes: enhancedMentions.map(m => m.results?.length || 0) } - ) : undefined - - let topNews: any[]; - if (i === 0) { - // Main company: Get 5 latest news - topNews = (mentions || []) - .slice(0, 5) - .map((m: any) => ({ - headline: m.title || 'N/A', - url: m.url || '#', - source: m.domain || 'unknown', - publishedDate: normalizePublishedDate(m.publishedDate || new Date().toISOString()), - })); - } else { - // Competitors: store all, filter later to a global cap - topNews = (mentions || []).map((m: any) => ({ - headline: m.title || 'N/A', - url: m.url || '#', - source: m.domain || 'unknown', + const [verifiedFounders, profile] = await Promise.all([foundersPromise, profilePromise]) + // Split founders vs execs for emission + const founders = verifiedFounders.filter((p: any) => /founder/i.test(p.role || '')) + const executives = verifiedFounders.filter((p: any) => /(ceo|cto|director)/i.test(p.role || '')) + send('founders', { founders }) + send('leadership', { executives, founders }) + send('socials', { socials: profile.socials || {} }) + // merge socials into existing snapshot if emitted + snapshotProfile.socials = profile.socials || {} + let mainMentions = await safeCall(() => fetchExaData(domain, 'mentions', exaKey, { numResults: 25 }), { timeoutMs: 7000, stage: 'fetchExaData_mainMentions', fallback: { results: [] } as any }) + // Stage 3: Competitor discovery (LLM) + mentions for main company + const competitors = await discoverCompetitors(domain, llmProviders) + Sentry.addBreadcrumb({ category: 'sse', message: 'competitor_discovery_done', data: { competitors } }) + send('competitors', { competitors }) + // Re-score + disambiguation filter: keep only items referencing aliases unless from trusted source + const TRUSTED_SOURCES = Array.from(TRUSTED_SOURCES_SET) + const MIN_MENTIONS = 8 + if (canonical && mainMentions?.results?.length) { + const original = [...mainMentions.results] + const aliasRegex = new RegExp(`\\b(${canonical.aliases.map((a: string)=>a.replace(/[-/\\^$*+?.()|[\]{}]/g,'\\$&')).join('|')})\\b`, 'i') + mainMentions.results = mainMentions.results.filter((r: any) => { + if (!r.title) return false + const host = (()=>{ try { return new URL(r.url).hostname.toLowerCase() } catch { return '' } })() + if (TRUSTED_SOURCES.includes(host)) return true + return aliasRegex.test(r.title || '') + }) + // If filtering became too aggressive, fall back to original + if (mainMentions.results.length < Math.min(3, original.length)) { + mainMentions.results = original + } + } + // Query expansion if still low recall + if (canonical && (!mainMentions?.results || mainMentions.results.length < MIN_MENTIONS)) { + try { + const expansionPrompt = `Given canonical company name ${canonical.canonicalName} and aliases ${canonical.aliases.join(', ')}, propose 3 concise alternative news search queries (no quotes) focusing on recent strategic moves, funding, partnerships or expansions. Return ONLY JSON array of strings.` + const queries = await safeGenerateJson(expansionPrompt, llmProviders).catch(()=>[]) + const extraResults: any[] = [] + if (Array.isArray(queries)) { + for (const q of queries.slice(0,3)) { + try { + const res = await safeCall(() => exaAdHocSearch(exaKey, `${q} ${canonical!.canonicalName}`, 6), { timeoutMs: 5000, stage: 'exaAdHocSearch', fallback: { results: [] } as any }) + if (res?.results) extraResults.push(...res.results) + } catch {} + } + } + if (extraResults.length) { + const seen = new Set((mainMentions.results || []).map((r: any)=> r.url)) + const merged = [...(mainMentions.results || [])] + for (const r of extraResults) { + if (!r.url || seen.has(r.url)) continue + seen.add(r.url) + merged.push(r) + } + mainMentions.results = merged + } + } catch {} + } + // Derive company news similarly to competitor approach (rank & slice) + const trustedSources = new Set(Array.from(TRUSTED_SOURCES_SET)) + interface MainNews { headline: string; url: string; source: string; publishedDate: string; credibility?: number; _metrics?: any } + let mainTopNews: MainNews[] = (mainMentions.results || []).map((m: any): MainNews => ({ + headline: m.title, + url: m.url, + source: m.domain, publishedDate: normalizePublishedDate(m.publishedDate || new Date().toISOString()), - domain: d, - })); - } - - return { - domain: d, - pulseIndex, - narrativeMomentum, - sentimentScore, - historicalData: allHistoricalData[i] || [], - sentimentHistoricalData, - news: topNews, - enhancedSentiment, - }; - }); - - // Refine competitor news: keep 2–4 credible/trending items per competitor - const trustedSources = new Set(Array.from(TRUSTED_SOURCES_SET)) - - benchmarkMatrix.forEach((company: any, i) => { - if (i === 0) return // skip primary here - const filtered = (company.news || []) - .filter((n: any) => n.headline && n.headline !== 'N/A') - .map((n: any) => ({ - ...n, - credibility: isTrustedSource(n.source) ? 1 : 0, - freshness: Date.now() - new Date(n.publishedDate).getTime() + credibility: isTrustedSource(m.domain) ? 2 : 0 })) - .sort((a: any, b: any) => { - // Primary sort: credibility desc, then recency asc - if (b.credibility !== a.credibility) return b.credibility - a.credibility - return a.freshness - b.freshness + .map((n: MainNews) => ({ ...n, _metrics: scoreNewsItem({ title: n.headline, url: n.url, publishedDate: n.publishedDate, credibility: n.credibility, domain: n.source }) })) + .sort((a: MainNews, b: MainNews)=> (b._metrics.score - a._metrics.score) || (new Date(b.publishedDate).getTime() - new Date(a.publishedDate).getTime())) + .slice(0,6) + .map(({_metrics, credibility, ...rest}: any)=> rest as MainNews) + // LLM validation filter: ensure each headline truly refers to target company (avoid homonyms) + if (mainTopNews.length) { + try { + const validationPrompt = `Given the company domain ${domain} and these news items as JSON array, return ONLY a JSON array of the indices that are truly about this company (not other similarly named entities). Headlines JSON: ${JSON.stringify(mainTopNews.map((n: MainNews) => n.headline))}` + const idxJson = await safeGenerateJson(validationPrompt, llmProviders).catch(()=>[]) as number[] + if (Array.isArray(idxJson) && idxJson.length) { + const allowed = new Set(idxJson.filter(n=> Number.isFinite(n))) + mainTopNews = mainTopNews.filter((_,i: number)=> allowed.has(i)) + } + } catch {} + } + send('company-news', { news: mainTopNews }) + + // Stage 4: Competitor news (batched fetches, capped concurrency via limiter) + const compResults = await Promise.all((competitors || []).map((c: string) => + safeCall(() => fetchExaData(c, 'mentions', exaKey, { numResults: 8 }), { timeoutMs: 6000, stage: 'fetchExaData_comp', fallback: { results: [] } as any }) + )) + + // Flatten, take latest 4 across all competitors + const trustedSourcesComp = new Set(Array.from(TRUSTED_SOURCES_SET)) + const competitorNewsAllRaw = compResults.flatMap((r, idx) => { + const d = competitors[idx] + return (r.results || []).map((m: any) => ({ + domain: d, + headline: m.title, + url: m.url, + source: m.domain, + publishedDate: normalizePublishedDate(m.publishedDate || new Date().toISOString()) + })) + }).filter(n => n.headline) + + // Group per competitor and apply filtering (2–4 credible/most recent) + const grouped: Record = {} + for (const item of competitorNewsAllRaw) { + grouped[item.domain] = grouped[item.domain] || [] + grouped[item.domain].push(item) + } + const refined: any[] = [] + Object.entries(grouped).forEach(([cDomain, items]) => { + const ranked = items.map(n => { + const credibility = isTrustedSource(n.source) ? 2 : 0 + const metrics = scoreNewsItem({ title: n.headline, url: n.url, publishedDate: n.publishedDate, credibility, domain: n.source }) + return { ...n, credibility, _metrics: metrics } + }).sort((a,b)=> (b._metrics.score - a._metrics.score) || (new Date(b.publishedDate).getTime() - new Date(a.publishedDate).getTime())) + const slice = ranked.slice(0,4) + const finalSlice = slice.length < 2 ? ranked.slice(0,2) : slice + refined.push(...finalSlice.map(({credibility,_metrics,...rest})=>rest)) }) - .slice(0, 4) - // Guarantee minimum 2: if less than 2 after filtering, fallback to earliest remaining originals - if (filtered.length < 2) { - const originals = (company.news || []).filter((n: any) => n.headline && n.headline !== 'N/A') - .sort((a: any, b: any) => new Date(b.publishedDate).getTime() - new Date(a.publishedDate).getTime()) - .slice(0, 2) - company.news = originals - } else { - company.news = filtered.map(({ credibility, freshness, ...rest }: any) => rest) - } - }) - // --- Step 5: IMPROVED AI Strategic Synthesis with actual sentiment data --- - const superPrompt = `EXECUTIVE BRIEF: Strategic Analysis for ${domain} - -You are preparing a board-level strategic assessment. The data below represents real-time competitive intelligence with sentiment analysis across key market players. - -COMPETITIVE INTELLIGENCE: -Primary Company: ${domain} -- Sentiment Score: ${benchmarkMatrix[0]?.sentimentScore || 0}/100 -- Narrative Momentum: ${benchmarkMatrix[0]?.narrativeMomentum || 0}% -- Recent News: ${benchmarkMatrix[0]?.news?.slice(0, 2).map(n => n.headline).join('; ') || 'None'} - -Competitors: -${benchmarkMatrix.slice(1).map(comp => ` -- ${comp.domain}: Sentiment ${comp.sentimentScore}/100, Momentum ${comp.narrativeMomentum}%`).join('')} - -STRATEGIC ANALYSIS DIRECTIVE: -Based on the sentiment scores and momentum data, provide exactly 3 strategic insights: - -1. COMPETITIVE POSITIONING: How does ${domain}'s sentiment (${benchmarkMatrix[0]?.sentimentScore}/100) compare to competitors? What does this reveal about market perception? - -2. MARKET DYNAMICS: What do the sentiment differentials indicate about industry trends and ${domain}'s brand strength vs competitors? - -3. STRATEGIC PRIORITIES: Given the sentiment analysis, what should be the top priority to improve market perception and competitive position? - -REQUIREMENTS: -- Reference specific sentiment scores in your analysis -- Focus on actionable strategic implications -- Each point should be executive-level and forward-looking -- Maximum 35 words per bullet point - -OUTPUT: 3 bullet points, each starting with "• ";` - - const summaryResponse = await safeGenerateText( - superPrompt, - [ - { provider: 'gemini', apiKey: geminiApiKey ?? undefined, model: 'gemini-2.0-flash' }, - { provider: 'openai', apiKey: openAiApiKey ?? undefined, model: 'gpt-4o' } - ] - ).catch(() => ''); - - const summaryPoints = summaryResponse.split('\n') - .map(l => l.trim()) - .filter(l => l.startsWith('•') && l.length > 10) - .slice(0, 3); - - if (summaryPoints.length === 0) { - const primarySentiment = benchmarkMatrix[0]?.sentimentScore || 50; - const avgCompetitorSentiment = benchmarkMatrix.slice(1).reduce((acc, comp) => acc + comp.sentimentScore, 0) / Math.max(benchmarkMatrix.length - 1, 1); - - if (primarySentiment > avgCompetitorSentiment + 10) { - summaryPoints.push(`• ${domain} shows stronger market sentiment (${primarySentiment}/100) than competitors, indicating positive brand perception and growth potential.`); - summaryPoints.push(`• Leverage current positive sentiment momentum to expand market share and strengthen competitive positioning against rivals.`); - summaryPoints.push(`• Focus on maintaining sentiment advantage through consistent product innovation and strategic communications to sustain market leadership.`); - } else if (primarySentiment < avgCompetitorSentiment - 10) { - summaryPoints.push(`• ${domain} sentiment (${primarySentiment}/100) lags competitors, indicating potential brand perception challenges requiring immediate strategic attention.`); - summaryPoints.push(`• Implement comprehensive reputation management strategy to address sentiment gap and rebuild market confidence.`); - summaryPoints.push(`• Prioritize customer experience improvements and strategic communications to close sentiment deficit with key competitors.`); - } else { - summaryPoints.push(`• ${domain} maintains competitive sentiment parity (${primarySentiment}/100), suggesting stable market position with room for strategic differentiation.`); - summaryPoints.push(`• Balanced competitive landscape creates opportunity for breakthrough initiatives to capture sentiment leadership and market share.`); - summaryPoints.push(`• Focus on innovation and unique value proposition to break away from competitor sentiment clustering.`); + // Flatten refined (client groups anyway) but cap global to prevent overload + send('competitor-news', { news: refined.slice(0, 12) }) + + // Stage 5: Sentiment for all domains (main + competitors) + const allDomains = [domain, ...(competitors || [])] + const allMentions = [mainMentions, ...compResults] + const sentimentScores = await Promise.all(allMentions.map((res) => { + const headlines = (res.results || []).slice(0, 5).map((m: any) => m.title) + return getSentimentScore(headlines, llmProviders) + })) + + // Build benchmark rows + const enableEnhanced = process.env.ENABLE_ENHANCED_SENTIMENT === 'true' + const peerVolumes = allMentions.map(m => (m.results || []).length) + const benchmark = allDomains.map((d, i) => { + const mentions = (allMentions[i]?.results || []).map((m: any) => ({ + title: m.title, + publishedDate: m.publishedDate, + url: m.url, + domain: m.domain, + })) + const momentum = calculateNarrativeMomentum(mentions) + const sentiment = sentimentScores[i] || 50 + const pulse = calculatePulseIndex(momentum, sentiment) + const sentimentHistoricalData = generateSentimentHistoricalData(mentions, sentiment) + const enhancedSentiment = enableEnhanced ? generateEnhancedSentimentAnalysis( + mentions, + i === 0 ? [] : [], // placeholder: could include events when streaming adds them earlier + sentiment, + momentum, + { peerVolumes } + ) : undefined + return { domain: d, narrativeMomentum: momentum, sentimentScore: sentiment, pulseIndex: pulse, sentimentHistoricalData, enhancedSentiment } + }) + send('sentiment', { benchmark }) + + // Stage 6: Descriptive executive summary + const summaryPrompt = `You are generating an executive competitive briefing. Provide: +1) A one-sentence positioning statement (~22 words) about the company at ${domain} referencing industry & differentiation if evident. +2) Three strategic bullets (each <=30 words) capturing momentum, risks or opportunities grounded in benchmark sentiment & momentum. +3) One short competitive lens sentence (<25 words) comparing it to peers. +Return ONLY plain text lines prefixed with • for bullets, and no extra commentary. +Context JSON: +PROFILE: ${JSON.stringify(snapshotProfile)} +LEADERSHIP: ${JSON.stringify({ executives: executives?.slice?.(0,4), founders: founders?.slice?.(0,4) })} +BENCHMARK: ${JSON.stringify(benchmark.slice(0,5))} +NEWS: ${JSON.stringify(mainTopNews.slice(0,4))}` + const summary = await safeGenerateText(summaryPrompt, llmProviders).catch(() => '• Positioning: solid presence with emerging competitive signals.\n• Momentum stable vs peers.\n• Opportunity: sharpen differentiation narrative.\n• Peer lens: sentiment parity, differentiation moderate.') + send('summary', { summary }) + + // End + send('done', { ok: true }) + Sentry.addBreadcrumb({ category: 'sse', message: 'stream_done' }) + controller.close() + } catch (err: any) { + // Capture and flush to improve delivery chance for short-lived processes + try { + Sentry.withScope((scope: any) => { + scope.setTag('route', 'briefing/stream') + scope.setExtra('domain', domain) + scope.setExtra('llmProviders', llmProviders.map(p => p.provider)) + Sentry.captureException(err) + }) + await sentryFlush(2000) + } catch (e) {} + send('error', { message: err?.message || 'Unknown error' }) + controller.close() } } - // Log the executive summary for debugging - console.log(`šŸ“‹ Executive Summary Generated:`); - summaryPoints.forEach((point, i) => console.log(` ${i + 1}. ${point}`)); - - // --- Step 6: Bundle Response --- - const response: BriefingResponse = { - requestDomain: domain, - companyProfile: { ...companyProfile, domain }, - founderInfo, - benchmarkMatrix, - newsFeed: finalEventLog, - aiSummary: { - summary: summaryPoints, - groqTlDr, - }, - className: 'briefing' - }; - - return NextResponse.json(response); - - } catch (error: any) { - console.error('Full briefing generation error:', error); - return NextResponse.json({ error: 'Failed to generate briefing', details: error.message }, { status: 500 }); - } + }) + + return new Response(stream, { + headers: { + 'Content-Type': 'text/event-stream; charset=utf-8', + 'Cache-Control': 'no-cache, no-transform', + 'Connection': 'keep-alive', + 'X-Accel-Buffering': 'no', + }, + }) } diff --git a/exora/lib/exa-service.ts b/exora/lib/exa-service.ts index 9c50327..1cf43db 100644 --- a/exora/lib/exa-service.ts +++ b/exora/lib/exa-service.ts @@ -2,35 +2,140 @@ import { sharedLimiter } from '@/lib/limiter'; import { normalizePublishedDate } from '@/lib/utils'; import { TRUSTED_SOURCES, isTrustedSource } from '@/lib/constants'; +import Sentry from '@/lib/sentry' + const EXA_API_URL = 'https://api.exa.ai/search'; +// Resilience configuration +const EXA_MAX_RETRIES = Number(process.env.EXA_MAX_RETRIES || 3); +const EXA_BACKOFF_BASE_MS = Number(process.env.EXA_BACKOFF_BASE_MS || 250); // initial backoff +const EXA_REQUEST_TIMEOUT_MS = Number(process.env.EXA_REQUEST_TIMEOUT_MS || 5000); // per-request timeout + +// Circuit breaker configuration +const CIRCUIT_THRESHOLD = Number(process.env.EXA_CIRCUIT_THRESHOLD || 5); // failures +const CIRCUIT_WINDOW_MS = Number(process.env.EXA_CIRCUIT_WINDOW_MS || 60_000); // sliding window +const CIRCUIT_COOLDOWN_MS = Number(process.env.EXA_CIRCUIT_COOLDOWN_MS || 30_000); // open period + +type CircuitState = { failures: number; firstFailureAt: number | null; openedUntil: number | null } +const circuit: CircuitState = { failures: 0, firstFailureAt: null, openedUntil: null } + +const metrics = { + exaAttempts: 0, + exaFailures: 0, + exaRetries: 0, +} + +function isCircuitOpen() { + if (circuit.openedUntil && Date.now() < circuit.openedUntil) return true + return false +} + +function recordFailure() { + metrics.exaFailures++ + const now = Date.now() + if (!circuit.firstFailureAt) circuit.firstFailureAt = now + circuit.failures += 1 + // if window expired, reset + if (circuit.firstFailureAt && now - circuit.firstFailureAt > CIRCUIT_WINDOW_MS) { + circuit.failures = 1 + circuit.firstFailureAt = now + } + if (circuit.failures >= CIRCUIT_THRESHOLD) { + circuit.openedUntil = now + CIRCUIT_COOLDOWN_MS + Sentry.addBreadcrumb({ category: 'exa', message: 'circuit_opened', data: { openedUntil: circuit.openedUntil } }) + console.warn('[exa] circuit opened until', new Date(circuit.openedUntil).toISOString()) + } +} + +function recordSuccess() { + // reset circuit + circuit.failures = 0 + circuit.firstFailureAt = null + circuit.openedUntil = null +} + +function maskKey(k?: string) { + if (!k) return '' + return k.length > 6 ? `${k.slice(0,4)}...${k.slice(-2)}` : '****' +} + +async function timeoutFetch(input: RequestInfo, init: RequestInit = {}, timeout = EXA_REQUEST_TIMEOUT_MS) { + const controller = new AbortController() + const id = setTimeout(() => controller.abort(), timeout) + try { + const res = await fetch(input, { ...init, signal: controller.signal }) + return res + } finally { + clearTimeout(id) + } +} + +function sleep(ms: number) { return new Promise(resolve => setTimeout(resolve, ms)) } + // šŸ”§ FIXED: Add delay function for rate limiting const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)); async function exaSearch(apiKey: string, requestBody: object): Promise { return sharedLimiter.schedule(async () => { - try { - const response = await fetch(EXA_API_URL, { - method: 'POST', - headers: { - 'Accept': 'application/json', - 'Content-Type': 'application/json', - 'Authorization': `Bearer ${apiKey}`, - }, - body: JSON.stringify(requestBody), - }); + metrics.exaAttempts++ + if (isCircuitOpen()) { + Sentry.addBreadcrumb({ category: 'exa', message: 'request_blocked_circuit_open' }) + throw new Error('Exa circuit is open') + } - if (!response.ok) { - const errorBody = await response.text(); - console.error('Exa API Error:', errorBody); - throw new Error(`Exa API request failed with status ${response.status}`); - } + let lastErr: any = null + for (let attempt = 0; attempt <= EXA_MAX_RETRIES; attempt++) { + if (attempt > 0) metrics.exaRetries++ + try { + // Use timeout fetch + const resp = await timeoutFetch(EXA_API_URL, { + method: 'POST', + headers: { + 'Accept': 'application/json', + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${apiKey}`, + }, + body: JSON.stringify(requestBody), + }, EXA_REQUEST_TIMEOUT_MS) + + if (!resp.ok) { + const errText = await resp.text().catch(()=>'') + lastErr = new Error(`Exa API ${resp.status}: ${errText}`) + Sentry.addBreadcrumb({ category: 'exa', message: 'non_ok_response', data: { status: resp.status } }) + throw lastErr + } - return response.json(); - } catch (error) { - console.error('Failed to fetch from Exa API:', error); - throw error; + const json = await resp.json().catch((e) => { throw new Error('Invalid JSON from Exa') }) + + // Basic validation + if (!json || !Array.isArray(json.results)) { + throw new Error('Exa response missing results array') + } + + // success + recordSuccess() + return json + } catch (err: any) { + lastErr = err + recordFailure() + // If circuit opened as a result of this failure, break early + if (isCircuitOpen()) break + // Exponential backoff with jitter + const backoff = EXA_BACKOFF_BASE_MS * Math.pow(2, attempt) + const jitter = Math.floor(Math.random() * 100) + const wait = backoff + jitter + if (process.env.VERBOSE_LOGS === 'true') console.warn(`[exa] attempt ${attempt+1} failed, retrying in ${wait}ms:`, maskKey(apiKey)) + await sleep(wait) + continue + } } + metrics.exaFailures++ + Sentry.withScope(scope => { + scope.setTag('exa','failure') + scope.setExtra('requestBody', { ...(requestBody as any) }) + Sentry.captureException(lastErr) + }) + throw lastErr }); } From 7f9cdb248238f683afe0271bddf8fc4ede76eb38 Mon Sep 17 00:00:00 2001 From: rajarshidattapy Date: Sat, 25 Oct 2025 12:52:20 +0530 Subject: [PATCH 3/3] removed sentry line for this branch --- exora/lib/exa-service.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/exora/lib/exa-service.ts b/exora/lib/exa-service.ts index 1cf43db..beabc99 100644 --- a/exora/lib/exa-service.ts +++ b/exora/lib/exa-service.ts @@ -2,7 +2,6 @@ import { sharedLimiter } from '@/lib/limiter'; import { normalizePublishedDate } from '@/lib/utils'; import { TRUSTED_SOURCES, isTrustedSource } from '@/lib/constants'; -import Sentry from '@/lib/sentry' const EXA_API_URL = 'https://api.exa.ai/search';