diff --git a/backend/brain-prompt.ts b/backend/brain-prompt.ts index f62fd95f..d6794aa8 100644 --- a/backend/brain-prompt.ts +++ b/backend/brain-prompt.ts @@ -2,6 +2,7 @@ import type { Observation } from "./observer.js"; import type { MemoryNode, WorkingMemory } from "./memory/types.js"; import type { MemoryGraph } from "./memory/graph.js"; import { serializeNodesForPrompt, collectRelevantRejectedEdges, formatRejectedEdgesForPrompt } from "./memory/activation.js"; +import { isNewsletterParticipant, isClickbaitTopic } from "./memory/working-memory.js"; import { ariaPersonality } from "./aria-identity.js"; import type { CharacterOverride } from "./aria-identity.js"; import { getBrainConfig, getCharacterPreset, getOwnerLocalTime } from "./brain-config.js"; @@ -316,9 +317,17 @@ function formatWorkingMemory(wm: WorkingMemory): string { parts.push(`Follow-ups:\n${fuLines.join("\n")}`); } - // Active conversation threads + // Active conversation threads — filter newsletter/automation participants so + // promotional streams (AutoScout24 saved searches, no-reply notifications, etc.) + // don't crowd out real conversations in the prompt. if (wm.conversationThreads && wm.conversationThreads.length > 0) { - const activeThreads = wm.conversationThreads.filter(t => t.status === "active").slice(0, 5); + const activeThreads = wm.conversationThreads + .filter(t => t.status === "active") + .filter(t => { + const list = Array.isArray(t.participants) ? t.participants : (t.participants ? [t.participants] : []); + return !list.some(p => isNewsletterParticipant(p)) && !isClickbaitTopic(t.topic); + }) + .slice(0, 5); if (activeThreads.length > 0) { const threadLines = activeThreads.map(t => { const who = Array.isArray(t.participants) ? t.participants.join(", ") : (t.participants || "unknown"); @@ -895,13 +904,18 @@ function buildCommitmentsBlock( ): string { const sections: string[] = []; - // Moltbook-specific section (backwards compat) + // Moltbook-specific section. recentMoltbookActivity is sourced from the + // sa_moltbook sub-agent's run summary/details fields — intra-run scratch + // narrative ("I'll reply to 6 comments", "let me write a helper") that was + // already executed within that sub-agent run. Show the activity for + // context, but do NOT mine commitments from it: those phrases are not + // promises made to a human channel, they are sub-agent self-narration. if (recentMoltbookActivity && recentMoltbookActivity.length > 0) { - const moltbookCommitments = recentMoltbookActivity.flatMap(text => extractAndClassifyCommitments(text)); - const detectedSection = moltbookCommitments.length > 0 - ? `\nDetected commitment language in Moltbook posts:\n${moltbookCommitments.map(c => `- [${c.weight}] "${c.commitment}" (pattern: ${c.pattern})`).join("\n")}\n` - : ""; - sections.push(`Moltbook posts/comments:\n${detectedSection}${recentMoltbookActivity.map((text, i) => ` ${i + 1}. ${text.slice(0, 300)}`).join("\n")}`); + // These are non-actionable context only, so cap display to the 3 most + // recent runs and shorten each summary — showing all ~10 every reflect + // wastes prompt budget without changing decisions. + const moltbookForDisplay = recentMoltbookActivity.slice(0, 3); + sections.push(`Moltbook posts/comments (sa_moltbook sub-agent run summaries — already executed, NOT personal commitments):\n${moltbookForDisplay.map((text, i) => ` ${i + 1}. ${text.slice(0, 150)}`).join("\n")}`); } // General outgoing activity (WhatsApp, email, brain messages) — grouped by conversation @@ -937,6 +951,7 @@ ACTION REQUIRED: 3. For any non-trivial commitment not already tracked, create a goal via goalOps. 4. Trivial commitments (quick lookups/checks) are filtered out automatically. 5. Update progress on existing commitment-sourced goals. +6. Moltbook sub-agent run summaries are shown for context only — do NOT treat phrases like "I'll reply to X comments" or "let me write a helper" inside those summaries as personal commitments. They were already executed inside the sub-agent run. `; } diff --git a/backend/integrations/gmail.ts b/backend/integrations/gmail.ts index 1ff9189f..12a4303f 100644 --- a/backend/integrations/gmail.ts +++ b/backend/integrations/gmail.ts @@ -214,6 +214,57 @@ const PROMOTIONAL_SENDER_PATTERNS: RegExp[] = [ /calendar-notification@google\.com/i, ]; +// Strong promotional mailbox prefixes — local-part of the email address that +// is almost never used for real correspondence. Matched against the local part +// only (before "@") so we don't accidentally hit a domain that contains the +// substring. +const STRONG_PROMOTIONAL_LOCALPARTS: RegExp[] = [ + /^promotion[s]?$/i, + /^marketing$/i, + /^newsletter[s]?$/i, + /^news$/i, + /^deals$/i, + /^offers$/i, + /^mailing[s]?$/i, +]; + +// Weak promotional local parts — used by some legitimate businesses for real +// contact, so we only treat as promotional when paired with a clickbait subject. +const WEAK_PROMOTIONAL_LOCALPARTS: RegExp[] = [ + /^info$/i, + /^hello$/i, + /^hi$/i, +]; + +// Domains that send essentially nothing but bulk marketing. Sender from these +// is always treated as promotional regardless of local part. +const PROMOTIONAL_DOMAINS = [ + "aliexpress.com", + "temu.com", + "shein.com", + "wish.com", + "banggood.com", +]; + +// Clickbait subject patterns. Vague urgency / scarcity / "your X is waiting" +// copy that doesn't match how Gillis's actual correspondents write subject +// lines. Used in combination with weak promotional prefixes to catch promo +// blasts from mixed-use mailboxes like promotion@aliexpress.com sending +// "Uw voertuig wacht op u". +const CLICKBAIT_SUBJECT_PATTERNS: RegExp[] = [ + /wacht\s+op\s+u\b/i, + /\bklik\s+hier\b/i, + /\b\d{1,3}\s*%\s*(korting|off|discount)\b/i, + /\blaatste\s+kans\b/i, + /\blast\s+chance\b/i, + /\blimited\s+time\b/i, + /\bbeperkte?\s+aanbieding\b/i, + /\bspecial\s+offer\b/i, + /\bact\s+now\b/i, + /\bonly\s+today\b/i, + /\balleen\s+vandaag\b/i, +]; + /** * Extract the bare email address from a From header value. * "Display Name " → "user@example.com" @@ -223,9 +274,37 @@ function extractEmailAddress(from: string): string { return match ? match[1] : from.trim(); } -function isPromotionalSender(from: string): boolean { - const addr = extractEmailAddress(from); - return PROMOTIONAL_SENDER_PATTERNS.some(re => re.test(addr)); +function splitAddress(addr: string): { local: string; domain: string } { + const at = addr.lastIndexOf("@"); + if (at < 0) return { local: addr, domain: "" }; + return { local: addr.slice(0, at), domain: addr.slice(at + 1) }; +} + +function isClickbaitSubject(subject: string): boolean { + if (!subject) return false; + return CLICKBAIT_SUBJECT_PATTERNS.some(re => re.test(subject)); +} + +function isPromotionalSender(from: string, subject = ""): boolean { + const addr = extractEmailAddress(from).toLowerCase(); + if (PROMOTIONAL_SENDER_PATTERNS.some(re => re.test(addr))) return true; + + const { local, domain } = splitAddress(addr); + + // Known mass-promotional domains — always drop. + if (PROMOTIONAL_DOMAINS.some(d => domain === d || domain.endsWith("." + d))) return true; + + // Strong promotional prefixes — always drop. + if (STRONG_PROMOTIONAL_LOCALPARTS.some(re => re.test(local))) return true; + + // Weak prefixes (info@, hello@, hi@) — only drop when subject is clickbait. + // Real correspondence from info@ stays intact; promo blasts + // from info@ get filtered. + if (WEAK_PROMOTIONAL_LOCALPARTS.some(re => re.test(local)) && isClickbaitSubject(subject)) { + return true; + } + + return false; } async function fetchNewEmails(account: GmailAccount, state: GmailState): Promise { @@ -285,15 +364,17 @@ async function fetchNewEmails(account: GmailAccount, state: GmailState): Promise const headers = msg.payload?.headers; const from = getHeader(headers, "From"); + const subject = getHeader(headers, "Subject"); - // Drop known promotional senders before they consume brain context - if (isPromotionalSender(from)) { - log(`Skipped promotional email from ${from}`); + // Drop known promotional senders before they consume brain context. + // Subject is passed in so weak prefixes (info@, hello@) only trip the + // filter when paired with clickbait copy. + if (isPromotionalSender(from, subject)) { + log(`Skipped promotional email from ${from} (subject: "${subject}")`); return; } const to = getHeader(headers, "To"); - const subject = getHeader(headers, "Subject"); const body = msg.payload ? extractBody(msg.payload) : ""; const snippet = msg.snippet || ""; diff --git a/backend/memory/working-memory.ts b/backend/memory/working-memory.ts index 119e58fd..4e078d30 100644 --- a/backend/memory/working-memory.ts +++ b/backend/memory/working-memory.ts @@ -181,6 +181,87 @@ export function populateTemporalContext(wm: WorkingMemory): void { // ── Conversation Thread Tracking ── +// Newsletter / automation sender patterns. If a participant string matches one +// of these, the thread is treated as one-way noise — never promoted to "active" +// in the prompt, and never written as a fresh thread. Defense in depth: applied +// at both write time (here) and render time (brain-prompt.ts). +const NEWSLETTER_SUBSTRINGS = [ + "noreply", + "no-reply", + "notifications.", + "newsletter", + "savedsearches", + "mailings.", + "updates@", + "bounce", + // Promotional/automation mailbox prefixes. Participant strings look like + // "Display Name ", so substring match on "prefix@" is enough. + "promotion@", + "promotions@", + "marketing@", + "news@", + "newsletter@", + "deals@", + "offers@", + "info@", + "mailing@", +]; + +const NEWSLETTER_DOMAINS = [ + "autoscout24", + "schoolkassa", + "rdw", + "anwb.nl/notifications", + // Mass-promotional retail domains — sender from these is always marketing. + "aliexpress.com", + "temu.com", + "shein.com", + "wish.com", + "banggood.com", +]; + +// Clickbait subject/topic patterns. Email observations are stored as +// "[EMAIL] Subject: \n\n" so the thread topic (first 60 chars) +// captures the subject. If the topic matches one of these marketing tropes, +// the thread is treated as noise even when the sender slips past the address +// filter (e.g. a mixed-use domain that also sends real mail). +const CLICKBAIT_TOPIC_PATTERNS: RegExp[] = [ + /wacht\s+op\s+u\b/i, + /\bklik\s+hier\b/i, + /\b\d{1,3}\s*%\s*(korting|off|discount)\b/i, + /\blaatste\s+kans\b/i, + /\blast\s+chance\b/i, + /\blimited\s+time\b/i, + /\bbeperkte?\s+aanbieding\b/i, + /\bspecial\s+offer\b/i, + /\bact\s+now\b/i, + /\bonly\s+today\b/i, + /\balleen\s+vandaag\b/i, +]; + +export function isNewsletterParticipant(participant: string | undefined | null): boolean { + if (!participant) return false; + const p = participant.toLowerCase(); + for (const sub of NEWSLETTER_SUBSTRINGS) { + if (p.includes(sub)) return true; + } + for (const dom of NEWSLETTER_DOMAINS) { + if (p.includes(dom)) return true; + } + return false; +} + +export function isClickbaitTopic(topic: string | undefined | null): boolean { + if (!topic) return false; + return CLICKBAIT_TOPIC_PATTERNS.some(re => re.test(topic)); +} + +function threadHasNewsletterParticipant(participants: string[] | string | undefined): boolean { + if (!participants) return false; + const list = Array.isArray(participants) ? participants : [participants]; + return list.some(isNewsletterParticipant); +} + export function updateConversationThreads(wm: WorkingMemory, observations: Observation[]): void { const now = Date.now(); const STALE_THRESHOLD = 48 * 60 * 60 * 1000; // 48 hours @@ -194,6 +275,14 @@ export function updateConversationThreads(wm: WorkingMemory, observations: Obser let thread = wm.conversationThreads.find(t => t.id === key); if (!thread) { + // Reject newsletter/automation senders at write time — they're never real conversations. + if (isNewsletterParticipant(obs.sender) || isNewsletterParticipant(obs.chatName) || isNewsletterParticipant(obs.chatJid)) { + continue; + } + // Also reject if the topic (first 60 chars of text) looks like marketing clickbait. + if (isClickbaitTopic(obs.text.slice(0, 60))) { + continue; + } thread = { id: key, participants: [obs.sender], @@ -214,6 +303,14 @@ export function updateConversationThreads(wm: WorkingMemory, observations: Obser } } + // Sweep any pre-existing newsletter / clickbait threads that slipped in + // during prior ticks (before these guards were added, or via an alternative + // write path). Fixes already-stuck entries like the AliExpress + // "Uw voertuig wacht op u" promo blast. + wm.conversationThreads = wm.conversationThreads.filter( + t => !threadHasNewsletterParticipant(t.participants) && !isClickbaitTopic(t.topic), + ); + // Thread lifecycle: active → stale (48h) → closed (7d) → removed (14d) const CLOSED_THRESHOLD = 7 * 24 * 60 * 60 * 1000; // 7 days since last message const REMOVE_THRESHOLD = 14 * 24 * 60 * 60 * 1000; // 14 days since last message