Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions backend/brain-prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type { Observation } from "./observer.js";
import type { MemoryNode, WorkingMemory } from "./memory/types.js";
import type { MemoryGraph } from "./memory/graph.js";
import { serializeNodesForPrompt, collectRelevantRejectedEdges, formatRejectedEdgesForPrompt } from "./memory/activation.js";
import { isNewsletterParticipant, isClickbaitTopic } from "./memory/working-memory.js";
import { ariaPersonality } from "./aria-identity.js";
import type { CharacterOverride } from "./aria-identity.js";
import { getBrainConfig, getCharacterPreset, getOwnerLocalTime } from "./brain-config.js";
Expand Down Expand Up @@ -316,9 +317,17 @@ function formatWorkingMemory(wm: WorkingMemory): string {
parts.push(`Follow-ups:\n${fuLines.join("\n")}`);
}

// Active conversation threads
// Active conversation threads — filter newsletter/automation participants so
// promotional streams (AutoScout24 saved searches, no-reply notifications, etc.)
// don't crowd out real conversations in the prompt.
if (wm.conversationThreads && wm.conversationThreads.length > 0) {
const activeThreads = wm.conversationThreads.filter(t => t.status === "active").slice(0, 5);
const activeThreads = wm.conversationThreads
.filter(t => t.status === "active")
.filter(t => {
const list = Array.isArray(t.participants) ? t.participants : (t.participants ? [t.participants] : []);
return !list.some(p => isNewsletterParticipant(p)) && !isClickbaitTopic(t.topic);
})
.slice(0, 5);
if (activeThreads.length > 0) {
const threadLines = activeThreads.map(t => {
const who = Array.isArray(t.participants) ? t.participants.join(", ") : (t.participants || "unknown");
Expand Down Expand Up @@ -895,13 +904,18 @@ function buildCommitmentsBlock(
): string {
const sections: string[] = [];

// Moltbook-specific section (backwards compat)
// Moltbook-specific section. recentMoltbookActivity is sourced from the
// sa_moltbook sub-agent's run summary/details fields — intra-run scratch
// narrative ("I'll reply to 6 comments", "let me write a helper") that was
// already executed within that sub-agent run. Show the activity for
// context, but do NOT mine commitments from it: those phrases are not
// promises made to a human channel, they are sub-agent self-narration.
if (recentMoltbookActivity && recentMoltbookActivity.length > 0) {
const moltbookCommitments = recentMoltbookActivity.flatMap(text => extractAndClassifyCommitments(text));
const detectedSection = moltbookCommitments.length > 0
? `\nDetected commitment language in Moltbook posts:\n${moltbookCommitments.map(c => `- [${c.weight}] "${c.commitment}" (pattern: ${c.pattern})`).join("\n")}\n`
: "";
sections.push(`Moltbook posts/comments:\n${detectedSection}${recentMoltbookActivity.map((text, i) => ` ${i + 1}. ${text.slice(0, 300)}`).join("\n")}`);
// These are non-actionable context only, so cap display to the 3 most
// recent runs and shorten each summary — showing all ~10 every reflect
// wastes prompt budget without changing decisions.
const moltbookForDisplay = recentMoltbookActivity.slice(0, 3);
sections.push(`Moltbook posts/comments (sa_moltbook sub-agent run summaries — already executed, NOT personal commitments):\n${moltbookForDisplay.map((text, i) => ` ${i + 1}. ${text.slice(0, 150)}`).join("\n")}`);
}

// General outgoing activity (WhatsApp, email, brain messages) — grouped by conversation
Expand Down Expand Up @@ -937,6 +951,7 @@ ACTION REQUIRED:
3. For any non-trivial commitment not already tracked, create a goal via goalOps.
4. Trivial commitments (quick lookups/checks) are filtered out automatically.
5. Update progress on existing commitment-sourced goals.
6. Moltbook sub-agent run summaries are shown for context only — do NOT treat phrases like "I'll reply to X comments" or "let me write a helper" inside those summaries as personal commitments. They were already executed inside the sub-agent run.
`;
}

Expand Down
95 changes: 88 additions & 7 deletions backend/integrations/gmail.ts
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,57 @@ const PROMOTIONAL_SENDER_PATTERNS: RegExp[] = [
/calendar-notification@google\.com/i,
];

// Strong promotional mailbox prefixes — local parts of the email address that
// are almost never used for real correspondence. Matched against the local part
// only (before "@") so we don't accidentally hit a domain that contains the
// substring.
//
// Every pattern is anchored with ^…$, so it has to equal the whole local part
// ("news" matches, "goodnews" or "news.team" do not). isPromotionalSender
// treats a match as promotional on its own, without looking at the subject.
const STRONG_PROMOTIONAL_LOCALPARTS: RegExp[] = [
/^promotion[s]?$/i,
/^marketing$/i,
/^newsletter[s]?$/i,
/^news$/i,
/^deals$/i,
/^offers$/i,
/^mailing[s]?$/i,
];

// Weak promotional local parts — used by some legitimate businesses for real
// contact, so a match here is not sufficient by itself: isPromotionalSender
// only treats the sender as promotional when the subject also matches one of
// CLICKBAIT_SUBJECT_PATTERNS. Patterns are anchored, so they must equal the
// whole local part.
const WEAK_PROMOTIONAL_LOCALPARTS: RegExp[] = [
/^info$/i,
/^hello$/i,
/^hi$/i,
];

// Domains that send essentially nothing but bulk marketing. A sender whose
// domain equals one of these, or is a subdomain of one ("mail.temu.com"), is
// always treated as promotional regardless of local part.
//
// Entries must be lowercase: isPromotionalSender lowercases the address before
// comparing. The same five domains are also listed in NEWSLETTER_DOMAINS in
// memory/working-memory.ts — keep the two lists in sync when editing.
const PROMOTIONAL_DOMAINS = [
"aliexpress.com",
"temu.com",
"shein.com",
"wish.com",
"banggood.com",
];

// Clickbait subject patterns. Vague urgency / scarcity / "your X is waiting"
// copy (Dutch and English) that doesn't match how Gillis's actual
// correspondents write subject lines, e.g. "Uw voertuig wacht op u".
//
// In this file they are only consulted for WEAK_PROMOTIONAL_LOCALPARTS senders
// (info@, hello@, hi@): such a sender is dropped only when the subject matches
// one of these. Senders like promotion@aliexpress.com never reach this check —
// they are already caught by the strong local-part and domain lists.
//
// The same patterns are duplicated as CLICKBAIT_TOPIC_PATTERNS in
// memory/working-memory.ts — keep the two lists in sync when editing.
//
// NOTE(review): the first pattern has no leading \b, unlike the others, so it
// also matches inside longer words ("verwacht op u") — confirm this is intended.
const CLICKBAIT_SUBJECT_PATTERNS: RegExp[] = [
/wacht\s+op\s+u\b/i,
/\bklik\s+hier\b/i,
/\b\d{1,3}\s*%\s*(korting|off|discount)\b/i,
/\blaatste\s+kans\b/i,
/\blast\s+chance\b/i,
/\blimited\s+time\b/i,
/\bbeperkte?\s+aanbieding\b/i,
/\bspecial\s+offer\b/i,
/\bact\s+now\b/i,
/\bonly\s+today\b/i,
/\balleen\s+vandaag\b/i,
];

/**
* Extract the bare email address from a From header value.
* "Display Name <user@example.com>" → "user@example.com"
Expand All @@ -223,9 +274,37 @@ function extractEmailAddress(from: string): string {
return match ? match[1] : from.trim();
}

function isPromotionalSender(from: string): boolean {
const addr = extractEmailAddress(from);
return PROMOTIONAL_SENDER_PATTERNS.some(re => re.test(addr));
/**
 * Break a bare email address into its local part and domain.
 *
 * The split happens at the LAST "@" so a (quoted) "@" inside the local part
 * stays in `local`. When there is no "@" at all, the whole input is returned
 * as `local` and `domain` is the empty string.
 *
 * @param addr Bare address such as "user@example.com" (no display name).
 * @returns The text before and after the last "@".
 */
function splitAddress(addr: string): { local: string; domain: string } {
  const sepIndex = addr.lastIndexOf("@");
  return sepIndex === -1
    ? { local: addr, domain: "" }
    : { local: addr.substring(0, sepIndex), domain: addr.substring(sepIndex + 1) };
}

/**
 * Report whether an email subject reads like marketing clickbait.
 *
 * @param subject Raw Subject header value; an empty string never matches.
 * @returns true when at least one of CLICKBAIT_SUBJECT_PATTERNS matches.
 */
function isClickbaitSubject(subject: string): boolean {
  if (subject) {
    for (const pattern of CLICKBAIT_SUBJECT_PATTERNS) {
      if (pattern.test(subject)) return true;
    }
  }
  return false;
}

/**
 * Decide whether an incoming email should be dropped as promotional.
 *
 * Checks run from most to least specific and stop at the first hit:
 *   1. the full address against PROMOTIONAL_SENDER_PATTERNS,
 *   2. the domain against PROMOTIONAL_DOMAINS (exact match or subdomain),
 *   3. the local part against STRONG_PROMOTIONAL_LOCALPARTS,
 *   4. the local part against WEAK_PROMOTIONAL_LOCALPARTS — but only when the
 *      subject is also clickbait, so real mail from info@<localbusiness>
 *      stays intact while promo blasts from info@<bulksender> get filtered.
 *
 * @param from    Raw From header value (display name and angle brackets allowed).
 * @param subject Subject header; defaults to "" which disables rule 4.
 * @returns true when the sender is considered promotional.
 */
function isPromotionalSender(from: string, subject = ""): boolean {
  // All comparisons below are done on the lowercased bare address.
  const address = extractEmailAddress(from).toLowerCase();
  const matchesAny = (patterns: RegExp[], value: string): boolean =>
    patterns.some(pattern => pattern.test(value));

  if (matchesAny(PROMOTIONAL_SENDER_PATTERNS, address)) return true;

  const parts = splitAddress(address);

  // Known mass-promotional domains, including any of their subdomains.
  const fromBulkDomain = PROMOTIONAL_DOMAINS.some(
    known => parts.domain === known || parts.domain.endsWith("." + known),
  );
  if (fromBulkDomain) return true;

  // Strong prefixes are sufficient without looking at the subject.
  if (matchesAny(STRONG_PROMOTIONAL_LOCALPARTS, parts.local)) return true;

  // Weak prefixes (info@, hello@, hi@) need a clickbait subject as well.
  return matchesAny(WEAK_PROMOTIONAL_LOCALPARTS, parts.local) && isClickbaitSubject(subject);
}

async function fetchNewEmails(account: GmailAccount, state: GmailState): Promise<void> {
Expand Down Expand Up @@ -285,15 +364,17 @@ async function fetchNewEmails(account: GmailAccount, state: GmailState): Promise

const headers = msg.payload?.headers;
const from = getHeader(headers, "From");
const subject = getHeader(headers, "Subject");

// Drop known promotional senders before they consume brain context
if (isPromotionalSender(from)) {
log(`Skipped promotional email from ${from}`);
// Drop known promotional senders before they consume brain context.
// Subject is passed in so weak prefixes (info@, hello@) only trip the
// filter when paired with clickbait copy.
if (isPromotionalSender(from, subject)) {
log(`Skipped promotional email from ${from} (subject: "${subject}")`);
return;
}

const to = getHeader(headers, "To");
const subject = getHeader(headers, "Subject");
const body = msg.payload ? extractBody(msg.payload) : "";
const snippet = msg.snippet || "";

Expand Down
97 changes: 97 additions & 0 deletions backend/memory/working-memory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,87 @@ export function populateTemporalContext(wm: WorkingMemory): void {

// ── Conversation Thread Tracking ──

// Newsletter / automation sender markers. If a participant string matches one
// of these, the thread is treated as one-way noise — never promoted to "active"
// in the prompt, and never written as a fresh thread. Defense in depth: applied
// at both write time (here) and render time (brain-prompt.ts).
//
// Entries are matched as plain substrings of the lowercased participant, so
// they must be lowercase themselves.
const NEWSLETTER_SUBSTRINGS = [
  "noreply",
  "no-reply",
  "notifications.",
  "newsletter",
  "savedsearches",
  "mailings.",
  "updates@",
  "bounce",
  // Promotional/automation mailbox prefixes. Participant strings look like
  // "Display Name <prefix@domain>", so substring match on "prefix@" is enough.
  //
  // "info@" is deliberately NOT listed: legitimate businesses write from
  // info@, and the Gmail ingestion filter only drops info@ mail when the
  // subject is clickbait. Callers combine this check with isClickbaitTopic,
  // which yields the same "weak prefix" behaviour for threads — listing
  // "info@" here would discard (and sweep away) real conversations.
  "promotion@",
  "promotions@",
  "marketing@",
  "news@",
  "newsletter@",
  "deals@",
  "offers@",
  "mailing@",
];

// Sender names / domains that only ever produce automated mail. Unlike
// NEWSLETTER_SUBSTRINGS these are matched as delimited tokens (no letter or
// digit directly before or after), so a short entry such as "rdw" matches
// "RDW <x@rdw.nl>" but not "hardware", and "wish.com" matches "x@wish.com" but
// not "x@jewish.com". Entries must be lowercase.
// NOTE(review): "anwb.nl/notifications" contains a "/" and therefore can only
// match a participant that literally contains that path — confirm it is useful.
const NEWSLETTER_DOMAINS = [
  "autoscout24",
  "schoolkassa",
  "rdw",
  "anwb.nl/notifications",
  // Mass-promotional retail domains — sender from these is always marketing.
  // Mirrors PROMOTIONAL_DOMAINS in integrations/gmail.ts; keep in sync.
  "aliexpress.com",
  "temu.com",
  "shein.com",
  "wish.com",
  "banggood.com",
];

// Clickbait subject/topic patterns. Email observations are stored as
// "[EMAIL] Subject: <subject>\n\n<body>" so the thread topic (first 60 chars)
// captures the subject. If the topic matches one of these marketing tropes,
// the thread is treated as noise even when the sender slips past the address
// filter (e.g. a mixed-use domain that also sends real mail).
// Mirrors CLICKBAIT_SUBJECT_PATTERNS in integrations/gmail.ts; keep in sync.
const CLICKBAIT_TOPIC_PATTERNS: RegExp[] = [
  /wacht\s+op\s+u\b/i,
  /\bklik\s+hier\b/i,
  /\b\d{1,3}\s*%\s*(korting|off|discount)\b/i,
  /\blaatste\s+kans\b/i,
  /\blast\s+chance\b/i,
  /\blimited\s+time\b/i,
  /\bbeperkte?\s+aanbieding\b/i,
  /\bspecial\s+offer\b/i,
  /\bact\s+now\b/i,
  /\bonly\s+today\b/i,
  /\balleen\s+vandaag\b/i,
];

/**
 * Check whether `token` occurs in `haystack` as a delimited token, i.e. with
 * no ASCII letter or digit immediately before or after it. Both arguments are
 * expected to be lowercase already.
 */
function hasDelimitedToken(haystack: string, token: string): boolean {
  // An empty token would match everywhere and never advance the scan.
  if (!token) return false;
  const isAlnum = (ch: string): boolean => /[a-z0-9]/.test(ch);
  for (let at = haystack.indexOf(token); at !== -1; at = haystack.indexOf(token, at + 1)) {
    // charAt() yields "" outside the string, which counts as a boundary.
    const before = haystack.charAt(at - 1);
    const after = haystack.charAt(at + token.length);
    if (!isAlnum(before) && !isAlnum(after)) return true;
  }
  return false;
}

/**
 * Decide whether a participant string (sender, chat name or JID) belongs to a
 * newsletter / automation source rather than a real conversation partner.
 *
 * Matching is case-insensitive. NEWSLETTER_SUBSTRINGS entries match anywhere
 * in the string; NEWSLETTER_DOMAINS entries must appear as delimited tokens so
 * that unrelated names containing the same letters are not misclassified.
 *
 * @param participant Participant string; null, undefined and "" are never newsletters.
 * @returns true when the participant looks like an automated sender.
 */
export function isNewsletterParticipant(participant: string | undefined | null): boolean {
  if (!participant) return false;
  const p = participant.toLowerCase();
  if (NEWSLETTER_SUBSTRINGS.some(sub => p.includes(sub))) return true;
  return NEWSLETTER_DOMAINS.some(dom => hasDelimitedToken(p, dom.toLowerCase()));
}

/**
 * Report whether a thread topic reads like marketing clickbait.
 *
 * @param topic Thread topic text; null, undefined and "" never match.
 * @returns true when at least one of CLICKBAIT_TOPIC_PATTERNS matches.
 */
export function isClickbaitTopic(topic: string | undefined | null): boolean {
  if (topic) {
    for (const pattern of CLICKBAIT_TOPIC_PATTERNS) {
      if (pattern.test(topic)) return true;
    }
  }
  return false;
}

/**
 * Check whether any participant of a thread is a newsletter / automation
 * sender. Accepts both the array form and the single-string form of the
 * participants field.
 *
 * @param participants Thread participants; a missing or empty value yields false.
 * @returns true when at least one participant satisfies isNewsletterParticipant.
 */
function threadHasNewsletterParticipant(participants: string[] | string | undefined): boolean {
  if (!participants) return false;
  if (!Array.isArray(participants)) {
    return isNewsletterParticipant(participants);
  }
  for (const entry of participants) {
    if (isNewsletterParticipant(entry)) return true;
  }
  return false;
}

export function updateConversationThreads(wm: WorkingMemory, observations: Observation[]): void {
const now = Date.now();
const STALE_THRESHOLD = 48 * 60 * 60 * 1000; // 48 hours
Expand All @@ -194,6 +275,14 @@ export function updateConversationThreads(wm: WorkingMemory, observations: Obser
let thread = wm.conversationThreads.find(t => t.id === key);

if (!thread) {
// Reject newsletter/automation senders at write time — they're never real conversations.
if (isNewsletterParticipant(obs.sender) || isNewsletterParticipant(obs.chatName) || isNewsletterParticipant(obs.chatJid)) {
continue;
}
// Also reject if the topic (first 60 chars of text) looks like marketing clickbait.
if (isClickbaitTopic(obs.text.slice(0, 60))) {
continue;
}
thread = {
id: key,
participants: [obs.sender],
Expand All @@ -214,6 +303,14 @@ export function updateConversationThreads(wm: WorkingMemory, observations: Obser
}
}

// Sweep any pre-existing newsletter / clickbait threads that slipped in
// during prior ticks (before these guards were added, or via an alternative
// write path). Fixes already-stuck entries like the AliExpress
// "Uw voertuig wacht op u" promo blast.
wm.conversationThreads = wm.conversationThreads.filter(
t => !threadHasNewsletterParticipant(t.participants) && !isClickbaitTopic(t.topic),
);

// Thread lifecycle: active → stale (48h) → closed (7d) → removed (14d)
const CLOSED_THRESHOLD = 7 * 24 * 60 * 60 * 1000; // 7 days since last message
const REMOVE_THRESHOLD = 14 * 24 * 60 * 60 * 1000; // 14 days since last message
Expand Down