diff --git a/src/lib/email-clean.ts b/src/lib/email-clean.ts index 483cd09..24b8424 100644 --- a/src/lib/email-clean.ts +++ b/src/lib/email-clean.ts @@ -95,3 +95,96 @@ export function renderEmailBody(rawText: string): string { truncated )}`; } + +/** + * Strip dangerous markup from an inbound email HTML body before embedding it + * in a DevOps work item field. Best-effort regex sanitiser — DevOps applies + * its own sanitiser when rendering, this is defence-in-depth. + */ +export function sanitizeEmailHtml(html: string): string { + if (!html) return ''; + return ( + html + // Drop entire script/style/iframe/object/embed/link/meta blocks (with content). + .replace(/]*>[\s\S]*?<\/script\s*>/gi, '') + .replace(/]*>[\s\S]*?<\/style\s*>/gi, '') + .replace(/]*>[\s\S]*?<\/iframe\s*>/gi, '') + .replace(/]*>[\s\S]*?<\/object\s*>/gi, '') + .replace(/]*\/?>/gi, '') + .replace(/]*\/?>/gi, '') + .replace(/]*\/?>/gi, '') + // Strip inline event handlers (`onclick=...`, `onload=...`, ...). + .replace(/\son\w+\s*=\s*"[^"]*"/gi, '') + .replace(/\son\w+\s*=\s*'[^']*'/gi, '') + .replace(/\son\w+\s*=\s*[^\s>]+/gi, '') + // Neutralise javascript: and data: (non-image) URLs. + .replace(/(href|src)\s*=\s*"javascript:[^"]*"/gi, '$1="#"') + .replace(/(href|src)\s*=\s*'javascript:[^']*'/gi, "$1='#'") + ); +} + +/** + * Best-effort signature stripping for HTML email bodies. We can't use the + * line-based `stripSignature` directly — HTML emails are usually a single + * blob with `
` separators, not `\n`. Cut at the first reliable end-of- + * message marker we find. + */ +export function stripHtmlSignature(html: string): string { + if (!html) return ''; + + // Common hard markers — RFC 3676 delimiter rendered as HTML, mobile auto- + // sigs, gmail/outlook signature blocks. Take the FIRST occurrence: anything + // below it is signature. + const hardMarkers: RegExp[] = [ + /]*class="[^"]*gmail_signature[^"]*"[^>]*>/i, + /]*id="Signature"[^>]*>/i, + /]*class="[^"]*moz-signature[^"]*"[^>]*>/i, + /(?:\s*){1,3}--\s*(?:|<\/?p>|<\/div>)/i, + /(?:|

|]*>)\s*Sent from my (?:iPhone|iPad|Android|Galaxy|BlackBerry)/i, + /(?:|

|]*>)\s*Sent from (?:Outlook|Mail) for (?:iOS|Android|Windows)/i, + /(?:|

|]*>)\s*Get Outlook for (?:iOS|Android)/i, + ]; + + let cutAt = html.length; + for (const re of hardMarkers) { + const match = re.exec(html); + if (match && match.index < cutAt) cutAt = match.index; + } + return cutAt < html.length ? html.slice(0, cutAt).trimEnd() : html; +} + +/** + * Replace `cid:CONTENT_ID` references in `` tags with the + * URLs the matching files were uploaded to. Outlook and Gmail mark pasted + * screenshots as inline `cid:` images; without rewriting, the body shows a + * broken-image icon in DevOps. + */ +export function rewriteCidReferences( + html: string, + cidMap: Map +): string { + if (!html || cidMap.size === 0) return html; + return html.replace( + /(]*?\bsrc\s*=\s*)(["'])cid:([^"'>\s]+)\2/gi, + (full, prefix: string, quote: string, cid: string) => { + const target = cidMap.get(cid) || cidMap.get(cid.toLowerCase()); + if (!target) return full; + return `${prefix}${quote}${escapeHtml(target.url)}${quote} alt="${escapeHtml(target.filename)}"`; + } + ); +} + +/** + * Sanitise + signature-strip + truncate an HTML email body for safe storage + * in a DevOps work item. Mirror of `renderEmailBody` for the HTML path. + */ +export function renderEmailBodyHtml(rawHtml: string): string { + const sanitised = sanitizeEmailHtml(rawHtml); + const stripped = stripHtmlSignature(sanitised); + const truncated = + stripped.length > MAX_BODY_CHARS + ? stripped.slice(0, MAX_BODY_CHARS) + '

[truncated]

' + : stripped; + if (!truncated.replace(/<[^>]+>/g, '').trim()) return 'No content'; + return `
${truncated}
`; +} diff --git a/src/lib/email-ingest.ts b/src/lib/email-ingest.ts index b084326..901259d 100644 --- a/src/lib/email-ingest.ts +++ b/src/lib/email-ingest.ts @@ -9,21 +9,43 @@ import { getProjectFromEmail } from '@/lib/devops'; import { sendTicketConfirmation } from '@/lib/email'; -import { escapeHtml, renderEmailBody } from '@/lib/email-clean'; +import { + escapeHtml, + renderEmailBody, + renderEmailBodyHtml, + rewriteCidReferences, +} from '@/lib/email-clean'; const TICKET_REF_REGEX = /\[ZapDesk #(\d+)\]/; +export interface IngestEmailAttachment { + filename: string; + contentType: string; + /** Base64 file contents. Required for file attachments; absent for reference-only / item attachments. */ + content?: string; + /** Microsoft Graph contentId — used to rewrite `cid:` refs in HTML body. */ + contentId?: string; + /** True if the mail client flagged this as inline (pasted screenshot, signature image). */ + isInline?: boolean; + /** OneDrive / SharePoint link surfaced when the file isn't embedded. */ + referenceUrl?: string; + /** Subject of a forwarded `.eml` (item attachment) — surfaced as a note. */ + itemSubject?: string; +} + export interface IngestableEmail { /** Raw `From` value — `Name ` or bare `user@domain`. */ from: string; subject: string; /** - * Plain-text body of the message. Passed through `renderEmailBody`, which - * strips signatures, HTML-escapes the content, and wraps it in a `
`
-   * block — raw HTML in this field is escaped, not preserved.
+   * Body of the message. When `bodyType` is `'text'` (the default) the body
+   * is HTML-escaped and wrapped in a `
` block for safe rendering. When
+   * `'html'` it is sanitised, signature-stripped, and `cid:` references are
+   * rewritten to point at the uploaded DevOps attachments.
    */
   body: string;
-  attachments?: Array<{ filename: string; contentType: string; content: string }>;
+  bodyType?: 'html' | 'text';
+  attachments?: IngestEmailAttachment[];
 }
 
 export type IngestResult =
@@ -31,6 +53,13 @@ export type IngestResult =
   | { success: true; action: 'comment_added'; ticketId: number }
   | { success: false; status: number; error: string };
 
+interface UploadedAttachment { // One email attachment whose blob upload succeeded; linking it to the ticket is a separate step.
+  filename: string; // File name as supplied on the inbound email attachment.
+  url: string; // Attachment URL returned by the DevOps blob upload call.
+  contentId?: string; // Content-ID from the mail, if any; used as the lookup key when rewriting `cid:` image refs.
+  isInline: boolean; // Whether the mail client flagged the file as inline; a missing flag is coerced to false.
+}
+
 export async function ingestEmail(email: IngestableEmail): Promise {
   if (!email.from || !email.subject) {
     return { success: false, status: 400, error: 'Missing required fields: from, subject' };
@@ -51,7 +80,7 @@ export async function ingestEmail(email: IngestableEmail): Promise
   const ticketMatch = email.subject.match(TICKET_REF_REGEX);
   if (ticketMatch) {
     const ticketId = parseInt(ticketMatch[1], 10);
-    return handleThreadReply(encodedPat, ticketId, senderEmail, email.body);
+    return handleThreadReply(encodedPat, ticketId, senderEmail, email);
   }
   return handleNewTicket(encodedPat, senderEmail, email);
 }
@@ -70,26 +99,36 @@ async function handleNewTicket(
   const devops = new AzureDevOpsServiceWithPAT(encodedPat);
   const priority = determinePriority(email.subject);
 
+  // Upload binary attachments first so we have URLs for cid: rewriting and
+  // can include the inline images in the description body. Failures are
+  // logged but never block ticket creation — the customer gets the ticket
+  // either way and we keep enough metadata to investigate.
+  const { uploaded, referenceLinks, itemNotes, failures } = await uploadAttachmentBlobs(
+    devops,
+    projectName,
+    email.attachments,
+    null
+  );
+
+  const description = formatEmailBody(email, senderEmail, uploaded, referenceLinks, itemNotes);
+
   const workItem = await devops.createTicket(
     projectName,
     email.subject,
-    formatEmailBody(email.body, senderEmail),
+    description,
     senderEmail,
     priority
   );
   const ticketId = workItem.id;
   console.log(`Created ticket #${ticketId} from email: ${senderEmail}`);
 
-  if (email.attachments?.length) {
-    for (const attachment of email.attachments) {
-      try {
-        await devops.uploadAttachment(projectName, ticketId, attachment);
-      } catch (err) {
-        console.error(`Failed to upload attachment ${attachment.filename}:`, err);
-      }
-    }
+  // Re-log any earlier upload failures with the ticket id now that we have it.
+  for (const f of failures) {
+    console.error(`[Ingest] ticket #${ticketId} attachment failed (${f.filename}):`, f.error);
   }
 
+  await linkUploadedAttachments(devops, ticketId, uploaded);
+
   // Fire-and-forget — never block ticket creation on email send.
   sendTicketConfirmation(ticketId, email.subject, senderEmail).catch(() => {});
 
@@ -100,18 +139,37 @@ async function handleThreadReply(
   encodedPat: string,
   ticketId: number,
   senderEmail: string,
-  body: string
+  email: IngestableEmail
 ): Promise {
   const devops = new AzureDevOpsServiceWithPAT(encodedPat);
   try {
+    const projectName = await devops.getProjectForWorkItem(ticketId);
+
+    const { uploaded, referenceLinks, itemNotes, failures } = await uploadAttachmentBlobs(
+      devops,
+      projectName,
+      email.attachments,
+      ticketId
+    );
+    for (const f of failures) {
+      console.error(`[Ingest] ticket #${ticketId} attachment failed (${f.filename}):`, f.error);
+    }
+
+    const renderedBody = renderEmailBodyForStorage(email, uploaded);
+    const appendix = buildAppendixHtml(uploaded, referenceLinks, itemNotes);
     const commentHtml = `
 

Email reply from: ${escapeHtml(senderEmail)}


- ${renderEmailBody(body)} + ${renderedBody} + ${appendix}
`.trim(); await devops.addComment(ticketId, commentHtml); + + if (projectName) { + await linkUploadedAttachments(devops, ticketId, uploaded); + } console.log(`Added email reply to ticket #${ticketId} from ${senderEmail}`); return { success: true, action: 'comment_added', ticketId }; } catch (error) { @@ -195,11 +253,11 @@ class AzureDevOpsServiceWithPAT { return response.json(); } - async uploadAttachment( + /** Upload a single file blob and return the attachment URL — does NOT link it. */ + async uploadAttachmentBlob( projectName: string, - workItemId: number, attachment: { filename: string; contentType: string; content: string } - ) { + ): Promise { const buffer = Buffer.from(attachment.content, 'base64'); const uploadResponse = await fetch( `${this.baseUrl}/${encodeURIComponent(projectName)}/_apis/wit/attachments?fileName=${encodeURIComponent(attachment.filename)}&api-version=7.0`, @@ -213,19 +271,24 @@ class AzureDevOpsServiceWithPAT { } ); if (!uploadResponse.ok) { - throw new Error(`Failed to upload attachment: ${uploadResponse.statusText}`); + throw new Error( + `Failed to upload attachment: ${uploadResponse.status} ${uploadResponse.statusText}` + ); } const uploadData = await uploadResponse.json(); - const attachmentUrl = uploadData.url; + return uploadData.url as string; + } + /** Attach an already-uploaded blob to a work item by URL. */ + async linkAttachment(workItemId: number, url: string, filename: string): Promise { const patchDocument = [ { op: 'add', path: '/relations/-', value: { rel: 'AttachedFile', - url: attachmentUrl, - attributes: { comment: `Email attachment: ${attachment.filename}` }, + url, + attributes: { comment: `Email attachment: ${filename}` }, }, }, ]; @@ -241,6 +304,161 @@ class AzureDevOpsServiceWithPAT { throw new Error(`Failed to link attachment: ${linkResponse.statusText}`); } } + + /** Resolve the project name for a work item — needed for the thread-reply attachment path. 
*/ + async getProjectForWorkItem(workItemId: number): Promise { + const res = await fetch( + `${this.baseUrl}/_apis/wit/workitems/${workItemId}?fields=System.TeamProject&api-version=7.0`, + { headers: this.headers } + ); + if (!res.ok) { + console.warn( + `[Ingest] could not resolve project for work item ${workItemId}: ${res.status} ${res.statusText}` + ); + return null; + } + const data = await res.json(); + return data.fields?.['System.TeamProject'] || null; + } +} + +interface UploadGroup { + uploaded: UploadedAttachment[]; + referenceLinks: Array<{ filename: string; url: string }>; + itemNotes: Array<{ subject: string }>; + failures: Array<{ filename: string; error: unknown }>; +} + +async function uploadAttachmentBlobs( + devops: AzureDevOpsServiceWithPAT, + projectName: string | null, + attachments: IngestEmailAttachment[] | undefined, + workItemIdForLogging: number | null +): Promise { + const out: UploadGroup = { uploaded: [], referenceLinks: [], itemNotes: [], failures: [] }; + if (!attachments?.length) return out; + + for (const a of attachments) { + if (a.itemSubject && !a.content) { + out.itemNotes.push({ subject: a.itemSubject }); + continue; + } + if (a.content) { + if (!projectName) { + // Reply-path with no project resolved — record as a reference so the + // file at least appears as a link rather than vanishing. + out.failures.push({ + filename: a.filename, + error: 'No project resolved for upload', + }); + continue; + } + try { + const url = await devops.uploadAttachmentBlob(projectName, { + filename: a.filename, + contentType: a.contentType, + content: a.content, + }); + out.uploaded.push({ + filename: a.filename, + url, + contentId: a.contentId, + isInline: Boolean(a.isInline), + }); + } catch (err) { + const idTag = workItemIdForLogging ? 
` ticket #${workItemIdForLogging}` : ''; + console.error(`[Ingest] upload failed for ${a.filename}${idTag}:`, err); + out.failures.push({ filename: a.filename, error: err }); + if (a.referenceUrl) { + out.referenceLinks.push({ filename: a.filename, url: a.referenceUrl }); + } + } + continue; + } + if (a.referenceUrl) { + out.referenceLinks.push({ filename: a.filename, url: a.referenceUrl }); + } + } + return out; +} + +async function linkUploadedAttachments( + devops: AzureDevOpsServiceWithPAT, + ticketId: number, + uploaded: UploadedAttachment[] +): Promise { + for (const u of uploaded) { + try { + await devops.linkAttachment(ticketId, u.url, u.filename); + } catch (err) { + console.error( + `[Ingest] failed to link attachment ${u.filename} to ticket #${ticketId}:`, + err + ); + } + } +} + +function buildCidMap( + uploaded: UploadedAttachment[] +): Map { + const map = new Map(); + for (const u of uploaded) { + if (!u.contentId) continue; + map.set(u.contentId, { url: u.url, filename: u.filename }); + map.set(u.contentId.toLowerCase(), { url: u.url, filename: u.filename }); + } + return map; +} + +function renderEmailBodyForStorage(email: IngestableEmail, uploaded: UploadedAttachment[]): string { + if (email.bodyType === 'html') { + const cidMap = buildCidMap(uploaded); + const rewritten = rewriteCidReferences(email.body, cidMap); + return renderEmailBodyHtml(rewritten); + } + return renderEmailBody(email.body); +} + +function buildAppendixHtml( + uploaded: UploadedAttachment[], + referenceLinks: Array<{ filename: string; url: string }>, + itemNotes: Array<{ subject: string }> +): string { + const parts: string[] = []; + + // Inline images that the body referenced via cid: are already rewritten in + // place; don't duplicate them here. Only show inline files when the body + // was plain text and we couldn't splice them in. 
+ const orphanInline = uploaded.filter((u) => u.isInline && !u.contentId); + if (orphanInline.length) { + const items = orphanInline + .map( + (u) => + `
  • ${escapeHtml(u.filename)}
  • ` + ) + .join(''); + parts.push(`

    Inline images:

      ${items}
    `); + } + + if (referenceLinks.length) { + const items = referenceLinks + .map( + (r) => + `
  • ${escapeHtml(r.filename)}
  • ` + ) + .join(''); + parts.push(`

    Cloud attachments:

      ${items}
    `); + } + + if (itemNotes.length) { + const items = itemNotes + .map((n) => `
  • Forwarded message: ${escapeHtml(n.subject)} (not extracted)
  • `) + .join(''); + parts.push(`

    Forwarded messages:

      ${items}
    `); + } + + return parts.length ? `
    ${parts.join('')}` : ''; } function extractEmail(from: string): string | null { @@ -258,12 +476,21 @@ function determinePriority(subject: string): number { return 3; } -function formatEmailBody(body: string, senderEmail: string): string { +function formatEmailBody( + email: IngestableEmail, + senderEmail: string, + uploaded: UploadedAttachment[], + referenceLinks: Array<{ filename: string; url: string }>, + itemNotes: Array<{ subject: string }> +): string { + const renderedBody = renderEmailBodyForStorage(email, uploaded); + const appendix = buildAppendixHtml(uploaded, referenceLinks, itemNotes); return `

    From: ${escapeHtml(senderEmail)}


    - ${renderEmailBody(body)} + ${renderedBody} + ${appendix}
    `.trim(); } diff --git a/src/lib/email-poll.ts b/src/lib/email-poll.ts index b8909a7..1b75891 100644 --- a/src/lib/email-poll.ts +++ b/src/lib/email-poll.ts @@ -14,7 +14,7 @@ */ import { getMailGraphToken } from './email'; -import { ingestEmail, type IngestResult } from './email-ingest'; +import { ingestEmail, type IngestEmailAttachment, type IngestResult } from './email-ingest'; const GRAPH_BASE_URL = 'https://graph.microsoft.com/v1.0'; @@ -32,14 +32,22 @@ interface GraphMessage { receivedDateTime?: string; } -interface GraphFileAttachment { +interface GraphAttachment { '@odata.type': string; id: string; - name: string; - contentType: string; - size: number; - isInline: boolean; - contentBytes: string; + name?: string; + contentType?: string; + size?: number; + isInline?: boolean; + contentId?: string | null; + /** Present on `#microsoft.graph.fileAttachment`. */ + contentBytes?: string; + /** Present on `#microsoft.graph.referenceAttachment`. */ + sourceUrl?: string; + /** Microsoft Graph sometimes exposes a direct download for reference attachments. */ + '@microsoft.graph.downloadUrl'?: string; + /** Present on `#microsoft.graph.itemAttachment`. */ + item?: { subject?: string }; } export interface PollSummary { @@ -87,7 +95,7 @@ export async function pollMailbox(mailbox: string): Promise { const fromName = message.from?.emailAddress?.name; const from = fromName ? `${fromName} <${fromAddress}>` : fromAddress; - let attachments: Array<{ filename: string; contentType: string; content: string }> | undefined; + let attachments: IngestEmailAttachment[] | undefined; if (message.hasAttachments) { try { attachments = await fetchAttachments(token, mailbox, message.id); @@ -100,6 +108,7 @@ export async function pollMailbox(mailbox: string): Promise { from, subject: message.subject || '(no subject)', body: message.uniqueBody?.content || '', + bodyType: message.uniqueBody?.contentType === 'html' ? 
'html' : 'text', attachments, }); @@ -140,10 +149,10 @@ async function listUnread(token: string, mailbox: string): Promise` references survive — we rewrite them + // post-upload to keep pasted screenshots inline. The body is sanitised + // and signature-stripped in `email-clean.ts` before storage. + Prefer: 'outlook.body-content-type="html"', }, }); if (!res.ok) { @@ -158,32 +167,98 @@ async function fetchAttachments( token: string, mailbox: string, messageId: string -): Promise> { - const url = `${GRAPH_BASE_URL}/users/${encodeURIComponent(mailbox)}/messages/${encodeURIComponent(messageId)}/attachments`; +): Promise { + // `$expand=microsoft.graph.itemAttachment/item` is required for forwarded + // .eml previews to include the inner message subject; without it `item` is + // null and we can't surface a useful note. + const url = + `${GRAPH_BASE_URL}/users/${encodeURIComponent(mailbox)}/messages/${encodeURIComponent(messageId)}/attachments` + + `?$expand=microsoft.graph.itemAttachment/item`; const res = await fetch(url, { headers: { Authorization: `Bearer ${token}` } }); if (!res.ok) { throw new Error(`Graph list-attachments failed (${res.status}): ${await res.text()}`); } - const json = (await res.json()) as { value?: GraphFileAttachment[] }; - const result: Array<{ filename: string; contentType: string; content: string }> = []; + const json = (await res.json()) as { value?: GraphAttachment[] }; + const result: IngestEmailAttachment[] = []; + for (const a of json.value || []) { - // Only fileAttachment with contentBytes — itemAttachment (forwarded message) - // and referenceAttachment (cloud links) need different handling and are rare - // in support traffic. - // - // Inline attachments (`isInline=true`) are also uploaded: Outlook and Gmail - // flag pasted screenshots and dragged-in images as inline even when users - // expect them as attachments, so dropping them silently loses common - // support artifacts. 
We extract the body as plain `uniqueBody`, so the - // inline reference in the HTML is gone — surfacing the file on the ticket - // is what matters. - if (a['@odata.type'] !== '#microsoft.graph.fileAttachment') continue; - if (!a.contentBytes) continue; - result.push({ - filename: a.name || `attachment-${a.id}`, - contentType: a.contentType || 'application/octet-stream', - content: a.contentBytes, - }); + const filename = a.name || `attachment-${a.id}`; + const contentType = a.contentType || 'application/octet-stream'; + + switch (a['@odata.type']) { + case '#microsoft.graph.fileAttachment': { + if (!a.contentBytes) { + console.warn( + `[Poll] fileAttachment ${a.id} (${filename}) has no contentBytes — skipping` + ); + continue; + } + // Inline files (pasted screenshots, signature images) are kept: the + // body fetch is HTML now, so `` references can be + // rewritten to point at the uploaded DevOps URL. + result.push({ + filename, + contentType, + content: a.contentBytes, + contentId: a.contentId || undefined, + isInline: Boolean(a.isInline), + }); + break; + } + case '#microsoft.graph.referenceAttachment': { + // Outlook converts files >35 MB (and any file when "Modern Attachments" + // is enabled) to OneDrive / SharePoint links. Try the direct download + // URL when Graph exposes it; otherwise surface the source URL so the + // agent can click through. 
+ const downloadUrl = a['@microsoft.graph.downloadUrl']; + const sourceUrl = a.sourceUrl; + if (downloadUrl) { + try { + const fileRes = await fetch(downloadUrl); + if (fileRes.ok) { + const buf = Buffer.from(await fileRes.arrayBuffer()); + result.push({ + filename, + contentType, + content: buf.toString('base64'), + referenceUrl: sourceUrl, + }); + break; + } + console.warn( + `[Poll] referenceAttachment ${a.id} (${filename}) downloadUrl returned ${fileRes.status} — falling back to link` + ); + } catch (err) { + console.warn( + `[Poll] referenceAttachment ${a.id} (${filename}) download failed — falling back to link:`, + err + ); + } + } + if (sourceUrl) { + result.push({ filename, contentType, referenceUrl: sourceUrl }); + } else { + console.warn( + `[Poll] referenceAttachment ${a.id} (${filename}) has no sourceUrl — skipping` + ); + } + break; + } + case '#microsoft.graph.itemAttachment': { + // Forwarded .eml — Graph won't give us bytes through this endpoint, so + // record a tagged note so the message doesn't vanish silently. A + // future change can fetch the inner MIME and re-ingest. + result.push({ + filename, + contentType, + itemSubject: a.item?.subject || filename, + }); + break; + } + default: { + console.warn(`[Poll] unknown attachment type ${a['@odata.type']} (${filename}) — skipping`); + } + } } return result; } diff --git a/src/types/index.ts b/src/types/index.ts index 67646a1..ec44cb2 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -309,10 +309,21 @@ export interface EmailWebhookPayload { to: string; subject: string; body: string; + /** 'html' if `body` is HTML markup; 'text' (default) for plain text. */ + bodyType?: 'html' | 'text'; attachments?: Array<{ filename: string; contentType: string; - content: string; + /** Base64 file contents. Required for file attachments; omitted for reference / item attachments. */ + content?: string; + /** Microsoft Graph contentId — used to rewrite `cid:` refs in HTML body. 
*/ + contentId?: string; + /** True if the mail client flagged this as inline (pasted screenshot, signature image). */ + isInline?: boolean; + /** For OneDrive / SharePoint reference attachments — clickable link surfaced in the ticket. */ + referenceUrl?: string; + /** For forwarded `.eml` (item attachment) — subject of the embedded message. */ + itemSubject?: string; }>; }