diff --git a/docs/EMERGENCY_RUNBOOK.md b/docs/EMERGENCY_RUNBOOK.md index 05e71d467..1fd04d162 100644 --- a/docs/EMERGENCY_RUNBOOK.md +++ b/docs/EMERGENCY_RUNBOOK.md @@ -33,7 +33,9 @@ Establish what is actually wrong before communicating or resuming. Communicate clearly with users and stakeholders. - Post status to the official channels (status page, social, governance forum). -- Record the incident timeline for the post-mortem. +- Record the incident timeline for the postmortem. Use the + [incident postmortem template](./postmortems/TEMPLATE.md) and linking flow + once the event is mitigated or resolved. - This step is a manual acknowledgement; the panel cannot complete it for you. ### Resume diff --git a/docs/backend_testing.md b/docs/backend_testing.md index e05782ab7..3b67a65dd 100644 --- a/docs/backend_testing.md +++ b/docs/backend_testing.md @@ -42,6 +42,15 @@ Replace `ID` with the actual incident ID from the previous command. curl -X PATCH http://localhost:3001/api/incidents/ID/resolve | jq ``` +### Link a postmortem + +After mitigation or resolution, create a public postmortem from +[`docs/postmortems/TEMPLATE.md`](./postmortems/TEMPLATE.md) and link the +incident record to that document from transparency views. The expected metadata +field is `postmortemUrl`; see +[`docs/incident-postmortems.md`](./incident-postmortems.md) for the full +linking flow and safety rules. + ## 3. Emergency Freeze Control (#288) _Note: These commands require the admin role. If the `requireAdmin` middleware is active, you may need an auth token._ diff --git a/docs/incident-postmortems.md b/docs/incident-postmortems.md new file mode 100644 index 000000000..e75e74fab --- /dev/null +++ b/docs/incident-postmortems.md @@ -0,0 +1,56 @@ +# Incident Postmortems + +Incident records should link to a public postmortem after the incident is +mitigated or resolved. This keeps operator notes, transparency views, and user +communications aligned around the same source of truth. 
+ +## Template + +Start from [`docs/postmortems/TEMPLATE.md`](./postmortems/TEMPLATE.md). + +Create incident-specific files with this path shape: + +```text +docs/postmortems/YYYY-MM-DD-incident-title-slug.md +``` + +Use the incident start date in UTC. Keep the slug short and based on the public +incident title. + +## Required Sections + +Every public postmortem must include: + +- `Summary` +- `Impact` +- `Timeline` +- `Root Cause` +- `Resolution` +- `Prevention` +- `Public Transparency Note` + +## Linking Flow + +1. Create the incident through `POST /api/incidents` as soon as the event is + confirmed. +2. During response, keep the incident `description` focused on current user + impact and operational status. +3. After mitigation, copy the template to the incident-specific file path and + complete the required sections. +4. Link the incident record to the postmortem with a `postmortemUrl` metadata + field in API consumers or transparency data exports. +5. Render that link in transparency views once the incident status is + `resolved` or `monitoring`. + +The database schema does not yet persist `postmortemUrl`. Until it does, +services and dashboards should call `getPostmortemLinkingGuidance` on +`IncidentService` to generate the expected repository path and display label. + +## Link Safety + +- Link only repository docs, status pages, or governance posts approved for + public release. +- Do not link private dashboards, logs, provider consoles, internal chat, or + raw monitoring dumps. +- Redact keys, wallet secrets, customer data, and exploit reproduction details + before publishing. diff --git a/docs/postmortems/TEMPLATE.md b/docs/postmortems/TEMPLATE.md new file mode 100644 index 000000000..a03f2a7b1 --- /dev/null +++ b/docs/postmortems/TEMPLATE.md @@ -0,0 +1,61 @@ +# Incident Postmortem Template + +Use this template for every incident that affects vault availability, yield +accuracy, provider connectivity, settlement, or public transparency data. 
Keep +the public version factual and safe to share: do not include private keys, +provider secrets, unreleased exploit details, or personal data. + +## Summary + +- Incident ID: +- Public title: +- Status: `investigating` | `mitigated` | `resolved` | `monitoring` +- Severity: `LOW` | `MEDIUM` | `HIGH` | `CRITICAL` +- Protocol or component: +- Started at: +- Resolved at: +- Postmortem owner: + +## Impact + +- Affected vaults, routes, providers, or users: +- User-visible symptoms: +- Duration: +- Estimated financial or operational impact: +- Data freshness or reporting impact: + +## Timeline + +| Time (UTC) | Event | Evidence | +| --- | --- | --- | +| | Incident detected | | +| | Mitigation started | | +| | User communication posted | | +| | Root cause confirmed | | +| | Fix deployed | | +| | Monitoring completed | | + +## Root Cause + +Describe the technical cause and the control that failed to catch it earlier. +If the cause is still unknown, state what evidence is missing and who owns the +follow-up investigation. + +## Resolution + +- Immediate mitigation: +- Permanent fix: +- Validation performed: +- Remaining risk: + +## Prevention + +| Action item | Owner | Due date | Tracking link | +| --- | --- | --- | --- | +| | | | | + +## Public Transparency Note + +Summarize what can be shown in user-facing transparency views. Include the +incident record link, current status, impact window, and prevention summary. +Keep this section concise enough for dashboards and release notes. 
diff --git a/server/src/__tests__/incidentService.test.ts b/server/src/__tests__/incidentService.test.ts index 831a10041..92c2d6068 100644 --- a/server/src/__tests__/incidentService.test.ts +++ b/server/src/__tests__/incidentService.test.ts @@ -70,4 +70,25 @@ describe("IncidentService", () => { expect(incident).not.toBeNull(); expect(incident?.id).toBe("mock-id-123"); }); + + it("should build postmortem linking guidance for transparency views", () => { + const guidance = service.getPostmortemLinkingGuidance({ + id: "incident-42", + title: "Provider RPC outage: Blend USDC vault", + startedAt: new Date("2026-05-27T08:15:00.000Z"), + resolved: true, + }); + + expect(guidance).toEqual({ + incidentId: "incident-42", + title: "Provider RPC outage: Blend USDC vault", + status: "resolved", + templatePath: "docs/postmortems/TEMPLATE.md", + expectedPostmortemPath: "docs/postmortems/2026-05-27-provider-rpc-outage-blend-usdc-vault.md", + linkField: "postmortemUrl", + displayLabel: "Postmortem: Provider RPC outage: Blend USDC vault", + transparencyHint: + "Render postmortemUrl in incident records and transparency views after mitigation or resolution.", + }); + }); }); diff --git a/server/src/services/incidentService.ts b/server/src/services/incidentService.ts index e1801e8b0..1bef39ba2 100644 --- a/server/src/services/incidentService.ts +++ b/server/src/services/incidentService.ts @@ -14,6 +14,30 @@ export interface IncidentWithRecommendations extends Incident { recommendations: RecoveryRecommendation[]; } +export const INCIDENT_POSTMORTEM_TEMPLATE_PATH = "docs/postmortems/TEMPLATE.md"; +export const INCIDENT_POSTMORTEM_LINK_FIELD = "postmortemUrl"; + +export interface IncidentPostmortemGuidance { + incidentId: string; + title: string; + status: "open" | "resolved"; + templatePath: string; + expectedPostmortemPath: string; + linkField: string; + displayLabel: string; + transparencyHint: string; +} + +function slugifyIncidentTitle(title: string): string { + const slug = title + 
.toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-+|-+$/g, "") + .slice(0, 72); + + return slug || "incident"; +} + export class IncidentService { async createIncident(data: { protocol: string; @@ -59,6 +83,25 @@ export class IncidentService { }); } + getPostmortemLinkingGuidance( + incident: Pick<Incident, "id" | "title" | "startedAt" | "resolved"> + ): IncidentPostmortemGuidance { + const date = incident.startedAt.toISOString().slice(0, 10); + const slug = slugifyIncidentTitle(incident.title); + + return { + incidentId: incident.id, + title: incident.title, + status: incident.resolved ? "resolved" : "open", + templatePath: INCIDENT_POSTMORTEM_TEMPLATE_PATH, + expectedPostmortemPath: `docs/postmortems/${date}-${slug}.md`, + linkField: INCIDENT_POSTMORTEM_LINK_FIELD, + displayLabel: `Postmortem: ${incident.title}`, + transparencyHint: + "Render postmortemUrl in incident records and transparency views after mitigation or resolution.", + }; + } + async getRecommendationsForIncident(id: string): Promise<RecoveryRecommendation[]> { const incident = await this.getIncidentById(id); if (!incident) return [];