diff --git a/docs/mission-control/phase1-observability-runbook.md b/docs/mission-control/phase1-observability-runbook.md index 7a8806a..c2739bf 100644 --- a/docs/mission-control/phase1-observability-runbook.md +++ b/docs/mission-control/phase1-observability-runbook.md @@ -29,6 +29,9 @@ All baseline metrics emit as JSON logs with `[obs]` prefix. This is intentionall - Consistency validator (catalog ↔ dashboard ↔ alerts ↔ routing ↔ provisioned endpoints): - `npm run mission-control:validate-observability` - Enforces route parity between dashboard + routing files and fails if a route target is not declared in the routing endpoint catalog (`routing.staging/production channel|pager`). + - Enforces severity-to-routing policy for production: `low|medium → slack`, `high|critical → slack + pagerduty`. +- Policy unit tests: + - `npm run mission-control:test-observability` ## Runnable path (today) 1. Start app and Convex dev stack. diff --git a/e2e/README.md b/e2e/README.md index dc98ec5..9584027 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -26,6 +26,8 @@ When these vars are present, tests seed `lisa-auth-state` + `lisa-jwt-token` in If these vars are absent, the fixture falls back to a fake local token (fine for local/dev auth, but cloud environments that validate JWTs will redirect to OTP and AC tests will skip with an explicit reason). +`mission-control-phase1.spec.ts` now always runs **AC0 auth readiness probe** in CI: it captures deterministic auth diagnostics artifacts (`auth-diagnostics-*.json`, `auth-gate-*.png`, `auth-gate-*.html`) when the app is OTP-gated so failures/skips are actionable without reproducing locally. + ## Mission Control AC5 perf fixture Set `MISSION_CONTROL_FIXTURE_PATH` to a JSON file for AC5 perf gate tuning (example: `e2e/fixtures/mission-control.production.json`). 
/**
 * Attaches auth-gate diagnostics to the current test so a skipped or failed
 * auth setup can be investigated from CI artifacts alone.
 *
 * Three attachments are produced, all suffixed with the same timestamp:
 * `auth-diagnostics-<ts>.json`, `auth-gate-<ts>.png` and `auth-gate-<ts>.html`.
 *
 * Gathering diagnostics is best-effort: this helper runs on the "app is not
 * ready" path, where the page may be in an unexpected state. A probe that
 * throws is recorded under `captureErrors` in the JSON artifact instead of
 * propagating, so the caller can still return its `{ ready: false, reason }`
 * result rather than failing with an unrelated error.
 *
 * @param page - Page whose current state is inspected.
 * @param testInfo - Test the artifacts are attached to.
 * @param reason - Human-readable explanation of why auth setup was not ready.
 */
async function attachAuthDiagnostics(page: Page, testInfo: TestInfo, reason: string) {
  const now = Date.now();
  const captureErrors: string[] = [];

  // Runs one probe; on failure records the message and yields null.
  const capture = async <T>(label: string, probe: () => Promise<T>): Promise<T | null> => {
    try {
      return await probe();
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      captureErrors.push(`${label}: ${message}`);
      return null;
    }
  };

  // Same short-circuit order as the readiness check: buttons, email, then code field.
  const hasOtpUi = await capture(
    "hasOtpUi",
    async () =>
      (await page.getByRole("button", { name: /send code|verify code/i }).count()) > 0
      || (await page.getByLabel(/email/i).count()) > 0
      || (await page.getByLabel(/verification code|otp/i).count()) > 0,
  );
  const hasAppShell = await capture(
    "hasAppShell",
    async () => (await page.getByRole("heading", { name: /your lists/i }).count()) > 0,
  );
  const localStorageKeys = await capture("localStorageKeys", () =>
    page.evaluate(() => Object.keys(localStorage)),
  );

  // Capture page snapshots before building the JSON so their failures are reported in it.
  const screenshot = await capture("screenshot", () => page.screenshot({ fullPage: true }));
  const html = await capture("content", () => page.content());

  const diagnostics = {
    reason,
    url: page.url(),
    hasOtpUi,
    hasAppShell,
    // Only the presence of the token is recorded, never its value.
    hasAuthEnvToken: Boolean(process.env.E2E_AUTH_TOKEN),
    authEnv: {
      email: process.env.E2E_AUTH_EMAIL ?? null,
      subOrgId: process.env.E2E_AUTH_SUBORG_ID ?? null,
      did: process.env.E2E_AUTH_DID ?? null,
    },
    localStorageKeys,
    captureErrors,
  };

  await testInfo.attach(`auth-diagnostics-${now}.json`, {
    body: Buffer.from(JSON.stringify(diagnostics, null, 2), "utf8"),
    contentType: "application/json",
  });

  if (screenshot !== null) {
    await testInfo.attach(`auth-gate-${now}.png`, {
      body: screenshot,
      contentType: "image/png",
    });
  }

  if (html !== null) {
    await testInfo.attach(`auth-gate-${now}.html`, {
      body: Buffer.from(html, "utf8"),
      contentType: "text/html",
    });
  }
}
Verify E2E_AUTH_* values match backend environment.", + reason, }; } + const reason = "Authenticated app shell unavailable; no lists shell or OTP UI detected."; + await attachAuthDiagnostics(page, testInfo, reason); return { ready: false as const, - reason: "Authenticated app shell unavailable; no lists shell or OTP UI detected.", + reason, }; } @@ -77,23 +120,28 @@ async function seedPerfLists(page: Page, listCount: number, itemsPerList: number return seededListNames; } -function p95(values: number[]) { - const sorted = [...values].sort((a, b) => a - b); - const idx = Math.ceil(sorted.length * 0.95) - 1; - return sorted[Math.max(0, idx)] ?? 0; -} - test.describe("Mission Control Phase 1 acceptance", () => { const perfFixture = loadPerfFixtureFromEnv(); + test("AC0 auth readiness probe: capture deterministic diagnostics and proceed when shell is available", async ({ page }, testInfo) => { + const setup = await openAuthenticatedApp(page, testInfo, "MC Auth Probe"); + if (setup.ready) { + await expect(page.getByRole("heading", { name: /your lists/i })).toBeVisible(); + return; + } + + testInfo.annotations.push({ type: "auth-gated", description: setup.reason }); + expect(setup.ready).toBe(false); + }); + test("baseline harness boots app shell", async ({ page }) => { await seedAuthSession(page); await page.goto("/"); await expect(page).toHaveURL(/\/(app)?/); }); - test("AC1 assignee round-trip: assignee updates propagate to all active clients in <1s", async ({ page }) => { - const setup = await openAuthenticatedApp(page, "MC Assignee User"); + test("AC1 assignee round-trip: assignee updates propagate to all active clients in <1s", async ({ page }, testInfo) => { + const setup = await openAuthenticatedApp(page, testInfo, "MC Assignee User"); test.skip(!setup.ready, !setup.ready ? 
setup.reason : ""); await createList(page, "MC Assignee List"); await createItem(page, "MC Assigned Item"); @@ -110,8 +158,8 @@ test.describe("Mission Control Phase 1 acceptance", () => { expect(elapsed).toBeLessThan(1000); }); - test("AC2 activity log completeness: created|completed|assigned|commented|edited each writes exactly one activity row", async ({ page }) => { - const setup = await openAuthenticatedApp(page, "MC Activity User"); + test("AC2 activity log completeness: created|completed|assigned|commented|edited each writes exactly one activity row", async ({ page }, testInfo) => { + const setup = await openAuthenticatedApp(page, testInfo, "MC Activity User"); test.skip(!setup.ready, !setup.ready ? setup.reason : ""); await createList(page, "MC Activity List"); await createItem(page, "Activity Item"); @@ -138,7 +186,7 @@ test.describe("Mission Control Phase 1 acceptance", () => { await expect(page.getByText(/edited|renamed/i)).toHaveCount(1); }); - test("AC3 presence freshness: presence disappears <= 90s after list close", async ({ browser }) => { + test("AC3 presence freshness: presence disappears <= 90s after list close", async ({ browser }, testInfo) => { const contextA = await browser.newContext(); const contextB = await browser.newContext(); const pageA = await contextA.newPage(); @@ -147,7 +195,7 @@ test.describe("Mission Control Phase 1 acceptance", () => { await seedAuthSession(pageA, { displayName: "MC Presence A" }); await seedAuthSession(pageB, { displayName: "MC Presence B" }); - const setup = await openAuthenticatedApp(pageA, "MC Presence A"); + const setup = await openAuthenticatedApp(pageA, testInfo, "MC Presence A"); test.skip(!setup.ready, !setup.ready ? 
setup.reason : ""); await createList(pageA, "MC Presence List"); @@ -165,8 +213,8 @@ test.describe("Mission Control Phase 1 acceptance", () => { await contextB.close(); }); - test("AC4 no-regression core UX: non-collab user flow has no required new fields and no agent UI by default", async ({ page }) => { - const setup = await openAuthenticatedApp(page, "MC No Regression"); + test("AC4 no-regression core UX: non-collab user flow has no required new fields and no agent UI by default", async ({ page }, testInfo) => { + const setup = await openAuthenticatedApp(page, testInfo, "MC No Regression"); test.skip(!setup.ready, !setup.ready ? setup.reason : ""); await createList(page, "MC Core Flow"); await createItem(page, "Core Item"); @@ -180,8 +228,8 @@ test.describe("Mission Control Phase 1 acceptance", () => { await expect(page.getByRole("button", { name: /agent/i })).toHaveCount(0); }); - test("AC5a perf floor harness: P95 list open <500ms", async ({ page }) => { - const setup = await openAuthenticatedApp(page, "MC Perf User"); + test("AC5a perf floor harness: P95 list open <500ms", async ({ page }, testInfo) => { + const setup = await openAuthenticatedApp(page, testInfo, "MC Perf User"); test.skip(!setup.ready, !setup.ready ? setup.reason : ""); const samples: number[] = []; @@ -209,8 +257,8 @@ test.describe("Mission Control Phase 1 acceptance", () => { expect(listOpenP95).toBeLessThan(thresholdMs); }); - test("AC5b perf floor harness: activity panel load P95 <700ms", async ({ page }) => { - const setup = await openAuthenticatedApp(page, "MC Perf Activity User"); + test("AC5b perf floor harness: activity panel load P95 <700ms", async ({ page }, testInfo) => { + const setup = await openAuthenticatedApp(page, testInfo, "MC Perf Activity User"); test.skip(!setup.ready, !setup.ready ? 
/**
 * Production routing policy: which route schemes an alert of a given severity
 * must include. Stored in a Map so that lookups only ever see these four keys;
 * a plain object would also resolve inherited names such as "constructor".
 */
const SEVERITY_TO_REQUIRED_SCHEMES = new Map([
  ["low", ["slack"]],
  ["medium", ["slack"]],
  ["high", ["slack", "pagerduty"]],
  ["critical", ["slack", "pagerduty"]],
]);

/** Separator between a route's scheme and its target, e.g. `slack://channel`. */
const SCHEME_SEPARATOR = "://";

/**
 * Canonicalizes a severity value for lookup.
 *
 * @param {unknown} value - Raw severity; null/undefined become the empty string.
 * @returns {string} Trimmed, lower-cased severity.
 */
export function normalizeSeverity(value) {
  return String(value ?? "").trim().toLowerCase();
}

/**
 * Returns the route schemes required in production for a severity.
 *
 * @param {unknown} severity - Raw severity (case and surrounding whitespace are ignored).
 * @returns {string[]} A fresh array of required schemes, or an empty array when
 *   the severity is not part of the policy.
 */
export function requiredSchemesForSeverity(severity) {
  const schemes = SEVERITY_TO_REQUIRED_SCHEMES.get(normalizeSeverity(severity));
  // Copy so callers cannot mutate the shared policy table.
  return schemes ? [...schemes] : [];
}

/**
 * Extracts the distinct schemes from a list of route targets.
 *
 * Entries are trimmed first. An entry only contributes a scheme when it has a
 * non-empty prefix followed by `://`; blank entries and bare words such as
 * `"pagerduty"` are ignored so they cannot satisfy the policy by accident.
 *
 * @param {Iterable<unknown> | null | undefined} routeList - Route targets.
 * @returns {string[]} Unique schemes in ascending sort order.
 */
export function routeSchemes(routeList) {
  const schemes = new Set();
  for (const route of routeList ?? []) {
    const target = String(route ?? "").trim();
    const separatorIndex = target.indexOf(SCHEME_SEPARATOR);
    // -1: no separator at all; 0: separator with an empty scheme.
    if (separatorIndex > 0) {
      schemes.add(target.slice(0, separatorIndex));
    }
  }
  return [...schemes].sort();
}

/**
 * Checks one alert's production routes against the severity policy.
 *
 * @param {{ name: unknown, severity: unknown, productionRoutes: Iterable<unknown> | null | undefined }} alert
 *   Alert name (used in messages only), its severity, and its production route targets.
 * @returns {string[]} Error messages; empty when the alert satisfies the policy.
 */
export function validateSeverityRoutePolicy({ name, severity, productionRoutes }) {
  const requiredSchemes = requiredSchemesForSeverity(severity);
  if (requiredSchemes.length === 0) {
    return [`Alert ${name} has unsupported severity: ${severity}`];
  }

  const present = new Set(routeSchemes(productionRoutes));
  const missing = requiredSchemes.filter((scheme) => !present.has(scheme));

  if (missing.length > 0) {
    return [
      `Alert ${name} (${normalizeSeverity(severity)}) missing production route scheme(s): ${missing.join(", ")}`,
    ];
  }

  return [];
}
productionRoutes: ["slack://aviary-oncall-mission-control"], + }); + + assert.equal(errors.length, 1); + assert.match(errors[0], /missing production route scheme\(s\): pagerduty/); +}); + +test("critical severity passes with slack + pagerduty", () => { + const errors = validateSeverityRoutePolicy({ + name: "phase1_run_control_failure", + severity: "critical", + productionRoutes: [ + "slack://aviary-oncall-mission-control", + "pagerduty://mission-control-primary", + ], + }); + + assert.deepEqual(errors, []); +}); + +test("unsupported severity reports an error", () => { + const errors = validateSeverityRoutePolicy({ + name: "phase1_unknown", + severity: "sev0", + productionRoutes: ["slack://aviary-oncall-mission-control"], + }); + + assert.equal(errors.length, 1); + assert.match(errors[0], /unsupported severity/); +}); diff --git a/scripts/validate-mission-control-observability.mjs b/scripts/validate-mission-control-observability.mjs index e455b74..98e8f02 100644 --- a/scripts/validate-mission-control-observability.mjs +++ b/scripts/validate-mission-control-observability.mjs @@ -1,6 +1,7 @@ #!/usr/bin/env node import { readFileSync } from "node:fs"; import { resolve } from "node:path"; +import { validateSeverityRoutePolicy } from "./mission-control-alert-severity-policy.mjs"; function readJson(path) { return JSON.parse(readFileSync(resolve(process.cwd(), path), "utf8")); @@ -184,16 +185,19 @@ for (const alert of routing.alerts ?? 
[]) { fail(`Production route mismatch for ${alert.name}: dashboard=${dashboardProduction.join("|")} routing=${routingProduction.join("|")}`); } - if (String(alert.severity) === "critical") { - const hasPagerDuty = routingProduction.some((target) => target.startsWith("pagerduty://")); - if (!hasPagerDuty) { - fail(`Critical alert ${alert.name} must include a pagerduty:// production route`); - } + const policyErrors = validateSeverityRoutePolicy({ + name: alert.name, + severity: alert.severity, + productionRoutes: routingProduction, + }); + for (const error of policyErrors) { + fail(error); } } } pass("Routing config includes staging and production targets for each alert"); pass("Alert routes match between dashboard and routing config"); +pass("Severity-based production routing policy is satisfied"); if (process.exitCode && process.exitCode !== 0) { console.error("Mission Control observability validation failed.");