Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/mission-control/phase1-observability-runbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ All baseline metrics emit as JSON logs with `[obs]` prefix. This is intentionall
- Consistency validator (catalog ↔ dashboard ↔ alerts ↔ routing ↔ provisioned endpoints):
- `npm run mission-control:validate-observability`
- Enforces route parity between dashboard + routing files and fails if a route target is not declared in the routing endpoint catalog (`routing.staging/production channel|pager`).
- Enforces severity-to-routing policy for production: `low|medium → slack`, `high|critical → slack + pagerduty`.
- Policy unit tests:
- `npm run mission-control:test-observability`

## Runnable path (today)
1. Start app and Convex dev stack.
Expand Down
2 changes: 2 additions & 0 deletions e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ When these vars are present, tests seed `lisa-auth-state` + `lisa-jwt-token` in

If these vars are absent, the fixture falls back to a fake local token (fine for local/dev auth, but cloud environments that validate JWTs will redirect to OTP and AC tests will skip with an explicit reason).

`mission-control-phase1.spec.ts` now always runs **AC0 auth readiness probe** in CI: it captures deterministic auth diagnostics artifacts (`auth-diagnostics-*.json`, `auth-gate-*.png`, `auth-gate-*.html`) when the app is OTP-gated so failures/skips are actionable without reproducing locally.

## Mission Control AC5 perf fixture

Set `MISSION_CONTROL_FIXTURE_PATH` to a JSON file for AC5 perf gate tuning (example: `e2e/fixtures/mission-control.production.json`).
Expand Down
98 changes: 73 additions & 25 deletions e2e/mission-control-phase1.spec.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,45 @@
import { test, expect, type Page } from "@playwright/test";
import { test, expect, type Page, type TestInfo } from "@playwright/test";
import { seedAuthSession } from "./fixtures/auth";
import { loadPerfFixtureFromEnv } from "./fixtures/mission-control-perf-fixture";
import { computeP95, writePerfGateResult } from "./fixtures/mission-control-perf-report";

/**
 * Captures deterministic auth-gate diagnostics as test attachments so CI
 * failures/skips are actionable without local reproduction.
 * Attaches three artifacts sharing one timestamp for correlation:
 * a JSON state report, a full-page screenshot, and the raw page HTML.
 */
async function attachAuthDiagnostics(page: Page, testInfo: TestInfo, reason: string) {
  // One timestamp so the .json/.png/.html artifacts can be matched up.
  const stamp = Date.now();

  // Short-circuiting checks: stop probing as soon as one OTP locator matches.
  const otpVisible =
    (await page.getByRole("button", { name: /send code|verify code/i }).count()) > 0
    || (await page.getByLabel(/email/i).count()) > 0
    || (await page.getByLabel(/verification code|otp/i).count()) > 0;
  const shellVisible = (await page.getByRole("heading", { name: /your lists/i }).count()) > 0;

  const report = {
    reason,
    url: page.url(),
    hasOtpUi: otpVisible,
    hasAppShell: shellVisible,
    // Only presence of the token is recorded, never its value.
    hasAuthEnvToken: Boolean(process.env.E2E_AUTH_TOKEN),
    authEnv: {
      email: process.env.E2E_AUTH_EMAIL ?? null,
      subOrgId: process.env.E2E_AUTH_SUBORG_ID ?? null,
      did: process.env.E2E_AUTH_DID ?? null,
    },
    // Keys only (no values) to show which auth state was seeded client-side.
    localStorageKeys: await page.evaluate(() => Object.keys(localStorage)),
  };

  await testInfo.attach(`auth-diagnostics-${stamp}.json`, {
    body: Buffer.from(JSON.stringify(report, null, 2), "utf8"),
    contentType: "application/json",
  });

  await testInfo.attach(`auth-gate-${stamp}.png`, {
    body: await page.screenshot({ fullPage: true }),
    contentType: "image/png",
  });

  await testInfo.attach(`auth-gate-${stamp}.html`, {
    body: Buffer.from(await page.content(), "utf8"),
    contentType: "text/html",
  });
}

async function openAuthenticatedApp(page: Page, displayName: string) {
async function openAuthenticatedApp(page: Page, testInfo: TestInfo, displayName: string) {
await seedAuthSession(page, {
displayName,
email: `e2e+${displayName.toLowerCase().replace(/\s+/g, "-")}@poo.app`,
Expand All @@ -24,24 +61,30 @@ async function openAuthenticatedApp(page: Page, displayName: string) {

const usingSeededEnvAuth = Boolean(process.env.E2E_AUTH_TOKEN);
if (hasOtpUi && !usingSeededEnvAuth) {
const reason =
"Environment requires server-validated auth. Set E2E_AUTH_TOKEN + E2E_AUTH_EMAIL + E2E_AUTH_SUBORG_ID + E2E_AUTH_DID to run Mission Control AC paths.";
await attachAuthDiagnostics(page, testInfo, reason);
return {
ready: false as const,
reason:
"Environment requires server-validated auth. Set E2E_AUTH_TOKEN + E2E_AUTH_EMAIL + E2E_AUTH_SUBORG_ID + E2E_AUTH_DID to run Mission Control AC paths.",
reason,
};
}

if (hasOtpUi && usingSeededEnvAuth) {
const reason =
"Seeded auth env vars are present, but app still shows OTP UI. Verify E2E_AUTH_* values match backend environment.";
await attachAuthDiagnostics(page, testInfo, reason);
return {
ready: false as const,
reason:
"Seeded auth env vars are present, but app still shows OTP UI. Verify E2E_AUTH_* values match backend environment.",
reason,
};
}

const reason = "Authenticated app shell unavailable; no lists shell or OTP UI detected.";
await attachAuthDiagnostics(page, testInfo, reason);
return {
ready: false as const,
reason: "Authenticated app shell unavailable; no lists shell or OTP UI detected.",
reason,
};
}

Expand Down Expand Up @@ -77,23 +120,28 @@ async function seedPerfLists(page: Page, listCount: number, itemsPerList: number
return seededListNames;
}

/**
 * Nearest-rank 95th percentile of `values`.
 * Does not mutate the input; returns 0 for an empty array.
 */
function p95(values: number[]) {
  const ascending = values.slice().sort((left, right) => left - right);
  // Nearest-rank index; clamped so a tiny/empty sample never goes negative.
  const rank = Math.max(Math.ceil(ascending.length * 0.95) - 1, 0);
  return ascending[rank] ?? 0;
}

test.describe("Mission Control Phase 1 acceptance", () => {
const perfFixture = loadPerfFixtureFromEnv();

  // AC0 always runs in CI: either the authenticated shell is reachable (happy
  // path) or openAuthenticatedApp has already attached auth-gate diagnostics,
  // which this test surfaces via an annotation instead of a hard failure.
  test("AC0 auth readiness probe: capture deterministic diagnostics and proceed when shell is available", async ({ page }, testInfo) => {
    const setup = await openAuthenticatedApp(page, testInfo, "MC Auth Probe");
    if (setup.ready) {
      // Shell is up: confirm the lists heading renders and finish.
      await expect(page.getByRole("heading", { name: /your lists/i })).toBeVisible();
      return;
    }

    // Auth-gated: record the reason on the test result so report readers see
    // why downstream AC tests will skip; diagnostics artifacts were attached
    // inside openAuthenticatedApp.
    testInfo.annotations.push({ type: "auth-gated", description: setup.reason });
    expect(setup.ready).toBe(false);
  });

test("baseline harness boots app shell", async ({ page }) => {
await seedAuthSession(page);
await page.goto("/");
await expect(page).toHaveURL(/\/(app)?/);
});

test("AC1 assignee round-trip: assignee updates propagate to all active clients in <1s", async ({ page }) => {
const setup = await openAuthenticatedApp(page, "MC Assignee User");
test("AC1 assignee round-trip: assignee updates propagate to all active clients in <1s", async ({ page }, testInfo) => {
const setup = await openAuthenticatedApp(page, testInfo, "MC Assignee User");
test.skip(!setup.ready, !setup.ready ? setup.reason : "");
await createList(page, "MC Assignee List");
await createItem(page, "MC Assigned Item");
Expand All @@ -110,8 +158,8 @@ test.describe("Mission Control Phase 1 acceptance", () => {
expect(elapsed).toBeLessThan(1000);
});

test("AC2 activity log completeness: created|completed|assigned|commented|edited each writes exactly one activity row", async ({ page }) => {
const setup = await openAuthenticatedApp(page, "MC Activity User");
test("AC2 activity log completeness: created|completed|assigned|commented|edited each writes exactly one activity row", async ({ page }, testInfo) => {
const setup = await openAuthenticatedApp(page, testInfo, "MC Activity User");
test.skip(!setup.ready, !setup.ready ? setup.reason : "");
await createList(page, "MC Activity List");
await createItem(page, "Activity Item");
Expand All @@ -138,7 +186,7 @@ test.describe("Mission Control Phase 1 acceptance", () => {
await expect(page.getByText(/edited|renamed/i)).toHaveCount(1);
});

test("AC3 presence freshness: presence disappears <= 90s after list close", async ({ browser }) => {
test("AC3 presence freshness: presence disappears <= 90s after list close", async ({ browser }, testInfo) => {
const contextA = await browser.newContext();
const contextB = await browser.newContext();
const pageA = await contextA.newPage();
Expand All @@ -147,7 +195,7 @@ test.describe("Mission Control Phase 1 acceptance", () => {
await seedAuthSession(pageA, { displayName: "MC Presence A" });
await seedAuthSession(pageB, { displayName: "MC Presence B" });

const setup = await openAuthenticatedApp(pageA, "MC Presence A");
const setup = await openAuthenticatedApp(pageA, testInfo, "MC Presence A");
test.skip(!setup.ready, !setup.ready ? setup.reason : "");
await createList(pageA, "MC Presence List");

Expand All @@ -165,8 +213,8 @@ test.describe("Mission Control Phase 1 acceptance", () => {
await contextB.close();
});

test("AC4 no-regression core UX: non-collab user flow has no required new fields and no agent UI by default", async ({ page }) => {
const setup = await openAuthenticatedApp(page, "MC No Regression");
test("AC4 no-regression core UX: non-collab user flow has no required new fields and no agent UI by default", async ({ page }, testInfo) => {
const setup = await openAuthenticatedApp(page, testInfo, "MC No Regression");
test.skip(!setup.ready, !setup.ready ? setup.reason : "");
await createList(page, "MC Core Flow");
await createItem(page, "Core Item");
Expand All @@ -180,8 +228,8 @@ test.describe("Mission Control Phase 1 acceptance", () => {
await expect(page.getByRole("button", { name: /agent/i })).toHaveCount(0);
});

test("AC5a perf floor harness: P95 list open <500ms", async ({ page }) => {
const setup = await openAuthenticatedApp(page, "MC Perf User");
test("AC5a perf floor harness: P95 list open <500ms", async ({ page }, testInfo) => {
const setup = await openAuthenticatedApp(page, testInfo, "MC Perf User");
test.skip(!setup.ready, !setup.ready ? setup.reason : "");

const samples: number[] = [];
Expand Down Expand Up @@ -209,8 +257,8 @@ test.describe("Mission Control Phase 1 acceptance", () => {
expect(listOpenP95).toBeLessThan(thresholdMs);
});

test("AC5b perf floor harness: activity panel load P95 <700ms", async ({ page }) => {
const setup = await openAuthenticatedApp(page, "MC Perf Activity User");
test("AC5b perf floor harness: activity panel load P95 <700ms", async ({ page }, testInfo) => {
const setup = await openAuthenticatedApp(page, testInfo, "MC Perf Activity User");
test.skip(!setup.ready, !setup.ready ? setup.reason : "");
await createList(page, "MC Perf Activity List");

Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"start": "serve dist -s",
"test:e2e": "playwright test",
"test:e2e:ui": "playwright test --ui",
"mission-control:test-observability": "node --test scripts/mission-control-alert-severity-policy.test.mjs",
"mission-control:validate-observability": "node scripts/validate-mission-control-observability.mjs",
"env:dev": "bash -c 'export $(grep -v \"^#\" .env.local | grep -E \"^(TURNKEY_|JWT_SECRET|WEBVH_DOMAIN)\" | xargs) && for k in TURNKEY_API_PUBLIC_KEY TURNKEY_API_PRIVATE_KEY TURNKEY_ORGANIZATION_ID JWT_SECRET WEBVH_DOMAIN; do npx convex env set \"$k\" \"${!k}\"; done'",
"env:prod": "bash -c 'export $(grep -v \"^#\" .env.local | grep -E \"^(TURNKEY_|JWT_SECRET|WEBVH_DOMAIN)\" | xargs) && for k in TURNKEY_API_PUBLIC_KEY TURNKEY_API_PRIVATE_KEY TURNKEY_ORGANIZATION_ID JWT_SECRET WEBVH_DOMAIN; do npx convex env set --prod \"$k\" \"${!k}\"; done'",
Expand Down
40 changes: 40 additions & 0 deletions scripts/mission-control-alert-severity-policy.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Policy table: normalized severity level -> notification scheme(s) that MUST
// appear among an alert's production routes. Stored in a Map (not a plain
// object) so lookups can never hit Object.prototype members such as
// "constructor" or "toString", which previously made unknown severities with
// those names return a function instead of [] and crash downstream filtering.
const SEVERITY_TO_REQUIRED_SCHEMES = new Map([
  ["low", ["slack"]],
  ["medium", ["slack"]],
  ["high", ["slack", "pagerduty"]],
  ["critical", ["slack", "pagerduty"]],
]);

/**
 * Normalizes a severity value for table lookup: coerce to string, trim,
 * lowercase. `null`/`undefined` normalize to "".
 */
export function normalizeSeverity(value) {
  return String(value ?? "").trim().toLowerCase();
}

/**
 * Returns the required route schemes for `severity`, or [] when the severity
 * is not a recognized policy level. Always returns a fresh array so callers
 * cannot mutate the shared policy table.
 */
export function requiredSchemesForSeverity(severity) {
  const schemes = SEVERITY_TO_REQUIRED_SCHEMES.get(normalizeSeverity(severity));
  return schemes ? [...schemes] : [];
}

/**
 * Extracts the set of route schemes (the part before "://") from a list of
 * route targets. Entries are trimmed, blanks are dropped, duplicates are
 * collapsed, and the result is returned in sorted order.
 */
export function routeSchemes(routeList) {
  const schemes = new Set();
  for (const raw of routeList ?? []) {
    const trimmed = String(raw).trim();
    if (!trimmed) continue;
    // A target with no "://" separator yields the whole string as its scheme.
    schemes.add(trimmed.split("://")[0]);
  }
  return [...schemes].sort();
}

/**
 * Validates one alert's production routes against the severity policy.
 * Returns an array of human-readable error strings; empty means compliant.
 */
export function validateSeverityRoutePolicy({ name, severity, productionRoutes }) {
  const required = requiredSchemesForSeverity(severity);

  // Unknown severity level: report it rather than silently passing.
  if (required.length === 0) {
    return [`Alert ${name} has unsupported severity: ${severity}`];
  }

  const available = new Set(routeSchemes(productionRoutes));
  const absent = required.filter((scheme) => !available.has(scheme));

  return absent.length === 0
    ? []
    : [
      `Alert ${name} (${normalizeSeverity(severity)}) missing production route scheme(s): ${absent.join(", ")}`,
    ];
}
60 changes: 60 additions & 0 deletions scripts/mission-control-alert-severity-policy.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import test from "node:test";
import assert from "node:assert/strict";

import {
requiredSchemesForSeverity,
routeSchemes,
validateSeverityRoutePolicy,
} from "./mission-control-alert-severity-policy.mjs";

// Guards the policy table itself: if the severity -> scheme mapping drifts,
// this fails before any routing validation does.
test("required schemes by severity are stable", () => {
  assert.deepEqual(requiredSchemesForSeverity("low"), ["slack"]);
  assert.deepEqual(requiredSchemesForSeverity("medium"), ["slack"]);
  assert.deepEqual(requiredSchemesForSeverity("high"), ["slack", "pagerduty"]);
  assert.deepEqual(requiredSchemesForSeverity("critical"), ["slack", "pagerduty"]);
});

// routeSchemes must trim whitespace, dedupe repeated targets, and return
// schemes sorted regardless of input order.
test("route schemes normalize and dedupe", () => {
  const schemes = routeSchemes([
    "slack://aviary-oncall-mission-control",
    " pagerduty://mission-control-primary ",
    "slack://aviary-oncall-mission-control",
  ]);

  assert.deepEqual(schemes, ["pagerduty", "slack"]);
});

// A slack-only production route list for a high-severity alert must yield
// exactly one error that names the missing pagerduty scheme.
test("high severity requires pagerduty in production", () => {
  const errors = validateSeverityRoutePolicy({
    name: "phase1_subscription_latency_p95_high",
    severity: "high",
    productionRoutes: ["slack://aviary-oncall-mission-control"],
  });

  assert.equal(errors.length, 1);
  assert.match(errors[0], /missing production route scheme\(s\): pagerduty/);
});

// Happy path: critical with both required schemes produces no errors.
test("critical severity passes with slack + pagerduty", () => {
  const errors = validateSeverityRoutePolicy({
    name: "phase1_run_control_failure",
    severity: "critical",
    productionRoutes: [
      "slack://aviary-oncall-mission-control",
      "pagerduty://mission-control-primary",
    ],
  });

  assert.deepEqual(errors, []);
});

// Severities outside the policy table (e.g. "sev0") must be reported as
// unsupported rather than silently passing validation.
test("unsupported severity reports an error", () => {
  const errors = validateSeverityRoutePolicy({
    name: "phase1_unknown",
    severity: "sev0",
    productionRoutes: ["slack://aviary-oncall-mission-control"],
  });

  assert.equal(errors.length, 1);
  assert.match(errors[0], /unsupported severity/);
});
14 changes: 9 additions & 5 deletions scripts/validate-mission-control-observability.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env node
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import { validateSeverityRoutePolicy } from "./mission-control-alert-severity-policy.mjs";

function readJson(path) {
return JSON.parse(readFileSync(resolve(process.cwd(), path), "utf8"));
Expand Down Expand Up @@ -184,16 +185,19 @@ for (const alert of routing.alerts ?? []) {
fail(`Production route mismatch for ${alert.name}: dashboard=${dashboardProduction.join("|")} routing=${routingProduction.join("|")}`);
}

if (String(alert.severity) === "critical") {
const hasPagerDuty = routingProduction.some((target) => target.startsWith("pagerduty://"));
if (!hasPagerDuty) {
fail(`Critical alert ${alert.name} must include a pagerduty:// production route`);
}
const policyErrors = validateSeverityRoutePolicy({
name: alert.name,
severity: alert.severity,
productionRoutes: routingProduction,
});
for (const error of policyErrors) {
fail(error);
}
}
}
pass("Routing config includes staging and production targets for each alert");
pass("Alert routes match between dashboard and routing config");
pass("Severity-based production routing policy is satisfied");

if (process.exitCode && process.exitCode !== 0) {
console.error("Mission Control observability validation failed.");
Expand Down