Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/workflows/sponsor-monitor-cron.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,19 @@ jobs:
cat /tmp/response.json
exit 1
fi

# Annotate the workflow run with a clear diagnosis when the ping fails.
# This surfaces silent scheduler failures in the GitHub Actions UI and
# the notification emails sent to repo watchers.
- name: Annotate failure cause
  if: failure()
  env:
    # Hand the step output to the script through the environment rather than
    # interpolating the expression into the script text itself.
    STATUS: ${{ steps.ping.outputs.http_status }}
  run: |
    # Emits one ::error annotation describing why the cron ping failed:
    #   401   -> secret mismatch
    #   503   -> secret not configured on the deployment
    #   other -> unexpected failure
    RAW_BODY=$(cat /tmp/response.json 2>/dev/null || echo "(no response body)")
    # Workflow commands are parsed one line at a time, so a multi-line body
    # (e.g. pretty-printed JSON) would cut the annotation short and dump the
    # remainder into the log. Percent-encode %, CR and LF so the whole body
    # stays inside the annotation message.
    BODY=$(printf '%s' "$RAW_BODY" | awk 'BEGIN { ORS = "" } { gsub(/%/, "%25"); gsub(/\r/, "%0D"); if (NR > 1) printf "%%0A"; print }')
    if [ "$STATUS" = "401" ]; then
      echo "::error title=Cron-Ping Auth Failure::CRON_SECRET mismatch between GitHub Actions secret and deployment env var. The sponsor monitor will NOT run until this is fixed. HTTP $STATUS: $BODY"
    elif [ "$STATUS" = "503" ]; then
      echo "::error title=Cron-Ping Not Configured::CRON_SECRET is not set in the deployment environment. Sponsor monitor external trigger is disabled. HTTP $STATUS: $BODY"
    else
      echo "::error title=Cron-Ping Unexpected Failure::HTTP $STATUS — $BODY"
    fi
45 changes: 38 additions & 7 deletions client/src/components/HeroSection.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,13 @@ function RecentlyRevokedSection() {
/**
 * Shape of the nightly register statistics consumed by the hero section.
 *
 * Fields are grouped by purpose: register totals, change counters, and
 * freshness information about the last run.
 */
interface NightlyStats {
  // ── Register totals ──────────────────────────────────────────────────────
  totalActive: number;
  revokedLast12Months: number;

  // ── Change counters ──────────────────────────────────────────────────────
  addedCount: number;
  removedCount: number;
  changesCount: number;

  // ── Freshness ────────────────────────────────────────────────────────────
  /** Date of the last run, or null. NOTE(review): presumably the value rendered via formatRunDate — confirm. */
  lastRunDate: string | null;
  /** Timestamp string of the last successful run; parsed with `new Date(...)` and rendered as a UTC time. */
  lastSuccessfulRunAt: string | null;
  /** Days without a successful run; multiplied by 24 as a fallback when `hoursStale` is nullish. */
  staleDays: number;
  /**
   * Hours since the last successful run; drives the banner severity tiers.
   * NOTE(review): UrgencyBanner reads this with `?? staleDays * 24`, so some
   * responses may omit it — confirm whether it should be optional.
   */
  hoursStale: number;
}

function formatRunDate(dateStr: string | null): string {
Expand Down Expand Up @@ -226,7 +228,17 @@ function NightlyStatsBar() {
</div>
<div className="px-4">
<p className={cn("text-2xl font-bold text-emerald-400", isLoading && "animate-pulse")}>{dateLabel}</p>
<p className="text-xs text-slate-400 mt-0.5">Register last checked</p>
<p className="text-xs text-slate-400 mt-0.5">
Register last checked
{!isLoading && data?.lastSuccessfulRunAt && (
<span
className="block text-slate-500 text-xs"
title={new Date(data.lastSuccessfulRunAt).toUTCString()}
>
{new Date(data.lastSuccessfulRunAt).toLocaleTimeString("en-GB", { hour: "2-digit", minute: "2-digit", timeZone: "UTC" })} UTC
</span>
)}
</p>
</div>
</div>
</div>
Expand Down Expand Up @@ -345,23 +357,42 @@ function UrgencyBanner() {
);
}

// No changes at all
// If staleDays >= 3 (no successful run in 3+ calendar days), show an amber
// warning banner — the register data may be outdated.
if (data.staleDays >= 3) {
// No changes at all.
// Severity tiers based on hours since last successful run:
// >48h → critical (red-toned): the pipeline may be broken.
// >24h → warn (amber): nightly update overdue.
// ≤24h → fresh: show calm confirmation.
const hoursStale = data.hoursStale ?? data.staleDays * 24;

if (hoursStale > 48) {
return (
<div className="bg-red-900 text-red-100">
<div className="max-w-7xl mx-auto px-4 py-2 flex items-center justify-center gap-2 text-center">
<XCircle className="w-3.5 h-3.5 shrink-0 text-red-300" aria-hidden="true" />
<p className="text-xs sm:text-sm font-medium">
Register data is out of date — last checked {formatRunDate(lastRunDate)}.{" "}
No update in {hoursStale}h. The nightly pipeline may be experiencing an issue.
</p>
</div>
</div>
);
}

if (hoursStale > 24) {
return (
<div className="bg-amber-900 text-amber-100">
<div className="max-w-7xl mx-auto px-4 py-2 flex items-center justify-center gap-2 text-center">
<AlertTriangle className="w-3.5 h-3.5 shrink-0 text-amber-300" aria-hidden="true" />
<p className="text-xs sm:text-sm font-medium">
Register data may be out of date — last checked {formatRunDate(lastRunDate)}. The nightly update may have been delayed.
Register data may be out of date — last checked {formatRunDate(lastRunDate)}.{" "}
The nightly update may have been delayed.
</p>
</div>
</div>
);
}

// No changes and data is fresh — show a calm confirmation strip
// No changes and data is fresh (≤24h) — show a calm confirmation strip.
return (
<div className="bg-slate-800 text-slate-200">
<div className="max-w-7xl mx-auto px-4 py-2 flex items-center justify-center gap-2 text-center">
Expand Down
121 changes: 121 additions & 0 deletions server/routes/__tests__/ops.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ vi.mock("../../utils/incidentManager", () => ({
tryAutoRemediate: vi.fn(async () => "corr-remediate-1"),
}));

// Replace the adminAlert module with a stub: sendAdminAlert becomes an async
// mock that resolves to undefined.
// NOTE(review): presumably this keeps the ops route tests from dispatching
// real admin alerts — confirm against ../ops.
vi.mock("../../utils/adminAlert", () => ({
sendAdminAlert: vi.fn(async () => undefined),
}));

import { registerOpsRoutes } from "../ops";
import { getAllJobHealthSnapshots } from "../../utils/jobTelemetry";
import { evaluateSeverity, createIncidentTicket } from "../../utils/incidentManager";
Expand Down Expand Up @@ -529,6 +533,123 @@ describe("ops routes", () => {
}
});

// ── Cron-ping auth / config failure paths ───────────────────────────────────

it("POST /cron-ping returns 503 when CRON_SECRET is not set", async () => {
  // Run with the secret removed, remembering the old value for cleanup.
  const savedSecret = process.env.CRON_SECRET;
  delete process.env.CRON_SECRET;

  const { server, baseUrl } = await startTestServer();
  try {
    const endpoint = `${baseUrl}/api/ops/cron-ping`;
    const res = await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
    });

    // An unconfigured deployment must answer 503 with an explanatory message.
    expect(res.status).toBe(503);
    const payload = await res.json();
    expect(payload.message).toMatch(/not configured/i);
  } finally {
    server.close();
    // The variable is already absent here, so only a previously set value
    // needs to be put back.
    if (savedSecret !== undefined) {
      process.env.CRON_SECRET = savedSecret;
    }
  }
});

it("POST /cron-ping returns 401 for wrong secret", async () => {
  // Remember any pre-existing secret so the test leaves the environment
  // exactly as it found it instead of unconditionally deleting the variable.
  const prev = process.env.CRON_SECRET;
  process.env.CRON_SECRET = "correct-secret-32-chars-xxxxxxxx";
  const { server, baseUrl } = await startTestServer();
  try {
    // The Authorization value below does not match the configured secret.
    const response = await fetch(`${baseUrl}/api/ops/cron-ping`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: "******",
      },
    });
    expect(response.status).toBe(401);
  } finally {
    server.close();
    if (prev === undefined) {
      delete process.env.CRON_SECRET;
    } else {
      process.env.CRON_SECRET = prev;
    }
  }
});

it("POST /cron-ping returns 401 for missing Authorization header", async () => {
  // Remember any pre-existing secret so the test leaves the environment
  // exactly as it found it instead of unconditionally deleting the variable.
  const prev = process.env.CRON_SECRET;
  process.env.CRON_SECRET = "correct-secret-32-chars-xxxxxxxx";
  const { server, baseUrl } = await startTestServer();
  try {
    // No Authorization header is sent at all.
    const response = await fetch(`${baseUrl}/api/ops/cron-ping`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
    });
    expect(response.status).toBe(401);
  } finally {
    server.close();
    if (prev === undefined) {
      delete process.env.CRON_SECRET;
    } else {
      process.env.CRON_SECRET = prev;
    }
  }
});

it("POST /cron-ping returns 202 with correct secret when no run today", async () => {
  // Remember any pre-existing secret so the test leaves the environment
  // exactly as it found it instead of unconditionally deleting the variable.
  const prev = process.env.CRON_SECRET;
  process.env.CRON_SECRET = "correct-secret-32-chars-xxxxxxxx";
  // DB returns no existing run for today
  dbState.selectQueue.push([]);
  const { server, baseUrl } = await startTestServer();
  try {
    const response = await fetch(`${baseUrl}/api/ops/cron-ping`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: "Bearer " + (process.env["CRON_SECRET"] ?? ""),
      },
    });
    expect(response.status).toBe(202);
    const json = await response.json();
    expect(json.message).toMatch(/triggered/i);
  } finally {
    server.close();
    if (prev === undefined) {
      delete process.env.CRON_SECRET;
    } else {
      process.env.CRON_SECRET = prev;
    }
  }
});

it("POST /cron-ping returns 409 when today already succeeded", async () => {
  // Remember any pre-existing secret so the test leaves the environment
  // exactly as it found it instead of unconditionally deleting the variable.
  const prev = process.env.CRON_SECRET;
  process.env.CRON_SECRET = "correct-secret-32-chars-xxxxxxxx";
  // The queued DB result is a run row whose status is "success".
  dbState.selectQueue.push([{ status: "success" }]);
  const { server, baseUrl } = await startTestServer();
  try {
    const response = await fetch(`${baseUrl}/api/ops/cron-ping`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: "Bearer " + (process.env["CRON_SECRET"] ?? ""),
      },
    });
    expect(response.status).toBe(409);
    const json = await response.json();
    expect(json.message).toMatch(/already ran/i);
  } finally {
    server.close();
    if (prev === undefined) {
      delete process.env.CRON_SECRET;
    } else {
      process.env.CRON_SECRET = prev;
    }
  }
});

it("POST /cron-ping returns 423 when job is currently running", async () => {
  // Remember any pre-existing secret so the test leaves the environment
  // exactly as it found it instead of unconditionally deleting the variable.
  const prev = process.env.CRON_SECRET;
  process.env.CRON_SECRET = "correct-secret-32-chars-xxxxxxxx";
  // The queued DB result is a run row whose status is "running".
  dbState.selectQueue.push([{ status: "running" }]);
  const { server, baseUrl } = await startTestServer();
  try {
    const response = await fetch(`${baseUrl}/api/ops/cron-ping`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: "Bearer " + (process.env["CRON_SECRET"] ?? ""),
      },
    });
    expect(response.status).toBe(423);
    const json = await response.json();
    expect(json.message).toMatch(/running/i);
  } finally {
    server.close();
    if (prev === undefined) {
      delete process.env.CRON_SECRET;
    } else {
      process.env.CRON_SECRET = prev;
    }
  }
});

// ── Rollout status endpoint ─────────────────────────────────────────────────

it("GET /rollout/status returns 200 with aggregated state for analyst", async () => {
Expand Down
97 changes: 77 additions & 20 deletions server/routes/health.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import type { Express } from "express";
import { desc, eq } from "drizzle-orm";
import { db } from "../db";
import { monitorJobRuns, dailyDigest } from "@shared/schema";
import { isJobRunning, getLastRunInfo } from "../utils/sponsorMonitorJob";
import { getJobHealthSnapshot } from "../utils/jobTelemetry";
import { success } from "../lib/response";
Expand Down Expand Up @@ -38,38 +41,92 @@
});

app.get('/api/health/sponsor-monitor', async (req, res) => {
const lastRun = getLastRunInfo();
const lastRunMem = getLastRunInfo();
const jobRunning = await isJobRunning();

let hoursAgo: number | null = null;
let status: "ok" | "stale" | "running" | "unknown" = "unknown";
// Prefer DB data (survives restarts) over the in-memory snapshot.
const [dbRunRow, latestDigestRow] = await Promise.all([
db
.select({
runDate: monitorJobRuns.runDate,
status: monitorJobRuns.status,
completedAt: monitorJobRuns.completedAt,
startedAt: monitorJobRuns.startedAt,
recordsProcessed: monitorJobRuns.recordsProcessed,
changesDetected: monitorJobRuns.changesDetected,
errorMessage: monitorJobRuns.errorMessage,
})
.from(monitorJobRuns)
.where(eq(monitorJobRuns.status, "success"))
.orderBy(desc(monitorJobRuns.runDate))
.limit(1)
.catch(() => [] as Array<{ runDate: string; status: string; completedAt: Date | null; startedAt: Date | null; recordsProcessed: number | null; changesDetected: number | null; errorMessage: string | null }>),
db
.select({ snapshotDate: dailyDigest.snapshotDate })
.from(dailyDigest)
.orderBy(desc(dailyDigest.snapshotDate))
.limit(1)
.catch(() => [] as Array<{ snapshotDate: string }>),
]);

const dbRun = dbRunRow[0] ?? null;
const latestSnapshotDate = latestDigestRow[0]?.snapshotDate ?? null;

// Determine last-successful-at from DB (precise ISO timestamp) or fall back
// to the in-memory snapshot from the current process instance.
const lastSuccessfulRunAt: string | null =
dbRun?.completedAt?.toISOString() ??
(lastRunMem?.success ? lastRunMem.date : null);

const hoursSinceSuccess = lastSuccessfulRunAt
? Math.floor((Date.now() - Date.parse(lastSuccessfulRunAt)) / 3_600_000)
: null;

// Classify freshness (whole hours since last success): ok (≤24h), warn (>24h, ≤48h), critical (>48h), running, unknown.
type FreshnessStatus = "ok" | "warn" | "critical" | "running" | "unknown";
let freshnessStatus: FreshnessStatus = "unknown";
let staleReason: string | null = null;

if (jobRunning) {
status = "running";
} else if (lastRun) {
const lastRunDate = new Date(lastRun.date + "T00:00:00Z");
hoursAgo = Math.floor((Date.now() - lastRunDate.getTime()) / (1000 * 60 * 60));
if (lastRun.success) {
status = hoursAgo <= 48 ? "ok" : "stale";
freshnessStatus = "running";
} else if (hoursSinceSuccess !== null) {
if (hoursSinceSuccess <= 24) {
freshnessStatus = "ok";
} else if (hoursSinceSuccess <= 48) {
freshnessStatus = "warn";
staleReason = `No successful run in ${hoursSinceSuccess}h (warn threshold: 24h).`;
} else {
status = "stale";
freshnessStatus = "critical";
staleReason = `No successful run in ${hoursSinceSuccess}h (critical threshold: 48h).`;
}
} else if (lastRunMem && !lastRunMem.success) {
freshnessStatus = "warn";
staleReason = lastRunMem.error ?? "Last run failed.";
}

success(res, {
status,
status: freshnessStatus === "ok" || freshnessStatus === "running" ? freshnessStatus : "stale",
freshnessStatus,
staleReason,
running: jobRunning,
lastRun: lastRun
lastSuccessfulRunAt,
hoursSinceSuccess,
latestSnapshotDate,
lastRun: dbRun
? {
date: lastRun.date,
success: lastRun.success,
hoursAgo,
recordsProcessed: lastRun.recordsProcessed,
changesDetected: lastRun.changesDetected,
notificationsSent: lastRun.notificationsSent,
error: lastRun.error ?? null,
date: dbRun.runDate,
completedAt: dbRun.completedAt?.toISOString() ?? null,
recordsProcessed: dbRun.recordsProcessed,
changesDetected: dbRun.changesDetected,
}
: null,
: lastRunMem
? {
date: lastRunMem.date,
completedAt: null,
recordsProcessed: lastRunMem.recordsProcessed,
changesDetected: lastRunMem.changesDetected,
}
: null,

Check warning on line 129 in server/routes/health.ts

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Extract this nested ternary operation into an independent statement.

See more on https://sonarcloud.io/project/issues?id=Sam-Aitech_Checkbyai.net&issues=AZ61K_FFUSW9WmQYTuze&open=AZ61K_FFUSW9WmQYTuze&pullRequest=48
nextCronUtc: "Mon–Fri 00:30 UTC",
timestamp: new Date().toISOString(),
});
Expand Down
Loading
Loading