From 64ec0096b42e71da02dfaaf6c98860358b128ab8 Mon Sep 17 00:00:00 2001 From: Sam-Aitech Date: Fri, 12 Jun 2026 04:41:45 +0100 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20address=20PR=20review=20findings=20?= =?UTF-8?q?=E2=80=94=20column=20resolver,=20guard=20alerts,=20test=20cover?= =?UTF-8?q?age?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review fixes for the GOV.UK CSV schema-change PR: - sponsorCsvColumns: ratingIdx no longer aliases the TierRating column already claimed by typeIdx (returns -1 on the new format so callers fall back to typeRating, the correct source) - sponsorStateMachine: send admin alert when the fingerprint set is below the trust threshold and Phases C2/D2 are skipped; correct the circuit-breaker message (Phase C2 resurrections remain committed) - sponsorListFetcher: remove dead legacy-only column resolver (resolveColumnIndexes/rowToRecord had no callers and only matched the pre-May-2026 layout) - csvArchiver/sponsorMonitorJob: replace silent .catch(() => {}) on admin alerts and the FAILED-status write with logged catches - tests: fix qsvValidate/qsvCount mock shapes to match real contracts New coverage (22 tests): - sponsorCsvColumns.test.ts: both register layouts, predicate precedence, ratingIdx-alias regression, unknown-header fallback - stateMachineGuards.test.ts: mass-removal circuit breaker (trip, under-threshold, SPONSOR_ALLOW_MASS_REMOVAL=1 bypass), Phase C2 self-heal sweep (event + suppressed/mass-repair branches), trust gate on C2/D2, Phase D2 second-miss removal - sponsorCsvNewFormat.test.ts: parseCsvFile throws on mass row rejection; buildFingerprintedCsv throws and deletes gutted output --- .../utils/__tests__/sponsorCsvColumns.test.ts | 102 ++++++++ .../__tests__/sponsorCsvNewFormat.test.ts | 35 ++- .../__tests__/stateMachineGuards.test.ts | 247 ++++++++++++++++++ server/utils/csvArchiver.ts | 4 +- server/utils/sponsorCsvColumns.ts | 5 +- server/utils/sponsorListFetcher.ts | 37 +-- server/utils/sponsorMonitorJob.ts | 8 +- server/utils/sponsorStateMachine.ts | 15 +- 8 files changed, 413 insertions(+), 40 deletions(-) create mode 100644 server/utils/__tests__/sponsorCsvColumns.test.ts create mode 100644 server/utils/__tests__/stateMachineGuards.test.ts diff --git a/server/utils/__tests__/sponsorCsvColumns.test.ts b/server/utils/__tests__/sponsorCsvColumns.test.ts new file mode 100644 index 0000000..7cdea33 --- /dev/null +++ b/server/utils/__tests__/sponsorCsvColumns.test.ts @@ -0,0 +1,102 @@ +/** + * sponsorCsvColumns.test.ts + * + * Unit coverage for the shared GOV.UK register column resolver — the single + * source of truth consumed by csvArchiver and csvFingerprintBuilder. A matcher + * bug here silently misreads columns in BOTH parsers, so every known layout + * and the precedence rules between predicates are pinned down explicitly. + */ +import { describe, it, expect } from "vitest"; +import { resolveSponsorCsvColumns } from "../sponsorCsvColumns"; + +const LEGACY_HEADER = ["Organisation Name", "Town/City", "County", "Type & Rating", "Route"]; + +const CURRENT_HEADER = [ + "Sponsor Licence Number", + "Organisation Name", + "TierRating", + "Migrant Classification", + "Sponsor Status", +]; + +describe("resolveSponsorCsvColumns — legacy layout (pre May 2026)", () => { + it("resolves every legacy column to its index", () => { + const idx = resolveSponsorCsvColumns(LEGACY_HEADER); + + expect(idx.nameIdx).toBe(0); + expect(idx.townIdx).toBe(1); + expect(idx.countyIdx).toBe(2); + expect(idx.typeIdx).toBe(3); + expect(idx.routeIdx).toBe(4); + }); + + it("returns -1 for columns absent from the legacy layout", () => { + const idx = resolveSponsorCsvColumns(LEGACY_HEADER); + + expect(idx.statusIdx).toBe(-1); + expect(idx.licenceTypeIdx).toBe(-1); + expect(idx.ratingIdx).toBe(-1); + expect(idx.lastUpdatedIdx).toBe(-1); + expect(idx.licenceNumberIdx).toBe(-1); + }); +}); + +describe("resolveSponsorCsvColumns — current layout (May 2026 onwards)", () => { + it("resolves every current column to its index", () => { + const idx = resolveSponsorCsvColumns(CURRENT_HEADER); + + expect(idx.licenceNumberIdx).toBe(0); + expect(idx.nameIdx).toBe(1); + expect(idx.typeIdx).toBe(2); // "TierRating" via the tier+rating fallback + expect(idx.routeIdx).toBe(3); // "Migrant Classification" via the classification fallback + expect(idx.statusIdx).toBe(4); + }); + + it("does NOT alias ratingIdx onto the TierRating column claimed by typeIdx", () => { + // "tierrating" contains "rating" and lacks "type", so a naive + // rating-and-not-type predicate matches it — pointing ratingIdx and + // typeIdx at the same column. deriveSponsorRowEnums then double-parses + // one column, and the derived rating is only correct by coincidence. + // ratingIdx must return -1 here so callers fall back to typeRating. + const idx = resolveSponsorCsvColumns(CURRENT_HEADER); + + expect(idx.ratingIdx).toBe(-1); + expect(idx.ratingIdx).not.toBe(idx.typeIdx); + }); + + it("returns -1 for legacy-only columns", () => { + const idx = resolveSponsorCsvColumns(CURRENT_HEADER); + + expect(idx.townIdx).toBe(-1); + expect(idx.countyIdx).toBe(-1); + }); +}); + +describe("resolveSponsorCsvColumns — precedence and edge cases", () => { + it("prefers the legacy predicate when a header matches both layouts", () => { + const idx = resolveSponsorCsvColumns(["Organisation Name", "Type & Rating", "TierRating"]); + + expect(idx.typeIdx).toBe(1); // legacy "type & rating" wins over "tierrating" + }); + + it("resolves a standalone legacy Rating column distinct from Type & Rating", () => { + const idx = resolveSponsorCsvColumns(["Organisation Name", "Type & Rating", "Rating"]); + + expect(idx.typeIdx).toBe(1); + expect(idx.ratingIdx).toBe(2); + }); + + it("is case- and whitespace-insensitive", () => { + const idx = resolveSponsorCsvColumns([" ORGANISATION NAME ", " tier rating ", "SPONSOR STATUS"]); + + expect(idx.nameIdx).toBe(0); + expect(idx.typeIdx).toBe(1); + expect(idx.statusIdx).toBe(2); + }); + + it("returns -1 across the board for an unrecognized header", () => { + const idx = resolveSponsorCsvColumns(["Foo", "Bar", "Baz"]); + + expect(Object.values(idx).every((v) => v === -1)).toBe(true); + }); +}); diff --git a/server/utils/__tests__/sponsorCsvNewFormat.test.ts b/server/utils/__tests__/sponsorCsvNewFormat.test.ts index 06e9106..e967224 100644 --- a/server/utils/__tests__/sponsorCsvNewFormat.test.ts +++ b/server/utils/__tests__/sponsorCsvNewFormat.test.ts @@ -21,8 +21,8 @@ vi.mock("../adminAlert", () => ({ sendAdminAlert: vi.fn().mockResolvedValue(undefined), })); vi.mock("../binaryRunner", () => ({ - qsvValidate: vi.fn().mockResolvedValue({ ok: true }), - qsvCount: vi.fn().mockResolvedValue(0), + qsvValidate: vi.fn().mockResolvedValue({ valid: true, recordCount: 0, errors: [] }), + qsvCount: vi.fn().mockResolvedValue(150_000), })); import { parseCsvFile } from "../csvArchiver"; @@ -100,6 +100,37 @@ describe("new-format GOV.UK CSV — buildFingerprintedCsv", () => { }); }); +describe("schema-change abort — mass row rejection must throw, never return a gutted result", () => { + // Rows with an empty TierRating fail the typeRating Zod refine, so 100% of + // rows are rejected — over the 20% SCHEMA_CHANGE_REJECTION_THRESHOLD. Before + // the fix this path silently returned a near-empty result, which csvdiff + // read as "entire register deleted" (the 2026-05-20 incident). + const REJECTED_CSV = [ + NEW_FORMAT_HEADER, + "ABC123XYZ,Acme Global Ltd,,Skilled Worker,Licensed and Fully Active", + "DEF456UVW,Beta Care Homes Ltd,,Seasonal Worker,Licensed and Fully Active", + "GHI789RST,Gamma Logistics Ltd,,Skilled Worker,Licensed and Fully Active", + ].join("\n"); + + it("parseCsvFile throws instead of returning a near-empty record set", async () => { + const csvPath = writeTmpCsv("rejected.csv", REJECTED_CSV); + + await expect(parseCsvFile(csvPath)).rejects.toThrow(/Aborting.*rejected by validation/); + }); + + it("buildFingerprintedCsv throws and deletes the gutted output file", async () => { + const csvPath = writeTmpCsv("rejected.csv", REJECTED_CSV); + const outPath = path.join(tmpDir, "rejected.fingerprinted.csv"); + + await expect(buildFingerprintedCsv(csvPath, outPath)).rejects.toThrow( + /Aborting.*rejected by validation/, + ); + // Leaving a near-empty fingerprinted CSV on disk is how the incident + // propagated into the diff engine — the file must be gone. + expect(fs.existsSync(outPath)).toBe(false); + }); +}); + describe("new-format GOV.UK CSV — SponsorRowSchema derivation", () => { it("derives enums and validates a row mapped from the new format", () => { const derived = deriveSponsorRowEnums({ diff --git a/server/utils/__tests__/stateMachineGuards.test.ts b/server/utils/__tests__/stateMachineGuards.test.ts new file mode 100644 index 0000000..49a1e83 --- /dev/null +++ b/server/utils/__tests__/stateMachineGuards.test.ts @@ -0,0 +1,247 @@ +/** + * stateMachineGuards.test.ts + * + * Behavioral coverage for the post-incident safety guards added after the + * 2026-05-20 mass removal (GOV.UK CSV schema change emptied the fingerprinted + * file and the state machine removed all 143K sponsors): + * + * 1. Mass-removal circuit breaker (+ SPONSOR_ALLOW_MASS_REMOVAL bypass) + * 2. Phase C2 self-heal resurrection sweep (event and suppressed branches) + * 3. MIN_TRUSTWORTHY_FINGERPRINT_SET gate on Phases C2/D2 (+ degraded alert) + * 4. Phase D2 second-miss removal under a trusted fingerprint set + */ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import type { CsvDiffResult } from "../binaryRunner"; + +// Queue of results returned by successive db.select().from().where() chains. +// applyStateMachine issues selects in a fixed order (Phase A canonical load → +// Phase C2 stale rows → Phase D2 grace-period rows), so tests enqueue results +// in that order. An exhausted queue yields []. +let selectResults: unknown[][] = []; +const setSpy = vi.fn(() => ({ + where: vi.fn(() => ({ returning: vi.fn(() => []) })), +})); + +vi.mock("../../db", () => ({ + db: { + select: vi.fn(() => ({ + from: vi.fn(() => ({ + where: vi.fn(() => selectResults.shift() ?? []), + })), + })), + insert: vi.fn(() => ({ + values: vi.fn(() => ({ + returning: vi.fn(() => []), + onConflictDoNothing: vi.fn(() => []), + })), + })), + update: vi.fn(() => ({ set: setSpy })), + delete: vi.fn(() => ({ where: vi.fn(() => []) })), + execute: vi.fn(), + }, +})); + +vi.mock("../csvFingerprintBuilder", () => ({ + loadFingerprintSet: vi.fn(() => new Set()), +})); + +vi.mock("../adminAlert", () => ({ + sendAdminAlert: vi.fn().mockResolvedValue(undefined), +})); + +vi.mock("../../storage", () => ({ + storage: { + getPendingWatchesByCompanyName: vi.fn(() => []), + markSponsorWatchNotified: vi.fn(), + }, +})); + +vi.mock("../services/notificationEngine", () => ({ + buildEmail: vi.fn(), + sendViaResend: vi.fn(), + notifyUsersOfEvent: vi.fn(), + processQueuedEngineEvents: vi.fn(), +})); + +import { applyStateMachine } from "../sponsorStateMachine"; +import { loadFingerprintSet } from "../csvFingerprintBuilder"; +import { sendAdminAlert } from "../adminAlert"; +import { db } from "../../db"; + +const TODAY = "2026-06-12"; + +const EMPTY_DIFF: CsvDiffResult = { + Additions: [], + Deletions: [], + Modifications: [], + durationMs: 0, +}; + +/** Fingerprint set above MIN_TRUSTWORTHY_FINGERPRINT_SET (50K). */ +function trustedSet(extra: string[] = []): Set { + const set = new Set(Array.from({ length: 60_000 }, (_, i) => `fp-fill-${i}`)); + for (const fp of extra) set.add(fp); + return set; +} + +function deletionDiff(count: number): CsvDiffResult { + return { + Additions: [], + Deletions: Array.from({ length: count }, (_, i) => ({ + "Organisation Name": `Company ${i}`, + "fingerprint": `fp-del-${i}`, + })), + Modifications: [], + durationMs: 0, + }; +} + +function canonicalRows(count: number, status: string) { + return Array.from({ length: count }, (_, i) => ({ + id: i + 1, + fingerprint: `fp-del-${i}`, + currentName: `Company ${i}`, + townCity: "London", + typeRating: "Worker (A rating)", + route: "Skilled Worker", + status, + grantedAt: "2025-01-01", + consecutiveMisses: 0, + historicalNames: [], + })); +} + +function alertSubjects(): string[] { + return (sendAdminAlert as ReturnType).mock.calls.map((c) => c[0] as string); +} + +function statusesWritten(): unknown[] { + return setSpy.mock.calls.map((c) => (c[0] as { status?: unknown })?.status).filter(Boolean); +} + +beforeEach(() => { + vi.clearAllMocks(); + selectResults = []; + delete process.env.SPONSOR_ALLOW_MASS_REMOVAL; + (db.execute as ReturnType).mockResolvedValue({ rows: [{ cnt: 100_000 }] }); + (loadFingerprintSet as ReturnType).mockResolvedValue(new Set()); +}); + +afterEach(() => { + delete process.env.SPONSOR_ALLOW_MASS_REMOVAL; +}); + +describe("mass-removal circuit breaker", () => { + it("aborts before applying removals when deletions exceed 20% of live records", async () => { + selectResults = [canonicalRows(300, "ACTIVE")]; // Phase A load + (db.execute as ReturnType) + .mockResolvedValueOnce({ rows: [{ cnt: 100_000 }] }) // Phase C first-run count + .mockResolvedValueOnce({ rows: [{ cnt: 1_000 }] }); // breaker live count + + await expect(applyStateMachine(deletionDiff(300), TODAY, "dummy")) + .rejects.toThrow(/circuit breaker/i); + + expect(statusesWritten()).not.toContain("GRACE_PERIOD"); + expect(statusesWritten()).not.toContain("REMOVED_REVOKED"); + expect(alertSubjects().some((s) => s.includes("circuit breaker"))).toBe(true); + }); + + it("does not trip below the threshold", async () => { + selectResults = [canonicalRows(100, "ACTIVE")]; + (db.execute as ReturnType) + .mockResolvedValueOnce({ rows: [{ cnt: 100_000 }] }) + .mockResolvedValueOnce({ rows: [{ cnt: 1_000 }] }); // 100 <= 20% of 1000 + + const result = await applyStateMachine(deletionDiff(100), TODAY, "dummy"); + + expect(result.gracePeriodCount).toBe(100); + expect(statusesWritten()).toContain("GRACE_PERIOD"); + }); + + it("is bypassed by SPONSOR_ALLOW_MASS_REMOVAL=1", async () => { + process.env.SPONSOR_ALLOW_MASS_REMOVAL = "1"; + selectResults = [canonicalRows(300, "ACTIVE")]; + + const result = await applyStateMachine(deletionDiff(300), TODAY, "dummy"); + + expect(result.gracePeriodCount).toBe(300); + expect(statusesWritten()).toContain("GRACE_PERIOD"); + }); +}); + +describe("Phase C2 self-heal sweep", () => { + it("resurrects sponsors present in today's register but marked removed/grace in DB", async () => { + (loadFingerprintSet as ReturnType).mockResolvedValue( + trustedSet(["fp-stale-1", "fp-stale-2"]), + ); + selectResults = [ + [ + { fingerprint: "fp-stale-1", currentName: "Stale One", status: "REMOVED_REVOKED" }, + { fingerprint: "fp-stale-2", currentName: "Stale Two", status: "GRACE_PERIOD" }, + ], // Phase C2 stale rows + [], // Phase D2 grace-period rows + ]; + + const result = await applyStateMachine(EMPTY_DIFF, TODAY, "dummy"); + + expect(result.reactivatedCount).toBe(2); + const reactivated = result.changes.filter((c) => c.changeType === "RE_ACTIVATED"); + expect(reactivated).toHaveLength(2); + expect(reactivated[0]).toMatchObject({ newValue: "ACTIVE" }); + expect(setSpy).toHaveBeenCalledWith( + expect.objectContaining({ status: "ACTIVE", removedAt: null, consecutiveMisses: 0 }), + ); + }); + + it("suppresses per-company events and alerts admins above the mass-repair threshold", async () => { + const staleFps = Array.from({ length: 1_001 }, (_, i) => `fp-stale-${i}`); + (loadFingerprintSet as ReturnType).mockResolvedValue(trustedSet(staleFps)); + selectResults = [ + staleFps.map((fp, i) => ({ + fingerprint: fp, + currentName: `Stale ${i}`, + status: "REMOVED_REVOKED", + })), + [], + ]; + + const result = await applyStateMachine(EMPTY_DIFF, TODAY, "dummy"); + + expect(result.reactivatedCount).toBe(1_001); + expect(result.changes.filter((c) => c.changeType === "RE_ACTIVATED")).toHaveLength(0); + expect(alertSubjects().some((s) => s.includes("Mass self-heal"))).toBe(true); + }); +}); + +describe("MIN_TRUSTWORTHY_FINGERPRINT_SET gate", () => { + it("skips C2/D2 on a small fingerprint set, alerts admins, and removes nothing", async () => { + (loadFingerprintSet as ReturnType).mockResolvedValue(new Set(["only-one"])); + + const result = await applyStateMachine(EMPTY_DIFF, TODAY, "dummy"); + + expect(result.removedCount).toBe(0); + expect(result.reactivatedCount).toBe(0); + expect(statusesWritten()).not.toContain("REMOVED_REVOKED"); + expect(alertSubjects().some((s) => s.includes("fingerprint set too small"))).toBe(true); + }); + + it("confirms D2 second-miss removals only under a trusted set", async () => { + (loadFingerprintSet as ReturnType).mockResolvedValue(trustedSet()); + selectResults = [ + [], // Phase C2 stale rows + [{ fingerprint: "fp-grace-1", currentName: "Gone Ltd", consecutiveMisses: 1 }], // Phase D2 + ]; + + const result = await applyStateMachine(EMPTY_DIFF, TODAY, "dummy"); + + expect(result.removedCount).toBe(1); + expect(result.changes).toContainEqual( + expect.objectContaining({ + organisationName: "Gone Ltd", + changeType: "REMOVED_REVOKED", + previousValue: "GRACE_PERIOD", + }), + ); + expect(statusesWritten()).toContain("REMOVED_REVOKED"); + }); +}); diff --git a/server/utils/csvArchiver.ts b/server/utils/csvArchiver.ts index d8e588a..0635c07 100644 --- a/server/utils/csvArchiver.ts +++ b/server/utils/csvArchiver.ts @@ -321,7 +321,7 @@ export async function ensureTodaysArchive( `

qsv found structural issues in the downloaded CSV for ${date}:

${errMsg.replace(/
        

The pipeline will continue but the record count guard is the final safety check.

`, - ).catch(() => {}); + ).catch((err: unknown) => logger.error({ err }, "[CsvArchiver] Failed to send validation-warning admin alert")); } // ── qsv count + hard guard ────────────────────────────────────────────────── @@ -368,7 +368,7 @@ export async function ensureTodaysArchive(

The nightly monitor job has been aborted. No state machine changes were made. Yesterday's data remains unchanged.

Action: Check Gov.uk manually to verify the CSV is complete.

`, - ).catch(() => {}); + ).catch((err: unknown) => logger.error({ err }, "[CsvArchiver] Failed to send record-count-abort admin alert")); throw new Error(errorMsg); } diff --git a/server/utils/sponsorCsvColumns.ts b/server/utils/sponsorCsvColumns.ts index 88d9365..56e8833 100644 --- a/server/utils/sponsorCsvColumns.ts +++ b/server/utils/sponsorCsvColumns.ts @@ -59,7 +59,10 @@ export function resolveSponsorCsvColumns(header: string[]): SponsorCsvColumnInde ), statusIdx: find((c) => c.includes("status")), licenceTypeIdx: find((c) => c.includes("licence") && c.includes("type")), - ratingIdx: find((c) => c.includes("rating") && !c.includes("type")), + // Must not match "TierRating" — that column is already claimed by typeIdx, + // and aliasing both onto it double-parses one column. With -1 here, + // deriveSponsorRowEnums falls back to typeRating, the correct source. + ratingIdx: find((c) => c.includes("rating") && !c.includes("type") && !c.includes("tier")), lastUpdatedIdx: find((c) => c.includes("last") && c.includes("updated")), licenceNumberIdx: find((c) => c.includes("licence") && c.includes("number")), }; diff --git a/server/utils/sponsorListFetcher.ts b/server/utils/sponsorListFetcher.ts index 9ef6fab..3154658 100644 --- a/server/utils/sponsorListFetcher.ts +++ b/server/utils/sponsorListFetcher.ts @@ -300,38 +300,11 @@ export async function discoverCsvUrl(): Promise { return url; } -// ── Shared CSV parsing helpers ──────────────────────────────────────────────── - -interface ColumnIndexes { - nameIdx: number; - townIdx: number; - countyIdx: number; - typeIdx: number; - routeIdx: number; -} - -function resolveColumnIndexes(header: string[]): ColumnIndexes { - const h = header.map((s) => s.trim().toLowerCase()); - return { - nameIdx: h.findIndex((c) => c.includes("organisation") && c.includes("name")), - townIdx: h.findIndex((c) => c.includes("town") || c.includes("city")), - countyIdx: h.findIndex((c) => c.includes("county")), - typeIdx: h.findIndex((c) => c.includes("type") && c.includes("rating")), - routeIdx: h.findIndex((c) => c.includes("route")), - }; -} - -function rowToRecord(row: string[], idx: ColumnIndexes): SponsorRecord | null { - const orgName = (row[idx.nameIdx] ?? "").trim(); - if (!orgName) return null; - return { - organisationName: orgName, - townCity: (idx.townIdx >= 0 ? row[idx.townIdx] ?? "" : "").trim(), - county: (idx.countyIdx >= 0 ? row[idx.countyIdx] ?? "" : "").trim(), - typeRating:(idx.typeIdx >= 0 ? row[idx.typeIdx] ?? "" : "").trim(), - route: (idx.routeIdx >= 0 ? row[idx.routeIdx] ?? "" : "").trim(), - }; -} +// NOTE: this module deliberately has no CSV column-resolution logic of its own. +// All header→index mapping lives in sponsorCsvColumns.ts (shared by csvArchiver +// and csvFingerprintBuilder); CSV parsing goes through csvArchiver.parseCsvFile. +// A private legacy-only resolver that used to live here was removed after the +// May 2026 GOV.UK format change — do not reintroduce one. /** * Validates and returns HTML records from Scrapling fallback. diff --git a/server/utils/sponsorMonitorJob.ts b/server/utils/sponsorMonitorJob.ts index c91120a..9a95d48 100644 --- a/server/utils/sponsorMonitorJob.ts +++ b/server/utils/sponsorMonitorJob.ts @@ -668,12 +668,16 @@ export async function runSponsorMonitorJob( `

${msg}

The nightly monitor has been aborted to prevent mass REMOVED_REVOKED for legitimate sponsors.

`, ); - // Mark archive as FAILED so the integrity check surfaces it. + // Mark archive as FAILED so the integrity check surfaces it. If this + // write fails the archive stays PENDING_SYNC and will be re-attempted + // on the next run — log it so the retry loop is explicable. await db .update(csvArchive) .set({ syncStatus: "FAILED" }) .where(eq(csvArchive.snapshotDate, today)) - .catch(() => {}); + .catch((err: unknown) => + log.error({ err }, "[SponsorMonitorJob] Failed to mark archive FAILED — status remains PENDING_SYNC"), + ); throw new Error(msg); } diff --git a/server/utils/sponsorStateMachine.ts b/server/utils/sponsorStateMachine.ts index 2a11a26..4543747 100644 --- a/server/utils/sponsorStateMachine.ts +++ b/server/utils/sponsorStateMachine.ts @@ -639,6 +639,18 @@ export async function applyStateMachine( { size: todayFingerprintSet.size }, "[StateMachine] Today's fingerprint set is suspiciously small — skipping Phase C2 sweep (and Phase D2 will be skipped too).", ); + // Operators must hear about this, not just the logs: while skipped, sponsors + // already in GRACE_PERIOD are neither promoted to REMOVED_REVOKED nor + // resurrected, so they strand there until a trustworthy register arrives. + await sendAdminAlert( + "ALERT: Sponsor sync degraded — fingerprint set too small, Phase C2/D2 skipped", + `

Today's fingerprint set has ${todayFingerprintSet.size.toLocaleString()} entries, below the ` + + `${MIN_TRUSTWORTHY_FINGERPRINT_SET.toLocaleString()} trust threshold (register is normally ~140K rows). ` + + `Phase C2 (self-heal resurrection) and Phase D2 (second-miss removal) were both skipped. ` + + `Sponsors currently in GRACE_PERIOD will not change state until a full register is processed.

` + + `

Likely cause: truncated download or GOV.UK CSV schema change. ` + + `File: ${todayFingerprintedCsvPath}

`, + ).catch((err: unknown) => log.warn({ err }, "[StateMachine] Failed to send small-fingerprint-set alert")); } // ── Phase D: Deletions (first and second misses) ────────────────────────── @@ -694,7 +706,8 @@ export async function applyStateMachine( const msg = `Mass-removal circuit breaker tripped: run wants to remove/grace ${deletionImpact.toLocaleString()} ` + `of ${liveCount.toLocaleString()} live sponsors (> ${MASS_REMOVAL_FRACTION * 100}%). ` + - `Aborting before any status changes are applied. ` + + `Aborting before any removals are applied (Phase C2 resurrections from earlier in this run, ` + + `if any, remain committed — they only ever set sponsors back to ACTIVE). ` + `Set SPONSOR_ALLOW_MASS_REMOVAL=1 to override deliberately.`; log.error({ deletionImpact, liveCount }, `[StateMachine] ${msg}`); await sendAdminAlert( From 9ab804599cb366d45d3fb2969f5fe1a977143319 Mon Sep 17 00:00:00 2001 From: Sam-Aitech Date: Fri, 12 Jun 2026 04:50:34 +0100 Subject: [PATCH 2/2] refactor: replace redundant vi.fn casts with vi.mocked in guard tests Resolves SonarQube typescript:S4325 (unnecessary type assertion) on the db.execute mock casts; converted all nine ReturnType casts to vi.mocked() for consistency. --- .../utils/__tests__/stateMachineGuards.test.ts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/server/utils/__tests__/stateMachineGuards.test.ts b/server/utils/__tests__/stateMachineGuards.test.ts index 49a1e83..055e43c 100644 --- a/server/utils/__tests__/stateMachineGuards.test.ts +++ b/server/utils/__tests__/stateMachineGuards.test.ts @@ -112,7 +112,7 @@ function canonicalRows(count: number, status: string) { } function alertSubjects(): string[] { - return (sendAdminAlert as ReturnType).mock.calls.map((c) => c[0] as string); + return vi.mocked(sendAdminAlert).mock.calls.map((c) => c[0] as string); } function statusesWritten(): unknown[] { @@ -123,8 +123,8 @@ beforeEach(() => { vi.clearAllMocks(); selectResults = []; delete process.env.SPONSOR_ALLOW_MASS_REMOVAL; - (db.execute as ReturnType).mockResolvedValue({ rows: [{ cnt: 100_000 }] }); - (loadFingerprintSet as ReturnType).mockResolvedValue(new Set()); + vi.mocked(db.execute).mockResolvedValue({ rows: [{ cnt: 100_000 }] }); + vi.mocked(loadFingerprintSet).mockResolvedValue(new Set()); }); afterEach(() => { @@ -134,7 +134,7 @@ afterEach(() => { describe("mass-removal circuit breaker", () => { it("aborts before applying removals when deletions exceed 20% of live records", async () => { selectResults = [canonicalRows(300, "ACTIVE")]; // Phase A load - (db.execute as ReturnType) + vi.mocked(db.execute) .mockResolvedValueOnce({ rows: [{ cnt: 100_000 }] }) // Phase C first-run count .mockResolvedValueOnce({ rows: [{ cnt: 1_000 }] }); // breaker live count @@ -148,7 +148,7 @@ describe("mass-removal circuit breaker", () => { it("does not trip below the threshold", async () => { selectResults = [canonicalRows(100, "ACTIVE")]; - (db.execute as ReturnType) + vi.mocked(db.execute) .mockResolvedValueOnce({ rows: [{ cnt: 100_000 }] }) .mockResolvedValueOnce({ rows: [{ cnt: 1_000 }] }); // 100 <= 20% of 1000 @@ -171,7 +171,7 @@ describe("mass-removal circuit breaker", () => { describe("Phase C2 self-heal sweep", () => { it("resurrects sponsors present in today's register but marked removed/grace in DB", async () => { - (loadFingerprintSet as ReturnType).mockResolvedValue( + vi.mocked(loadFingerprintSet).mockResolvedValue( trustedSet(["fp-stale-1", "fp-stale-2"]), ); selectResults = [ @@ -195,7 +195,7 @@ describe("Phase C2 self-heal sweep", () => { it("suppresses per-company events and alerts admins above the mass-repair threshold", async () => { const staleFps = Array.from({ length: 1_001 }, (_, i) => `fp-stale-${i}`); - (loadFingerprintSet as ReturnType).mockResolvedValue(trustedSet(staleFps)); + vi.mocked(loadFingerprintSet).mockResolvedValue(trustedSet(staleFps)); selectResults = [ staleFps.map((fp, i) => ({ fingerprint: fp, @@ -215,7 +215,7 @@ describe("Phase C2 self-heal sweep", () => { describe("MIN_TRUSTWORTHY_FINGERPRINT_SET gate", () => { it("skips C2/D2 on a small fingerprint set, alerts admins, and removes nothing", async () => { - (loadFingerprintSet as ReturnType).mockResolvedValue(new Set(["only-one"])); + vi.mocked(loadFingerprintSet).mockResolvedValue(new Set(["only-one"])); const result = await applyStateMachine(EMPTY_DIFF, TODAY, "dummy"); @@ -226,7 +226,7 @@ describe("MIN_TRUSTWORTHY_FINGERPRINT_SET gate", () => { }); it("confirms D2 second-miss removals only under a trusted set", async () => { - (loadFingerprintSet as ReturnType).mockResolvedValue(trustedSet()); + vi.mocked(loadFingerprintSet).mockResolvedValue(trustedSet()); selectResults = [ [], // Phase C2 stale rows [{ fingerprint: "fp-grace-1", currentName: "Gone Ltd", consecutiveMisses: 1 }], // Phase D2