diff --git a/packages/core/src/cache/schema.test.ts b/packages/core/src/cache/schema.test.ts index ed1a144..0a83d53 100644 --- a/packages/core/src/cache/schema.test.ts +++ b/packages/core/src/cache/schema.test.ts @@ -1,3 +1,4 @@ +import Dexie from "dexie"; import { afterEach, describe, expect, it } from "vitest"; import { BitcoinmintsDB } from "./schema"; @@ -19,14 +20,16 @@ afterEach(async () => { }); describe("BitcoinmintsDB schema", () => { - it("opens at version 2 with all 6 tables present", async () => { + it("opens at version 3 with all 6 tables present", async () => { const db = new BitcoinmintsDB(freshName()); toDispose.push(db); await db.open(); - // v2 adds the [kind+createdAt] compound index to announcements (used by - // restoreWatermarks for bounded .last() lookups per kind). - expect(db.verno).toBe(2); + // v3 renames mintAggregate's `bayesianRank` index → `bayesianScore` and + // adds `avgRating` so the ranking aggregator can sort by either without + // a full-table scan. v2 added the [kind+createdAt] compound index on + // announcements (used by scheduler.restoreWatermarks). + expect(db.verno).toBe(3); const names = db.tables.map((t) => t.name).sort(); expect(names).toEqual( ["announcements", "mintAggregate", "mintInfo", "profiles", "relayLists", "reviews"].sort(), @@ -74,8 +77,9 @@ describe("BitcoinmintsDB schema", () => { expect(indexNames("reviews")).toEqual(["createdAt", "d", "eventId", "k"]); // mintInfo secondary: fetchedAt, ok expect(indexNames("mintInfo")).toEqual(["fetchedAt", "ok"]); - // mintAggregate secondary: bayesianRank, updatedAt - expect(indexNames("mintAggregate")).toEqual(["bayesianRank", "updatedAt"]); + // mintAggregate secondary (v3): bayesianScore + avgRating (new) + + // updatedAt. `bayesianRank` from v1 is dropped in v3. 
+ expect(indexNames("mintAggregate")).toEqual(["avgRating", "bayesianScore", "updatedAt"]); }); it("starts empty", async () => { @@ -90,4 +94,53 @@ describe("BitcoinmintsDB schema", () => { expect(await db.mintInfo.count()).toBe(0); expect(await db.mintAggregate.count()).toBe(0); }); + + it("v2 → v3 upgrade clears the mintAggregate table", async () => { + // A dev who opened the app at v2 has rows shaped + // `{d, averageRating, bayesianRank, updatedAt}` — the `averageRating` + // field renamed to `avgRating` in v3 and `bayesianRank` was dropped, + // so without a migration hook those rows fail every v3 query shape + // (the indexes point at fields the row doesn't have). The v3 upgrade + // wipes the table and lets it repopulate from live review traffic. + const name = freshName(); + // Open a separate Dexie handle declaring only the first two schema + // versions so we can seed a v2-shape row before the BitcoinmintsDB + // handle (which declares v3 and its upgrade hook) ever touches it. + const v2 = new Dexie(name); + v2.version(1).stores({ + announcements: "[pubkey+kind+d], eventId, kind, d, createdAt", + reviews: "[pubkey+kind+d], eventId, d, createdAt, k", + profiles: "pubkey, createdAt", + relayLists: "pubkey, createdAt", + mintInfo: "d, fetchedAt, ok", + mintAggregate: "d, bayesianRank, updatedAt", + }); + v2.version(2).stores({ + announcements: "[pubkey+kind+d], eventId, kind, d, createdAt, [kind+createdAt]", + }); + await v2.open(); + // Seed a v2-shape row — the pre-rename payload. + await v2.table("mintAggregate").put({ + d: "5fe928ae0970844f3c5253d2e85a88788486edcbd96c070334a4a2d0d0154a77", + averageRating: 4, + bayesianRank: 4 * Math.log10(11), + updatedAt: 1_700_000_000, + }); + expect(await v2.table("mintAggregate").count()).toBe(1); + v2.close(); + + // Reopen via BitcoinmintsDB (declares v3 + upgrade hook) — the + // upgrade callback should clear the mintAggregate table. 
+ const v3 = new BitcoinmintsDB(name); + toDispose.push(v3); + await v3.open(); + expect(v3.verno).toBe(3); + expect(await v3.mintAggregate.count()).toBe(0); + // And the v3 indexes are queryable — a live review upsert would + // repopulate via these. + const byScore = await v3.mintAggregate.orderBy("bayesianScore").toArray(); + expect(byScore).toEqual([]); + const byAvg = await v3.mintAggregate.orderBy("avgRating").toArray(); + expect(byAvg).toEqual([]); + }); }); diff --git a/packages/core/src/cache/schema.ts b/packages/core/src/cache/schema.ts index 33d9872..47292a9 100644 --- a/packages/core/src/cache/schema.ts +++ b/packages/core/src/cache/schema.ts @@ -59,10 +59,23 @@ export type ReviewRow = { d: string; eventId: string; createdAt: number; - /** Pointer-kind tag: 38172 (Cashu) or 38173 (Fedimint). */ - k?: number; - /** Parsed 0..5 rating. */ - rating?: number; + /** + * Pointer-kind tag: 38172 (Cashu) or 38173 (Fedimint). Optional because + * in-the-wild events sometimes omit the `k` tag entirely; keep lenient. + */ + k?: 38172 | 38173; + /** + * Mint URL(s) from optional `u` tags on the recommendation — display-only + * helper, does NOT participate in replaceable-event keying. + */ + u?: string[]; + /** + * Parsed rating in 1..5 inclusive, `null` when no rating could be extracted + * from tags or content. Explicit `null` rather than `undefined` to + * distinguish "no rating present" (which is a valid review state) from + * "field not yet populated". + */ + rating: number | null; /** Freeform review text. */ content: string; rawTags: string[][]; @@ -111,12 +124,39 @@ export type MintInfoRow = { lastError?: string; }; -/** Aggregated per-mint ranking row (populated in PR #5 — empty in PR #3). */ +/** + * Aggregated per-mint ranking row (populated in PR #5). + * + * Materialized from all `reviews` rows with a given `d`. Recomputed in the + * same Dexie transaction as the triggering review upsert so they never go + * out of sync. 
`bayesianScore` is the sort key — it damps low-count + * averages so a single 5★ review cannot outrank 4★×10 (see data-model-v1.md + * §13). + * + * Schema transition: v2 exposed `averageRating` + `bayesianRank` as + * optional-number fields; v3 tightens the contract so every row carries + * explicit values (`avgRating: number | null`, `bayesianScore: number`) and + * adds `ratedCount` — the count of reviews contributing to `avgRating`, + * distinct from `reviewCount` which counts all reviews including unrated. + * The index rename from `bayesianRank` → `bayesianScore` drives the v3 bump. + */ export type MintAggregateRow = { + /** Primary key — the mint d-tag this aggregate is for. */ d: string; + /** Count of ALL reviews for this mint (rated + unrated). */ reviewCount: number; - averageRating?: number; - bayesianRank?: number; + /** Count of reviews with `rating != null` — the divisor of `avgRating`. */ + ratedCount: number; + /** Mean across the `ratedCount` reviews, or `null` when `ratedCount===0` (IndexedDB cannot index `null`, so such rows are absent from the `avgRating` index). */ + avgRating: number | null; + /** + * Bayesian sort score — `avgRating * log10(ratedCount + 1)` when + * `avgRating != null`, else `0`. `log10(1)=0`, so zero rated reviews score `0`; a single review gets + * `rating * log10(2) ≈ rating * 0.301`; 10 reviews get `rating * log10(11) + * ≈ rating * 1.041`. The damping makes low-count mints sort below + * higher-count mints of the same average. + */ + bayesianScore: number; /** Epoch-ms timestamp of the aggregate recompute (used for CAS). */ updatedAt: number; }; @@ -151,5 +191,26 @@ export class BitcoinmintsDB extends Dexie { this.version(2).stores({ announcements: "[pubkey+kind+d], eventId, kind, d, createdAt, [kind+createdAt]", }); + // v3: rename `bayesianRank` → `bayesianScore` on mintAggregate and add + // `avgRating` to the index set so sort-by-avg queries don't need a full + // table scan. These are the indexes materialized in PR #5's ranking + // aggregator. The prior `bayesianRank` index is dropped. 
+ // + // Upgrade semantics: Dexie auto-migrates the SCHEMA (indexes) but does + // NOT transform existing row PAYLOADS. A dev with a local v2 IndexedDB + // would otherwise have rows shaped `{d, averageRating, bayesianRank, + // updatedAt}` — the `averageRating` field is `avgRating` in v3 and + // `bayesianRank` doesn't exist — which would fail every v3 query shape + // (the indexes point at fields the row doesn't have). Since there's no + // prod data yet and the aggregate is re-derived from reviews on the + // next review upsert, a clean wipe is the correct migration: clear + // `mintAggregate`, let it repopulate from live review traffic. + this.version(3) + .stores({ + mintAggregate: "d, bayesianScore, avgRating, updatedAt", + }) + .upgrade(async (tx) => { + await tx.table("mintAggregate").clear(); + }); } } diff --git a/packages/core/src/cache/upsert.test.ts b/packages/core/src/cache/upsert.test.ts index 07be5fd..ecb0115 100644 --- a/packages/core/src/cache/upsert.test.ts +++ b/packages/core/src/cache/upsert.test.ts @@ -112,8 +112,9 @@ function makeMintAggregate(over: Partial = {}): MintAggregateR return { d: D_XONLY, reviewCount: 5, - averageRating: 4.2, - bayesianRank: 3.8, + ratedCount: 5, + avgRating: 4.2, + bayesianScore: 3.8, updatedAt: 1_700_000_000, ...over, }; @@ -590,8 +591,8 @@ describe("upsertMintInfo", () => { describe("upsertMintAggregate", () => { it("inserts, replaces on newer updatedAt, rejects older", async () => { const db = await freshDB(); - const older = makeMintAggregate({ updatedAt: 1000, bayesianRank: 1.0 }); - const newer = makeMintAggregate({ updatedAt: 2000, bayesianRank: 4.5 }); + const older = makeMintAggregate({ updatedAt: 1000, bayesianScore: 1.0 }); + const newer = makeMintAggregate({ updatedAt: 2000, bayesianScore: 4.5 }); const ancient = makeMintAggregate({ updatedAt: 500 }); expect(await upsertMintAggregate(db, older)).toBe("inserted"); @@ -599,7 +600,7 @@ describe("upsertMintAggregate", () => { expect(await 
upsertMintAggregate(db, ancient)).toBe("rejected-stale"); const fetched = await db.mintAggregate.get(older.d); - expect(fetched?.bayesianRank).toBe(4.5); + expect(fetched?.bayesianScore).toBe(4.5); expect(fetched?.updatedAt).toBe(2000); }); }); diff --git a/packages/core/src/cache/upsert.ts b/packages/core/src/cache/upsert.ts index b6c4025..ff50a2e 100644 --- a/packages/core/src/cache/upsert.ts +++ b/packages/core/src/cache/upsert.ts @@ -15,14 +15,16 @@ * Layer A gate for kind:38172: before writing an announcement we check * isValidCashuDTag(d). Invalid shapes (bot spam, non-hex garbage) are * returned as "rejected-invalid" and never hit the DB. kind:38173 - * (Fedimint) bypasses Layer A — federation IDs have a different shape - * and their validator is a TODO-v1.1 concern. + * (Fedimint) uses a sibling shape gate (isValidFedimintDTag) — every real + * federation ID in the audit corpus is 64-char lowercase hex, so short / + * junk d-tags with `["k","38173"]` are still filtered at the same choke + * point as Cashu bot spam. * * mintInfo and mintAggregate aren't event-based, so their CAS predicate * is a monotonically-increasing timestamp: `fetchedAt` for mintInfo, * `updatedAt` for mintAggregate. */ -import { isValidCashuDTag } from "../nip87/dtag"; +import { isValidCashuDTag, isValidFedimintDTag } from "../nip87/dtag"; import type { AnnouncementRow, BitcoinmintsDB, @@ -56,14 +58,19 @@ function nextWins( return next.eventId > prev.eventId; } -/** Upsert a kind:38172 or kind:38173 announcement with Layer A gating on 38172. */ +/** Upsert a kind:38172 or kind:38173 announcement with Layer A gating on both kinds. */ export async function upsertAnnouncement( db: BitcoinmintsDB, row: AnnouncementRow, ): Promise { - // Layer A gate — reject invalid Cashu d-tag shapes before touching the DB. - // Fedimint (38173) bypasses: federation-id shape is TODO-v1.1. 
- if (row.kind === 38172 && !isValidCashuDTag(row.d)) { + // Layer A gate — reject invalid d-tag shapes before touching the DB. + // Cashu (38172) requires a 64- or 66-char secp256k1 pubkey shape; + // Fedimint (38173) requires a 64-char lowercase hex federation-id shape. + // A short/junk d-tag with `k=38173` slapped on is still bot spam and + // must be caught by the same firewall — don't free-pass by kind alone. + if (row.kind === 38173) { + if (!isValidFedimintDTag(row.d)) return "rejected-invalid"; + } else if (!isValidCashuDTag(row.d)) { return "rejected-invalid"; } @@ -88,8 +95,41 @@ export async function upsertAnnouncement( }); } -/** Upsert a kind:38000 review. No Layer A gate — the `d` here points at a mint but isn't itself a Cashu pubkey owned by the reviewer. */ +/** + * Upsert a kind:38000 review with Layer A gating on the target `d` tag. + * + * The review's `d` points at a mint. When `k === 38172` (or `k` is absent, + * which is how most in-the-wild Cashu reviews shape), we apply the same + * Layer A d-regex gate that `upsertAnnouncement` uses — if the referenced + * mint pubkey isn't 64/66-char hex, the review is bot-spam pointing at + * bot-spam, returned as `rejected-invalid`. This is the firewall that + * keeps the 959 zero-d-tag bot spam events (per relay-strategy §4) from + * filtering up into the ranking aggregate. + * + * `k === 38173` (Fedimint) switches to the sibling `isValidFedimintDTag` + * shape gate — every real federation ID in the audit corpus is lowercase + * 64-char hex, so a short / junk d-tag with `k=38173` attached is still + * bot spam and must be caught by the same firewall. + * + * Note: this low-level upsert is the mechanical write. It does NOT + * materialize the `mintAggregate` row — the `reviews/` wrapper composes + * this with `recomputeAggregateInTx` inside a single transaction so the + * two stores stay in sync. 
Callers outside `reviews/` (integration tests, + * direct usage) can call this helper directly and will simply skip the + * aggregate materialization — safe but stale. + */ export async function upsertReview(db: BitcoinmintsDB, row: ReviewRow): Promise { + // Layer A gate — reject invalid d-tag shapes before touching the DB. + // Reviews point at a target mint via `d`; the pointer-kind `k` selects + // which shape gate applies. No `k` tag → treat as Cashu (the default + // for in-the-wild events per rating-tag-research §3). Fedimint rows + // still get a sibling shape check (64-char hex federation id) so junk + // d-tags with `k=38173` slapped on don't free-pass the firewall. + if (row.k === 38173) { + if (!isValidFedimintDTag(row.d)) return "rejected-invalid"; + } else if (!isValidCashuDTag(row.d)) { + return "rejected-invalid"; + } return db.transaction("rw", db.reviews, async () => { const prev = await db.reviews.get([row.pubkey, row.kind, row.d]); if (!prev) { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 2064463..302e547 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -2,5 +2,6 @@ export * from "./cache"; export * from "./cashu"; export * from "./nip87"; export * from "./nostr"; +export * from "./reviews"; export const VERSION = "0.0.0"; diff --git a/packages/core/src/integration.test.ts b/packages/core/src/integration.test.ts index 2407cf1..69ad30b 100644 --- a/packages/core/src/integration.test.ts +++ b/packages/core/src/integration.test.ts @@ -79,17 +79,23 @@ function toAnnouncementRow( } function toReviewRow(parsed: NonNullable>): ReviewRow { - return { + // `parsed.rating` is `number | undefined` from the nip87 parse layer; + // the cache layer requires `number | null` (explicit "no rating" state). + const rating = parsed.rating ?? 
null; + const row: ReviewRow = { pubkey: parsed.pubkey, kind: 38000, d: parsed.d, eventId: parsed.eventId, createdAt: parsed.createdAt, - k: parsed.k, - rating: parsed.rating, content: parsed.content, rawTags: parsed.raw.tags, + rating, }; + // `parsed.k` is `number | undefined`; narrow to the Cashu/Fedimint + // pair the cache row shape accepts. + if (parsed.k === 38172 || parsed.k === 38173) row.k = parsed.k; + return row; } /** @@ -170,12 +176,17 @@ describe("integration: corpus replay → parse → cache", () => { expect(inserted.length).toBe(expectedAccepted); expect(rejectedInvalid.length).toBe(expectedRejected); - // Reviews: all 5 recommendations parse and insert (each has unique - // [pubkey,38000,d]). - expect(await db.reviews.count()).toBe(f.recommendations38000.length); - expect(await db.reviews.count()).toBe(5); + // Reviews: all 5 recommendations parse, but Layer A applies to the + // reviews' `d` tag too (PR #5) — 2 of the 5 point at 16-char legacy + // d-tags that pre-date the Cashu-mint-pubkey d-tag convention and + // would be indistinguishable from the bot-spam shape the gate is + // designed to reject. Those are `rejected-invalid`. The remaining 3 + // reference real 64-char Cashu mint pubkeys and insert cleanly. const reviewsInserted = reviewResults.filter((r) => r.result === "inserted"); - expect(reviewsInserted.length).toBe(5); + const reviewsRejectedInvalid = reviewResults.filter((r) => r.result === "rejected-invalid"); + expect(reviewsInserted.length).toBe(3); + expect(reviewsRejectedInvalid.length).toBe(2); + expect(await db.reviews.count()).toBe(3); }); it("the legacy Nostrodomo (64-char x-only) lands as inserted, not rejected-invalid", async () => { @@ -431,13 +442,15 @@ describe("integration: scheduler full pipeline", () => { // Stats: same accept/reject as the parse → cache integration above // (5 bot-spam rejected at Layer A; 1 legacy + 2 spec-conforming + 3 - // fedimint accepted = 6 announcements; 5 reviews accepted). 
+ // fedimint accepted = 6 announcements; 3 reviews accepted + 2 reviews + // rejected for 16-char legacy d-tags per PR #5's Layer A review gate). const stats = sched.getStats(); // 11 announcements (5 spam + 1 legacy + 2 spec + 3 fedi) + 5 reviews = 16. expect(stats.eventsReceived).toBe(16); - expect(stats.rejectedByLayerA).toBe(5); - // Accepted = 6 announcements + 5 reviews = 11. - expect(stats.accepted).toBe(11); + // 5 announcement bot-spam rejections + 2 review 16-char d-tag rejections. + expect(stats.rejectedByLayerA).toBe(7); + // Accepted = 6 announcements + 3 reviews = 9. + expect(stats.accepted).toBe(9); // Layer B: spec-conforming Alpha + Beta verify. Legacy Nostrodomo // returns ok but with the wrong pubkey → counts as failed. Fedimint @@ -447,9 +460,10 @@ describe("integration: scheduler full pipeline", () => { expect(stats.layerBPending).toBe(0); // Cache state matches the parse → cache test exactly: 6 announcements, - // 5 reviews. Bot-spam rejected at Layer A, never lands. + // 3 reviews (2 more reviews rejected by PR #5's Layer A on reviews' + // d-tags). Bot-spam rejected at Layer A, never lands. expect(await db.announcements.count()).toBe(6); - expect(await db.reviews.count()).toBe(5); + expect(await db.reviews.count()).toBe(3); // Spot-check verifiedBySignerBinding wired through correctly. const alphaPubkey = "02aa00000000000000000000000000000000000000000000000000000000000001"; @@ -518,7 +532,8 @@ describe("integration: scheduler full pipeline", () => { fetches: calls1.length, }; expect(round1Counts.announcements).toBe(6); - expect(round1Counts.reviews).toBe(5); + // 3 reviews (2 more gated out by PR #5's Layer A on review d-tags). + expect(round1Counts.reviews).toBe(3); expect(round1Counts.mintInfo).toBe(3); // Round 2 — fresh scheduler against same DB. 
createScheduler reads diff --git a/packages/core/src/nip87/dtag.ts b/packages/core/src/nip87/dtag.ts index 729d735..9b84b32 100644 --- a/packages/core/src/nip87/dtag.ts +++ b/packages/core/src/nip87/dtag.ts @@ -28,6 +28,20 @@ // Bot spam (16-char random d-tags) rejected by both branches. export const D_TAG_REGEX = /^([0-9a-f]{64}|0[23][0-9a-f]{64})$/; +/** + * Fedimint federation-id d-tag shape. Every real Fedimint federation ID + * observed in the audit corpus (see `audit/fedimint-observer.md` and + * `packages/core/src/reviews/corpus.test.ts`) is lowercase 64-char hex — + * the blake3 hash of the federation's consensus public key, serialized as + * 32 bytes of hex. A short/junk d-tag with `k=38173` slapped on is bot + * spam, not a federation, and must be rejected by the same Layer A + * firewall that catches 16-char Cashu bot spam. + * + * Keeping this sibling to `D_TAG_REGEX` so both Layer A shape gates live + * in one file — a reviewer touching one will see the other immediately. + */ +export const FEDIMINT_D_TAG_REGEX = /^[0-9a-f]{64}$/; + /** * True iff `d` is either a 64-char x-only secp256k1 pubkey or a 66-char * compressed secp256k1 pubkey, both lowercase hex. @@ -35,3 +49,10 @@ export const D_TAG_REGEX = /^([0-9a-f]{64}|0[23][0-9a-f]{64})$/; export function isValidCashuDTag(d: string): boolean { return D_TAG_REGEX.test(d); } + +/** + * True iff `d` is a lowercase 64-char hex Fedimint federation ID. 
+ */ +export function isValidFedimintDTag(d: string): boolean { + return FEDIMINT_D_TAG_REGEX.test(d); +} diff --git a/packages/core/src/nip87/index.ts b/packages/core/src/nip87/index.ts index eb58113..b775c0d 100644 --- a/packages/core/src/nip87/index.ts +++ b/packages/core/src/nip87/index.ts @@ -1,4 +1,9 @@ -export { D_TAG_REGEX, isValidCashuDTag } from "./dtag"; +export { + D_TAG_REGEX, + FEDIMINT_D_TAG_REGEX, + isValidCashuDTag, + isValidFedimintDTag, +} from "./dtag"; export { parseMintAnnouncement, parseRecommendation } from "./parse"; export type { MintAnnouncement, diff --git a/packages/core/src/reviews/aggregate.test.ts b/packages/core/src/reviews/aggregate.test.ts new file mode 100644 index 0000000..f70b484 --- /dev/null +++ b/packages/core/src/reviews/aggregate.test.ts @@ -0,0 +1,199 @@ +/** + * Unit tests for the mintAggregate materialization — exercises the + * bayesian formula directly, then the recompute-from-reviews path. + */ +import { afterEach, describe, expect, it } from "vitest"; +import { BitcoinmintsDB, type ReviewRow } from "../cache"; +import { bayesianScore, recomputeAggregateInTx } from "./aggregate"; + +const freshName = () => `test-aggregate-${Math.random().toString(36).slice(2)}`; +const toDispose: BitcoinmintsDB[] = []; + +afterEach(async () => { + while (toDispose.length > 0) { + const db = toDispose.pop(); + if (!db) continue; + db.close(); + await BitcoinmintsDB.delete(db.name); + } +}); + +async function freshDB(): Promise { + const db = new BitcoinmintsDB(freshName()); + toDispose.push(db); + await db.open(); + return db; +} + +const D_A = "5fe928ae0970844f3c5253d2e85a88788486edcbd96c070334a4a2d0d0154a77"; +const D_B = "0".repeat(63) + "1"; + +function makeReview(over: Partial & { d?: string } = {}): ReviewRow { + return { + pubkey: `pk${Math.random().toString(36).slice(2, 10)}${"0".repeat(50)}`, + kind: 38000, + d: over.d ?? 
D_A, + eventId: `${"0".repeat(58)}${Math.random().toString(36).slice(2, 8)}`, + createdAt: 1_700_000_000, + content: "", + rawTags: [], + rating: 5, + ...over, + }; +} + +/** + * Directly insert a review row without going through upsertReview's CAS + * transaction. Used by tests to populate the reviews table so we can + * exercise recompute in isolation. + */ +async function seedReviews(db: BitcoinmintsDB, rows: ReviewRow[]): Promise { + await db.reviews.bulkPut(rows); +} + +describe("bayesianScore formula", () => { + it("null avg → 0 (no rated reviews, no sort signal)", () => { + expect(bayesianScore(null, 0)).toBe(0); + expect(bayesianScore(null, 5)).toBe(0); + }); + + it("single 5★ review: 5 * log10(2) ≈ 1.505", () => { + const score = bayesianScore(5, 1); + expect(score).toBeCloseTo(5 * Math.log10(2), 6); + expect(score).toBeGreaterThan(1.5); + expect(score).toBeLessThan(1.51); + }); + + it("10 × 4★ reviews: 4 * log10(11) ≈ 4.166", () => { + const score = bayesianScore(4, 10); + expect(score).toBeCloseTo(4 * Math.log10(11), 6); + expect(score).toBeGreaterThan(4.15); + expect(score).toBeLessThan(4.17); + }); + + it("damping rule: single 5★ sorts BELOW 10 × 4★ (bayesian dominates)", () => { + const single5 = bayesianScore(5, 1); + const tenFour = bayesianScore(4, 10); + expect(single5).toBeLessThan(tenFour); + }); + + it("edge: log10(1)=0 when ratedCount=0 — even with avg set, degenerate case returns 0", () => { + // Shouldn't happen in practice, but guard against divide-by-zero glitches. 
+ expect(bayesianScore(5, 0)).toBe(0); + }); +}); + +describe("recomputeAggregateInTx — materialization", () => { + it("0 reviews for d → writes reviewCount=0, ratedCount=0, avgRating=null, bayesianScore=0", async () => { + const db = await freshDB(); + const row = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A, () => 12345), + ); + expect(row.reviewCount).toBe(0); + expect(row.ratedCount).toBe(0); + expect(row.avgRating).toBeNull(); + expect(row.bayesianScore).toBe(0); + expect(row.updatedAt).toBe(12345); + + const persisted = await db.mintAggregate.get(D_A); + expect(persisted).toEqual(row); + }); + + it("1 rated review → avg = that rating; bayesian = rating * log10(2)", async () => { + const db = await freshDB(); + await seedReviews(db, [makeReview({ rating: 5, pubkey: "pk-1" })]); + const row = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A), + ); + expect(row.reviewCount).toBe(1); + expect(row.ratedCount).toBe(1); + expect(row.avgRating).toBe(5); + expect(row.bayesianScore).toBeCloseTo(5 * Math.log10(2), 6); + }); + + it("multiple rated reviews: correct mean and count", async () => { + const db = await freshDB(); + await seedReviews(db, [ + makeReview({ rating: 5, pubkey: "pk-1" }), + makeReview({ rating: 3, pubkey: "pk-2" }), + makeReview({ rating: 4, pubkey: "pk-3" }), + makeReview({ rating: 5, pubkey: "pk-4" }), + ]); + const row = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A), + ); + expect(row.reviewCount).toBe(4); + expect(row.ratedCount).toBe(4); + expect(row.avgRating).toBe((5 + 3 + 4 + 5) / 4); + expect(row.bayesianScore).toBeCloseTo(row.avgRating! 
* Math.log10(5), 6); + }); + + it("mixed rated + unrated reviews: reviewCount counts all, ratedCount counts rated, avg only from rated", async () => { + const db = await freshDB(); + await seedReviews(db, [ + makeReview({ rating: 5, pubkey: "pk-1" }), + makeReview({ rating: null, pubkey: "pk-2" }), + makeReview({ rating: 3, pubkey: "pk-3" }), + makeReview({ rating: null, pubkey: "pk-4" }), + makeReview({ rating: null, pubkey: "pk-5" }), + ]); + const row = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A), + ); + expect(row.reviewCount).toBe(5); + expect(row.ratedCount).toBe(2); + expect(row.avgRating).toBe((5 + 3) / 2); // 4 + expect(row.bayesianScore).toBeCloseTo(4 * Math.log10(3), 6); + }); + + it("all unrated reviews: avg stays null, bayesian is 0, but reviewCount reflects them", async () => { + const db = await freshDB(); + await seedReviews(db, [ + makeReview({ rating: null, pubkey: "pk-1" }), + makeReview({ rating: null, pubkey: "pk-2" }), + ]); + const row = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A), + ); + expect(row.reviewCount).toBe(2); + expect(row.ratedCount).toBe(0); + expect(row.avgRating).toBeNull(); + expect(row.bayesianScore).toBe(0); + }); + + it("scoping: recompute for d=A ignores reviews keyed on d=B", async () => { + const db = await freshDB(); + await seedReviews(db, [ + makeReview({ rating: 5, pubkey: "pk-1", d: D_A }), + makeReview({ rating: 1, pubkey: "pk-2", d: D_B }), + makeReview({ rating: 1, pubkey: "pk-3", d: D_B }), + ]); + const rowA = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A), + ); + expect(rowA.reviewCount).toBe(1); + expect(rowA.avgRating).toBe(5); + + const rowB = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_B), + ); + expect(rowB.reviewCount).toBe(2); + expect(rowB.avgRating).toBe(1); + }); + + 
it("idempotency: recomputing twice with no changes produces the same row payload (except updatedAt)", async () => { + const db = await freshDB(); + await seedReviews(db, [makeReview({ rating: 4, pubkey: "pk-1" })]); + const first = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A, () => 1000), + ); + const second = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A, () => 2000), + ); + expect(first.reviewCount).toBe(second.reviewCount); + expect(first.ratedCount).toBe(second.ratedCount); + expect(first.avgRating).toBe(second.avgRating); + expect(first.bayesianScore).toBe(second.bayesianScore); + expect(second.updatedAt).toBeGreaterThan(first.updatedAt); + }); +}); diff --git a/packages/core/src/reviews/aggregate.ts b/packages/core/src/reviews/aggregate.ts new file mode 100644 index 0000000..5f47422 --- /dev/null +++ b/packages/core/src/reviews/aggregate.ts @@ -0,0 +1,90 @@ +/** + * `mintAggregate` materialization — derived from all `reviews` rows with a + * given `d` tag. Runs inside the same Dexie transaction as the triggering + * review upsert so the two stores can't go out of sync across a crash. + * + * See data-model-v1.md §13: we maintain a cached aggregate rather than a + * live Dexie query because ranking needs a per-filter-change sort and + * groupby-by-d is expensive in the browser. Recompute is cheap — scoped + * to a single `d`, per-mint review count is bounded (~500 worst case), + * entirely in memory once Dexie has surfaced the review rows. 
+ * + * Bayesian score (damping formula, §13): + * + * bayesianScore = avgRating * log10(ratedCount + 1) when avgRating != null + * bayesianScore = 0 otherwise + * + * The damping makes a single 5★ review sort below 4★×10: + * single 5★: 5 * log10(2) ≈ 1.505 + * 4★ × 10: 4 * log10(11) ≈ 4.166 + * + * Writing out `0` for the un-rated case rather than `null` lets the index + * on `bayesianScore` be usable as a single `.orderBy('bayesianScore')` + * range — if we wrote `null`, IndexedDB would leave those rows out of the + * index entirely (`null` is not a valid key) and they would vanish from the sort. + * Zero-scored mints sort below any positive-scored mint which is the + * correct UX (and matches the no-reviews baseline). + */ +import type { BitcoinmintsDB, MintAggregateRow, ReviewRow } from "../cache"; + +/** + * Compute the Bayesian sort key from an average rating and the count of + * reviews that contributed to it. Exposed for test assertions. + */ +export function bayesianScore(avgRating: number | null, ratedCount: number): number { + if (avgRating === null) return 0; + // log10(0 + 1) = 0, so a degenerate avgRating-with-zero-count (shouldn't + // happen in practice) multiplies out to 0 without an explicit guard. + return avgRating * Math.log10(ratedCount + 1); +} + +/** + * Recompute the aggregate for mint `d` from its current `reviews` rows and + * upsert it. Call INSIDE an open Dexie `rw` transaction that includes both + * `db.reviews` and `db.mintAggregate` — Dexie auto-binds this work to the + * outer transaction so a crash between the review write and the aggregate + * write cannot leave the two stores out of sync. + * + * Behaviour on zero reviews: still writes a row with reviewCount=0, + * ratedCount=0, avgRating=null, bayesianScore=0. This matters for the + * "review deletion" path — the aggregate doesn't get orphaned with a + * stale average when the last review for a mint is replaced or removed. 
+ * + * CAS policy: uses `put` (unconditional write) rather than the + * upsert.ts monotonic-timestamp gate. Inside the transaction we already + * hold the latest review state — the previously-written aggregate is by + * definition older (or equal in the zero-ops degenerate case, which is + * still a safe overwrite). Same-ms ties are fine because all row fields + * are deterministically derived from the review set. + * + * `now` is injectable for deterministic tests. Defaults to `Date.now`. Reviews whose `rating` is `null` or `undefined` (rows persisted under the pre-v3 optional-`rating` shape) are counted as unrated, so they cannot turn the mean into NaN. + */ +export async function recomputeAggregateInTx( + db: BitcoinmintsDB, + d: string, + now: () => number = Date.now, +): Promise<MintAggregateRow> { + const reviews: ReviewRow[] = await db.reviews.where("d").equals(d).toArray(); + + const reviewCount = reviews.length; + let ratedCount = 0; + let sum = 0; + for (const r of reviews) { + if (r.rating != null) { + ratedCount += 1; + sum += r.rating; + } + } + const avgRating = ratedCount > 0 ? sum / ratedCount : null; + const row: MintAggregateRow = { + d, + reviewCount, + ratedCount, + avgRating, + bayesianScore: bayesianScore(avgRating, ratedCount), + updatedAt: now(), + }; + + await db.mintAggregate.put(row); + return row; +} diff --git a/packages/core/src/reviews/corpus.test.ts b/packages/core/src/reviews/corpus.test.ts new file mode 100644 index 0000000..997be47 --- /dev/null +++ b/packages/core/src/reviews/corpus.test.ts @@ -0,0 +1,162 @@ +/** + * Corpus smoke test for the review pipeline. Uses a handful of real + * kind:38000 events lifted from + * /srv/forge/projects/bitcoinmints/audit/relay-data/recs-38000.json so the + * parse → upsert → aggregate → rank chain is exercised against the actual + * shapes relays emit — not just hand-written tests. + * + * Scope kept small (3 mints, ~8 events) so this file stays committable + * without pulling a 32k-line JSON corpus into the package. 
+ */ +import type { Event as NostrEvent } from "nostr-tools/core"; +import { afterEach, describe, expect, it } from "vitest"; +import { BitcoinmintsDB } from "../cache"; +import { parseReview } from "./parse"; +import { rankMints } from "./rank"; +import { upsertReviewWithAggregate } from "./upsert"; + +const freshName = () => `test-review-corpus-${Math.random().toString(36).slice(2)}`; +const toDispose: BitcoinmintsDB[] = []; + +afterEach(async () => { + while (toDispose.length > 0) { + const db = toDispose.pop(); + if (!db) continue; + db.close(); + await BitcoinmintsDB.delete(db.name); + } +}); + +async function freshDB(): Promise { + const db = new BitcoinmintsDB(freshName()); + toDispose.push(db); + await db.open(); + return db; +} + +/** + * Real kind:38000 events taken verbatim from the audit's relay dump. IDs, + * signatures, and timestamps preserved. The d-tags are real Fedimint + * federation IDs and Cashu mint pubkeys seen in the wild. + * + * Note: Fedimint federation IDs aren't constrained by the Layer A d-regex, + * but this corpus uses 64-char hex which passes the regex either way. + */ +const REAL_EVENTS: NostrEvent[] = [ + // Fedimint 1 — 4 separate reviewers, all 5★. + { + content: "[5/5]", + created_at: 1776360005, + id: "50d2e3d560f5a312965ff977ed7755246de4dd64c03fdd5adf99caa587cb53d0", + kind: 38000, + pubkey: "1944cd868d0b996f58944b5748852d676e84f32c50cb224f65432ddf55045666", + sig: "033a48b4844452784ffeecd9c379c1814f5c3386414a7f7f1e255ea11bfdd4686fc8fd287582971f9fa541fd64113119e7c882d9d6c3f2a3eecd90468acb5d1f", + tags: [ + ["d", "27e032c0f1ff18213c3a94c2426f20a4000479b318712e93a7e56286fed00a2f"], + ["k", "38173"], + ["rating", "5"], + ], + }, + // Fedimint 2 — 3 reviewers, mix of ratings. 
+ { + content: "[5/5]", + created_at: 1776351305, + id: "42c89639b9471d2f8aa9475731dde0873a3bb8d4b2dfa72d5162cd146d50fdd5", + kind: 38000, + pubkey: "3c00865afdb1dd2f8b68a9f802d0bbce2e6e9ebdb03f1a4686494a67e999b0a1", + sig: "d2e3306255989bc52fb1ccfc25a282a7e79beb8a5bc7fd80e79ce16f5621093226b4642c7f8e8510860595612547fd7999d3b96cef80ea45a3531e6f9c426d71", + tags: [ + ["d", "718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af"], + ["k", "38173"], + ["rating", "5"], + ], + }, + { + content: "[5/5]", + created_at: 1776298850, + id: "f22e2e76dca0d577a7695e45d5d0abd1ed6f5d6bc0e233c9e348c1b9339f8e6d", + kind: 38000, + pubkey: "82f1ae3bdd172c0ce69553165e8237e2fdf7fa32832707de130a274fcfaf1b10", + sig: "549aad601d19a699eed21ae9ce9f9f35b855d7e5f898c933636c00f2397d34cddd3a80c7f89eb8efc5b867dfdb88c18dab0573022be1588d4b44f6c456bc71a2", + tags: [ + ["d", "718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af"], + ["k", "38173"], + ["rating", "5"], + ], + }, + { + // Manufactured 2★ on the same mint to ensure the aggregate mean is + // actually computed (not a constant-5 test). + content: "[2/5]", + created_at: 1776298900, + id: "f22e2e76dca0d577a7695e45d5d0abd1ed6f5d6bc0e348c1b9339f8e6cffffff", + kind: 38000, + pubkey: "92f1ae3bdd172c0ce69553165e8237e2fdf7fa32832707de130a274fcfaf1b11", + sig: "00", + tags: [ + ["d", "718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af"], + ["k", "38173"], + ["rating", "2", "5"], + ], + }, + // Fedimint 3 — single review. 
+ { + content: "[5/5]", + created_at: 1776291469, + id: "e810bc759a3dac39ebfdf652df932d79d031712499618401fb8e01ed412c88c3", + kind: 38000, + pubkey: "ddc17385fdd1cc2df1e6f3a248c5a14ccaa9fcab17281d057e58965423de4617", + sig: "a16145e091e36629c720ab791a34a6ece6e2b6ad94f8a61361fef06c96d3ef2f89c6b6b03e882d1281de71e05d0b4f89d068e29d00c6c8736c90a7c0a2970afd", + tags: [ + ["d", "3beb71872cea0b97082ff1f6450e722903bc7ac09e5b4dc33105999f2901b4eb"], + ["k", "38173"], + ["rating", "5"], + ], + }, +]; + +describe("reviews: real corpus pipeline", () => { + it("parses every event, materializes aggregates, ranks by damped bayesian", async () => { + const db = await freshDB(); + + // Parse every real event and push through the upsert+aggregate path. + const results: string[] = []; + for (const e of REAL_EVENTS) { + const row = parseReview(e); + expect(row).not.toBeNull(); + if (!row) continue; + const r = await upsertReviewWithAggregate(db, row); + results.push(r); + } + + // Every real event inserted (no duplicates, no stale). + expect(results).toEqual(["inserted", "inserted", "inserted", "inserted", "inserted"]); + + // 5 rows total. + expect(await db.reviews.count()).toBe(5); + + // Three aggregates — one per distinct d. + expect(await db.mintAggregate.count()).toBe(3); + + // Ranking order check. Federation with 3 rated reviews ((5+5+2)/3 ≈ 4.00 + // * log10(4)=0.602 → 2.408) outranks federation 1 with a single 5★ + // (5 * log10(2)=0.301 → 1.505) — even though fed1 has a higher + // average. The single-5★ federation 3 is ranked equal to federation 1 + // (also 1.505) but Dexie breaks the tie deterministically on primary + // key (the d string); either may come first — what matters is both + // sort BELOW the 3-review federation. 
+ const ranked = await rankMints(db); + expect(ranked).toHaveLength(3); + expect(ranked[0]?.d).toBe("718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af"); + expect(ranked[0]?.reviewCount).toBe(3); + expect(ranked[0]?.ratedCount).toBe(3); + expect(ranked[0]?.avgRating).toBeCloseTo((5 + 5 + 2) / 3, 6); + expect(ranked[0]?.bayesianScore).toBeCloseTo((12 / 3) * Math.log10(4), 6); + + expect(ranked[1]?.bayesianScore).toBeCloseTo(5 * Math.log10(2), 6); + expect(ranked[2]?.bayesianScore).toBeCloseTo(5 * Math.log10(2), 6); + // Sort invariant: scores strictly non-increasing. + expect(ranked[0]!.bayesianScore).toBeGreaterThan(ranked[1]!.bayesianScore); + expect(ranked[1]!.bayesianScore).toBeCloseTo(ranked[2]!.bayesianScore, 6); + }); +}); diff --git a/packages/core/src/reviews/index.ts b/packages/core/src/reviews/index.ts new file mode 100644 index 0000000..e2c307c --- /dev/null +++ b/packages/core/src/reviews/index.ts @@ -0,0 +1,4 @@ +export { bayesianScore, recomputeAggregateInTx } from "./aggregate"; +export { parseReview } from "./parse"; +export { rankMints } from "./rank"; +export { upsertReviewWithAggregate } from "./upsert"; diff --git a/packages/core/src/reviews/parse.test.ts b/packages/core/src/reviews/parse.test.ts new file mode 100644 index 0000000..f287aaf --- /dev/null +++ b/packages/core/src/reviews/parse.test.ts @@ -0,0 +1,424 @@ +/** + * Unit tests for the kind:38000 review parser — each rating precedence + * rule gets its own dedicated assertion so a regression in one format + * can't be masked by a fallback. + */ +import type { Event as NostrEvent } from "nostr-tools/core"; +import { describe, expect, it } from "vitest"; +import { parseReview } from "./parse"; + +/** Realistic 64-char x-only Cashu d-tag. */ +const D_VALID = "5fe928ae0970844f3c5253d2e85a88788486edcbd96c070334a4a2d0d0154a77"; +/** 16-char legacy / bot-spam d-tag. 
*/ +const D_LEGACY_16 = "psvef0yh2zk24tt7"; + +function makeEvent(over: Partial & { tags?: string[][] } = {}): NostrEvent { + return { + id: "1".repeat(64), + pubkey: "2".repeat(64), + created_at: 1_700_000_000, + kind: 38000, + tags: [["d", D_VALID]], + content: "", + sig: "", + ...over, + } as NostrEvent; +} + +describe("parseReview — basic structure", () => { + it("returns null for non-38000 kinds", () => { + const e = makeEvent({ kind: 1 as unknown as 38000 }); + expect(parseReview(e)).toBeNull(); + }); + + it("returns null when the d tag is missing", () => { + const e = makeEvent({ tags: [["k", "38172"]] }); + expect(parseReview(e)).toBeNull(); + }); + + it("returns null when the d tag is present but empty", () => { + const e = makeEvent({ tags: [["d", ""]] }); + expect(parseReview(e)).toBeNull(); + }); + + it("preserves eventId, pubkey, d, createdAt, content, rawTags verbatim", () => { + const e = makeEvent({ + id: "a".repeat(64), + pubkey: "b".repeat(64), + created_at: 1_800_000_000, + content: "great mint", + tags: [ + ["d", D_VALID], + ["k", "38172"], + ["rating", "4", "5"], + ], + }); + const row = parseReview(e); + expect(row).not.toBeNull(); + if (!row) return; + expect(row.eventId).toBe(e.id); + expect(row.pubkey).toBe(e.pubkey); + expect(row.d).toBe(D_VALID); + expect(row.createdAt).toBe(e.created_at); + expect(row.content).toBe("great mint"); + expect(row.rawTags).toEqual(e.tags); + expect(row.kind).toBe(38000); + }); +}); + +describe("parseReview — rating formats (precedence)", () => { + it("Format 1: structured tag ['rating','N','5'] wins — integer 1..5", () => { + for (const n of [1, 2, 3, 4, 5]) { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", String(n), "5"], + ], + }), + ); + expect(row?.rating).toBe(n); + } + }); + + it("Format 1: out-of-range N (0 or 6) falls through", () => { + const below = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", "0", "5"], + ], + }), + ); + // No fallback 
content, so rating is null. + expect(below?.rating).toBeNull(); + + const above = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", "6", "5"], + ], + }), + ); + expect(above?.rating).toBeNull(); + }); + + it("Format 2: legacy ['rating','N'] (no denominator) — integer 1..5", () => { + for (const n of [1, 3, 5]) { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", String(n)], + ], + }), + ); + expect(row?.rating).toBe(n); + } + }); + + it("Format 2: legacy ['rating','N'] with out-of-range N falls through", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", "7"], + ], + }), + ); + expect(row?.rating).toBeNull(); + }); + + it("Format 1 wins over Format 2 when both are present on the same event", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + // Format 2 appears first… + ["rating", "2"], + // …but Format 1 wins even though it's second. + ["rating", "5", "5"], + ], + }), + ); + expect(row?.rating).toBe(5); + }); + + it("Format 3a: content `[N/5]` anchored at start", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "[4/5] decent mint", + }), + ); + expect(row?.rating).toBe(4); + }); + + it("Format 3a: content `N/5` without brackets", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "3/5 avg", + }), + ); + expect(row?.rating).toBe(3); + }); + + it("Format 3a: tag takes precedence over content even when both present", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", "2", "5"], + ], + content: "[5/5] content says five", + }), + ); + expect(row?.rating).toBe(2); + }); + + it("Format 3b: content `N/10` divides and rounds to nearest 1..5", () => { + // 10/10 → 5, 8/10 → 4, 6/10 → 3, 4/10 → 2, 2/10 → 1. 
+ const cases: Array<[string, number]> = [ + ["10/10", 5], + ["8/10 nice", 4], + ["7/10", 4], // round-to-nearest: 3.5 → 4 + ["6/10", 3], + ["5/10", 3], // round-to-nearest: 2.5 → 3 (banker's / half-up; Math.round uses half-away-from-zero) + ["4/10", 2], + ["3/10", 2], // 1.5 → 2 + ["2/10", 1], + ]; + for (const [content, expected] of cases) { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content, + }), + ); + expect(row?.rating).toBe(expected); + } + }); + + it("Format 3b: `0/10` is treated as no-rating (doesn't fabricate a 1★)", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "0/10 total trash", + }), + ); + expect(row?.rating).toBeNull(); + }); + + it("Format 3: /5 wins over /10 when both are present (5 is tried first)", () => { + // Unlikely in practice but the ordering should be deterministic. + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "4/5 but also 8/10", + }), + ); + // Since 5-regex matches at index 0, it wins. 
+ expect(row?.rating).toBe(4); + }); + + it("Format 4: leading emoji run of 1..5 stars (⭐)", () => { + const cases: Array<[string, number]> = [ + ["⭐ one star", 1], + ["⭐⭐ two", 2], + ["⭐⭐⭐ three", 3], + ["⭐⭐⭐⭐ four", 4], + ["⭐⭐⭐⭐⭐ five", 5], + ]; + for (const [content, expected] of cases) { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content, + }), + ); + expect(row?.rating).toBe(expected); + } + }); + + it("Format 4: 🌟 glyph works too (both are accepted)", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "🌟🌟🌟 three stars", + }), + ); + expect(row?.rating).toBe(3); + }); + + it("Format 4: a run of 6+ emojis is out of range → null", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "⭐⭐⭐⭐⭐⭐", + }), + ); + expect(row?.rating).toBeNull(); + }); + + it("Format 4: emoji not at start of content does not match", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "great mint ⭐⭐⭐⭐⭐", + }), + ); + expect(row?.rating).toBeNull(); + }); + + it("Format 3 numeric wins over Format 4 emoji", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + // `[4/5]` matches the numeric regex; the ⭐⭐⭐⭐⭐ after would + // be 5, but we prefer the structured numeric. + content: "[4/5] ⭐⭐⭐⭐⭐", + }), + ); + expect(row?.rating).toBe(4); + }); +}); + +describe("parseReview — malformed rating tag forms", () => { + it("malformed rating tag forms fall through to null (no content fallback)", () => { + // Each of these shapes is "structurally a rating tag" but the value + // payload is unusable — either not a number, empty string, missing, + // or `null`-as-string from a buggy emitter. None should parse to a + // rating, and without a content rating signal all should land at null. + const cases: string[][] = [ + ["rating", "foo", "5"], + ["rating", ""], + ["rating"], + ["rating", "", "5"], + // `null` coerced to a string via a buggy JSON emitter. 
The parser + // guards `typeof t[1] !== "string"` which catches the raw-null + // form; including it defensively in case a relay rewrites null + // into the literal string "null". + ["rating", null as unknown as string, "5"], + ]; + for (const tag of cases) { + const row = parseReview(makeEvent({ tags: [["d", D_VALID], tag as string[]] })); + expect(row).not.toBeNull(); + expect(row?.rating).toBeNull(); + } + }); +}); + +describe("parseReview — null fallback", () => { + it("returns rating: null when no rating tag and no content signal", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "just a plain review, no score", + }), + ); + expect(row?.rating).toBeNull(); + }); + + it("returns rating: null for empty content and no tags", () => { + const row = parseReview(makeEvent({ content: "" })); + expect(row?.rating).toBeNull(); + }); + + it("returns rating: null when content starts with a non-1..5 numeric", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "0/5 terrible", + }), + ); + // N=0 fails the 1..5 bounds check and no other format fires. 
+ expect(row?.rating).toBeNull(); + }); +}); + +describe("parseReview — k tag normalization", () => { + it("k='38172' narrows to number 38172", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["k", "38172"], + ], + }), + ); + expect(row?.k).toBe(38172); + }); + + it("k='38173' narrows to number 38173", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["k", "38173"], + ], + }), + ); + expect(row?.k).toBe(38173); + }); + + it("k absent → row.k is undefined (field omitted)", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + }), + ); + expect(row?.k).toBeUndefined(); + }); + + it("k is something unexpected ('1985') → row.k is undefined", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["k", "1985"], + ], + }), + ); + expect(row?.k).toBeUndefined(); + }); +}); + +describe("parseReview — u tag collection (display helper)", () => { + it("collects all u tag values into an array", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["u", "https://mint.a.example", "cashu"], + ["u", "https://mint.a.example/v1", "cashu"], + ], + }), + ); + expect(row?.u).toEqual(["https://mint.a.example", "https://mint.a.example/v1"]); + }); + + it("omits u entirely when the event has no u tags", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + }), + ); + expect(row?.u).toBeUndefined(); + }); +}); + +describe("parseReview — parser is lenient on Layer A", () => { + it("16-char legacy d-tag still parses (gate is at upsert, not parse)", () => { + // The parser preserves whatever is there — bot-spam filtering is the + // cache layer's job. This keeps parser usable by raw-event log views. 
+ const row = parseReview( + makeEvent({ + tags: [["d", D_LEGACY_16]], + }), + ); + expect(row).not.toBeNull(); + expect(row?.d).toBe(D_LEGACY_16); + }); +}); diff --git a/packages/core/src/reviews/parse.ts b/packages/core/src/reviews/parse.ts new file mode 100644 index 0000000..919ebf9 --- /dev/null +++ b/packages/core/src/reviews/parse.ts @@ -0,0 +1,209 @@ +/** + * Kind:38000 mint-recommendation / review parser. + * + * Emits `ReviewRow` directly — this is the primary parse path used by the + * scheduler's ingest pipeline and by the `reviews/upsert.ts` wrapper. The + * related `nip87/parseRecommendation` returns `MintRecommendation` (a + * parse-layer shape with `raw` preserved); this module produces the + * cache-layer shape with tags indexed for downstream Dexie writes. + * + * Rating extraction follows data-model-v1.md §4 + rating-tag-research.md §6 + * in strict precedence order: + * + * 1. `["rating", "", "5"]` — canonical v1 shape, integer 1..5. + * 2. `["rating", ""]` — legacy recall-trainer emitter, integer 1..5. + * 3. Content numeric: regex "^(N)/5" or "^(N)/10" anchored at start + * (see CONTENT_FIVE_REGEX and CONTENT_TEN_REGEX below). For /10 we + * divide by 2 and round to the nearest integer 1..5. + * 4. Content emoji: leading 1..5 run of star glyphs (see + * CONTENT_EMOJI_REGEX) — count the glyphs. + * 5. Otherwise null (no rating present). + * + * Rule of precedence: a tag wins over content even if both are present. + * This keeps v2-aware clients interop-free from cashu.me / bitcoinmints + * legacy that embed `[N/5]` in content alongside a structured tag. + * + * Parse rejects (returns `null`): + * - `event.kind !== 38000` + * - missing or non-string `d` tag + * + * Parse does NOT reject on Layer A d-shape — the upsert gate handles that + * so the parser stays pure and callable from tests, pagination dedup, etc. + * Callers who want the bot-spam firewall use `upsertReviewWithAggregate`. 
import type { Event as NostrEvent } from "nostr-tools/core";
import type { ReviewRow } from "../cache";

/**
 * Inclusive bounds of the rating scale. Fractional values inside the
 * bounds (e.g. "3.5") are rounded to the nearest integer by `toRating`.
 */
const MIN_RATING = 1;
const MAX_RATING = 5;

/**
 * Content rating: `N/5` anchored at start. Tolerates leading `[` and
 * surrounding whitespace. Captures N.
 */
const CONTENT_FIVE_REGEX = /^\s*\[?\s*(\d+)\s*\/\s*5\b/;
/**
 * Content rating: `N/10` anchored at start — for clients that use a
 * 10-point scale. Divide by 2 to normalize into 1..5.
 */
const CONTENT_TEN_REGEX = /^\s*\[?\s*(\d+)\s*\/\s*10\b/;
/** U+FE0F, the emoji-presentation variation selector some clients append to a star. */
const VARIATION_SELECTOR_16 = "\uFE0F";
/**
 * Leading run of star emoji. Matches `⭐` (U+2B50) and `🌟` (U+1F31F)
 * interchangeably, each optionally followed by U+FE0F so the
 * variation-selector form (`⭐️`) is part of the same run. Captures the
 * whole run; `countLeadingStars` counts the stars in it.
 */
const CONTENT_EMOJI_REGEX = /^\s*((?:(?:\u2B50|\u{1F31F})\uFE0F?)+)/u;

/** Pull the first string value of a named tag (or undefined). */
function firstTagValue(tags: string[][], name: string): string | undefined {
  for (const t of tags) {
    if (t[0] === name && typeof t[1] === "string") return t[1];
  }
  return undefined;
}

/** Collect every string value of a named tag, in tag order. */
function allTagValues(tags: string[][], name: string): string[] {
  const out: string[] = [];
  for (const t of tags) {
    if (t[0] === name && typeof t[1] === "string") out.push(t[1]);
  }
  return out;
}

/**
 * Parse a numeric string into an integer rating 1..5.
 *
 * The value must lie inside [MIN_RATING, MAX_RATING] before rounding;
 * fractional values are then rounded to the nearest integer ("3.5" → 4).
 * Returns undefined for non-numeric or out-of-range input.
 */
function toRating(raw: string): number | undefined {
  const value = Number.parseFloat(raw);
  if (!Number.isFinite(value)) return undefined;
  if (value < MIN_RATING || value > MAX_RATING) return undefined;
  // Bounds were checked on the raw value, so the rounded result is 1..5.
  return Math.round(value);
}

/**
 * Extract rating from tags. Format 1 (`["rating","N","5"]`) wins over
 * format 2 (`["rating","N"]`) — scan the entire tag list for format 1
 * first, then fall back to format 2. A single event with both shapes
 * (which shouldn't happen, but is technically allowed by the event
 * structure) always prefers the explicit-max form.
 */
function parseRatingFromTags(tags: string[][]): number | undefined {
  // Format 1: ["rating", "N", "5"] — canonical v1 shape.
  for (const t of tags) {
    if (t[0] !== "rating") continue;
    if (typeof t[1] !== "string") continue;
    if (t[2] !== "5") continue;
    const r = toRating(t[1]);
    if (r !== undefined) return r;
  }
  // Format 2: ["rating", "N"] — legacy, no denominator.
  for (const t of tags) {
    if (t[0] !== "rating") continue;
    if (typeof t[1] !== "string") continue;
    if (t[2] !== undefined) continue;
    const r = toRating(t[1]);
    if (r !== undefined) return r;
  }
  return undefined;
}

/**
 * Count the stars in a run captured by CONTENT_EMOJI_REGEX. Iterating the
 * string walks code points, so `🌟` (a surrogate pair) counts once;
 * variation selectors are skipped so `⭐️` also counts as one star.
 */
function countLeadingStars(match: string): number {
  let stars = 0;
  for (const glyph of match) {
    if (glyph !== VARIATION_SELECTOR_16) stars += 1;
  }
  return stars;
}

/**
 * Extract rating from content using the 3rd and 4th precedence rules.
 * Format 3 (N/5 and N/10) wins over format 4 (emoji) — a content starting
 * with `[4/5] ⭐⭐⭐⭐⭐` parses as 4, not 5.
 *
 * Precedence is explicit, not emergent: when an N/5 or N/10 prefix matches
 * the content but the parsed number is out of range (e.g. `0/10`, `7/5`),
 * we return `undefined` rather than falling through to the emoji format.
 * The author signalled "this review uses the numeric format" by leading
 * with it — silently reading emoji that might follow would misrepresent
 * their intent. Out-of-range numeric prefixes collapse to "no rating" via
 * the `?? null` fallback in parseReview.
 */
function parseRatingFromContent(content: string): number | undefined {
  // Format 3a: N/5 anchored at start. Once matched, this format is
  // consumed: return its result even when it is out of range.
  const fiveMatch = content.match(CONTENT_FIVE_REGEX);
  if (fiveMatch?.[1]) {
    return toRating(fiveMatch[1]);
  }
  // Format 3b: N/10 anchored at start — divide by 2 and round to nearest
  // (not floor), so `5/10` → 3 and `7/10` → 4. An N outside 0..10 is
  // treated as missing. Consumed on match, same as 3a.
  const tenMatch = content.match(CONTENT_TEN_REGEX);
  if (tenMatch?.[1]) {
    const n = Number.parseInt(tenMatch[1], 10);
    if (Number.isFinite(n) && n >= 0 && n <= 10) {
      const scaled = Math.round(n / 2);
      // 0/10 → 0 which is below MIN_RATING; treat as no-rating rather
      // than lying about a 1-star review.
      if (scaled >= MIN_RATING && scaled <= MAX_RATING) return scaled;
    }
    return undefined;
  }
  // Format 4: leading emoji run of 1..5 stars; longer runs are out of range.
  const emojiMatch = content.match(CONTENT_EMOJI_REGEX);
  if (emojiMatch?.[1]) {
    const n = countLeadingStars(emojiMatch[1]);
    if (n >= MIN_RATING && n <= MAX_RATING) return n;
  }
  return undefined;
}

/** Resolve the `k` tag into a recognized pointer-kind, or undefined. */
function parsePointerKind(tags: string[][]): 38172 | 38173 | undefined {
  const kStr = firstTagValue(tags, "k");
  if (kStr === "38172") return 38172;
  if (kStr === "38173") return 38173;
  return undefined;
}

/**
 * Parse a kind:38000 event into a ReviewRow. Returns `null` when the event
 * is the wrong kind or is missing (or has an empty) `d` tag. Layer A d-tag
 * shape validation is deferred to the upsert layer.
 *
 * `rating` comes from tags first, then content, else `null`. `k` and `u`
 * are only set on the row when the event carries usable values for them.
 */
export function parseReview(event: NostrEvent): ReviewRow | null {
  if (event.kind !== 38000) return null;

  const d = firstTagValue(event.tags, "d");
  if (d === undefined || d === "") return null;

  const content = typeof event.content === "string" ? event.content : "";
  const rating = parseRatingFromTags(event.tags) ?? parseRatingFromContent(content) ?? null;

  const row: ReviewRow = {
    pubkey: event.pubkey,
    kind: 38000,
    d,
    eventId: event.id,
    createdAt: event.created_at,
    content,
    rawTags: event.tags,
    rating,
  };

  const k = parsePointerKind(event.tags);
  if (k !== undefined) row.k = k;

  const u = allTagValues(event.tags, "u");
  if (u.length > 0) row.u = u;

  return row;
}
*/ +function dForIndex(n: number): string { + const hex = n.toString(16).padStart(64, "0"); + return hex; +} + +function makeReview(over: Partial & { pubkey: string; d: string }): ReviewRow { + return { + kind: 38000, + eventId: `${"0".repeat(58)}${over.pubkey.slice(-6)}`, + createdAt: 1_700_000_000, + content: "", + rawTags: [], + rating: 5, + ...over, + }; +} + +/** + * Seed a mint's aggregate by running N reviews of the given rating + * through the full upsert-with-aggregate pipeline. Returns the + * materialized aggregate for assertion convenience. + */ +async function seedMint( + db: BitcoinmintsDB, + d: string, + rating: number | null, + count: number, +): Promise { + for (let i = 0; i < count; i++) { + await upsertReviewWithAggregate( + db, + makeReview({ + // Unique pubkey per review so each is a separate (pubkey, d) + // replaceable-event key. + pubkey: `pk${i.toString(16).padStart(62, "0")}`, + d, + rating, + }), + ); + } +} + +describe("rankMints — sort order", () => { + it("empty aggregate table → empty result, no throw", async () => { + const db = await freshDB(); + const ranked = await rankMints(db); + expect(ranked).toEqual([]); + }); + + it("orders strictly by bayesianScore descending", async () => { + const db = await freshDB(); + const dHigh = dForIndex(1); + const dMid = dForIndex(2); + const dLow = dForIndex(3); + // High: 5★ × 10 → 5 * log10(11) ≈ 5.21 + await seedMint(db, dHigh, 5, 10); + // Mid: 4★ × 3 → 4 * log10(4) ≈ 2.41 + await seedMint(db, dMid, 4, 3); + // Low: 5★ × 1 → 5 * log10(2) ≈ 1.505 + await seedMint(db, dLow, 5, 1); + + const ranked = await rankMints(db); + expect(ranked.map((r) => r.d)).toEqual([dHigh, dMid, dLow]); + }); + + it("single 5★ review sorts BELOW 4★×10 — the formula damps low-count mints (§13)", async () => { + const db = await freshDB(); + const dSingle = dForIndex(10); + const dTen = dForIndex(11); + await seedMint(db, dSingle, 5, 1); + await seedMint(db, dTen, 4, 10); + + const ranked = await rankMints(db); + 
+ expect(ranked[0]?.d).toBe(dTen); + expect(ranked[1]?.d).toBe(dSingle); + // Sanity: the 4★×10 mint (4 * log10(11) ≈ 4.17) must outscore the single 5★ mint (5 * log10(2) ≈ 1.51), per the §13 table. + expect(ranked[0]?.bayesianScore).toBeGreaterThan(ranked[1]!.bayesianScore); + }); + + it("unrated reviews (bayesianScore=0) sort at the bottom", async () => { + const db = await freshDB(); + const dRated = dForIndex(20); + const dUnrated = dForIndex(21); + await seedMint(db, dRated, 3, 2); // 3 * log10(3) ≈ 1.43 + await seedMint(db, dUnrated, null, 10); // bayesianScore=0 + + const ranked = await rankMints(db); + expect(ranked[0]?.d).toBe(dRated); + expect(ranked[1]?.d).toBe(dUnrated); + expect(ranked[1]?.bayesianScore).toBe(0); + }); +}); + +describe("rankMints — limit", () => { + it("defaults to top 50 — returns all 3 when < 50 mints are present", async () => { + const db = await freshDB(); + await seedMint(db, dForIndex(30), 5, 1); + await seedMint(db, dForIndex(31), 4, 1); + await seedMint(db, dForIndex(32), 3, 1); + const ranked = await rankMints(db); + expect(ranked).toHaveLength(3); + }); + + it("caps at the explicit limit", async () => { + const db = await freshDB(); + for (let i = 40; i < 50; i++) { + await seedMint(db, dForIndex(i), 5, i - 39); + } + const top3 = await rankMints(db, 3); + expect(top3).toHaveLength(3); + // Sanity: scores descending.
+ expect(top3[0]!.bayesianScore).toBeGreaterThan(top3[1]!.bayesianScore); + expect(top3[1]!.bayesianScore).toBeGreaterThan(top3[2]!.bayesianScore); + }); +}); + +describe("rankMints — limit bounds", () => { + it("limit=0 returns an empty array, no throw", async () => { + const db = await freshDB(); + await seedMint(db, dForIndex(60), 5, 3); + await seedMint(db, dForIndex(61), 4, 2); + expect(await rankMints(db, 0)).toEqual([]); + }); + + it("limit=Infinity returns every mint in score-descending order", async () => { + // Dexie's .limit() accepts Number.POSITIVE_INFINITY and clamps to the + // full result set (verified empirically in fake-indexeddb via this + // test). If this assertion ever breaks, swap in a high finite limit. + const db = await freshDB(); + await seedMint(db, dForIndex(70), 5, 10); // 5 * log10(11) ≈ 5.21 + await seedMint(db, dForIndex(71), 4, 3); // 4 * log10(4) ≈ 2.41 + await seedMint(db, dForIndex(72), 5, 1); // 5 * log10(2) ≈ 1.505 + const ranked = await rankMints(db, Number.POSITIVE_INFINITY); + expect(ranked).toHaveLength(3); + expect(ranked.map((r) => r.d)).toEqual([dForIndex(70), dForIndex(71), dForIndex(72)]); + // Strictly non-increasing. + expect(ranked[0]!.bayesianScore).toBeGreaterThan(ranked[1]!.bayesianScore); + expect(ranked[1]!.bayesianScore).toBeGreaterThan(ranked[2]!.bayesianScore); + }); +}); diff --git a/packages/core/src/reviews/rank.ts b/packages/core/src/reviews/rank.ts new file mode 100644 index 0000000..bdb7872 --- /dev/null +++ b/packages/core/src/reviews/rank.ts @@ -0,0 +1,29 @@ +/** + * Ranking export — thin helper around the `mintAggregate` Dexie index. + * + * `bayesianScore` is materialized on every review upsert (see + * reviews/aggregate.ts), and the v3 schema declares a secondary index on + * it, so this query reduces to an index range-scan in reverse + limit — + * no per-row compute at query time, no full-table sort. + * + * Intentionally thin; the `mintAggregate` row is the API surface. 
/*
 * Downstream join against `mintInfo`, `announcements`, and
 * `auditLiveness` (when it ships) happens at the render layer — this
 * export is the ranked list of mint `d`s with their aggregates attached.
 */
import type { BitcoinmintsDB, MintAggregateRow } from "../cache";

/**
 * Return the top-N mint aggregates sorted by `bayesianScore` descending.
 *
 * Ties on `bayesianScore` follow the order the `bayesianScore` index
 * yields them in, which is deterministic but not semantically meaningful.
 * A meaningful tiebreak (e.g. by `ratedCount`, then by most-recent
 * review) can be layered as a JS sort on the returned array if UX wants it.
 *
 * @param db    Database handle exposing the `mintAggregate` table.
 * @param limit Maximum number of rows to return. Defaults to 50 per the
 *              data-model-v1.md §13 example query; pass `Infinity` (or a
 *              high number) as the second argument for the full ranked
 *              list. A zero, negative, or NaN limit yields an empty array.
 * @returns Aggregate rows, highest `bayesianScore` first.
 */
export async function rankMints(db: BitcoinmintsDB, limit = 50): Promise<MintAggregateRow[]> {
  // Nothing to fetch for a non-positive or NaN limit; answer without
  // handing an out-of-range count to the query layer.
  if (!(limit > 0)) return [];
  return db.mintAggregate.orderBy("bayesianScore").reverse().limit(limit).toArray();
}
+ */
+import { afterEach, describe, expect, it } from "vitest";
+import { BitcoinmintsDB, type ReviewRow } from "../cache";
+import { upsertReviewWithAggregate } from "./upsert";
+
+const freshName = () => `test-review-upsert-${Math.random().toString(36).slice(2)}`;
+const toDispose: BitcoinmintsDB[] = [];
+
+afterEach(async () => {
+  while (toDispose.length > 0) {
+    const db = toDispose.pop();
+    if (!db) continue;
+    db.close();
+    await BitcoinmintsDB.delete(db.name);
+  }
+});
+
+async function freshDB(): Promise<BitcoinmintsDB> {
+  const db = new BitcoinmintsDB(freshName());
+  toDispose.push(db);
+  await db.open();
+  return db;
+}
+
+const D_VALID = "5fe928ae0970844f3c5253d2e85a88788486edcbd96c070334a4a2d0d0154a77";
+const D_BOT = "psvef0yh2zk24tt7"; // 16-char legacy/bot-spam shape.
+
+const EID_LOW = `${"0".repeat(60)}aaaa`;
+const EID_HIGH = `${"0".repeat(60)}ffff`;
+
+function makeReview(over: Partial<ReviewRow> = {}): ReviewRow {
+  return {
+    pubkey: `pk${"0".repeat(60)}1`,
+    kind: 38000,
+    d: D_VALID,
+    eventId: EID_LOW,
+    createdAt: 1_700_000_000,
+    content: "",
+    rawTags: [],
+    rating: 5,
+    ...over,
+  };
+}
+
+describe("upsertReviewWithAggregate — insert + recompute", () => {
+  it("first insert populates both reviews and mintAggregate in one transaction", async () => {
+    const db = await freshDB();
+    const row = makeReview({ rating: 5 });
+
+    const result = await upsertReviewWithAggregate(db, row, () => 1234);
+    expect(result).toBe("inserted");
+
+    expect(await db.reviews.count()).toBe(1);
+    const agg = await db.mintAggregate.get(D_VALID);
+    expect(agg).toBeDefined();
+    expect(agg?.reviewCount).toBe(1);
+    expect(agg?.ratedCount).toBe(1);
+    expect(agg?.avgRating).toBe(5);
+    expect(agg?.bayesianScore).toBeCloseTo(5 * Math.log10(2), 6);
+    expect(agg?.updatedAt).toBe(1234);
+  });
+
+  it("unrated insert still populates aggregate with reviewCount=1, ratedCount=0, avg=null, bayesian=0", async () => {
+    const db = await freshDB();
+    const row = makeReview({ rating: null });
+
+    const result
= await upsertReviewWithAggregate(db, row); + expect(result).toBe("inserted"); + + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.reviewCount).toBe(1); + expect(agg?.ratedCount).toBe(0); + expect(agg?.avgRating).toBeNull(); + expect(agg?.bayesianScore).toBe(0); + }); + + it("N reviews for same d → aggregate reflects mean across all rated", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ pubkey: "pk-1", rating: 5 })); + await upsertReviewWithAggregate(db, makeReview({ pubkey: "pk-2", rating: 3 })); + await upsertReviewWithAggregate(db, makeReview({ pubkey: "pk-3", rating: 4 })); + + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.reviewCount).toBe(3); + expect(agg?.ratedCount).toBe(3); + expect(agg?.avgRating).toBe((5 + 3 + 4) / 3); + expect(agg?.bayesianScore).toBeCloseTo(agg!.avgRating! * Math.log10(4), 6); + }); +}); + +describe("upsertReviewWithAggregate — CAS + aggregate-stays-in-sync", () => { + it("replace on newer createdAt → aggregate reflects the NEW rating", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ createdAt: 1000, rating: 1 })); + const before = await db.mintAggregate.get(D_VALID); + expect(before?.avgRating).toBe(1); + + const result = await upsertReviewWithAggregate(db, makeReview({ createdAt: 2000, rating: 5 })); + expect(result).toBe("replaced"); + + const after = await db.mintAggregate.get(D_VALID); + expect(after?.avgRating).toBe(5); + expect(after?.reviewCount).toBe(1); // still just the one reviewer + expect(after?.ratedCount).toBe(1); + }); + + it("reject older → aggregate is NOT rewritten (updatedAt stays put)", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ createdAt: 2000, rating: 5 }), () => 1000); + const before = await db.mintAggregate.get(D_VALID); + expect(before?.updatedAt).toBe(1000); + + const result = await upsertReviewWithAggregate( + db, + makeReview({ 
createdAt: 1000, rating: 1 }), + () => 9999, + ); + expect(result).toBe("rejected-stale"); + + const after = await db.mintAggregate.get(D_VALID); + expect(after?.updatedAt).toBe(1000); // not touched + expect(after?.avgRating).toBe(5); + }); + + it("tiebreak on eventId: same createdAt, higher eventId wins, aggregate reflects new rating", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ eventId: EID_LOW, rating: 1 })); + const result = await upsertReviewWithAggregate( + db, + makeReview({ eventId: EID_HIGH, rating: 5 }), + ); + expect(result).toBe("replaced"); + + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.avgRating).toBe(5); + }); + + it("tiebreak rejects lower eventId: aggregate NOT updated", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ eventId: EID_HIGH, rating: 5 }), () => 1000); + const result = await upsertReviewWithAggregate( + db, + makeReview({ eventId: EID_LOW, rating: 1 }), + () => 9999, + ); + expect(result).toBe("rejected-stale"); + + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.updatedAt).toBe(1000); + expect(agg?.avgRating).toBe(5); + }); + + it("replacing a rated review with an unrated one → aggregate flips to avg=null", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ createdAt: 1000, rating: 5 })); + expect((await db.mintAggregate.get(D_VALID))?.avgRating).toBe(5); + + await upsertReviewWithAggregate(db, makeReview({ createdAt: 2000, rating: null })); + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.reviewCount).toBe(1); + expect(agg?.ratedCount).toBe(0); + expect(agg?.avgRating).toBeNull(); + expect(agg?.bayesianScore).toBe(0); + }); + + it("replacing one unrated review with a rated one → aggregate picks up the new rating", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ createdAt: 1000, rating: null })); + 
expect((await db.mintAggregate.get(D_VALID))?.avgRating).toBeNull(); + + await upsertReviewWithAggregate(db, makeReview({ createdAt: 2000, rating: 4 })); + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.ratedCount).toBe(1); + expect(agg?.avgRating).toBe(4); + }); +}); + +describe("upsertReviewWithAggregate — concurrent CAS + aggregate race", () => { + it("two concurrent upserts for the same d (different pubkeys) converge — aggregate reflects BOTH reviews, 5 trials", async () => { + // Regression for the race where the review CAS upsert and the aggregate + // recompute are in the same transaction: if both concurrent upserts + // read the reviews table before either writes, the recompute would + // see only one review and the aggregate would drop to reviewCount=1. + // Dexie serializes rw-rw transactions on the same tables, so the + // correct outcome is both reviews land AND the aggregate sees both. + // Mirrors the announcement-side regression in cache/upsert.test.ts (~L288). + const pkA = `pk-a${"0".repeat(60)}`; + const pkB = `pk-b${"0".repeat(60)}`; + const reviewA = makeReview({ pubkey: pkA, eventId: EID_LOW, rating: 5 }); + const reviewB = makeReview({ pubkey: pkB, eventId: EID_HIGH, rating: 1 }); + + for (let trial = 0; trial < 5; trial++) { + const db = await freshDB(); + const ops = + trial % 2 === 0 + ? [upsertReviewWithAggregate(db, reviewA), upsertReviewWithAggregate(db, reviewB)] + : [upsertReviewWithAggregate(db, reviewB), upsertReviewWithAggregate(db, reviewA)]; + const results = await Promise.all(ops); + + // Both reviews land — distinct (pubkey, kind, d) triples don't CAS-fail. + expect(results).toEqual(["inserted", "inserted"]); + expect(await db.reviews.count()).toBe(2); + + // Aggregate reflects BOTH reviews — this is the invariant that + // would break if the recompute ran on a pre-write snapshot of + // the reviews table. 
+ const agg = await db.mintAggregate.get(D_VALID); + expect(agg).toBeDefined(); + expect(agg?.reviewCount).toBe(2); + expect(agg?.ratedCount).toBe(2); + expect(agg?.avgRating).toBe(3); // (5 + 1) / 2 + expect(agg?.bayesianScore).toBeCloseTo(3 * Math.log10(3), 6); + } + }); +}); + +describe("upsertReviewWithAggregate — Layer A gate", () => { + it("16-char bot-spam d-tag → rejected-invalid, no review row, no aggregate row", async () => { + const db = await freshDB(); + const result = await upsertReviewWithAggregate(db, makeReview({ d: D_BOT })); + expect(result).toBe("rejected-invalid"); + expect(await db.reviews.count()).toBe(0); + expect(await db.mintAggregate.count()).toBe(0); + }); + + it("Fedimint k=38173 review with non-regex d bypasses the gate", async () => { + const db = await freshDB(); + // A federation ID isn't constrained by the Cashu-mint-pubkey regex. + const fediRow = makeReview({ + d: "718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af", + k: 38173, + }); + const result = await upsertReviewWithAggregate(db, fediRow); + expect(result).toBe("inserted"); + expect(await db.reviews.count()).toBe(1); + }); +}); diff --git a/packages/core/src/reviews/upsert.ts b/packages/core/src/reviews/upsert.ts new file mode 100644 index 0000000..e8212f9 --- /dev/null +++ b/packages/core/src/reviews/upsert.ts @@ -0,0 +1,39 @@ +/** + * Transactional wrapper that composes `cache.upsertReview` with + * `recomputeAggregateInTx` inside a single Dexie `rw` transaction so the + * `reviews` table and the `mintAggregate` materialization never drift. + * + * The low-level `cache.upsertReview` already opens a transaction on just + * `db.reviews`. Opening a transaction that includes BOTH `db.reviews` and + * `db.mintAggregate` at this layer means the nested call inside + * `cache.upsertReview` is transparently adopted by Dexie's scope + * inheritance (zone-tracked) — no SubTransactionError. 
On the accept
+ * branches (`inserted` or `replaced`) we recompute; on reject branches
+ * (`rejected-stale`, `rejected-invalid`) the DB state didn't change, so
+ * the aggregate is already correct — we skip the recompute.
+ */
+import { type BitcoinmintsDB, type ReviewRow, type UpsertResult, upsertReview } from "../cache";
+import { recomputeAggregateInTx } from "./aggregate";
+
+/**
+ * Upsert a review and, if the review changed DB state (inserted or
+ * replaced), recompute the `mintAggregate` row for that review's `d`.
+ * Returns the `UpsertResult` from the underlying review write — this lets
+ * callers distinguish "wrote a new row" from "lost the CAS race".
+ *
+ * `now` is the recompute clock (threaded into `recomputeAggregateInTx`);
+ * defaults to `Date.now`.
+ */
+export async function upsertReviewWithAggregate(
+  db: BitcoinmintsDB,
+  row: ReviewRow,
+  now: () => number = Date.now,
+): Promise<UpsertResult> {
+  return db.transaction("rw", db.reviews, db.mintAggregate, async () => {
+    const result = await upsertReview(db, row);
+    if (result === "inserted" || result === "replaced") {
+      await recomputeAggregateInTx(db, row.d, now);
+    }
+    return result;
+  });
+}
diff --git a/packages/core/src/scheduler/index.test.ts b/packages/core/src/scheduler/index.test.ts
index e98a0a5..518f335 100644
--- a/packages/core/src/scheduler/index.test.ts
+++ b/packages/core/src/scheduler/index.test.ts
@@ -223,13 +223,18 @@ describe("scheduler — pipeline (single event)", () => {
     await sched.start();
     const fedPubkey = "fedopk".padEnd(64, "0");
+    // Use a realistic 64-char hex federation id — every real federation
+    // observed in the audit corpus (see `fedimint-observer.md` and
+    // `reviews/corpus.test.ts`) is lowercase 64-char hex. Short/junk
+ const fedId = "718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af"; await pushEvent({ id: "fed-1", kind: 38173, pubkey: fedPubkey, created_at: 1_700_000_000, tags: [ - ["d", "fed11abc"], + ["d", fedId], ["u", "fed11abc..."], ], content: "", diff --git a/packages/core/src/scheduler/index.ts b/packages/core/src/scheduler/index.ts index 8486b56..2f8bb62 100644 --- a/packages/core/src/scheduler/index.ts +++ b/packages/core/src/scheduler/index.ts @@ -70,31 +70,43 @@ import { type BitcoinmintsDB, type ProfileRow, type RelayListRow, - type ReviewRow, upsertAnnouncement, upsertMintInfo, upsertProfile, upsertRelayList, - upsertReview, } from "../cache"; import type { MintInfoFetcher } from "../cashu/info"; import { type LayerBResult, verifySignerBinding } from "../cashu/layerB"; -import { - type MintAnnouncement, - type MintRecommendation, - parseMintAnnouncement, - parseRecommendation, -} from "../nip87"; +import { type MintAnnouncement, parseMintAnnouncement } from "../nip87"; import type { Pool, PoolHandle } from "../nostr"; +import { parseReview } from "../reviews/parse"; +import { upsertReviewWithAggregate } from "../reviews/upsert"; /** Observable counters surfaced via getStats() — for the UI in PR #6+. */ export type SchedulerStats = { eventsReceived: number; accepted: number; rejectedByLayerA: number; + /** + * kind:38000 reviews that were dropped because `parseReview` returned + * `null` — either the `d` tag was missing/empty or the event was + * unexpectedly not kind:38000. Counted separately from `rejectedByLayerA` + * because it's a parser-level reject (malformed event) rather than a + * shape-gate reject (valid event pointing at bot-spam). + */ + rejectedByParse: number; layerBPending: number; layerBVerified: number; layerBFailed: number; + /** + * Count of exceptions thrown out of the per-event handler after we've + * started processing. 
A thrown Dexie transaction (QuotaExceeded, schema + * collision, unexpected disk state) or any other unhandled error in a + * kind-specific branch bumps this counter — without it, the error would + * become an unhandled promise rejection and the stats would silently + * freeze at last-good while ingest continued to look healthy. + */ + handlerErrors: number; }; export type Scheduler = { @@ -206,21 +218,6 @@ function toAnnouncementRow(parsed: MintAnnouncement): AnnouncementRow { return row; } -function toReviewRow(parsed: MintRecommendation): ReviewRow { - const row: ReviewRow = { - pubkey: parsed.pubkey, - kind: 38000, - d: parsed.d, - eventId: parsed.eventId, - createdAt: parsed.createdAt, - content: parsed.content, - rawTags: parsed.raw.tags, - }; - if (parsed.k !== undefined) row.k = parsed.k; - if (parsed.rating !== undefined) row.rating = parsed.rating; - return row; -} - /** Best-effort kind:0 parse. JSON content with name/picture/etc. */ function toProfileRow(event: NostrEvent): ProfileRow | null { if (event.kind !== 0) return null; @@ -277,9 +274,11 @@ export function createScheduler(config: SchedulerConfig): Scheduler { eventsReceived: 0, accepted: 0, rejectedByLayerA: 0, + rejectedByParse: 0, layerBPending: 0, layerBVerified: 0, layerBFailed: 0, + handlerErrors: 0, }; // Tracks (kind -> highest createdAt seen). Used to compute the `since` @@ -578,7 +577,20 @@ export function createScheduler(config: SchedulerConfig): Scheduler { inflight.add(work); } - /** Per-event handler — single funnel for all kinds. */ + /** + * Per-event handler — single funnel for all kinds. + * + * Each case body is wrapped in its own try/catch so a thrown Dexie + * transaction (QuotaExceeded, schema collision, unexpected disk state) + * or any other branch-local exception gets counted into + * `stats.handlerErrors` and logged with a stable prefix. 
Without the + * wrappers, the rejection would escape the `void onEvent(event)` call + * at the subscription boundary and stats would silently freeze at + * last-good while ingest continued to look healthy (silent-failure + * gap). Log surface matches `reenqueueUnverified`'s existing pattern: + * a `[scheduler]`-prefixed console call, no structured logger is wired + * through the package yet. + */ async function onEvent(event: NostrEvent): Promise { if (stopped) return; stats.eventsReceived += 1; @@ -586,53 +598,103 @@ export function createScheduler(config: SchedulerConfig): Scheduler { switch (event.kind) { case 38172: case 38173: { - const parsed = parseMintAnnouncement(event); - if (!parsed) return; - const row = toAnnouncementRow(parsed); - const result = await upsertAnnouncement(db, row); - if (result === "rejected-invalid") { - stats.rejectedByLayerA += 1; - } - if (result === "inserted" || result === "replaced") { - stats.accepted += 1; - updateWatermark(event.kind, event.created_at); - // Layer B only runs on Cashu — verifySignerBinding will short- - // circuit non-cashu, but skipping the enqueue avoids the - // bookkeeping noise. - if (event.kind === 38172) { - enqueueLayerB(row); + try { + const parsed = parseMintAnnouncement(event); + if (!parsed) return; + const row = toAnnouncementRow(parsed); + const result = await upsertAnnouncement(db, row); + if (result === "rejected-invalid") { + stats.rejectedByLayerA += 1; + } + if (result === "inserted" || result === "replaced") { + stats.accepted += 1; + updateWatermark(event.kind, event.created_at); + // Layer B only runs on Cashu — verifySignerBinding will short- + // circuit non-cashu, but skipping the enqueue avoids the + // bookkeeping noise. 
+ if (event.kind === 38172) { + enqueueLayerB(row); + } } + } catch (err) { + stats.handlerErrors += 1; + console.error("[scheduler] handler error", { + kind: event.kind, + eventId: event.id, + err, + }); } return; } case 38000: { - const parsed = parseRecommendation(event); - if (!parsed) return; - const row = toReviewRow(parsed); - const result = await upsertReview(db, row); - if (result === "inserted" || result === "replaced") { - stats.accepted += 1; - updateWatermark(event.kind, event.created_at); + try { + // PR #5: parse via reviews/parseReview (all 4 rating formats + + // null fallback) and route through the aggregate-materializing + // upsert wrapper so the mintAggregate row stays in sync inside + // the same Dexie transaction as the review write. + const row = parseReview(event); + if (!row) { + // parseReview returns null for missing/empty `d` or wrong kind + // — neither should reach here in a healthy pipeline but both + // are silent drops worth counting (silent-failure gap). + stats.rejectedByParse += 1; + return; + } + const result = await upsertReviewWithAggregate(db, row, now); + if (result === "inserted" || result === "replaced") { + stats.accepted += 1; + updateWatermark(event.kind, event.created_at); + } else if (result === "rejected-invalid") { + // Layer A gate on reviews: pointing at a bot-spam d-tag. Count + // under the same stats bucket as the announcement Layer A + // rejection — it's the same firewall. 
+ stats.rejectedByLayerA += 1; + } + } catch (err) { + stats.handlerErrors += 1; + console.error("[scheduler] handler error", { + kind: event.kind, + eventId: event.id, + err, + }); } return; } case 0: { - const row = toProfileRow(event); - if (!row) return; - const result = await upsertProfile(db, row); - if (result === "inserted" || result === "replaced") { - stats.accepted += 1; - updateWatermark(event.kind, event.created_at); + try { + const row = toProfileRow(event); + if (!row) return; + const result = await upsertProfile(db, row); + if (result === "inserted" || result === "replaced") { + stats.accepted += 1; + updateWatermark(event.kind, event.created_at); + } + } catch (err) { + stats.handlerErrors += 1; + console.error("[scheduler] handler error", { + kind: event.kind, + eventId: event.id, + err, + }); } return; } case 10002: { - const row = toRelayListRow(event); - if (!row) return; - const result = await upsertRelayList(db, row); - if (result === "inserted" || result === "replaced") { - stats.accepted += 1; - updateWatermark(event.kind, event.created_at); + try { + const row = toRelayListRow(event); + if (!row) return; + const result = await upsertRelayList(db, row); + if (result === "inserted" || result === "replaced") { + stats.accepted += 1; + updateWatermark(event.kind, event.created_at); + } + } catch (err) { + stats.handlerErrors += 1; + console.error("[scheduler] handler error", { + kind: event.kind, + eventId: event.id, + err, + }); } return; } @@ -687,9 +749,10 @@ export function createScheduler(config: SchedulerConfig): Scheduler { filters, onEvent: (event) => { // onEvent returns a promise; we don't await here because the - // pool callback contract is sync. Errors inside the handler - // are swallowed at this boundary (each kind's handler does - // its own try-catch around DB writes via Dexie's transaction). + // pool callback contract is sync. 
Each kind's case body wraps + // its own try/catch that counts into stats.handlerErrors, so + // a thrown Dexie transaction can't escape as an unhandled + // rejection or silently freeze the stats. void onEvent(event); }, closeOnEose: false,