From 4ff465685ef83c5f54e32ac4373a415309191322 Mon Sep 17 00:00:00 2001 From: orveth Date: Fri, 17 Apr 2026 10:18:33 -0700 Subject: [PATCH 1/8] feat(core/reviews): parse kind:38000, materialize aggregates, rank by bayesian damping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviews pipeline split across four modules under `src/reviews/`: - `parse.ts` — kind:38000 parser emitting `ReviewRow` directly; rating precedence per data-model-v1.md §4 (structured `["rating","N","5"]` > legacy `["rating","N"]` > content `N/5` or `N/10` > leading star emoji run > null). Parse stays pure (no d-tag gate) so it's usable from log views and pagination dedup. - `aggregate.ts` — `bayesianScore(avg, n) = avg * log10(n+1)` (null-avg → 0 for sort-index friendliness) and `recomputeAggregateInTx(db, d)` scoped to the reviews+mintAggregate transaction. - `upsert.ts` — `upsertReviewWithAggregate` wraps cache CAS + aggregate recompute in one `rw` transaction so concurrent reads never see a review without its aggregate reflection. Recompute skipped on rejected-stale and rejected-invalid. - `rank.ts` — `rankMints(db, limit=50)` using the `bayesianScore` index. Schema v3 bump: `bayesianRank` → `bayesianScore`, `averageRating` → `avgRating`, adds `ratedCount`; indexes `bayesianScore` + `avgRating` + `updatedAt`. Additive migration via Dexie `.version(3).stores({...})`. Scheduler's kind:38000 branch now delegates to the reviews module and counts `rejected-invalid` against `rejectedByLayerA`. The Layer A bot-spam gate moves into `upsertReview` with a Fedimint (k=38173) bypass, so 38172 Cashu-mint reviews with bogus 16-char d-tags get firewalled at the edge, while federation IDs (k=38173) pass through unconstrained per research. Tests: +60 added across 6 files — parse branches (30), aggregate formula + recompute, CAS+aggregate-in-sync, rank sort + limit, and a 5-event real-corpus smoke from audit/relay-data/recs-38000.json. 
Integration test updated: 2 of 5 fixture reviews now firewall on d-tag shape, which bumps `rejectedByLayerA` from 5→7 and `accepted` from 11→9. Full workspace: 219 tests passing. Co-Authored-By: Claude Opus 4.7 --- packages/core/src/cache/schema.test.ts | 15 +- packages/core/src/cache/schema.ts | 66 +++- packages/core/src/cache/upsert.test.ts | 11 +- packages/core/src/cache/upsert.ts | 30 +- packages/core/src/index.ts | 1 + packages/core/src/integration.test.ts | 45 ++- packages/core/src/reviews/aggregate.test.ts | 199 ++++++++++ packages/core/src/reviews/aggregate.ts | 90 +++++ packages/core/src/reviews/corpus.test.ts | 162 ++++++++ packages/core/src/reviews/index.ts | 4 + packages/core/src/reviews/parse.test.ts | 399 ++++++++++++++++++++ packages/core/src/reviews/parse.ts | 196 ++++++++++ packages/core/src/reviews/rank.test.ts | 147 ++++++++ packages/core/src/reviews/rank.ts | 29 ++ packages/core/src/reviews/upsert.test.ts | 211 +++++++++++ packages/core/src/reviews/upsert.ts | 39 ++ packages/core/src/scheduler/index.ts | 42 +-- 17 files changed, 1625 insertions(+), 61 deletions(-) create mode 100644 packages/core/src/reviews/aggregate.test.ts create mode 100644 packages/core/src/reviews/aggregate.ts create mode 100644 packages/core/src/reviews/corpus.test.ts create mode 100644 packages/core/src/reviews/index.ts create mode 100644 packages/core/src/reviews/parse.test.ts create mode 100644 packages/core/src/reviews/parse.ts create mode 100644 packages/core/src/reviews/rank.test.ts create mode 100644 packages/core/src/reviews/rank.ts create mode 100644 packages/core/src/reviews/upsert.test.ts create mode 100644 packages/core/src/reviews/upsert.ts diff --git a/packages/core/src/cache/schema.test.ts b/packages/core/src/cache/schema.test.ts index ed1a144..b8929d8 100644 --- a/packages/core/src/cache/schema.test.ts +++ b/packages/core/src/cache/schema.test.ts @@ -19,14 +19,16 @@ afterEach(async () => { }); describe("BitcoinmintsDB schema", () => { - it("opens at 
version 2 with all 6 tables present", async () => { + it("opens at version 3 with all 6 tables present", async () => { const db = new BitcoinmintsDB(freshName()); toDispose.push(db); await db.open(); - // v2 adds the [kind+createdAt] compound index to announcements (used by - // restoreWatermarks for bounded .last() lookups per kind). - expect(db.verno).toBe(2); + // v3 renames mintAggregate's `bayesianRank` index → `bayesianScore` and + // adds `avgRating` so the ranking aggregator can sort by either without + // a full-table scan. v2 added the [kind+createdAt] compound index on + // announcements (used by scheduler.restoreWatermarks). + expect(db.verno).toBe(3); const names = db.tables.map((t) => t.name).sort(); expect(names).toEqual( ["announcements", "mintAggregate", "mintInfo", "profiles", "relayLists", "reviews"].sort(), @@ -74,8 +76,9 @@ describe("BitcoinmintsDB schema", () => { expect(indexNames("reviews")).toEqual(["createdAt", "d", "eventId", "k"]); // mintInfo secondary: fetchedAt, ok expect(indexNames("mintInfo")).toEqual(["fetchedAt", "ok"]); - // mintAggregate secondary: bayesianRank, updatedAt - expect(indexNames("mintAggregate")).toEqual(["bayesianRank", "updatedAt"]); + // mintAggregate secondary (v3): bayesianScore + avgRating (new) + + // updatedAt. `bayesianRank` from v1 is dropped in v3. + expect(indexNames("mintAggregate")).toEqual(["avgRating", "bayesianScore", "updatedAt"]); }); it("starts empty", async () => { diff --git a/packages/core/src/cache/schema.ts b/packages/core/src/cache/schema.ts index 33d9872..2445fa6 100644 --- a/packages/core/src/cache/schema.ts +++ b/packages/core/src/cache/schema.ts @@ -59,10 +59,23 @@ export type ReviewRow = { d: string; eventId: string; createdAt: number; - /** Pointer-kind tag: 38172 (Cashu) or 38173 (Fedimint). */ - k?: number; - /** Parsed 0..5 rating. */ - rating?: number; + /** + * Pointer-kind tag: 38172 (Cashu) or 38173 (Fedimint). 
Optional because + * in-the-wild events sometimes omit the `k` tag entirely; keep lenient. + */ + k?: 38172 | 38173; + /** + * Mint URL(s) from optional `u` tags on the recommendation — display-only + * helper, does NOT participate in replaceable-event keying. + */ + u?: string[]; + /** + * Parsed rating in 1..5 inclusive, `null` when no rating could be extracted + * from tags or content. Explicit `null` rather than `undefined` to + * distinguish "no rating present" (which is a valid review state) from + * "field not yet populated". + */ + rating: number | null; /** Freeform review text. */ content: string; rawTags: string[][]; @@ -111,12 +124,39 @@ export type MintInfoRow = { lastError?: string; }; -/** Aggregated per-mint ranking row (populated in PR #5 — empty in PR #3). */ +/** + * Aggregated per-mint ranking row (populated in PR #5). + * + * Materialized from all `reviews` rows with a given `d`. Recomputed in the + * same Dexie transaction as the triggering review upsert so they never go + * out of sync. `bayesianScore` is the sort key — it damps low-count + * averages so a single 5★ review cannot outrank 4★×10 (see data-model-v1.md + * §13). + * + * Schema transition: v2 exposed `averageRating` + `bayesianRank` as + * optional-number fields; v3 tightens the contract so every row carries + * explicit values (`avgRating: number | null`, `bayesianScore: number`) and + * adds `ratedCount` — the count of reviews contributing to `avgRating`, + * distinct from `reviewCount` which counts all reviews including unrated. + * The index rename from `bayesianRank` → `bayesianScore` drives the v3 bump. + */ export type MintAggregateRow = { + /** Primary key — the mint d-tag this aggregate is for. */ d: string; + /** Count of ALL reviews for this mint (rated + unrated). */ reviewCount: number; - averageRating?: number; - bayesianRank?: number; + /** Count of reviews with `rating != null` — the divisor of `avgRating`. 
*/ + ratedCount: number; + /** Mean across the `ratedCount` reviews, or `null` when `ratedCount===0`. */ + avgRating: number | null; + /** + * Bayesian sort score — `avgRating * log10(ratedCount + 1)` when + * `avgRating != null`, else `0`. A single rated review scores + * `rating * log10(2) ≈ rating * 0.301`; 10 reviews score `rating * log10(11) + * ≈ rating * 1.041`. The damping makes low-count mints sort below + * higher-count mints of the same average. + */ + bayesianScore: number; /** Epoch-ms timestamp of the aggregate recompute (used for CAS). */ updatedAt: number; }; @@ -151,5 +191,17 @@ export class BitcoinmintsDB extends Dexie { this.version(2).stores({ announcements: "[pubkey+kind+d], eventId, kind, d, createdAt, [kind+createdAt]", }); + // v3: rename `bayesianRank` → `bayesianScore` on mintAggregate and add + // `avgRating` to the index set so sort-by-avg queries don't need a full + // table scan. These are the indexes used by PR #5's ranking + // aggregator. The prior `bayesianRank` index is dropped — rows written + // under v1/v2 (there are none shipped in production yet) would simply + // not be queryable by that old name. Any pre-existing rows in local + // dev caches stay in place (no `.upgrade()` rewrites them); the shape + // change from `averageRating` to `avgRating` is a TypeScript-layer + // concern (Dexie doesn't type-check row payloads). 
+ this.version(3).stores({ + mintAggregate: "d, bayesianScore, avgRating, updatedAt", + }); } } diff --git a/packages/core/src/cache/upsert.test.ts b/packages/core/src/cache/upsert.test.ts index 07be5fd..ecb0115 100644 --- a/packages/core/src/cache/upsert.test.ts +++ b/packages/core/src/cache/upsert.test.ts @@ -112,8 +112,9 @@ function makeMintAggregate(over: Partial = {}): MintAggregateR return { d: D_XONLY, reviewCount: 5, - averageRating: 4.2, - bayesianRank: 3.8, + ratedCount: 5, + avgRating: 4.2, + bayesianScore: 3.8, updatedAt: 1_700_000_000, ...over, }; @@ -590,8 +591,8 @@ describe("upsertMintInfo", () => { describe("upsertMintAggregate", () => { it("inserts, replaces on newer updatedAt, rejects older", async () => { const db = await freshDB(); - const older = makeMintAggregate({ updatedAt: 1000, bayesianRank: 1.0 }); - const newer = makeMintAggregate({ updatedAt: 2000, bayesianRank: 4.5 }); + const older = makeMintAggregate({ updatedAt: 1000, bayesianScore: 1.0 }); + const newer = makeMintAggregate({ updatedAt: 2000, bayesianScore: 4.5 }); const ancient = makeMintAggregate({ updatedAt: 500 }); expect(await upsertMintAggregate(db, older)).toBe("inserted"); @@ -599,7 +600,7 @@ describe("upsertMintAggregate", () => { expect(await upsertMintAggregate(db, ancient)).toBe("rejected-stale"); const fetched = await db.mintAggregate.get(older.d); - expect(fetched?.bayesianRank).toBe(4.5); + expect(fetched?.bayesianScore).toBe(4.5); expect(fetched?.updatedAt).toBe(2000); }); }); diff --git a/packages/core/src/cache/upsert.ts b/packages/core/src/cache/upsert.ts index b6c4025..44534e8 100644 --- a/packages/core/src/cache/upsert.ts +++ b/packages/core/src/cache/upsert.ts @@ -88,8 +88,36 @@ export async function upsertAnnouncement( }); } -/** Upsert a kind:38000 review. No Layer A gate — the `d` here points at a mint but isn't itself a Cashu pubkey owned by the reviewer. */ +/** + * Upsert a kind:38000 review with Layer A gating on the target `d` tag. 
+ * + * The review's `d` points at a mint. When `k === 38172` (or `k` is absent, + * which is how most in-the-wild Cashu reviews shape), we apply the same + * Layer A d-regex gate that `upsertAnnouncement` uses — if the referenced + * mint pubkey isn't 64/66-char hex, the review is bot-spam pointing at + * bot-spam, returned as `rejected-invalid`. This is the firewall that + * keeps the 959 zero-d-tag bot spam events (per relay-strategy §4) from + * filtering up into the ranking aggregate. + * + * `k === 38173` (Fedimint) bypasses the gate — federation IDs have a + * different shape and their validator is TODO-v1.1, identical to the + * announcement upsert. + * + * Note: this low-level upsert is the mechanical write. It does NOT + * materialize the `mintAggregate` row — the `reviews/` wrapper composes + * this with `recomputeAggregateInTx` inside a single transaction so the + * two stores stay in sync. Callers outside `reviews/` (integration tests, + * direct usage) can call this helper directly and will simply skip the + * aggregate materialization — safe but stale. + */ export async function upsertReview(db: BitcoinmintsDB, row: ReviewRow): Promise { + // Layer A gate for Cashu-pointing reviews. Fedimint (k=38173) bypasses. + // No `k` tag → treat as Cashu (the default for in-the-wild events per + // rating-tag-research §3). 
+ const isFedimint = row.k === 38173; + if (!isFedimint && !isValidCashuDTag(row.d)) { + return "rejected-invalid"; + } return db.transaction("rw", db.reviews, async () => { const prev = await db.reviews.get([row.pubkey, row.kind, row.d]); if (!prev) { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 2064463..302e547 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -2,5 +2,6 @@ export * from "./cache"; export * from "./cashu"; export * from "./nip87"; export * from "./nostr"; +export * from "./reviews"; export const VERSION = "0.0.0"; diff --git a/packages/core/src/integration.test.ts b/packages/core/src/integration.test.ts index 2407cf1..69ad30b 100644 --- a/packages/core/src/integration.test.ts +++ b/packages/core/src/integration.test.ts @@ -79,17 +79,23 @@ function toAnnouncementRow( } function toReviewRow(parsed: NonNullable>): ReviewRow { - return { + // `parsed.rating` is `number | undefined` from the nip87 parse layer; + // the cache layer requires `number | null` (explicit "no rating" state). + const rating = parsed.rating ?? null; + const row: ReviewRow = { pubkey: parsed.pubkey, kind: 38000, d: parsed.d, eventId: parsed.eventId, createdAt: parsed.createdAt, - k: parsed.k, - rating: parsed.rating, content: parsed.content, rawTags: parsed.raw.tags, + rating, }; + // `parsed.k` is `number | undefined`; narrow to the Cashu/Fedimint + // pair the cache row shape accepts. + if (parsed.k === 38172 || parsed.k === 38173) row.k = parsed.k; + return row; } /** @@ -170,12 +176,17 @@ describe("integration: corpus replay → parse → cache", () => { expect(inserted.length).toBe(expectedAccepted); expect(rejectedInvalid.length).toBe(expectedRejected); - // Reviews: all 5 recommendations parse and insert (each has unique - // [pubkey,38000,d]). 
- expect(await db.reviews.count()).toBe(f.recommendations38000.length); - expect(await db.reviews.count()).toBe(5); + // Reviews: all 5 recommendations parse, but Layer A applies to the + // reviews' `d` tag too (PR #5) — 2 of the 5 point at 16-char legacy + // d-tags that pre-date the Cashu-mint-pubkey d-tag convention and + // would be indistinguishable from the bot-spam shape the gate is + // designed to reject. Those are `rejected-invalid`. The remaining 3 + // reference real 64-char Cashu mint pubkeys and insert cleanly. const reviewsInserted = reviewResults.filter((r) => r.result === "inserted"); - expect(reviewsInserted.length).toBe(5); + const reviewsRejectedInvalid = reviewResults.filter((r) => r.result === "rejected-invalid"); + expect(reviewsInserted.length).toBe(3); + expect(reviewsRejectedInvalid.length).toBe(2); + expect(await db.reviews.count()).toBe(3); }); it("the legacy Nostrodomo (64-char x-only) lands as inserted, not rejected-invalid", async () => { @@ -431,13 +442,15 @@ describe("integration: scheduler full pipeline", () => { // Stats: same accept/reject as the parse → cache integration above // (5 bot-spam rejected at Layer A; 1 legacy + 2 spec-conforming + 3 - // fedimint accepted = 6 announcements; 5 reviews accepted). + // fedimint accepted = 6 announcements; 3 reviews accepted + 2 reviews + // rejected for 16-char legacy d-tags per PR #5's Layer A review gate). const stats = sched.getStats(); // 11 announcements (5 spam + 1 legacy + 2 spec + 3 fedi) + 5 reviews = 16. expect(stats.eventsReceived).toBe(16); - expect(stats.rejectedByLayerA).toBe(5); - // Accepted = 6 announcements + 5 reviews = 11. - expect(stats.accepted).toBe(11); + // 5 announcement bot-spam rejections + 2 review 16-char d-tag rejections. + expect(stats.rejectedByLayerA).toBe(7); + // Accepted = 6 announcements + 3 reviews = 9. + expect(stats.accepted).toBe(9); // Layer B: spec-conforming Alpha + Beta verify. 
Legacy Nostrodomo // returns ok but with the wrong pubkey → counts as failed. Fedimint @@ -447,9 +460,10 @@ describe("integration: scheduler full pipeline", () => { expect(stats.layerBPending).toBe(0); // Cache state matches the parse → cache test exactly: 6 announcements, - // 5 reviews. Bot-spam rejected at Layer A, never lands. + // 3 reviews (2 more reviews rejected by PR #5's Layer A on reviews' + // d-tags). Bot-spam rejected at Layer A, never lands. expect(await db.announcements.count()).toBe(6); - expect(await db.reviews.count()).toBe(5); + expect(await db.reviews.count()).toBe(3); // Spot-check verifiedBySignerBinding wired through correctly. const alphaPubkey = "02aa00000000000000000000000000000000000000000000000000000000000001"; @@ -518,7 +532,8 @@ describe("integration: scheduler full pipeline", () => { fetches: calls1.length, }; expect(round1Counts.announcements).toBe(6); - expect(round1Counts.reviews).toBe(5); + // 3 reviews (2 more gated out by PR #5's Layer A on review d-tags). + expect(round1Counts.reviews).toBe(3); expect(round1Counts.mintInfo).toBe(3); // Round 2 — fresh scheduler against same DB. createScheduler reads diff --git a/packages/core/src/reviews/aggregate.test.ts b/packages/core/src/reviews/aggregate.test.ts new file mode 100644 index 0000000..f70b484 --- /dev/null +++ b/packages/core/src/reviews/aggregate.test.ts @@ -0,0 +1,199 @@ +/** + * Unit tests for the mintAggregate materialization — exercises the + * bayesian formula directly, then the recompute-from-reviews path. 
+ */ +import { afterEach, describe, expect, it } from "vitest"; +import { BitcoinmintsDB, type ReviewRow } from "../cache"; +import { bayesianScore, recomputeAggregateInTx } from "./aggregate"; + +const freshName = () => `test-aggregate-${Math.random().toString(36).slice(2)}`; +const toDispose: BitcoinmintsDB[] = []; + +afterEach(async () => { + while (toDispose.length > 0) { + const db = toDispose.pop(); + if (!db) continue; + db.close(); + await BitcoinmintsDB.delete(db.name); + } +}); + +async function freshDB(): Promise { + const db = new BitcoinmintsDB(freshName()); + toDispose.push(db); + await db.open(); + return db; +} + +const D_A = "5fe928ae0970844f3c5253d2e85a88788486edcbd96c070334a4a2d0d0154a77"; +const D_B = "0".repeat(63) + "1"; + +function makeReview(over: Partial & { d?: string } = {}): ReviewRow { + return { + pubkey: `pk${Math.random().toString(36).slice(2, 10)}${"0".repeat(50)}`, + kind: 38000, + d: over.d ?? D_A, + eventId: `${"0".repeat(58)}${Math.random().toString(36).slice(2, 8)}`, + createdAt: 1_700_000_000, + content: "", + rawTags: [], + rating: 5, + ...over, + }; +} + +/** + * Directly insert a review row without going through upsertReview's CAS + * transaction. Used by tests to populate the reviews table so we can + * exercise recompute in isolation. 
+ */ +async function seedReviews(db: BitcoinmintsDB, rows: ReviewRow[]): Promise { + await db.reviews.bulkPut(rows); +} + +describe("bayesianScore formula", () => { + it("null avg → 0 (no rated reviews, no sort signal)", () => { + expect(bayesianScore(null, 0)).toBe(0); + expect(bayesianScore(null, 5)).toBe(0); + }); + + it("single 5★ review: 5 * log10(2) ≈ 1.505", () => { + const score = bayesianScore(5, 1); + expect(score).toBeCloseTo(5 * Math.log10(2), 6); + expect(score).toBeGreaterThan(1.5); + expect(score).toBeLessThan(1.51); + }); + + it("10 × 4★ reviews: 4 * log10(11) ≈ 4.166", () => { + const score = bayesianScore(4, 10); + expect(score).toBeCloseTo(4 * Math.log10(11), 6); + expect(score).toBeGreaterThan(4.15); + expect(score).toBeLessThan(4.17); + }); + + it("damping rule: single 5★ sorts BELOW 10 × 4★ (bayesian dominates)", () => { + const single5 = bayesianScore(5, 1); + const tenFour = bayesianScore(4, 10); + expect(single5).toBeLessThan(tenFour); + }); + + it("edge: log10(1)=0 when ratedCount=0 — even with avg set, degenerate case returns 0", () => { + // Shouldn't happen in practice, but guard against divide-by-zero glitches. 
+ expect(bayesianScore(5, 0)).toBe(0); + }); +}); + +describe("recomputeAggregateInTx — materialization", () => { + it("0 reviews for d → writes reviewCount=0, ratedCount=0, avgRating=null, bayesianScore=0", async () => { + const db = await freshDB(); + const row = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A, () => 12345), + ); + expect(row.reviewCount).toBe(0); + expect(row.ratedCount).toBe(0); + expect(row.avgRating).toBeNull(); + expect(row.bayesianScore).toBe(0); + expect(row.updatedAt).toBe(12345); + + const persisted = await db.mintAggregate.get(D_A); + expect(persisted).toEqual(row); + }); + + it("1 rated review → avg = that rating; bayesian = rating * log10(2)", async () => { + const db = await freshDB(); + await seedReviews(db, [makeReview({ rating: 5, pubkey: "pk-1" })]); + const row = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A), + ); + expect(row.reviewCount).toBe(1); + expect(row.ratedCount).toBe(1); + expect(row.avgRating).toBe(5); + expect(row.bayesianScore).toBeCloseTo(5 * Math.log10(2), 6); + }); + + it("multiple rated reviews: correct mean and count", async () => { + const db = await freshDB(); + await seedReviews(db, [ + makeReview({ rating: 5, pubkey: "pk-1" }), + makeReview({ rating: 3, pubkey: "pk-2" }), + makeReview({ rating: 4, pubkey: "pk-3" }), + makeReview({ rating: 5, pubkey: "pk-4" }), + ]); + const row = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A), + ); + expect(row.reviewCount).toBe(4); + expect(row.ratedCount).toBe(4); + expect(row.avgRating).toBe((5 + 3 + 4 + 5) / 4); + expect(row.bayesianScore).toBeCloseTo(row.avgRating! 
* Math.log10(5), 6); + }); + + it("mixed rated + unrated reviews: reviewCount counts all, ratedCount counts rated, avg only from rated", async () => { + const db = await freshDB(); + await seedReviews(db, [ + makeReview({ rating: 5, pubkey: "pk-1" }), + makeReview({ rating: null, pubkey: "pk-2" }), + makeReview({ rating: 3, pubkey: "pk-3" }), + makeReview({ rating: null, pubkey: "pk-4" }), + makeReview({ rating: null, pubkey: "pk-5" }), + ]); + const row = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A), + ); + expect(row.reviewCount).toBe(5); + expect(row.ratedCount).toBe(2); + expect(row.avgRating).toBe((5 + 3) / 2); // 4 + expect(row.bayesianScore).toBeCloseTo(4 * Math.log10(3), 6); + }); + + it("all unrated reviews: avg stays null, bayesian is 0, but reviewCount reflects them", async () => { + const db = await freshDB(); + await seedReviews(db, [ + makeReview({ rating: null, pubkey: "pk-1" }), + makeReview({ rating: null, pubkey: "pk-2" }), + ]); + const row = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A), + ); + expect(row.reviewCount).toBe(2); + expect(row.ratedCount).toBe(0); + expect(row.avgRating).toBeNull(); + expect(row.bayesianScore).toBe(0); + }); + + it("scoping: recompute for d=A ignores reviews keyed on d=B", async () => { + const db = await freshDB(); + await seedReviews(db, [ + makeReview({ rating: 5, pubkey: "pk-1", d: D_A }), + makeReview({ rating: 1, pubkey: "pk-2", d: D_B }), + makeReview({ rating: 1, pubkey: "pk-3", d: D_B }), + ]); + const rowA = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A), + ); + expect(rowA.reviewCount).toBe(1); + expect(rowA.avgRating).toBe(5); + + const rowB = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_B), + ); + expect(rowB.reviewCount).toBe(2); + expect(rowB.avgRating).toBe(1); + }); + + 
it("idempotency: recomputing twice with no changes produces the same row payload (except updatedAt)", async () => { + const db = await freshDB(); + await seedReviews(db, [makeReview({ rating: 4, pubkey: "pk-1" })]); + const first = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A, () => 1000), + ); + const second = await db.transaction("rw", db.reviews, db.mintAggregate, async () => + recomputeAggregateInTx(db, D_A, () => 2000), + ); + expect(first.reviewCount).toBe(second.reviewCount); + expect(first.ratedCount).toBe(second.ratedCount); + expect(first.avgRating).toBe(second.avgRating); + expect(first.bayesianScore).toBe(second.bayesianScore); + expect(second.updatedAt).toBeGreaterThan(first.updatedAt); + }); +}); diff --git a/packages/core/src/reviews/aggregate.ts b/packages/core/src/reviews/aggregate.ts new file mode 100644 index 0000000..5f47422 --- /dev/null +++ b/packages/core/src/reviews/aggregate.ts @@ -0,0 +1,90 @@ +/** + * `mintAggregate` materialization — derived from all `reviews` rows with a + * given `d` tag. Runs inside the same Dexie transaction as the triggering + * review upsert so the two stores can't go out of sync across a crash. + * + * See data-model-v1.md §13: we maintain a cached aggregate rather than a + * live Dexie query because ranking needs a per-filter-change sort and + * groupby-by-d is expensive in the browser. Recompute is cheap — scoped + * to a single `d`, per-mint review count is bounded (~500 worst case), + * entirely in memory once Dexie has surfaced the review rows. 
+ * + * Bayesian score (damping formula, §13): + * + * bayesianScore = avgRating * log10(ratedCount + 1) when avgRating != null + * bayesianScore = 0 otherwise + * + * The damping makes a single 5★ review sort below 4★×10: + * single 5★: 5 * log10(2) ≈ 1.505 + * 4★ × 10: 4 * log10(11) ≈ 4.166 + * + * Writing out `0` for the un-rated case rather than `null` lets the index + * on `bayesianScore` be usable as a single `.orderBy('bayesianScore')` + * range — `null` is not a valid IndexedDB key, so a row written with + * `null` would be left out of the index and never returned by that query. + * Zero-scored mints sort below any positive-scored mint which is the + * correct UX (and matches the no-reviews baseline). + */ +import type { BitcoinmintsDB, MintAggregateRow, ReviewRow } from "../cache"; + +/** + * Compute the Bayesian sort key from an average rating and the count of + * reviews that contributed to it. Exposed for test assertions. + */ +export function bayesianScore(avgRating: number | null, ratedCount: number): number { + if (avgRating === null) return 0; + // log10(0 + 1) = 0 — guard against a degenerate avgRating-with-zero-count + // (shouldn't happen in practice but clamps to a safe value if it ever does). + return avgRating * Math.log10(ratedCount + 1); +} + +/** + * Recompute the aggregate for mint `d` from its current `reviews` rows and + * upsert it. Call INSIDE an open Dexie `rw` transaction that includes both + * `db.reviews` and `db.mintAggregate` — Dexie auto-binds this work to the + * outer transaction so a crash between the review write and the aggregate + * write is impossible. + * + * Behaviour on zero reviews: still writes a row with reviewCount=0, + * ratedCount=0, avgRating=null, bayesianScore=0. This matters for the + * "review deletion" path — the aggregate doesn't get orphaned with a + * stale average when the last review for a mint is replaced or removed. 
+ * + * CAS policy: uses `put` (unconditional write) rather than the + * upsert.ts monotonic-timestamp gate. Inside the transaction we already + * hold the latest review state — the previously-written aggregate is by + * definition older (or equal in the zero-ops degenerate case, which is + * still a safe overwrite). Same-ms ties are fine because all row fields + * are deterministically derived from the review set. + * + * `now` is injectable for deterministic tests. Defaults to `Date.now`. + */ +export async function recomputeAggregateInTx( + db: BitcoinmintsDB, + d: string, + now: () => number = Date.now, +): Promise<MintAggregateRow> { + const reviews: ReviewRow[] = await db.reviews.where("d").equals(d).toArray(); + + const reviewCount = reviews.length; + let ratedCount = 0; + let sum = 0; + for (const r of reviews) { + if (r.rating !== null) { + ratedCount += 1; + sum += r.rating; + } + } + const avgRating = ratedCount > 0 ? sum / ratedCount : null; + const row: MintAggregateRow = { + d, + reviewCount, + ratedCount, + avgRating, + bayesianScore: bayesianScore(avgRating, ratedCount), + updatedAt: now(), + }; + + await db.mintAggregate.put(row); + return row; +} diff --git a/packages/core/src/reviews/corpus.test.ts b/packages/core/src/reviews/corpus.test.ts new file mode 100644 index 0000000..997be47 --- /dev/null +++ b/packages/core/src/reviews/corpus.test.ts @@ -0,0 +1,162 @@ +/** + * Corpus smoke test for the review pipeline. Uses a handful of real + * kind:38000 events lifted from + * /srv/forge/projects/bitcoinmints/audit/relay-data/recs-38000.json so the + * parse → upsert → aggregate → rank chain is exercised against the actual + * shapes relays emit — not just hand-written tests. + * + * Scope kept small (3 mints, 5 events) so this file stays committable + * without pulling a 32k-line JSON corpus into the package. 
+ */ +import type { Event as NostrEvent } from "nostr-tools/core"; +import { afterEach, describe, expect, it } from "vitest"; +import { BitcoinmintsDB } from "../cache"; +import { parseReview } from "./parse"; +import { rankMints } from "./rank"; +import { upsertReviewWithAggregate } from "./upsert"; + +const freshName = () => `test-review-corpus-${Math.random().toString(36).slice(2)}`; +const toDispose: BitcoinmintsDB[] = []; + +afterEach(async () => { + while (toDispose.length > 0) { + const db = toDispose.pop(); + if (!db) continue; + db.close(); + await BitcoinmintsDB.delete(db.name); + } +}); + +async function freshDB(): Promise<BitcoinmintsDB> { + const db = new BitcoinmintsDB(freshName()); + toDispose.push(db); + await db.open(); + return db; +} + +/** + * Real kind:38000 events taken verbatim from the audit's relay dump (IDs, + * signatures, and timestamps preserved), plus one manufactured 2★ event + * flagged inline. The d-tags are real Fedimint federation IDs (all `k=38173`). + * + * Note: Fedimint federation IDs aren't constrained by the Layer A d-regex, + * but this corpus uses 64-char hex which passes the regex either way. + */ +const REAL_EVENTS: NostrEvent[] = [ + // Fedimint 1 — single 5★ review. + { + content: "[5/5]", + created_at: 1776360005, + id: "50d2e3d560f5a312965ff977ed7755246de4dd64c03fdd5adf99caa587cb53d0", + kind: 38000, + pubkey: "1944cd868d0b996f58944b5748852d676e84f32c50cb224f65432ddf55045666", + sig: "033a48b4844452784ffeecd9c379c1814f5c3386414a7f7f1e255ea11bfdd4686fc8fd287582971f9fa541fd64113119e7c882d9d6c3f2a3eecd90468acb5d1f", + tags: [ + ["d", "27e032c0f1ff18213c3a94c2426f20a4000479b318712e93a7e56286fed00a2f"], + ["k", "38173"], + ["rating", "5"], + ], + }, + // Fedimint 2 — 3 reviewers, mix of ratings. 
+ { + content: "[5/5]", + created_at: 1776351305, + id: "42c89639b9471d2f8aa9475731dde0873a3bb8d4b2dfa72d5162cd146d50fdd5", + kind: 38000, + pubkey: "3c00865afdb1dd2f8b68a9f802d0bbce2e6e9ebdb03f1a4686494a67e999b0a1", + sig: "d2e3306255989bc52fb1ccfc25a282a7e79beb8a5bc7fd80e79ce16f5621093226b4642c7f8e8510860595612547fd7999d3b96cef80ea45a3531e6f9c426d71", + tags: [ + ["d", "718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af"], + ["k", "38173"], + ["rating", "5"], + ], + }, + { + content: "[5/5]", + created_at: 1776298850, + id: "f22e2e76dca0d577a7695e45d5d0abd1ed6f5d6bc0e233c9e348c1b9339f8e6d", + kind: 38000, + pubkey: "82f1ae3bdd172c0ce69553165e8237e2fdf7fa32832707de130a274fcfaf1b10", + sig: "549aad601d19a699eed21ae9ce9f9f35b855d7e5f898c933636c00f2397d34cddd3a80c7f89eb8efc5b867dfdb88c18dab0573022be1588d4b44f6c456bc71a2", + tags: [ + ["d", "718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af"], + ["k", "38173"], + ["rating", "5"], + ], + }, + { + // Manufactured 2★ on the same mint to ensure the aggregate mean is + // actually computed (not a constant-5 test). + content: "[2/5]", + created_at: 1776298900, + id: "f22e2e76dca0d577a7695e45d5d0abd1ed6f5d6bc0e348c1b9339f8e6cffffff", + kind: 38000, + pubkey: "92f1ae3bdd172c0ce69553165e8237e2fdf7fa32832707de130a274fcfaf1b11", + sig: "00", + tags: [ + ["d", "718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af"], + ["k", "38173"], + ["rating", "2", "5"], + ], + }, + // Fedimint 3 — single review. 
+ { + content: "[5/5]", + created_at: 1776291469, + id: "e810bc759a3dac39ebfdf652df932d79d031712499618401fb8e01ed412c88c3", + kind: 38000, + pubkey: "ddc17385fdd1cc2df1e6f3a248c5a14ccaa9fcab17281d057e58965423de4617", + sig: "a16145e091e36629c720ab791a34a6ece6e2b6ad94f8a61361fef06c96d3ef2f89c6b6b03e882d1281de71e05d0b4f89d068e29d00c6c8736c90a7c0a2970afd", + tags: [ + ["d", "3beb71872cea0b97082ff1f6450e722903bc7ac09e5b4dc33105999f2901b4eb"], + ["k", "38173"], + ["rating", "5"], + ], + }, +]; + +describe("reviews: real corpus pipeline", () => { + it("parses every event, materializes aggregates, ranks by damped bayesian", async () => { + const db = await freshDB(); + + // Parse every real event and push through the upsert+aggregate path. + const results: string[] = []; + for (const e of REAL_EVENTS) { + const row = parseReview(e); + expect(row).not.toBeNull(); + if (!row) continue; + const r = await upsertReviewWithAggregate(db, row); + results.push(r); + } + + // Every real event inserted (no duplicates, no stale). + expect(results).toEqual(["inserted", "inserted", "inserted", "inserted", "inserted"]); + + // 5 rows total. + expect(await db.reviews.count()).toBe(5); + + // Three aggregates — one per distinct d. + expect(await db.mintAggregate.count()).toBe(3); + + // Ranking order check. Federation with 3 rated reviews ((5+5+2)/3 ≈ 4.00 + // * log10(4)=0.602 → 2.408) outranks federation 1 with a single 5★ + // (5 * log10(2)=0.301 → 1.505) — even though fed1 has a higher + // average. The single-5★ federation 3 is ranked equal to federation 1 + // (also 1.505) but Dexie breaks the tie deterministically on primary + // key (the d string); either may come first — what matters is both + // sort BELOW the 3-review federation. 
+ const ranked = await rankMints(db); + expect(ranked).toHaveLength(3); + expect(ranked[0]?.d).toBe("718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af"); + expect(ranked[0]?.reviewCount).toBe(3); + expect(ranked[0]?.ratedCount).toBe(3); + expect(ranked[0]?.avgRating).toBeCloseTo((5 + 5 + 2) / 3, 6); + expect(ranked[0]?.bayesianScore).toBeCloseTo((12 / 3) * Math.log10(4), 6); + + expect(ranked[1]?.bayesianScore).toBeCloseTo(5 * Math.log10(2), 6); + expect(ranked[2]?.bayesianScore).toBeCloseTo(5 * Math.log10(2), 6); + // Sort invariant: scores strictly non-increasing. + expect(ranked[0]!.bayesianScore).toBeGreaterThan(ranked[1]!.bayesianScore); + expect(ranked[1]!.bayesianScore).toBeCloseTo(ranked[2]!.bayesianScore, 6); + }); +}); diff --git a/packages/core/src/reviews/index.ts b/packages/core/src/reviews/index.ts new file mode 100644 index 0000000..e2c307c --- /dev/null +++ b/packages/core/src/reviews/index.ts @@ -0,0 +1,4 @@ +export { bayesianScore, recomputeAggregateInTx } from "./aggregate"; +export { parseReview } from "./parse"; +export { rankMints } from "./rank"; +export { upsertReviewWithAggregate } from "./upsert"; diff --git a/packages/core/src/reviews/parse.test.ts b/packages/core/src/reviews/parse.test.ts new file mode 100644 index 0000000..ba767da --- /dev/null +++ b/packages/core/src/reviews/parse.test.ts @@ -0,0 +1,399 @@ +/** + * Unit tests for the kind:38000 review parser — each rating precedence + * rule gets its own dedicated assertion so a regression in one format + * can't be masked by a fallback. + */ +import type { Event as NostrEvent } from "nostr-tools/core"; +import { describe, expect, it } from "vitest"; +import { parseReview } from "./parse"; + +/** Realistic 64-char x-only Cashu d-tag. */ +const D_VALID = "5fe928ae0970844f3c5253d2e85a88788486edcbd96c070334a4a2d0d0154a77"; +/** 16-char legacy / bot-spam d-tag. 
*/ +const D_LEGACY_16 = "psvef0yh2zk24tt7"; + +function makeEvent(over: Partial<NostrEvent> & { tags?: string[][] } = {}): NostrEvent { + return { + id: "1".repeat(64), + pubkey: "2".repeat(64), + created_at: 1_700_000_000, + kind: 38000, + tags: [["d", D_VALID]], + content: "", + sig: "", + ...over, + } as NostrEvent; +} + +describe("parseReview — basic structure", () => { + it("returns null for non-38000 kinds", () => { + const e = makeEvent({ kind: 1 as unknown as 38000 }); + expect(parseReview(e)).toBeNull(); + }); + + it("returns null when the d tag is missing", () => { + const e = makeEvent({ tags: [["k", "38172"]] }); + expect(parseReview(e)).toBeNull(); + }); + + it("returns null when the d tag is present but empty", () => { + const e = makeEvent({ tags: [["d", ""]] }); + expect(parseReview(e)).toBeNull(); + }); + + it("preserves eventId, pubkey, d, createdAt, content, rawTags verbatim", () => { + const e = makeEvent({ + id: "a".repeat(64), + pubkey: "b".repeat(64), + created_at: 1_800_000_000, + content: "great mint", + tags: [ + ["d", D_VALID], + ["k", "38172"], + ["rating", "4", "5"], + ], + }); + const row = parseReview(e); + expect(row).not.toBeNull(); + if (!row) return; + expect(row.eventId).toBe(e.id); + expect(row.pubkey).toBe(e.pubkey); + expect(row.d).toBe(D_VALID); + expect(row.createdAt).toBe(e.created_at); + expect(row.content).toBe("great mint"); + expect(row.rawTags).toEqual(e.tags); + expect(row.kind).toBe(38000); + }); +}); + +describe("parseReview — rating formats (precedence)", () => { + it("Format 1: structured tag ['rating','N','5'] wins — integer 1..5", () => { + for (const n of [1, 2, 3, 4, 5]) { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", String(n), "5"], + ], + }), + ); + expect(row?.rating).toBe(n); + } + }); + + it("Format 1: out-of-range N (0 or 6) falls through", () => { + const below = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", "0", "5"], + ], + }), + ); + // No fallback
content, so rating is null. + expect(below?.rating).toBeNull(); + + const above = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", "6", "5"], + ], + }), + ); + expect(above?.rating).toBeNull(); + }); + + it("Format 2: legacy ['rating','N'] (no denominator) — integer 1..5", () => { + for (const n of [1, 3, 5]) { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", String(n)], + ], + }), + ); + expect(row?.rating).toBe(n); + } + }); + + it("Format 2: legacy ['rating','N'] with out-of-range N falls through", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", "7"], + ], + }), + ); + expect(row?.rating).toBeNull(); + }); + + it("Format 1 wins over Format 2 when both are present on the same event", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + // Format 2 appears first… + ["rating", "2"], + // …but Format 1 wins even though it's second. + ["rating", "5", "5"], + ], + }), + ); + expect(row?.rating).toBe(5); + }); + + it("Format 3a: content `[N/5]` anchored at start", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "[4/5] decent mint", + }), + ); + expect(row?.rating).toBe(4); + }); + + it("Format 3a: content `N/5` without brackets", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "3/5 avg", + }), + ); + expect(row?.rating).toBe(3); + }); + + it("Format 3a: tag takes precedence over content even when both present", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["rating", "2", "5"], + ], + content: "[5/5] content says five", + }), + ); + expect(row?.rating).toBe(2); + }); + + it("Format 3b: content `N/10` divides and rounds to nearest 1..5", () => { + // 10/10 → 5, 8/10 → 4, 6/10 → 3, 4/10 → 2, 2/10 → 1. 
+ const cases: Array<[string, number]> = [ + ["10/10", 5], + ["8/10 nice", 4], + ["7/10", 4], // round-to-nearest: 3.5 → 4 + ["6/10", 3], + ["5/10", 3], // round-to-nearest: 2.5 → 3 (half-up: Math.round rounds .5 toward +∞, not banker's rounding) + ["4/10", 2], + ["3/10", 2], // 1.5 → 2 + ["2/10", 1], + ]; + for (const [content, expected] of cases) { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content, + }), + ); + expect(row?.rating).toBe(expected); + } + }); + + it("Format 3b: `0/10` is treated as no-rating (doesn't fabricate a 1★)", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "0/10 total trash", + }), + ); + expect(row?.rating).toBeNull(); + }); + + it("Format 3: /5 wins over /10 when both are present (5 is tried first)", () => { + // Unlikely in practice but the ordering should be deterministic. + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "4/5 but also 8/10", + }), + ); + // Since 5-regex matches at index 0, it wins.
+ expect(row?.rating).toBe(4); + }); + + it("Format 4: leading emoji run of 1..5 stars (⭐)", () => { + const cases: Array<[string, number]> = [ + ["⭐ one star", 1], + ["⭐⭐ two", 2], + ["⭐⭐⭐ three", 3], + ["⭐⭐⭐⭐ four", 4], + ["⭐⭐⭐⭐⭐ five", 5], + ]; + for (const [content, expected] of cases) { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content, + }), + ); + expect(row?.rating).toBe(expected); + } + }); + + it("Format 4: 🌟 glyph works too (both are accepted)", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "🌟🌟🌟 three stars", + }), + ); + expect(row?.rating).toBe(3); + }); + + it("Format 4: a run of 6+ emojis is out of range → null", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "⭐⭐⭐⭐⭐⭐", + }), + ); + expect(row?.rating).toBeNull(); + }); + + it("Format 4: emoji not at start of content does not match", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "great mint ⭐⭐⭐⭐⭐", + }), + ); + expect(row?.rating).toBeNull(); + }); + + it("Format 3 numeric wins over Format 4 emoji", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + // `[4/5]` matches the numeric regex; the ⭐⭐⭐⭐⭐ after would + // be 5, but we prefer the structured numeric. 
+ content: "[4/5] ⭐⭐⭐⭐⭐", + }), + ); + expect(row?.rating).toBe(4); + }); +}); + +describe("parseReview — null fallback", () => { + it("returns rating: null when no rating tag and no content signal", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "just a plain review, no score", + }), + ); + expect(row?.rating).toBeNull(); + }); + + it("returns rating: null for empty content and no tags", () => { + const row = parseReview(makeEvent({ content: "" })); + expect(row?.rating).toBeNull(); + }); + + it("returns rating: null when content starts with a non-1..5 numeric", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + content: "0/5 terrible", + }), + ); + // N=0 fails the 1..5 bounds check and no other format fires. + expect(row?.rating).toBeNull(); + }); +}); + +describe("parseReview — k tag normalization", () => { + it("k='38172' narrows to number 38172", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["k", "38172"], + ], + }), + ); + expect(row?.k).toBe(38172); + }); + + it("k='38173' narrows to number 38173", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["k", "38173"], + ], + }), + ); + expect(row?.k).toBe(38173); + }); + + it("k absent → row.k is undefined (field omitted)", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + }), + ); + expect(row?.k).toBeUndefined(); + }); + + it("k is something unexpected ('1985') → row.k is undefined", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["k", "1985"], + ], + }), + ); + expect(row?.k).toBeUndefined(); + }); +}); + +describe("parseReview — u tag collection (display helper)", () => { + it("collects all u tag values into an array", () => { + const row = parseReview( + makeEvent({ + tags: [ + ["d", D_VALID], + ["u", "https://mint.a.example", "cashu"], + ["u", "https://mint.a.example/v1", "cashu"], + ], + }), + ); + 
expect(row?.u).toEqual(["https://mint.a.example", "https://mint.a.example/v1"]); + }); + + it("omits u entirely when the event has no u tags", () => { + const row = parseReview( + makeEvent({ + tags: [["d", D_VALID]], + }), + ); + expect(row?.u).toBeUndefined(); + }); +}); + +describe("parseReview — parser is lenient on Layer A", () => { + it("16-char legacy d-tag still parses (gate is at upsert, not parse)", () => { + // The parser preserves whatever is there — bot-spam filtering is the + // cache layer's job. This keeps parser usable by raw-event log views. + const row = parseReview( + makeEvent({ + tags: [["d", D_LEGACY_16]], + }), + ); + expect(row).not.toBeNull(); + expect(row?.d).toBe(D_LEGACY_16); + }); +}); diff --git a/packages/core/src/reviews/parse.ts b/packages/core/src/reviews/parse.ts new file mode 100644 index 0000000..111b7cf --- /dev/null +++ b/packages/core/src/reviews/parse.ts @@ -0,0 +1,196 @@ +/** + * Kind:38000 mint-recommendation / review parser. + * + * Emits `ReviewRow` directly — this is the primary parse path used by the + * scheduler's ingest pipeline and by the `reviews/upsert.ts` wrapper. The + * related `nip87/parseRecommendation` returns `MintRecommendation` (a + * parse-layer shape with `raw` preserved); this module produces the + * cache-layer shape with tags indexed for downstream Dexie writes. + * + * Rating extraction follows data-model-v1.md §4 + rating-tag-research.md §6 + * in strict precedence order: + * + * 1. `["rating", "N", "5"]` — canonical v1 shape, integer 1..5. + * 2. `["rating", "N"]` — legacy recall-trainer emitter, integer 1..5. + * 3. Content numeric: regex "^(N)/5" or "^(N)/10" anchored at start + * (see CONTENT_FIVE_REGEX and CONTENT_TEN_REGEX below). For /10 we + * divide by 2 and round to the nearest integer 1..5. + * 4. Content emoji: leading 1..5 run of star glyphs (see + * CONTENT_EMOJI_REGEX) — count the glyphs. + * 5. Otherwise null (no rating present).
+ * + * Rule of precedence: a tag wins over content even if both are present. + * This keeps v2-aware clients interop-free from cashu.me / bitcoinmints + * legacy that embed `[N/5]` in content alongside a structured tag. + * + * Parse rejects (returns `null`): + * - `event.kind !== 38000` + * - missing, non-string, or empty `d` tag + * + * Parse does NOT reject on Layer A d-shape — the upsert gate handles that + * so the parser stays pure and callable from tests, pagination dedup, etc. + * Callers who want the bot-spam firewall use `upsertReviewWithAggregate`. + */ +import type { Event as NostrEvent } from "nostr-tools/core"; +import type { ReviewRow } from "../cache"; + +/** Strict 1..5 integer bounds. Fractional tag values (e.g. "3.5") are truncated by parseInt, not rounded. */ +const MIN_RATING = 1; +const MAX_RATING = 5; + +/** + * Content rating: `N/5` anchored at start. Tolerates leading `[` and + * surrounding whitespace. Captures N. + */ +const CONTENT_FIVE_REGEX = /^\s*\[?\s*(\d+)\s*\/\s*5\b/; +/** + * Content rating: `N/10` anchored at start — for clients that use a + * 10-point scale. Divide by 2 to normalize into 1..5. + */ +const CONTENT_TEN_REGEX = /^\s*\[?\s*(\d+)\s*\/\s*10\b/; +/** + * Leading run of star emoji, 1..5 count. Matches `⭐` (U+2B50) and `🌟` + * (U+1F31F) interchangeably, each optionally followed by the U+FE0F + * variation selector that many emoji keyboards append. Captures the whole + * run (via the `u` flag); selectors are skipped when counting glyphs. + */ +const CONTENT_EMOJI_REGEX = /^\s*((?:(?:⭐|🌟)\uFE0F?)+)/u; + +/** Pull the first value of a named tag (or undefined). */ +function firstTagValue(tags: string[][], name: string): string | undefined { + for (const t of tags) { + if (t[0] === name && typeof t[1] === "string") return t[1]; + } + return undefined; +} + +/** Collect all values of a named tag.
*/ +function allTagValues(tags: string[][], name: string): string[] { + const out: string[] = []; + for (const t of tags) { + if (t[0] === name && typeof t[1] === "string") out.push(t[1]); + } + return out; +} + +/** Parse a numeric string to int 1..5, or undefined if out of range. */ +function toRating(raw: string): number | undefined { + const n = Number.parseInt(raw, 10); + if (!Number.isFinite(n)) return undefined; + if (n < MIN_RATING || n > MAX_RATING) return undefined; + return n; +} + +/** + * Extract rating from tags. Format 1 (`["rating","N","5"]`) wins over + * format 2 (`["rating","N"]`) — scan the entire tag list for format 1 + * first, then fall back to format 2. A single event with both shapes + * (which shouldn't happen, but is technically allowed by the event + * structure) always prefers the explicit-max form. + */ +function parseRatingFromTags(tags: string[][]): number | undefined { + // Format 1: ["rating", "N", "5"] — canonical v1 shape. + for (const t of tags) { + if (t[0] !== "rating") continue; + if (typeof t[1] !== "string") continue; + if (t[2] !== "5") continue; + const r = toRating(t[1]); + if (r !== undefined) return r; + } + // Format 2: ["rating", "N"] — legacy, no denominator. + for (const t of tags) { + if (t[0] !== "rating") continue; + if (typeof t[1] !== "string") continue; + if (t[2] !== undefined) continue; + const r = toRating(t[1]); + if (r !== undefined) return r; + } + return undefined; +} + +/** + * Count star glyphs in a matched run. `⭐` is a single BMP code point + * (U+2B50); `🌟` is a surrogate pair (U+1F31F). Spreading iterates code points, + * and U+FE0F selectors are dropped so a selector-suffixed run of 3 counts as 3. + */ +function countLeadingStars(match: string): number { + const glyphs = [...match].filter((cp) => cp !== "\uFE0F"); + return glyphs.length; +} + +/** + * Extract rating from content using the 3rd and 4th precedence rules. + * Format 3 (N/5 and N/10) wins over format 4 (emoji) — a content starting + * with `[4/5] ⭐⭐⭐⭐⭐` parses as 4, not 5.
+ */ +function parseRatingFromContent(content: string): number | undefined { + // Format 3a: N/5 anchored at start. + const fiveMatch = content.match(CONTENT_FIVE_REGEX); + if (fiveMatch?.[1]) { + const r = toRating(fiveMatch[1]); + if (r !== undefined) return r; + } + // Format 3b: N/10 anchored at start — divide by 2, round to nearest, + // clamp into 1..5. We round-to-nearest (not floor) so `5/10` → 3 and + // `7/10` → 4 rather than both flooring to 3. An `N` outside 0..10 is + // treated as missing. + const tenMatch = content.match(CONTENT_TEN_REGEX); + if (tenMatch?.[1]) { + const n = Number.parseInt(tenMatch[1], 10); + if (Number.isFinite(n) && n >= 0 && n <= 10) { + const scaled = Math.round(n / 2); + // 0/10 → 0 which is below MIN_RATING; treat as no-rating rather + // than lying about a 1-star review. + if (scaled >= MIN_RATING && scaled <= MAX_RATING) return scaled; + } + } + // Format 4: leading 1..5 emoji run. + const emojiMatch = content.match(CONTENT_EMOJI_REGEX); + if (emojiMatch?.[1]) { + const n = countLeadingStars(emojiMatch[1]); + if (n >= MIN_RATING && n <= MAX_RATING) return n; + } + return undefined; +} + +/** Resolve the `k` tag into a recognized pointer-kind, or undefined. */ +function parsePointerKind(tags: string[][]): 38172 | 38173 | undefined { + const kStr = firstTagValue(tags, "k"); + if (kStr === "38172") return 38172; + if (kStr === "38173") return 38173; + return undefined; +} + +/** + * Parse a kind:38000 event into a ReviewRow. Returns `null` when the event + * is the wrong kind or is missing the required `d` tag. Layer A d-tag + * shape validation is deferred to the upsert layer. + */ +export function parseReview(event: NostrEvent): ReviewRow | null { + if (event.kind !== 38000) return null; + + const d = firstTagValue(event.tags, "d"); + if (d === undefined || d === "") return null; + + const content = typeof event.content === "string" ? event.content : ""; + const rating = parseRatingFromTags(event.tags) ?? 
parseRatingFromContent(content) ?? null; + + const row: ReviewRow = { + pubkey: event.pubkey, + kind: 38000, + d, + eventId: event.id, + createdAt: event.created_at, + content, + rawTags: event.tags, + rating, + }; + + const k = parsePointerKind(event.tags); + if (k !== undefined) row.k = k; + + const u = allTagValues(event.tags, "u"); + if (u.length > 0) row.u = u; + + return row; +} diff --git a/packages/core/src/reviews/rank.test.ts b/packages/core/src/reviews/rank.test.ts new file mode 100644 index 0000000..a047315 --- /dev/null +++ b/packages/core/src/reviews/rank.test.ts @@ -0,0 +1,147 @@ +/** + * rankMints — end-to-end test that walks the full review-ingest pipeline + * and asserts the ranked output matches the Bayesian-damped order + * (data-model-v1.md §13). + */ +import { afterEach, describe, expect, it } from "vitest"; +import { BitcoinmintsDB, type ReviewRow } from "../cache"; +import { rankMints } from "./rank"; +import { upsertReviewWithAggregate } from "./upsert"; + +const freshName = () => `test-rank-${Math.random().toString(36).slice(2)}`; +const toDispose: BitcoinmintsDB[] = []; + +afterEach(async () => { + while (toDispose.length > 0) { + const db = toDispose.pop(); + if (!db) continue; + db.close(); + await BitcoinmintsDB.delete(db.name); + } +}); + +async function freshDB(): Promise<BitcoinmintsDB> { + const db = new BitcoinmintsDB(freshName()); + toDispose.push(db); + await db.open(); + return db; +} + +/** Helper: 64-char valid Cashu d-tag deterministically generated from an index.
*/ +function dForIndex(n: number): string { + const hex = n.toString(16).padStart(64, "0"); + return hex; +} + +function makeReview(over: Partial<ReviewRow> & { pubkey: string; d: string }): ReviewRow { + return { + pubkey: over.pubkey, + kind: 38000, + d: over.d, + eventId: `${"0".repeat(58)}${over.pubkey.slice(-6)}`, + createdAt: 1_700_000_000, + content: "", + rawTags: [], + rating: 5, + ...over, + }; +} + +/** + * Seed a mint's aggregate by running N reviews of the given rating + * through the full upsert-with-aggregate pipeline. Resolves once all + * reviews are written; it does not return the aggregate. + */ +async function seedMint( + db: BitcoinmintsDB, + d: string, + rating: number | null, + count: number, +): Promise<void> { + for (let i = 0; i < count; i++) { + await upsertReviewWithAggregate( + db, + makeReview({ + // Unique pubkey per review so each is a separate (pubkey, d) + // replaceable-event key. + pubkey: `pk${i.toString(16).padStart(62, "0")}`, + d, + rating, + }), + ); + } +} + +describe("rankMints — sort order", () => { + it("empty aggregate table → empty result, no throw", async () => { + const db = await freshDB(); + const ranked = await rankMints(db); + expect(ranked).toEqual([]); + }); + + it("orders strictly by bayesianScore descending", async () => { + const db = await freshDB(); + const dHigh = dForIndex(1); + const dMid = dForIndex(2); + const dLow = dForIndex(3); + // High: 5★ × 10 → 5 * log10(11) ≈ 5.21 + await seedMint(db, dHigh, 5, 10); + // Mid: 4★ × 3 → 4 * log10(4) ≈ 2.41 + await seedMint(db, dMid, 4, 3); + // Low: 5★ × 1 → 5 * log10(2) ≈ 1.505 + await seedMint(db, dLow, 5, 1); + + const ranked = await rankMints(db); + expect(ranked.map((r) => r.d)).toEqual([dHigh, dMid, dLow]); + }); + + it("single 5★ review sorts BELOW 4★×10 — the formula damps low-count mints (§13)", async () => { + const db = await freshDB(); + const dSingle = dForIndex(10); + const dTen = dForIndex(11); + await seedMint(db, dSingle, 5, 1); + await seedMint(db, dTen, 4, 10); + + const
ranked = await rankMints(db); + expect(ranked[0]?.d).toBe(dTen); + expect(ranked[1]?.d).toBe(dSingle); + // Sanity: confirm the numeric scores match the §13 table (5.21 vs 1.50). + expect(ranked[0]?.bayesianScore).toBeGreaterThan(ranked[1]!.bayesianScore); + }); + + it("unrated reviews (bayesianScore=0) sort at the bottom", async () => { + const db = await freshDB(); + const dRated = dForIndex(20); + const dUnrated = dForIndex(21); + await seedMint(db, dRated, 3, 2); // 3 * log10(3) ≈ 1.43 + await seedMint(db, dUnrated, null, 10); // bayesianScore=0 + + const ranked = await rankMints(db); + expect(ranked[0]?.d).toBe(dRated); + expect(ranked[1]?.d).toBe(dUnrated); + expect(ranked[1]?.bayesianScore).toBe(0); + }); +}); + +describe("rankMints — limit", () => { + it("defaults to top 50 — returns all 3 when < 50 mints are present", async () => { + const db = await freshDB(); + await seedMint(db, dForIndex(30), 5, 1); + await seedMint(db, dForIndex(31), 4, 1); + await seedMint(db, dForIndex(32), 3, 1); + const ranked = await rankMints(db); + expect(ranked).toHaveLength(3); + }); + + it("caps at the explicit limit", async () => { + const db = await freshDB(); + for (let i = 40; i < 50; i++) { + await seedMint(db, dForIndex(i), 5, i - 39); + } + const top3 = await rankMints(db, 3); + expect(top3).toHaveLength(3); + // Sanity: scores descending. + expect(top3[0]!.bayesianScore).toBeGreaterThan(top3[1]!.bayesianScore); + expect(top3[1]!.bayesianScore).toBeGreaterThan(top3[2]!.bayesianScore); + }); +}); diff --git a/packages/core/src/reviews/rank.ts b/packages/core/src/reviews/rank.ts new file mode 100644 index 0000000..bdb7872 --- /dev/null +++ b/packages/core/src/reviews/rank.ts @@ -0,0 +1,29 @@ +/** + * Ranking export — thin helper around the `mintAggregate` Dexie index. 
+ * + * `bayesianScore` is materialized on every review upsert (see + * reviews/aggregate.ts), and the v3 schema declares a secondary index on + * it, so this query reduces to an index range-scan in reverse + limit — + * no per-row compute at query time, no full-table sort. + * + * Intentionally thin; the `mintAggregate` row is the API surface. + * Downstream join against `mintInfo`, `announcements`, and + * `auditLiveness` (when it ships) happens at the render layer — this + * export is the ranked list of mint `d`s with their aggregates attached. + */ +import type { BitcoinmintsDB, MintAggregateRow } from "../cache"; + +/** + * Return the top-N mint aggregates sorted by `bayesianScore` descending. + * Defaults to 50 per the data-model-v1.md §13 example query; pass + * `Infinity` (or a high number) as `limit` for the full ranked list. + * + * Ties on `bayesianScore` are broken by Dexie's natural index order on + * the primary key (the `d`), which is deterministic but not + * semantically-meaningful. That's acceptable at the edge — a meaningful + * tiebreak (e.g. by `ratedCount` then by most-recent review) can be + * layered as a JS sort on the returned array if UX wants it. + */ +export async function rankMints(db: BitcoinmintsDB, limit = 50): Promise<MintAggregateRow[]> { + return db.mintAggregate.orderBy("bayesianScore").reverse().limit(limit).toArray(); +} diff --git a/packages/core/src/reviews/upsert.test.ts b/packages/core/src/reviews/upsert.test.ts new file mode 100644 index 0000000..005b097 --- /dev/null +++ b/packages/core/src/reviews/upsert.test.ts @@ -0,0 +1,211 @@ +/** + * Integration tests for upsertReviewWithAggregate — exercises the + * transactional wiring between the review CAS upsert and the aggregate + * materialization. The invariants we care about: + * + * 1. Inserted / replaced reviews → aggregate is recomputed in the same + * transaction so a concurrent read never sees a review without its + * aggregate reflection. + * 2.
Rejected-stale / rejected-invalid reviews → aggregate is NOT + * touched (no spurious updatedAt churn). + * 3. CAS semantics on (pubkey, d) are preserved: newer createdAt wins, + * tiebreak on eventId. + * 4. Replace-a-review's-rating flows through to the aggregate correctly. + */ +import { afterEach, describe, expect, it } from "vitest"; +import { BitcoinmintsDB, type ReviewRow } from "../cache"; +import { upsertReviewWithAggregate } from "./upsert"; + +const freshName = () => `test-review-upsert-${Math.random().toString(36).slice(2)}`; +const toDispose: BitcoinmintsDB[] = []; + +afterEach(async () => { + while (toDispose.length > 0) { + const db = toDispose.pop(); + if (!db) continue; + db.close(); + await BitcoinmintsDB.delete(db.name); + } +}); + +async function freshDB(): Promise<BitcoinmintsDB> { + const db = new BitcoinmintsDB(freshName()); + toDispose.push(db); + await db.open(); + return db; +} + +const D_VALID = "5fe928ae0970844f3c5253d2e85a88788486edcbd96c070334a4a2d0d0154a77"; +const D_BOT = "psvef0yh2zk24tt7"; // 16-char legacy/bot-spam shape.
+ +const EID_LOW = `${"0".repeat(60)}aaaa`; +const EID_HIGH = `${"0".repeat(60)}ffff`; + +function makeReview(over: Partial<ReviewRow> = {}): ReviewRow { + return { + pubkey: `pk${"0".repeat(60)}1`, + kind: 38000, + d: D_VALID, + eventId: EID_LOW, + createdAt: 1_700_000_000, + content: "", + rawTags: [], + rating: 5, + ...over, + }; +} + +describe("upsertReviewWithAggregate — insert + recompute", () => { + it("first insert populates both reviews and mintAggregate in one transaction", async () => { + const db = await freshDB(); + const row = makeReview({ rating: 5 }); + + const result = await upsertReviewWithAggregate(db, row, () => 1234); + expect(result).toBe("inserted"); + + expect(await db.reviews.count()).toBe(1); + const agg = await db.mintAggregate.get(D_VALID); + expect(agg).toBeDefined(); + expect(agg?.reviewCount).toBe(1); + expect(agg?.ratedCount).toBe(1); + expect(agg?.avgRating).toBe(5); + expect(agg?.bayesianScore).toBeCloseTo(5 * Math.log10(2), 6); + expect(agg?.updatedAt).toBe(1234); + }); + + it("unrated insert still populates aggregate with reviewCount=1, ratedCount=0, avg=null, bayesian=0", async () => { + const db = await freshDB(); + const row = makeReview({ rating: null }); + + const result = await upsertReviewWithAggregate(db, row); + expect(result).toBe("inserted"); + + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.reviewCount).toBe(1); + expect(agg?.ratedCount).toBe(0); + expect(agg?.avgRating).toBeNull(); + expect(agg?.bayesianScore).toBe(0); + }); + + it("N reviews for same d → aggregate reflects mean across all rated", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ pubkey: "pk-1", rating: 5 })); + await upsertReviewWithAggregate(db, makeReview({ pubkey: "pk-2", rating: 3 })); + await upsertReviewWithAggregate(db, makeReview({ pubkey: "pk-3", rating: 4 })); + + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.reviewCount).toBe(3); + expect(agg?.ratedCount).toBe(3); +
expect(agg?.avgRating).toBe((5 + 3 + 4) / 3); + expect(agg?.bayesianScore).toBeCloseTo(agg!.avgRating! * Math.log10(4), 6); + }); +}); + +describe("upsertReviewWithAggregate — CAS + aggregate-stays-in-sync", () => { + it("replace on newer createdAt → aggregate reflects the NEW rating", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ createdAt: 1000, rating: 1 })); + const before = await db.mintAggregate.get(D_VALID); + expect(before?.avgRating).toBe(1); + + const result = await upsertReviewWithAggregate(db, makeReview({ createdAt: 2000, rating: 5 })); + expect(result).toBe("replaced"); + + const after = await db.mintAggregate.get(D_VALID); + expect(after?.avgRating).toBe(5); + expect(after?.reviewCount).toBe(1); // still just the one reviewer + expect(after?.ratedCount).toBe(1); + }); + + it("reject older → aggregate is NOT rewritten (updatedAt stays put)", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ createdAt: 2000, rating: 5 }), () => 1000); + const before = await db.mintAggregate.get(D_VALID); + expect(before?.updatedAt).toBe(1000); + + const result = await upsertReviewWithAggregate( + db, + makeReview({ createdAt: 1000, rating: 1 }), + () => 9999, + ); + expect(result).toBe("rejected-stale"); + + const after = await db.mintAggregate.get(D_VALID); + expect(after?.updatedAt).toBe(1000); // not touched + expect(after?.avgRating).toBe(5); + }); + + it("tiebreak on eventId: same createdAt, higher eventId wins, aggregate reflects new rating", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ eventId: EID_LOW, rating: 1 })); + const result = await upsertReviewWithAggregate( + db, + makeReview({ eventId: EID_HIGH, rating: 5 }), + ); + expect(result).toBe("replaced"); + + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.avgRating).toBe(5); + }); + + it("tiebreak rejects lower eventId: aggregate NOT updated", async 
() => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ eventId: EID_HIGH, rating: 5 }), () => 1000); + const result = await upsertReviewWithAggregate( + db, + makeReview({ eventId: EID_LOW, rating: 1 }), + () => 9999, + ); + expect(result).toBe("rejected-stale"); + + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.updatedAt).toBe(1000); + expect(agg?.avgRating).toBe(5); + }); + + it("replacing a rated review with an unrated one → aggregate flips to avg=null", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ createdAt: 1000, rating: 5 })); + expect((await db.mintAggregate.get(D_VALID))?.avgRating).toBe(5); + + await upsertReviewWithAggregate(db, makeReview({ createdAt: 2000, rating: null })); + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.reviewCount).toBe(1); + expect(agg?.ratedCount).toBe(0); + expect(agg?.avgRating).toBeNull(); + expect(agg?.bayesianScore).toBe(0); + }); + + it("replacing one unrated review with a rated one → aggregate picks up the new rating", async () => { + const db = await freshDB(); + await upsertReviewWithAggregate(db, makeReview({ createdAt: 1000, rating: null })); + expect((await db.mintAggregate.get(D_VALID))?.avgRating).toBeNull(); + + await upsertReviewWithAggregate(db, makeReview({ createdAt: 2000, rating: 4 })); + const agg = await db.mintAggregate.get(D_VALID); + expect(agg?.ratedCount).toBe(1); + expect(agg?.avgRating).toBe(4); + }); +}); + +describe("upsertReviewWithAggregate — Layer A gate", () => { + it("16-char bot-spam d-tag → rejected-invalid, no review row, no aggregate row", async () => { + const db = await freshDB(); + const result = await upsertReviewWithAggregate(db, makeReview({ d: D_BOT })); + expect(result).toBe("rejected-invalid"); + expect(await db.reviews.count()).toBe(0); + expect(await db.mintAggregate.count()).toBe(0); + }); + + it("Fedimint k=38173 review with non-regex d bypasses the gate", async () 
=> { + const db = await freshDB(); + // A federation ID isn't constrained by the Cashu-mint-pubkey regex. + const fediRow = makeReview({ + d: "718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af", + k: 38173, + }); + const result = await upsertReviewWithAggregate(db, fediRow); + expect(result).toBe("inserted"); + expect(await db.reviews.count()).toBe(1); + }); +}); diff --git a/packages/core/src/reviews/upsert.ts b/packages/core/src/reviews/upsert.ts new file mode 100644 index 0000000..e8212f9 --- /dev/null +++ b/packages/core/src/reviews/upsert.ts @@ -0,0 +1,39 @@ +/** + * Transactional wrapper that composes `cache.upsertReview` with + * `recomputeAggregateInTx` inside a single Dexie `rw` transaction so the + * `reviews` table and the `mintAggregate` materialization never drift. + * + * The low-level `cache.upsertReview` already opens a transaction on just + * `db.reviews`. Opening a transaction that includes BOTH `db.reviews` and + * `db.mintAggregate` at this layer means the nested call inside + * `cache.upsertReview` is transparently adopted by Dexie's scope + * inheritance (zone-tracked) — no SubTransactionError. On the accept + * branches (`inserted` or `replaced`) we recompute; on reject branches + * (`rejected-stale`, `rejected-invalid`) the DB state didn't change, so + * the aggregate is already correct — we skip the recompute. + */ +import { type BitcoinmintsDB, type ReviewRow, type UpsertResult, upsertReview } from "../cache"; +import { recomputeAggregateInTx } from "./aggregate"; + +/** + * Upsert a review and, if the review changed DB state (inserted or + * replaced), recompute the `mintAggregate` row for that review's `d`. + * Returns the `UpsertResult` from the underlying review write — this lets + * callers distinguish "wrote a new row" from "lost the CAS race". + * + * `now` is the recompute clock (threaded into `recomputeAggregateInTx`); + * defaults to `Date.now`. 
+ */ +export async function upsertReviewWithAggregate( + db: BitcoinmintsDB, + row: ReviewRow, + now: () => number = Date.now, +): Promise<UpsertResult> { + return db.transaction("rw", db.reviews, db.mintAggregate, async () => { + const result = await upsertReview(db, row); + if (result === "inserted" || result === "replaced") { + await recomputeAggregateInTx(db, row.d, now); + } + return result; + }); +} diff --git a/packages/core/src/scheduler/index.ts b/packages/core/src/scheduler/index.ts index 8486b56..b5ba694 100644 --- a/packages/core/src/scheduler/index.ts +++ b/packages/core/src/scheduler/index.ts @@ -70,22 +70,17 @@ import { type BitcoinmintsDB, type ProfileRow, type RelayListRow, - type ReviewRow, upsertAnnouncement, upsertMintInfo, upsertProfile, upsertRelayList, - upsertReview, } from "../cache"; import type { MintInfoFetcher } from "../cashu/info"; import { type LayerBResult, verifySignerBinding } from "../cashu/layerB"; -import { - type MintAnnouncement, - type MintRecommendation, - parseMintAnnouncement, - parseRecommendation, -} from "../nip87"; +import { type MintAnnouncement, parseMintAnnouncement } from "../nip87"; import type { Pool, PoolHandle } from "../nostr"; +import { parseReview } from "../reviews/parse"; +import { upsertReviewWithAggregate } from "../reviews/upsert"; /** Observable counters surfaced via getStats() — for the UI in PR #6+. */ export type SchedulerStats = { @@ -206,21 +201,6 @@ function toAnnouncementRow(parsed: MintAnnouncement): AnnouncementRow { return row; } -function toReviewRow(parsed: MintRecommendation): ReviewRow { - const row: ReviewRow = { - pubkey: parsed.pubkey, - kind: 38000, - d: parsed.d, - eventId: parsed.eventId, - createdAt: parsed.createdAt, - content: parsed.content, - rawTags: parsed.raw.tags, - }; - if (parsed.k !== undefined) row.k = parsed.k; - if (parsed.rating !== undefined) row.rating = parsed.rating; - return row; -} - /** Best-effort kind:0 parse. JSON content with name/picture/etc.
*/ function toProfileRow(event: NostrEvent): ProfileRow | null { if (event.kind !== 0) return null; @@ -606,13 +586,21 @@ export function createScheduler(config: SchedulerConfig): Scheduler { return; } case 38000: { - const parsed = parseRecommendation(event); - if (!parsed) return; - const row = toReviewRow(parsed); - const result = await upsertReview(db, row); + // PR #5: parse via reviews/parseReview (all 4 rating formats + + // null fallback) and route through the aggregate-materializing + // upsert wrapper so the mintAggregate row stays in sync inside + // the same Dexie transaction as the review write. + const row = parseReview(event); + if (!row) return; + const result = await upsertReviewWithAggregate(db, row, now); if (result === "inserted" || result === "replaced") { stats.accepted += 1; updateWatermark(event.kind, event.created_at); + } else if (result === "rejected-invalid") { + // Layer A gate on reviews: pointing at a bot-spam d-tag. Count + // under the same stats bucket as the announcement Layer A + // rejection — it's the same firewall. + stats.rejectedByLayerA += 1; } return; } From 19b46ca9fcc5db20fc2561bb69c224082158665e Mon Sep 17 00:00:00 2001 From: orveth Date: Fri, 17 Apr 2026 10:34:19 -0700 Subject: [PATCH 2/8] fix(core/cache): narrow k=38173 d-tag shape check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior free-pass on kind=38173 let any short/junk d-tag land in Dexie as long as the `k` tag claimed Fedimint. Every real federation ID in the audit corpus (fedimint-observer.md + reviews/corpus.test.ts) is lowercase 64-char hex — bot spam is not. Add isValidFedimintDTag as a sibling shape gate so Layer A catches the Fedimint branch at the same choke point as Cashu. Apply in both upsertAnnouncement and upsertReview. 
Co-Authored-By: Claude Opus 4.7 --- packages/core/src/cache/upsert.ts | 42 +++++++++++++++-------- packages/core/src/nip87/dtag.ts | 21 ++++++++++++ packages/core/src/nip87/index.ts | 7 +++- packages/core/src/scheduler/index.test.ts | 7 +++- 4 files changed, 60 insertions(+), 17 deletions(-) diff --git a/packages/core/src/cache/upsert.ts b/packages/core/src/cache/upsert.ts index 44534e8..ff50a2e 100644 --- a/packages/core/src/cache/upsert.ts +++ b/packages/core/src/cache/upsert.ts @@ -15,14 +15,16 @@ * Layer A gate for kind:38172: before writing an announcement we check * isValidCashuDTag(d). Invalid shapes (bot spam, non-hex garbage) are * returned as "rejected-invalid" and never hit the DB. kind:38173 - * (Fedimint) bypasses Layer A — federation IDs have a different shape - * and their validator is a TODO-v1.1 concern. + * (Fedimint) uses a sibling shape gate (isValidFedimintDTag) — every real + * federation ID in the audit corpus is 64-char lowercase hex, so short / + * junk d-tags with `["k","38173"]` are still filtered at the same choke + * point as Cashu bot spam. * * mintInfo and mintAggregate aren't event-based, so their CAS predicate * is a monotonically-increasing timestamp: `fetchedAt` for mintInfo, * `updatedAt` for mintAggregate. */ -import { isValidCashuDTag } from "../nip87/dtag"; +import { isValidCashuDTag, isValidFedimintDTag } from "../nip87/dtag"; import type { AnnouncementRow, BitcoinmintsDB, @@ -56,14 +58,19 @@ function nextWins( return next.eventId > prev.eventId; } -/** Upsert a kind:38172 or kind:38173 announcement with Layer A gating on 38172. */ +/** Upsert a kind:38172 or kind:38173 announcement with Layer A gating on both kinds. */ export async function upsertAnnouncement( db: BitcoinmintsDB, row: AnnouncementRow, ): Promise { - // Layer A gate — reject invalid Cashu d-tag shapes before touching the DB. - // Fedimint (38173) bypasses: federation-id shape is TODO-v1.1. 
- if (row.kind === 38172 && !isValidCashuDTag(row.d)) { + // Layer A gate — reject invalid d-tag shapes before touching the DB. + // Cashu (38172) requires a 64- or 66-char secp256k1 pubkey shape; + // Fedimint (38173) requires a 64-char lowercase hex federation-id shape. + // A short/junk d-tag with `k=38173` slapped on is still bot spam and + // must be caught by the same firewall — don't free-pass by kind alone. + if (row.kind === 38173) { + if (!isValidFedimintDTag(row.d)) return "rejected-invalid"; + } else if (!isValidCashuDTag(row.d)) { return "rejected-invalid"; } @@ -99,9 +106,10 @@ export async function upsertAnnouncement( * keeps the 959 zero-d-tag bot spam events (per relay-strategy §4) from * filtering up into the ranking aggregate. * - * `k === 38173` (Fedimint) bypasses the gate — federation IDs have a - * different shape and their validator is TODO-v1.1, identical to the - * announcement upsert. + * `k === 38173` (Fedimint) switches to the sibling `isValidFedimintDTag` + * shape gate — every real federation ID in the audit corpus is lowercase + * 64-char hex, so a short / junk d-tag with `k=38173` attached is still + * bot spam and must be caught by the same firewall. * * Note: this low-level upsert is the mechanical write. It does NOT * materialize the `mintAggregate` row — the `reviews/` wrapper composes @@ -111,11 +119,15 @@ export async function upsertAnnouncement( * aggregate materialization — safe but stale. */ export async function upsertReview(db: BitcoinmintsDB, row: ReviewRow): Promise { - // Layer A gate for Cashu-pointing reviews. Fedimint (k=38173) bypasses. - // No `k` tag → treat as Cashu (the default for in-the-wild events per - // rating-tag-research §3). - const isFedimint = row.k === 38173; - if (!isFedimint && !isValidCashuDTag(row.d)) { + // Layer A gate — reject invalid d-tag shapes before touching the DB. + // Reviews point at a target mint via `d`; the pointer-kind `k` selects + // which shape gate applies. 
No `k` tag → treat as Cashu (the default + // for in-the-wild events per rating-tag-research §3). Fedimint rows + // still get a sibling shape check (64-char hex federation id) so junk + // d-tags with `k=38173` slapped on don't free-pass the firewall. + if (row.k === 38173) { + if (!isValidFedimintDTag(row.d)) return "rejected-invalid"; + } else if (!isValidCashuDTag(row.d)) { return "rejected-invalid"; } return db.transaction("rw", db.reviews, async () => { diff --git a/packages/core/src/nip87/dtag.ts b/packages/core/src/nip87/dtag.ts index 729d735..9b84b32 100644 --- a/packages/core/src/nip87/dtag.ts +++ b/packages/core/src/nip87/dtag.ts @@ -28,6 +28,20 @@ // Bot spam (16-char random d-tags) rejected by both branches. export const D_TAG_REGEX = /^([0-9a-f]{64}|0[23][0-9a-f]{64})$/; +/** + * Fedimint federation-id d-tag shape. Every real Fedimint federation ID + * observed in the audit corpus (see `audit/fedimint-observer.md` and + * `packages/core/src/reviews/corpus.test.ts`) is lowercase 64-char hex — + * the blake3 hash of the federation's consensus public key, serialized as + * 32 bytes of hex. A short/junk d-tag with `k=38173` slapped on is bot + * spam, not a federation, and must be rejected by the same Layer A + * firewall that catches 16-char Cashu bot spam. + * + * Keeping this sibling to `D_TAG_REGEX` so both Layer A shape gates live + * in one file — a reviewer touching one will see the other immediately. + */ +export const FEDIMINT_D_TAG_REGEX = /^[0-9a-f]{64}$/; + /** * True iff `d` is either a 64-char x-only secp256k1 pubkey or a 66-char * compressed secp256k1 pubkey, both lowercase hex. @@ -35,3 +49,10 @@ export const D_TAG_REGEX = /^([0-9a-f]{64}|0[23][0-9a-f]{64})$/; export function isValidCashuDTag(d: string): boolean { return D_TAG_REGEX.test(d); } + +/** + * True iff `d` is a lowercase 64-char hex Fedimint federation ID. 
+ */ +export function isValidFedimintDTag(d: string): boolean { + return FEDIMINT_D_TAG_REGEX.test(d); +} diff --git a/packages/core/src/nip87/index.ts b/packages/core/src/nip87/index.ts index eb58113..b775c0d 100644 --- a/packages/core/src/nip87/index.ts +++ b/packages/core/src/nip87/index.ts @@ -1,4 +1,9 @@ -export { D_TAG_REGEX, isValidCashuDTag } from "./dtag"; +export { + D_TAG_REGEX, + FEDIMINT_D_TAG_REGEX, + isValidCashuDTag, + isValidFedimintDTag, +} from "./dtag"; export { parseMintAnnouncement, parseRecommendation } from "./parse"; export type { MintAnnouncement, diff --git a/packages/core/src/scheduler/index.test.ts b/packages/core/src/scheduler/index.test.ts index e98a0a5..518f335 100644 --- a/packages/core/src/scheduler/index.test.ts +++ b/packages/core/src/scheduler/index.test.ts @@ -223,13 +223,18 @@ describe("scheduler — pipeline (single event)", () => { await sched.start(); const fedPubkey = "fedopk".padEnd(64, "0"); + // Use a realistic 64-char hex federation id — every real federation + // observed in the audit corpus (see `fedimint-observer.md` and + // `reviews/corpus.test.ts`) is lowercase 64-char hex. Short/junk + // d-tags get rejected by Layer A even when k=38173. + const fedId = "718e421be177486639330d198e870b7345ebd07b2866b5fd3797d73e4bc4c9af"; await pushEvent({ id: "fed-1", kind: 38173, pubkey: fedPubkey, created_at: 1_700_000_000, tags: [ - ["d", "fed11abc"], + ["d", fedId], ["u", "fed11abc..."], ], content: "", From f8b385b62a3071b78e08d586fe04638b4e239370 Mon Sep 17 00:00:00 2001 From: orveth Date: Fri, 17 Apr 2026 10:35:33 -0700 Subject: [PATCH 3/8] fix(core/scheduler): wrap onEvent cases in try/catch and count handler errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior code relied on an outdated claim (now-removed doc comment) that each kind's handler did its own try/catch — it didn't. 
A thrown Dexie transaction (QuotaExceeded, schema collision, unexpected disk state) in the 38000/38172/38173/0/10002 branch would escape as an unhandled rejection at the `void onEvent(event)` subscription boundary and stats would silently freeze at last-good while ingest continued to look healthy. Wrap each case body so branch-local exceptions count into a new `stats.handlerErrors` counter and log with a stable `[scheduler]` prefix. Also add `stats.rejectedByParse` for the kind:38000 branch so `parseReview`-null drops are observable rather than silent. Co-Authored-By: Claude Opus 4.7 --- packages/core/src/scheduler/index.ts | 167 +++++++++++++++++++-------- 1 file changed, 121 insertions(+), 46 deletions(-) diff --git a/packages/core/src/scheduler/index.ts b/packages/core/src/scheduler/index.ts index b5ba694..2f8bb62 100644 --- a/packages/core/src/scheduler/index.ts +++ b/packages/core/src/scheduler/index.ts @@ -87,9 +87,26 @@ export type SchedulerStats = { eventsReceived: number; accepted: number; rejectedByLayerA: number; + /** + * kind:38000 reviews that were dropped because `parseReview` returned + * `null` — either the `d` tag was missing/empty or the event was + * unexpectedly not kind:38000. Counted separately from `rejectedByLayerA` + * because it's a parser-level reject (malformed event) rather than a + * shape-gate reject (valid event pointing at bot-spam). + */ + rejectedByParse: number; layerBPending: number; layerBVerified: number; layerBFailed: number; + /** + * Count of exceptions thrown out of the per-event handler after we've + * started processing. A thrown Dexie transaction (QuotaExceeded, schema + * collision, unexpected disk state) or any other unhandled error in a + * kind-specific branch bumps this counter — without it, the error would + * become an unhandled promise rejection and the stats would silently + * freeze at last-good while ingest continued to look healthy. 
+ */ + handlerErrors: number; }; export type Scheduler = { @@ -257,9 +274,11 @@ export function createScheduler(config: SchedulerConfig): Scheduler { eventsReceived: 0, accepted: 0, rejectedByLayerA: 0, + rejectedByParse: 0, layerBPending: 0, layerBVerified: 0, layerBFailed: 0, + handlerErrors: 0, }; // Tracks (kind -> highest createdAt seen). Used to compute the `since` @@ -558,7 +577,20 @@ export function createScheduler(config: SchedulerConfig): Scheduler { inflight.add(work); } - /** Per-event handler — single funnel for all kinds. */ + /** + * Per-event handler — single funnel for all kinds. + * + * Each case body is wrapped in its own try/catch so a thrown Dexie + * transaction (QuotaExceeded, schema collision, unexpected disk state) + * or any other branch-local exception gets counted into + * `stats.handlerErrors` and logged with a stable prefix. Without the + * wrappers, the rejection would escape the `void onEvent(event)` call + * at the subscription boundary and stats would silently freeze at + * last-good while ingest continued to look healthy (silent-failure + * gap). Log surface matches `reenqueueUnverified`'s existing pattern: + * a `[scheduler]`-prefixed console call, no structured logger is wired + * through the package yet. 
+ */ async function onEvent(event: NostrEvent): Promise { if (stopped) return; stats.eventsReceived += 1; @@ -566,61 +598,103 @@ export function createScheduler(config: SchedulerConfig): Scheduler { switch (event.kind) { case 38172: case 38173: { - const parsed = parseMintAnnouncement(event); - if (!parsed) return; - const row = toAnnouncementRow(parsed); - const result = await upsertAnnouncement(db, row); - if (result === "rejected-invalid") { - stats.rejectedByLayerA += 1; - } - if (result === "inserted" || result === "replaced") { - stats.accepted += 1; - updateWatermark(event.kind, event.created_at); - // Layer B only runs on Cashu — verifySignerBinding will short- - // circuit non-cashu, but skipping the enqueue avoids the - // bookkeeping noise. - if (event.kind === 38172) { - enqueueLayerB(row); + try { + const parsed = parseMintAnnouncement(event); + if (!parsed) return; + const row = toAnnouncementRow(parsed); + const result = await upsertAnnouncement(db, row); + if (result === "rejected-invalid") { + stats.rejectedByLayerA += 1; } + if (result === "inserted" || result === "replaced") { + stats.accepted += 1; + updateWatermark(event.kind, event.created_at); + // Layer B only runs on Cashu — verifySignerBinding will short- + // circuit non-cashu, but skipping the enqueue avoids the + // bookkeeping noise. + if (event.kind === 38172) { + enqueueLayerB(row); + } + } + } catch (err) { + stats.handlerErrors += 1; + console.error("[scheduler] handler error", { + kind: event.kind, + eventId: event.id, + err, + }); } return; } case 38000: { - // PR #5: parse via reviews/parseReview (all 4 rating formats + - // null fallback) and route through the aggregate-materializing - // upsert wrapper so the mintAggregate row stays in sync inside - // the same Dexie transaction as the review write. 
- const row = parseReview(event); - if (!row) return; - const result = await upsertReviewWithAggregate(db, row, now); - if (result === "inserted" || result === "replaced") { - stats.accepted += 1; - updateWatermark(event.kind, event.created_at); - } else if (result === "rejected-invalid") { - // Layer A gate on reviews: pointing at a bot-spam d-tag. Count - // under the same stats bucket as the announcement Layer A - // rejection — it's the same firewall. - stats.rejectedByLayerA += 1; + try { + // PR #5: parse via reviews/parseReview (all 4 rating formats + + // null fallback) and route through the aggregate-materializing + // upsert wrapper so the mintAggregate row stays in sync inside + // the same Dexie transaction as the review write. + const row = parseReview(event); + if (!row) { + // parseReview returns null for missing/empty `d` or wrong kind + // — neither should reach here in a healthy pipeline but both + // are silent drops worth counting (silent-failure gap). + stats.rejectedByParse += 1; + return; + } + const result = await upsertReviewWithAggregate(db, row, now); + if (result === "inserted" || result === "replaced") { + stats.accepted += 1; + updateWatermark(event.kind, event.created_at); + } else if (result === "rejected-invalid") { + // Layer A gate on reviews: pointing at a bot-spam d-tag. Count + // under the same stats bucket as the announcement Layer A + // rejection — it's the same firewall. 
+ stats.rejectedByLayerA += 1; + } + } catch (err) { + stats.handlerErrors += 1; + console.error("[scheduler] handler error", { + kind: event.kind, + eventId: event.id, + err, + }); } return; } case 0: { - const row = toProfileRow(event); - if (!row) return; - const result = await upsertProfile(db, row); - if (result === "inserted" || result === "replaced") { - stats.accepted += 1; - updateWatermark(event.kind, event.created_at); + try { + const row = toProfileRow(event); + if (!row) return; + const result = await upsertProfile(db, row); + if (result === "inserted" || result === "replaced") { + stats.accepted += 1; + updateWatermark(event.kind, event.created_at); + } + } catch (err) { + stats.handlerErrors += 1; + console.error("[scheduler] handler error", { + kind: event.kind, + eventId: event.id, + err, + }); } return; } case 10002: { - const row = toRelayListRow(event); - if (!row) return; - const result = await upsertRelayList(db, row); - if (result === "inserted" || result === "replaced") { - stats.accepted += 1; - updateWatermark(event.kind, event.created_at); + try { + const row = toRelayListRow(event); + if (!row) return; + const result = await upsertRelayList(db, row); + if (result === "inserted" || result === "replaced") { + stats.accepted += 1; + updateWatermark(event.kind, event.created_at); + } + } catch (err) { + stats.handlerErrors += 1; + console.error("[scheduler] handler error", { + kind: event.kind, + eventId: event.id, + err, + }); } return; } @@ -675,9 +749,10 @@ export function createScheduler(config: SchedulerConfig): Scheduler { filters, onEvent: (event) => { // onEvent returns a promise; we don't await here because the - // pool callback contract is sync. Errors inside the handler - // are swallowed at this boundary (each kind's handler does - // its own try-catch around DB writes via Dexie's transaction). + // pool callback contract is sync. 
Each kind's case body wraps + // its own try/catch that counts into stats.handlerErrors, so + // a thrown Dexie transaction can't escape as an unhandled + // rejection or silently freeze the stats. void onEvent(event); }, closeOnEose: false, From 6c57602f2116b6937b944940988bb45bdb807f49 Mon Sep 17 00:00:00 2001 From: orveth Date: Fri, 17 Apr 2026 10:36:38 -0700 Subject: [PATCH 4/8] =?UTF-8?q?fix(core/cache):=20clear=20mintAggregate=20?= =?UTF-8?q?on=20v2=20=E2=86=92=20v3=20upgrade?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dexie auto-migrates the v3 schema indexes (`bayesianScore`, `avgRating` replacing `bayesianRank`), but does NOT transform row PAYLOADS. A dev with a v2 IndexedDB would have rows shaped `{d, averageRating, bayesianRank, updatedAt}` — field names renamed in v3 — which fail every v3 query shape. No prod data yet and the aggregate re-derives from live review traffic, so the correct migration is a clean wipe. Co-Authored-By: Claude Opus 4.7 --- packages/core/src/cache/schema.test.ts | 50 ++++++++++++++++++++++++++ packages/core/src/cache/schema.ts | 27 +++++++++----- 2 files changed, 68 insertions(+), 9 deletions(-) diff --git a/packages/core/src/cache/schema.test.ts b/packages/core/src/cache/schema.test.ts index b8929d8..0a83d53 100644 --- a/packages/core/src/cache/schema.test.ts +++ b/packages/core/src/cache/schema.test.ts @@ -1,3 +1,4 @@ +import Dexie from "dexie"; import { afterEach, describe, expect, it } from "vitest"; import { BitcoinmintsDB } from "./schema"; @@ -93,4 +94,53 @@ describe("BitcoinmintsDB schema", () => { expect(await db.mintInfo.count()).toBe(0); expect(await db.mintAggregate.count()).toBe(0); }); + + it("v2 → v3 upgrade clears the mintAggregate table", async () => { + // A dev who opened the app at v2 has rows shaped + // `{d, averageRating, bayesianRank, updatedAt}` — the `averageRating` + // field renamed to `avgRating` in v3 and `bayesianRank` was dropped, + // so without a 
migration hook those rows fail every v3 query shape + // (the indexes point at fields the row doesn't have). The v3 upgrade + // wipes the table and lets it repopulate from live review traffic. + const name = freshName(); + // Open a separate Dexie handle declaring only the first two schema + // versions so we can seed a v2-shape row before the BitcoinmintsDB + // handle (which declares v3 and its upgrade hook) ever touches it. + const v2 = new Dexie(name); + v2.version(1).stores({ + announcements: "[pubkey+kind+d], eventId, kind, d, createdAt", + reviews: "[pubkey+kind+d], eventId, d, createdAt, k", + profiles: "pubkey, createdAt", + relayLists: "pubkey, createdAt", + mintInfo: "d, fetchedAt, ok", + mintAggregate: "d, bayesianRank, updatedAt", + }); + v2.version(2).stores({ + announcements: "[pubkey+kind+d], eventId, kind, d, createdAt, [kind+createdAt]", + }); + await v2.open(); + // Seed a v2-shape row — the pre-rename payload. + await v2.table("mintAggregate").put({ + d: "5fe928ae0970844f3c5253d2e85a88788486edcbd96c070334a4a2d0d0154a77", + averageRating: 4, + bayesianRank: 4 * Math.log10(11), + updatedAt: 1_700_000_000, + }); + expect(await v2.table("mintAggregate").count()).toBe(1); + v2.close(); + + // Reopen via BitcoinmintsDB (declares v3 + upgrade hook) — the + // upgrade callback should clear the mintAggregate table. + const v3 = new BitcoinmintsDB(name); + toDispose.push(v3); + await v3.open(); + expect(v3.verno).toBe(3); + expect(await v3.mintAggregate.count()).toBe(0); + // And the v3 indexes are queryable — a live review upsert would + // repopulate via these. 
+ const byScore = await v3.mintAggregate.orderBy("bayesianScore").toArray(); + expect(byScore).toEqual([]); + const byAvg = await v3.mintAggregate.orderBy("avgRating").toArray(); + expect(byAvg).toEqual([]); + }); }); diff --git a/packages/core/src/cache/schema.ts b/packages/core/src/cache/schema.ts index 2445fa6..47292a9 100644 --- a/packages/core/src/cache/schema.ts +++ b/packages/core/src/cache/schema.ts @@ -194,14 +194,23 @@ export class BitcoinmintsDB extends Dexie { // v3: rename `bayesianRank` → `bayesianScore` on mintAggregate and add // `avgRating` to the index set so sort-by-avg queries don't need a full // table scan. This is the indexes materialized in PR #5's ranking - // aggregator. The prior `bayesianRank` index is dropped — rows written - // under v1/v2 (there are none shipped in production yet) would simply - // not be queryable by that old name. Any pre-existing rows in local - // dev caches are re-keyed by Dexie's additive migration; the shape - // change from `averageRating` to `avgRating` is a TypeScript-layer - // concern (Dexie doesn't type-check row payloads). - this.version(3).stores({ - mintAggregate: "d, bayesianScore, avgRating, updatedAt", - }); + // aggregator. The prior `bayesianRank` index is dropped. + // + // Upgrade semantics: Dexie auto-migrates the SCHEMA (indexes) but does + // NOT transform existing row PAYLOADS. A dev with a local v2 IndexedDB + // would otherwise have rows shaped `{d, averageRating, bayesianRank, + // updatedAt}` — the `averageRating` field is `avgRating` in v3 and + // `bayesianRank` doesn't exist — which would fail every v3 query shape + // (the indexes point at fields the row doesn't have). Since there's no + // prod data yet and the aggregate is re-derived from reviews on the + // next review upsert, a clean wipe is the correct migration: clear + // `mintAggregate`, let it repopulate from live review traffic. 
+ this.version(3) + .stores({ + mintAggregate: "d, bayesianScore, avgRating, updatedAt", + }) + .upgrade(async (tx) => { + await tx.table("mintAggregate").clear(); + }); } } From 1a9aa49fa96983fde35ed1dcd5880e1fbab8ef70 Mon Sep 17 00:00:00 2001 From: orveth Date: Fri, 17 Apr 2026 10:37:09 -0700 Subject: [PATCH 5/8] fix(core/reviews): explicit precedence fallthrough in parseRatingFromContent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The N/5 and N/10 content blocks previously didn't return when they matched but were out of range — the function silently fell through to the emoji regex. Behaviour was correct-by-accident: `0/10 total trash` would yield no-rating because there were no leading stars after, but `7/5 ⭐⭐⭐⭐⭐` would latch onto the emoji run and report 5★ despite the numeric prefix saying otherwise. Make the precedence explicit — when an N/5 or N/10 prefix MATCHES the content, we commit to that format and return (even when the parsed number is out of range, in which case we return undefined so the null fallback in parseReview kicks in). Add a header comment naming the rule so future readers don't re-introduce the silent fall-through. Co-Authored-By: Claude Opus 4.7 --- packages/core/src/reviews/parse.ts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/packages/core/src/reviews/parse.ts b/packages/core/src/reviews/parse.ts index 111b7cf..919ebf9 100644 --- a/packages/core/src/reviews/parse.ts +++ b/packages/core/src/reviews/parse.ts @@ -122,18 +122,30 @@ function countLeadingStars(match: string): number { * Extract rating from content using the 3rd and 4th precedence rules. * Format 3 (N/5 and N/10) wins over format 4 (emoji) — a content starting * with `[4/5] ⭐⭐⭐⭐⭐` parses as 4, not 5. + * + * Precedence is explicit, not emergent: when an N/5 or N/10 prefix matches + * the content but the parsed number is out of range (e.g. 
`0/10`, `7/5`), + * we return `undefined` rather than falling through to the emoji format. + * The reasoning: the author signalled "this review uses the numeric + * format" by leading with it — silently reading emoji that might follow + * would misrepresent their intent and reward malformed input. Out-of-range + * numeric prefixes collapse to "no rating" via the parseReview `?? null` + * fallback. */ function parseRatingFromContent(content: string): number | undefined { // Format 3a: N/5 anchored at start. + // precedence: this format consumed → return (even when out of range) const fiveMatch = content.match(CONTENT_FIVE_REGEX); if (fiveMatch?.[1]) { const r = toRating(fiveMatch[1]); if (r !== undefined) return r; + return undefined; } // Format 3b: N/10 anchored at start — divide by 2, round to nearest, // clamp into 1..5. We round-to-nearest (not floor) so `5/10` → 3 and // `7/10` → 4 rather than both flooring to 3. An `N` outside 0..10 is // treated as missing. + // precedence: this format consumed → return (even when out of range) const tenMatch = content.match(CONTENT_TEN_REGEX); if (tenMatch?.[1]) { const n = Number.parseInt(tenMatch[1], 10); @@ -143,6 +155,7 @@ function parseRatingFromContent(content: string): number | undefined { // than lying about a 1-star review. if (scaled >= MIN_RATING && scaled <= MAX_RATING) return scaled; } + return undefined; } // Format 4: leading 1..5 emoji run. const emojiMatch = content.match(CONTENT_EMOJI_REGEX); From a7d9a4531fac4fcb625674e3ed56ec5c11574555 Mon Sep 17 00:00:00 2001 From: orveth Date: Fri, 17 Apr 2026 10:38:24 -0700 Subject: [PATCH 6/8] test(core/reviews): concurrent CAS race, malformed tags, rankMints bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - reviews/upsert.test.ts: two concurrent upserts for the same d with distinct pubkeys converge — both reviews land AND the aggregate sees both (5 shuffled trials). 
Mirrors the announcement-side regression in cache/upsert.test.ts and guards the aggregate-in-same-transaction invariant against a pre-write-snapshot recompute. - reviews/parse.test.ts: table-driven coverage for malformed rating tag shapes (non-numeric, empty, missing, raw `null`). All fall through to `rating: null`. - reviews/rank.test.ts: `limit=0` returns [], `limit=Infinity` returns every mint in score-descending order — covers the boundary conditions the Dexie limit() clamp handles. Co-Authored-By: Claude Opus 4.7 --- packages/core/src/reviews/parse.test.ts | 27 ++++++++++++++++ packages/core/src/reviews/rank.test.ts | 25 +++++++++++++++ packages/core/src/reviews/upsert.test.ts | 39 ++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/packages/core/src/reviews/parse.test.ts b/packages/core/src/reviews/parse.test.ts index ba767da..cf06ce1 100644 --- a/packages/core/src/reviews/parse.test.ts +++ b/packages/core/src/reviews/parse.test.ts @@ -285,6 +285,33 @@ describe("parseReview — rating formats (precedence)", () => { }); }); +describe("parseReview — malformed rating tag forms", () => { + it("malformed rating tag forms fall through to null (no content fallback)", () => { + // Each of these shapes is "structurally a rating tag" but the value + // payload is unusable — either not a number, empty string, missing, + // or a raw `null` from a buggy emitter. None should parse to a + // rating, and without a content rating signal all should land at null. + const cases: string[][] = [ + ["rating", "foo", "5"], + ["rating", ""], + ["rating"], + ["rating", "", "5"], + // Raw `null` in the value slot, as a buggy JSON emitter could send + // it (cast to `string` only to satisfy the `string[][]` type). The + // parser guards `typeof t[1] !== "string"`, which catches this form. + // (The literal string "null" would be a non-numeric value like "foo".)
+ ["rating", null as unknown as string, "5"], + ]; + for (const tag of cases) { + const row = parseReview( + makeEvent({ tags: [["d", D_VALID], tag as string[]] }), + ); + expect(row).not.toBeNull(); + expect(row?.rating).toBeNull(); + } + }); +}); + describe("parseReview — null fallback", () => { it("returns rating: null when no rating tag and no content signal", () => { const row = parseReview( diff --git a/packages/core/src/reviews/rank.test.ts b/packages/core/src/reviews/rank.test.ts index a047315..af455cf 100644 --- a/packages/core/src/reviews/rank.test.ts +++ b/packages/core/src/reviews/rank.test.ts @@ -145,3 +145,28 @@ describe("rankMints — limit", () => { expect(top3[1]!.bayesianScore).toBeGreaterThan(top3[2]!.bayesianScore); }); }); + +describe("rankMints — limit bounds", () => { + it("limit=0 returns an empty array, no throw", async () => { + const db = await freshDB(); + await seedMint(db, dForIndex(60), 5, 3); + await seedMint(db, dForIndex(61), 4, 2); + expect(await rankMints(db, 0)).toEqual([]); + }); + + it("limit=Infinity returns every mint in score-descending order", async () => { + // Dexie's .limit() accepts Number.POSITIVE_INFINITY and clamps to the + // full result set (verified empirically in fake-indexeddb via this + // test). If this assertion ever breaks, swap in a high finite limit. + const db = await freshDB(); + await seedMint(db, dForIndex(70), 5, 10); // 5 * log10(11) ≈ 5.21 + await seedMint(db, dForIndex(71), 4, 3); // 4 * log10(4) ≈ 2.41 + await seedMint(db, dForIndex(72), 5, 1); // 5 * log10(2) ≈ 1.505 + const ranked = await rankMints(db, Number.POSITIVE_INFINITY); + expect(ranked).toHaveLength(3); + expect(ranked.map((r) => r.d)).toEqual([dForIndex(70), dForIndex(71), dForIndex(72)]); + // Strictly decreasing.
+ expect(ranked[0]!.bayesianScore).toBeGreaterThan(ranked[1]!.bayesianScore); + expect(ranked[1]!.bayesianScore).toBeGreaterThan(ranked[2]!.bayesianScore); + }); +}); diff --git a/packages/core/src/reviews/upsert.test.ts b/packages/core/src/reviews/upsert.test.ts index 005b097..45dfeeb 100644 --- a/packages/core/src/reviews/upsert.test.ts +++ b/packages/core/src/reviews/upsert.test.ts @@ -188,6 +188,45 @@ describe("upsertReviewWithAggregate — CAS + aggregate-stays-in-sync", () => { }); }); +describe("upsertReviewWithAggregate — concurrent CAS + aggregate race", () => { + it("two concurrent upserts for the same d (different pubkeys) converge — aggregate reflects BOTH reviews, 5 trials", async () => { + // Regression guard: the review CAS upsert and the aggregate recompute + // must share one transaction. If two concurrent upserts could both + // read the reviews table before either writes, the recompute would + // see only one review and the aggregate would drop to reviewCount=1. + // Dexie serializes rw-rw transactions on the same tables, so the + // correct outcome is both reviews land AND the aggregate sees both. + // Mirrors the announcement-side regression in cache/upsert.test.ts (~L288). + const pkA = `pk-a${"0".repeat(60)}`; + const pkB = `pk-b${"0".repeat(60)}`; + const reviewA = makeReview({ pubkey: pkA, eventId: EID_LOW, rating: 5 }); + const reviewB = makeReview({ pubkey: pkB, eventId: EID_HIGH, rating: 1 }); + + for (let trial = 0; trial < 5; trial++) { + const db = await freshDB(); + const ops = + trial % 2 === 0 + ? [upsertReviewWithAggregate(db, reviewA), upsertReviewWithAggregate(db, reviewB)] + : [upsertReviewWithAggregate(db, reviewB), upsertReviewWithAggregate(db, reviewA)]; + const results = await Promise.all(ops); + + // Both reviews land — distinct (pubkey, kind, d) triples don't CAS-fail.
+ expect(results).toEqual(["inserted", "inserted"]); + expect(await db.reviews.count()).toBe(2); + + // Aggregate reflects BOTH reviews — this is the invariant that + // would break if the recompute ran on a pre-write snapshot of + // the reviews table. + const agg = await db.mintAggregate.get(D_VALID); + expect(agg).toBeDefined(); + expect(agg?.reviewCount).toBe(2); + expect(agg?.ratedCount).toBe(2); + expect(agg?.avgRating).toBe(3); // (5 + 1) / 2 + expect(agg?.bayesianScore).toBeCloseTo(3 * Math.log10(3), 6); + } + }); +}); + describe("upsertReviewWithAggregate — Layer A gate", () => { it("16-char bot-spam d-tag → rejected-invalid, no review row, no aggregate row", async () => { const db = await freshDB(); From a1918eae3d3bf4153268b2136fdf6a8d9f80aec7 Mon Sep 17 00:00:00 2001 From: orveth Date: Fri, 17 Apr 2026 10:38:54 -0700 Subject: [PATCH 7/8] style(core/reviews): inline single-arg parseReview call to match biome format Co-Authored-By: Claude Opus 4.7 --- packages/core/src/reviews/parse.test.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/core/src/reviews/parse.test.ts b/packages/core/src/reviews/parse.test.ts index cf06ce1..f287aaf 100644 --- a/packages/core/src/reviews/parse.test.ts +++ b/packages/core/src/reviews/parse.test.ts @@ -303,9 +303,7 @@ describe("parseReview — malformed rating tag forms", () => { ["rating", null as unknown as string, "5"], ]; for (const tag of cases) { - const row = parseReview( - makeEvent({ tags: [["d", D_VALID], tag as string[]] }), - ); + const row = parseReview(makeEvent({ tags: [["d", D_VALID], tag as string[]] })); expect(row).not.toBeNull(); expect(row?.rating).toBeNull(); } From 6d21600b0f612c9b9e305d35d5f5cc8fd5895b05 Mon Sep 17 00:00:00 2001 From: orveth Date: Fri, 17 Apr 2026 10:41:55 -0700 Subject: [PATCH 8/8] fix(core/reviews/rank.test): remove duplicate pubkey/d keys to unblock typecheck TS2783: `...over` spread already supplies both required fields. 
--- packages/core/src/reviews/rank.test.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/core/src/reviews/rank.test.ts b/packages/core/src/reviews/rank.test.ts index af455cf..192b908 100644 --- a/packages/core/src/reviews/rank.test.ts +++ b/packages/core/src/reviews/rank.test.ts @@ -35,9 +35,7 @@ function dForIndex(n: number): string { function makeReview(over: Partial<ReviewRow> & { pubkey: string; d: string }): ReviewRow { return { - pubkey: over.pubkey, kind: 38000, - d: over.d, eventId: `${"0".repeat(58)}${over.pubkey.slice(-6)}`, createdAt: 1_700_000_000, content: "",