crunchtools · fatherlinux · May 29, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/.specify/specs/030-moderation-gates/plan.md b/.specify/specs/030-moderation-gates/plan.md
@@ -0,0 +1,145 @@
+# Implementation Plan: Three-Gate Auto-Moderation
+
+> **Spec ID:** 030-moderation-gates
+> **Status:** Planning
+> **Last Updated:** 2026-05-29
+> **Estimated Effort:** M
+
+## Summary
+
+Refactor the news/event branch of `processItem()` in `moderationService.js` into three
+explicit gates — **Date**, **Relevance**, **POI** — each returning a `{verdict, reason}`,
+combine them into the publish/reject/pending decision, persist the verdicts in a new
+`moderation_gates` JSONB column, and surface them in the moderation card. Fold the POI
+Tier-1 check into the existing relevance votes; add geo-driven Tier-2 auto-reassign. Make
+the sweep batch size configurable so a monthly dump clears quickly.
+
+---
+
+## Architecture
+
+### Decision flow (news/event)
+
+```
+processItem(news|event)
+  ├─ hard rejects (unchanged): duplicate · no source URL · deny list
+  │
+  ├─ Gate: DATE        evaluateDateGate(row, { threshold, floorYear, trustedDomains })
+  │        pass  = date present, not future, year ≥ floor, AND (consensus ≥ threshold OR trusted domain)
+  │        review = missing / implausible / low-consensus-untrusted
+  │
+  ├─ Gate: RELEVANCE   from runContentRelevanceVotes() (3 votes, each {relevant, about_poi})
+  │        pass  = unanimous YES
+  │        fail  = unanimous NO     → REJECT (hard)
+  │        review= split
+  │
+  ├─ Gate: POI         evaluatePoiGate(pool, row, votes)
+  │        Tier 1 pass  = majority about_poi=YES (free, from votes)
+  │        Tier 2       = on Tier-1 miss, getReassignmentCandidates() → 1 LLM call picks
+  │                       owner/boundary/none; match ⇒ reassign poi_id + pass
+  │        Tier 3 review= none confirmed
+  │
+  └─ COMBINE
+       all three pass            → auto_approved (moderated_by = AUTO_PUBLISHER_USER_ID)
+       relevance fail            → rejected
+       otherwise                 → pending
+     persist: moderation_status, moderation_gates JSONB, ai_reasoning (summary),
+              relevance_signals (votes), confidence_score, moderation_processed=true
+```
+
+### Data flow for Tier-2 reassignment
+
+1. Tier 1 misses (content relevant but not about the assigned POI).
+2. `getReassignmentCandidates(pool, poiId)` returns `{ owner: {id,name}|null, boundary: {id,name}|null }`
+   — owner from `pois.owner_id`, boundary = smallest containing boundary POI.
+3. One structured LLM call: given title/summary + candidate names, return `assigned|owner|boundary|none`.
+4. `owner`/`boundary` ⇒ `UPDATE … SET poi_id = <newId>`, gate `pass`, record `reassigned_from/to`.
+5. `none`/no candidates/geo error ⇒ Tier 3 `review`.
+
+---
+
+## Implementation Steps
+
+### Phase 1: Schema + settings
+- [ ] `backend/migrations/070_moderation_gates.sql` — `ADD COLUMN IF NOT EXISTS moderation_gates JSONB` on `poi_news` and `poi_events`; insert `moderation_date_floor_year` (2010) and `moderation_sweep_batch_size` (50) into `admin_settings` (ON CONFLICT DO NOTHING).
+- [ ] `backend/routes/admin.js` — add both keys to the allowed-settings write list (~line 515–532).
+
+### Phase 2: Gate logic (backend)
+- [ ] `geoService.js` — add `getReassignmentCandidates(pool, poiId)` (owner via `owner_id`; smallest containing boundary POI id+name). Graceful `{owner:null,boundary:null}` on error.
+- [ ] `geminiService.js` — extend the relevance-vote response contract to include `about_poi` (boolean); add `assignBestPoi(pool, item, candidates)` returning `assigned|owner|boundary|none`.
+- [ ] `moderationService.js`:
+  - [ ] `evaluateDateGate(row, cfg)` → `{verdict, reason, trusted_source}`.
+  - [ ] `evaluatePoiGate(pool, row, votes)` → `{verdict, tier, reason, reassigned_from, reassigned_to}` (does the reassign UPDATE on Tier 2).
+  - [ ] derive relevance gate from votes (`unanimousYes`/`unanimousNo`/split).
+  - [ ] replace the inline decision block (lines ~374–427) with the three-gate combine; write `moderation_gates`.
+  - [ ] `processPendingItems()` — read `moderation_sweep_batch_size` and use it for the three `LIMIT` queries (default 50).
+  - [ ] `getQueue()` — add `n.moderation_gates` / `e.moderation_gates` to the SELECT lists (NULL for photos).
+
+### Phase 3: Admin UI
+- [ ] `ModerationExtras.jsx` — render three gate badges (Date / Relevance / POI) colored by verdict (green/orange/red) with reason tooltips; show reassignment ("→ Liberty Park") on Tier-2; expand to list relevance votes. Render only when `moderation_gates` present.
+
+### Phase 4: Tests
+- [ ] `backend/tests/services/moderationService.test.js` — unit-test `evaluateDateGate` (trusted vs untrusted, floor-year, future, missing) and the combine logic; keep existing `applyQualityFilters`/`getDomainReputation` tests or migrate them.
+- [ ] Add a focused test for `evaluatePoiGate` Tier-1/2/3 with mocked candidates + LLM.
+
+---
+
+## File Changes
+
+### New Files
+| File | Purpose |
+|------|---------|
+| `backend/migrations/070_moderation_gates.sql` | `moderation_gates` column + 2 settings |
+
+### Modified Files
+| File | Changes |
+|------|---------|
+| `backend/services/moderationService.js` | three-gate refactor of `processItem`, configurable sweep batch, `getQueue` SELECT |
+| `backend/services/geoService.js` | `getReassignmentCandidates()` |
+| `backend/services/geminiService.js` | `about_poi` in vote contract; `assignBestPoi()` |
+| `backend/routes/admin.js` | allow new settings keys |
+| `frontend/src/components/ModerationExtras.jsx` | gate badges + votes |
+| `backend/tests/services/moderationService.test.js` | gate unit tests |
+
+---
+
+## Database Migrations
+
+```sql
+-- Migration: 070_moderation_gates
+ALTER TABLE poi_news   ADD COLUMN IF NOT EXISTS moderation_gates JSONB;
+ALTER TABLE poi_events ADD COLUMN IF NOT EXISTS moderation_gates JSONB;
+
+INSERT INTO admin_settings (key, value, updated_at) VALUES
+  ('moderation_date_floor_year', '2010', CURRENT_TIMESTAMP),
+  ('moderation_sweep_batch_size', '50', CURRENT_TIMESTAMP)
+ON CONFLICT (key) DO NOTHING;
+```
+
+---
+
+## Testing Strategy
+
+### Manual Testing (port 8083, container `rotv-modgates`)
+1. `./run.sh start`, log in to admin, open Moderation Queue.
+2. Trigger a sweep (or wait for the scheduler); confirm pending items gain confidence % + three gate badges.
+3. Verify a trusted-source recent-date news item with unanimous-yes relevance auto-publishes.
+4. Verify an item about a park (assigned to a sub-POI) gets reassigned to the parent boundary and shows "→ <Park>".
+5. Verify a split-relevance or no-date item stays pending with the failing gate flagged.
+
+---
+
+## Risks and Mitigations
+| Risk | Impact | Mitigation |
+|------|--------|------------|
+| Tier-2 reassign moves an item to the wrong POI | Med | Only on confident LLM pick among a tiny candidate set; records reassignment for audit; never rejects |
+| Extra LLM call raises cost | Low | Tier-2 call fires only on Tier-1 misses, not every item |
+| Auto-publishing too aggressively | Med | Publishing requires all three gates to pass (relevance must be unanimous); defaults are conservative |
+| Migration re-run on every deploy | Low | Additive + `IF NOT EXISTS` / `ON CONFLICT DO NOTHING` |
+
+---
+
+## Changelog
+| Date | Changes |
+|------|---------|
+| 2026-05-29 | Initial plan |
diff --git a/.specify/specs/030-moderation-gates/spec.md b/.specify/specs/030-moderation-gates/spec.md
@@ -0,0 +1,159 @@
+# Specification: Three-Gate Auto-Moderation
+
+> **Spec ID:** 030-moderation-gates
+> **Status:** Draft
+> **Version:** 0.1.0
+> **Author:** Scott McCarty
+> **Date:** 2026-05-29
+
+## Overview
+
+The monthly collection run dumps hundreds of unscored items into the moderation
+queue (610 pending news items as of this writing, all with `confidence_score = null`).
+The existing auto-moderation logic collapses several distinct judgments into one
+opaque decision, never auto-rejects borderline content, and surfaces almost nothing
+to the admin about *why* an item is pending.
+
+This feature restructures auto-moderation around the **three independent judgments
+Scott actually makes by hand** when reviewing an item — **Date**, **Relevance**, and
+**POI correctness** — and auto-publishes only when all three pass. Each gate's verdict
+and reasoning is stored and shown in the admin UI, so a pending item tells you exactly
+which gate needs a human. The sweep that scores the backlog is made fast enough to
+clear a monthly dump.
+
+---
+
+## User Stories
+
+### Auto-moderation
+
+**US-001: Three-gate auto-publish**
+> As the site admin, I want items auto-published only when the date, relevance, AND
+> POI assignment all pass, so that I only hand-review the items where one of those
+> three judgments is uncertain.
+
+Acceptance Criteria:
+- [ ] An item is `auto_approved` only when all three gates return `pass`.
+- [ ] An item is `rejected` only on a hard-fail (existing: duplicate, no source URL, deny list) or a unanimous-NO relevance vote.
+- [ ] Every other item is `pending` with each gate's verdict recorded.
+- [ ] No item is ever rejected for being *old* — age is never a negative signal (historical content is valuable).
+
+**US-002: Date gate**
+> As the admin, I want a date to count as trustworthy when it is plausible and comes
+> from a source I trust, so that good dates from sources like cleveland.com / akron.com /
+> the trusted-domain list auto-pass without me checking each one.
+
+Acceptance Criteria:
+- [ ] Date gate `pass` requires: a publication date present, not in the future, with a plausible year (≥ floor, default 2010), AND (date-consensus score ≥ threshold **OR** the source domain is on `moderation_trusted_domains`).
+- [ ] A hallucinated date (year below the floor, e.g. an 1800s value) yields `review`, not `pass`.
+- [ ] A missing date, or a low-consensus date from an untrusted domain, yields `review`.
+- [ ] Old-but-trusted dates still pass (no recency ceiling).
+
+**US-003: Relevance gate (with visibility)**
+> As the admin, I want to see how the LLM relevance vote actually went, so that I trust
+> (or correct) the relevance judgment instead of guessing.
+
+Acceptance Criteria:
+- [ ] Relevance gate `pass` = unanimous YES across the votes; `fail` (reject) = unanimous NO; anything split = `review`.
+- [ ] The individual votes and their one-line reasons are visible in the moderation card.
+
+**US-004: POI gate (three-tier, with auto-reassign)**
+> As the admin, I want the AI to confirm the item belongs to its assigned POI — or, if
+> it really belongs to that POI's owner or its containing park boundary, to move it
+> there automatically — and only drop it on me when neither can be confirmed.
+
+The gate resolves in three tiers:
+
+1. **Relevant to the assigned POI?** → `pass`, keep the POI as-is.
+2. **Otherwise, more relevant to the POI's _owner_ (its `owner_id` organization) or its
+   _immediate geofence_ (the smallest boundary POI that contains it — e.g. Liberty Park
+   Nature Center → its parent *Liberty Park* boundary)?** → **reassign** `poi_id` to that
+   owner/boundary POI and `pass`.
+3. **Neither can be confirmed** → `review` (drop into the pending queue).
+
+Acceptance Criteria:
+- [ ] The relevance vote also returns whether the content is about the assigned POI (folded into the existing call — Tier 1 costs no extra round-trips).
+- [ ] Tier 2 candidate POIs come from existing relationships: `pois.owner_id` (owner org) and the smallest containing boundary POI (via the `getContainingBoundaries` machinery in `geoService.js`). A single follow-up LLM call — made **only** for items that fail Tier 1 — picks the best-fitting candidate or "none".
+- [ ] On a Tier 2 match, `poi_id` is updated to the owner/boundary POI; the gate verdict records the reassignment (old → new POI) for visibility.
+- [ ] POI gate never auto-rejects. Tier 3 routes to `pending` with a "Check POI" signal.
+- [ ] If PostGIS / geo lookups are unavailable, Tier 2 degrades gracefully to Tier 3 (review) rather than erroring — mirroring `getRollupPoiIds`.
+
+### Backlog throughput
+
+**US-005: Sweep keeps up with a monthly dump**
+> As the admin, I want the scoring sweep to clear a monthly backlog in a reasonable
+> number of cycles, so the queue isn't stuck showing hundreds of unscored items.
+
+Acceptance Criteria:
+- [ ] The per-cycle sweep batch size is a configurable setting (default raised from 20 to 50).
+- [ ] After the sweep runs, pending items carry gate verdicts and a confidence score (no more `null` scores sitting in the queue).
+
+---
+
+## Data Model
+
+### Schema Changes
+
+```sql
+-- Per-item structured gate verdicts (idempotent, additive)
+ALTER TABLE poi_news   ADD COLUMN IF NOT EXISTS moderation_gates JSONB;
+ALTER TABLE poi_events ADD COLUMN IF NOT EXISTS moderation_gates JSONB;
+```
+
+`moderation_gates` shape:
+```json
+{
+  "date":      { "verdict": "pass|review", "reason": "...", "trusted_source": true },
+  "relevance": { "verdict": "pass|review|fail", "reason": "...", "yes": 3, "total": 3 },
+  "poi":       { "verdict": "pass|review", "tier": 1, "reason": "...", "reassigned_from": null, "reassigned_to": null }
+}
+```
+
+### New `admin_settings`
+
+| key | default | purpose |
+|-----|---------|---------|
+| `moderation_date_floor_year` | `2010` | dates below this year are implausible → date gate `review` |
+| `moderation_sweep_batch_size` | `50` | items processed per type per sweep cycle |
+
+Reuses existing `moderation_trusted_domains` and `moderation_news_date_threshold`.
+
+---
+
+## API Endpoints
+
+No new endpoints. `GET /api/admin/moderation/queue` gains a `moderation_gates` field per item; the two new keys are added to the allowed `admin_settings` write list.
+
+---
+
+## UI/UX Requirements
+
+### Modified Components
+
+- `ModerationExtras` — add three gate badges (Date / Relevance / POI), colored
+  green (`pass`) / orange (`review`) / red (`fail`), each with its reason as a tooltip;
+  expand to show the relevance votes. Existing confidence % and triage chips stay.
+
+---
+
+## Non-Functional Requirements
+
+**NFR-001: Minimal added LLM cost**
+- The Tier-1 POI judgment folds into the existing relevance-vote call (no new round-trips). Tier 2 adds at most one follow-up LLM call, and only for items that miss Tier 1.
+
+**NFR-002: Idempotent + backward compatible**
+- Migration is additive and re-runnable. Items without `moderation_gates` render exactly as today.
+
+---
+
+## Open Questions
+
+_None blocking — defaults chosen per Scott's review and the "old news is valuable" rule._
+
+---
+
+## Changelog
+
+| Version | Date | Changes |
+|---------|------|---------|
+| 0.1.0 | 2026-05-29 | Initial draft |
diff --git a/backend/migrations/070_moderation_gates.sql b/backend/migrations/070_moderation_gates.sql
@@ -0,0 +1,24 @@
+-- Migration 070: Three-gate auto-moderation
+-- Spec 030-moderation-gates
+--
+-- Stores the per-item verdicts of the three independent moderation gates
+-- (date / relevance / POI) so the admin queue can show exactly which gate
+-- needs a human. Additive and idempotent — re-runs on every container start.
+
+ALTER TABLE poi_news   ADD COLUMN IF NOT EXISTS moderation_gates JSONB;
+ALTER TABLE poi_events ADD COLUMN IF NOT EXISTS moderation_gates JSONB;
+
+-- Date gate: dates below this year are implausible (e.g. hallucinated 1800s
+-- values), so the date gate returns `review` for them instead of `pass`.
+-- Sweep batch: items scored per content type per scheduled sweep cycle
+-- (raised from the old hardcoded 20 so a monthly dump clears in a few cycles).
+DO $$
+BEGIN
+  IF EXISTS (SELECT FROM pg_tables WHERE schemaname = 'public' AND tablename = 'admin_settings') THEN
+    INSERT INTO admin_settings (key, value, updated_at)
+    VALUES
+      ('moderation_date_floor_year', '2010', CURRENT_TIMESTAMP),
+      ('moderation_sweep_batch_size', '50', CURRENT_TIMESTAMP)
+    ON CONFLICT (key) DO NOTHING;
+  END IF;
+END$$;
diff --git a/backend/migrations/071_pois_boundary_geom_gist.sql b/backend/migrations/071_pois_boundary_geom_gist.sql
@@ -0,0 +1,9 @@
+-- Migration 071: GiST index on pois.boundary_geom
+-- Spec 030-moderation-gates (PR #447 review)
+--
+-- The POI gate's Tier-2 reassignment (getReassignmentCandidates) runs ST_Contains
+-- against boundary polygons, as do getContainingBoundaries and getRollupPoiIds. Only
+-- the point `geom` column was indexed (idx_pois_geom); index boundary_geom too so the
+-- spatial containment lookups stay index-backed as boundary data grows.
+
+CREATE INDEX IF NOT EXISTS idx_pois_boundary_geom ON pois USING GIST (boundary_geom);
diff --git a/backend/routes/admin.js b/backend/routes/admin.js
@@ -515,6 +515,8 @@ export function createAdminRouter(pool, invalidateMosaicCache) {
       'moderation_enabled',
       'moderation_auto_approve_threshold',
       'moderation_auto_approve_enabled',
+      'moderation_date_floor_year',
+      'moderation_sweep_batch_size',
       'photo_submissions_enabled',
       'apify_api_token',
       'news_collection_prompt',

diff --git a/backend/routes/userSettings.js b/backend/routes/userSettings.js
@@ -115,10 +115,10 @@ export function createUserSettingsRouter(pool) {
 
   router.get('/mcp-token', isAuthenticated, async (req, res) => {
     try {
-      const result = await pool.query(
+      const tokenRow = await pool.query(
         'SELECT mcp_token FROM users WHERE id = $1', [req.user.id]
       );
-      let token = result.rows[0]?.mcp_token;
+      let token = tokenRow.rows[0]?.mcp_token;
       if (!token) {
         token = crypto.randomBytes(32).toString('base64url');
         await pool.query(

diff --git a/backend/services/dateExtractor.js b/backend/services/dateExtractor.js
@@ -184,7 +184,10 @@ export function scoreDeterministicSources(sources = {}) {
   for (const d of (sources.meta || [])) add(d, 1, 'meta');
   for (const d of (sources.timeTags || [])) add(d, 1, 'time-tag');
   add(sources.url, 1, 'url');
-  add(sources.searchDate, 3, 'search-date');
+  // Search-engine dates have proven reliable in moderation (often dead-on, even for
+  // Facebook/blog sources with no on-page date), so weight them on par with JSON-LD —
+  // an SE date alone then clears the date gate. (spec 030)
+  add(sources.searchDate, 4, 'search-date');
 
   return { scores, sourceMap };
 }