From b4072aa94c7ae6e5360770df49125ef849df61ad Mon Sep 17 00:00:00 2001 From: Scott McCarty Date: Thu, 28 May 2026 23:22:40 -0400 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20moderation=20audit=20trail=20?= =?UTF-8?q?=E2=80=94=20system=20users,=20MCP=20identity,=20date=20validati?= =?UTF-8?q?on=20(#442)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-published items had moderated_by=NULL (no audit trail), MCP admin tools passed null as userId (destroying approval records), and parseDate accepted year 62025 (mangled AI output bypassing recency filters). - Add system user rows: Auto-Publisher (id=-1) and MCP Admin (id=-2) - processItem: set moderated_by=-1 when auto-publishing - MCP_ADMIN_USER_ID: -2 instead of null - parseDate: reject years outside 2000-2100 - Backfill 508 existing NULL moderated_by items Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/migrations/068_add_system_users.sql | 16 ++++++++++++++++ backend/services/dateExtractor.js | 2 ++ backend/services/mcpServer.js | 2 +- backend/services/moderationService.js | 11 +++++++---- 4 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 backend/migrations/068_add_system_users.sql diff --git a/backend/migrations/068_add_system_users.sql b/backend/migrations/068_add_system_users.sql new file mode 100644 index 00000000..e1650c7b --- /dev/null +++ b/backend/migrations/068_add_system_users.sql @@ -0,0 +1,16 @@ +-- 068_add_system_users.sql +-- Create system user accounts for auto-publisher and MCP admin so that +-- moderated_by always has an audit trail. + +INSERT INTO users (id, email, name, oauth_provider, oauth_provider_id, is_admin, role) +VALUES + (-1, 'auto-publisher@system.rotv', 'Auto-Publisher', 'system', 'auto-publisher', false, 'viewer'), + (-2, 'mcp@system.rotv', 'MCP Admin', 'system', 'mcp-admin', false, 'viewer') +ON CONFLICT (id) DO NOTHING; + +-- Backfill: tag the 508 existing auto-published items with the system user +UPDATE poi_news SET moderated_by = -1, moderated_at = COALESCE(moderated_at, moderation_date) +WHERE moderation_status = 'published' AND moderated_by IS NULL AND content_source = 'ai'; + +UPDATE poi_events SET moderated_by = -1, moderated_at = COALESCE(moderated_at, moderation_date) +WHERE moderation_status = 'published' AND moderated_by IS NULL AND content_source = 'ai'; diff --git a/backend/services/dateExtractor.js b/backend/services/dateExtractor.js index 6e399420..54a69b2b 100644 --- a/backend/services/dateExtractor.js +++ b/backend/services/dateExtractor.js @@ -7,6 +7,7 @@ export function parseDate(raw, timezone = 'America/New_York') { if (/^\d{4}-\d{2}-\d{2}$/.test(trimmed)) { const [y, m, d] = trimmed.split('-').map(Number); + if (y < 2000 || y > 2100) return null; const probe = new Date(y, m - 1, d); if (probe.getFullYear() === y && probe.getMonth() === m - 1 && probe.getDate() === d) { return trimmed; @@ -24,6 +25,7 @@ export function parseDate(raw, timezone = 'America/New_York') { const month = d.get('month'); const day = d.get('day'); if (!year || !month || !day) return null; + if (year < 2000 || year > 2100) return null; return `${year}-${String(month).padStart(2, '0')}-${String(day).padStart(2, '0')}`; } diff --git a/backend/services/mcpServer.js b/backend/services/mcpServer.js index 6d87cc49..dcab7fcc 100644 --- a/backend/services/mcpServer.js +++ b/backend/services/mcpServer.js @@ -35,7 +35,7 @@ import { queueNewsletterJob } from './jobScheduler.js'; -const MCP_ADMIN_USER_ID = null; +const MCP_ADMIN_USER_ID = -2; function registerTools(server, pool, boss) { diff --git a/backend/services/moderationService.js b/backend/services/moderationService.js index 820404ec..f3c3177a 100644 --- a/backend/services/moderationService.js +++ b/backend/services/moderationService.js @@ -398,27 +398,30 @@ export async function processItem(pool, contentType, contentId, { forceStatus = } scoring = { confidence_score: newScore / 8.0, reasoning }; + const autoModeratedBy = resolvedStatus === 'published' ? -1 : null; // Only write publication_date when rescore produced a new value — writing the existing // value back through this path can silently corrupt a previously-good timestamp if (rescoredDate) { await pool.query( `UPDATE ${table} SET moderation_processed = true, moderation_status = $1, publication_date = $2, date_consensus_score = $3, - ai_reasoning = $4, relevance_signals = $5, moderation_date = CURRENT_TIMESTAMP + ai_reasoning = $4, relevance_signals = $5, moderation_date = CURRENT_TIMESTAMP, + moderated_by = COALESCE($7, moderated_by), moderated_at = CASE WHEN $7 IS NOT NULL THEN CURRENT_TIMESTAMP ELSE moderated_at END WHERE id = $6`, [resolvedStatus, newDate, newScore, reasoning, relevanceVotes.length > 0 ? JSON.stringify(relevanceVotes) : null, - contentId] + contentId, autoModeratedBy] ); } else { await pool.query( `UPDATE ${table} SET moderation_processed = true, moderation_status = $1, date_consensus_score = $2, - ai_reasoning = $3, relevance_signals = $4, moderation_date = CURRENT_TIMESTAMP + ai_reasoning = $3, relevance_signals = $4, moderation_date = CURRENT_TIMESTAMP, + moderated_by = COALESCE($6, moderated_by), moderated_at = CASE WHEN $6 IS NOT NULL THEN CURRENT_TIMESTAMP ELSE moderated_at END WHERE id = $5`, [resolvedStatus, newScore, reasoning, relevanceVotes.length > 0 ? JSON.stringify(relevanceVotes) : null, - contentId] + contentId, autoModeratedBy] ); } From 39c842af925aae6b5813069f6bc664a4ee2250c4 Mon Sep 17 00:00:00 2001 From: Scott McCarty Date: Thu, 28 May 2026 23:24:16 -0400 Subject: [PATCH 2/2] refactor: extract system user IDs into shared constants (PR #443 review) Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/services/mcpServer.js | 2 +- backend/services/moderationService.js | 3 ++- backend/utils/systemUsers.js | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 backend/utils/systemUsers.js diff --git a/backend/services/mcpServer.js b/backend/services/mcpServer.js index dcab7fcc..769a7eb4 100644 --- a/backend/services/mcpServer.js +++ b/backend/services/mcpServer.js @@ -35,7 +35,7 @@ import { queueNewsletterJob } from './jobScheduler.js'; -const MCP_ADMIN_USER_ID = -2; +import { MCP_ADMIN_USER_ID } from '../utils/systemUsers.js'; function registerTools(server, pool, boss) { diff --git a/backend/services/moderationService.js b/backend/services/moderationService.js index f3c3177a..33100ca7 100644 --- a/backend/services/moderationService.js +++ b/backend/services/moderationService.js @@ -3,6 +3,7 @@ import { renderPage } from './renderPage.js'; import { deepCrawlForArticle, isGenericUrl } from './deepCrawler.js'; import { logInfo, logError, flush as flushJobLogs } from './jobLogger.js'; import { parseDate, parseDateTime, localToUTC, scoreDateConsensus, extractUrlDate } from './dateExtractor.js'; +import { AUTO_PUBLISHER_USER_ID } from '../utils/systemUsers.js'; import { scoreDate, normalizeRenderUrl, normalizeTitle } from './newsService.js'; import { denyReason, sweepDenyLists } from './filterLists.js'; @@ -398,7 +399,7 @@ export async function processItem(pool, contentType, contentId, { forceStatus = } scoring = { confidence_score: newScore / 8.0, reasoning }; - const autoModeratedBy = resolvedStatus === 'published' ? -1 : null; + const autoModeratedBy = resolvedStatus === 'published' ? AUTO_PUBLISHER_USER_ID : null; // Only write publication_date when rescore produced a new value — writing the existing // value back through this path can silently corrupt a previously-good timestamp if (rescoredDate) { diff --git a/backend/utils/systemUsers.js b/backend/utils/systemUsers.js new file mode 100644 index 00000000..9c80b931 --- /dev/null +++ b/backend/utils/systemUsers.js @@ -0,0 +1,2 @@ +export const AUTO_PUBLISHER_USER_ID = -1; +export const MCP_ADMIN_USER_ID = -2;