From 874a4c0d9f5dd187e74f25e193a2a9c0c3ed84b9 Mon Sep 17 00:00:00 2001 From: Shuhui Luo <107524008+shuhuiluo@users.noreply.github.com> Date: Sun, 14 Dec 2025 23:34:35 -0500 Subject: [PATCH] fix: retry thread lookup to handle anchor race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a comment arrives before the PR anchor mapping is committed to DB, the thread lookup fails and the comment is posted standalone. This adds exponential backoff retries (250ms → 500ms → 1000ms) to handle the race. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/constants.ts | 8 ++++++ src/services/message-delivery-service.ts | 36 ++++++++++++++++++++---- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/src/constants.ts b/src/constants.ts index aa32a0b..165b144 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -102,3 +102,11 @@ export const OAUTH_STATE_CLEANUP_INTERVAL_MS = 60 * 60 * 1000; * After this period, mappings are cleaned up and threads can no longer be grouped. */ export const MESSAGE_MAPPING_EXPIRY_DAYS = 30; + +/** + * Thread lookup retry settings for race condition handling. + * When a comment arrives before anchor mapping is committed, retry with exponential backoff. + * Schedule: 250ms → 500ms → 1000ms (total max: 1.75s) + */ +export const THREAD_LOOKUP_INITIAL_DELAY_MS = 250; +export const THREAD_LOOKUP_MAX_RETRIES = 3; diff --git a/src/services/message-delivery-service.ts b/src/services/message-delivery-service.ts index 1f6c6a6..46af3d7 100644 --- a/src/services/message-delivery-service.ts +++ b/src/services/message-delivery-service.ts @@ -1,6 +1,10 @@ import { and, eq, gt, lt } from "drizzle-orm"; -import { MESSAGE_MAPPING_EXPIRY_DAYS } from "../constants"; +import { + MESSAGE_MAPPING_EXPIRY_DAYS, + THREAD_LOOKUP_INITIAL_DELAY_MS, + THREAD_LOOKUP_MAX_RETRIES, +} from "../constants"; import { db } from "../db"; import { messageMappings } from "../db/schema"; import type { TownsBot } from "../types/bot"; @@ -63,20 +67,40 @@ export class MessageDeliveryService { } = params; try { - // Thread lookup: non-anchors reply to their parent anchor - const threadId = + // Thread lookup with retry: handles race condition where comment arrives + // before anchor mapping is committed (exponential backoff: 0 → 250 → 500 → 1000ms) + let threadId: string | undefined; + if ( entityContext && !entityContext.isAnchor && entityContext.parentType && entityContext.parentNumber != null - ? ((await this.getMessageId( + ) { + for (let attempt = 0; attempt <= THREAD_LOOKUP_MAX_RETRIES; attempt++) { + const delay = + attempt > 0 + ? THREAD_LOOKUP_INITIAL_DELAY_MS * 2 ** (attempt - 1) + : 0; + if (delay) await new Promise(r => setTimeout(r, delay)); + threadId = + (await this.getMessageId( spaceId, channelId, repoFullName, entityContext.parentType, String(entityContext.parentNumber) - )) ?? undefined) - : undefined; + )) ?? undefined; + if (threadId) { + if (delay) + console.log(`Thread lookup succeeded after ${delay}ms retry`); + break; + } + } + if (!threadId) + console.log( + `Thread lookup failed after ${THREAD_LOOKUP_MAX_RETRIES} retries for ${entityContext.parentType}:${entityContext.parentNumber}` + ); + } // Skip synthetic thread replies (closed/reopened) when anchor doesn't exist // These have githubEntityType === parentType (e.g., "pr" for PR close event)