From 8a65f175c24d88d1c0e09b2b833872f652b9db3c Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 15 Jun 2026 13:12:09 -0500 Subject: [PATCH 1/4] Stop idle auto-compact loop on repeated/model-not-found failures --- .../services/idleCompactionService.test.ts | 47 ++++++++++ src/node/services/idleCompactionService.ts | 60 ++++++++++++ src/node/services/serviceContainer.ts | 6 ++ src/node/services/workspaceService.test.ts | 93 +++++++++++++++++++ src/node/services/workspaceService.ts | 52 ++++++++++- 5 files changed, 256 insertions(+), 2 deletions(-) diff --git a/src/node/services/idleCompactionService.test.ts b/src/node/services/idleCompactionService.test.ts index b488936a5f..beaacb3878 100644 --- a/src/node/services/idleCompactionService.test.ts +++ b/src/node/services/idleCompactionService.test.ts @@ -387,4 +387,51 @@ describe("IdleCompactionService", () => { expect(executeIdleCompactionMock).not.toHaveBeenCalled(); }); }); + + describe("recordOutcome (failure suppression)", () => { + const threshold24h = 24 * oneHourMs; + + test("stops the loop after two consecutive failures", async () => { + service.recordOutcome(testWorkspaceId, { success: false, modelNotFound: false }); + + // One failure is not enough to suppress. + const afterOne = await service.checkEligibility(testWorkspaceId, threshold24h, now); + expect(afterOne.eligible).toBe(true); + + service.recordOutcome(testWorkspaceId, { success: false, modelNotFound: false }); + + const afterTwo = await service.checkEligibility(testWorkspaceId, threshold24h, now); + expect(afterTwo.eligible).toBe(false); + expect(afterTwo.reason).toBe("suppressed_after_failures"); + }); + + test("stops the loop immediately on a model_not_found failure", async () => { + service.recordOutcome(testWorkspaceId, { success: false, modelNotFound: true }); + + const result = await service.checkEligibility(testWorkspaceId, threshold24h, now); + expect(result.eligible).toBe(false); + expect(result.reason).toBe("suppressed_after_failures"); + }); + + test("a success between failures resets the consecutive failure streak", async () => { + service.recordOutcome(testWorkspaceId, { success: false, modelNotFound: false }); + service.recordOutcome(testWorkspaceId, { success: true }); + service.recordOutcome(testWorkspaceId, { success: false, modelNotFound: false }); + + // Only one failure since the last success, so the workspace is still eligible. + const result = await service.checkEligibility(testWorkspaceId, threshold24h, now); + expect(result.eligible).toBe(true); + }); + + test("checkAllWorkspaces no longer queues a suppressed workspace", async () => { + // A non-recoverable failure suppresses the workspace immediately. + service.recordOutcome(testWorkspaceId, { success: false, modelNotFound: true }); + + await service.checkAllWorkspaces(); + + // Give the (fire-and-forget) queue a chance to run; it must not execute. + await new Promise((resolve) => setTimeout(resolve, 20)); + expect(executeIdleCompactionMock).not.toHaveBeenCalled(); + }); + }); }); diff --git a/src/node/services/idleCompactionService.ts b/src/node/services/idleCompactionService.ts index 2c17134abe..091d9dcd5a 100644 --- a/src/node/services/idleCompactionService.ts +++ b/src/node/services/idleCompactionService.ts @@ -9,11 +9,29 @@ const INITIAL_CHECK_DELAY_MS = 60 * 1000; // 1 minute - let startup initializati const CHECK_INTERVAL_MS = 60 * 60 * 1000; // 1 hour const HOURS_TO_MS = 60 * 60 * 1000; +/** + * Stop attempting idle compaction for a workspace once it has failed this many + * times in a row. The hourly loop would otherwise re-queue a persistently + * failing workspace forever (a failed compaction neither marks the workspace + * `compacted` nor refreshes recency, so it stays eligible). + */ +const MAX_CONSECUTIVE_IDLE_COMPACTION_FAILURES = 2; + interface QueuedIdleCompaction { workspaceId: string; thresholdMs: number; } +/** + * Terminal outcome of an idle compaction attempt, reported back to the service + * so it can decide whether to keep attempting compaction for a workspace. + * + * `modelNotFound` is treated as non-recoverable: the configured compaction model + * does not exist / is not available, and retrying will keep failing the same way, + * so we stop after a single occurrence rather than waiting for two failures. + */ +export type IdleCompactionOutcome = { success: true } | { success: false; modelNotFound: boolean }; + /** * IdleCompactionService monitors workspaces for idle time and executes * compaction directly through a backend callback. @@ -33,6 +51,11 @@ export class IdleCompactionService { private readonly activeWorkspaceIds = new Set(); private isProcessingQueue = false; private stopped = false; + // Per-workspace count of consecutive failed idle compactions (reset on success). + private readonly consecutiveFailures = new Map(); + // Workspaces for which the idle compaction loop has been stopped after repeated + // (or non-recoverable) failures. Sticky for the service lifetime; cleared on restart. + private readonly suppressedWorkspaceIds = new Set(); constructor( config: Config, @@ -223,6 +246,12 @@ export class IdleCompactionService { thresholdMs: number, now: number ): Promise<{ eligible: boolean; reason?: string }> { + // 0. Has the loop been stopped for this workspace after repeated/non-recoverable + // failures? Skip before touching history so a failing workspace stops re-queuing. + if (this.suppressedWorkspaceIds.has(workspaceId)) { + return { eligible: false, reason: "suppressed_after_failures" }; + } + // 1. Has messages? Only need tail messages — recency + last-message checks don't need full history. const historyResult = await this.historyService.getLastMessages(workspaceId, 50); if (!historyResult.success || historyResult.data.length === 0) { @@ -260,4 +289,35 @@ export class IdleCompactionService { return { eligible: true }; } + + /** + * Record the terminal outcome of an idle compaction attempt so the loop can + * stop re-attempting a persistently failing workspace. + * + * - Success resets the consecutive failure count. + * - A `model_not_found` failure is non-recoverable, so the loop stops immediately. + * - Any other failure stops the loop once it happens twice in a row. + * + * Suppression is in-memory and sticky for the service lifetime; restarting the + * app (e.g. after fixing the configured compaction model) clears it. + */ + recordOutcome(workspaceId: string, outcome: IdleCompactionOutcome): void { + if (outcome.success) { + this.consecutiveFailures.delete(workspaceId); + return; + } + + const failures = (this.consecutiveFailures.get(workspaceId) ?? 0) + 1; + this.consecutiveFailures.set(workspaceId, failures); + + if (outcome.modelNotFound || failures >= MAX_CONSECUTIVE_IDLE_COMPACTION_FAILURES) { + this.suppressedWorkspaceIds.add(workspaceId); + this.consecutiveFailures.delete(workspaceId); + log.warn("Stopping idle compaction for workspace after failure", { + workspaceId, + reason: outcome.modelNotFound ? "model_not_found" : "consecutive_failures", + failures, + }); + } + } } diff --git a/src/node/services/serviceContainer.ts b/src/node/services/serviceContainer.ts index 5d252d0965..ef11114547 100644 --- a/src/node/services/serviceContainer.ts +++ b/src/node/services/serviceContainer.ts @@ -254,6 +254,12 @@ export class ServiceContainer { this.extensionMetadata, (workspaceId) => this.workspaceService.executeIdleCompaction(workspaceId) ); + // Forward terminal idle-compaction outcomes so the loop stops re-attempting a + // persistently failing workspace (immediately on model_not_found, otherwise after + // two consecutive failures). + this.workspaceService.setIdleCompactionOutcomeListener((workspaceId, outcome) => + this.idleCompactionService.recordOutcome(workspaceId, outcome) + ); // IdleDispatcher + goal continuation bridge are owned by createCoreServices // so the wiring works for `mux run` too. Share the same dispatcher with // HeartbeatService — its priority ordering ensures an active goal diff --git a/src/node/services/workspaceService.test.ts b/src/node/services/workspaceService.test.ts index cd44ad0623..1719fd03de 100644 --- a/src/node/services/workspaceService.test.ts +++ b/src/node/services/workspaceService.test.ts @@ -1,5 +1,6 @@ import { describe, expect, test, mock, beforeEach, afterEach, spyOn, type Mock } from "bun:test"; import { WorkspaceService, generateForkBranchName, generateForkTitle } from "./workspaceService"; +import type { IdleCompactionOutcome } from "./idleCompactionService"; import type { AgentSession } from "./agentSession"; import { WorkspaceLifecycleHooks } from "./workspaceLifecycleHooks"; import { EventEmitter } from "events"; @@ -2821,6 +2822,98 @@ describe("WorkspaceService idle compaction dispatch", () => { } expect(executionError.message).toContain("idle-only send was skipped"); }); + + test("reports a model_not_found outcome when the compaction model is invalid", async () => { + const workspaceId = "idle-model-not-found-ws"; + const sendMessage = mock(() => + Promise.resolve( + Err({ + type: "invalid_model_string" as const, + message: "Invalid model string: openai:does-not-exist", + }) + ) + ); + const buildIdleCompactionSendOptions = mock(() => + Promise.resolve({ model: "openai:does-not-exist", agentId: "compact" }) + ); + const session = { isBusy: mock(() => false) } as unknown as AgentSession; + + ( + workspaceService as unknown as { + sendMessage: typeof sendMessage; + buildIdleCompactionSendOptions: typeof buildIdleCompactionSendOptions; + getOrCreateSession: (workspaceId: string) => AgentSession; + } + ).sendMessage = sendMessage; + ( + workspaceService as unknown as { + buildIdleCompactionSendOptions: typeof buildIdleCompactionSendOptions; + } + ).buildIdleCompactionSendOptions = buildIdleCompactionSendOptions; + ( + workspaceService as unknown as { + getOrCreateSession: (workspaceId: string) => AgentSession; + } + ).getOrCreateSession = () => session; + + const outcomes: Array<{ workspaceId: string; outcome: IdleCompactionOutcome }> = []; + workspaceService.setIdleCompactionOutcomeListener((id, outcome) => + outcomes.push({ workspaceId: id, outcome }) + ); + + let threw = false; + try { + await workspaceService.executeIdleCompaction(workspaceId); + } catch { + threw = true; + } + + expect(threw).toBe(true); + expect(outcomes).toEqual([{ workspaceId, outcome: { success: false, modelNotFound: true } }]); + }); + + test("reports a non-model_not_found outcome for generic pre-stream failures", async () => { + const workspaceId = "idle-generic-failure-ws"; + const sendMessage = mock(() => Promise.resolve(Err({ type: "unknown" as const, raw: "boom" }))); + const buildIdleCompactionSendOptions = mock(() => + Promise.resolve({ model: "openai:gpt-4o", agentId: "compact" }) + ); + const session = { isBusy: mock(() => false) } as unknown as AgentSession; + + ( + workspaceService as unknown as { + sendMessage: typeof sendMessage; + buildIdleCompactionSendOptions: typeof buildIdleCompactionSendOptions; + getOrCreateSession: (workspaceId: string) => AgentSession; + } + ).sendMessage = sendMessage; + ( + workspaceService as unknown as { + buildIdleCompactionSendOptions: typeof buildIdleCompactionSendOptions; + } + ).buildIdleCompactionSendOptions = buildIdleCompactionSendOptions; + ( + workspaceService as unknown as { + getOrCreateSession: (workspaceId: string) => AgentSession; + } + ).getOrCreateSession = () => session; + + const outcomes: Array<{ workspaceId: string; outcome: IdleCompactionOutcome }> = []; + workspaceService.setIdleCompactionOutcomeListener((id, outcome) => + outcomes.push({ workspaceId: id, outcome }) + ); + + let threw = false; + try { + await workspaceService.executeIdleCompaction(workspaceId); + } catch { + threw = true; + } + + expect(threw).toBe(true); + expect(outcomes).toEqual([{ workspaceId, outcome: { success: false, modelNotFound: false } }]); + }); + test("prefers global compact thinking default over exec and activity fallbacks", async () => { const projectPath = "/tmp/project"; const workspacePath = "/tmp/project/ws"; diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index b9dbeea999..0ab4f65eda 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -100,7 +100,10 @@ import type { } from "@/common/orpc/types"; import type { z } from "zod"; -import type { SendMessageError } from "@/common/types/errors"; +import type { SendMessageError, StreamErrorType } from "@/common/types/errors"; +// Aliased to avoid clashing with the private `formatSendMessageError` string formatter below. +import { formatSendMessageError as classifySendMessageError } from "@/node/services/utils/sendMessageError"; +import type { IdleCompactionOutcome } from "@/node/services/idleCompactionService"; import type { FrontendWorkspaceMetadata, GitStatus, @@ -1458,6 +1461,13 @@ export class WorkspaceService extends EventEmitter { // can tag the stream, letting the frontend suppress notifications for maintenance work. private readonly idleCompactingWorkspaces = new Set(); + // Reports the terminal outcome of an idle compaction (success or failure, including + // mid-stream failures like model_not_found) back to IdleCompactionService so it can + // stop re-attempting a persistently failing workspace. Wired in ServiceContainer. + private idleCompactionOutcomeListener: + | ((workspaceId: string, outcome: IdleCompactionOutcome) => void) + | undefined; + // Blocks new sends while a context reset is committing its durable boundary and cleanup. private readonly resettingContextWorkspaces = new Set(); @@ -1743,7 +1753,9 @@ export class WorkspaceService extends EventEmitter { isWorkspaceEvent(v) && (!("metadata" in (v as Record)) || isObj((v as StreamEndEvent).metadata)); const isStreamAbortEvent = (v: unknown): v is StreamAbortEvent => isWorkspaceEvent(v); - const isErrorEvent = (v: unknown): v is { workspaceId: string; error: string } => + const isErrorEvent = ( + v: unknown + ): v is { workspaceId: string; error: string; errorType?: StreamErrorType } => isWorkspaceEvent(v) && "error" in v && typeof (v as { error: unknown }).error === "string"; const isToolCallEndEvent = (v: unknown): v is ToolCallEndEvent => isWorkspaceEvent(v) && @@ -1791,6 +1803,14 @@ export class WorkspaceService extends EventEmitter { this.aiService.on("error", (data: unknown) => { if (isErrorEvent(data)) { + // Read the idle-compaction marker before stopStreamingStatus clears it, so a + // mid-stream failure (e.g. provider rejecting the compaction model) stops the loop. + if (this.idleCompactingWorkspaces.has(data.workspaceId)) { + this.reportIdleCompactionOutcome(data.workspaceId, { + success: false, + modelNotFound: data.errorType === "model_not_found", + }); + } void this.stopStreamingStatus(data.workspaceId); void this.workspaceGoalService?.applyPendingAfterStreamEnd(data.workspaceId); } @@ -1976,6 +1996,12 @@ export class WorkspaceService extends EventEmitter { const generation = this.streamingGenerations.get(workspaceId) ?? 0; const isIdleCompaction = this.idleCompactingWorkspaces.has(workspaceId); + if (isIdleCompaction) { + // A clean stream end means compaction succeeded; clear the failure streak. + // Mid-stream failures arrive via the "error" event instead. + this.reportIdleCompactionOutcome(workspaceId, { success: true }); + } + // Idle compaction is maintenance work, so preserve the pre-existing recency. // That keeps the workspace from jumping to the top of the sidebar and also // prevents the background activity path from treating compaction as a fresh response. @@ -8241,6 +8267,21 @@ export class WorkspaceService extends EventEmitter { return true; } + /** + * Register the callback that receives terminal idle-compaction outcomes. + * Wired by ServiceContainer to forward outcomes to IdleCompactionService so the + * idle loop can stop re-attempting a persistently failing workspace. + */ + setIdleCompactionOutcomeListener( + listener: (workspaceId: string, outcome: IdleCompactionOutcome) => void + ): void { + this.idleCompactionOutcomeListener = listener; + } + + private reportIdleCompactionOutcome(workspaceId: string, outcome: IdleCompactionOutcome): void { + this.idleCompactionOutcomeListener?.(workspaceId, outcome); + } + /** * Execute idle compaction for a workspace directly from the backend. * @@ -8301,6 +8342,13 @@ export class WorkspaceService extends EventEmitter { ? rawError.type : JSON.stringify(rawError) : String(rawError); + // Report the pre-stream failure (e.g. invalid/unavailable compaction model) so the + // idle loop can stop re-attempting this workspace. Mid-stream failures are reported + // separately from the stream error/completion listeners. + this.reportIdleCompactionOutcome(workspaceId, { + success: false, + modelNotFound: classifySendMessageError(sendResult.error).errorType === "model_not_found", + }); throw new Error(`Failed to execute idle compaction: ${formattedError}`); } From 083e71d465494d917bcf9a6a843907e34c0e7f59 Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 15 Jun 2026 13:35:54 -0500 Subject: [PATCH 2/4] Report idle compaction outcome from actual persistence, not stream-end Address Codex review: a clean provider stream-end does not guarantee the post-stream history compaction succeeded. Drive the idle success/failure outcome from CompactionHandler.handleCompletion (after the summary is persisted, or on a post-stream persistence failure) via a new onIdleCompactionOutcome callback, instead of reporting success on stream-end. --- src/node/services/agentSession.ts | 4 ++ src/node/services/compactionHandler.test.ts | 68 +++++++++++++++++++++ src/node/services/compactionHandler.ts | 25 ++++++-- src/node/services/workspaceService.ts | 18 ++++-- 4 files changed, 106 insertions(+), 9 deletions(-) diff --git a/src/node/services/agentSession.ts b/src/node/services/agentSession.ts index 3773f31b67..43303dd437 100644 --- a/src/node/services/agentSession.ts +++ b/src/node/services/agentSession.ts @@ -317,6 +317,8 @@ interface AgentSessionOptions { keepBackgroundProcesses?: boolean; /** Called when compaction completes (e.g., to clear idle compaction pending state) */ onCompactionComplete?: (metadata: CompactionCompletionMetadata) => void; + /** Called with the terminal outcome of an idle compaction (persisted success / post-stream failure) */ + onIdleCompactionOutcome?: (success: boolean) => void; /** Called when post-compaction context state may have changed (plan/file edits) */ onPostCompactionStateChange?: () => void; } @@ -515,6 +517,7 @@ export class AgentSession { workspaceGoalService, keepBackgroundProcesses, onCompactionComplete, + onIdleCompactionOutcome, onPostCompactionStateChange, } = options; @@ -539,6 +542,7 @@ export class AgentSession { telemetryService, emitter: this.emitter, onCompactionComplete, + onIdleCompactionOutcome, }); this.compactionMonitor = new CompactionMonitor( diff --git a/src/node/services/compactionHandler.test.ts b/src/node/services/compactionHandler.test.ts index 1674ddedd2..7acbac0ca1 100644 --- a/src/node/services/compactionHandler.test.ts +++ b/src/node/services/compactionHandler.test.ts @@ -236,6 +236,74 @@ describe("CompactionHandler", () => { expect(metadata?.previousBoundaryHistorySequence).toBe(1); }); + describe("onIdleCompactionOutcome", () => { + const createIdleCompactionRequest = (id = "idle-req"): MuxMessage => + createMuxMessage(id, "user", "Please summarize the conversation", { + muxMetadata: { + type: "compaction-request", + rawCommand: "/compact", + parsed: {}, + source: "idle-compaction", + }, + }); + + it("reports success only after the summary is persisted", async () => { + const onIdleCompactionOutcome = mock((_success: boolean) => undefined); + handler = new CompactionHandler({ + workspaceId, + historyService, + sessionDir, + telemetryService, + emitter: mockEmitter, + onIdleCompactionOutcome, + }); + await seedHistory(createIdleCompactionRequest()); + + const handled = await handler.handleCompletion(createStreamEndEvent("Summary")); + + expect(handled).toBe(true); + expect(onIdleCompactionOutcome.mock.calls).toEqual([[true]]); + }); + + it("reports failure when the post-stream summary is empty", async () => { + const onIdleCompactionOutcome = mock((_success: boolean) => undefined); + handler = new CompactionHandler({ + workspaceId, + historyService, + sessionDir, + telemetryService, + emitter: mockEmitter, + onIdleCompactionOutcome, + }); + await seedHistory(createIdleCompactionRequest()); + + // An empty summary means the provider stream ended but produced no usable + // content, so compaction cannot be persisted. + const handled = await handler.handleCompletion(createStreamEndEvent(" ")); + + expect(handled).toBe(false); + expect(onIdleCompactionOutcome.mock.calls).toEqual([[false]]); + }); + + it("does not report for a non-idle (manual) compaction", async () => { + const onIdleCompactionOutcome = mock((_success: boolean) => undefined); + handler = new CompactionHandler({ + workspaceId, + historyService, + sessionDir, + telemetryService, + emitter: mockEmitter, + onIdleCompactionOutcome, + }); + await seedHistory(createCompactionRequest()); + + const handled = await handler.handleCompletion(createStreamEndEvent("Summary")); + + expect(handled).toBe(true); + expect(onIdleCompactionOutcome).not.toHaveBeenCalled(); + }); + }); + it("should capture compaction_completed telemetry on successful compaction", async () => { const compactionReq = createCompactionRequest(); await seedHistory(compactionReq); diff --git a/src/node/services/compactionHandler.ts b/src/node/services/compactionHandler.ts index 0c1e65f97b..0ade10e109 100644 --- a/src/node/services/compactionHandler.ts +++ b/src/node/services/compactionHandler.ts @@ -332,6 +332,13 @@ interface CompactionHandlerOptions { emitter: EventEmitter; /** Called when compaction completes successfully (e.g., to clear idle compaction pending state) */ onCompactionComplete?: (metadata: CompactionCompletionMetadata) => void; + /** + * Called with the terminal outcome of an idle compaction (source === "idle-compaction"), + * after the summary is actually persisted (success) or a post-stream persistence failure + * (empty/invalid summary, history write error). Lets the idle loop stop re-attempting a + * workspace whose compaction keeps failing even though the provider stream ended cleanly. + */ + onIdleCompactionOutcome?: (success: boolean) => void; } /** @@ -353,6 +360,7 @@ export class CompactionHandler { private readonly processedCompactionRequestIds: Set = new Set(); private readonly onCompactionComplete?: (metadata: CompactionCompletionMetadata) => void; + private readonly onIdleCompactionOutcome?: (success: boolean) => void; /** Flag indicating post-compaction attachments should be generated on next turn */ private postCompactionAttachmentsPending = false; @@ -376,6 +384,7 @@ export class CompactionHandler { this.telemetryService = options.telemetryService; this.emitter = options.emitter; this.onCompactionComplete = options.onCompactionComplete; + this.onIdleCompactionOutcome = options.onIdleCompactionOutcome; } private async loadPersistedPendingStateIfNeeded(): Promise { @@ -752,6 +761,11 @@ export class CompactionHandler { return false; } + // Determine idle-compaction (auto-triggered due to inactivity) up-front so the + // post-stream failure paths below can report a terminal outcome to the idle loop. + const isIdleCompaction = + muxMeta?.type === "compaction-request" && muxMeta.source === "idle-compaction"; + // Dedupe: If we've already processed this compaction-request, skip if (this.processedCompactionRequestIds.has(lastUserMsg.id)) { return true; @@ -777,6 +791,7 @@ export class CompactionHandler { parts: partsSummary, }); // Don't mark as processed so user can retry + if (isIdleCompaction) this.onIdleCompactionOutcome?.(false); return false; } @@ -792,13 +807,10 @@ export class CompactionHandler { } ); // Don't mark as processed so user can retry + if (isIdleCompaction) this.onIdleCompactionOutcome?.(false); return false; } - // Check if this was an idle-compaction (auto-triggered due to inactivity) - const isIdleCompaction = - muxMeta?.type === "compaction-request" && muxMeta.source === "idle-compaction"; - // Extract follow-up content to attach to summary for crash-safe dispatch const pendingFollowUp = getCompactionFollowUpContent(muxMeta); @@ -825,6 +837,7 @@ export class CompactionHandler { ); if (!result.success) { log.error("Compaction failed:", result.error); + if (isIdleCompaction) this.onIdleCompactionOutcome?.(false); return false; } @@ -849,6 +862,10 @@ export class CompactionHandler { // Notify that compaction completed (clears idle compaction pending state) this.onCompactionComplete?.(result.data); + // Report the idle-compaction success only after the summary is actually persisted, + // so the idle loop's failure streak is reset on real success (not just stream end). + if (isIdleCompaction) this.onIdleCompactionOutcome?.(true); + // Emit a sanitized stream-end so UI can close streaming state without // re-introducing stale provider metadata from the pre-compaction row. this.emitChatEvent(this.sanitizeCompactionStreamEndEvent(event)); diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index 0ab4f65eda..cbb176b673 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -1996,11 +1996,9 @@ export class WorkspaceService extends EventEmitter { const generation = this.streamingGenerations.get(workspaceId) ?? 0; const isIdleCompaction = this.idleCompactingWorkspaces.has(workspaceId); - if (isIdleCompaction) { - // A clean stream end means compaction succeeded; clear the failure streak. - // Mid-stream failures arrive via the "error" event instead. - this.reportIdleCompactionOutcome(workspaceId, { success: true }); - } + // Note: idle-compaction success/failure is reported from onIdleCompactionOutcome + // (after the summary is actually persisted), not here — a clean provider stream-end + // does not guarantee the post-stream history compaction succeeded. // Idle compaction is maintenance work, so preserve the pre-existing recency. // That keeps the workspace from jumping to the top of the sidebar and also @@ -2176,6 +2174,16 @@ export class WorkspaceService extends EventEmitter { // the compacted epoch first, then let Dream sweep/merge the candidates. this.memoryConsolidationService?.triggerHarvestThenSweepInBackground(metadata); }, + onIdleCompactionOutcome: (success) => { + // Reports the *persisted* idle-compaction outcome (success only after the summary + // is written; failure on post-stream persistence errors). Reporting on actual + // persistence — not the provider stream-end — keeps the idle loop's failure streak + // accurate. A persistence failure is not a model error, so modelNotFound is false. + this.reportIdleCompactionOutcome( + workspaceId, + success ? { success: true } : { success: false, modelNotFound: false } + ); + }, onPostCompactionStateChange: () => { this.schedulePostCompactionMetadataRefresh(workspaceId); }, From c1a1b76bc41de5a1d1594e5c8397e1b18c89513a Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 15 Jun 2026 13:44:25 -0500 Subject: [PATCH 3/4] Treat requireIdle busy-skip as neutral, not an idle-compaction failure Address Codex review: the requireIdle path returns a busy-skip when the workspace becomes active after eligibility but before sendMessage. That is an expected race, not a failure. Skip reporting it as an outcome so two normal user-interaction races cannot suppress idle compaction for a healthy workspace. Factor the busy-skip message into a shared constant. --- src/node/services/workspaceService.test.ts | 8 ++++++ src/node/services/workspaceService.ts | 29 ++++++++++++++-------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/node/services/workspaceService.test.ts b/src/node/services/workspaceService.test.ts index 1719fd03de..0553d05f4b 100644 --- a/src/node/services/workspaceService.test.ts +++ b/src/node/services/workspaceService.test.ts @@ -2809,6 +2809,13 @@ describe("WorkspaceService idle compaction dispatch", () => { } ).buildIdleCompactionSendOptions = buildIdleCompactionSendOptions; + // The busy-skip is an expected race, so it must not be reported as a failure + // (otherwise two normal user-interaction races would suppress idle compaction). + const outcomes: Array<{ workspaceId: string; outcome: IdleCompactionOutcome }> = []; + workspaceService.setIdleCompactionOutcomeListener((id, outcome) => + outcomes.push({ workspaceId: id, outcome }) + ); + let executionError: unknown; try { await workspaceService.executeIdleCompaction(workspaceId); @@ -2821,6 +2828,7 @@ describe("WorkspaceService idle compaction dispatch", () => { throw new Error("Expected idle compaction to throw when workspace is busy"); } expect(executionError.message).toContain("idle-only send was skipped"); + expect(outcomes).toEqual([]); }); test("reports a model_not_found outcome when the compaction model is invalid", async () => { diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index cbb176b673..f48444876e 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -484,6 +484,11 @@ function isArchiveLossyUntrackedFilesConfirmation( const WORKSPACE_IDLE_WAIT_CANCELED_MESSAGE = "Workflow start canceled while waiting for workspace to become idle."; +// Returned by sendMessage when an idle-only (requireIdle) send is skipped because the +// workspace became active. This is an expected race, not a compaction failure, so the +// idle-compaction loop must not count it toward suppression. +const IDLE_ONLY_BUSY_SKIP_MESSAGE = "Workspace is busy; idle-only send was skipped."; + async function waitForAgentSessionIdle(session: AgentSession, signal?: AbortSignal): Promise { assert(session instanceof AgentSession, "waitForAgentSessionIdle requires an AgentSession"); try { @@ -6536,7 +6541,7 @@ export class WorkspaceService extends EventEmitter { if (internal?.requireIdle) { return Err({ type: "unknown", - raw: "Workspace is busy; idle-only send was skipped.", + raw: IDLE_ONLY_BUSY_SKIP_MESSAGE, }); } @@ -8315,9 +8320,8 @@ export class WorkspaceService extends EventEmitter { const session = this.getOrCreateSession(workspaceId); if (session.isBusy()) { - throw new Error( - "Failed to execute idle compaction: Workspace is busy; idle-only send was skipped." - ); + // Expected race (workspace became active), not a failure — do not report an outcome. + throw new Error(`Failed to execute idle compaction: ${IDLE_ONLY_BUSY_SKIP_MESSAGE}`); } const sendResult = await this.sendMessage( @@ -8350,13 +8354,18 @@ export class WorkspaceService extends EventEmitter { ? rawError.type : JSON.stringify(rawError) : String(rawError); - // Report the pre-stream failure (e.g. invalid/unavailable compaction model) so the + // Report genuine pre-stream failures (e.g. invalid/unavailable compaction model) so the // idle loop can stop re-attempting this workspace. Mid-stream failures are reported - // separately from the stream error/completion listeners. - this.reportIdleCompactionOutcome(workspaceId, { - success: false, - modelNotFound: classifySendMessageError(sendResult.error).errorType === "model_not_found", - }); + // separately from the stream error/completion listeners. The requireIdle busy-skip is an + // expected race (workspace became active), so it must NOT count toward suppression. + const isBusySkip = + sendResult.error.type === "unknown" && sendResult.error.raw === IDLE_ONLY_BUSY_SKIP_MESSAGE; + if (!isBusySkip) { + this.reportIdleCompactionOutcome(workspaceId, { + success: false, + modelNotFound: classifySendMessageError(sendResult.error).errorType === "model_not_found", + }); + } throw new Error(`Failed to execute idle compaction: ${formattedError}`); } From 866a615c9119d7c46e35baf15c1d8d645f1219eb Mon Sep 17 00:00:00 2001 From: Ammar Date: Mon, 15 Jun 2026 13:51:34 -0500 Subject: [PATCH 4/4] Lift idle-compaction suppression when a later compaction succeeds Address Codex review: an in-flight retry can persist a successful compaction after suppression was set. recordOutcome(success) now also clears the suppressed set, so the workspace self-heals and becomes eligible again. --- src/node/services/idleCompactionService.test.ts | 15 +++++++++++++++ src/node/services/idleCompactionService.ts | 8 +++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/src/node/services/idleCompactionService.test.ts b/src/node/services/idleCompactionService.test.ts index beaacb3878..6c8265eef1 100644 --- a/src/node/services/idleCompactionService.test.ts +++ b/src/node/services/idleCompactionService.test.ts @@ -423,6 +423,21 @@ describe("IdleCompactionService", () => { expect(result.eligible).toBe(true); }); + test("a later success lifts suppression (self-healing)", async () => { + // Two failures suppress the workspace. + service.recordOutcome(testWorkspaceId, { success: false, modelNotFound: false }); + service.recordOutcome(testWorkspaceId, { success: false, modelNotFound: false }); + expect((await service.checkEligibility(testWorkspaceId, threshold24h, now)).eligible).toBe( + false + ); + + // An in-flight retry that actually persists a compaction clears suppression. + service.recordOutcome(testWorkspaceId, { success: true }); + + const result = await service.checkEligibility(testWorkspaceId, threshold24h, now); + expect(result.eligible).toBe(true); + }); + test("checkAllWorkspaces no longer queues a suppressed workspace", async () => { // A non-recoverable failure suppresses the workspace immediately. service.recordOutcome(testWorkspaceId, { success: false, modelNotFound: true }); diff --git a/src/node/services/idleCompactionService.ts b/src/node/services/idleCompactionService.ts index 091d9dcd5a..36060941c5 100644 --- a/src/node/services/idleCompactionService.ts +++ b/src/node/services/idleCompactionService.ts @@ -294,16 +294,18 @@ export class IdleCompactionService { * Record the terminal outcome of an idle compaction attempt so the loop can * stop re-attempting a persistently failing workspace. * - * - Success resets the consecutive failure count. + * - Success resets the failure count AND lifts any suppression — a compaction that + * actually persisted means the workspace is healthy again (self-healing). This also + * covers an in-flight retry that succeeds after suppression was set. * - A `model_not_found` failure is non-recoverable, so the loop stops immediately. * - Any other failure stops the loop once it happens twice in a row. * - * Suppression is in-memory and sticky for the service lifetime; restarting the - * app (e.g. after fixing the configured compaction model) clears it. + * Suppression is in-memory (also cleared on restart, e.g. after fixing the model config). */ recordOutcome(workspaceId: string, outcome: IdleCompactionOutcome): void { if (outcome.success) { this.consecutiveFailures.delete(workspaceId); + this.suppressedWorkspaceIds.delete(workspaceId); return; }