diff --git a/src/daemon/error-detector.ts b/src/daemon/error-detector.ts index 0c36a9c..6059861 100644 --- a/src/daemon/error-detector.ts +++ b/src/daemon/error-detector.ts @@ -61,9 +61,25 @@ export interface CliError { * 5xx as transient). 4xx codes (401/402/403/429) are NOT retried; * they're either auth/quota or already rate-limited. Retrying 429 * immediately would just compound the rate-limit. + * + * Lineage-specific extension (PR #85): when `kind` is undefined AND + * `lineage === 'opencode'`, treat as retryable. Opencode-go's gateway + * has known transport flakes where the subprocess exits 0 with empty + * output (no classified errorKind, no message) but a second attempt + * succeeds. Other lineages keep the conservative "no kind = not + * retryable" default — codex/claude/gemini's null-with-no-kind cases + * usually mean the model genuinely produced nothing, where retry would + * just produce nothing again. Victor caught the gap on the PR #83 + * audit (qwen3.6-plus on opencode produced null, no retry, straight to + * claude fallback — wasted the cheap save). */ -export function isRetryableErrorKind(kind: string | undefined): boolean { - if (!kind) return false; +export function isRetryableErrorKind( + kind: string | undefined, + lineage?: string, +): boolean { + if (!kind) { + return lineage === 'opencode'; + } switch (kind) { case 'cold_start_timeout': case 'tmux_dead': diff --git a/src/daemon/runner/doer-driver.ts b/src/daemon/runner/doer-driver.ts index 7de8f4a..7e96610 100644 --- a/src/daemon/runner/doer-driver.ts +++ b/src/daemon/runner/doer-driver.ts @@ -227,7 +227,7 @@ export async function runDoer( }); if (result !== null) return result; if (attempt === MAX_ATTEMPTS) return null; - if (!isRetryableErrorKind(lastError.kind)) return null; + if (!isRetryableErrorKind(lastError.kind, entry.lineage)) return null; if (handle.signal.aborted) return null; console.warn( `[doer] retrying transient failure chat=${chatId} round=${round} ` + diff --git a/src/daemon/runner/reviewer-driver.ts b/src/daemon/runner/reviewer-driver.ts index eb54b76..1db6560 100644 --- a/src/daemon/runner/reviewer-driver.ts +++ b/src/daemon/runner/reviewer-driver.ts @@ -563,7 +563,7 @@ async function runReviewer( }); if (result !== null) return result; if (attempt === MAX_ATTEMPTS) return null; - if (!isRetryableErrorKind(lastError.kind)) return null; + if (!isRetryableErrorKind(lastError.kind, entry.lineage)) return null; if (handle.signal.aborted) return null; console.warn( `[reviewer] retrying transient failure chat=${chatId} round=${round} ` + diff --git a/tests/error-detector-retryable.test.ts b/tests/error-detector-retryable.test.ts index 50b9257..2c273cf 100644 --- a/tests/error-detector-retryable.test.ts +++ b/tests/error-detector-retryable.test.ts @@ -11,7 +11,7 @@ import { describe, expect, it } from 'vitest'; import { isRetryableErrorKind } from '@/daemon/error-detector'; describe('isRetryableErrorKind', () => { - it('returns false for undefined kind', () => { + it('returns false for undefined kind (no lineage)', () => { // Happy-path null result with no recorded errorSummary — no retry. expect(isRetryableErrorKind(undefined)).toBe(false); }); @@ -20,6 +20,34 @@ describe('isRetryableErrorKind', () => { expect(isRetryableErrorKind('')).toBe(false); }); + describe('opencode-null special case (PR #85)', () => { + it('returns TRUE for undefined kind when lineage is opencode', () => { + // Opencode-go's gateway has known transport flakes where the + // subprocess exits 0 with empty output (no errorKind, no message) + // but a second attempt succeeds. Without this the qwen-style + // null-with-no-kind failure goes straight to fallback chain + // advance, wasting the cheap save. + expect(isRetryableErrorKind(undefined, 'opencode')).toBe(true); + }); + + it('keeps the conservative default for other lineages on undefined kind', () => { + // codex/claude/gemini null-with-no-kind usually means the model + // genuinely produced nothing — retry would produce nothing again. + expect(isRetryableErrorKind(undefined, 'openai')).toBe(false); + expect(isRetryableErrorKind(undefined, 'anthropic')).toBe(false); + expect(isRetryableErrorKind(undefined, 'google')).toBe(false); + expect(isRetryableErrorKind(undefined, 'antigravity')).toBe(false); + }); + + it('the lineage hint does NOT override an explicit non-retryable kind', () => { + // Even on opencode, an auth/quota/db-corrupt kind is still + // terminal — retry would just produce the same error. + expect(isRetryableErrorKind('quota_exhausted', 'opencode')).toBe(false); + expect(isRetryableErrorKind('opencode_db_corrupt', 'opencode')).toBe(false); + expect(isRetryableErrorKind('no_output', 'opencode')).toBe(false); + }); + }); + describe('terminal kinds (never retry)', () => { it('quota_exhausted is terminal', () => { // Quota windows are server-scheduled; retry within the same