Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions src/daemon/error-detector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,25 @@ export interface CliError {
* 5xx as transient). 4xx codes (401/402/403/429) are NOT retried;
* they're either auth/quota or already rate-limited. Retrying 429
* immediately would just compound the rate-limit.
*
* Lineage-specific extension (PR #85): when `kind` is missing (undefined
* or the empty string) AND `lineage === 'opencode'`, treat as retryable.
* Opencode-go's gateway
* has known transport flakes where the subprocess exits 0 with empty
* output (no classified errorKind, no message) but a second attempt
* succeeds. Other lineages keep the conservative "no kind = not
* retryable" default — codex/claude/gemini's null-with-no-kind cases
* usually mean the model genuinely produced nothing, where retry would
* just produce nothing again. Victor caught the gap on the PR #83
* audit (qwen3.6-plus on opencode produced null, no retry, straight to
* claude fallback — wasted the cheap save).
*/
export function isRetryableErrorKind(kind: string | undefined): boolean {
if (!kind) return false;
export function isRetryableErrorKind(
kind: string | undefined,
lineage?: string,
): boolean {
if (!kind) {
return lineage === 'opencode';
}
switch (kind) {
case 'cold_start_timeout':
case 'tmux_dead':
Expand Down
2 changes: 1 addition & 1 deletion src/daemon/runner/doer-driver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ export async function runDoer(
});
if (result !== null) return result;
if (attempt === MAX_ATTEMPTS) return null;
if (!isRetryableErrorKind(lastError.kind)) return null;
if (!isRetryableErrorKind(lastError.kind, entry.lineage)) return null;
if (handle.signal.aborted) return null;
console.warn(
`[doer] retrying transient failure chat=${chatId} round=${round} ` +
Expand Down
2 changes: 1 addition & 1 deletion src/daemon/runner/reviewer-driver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ async function runReviewer(
});
if (result !== null) return result;
if (attempt === MAX_ATTEMPTS) return null;
if (!isRetryableErrorKind(lastError.kind)) return null;
if (!isRetryableErrorKind(lastError.kind, entry.lineage)) return null;
if (handle.signal.aborted) return null;
console.warn(
`[reviewer] retrying transient failure chat=${chatId} round=${round} ` +
Expand Down
30 changes: 29 additions & 1 deletion tests/error-detector-retryable.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { describe, expect, it } from 'vitest';
import { isRetryableErrorKind } from '@/daemon/error-detector';

describe('isRetryableErrorKind', () => {
it('returns false for undefined kind', () => {
it('returns false for undefined kind (no lineage)', () => {
// Happy-path null result with no recorded errorSummary — no retry.
expect(isRetryableErrorKind(undefined)).toBe(false);
});
Expand All @@ -20,6 +20,34 @@ describe('isRetryableErrorKind', () => {
expect(isRetryableErrorKind('')).toBe(false);
});

describe('opencode-null special case (PR #85)', () => {
  it('returns TRUE for undefined kind when lineage is opencode', () => {
    // The opencode-go gateway is known to flake at the transport level:
    // the subprocess exits 0 with empty output (no errorKind, no message)
    // and a second attempt succeeds. If this case were not retryable, a
    // qwen-style null-with-no-kind failure would advance the fallback
    // chain immediately and waste the cheap save.
    const retryable = isRetryableErrorKind(undefined, 'opencode');
    expect(retryable).toBe(true);
  });

  it('keeps the conservative default for other lineages on undefined kind', () => {
    // For codex/claude/gemini a null result with no kind usually means the
    // model genuinely produced nothing, so a retry would yield nothing again.
    const otherLineages = ['openai', 'anthropic', 'google', 'antigravity'];
    for (const lineage of otherLineages) {
      expect(isRetryableErrorKind(undefined, lineage)).toBe(false);
    }
  });

  it('the lineage hint does NOT override an explicit non-retryable kind', () => {
    // An explicit auth/quota/db-corrupt kind stays terminal even on
    // opencode: retrying would only reproduce the same error.
    const terminalKinds = ['quota_exhausted', 'opencode_db_corrupt', 'no_output'];
    for (const kind of terminalKinds) {
      expect(isRetryableErrorKind(kind, 'opencode')).toBe(false);
    }
  });
});

describe('terminal kinds (never retry)', () => {
it('quota_exhausted is terminal', () => {
// Quota windows are server-scheduled; retry within the same
Expand Down
Loading