Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion src/daemon/error-detector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ export interface CliError {
* Terminal (no retry — same call will fail the same way):
* - quota_exhausted quota window is server-scheduled
* - token_refresh_lost human must re-authenticate
* - mcp_handshake_failed auth issue, same remediation
* - opencode_db_corrupt local DB corruption persists
* - permission_prompt needs user interaction or recoverKeys
*
Expand All @@ -48,6 +47,18 @@ export interface CliError {
* ends with no kind match — usually a brief
* upstream blip
* - unknown same as stream_failure
* - mcp_handshake_failed codex's bundled MCP server has a startup
* race — slow first start, handshake timeout
* under load, or a server crash on first start.
* The error LOOKS auth-shaped (and was originally
* classified terminal for that reason), but
* real auth failures surface as
* token_refresh_lost. Retry catches the boot
* race; a rare genuine misconfig fails twice
* and advances as before. (Caught when codex
* hit this on the PR #87 audit chat and went
* straight to claude fallback without any
* recovery attempt.)
* - openrouter_fetch_failed pre-HTTP network error from the OpenRouter
* shim — DNS blip, ECONNRESET mid-handshake,
* ETIMEDOUT. Exactly the case retry is for.
Expand Down Expand Up @@ -85,6 +96,7 @@ export function isRetryableErrorKind(
case 'tmux_dead':
case 'stream_failure':
case 'unknown':
case 'mcp_handshake_failed':
case 'openrouter_fetch_failed':
case 'openrouter_no_body':
return true;
Expand Down
14 changes: 10 additions & 4 deletions tests/error-detector-retryable.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,6 @@ describe('isRetryableErrorKind', () => {
expect(isRetryableErrorKind('token_refresh_lost')).toBe(false);
});

it('mcp_handshake_failed is terminal', () => {
expect(isRetryableErrorKind('mcp_handshake_failed')).toBe(false);
});

it('opencode_db_corrupt is terminal', () => {
// Local DB corruption persists across retries.
expect(isRetryableErrorKind('opencode_db_corrupt')).toBe(false);
Expand Down Expand Up @@ -104,6 +100,16 @@ describe('isRetryableErrorKind', () => {
// Same treatment as stream_failure.
expect(isRetryableErrorKind('unknown')).toBe(true);
});

it('mcp_handshake_failed is retryable (codex MCP boot race)', () => {
// Was originally terminal (lumped with auth), but real auth failures
// surface as token_refresh_lost. mcp_handshake_failed is
// almost always codex's bundled MCP server booting racily, so a
// retry is a cheap save that does not compound cost on genuine
// misconfig. Caught when codex hit this on the PR #87 audit
// chat and went straight to claude fallback with no recovery.
expect(isRetryableErrorKind('mcp_handshake_failed')).toBe(true);
});
});

describe('OpenRouter shim error kinds', () => {
Expand Down
Loading