From 5d063491f339f7ec40fb3bc203b995f6d6826020 Mon Sep 17 00:00:00 2001 From: chorus-codes <280607145+chorus-codes@users.noreply.github.com> Date: Sun, 24 May 2026 12:37:44 +1000 Subject: [PATCH 1/2] refactor(runner): shim-owned retry policy (closes PR #87 no_output bypass) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #87 added a lineage-dispatch escape valve in isRetryableErrorKind for opencode null-with-no-kind. The chorus self-audit (6 request-changes / 2 approves) found two real problems: 1. THE BUG (antigravity + gemini, HIGH): The COMMON opencode-go failure is empty-stdout exit with eventCount=0. The runner's finally block synthesises kind='no_output' for that case — and no_output is universally terminal — so the new opencode- null retry was UNREACHABLE for the bug it shipped to fix. It only helped the rare case where opencode emits some events but no usable content. 2. SLIPPERY SLOPE (every reviewer): Central classifier with a lineage exception is a one-off escape valve that grows into a table. Moonshot (also opencode-go) was already an unprotected gap (codex flagged via isOpenCodeFamily idiom). Fix: move the retry-on-null-kind and retry-on-no-output policies out of the central classifier and into the shim layer. Each AgentShim declares its own `retryPolicy: { extraKinds?, onNullKind?, onNoOutput? }` block. - opencode shim: opts into both flags (closes the no_output bypass) - kimi shim (moonshot): same opts-in (closes the moonshot gap) - codex/claude/gemini/grok/antigravity/openrouter shims: unchanged — fall through to the universal transient list as before isRetryableErrorKind signature changes from (kind, lineage?) to (kind, shim?). Universal terminal/transient layer unchanged; shim policies layer on top. Universal terminal kinds (quota_exhausted, token_refresh_lost, opencode_db_corrupt) still beat shim opt-ins — explicit terminal kinds cannot be overridden by a shim. Tests: 978 -> 982 passing. Test cases now use tiny shim stubs instead of lineage strings, matching the production call shape. Out of scope (separate follow-ups flagged in audit): - Telemetry: record retry-succeeded rows so the policy's effectiveness is measurable. Universal across reviewers. - Verdict-parse-failure false positive (codex finding #5): opencode-cli-7 traced this and found non-empty content always returns a real verdict, so apology-shaped responses don't reach the retry path. Closed as non-issue. --- src/daemon/agents/kimi.ts | 13 ++++ src/daemon/agents/opencode.ts | 19 +++++ src/daemon/agents/types.ts | 33 ++++++++ src/daemon/error-detector.ts | 102 +++++++++++++------------ src/daemon/runner/doer-driver.ts | 2 +- src/daemon/runner/reviewer-driver.ts | 2 +- tests/error-detector-retryable.test.ts | 88 +++++++++++++++------ 7 files changed, 184 insertions(+), 75 deletions(-) diff --git a/src/daemon/agents/kimi.ts b/src/daemon/agents/kimi.ts index 4f8dd97..eaa6d33 100644 --- a/src/daemon/agents/kimi.ts +++ b/src/daemon/agents/kimi.ts @@ -243,4 +243,17 @@ export const kimiShim: AgentShim = { // Kimi CLI uses Moonshot subscription on the user's plan, not metered API. return 0; }, + + // Same opencode-go gateway flake risk as the opencode shim — when the + // requested model carries the opencode-go/ prefix the kimi shim + // delegates to opencode internally (see runHeadless above). Inherit + // the same retry posture so moonshot lineages don't silently fall + // back without an attempt at the cheap recovery. Flagged by codex on + // the PR #87 self-audit ("isOpenCodeFamily" idiom appears throughout + // error-detector.ts; the original lineage dispatch only checked + // 'opencode' and left this gap open). + retryPolicy: { + onNullKind: true, + onNoOutput: true, + }, }; diff --git a/src/daemon/agents/opencode.ts b/src/daemon/agents/opencode.ts index 042b021..1ac843f 100644 --- a/src/daemon/agents/opencode.ts +++ b/src/daemon/agents/opencode.ts @@ -218,4 +218,23 @@ export const opencodeShim: AgentShim = { // OpenCode Go subscription plan (Kimi/DeepSeek), not per-call API return 0; }, + + // Opencode-go's gateway has two well-documented transient failure + // modes that look terminal at the runner layer: + // - empty-stdout exit: subprocess exits 0 with no output -> the + // runner synthesises kind:'no_output' (universally terminal), + // but for opencode-go a second attempt typically succeeds with + // the same prompt (gateway boot race). + // - events-but-no-content: subprocess emits progress/tool events + // then closes without a message_done and without any error + // event -> kind stays undefined, content is empty. + // Both should be retried once. The original PR #87 attempted to + // address the second case with a lineage dispatch in the central + // classifier; the no_output case slipped through because no_output + // is in the universal terminal list. Moved here as part of the + // shim-owned retry refactor (PR #87 self-audit follow-up). + retryPolicy: { + onNullKind: true, + onNoOutput: true, + }, }; diff --git a/src/daemon/agents/types.ts b/src/daemon/agents/types.ts index ac9ec81..bc44cd8 100644 --- a/src/daemon/agents/types.ts +++ b/src/daemon/agents/types.ts @@ -152,6 +152,39 @@ export interface AgentShim { * lineages return 0; API-keyed lineages use the rate card. Best-effort. */ estimateCostUsd(inputTokens: number, outputTokens: number, model?: string): number; + /** + * Shim-owned retry policy. The runner consults this BEFORE consulting the + * universal `isRetryableErrorKind` taxonomy — letting each shim declare its + * own transport-specific transient failure modes without leaking + * lineage-dispatch into the central classifier (PR #87 audit feedback). + * + * Three orthogonal signals: + * - `extraKinds`: shim-specific error kinds beyond the universal transient + * list (e.g. an opencode-go-specific `provider_returned_error` could go + * here if the shim ever emits it). + * - `onNullKind`: retry when the run returned without setting any error + * kind (`lastError.kind === undefined`). Opencode-go's gateway has a + * transport flake where the subprocess emits some events but no usable + * content with no error event — this catches that. + * - `onNoOutput`: retry when the run exited with zero events (the finally + * block synthesises `kind: 'no_output'`). For most lineages this is a + * deterministic transport bug (e.g. TTY-only output), but for opencode- + * go's empty-stdout-exit flake it's exactly the case we want to retry. + * Closes the bug the original PR #87 missed: my opencode-null retry + * was unreachable because empty-exit always synthesised `no_output` + * terminal kind before the null-kind branch could see it (caught by + * antigravity + gemini on the PR #87 self-audit). + * + * Omit entirely (or set all false) for shims that should keep the + * conservative universal-default behaviour — codex, claude, gemini all + * fall through to the universal list and retry only on documented + * transient kinds. + */ + readonly retryPolicy?: { + readonly extraKinds?: readonly string[]; + readonly onNullKind?: boolean; + readonly onNoOutput?: boolean; + }; } /** diff --git a/src/daemon/error-detector.ts b/src/daemon/error-detector.ts index bcfa1f1..8d55c57 100644 --- a/src/daemon/error-detector.ts +++ b/src/daemon/error-detector.ts @@ -27,70 +27,72 @@ export interface CliError { /** * Decide whether a CLI failure is worth one immediate retry before - * advancing the fallback chain. Most CLI calls succeed; the failure - * cases that DO recur are deterministic — auth/quota/db-corrupt. The - * cases that DON'T recur are transient — network blip mid-stream, - * slow cold start, opencode DB lock that clears on the next attempt. - * One retry catches the cheap save without doubling spend on real - * outages (which fail twice and then advance as before). + * advancing the fallback chain. The decision is composed from TWO layers: * - * Terminal (no retry — same call will fail the same way): + * Universal layer (this function's switch + openrouter prefix check): + * Kinds that any CLI/shim might produce, where retry is universally + * safe — network blips, slow cold starts, generic stream failures. + * + * Shim-owned layer (consulted via `retryPolicy` on AgentShim): + * Each shim declares its own transport-specific transient signals. + * Avoids leaking lineage-dispatch into this central classifier + * (slippery-slope concern from PR #87 audit). To opt in for a new + * lineage, add a `retryPolicy` block to its shim file — not a new + * branch here. + * + * Universal terminal (no retry — same call will fail the same way): * - quota_exhausted quota window is server-scheduled * - token_refresh_lost human must re-authenticate * - opencode_db_corrupt local DB corruption persists * - permission_prompt needs user interaction or recoverKeys * - * Transient (retry once): + * Universal transient (retry once): * - cold_start_timeout CLI was slow; second cold start may hit warm cache * - tmux_dead session crashed; respawn may succeed - * - stream_failure catch-all from reviewer.ts when the stream - * ends with no kind match — usually a brief - * upstream blip + * - stream_failure catch-all when the stream ends with no kind match * - unknown same as stream_failure - * - mcp_handshake_failed codex's bundled MCP server boots racily — - * slow first start, handshake-timeout under - * load, server crash on first start. The - * error LOOKS auth-shaped (was originally - * classified terminal for that reason) but - * real auth failures surface as - * token_refresh_lost. Retry catches the boot - * race; a rare actual-misconfig fails twice - * and advances as before. (Caught when codex - * hit this on the PR #87 audit chat and went - * straight to claude fallback without any - * recovery attempt.) - * - openrouter_fetch_failed pre-HTTP network error from the OpenRouter - * shim — DNS blip, ECONNRESET mid-handshake, - * ETIMEDOUT. Exactly the case retry is for. - * (Flagged by codex in chorus self-audit on PR #79.) - * - openrouter_no_body 2xx response with empty body — anomalous - * edge state at the OpenRouter edge; second - * request normally succeeds. + * - mcp_handshake_failed codex's bundled MCP server boots racily — real + * auth failures surface as token_refresh_lost. + * (Reclassified in PR #89.) + * - openrouter_fetch_failed pre-HTTP network error (DNS, ECONNRESET, ETIMEDOUT) + * - openrouter_no_body 2xx response with empty body * - * Also retryable: HTTP-dispatched shim 5xx codes from OpenRouter (the - * caller passes the `openrouter_` kind verbatim — we accept any - * 5xx as transient). 4xx codes (401/402/403/429) are NOT retried; - * they're either auth/quota or already rate-limited. Retrying 429 - * immediately would just compound the rate-limit. + * Also retryable: OpenRouter 5xx HTTP codes (transient upstream outage). + * 4xx codes (401/402/403/429) stay terminal — auth/quota/rate-limit cases + * where retry just compounds the problem. * - * Lineage-specific extension (PR #85): when `kind` is undefined AND - * `lineage === 'opencode'`, treat as retryable. Opencode-go's gateway - * has known transport flakes where the subprocess exits 0 with empty - * output (no classified errorKind, no message) but a second attempt - * succeeds. Other lineages keep the conservative "no kind = not - * retryable" default — codex/claude/gemini's null-with-no-kind cases - * usually mean the model genuinely produced nothing, where retry would - * just produce nothing again. Victor caught the gap on the PR #83 - * audit (qwen3.6-plus on opencode produced null, no retry, straight to - * claude fallback — wasted the cheap save). + * Shim-specific signals (via `retryPolicy` on the shim): + * - extraKinds additional kinds beyond the universal list + * - onNullKind retry when `lastError.kind === undefined` + * (opencode-go: events emitted but no usable content, + * no error event — transport flake) + * - onNoOutput retry when the run synthesised `kind: 'no_output'` + * because `eventCount === 0` (opencode-go: subprocess + * exited 0 with empty stdout — the common case the + * original PR #87 missed because no_output was in + * the universal terminal list) + * + * The `shim` parameter is optional for callers that don't have one in scope + * (e.g. classifier-only tests). When omitted, only the universal layer + * applies — no shim-specific retries fire. */ +import type { AgentShim } from './agents/types.js'; + export function isRetryableErrorKind( kind: string | undefined, - lineage?: string, + shim?: { retryPolicy?: AgentShim['retryPolicy'] }, ): boolean { + const policy = shim?.retryPolicy; + // Shim-owned null-kind retry (was the PR #87 lineage-dispatch case). if (!kind) { - return lineage === 'opencode'; + return policy?.onNullKind === true; } + // Shim-owned no_output retry — the COMMON opencode-go flake that the + // original PR #87 silently missed. Synthesised by the runner's finally + // block when eventCount === 0, and would otherwise be filtered as + // terminal below. + if (kind === 'no_output' && policy?.onNoOutput === true) return true; + // Universal transient switch. switch (kind) { case 'cold_start_timeout': case 'tmux_dead': @@ -101,10 +103,14 @@ export function isRetryableErrorKind( case 'openrouter_no_body': return true; } + // OpenRouter HTTP-code escape: 5xx transient, 4xx terminal. if (kind.startsWith('openrouter_')) { const code = Number(kind.slice('openrouter_'.length)); - return Number.isFinite(code) && code >= 500 && code < 600; + if (Number.isFinite(code) && code >= 500 && code < 600) return true; } + // Shim-owned extra kinds (after universal — a shim CAN add a kind the + // universal list considers terminal, but only via explicit opt-in). + if (policy?.extraKinds?.includes(kind)) return true; return false; } diff --git a/src/daemon/runner/doer-driver.ts b/src/daemon/runner/doer-driver.ts index a605d95..af06c21 100644 --- a/src/daemon/runner/doer-driver.ts +++ b/src/daemon/runner/doer-driver.ts @@ -276,7 +276,7 @@ export async function runDoer( }); if (result !== null) return result; if (attempt === MAX_ATTEMPTS) return null; - if (!isRetryableErrorKind(lastError.kind, entry.lineage)) return null; + if (!isRetryableErrorKind(lastError.kind, entryShim)) return null; if (handle.signal.aborted) return null; console.warn( `[doer] retrying transient failure chat=${chatId} round=${round} ` + diff --git a/src/daemon/runner/reviewer-driver.ts b/src/daemon/runner/reviewer-driver.ts index fd33e4c..d1ce491 100644 --- a/src/daemon/runner/reviewer-driver.ts +++ b/src/daemon/runner/reviewer-driver.ts @@ -594,7 +594,7 @@ async function runReviewer( }); if (result !== null) return result; if (attempt === MAX_ATTEMPTS) return null; - if (!isRetryableErrorKind(lastError.kind, entry.lineage)) return null; + if (!isRetryableErrorKind(lastError.kind, entryShim)) return null; if (handle.signal.aborted) return null; console.warn( `[reviewer] retrying transient failure chat=${chatId} round=${round} ` + diff --git a/tests/error-detector-retryable.test.ts b/tests/error-detector-retryable.test.ts index af2649d..7e2616f 100644 --- a/tests/error-detector-retryable.test.ts +++ b/tests/error-detector-retryable.test.ts @@ -20,31 +20,69 @@ describe('isRetryableErrorKind', () => { expect(isRetryableErrorKind('')).toBe(false); }); - describe('opencode-null special case (PR #85)', () => { - it('returns TRUE for undefined kind when lineage is opencode', () => { - // Opencode-go's gateway has known transport flakes where the - // subprocess exits 0 with empty output (no errorKind, no message) - // but a second attempt succeeds. Without this the qwen-style - // null-with-no-kind failure goes straight to fallback chain - // advance, wasting the cheap save. - expect(isRetryableErrorKind(undefined, 'opencode')).toBe(true); - }); - - it('keeps the conservative default for other lineages on undefined kind', () => { - // codex/claude/gemini null-with-no-kind usually means the model - // genuinely produced nothing — retry would produce nothing again. - expect(isRetryableErrorKind(undefined, 'openai')).toBe(false); - expect(isRetryableErrorKind(undefined, 'anthropic')).toBe(false); - expect(isRetryableErrorKind(undefined, 'google')).toBe(false); - expect(isRetryableErrorKind(undefined, 'antigravity')).toBe(false); - }); - - it('the lineage hint does NOT override an explicit non-retryable kind', () => { - // Even on opencode, an auth/quota/db-corrupt kind is still - // terminal — retry would just produce the same error. - expect(isRetryableErrorKind('quota_exhausted', 'opencode')).toBe(false); - expect(isRetryableErrorKind('opencode_db_corrupt', 'opencode')).toBe(false); - expect(isRetryableErrorKind('no_output', 'opencode')).toBe(false); + describe('shim-owned retry policy (PR #87 follow-up)', () => { + // Shape matches `Pick`. Building tiny + // shim stubs keeps the classifier tests independent of the full + // AgentShim contract (formatPrompt, runHeadless, etc.). + const opencodeShim = { + retryPolicy: { onNullKind: true, onNoOutput: true }, + }; + const codexShim = { retryPolicy: undefined }; + const claudeShim = {}; + const customShim = { + retryPolicy: { extraKinds: ['shim_specific_blip' as string] }, + }; + + it('retries on null kind when shim opts in via onNullKind', () => { + // Opencode-go gateway flake: events emitted but no usable content + // and no error event. + expect(isRetryableErrorKind(undefined, opencodeShim)).toBe(true); + }); + + it('does NOT retry on null kind when shim has no retryPolicy', () => { + // Codex/claude/gemini null-with-no-kind = model genuinely produced + // nothing. Retry would produce the same nothing. + expect(isRetryableErrorKind(undefined, codexShim)).toBe(false); + expect(isRetryableErrorKind(undefined, claudeShim)).toBe(false); + expect(isRetryableErrorKind(undefined)).toBe(false); + }); + + it('retries on no_output when shim opts in via onNoOutput', () => { + // THE bug the original PR #87 missed: empty-stdout exit + // synthesises no_output (universally terminal), so the opencode + // null-kind path was unreachable for the most common opencode + // failure mode. onNoOutput closes that bypass. + expect(isRetryableErrorKind('no_output', opencodeShim)).toBe(true); + }); + + it('keeps no_output terminal for shims that did NOT opt in', () => { + expect(isRetryableErrorKind('no_output', codexShim)).toBe(false); + expect(isRetryableErrorKind('no_output', claudeShim)).toBe(false); + expect(isRetryableErrorKind('no_output')).toBe(false); + }); + + it('shim extraKinds list adds shim-specific retryable kinds', () => { + expect(isRetryableErrorKind('shim_specific_blip', customShim)).toBe(true); + // Same kind, no shim: stays terminal. + expect(isRetryableErrorKind('shim_specific_blip')).toBe(false); + }); + + it('shim policy does NOT override universal-terminal kinds', () => { + // Even an opencode shim with both flags on does NOT retry + // quota_exhausted / token_refresh_lost / opencode_db_corrupt — + // these are deterministic, retry would produce the same error. + expect(isRetryableErrorKind('quota_exhausted', opencodeShim)).toBe(false); + expect(isRetryableErrorKind('token_refresh_lost', opencodeShim)).toBe(false); + expect(isRetryableErrorKind('opencode_db_corrupt', opencodeShim)).toBe(false); + }); + + it('universal transient kinds retry regardless of shim presence', () => { + // The opt-in is for ADDITIONAL retries, not a gate on the + // universal set. A shim with no retryPolicy still gets the full + // universal transient behaviour. + expect(isRetryableErrorKind('stream_failure')).toBe(true); + expect(isRetryableErrorKind('stream_failure', codexShim)).toBe(true); + expect(isRetryableErrorKind('openrouter_503', codexShim)).toBe(true); }); }); From 9960480b611c4b6d579664ee4de7b9644e62a2f2 Mon Sep 17 00:00:00 2001 From: chorus-codes <280607145+chorus-codes@users.noreply.github.com> Date: Mon, 25 May 2026 14:17:43 +1000 Subject: [PATCH 2/2] fix(reviewer): classify ambiguous-verdict so opencode retry skips apology shapes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #91 audit catch (multiple reviewers, MEDIUM-HIGH severity): when a reviewer writes non-empty prose with no approve/reject keyword, verdictFromReviewerText returns null. Pre-fix, lastError.kind stayed undefined → the new opencode shim's onNullKind=true policy fired a retry that wasted tokens on the same prose shape. This isn't the transport flake retry was designed for — it's a successful model run that just didn't match the verdict heuristic (apology, refusal, off-topic continuation). Classify it as `verdict_ambiguous` (not in any retry list) so the driver advances the chain cleanly. Guarded on `!lastError.kind` so a real classified failure earlier in the run (e.g. stream_failure with the finally block writing a REVIEWER FAILED summary to answer.md) survives the verdict check. Closes my own PR description claim (PR #91) that opencode-cli-7 had closed this — that earlier trace was incomplete. The audit correctly re-traced it and found the path IS reachable. Tests: 983 -> 984 (+1 contract test for verdict_ambiguous + 1 classifier test confirming it's terminal even on opencode shim). --- src/daemon/runner/reviewer.ts | 27 ++++++++++++++++++++++- tests/error-detector-retryable.test.ts | 9 ++++++++ tests/runner-reviewer.test.ts | 30 ++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/src/daemon/runner/reviewer.ts b/src/daemon/runner/reviewer.ts index b6c0f3d..0b9e2e9 100644 --- a/src/daemon/runner/reviewer.ts +++ b/src/daemon/runner/reviewer.ts @@ -533,5 +533,30 @@ export async function runReviewerHeadless(args: { ); } - return verdictFromReviewerText(content); + const verdict = verdictFromReviewerText(content); + // PR #91 audit catch (multiple reviewers, MEDIUM-HIGH): when the + // reviewer produces non-empty content but verdictFromReviewerText + // can't extract a positive/negative keyword, it returns null. The + // caller (reviewer-driver retry loop) sees `result === null` with + // `lastError.kind === undefined` — and the new opencode-shim + // retryPolicy.onNullKind=true causes a retry. But this case ISN'T + // a transport flake — the model successfully wrote prose that just + // doesn't match the verdict heuristic (apology, refusal, off-topic). + // Retrying it produces the same shape on the next attempt — wasted + // tokens. Setting an explicit `verdict_ambiguous` kind classifies + // this as terminal (not in the universal transient list, not opted + // in by any shim) so the retry skips and the chain advances. + // + // Guarded on !lastError.kind so the classifier doesn't stomp a real + // classified kind set by the finally block (e.g. when an errored + // stream wrote a `## REVIEWER FAILED` block to answer.md — content + // is non-empty so verdictFromReviewerText returns null, but the + // failure was classified earlier as `stream_failure` / + // `quota_exhausted` / etc. and that signal must survive). + if (verdict === null && lastError && !lastError.kind) { + lastError.kind = 'verdict_ambiguous'; + lastError.message = + 'Reviewer wrote non-empty content but no approve/reject verdict was detected.'; + } + return verdict; } diff --git a/tests/error-detector-retryable.test.ts b/tests/error-detector-retryable.test.ts index 7e2616f..92ba93e 100644 --- a/tests/error-detector-retryable.test.ts +++ b/tests/error-detector-retryable.test.ts @@ -76,6 +76,15 @@ describe('isRetryableErrorKind', () => { expect(isRetryableErrorKind('opencode_db_corrupt', opencodeShim)).toBe(false); }); + it('verdict_ambiguous is terminal even on opencode shim (PR #91 audit catch)', () => { + // A reviewer that wrote non-empty prose but no approve/reject + // keyword is NOT a transport flake. The runner now sets + // lastError.kind='verdict_ambiguous' so the opencode shim's + // onNullKind=true policy can't accidentally retry it. + expect(isRetryableErrorKind('verdict_ambiguous', opencodeShim)).toBe(false); + expect(isRetryableErrorKind('verdict_ambiguous')).toBe(false); + }); + it('universal transient kinds retry regardless of shim presence', () => { // The opt-in is for ADDITIONAL retries, not a gate on the // universal set. A shim with no retryPolicy still gets the full diff --git a/tests/runner-reviewer.test.ts b/tests/runner-reviewer.test.ts index 6e89f47..a90f08b 100644 --- a/tests/runner-reviewer.test.ts +++ b/tests/runner-reviewer.test.ts @@ -156,6 +156,36 @@ describe('runReviewerHeadless', () => { expect(verdict).toBeNull(); }); + it('classifies ambiguous-verdict as verdict_ambiguous so opencode retry does NOT fire on apology-shaped responses', async () => { + // PR #91 audit catch: a reviewer that writes non-empty prose but + // no approve/reject keyword returns null. Pre-fix, lastError.kind + // stayed undefined → the new opencode shim's onNullKind=true + // policy fired a retry that wasted tokens on the same shape. + // Post-fix, lastError.kind is set to 'verdict_ambiguous' which is + // not in any retry list, so the driver advances the chain cleanly. + const text = `${PADDING} ${PADDING} the code seems consistent with the rest of the codebase.\n## DONE`; + const handle = makeFakeShim({ events: happyPathEvents(text) }); + const lastError: { kind?: string; message?: string } = {}; + const verdict = await runReviewerHeadless({ + shim: handle.shim, + chatId: 'test-chat', + phase: fixturePhase, + round: 1, + reviewerIdx: 0, + candidateLineage: 'openai', + candidateModel: 'gpt-5.5', + agentName: 'codex-cli', + askContent: 'review the doer output', + answerFile, + reviewerDir, + abortSignal: new AbortController().signal, + onEvent: (e) => events.push(e), + lastError, + }); + expect(verdict).toBeNull(); + expect(lastError.kind).toBe('verdict_ambiguous'); + }); + it('returns null when stream errors with no content', async () => { const handle = makeFakeShim({ events: [{ type: 'error', kind: 'quota_exhausted', message: 'limit hit' }],