Skip to content

Commit 683eea4

Browse files
Aegisclaude
authored and committed
fix(task-intelligence): port missing classifier branches from aegis-daemon
The deployed aegis-web worker bundles two copies of classifyTaskFailure — one from @stackbilt/aegis-core (this package) and one from aegis-daemon's local web/src/. Both cc-tasks route copies are also bundled, and Hono's first-registered-wins rule routes POST /api/cc-tasks/:id/complete to the core copy here, shadowing the daemon override. The daemon's copy had gained six classifier branches since the aegis-core migration that were never ported upstream, so every failure matching them was silently relabeled as generic_task_failure in production telemetry. The task-failure-pattern detector then filed bogus self-improvement issues (Stackbilt-dev/aegis#430, #431) against this bucket.

Ported forward:
- max_turns_exceeded (primary symptom — 7/7 generic_task_failure in last 7d were actually Claude turn-limit hits)
- credit_exhausted (runner_credit_exhausted system contract)
- auth_failure (runner_auth_degraded)
- environment_failure (runner_environment_degraded, exit 3 + env signals)
- hallucinated_task (dreaming/self-improvement targets that don't exist)
- work_already_done (agent saw nothing to do, didn't signal completion)

Also broadened branch_conflict detection to catch `branch ... already exists` without requiring `open pr`, and flipped it to retryable=true since the taskrunner now auto-closes stale PRs on retry. Added ENVIRONMENT_FAILURE_PATTERNS + isEnvironmentFailure helper for the exit-3 environment-vs-completion disambiguation. Ordering within classifyTaskFailure matches the daemon copy exactly, so behavior is byte-for-byte identical once this ships.
Validation:
- 9 new tests in web/tests/task-intelligence.test.ts, including one pinning the exact #430 symptom (Exit code 1 + [max_turns_exceeded] result → max_turns_exceeded kind, retryable=true)
- Full web test suite: 63 files, 1436 pass, 1 skipped
- Typecheck clean in aegis-oss/web and in aegis-daemon/web (downstream consumer via file:../../aegis-oss/web)

Not ported: TaskBlastRadius interface / preflight.blast_radius field — tied to aegis-daemon's v1.95.0 self-improvement blast gate, no consumer in this package.

Refs Stackbilt-dev/aegis#430, Stackbilt-dev/aegis#431

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 09dda60 commit 683eea4

2 files changed

Lines changed: 214 additions & 4 deletions

File tree

web/src/task-intelligence.ts

Lines changed: 133 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,32 @@ export function parseTaskAutopsy(raw: string | Record<string, unknown> | null |
7575
return parseJsonObject<TaskFailureAutopsy>(raw);
7676
}
7777

78+
/**
 * Environment failure patterns — tool/dependency/infra issues that exit code 3 can mask.
 *
 * Each entry is a case-insensitive regex tested against the failure haystack.
 * None of them carry the `g` or `y` flag, so `RegExp.prototype.test` keeps no
 * `lastIndex` state between calls and the shared array is safe to reuse.
 */
const ENVIRONMENT_FAILURE_PATTERNS: readonly RegExp[] = [
  // Package-manager / dependency installation problems.
  /npm\s+(err|error|warn).*install/i,
  /enoent.*npm/i,
  /cannot find module/i,
  /module not found/i,
  // Filesystem permissions.
  /permission denied/i,
  /eacces/i,
  // Network reachability.
  /network\s+(error|timeout|unreachable)/i,
  // The standard ENETUNREACH message reads "Network is unreachable", which the
  // pattern above does not cover because of the intervening "is".
  /network\s+is\s+unreachable/i,
  /connection\s+(refused|reset|timed?\s*out)/i,
  // Missing files or executables.
  /no\s+such\s+file\s+or\s+directory/i,
  /spawn\s+\S+\s+enoent/i,
  // Install / download failures reported by wrapper commands.
  /command\s+failed.*install/i,
  /failed\s+to\s+(fetch|download|install)/i,
  /dependency\s+(resolution|install)\s+fail/i,
  /exit\s+code\s+1.*npm\s+install/i,
  // Node and npm print system errno values as negative numbers ("errno -2",
  // "errno: -13"), so accept an optional colon and an optional minus sign.
  /errno:?\s+-?\d+/i,
  // Process-level resource exhaustion or crashes.
  /segmentation\s+fault/i,
  /out\s+of\s+memory/i,
  /disk\s+(full|space)/i,
];

/**
 * Reports whether the failure text looks like an environment/tooling problem.
 *
 * @param haystack - Text to scan for environment-failure signals.
 * @returns `true` when at least one entry of ENVIRONMENT_FAILURE_PATTERNS matches,
 *   `false` otherwise (including for an empty string).
 */
function isEnvironmentFailure(haystack: string): boolean {
  return ENVIRONMENT_FAILURE_PATTERNS.some((pattern) => pattern.test(haystack));
}
103+
78104
export function classifyTaskFailure(input: FailureInput): TaskFailureAutopsy {
79105
const warnings = input.preflight?.warnings ?? [];
80106
const signals = [
@@ -125,12 +151,15 @@ export function classifyTaskFailure(input: FailureInput): TaskFailureAutopsy {
125151
);
126152
}
127153

128-
if (haystack.includes('exists on remote') && haystack.includes('open pr')) {
154+
if (
155+
(haystack.includes('exists on remote') && haystack.includes('open pr')) ||
156+
(haystack.includes('branch') && haystack.includes('already exists') && !haystack.includes('repo not found'))
157+
) {
129158
return createAutopsy(
130159
'branch_conflict',
131-
false,
132-
'Task branch already exists on the remote, suggesting an unresolved earlier run or open PR.',
133-
'Close or merge the existing branch/PR, or rerun with a new branch identity.',
160+
true,
161+
'Task branch already exists on the remote from a prior run. The taskrunner now auto-closes stale PRs and cleans up branches on retry.',
162+
'Retry the task — the taskrunner will clean up the stale branch automatically.',
134163
signals,
135164
);
136165
}
@@ -157,6 +186,40 @@ export function classifyTaskFailure(input: FailureInput): TaskFailureAutopsy {
157186
);
158187
}
159188

189+
// Credit/billing exhaustion — runner hit API spend limits
190+
if (
191+
haystack.includes('credit balance') ||
192+
haystack.includes('credit limit') ||
193+
haystack.includes('insufficient credits') ||
194+
haystack.includes('billing') ||
195+
haystack.includes('payment required') ||
196+
haystack.includes('rate limit') && haystack.includes('credit')
197+
) {
198+
return createAutopsy(
199+
'credit_exhausted',
200+
false,
201+
'Task failed because the LLM provider credit balance was exhausted or billing limit was reached.',
202+
'Top up credits or adjust the runner configuration (e.g. switch from --bare API to Claude Code OAuth).',
203+
signals,
204+
'runner_credit_exhausted',
205+
);
206+
}
207+
208+
// Authentication failures — invalid or expired API keys/tokens
209+
if (
210+
(haystack.includes('unauthorized') || haystack.includes('401') || haystack.includes('authentication failed') || haystack.includes('api key') && haystack.includes('invalid')) &&
211+
!haystack.includes('repo') // avoid false matches on repo auth
212+
) {
213+
return createAutopsy(
214+
'auth_failure',
215+
false,
216+
'Task failed due to an authentication or authorization error with an external service.',
217+
'Check and rotate the relevant API key or token, then retry.',
218+
signals,
219+
'runner_auth_degraded',
220+
);
221+
}
222+
160223
if (input.exitCode === 127 || haystack.includes('command not found')) {
161224
return createAutopsy(
162225
'command_missing',
@@ -181,6 +244,72 @@ export function classifyTaskFailure(input: FailureInput): TaskFailureAutopsy {
181244
);
182245
}
183246

247+
// Exit code 3 with environment failure signals → environment_failure (not retryable)
248+
// These are tool/dependency/infra issues, not missing completion signals.
249+
// Real examples: npm install failures, missing CLI tools, network timeouts.
250+
if (input.exitCode === 3 && isEnvironmentFailure(haystack)) {
251+
return createAutopsy(
252+
'environment_failure',
253+
false,
254+
'Task failed due to an environment or tool-availability issue on the runner.',
255+
'Investigate the runner environment: check tool versions, network access, and dependency availability before retrying.',
256+
signals,
257+
'runner_environment_degraded',
258+
);
259+
}
260+
261+
// max_turns_exceeded — Claude hit the turn limit before completing.
262+
// This is retryable (with higher max_turns or a simpler task scope).
263+
// Must come before completion_signal_missing since both can have exit code 3,
264+
// but max_turns is a distinct, actionable failure with a clear fix.
265+
if (haystack.includes('max_turns') || haystack.includes('error_max_turns') || haystack.includes('ran out of turns')) {
266+
const hasPr = haystack.includes('[taskrunner] pr:') || haystack.includes('pr created') || haystack.includes('pull request');
267+
return createAutopsy(
268+
'max_turns_exceeded',
269+
true,
270+
hasPr
271+
? 'Task hit the turn limit but created a PR — work was likely completed, signal was not emitted before timeout.'
272+
: 'Task hit the turn limit before completing. Claude ran out of turns without emitting a completion signal.',
273+
hasPr
274+
? 'Review the PR — task may be complete. If so, mark as success. Otherwise, retry with higher max_turns or split the task.'
275+
: 'Retry with higher max_turns (current limit may be too low for the task scope) or split into smaller subtasks.',
276+
signals,
277+
);
278+
}
279+
280+
// Hallucinated task — agent determined the target doesn't exist
281+
if (
282+
haystack.includes("doesn't exist") && haystack.includes('hallucinated') ||
283+
haystack.includes('does not exist') && (haystack.includes('dreaming') || haystack.includes('self-improvement')) ||
284+
haystack.includes('code that doesn\'t exist')
285+
) {
286+
return createAutopsy(
287+
'hallucinated_task',
288+
false,
289+
'Task referenced code or components that do not exist — likely generated by dreaming/self-improvement without verification.',
290+
'Improve task source (dreaming/self-improvement) to verify targets exist before queuing.',
291+
signals,
292+
);
293+
}
294+
295+
// "Nothing to do" — agent determined work was already done but didn't signal completion
296+
if (
297+
haystack.includes('already resolved') ||
298+
haystack.includes('already complete') ||
299+
haystack.includes('already confirmed') ||
300+
haystack.includes('already processed') ||
301+
haystack.includes('nothing to do') ||
302+
haystack.includes('no action needed')
303+
) {
304+
return createAutopsy(
305+
'work_already_done',
306+
false,
307+
'Agent determined the work was already completed or unnecessary, but did not emit a completion signal.',
308+
'Task should be marked as success — the agent correctly identified no work was needed. Consider improving the taskrunner to recognize "already done" as a valid completion.',
309+
signals,
310+
);
311+
}
312+
184313
if (haystack.includes('completion signal not found') || input.exitCode === 3) {
185314
return createAutopsy(
186315
'completion_signal_missing',

web/tests/task-intelligence.test.ts

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,87 @@ describe('task intelligence', () => {
6161
expect(autopsy.system_contract).toBe('content_public_route_drift');
6262
});
6363

64+
it('classifies max_turns_exceeded as retryable even when exit code is 1', () => {
65+
const autopsy = classifyTaskFailure({
66+
title: 'Post-deploy visual QA: aegis',
67+
repo: 'aegis',
68+
error: 'Exit code 1',
69+
result: '[max_turns_exceeded] Task ran out of turns (12 used, unknown). Increase max_turns or simplify the task.',
70+
exitCode: 1,
71+
});
72+
73+
expect(autopsy.kind).toBe('max_turns_exceeded');
74+
expect(autopsy.retryable).toBe(true);
75+
});
76+
77+
it('classifies max_turns with existing PR as completed-but-unsignaled', () => {
78+
const autopsy = classifyTaskFailure({
79+
error: 'Exit code 1',
80+
result: '[max_turns_exceeded] Task ran out of turns. Pull request created: https://github.com/x/y/pull/1',
81+
exitCode: 1,
82+
});
83+
84+
expect(autopsy.kind).toBe('max_turns_exceeded');
85+
expect(autopsy.summary).toContain('created a PR');
86+
});
87+
88+
it('classifies credit exhaustion as non-retryable runner contract failure', () => {
89+
const autopsy = classifyTaskFailure({
90+
error: 'Your credit balance is too low to access the API',
91+
});
92+
93+
expect(autopsy.kind).toBe('credit_exhausted');
94+
expect(autopsy.retryable).toBe(false);
95+
expect(autopsy.system_contract).toBe('runner_credit_exhausted');
96+
});
97+
98+
it('classifies auth failures as runner_auth_degraded', () => {
99+
const autopsy = classifyTaskFailure({
100+
error: '401 unauthorized',
101+
result: 'authentication failed against the provider',
102+
});
103+
104+
expect(autopsy.kind).toBe('auth_failure');
105+
expect(autopsy.system_contract).toBe('runner_auth_degraded');
106+
});
107+
108+
it('classifies exit code 3 with npm errors as environment_failure, not completion_signal_missing', () => {
109+
const autopsy = classifyTaskFailure({
110+
error: 'npm ERR! install failed',
111+
result: 'Cannot find module foo',
112+
exitCode: 3,
113+
});
114+
115+
expect(autopsy.kind).toBe('environment_failure');
116+
expect(autopsy.system_contract).toBe('runner_environment_degraded');
117+
});
118+
119+
it('classifies work_already_done when agent reports nothing to do', () => {
120+
const autopsy = classifyTaskFailure({
121+
result: 'This issue has already been resolved in commit abc123 — nothing to do',
122+
});
123+
124+
expect(autopsy.kind).toBe('work_already_done');
125+
});
126+
127+
it('classifies hallucinated tasks from dreaming source', () => {
128+
const autopsy = classifyTaskFailure({
129+
title: 'fix: dreaming cycle task',
130+
result: 'The file referenced in the task does not exist in the repo',
131+
});
132+
133+
expect(autopsy.kind).toBe('hallucinated_task');
134+
});
135+
136+
it('classifies branch_conflict as retryable (auto-cleanup)', () => {
137+
const autopsy = classifyTaskFailure({
138+
result: 'Error: branch auto/docs/123 already exists on remote',
139+
});
140+
141+
expect(autopsy.kind).toBe('branch_conflict');
142+
expect(autopsy.retryable).toBe(true);
143+
});
144+
64145
it('deduplicates contract alerts by contract and repo', () => {
65146
const alerts = collectContractAlerts([
66147
{

0 commit comments

Comments
 (0)