diff --git a/src/lib/runner.ts b/src/lib/runner.ts
index eaad2a3..0457888 100644
--- a/src/lib/runner.ts
+++ b/src/lib/runner.ts
@@ -35,6 +35,14 @@ export function trimMessages(messages: T[]): T[] {
-  while (start < tail.length && tail[start].role === messages[0].role) {
+  // Drop leading tail messages that must not directly follow the anchor:
+  // a repeat of the anchor's role, or an orphaned tool message whose parent
+  // assistant+tool_calls was sliced off. OpenAI hard-rejects orphaned tool
+  // messages with 400; other providers silently degrade. One combined loop
+  // also covers interleavings such as tool -> anchor-role -> tool.
+  while (
+    start < tail.length &&
+    (tail[start].role === messages[0].role || tail[start].role === 'tool')
+  ) {
     start++;
   }
   return [messages[0], ...tail.slice(start)];
 }
 
diff --git a/tests/unit/runner.test.ts b/tests/unit/runner.test.ts
index 0d779f9..50a7b2a 100644
--- a/tests/unit/runner.test.ts
+++ b/tests/unit/runner.test.ts
@@ -328,4 +328,72 @@ describe('trimMessages', () => {
     expect(result[0].role).toBe('system');
     expect(result.length).toBeLessThanOrEqual(MAX_CONTEXT_MESSAGES);
   });
+
+  it('skips orphaned tool messages at trim boundary', () => {
+    // Simulate a real agent conversation where trimming slices between
+    // an assistant+tool_calls message and its tool responses.
+    // OpenAI rejects orphaned tool messages with 400.
+    const msgs: { role: string; content: string }[] = [
+      { role: 'system', content: 'system prompt' },
+    ];
+    // Fill with user/assistant pairs to push past the limit
+    for (let i = 1; i <= MAX_CONTEXT_MESSAGES + 2; i++) {
+      msgs.push({ role: i % 2 === 0 ? 'assistant' : 'user', content: `msg-${i}` });
+    }
+    // Put the trim boundary right after the parent assistant message, so the
+    // parent is sliced off and both tool responses open the tail.
+    const tailStart = msgs.length - MAX_CONTEXT_MESSAGES + 1;
+    msgs[tailStart - 1] = { role: 'assistant', content: 'parent-with-tool-calls' };
+    msgs[tailStart] = { role: 'tool', content: 'tool-response-orphaned' };
+    msgs[tailStart + 1] = { role: 'tool', content: 'tool-response-orphaned-2' };
+    msgs[tailStart + 2] = { role: 'assistant', content: 'next-reasoning' };
+
+    const result = trimMessages(msgs);
+    expect(result[0].content).toBe('system prompt');
+    // Both orphaned tool messages are dropped; the tail resumes at the next assistant turn
+    expect(result[1].content).toBe('next-reasoning');
+    expect(result.some((m) => m.role === 'tool')).toBe(false);
+  });
+
+  it('handles tool message right after anchor role collision', () => {
+    // Edge case: anchor is 'user', tail starts with 'user' (dropped by
+    // existing dedup), then 'tool' (should also be dropped)
+    const msgs: { role: string; content: string }[] = [
+      { role: 'user', content: 'anchor' },
+    ];
+    // Push past limit with alternating messages
+    for (let i = 1; i <= MAX_CONTEXT_MESSAGES + 2; i++) {
+      msgs.push({ role: i % 2 === 0 ? 'user' : 'assistant', content: `fill-${i}` });
+    }
+    // Manually inject a user+tool sequence at the expected tail boundary
+    // After slicing, tail[0] = 'user' (deduped), tail[1] = 'tool' (orphaned)
+    const tailStart = msgs.length - MAX_CONTEXT_MESSAGES + 1;
+    msgs[tailStart] = { role: 'user', content: 'collision' };
+    msgs[tailStart + 1] = { role: 'tool', content: 'orphaned-tool' };
+    msgs[tailStart + 2] = { role: 'assistant', content: 'recovery' };
+
+    const result = trimMessages(msgs);
+    expect(result[0].role).toBe('user');
+    expect(result[1].role).not.toBe('tool');
+    expect(result[1].content).toBe('recovery');
+  });
+
+  it('handles anchor role collision right after an orphaned tool message', () => {
+    // Reverse interleaving: tail starts with 'tool' (orphaned), then 'user'
+    // (collides with the 'user' anchor); both must be dropped
+    const msgs: { role: string; content: string }[] = [
+      { role: 'user', content: 'anchor' },
+    ];
+    for (let i = 1; i <= MAX_CONTEXT_MESSAGES + 2; i++) {
+      msgs.push({ role: i % 2 === 0 ? 'user' : 'assistant', content: `fill-${i}` });
+    }
+    const tailStart = msgs.length - MAX_CONTEXT_MESSAGES + 1;
+    msgs[tailStart] = { role: 'tool', content: 'orphaned-tool' };
+    msgs[tailStart + 1] = { role: 'user', content: 'collision' };
+    msgs[tailStart + 2] = { role: 'assistant', content: 'recovery' };
+
+    const result = trimMessages(msgs);
+    expect(result[0].content).toBe('anchor');
+    expect(result[1].content).toBe('recovery');
+  });
 });