From acfb3b0d4ca6d526d39b78fbecd261469a43c782 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Mon, 11 May 2026 17:23:00 +0200 Subject: [PATCH 1/5] Add Anthropic prompt caching breakpoints Split static and dynamic system prompt content so Anthropic-compatible providers can cache only the stable prefix, matching the Python agent-sdk behavior. Mark the last user/tool turn for cache extension and cover the native Anthropic and LiteLLM Claude request shapes with focused tests. --- .../sdk/llm/__tests__/promptCaching.test.ts | 198 ++++++++++++++++++ .../sdk/llm/__tests__/providerQuirks.test.ts | 38 +++- packages/agent-sdk/src/sdk/llm/anthropic.ts | 58 ++++- .../src/sdk/llm/openai-compatible.ts | 108 ++++++++-- .../agent-sdk/src/sdk/llm/providerQuirks.ts | 21 ++ packages/agent-sdk/src/sdk/llm/types.ts | 2 + packages/agent-sdk/src/sdk/runtime/Agent.ts | 44 ++-- .../__tests__/Agent.system-prompt.test.ts | 28 +++ .../runtime/__tests__/condensation.test.ts | 25 +++ .../agent-sdk/src/sdk/runtime/condensation.ts | 32 ++- 10 files changed, 505 insertions(+), 49 deletions(-) create mode 100644 packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts new file mode 100644 index 00000000..ec70fb5f --- /dev/null +++ b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts @@ -0,0 +1,198 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { LLMStreamer } from '../../runtime'; +import { AnthropicClient, OpenAICompatibleClient } from '../index'; +import type { ChatCompletionRequest, LLMConfiguration } from '../types'; + +const encoder = new TextEncoder(); +const EPHEMERAL_CACHE_CONTROL = { type: 'ephemeral' }; + +const createStreamResponse = (payload: string, status = 200): Response => + new Response( + new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(payload)); + controller.close(); + }, + }), + { status, headers: { 'content-type': 'text/event-stream' } }, + ); + +const anthropicSse = [ + 'event: content_block_delta', + 'data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Done"}}', + '', + 'event: message_delta', + 'data: {"type":"message_delta","delta":{"stop_reason":"end_turn"}}', + '', +].join('\n'); + +const openAiSse = [ + 'data: {"choices":[{"delta":{"content":"Done"}}]}', + 'data: {"choices":[{"delta":{},"finish_reason":"stop"}]}', + 'data: [DONE]', +].join('\n'); + +const splitSystemPromptRequest = ( + overrides: Partial = {}, +): ChatCompletionRequest => ({ + systemPrompt: 'STATIC\n\nDYNAMIC', + cacheableSystemPrompt: 'STATIC', + dynamicSystemPrompt: 'DYNAMIC', + messages: [{ role: 'user', content: [{ type: 'text', text: 'hello' }] }], + ...overrides, +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('Anthropic prompt caching', () => { + const baseConfig: LLMConfiguration = { + model: 'claude-sonnet-4-5-20250929', + provider: 'anthropic', + }; + + it('marks only the static system block and last user block for caching', async () => { + const fetchMock = vi + .spyOn(global, 'fetch') + .mockResolvedValue(createStreamResponse(anthropicSse)); + + const client = new AnthropicClient(baseConfig, 'test-key'); + const streamer = new LLMStreamer(client); + + await streamer.runChat(splitSystemPromptRequest()); + + expect(fetchMock).toHaveBeenCalledTimes(1); + const init = fetchMock.mock.calls[0]?.[1] as { body?: unknown } | undefined; + const body = typeof init?.body === 'string' ? JSON.parse(init.body) : null; + + expect(body?.system).toEqual([ + { type: 'text', text: 'STATIC', cache_control: EPHEMERAL_CACHE_CONTROL }, + { type: 'text', text: 'DYNAMIC' }, + ]); + expect(body?.messages?.[0]).toMatchObject({ + role: 'user', + content: [{ type: 'text', text: 'hello', cache_control: EPHEMERAL_CACHE_CONTROL }], + }); + }); + + it('moves the cache marker to the tool-result message level', async () => { + const fetchMock = vi + .spyOn(global, 'fetch') + .mockResolvedValue(createStreamResponse(anthropicSse)); + + const client = new AnthropicClient(baseConfig, 'test-key'); + const streamer = new LLMStreamer(client); + + await streamer.runChat(splitSystemPromptRequest({ + messages: [ + { role: 'user', content: [{ type: 'text', text: 'hello' }] }, + { + role: 'assistant', + content: [], + tool_calls: [ + { + id: 'call_1', + type: 'function', + function: { name: 'bash', arguments: '{"command":"echo hi"}' }, + }, + ], + }, + { + role: 'tool', + content: [{ type: 'text', text: 'hi' }], + tool_call_id: 'call_1', + }, + ], + tools: [{ type: 'function', function: { name: 'bash' } }], + })); + + expect(fetchMock).toHaveBeenCalledTimes(1); + const init = fetchMock.mock.calls[0]?.[1] as { body?: unknown } | undefined; + const body = typeof init?.body === 'string' ? JSON.parse(init.body) : null; + + expect(body?.messages?.at(-1)).toEqual({ + role: 'user', + content: [{ type: 'tool_result', tool_use_id: 'call_1', content: 'hi' }], + cache_control: EPHEMERAL_CACHE_CONTROL, + }); + }); +}); + +describe('OpenAI-compatible Anthropic prompt caching', () => { + const baseConfig: LLMConfiguration = { + model: 'claude-sonnet-4-5-20250929', + provider: 'litellm_proxy', + baseUrl: 'http://localhost:4000', + }; + + it('marks only the static system block and last user block for caching', async () => { + const fetchMock = vi + .spyOn(global, 'fetch') + .mockResolvedValue(createStreamResponse(openAiSse)); + + const client = new OpenAICompatibleClient(baseConfig, 'test-key'); + const streamer = new LLMStreamer(client); + + await streamer.runChat(splitSystemPromptRequest()); + + expect(fetchMock).toHaveBeenCalledTimes(1); + const init = fetchMock.mock.calls[0]?.[1] as { body?: unknown } | undefined; + const body = typeof init?.body === 'string' ? JSON.parse(init.body) : null; + + expect(body?.messages?.[0]).toEqual({ + role: 'system', + content: [ + { type: 'text', text: 'STATIC', cache_control: EPHEMERAL_CACHE_CONTROL }, + { type: 'text', text: 'DYNAMIC' }, + ], + }); + expect(body?.messages?.[1]).toMatchObject({ + role: 'user', + content: [{ type: 'text', text: 'hello', cache_control: EPHEMERAL_CACHE_CONTROL }], + }); + }); + + it('moves the cache marker to the tool message level', async () => { + const fetchMock = vi + .spyOn(global, 'fetch') + .mockResolvedValue(createStreamResponse(openAiSse)); + + const client = new OpenAICompatibleClient(baseConfig, 'test-key'); + const streamer = new LLMStreamer(client); + + await streamer.runChat(splitSystemPromptRequest({ + messages: [ + { role: 'user', content: [{ type: 'text', text: 'hello' }] }, + { + role: 'assistant', + content: [{ type: 'text', text: '' }], + tool_calls: [ + { + id: 'call_1', + type: 'function', + function: { name: 'bash', arguments: '{"command":"echo hi"}' }, + }, + ], + }, + { + role: 'tool', + content: [{ type: 'text', text: 'hi' }], + tool_call_id: 'call_1', + }, + ], + tools: [{ type: 'function', function: { name: 'bash' } }], + })); + + expect(fetchMock).toHaveBeenCalledTimes(1); + const init = fetchMock.mock.calls[0]?.[1] as { body?: unknown } | undefined; + const body = typeof init?.body === 'string' ? JSON.parse(init.body) : null; + + expect(body?.messages?.at(-1)).toEqual({ + role: 'tool', + content: 'hi', + tool_call_id: 'call_1', + cache_control: EPHEMERAL_CACHE_CONTROL, + }); + }); +}); diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts index ed04456e..484c27bc 100644 --- a/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts +++ b/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts @@ -1,6 +1,12 @@ import { describe, expect, it } from 'vitest'; import type { LLMConfiguration } from '../types'; -import { normalizeGenerationParamsForModel, isAnthropicModel, supportsThinkingBlocks, getAnthropicThinkingBudget } from '../providerQuirks'; +import { + normalizeGenerationParamsForModel, + isAnthropicModel, + supportsPromptCaching, + supportsThinkingBlocks, + getAnthropicThinkingBudget, +} from '../providerQuirks'; const makeConfig = (overrides: Partial = {}): LLMConfiguration => ({ model: 'gpt-4o', @@ -135,6 +141,36 @@ describe('supportsThinkingBlocks', () => { }); }); +describe('supportsPromptCaching', () => { + it('returns true for supported Anthropic cacheable models', () => { + expect(supportsPromptCaching(makeConfig({ + model: 'claude-sonnet-4-5-20250929', + provider: 'anthropic', + }))).toBe(true); + }); + + it('returns true for LiteLLM Anthropic routing with supported cacheable models', () => { + expect(supportsPromptCaching(makeConfig({ + model: 'anthropic/claude-3-5-sonnet-20241022', + provider: 'litellm_proxy', + }))).toBe(true); + }); + + it('returns false for Anthropic models outside the prompt-cache allowlist', () => { + expect(supportsPromptCaching(makeConfig({ + model: 'claude-2.1', + provider: 'anthropic', + }))).toBe(false); + }); + + it('returns false for non-Anthropic models', () => { + expect(supportsPromptCaching(makeConfig({ + model: 'gpt-4o', + provider: 'openai', + }))).toBe(false); + }); +}); + describe('getAnthropicThinkingBudget', () => { it('returns undefined for non-Anthropic models', () => { expect(getAnthropicThinkingBudget(makeConfig({ diff --git a/packages/agent-sdk/src/sdk/llm/anthropic.ts b/packages/agent-sdk/src/sdk/llm/anthropic.ts index 8f446739..6cd3b3ce 100644 --- a/packages/agent-sdk/src/sdk/llm/anthropic.ts +++ b/packages/agent-sdk/src/sdk/llm/anthropic.ts @@ -1,8 +1,11 @@ import { reduceTextContent, DEFAULT_RETRY_OPTIONS, DEFAULT_TIMEOUT_MS, type ChatCompletionRequest, type LLMClient, type LLMConfiguration, type LLMStreamChunk, type LLMToolDefinition, type RetryOptions, type ToolCallAccumulator } from './types'; -import { getAnthropicThinkingBudget } from './providerQuirks'; +import { getAnthropicThinkingBudget, supportsPromptCaching } from './providerQuirks'; import { NonRetryableHttpStatusError, requestWithRetry } from './httpRetry'; const decoder = new TextDecoder(); +const EPHEMERAL_CACHE_CONTROL = { type: 'ephemeral' } as const; + +type AnthropicCacheControl = typeof EPHEMERAL_CACHE_CONTROL; // Anthropic content block types type AnthropicThinkingBlock = { @@ -14,6 +17,7 @@ type AnthropicThinkingBlock = { type AnthropicTextBlock = { type: 'text'; text: string; + cache_control?: AnthropicCacheControl; }; type AnthropicToolUseBlock = { @@ -32,6 +36,7 @@ type AnthropicToolResultBlock = { type AnthropicImageBlock = { type: 'image'; source: { type: 'base64'; media_type: string; data: string }; + cache_control?: AnthropicCacheControl; }; type AnthropicContentBlock = @@ -44,6 +49,7 @@ type AnthropicContentBlock = interface AnthropicMessage { role: 'user' | 'assistant'; content: AnthropicContentBlock[]; + cache_control?: AnthropicCacheControl; } type AnthropicEventName = 'message_start' | 'content_block_start' | 'content_block_delta' | 'message_delta' | (string & {}); @@ -137,10 +143,25 @@ const parseBase64DataUrl = (url: string): { mediaType: string; base64: string } return { mediaType: match[1].toLowerCase(), base64: match[2] }; }; -const toAnthropicMessages = (request: ChatCompletionRequest): AnthropicMessage[] => { +const toAnthropicMessages = ( + request: ChatCompletionRequest, + options?: { cacheLastMessage?: boolean }, +): AnthropicMessage[] => { const result: AnthropicMessage[] = []; + const lastCacheableMessageIndex = options?.cacheLastMessage + ? (() => { + for (let index = request.messages.length - 1; index >= 0; index -= 1) { + const role = request.messages[index]?.role; + if (role === 'user' || role === 'tool') { + return index; + } + } + return -1; + })() + : -1; - for (const message of request.messages) { + for (const [index, message] of request.messages.entries()) { + const shouldCacheMessage = index === lastCacheableMessageIndex; if (message.role === 'user') { const contentBlocks: AnthropicContentBlock[] = []; for (const part of message.content) { @@ -162,6 +183,14 @@ const toAnthropicMessages = (request: ChatCompletionRequest): AnthropicMessage[] if (contentBlocks.length === 0) { contentBlocks.push({ type: 'text', text: '' }); } + const lastBlock = contentBlocks.at(-1); + if ( + shouldCacheMessage && + lastBlock && + (lastBlock.type === 'text' || lastBlock.type === 'image') + ) { + lastBlock.cache_control = EPHEMERAL_CACHE_CONTROL; + } result.push({ role: 'user', content: contentBlocks }); } else if (message.role === 'assistant') { // Assistant messages: may have thinking + tool_use @@ -223,11 +252,15 @@ const toAnthropicMessages = (request: ChatCompletionRequest): AnthropicMessage[] if (lastMessage?.role === 'user') { // Append to existing user message lastMessage.content.push(toolResultBlock); + if (shouldCacheMessage) { + lastMessage.cache_control = EPHEMERAL_CACHE_CONTROL; + } } else { // Create new user message result.push({ role: 'user', content: [toolResultBlock], + ...(shouldCacheMessage ? { cache_control: EPHEMERAL_CACHE_CONTROL } : {}), }); } } @@ -411,6 +444,21 @@ export class AnthropicClient implements LLMClient { private requestBody(request: ChatCompletionRequest): Record { const anthropicTools = toAnthropicTools(request.tools); const thinkingBudget = getAnthropicThinkingBudget(this.config); + const cacheableSystemPrompt = + typeof request.cacheableSystemPrompt === 'string' && request.cacheableSystemPrompt.trim() + ? request.cacheableSystemPrompt + : request.systemPrompt; + const dynamicSystemPrompt = + typeof request.dynamicSystemPrompt === 'string' && request.dynamicSystemPrompt.trim() + ? request.dynamicSystemPrompt + : undefined; + const promptCachingEnabled = supportsPromptCaching(this.config); + const system = promptCachingEnabled + ? [ + { type: 'text' as const, text: cacheableSystemPrompt, cache_control: EPHEMERAL_CACHE_CONTROL }, + ...(dynamicSystemPrompt ? [{ type: 'text' as const, text: dynamicSystemPrompt }] : []), + ] + : [{ type: 'text' as const, text: request.systemPrompt }]; return { model: this.config.model, @@ -418,8 +466,8 @@ export class AnthropicClient implements LLMClient { // Note: temperature is normalized by providerQuirks.normalizeGenerationParamsForModel() // which sets temperature=1 when thinking is enabled (Anthropic requirement) temperature: this.config.temperature ?? 0, - system: [{ type: 'text', text: request.systemPrompt }], - messages: toAnthropicMessages(request), + system, + messages: toAnthropicMessages(request, { cacheLastMessage: promptCachingEnabled }), stream: true, ...(anthropicTools ? { tools: anthropicTools, tool_choice: { type: 'auto' } } : {}), thinking: thinkingBudget !== undefined diff --git a/packages/agent-sdk/src/sdk/llm/openai-compatible.ts b/packages/agent-sdk/src/sdk/llm/openai-compatible.ts index 4ac2848f..b603fd4d 100644 --- a/packages/agent-sdk/src/sdk/llm/openai-compatible.ts +++ b/packages/agent-sdk/src/sdk/llm/openai-compatible.ts @@ -1,10 +1,13 @@ import { reduceTextContent, DEFAULT_RETRY_OPTIONS, DEFAULT_TIMEOUT_MS, type ChatCompletionRequest, type LLMClient, type LLMConfiguration, type LLMStreamChunk, type RetryOptions, type ToolCallAccumulator } from './types'; import { DEFAULT_PROVIDER_BASE_URLS } from './provider'; -import { supportsThinkingBlocks } from './providerQuirks'; +import { supportsPromptCaching, supportsThinkingBlocks } from './providerQuirks'; import { buildOpenAiHeaders } from './openaiHeaders'; import { NonRetryableHttpStatusError, requestWithRetry } from './httpRetry'; const decoder = new TextDecoder(); +const EPHEMERAL_CACHE_CONTROL = { type: 'ephemeral' } as const; + +type OpenAICacheControl = typeof EPHEMERAL_CACHE_CONTROL; type OpenAIThinkingContentBlock = { type: 'thinking'; @@ -15,11 +18,13 @@ type OpenAIThinkingContentBlock = { type OpenAITextContentBlock = { type: 'text'; text: string; + cache_control?: OpenAICacheControl; }; type OpenAIImageUrlContentBlock = { type: 'image_url'; image_url: { url: string; detail?: string }; + cache_control?: OpenAICacheControl; }; type OpenAIToolUseContentBlock = { @@ -37,6 +42,7 @@ type OpenAIChatMessage = { name?: string; tool_call_id?: string; tool_calls?: ChatCompletionRequest['messages'][number]['tool_calls']; + cache_control?: OpenAICacheControl; }; type OpenAIThinkingBlock = { @@ -88,8 +94,13 @@ const isOpenAIStreamChunk = (value: unknown): value is OpenAIStreamChunk => * tool_use blocks in content. LiteLLM converts tool_calls to tool_use when proxying to Anthropic. * However, thinking blocks must be sent in the content array since there's no OpenAI equivalent. */ -const toOpenAIMessage = (message: ChatCompletionRequest['messages'][number], config: LLMConfiguration): OpenAIChatMessage => { +const toOpenAIMessage = ( + message: ChatCompletionRequest['messages'][number], + config: LLMConfiguration, + options?: { cachePrompt?: boolean }, +): OpenAIChatMessage => { const contentText = reduceTextContent(message); + const shouldCachePrompt = options?.cachePrompt === true; // For Anthropic models with thinking enabled: include thinking blocks in content array // This is required when assistant messages have thinking content that needs to be preserved. @@ -140,14 +151,34 @@ const toOpenAIMessage = (message: ChatCompletionRequest['messages'][number], con } } } - if (blocks.some((b) => b.type === 'image_url')) { + if (blocks.some((b) => b.type === 'image_url') || shouldCachePrompt) { if (!blocks.some((b) => b.type === 'text')) { blocks.unshift({ type: 'text', text: '' }); } + if (shouldCachePrompt) { + const lastBlock = blocks.at(-1); + if ( + lastBlock && + (lastBlock.type === 'text' || lastBlock.type === 'image_url') + ) { + lastBlock.cache_control = EPHEMERAL_CACHE_CONTROL; + } + } return { role: 'user', content: blocks }; } } + if (message.role === 'tool' && shouldCachePrompt) { + const cachedToolMessage: OpenAIChatMessage = { + role: 'tool', + content: contentText, + cache_control: EPHEMERAL_CACHE_CONTROL, + }; + if (message.name) cachedToolMessage.name = message.name; + if (message.tool_call_id) cachedToolMessage.tool_call_id = message.tool_call_id; + return cachedToolMessage; + } + // Standard case: plain text content (for non-Anthropic models or messages without thinking) const base: OpenAIChatMessage = { role: message.role, @@ -159,25 +190,58 @@ const toOpenAIMessage = (message: ChatCompletionRequest['messages'][number], con return base; }; -const toRequestBody = (config: LLMConfiguration, request: ChatCompletionRequest) => ({ - model: config.model, - messages: [ - { - role: 'system', - content: request.systemPrompt, - }, - ...request.messages.map((msg) => toOpenAIMessage(msg, config)), - ], - stream: true, - stream_options: { include_usage: true }, - temperature: config.temperature ?? undefined, - // Do not send top_p or top_k for OpenAI-compatible endpoints to avoid proxy/model rejections - // top_p and top_k intentionally omitted - max_tokens: config.maxOutputTokens ?? undefined, - reasoning_effort: config.reasoningEffort && config.reasoningEffort !== 'none' ? config.reasoningEffort : undefined, - tools: request.tools, - tool_choice: request.tools?.length ? 'auto' : undefined, -}); +const toRequestBody = (config: LLMConfiguration, request: ChatCompletionRequest) => { + const promptCachingEnabled = supportsPromptCaching(config); + const cacheableSystemPrompt = + typeof request.cacheableSystemPrompt === 'string' && request.cacheableSystemPrompt.trim() + ? request.cacheableSystemPrompt + : request.systemPrompt; + const dynamicSystemPrompt = + typeof request.dynamicSystemPrompt === 'string' && request.dynamicSystemPrompt.trim() + ? request.dynamicSystemPrompt + : undefined; + const lastCacheableMessageIndex = promptCachingEnabled + ? (() => { + for (let index = request.messages.length - 1; index >= 0; index -= 1) { + const role = request.messages[index]?.role; + if (role === 'user' || role === 'tool') { + return index; + } + } + return -1; + })() + : -1; + + return { + model: config.model, + messages: [ + promptCachingEnabled + ? { + role: 'system' as const, + content: [ + { type: 'text' as const, text: cacheableSystemPrompt, cache_control: EPHEMERAL_CACHE_CONTROL }, + ...(dynamicSystemPrompt ? [{ type: 'text' as const, text: dynamicSystemPrompt }] : []), + ], + } + : { + role: 'system' as const, + content: request.systemPrompt, + }, + ...request.messages.map((msg, index) => + toOpenAIMessage(msg, config, { cachePrompt: index === lastCacheableMessageIndex }), + ), + ], + stream: true, + stream_options: { include_usage: true }, + temperature: config.temperature ?? undefined, + // Do not send top_p or top_k for OpenAI-compatible endpoints to avoid proxy/model rejections + // top_p and top_k intentionally omitted + max_tokens: config.maxOutputTokens ?? undefined, + reasoning_effort: config.reasoningEffort && config.reasoningEffort !== 'none' ? config.reasoningEffort : undefined, + tools: request.tools, + tool_choice: request.tools?.length ? 'auto' : undefined, + }; +}; const defaultBaseUrls: Record = { openai: DEFAULT_PROVIDER_BASE_URLS.openai, diff --git a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts index b3bea02f..32856638 100644 --- a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts +++ b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts @@ -57,6 +57,21 @@ import type { LLMConfiguration } from './types'; const ANTHROPIC_THINKING_MIN_BUDGET = 1024; const ANTHROPIC_THINKING_MAX_BUDGET = 128000; +const PROMPT_CACHE_MODELS = [ + 'claude-3-7-sonnet', + 'claude-sonnet-3-7-latest', + 'claude-3-5-sonnet', + 'claude-3-5-haiku', + 'claude-3-haiku-20240307', + 'claude-3-opus-20240229', + 'claude-sonnet-4', + 'claude-opus-4', + 'claude-haiku-4-5', + 'claude-sonnet-4-5', + 'claude-sonnet-4-6', + 'claude-opus-4-5', + 'claude-opus-4-6', +]; const isGpt5Model = (model: string | undefined): boolean => { if (typeof model !== 'string') return false; @@ -100,6 +115,12 @@ export const supportsThinkingBlocks = (config: LLMConfiguration): boolean => { return isAnthropicModel(config) && hasExtendedThinking(config); }; +export const supportsPromptCaching = (config: LLMConfiguration): boolean => { + if (!isAnthropicModel(config)) return false; + const model = config.model?.trim().toLowerCase() ?? ''; + return PROMPT_CACHE_MODELS.some((needle) => model.includes(needle)); +}; + /** * Get the thinking budget tokens for Anthropic extended thinking. * diff --git a/packages/agent-sdk/src/sdk/llm/types.ts b/packages/agent-sdk/src/sdk/llm/types.ts index 6ebaa6cb..f2857e80 100644 --- a/packages/agent-sdk/src/sdk/llm/types.ts +++ b/packages/agent-sdk/src/sdk/llm/types.ts @@ -63,6 +63,8 @@ export interface LLMConfiguration { export interface ChatCompletionRequest { systemPrompt: string; + cacheableSystemPrompt?: string; + dynamicSystemPrompt?: string | null; messages: Message[]; tools?: LLMToolDefinition[]; } diff --git a/packages/agent-sdk/src/sdk/runtime/Agent.ts b/packages/agent-sdk/src/sdk/runtime/Agent.ts index e059167c..9b2eb1b5 100644 --- a/packages/agent-sdk/src/sdk/runtime/Agent.ts +++ b/packages/agent-sdk/src/sdk/runtime/Agent.ts @@ -654,7 +654,8 @@ export class Agent extends EventEmitter { for (let condensationAttempt = 0; condensationAttempt <= MAX_CONDENSATIONS_PER_STEP; condensationAttempt += 1) { const request = buildChatRequestWithCondensation({ events: this.events.list(), - systemPrompt: this.buildSystemPrompt(), + systemPrompt: this.buildCacheableSystemPrompt(), + dynamicSystemPrompt: this.buildDynamicSystemPrompt(), tools: this.getToolDefinitions(), pastedImagesBaseDir: this.options.pastedImagesBaseDir, }); @@ -1017,27 +1018,12 @@ export class Agent extends EventEmitter { return this.getToolDefinitions().map((tool) => tool as unknown as Record); } - private buildSystemPrompt(): string { + private buildCacheableSystemPrompt(): string { const promptIdentity = this.agentContext?.getSystemMessagePrefix() ?? SYSTEM_PROMPT_IDENTITY; let systemPrompt = `${promptIdentity}\n\n${SYSTEM_PROMPT_BODY}`; if (!this.shouldIncludeSecurityRiskAssessment()) { systemPrompt = systemPrompt.replace(SECURITY_RISK_ASSESSMENT_SECTION, ''); } - if (this.agentContext) { - const { llmModel, llmProvider, llmBaseUrl } = resolveSystemPromptLlmContext( - this.options.settings?.llm, - this.options.profileStoreOptions, - ); - const suffix = this.agentContext.getSystemMessageSuffix({ - secretNames: this.secrets.getRegisteredNames(), - llmModel, - llmProvider, - llmBaseUrl, - }); - if (suffix) { - systemPrompt += '\n\n' + suffix; - } - } const summaries = this.getToolDefinitions() .map((tool) => { @@ -1054,6 +1040,30 @@ export class Agent extends EventEmitter { return systemPrompt; } + private buildDynamicSystemPrompt(): string | null { + if (!this.agentContext) { + return null; + } + + const { llmModel, llmProvider, llmBaseUrl } = resolveSystemPromptLlmContext( + this.options.settings?.llm, + this.options.profileStoreOptions, + ); + return this.agentContext.getSystemMessageSuffix({ + secretNames: this.secrets.getRegisteredNames(), + llmModel, + llmProvider, + llmBaseUrl, + }); + } + + private buildSystemPrompt(): string { + const parts = [this.buildCacheableSystemPrompt(), this.buildDynamicSystemPrompt()].filter( + (value): value is string => typeof value === 'string' && value.length > 0, + ); + return parts.join('\n\n'); + } + private shouldIncludeSecurityRiskAssessment(): boolean { return this.confirmationPolicy.kind !== 'NeverConfirm' || this.securityAnalyzer?.kind === 'LLMSecurityAnalyzer'; } diff --git a/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts b/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts index e5e07d6d..9a49a461 100644 --- a/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts +++ b/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts @@ -139,6 +139,34 @@ describe('Agent system prompt', () => { expect(llm.requests[2]?.systemPrompt).not.toContain('Currently opened in the editor:'); }); + it('keeps the cacheable system prompt stable while dynamic context changes', async () => { + const settings: OpenHandsSettings = { + llm: { model: 'claude-sonnet-4-5-20250929' }, + agent: {}, + conversation: { maxIterations: 3 }, + confirmation: { policy: 'never' }, + secrets: {}, + }; + const llm = new RecordingLLM(); + const agentContext = new AgentContext({ systemMessageSuffix: 'Currently opened in the editor: /tmp/first.ts' }); + + const agent = new Agent({ + settings, + workspaceRoot: createWorkspaceRoot(), + llmClient: llm, + agentContext, + }); + + await agent.run('hi'); + agentContext.systemMessageSuffix = 'Currently opened in the editor: /tmp/second.ts'; + await agent.run('hi again'); + + expect(llm.requests[0]?.cacheableSystemPrompt).toBe(llm.requests[1]?.cacheableSystemPrompt); + expect(llm.requests[0]?.dynamicSystemPrompt).toContain('/tmp/first.ts'); + expect(llm.requests[1]?.dynamicSystemPrompt).toContain('/tmp/second.ts'); + expect(llm.requests[1]?.dynamicSystemPrompt).not.toContain('/tmp/first.ts'); + }); + it('gates vendor-specific repo skills using LLM profile config', async () => { const profilesRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-system-prompt-profiles-')); profileRoots.push(profilesRoot); diff --git a/packages/agent-sdk/src/sdk/runtime/__tests__/condensation.test.ts b/packages/agent-sdk/src/sdk/runtime/__tests__/condensation.test.ts index 8f09d9f7..9ac4b3df 100644 --- a/packages/agent-sdk/src/sdk/runtime/__tests__/condensation.test.ts +++ b/packages/agent-sdk/src/sdk/runtime/__tests__/condensation.test.ts @@ -66,6 +66,31 @@ describe('condensation helpers', () => { expect(userMessage.content).toEqual([{ type: 'text', text: 'Hello' }, { type: 'text', text: 'Context' }]); }); + it('keeps cacheable and dynamic system prompt parts separate', () => { + const condense = { + kind: 'Condensation', + source: 'environment', + forgotten_event_ids: [], + summary: 'short summary', + summary_offset: 4, + } satisfies Extract; + + const request = buildChatRequestWithCondensation({ + events: [condense], + systemPrompt: 'STATIC', + dynamicSystemPrompt: 'DYNAMIC', + tools: [], + }); + + expect(request.cacheableSystemPrompt).toBe('STATIC'); + expect(request.dynamicSystemPrompt).toBe( + 'DYNAMIC\n\n\nshort summary\n', + ); + expect(request.systemPrompt).toBe( + 'STATIC\n\nDYNAMIC\n\n\nshort summary\n', + ); + }); + it('only keeps for the most recent user message', () => { const message1 = { kind: 'MessageEvent', diff --git a/packages/agent-sdk/src/sdk/runtime/condensation.ts b/packages/agent-sdk/src/sdk/runtime/condensation.ts index e8d34c25..e0f5e0b0 100644 --- a/packages/agent-sdk/src/sdk/runtime/condensation.ts +++ b/packages/agent-sdk/src/sdk/runtime/condensation.ts @@ -35,15 +35,33 @@ export const getCondensationState = (events: Event[]): CondensationState => { export const buildChatRequestWithCondensation = (params: { events: Event[]; systemPrompt: string; + dynamicSystemPrompt?: string | null; tools: LLMToolDefinition[]; pastedImagesBaseDir?: string; -}): { systemPrompt: string; messages: Message[]; tools: LLMToolDefinition[] } => { +}): { + systemPrompt: string; + cacheableSystemPrompt: string; + dynamicSystemPrompt?: string; + messages: Message[]; + tools: LLMToolDefinition[]; +} => { const condensationState = getCondensationState(params.events); - let systemPrompt = params.systemPrompt; + const dynamicParts: string[] = []; + const baseDynamicSystemPrompt = + typeof params.dynamicSystemPrompt === 'string' ? params.dynamicSystemPrompt.trim() : ''; + if (baseDynamicSystemPrompt) { + dynamicParts.push(baseDynamicSystemPrompt); + } if (condensationState.summary) { - systemPrompt += `\n\n\n${condensationState.summary}\n`; + dynamicParts.push( + `\n${condensationState.summary}\n`, + ); } + const dynamicSystemPrompt = dynamicParts.join('\n\n') || undefined; + const systemPrompt = dynamicSystemPrompt + ? `${params.systemPrompt}\n\n${dynamicSystemPrompt}` + : params.systemPrompt; const messageEvents = params.events .filter(isMessageEvent) @@ -171,7 +189,13 @@ export const buildChatRequestWithCondensation = (params: { }).map(maybeExpandOpenHandsImages); const messages = sanitizeChatMessages(rawMessages); - return { systemPrompt, messages, tools: params.tools }; + return { + systemPrompt, + cacheableSystemPrompt: params.systemPrompt, + ...(dynamicSystemPrompt ? { dynamicSystemPrompt } : {}), + messages, + tools: params.tools, + }; }; export type CondensationResult = { From a841c0f039d1bff10fa121f6cb4787d0ba478afc Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Mon, 11 May 2026 17:33:14 +0200 Subject: [PATCH 2/5] Support claude-opus-4-7 prompt caching --- .../agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts | 4 ++-- .../agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts | 4 ++++ packages/agent-sdk/src/sdk/llm/providerQuirks.ts | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts index ec70fb5f..31ba7b35 100644 --- a/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts +++ b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts @@ -48,7 +48,7 @@ afterEach(() => { describe('Anthropic prompt caching', () => { const baseConfig: LLMConfiguration = { - model: 'claude-sonnet-4-5-20250929', + model: 'claude-opus-4-7', provider: 'anthropic', }; @@ -121,7 +121,7 @@ describe('Anthropic prompt caching', () => { describe('OpenAI-compatible Anthropic prompt caching', () => { const baseConfig: LLMConfiguration = { - model: 'claude-sonnet-4-5-20250929', + model: 'claude-opus-4-7', provider: 'litellm_proxy', baseUrl: 'http://localhost:4000', }; diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts index 484c27bc..4a05b8f5 100644 --- a/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts +++ b/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts @@ -147,6 +147,10 @@ describe('supportsPromptCaching', () => { model: 'claude-sonnet-4-5-20250929', provider: 'anthropic', }))).toBe(true); + expect(supportsPromptCaching(makeConfig({ + model: 'claude-opus-4-7', + provider: 'anthropic', + }))).toBe(true); }); it('returns true for LiteLLM Anthropic routing with supported cacheable models', () => { diff --git a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts index 32856638..1617664a 100644 --- a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts +++ b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts @@ -71,6 +71,7 @@ const PROMPT_CACHE_MODELS = [ 'claude-sonnet-4-6', 'claude-opus-4-5', 'claude-opus-4-6', + 'claude-opus-4-7', ]; const isGpt5Model = (model: string | undefined): boolean => { From 2250e22c58bdcca5713e4b327565f738c5315c74 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Mon, 11 May 2026 17:56:43 +0200 Subject: [PATCH 3/5] Fix Anthropic tool-result cache placement --- .../src/sdk/llm/__tests__/promptCaching.test.ts | 10 +++++++--- .../src/sdk/llm/__tests__/thinkingBlocks.test.ts | 2 +- packages/agent-sdk/src/sdk/llm/anthropic.ts | 7 ++----- packages/agent-sdk/src/sdk/llm/openai-compatible.ts | 2 ++ packages/agent-sdk/src/sdk/runtime/Agent.ts | 8 ++++++++ .../sdk/runtime/__tests__/Agent.system-prompt.test.ts | 4 ++++ 6 files changed, 24 insertions(+), 9 deletions(-) diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts index 31ba7b35..13b02b0c 100644 --- a/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts +++ b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts @@ -76,7 +76,7 @@ describe('Anthropic prompt caching', () => { }); }); - it('moves the cache marker to the tool-result message level', async () => { + it('moves the cache marker to the tool_result block', async () => { const fetchMock = vi .spyOn(global, 'fetch') .mockResolvedValue(createStreamResponse(anthropicSse)); @@ -113,8 +113,12 @@ describe('Anthropic prompt caching', () => { expect(body?.messages?.at(-1)).toEqual({ role: 'user', - content: [{ type: 'tool_result', tool_use_id: 'call_1', content: 'hi' }], - cache_control: EPHEMERAL_CACHE_CONTROL, + content: [{ + type: 'tool_result', + tool_use_id: 'call_1', + content: 'hi', + cache_control: EPHEMERAL_CACHE_CONTROL, + }], }); }); }); diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/thinkingBlocks.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/thinkingBlocks.test.ts index 34c6b471..e08e4934 100644 --- a/packages/agent-sdk/src/sdk/llm/__tests__/thinkingBlocks.test.ts +++ b/packages/agent-sdk/src/sdk/llm/__tests__/thinkingBlocks.test.ts @@ -371,7 +371,7 @@ describe('AnthropicClient thinking blocks', () => { expect(toolResultMsg).toBeDefined(); const toolResultBlock = toolResultMsg.content.find((b: { type: string }) => b.type === 'tool_result'); - expect(toolResultBlock).toEqual({ + expect(toolResultBlock).toMatchObject({ type: 'tool_result', tool_use_id: 'call_1', content: 'hi', diff --git a/packages/agent-sdk/src/sdk/llm/anthropic.ts b/packages/agent-sdk/src/sdk/llm/anthropic.ts index 6cd3b3ce..1ab311d7 100644 --- a/packages/agent-sdk/src/sdk/llm/anthropic.ts +++ b/packages/agent-sdk/src/sdk/llm/anthropic.ts @@ -31,6 +31,7 @@ type AnthropicToolResultBlock = { type: 'tool_result'; tool_use_id: string; content: string; + cache_control?: AnthropicCacheControl; }; type AnthropicImageBlock = { @@ -49,7 +50,6 @@ type AnthropicContentBlock = interface AnthropicMessage { role: 'user' | 'assistant'; content: AnthropicContentBlock[]; - cache_control?: AnthropicCacheControl; } type AnthropicEventName = 'message_start' | 'content_block_start' | 'content_block_delta' | 'message_delta' | (string & {}); @@ -247,20 +247,17 @@ const toAnthropicMessages = ( type: 'tool_result', tool_use_id: message.tool_call_id ?? '', content: reduceTextContent(message), + ...(shouldCacheMessage ? { cache_control: EPHEMERAL_CACHE_CONTROL } : {}), }; if (lastMessage?.role === 'user') { // Append to existing user message lastMessage.content.push(toolResultBlock); - if (shouldCacheMessage) { - lastMessage.cache_control = EPHEMERAL_CACHE_CONTROL; - } } else { // Create new user message result.push({ role: 'user', content: [toolResultBlock], - ...(shouldCacheMessage ? { cache_control: EPHEMERAL_CACHE_CONTROL } : {}), }); } } diff --git a/packages/agent-sdk/src/sdk/llm/openai-compatible.ts b/packages/agent-sdk/src/sdk/llm/openai-compatible.ts index b603fd4d..a0611c0e 100644 --- a/packages/agent-sdk/src/sdk/llm/openai-compatible.ts +++ b/packages/agent-sdk/src/sdk/llm/openai-compatible.ts @@ -42,6 +42,8 @@ type OpenAIChatMessage = { name?: string; tool_call_id?: string; tool_calls?: ChatCompletionRequest['messages'][number]['tool_calls']; + // LiteLLM tool-result caching follows the Python SDK quirk: the cache marker + // lives on the tool message envelope instead of the text block. cache_control?: OpenAICacheControl; }; diff --git a/packages/agent-sdk/src/sdk/runtime/Agent.ts b/packages/agent-sdk/src/sdk/runtime/Agent.ts index 9b2eb1b5..54bc6b8b 100644 --- a/packages/agent-sdk/src/sdk/runtime/Agent.ts +++ b/packages/agent-sdk/src/sdk/runtime/Agent.ts @@ -1018,6 +1018,14 @@ export class Agent extends EventEmitter { return this.getToolDefinitions().map((tool) => tool as unknown as Record); } + /** + * Builds the stable system-prompt prefix used for Anthropic prompt caching. + * + * This prefix stays cacheable as long as the agent identity, the shared + * system body, security-risk assessment inclusion, and registered tool + * summaries do not change. Runtime-mutated context such as the current editor + * state belongs in buildDynamicSystemPrompt() instead. + */ private buildCacheableSystemPrompt(): string { const promptIdentity = this.agentContext?.getSystemMessagePrefix() ?? SYSTEM_PROMPT_IDENTITY; let systemPrompt = `${promptIdentity}\n\n${SYSTEM_PROMPT_BODY}`; diff --git a/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts b/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts index 9a49a461..41c75de6 100644 --- a/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts +++ b/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts @@ -165,6 +165,10 @@ describe('Agent system prompt', () => { expect(llm.requests[0]?.dynamicSystemPrompt).toContain('/tmp/first.ts'); expect(llm.requests[1]?.dynamicSystemPrompt).toContain('/tmp/second.ts'); expect(llm.requests[1]?.dynamicSystemPrompt).not.toContain('/tmp/first.ts'); + expect(llm.requests[0]?.cacheableSystemPrompt).not.toContain('/tmp/first.ts'); + expect(llm.requests[0]?.cacheableSystemPrompt).not.toContain('/tmp/second.ts'); + expect(llm.requests[1]?.cacheableSystemPrompt).not.toContain('/tmp/first.ts'); + expect(llm.requests[1]?.cacheableSystemPrompt).not.toContain('/tmp/second.ts'); }); it('gates vendor-specific repo skills using LLM profile config', async () => { From 139fcf6db3285c816c1290c452469208e073390e Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Mon, 11 May 2026 18:33:11 +0200 Subject: [PATCH 4/5] Update packages/agent-sdk/src/sdk/llm/providerQuirks.ts Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- packages/agent-sdk/src/sdk/llm/providerQuirks.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts index 1617664a..86faf14c 100644 --- a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts +++ b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts @@ -62,8 +62,8 @@ const PROMPT_CACHE_MODELS = [ 'claude-sonnet-3-7-latest', 'claude-3-5-sonnet', 'claude-3-5-haiku', - 'claude-3-haiku-20240307', - 'claude-3-opus-20240229', + 'claude-3-haiku', + 'claude-3-opus', 'claude-sonnet-4', 'claude-opus-4', 'claude-haiku-4-5', From f5bff0e88ae5ce3c48a0be03dafbdd6500fa9cb6 Mon Sep 17 00:00:00 2001 From: Engel Nyst Date: Mon, 11 May 2026 19:31:04 +0200 Subject: [PATCH 5/5] Add Anthropic cache smoke runner --- package.json | 1 + scripts/anthropic-cache-smoke.mjs | 276 ++++++++++++++++++++++++++++++ 2 files changed, 277 insertions(+) create mode 100644 scripts/anthropic-cache-smoke.mjs diff --git a/package.json b/package.json index a0cc948c..c04f5647 100644 --- a/package.json +++ b/package.json @@ -535,6 +535,7 @@ "lint:cycles": "node scripts/check-circular-deps.mjs", "lint:duplication": "node scripts/check-duplication.mjs", "lint:fix": "eslint . --fix", + "smoke:anthropic-cache": "npm run build -w @smolpaws/agent-sdk && node scripts/anthropic-cache-smoke.mjs", "build:webview": "node esbuild.webview.mjs", "agent-server": "bash scripts/start-agent-server.sh", "agent-server:prepare": "PREPARE=1 bash scripts/start-agent-server.sh", diff --git a/scripts/anthropic-cache-smoke.mjs b/scripts/anthropic-cache-smoke.mjs new file mode 100644 index 00000000..517429ec --- /dev/null +++ b/scripts/anthropic-cache-smoke.mjs @@ -0,0 +1,276 @@ +#!/usr/bin/env node + +import fs from 'fs'; +import os from 'os'; +import path from 'path'; +import process from 'process'; +import { fileURLToPath } from 'url'; + +import { + Agent, + EventLog, + ConversationStats, + TerminalTool, + FileEditorTool, + FinishTool, +} from '../packages/agent-sdk/dist/index.mjs'; + +const SCRIPT_PATH = fileURLToPath(import.meta.url); +const WORKSPACE_ROOT = path.resolve(path.join(path.dirname(SCRIPT_PATH), '..')); +const OPENHANDS_HOME = path.join(os.homedir(), '.openhands'); +const PROFILE_ID = process.argv[2] ?? 'opus-46'; +const PROFILE_PATH = path.join(OPENHANDS_HOME, 'llm-profiles', `${PROFILE_ID}.json`); + +const MESSAGES = [ + 'Reply with exactly "cache-smoke-ready". Do not use any tools.', + 'Use the terminal tool exactly once with {"command":"pwd"} and then reply with just the directory path. Do not use any other tools.', + 'How many tools have you used so far in this conversation? Reply with only the number. Do not use any tools.', + 'Use the file_editor tool exactly once with {"command":"view","path":"package.json","view_range":[1,20]} and then reply with just the top-level package name. Do not use any other tools.', + 'In one short sentence, summarize the two tool results from this conversation. Do not use any tools.', +]; + +function readJson(filePath) { + return JSON.parse(fs.readFileSync(filePath, 'utf8')); +} + +function truncate(text, maxChars = 200) { + const normalized = typeof text === 'string' ? text.replace(/\s+/g, ' ').trim() : ''; + if (normalized.length <= maxChars) return normalized; + return `${normalized.slice(0, maxChars - 1)}...`; +} + +function toObject(value) { + return value && typeof value === 'object' && !Array.isArray(value) ? value : null; +} + +function findSettingsApiKey(profileBaseUrl) { + const settingsDir = OPENHANDS_HOME; + const entries = fs.readdirSync(settingsDir) + .filter((name) => /^settings.*\.json$/.test(name)) + .map((name) => path.join(settingsDir, name)); + + const normalizedProfileBaseUrl = typeof profileBaseUrl === 'string' + ? profileBaseUrl.replace(/\/+$/, '') + : ''; + + let fallback; + for (const filePath of entries) { + let parsed; + try { + parsed = readJson(filePath); + } catch { + continue; + } + + const apiKey = typeof parsed?.llm_api_key === 'string' ? parsed.llm_api_key.trim() : ''; + if (!apiKey) continue; + + const baseUrl = typeof parsed?.llm_base_url === 'string' ? parsed.llm_base_url.replace(/\/+$/, '') : ''; + if (!fallback) { + fallback = { apiKey, source: filePath }; + } + + if (normalizedProfileBaseUrl && baseUrl === normalizedProfileBaseUrl) { + return { apiKey, source: filePath }; + } + } + + if (fallback) return fallback; + throw new Error(`Could not find a local OpenHands settings file with an API key under ${settingsDir}`); +} + +function extractAssistantText(message) { + const content = Array.isArray(message?.content) ? message.content : []; + return truncate(content + .filter((item) => item?.type === 'text' && typeof item.text === 'string') + .map((item) => item.text) + .join('\n\n')); +} + +function assistantMessageEventText(event) { + const content = Array.isArray(event?.llm_message?.content) ? event.llm_message.content : []; + return truncate(content + .filter((item) => item?.type === 'text' && typeof item.text === 'string') + .map((item) => item.text) + .join('\n\n')); +} + +function summarizeObservation(observation) { + if (typeof observation === 'string') return truncate(observation); + const obj = toObject(observation); + if (!obj) return truncate(JSON.stringify(observation)); + if (typeof obj.stdout === 'string' && obj.stdout.trim()) return truncate(obj.stdout); + if (typeof obj.stderr === 'string' && obj.stderr.trim()) return truncate(obj.stderr); + if (typeof obj.new_content === 'string' && obj.new_content.trim()) return truncate(obj.new_content); + if (typeof obj.old_content === 'string' && obj.old_content.trim()) return truncate(obj.old_content); + if (typeof obj.reason === 'string' && obj.reason.trim()) return truncate(obj.reason); + return truncate(JSON.stringify(obj)); +} + +function tokenUsageDelta(before, after) { + const a = toObject(after) ?? {}; + const b = toObject(before) ?? {}; + return { + promptTokens: Math.max(0, Number(a.promptTokens ?? 0) - Number(b.promptTokens ?? 0)), + completionTokens: Math.max(0, Number(a.completionTokens ?? 0) - Number(b.completionTokens ?? 0)), + cacheReadTokens: Math.max(0, Number(a.cacheReadTokens ?? 0) - Number(b.cacheReadTokens ?? 0)), + cacheWriteTokens: Math.max(0, Number(a.cacheWriteTokens ?? 0) - Number(b.cacheWriteTokens ?? 0)), + reasoningTokens: Math.max(0, Number(a.reasoningTokens ?? 0) - Number(b.reasoningTokens ?? 0)), + perTurnToken: Math.max(0, Number(a.promptTokens ?? 0) - Number(b.promptTokens ?? 0)) + + Math.max(0, Number(a.completionTokens ?? 0) - Number(b.completionTokens ?? 0)), + }; +} + +function countCacheControls(value) { + if (!value || typeof value !== 'object') return 0; + if (Array.isArray(value)) return value.reduce((sum, item) => sum + countCacheControls(item), 0); + let total = 0; + for (const [key, child] of Object.entries(value)) { + if (key === 'cache_control') total += 1; + total += countCacheControls(child); + } + return total; +} + +function summarizeRequestBody(body) { + const payload = toObject(body); + const messages = Array.isArray(payload?.messages) ? payload.messages : []; + const system = payload?.system; + return { + cacheControlCount: countCacheControls(payload), + messageCount: messages.length, + systemBlockCount: Array.isArray(system) ? system.length : (system ? 1 : 0), + }; +} + +function extractToolUses(newEvents) { + const observationsByToolCallId = new Map(); + for (const event of newEvents) { + if (event?.kind === 'ObservationEvent' && typeof event.tool_call_id === 'string') { + observationsByToolCallId.set(event.tool_call_id, summarizeObservation(event.observation)); + } + } + + return newEvents + .filter((event) => event?.kind === 'ActionEvent') + .map((event) => { + const action = toObject(event.action) ?? {}; + return { + tool: event.tool_name, + toolCallId: event.tool_call_id, + action: action, + observationPreview: observationsByToolCallId.get(event.tool_call_id) ?? null, + }; + }); +} + +const profile = readJson(PROFILE_PATH); +const { apiKey, source: apiKeySource } = findSettingsApiKey(profile.baseUrl); + +const wireRequests = []; +const normalizedProfileBaseUrl = typeof profile.baseUrl === 'string' + ? profile.baseUrl.replace(/\/+$/, '') + : ''; +const originalFetch = globalThis.fetch; + +if (typeof originalFetch !== 'function') { + throw new Error('global fetch is not available in this Node runtime'); +} + +globalThis.fetch = async (input, init) => { + const url = typeof input === 'string' || input instanceof URL + ? String(input) + : input?.url; + + const normalizedUrl = typeof url === 'string' ? url.replace(/\/+$/, '') : ''; + let parsedBody = null; + if (typeof init?.body === 'string') { + try { + parsedBody = JSON.parse(init.body); + } catch { + parsedBody = null; + } + } + + if (normalizedProfileBaseUrl && typeof normalizedUrl === 'string' && normalizedUrl.startsWith(normalizedProfileBaseUrl)) { + wireRequests.push({ + url, + method: init?.method ?? (input && typeof input === 'object' && 'method' in input ? input.method : 'GET'), + ...summarizeRequestBody(parsedBody), + }); + } + + return originalFetch(input, init); +}; + +const events = new EventLog(); +const stats = new ConversationStats(); +const agent = new Agent({ + workspaceRoot: WORKSPACE_ROOT, + events, + conversationStats: stats, + includeDefaultTools: false, + tools: [ + new FinishTool(), + new TerminalTool(), + new FileEditorTool(), + ], + settings: { + llm: { profileId: PROFILE_ID }, + agent: { enableSecurityAnalyzer: false }, + conversation: { maxIterations: 12 }, + confirmation: { policy: 'never' }, + secrets: { llmApiKey: apiKey }, + }, +}); + +const runResults = []; + +for (let index = 0; index < MESSAGES.length; index += 1) { + const message = MESSAGES[index]; + const beforeEventCount = events.list().length; + const beforeWireCount = wireRequests.length; + const beforeMetrics = stats.getCombinedMetrics().getSnapshot(); + + const response = await agent.run(message); + + const afterEvents = events.list().slice(beforeEventCount); + const afterWire = wireRequests.slice(beforeWireCount); + const afterMetrics = stats.getCombinedMetrics().getSnapshot(); + const delta = tokenUsageDelta(beforeMetrics.accumulatedTokenUsage, afterMetrics.accumulatedTokenUsage); + const assistantMessages = afterEvents.filter( + (event) => event?.kind === 'MessageEvent' && event?.llm_message?.role === 'assistant', + ); + const errors = afterEvents.filter( + (event) => event?.kind === 'ConversationErrorEvent' || event?.kind === 'AgentErrorEvent', + ); + + runResults.push({ + call: index + 1, + userPrompt: message, + finalAssistantText: extractAssistantText(response), + assistantMessages: assistantMessages.map(assistantMessageEventText).filter(Boolean), + toolUses: extractToolUses(afterEvents), + llmRequestCount: afterWire.length, + requestCacheControlCounts: afterWire.map((request) => request.cacheControlCount), + usage: delta, + cacheHit: delta.cacheReadTokens > 0, + errors: errors.map((event) => ({ + kind: event.kind, + code: event.code ?? null, + detail: truncate(event.detail ?? event.error ?? ''), + })), + }); +} + +const output = { + profileId: PROFILE_ID, + model: profile.model, + provider: profile.provider, + baseUrl: profile.baseUrl, + workspaceRoot: WORKSPACE_ROOT, + apiKeySource, + runResults, +}; + +process.stdout.write(`${JSON.stringify(output, null, 2)}\n`);