From acfb3b0d4ca6d526d39b78fbecd261469a43c782 Mon Sep 17 00:00:00 2001
From: Engel Nyst <engel.nyst@gmail.com>
Date: Mon, 11 May 2026 17:23:00 +0200
Subject: [PATCH 1/5] Add Anthropic prompt caching breakpoints

Split static and dynamic system prompt content so Anthropic-compatible providers can cache only the stable prefix, matching the Python agent-sdk behavior. Mark the last user/tool turn for cache extension and cover the native Anthropic and LiteLLM Claude request shapes with focused tests.
---
 .../sdk/llm/__tests__/promptCaching.test.ts   | 198 ++++++++++++++++++
 .../sdk/llm/__tests__/providerQuirks.test.ts  |  38 +++-
 packages/agent-sdk/src/sdk/llm/anthropic.ts   |  58 ++++-
 .../src/sdk/llm/openai-compatible.ts          | 108 ++++++++--
 .../agent-sdk/src/sdk/llm/providerQuirks.ts   |  21 ++
 packages/agent-sdk/src/sdk/llm/types.ts       |   2 +
 packages/agent-sdk/src/sdk/runtime/Agent.ts   |  44 ++--
 .../__tests__/Agent.system-prompt.test.ts     |  28 +++
 .../runtime/__tests__/condensation.test.ts    |  25 +++
 .../agent-sdk/src/sdk/runtime/condensation.ts |  32 ++-
 10 files changed, 505 insertions(+), 49 deletions(-)
 create mode 100644 packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts

diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts
new file mode 100644
index 00000000..ec70fb5f
--- /dev/null
+++ b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts
@@ -0,0 +1,198 @@
+import { afterEach, describe, expect, it, vi } from 'vitest';
+import { LLMStreamer } from '../../runtime';
+import { AnthropicClient, OpenAICompatibleClient } from '../index';
+import type { ChatCompletionRequest, LLMConfiguration } from '../types';
+
+const encoder = new TextEncoder();
+const EPHEMERAL_CACHE_CONTROL = { type: 'ephemeral' };
+
+const createStreamResponse = (payload: string, status = 200): Response => // builds a streamed Response whose body is `payload`
+  new Response(
+    new ReadableStream({
+      start(controller) {
+        controller.enqueue(encoder.encode(payload)); // the whole payload goes out as one encoded chunk
+        controller.close(); // nothing else follows, so the stream ends immediately
+      },
+    }),
+    { status, headers: { 'content-type': 'text/event-stream' } }, // `status` defaults to 200; the body is labelled as an event stream
+  );
+
+const anthropicSse = [ // fixture lines joined with '\n': one text delta ("Done"), then a message_delta carrying stop_reason "end_turn"
+  'event: content_block_delta',
+  'data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Done"}}',
+  '',
+  'event: message_delta',
+  'data: {"type":"message_delta","delta":{"stop_reason":"end_turn"}}',
+  '',
+].join('\n');
+
+const openAiSse = [ // fixture lines joined with '\n': a content delta ("Done"), a chunk with finish_reason "stop", then the [DONE] line
+  'data: {"choices":[{"delta":{"content":"Done"}}]}',
+  'data: {"choices":[{"delta":{},"finish_reason":"stop"}]}',
+  'data: [DONE]',
+].join('\n');
+
+const splitSystemPromptRequest = ( // request factory shared by the client tests in this file
+  overrides: Partial<ChatCompletionRequest> = {},
+): ChatCompletionRequest => ({
+  systemPrompt: 'STATIC\n\nDYNAMIC', // combined prompt: the two parts below separated by a blank line
+  cacheableSystemPrompt: 'STATIC',
+  dynamicSystemPrompt: 'DYNAMIC',
+  messages: [{ role: 'user', content: [{ type: 'text', text: 'hello' }] }], // default conversation: a single user text turn
+  ...overrides, // spread last, so any field supplied by the caller replaces the default above
+});
+
+afterEach(() => {
+  vi.restoreAllMocks();
+});
+
+describe('Anthropic prompt caching', () => { // native Anthropic client: each test inspects the JSON body passed to the mocked fetch
+  const baseConfig: LLMConfiguration = {
+    model: 'claude-sonnet-4-5-20250929',
+    provider: 'anthropic',
+  };
+
+  it('marks only the static system block and last user block for caching', async () => {
+    const fetchMock = vi
+      .spyOn(global, 'fetch')
+      .mockResolvedValue(createStreamResponse(anthropicSse)); // every fetch resolves to the canned Anthropic stream
+
+    const client = new AnthropicClient(baseConfig, 'test-key');
+    const streamer = new LLMStreamer(client);
+
+    await streamer.runChat(splitSystemPromptRequest());
+
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    const init = fetchMock.mock.calls[0]?.[1] as { body?: unknown } | undefined; // second fetch argument of the first call
+    const body = typeof init?.body === 'string' ? JSON.parse(init.body) : null; // only a string body is parsed; otherwise body stays null
+
+    expect(body?.system).toEqual([
+      { type: 'text', text: 'STATIC', cache_control: EPHEMERAL_CACHE_CONTROL },
+      { type: 'text', text: 'DYNAMIC' }, // the dynamic block must carry no cache marker
+    ]);
+    expect(body?.messages?.[0]).toMatchObject({
+      role: 'user',
+      content: [{ type: 'text', text: 'hello', cache_control: EPHEMERAL_CACHE_CONTROL }], // marker sits on the content block of the last user turn
+    });
+  });
+
+  it('moves the cache marker to the tool-result message level', async () => {
+    const fetchMock = vi
+      .spyOn(global, 'fetch')
+      .mockResolvedValue(createStreamResponse(anthropicSse));
+
+    const client = new AnthropicClient(baseConfig, 'test-key');
+    const streamer = new LLMStreamer(client);
+
+    await streamer.runChat(splitSystemPromptRequest({
+      messages: [ // user turn, assistant tool call, then the tool result as the final message
+        { role: 'user', content: [{ type: 'text', text: 'hello' }] },
+        {
+          role: 'assistant',
+          content: [],
+          tool_calls: [
+            {
+              id: 'call_1',
+              type: 'function',
+              function: { name: 'bash', arguments: '{"command":"echo hi"}' },
+            },
+          ],
+        },
+        {
+          role: 'tool',
+          content: [{ type: 'text', text: 'hi' }],
+          tool_call_id: 'call_1',
+        },
+      ],
+      tools: [{ type: 'function', function: { name: 'bash' } }],
+    }));
+
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    const init = fetchMock.mock.calls[0]?.[1] as { body?: unknown } | undefined;
+    const body = typeof init?.body === 'string' ? JSON.parse(init.body) : null;
+
+    expect(body?.messages?.at(-1)).toEqual({
+      role: 'user', // the tool turn is expected to go out as a user message holding a tool_result block
+      content: [{ type: 'tool_result', tool_use_id: 'call_1', content: 'hi' }],
+      cache_control: EPHEMERAL_CACHE_CONTROL, // marker expected on the message object itself, not inside the tool_result block
+    });
+  });
+});
+
+describe('OpenAI-compatible Anthropic prompt caching', () => { // same scenarios, sent through the OpenAI-compatible client with a litellm_proxy config
+  const baseConfig: LLMConfiguration = {
+    model: 'claude-sonnet-4-5-20250929',
+    provider: 'litellm_proxy',
+    baseUrl: 'http://localhost:4000',
+  };
+
+  it('marks only the static system block and last user block for caching', async () => {
+    const fetchMock = vi
+      .spyOn(global, 'fetch')
+      .mockResolvedValue(createStreamResponse(openAiSse)); // every fetch resolves to the canned OpenAI-style stream
+
+    const client = new OpenAICompatibleClient(baseConfig, 'test-key');
+    const streamer = new LLMStreamer(client);
+
+    await streamer.runChat(splitSystemPromptRequest());
+
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    const init = fetchMock.mock.calls[0]?.[1] as { body?: unknown } | undefined; // second fetch argument of the first call
+    const body = typeof init?.body === 'string' ? JSON.parse(init.body) : null; // only a string body is parsed; otherwise body stays null
+
+    expect(body?.messages?.[0]).toEqual({
+      role: 'system', // here the system prompt is the first entry of messages, with block-array content
+      content: [
+        { type: 'text', text: 'STATIC', cache_control: EPHEMERAL_CACHE_CONTROL },
+        { type: 'text', text: 'DYNAMIC' }, // the dynamic block must carry no cache marker
+      ],
+    });
+    expect(body?.messages?.[1]).toMatchObject({
+      role: 'user',
+      content: [{ type: 'text', text: 'hello', cache_control: EPHEMERAL_CACHE_CONTROL }],
+    });
+  });
+
+  it('moves the cache marker to the tool message level', async () => {
+    const fetchMock = vi
+      .spyOn(global, 'fetch')
+      .mockResolvedValue(createStreamResponse(openAiSse));
+
+    const client = new OpenAICompatibleClient(baseConfig, 'test-key');
+    const streamer = new LLMStreamer(client);
+
+    await streamer.runChat(splitSystemPromptRequest({
+      messages: [ // user turn, assistant tool call, then the tool result as the final message
+        { role: 'user', content: [{ type: 'text', text: 'hello' }] },
+        {
+          role: 'assistant',
+          content: [{ type: 'text', text: '' }],
+          tool_calls: [
+            {
+              id: 'call_1',
+              type: 'function',
+              function: { name: 'bash', arguments: '{"command":"echo hi"}' },
+            },
+          ],
+        },
+        {
+          role: 'tool',
+          content: [{ type: 'text', text: 'hi' }],
+          tool_call_id: 'call_1',
+        },
+      ],
+      tools: [{ type: 'function', function: { name: 'bash' } }],
+    }));
+
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    const init = fetchMock.mock.calls[0]?.[1] as { body?: unknown } | undefined;
+    const body = typeof init?.body === 'string' ? JSON.parse(init.body) : null;
+
+    expect(body?.messages?.at(-1)).toEqual({
+      role: 'tool', // the role stays 'tool' in this request shape
+      content: 'hi', // tool content is expected as a plain string
+      tool_call_id: 'call_1',
+      cache_control: EPHEMERAL_CACHE_CONTROL, // marker expected as a sibling of content on the message
+    });
+  });
+});
diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts
index ed04456e..484c27bc 100644
--- a/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts
+++ b/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts
@@ -1,6 +1,12 @@
 import { describe, expect, it } from 'vitest';
 import type { LLMConfiguration } from '../types';
-import { normalizeGenerationParamsForModel, isAnthropicModel, supportsThinkingBlocks, getAnthropicThinkingBudget } from '../providerQuirks';
+import {
+  normalizeGenerationParamsForModel,
+  isAnthropicModel,
+  supportsPromptCaching,
+  supportsThinkingBlocks,
+  getAnthropicThinkingBudget,
+} from '../providerQuirks';
 
 const makeConfig = (overrides: Partial<LLMConfiguration> = {}): LLMConfiguration => ({
   model: 'gpt-4o',
@@ -135,6 +141,36 @@ describe('supportsThinkingBlocks', () => {
   });
 });
 
+describe('supportsPromptCaching', () => { // covers a direct provider, a proxied provider, an unlisted Claude model and a non-Anthropic model
+  it('returns true for supported Anthropic cacheable models', () => {
+    expect(supportsPromptCaching(makeConfig({
+      model: 'claude-sonnet-4-5-20250929',
+      provider: 'anthropic',
+    }))).toBe(true);
+  });
+
+  it('returns true for LiteLLM Anthropic routing with supported cacheable models', () => {
+    expect(supportsPromptCaching(makeConfig({
+      model: 'anthropic/claude-3-5-sonnet-20241022', // provider-prefixed model id
+      provider: 'litellm_proxy',
+    }))).toBe(true);
+  });
+
+  it('returns false for Anthropic models outside the prompt-cache allowlist', () => {
+    expect(supportsPromptCaching(makeConfig({
+      model: 'claude-2.1', // contains none of the allowlisted name fragments
+      provider: 'anthropic',
+    }))).toBe(false);
+  });
+
+  it('returns false for non-Anthropic models', () => {
+    expect(supportsPromptCaching(makeConfig({
+      model: 'gpt-4o',
+      provider: 'openai',
+    }))).toBe(false);
+  });
+});
+
 describe('getAnthropicThinkingBudget', () => {
   it('returns undefined for non-Anthropic models', () => {
     expect(getAnthropicThinkingBudget(makeConfig({
diff --git a/packages/agent-sdk/src/sdk/llm/anthropic.ts b/packages/agent-sdk/src/sdk/llm/anthropic.ts
index 8f446739..6cd3b3ce 100644
--- a/packages/agent-sdk/src/sdk/llm/anthropic.ts
+++ b/packages/agent-sdk/src/sdk/llm/anthropic.ts
@@ -1,8 +1,11 @@
 import { reduceTextContent, DEFAULT_RETRY_OPTIONS, DEFAULT_TIMEOUT_MS, type ChatCompletionRequest, type LLMClient, type LLMConfiguration, type LLMStreamChunk, type LLMToolDefinition, type RetryOptions, type ToolCallAccumulator } from './types';
-import { getAnthropicThinkingBudget } from './providerQuirks';
+import { getAnthropicThinkingBudget, supportsPromptCaching } from './providerQuirks';
 import { NonRetryableHttpStatusError, requestWithRetry } from './httpRetry';
 
 const decoder = new TextDecoder();
+const EPHEMERAL_CACHE_CONTROL = { type: 'ephemeral' } as const; // single shared marker object, attached wherever this module sets cache_control
+
+type AnthropicCacheControl = typeof EPHEMERAL_CACHE_CONTROL; // the readonly literal type { type: 'ephemeral' }
 
 // Anthropic content block types
 type AnthropicThinkingBlock = {
@@ -14,6 +17,7 @@ type AnthropicThinkingBlock = {
 type AnthropicTextBlock = {
   type: 'text';
   text: string;
+  cache_control?: AnthropicCacheControl; // optional cache marker; when present its only admissible value is { type: 'ephemeral' }
 };
 
 type AnthropicToolUseBlock = {
@@ -32,6 +36,7 @@ type AnthropicToolResultBlock = {
 type AnthropicImageBlock = {
   type: 'image';
   source: { type: 'base64'; media_type: string; data: string };
+  cache_control?: AnthropicCacheControl; // optional cache marker, so an image that ends a user turn can carry it
 };
 
 type AnthropicContentBlock =
@@ -44,6 +49,7 @@ type AnthropicContentBlock =
 interface AnthropicMessage {
   role: 'user' | 'assistant';
   content: AnthropicContentBlock[];
+  cache_control?: AnthropicCacheControl; // message-level marker, set for tool-result turns. NOTE(review): Anthropic's docs place cache_control on content blocks — confirm the Messages API accepts it on a message object
 }
 
 type AnthropicEventName = 'message_start' | 'content_block_start' | 'content_block_delta' | 'message_delta' | (string & {});
@@ -137,10 +143,25 @@ const parseBase64DataUrl = (url: string): { mediaType: string; base64: string }
   return { mediaType: match[1].toLowerCase(), base64: match[2] };
 };
 
-const toAnthropicMessages = (request: ChatCompletionRequest): AnthropicMessage[] => {
+const toAnthropicMessages = (
+  request: ChatCompletionRequest,
+  options?: { cacheLastMessage?: boolean },
+): AnthropicMessage[] => {
   const result: AnthropicMessage[] = [];
+  const lastCacheableMessageIndex = options?.cacheLastMessage
+    ? (() => {
+        for (let index = request.messages.length - 1; index >= 0; index -= 1) {
+          const role = request.messages[index]?.role;
+          if (role === 'user' || role === 'tool') {
+            return index;
+          }
+        }
+        return -1;
+      })()
+    : -1;
 
-  for (const message of request.messages) {
+  for (const [index, message] of request.messages.entries()) {
+    const shouldCacheMessage = index === lastCacheableMessageIndex;
     if (message.role === 'user') {
       const contentBlocks: AnthropicContentBlock[] = [];
       for (const part of message.content) {
@@ -162,6 +183,14 @@ const toAnthropicMessages = (request: ChatCompletionRequest): AnthropicMessage[]
       if (contentBlocks.length === 0) {
         contentBlocks.push({ type: 'text', text: '' });
       }
+      const lastBlock = contentBlocks.at(-1);
+      if (
+        shouldCacheMessage &&
+        lastBlock &&
+        (lastBlock.type === 'text' || lastBlock.type === 'image')
+      ) {
+        lastBlock.cache_control = EPHEMERAL_CACHE_CONTROL;
+      }
       result.push({ role: 'user', content: contentBlocks });
     } else if (message.role === 'assistant') {
       // Assistant messages: may have thinking + tool_use
@@ -223,11 +252,15 @@ const toAnthropicMessages = (request: ChatCompletionRequest): AnthropicMessage[]
       if (lastMessage?.role === 'user') {
         // Append to existing user message
         lastMessage.content.push(toolResultBlock);
+        if (shouldCacheMessage) {
+          lastMessage.cache_control = EPHEMERAL_CACHE_CONTROL;
+        }
       } else {
         // Create new user message
         result.push({
           role: 'user',
           content: [toolResultBlock],
+          ...(shouldCacheMessage ? { cache_control: EPHEMERAL_CACHE_CONTROL } : {}),
         });
       }
     }
@@ -411,6 +444,21 @@ export class AnthropicClient implements LLMClient {
   private requestBody(request: ChatCompletionRequest): Record<string, unknown> {
     const anthropicTools = toAnthropicTools(request.tools);
     const thinkingBudget = getAnthropicThinkingBudget(this.config);
+    const cacheableSystemPrompt =
+      typeof request.cacheableSystemPrompt === 'string' && request.cacheableSystemPrompt.trim()
+        ? request.cacheableSystemPrompt
+        : request.systemPrompt;
+    const dynamicSystemPrompt =
+      typeof request.dynamicSystemPrompt === 'string' && request.dynamicSystemPrompt.trim()
+        ? request.dynamicSystemPrompt
+        : undefined;
+    const promptCachingEnabled = supportsPromptCaching(this.config);
+    const system = promptCachingEnabled
+      ? [
+          { type: 'text' as const, text: cacheableSystemPrompt, cache_control: EPHEMERAL_CACHE_CONTROL },
+          ...(dynamicSystemPrompt ? [{ type: 'text' as const, text: dynamicSystemPrompt }] : []),
+        ]
+      : [{ type: 'text' as const, text: request.systemPrompt }];
 
     return {
       model: this.config.model,
@@ -418,8 +466,8 @@ export class AnthropicClient implements LLMClient {
       // Note: temperature is normalized by providerQuirks.normalizeGenerationParamsForModel()
       // which sets temperature=1 when thinking is enabled (Anthropic requirement)
       temperature: this.config.temperature ?? 0,
-      system: [{ type: 'text', text: request.systemPrompt }],
-      messages: toAnthropicMessages(request),
+      system,
+      messages: toAnthropicMessages(request, { cacheLastMessage: promptCachingEnabled }),
       stream: true,
       ...(anthropicTools ? { tools: anthropicTools, tool_choice: { type: 'auto' } } : {}),
       thinking: thinkingBudget !== undefined
diff --git a/packages/agent-sdk/src/sdk/llm/openai-compatible.ts b/packages/agent-sdk/src/sdk/llm/openai-compatible.ts
index 4ac2848f..b603fd4d 100644
--- a/packages/agent-sdk/src/sdk/llm/openai-compatible.ts
+++ b/packages/agent-sdk/src/sdk/llm/openai-compatible.ts
@@ -1,10 +1,13 @@
 import { reduceTextContent, DEFAULT_RETRY_OPTIONS, DEFAULT_TIMEOUT_MS, type ChatCompletionRequest, type LLMClient, type LLMConfiguration, type LLMStreamChunk, type RetryOptions, type ToolCallAccumulator } from './types';
 import { DEFAULT_PROVIDER_BASE_URLS } from './provider';
-import { supportsThinkingBlocks } from './providerQuirks';
+import { supportsPromptCaching, supportsThinkingBlocks } from './providerQuirks';
 import { buildOpenAiHeaders } from './openaiHeaders';
 import { NonRetryableHttpStatusError, requestWithRetry } from './httpRetry';
 
 const decoder = new TextDecoder();
+const EPHEMERAL_CACHE_CONTROL = { type: 'ephemeral' } as const; // single shared marker object, attached wherever this module sets cache_control
+
+type OpenAICacheControl = typeof EPHEMERAL_CACHE_CONTROL; // the readonly literal type { type: 'ephemeral' }
 
 type OpenAIThinkingContentBlock = {
   type: 'thinking';
@@ -15,11 +18,13 @@ type OpenAIThinkingContentBlock = {
 type OpenAITextContentBlock = {
   type: 'text';
   text: string;
+  cache_control?: OpenAICacheControl; // optional cache marker; when present its only admissible value is { type: 'ephemeral' }
 };
 
 type OpenAIImageUrlContentBlock = {
   type: 'image_url';
   image_url: { url: string; detail?: string };
+  cache_control?: OpenAICacheControl; // optional cache marker, so an image that ends a user turn can carry it
 };
 
 type OpenAIToolUseContentBlock = {
@@ -37,6 +42,7 @@ type OpenAIChatMessage = {
   name?: string;
   tool_call_id?: string;
   tool_calls?: ChatCompletionRequest['messages'][number]['tool_calls'];
+  cache_control?: OpenAICacheControl;
 };
 
 type OpenAIThinkingBlock = {
@@ -88,8 +94,13 @@ const isOpenAIStreamChunk = (value: unknown): value is OpenAIStreamChunk =>
  * tool_use blocks in content. LiteLLM converts tool_calls to tool_use when proxying to Anthropic.
  * However, thinking blocks must be sent in the content array since there's no OpenAI equivalent.
  */
-const toOpenAIMessage = (message: ChatCompletionRequest['messages'][number], config: LLMConfiguration): OpenAIChatMessage => {
+const toOpenAIMessage = (
+  message: ChatCompletionRequest['messages'][number],
+  config: LLMConfiguration,
+  options?: { cachePrompt?: boolean },
+): OpenAIChatMessage => {
   const contentText = reduceTextContent(message);
+  const shouldCachePrompt = options?.cachePrompt === true;
 
   // For Anthropic models with thinking enabled: include thinking blocks in content array
   // This is required when assistant messages have thinking content that needs to be preserved.
@@ -140,14 +151,34 @@ const toOpenAIMessage = (message: ChatCompletionRequest['messages'][number], con
         }
       }
     }
-    if (blocks.some((b) => b.type === 'image_url')) {
+    if (blocks.some((b) => b.type === 'image_url') || shouldCachePrompt) {
       if (!blocks.some((b) => b.type === 'text')) {
         blocks.unshift({ type: 'text', text: '' });
       }
+      if (shouldCachePrompt) {
+        const lastBlock = blocks.at(-1);
+        if (
+          lastBlock &&
+          (lastBlock.type === 'text' || lastBlock.type === 'image_url')
+        ) {
+          lastBlock.cache_control = EPHEMERAL_CACHE_CONTROL;
+        }
+      }
       return { role: 'user', content: blocks };
     }
   }
 
+  if (message.role === 'tool' && shouldCachePrompt) {
+    const cachedToolMessage: OpenAIChatMessage = {
+      role: 'tool',
+      content: contentText,
+      cache_control: EPHEMERAL_CACHE_CONTROL,
+    };
+    if (message.name) cachedToolMessage.name = message.name;
+    if (message.tool_call_id) cachedToolMessage.tool_call_id = message.tool_call_id;
+    return cachedToolMessage;
+  }
+
   // Standard case: plain text content (for non-Anthropic models or messages without thinking)
   const base: OpenAIChatMessage = {
     role: message.role,
@@ -159,25 +190,58 @@ const toOpenAIMessage = (message: ChatCompletionRequest['messages'][number], con
   return base;
 };
 
-const toRequestBody = (config: LLMConfiguration, request: ChatCompletionRequest) => ({
-  model: config.model,
-  messages: [
-    {
-      role: 'system',
-      content: request.systemPrompt,
-    },
-    ...request.messages.map((msg) => toOpenAIMessage(msg, config)),
-  ],
-  stream: true,
-  stream_options: { include_usage: true },
-  temperature: config.temperature ?? undefined,
-  // Do not send top_p or top_k for OpenAI-compatible endpoints to avoid proxy/model rejections
-  // top_p and top_k intentionally omitted
-  max_tokens: config.maxOutputTokens ?? undefined,
-  reasoning_effort: config.reasoningEffort && config.reasoningEffort !== 'none' ? config.reasoningEffort : undefined,
-  tools: request.tools,
-  tool_choice: request.tools?.length ? 'auto' : undefined,
-});
+const toRequestBody = (config: LLMConfiguration, request: ChatCompletionRequest) => { // builds the streaming request body: model, messages, sampling options and tools
+  const promptCachingEnabled = supportsPromptCaching(config); // gates every cache_control marker added below
+  const cacheableSystemPrompt = // the non-blank cacheable part if one was supplied, otherwise the full system prompt
+    typeof request.cacheableSystemPrompt === 'string' && request.cacheableSystemPrompt.trim()
+      ? request.cacheableSystemPrompt
+      : request.systemPrompt; // NOTE(review): if systemPrompt already embeds the dynamic text, this fallback plus the dynamic block would send it twice — confirm callers always pair the two fields
+  const dynamicSystemPrompt = // kept only when it is a non-blank string; null, undefined and blank all become undefined
+    typeof request.dynamicSystemPrompt === 'string' && request.dynamicSystemPrompt.trim()
+      ? request.dynamicSystemPrompt
+      : undefined;
+  const lastCacheableMessageIndex = promptCachingEnabled // index of the final user/tool message, or -1 when caching is off or no such message exists
+    ? (() => {
+        for (let index = request.messages.length - 1; index >= 0; index -= 1) { // scan backwards so the first hit is the latest turn
+          const role = request.messages[index]?.role;
+          if (role === 'user' || role === 'tool') {
+            return index;
+          }
+        }
+        return -1;
+      })()
+    : -1;
+
+  return {
+    model: config.model,
+    messages: [
+      promptCachingEnabled // caching on: the system content becomes an array of text blocks
+        ? {
+            role: 'system' as const,
+            content: [
+              { type: 'text' as const, text: cacheableSystemPrompt, cache_control: EPHEMERAL_CACHE_CONTROL }, // only this first block is marked
+              ...(dynamicSystemPrompt ? [{ type: 'text' as const, text: dynamicSystemPrompt }] : []), // second, unmarked block only when dynamic text exists
+            ],
+          }
+        : { // caching off: the system prompt is sent as one plain string
+            role: 'system' as const,
+            content: request.systemPrompt,
+          },
+      ...request.messages.map((msg, index) =>
+        toOpenAIMessage(msg, config, { cachePrompt: index === lastCacheableMessageIndex }), // cachePrompt is true for at most one message
+      ),
+    ],
+    stream: true,
+    stream_options: { include_usage: true },
+    temperature: config.temperature ?? undefined,
+    // Do not send top_p or top_k for OpenAI-compatible endpoints to avoid proxy/model rejections
+    // top_p and top_k intentionally omitted
+    max_tokens: config.maxOutputTokens ?? undefined,
+    reasoning_effort: config.reasoningEffort && config.reasoningEffort !== 'none' ? config.reasoningEffort : undefined, // 'none' and unset both omit the field
+    tools: request.tools,
+    tool_choice: request.tools?.length ? 'auto' : undefined, // only set when at least one tool is supplied
+  };
+};
 
 const defaultBaseUrls: Record<string, string> = {
   openai: DEFAULT_PROVIDER_BASE_URLS.openai,
diff --git a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts
index b3bea02f..32856638 100644
--- a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts
+++ b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts
@@ -57,6 +57,21 @@ import type { LLMConfiguration } from './types';
 
 const ANTHROPIC_THINKING_MIN_BUDGET = 1024;
 const ANTHROPIC_THINKING_MAX_BUDGET = 128000;
+const PROMPT_CACHE_MODELS = [ // allowlist of model-name fragments; supportsPromptCaching() matches them as substrings of the lowercased model id
+  'claude-3-7-sonnet',
+  'claude-sonnet-3-7-latest', // NOTE(review): word order differs from 'claude-3-7-sonnet' above — confirm this alias is intended
+  'claude-3-5-sonnet',
+  'claude-3-5-haiku',
+  'claude-3-haiku-20240307',
+  'claude-3-opus-20240229',
+  'claude-sonnet-4', // as a substring this already matches the claude-sonnet-4-5 and claude-sonnet-4-6 names listed below
+  'claude-opus-4', // likewise already matches the claude-opus-4-5 and claude-opus-4-6 entries
+  'claude-haiku-4-5',
+  'claude-sonnet-4-5',
+  'claude-sonnet-4-6',
+  'claude-opus-4-5',
+  'claude-opus-4-6',
+];
 
 const isGpt5Model = (model: string | undefined): boolean => {
   if (typeof model !== 'string') return false;
@@ -100,6 +115,12 @@ export const supportsThinkingBlocks = (config: LLMConfiguration): boolean => {
   return isAnthropicModel(config) && hasExtendedThinking(config);
 };
 
+export const supportsPromptCaching = (config: LLMConfiguration): boolean => { // true only when isAnthropicModel() accepts the config and its model is on the allowlist
+  if (!isAnthropicModel(config)) return false; // configs rejected here never get cache markers
+  const model = config.model?.trim().toLowerCase() ?? ''; // normalise; a missing model becomes '' and matches nothing
+  return PROMPT_CACHE_MODELS.some((needle) => model.includes(needle)); // substring match, so prefixed ids like 'anthropic/claude-3-5-sonnet-20241022' still qualify
+};
+
 /**
  * Get the thinking budget tokens for Anthropic extended thinking.
  * 
diff --git a/packages/agent-sdk/src/sdk/llm/types.ts b/packages/agent-sdk/src/sdk/llm/types.ts
index 6ebaa6cb..f2857e80 100644
--- a/packages/agent-sdk/src/sdk/llm/types.ts
+++ b/packages/agent-sdk/src/sdk/llm/types.ts
@@ -63,6 +63,8 @@ export interface LLMConfiguration {
 
 export interface ChatCompletionRequest {
   systemPrompt: string;
+  cacheableSystemPrompt?: string; // optional stable part of the system prompt; the clients fall back to systemPrompt when it is absent or blank
+  dynamicSystemPrompt?: string | null; // optional changing part; sent as a separate, unmarked block when prompt caching is enabled
   messages: Message[];
   tools?: LLMToolDefinition[];
 }
diff --git a/packages/agent-sdk/src/sdk/runtime/Agent.ts b/packages/agent-sdk/src/sdk/runtime/Agent.ts
index e059167c..9b2eb1b5 100644
--- a/packages/agent-sdk/src/sdk/runtime/Agent.ts
+++ b/packages/agent-sdk/src/sdk/runtime/Agent.ts
@@ -654,7 +654,8 @@ export class Agent extends EventEmitter {
     for (let condensationAttempt = 0; condensationAttempt <= MAX_CONDENSATIONS_PER_STEP; condensationAttempt += 1) {
       const request = buildChatRequestWithCondensation({
         events: this.events.list(),
-        systemPrompt: this.buildSystemPrompt(),
+        systemPrompt: this.buildCacheableSystemPrompt(),
+        dynamicSystemPrompt: this.buildDynamicSystemPrompt(),
         tools: this.getToolDefinitions(),
         pastedImagesBaseDir: this.options.pastedImagesBaseDir,
       });
@@ -1017,27 +1018,12 @@ export class Agent extends EventEmitter {
     return this.getToolDefinitions().map((tool) => tool as unknown as Record<string, unknown>);
   }
 
-  private buildSystemPrompt(): string {
+  private buildCacheableSystemPrompt(): string {
     const promptIdentity = this.agentContext?.getSystemMessagePrefix() ?? SYSTEM_PROMPT_IDENTITY;
     let systemPrompt = `${promptIdentity}\n\n${SYSTEM_PROMPT_BODY}`;
     if (!this.shouldIncludeSecurityRiskAssessment()) {
       systemPrompt = systemPrompt.replace(SECURITY_RISK_ASSESSMENT_SECTION, '');
     }
-    if (this.agentContext) {
-      const { llmModel, llmProvider, llmBaseUrl } = resolveSystemPromptLlmContext(
-        this.options.settings?.llm,
-        this.options.profileStoreOptions,
-      );
-      const suffix = this.agentContext.getSystemMessageSuffix({
-        secretNames: this.secrets.getRegisteredNames(),
-        llmModel,
-        llmProvider,
-        llmBaseUrl,
-      });
-      if (suffix) {
-        systemPrompt += '\n\n' + suffix;
-      }
-    }
 
     const summaries = this.getToolDefinitions()
       .map((tool) => {
@@ -1054,6 +1040,30 @@ export class Agent extends EventEmitter {
     return systemPrompt;
   }
 
+  private buildDynamicSystemPrompt(): string | null { // the agent-context suffix, built separately from the cacheable prompt
+    if (!this.agentContext) {
+      return null; // without an agent context there is no suffix to produce
+    }
+
+    const { llmModel, llmProvider, llmBaseUrl } = resolveSystemPromptLlmContext( // LLM details come from the settings and profile-store options
+      this.options.settings?.llm,
+      this.options.profileStoreOptions,
+    );
+    return this.agentContext.getSystemMessageSuffix({ // the context receives the registered secret names plus those LLM details
+      secretNames: this.secrets.getRegisteredNames(),
+      llmModel,
+      llmProvider,
+      llmBaseUrl,
+    });
+  }
+
+  private buildSystemPrompt(): string { // full prompt: the cacheable part, then the dynamic part when it is non-empty
+    const parts = [this.buildCacheableSystemPrompt(), this.buildDynamicSystemPrompt()].filter(
+      (value): value is string => typeof value === 'string' && value.length > 0, // drops null and empty strings
+    );
+    return parts.join('\n\n'); // NOTE(review): no call to this method is visible in this patch now that the request builder uses the split helpers — confirm it is still used
+  }
+
   private shouldIncludeSecurityRiskAssessment(): boolean {
     return this.confirmationPolicy.kind !== 'NeverConfirm' || this.securityAnalyzer?.kind === 'LLMSecurityAnalyzer';
   }
diff --git a/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts b/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts
index e5e07d6d..9a49a461 100644
--- a/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts
+++ b/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts
@@ -139,6 +139,34 @@ describe('Agent system prompt', () => {
     expect(llm.requests[2]?.systemPrompt).not.toContain('Currently opened in the editor:');
   });
 
+  it('keeps the cacheable system prompt stable while dynamic context changes', async () => {
+    const settings: OpenHandsSettings = {
+      llm: { model: 'claude-sonnet-4-5-20250929' },
+      agent: {},
+      conversation: { maxIterations: 3 },
+      confirmation: { policy: 'never' },
+      secrets: {},
+    };
+    const llm = new RecordingLLM();
+    const agentContext = new AgentContext({ systemMessageSuffix: 'Currently opened in the editor: /tmp/first.ts' });
+
+    const agent = new Agent({
+      settings,
+      workspaceRoot: createWorkspaceRoot(),
+      llmClient: llm,
+      agentContext,
+    });
+
+    await agent.run('hi');
+    agentContext.systemMessageSuffix = 'Currently opened in the editor: /tmp/second.ts'; // change only the suffix between the two runs
+    await agent.run('hi again');
+
+    expect(llm.requests[0]?.cacheableSystemPrompt).toBe(llm.requests[1]?.cacheableSystemPrompt); // the static part must be identical across both requests
+    expect(llm.requests[0]?.dynamicSystemPrompt).toContain('/tmp/first.ts');
+    expect(llm.requests[1]?.dynamicSystemPrompt).toContain('/tmp/second.ts');
+    expect(llm.requests[1]?.dynamicSystemPrompt).not.toContain('/tmp/first.ts'); // the old suffix must not linger in the second request
+  });
+
   it('gates vendor-specific repo skills using LLM profile config', async () => {
     const profilesRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-system-prompt-profiles-'));
     profileRoots.push(profilesRoot);
diff --git a/packages/agent-sdk/src/sdk/runtime/__tests__/condensation.test.ts b/packages/agent-sdk/src/sdk/runtime/__tests__/condensation.test.ts
index 8f09d9f7..9ac4b3df 100644
--- a/packages/agent-sdk/src/sdk/runtime/__tests__/condensation.test.ts
+++ b/packages/agent-sdk/src/sdk/runtime/__tests__/condensation.test.ts
@@ -66,6 +66,31 @@ describe('condensation helpers', () => {
     expect(userMessage.content).toEqual([{ type: 'text', text: 'Hello' }, { type: 'text', text: 'Context' }]);
   });
 
+  it('keeps cacheable and dynamic system prompt parts separate', () => {
+    const condense = { // a lone condensation event that supplies the summary text
+      kind: 'Condensation',
+      source: 'environment',
+      forgotten_event_ids: [],
+      summary: 'short summary',
+      summary_offset: 4,
+    } satisfies Extract<Event, { kind: 'Condensation' }>;
+
+    const request = buildChatRequestWithCondensation({
+      events: [condense],
+      systemPrompt: 'STATIC',
+      dynamicSystemPrompt: 'DYNAMIC',
+      tools: [],
+    });
+
+    expect(request.cacheableSystemPrompt).toBe('STATIC'); // the static text passes through unchanged
+    expect(request.dynamicSystemPrompt).toBe( // dynamic part = the supplied suffix followed by the summary block
+      'DYNAMIC\n\n<CONVERSATION SUMMARY>\nshort summary\n</CONVERSATION SUMMARY>',
+    );
+    expect(request.systemPrompt).toBe( // combined prompt = static part, blank line, dynamic part
+      'STATIC\n\nDYNAMIC\n\n<CONVERSATION SUMMARY>\nshort summary\n</CONVERSATION SUMMARY>',
+    );
+  });
+
   it('only keeps <environment information> for the most recent user message', () => {
     const message1 = {
       kind: 'MessageEvent',
diff --git a/packages/agent-sdk/src/sdk/runtime/condensation.ts b/packages/agent-sdk/src/sdk/runtime/condensation.ts
index e8d34c25..e0f5e0b0 100644
--- a/packages/agent-sdk/src/sdk/runtime/condensation.ts
+++ b/packages/agent-sdk/src/sdk/runtime/condensation.ts
@@ -35,15 +35,33 @@ export const getCondensationState = (events: Event[]): CondensationState => {
 export const buildChatRequestWithCondensation = (params: {
   events: Event[];
   systemPrompt: string;
+  dynamicSystemPrompt?: string | null;
   tools: LLMToolDefinition[];
   pastedImagesBaseDir?: string;
-}): { systemPrompt: string; messages: Message[]; tools: LLMToolDefinition[] } => {
+}): {
+  systemPrompt: string;
+  cacheableSystemPrompt: string;
+  dynamicSystemPrompt?: string;
+  messages: Message[];
+  tools: LLMToolDefinition[];
+} => {
   const condensationState = getCondensationState(params.events);
 
-  let systemPrompt = params.systemPrompt;
+  const dynamicParts: string[] = [];
+  const baseDynamicSystemPrompt =
+    typeof params.dynamicSystemPrompt === 'string' ? params.dynamicSystemPrompt.trim() : '';
+  if (baseDynamicSystemPrompt) {
+    dynamicParts.push(baseDynamicSystemPrompt);
+  }
   if (condensationState.summary) {
-    systemPrompt += `\n\n<CONVERSATION SUMMARY>\n${condensationState.summary}\n</CONVERSATION SUMMARY>`;
+    dynamicParts.push(
+      `<CONVERSATION SUMMARY>\n${condensationState.summary}\n</CONVERSATION SUMMARY>`,
+    );
   }
+  const dynamicSystemPrompt = dynamicParts.join('\n\n') || undefined;
+  const systemPrompt = dynamicSystemPrompt
+    ? `${params.systemPrompt}\n\n${dynamicSystemPrompt}`
+    : params.systemPrompt;
 
   const messageEvents = params.events
     .filter(isMessageEvent)
@@ -171,7 +189,13 @@ export const buildChatRequestWithCondensation = (params: {
   }).map(maybeExpandOpenHandsImages);
   const messages = sanitizeChatMessages(rawMessages);
 
-  return { systemPrompt, messages, tools: params.tools };
+  return {
+    systemPrompt,
+    cacheableSystemPrompt: params.systemPrompt,
+    ...(dynamicSystemPrompt ? { dynamicSystemPrompt } : {}),
+    messages,
+    tools: params.tools,
+  };
 };
 
 export type CondensationResult = {

From a841c0f039d1bff10fa121f6cb4787d0ba478afc Mon Sep 17 00:00:00 2001
From: Engel Nyst <engel.nyst@gmail.com>
Date: Mon, 11 May 2026 17:33:14 +0200
Subject: [PATCH 2/5] Support claude-opus-4-7 prompt caching

---
 .../agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts     | 4 ++--
 .../agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts    | 4 ++++
 packages/agent-sdk/src/sdk/llm/providerQuirks.ts              | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts
index ec70fb5f..31ba7b35 100644
--- a/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts
+++ b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts
@@ -48,7 +48,7 @@ afterEach(() => {
 
 describe('Anthropic prompt caching', () => {
   const baseConfig: LLMConfiguration = {
-    model: 'claude-sonnet-4-5-20250929',
+    model: 'claude-opus-4-7',
     provider: 'anthropic',
   };
 
@@ -121,7 +121,7 @@ describe('Anthropic prompt caching', () => {
 
 describe('OpenAI-compatible Anthropic prompt caching', () => {
   const baseConfig: LLMConfiguration = {
-    model: 'claude-sonnet-4-5-20250929',
+    model: 'claude-opus-4-7',
     provider: 'litellm_proxy',
     baseUrl: 'http://localhost:4000',
   };
diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts
index 484c27bc..4a05b8f5 100644
--- a/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts
+++ b/packages/agent-sdk/src/sdk/llm/__tests__/providerQuirks.test.ts
@@ -147,6 +147,10 @@ describe('supportsPromptCaching', () => {
       model: 'claude-sonnet-4-5-20250929',
       provider: 'anthropic',
     }))).toBe(true);
+    expect(supportsPromptCaching(makeConfig({
+      model: 'claude-opus-4-7',
+      provider: 'anthropic',
+    }))).toBe(true);
   });
 
   it('returns true for LiteLLM Anthropic routing with supported cacheable models', () => {
diff --git a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts
index 32856638..1617664a 100644
--- a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts
+++ b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts
@@ -71,6 +71,7 @@ const PROMPT_CACHE_MODELS = [
   'claude-sonnet-4-6',
   'claude-opus-4-5',
   'claude-opus-4-6',
+  'claude-opus-4-7',
 ];
 
 const isGpt5Model = (model: string | undefined): boolean => {

From 2250e22c58bdcca5713e4b327565f738c5315c74 Mon Sep 17 00:00:00 2001
From: Engel Nyst <engel.nyst@gmail.com>
Date: Mon, 11 May 2026 17:56:43 +0200
Subject: [PATCH 3/5] Fix Anthropic tool-result cache placement

---
 .../src/sdk/llm/__tests__/promptCaching.test.ts        | 10 +++++++---
 .../src/sdk/llm/__tests__/thinkingBlocks.test.ts       |  2 +-
 packages/agent-sdk/src/sdk/llm/anthropic.ts            |  7 ++-----
 packages/agent-sdk/src/sdk/llm/openai-compatible.ts    |  2 ++
 packages/agent-sdk/src/sdk/runtime/Agent.ts            |  8 ++++++++
 .../sdk/runtime/__tests__/Agent.system-prompt.test.ts  |  4 ++++
 6 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts
index 31ba7b35..13b02b0c 100644
--- a/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts
+++ b/packages/agent-sdk/src/sdk/llm/__tests__/promptCaching.test.ts
@@ -76,7 +76,7 @@ describe('Anthropic prompt caching', () => {
     });
   });
 
-  it('moves the cache marker to the tool-result message level', async () => {
+  it('moves the cache marker to the tool_result block', async () => {
     const fetchMock = vi
       .spyOn(global, 'fetch')
       .mockResolvedValue(createStreamResponse(anthropicSse));
@@ -113,8 +113,12 @@ describe('Anthropic prompt caching', () => {
 
     expect(body?.messages?.at(-1)).toEqual({
       role: 'user',
-      content: [{ type: 'tool_result', tool_use_id: 'call_1', content: 'hi' }],
-      cache_control: EPHEMERAL_CACHE_CONTROL,
+      content: [{
+        type: 'tool_result',
+        tool_use_id: 'call_1',
+        content: 'hi',
+        cache_control: EPHEMERAL_CACHE_CONTROL,
+      }],
     });
   });
 });
diff --git a/packages/agent-sdk/src/sdk/llm/__tests__/thinkingBlocks.test.ts b/packages/agent-sdk/src/sdk/llm/__tests__/thinkingBlocks.test.ts
index 34c6b471..e08e4934 100644
--- a/packages/agent-sdk/src/sdk/llm/__tests__/thinkingBlocks.test.ts
+++ b/packages/agent-sdk/src/sdk/llm/__tests__/thinkingBlocks.test.ts
@@ -371,7 +371,7 @@ describe('AnthropicClient thinking blocks', () => {
     expect(toolResultMsg).toBeDefined();
 
     const toolResultBlock = toolResultMsg.content.find((b: { type: string }) => b.type === 'tool_result');
-    expect(toolResultBlock).toEqual({
+    expect(toolResultBlock).toMatchObject({
       type: 'tool_result',
       tool_use_id: 'call_1',
       content: 'hi',
diff --git a/packages/agent-sdk/src/sdk/llm/anthropic.ts b/packages/agent-sdk/src/sdk/llm/anthropic.ts
index 6cd3b3ce..1ab311d7 100644
--- a/packages/agent-sdk/src/sdk/llm/anthropic.ts
+++ b/packages/agent-sdk/src/sdk/llm/anthropic.ts
@@ -31,6 +31,7 @@ type AnthropicToolResultBlock = {
   type: 'tool_result';
   tool_use_id: string;
   content: string;
+  cache_control?: AnthropicCacheControl;
 };
 
 type AnthropicImageBlock = {
@@ -49,7 +50,6 @@ type AnthropicContentBlock =
 interface AnthropicMessage {
   role: 'user' | 'assistant';
   content: AnthropicContentBlock[];
-  cache_control?: AnthropicCacheControl;
 }
 
 type AnthropicEventName = 'message_start' | 'content_block_start' | 'content_block_delta' | 'message_delta' | (string & {});
@@ -247,20 +247,17 @@ const toAnthropicMessages = (
         type: 'tool_result',
         tool_use_id: message.tool_call_id ?? '',
         content: reduceTextContent(message),
+        ...(shouldCacheMessage ? { cache_control: EPHEMERAL_CACHE_CONTROL } : {}),
       };
 
       if (lastMessage?.role === 'user') {
         // Append to existing user message
         lastMessage.content.push(toolResultBlock);
-        if (shouldCacheMessage) {
-          lastMessage.cache_control = EPHEMERAL_CACHE_CONTROL;
-        }
       } else {
         // Create new user message
         result.push({
           role: 'user',
           content: [toolResultBlock],
-          ...(shouldCacheMessage ? { cache_control: EPHEMERAL_CACHE_CONTROL } : {}),
         });
       }
     }
diff --git a/packages/agent-sdk/src/sdk/llm/openai-compatible.ts b/packages/agent-sdk/src/sdk/llm/openai-compatible.ts
index b603fd4d..a0611c0e 100644
--- a/packages/agent-sdk/src/sdk/llm/openai-compatible.ts
+++ b/packages/agent-sdk/src/sdk/llm/openai-compatible.ts
@@ -42,6 +42,8 @@ type OpenAIChatMessage = {
   name?: string;
   tool_call_id?: string;
   tool_calls?: ChatCompletionRequest['messages'][number]['tool_calls'];
+  // LiteLLM tool-result caching follows the Python SDK quirk: the cache marker
+  // lives on the tool message envelope instead of the text block.
   cache_control?: OpenAICacheControl;
 };
 
diff --git a/packages/agent-sdk/src/sdk/runtime/Agent.ts b/packages/agent-sdk/src/sdk/runtime/Agent.ts
index 9b2eb1b5..54bc6b8b 100644
--- a/packages/agent-sdk/src/sdk/runtime/Agent.ts
+++ b/packages/agent-sdk/src/sdk/runtime/Agent.ts
@@ -1018,6 +1018,14 @@ export class Agent extends EventEmitter {
     return this.getToolDefinitions().map((tool) => tool as unknown as Record<string, unknown>);
   }
 
+  /**
+   * Builds the stable system-prompt prefix used for Anthropic prompt caching.
+   *
+   * This prefix stays cacheable as long as the agent identity, the shared
+   * system body, security-risk assessment inclusion, and registered tool
+   * summaries do not change. Runtime-mutated context such as the current editor
+   * state belongs in buildDynamicSystemPrompt() instead.
+   */
   private buildCacheableSystemPrompt(): string {
     const promptIdentity = this.agentContext?.getSystemMessagePrefix() ?? SYSTEM_PROMPT_IDENTITY;
     let systemPrompt = `${promptIdentity}\n\n${SYSTEM_PROMPT_BODY}`;
diff --git a/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts b/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts
index 9a49a461..41c75de6 100644
--- a/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts
+++ b/packages/agent-sdk/src/sdk/runtime/__tests__/Agent.system-prompt.test.ts
@@ -165,6 +165,10 @@ describe('Agent system prompt', () => {
     expect(llm.requests[0]?.dynamicSystemPrompt).toContain('/tmp/first.ts');
     expect(llm.requests[1]?.dynamicSystemPrompt).toContain('/tmp/second.ts');
     expect(llm.requests[1]?.dynamicSystemPrompt).not.toContain('/tmp/first.ts');
+    expect(llm.requests[0]?.cacheableSystemPrompt).not.toContain('/tmp/first.ts');
+    expect(llm.requests[0]?.cacheableSystemPrompt).not.toContain('/tmp/second.ts');
+    expect(llm.requests[1]?.cacheableSystemPrompt).not.toContain('/tmp/first.ts');
+    expect(llm.requests[1]?.cacheableSystemPrompt).not.toContain('/tmp/second.ts');
   });
 
   it('gates vendor-specific repo skills using LLM profile config', async () => {

From 139fcf6db3285c816c1290c452469208e073390e Mon Sep 17 00:00:00 2001
From: Engel Nyst <engel.nyst@gmail.com>
Date: Mon, 11 May 2026 18:33:11 +0200
Subject: [PATCH 4/5] Update packages/agent-sdk/src/sdk/llm/providerQuirks.ts

Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com>
---
 packages/agent-sdk/src/sdk/llm/providerQuirks.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts
index 1617664a..86faf14c 100644
--- a/packages/agent-sdk/src/sdk/llm/providerQuirks.ts
+++ b/packages/agent-sdk/src/sdk/llm/providerQuirks.ts
@@ -62,8 +62,8 @@ const PROMPT_CACHE_MODELS = [
   'claude-sonnet-3-7-latest',
   'claude-3-5-sonnet',
   'claude-3-5-haiku',
-  'claude-3-haiku-20240307',
-  'claude-3-opus-20240229',
+  'claude-3-haiku',
+  'claude-3-opus',
   'claude-sonnet-4',
   'claude-opus-4',
   'claude-haiku-4-5',

From f5bff0e88ae5ce3c48a0be03dafbdd6500fa9cb6 Mon Sep 17 00:00:00 2001
From: Engel Nyst <engel.nyst@gmail.com>
Date: Mon, 11 May 2026 19:31:04 +0200
Subject: [PATCH 5/5] Add Anthropic cache smoke runner

---
 package.json                      |   1 +
 scripts/anthropic-cache-smoke.mjs | 276 ++++++++++++++++++++++++++++++
 2 files changed, 277 insertions(+)
 create mode 100644 scripts/anthropic-cache-smoke.mjs

diff --git a/package.json b/package.json
index a0cc948c..c04f5647 100644
--- a/package.json
+++ b/package.json
@@ -535,6 +535,7 @@
     "lint:cycles": "node scripts/check-circular-deps.mjs",
     "lint:duplication": "node scripts/check-duplication.mjs",
     "lint:fix": "eslint . --fix",
+    "smoke:anthropic-cache": "npm run build -w @smolpaws/agent-sdk && node scripts/anthropic-cache-smoke.mjs",
     "build:webview": "node esbuild.webview.mjs",
     "agent-server": "bash scripts/start-agent-server.sh",
     "agent-server:prepare": "PREPARE=1 bash scripts/start-agent-server.sh",
diff --git a/scripts/anthropic-cache-smoke.mjs b/scripts/anthropic-cache-smoke.mjs
new file mode 100644
index 00000000..517429ec
--- /dev/null
+++ b/scripts/anthropic-cache-smoke.mjs
@@ -0,0 +1,276 @@
+#!/usr/bin/env node
+
+import fs from 'fs';
+import os from 'os';
+import path from 'path';
+import process from 'process';
+import { fileURLToPath } from 'url';
+
+import {
+  Agent,
+  EventLog,
+  ConversationStats,
+  TerminalTool,
+  FileEditorTool,
+  FinishTool,
+} from '../packages/agent-sdk/dist/index.mjs';
+
+const SCRIPT_PATH = fileURLToPath(import.meta.url);
+const WORKSPACE_ROOT = path.resolve(path.join(path.dirname(SCRIPT_PATH), '..'));
+const OPENHANDS_HOME = path.join(os.homedir(), '.openhands');
+const PROFILE_ID = process.argv[2] ?? 'opus-46';
+const PROFILE_PATH = path.join(OPENHANDS_HOME, 'llm-profiles', `${PROFILE_ID}.json`);
+
+const MESSAGES = [
+  'Reply with exactly "cache-smoke-ready". Do not use any tools.',
+  'Use the terminal tool exactly once with {"command":"pwd"} and then reply with just the directory path. Do not use any other tools.',
+  'How many tools have you used so far in this conversation? Reply with only the number. Do not use any tools.',
+  'Use the file_editor tool exactly once with {"command":"view","path":"package.json","view_range":[1,20]} and then reply with just the top-level package name. Do not use any other tools.',
+  'In one short sentence, summarize the two tool results from this conversation. Do not use any tools.',
+];
+
+function readJson(filePath) {
+  return JSON.parse(fs.readFileSync(filePath, 'utf8'));
+}
+
+/** Collapses whitespace runs and caps `text` at `maxChars` characters, "..." suffix included (assumes maxChars >= 3); non-strings yield ''. */
+function truncate(text, maxChars = 200) {
+  const normalized = typeof text === 'string' ? text.replace(/\s+/g, ' ').trim() : '';
+  return normalized.length <= maxChars ? normalized : `${normalized.slice(0, Math.max(0, maxChars - 3))}...`;
+}
+
+function toObject(value) {
+  return value && typeof value === 'object' && !Array.isArray(value) ? value : null;
+}
+
+/**
+ * Locates an LLM API key in the `settings*.json` files directly under OPENHANDS_HOME.
+ *
+ * A file whose `llm_base_url` equals `profileBaseUrl` (trailing slashes ignored)
+ * wins; otherwise the first file carrying a non-empty `llm_api_key` is used.
+ * Unparseable files are skipped. Throws when no file provides a key.
+ */
+function findSettingsApiKey(profileBaseUrl) {
+  const stripTrailingSlashes = (value) => (typeof value === 'string' ? value.replace(/\/+$/, '') : '');
+  const settingsDir = OPENHANDS_HOME;
+  const wantedBaseUrl = stripTrailingSlashes(profileBaseUrl);
+  const candidates = [];
+
+  for (const name of fs.readdirSync(settingsDir)) {
+    if (!/^settings.*\.json$/.test(name)) continue;
+    const source = path.join(settingsDir, name);
+    let settings;
+    try {
+      settings = readJson(source);
+    } catch {
+      continue; // best effort: ignore unreadable or malformed settings files
+    }
+
+    const apiKey = typeof settings?.llm_api_key === 'string' ? settings.llm_api_key.trim() : '';
+    if (!apiKey) continue;
+    if (wantedBaseUrl && stripTrailingSlashes(settings?.llm_base_url) === wantedBaseUrl) {
+      return { apiKey, source };
+    }
+    candidates.push({ apiKey, source });
+  }
+
+  const [fallback] = candidates;
+  if (fallback) return fallback;
+  throw new Error(`Could not find a local OpenHands settings file with an API key under ${settingsDir}`);
+}
+
+/** Joins the text items of `message.content` with blank lines and returns a truncated preview ('' when content is not an array). */
+function extractAssistantText(message) {
+  const isTextItem = (item) => item?.type === 'text' && typeof item.text === 'string';
+  const items = Array.isArray(message?.content) ? message.content : [];
+  const joined = items.filter(isTextItem).map((item) => item.text).join('\n\n');
+  return truncate(joined);
+}
+
+/** Returns a truncated preview of the text blocks in `event.llm_message.content`, joined by blank lines ('' when content is not an array). */
+function assistantMessageEventText(event) {
+  const blocks = event?.llm_message?.content;
+  if (!Array.isArray(blocks)) return truncate('');
+  const texts = blocks.flatMap((block) => (block?.type === 'text' && typeof block.text === 'string' ? [block.text] : []));
+  return truncate(texts.join('\n\n'));
+}
+
+/** Builds a short preview of a tool observation: the first non-blank of stdout, stderr, new_content, old_content, reason; otherwise its JSON form. */
+function summarizeObservation(observation) {
+  if (typeof observation === 'string') return truncate(observation);
+  const record = toObject(observation);
+  // Fields are probed in priority order; non-object observations skip straight to the JSON fallback.
+  for (const field of record ? ['stdout', 'stderr', 'new_content', 'old_content', 'reason'] : []) {
+    const candidate = record[field];
+    if (typeof candidate === 'string' && candidate.trim()) return truncate(candidate);
+  }
+  return truncate(JSON.stringify(record ?? observation));
+}
+
+/**
+ * Computes per-field growth between two accumulated token-usage snapshots.
+ * Missing or non-object snapshots count as empty; each delta is clamped at zero.
+ * `perTurnToken` is the sum of the prompt and completion deltas.
+ */
+function tokenUsageDelta(before, after) {
+  const [a, b] = [after, before].map((snapshot) => toObject(snapshot) ?? {});
+  const growth = (field) => Math.max(0, Number(a[field] ?? 0) - Number(b[field] ?? 0));
+  const fields = ['promptTokens', 'completionTokens', 'cacheReadTokens', 'cacheWriteTokens', 'reasoningTokens'];
+  const delta = Object.fromEntries(fields.map((field) => [field, growth(field)]));
+  delta.perTurnToken = delta.promptTokens + delta.completionTokens;
+  return delta;
+}
+
+/** Recursively counts properties named `cache_control` anywhere inside a JSON-like value (0 for primitives and null). */
+function countCacheControls(value) {
+  if (value === null || typeof value !== 'object') return 0;
+
+  const isList = Array.isArray(value);
+  // Arrays contribute only through their elements; plain objects also count their own matching key.
+  const own = !isList && Object.keys(value).includes('cache_control') ? 1 : 0;
+  const children = isList ? value : Object.values(value);
+  return children.reduce((sum, child) => sum + countCacheControls(child), own);
+}
+
+/** Summarizes a parsed request body: total cache_control markers, message count, and system block count (a truthy non-array `system` counts as 1). */
+function summarizeRequestBody(body) {
+  const payload = toObject(body);
+  const { messages, system } = payload ?? {};
+  let systemBlockCount = 0;
+  if (Array.isArray(system)) systemBlockCount = system.length;
+  else if (system) systemBlockCount = 1;
+  const messageCount = Array.isArray(messages) ? messages.length : 0;
+  return { cacheControlCount: countCacheControls(payload), messageCount, systemBlockCount };
+}
+
+/**
+ * Pairs every ActionEvent in `newEvents` with a preview of the ObservationEvent
+ * that shares its tool_call_id (the latest one wins; null when none is present).
+ */
+function extractToolUses(newEvents) {
+  const previews = new Map(
+    newEvents
+      .filter((event) => event?.kind === 'ObservationEvent' && typeof event.tool_call_id === 'string')
+      .map((event) => [event.tool_call_id, summarizeObservation(event.observation)]),
+  );
+
+  const toolUses = [];
+  for (const event of newEvents) {
+    if (event?.kind !== 'ActionEvent') continue;
+    const { tool_name: tool, tool_call_id: toolCallId } = event;
+    const action = toObject(event.action) ?? {};
+    toolUses.push({ tool, toolCallId, action, observationPreview: previews.get(toolCallId) ?? null });
+  }
+  return toolUses;
+}
+
+const profile = readJson(PROFILE_PATH);
+const { apiKey, source: apiKeySource } = findSettingsApiKey(profile.baseUrl);
+
+const wireRequests = [];
+const normalizedProfileBaseUrl = typeof profile.baseUrl === 'string'
+  ? profile.baseUrl.replace(/\/+$/, '')
+  : '';
+const originalFetch = globalThis.fetch;
+
+if (typeof originalFetch !== 'function') {
+  throw new Error('global fetch is not available in this Node runtime');
+}
+
+// Wrap global fetch so every request aimed at the profile's base URL is summarized
+// into `wireRequests` before being forwarded, unmodified, to the real fetch.
+globalThis.fetch = async (input, init) => {
+  const isPlainUrl = typeof input === 'string' || input instanceof URL;
+  const url = isPlainUrl ? String(input) : input?.url;
+  const normalizedUrl = typeof url === 'string' ? url.replace(/\/+$/, '') : '';
+  const targetsProfile = Boolean(normalizedProfileBaseUrl) && normalizedUrl.startsWith(normalizedProfileBaseUrl);
+
+  if (targetsProfile) {
+    let parsedBody = null;
+    if (typeof init?.body === 'string') {
+      try {
+        parsedBody = JSON.parse(init.body);
+      } catch {
+        parsedBody = null; // non-JSON payloads are summarized as if no body was sent
+      }
+    }
+
+    const inputMethod = input && typeof input === 'object' && 'method' in input ? input.method : 'GET';
+    const method = init?.method ?? inputMethod;
+    wireRequests.push({ url, method, ...summarizeRequestBody(parsedBody) });
+  }
+
+  return originalFetch(input, init);
+};
+
+const events = new EventLog();
+const stats = new ConversationStats();
+const agent = new Agent({
+  workspaceRoot: WORKSPACE_ROOT,
+  events,
+  conversationStats: stats,
+  includeDefaultTools: false,
+  tools: [
+    new FinishTool(),
+    new TerminalTool(),
+    new FileEditorTool(),
+  ],
+  settings: {
+    llm: { profileId: PROFILE_ID },
+    agent: { enableSecurityAnalyzer: false },
+    conversation: { maxIterations: 12 },
+    confirmation: { policy: 'never' },
+    secrets: { llmApiKey: apiKey },
+  },
+});
+
+const runResults = [];
+
+for (let index = 0; index < MESSAGES.length; index += 1) {
+  const message = MESSAGES[index];
+  const beforeEventCount = events.list().length;
+  const beforeWireCount = wireRequests.length;
+  const beforeMetrics = stats.getCombinedMetrics().getSnapshot();
+
+  const response = await agent.run(message);
+
+  const afterEvents = events.list().slice(beforeEventCount);
+  const afterWire = wireRequests.slice(beforeWireCount);
+  const afterMetrics = stats.getCombinedMetrics().getSnapshot();
+  const delta = tokenUsageDelta(beforeMetrics.accumulatedTokenUsage, afterMetrics.accumulatedTokenUsage);
+  const assistantMessages = afterEvents.filter(
+    (event) => event?.kind === 'MessageEvent' && event?.llm_message?.role === 'assistant',
+  );
+  const errors = afterEvents.filter(
+    (event) => event?.kind === 'ConversationErrorEvent' || event?.kind === 'AgentErrorEvent',
+  );
+
+  runResults.push({
+    call: index + 1,
+    userPrompt: message,
+    finalAssistantText: extractAssistantText(response),
+    assistantMessages: assistantMessages.map(assistantMessageEventText).filter(Boolean),
+    toolUses: extractToolUses(afterEvents),
+    llmRequestCount: afterWire.length,
+    requestCacheControlCounts: afterWire.map((request) => request.cacheControlCount),
+    usage: delta,
+    cacheHit: delta.cacheReadTokens > 0,
+    errors: errors.map((event) => ({
+      kind: event.kind,
+      code: event.code ?? null,
+      detail: truncate(event.detail ?? event.error ?? ''),
+    })),
+  });
+}
+
+const output = {
+  profileId: PROFILE_ID,
+  model: profile.model,
+  provider: profile.provider,
+  baseUrl: profile.baseUrl,
+  workspaceRoot: WORKSPACE_ROOT,
+  apiKeySource,
+  runResults,
+};
+
+process.stdout.write(`${JSON.stringify(output, null, 2)}\n`);