Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion benchmarks/terminal_bench/prepare_leaderboard_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@
"model_org_display_name": "Anthropic",
"folder_name": "Claude-Opus-4.7",
},
"anthropic/claude-opus-4-8": {
"model_name": "claude-opus-4-8",
"model_provider": "anthropic",
"model_display_name": "Claude Opus 4.8",
"model_org_display_name": "Anthropic",
"folder_name": "Claude-Opus-4.8",
},
# Keep historical GPT metadata alongside the current GPT-5.5 bench target
# so mixed or older artifact sets still map to the canonical leaderboard names.
"openai/gpt-5.2": {
Expand Down Expand Up @@ -442,7 +449,7 @@ def main():
parser.add_argument(
"--models",
nargs="+",
help="Only process specific models (e.g., anthropic/claude-opus-4-7, openai/gpt-5.5)",
help="Only process specific models (e.g., anthropic/claude-opus-4-8, openai/gpt-5.5)",
)
args = parser.parse_args()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ export const ModelsConfigured: Story = {
isEnabled: true,
isConfigured: true,
baseUrl: "",
models: ["claude-sonnet-4-20250514", "claude-opus-4-7"],
models: ["claude-sonnet-4-20250514", "claude-opus-4-8"],
},
openai: {
apiKeySet: true,
Expand Down
2 changes: 1 addition & 1 deletion src/common/routing/types.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { ProviderName } from "@/common/constants/providers";

export interface RouteContext {
/** Canonical model string (e.g., "anthropic:claude-opus-4-7") */
/** Canonical model string (e.g., "anthropic:claude-opus-4-8") */
canonical: string;
/** Origin provider — who made the model. Determines capabilities. */
origin: ProviderName;
Expand Down
4 changes: 2 additions & 2 deletions src/common/types/thinking.ts
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
* Note: Opus 4.7 introduced a native "xhigh" effort level in the API, but the
* SDK's Zod validator still rejects "xhigh". Mux handles this by sending "max"
* through the SDK and rewriting `output_config.effort` to "xhigh" in a fetch
* wrapper for Opus 4.7 when the user selected the xhigh ThinkingLevel.
* wrapper for Opus 4.7+ when the user selected the xhigh ThinkingLevel.
* See `wrapFetchWithAnthropicCacheControl` and `buildRequestHeaders`.
*/
export type AnthropicEffortLevel = "low" | "medium" | "high" | "max";
Expand All @@ -189,7 +189,7 @@ const ANTHROPIC_EFFORT: Record<ThinkingLevel, AnthropicEffortLevel> = {
low: "low",
medium: "medium",
high: "high",
xhigh: "max", // SDK placeholder; fetch wrapper rewrites to "xhigh" on Opus 4.7
xhigh: "max", // SDK placeholder; fetch wrapper rewrites to "xhigh" on Opus 4.7+
max: "max",
};

Expand Down
4 changes: 3 additions & 1 deletion src/common/utils/ai/models.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ describe("Anthropic 1M context classification", () => {
expect(hasNative1MContext("anthropic:claude-sonnet-4-5")).toBe(false);
});

it("treats Opus 4.6 and Sonnet 4.6 as native 1M models", () => {
it("treats Opus 4.6 through 4.8 and Sonnet 4.6 as native 1M models", () => {
expect(getAnthropic1MContextMode("anthropic:claude-opus-4-8")).toBe("native");
expect(getAnthropic1MContextMode("anthropic:claude-opus-4-8-20260528")).toBe("native");
expect(getAnthropic1MContextMode("anthropic:claude-opus-4-6")).toBe("native");
expect(getAnthropic1MContextMode("anthropic:claude-opus-4-6-20260201")).toBe("native");
expect(getAnthropic1MContextMode("anthropic:claude-sonnet-4-6")).toBe("native");
Expand Down
5 changes: 2 additions & 3 deletions src/common/utils/ai/providerOptions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,7 @@ describe("buildRequestHeaders", () => {
});
}

describe("Opus 4.7 xhigh effort override", () => {
describe("Opus 4.7+ xhigh effort override", () => {
for (const { name, model, routeProvider, thinkingLevel, expected } of [
{
name: "emits override header when thinkingLevel=xhigh for Opus 4.7",
Expand All @@ -979,8 +979,7 @@ describe("buildRequestHeaders", () => {
expected: { [MUX_ANTHROPIC_EFFORT_OVERRIDE_HEADER]: "xhigh" },
},
{
name: "emits override header for hypothetical Opus 4.8",
// Detection should match future versions so xhigh doesn't silently collapse to max.
name: "emits override header for Opus 4.8",
model: "anthropic:claude-opus-4-8",
routeProvider: undefined,
thinkingLevel: "xhigh",
Expand Down
8 changes: 4 additions & 4 deletions src/common/utils/ai/providerOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import { normalizeToCanonical, supports1MContext } from "./models";
/**
* Request header used to override Anthropic's `output_config.effort` at the
* wire level. The @ai-sdk/anthropic Zod schema rejects "xhigh", so for
* Opus 4.7 + xhigh ThinkingLevel we send "max" through the SDK and ask the
* Opus 4.7+ with xhigh ThinkingLevel we send "max" through the SDK and ask the
* fetch wrapper to rewrite it to "xhigh" via this header (which is then stripped
* before the request reaches Anthropic).
*/
Expand Down Expand Up @@ -283,7 +283,7 @@ export function buildProviderOptions(
const usesAdaptiveThinking = isOpus46 || isOpus47Plus || isSonnet46;

if (isOpus45 || usesAdaptiveThinking) {
// Map to SDK-accepted effort. For Opus 4.7 + xhigh ThinkingLevel, the SDK
// Map to SDK-accepted effort. For Opus 4.7+ with xhigh ThinkingLevel, the SDK
// gets "max" as a placeholder and the Anthropic fetch wrapper rewrites
// `output_config.effort` to "xhigh" via the X-Mux-Anthropic-Effort header
// (added in buildRequestHeaders).
Expand All @@ -305,10 +305,10 @@ export function buildProviderOptions(
thinkingLevel: effectiveThinking,
});

// Note: Opus 4.7 requires `thinking.display: "summarized"` to receive
// Note: Opus 4.7+ requires `thinking.display: "summarized"` to receive
// thinking content, but the SDK's Zod schema strips unknown keys so we
// can't add it here. The Anthropic fetch wrapper injects `display` on the
// wire for Opus 4.7 + adaptive thinking requests.
// wire for Opus 4.7+ adaptive thinking requests.
const anthropicOptions: AnthropicProviderOptions = {
disableParallelToolUse: false,
sendReasoning: true,
Expand Down
4 changes: 2 additions & 2 deletions src/common/utils/thinking/policy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,8 @@ describe("getThinkingPolicyForModel", () => {
]);
});

test("returns all 6 levels for future Opus versions (4.8+, 5+)", () => {
// Detection should extend forward so future models don't regress to the default policy.
test("returns all 6 levels for Opus 4.8 and future Opus versions", () => {
// Detection should extend forward so new Opus models don't regress to the default policy.
expect(getThinkingPolicyForModel("anthropic:claude-opus-4-8")).toEqual([
"off",
"low",
Expand Down
28 changes: 15 additions & 13 deletions src/node/services/providerModelFactory.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1746,20 +1746,22 @@ function parseSentBody(call: CapturedFetchCall): Record<string, unknown> {
return JSON.parse(call.init.body as string) as Record<string, unknown>;
}

describe("wrapFetchWithAnthropicCacheControl — Opus 4.7 wire transforms", () => {
it("injects thinking.display=summarized for Opus 4.7 adaptive thinking", async () => {
const { calls, fakeFetch } = createCapturingFetch();
const wrapped = wrapFetchWithAnthropicCacheControl(fakeFetch);
const body = JSON.stringify({
model: "claude-opus-4-7",
thinking: { type: "adaptive" },
output_config: { effort: "medium" },
describe("wrapFetchWithAnthropicCacheControl — Opus 4.7+ wire transforms", () => {
for (const model of ["claude-opus-4-7", "claude-opus-4-8"] as const) {
it(`injects thinking.display=summarized for ${model} adaptive thinking`, async () => {
const { calls, fakeFetch } = createCapturingFetch();
const wrapped = wrapFetchWithAnthropicCacheControl(fakeFetch);
const body = JSON.stringify({
model,
thinking: { type: "adaptive" },
output_config: { effort: "medium" },
});
await wrapped("https://api.anthropic.com/v1/messages", { method: "POST", body });
expect(calls.length).toBe(1);
const sent = parseSentBody(calls[0]);
expect(sent.thinking).toEqual({ type: "adaptive", display: "summarized" });
});
await wrapped("https://api.anthropic.com/v1/messages", { method: "POST", body });
expect(calls.length).toBe(1);
const sent = parseSentBody(calls[0]);
expect(sent.thinking).toEqual({ type: "adaptive", display: "summarized" });
});
}

it("preserves a user-supplied display value on Opus 4.7", async () => {
const { calls, fakeFetch } = createCapturingFetch();
Expand Down
4 changes: 2 additions & 2 deletions src/node/services/providerModelFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1453,7 +1453,7 @@ export class ProviderModelFactory {
// Use getProviderFetch to preserve any user-configured custom fetch (e.g., proxies)
const baseFetch = getProviderFetch(providerConfig);
const disableBeta = muxProviderOptions?.anthropic?.disableBetaFeatures === true;
// Always wrap to apply Opus 4.7 wire-level transforms (display + effort
// Always wrap to apply Opus 4.7+ wire-level transforms (display + effort
// override); skip cache_control injection when beta features are off.
const fetchWithCacheControl = wrapFetchWithAnthropicCacheControl(
baseFetch,
Expand Down Expand Up @@ -1965,7 +1965,7 @@ export class ProviderModelFactory {
const baseFetch = getProviderFetch(providerConfig);
const isAnthropicModel = modelId.startsWith("anthropic/");
const disableBeta = muxProviderOptions?.anthropic?.disableBetaFeatures === true;
// For Anthropic models via gateway, always wrap to apply Opus 4.7 wire
// For Anthropic models via gateway, always wrap to apply Opus 4.7+ wire
// transforms; skip cache_control injection when beta features are off.
const fetchWithCacheControl = isAnthropicModel
? wrapFetchWithAnthropicCacheControl(baseFetch, effectiveAnthropicCacheTtl, {
Expand Down
Loading