From b46ccdddb1a89ab0279072f5b621db7b3db41570 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Fri, 20 Feb 2026 16:43:12 -0500 Subject: [PATCH] =?UTF-8?q?Fix=20reasoning=20effort=20loss=20in=20Anthropi?= =?UTF-8?q?c/Bedrock=20thinking=20=E2=86=92=20effort=20conversion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When converting Anthropic `thinking.budget_tokens` to universal `ReasoningConfig`, the budget→effort heuristic used DEFAULT_MAX_TOKENS (4096) instead of the actual request max_tokens. This caused incorrect effort levels when max_tokens differed from the default (e.g., budget=1024 with max_tokens=1024 → High, not Low). Added `From<(&Thinking, Option)>` impl that accepts max_tokens context, and updated both Anthropic and Bedrock adapters to pass actual max_tokens. Co-Authored-By: Claude Opus 4.6 --- .../lingua/src/providers/anthropic/adapter.rs | 72 +++++++++++++++++-- .../lingua/src/providers/bedrock/adapter.rs | 2 +- crates/lingua/src/universal/reasoning.rs | 32 +++++++++ payloads/cases/params.ts | 23 ++++++ .../anthropic/request.json | 14 ++++ .../chat-completions/request.json | 10 +++ .../responses/request.json | 12 ++++ 7 files changed, 160 insertions(+), 5 deletions(-) create mode 100644 payloads/snapshots/thinkingEnabledParam/anthropic/request.json create mode 100644 payloads/snapshots/thinkingEnabledParam/chat-completions/request.json create mode 100644 payloads/snapshots/thinkingEnabledParam/responses/request.json diff --git a/crates/lingua/src/providers/anthropic/adapter.rs b/crates/lingua/src/providers/anthropic/adapter.rs index 96790b56..65f730bc 100644 --- a/crates/lingua/src/providers/anthropic/adapter.rs +++ b/crates/lingua/src/providers/anthropic/adapter.rs @@ -152,10 +152,12 @@ impl ProviderAdapter for AnthropicAdapter { } }) .or_else(|| { - typed_params - .thinking - .as_ref() - .map(crate::universal::request::ReasoningConfig::from) + typed_params.thinking.as_ref().map(|t| { + 
crate::universal::request::ReasoningConfig::from(( + t, + typed_params.max_tokens, + )) + }) }), metadata: typed_params .metadata @@ -1454,4 +1456,66 @@ mod tests { "message_stop should return None (terminal event)" ); } + + #[test] + fn test_anthropic_thinking_to_openai_effort_with_small_max_tokens() { + // BUG REPRO: Anthropic request with thinking budget_tokens=1024 and max_tokens=1024 + // should translate to OpenAI reasoning_effort="high" (budget is 100% of max_tokens). + // Instead, From<&Thinking> ignores max_tokens and uses DEFAULT=4096, + // computing 1024/4096=0.25 → "low". + use crate::providers::openai::adapter::OpenAIAdapter; + + let anthropic_adapter = AnthropicAdapter; + let openai_adapter = OpenAIAdapter; + + let anthropic_payload = json!({ + "model": "claude-sonnet-4-20250514", + "max_tokens": 1024, + "messages": [{"role": "user", "content": "Think hard about 2+2"}], + "thinking": { + "type": "enabled", + "budget_tokens": 1024 + } + }); + + // Anthropic → Universal + let universal = anthropic_adapter + .request_to_universal(anthropic_payload) + .unwrap(); + + // Verify the universal representation has reasoning + let reasoning = universal + .params + .reasoning + .as_ref() + .expect("reasoning should exist"); + assert_eq!(reasoning.enabled, Some(true)); + assert_eq!(reasoning.budget_tokens, Some(1024)); + + // The bug: effort is computed as Low (1024/4096=0.25) instead of High (1024/1024=1.0) + assert_eq!( + reasoning.effort, + Some(ReasoningEffort::High), + "budget_tokens=1024 with max_tokens=1024 should be High effort, not {:?}", + reasoning.effort + ); + + // Universal → OpenAI + let mut universal_for_openai = universal; + universal_for_openai.model = Some("gpt-5-nano".to_string()); + let openai_result = openai_adapter + .request_from_universal(&universal_for_openai) + .unwrap(); + + assert_eq!( + openai_result + .get("reasoning_effort") + .unwrap() + .as_str() + .unwrap(), + "high", + "OpenAI should get reasoning_effort=high, got: {}", + 
openai_result.get("reasoning_effort").unwrap() ); } } diff --git a/crates/lingua/src/providers/bedrock/adapter.rs b/crates/lingua/src/providers/bedrock/adapter.rs index 7df7d051..4eb6a835 100644 --- a/crates/lingua/src/providers/bedrock/adapter.rs +++ b/crates/lingua/src/providers/bedrock/adapter.rs @@ -80,7 +80,7 @@ impl ProviderAdapter for BedrockAdapter { .as_ref() .and_then(|fields| fields.get("thinking")) .and_then(|v| serde_json::from_value::<Thinking>(v.clone()).ok()) - .map(|t| ReasoningConfig::from(&t)); + .map(|t| ReasoningConfig::from((&t, max_tokens))); let mut params = UniversalParams { temperature, diff --git a/crates/lingua/src/universal/reasoning.rs b/crates/lingua/src/universal/reasoning.rs index 1f83e5eb..e7c552d8 100644 --- a/crates/lingua/src/universal/reasoning.rs +++ b/crates/lingua/src/universal/reasoning.rs @@ -176,6 +176,26 @@ impl From<&Thinking> for ReasoningConfig { } } +/// Convert Anthropic Thinking to ReasoningConfig with max_tokens context. +/// +/// Uses actual max_tokens for budget→effort conversion instead of DEFAULT_MAX_TOKENS. +/// This produces correct effort levels when max_tokens differs from the default. +impl From<(&Thinking, Option<u32>)> for ReasoningConfig { + fn from((thinking, max_tokens): (&Thinking, Option<u32>)) -> Self { + let enabled = matches!(thinking.thinking_type, ThinkingType::Enabled); + let budget_tokens = thinking.budget_tokens; + let effort = budget_tokens.map(|b| budget_to_effort(b, max_tokens)); + + ReasoningConfig { + enabled: Some(enabled), + effort, + budget_tokens, + canonical: Some(ReasoningCanonical::BudgetTokens), + ..Default::default() + } + } +} + /// Convert OpenAI ReasoningEffort to ReasoningConfig with context (for Chat API). /// /// OpenAI's effort is canonical. Budget_tokens is derived. 
@@ -519,6 +539,18 @@ mod tests { assert_eq!(config.budget_tokens, Some(2048)); } + #[test] + fn test_from_anthropic_thinking_without_max_tokens_uses_default() { + // Without max_tokens context, falls back to DEFAULT_MAX_TOKENS. + // budget=1024 / 4096 = 0.25 → Low + let thinking = Thinking { + thinking_type: ThinkingType::Enabled, + budget_tokens: Some(1024), + }; + let config = ReasoningConfig::from(&thinking); + assert_eq!(config.effort, Some(ReasoningEffort::Low)); + } + #[test] fn test_to_anthropic_thinking() { let config = ReasoningConfig { diff --git a/payloads/cases/params.ts b/payloads/cases/params.ts index e6ad2289..529d309b 100644 --- a/payloads/cases/params.ts +++ b/payloads/cases/params.ts @@ -1037,6 +1037,29 @@ export const paramsCases: TestCaseCollection = { bedrock: null, }, + // Anthropic thinking enabled with budget_tokens - exercises budget→effort conversion + // with small max_tokens (1024). budget/max_tokens = 100% → high effort. + thinkingEnabledParam: { + "chat-completions": { + model: OPENAI_RESPONSES_MODEL, + messages: [{ role: "user", content: "Think hard about 2+2" }], + reasoning_effort: "high", + }, + responses: { + model: OPENAI_RESPONSES_MODEL, + input: [{ role: "user", content: "Think hard about 2+2" }], + reasoning: { effort: "high" }, + }, + anthropic: { + model: ANTHROPIC_MODEL, + max_tokens: 1024, + messages: [{ role: "user", content: "Think hard about 2+2" }], + thinking: { type: "enabled", budget_tokens: 1024 }, + }, + google: null, + bedrock: null, + }, + // === Output Config (structured output) === outputFormatJsonSchemaParam: { diff --git a/payloads/snapshots/thinkingEnabledParam/anthropic/request.json b/payloads/snapshots/thinkingEnabledParam/anthropic/request.json new file mode 100644 index 00000000..5b822bf0 --- /dev/null +++ b/payloads/snapshots/thinkingEnabledParam/anthropic/request.json @@ -0,0 +1,14 @@ +{ + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": 
"Think hard about 2+2" + } + ], + "thinking": { + "type": "enabled", + "budget_tokens": 1024 + } +} \ No newline at end of file diff --git a/payloads/snapshots/thinkingEnabledParam/chat-completions/request.json b/payloads/snapshots/thinkingEnabledParam/chat-completions/request.json new file mode 100644 index 00000000..9804faa5 --- /dev/null +++ b/payloads/snapshots/thinkingEnabledParam/chat-completions/request.json @@ -0,0 +1,10 @@ +{ + "model": "gpt-5-nano", + "messages": [ + { + "role": "user", + "content": "Think hard about 2+2" + } + ], + "reasoning_effort": "high" +} diff --git a/payloads/snapshots/thinkingEnabledParam/responses/request.json b/payloads/snapshots/thinkingEnabledParam/responses/request.json new file mode 100644 index 00000000..c99fdbaa --- /dev/null +++ b/payloads/snapshots/thinkingEnabledParam/responses/request.json @@ -0,0 +1,12 @@ +{ + "model": "gpt-5-nano", + "input": [ + { + "role": "user", + "content": "Think hard about 2+2" + } + ], + "reasoning": { + "effort": "high" + } +}