From b46ccdddb1a89ab0279072f5b621db7b3db41570 Mon Sep 17 00:00:00 2001 From: Matt Perpick Date: Fri, 20 Feb 2026 16:43:12 -0500 Subject: [PATCH] =?UTF-8?q?Fix=20reasoning=20effort=20loss=20in=20Anthropi?= =?UTF-8?q?c/Bedrock=20thinking=20=E2=86=92=20effort=20conversion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When converting Anthropic `thinking.budget_tokens` to universal `ReasoningConfig`, the budget→effort heuristic used DEFAULT_MAX_TOKENS (4096) instead of the actual request max_tokens. This caused incorrect effort levels when max_tokens differed from the default (e.g., budget=1024 with max_tokens=1024 → High, not Low). Added `From<(&Thinking, Option)>` impl that accepts max_tokens context, and updated both Anthropic and Bedrock adapters to pass actual max_tokens. Co-Authored-By: Claude Opus 4.6 --- .../lingua/src/providers/anthropic/adapter.rs | 72 +++++++++++++++++-- .../lingua/src/providers/bedrock/adapter.rs | 2 +- crates/lingua/src/universal/reasoning.rs | 32 +++++++++ payloads/cases/params.ts | 23 ++++++ .../anthropic/request.json | 14 ++++ .../chat-completions/request.json | 10 +++ .../responses/request.json | 12 ++++ 7 files changed, 160 insertions(+), 5 deletions(-) create mode 100644 payloads/snapshots/thinkingEnabledParam/anthropic/request.json create mode 100644 payloads/snapshots/thinkingEnabledParam/chat-completions/request.json create mode 100644 payloads/snapshots/thinkingEnabledParam/responses/request.json diff --git a/crates/lingua/src/providers/anthropic/adapter.rs b/crates/lingua/src/providers/anthropic/adapter.rs index 96790b56..65f730bc 100644 --- a/crates/lingua/src/providers/anthropic/adapter.rs +++ b/crates/lingua/src/providers/anthropic/adapter.rs @@ -152,10 +152,12 @@ impl ProviderAdapter for AnthropicAdapter { } }) .or_else(|| { - typed_params - .thinking - .as_ref() - .map(crate::universal::request::ReasoningConfig::from) + typed_params.thinking.as_ref().map(|t| { + 
crate::universal::request::ReasoningConfig::from(( + t, + typed_params.max_tokens, + )) + }) }), metadata: typed_params .metadata @@ -1454,4 +1456,66 @@ mod tests { "message_stop should return None (terminal event)" ); } + + #[test] + fn test_anthropic_thinking_to_openai_effort_with_small_max_tokens() { + // BUG REPRO: Anthropic request with thinking budget_tokens=1024 and max_tokens=1024 + // should translate to OpenAI reasoning_effort="high" (budget is 100% of max_tokens). + // Instead, From<&Thinking> ignores max_tokens and uses DEFAULT=4096, + // computing 1024/4096=0.25 → "low". + use crate::providers::openai::adapter::OpenAIAdapter; + + let anthropic_adapter = AnthropicAdapter; + let openai_adapter = OpenAIAdapter; + + let anthropic_payload = json!({ + "model": "claude-sonnet-4-20250514", + "max_tokens": 1024, + "messages": [{"role": "user", "content": "Think hard about 2+2"}], + "thinking": { + "type": "enabled", + "budget_tokens": 1024 + } + }); + + // Anthropic → Universal + let universal = anthropic_adapter + .request_to_universal(anthropic_payload) + .unwrap(); + + // Verify the universal representation has reasoning + let reasoning = universal + .params + .reasoning + .as_ref() + .expect("reasoning should exist"); + assert_eq!(reasoning.enabled, Some(true)); + assert_eq!(reasoning.budget_tokens, Some(1024)); + + // The bug: effort is computed as Low (1024/4096=0.25) instead of High (1024/1024=1.0) + assert_eq!( + reasoning.effort, + Some(ReasoningEffort::High), + "budget_tokens=1024 with max_tokens=1024 should be High effort, not {:?}", + reasoning.effort + ); + + // Universal → OpenAI + let mut universal_for_openai = universal; + universal_for_openai.model = Some("gpt-5-nano".to_string()); + let openai_result = openai_adapter + .request_from_universal(&universal_for_openai) + .unwrap(); + + assert_eq!( + openai_result + .get("reasoning_effort") + .unwrap() + .as_str() + .unwrap(), + "high", + "OpenAI should get reasoning_effort=high, got: {}", + 
openai_result.get("reasoning_effort").unwrap() ); } } diff --git a/crates/lingua/src/providers/bedrock/adapter.rs b/crates/lingua/src/providers/bedrock/adapter.rs index 7df7d051..4eb6a835 100644 --- a/crates/lingua/src/providers/bedrock/adapter.rs +++ b/crates/lingua/src/providers/bedrock/adapter.rs @@ -80,7 +80,7 @@ impl ProviderAdapter for BedrockAdapter { .as_ref() .and_then(|fields| fields.get("thinking")) .and_then(|v| serde_json::from_value::<Thinking>(v.clone()).ok()) - .map(|t| ReasoningConfig::from(&t)); + .map(|t| ReasoningConfig::from((&t, max_tokens))); let mut params = UniversalParams { temperature, diff --git a/crates/lingua/src/universal/reasoning.rs b/crates/lingua/src/universal/reasoning.rs index 1f83e5eb..e7c552d8 100644 --- a/crates/lingua/src/universal/reasoning.rs +++ b/crates/lingua/src/universal/reasoning.rs @@ -176,6 +176,26 @@ impl From<&Thinking> for ReasoningConfig { } } +/// Convert Anthropic Thinking to ReasoningConfig with max_tokens context. +/// +/// Uses actual max_tokens for budget→effort conversion instead of DEFAULT_MAX_TOKENS. +/// This produces correct effort levels when max_tokens differs from the default. +impl From<(&Thinking, Option<u32>)> for ReasoningConfig { + fn from((thinking, max_tokens): (&Thinking, Option<u32>)) -> Self { + let enabled = matches!(thinking.thinking_type, ThinkingType::Enabled); + let budget_tokens = thinking.budget_tokens; + let effort = budget_tokens.map(|b| budget_to_effort(b, max_tokens)); + + ReasoningConfig { + enabled: Some(enabled), + effort, + budget_tokens, + canonical: Some(ReasoningCanonical::BudgetTokens), + ..Default::default() + } + } +} + /// Convert OpenAI ReasoningEffort to ReasoningConfig with context (for Chat API). /// /// OpenAI's effort is canonical. Budget_tokens is derived. 
@@ -519,6 +539,18 @@ mod tests { assert_eq!(config.budget_tokens, Some(2048)); } + #[test] + fn test_from_anthropic_thinking_without_max_tokens_uses_default() { + // Without max_tokens context, falls back to DEFAULT_MAX_TOKENS. + // budget=1024 / 4096 = 0.25 → Low + let thinking = Thinking { + thinking_type: ThinkingType::Enabled, + budget_tokens: Some(1024), + }; + let config = ReasoningConfig::from(&thinking); + assert_eq!(config.effort, Some(ReasoningEffort::Low)); + } + #[test] fn test_to_anthropic_thinking() { let config = ReasoningConfig { diff --git a/payloads/cases/params.ts b/payloads/cases/params.ts index e6ad2289..529d309b 100644 --- a/payloads/cases/params.ts +++ b/payloads/cases/params.ts @@ -1037,6 +1037,29 @@ export const paramsCases: TestCaseCollection = { bedrock: null, }, + // Anthropic thinking enabled with budget_tokens - exercises budget→effort conversion + // with small max_tokens (1024). budget/max_tokens = 100% → high effort. + thinkingEnabledParam: { + "chat-completions": { + model: OPENAI_RESPONSES_MODEL, + messages: [{ role: "user", content: "Think hard about 2+2" }], + reasoning_effort: "high", + }, + responses: { + model: OPENAI_RESPONSES_MODEL, + input: [{ role: "user", content: "Think hard about 2+2" }], + reasoning: { effort: "high" }, + }, + anthropic: { + model: ANTHROPIC_MODEL, + max_tokens: 1024, + messages: [{ role: "user", content: "Think hard about 2+2" }], + thinking: { type: "enabled", budget_tokens: 1024 }, + }, + google: null, + bedrock: null, + }, + // === Output Config (structured output) === outputFormatJsonSchemaParam: { diff --git a/payloads/snapshots/thinkingEnabledParam/anthropic/request.json b/payloads/snapshots/thinkingEnabledParam/anthropic/request.json new file mode 100644 index 00000000..5b822bf0 --- /dev/null +++ b/payloads/snapshots/thinkingEnabledParam/anthropic/request.json @@ -0,0 +1,14 @@ +{ + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 1024, + "messages": [ + { + "role": "user", + "content": 
"Think hard about 2+2" + } + ], + "thinking": { + "type": "enabled", + "budget_tokens": 1024 + } +} \ No newline at end of file diff --git a/payloads/snapshots/thinkingEnabledParam/chat-completions/request.json b/payloads/snapshots/thinkingEnabledParam/chat-completions/request.json new file mode 100644 index 00000000..9804faa5 --- /dev/null +++ b/payloads/snapshots/thinkingEnabledParam/chat-completions/request.json @@ -0,0 +1,10 @@ +{ + "model": "gpt-5-nano", + "messages": [ + { + "role": "user", + "content": "Think hard about 2+2" + } + ], + "reasoning_effort": "high" +} diff --git a/payloads/snapshots/thinkingEnabledParam/responses/request.json b/payloads/snapshots/thinkingEnabledParam/responses/request.json new file mode 100644 index 00000000..c99fdbaa --- /dev/null +++ b/payloads/snapshots/thinkingEnabledParam/responses/request.json @@ -0,0 +1,12 @@ +{ + "model": "gpt-5-nano", + "input": [ + { + "role": "user", + "content": "Think hard about 2+2" + } + ], + "reasoning": { + "effort": "high" + } +}