Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 68 additions & 4 deletions crates/lingua/src/providers/anthropic/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,12 @@ impl ProviderAdapter for AnthropicAdapter {
}
})
.or_else(|| {
typed_params
.thinking
.as_ref()
.map(crate::universal::request::ReasoningConfig::from)
typed_params.thinking.as_ref().map(|t| {
crate::universal::request::ReasoningConfig::from((
t,
typed_params.max_tokens,
))
})
}),
metadata: typed_params
.metadata
Expand Down Expand Up @@ -1454,4 +1456,66 @@ mod tests {
"message_stop should return None (terminal event)"
);
}

#[test]
fn test_anthropic_thinking_to_openai_effort_with_small_max_tokens() {
    // Regression test: an Anthropic request with thinking budget_tokens=1024 and
    // max_tokens=1024 must translate to OpenAI reasoning_effort="high", since the
    // budget is 100% of max_tokens. Before the fix, the plain From<&Thinking> impl
    // ignored max_tokens and divided by DEFAULT_MAX_TOKENS (4096), computing
    // 1024/4096 = 0.25 → "low".
    use crate::providers::openai::adapter::OpenAIAdapter;

    let anthropic_adapter = AnthropicAdapter;
    let openai_adapter = OpenAIAdapter;

    let anthropic_payload = json!({
        "model": "claude-sonnet-4-20250514",
        "max_tokens": 1024,
        "messages": [{"role": "user", "content": "Think hard about 2+2"}],
        "thinking": {
            "type": "enabled",
            "budget_tokens": 1024
        }
    });

    // Anthropic → Universal
    let universal = anthropic_adapter
        .request_to_universal(anthropic_payload)
        .unwrap();

    // The universal representation must carry the reasoning config parsed
    // from the `thinking` field.
    let reasoning = universal
        .params
        .reasoning
        .as_ref()
        .expect("reasoning should exist");
    assert_eq!(reasoning.enabled, Some(true));
    assert_eq!(reasoning.budget_tokens, Some(1024));

    // With the fix, effort is High (1024/1024 = 1.0); the old code produced
    // Low (1024/4096 = 0.25).
    assert_eq!(
        reasoning.effort,
        Some(ReasoningEffort::High),
        "budget_tokens=1024 with max_tokens=1024 should be High effort, not {:?}",
        reasoning.effort
    );

    // Universal → OpenAI: the effort level must survive the round trip into
    // the chat-completions `reasoning_effort` parameter.
    let mut universal_for_openai = universal;
    universal_for_openai.model = Some("gpt-5-nano".to_string());
    let openai_result = openai_adapter
        .request_from_universal(&universal_for_openai)
        .unwrap();

    assert_eq!(
        openai_result
            .get("reasoning_effort")
            .unwrap()
            .as_str()
            .unwrap(),
        "high",
        "OpenAI should get reasoning_effort=high, got: {}",
        openai_result.get("reasoning_effort").unwrap()
    );
}
}
2 changes: 1 addition & 1 deletion crates/lingua/src/providers/bedrock/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ impl ProviderAdapter for BedrockAdapter {
.as_ref()
.and_then(|fields| fields.get("thinking"))
.and_then(|v| serde_json::from_value::<Thinking>(v.clone()).ok())
.map(|t| ReasoningConfig::from(&t));
.map(|t| ReasoningConfig::from((&t, max_tokens)));

let mut params = UniversalParams {
temperature,
Expand Down
32 changes: 32 additions & 0 deletions crates/lingua/src/universal/reasoning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,26 @@ impl From<&Thinking> for ReasoningConfig {
}
}

/// Convert Anthropic `Thinking` into a [`ReasoningConfig`], using the
/// request's actual `max_tokens` (when present) for the budget→effort ratio.
///
/// Unlike `From<&Thinking>`, which divides by `DEFAULT_MAX_TOKENS`, this
/// conversion yields effort levels consistent with the caller's real token
/// ceiling. `budget_tokens` remains the canonical value; `effort` is derived.
impl From<(&Thinking, Option<i64>)> for ReasoningConfig {
    fn from((thinking, max_tokens): (&Thinking, Option<i64>)) -> Self {
        ReasoningConfig {
            enabled: Some(matches!(thinking.thinking_type, ThinkingType::Enabled)),
            // Effort is only derivable when a budget was given.
            effort: thinking
                .budget_tokens
                .map(|budget| budget_to_effort(budget, max_tokens)),
            budget_tokens: thinking.budget_tokens,
            canonical: Some(ReasoningCanonical::BudgetTokens),
            ..Default::default()
        }
    }
}

/// Convert OpenAI ReasoningEffort to ReasoningConfig with context (for Chat API).
///
/// OpenAI's effort is canonical. Budget_tokens is derived.
Expand Down Expand Up @@ -519,6 +539,18 @@ mod tests {
assert_eq!(config.budget_tokens, Some(2048));
}

#[test]
fn test_from_anthropic_thinking_without_max_tokens_uses_default() {
    // The context-free From<&Thinking> impl has no max_tokens available, so it
    // divides by DEFAULT_MAX_TOKENS (4096): 1024 / 4096 = 0.25 → Low.
    let thinking = Thinking {
        thinking_type: ThinkingType::Enabled,
        budget_tokens: Some(1024),
    };
    let config: ReasoningConfig = (&thinking).into();
    assert_eq!(config.effort, Some(ReasoningEffort::Low));
}

#[test]
fn test_to_anthropic_thinking() {
let config = ReasoningConfig {
Expand Down
23 changes: 23 additions & 0 deletions payloads/cases/params.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1037,6 +1037,29 @@ export const paramsCases: TestCaseCollection = {
bedrock: null,
},

// Anthropic thinking enabled with budget_tokens - exercises budget→effort conversion
// with small max_tokens (1024). budget/max_tokens = 100% → high effort.
// The anthropic payload is the canonical form; chat-completions and responses
// entries are the expected translations (effort "high" in both API shapes).
thinkingEnabledParam: {
"chat-completions": {
model: OPENAI_RESPONSES_MODEL,
messages: [{ role: "user", content: "Think hard about 2+2" }],
reasoning_effort: "high",
},
responses: {
model: OPENAI_RESPONSES_MODEL,
input: [{ role: "user", content: "Think hard about 2+2" }],
reasoning: { effort: "high" },
},
anthropic: {
model: ANTHROPIC_MODEL,
max_tokens: 1024,
messages: [{ role: "user", content: "Think hard about 2+2" }],
thinking: { type: "enabled", budget_tokens: 1024 },
},
// NOTE(review): google/bedrock set to null — presumably this case is not yet
// covered for those providers; confirm against the snapshot suite.
google: null,
bedrock: null,
},

// === Output Config (structured output) ===

outputFormatJsonSchemaParam: {
Expand Down
14 changes: 14 additions & 0 deletions payloads/snapshots/thinkingEnabledParam/anthropic/request.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions payloads/snapshots/thinkingEnabledParam/responses/request.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading