Skip to content

Commit 31b981d

Browse files
clutchski and claude committed
Fix reasoning effort loss in Anthropic/Bedrock thinking → effort conversion
When converting Anthropic `thinking.budget_tokens` to universal `ReasoningConfig`, the budget→effort heuristic used DEFAULT_MAX_TOKENS (4096) instead of the actual request max_tokens. This caused incorrect effort levels when max_tokens differed from the default (e.g., budget=1024 with max_tokens=1024 should map to High, but was computed as Low). Added `From<(&Thinking, Option<i64>)>` impl that accepts max_tokens context, and updated both Anthropic and Bedrock adapters to pass actual max_tokens. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3f83160 commit 31b981d

8 files changed

Lines changed: 180 additions & 5 deletions

File tree

crates/lingua/src/providers/anthropic/adapter.rs

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,12 @@ impl ProviderAdapter for AnthropicAdapter {
152152
}
153153
})
154154
.or_else(|| {
155-
typed_params
156-
.thinking
157-
.as_ref()
158-
.map(crate::universal::request::ReasoningConfig::from)
155+
typed_params.thinking.as_ref().map(|t| {
156+
crate::universal::request::ReasoningConfig::from((
157+
t,
158+
typed_params.max_tokens,
159+
))
160+
})
159161
}),
160162
metadata: typed_params
161163
.metadata
@@ -1454,4 +1456,66 @@ mod tests {
14541456
"message_stop should return None (terminal event)"
14551457
);
14561458
}
1459+
1460+
#[test]
1461+
fn test_anthropic_thinking_to_openai_effort_with_small_max_tokens() {
1462+
// BUG REPRO: Anthropic request with thinking budget_tokens=1024 and max_tokens=1024
1463+
// should translate to OpenAI reasoning_effort="high" (budget is 100% of max_tokens).
1464+
// Instead, From<&Thinking> ignores max_tokens and uses DEFAULT=4096,
1465+
// computing 1024/4096=0.25 → "low".
1466+
use crate::providers::openai::adapter::OpenAIAdapter;
1467+
1468+
let anthropic_adapter = AnthropicAdapter;
1469+
let openai_adapter = OpenAIAdapter;
1470+
1471+
let anthropic_payload = json!({
1472+
"model": "claude-sonnet-4-20250514",
1473+
"max_tokens": 1024,
1474+
"messages": [{"role": "user", "content": "Think hard about 2+2"}],
1475+
"thinking": {
1476+
"type": "enabled",
1477+
"budget_tokens": 1024
1478+
}
1479+
});
1480+
1481+
// Anthropic → Universal
1482+
let universal = anthropic_adapter
1483+
.request_to_universal(anthropic_payload)
1484+
.unwrap();
1485+
1486+
// Verify the universal representation has reasoning
1487+
let reasoning = universal
1488+
.params
1489+
.reasoning
1490+
.as_ref()
1491+
.expect("reasoning should exist");
1492+
assert_eq!(reasoning.enabled, Some(true));
1493+
assert_eq!(reasoning.budget_tokens, Some(1024));
1494+
1495+
// The bug: effort is computed as Low (1024/4096=0.25) instead of High (1024/1024=1.0)
1496+
assert_eq!(
1497+
reasoning.effort,
1498+
Some(ReasoningEffort::High),
1499+
"budget_tokens=1024 with max_tokens=1024 should be High effort, not {:?}",
1500+
reasoning.effort
1501+
);
1502+
1503+
// Universal → OpenAI
1504+
let mut universal_for_openai = universal;
1505+
universal_for_openai.model = Some("gpt-5-nano".to_string());
1506+
let openai_result = openai_adapter
1507+
.request_from_universal(&universal_for_openai)
1508+
.unwrap();
1509+
1510+
assert_eq!(
1511+
openai_result
1512+
.get("reasoning_effort")
1513+
.unwrap()
1514+
.as_str()
1515+
.unwrap(),
1516+
"high",
1517+
"OpenAI should get reasoning_effort=high, got: {}",
1518+
openai_result.get("reasoning_effort").unwrap()
1519+
);
1520+
}
14571521
}

crates/lingua/src/providers/bedrock/adapter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ impl ProviderAdapter for BedrockAdapter {
8080
.as_ref()
8181
.and_then(|fields| fields.get("thinking"))
8282
.and_then(|v| serde_json::from_value::<Thinking>(v.clone()).ok())
83-
.map(|t| ReasoningConfig::from(&t));
83+
.map(|t| ReasoningConfig::from((&t, max_tokens)));
8484

8585
let mut params = UniversalParams {
8686
temperature,

crates/lingua/src/universal/reasoning.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,26 @@ impl From<&Thinking> for ReasoningConfig {
176176
}
177177
}
178178

179+
/// Convert Anthropic Thinking to ReasoningConfig with max_tokens context.
180+
///
181+
/// Uses actual max_tokens for budget→effort conversion instead of DEFAULT_MAX_TOKENS.
182+
/// This produces correct effort levels when max_tokens differs from the default.
183+
impl From<(&Thinking, Option<i64>)> for ReasoningConfig {
184+
fn from((thinking, max_tokens): (&Thinking, Option<i64>)) -> Self {
185+
let enabled = matches!(thinking.thinking_type, ThinkingType::Enabled);
186+
let budget_tokens = thinking.budget_tokens;
187+
let effort = budget_tokens.map(|b| budget_to_effort(b, max_tokens));
188+
189+
ReasoningConfig {
190+
enabled: Some(enabled),
191+
effort,
192+
budget_tokens,
193+
canonical: Some(ReasoningCanonical::BudgetTokens),
194+
..Default::default()
195+
}
196+
}
197+
}
198+
179199
/// Convert OpenAI ReasoningEffort to ReasoningConfig with context (for Chat API).
180200
///
181201
/// OpenAI's effort is canonical. Budget_tokens is derived.
@@ -519,6 +539,18 @@ mod tests {
519539
assert_eq!(config.budget_tokens, Some(2048));
520540
}
521541

542+
#[test]
543+
fn test_from_anthropic_thinking_without_max_tokens_uses_default() {
544+
// Without max_tokens context, falls back to DEFAULT_MAX_TOKENS.
545+
// budget=1024 / 4096 = 0.25 → Low
546+
let thinking = Thinking {
547+
thinking_type: ThinkingType::Enabled,
548+
budget_tokens: Some(1024),
549+
};
550+
let config = ReasoningConfig::from(&thinking);
551+
assert_eq!(config.effort, Some(ReasoningEffort::Low));
552+
}
553+
522554
#[test]
523555
fn test_to_anthropic_thinking() {
524556
let config = ReasoningConfig {

fuzzbugs.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Fuzz Bugs
2+
3+
Cross-provider translation bugs found via fuzz testing (OpenAI -> Anthropic -> OpenAI).
4+
These affect real model behavior when proxying requests between providers.
5+
6+
## Cross-provider (OpenAI <-> Anthropic)
7+
8+
- [x] ~~**Message content corruption**: Messages get reordered/merged through Anthropic conversion.~~ NOT A BUG -- all cases are degenerate fuzz inputs (system-only messages, tool messages without preceding assistant). No real API caller sends these.
9+
- [x] ~~**Anthropic injects default token_budget**: When no max_tokens is specified, Anthropic adapter injects `token_budget: {tokens: 4096, type: output_tokens}`.~~ NOT A BUG -- Anthropic requires max_tokens. Injecting a default is correct behavior.
10+
- [x] ~~**Anthropic injects tool_choice**: When no tool_choice is set, Anthropic adapter injects `{disable_parallel: true, mode: Auto}`.~~ NOT A BUG -- Anthropic needs tool_choice to express `disable_parallel_tool_use` (OpenAI's `parallel_tool_calls: false`). `auto` is the default in both APIs.
11+
- [x] **reasoning.effort changes through heuristic**: effort "high" or "medium" becomes "low" after roundtrip through Anthropic budget_tokens conversion. 3x in fuzz. FIXED -- a new `From<(&Thinking, Option<i64>)>` impl accepts `max_tokens` context for accurate budget-to-effort conversion (the plain `From<&Thinking>` impl still falls back to DEFAULT_MAX_TOKENS); both the Anthropic and Bedrock adapters now pass the actual max_tokens.
12+
- [ ] **response_format upgrades JsonObject to JsonSchema**: Anthropic doesn't support json_object mode, so format_type changes from JsonObject to JsonSchema with a fabricated schema. 6x in fuzz.
13+
14+
## Same-provider (OpenAI roundtrip)
15+
16+
- [ ] **tool_choice lost**: Raw tool_choice not saved to extras, lost during canonical roundtrip. 64x in saved snapshots.
17+
- [ ] **reasoning_effort lost**: Raw reasoning_effort not saved to extras. 57x in saved snapshots. Also "minimal" normalized to "low".
18+
- [ ] **response_format lost**: Raw response_format not saved to extras. Strict and json_schema fields lost. 32x in saved snapshots.
19+
- [ ] **tools[N].function.strict lost**: strict:null normalized to field omission. 14x in saved snapshots.
20+
- [ ] **stop string normalized to array**: Single stop string becomes array. ~20x in fuzz.

payloads/cases/params.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,29 @@ export const paramsCases: TestCaseCollection = {
10371037
bedrock: null,
10381038
},
10391039

1040+
// Anthropic thinking enabled with budget_tokens - exercises budget→effort conversion
1041+
// with small max_tokens (1024). budget/max_tokens = 100% → high effort.
1042+
thinkingEnabledParam: {
1043+
"chat-completions": {
1044+
model: OPENAI_RESPONSES_MODEL,
1045+
messages: [{ role: "user", content: "Think hard about 2+2" }],
1046+
reasoning_effort: "high",
1047+
},
1048+
responses: {
1049+
model: OPENAI_RESPONSES_MODEL,
1050+
input: [{ role: "user", content: "Think hard about 2+2" }],
1051+
reasoning: { effort: "high" },
1052+
},
1053+
anthropic: {
1054+
model: ANTHROPIC_MODEL,
1055+
max_tokens: 1024,
1056+
messages: [{ role: "user", content: "Think hard about 2+2" }],
1057+
thinking: { type: "enabled", budget_tokens: 1024 },
1058+
},
1059+
google: null,
1060+
bedrock: null,
1061+
},
1062+
10401063
// === Output Config (structured output) ===
10411064

10421065
outputFormatJsonSchemaParam: {

payloads/snapshots/thinkingEnabledParam/anthropic/request.json

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

payloads/snapshots/thinkingEnabledParam/chat-completions/request.json

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

payloads/snapshots/thinkingEnabledParam/responses/request.json

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)