Skip to content

Commit 31b981d

Browse files
clutchski and claude committed
Fix reasoning effort loss in Anthropic/Bedrock thinking → effort conversion
When converting Anthropic `thinking.budget_tokens` to universal `ReasoningConfig`, the budget→effort heuristic used DEFAULT_MAX_TOKENS (4096) instead of the actual request max_tokens. This caused incorrect effort levels when max_tokens differed from the default (e.g., budget=1024 with max_tokens=1024 should map to High, but was computed as Low). Added `From<(&Thinking, Option<i64>)>` impl that accepts max_tokens context, and updated both Anthropic and Bedrock adapters to pass actual max_tokens. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3f83160 commit 31b981d

8 files changed

Lines changed: 180 additions & 5 deletions

File tree

crates/lingua/src/providers/anthropic/adapter.rs

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,12 @@ impl ProviderAdapter for AnthropicAdapter {
152152
}
153153
})
154154
.or_else(|| {
155-
typed_params
156-
.thinking
157-
.as_ref()
158-
.map(crate::universal::request::ReasoningConfig::from)
155+
typed_params.thinking.as_ref().map(|t| {
156+
crate::universal::request::ReasoningConfig::from((
157+
t,
158+
typed_params.max_tokens,
159+
))
160+
})
159161
}),
160162
metadata: typed_params
161163
.metadata
@@ -1454,4 +1456,66 @@ mod tests {
14541456
"message_stop should return None (terminal event)"
14551457
);
14561458
}
1459+
1460+
#[test]
1461+
fn test_anthropic_thinking_to_openai_effort_with_small_max_tokens() {
1462+
// BUG REPRO: Anthropic request with thinking budget_tokens=1024 and max_tokens=1024
1463+
// should translate to OpenAI reasoning_effort="high" (budget is 100% of max_tokens).
1464+
// Instead, From<&Thinking> ignores max_tokens and uses DEFAULT=4096,
1465+
// computing 1024/4096=0.25 → "low".
1466+
use crate::providers::openai::adapter::OpenAIAdapter;
1467+
1468+
let anthropic_adapter = AnthropicAdapter;
1469+
let openai_adapter = OpenAIAdapter;
1470+
1471+
let anthropic_payload = json!({
1472+
"model": "claude-sonnet-4-20250514",
1473+
"max_tokens": 1024,
1474+
"messages": [{"role": "user", "content": "Think hard about 2+2"}],
1475+
"thinking": {
1476+
"type": "enabled",
1477+
"budget_tokens": 1024
1478+
}
1479+
});
1480+
1481+
// Anthropic → Universal
1482+
let universal = anthropic_adapter
1483+
.request_to_universal(anthropic_payload)
1484+
.unwrap();
1485+
1486+
// Verify the universal representation has reasoning
1487+
let reasoning = universal
1488+
.params
1489+
.reasoning
1490+
.as_ref()
1491+
.expect("reasoning should exist");
1492+
assert_eq!(reasoning.enabled, Some(true));
1493+
assert_eq!(reasoning.budget_tokens, Some(1024));
1494+
1495+
// The bug: effort is computed as Low (1024/4096=0.25) instead of High (1024/1024=1.0)
1496+
assert_eq!(
1497+
reasoning.effort,
1498+
Some(ReasoningEffort::High),
1499+
"budget_tokens=1024 with max_tokens=1024 should be High effort, not {:?}",
1500+
reasoning.effort
1501+
);
1502+
1503+
// Universal → OpenAI
1504+
let mut universal_for_openai = universal;
1505+
universal_for_openai.model = Some("gpt-5-nano".to_string());
1506+
let openai_result = openai_adapter
1507+
.request_from_universal(&universal_for_openai)
1508+
.unwrap();
1509+
1510+
assert_eq!(
1511+
openai_result
1512+
.get("reasoning_effort")
1513+
.unwrap()
1514+
.as_str()
1515+
.unwrap(),
1516+
"high",
1517+
"OpenAI should get reasoning_effort=high, got: {}",
1518+
openai_result.get("reasoning_effort").unwrap()
1519+
);
1520+
}
14571521
}

crates/lingua/src/providers/bedrock/adapter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ impl ProviderAdapter for BedrockAdapter {
8080
.as_ref()
8181
.and_then(|fields| fields.get("thinking"))
8282
.and_then(|v| serde_json::from_value::<Thinking>(v.clone()).ok())
83-
.map(|t| ReasoningConfig::from(&t));
83+
.map(|t| ReasoningConfig::from((&t, max_tokens)));
8484

8585
let mut params = UniversalParams {
8686
temperature,

crates/lingua/src/universal/reasoning.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,26 @@ impl From<&Thinking> for ReasoningConfig {
176176
}
177177
}
178178

179+
/// Convert Anthropic Thinking to ReasoningConfig with max_tokens context.
180+
///
181+
/// Uses actual max_tokens for budget→effort conversion instead of DEFAULT_MAX_TOKENS.
182+
/// This produces correct effort levels when max_tokens differs from the default.
183+
impl From<(&Thinking, Option<i64>)> for ReasoningConfig {
184+
fn from((thinking, max_tokens): (&Thinking, Option<i64>)) -> Self {
185+
let enabled = matches!(thinking.thinking_type, ThinkingType::Enabled);
186+
let budget_tokens = thinking.budget_tokens;
187+
let effort = budget_tokens.map(|b| budget_to_effort(b, max_tokens));
188+
189+
ReasoningConfig {
190+
enabled: Some(enabled),
191+
effort,
192+
budget_tokens,
193+
canonical: Some(ReasoningCanonical::BudgetTokens),
194+
..Default::default()
195+
}
196+
}
197+
}
198+
179199
/// Convert OpenAI ReasoningEffort to ReasoningConfig with context (for Chat API).
180200
///
181201
/// OpenAI's effort is canonical. Budget_tokens is derived.
@@ -519,6 +539,18 @@ mod tests {
519539
assert_eq!(config.budget_tokens, Some(2048));
520540
}
521541

542+
#[test]
543+
fn test_from_anthropic_thinking_without_max_tokens_uses_default() {
544+
// Without max_tokens context, falls back to DEFAULT_MAX_TOKENS.
545+
// budget=1024 / 4096 = 0.25 → Low
546+
let thinking = Thinking {
547+
thinking_type: ThinkingType::Enabled,
548+
budget_tokens: Some(1024),
549+
};
550+
let config = ReasoningConfig::from(&thinking);
551+
assert_eq!(config.effort, Some(ReasoningEffort::Low));
552+
}
553+
522554
#[test]
523555
fn test_to_anthropic_thinking() {
524556
let config = ReasoningConfig {

fuzzbugs.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Fuzz Bugs
2+
3+
Cross-provider translation bugs found via fuzz testing (OpenAI -> Anthropic -> OpenAI).
4+
These affect real model behavior when proxying requests between providers.
5+
6+
## Cross-provider (OpenAI <-> Anthropic)
7+
8+
- [x] ~~**Message content corruption**: Messages get reordered/merged through Anthropic conversion.~~ NOT A BUG -- all cases are degenerate fuzz inputs (system-only messages, tool messages without preceding assistant). No real API caller sends these.
9+
- [x] ~~**Anthropic injects default token_budget**: When no max_tokens is specified, Anthropic adapter injects `token_budget: {tokens: 4096, type: output_tokens}`.~~ NOT A BUG -- Anthropic requires max_tokens. Injecting a default is correct behavior.
10+
- [x] ~~**Anthropic injects tool_choice**: When no tool_choice is set, Anthropic adapter injects `{disable_parallel: true, mode: Auto}`.~~ NOT A BUG -- Anthropic needs tool_choice to express `disable_parallel_tool_use` (OpenAI's `parallel_tool_calls: false`). `auto` is the default in both APIs.
11+
- [x] **reasoning.effort changes through heuristic**: effort "high" or "medium" becomes "low" after roundtrip through Anthropic budget_tokens conversion. 3x in fuzz. FIXED -- a new `From<(&Thinking, Option<i64>)>` impl accepts `max_tokens` context for accurate budget-to-effort conversion (the plain `From<&Thinking>` impl still falls back to DEFAULT_MAX_TOKENS); both the Anthropic and Bedrock adapters now pass the actual max_tokens.
12+
- [ ] **response_format upgrades JsonObject to JsonSchema**: Anthropic doesn't support json_object mode, so format_type changes from JsonObject to JsonSchema with a fabricated schema. 6x in fuzz.
13+
14+
## Same-provider (OpenAI roundtrip)
15+
16+
- [ ] **tool_choice lost**: Raw tool_choice not saved to extras, lost during canonical roundtrip. 64x in saved snapshots.
17+
- [ ] **reasoning_effort lost**: Raw reasoning_effort not saved to extras. 57x in saved snapshots. Also "minimal" normalized to "low".
18+
- [ ] **response_format lost**: Raw response_format not saved to extras. Strict and json_schema fields lost. 32x in saved snapshots.
19+
- [ ] **tools[N].function.strict lost**: strict:null normalized to field omission. 14x in saved snapshots.
20+
- [ ] **stop string normalized to array**: Single stop string becomes array. ~20x in fuzz.

payloads/cases/params.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,29 @@ export const paramsCases: TestCaseCollection = {
10371037
bedrock: null,
10381038
},
10391039

1040+
// Anthropic thinking enabled with budget_tokens - exercises budget→effort conversion
1041+
// with small max_tokens (1024). budget/max_tokens = 100% → high effort.
1042+
thinkingEnabledParam: {
1043+
"chat-completions": {
1044+
model: OPENAI_RESPONSES_MODEL,
1045+
messages: [{ role: "user", content: "Think hard about 2+2" }],
1046+
reasoning_effort: "high",
1047+
},
1048+
responses: {
1049+
model: OPENAI_RESPONSES_MODEL,
1050+
input: [{ role: "user", content: "Think hard about 2+2" }],
1051+
reasoning: { effort: "high" },
1052+
},
1053+
anthropic: {
1054+
model: ANTHROPIC_MODEL,
1055+
max_tokens: 1024,
1056+
messages: [{ role: "user", content: "Think hard about 2+2" }],
1057+
thinking: { type: "enabled", budget_tokens: 1024 },
1058+
},
1059+
google: null,
1060+
bedrock: null,
1061+
},
1062+
10401063
// === Output Config (structured output) ===
10411064

10421065
outputFormatJsonSchemaParam: {

payloads/snapshots/thinkingEnabledParam/anthropic/request.json

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

payloads/snapshots/thinkingEnabledParam/chat-completions/request.json

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

payloads/snapshots/thinkingEnabledParam/responses/request.json

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments (0)