Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions scripts/test_provider_token_budgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,21 @@ def main() -> int:
# on provider, not model class. (The whole point is that the provider
# has a quirky reasoning-token accounting model.)
assert srv._budget_for_purpose("confer", "openai", "gpt-test") == 6144
# Gemini — only confer + triangulate were bumped this round; debate
# falls through to the non-reasoning ceiling because gemini-2.5-pro
# isn't currently tagged in PROVIDER_CAPS.reasoning_prefixes (a
# separate fix; flagged as a follow-up).
# Gemini — confer + triangulate use the shipped override (6144).
# Other purposes use the reasoning-safe default (2048) because
# gemini-2.5-pro is now tagged in PROVIDER_CAPS.reasoning_prefixes, so it
# no longer falls through to the smaller non-reasoning ceilings.
assert srv._budget_for_purpose("confer", "gemini", "gemini-2.5-pro") == 6144
assert srv._budget_for_purpose("triangulate", "gemini", "gemini-2.5-pro") == 6144
assert srv._budget_for_purpose("debate", "gemini", "gemini-2.5-pro") == 1500
assert srv._budget_for_purpose("debate", "gemini", "gemini-2.5-pro") == 2048
assert srv._budget_for_purpose("synth", "gemini", "gemini-2.5-pro") == 2048
assert srv._budget_for_purpose("audit", "gemini", "gemini-2.5-pro") == 2048
# _is_reasoning_model returns True now
assert srv._is_reasoning_model("gemini", "gemini-2.5-pro") is True
# A gemini model that is not listed in reasoning_prefixes (e.g. a flash
# variant such as gemini-1.5-flash) still falls through to the
# non-reasoning ceilings.
assert srv._is_reasoning_model("gemini", "gemini-1.5-flash") is False
assert srv._budget_for_purpose("audit", "gemini", "gemini-1.5-flash") == 768

# Non-overridden purposes for OpenAI fall through to the tier table.
# audit / synth on a NON-reasoning model -> non-reasoning ceiling.
Expand Down
3 changes: 2 additions & 1 deletion servers/python/crosscheck_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -1921,7 +1921,8 @@ def _bucket_for(provider_name: str) -> _Bucket:
"mistral": {"family": "openai_chat", "system_role": "inline", "supports_temperature": True},
"groq": {"family": "openai_chat", "system_role": "inline", "supports_temperature": True},
"deepseek": {"family": "openai_chat", "system_role": "inline", "supports_temperature": True},
"gemini": {"family": "gemini", "system_role": "separate", "supports_temperature": True},
"gemini": {"family": "gemini", "system_role": "separate", "supports_temperature": True,
"reasoning_prefixes": ("gemini-2.5-pro",)},
}


Expand Down
Loading