From 47cfc348b37336e939d7281d5b39979582dd252a Mon Sep 17 00:00:00 2001 From: Frank Speiser Date: Sun, 31 May 2026 20:04:34 -0400 Subject: [PATCH] Tag gemini-2.5-pro as reasoning-class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the follow-up flagged in PR #27. gemini-2.5-pro burns reasoning tokens just like gpt-5 and claude-opus-4-7 — it should have been in PROVIDER_CAPS["gemini"]["reasoning_prefixes"] from day one. Effects of the tag: - `_is_reasoning_model("gemini", "gemini-2.5-pro")` now returns True. - `_budget_for_purpose` picks the reasoning-class ceilings (2048) for every non-overridden purpose instead of falling through to the smaller non-reasoning ceilings: debate: 1500 -> 2048 synth: 1024 -> 2048 audit: 768 -> 2048 moderator/orchestrate/plan/review/coordinate: already 2048 confer + triangulate keep the PR #27 override at 6144. - Prompt adapters now strip "think step by step" / "think out loud" preambles before sending to gemini-2.5-pro (the stripping logic itself shipped in PR #19; this tag extends it to gemini-2.5-pro, matching the other reasoning families). Future-proof: the prefix is "gemini-2.5-pro" so a non-reasoning variant such as gemini-1.5-flash / gemini-2.0-flash correctly falls through to the non-reasoning ceilings (verified for gemini-1.5-flash in the test). No change to the Gemini API call itself — `generationConfig.temperature` + `maxOutputTokens` are both accepted by gemini-2.5-pro. Tests (scripts/test_provider_token_budgets.py): - `_is_reasoning_model("gemini", "gemini-2.5-pro")` is True - gemini debate / synth / audit all = 2048 (reasoning default) - gemini confer / triangulate stay = 6144 (PR #27 override) - A non-reasoning gemini (gemini-1.5-flash) is not tagged as reasoning and still falls through to the non-reasoning ceiling (audit = 768) Full suite (38 scripts) passes. 
Co-Authored-By: Claude Opus 4.7 --- scripts/test_provider_token_budgets.py | 18 +++++++++++++----- servers/python/crosscheck_server.py | 3 ++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/scripts/test_provider_token_budgets.py b/scripts/test_provider_token_budgets.py index 4922099..0314c83 100644 --- a/scripts/test_provider_token_budgets.py +++ b/scripts/test_provider_token_budgets.py @@ -73,13 +73,21 @@ def main() -> int: # on provider, not model class. (The whole point is that the provider # has a quirky reasoning-token accounting model.) assert srv._budget_for_purpose("confer", "openai", "gpt-test") == 6144 - # Gemini — only confer + triangulate were bumped this round; debate - # falls through to the non-reasoning ceiling because gemini-2.5-pro - # isn't currently tagged in PROVIDER_CAPS.reasoning_prefixes (a - # separate fix; flagged as a follow-up). + # Gemini — confer + triangulate use the shipped override (6144). + # Other purposes use the reasoning-safe default (2048) because + # gemini-2.5-pro is tagged in PROVIDER_CAPS.reasoning_prefixes — + # no more falling through to the smaller non-reasoning ceilings. assert srv._budget_for_purpose("confer", "gemini", "gemini-2.5-pro") == 6144 assert srv._budget_for_purpose("triangulate", "gemini", "gemini-2.5-pro") == 6144 - assert srv._budget_for_purpose("debate", "gemini", "gemini-2.5-pro") == 1500 + assert srv._budget_for_purpose("debate", "gemini", "gemini-2.5-pro") == 2048 + assert srv._budget_for_purpose("synth", "gemini", "gemini-2.5-pro") == 2048 + assert srv._budget_for_purpose("audit", "gemini", "gemini-2.5-pro") == 2048 + # _is_reasoning_model returns True now + assert srv._is_reasoning_model("gemini", "gemini-2.5-pro") is True + # A hypothetical non-reasoning gemini (e.g. 
a future flash variant) + # still falls through to the non-reasoning ceilings + assert srv._is_reasoning_model("gemini", "gemini-1.5-flash") is False + assert srv._budget_for_purpose("audit", "gemini", "gemini-1.5-flash") == 768 # Non-overridden purposes for OpenAI fall through to the tier table. # audit / synth on a NON-reasoning model -> non-reasoning ceiling. diff --git a/servers/python/crosscheck_server.py b/servers/python/crosscheck_server.py index 3c022af..03e2e9b 100755 --- a/servers/python/crosscheck_server.py +++ b/servers/python/crosscheck_server.py @@ -1921,7 +1921,8 @@ def _bucket_for(provider_name: str) -> _Bucket: "mistral": {"family": "openai_chat", "system_role": "inline", "supports_temperature": True}, "groq": {"family": "openai_chat", "system_role": "inline", "supports_temperature": True}, "deepseek": {"family": "openai_chat", "system_role": "inline", "supports_temperature": True}, - "gemini": {"family": "gemini", "system_role": "separate", "supports_temperature": True}, + "gemini": {"family": "gemini", "system_role": "separate", "supports_temperature": True, + "reasoning_prefixes": ("gemini-2.5-pro",)}, }