From 47cfc348b37336e939d7281d5b39979582dd252a Mon Sep 17 00:00:00 2001
From: Frank Speiser <frank.speiser@gmail.com>
Date: Sun, 31 May 2026 20:04:34 -0400
Subject: [PATCH] Tag gemini-2.5-pro as reasoning-class
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the follow-up flagged in PR #27. gemini-2.5-pro burns reasoning
tokens just like gpt-5 and claude-opus-4-7 — it should have been in
PROVIDER_CAPS["gemini"]["reasoning_prefixes"] from day one.

Effects of the tag:
- `_is_reasoning_model("gemini", "gemini-2.5-pro")` now returns True.
- `_budget_for_purpose` picks the reasoning-class ceilings (2048) for
  every non-overridden purpose instead of falling through to the
  smaller non-reasoning ceilings:
    debate: 1500 -> 2048
    synth:  1024 -> 2048
    audit:   768 -> 2048
    moderator/orchestrate/plan/review/coordinate: already 2048
  confer + triangulate keep the PR #27 override at 6144.
- Prompt adapters now strip "think step by step" / "think out loud"
  preambles before sending to gemini-2.5-pro. The stripping logic
  itself already shipped in PR #19; the tag opts gemini-2.5-pro into
  it, matching the behavior for the other reasoning families.

Scoped match: the prefix is "gemini-2.5-pro", so a model name that does
not start with it (e.g. gemini-1.5-flash or gemini-2.0-flash) still
falls through to the non-reasoning ceilings. The test pins this for
gemini-1.5-flash.

No change to the Gemini API call itself — `generationConfig.temperature`
and `generationConfig.maxOutputTokens` are both accepted by
gemini-2.5-pro.

Tests (scripts/test_provider_token_budgets.py):
- `_is_reasoning_model("gemini", "gemini-2.5-pro")` is True
- gemini debate / synth / audit all = 2048 (reasoning default)
- gemini confer / triangulate stay = 6144 (PR #27 override)
- An untagged gemini model (gemini-1.5-flash) is not reasoning-class
  and still gets the non-reasoning audit ceiling (768)

Full suite (38 scripts) passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/test_provider_token_budgets.py | 18 +++++++++++++-----
 servers/python/crosscheck_server.py    |  3 ++-
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/scripts/test_provider_token_budgets.py b/scripts/test_provider_token_budgets.py
index 4922099..0314c83 100644
--- a/scripts/test_provider_token_budgets.py
+++ b/scripts/test_provider_token_budgets.py
@@ -73,13 +73,21 @@ def main() -> int:
     # on provider, not model class. (The whole point is that the provider
     # has a quirky reasoning-token accounting model.)
     assert srv._budget_for_purpose("confer", "openai", "gpt-test") == 6144
-    # Gemini — only confer + triangulate were bumped this round; debate
-    # falls through to the non-reasoning ceiling because gemini-2.5-pro
-    # isn't currently tagged in PROVIDER_CAPS.reasoning_prefixes (a
-    # separate fix; flagged as a follow-up).
+    # Gemini — confer + triangulate use the shipped override (6144).
+    # Other purposes use the reasoning-safe default (2048) because
+    # gemini-2.5-pro is tagged in PROVIDER_CAPS.reasoning_prefixes —
+    # no more falling through to the smaller non-reasoning ceilings.
     assert srv._budget_for_purpose("confer",      "gemini", "gemini-2.5-pro") == 6144
     assert srv._budget_for_purpose("triangulate", "gemini", "gemini-2.5-pro") == 6144
-    assert srv._budget_for_purpose("debate",      "gemini", "gemini-2.5-pro") == 1500
+    assert srv._budget_for_purpose("debate",      "gemini", "gemini-2.5-pro") == 2048
+    assert srv._budget_for_purpose("synth",       "gemini", "gemini-2.5-pro") == 2048
+    assert srv._budget_for_purpose("audit",       "gemini", "gemini-2.5-pro") == 2048
+    # _is_reasoning_model returns True now
+    assert srv._is_reasoning_model("gemini", "gemini-2.5-pro") is True
+    # A hypothetical non-reasoning gemini (e.g. a future flash variant)
+    # still falls through to the non-reasoning ceilings
+    assert srv._is_reasoning_model("gemini", "gemini-1.5-flash") is False
+    assert srv._budget_for_purpose("audit", "gemini", "gemini-1.5-flash") == 768
 
     # Non-overridden purposes for OpenAI fall through to the tier table.
     # audit / synth on a NON-reasoning model -> non-reasoning ceiling.
diff --git a/servers/python/crosscheck_server.py b/servers/python/crosscheck_server.py
index 3c022af..03e2e9b 100755
--- a/servers/python/crosscheck_server.py
+++ b/servers/python/crosscheck_server.py
@@ -1921,7 +1921,8 @@ def _bucket_for(provider_name: str) -> _Bucket:
     "mistral":   {"family": "openai_chat", "system_role": "inline",   "supports_temperature": True},
     "groq":      {"family": "openai_chat", "system_role": "inline",   "supports_temperature": True},
     "deepseek":  {"family": "openai_chat", "system_role": "inline",   "supports_temperature": True},
-    "gemini":    {"family": "gemini",      "system_role": "separate", "supports_temperature": True},
+    "gemini":    {"family": "gemini",      "system_role": "separate", "supports_temperature": True,
+                   "reasoning_prefixes": ("gemini-2.5-pro",)},
 }