From 8346333d3a8e8b4972fd8144ab80c1a68010f164 Mon Sep 17 00:00:00 2001
From: Frank Speiser <frank.speiser@gmail.com>
Date: Sun, 31 May 2026 15:59:27 -0400
Subject: [PATCH] Bump confer / triangulate token budgets for OpenAI + Gemini

Follow-up to PR #26. The same MAX_TOKENS truncation pattern is hitting
triangulate on gpt-5 and both confer + triangulate on gemini-2.5-pro:
reasoning tokens count against `max_completion_tokens`, so the visible
answer gets cut mid-emission.

New _PROVIDER_TOKEN_BUDGETS entries (all 6144):
- openai.triangulate    (was 2048 reasoning-default)
- gemini.confer         (was 1500 non-reasoning ceiling)
- gemini.triangulate    (was 1500 non-reasoning ceiling)

Notable side-finding (logged as a follow-up, NOT fixed here):
gemini-2.5-pro is NOT currently tagged in
`PROVIDER_CAPS["gemini"]["reasoning_prefixes"]`, so non-overridden
purposes (debate, synth, audit) fall through to the SMALLER
non-reasoning ceilings (1500 / 1024 / 768) instead of the reasoning
default of 2048. The provider-specific override in this PR
short-circuits that for confer + triangulate, but if the user starts
hitting caps on gemini debate / synth / audit, the right fix is
adding `"reasoning_prefixes": ("gemini-2.5-pro",)` to the gemini
PROVIDER_CAPS entry. Out of scope for this PR.

Tests:
- openai.triangulate = 6144
- gemini.confer = 6144, gemini.triangulate = 6144
- gemini.debate falls through to 1500 (the non-reasoning ceiling,
  documenting the follow-up gap explicitly)

Full suite (38 scripts) passes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/test_provider_token_budgets.py | 18 +++++++++++++++---
 servers/python/crosscheck_server.py    | 14 ++++++++++++--
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/scripts/test_provider_token_budgets.py b/scripts/test_provider_token_budgets.py
index 3bcfcbf..4922099 100644
--- a/scripts/test_provider_token_budgets.py
+++ b/scripts/test_provider_token_budgets.py
@@ -60,14 +60,26 @@ def main() -> int:
     srv._CONFIG_PIN_STARTUP_DONE = True
 
     # ------------------------------------------------------------------
-    # 1) Shipped openai confer/debate override = 6144 (the actual ship)
+    # 1) Shipped provider overrides = 6144 on the multi-perspective flows
+    #    (gpt-5 and gemini-2.5-pro both burn reasoning tokens against the
+    #    completion budget; bumping confer/debate/triangulate prevents
+    #    mid-answer MAX_TOKENS truncation)
     # ------------------------------------------------------------------
-    assert srv._budget_for_purpose("confer", "openai", "gpt-5")    == 6144
-    assert srv._budget_for_purpose("debate", "openai", "gpt-5")    == 6144
+    # OpenAI
+    assert srv._budget_for_purpose("confer",      "openai", "gpt-5")    == 6144
+    assert srv._budget_for_purpose("debate",      "openai", "gpt-5")    == 6144
+    assert srv._budget_for_purpose("triangulate", "openai", "gpt-5")    == 6144
     # Non-reasoning OpenAI model: the override still applies — it's keyed
     # on provider, not model class. (The whole point is that the provider
     # has a quirky reasoning-token accounting model.)
     assert srv._budget_for_purpose("confer", "openai", "gpt-test") == 6144
+    # Gemini — only confer + triangulate were bumped this round; debate
+    # falls through to the non-reasoning ceiling because gemini-2.5-pro
+    # isn't currently tagged in PROVIDER_CAPS.reasoning_prefixes (a
+    # separate fix; flagged as a follow-up).
+    assert srv._budget_for_purpose("confer",      "gemini", "gemini-2.5-pro") == 6144
+    assert srv._budget_for_purpose("triangulate", "gemini", "gemini-2.5-pro") == 6144
+    assert srv._budget_for_purpose("debate",      "gemini", "gemini-2.5-pro") == 1500
 
     # Non-overridden purposes for OpenAI fall through to the tier table.
     # audit / synth on a NON-reasoning model -> non-reasoning ceiling.
diff --git a/servers/python/crosscheck_server.py b/servers/python/crosscheck_server.py
index 6f5cbee..3c022af 100755
--- a/servers/python/crosscheck_server.py
+++ b/servers/python/crosscheck_server.py
@@ -2529,8 +2529,18 @@ def _fts_index_transcript(kind: str, payload: dict, path: str, ts_ms: int) -> No
 # is empirically the bottom of the "always lands cleanly" zone.
 _PROVIDER_TOKEN_BUDGETS: dict[str, dict[str, int]] = {
     "openai": {
-        "confer": 6144,
-        "debate": 6144,
+        "confer":      6144,
+        "debate":      6144,
+        # triangulate runs N parallel perspectives + a synthesis step; it
+        # shares confer's structured-emission shape, so it hits the same
+        # MAX_TOKENS wall when gpt-5's reasoning eats the budget.
+        "triangulate": 6144,
+    },
+    # gemini-2.5-pro is reasoning-class too — same MAX_TOKENS truncation on
+    # multi-perspective flows (confer / triangulate) at the old 1500 ceiling.
+    "gemini": {
+        "confer":      6144,
+        "triangulate": 6144,
     },
 }