From 8346333d3a8e8b4972fd8144ab80c1a68010f164 Mon Sep 17 00:00:00 2001 From: Frank Speiser Date: Sun, 31 May 2026 15:59:27 -0400 Subject: [PATCH] Bump confer / triangulate token budgets for OpenAI + Gemini Follow-up to PR #26. The same MAX_TOKENS truncation pattern is hitting triangulate on gpt-5 and both confer + triangulate on gemini-2.5-pro: reasoning tokens count against `max_completion_tokens`, so the visible answer gets cut mid-emission. New _PROVIDER_TOKEN_BUDGETS entries (all 6144): - openai.triangulate (was 2048 reasoning-default) - gemini.confer (was 1500 non-reasoning ceiling) - gemini.triangulate (was 1500 non-reasoning ceiling) Notable side-finding (logged as a follow-up, NOT fixed here): gemini-2.5-pro is NOT currently tagged in `PROVIDER_CAPS["gemini"]["reasoning_prefixes"]`, so non-overridden purposes (debate, synth, audit) fall through to the SMALLER non-reasoning ceilings (1500 / 1024 / 768) instead of the reasoning default of 2048. The provider-specific override in this PR short-circuits that for confer + triangulate, but if the user starts hitting caps on gemini debate / synth / audit, the right fix is adding `"reasoning_prefixes": ("gemini-2.5-pro",)` to the gemini PROVIDER_CAPS entry. Out of scope for this PR. Tests: - openai.triangulate = 6144 - gemini.confer = 6144, gemini.triangulate = 6144 - gemini.debate falls through to 1500 (the non-reasoning ceiling, documenting the follow-up gap explicitly) Full suite (38 scripts) passes. 
Co-Authored-By: Claude Opus 4.7 --- scripts/test_provider_token_budgets.py | 18 +++++++++++++++--- servers/python/crosscheck_server.py | 14 ++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/scripts/test_provider_token_budgets.py b/scripts/test_provider_token_budgets.py index 3bcfcbf..4922099 100644 --- a/scripts/test_provider_token_budgets.py +++ b/scripts/test_provider_token_budgets.py @@ -60,14 +60,26 @@ def main() -> int: srv._CONFIG_PIN_STARTUP_DONE = True # ------------------------------------------------------------------ - # 1) Shipped openai confer/debate override = 6144 (the actual ship) + # 1) Shipped provider overrides = 6144 on the multi-perspective flows + # (gpt-5 and gemini-2.5-pro both burn reasoning tokens against the + # completion budget; bumping confer/debate/triangulate prevents + # mid-answer MAX_TOKENS truncation) # ------------------------------------------------------------------ - assert srv._budget_for_purpose("confer", "openai", "gpt-5") == 6144 - assert srv._budget_for_purpose("debate", "openai", "gpt-5") == 6144 + # OpenAI + assert srv._budget_for_purpose("confer", "openai", "gpt-5") == 6144 + assert srv._budget_for_purpose("debate", "openai", "gpt-5") == 6144 + assert srv._budget_for_purpose("triangulate", "openai", "gpt-5") == 6144 # Non-reasoning OpenAI model: the override still applies — it's keyed # on provider, not model class. (The whole point is that the provider # has a quirky reasoning-token accounting model.) assert srv._budget_for_purpose("confer", "openai", "gpt-test") == 6144 + # Gemini — only confer + triangulate were bumped this round; debate + # falls through to the non-reasoning ceiling because gemini-2.5-pro + # isn't currently tagged in PROVIDER_CAPS.reasoning_prefixes (a + # separate fix; flagged as a follow-up). 
+ assert srv._budget_for_purpose("confer", "gemini", "gemini-2.5-pro") == 6144 + assert srv._budget_for_purpose("triangulate", "gemini", "gemini-2.5-pro") == 6144 + assert srv._budget_for_purpose("debate", "gemini", "gemini-2.5-pro") == 1500 # Non-overridden purposes for OpenAI fall through to the tier table. # audit / synth on a NON-reasoning model -> non-reasoning ceiling. diff --git a/servers/python/crosscheck_server.py b/servers/python/crosscheck_server.py index 6f5cbee..3c022af 100755 --- a/servers/python/crosscheck_server.py +++ b/servers/python/crosscheck_server.py @@ -2529,8 +2529,18 @@ def _fts_index_transcript(kind: str, payload: dict, path: str, ts_ms: int) -> No # is empirically the bottom of the "always lands cleanly" zone. _PROVIDER_TOKEN_BUDGETS: dict[str, dict[str, int]] = { "openai": { - "confer": 6144, - "debate": 6144, + "confer": 6144, + "debate": 6144, + # triangulate runs N parallel perspectives + a synthesis step; it + # shares confer's structured-emission shape, so it hits the same + # MAX_TOKENS wall when gpt-5's reasoning eats the budget. + "triangulate": 6144, + }, + # gemini-2.5-pro is reasoning-class too — same MAX_TOKENS truncation on + # multi-perspective flows (confer / triangulate) at the 1500 non-reasoning ceiling. + "gemini": { + "confer": 6144, + "triangulate": 6144, }, }