Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions scripts/test_provider_token_budgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,26 @@ def main() -> int:
srv._CONFIG_PIN_STARTUP_DONE = True

# ------------------------------------------------------------------
# 1) Shipped openai confer/debate override = 6144 (the actual ship)
# 1) Shipped provider overrides = 6144 on the multi-perspective flows
# (gpt-5 and gemini-2.5-pro both burn reasoning tokens against the
# completion budget; bumping confer/debate/triangulate for OpenAI and
# confer/triangulate for Gemini prevents mid-answer MAX_TOKENS truncation)
# ------------------------------------------------------------------
assert srv._budget_for_purpose("confer", "openai", "gpt-5") == 6144
assert srv._budget_for_purpose("debate", "openai", "gpt-5") == 6144
# OpenAI
assert srv._budget_for_purpose("confer", "openai", "gpt-5") == 6144
assert srv._budget_for_purpose("debate", "openai", "gpt-5") == 6144
assert srv._budget_for_purpose("triangulate", "openai", "gpt-5") == 6144
# Non-reasoning OpenAI model: the override still applies — it's keyed
# on provider, not model class. (The whole point is that the provider
# has a quirky reasoning-token accounting model.)
assert srv._budget_for_purpose("confer", "openai", "gpt-test") == 6144
# Gemini — only confer + triangulate were bumped this round; debate
# falls through to the non-reasoning ceiling because gemini-2.5-pro
# isn't currently tagged in PROVIDER_CAPS.reasoning_prefixes (a
# separate fix; flagged as a follow-up).
assert srv._budget_for_purpose("confer", "gemini", "gemini-2.5-pro") == 6144
assert srv._budget_for_purpose("triangulate", "gemini", "gemini-2.5-pro") == 6144
assert srv._budget_for_purpose("debate", "gemini", "gemini-2.5-pro") == 1500

# Non-overridden purposes for OpenAI fall through to the tier table.
# audit / synth on a NON-reasoning model -> non-reasoning ceiling.
Expand Down
14 changes: 12 additions & 2 deletions servers/python/crosscheck_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2529,8 +2529,18 @@ def _fts_index_transcript(kind: str, payload: dict, path: str, ts_ms: int) -> No
# is empirically the bottom of the "always lands cleanly" zone.
_PROVIDER_TOKEN_BUDGETS: dict[str, dict[str, int]] = {
# Shape: provider name -> {purpose -> token budget override}.
"openai": {
# NOTE(review): "confer" and "debate" each appear twice below with the
# same value (this looks like the removed and added sides of a diff
# rendered together). Python keeps the last occurrence of a repeated
# key, so the resulting mapping is unaffected.
"confer": 6144,
"debate": 6144,
"confer": 6144,
"debate": 6144,
# triangulate runs N parallel perspectives + a synthesis step; it
# shares confer's structured-emission shape, so it hits the same
# MAX_TOKENS wall when gpt-5's reasoning eats the budget.
"triangulate": 6144,
},
# gemini-2.5-pro is reasoning-class too — same MAX_TOKENS truncation on
# multi-perspective flows (confer / triangulate) at the 2048 default.
# "debate" has no entry for this provider;
# scripts/test_provider_token_budgets.py expects it to resolve to 1500
# for gemini-2.5-pro.
"gemini": {
"confer": 6144,
"triangulate": 6144,
},
}

Expand Down
Loading