oracle-samples · fede-kamel · May 28, 2026 · May 28, 2026
diff --git a/src/locus/deepagent/factory.py b/src/locus/deepagent/factory.py
@@ -11,7 +11,7 @@
 - Typed termination::
 
       (ToolCalled(submit_tool) & ConfidenceMet(min_confidence))
-       | TokenLimit(max_tokens)
+       | TokenLimit(total_token_budget)   # only when budget is set
        | MaxIterations(max_iterations)
 
   greppable, unit-testable, and per-recipe overridable.
@@ -23,6 +23,31 @@
 This is a pure convenience layer over ``locus.Agent`` — it does not
 change agent semantics. Callers who need finer control can build the
 Agent directly.
+
+Naming note (since this trips people up):
+
+  ``total_token_budget``   — cumulative INPUT+OUTPUT tokens across
+                              ALL iterations of one run. Wired into
+                              ``TokenLimit(total_token_budget)``
+                              termination. ``None`` (default) means
+                              "no TokenLimit term" — the run is
+                              bounded only by ToolCalled+Confidence
+                              or MaxIterations.
+  ``max_output_tokens``    — per-completion output cap. Forwarded
+                              to ``AgentConfig.max_tokens`` and from
+                              there to the model provider's
+                              ``max_tokens`` request field on every
+                              completion. ``None`` = use the model's
+                              own default.
+
+**Breaking change in 0.2.0b23**: the old ``max_tokens=`` parameter
+(which used to mean total-run budget but read like a per-completion
+cap to anyone familiar with OpenAI / Gemini / Anthropic SDKs) was
+removed. Use ``total_token_budget=`` for run-level termination and
+``max_output_tokens=`` for per-completion output cap. The old name
+caused empty-output runs when callers passed ``max_tokens=65536``
+expecting per-completion semantics — the run silently exited via
+TokenLimit before the model could write anything.
 """
 
 from __future__ import annotations
@@ -123,7 +148,7 @@ def create_deepagent(
     output_schema: type[BaseModel] | None = None,
     submit_tool: str = "submit_research",
     min_confidence: float = 0.8,
-    max_tokens: int = 80_000,
+    total_token_budget: int | None = None,
     max_output_tokens: int | None = None,
     max_iterations: int = 40,
     reflexion: bool = True,
@@ -159,10 +184,14 @@ def create_deepagent(
             is my structured answer". Default ``submit_research``.
         min_confidence: Confidence threshold the submission must clear
             for early-exit. Default 0.8.
-        max_tokens: Total-run token budget used in the typed termination
-            algebra (``TokenLimit(max_tokens)`` — exits when the cumulative
-            input+output tokens for the whole run exceed this). Default 80k.
-            *Not* a per-completion cap — see ``max_output_tokens`` for that.
+        total_token_budget: Cumulative INPUT+OUTPUT tokens across ALL
+            iterations of one run. Wired into
+            ``TokenLimit(total_token_budget)`` termination. Default
+            ``None`` = no TokenLimit term in the termination algebra
+            (the run is bounded only by ToolCalled+Confidence or
+            MaxIterations). Set this only when you want an explicit
+            cumulative ceiling — and remember that a long system prompt
+            on a multi-iteration run can eat the budget quickly.
         max_output_tokens: Per-LLM-call output token budget, forwarded to
             ``AgentConfig.max_tokens`` (and from there to the model
             provider's ``max_tokens`` request field on every completion).
@@ -237,11 +266,40 @@ def create_deepagent(
         ToolCalled,
     )
 
-    termination = (
-        (ToolCalled(submit_tool) & ConfidenceMet(min_confidence))
-        | TokenLimit(max_tokens)
-        | MaxIterations(max_iterations)
-    )
+    # Reject the legacy ``max_tokens=`` name explicitly. Without this
+    # check it would flow into ``**agent_kwargs`` and ``Agent()`` would
+    # silently set the per-completion cap to that value — completely
+    # different semantics from the old `create_deepagent(max_tokens=)`
+    # behavior. The 0.2.0b23 rename was specifically to kill the
+    # silent foot-gun; the loud error matches that intent.
+    if "max_tokens" in agent_kwargs:
+        raise TypeError(
+            "create_deepagent() no longer accepts ``max_tokens=`` "
+            "(removed in 0.2.0b23). Use ``total_token_budget=`` for "
+            "the run-level TokenLimit termination, or "
+            "``max_output_tokens=`` for the per-completion output cap "
+            "on each LLM call. The old name conflicted with every "
+            "LLM SDK's per-completion ``max_tokens`` field and caused "
+            "silent empty-output runs when callers passed "
+            "``max_tokens=65536`` expecting per-completion semantics."
+        )
+
+    # Build the termination algebra. ``TokenLimit`` only joins the
+    # `or`-chain when the caller opted in to a budget; the default
+    # is no token-based termination at all so a long input prompt
+    # can't silently kill an otherwise-healthy run.
+    #
+    # Breaking change in 0.2.0b23 — the old ``max_tokens=`` kwarg
+    # was removed. Use ``total_token_budget=`` (run-level cap) or
+    # ``max_output_tokens=`` (per-completion cap). A stray
+    # ``max_tokens=`` lands in ``**agent_kwargs`` and is rejected
+    # with a ``TypeError`` by the explicit check earlier in this
+    # function, instead of being silently forwarded to ``Agent()``.
+    base = ToolCalled(submit_tool) & ConfidenceMet(min_confidence)
+    if total_token_budget is not None:
+        termination = base | TokenLimit(total_token_budget) | MaxIterations(max_iterations)
+    else:
+        termination = base | MaxIterations(max_iterations)
 
     # Splice filesystem-as-memory tools into the user-supplied list
     # before constructing the Agent. The default backend is an

diff --git a/tests/integration/test_deepagent_token_budget.py b/tests/integration/test_deepagent_token_budget.py
@@ -0,0 +1,221 @@
+# Copyright (c) 2026 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v1.0 as shown at
+# https://oss.oracle.com/licenses/upl/
+
+"""Integration tests for the 0.2.0b23 ``total_token_budget`` rename.
+
+These tests pin the configuration behind the empty-output bug that
+motivated the rename by inspecting the agent the factory builds:
+
+  - **Reproduce the bug shape**: an explicit small ``total_token_budget``
+    attaches a ``TokenLimit`` term to the termination algebra — the
+    term that could end a long-prompt run with empty (or near-empty)
+    output. This happened silently before the rename because callers
+    passed ``max_tokens=65536`` expecting the per-completion meaning
+    every LLM SDK uses, but Locus interpreted it as the run-level cap.
+  - **Verify the fix**: with the default ``total_token_budget=None``
+    a long-prompt run terminates only via ``MaxIterations`` / the
+    submit_tool path — no token-based termination interferes.
+  - **Verify the loud rejection**: passing ``max_tokens=`` to the
+    new factory raises ``TypeError`` instead of silently flowing
+    through to the per-completion field.
+
+The tests never invoke the model — they only inspect the built
+agent's configuration — so no OCI/Gemini/OpenAI network calls are made.
+"""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import BaseModel
+
+from locus import create_deepagent
+from locus.core.termination import (
+    AndCondition,
+    MaxIterations,
+    OrCondition,
+    TokenLimit,
+)
+from locus.tools.decorator import tool
+
+
+class _Echo(BaseModel):
+    text: str
+    confidence: float = 0.0
+
+
+@tool
+def submit_research(text: str, confidence: float) -> str:
+    """Final-answer tool the deepagent terminates on."""
+    return f"submitted: {text}"
+
+
+def _stub_env(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Provide just enough OCI config to satisfy the OCIModel string
+    parser without making any real network calls. Tests below only
+    inspect the agent's configured termination + kwargs — they never
+    invoke the model."""
+    monkeypatch.setenv("OCI_PROFILE", "DEFAULT")
+
+
+# ---------------------------------------------------------------------------
+# Bug-shape reproduction
+# ---------------------------------------------------------------------------
+
+
+def _walk_leaves(node):
+    leaves: list = []
+
+    def _walk(n):
+        if isinstance(n, (OrCondition, AndCondition)):
+            for child in n._conditions:
+                _walk(child)
+        else:
+            leaves.append(n)
+
+    _walk(node)
+    return leaves
+
+
+def test_explicit_small_budget_attaches_token_limit(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """An explicit small ``total_token_budget`` attaches a
+    ``TokenLimit`` term to the termination algebra — this is what
+    the legacy ``max_tokens=80_000`` default silently did. The fix
+    surfaces it explicitly so callers know they're opting in."""
+    _stub_env(monkeypatch)
+    agent = create_deepagent(
+        model="oci:openai.gpt-4o-mini",
+        tools=[submit_research],
+        system_prompt="be helpful",
+        output_schema=_Echo,
+        reflexion=False,
+        grounding=False,
+        total_token_budget=8_000,  # explicit small cap
+        max_iterations=40,
+    )
+    leaves = _walk_leaves(agent.config.termination)
+    token_terms = [leaf for leaf in leaves if isinstance(leaf, TokenLimit)]
+    assert len(token_terms) == 1, (
+        f"An explicit total_token_budget must attach exactly one TokenLimit term; got {token_terms}"
+    )
+    # The MaxIterations leaf is always present as the safety net.
+    assert any(isinstance(leaf, MaxIterations) for leaf in leaves)
+
+
+def test_default_budget_none_removes_token_limit(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Default ``total_token_budget=None`` — the fix's main payload.
+
+    Reproduces the empty-output bug shape by inspecting the
+    termination tree: under the old API a caller would pass
+    ``max_tokens=65536`` expecting the per-completion cap from every
+    other LLM SDK; the old code attached
+    ``TokenLimit(65_536)`` to termination, and a long system prompt
+    on a multi-iteration run would exceed it before the model wrote
+    a single completion-token.
+
+    With the rename, the default has ZERO TokenLimit — so the same
+    caller, passing nothing, doesn't get silently killed by a
+    cumulative token cap."""
+    _stub_env(monkeypatch)
+    agent = create_deepagent(
+        model="oci:openai.gpt-4o-mini",
+        tools=[submit_research],
+        system_prompt="a " * 5_000,  # long-ish system prompt
+        output_schema=_Echo,
+        reflexion=False,
+        grounding=False,
+        # NOTE: NO total_token_budget — using the fixed default.
+        max_iterations=12,
+    )
+    leaves = _walk_leaves(agent.config.termination)
+    token_terms = [leaf for leaf in leaves if isinstance(leaf, TokenLimit)]
+    assert token_terms == [], (
+        "Default deepagent termination must not include a TokenLimit "
+        "term — that was the silent-failure default before 0.2.0b23. "
+        f"Got: {token_terms}"
+    )
+
+
+def test_long_prompt_with_no_budget_still_runs_via_max_iterations(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """With the default budget=None, a long-prompt run is bounded
+    only by MaxIterations + the submit_tool path. This is the
+    behavioral contract that fixes the empty-output bug for
+    long-narrative deep research."""
+    _stub_env(monkeypatch)
+    agent = create_deepagent(
+        model="oci:openai.gpt-4o-mini",
+        tools=[submit_research],
+        # Simulate a graph-grounded research prompt
+        system_prompt="\n".join([f"line {i}" for i in range(2_000)]),
+        output_schema=_Echo,
+        reflexion=False,
+        grounding=False,
+        max_iterations=15,
+    )
+    leaves = _walk_leaves(agent.config.termination)
+    # Termination = (ToolCalled & ConfidenceMet) | MaxIterations
+    max_iter_leaves = [leaf for leaf in leaves if isinstance(leaf, MaxIterations)]
+    assert len(max_iter_leaves) == 1
+    # No token limit — the run isn't bounded by cumulative tokens.
+    assert not any(isinstance(leaf, TokenLimit) for leaf in leaves)
+
+
+# ---------------------------------------------------------------------------
+# Loud rejection of the removed kwarg
+# ---------------------------------------------------------------------------
+
+
+def test_legacy_max_tokens_kwarg_raises(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Passing the removed ``max_tokens=`` raises TypeError with a
+    clear message pointing to the new names. This is the load-bearing
+    breaking change of 0.2.0b23 — callers either migrate or fail
+    loud, never silently get wrong behavior."""
+    _stub_env(monkeypatch)
+    with pytest.raises(TypeError) as excinfo:
+        create_deepagent(
+            model="oci:openai.gpt-4o-mini",
+            tools=[submit_research],
+            system_prompt="be helpful",
+            output_schema=_Echo,
+            reflexion=False,
+            grounding=False,
+            max_tokens=65_536,
+        )
+    msg = str(excinfo.value)
+    assert "total_token_budget" in msg, "TypeError must point callers to the new run-level kwarg"
+    assert "max_output_tokens" in msg, (
+        "TypeError must also point callers to the per-completion kwarg "
+        "so the migration choice is explicit"
+    )
+    assert "0.2.0b23" in msg, "TypeError should cite the version for migration tracking"
+
+
+def test_max_output_tokens_lands_on_agent_config(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """``max_output_tokens`` flows to ``AgentConfig.max_tokens`` —
+    that's the per-completion cap field the OCI model provider
+    forwards to every LLM request. Verifies the wiring stays correct
+    after the rename so callers who set the per-completion knob via
+    the new name don't accidentally land it elsewhere."""
+    _stub_env(monkeypatch)
+    agent = create_deepagent(
+        model="oci:openai.gpt-4o-mini",
+        tools=[submit_research],
+        system_prompt="be helpful",
+        output_schema=_Echo,
+        reflexion=False,
+        grounding=False,
+        max_output_tokens=65_536,
+    )
+    assert agent.config.max_tokens == 65_536, (
+        "max_output_tokens must land on AgentConfig.max_tokens (the "
+        "per-completion field on every LLM request) — that's the "
+        "knob callers want when they set the new name."
+    )