Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 69 additions & 11 deletions src/locus/deepagent/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
- Typed termination::

(ToolCalled(submit_tool) & ConfidenceMet(min_confidence))
| TokenLimit(max_tokens)
| TokenLimit(total_token_budget) # only when budget is set
| MaxIterations(max_iterations)

greppable, unit-testable, and per-recipe overridable.
Expand All @@ -23,6 +23,31 @@
This is a pure convenience layer over ``locus.Agent`` — it does not
change agent semantics. Callers who need finer control can build the
Agent directly.

Naming note (since this trips people up):

``total_token_budget`` — cumulative INPUT+OUTPUT tokens across
ALL iterations of one run. Wired into
``TokenLimit(total_token_budget)``
termination. ``None`` (default) means
"no TokenLimit term" — the run is
bounded only by ToolCalled+Confidence
or MaxIterations.
``max_output_tokens`` — per-completion output cap. Forwarded
to ``AgentConfig.max_tokens`` and from
there to the model provider's
``max_tokens`` request field on every
completion. ``None`` = use the model's
own default.

**Breaking change in 0.2.0b23**: the old ``max_tokens=`` parameter
(which used to mean total-run budget but read like a per-completion
cap to anyone familiar with OpenAI / Gemini / Anthropic SDKs) was
removed. Use ``total_token_budget=`` for run-level termination and
``max_output_tokens=`` for per-completion output cap. The old name
caused empty-output runs when callers passed ``max_tokens=65536``
expecting per-completion semantics — the run silently exited via
TokenLimit before the model could write anything.
"""

from __future__ import annotations
Expand Down Expand Up @@ -123,7 +148,7 @@ def create_deepagent(
output_schema: type[BaseModel] | None = None,
submit_tool: str = "submit_research",
min_confidence: float = 0.8,
max_tokens: int = 80_000,
total_token_budget: int | None = None,
max_output_tokens: int | None = None,
max_iterations: int = 40,
reflexion: bool = True,
Expand Down Expand Up @@ -159,10 +184,14 @@ def create_deepagent(
is my structured answer". Default ``submit_research``.
min_confidence: Confidence threshold the submission must clear
for early-exit. Default 0.8.
max_tokens: Total-run token budget used in the typed termination
algebra (``TokenLimit(max_tokens)`` — exits when the cumulative
input+output tokens for the whole run exceed this). Default 80k.
*Not* a per-completion cap — see ``max_output_tokens`` for that.
total_token_budget: Cumulative INPUT+OUTPUT tokens across ALL
iterations of one run. Wired into
``TokenLimit(total_token_budget)`` termination. Default
``None`` = no TokenLimit term in the termination algebra
(the run is bounded only by ToolCalled+Confidence or
MaxIterations). Set this only when you want an explicit
cumulative ceiling — and remember that a long system prompt
on a multi-iteration run can eat the budget quickly.
max_output_tokens: Per-LLM-call output token budget, forwarded to
``AgentConfig.max_tokens`` (and from there to the model
provider's ``max_tokens`` request field on every completion).
Expand Down Expand Up @@ -237,11 +266,40 @@ def create_deepagent(
ToolCalled,
)

termination = (
(ToolCalled(submit_tool) & ConfidenceMet(min_confidence))
| TokenLimit(max_tokens)
| MaxIterations(max_iterations)
)
# Reject the legacy ``max_tokens=`` name explicitly. Without this
# check it would flow into ``**agent_kwargs`` and ``Agent()`` would
# silently set the per-completion cap to that value — completely
# different semantics from the old `create_deepagent(max_tokens=)`
# behavior. The 0.2.0b23 rename was specifically to kill the
# silent foot-gun; the loud error matches that intent.
if "max_tokens" in agent_kwargs:
raise TypeError(
"create_deepagent() no longer accepts ``max_tokens=`` "
"(removed in 0.2.0b23). Use ``total_token_budget=`` for "
"the run-level TokenLimit termination, or "
"``max_output_tokens=`` for the per-completion output cap "
"on each LLM call. The old name conflicted with every "
"LLM SDK's per-completion ``max_tokens`` field and caused "
"silent empty-output runs when callers passed "
"``max_tokens=65536`` expecting per-completion semantics."
)

# Build the termination algebra. ``TokenLimit`` only joins the
# `or`-chain when the caller opted in to a budget; the default
# is no token-based termination at all so a long input prompt
# can't silently kill an otherwise-healthy run.
#
# Breaking change in 0.2.0b23 — the old ``max_tokens=`` kwarg
# was removed. Use ``total_token_budget=`` (run-level cap) or
# ``max_output_tokens=`` (per-completion cap). If callers pass
# ``max_tokens=``, the explicit ``"max_tokens" in agent_kwargs``
# check above raises ``TypeError`` before the value can reach
# ``Agent()`` through **agent_kwargs — a loud failure instead of
# the historical silent foot-gun.
base = ToolCalled(submit_tool) & ConfidenceMet(min_confidence)
if total_token_budget is not None:
termination = base | TokenLimit(total_token_budget) | MaxIterations(max_iterations)
else:
termination = base | MaxIterations(max_iterations)

# Splice filesystem-as-memory tools into the user-supplied list
# before constructing the Agent. The default backend is an
Expand Down
221 changes: 221 additions & 0 deletions tests/integration/test_deepagent_token_budget.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
# Copyright (c) 2026 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v1.0 as shown at
# https://oss.oracle.com/licenses/upl/

"""Integration tests for the 0.2.0b23 ``total_token_budget`` rename.

These tests capture the shape of the empty-output bug that motivated
the rename and verify the fix by inspecting the termination tree and
config that ``create_deepagent`` builds:

- **Reproduce the bug shape**: an explicit small ``total_token_budget``
plus a long system prompt fires ``TokenLimit`` termination on
iteration 1, exiting with empty (or near-empty) output. This was
happening silently before the rename because callers passed
``max_tokens=65536`` expecting the per-completion meaning every
LLM SDK uses, but Locus interpreted it as the run-level cap.
- **Verify the fix**: with the default ``total_token_budget=None``
a long-prompt run terminates only via ``MaxIterations`` / the
submit_tool path — no token-based termination interferes.
- **Verify the loud rejection**: passing ``max_tokens=`` to the
new factory raises ``TypeError`` instead of silently flowing
through to the per-completion field.

The tests only construct the agent and inspect its configuration —
the model is never invoked — so no OCI/Gemini/OpenAI credentials are
required.
"""

from __future__ import annotations

import pytest
from pydantic import BaseModel

from locus import create_deepagent
from locus.core.termination import (
AndCondition,
MaxIterations,
OrCondition,
TokenLimit,
)
from locus.tools.decorator import tool


class _Echo(BaseModel):
    # Minimal structured-output schema; the tests in this module pass it
    # to ``create_deepagent`` as ``output_schema``.
    # NOTE(review): documented with ``#`` comments rather than a docstring
    # because pydantic may surface a class docstring in the generated
    # schema — confirm before converting.
    text: str  # required answer text
    confidence: float = 0.0  # falls back to 0.0 when not supplied


# Test double for the final-answer tool. Its name matches the
# ``submit_tool`` default (``"submit_research"``) of ``create_deepagent``.
# NOTE(review): the ``@tool`` decorator presumably reads the docstring as
# the tool description, so the docstring text is left untouched — confirm.
@tool
def submit_research(text: str, confidence: float) -> str:
    """Final-answer tool the deepagent terminates on."""
    # ``confidence`` is part of the signature but does not appear in the
    # returned string; only ``text`` is echoed back.
    return f"submitted: {text}"


def _stub_env(monkeypatch: pytest.MonkeyPatch) -> None:
"""Provide just enough OCI config to satisfy the OCIModel string
parser without making any real network calls. Tests below only
inspect the agent's configured termination + kwargs — they never
invoke the model."""
monkeypatch.setenv("OCI_PROFILE", "DEFAULT")


# ---------------------------------------------------------------------------
# Bug-shape reproduction
# ---------------------------------------------------------------------------


def _walk_leaves(node):
    """Return the leaf conditions of a termination tree as a flat list.

    ``OrCondition`` and ``AndCondition`` nodes are treated as composites
    and expanded through their ``_conditions`` children; every other
    object is a leaf. Leaves come back in depth-first, left-to-right
    order.
    """

    def _iter_leaves(current):
        # Anything that is not a composite is a leaf by definition.
        if not isinstance(current, (OrCondition, AndCondition)):
            yield current
            return
        for sub_condition in current._conditions:
            yield from _iter_leaves(sub_condition)

    return list(_iter_leaves(node))


def test_explicit_small_budget_attaches_token_limit(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Opting in to ``total_token_budget`` adds exactly one ``TokenLimit``.

    Builds a deepagent with an explicit 8_000-token budget and inspects
    the resulting termination tree. The legacy ``max_tokens=80_000``
    default attached this term silently; with the rename the caller has
    to ask for it. The agent is only constructed, never run.
    """
    _stub_env(monkeypatch)

    factory_kwargs = {
        "model": "oci:openai.gpt-4o-mini",
        "tools": [submit_research],
        "system_prompt": "be helpful",
        "output_schema": _Echo,
        "reflexion": False,
        "grounding": False,
        "total_token_budget": 8_000,  # explicit small cap
        "max_iterations": 40,
    }
    agent = create_deepagent(**factory_kwargs)

    leaves = _walk_leaves(agent.config.termination)
    token_terms = [term for term in leaves if isinstance(term, TokenLimit)]
    assert len(token_terms) == 1, (
        f"An explicit total_token_budget must attach exactly one TokenLimit term; got {token_terms}"
    )

    # MaxIterations must always be there as the safety net.
    iteration_caps = [term for term in leaves if isinstance(term, MaxIterations)]
    assert iteration_caps


def test_default_budget_none_removes_token_limit(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The default ``total_token_budget=None`` yields no ``TokenLimit`` leaf.

    Background: under the old API a caller would pass
    ``max_tokens=65536`` expecting the per-completion meaning used by
    other LLM SDKs, while the factory attached ``TokenLimit(65_536)``
    to termination — a cumulative cap that a long system prompt on a
    multi-iteration run could exhaust before any output was written.

    This test builds an agent with a long-ish system prompt and no
    budget argument, then checks the termination tree contains zero
    ``TokenLimit`` terms.
    """
    _stub_env(monkeypatch)

    long_prompt = "a " * 5_000  # long-ish system prompt
    # NOTE: total_token_budget is deliberately omitted — the default is
    # what is under test here.
    agent = create_deepagent(
        model="oci:openai.gpt-4o-mini",
        tools=[submit_research],
        system_prompt=long_prompt,
        output_schema=_Echo,
        reflexion=False,
        grounding=False,
        max_iterations=12,
    )

    token_terms = [
        term
        for term in _walk_leaves(agent.config.termination)
        if isinstance(term, TokenLimit)
    ]
    assert token_terms == [], (
        "Default deepagent termination must not include a TokenLimit "
        "term — that was the silent-failure default before 0.2.0b23. "
        f"Got: {token_terms}"
    )


def test_long_prompt_with_no_budget_still_runs_via_max_iterations(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Without a budget, termination is submit-tool path or MaxIterations.

    Constructs an agent with a 2_000-line system prompt and the default
    ``total_token_budget`` and checks the termination tree has exactly
    one ``MaxIterations`` leaf and no ``TokenLimit`` leaf. Only the
    configuration is inspected; the agent is not executed.
    """
    _stub_env(monkeypatch)

    # Simulate a graph-grounded research prompt.
    prompt_text = "\n".join(f"line {i}" for i in range(2_000))
    agent = create_deepagent(
        model="oci:openai.gpt-4o-mini",
        tools=[submit_research],
        system_prompt=prompt_text,
        output_schema=_Echo,
        reflexion=False,
        grounding=False,
        max_iterations=15,
    )
    leaves = _walk_leaves(agent.config.termination)

    # Expected shape: (ToolCalled & ConfidenceMet) | MaxIterations
    max_iteration_count = sum(1 for term in leaves if isinstance(term, MaxIterations))
    assert max_iteration_count == 1

    # No token limit — cumulative tokens do not bound the run.
    assert all(not isinstance(term, TokenLimit) for term in leaves)


# ---------------------------------------------------------------------------
# Loud rejection of the removed kwarg
# ---------------------------------------------------------------------------


def test_legacy_max_tokens_kwarg_raises(monkeypatch: pytest.MonkeyPatch) -> None:
    """The removed ``max_tokens=`` kwarg must raise ``TypeError``.

    The error message has to name both replacement kwargs and the
    version that removed the old one, so callers either migrate or fail
    loudly instead of silently getting different semantics.
    """
    _stub_env(monkeypatch)

    legacy_call_kwargs = {
        "model": "oci:openai.gpt-4o-mini",
        "tools": [submit_research],
        "system_prompt": "be helpful",
        "output_schema": _Echo,
        "reflexion": False,
        "grounding": False,
        "max_tokens": 65_536,
    }
    with pytest.raises(TypeError) as excinfo:
        create_deepagent(**legacy_call_kwargs)

    # Checked after the ``with`` block: code following the raising call
    # inside it would never execute.
    msg = str(excinfo.value)
    required_fragments = (
        ("total_token_budget", "TypeError must point callers to the new run-level kwarg"),
        (
            "max_output_tokens",
            "TypeError must also point callers to the per-completion kwarg "
            "so the migration choice is explicit",
        ),
        ("0.2.0b23", "TypeError should cite the version for migration tracking"),
    )
    for fragment, reason in required_fragments:
        assert fragment in msg, reason


def test_max_output_tokens_lands_on_agent_config(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``max_output_tokens`` must end up on ``AgentConfig.max_tokens``.

    Guards the wiring of the per-completion cap after the rename: the
    value given under the new name has to be readable as
    ``agent.config.max_tokens`` on the constructed agent.
    """
    _stub_env(monkeypatch)

    per_completion_cap = 65_536
    agent = create_deepagent(
        model="oci:openai.gpt-4o-mini",
        tools=[submit_research],
        system_prompt="be helpful",
        output_schema=_Echo,
        reflexion=False,
        grounding=False,
        max_output_tokens=per_completion_cap,
    )

    configured_cap = agent.config.max_tokens
    assert configured_cap == 65_536, (
        "max_output_tokens must land on AgentConfig.max_tokens (the "
        "per-completion field on every LLM request) — that's the "
        "knob callers want when they set the new name."
    )
Loading