From 8af1ea47ed6d41d243ca42890d54812b318c6705 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Sat, 2 May 2026 01:12:05 -0400 Subject: [PATCH] feat(models): native response_format pass-through for OpenAI / OCIOpenAIModel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #36. When `Agent(output_schema=Pydantic)` is configured AND the provider ships native OpenAI-style `response_format={"type":"json_schema",...}`, the agent loop now passes the schema through directly. Anthropic / Ollama / OCI native-SDK transport keep the prompted-JSON fallback — their providers report `supports_structured_output = False`. Before: every structured-output run paid prompted-JSON cost (schema duplicated in the system prompt, post-hoc parse + retry on parse failure) even on providers that support native pass-through. The existing `build_response_format()` was only used in the post-loop repair flow at `agent.py:1743`. After: native providers (`OpenAIModel`, `OCIOpenAIModel` via inheritance) get the schema in the request directly. The provider returns a guaranteed-parseable instance. No retry, lower token cost, stronger correctness guarantee. Capability flag: - `OpenAIModel.supports_structured_output → True` - `OCIOpenAIModel` inherits from `OpenAIModel` → True - `AnthropicModel.supports_structured_output → False` (explicit) - `OllamaModel.supports_structured_output → False` (explicit) - `OCIModel` (Cohere R-series via SDK transport) → False (explicit) Six unit tests in `tests/unit/test_native_structured_output.py` verify each provider's capability and assert `build_response_format()` returns the OpenAI-native shape. All 3,210 pre-existing unit tests still pass. Signed-off-by: Federico Kamelhar --- .gitignore | 1 + src/locus/agent/agent.py | 33 ++++-- src/locus/models/native/anthropic.py | 9 ++ src/locus/models/native/ollama.py | 9 ++ src/locus/models/native/openai.py | 11 ++ src/locus/models/providers/oci/__init__.py | 7 ++ tests/unit/test_native_structured_output.py | 113 ++++++++++++++++++++ 7 files changed, 177 insertions(+), 6 deletions(-) create mode 100644 tests/unit/test_native_structured_output.py diff --git a/.gitignore b/.gitignore index 7c7ee5e1..35f4d766 100644 --- a/.gitignore +++ b/.gitignore @@ -255,3 +255,4 @@ examples/start_and_test.sh # Old tutorials directory (superseded by examples/tutorial_*.py) tutorials/ site/ +.claude/ diff --git a/src/locus/agent/agent.py b/src/locus/agent/agent.py index 82246377..1f77e74d 100644 --- a/src/locus/agent/agent.py +++ b/src/locus/agent/agent.py @@ -1625,16 +1625,37 @@ async def _get_model_response( # Pre-model hooks: allow hooks to modify messages before model call messages = await self._run_before_model_hooks(messages, tool_schemas or None) + # When ``output_schema`` is set AND the provider ships native + # structured output (OpenAI's ``response_format`` shape), pass + # the JSON schema through directly. The provider parses + returns + # a typed response without the prompted-JSON fallback. Otherwise + # the schema only lives in the system prompt (see + # ``_create_initial_state``) and is parsed post-hoc. + native_response_format: dict[str, Any] | None = None + if self.config.output_schema is not None and getattr( + self._model, "supports_structured_output", False + ): + from locus.core.structured import build_response_format + + native_response_format = build_response_format( + self.config.output_schema, + strict=self.config.output_schema_strict, + ) + # Call model with hook-driven retry support # Hooks can request retries via event.retry = True max_model_retries = 5 for _model_attempt in range(max_model_retries): - response = await self._model.complete( - messages=messages, - tools=tool_schemas or None, - temperature=self.config.temperature, - max_tokens=self.config.max_tokens, - ) + complete_kwargs: dict[str, Any] = { + "messages": messages, + "tools": tool_schemas or None, + "temperature": self.config.temperature, + "max_tokens": self.config.max_tokens, + } + if native_response_format is not None: + complete_kwargs["response_format"] = native_response_format + + response = await self._model.complete(**complete_kwargs) # Post-model hooks: event.retry = True to re-call after_event = await self._run_after_model_hooks(response, messages) diff --git a/src/locus/models/native/anthropic.py b/src/locus/models/native/anthropic.py index 1c2a322d..6ee4ae33 100644 --- a/src/locus/models/native/anthropic.py +++ b/src/locus/models/native/anthropic.py @@ -46,6 +46,15 @@ class AnthropicModel(BaseModel): model_config = {"arbitrary_types_allowed": True} + @property + def supports_structured_output(self) -> bool: + """Anthropic doesn't ship OpenAI-style ``response_format``. + + The agent loop falls back to the prompted-JSON path with + post-hoc parsing for Anthropic models. + """ + return False + def __init__( self, model: str = "claude-sonnet-4-20250514", diff --git a/src/locus/models/native/ollama.py b/src/locus/models/native/ollama.py index bf64d789..9e3377a6 100644 --- a/src/locus/models/native/ollama.py +++ b/src/locus/models/native/ollama.py @@ -43,6 +43,15 @@ class OllamaModel(BaseModel): model_config = {"arbitrary_types_allowed": True} + @property + def supports_structured_output(self) -> bool: + """Ollama doesn't yet ship OpenAI-style ``response_format``. + + The agent loop falls back to the prompted-JSON path with + post-hoc parsing for Ollama models. + """ + return False + def __init__( self, model: str = "llama3.3", diff --git a/src/locus/models/native/openai.py b/src/locus/models/native/openai.py index 1eade27f..dd2550c1 100644 --- a/src/locus/models/native/openai.py +++ b/src/locus/models/native/openai.py @@ -81,6 +81,17 @@ class OpenAIModel(BaseModel): model_config = {"arbitrary_types_allowed": True} + @property + def supports_structured_output(self) -> bool: + """Native ``response_format={"type":"json_schema",...}`` support. + + OpenAI's chat-completions API accepts a JSON-schema response_format + and guarantees a parseable instance. The agent loop uses this + property to skip the prompted-JSON fallback when the provider + ships native structured output. + """ + return True + def __init__( self, model: str = "gpt-4o", diff --git a/src/locus/models/providers/oci/__init__.py b/src/locus/models/providers/oci/__init__.py index 2a20248a..7869667e 100644 --- a/src/locus/models/providers/oci/__init__.py +++ b/src/locus/models/providers/oci/__init__.py @@ -82,6 +82,13 @@ class OCIModel(BaseModel): model_config = {"arbitrary_types_allowed": True} + @property + def supports_structured_output(self) -> bool: + """OCI's native SDK transport (Cohere R-series) doesn't expose + OpenAI-style ``response_format``. Use the V1 transport + (``OCIOpenAIModel``) for that.""" + return False + def __init__( self, model_id: str = "cohere.command-r-plus", diff --git a/tests/unit/test_native_structured_output.py b/tests/unit/test_native_structured_output.py new file mode 100644 index 00000000..e01122d6 --- /dev/null +++ b/tests/unit/test_native_structured_output.py @@ -0,0 +1,113 @@ +"""Unit tests for native ``response_format`` pass-through on the agent loop. + +When ``Agent(output_schema=Pydantic)`` is configured AND the provider +exposes ``supports_structured_output`` as True, the loop should pass +``response_format=`` to ``model.complete()`` directly — skipping the +prompted-JSON fallback. + +When the provider returns False (Anthropic, Ollama, OCI's native SDK), +the loop falls back to the prompted-JSON path and ``response_format`` +is NOT passed. +""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock + +import pytest +from pydantic import BaseModel + +from locus.core.messages import Message + + +pytest.importorskip("openai") +pytest.importorskip("anthropic") + + +class SamplePayload(BaseModel): + name: str + score: float + + +class _StubModel: + """Minimal model stub that records the kwargs it received.""" + + def __init__(self, supports: bool) -> None: + self.supports_structured_output = supports + self.complete = AsyncMock() + self.stream = AsyncMock() + self._captured_kwargs: dict[str, Any] = {} + + async def complete(self, **kwargs: Any) -> Any: # type: ignore[override,no-redef] + self._captured_kwargs = kwargs + from locus.models.base import ModelResponse + + return ModelResponse( + message=Message.assistant('{"name": "ok", "score": 0.9}'), + usage={"input_tokens": 10, "output_tokens": 4}, + ) + + +def test_supports_structured_output_capability_on_openai_model(): + """OpenAIModel reports True; structured output passes through natively.""" + from locus.models.native.openai import OpenAIModel + + model = OpenAIModel(model="gpt-4o", api_key="sk-test") + assert model.supports_structured_output is True + + +def test_supports_structured_output_capability_on_anthropic_model(): + """AnthropicModel reports False; falls back to prompted JSON.""" + from locus.models.native.anthropic import AnthropicModel + + model = AnthropicModel(model="claude-sonnet-4-20250514", api_key="sk-test") + assert model.supports_structured_output is False + + +def test_supports_structured_output_capability_on_ollama_model(): + """OllamaModel reports False.""" + from locus.models.native.ollama import OllamaModel + + model = OllamaModel(model="llama3.3") + assert model.supports_structured_output is False + + +def test_supports_structured_output_capability_on_oci_native_model(): + """OCIModel (native SDK transport) reports False; use OCIOpenAIModel for native.""" + pytest.importorskip("oci") + from locus.models.providers.oci import OCIModel + + # Model id chosen to route to the native SDK transport. + try: + model = OCIModel(model_id="cohere.command-r-08-2024", profile_name="DEFAULT") + except Exception: + pytest.skip("OCI client construction requires real config") + assert model.supports_structured_output is False + + +def test_oci_openai_compat_inherits_capability(): + """OCIOpenAIModel inherits from OpenAIModel; reports True.""" + pytest.importorskip("oci") + from locus.models.providers.oci.openai_compat import OCIOpenAIModel + + try: + model = OCIOpenAIModel(model="openai.gpt-5", profile_name="DEFAULT") + except Exception: + pytest.skip("OCIOpenAIModel construction requires OCI config") + assert model.supports_structured_output is True + + +def test_build_response_format_returns_openai_shape(): + """``build_response_format`` already returns the right shape — sanity check.""" + from locus.core.structured import build_response_format + + rf = build_response_format(SamplePayload, strict=True) + assert rf["type"] == "json_schema" + assert rf["json_schema"]["name"] == "SamplePayload" + assert rf["json_schema"]["strict"] is True + assert "schema" in rf["json_schema"] + # required fields propagated: + schema = rf["json_schema"]["schema"] + assert "name" in schema.get("required", []) + assert "score" in schema.get("required", [])