From 8af1ea47ed6d41d243ca42890d54812b318c6705 Mon Sep 17 00:00:00 2001
From: Federico Kamelhar <federico.kamelhar@oracle.com>
Date: Sat, 2 May 2026 01:12:05 -0400
Subject: [PATCH] feat(models): native response_format pass-through for OpenAI
 / OCIOpenAIModel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #36.

When `Agent(output_schema=<Pydantic model>)` is configured AND the
provider supports native OpenAI-style
`response_format={"type":"json_schema",...}`, the agent loop now passes
the schema to `model.complete()` as `response_format`. The Anthropic,
Ollama, and OCI native-SDK transports keep the prompted-JSON fallback
because their model classes report `supports_structured_output = False`.

Before: every structured-output run paid prompted-JSON cost (schema
duplicated in the system prompt, post-hoc parse + retry on parse
failure) even on providers that support native pass-through. The
existing `build_response_format()` was only used in the post-loop
repair flow at `agent.py:1743`.

After: native providers (`OpenAIModel`, and `OCIOpenAIModel` via
inheritance) receive the schema in the request itself. With strict
mode (`output_schema_strict`) the provider constrains its reply to the
schema, so the result parses without a retry: lower token cost and a
stronger correctness guarantee.

Capability flag:
- `OpenAIModel.supports_structured_output → True`
- `OCIOpenAIModel` inherits from `OpenAIModel` → True
- `AnthropicModel.supports_structured_output → False` (explicit)
- `OllamaModel.supports_structured_output → False` (explicit)
- `OCIModel` (Cohere R-series via SDK transport) → False (explicit)

Six unit tests in `tests/unit/test_native_structured_output.py`
verify each provider's capability and assert
`build_response_format()` returns the OpenAI-native shape. All 3,210
pre-existing unit tests still pass.

Signed-off-by: Federico Kamelhar <federico.kamelhar@oracle.com>
---
 .gitignore                                  |   1 +
 src/locus/agent/agent.py                    |  33 ++++--
 src/locus/models/native/anthropic.py        |   9 ++
 src/locus/models/native/ollama.py           |   9 ++
 src/locus/models/native/openai.py           |  11 ++
 src/locus/models/providers/oci/__init__.py  |   7 ++
 tests/unit/test_native_structured_output.py | 113 ++++++++++++++++++++
 7 files changed, 177 insertions(+), 6 deletions(-)
 create mode 100644 tests/unit/test_native_structured_output.py

diff --git a/.gitignore b/.gitignore
index 7c7ee5e1..35f4d766 100644
--- a/.gitignore
+++ b/.gitignore
@@ -255,3 +255,4 @@ examples/start_and_test.sh
 # Old tutorials directory (superseded by examples/tutorial_*.py)
 tutorials/
 site/
+.claude/
diff --git a/src/locus/agent/agent.py b/src/locus/agent/agent.py
index 82246377..1f77e74d 100644
--- a/src/locus/agent/agent.py
+++ b/src/locus/agent/agent.py
@@ -1625,16 +1625,37 @@ async def _get_model_response(
         # Pre-model hooks: allow hooks to modify messages before model call
         messages = await self._run_before_model_hooks(messages, tool_schemas or None)
 
+        # When ``output_schema`` is set AND the provider reports native
+        # structured output (OpenAI's ``response_format`` shape), pass
+        # the JSON schema in the request so the provider can constrain
+        # its reply to it. Otherwise no ``response_format`` is sent: the
+        # schema only lives in the system prompt (see
+        # ``_create_initial_state``) and the reply is parsed post-hoc.
+        native_response_format: dict[str, Any] | None = None
+        if self.config.output_schema is not None and getattr(
+            self._model, "supports_structured_output", False
+        ):
+            from locus.core.structured import build_response_format
+
+            native_response_format = build_response_format(
+                self.config.output_schema,
+                strict=self.config.output_schema_strict,
+            )
+
         # Call model with hook-driven retry support
         # Hooks can request retries via event.retry = True
         max_model_retries = 5
         for _model_attempt in range(max_model_retries):
-            response = await self._model.complete(
-                messages=messages,
-                tools=tool_schemas or None,
-                temperature=self.config.temperature,
-                max_tokens=self.config.max_tokens,
-            )
+            complete_kwargs: dict[str, Any] = {
+                "messages": messages,
+                "tools": tool_schemas or None,
+                "temperature": self.config.temperature,
+                "max_tokens": self.config.max_tokens,
+            }
+            if native_response_format is not None:
+                complete_kwargs["response_format"] = native_response_format
+
+            response = await self._model.complete(**complete_kwargs)
 
             # Post-model hooks: event.retry = True to re-call
             after_event = await self._run_after_model_hooks(response, messages)
diff --git a/src/locus/models/native/anthropic.py b/src/locus/models/native/anthropic.py
index 1c2a322d..6ee4ae33 100644
--- a/src/locus/models/native/anthropic.py
+++ b/src/locus/models/native/anthropic.py
@@ -46,6 +46,15 @@ class AnthropicModel(BaseModel):
 
     model_config = {"arbitrary_types_allowed": True}
 
+    @property
+    def supports_structured_output(self) -> bool:
+        """Anthropic doesn't ship OpenAI-style ``response_format``.
+
+        The agent loop falls back to the prompted-JSON path with
+        post-hoc parsing for Anthropic models.
+        """
+        return False
+
     def __init__(
         self,
         model: str = "claude-sonnet-4-20250514",
diff --git a/src/locus/models/native/ollama.py b/src/locus/models/native/ollama.py
index bf64d789..9e3377a6 100644
--- a/src/locus/models/native/ollama.py
+++ b/src/locus/models/native/ollama.py
@@ -43,6 +43,15 @@ class OllamaModel(BaseModel):
 
     model_config = {"arbitrary_types_allowed": True}
 
+    @property
+    def supports_structured_output(self) -> bool:
+        """Ollama doesn't yet ship OpenAI-style ``response_format``.
+
+        The agent loop falls back to the prompted-JSON path with
+        post-hoc parsing for Ollama models.
+        """
+        return False
+
     def __init__(
         self,
         model: str = "llama3.3",
diff --git a/src/locus/models/native/openai.py b/src/locus/models/native/openai.py
index 1eade27f..dd2550c1 100644
--- a/src/locus/models/native/openai.py
+++ b/src/locus/models/native/openai.py
@@ -81,6 +81,17 @@ class OpenAIModel(BaseModel):
 
     model_config = {"arbitrary_types_allowed": True}
 
+    @property
+    def supports_structured_output(self) -> bool:
+        """Report native ``response_format={"type":"json_schema",...}`` support.
+
+        OpenAI's chat-completions API accepts a JSON-schema response_format;
+        with ``strict`` enabled the reply is constrained to that schema.
+        The agent loop reads this property to decide whether to pass
+        ``response_format`` to ``complete()``.
+        """
+        return True
+
     def __init__(
         self,
         model: str = "gpt-4o",
diff --git a/src/locus/models/providers/oci/__init__.py b/src/locus/models/providers/oci/__init__.py
index 2a20248a..7869667e 100644
--- a/src/locus/models/providers/oci/__init__.py
+++ b/src/locus/models/providers/oci/__init__.py
@@ -82,6 +82,13 @@ class OCIModel(BaseModel):
 
     model_config = {"arbitrary_types_allowed": True}
 
+    @property
+    def supports_structured_output(self) -> bool:
+        """OCI's native SDK transport (Cohere R-series) doesn't expose
+        OpenAI-style ``response_format``. Use the V1 transport
+        (``OCIOpenAIModel``) for that."""
+        return False
+
     def __init__(
         self,
         model_id: str = "cohere.command-r-plus",
diff --git a/tests/unit/test_native_structured_output.py b/tests/unit/test_native_structured_output.py
new file mode 100644
index 00000000..e01122d6
--- /dev/null
+++ b/tests/unit/test_native_structured_output.py
@@ -0,0 +1,113 @@
+"""Unit tests for the capability flag behind native ``response_format``.
+
+When ``Agent(output_schema=...)`` is given a Pydantic model AND the provider
+exposes ``supports_structured_output`` as True, the agent loop passes
+``response_format=`` to ``model.complete()``; when the flag is False
+(Anthropic, Ollama, OCI's native SDK) it keeps the prompted-JSON path.
+
+The tests below cover each provider's ``supports_structured_output``
+value and the shape returned by ``build_response_format``; they do not
+drive the agent loop itself.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+from unittest.mock import AsyncMock
+
+import pytest
+from pydantic import BaseModel
+
+from locus.core.messages import Message
+
+
+pytest.importorskip("openai")
+pytest.importorskip("anthropic")
+
+
+class SamplePayload(BaseModel):
+    name: str
+    score: float
+
+
+class _StubModel:
+    """Model stub whose ``complete`` coroutine records the kwargs it received."""
+
+    def __init__(self, supports: bool) -> None:
+        self.supports_structured_output = supports
+        # No ``self.complete`` attribute: it would shadow the coroutine below.
+        self.stream = AsyncMock()
+        self._captured_kwargs: dict[str, Any] = {}
+
+    async def complete(self, **kwargs: Any) -> Any:
+        self._captured_kwargs = kwargs
+        from locus.models.base import ModelResponse
+
+        return ModelResponse(
+            message=Message.assistant('{"name": "ok", "score": 0.9}'),
+            usage={"input_tokens": 10, "output_tokens": 4},
+        )
+
+
+def test_supports_structured_output_capability_on_openai_model():
+    """OpenAIModel reports True; structured output passes through natively."""
+    from locus.models.native.openai import OpenAIModel
+
+    model = OpenAIModel(model="gpt-4o", api_key="sk-test")
+    assert model.supports_structured_output is True
+
+
+def test_supports_structured_output_capability_on_anthropic_model():
+    """AnthropicModel reports False; falls back to prompted JSON."""
+    from locus.models.native.anthropic import AnthropicModel
+
+    model = AnthropicModel(model="claude-sonnet-4-20250514", api_key="sk-test")
+    assert model.supports_structured_output is False
+
+
+def test_supports_structured_output_capability_on_ollama_model():
+    """OllamaModel reports False."""
+    from locus.models.native.ollama import OllamaModel
+
+    model = OllamaModel(model="llama3.3")
+    assert model.supports_structured_output is False
+
+
+def test_supports_structured_output_capability_on_oci_native_model():
+    """OCIModel (native SDK transport) reports False; use OCIOpenAIModel for native."""
+    pytest.importorskip("oci")
+    from locus.models.providers.oci import OCIModel
+
+    # Model id chosen to route to the native SDK transport.
+    try:
+        model = OCIModel(model_id="cohere.command-r-08-2024", profile_name="DEFAULT")
+    except Exception:
+        pytest.skip("OCI client construction requires real config")
+    assert model.supports_structured_output is False
+
+
+def test_oci_openai_compat_inherits_capability():
+    """OCIOpenAIModel inherits from OpenAIModel; reports True."""
+    pytest.importorskip("oci")
+    from locus.models.providers.oci.openai_compat import OCIOpenAIModel
+
+    try:
+        model = OCIOpenAIModel(model="openai.gpt-5", profile_name="DEFAULT")
+    except Exception:
+        pytest.skip("OCIOpenAIModel construction requires OCI config")
+    assert model.supports_structured_output is True
+
+
+def test_build_response_format_returns_openai_shape():
+    """``build_response_format`` already returns the right shape — sanity check."""
+    from locus.core.structured import build_response_format
+
+    rf = build_response_format(SamplePayload, strict=True)
+    assert rf["type"] == "json_schema"
+    assert rf["json_schema"]["name"] == "SamplePayload"
+    assert rf["json_schema"]["strict"] is True
+    assert "schema" in rf["json_schema"]
+    # required fields propagated:
+    schema = rf["json_schema"]["schema"]
+    assert "name" in schema.get("required", [])
+    assert "score" in schema.get("required", [])