Skip to content

Commit a9d16bc

Browse files
committed
Structured Output support
Added structured output support for chat, tested against all model providers, including streaming. Token usage is now also included in the TextGenerationOutput; adding it led to finding a bug in usage reporting for Gemini when streaming. Unit tests are included as well.
1 parent 89e9d57 commit a9d16bc

5 files changed

Lines changed: 94 additions & 4 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "opengradient"
7-
version = "0.9.4"
7+
version = "0.9.5"
88
description = "Python SDK for OpenGradient decentralized model management & inference services"
99
authors = [{name = "OpenGradient", email = "adam@vannalabs.ai"}]
1010
readme = "README.md"

src/opengradient/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ async def stream_example():
8888
InferenceResult,
8989
ModelOutput,
9090
ModelRepository,
91+
ResponseFormat,
9192
SchedulerParams,
9293
TextGenerationOutput,
9394
TextGenerationStream,
@@ -105,6 +106,7 @@ async def stream_example():
105106
"SchedulerParams",
106107
"CandleType",
107108
"CandleOrder",
109+
"ResponseFormat",
108110
"TextGenerationOutput",
109111
"TextGenerationStream",
110112
"x402SettlementMode",

src/opengradient/client/llm.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from x402.mechanisms.evm.exact.register import register_exact_evm_client
1515
from x402.mechanisms.evm.upto.register import register_upto_evm_client
1616

17-
from ..types import TEE_LLM, StreamChoice, StreamChunk, StreamDelta, TextGenerationOutput, x402SettlementMode
17+
from ..types import TEE_LLM, ResponseFormat, StreamChoice, StreamChunk, StreamDelta, TextGenerationOutput, x402SettlementMode
1818
from .opg_token import Permit2ApprovalResult, ensure_opg_approval
1919
from .tee_connection import RegistryTEEConnection, StaticTEEConnection, TEEConnectionInterface
2020
from .tee_registry import TEERegistry
@@ -44,6 +44,7 @@ class _ChatParams:
4444
stop_sequence: Optional[List[str]]
4545
tools: Optional[List[Dict]]
4646
tool_choice: Optional[str]
47+
response_format: Optional[ResponseFormat]
4748
x402_settlement_mode: x402SettlementMode
4849

4950

@@ -152,6 +153,8 @@ def _chat_payload(self, params: _ChatParams, messages: List[Dict], stream: bool
152153
if params.tools:
153154
payload["tools"] = params.tools
154155
payload["tool_choice"] = params.tool_choice or "auto"
156+
if params.response_format:
157+
payload["response_format"] = params.response_format.to_dict()
155158
return payload
156159

157160
async def _call_with_tee_retry(
@@ -297,6 +300,7 @@ async def chat(
297300
temperature: float = 0.0,
298301
tools: Optional[List[Dict]] = None,
299302
tool_choice: Optional[str] = None,
303+
response_format: Optional[ResponseFormat] = None,
300304
x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED,
301305
stream: bool = False,
302306
) -> Union[TextGenerationOutput, AsyncGenerator[StreamChunk, None]]:
@@ -311,6 +315,11 @@ async def chat(
311315
temperature (float): Temperature for LLM inference, between 0 and 1.
312316
tools (List[dict], optional): Set of tools for function calling.
313317
tool_choice (str, optional): Sets a specific tool to choose.
318+
response_format (ResponseFormat, optional): Enforce a specific output format.
319+
Use ``ResponseFormat(type="json_object")`` for any valid JSON (not supported
320+
by Anthropic models). Use ``ResponseFormat(type="json_schema", json_schema={...})``
321+
to enforce a strict schema (supported by all providers including Anthropic).
322+
Defaults to None (plain text).
314323
x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments.
315324
- PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving).
316325
- BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient).
@@ -324,15 +333,25 @@ async def chat(
324333
- If stream=True: Async generator yielding StreamChunk objects
325334
326335
Raises:
336+
ValueError: If a ``response_format`` with ``type="json_object"`` is used with an Anthropic model.
327337
RuntimeError: If the inference fails.
328338
"""
339+
if response_format is not None and response_format.type == "json_object":
340+
provider = model.split("/")[0]
341+
if provider == "anthropic":
342+
raise ValueError(
343+
"Anthropic models do not support response_format type 'json_object'. "
344+
"Use ResponseFormat(type='json_schema', json_schema={...}) with an explicit schema instead."
345+
)
346+
329347
params = _ChatParams(
330348
model=model.split("/")[1],
331349
max_tokens=max_tokens,
332350
temperature=temperature,
333351
stop_sequence=stop_sequence,
334352
tools=tools,
335353
tool_choice=tool_choice,
354+
response_format=response_format,
336355
x402_settlement_mode=x402_settlement_mode,
337356
)
338357

@@ -379,6 +398,7 @@ async def _request() -> TextGenerationOutput:
379398
transaction_hash="external",
380399
finish_reason=choices[0].get("finish_reason"),
381400
chat_output=message,
401+
usage=result.get("usage"),
382402
tee_signature=result.get("tee_signature"),
383403
tee_timestamp=result.get("tee_timestamp"),
384404
**tee.metadata(),

src/opengradient/types.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,9 @@ class TextGenerationOutput:
428428
completion_output: Optional[str] = None
429429
"""Raw text returned by a completion request."""
430430

431+
usage: Optional[Dict] = None
432+
"""Token usage for the request. Contains ``prompt_tokens``, ``completion_tokens``, and ``total_tokens`` when reported by the server."""
433+
431434
payment_hash: Optional[str] = None
432435
"""Payment hash for the x402 transaction."""
433436

@@ -526,6 +529,71 @@ class TEE_LLM(str, Enum):
526529
GROK_4_1_FAST_NON_REASONING = "x-ai/grok-4-1-fast-non-reasoning"
527530

528531

532+
@dataclass
class ResponseFormat:
    """Controls the output format enforced by the TEE gateway.

    Use ``type="json_object"`` to receive any valid JSON object (supported by
    OpenAI, Gemini, and Grok). Use ``type="json_schema"`` with a ``json_schema``
    definition to enforce a specific schema (supported by all providers,
    including Anthropic).

    Attributes:
        type: One of ``"text"``, ``"json_object"``, or ``"json_schema"``.
        json_schema: Schema definition (required when ``type="json_schema"``).
            Must contain ``name`` (str) and ``schema`` (dict).
            ``strict`` (bool) is optional. Ignored during serialisation for
            every other ``type``.

    Raises:
        ValueError: If ``type`` is not a recognised value, or if
            ``type="json_schema"`` is used without providing ``json_schema``.

    Examples::

        # Any valid JSON object — OpenAI, Gemini, Grok only
        ResponseFormat(type="json_object")

        # Strict schema — all providers including Anthropic
        ResponseFormat(
            type="json_schema",
            json_schema={
                "name": "person",
                "strict": True,
                "schema": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "age": {"type": "integer"},
                    },
                    "required": ["name", "age"],
                    "additionalProperties": False,
                },
            },
        )
    """

    type: str
    json_schema: Optional[Dict] = None

    def __post_init__(self) -> None:
        """Validate the ``type`` / ``json_schema`` combination at construction time."""
        valid_types = ("text", "json_object", "json_schema")
        if self.type not in valid_types:
            raise ValueError(
                f"ResponseFormat.type must be one of {valid_types}, got '{self.type}'"
            )
        # An empty dict is treated the same as a missing schema.
        if self.type == "json_schema" and not self.json_schema:
            raise ValueError(
                "ResponseFormat.json_schema is required when type='json_schema'"
            )

    def to_dict(self) -> Dict:
        """Serialise to a JSON-compatible dict for the TEE gateway request payload.

        Returns:
            A new dict with a ``"type"`` key and, only when
            ``type == "json_schema"``, a ``"json_schema"`` key holding an
            independent copy of the schema.
        """
        # Function-scope import keeps this block self-contained.
        import copy

        payload: Dict = {"type": self.type}
        # The schema is only meaningful for the "json_schema" type; a schema
        # attached to "text" / "json_object" is not forwarded.
        if self.type == "json_schema" and self.json_schema is not None:
            # Deep copy so later mutation of the request payload cannot
            # change this instance's schema.
            payload["json_schema"] = copy.deepcopy(self.json_schema)
        return payload
595+
596+
529597
@dataclass
530598
class SchedulerParams:
531599
frequency: int

uv.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)