Skip to content

Commit 13aabe7

Browse files
committed
another attempt at fixing google genai instructor bug
1 parent acdc984 commit 13aabe7

3 files changed

Lines changed: 176 additions & 31 deletions

File tree

maseval/interface/inference/google_genai.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,38 @@ def _parse_response(self, response: Any) -> ChatResponse:
319319
stop_reason=stop_reason,
320320
)
321321

322+
@staticmethod
def _clean_schema_model(model: type) -> type:
    """Create a subclass of ``model`` whose JSON schema omits additionalProperties.

    Gemini's structured output API rejects additionalProperties (which Pydantic v2
    emits by default). The returned thin subclass overrides model_json_schema() to
    remove it from the generated schema.

    The subclass keeps the original ``__name__`` and ``__qualname__`` (used for
    instructor's function naming) and the original ``__doc__``. Because it is a
    subclass, instances it produces still pass ``isinstance(obj, model)``.

    Args:
        model: Response model class exposing a ``model_json_schema()`` classmethod
            (in practice a Pydantic ``BaseModel`` subclass).

    Returns:
        A new subclass of ``model`` with a cleaned JSON schema.
    """

    class CleanSchemaModel(model):  # ty: ignore[unsupported-base]
        # A class body without a docstring gets ``__doc__ = None``; docstrings are
        # not inherited. Pydantic derives the schema "description" from the class
        # docstring, so copy it explicitly or the wrapper silently drops it.
        __doc__ = model.__doc__

        @classmethod
        def model_json_schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]:
            schema = super().model_json_schema(*args, **kwargs)  # ty: ignore[unresolved-attribute]
            # The schema dict is freshly built by the parent call, so in-place
            # mutation is safe here.
            GoogleGenAIModelAdapter._strip_additional_properties(schema)
            return schema

    CleanSchemaModel.__name__ = model.__name__
    CleanSchemaModel.__qualname__ = model.__qualname__
    return CleanSchemaModel
342+
343+
@staticmethod
def _strip_additional_properties(obj: Any) -> None:
    """Remove the ``additionalProperties`` keyword from a JSON schema, in place.

    The walk is context-aware so that only the schema *keyword* is removed:

    * Inside name-to-schema maps (``properties``, ``$defs``, ...) the keys are
      user-chosen names, not keywords. A field that happens to be called
      ``additionalProperties`` is kept (otherwise it would vanish from
      ``properties`` while still being listed in ``required``).
    * Values of literal-data keywords (``default``, ``const``, ``enum``,
      ``examples``) are instance data, not schemas, and are left untouched.

    Args:
        obj: A JSON schema fragment (dict, list, or scalar). Dicts are mutated
            in place; scalars are ignored.

    Returns:
        None.
    """
    # Keywords whose value maps arbitrary names to subschemas.
    name_map_keywords = {"properties", "patternProperties", "$defs", "definitions", "dependentSchemas"}
    # Keywords whose value is literal instance data rather than a schema.
    literal_keywords = {"default", "const", "enum", "examples"}

    # Explicit stack instead of recursion: each entry is (node, node_is_name_map).
    pending = [(obj, False)]
    while pending:
        node, is_name_map = pending.pop()
        if isinstance(node, list):
            pending.extend((item, False) for item in node)
        elif isinstance(node, dict):
            if is_name_map:
                # Keys are names; only the values are schemas.
                pending.extend((subschema, False) for subschema in node.values())
                continue
            node.pop("additionalProperties", None)
            for keyword, value in node.items():
                if keyword not in literal_keywords:
                    pending.append((value, keyword in name_map_keywords))
353+
322354
def _structured_chat(
323355
self,
324356
messages: List[Dict[str, Any]],
@@ -331,9 +363,16 @@ def _structured_chat(
331363
) -> "ChatResponse":
332364
"""Use instructor for structured output with validation and retries."""
333365
if self._instructor_client is None:
366+
import instructor
334367
from instructor import from_genai
335368

336-
self._instructor_client = from_genai(self._client)
369+
# Use GENAI_STRUCTURED_OUTPUTS (native JSON schema output) instead of
370+
# GENAI_TOOLS (function calling). GENAI_TOOLS has upstream instructor bugs
371+
# with Gemini thinking mode: (1) AttributeError when content is None on
372+
# MALFORMED_FUNCTION_CALL, and (2) AssertionError on duplicate function call
373+
# parts. These are not retried by instructor (only ValidationError is).
374+
# GENAI_STRUCTURED_OUTPUTS parses via completion.text, avoiding both issues.
375+
self._instructor_client = from_genai(self._client, mode=instructor.Mode.GENAI_STRUCTURED_OUTPUTS)
337376

338377
params = dict(self._default_generation_params)
339378
if generation_params:
@@ -358,9 +397,15 @@ def _structured_chat(
358397
if gen_config:
359398
instructor_kwargs["generation_config"] = gen_config
360399

400+
# Wrap the response model to strip additionalProperties from its JSON schema.
401+
# Pydantic v2 emits additionalProperties by default, but Gemini's structured
402+
# output API rejects it. GENAI_TOOLS handles this via map_to_genai_schema(),
403+
# but GENAI_STRUCTURED_OUTPUTS passes the raw Pydantic class to the SDK.
404+
clean_model = self._clean_schema_model(response_model)
405+
361406
result = self._instructor_client.chat.completions.create(
362407
model=self._model_id,
363-
response_model=response_model, # ty: ignore[invalid-argument-type]
408+
response_model=clean_model, # ty: ignore[invalid-argument-type]
364409
messages=messages, # ty: ignore[invalid-argument-type]
365410
max_retries=max_retries,
366411
**instructor_kwargs,

tests/test_interface/test_model_integration/test_live_api.py

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
import json
2929
import os
30+
from typing import Any, Dict
3031

3132
import pytest
3233
from pydantic import BaseModel, Field
@@ -41,6 +42,7 @@
4142
OPENAI_MODEL = "gpt-4o-mini"
4243
ANTHROPIC_MODEL = "claude-haiku-4-5"
4344
GOOGLE_MODEL = "gemini-2.0-flash"
45+
GOOGLE_THINKING_MODEL = "gemini-2.5-flash"
4446
LITELLM_MODEL = "gpt-4o-mini"
4547

4648

@@ -316,9 +318,8 @@ def test_structured_output(self):
316318
def test_structured_output_nested_model(self):
317319
"""Nested Pydantic models work despite additionalProperties in JSON schema.
318320
319-
Pydantic v2 emits additionalProperties: false for nested models.
320-
Gemini's native JSON schema output (GENAI_STRUCTURED_OUTPUTS) rejects this,
321-
but GENAI_TOOLS (function calling) handles it correctly via schema conversion.
321+
Pydantic v2 emits additionalProperties for nested models and Dict fields.
322+
The adapter strips it via _clean_schema_model before the SDK sees it.
322323
"""
323324
from google import genai
324325
from maseval.interface.inference.google_genai import GoogleGenAIModelAdapter
@@ -335,6 +336,68 @@ def test_structured_output_nested_model(self):
335336
assert response.structured_response.location.city.lower() == "paris"
336337
assert response.content is not None
337338

339+
@requires_google
def test_structured_output_with_thinking(self):
    """Structured output succeeds when Gemini thinking mode is enabled.

    With GENAI_TOOLS, thinking mode triggers upstream instructor bugs
    (AttributeError on MALFORMED_FUNCTION_CALL, AssertionError on duplicate
    parts). GENAI_STRUCTURED_OUTPUTS sidesteps them by parsing completion.text.
    """
    from google import genai
    from maseval.interface.inference.google_genai import GoogleGenAIModelAdapter

    # Live call: exercises the real Gemini API with a thinking budget set.
    thinking_params = {"thinking_config": {"thinking_budget": 1024}}
    adapter = GoogleGenAIModelAdapter(
        client=genai.Client(),
        model_id=GOOGLE_THINKING_MODEL,
        default_generation_params=thinking_params,
    )

    prompt = [{"role": "user", "content": "What is the capital of France?"}]
    response = adapter.chat(prompt, response_model=Capital)

    parsed = response.structured_response
    assert isinstance(parsed, Capital)
    assert parsed.city.lower() == "paris"
    assert parsed.country.lower() == "france"
    assert response.content is not None
367+
368+
@requires_google
def test_structured_output_dict_field_with_thinking(self):
    """A Dict[str, Any] field combined with thinking mode works end to end.

    Mirrors the ToolLLMSimulator bug scenario: a response model containing a
    Dict[str, Any] field (which yields additionalProperties in the schema) sent
    to a thinking-enabled model.
    """
    from google import genai
    from maseval.interface.inference.google_genai import GoogleGenAIModelAdapter

    from pydantic import BaseModel, Field

    class ToolOutput(BaseModel):
        text: str = Field(default="", description="Description")
        details: Dict[str, Any] = Field(default_factory=dict, description="Structured data")

    # Live call: exercises the real Gemini API with a thinking budget set.
    thinking_params = {"thinking_config": {"thinking_budget": 1024}}
    adapter = GoogleGenAIModelAdapter(
        client=genai.Client(),
        model_id=GOOGLE_THINKING_MODEL,
        default_generation_params=thinking_params,
    )

    prompt = [{"role": "user", "content": "Simulate a weather API response for Paris."}]
    response = adapter.chat(prompt, response_model=ToolOutput)

    parsed = response.structured_response
    assert isinstance(parsed, ToolOutput)
    assert len(parsed.text) > 0
    assert response.content is not None
400+
338401

339402
# =============================================================================
340403
# LiteLLM (routes through OpenAI)

tests/test_interface/test_model_integration/test_model_adapters.py

Lines changed: 63 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2429,15 +2429,16 @@ def __init__(self):
24292429
# thinking_config must NOT be in generation_config
24302430
assert "thinking_config" not in gen_config
24312431

2432-
def test_structured_chat_uses_default_genai_tools_mode(self):
2433-
"""Instructor client uses default GENAI_TOOLS mode, not GENAI_STRUCTURED_OUTPUTS.
2432+
def test_structured_chat_uses_genai_structured_outputs_mode(self):
2433+
"""Instructor client uses GENAI_STRUCTURED_OUTPUTS mode.
24342434
2435-
GENAI_STRUCTURED_OUTPUTS passes Pydantic schemas directly to Gemini's native
2436-
JSON schema output, which rejects additionalProperties (emitted by Pydantic v2).
2437-
GENAI_TOOLS uses map_to_genai_schema() which strips unsupported fields.
2435+
GENAI_TOOLS has upstream bugs with thinking mode (AttributeError on
2436+
MALFORMED_FUNCTION_CALL, AssertionError on duplicate parts).
2437+
GENAI_STRUCTURED_OUTPUTS parses via completion.text, avoiding both.
24382438
"""
24392439
pytest.importorskip("google.genai")
24402440
pytest.importorskip("instructor")
2441+
import instructor
24412442
from maseval.interface.inference.google_genai import GoogleGenAIModelAdapter
24422443

24432444
class MockClient:
@@ -2465,20 +2466,62 @@ def __init__(self):
24652466
)
24662467

24672468
mock_from_genai.assert_called_once()
2468-
# Must NOT pass mode=GENAI_STRUCTURED_OUTPUTS — only GENAI_TOOLS
2469-
# handles additionalProperties in Pydantic schemas correctly.
2470-
_, call_kwargs = mock_from_genai.call_args
2471-
assert "mode" not in call_kwargs
2469+
call_kwargs = mock_from_genai.call_args
2470+
assert call_kwargs.kwargs.get("mode") == instructor.Mode.GENAI_STRUCTURED_OUTPUTS
24722471

2473-
def test_structured_chat_with_dict_field_model(self):
2474-
"""Response models with Dict[str, Any] fields (like ToolSimulatorResponse) work.
2472+
def test_clean_schema_model_strips_additional_properties(self):
2473+
"""_clean_schema_model strips additionalProperties from JSON schema.
24752474
2476-
Dict fields cause Pydantic v2 to emit additionalProperties in the JSON schema.
2477-
This must not be passed raw to the Gemini API.
2475+
Pydantic v2 emits additionalProperties for Dict fields and nested models.
2476+
Gemini's structured output API rejects it. The clean model must strip it
2477+
while preserving parsing and isinstance() behavior.
24782478
"""
24792479
pytest.importorskip("google.genai")
24802480
from maseval.interface.inference.google_genai import GoogleGenAIModelAdapter
24812481

2482+
from typing import Any, Dict
2483+
2484+
from pydantic import BaseModel, Field
2485+
2486+
class Inner(BaseModel):
2487+
name: str
2488+
2489+
class Outer(BaseModel):
2490+
text: str = Field(default="")
2491+
details: Dict[str, Any] = Field(default_factory=dict)
2492+
inner: Inner = Field(default_factory=lambda: Inner(name=""))
2493+
2494+
# Original schema has additionalProperties
2495+
original_schema = Outer.model_json_schema()
2496+
assert any("additionalProperties" in str(v) for v in original_schema.values()) or "additionalProperties" in str(
2497+
original_schema.get("$defs", {})
2498+
)
2499+
2500+
# Cleaned model's schema does not
2501+
CleanOuter = GoogleGenAIModelAdapter._clean_schema_model(Outer)
2502+
clean_schema = CleanOuter.model_json_schema() # ty: ignore[unresolved-attribute]
2503+
assert "additionalProperties" not in str(clean_schema)
2504+
2505+
# Parsing still works and returns instances of the original class
2506+
instance = CleanOuter.model_validate({"text": "hi", "details": {"k": "v"}, "inner": {"name": "test"}}) # ty: ignore[unresolved-attribute]
2507+
assert isinstance(instance, Outer)
2508+
assert instance.text == "hi"
2509+
assert instance.details == {"k": "v"}
2510+
assert instance.inner.name == "test"
2511+
2512+
def test_structured_chat_passes_clean_model_to_instructor(self):
2513+
"""_structured_chat wraps the response model to strip additionalProperties."""
2514+
pytest.importorskip("google.genai")
2515+
from maseval.interface.inference.google_genai import GoogleGenAIModelAdapter
2516+
2517+
from typing import Any, Dict
2518+
2519+
from pydantic import BaseModel, Field
2520+
2521+
class ToolOutput(BaseModel):
2522+
text: str = Field(default="", description="Description")
2523+
details: Dict[str, Any] = Field(default_factory=dict, description="Structured data")
2524+
24822525
class MockClient:
24832526
class Models:
24842527
def generate_content(self, model, contents, config=None):
@@ -2497,27 +2540,21 @@ def __init__(self):
24972540
mock_instructor.chat.completions.create.return_value = mock_result
24982541
adapter._instructor_client = mock_instructor
24992542

2500-
# Use a model with Dict[str, Any] — same pattern as ToolSimulatorResponse
2501-
from typing import Any, Dict
2502-
2503-
from pydantic import BaseModel, Field
2504-
2505-
class ToolOutput(BaseModel):
2506-
text: str = Field(default="", description="Description")
2507-
details: Dict[str, Any] = Field(default_factory=dict, description="Structured data")
2508-
2509-
response = adapter._structured_chat(
2543+
adapter._structured_chat(
25102544
messages=[{"role": "user", "content": "Hi"}],
25112545
response_model=ToolOutput,
25122546
generation_params={"temperature": 0.5},
25132547
)
25142548

2515-
# Verify the call went through with correct kwargs structure
25162549
call_kwargs = mock_instructor.chat.completions.create.call_args.kwargs
2517-
assert call_kwargs["response_model"] is ToolOutput
2550+
passed_model = call_kwargs["response_model"]
2551+
# Must be a subclass (not the original) with clean schema
2552+
assert passed_model is not ToolOutput
2553+
assert issubclass(passed_model, ToolOutput)
2554+
assert "additionalProperties" not in str(passed_model.model_json_schema())
2555+
# Generation params still wrapped correctly
25182556
gen_config = call_kwargs.get("generation_config", {})
25192557
assert gen_config.get("temperature") == 0.5
2520-
assert response.structured_response is mock_result
25212558

25222559

25232560
@pytest.mark.interface

0 commit comments

Comments
 (0)