Skip to content

Commit 7e46658

Browse files
committed
feat(providers): enhance OpenAI tool history sanitization
Add a sanitize_openai_tool_history function that enforces strict message pairing in OpenAI-compatible provider sequences:
- Drops unpaired assistant tool_calls and orphan tool responses
- Keeps only the paired subset when mixing paired/unpaired tool_calls
- Preserves valid tool chains while removing invalid sequences
- Includes comprehensive test coverage for various edge cases

Also fixes MCP schema variable-name collisions detected by mypy.

Generated with Ripperdoc
Co-Authored-By: Ripperdoc
1 parent 0310b20 commit 7e46658

5 files changed

Lines changed: 247 additions & 10 deletions

File tree

ripperdoc/core/providers/base.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,123 @@ def _tool_result_ids(msg: Dict[str, Any]) -> set[str]:
226226
return sanitized
227227

228228

229+
def sanitize_openai_tool_history(normalized_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Rewrite an OpenAI chat-completions history so tool traffic is strictly paired.

    The returned list satisfies the pairing rules OpenAI-compatible endpoints
    require:

    1. Assistant ``tool_calls`` with no later matching ``role=tool`` response
       are dropped.
    2. ``role=tool`` messages that do not answer an earlier assistant
       ``tool_call`` are dropped.
    3. An assistant turn mixing paired and unpaired ``tool_calls`` is trimmed
       to the paired subset.
    4. Matching ``role=tool`` messages are folded to sit immediately after the
       assistant turn that issued them; intervening messages are deferred
       until after the tool results.
    """

    def _stripped(raw: Any) -> str:
        # Tool-call ids may be None or padded; compare them in canonical form.
        return str(raw or "").strip()

    # Index every tool response by its tool_call_id so pairing checks are O(1).
    responses_by_id: Dict[str, List[int]] = {}
    for position, entry in enumerate(normalized_messages):
        if entry.get("role") != "tool":
            continue
        response_id = _stripped(entry.get("tool_call_id"))
        if response_id:
            responses_by_id.setdefault(response_id, []).append(position)

    sanitized: List[Dict[str, Any]] = []
    # Indices of tool messages already folded in behind their assistant turn.
    claimed_responses: set[int] = set()
    total = len(normalized_messages)
    cursor = 0

    while cursor < total:
        msg = normalized_messages[cursor]
        role = msg.get("role")

        if role == "tool":
            # Either this response was already folded in behind its assistant
            # turn, or it never had one; it is skipped here either way.
            if cursor not in claimed_responses:
                logger.debug(
                    "[provider_clients] Dropped orphan OpenAI tool response",
                    extra={"message_index": cursor, "tool_call_id": _stripped(msg.get("tool_call_id"))},
                )
            cursor += 1
            continue

        if role != "assistant":
            sanitized.append(msg)
            cursor += 1
            continue

        calls = msg.get("tool_calls")
        if not isinstance(calls, list) or not calls:
            # Plain assistant text: nothing to pair.
            sanitized.append(msg)
            cursor += 1
            continue

        # Keep only calls that still have an unclaimed response later on.
        kept_calls: List[Dict[str, Any]] = []
        kept_ids: List[str] = []
        for call in calls:
            if not isinstance(call, dict):
                continue
            call_id = _stripped(call.get("id"))
            if not call_id:
                continue
            later_positions = responses_by_id.get(call_id, [])
            if any(pos > cursor and pos not in claimed_responses for pos in later_positions):
                kept_calls.append(call)
                kept_ids.append(call_id)

        if not kept_calls:
            logger.debug(
                "[provider_clients] Dropped OpenAI assistant message with unpaired tool_calls",
                extra={"message_index": cursor},
            )
            cursor += 1
            continue

        if len(kept_calls) != len(calls):
            logger.debug(
                "[provider_clients] Sanitized OpenAI assistant tool_calls to paired subset",
                extra={
                    "message_index": cursor,
                    "before_count": len(calls),
                    "after_count": len(kept_calls),
                },
            )

        patched = dict(msg)
        patched["tool_calls"] = kept_calls
        sanitized.append(patched)

        # Scan forward to the next assistant turn, folding matching tool
        # responses directly behind this one and deferring everything else.
        expected = set(kept_ids)
        matched: set[str] = set()
        held_back: List[Dict[str, Any]] = []
        scan = cursor + 1
        while scan < total:
            candidate = normalized_messages[scan]
            candidate_role = candidate.get("role")
            if candidate_role == "assistant":
                break

            if candidate_role == "tool":
                candidate_id = _stripped(candidate.get("tool_call_id"))
                if candidate_id in expected and candidate_id not in matched:
                    sanitized.append(candidate)
                    claimed_responses.add(scan)
                    matched.add(candidate_id)
                else:
                    logger.debug(
                        "[provider_clients] Dropped orphan or duplicate OpenAI tool response",
                        extra={"message_index": scan, "tool_call_id": candidate_id},
                    )
                scan += 1
                # Stop scanning early once every expected response is placed.
                if expected <= matched:
                    break
                continue

            held_back.append(candidate)
            scan += 1

        sanitized.extend(held_back)
        cursor = scan

    return sanitized
344+
345+
229346
def _retry_delay_seconds(attempt: int, base_delay: float = 0.5, max_delay: float = 32.0) -> float:
230347
"""Calculate exponential backoff with jitter."""
231348
capped_base: float = float(min(base_delay * (2 ** max(0, attempt - 1)), max_delay))

ripperdoc/core/providers/openai_non_oauth_strategies.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
ProviderResponse,
2525
call_with_timeout_and_retries,
2626
iter_with_timeout,
27+
sanitize_openai_tool_history,
2728
sanitize_tool_history,
2829
)
2930
from ripperdoc.core.providers.openai_responses import (
@@ -499,9 +500,12 @@ async def call(
499500
default_headers: Optional[Dict[str, str]] = None,
500501
) -> ProviderResponse:
501502
openai_tools = await build_openai_tool_schemas(tools)
503+
sanitized_messages = sanitize_openai_tool_history(
504+
sanitize_tool_history(list(normalized_messages))
505+
)
502506
openai_messages: List[Dict[str, object]] = [
503507
{"role": "system", "content": system_prompt}
504-
] + sanitize_tool_history(list(normalized_messages))
508+
] + sanitized_messages
505509

506510
logger.debug(
507511
"[openai_client] Preparing request",
@@ -716,7 +720,9 @@ async def call(
716720
) -> ProviderResponse:
717721
openai_tools = await build_openai_tool_schemas(tools)
718722
response_tools = convert_chat_function_tools_to_responses_tools(openai_tools)
719-
sanitized_messages = sanitize_tool_history(list(normalized_messages))
723+
sanitized_messages = sanitize_openai_tool_history(
724+
sanitize_tool_history(list(normalized_messages))
725+
)
720726
response_input = build_input_from_normalized_messages(
721727
cast(List[Dict[str, Any]], sanitized_messages),
722728
assistant_text_type="output_text",

ripperdoc/core/providers/openai_oauth_codex.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
ProgressCallback,
2222
ProviderResponse,
2323
call_with_timeout_and_retries,
24+
sanitize_openai_tool_history,
2425
sanitize_tool_history,
2526
)
2627
from ripperdoc.core.providers.error_mapping import (
@@ -147,7 +148,9 @@ async def call_oauth_codex(
147148
)
148149

149150
openai_tools = await build_openai_tool_schemas(tools)
150-
sanitized_messages = sanitize_tool_history(list(normalized_messages))
151+
sanitized_messages = sanitize_openai_tool_history(
152+
sanitize_tool_history(list(normalized_messages))
153+
)
151154
response_input = _build_codex_responses_input(
152155
cast(List[Dict[str, Any]], sanitized_messages),
153156
assistant_text_type="output_text",

ripperdoc/utils/mcp.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,9 @@ def _coerce_sdk_schema(value: Any) -> dict[str, Any]:
127127

128128
if hasattr(value, "model_json_schema") and callable(value.model_json_schema):
129129
try:
130-
schema = value.model_json_schema()
131-
if isinstance(schema, dict):
132-
return schema
130+
json_schema = value.model_json_schema()
131+
if isinstance(json_schema, dict):
132+
return json_schema
133133
except (TypeError, ValueError, AttributeError):
134134
pass
135135

@@ -153,10 +153,10 @@ def _coerce_sdk_schema(value: Any) -> dict[str, Any]:
153153
key_str = str(key)
154154
properties[key_str] = _coerce_sdk_schema(item)
155155
required.append(key_str)
156-
schema: dict[str, Any] = {"type": "object", "properties": properties}
156+
schema_dict: dict[str, Any] = {"type": "object", "properties": properties}
157157
if required:
158-
schema["required"] = required
159-
return schema
158+
schema_dict["required"] = required
159+
return schema_dict
160160

161161
if isinstance(value, (list, tuple)):
162162
items = _coerce_sdk_schema(value[0]) if len(value) == 1 else {}

tests/test_messages.py

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@
8888
is_hidden_meta_message,
8989
normalize_messages_for_api,
9090
)
91-
from ripperdoc.core.providers.base import sanitize_tool_history
91+
from ripperdoc.core.providers.base import sanitize_openai_tool_history, sanitize_tool_history
9292

9393

9494
def test_create_user_message():
@@ -763,6 +763,117 @@ def test_sanitize_tool_history_replays_real_session_parallel_git_tool_calls():
763763
]
764764

765765

766+
def test_sanitize_openai_tool_history_drops_unpaired_assistant_tool_calls():
    """An assistant tool_call with no later tool response is removed entirely."""
    unpaired_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "Read", "arguments": '{"path":"README.md"}'},
    }
    history = [{"role": "assistant", "content": None, "tool_calls": [unpaired_call]}]

    assert sanitize_openai_tool_history(history) == []
784+
785+
786+
def test_sanitize_openai_tool_history_drops_orphan_tool_messages():
    """A role=tool message without a preceding assistant tool_call is dropped."""
    user_turn = {"role": "user", "content": "hello"}
    orphan = {"role": "tool", "tool_call_id": "orphan_call", "content": "result"}

    result = sanitize_openai_tool_history([user_turn, orphan])

    assert result == [{"role": "user", "content": "hello"}]
795+
796+
797+
def test_sanitize_openai_tool_history_keeps_only_paired_assistant_tool_calls():
    """Mixing paired and unpaired tool_calls keeps only the paired subset."""
    read_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "Read", "arguments": '{"path":"README.md"}'},
    }
    glob_call = {
        "id": "call_2",
        "type": "function",
        "function": {"name": "Glob", "arguments": '{"pattern":"*.py"}'},
    }
    glob_result = {"role": "tool", "tool_call_id": "call_2", "content": "matched files"}
    history = [
        {
            "role": "assistant",
            "content": "running tools",
            "tool_calls": [read_call, glob_call],
        },
        glob_result,
    ]

    result = sanitize_openai_tool_history(history)

    assert len(result) == 2
    assert result[0]["role"] == "assistant"
    assert [call["id"] for call in result[0]["tool_calls"]] == ["call_2"]
    assert result[1] == {"role": "tool", "tool_call_id": "call_2", "content": "matched files"}
824+
825+
826+
def test_sanitize_openai_tool_history_preserves_valid_tool_chain():
    """A correctly paired assistant/tool sequence passes through unchanged."""
    read_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "Read", "arguments": '{"path":"README.md"}'},
    }
    history = [
        {"role": "user", "content": "read the file"},
        {"role": "assistant", "content": None, "tool_calls": [read_call]},
        {"role": "tool", "tool_call_id": "call_1", "content": "file contents"},
        {"role": "assistant", "content": "done"},
    ]

    assert sanitize_openai_tool_history(history) == history
847+
848+
849+
def test_sanitize_openai_tool_history_reorders_intervening_messages_after_tool_results():
    """Messages between a tool_call and its result are folded after the result."""
    read_call = {
        "id": "call_1",
        "type": "function",
        "function": {"name": "Read", "arguments": '{"path":"README.md"}'},
    }
    history = [
        {"role": "assistant", "content": None, "tool_calls": [read_call]},
        {"role": "user", "content": "PreToolUse:Read hook additional context"},
        {"role": "tool", "tool_call_id": "call_1", "content": "file contents"},
        {"role": "assistant", "content": "done"},
    ]

    result = sanitize_openai_tool_history(history)

    assert result == [history[0], history[2], history[1], history[3]]
875+
876+
766877
def test_normalize_messages_with_reasoning_metadata():
767878
"""Ensure reasoning metadata is preserved for OpenAI-style messages."""
768879
assistant = create_assistant_message(

0 commit comments

Comments
 (0)