From c1f963be49416b18048542cd57545055935e3c68 Mon Sep 17 00:00:00 2001 From: Chisanan232 Date: Sun, 21 Jun 2026 22:53:38 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20(adapters):=20Fix=20OpenAI=20Age?= =?UTF-8?q?nts=20adapter=20to=20govern=20current=20framework=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OpenAI Agents adapter targeted a stale framework API and was a silent no-op, so agents on the OpenAI Agents SDK ran ungoverned (fail-open). It patched `openai.agents` + `FunctionTool.__call__`, but the shipped framework is the top-level `agents` package whose tools run via a per-instance `on_invoke_tool(ctx, json)` coroutine — there is no `__call__`, so the class-level patch never applied. Fix: - Detect and patch the `agents` package (not `openai.agents`). - Wrap `FunctionTool.__init__` / `Handoff.__init__` so every instance's `on_invoke_tool` / `on_invoke_handoff` coroutine is wrapped with the pre-execution governance check, deny/pending handling, result recording, and topology-edge emission. Deny returns the framework's documented string error, blocking the call without aborting the run. - Key `is_available()` on the real `agents` module. - Add `openai-agents` as a dev/test dependency so the importorskip integration test drives a REAL tool call in CI. Tests now drive the per-instance `on_invoke_tool` contract (and the real framework via importorskip) and fail if the patch reverts to a no-op, asserting genuine governance rather than just that apply() ran. Closes AAASM-3528 Co-Authored-By: Claude Opus 4.8 (1M context) --- .../adapters/openai_agents/adapter.py | 10 +- .../adapters/openai_agents/patch.py | 192 ++++++++++----- pyproject.toml | 7 + test/bench/test_latency_contracts.py | 25 +- test/bench/test_patched_call_overhead.py | 27 +- .../test_direct_functiontool_integration.py | 116 ++++++--- ...test_openai_mcp_coexistence_integration.py | 25 +- .../test_spawn_lineage_integration.py | 27 +- .../openai_agents/test_edge_emission.py | 49 ++-- .../unit/adapters/openai_agents/test_patch.py | 230 ++++++++++++------ .../openai_agents/test_runner_spawn_patch.py | 149 +++++------- test/unit/adapters/test_adapter_lifecycle.py | 20 +- test/unit/adapters/test_registry.py | 8 + uv.lock | 25 +- 14 files changed, 582 insertions(+), 328 deletions(-) diff --git a/agent_assembly/adapters/openai_agents/adapter.py b/agent_assembly/adapters/openai_agents/adapter.py index 05a59df8..f0ea5778 100644 --- a/agent_assembly/adapters/openai_agents/adapter.py +++ b/agent_assembly/adapters/openai_agents/adapter.py @@ -30,9 +30,15 @@ def get_supported_versions(self) -> list[str]: return [">=1.0.0"] def is_available(self) -> bool: - """Check specifically for openai.agents module, not just openai base.""" + """Check for the OpenAI Agents framework's top-level ``agents`` package. + + WHY ``agents`` and not ``openai.agents`` (AAASM-3528): the shipped + ``openai-agents`` distribution exposes a top-level ``agents`` package; + ``openai.agents`` does not exist. Detecting the wrong module made the + adapter report unavailable (or, when patched, a silent no-op). + """ try: - return importlib.util.find_spec("openai.agents") is not None + return importlib.util.find_spec("agents") is not None except (ModuleNotFoundError, ValueError): return False diff --git a/agent_assembly/adapters/openai_agents/patch.py b/agent_assembly/adapters/openai_agents/patch.py index 26af7984..3f3e5249 100644 --- a/agent_assembly/adapters/openai_agents/patch.py +++ b/agent_assembly/adapters/openai_agents/patch.py @@ -1,4 +1,26 @@ -"""OpenAI Agents patch module.""" +"""OpenAI Agents patch module. + +Installs SDK-layer governance hooks for the OpenAI Agents framework (the +top-level ``agents`` package shipped by ``openai-agents``). + +WHY this targets ``agents`` and ``on_invoke_tool`` (AAASM-3528): +The previous revision patched ``openai.agents`` and ``FunctionTool.__call__``. +Neither exists in the shipped framework: the package is the top-level ``agents`` +module, and a tool is executed by the runner calling the tool instance's +``on_invoke_tool(ctx, input_json)`` coroutine — there is no ``__call__``. Because +that class-level attribute does not exist, the old patch silently never applied, +so every OpenAI-Agents tool call ran ungoverned (fail-open). This module patches +the real interception point. + +Interception strategy: ``FunctionTool.on_invoke_tool`` is a *per-instance* +callable field, not a class method, so it cannot be patched once on the class. +Instead we wrap the dataclass ``__init__`` so that every ``FunctionTool`` +constructed after ``apply()`` has its ``on_invoke_tool`` coroutine wrapped with +the governance pre-execution check, deny/pending handling, and result recording. +``Handoff.on_invoke_handoff`` is wrapped the same way for spawn-context tracking +and topology edge emission. ``Runner.run`` is a real classmethod and is wrapped +directly. +""" from __future__ import annotations @@ -17,17 +39,20 @@ ) from agent_assembly.core.spawn import _SPAWN_CTX, SpawnContext, spawn_context_scope -_ORIGINAL_FUNCTION_TOOL_CALL = "_agent_assembly_original_openai_agents_function_tool_call" +_ORIGINAL_FUNCTION_TOOL_INIT = "_agent_assembly_original_openai_agents_function_tool_init" _PATCHED_FLAG = "_agent_assembly_openai_agents_function_tool_patched" +_WRAPPED_INVOKE_FLAG = "_agent_assembly_openai_agents_on_invoke_wrapped" _ORIGINAL_RUNNER_RUN = "_agent_assembly_original_openai_agents_runner_run" _RUNNER_PATCHED_FLAG = "_agent_assembly_openai_agents_runner_patched" -_ORIGINAL_HANDOFF_CALL = "_agent_assembly_original_openai_agents_handoff_call" +_ORIGINAL_HANDOFF_INIT = "_agent_assembly_original_openai_agents_handoff_init" _HANDOFF_PATCHED_FLAG = "_agent_assembly_openai_agents_handoff_patched" _PROCESS_AGENT_ID: str | None = None _EDGE_EMITTER: Any = None _MAX_AUDIT_RESULT_CHARS = 2000 _MAX_DELEGATION_REASON_CHARS = 256 -_OPENAI_AGENTS_MODULE = "openai.agents" +# The shipped framework is the top-level ``agents`` package (openai-agents), +# NOT ``openai.agents`` (which does not exist). See AAASM-3528. +_OPENAI_AGENTS_MODULE = "agents" def set_edge_emitter(emitter: Any) -> None: @@ -51,25 +76,25 @@ def apply(self) -> bool: function_tool_cls = _load_openai_agents_function_tool_class() if function_tool_cls is None: return False - _apply_function_tool_call_patch(function_tool_cls, self.callback_handler) + _apply_function_tool_patch(function_tool_cls, self.callback_handler) runner_cls = _load_openai_agents_runner_class() if runner_cls is not None: _apply_runner_run_patch(runner_cls, self.process_agent_id) handoff_cls = _load_openai_agents_handoff_class() if handoff_cls is not None: - _apply_handoff_call_patch(handoff_cls, self.process_agent_id) + _apply_handoff_patch(handoff_cls, self.process_agent_id) return True def revert(self) -> None: handoff_cls = _load_openai_agents_handoff_class() if handoff_cls is not None: - _revert_handoff_call_patch(handoff_cls) + _revert_handoff_patch(handoff_cls) runner_cls = _load_openai_agents_runner_class() if runner_cls is not None: _revert_runner_run_patch(runner_cls) function_tool_cls = _load_openai_agents_function_tool_class() if function_tool_cls is not None: - _revert_function_tool_call_patch(function_tool_cls) + _revert_function_tool_patch(function_tool_cls) set_process_agent_id(None) return None @@ -131,50 +156,71 @@ def _extract_handoff_delegation_reason(handoff_obj: Any) -> str: return "handoff" -def _apply_handoff_call_patch(handoff_cls: type[Any], process_agent_id: str | None) -> None: +def _apply_handoff_patch(handoff_cls: type[Any], process_agent_id: str | None) -> None: + """Wrap ``Handoff.__init__`` so each instance's ``on_invoke_handoff`` runs inside + a spawn-context scope and emits a ``delegates_to`` topology edge. + + WHY ``__init__`` and not a class method: like ``FunctionTool``, a handoff is + invoked through its per-instance ``on_invoke_handoff`` coroutine; there is no + ``Handoff.__call__`` to patch on the class. + """ if getattr(handoff_cls, _HANDOFF_PATCHED_FLAG, False): return None - if not callable(handoff_cls): + original_init = handoff_cls.__init__ + + @wraps(original_init) + def patched_init(self: Any, *args: Any, **kwargs: Any) -> None: + original_init(self, *args, **kwargs) + _wrap_on_invoke_handoff(self, process_agent_id) + + setattr(handoff_cls, _ORIGINAL_HANDOFF_INIT, original_init) + handoff_cls.__init__ = patched_init + setattr(handoff_cls, _HANDOFF_PATCHED_FLAG, True) + return None + + +def _wrap_on_invoke_handoff(handoff_obj: Any, process_agent_id: str | None) -> None: + original_invoke = getattr(handoff_obj, "on_invoke_handoff", None) + if not callable(original_invoke): + return None + if getattr(original_invoke, _WRAPPED_INVOKE_FLAG, False): return None - original_call = handoff_cls.__call__ - @wraps(original_call) - async def patched_call(self: Any, *args: Any, **kwargs: Any) -> Any: + @wraps(original_invoke) + async def wrapped_invoke(ctx: Any, input_json: Any) -> Any: spawn_ctx = SpawnContext( parent_agent_id=process_agent_id or "", depth=_current_spawn_depth(), spawned_by_tool=None, - delegation_reason=_extract_handoff_delegation_reason(self), + delegation_reason=_extract_handoff_delegation_reason(handoff_obj), ) with spawn_context_scope(spawn_ctx): - result = original_call(self, *args, **kwargs) + result = original_invoke(ctx, input_json) if inspect.isawaitable(result): result = await result - # Emit a DelegatesTo edge from the delegating agent to the handoff target. if _EDGE_EMITTER is not None and process_agent_id: - target_id = getattr(self, "agent_name", None) or getattr(self, "name", None) or "unknown" + target_id = getattr(handoff_obj, "agent_name", None) or getattr(handoff_obj, "name", None) or "unknown" emit = getattr(_EDGE_EMITTER, "emit", None) if callable(emit): - reason = _extract_handoff_delegation_reason(self) + reason = _extract_handoff_delegation_reason(handoff_obj) emit(process_agent_id, str(target_id), "delegates_to", {"reason": reason}) return result - setattr(handoff_cls, _ORIGINAL_HANDOFF_CALL, original_call) - handoff_cls.__call__ = patched_call - setattr(handoff_cls, _HANDOFF_PATCHED_FLAG, True) + setattr(wrapped_invoke, _WRAPPED_INVOKE_FLAG, True) + handoff_obj.on_invoke_handoff = wrapped_invoke return None -def _revert_handoff_call_patch(handoff_cls: type[Any]) -> None: +def _revert_handoff_patch(handoff_cls: type[Any]) -> None: if not getattr(handoff_cls, _HANDOFF_PATCHED_FLAG, False): return None - original_call = getattr(handoff_cls, _ORIGINAL_HANDOFF_CALL, None) - if callable(original_call): - handoff_cls.__call__ = original_call - for attr in (_ORIGINAL_HANDOFF_CALL, _HANDOFF_PATCHED_FLAG): + original_init = getattr(handoff_cls, _ORIGINAL_HANDOFF_INIT, None) + if callable(original_init): + handoff_cls.__init__ = original_init + for attr in (_ORIGINAL_HANDOFF_INIT, _HANDOFF_PATCHED_FLAG): if hasattr(handoff_cls, attr): delattr(handoff_cls, attr) return None @@ -250,6 +296,11 @@ def _resolve_agent_id(ctx: Any) -> str | None: candidate = getattr(ctx, "agent_id", None) if isinstance(candidate, str) and candidate: return candidate + agent = getattr(ctx, "agent", None) + if agent is not None: + name = getattr(agent, "name", None) + if isinstance(name, str) and name: + return name return _get_process_agent_id() @@ -324,31 +375,23 @@ async def _wait_for_async_tool_approval( return result -def _build_tool_result_error( +def _build_tool_deny_error( *, tool_name: str, reason: str | None, is_pending_rejection: bool, -) -> object: - try: - module = importlib.import_module(_OPENAI_AGENTS_MODULE) - except ImportError: - module = None - - tool_result_cls = getattr(module, "ToolResult", None) if module is not None else None +) -> str: + """Build the error string returned to the model when a tool call is denied. + + WHY a string: the framework's ``on_invoke_tool`` contract states that returning + a string error message sends the error back to the LLM (instead of raising, + which fails the whole run). Denying by returning a governance error string + blocks execution without aborting the agent. + """ reason_text = reason or "No reason provided." if is_pending_rejection: - error_message = f"Approval denied for tool '{tool_name}': {reason_text}" - else: - error_message = f"Action blocked by governance policy for tool '{tool_name}': {reason_text}" - - if isinstance(tool_result_cls, type): - try: - return tool_result_cls(error=error_message) - except Exception: - pass - - return {"error": error_message} + return f"Approval denied for tool '{tool_name}': {reason_text}" + return f"Action blocked by governance policy for tool '{tool_name}': {reason_text}" def _truncate_result_for_audit(result: object) -> str: @@ -396,20 +439,43 @@ def _is_governance_error(error: Exception) -> bool: return True -def _apply_function_tool_call_patch(function_tool_cls: type[Any], callback_handler: Any) -> None: +def _apply_function_tool_patch(function_tool_cls: type[Any], callback_handler: Any) -> None: + """Patch ``FunctionTool.__init__`` so every tool instance is governed. + + Each ``FunctionTool`` carries its execution logic in the per-instance + ``on_invoke_tool`` coroutine that the runner calls. We wrap that coroutine at + construction time with the governance pre-execution check; this is the actual + interception point in the shipped framework (AAASM-3528). + """ if getattr(function_tool_cls, _PATCHED_FLAG, False): return None - if not callable(function_tool_cls): + original_init = function_tool_cls.__init__ + + @wraps(original_init) + def patched_init(self: Any, *args: Any, **kwargs: Any) -> None: + original_init(self, *args, **kwargs) + _wrap_on_invoke_tool(self, callback_handler) + + setattr(function_tool_cls, _ORIGINAL_FUNCTION_TOOL_INIT, original_init) + function_tool_cls.__init__ = patched_init + setattr(function_tool_cls, _PATCHED_FLAG, True) + return None + + +def _wrap_on_invoke_tool(tool_obj: Any, callback_handler: Any) -> None: + """Wrap a single tool instance's ``on_invoke_tool`` coroutine with governance.""" + original_invoke = getattr(tool_obj, "on_invoke_tool", None) + if not callable(original_invoke): + return None + if getattr(original_invoke, _WRAPPED_INVOKE_FLAG, False): return None - original_call = function_tool_cls.__call__ - @wraps(original_call) - async def patched_call(self: Any, ctx: Any, tool_input: Any, *args: Any, **kwargs: Any) -> Any: - tool_name = str(getattr(self, "name", self.__class__.__name__)) + @wraps(original_invoke) + async def governed_invoke(ctx: Any, tool_input: Any) -> Any: + tool_name = str(getattr(tool_obj, "name", tool_obj.__class__.__name__)) agent_id = _resolve_agent_id(ctx) - decision: object = {"status": "allow"} governance_failed = False try: decision = await _invoke_async_tool_check( @@ -435,7 +501,7 @@ async def patched_call(self: Any, ctx: Any, tool_input: Any, *args: Any, **kwarg status, reason = _normalize_decision(final_decision) if status == "deny": - blocked_result = _build_tool_result_error( + blocked_result = _build_tool_deny_error( tool_name=tool_name, reason=reason, is_pending_rejection=is_pending_flow, @@ -454,7 +520,7 @@ async def patched_call(self: Any, ctx: Any, tool_input: Any, *args: Any, **kwarg if not governance_failed: raise - result = original_call(self, ctx, tool_input, *args, **kwargs) + result = original_invoke(ctx, tool_input) if inspect.isawaitable(result): result = await result @@ -469,20 +535,20 @@ async def patched_call(self: Any, ctx: Any, tool_input: Any, *args: Any, **kwarg ) return result - setattr(function_tool_cls, _ORIGINAL_FUNCTION_TOOL_CALL, original_call) - function_tool_cls.__call__ = patched_call - setattr(function_tool_cls, _PATCHED_FLAG, True) + setattr(governed_invoke, _WRAPPED_INVOKE_FLAG, True) + tool_obj.on_invoke_tool = governed_invoke + return None -def _revert_function_tool_call_patch(function_tool_cls: type[Any]) -> None: +def _revert_function_tool_patch(function_tool_cls: type[Any]) -> None: if not getattr(function_tool_cls, _PATCHED_FLAG, False): return None - original_call = getattr(function_tool_cls, _ORIGINAL_FUNCTION_TOOL_CALL, None) - if callable(original_call): - function_tool_cls.__call__ = original_call + original_init = getattr(function_tool_cls, _ORIGINAL_FUNCTION_TOOL_INIT, None) + if callable(original_init): + function_tool_cls.__init__ = original_init - if hasattr(function_tool_cls, _ORIGINAL_FUNCTION_TOOL_CALL): - delattr(function_tool_cls, _ORIGINAL_FUNCTION_TOOL_CALL) + if hasattr(function_tool_cls, _ORIGINAL_FUNCTION_TOOL_INIT): + delattr(function_tool_cls, _ORIGINAL_FUNCTION_TOOL_INIT) if hasattr(function_tool_cls, _PATCHED_FLAG): delattr(function_tool_cls, _PATCHED_FLAG) diff --git a/pyproject.toml b/pyproject.toml index 299870c1..c6f7f997 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,13 @@ test = [ # installs `dev`), so the function-tool governance regression is actually # exercised instead of skipped. "pydantic-ai>=0.3.0", + # AAASM-3528: dev/test-only (NOT a runtime dependency). The shipped OpenAI + # Agents framework is the top-level `agents` package (NOT `openai.agents`), + # and a tool runs via its per-instance `on_invoke_tool` coroutine (NOT a + # `FunctionTool.__call__`). Installing it lets the `importorskip`-guarded + # integration test drive a real tool call, so a regression to the old + # fail-open no-op patch is actually caught instead of silently skipped. + "openai-agents>=0.1.0", ] pre-commit-ci = [ "pre-commit>=3.5.0,<5", diff --git a/test/bench/test_latency_contracts.py b/test/bench/test_latency_contracts.py index f97b9b7b..66cdae26 100644 --- a/test/bench/test_latency_contracts.py +++ b/test/bench/test_latency_contracts.py @@ -35,8 +35,9 @@ _revert_client_session_patch, ) from agent_assembly.adapters.openai_agents.patch import ( - _apply_function_tool_call_patch, - _revert_function_tool_call_patch, + _apply_function_tool_patch, + _revert_function_tool_patch, + _wrap_on_invoke_tool, ) from agent_assembly.adapters.pydantic_ai.patch import ( _apply_tool_run_patch, @@ -88,10 +89,15 @@ async def _run(self, ctx: Any, args: Any, **kwargs: Any) -> str: class _FakeOpenAIFunctionTool: - name = "bench_tool" + """Shaped like ``agents.FunctionTool``: per-instance ``on_invoke_tool``.""" - async def __call__(self, ctx: Any, input_str: str) -> str: - return "result" + def __init__(self) -> None: + self.name = "bench_tool" + + async def _invoke(ctx: Any, input_str: str) -> str: + return "result" + + self.on_invoke_tool = _invoke class _FakeMCPClientSession: @@ -216,17 +222,18 @@ async def measure() -> list[int]: def test_openai_agents_per_call_latency_under_2ms() -> None: - """Fail if OpenAI Agents patched FunctionTool.__call__() P99 exceeds 2ms.""" + """Fail if OpenAI Agents governed FunctionTool.on_invoke_tool() P99 exceeds 2ms.""" interceptor = _NoopInterceptor() - _apply_function_tool_call_patch(_FakeOpenAIFunctionTool, interceptor) + _apply_function_tool_patch(_FakeOpenAIFunctionTool, interceptor) tool = _FakeOpenAIFunctionTool() + _wrap_on_invoke_tool(tool, interceptor) ctx = type("FakeCtx", (), {"agent_id": None})() async def measure() -> list[int]: samples: list[int] = [] for _ in range(_ITERATIONS): start = time.perf_counter_ns() - await tool(ctx, "benchmark input") + await tool.on_invoke_tool(ctx, "benchmark input") elapsed = time.perf_counter_ns() - start samples.append(elapsed) return samples @@ -234,7 +241,7 @@ async def measure() -> list[int]: try: samples = asyncio.run(measure()) finally: - _revert_function_tool_call_patch(_FakeOpenAIFunctionTool) + _revert_function_tool_patch(_FakeOpenAIFunctionTool) p50, p95, p99 = _percentiles(samples) assert p99 < MAX_PER_CALL_NS, ( diff --git a/test/bench/test_patched_call_overhead.py b/test/bench/test_patched_call_overhead.py index 812ba4bf..a3f1b1c8 100644 --- a/test/bench/test_patched_call_overhead.py +++ b/test/bench/test_patched_call_overhead.py @@ -33,8 +33,9 @@ _revert_client_session_patch, ) from agent_assembly.adapters.openai_agents.patch import ( - _apply_function_tool_call_patch, - _revert_function_tool_call_patch, + _apply_function_tool_patch, + _revert_function_tool_patch, + _wrap_on_invoke_tool, ) from agent_assembly.adapters.pydantic_ai.patch import ( _apply_tool_run_patch, @@ -75,10 +76,15 @@ async def _run(self, ctx: Any, args: Any, **kwargs: Any) -> str: class _BenchOpenAIFunctionTool: - name = "bench_tool" + """Shaped like ``agents.FunctionTool``: per-instance ``on_invoke_tool``.""" - async def __call__(self, ctx: Any, input_str: str) -> str: - return "result" + def __init__(self) -> None: + self.name = "bench_tool" + + async def _invoke(ctx: Any, input_str: str) -> str: + return "result" + + self.on_invoke_tool = _invoke class _BenchMCPClientSession: @@ -180,19 +186,22 @@ def test_openai_agents_patched_call_overhead( noop_interceptor: Any, bench_event_loop: asyncio.AbstractEventLoop, ) -> None: - """Benchmark per-call overhead of governance-patched FunctionTool.__call__().""" - _apply_function_tool_call_patch(_BenchOpenAIFunctionTool, noop_interceptor) + """Benchmark per-call overhead of governance-wrapped FunctionTool.on_invoke_tool().""" + _apply_function_tool_patch(_BenchOpenAIFunctionTool, noop_interceptor) tool = _BenchOpenAIFunctionTool() + # The class patch wraps on_invoke_tool at construction; for instances built + # before apply() (or stub classes) wrap directly to benchmark the hot path. + _wrap_on_invoke_tool(tool, noop_interceptor) ctx = type("FakeCtx", (), {"agent_id": None})() try: def call() -> None: - bench_event_loop.run_until_complete(tool(ctx, "bench input")) + bench_event_loop.run_until_complete(tool.on_invoke_tool(ctx, "bench input")) benchmark(call) finally: - _revert_function_tool_call_patch(_BenchOpenAIFunctionTool) + _revert_function_tool_patch(_BenchOpenAIFunctionTool) @pytest.mark.benchmark(group="patched-call") diff --git a/test/integration/openai_agents/test_direct_functiontool_integration.py b/test/integration/openai_agents/test_direct_functiontool_integration.py index 33396392..27e9a9bb 100644 --- a/test/integration/openai_agents/test_direct_functiontool_integration.py +++ b/test/integration/openai_agents/test_direct_functiontool_integration.py @@ -1,6 +1,15 @@ +"""Integration test against the real ``agents`` (openai-agents) framework. + +WHY this uses the real framework (AAASM-3528): the bug was that the adapter +patched a non-existent ``openai.agents.FunctionTool.__call__``, so it silently +never intercepted anything (fail-open). This test installs the patch and then +drives a genuine ``agents.function_tool`` exactly as the framework's runner does +— ``tool.on_invoke_tool(ctx, args_json)`` — asserting the call is governed. If +the patch reverts to a no-op, the denied tool would execute and this test fails. +""" + from __future__ import annotations -from types import SimpleNamespace from typing import Any import pytest @@ -8,32 +17,25 @@ from agent_assembly.adapters.openai_agents import patch as openai_patch +def _make_tool_context(tool_name: str) -> Any: + """Build the real ``ToolContext`` the framework passes to ``on_invoke_tool``.""" + from agents.tool_context import ToolContext + + return ToolContext( + context=None, + tool_name=tool_name, + tool_call_id=f"call-{tool_name}", + tool_arguments="{}", + ) + + @pytest.mark.asyncio @pytest.mark.integration -async def test_direct_openai_agents_functiontool_blocks_then_allows_followup( - monkeypatch: pytest.MonkeyPatch, -) -> None: - class FakeToolResult: - def __init__(self, *, error: str | None = None, output: Any = None) -> None: - self.error = error - self.output = output - - class FakeFunctionTool: - def __init__(self, name: str) -> None: - self.name = name - - async def __call__(self, ctx: Any, tool_input: Any) -> dict[str, Any]: - return {"name": self.name, "ctx": ctx, "tool_input": tool_input} - - fake_openai_agents_module = SimpleNamespace( - FunctionTool=FakeFunctionTool, - ToolResult=FakeToolResult, - ) - monkeypatch.setattr( - openai_patch.importlib, - "import_module", - lambda name: fake_openai_agents_module if name == "openai.agents" else (_ for _ in ()).throw(ImportError(name)), - ) +async def test_real_openai_agents_functiontool_is_governed() -> None: + pytest.importorskip("agents") + from agents import function_tool + + recorded: list[str] = [] class Interceptor: async def check_tool_start(self, **kwargs: object) -> dict[str, str]: @@ -41,13 +43,65 @@ async def check_tool_start(self, **kwargs: object) -> dict[str, str]: return {"status": "deny", "reason": "blocked by policy"} return {"status": "allow"} + async def record_result(self, **kwargs: object) -> None: + recorded.append(str(kwargs.get("tool_name"))) + patcher = openai_patch.OpenAIAgentsPatch(callback_handler=Interceptor(), process_agent_id="agent-oa") + try: + assert patcher.apply() is True + + # Tools constructed AFTER apply() get their real on_invoke_tool wrapped. + @function_tool(name_override="blocked_tool") + def blocked_tool(value: str) -> str: + """A real tool whose body must NOT run when denied.""" + return f"executed:{value}" + + @function_tool(name_override="safe_tool") + def safe_tool(value: str) -> str: + """A real tool that is allowed and must run.""" + return "real-output" + + ctx_blocked = _make_tool_context("blocked_tool") + ctx_safe = _make_tool_context("safe_tool") + + blocked_result = await blocked_tool.on_invoke_tool(ctx_blocked, '{"value": "x"}') + safe_result = await safe_tool.on_invoke_tool(ctx_safe, '{"value": "x"}') + finally: + patcher.revert() + + # Denied: governance error string returned, tool body NOT executed. + assert isinstance(blocked_result, str) + assert "blocked by policy" in blocked_result + assert "executed:" not in blocked_result + + # Allowed: real tool output returned and result recorded. + assert safe_result == "real-output" + assert "safe_tool" in recorded + + +@pytest.mark.asyncio +@pytest.mark.integration +async def test_real_functiontool_runs_ungoverned_after_revert() -> None: + """Negative control: after revert, a deny decision no longer blocks — proving + the governed state above is a real (non-no-op) interception.""" + pytest.importorskip("agents") + from agents import function_tool + + class DenyAll: + async def check_tool_start(self, **kwargs: object) -> dict[str, str]: + del kwargs + return {"status": "deny", "reason": "should not apply after revert"} + + patcher = openai_patch.OpenAIAgentsPatch(callback_handler=DenyAll(), process_agent_id="agent-oa") assert patcher.apply() is True + patcher.revert() + + @function_tool + def ungoverned(value: str) -> str: + """Constructed after revert — must run normally.""" + return f"executed:{value}" - blocked = await FakeFunctionTool("blocked_tool")(SimpleNamespace(agent_id="agent-oa"), {"step": 1}) - safe = await FakeFunctionTool("safe_tool")(SimpleNamespace(agent_id="agent-oa"), {"step": 2}) + ctx = _make_tool_context("ungoverned") + result = await ungoverned.on_invoke_tool(ctx, '{"value": "y"}') - assert isinstance(blocked, FakeToolResult) - assert isinstance(blocked.error, str) - assert "blocked by policy" in blocked.error - assert safe["name"] == "safe_tool" + assert result == "executed:y" diff --git a/test/integration/openai_agents/test_openai_mcp_coexistence_integration.py b/test/integration/openai_agents/test_openai_mcp_coexistence_integration.py index f3eea92b..9564955d 100644 --- a/test/integration/openai_agents/test_openai_mcp_coexistence_integration.py +++ b/test/integration/openai_agents/test_openai_mcp_coexistence_integration.py @@ -22,27 +22,23 @@ async def call_tool(self, name: str, arguments: dict[str, Any] | None = None) -> payload = arguments or {} return f"mcp:{name}:{payload.get('q', '')}" - class FakeToolResult: - def __init__(self, *, error: str | None = None, output: Any = None) -> None: - self.error = error - self.output = output - class FakeFunctionTool: + """Shaped like ``agents.FunctionTool``: per-instance ``on_invoke_tool``.""" + def __init__(self, name: str) -> None: self.name = name - async def __call__(self, ctx: Any, tool_input: Any) -> dict[str, Any]: - mcp_result = await FakeMCPClientSession().call_tool("mcp_tool", {"q": "hello"}) - return {"name": self.name, "ctx": ctx, "tool_input": tool_input, "mcp_result": mcp_result} + async def _invoke(ctx: Any, tool_input: Any) -> dict[str, Any]: + mcp_result = await FakeMCPClientSession().call_tool("mcp_tool", {"q": "hello"}) + return {"name": self.name, "ctx": ctx, "tool_input": tool_input, "mcp_result": mcp_result} - fake_openai_agents_module = SimpleNamespace( - FunctionTool=FakeFunctionTool, - ToolResult=FakeToolResult, - ) + self.on_invoke_tool = _invoke + + fake_openai_agents_module = SimpleNamespace(FunctionTool=FakeFunctionTool) fake_mcp_module = SimpleNamespace(ClientSession=FakeMCPClientSession) def fake_import_module(name: str) -> object: - if name == "openai.agents": + if name == "agents": return fake_openai_agents_module if name == "mcp": return fake_mcp_module @@ -78,7 +74,8 @@ async def record_result(self, **kwargs: object) -> None: is True ) - result = await FakeFunctionTool("openai_tool")(SimpleNamespace(agent_id="agent-openai"), {"step": "run"}) + tool = FakeFunctionTool("openai_tool") + result = await tool.on_invoke_tool(SimpleNamespace(agent_id="agent-openai"), {"step": "run"}) assert result["name"] == "openai_tool" assert result["mcp_result"] == "mcp:mcp_tool:hello" diff --git a/test/integration/test_spawn_lineage_integration.py b/test/integration/test_spawn_lineage_integration.py index 6d84f2c8..33cd398a 100644 --- a/test/integration/test_spawn_lineage_integration.py +++ b/test/integration/test_spawn_lineage_integration.py @@ -194,23 +194,28 @@ def test_exception_in_scope_still_resets_ctx() -> None: async def test_chained_handoff_abc_depth_and_parent_agent_id() -> None: """Chained A→B→C handoffs: depth increments 1→2→3; spawned_by_tool is None.""" from agent_assembly.adapters.openai_agents.patch import ( - _apply_handoff_call_patch, - _revert_handoff_call_patch, + _apply_handoff_patch, + _revert_handoff_patch, ) captured: list[SpawnContext] = [] class FakeHandoff: + """Shaped like ``agents.Handoff``: per-instance ``on_invoke_handoff``.""" + def __init__(self, tool_description: str = "") -> None: self.tool_description = tool_description + self.agent_name = "downstream" - async def __call__(self, *_args: object, **_kwargs: object) -> str: - sc = _SPAWN_CTX.get() - if sc is not None: - captured.append(sc) - return "result" + async def _invoke(ctx: object, input_json: object) -> str: + sc = _SPAWN_CTX.get() + if sc is not None: + captured.append(sc) + return "result" + + self.on_invoke_handoff = _invoke - _apply_handoff_call_patch(FakeHandoff, "process-agent") + _apply_handoff_patch(FakeHandoff, "process-agent") try: # Simulate Runner.run establishing depth=1 context for Agent A ctx_a = SpawnContext( @@ -221,16 +226,16 @@ async def __call__(self, *_args: object, **_kwargs: object) -> str: with spawn_context_scope(ctx_a): # Agent A hands off to B — patch creates depth=2 spawn context handoff_b = FakeHandoff(tool_description="Transfer to agent B") - await handoff_b() + await handoff_b.on_invoke_handoff(None, "{}") # Simulate Runner establishing depth=2 context for Agent B ctx_b = SpawnContext(parent_agent_id="process-agent", depth=2, spawned_by_tool=None) with spawn_context_scope(ctx_b): # Agent B hands off to C — patch creates depth=3 spawn context handoff_c = FakeHandoff(tool_description="Transfer to agent C") - await handoff_c() + await handoff_c.on_invoke_handoff(None, "{}") finally: - _revert_handoff_call_patch(FakeHandoff) + _revert_handoff_patch(FakeHandoff) assert len(captured) == 2 diff --git a/test/unit/adapters/openai_agents/test_edge_emission.py b/test/unit/adapters/openai_agents/test_edge_emission.py index 5ddf1911..454549df 100644 --- a/test/unit/adapters/openai_agents/test_edge_emission.py +++ b/test/unit/adapters/openai_agents/test_edge_emission.py @@ -3,18 +3,20 @@ from __future__ import annotations from typing import Any +from unittest.mock import MagicMock import pytest from agent_assembly.adapters.openai_agents.patch import ( - _apply_handoff_call_patch, - _revert_handoff_call_patch, + _apply_handoff_patch, + _revert_handoff_patch, + _wrap_on_invoke_handoff, set_edge_emitter, set_process_agent_id, ) _HANDOFF_PATCHED_FLAG = "_agent_assembly_openai_agents_handoff_patched" -_ORIGINAL_HANDOFF_CALL = "_agent_assembly_original_openai_agents_handoff_call" +_ORIGINAL_HANDOFF_INIT = "_agent_assembly_original_openai_agents_handoff_init" class RecordingEdgeEmitter: @@ -28,28 +30,30 @@ def emit(self, source: str, target: str, edge_type: str, metadata: dict[str, Any class FakeHandoff: - """Minimal Handoff stand-in.""" + """Stand-in for ``agents.Handoff`` — delegation via ``on_invoke_handoff``.""" def __init__(self, agent_name: str = "agent_b", tool_description: str = "Transfer to B") -> None: self.agent_name = agent_name self.tool_description = tool_description - async def __call__(self, *_args: Any, **_kwargs: Any) -> str: - return "handoff-result" + async def _invoke(ctx: Any, input_json: Any) -> str: + return "handoff-result" + + self.on_invoke_handoff = _invoke class TestHandoffEdgeEmission: def setup_method(self) -> None: set_process_agent_id("agent-a") - for attr in (_HANDOFF_PATCHED_FLAG, _ORIGINAL_HANDOFF_CALL): + for attr in (_HANDOFF_PATCHED_FLAG, _ORIGINAL_HANDOFF_INIT): if hasattr(FakeHandoff, attr): delattr(FakeHandoff, attr) def teardown_method(self) -> None: - _revert_handoff_call_patch(FakeHandoff) + _revert_handoff_patch(FakeHandoff) set_process_agent_id(None) set_edge_emitter(None) - for attr in (_HANDOFF_PATCHED_FLAG, _ORIGINAL_HANDOFF_CALL): + for attr in (_HANDOFF_PATCHED_FLAG, _ORIGINAL_HANDOFF_INIT): if hasattr(FakeHandoff, attr): delattr(FakeHandoff, attr) @@ -57,10 +61,10 @@ def teardown_method(self) -> None: async def test_delegates_to_edge_emitted_on_handoff(self) -> None: emitter = RecordingEdgeEmitter() set_edge_emitter(emitter) - _apply_handoff_call_patch(FakeHandoff, "agent-a") + _apply_handoff_patch(FakeHandoff, "agent-a") h = FakeHandoff(agent_name="agent_b", tool_description="Transfer to B") - await h() + await h.on_invoke_handoff(MagicMock(), "{}") assert len(emitter.edges) == 1 src, tgt, etype, meta = emitter.edges[0] @@ -73,19 +77,19 @@ async def test_delegates_to_edge_emitted_on_handoff(self) -> None: @pytest.mark.asyncio async def test_no_edge_emitted_when_emitter_is_none(self) -> None: set_edge_emitter(None) - _apply_handoff_call_patch(FakeHandoff, "agent-a") + _apply_handoff_patch(FakeHandoff, "agent-a") h = FakeHandoff(agent_name="agent_b") - await h() # Should not raise + await h.on_invoke_handoff(MagicMock(), "{}") # Should not raise @pytest.mark.asyncio async def test_no_edge_emitted_when_process_agent_id_is_none(self) -> None: emitter = RecordingEdgeEmitter() set_edge_emitter(emitter) - _apply_handoff_call_patch(FakeHandoff, None) + _apply_handoff_patch(FakeHandoff, None) h = FakeHandoff(agent_name="agent_b") - await h() + await h.on_invoke_handoff(MagicMock(), "{}") assert emitter.edges == [] @@ -97,15 +101,18 @@ async def test_target_falls_back_to_name_attribute(self) -> None: class NameOnlyHandoff: """Handoff with no agent_name but with name.""" - name = "fallback_agent" - agent_name: None = None + def __init__(self) -> None: + self.name = "fallback_agent" + self.agent_name = None + + async def _invoke(ctx: Any, input_json: Any) -> str: + return "handoff-result" - async def __call__(self, *_args: Any, **_kwargs: Any) -> str: - return "handoff-result" + self.on_invoke_handoff = _invoke - _apply_handoff_call_patch(NameOnlyHandoff, "agent-a") h = NameOnlyHandoff() - await h() + _wrap_on_invoke_handoff(h, "agent-a") + await h.on_invoke_handoff(MagicMock(), "{}") assert len(emitter.edges) == 1 assert emitter.edges[0][1] == "fallback_agent" diff --git a/test/unit/adapters/openai_agents/test_patch.py b/test/unit/adapters/openai_agents/test_patch.py index 151bfec1..bc7bb100 100644 --- a/test/unit/adapters/openai_agents/test_patch.py +++ b/test/unit/adapters/openai_agents/test_patch.py @@ -1,3 +1,12 @@ +"""Unit tests for the OpenAI Agents FunctionTool governance patch. + +These tests target the *real* framework contract (AAASM-3528): the shipped +``agents`` package, whose tools execute via a per-instance ``on_invoke_tool`` +coroutine. They use a ``FakeFunctionTool`` shaped like the real dataclass (a +plain ``on_invoke_tool`` instance attribute, no ``__call__``) so that a no-op +patch would FAIL — i.e. these assert governance actually fires. +""" + from __future__ import annotations from types import SimpleNamespace @@ -8,73 +17,93 @@ from agent_assembly.adapters.openai_agents import patch as openai_patch -def _install_fake_openai_agents_module(monkeypatch: pytest.MonkeyPatch) -> tuple[type[Any], type[Any]]: - class FakeToolResult: - def __init__(self, *, error: str | None = None, output: Any = None) -> None: - self.error = error - self.output = output +class FakeFunctionTool: + """Stand-in for ``agents.FunctionTool``. + + Mirrors the real dataclass: tool logic lives in the per-instance + ``on_invoke_tool`` coroutine assigned in ``__init__``; there is NO + ``__call__``. Patching the class without wrapping ``on_invoke_tool`` is a + no-op against this shape. + """ + + def __init__(self, name: str = "fake_openai_tool") -> None: + self.name = name + + async def _invoke(ctx: Any, input_json: Any) -> dict[str, Any]: + return {"ctx": ctx, "tool_input": input_json, "name": self.name} - class FakeFunctionTool: - def __init__(self, name: str = "fake_openai_tool") -> None: - self.name = name + self.on_invoke_tool = _invoke - async def __call__(self, ctx: Any, tool_input: Any) -> dict[str, Any]: - return {"ctx": ctx, "tool_input": tool_input, "name": self.name} - fake_module = SimpleNamespace(FunctionTool=FakeFunctionTool, ToolResult=FakeToolResult) +@pytest.fixture(autouse=True) +def _reset_function_tool_patch_state() -> Any: + """Ensure the module-level ``FakeFunctionTool`` class starts and ends each test + unpatched, since the patch flag lives on the class and would otherwise leak + one test's callback_handler into the next.""" + openai_patch._revert_function_tool_patch(FakeFunctionTool) + openai_patch.set_process_agent_id(None) + yield + openai_patch._revert_function_tool_patch(FakeFunctionTool) + openai_patch.set_process_agent_id(None) + + +def _install_fake_openai_agents_module(monkeypatch: pytest.MonkeyPatch) -> type[FakeFunctionTool]: + fake_module = SimpleNamespace(FunctionTool=FakeFunctionTool) def fake_import_module(module_name: str) -> object: - if module_name == "openai.agents": + if module_name == "agents": return fake_module raise ImportError(module_name) monkeypatch.setattr(openai_patch.importlib, "import_module", fake_import_module) - monkeypatch.setattr(openai_patch.importlib.util, "find_spec", lambda package: object()) - return FakeFunctionTool, FakeToolResult + monkeypatch.setattr(openai_patch.importlib.util, "find_spec", lambda _package: object()) + return FakeFunctionTool -def test_apply_patches_functiontool_call_and_is_idempotent( +def test_apply_patches_function_tool_init_and_is_idempotent( monkeypatch: pytest.MonkeyPatch, ) -> None: - function_tool_cls, _ = _install_fake_openai_agents_module(monkeypatch) + function_tool_cls = _install_fake_openai_agents_module(monkeypatch) patcher = openai_patch.OpenAIAgentsPatch(callback_handler=object(), process_agent_id="agent-1") - original_call = function_tool_cls.__call__ + original_init = function_tool_cls.__init__ assert patcher.apply() is True - patched_call = function_tool_cls.__call__ - assert patched_call is not original_call + patched_init = function_tool_cls.__init__ + assert patched_init is not original_init assert openai_patch._get_process_agent_id() == "agent-1" assert patcher.apply() is True - assert function_tool_cls.__call__ is patched_call + assert function_tool_cls.__init__ is patched_init + patcher.revert() -def test_revert_restores_original_functiontool_call_and_clears_process_agent_id( + +def test_revert_restores_original_init_and_clears_process_agent_id( monkeypatch: pytest.MonkeyPatch, ) -> None: - function_tool_cls, _ = _install_fake_openai_agents_module(monkeypatch) - original_call = function_tool_cls.__call__ + function_tool_cls = _install_fake_openai_agents_module(monkeypatch) + original_init = function_tool_cls.__init__ patcher = openai_patch.OpenAIAgentsPatch(callback_handler=object()) assert patcher.apply() is True - assert function_tool_cls.__call__ is not original_call + assert function_tool_cls.__init__ is not original_init patcher.revert() - assert function_tool_cls.__call__ is original_call + assert function_tool_cls.__init__ is original_init assert getattr(function_tool_cls, openai_patch._PATCHED_FLAG, False) is False assert openai_patch._get_process_agent_id() is None -def test_loader_edge_cases_and_apply_false_when_functiontool_missing( +def test_loader_edge_cases_and_apply_false_when_function_tool_missing( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setattr( openai_patch.importlib, "import_module", - lambda _: (_ for _ in ()).throw(ImportError("openai.agents")), + lambda _: (_ for _ in ()).throw(ImportError("agents")), ) - monkeypatch.setattr(openai_patch.importlib.util, "find_spec", lambda package: None) + monkeypatch.setattr(openai_patch.importlib.util, "find_spec", lambda _package: None) assert openai_patch._is_openai_agents_available() is False assert openai_patch._load_openai_agents_function_tool_class() is None @@ -84,18 +113,18 @@ def test_loader_edge_cases_and_apply_false_when_functiontool_missing( monkeypatch.setattr( openai_patch.importlib, "import_module", - lambda name: fake_module if name == "openai.agents" else (_ for _ in ()).throw(ImportError(name)), + lambda name: fake_module if name == "agents" else (_ for _ in ()).throw(ImportError(name)), ) - monkeypatch.setattr(openai_patch.importlib.util, "find_spec", lambda package: object()) + monkeypatch.setattr(openai_patch.importlib.util, "find_spec", lambda _package: object()) assert openai_patch._is_openai_agents_available() is True assert openai_patch._load_openai_agents_function_tool_class() is None @pytest.mark.asyncio -async def test_deny_returns_toolresult_error_without_raising( +async def test_deny_returns_governance_error_without_raising( monkeypatch: pytest.MonkeyPatch, ) -> None: - function_tool_cls, tool_result_cls = _install_fake_openai_agents_module(monkeypatch) + function_tool_cls = _install_fake_openai_agents_module(monkeypatch) class Interceptor: async def check_tool_start(self, **kwargs: object) -> dict[str, str]: @@ -107,18 +136,43 @@ async def check_tool_start(self, **kwargs: object) -> dict[str, str]: tool = function_tool_cls(name="blocked_tool") ctx = SimpleNamespace(agent_id="agent-deny") - result = await tool(ctx, {"topic": "finance"}) + result = await tool.on_invoke_tool(ctx, '{"topic": "finance"}') + + # A no-op patch would return the tool's real output dict — assert it was + # blocked instead, proving the patch governs the call. + assert isinstance(result, str) + assert "blocked by policy" in result + + +@pytest.mark.asyncio +async def test_revert_makes_governance_a_noop( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """After revert, a denied tool runs normally — this is the negative control + that proves the apply()-state actually intercepts.""" + function_tool_cls = _install_fake_openai_agents_module(monkeypatch) + + class DenyAll: + async def check_tool_start(self, **kwargs: object) -> dict[str, str]: + del kwargs + return {"status": "deny", "reason": "nope"} + + patcher = openai_patch.OpenAIAgentsPatch(callback_handler=DenyAll()) + assert patcher.apply() is True + patcher.revert() + + tool = function_tool_cls(name="ungoverned_tool") + result = await tool.on_invoke_tool(SimpleNamespace(agent_id="a"), "{}") - assert isinstance(result, tool_result_cls) - assert isinstance(result.error, str) - assert "blocked by policy" in result.error + assert isinstance(result, dict) + assert result["name"] == "ungoverned_tool" @pytest.mark.asyncio async def test_pending_then_approved_executes_tool_and_records_result( monkeypatch: pytest.MonkeyPatch, ) -> None: - function_tool_cls, _ = _install_fake_openai_agents_module(monkeypatch) + function_tool_cls = _install_fake_openai_agents_module(monkeypatch) wait_calls: list[dict[str, object]] = [] recorded_results: list[dict[str, object]] = [] @@ -141,7 +195,7 @@ async def record_result(self, **kwargs: object) -> None: tool = function_tool_cls(name="approved_tool") ctx = SimpleNamespace(agent_id="agent-allow") - result = await tool(ctx, {"q": "hello"}) + result = await tool.on_invoke_tool(ctx, '{"q": "hello"}') assert result["name"] == "approved_tool" assert len(wait_calls) == 1 @@ -152,10 +206,10 @@ async def record_result(self, **kwargs: object) -> None: @pytest.mark.asyncio -async def test_pending_then_denied_returns_toolresult_error( +async def test_pending_then_denied_returns_governance_error( monkeypatch: pytest.MonkeyPatch, ) -> None: - function_tool_cls, tool_result_cls = _install_fake_openai_agents_module(monkeypatch) + function_tool_cls = _install_fake_openai_agents_module(monkeypatch) class Interceptor: async def check_tool_start(self, **kwargs: object) -> dict[str, str]: @@ -171,18 +225,17 @@ async def wait_for_tool_approval(self, **kwargs: object) -> dict[str, str]: tool = function_tool_cls(name="rejected_tool") ctx = SimpleNamespace(agent_id="agent-pending-deny") - result = await tool(ctx, {"q": "secret"}) + result = await tool.on_invoke_tool(ctx, '{"q": "secret"}') - assert isinstance(result, tool_result_cls) - assert isinstance(result.error, str) - assert "approval rejected" in result.error + assert isinstance(result, str) + assert "approval rejected" in result @pytest.mark.asyncio async def test_extracts_agent_id_from_ctx_and_falls_back_to_process_context( monkeypatch: pytest.MonkeyPatch, ) -> None: - function_tool_cls, _ = _install_fake_openai_agents_module(monkeypatch) + function_tool_cls = _install_fake_openai_agents_module(monkeypatch) seen_agent_ids: list[str | None] = [] class Interceptor: @@ -194,8 +247,8 @@ async def check_tool_start(self, **kwargs: object) -> dict[str, str]: assert patcher.apply() is True tool = function_tool_cls(name="agent_id_tool") - await tool(SimpleNamespace(agent_id="ctx-agent"), {"step": 1}) - await tool(SimpleNamespace(), {"step": 2}) + await tool.on_invoke_tool(SimpleNamespace(agent_id="ctx-agent"), '{"step": 1}') + await tool.on_invoke_tool(SimpleNamespace(), '{"step": 2}') assert seen_agent_ids == ["ctx-agent", "process-agent"] @@ -204,7 +257,7 @@ async def check_tool_start(self, **kwargs: object) -> dict[str, str]: async def test_records_tool_result_after_successful_execution( monkeypatch: pytest.MonkeyPatch, ) -> None: - function_tool_cls, _ = _install_fake_openai_agents_module(monkeypatch) + function_tool_cls = _install_fake_openai_agents_module(monkeypatch) observed_results: list[str] = [] class Interceptor: @@ -219,7 +272,7 @@ async def record_result(self, **kwargs: object) -> None: assert patcher.apply() is True tool = function_tool_cls(name="record_tool") - result = await tool(SimpleNamespace(agent_id="agent-rec"), {"x": 1}) + result = await tool.on_invoke_tool(SimpleNamespace(agent_id="agent-rec"), '{"x": 1}') assert result["name"] == "record_tool" assert len(observed_results) == 1 @@ -230,7 +283,7 @@ async def record_result(self, **kwargs: object) -> None: async def test_gateway_error_uses_fail_open_and_executes_original_tool( monkeypatch: pytest.MonkeyPatch, ) -> None: - function_tool_cls, _ = _install_fake_openai_agents_module(monkeypatch) + function_tool_cls = _install_fake_openai_agents_module(monkeypatch) class Interceptor: async def check_tool_start(self, **kwargs: object) -> dict[str, str]: @@ -241,7 +294,7 @@ async def check_tool_start(self, **kwargs: object) -> dict[str, str]: assert patcher.apply() is True tool = function_tool_cls(name="fail_open_tool") - result = await tool(SimpleNamespace(agent_id="agent-fail-open"), {"x": 2}) + result = await tool.on_invoke_tool(SimpleNamespace(agent_id="agent-fail-open"), '{"x": 2}') assert result["name"] == "fail_open_tool" @@ -250,21 +303,21 @@ async def check_tool_start(self, **kwargs: object) -> dict[str, str]: async def test_non_governance_error_is_reraised( monkeypatch: pytest.MonkeyPatch, ) -> None: - function_tool_cls, _ = _install_fake_openai_agents_module(monkeypatch) + function_tool_cls = _install_fake_openai_agents_module(monkeypatch) class Interceptor: async def check_tool_start(self, **kwargs: object) -> dict[str, str]: del kwargs raise ValueError("unexpected") - monkeypatch.setattr(openai_patch, "_is_governance_error", lambda error: False) + monkeypatch.setattr(openai_patch, "_is_governance_error", lambda _error: False) patcher = openai_patch.OpenAIAgentsPatch(callback_handler=Interceptor()) assert patcher.apply() is True tool = function_tool_cls(name="strict_tool") with pytest.raises(ValueError, match="unexpected"): - await tool(SimpleNamespace(agent_id="agent-strict"), {"x": 3}) + await tool.on_invoke_tool(SimpleNamespace(agent_id="agent-strict"), '{"x": 3}') @pytest.mark.asyncio @@ -349,42 +402,63 @@ def on_tool_end(self, **kwargs: object) -> None: ) assert observed_tool_end - class FailingToolResult: - def __init__(self, **kwargs: object) -> None: - del kwargs - raise RuntimeError("cannot build") - - fake_module = SimpleNamespace(FunctionTool=object(), ToolResult=FailingToolResult) - monkeypatch.setattr( - openai_patch.importlib, - "import_module", - lambda name: fake_module if name == "openai.agents" else (_ for _ in ()).throw(ImportError(name)), - ) - # _build_tool_result_error fallback dict branch - fallback_result = openai_patch._build_tool_result_error( +def test_build_tool_deny_error_returns_string_for_both_flows() -> None: + policy_msg = openai_patch._build_tool_deny_error( tool_name="x", reason="nope", is_pending_rejection=False, ) - assert isinstance(fallback_result, dict) - assert "error" in fallback_result + assert isinstance(policy_msg, str) + assert "governance policy" in policy_msg + assert "nope" in policy_msg + + approval_msg = openai_patch._build_tool_deny_error( + tool_name="x", + reason=None, + is_pending_rejection=True, + ) + assert isinstance(approval_msg, str) + assert "Approval denied" in approval_msg def test_apply_and_revert_helpers_cover_non_callable_and_unpatched_branches() -> None: - class NoCallable: - __call__ = None + class NoInvoke: + def __init__(self) -> None: + self.on_invoke_tool = None + self.name = "no_invoke" class NotPatched: - async def __call__(self, ctx: Any, tool_input: Any) -> None: - del ctx, tool_input - return None + def __init__(self) -> None: + async def _invoke(ctx: Any, input_json: Any) -> None: + del ctx, input_json + return None + + self.on_invoke_tool = _invoke + + # _wrap_on_invoke_tool early return when on_invoke_tool is not callable + openai_patch._wrap_on_invoke_tool(NoInvoke(), callback_handler=object()) + + # _revert_function_tool_patch early return when patch flag absent + openai_patch._revert_function_tool_patch(NotPatched) + + +def test_wrap_on_invoke_tool_is_idempotent_per_instance() -> None: + class Tool: + def __init__(self) -> None: + self.name = "t" + + async def _invoke(ctx: Any, input_json: Any) -> str: + return "ok" - # _apply_function_tool_call_patch early return when __call__ is not callable - openai_patch._apply_function_tool_call_patch(NoCallable, callback_handler=object()) + self.on_invoke_tool = _invoke - # _revert_function_tool_call_patch early return when patch flag absent - openai_patch._revert_function_tool_call_patch(NotPatched) + tool = Tool() + openai_patch._wrap_on_invoke_tool(tool, callback_handler=object()) + first_wrap = tool.on_invoke_tool + # Re-wrapping an already-wrapped callable must be a no-op. + openai_patch._wrap_on_invoke_tool(tool, callback_handler=object()) + assert tool.on_invoke_tool is first_wrap @pytest.mark.asyncio diff --git a/test/unit/adapters/openai_agents/test_runner_spawn_patch.py b/test/unit/adapters/openai_agents/test_runner_spawn_patch.py index a569921a..0e9c0b59 100644 --- a/test/unit/adapters/openai_agents/test_runner_spawn_patch.py +++ b/test/unit/adapters/openai_agents/test_runner_spawn_patch.py @@ -1,16 +1,17 @@ from __future__ import annotations +from typing import Any from unittest.mock import MagicMock, patch import pytest from agent_assembly.adapters.openai_agents.patch import ( - _apply_handoff_call_patch, + _apply_handoff_patch, _apply_runner_run_patch, _extract_handoff_delegation_reason, _load_openai_agents_handoff_class, _load_openai_agents_runner_class, - _revert_handoff_call_patch, + _revert_handoff_patch, _revert_runner_run_patch, set_process_agent_id, ) @@ -19,7 +20,7 @@ _RUNNER_PATCHED_FLAG = "_agent_assembly_openai_agents_runner_patched" _ORIGINAL_RUNNER_RUN = "_agent_assembly_original_openai_agents_runner_run" _HANDOFF_PATCHED_FLAG = "_agent_assembly_openai_agents_handoff_patched" -_ORIGINAL_HANDOFF_CALL = "_agent_assembly_original_openai_agents_handoff_call" +_ORIGINAL_HANDOFF_INIT = "_agent_assembly_original_openai_agents_handoff_init" class TestLoadRunnerClass: @@ -156,26 +157,33 @@ def test_truncates_to_256_chars(self) -> None: class FakeHandoff: - """Minimal Handoff stand-in for patching tests.""" + """Stand-in for ``agents.Handoff``. - def __init__(self, tool_description: str = "") -> None: + Like the real dataclass, delegation runs through a per-instance + ``on_invoke_handoff`` coroutine; there is NO ``__call__``. + """ + + def __init__(self, tool_description: str = "", agent_name: str = "agent_b") -> None: self.tool_description = tool_description + self.agent_name = agent_name + + async def _invoke(ctx: Any, input_json: Any) -> str: + return "handoff-result" - async def __call__(self, *_args: object, **_kwargs: object) -> str: - return "handoff-result" + self.on_invoke_handoff = _invoke -class TestApplyHandoffCallPatch: +class TestApplyHandoffPatch: def setup_method(self) -> None: set_process_agent_id("process-agent-001") - for attr in (_HANDOFF_PATCHED_FLAG, _ORIGINAL_HANDOFF_CALL): + for attr in (_HANDOFF_PATCHED_FLAG, _ORIGINAL_HANDOFF_INIT): if hasattr(FakeHandoff, attr): delattr(FakeHandoff, attr) def teardown_method(self) -> None: - _revert_handoff_call_patch(FakeHandoff) + _revert_handoff_patch(FakeHandoff) set_process_agent_id(None) - for attr in (_HANDOFF_PATCHED_FLAG, _ORIGINAL_HANDOFF_CALL): + for attr in (_HANDOFF_PATCHED_FLAG, _ORIGINAL_HANDOFF_INIT): if hasattr(FakeHandoff, attr): delattr(FakeHandoff, attr) @@ -183,116 +191,83 @@ def teardown_method(self) -> None: async def test_patched_handoff_sets_spawn_ctx(self) -> None: captured: list[SpawnContext | None] = [] - class CapturingHandoff(FakeHandoff): - async def __call__(self, *_args: object, **_kwargs: object) -> str: - captured.append(_SPAWN_CTX.get()) - return "done" + _apply_handoff_patch(FakeHandoff, "process-agent-001") + h = FakeHandoff(tool_description="transfer to B") + + async def capturing_invoke(ctx: Any, input_json: Any) -> str: + captured.append(_SPAWN_CTX.get()) + return "done" - _apply_handoff_call_patch(CapturingHandoff, "process-agent-001") - h = CapturingHandoff(tool_description="transfer to B") - await h() + # Replace the underlying invoke, then re-wrap to capture spawn ctx. + h.on_invoke_handoff = capturing_invoke + from agent_assembly.adapters.openai_agents.patch import _wrap_on_invoke_handoff + + _wrap_on_invoke_handoff(h, "process-agent-001") + await h.on_invoke_handoff(MagicMock(), "{}") assert len(captured) == 1 sc = captured[0] assert sc is not None assert sc.parent_agent_id == "process-agent-001" assert sc.depth == 1 - - @pytest.mark.asyncio - async def test_spawned_by_tool_is_none_for_handoff(self) -> None: - captured: list[SpawnContext | None] = [] - - class CapturingHandoff(FakeHandoff): - async def __call__(self, *_args: object, **_kwargs: object) -> str: - captured.append(_SPAWN_CTX.get()) - return "done" - - _apply_handoff_call_patch(CapturingHandoff, "process-agent-001") - await CapturingHandoff()() - - assert captured[0] is not None - assert captured[0].spawned_by_tool is None - - @pytest.mark.asyncio - async def test_delegation_reason_from_tool_description(self) -> None: - captured: list[SpawnContext | None] = [] - - class CapturingHandoff(FakeHandoff): - async def __call__(self, *_args: object, **_kwargs: object) -> str: - captured.append(_SPAWN_CTX.get()) - return "done" - - _apply_handoff_call_patch(CapturingHandoff, "process-agent-001") - h = CapturingHandoff(tool_description="Transfer to agent B") - await h() - - assert captured[0] is not None - assert captured[0].delegation_reason == "Transfer to agent B" - - @pytest.mark.asyncio - async def test_delegation_reason_fallback_when_no_description(self) -> None: - captured: list[SpawnContext | None] = [] - - class CapturingHandoff(FakeHandoff): - def __init__(self) -> None: - pass # no tool_description - - async def __call__(self, *_args: object, **_kwargs: object) -> str: - captured.append(_SPAWN_CTX.get()) - return "done" - - _apply_handoff_call_patch(CapturingHandoff, "process-agent-001") - await CapturingHandoff()() - - assert captured[0] is not None - assert captured[0].delegation_reason == "handoff" + assert sc.spawned_by_tool is None + assert sc.delegation_reason == "transfer to B" @pytest.mark.asyncio async def test_spawn_ctx_reset_after_handoff(self) -> None: - _apply_handoff_call_patch(FakeHandoff, "process-agent-001") + _apply_handoff_patch(FakeHandoff, "process-agent-001") h = FakeHandoff() - await h() + await h.on_invoke_handoff(MagicMock(), "{}") assert _SPAWN_CTX.get() is None @pytest.mark.asyncio async def test_spawn_ctx_reset_on_exception(self) -> None: - class FailingHandoff(FakeHandoff): - async def __call__(self, *_args: object, **_kwargs: object) -> str: - raise RuntimeError("handoff failed") + _apply_handoff_patch(FakeHandoff, "process-agent-001") + h = FakeHandoff() + + async def failing_invoke(ctx: Any, input_json: Any) -> str: + raise RuntimeError("handoff failed") - _apply_handoff_call_patch(FailingHandoff, "process-agent-001") + h.on_invoke_handoff = failing_invoke + from agent_assembly.adapters.openai_agents.patch import _wrap_on_invoke_handoff + + _wrap_on_invoke_handoff(h, "process-agent-001") with pytest.raises(RuntimeError, match="handoff failed"): - await FailingHandoff()() + await h.on_invoke_handoff(MagicMock(), "{}") assert _SPAWN_CTX.get() is None def test_idempotent_apply(self) -> None: - _apply_handoff_call_patch(FakeHandoff, "process-agent-001") - original = getattr(FakeHandoff, _ORIGINAL_HANDOFF_CALL, None) - _apply_handoff_call_patch(FakeHandoff, "process-agent-001") - assert getattr(FakeHandoff, _ORIGINAL_HANDOFF_CALL, None) is original + _apply_handoff_patch(FakeHandoff, "process-agent-001") + original = getattr(FakeHandoff, _ORIGINAL_HANDOFF_INIT, None) + _apply_handoff_patch(FakeHandoff, "process-agent-001") + assert getattr(FakeHandoff, _ORIGINAL_HANDOFF_INIT, None) is original def test_revert_restores_original(self) -> None: - _apply_handoff_call_patch(FakeHandoff, "process-agent-001") - _revert_handoff_call_patch(FakeHandoff) + _apply_handoff_patch(FakeHandoff, "process-agent-001") + _revert_handoff_patch(FakeHandoff) assert not hasattr(FakeHandoff, _HANDOFF_PATCHED_FLAG) - assert not hasattr(FakeHandoff, _ORIGINAL_HANDOFF_CALL) + assert not hasattr(FakeHandoff, _ORIGINAL_HANDOFF_INIT) @pytest.mark.asyncio async def test_depth_increments_when_inside_existing_spawn_ctx(self) -> None: + from agent_assembly.adapters.openai_agents.patch import _wrap_on_invoke_handoff from agent_assembly.core.spawn import spawn_context_scope captured: list[SpawnContext | None] = [] - class CapturingHandoff(FakeHandoff): - async def __call__(self, *_args: object, **_kwargs: object) -> str: - captured.append(_SPAWN_CTX.get()) - return "done" + _apply_handoff_patch(FakeHandoff, "process-agent-001") + h = FakeHandoff(tool_description="transfer") + + async def capturing_invoke(ctx: Any, input_json: Any) -> str: + captured.append(_SPAWN_CTX.get()) + return "done" - _apply_handoff_call_patch(CapturingHandoff, "process-agent-001") + h.on_invoke_handoff = capturing_invoke + _wrap_on_invoke_handoff(h, "process-agent-001") outer = SpawnContext(parent_agent_id="root", depth=2) with spawn_context_scope(outer): - await CapturingHandoff()() + await h.on_invoke_handoff(MagicMock(), "{}") assert captured[0] is not None assert captured[0].depth == 3 # outer.depth + 1 diff --git a/test/unit/adapters/test_adapter_lifecycle.py b/test/unit/adapters/test_adapter_lifecycle.py index e2d2bb83..e885a07c 100644 --- a/test/unit/adapters/test_adapter_lifecycle.py +++ b/test/unit/adapters/test_adapter_lifecycle.py @@ -122,11 +122,27 @@ def test_openai_agents_adapter_metadata_and_lifecycle(monkeypatch: pytest.Monkey assert adapter._patch is None -def test_openai_agents_is_available_false_when_module_absent() -> None: - # openai.agents is not installed in the test env. +def test_openai_agents_is_available_false_when_module_absent( + monkeypatch: pytest.MonkeyPatch, +) -> None: + # AAASM-3528: availability is keyed on the shipped top-level ``agents`` + # package (NOT ``openai.agents``). The framework is a dev/test dependency, + # so simulate its absence by stubbing find_spec to None. + monkeypatch.setattr( + "agent_assembly.adapters.openai_agents.adapter.importlib.util.find_spec", + lambda _name: None, + ) assert OpenAIAgentsAdapter().is_available() is False +def test_openai_agents_is_available_true_when_agents_installed() -> None: + # AAASM-3528 regression guard: with the real ``agents`` package installed, + # the adapter must report available (the old ``openai.agents`` probe wrongly + # reported unavailable, making the patch a silent no-op). + pytest.importorskip("agents") + assert OpenAIAgentsAdapter().is_available() is True + + def test_openai_agents_is_available_false_when_find_spec_raises( monkeypatch: pytest.MonkeyPatch, ) -> None: diff --git a/test/unit/adapters/test_registry.py b/test/unit/adapters/test_registry.py index dc3edf46..72a23ac1 100644 --- a/test/unit/adapters/test_registry.py +++ b/test/unit/adapters/test_registry.py @@ -235,6 +235,14 @@ def fake_import_module(module_name: str) -> object: raise ImportError monkeypatch.setattr("agent_assembly.adapters.base.importlib.import_module", fake_import_module) + # AAASM-3528: the OpenAI adapter overrides is_available() with a find_spec + # probe of the real top-level ``agents`` package (a dev/test dependency), so + # it bypasses the base import mock above. Stub it absent to keep this test + # isolated to the entry-point adapter under test. + monkeypatch.setattr( + "agent_assembly.adapters.openai_agents.adapter.importlib.util.find_spec", + lambda _name: None, + ) first = registry.auto_detect() second = registry.auto_detect() diff --git a/uv.lock b/uv.lock index ccb40c9f..324218b9 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.12, <4.0" [[package]] @@ -31,6 +31,7 @@ dev = [ { name = "coverage" }, { name = "grpcio-tools" }, { name = "mypy" }, + { name = "openai-agents" }, { name = "pydantic-ai" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -61,6 +62,7 @@ pre-commit-ci = [ { name = "ruff" }, ] test = [ + { name = "openai-agents" }, { name = "pydantic-ai" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -85,6 +87,7 @@ dev = [ { name = "coverage", specifier = "~=7.10" }, { name = "grpcio-tools", specifier = ">=1.66,<2" }, { name = "mypy", specifier = ">=1.11,<3" }, + { name = "openai-agents", specifier = ">=0.1.0" }, { name = "pydantic-ai", specifier = ">=0.3.0" }, { name = "pytest", specifier = ">=8.1.1,<10" }, { name = "pytest-asyncio", specifier = ">=0.23.0,<2" }, @@ -115,6 +118,7 @@ pre-commit-ci = [ { name = "ruff", specifier = ">=0.1.0" }, ] test = [ + { name = "openai-agents", specifier = ">=0.1.0" }, { name = "pydantic-ai", specifier = ">=0.3.0" }, { name = "pytest", specifier = ">=8.1.1,<10" }, { name = "pytest-asyncio", specifier = ">=0.23.0,<2" }, @@ -2219,6 +2223,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/74/925d7b3892927e9804aaf58d374a45dc28e4420ff90e992272b77286343e/openai-2.41.1-py3-none-any.whl", hash = "sha256:a939565f350cb7443cb843b801b88c716ac8024b492fb94ca269d5f6b1bbefd6", size = 1353380, upload-time = "2026-06-10T16:10:35.756Z" }, ] +[[package]] +name = "openai-agents" +version = "0.17.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "griffelib" }, + { name = "mcp" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "types-requests" }, + { name = "typing-extensions" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/90/724f368d1f0656dc750a0b6d3ba06ad88efb293be8d43b08e9d32956252b/openai_agents-0.17.6.tar.gz", hash = "sha256:fed94f8cf0eb4c57c63a89ad4b107932d75f2a0b6d9e895c88fa3c217e63c822", size = 5426870, upload-time = "2026-06-19T06:04:11.635Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/39/c415599b5145a05d34089a1c9c9ec71dc5474027bbc63982cd6a9a242a0a/openai_agents-0.17.6-py3-none-any.whl", hash = "sha256:9f6bba61ac3ba2ecedd0a3ed1dd1c72d667b7637b884ecd1c2800ff6576a7ac8", size = 850769, upload-time = "2026-06-19T06:04:09.102Z" }, +] + [[package]] name = "openapi-pydantic" version = "0.5.1"