Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions py/noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def _pinned_python_version():
# Test matrix
ANTHROPIC_VERSIONS = (LATEST, "0.50.0", "0.49.0", "0.48.0")
OPENAI_VERSIONS = (LATEST, "1.77.0", "1.71", "1.91", "1.92")
OPENAI_AGENTS_VERSIONS = (LATEST, "0.0.19")
# litellm latest requires Python >= 3.10
# Pin litellm because 1.82.7-1.82.8 are compromised: https://github.com/BerriAI/litellm/issues/24512
LITELLM_VERSIONS = ("1.82.0", "1.74.0")
Expand Down Expand Up @@ -240,10 +241,21 @@ def test_langchain(session, version):
def test_openai(session, version):
    """Run the OpenAI wrapper and integration test suites against one pinned SDK version."""
    _install_test_deps(session)
    _install(session, "openai", version)
    # openai-agents requires Python >= 3.10
    # NOTE(review): openai-agents is installed unpinned here while a dedicated
    # test_openai_agents session pins it — confirm this extra install is still needed.
    _install(session, "openai-agents")
    _run_tests(session, f"{WRAPPER_DIR}/test_openai.py")
    _run_tests(session, f"{WRAPPER_DIR}/test_openai_openrouter_gateway.py")
    # NOTE(review): the openai and gateway tests appear under both WRAPPER_DIR and
    # INTEGRATION_DIR — verify both copies are intentional and not a stale duplicate.
    _run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai.py")
    _run_tests(session, f"{INTEGRATION_DIR}/openai/test_oai_attachments.py")
    _run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai_openrouter_gateway.py")
    _run_core_tests(session)


@nox.session()
@nox.parametrize("version", OPENAI_AGENTS_VERSIONS, ids=OPENAI_AGENTS_VERSIONS)
def test_openai_agents(session, version):
    """Run the OpenAI Agents SDK integration tests against one pinned SDK version."""
    # The Agents SDK does not support older interpreters; skip instead of failing.
    if sys.version_info < (3, 10):
        session.skip("openai-agents requires Python >= 3.10")
    _install_test_deps(session)
    # The base openai package resolves freely; only openai-agents is pinned to
    # the parametrized version from OPENAI_AGENTS_VERSIONS.
    _install(session, "openai")
    _install(session, "openai-agents", version)
    _run_tests(session, f"{INTEGRATION_DIR}/openai_agents/test_openai_agents.py")
    _run_core_tests(session)


Expand All @@ -254,7 +266,7 @@ def test_openai_http2_streaming(session):
# h2 is isolated to this session because it's only needed to force the
# HTTP/2 LegacyAPIResponse streaming path used by the regression test.
session.install("h2")
_run_tests(session, f"{WRAPPER_DIR}/test_openai_http2.py")
_run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai_http2.py")


@nox.session()
Expand Down
6 changes: 3 additions & 3 deletions py/src/braintrust/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ def is_equal(expected, output):
from .integrations.litellm import (
wrap_litellm, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .integrations.openai import (
wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .integrations.openrouter import (
wrap_openrouter, # noqa: F401 # type: ignore[reportUnusedImport]
)
Expand All @@ -88,9 +91,6 @@ def is_equal(expected, output):
_internal_reset_global_state, # noqa: F401 # type: ignore[reportUnusedImport]
_internal_with_custom_background_logger, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .oai import (
wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .sandbox import (
RegisteredSandboxFunction, # noqa: F401 # type: ignore[reportUnusedImport]
RegisterSandboxResult, # noqa: F401 # type: ignore[reportUnusedImport]
Expand Down
16 changes: 7 additions & 9 deletions py/src/braintrust/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
GoogleGenAIIntegration,
LangChainIntegration,
LiteLLMIntegration,
OpenAIAgentsIntegration,
OpenAIIntegration,
OpenRouterIntegration,
PydanticAIIntegration,
)
Expand Down Expand Up @@ -52,6 +54,7 @@ def auto_instrument(
dspy: bool = True,
adk: bool = True,
langchain: bool = True,
openai_agents: bool = True,
) -> dict[str, bool]:
"""
Auto-instrument supported AI/ML libraries for Braintrust tracing.
Expand All @@ -75,6 +78,7 @@ def auto_instrument(
dspy: Enable DSPy instrumentation (default: True)
adk: Enable Google ADK instrumentation (default: True)
langchain: Enable LangChain instrumentation (default: True)
openai_agents: Enable OpenAI Agents SDK instrumentation (default: True)

Returns:
Dict mapping integration name to whether it was successfully instrumented.
Expand Down Expand Up @@ -123,7 +127,7 @@ def auto_instrument(
results = {}

if openai:
results["openai"] = _instrument_openai()
results["openai"] = _instrument_integration(OpenAIIntegration)
if anthropic:
results["anthropic"] = _instrument_integration(AnthropicIntegration)
if litellm:
Expand All @@ -146,18 +150,12 @@ def auto_instrument(
results["adk"] = _instrument_integration(ADKIntegration)
if langchain:
results["langchain"] = _instrument_integration(LangChainIntegration)
if openai_agents:
results["openai_agents"] = _instrument_integration(OpenAIAgentsIntegration)

return results


def _instrument_openai() -> bool:
    """Patch the OpenAI SDK for Braintrust tracing; return True only on success."""
    # NOTE(review): _try_patch appears to be an exception-suppressing context
    # manager — if the import or patch raises, control falls through to the
    # final `return False` instead of propagating. TODO confirm its semantics.
    with _try_patch():
        from braintrust.oai import patch_openai

        return patch_openai()
    return False


def _instrument_integration(integration) -> bool:
with _try_patch():
return integration.setup()
Expand Down
4 changes: 4 additions & 0 deletions py/src/braintrust/integrations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from .google_genai import GoogleGenAIIntegration
from .langchain import LangChainIntegration
from .litellm import LiteLLMIntegration
from .openai import OpenAIIntegration
from .openai_agents import OpenAIAgentsIntegration
from .openrouter import OpenRouterIntegration
from .pydantic_ai import PydanticAIIntegration

Expand All @@ -21,6 +23,8 @@
"GoogleGenAIIntegration",
"LiteLLMIntegration",
"LangChainIntegration",
"OpenAIIntegration",
"OpenAIAgentsIntegration",
"OpenRouterIntegration",
"PydanticAIIntegration",
]
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@
assert not LiteLLMIntegration.patchers[0].is_patched(litellm, None)

# 2. Instrument
results = auto_instrument()
# Disable OpenAI auto-instrumentation here because LiteLLM's OpenAI-backed
# chat path can otherwise produce both a LiteLLM span and an OpenAI span.
# This test is meant to validate LiteLLM instrumentation in isolation.
results = auto_instrument(openai=False)
assert results.get("litellm") == True
assert LiteLLMIntegration.patchers[0].is_patched(litellm, None)

# 3. Idempotent
results2 = auto_instrument()
results2 = auto_instrument(openai=False)
assert results2.get("litellm") == True

# 4. Make API call and verify span
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,36 @@
"""Test auto_instrument for OpenAI."""

import inspect
from pathlib import Path

import openai
from braintrust.auto import auto_instrument
from braintrust.wrappers.test_utils import autoinstrument_test_context
from wrapt import FunctionWrapper


_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "openai" / "cassettes"


def _is_braintrust_wrapped() -> bool:
    """Report whether Completions.create is currently a Braintrust wrapt proxy."""
    completions_cls = openai.resources.chat.completions.Completions
    # getattr_static avoids triggering descriptors so we see the raw class attribute.
    create_attr = inspect.getattr_static(completions_cls, "create", None)
    return isinstance(create_attr, FunctionWrapper)


# 1. Verify not patched initially
assert not getattr(openai, "__braintrust_wrapped__", False)
assert not _is_braintrust_wrapped()

# 2. Instrument
results = auto_instrument()
assert results.get("openai") == True
assert getattr(openai, "__braintrust_wrapped__", False)
assert _is_braintrust_wrapped()

# 3. Idempotent
results2 = auto_instrument()
assert results2.get("openai") == True

# 4. Make API call and verify span
with autoinstrument_test_context("test_auto_openai") as memory_logger:
with autoinstrument_test_context("test_auto_openai", cassettes_dir=_CASSETTES_DIR) as memory_logger:
client = openai.OpenAI()
response = client.chat.completions.create(
model="gpt-4o-mini",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Test auto_instrument for the OpenAI Agents SDK."""

import asyncio
from pathlib import Path

import agents
from braintrust.auto import auto_instrument
from braintrust.integrations.openai_agents import BraintrustTracingProcessor
from braintrust.wrappers.test_utils import autoinstrument_test_context


_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "openai_agents" / "cassettes"
TEST_MODEL = "gpt-4o-mini"
TEST_PROMPT = "What is 2+2? Just the number."
TEST_AGENT_INSTRUCTIONS = "You are a helpful assistant. Be very concise."


def _has_braintrust_processor() -> bool:
provider = agents.tracing.get_trace_provider()
processors = getattr(getattr(provider, "_multi_processor", None), "_processors", ())
return any(isinstance(processor, BraintrustTracingProcessor) for processor in processors)


results = auto_instrument()
assert results.get("openai_agents") == True
assert _has_braintrust_processor()

results2 = auto_instrument()
assert results2.get("openai_agents") == True
assert _has_braintrust_processor()

with autoinstrument_test_context("test_auto_openai_agents", cassettes_dir=_CASSETTES_DIR) as memory_logger:
from agents import Agent
from agents.run import AgentRunner

async def run_agent():
agent = Agent(name="test-agent", model=TEST_MODEL, instructions=TEST_AGENT_INSTRUCTIONS)
return await AgentRunner().run(agent, TEST_PROMPT)

result = asyncio.run(run_agent())
assert result is not None
assert hasattr(result, "final_output") or hasattr(result, "output")

spans = memory_logger.pop()
assert len(spans) >= 2, f"Expected at least 2 spans, got {len(spans)}"

print("SUCCESS")
38 changes: 38 additions & 0 deletions py/src/braintrust/integrations/openai/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Braintrust integration for the OpenAI Python SDK and OpenAI-compatible gateways."""

from braintrust.logger import NOOP_SPAN, current_span, init_logger

from .integration import OpenAIIntegration
from .patchers import wrap_openai


__all__ = [
"OpenAIIntegration",
"setup_openai",
"wrap_openai",
]


def setup_openai(
    api_key: str | None = None,
    project_id: str | None = None,
    project_name: str | None = None,
) -> bool:
    """Setup Braintrust integration with OpenAI.

    Patches OpenAI resource classes at the module level so that all clients
    produce Braintrust tracing spans.

    Args:
        api_key: Braintrust API key (optional, can use env var BRAINTRUST_API_KEY)
        project_id: Braintrust project ID (optional)
        project_name: Braintrust project name (optional, can use env var BRAINTRUST_PROJECT)

    Returns:
        True if setup was successful, False otherwise
    """
    # Only initialize a logger when no span is already active; an existing
    # (non-noop) span means a logging context has been set up by the caller.
    if current_span() == NOOP_SPAN:
        init_logger(project=project_name, api_key=api_key, project_id=project_id)

    return OpenAIIntegration.setup()
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":[{"type":"text","text":"What color
is this image?"},{"type":"image_url","image_url":{"url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="}}]}],"model":"gpt-4o-mini"}'
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '277'
Content-Type:
- application/json
Host:
- api.openai.com
User-Agent:
- OpenAI/Python 2.30.0
X-Stainless-Arch:
- arm64
X-Stainless-Async:
- 'false'
X-Stainless-Lang:
- python
X-Stainless-OS:
- MacOS
X-Stainless-Package-Version:
- 2.30.0
X-Stainless-Raw-Response:
- 'true'
X-Stainless-Runtime:
- CPython
X-Stainless-Runtime-Version:
- 3.13.3
x-stainless-read-timeout:
- '600'
x-stainless-retry-count:
- '0'
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DQiFPcXBqdj72osGdNznct7LOdS4U\",\n \"object\":
\"chat.completion\",\n \"created\": 1775258987,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The image is red.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
8513,\n \"completion_tokens\": 5,\n \"total_tokens\": 8518,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_ebf4e532f9\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-Ray:
- 9e6bdb7b6c73d8d9-YYZ
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 03 Apr 2026 23:29:47 GMT
Server:
- cloudflare
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
content-length:
- '827'
openai-organization:
- braintrust-data
openai-processing-ms:
- '472'
openai-project:
- proj_vsCSXafhhByzWOThMrJcZiw9
openai-version:
- '2020-10-01'
set-cookie:
- __cf_bm=nQ.XMiAqbEVeR.9ilHFAV4G4Lr7C1VfG_qcqqLD25pc-1775258986.789194-1.0.1.1-OZs4yBKIgVJRt8TPeU0RwzFFZpzpHe0wallA39buBqR7rJNm4msYOCxouudLSaCVsCrH7FfDoXAGpAJyXrfrbaDIsjojElEvFuvv0L9z1PZpxcj_hIX69nD.oYVrz2X0;
HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 03 Apr 2026
23:59:47 GMT
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-input-images:
- '50000'
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-input-images:
- '49999'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999227'
x-ratelimit-reset-input-images:
- 1ms
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_52279379da51445eae3f8994d1027669
status:
code: 200
message: OK
version: 1
Loading
Loading