Skip to content

Commit 948dcd7

Browse files
committed
feat(openai): migrate openai wrapper and add claude agent sdk integration
Move OpenAI wrapper tests, cassettes, and tracing code from `wrappers/` into `integrations/openai/`. Split OpenAI Agents SDK tracing processor into its own `integrations/openai_agents/` package with a dedicated `test_openai_agents` nox session. Add new Claude Agent SDK integration under `integrations/` with subprocess transport-level cassette support. The old `wrappers/openai.py` now re-exports `BraintrustTracingProcessor` from the new location for backward compatibility. Test paths in `noxfile.py` are updated to point at the new integration directories.
1 parent 5c35051 commit 948dcd7

File tree

69 files changed

+3218
-2779
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+3218
-2779
lines changed

py/noxfile.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def _pinned_python_version():
8383
# Test matrix
8484
ANTHROPIC_VERSIONS = (LATEST, "0.50.0", "0.49.0", "0.48.0")
8585
OPENAI_VERSIONS = (LATEST, "1.77.0", "1.71", "1.91", "1.92")
86+
OPENAI_AGENTS_VERSIONS = (LATEST, "0.0.19")
8687
# litellm latest requires Python >= 3.10
8788
# Pin litellm because 1.82.7-1.82.8 are compromised: https://github.com/BerriAI/litellm/issues/24512
8889
LITELLM_VERSIONS = ("1.82.0", "1.74.0")
@@ -240,10 +241,21 @@ def test_langchain(session, version):
240241
def test_openai(session, version):
241242
_install_test_deps(session)
242243
_install(session, "openai", version)
243-
# openai-agents requires Python >= 3.10
244-
_install(session, "openai-agents")
245-
_run_tests(session, f"{WRAPPER_DIR}/test_openai.py")
246-
_run_tests(session, f"{WRAPPER_DIR}/test_openai_openrouter_gateway.py")
244+
_run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai.py")
245+
_run_tests(session, f"{INTEGRATION_DIR}/openai/test_oai_attachments.py")
246+
_run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai_openrouter_gateway.py")
247+
_run_core_tests(session)
248+
249+
250+
@nox.session()
251+
@nox.parametrize("version", OPENAI_AGENTS_VERSIONS, ids=OPENAI_AGENTS_VERSIONS)
252+
def test_openai_agents(session, version):
253+
if sys.version_info < (3, 10):
254+
session.skip("openai-agents requires Python >= 3.10")
255+
_install_test_deps(session)
256+
_install(session, "openai")
257+
_install(session, "openai-agents", version)
258+
_run_tests(session, f"{INTEGRATION_DIR}/openai_agents/test_openai_agents.py")
247259
_run_core_tests(session)
248260

249261

@@ -254,7 +266,7 @@ def test_openai_http2_streaming(session):
254266
# h2 is isolated to this session because it's only needed to force the
255267
# HTTP/2 LegacyAPIResponse streaming path used by the regression test.
256268
session.install("h2")
257-
_run_tests(session, f"{WRAPPER_DIR}/test_openai_http2.py")
269+
_run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai_http2.py")
258270

259271

260272
@nox.session()

py/src/braintrust/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ def is_equal(expected, output):
7676
from .integrations.litellm import (
7777
wrap_litellm, # noqa: F401 # type: ignore[reportUnusedImport]
7878
)
79+
from .integrations.openai import (
80+
wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport]
81+
)
7982
from .integrations.openrouter import (
8083
wrap_openrouter, # noqa: F401 # type: ignore[reportUnusedImport]
8184
)
@@ -88,9 +91,6 @@ def is_equal(expected, output):
8891
_internal_reset_global_state, # noqa: F401 # type: ignore[reportUnusedImport]
8992
_internal_with_custom_background_logger, # noqa: F401 # type: ignore[reportUnusedImport]
9093
)
91-
from .oai import (
92-
wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport]
93-
)
9494
from .sandbox import (
9595
RegisteredSandboxFunction, # noqa: F401 # type: ignore[reportUnusedImport]
9696
RegisterSandboxResult, # noqa: F401 # type: ignore[reportUnusedImport]

py/src/braintrust/auto.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
GoogleGenAIIntegration,
1818
LangChainIntegration,
1919
LiteLLMIntegration,
20+
OpenAIAgentsIntegration,
21+
OpenAIIntegration,
2022
OpenRouterIntegration,
2123
PydanticAIIntegration,
2224
)
@@ -52,6 +54,7 @@ def auto_instrument(
5254
dspy: bool = True,
5355
adk: bool = True,
5456
langchain: bool = True,
57+
openai_agents: bool = True,
5558
) -> dict[str, bool]:
5659
"""
5760
Auto-instrument supported AI/ML libraries for Braintrust tracing.
@@ -75,6 +78,7 @@ def auto_instrument(
7578
dspy: Enable DSPy instrumentation (default: True)
7679
adk: Enable Google ADK instrumentation (default: True)
7780
langchain: Enable LangChain instrumentation (default: True)
81+
openai_agents: Enable OpenAI Agents SDK instrumentation (default: True)
7882
7983
Returns:
8084
Dict mapping integration name to whether it was successfully instrumented.
@@ -123,7 +127,7 @@ def auto_instrument(
123127
results = {}
124128

125129
if openai:
126-
results["openai"] = _instrument_openai()
130+
results["openai"] = _instrument_integration(OpenAIIntegration)
127131
if anthropic:
128132
results["anthropic"] = _instrument_integration(AnthropicIntegration)
129133
if litellm:
@@ -146,18 +150,12 @@ def auto_instrument(
146150
results["adk"] = _instrument_integration(ADKIntegration)
147151
if langchain:
148152
results["langchain"] = _instrument_integration(LangChainIntegration)
153+
if openai_agents:
154+
results["openai_agents"] = _instrument_integration(OpenAIAgentsIntegration)
149155

150156
return results
151157

152158

153-
def _instrument_openai() -> bool:
154-
with _try_patch():
155-
from braintrust.oai import patch_openai
156-
157-
return patch_openai()
158-
return False
159-
160-
161159
def _instrument_integration(integration) -> bool:
162160
with _try_patch():
163161
return integration.setup()

py/src/braintrust/integrations/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from .google_genai import GoogleGenAIIntegration
88
from .langchain import LangChainIntegration
99
from .litellm import LiteLLMIntegration
10+
from .openai import OpenAIIntegration
11+
from .openai_agents import OpenAIAgentsIntegration
1012
from .openrouter import OpenRouterIntegration
1113
from .pydantic_ai import PydanticAIIntegration
1214

@@ -21,6 +23,8 @@
2123
"GoogleGenAIIntegration",
2224
"LiteLLMIntegration",
2325
"LangChainIntegration",
26+
"OpenAIIntegration",
27+
"OpenAIAgentsIntegration",
2428
"OpenRouterIntegration",
2529
"PydanticAIIntegration",
2630
]

py/src/braintrust/integrations/auto_test_scripts/test_auto_litellm.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,15 @@
1414
assert not LiteLLMIntegration.patchers[0].is_patched(litellm, None)
1515

1616
# 2. Instrument
17-
results = auto_instrument()
17+
# Disable OpenAI auto-instrumentation here because LiteLLM's OpenAI-backed
18+
# chat path can otherwise produce both a LiteLLM span and an OpenAI span.
19+
# This test is meant to validate LiteLLM instrumentation in isolation.
20+
results = auto_instrument(openai=False)
1821
assert results.get("litellm") == True
1922
assert LiteLLMIntegration.patchers[0].is_patched(litellm, None)
2023

2124
# 3. Idempotent
22-
results2 = auto_instrument()
25+
results2 = auto_instrument(openai=False)
2326
assert results2.get("litellm") == True
2427

2528
# 4. Make API call and verify span

py/src/braintrust/integrations/auto_test_scripts/test_auto_openai.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,36 @@
11
"""Test auto_instrument for OpenAI."""
22

3+
import inspect
4+
from pathlib import Path
5+
36
import openai
47
from braintrust.auto import auto_instrument
58
from braintrust.wrappers.test_utils import autoinstrument_test_context
9+
from wrapt import FunctionWrapper
10+
11+
12+
_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "openai" / "cassettes"
13+
14+
15+
def _is_braintrust_wrapped() -> bool:
16+
attr = inspect.getattr_static(openai.resources.chat.completions.Completions, "create", None)
17+
return isinstance(attr, FunctionWrapper)
618

719

820
# 1. Verify not patched initially
9-
assert not getattr(openai, "__braintrust_wrapped__", False)
21+
assert not _is_braintrust_wrapped()
1022

1123
# 2. Instrument
1224
results = auto_instrument()
1325
assert results.get("openai") == True
14-
assert getattr(openai, "__braintrust_wrapped__", False)
26+
assert _is_braintrust_wrapped()
1527

1628
# 3. Idempotent
1729
results2 = auto_instrument()
1830
assert results2.get("openai") == True
1931

2032
# 4. Make API call and verify span
21-
with autoinstrument_test_context("test_auto_openai") as memory_logger:
33+
with autoinstrument_test_context("test_auto_openai", cassettes_dir=_CASSETTES_DIR) as memory_logger:
2234
client = openai.OpenAI()
2335
response = client.chat.completions.create(
2436
model="gpt-4o-mini",
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""Test auto_instrument for the OpenAI Agents SDK."""
2+
3+
import asyncio
4+
from pathlib import Path
5+
6+
import agents
7+
from braintrust.auto import auto_instrument
8+
from braintrust.integrations.openai_agents import BraintrustTracingProcessor
9+
from braintrust.wrappers.test_utils import autoinstrument_test_context
10+
11+
12+
_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "openai_agents" / "cassettes"
13+
TEST_MODEL = "gpt-4o-mini"
14+
TEST_PROMPT = "What is 2+2? Just the number."
15+
TEST_AGENT_INSTRUCTIONS = "You are a helpful assistant. Be very concise."
16+
17+
18+
def _has_braintrust_processor() -> bool:
19+
provider = agents.tracing.get_trace_provider()
20+
processors = getattr(getattr(provider, "_multi_processor", None), "_processors", ())
21+
return any(isinstance(processor, BraintrustTracingProcessor) for processor in processors)
22+
23+
24+
results = auto_instrument()
25+
assert results.get("openai_agents") == True
26+
assert _has_braintrust_processor()
27+
28+
results2 = auto_instrument()
29+
assert results2.get("openai_agents") == True
30+
assert _has_braintrust_processor()
31+
32+
with autoinstrument_test_context("test_auto_openai_agents", cassettes_dir=_CASSETTES_DIR) as memory_logger:
33+
from agents import Agent
34+
from agents.run import AgentRunner
35+
36+
async def run_agent():
37+
agent = Agent(name="test-agent", model=TEST_MODEL, instructions=TEST_AGENT_INSTRUCTIONS)
38+
return await AgentRunner().run(agent, TEST_PROMPT)
39+
40+
result = asyncio.run(run_agent())
41+
assert result is not None
42+
assert hasattr(result, "final_output") or hasattr(result, "output")
43+
44+
spans = memory_logger.pop()
45+
assert len(spans) >= 2, f"Expected at least 2 spans, got {len(spans)}"
46+
47+
print("SUCCESS")
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Braintrust integration for the OpenAI Python SDK and OpenAI-compatible gateways."""
2+
3+
from braintrust.logger import NOOP_SPAN, current_span, init_logger
4+
5+
from .integration import OpenAIIntegration
6+
from .patchers import wrap_openai
7+
8+
9+
__all__ = [
10+
"OpenAIIntegration",
11+
"setup_openai",
12+
"wrap_openai",
13+
]
14+
15+
16+
def setup_openai(
17+
api_key: str | None = None,
18+
project_id: str | None = None,
19+
project_name: str | None = None,
20+
) -> bool:
21+
"""Setup Braintrust integration with OpenAI.
22+
23+
Patches OpenAI resource classes at the module level so that all clients
24+
produce Braintrust tracing spans.
25+
26+
Args:
27+
api_key: Braintrust API key (optional, can use env var BRAINTRUST_API_KEY)
28+
project_id: Braintrust project ID (optional)
29+
project_name: Braintrust project name (optional, can use env var BRAINTRUST_PROJECT)
30+
31+
Returns:
32+
True if setup was successful, False otherwise
33+
"""
34+
span = current_span()
35+
if span == NOOP_SPAN:
36+
init_logger(project=project_name, api_key=api_key, project_id=project_id)
37+
38+
return OpenAIIntegration.setup()

py/src/braintrust/wrappers/cassettes/TestPatchOpenAIAsyncSpans.test_patch_openai_async_creates_spans.yaml renamed to py/src/braintrust/integrations/openai/cassettes/TestOpenAIIntegrationSetupAsyncSpans.test_setup_async_creates_spans.yaml

File renamed without changes.

py/src/braintrust/wrappers/cassettes/TestPatchOpenAISpans.test_patch_openai_creates_spans.yaml renamed to py/src/braintrust/integrations/openai/cassettes/TestOpenAIIntegrationSetupSpans.test_setup_creates_spans.yaml

File renamed without changes.

0 commit comments

Comments (0)