Skip to content

Commit 948dcd7

Browse files
committed
feat(openai): migrate openai wrapper and add claude agent sdk integration
Move OpenAI wrapper tests, cassettes, and tracing code from `wrappers/` into `integrations/openai/`. Split OpenAI Agents SDK tracing processor into its own `integrations/openai_agents/` package with a dedicated `test_openai_agents` nox session. Add new Claude Agent SDK integration under `integrations/` with subprocess transport-level cassette support. The old `wrappers/openai.py` now re-exports `BraintrustTracingProcessor` from the new location for backward compatibility. Test paths in `noxfile.py` are updated to point at the new integration directories.
1 parent 5c35051 commit 948dcd7

File tree

69 files changed

+3218
-2779
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+3218
-2779
lines changed

py/noxfile.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def _pinned_python_version():
8383
# Test matrix
8484
ANTHROPIC_VERSIONS = (LATEST, "0.50.0", "0.49.0", "0.48.0")
8585
OPENAI_VERSIONS = (LATEST, "1.77.0", "1.71", "1.91", "1.92")
86+
OPENAI_AGENTS_VERSIONS = (LATEST, "0.0.19")
8687
# litellm latest requires Python >= 3.10
8788
# Pin litellm because 1.82.7-1.82.8 are compromised: https://github.com/BerriAI/litellm/issues/24512
8889
LITELLM_VERSIONS = ("1.82.0", "1.74.0")
@@ -240,10 +241,21 @@ def test_langchain(session, version):
240241
def test_openai(session, version):
241242
_install_test_deps(session)
242243
_install(session, "openai", version)
243-
# openai-agents requires Python >= 3.10
244-
_install(session, "openai-agents")
245-
_run_tests(session, f"{WRAPPER_DIR}/test_openai.py")
246-
_run_tests(session, f"{WRAPPER_DIR}/test_openai_openrouter_gateway.py")
244+
_run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai.py")
245+
_run_tests(session, f"{INTEGRATION_DIR}/openai/test_oai_attachments.py")
246+
_run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai_openrouter_gateway.py")
247+
_run_core_tests(session)
248+
249+
250+
@nox.session()
251+
@nox.parametrize("version", OPENAI_AGENTS_VERSIONS, ids=OPENAI_AGENTS_VERSIONS)
252+
def test_openai_agents(session, version):
253+
if sys.version_info < (3, 10):
254+
session.skip("openai-agents requires Python >= 3.10")
255+
_install_test_deps(session)
256+
_install(session, "openai")
257+
_install(session, "openai-agents", version)
258+
_run_tests(session, f"{INTEGRATION_DIR}/openai_agents/test_openai_agents.py")
247259
_run_core_tests(session)
248260

249261

@@ -254,7 +266,7 @@ def test_openai_http2_streaming(session):
254266
# h2 is isolated to this session because it's only needed to force the
255267
# HTTP/2 LegacyAPIResponse streaming path used by the regression test.
256268
session.install("h2")
257-
_run_tests(session, f"{WRAPPER_DIR}/test_openai_http2.py")
269+
_run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai_http2.py")
258270

259271

260272
@nox.session()

py/src/braintrust/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ def is_equal(expected, output):
7676
from .integrations.litellm import (
7777
wrap_litellm, # noqa: F401 # type: ignore[reportUnusedImport]
7878
)
79+
from .integrations.openai import (
80+
wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport]
81+
)
7982
from .integrations.openrouter import (
8083
wrap_openrouter, # noqa: F401 # type: ignore[reportUnusedImport]
8184
)
@@ -88,9 +91,6 @@ def is_equal(expected, output):
8891
_internal_reset_global_state, # noqa: F401 # type: ignore[reportUnusedImport]
8992
_internal_with_custom_background_logger, # noqa: F401 # type: ignore[reportUnusedImport]
9093
)
91-
from .oai import (
92-
wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport]
93-
)
9494
from .sandbox import (
9595
RegisteredSandboxFunction, # noqa: F401 # type: ignore[reportUnusedImport]
9696
RegisterSandboxResult, # noqa: F401 # type: ignore[reportUnusedImport]

py/src/braintrust/auto.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
GoogleGenAIIntegration,
1818
LangChainIntegration,
1919
LiteLLMIntegration,
20+
OpenAIAgentsIntegration,
21+
OpenAIIntegration,
2022
OpenRouterIntegration,
2123
PydanticAIIntegration,
2224
)
@@ -52,6 +54,7 @@ def auto_instrument(
5254
dspy: bool = True,
5355
adk: bool = True,
5456
langchain: bool = True,
57+
openai_agents: bool = True,
5558
) -> dict[str, bool]:
5659
"""
5760
Auto-instrument supported AI/ML libraries for Braintrust tracing.
@@ -75,6 +78,7 @@ def auto_instrument(
7578
dspy: Enable DSPy instrumentation (default: True)
7679
adk: Enable Google ADK instrumentation (default: True)
7780
langchain: Enable LangChain instrumentation (default: True)
81+
openai_agents: Enable OpenAI Agents SDK instrumentation (default: True)
7882
7983
Returns:
8084
Dict mapping integration name to whether it was successfully instrumented.
@@ -123,7 +127,7 @@ def auto_instrument(
123127
results = {}
124128

125129
if openai:
126-
results["openai"] = _instrument_openai()
130+
results["openai"] = _instrument_integration(OpenAIIntegration)
127131
if anthropic:
128132
results["anthropic"] = _instrument_integration(AnthropicIntegration)
129133
if litellm:
@@ -146,18 +150,12 @@ def auto_instrument(
146150
results["adk"] = _instrument_integration(ADKIntegration)
147151
if langchain:
148152
results["langchain"] = _instrument_integration(LangChainIntegration)
153+
if openai_agents:
154+
results["openai_agents"] = _instrument_integration(OpenAIAgentsIntegration)
149155

150156
return results
151157

152158

153-
def _instrument_openai() -> bool:
154-
with _try_patch():
155-
from braintrust.oai import patch_openai
156-
157-
return patch_openai()
158-
return False
159-
160-
161159
def _instrument_integration(integration) -> bool:
162160
with _try_patch():
163161
return integration.setup()

py/src/braintrust/integrations/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from .google_genai import GoogleGenAIIntegration
88
from .langchain import LangChainIntegration
99
from .litellm import LiteLLMIntegration
10+
from .openai import OpenAIIntegration
11+
from .openai_agents import OpenAIAgentsIntegration
1012
from .openrouter import OpenRouterIntegration
1113
from .pydantic_ai import PydanticAIIntegration
1214

@@ -21,6 +23,8 @@
2123
"GoogleGenAIIntegration",
2224
"LiteLLMIntegration",
2325
"LangChainIntegration",
26+
"OpenAIIntegration",
27+
"OpenAIAgentsIntegration",
2428
"OpenRouterIntegration",
2529
"PydanticAIIntegration",
2630
]

py/src/braintrust/integrations/auto_test_scripts/test_auto_litellm.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,15 @@
1414
assert not LiteLLMIntegration.patchers[0].is_patched(litellm, None)
1515

1616
# 2. Instrument
17-
results = auto_instrument()
17+
# Disable OpenAI auto-instrumentation here because LiteLLM's OpenAI-backed
18+
# chat path can otherwise produce both a LiteLLM span and an OpenAI span.
19+
# This test is meant to validate LiteLLM instrumentation in isolation.
20+
results = auto_instrument(openai=False)
1821
assert results.get("litellm") == True
1922
assert LiteLLMIntegration.patchers[0].is_patched(litellm, None)
2023

2124
# 3. Idempotent
22-
results2 = auto_instrument()
25+
results2 = auto_instrument(openai=False)
2326
assert results2.get("litellm") == True
2427

2528
# 4. Make API call and verify span

py/src/braintrust/integrations/auto_test_scripts/test_auto_openai.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,36 @@
11
"""Test auto_instrument for OpenAI."""
22

3+
import inspect
4+
from pathlib import Path
5+
36
import openai
47
from braintrust.auto import auto_instrument
58
from braintrust.wrappers.test_utils import autoinstrument_test_context
9+
from wrapt import FunctionWrapper
10+
11+
12+
_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "openai" / "cassettes"
13+
14+
15+
def _is_braintrust_wrapped() -> bool:
16+
attr = inspect.getattr_static(openai.resources.chat.completions.Completions, "create", None)
17+
return isinstance(attr, FunctionWrapper)
618

719

820
# 1. Verify not patched initially
9-
assert not getattr(openai, "__braintrust_wrapped__", False)
21+
assert not _is_braintrust_wrapped()
1022

1123
# 2. Instrument
1224
results = auto_instrument()
1325
assert results.get("openai") == True
14-
assert getattr(openai, "__braintrust_wrapped__", False)
26+
assert _is_braintrust_wrapped()
1527

1628
# 3. Idempotent
1729
results2 = auto_instrument()
1830
assert results2.get("openai") == True
1931

2032
# 4. Make API call and verify span
21-
with autoinstrument_test_context("test_auto_openai") as memory_logger:
33+
with autoinstrument_test_context("test_auto_openai", cassettes_dir=_CASSETTES_DIR) as memory_logger:
2234
client = openai.OpenAI()
2335
response = client.chat.completions.create(
2436
model="gpt-4o-mini",
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""Test auto_instrument for the OpenAI Agents SDK."""
2+
3+
import asyncio
4+
from pathlib import Path
5+
6+
import agents
7+
from braintrust.auto import auto_instrument
8+
from braintrust.integrations.openai_agents import BraintrustTracingProcessor
9+
from braintrust.wrappers.test_utils import autoinstrument_test_context
10+
11+
12+
_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "openai_agents" / "cassettes"
13+
TEST_MODEL = "gpt-4o-mini"
14+
TEST_PROMPT = "What is 2+2? Just the number."
15+
TEST_AGENT_INSTRUCTIONS = "You are a helpful assistant. Be very concise."
16+
17+
18+
def _has_braintrust_processor() -> bool:
19+
provider = agents.tracing.get_trace_provider()
20+
processors = getattr(getattr(provider, "_multi_processor", None), "_processors", ())
21+
return any(isinstance(processor, BraintrustTracingProcessor) for processor in processors)
22+
23+
24+
results = auto_instrument()
25+
assert results.get("openai_agents") == True
26+
assert _has_braintrust_processor()
27+
28+
results2 = auto_instrument()
29+
assert results2.get("openai_agents") == True
30+
assert _has_braintrust_processor()
31+
32+
with autoinstrument_test_context("test_auto_openai_agents", cassettes_dir=_CASSETTES_DIR) as memory_logger:
33+
from agents import Agent
34+
from agents.run import AgentRunner
35+
36+
async def run_agent():
37+
agent = Agent(name="test-agent", model=TEST_MODEL, instructions=TEST_AGENT_INSTRUCTIONS)
38+
return await AgentRunner().run(agent, TEST_PROMPT)
39+
40+
result = asyncio.run(run_agent())
41+
assert result is not None
42+
assert hasattr(result, "final_output") or hasattr(result, "output")
43+
44+
spans = memory_logger.pop()
45+
assert len(spans) >= 2, f"Expected at least 2 spans, got {len(spans)}"
46+
47+
print("SUCCESS")
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Braintrust integration for the OpenAI Python SDK and OpenAI-compatible gateways."""
2+
3+
from braintrust.logger import NOOP_SPAN, current_span, init_logger
4+
5+
from .integration import OpenAIIntegration
6+
from .patchers import wrap_openai
7+
8+
9+
__all__ = [
10+
"OpenAIIntegration",
11+
"setup_openai",
12+
"wrap_openai",
13+
]
14+
15+
16+
def setup_openai(
17+
api_key: str | None = None,
18+
project_id: str | None = None,
19+
project_name: str | None = None,
20+
) -> bool:
21+
"""Setup Braintrust integration with OpenAI.
22+
23+
Patches OpenAI resource classes at the module level so that all clients
24+
produce Braintrust tracing spans.
25+
26+
Args:
27+
api_key: Braintrust API key (optional, can use env var BRAINTRUST_API_KEY)
28+
project_id: Braintrust project ID (optional)
29+
project_name: Braintrust project name (optional, can use env var BRAINTRUST_PROJECT)
30+
31+
Returns:
32+
True if setup was successful, False otherwise
33+
"""
34+
span = current_span()
35+
if span == NOOP_SPAN:
36+
init_logger(project=project_name, api_key=api_key, project_id=project_id)
37+
38+
return OpenAIIntegration.setup()

py/src/braintrust/wrappers/cassettes/TestPatchOpenAIAsyncSpans.test_patch_openai_async_creates_spans.yaml renamed to py/src/braintrust/integrations/openai/cassettes/TestOpenAIIntegrationSetupAsyncSpans.test_setup_async_creates_spans.yaml

File renamed without changes.

py/src/braintrust/wrappers/cassettes/TestPatchOpenAISpans.test_patch_openai_creates_spans.yaml renamed to py/src/braintrust/integrations/openai/cassettes/TestOpenAIIntegrationSetupSpans.test_setup_creates_spans.yaml

File renamed without changes.

0 commit comments

Comments (0)