Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions py/noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def _pinned_python_version():
# Test matrix
ANTHROPIC_VERSIONS = (LATEST, "0.50.0", "0.49.0", "0.48.0")
OPENAI_VERSIONS = (LATEST, "1.77.0", "1.71", "1.91", "1.92")
OPENAI_AGENTS_VERSIONS = (LATEST, "0.0.19")
# litellm latest requires Python >= 3.10
# Pin litellm because 1.82.7-1.82.8 are compromised: https://github.com/BerriAI/litellm/issues/24512
LITELLM_VERSIONS = ("1.82.0", "1.74.0")
Expand Down Expand Up @@ -240,10 +241,21 @@ def test_langchain(session, version):
def test_openai(session, version):
    """Run the OpenAI wrapper and integration test suites against one pinned SDK version."""
    _install_test_deps(session)
    _install(session, "openai", version)
    # openai-agents requires Python >= 3.10
    # NOTE(review): openai-agents is installed unpinned here while a dedicated
    # test_openai_agents session pins it — confirm this extra install is still needed.
    _install(session, "openai-agents")
    _run_tests(session, f"{WRAPPER_DIR}/test_openai.py")
    _run_tests(session, f"{WRAPPER_DIR}/test_openai_openrouter_gateway.py")
    # NOTE(review): the openai and gateway tests appear under both WRAPPER_DIR and
    # INTEGRATION_DIR — verify both copies are intentional and not a stale duplicate.
    _run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai.py")
    _run_tests(session, f"{INTEGRATION_DIR}/openai/test_oai_attachments.py")
    _run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai_openrouter_gateway.py")
    _run_core_tests(session)


@nox.session()
@nox.parametrize("version", OPENAI_AGENTS_VERSIONS, ids=OPENAI_AGENTS_VERSIONS)
def test_openai_agents(session, version):
    """Run the OpenAI Agents SDK integration tests against one pinned SDK version."""
    # The Agents SDK does not support older interpreters; skip instead of failing.
    if sys.version_info < (3, 10):
        session.skip("openai-agents requires Python >= 3.10")
    _install_test_deps(session)
    # The base openai package resolves freely; only openai-agents is pinned to
    # the parametrized version from OPENAI_AGENTS_VERSIONS.
    _install(session, "openai")
    _install(session, "openai-agents", version)
    _run_tests(session, f"{INTEGRATION_DIR}/openai_agents/test_openai_agents.py")
    _run_core_tests(session)


Expand All @@ -254,7 +266,7 @@ def test_openai_http2_streaming(session):
# h2 is isolated to this session because it's only needed to force the
# HTTP/2 LegacyAPIResponse streaming path used by the regression test.
session.install("h2")
_run_tests(session, f"{WRAPPER_DIR}/test_openai_http2.py")
_run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai_http2.py")


@nox.session()
Expand Down
6 changes: 3 additions & 3 deletions py/src/braintrust/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ def is_equal(expected, output):
from .integrations.litellm import (
wrap_litellm, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .integrations.openai import (
wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .integrations.openrouter import (
wrap_openrouter, # noqa: F401 # type: ignore[reportUnusedImport]
)
Expand All @@ -88,9 +91,6 @@ def is_equal(expected, output):
_internal_reset_global_state, # noqa: F401 # type: ignore[reportUnusedImport]
_internal_with_custom_background_logger, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .oai import (
wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport]
)
from .sandbox import (
RegisteredSandboxFunction, # noqa: F401 # type: ignore[reportUnusedImport]
RegisterSandboxResult, # noqa: F401 # type: ignore[reportUnusedImport]
Expand Down
16 changes: 7 additions & 9 deletions py/src/braintrust/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
GoogleGenAIIntegration,
LangChainIntegration,
LiteLLMIntegration,
OpenAIAgentsIntegration,
OpenAIIntegration,
OpenRouterIntegration,
PydanticAIIntegration,
)
Expand Down Expand Up @@ -52,6 +54,7 @@ def auto_instrument(
dspy: bool = True,
adk: bool = True,
langchain: bool = True,
openai_agents: bool = True,
) -> dict[str, bool]:
"""
Auto-instrument supported AI/ML libraries for Braintrust tracing.
Expand All @@ -75,6 +78,7 @@ def auto_instrument(
dspy: Enable DSPy instrumentation (default: True)
adk: Enable Google ADK instrumentation (default: True)
langchain: Enable LangChain instrumentation (default: True)
openai_agents: Enable OpenAI Agents SDK instrumentation (default: True)

Returns:
Dict mapping integration name to whether it was successfully instrumented.
Expand Down Expand Up @@ -123,7 +127,7 @@ def auto_instrument(
results = {}

if openai:
results["openai"] = _instrument_openai()
results["openai"] = _instrument_integration(OpenAIIntegration)
if anthropic:
results["anthropic"] = _instrument_integration(AnthropicIntegration)
if litellm:
Expand All @@ -146,18 +150,12 @@ def auto_instrument(
results["adk"] = _instrument_integration(ADKIntegration)
if langchain:
results["langchain"] = _instrument_integration(LangChainIntegration)
if openai_agents:
results["openai_agents"] = _instrument_integration(OpenAIAgentsIntegration)

return results


def _instrument_openai() -> bool:
    """Patch the OpenAI SDK for Braintrust tracing; return True only on success."""
    # NOTE(review): _try_patch appears to be an exception-suppressing context
    # manager — if the import or patch raises, control falls through to the
    # final `return False` instead of propagating. TODO confirm its semantics.
    with _try_patch():
        from braintrust.oai import patch_openai

        return patch_openai()
    return False


def _instrument_integration(integration) -> bool:
with _try_patch():
return integration.setup()
Expand Down
4 changes: 4 additions & 0 deletions py/src/braintrust/integrations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from .google_genai import GoogleGenAIIntegration
from .langchain import LangChainIntegration
from .litellm import LiteLLMIntegration
from .openai import OpenAIIntegration
from .openai_agents import OpenAIAgentsIntegration
from .openrouter import OpenRouterIntegration
from .pydantic_ai import PydanticAIIntegration

Expand All @@ -21,6 +23,8 @@
"GoogleGenAIIntegration",
"LiteLLMIntegration",
"LangChainIntegration",
"OpenAIIntegration",
"OpenAIAgentsIntegration",
"OpenRouterIntegration",
"PydanticAIIntegration",
]
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@
assert not LiteLLMIntegration.patchers[0].is_patched(litellm, None)

# 2. Instrument
results = auto_instrument()
# Disable OpenAI auto-instrumentation here because LiteLLM's OpenAI-backed
# chat path can otherwise produce both a LiteLLM span and an OpenAI span.
# This test is meant to validate LiteLLM instrumentation in isolation.
results = auto_instrument(openai=False)
assert results.get("litellm") == True
assert LiteLLMIntegration.patchers[0].is_patched(litellm, None)

# 3. Idempotent
results2 = auto_instrument()
results2 = auto_instrument(openai=False)
assert results2.get("litellm") == True

# 4. Make API call and verify span
Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,36 @@
"""Test auto_instrument for OpenAI."""

import inspect
from pathlib import Path

import openai
from braintrust.auto import auto_instrument
from braintrust.wrappers.test_utils import autoinstrument_test_context
from wrapt import FunctionWrapper


_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "openai" / "cassettes"


def _is_braintrust_wrapped() -> bool:
    """Report whether Completions.create is currently a Braintrust wrapt proxy."""
    completions_cls = openai.resources.chat.completions.Completions
    # getattr_static avoids triggering descriptors so we see the raw class attribute.
    create_attr = inspect.getattr_static(completions_cls, "create", None)
    return isinstance(create_attr, FunctionWrapper)


# 1. Verify not patched initially
assert not getattr(openai, "__braintrust_wrapped__", False)
assert not _is_braintrust_wrapped()

# 2. Instrument
results = auto_instrument()
assert results.get("openai") == True
assert getattr(openai, "__braintrust_wrapped__", False)
assert _is_braintrust_wrapped()

# 3. Idempotent
results2 = auto_instrument()
assert results2.get("openai") == True

# 4. Make API call and verify span
with autoinstrument_test_context("test_auto_openai") as memory_logger:
with autoinstrument_test_context("test_auto_openai", cassettes_dir=_CASSETTES_DIR) as memory_logger:
client = openai.OpenAI()
response = client.chat.completions.create(
model="gpt-4o-mini",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Test auto_instrument for the OpenAI Agents SDK."""

import asyncio
from pathlib import Path

import agents
from braintrust.auto import auto_instrument
from braintrust.integrations.openai_agents import BraintrustTracingProcessor
from braintrust.wrappers.test_utils import autoinstrument_test_context


_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "openai_agents" / "cassettes"
TEST_MODEL = "gpt-4o-mini"
TEST_PROMPT = "What is 2+2? Just the number."
TEST_AGENT_INSTRUCTIONS = "You are a helpful assistant. Be very concise."


def _has_braintrust_processor() -> bool:
provider = agents.tracing.get_trace_provider()
processors = getattr(getattr(provider, "_multi_processor", None), "_processors", ())
return any(isinstance(processor, BraintrustTracingProcessor) for processor in processors)


results = auto_instrument()
assert results.get("openai_agents") == True
assert _has_braintrust_processor()

results2 = auto_instrument()
assert results2.get("openai_agents") == True
assert _has_braintrust_processor()

with autoinstrument_test_context("test_auto_openai_agents", cassettes_dir=_CASSETTES_DIR) as memory_logger:
from agents import Agent
from agents.run import AgentRunner

async def run_agent():
agent = Agent(name="test-agent", model=TEST_MODEL, instructions=TEST_AGENT_INSTRUCTIONS)
return await AgentRunner().run(agent, TEST_PROMPT)

result = asyncio.run(run_agent())
assert result is not None
assert hasattr(result, "final_output") or hasattr(result, "output")

spans = memory_logger.pop()
assert len(spans) >= 2, f"Expected at least 2 spans, got {len(spans)}"

print("SUCCESS")
38 changes: 38 additions & 0 deletions py/src/braintrust/integrations/openai/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Braintrust integration for the OpenAI Python SDK and OpenAI-compatible gateways."""

from braintrust.logger import NOOP_SPAN, current_span, init_logger

from .integration import OpenAIIntegration
from .patchers import wrap_openai


__all__ = [
"OpenAIIntegration",
"setup_openai",
"wrap_openai",
]


def setup_openai(
    api_key: str | None = None,
    project_id: str | None = None,
    project_name: str | None = None,
) -> bool:
    """Setup Braintrust integration with OpenAI.

    Patches OpenAI resource classes at the module level so that all clients
    produce Braintrust tracing spans.

    Args:
        api_key: Braintrust API key (optional, can use env var BRAINTRUST_API_KEY)
        project_id: Braintrust project ID (optional)
        project_name: Braintrust project name (optional, can use env var BRAINTRUST_PROJECT)

    Returns:
        True if setup was successful, False otherwise
    """
    # Only initialize a logger when no span is already active; an existing
    # (non-noop) span means a logging context has been set up by the caller.
    if current_span() == NOOP_SPAN:
        init_logger(project=project_name, api_key=api_key, project_id=project_id)

    return OpenAIIntegration.setup()
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":[{"type":"text","text":"What color
is this image?"},{"type":"image_url","image_url":{"url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="}}]}],"model":"gpt-4o-mini"}'
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '277'
Content-Type:
- application/json
Host:
- api.openai.com
User-Agent:
- OpenAI/Python 2.30.0
X-Stainless-Arch:
- arm64
X-Stainless-Async:
- 'false'
X-Stainless-Lang:
- python
X-Stainless-OS:
- MacOS
X-Stainless-Package-Version:
- 2.30.0
X-Stainless-Raw-Response:
- 'true'
X-Stainless-Runtime:
- CPython
X-Stainless-Runtime-Version:
- 3.13.3
x-stainless-read-timeout:
- '600'
x-stainless-retry-count:
- '0'
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-DQiFPcXBqdj72osGdNznct7LOdS4U\",\n \"object\":
\"chat.completion\",\n \"created\": 1775258987,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The image is red.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
8513,\n \"completion_tokens\": 5,\n \"total_tokens\": 8518,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_ebf4e532f9\"\n}\n"
headers:
CF-Cache-Status:
- DYNAMIC
CF-Ray:
- 9e6bdb7b6c73d8d9-YYZ
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 03 Apr 2026 23:29:47 GMT
Server:
- cloudflare
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
content-length:
- '827'
openai-organization:
- braintrust-data
openai-processing-ms:
- '472'
openai-project:
- proj_vsCSXafhhByzWOThMrJcZiw9
openai-version:
- '2020-10-01'
set-cookie:
- __cf_bm=nQ.XMiAqbEVeR.9ilHFAV4G4Lr7C1VfG_qcqqLD25pc-1775258986.789194-1.0.1.1-OZs4yBKIgVJRt8TPeU0RwzFFZpzpHe0wallA39buBqR7rJNm4msYOCxouudLSaCVsCrH7FfDoXAGpAJyXrfrbaDIsjojElEvFuvv0L9z1PZpxcj_hIX69nD.oYVrz2X0;
HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 03 Apr 2026
23:59:47 GMT
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-input-images:
- '50000'
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-input-images:
- '49999'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999227'
x-ratelimit-reset-input-images:
- 1ms
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_52279379da51445eae3f8994d1027669
status:
code: 200
message: OK
version: 1
Loading
Loading