diff --git a/py/noxfile.py b/py/noxfile.py index c92395da..620becb5 100644 --- a/py/noxfile.py +++ b/py/noxfile.py @@ -83,6 +83,7 @@ def _pinned_python_version(): # Test matrix ANTHROPIC_VERSIONS = (LATEST, "0.50.0", "0.49.0", "0.48.0") OPENAI_VERSIONS = (LATEST, "1.77.0", "1.71", "1.91", "1.92") +OPENAI_AGENTS_VERSIONS = (LATEST, "0.0.19") # litellm latest requires Python >= 3.10 # Pin litellm because 1.82.7-1.82.8 are compromised: https://github.com/BerriAI/litellm/issues/24512 LITELLM_VERSIONS = ("1.82.0", "1.74.0") @@ -240,10 +241,21 @@ def test_langchain(session, version): def test_openai(session, version): _install_test_deps(session) _install(session, "openai", version) - # openai-agents requires Python >= 3.10 - _install(session, "openai-agents") - _run_tests(session, f"{WRAPPER_DIR}/test_openai.py") - _run_tests(session, f"{WRAPPER_DIR}/test_openai_openrouter_gateway.py") + _run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai.py") + _run_tests(session, f"{INTEGRATION_DIR}/openai/test_oai_attachments.py") + _run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai_openrouter_gateway.py") + _run_core_tests(session) + + +@nox.session() +@nox.parametrize("version", OPENAI_AGENTS_VERSIONS, ids=OPENAI_AGENTS_VERSIONS) +def test_openai_agents(session, version): + if sys.version_info < (3, 10): + session.skip("openai-agents requires Python >= 3.10") + _install_test_deps(session) + _install(session, "openai") + _install(session, "openai-agents", version) + _run_tests(session, f"{INTEGRATION_DIR}/openai_agents/test_openai_agents.py") _run_core_tests(session) @@ -254,7 +266,7 @@ def test_openai_http2_streaming(session): # h2 is isolated to this session because it's only needed to force the # HTTP/2 LegacyAPIResponse streaming path used by the regression test. session.install("h2") - _run_tests(session, f"{WRAPPER_DIR}/test_openai_http2.py") + _run_tests(session, f"{INTEGRATION_DIR}/openai/test_openai_http2.py") @nox.session() diff --git a/py/src/braintrust/__init__.py b/py/src/braintrust/__init__.py index dd37241e..9d2ffd0b 100644 --- a/py/src/braintrust/__init__.py +++ b/py/src/braintrust/__init__.py @@ -76,6 +76,9 @@ def is_equal(expected, output): from .integrations.litellm import ( wrap_litellm, # noqa: F401 # type: ignore[reportUnusedImport] ) +from .integrations.openai import ( + wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport] +) from .integrations.openrouter import ( wrap_openrouter, # noqa: F401 # type: ignore[reportUnusedImport] ) @@ -88,9 +91,6 @@ def is_equal(expected, output): _internal_reset_global_state, # noqa: F401 # type: ignore[reportUnusedImport] _internal_with_custom_background_logger, # noqa: F401 # type: ignore[reportUnusedImport] ) -from .oai import ( - wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport] -) from .sandbox import ( RegisteredSandboxFunction, # noqa: F401 # type: ignore[reportUnusedImport] RegisterSandboxResult, # noqa: F401 # type: ignore[reportUnusedImport] diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py index dc44c7d2..3115da2c 100644 --- a/py/src/braintrust/auto.py +++ b/py/src/braintrust/auto.py @@ -17,6 +17,8 @@ GoogleGenAIIntegration, LangChainIntegration, LiteLLMIntegration, + OpenAIAgentsIntegration, + OpenAIIntegration, OpenRouterIntegration, PydanticAIIntegration, ) @@ -52,6 +54,7 @@ def auto_instrument( dspy: bool = True, adk: bool = True, langchain: bool = True, + openai_agents: bool = True, ) -> dict[str, bool]: """ Auto-instrument supported AI/ML libraries for Braintrust tracing. 
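For context, a minimal usage sketch of the new ``openai_agents`` flag added in the hunk above (illustrative; only ``auto_instrument`` and the shape of its return value come from this diff)::

    from braintrust.auto import auto_instrument

    # Instrument everything except the OpenAI Agents SDK processor.
    results = auto_instrument(openai_agents=False)
    # Disabled integrations are simply absent from the result dict;
    # enabled ones map to whether patching succeeded.
    assert "openai_agents" not in results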
@@ -75,6 +78,7 @@ def auto_instrument( dspy: Enable DSPy instrumentation (default: True) adk: Enable Google ADK instrumentation (default: True) langchain: Enable LangChain instrumentation (default: True) + openai_agents: Enable OpenAI Agents SDK instrumentation (default: True) Returns: Dict mapping integration name to whether it was successfully instrumented. @@ -123,7 +127,7 @@ def auto_instrument( results = {} if openai: - results["openai"] = _instrument_openai() + results["openai"] = _instrument_integration(OpenAIIntegration) if anthropic: results["anthropic"] = _instrument_integration(AnthropicIntegration) if litellm: @@ -146,18 +150,12 @@ def auto_instrument( results["adk"] = _instrument_integration(ADKIntegration) if langchain: results["langchain"] = _instrument_integration(LangChainIntegration) + if openai_agents: + results["openai_agents"] = _instrument_integration(OpenAIAgentsIntegration) return results -def _instrument_openai() -> bool: - with _try_patch(): - from braintrust.oai import patch_openai - - return patch_openai() - return False - - def _instrument_integration(integration) -> bool: with _try_patch(): return integration.setup() diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py index 0062ec77..07057c76 100644 --- a/py/src/braintrust/integrations/__init__.py +++ b/py/src/braintrust/integrations/__init__.py @@ -7,6 +7,8 @@ from .google_genai import GoogleGenAIIntegration from .langchain import LangChainIntegration from .litellm import LiteLLMIntegration +from .openai import OpenAIIntegration +from .openai_agents import OpenAIAgentsIntegration from .openrouter import OpenRouterIntegration from .pydantic_ai import PydanticAIIntegration @@ -21,6 +23,8 @@ "GoogleGenAIIntegration", "LiteLLMIntegration", "LangChainIntegration", + "OpenAIIntegration", + "OpenAIAgentsIntegration", "OpenRouterIntegration", "PydanticAIIntegration", ] diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_litellm.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_litellm.py index 5663c982..a4048d8d 100644 --- a/py/src/braintrust/integrations/auto_test_scripts/test_auto_litellm.py +++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_litellm.py @@ -14,12 +14,15 @@ assert not LiteLLMIntegration.patchers[0].is_patched(litellm, None) # 2. Instrument -results = auto_instrument() +# Disable OpenAI auto-instrumentation here because LiteLLM's OpenAI-backed +# chat path can otherwise produce both a LiteLLM span and an OpenAI span. +# This test is meant to validate LiteLLM instrumentation in isolation. +results = auto_instrument(openai=False) assert results.get("litellm") == True assert LiteLLMIntegration.patchers[0].is_patched(litellm, None) # 3. Idempotent -results2 = auto_instrument() +results2 = auto_instrument(openai=False) assert results2.get("litellm") == True # 4. 
Make API call and verify span diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_openai.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_openai.py index ef5eaf8f..3cfdb0a3 100644 --- a/py/src/braintrust/integrations/auto_test_scripts/test_auto_openai.py +++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_openai.py @@ -1,24 +1,36 @@ """Test auto_instrument for OpenAI.""" +import inspect +from pathlib import Path + import openai from braintrust.auto import auto_instrument from braintrust.wrappers.test_utils import autoinstrument_test_context +from wrapt import FunctionWrapper + + +_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "openai" / "cassettes" + + +def _is_braintrust_wrapped() -> bool: + attr = inspect.getattr_static(openai.resources.chat.completions.Completions, "create", None) + return isinstance(attr, FunctionWrapper) # 1. Verify not patched initially -assert not getattr(openai, "__braintrust_wrapped__", False) +assert not _is_braintrust_wrapped() # 2. Instrument results = auto_instrument() assert results.get("openai") == True -assert getattr(openai, "__braintrust_wrapped__", False) +assert _is_braintrust_wrapped() # 3. Idempotent results2 = auto_instrument() assert results2.get("openai") == True # 4. Make API call and verify span -with autoinstrument_test_context("test_auto_openai") as memory_logger: +with autoinstrument_test_context("test_auto_openai", cassettes_dir=_CASSETTES_DIR) as memory_logger: client = openai.OpenAI() response = client.chat.completions.create( model="gpt-4o-mini", diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_openai_agents.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_openai_agents.py new file mode 100644 index 00000000..54b80200 --- /dev/null +++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_openai_agents.py @@ -0,0 +1,47 @@ +"""Test auto_instrument for the OpenAI Agents SDK.""" + +import asyncio +from pathlib import Path + +import agents +from braintrust.auto import auto_instrument +from braintrust.integrations.openai_agents import BraintrustTracingProcessor +from braintrust.wrappers.test_utils import autoinstrument_test_context + + +_CASSETTES_DIR = Path(__file__).resolve().parent.parent / "openai_agents" / "cassettes" +TEST_MODEL = "gpt-4o-mini" +TEST_PROMPT = "What is 2+2? Just the number." +TEST_AGENT_INSTRUCTIONS = "You are a helpful assistant. Be very concise." 
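+# The helper below peeks at private Agents SDK trace-provider attributes
+# ("_multi_processor" / "_processors"); the getattr defaults keep the check
+# falsy rather than raising if the SDK reorganizes those internals.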
+ + +def _has_braintrust_processor() -> bool: + provider = agents.tracing.get_trace_provider() + processors = getattr(getattr(provider, "_multi_processor", None), "_processors", ()) + return any(isinstance(processor, BraintrustTracingProcessor) for processor in processors) + + +results = auto_instrument() +assert results.get("openai_agents") == True +assert _has_braintrust_processor() + +results2 = auto_instrument() +assert results2.get("openai_agents") == True +assert _has_braintrust_processor() + +with autoinstrument_test_context("test_auto_openai_agents", cassettes_dir=_CASSETTES_DIR) as memory_logger: + from agents import Agent + from agents.run import AgentRunner + + async def run_agent(): + agent = Agent(name="test-agent", model=TEST_MODEL, instructions=TEST_AGENT_INSTRUCTIONS) + return await AgentRunner().run(agent, TEST_PROMPT) + + result = asyncio.run(run_agent()) + assert result is not None + assert hasattr(result, "final_output") or hasattr(result, "output") + + spans = memory_logger.pop() + assert len(spans) >= 2, f"Expected at least 2 spans, got {len(spans)}" + +print("SUCCESS") diff --git a/py/src/braintrust/integrations/openai/__init__.py b/py/src/braintrust/integrations/openai/__init__.py new file mode 100644 index 00000000..0a8b99e7 --- /dev/null +++ b/py/src/braintrust/integrations/openai/__init__.py @@ -0,0 +1,38 @@ +"""Braintrust integration for the OpenAI Python SDK and OpenAI-compatible gateways.""" + +from braintrust.logger import NOOP_SPAN, current_span, init_logger + +from .integration import OpenAIIntegration +from .patchers import wrap_openai + + +__all__ = [ + "OpenAIIntegration", + "setup_openai", + "wrap_openai", +] + + +def setup_openai( + api_key: str | None = None, + project_id: str | None = None, + project_name: str | None = None, +) -> bool: + """Set up the Braintrust integration with OpenAI. + + Patches OpenAI resource classes at the module level so that all clients + produce Braintrust tracing spans. 
+ + Args: + api_key: Braintrust API key (optional, can use env var BRAINTRUST_API_KEY) + project_id: Braintrust project ID (optional) + project_name: Braintrust project name (optional, can use env var BRAINTRUST_PROJECT) + + Returns: + True if setup was successful, False otherwise + """ + span = current_span() + if span == NOOP_SPAN: + init_logger(project=project_name, api_key=api_key, project_id=project_id) + + return OpenAIIntegration.setup() diff --git a/py/src/braintrust/wrappers/cassettes/TestPatchOpenAIAsyncSpans.test_patch_openai_async_creates_spans.yaml b/py/src/braintrust/integrations/openai/cassettes/TestOpenAIIntegrationSetupAsyncSpans.test_setup_async_creates_spans.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/TestPatchOpenAIAsyncSpans.test_patch_openai_async_creates_spans.yaml rename to py/src/braintrust/integrations/openai/cassettes/TestOpenAIIntegrationSetupAsyncSpans.test_setup_async_creates_spans.yaml diff --git a/py/src/braintrust/wrappers/cassettes/TestPatchOpenAISpans.test_patch_openai_creates_spans.yaml b/py/src/braintrust/integrations/openai/cassettes/TestOpenAIIntegrationSetupSpans.test_setup_creates_spans.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/TestPatchOpenAISpans.test_patch_openai_creates_spans.yaml rename to py/src/braintrust/integrations/openai/cassettes/TestOpenAIIntegrationSetupSpans.test_setup_creates_spans.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_auto_openai.yaml b/py/src/braintrust/integrations/openai/cassettes/test_auto_openai.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_auto_openai.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_auto_openai.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_braintrust_tracing_processor_concurrency_bug.yaml b/py/src/braintrust/integrations/openai/cassettes/test_braintrust_tracing_processor_concurrency_bug.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_braintrust_tracing_processor_concurrency_bug.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_braintrust_tracing_processor_concurrency_bug.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_braintrust_tracing_processor_current_span_detection.yaml b/py/src/braintrust/integrations/openai/cassettes/test_braintrust_tracing_processor_current_span_detection.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_braintrust_tracing_processor_current_span_detection.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_braintrust_tracing_processor_current_span_detection.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_async_parallel_requests.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_async_parallel_requests.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_async_parallel_requests.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_async_parallel_requests.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_async.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_async.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_async.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_async.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_async_context_manager.yaml 
b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_async_context_manager.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_async_context_manager.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_async_context_manager.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_async_with_system_prompt.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_async_with_system_prompt.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_async_with_system_prompt.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_async_with_system_prompt.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_error_in_async_context.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_error_in_async_context.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_error_in_async_context.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_error_in_async_context.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_metrics.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_metrics.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_metrics.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_metrics.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_streaming_async.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_async.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_streaming_async.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_async.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_streaming_async_http2_context_manager_preserves_wrapper.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_async_http2_context_manager_preserves_wrapper.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_streaming_async_http2_context_manager_preserves_wrapper.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_async_http2_context_manager_preserves_wrapper.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_streaming_sync.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_sync.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_streaming_sync.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_sync.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_streaming_sync_http2_context_manager_preserves_wrapper.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_sync_http2_context_manager_preserves_wrapper.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_streaming_sync_http2_context_manager_preserves_wrapper.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_sync_http2_context_manager_preserves_wrapper.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_streaming_sync_http2_preserves_stream_interface.yaml 
b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_sync_http2_preserves_stream_interface.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_streaming_sync_http2_preserves_stream_interface.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_sync_http2_preserves_stream_interface.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_chat_with_system_prompt.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_with_system_prompt.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_chat_with_system_prompt.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_chat_with_system_prompt.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_client_async_comparison.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_client_async_comparison.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_client_async_comparison.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_client_async_comparison.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_client_async_error.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_client_async_error.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_client_async_error.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_client_async_error.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_client_comparison.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_client_comparison.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_client_comparison.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_client_comparison.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_client_error.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_client_error.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_client_error.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_client_error.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_embeddings.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_embeddings.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_embeddings.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_embeddings.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_embeddings_async.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_embeddings_async.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_embeddings_async.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_embeddings_async.yaml diff --git a/py/src/braintrust/integrations/openai/cassettes/test_openai_image_data_url_converts_to_attachment.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_image_data_url_converts_to_attachment.yaml new file mode 100644 index 00000000..18d8d907 --- /dev/null +++ b/py/src/braintrust/integrations/openai/cassettes/test_openai_image_data_url_converts_to_attachment.yaml @@ -0,0 +1,117 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":[{"type":"text","text":"What color + is this 
image?"},{"type":"image_url","image_url":{"url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="}}]}],"model":"gpt-4o-mini"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '277' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.30.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.30.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-DQiFPcXBqdj72osGdNznct7LOdS4U\",\n \"object\": + \"chat.completion\",\n \"created\": 1775258987,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"The image is red.\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 8513,\n \"completion_tokens\": 5,\n \"total_tokens\": 8518,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_ebf4e532f9\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e6bdb7b6c73d8d9-YYZ + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 03 Apr 2026 23:29:47 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '827' + openai-organization: + - braintrust-data + openai-processing-ms: + - '472' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=nQ.XMiAqbEVeR.9ilHFAV4G4Lr7C1VfG_qcqqLD25pc-1775258986.789194-1.0.1.1-OZs4yBKIgVJRt8TPeU0RwzFFZpzpHe0wallA39buBqR7rJNm4msYOCxouudLSaCVsCrH7FfDoXAGpAJyXrfrbaDIsjojElEvFuvv0L9z1PZpxcj_hIX69nD.oYVrz2X0; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 03 Apr 2026 + 23:59:47 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-input-images: + - '50000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-input-images: + - '49999' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999227' + x-ratelimit-reset-input-images: + - 1ms + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_52279379da51445eae3f8994d1027669 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_not_given_filtering.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_not_given_filtering.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_not_given_filtering.yaml rename to 
py/src/braintrust/integrations/openai/cassettes/test_openai_not_given_filtering.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_parallel_tool_calls.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_parallel_tool_calls.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_parallel_tool_calls.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_parallel_tool_calls.yaml diff --git a/py/src/braintrust/integrations/openai/cassettes/test_openai_pdf_data_url_converts_to_attachment.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_pdf_data_url_converts_to_attachment.yaml new file mode 100644 index 00000000..6513c130 --- /dev/null +++ b/py/src/braintrust/integrations/openai/cassettes/test_openai_pdf_data_url_converts_to_attachment.yaml @@ -0,0 +1,114 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":[{"type":"text","text":"What type + of document is this?"},{"type":"file","file":{"file_data":"data:application/pdf;base64,JVBERi0xLjAKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PmVuZG9iagoyIDAgb2JqCjw8L1R5cGUvUGFnZXMvS2lkc1szIDAgUl0vQ291bnQgMT4+ZW5kb2JqCjMgMCBvYmoKPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCA2MTIgNzkyXT4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZg0KMDAwMDAwMDAxMCAwMDAwMCBuDQowMDAwMDAwMDUzIDAwMDAwIG4NCjAwMDAwMDAxMDIgMDAwMDAgbg0KdHJhaWxlcgo8PC9TaXplIDQvUm9vdCAxIDAgUj4+CnN0YXJ0eHJlZgoxNDkKJUVPRg==","filename":"test.pdf"}}]}],"model":"gpt-4o-mini"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '602' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.30.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.30.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-DQiFQWEix0oHS9FfS8LF0Ypyqe9QK\",\n \"object\": + \"chat.completion\",\n \"created\": 1775258988,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"It seems I don't have access to the + content of the document \\\"test.pdf\\\" to determine the type of document + it is. 
If there are specific details or topics you'd like to know about, feel + free to share, and I might be able to help!\",\n \"refusal\": null,\n + \ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\": + \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 233,\n \"completion_tokens\": + 51,\n \"total_tokens\": 284,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_ebf4e532f9\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e6bdb805a22ebb6-YYZ + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 03 Apr 2026 23:29:49 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '1037' + openai-organization: + - braintrust-data + openai-processing-ms: + - '1829' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=LGK8jAdqLu5BdN_dH4kK5cPgl21xo5QUUgJvQ0N8awE-1775258987.5709069-1.0.1.1-879ASnD01YaHQdEhYCOmcAGOZsLqSGj32VXAzUP9r.Q.m0ysS.nzRtc.LLe.LGwjkXs_H2NJ.N0KBfMghLCPmDv64s15gOU.XttTLyWZAH6oMdzJkOhmGbLZsuXNO5_o; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 03 Apr 2026 + 23:59:49 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999225' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_52a185077a1a438eaee0e308a38c075c + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_response_streaming_async.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_response_streaming_async.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_response_streaming_async.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_response_streaming_async.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_responses_async.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_responses_async.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_responses_async.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_responses_async.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_responses_metadata_preservation.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_responses_metadata_preservation.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_responses_metadata_preservation.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_responses_metadata_preservation.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_responses_metrics.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_responses_metrics.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_responses_metrics.yaml rename to 
py/src/braintrust/integrations/openai/cassettes/test_openai_responses_metrics.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_responses_not_given_filtering.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_responses_not_given_filtering.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_responses_not_given_filtering.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_responses_not_given_filtering.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_responses_with_raw_response_async.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_responses_with_raw_response_async.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_responses_with_raw_response_async.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_responses_with_raw_response_async.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_responses_with_raw_response_create.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_responses_with_raw_response_create.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_responses_with_raw_response_create.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_responses_with_raw_response_create.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_responses_with_raw_response_create_stream.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_responses_with_raw_response_create_stream.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_responses_with_raw_response_create_stream.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_responses_with_raw_response_create_stream.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_responses_with_raw_response_create_stream_async.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_responses_with_raw_response_create_stream_async.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_responses_with_raw_response_create_stream_async.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_responses_with_raw_response_create_stream_async.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_responses_with_raw_response_parse.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_responses_with_raw_response_parse.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_responses_with_raw_response_parse.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_responses_with_raw_response_parse.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openai_streaming_with_break.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_streaming_with_break.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openai_streaming_with_break.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openai_streaming_with_break.yaml diff --git a/py/src/braintrust/integrations/openai/cassettes/test_openai_unwrapped_client_no_conversion.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_unwrapped_client_no_conversion.yaml new file mode 100644 index 00000000..71ad7c80 --- /dev/null +++ b/py/src/braintrust/integrations/openai/cassettes/test_openai_unwrapped_client_no_conversion.yaml @@ -0,0 +1,115 @@ +interactions: +- request: + 
body: '{"messages":[{"role":"user","content":[{"type":"text","text":"What color + is this image?"},{"type":"image_url","image_url":{"url":"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="}}]}],"model":"gpt-4o-mini"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '277' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.30.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.30.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-DQiFUK0uQkouZBYxROEx5or892Rn6\",\n \"object\": + \"chat.completion\",\n \"created\": 1775258992,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"The image is red.\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 8513,\n \"completion_tokens\": 5,\n \"total_tokens\": 8518,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_ebf4e532f9\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e6bdb990c47cf1c-YYZ + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Fri, 03 Apr 2026 23:29:52 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '827' + openai-organization: + - braintrust-data + openai-processing-ms: + - '596' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=5z7So6sgskcy1l1nkACc6VESutFtORx68lYFxjqWoP0-1775258991.5244184-1.0.1.1-7dxeyVF1pdTW586TGG21qqdB73qZXdcIoLuChabkTmaxZHk99LEWqsLxjEAJw2y_aoAhsojJEMsRNzJfWRIpueOu0MlUP2oA6S06Nf_4pakz192LUeHI6oNErEWQZcNj; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Fri, 03 Apr 2026 + 23:59:52 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-input-images: + - '50000' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-input-images: + - '49999' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999227' + x-ratelimit-reset-input-images: + - 1ms + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_3fa70a00f0a44bfeba4cdc09653199d7 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_openrouter_chat_completion_async.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openrouter_chat_completion_async.yaml similarity index 100% rename from 
py/src/braintrust/wrappers/cassettes/test_openrouter_chat_completion_async.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openrouter_chat_completion_async.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openrouter_chat_completion_sync.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openrouter_chat_completion_sync.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openrouter_chat_completion_sync.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openrouter_chat_completion_sync.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_openrouter_streaming_sync.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openrouter_streaming_sync.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_openrouter_streaming_sync.yaml rename to py/src/braintrust/integrations/openai/cassettes/test_openrouter_streaming_sync.yaml diff --git a/py/src/braintrust/integrations/openai/integration.py b/py/src/braintrust/integrations/openai/integration.py new file mode 100644 index 00000000..c10b88a0 --- /dev/null +++ b/py/src/braintrust/integrations/openai/integration.py @@ -0,0 +1,23 @@ +"""OpenAI integration orchestration.""" + +from braintrust.integrations.base import BaseIntegration + +from .patchers import ( + ChatCompletionsPatcher, + EmbeddingsPatcher, + ModerationsPatcher, + ResponsesPatcher, +) + + +class OpenAIIntegration(BaseIntegration): + """Braintrust instrumentation for the OpenAI Python SDK.""" + + name = "openai" + import_names = ("openai",) + patchers = ( + ChatCompletionsPatcher, + EmbeddingsPatcher, + ModerationsPatcher, + ResponsesPatcher, + ) diff --git a/py/src/braintrust/integrations/openai/patchers.py b/py/src/braintrust/integrations/openai/patchers.py new file mode 100644 index 00000000..617bafc2 --- /dev/null +++ b/py/src/braintrust/integrations/openai/patchers.py @@ -0,0 +1,352 @@ +"""OpenAI patchers and public helpers.""" + +import inspect +from typing import Any + +from braintrust.integrations.base import CompositeFunctionWrapperPatcher, FunctionWrapperPatcher +from wrapt import BoundFunctionWrapper, FunctionWrapper + +from .tracing import ( + _chat_completion_create_wrapper, + _chat_completion_parse_wrapper, + _embedding_create_wrapper, + _moderation_create_wrapper, + _responses_create_wrapper, + _responses_parse_wrapper, + _responses_raw_create_wrapper, + _responses_raw_parse_wrapper, +) + + +# --------------------------------------------------------------------------- +# Factory — single source of truth for each traced method +# --------------------------------------------------------------------------- + + +def _make_method_patchers( + *, + name_prefix: str, + target_module: str, + sync_class: str, + async_class: str, + method: str, + wrapper: Any, + wrap_name: str, +) -> tuple[type[FunctionWrapperPatcher], type[FunctionWrapperPatcher], type[FunctionWrapperPatcher]]: + """Create sync, async, and instance-level patchers for one method. + + Returns ``(sync_patcher, async_patcher, instance_patcher)``: + + * *sync_patcher* / *async_patcher* — module-level patchers used by + ``OpenAIIntegration.setup()`` to instrument class methods in-place. + * *instance_patcher* — used by ``wrap_openai()`` to instrument a specific + client instance's resource method. + + All three share the same *wrapper* callback, ensuring identical tracing + regardless of which code path activates the instrumentation. 
+ """ + sync_patcher: type[FunctionWrapperPatcher] = type( + f"{name_prefix}.sync", + (FunctionWrapperPatcher,), + { + "__module__": __name__, + "name": f"{name_prefix}.sync", + "target_module": target_module, + "target_path": f"{sync_class}.{method}", + "wrapper": wrapper, + }, + ) + async_patcher: type[FunctionWrapperPatcher] = type( + f"{name_prefix}.async", + (FunctionWrapperPatcher,), + { + "__module__": __name__, + "name": f"{name_prefix}.async", + "target_module": target_module, + "target_path": f"{async_class}.{method}", + "wrapper": wrapper, + }, + ) + instance_patcher: type[FunctionWrapperPatcher] = type( + wrap_name, + (FunctionWrapperPatcher,), + { + "__module__": __name__, + "name": wrap_name, + "target_path": method, + "wrapper": wrapper, + }, + ) + return sync_patcher, async_patcher, instance_patcher + + +# --------------------------------------------------------------------------- +# Chat completions +# --------------------------------------------------------------------------- + +_cc_create_sync, _cc_create_async, _wrap_chat_create = _make_method_patchers( + name_prefix="openai.chat.completions.create", + target_module="openai.resources.chat.completions", + sync_class="Completions", + async_class="AsyncCompletions", + method="create", + wrapper=_chat_completion_create_wrapper, + wrap_name="openai.wrap.chat.create", +) + +_cc_parse_sync, _cc_parse_async, _wrap_chat_parse = _make_method_patchers( + name_prefix="openai.chat.completions.parse", + target_module="openai.resources.chat.completions", + sync_class="Completions", + async_class="AsyncCompletions", + method="parse", + wrapper=_chat_completion_parse_wrapper, + wrap_name="openai.wrap.chat.parse", +) + + +class ChatCompletionsPatcher(CompositeFunctionWrapperPatcher): + """Patch ``openai.resources.chat.completions`` for tracing.""" + + name = "openai.chat.completions" + sub_patchers = ( + _cc_create_sync, + _cc_create_async, + _cc_parse_sync, + _cc_parse_async, + ) + + +class _WrapChatCompletions(CompositeFunctionWrapperPatcher): + name = "openai.wrap.chat" + sub_patchers = (_wrap_chat_create, _wrap_chat_parse) + + +# --------------------------------------------------------------------------- +# Embeddings +# --------------------------------------------------------------------------- + +_emb_create_sync, _emb_create_async, _wrap_emb_create = _make_method_patchers( + name_prefix="openai.embeddings.create", + target_module="openai.resources.embeddings", + sync_class="Embeddings", + async_class="AsyncEmbeddings", + method="create", + wrapper=_embedding_create_wrapper, + wrap_name="openai.wrap.embeddings.create", +) + + +class EmbeddingsPatcher(CompositeFunctionWrapperPatcher): + """Patch ``openai.resources.embeddings`` for tracing.""" + + name = "openai.embeddings" + sub_patchers = ( + _emb_create_sync, + _emb_create_async, + ) + + +class _WrapEmbeddings(CompositeFunctionWrapperPatcher): + name = "openai.wrap.embeddings" + sub_patchers = (_wrap_emb_create,) + + +# --------------------------------------------------------------------------- +# Moderations +# --------------------------------------------------------------------------- + +_mod_create_sync, _mod_create_async, _wrap_mod_create = _make_method_patchers( + name_prefix="openai.moderations.create", + target_module="openai.resources.moderations", + sync_class="Moderations", + async_class="AsyncModerations", + method="create", + wrapper=_moderation_create_wrapper, + wrap_name="openai.wrap.moderations.create", +) + + +class 
ModerationsPatcher(CompositeFunctionWrapperPatcher): + """Patch ``openai.resources.moderations`` for tracing.""" + + name = "openai.moderations" + sub_patchers = ( + _mod_create_sync, + _mod_create_async, + ) + + +class _WrapModerations(CompositeFunctionWrapperPatcher): + name = "openai.wrap.moderations" + sub_patchers = (_wrap_mod_create,) + + +# --------------------------------------------------------------------------- +# Responses +# --------------------------------------------------------------------------- + +_resp_create_sync, _resp_create_async, _wrap_resp_create = _make_method_patchers( + name_prefix="openai.responses.create", + target_module="openai.resources.responses.responses", + sync_class="Responses", + async_class="AsyncResponses", + method="create", + wrapper=_responses_create_wrapper, + wrap_name="openai.wrap.responses.create", +) + +_resp_parse_sync, _resp_parse_async, _wrap_resp_parse = _make_method_patchers( + name_prefix="openai.responses.parse", + target_module="openai.resources.responses.responses", + sync_class="Responses", + async_class="AsyncResponses", + method="parse", + wrapper=_responses_parse_wrapper, + wrap_name="openai.wrap.responses.parse", +) + +_resp_raw_create_sync, _resp_raw_create_async, _wrap_resp_raw_create = _make_method_patchers( + name_prefix="openai.responses.raw.create", + target_module="openai.resources.responses.responses", + sync_class="ResponsesWithRawResponse", + async_class="AsyncResponsesWithRawResponse", + method="create", + wrapper=_responses_raw_create_wrapper, + wrap_name="openai.wrap.responses.raw.create", +) + +_resp_raw_parse_sync, _resp_raw_parse_async, _wrap_resp_raw_parse = _make_method_patchers( + name_prefix="openai.responses.raw.parse", + target_module="openai.resources.responses.responses", + sync_class="ResponsesWithRawResponse", + async_class="AsyncResponsesWithRawResponse", + method="parse", + wrapper=_responses_raw_parse_wrapper, + wrap_name="openai.wrap.responses.raw.parse", +) + + +class ResponsesPatcher(CompositeFunctionWrapperPatcher): + """Patch ``openai.resources.responses`` for tracing.""" + + name = "openai.responses" + sub_patchers = ( + _resp_create_sync, + _resp_create_async, + _resp_parse_sync, + _resp_parse_async, + _resp_raw_create_sync, + _resp_raw_create_async, + _resp_raw_parse_sync, + _resp_raw_parse_async, + ) + + +class _WrapResponses(CompositeFunctionWrapperPatcher): + name = "openai.wrap.responses" + sub_patchers = (_wrap_resp_create, _wrap_resp_parse) + + +class _WrapResponsesRaw(CompositeFunctionWrapperPatcher): + name = "openai.wrap.responses.raw" + sub_patchers = (_wrap_resp_raw_create, _wrap_resp_raw_parse) + + +# --------------------------------------------------------------------------- +# Resource mapping — single source of truth for wrap_openai. +# +# Each entry is (client_attr_path, instance_patcher). ``wrap_openai`` +# iterates this list to wrap a specific client instance. +# +# When adding a new resource, add it here AND add the corresponding +# module-level patchers to ``OpenAIIntegration.patchers`` in +# ``integration.py``. The ``test_wrap_openai_and_setup_use_same_wrappers`` +# test enforces that both paths cover the same wrapper functions. +# --------------------------------------------------------------------------- + +_WRAP_TARGETS: tuple[tuple[str, type[CompositeFunctionWrapperPatcher]], ...] 
= ( + ("chat.completions", _WrapChatCompletions), + ("embeddings", _WrapEmbeddings), + ("moderations", _WrapModerations), + ("responses", _WrapResponses), + ("responses.with_raw_response", _WrapResponsesRaw), + ("beta.chat.completions", _WrapChatCompletions), +) + + +# --------------------------------------------------------------------------- +# Public wrap_openai helper +# --------------------------------------------------------------------------- + + +def _is_class_method_wrapped(resource: Any, method_name: str) -> bool: + """Return ``True`` if *method_name* on the **class** of *resource* is + already a wrapt ``FunctionWrapper`` (i.e. patched by ``setup()``). + + This prevents double-tracing when both ``setup()`` and ``wrap_openai()`` + are active for the same client. + """ + cls_attr = inspect.getattr_static(type(resource), method_name, None) + return isinstance(cls_attr, FunctionWrapper) + + +def _delegates_to_wrapped_method(resource: Any, method_name: str) -> bool: + """Return ``True`` when *resource.method_name* already delegates to an + instrumented wrapt wrapper. + + OpenAI's ``with_raw_response`` helpers are regular functions created at + object construction time. When the parent resource has already been + wrapped, those helpers forward into the wrapped method via ``__wrapped__``. + Patching them again would create duplicate spans. + """ + method = getattr(resource, method_name, None) + wrapped = getattr(method, "__wrapped__", None) + return isinstance(wrapped, (FunctionWrapper, BoundFunctionWrapper)) + + +def _wrap_resource( + client: Any, + attr_path: str, + patcher: type[CompositeFunctionWrapperPatcher], +) -> None: + """Walk *attr_path* from *client* and apply ``patcher.wrap_target()``. + + Skips wrapping when the class methods are already patched at the module + level (by ``OpenAIIntegration.setup()``), avoiding double spans. + The instance-level patchers handle their own idempotency via + ``has_patch_marker``. + """ + resource = client + for part in attr_path.split("."): + resource = getattr(resource, part, None) + if resource is None: + return + # If any sub-patcher's target is already wrapped on the class, the module- + # level patchers are active and we can skip instance-level wrapping. + for sub in patcher.sub_patchers: + attr = sub.target_path.rsplit(".", 1)[-1] + if _is_class_method_wrapped(resource, attr): + return + if attr_path.endswith("with_raw_response") and _delegates_to_wrapped_method(resource, attr): + return + patcher.wrap_target(resource) + + +def wrap_openai(client: Any) -> Any: + """Manually wrap an OpenAI client instance for tracing. + + Patches resource methods on *client* so that API calls produce Braintrust + tracing spans. Only the given instance is affected; other clients and + the module-level classes are left untouched. + + Idempotent — each instance-level patcher sets its own marker via + ``has_patch_marker`` so repeated calls are no-ops. 
+ + Returns *client* for convenient chaining:: + + client = wrap_openai(openai.OpenAI()) + """ + for attr_path, patcher in _WRAP_TARGETS: + _wrap_resource(client, attr_path, patcher) + return client diff --git a/py/src/braintrust/wrappers/test_oai_attachments.py b/py/src/braintrust/integrations/openai/test_oai_attachments.py similarity index 59% rename from py/src/braintrust/wrappers/test_oai_attachments.py rename to py/src/braintrust/integrations/openai/test_oai_attachments.py index 3e065f3c..d3998b58 100644 --- a/py/src/braintrust/wrappers/test_oai_attachments.py +++ b/py/src/braintrust/integrations/openai/test_oai_attachments.py @@ -20,10 +20,6 @@ def memory_logger(): yield bgl -def _is_wrapped(client): - return hasattr(client, "_NamedWrapper__wrapped") - - @pytest.mark.vcr def test_openai_image_data_url_converts_to_attachment(memory_logger): """Test that image data URLs in chat completions are converted to Attachment objects.""" @@ -160,126 +156,6 @@ def test_openai_pdf_data_url_converts_to_attachment(memory_logger): assert file_data_value.reference["key"] -@pytest.mark.vcr -def test_openai_pdf_data_url_without_filename_uses_fallback(memory_logger): - """Test that PDF data URLs without a filename use the generated fallback.""" - assert not memory_logger.pop() - - # Create a minimal PDF - base64_pdf = "JVBERi0xLjAKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PmVuZG9iagoyIDAgb2JqCjw8L1R5cGUvUGFnZXMvS2lkc1szIDAgUl0vQ291bnQgMT4+ZW5kb2JqCjMgMCBvYmoKPDwvVHlwZS9QYWdlL01lZGlhQm94WzAgMCA2MTIgNzkyXT4+ZW5kb2JqCnhyZWYKMCA0CjAwMDAwMDAwMDAgNjU1MzUgZg0KMDAwMDAwMDAxMCAwMDAwMCBuDQowMDAwMDAwMDUzIDAwMDAwIG4NCjAwMDAwMDAxMDIgMDAwMDAgbg0KdHJhaWxlcgo8PC9TaXplIDQvUm9vdCAxIDAgUj4+CnN0YXJ0eHJlZgoxNDkKJUVPRg==" - data_url = f"data:application/pdf;base64,{base64_pdf}" - - client = wrap_openai(openai.OpenAI()) - - start = time.time() - response = client.chat.completions.create( - model=TEST_MODEL, - messages=[ - { - "role": "user", - "content": [ - {"type": "text", "text": "What type of document is this?"}, - { - "type": "file", - "file": { - "file_data": data_url, - # No filename provided - should use fallback - }, - }, - ], - } - ], - ) - end = time.time() - - # Verify we got a successful response - assert response - assert response.choices - assert response.choices[0].message.content - - # Verify spans were created - spans = memory_logger.pop() - assert len(spans) == 1 - span = spans[0] - assert span - - # Verify metrics - metrics = span["metrics"] - assert_metrics_are_valid(metrics, start, end) - assert TEST_MODEL in span["metadata"]["model"] - assert span["metadata"]["provider"] == "openai" - - # Verify input contains the attachment - assert span["input"] - assert len(span["input"]) == 1 - message_content = span["input"][0]["content"] - assert len(message_content) == 2 - - # First item should be text - assert message_content[0]["type"] == "text" - assert message_content[0]["text"] == "What type of document is this?" 
- - # Second item should have the file_data converted to Attachment - assert message_content[1]["type"] == "file" - file_data_value = message_content[1]["file"]["file_data"] - assert isinstance(file_data_value, Attachment) - assert file_data_value.reference["type"] == "braintrust_attachment" - assert file_data_value.reference["content_type"] == "application/pdf" - # Should use the fallback filename since none was provided - assert file_data_value.reference["filename"] == "document.pdf" - assert file_data_value.reference["key"] - - -@pytest.mark.vcr -def test_openai_regular_url_preserved(memory_logger): - """Test that regular URLs (non-data URLs) are preserved unchanged.""" - assert not memory_logger.pop() - - # Use a regular URL (not a data URL) - regular_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" - - client = wrap_openai(openai.OpenAI()) - - start = time.time() - response = client.chat.completions.create( - model=TEST_MODEL, - messages=[ - { - "role": "user", - "content": [ - {"type": "text", "text": "What's in this image?"}, - {"type": "image_url", "image_url": {"url": regular_url}}, - ], - } - ], - ) - end = time.time() - - # Verify we got a successful response - assert response - assert response.choices - assert response.choices[0].message.content - - # Verify spans were created - spans = memory_logger.pop() - assert len(spans) == 1 - span = spans[0] - assert span - - # Verify metrics - metrics = span["metrics"] - assert_metrics_are_valid(metrics, start, end) - - # Verify input has the URL unchanged (not converted to Attachment) - assert span["input"] - message_content = span["input"][0]["content"] - assert message_content[1]["type"] == "image_url" - image_url_value = message_content[1]["image_url"]["url"] - # Regular URLs should NOT be converted to Attachment - assert isinstance(image_url_value, str) - assert image_url_value == regular_url - - @pytest.mark.vcr def test_openai_unwrapped_client_no_conversion(memory_logger): """Test that unwrapped clients don't process attachments and don't generate spans.""" diff --git a/py/src/braintrust/wrappers/test_openai.py b/py/src/braintrust/integrations/openai/test_openai.py similarity index 63% rename from py/src/braintrust/wrappers/test_openai.py rename to py/src/braintrust/integrations/openai/test_openai.py index 6ab9b343..14cca357 100644 --- a/py/src/braintrust/wrappers/test_openai.py +++ b/py/src/braintrust/integrations/openai/test_openai.py @@ -1,13 +1,13 @@ import asyncio import time -import braintrust import openai import pytest from braintrust import logger, wrap_openai -from braintrust.oai import ChatCompletionWrapper +from braintrust.integrations.openai import OpenAIIntegration +from braintrust.integrations.openai.tracing import RAW_RESPONSE_HEADER, ChatCompletionWrapper from braintrust.test_helpers import assert_dict_matches, init_test_logger -from braintrust.wrappers.test_utils import assert_metrics_are_valid, run_in_subprocess, verify_autoinstrument_script +from braintrust.wrappers.test_utils import assert_metrics_are_valid, verify_autoinstrument_script from openai import AsyncOpenAI from openai._types import NOT_GIVEN from pydantic import BaseModel @@ -27,56 +27,31 @@ def memory_logger(): yield bgl -def test_tracing_processor_sets_current_span(memory_logger): - """Ensure that on_trace_start sets the span as current so nested spans work.""" - pytest.importorskip("agents", reason="agents package not available") 
- from braintrust.wrappers.openai import BraintrustTracingProcessor - - assert not memory_logger.pop() - processor = BraintrustTracingProcessor() - - class DummyTrace: - def __init__(self): - self.trace_id = "test-trace-id" - self.name = "test-trace" - - def export(self): - return {"group_id": "group", "metadata": {"foo": "bar"}} - - trace = DummyTrace() - - with braintrust.start_span(name="parent-span") as parent_span: - assert braintrust.current_span() == parent_span - processor.on_trace_start(trace) - created_span = processor._spans[trace.trace_id] - assert braintrust.current_span() == created_span - - processor.on_trace_end(trace) - assert braintrust.current_span() == parent_span - - spans = memory_logger.pop() - assert spans - assert any(span.get("span_attributes", {}).get("name") == trace.name for span in spans) - - @pytest.mark.vcr def test_openai_chat_metrics(memory_logger): assert not memory_logger.pop() - client = openai.OpenAI() - clients = [client, wrap_openai(client)] + unwrapped_client = openai.OpenAI() + wrapped_client = wrap_openai(openai.OpenAI()) + clients = [unwrapped_client, wrapped_client] for client in clients: start = time.time() response = client.chat.completions.create( - model=TEST_MODEL, messages=[{"role": "user", "content": TEST_PROMPT}] + model=TEST_MODEL, + messages=[{"role": "user", "content": TEST_PROMPT}], + extra_headers={RAW_RESPONSE_HEADER: "true"}, ) end = time.time() assert response - assert response.choices[0].message.content + assert response.headers + + parsed_response = response.parse() + assert parsed_response.choices[0].message.content assert ( - "24" in response.choices[0].message.content or "twenty-four" in response.choices[0].message.content.lower() + "24" in parsed_response.choices[0].message.content + or "twenty-four" in parsed_response.choices[0].message.content.lower() ) if not _is_wrapped(client): @@ -275,111 +250,6 @@ class SimpleAnswer(BaseModel): assert_metrics_are_valid(metrics, start, end) -@pytest.mark.vcr -def test_openai_responses_sparse_indices(memory_logger): - """Test that streaming responses with sparse/out-of-order indices are handled correctly.""" - assert not memory_logger.pop() - - from braintrust.oai import ResponseWrapper - - # Create a mock response with sparse content indices (e.g., indices 0, 2, 5) - # This simulates a streaming response where items arrive out of order or with gaps - class MockResult: - def __init__( - self, - type, - content_index=None, - delta=None, - annotation_index=None, - annotation=None, - output_index=None, - item=None, - ): - self.type = type - if content_index is not None: - self.content_index = content_index - if delta is not None: - self.delta = delta - if annotation_index is not None: - self.annotation_index = annotation_index - if annotation is not None: - self.annotation = annotation - if output_index is not None: - self.output_index = output_index - if item is not None: - self.item = item - - class MockItem: - def __init__(self, id="test_id", type="message"): - self.id = id - self.type = type - - # Test sparse content indices - all_results = [ - MockResult("response.output_item.added", item=MockItem()), - MockResult("response.output_text.delta", content_index=0, delta="First", output_index=0), - MockResult("response.output_text.delta", content_index=2, delta="Third", output_index=0), # Gap at index 1 - MockResult("response.output_text.delta", content_index=5, delta="Sixth", output_index=0), # Gap at indices 3,4 - ] - - # Process the results - wrapper = ResponseWrapper(None, None) - 
output = [{}] # Initialize with one output item - result = wrapper._postprocess_streaming_results(all_results) - - # Verify the output was built correctly with gaps filled - assert "output" in result - assert len(result["output"]) == 1 - content = result["output"][0].get("content", []) - - # Should have 6 items (indices 0-5) - assert len(content) >= 6 - assert content[0].get("text") == "First" - assert content[1].get("text", "") == "" # Gap should be empty - assert content[2].get("text") == "Third" - assert content[3].get("text", "") == "" # Gap should be empty - assert content[4].get("text", "") == "" # Gap should be empty - assert content[5].get("text") == "Sixth" - - # Test sparse annotation indices - all_results_with_annotations = [ - MockResult("response.output_item.added", item=MockItem()), - MockResult("response.output_text.delta", content_index=0, delta="Text", output_index=0), - MockResult( - "response.output_text.annotation.added", - content_index=0, - annotation_index=1, - annotation={"text": "Second annotation"}, - output_index=0, - ), - MockResult( - "response.output_text.annotation.added", - content_index=0, - annotation_index=3, - annotation={"text": "Fourth annotation"}, - output_index=0, - ), - ] - - result = wrapper._postprocess_streaming_results(all_results_with_annotations) - - # Verify annotations were built correctly with gaps filled - assert "output" in result - content = result["output"][0].get("content", []) - assert len(content) >= 1 - annotations = content[0].get("annotations", []) - - # Should have 4 items (indices 0-3) - assert len(annotations) >= 4 - assert annotations[0] == {} # Gap should be empty dict - assert annotations[1] == {"text": "Second annotation"} - assert annotations[2] == {} # Gap should be empty dict - assert annotations[3] == {"text": "Fourth annotation"} - - # No spans should be generated from this unit test - assert not memory_logger.pop() - - @pytest.mark.vcr def test_openai_embeddings(memory_logger): assert not memory_logger.pop() @@ -416,8 +286,7 @@ def test_openai_embeddings(memory_logger): def test_openai_chat_streaming_sync(memory_logger): assert not memory_logger.pop() - client = openai.OpenAI() - clients = [(client, False), (wrap_openai(client), True)] + clients = [(openai.OpenAI(), False), (wrap_openai(openai.OpenAI()), True)] for client, is_wrapped in clients: start = time.time() @@ -468,8 +337,7 @@ def test_openai_chat_streaming_sync(memory_logger): def test_openai_chat_with_system_prompt(memory_logger): assert not memory_logger.pop() - client = openai.OpenAI() - clients = [(client, False), (wrap_openai(client), True)] + clients = [(openai.OpenAI(), False), (wrap_openai(openai.OpenAI()), True)] for client, is_wrapped in clients: response = client.chat.completions.create( @@ -502,8 +370,7 @@ def test_openai_client_comparison(memory_logger): assert not memory_logger.pop() # Get regular and wrapped clients - client = openai.OpenAI() - clients = [(client, False), (wrap_openai(client), True)] + clients = [(openai.OpenAI(), False), (wrap_openai(openai.OpenAI()), True)] for client, is_wrapped in clients: response = client.chat.completions.create( @@ -555,12 +422,18 @@ async def test_openai_chat_async(memory_logger): # First test with an unwrapped async client client = AsyncOpenAI() - resp = await client.chat.completions.create(model=TEST_MODEL, messages=[{"role": "user", "content": TEST_PROMPT}]) + resp = await client.chat.completions.create( + model=TEST_MODEL, + messages=[{"role": "user", "content": TEST_PROMPT}], + 
extra_headers={RAW_RESPONSE_HEADER: "true"}, + ) assert resp - assert resp.choices - assert resp.choices[0].message.content - content = resp.choices[0].message.content + assert resp.headers + parsed_response = resp.parse() + assert parsed_response.choices + assert parsed_response.choices[0].message.content + content = parsed_response.choices[0].message.content # Verify it contains a correct response assert "24" in content or "twenty-four" in content.lower() @@ -573,14 +446,18 @@ async def test_openai_chat_async(memory_logger): start = time.time() resp2 = await client2.chat.completions.create( - model=TEST_MODEL, messages=[{"role": "user", "content": TEST_PROMPT}] + model=TEST_MODEL, + messages=[{"role": "user", "content": TEST_PROMPT}], + extra_headers={RAW_RESPONSE_HEADER: "true"}, ) end = time.time() assert resp2 - assert resp2.choices - assert resp2.choices[0].message.content - content2 = resp2.choices[0].message.content + assert resp2.headers + parsed_response2 = resp2.parse() + assert parsed_response2.choices + assert parsed_response2.choices[0].message.content + content2 = parsed_response2.choices[0].message.content # Verify the wrapped client also gives correct responses assert "24" in content2 or "twenty-four" in content2.lower() @@ -602,8 +479,7 @@ async def test_openai_chat_async(memory_logger): async def test_openai_responses_async(memory_logger): assert not memory_logger.pop() - client = AsyncOpenAI() - clients = [(client, False), (wrap_openai(client), True)] + clients = [(AsyncOpenAI(), False), (wrap_openai(AsyncOpenAI()), True)] for client, is_wrapped in clients: start = time.time() @@ -697,8 +573,7 @@ class NumberAnswer(BaseModel): async def test_openai_embeddings_async(memory_logger): assert not memory_logger.pop() - client = AsyncOpenAI() - clients = [(client, False), (wrap_openai(client), True)] + clients = [(AsyncOpenAI(), False), (wrap_openai(AsyncOpenAI()), True)] for client, is_wrapped in clients: start = time.time() @@ -729,8 +604,7 @@ async def test_openai_embeddings_async(memory_logger): async def test_openai_chat_streaming_async(memory_logger): assert not memory_logger.pop() - client = AsyncOpenAI() - clients = [(client, False), (wrap_openai(client), True)] + clients = [(AsyncOpenAI(), False), (wrap_openai(AsyncOpenAI()), True)] for client, is_wrapped in clients: start = time.time() @@ -782,8 +656,7 @@ async def test_openai_chat_streaming_async(memory_logger): async def test_openai_chat_async_with_system_prompt(memory_logger): assert not memory_logger.pop() - client = AsyncOpenAI() - clients = [(client, False), (wrap_openai(client), True)] + clients = [(AsyncOpenAI(), False), (wrap_openai(AsyncOpenAI()), True)] for client, is_wrapped in clients: response = await client.chat.completions.create( @@ -875,8 +748,7 @@ async def test_openai_chat_async_context_manager(memory_logger): """Test async context manager behavior for chat completions streams.""" assert not memory_logger.pop() - client = AsyncOpenAI() - clients = [(client, False), (wrap_openai(client), True)] + clients = [(AsyncOpenAI(), False), (wrap_openai(AsyncOpenAI()), True)] for client, is_wrapped in clients: start = time.time() @@ -991,8 +863,9 @@ async def test_openai_response_streaming_async(memory_logger): """Test the newer responses API with streaming.""" assert not memory_logger.pop() - client = openai.AsyncOpenAI() - clients = [client, wrap_openai(client)] + unwrapped_client = openai.AsyncOpenAI() + wrapped_client = wrap_openai(openai.AsyncOpenAI()) + clients = [unwrapped_client, wrapped_client] for 
client in clients: start = time.time() @@ -1321,6 +1194,9 @@ class NumberAnswer(BaseModel): reasoning: str unwrapped_client = openai.OpenAI() + if not hasattr(unwrapped_client.responses.with_raw_response, "parse"): + pytest.skip("openai.responses.with_raw_response.parse is not available in this SDK version") + raw_parse = unwrapped_client.responses.with_raw_response.parse( model=TEST_MODEL, input=TEST_PROMPT, text_format=NumberAnswer ) @@ -1480,8 +1356,9 @@ def test_openai_parallel_tool_calls(memory_logger): }, ] - client = openai.OpenAI() - clients = [client, wrap_openai(client)] + unwrapped_client = openai.OpenAI() + wrapped_client = wrap_openai(openai.OpenAI()) + clients = [unwrapped_client, wrapped_client] for stream in [False, True]: for client in clients: @@ -1552,575 +1429,27 @@ def test_openai_parallel_tool_calls(memory_logger): def _is_wrapped(client): - return hasattr(client, "_NamedWrapper__wrapped") - - -@pytest.mark.asyncio -@pytest.mark.vcr -async def test_braintrust_tracing_processor_current_span_detection(memory_logger): - """Test that BraintrustTracingProcessor currentSpan() detection works with OpenAI Agents SDK.""" - pytest.importorskip("agents", reason="agents package not available") - - import agents - import braintrust - from agents import Agent - from agents.run import AgentRunner - from braintrust.wrappers.openai import BraintrustTracingProcessor - - assert not memory_logger.pop() - - @braintrust.traced(name="parent_span_test") - async def test_function(instructions: str): - # Verify we're in a traced context - detected_parent = braintrust.current_span() - assert detected_parent is not None, "Parent span should exist in traced context" - assert detected_parent != braintrust.logger.NOOP_SPAN, "Should not be NOOP span" - - # Create processor WITHOUT parentSpan - should auto-detect via current_span() - processor = BraintrustTracingProcessor() - - # Set up tracing - agents.set_tracing_disabled(False) - agents.add_trace_processor(processor) - - try: - # Create a simple agent - agent = Agent( - name="test-agent", - model=TEST_MODEL, - instructions="You are a helpful assistant. Be very concise.", - ) - - # Run the agent - this should create spans as children of detected parent - runner = AgentRunner() - result = await runner.run(agent, instructions) - assert result is not None, "Agent should return a result" - assert hasattr(result, "final_output") or hasattr(result, "output"), "Result should have output" - - return result - finally: - processor.shutdown() - - # Execute the wrapped function - result = await test_function("What is 2+2? 
Just the number.") - assert result is not None, "Test function should return a result" - - # Verify span hierarchy in logged spans - spans = memory_logger.pop() - assert len(spans) >= 2, f"Should have at least parent and child spans, got {len(spans)}" - - # Find parent and child spans - parent_span = None - child_spans = [] - - for span in spans: - if span.get("span_attributes", {}).get("name") == "parent_span_test": - parent_span = span - elif span.get("span_attributes", {}).get("name") == "Agent workflow": - child_spans.append(span) - - assert parent_span is not None, "Should find parent span with name 'parent_span_test'" - assert len(child_spans) > 0, "Should find at least one child span with name 'Agent workflow'" - - # Verify the child span has the parent as its parent - if child_spans and parent_span: - child_span = child_spans[0] - # In Braintrust, parent-child relationships are represented by span_parents array - child_span_parents = child_span.get("span_parents", []) - parent_span_id = parent_span.get("span_id") - - assert parent_span_id is not None, "Parent span should have a span_id" - assert isinstance(child_span_parents, list) and len(child_span_parents) > 0, ( - "Child span should have span_parents array" - ) - assert parent_span_id in child_span_parents, ( - f"Child span should include parent span_id {parent_span_id} in its span_parents array {child_span_parents} (currentSpan detection)" - ) - - # Verify both spans have the same root_span_id - assert child_span.get("root_span_id") == parent_span.get("root_span_id"), ( - "Parent and child should share the same root_span_id" - ) - - # Verify input/output are properly logged on parent span - assert parent_span.get("input") is not None, "Parent span should have input logged" - assert parent_span.get("output") is not None, "Parent span should have output logged" + """Return True if *client* has been instrumented by wrap_openai().""" + import inspect - # Verify that we have child spans beyond just "Agent workflow" - # The OpenAI SDK should generate multiple span types (generation, response, etc.) - parent_span_id = parent_span.get("span_id") - assert parent_span_id is not None, "Parent span should have a span_id" + from wrapt import FunctionWrapper - all_child_spans = [s for s in spans if parent_span_id in (s.get("span_parents") or [])] + completions = getattr(getattr(client, "chat", None), "completions", None) + if completions is None: + return False + attr = inspect.getattr_static(completions, "create", None) + return isinstance(attr, FunctionWrapper) - assert len(all_child_spans) >= 1, f"Should have at least 1 child span, but found {len(all_child_spans)}" - # We should see spans like Generation, Response, etc. 
from the OpenAI SDK - span_types = [s.get("span_attributes", {}).get("type") for s in all_child_spans] - has_llm_spans = "llm" in span_types - has_task_spans = "task" in span_types - - assert has_llm_spans or has_task_spans, ( - f"Should have LLM or task type spans from OpenAI SDK, got types: {span_types}" - ) - - -@pytest.mark.asyncio -@pytest.mark.vcr -async def test_braintrust_tracing_processor_concurrency_bug(memory_logger): - """Test that reproduces the concurrency bug where overlapping traces mix up first_input/last_output.""" - pytest.importorskip("agents", reason="agents package not available") - - import asyncio - - import agents - from agents import Agent - from agents.run import AgentRunner - from braintrust.wrappers.openai import BraintrustTracingProcessor - - assert not memory_logger.pop() - - # Create a single shared processor instance - processor = BraintrustTracingProcessor() - - # Set up tracing - agents.set_tracing_disabled(False) - agents.add_trace_processor(processor) - - try: - # Create agents for testing - agent_a = Agent( - name="agent-a", model=TEST_MODEL, instructions="You are agent A. Just respond with 'A' and nothing else." - ) - - agent_b = Agent( - name="agent-b", model=TEST_MODEL, instructions="You are agent B. Just respond with 'B' and nothing else." - ) - - runner = AgentRunner() - - # Define async functions to run agents - async def run_agent_a(): - """Run agent A with a delay to ensure overlap""" - result = await runner.run(agent_a, "What's your name?") - # Add a small delay to ensure traces overlap - await asyncio.sleep(0.1) - return result - - async def run_agent_b(): - """Run agent B immediately""" - result = await runner.run(agent_b, "Who are you?") - return result - - # Run both agents concurrently to create overlapping traces - results = await asyncio.gather(run_agent_a(), run_agent_b()) - - result_a, result_b = results - assert result_a is not None, "Agent A should return a result" - assert result_b is not None, "Agent B should return a result" - - finally: - processor.shutdown() - - # Get all spans - spans = memory_logger.pop() - assert len(spans) >= 2, f"Should have at least 2 trace spans, got {len(spans)}" - - # Find the root trace spans (these are created by on_trace_start/on_trace_end) - # These are actually the "Agent workflow" spans, not the agent-a/agent-b spans - trace_spans = [] - for span in spans: - span_name = span.get("span_attributes", {}).get("name", "") - # The actual traces are "Agent workflow" spans with no parents - if span_name == "Agent workflow" and not span.get("span_parents"): - trace_spans.append(span) - - # We should have exactly 2 trace spans - assert len(trace_spans) == 2, f"Should have exactly 2 trace spans, got {len(trace_spans)}" - - # Identify which trace is for which agent by looking at the input - agent_a_trace = None - agent_b_trace = None - for trace in trace_spans: - input_str = str(trace.get("input", "")) - if "What's your name?" in input_str: - agent_a_trace = trace - elif "Who are you?" 
in input_str: - agent_b_trace = trace - - assert agent_a_trace is not None, "Could not find Agent A's trace" - assert agent_b_trace is not None, "Could not find Agent B's trace" - - # With the fix, both traces should have their correct input and output - # Verify Agent A trace has correct input/output - assert agent_a_trace.get("input") is not None, "Agent A trace should have input" - assert agent_a_trace.get("output") is not None, "Agent A trace should have output" - - # Verify Agent B trace has correct input/output - assert agent_b_trace.get("input") is not None, "Agent B trace should have input" - assert agent_b_trace.get("output") is not None, "Agent B trace should have output" - - # Verify the inputs are different (they should be from different prompts) - assert agent_a_trace.get("input") != agent_b_trace.get("input"), ( - "Agent A and B traces should have different inputs" - ) - - # Verify the outputs are different (agents respond differently) - if agent_a_trace.get("output") and agent_b_trace.get("output"): - assert agent_a_trace.get("output") != agent_b_trace.get("output"), ( - "Agent A and B traces should have different outputs" - ) - - -@pytest.mark.asyncio -@pytest.mark.vcr -@pytest.mark.skip(reason="OAI Implementation changed, skipping until update") -async def test_agents_tool_openai_nested_spans(memory_logger): - """Test that OpenAI calls inside agent tools are properly nested under the tool span.""" - pytest.importorskip("agents", reason="agents package not available") - - from agents import Agent, Runner, function_tool, set_trace_processors - from braintrust import current_span, wrap_openai - from braintrust.wrappers.openai import BraintrustTracingProcessor - - assert not memory_logger.pop() - - # Create a tool that uses OpenAI within a manual span - @function_tool(strict_mode=False) - def analyze_text(text: str): - """Analyze text and return a structured summary with key points, sentiment, and statistics.""" - client = wrap_openai(openai.OpenAI()) - with current_span().start_span(name="text_analysis_tool") as span: - span.log(input={"text": text}) - - # Use a simple prompt for testing - just like other tests in this file - simple_prompt = f"Analyze this text briefly: {text}" - - response = client.chat.completions.create( - model=TEST_MODEL, - messages=[{"role": "user", "content": simple_prompt}], - ) - result = response.choices[0].message.content - span.log(output={"analysis": result}) - return result - - # Set up tracing - set_trace_processors([BraintrustTracingProcessor()]) - - # Create agent with the tool - agent = Agent( - name="Text Analysis Agent", - instructions="You are a helpful assistant that analyzes text. When asked to analyze text, you MUST use the analyze_text tool. Always call the tool with the exact text provided by the user. After using the tool, provide a two sentence summary of what the tool returned.", - tools=[analyze_text], - ) - - # Run agent with a specific text to analyze - test_text = "Artificial intelligence is transforming industries worldwide. Companies are adopting AI technologies to improve efficiency and innovation. However, challenges like ethics and job displacement remain concerns." 
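Although the skipped agents-tool test above is being deleted, the span-nesting behavior it covered still applies; a minimal sketch using APIs shown elsewhere in this file (model name reused from the tests):

    import braintrust
    import openai

    client = braintrust.wrap_openai(openai.OpenAI())
    with braintrust.start_span(name="text_analysis_tool") as span:
        span.log(input={"text": "..."})
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "Analyze this text briefly: ..."}],
        )
        # The wrapped call logs a child "Chat Completion" span under
        # text_analysis_tool, because start_span() makes its span current.
        span.log(output={"analysis": response.choices[0].message.content})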
- result = await Runner.run( - agent, - f"Please analyze this text: '{test_text}'", - max_turns=3, - ) - - assert result is not None, "Agent should return a result" - - # Verify spans were created - spans = memory_logger.pop() - assert len(spans) >= 3, f"Should have at least 3 spans (agent workflow, tool, chat completion), got {len(spans)}" - - # Find different types of spans - agent_spans = [] - tool_spans = [] - chat_spans = [] - - for span in spans: - span_name = span.get("span_attributes", {}).get("name", "") - span_type = span.get("span_attributes", {}).get("type", "") - - if "Agent workflow" in span_name or span_type == "task": - agent_spans.append(span) - elif span_name == "text_analysis_tool": - tool_spans.append(span) - elif span_name == "Chat Completion" and span_type == "llm": - chat_spans.append(span) - - # Verify we have the expected spans - assert len(agent_spans) > 0, "Should have at least one agent workflow span" - assert len(tool_spans) == 1, f"Should have exactly one tool span, got {len(tool_spans)}" - assert len(chat_spans) == 1, f"Should have exactly one chat completion span, got {len(chat_spans)}" - - tool_span = tool_spans[0] - chat_span = chat_spans[0] - - # Verify the chat completion span is nested under the tool span - chat_span_parents = chat_span.get("span_parents", []) - tool_span_id = tool_span.get("span_id") - - assert tool_span_id is not None, "Tool span should have a span_id" - assert isinstance(chat_span_parents, list) and len(chat_span_parents) > 0, ( - "Chat completion span should have span_parents array" - ) - assert tool_span_id in chat_span_parents, ( - f"Chat completion span should include tool span_id {tool_span_id} in its span_parents array {chat_span_parents}" - ) - - # Verify the tool span has input/output logged - assert "input" in tool_span, "Tool span should have input logged" - assert test_text in str(tool_span["input"]), "Tool span input should contain the test text" - assert "output" in tool_span, "Tool span should have output logged" - - # Verify we have chat completion spans - assert len(chat_spans) >= 1, f"Should have at least one chat completion span, got {len(chat_spans)}" - chat_span = chat_spans[0] - chat_span_parents = chat_span.get("span_parents", []) - - # Verify the chat completion span is nested under the tool span - assert isinstance(chat_span_parents, list) and len(chat_span_parents) > 0, ( - "Chat completion span should have span_parents array" - ) - assert tool_span_id in chat_span_parents, ( - f"Chat completion span should include tool span_id {tool_span_id} in its span_parents array {chat_span_parents}" - ) - - # Verify the chat completion span has proper LLM data - assert "input" in chat_span, "Chat completion span should have input logged" - assert "output" in chat_span, "Chat completion span should have output logged" - assert chat_span["metadata"]["model"] == TEST_MODEL, "Chat completion should use test model" - assert len(str(chat_span["output"])) > 0, "Chat completion should have some output content" - - -def test_braintrust_tracing_processor_trace_metadata_logging(memory_logger): - """Test that trace metadata flows through to root span via on_trace_end.""" - pytest.importorskip("agents", reason="agents package not available") - - from braintrust.wrappers.openai import BraintrustTracingProcessor - - assert not memory_logger.pop() - - processor = BraintrustTracingProcessor() - - # Mock trace with metadata (simulates native trace() API) - class MockTrace: - def __init__(self, trace_id, name, metadata): - self.trace_id = 
trace_id - self.name = name - self.metadata = metadata - - def export(self): - return {"group_id": self.trace_id, "metadata": self.metadata} - - trace = MockTrace("test-trace", "Test Trace", {"conversation_id": "test-12345"}) - - # Execute trace lifecycle - processor.on_trace_start(trace) - processor.on_trace_end(trace) - - # Verify metadata was logged to root span - spans = memory_logger.pop() - root_span = spans[0] - assert root_span["metadata"]["conversation_id"] == "test-12345", "Should log trace metadata" - - -class TestPatchOpenAI: - """Tests for patch_openai().""" - - def test_patch_openai_sets_wrapped_flag(self): - """patch_openai() should set __braintrust_wrapped__ on openai module.""" - result = run_in_subprocess(""" - from braintrust.oai import patch_openai - import openai - - assert not hasattr(openai, "__braintrust_wrapped__") - patch_openai() - assert hasattr(openai, "__braintrust_wrapped__") - print("SUCCESS") - """) - assert result.returncode == 0, f"Failed: {result.stderr}" - assert "SUCCESS" in result.stdout - - def test_patch_openai_wraps_new_clients(self): - """After patch_openai(), new OpenAI() clients should be wrapped.""" - result = run_in_subprocess(""" - from braintrust.oai import patch_openai - patch_openai() - - import openai - client = openai.OpenAI(api_key="test-key") - - # Check that chat completions is wrapped (our wrapper adds tracing) - # The wrapper replaces client.chat with a wrapped version - chat_type = type(client.chat).__name__ - print(f"chat_type={chat_type}") - print("SUCCESS") - """) - assert result.returncode == 0, f"Failed: {result.stderr}" - assert "SUCCESS" in result.stdout - - def test_patch_openai_creates_spans(self): - """patch_openai() should create spans when making API calls.""" - result = run_in_subprocess(""" - from braintrust.oai import patch_openai - from braintrust.test_helpers import init_test_logger - from braintrust import logger - - # Set up memory logger - init_test_logger("test-auto") - with logger._internal_with_memory_background_logger() as memory_logger: - patch_openai() - - import openai - client = openai.OpenAI() - - # Make a call within a span context - import braintrust - with braintrust.start_span(name="test") as span: - try: - # This will fail without API key, but span should still be created - client.chat.completions.create( - model="gpt-4o-mini", - messages=[{"role": "user", "content": "hi"}], - ) - except Exception: - pass # Expected without API key - - # Check that spans were logged - spans = memory_logger.pop() - # Should have at least the parent span - assert len(spans) >= 1, f"Expected spans, got {spans}" - print("SUCCESS") - """) - assert result.returncode == 0, f"Failed: {result.stderr}" - assert "SUCCESS" in result.stdout - - def test_patch_openai_before_import(self): - """patch_openai() should work when called before importing openai.""" - result = run_in_subprocess(""" - from braintrust.oai import patch_openai - - # Patch BEFORE importing openai - patch_openai() - - import openai - assert hasattr(openai, "__braintrust_wrapped__") - - client = openai.OpenAI(api_key="test-key") - print("SUCCESS") - """) - assert result.returncode == 0, f"Failed: {result.stderr}" - assert "SUCCESS" in result.stdout - - def test_patch_openai_after_import(self): - """patch_openai() should work when called after importing openai.""" - result = run_in_subprocess(""" - import openai - from braintrust.oai import patch_openai - - # Patch AFTER importing openai - patch_openai() - - assert hasattr(openai, "__braintrust_wrapped__") - - 
client = openai.OpenAI(api_key="test-key") - print("SUCCESS") - """) - assert result.returncode == 0, f"Failed: {result.stderr}" - assert "SUCCESS" in result.stdout - - def test_patch_openai_idempotent(self): - """Multiple patch_openai() calls should be safe.""" - result = run_in_subprocess(""" - from braintrust.oai import patch_openai - import openai - - patch_openai() - patch_openai() # Second call - should be no-op, not double-wrap - - # Verify we can still create clients - client = openai.OpenAI(api_key="test-key") - assert hasattr(client, "chat") - print("SUCCESS") - """) - assert result.returncode == 0, f"Failed: {result.stderr}" - assert "SUCCESS" in result.stdout - - def test_patch_openai_chains_with_other_patches(self): - """patch_openai() should chain with other libraries that patch OpenAI.""" - result = run_in_subprocess(""" - import openai - - # Simulate another library (like Datadog) patching OpenAI first - other_library_init_called = [] - - class OtherLibraryOpenAI(openai.OpenAI): - def __init__(self, *args, **kwargs): - other_library_init_called.append(True) - super().__init__(*args, **kwargs) - - openai.OpenAI = OtherLibraryOpenAI - - # Now apply our patch - should subclass OtherLibraryOpenAI - from braintrust.oai import patch_openai - patch_openai() - - # Create a client - both patches should run - client = openai.OpenAI(api_key="test-key") - - # Verify other library's __init__ was called (chaining works) - assert len(other_library_init_called) == 1, "Other library's patch should have run" - - # Verify our patch was applied (client has wrapped chat) - assert hasattr(client, "chat"), "Client should have chat attribute" - - print("SUCCESS") - """) - assert result.returncode == 0, f"Failed: {result.stderr}" - assert "SUCCESS" in result.stdout - - def test_patch_openai_chains_async_client(self): - """patch_openai() should chain with other libraries for AsyncOpenAI too.""" - result = run_in_subprocess(""" - import openai - - # Simulate another library patching AsyncOpenAI first - other_library_init_called = [] - - class OtherLibraryAsyncOpenAI(openai.AsyncOpenAI): - def __init__(self, *args, **kwargs): - other_library_init_called.append(True) - super().__init__(*args, **kwargs) - - openai.AsyncOpenAI = OtherLibraryAsyncOpenAI - - # Now apply our patch - from braintrust.oai import patch_openai - patch_openai() - - # Create an async client - both patches should run - client = openai.AsyncOpenAI(api_key="test-key") - - # Verify other library's __init__ was called - assert len(other_library_init_called) == 1, "Other library's patch should have run" - - # Verify our patch was applied - assert hasattr(client, "chat"), "Client should have chat attribute" - - print("SUCCESS") - """) - assert result.returncode == 0, f"Failed: {result.stderr}" - assert "SUCCESS" in result.stdout - - -class TestPatchOpenAISpans: - """VCR-based tests verifying that patch_openai() produces spans.""" +class TestOpenAIIntegrationSetupSpans: + """VCR-based tests verifying that OpenAIIntegration.setup() produces spans.""" @pytest.mark.vcr - def test_patch_openai_creates_spans(self, memory_logger): - """patch_openai() should create spans when making API calls.""" - from braintrust.oai import patch_openai - + def test_setup_creates_spans(self, memory_logger): + """OpenAIIntegration.setup() should create spans when making API calls.""" assert not memory_logger.pop() - patch_openai() + OpenAIIntegration.setup() client = openai.OpenAI() response = client.chat.completions.create( model="gpt-4o-mini", @@ -2137,18 
+1466,16 @@ def test_patch_openai_creates_spans(self, memory_logger): assert span["input"] -class TestPatchOpenAIAsyncSpans: - """VCR-based tests verifying that patch_openai() produces spans for async clients.""" +class TestOpenAIIntegrationSetupAsyncSpans: + """VCR-based tests verifying that OpenAIIntegration.setup() produces spans for async clients.""" @pytest.mark.vcr @pytest.mark.asyncio - async def test_patch_openai_async_creates_spans(self, memory_logger): - """patch_openai() should create spans for async API calls.""" - from braintrust.oai import patch_openai - + async def test_setup_async_creates_spans(self, memory_logger): + """OpenAIIntegration.setup() should create spans for async API calls.""" assert not memory_logger.pop() - patch_openai() + OpenAIIntegration.setup() client = openai.AsyncOpenAI() response = await client.chat.completions.create( model="gpt-4o-mini", @@ -2173,6 +1500,36 @@ def test_auto_instrument_openai(self): verify_autoinstrument_script("test_auto_openai.py") +def test_wrap_openai_and_setup_use_same_wrappers(): + """Ensure the wrapper functions used by setup() and wrap_openai() stay in sync. + + Both paths should cover the same set of wrapper callables so that the + traced span shape is identical regardless of which entry-point the user + chooses. If this test fails, a wrapper was added to one path but not + the other. + """ + from braintrust.integrations.openai.integration import OpenAIIntegration + from braintrust.integrations.openai.patchers import _WRAP_TARGETS + + # Collect wrapper functions from module-level patchers (setup path). + setup_wrappers: set = set() + for patcher in OpenAIIntegration.patchers: + for sub in patcher.sub_patchers: + setup_wrappers.add(sub.wrapper) + + # Collect wrapper functions from instance-level patchers (wrap_openai path). 
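An aside on the two entry points this test keeps in sync; a minimal sketch, with both APIs exercised elsewhere in this file:

    import openai
    from braintrust import wrap_openai
    from braintrust.integrations.openai import OpenAIIntegration

    OpenAIIntegration.setup()              # patches class methods: every new client is traced
    client = wrap_openai(openai.OpenAI())  # patches one client instance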
+ wrap_wrappers: set = set() + for _path, patcher in _WRAP_TARGETS: + for sub in patcher.sub_patchers: + wrap_wrappers.add(sub.wrapper) + + assert setup_wrappers == wrap_wrappers, ( + f"Wrapper function mismatch between setup() and wrap_openai().\n" + f" Only in setup: {setup_wrappers - wrap_wrappers}\n" + f" Only in wrap_openai: {wrap_wrappers - setup_wrappers}" + ) + + class TestZAICompatibleOpenAI: """Tests for validating some ZAI compatibility with OpenAI wrapper.""" diff --git a/py/src/braintrust/wrappers/test_openai_http2.py b/py/src/braintrust/integrations/openai/test_openai_http2.py similarity index 100% rename from py/src/braintrust/wrappers/test_openai_http2.py rename to py/src/braintrust/integrations/openai/test_openai_http2.py diff --git a/py/src/braintrust/wrappers/test_openai_openrouter_gateway.py b/py/src/braintrust/integrations/openai/test_openai_openrouter_gateway.py similarity index 100% rename from py/src/braintrust/wrappers/test_openai_openrouter_gateway.py rename to py/src/braintrust/integrations/openai/test_openai_openrouter_gateway.py diff --git a/py/src/braintrust/integrations/openai/tracing.py b/py/src/braintrust/integrations/openai/tracing.py new file mode 100644 index 00000000..ab10906f --- /dev/null +++ b/py/src/braintrust/integrations/openai/tracing.py @@ -0,0 +1,1041 @@ +"""OpenAI-specific tracing wrappers, stream proxies, and serialization helpers.""" + +import abc +import base64 +import inspect +import re +import time +import warnings +from collections.abc import Callable +from typing import Any + +from braintrust.logger import Attachment, Span, start_span +from braintrust.span_types import SpanTypeAttribute +from braintrust.util import is_numeric, merge_dicts +from wrapt import FunctionWrapper + + +X_LEGACY_CACHED_HEADER = "x-cached" +X_CACHED_HEADER = "x-bt-cached" +RAW_RESPONSE_HEADER = "x-stainless-raw-response" + + +class NamedWrapper: + def __init__(self, wrapped: Any): + self.__wrapped = wrapped + + @property + def _wrapped(self) -> Any: + return self.__wrapped + + def __getattr__(self, name: str) -> Any: + return getattr(self.__wrapped, name) + + +class AsyncResponseWrapper: + """Wrapper that properly preserves async context manager behavior for OpenAI responses.""" + + def __init__(self, response: Any): + self._response = response + + async def __aenter__(self): + if hasattr(self._response, "__aenter__"): + await self._response.__aenter__() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + if hasattr(self._response, "__aexit__"): + return await self._response.__aexit__(exc_type, exc_val, exc_tb) + + def __aiter__(self): + if hasattr(self._response, "__aiter__"): + return self._response.__aiter__() + raise TypeError("Response object is not an async iterator") + + async def __anext__(self): + if hasattr(self._response, "__anext__"): + return await self._response.__anext__() + raise StopAsyncIteration + + def __getattr__(self, name: str) -> Any: + return getattr(self._response, name) + + @property + def __class__(self): # type: ignore + return self._response.__class__ + + def __str__(self) -> str: + return str(self._response) + + def __repr__(self) -> str: + return repr(self._response) + + +def log_headers(response: Any, span: Span): + cached_value = response.headers.get(X_CACHED_HEADER) or response.headers.get(X_LEGACY_CACHED_HEADER) + + if cached_value: + span.log( + metrics={ + "cached": 1 if cached_value.lower() in ["true", "hit"] else 0, + } + ) + + +def _raw_response_requested(kwargs: dict[str, Any]) -> bool: + 
extra_headers = kwargs.get("extra_headers") + if not isinstance(extra_headers, dict): + return False + + for key, value in extra_headers.items(): + if isinstance(key, str) and key.lower() == RAW_RESPONSE_HEADER: + if isinstance(value, str): + return value.lower() == "true" + return bool(value) + + return False + + +def _convert_data_url_to_attachment(data_url: str, filename: str | None = None) -> Attachment | str: + """Helper function to convert data URL to an Attachment.""" + data_url_match = re.match(r"^data:([^;]+);base64,(.+)$", data_url) + if not data_url_match: + return data_url + + mime_type, base64_data = data_url_match.groups() + + try: + binary_data = base64.b64decode(base64_data) + + if filename is None: + extension = mime_type.split("/")[1] if "/" in mime_type else "bin" + prefix = "image" if mime_type.startswith("image/") else "document" + filename = f"{prefix}.{extension}" + + attachment = Attachment(data=binary_data, filename=filename, content_type=mime_type) + + return attachment + except Exception: + return data_url + + +def _process_attachments_in_input(input_data: Any) -> Any: + """Process input to convert data URL images and base64 documents to Attachment objects.""" + if isinstance(input_data, list): + return [_process_attachments_in_input(item) for item in input_data] + + if isinstance(input_data, dict): + # Check for OpenAI's image_url format with data URLs + if ( + input_data.get("type") == "image_url" + and isinstance(input_data.get("image_url"), dict) + and isinstance(input_data["image_url"].get("url"), str) + ): + processed_url = _convert_data_url_to_attachment(input_data["image_url"]["url"]) + return { + **input_data, + "image_url": { + **input_data["image_url"], + "url": processed_url, + }, + } + + # Check for OpenAI's file format with data URL (e.g., PDFs) + if ( + input_data.get("type") == "file" + and isinstance(input_data.get("file"), dict) + and isinstance(input_data["file"].get("file_data"), str) + ): + file_filename = input_data["file"].get("filename") + processed_file_data = _convert_data_url_to_attachment( + input_data["file"]["file_data"], + filename=file_filename if isinstance(file_filename, str) else None, + ) + return { + **input_data, + "file": { + **input_data["file"], + "file_data": processed_file_data, + }, + } + + # Recursively process nested objects + return {key: _process_attachments_in_input(value) for key, value in input_data.items()} + + return input_data + + +def _is_async_callable(fn: Any) -> bool: + fn = getattr(fn, "__func__", fn) + # Walk the __wrapped__ chain to see through decorators (e.g. OpenAI's + # @required_args) that hide the underlying coroutine function. + while fn is not None: + if inspect.iscoroutinefunction(fn): + return True + next_fn = getattr(fn, "__wrapped__", None) + if next_fn is fn: + break + fn = next_fn + return False + + +def _get_raw_callable(instance: Any, method_name: str) -> Any | None: + """Return the ``with_raw_response`` variant of *method_name* on *instance*. + + This allows wrappers to route through the raw-response path so that + ``log_headers`` can capture Braintrust proxy headers (e.g. cache status) + even when the user called the regular (non-raw) method. + + Returns ``None`` when: + * the resource does not expose ``with_raw_response`` + * the requested method does not exist on it + * the class-level method is already a wrapt ``FunctionWrapper`` — in that + case the raw callable would internally call the patched class method, + causing infinite recursion. 
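+
+    A sketch of the intended call pattern, mirroring the wrapper callbacks
+    defined below in this module::
+
+        create_fn = _get_raw_callable(instance, "create") or wrapped
+        response = create_fn(*args, **kwargs)  # LegacyAPIResponse, has headers
+        log_headers(response, span)            # record cache status, etc.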
+ """ + raw_resource = getattr(instance, "with_raw_response", None) + if raw_resource is None: + return None + raw_callable = getattr(raw_resource, method_name, None) + if raw_callable is None: + return None + # When setup() patches the class method or wrap_openai() patches the + # instance method, the with_raw_response object captures the already- + # wrapped method. Calling it would re-enter our wrapper. Detect this + # by checking whether the descriptor (class-level from setup() or + # instance-level from wrap_openai()) is a FunctionWrapper. + cls_attr = inspect.getattr_static(type(instance), method_name, None) + if isinstance(cls_attr, FunctionWrapper): + return None + inst_attr = inspect.getattr_static(instance, method_name, None) + if isinstance(inst_attr, FunctionWrapper): + return None + return raw_callable + + +# --------------------------------------------------------------------------- +# wrapt wrapper callbacks — used by FunctionWrapperPatcher classes +# --------------------------------------------------------------------------- + + +def _chat_completion_create_wrapper(wrapped, instance, args, kwargs): + # Route through with_raw_response to capture response headers. + create_fn = _get_raw_callable(instance, "create") or wrapped + if _is_async_callable(wrapped): + + async def call(): + response = await ChatCompletionWrapper(None, create_fn).acreate(*args, **kwargs) + return AsyncResponseWrapper(response) + + return call() + return ChatCompletionWrapper(create_fn, None).create(*args, **kwargs) + + +def _chat_completion_parse_wrapper(wrapped, instance, args, kwargs): + if _is_async_callable(wrapped): + + async def call(): + response = await ChatCompletionWrapper(None, wrapped).acreate(*args, **kwargs) + return AsyncResponseWrapper(response) + + return call() + return ChatCompletionWrapper(wrapped, None).create(*args, **kwargs) + + +def _embedding_create_wrapper(wrapped, instance, args, kwargs): + create_fn = _get_raw_callable(instance, "create") or wrapped + if _is_async_callable(wrapped): + + async def call(): + response = await EmbeddingWrapper(None, create_fn).acreate(*args, **kwargs) + return AsyncResponseWrapper(response) + + return call() + return EmbeddingWrapper(create_fn, None).create(*args, **kwargs) + + +def _moderation_create_wrapper(wrapped, instance, args, kwargs): + create_fn = _get_raw_callable(instance, "create") or wrapped + if _is_async_callable(wrapped): + + async def call(): + response = await ModerationWrapper(None, create_fn).acreate(*args, **kwargs) + return AsyncResponseWrapper(response) + + return call() + return ModerationWrapper(create_fn, None).create(*args, **kwargs) + + +def _responses_create_wrapper(wrapped, instance, args, kwargs): + if _is_async_callable(wrapped): + + async def call(): + response = await ResponseWrapper(None, wrapped).acreate(*args, **kwargs) + return AsyncResponseWrapper(response) + + return call() + return ResponseWrapper(wrapped, None).create(*args, **kwargs) + + +def _responses_parse_wrapper(wrapped, instance, args, kwargs): + if _is_async_callable(wrapped): + + async def call(): + response = await ResponseWrapper(None, wrapped, "openai.responses.parse").acreate(*args, **kwargs) + return AsyncResponseWrapper(response) + + return call() + return ResponseWrapper(wrapped, None, "openai.responses.parse").create(*args, **kwargs) + + +def _responses_raw_create_wrapper(wrapped, instance, args, kwargs): + if _is_async_callable(wrapped): + + async def call(): + return await ResponseWrapper(None, wrapped, return_raw=True).acreate(*args, 
**kwargs) + + return call() + return ResponseWrapper(wrapped, None, return_raw=True).create(*args, **kwargs) + + +def _responses_raw_parse_wrapper(wrapped, instance, args, kwargs): + if _is_async_callable(wrapped): + + async def call(): + return await ResponseWrapper( + None, + wrapped, + "openai.responses.parse", + return_raw=True, + ).acreate(*args, **kwargs) + + return call() + return ResponseWrapper(wrapped, None, "openai.responses.parse", return_raw=True).create(*args, **kwargs) + + +# --------------------------------------------------------------------------- +# Core tracing wrappers +# --------------------------------------------------------------------------- + + +class ChatCompletionWrapper: + def __init__(self, create_fn: Callable[..., Any] | None, acreate_fn: Callable[..., Any] | None): + self.create_fn = create_fn + self.acreate_fn = acreate_fn + + def create(self, *args: Any, **kwargs: Any) -> Any: + raw_requested = _raw_response_requested(kwargs) + params = self._parse_params(kwargs) + stream = kwargs.get("stream", False) + + span = start_span( + **merge_dicts(dict(name="Chat Completion", span_attributes={"type": SpanTypeAttribute.LLM}), params) + ) + should_end = True + + try: + start = time.time() + create_response = self.create_fn(*args, **kwargs) + if hasattr(create_response, "parse"): + raw_response = create_response.parse() + log_headers(create_response, span) + else: + raw_response = create_response + if stream: + + def gen(): + try: + first = True + all_results = [] + for item in raw_response: + if first: + span.log( + metrics={ + "time_to_first_token": time.time() - start, + } + ) + first = False + all_results.append(_try_to_dict(item)) + yield item + + span.log(**self._postprocess_streaming_results(all_results)) + finally: + span.end() + + should_end = False + if raw_requested and hasattr(create_response, "parse"): + return _RawResponseWithTracedStream(create_response, _TracedStream(raw_response, gen())) + return _TracedStream(raw_response, gen()) + else: + log_response = _try_to_dict(raw_response) + metrics = _parse_metrics_from_usage(log_response.get("usage", {})) + metrics["time_to_first_token"] = time.time() - start + span.log( + metrics=metrics, + output=log_response["choices"], + ) + return create_response if (raw_requested and hasattr(create_response, "parse")) else raw_response + finally: + if should_end: + span.end() + + async def acreate(self, *args: Any, **kwargs: Any) -> Any: + raw_requested = _raw_response_requested(kwargs) + params = self._parse_params(kwargs) + stream = kwargs.get("stream", False) + + span = start_span( + **merge_dicts(dict(name="Chat Completion", span_attributes={"type": SpanTypeAttribute.LLM}), params) + ) + should_end = True + + try: + start = time.time() + create_response = await self.acreate_fn(*args, **kwargs) + + if hasattr(create_response, "parse"): + raw_response = create_response.parse() + log_headers(create_response, span) + else: + raw_response = create_response + + if stream: + + async def gen(): + try: + first = True + all_results = [] + async for item in raw_response: + if first: + span.log( + metrics={ + "time_to_first_token": time.time() - start, + } + ) + first = False + all_results.append(_try_to_dict(item)) + yield item + + span.log(**self._postprocess_streaming_results(all_results)) + finally: + span.end() + + should_end = False + streamer = gen() + if raw_requested and hasattr(create_response, "parse"): + return _RawResponseWithTracedStream(create_response, _AsyncTracedStream(raw_response, streamer)) + return 
_AsyncTracedStream(raw_response, streamer)
+            else:
+                log_response = _try_to_dict(raw_response)
+                metrics = _parse_metrics_from_usage(log_response.get("usage"))
+                metrics["time_to_first_token"] = time.time() - start
+                span.log(
+                    metrics=metrics,
+                    output=log_response["choices"],
+                )
+                return create_response if (raw_requested and hasattr(create_response, "parse")) else raw_response
+        finally:
+            if should_end:
+                span.end()
+
+    @classmethod
+    def _parse_params(cls, params: dict[str, Any]) -> dict[str, Any]:
+        # First, destructively remove span_info
+        ret = params.pop("span_info", {})
+
+        # Then, copy the rest of the params
+        params = prettify_params(params)
+        messages = params.pop("messages", None)
+
+        # Process attachments in input (convert data URLs to Attachment objects)
+        processed_input = _process_attachments_in_input(messages)
+
+        return merge_dicts(
+            ret,
+            {
+                "input": processed_input,
+                "metadata": {**params, "provider": "openai"},
+            },
+        )
+
+    @classmethod
+    def _postprocess_streaming_results(cls, all_results: list[dict[str, Any]]) -> dict[str, Any]:
+        role = None
+        content = None
+        tool_calls: list[Any] | None = None
+        finish_reason = None
+        metrics: dict[str, float] = {}
+        for result in all_results:
+            usage = result.get("usage")
+            if usage:
+                metrics.update(_parse_metrics_from_usage(usage))
+
+            choices = result["choices"]
+            if not choices:
+                continue
+            choice = choices[0]
+
+            # finish_reason arrives on the choice (not the delta), typically in
+            # the final chunk, whose delta may be empty.
+            if choice.get("finish_reason") is not None:
+                finish_reason = choice.get("finish_reason")
+
+            delta = choice.get("delta")
+            if not delta:
+                continue
+
+            if role is None and delta.get("role") is not None:
+                role = delta.get("role")
+
+            if delta.get("content") is not None:
+                content = (content or "") + delta.get("content")
+
+            if delta.get("tool_calls") is not None:
+                delta_tool_calls = delta.get("tool_calls")
+                if not delta_tool_calls:
+                    continue
+                tool_delta = delta_tool_calls[0]
+
+                # pylint: disable=unsubscriptable-object
+                if not tool_calls or (tool_delta.get("id") and tool_calls[-1]["id"] != tool_delta.get("id")):
+                    function_arg = tool_delta.get("function", {})
+                    tool_calls = (tool_calls or []) + [
+                        {
+                            "id": tool_delta.get("id"),
+                            "type": tool_delta.get("type"),
+                            "function": {
+                                "name": function_arg.get("name"),
+                                "arguments": function_arg.get("arguments") or "",
+                            },
+                        }
+                    ]
+                else:
+                    # pylint: disable=unsubscriptable-object
+                    # append to existing tool call
+                    function_arg = tool_delta.get("function", {})
+                    args = function_arg.get("arguments") or ""
+                    if isinstance(args, str):
+                        # pylint: disable=unsubscriptable-object
+                        tool_calls[-1]["function"]["arguments"] += args
+
+        return {
+            "metrics": metrics,
+            "output": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": role,
+                        "content": content,
+                        "tool_calls": tool_calls,
+                    },
+                    "logprobs": None,
+                    "finish_reason": finish_reason,
+                }
+            ],
+        }
+
+
+class _TracedStream(NamedWrapper):
+    """Traced sync stream. Iterates via the traced generator while delegating
+    SDK-specific attributes (e.g. 
.close(), .response) to the original stream.""" + + def __init__(self, original_stream: Any, traced_generator: Any) -> None: + self._traced_generator = traced_generator + super().__init__(original_stream) + + def __iter__(self) -> Any: + return self + + def __next__(self) -> Any: + return next(self._traced_generator) + + def __enter__(self) -> Any: + if hasattr(self._wrapped, "__enter__"): + self._wrapped.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb) -> Any: + if hasattr(self._wrapped, "__exit__"): + return self._wrapped.__exit__(exc_type, exc_val, exc_tb) + return None + + +class _AsyncTracedStream(NamedWrapper): + """Traced async stream. Iterates via the traced generator while delegating + SDK-specific attributes (e.g. .close(), .response) to the original stream.""" + + def __init__(self, original_stream: Any, traced_generator: Any) -> None: + self._traced_generator = traced_generator + super().__init__(original_stream) + + def __aiter__(self) -> Any: + return self + + async def __anext__(self) -> Any: + return await self._traced_generator.__anext__() + + async def __aenter__(self) -> Any: + if hasattr(self._wrapped, "__aenter__"): + await self._wrapped.__aenter__() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb) -> Any: + if hasattr(self._wrapped, "__aexit__"): + return await self._wrapped.__aexit__(exc_type, exc_val, exc_tb) + return None + + +class _RawResponseWithTracedStream(NamedWrapper): + """Proxy for LegacyAPIResponse that replaces parse() with a traced stream, + so that with_raw_response + stream=True preserves both headers and tracing.""" + + def __init__(self, raw_response: Any, traced_stream: Any) -> None: + self._traced_stream = traced_stream + super().__init__(raw_response) + + def parse(self, *args: Any, **kwargs: Any) -> Any: + return self._traced_stream + + +class ResponseWrapper: + def __init__( + self, + create_fn: Callable[..., Any] | None, + acreate_fn: Callable[..., Any] | None, + name: str = "openai.responses.create", + return_raw: bool = False, + ): + self.create_fn = create_fn + self.acreate_fn = acreate_fn + self.name = name + self.return_raw = return_raw + + def create(self, *args: Any, **kwargs: Any) -> Any: + raw_requested = self.return_raw or _raw_response_requested(kwargs) + params = self._parse_params(kwargs) + stream = kwargs.get("stream", False) + + span = start_span(**merge_dicts(dict(name=self.name, span_attributes={"type": SpanTypeAttribute.LLM}), params)) + should_end = True + + try: + start = time.time() + create_response = self.create_fn(*args, **kwargs) + if hasattr(create_response, "parse"): + raw_response = create_response.parse() + log_headers(create_response, span) + else: + raw_response = create_response + if stream: + + def gen(): + try: + first = True + all_results = [] + for item in raw_response: + if first: + span.log( + metrics={ + "time_to_first_token": time.time() - start, + } + ) + first = False + all_results.append(item) + yield item + + span.log(**self._postprocess_streaming_results(all_results)) + finally: + span.end() + + should_end = False + if raw_requested and hasattr(create_response, "parse"): + return _RawResponseWithTracedStream(create_response, _TracedStream(raw_response, gen())) + return _TracedStream(raw_response, gen()) + else: + log_response = _try_to_dict(raw_response) + event_data = self._parse_event_from_result(log_response) + if "metrics" not in event_data: + event_data["metrics"] = {} + event_data["metrics"]["time_to_first_token"] = time.time() - start + 
span.log(**event_data) + return create_response if (raw_requested and hasattr(create_response, "parse")) else raw_response + finally: + if should_end: + span.end() + + async def acreate(self, *args: Any, **kwargs: Any) -> Any: + raw_requested = self.return_raw or _raw_response_requested(kwargs) + params = self._parse_params(kwargs) + stream = kwargs.get("stream", False) + + span = start_span(**merge_dicts(dict(name=self.name, span_attributes={"type": SpanTypeAttribute.LLM}), params)) + should_end = True + + try: + start = time.time() + create_response = await self.acreate_fn(*args, **kwargs) + if hasattr(create_response, "parse"): + raw_response = create_response.parse() + log_headers(create_response, span) + else: + raw_response = create_response + if stream: + + async def gen(): + try: + first = True + all_results = [] + async for item in raw_response: + if first: + span.log( + metrics={ + "time_to_first_token": time.time() - start, + } + ) + first = False + all_results.append(item) + yield item + + span.log(**self._postprocess_streaming_results(all_results)) + finally: + span.end() + + should_end = False + streamer = gen() + if raw_requested and hasattr(create_response, "parse"): + return _RawResponseWithTracedStream(create_response, _AsyncTracedStream(raw_response, streamer)) + return _AsyncTracedStream(raw_response, streamer) + else: + log_response = _try_to_dict(raw_response) + event_data = self._parse_event_from_result(log_response) + if "metrics" not in event_data: + event_data["metrics"] = {} + event_data["metrics"]["time_to_first_token"] = time.time() - start + span.log(**event_data) + return create_response if (raw_requested and hasattr(create_response, "parse")) else raw_response + finally: + if should_end: + span.end() + + @classmethod + def _parse_params(cls, params: dict[str, Any]) -> dict[str, Any]: + # First, destructively remove span_info + ret = params.pop("span_info", {}) + + # Then, copy the rest of the params + params = prettify_params(params) + input_data = params.pop("input", None) + + # Process attachments in input (convert data URLs to Attachment objects) + processed_input = _process_attachments_in_input(input_data) + + return merge_dicts( + ret, + { + "input": processed_input, + "metadata": {**params, "provider": "openai"}, + }, + ) + + @classmethod + def _parse_event_from_result(cls, result: dict[str, Any]) -> dict[str, Any]: + """Parse event from response result""" + data = {"metrics": {}} + + if not result: + return data + + if "output" in result: + data["output"] = result["output"] + + metadata = {k: v for k, v in result.items() if k not in ["output", "usage"]} + if metadata: + data["metadata"] = metadata + + if "usage" in result: + data["metrics"] = _parse_metrics_from_usage(result["usage"]) + + return data + + @classmethod + def _postprocess_streaming_results(cls, all_results: list[Any]) -> dict[str, Any]: + """Process streaming results - minimal version focused on metrics extraction.""" + metrics = {} + output = [] + + for result in all_results: + usage = getattr(result, "usage", None) + if ( + not usage + and hasattr(result, "type") + and result.type == "response.completed" + and hasattr(result, "response") + ): + # Handle summaries from completed response if present + if hasattr(result.response, "output") and result.response.output: + output_by_id = {item.get("id"): item for item in output if item.get("id")} + for output_item in result.response.output: + if hasattr(output_item, "summary") and output_item.summary: + matched = 
output_by_id.get(output_item.id) + if matched: + matched["summary"] = output_item.summary + usage = getattr(result.response, "usage", None) + + if usage: + parsed_metrics = _parse_metrics_from_usage(usage) + metrics.update(parsed_metrics) + + # Skip processing if result doesn't have a type attribute + if not hasattr(result, "type"): + continue + + if result.type == "response.output_item.added": + item_data = {"id": result.item.id, "type": result.item.type} + if hasattr(result.item, "role"): + item_data["role"] = result.item.role + output.append(item_data) + continue + + if result.type == "response.completed": + if hasattr(result, "response") and hasattr(result.response, "output"): + return { + "metrics": metrics, + "output": result.response.output, + } + continue + + # Handle output_index based updates + if hasattr(result, "output_index"): + output_index = result.output_index + if output_index < len(output): + current_output = output[output_index] + + if result.type == "response.output_item.done": + current_output["status"] = result.item.status + continue + + if result.type == "response.output_item.delta": + current_output["delta"] = result.delta + continue + + # Handle content_index based updates + if hasattr(result, "content_index"): + if "content" not in current_output: + current_output["content"] = [] + content_index = result.content_index + # Fill any gaps in the content array + while len(current_output["content"]) <= content_index: + current_output["content"].append({}) + current_content = current_output["content"][content_index] + current_content["type"] = "output_text" + if hasattr(result, "delta") and result.delta: + current_content["text"] = (current_content.get("text") or "") + result.delta + + if result.type == "response.output_text.annotation.added": + annotation_index = result.annotation_index + if "annotations" not in current_content: + current_content["annotations"] = [] + # Fill any gaps in the annotations array + while len(current_content["annotations"]) <= annotation_index: + current_content["annotations"].append({}) + current_content["annotations"][annotation_index] = _try_to_dict(result.annotation) + + return { + "metrics": metrics, + "output": output, + } + + +class BaseWrapper(abc.ABC): + def __init__(self, create_fn: Callable[..., Any] | None, acreate_fn: Callable[..., Any] | None, name: str): + self._create_fn = create_fn + self._acreate_fn = acreate_fn + self._name = name + + @abc.abstractmethod + def process_output(self, response: dict[str, Any], span: Span): + """Process the API response and log relevant information to the span.""" + pass + + def create(self, *args: Any, **kwargs: Any) -> Any: + params = self._parse_params(kwargs) + + with start_span( + **merge_dicts(dict(name=self._name, span_attributes={"type": SpanTypeAttribute.LLM}), params) + ) as span: + create_response = self._create_fn(*args, **kwargs) + if hasattr(create_response, "parse"): + raw_response = create_response.parse() + log_headers(create_response, span) + else: + raw_response = create_response + + log_response = _try_to_dict(raw_response) + self.process_output(log_response, span) + return raw_response + + async def acreate(self, *args: Any, **kwargs: Any) -> Any: + params = self._parse_params(kwargs) + + with start_span( + **merge_dicts(dict(name=self._name, span_attributes={"type": SpanTypeAttribute.LLM}), params) + ) as span: + create_response = await self._acreate_fn(*args, **kwargs) + if hasattr(create_response, "parse"): + raw_response = create_response.parse() + 
                log_headers(create_response, span)
+            else:
+                raw_response = create_response
+            log_response = _try_to_dict(raw_response)
+            self.process_output(log_response, span)
+            return raw_response
+
+    @classmethod
+    def _parse_params(cls, params: dict[str, Any]) -> dict[str, Any]:
+        # First, destructively remove span_info
+        ret = params.pop("span_info", {})
+
+        params = prettify_params(params)
+        input_data = params.pop("input", None)
+
+        # Process attachments in input (convert data URLs to Attachment objects)
+        processed_input = _process_attachments_in_input(input_data)
+
+        return merge_dicts(
+            ret,
+            {
+                "input": processed_input,
+                "metadata": {**params, "provider": "openai"},
+            },
+        )
+
+
+class EmbeddingWrapper(BaseWrapper):
+    def __init__(self, create_fn: Callable[..., Any] | None, acreate_fn: Callable[..., Any] | None):
+        super().__init__(create_fn, acreate_fn, "Embedding")
+
+    def process_output(self, response: dict[str, Any], span: Span):
+        usage = response.get("usage")
+        metrics = _parse_metrics_from_usage(usage)
+        span.log(
+            metrics=metrics,
+            # TODO: Add a flag to control whether to log the full embedding vector,
+            # possibly w/ JSON compression.
+            output={"embedding_length": len(response["data"][0]["embedding"])},
+        )
+
+
+class ModerationWrapper(BaseWrapper):
+    def __init__(self, create_fn: Callable[..., Any] | None, acreate_fn: Callable[..., Any] | None):
+        super().__init__(create_fn, acreate_fn, "Moderation")
+
+    def process_output(self, response: Any, span: Span):
+        span.log(
+            output=response["results"],
+        )
+
+
+# OpenAI's representation to Braintrust's representation
+TOKEN_NAME_MAP = {
+    # chat API
+    "total_tokens": "tokens",
+    "prompt_tokens": "prompt_tokens",
+    "completion_tokens": "completion_tokens",
+    # responses API
+    "tokens": "tokens",
+    "input_tokens": "prompt_tokens",
+    "output_tokens": "completion_tokens",
+}
+
+TOKEN_PREFIX_MAP = {
+    "input": "prompt",
+    "output": "completion",
+}
+
+
+def _parse_metrics_from_usage(usage: Any) -> dict[str, Any]:
+    # For simplicity, this function handles all the different APIs
+    metrics = {}
+
+    if not usage:
+        return metrics
+
+    # This might be a dict or a Usage object that can be cast to a dict
+    usage = _try_to_dict(usage)
+    if not isinstance(usage, dict):
+        return metrics  # unexpected
+
+    for oai_name, value in usage.items():
+        if oai_name.endswith("_tokens_details"):
+            # handle `_tokens_details` dicts
+            if not isinstance(value, dict):
+                continue  # unexpected
+            raw_prefix = oai_name[: -len("_tokens_details")]
+            prefix = TOKEN_PREFIX_MAP.get(raw_prefix, raw_prefix)
+            for k, v in value.items():
+                if is_numeric(v):
+                    metrics[f"{prefix}_{k}"] = v
+        elif is_numeric(value):
+            name = TOKEN_NAME_MAP.get(oai_name, oai_name)
+            metrics[name] = value
+
+    return metrics
+
+
+def prettify_params(params: dict[str, Any]) -> dict[str, Any]:
+    # Filter out NOT_GIVEN parameters
+    # https://linear.app/braintrustdata/issue/BRA-2467
+    ret = {k: v for k, v in params.items() if not _is_not_given(v)}
+
+    if "response_format" in ret:
+        ret["response_format"] = serialize_response_format(ret["response_format"])
+    return ret
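
Reviewer note: the table-driven mapping above normalizes both the Chat Completions and Responses usage shapes onto Braintrust's metric names. A standalone sketch of the expected behavior, using illustrative payloads rather than the SDK's real Usage objects:

```python
# Illustrative payloads only; field names follow OpenAI's two usage shapes.
chat_usage = {
    "prompt_tokens": 12,
    "completion_tokens": 3,
    "total_tokens": 15,
    "completion_tokens_details": {"reasoning_tokens": 0},
}
responses_usage = {
    "input_tokens": 12,
    "output_tokens": 3,
    "total_tokens": 15,
    "input_tokens_details": {"cached_tokens": 0},
}

# Per the maps above, both normalize to the same Braintrust names:
# _parse_metrics_from_usage(chat_usage)
#   -> {"prompt_tokens": 12, "completion_tokens": 3, "tokens": 15,
#       "completion_reasoning_tokens": 0}
# _parse_metrics_from_usage(responses_usage)
#   -> {"prompt_tokens": 12, "completion_tokens": 3, "tokens": 15,
#       "prompt_cached_tokens": 0}
```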
+def _try_to_dict(obj: Any) -> dict[str, Any]:
+    if isinstance(obj, dict):
+        return obj
+    # Convert a pydantic object to a dict.
+    # Suppress Pydantic serializer warnings from generic/discriminated-union models
+    # (e.g. OpenAI's ParsedResponse[T]). See
+    # https://github.com/braintrustdata/braintrust-sdk-python/issues/60
+    if hasattr(obj, "model_dump") and callable(obj.model_dump):
+        try:
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", message="Pydantic serializer warnings", category=UserWarning)
+                return obj.model_dump()
+        except Exception:
+            pass
+    # `dict()` is the deprecated pydantic method, so model_dump is tried first.
+    if hasattr(obj, "dict") and callable(obj.dict):
+        try:
+            return obj.dict()
+        except Exception:
+            pass
+    return obj
+
+
+def serialize_response_format(response_format: Any) -> Any:
+    try:
+        from pydantic import BaseModel
+    except ImportError:
+        return response_format
+
+    if isinstance(response_format, type) and issubclass(response_format, BaseModel):
+        return dict(
+            type="json_schema",
+            json_schema=dict(
+                name=response_format.__name__,
+                schema=response_format.model_json_schema(),
+            ),
+        )
+    else:
+        return response_format
+
+
+def _is_not_given(value: Any) -> bool:
+    if value is None:
+        return False
+    try:
+        # Check by type name to avoid an import dependency on the OpenAI SDK.
+        type_name = type(value).__name__
+        return type_name == "NotGiven"
+    except Exception:
+        return False
diff --git a/py/src/braintrust/integrations/openai_agents/__init__.py b/py/src/braintrust/integrations/openai_agents/__init__.py
new file mode 100644
index 00000000..a000b358
--- /dev/null
+++ b/py/src/braintrust/integrations/openai_agents/__init__.py
@@ -0,0 +1,31 @@
+"""Braintrust integration for the OpenAI Agents SDK."""
+
+from braintrust.logger import NOOP_SPAN, current_span, init_logger
+
+from .integration import OpenAIAgentsIntegration
+
+
+try:
+    from .tracing import BraintrustTracingProcessor
+except ImportError as exc:  # pragma: no cover - optional dependency not installed
+    _IMPORT_ERROR = exc
+
+    class BraintrustTracingProcessor:  # type: ignore[no-redef]
+        def __init__(self, *args, **kwargs):
+            raise ImportError("openai-agents is required for braintrust.integrations.openai_agents") from _IMPORT_ERROR
+
+
+__all__ = ["BraintrustTracingProcessor", "OpenAIAgentsIntegration", "setup_openai_agents"]
+
+
+def setup_openai_agents(
+    api_key: str | None = None,
+    project_id: str | None = None,
+    project: str | None = None,
+    project_name: str | None = None,
+) -> bool:
+    """Set up Braintrust tracing for the OpenAI Agents SDK."""
+    if current_span() == NOOP_SPAN:
+        init_logger(project=project_name or project, api_key=api_key, project_id=project_id)
+
+    return OpenAIAgentsIntegration.setup()
diff --git a/py/src/braintrust/integrations/openai_agents/cassettes/test_auto_openai_agents.yaml b/py/src/braintrust/integrations/openai_agents/cassettes/test_auto_openai_agents.yaml
new file mode 100644
index 00000000..fb433203
--- /dev/null
+++ b/py/src/braintrust/integrations/openai_agents/cassettes/test_auto_openai_agents.yaml
@@ -0,0 +1,111 @@
+interactions:
+- request:
+    body: '{"include":[],"input":[{"content":"What is 2+2? Just the number.","role":"user"}],"instructions":"You
Be very concise.","model":"gpt-4o-mini","tools":[]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '178' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - Agents/Python 0.6.5 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3xUy47bMAy85ysEnTcL23Gcx7F/USwKg5HprLqyqEpUsEGRfy8sx47dZnuz+RiS + M6R+r4SQupFHIT0GV2cNVACbfVGp6rDNIMuqQ7XPimpb4Lbc54ctNBvY53leFVgesoOSLz0EnX6i + 4hGGbMDBrjwCY1ND78t31b4symyzS77AwDH0OYo6Z5CxGZJOoD7OnqLt+2rBBBzM2hhtz/Iofq+E + EEI6uKLv8xu8oCGHXq6EuA2FR8inpdF76jNtNCYZWo+/Ilp1rR1aMHyVR5G9Zsmn7QhWN8igTZhn + ahvYR8WabJrlO0UBHgWIdzSujUZACDowWH4V31Bc0F+FIqt0wNdh3g4+a4rsItdMH2gX+L2TiUyt + wCwrd9Sg6UueHa9LWnfa6nWRFeU6263z/V2YhCuP4i1xNjA3ad6F838k32OV9ZLvt1lVtgpxVzSb + XV4m5ITCV4cJB0OAMz4cX2mbnIoso300NW9sATuSgp88ZacAsJYYRs7ffiychs7O0+mJJwEdhSzl + ZL3dv6ZA6cmk4pNsQ3AfmIKkAw/GoFnKwj4Oa+o8BrQKn2yS83jRFEM9HkmdZJgUdZ46x7UC9Y71 + B16/9HnsCdRk5xEeIZBdXAi2LXmeBfXSxK4DP2JPBxOgRb7WuumBW42L8wjoL1phzXo8uBaiGUSR + gcnjnAHGzqEHjsmc34e/k3/vrCXfweN/JnqKGyi/d3xBf6KgE5Wyw0bH7nHogwjvpNWgWmSSk+Ox + A5LJ1bPNyCajm/foo1VwJ1Y2OsDJjK9STBs+DaDt4l43xcu/9tl7MY2ZBGweidli1L+fgeKZ/Rns + JP5XyEwMZtZvOTEYw1LsDhkaYOjhb6vbHwAAAP//AwCbn65uIwYAAA== + headers: + CF-RAY: + - 9be006980c67e60f-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 14 Jan 2026 20:53:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=IwSSJ.RHOyrDHy71t190C0u4EL9HMgUY2jiVoTE3Rx0-1768424037-1.0.1.1-ZID4mvxCwpZVRzFS1fLdN1Y2IWkkn_wazHoPQBolLYHzMoZNRkTFDL0fqX4m.0FY97.b95rhiBzBDf3ubnonNwcnYBcTqnrX4_OgE7Fq6Lw; + path=/; expires=Wed, 14-Jan-26 21:23:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=RePkKlnxLbAHj0ymuEQEUhV2_qf3Ejhb0yEUFttOP24-1768424037764-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '560' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '565' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999950' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_d8b831ee46ec9bc399abd66e9591fa2f + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/openai_agents/cassettes/test_braintrust_tracing_processor_concurrency_bug.yaml b/py/src/braintrust/integrations/openai_agents/cassettes/test_braintrust_tracing_processor_concurrency_bug.yaml new file mode 100644 index 00000000..76fadf73 --- /dev/null +++ b/py/src/braintrust/integrations/openai_agents/cassettes/test_braintrust_tracing_processor_concurrency_bug.yaml @@ -0,0 +1,214 @@ +interactions: +- request: + body: 
'{"include":[],"input":[{"content":"What''s your name?","role":"user"}],"instructions":"You + are agent A. Just respond with ''A'' and nothing else.","model":"gpt-4o-mini","tools":[]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '177' + Content-Type: + - application/json + Cookie: + - __cf_bm=IwSSJ.RHOyrDHy71t190C0u4EL9HMgUY2jiVoTE3Rx0-1768424037-1.0.1.1-ZID4mvxCwpZVRzFS1fLdN1Y2IWkkn_wazHoPQBolLYHzMoZNRkTFDL0fqX4m.0FY97.b95rhiBzBDf3ubnonNwcnYBcTqnrX4_OgE7Fq6Lw; + _cfuvid=RePkKlnxLbAHj0ymuEQEUhV2_qf3Ejhb0yEUFttOP24-1768424037764-0.0.1.1-604800000 + Host: + - api.openai.com + User-Agent: + - Agents/Python 0.6.5 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3RUwW6jMBC95yssX3ppKkJoArn1un+wqlZoMAPx1thee5xtVOXfV5hAoJvcYN7M + 88x7Hn+tGOOy5gfGHXpbJoXY5AXm2GxRFEmaJLtilyfp7hWLTOSb4rXCfAtV2iS7bfZaZQV/7ilM + 9RsFjTRGexziwiEQ1iX02Ga/y7M0S7b7iHkCCr6vEaazCgnroagC8dE6E3TfVwPK4xCWSknd8gP7 + WjHGGLdwRtfX13hCZSw6vmLsMhw8Un47Oo8oOmf6Sh2UioHG4Z+AWpxLixoUnfmBJS9JxKQeycoa + CaTy80qpPbkgSBodZ/lpAgOHDFrUxN5e2I/giQ2q1OyvpCN7entioGumDR2lbhkqjy/D6B18liaQ + DVSS+UC9OKoHyRhVClDLJjpTo+pPby2tM7PupJbrNEmzdbJfb/KrR5GXH9h7lG8QcbK/8+1j93fb + bZP37ueZKHLM9vtCIIhicDmy0Nli5EHvoZ0Bj2yOoDCaUN+amje2oB1FwU+aqmMCaG0IRvnffy1A + ZVrrTHUHiUQHxt/4FL1cv6ZE7oyKh4P30hNoGpL7xJjELThQCtXSFnJhuLHWoUct8M6lsg5P0gRf + jvtSRhsmR60znaVSgDhi+YHnh5jDXkBp9DzDIXijF8uCTWMczZJ6a0LXgRu5p93x0CCdS1n3xI3E + xaZ4dCcpsCQ57l4DQQ2mcE/G4VwBws6iAwoxvLkOfxX/2lljXAe3/5npMW+Q/NrxCV1lvIxS8g5r + Gbrbzg8mHI0Ug2uBDJ+A2x3gZGw5uxnJFLTzHl3QAq7C8lp6qNT4QIV4w6cBpF7s6zZ5/j8+ezqm + MaOB9a0wWYz6/RlI78Xv0U7mP2ImQ6Bm/aaTgsEvze6QoAaCnv6yuvwDAAD//wMABfpPzy4GAAA= + headers: + CF-RAY: + - 9be0069c9e0fa3be-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 14 Jan 2026 20:53:58 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '458' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '463' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999950' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2ed2447bbb924a109ffff829941e504f + status: + code: 200 + message: OK +- request: + body: '{"include":[],"input":[{"content":"Who are you?","role":"user"}],"instructions":"You + are agent B. 
Just respond with ''B'' and nothing else.","model":"gpt-4o-mini","tools":[]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '172' + Content-Type: + - application/json + Cookie: + - __cf_bm=IwSSJ.RHOyrDHy71t190C0u4EL9HMgUY2jiVoTE3Rx0-1768424037-1.0.1.1-ZID4mvxCwpZVRzFS1fLdN1Y2IWkkn_wazHoPQBolLYHzMoZNRkTFDL0fqX4m.0FY97.b95rhiBzBDf3ubnonNwcnYBcTqnrX4_OgE7Fq6Lw; + _cfuvid=RePkKlnxLbAHj0ymuEQEUhV2_qf3Ejhb0yEUFttOP24-1768424037764-0.0.1.1-604800000 + Host: + - api.openai.com + User-Agent: + - Agents/Python 0.6.5 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3RUwW6jMBC95yssX3ppKiCEkhx73D9YVSs0mCHx1thee5xtVOXfV5hAoJvcYN7M + 88x7Hn+tGOOy4XvGHXpbJfkG2rbcClG2bVZuk6TYFWWSFVssRVKmuxyyAhpIcZNsm3xXpvy5pzD1 + bxQ00hjtcYgLh0DYVNBj6WtR5lmebF4j5gko+L5GmM4qJGyGohrEx8GZoPu+WlAeh7BUSuoD37Ov + FWOMcQtndH19gydUxqLjK8Yuw8Ej5bejy4iic6av1EGpGGgd/gmoxbmyqEHRme9Z8pJETOqRrGqQ + QCo/r5TakwuCpNFxlp8mMHDI4ICa2NsL+xE8sUGVhv2VdGRPb08MdMO0oaPUB4bK48swegeflQlk + A1VkPlAvjupBMkZVAtSyic40qPrTD5bWuVl3Ust1lmT5Onldp+XVo8jL9+w9yjeIONnf+cNj94tt + kuW9+7t0lxRNm9Yix7qAOjJHFjpbjDzoPRzwBjyyOYLCaEJ9a2re2IJ2FAU/aaqOCaC1IRjlf/+1 + AJU5WGfqO0gk2jP+xqfo5fo1JXJnVDwcvJeeQNOQ3CfGJG7BgVKolraQC8ONtQ49aoF3LpV1eJIm + +GrclyraMDlqneksVQLEEasPPD/EHPYCSqPnGQ7BG71YFmxb42iW1FsTug7cyD3tjocW6VzJpidu + JS42xaM7SYEVyXH3WghqMIV7Mg7nChB2Fh1QiOH0OvxV/GtnrXEd3P5npse8QfJrxyd0tfEySsk7 + bGTobjs/mHA0UgyuBTJ8Am53gJOx1exmJFPQznt0QQu4Cssb6aFW4wMV4g2fBpB6sa+b5Pn/+Ozp + mMaMBja3wmQx6vdnILsXv0c7mf+ImQyBmvWbTQoGvzS7Q4IGCHr6y+ryDwAA//8DAJHmPlYuBgAA + headers: + CF-RAY: + - 9be0069c9e92e637-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 14 Jan 2026 20:53:58 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '594' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '596' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999950' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_ea321d2a288b42beb45e94f44b291ab6 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/openai_agents/cassettes/test_braintrust_tracing_processor_current_span_detection.yaml b/py/src/braintrust/integrations/openai_agents/cassettes/test_braintrust_tracing_processor_current_span_detection.yaml new file mode 100644 index 00000000..fb433203 --- /dev/null +++ b/py/src/braintrust/integrations/openai_agents/cassettes/test_braintrust_tracing_processor_current_span_detection.yaml @@ -0,0 +1,111 @@ +interactions: +- request: + body: '{"include":[],"input":[{"content":"What is 2+2? 
Just the number.","role":"user"}],"instructions":"You + are a helpful assistant. Be very concise.","model":"gpt-4o-mini","tools":[]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '178' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - Agents/Python 0.6.5 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3xUy47bMAy85ysEnTcL23Gcx7F/USwKg5HprLqyqEpUsEGRfy8sx47dZnuz+RiS + M6R+r4SQupFHIT0GV2cNVACbfVGp6rDNIMuqQ7XPimpb4Lbc54ctNBvY53leFVgesoOSLz0EnX6i + 4hGGbMDBrjwCY1ND78t31b4symyzS77AwDH0OYo6Z5CxGZJOoD7OnqLt+2rBBBzM2hhtz/Iofq+E + EEI6uKLv8xu8oCGHXq6EuA2FR8inpdF76jNtNCYZWo+/Ilp1rR1aMHyVR5G9Zsmn7QhWN8igTZhn + ahvYR8WabJrlO0UBHgWIdzSujUZACDowWH4V31Bc0F+FIqt0wNdh3g4+a4rsItdMH2gX+L2TiUyt + wCwrd9Sg6UueHa9LWnfa6nWRFeU6263z/V2YhCuP4i1xNjA3ad6F838k32OV9ZLvt1lVtgpxVzSb + XV4m5ITCV4cJB0OAMz4cX2mbnIoso300NW9sATuSgp88ZacAsJYYRs7ffiychs7O0+mJJwEdhSzl + ZL3dv6ZA6cmk4pNsQ3AfmIKkAw/GoFnKwj4Oa+o8BrQKn2yS83jRFEM9HkmdZJgUdZ46x7UC9Y71 + B16/9HnsCdRk5xEeIZBdXAi2LXmeBfXSxK4DP2JPBxOgRb7WuumBW42L8wjoL1phzXo8uBaiGUSR + gcnjnAHGzqEHjsmc34e/k3/vrCXfweN/JnqKGyi/d3xBf6KgE5Wyw0bH7nHogwjvpNWgWmSSk+Ox + A5LJ1bPNyCajm/foo1VwJ1Y2OsDJjK9STBs+DaDt4l43xcu/9tl7MY2ZBGweidli1L+fgeKZ/Rns + JP5XyEwMZtZvOTEYw1LsDhkaYOjhb6vbHwAAAP//AwCbn65uIwYAAA== + headers: + CF-RAY: + - 9be006980c67e60f-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 14 Jan 2026 20:53:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=IwSSJ.RHOyrDHy71t190C0u4EL9HMgUY2jiVoTE3Rx0-1768424037-1.0.1.1-ZID4mvxCwpZVRzFS1fLdN1Y2IWkkn_wazHoPQBolLYHzMoZNRkTFDL0fqX4m.0FY97.b95rhiBzBDf3ubnonNwcnYBcTqnrX4_OgE7Fq6Lw; + path=/; expires=Wed, 14-Jan-26 21:23:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=RePkKlnxLbAHj0ymuEQEUhV2_qf3Ejhb0yEUFttOP24-1768424037764-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '560' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '565' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999950' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_d8b831ee46ec9bc399abd66e9591fa2f + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/openai_agents/cassettes/test_openai_agents_integration_setup_creates_spans.yaml b/py/src/braintrust/integrations/openai_agents/cassettes/test_openai_agents_integration_setup_creates_spans.yaml new file mode 100644 index 00000000..fb433203 --- /dev/null +++ 
b/py/src/braintrust/integrations/openai_agents/cassettes/test_openai_agents_integration_setup_creates_spans.yaml @@ -0,0 +1,111 @@ +interactions: +- request: + body: '{"include":[],"input":[{"content":"What is 2+2? Just the number.","role":"user"}],"instructions":"You + are a helpful assistant. Be very concise.","model":"gpt-4o-mini","tools":[]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '178' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - Agents/Python 0.6.5 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: !!binary | + H4sIAAAAAAAAA3xUy47bMAy85ysEnTcL23Gcx7F/USwKg5HprLqyqEpUsEGRfy8sx47dZnuz+RiS + M6R+r4SQupFHIT0GV2cNVACbfVGp6rDNIMuqQ7XPimpb4Lbc54ctNBvY53leFVgesoOSLz0EnX6i + 4hGGbMDBrjwCY1ND78t31b4symyzS77AwDH0OYo6Z5CxGZJOoD7OnqLt+2rBBBzM2hhtz/Iofq+E + EEI6uKLv8xu8oCGHXq6EuA2FR8inpdF76jNtNCYZWo+/Ilp1rR1aMHyVR5G9Zsmn7QhWN8igTZhn + ahvYR8WabJrlO0UBHgWIdzSujUZACDowWH4V31Bc0F+FIqt0wNdh3g4+a4rsItdMH2gX+L2TiUyt + wCwrd9Sg6UueHa9LWnfa6nWRFeU6263z/V2YhCuP4i1xNjA3ad6F838k32OV9ZLvt1lVtgpxVzSb + XV4m5ITCV4cJB0OAMz4cX2mbnIoso300NW9sATuSgp88ZacAsJYYRs7ffiychs7O0+mJJwEdhSzl + ZL3dv6ZA6cmk4pNsQ3AfmIKkAw/GoFnKwj4Oa+o8BrQKn2yS83jRFEM9HkmdZJgUdZ46x7UC9Y71 + B16/9HnsCdRk5xEeIZBdXAi2LXmeBfXSxK4DP2JPBxOgRb7WuumBW42L8wjoL1phzXo8uBaiGUSR + gcnjnAHGzqEHjsmc34e/k3/vrCXfweN/JnqKGyi/d3xBf6KgE5Wyw0bH7nHogwjvpNWgWmSSk+Ox + A5LJ1bPNyCajm/foo1VwJ1Y2OsDJjK9STBs+DaDt4l43xcu/9tl7MY2ZBGweidli1L+fgeKZ/Rns + JP5XyEwMZtZvOTEYw1LsDhkaYOjhb6vbHwAAAP//AwCbn65uIwYAAA== + headers: + CF-RAY: + - 9be006980c67e60f-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 14 Jan 2026 20:53:57 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=IwSSJ.RHOyrDHy71t190C0u4EL9HMgUY2jiVoTE3Rx0-1768424037-1.0.1.1-ZID4mvxCwpZVRzFS1fLdN1Y2IWkkn_wazHoPQBolLYHzMoZNRkTFDL0fqX4m.0FY97.b95rhiBzBDf3ubnonNwcnYBcTqnrX4_OgE7Fq6Lw; + path=/; expires=Wed, 14-Jan-26 21:23:57 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=RePkKlnxLbAHj0ymuEQEUhV2_qf3Ejhb0yEUFttOP24-1768424037764-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '560' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '565' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999950' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_d8b831ee46ec9bc399abd66e9591fa2f + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/openai_agents/integration.py b/py/src/braintrust/integrations/openai_agents/integration.py new file mode 100644 index 
00000000..a44b7b31 --- /dev/null +++ b/py/src/braintrust/integrations/openai_agents/integration.py @@ -0,0 +1,14 @@ +"""OpenAI Agents SDK integration orchestration.""" + +from braintrust.integrations.base import BaseIntegration + +from .patchers import OpenAIAgentsTracingPatcher + + +class OpenAIAgentsIntegration(BaseIntegration): + """Braintrust instrumentation for the OpenAI Agents SDK.""" + + name = "openai_agents" + import_names = ("agents",) + min_version = "0.0.19" + patchers = (OpenAIAgentsTracingPatcher,) diff --git a/py/src/braintrust/integrations/openai_agents/patchers.py b/py/src/braintrust/integrations/openai_agents/patchers.py new file mode 100644 index 00000000..a621860b --- /dev/null +++ b/py/src/braintrust/integrations/openai_agents/patchers.py @@ -0,0 +1,24 @@ +"""OpenAI Agents SDK patchers.""" + +from braintrust.integrations.base import CallbackPatcher + + +def _setup_openai_agents_tracing() -> None: + from .tracing import _setup_openai_agents_tracing as setup_openai_agents_tracing + + setup_openai_agents_tracing() + + +def _has_braintrust_tracing_processor() -> bool: + from .tracing import _has_braintrust_tracing_processor as has_braintrust_tracing_processor + + return has_braintrust_tracing_processor() + + +class OpenAIAgentsTracingPatcher(CallbackPatcher): + """Register the Braintrust tracing processor with the OpenAI Agents SDK.""" + + name = "openai_agents.tracing" + target_module = "agents" + callback = _setup_openai_agents_tracing + state_getter = _has_braintrust_tracing_processor diff --git a/py/src/braintrust/integrations/openai_agents/test_openai_agents.py b/py/src/braintrust/integrations/openai_agents/test_openai_agents.py new file mode 100644 index 00000000..b508ca63 --- /dev/null +++ b/py/src/braintrust/integrations/openai_agents/test_openai_agents.py @@ -0,0 +1,268 @@ +import asyncio + +import braintrust +import pytest +from braintrust import logger +from braintrust.integrations.openai_agents import BraintrustTracingProcessor, OpenAIAgentsIntegration +from braintrust.test_helpers import init_test_logger +from braintrust.wrappers.test_utils import verify_autoinstrument_script + + +PROJECT_NAME = "test-project-openai-agents-tracing" +TEST_MODEL = "gpt-4o-mini" +TEST_PROMPT = "What is 2+2? Just the number." +TEST_AGENT_INSTRUCTIONS = "You are a helpful assistant. Be very concise." 
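
Reviewer note: for orientation, a minimal sketch (not part of the diff) of the non-test path these constants mirror. The project name and prompt are illustrative; `Agent` and `Runner` are the Agents SDK's public entry points, and `setup_openai_agents` is the helper added in `__init__.py` above.

```python
# Sketch: enabling the integration in application code (illustrative values).
import asyncio

from agents import Agent, Runner

from braintrust.integrations.openai_agents import setup_openai_agents


async def main() -> None:
    # Initializes a Braintrust logger (if none is active) and registers the
    # BraintrustTracingProcessor with the Agents SDK trace provider.
    setup_openai_agents(project="my-agents-project")

    agent = Agent(name="assistant", model="gpt-4o-mini", instructions="Be very concise.")
    result = await Runner.run(agent, "What is 2+2? Just the number.")
    print(result.final_output)


asyncio.run(main())
```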
+ + +@pytest.fixture +def memory_logger(): + init_test_logger(PROJECT_NAME) + with logger._internal_with_memory_background_logger() as bgl: + yield bgl + + +@pytest.fixture(autouse=True) +def isolate_openai_agents_tracing(): + pytest.importorskip("agents", reason="agents package not available") + + import agents + + provider = agents.tracing.get_trace_provider() + processors = tuple(getattr(getattr(provider, "_multi_processor", None), "_processors", ())) + manual_disabled = getattr(provider, "_manual_disabled", None) + + yield + + provider.set_processors(list(processors)) + provider._manual_disabled = manual_disabled + if hasattr(provider, "_refresh_disabled_flag"): + provider._refresh_disabled_flag() + + +def test_tracing_processor_sets_current_span(memory_logger): + """Ensure that on_trace_start sets the span as current so nested spans work.""" + assert not memory_logger.pop() + processor = BraintrustTracingProcessor() + + class DummyTrace: + def __init__(self): + self.trace_id = "test-trace-id" + self.name = "test-trace" + + def export(self): + return {"group_id": "group", "metadata": {"foo": "bar"}} + + trace = DummyTrace() + + with braintrust.start_span(name="parent-span") as parent_span: + assert braintrust.current_span() == parent_span + processor.on_trace_start(trace) + created_span = processor._spans[trace.trace_id] + assert braintrust.current_span() == created_span + + processor.on_trace_end(trace) + assert braintrust.current_span() == parent_span + + spans = memory_logger.pop() + assert spans + assert any(span.get("span_attributes", {}).get("name") == trace.name for span in spans) + + +def test_braintrust_tracing_processor_trace_metadata_logging(memory_logger): + """Trace metadata should flow through to the root span.""" + assert not memory_logger.pop() + + processor = BraintrustTracingProcessor() + + class MockTrace: + def __init__(self, trace_id, name, metadata): + self.trace_id = trace_id + self.name = name + self.metadata = metadata + + def export(self): + return {"group_id": self.trace_id, "metadata": self.metadata} + + trace = MockTrace("test-trace", "Test Trace", {"conversation_id": "test-12345"}) + + processor.on_trace_start(trace) + processor.on_trace_end(trace) + + spans = memory_logger.pop() + root_span = spans[0] + assert root_span["metadata"]["conversation_id"] == "test-12345" + + +@pytest.mark.asyncio +@pytest.mark.vcr +async def test_openai_agents_integration_setup_creates_spans(memory_logger): + import agents + from agents import Agent + from agents.run import AgentRunner + + assert not memory_logger.pop() + + assert OpenAIAgentsIntegration.setup() is True + assert agents.tracing.get_trace_provider()._disabled is False + + agent = Agent(name="test-agent", model=TEST_MODEL, instructions=TEST_AGENT_INSTRUCTIONS) + result = await AgentRunner().run(agent, TEST_PROMPT) + + assert result is not None + assert hasattr(result, "final_output") or hasattr(result, "output") + + spans = memory_logger.pop() + assert len(spans) >= 2 + + root_spans = [ + span + for span in spans + if span.get("span_attributes", {}).get("name") == "Agent workflow" and not span.get("span_parents") + ] + assert len(root_spans) == 1 + assert TEST_PROMPT in str(root_spans[0].get("input")) + assert root_spans[0].get("output") is not None + + llm_spans = [span for span in spans if span.get("span_attributes", {}).get("type") == "llm"] + assert llm_spans + + +@pytest.mark.asyncio +@pytest.mark.vcr +async def test_braintrust_tracing_processor_current_span_detection(memory_logger): + import agents + from 
agents import Agent + from agents.run import AgentRunner + + assert not memory_logger.pop() + + @braintrust.traced(name="parent_span_test") + async def test_function(instructions: str): + detected_parent = braintrust.current_span() + assert detected_parent is not None + assert detected_parent != braintrust.logger.NOOP_SPAN + + processor = BraintrustTracingProcessor() + + agents.set_tracing_disabled(False) + agents.add_trace_processor(processor) + + try: + agent = Agent(name="test-agent", model=TEST_MODEL, instructions=TEST_AGENT_INSTRUCTIONS) + runner = AgentRunner() + result = await runner.run(agent, instructions) + assert result is not None + assert hasattr(result, "final_output") or hasattr(result, "output") + return result + finally: + processor.shutdown() + + result = await test_function(TEST_PROMPT) + assert result is not None + + spans = memory_logger.pop() + assert len(spans) >= 2 + + parent_span = None + child_spans = [] + for span in spans: + if span.get("span_attributes", {}).get("name") == "parent_span_test": + parent_span = span + elif span.get("span_attributes", {}).get("name") == "Agent workflow": + child_spans.append(span) + + assert parent_span is not None + assert child_spans + + child_span = child_spans[0] + child_span_parents = child_span.get("span_parents", []) + parent_span_id = parent_span.get("span_id") + + assert parent_span_id is not None + assert isinstance(child_span_parents, list) and child_span_parents + assert parent_span_id in child_span_parents + assert child_span.get("root_span_id") == parent_span.get("root_span_id") + assert parent_span.get("input") is not None + assert parent_span.get("output") is not None + + all_child_spans = [s for s in spans if parent_span_id in (s.get("span_parents") or [])] + assert all_child_spans + span_types = [s.get("span_attributes", {}).get("type") for s in all_child_spans] + assert "llm" in span_types or "task" in span_types + + +@pytest.mark.asyncio +@pytest.mark.vcr +async def test_braintrust_tracing_processor_concurrency_bug(memory_logger): + import agents + from agents import Agent + from agents.run import AgentRunner + + assert not memory_logger.pop() + + processor = BraintrustTracingProcessor() + agents.set_tracing_disabled(False) + agents.add_trace_processor(processor) + + try: + agent_a = Agent( + name="agent-a", + model=TEST_MODEL, + instructions="You are agent A. Just respond with 'A' and nothing else.", + ) + agent_b = Agent( + name="agent-b", + model=TEST_MODEL, + instructions="You are agent B. Just respond with 'B' and nothing else.", + ) + runner = AgentRunner() + + async def run_agent_a(): + result = await runner.run(agent_a, "What's your name?") + await asyncio.sleep(0.1) + return result + + async def run_agent_b(): + return await runner.run(agent_b, "Who are you?") + + result_a, result_b = await asyncio.gather(run_agent_a(), run_agent_b()) + assert result_a is not None + assert result_b is not None + finally: + processor.shutdown() + + spans = memory_logger.pop() + assert len(spans) >= 2 + + trace_spans = [ + span + for span in spans + if span.get("span_attributes", {}).get("name", "") == "Agent workflow" and not span.get("span_parents") + ] + assert len(trace_spans) == 2 + + agent_a_trace = None + agent_b_trace = None + for trace in trace_spans: + input_str = str(trace.get("input", "")) + if "What's your name?" in input_str: + agent_a_trace = trace + elif "Who are you?" 
in input_str:
+            agent_b_trace = trace
+
+    assert agent_a_trace is not None
+    assert agent_b_trace is not None
+    assert agent_a_trace.get("input") is not None
+    assert agent_a_trace.get("output") is not None
+    assert agent_b_trace.get("input") is not None
+    assert agent_b_trace.get("output") is not None
+    assert agent_a_trace.get("input") != agent_b_trace.get("input")
+    if agent_a_trace.get("output") and agent_b_trace.get("output"):
+        assert agent_a_trace.get("output") != agent_b_trace.get("output")
+
+
+class TestAutoInstrumentOpenAIAgents:
+    """Tests for auto_instrument() with the OpenAI Agents SDK."""
+
+    def test_auto_instrument_openai_agents(self):
+        verify_autoinstrument_script("test_auto_openai_agents.py")
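
Reviewer note: the tests above register the processor by hand, and the same pattern works in application code when an explicit Braintrust logger is preferred over `auto_instrument()`. A minimal sketch (the project name is illustrative):

```python
# Sketch: manual registration with an explicit logger (illustrative project name).
import agents

import braintrust
from braintrust.integrations.openai_agents import BraintrustTracingProcessor

bt_logger = braintrust.init_logger(project="my-agents-project")
agents.set_tracing_disabled(False)
agents.add_trace_processor(BraintrustTracingProcessor(bt_logger))
# ... run agents as usual; call the processor's shutdown() (or braintrust.flush())
# before exit so buffered spans are delivered.
```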
diff --git a/py/src/braintrust/integrations/openai_agents/tracing.py b/py/src/braintrust/integrations/openai_agents/tracing.py
new file mode 100644
index 00000000..8a73c6c0
--- /dev/null
+++ b/py/src/braintrust/integrations/openai_agents/tracing.py
@@ -0,0 +1,328 @@
+"""OpenAI Agents SDK tracing processor."""
+
+import datetime
+from typing import Any
+
+from agents import tracing
+from braintrust.logger import NOOP_SPAN, Experiment, Logger, Span, current_span, flush, start_span
+from braintrust.span_types import SpanTypeAttribute
+
+
+def _span_type(span: tracing.Span[Any]) -> SpanTypeAttribute:
+    if span.span_data.type in ["agent", "handoff", "custom", "speech_group"]:
+        return SpanTypeAttribute.TASK
+    elif span.span_data.type in ["function", "guardrail", "mcp_tools"]:
+        return SpanTypeAttribute.TOOL
+    elif span.span_data.type in ["generation", "response", "transcription", "speech"]:
+        return SpanTypeAttribute.LLM
+    else:
+        return SpanTypeAttribute.TASK
+
+
+def _span_name(span: tracing.Span[Any]) -> str:
+    # TODO(sachin): span name should also come from the span_data.
+    if (
+        isinstance(span.span_data, tracing.AgentSpanData)
+        or isinstance(span.span_data, tracing.FunctionSpanData)
+        or isinstance(span.span_data, tracing.GuardrailSpanData)
+        or isinstance(span.span_data, tracing.CustomSpanData)
+    ):
+        return span.span_data.name
+    elif isinstance(span.span_data, tracing.GenerationSpanData):
+        return "Generation"
+    elif isinstance(span.span_data, tracing.ResponseSpanData):
+        return "Response"
+    elif isinstance(span.span_data, tracing.HandoffSpanData):
+        return "Handoff"
+    elif isinstance(span.span_data, tracing.MCPListToolsSpanData):
+        if span.span_data.server:
+            return f"List Tools ({span.span_data.server})"
+        return "MCP List Tools"
+    elif isinstance(span.span_data, tracing.TranscriptionSpanData):
+        return "Transcription"
+    elif isinstance(span.span_data, tracing.SpeechSpanData):
+        return "Speech"
+    elif isinstance(span.span_data, tracing.SpeechGroupSpanData):
+        return "Speech Group"
+    else:
+        return "Unknown"
+
+
+def _timestamp_from_maybe_iso(timestamp: str | None) -> float | None:
+    if timestamp is None:
+        return None
+    return datetime.datetime.fromisoformat(timestamp).timestamp()
+
+
+def _maybe_timestamp_elapsed(end: str | None, start: str | None) -> float | None:
+    if start is None or end is None:
+        return None
+    return (datetime.datetime.fromisoformat(end) - datetime.datetime.fromisoformat(start)).total_seconds()
+
+
+class BraintrustTracingProcessor(tracing.TracingProcessor):
+    """Tracing processor that logs OpenAI Agents SDK traces to Braintrust."""
+
+    def __init__(self, logger: Span | Experiment | Logger | None = None):
+        self._logger = logger
+        self._spans: dict[str, Span] = {}
+        self._first_input: dict[str, Any] = {}
+        self._last_output: dict[str, Any] = {}
+
+    def on_trace_start(self, trace: tracing.Trace) -> None:
+        trace_meta = trace.export() or {}
+        metadata = {
+            "group_id": trace_meta.get("group_id"),
+            **(trace_meta.get("metadata") or {}),
+        }
+
+        current_context = current_span()
+        if current_context != NOOP_SPAN:
+            span = current_context.start_span(
+                name=trace.name,
+                span_attributes={"type": SpanTypeAttribute.TASK, "name": trace.name},
+                metadata=metadata,
+            )
+        elif self._logger is not None:
+            span = self._logger.start_span(
+                span_attributes={"type": SpanTypeAttribute.TASK, "name": trace.name},
+                span_id=trace.trace_id,
+                root_span_id=trace.trace_id,
+                metadata=metadata,
+            )
+        else:
+            span = start_span(
+                id=trace.trace_id,
+                span_attributes={"type": SpanTypeAttribute.TASK, "name": trace.name},
+                metadata=metadata,
+            )
+        if span != NOOP_SPAN:
+            span.set_current()
+        self._spans[trace.trace_id] = span
+
+    def on_trace_end(self, trace: tracing.Trace) -> None:
+        span = self._spans.pop(trace.trace_id)
+        trace_first_input = self._first_input.pop(trace.trace_id, None)
+        trace_last_output = self._last_output.pop(trace.trace_id, None)
+        span.log(input=trace_first_input, output=trace_last_output)
+        span.end()
+        span.unset_current()
+
+    def _agent_log_data(self, span: tracing.Span[tracing.AgentSpanData]) -> dict[str, Any]:
+        return {
+            "metadata": {
+                "tools": span.span_data.tools,
+                "handoffs": span.span_data.handoffs,
+                "output_type": span.span_data.output_type,
+            }
+        }
+
+    def _response_log_data(self, span: tracing.Span[tracing.ResponseSpanData]) -> dict[str, Any]:
+        data = {}
+        if span.span_data.input is not None:
+            data["input"] = span.span_data.input
+        if span.span_data.response is not None:
+            data["output"] = span.span_data.response.output
+        if span.span_data.response is not None:
+            data["metadata"] = span.span_data.response.metadata or {}
+            data["metadata"].update(
span.span_data.response.model_dump(exclude={"input", "output", "metadata", "usage"}) + ) + + data["metrics"] = {} + ttft = _maybe_timestamp_elapsed(span.ended_at, span.started_at) + if ttft is not None: + data["metrics"]["time_to_first_token"] = ttft + if span.span_data.response is not None and span.span_data.response.usage is not None: + data["metrics"]["tokens"] = span.span_data.response.usage.total_tokens + data["metrics"]["prompt_tokens"] = span.span_data.response.usage.input_tokens + data["metrics"]["completion_tokens"] = span.span_data.response.usage.output_tokens + + return data + + def _function_log_data(self, span: tracing.Span[tracing.FunctionSpanData]) -> dict[str, Any]: + return { + "input": span.span_data.input, + "output": span.span_data.output, + } + + def _handoff_log_data(self, span: tracing.Span[tracing.HandoffSpanData]) -> dict[str, Any]: + return { + "metadata": { + "from_agent": span.span_data.from_agent, + "to_agent": span.span_data.to_agent, + } + } + + def _guardrail_log_data(self, span: tracing.Span[tracing.GuardrailSpanData]) -> dict[str, Any]: + return { + "metadata": { + "triggered": span.span_data.triggered, + } + } + + def _generation_log_data(self, span: tracing.Span[tracing.GenerationSpanData]) -> dict[str, Any]: + metrics = {} + ttft = _maybe_timestamp_elapsed(span.ended_at, span.started_at) + + if ttft is not None: + metrics["time_to_first_token"] = ttft + + usage = span.span_data.usage or {} + if "prompt_tokens" in usage: + metrics["prompt_tokens"] = usage["prompt_tokens"] + elif "input_tokens" in usage: + metrics["prompt_tokens"] = usage["input_tokens"] + + if "completion_tokens" in usage: + metrics["completion_tokens"] = usage["completion_tokens"] + elif "output_tokens" in usage: + metrics["completion_tokens"] = usage["output_tokens"] + + if "total_tokens" in usage: + metrics["tokens"] = usage["total_tokens"] + elif "input_tokens" in usage and "output_tokens" in usage: + metrics["tokens"] = usage["input_tokens"] + usage["output_tokens"] + + return { + "input": span.span_data.input, + "output": span.span_data.output, + "metadata": { + "model": span.span_data.model, + "model_config": span.span_data.model_config, + }, + "metrics": metrics, + } + + def _custom_log_data(self, span: tracing.Span[tracing.CustomSpanData]) -> dict[str, Any]: + return span.span_data.data + + def _mcp_list_tools_log_data(self, span: tracing.Span[tracing.MCPListToolsSpanData]) -> dict[str, Any]: + return { + "output": span.span_data.result, + "metadata": { + "server": span.span_data.server, + }, + } + + def _transcription_log_data(self, span: tracing.Span[tracing.TranscriptionSpanData]) -> dict[str, Any]: + return { + "input": span.span_data.input, + "output": span.span_data.output, + "metadata": { + "model": span.span_data.model, + "model_config": span.span_data.model_config, + }, + } + + def _speech_log_data(self, span: tracing.Span[tracing.SpeechSpanData]) -> dict[str, Any]: + return { + "input": span.span_data.input, + "output": span.span_data.output, + "metadata": { + "model": span.span_data.model, + "model_config": span.span_data.model_config, + }, + } + + def _speech_group_log_data(self, span: tracing.Span[tracing.SpeechGroupSpanData]) -> dict[str, Any]: + return { + "input": span.span_data.input, + } + + def _log_data(self, span: tracing.Span[Any]) -> dict[str, Any]: + if isinstance(span.span_data, tracing.AgentSpanData): + return self._agent_log_data(span) + elif isinstance(span.span_data, tracing.ResponseSpanData): + return self._response_log_data(span) + elif 
isinstance(span.span_data, tracing.FunctionSpanData): + return self._function_log_data(span) + elif isinstance(span.span_data, tracing.HandoffSpanData): + return self._handoff_log_data(span) + elif isinstance(span.span_data, tracing.GuardrailSpanData): + return self._guardrail_log_data(span) + elif isinstance(span.span_data, tracing.GenerationSpanData): + return self._generation_log_data(span) + elif isinstance(span.span_data, tracing.CustomSpanData): + return self._custom_log_data(span) + elif isinstance(span.span_data, tracing.MCPListToolsSpanData): + return self._mcp_list_tools_log_data(span) + elif isinstance(span.span_data, tracing.TranscriptionSpanData): + return self._transcription_log_data(span) + elif isinstance(span.span_data, tracing.SpeechSpanData): + return self._speech_log_data(span) + elif isinstance(span.span_data, tracing.SpeechGroupSpanData): + return self._speech_group_log_data(span) + else: + return {} + + def on_span_start(self, span: tracing.Span[tracing.SpanData]) -> None: + if span.parent_id is not None: + parent = self._spans[span.parent_id] + else: + parent = self._spans[span.trace_id] + created_span = parent.start_span( + id=span.span_id, + name=_span_name(span), + type=_span_type(span), + start_time=_timestamp_from_maybe_iso(span.started_at), + ) + self._spans[span.span_id] = created_span + created_span.set_current() + + def on_span_end(self, span: tracing.Span[tracing.SpanData]) -> None: + s = self._spans.pop(span.span_id) + event = dict(error=span.error, **self._log_data(span)) + s.log(**event) + s.unset_current() + s.end(_timestamp_from_maybe_iso(span.ended_at)) + + input_ = event.get("input") + output = event.get("output") + trace_id = span.trace_id + if trace_id not in self._first_input and input_ is not None: + self._first_input[trace_id] = input_ + + if output is not None: + self._last_output[trace_id] = output + + def shutdown(self) -> None: + if self._logger is not None: + self._logger.flush() + else: + flush() + + def force_flush(self) -> None: + if self._logger is not None: + self._logger.flush() + else: + flush() + + +# --------------------------------------------------------------------------- +# Setup helpers — used by OpenAIAgentsTracingPatcher in patchers.py +# --------------------------------------------------------------------------- + + +def _get_trace_provider(): + return tracing.get_trace_provider() + + +def _get_processors(): + provider = _get_trace_provider() + return getattr(getattr(provider, "_multi_processor", None), "_processors", ()) + + +def _has_braintrust_tracing_processor() -> bool: + provider = _get_trace_provider() + has_processor = any(isinstance(p, BraintrustTracingProcessor) for p in _get_processors()) + return has_processor and not getattr(provider, "_disabled", False) + + +def _setup_openai_agents_tracing() -> None: + import agents as agents_mod + + if not any(isinstance(p, BraintrustTracingProcessor) for p in _get_processors()): + agents_mod.add_trace_processor(BraintrustTracingProcessor()) + + agents_mod.set_tracing_disabled(False) diff --git a/py/src/braintrust/oai.py b/py/src/braintrust/oai.py index ba1fa7f0..db44704e 100644 --- a/py/src/braintrust/oai.py +++ b/py/src/braintrust/oai.py @@ -1,1147 +1,12 @@ -import abc -import base64 -import re -import time -import warnings -from collections.abc import Callable -from typing import Any - -from wrapt import wrap_function_wrapper - -from .logger import Attachment, Span, start_span -from .span_types import SpanTypeAttribute -from .util import is_numeric, merge_dicts - - 
-X_LEGACY_CACHED_HEADER = "x-cached" -X_CACHED_HEADER = "x-bt-cached" - - -class NamedWrapper: - def __init__(self, wrapped: Any): - # Keep the legacy mangled attribute for existing wrapped-client checks - # that introspect `_NamedWrapper__wrapped` directly. - self.__wrapped = wrapped - - @property - def _wrapped(self) -> Any: - return self.__wrapped - - def __getattr__(self, name: str) -> Any: - return getattr(self.__wrapped, name) - - -class AsyncResponseWrapper: - """Wrapper that properly preserves async context manager behavior for OpenAI responses.""" - - def __init__(self, response: Any): - self._response = response - - async def __aenter__(self): - if hasattr(self._response, "__aenter__"): - await self._response.__aenter__() - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - if hasattr(self._response, "__aexit__"): - return await self._response.__aexit__(exc_type, exc_val, exc_tb) - - def __aiter__(self): - if hasattr(self._response, "__aiter__"): - return self._response.__aiter__() - raise TypeError("Response object is not an async iterator") - - async def __anext__(self): - if hasattr(self._response, "__anext__"): - return await self._response.__anext__() - raise StopAsyncIteration - - def __getattr__(self, name: str) -> Any: - return getattr(self._response, name) - - @property - def __class__(self): # type: ignore - return self._response.__class__ - - def __str__(self) -> str: - return str(self._response) - - def __repr__(self) -> str: - return repr(self._response) - - -def log_headers(response: Any, span: Span): - cached_value = response.headers.get(X_CACHED_HEADER) or response.headers.get(X_LEGACY_CACHED_HEADER) - - if cached_value: - span.log( - metrics={ - "cached": 1 if cached_value.lower() in ["true", "hit"] else 0, - } - ) - - -def _convert_data_url_to_attachment(data_url: str, filename: str | None = None) -> Attachment | str: - """Helper function to convert data URL to an Attachment.""" - data_url_match = re.match(r"^data:([^;]+);base64,(.+)$", data_url) - if not data_url_match: - return data_url - - mime_type, base64_data = data_url_match.groups() - - try: - binary_data = base64.b64decode(base64_data) - - if filename is None: - extension = mime_type.split("/")[1] if "/" in mime_type else "bin" - prefix = "image" if mime_type.startswith("image/") else "document" - filename = f"{prefix}.{extension}" - - attachment = Attachment(data=binary_data, filename=filename, content_type=mime_type) - - return attachment - except Exception: - return data_url - - -def _process_attachments_in_input(input_data: Any) -> Any: - """Process input to convert data URL images and base64 documents to Attachment objects.""" - if isinstance(input_data, list): - return [_process_attachments_in_input(item) for item in input_data] - - if isinstance(input_data, dict): - # Check for OpenAI's image_url format with data URLs - if ( - input_data.get("type") == "image_url" - and isinstance(input_data.get("image_url"), dict) - and isinstance(input_data["image_url"].get("url"), str) - ): - processed_url = _convert_data_url_to_attachment(input_data["image_url"]["url"]) - return { - **input_data, - "image_url": { - **input_data["image_url"], - "url": processed_url, - }, - } - - # Check for OpenAI's file format with data URL (e.g., PDFs) - if ( - input_data.get("type") == "file" - and isinstance(input_data.get("file"), dict) - and isinstance(input_data["file"].get("file_data"), str) - ): - file_filename = input_data["file"].get("filename") - processed_file_data = 
_convert_data_url_to_attachment( - input_data["file"]["file_data"], - filename=file_filename if isinstance(file_filename, str) else None, - ) - return { - **input_data, - "file": { - **input_data["file"], - "file_data": processed_file_data, - }, - } - - # Recursively process nested objects - return {key: _process_attachments_in_input(value) for key, value in input_data.items()} - - return input_data - - -class ChatCompletionWrapper: - def __init__(self, create_fn: Callable[..., Any] | None, acreate_fn: Callable[..., Any] | None): - self.create_fn = create_fn - self.acreate_fn = acreate_fn - - def create(self, *args: Any, **kwargs: Any) -> Any: - params = self._parse_params(kwargs) - stream = kwargs.get("stream", False) - - span = start_span( - **merge_dicts(dict(name="Chat Completion", span_attributes={"type": SpanTypeAttribute.LLM}), params) - ) - should_end = True - - try: - start = time.time() - create_response = self.create_fn(*args, **kwargs) - if hasattr(create_response, "parse"): - raw_response = create_response.parse() - log_headers(create_response, span) - else: - raw_response = create_response - if stream: - - def gen(): - try: - first = True - all_results = [] - for item in raw_response: - if first: - span.log( - metrics={ - "time_to_first_token": time.time() - start, - } - ) - first = False - all_results.append(_try_to_dict(item)) - yield item - - span.log(**self._postprocess_streaming_results(all_results)) - finally: - span.end() - - should_end = False - return _TracedStream(raw_response, gen()) - else: - log_response = _try_to_dict(raw_response) - metrics = _parse_metrics_from_usage(log_response.get("usage", {})) - metrics["time_to_first_token"] = time.time() - start - span.log( - metrics=metrics, - output=log_response["choices"], - ) - return raw_response - finally: - if should_end: - span.end() - - async def acreate(self, *args: Any, **kwargs: Any) -> Any: - params = self._parse_params(kwargs) - stream = kwargs.get("stream", False) - - span = start_span( - **merge_dicts(dict(name="Chat Completion", span_attributes={"type": SpanTypeAttribute.LLM}), params) - ) - should_end = True - - try: - start = time.time() - create_response = await self.acreate_fn(*args, **kwargs) - - if hasattr(create_response, "parse"): - raw_response = create_response.parse() - log_headers(create_response, span) - else: - raw_response = create_response - - if stream: - - async def gen(): - try: - first = True - all_results = [] - async for item in raw_response: - if first: - span.log( - metrics={ - "time_to_first_token": time.time() - start, - } - ) - first = False - all_results.append(_try_to_dict(item)) - yield item - - span.log(**self._postprocess_streaming_results(all_results)) - finally: - span.end() - - should_end = False - streamer = gen() - return _AsyncTracedStream(raw_response, streamer) - else: - log_response = _try_to_dict(raw_response) - metrics = _parse_metrics_from_usage(log_response.get("usage")) - metrics["time_to_first_token"] = time.time() - start - span.log( - metrics=metrics, - output=log_response["choices"], - ) - return raw_response - finally: - if should_end: - span.end() - - @classmethod - def _parse_params(cls, params: dict[str, Any]) -> dict[str, Any]: - # First, destructively remove span_info - ret = params.pop("span_info", {}) - - # Then, copy the rest of the params - params = prettify_params(params) - messages = params.pop("messages", None) - - # Process attachments in input (convert data URLs to Attachment objects) - processed_input = 
_process_attachments_in_input(messages) - - return merge_dicts( - ret, - { - "input": processed_input, - "metadata": {**params, "provider": "openai"}, - }, - ) - - @classmethod - def _postprocess_streaming_results(cls, all_results: list[dict[str, Any]]) -> dict[str, Any]: - role = None - content = None - tool_calls: list[Any] | None = None - finish_reason = None - metrics: dict[str, float] = {} - for result in all_results: - usage = result.get("usage") - if usage: - metrics.update(_parse_metrics_from_usage(usage)) - - choices = result["choices"] - if not choices: - continue - delta = choices[0]["delta"] - if not delta: - continue - - if role is None and delta.get("role") is not None: - role = delta.get("role") - - if delta.get("finish_reason") is not None: - finish_reason = delta.get("finish_reason") - - if delta.get("content") is not None: - content = (content or "") + delta.get("content") - - if delta.get("tool_calls") is not None: - delta_tool_calls = delta.get("tool_calls") - if not delta_tool_calls: - continue - tool_delta = delta_tool_calls[0] - - # pylint: disable=unsubscriptable-object - if not tool_calls or (tool_delta.get("id") and tool_calls[-1]["id"] != tool_delta.get("id")): - function_arg = tool_delta.get("function", {}) - tool_calls = (tool_calls or []) + [ - { - "id": tool_delta.get("id"), - "type": tool_delta.get("type"), - "function": { - "name": function_arg.get("name"), - "arguments": function_arg.get("arguments") or "", - }, - } - ] - else: - # pylint: disable=unsubscriptable-object - # append to existing tool call - function_arg = tool_delta.get("function", {}) - args = function_arg.get("arguments") or "" - if isinstance(args, str): - # pylint: disable=unsubscriptable-object - tool_calls[-1]["function"]["arguments"] += args - - return { - "metrics": metrics, - "output": [ - { - "index": 0, - "message": { - "role": role, - "content": content, - "tool_calls": tool_calls, - }, - "logprobs": None, - "finish_reason": finish_reason, - } - ], - } - - -class _TracedStream(NamedWrapper): - """Traced sync stream. Iterates via the traced generator while delegating - SDK-specific attributes (e.g. .close(), .response) to the original stream.""" - - def __init__(self, original_stream: Any, traced_generator: Any) -> None: - self._traced_generator = traced_generator - super().__init__(original_stream) - - def __iter__(self) -> Any: - return self._traced_generator - - def __next__(self) -> Any: - return next(self._traced_generator) - - def __enter__(self) -> Any: - if hasattr(self._wrapped, "__enter__"): - self._wrapped.__enter__() - return self - - def __exit__(self, exc_type, exc_val, exc_tb) -> Any: - if hasattr(self._wrapped, "__exit__"): - return self._wrapped.__exit__(exc_type, exc_val, exc_tb) - return None - - -class _AsyncTracedStream(NamedWrapper): - """Traced async stream. Iterates via the traced generator while delegating - SDK-specific attributes (e.g. 
.close(), .response) to the original stream.""" - - def __init__(self, original_stream: Any, traced_generator: Any) -> None: - self._traced_generator = traced_generator - super().__init__(original_stream) - - def __aiter__(self) -> Any: - return self._traced_generator - - async def __anext__(self) -> Any: - return await self._traced_generator.__anext__() - - async def __aenter__(self) -> Any: - if hasattr(self._wrapped, "__aenter__"): - await self._wrapped.__aenter__() - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb) -> Any: - if hasattr(self._wrapped, "__aexit__"): - return await self._wrapped.__aexit__(exc_type, exc_val, exc_tb) - return None - - -class _RawResponseWithTracedStream(NamedWrapper): - """Proxy for LegacyAPIResponse that replaces parse() with a traced stream, - so that with_raw_response + stream=True preserves both headers and tracing.""" - - def __init__(self, raw_response: Any, traced_stream: Any) -> None: - self._traced_stream = traced_stream - super().__init__(raw_response) - - def parse(self, *args: Any, **kwargs: Any) -> Any: - return self._traced_stream - - -class ResponseWrapper: - def __init__( - self, - create_fn: Callable[..., Any] | None, - acreate_fn: Callable[..., Any] | None, - name: str = "openai.responses.create", - return_raw: bool = False, - ): - self.create_fn = create_fn - self.acreate_fn = acreate_fn - self.name = name - self.return_raw = return_raw - - def create(self, *args: Any, **kwargs: Any) -> Any: - params = self._parse_params(kwargs) - stream = kwargs.get("stream", False) - - span = start_span(**merge_dicts(dict(name=self.name, span_attributes={"type": SpanTypeAttribute.LLM}), params)) - should_end = True - - try: - start = time.time() - create_response = self.create_fn(*args, **kwargs) - if hasattr(create_response, "parse"): - raw_response = create_response.parse() - log_headers(create_response, span) - else: - raw_response = create_response - if stream: - - def gen(): - try: - first = True - all_results = [] - for item in raw_response: - if first: - span.log( - metrics={ - "time_to_first_token": time.time() - start, - } - ) - first = False - all_results.append(item) - yield item - - span.log(**self._postprocess_streaming_results(all_results)) - finally: - span.end() - - should_end = False - if self.return_raw and hasattr(create_response, "parse"): - return _RawResponseWithTracedStream(create_response, _TracedStream(raw_response, gen())) - return _TracedStream(raw_response, gen()) - else: - log_response = _try_to_dict(raw_response) - event_data = self._parse_event_from_result(log_response) - if "metrics" not in event_data: - event_data["metrics"] = {} - event_data["metrics"]["time_to_first_token"] = time.time() - start - span.log(**event_data) - return create_response if (self.return_raw and hasattr(create_response, "parse")) else raw_response - finally: - if should_end: - span.end() - - async def acreate(self, *args: Any, **kwargs: Any) -> Any: - params = self._parse_params(kwargs) - stream = kwargs.get("stream", False) - - span = start_span(**merge_dicts(dict(name=self.name, span_attributes={"type": SpanTypeAttribute.LLM}), params)) - should_end = True - - try: - start = time.time() - create_response = await self.acreate_fn(*args, **kwargs) - if hasattr(create_response, "parse"): - raw_response = create_response.parse() - log_headers(create_response, span) - else: - raw_response = create_response - if stream: - - async def gen(): - try: - first = True - all_results = [] - async for item in raw_response: - if first: - 
span.log( - metrics={ - "time_to_first_token": time.time() - start, - } - ) - first = False - all_results.append(item) - yield item - - span.log(**self._postprocess_streaming_results(all_results)) - finally: - span.end() - - should_end = False - streamer = gen() - if self.return_raw and hasattr(create_response, "parse"): - return _RawResponseWithTracedStream(create_response, _AsyncTracedStream(raw_response, streamer)) - return _AsyncTracedStream(raw_response, streamer) - else: - log_response = _try_to_dict(raw_response) - event_data = self._parse_event_from_result(log_response) - if "metrics" not in event_data: - event_data["metrics"] = {} - event_data["metrics"]["time_to_first_token"] = time.time() - start - span.log(**event_data) - return create_response if (self.return_raw and hasattr(create_response, "parse")) else raw_response - finally: - if should_end: - span.end() - - @classmethod - def _parse_params(cls, params: dict[str, Any]) -> dict[str, Any]: - # First, destructively remove span_info - ret = params.pop("span_info", {}) - - # Then, copy the rest of the params - params = prettify_params(params) - input_data = params.pop("input", None) - - # Process attachments in input (convert data URLs to Attachment objects) - processed_input = _process_attachments_in_input(input_data) - - return merge_dicts( - ret, - { - "input": processed_input, - "metadata": {**params, "provider": "openai"}, - }, - ) - - @classmethod - def _parse_event_from_result(cls, result: dict[str, Any]) -> dict[str, Any]: - """Parse event from response result""" - data = {"metrics": {}} - - if not result: - return data - - if "output" in result: - data["output"] = result["output"] - - metadata = {k: v for k, v in result.items() if k not in ["output", "usage"]} - if metadata: - data["metadata"] = metadata - - if "usage" in result: - data["metrics"] = _parse_metrics_from_usage(result["usage"]) - - return data - - @classmethod - def _postprocess_streaming_results(cls, all_results: list[Any]) -> dict[str, Any]: - """Process streaming results - minimal version focused on metrics extraction.""" - metrics = {} - output = [] - - for result in all_results: - usage = getattr(result, "usage", None) - if ( - not usage - and hasattr(result, "type") - and result.type == "response.completed" - and hasattr(result, "response") - ): - # Handle summaries from completed response if present - if hasattr(result.response, "output") and result.response.output: - for output_item in result.response.output: - if hasattr(output_item, "summary") and output_item.summary: - for item in output: - if item.get("id") == output_item.id: - item["summary"] = output_item.summary - usage = getattr(result.response, "usage", None) - - if usage: - parsed_metrics = _parse_metrics_from_usage(usage) - metrics.update(parsed_metrics) - - # Skip processing if result doesn't have a type attribute - if not hasattr(result, "type"): - continue - - if result.type == "response.output_item.added": - item_data = {"id": result.item.id, "type": result.item.type} - if hasattr(result.item, "role"): - item_data["role"] = result.item.role - output.append(item_data) - continue - - if result.type == "response.completed": - if hasattr(result, "response") and hasattr(result.response, "output"): - return { - "metrics": metrics, - "output": result.response.output, - } - continue - - # Handle output_index based updates - if hasattr(result, "output_index"): - output_index = result.output_index - if output_index < len(output): - current_output = output[output_index] - - if result.type == 
"response.output_item.done": - current_output["status"] = result.item.status - continue - - if result.type == "response.output_item.delta": - current_output["delta"] = result.delta - continue - - # Handle content_index based updates - if hasattr(result, "content_index"): - if "content" not in current_output: - current_output["content"] = [] - content_index = result.content_index - # Fill any gaps in the content array - while len(current_output["content"]) <= content_index: - current_output["content"].append({}) - current_content = current_output["content"][content_index] - current_content["type"] = "output_text" - if hasattr(result, "delta") and result.delta: - current_content["text"] = (current_content.get("text") or "") + result.delta - - if result.type == "response.output_text.annotation.added": - annotation_index = result.annotation_index - if "annotations" not in current_content: - current_content["annotations"] = [] - # Fill any gaps in the annotations array - while len(current_content["annotations"]) <= annotation_index: - current_content["annotations"].append({}) - current_content["annotations"][annotation_index] = _try_to_dict(result.annotation) - - return { - "metrics": metrics, - "output": output, - } - - -class BaseWrapper(abc.ABC): - def __init__(self, create_fn: Callable[..., Any] | None, acreate_fn: Callable[..., Any] | None, name: str): - self._create_fn = create_fn - self._acreate_fn = acreate_fn - self._name = name - - @abc.abstractmethod - def process_output(self, response: dict[str, Any], span: Span): - """Process the API response and log relevant information to the span.""" - pass - - def create(self, *args: Any, **kwargs: Any) -> Any: - params = self._parse_params(kwargs) - - with start_span( - **merge_dicts(dict(name=self._name, span_attributes={"type": SpanTypeAttribute.LLM}), params) - ) as span: - create_response = self._create_fn(*args, **kwargs) - if hasattr(create_response, "parse"): - raw_response = create_response.parse() - log_headers(create_response, span) - else: - raw_response = create_response - - log_response = _try_to_dict(raw_response) - self.process_output(log_response, span) - return raw_response - - async def acreate(self, *args: Any, **kwargs: Any) -> Any: - params = self._parse_params(kwargs) - - with start_span( - **merge_dicts(dict(name=self._name, span_attributes={"type": SpanTypeAttribute.LLM}), params) - ) as span: - create_response = await self._acreate_fn(*args, **kwargs) - if hasattr(create_response, "parse"): - raw_response = create_response.parse() - log_headers(create_response, span) - else: - raw_response = create_response - log_response = _try_to_dict(raw_response) - self.process_output(log_response, span) - return raw_response - - @classmethod - def _parse_params(cls, params: dict[str, Any]) -> dict[str, Any]: - # First, destructively remove span_info - ret = params.pop("span_info", {}) - - params = prettify_params(params) - input_data = params.pop("input", None) - - # Process attachments in input (convert data URLs to Attachment objects) - processed_input = _process_attachments_in_input(input_data) - - return merge_dicts( - ret, - { - "input": processed_input, - "metadata": {**params, "provider": "openai"}, - }, - ) - - -class EmbeddingWrapper(BaseWrapper): - def __init__(self, create_fn: Callable[..., Any] | None, acreate_fn: Callable[..., Any] | None): - super().__init__(create_fn, acreate_fn, "Embedding") - - def process_output(self, response: dict[str, Any], span: Span): - usage = response.get("usage") - metrics = 
_parse_metrics_from_usage(usage)
-        span.log(
-            metrics=metrics,
-            # TODO: Add a flag to control whether to log the full embedding vector,
-            # possibly w/ JSON compression.
-            output={"embedding_length": len(response["data"][0]["embedding"])},
-        )
-
-
-class ModerationWrapper(BaseWrapper):
-    def __init__(self, create_fn: Callable[..., Any] | None, acreate_fn: Callable[..., Any] | None):
-        super().__init__(create_fn, acreate_fn, "Moderation")
-
-    def process_output(self, response: Any, span: Span):
-        span.log(
-            output=response["results"],
-        )
-
-
-class ChatCompletionV0Wrapper(NamedWrapper):
-    def __init__(self, chat: Any):
-        self.__chat = chat
-        super().__init__(chat)
-
-    def create(self, *args: Any, **kwargs: Any) -> Any:
-        return ChatCompletionWrapper(self.__chat.create, self.__chat.acreate).create(*args, **kwargs)
-
-    async def acreate(self, *args: Any, **kwargs: Any) -> Any:
-        return await ChatCompletionWrapper(self.__chat.create, self.__chat.acreate).acreate(*args, **kwargs)
-
-
-class EmbeddingV0Wrapper(NamedWrapper):
-    def __init__(self, embedding: Any):
-        self.__embedding = embedding
-        super().__init__(embedding)
-
-    def create(self, *args: Any, **kwargs: Any) -> Any:
-        return EmbeddingWrapper(self.__embedding.create, self.__embedding.acreate).create(*args, **kwargs)
-
-    async def acreate(self, *args: Any, **kwargs: Any) -> Any:
-        # Delegate to EmbeddingWrapper (not ChatCompletionWrapper) so embeddings are logged correctly.
-        return await EmbeddingWrapper(self.__embedding.create, self.__embedding.acreate).acreate(*args, **kwargs)
-
-
-class ModerationV0Wrapper(NamedWrapper):
-    def __init__(self, moderation: Any):
-        self.__moderation = moderation
-        super().__init__(moderation)
-
-    def create(self, *args: Any, **kwargs: Any) -> Any:
-        return ModerationWrapper(self.__moderation.create, self.__moderation.acreate).create(*args, **kwargs)
-
-    async def acreate(self, *args: Any, **kwargs: Any) -> Any:
-        return await ModerationWrapper(self.__moderation.create, self.__moderation.acreate).acreate(*args, **kwargs)
-
-
-# This wraps 0.*.* versions of the openai module, e.g. https://github.com/openai/openai-python/tree/v0.28.1
-class OpenAIV0Wrapper(NamedWrapper):
-    def __init__(self, openai: Any):
-        super().__init__(openai)
-        self.ChatCompletion = ChatCompletionV0Wrapper(openai.ChatCompletion)
-        self.Embedding = EmbeddingV0Wrapper(openai.Embedding)
-        self.Moderation = ModerationV0Wrapper(openai.Moderation)
-
-
-class CompletionsV1Wrapper(NamedWrapper):
-    def __init__(self, completions: Any):
-        self.__completions = completions
-        super().__init__(completions)
-
-    def create(self, *args: Any, **kwargs: Any) -> Any:
-        return ChatCompletionWrapper(self.__completions.with_raw_response.create, None).create(*args, **kwargs)
-
-
-class EmbeddingV1Wrapper(NamedWrapper):
-    def __init__(self, embedding: Any):
-        self.__embedding = embedding
-        super().__init__(embedding)
-
-    def create(self, *args: Any, **kwargs: Any) -> Any:
-        return EmbeddingWrapper(self.__embedding.with_raw_response.create, None).create(*args, **kwargs)
-
-
-class ModerationV1Wrapper(NamedWrapper):
-    def __init__(self, moderation: Any):
-        self.__moderation = moderation
-        super().__init__(moderation)
-
-    def create(self, *args: Any, **kwargs: Any) -> Any:
-        return ModerationWrapper(self.__moderation.with_raw_response.create, None).create(*args, **kwargs)
-
-
-class AsyncCompletionsV1Wrapper(NamedWrapper):
-    def __init__(self, completions: Any):
-        self.__completions = completions
-        super().__init__(completions)
-
-    async def create(self, *args: Any, **kwargs: Any) -> Any:
-        response = await ChatCompletionWrapper(None, 
self.__completions.with_raw_response.create).acreate( - *args, **kwargs - ) - return AsyncResponseWrapper(response) - - -class AsyncEmbeddingV1Wrapper(NamedWrapper): - def __init__(self, embedding: Any): - self.__embedding = embedding - super().__init__(embedding) - - async def create(self, *args: Any, **kwargs: Any) -> Any: - response = await EmbeddingWrapper(None, self.__embedding.with_raw_response.create).acreate(*args, **kwargs) - return AsyncResponseWrapper(response) - - -class AsyncModerationV1Wrapper(NamedWrapper): - def __init__(self, moderation: Any): - self.__moderation = moderation - super().__init__(moderation) - - async def create(self, *args: Any, **kwargs: Any) -> Any: - response = await ModerationWrapper(None, self.__moderation.with_raw_response.create).acreate(*args, **kwargs) - return AsyncResponseWrapper(response) - - -class ChatV1Wrapper(NamedWrapper): - def __init__(self, chat: Any): - super().__init__(chat) - - import openai - - if type(chat.completions) == openai.resources.chat.completions.AsyncCompletions: - self.completions = AsyncCompletionsV1Wrapper(chat.completions) - else: - self.completions = CompletionsV1Wrapper(chat.completions) - - -class ResponsesV1Wrapper(NamedWrapper): - def __init__(self, responses: Any, return_raw: bool = False) -> None: - self.__responses = responses - self.__return_raw = return_raw - if not return_raw: - self.with_raw_response = ResponsesV1Wrapper(responses, return_raw=True) - super().__init__(responses) - - def create(self, *args: Any, **kwargs: Any) -> Any: - return ResponseWrapper(self.__responses.with_raw_response.create, None, return_raw=self.__return_raw).create( - *args, **kwargs - ) - - def parse(self, *args: Any, **kwargs: Any) -> Any: - return ResponseWrapper( - self.__responses.with_raw_response.parse, None, "openai.responses.parse", return_raw=self.__return_raw - ).create(*args, **kwargs) - - -class AsyncResponsesV1Wrapper(NamedWrapper): - def __init__(self, responses: Any, return_raw: bool = False) -> None: - self.__responses = responses - self.__return_raw = return_raw - if not return_raw: - self.with_raw_response = AsyncResponsesV1Wrapper(responses, return_raw=True) - super().__init__(responses) - - async def create(self, *args: Any, **kwargs: Any) -> Any: - response = await ResponseWrapper( - None, self.__responses.with_raw_response.create, return_raw=self.__return_raw - ).acreate(*args, **kwargs) - return response if self.__return_raw else AsyncResponseWrapper(response) - - async def parse(self, *args: Any, **kwargs: Any) -> Any: - response = await ResponseWrapper( - None, self.__responses.with_raw_response.parse, "openai.responses.parse", return_raw=self.__return_raw - ).acreate(*args, **kwargs) - return response if self.__return_raw else AsyncResponseWrapper(response) - - -class BetaCompletionsV1Wrapper(NamedWrapper): - def __init__(self, completions: Any): - self.__completions = completions - super().__init__(completions) - - def parse(self, *args: Any, **kwargs: Any) -> Any: - return ChatCompletionWrapper(self.__completions.parse, None).create(*args, **kwargs) - - -class AsyncBetaCompletionsV1Wrapper(NamedWrapper): - def __init__(self, completions: Any): - self.__completions = completions - super().__init__(completions) - - async def parse(self, *args: Any, **kwargs: Any) -> Any: - response = await ChatCompletionWrapper(None, self.__completions.parse).acreate(*args, **kwargs) - return AsyncResponseWrapper(response) - - -class BetaChatV1Wrapper(NamedWrapper): - def __init__(self, chat: Any): - 
super().__init__(chat)
-
-        if "AsyncCompletions" in type(chat.completions).__name__:
-            self.completions = AsyncBetaCompletionsV1Wrapper(chat.completions)
-        else:
-            self.completions = BetaCompletionsV1Wrapper(chat.completions)
-
-
-class BetaV1Wrapper(NamedWrapper):
-    def __init__(self, beta: Any):
-        super().__init__(beta)
-        if hasattr(beta, "chat"):
-            self.chat = BetaChatV1Wrapper(beta.chat)
-
-
-# This wraps 1.*.* versions of the openai module, e.g. https://github.com/openai/openai-python/tree/v1.1.0
-class OpenAIV1Wrapper(NamedWrapper):
-    def __init__(self, openai: Any):
-        super().__init__(openai)
-        import openai as oai
-
-        self.chat = ChatV1Wrapper(openai.chat)
-
-        if hasattr(openai, "beta"):
-            self.beta = BetaV1Wrapper(openai.beta)
-
-        if hasattr(openai, "responses"):
-            if type(openai.responses) == oai.resources.responses.responses.AsyncResponses:
-                self.responses = AsyncResponsesV1Wrapper(openai.responses)
-            else:
-                self.responses = ResponsesV1Wrapper(openai.responses)
-
-        if type(openai.embeddings) == oai.resources.embeddings.AsyncEmbeddings:
-            self.embeddings = AsyncEmbeddingV1Wrapper(openai.embeddings)
-        else:
-            self.embeddings = EmbeddingV1Wrapper(openai.embeddings)
-
-        if type(openai.moderations) == oai.resources.moderations.AsyncModerations:
-            self.moderations = AsyncModerationV1Wrapper(openai.moderations)
-        else:
-            self.moderations = ModerationV1Wrapper(openai.moderations)
-
-
-def wrap_openai(openai: Any):
-    """
-    Wrap the openai module (pre v1) or OpenAI instance (v1.* or v2.*) to add tracing.
-    If Braintrust is not configured, nothing will be traced. Objects without a
-    `chat.completions` attribute are wrapped with the pre-v1 module interface.
-
-    :param openai: The openai module or OpenAI object
-    """
-    if hasattr(openai, "chat") and hasattr(openai.chat, "completions"):
-        return OpenAIV1Wrapper(openai)
-    else:
-        return OpenAIV0Wrapper(openai)
-
-
-# OpenAI's representation to Braintrust's representation
-TOKEN_NAME_MAP = {
-    # chat API
-    "total_tokens": "tokens",
-    "prompt_tokens": "prompt_tokens",
-    "completion_tokens": "completion_tokens",
-    # responses API
-    "tokens": "tokens",
-    "input_tokens": "prompt_tokens",
-    "output_tokens": "completion_tokens",
-}
-
-TOKEN_PREFIX_MAP = {
-    "input": "prompt",
-    "output": "completion",
-}
-
-
-def _parse_metrics_from_usage(usage: Any) -> dict[str, Any]:
-    # For simplicity, this function handles all the different APIs
-    metrics = {}
-
-    if not usage:
-        return metrics
-
-    # This might be a dict or a Usage object that can be cast to a dict
-    usage = _try_to_dict(usage)
-    if not isinstance(usage, dict):
-        return metrics  # unexpected
-
-    for oai_name, value in usage.items():
-        if oai_name.endswith("_tokens_details"):
-            # handle `_tokens_details` dicts
-            if not isinstance(value, dict):
-                continue  # unexpected
-            raw_prefix = oai_name[: -len("_tokens_details")]
-            prefix = TOKEN_PREFIX_MAP.get(raw_prefix, raw_prefix)
-            for k, v in value.items():
-                if is_numeric(v):
-                    metrics[f"{prefix}_{k}"] = v
-        elif is_numeric(value):
-            name = TOKEN_NAME_MAP.get(oai_name, oai_name)
-            metrics[name] = value
-
-    return metrics
-
-
-def prettify_params(params: dict[str, Any]) -> dict[str, Any]:
-    # Filter out NOT_GIVEN parameters
-    # https://linear.app/braintrustdata/issue/BRA-2467
-    ret = {k: v for k, v in params.items() if not _is_not_given(v)}
-
-    if "response_format" in ret:
-        ret["response_format"] = serialize_response_format(ret["response_format"])
-    return ret
-
-
-def _try_to_dict(obj: Any) -> dict[str, Any]:
-    if isinstance(obj, dict):
-        return obj
-    # convert a pydantic object to a dict
-    # Suppress Pydantic serializer warnings from generic/discriminated-union models
-    # (e.g. OpenAI's ParsedResponse[T]). See
-    # https://github.com/braintrustdata/braintrust-sdk-python/issues/60
-    if hasattr(obj, "model_dump") and callable(obj.model_dump):
-        try:
-            with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", message="Pydantic serializer warnings", category=UserWarning)
-                return obj.model_dump()
-        except Exception:
-            pass
-    # Fall back to the deprecated pydantic .dict() method if model_dump() is unavailable.
-    if hasattr(obj, "dict") and callable(obj.dict):
-        try:
-            return obj.dict()
-        except Exception:
-            pass
-    return obj
-
-
-def serialize_response_format(response_format: Any) -> Any:
-    try:
-        from pydantic import BaseModel
-    except ImportError:
-        return response_format
-
-    if isinstance(response_format, type) and issubclass(response_format, BaseModel):
-        return dict(
-            type="json_schema",
-            json_schema=dict(
-                name=response_format.__name__,
-                schema=response_format.model_json_schema(),
-            ),
-        )
-    else:
-        return response_format
-
-
-def _is_not_given(value: Any) -> bool:
-    if value is None:
-        return False
-    try:
-        # Check by type name to avoid an import dependency on openai
-        type_name = type(value).__name__
-        return type_name == "NotGiven"
-    except Exception:
-        return False
-
-
-def _openai_init_wrapper(wrapped, instance, args, kwargs):
-    """Wrapper for OpenAI.__init__ that applies tracing after initialization."""
-    wrapped(*args, **kwargs)
-    _apply_openai_wrapper(instance)
+from braintrust.integrations.openai import OpenAIIntegration, wrap_openai
 
 
 def patch_openai() -> bool:
-    """
-    Patch OpenAI to add Braintrust tracing globally.
-
-    After calling this, all new OpenAI() and AsyncOpenAI() clients
-    will automatically have tracing enabled.
-
-    Returns:
-        True if OpenAI was patched (or already patched), False if OpenAI is not installed.
-
-    Example:
-        ```python
-        import braintrust
-        braintrust.patch_openai()
-
-        import openai
-        client = openai.OpenAI()
-        # All calls are now traced!
-        ```
-    """
-    try:
-        import openai
-
-        if getattr(openai, "__braintrust_wrapped__", False):
-            return True  # Already patched
-
-        wrap_function_wrapper("openai", "OpenAI.__init__", _openai_init_wrapper)
-        wrap_function_wrapper("openai", "AsyncOpenAI.__init__", _openai_init_wrapper)
-        openai.__braintrust_wrapped__ = True
-        return True
-
-    except ImportError:
-        return False
+    """Patch OpenAI globally for Braintrust tracing."""
+    return OpenAIIntegration.setup()
 
 
-def _apply_openai_wrapper(client):
-    """Apply tracing wrapper to an OpenAI client instance in-place."""
-    wrapped = wrap_openai(client)
-    for attr in ("chat", "responses", "embeddings", "moderations", "beta"):
-        if hasattr(wrapped, attr):
-            setattr(client, attr, getattr(wrapped, attr))
+__all__ = [
+    "patch_openai",
+    "wrap_openai",
+]
diff --git a/py/src/braintrust/wrappers/cassettes/test_agents_tool_openai_nested_spans.yaml b/py/src/braintrust/wrappers/cassettes/test_agents_tool_openai_nested_spans.yaml
deleted file mode 100644
index fe69dd17..00000000
--- a/py/src/braintrust/wrappers/cassettes/test_agents_tool_openai_nested_spans.yaml
+++ /dev/null
@@ -1,436 +0,0 @@
-interactions:
-- request:
-    body: '{"include":[],"input":[{"content":"Please analyze this text: ''Artificial
-      intelligence is transforming industries worldwide. Companies are adopting AI
-      technologies to improve efficiency and innovation. 
However, challenges like - ethics and job displacement remain concerns.''","role":"user"}],"instructions":"You - are a helpful assistant that analyzes text. When asked to analyze text, you - MUST use the analyze_text tool. Always call the tool with the exact text provided - by the user. After using the tool, provide a two sentence summary of what the - tool returned.","model":"gpt-4.1","tools":[{"name":"analyze_text","parameters":{"properties":{"text":{"title":"Text","type":"string"}},"required":["text"],"title":"analyze_text_args","type":"object"},"strict":false,"type":"function","description":"Analyze - text and return a structured summary with key points, sentiment, and statistics."}]}' - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '885' - Content-Type: - - application/json - Cookie: - - __cf_bm=IwSSJ.RHOyrDHy71t190C0u4EL9HMgUY2jiVoTE3Rx0-1768424037-1.0.1.1-ZID4mvxCwpZVRzFS1fLdN1Y2IWkkn_wazHoPQBolLYHzMoZNRkTFDL0fqX4m.0FY97.b95rhiBzBDf3ubnonNwcnYBcTqnrX4_OgE7Fq6Lw; - _cfuvid=RePkKlnxLbAHj0ymuEQEUhV2_qf3Ejhb0yEUFttOP24-1768424037764-0.0.1.1-604800000 - Host: - - api.openai.com - User-Agent: - - Agents/Python 0.6.5 - X-Stainless-Arch: - - arm64 - X-Stainless-Async: - - async:asyncio - X-Stainless-Lang: - - python - X-Stainless-OS: - - MacOS - X-Stainless-Package-Version: - - 2.15.0 - X-Stainless-Runtime: - - CPython - X-Stainless-Runtime-Version: - - 3.13.3 - x-stainless-read-timeout: - - '600' - x-stainless-retry-count: - - '0' - method: POST - uri: https://api.openai.com/v1/responses - response: - body: - string: !!binary | - H4sIAAAAAAAAA4RVTW/jNhC9+1cMdHYM2XH8kVtQYNEeiqLILoqiWQhjcmRzTZEMObTXG/i/F6Rs - WfYm2Js0j/PBxzczbwOAQsniEQpPwVXldHkvJ2Mc1+WynE0mZTlbzhblZDZDpMVivLxfziere5pJ - UT5McToTxTCFsKtvJPgcxppArV14QiZZYcLG89liOpmW94uMBUaOIfkI2zhNTLJ1WqHYrr2NJtVV - ow7UmpXWyqyLR3gbAAAUDg/kk7+kHWnryBcDgGOb+BzyJvUyo+S9TZ4map0NtafXSEYcKkcGNR+K - RyhHZcaUOQerJDEqHfqeygT2UbCyJt/lXxsBPQHChrSrowYMQQVGw8AbZECD+vCDAjB95xH8syED - GLYkge0ZzNgQDjbCn1+eP0MMBLyhM1wlGNhaPYInvcdDAIFa5yPJCnvFm/xH31FwjgbO252SJGF1 - yFAM5EfwVDN5iEGZdec+PJ8FBN5bCGSYjCAIsWnQH8DWsE9X6fJ54ugNyVH7gA1+r2xkF7liuyVz - RVgCk1OVSr5GrCSdOFw7vpuOxneTcvJwV07vxtOTynLM4hH+ywJoZdAJuBYfy3c+Jpnlu1jO6gd6 - GK9mU0FzOc+BcxA+OMphosmvmeu7wB+pNYPo17Ehwxl/eykS4S/F40vx5FnVSijUoAyT1mqdmVQB - 2KMJtfVNol4ZGQN7RQH21mu5V5JG8JttHJpkzIqS1nE6/PQHMImNsdquE8gWVJOejIDqlC0JGdBI - UMbYHabbjOB3u6cd+SGIDWpNZk0BtNoSEG+UCPn8N7sCqYLTKCjdBzw1qAwIawR5E0YvxfFy60RQ - 1XKfP1/94q85zT5N5vPn+y/POx6/fgr67x8XD4NN5riv4yKDxwHA1/zIDn2qT1+LhH1sp4DzFBKF - 7zSq87RTNobqPIPa4jp9OW8bx5VAsaFqS4cPMU9MJrHWP+EJgzVXA4jq2nruHUo6aXvkZOzmUcCa - +FApmQLXiq6mTyC/U4IqVud5VmPUXJzGpPXUZ4CpceSRYzaPT5fPVHaVJV3h5b8n7x7lp4p35Fc2 - qExl0ZBUsbnM0fYRNlaJ9uEi26IDws+teNtFl7eXFIRX7kRr8dSbdFl77QwBhHaeRk+yGzh5nm3p - AM4qw2GYR5JKCh1m39ScKrASYfQLsXVokllDTD70WGqF4Mizomv7LcMXq2Kd03zuh79hI7W2WRc9 - 8Nh9Hy8+RdpCypPseO2n7gxfex5d+v4tK/TrUPRPneo4renBTeZcX17fedveNCRbV2m7dt6uEidl - Z3R9+floBJ4fV6qAK33e5zHgmi7aVOZqMYyns+HPQG/Vvl3mjdiQvHiWVzK+XTjT6XvAe3G7zv4o - NFtG3at4WXb9kbbo1Q4jRomMKf5xcPwfAAD//wMA3nkxFGAJAAA= - headers: - CF-RAY: - - 9be006a14e44823c-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 14 Jan 2026 20:53:59 GMT - Server: - - cloudflare - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - 
openai-organization: - - braintrust-data - openai-processing-ms: - - '921' - openai-project: - - proj_vsCSXafhhByzWOThMrJcZiw9 - openai-version: - - '2020-10-01' - x-envoy-upstream-service-time: - - '923' - x-ratelimit-limit-requests: - - '10000' - x-ratelimit-limit-tokens: - - '30000000' - x-ratelimit-remaining-requests: - - '9999' - x-ratelimit-remaining-tokens: - - '30000000' - x-ratelimit-reset-requests: - - 6ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_f123f2742f0942de816ad96a756cdbd7 - status: - code: 200 - message: OK -- request: - body: '{"messages":[{"role":"user","content":"Analyze this text briefly: Artificial - intelligence is transforming industries worldwide. Companies are adopting AI - technologies to improve efficiency and innovation. However, challenges like - ethics and job displacement remain concerns."}],"model":"gpt-4o-mini"}' - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '300' - Content-Type: - - application/json - Host: - - api.openai.com - User-Agent: - - OpenAI/Python 2.15.0 - X-Stainless-Arch: - - arm64 - X-Stainless-Async: - - 'false' - X-Stainless-Lang: - - python - X-Stainless-OS: - - MacOS - X-Stainless-Package-Version: - - 2.15.0 - X-Stainless-Raw-Response: - - 'true' - X-Stainless-Runtime: - - CPython - X-Stainless-Runtime-Version: - - 3.13.3 - x-stainless-read-timeout: - - '600' - x-stainless-retry-count: - - '0' - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//jFTLbhsxDLz7Kwhd2gJ2YDtG4uRm9IGmx6JAD0VhyBJXS0dLChLXqVvk - 3wutndh9Ab0sBA5JzYzI/TECMOTNLRjXWnVdipPX+/nqw+ertH1/OX/rme275fs3H7er7dsPNzsz - rhWy2aLTp6oLJ12KqCR8gF1Gq1i7zq6vlov5Ynp5MwCdeIy1LCSdLGTSEdNkPp0vJtPryWx5rG6F - HBZzC19GAAA/hm/lyR6/mVuYjp8iHZZiA5rb5yQAkyXWiLGlUFHLasYn0Akr8kD9U4ug+E2hpdBG - Cq0W0BahUGBqyFlWoC5ZpyAN2Kw1SDYCsWKMFJAdwsvV3SsQhhBlM2C+L5oJyxhYlDiAtlZBcrBM - 3201qYDNCBF3mG2oGas7UAHk1taOG9EWsKmXIbs9WPZAzLIbii/gTsG6e5aHiD7ggfMGGRvSMjD1 - ko4XY6kSXcsSJRAWeGgpIthYBDI6CUzfayZ1SXK1ClxrY0QOlX9J6AYjYtwDaltP4IQLecxPStgP - BJJUW6s7jWTYygY8lRStww5ZL6CaLVVwjKDCCKUPAYsWsLCxsQr3sCN8GEPPHnNxko8awPc2Alvt - M1Z9q7sXBYib2A8PIDwkPUi+byQ7HCgVcYS6vzh/+oxNX2wdP+5jPAMss+hBTx26r0fk8XnMooSU - ZVN+KzUNMZV2ndEW4TpSRSWZAX0cAXwdxrn/ZUJNytIlXavc43DdYnZoZ05LdAKvr4+gitp4is9m - y/Ff2q09qqVYzvbBOOta9KfS0/LY3pOcAaMz0X+y+Vvvg3Di8D/tT4BzmBT9OmX05H5VfErLWP8x - /0p7NnkgbArmHTlcK2GuD+GxsX08bL4p+6LYrRvigDllOqx/k9bzm8vLqb25Wi7N6HH0EwAA//8D - APVKo5cMBQAA - headers: - CF-RAY: - - 9be006a818f7f272-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 14 Jan 2026 20:54:01 GMT - Server: - - cloudflare - Set-Cookie: - - __cf_bm=I0BZkhsUPY3DLfzpbwc9OWeppp07t5QFeRu_m00wgog-1768424041-1.0.1.1-gETSipnxLLc0Ghg6dXXsg1IQakBeH94nd_g9cm85Mqk_VLyb4xcziyOgojXuiw.UR2pUSGYqt0Spk59fpXI66AfO3SY8pT5922lFcQiChGg; - path=/; expires=Wed, 14-Jan-26 21:24:01 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=MNlPFDZf9.vuvIjQV4efb5fyMc312F16NmfSey.xJwg-1768424041558-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - access-control-expose-headers: - - X-Request-ID - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - braintrust-data - openai-processing-ms: - - '1749' - 
openai-project: - - proj_vsCSXafhhByzWOThMrJcZiw9 - openai-version: - - '2020-10-01' - x-envoy-upstream-service-time: - - '1776' - x-openai-proxy-wasm: - - v0.1 - x-ratelimit-limit-requests: - - '30000' - x-ratelimit-limit-tokens: - - '150000000' - x-ratelimit-remaining-requests: - - '29999' - x-ratelimit-remaining-tokens: - - '149999937' - x-ratelimit-reset-requests: - - 2ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_4807fef8d5fd407fb648cb38247dd532 - status: - code: 200 - message: OK -- request: - body: '{"data":[{"object":"trace","id":"trace_77059e8449aa48a7ab8b95a280cacb4a","workflow_name":"Agent - workflow","group_id":null,"metadata":null},{"object":"trace.span","id":"span_a1310ceb4e8e4846ada194e7","trace_id":"trace_77059e8449aa48a7ab8b95a280cacb4a","parent_id":"span_d7511d04da724181b44594a8","started_at":"2026-01-14T20:53:56.946594+00:00","ended_at":"2026-01-14T20:53:57.797427+00:00","span_data":{"type":"response","response_id":"resp_0da6aa3826c6950a00696802652e548195ad3a811162e4909c"},"error":null},{"object":"trace.span","id":"span_d7511d04da724181b44594a8","trace_id":"trace_77059e8449aa48a7ab8b95a280cacb4a","parent_id":null,"started_at":"2026-01-14T20:53:56.937476+00:00","ended_at":"2026-01-14T20:53:57.798899+00:00","span_data":{"type":"agent","name":"test-agent","handoffs":[],"tools":[],"output_type":"str"},"error":null},{"object":"trace","id":"trace_53ea0713a8bf4a0b8f59a0d885e1cc5d","workflow_name":"Agent - workflow","group_id":null,"metadata":null},{"object":"trace","id":"trace_35aed2860f7b4703932f1502aa329afc","workflow_name":"Agent - workflow","group_id":null,"metadata":null},{"object":"trace.span","id":"span_838ffe7b3f3045cbbef994df","trace_id":"trace_53ea0713a8bf4a0b8f59a0d885e1cc5d","parent_id":"span_301d4d97ffd34f0b9a247668","started_at":"2026-01-14T20:53:57.804773+00:00","ended_at":"2026-01-14T20:53:58.397553+00:00","span_data":{"type":"response","response_id":"resp_09c189e8ef3ec9020069680265e94c8195be83ab2f06345b49"},"error":null},{"object":"trace.span","id":"span_301d4d97ffd34f0b9a247668","trace_id":"trace_53ea0713a8bf4a0b8f59a0d885e1cc5d","parent_id":null,"started_at":"2026-01-14T20:53:57.804133+00:00","ended_at":"2026-01-14T20:53:58.398525+00:00","span_data":{"type":"agent","name":"agent-a","handoffs":[],"tools":[],"output_type":"str"},"error":null},{"object":"trace.span","id":"span_7d58da841ace461aa1ead190","trace_id":"trace_35aed2860f7b4703932f1502aa329afc","parent_id":"span_1db31ac16c1445428e08dfbb","started_at":"2026-01-14T20:53:57.805195+00:00","ended_at":"2026-01-14T20:53:58.543287+00:00","span_data":{"type":"response","response_id":"resp_043aff85cc8ff2850069680265e8c08194a26ada1e305d4981"},"error":null},{"object":"trace.span","id":"span_1db31ac16c1445428e08dfbb","trace_id":"trace_35aed2860f7b4703932f1502aa329afc","parent_id":null,"started_at":"2026-01-14T20:53:57.804370+00:00","ended_at":"2026-01-14T20:53:58.544328+00:00","span_data":{"type":"agent","name":"agent-b","handoffs":[],"tools":[],"output_type":"str"},"error":null}]}' - headers: - Accept: - - '*/*' - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '2491' - Content-Type: - - application/json - Host: - - api.openai.com - OpenAI-Beta: - - traces=v1 - User-Agent: - - python-httpx/0.28.1 - method: POST - uri: https://api.openai.com/v1/traces/ingest - response: - body: - string: '' - headers: - CF-RAY: - - 9be006b5ae683b92-IAD - Connection: - - keep-alive - Date: - - Wed, 14 Jan 2026 20:54:02 GMT - Server: - - cloudflare - Set-Cookie: - - 
__cf_bm=eIA31fCLRTVyDCgitQLonvU0q.gZ9FMb.axNF5KHAOM-1768424042-1.0.1.1-FHSCThp6LycbpvtcrA21tGEyF5pTfRJTgTy9ZCaDLzCTEyuq8Xi27jN54eVW4OMcKIdOUL0oZpPucWsaBAnzBI4oiHWo61Bbskg7hgCak9o; - path=/; expires=Wed, 14-Jan-26 21:24:02 GMT; domain=.api.openai.com; HttpOnly; - Secure; SameSite=None - - _cfuvid=hEtZSoG7Ssq9XjEFJ71G3C.SdGKcdFQAfb_c_qXS2LM-1768424042060-0.0.1.1-604800000; - path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - braintrust-data - openai-processing-ms: - - '105' - openai-project: - - proj_vsCSXafhhByzWOThMrJcZiw9 - openai-version: - - '2020-10-01' - strict-transport-security: - - max-age=31536000; includeSubDomains; preload - x-envoy-upstream-service-time: - - '107' - x-openai-proxy-wasm: - - v0.1 - x-request-id: - - req_4b033eb97e9750bfbaf34e5d64ed69ed - status: - code: 204 - message: No Content -- request: - body: '{"include":[],"input":[{"content":"Please analyze this text: ''Artificial - intelligence is transforming industries worldwide. Companies are adopting AI - technologies to improve efficiency and innovation. However, challenges like - ethics and job displacement remain concerns.''","role":"user"},{"arguments":"{\"text\":\"Artificial - intelligence is transforming industries worldwide. Companies are adopting AI - technologies to improve efficiency and innovation. However, challenges like - ethics and job displacement remain concerns.\"}","call_id":"call_qr8O7e6F277S3USvt1qFslQz","name":"analyze_text","type":"function_call","id":"fc_0493d21a1f09062200696802671ed88193896f5e51b64ce7d7","status":"completed"},{"call_id":"call_qr8O7e6F277S3USvt1qFslQz","output":"The - text highlights the significant impact of artificial intelligence (AI) on global - industries, noting that organizations are leveraging AI to enhance both efficiency - and innovation. It acknowledges the benefits of adopting these technologies - while also recognizing important challenges, specifically ethical considerations - and the potential for job displacement. The overall tone suggests a balanced - view, underscoring the dual nature of AI''s influence on the workforce and society.","type":"function_call_output"}],"instructions":"You - are a helpful assistant that analyzes text. When asked to analyze text, you - MUST use the analyze_text tool. Always call the tool with the exact text provided - by the user. 
After using the tool, provide a two sentence summary of what the - tool returned.","model":"gpt-4.1","tools":[{"name":"analyze_text","parameters":{"properties":{"text":{"title":"Text","type":"string"}},"required":["text"],"title":"analyze_text_args","type":"object"},"strict":false,"type":"function","description":"Analyze - text and return a structured summary with key points, sentiment, and statistics."}]}' - headers: - Accept: - - application/json - Accept-Encoding: - - gzip, deflate - Connection: - - keep-alive - Content-Length: - - '1864' - Content-Type: - - application/json - Cookie: - - __cf_bm=IwSSJ.RHOyrDHy71t190C0u4EL9HMgUY2jiVoTE3Rx0-1768424037-1.0.1.1-ZID4mvxCwpZVRzFS1fLdN1Y2IWkkn_wazHoPQBolLYHzMoZNRkTFDL0fqX4m.0FY97.b95rhiBzBDf3ubnonNwcnYBcTqnrX4_OgE7Fq6Lw; - _cfuvid=RePkKlnxLbAHj0ymuEQEUhV2_qf3Ejhb0yEUFttOP24-1768424037764-0.0.1.1-604800000 - Host: - - api.openai.com - User-Agent: - - Agents/Python 0.6.5 - X-Stainless-Arch: - - arm64 - X-Stainless-Async: - - async:asyncio - X-Stainless-Lang: - - python - X-Stainless-OS: - - MacOS - X-Stainless-Package-Version: - - 2.15.0 - X-Stainless-Runtime: - - CPython - X-Stainless-Runtime-Version: - - 3.13.3 - x-stainless-read-timeout: - - '600' - x-stainless-retry-count: - - '0' - method: POST - uri: https://api.openai.com/v1/responses - response: - body: - string: !!binary | - H4sIAAAAAAAAAwAAAP//fFZNb+M2EL3nVwx0dgz5I944txz30FO3KIpFIYzIkc01RbLk0I66yH8v - SFmy5E16k/jmSzNvHvXzAaBQsniBwlNwVbndb+R6haum3Je79bosd/vdc7ne7WuU2+fVfvMsnla1 - lE97sdk9rfZPxSKFsPUPEjyEsSZQfy48IZOsMGGrL7vn7XpbblcZC4wcQ/IRtnWamGTvVKM4HbyN - JtXVoA7UHyutlTkUL/DzAQCgcNiRT/6SzqStI188ALz3iYeQd6k3GSXvbfI0Uet80Hj6J5IRXeXI - oOaueIFyWWZMmSFYJYlR6TD1VCawj4KVNflb/rIR0BMgHEm7JmrAEFRgNAx8RAY0qLt/KQDTGy/h - zyMZwHAiCWwHMGML6GyE3/74/RvEQMBHGuAqwcDW6iW86gt2AQRqnU3SKVwUH/MbvaHgHA2ct2cl - SULdZSgG8kt4bZg8xKDMYXRfDLaAwBcLgQyTEQQhti36DmwDl/QpYz5PHL0huewH2OJbZSO7yBXb - E5lZwxKYnKpU8hyxknTq4cHx43a5elyX66fHcvu42l5ZlmMWL/A9E6CnwUjgNhz+h7+NlCLxt97g - altunuu6rLGRmxw5R+HOUY5DIeCBbsBnRM2gsKk5t6Kmhc3CDg2hNx69swEaYxkHAn3/ewZqe3De - 1h8gOdALFN+GITRpY3qO5cGkoUsVRAyBAtT2Sglng2J1JlCtS+SwDbx+BWuAmkYJlZYA0EhQxthz - LmsBGOBCOlEZhDWCvAmAtY0MxEclQnb4YeuUz2kU1JLhJXzlbK5jot1YmD2Tz2y1hkAFqFGjESQX - gOJk7EWTPCQ6jhWjPKNhPFCfRxxRazLpta8dpXWpzmUx9uf9+jS2rPBW5zGM29gbJ8NsVDhMVZGe - k5N97NXHeQppCT4QCOfprGwM1aB9VSbkyGvnbeu4EiiOVJ2o+xTzlKikrJlaeMJgzUz4qGms54lR - Imm/m9fDUQcDNsRdpWQK3CiaqV4gf1aCKlaDjjYYdU/PIrD1NO0AU+vII8d8vLp+/JWG18oa61u8 - vU/on+36ll8rPpOvExe7fumkiu1Nv/shHK0S/dQi22IEwq8SMKRposlafFtQSUF45a5tLV4nCpvp - 1GsXIPQ6Hj3JUeiyjp6oA2eV4bDIUqgSuRfZNymDCqxEWN4SGmz7midifUMTzVpi8mHSpZ4Ijjwr - mp/fd/h2qrgn9Ld7QZl0I7BPxJmA77cVufkU6fZTnuRMxWA6tNkyTdNPv7JCfwjF1GrQvv734OEu - c64v/zbkW/5uIdm6aiJ/5XjopvTz0QgchitVwFoP/xExy/jITWVmF9L6y27xKzC54kdu5eWUN89y - RuP7i267/wj4KO642Z+FZsuob+Bm/TTuR7q9Z3cnMUpkTPHfH97/AwAA//8DAG1C7ibYCQAA - headers: - CF-RAY: - - 9be006b469231fdf-IAD - Connection: - - keep-alive - Content-Encoding: - - gzip - Content-Type: - - application/json - Date: - - Wed, 14 Jan 2026 20:54:03 GMT - Server: - - cloudflare - Strict-Transport-Security: - - max-age=31536000; includeSubDomains; preload - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - nosniff - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - braintrust-data - openai-processing-ms: - - '1621' - openai-project: - - proj_vsCSXafhhByzWOThMrJcZiw9 - openai-version: - - '2020-10-01' - x-envoy-upstream-service-time: - - '1623' - x-ratelimit-limit-requests: - - '10000' - x-ratelimit-limit-tokens: - - 
'30000000' - x-ratelimit-remaining-requests: - - '9999' - x-ratelimit-remaining-tokens: - - '30000000' - x-ratelimit-reset-requests: - - 6ms - x-ratelimit-reset-tokens: - - 0s - x-request-id: - - req_27c452732f6e400bb7923aba68411a23 - status: - code: 200 - message: OK -version: 1 diff --git a/py/src/braintrust/wrappers/openai.py b/py/src/braintrust/wrappers/openai.py index 484a769d..13ce8870 100644 --- a/py/src/braintrust/wrappers/openai.py +++ b/py/src/braintrust/wrappers/openai.py @@ -1,317 +1,6 @@ -""" -Exports `BraintrustTracingProcessor`, a `tracing.TracingProcessor` that logs traces to Braintrust. -""" +"""Compatibility re-export for the migrated OpenAI Agents SDK tracing processor.""" -import datetime -from typing import Any +from braintrust.integrations.openai_agents.tracing import BraintrustTracingProcessor -import braintrust -from agents import tracing -from braintrust.logger import NOOP_SPAN - -def _span_type(span: tracing.Span[Any]) -> braintrust.SpanTypeAttribute: - if span.span_data.type in ["agent", "handoff", "custom", "speech_group"]: - return braintrust.SpanTypeAttribute.TASK - elif span.span_data.type in ["function", "guardrail", "mcp_tools"]: - return braintrust.SpanTypeAttribute.TOOL - elif span.span_data.type in ["generation", "response", "transcription", "speech"]: - return braintrust.SpanTypeAttribute.LLM - else: - return braintrust.SpanTypeAttribute.TASK - - -def _span_name(span: tracing.Span[Any]) -> str: - # TODO(sachin): span name should also come from the span_data. - if ( - isinstance(span.span_data, tracing.AgentSpanData) - or isinstance(span.span_data, tracing.FunctionSpanData) - or isinstance(span.span_data, tracing.GuardrailSpanData) - or isinstance(span.span_data, tracing.CustomSpanData) - ): - return span.span_data.name - elif isinstance(span.span_data, tracing.GenerationSpanData): - return "Generation" - elif isinstance(span.span_data, tracing.ResponseSpanData): - return "Response" - elif isinstance(span.span_data, tracing.HandoffSpanData): - return "Handoff" - elif isinstance(span.span_data, tracing.MCPListToolsSpanData): - if span.span_data.server: - return f"List Tools ({span.span_data.server})" - return "MCP List Tools" - elif isinstance(span.span_data, tracing.TranscriptionSpanData): - return "Transcription" - elif isinstance(span.span_data, tracing.SpeechSpanData): - return "Speech" - elif isinstance(span.span_data, tracing.SpeechGroupSpanData): - return "Speech Group" - else: - return "Unknown" - - -def _timestamp_from_maybe_iso(timestamp: str | None) -> float | None: - if timestamp is None: - return None - return datetime.datetime.fromisoformat(timestamp).timestamp() - - -def _maybe_timestamp_elapsed(end: str | None, start: str | None) -> float | None: - if start is None or end is None: - return None - return (datetime.datetime.fromisoformat(end) - datetime.datetime.fromisoformat(start)).total_seconds() - - -class BraintrustTracingProcessor(tracing.TracingProcessor): - """ - `BraintrustTracingProcessor` is a `tracing.TracingProcessor` that logs traces to Braintrust. - - Args: - logger: A `braintrust.Span` or `braintrust.Experiment` or `braintrust.Logger` to use for logging. - If `None`, the current span, experiment, or logger will be selected exactly as in `braintrust.start_span`. 
- """ - - def __init__(self, logger: braintrust.Span | braintrust.Experiment | braintrust.Logger | None = None): - self._logger = logger - self._spans: dict[str, braintrust.Span] = {} - self._first_input: dict[str, Any] = {} - self._last_output: dict[str, Any] = {} - - def on_trace_start(self, trace: tracing.Trace) -> None: - trace_meta = trace.export() or {} - metadata = { - "group_id": trace_meta.get("group_id"), - **(trace_meta.get("metadata") or {}), - } - - current_context = braintrust.current_span() - if current_context != NOOP_SPAN: - span = current_context.start_span( - name=trace.name, - span_attributes={"type": "task", "name": trace.name}, - metadata=metadata, - ) - elif self._logger is not None: - span = self._logger.start_span( - span_attributes={"type": "task", "name": trace.name}, - span_id=trace.trace_id, - root_span_id=trace.trace_id, - metadata=metadata, - # TODO(sachin): Add start time when SDK provides it. - # start_time=_timestamp_from_maybe_iso(trace.started_at), - ) - else: - span = braintrust.start_span( - id=trace.trace_id, - span_attributes={"type": "task", "name": trace.name}, - metadata=metadata, - # TODO(sachin): Add start time when SDK provides it. - # start_time=_timestamp_from_maybe_iso(trace.started_at), - ) - if span != NOOP_SPAN: - span.set_current() - self._spans[trace.trace_id] = span - - def on_trace_end(self, trace: tracing.Trace) -> None: - span = self._spans.pop(trace.trace_id) - # Get the first input and last output for this specific trace - trace_first_input = self._first_input.pop(trace.trace_id, None) - trace_last_output = self._last_output.pop(trace.trace_id, None) - span.log(input=trace_first_input, output=trace_last_output) - span.end() - span.unset_current() - # TODO(sachin): Add end time when SDK provides it. 
- # span.end(_timestamp_from_maybe_iso(trace.ended_at)) - - def _agent_log_data(self, span: tracing.Span[tracing.AgentSpanData]) -> dict[str, Any]: - return { - "metadata": { - "tools": span.span_data.tools, - "handoffs": span.span_data.handoffs, - "output_type": span.span_data.output_type, - } - } - - def _response_log_data(self, span: tracing.Span[tracing.ResponseSpanData]) -> dict[str, Any]: - data = {} - if span.span_data.input is not None: - data["input"] = span.span_data.input - if span.span_data.response is not None: - data["output"] = span.span_data.response.output - if span.span_data.response is not None: - data["metadata"] = span.span_data.response.metadata or {} - data["metadata"].update( - span.span_data.response.model_dump(exclude={"input", "output", "metadata", "usage"}) - ) - - data["metrics"] = {} - ttft = _maybe_timestamp_elapsed(span.ended_at, span.started_at) - if ttft is not None: - data["metrics"]["time_to_first_token"] = ttft - if span.span_data.response is not None and span.span_data.response.usage is not None: - data["metrics"]["tokens"] = span.span_data.response.usage.total_tokens - data["metrics"]["prompt_tokens"] = span.span_data.response.usage.input_tokens - data["metrics"]["completion_tokens"] = span.span_data.response.usage.output_tokens - - return data - - def _function_log_data(self, span: tracing.Span[tracing.FunctionSpanData]) -> dict[str, Any]: - return { - "input": span.span_data.input, - "output": span.span_data.output, - } - - def _handoff_log_data(self, span: tracing.Span[tracing.HandoffSpanData]) -> dict[str, Any]: - return { - "metadata": { - "from_agent": span.span_data.from_agent, - "to_agent": span.span_data.to_agent, - } - } - - def _guardrail_log_data(self, span: tracing.Span[tracing.GuardrailSpanData]) -> dict[str, Any]: - return { - "metadata": { - "triggered": span.span_data.triggered, - } - } - - def _generation_log_data(self, span: tracing.Span[tracing.GenerationSpanData]) -> dict[str, Any]: - metrics = {} - ttft = _maybe_timestamp_elapsed(span.ended_at, span.started_at) - - if ttft is not None: - metrics["time_to_first_token"] = ttft - - usage = span.span_data.usage or {} - if "prompt_tokens" in usage: - metrics["prompt_tokens"] = usage["prompt_tokens"] - elif "input_tokens" in usage: - metrics["prompt_tokens"] = usage["input_tokens"] - - if "completion_tokens" in usage: - metrics["completion_tokens"] = usage["completion_tokens"] - elif "output_tokens" in usage: - metrics["completion_tokens"] = usage["output_tokens"] - - if "total_tokens" in usage: - metrics["tokens"] = usage["total_tokens"] - elif "input_tokens" in usage and "output_tokens" in usage: - metrics["tokens"] = usage["input_tokens"] + usage["output_tokens"] - - return { - "input": span.span_data.input, - "output": span.span_data.output, - "metadata": { - "model": span.span_data.model, - "model_config": span.span_data.model_config, - }, - "metrics": metrics, - } - - def _custom_log_data(self, span: tracing.Span[tracing.CustomSpanData]) -> dict[str, Any]: - return span.span_data.data - - def _mcp_list_tools_log_data(self, span: tracing.Span[tracing.MCPListToolsSpanData]) -> dict[str, Any]: - return { - "output": span.span_data.result, - "metadata": { - "server": span.span_data.server, - }, - } - - def _transcription_log_data(self, span: tracing.Span[tracing.TranscriptionSpanData]) -> dict[str, Any]: - return { - "input": span.span_data.input, - "output": span.span_data.output, - "metadata": { - "model": span.span_data.model, - "model_config": span.span_data.model_config, - }, - 
} - - def _speech_log_data(self, span: tracing.Span[tracing.SpeechSpanData]) -> dict[str, Any]: - return { - "input": span.span_data.input, - "output": span.span_data.output, - "metadata": { - "model": span.span_data.model, - "model_config": span.span_data.model_config, - }, - } - - def _speech_group_log_data(self, span: tracing.Span[tracing.SpeechGroupSpanData]) -> dict[str, Any]: - return { - "input": span.span_data.input, - } - - def _log_data(self, span: tracing.Span[Any]) -> dict[str, Any]: - if isinstance(span.span_data, tracing.AgentSpanData): - return self._agent_log_data(span) - elif isinstance(span.span_data, tracing.ResponseSpanData): - return self._response_log_data(span) - elif isinstance(span.span_data, tracing.FunctionSpanData): - return self._function_log_data(span) - elif isinstance(span.span_data, tracing.HandoffSpanData): - return self._handoff_log_data(span) - elif isinstance(span.span_data, tracing.GuardrailSpanData): - return self._guardrail_log_data(span) - elif isinstance(span.span_data, tracing.GenerationSpanData): - return self._generation_log_data(span) - elif isinstance(span.span_data, tracing.CustomSpanData): - return self._custom_log_data(span) - elif isinstance(span.span_data, tracing.MCPListToolsSpanData): - return self._mcp_list_tools_log_data(span) - elif isinstance(span.span_data, tracing.TranscriptionSpanData): - return self._transcription_log_data(span) - elif isinstance(span.span_data, tracing.SpeechSpanData): - return self._speech_log_data(span) - elif isinstance(span.span_data, tracing.SpeechGroupSpanData): - return self._speech_group_log_data(span) - else: - return {} - - def on_span_start(self, span: tracing.Span[tracing.SpanData]) -> None: - if span.parent_id is not None: - parent = self._spans[span.parent_id] - else: - parent = self._spans[span.trace_id] - created_span = parent.start_span( - id=span.span_id, - name=_span_name(span), - type=_span_type(span), - start_time=_timestamp_from_maybe_iso(span.started_at), - ) - self._spans[span.span_id] = created_span - - # Set the span as current so current_span() calls will return it - created_span.set_current() - - def on_span_end(self, span: tracing.Span[tracing.SpanData]) -> None: - s = self._spans.pop(span.span_id) - event = dict(error=span.error, **self._log_data(span)) - s.log(**event) - s.unset_current() - s.end(_timestamp_from_maybe_iso(span.ended_at)) - - input_ = event.get("input") - output = event.get("output") - # Store first input and last output per trace_id - trace_id = span.trace_id - if trace_id not in self._first_input and input_ is not None: - self._first_input[trace_id] = input_ - - if output is not None: - self._last_output[trace_id] = output - - def shutdown(self) -> None: - if self._logger is not None: - self._logger.flush() - else: - braintrust.flush() - - def force_flush(self) -> None: - if self._logger is not None: - self._logger.flush() - else: - braintrust.flush() +__all__ = ["BraintrustTracingProcessor"]
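
For reviewers: a minimal usage sketch of the refactored entry points, not part of the diff. It assumes `openai` and `openai-agents` are installed, Braintrust is configured via the usual environment variables (e.g. `BRAINTRUST_API_KEY`), and the Agents SDK's top-level `add_trace_processor` registration hook.

```python
import braintrust
from agents import add_trace_processor
from openai import OpenAI

# The old import path still works thanks to the compatibility re-export above.
from braintrust.wrappers.openai import BraintrustTracingProcessor

# Per-client wrapping: wrap_openai now lives in braintrust.integrations.openai,
# and the top-level braintrust.wrap_openai continues to resolve to it.
client = braintrust.wrap_openai(OpenAI())

# Register the processor so Agents SDK traces and spans are mirrored to Braintrust.
add_trace_processor(BraintrustTracingProcessor())
```

Either import path resolves to the same class, so downstream code should not need changes when upgrading.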