Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions py/src/braintrust/integrations/anthropic/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ def __getattr__(self, name: str) -> Any:
("ephemeral_1h_input_tokens", "prompt_cache_creation_ephemeral_1h_tokens"),
)

_ANTHROPIC_SERVER_TOOL_USE_METRIC_FIELDS = (
("web_search_requests", "server_tool_use_web_search_requests"),
("web_fetch_requests", "server_tool_use_web_fetch_requests"),
)

_ANTHROPIC_USAGE_METADATA_FIELDS = frozenset(
{
"service_tier",
Expand Down Expand Up @@ -95,8 +90,8 @@ def extract_anthropic_usage(usage: Any) -> tuple[dict[str, float], dict[str, Any

server_tool_use = _try_to_dict(usage.get("server_tool_use"))
if server_tool_use is not None:
for source_name, metric_name in _ANTHROPIC_SERVER_TOOL_USE_METRIC_FIELDS:
_set_numeric_metric(metrics, metric_name, server_tool_use.get(source_name))
for source_name, value in server_tool_use.items():
_set_numeric_metric(metrics, f"server_tool_use_{source_name}", value)

if "prompt_cache_creation_tokens" not in metrics and cache_creation_breakdown:
metrics["prompt_cache_creation_tokens"] = sum(cache_creation_breakdown)
Expand Down
34 changes: 34 additions & 0 deletions py/src/braintrust/integrations/anthropic/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ def test_log_message_to_span_includes_stop_reason_and_stop_sequence():
"output_tokens": 7,
"cache_read_input_tokens": 0,
"cache_creation_input_tokens": 0,
"server_tool_use": {
"web_search_requests": 2,
"web_fetch_requests": 1,
},
},
)

Expand All @@ -71,13 +75,43 @@ def test_log_message_to_span_includes_stop_reason_and_stop_sequence():
"completion_tokens": 7.0,
"prompt_cached_tokens": 0.0,
"prompt_cache_creation_tokens": 0.0,
"server_tool_use_web_search_requests": 2.0,
"server_tool_use_web_fetch_requests": 1.0,
"tokens": 18.0,
"time_to_first_token": 0.123,
},
metadata={},
)


def test_extract_anthropic_usage_includes_server_tool_use_metrics_from_objects():
    """Every field on ``usage.server_tool_use`` becomes a
    ``server_tool_use_<field>`` metric, not just a fixed allowlist.

    ``code_execution_requests`` is the key case: it was absent from the
    removed ``_ANTHROPIC_SERVER_TOOL_USE_METRIC_FIELDS`` tuple, so it only
    appears in the output if extraction iterates the object dynamically.
    """
    # SimpleNamespace stands in for the SDK's attribute-style usage object
    # (extract_anthropic_usage converts it via _try_to_dict — see _utils.py).
    usage = SimpleNamespace(
        input_tokens=11,
        output_tokens=7,
        cache_read_input_tokens=3,
        cache_creation_input_tokens=2,
        server_tool_use=SimpleNamespace(
            web_search_requests=2,
            web_fetch_requests=1,
            # Not in any hard-coded field list — proves dynamic extraction.
            code_execution_requests=4,
        ),
    )

    metrics, metadata = extract_anthropic_usage(usage)

    assert metrics == {
        # 16 = 11 input + 3 cache_read + 2 cache_creation (presumably the
        # prompt-token rollup — confirm against extract_anthropic_usage).
        "prompt_tokens": 16.0,
        "completion_tokens": 7.0,
        "prompt_cached_tokens": 3.0,
        "prompt_cache_creation_tokens": 2.0,
        "server_tool_use_web_search_requests": 2.0,
        "server_tool_use_web_fetch_requests": 1.0,
        "server_tool_use_code_execution_requests": 4.0,
        "tokens": 23.0,
    }
    # None of the fixture's fields map to metadata, so it stays empty.
    assert metadata == {}


@pytest.mark.vcr
def test_anthropic_messages_create_stream_true(memory_logger):
assert not memory_logger.pop()
Expand Down
Loading