Skip to content

Commit 19ecb8a

Browse files
authored
fix(invoke): encode request body as UTF-8 bytes to prevent Latin-1 corruption (#194)
`invoke()` passed the JSON body as a Python `str` to `requests.post(data=...)`, which encodes `str` bodies as Latin-1 by default. Any non-Latin-1 character in the payload (em dashes, smart quotes, etc.) either raised a `UnicodeEncodeError` or was silently corrupted before the request was sent.

- Encode `bt_dumps()` output to UTF-8 bytes before passing to `data=`
- Add `Content-Type: application/json` header (was missing; only `Accept` was set)

`bt_dumps` is kept — it handles Pydantic models, dataclasses, and NaN/Inf values that stdlib `json` cannot serialize. Other SDK paths (`logger.py`) already use `.encode("utf-8")` correctly; this brings `invoke()` in line.

Fixes BT-4620
1 parent 0a708e5 commit 19ecb8a

File tree

3 files changed

+68
-4
lines changed

3 files changed

+68
-4
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
interactions:
2+
- request:
3+
body: '{"api_version": 1, "input": {"text": "result \u2014 excellent"}, "metadata":
4+
null, "parent": "", "project_name": "test-project", "slug": "test-fn", "stream":
5+
false, "tags": null}'
6+
headers:
7+
Accept:
8+
- application/json
9+
Accept-Encoding:
10+
- gzip, deflate
11+
Connection:
12+
- keep-alive
13+
Content-Type:
14+
- application/json
15+
User-Agent:
16+
- python-requests/2.32.5
17+
method: POST
18+
uri: https://proxy.braintrust.ai/function/invoke
19+
response:
20+
body:
21+
string: '{"output": "result \u2014 excellent"}'
22+
headers:
23+
Content-Type:
24+
- application/json; charset=utf-8
25+
status:
26+
code: 200
27+
message: OK
28+
version: 1
29+

py/src/braintrust/functions/invoke.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,17 @@ def invoke(
198198
if strict is not None:
199199
request["strict"] = strict
200200

201-
headers = {"Accept": "text/event-stream" if stream else "application/json"}
201+
headers = {
202+
"Accept": "text/event-stream" if stream else "application/json",
203+
"Content-Type": "application/json",
204+
}
202205
if project_id is not None:
203206
headers["x-bt-project-id"] = project_id
204207
if org_name is not None:
205208
headers["x-bt-org-name"] = org_name
206209

207210
request_json = bt_dumps(request)
208-
resp = proxy_conn().post("function/invoke", data=request_json, headers=headers, stream=stream)
211+
resp = proxy_conn().post("function/invoke", data=request_json.encode("utf-8"), headers=headers, stream=stream)
209212
if resp.status_code == 500:
210213
raise BraintrustInvokeError(resp.text)
211214

py/src/braintrust/functions/test_invoke.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import pytest
77
from braintrust.functions.invoke import init_function, invoke
8-
from braintrust.logger import _internal_get_global_state, _internal_reset_global_state
8+
from braintrust.logger import TEST_API_KEY, _internal_get_global_state, _internal_reset_global_state
99

1010

1111
class TestInitFunction:
@@ -83,7 +83,9 @@ def _invoke_with_messages(messages):
8383
kwargs = mock_conn.post.call_args.kwargs
8484
assert "data" in kwargs, "invoke must use data= (bt_dumps) not json= (json.dumps) (see issue 38)"
8585
assert "json" not in kwargs
86-
return json.loads(kwargs["data"])
86+
data = kwargs["data"]
87+
assert isinstance(data, bytes), "body must be bytes so requests does not re-encode as Latin-1"
88+
return json.loads(data.decode("utf-8"))
8789

8890

8991
def test_invoke_serializes_openai_messages():
@@ -114,3 +116,33 @@ def test_invoke_serializes_google_messages():
114116
msg = google_types.Content(role="model", parts=[google_types.Part(text="The answer is X.")])
115117
parsed = _invoke_with_messages([msg])
116118
assert isinstance(parsed, dict) and parsed
119+
120+
121+
@pytest.mark.vcr
def test_invoke_encodes_body_as_utf8_bytes(monkeypatch):
    """Check that invoke() round-trips non-Latin-1 text (regression for BT-4620).

    invoke() serializes the request with bt_dumps() and hands the result to
    requests.post(data=...). That body has to be UTF-8 bytes rather than a
    str: a str body is re-encoded as Latin-1, which either raises
    UnicodeEncodeError or silently corrupts non-ASCII characters.

    TEST_API_KEY is used so that no HTTP login happens and the cassette only
    has to hold the single POST to /function/invoke. The proxy/API URL
    environment overrides are removed so the request always targets the
    stub proxy URL (https://proxy.braintrust.ai) that the cassette records.
    """
    # Drop local overrides that would point the request away from the cassette URL.
    for env_var in ("BRAINTRUST_PROXY_URL", "BRAINTRUST_API_URL"):
        monkeypatch.delenv(env_var, raising=False)
    _internal_reset_global_state()

    # U+2014 (em dash) is outside Latin-1, so it triggers the bug when the body is a str.
    text = "result \u2014 excellent"
    response = invoke(
        project_name="test-project",
        slug="test-fn",
        input={"text": text},
        parent="",  # skip span-parent lookup; no extra HTTP call needed
        api_key=TEST_API_KEY,
    )
    assert response["output"] == text

0 commit comments

Comments
 (0)