diff --git a/py/src/braintrust/functions/cassettes/test_invoke_encodes_body_as_utf8_bytes.yaml b/py/src/braintrust/functions/cassettes/test_invoke_encodes_body_as_utf8_bytes.yaml new file mode 100644 index 00000000..64123a0a --- /dev/null +++ b/py/src/braintrust/functions/cassettes/test_invoke_encodes_body_as_utf8_bytes.yaml @@ -0,0 +1,29 @@ +interactions: +- request: + body: '{"api_version": 1, "input": {"text": "result \u2014 excellent"}, "metadata": + null, "parent": "", "project_name": "test-project", "slug": "test-fn", "stream": + false, "tags": null}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.5 + method: POST + uri: https://proxy.braintrust.ai/function/invoke + response: + body: + string: '{"output": "result \u2014 excellent"}' + headers: + Content-Type: + - application/json; charset=utf-8 + status: + code: 200 + message: OK +version: 1 + diff --git a/py/src/braintrust/functions/invoke.py b/py/src/braintrust/functions/invoke.py index 85e471fc..7868356f 100644 --- a/py/src/braintrust/functions/invoke.py +++ b/py/src/braintrust/functions/invoke.py @@ -198,14 +198,17 @@ def invoke( if strict is not None: request["strict"] = strict - headers = {"Accept": "text/event-stream" if stream else "application/json"} + headers = { + "Accept": "text/event-stream" if stream else "application/json", + "Content-Type": "application/json", + } if project_id is not None: headers["x-bt-project-id"] = project_id if org_name is not None: headers["x-bt-org-name"] = org_name request_json = bt_dumps(request) - resp = proxy_conn().post("function/invoke", data=request_json, headers=headers, stream=stream) + resp = proxy_conn().post("function/invoke", data=request_json.encode("utf-8"), headers=headers, stream=stream) if resp.status_code == 500: raise BraintrustInvokeError(resp.text) diff --git a/py/src/braintrust/functions/test_invoke.py b/py/src/braintrust/functions/test_invoke.py index 264217e2..650f5c81 100644 --- a/py/src/braintrust/functions/test_invoke.py +++ b/py/src/braintrust/functions/test_invoke.py @@ -5,7 +5,7 @@ import pytest from braintrust.functions.invoke import init_function, invoke -from braintrust.logger import _internal_get_global_state, _internal_reset_global_state +from braintrust.logger import TEST_API_KEY, _internal_get_global_state, _internal_reset_global_state class TestInitFunction: @@ -83,7 +83,9 @@ def _invoke_with_messages(messages): kwargs = mock_conn.post.call_args.kwargs assert "data" in kwargs, "invoke must use data= (bt_dumps) not json= (json.dumps) (see issue 38)" assert "json" not in kwargs - return json.loads(kwargs["data"]) + data = kwargs["data"] + assert isinstance(data, bytes), "body must be bytes so requests does not re-encode as Latin-1" + return json.loads(data.decode("utf-8")) def test_invoke_serializes_openai_messages(): @@ -114,3 +116,33 @@ def test_invoke_serializes_google_messages(): msg = google_types.Content(role="model", parts=[google_types.Part(text="The answer is X.")]) parsed = _invoke_with_messages([msg]) assert isinstance(parsed, dict) and parsed + + +@pytest.mark.vcr +def test_invoke_encodes_body_as_utf8_bytes(monkeypatch): + """Regression test for BT-4620: non-Latin-1 Unicode must not be corrupted. + + When invoke() serializes the request body via bt_dumps() and passes it to + requests.post(data=...), the body must be UTF-8 encoded bytes — not a str. + Passing a str causes requests to re-encode with Latin-1, which raises + UnicodeEncodeError (or silently corrupts data) for characters outside U+007F. + + Uses TEST_API_KEY to skip the HTTP login entirely, so the cassette only needs + to capture the single POST to /function/invoke. BRAINTRUST_PROXY_URL is + cleared so the proxy URL is always the predictable test stub value + (https://proxy.braintrust.ai) regardless of the local environment. + """ + # Prevent local env overrides from changing the proxy URL used in the cassette. + monkeypatch.delenv("BRAINTRUST_PROXY_URL", raising=False) + monkeypatch.delenv("BRAINTRUST_API_URL", raising=False) + _internal_reset_global_state() + + em_dash = "\u2014" # — (U+2014) is outside Latin-1; triggers the bug when body is str + result = invoke( + project_name="test-project", + slug="test-fn", + input={"text": f"result {em_dash} excellent"}, + parent="", # skip span-parent lookup; no extra HTTP call needed + api_key=TEST_API_KEY, + ) + assert result["output"] == f"result {em_dash} excellent"