From 7484fb7954afe905b8c7c9e0ca6fbb11e6da00fa Mon Sep 17 00:00:00 2001 From: Harrison Weinstock Date: Fri, 8 May 2026 12:31:45 +0000 Subject: [PATCH] fix: remove double-base64 encoding in upload_file/download_file (#458) --- .github/workflows/integration-testing.yml | 5 ++ .../tools/code_interpreter_client.py | 11 ++- .../tools/test_code_interpreter_client.py | 25 +++---- .../tools/test_code_interpreter_client.py | 72 +++++++++++++++++++ 4 files changed, 92 insertions(+), 21 deletions(-) create mode 100644 tests_integ/tools/test_code_interpreter_client.py diff --git a/.github/workflows/integration-testing.yml b/.github/workflows/integration-testing.yml index 37c0e7f4..136e3dfe 100644 --- a/.github/workflows/integration-testing.yml +++ b/.github/workflows/integration-testing.yml @@ -158,6 +158,11 @@ jobs: timeout: 15 extra-deps: "strands-agents strands-agents-tools" ignore: "" + - group: tools + path: tests_integ/tools + timeout: 15 + extra-deps: "" + ignore: "" steps: - name: Configure Credentials uses: aws-actions/configure-aws-credentials@v6 diff --git a/src/bedrock_agentcore/tools/code_interpreter_client.py b/src/bedrock_agentcore/tools/code_interpreter_client.py index 359e5d0b..147f8d08 100644 --- a/src/bedrock_agentcore/tools/code_interpreter_client.py +++ b/src/bedrock_agentcore/tools/code_interpreter_client.py @@ -4,7 +4,6 @@ applications to start, stop, and invoke code execution in a managed sandbox environment. """ -import base64 import logging import re import uuid @@ -482,7 +481,7 @@ def upload_file( 'scripts/analysis.py'). Must be relative to the working directory. Absolute paths starting with '/' are not allowed. content: File content as string (text files) or bytes (binary files). - Binary content will be base64 encoded automatically. + Binary content will be encoded automatically by botocore. description: Optional semantic description of the file contents. 
This is stored as metadata and can help LLMs understand the data structure (e.g., "CSV with columns: date, revenue, product_id"). @@ -514,7 +513,7 @@ def upload_file( # Handle binary content if isinstance(content, bytes): - file_content = {"path": path, "blob": base64.b64encode(content).decode("utf-8")} + file_content = {"path": path, "blob": content} else: file_content = {"path": path, "text": content} @@ -564,7 +563,7 @@ def upload_files( raise ValueError(f"Path must be relative, not absolute. Got: {path}") if isinstance(content, bytes): - file_contents.append({"path": path, "blob": base64.b64encode(content).decode("utf-8")}) + file_contents.append({"path": path, "blob": content}) else: file_contents.append({"path": path, "text": content}) @@ -649,7 +648,7 @@ def download_file( if "text" in resource: return resource["text"] elif "blob" in resource: - raw = base64.b64decode(resource["blob"]) + raw = resource["blob"] try: return raw.decode("utf-8") except (UnicodeDecodeError, ValueError): @@ -690,7 +689,7 @@ def download_files( if "text" in resource: files[file_path] = resource["text"] elif "blob" in resource: - raw = base64.b64decode(resource["blob"]) + raw = resource["blob"] try: files[file_path] = raw.decode("utf-8") except (UnicodeDecodeError, ValueError): diff --git a/tests/bedrock_agentcore/tools/test_code_interpreter_client.py b/tests/bedrock_agentcore/tools/test_code_interpreter_client.py index 12796609..8c968481 100644 --- a/tests/bedrock_agentcore/tools/test_code_interpreter_client.py +++ b/tests/bedrock_agentcore/tools/test_code_interpreter_client.py @@ -1,4 +1,3 @@ -import base64 import datetime from unittest.mock import ANY, MagicMock, patch @@ -657,7 +656,6 @@ def test_upload_file_binary_content(self, mock_boto3): client.data_plane_client.invoke_code_interpreter.return_value = mock_response binary_content = b"\x89PNG\r\n\x1a\n" # PNG header bytes - expected_b64 = base64.b64encode(binary_content).decode("utf-8") # Act result = 
client.upload_file(path="image.png", content=binary_content) @@ -667,7 +665,7 @@ def test_upload_file_binary_content(self, mock_boto3): codeInterpreterIdentifier="test.identifier", sessionId="test-session-id", name="writeFiles", - arguments={"content": [{"path": "image.png", "blob": expected_b64}]}, + arguments={"content": [{"path": "image.png", "blob": binary_content}]}, ) assert result == mock_response @@ -758,7 +756,6 @@ def test_upload_files_mixed_content(self, mock_boto3): client.data_plane_client.invoke_code_interpreter.return_value = mock_response binary_content = b"\x00\x01\x02\x03" - expected_b64 = base64.b64encode(binary_content).decode("utf-8") files = [ {"path": "text.txt", "content": "hello world"}, @@ -776,7 +773,7 @@ def test_upload_files_mixed_content(self, mock_boto3): arguments={ "content": [ {"path": "text.txt", "text": "hello world"}, - {"path": "binary.bin", "blob": expected_b64}, + {"path": "binary.bin", "blob": binary_content}, ] }, ) @@ -963,14 +960,13 @@ def test_download_file_binary(self, mock_boto3): client.session_id = "test-session-id" binary_content = b"\x89PNG\r\n\x1a\n" # PNG header bytes - encoded_content = base64.b64encode(binary_content).decode("utf-8") mock_response = { "stream": [ { "result": { "content": [ - {"type": "resource", "resource": {"uri": "file://image.png", "blob": encoded_content}} + {"type": "resource", "resource": {"uri": "file://image.png", "blob": binary_content}} ] } } @@ -997,14 +993,16 @@ def test_download_file_blob_utf8_returns_str(self, mock_boto3): client.session_id = "test-session-id" text_content = "hello world" - encoded_content = base64.b64encode(text_content.encode("utf-8")).decode("utf-8") mock_response = { "stream": [ { "result": { "content": [ - {"type": "resource", "resource": {"uri": "file://data.bin", "blob": encoded_content}} + { + "type": "resource", + "resource": {"uri": "file://data.bin", "blob": text_content.encode("utf-8")}, + } ] } } @@ -1102,7 +1100,6 @@ def 
test_download_files_binary(self, mock_boto3): client.session_id = "test-session-id" binary_content = b"\x89PNG\r\n\x1a\n" # PNG header bytes - encoded_binary = base64.b64encode(binary_content).decode("utf-8") mock_response = { "stream": [ @@ -1120,7 +1117,7 @@ def test_download_files_binary(self, mock_boto3): "type": "resource", "resource": { "uri": "file:///opt/amazon/genesis1p-tools/var/chart.png", - "blob": encoded_binary, + "blob": binary_content, }, }, ] @@ -1150,9 +1147,7 @@ def test_download_files_blob_utf8_returns_str(self, mock_boto3): client.session_id = "test-session-id" text_content = "some utf-8 blob content" - encoded_text = base64.b64encode(text_content.encode("utf-8")).decode("utf-8") binary_content = b"\x89PNG\r\n\x1a\n" - encoded_binary = base64.b64encode(binary_content).decode("utf-8") mock_response = { "stream": [ @@ -1163,14 +1158,14 @@ def test_download_files_blob_utf8_returns_str(self, mock_boto3): "type": "resource", "resource": { "uri": "file:///opt/amazon/genesis1p-tools/var/data.bin", - "blob": encoded_text, + "blob": text_content.encode("utf-8"), }, }, { "type": "resource", "resource": { "uri": "file:///opt/amazon/genesis1p-tools/var/chart.png", - "blob": encoded_binary, + "blob": binary_content, }, }, ] diff --git a/tests_integ/tools/test_code_interpreter_client.py b/tests_integ/tools/test_code_interpreter_client.py new file mode 100644 index 00000000..3a7c8c7b --- /dev/null +++ b/tests_integ/tools/test_code_interpreter_client.py @@ -0,0 +1,72 @@ +"""Integration tests for CodeInterpreter client. 
+
+Run with:
+    uv run pytest tests_integ/tools/test_code_interpreter_client.py -xvs
+"""
+
+import os
+
+import pytest
+
+from bedrock_agentcore.tools.code_interpreter_client import CodeInterpreter
+
+# 67 bytes of binary data; base64 encoding would produce 92 bytes
+PAYLOAD = b"\x89PNG\r\n\x1a\n" + bytes(range(59))
+EXPECTED_SIZE = len(PAYLOAD)  # 67
+
+
+def _extract_stdout(stream):
+    """Return the first non-empty output found in an execute_code stream response.
+
+    Prefers ``structuredContent.stdout`` and falls back to ``str(content)``; raises
+    AssertionError when no event in the stream carries either.
+    """
+    for event in stream:
+        result = event.get("result") or {}  # tolerate a missing or null "result"
+        output = (result.get("structuredContent") or {}).get("stdout") or result.get("content")
+        if output:
+            return str(output)
+    raise AssertionError("stdout not found in stream response")
+
+
+@pytest.mark.integration
+class TestCodeInterpreterClient:
+    """Integration tests for CodeInterpreter client."""
+
+    @classmethod
+    def setup_class(cls):
+        cls.region = os.environ.get("BEDROCK_TEST_REGION", "us-east-1")
+        cls.client = CodeInterpreter(cls.region)
+        cls.client.start()
+        try:
+            cls.client.upload_file(path="test.bin", content=PAYLOAD)
+        except Exception:
+            cls.client.stop()  # teardown_class is skipped when setup_class raises
+            raise
+
+    @classmethod
+    def teardown_class(cls):
+        cls.client.stop()
+
+    def test_upload_file_writes_correct_size(self):
+        """upload_file with binary bytes writes the correct size to disk (not double-base64 encoded)."""
+        result = self.client.execute_code("import os\nprint(os.path.getsize('test.bin'))")
+        stdout = _extract_stdout(result["stream"])
+        # Only purely numeric lines count; the last one is taken as the printed size.
+        sizes = [line.strip() for line in stdout.splitlines() if line.strip().isdigit()]
+        disk_size = int(sizes[-1]) if sizes else None
+
+        assert disk_size is not None, "Could not parse disk size from stdout"
+        assert disk_size == EXPECTED_SIZE, (
+            f"Expected {EXPECTED_SIZE} bytes on disk, got {disk_size} (92 would indicate double-base64 encoding)"
+        )
+
+    def test_download_file_returns_original_bytes(self):
+        """download_file returns the exact original bytes that were uploaded."""
+        downloaded = self.client.download_file("test.bin")
+
+        assert isinstance(downloaded, bytes), f"Expected bytes, got {type(downloaded).__name__}"
+        assert downloaded == PAYLOAD, (
+            f"Downloaded content does not match original payload. "
+            f"Got {len(downloaded)} bytes, expected {EXPECTED_SIZE} bytes."
+        )