-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_image_support.py
More file actions
226 lines (169 loc) · 6.9 KB
/
test_image_support.py
File metadata and controls
226 lines (169 loc) · 6.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#!/usr/bin/env python3
"""
Test script for image support in Python Agent
This script tests the new multimodal capabilities by:
1. Creating a test image
2. Testing image encoding/decoding
3. Testing message creation with images
4. Testing API provider with multimodal content
"""
import asyncio
import os
import sys
from pathlib import Path
# Add the python_agent to the path
sys.path.insert(0, str(Path(__file__).parent / "python_agent"))
from python_agent.utils.image_utils import (
encode_image_to_base64,
create_anthropic_image_block,
is_image_file,
validate_image_size
)
from python_agent.core.message import Message, MessageType, MessageState
from python_agent.core.config import AgentConfig, ApiConfiguration, ApiProviderType
def test_image_utils(
    image_path: str = "temp_images/mcp_image_20250705_185030_891449.jpeg",
) -> bool:
    """Exercise the image utility helpers against a sample image on disk.

    Runs, in order: image-file detection, size validation, base64 encoding
    and Anthropic image-block creation, printing one status line per step.

    Args:
        image_path: Path of the image to test with. Defaults to the
            screenshot path this script has always used, so callers that
            pass no argument behave as before.

    Returns:
        True when every checked step succeeded. False when the image file
        is missing, is not detected as an image, or when base64 encoding
        or image-block creation returns a falsy result.
    """
    print("=== Testing Image Utilities ===")

    # Nothing below can work without the sample image, so bail out early.
    if not Path(image_path).exists():
        print(f"❌ Test image not found: {image_path}")
        return False

    # Image file detection: a negative answer is a test failure, not just
    # something to print.
    detected = is_image_file(image_path)
    print(f"Is image file: {detected}")
    if not detected:
        print("❌ Test image was not detected as an image file")
        return False

    # Size validation is only reported here.
    # NOTE(review): the return shape of validate_image_size is not visible
    # in this file - confirm it is a plain bool before gating on it.
    print(f"Valid image size: {validate_image_size(image_path)}")

    # Base64 encoding
    base64_data = encode_image_to_base64(image_path)
    if not base64_data:
        print("❌ Base64 encoding failed")
        return False
    print(f"✅ Base64 encoding successful (length: {len(base64_data)})")

    # Anthropic image block creation
    image_block = create_anthropic_image_block(image_path)
    if not image_block:
        print("❌ Anthropic image block creation failed")
        return False
    print("✅ Anthropic image block created successfully")
    print(f" Block type: {image_block['type']}")
    print(f" Media type: {image_block['source']['media_type']}")
    print(f" Data length: {len(image_block['source']['data'])}")

    return True
def test_message_with_images(
    image_path: str = "temp_images/mcp_image_20250705_185030_891449.jpeg",
) -> bool:
    """Build a user ``Message`` carrying one image and inspect its content.

    Creates the message, prints whether it reports having images, then
    prints the type of every block returned by
    ``create_multimodal_content()``.

    Args:
        image_path: Path of the image to attach. Defaults to the
            screenshot path this script has always used.

    Returns:
        True when the message reports having images and the multimodal
        content contains at least one block whose ``type`` is ``'image'``.
        False when the image file is missing or either check fails.
    """
    print("\n=== Testing Message with Images ===")

    if not Path(image_path).exists():
        print(f"❌ Test image not found: {image_path}")
        return False

    # Create a message with images
    message = Message(
        type=MessageType.USER,
        content="Please analyze this screenshot",
        images=[image_path],
    )

    has_images = message.has_images()
    print(f"Message has images: {has_images}")
    print(f"Number of images: {len(message.images)}")

    # Multimodal content creation: print every block and count the image
    # blocks so the outcome can be verified rather than only displayed.
    multimodal_content = message.create_multimodal_content()
    print(f"Multimodal content blocks: {len(multimodal_content)}")

    image_block_count = 0
    for i, block in enumerate(multimodal_content):
        print(f" Block {i}: {block['type']}")
        if block['type'] == 'image':
            image_block_count += 1
            print(f" Media type: {block['source']['media_type']}")

    if not has_images:
        print("❌ Message does not report having images")
        return False
    if image_block_count == 0:
        print("❌ Multimodal content contains no image block")
        return False

    return True
def test_message_state_with_images(
    image_path: str = "temp_images/mcp_image_20250705_185030_891449.jpeg",
) -> bool:
    """Start a ``MessageState`` turn with an image and inspect the history.

    Starts a new turn that carries one image, fetches the conversation
    history and prints the role, content type and block types of the
    first entry.

    Args:
        image_path: Path of the image to attach. Defaults to the
            screenshot path this script has always used.

    Returns:
        True when the history is non-empty and the first entry's
        ``content`` is a list of blocks (the same criterion the agent test
        in this file uses for "correctly formatted" multimodal content).
        False when the image file is missing or that criterion is not met.
    """
    print("\n=== Testing MessageState with Images ===")

    if not Path(image_path).exists():
        print(f"❌ Test image not found: {image_path}")
        return False

    message_state = MessageState()

    # Start a turn with images
    turn = message_state.start_new_turn(
        "Analyze this screenshot and tell me what you see",
        images=[image_path],
    )
    print(f"Turn created with {len(turn.user_message.images)} images")

    # Get conversation history for API
    history = message_state.get_conversation_history()
    print(f"History entries: {len(history)}")

    # A turn was just started, so an empty history is a failure.
    if not history:
        print("❌ Conversation history is empty")
        return False

    user_message = history[0]
    content = user_message['content']
    print(f"User message role: {user_message['role']}")
    print(f"Content type: {type(content)}")

    # A message with images is expected to be a list of content blocks.
    if not isinstance(content, list):
        print("❌ Multimodal content not formatted correctly")
        return False

    print(f"Content blocks: {len(content)}")
    for i, block in enumerate(content):
        print(f" Block {i}: {block['type']}")

    return True
async def test_agent_with_image(
    image_path: str = "temp_images/mcp_image_20250705_185030_891449.jpeg",
) -> bool:
    """Check that a started ``Agent`` formats an image turn for the API.

    The test is skipped (and counted as passed) when the
    ``ANTHROPIC_API_KEY`` environment variable is not set. Otherwise an
    agent is started, a turn with one image is added to its message state,
    and the resulting conversation history is checked.

    Args:
        image_path: Path of the image to attach. Defaults to the
            screenshot path this script has always used.

    Returns:
        True when the test was skipped, or when the first history entry's
        ``content`` is a list. False when the image file is missing, the
        content is not formatted as a list, or any exception is raised
        while the agent is running.
    """
    print("\n=== Testing Agent with Image ===")

    # Check if we have an API key
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        print("⚠️ No ANTHROPIC_API_KEY found, skipping API test")
        return True

    # Imported here, after the skip check, so the skip path does not
    # import the agent module at all.
    from python_agent.agent import Agent

    # Create agent configuration
    config = AgentConfig(
        api_config=ApiConfiguration(
            provider=ApiProviderType.ANTHROPIC,
            api_key=api_key,
            model_id="claude-3-haiku-20240307",  # Use a smaller model for testing
        )
    )
    agent = Agent(config)

    passed = True
    try:
        await agent.start()
        print("✅ Agent started successfully")

        if not Path(image_path).exists():
            # Same treatment as the other tests in this file: without the
            # sample image nothing was verified, so do not report a pass.
            print(f"❌ Test image not found: {image_path}")
            passed = False
        else:
            # Add image to the current turn manually for testing
            turn = agent.message_state.start_new_turn(
                "What do you see in this screenshot?",
                images=[image_path],
            )
            print(f"Created turn with {len(turn.user_message.images)} images")

            # Test conversation history generation
            history = agent.message_state.get_conversation_history()
            if history and isinstance(history[0]['content'], list):
                print("✅ Multimodal content correctly formatted for API")
            else:
                print("❌ Multimodal content not formatted correctly")
                passed = False
    except Exception as e:
        # Top-level test boundary: report any failure instead of crashing
        # the whole test run.
        print(f"❌ Agent test failed: {e}")
        return False
    finally:
        # Always stop the agent, including when the test body failed.
        await agent.stop()
        print("Agent stopped")

    return passed
async def main():
    """Run every image-support test and print an overall verdict.

    All four tests are always executed, in a fixed order; a failure in one
    does not prevent the later ones from running.

    Returns:
        True when every test returned a truthy result, otherwise False.
    """
    print("🧪 Testing Image Support in Python Agent\n")

    # The list literal is evaluated left to right, so the tests run in the
    # listed order and each one runs regardless of earlier outcomes.
    outcomes = [
        test_image_utils(),                # image utilities
        test_message_with_images(),        # message with images
        test_message_state_with_images(),  # message state with images
        await test_agent_with_image(),     # agent (skipped without API key)
    ]
    success = all(outcomes)

    print(f"\n{'='*50}")
    verdict = (
        "🎉 All tests passed! Image support is working."
        if success
        else "❌ Some tests failed. Check the output above."
    )
    print(verdict)

    return success
if __name__ == "__main__":
    # main() returns the overall pass/fail result; turn it into the process
    # exit status (0 = all passed, 1 = at least one failure) so shells and
    # CI jobs can detect a failed run.
    sys.exit(0 if asyncio.run(main()) else 1)