-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimage_aiagent.py
More file actions
160 lines (134 loc) · 6 KB
/
image_aiagent.py
File metadata and controls
160 lines (134 loc) · 6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from openai import OpenAI
import google.generativeai as genai
from PIL import Image
import logging
import time
import socket
from contextlib import closing
filename_perp="/home/nav/Projects/Perp_API_key.txt"
filename_gemini="/home/nav/Projects/Gem_API_key.txt"
# Configure logging before any other initialization
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def load_api_key(filename: str) -> str:
"""Load API key from a file"""
try:
with open(filename, 'r') as f:
return f.read().strip()
except FileNotFoundError:
logger.error(f"API key file {filename} not found")
raise
except Exception as e:
logger.error(f"Error reading API key: {str(e)}")
raise
# Load API key from file
PERP_API_KEY = load_api_key(filename_perp)
GEM_API_KEY = load_api_key(filename_gemini)
def check_network():
"""Check if we can connect to Google's servers"""
try:
with closing(socket.create_connection(("generativelanguage.googleapis.com", 443), timeout=5)):
return True
except:
return False
def init_genai():
"""Initialize Gemini API with proper error handling"""
try:
if not check_network():
raise ConnectionError("No network connectivity to Google API servers")
genai.configure(api_key=GEM_API_KEY)
# Test connection with a simple request
genai.GenerativeModel('gemini-1.5-flash')
logger.info("Gemini API initialized successfully")
return True
except Exception as e:
logger.error(f"Failed to initialize Gemini API: {str(e)}")
return False
def web_search(prompt) -> str:
"""Search the web using Perplexity's sonar-pro model"""
try:
client = OpenAI(api_key=PERP_API_KEY, base_url="https://api.perplexity.ai")
response = client.chat.completions.create(
model="sonar-pro",
messages=[{"role": "user", "content": prompt}],
max_tokens=500
)
if response and response.choices:
return response.choices[0].message.content
return "No search results found"
except Exception as e:
logger.error(f"Web search failed: {str(e)}")
return f"Error performing web search: {str(e)}"
def describe_image(image_path: str, max_retries: int = 3) -> str:
"""Generate image description using Gemini 1.5 Flash."""
if not init_genai():
return "Failed to initialize API connection"
for attempt in range(max_retries):
try:
logger.info(f"Attempt {attempt + 1}/{max_retries} to process image")
# Load and prepare image
image = Image.open(image_path)
# Initialize model with safety settings
model = genai.GenerativeModel(
model_name='gemini-1.5-flash',
safety_settings=[
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
]
)
# Initial analysis
response = model.generate_content(
contents=[image, "Analyse the object in the image. Ignore the background and focus on the object. Display the brand of the object along with its model name in the following format: 'Brand Model'"],
generation_config={
"temperature": 0.7,
"max_output_tokens": 250,
"top_p": 0.8,
"top_k": 40
}
)
if not response:
raise ValueError("No response received from model")
response.resolve()
initial_description = response.text if response.text else "No description generated"
print("\nItem in the photo:")
print(initial_description)
# Get user confirmation of object identification
print("\nIs this identification correct? (yes/no)")
user_confirmation = input().lower()
if user_confirmation == 'yes':
current_description = initial_description
else:
print("Please provide the correct item description:")
current_description = input().strip()
prompt = f"""
Based on this item description: {current_description}
1. Do an internet search and find its current price.
2. Summarize user reviews and ratings
3. Then continue to be a friendly AI agent that accesses the web as required.
"""
# Search the web for more details
perplexity_result = web_search(prompt)
print("\nItem details:")
print(perplexity_result)
while True:
# Get user feedback
user_prompt = input("\nEnter feedback (or type 'exit' to finish): ")
if user_prompt.lower() == 'exit':
return perplexity_result
if user_prompt.strip():
# Search web again with updated information
updated_result = web_search(f"{current_description} {user_prompt}")
print("\nUpdated Item Details:")
print(updated_result)
perplexity_result = updated_result
except Exception as e:
wait_time = 2 ** attempt # Exponential backoff
logger.error(f"Attempt {attempt + 1}/{max_retries} failed: {str(e)}")
if attempt < max_retries - 1:
logger.info(f"Waiting {wait_time} seconds before retry...")
time.sleep(wait_time)
else:
return f"Error processing image after {max_retries} attempts: {str(e)}"