-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdescription_via_local.py
More file actions
92 lines (78 loc) · 3.1 KB
/
description_via_local.py
File metadata and controls
92 lines (78 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import requests
import base64
import csv
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
load_dotenv()

# === Configuration ===
# Chat-completions endpoint of the OpenAI-compatible server, read from the
# API_URL_LOCAL environment variable (None when the variable is unset),
# e.g. "http://localhost:1234/v1/chat/completions".
API_URL = os.environ.get("API_URL_LOCAL")
# Read from OPENAI_API_KEY (None when unset).
# NOTE(review): not referenced by the functions visible in this file --
# confirm whether an Authorization header is intended.
API_KEY = os.environ.get("OPENAI_API_KEY")
# Model name sent with every request. Leave empty to use the model that is
# already loaded on the server, or specify a model name explicitly.
MODEL_NAME = ""
# Folder that is scanned for images to caption.
IMAGE_FOLDER = "./images"
# File extensions (lower-case, with leading dot) that count as images.
ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"}
# Output files: a human-readable log and a CSV table of the captions.
TEXT_LOG = "captions_log.txt"
CSV_FILE = "captions_output.csv"
# === Encode image as base64 ===
def encode_image(image_path) -> str:
    """Return the contents of the file at *image_path* as base64 text.

    The file is opened in binary mode and read in full; the base64 bytes
    are decoded as UTF-8 so the result can be embedded in a JSON payload.

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
# === Create payload for OpenAI-compatible API ===
def _sniff_image_mime(encoded_image):
    """Guess the MIME type of a base64-encoded image from its magic bytes.

    Only the first 16 base64 characters (12 raw bytes) are decoded.
    Recognises PNG and WebP; anything else, including undecodable input,
    is reported as "image/jpeg".
    """
    try:
        header = base64.b64decode(encoded_image[:16])
    except (ValueError, TypeError):
        # binascii.Error is a ValueError; keep the JPEG default.
        return "image/jpeg"
    if header.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    # WebP is a RIFF container whose form type (bytes 8-11) is "WEBP".
    if header[:4] == b"RIFF" and header[8:12] == b"WEBP":
        return "image/webp"
    return "image/jpeg"


def create_payload(encoded_image, *, mime_type=None, model=None):
    """Build the chat-completions request body for one image.

    The payload holds a single user message with a fixed text prompt and
    the image embedded as a ``data:`` URL.

    Args:
        encoded_image: Base64 text of the image file.
        mime_type: MIME type to declare in the data URL. When None it is
            detected from the image's leading bytes, falling back to
            "image/jpeg".
        model: Model name to send. When None the module-level MODEL_NAME
            is used.

    Returns:
        A dict ready to be sent as the JSON request body.
    """
    if mime_type is None:
        mime_type = _sniff_image_mime(encoded_image)
    return {
        "model": MODEL_NAME if model is None else model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{encoded_image}",
                        },
                    },
                ],
            },
        ],
        "temperature": 0.7,
    }
# === Main process ===
def process_images(timeout=300):
    """Caption every supported image in IMAGE_FOLDER and save the results.

    Each file in IMAGE_FOLDER whose extension is in ALLOWED_EXTENSIONS is
    base64-encoded, posted to API_URL, and the returned caption (or an
    ``ERROR: ...`` entry) is printed, appended to TEXT_LOG and collected.
    After all images are handled the collected rows are written to
    CSV_FILE. A failure on one image is recorded and does not stop the
    remaining images.

    Args:
        timeout: Value passed to ``requests.post`` as ``timeout`` (seconds),
            so a stalled server cannot block the run indefinitely.

    Raises:
        ValueError: If API_URL is empty or None.
        OSError: If IMAGE_FOLDER cannot be listed or an output file cannot
            be written.
    """
    if not API_URL:
        # Fail once, up front, instead of logging the same "invalid URL"
        # error for every image after the log file was already truncated.
        raise ValueError(
            "API_URL is not configured; set the API_URL_LOCAL environment variable"
        )

    # os.listdir returns names in arbitrary order; sort so that the log and
    # the CSV come out in the same order on every run.
    image_names = sorted(
        name
        for name in os.listdir(IMAGE_FOLDER)
        if os.path.splitext(name)[1].lower() in ALLOWED_EXTENSIONS
    )

    results = []
    with open(TEXT_LOG, "w", encoding="utf-8") as txt_log:
        txt_log.write("Image Caption Log\n==================\n\n")
        for filename in image_names:
            image_path = os.path.join(IMAGE_FOLDER, filename)
            print(f"\n📷 Processing: {filename}")
            try:
                encoded = encode_image(image_path)
                payload = create_payload(encoded)
                response = requests.post(
                    API_URL,
                    headers={"Content-Type": "application/json"},
                    json=payload,
                    timeout=timeout,
                )
                if response.status_code == 200:
                    caption = response.json()["choices"][0]["message"]["content"]
                    print(f"🧠 Caption: {caption}")
                    entry = caption
                else:
                    error_msg = f"API error ({response.status_code}): {response.text}"
                    print(f"❌ {error_msg}")
                    entry = f"ERROR: {error_msg}"
            except Exception as e:
                # Deliberately broad: this is the per-image boundary, and any
                # failure (I/O, network, unexpected response shape) is
                # recorded so the rest of the batch still runs.
                print(f"❌ Failed: {e}")
                entry = f"ERROR: {e}"
            txt_log.write(f"{filename}:\n{entry}\n\n")
            results.append((filename, entry))

    # Save to CSV
    with open(CSV_FILE, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Image Filename", "Caption"])
        writer.writerows(results)
    print(f"\n✅ Captions saved to:\n- {TEXT_LOG}\n- {CSV_FILE}")
# === Run it ===
if __name__ == "__main__":
    # Run the captioning batch only when executed as a script, not on import.
    process_images()