-
Notifications
You must be signed in to change notification settings - Fork 32
Open
Description
I gave this image an unrelated word (apple), but it still gets detected.
# Zero-shot object detection with LLMDet: load the model, run it on one image
# with a list of candidate text labels, print every detection above the score
# threshold, and draw the detected boxes onto the image.
import torch
from PIL import Image, ImageDraw
from transformers import AutoModelForZeroShotObjectDetection, AutoProcessor
from transformers.image_utils import load_image

# Prepare processor and model
model_id = "iSEE-Laboratory/llmdet_base"
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForZeroShotObjectDetection.from_pretrained(model_id).to(device)

# Prepare inputs
image = load_image("missing_object_results/edited/3.jpg")
# One inner list of candidate labels per image (a single image here).
text_labels = [["front wheels", "apple"]]
# The inputs are moved to the same device as the model.
inputs = processor(images=image, text=text_labels, return_tensors="pt").to(device)

# Run inference (no gradients are needed for prediction)
with torch.no_grad():
    outputs = model(**inputs)

# Postprocess outputs
# target_sizes is given as (height, width) of the input image.
# NOTE(review): 0.3 is a fairly permissive cutoff; a prompt that is not in the
# image (e.g. "apple") can presumably still clear it -- consider raising the
# threshold if such false positives show up.
results = processor.post_process_grounded_object_detection(
    outputs,
    threshold=0.3,
    target_sizes=[(image.height, image.width)]
)

# Retrieve the first (and only) image result
result = results[0]
for box, score, labels in zip(result["boxes"], result["scores"], result["labels"]):
    box = [round(x, 2) for x in box.tolist()]
    print(f"Detected {labels} with confidence {round(score.item(), 3)} at location {box}")

# Visualize results
# Drawing happens in place on `image`, so the annotated picture is `image` itself.
draw = ImageDraw.Draw(image)
for box, labels in zip(result["boxes"], result["labels"]):
    box = [round(x, 2) for x in box.tolist()]
    draw.rectangle(box, outline="red", width=2)
    # Label text is anchored at the first two box coordinates.
    draw.text((box[0], box[1]), labels, fill="red")
image
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels