-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathGDRealTime.py
More file actions
76 lines (60 loc) · 2.14 KB
/
GDRealTime.py
File metadata and controls
76 lines (60 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import cv2
import os
import torch
import numpy as np
from groundingdino.util.inference import predict, annotate
from PIL import Image
from groundingdino.util.inference import load_model
import groundingdino.datasets.transforms as T
def transform_image(rgb_image):
    """Convert an image to a normalized tensor.

    Runs ``T.ToTensor`` followed by ``T.Normalize`` with per-channel mean
    ``[0.485, 0.456, 0.406]`` and std ``[0.229, 0.224, 0.225]``.

    Args:
        rgb_image: Image handed to the transform pipeline. In this file it
            is called with a float32 array already divided by 255.

    Returns:
        The first element of the pipeline's result (the transformed image);
        the second element (the target, passed in as ``None``) is dropped.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]
    steps = [T.ToTensor(), T.Normalize(channel_mean, channel_std)]
    pipeline = T.Compose(steps)

    # The pipeline works on (image, target) pairs; no target is supplied here.
    tensor_image, _unused_target = pipeline(rgb_image, None)
    return tensor_image
# Real-time detection on webcam frames.
# All paths are resolved against the current working directory, so the script
# has to be started from the directory that contains "GroundingDINO/" and
# "weights/".
projectdir = os.getcwd()

# Define paths
groundingdino_dir = os.path.join(projectdir, "GroundingDINO")
model_config_path = os.path.join(groundingdino_dir, "groundingdino/config/GroundingDINO_SwinT_OGC.py")
weights_path = os.path.join(projectdir, "weights/groundingdino_swint_ogc.pth")

# Load model
model = load_model(model_config_path, weights_path)

# Detection settings passed to predict() on every frame
TEXT_PROMPT = "cap"
BOX_THRESHOLD = 0.30
TEXT_THRESHOLD = 0.20

# Initialize webcam capture (device index 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below is skipped silently and the script
    # exits with no hint about what went wrong.
    print("Error: Could not open webcam")

# try/finally guarantees the capture device and any windows are released even
# when inference raises or the user interrupts the script.
try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            print("Error: Failed to capture image from webcam")
            break

        # Convert BGR to RGB once; the result feeds both the model input and
        # the annotation step.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Build the model input: resize to 800x800, then scale to float32
        # (divide by 255; assumes 8-bit frames from the capture device).
        image = cv2.resize(rgb_frame, (800, 800))
        image = image.astype(np.float32) / 255.0
        image_tensor = transform_image(image)
        print(image_tensor.shape)

        # Perform object detection without tracking gradients
        with torch.no_grad():
            boxes, logits, phrases = predict(
                model=model,
                image=image_tensor,
                caption=TEXT_PROMPT,
                box_threshold=BOX_THRESHOLD,
                text_threshold=TEXT_THRESHOLD,
            )

        # Annotate the original-size RGB frame with the detections
        annotated_frame = annotate(
            image_source=rgb_frame,
            boxes=boxes,
            logits=logits,
            phrases=phrases,
        )

        # Display the annotated image
        cv2.imshow("Result", annotated_frame)

        # Exit when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the webcam and close OpenCV windows
    cap.release()
    cv2.destroyAllWindows()