Skip to content

Can anyone survey matching algorithm together? #27

Description

@gogog01-29-2021

We should survey image-matching algorithms, choose one to implement, find the code and libraries that are most compatible with our project, and gather that code together later.
Below is what I've found so far.
https://huggingface.co/spaces/Realcat/image-matching-webui
https://github.com/Vincentqyw/image-matching-webui?tab=readme-ov-file

Discussed in #26

Originally posted by gogog01-29-2021 December 2, 2024
import cv2
import torch
from torchvision import transforms
from torchvision.transforms import functional as F
from PIL import Image
import numpy as np

# Initialize ORB for fallback
orb = cv2.ORB_create()

# Load PyTorch models (torch.hub fetches the repositories/weights on first use)
dino_model = torch.hub.load(
    repo_or_dir='facebookresearch/dino:main',
    model='dino_vits16',
)
dino_model.eval()  # inference mode

detr_model = torch.hub.load(
    repo_or_dir='facebookresearch/detr:main',
    model='detr_resnet50',
    pretrained=True,
)
detr_model.eval()  # inference mode

# Preprocessing for DINO: fixed-size resize, tensor conversion, then
# per-channel normalization.
preprocess_dino = transforms.Compose(
    [
        # Explicit size to avoid deprecation warnings
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

def extract_dino_features(image):
    """
    Extract features from an image using DINO.

    Args:
        image: Image pixels as an array-like accepted by
            ``PIL.Image.fromarray`` (e.g. an OpenCV frame or crop).

    Returns:
        The output of ``dino_model`` for a batch containing this one image.

    Raises:
        ValueError: If the image contains no pixels.
    """
    pixels = np.asarray(image)
    # Validate before handing the data to PIL so that an empty crop always
    # produces this error rather than whatever the conversion would raise.
    if pixels.size == 0:
        # shape[1::-1] is (width, height), the same order PIL reports.
        raise ValueError(f"Image has invalid dimensions: {pixels.shape[1::-1]}")
    # Force three channels: the normalization in preprocess_dino uses
    # 3-channel statistics, so grayscale or RGBA input would fail there.
    pil_image = Image.fromarray(pixels).convert("RGB")
    image_tensor = preprocess_dino(pil_image).unsqueeze(0)  # Add batch dimension
    with torch.no_grad():
        features = dino_model(image_tensor)
    return features

def detect_objects_with_detr(image):
    """
    Detect objects in the input image using DETR.

    Args:
        image: RGB image (NumPy array or PIL image) accepted by
            ``torchvision.transforms.functional.to_tensor``.

    Returns:
        A ``(probabilities, boxes)`` tuple for the single input image:
        ``probabilities`` are the per-query softmax class scores with the
        last ("no object") class dropped, and ``boxes`` are DETR's raw
        ``pred_boxes`` for the same queries. Per the DETR documentation these
        boxes are normalized ``(center_x, center_y, width, height)`` values,
        not corner coordinates.
    """
    image_tensor = F.to_tensor(image)
    # The released DETR checkpoints expect ImageNet-normalized input (see the
    # official DETR demo); feeding raw [0, 1] pixels degrades detections.
    image_tensor = F.normalize(
        image_tensor,
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    with torch.no_grad():
        outputs = detr_model(image_tensor.unsqueeze(0))  # Add batch dimension

    # Drop the trailing "no object" class so max() reflects real classes only.
    probabilities = outputs['pred_logits'].softmax(-1)[0, :, :-1]
    boxes = outputs['pred_boxes'][0]
    return probabilities, boxes

def match_keypoints(des_template, des_frame):
    """
    Match descriptors using BFMatcher with Lowe's ratio test.

    Args:
        des_template: Binary descriptors of the template (query set), or
            ``None`` when no keypoints were detected.
        des_frame: Binary descriptors of the frame (train set), or ``None``
            when no keypoints were detected.

    Returns:
        List of matches whose best distance is below 0.75 times the
        second-best distance. Empty when either descriptor set is missing
        or empty.
    """
    # detectAndCompute yields None descriptors for feature-less images;
    # there is nothing to match in that case.
    if des_template is None or des_frame is None:
        return []
    if len(des_template) == 0 or len(des_frame) == 0:
        return []

    bf = cv2.BFMatcher(cv2.NORM_HAMMING)
    matches = bf.knnMatch(des_template, des_frame, k=2)
    # knnMatch can return fewer than two neighbours for a query (e.g. when
    # the frame has a single descriptor); the ratio test needs both.
    return [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
    ]

def find_homography_and_draw(template, kp_template, frame, kp_frame, good_matches):
    """
    Find homography and draw bounding box if enough matches are found.

    Args:
        template: Template image; only its height and width are used.
        kp_template: Keypoints of the template, indexed by ``queryIdx``.
        frame: Frame to draw on.
        kp_frame: Keypoints of the frame, indexed by ``trainIdx``.
        good_matches: Matches that passed the ratio test.

    Returns:
        The frame with the projected template outline drawn on it, or the
        frame unchanged when there are 20 or fewer matches or no homography
        could be estimated.
    """
    if len(good_matches) <= 20:  # Minimum matches threshold
        return frame

    src_pts = np.float32(
        [kp_template[m.queryIdx].pt for m in good_matches]
    ).reshape(-1, 1, 2)
    dst_pts = np.float32(
        [kp_frame[m.trainIdx].pt for m in good_matches]
    ).reshape(-1, 1, 2)

    M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    if M is None:
        # RANSAC found no consistent model; perspectiveTransform would fail.
        return frame

    # shape[:2] works for both grayscale and colour templates.
    h, w = template.shape[:2]
    corners = np.float32([[0, 0], [0, h], [w, h], [w, 0]]).reshape(-1, 1, 2)
    projected = cv2.perspectiveTransform(corners, M)
    frame = cv2.polylines(frame, [np.int32(projected)], True, (0, 255, 0), 3, cv2.LINE_AA)
    print("Match found using homography!")
    return frame

# Load template image and extract DINO features
template_path = '../../images/won_1000.jpg'
template_image = cv2.imread(template_path)
if template_image is None:
    raise FileNotFoundError(f"Template image not found at {template_path}")
# cv2.imread returns BGR, while the frame crops compared against the template
# below are RGB. Convert so both embeddings come from the same channel order.
template_features = extract_dino_features(
    cv2.cvtColor(template_image, cv2.COLOR_BGR2RGB)
)

# Extract ORB keypoints and descriptors for the ORB/homography stage
template_gray = cv2.cvtColor(template_image, cv2.COLOR_BGR2GRAY)
kp_template, des_template = orb.detectAndCompute(template_gray, None)

# Access the camera
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
if not cap.isOpened():
    # CAP_DSHOW is the Windows DirectShow backend; retry with the platform
    # default so the script also works elsewhere.
    cap.release()
    cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open camera")

confidence_threshold = 0.7  # minimum DETR class probability
similarity_threshold = 0.8  # minimum DINO cosine similarity

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        # Convert frame to RGB for PyTorch models
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_h, frame_w = frame.shape[:2]

        # Step 1: Detect objects with DETR
        probabilities, boxes = detect_objects_with_detr(frame_rgb)
        for prob, box in zip(probabilities, boxes):
            if prob.max().item() <= confidence_threshold:
                continue

            # DETR boxes are normalized (center_x, center_y, width, height);
            # convert to pixel corners and clamp to the frame so slicing
            # never sees negative or out-of-range indices.
            cx, cy, bw, bh = box.cpu().numpy()
            x_min = max(int((cx - bw / 2) * frame_w), 0)
            y_min = max(int((cy - bh / 2) * frame_h), 0)
            x_max = min(int((cx + bw / 2) * frame_w), frame_w)
            y_max = min(int((cy + bh / 2) * frame_h), frame_h)
            if y_max <= y_min or x_max <= x_min:
                print(f"Invalid bounding box: x_min={x_min}, y_min={y_min}, x_max={x_max}, y_max={y_max}")
                continue
            object_region = frame_rgb[y_min:y_max, x_min:x_max]

            # Step 2: Extract DINO features for the detected region
            if object_region.size == 0:
                print("Skipping empty object region")
                continue

            object_features = extract_dino_features(object_region)

            # Step 3: Match DINO features with template features
            similarity_score = torch.nn.functional.cosine_similarity(
                template_features, object_features
            ).item()
            if similarity_score > similarity_threshold:
                print("Detected and matched object using DINO!")
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Step 4: ORB + homography matching. This runs on every frame,
        # regardless of whether the DETR/DINO stage found a match.
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        kp_frame, des_frame = orb.detectAndCompute(frame_gray, None)
        # Either descriptor set is None when no keypoints were detected.
        if des_template is not None and des_frame is not None:
            good_matches = match_keypoints(des_template, des_frame)
            frame = find_homography_and_draw(
                template_gray, kp_template, frame, kp_frame, good_matches
            )

        # Display the frame; press 'q' to quit
        cv2.imshow('Integrated Pipeline', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close windows, even on error.
    cap.release()
    cv2.destroyAllWindows()
241202pytorchkeypointmatching

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions