Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions deeptuner/datagenerators/triplet_data_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,22 @@ def __init__(self, image_paths, labels, batch_size, image_size, num_classes):
self.label_encoder = LabelEncoder()
self.encoded_labels = self.label_encoder.fit_transform(labels)
self.image_data_generator = ImageDataGenerator(preprocessing_function=resnet.preprocess_input)

# Precompute label to paths mapping for O(1) access
self.unique_labels = np.unique(self.encoded_labels)
if len(self.unique_labels) < 2:
raise ValueError("TripletDataGenerator requires at least 2 classes.")

self.label_to_paths = {}
for p, label in zip(self.image_paths, self.encoded_labels):
if label not in self.label_to_paths:
self.label_to_paths[label] = []
self.label_to_paths[label].append(p)

# Convert to numpy arrays for faster sampling
for label in self.label_to_paths:
self.label_to_paths[label] = np.array(self.label_to_paths[label])

self.on_epoch_end()
print(f"Initialized TripletDataGenerator with {len(self.image_paths)} images")

Expand All @@ -40,12 +56,14 @@ def _generate_triplet_batch(self, batch_image_paths, batch_labels):
anchor_path = batch_image_paths[i]
anchor_label = batch_labels[i]

positive_path = np.random.choice(
[p for p, l in zip(self.image_paths, self.encoded_labels) if l == anchor_label]
)
negative_path = np.random.choice(
[p for p, l in zip(self.image_paths, self.encoded_labels) if l != anchor_label]
)
positive_path = np.random.choice(self.label_to_paths[anchor_label])

# Rejection sampling for negative example
while True:
idx = np.random.randint(len(self.image_paths))
if self.encoded_labels[idx] != anchor_label:
negative_path = self.image_paths[idx]
break

anchor_image = load_img(anchor_path, target_size=self.image_size)
positive_image = load_img(positive_path, target_size=self.image_size)
Expand Down