Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion rfdetr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,11 @@ class TrainConfig(BaseModel):
num_select: int = 300
dataset_file: Literal["coco", "o365", "roboflow"] = "roboflow"
square_resize_div_64: bool = True
dataset_dir: str
dataset_dir: str = "/data/datasets/detr_train_dixy"
output_dir: str = "output"
ann_file: str = "/data/datasets/detr_train_dixy/train/_annotations.coco.json"
conf_folder: str = "/detr_train/configs"
datasets_conf: str = "config_dixy.yaml"
multi_scale: bool = True
expanded_scales: bool = True
use_ema: bool = True
Expand Down
8 changes: 6 additions & 2 deletions rfdetr/detr.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ def train(self, **kwargs):
self.train_from_config(config, **kwargs)

def export(self, **kwargs):
self.model.export(**kwargs)
config = self.get_train_config(**kwargs)
self.model.export(config, **kwargs)

def train_from_config(self, config: TrainConfig, **kwargs):
with open(
Expand Down Expand Up @@ -82,13 +83,16 @@ def train_from_config(self, config: TrainConfig, **kwargs):

if config.wandb:
metrics_wandb_sink = MetricsWandBSink(
config_train = self.get_train_config(**kwargs),
output_dir=config.output_dir,
datasets_conf=config.datasets_conf,
#data_dir=config.data_dir,
project=config.project,
run=config.run,
config=config.model_dump()
)
self.callbacks["on_fit_epoch_end"].append(metrics_wandb_sink.update)
self.callbacks["on_train_end"].append(metrics_wandb_sink.close)
#self.callbacks["on_train_end"].append(metrics_wandb_sink.close)

if config.early_stopping:
from rfdetr.util.early_stopping import EarlyStoppingCallback
Expand Down
4 changes: 2 additions & 2 deletions rfdetr/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@

def get_autocast_args(args):
if DEPRECATED_AMP:
return {'enabled': args.amp, 'dtype': torch.bfloat16}
return {'enabled': args.amp, 'dtype': torch.float16}
else:
return {'device_type': 'cuda', 'enabled': args.amp, 'dtype': torch.bfloat16}
return {'device_type': 'cuda', 'enabled': args.amp, 'dtype': torch.float16}


def train_one_epoch(
Expand Down
40 changes: 37 additions & 3 deletions rfdetr/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
import shutil
from rfdetr.util.files import download_file
import os
from rfdetr.config import TrainConfig

if str(os.environ.get("USE_FILE_SYSTEM_SHARING", "False")).lower() in ["true", "1"]:
import torch.multiprocessing
torch.multiprocessing.set_sharing_strategy('file_system')
Expand Down Expand Up @@ -440,11 +442,11 @@ def lr_lambda(current_step: int):
for callback in callbacks["on_train_end"]:
callback()

def export(self, output_dir="output", infer_dir=None, simplify=False, backbone_only=False, opset_version=17, verbose=True, force=False, shape=None, batch_size=1, **kwargs):
def export(self, config: TrainConfig, output_dir="output", infer_dir=None, simplify=False, backbone_only=False, opset_version=17, verbose=True, force=False, shape=None, batch_size=1, **kwargs):
"""Export the trained model to ONNX format"""
print(f"Exporting model to ONNX format")
from rfdetr.deploy.export import export_onnx, onnx_simplify, make_infer_image

import onnx

device = self.device
model = deepcopy(self.model.to("cpu"))
Expand Down Expand Up @@ -501,7 +503,39 @@ def export(self, output_dir="output", infer_dir=None, simplify=False, backbone_

print("ONNX export completed successfully")
self.model = self.model.to(device)

import wandb
with open(config.ann_file, 'r') as file:
data_json = json.load(file)

labels = [i["name"] for i in data_json["categories"]]
model = onnx.load("/detr_train/output/inference_model.onnx")
metadata = model.metadata_props.add()
metadata.key = "labels"
metadata.value = json.dumps(labels)
onnx.save(model, "/detr_train/output/model_metadata.onnx")
if os.path.exists("/detr_train/output/model_metadata.onnx"):
#with wandb.init(id="test", resume="allow", project="detr_kudo") as run:
art_model = wandb.Artifact("model_metadata.onnx", type='model')
art_model.add_file(local_path="/detr_train/output/model_metadata.onnx", name="model")
wandb.log_artifact(art_model)
artifact = wandb.Artifact(name="checkpoint.pth", type="checkpoint")
artifact.add_file(local_path="/detr_train/output/checkpoint.pth", name="checkpoint")
wandb.log_artifact(artifact)

#IMPORT TEST IMAGES TO WANDB
from rfdetr.util.detr_inf import process_images_from_folder
processed_20 = process_images_from_folder(
labelmap=labels,
src_folder = config.dataset_dir,
model_path = "/detr_train/output/model_metadata.onnx")
#print(len(processed_20))
examples = []
for i in range(len(processed_20)):
image = wandb.Image(processed_20[i], caption=f"test_images")
examples.append(image)
wandb.log({"test_images": examples})
wandb.finish()

if __name__ == '__main__':
parser = argparse.ArgumentParser('LWDETR training and evaluation script', parents=[get_args_parser()])
Expand Down Expand Up @@ -994,4 +1028,4 @@ def populate_args(
gradient_checkpointing=gradient_checkpointing,
**extra_kwargs
)
return args
return args
169 changes: 169 additions & 0 deletions rfdetr/util/detr_inf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import onnxruntime as ort
import numpy as np
import supervision as sv
import cv2
from pathlib import Path
from typing import Optional
import os

def box_cxcywh_to_xyxy(boxes):
    """Convert boxes from center format to corner format.

    Args:
        boxes: Array-like whose last axis has length 4 and holds
            ``(cx, cy, w, h)``. Any number of leading axes is allowed.

    Returns:
        ``np.ndarray`` with the same shape as ``boxes`` holding
        ``(x1, y1, x2, y2)`` along the last axis.
    """
    # np.split reads ``.shape`` and therefore rejects plain lists/tuples;
    # coercing first lets callers pass any array-like.
    boxes = np.asarray(boxes)
    cx, cy, w, h = np.split(boxes, 4, axis=-1)
    half_w = 0.5 * w
    half_h = 0.5 * h
    return np.concatenate(
        [cx - half_w, cy - half_h, cx + half_w, cy + half_h], axis=-1
    )


class RfDetr:
    """Run an exported ONNX detection model on a single image.

    The instance is callable: pass a BGR image and receive an
    ``sv.Detections`` object whose boxes are scaled to the size of the
    image that was passed in.
    """

    def __init__(self, model_path: str, confidence_threshold: float = 0.45, resolution: int = 336):
        """Create the inference session.

        Args:
            model_path: Path of the ONNX model handed to ``ort.InferenceSession``.
            confidence_threshold: Detections scoring below this are dropped.
            resolution: Side length of the square image fed to the model.
        """
        self._session = ort.InferenceSession(model_path)
        self._resolution = resolution
        # Only the first graph input is fed.
        self._input_name = self._session.get_inputs()[0].name
        # Per-channel normalization constants (presumably the ImageNet
        # mean/std - confirm against the training pipeline).
        self._means = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        self._stds = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        self._confidence_threshold = confidence_threshold

    def __call__(self, *inputs, return_info=False, return_bboxes=False):
        """Detect objects in ``inputs[0]``.

        Args:
            *inputs: Only the first positional argument is used; it is
                converted with ``cv2.COLOR_BGR2RGB``, so a BGR image is expected.
            return_info: Accepted for interface compatibility; currently unused.
            return_bboxes: Accepted for interface compatibility; currently unused.

        Returns:
            ``sv.Detections`` with boxes scaled to the input image size.
        """
        img = inputs[0]
        orig_h, orig_w = img.shape[:2]
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_data = self._preprocess(rgb)
        outputs = self._session.run(None, {self._input_name: input_data})
        return self._postprocess(outputs, np.array([[orig_h, orig_w]]))

    def _preprocess(self, img_array: np.ndarray):
        """Resize, scale to [0, 1], normalize, and reorder to (1, C, H, W)."""
        img_array = cv2.resize(img_array, (self._resolution, self._resolution))
        img_array = np.array(img_array, dtype=np.float32) / 255.0
        normalized = (img_array - self._means) / self._stds
        return normalized.transpose(2, 0, 1)[np.newaxis, ...]

    @staticmethod
    def _select_top_k(prob, num_select):
        """Pick the highest-scoring (query, class) pairs of one image.

        Args:
            prob: 2-D array of scores indexed as ``[query, class]``.
            num_select: Maximum number of pairs to return; clamped to the
                number of available scores.

        Returns:
            ``(scores, labels, box_indices)`` sorted by descending score.
        """
        num_classes = prob.shape[-1]
        flat = prob.ravel()
        k = min(num_select, flat.size)
        if k <= 0:
            empty_idx = np.zeros(0, dtype=int)
            return flat[:0], empty_idx, empty_idx
        # argpartition isolates the k largest entries without a full sort;
        # only those k are then ordered.
        candidates = np.argpartition(flat, -k)[-k:]
        order = np.argsort(-flat[candidates])
        picked = candidates[order]
        return flat[picked], picked % num_classes, picked // num_classes

    def _postprocess(self, outputs, target_sizes, num_select=300):
        """Turn raw model outputs into thresholded detections.

        Args:
            outputs: Sequence unpacked as ``(boxes, logits)``.
            target_sizes: Array of ``(height, width)`` rows used to scale boxes.
            num_select: Upper bound on candidates kept per image before
                thresholding. It is clamped to the number of scores the
                model produced, so small models no longer make
                ``np.argpartition`` fail.

        Returns:
            ``sv.Detections`` for the last element of the batch
            (``__call__`` always passes a batch of one).

        Raises:
            ValueError: If the model returned an empty batch.
        """
        # Assumes the session returns (boxes, logits) in this order -
        # verify against the exported graph.
        out_bbox, out_logits = outputs

        batch_size = out_logits.shape[0]
        if batch_size == 0:
            raise ValueError("model returned an empty batch")

        # Sigmoid over the logits.
        prob = 1 / (1 + np.exp(-out_logits))

        k = min(num_select, out_logits.shape[1] * out_logits.shape[2])
        scores = np.zeros((batch_size, k))
        labels = np.zeros((batch_size, k), dtype=int)
        topk_boxes = np.zeros((batch_size, k), dtype=int)
        for b in range(batch_size):
            scores[b], labels[b], topk_boxes[b] = self._select_top_k(prob[b], k)

        boxes = box_cxcywh_to_xyxy(out_bbox)
        gather_idx = topk_boxes[..., np.newaxis].repeat(4, axis=-1)
        selected_boxes = np.take_along_axis(boxes, gather_idx, axis=1)

        # Boxes are multiplied by (w, h, w, h) of the target image.
        img_h = target_sizes[:, 0]
        img_w = target_sizes[:, 1]
        scale_fct = np.stack([img_w, img_h, img_w, img_h], axis=1)
        scaled_boxes = selected_boxes * scale_fct[:, np.newaxis, :]

        # Explicit index instead of relying on a leaked loop variable.
        last = batch_size - 1
        keep = scores[last] >= self._confidence_threshold

        return sv.Detections(
            xyxy=scaled_boxes[last][keep],
            class_id=labels[last][keep],
            confidence=scores[last][keep],
        )


# -------

def process_frame(
    frame: np.ndarray,
    model: callable,
    labelmap: tuple,
) -> np.ndarray:
    """Detect objects in one frame and return an annotated copy.

    The frame is resized to ``FRAME_SIZE``, passed to ``model``, and the
    resulting detections are drawn as boxes and text labels.

    Args:
        frame: Image to process.
        model: Callable that takes the resized frame and returns detections.
        labelmap: Sequence of label names, indexed with ``class_id - 1``.

    Returns:
        The resized frame with boxes and labels drawn on it.
    """
    boxes_painter = sv.BoxAnnotator()
    labels_painter = sv.LabelAnnotator(smart_position=True)

    scaled = cv2.resize(frame, FRAME_SIZE)
    found = model(scaled)

    # Draw on a copy so the resized frame itself stays untouched.
    with_boxes = boxes_painter.annotate(scaled.copy(), detections=found)

    # NOTE(review): the ``- 1`` presumably maps 1-based class ids onto the
    # 0-based labelmap - confirm against the ids the model emits.
    names = [labelmap[class_id - 1] for class_id in found.class_id]
    return labels_painter.annotate(with_boxes, detections=found, labels=names)

#DEFAULT_LABELMAP = ("barcode", "pallet")
# Size every frame is resized to before detection. process_frame passes it as
# the dsize argument of cv2.resize, which interprets it as (width, height).
FRAME_SIZE = (432, 768)

def process_images_from_folder(
    labelmap,
    src_folder: Path,
    model_path: Optional[Path] = None,
    image_exts: tuple = ('.jpg',),
):
    """Annotate up to 20 images from the ``train`` subfolder of ``src_folder``.

    Args:
        labelmap: Label names forwarded to ``process_frame``.
        src_folder: Dataset root; images are read from ``src_folder/train``.
        model_path: Path of the ONNX detection model. Required.
        image_exts: File extensions (with leading dot) to include. Matching
            is case-insensitive. A single string is accepted as one extension.

    Returns:
        List of annotated frames, one per image that could be loaded.

    Raises:
        ValueError: If ``model_path`` is ``None``.
    """
    if model_path is None:
        # str(None) would otherwise be passed on as the literal path "None".
        raise ValueError("model_path is required to load the detection model")

    # A bare string such as '.jpg' would turn the membership test below into
    # a substring match (so '' and '.jp' would match); normalize to a set.
    if isinstance(image_exts, str):
        image_exts = (image_exts,)
    wanted = {ext.lower() for ext in image_exts}

    train_dir = Path(src_folder) / "train"

    # Model initialization.
    model = RfDetr(str(model_path), resolution=336)

    # iterdir() yields entries in arbitrary order; sorting makes the choice
    # of the first 20 images reproducible.
    image_files = sorted(
        entry
        for entry in train_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() in wanted
    )

    annotated = []
    for image_path in image_files[:20]:
        img = cv2.imread(str(image_path))
        if img is None:
            print(f"Не удалось загрузить изображение: {image_path}")
            continue
        annotated.append(process_frame(img, model, labelmap))
    return annotated

# Script entry point. The guard must compare against "__main__"; the previous
# value "main" never matched, so this block could not run.
if __name__ == "__main__":
    process_images_from_folder(
        # Example labels; replace with the label names of the exported model.
        labelmap=("barcode", "pallet"),
        src_folder=Path("input_images"),
        model_path=Path("inference_model.onnx"),
    )
Loading