FastSense · Armore27 · Apr 10, 2025 · Apr 14, 2025 · Apr 15, 2025 · Apr 17, 2025
diff --git a/rfdetr/config.py b/rfdetr/config.py
@@ -64,8 +64,11 @@ class TrainConfig(BaseModel):
     num_select: int = 300
     dataset_file: Literal["coco", "o365", "roboflow"] = "roboflow"
     square_resize_div_64: bool = True
-    dataset_dir: str
+    dataset_dir: str = "/data/datasets/detr_train_dixy"
     output_dir: str = "output"
+    ann_file: str = "/data/datasets/detr_train_dixy/train/_annotations.coco.json"
+    conf_folder: str = "/detr_train/configs"
+    datasets_conf: str = "config_dixy.yaml"
     multi_scale: bool = True
     expanded_scales: bool = True
     use_ema: bool = True

diff --git a/rfdetr/detr.py b/rfdetr/detr.py
@@ -36,7 +36,8 @@ def train(self, **kwargs):
         self.train_from_config(config, **kwargs)
 
     def export(self, **kwargs):
-        self.model.export(**kwargs)
+        config = self.get_train_config(**kwargs)  # NOTE(review): export-only kwargs are forwarded here too - confirm get_train_config accepts them
+        self.model.export(config, **kwargs)
 
     def train_from_config(self, config: TrainConfig, **kwargs):
         with open(
@@ -82,13 +83,16 @@ def train_from_config(self, config: TrainConfig, **kwargs):
 
         if config.wandb:
             metrics_wandb_sink = MetricsWandBSink(
+                config_train = self.get_train_config(**kwargs),
                 output_dir=config.output_dir,
+                datasets_conf=config.datasets_conf,
+                #data_dir=config.data_dir, 
                 project=config.project,
                 run=config.run,
                 config=config.model_dump()
             )
             self.callbacks["on_fit_epoch_end"].append(metrics_wandb_sink.update)
-            self.callbacks["on_train_end"].append(metrics_wandb_sink.close)
+            #self.callbacks["on_train_end"].append(metrics_wandb_sink.close)
 
         if config.early_stopping:
             from rfdetr.util.early_stopping import EarlyStoppingCallback

diff --git a/rfdetr/engine.py b/rfdetr/engine.py
@@ -36,9 +36,9 @@
 
 def get_autocast_args(args):
     if DEPRECATED_AMP:
-        return {'enabled': args.amp, 'dtype': torch.bfloat16}
+        return {'enabled': args.amp, 'dtype': torch.float16}  # NOTE(review): fp16 autocast normally needs gradient scaling (GradScaler) - confirm the training loop has it
     else:
-        return {'device_type': 'cuda', 'enabled': args.amp, 'dtype': torch.bfloat16}
+        return {'device_type': 'cuda', 'enabled': args.amp, 'dtype': torch.float16}  # NOTE(review): same fp16 gradient-scaling caveat applies on this branch
 
 
 def train_one_epoch(

diff --git a/rfdetr/main.py b/rfdetr/main.py
@@ -45,6 +45,8 @@
 import shutil
 from rfdetr.util.files import download_file
 import os
+from rfdetr.config import TrainConfig
+
 if str(os.environ.get("USE_FILE_SYSTEM_SHARING", "False")).lower() in ["true", "1"]:
     import torch.multiprocessing
     torch.multiprocessing.set_sharing_strategy('file_system')
@@ -440,11 +442,11 @@ def lr_lambda(current_step: int):
         for callback in callbacks["on_train_end"]:
             callback()
 
-    def export(self, output_dir="output", infer_dir=None, simplify=False,  backbone_only=False, opset_version=17, verbose=True, force=False, shape=None, batch_size=1, **kwargs):
+    def export(self, config: TrainConfig, output_dir="output", infer_dir=None, simplify=False,  backbone_only=False, opset_version=17, verbose=True, force=False, shape=None, batch_size=1, **kwargs):
         """Export the trained model to ONNX format"""
         print(f"Exporting model to ONNX format")
         from rfdetr.deploy.export import export_onnx, onnx_simplify, make_infer_image
-
+        import onnx
 
         device = self.device
         model = deepcopy(self.model.to("cpu"))
@@ -501,7 +503,39 @@ def export(self, output_dir="output", infer_dir=None, simplify=False,  backbone_
 
         print("ONNX export completed successfully")
         self.model = self.model.to(device)
+
+        import wandb
+        with open(config.ann_file, 'r') as file:
+            data_json = json.load(file)
+
+        labels = [i["name"] for i in data_json["categories"]]
+        model = onnx.load("/detr_train/output/inference_model.onnx")
+        metadata = model.metadata_props.add()
+        metadata.key = "labels"
+        metadata.value = json.dumps(labels)
+        onnx.save(model, "/detr_train/output/model_metadata.onnx")
+        if os.path.exists("/detr_train/output/model_metadata.onnx"):
+        #with wandb.init(id="test", resume="allow", project="detr_kudo") as run:       
+            art_model = wandb.Artifact("model_metadata.onnx", type='model')
+            art_model.add_file(local_path="/detr_train/output/model_metadata.onnx", name="model")
+            wandb.log_artifact(art_model)  
+            artifact = wandb.Artifact(name="checkpoint.pth", type="checkpoint")
+            artifact.add_file(local_path="/detr_train/output/checkpoint.pth", name="checkpoint")
+            wandb.log_artifact(artifact) 
 
+        #IMPORT TEST IMAGES TO WANDB
+        from rfdetr.util.detr_inf import process_images_from_folder
+        processed_20 = process_images_from_folder(
+            labelmap=labels, 
+            src_folder = config.dataset_dir, 
+            model_path = "/detr_train/output/model_metadata.onnx")
+        #print(len(processed_20))
+        examples = []
+        for i in range(len(processed_20)):      
+                image = wandb.Image(processed_20[i], caption=f"test_images")
+                examples.append(image)
+        wandb.log({"test_images": examples})
+        wandb.finish()            
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser('LWDETR training and evaluation script', parents=[get_args_parser()])
@@ -994,4 +1028,4 @@ def populate_args(
         gradient_checkpointing=gradient_checkpointing,
         **extra_kwargs
     )
-    return args
+    return args
diff --git a/rfdetr/util/detr_inf.py b/rfdetr/util/detr_inf.py
@@ -0,0 +1,169 @@
+import onnxruntime as ort
+import numpy as np
+import supervision as sv
+import cv2
+from pathlib import Path
+from typing import Optional
+import os
+
+def box_cxcywh_to_xyxy(boxes):
+    """Turn center-format boxes (cx, cy, w, h) into corner format (x1, y1, x2, y2)."""
+    # The four components are taken from the last axis, so any leading shape works.
+    center_x, center_y, width, height = np.split(boxes, 4, axis=-1)
+    half_w, half_h = 0.5 * width, 0.5 * height
+    top_left = [center_x - half_w, center_y - half_h]
+    bottom_right = [center_x + half_w, center_y + half_h]
+    return np.concatenate(top_left + bottom_right, axis=-1)
+
+
+class RfDetr:
+    """ONNX Runtime wrapper that runs an exported detection model on a single BGR image."""
+    def __init__(self, model_path: str, confidence_threshold: float = 0.45, resolution: int = 336):
+        self._session = ort.InferenceSession(model_path)
+        self._resolution = resolution
+        self._input_name = self._session.get_inputs()[0].name
+        self._means = np.array([0.485, 0.456, 0.406], dtype=np.float32)
+        self._stds = np.array([0.229, 0.224, 0.225], dtype=np.float32)
+        self._confidence_threshold = confidence_threshold
+
+    def __call__(self, *inputs, return_info=False, return_bboxes=False):
+        """Detect objects in inputs[0]; return_info and return_bboxes are accepted but unused."""
+        img = inputs[0]
+        orig_h, orig_w = img.shape[:2]
+        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        input_data = self._preprocess(rgb)
+        outputs = self._session.run(None, {self._input_name: input_data})
+        return self._postprocess(outputs, np.array([[orig_h, orig_w]]))
+
+    def _preprocess(self, img_array: np.array):
+        """Resize to the model resolution, divide by 255, normalize, return a channels-first batch of one."""
+        img_array = cv2.resize(img_array, (self._resolution, self._resolution))
+        normalized = (np.array(img_array, dtype=np.float32) / 255.0 - self._means) / self._stds
+        return normalized.transpose(2, 0, 1)[np.newaxis, ...]
+
+    def _postprocess(self, outputs, target_sizes, num_select=300):
+        """Turn raw model outputs into sv.Detections scaled to the image size.
+
+        outputs must unpack to (boxes, logits) in that order. Boxes are read as
+        (cx, cy, w, h) and multiplied by the image size, so they are presumably
+        normalized to [0, 1] - confirm against the exported model. target_sizes
+        holds one (height, width) row per batch element. Only one image is
+        returned: the last batch element (__call__ always sends a batch of one).
+        """
+        out_bbox, out_logits = outputs
+        batch_size = out_logits.shape[0]
+        num_classes = out_logits.shape[2]
+        prob = 1 / (1 + np.exp(-out_logits))
+        # argpartition raises when asked for more entries than exist, so clamp the count.
+        num_select = min(num_select, out_logits.shape[1] * num_classes)
+        scores = np.zeros((batch_size, num_select))
+        labels = np.zeros((batch_size, num_select), dtype=int)
+        topk_boxes = np.zeros((batch_size, num_select), dtype=int)
+        for i in range(batch_size):
+            flat_probs = prob[i].ravel()
+            topk_indices = np.argpartition(flat_probs, -num_select)[-num_select:]
+            # argpartition leaves the top-k unordered; sort them by descending score.
+            topk_indices = topk_indices[np.argsort(-flat_probs[topk_indices])]
+            scores[i] = flat_probs[topk_indices]
+            # A flat index encodes query * num_classes + class.
+            labels[i] = topk_indices % num_classes
+            topk_boxes[i] = topk_indices // num_classes
+
+        boxes = box_cxcywh_to_xyxy(out_bbox)
+        selected_boxes = np.take_along_axis(boxes, topk_boxes[..., np.newaxis].repeat(4, axis=-1), axis=1)
+        img_h = target_sizes[:, 0]
+        img_w = target_sizes[:, 1]
+        scale_fct = np.stack([img_w, img_h, img_w, img_h], axis=1)
+        scaled_boxes = selected_boxes * scale_fct[:, np.newaxis, :]
+        # Index explicitly instead of relying on the loop variable outliving the loop.
+        last = batch_size - 1
+        mask = scores[last] >= self._confidence_threshold
+        return sv.Detections(
+            xyxy=scaled_boxes[last][mask],
+            class_id=labels[last][mask],
+            confidence=scores[last][mask]
+        )
+
+
+# -------
+
+def process_frame(
+    frame: np.ndarray,
+    model: callable,
+    labelmap: tuple,
+) -> np.ndarray:
+    """Run detection on one frame and return an annotated copy.
+
+    The frame is resized to the module-level FRAME_SIZE before being passed to
+    model, so the returned image has that size rather than the input's.
+    labelmap is indexed with class_id - 1, i.e. class ids are presumably
+    1-based - TODO confirm against the exported model's label order.
+    """
+    resized = cv2.resize(frame, FRAME_SIZE)
+    detections = model(resized)
+    # Look up the display name for every detection before drawing anything.
+    names = [labelmap[class_id - 1] for class_id in detections.class_id]
+
+    # Boxes are drawn on a copy, so the resized frame itself stays untouched.
+    with_boxes = sv.BoxAnnotator().annotate(resized.copy(), detections=detections)
+    return sv.LabelAnnotator(smart_position=True).annotate(
+        with_boxes,
+        detections=detections,
+        labels=names,
+    )
+
+#DEFAULT_LABELMAP = ("barcode", "pallet")
+FRAME_SIZE = (432, 768)  # dsize handed to cv2.resize in process_frame, i.e. (width, height)
+
+def process_images_from_folder(
+    labelmap,
+    src_folder: Path,
+    model_path: Optional[Path] = None,
+    image_exts: tuple = ('.jpg',),
+):
+    """Annotate up to 20 images from ``<src_folder>/train`` and return them in a list.
+
+    Args:
+        labelmap: Sequence of class names, forwarded to process_frame.
+        src_folder: Dataset root; images are read from its "train" subfolder.
+        model_path: Path to the ONNX model loaded by RfDetr. Required despite
+            the None default.
+        image_exts: Lower-case file suffixes (with the dot) to accept. A single
+            string is treated as one suffix.
+
+    Returns:
+        List of annotated images (np.ndarray), at most 20. Files that
+        cv2.imread cannot decode are reported and skipped, so the list may
+        be shorter than the number of candidates. Nothing is written to disk.
+
+    Raises:
+        ValueError: If model_path is None.
+    """
+    if model_path is None:
+        raise ValueError("model_path is required")
+    # A bare string would turn the suffix check into a substring test, which
+    # also accepts files with no extension at all ('' is in every string).
+    if isinstance(image_exts, str):
+        image_exts = (image_exts,)
+
+    train_dir = Path(src_folder) / "train"
+    model = RfDetr(str(model_path), resolution=336)
+
+    # Sorted so the same 20 files are picked on every run.
+    image_files = sorted(f for f in train_dir.iterdir() if f.suffix.lower() in image_exts)
+    processed_20 = []
+    for image_path in image_files[:20]:
+        img = cv2.imread(str(image_path))
+        if img is None:
+            print(f"Не удалось загрузить изображение: {image_path}")
+            continue
+        processed_20.append(process_frame(img, model, labelmap))
+    return processed_20
+
+if __name__ == "__main__":
+    # Manual smoke run. The label names are examples only; replace them with the
+    # class list of the model under test.
+    annotated = process_images_from_folder(
+        labelmap=("barcode", "pallet"), src_folder=Path("input_images"),
+        model_path=Path("inference_model.onnx"))
+    print(f"Annotated {len(annotated)} image(s)")