def get_autocast_args(args):
    """Build the keyword arguments used to configure mixed-precision autocast.

    Args:
        args: Object exposing an ``amp`` attribute; its value is passed
            through unchanged as the ``enabled`` flag.

    Returns:
        dict: Always contains ``'enabled'`` (``args.amp``) and ``'dtype'``
        (``torch.float16``). When the module-level ``DEPRECATED_AMP`` flag is
        falsy, ``'device_type': 'cuda'`` is included as the first key.
    """
    autocast_kwargs = {'enabled': args.amp, 'dtype': torch.float16}
    if DEPRECATED_AMP:
        return autocast_kwargs
    # NOTE(review): the device type is hard-coded to CUDA on this path;
    # presumably the consumer is the device-agnostic autocast API - confirm.
    return {'device_type': 'cuda', **autocast_kwargs}
# Size handed to ``cv2.resize`` before detection/annotation. OpenCV expects
# ``dsize`` as (width, height), so frames become 432 px wide and 768 px tall.
FRAME_SIZE = (432, 768)


def box_cxcywh_to_xyxy(boxes):
    """Convert boxes from center format to corner format.

    Args:
        boxes: Array (or nested sequence) whose last axis holds
            ``(cx, cy, w, h)``.

    Returns:
        np.ndarray: Same shape as ``boxes`` with the last axis holding
        ``(x1, y1, x2, y2)``.
    """
    # np.split on a plain list would split along the wrong axis, so coerce first.
    boxes = np.asarray(boxes)
    cx, cy, w, h = np.split(boxes, 4, axis=-1)
    half_w = 0.5 * w
    half_h = 0.5 * h
    return np.concatenate(
        [cx - half_w, cy - half_h, cx + half_w, cy + half_h], axis=-1
    )


class RfDetr:
    """Callable wrapper that runs a detection model through ONNX Runtime.

    Calling an instance with a BGR image returns an ``sv.Detections`` object
    with boxes expressed in the pixel coordinates of that image.
    """

    def __init__(self, model_path: str, confidence_threshold: float = 0.45, resolution: int = 336):
        """Load the ONNX model and store the preprocessing settings.

        Args:
            model_path: Path of the ``.onnx`` file to load.
            confidence_threshold: Detections scoring below this are dropped.
            resolution: Side length of the square image fed to the model.
        """
        self._session = ort.InferenceSession(model_path)
        self._resolution = resolution
        self._input_name = self._session.get_inputs()[0].name
        # Per-channel normalisation constants (the commonly used ImageNet
        # RGB mean / standard deviation).
        self._means = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        self._stds = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        self._confidence_threshold = confidence_threshold

    def __call__(self, *inputs, return_info=False, return_bboxes=False):
        """Run detection on a single BGR image (``inputs[0]``).

        ``return_info`` and ``return_bboxes`` are accepted for interface
        compatibility but are not used.
        """
        img = inputs[0]
        orig_h, orig_w = img.shape[:2]
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_data = self._preprocess(rgb)
        outputs = self._session.run(None, {self._input_name: input_data})
        return self._postprocess(outputs, np.array([[orig_h, orig_w]]))

    def _preprocess(self, img_array: np.ndarray):
        """Resize, scale to [0, 1], normalise and reorder to a 1xCxHxW batch."""
        side = self._resolution
        resized = cv2.resize(img_array, (side, side))
        scaled = np.array(resized, dtype=np.float32) / 255.0
        normalized = (scaled - self._means) / self._stds
        return normalized.transpose(2, 0, 1)[np.newaxis, ...]

    def _postprocess(self, outputs, target_sizes, num_select=300):
        """Turn raw model outputs into thresholded ``sv.Detections``.

        Args:
            outputs: ``(boxes, logits)`` pair as returned by the session.
            target_sizes: Array of ``[height, width]`` rows; the first row is
                used to scale the normalised boxes to pixels.
            num_select: Maximum number of (query, class) candidates to rank.
        """
        xyxy, class_id, confidence = self._decode(outputs, target_sizes, num_select)
        return sv.Detections(xyxy=xyxy, class_id=class_id, confidence=confidence)

    def _decode(self, outputs, target_sizes, num_select=300):
        """Rank candidates of the first image and apply the score threshold.

        The wrapper feeds one image per call, so only batch element 0 is
        decoded. (The previous code indexed with a variable leaked from its
        batch loop, which failed on an empty batch and silently picked the
        last element otherwise.)

        Returns:
            tuple: ``(xyxy, class_id, confidence)`` numpy arrays, sorted by
            descending confidence.

        Raises:
            ValueError: If the model returned an empty batch.
        """
        # NOTE(review): assumes the model emits boxes first and logits second
        # - confirm against the exported graph.
        out_bbox, out_logits = outputs
        out_bbox = np.asarray(out_bbox)
        out_logits = np.asarray(out_logits)
        if out_logits.shape[0] == 0:
            raise ValueError("model returned an empty batch")

        num_classes = out_logits.shape[2]
        flat_probs = (1 / (1 + np.exp(-out_logits[0]))).ravel()

        # argpartition raises when asked for more elements than exist.
        k = min(num_select, flat_probs.size)
        if k <= 0:
            return (
                np.empty((0, 4), dtype=np.float64),
                np.empty((0,), dtype=int),
                np.empty((0,), dtype=np.float64),
            )

        top = np.argpartition(flat_probs, -k)[-k:]
        top = top[np.argsort(-flat_probs[top])]

        confidence = flat_probs[top].astype(np.float64)
        # Flat index = query_index * num_classes + class_index.
        class_id = top % num_classes
        query_index = top // num_classes

        boxes = box_cxcywh_to_xyxy(out_bbox[0])[query_index]
        img_h, img_w = np.asarray(target_sizes)[0]
        xyxy = boxes * np.array([img_w, img_h, img_w, img_h])

        keep = confidence >= self._confidence_threshold
        return xyxy[keep], class_id[keep], confidence[keep]


def process_frame(
    frame: np.ndarray,
    model: callable,
    labelmap: tuple,
) -> np.ndarray:
    """Detect objects on one frame and draw boxes and labels on it.

    Args:
        frame: Image to process; it is resized to ``FRAME_SIZE`` first.
        model: Callable returning ``sv.Detections`` for an image.
        labelmap: Sequence of class names.

    Returns:
        np.ndarray: Annotated copy of the resized frame.
    """
    box_annotator = sv.BoxAnnotator()
    label_annotator = sv.LabelAnnotator(smart_position=True)

    resized_frame = cv2.resize(frame, FRAME_SIZE)
    detections = model(resized_frame)

    annotated_frame = box_annotator.annotate(
        resized_frame.copy(), detections=detections
    )
    # NOTE(review): class ids are shifted down by one before the lookup, so
    # an id of 0 wraps around to the last label - confirm ids are 1-based.
    labels = [labelmap[class_id - 1] for class_id in detections.class_id]
    return label_annotator.annotate(
        annotated_frame, detections=detections, labels=labels
    )


def process_images_from_folder(
    labelmap,
    src_folder: Path,
    model_path: Optional[Path] = None,
    image_exts: tuple = ('.jpg',),
    max_images: int = 20,
):
    """Annotate a sample of images from ``<src_folder>/train``.

    Args:
        labelmap: Sequence of class names used for the drawn labels.
        src_folder: Dataset root; images are read from its ``train``
            sub-directory.
        model_path: Path to the ONNX detection model (required).
        image_exts: File suffixes to accept, compared case-insensitively.
            A single string is also accepted.
        max_images: Maximum number of files to consider.

    Returns:
        list: Annotated frames (numpy arrays) for every image that could be
        read. Nothing is written to disk.

    Raises:
        ValueError: If ``model_path`` is ``None``.
    """
    if model_path is None:
        raise ValueError("model_path is required")

    # A bare string would turn the membership test below into a substring
    # search (matching files with no suffix at all), so normalise it.
    if isinstance(image_exts, str):
        image_exts = (image_exts,)
    allowed_exts = {ext.lower() for ext in image_exts}

    train_dir = Path(src_folder) / "train"
    # Sorted so the selected sample is the same on every run.
    image_files = sorted(
        path for path in train_dir.iterdir()
        if path.suffix.lower() in allowed_exts
    )
    selected = image_files[:max_images]
    if not selected:
        # Nothing to annotate: skip loading the model.
        return []

    model = RfDetr(str(model_path), resolution=336)

    annotated = []
    for image_path in selected:
        img = cv2.imread(str(image_path))
        if img is None:
            print(f"Не удалось загрузить изображение: {image_path}")
            continue
        annotated.append(process_frame(img, model, labelmap))
    return annotated


if __name__ == "__main__":
    process_images_from_folder(
        # Demo labels taken from the previously commented-out default map.
        labelmap=("barcode", "pallet"),
        src_folder=Path("input_images"),
        model_path=Path("inference_model.onnx"),
    )
""" - def __init__(self, output_dir: str, project: Optional[str] = None, run: Optional[str] = None, config: Optional[dict] = None): + def __init__(self,config_train: TrainConfig, output_dir: str, datasets_conf: str, project: Optional[str] = None, run: Optional[str] = None, config: Optional[dict] = None): self.output_dir = output_dir + #self.ds_dir = config.data_dir + #self.ds_conf = config.datasets_conf if wandb: self.run = wandb.init( project=project, @@ -198,6 +206,33 @@ def __init__(self, output_dir: str, project: Optional[str] = None, run: Optional self.run = None print("Unable to initialize W&B. Logging is turned off for this session. Run 'pip install wandb' to enable logging.") + #Add datasets table and labels to wandb run + #conf_folder = "/detr_train/configs" + #ann_file = "train/_annotations.coco.json" + #data_dir="/data/datasets/detr_train_testalgo" + #datasets_conf="config_kudo.yaml" + yaml_path = os.path.join(config_train.conf_folder, config_train.datasets_conf) + json_path = config_train.ann_file + with open(yaml_path, 'r') as file: + data_yaml = yaml.safe_load(file) + + with open(json_path, 'r') as file: + data_json = json.load(file) + + labels = [i["name"] for i in data_json["categories"]] + + yaml_key = "datasets" + data_tab = [data_yaml[yaml_key], [60, 20, 20], labels] + max_len = max(map(len, data_tab)) + data_tab = np.array([i + [""] * (max_len - len(i)) for i in data_tab]).T + + + ds_table = wandb.Table(columns=["datasets", "division", "labels"], data=data_tab) + wandb.log({"Datasets": ds_table}) + #artifact = wandb.Artifact("datasets", type = "dataset") + #artifact.add(ds_table, "datasets") + #wandb.log_artifact(artifact) + def update(self, values: dict): if not wandb or not self.run: return @@ -213,12 +248,16 @@ def update(self, values: dict): if 'test_coco_eval_bbox' in values: coco_eval = values['test_coco_eval_bbox'] ap50_90 = safe_index(coco_eval, 0) - ap50 = safe_index(coco_eval, 1) + #ap50 = safe_index(coco_eval, 1) ar50_90 = 
safe_index(coco_eval, 6) + f1_50_90 = 2*(ap50_90*ar50_90)/(ap50_90+ar50_90) + + log_dict["Metrics/Base/F1_50_90"] = f1_50_90 + if ap50_90 is not None: log_dict["Metrics/Base/AP50_90"] = ap50_90 - if ap50 is not None: - log_dict["Metrics/Base/AP50"] = ap50 + #if ap50 is not None: + # log_dict["Metrics/Base/AP50"] = ap50 if ar50_90 is not None: log_dict["Metrics/Base/AR50_90"] = ar50_90 @@ -227,12 +266,16 @@ def update(self, values: dict): ema_ap50_90 = safe_index(ema_coco_eval, 0) ema_ap50 = safe_index(ema_coco_eval, 1) ema_ar50_90 = safe_index(ema_coco_eval, 6) - if ema_ap50_90 is not None: - log_dict["Metrics/EMA/AP50_90"] = ema_ap50_90 - if ema_ap50 is not None: - log_dict["Metrics/EMA/AP50"] = ema_ap50 - if ema_ar50_90 is not None: - log_dict["Metrics/EMA/AR50_90"] = ema_ar50_90 + ema_f1_50_90 = 2*(ema_ap50_90*ema_ar50_90)/(ema_ap50_90+ema_ar50_90) + + #log_dict["Metrics/EMA/F1_50_90"] = ema_f1_50_90 + + #if ema_ap50_90 is not None: + #log_dict["Metrics/EMA/AP50_90"] = ema_ap50_90 + #if ema_ap50 is not None: + #log_dict["Metrics/EMA/AP50"] = ema_ap50 + #if ema_ar50_90 is not None: + #log_dict["Metrics/EMA/AR50_90"] = ema_ar50_90 wandb.log(log_dict) @@ -240,4 +283,4 @@ def close(self): if not wandb or not self.run: return - self.run.finish() \ No newline at end of file + self.run.finish()