diff --git a/.gitignore b/.gitignore index 62952767..3405711d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,16 +1,26 @@ -**__pycache__** -**build** -**egg-info** -**dist** -data/ -*.pyc -venv/ -*.idea/ +data +*.log + +.DS_Store + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions *.so -*.yaml -*.sh -*.pth -*.pkl -*.zip -*.bin -.vscode/ + +# Virtual Environments +venv/ +env/ +.env + +# Distribution / packaging +build/ +dist/ +work_dirs/ + +*.egg-info/ +*.log.json +nohup.out diff --git a/Changes.md b/Changes.md new file mode 100644 index 00000000..78bdf3db --- /dev/null +++ b/Changes.md @@ -0,0 +1,34 @@ +# Changes: Knowledge Distillation (KD) + +Knowledge distillation was added on top of the existing CenterPoint training stack. Two variants exist; only one is used per config: + +- **Response-based KD** (`kd.type = "heatmap_mse"`): MSE between student and teacher **heatmap** outputs, weighted by `lambda_kd`. +- **Feature-based KD** (`kd.type = "feature_mse"`): MSE on **`CenterHead.shared_conv`** features, weighted by `lambda_feat`. + +The original detection loss (focal heatmap + L1 regression from `centernet_loss.py`) is unchanged. KD terms are added in `CenterHead.loss`. At test time only the student checkpoint is used. + +--- + +## Code changes + +| File | Change | +|------|--------| +| `det3d/torchie/apis/train.py` | If `cfg.kd.enabled`, build teacher from `teacher_config`, load `teacher_checkpoint`, freeze it, pass `teacher_model` and `kd_cfg` to the trainer. | +| `det3d/torchie/trainer/trainer.py` | Each training step: teacher forward under `no_grad` (`return_preds` and/or `return_feats`), then student forward with teacher outputs and `kd_cfg`. | +| `det3d/models/detectors/point_pillars.py` | Return `head_shared` from the head; support `return_preds` / `return_feats` for the teacher; pass KD arguments into `bbox_head.loss`. 
| +| `det3d/models/detectors/voxelnet.py` | Same KD-related forward/loss wiring as PointPillars. | +| `det3d/models/bbox_heads/center_head.py` | In `loss()`: compute `hm_kd_loss` (heatmap MSE) or `feat_kd_loss` (shared-feature MSE) and add to the per-task loss; log both metrics. | +| `det3d/torchie/trainer/hooks/logger/text.py` | Log `hm_kd_loss` and `feat_kd_loss` with 6 decimal places. | +| `det3d/torchie/apis/env.py` | Device selection limited to CUDA or CPU (MPS removed). | + +**Unchanged:** `det3d/models/losses/centernet_loss.py` (baseline `FastFocalLoss`, `RegLoss`). + +--- + +## New configs (`configs/nusc/pp/`) + +Each file sets a slimmer student (reduced reader/neck/head channels) and a `kd` block (`enabled`, `type`, `lambda_kd` or `lambda_feat`, `teacher_config`, `teacher_checkpoint`). + +**Response-based:** `response_based_kd.py`, `response_based_kd_05.py`, `response_based_kd_08.py`, `response_based_kd_smoke.py`, `response_based_kd_resnet.py`, `response_based_kd_resnet_smoke.py` + +**Feature-based:** `feature_based_kd.py`, `feature_based_kd_05.py`, `feature_based_kd_08.py`, `feature_based_kd_smoke.py` diff --git a/configs/mvp/nusc_centerpoint_pp_fix_bn_z_scale.py b/configs/mvp/nusc_centerpoint_pp_fix_bn_z_scale.py index c58a6046..4ab92871 100644 --- a/configs/mvp/nusc_centerpoint_pp_fix_bn_z_scale.py +++ b/configs/mvp/nusc_centerpoint_pp_fix_bn_z_scale.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -87,12 +88,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -165,8 +166,8 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = 
"data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" test_anno = None data = dict( diff --git a/configs/mvp/nusc_centerpoint_pp_fix_bn_z_scale_virtual.py b/configs/mvp/nusc_centerpoint_pp_fix_bn_z_scale_virtual.py index 2071f2ac..3790bc97 100644 --- a/configs/mvp/nusc_centerpoint_pp_fix_bn_z_scale_virtual.py +++ b/configs/mvp/nusc_centerpoint_pp_fix_bn_z_scale_virtual.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -88,12 +89,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo_virtual.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo_virtual.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -166,8 +167,8 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" test_anno = None data = dict( diff --git a/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale.py b/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale.py index 40c3425c..9e90ce5a 100644 --- a/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale.py +++ b/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -87,12 +88,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - 
db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -156,8 +157,8 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" test_anno = None data = dict( diff --git a/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale_debug.py b/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale_debug.py index dd7e0fc0..5a4be6ec 100644 --- a/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale_debug.py +++ b/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale_debug.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -87,12 +88,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -156,8 +157,8 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" test_anno = None data = dict( diff --git a/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale_virtual.py b/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale_virtual.py index 39b8e4cf..9ada7854 100644 --- 
a/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale_virtual.py +++ b/configs/mvp/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_scale_virtual.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -88,12 +89,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo_virtual.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo_virtual.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -157,8 +158,8 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" test_anno = None data = dict( diff --git a/configs/mvp/nusc_two_stage_base_with_virtual.py b/configs/mvp/nusc_two_stage_base_with_virtual.py index 6344e21c..a8987a1b 100644 --- a/configs/mvp/nusc_two_stage_base_with_virtual.py +++ b/configs/mvp/nusc_two_stage_base_with_virtual.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -138,12 +139,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo_virtual.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo_virtual.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -213,8 +214,8 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_painted_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_painted_True.pkl" +train_anno = 
f"{data_root}/infos_train_10sweeps_withvelo_filter_painted_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_painted_True.pkl" test_anno = None data = dict( diff --git a/configs/nusc/pp/nusc_centerpoint_pp_02voxel_two_pfn_10sweep_demo.py b/configs/nusc/pp/.ipynb_checkpoints/demo-checkpoint.py similarity index 100% rename from configs/nusc/pp/nusc_centerpoint_pp_02voxel_two_pfn_10sweep_demo.py rename to configs/nusc/pp/.ipynb_checkpoints/demo-checkpoint.py diff --git a/configs/nusc/pp/baseline.py b/configs/nusc/pp/baseline.py new file mode 100644 index 00000000..22e87b46 --- /dev/null +++ b/configs/nusc/pp/baseline.py @@ -0,0 +1,248 @@ +import os +import itertools +import logging +from det3d.utils.config_tool import get_downsample_factor +import os +from dotenv import load_dotenv + +print(f"Found ENV: {load_dotenv()}") +samples_per_gpu = int(os.getenv("GPU_SAMPLES", 2)) +workers_per_gpu = int(os.getenv("GPU_WORKERS", 4)) +gpu_ids = os.getenv("SELECTED_GPUS", "0") +use_subset = os.getenv("USE_SUBSET", "True").lower() in ("true", "1", "yes") + +print(f"Samples per gpu: {samples_per_gpu}") +print(f"Workers per gpu: {workers_per_gpu}") +print(f"Using GPUs: {gpu_ids}") +print(f"Using {'subset' if use_subset else 'full'}") + +tasks = [ + dict(num_class=1, class_names=["car"]), + dict(num_class=2, class_names=["truck", "construction_vehicle"]), + dict(num_class=2, class_names=["bus", "trailer"]), + dict(num_class=1, class_names=["barrier"]), + dict(num_class=2, class_names=["motorcycle", "bicycle"]), + dict(num_class=2, class_names=["pedestrian", "traffic_cone"]), +] + +class_names = list(itertools.chain(*[t["class_names"] for t in tasks])) + +# training and testing settings +target_assigner = dict( + tasks=tasks, +) + + +# model settings +model = dict( + type="PointPillars", + pretrained=None, + reader=dict( + type="PillarFeatureNet", + num_filters=[64, 64], + num_input_features=5, + with_distance=False, + voxel_size=(0.2, 0.2, 8), + pc_range=(-51.2, 
-51.2, -5.0, 51.2, 51.2, 3.0), + ), + backbone=dict(type="PointPillarsScatter", ds_factor=1), + neck=dict( + type="RPN", + layer_nums=[3, 5, 5], + ds_layer_strides=[2, 2, 2], + ds_num_filters=[64, 128, 256], + us_layer_strides=[0.5, 1, 2], + us_num_filters=[128, 128, 128], + num_input_features=64, + logger=logging.getLogger("RPN"), + ), + bbox_head=dict( + # type='RPNHead', + type="CenterHead", + in_channels=sum([128, 128, 128]), + tasks=tasks, + dataset='nuscenes', + weight=0.25, + code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 1.0, 1.0], + common_heads={'reg': (2, 2), 'height': (1, 2), 'dim':(3, 2), 'rot':(2, 2), 'vel': (2, 2)}, # (output_channel, num_conv) + ), +) + +assigner = dict( + target_assigner=target_assigner, + out_size_factor=get_downsample_factor(model), + gaussian_overlap=0.1, + max_objs=500, + min_radius=2, +) + + +train_cfg = dict(assigner=assigner) + +test_cfg = dict( + post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], + max_per_img=500, + nms=dict( + nms_pre_max_size=1000, + nms_post_max_size=83, + nms_iou_threshold=0.2, + ), + score_threshold=0.1, + pc_range=[-51.2, -51.2], + out_size_factor=get_downsample_factor(model), + voxel_size=[0.2, 0.2] +) + +# dataset settings +dataset_type = "NuScenesDataset" +nsweeps = 10 +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") + +db_sampler = dict( + type="GT-AUG", + enable=False, + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", + sample_groups=[ + dict(car=2), + dict(truck=3), + dict(construction_vehicle=7), + dict(bus=4), + dict(trailer=6), + dict(barrier=2), + dict(motorcycle=6), + dict(bicycle=6), + dict(pedestrian=2), + dict(traffic_cone=2), + ], + db_prep_steps=[ + dict( + filter_by_min_num_points=dict( + car=5, + truck=5, + bus=5, + trailer=5, + construction_vehicle=5, + traffic_cone=5, + barrier=5, + motorcycle=5, + bicycle=5, + pedestrian=5, + ) + ), + dict(filter_by_difficulty=[-1],), + ], + global_random_rotation_range_per_object=[0, 0], + 
rate=1.0, +) +train_preprocessor = dict( + mode="train", + shuffle_points=True, + global_rot_noise=[-0.3925, 0.3925], + global_scale_noise=[0.95, 1.05], + db_sampler=None, + class_names=class_names, +) + +val_preprocessor = dict( + mode="val", + shuffle_points=False, +) + +voxel_generator = dict( + range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0], + voxel_size=[0.2, 0.2, 8], + max_points_in_voxel=20, + max_voxel_num=[30000, 60000], +) + +train_pipeline = [ + dict(type="LoadPointCloudFromFile", dataset=dataset_type), + dict(type="LoadPointCloudAnnotations", with_bbox=True), + dict(type="Preprocess", cfg=train_preprocessor), + dict(type="Voxelization", cfg=voxel_generator), + dict(type="AssignLabel", cfg=train_cfg["assigner"]), + dict(type="Reformat"), +] +test_pipeline = [ + dict(type="LoadPointCloudFromFile", dataset=dataset_type), + dict(type="LoadPointCloudAnnotations", with_bbox=True), + dict(type="Preprocess", cfg=val_preprocessor), + dict(type="Voxelization", cfg=voxel_generator), + dict(type="AssignLabel", cfg=train_cfg["assigner"]), + dict(type="Reformat"), +] + +if use_subset: + train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True_subset_10.pkl" +else: + train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" + +if use_subset: # validate on the subset_10 infos whenever training uses the subset + val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True_subset_10.pkl" +else: + val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" + +test_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" + +data = dict( + samples_per_gpu=samples_per_gpu, + workers_per_gpu=workers_per_gpu, + train=dict( + type=dataset_type, + root_path=data_root, + info_path=train_anno, + ann_file=train_anno, + nsweeps=nsweeps, + class_names=class_names, + pipeline=train_pipeline, + ), + val=dict( + type=dataset_type, + root_path=data_root, + info_path=val_anno, + test_mode=True, + ann_file=val_anno, + nsweeps=nsweeps, + class_names=class_names, + pipeline=test_pipeline, + ), + test=dict( +
type=dataset_type, + root_path=data_root, + info_path=test_anno, + ann_file=test_anno, + nsweeps=nsweeps, + class_names=class_names, + pipeline=test_pipeline, + ), +) + + +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# optimizer +optimizer = dict( + type="adam", amsgrad=0.0, wd=0.01, fixed_wd=True, moving_average=False, +) +lr_config = dict( + type="one_cycle", lr_max=0.001, moms=[0.95, 0.85], div_factor=10.0, pct_start=0.4, +) + +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=5, + hooks=[ + dict(type="TextLoggerHook"), + # dict(type='TensorboardLoggerHook') + ], +) +# yapf:enable +# runtime settings +total_epochs = 20 +device_ids = [int(x) for x in gpu_ids.split(",") if x.strip() in ["0", "1"]] +dist_params = dict(backend="nccl", init_method="env://") +log_level = "INFO" +work_dir = './work_dirs/{}/'.format(__file__[__file__.rfind('/') + 1:-3]) +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/nusc/pp/baseline_smoke.py b/configs/nusc/pp/baseline_smoke.py new file mode 100644 index 00000000..a5e1c125 --- /dev/null +++ b/configs/nusc/pp/baseline_smoke.py @@ -0,0 +1,21 @@ +from pathlib import Path + +# Keep smoke runs aligned with the real training setup by loading +# the main PointPillars config first, then overriding only runtime knobs. +_base_cfg = Path(__file__).with_name("baseline.py") +exec(_base_cfg.read_text(), globals(), globals()) + +# Run a tiny local loop quickly while preserving model/data pipeline structure. +total_epochs = 1 + +data["samples_per_gpu"] = 1 +data["workers_per_gpu"] = 0 + +# Subsample infos to reduce the number of train/val samples for smoke tests. 
+for split in ("train", "val", "test"): + if split in data and isinstance(data[split], dict): + data[split]["load_interval"] = 20 + +log_config["interval"] = 1 +checkpoint_config["interval"] = 1 +work_dir = "./work_dirs/baseline_smoke/" diff --git a/configs/nusc/pp/feature_based_kd.py b/configs/nusc/pp/feature_based_kd.py new file mode 100644 index 00000000..ef6957cc --- /dev/null +++ b/configs/nusc/pp/feature_based_kd.py @@ -0,0 +1,25 @@ +from pathlib import Path + +# Uses the base config and overrides for student architecture and feature-based KD +_base_cfg = Path(__file__).with_name("baseline.py") +exec(_base_cfg.read_text(), globals(), globals()) + +# Smaller student architecture +model["reader"]["num_filters"] = [32, 32] +model["backbone"]["num_input_features"] = 32 +model["neck"]["num_input_features"] = 32 +model["neck"]["ds_num_filters"] = [32, 64, 128] +model["neck"]["us_num_filters"] = [64, 64, 64] +model["bbox_head"]["in_channels"] = sum([64, 64, 64]) + +# Feature KD (CenterHead shared_conv output MSE vs teacher) +kd = dict( + enabled=True, + type="feature_mse", + lambda_feat=0.2, + teacher_config="./configs/nusc/pp/baseline.py", + # teacher_checkpoint="./work_dirs/baseline_smoke/latest.pth", + teacher_checkpoint="../Computer-Vision/work_dirs/nusc_centerpoint_pp_02voxel_two_pfn_10sweep/latest.pth", +) + +work_dir = "./work_dirs/feature_based_kd/" diff --git a/configs/nusc/pp/feature_based_kd_05.py b/configs/nusc/pp/feature_based_kd_05.py new file mode 100644 index 00000000..875eee06 --- /dev/null +++ b/configs/nusc/pp/feature_based_kd_05.py @@ -0,0 +1,25 @@ +from pathlib import Path + +# Uses the base config and overrides for student architecture and feature-based KD +_base_cfg = Path(__file__).with_name("baseline.py") +exec(_base_cfg.read_text(), globals(), globals()) + +# Smaller student architecture +model["reader"]["num_filters"] = [32, 32] +model["backbone"]["num_input_features"] = 32 +model["neck"]["num_input_features"] = 32 
+model["neck"]["ds_num_filters"] = [32, 64, 128] +model["neck"]["us_num_filters"] = [64, 64, 64] +model["bbox_head"]["in_channels"] = sum([64, 64, 64]) + +# Feature KD (CenterHead shared_conv output MSE vs teacher) +kd = dict( + enabled=True, + type="feature_mse", + lambda_feat=0.5, + teacher_config="./configs/nusc/pp/baseline.py", + # teacher_checkpoint="./work_dirs/baseline_smoke/latest.pth", + teacher_checkpoint="../Computer-Vision/work_dirs/nusc_centerpoint_pp_02voxel_two_pfn_10sweep/latest.pth", +) + +work_dir = "./work_dirs/feature_based_kd_05/" diff --git a/configs/nusc/pp/feature_based_kd_08.py b/configs/nusc/pp/feature_based_kd_08.py new file mode 100644 index 00000000..4a07591b --- /dev/null +++ b/configs/nusc/pp/feature_based_kd_08.py @@ -0,0 +1,25 @@ +from pathlib import Path + +# Uses the base config and overrides for student architecture and feature-based KD +_base_cfg = Path(__file__).with_name("baseline.py") +exec(_base_cfg.read_text(), globals(), globals()) + +# Smaller student architecture +model["reader"]["num_filters"] = [32, 32] +model["backbone"]["num_input_features"] = 32 +model["neck"]["num_input_features"] = 32 +model["neck"]["ds_num_filters"] = [32, 64, 128] +model["neck"]["us_num_filters"] = [64, 64, 64] +model["bbox_head"]["in_channels"] = sum([64, 64, 64]) + +# Feature KD (CenterHead shared_conv output MSE vs teacher) +kd = dict( + enabled=True, + type="feature_mse", + lambda_feat=0.8, + teacher_config="./configs/nusc/pp/baseline.py", + # teacher_checkpoint="./work_dirs/baseline_smoke/latest.pth", + teacher_checkpoint="../Computer-Vision/work_dirs/nusc_centerpoint_pp_02voxel_two_pfn_10sweep/latest.pth", +) + +work_dir = "./work_dirs/feature_based_kd_08/" diff --git a/configs/nusc/pp/feature_based_kd_smoke.py b/configs/nusc/pp/feature_based_kd_smoke.py new file mode 100644 index 00000000..b8264980 --- /dev/null +++ b/configs/nusc/pp/feature_based_kd_smoke.py @@ -0,0 +1,17 @@ +from pathlib import Path + +_base_cfg =
Path(__file__).with_name("feature_based_kd.py") +exec(_base_cfg.read_text(), globals(), globals()) + +total_epochs = 1 + +data["samples_per_gpu"] = 1 +data["workers_per_gpu"] = 0 + +for split in ("train", "val", "test"): + if split in data and isinstance(data[split], dict): + data[split]["load_interval"] = 20 + +log_config["interval"] = 1 +checkpoint_config["interval"] = 1 +work_dir = "./work_dirs/feature_based_kd_smoke/" diff --git a/configs/nusc/pp/resnet.py b/configs/nusc/pp/resnet.py new file mode 100644 index 00000000..10af1194 --- /dev/null +++ b/configs/nusc/pp/resnet.py @@ -0,0 +1,248 @@ +import os +import itertools +import logging +from det3d.utils.config_tool import get_downsample_factor +import os +from dotenv import load_dotenv + +print(f"Found ENV: {load_dotenv()}") +samples_per_gpu = int(os.getenv("GPU_SAMPLES", 2)) +workers_per_gpu = int(os.getenv("GPU_WORKERS", 4)) +gpu_ids = os.getenv("SELECTED_GPUS", "0") +use_subset = os.getenv("USE_SUBSET", "True").lower() in ("true", "1", "yes") + +print(f"Samples per gpu: {samples_per_gpu}") +print(f"Workers per gpu: {workers_per_gpu}") +print(f"Using GPUs: {gpu_ids}") +print(f"Using {'subset' if use_subset else 'full'}") + +tasks = [ + dict(num_class=1, class_names=["car"]), + dict(num_class=2, class_names=["truck", "construction_vehicle"]), + dict(num_class=2, class_names=["bus", "trailer"]), + dict(num_class=1, class_names=["barrier"]), + dict(num_class=2, class_names=["motorcycle", "bicycle"]), + dict(num_class=2, class_names=["pedestrian", "traffic_cone"]), +] + +class_names = list(itertools.chain(*[t["class_names"] for t in tasks])) + +# training and testing settings +target_assigner = dict( + tasks=tasks, +) + + +# model settings +model = dict( + type="PointPillars", + pretrained=None, + reader=dict( + type="PillarFeatureNet", + num_filters=[64, 64], + num_input_features=5, + with_distance=False, + voxel_size=(0.2, 0.2, 8), + pc_range=(-51.2, -51.2, -5.0, 51.2, 51.2, 3.0), + ), + 
backbone=dict(type="PointPillarsScatter", ds_factor=1), + neck=dict( + type="ResNetNeck", + layer_nums=[2, 2, 2], + ds_layer_strides=[2, 2, 2], + ds_num_filters=[64, 128, 256], + us_layer_strides=[0.5, 1, 2], + us_num_filters=[128, 128, 128], + num_input_features=64, + logger=logging.getLogger("ResNetNeck"), + ), + bbox_head=dict( + type="CenterHead", + in_channels=sum([128, 128, 128]), + tasks=tasks, + dataset='nuscenes', + weight=0.25, + code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 1.0, 1.0], + common_heads={'reg': (2, 2), 'height': (1, 2), 'dim':(3, 2), 'rot':(2, 2), 'vel': (2, 2)}, # (output_channel, num_conv) + ), +) + +assigner = dict( + target_assigner=target_assigner, + out_size_factor=get_downsample_factor(model), + gaussian_overlap=0.1, + max_objs=500, + min_radius=2, +) + + +train_cfg = dict(assigner=assigner) + +test_cfg = dict( + post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], + max_per_img=500, + nms=dict( + nms_pre_max_size=1000, + nms_post_max_size=83, + nms_iou_threshold=0.2, + ), + score_threshold=0.1, + pc_range=[-51.2, -51.2], + out_size_factor=get_downsample_factor(model), + voxel_size=[0.2, 0.2] +) + +# dataset settings +dataset_type = "NuScenesDataset" +nsweeps = 10 +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") + +db_sampler = dict( + type="GT-AUG", + enable=False, + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", + sample_groups=[ + dict(car=2), + dict(truck=3), + dict(construction_vehicle=7), + dict(bus=4), + dict(trailer=6), + dict(barrier=2), + dict(motorcycle=6), + dict(bicycle=6), + dict(pedestrian=2), + dict(traffic_cone=2), + ], + db_prep_steps=[ + dict( + filter_by_min_num_points=dict( + car=5, + truck=5, + bus=5, + trailer=5, + construction_vehicle=5, + traffic_cone=5, + barrier=5, + motorcycle=5, + bicycle=5, + pedestrian=5, + ) + ), + dict(filter_by_difficulty=[-1],), + ], + global_random_rotation_range_per_object=[0, 0], + rate=1.0, +) +train_preprocessor = dict( + 
mode="train", + shuffle_points=True, + global_rot_noise=[-0.3925, 0.3925], + global_scale_noise=[0.95, 1.05], + db_sampler=None, + class_names=class_names, +) + +val_preprocessor = dict( + mode="val", + shuffle_points=False, +) + +voxel_generator = dict( + range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0], + voxel_size=[0.2, 0.2, 8], + max_points_in_voxel=20, + max_voxel_num=[30000, 60000], +) + +train_pipeline = [ + dict(type="LoadPointCloudFromFile", dataset=dataset_type), + dict(type="LoadPointCloudAnnotations", with_bbox=True), + dict(type="Preprocess", cfg=train_preprocessor), + dict(type="Voxelization", cfg=voxel_generator), + dict(type="AssignLabel", cfg=train_cfg["assigner"]), + dict(type="Reformat"), +] +test_pipeline = [ + dict(type="LoadPointCloudFromFile", dataset=dataset_type), + dict(type="LoadPointCloudAnnotations", with_bbox=True), + dict(type="Preprocess", cfg=val_preprocessor), + dict(type="Voxelization", cfg=voxel_generator), + dict(type="AssignLabel", cfg=train_cfg["assigner"]), + dict(type="Reformat"), +] + +if use_subset: + train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True_subset_10.pkl" +else: + train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" + +if use_subset: # validate on the subset_10 infos whenever training uses the subset + val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True_subset_10.pkl" +else: + val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" + +test_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" + + +data = dict( + samples_per_gpu=samples_per_gpu, + workers_per_gpu=workers_per_gpu, + train=dict( + type=dataset_type, + root_path=data_root, + info_path=train_anno, + ann_file=train_anno, + nsweeps=nsweeps, + class_names=class_names, + pipeline=train_pipeline, + ), + val=dict( + type=dataset_type, + root_path=data_root, + info_path=val_anno, + test_mode=True, + ann_file=val_anno, + nsweeps=nsweeps, + class_names=class_names, + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + root_path=data_root, +
info_path=test_anno, + ann_file=test_anno, + nsweeps=nsweeps, + class_names=class_names, + pipeline=test_pipeline, + ), +) + + +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# optimizer +optimizer = dict( + type="adam", amsgrad=0.0, wd=0.01, fixed_wd=True, moving_average=False, +) +lr_config = dict( + type="one_cycle", lr_max=0.001, moms=[0.95, 0.85], div_factor=10.0, pct_start=0.4, +) + +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=5, + hooks=[ + dict(type="TextLoggerHook"), + # dict(type='TensorboardLoggerHook') + ], +) +# yapf:enable +# runtime settings +total_epochs = 20 +device_ids = [int(x) for x in gpu_ids.split(",") if x.strip() in ["0", "1"]] +dist_params = dict(backend="nccl", init_method="env://") +log_level = "INFO" +work_dir = './work_dirs/{}/'.format(__file__[__file__.rfind('/') + 1:-3]) +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/nusc/pp/resnet_fastpillars.py b/configs/nusc/pp/resnet_fastpillars.py new file mode 100644 index 00000000..f6dfb506 --- /dev/null +++ b/configs/nusc/pp/resnet_fastpillars.py @@ -0,0 +1,248 @@ +import os +import itertools +import logging +from det3d.utils.config_tool import get_downsample_factor +import os +from dotenv import load_dotenv + +print(f"Found ENV: {load_dotenv()}") +samples_per_gpu = int(os.getenv("GPU_SAMPLES", 2)) +workers_per_gpu = int(os.getenv("GPU_WORKERS", 4)) +gpu_ids = os.getenv("SELECTED_GPUS", "0") +use_subset = os.getenv("USE_SUBSET", "True").lower() in ("true", "1", "yes") + +print(f"Samples per gpu: {samples_per_gpu}") +print(f"Workers per gpu: {workers_per_gpu}") +print(f"Using GPUs: {gpu_ids}") +print(f"Using {'subset' if use_subset else 'full'}") + +tasks = [ + dict(num_class=1, class_names=["car"]), + dict(num_class=2, class_names=["truck", "construction_vehicle"]), + dict(num_class=2, class_names=["bus", "trailer"]), + dict(num_class=1, class_names=["barrier"]), + dict(num_class=2, 
class_names=["motorcycle", "bicycle"]), + dict(num_class=2, class_names=["pedestrian", "traffic_cone"]), +] + +class_names = list(itertools.chain(*[t["class_names"] for t in tasks])) + +# training and testing settings +target_assigner = dict( + tasks=tasks, +) + + +# model settings +model = dict( + type="PointPillars", + pretrained=None, + reader=dict( + type="FastPillarFeatureNet", + num_filters=[64], + num_input_features=5, + with_distance=False, + voxel_size=(0.2, 0.2, 8), + pc_range=(-51.2, -51.2, -5.0, 51.2, 51.2, 3.0), + ), + backbone=dict(type="PointPillarsScatter", ds_factor=1), + neck=dict( + type="ResNetNeck", + layer_nums=[2, 2, 2], + ds_layer_strides=[2, 2, 2], + ds_num_filters=[64, 128, 256], + us_layer_strides=[0.5, 1, 2], + us_num_filters=[128, 128, 128], + num_input_features=64, + logger=logging.getLogger("ResNetNeck"), + ), + bbox_head=dict( + type="CenterHead", + in_channels=sum([128, 128, 128]), + tasks=tasks, + dataset='nuscenes', + weight=0.25, + code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 1.0, 1.0], + common_heads={'reg': (2, 2), 'height': (1, 2), 'dim':(3, 2), 'rot':(2, 2), 'vel': (2, 2)}, # (output_channel, num_conv) + ), +) + +assigner = dict( + target_assigner=target_assigner, + out_size_factor=get_downsample_factor(model), + gaussian_overlap=0.1, + max_objs=500, + min_radius=2, +) + + +train_cfg = dict(assigner=assigner) + +test_cfg = dict( + post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], + max_per_img=500, + nms=dict( + nms_pre_max_size=1000, + nms_post_max_size=83, + nms_iou_threshold=0.2, + ), + score_threshold=0.1, + pc_range=[-51.2, -51.2], + out_size_factor=get_downsample_factor(model), + voxel_size=[0.2, 0.2] +) + +# dataset settings +dataset_type = "NuScenesDataset" +nsweeps = 10 +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") + +db_sampler = dict( + type="GT-AUG", + enable=False, + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", + sample_groups=[ + dict(car=2), + 
dict(truck=3), + dict(construction_vehicle=7), + dict(bus=4), + dict(trailer=6), + dict(barrier=2), + dict(motorcycle=6), + dict(bicycle=6), + dict(pedestrian=2), + dict(traffic_cone=2), + ], + db_prep_steps=[ + dict( + filter_by_min_num_points=dict( + car=5, + truck=5, + bus=5, + trailer=5, + construction_vehicle=5, + traffic_cone=5, + barrier=5, + motorcycle=5, + bicycle=5, + pedestrian=5, + ) + ), + dict(filter_by_difficulty=[-1],), + ], + global_random_rotation_range_per_object=[0, 0], + rate=1.0, +) +train_preprocessor = dict( + mode="train", + shuffle_points=True, + global_rot_noise=[-0.3925, 0.3925], + global_scale_noise=[0.95, 1.05], + db_sampler=None, + class_names=class_names, +) + +val_preprocessor = dict( + mode="val", + shuffle_points=False, +) + +voxel_generator = dict( + range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0], + voxel_size=[0.2, 0.2, 8], + max_points_in_voxel=20, + max_voxel_num=[30000, 60000], +) + +train_pipeline = [ + dict(type="LoadPointCloudFromFile", dataset=dataset_type), + dict(type="LoadPointCloudAnnotations", with_bbox=True), + dict(type="Preprocess", cfg=train_preprocessor), + dict(type="Voxelization", cfg=voxel_generator), + dict(type="AssignLabel", cfg=train_cfg["assigner"]), + dict(type="Reformat"), +] +test_pipeline = [ + dict(type="LoadPointCloudFromFile", dataset=dataset_type), + dict(type="LoadPointCloudAnnotations", with_bbox=True), + dict(type="Preprocess", cfg=val_preprocessor), + dict(type="Voxelization", cfg=voxel_generator), + dict(type="AssignLabel", cfg=train_cfg["assigner"]), + dict(type="Reformat"), +] + +if use_subset: + train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True_subset_10.pkl" +else: + train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" + +if use_subset: # validate on the subset_10 infos whenever training uses the subset + val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True_subset_10.pkl" +else: + val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" + +test_anno =
f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" + + +data = dict( + samples_per_gpu=samples_per_gpu, + workers_per_gpu=workers_per_gpu, + train=dict( + type=dataset_type, + root_path=data_root, + info_path=train_anno, + ann_file=train_anno, + nsweeps=nsweeps, + class_names=class_names, + pipeline=train_pipeline, + ), + val=dict( + type=dataset_type, + root_path=data_root, + info_path=val_anno, + test_mode=True, + ann_file=val_anno, + nsweeps=nsweeps, + class_names=class_names, + pipeline=test_pipeline, + ), + test=dict( + type=dataset_type, + root_path=data_root, + info_path=test_anno, + ann_file=test_anno, + nsweeps=nsweeps, + class_names=class_names, + pipeline=test_pipeline, + ), +) + + +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# optimizer +optimizer = dict( + type="adam", amsgrad=0.0, wd=0.01, fixed_wd=True, moving_average=False, +) +lr_config = dict( + type="one_cycle", lr_max=0.001, moms=[0.95, 0.85], div_factor=10.0, pct_start=0.4, +) + +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=5, + hooks=[ + dict(type="TextLoggerHook"), + # dict(type='TensorboardLoggerHook') + ], +) +# yapf:enable +# runtime settings +total_epochs = 20 +device_ids = [int(x) for x in gpu_ids.split(",") if x.strip() in ["0", "1"]] +dist_params = dict(backend="nccl", init_method="env://") +log_level = "INFO" +work_dir = './work_dirs/{}/'.format(__file__[__file__.rfind('/') + 1:-3]) +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/nusc/pp/response_based_kd.py b/configs/nusc/pp/response_based_kd.py new file mode 100644 index 00000000..e1af4dfb --- /dev/null +++ b/configs/nusc/pp/response_based_kd.py @@ -0,0 +1,25 @@ +from pathlib import Path + +# Uses the base config and overrides for student architecture and response-based KD parent +_base_cfg = Path(__file__).with_name("baseline.py") +exec(_base_cfg.read_text(), globals(), globals()) + +# Smaller student architecture 
+model["reader"]["num_filters"] = [32, 32] +model["backbone"]["num_input_features"] = 32 +model["neck"]["num_input_features"] = 32 +model["neck"]["ds_num_filters"] = [32, 64, 128] +model["neck"]["us_num_filters"] = [64, 64, 64] +model["bbox_head"]["in_channels"] = sum([64, 64, 64]) + +# Heatmap KD parent settings +kd = dict( + enabled=True, + type="heatmap_mse", + lambda_kd=0.2, + teacher_config="./configs/nusc/pp/baseline.py", + # teacher_checkpoint="./work_dirs/baseline_smoke/latest.pth", + teacher_checkpoint="../Computer-Vision/work_dirs/nusc_centerpoint_pp_02voxel_two_pfn_10sweep/latest.pth", +) + +work_dir = "./work_dirs/response_based_kd/" diff --git a/configs/nusc/pp/response_based_kd_05.py b/configs/nusc/pp/response_based_kd_05.py new file mode 100644 index 00000000..5adafb3c --- /dev/null +++ b/configs/nusc/pp/response_based_kd_05.py @@ -0,0 +1,25 @@ +from pathlib import Path + +# Uses the base config and overrides for student architecture and response-based KD parent +_base_cfg = Path(__file__).with_name("baseline.py") +exec(_base_cfg.read_text(), globals(), globals()) + +# Smaller student architecture +model["reader"]["num_filters"] = [32, 32] +model["backbone"]["num_input_features"] = 32 +model["neck"]["num_input_features"] = 32 +model["neck"]["ds_num_filters"] = [32, 64, 128] +model["neck"]["us_num_filters"] = [64, 64, 64] +model["bbox_head"]["in_channels"] = sum([64, 64, 64]) + +# Heatmap KD parent settings +kd = dict( + enabled=True, + type="heatmap_mse", + lambda_kd=0.5, + teacher_config="./configs/nusc/pp/baseline.py", + # teacher_checkpoint="./work_dirs/baseline_smoke/latest.pth", + teacher_checkpoint="../Computer-Vision/work_dirs/nusc_centerpoint_pp_02voxel_two_pfn_10sweep/latest.pth", +) + +work_dir = "./work_dirs/response_based_kd/" diff --git a/configs/nusc/pp/response_based_kd_08.py b/configs/nusc/pp/response_based_kd_08.py new file mode 100644 index 00000000..467efd2f --- /dev/null +++ b/configs/nusc/pp/response_based_kd_08.py @@ -0,0 +1,25 
@@ +from pathlib import Path + +# Uses the base config and overrides for student architecture and response-based KD parent +_base_cfg = Path(__file__).with_name("baseline.py") +exec(_base_cfg.read_text(), globals(), globals()) + +# Smaller student architecture +model["reader"]["num_filters"] = [32, 32] +model["backbone"]["num_input_features"] = 32 +model["neck"]["num_input_features"] = 32 +model["neck"]["ds_num_filters"] = [32, 64, 128] +model["neck"]["us_num_filters"] = [64, 64, 64] +model["bbox_head"]["in_channels"] = sum([64, 64, 64]) + +# Heatmap KD parent settings +kd = dict( + enabled=True, + type="heatmap_mse", + lambda_kd=0.8, + teacher_config="./configs/nusc/pp/baseline.py", + # teacher_checkpoint="./work_dirs/baseline_smoke/latest.pth", + teacher_checkpoint="../Computer-Vision/work_dirs/nusc_centerpoint_pp_02voxel_two_pfn_10sweep/latest.pth", +) + +work_dir = "./work_dirs/response_based_kd/" diff --git a/configs/nusc/pp/response_based_kd_resnet.py b/configs/nusc/pp/response_based_kd_resnet.py new file mode 100644 index 00000000..a88f06c1 --- /dev/null +++ b/configs/nusc/pp/response_based_kd_resnet.py @@ -0,0 +1,23 @@ +from pathlib import Path + +# Response-based KD: teacher = PointPillars (PFN) + ResNetNeck (matches cluster +# nusc_centerpoint_pp_02voxel_two_pfn_10sweep_resnet). Student = narrower PFN + ResNetNeck. 
+_base_cfg = Path(__file__).with_name("resnet.py") +exec(_base_cfg.read_text(), globals(), globals()) + +model["reader"]["num_filters"] = [32, 32] +model["backbone"]["num_input_features"] = 32 +model["neck"]["num_input_features"] = 32 +model["neck"]["ds_num_filters"] = [32, 64, 128] +model["neck"]["us_num_filters"] = [64, 64, 64] +model["bbox_head"]["in_channels"] = sum([64, 64, 64]) + +kd = dict( + enabled=True, + type="heatmap_mse", + lambda_kd=0.2, + teacher_config="./configs/nusc/pp/resnet.py", + teacher_checkpoint="../Computer-Vision/work_dirs/nusc_centerpoint_pp_02voxel_two_pfn_10sweep_resnet/latest.pth", +) + +work_dir = "./work_dirs/response_based_kd_resnet/" diff --git a/configs/nusc/pp/response_based_kd_resnet_smoke.py b/configs/nusc/pp/response_based_kd_resnet_smoke.py new file mode 100644 index 00000000..b0b4cbe6 --- /dev/null +++ b/configs/nusc/pp/response_based_kd_resnet_smoke.py @@ -0,0 +1,17 @@ +from pathlib import Path + +_base_cfg = Path(__file__).with_name("response_based_kd_resnet.py") +exec(_base_cfg.read_text(), globals(), globals()) + +total_epochs = 1 + +data["samples_per_gpu"] = 1 +data["workers_per_gpu"] = 0 + +for split in ("train", "val", "test"): + if split in data and isinstance(data[split], dict): + data[split]["load_interval"] = 20 + +log_config["interval"] = 1 +checkpoint_config["interval"] = 1 +work_dir = "./work_dirs/response_based_kd_resnet_smoke/" diff --git a/configs/nusc/pp/response_based_kd_smoke.py b/configs/nusc/pp/response_based_kd_smoke.py new file mode 100644 index 00000000..4e83aeb1 --- /dev/null +++ b/configs/nusc/pp/response_based_kd_smoke.py @@ -0,0 +1,17 @@ +from pathlib import Path + +_base_cfg = Path(__file__).with_name("response_based_kd.py") +exec(_base_cfg.read_text(), globals(), globals()) + +total_epochs = 1 + +data["samples_per_gpu"] = 1 +data["workers_per_gpu"] = 0 + +for split in ("train", "val", "test"): + if split in data and isinstance(data[split], dict): + data[split]["load_interval"] = 20 + 
+log_config["interval"] = 1 +checkpoint_config["interval"] = 1 +work_dir = "./work_dirs/response_based_kd_smoke/" diff --git a/configs/nusc/pp/nusc_centerpoint_pp_02voxel_two_pfn_10sweep_circular_nms.py b/configs/nusc/pp/unused/circular_nms.py similarity index 94% rename from configs/nusc/pp/nusc_centerpoint_pp_02voxel_two_pfn_10sweep_circular_nms.py rename to configs/nusc/pp/unused/circular_nms.py index 71c22461..02042783 100644 --- a/configs/nusc/pp/nusc_centerpoint_pp_02voxel_two_pfn_10sweep_circular_nms.py +++ b/configs/nusc/pp/unused/circular_nms.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -85,12 +86,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -161,13 +162,13 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" test_anno = None data = dict( - samples_per_gpu=4, - workers_per_gpu=8, + samples_per_gpu=1, + workers_per_gpu=2, train=dict( type=dataset_type, root_path=data_root, diff --git a/configs/nusc/pp/nusc_centerpoint_pp_02voxel_two_pfn_10sweep.py b/configs/nusc/pp/unused/demo.py similarity index 95% rename from configs/nusc/pp/nusc_centerpoint_pp_02voxel_two_pfn_10sweep.py rename to configs/nusc/pp/unused/demo.py index 1a89adf3..6ffefdd3 100644 --- a/configs/nusc/pp/nusc_centerpoint_pp_02voxel_two_pfn_10sweep.py +++ b/configs/nusc/pp/unused/demo.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -83,12 +84,12 @@ # 
dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -159,8 +160,8 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" +train_anno = "demo/nuScenes/demo_infos.pkl" +val_anno = "demo/nuScenes/demo_infos.pkl" test_anno = None data = dict( diff --git a/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_dcn.py b/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_dcn.py index e51f4c28..efb537f2 100644 --- a/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_dcn.py +++ b/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_dcn.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -85,12 +86,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -162,8 +163,8 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" test_anno = None data = dict( diff --git a/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_dcn_flip.py 
b/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_dcn_flip.py index b5e729e5..80c82f3f 100644 --- a/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_dcn_flip.py +++ b/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_dcn_flip.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -87,12 +88,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -166,9 +167,9 @@ dict(type="Reformat", double_flip=DOUBLE_FLIP), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" -test_anno = "data/nuScenes/infos_test_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" +test_anno = f"{data_root}/infos_test_10sweeps_withvelo_filter_True.pkl" data = dict( samples_per_gpu=4, diff --git a/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z.py b/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z.py index a4b8db5a..0affb717 100644 --- a/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z.py +++ b/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -85,12 +86,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo.pkl", + 
db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -163,8 +164,8 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" test_anno = None data = dict( diff --git a/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_flip.py b/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_flip.py index 97858927..bde36417 100644 --- a/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_flip.py +++ b/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_0075voxel_fix_bn_z_flip.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -87,12 +88,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -167,9 +168,9 @@ dict(type="Reformat", double_flip=DOUBLE_FLIP), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" -test_anno = "data/nuScenes/infos_test_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" +test_anno = f"{data_root}/infos_test_10sweeps_withvelo_filter_True.pkl" data = dict( samples_per_gpu=4, diff --git a/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_01voxel.py b/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_01voxel.py index 
c3c499cd..0eb35975 100644 --- a/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_01voxel.py +++ b/configs/nusc/voxelnet/nusc_centerpoint_voxelnet_01voxel.py @@ -1,3 +1,4 @@ +import os import itertools import logging @@ -81,12 +82,12 @@ # dataset settings dataset_type = "NuScenesDataset" nsweeps = 10 -data_root = "data/nuScenes" +data_root = os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") db_sampler = dict( type="GT-AUG", enable=False, - db_info_path="data/nuScenes/dbinfos_train_10sweeps_withvelo.pkl", + db_info_path=f"{data_root}/dbinfos_train_10sweeps_withvelo.pkl", sample_groups=[ dict(car=2), dict(truck=3), @@ -157,8 +158,8 @@ dict(type="Reformat"), ] -train_anno = "data/nuScenes/infos_train_10sweeps_withvelo_filter_True.pkl" -val_anno = "data/nuScenes/infos_val_10sweeps_withvelo_filter_True.pkl" +train_anno = f"{data_root}/infos_train_10sweeps_withvelo_filter_True.pkl" +val_anno = f"{data_root}/infos_val_10sweeps_withvelo_filter_True.pkl" test_anno = None data = dict( diff --git a/create_subset.py b/create_subset.py new file mode 100644 index 00000000..ff52a428 --- /dev/null +++ b/create_subset.py @@ -0,0 +1,31 @@ +import pickle +import os + +# Get the absolute path to your current directory +base_path = os.getcwd() + +def create_subset(filename, ratio=10): + input_path = os.path.join(base_path, 'data/nuScenes', filename) + output_path = input_path.replace('.pkl', '_subset_10.pkl') + + if not os.path.exists(input_path): + print(f"ERROR: Cannot find {input_path}") + return + + print(f"Processing {filename}...") + with open(input_path, 'rb') as f: + data = pickle.load(f) + + # Slice the data + if isinstance(data, dict) and 'infos' in data: + data['infos'] = data['infos'][::ratio] + else: + data = data[::ratio] + + with open(output_path, 'wb') as f: + pickle.dump(data, f) + + print(f"SUCCESS: Saved to {output_path}") + +create_subset('infos_train_10sweeps_withvelo_filter_True.pkl') +create_subset('infos_val_10sweeps_withvelo_filter_True.pkl') diff --git 
a/cv29f26@gpucluster.st.lab.au.dk b/cv29f26@gpucluster.st.lab.au.dk new file mode 100644 index 00000000..055e52da --- /dev/null +++ b/cv29f26@gpucluster.st.lab.au.dk @@ -0,0 +1,289 @@ +name: centerpoint +channels: + - defaults + - nvidia + - nvidia/label/cuda-11.3.0 + - pytorch +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - binutils=2.40=h1680402_0 + - binutils_impl_linux-64=2.40=h5293946_0 + - binutils_linux-64=2.40.0=h06a4308_3 + - blas=1.0=mkl + - bzip2=1.0.8=h5eee18b_6 + - ca-certificates=2025.12.2=h06a4308_0 + - certifi=2022.12.7=py37h06a4308_0 + - cuda-cccl=12.4.127=0 + - cuda-command-line-tools=12.4.1=0 + - cuda-cudart=12.4.127=0 + - cuda-cudart-dev=12.4.127=0 + - cuda-cudart-static=12.4.127=0 + - cuda-cuobjdump=12.4.127=0 + - cuda-cupti=12.4.127=0 + - cuda-cupti-static=12.4.127=0 + - cuda-cuxxfilt=12.4.127=0 + - cuda-documentation=12.4.127=0 + - cuda-driver-dev=12.4.127=0 + - cuda-gdb=12.4.127=0 + - cuda-libraries=12.4.1=h06a4308_1 + - cuda-libraries-dev=12.4.1=h06a4308_1 + - cuda-libraries-static=12.4.1=0 + - cuda-nsight=12.4.127=0 + - cuda-nvcc=11.3.58=h2467b9f_0 + - cuda-nvdisasm=12.4.127=0 + - cuda-nvml-dev=12.4.127=0 + - cuda-nvprof=12.4.127=0 + - cuda-nvprune=12.4.127=0 + - cuda-nvrtc=12.4.127=0 + - cuda-nvrtc-dev=12.4.127=0 + - cuda-nvrtc-static=12.4.127=0 + - cuda-nvtx=12.4.127=0 + - cuda-nvvp=12.4.127=0 + - cuda-opencl=12.4.127=0 + - cuda-opencl-dev=12.4.127=0 + - cuda-profiler-api=12.4.127=0 + - cuda-sanitizer-api=12.4.127=0 + - cuda-tools=12.4.1=0 + - cuda-visual-tools=12.4.1=h06a4308_1 + - cudatoolkit=11.3.1=ha36c431_9 + - ffmpeg=4.2.2=h20bf706_0 + - flit-core=3.6.0=pyhd3eb1b0_0 + - freetype=2.14.1=hf5b9546_0 + - gcc_impl_linux-64=11.2.0=h1234567_1 + - gcc_linux-64=11.2.0=h931ca3c_3 + - gds-tools=1.9.1.3=0 + - giflib=5.2.2=h5eee18b_0 + - gmp=6.3.0=h6a678d5_0 + - gnutls=3.6.15=he1e5248_0 + - gxx_impl_linux-64=11.2.0=h1234567_1 + - gxx_linux-64=11.2.0=h06a4308_3 + - intel-openmp=2021.4.0=h06a4308_3561 + - 
jpeg=9f=h5ce9db8_0 + - kernel-headers_linux-64=4.18.0=h3108a97_1 + - lame=3.100=hbd0596d_1 + - lcms2=2.16=hb9589c4_0 + - ld_impl_linux-64=2.40=h12ee557_0 + - lerc=4.0.0=h6a678d5_0 + - libcublas=12.4.5.8=0 + - libcublas-dev=12.4.5.8=0 + - libcublas-static=12.4.5.8=0 + - libcufft=11.2.1.3=0 + - libcufft-dev=11.2.1.3=0 + - libcufft-static=11.2.1.3=0 + - libcufile=1.9.1.3=0 + - libcufile-dev=1.9.1.3=0 + - libcufile-static=1.9.1.3=0 + - libcurand=10.3.5.147=0 + - libcurand-dev=10.3.5.147=0 + - libcurand-static=10.3.5.147=0 + - libcusolver=11.6.1.9=0 + - libcusolver-dev=11.6.1.9=0 + - libcusolver-static=11.6.1.9=0 + - libcusparse=12.3.1.170=0 + - libcusparse-dev=12.3.1.170=0 + - libcusparse-static=12.3.1.170=0 + - libdeflate=1.22=h5eee18b_0 + - libffi=3.4.4=h6a678d5_1 + - libgcc=15.2.0=h69a1729_7 + - libgcc-devel_linux-64=11.2.0=h1234567_1 + - libgcc-ng=15.2.0=h166f726_7 + - libgomp=15.2.0=h4751f2c_7 + - libidn2=2.3.4=h5eee18b_0 + - libnpp=12.2.5.30=0 + - libnpp-dev=12.2.5.30=0 + - libnpp-static=12.2.5.30=0 + - libnvfatbin=12.4.127=0 + - libnvfatbin-dev=12.4.127=0 + - libnvjitlink=12.4.127=0 + - libnvjitlink-dev=12.4.127=0 + - libnvjpeg=12.3.1.117=0 + - libnvjpeg-dev=12.3.1.117=0 + - libnvjpeg-static=12.3.1.117=0 + - libopus=1.6.1=h9f10d21_0 + - libpng=1.6.55=h22898a0_0 + - libstdcxx=15.2.0=h39759b7_7 + - libstdcxx-devel_linux-64=11.2.0=h1234567_1 + - libstdcxx-ng=15.2.0=hc03a8fd_7 + - libtasn1=4.21.0=h27ab2c4_0 + - libtiff=4.5.1=hffd6297_1 + - libunistring=0.9.10=h27cfd23_0 + - libuv=1.52.0=heb5a705_0 + - libvpx=1.7.0=h439df22_0 + - libwebp=1.2.4=h11a3e52_1 + - libwebp-base=1.2.4=h5eee18b_1 + - libxcb=1.17.0=h9b100fa_0 + - libzlib=1.3.1=hb25bd0a_0 + - lz4-c=1.9.4=h6a678d5_1 + - mkl=2021.4.0=h06a4308_640 + - mkl-service=2.4.0=py37h7f8727e_0 + - mkl_fft=1.3.1=py37hd3c417c_0 + - mkl_random=1.2.2=py37h51133e4_0 + - ncurses=6.5=h7934f7d_0 + - nettle=3.7.3=hbbd107a_1 + - nsight-compute=2024.1.1.4=0 + - numpy=1.21.5=py37h6c91a56_3 + - numpy-base=1.21.5=py37ha15fc14_3 + - 
openh264=2.1.1=h4ff587b_0 + - openssl=1.1.1w=h7f8727e_0 + - pillow=9.4.0=py37h6a678d5_0 + - pip=22.3.1=py37h06a4308_0 + - pthread-stubs=0.3=h0ce48e5_1 + - python=3.7.16=h7a1cb2a_0 + - pytorch=1.10.1=py3.7_cuda11.3_cudnn8.2.0_0 + - pytorch-mutex=1.0=cuda + - readline=8.3=hc2a1206_0 + - setuptools=65.6.3=py37h06a4308_0 + - six=1.16.0=pyhd3eb1b0_1 + - sqlite=3.51.2=h3e8d24a_0 + - sysroot_linux-64=2.28=h3108a97_1 + - tk=8.6.15=h54e0aa7_0 + - torchvision=0.11.2=py37_cu113 + - tzdata=2026a=he532380_0 + - wheel=0.38.4=py37h06a4308_0 + - x264=1!157.20191217=h7b6447c_0 + - xorg-libx11=1.8.12=h9b100fa_1 + - xorg-libxau=1.0.12=h9b100fa_0 + - xorg-libxdmcp=1.1.5=h9b100fa_0 + - xorg-xorgproto=2024.1=h5eee18b_1 + - xz=5.8.2=h448239c_0 + - zlib=1.3.1=hb25bd0a_0 + - zstd=1.5.7=h11fc155_0 + - pip: + - addict==2.4.0 + - aiofiles==22.1.0 + - aiosqlite==0.19.0 + - anyio==3.7.1 + - argon2-cffi==23.1.0 + - argon2-cffi-bindings==21.2.0 + - arrow==1.2.3 + - attrs==24.2.0 + - babel==2.14.0 + - backcall==0.2.0 + - beautifulsoup4==4.14.3 + - bleach==6.0.0 + - cached-property==1.5.2 + - cachetools==5.5.2 + - ccimport==0.4.4 + - cffi==1.15.1 + - charset-normalizer==3.4.6 + - comm==0.1.4 + - cumm-cu113==0.4.11 + - cumm-cu114==0.4.11 + - cycler==0.11.0 + - debugpy==1.7.0 + - decorator==5.1.1 + - defusedxml==0.7.1 + - descartes==1.1.0 + - dictor==0.1.12 + - entrypoints==0.4 + - exceptiongroup==1.3.1 + - fastjsonschema==2.21.2 + - fire==0.7.1 + - fonttools==4.38.0 + - fqdn==1.5.1 + - idna==3.10 + - importlib-metadata==6.7.0 + - importlib-resources==5.12.0 + - ipykernel==6.16.2 + - ipython==7.34.0 + - ipython-genutils==0.2.0 + - ipywidgets==8.1.8 + - isoduration==20.11.0 + - jedi==0.19.2 + - jinja2==3.1.6 + - joblib==1.3.2 + - json5==0.9.16 + - jsonpointer==3.0.0 + - jsonschema==4.17.3 + - jupyter==1.1.1 + - jupyter-client==7.4.9 + - jupyter-console==6.6.3 + - jupyter-core==4.12.0 + - jupyter-events==0.6.3 + - jupyter-server==1.24.0 + - jupyter-server-fileid==0.9.3 + - jupyter-server-ydoc==0.8.0 + 
- jupyter-ydoc==0.2.5 + - jupyterlab==3.6.8 + - jupyterlab-pygments==0.2.2 + - jupyterlab-server==2.24.0 + - jupyterlab-widgets==3.0.16 + - kiwisolver==1.4.5 + - lark==1.1.9 + - llvmlite==0.39.1 + - markupsafe==2.1.5 + - matplotlib==3.5.2 + - matplotlib-inline==0.1.6 + - mistune==3.0.2 + - moves==0.1 + - nbclassic==1.2.0 + - nbclient==0.7.4 + - nbconvert==7.6.0 + - nbformat==5.8.0 + - nest-asyncio==1.6.0 + - ninja==1.11.1.4 + - notebook==6.5.7 + - notebook-shim==0.2.4 + - numba==0.56.4 + - nuscenes-devkit==1.1.10 + - opencv-python-headless==4.3.0.36 + - packaging==24.0 + - pandocfilters==1.5.1 + - parso==0.8.6 + - pccm==0.4.16 + - pexpect==4.9.0 + - pickleshare==0.7.5 + - pkgutil-resolve-name==1.3.10 + - portalocker==2.7.0 + - prometheus-client==0.17.1 + - prompt-toolkit==3.0.48 + - protobuf==4.24.4 + - psutil==7.2.2 + - ptyprocess==0.7.0 + - pybind11==2.13.6 + - pycocotools==2.0.7 + - pycparser==2.21 + - pygments==2.17.2 + - pyparsing==3.1.4 + - pyquaternion==0.9.9 + - pyrsistent==0.19.3 + - python-dateutil==2.9.0.post0 + - python-json-logger==3.0.1 + - pytz==2026.1.post1 + - pyyaml==6.0.1 + - pyzmq==26.2.1 + - requests==2.31.0 + - rfc3339-validator==0.1.4 + - rfc3986-validator==0.1.1 + - scikit-learn==1.0.2 + - scipy==1.7.3 + - send2trash==1.8.3 + - shapely==1.8.5 + - sharedarray==3.2.4 + - sniffio==1.3.1 + - soupsieve==2.4.1 + - spconv-cu113==2.3.6 + - spconv-cu114==2.3.6 + - tensorboardx==2.6.2.2 + - termcolor==2.3.0 + - terminado==0.17.1 + - terminaltables==3.1.10 + - threadpoolctl==3.1.0 + - tinycss2==1.2.1 + - tomli==2.0.1 + - tornado==6.2 + - tqdm==4.67.3 + - traitlets==5.9.0 + - typing-extensions==4.7.1 + - uri-template==1.3.0 + - urllib3==2.0.7 + - wcwidth==0.2.14 + - webcolors==1.13 + - webencodings==0.5.1 + - websocket-client==1.6.1 + - widgetsnbextension==4.0.15 + - y-py==0.6.2 + - ypy-websocket==0.8.4 + - zipp==3.15.0 +prefix: /home/acer/miniconda3/envs/centerpoint diff --git a/det3d/models/bbox_heads/center_head.py 
b/det3d/models/bbox_heads/center_head.py index 87e85416..691498f0 100644 --- a/det3d/models/bbox_heads/center_head.py +++ b/det3d/models/bbox_heads/center_head.py @@ -9,6 +9,7 @@ from collections import defaultdict from det3d.core import box_torch_ops import torch +import torch.nn.functional as F from det3d.torchie.cnn import kaiming_init from torch import double, nn from det3d.models.losses.centernet_loss import FastFocalLoss, RegLoss @@ -248,6 +249,47 @@ def _sigmoid(self, x): return y def loss(self, example, preds_dicts, test_cfg, **kwargs): + teacher_preds_dicts = kwargs.get("teacher_preds_dicts") + kd_cfg = kwargs.get("kd_cfg") + kd_enabled = bool(kd_cfg and kd_cfg.get("enabled", False)) + kd_type = (kd_cfg.get("type", "heatmap_mse") if kd_cfg else "heatmap_mse") + kd_weight = float(kd_cfg.get("lambda_kd", 0.0)) if kd_enabled else 0.0 + lambda_feat = float(kd_cfg.get("lambda_feat", 0.0)) if kd_enabled else 0.0 + student_feats = kwargs.get("student_feats") or {} + teacher_feats = kwargs.get("teacher_feats") or {} + + if kd_enabled and kd_type == "heatmap_mse": + if teacher_preds_dicts is None: + raise ValueError("KD is enabled but teacher_preds_dicts is None.") + if len(teacher_preds_dicts) != len(preds_dicts): + raise ValueError( + "Teacher/student task count mismatch: " + f"{len(teacher_preds_dicts)} vs {len(preds_dicts)}" + ) + + feat_kd_loss = None + if kd_enabled and kd_type == "feature_mse" and lambda_feat > 0.0: + if "head_shared" not in student_feats or "head_shared" not in teacher_feats: + raise ValueError( + "feature_mse KD requires student_feats['head_shared'] and " + "teacher_feats['head_shared']." 
+ ) + s_feat = student_feats["head_shared"] + t_feat = teacher_feats["head_shared"].detach() + if s_feat.shape[2:] != t_feat.shape[2:]: + s_feat = F.interpolate( + s_feat, + size=t_feat.shape[2:], + mode="bilinear", + align_corners=False, + ) + if s_feat.shape != t_feat.shape: + raise ValueError( + "Teacher/student head_shared shape mismatch after align: " + f"{s_feat.shape} vs {t_feat.shape}" + ) + feat_kd_loss = F.mse_loss(s_feat, t_feat) + rets = [] for task_id, preds_dict in enumerate(preds_dicts): # heatmap focal loss @@ -276,8 +318,36 @@ def loss(self, example, preds_dicts, test_cfg, **kwargs): loc_loss = (box_loss*box_loss.new_tensor(self.code_weights)).sum() loss = hm_loss + self.weight*loc_loss + hm_kd_loss = loss.new_tensor(0.0) + if kd_enabled and kd_type == "heatmap_mse" and kd_weight > 0.0: + teacher_hm = teacher_preds_dicts[task_id]['hm'] + if teacher_hm.shape != preds_dict['hm'].shape: + raise ValueError( + "Teacher/student hm shape mismatch at task " + f"{task_id}: {teacher_hm.shape} vs {preds_dict['hm'].shape}" + ) + teacher_hm_prob = torch.clamp( + torch.sigmoid(teacher_hm.detach()), min=1e-4, max=1 - 1e-4 + ) + hm_kd_loss = F.mse_loss(preds_dict['hm'], teacher_hm_prob) + loss = loss + kd_weight * hm_kd_loss + + if ( + task_id == 0 + and kd_enabled + and kd_type == "feature_mse" + and lambda_feat > 0.0 + and feat_kd_loss is not None + ): + loss = loss + lambda_feat * feat_kd_loss + + feat_kd_loss_log = ( + feat_kd_loss.detach().cpu() + if feat_kd_loss is not None + else torch.tensor(0.0) + ) - ret.update({'loss': loss, 'hm_loss': hm_loss.detach().cpu(), 'loc_loss':loc_loss, 'loc_loss_elem': box_loss.detach().cpu(), 'num_positive': example['mask'][task_id].float().sum()}) + ret.update({'loss': loss, 'hm_loss': hm_loss.detach().cpu(), 'loc_loss':loc_loss, 'loc_loss_elem': box_loss.detach().cpu(), 'hm_kd_loss': hm_kd_loss.detach().cpu(), 'feat_kd_loss': feat_kd_loss_log, 'num_positive': example['mask'][task_id].float().sum()}) rets.append(ret) diff 
--git a/det3d/models/detectors/point_pillars.py b/det3d/models/detectors/point_pillars.py index 00045ab0..0e41c6bd 100644 --- a/det3d/models/detectors/point_pillars.py +++ b/det3d/models/detectors/point_pillars.py @@ -46,10 +46,25 @@ def forward(self, example, return_loss=True, **kwargs): ) x = self.extract_feat(data) - preds, _ = self.bbox_head(x) + preds, head_shared = self.bbox_head(x) if return_loss: - return self.bbox_head.loss(example, preds, self.test_cfg) + if kwargs.get("return_preds", False): + if kwargs.get("return_feats", False): + return { + "preds": preds, + "feats": {"head_shared": head_shared}, + } + return preds + return self.bbox_head.loss( + example, + preds, + self.test_cfg, + teacher_preds_dicts=kwargs.get("teacher_preds_dicts"), + kd_cfg=kwargs.get("kd_cfg"), + student_feats={"head_shared": head_shared}, + teacher_feats=kwargs.get("teacher_feats"), + ) else: return self.bbox_head.predict(example, preds, self.test_cfg) @@ -71,7 +86,7 @@ def forward_two_stage(self, example, return_loss=True, **kwargs): x = self.extract_feat(data) bev_feature = x - preds, _ = self.bbox_head(x) + preds, _head_shared = self.bbox_head(x) # manual deepcopy ... 
new_preds = [] diff --git a/det3d/models/detectors/voxelnet.py b/det3d/models/detectors/voxelnet.py index 5a8218fb..24e5bd24 100644 --- a/det3d/models/detectors/voxelnet.py +++ b/det3d/models/detectors/voxelnet.py @@ -54,10 +54,25 @@ def extract_feat(self, data): def forward(self, example, return_loss=True, **kwargs): x, _ = self.extract_feat(example) - preds, _ = self.bbox_head(x) + preds, head_shared = self.bbox_head(x) if return_loss: - return self.bbox_head.loss(example, preds, self.test_cfg) + if kwargs.get("return_preds", False): + if kwargs.get("return_feats", False): + return { + "preds": preds, + "feats": {"head_shared": head_shared}, + } + return preds + return self.bbox_head.loss( + example, + preds, + self.test_cfg, + teacher_preds_dicts=kwargs.get("teacher_preds_dicts"), + kd_cfg=kwargs.get("kd_cfg"), + student_feats={"head_shared": head_shared}, + teacher_feats=kwargs.get("teacher_feats"), + ) else: return self.bbox_head.predict(example, preds, self.test_cfg) diff --git a/det3d/models/necks/__init__.py b/det3d/models/necks/__init__.py index 1a1db7e4..7e94b63d 100644 --- a/det3d/models/necks/__init__.py +++ b/det3d/models/necks/__init__.py @@ -1,3 +1,4 @@ from .rpn import RPN +from .res_net import ResNetNeck -__all__ = ["RPN"] +__all__ = ["RPN", "ResNetNeck"] diff --git a/det3d/models/necks/res_net.py b/det3d/models/necks/res_net.py new file mode 100644 index 00000000..ca43b1a8 --- /dev/null +++ b/det3d/models/necks/res_net.py @@ -0,0 +1,115 @@ +import numpy as np +import torch +from torch import nn + +from det3d.models.utils import Sequential +from ..registry import NECKS +from ..utils import build_norm_layer + +class BasicBlock(nn.Module): + """Standard ResNet Basic Block adapted for det3d.""" + def __init__( + self, + inplanes, + planes, + stride=1, + norm_cfg=None): + super(BasicBlock, self).__init__() + if norm_cfg is None: + norm_cfg = dict(type="BN", eps=1e-3, momentum=0.01) + + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, 
stride=stride, padding=1, bias=False) + self.bn1 = build_norm_layer(norm_cfg, planes)[1] + self.relu = nn.ReLU(inplace=True) + + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn2 = build_norm_layer(norm_cfg, planes)[1] + + self.downsample = None + if stride != 1 or inplanes != planes: + self.downsample = Sequential( + nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False), + build_norm_layer(norm_cfg, planes)[1], + ) + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + return out + +@NECKS.register_module +class ResNetNeck(nn.Module): + def __init__(self, layer_nums, ds_layer_strides, ds_num_filters, us_layer_strides, us_num_filters, num_input_features, norm_cfg=None, logger=None, **kwargs): + super(ResNetNeck, self).__init__() + self._layer_strides = ds_layer_strides + self._num_filters = ds_num_filters + self._layer_nums = layer_nums + self._upsample_strides = us_layer_strides + self._num_upsample_filters = us_num_filters + self._num_input_features = num_input_features + + if norm_cfg is None: + norm_cfg = dict(type="BN", eps=1e-3, momentum=0.01) + self._norm_cfg = norm_cfg + + self._upsample_start_idx = len(self._layer_nums) - len(self._upsample_strides) + + in_filters = [self._num_input_features, *self._num_filters[:-1]] + blocks = [] + deblocks = [] + + for i, layer_num in enumerate(self._layer_nums): + # Build ResNet stage + stage_blocks = [] + stage_blocks.append(BasicBlock(in_filters[i], self._num_filters[i], stride=self._layer_strides[i], norm_cfg=self._norm_cfg)) + for _ in range(1, layer_num): + stage_blocks.append(BasicBlock(self._num_filters[i], self._num_filters[i], stride=1, norm_cfg=self._norm_cfg)) + + blocks.append(Sequential(*stage_blocks)) + num_out_filters = 
self._num_filters[i] + + # Build Upsampling (FPN) stage - identical to your RPN + if i - self._upsample_start_idx >= 0: + stride = (self._upsample_strides[i - self._upsample_start_idx]) + if stride > 1: + deblock = Sequential( + nn.ConvTranspose2d(num_out_filters, self._num_upsample_filters[i - self._upsample_start_idx], stride, stride=stride, bias=False), + build_norm_layer(self._norm_cfg, self._num_upsample_filters[i - self._upsample_start_idx])[1], + nn.ReLU(), + ) + else: + stride = np.round(1 / stride).astype(np.int64) + deblock = Sequential( + nn.Conv2d(num_out_filters, self._num_upsample_filters[i - self._upsample_start_idx], stride, stride=stride, bias=False), + build_norm_layer(self._norm_cfg, self._num_upsample_filters[i - self._upsample_start_idx])[1], + nn.ReLU(), + ) + deblocks.append(deblock) + + self.blocks = nn.ModuleList(blocks) + self.deblocks = nn.ModuleList(deblocks) + + if logger: + logger.info("Finish ResNetNeck Initialization") + + def forward(self, x): + ups = [] + for i in range(len(self.blocks)): + x = self.blocks[i](x) + if i - self._upsample_start_idx >= 0: + ups.append(self.deblocks[i - self._upsample_start_idx](x)) + if len(ups) > 0: + x = torch.cat(ups, dim=1) + return x \ No newline at end of file diff --git a/det3d/models/readers/__init__.py b/det3d/models/readers/__init__.py index 94ed32c7..cf8e51b7 100644 --- a/det3d/models/readers/__init__.py +++ b/det3d/models/readers/__init__.py @@ -1,4 +1,4 @@ -from .pillar_encoder import PillarFeatureNet, PointPillarsScatter +from .pillar_encoder import PillarFeatureNet, PointPillarsScatter, FastPillarFeatureNet from .voxel_encoder import VoxelFeatureExtractorV3 from .dynamic_voxel_encoder import DynamicVoxelEncoder diff --git a/det3d/models/readers/pillar_encoder.py b/det3d/models/readers/pillar_encoder.py index 1a2553a3..b11d32c3 100644 --- a/det3d/models/readers/pillar_encoder.py +++ b/det3d/models/readers/pillar_encoder.py @@ -11,6 +11,56 @@ from ..registry import BACKBONES, READERS from 
..utils import build_norm_layer +class MAPELayer(nn.Module): + """ + Max-and-Attention Pillar Encoding (MAPE) + Based on the FastPillars (2023) architecture. + """ + def __init__(self, in_channels, out_channels): + super().__init__() + # Standard Point-wise Feature Extraction + self.linear = nn.Linear(in_channels, out_channels, bias=False) + self.norm = nn.BatchNorm1d(out_channels) + + # Attention Generation Branch + self.attention_fc = nn.Linear(out_channels, out_channels, bias=False) + self.attention_norm = nn.BatchNorm1d(out_channels) + + def forward(self, inputs): + # inputs shape: [M, max_points_per_pillar, in_channels] + # M = total number of non-empty pillars + + # 1. Extract Point Features + x = self.linear(inputs) + + # Reshape for BatchNorm1d + M, num_points, C = x.shape + x = x.view(M * num_points, C).unsqueeze(-1) + x = self.norm(x) + x = x.view(M, num_points, C) + x = F.relu(x) + + # 2. Extract Global Geometric Context (Standard Max Pooling) + x_max = torch.max(x, dim=1, keepdim=True)[0] # Shape: [M, 1, C] + + # 3. Generate Attention Weights + # Use the global feature to determine which channels matter most + attn_weights = self.attention_fc(x_max) + + # Reshape for BatchNorm + attn_weights = attn_weights.view(M, C).unsqueeze(-1) + attn_weights = self.attention_norm(attn_weights) + attn_weights = attn_weights.view(M, 1, C) + + # Sigmoid to scale weights between 0 and 1 + attn_weights = torch.sigmoid(attn_weights) # Shape: [M, 1, C] + + # 4. 
Attentive Fusion + # Multiply the original features by the attention weights, then pool + x_attended = x * attn_weights + out = torch.max(x_attended, dim=1)[0] # Shape: [M, C] + + return out class PFNLayer(nn.Module): def __init__(self, in_channels, out_channels, norm_cfg=None, last_layer=False): @@ -216,3 +266,101 @@ def forward(self, voxel_features, coords, batch_size, input_shape): # Undo the column stacking to final 4-dim tensor batch_canvas = batch_canvas.view(batch_size, self.nchannels, self.ny, self.nx) return batch_canvas + +@READERS.register_module +class FastPillarFeatureNet(nn.Module): + def __init__( + self, + num_input_features=4, + num_filters=(64,), + with_distance=False, + voxel_size=(0.2, 0.2, 4), + pc_range=(0, -40, -3, 70.4, 40, 1), + norm_cfg=None, + virtual=False + ): + """ + FastPillars version of the Pillar Feature Net using MAPE. + """ + super().__init__() + self.name = "FastPillarFeatureNet" + assert len(num_filters) > 0 + + self.num_input = num_input_features + num_input_features += 5 + if with_distance: + num_input_features += 1 + self._with_distance = with_distance + + # Create FastPillars layers + num_filters = [num_input_features] + list(num_filters) + pfn_layers = [] + for i in range(len(num_filters) - 1): + in_filters = num_filters[i] + out_filters = num_filters[i + 1] + + # Intermediate layers use standard PFN + if i < len(num_filters) - 2: + pfn_layers.append( + PFNLayer(in_filters, out_filters, norm_cfg=norm_cfg, last_layer=False) + ) + # Final layer uses MAPE Attention + else: + pfn_layers.append( + MAPELayer(in_filters, out_filters) + ) + + self.pfn_layers = nn.ModuleList(pfn_layers) + + self.virtual = virtual + + # Need pillar (voxel) size and x/y offset in order to calculate pillar offset + self.vx = voxel_size[0] + self.vy = voxel_size[1] + self.x_offset = self.vx / 2 + pc_range[0] + self.y_offset = self.vy / 2 + pc_range[1] + + def forward(self, features, num_voxels, coors): + device = features.device + + if self.virtual: + 
virtual_point_mask = features[..., -2] == -1 + virtual_points = features[virtual_point_mask] + virtual_points[..., -2] = 1 + features[..., -2] = 0 + features[virtual_point_mask] = virtual_points + + dtype = features.dtype + # Find distance of x, y, and z from cluster center + points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_voxels.type_as( + features + ).view(-1, 1, 1) + f_cluster = features[:, :, :3] - points_mean + + # Find distance of x, y, and z from pillar center + f_center = torch.zeros_like(features[:, :, :2]) + f_center[:, :, 0] = features[:, :, 0] - ( + coors[:, 3].to(dtype).unsqueeze(1) * self.vx + self.x_offset + ) + f_center[:, :, 1] = features[:, :, 1] - ( + coors[:, 2].to(dtype).unsqueeze(1) * self.vy + self.y_offset + ) + + # Combine together feature decorations + features_ls = [features, f_cluster, f_center] + if self._with_distance: + points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) + features_ls.append(points_dist) + features = torch.cat(features_ls, dim=-1) + + # The feature decorations were calculated without regard to whether pillar was empty + voxel_count = features.shape[1] + mask = get_paddings_indicator(num_voxels, voxel_count, axis=0) + mask = torch.unsqueeze(mask, -1).type_as(features) + features *= mask + + # Forward pass through layers + for pfn in self.pfn_layers: + features = pfn(features) + + return features.squeeze() \ No newline at end of file diff --git a/det3d/ops/dcn/build/lib.linux-x86_64-cpython-37/deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so b/det3d/ops/dcn/build/lib.linux-x86_64-cpython-37/deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 00000000..f8aa1957 Binary files /dev/null and b/det3d/ops/dcn/build/lib.linux-x86_64-cpython-37/deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so differ diff --git a/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/.ninja_deps b/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/.ninja_deps new file mode 100644 index 
00000000..0d39d7f0 Binary files /dev/null and b/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/.ninja_deps differ diff --git a/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/.ninja_log b/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/.ninja_log new file mode 100644 index 00000000..64a5ab3f --- /dev/null +++ b/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/.ninja_log @@ -0,0 +1,3 @@ +# ninja log v5 +1 9883 1776796852868130717 /home/cv29f26/Computer-Vision/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/src/deform_conv_cuda_kernel.o a316ef61b475275 +1 10800 1776796853740129920 /home/cv29f26/Computer-Vision/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/src/deform_conv_cuda.o 5666843f3bf74a42 diff --git a/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/build.ninja b/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/build.ninja new file mode 100644 index 00000000..b131b4ed --- /dev/null +++ b/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/build.ninja @@ -0,0 +1,29 @@ +ninja_required_version = 1.3 +cxx = g++-10 +nvcc = /home/cv29f26/.conda/envs/centerpoint/bin/nvcc + +cflags = -Wno-unused-result -Wsign-compare -DNDEBUG -fwrapv -O2 -Wall -Wstrict-prototypes -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -pipe -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -pipe -DAT_CHECK=TORCH_CHECK -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/cv29f26/.conda/envs/centerpoint/include -fPIC -DWITH_CUDA -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/TH -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/THC -I/home/cv29f26/.conda/envs/centerpoint/include -I/home/cv29f26/.conda/envs/centerpoint/include/python3.7m -c +post_cflags = 
-DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=deform_conv_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14 +cuda_cflags = -DWITH_CUDA -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/TH -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/THC -I/home/cv29f26/.conda/envs/centerpoint/include -I/home/cv29f26/.conda/envs/centerpoint/include/python3.7m -c +cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=deform_conv_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 -ccbin gcc-10 -std=c++14 +ldflags = + +rule compile + command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags + depfile = $out.d + deps = gcc + +rule cuda_compile + depfile = $out.d + deps = gcc + command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags + + + +build /home/cv29f26/Computer-Vision/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/src/deform_conv_cuda.o: compile /home/cv29f26/Computer-Vision/det3d/ops/dcn/src/deform_conv_cuda.cpp +build /home/cv29f26/Computer-Vision/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/src/deform_conv_cuda_kernel.o: cuda_compile /home/cv29f26/Computer-Vision/det3d/ops/dcn/src/deform_conv_cuda_kernel.cu + + + + + diff --git 
a/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/src/deform_conv_cuda.o b/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/src/deform_conv_cuda.o new file mode 100644 index 00000000..5343a6bc Binary files /dev/null and b/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/src/deform_conv_cuda.o differ diff --git a/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/src/deform_conv_cuda_kernel.o b/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/src/deform_conv_cuda_kernel.o new file mode 100644 index 00000000..09db43f6 Binary files /dev/null and b/det3d/ops/dcn/build/temp.linux-x86_64-cpython-37/src/deform_conv_cuda_kernel.o differ diff --git a/det3d/ops/dcn/deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so b/det3d/ops/dcn/deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 00000000..f8aa1957 Binary files /dev/null and b/det3d/ops/dcn/deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so differ diff --git a/det3d/ops/iou3d_nms/__init__.py b/det3d/ops/iou3d_nms/__init__.py index c267f071..4e9c5a0e 100644 --- a/det3d/ops/iou3d_nms/__init__.py +++ b/det3d/ops/iou3d_nms/__init__.py @@ -1 +1,6 @@ -from det3d.ops.iou3d_nms import iou3d_nms_cuda, iou3d_nms_utils +from . import iou3d_nms_utils + +try: + from . 
import iou3d_nms_cuda +except Exception: + iou3d_nms_cuda = None diff --git a/det3d/ops/iou3d_nms/build/lib.linux-x86_64-cpython-37/iou3d_nms_cuda.cpython-37m-x86_64-linux-gnu.so b/det3d/ops/iou3d_nms/build/lib.linux-x86_64-cpython-37/iou3d_nms_cuda.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 00000000..a9d1e5a2 Binary files /dev/null and b/det3d/ops/iou3d_nms/build/lib.linux-x86_64-cpython-37/iou3d_nms_cuda.cpython-37m-x86_64-linux-gnu.so differ diff --git a/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/.ninja_deps b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/.ninja_deps new file mode 100644 index 00000000..b73a148d Binary files /dev/null and b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/.ninja_deps differ diff --git a/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/.ninja_log b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/.ninja_log new file mode 100644 index 00000000..dbff32ff --- /dev/null +++ b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/.ninja_log @@ -0,0 +1,5 @@ +# ninja log v5 +3 1666 1776796662828301919 /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms_kernel.o 6278d9d101f3f9d +2 8010 1776796669172296297 /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_cpu.o a1924083def66e03 +3 8198 1776796669356296133 /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms.o f5d5ce7a772ac0a2 +3 13073 1776796674228291810 /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms_api.o 629ee59c12c50298 diff --git a/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/build.ninja b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/build.ninja new file mode 100644 index 00000000..942230c1 --- /dev/null +++ b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/build.ninja @@ -0,0 +1,31 @@ +ninja_required_version = 1.3 
+cxx = g++-10 +nvcc = /home/cv29f26/.conda/envs/centerpoint/bin/nvcc + +cflags = -Wno-unused-result -Wsign-compare -DNDEBUG -fwrapv -O2 -Wall -Wstrict-prototypes -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -pipe -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -pipe -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/cv29f26/.conda/envs/centerpoint/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/cv29f26/.conda/envs/centerpoint/include -fPIC -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/TH -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/THC -I/home/cv29f26/.conda/envs/centerpoint/include -I/home/cv29f26/.conda/envs/centerpoint/include/python3.7m -c +post_cflags = -g '-I /usr/local/cuda/include' -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=iou3d_nms_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++14 +cuda_cflags = -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/TH -I/home/cv29f26/.conda/envs/centerpoint/lib/python3.7/site-packages/torch/include/THC -I/home/cv29f26/.conda/envs/centerpoint/include -I/home/cv29f26/.conda/envs/centerpoint/include/python3.7m -c +cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr 
--compiler-options ''"'"'-fPIC'"'"'' -O2 -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=iou3d_nms_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 -ccbin gcc-10 -std=c++14 +ldflags = + +rule compile + command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags + depfile = $out.d + deps = gcc + +rule cuda_compile + depfile = $out.d + deps = gcc + command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags + + + +build /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_cpu.o: compile /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/src/iou3d_cpu.cpp +build /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms.o: compile /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/src/iou3d_nms.cpp +build /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms_api.o: compile /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/src/iou3d_nms_api.cpp +build /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms_kernel.o: cuda_compile /home/cv29f26/Computer-Vision/det3d/ops/iou3d_nms/src/iou3d_nms_kernel.cu + + + + + diff --git a/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_cpu.o b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_cpu.o new file mode 100644 index 00000000..3ae30abb Binary files /dev/null and b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_cpu.o differ diff --git a/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms.o b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms.o new file mode 100644 index 00000000..9d9794ae Binary files /dev/null and b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms.o differ diff 
--git a/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms_api.o b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms_api.o new file mode 100644 index 00000000..786bc608 Binary files /dev/null and b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms_api.o differ diff --git a/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms_kernel.o b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms_kernel.o new file mode 100644 index 00000000..bb5e0c52 Binary files /dev/null and b/det3d/ops/iou3d_nms/build/temp.linux-x86_64-cpython-37/src/iou3d_nms_kernel.o differ diff --git a/det3d/ops/iou3d_nms/iou3d_nms_cuda.cpython-37m-x86_64-linux-gnu.so b/det3d/ops/iou3d_nms/iou3d_nms_cuda.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 00000000..a9d1e5a2 Binary files /dev/null and b/det3d/ops/iou3d_nms/iou3d_nms_cuda.cpython-37m-x86_64-linux-gnu.so differ diff --git a/det3d/ops/iou3d_nms/iou3d_nms_utils.py b/det3d/ops/iou3d_nms/iou3d_nms_utils.py index 4d71e33a..fa5742b1 100644 --- a/det3d/ops/iou3d_nms/iou3d_nms_utils.py +++ b/det3d/ops/iou3d_nms/iou3d_nms_utils.py @@ -5,7 +5,10 @@ """ import torch -from . import iou3d_nms_cuda +try: + from . 
import iou3d_nms_cuda +except Exception: + iou3d_nms_cuda = None import numpy as np diff --git a/det3d/solver/fastai_optim.py b/det3d/solver/fastai_optim.py index a5434478..7eb72d49 100644 --- a/det3d/solver/fastai_optim.py +++ b/det3d/solver/fastai_optim.py @@ -1,4 +1,9 @@ -from collections import Iterable, defaultdict +from collections import defaultdict + +try: + from collections.abc import Iterable +except ImportError: + from collections import Iterable from copy import deepcopy from itertools import chain diff --git a/det3d/solver/optim.py b/det3d/solver/optim.py index 224ada0b..914c14d4 100644 --- a/det3d/solver/optim.py +++ b/det3d/solver/optim.py @@ -1,4 +1,9 @@ -from collections import Iterable, defaultdict +from collections import defaultdict + +try: + from collections.abc import Iterable +except ImportError: + from collections import Iterable from copy import deepcopy from itertools import chain diff --git a/det3d/torchie/apis/__init__.py b/det3d/torchie/apis/__init__.py index 952d978c..fca90aa1 100644 --- a/det3d/torchie/apis/__init__.py +++ b/det3d/torchie/apis/__init__.py @@ -1,4 +1,4 @@ -from .env import get_root_logger, init_dist, set_random_seed +from .env import get_root_logger, get_train_device, init_dist, set_random_seed from .train import batch_processor, batch_processor_ensemble, build_optimizer, train_detector # from .inference import init_detector, inference_detector, show_result @@ -6,6 +6,7 @@ __all__ = [ "init_dist", "get_root_logger", + "get_train_device", "set_random_seed", "train_detector", "build_optimizer", diff --git a/det3d/torchie/apis/env.py b/det3d/torchie/apis/env.py index 75dc44e5..4d4da533 100644 --- a/det3d/torchie/apis/env.py +++ b/det3d/torchie/apis/env.py @@ -10,6 +10,18 @@ from det3d.torchie.trainer import get_dist_info +def get_train_device(local_rank=0): + """Pick training device: CUDA (per rank) if available, else CPU.""" + forced = os.environ.get("CENTERPOINT_DEVICE", "").strip().lower() + if forced in ("cpu", 
"cuda"): + if forced == "cuda" and torch.cuda.is_available(): + return torch.device("cuda", int(local_rank)) + return torch.device("cpu") + if torch.cuda.is_available(): + return torch.device("cuda", int(local_rank)) + return torch.device("cpu") + + def init_dist(launcher, backend="nccl", **kwargs): if mp.get_start_method(allow_none=True) is None: mp.set_start_method("spawn") @@ -52,7 +64,8 @@ def set_random_seed(seed): random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed_all(seed) def get_root_logger(log_level=logging.INFO): diff --git a/det3d/torchie/apis/train.py b/det3d/torchie/apis/train.py index 9c3e5fb7..bc8886d7 100644 --- a/det3d/torchie/apis/train.py +++ b/det3d/torchie/apis/train.py @@ -12,17 +12,24 @@ import numpy as np import torch from det3d.builder import _create_learning_rate_scheduler +from det3d.models import build_detector # from det3d.datasets.kitti.eval_hooks import KittiDistEvalmAPHook, KittiEvalmAPHookV2 from det3d.core import DistOptimizerHook from det3d.datasets import DATASETS, build_dataloader from det3d.solver.fastai_optim import OptimWrapper -from det3d.torchie.trainer import DistSamplerSeedHook, Trainer, obj_from_dict +from det3d.torchie import Config +from det3d.torchie.trainer import ( + DistSamplerSeedHook, + Trainer, + load_checkpoint, + obj_from_dict, +) from det3d.utils.print_utils import metric_to_str from torch import nn from torch.nn.parallel import DistributedDataParallel -from .env import get_root_logger +from .env import get_root_logger, get_train_device def example_to_device(example, device=None, non_blocking=False) -> dict: @@ -91,10 +98,13 @@ def parse_second_losses(losses): def batch_processor(model, data, train_mode, **kwargs): - if "local_rank" in kwargs: - device = torch.device(kwargs["local_rank"]) - else: - device = None + device = kwargs.get("train_device") + if device is None: + if torch.cuda.is_available(): + lr 
= int(kwargs.get("local_rank", 0)) + device = torch.device("cuda", lr) + else: + device = torch.device("cpu") # data = example_convert_to_torch(data, device=device) example = example_to_device(data, device, non_blocking=False) @@ -159,7 +169,7 @@ def build_one_cycle_optimizer(model, optimizer_config): torch.optim.Adam, betas=(0.9, 0.99), amsgrad=optimizer_config.amsgrad ) else: - optimizer_func = partial(torch.optim.Adam, amsgrad=optimizer_cfg.amsgrad) + optimizer_func = partial(torch.optim.Adam, amsgrad=optimizer_config.amsgrad) optimizer = OptimWrapper.create( optimizer_func, @@ -252,6 +262,9 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, logge if logger is None: logger = get_root_logger(cfg.log_level) + train_device = get_train_device(cfg.local_rank if distributed else 0) + logger.info("Training device: %s", train_device) + # start training # prepare data loaders dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] @@ -264,8 +277,11 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, logge total_steps = cfg.total_epochs * len(data_loaders[0]) # print(f"total_steps: {total_steps}") - if distributed: - model = apex.parallel.convert_syncbn_model(model) + if distributed and torch.cuda.is_available(): + try: + model = apex.parallel.convert_syncbn_model(model) + except Exception: + logger.warning("apex convert_syncbn_model skipped (apex unavailable or incompatible).") if cfg.lr_config.type == "one_cycle": # build trainer optimizer = build_one_cycle_optimizer(model, cfg.optimizer) @@ -279,22 +295,61 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, logge # lr_scheduler = None cfg.lr_config = None - # put model on gpus + # put model on device (CUDA / CPU) if distributed: + if not torch.cuda.is_available(): + raise RuntimeError( + "Distributed training requires CUDA in this codebase. " + "Run without torch.distributed (single process) for CPU." 
+ ) model = DistributedDataParallel( - model.cuda(cfg.local_rank), + model.to(train_device), device_ids=[cfg.local_rank], output_device=cfg.local_rank, # broadcast_buffers=False, find_unused_parameters=True, ) else: - model = model.cuda() + model = model.to(train_device) logger.info(f"model structure: {model}") + kd_cfg = cfg.get("kd", None) + kd_enabled = bool(kd_cfg and kd_cfg.get("enabled", False)) + teacher_model = None + if kd_enabled: + teacher_cfg_path = kd_cfg.get("teacher_config", None) + teacher_ckpt_path = kd_cfg.get("teacher_checkpoint", None) + if not teacher_ckpt_path: + raise ValueError("KD is enabled but kd.teacher_checkpoint is not set.") + + if teacher_cfg_path: + teacher_cfg = Config.fromfile(teacher_cfg_path) + teacher_model_cfg = teacher_cfg.model + teacher_train_cfg = teacher_cfg.train_cfg + teacher_test_cfg = teacher_cfg.test_cfg + else: + teacher_model_cfg = cfg.model + teacher_train_cfg = cfg.train_cfg + teacher_test_cfg = cfg.test_cfg + + teacher_model = build_detector( + teacher_model_cfg, train_cfg=teacher_train_cfg, test_cfg=teacher_test_cfg + ) + teacher_model = teacher_model.to(train_device) + load_checkpoint(teacher_model, teacher_ckpt_path, map_location=str(train_device)) + teacher_model.eval() + teacher_model.requires_grad_(False) + logger.info("KD enabled with teacher checkpoint: %s", teacher_ckpt_path) + trainer = Trainer( - model, batch_processor, optimizer, lr_scheduler, cfg.work_dir, cfg.log_level + model, + batch_processor, + optimizer, + lr_scheduler, + cfg.work_dir, + cfg.log_level, + train_device=train_device, ) if distributed: @@ -323,4 +378,12 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, logge elif cfg.load_from: trainer.load_checkpoint(cfg.load_from) - trainer.run(data_loaders, cfg.workflow, cfg.total_epochs, local_rank=cfg.local_rank) + trainer.run( + data_loaders, + cfg.workflow, + cfg.total_epochs, + local_rank=cfg.local_rank, + train_device=train_device, + 
teacher_model=teacher_model, + kd_cfg=kd_cfg, + ) diff --git a/det3d/torchie/trainer/checkpoint.py b/det3d/torchie/trainer/checkpoint.py index 61c1f670..946543c3 100644 --- a/det3d/torchie/trainer/checkpoint.py +++ b/det3d/torchie/trainer/checkpoint.py @@ -43,8 +43,12 @@ try: import spconv.pytorch as spconv -except: - import spconv as spconv +except Exception: + try: + import spconv as spconv + except Exception: + spconv = None + def find_all_spconv_keys(model: nn.Module, prefix="") -> Set[str]: """ @@ -52,6 +56,8 @@ def find_all_spconv_keys(model: nn.Module, prefix="") -> Set[str]: from https://github.com/acivgin1/OpenPCDet/blob/8fc1a5d57bcb418d71d5118fb3df4b58d4ea0244/pcdet/utils/spconv_utils.py """ found_keys: Set[str] = set() + if spconv is None: + return found_keys for name, child in model.named_children(): new_prefix = f"{prefix}.{name}" if prefix != "" else name diff --git a/det3d/torchie/trainer/hooks/logger/text.py b/det3d/torchie/trainer/hooks/logger/text.py index 32f38570..686bd00c 100644 --- a/det3d/torchie/trainer/hooks/logger/text.py +++ b/det3d/torchie/trainer/hooks/logger/text.py @@ -30,11 +30,12 @@ def _get_max_memory(self, trainer): dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX) return mem_mb.item() - def _convert_to_precision4(self, val): + def _format_metric_value(self, metric_name, val): + precision = 6 if metric_name in ("hm_kd_loss", "feat_kd_loss") else 4 if isinstance(val, float): - val = "{:.4f}".format(val) + val = f"{val:.{precision}f}" elif isinstance(val, list): - val = [self._convert_to_precision4(v) for v in val] + val = [self._format_metric_value(metric_name, v) for v in val] return val @@ -60,7 +61,8 @@ def _log_info(self, log_dict, trainer): log_dict["forward_time"] - log_dict["transfer_time"], log_dict["loss_parse_time"] - log_dict["forward_time"], ) - log_str += "memory: {}, ".format(log_dict["memory"]) + if "memory" in log_dict: + log_str += "memory: {}, ".format(log_dict["memory"]) else: log_str = "Epoch({}) [{}][{}]\t".format( 
log_dict["mode"], log_dict["epoch"] - 1, log_dict["iter"] @@ -94,11 +96,11 @@ def _log_info(self, log_dict, trainer): continue if isinstance(val, float): - val = "{:.4f}".format(val) + val = self._format_metric_value(name, val) if isinstance(val, list): log_items.append( - "{}: {}".format(name, self._convert_to_precision4(val[idx])) + "{}: {}".format(name, self._format_metric_value(name, val[idx])) ) else: log_items.append("{}: {}".format(name, val)) diff --git a/det3d/torchie/trainer/hooks/memory.py b/det3d/torchie/trainer/hooks/memory.py index 990f8cec..763a8ab3 100644 --- a/det3d/torchie/trainer/hooks/memory.py +++ b/det3d/torchie/trainer/hooks/memory.py @@ -10,13 +10,13 @@ def __init__(self, before_epoch=False, after_epoch=True, after_iter=False): self._after_iter = after_iter def after_iter(self, trainer): - if self._after_iter: + if self._after_iter and torch.cuda.is_available(): torch.cuda.empty_cache() def before_epoch(self, trainer): - if self._before_epoch: + if self._before_epoch and torch.cuda.is_available(): torch.cuda.empty_cache() def after_epoch(self, trainer): - if self._after_epoch: + if self._after_epoch and torch.cuda.is_available(): torch.cuda.empty_cache() diff --git a/det3d/torchie/trainer/trainer.py b/det3d/torchie/trainer/trainer.py index 18bfc967..af56bae8 100644 --- a/det3d/torchie/trainer/trainer.py +++ b/det3d/torchie/trainer/trainer.py @@ -147,6 +147,7 @@ def __init__( work_dir=None, log_level=logging.INFO, logger=None, + train_device=None, **kwargs, ): assert callable(batch_processor) @@ -155,6 +156,11 @@ def __init__( self.lr_scheduler = lr_scheduler self.batch_processor = batch_processor + self.train_device = ( + train_device + if train_device is not None + else (torch.device("cuda", 0) if torch.cuda.is_available() else torch.device("cpu")) + ) # Create work_dir if torchie.is_str(work_dir): @@ -351,20 +357,43 @@ def save_checkpoint( def batch_processor_inline(self, model, data, train_mode, **kwargs): - if "local_rank" in kwargs: - 
device = torch.device(kwargs["local_rank"]) - else: - device = None + device = kwargs.get("train_device", self.train_device) # data = example_convert_to_torch(data, device=device) - example = example_to_device( - data, torch.cuda.current_device(), non_blocking=False - ) + example = example_to_device(data, device, non_blocking=False) self.call_hook("after_data_to_device") if train_mode: - losses = model(example, return_loss=True) + teacher_model = kwargs.get("teacher_model") + kd_cfg = kwargs.get("kd_cfg") + kd_enabled = bool(kd_cfg and kd_cfg.get("enabled", False)) + teacher_preds_dicts = None + teacher_feats = None + kd_type = kd_cfg.get("type", "heatmap_mse") if kd_cfg else "heatmap_mse" + + if teacher_model is not None and kd_enabled: + with torch.no_grad(): + if kd_type == "feature_mse": + teacher_out = teacher_model( + example, + return_loss=True, + return_preds=True, + return_feats=True, + ) + teacher_feats = teacher_out["feats"] + else: + teacher_preds_dicts = teacher_model( + example, return_loss=True, return_preds=True + ) + + losses = model( + example, + return_loss=True, + teacher_preds_dicts=teacher_preds_dicts, + teacher_feats=teacher_feats, + kd_cfg=kd_cfg, + ) self.call_hook("after_forward") loss, log_vars = parse_second_losses(losses) del losses @@ -480,9 +509,15 @@ def val(self, data_loader, **kwargs): def resume(self, checkpoint, resume_optimizer=True, map_location="default"): if map_location == "default": - checkpoint = self.load_checkpoint( - checkpoint , map_location='cuda:{}'.format(torch.cuda.current_device()) # TODO: FIX THIS!! 
- ) + if torch.cuda.is_available(): + loc = "cuda:{}".format(torch.cuda.current_device()) + else: + loc = ( + self.train_device + if isinstance(self.train_device, str) + else str(self.train_device) + ) + checkpoint = self.load_checkpoint(checkpoint, map_location=loc) else: checkpoint = self.load_checkpoint(checkpoint, map_location=map_location) diff --git a/tools/nms_better.py b/tools/nms_better.py index bc2b3bc7..2191b9b2 100644 --- a/tools/nms_better.py +++ b/tools/nms_better.py @@ -23,7 +23,7 @@ def parse_args(): parser = argparse.ArgumentParser(description="Ensemble Models") parser.add_argument("ensemble_dir", help="path to a dir that contains all prediction file") parser.add_argument("--output_path", help="the path to save ensemble output") - parser.add_argument("--data_root", type=str, default="data/nuScenes/v1.0-trainval") + parser.add_argument("--data_root", type=str, default=os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes") + "/v1.0-trainval") args = parser.parse_args() diff --git a/tools/nusc_tracking/pub_test.py b/tools/nusc_tracking/pub_test.py index 4234ae66..57d903d6 100644 --- a/tools/nusc_tracking/pub_test.py +++ b/tools/nusc_tracking/pub_test.py @@ -26,7 +26,7 @@ def parse_args(): "--checkpoint", help="the dir to checkpoint which the model read from" ) parser.add_argument("--hungarian", action='store_true') - parser.add_argument("--root", type=str, default="data/nuScenes") + parser.add_argument("--root", type=str, default=os.environ.get("NUSCENES_DATA_ROOT", "data/nuScenes")) parser.add_argument("--version", type=str, default='v1.0-trainval') parser.add_argument("--max_age", type=int, default=3) diff --git a/tools/train.py b/tools/train.py index a3a4354e..32ca6b05 100644 --- a/tools/train.py +++ b/tools/train.py @@ -71,6 +71,9 @@ def main(): cfg = Config.fromfile(args.config) + if not hasattr(cfg, "gpus") or cfg.gpus is None: + cfg.gpus = args.gpus + # update configs according to CLI args if args.work_dir is not None: cfg.work_dir = 
args.work_dir @@ -83,8 +86,11 @@ def main(): if distributed: if args.launcher == "pytorch": - torch.cuda.set_device(args.local_rank) - torch.distributed.init_process_group(backend="nccl", init_method="env://") + if torch.cuda.is_available(): + torch.cuda.set_device(args.local_rank) + torch.distributed.init_process_group(backend="nccl", init_method="env://") + else: + torch.distributed.init_process_group(backend="gloo", init_method="env://") cfg.local_rank = args.local_rank elif args.launcher == "slurm": proc_id = int(os.environ["SLURM_PROCID"])