From 6c248335f8c2caee7951f9ce1f28fde26a361919 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 30 Jan 2023 23:00:36 +0000
Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 app/client.py | 2 +-
 mmyolo/.dev_scripts/gather_models.py | 249 ++--
 mmyolo/_git/packed-refs | 2 +-
 mmyolo/configs/_base_/default_runtime.py | 32 +-
 mmyolo/configs/deploy/base_dynamic.py | 21 +-
 mmyolo/configs/deploy/base_static.py | 23 +-
 .../deploy/detection_onnxruntime_dynamic.py | 16 +-
 .../deploy/detection_onnxruntime_static.py | 16 +-
 .../detection_rknn-fp16_static-320x320.py | 14 +-
 .../detection_rknn-int8_static-320x320.py | 14 +-
 ...n_tensorrt-fp16_dynamic-192x192-960x960.py | 12 +-
 ...n_tensorrt-fp16_dynamic-64x64-1344x1344.py | 12 +-
 .../detection_tensorrt-fp16_static-640x640.py | 12 +-
 ...n_tensorrt-int8_dynamic-192x192-960x960.py | 17 +-
 .../detection_tensorrt-int8_static-640x640.py | 17 +-
 ...ection_tensorrt_dynamic-192x192-960x960.py | 12 +-
 .../detection_tensorrt_static-640x640.py | 12 +-
 .../configs/deploy/model/yolov5_s-static.py | 24 +-
 .../configs/deploy/model/yolov6_s-static.py | 24 +-
 .../ppyoloe/ppyoloe_l_fast_8xb20-300e_coco.py | 10 +-
 .../ppyoloe/ppyoloe_m_fast_8xb28-300e_coco.py | 10 +-
 .../ppyoloe_plus_l_fast_8xb8-80e_coco.py | 7 +-
 .../ppyoloe_plus_m_fast_8xb8-80e_coco.py | 7 +-
 .../ppyoloe_plus_s_fast_8xb8-80e_coco.py | 220 +--
 .../ppyoloe_plus_x_fast_8xb8-80e_coco.py | 7 +-
 .../ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py | 18 +-
 .../ppyoloe/ppyoloe_s_fast_8xb32-400e_coco.py | 2 +-
 .../ppyoloe/ppyoloe_x_fast_8xb16-300e_coco.py | 10 +-
 .../cspnext-s_8xb256-rsb-a1-600e_in1k.py | 61 +-
 .../cspnext-tiny_8xb256-rsb-a1-600e_in1k.py | 6 +-
 .../rtmdet_l_syncbn_fast_8xb32-300e_coco.py | 222 ++--
 .../rtmdet_m_syncbn_fast_8xb32-300e_coco.py | 5 +-
 .../rtmdet_s_syncbn_fast_8xb32-300e_coco.py | 76 +-
 ...rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py | 41 +-
 .../rtmdet_x_syncbn_fast_8xb32-300e_coco.py | 5 +-
 ...lov5_s-v61_8xb16-300e_ignore_crowdhuman.py | 73 +-
 ...yolov5_s-v61_fast_8xb16-300e_crowdhuman.py | 35 +-
 .../voc/yolov5_l-v61_fast_1xb32-50e_voc.py | 13 +-
 .../voc/yolov5_m-v61_fast_1xb64-50e_voc.py | 7 +-
 .../voc/yolov5_n-v61_fast_1xb64-50e_voc.py | 7 +-
 .../voc/yolov5_s-v61_fast_1xb64-50e_voc.py | 187 +--
 .../voc/yolov5_x-v61_fast_1xb32-50e_voc.py | 11 +-
 ...v5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py | 5 +-
 ...olov5_l-v61_syncbn_fast_8xb16-300e_coco.py | 5 +-
 ...v5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py | 63 +-
 ...olov5_m-v61_syncbn_fast_8xb16-300e_coco.py | 63 +-
 ...v5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py | 5 +-
 ...olov5_n-v61_syncbn_fast_8xb16-300e_coco.py | 5 +-
 ...v5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py | 94 +-
 ...ov5_s-v61_syncbn-detect_8xb16-300e_coco.py | 33 +-
 .../yolov5_s-v61_syncbn_8xb16-300e_coco.py | 233 ++--
 ...ov5_s-v61_syncbn_fast_1xb4-300e_balloon.py | 24 +-
 ...olov5_s-v61_syncbn_fast_8xb16-300e_coco.py | 14 +-
 ...v5_x-p6-v62_syncbn_fast_8xb16-300e_coco.py | 5 +-
 ...olov5_x-v61_syncbn_fast_8xb16-300e_coco.py | 5 +-
 .../yolov6_l_syncbn_fast_8xb32-300e_coco.py | 23 +-
 .../yolov6_m_syncbn_fast_8xb32-300e_coco.py | 59 +-
 .../yolov6_n_syncbn_fast_8xb32-300e_coco.py | 7 +-
 .../yolov6_n_syncbn_fast_8xb32-400e_coco.py | 7 +-
 .../yolov6_s_syncbn_fast_8xb32-300e_coco.py | 26 +-
 .../yolov6_s_syncbn_fast_8xb32-400e_coco.py | 230 ++--
 .../yolov6_t_syncbn_fast_8xb32-300e_coco.py | 8 +-
 .../yolov6_t_syncbn_fast_8xb32-400e_coco.py | 8 +-
 ...yolov7_d-p6_syncbn_fast_8x16b-300e_coco.py | 16 +-
 ...yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py | 18 +-
 ...lov7_e2e-p6_syncbn_fast_8x16b-300e_coco.py | 18 +-
 .../yolov7_l_syncbn_fast_8x16b-300e_coco.py | 242 ++--
 ...yolov7_tiny_syncbn_fast_8x16b-300e_coco.py | 72 +-
 ...yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py | 104 +-
 .../yolov7_x_syncbn_fast_8x16b-300e_coco.py | 15 +-
 .../yolov8_l_syncbn_fast_8xb16-500e_coco.py | 24 +-
 .../yolov8_m_syncbn_fast_8xb16-500e_coco.py | 60 +-
 .../yolov8_n_syncbn_fast_8xb16-500e_coco.py | 5 +-
 .../yolov8_s_syncbn_fast_8xb16-500e_coco.py | 254 ++--
 .../yolov8_x_syncbn_fast_8xb16-500e_coco.py | 5 +-
 .../configs/yolox/yolox_l_8xb8-300e_coco.py | 5 +-
 .../configs/yolox/yolox_m_8xb8-300e_coco.py | 5 +-
 .../yolox/yolox_nano_8xb8-300e_coco.py | 13 +-
 .../configs/yolox/yolox_s_8xb8-300e_coco.py | 241 ++--
 .../yolox/yolox_tiny_8xb8-300e_coco.py | 72 +-
 .../configs/yolox/yolox_x_8xb8-300e_coco.py | 5 +-
 mmyolo/demo.py | 15 +-
 mmyolo/demo/boxam_vis_demo.py | 183 +--
 mmyolo/demo/deploy_demo.py | 41 +-
 mmyolo/demo/featmap_vis_demo.py | 110 +-
 mmyolo/demo/image_demo.py | 75 +-
 mmyolo/demo/large_image_demo.py | 145 +-
 mmyolo/demo/video_demo.py | 53 +-
 mmyolo/docs/en/conf.py | 65 +-
 mmyolo/docs/en/stat.py | 25 +-
 mmyolo/docs/zh_cn/conf.py | 67 +-
 mmyolo/docs/zh_cn/stat.py | 25 +-
 mmyolo/mmyolo/__init__.py | 54 +-
 mmyolo/mmyolo/datasets/__init__.py | 7 +-
 mmyolo/mmyolo/datasets/transforms/__init__.py | 27 +-
 .../datasets/transforms/mix_img_transforms.py | 675 ++++++----
 .../mmyolo/datasets/transforms/transforms.py | 618 +++++----
 mmyolo/mmyolo/datasets/utils.py | 51 +-
 mmyolo/mmyolo/datasets/yolov5_coco.py | 8 +-
 mmyolo/mmyolo/datasets/yolov5_crowdhuman.py | 1 -
 mmyolo/mmyolo/datasets/yolov5_voc.py | 2 +-
 mmyolo/mmyolo/deploy/__init__.py | 2 +-
 .../deploy/models/dense_heads/__init__.py | 2 +-
 .../deploy/models/dense_heads/yolov5_head.py | 81 +-
 .../mmyolo/deploy/models/layers/__init__.py | 2 +-
 .../mmyolo/deploy/models/layers/bbox_nms.py | 49 +-
 mmyolo/mmyolo/deploy/object_detection.py | 45 +-
 mmyolo/mmyolo/engine/hooks/__init__.py | 6 +-
 .../hooks/ppyoloe_param_scheduler_hook.py | 48 +-
 .../hooks/yolov5_param_scheduler_hook.py | 74 +-
 .../engine/hooks/yolox_mode_switch_hook.py | 15 +-
 mmyolo/mmyolo/engine/optimizers/__init__.py | 2 +-
 .../optimizers/yolov5_optim_constructor.py | 66 +-
 .../yolov7_optim_wrapper_constructor.py | 66 +-
 mmyolo/mmyolo/models/backbones/__init__.py | 12 +-
 .../mmyolo/models/backbones/base_backbone.py | 50 +-
 mmyolo/mmyolo/models/backbones/csp_darknet.py | 186 ++-
 mmyolo/mmyolo/models/backbones/csp_resnet.py | 82 +-
 mmyolo/mmyolo/models/backbones/cspnext.py | 62 +-
 .../mmyolo/models/backbones/efficient_rep.py | 105 +-
 .../models/backbones/yolov7_backbone.py | 221 ++--
 .../models/data_preprocessors/__init__.py | 13 +-
 .../data_preprocessors/data_preprocessor.py | 101 +-
 mmyolo/mmyolo/models/dense_heads/__init__.py | 19 +-
 .../mmyolo/models/dense_heads/ppyoloe_head.py | 250 ++--
 .../mmyolo/models/dense_heads/rtmdet_head.py | 210 +--
 .../mmyolo/models/dense_heads/yolov5_head.py | 604 +++++----
 .../mmyolo/models/dense_heads/yolov6_head.py | 249 ++--
 .../mmyolo/models/dense_heads/yolov7_head.py | 232 ++--
 .../mmyolo/models/dense_heads/yolov8_head.py | 227 ++--
 .../mmyolo/models/dense_heads/yolox_head.py | 296 +++--
 mmyolo/mmyolo/models/detectors/__init__.py | 2 +-
 .../mmyolo/models/detectors/yolo_detector.py | 25 +-
 mmyolo/mmyolo/models/layers/__init__.py | 44 +-
 mmyolo/mmyolo/models/layers/ema.py | 34 +-
 mmyolo/mmyolo/models/layers/yolo_bricks.py | 733 +++++-----
 mmyolo/mmyolo/models/losses/__init__.py | 2 +-
 mmyolo/mmyolo/models/losses/iou_loss.py | 92 +-
 mmyolo/mmyolo/models/necks/__init__.py | 12 +-
 mmyolo/mmyolo/models/necks/base_yolo_neck.py | 42 +-
 mmyolo/mmyolo/models/necks/cspnext_pafpn.py | 50 +-
 mmyolo/mmyolo/models/necks/ppyoloe_csppan.py | 67 +-
 mmyolo/mmyolo/models/necks/yolov5_pafpn.py | 66 +-
 mmyolo/mmyolo/models/necks/yolov6_pafpn.py | 113 +-
 mmyolo/mmyolo/models/necks/yolov7_pafpn.py | 94 +-
 mmyolo/mmyolo/models/necks/yolov8_pafpn.py | 42 +-
 mmyolo/mmyolo/models/necks/yolox_pafpn.py | 60 +-
 mmyolo/mmyolo/models/plugins/__init__.py | 2 +-
 mmyolo/mmyolo/models/plugins/cbam.py | 34 +-
 mmyolo/mmyolo/models/task_modules/__init__.py | 6 +-
 .../models/task_modules/assigners/__init__.py | 16 +-
 .../assigners/batch_atss_assigner.py | 186 +--
 .../assigners/batch_dsl_assigner.py | 90 +-
 .../assigners/batch_task_aligned_assigner.py | 180 ++-
 .../assigners/batch_yolov7_assigner.py | 213 +--
 .../models/task_modules/assigners/utils.py | 34 +-
 .../models/task_modules/coders/__init__.py | 2 +-
 .../coders/distance_point_bbox_coder.py | 22 +-
 .../task_modules/coders/yolov5_bbox_coder.py | 24 +-
 .../task_modules/coders/yolox_bbox_coder.py | 17 +-
 mmyolo/mmyolo/models/utils/__init__.py | 2 +-
 mmyolo/mmyolo/models/utils/misc.py | 4 +-
 mmyolo/mmyolo/registry.py | 51 +-
 mmyolo/mmyolo/testing/__init__.py | 2 +-
 mmyolo/mmyolo/testing/_utils.py | 5 +-
 mmyolo/mmyolo/utils/__init__.py | 2 +-
 mmyolo/mmyolo/utils/boxam_utils.py | 268 ++--
 mmyolo/mmyolo/utils/collect_env.py | 10 +-
 mmyolo/mmyolo/utils/labelme_utils.py | 42 +-
 mmyolo/mmyolo/utils/large_image.py | 37 +-
 mmyolo/mmyolo/utils/misc.py | 43 +-
 mmyolo/mmyolo/utils/setup_env.py | 26 +-
 mmyolo/mmyolo/version.py | 12 +-
 .../assigner_visualization.py | 110 +-
 ...t_8xb16-300e_coco_assignervisualization.py | 17 +-
 .../dense_heads/__init__.py | 2 +-
 .../dense_heads/yolov5_head_assigner.py | 169 +--
 .../detectors/__init__.py | 7 +-
 .../detectors/yolo_detector_assigner.py | 7 +-
 .../visualization/__init__.py | 2 +-
 .../visualization/assigner_visualizer.py | 237 ++--
 .../projects/easydeploy/backbone/__init__.py | 2 +-
 mmyolo/projects/easydeploy/backbone/common.py | 3 +-
 mmyolo/projects/easydeploy/backbone/focus.py | 21 +-
 .../projects/easydeploy/bbox_code/__init__.py | 5 +-
 .../easydeploy/bbox_code/bbox_coder.py | 28 +-
 mmyolo/projects/easydeploy/model/__init__.py | 2 +-
 .../easydeploy/model/backendwrapper.py | 76 +-
 mmyolo/projects/easydeploy/model/model.py | 68 +-
 mmyolo/projects/easydeploy/nms/__init__.py | 2 +-
 mmyolo/projects/easydeploy/nms/ort_nms.py | 89 +-
 mmyolo/projects/easydeploy/nms/trt_nms.py | 92 +-
 .../projects/easydeploy/tools/build_engine.py | 100 +-
 mmyolo/projects/easydeploy/tools/export.py | 106 +-
 .../projects/easydeploy/tools/image-demo.py | 62 +-
 ...mmy-backbone_v61_syncbn_8xb16-300e_coco.py | 6 +-
 .../example_project/dummy/__init__.py | 2 +-
 .../dummy/dummy_yolov5cspdarknet.py | 2 +-
 ...yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py | 47 +-
 .../yolov6_s_syncbn_fast_1xb32-100e_cat.py | 56 +-
 .../yolov7_tiny_syncbn_fast_1xb32-100e_cat.py | 47 +-
 mmyolo/setup.py | 129 +-
 .../test_mix_img_transforms.py | 332 +++--
 .../test_transforms/test_transforms.py | 423 +++---
 mmyolo/tests/test_datasets/test_utils.py | 121 +-
 .../tests/test_datasets/test_yolov5_coco.py | 43 +-
 mmyolo/tests/test_datasets/test_yolov5_voc.py | 65 +-
 .../tests/test_deploy/test_mmyolo_models.py | 137 +-
 .../test_deploy/test_object_detection.py | 76 +-
 .../test_hooks/test_switch_to_deploy_hook.py | 1 -
 .../test_yolov5_param_scheduler_hook.py | 42 +-
 .../test_hooks/test_yolox_mode_switch_hook.py | 16 +-
 .../test_yolov5_optim_constructor.py | 47 +-
 .../test_yolov7_optim_wrapper_constructor.py | 47 +-
 .../test_backbone/test_csp_darknet.py | 40 +-
 .../test_backbone/test_csp_resnet.py | 27 +-
 .../test_backbone/test_efficient_rep.py | 50 +-
 .../test_backbone/test_yolov7_backbone.py | 34 +-
 .../tests/test_models/test_backbone/utils.py | 6 +-
 .../test_data_preprocessor.py | 86 +-
 .../test_dense_heads/test_ppyoloe_head.py | 195 +--
 .../test_dense_heads/test_rtmdet_head.py | 116 +-
 .../test_dense_heads/test_yolov5_head.py | 275 ++--
 .../test_dense_heads/test_yolov6_head.py | 37 +-
 .../test_dense_heads/test_yolov7_head.py | 140 +-
 .../test_dense_heads/test_yolov8_head.py | 155 ++-
 .../test_dense_heads/test_yolox_head.py | 159 ++-
 .../test_detectors/test_yolo_detector.py | 119 +-
 .../tests/test_models/test_layers/test_ema.py | 39 +-
 .../test_layers/test_yolo_bricks.py | 4 +-
 .../test_necks/test_cspnext_pafpn.py | 7 +-
 .../test_necks/test_ppyoloe_csppan.py | 11 +-
 .../test_necks/test_yolov5_pafpn.py | 1 -
 .../test_necks/test_yolov6_pafpn.py | 7 +-
 .../test_necks/test_yolov7_pafpn.py | 14 +-
 .../test_necks/test_yolov8_pafpn.py | 1 -
 .../test_necks/test_yolox_pafpn.py | 1 -
 .../test_models/test_plugins/test_cbam.py | 3 +-
 .../test_batch_atss_assigner.py | 272 ++--
 .../test_batch_task_aligned_assigner.py | 97 +-
 .../test_distance_point_bbox_coder.py | 23 +-
 .../test_coders/test_yolov5_bbox_coder.py | 34 +-
 .../test_coders/test_yolox_bbox_coder.py | 26 +-
 mmyolo/tests/test_utils/test_collect_env.py | 25 +-
 mmyolo/tests/test_utils/test_setup_env.py | 31 +-
 mmyolo/tools/analysis_tools/benchmark.py | 144 +-
 .../tools/analysis_tools/browse_coco_json.py | 83 +-
 mmyolo/tools/analysis_tools/browse_dataset.py | 161 ++-
 .../tools/analysis_tools/dataset_analysis.py | 354 +++--
 .../tools/analysis_tools/optimize_anchors.py | 332 ++---
 mmyolo/tools/analysis_tools/vis_scheduler.py | 183 +--
 .../tools/dataset_converters/balloon2coco.py | 40 +-
 .../tools/dataset_converters/labelme2coco.py | 187 +--
 mmyolo/tools/dataset_converters/yolo2coco.py | 141 +-
 mmyolo/tools/misc/coco_split.py | 81 +-
 mmyolo/tools/misc/download_dataset.py | 79 +-
 mmyolo/tools/misc/extract_subcoco.py | 112 +-
 .../model_converters/ppyoloe_to_mmyolo.py | 186 ++-
 .../model_converters/rtmdet_to_mmyolo.py | 50 +-
 .../model_converters/yolov5_to_mmyolo.py | 146 +-
 .../model_converters/yolov6_to_mmyolo.py | 169 ++-
 .../model_converters/yolov7_to_mmyolo.py | 1178 ++++++++---------
 .../model_converters/yolov8_to_mmyolo.py | 91 +-
 .../tools/model_converters/yolox_to_mmyolo.py | 100 +-
 mmyolo/tools/test.py | 99 +-
 mmyolo/tools/train.py | 87 +-
 266 files changed, 11420 insertions(+), 9563 deletions(-)

diff --git a/app/client.py b/app/client.py
index 66fe5add..931f8791 100644
--- a/app/client.py
+++ b/app/client.py
@@ -78,7 +78,7 @@ def main():
         st.session_state["inferenced"] = None
 
     models = get_models()
-    images = get_images()
+    get_images()
     videos = get_videos()
     video_names = get_video_names()
 
diff --git a/mmyolo/.dev_scripts/gather_models.py b/mmyolo/.dev_scripts/gather_models.py
index 05dd2c2e..929dca2d 100644
--- a/mmyolo/.dev_scripts/gather_models.py
+++ b/mmyolo/.dev_scripts/gather_models.py
@@ -16,58 +16,58 @@
 
 
 def ordered_yaml_dump(data, stream=None, Dumper=yaml.SafeDumper, **kwds):
-
     class OrderedDumper(Dumper):
         pass
 
     def _dict_representer(dumper, data):
         return dumper.represent_mapping(
-            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items())
+            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items()
+        )
 
     OrderedDumper.add_representer(OrderedDict, _dict_representer)
     return yaml.dump(data, stream, OrderedDumper, **kwds)
 
 
 def process_checkpoint(in_file, out_file):
-    checkpoint = torch.load(in_file, map_location='cpu')
+    checkpoint = torch.load(in_file, map_location="cpu")
     # remove optimizer for smaller file size
-    if 'optimizer' in checkpoint:
-        del checkpoint['optimizer']
-    if 'message_hub' in checkpoint:
-        del checkpoint['message_hub']
-    if 'ema_state_dict' in checkpoint:
-        del checkpoint['ema_state_dict']
-
-    for key in list(checkpoint['state_dict']):
-        if key.startswith('data_preprocessor'):
-            checkpoint['state_dict'].pop(key)
-        elif 'priors_base_sizes' in key:
-            checkpoint['state_dict'].pop(key)
-        elif 'grid_offset' in key:
-            checkpoint['state_dict'].pop(key)
-        elif 'prior_inds' in key:
-            checkpoint['state_dict'].pop(key)
+    if "optimizer" in checkpoint:
+        del checkpoint["optimizer"]
+    if "message_hub" in checkpoint:
+        del checkpoint["message_hub"]
+    if "ema_state_dict" in checkpoint:
+        del checkpoint["ema_state_dict"]
+
+    for key in list(checkpoint["state_dict"]):
+        if key.startswith("data_preprocessor"):
+            checkpoint["state_dict"].pop(key)
+        elif "priors_base_sizes" in key:
+            checkpoint["state_dict"].pop(key)
+        elif "grid_offset" in key:
+            checkpoint["state_dict"].pop(key)
+        elif "prior_inds" in key:
+            checkpoint["state_dict"].pop(key)
 
     # if it is necessary to remove some sensitive data in checkpoint['meta'],
     # add the code here.
-    if torch.__version__ >= '1.6':
+    if torch.__version__ >= "1.6":
         torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False)
     else:
         torch.save(checkpoint, out_file)
-    sha = subprocess.check_output(['sha256sum', out_file]).decode()
-    final_file = out_file.rstrip('.pth') + f'-{sha[:8]}.pth'
-    subprocess.Popen(['mv', out_file, final_file])
+    sha = subprocess.check_output(["sha256sum", out_file]).decode()
+    final_file = out_file.rstrip(".pth") + f"-{sha[:8]}.pth"
+    subprocess.Popen(["mv", out_file, final_file])
     return final_file
 
 
 def is_by_epoch(config):
-    cfg = Config.fromfile('./configs/' + config)
-    return cfg.train_cfg.type == 'EpochBasedTrainLoop'
+    cfg = Config.fromfile("./configs/" + config)
+    return cfg.train_cfg.type == "EpochBasedTrainLoop"
 
 
 def get_final_epoch_or_iter(config):
-    cfg = Config.fromfile('./configs/' + config)
-    if cfg.train_cfg.type == 'EpochBasedTrainLoop':
+    cfg = Config.fromfile("./configs/" + config)
+    if cfg.train_cfg.type == "EpochBasedTrainLoop":
         return cfg.train_cfg.max_epochs
     else:
         return cfg.train_cfg.max_iters
@@ -75,30 +75,29 @@ def get_final_epoch_or_iter(config):
 
 def get_best_epoch_or_iter(exp_dir):
     best_epoch_iter_full_path = list(
-        sorted(glob.glob(osp.join(exp_dir, 'best_*.pth'))))[-1]
-    best_epoch_or_iter_model_path = best_epoch_iter_full_path.split('/')[-1]
-    best_epoch_or_iter = best_epoch_or_iter_model_path. \
-        split('_')[-1].split('.')[0]
+        sorted(glob.glob(osp.join(exp_dir, "best_*.pth")))
+    )[-1]
+    best_epoch_or_iter_model_path = best_epoch_iter_full_path.split("/")[-1]
+    best_epoch_or_iter = best_epoch_or_iter_model_path.split("_")[-1].split(".")[0]
     return best_epoch_or_iter_model_path, int(best_epoch_or_iter)
 
 
 def get_real_epoch_or_iter(config):
-    cfg = Config.fromfile('./configs/' + config)
-    if cfg.train_cfg.type == 'EpochBasedTrainLoop':
+    cfg = Config.fromfile("./configs/" + config)
+    if cfg.train_cfg.type == "EpochBasedTrainLoop":
         epoch = cfg.train_cfg.max_epochs
         return epoch
     else:
         return cfg.runner.max_iters
 
 
-def get_final_results(log_json_path,
-                      epoch_or_iter,
-                      results_lut='coco/bbox_mAP',
-                      by_epoch=True):
+def get_final_results(
+    log_json_path, epoch_or_iter, results_lut="coco/bbox_mAP", by_epoch=True
+):
     result_dict = dict()
     with open(log_json_path) as f:
         r = f.readlines()[-1]
-        last_metric = r.split(',')[0].split(': ')[-1].strip()
+        last_metric = r.split(",")[0].split(": ")[-1].strip()
     result_dict[results_lut] = last_metric
     return result_dict
@@ -106,19 +105,20 @@ def get_final_results(log_json_path,
 def get_dataset_name(config):
     # If there are more dataset, add here.
     name_map = dict(
-        CityscapesDataset='Cityscapes',
-        CocoDataset='COCO',
-        YOLOv5CocoDataset='COCO',
-        CocoPanopticDataset='COCO',
-        DeepFashionDataset='Deep Fashion',
-        LVISV05Dataset='LVIS v0.5',
-        LVISV1Dataset='LVIS v1',
-        VOCDataset='Pascal VOC',
-        YOLOv5VOCDataset='Pascal VOC',
-        WIDERFaceDataset='WIDER Face',
-        OpenImagesDataset='OpenImagesDataset',
-        OpenImagesChallengeDataset='OpenImagesChallengeDataset')
-    cfg = Config.fromfile('./configs/' + config)
+        CityscapesDataset="Cityscapes",
+        CocoDataset="COCO",
+        YOLOv5CocoDataset="COCO",
+        CocoPanopticDataset="COCO",
+        DeepFashionDataset="Deep Fashion",
+        LVISV05Dataset="LVIS v0.5",
+        LVISV1Dataset="LVIS v1",
+        VOCDataset="Pascal VOC",
+        YOLOv5VOCDataset="Pascal VOC",
+        WIDERFaceDataset="WIDER Face",
+        OpenImagesDataset="OpenImagesDataset",
+        OpenImagesChallengeDataset="OpenImagesChallengeDataset",
+    )
+    cfg = Config.fromfile("./configs/" + config)
     return name_map[cfg.dataset_type]
 
 
@@ -126,8 +126,7 @@ def find_last_dir(model_dir):
     dst_times = []
     for time_stamp in os.scandir(model_dir):
         if osp.isdir(time_stamp):
-            dst_time = time.mktime(
-                time.strptime(time_stamp.name, '%Y%m%d_%H%M%S'))
+            dst_time = time.mktime(time.strptime(time_stamp.name, "%Y%m%d_%H%M%S"))
             dst_times.append([dst_time, time_stamp.name])
     return max(dst_times, key=lambda x: x[0])[1]
 
@@ -135,53 +134,60 @@ def convert_model_info_to_pwc(model_infos):
     pwc_files = {}
     for model in model_infos:
-        cfg_folder_name = osp.split(model['config'])[-2]
+        cfg_folder_name = osp.split(model["config"])[-2]
 
         pwc_model_info = OrderedDict()
-        pwc_model_info['Name'] = osp.split(model['config'])[-1].split('.')[0]
-        pwc_model_info['In Collection'] = 'Please fill in Collection name'
-        pwc_model_info['Config'] = osp.join('configs', model['config'])
+        pwc_model_info["Name"] = osp.split(model["config"])[-1].split(".")[0]
+        pwc_model_info["In Collection"] = "Please fill in Collection name"
+        pwc_model_info["Config"] = osp.join("configs", model["config"])
 
         # get metadata
         meta_data = OrderedDict()
-        if 'epochs' in model:
-            meta_data['Epochs'] = get_real_epoch_or_iter(model['config'])
+        if "epochs" in model:
+            meta_data["Epochs"] = get_real_epoch_or_iter(model["config"])
         else:
-            meta_data['Iterations'] = get_real_epoch_or_iter(model['config'])
-        pwc_model_info['Metadata'] = meta_data
+            meta_data["Iterations"] = get_real_epoch_or_iter(model["config"])
+        pwc_model_info["Metadata"] = meta_data
 
         # get dataset name
-        dataset_name = get_dataset_name(model['config'])
+        dataset_name = get_dataset_name(model["config"])
 
         # get results
         results = []
         # if there are more metrics, add here.
-        if 'bbox_mAP' in model['results']:
-            metric = round(model['results']['bbox_mAP'] * 100, 1)
+        if "bbox_mAP" in model["results"]:
+            metric = round(model["results"]["bbox_mAP"] * 100, 1)
             results.append(
                 OrderedDict(
-                    Task='Object Detection',
+                    Task="Object Detection",
                     Dataset=dataset_name,
-                    Metrics={'box AP': metric}))
-        if 'segm_mAP' in model['results']:
-            metric = round(model['results']['segm_mAP'] * 100, 1)
+                    Metrics={"box AP": metric},
+                )
+            )
+        if "segm_mAP" in model["results"]:
+            metric = round(model["results"]["segm_mAP"] * 100, 1)
             results.append(
                 OrderedDict(
-                    Task='Instance Segmentation',
+                    Task="Instance Segmentation",
                     Dataset=dataset_name,
-                    Metrics={'mask AP': metric}))
-        if 'PQ' in model['results']:
-            metric = round(model['results']['PQ'], 1)
+                    Metrics={"mask AP": metric},
+                )
+            )
+        if "PQ" in model["results"]:
+            metric = round(model["results"]["PQ"], 1)
             results.append(
                 OrderedDict(
-                    Task='Panoptic Segmentation',
+                    Task="Panoptic Segmentation",
                     Dataset=dataset_name,
-                    Metrics={'PQ': metric}))
-        pwc_model_info['Results'] = results
-
-        link_string = 'https://download.openmmlab.com/mmyolo/v0/'
-        link_string += '{}/{}'.format(model['config'].rstrip('.py'),
-                                      osp.split(model['model_path'])[-1])
-        pwc_model_info['Weights'] = link_string
+                    Metrics={"PQ": metric},
+                )
+            )
+        pwc_model_info["Results"] = results
+
+        link_string = "https://download.openmmlab.com/mmyolo/v0/"
+        link_string += "{}/{}".format(
+            model["config"].rstrip(".py"), osp.split(model["model_path"])[-1]
+        )
+        pwc_model_info["Weights"] = link_string
         if cfg_folder_name in pwc_files:
             pwc_files[cfg_folder_name].append(pwc_model_info)
         else:
@@ -190,17 +196,16 @@ def convert_model_info_to_pwc(model_infos):
 
 
 def parse_args():
-    parser = argparse.ArgumentParser(description='Gather benchmarked models')
+    parser = argparse.ArgumentParser(description="Gather benchmarked models")
     parser.add_argument(
-        'root',
-        type=str,
-        help='root path of benchmarked models to be gathered')
+        "root", type=str, help="root path of benchmarked models to be gathered"
+    )
     parser.add_argument(
-        'out', type=str, help='output path of gathered models to be stored')
+        "out", type=str, help="output path of gathered models to be stored"
+    )
     parser.add_argument(
-        '--best',
-        action='store_true',
-        help='whether to gather the best model.')
+        "--best", action="store_true", help="whether to gather the best model."
+    )
     args = parser.parse_args()
     return args
@@ -214,14 +219,14 @@ def main():
     mkdir_or_exist(models_out)
 
     # find all models in the root directory to be gathered
-    raw_configs = list(scandir('./configs', '.py', recursive=True))
+    raw_configs = list(scandir("./configs", ".py", recursive=True))
 
     # filter configs that is not trained in the experiments dir
     used_configs = []
     for raw_config in raw_configs:
         if osp.exists(osp.join(models_root, raw_config)):
             used_configs.append(raw_config)
-    print(f'Find {len(used_configs)} models to be gathered')
+    print(f"Find {len(used_configs)} models to be gathered")
 
     # find final_ckpt and log file for trained each config
     # and parse the best performance
@@ -234,8 +239,9 @@
             final_model, final_epoch_or_iter = get_best_epoch_or_iter(exp_dir)
         else:
             final_epoch_or_iter = get_final_epoch_or_iter(used_config)
-            final_model = '{}_{}.pth'.format('epoch' if by_epoch else 'iter',
-                                             final_epoch_or_iter)
+            final_model = "{}_{}.pth".format(
+                "epoch" if by_epoch else "iter", final_epoch_or_iter
+            )
 
         model_path = osp.join(exp_dir, final_model)
         # skip if the model is still training
@@ -244,11 +250,13 @@
 
         # get the latest logs
         latest_exp_name = find_last_dir(exp_dir)
-        latest_exp_json = osp.join(exp_dir, latest_exp_name, 'vis_data',
-                                   latest_exp_name + '.json')
+        latest_exp_json = osp.join(
+            exp_dir, latest_exp_name, "vis_data", latest_exp_name + ".json"
+        )
 
         model_performance = get_final_results(
-            latest_exp_json, final_epoch_or_iter, by_epoch=by_epoch)
+            latest_exp_json, final_epoch_or_iter, by_epoch=by_epoch
+        )
 
         if model_performance is None:
             continue
@@ -258,53 +266,56 @@
             results=model_performance,
             final_model=final_model,
             latest_exp_json=latest_exp_json,
-            latest_exp_name=latest_exp_name)
-        model_info['epochs' if by_epoch else 'iterations'] = \
-            final_epoch_or_iter
+            latest_exp_name=latest_exp_name,
+        )
+        model_info["epochs" if by_epoch else "iterations"] = final_epoch_or_iter
         model_infos.append(model_info)
 
     # publish model for each checkpoint
     publish_model_infos = []
     for model in model_infos:
-        model_publish_dir = osp.join(models_out, model['config'].rstrip('.py'))
+        model_publish_dir = osp.join(models_out, model["config"].rstrip(".py"))
         mkdir_or_exist(model_publish_dir)
 
-        model_name = osp.split(model['config'])[-1].split('.')[0]
+        model_name = osp.split(model["config"])[-1].split(".")[0]
 
-        model_name += '_' + model['latest_exp_name']
+        model_name += "_" + model["latest_exp_name"]
         publish_model_path = osp.join(model_publish_dir, model_name)
-        trained_model_path = osp.join(models_root, model['config'],
-                                      model['final_model'])
+        trained_model_path = osp.join(
+            models_root, model["config"], model["final_model"]
+        )
 
         # convert model
-        final_model_path = process_checkpoint(trained_model_path,
-                                              publish_model_path)
+        final_model_path = process_checkpoint(trained_model_path, publish_model_path)
 
         # copy log
-        shutil.copy(model['latest_exp_json'],
-                    osp.join(model_publish_dir, f'{model_name}.log.json'))
+        shutil.copy(
+            model["latest_exp_json"],
+            osp.join(model_publish_dir, f"{model_name}.log.json"),
+        )
 
         # copy config to guarantee reproducibility
-        config_path = model['config']
-        config_path = osp.join(
-            'configs',
-            config_path) if 'configs' not in config_path else config_path
+        config_path = model["config"]
+        config_path = (
+            osp.join("configs", config_path)
+            if "configs" not in config_path
+            else config_path
+        )
         target_config_path = osp.split(config_path)[-1]
-        shutil.copy(config_path, osp.join(model_publish_dir,
-                                          target_config_path))
+        shutil.copy(config_path, osp.join(model_publish_dir, target_config_path))
 
-        model['model_path'] = final_model_path
+        model["model_path"] = final_model_path
        publish_model_infos.append(model)
 
     models = dict(models=publish_model_infos)
-    print(f'Totally gathered {len(publish_model_infos)} models')
-    dump(models, osp.join(models_out, 'model_info.json'))
+    print(f"Totally gathered {len(publish_model_infos)} models")
+    dump(models, osp.join(models_out, "model_info.json"))
 
     pwc_files = convert_model_info_to_pwc(publish_model_infos)
     for name in pwc_files:
-        with open(osp.join(models_out, name + '_metafile.yml'), 'w') as f:
-            ordered_yaml_dump(pwc_files[name], f, encoding='utf-8')
+        with open(osp.join(models_out, name + "_metafile.yml"), "w") as f:
+            ordered_yaml_dump(pwc_files[name], f, encoding="utf-8")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
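The `ordered_yaml_dump` helper reformatted above exists because PyYAML's stock dumpers cannot serialize `collections.OrderedDict`; registering a representer keeps metafile keys in insertion order. A minimal usage sketch, mirroring the metafile-writing loop at the end of the script (the field values here are made up for illustration):

    from collections import OrderedDict

    info = OrderedDict()
    info["Name"] = "some_model_config"  # hypothetical entry
    info["In Collection"] = "Please fill in Collection name"
    with open("example_metafile.yml", "w") as f:
        # keys come out in insertion order, not alphabetically sorted
        ordered_yaml_dump([info], f, encoding="utf-8")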
diff --git a/mmyolo/_git/packed-refs b/mmyolo/_git/packed-refs
index ca9f81ce..65af2a06 100644
--- a/mmyolo/_git/packed-refs
+++ b/mmyolo/_git/packed-refs
@@ -1,4 +1,4 @@
-# pack-refs with: peeled fully-peeled sorted 
+# pack-refs with: peeled fully-peeled sorted
 6153603ac10c179bbbcc05c2e8ec762640117c77 refs/remotes/origin/damo-yolo
 c682ae82c60383e5716e088fd0e54d2d1e3c4da1 refs/remotes/origin/dev
 b0cf0732412d9d7a69f5b7fb9368dab8be6983cc refs/remotes/origin/main
diff --git a/mmyolo/configs/_base_/default_runtime.py b/mmyolo/configs/_base_/default_runtime.py
index 2f0db2e3..40e9eb47 100644
--- a/mmyolo/configs/_base_/default_runtime.py
+++ b/mmyolo/configs/_base_/default_runtime.py
@@ -1,27 +1,27 @@
-default_scope = 'mmyolo'
+default_scope = "mmyolo"
 
 default_hooks = dict(
-    timer=dict(type='IterTimerHook'),
-    logger=dict(type='LoggerHook', interval=50),
-    param_scheduler=dict(type='ParamSchedulerHook'),
-    checkpoint=dict(type='CheckpointHook', interval=1),
-    sampler_seed=dict(type='DistSamplerSeedHook'),
-    visualization=dict(type='mmdet.DetVisualizationHook'))
+    timer=dict(type="IterTimerHook"),
+    logger=dict(type="LoggerHook", interval=50),
+    param_scheduler=dict(type="ParamSchedulerHook"),
+    checkpoint=dict(type="CheckpointHook", interval=1),
+    sampler_seed=dict(type="DistSamplerSeedHook"),
+    visualization=dict(type="mmdet.DetVisualizationHook"),
+)
 
 env_cfg = dict(
     cudnn_benchmark=False,
-    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
-    dist_cfg=dict(backend='nccl'),
+    mp_cfg=dict(mp_start_method="fork", opencv_num_threads=0),
+    dist_cfg=dict(backend="nccl"),
 )
 
-vis_backends = [dict(type='LocalVisBackend')]
+vis_backends = [dict(type="LocalVisBackend")]
 visualizer = dict(
-    type='mmdet.DetLocalVisualizer',
-    vis_backends=vis_backends,
-    name='visualizer')
-log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
+    type="mmdet.DetLocalVisualizer", vis_backends=vis_backends, name="visualizer"
+)
+log_processor = dict(type="LogProcessor", window_size=50, by_epoch=True)
 
-log_level = 'INFO'
+log_level = "INFO"
 load_from = None
 resume = False
 
@@ -31,4 +31,4 @@
 #         './data/': 's3://openmmlab/datasets/detection/',
 #         'data/': 's3://openmmlab/datasets/detection/'
 #     }))
-file_client_args = dict(backend='disk')
+file_client_args = dict(backend="disk")
diff --git a/mmyolo/configs/deploy/base_dynamic.py b/mmyolo/configs/deploy/base_dynamic.py
index 747c21fd..72595b22 100644
--- a/mmyolo/configs/deploy/base_dynamic.py
+++ b/mmyolo/configs/deploy/base_dynamic.py
@@ -1,17 +1,8 @@
-_base_ = ['./base_static.py']
+_base_ = ["./base_static.py"]
 onnx_config = dict(
     dynamic_axes={
-        'input': {
-            0: 'batch',
-            2: 'height',
-            3: 'width'
-        },
-        'dets': {
-            0: 'batch',
-            1: 'num_dets'
-        },
-        'labels': {
-            0: 'batch',
-            1: 'num_dets'
-        }
-    })
+        "input": {0: "batch", 2: "height", 3: "width"},
+        "dets": {0: "batch", 1: "num_dets"},
+        "labels": {0: "batch", 1: "num_dets"},
+    }
+)
diff --git a/mmyolo/configs/deploy/base_static.py b/mmyolo/configs/deploy/base_static.py
index dee01dd5..12a9a13f 100644
--- a/mmyolo/configs/deploy/base_static.py
+++ b/mmyolo/configs/deploy/base_static.py
@@ -1,17 +1,18 @@
 onnx_config = dict(
-    type='onnx',
+    type="onnx",
     export_params=True,
     keep_initializers_as_inputs=False,
     opset_version=11,
-    save_file='end2end.onnx',
-    input_names=['input'],
-    output_names=['dets', 'labels'],
+    save_file="end2end.onnx",
+    input_names=["input"],
+    output_names=["dets", "labels"],
     input_shape=None,
-    optimize=True)
+    optimize=True,
+)
 codebase_config = dict(
-    type='mmyolo',
-    task='ObjectDetection',
-    model_type='end2end',
+    type="mmyolo",
+    task="ObjectDetection",
+    model_type="end2end",
     post_processing=dict(
         score_threshold=0.05,
         confidence_threshold=0.005,
@@ -19,5 +20,7 @@
         max_output_boxes_per_class=200,
         pre_top_k=5000,
         keep_top_k=100,
-        background_label_id=-1),
-    module=['mmyolo.deploy'])
+        background_label_id=-1,
+    ),
+    module=["mmyolo.deploy"],
+)
diff --git a/mmyolo/configs/deploy/detection_onnxruntime_dynamic.py b/mmyolo/configs/deploy/detection_onnxruntime_dynamic.py
index 14f4a121..82fbdfc8 100644
--- a/mmyolo/configs/deploy/detection_onnxruntime_dynamic.py
+++ b/mmyolo/configs/deploy/detection_onnxruntime_dynamic.py
@@ -1,8 +1,8 @@
-_base_ = ['./base_dynamic.py']
+_base_ = ["./base_dynamic.py"]
 codebase_config = dict(
-    type='mmyolo',
-    task='ObjectDetection',
-    model_type='end2end',
+    type="mmyolo",
+    task="ObjectDetection",
+    model_type="end2end",
     post_processing=dict(
         score_threshold=0.05,
         confidence_threshold=0.005,
@@ -10,6 +10,8 @@
         max_output_boxes_per_class=200,
         pre_top_k=5000,
         keep_top_k=100,
-        background_label_id=-1),
-    module=['mmyolo.deploy'])
-backend_config = dict(type='onnxruntime')
+        background_label_id=-1,
+    ),
+    module=["mmyolo.deploy"],
+)
+backend_config = dict(type="onnxruntime")
diff --git a/mmyolo/configs/deploy/detection_onnxruntime_static.py b/mmyolo/configs/deploy/detection_onnxruntime_static.py
index 3eac8ca7..038d45e3 100644
--- a/mmyolo/configs/deploy/detection_onnxruntime_static.py
+++ b/mmyolo/configs/deploy/detection_onnxruntime_static.py
@@ -1,8 +1,8 @@
-_base_ = ['./base_static.py']
+_base_ = ["./base_static.py"]
 codebase_config = dict(
-    type='mmyolo',
-    task='ObjectDetection',
-    model_type='end2end',
+    type="mmyolo",
+    task="ObjectDetection",
+    model_type="end2end",
     post_processing=dict(
         score_threshold=0.05,
         confidence_threshold=0.005,
@@ -10,6 +10,8 @@
         max_output_boxes_per_class=200,
         pre_top_k=5000,
         keep_top_k=100,
-        background_label_id=-1),
-    module=['mmyolo.deploy'])
-backend_config = dict(type='onnxruntime')
+        background_label_id=-1,
+    ),
+    module=["mmyolo.deploy"],
+)
+backend_config = dict(type="onnxruntime")
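The `dynamic_axes` mapping that `base_dynamic.py` layers over `base_static.py` marks the batch axis of every tensor, plus height and width of `input`, as symbolic, so one exported model can serve several batch sizes and resolutions. A rough sketch of the equivalent raw export call; `model` here stands in for an already end2end-wrapped detector and is not an actual mmdeploy API:

    import torch

    dummy = torch.randn(1, 3, 640, 640)
    torch.onnx.export(
        model,  # hypothetical wrapped detector producing (dets, labels)
        dummy,
        "end2end.onnx",
        opset_version=11,
        input_names=["input"],
        output_names=["dets", "labels"],
        dynamic_axes={
            "input": {0: "batch", 2: "height", 3: "width"},
            "dets": {0: "batch", 1: "num_dets"},
            "labels": {0: "batch", 1: "num_dets"},
        },
    )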
diff --git a/mmyolo/configs/deploy/detection_rknn-fp16_static-320x320.py b/mmyolo/configs/deploy/detection_rknn-fp16_static-320x320.py
index b7bd3133..2dc6dab1 100644
--- a/mmyolo/configs/deploy/detection_rknn-fp16_static-320x320.py
+++ b/mmyolo/configs/deploy/detection_rknn-fp16_static-320x320.py
@@ -1,9 +1,9 @@
-_base_ = ['./base_static.py']
-onnx_config = dict(
-    input_shape=[320, 320], output_names=['feat0', 'feat1', 'feat2'])
-codebase_config = dict(model_type='rknn')
+_base_ = ["./base_static.py"]
+onnx_config = dict(input_shape=[320, 320], output_names=["feat0", "feat1", "feat2"])
+codebase_config = dict(model_type="rknn")
 backend_config = dict(
-    type='rknn',
-    common_config=dict(target_platform='rv1126', optimization_level=1),
+    type="rknn",
+    common_config=dict(target_platform="rv1126", optimization_level=1),
     quantization_config=dict(do_quantization=False, dataset=None),
-    input_size_list=[[3, 320, 320]])
+    input_size_list=[[3, 320, 320]],
+)
diff --git a/mmyolo/configs/deploy/detection_rknn-int8_static-320x320.py b/mmyolo/configs/deploy/detection_rknn-int8_static-320x320.py
index 10c96b2f..d49fbf48 100644
--- a/mmyolo/configs/deploy/detection_rknn-int8_static-320x320.py
+++ b/mmyolo/configs/deploy/detection_rknn-int8_static-320x320.py
@@ -1,9 +1,9 @@
-_base_ = ['./base_static.py']
-onnx_config = dict(
-    input_shape=[320, 320], output_names=['feat0', 'feat1', 'feat2'])
-codebase_config = dict(model_type='rknn')
+_base_ = ["./base_static.py"]
+onnx_config = dict(input_shape=[320, 320], output_names=["feat0", "feat1", "feat2"])
+codebase_config = dict(model_type="rknn")
 backend_config = dict(
-    type='rknn',
-    common_config=dict(target_platform='rv1126', optimization_level=1),
+    type="rknn",
+    common_config=dict(target_platform="rv1126", optimization_level=1),
     quantization_config=dict(do_quantization=True, dataset=None),
-    input_size_list=[[3, 320, 320]])
+    input_size_list=[[3, 320, 320]],
+)
diff --git a/mmyolo/configs/deploy/detection_tensorrt-fp16_dynamic-192x192-960x960.py b/mmyolo/configs/deploy/detection_tensorrt-fp16_dynamic-192x192-960x960.py
index da565b6c..67d24a2c 100644
--- a/mmyolo/configs/deploy/detection_tensorrt-fp16_dynamic-192x192-960x960.py
+++ b/mmyolo/configs/deploy/detection_tensorrt-fp16_dynamic-192x192-960x960.py
@@ -1,6 +1,6 @@
-_base_ = ['./base_dynamic.py']
+_base_ = ["./base_dynamic.py"]
 backend_config = dict(
-    type='tensorrt',
+    type="tensorrt",
     common_config=dict(fp16_mode=True, max_workspace_size=1 << 30),
     model_inputs=[
         dict(
@@ -8,6 +8,10 @@
                 input=dict(
                     min_shape=[1, 3, 192, 192],
                     opt_shape=[1, 3, 640, 640],
-                    max_shape=[1, 3, 960, 960])))
-    ])
+                    max_shape=[1, 3, 960, 960],
+                )
+            )
+        )
+    ],
+)
 
 use_efficientnms = False  # whether to replace TRTBatchedNMS plugin with EfficientNMS plugin # noqa E501
diff --git a/mmyolo/configs/deploy/detection_tensorrt-fp16_dynamic-64x64-1344x1344.py b/mmyolo/configs/deploy/detection_tensorrt-fp16_dynamic-64x64-1344x1344.py
index bad8521a..d0111afe 100644
--- a/mmyolo/configs/deploy/detection_tensorrt-fp16_dynamic-64x64-1344x1344.py
+++ b/mmyolo/configs/deploy/detection_tensorrt-fp16_dynamic-64x64-1344x1344.py
@@ -1,6 +1,6 @@
-_base_ = ['./base_dynamic.py']
+_base_ = ["./base_dynamic.py"]
 backend_config = dict(
-    type='tensorrt',
+    type="tensorrt",
     common_config=dict(fp16_mode=True, max_workspace_size=1 << 32),
     model_inputs=[
         dict(
@@ -8,6 +8,10 @@
                 input=dict(
                     min_shape=[1, 3, 64, 64],
                     opt_shape=[1, 3, 640, 640],
-                    max_shape=[1, 3, 1344, 1344])))
-    ])
+                    max_shape=[1, 3, 1344, 1344],
+                )
+            )
+        )
+    ],
+)
 
 use_efficientnms = False  # whether to replace TRTBatchedNMS plugin with EfficientNMS plugin # noqa E501
diff --git a/mmyolo/configs/deploy/detection_tensorrt-fp16_static-640x640.py b/mmyolo/configs/deploy/detection_tensorrt-fp16_static-640x640.py
index 24d2a00d..1c160c26 100644
--- a/mmyolo/configs/deploy/detection_tensorrt-fp16_static-640x640.py
+++ b/mmyolo/configs/deploy/detection_tensorrt-fp16_static-640x640.py
@@ -1,7 +1,7 @@
-_base_ = ['./base_static.py']
+_base_ = ["./base_static.py"]
 onnx_config = dict(input_shape=(640, 640))
 backend_config = dict(
-    type='tensorrt',
+    type="tensorrt",
     common_config=dict(fp16_mode=True, max_workspace_size=1 << 30),
     model_inputs=[
         dict(
@@ -9,6 +9,10 @@
                 input=dict(
                     min_shape=[1, 3, 640, 640],
                     opt_shape=[1, 3, 640, 640],
-                    max_shape=[1, 3, 640, 640])))
-    ])
+                    max_shape=[1, 3, 640, 640],
+                )
+            )
+        )
+    ],
+)
 
 use_efficientnms = False  # whether to replace TRTBatchedNMS plugin with EfficientNMS plugin # noqa E501
diff --git a/mmyolo/configs/deploy/detection_tensorrt-int8_dynamic-192x192-960x960.py b/mmyolo/configs/deploy/detection_tensorrt-int8_dynamic-192x192-960x960.py
index 21591c4d..f7eb0918 100644
--- a/mmyolo/configs/deploy/detection_tensorrt-int8_dynamic-192x192-960x960.py
+++ b/mmyolo/configs/deploy/detection_tensorrt-int8_dynamic-192x192-960x960.py
@@ -1,15 +1,18 @@
-_base_ = ['./base_dynamic.py']
+_base_ = ["./base_dynamic.py"]
 backend_config = dict(
-    type='tensorrt',
-    common_config=dict(
-        fp16_mode=True, max_workspace_size=1 << 30, int8_mode=True),
+    type="tensorrt",
+    common_config=dict(fp16_mode=True, max_workspace_size=1 << 30, int8_mode=True),
     model_inputs=[
         dict(
             input_shapes=dict(
                 input=dict(
                     min_shape=[1, 3, 192, 192],
                     opt_shape=[1, 3, 640, 640],
-                    max_shape=[1, 3, 960, 960])))
-    ])
-calib_config = dict(create_calib=True, calib_file='calib_data.h5')
+                    max_shape=[1, 3, 960, 960],
+                )
+            )
+        )
+    ],
+)
+calib_config = dict(create_calib=True, calib_file="calib_data.h5")
 
 use_efficientnms = False  # whether to replace TRTBatchedNMS plugin with EfficientNMS plugin # noqa E501
diff --git a/mmyolo/configs/deploy/detection_tensorrt-int8_static-640x640.py b/mmyolo/configs/deploy/detection_tensorrt-int8_static-640x640.py
index ac394a6b..70986e30 100644
--- a/mmyolo/configs/deploy/detection_tensorrt-int8_static-640x640.py
+++ b/mmyolo/configs/deploy/detection_tensorrt-int8_static-640x640.py
@@ -1,16 +1,19 @@
-_base_ = ['./base_static.py']
+_base_ = ["./base_static.py"]
 onnx_config = dict(input_shape=(640, 640))
 backend_config = dict(
-    type='tensorrt',
-    common_config=dict(
-        fp16_mode=True, max_workspace_size=1 << 30, int8_mode=True),
+    type="tensorrt",
+    common_config=dict(fp16_mode=True, max_workspace_size=1 << 30, int8_mode=True),
     model_inputs=[
         dict(
             input_shapes=dict(
                 input=dict(
                     min_shape=[1, 3, 640, 640],
                     opt_shape=[1, 3, 640, 640],
-                    max_shape=[1, 3, 640, 640])))
-    ])
-calib_config = dict(create_calib=True, calib_file='calib_data.h5')
+                    max_shape=[1, 3, 640, 640],
+                )
+            )
+        )
+    ],
+)
+calib_config = dict(create_calib=True, calib_file="calib_data.h5")
 
 use_efficientnms = False  # whether to replace TRTBatchedNMS plugin with EfficientNMS plugin # noqa E501
diff --git a/mmyolo/configs/deploy/detection_tensorrt_dynamic-192x192-960x960.py b/mmyolo/configs/deploy/detection_tensorrt_dynamic-192x192-960x960.py
index 17047d73..a31b49e1 100644
--- a/mmyolo/configs/deploy/detection_tensorrt_dynamic-192x192-960x960.py
+++ b/mmyolo/configs/deploy/detection_tensorrt_dynamic-192x192-960x960.py
@@ -1,6 +1,6 @@
-_base_ = ['./base_dynamic.py']
+_base_ = ["./base_dynamic.py"]
 backend_config = dict(
-    type='tensorrt',
+    type="tensorrt",
     common_config=dict(fp16_mode=False, max_workspace_size=1 << 30),
     model_inputs=[
         dict(
@@ -8,6 +8,10 @@
                 input=dict(
                     min_shape=[1, 3, 192, 192],
                     opt_shape=[1, 3, 640, 640],
-                    max_shape=[1, 3, 960, 960])))
-    ])
+                    max_shape=[1, 3, 960, 960],
+                )
+            )
+        )
+    ],
+)
 
 use_efficientnms = False  # whether to replace TRTBatchedNMS plugin with EfficientNMS plugin # noqa E501
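The `min_shape`/`opt_shape`/`max_shape` triples in these TensorRT configs describe an optimization profile: the engine is tuned for `opt_shape` but accepts any input between the two bounds. Roughly, each triple ends up in a call like the following sketch against the TensorRT Python API (builder and network setup abbreviated; this is an illustration, not mmdeploy's actual code):

    import tensorrt as trt

    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    config = builder.create_builder_config()
    profile = builder.create_optimization_profile()
    # one set_shape call per entry in model_inputs
    profile.set_shape(
        "input",
        min=(1, 3, 192, 192),
        opt=(1, 3, 640, 640),
        max=(1, 3, 960, 960),
    )
    config.add_optimization_profile(profile)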
diff --git a/mmyolo/configs/deploy/detection_tensorrt_static-640x640.py b/mmyolo/configs/deploy/detection_tensorrt_static-640x640.py
index 9ec49cc1..39a4df27 100644
--- a/mmyolo/configs/deploy/detection_tensorrt_static-640x640.py
+++ b/mmyolo/configs/deploy/detection_tensorrt_static-640x640.py
@@ -1,7 +1,7 @@
-_base_ = ['./base_static.py']
+_base_ = ["./base_static.py"]
 onnx_config = dict(input_shape=(640, 640))
 backend_config = dict(
-    type='tensorrt',
+    type="tensorrt",
     common_config=dict(fp16_mode=False, max_workspace_size=1 << 30),
     model_inputs=[
         dict(
@@ -9,6 +9,10 @@
                 input=dict(
                     min_shape=[1, 3, 640, 640],
                     opt_shape=[1, 3, 640, 640],
-                    max_shape=[1, 3, 640, 640])))
-    ])
+                    max_shape=[1, 3, 640, 640],
+                )
+            )
+        )
+    ],
+)
 
 use_efficientnms = False  # whether to replace TRTBatchedNMS plugin with EfficientNMS plugin # noqa E501
diff --git a/mmyolo/configs/deploy/model/yolov5_s-static.py b/mmyolo/configs/deploy/model/yolov5_s-static.py
index 470807e8..c956fe6c 100644
--- a/mmyolo/configs/deploy/model/yolov5_s-static.py
+++ b/mmyolo/configs/deploy/model/yolov5_s-static.py
@@ -1,19 +1,25 @@
-_base_ = '../../yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py'
+_base_ = "../../yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py"
 
 test_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
+    dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args),
     dict(
-        type='LetterResize',
+        type="LetterResize",
         scale=_base_.img_scale,
         allow_scale_up=False,
         use_mini_pad=False,
     ),
-    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
+    dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"),
     dict(
-        type='mmdet.PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
-                   'scale_factor', 'pad_param'))
+        type="mmdet.PackDetInputs",
+        meta_keys=(
+            "img_id",
+            "img_path",
+            "ori_shape",
+            "img_shape",
+            "scale_factor",
+            "pad_param",
+        ),
+    ),
 ]
 
-test_dataloader = dict(
-    dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=None))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=None))
diff --git a/mmyolo/configs/deploy/model/yolov6_s-static.py b/mmyolo/configs/deploy/model/yolov6_s-static.py
index d9044aba..dab3adc2 100644
--- a/mmyolo/configs/deploy/model/yolov6_s-static.py
+++ b/mmyolo/configs/deploy/model/yolov6_s-static.py
@@ -1,19 +1,25 @@
-_base_ = '../../yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py'
+_base_ = "../../yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py"
 
 test_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
+    dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args),
     dict(
-        type='LetterResize',
+        type="LetterResize",
         scale=_base_.img_scale,
         allow_scale_up=False,
         use_mini_pad=False,
     ),
-    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
+    dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"),
     dict(
-        type='mmdet.PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
-                   'scale_factor', 'pad_param'))
+        type="mmdet.PackDetInputs",
+        meta_keys=(
+            "img_id",
+            "img_path",
+            "ori_shape",
+            "img_shape",
+            "scale_factor",
+            "pad_param",
+        ),
+    ),
 ]
 
-test_dataloader = dict(
-    dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=None))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=None))
diff --git a/mmyolo/configs/ppyoloe/ppyoloe_l_fast_8xb20-300e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_l_fast_8xb20-300e_coco.py
index ef1b4eaa..d58857b7 100644
--- a/mmyolo/configs/ppyoloe/ppyoloe_l_fast_8xb20-300e_coco.py
+++ b/mmyolo/configs/ppyoloe/ppyoloe_l_fast_8xb20-300e_coco.py
@@ -1,8 +1,8 @@
-_base_ = './ppyoloe_s_fast_8xb32-300e_coco.py'
+_base_ = "./ppyoloe_s_fast_8xb32-300e_coco.py"
 
 # The pretrained model is geted and converted from official PPYOLOE.
 # https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
-checkpoint = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_l_imagenet1k_pretrained-c0010e6c.pth'  # noqa
+checkpoint = "https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_l_imagenet1k_pretrained-c0010e6c.pth"  # noqa
 
 deepen_factor = 1.0
 widen_factor = 1.0
@@ -13,11 +13,13 @@
     backbone=dict(
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
-        init_cfg=dict(checkpoint=checkpoint)),
+        init_cfg=dict(checkpoint=checkpoint),
+    ),
     neck=dict(
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
     ),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)
 
 train_dataloader = dict(batch_size=train_batch_size_per_gpu)
diff --git a/mmyolo/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco.py
index abcfd783..2e0795bb 100644
--- a/mmyolo/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco.py
+++ b/mmyolo/configs/ppyoloe/ppyoloe_m_fast_8xb28-300e_coco.py
@@ -1,8 +1,8 @@
-_base_ = './ppyoloe_s_fast_8xb32-300e_coco.py'
+_base_ = "./ppyoloe_s_fast_8xb32-300e_coco.py"
 
 # The pretrained model is geted and converted from official PPYOLOE.
 # https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
-checkpoint = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_m_imagenet1k_pretrained-09f1eba2.pth'  # noqa
+checkpoint = "https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_m_imagenet1k_pretrained-09f1eba2.pth"  # noqa
 
 deepen_factor = 0.67
 widen_factor = 0.75
@@ -13,11 +13,13 @@
     backbone=dict(
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
-        init_cfg=dict(checkpoint=checkpoint)),
+        init_cfg=dict(checkpoint=checkpoint),
+    ),
     neck=dict(
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
     ),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)
 
 train_dataloader = dict(batch_size=train_batch_size_per_gpu)
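As the `ppyoloe_l`/`ppyoloe_m` configs above illustrate, the PPYOLOE variants differ mainly in `deepen_factor` and `widen_factor`, which rescale the block counts and channel widths declared once in the shared base config. A toy illustration of that arithmetic (the real code also rounds channels to hardware-friendly multiples, so treat this as a sketch):

    # widen_factor scales channels, deepen_factor scales block repeats
    def scale_stage(base_channels, base_blocks, widen_factor, deepen_factor):
        return (int(base_channels * widen_factor),
                max(round(base_blocks * deepen_factor), 1))

    # a 1024-channel, 3-block stage under the m-variant factors (0.75 / 0.67)
    print(scale_stage(1024, 3, 0.75, 0.67))  # -> (768, 2)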
diff --git a/mmyolo/configs/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco.py
index 9db53e26..d36d2ce8 100644
--- a/mmyolo/configs/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco.py
+++ b/mmyolo/configs/ppyoloe/ppyoloe_plus_l_fast_8xb8-80e_coco.py
@@ -1,8 +1,8 @@
-_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py'
+_base_ = "./ppyoloe_plus_s_fast_8xb8-80e_coco.py"
 
 # The pretrained model is geted and converted from official PPYOLOE.
 # https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
-load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_l_obj365_pretrained-3dd89562.pth'  # noqa
+load_from = "https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_l_obj365_pretrained-3dd89562.pth"  # noqa
 
 deepen_factor = 1.0
 widen_factor = 1.0
@@ -13,4 +13,5 @@
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
     ),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)
diff --git a/mmyolo/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco.py
index 17cb3355..a029bf73 100644
--- a/mmyolo/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco.py
+++ b/mmyolo/configs/ppyoloe/ppyoloe_plus_m_fast_8xb8-80e_coco.py
@@ -1,8 +1,8 @@
-_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py'
+_base_ = "./ppyoloe_plus_s_fast_8xb8-80e_coco.py"
 
 # The pretrained model is geted and converted from official PPYOLOE.
 # https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
-load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_m_ojb365_pretrained-03206892.pth'  # noqa
+load_from = "https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_m_ojb365_pretrained-03206892.pth"  # noqa
 
 deepen_factor = 0.67
 widen_factor = 0.75
@@ -13,4 +13,5 @@
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
     ),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)
diff --git a/mmyolo/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py
index 7c5ce298..da383880 100644
--- a/mmyolo/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py
+++ b/mmyolo/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py
@@ -1,8 +1,8 @@
-_base_ = '../_base_/default_runtime.py'
+_base_ = "../_base_/default_runtime.py"
 
 # dataset settings
-data_root = 'data/coco/'
-dataset_type = 'YOLOv5CocoDataset'
+data_root = "data/coco/"
+dataset_type = "YOLOv5CocoDataset"
 
 # parameters that often need to be modified
 img_scale = (640, 640)  # width, height
@@ -18,7 +18,7 @@
 
 # The pretrained model is geted and converted from official PPYOLOE.
 # https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
-load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_s_obj365_pretrained-bcfe8478.pth'  # noqa
+load_from = "https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_s_obj365_pretrained-bcfe8478.pth"  # noqa
 
 # persistent_workers must be False if num_workers is 0.
 persistent_workers = True
@@ -29,116 +29,133 @@
 strides = [8, 16, 32]
 
 model = dict(
-    type='YOLODetector',
+    type="YOLODetector",
     data_preprocessor=dict(
         # use this to support multi_scale training
-        type='PPYOLOEDetDataPreprocessor',
+        type="PPYOLOEDetDataPreprocessor",
         pad_size_divisor=32,
         batch_augments=[
             dict(
-                type='PPYOLOEBatchRandomResize',
+                type="PPYOLOEBatchRandomResize",
                 random_size_range=(320, 800),
                 interval=1,
                 size_divisor=32,
                 random_interp=True,
-                keep_ratio=False)
+                keep_ratio=False,
+            )
         ],
-        mean=[0., 0., 0.],
-        std=[255., 255., 255.],
-        bgr_to_rgb=True),
+        mean=[0.0, 0.0, 0.0],
+        std=[255.0, 255.0, 255.0],
+        bgr_to_rgb=True,
+    ),
     backbone=dict(
-        type='PPYOLOECSPResNet',
+        type="PPYOLOECSPResNet",
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
-        block_cfg=dict(
-            type='PPYOLOEBasicBlock', shortcut=True, use_alpha=True),
-        norm_cfg=dict(type='BN', momentum=0.1, eps=1e-5),
-        act_cfg=dict(type='SiLU', inplace=True),
-        attention_cfg=dict(
-            type='EffectiveSELayer', act_cfg=dict(type='HSigmoid')),
-        use_large_stem=True),
+        block_cfg=dict(type="PPYOLOEBasicBlock", shortcut=True, use_alpha=True),
+        norm_cfg=dict(type="BN", momentum=0.1, eps=1e-5),
+        act_cfg=dict(type="SiLU", inplace=True),
+        attention_cfg=dict(type="EffectiveSELayer", act_cfg=dict(type="HSigmoid")),
+        use_large_stem=True,
+    ),
     neck=dict(
-        type='PPYOLOECSPPAFPN',
+        type="PPYOLOECSPPAFPN",
         in_channels=[256, 512, 1024],
         out_channels=[192, 384, 768],
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
         num_csplayer=1,
         num_blocks_per_layer=3,
-        block_cfg=dict(
-            type='PPYOLOEBasicBlock', shortcut=False, use_alpha=False),
-        norm_cfg=dict(type='BN', momentum=0.1, eps=1e-5),
-        act_cfg=dict(type='SiLU', inplace=True),
+        block_cfg=dict(type="PPYOLOEBasicBlock", shortcut=False, use_alpha=False),
+        norm_cfg=dict(type="BN", momentum=0.1, eps=1e-5),
+        act_cfg=dict(type="SiLU", inplace=True),
         drop_block_cfg=None,
-        use_spp=True),
+        use_spp=True,
+    ),
     bbox_head=dict(
-        type='PPYOLOEHead',
+        type="PPYOLOEHead",
         head_module=dict(
-            type='PPYOLOEHeadModule',
+            type="PPYOLOEHeadModule",
             num_classes=num_classes,
             in_channels=[192, 384, 768],
             widen_factor=widen_factor,
             featmap_strides=strides,
             reg_max=16,
-            norm_cfg=dict(type='BN', momentum=0.1, eps=1e-5),
-            act_cfg=dict(type='SiLU', inplace=True),
-            num_base_priors=1),
+            norm_cfg=dict(type="BN", momentum=0.1, eps=1e-5),
+            act_cfg=dict(type="SiLU", inplace=True),
+            num_base_priors=1,
+        ),
         prior_generator=dict(
-            type='mmdet.MlvlPointGenerator', offset=0.5, strides=strides),
-        bbox_coder=dict(type='DistancePointBBoxCoder'),
+            type="mmdet.MlvlPointGenerator", offset=0.5, strides=strides
+        ),
+        bbox_coder=dict(type="DistancePointBBoxCoder"),
         loss_cls=dict(
-            type='mmdet.VarifocalLoss',
+            type="mmdet.VarifocalLoss",
             use_sigmoid=True,
             alpha=0.75,
             gamma=2.0,
             iou_weighted=True,
-            reduction='sum',
-            loss_weight=1.0),
+            reduction="sum",
+            loss_weight=1.0,
+        ),
         loss_bbox=dict(
-            type='IoULoss',
-            iou_mode='giou',
-            bbox_format='xyxy',
-            reduction='mean',
+            type="IoULoss",
+            iou_mode="giou",
+            bbox_format="xyxy",
+            reduction="mean",
             loss_weight=2.5,
-            return_iou=False),
+            return_iou=False,
+        ),
         # Since the dflloss is implemented differently in the official
         # and mmdet, we're going to divide loss_weight by 4.
         loss_dfl=dict(
-            type='mmdet.DistributionFocalLoss',
-            reduction='mean',
-            loss_weight=0.5 / 4)),
+            type="mmdet.DistributionFocalLoss", reduction="mean", loss_weight=0.5 / 4
+        ),
+    ),
     train_cfg=dict(
         initial_epoch=30,
         initial_assigner=dict(
-            type='BatchATSSAssigner',
+            type="BatchATSSAssigner",
             num_classes=num_classes,
             topk=9,
-            iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
+            iou_calculator=dict(type="mmdet.BboxOverlaps2D"),
+        ),
         assigner=dict(
-            type='BatchTaskAlignedAssigner',
+            type="BatchTaskAlignedAssigner",
             num_classes=num_classes,
             topk=13,
             alpha=1,
             beta=6,
-            eps=1e-9)),
+            eps=1e-9,
+        ),
+    ),
     test_cfg=dict(
         multi_label=True,
         nms_pre=1000,
         score_thr=0.01,
-        nms=dict(type='nms', iou_threshold=0.7),
-        max_per_img=300))
+        nms=dict(type="nms", iou_threshold=0.7),
+        max_per_img=300,
+    ),
+)
 
 train_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='PPYOLOERandomDistort'),
-    dict(type='mmdet.Expand', mean=(103.53, 116.28, 123.675)),
-    dict(type='PPYOLOERandomCrop'),
-    dict(type='mmdet.RandomFlip', prob=0.5),
+    dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args),
+    dict(type="LoadAnnotations", with_bbox=True),
+    dict(type="PPYOLOERandomDistort"),
+    dict(type="mmdet.Expand", mean=(103.53, 116.28, 123.675)),
+    dict(type="PPYOLOERandomCrop"),
+    dict(type="mmdet.RandomFlip", prob=0.5),
     dict(
-        type='mmdet.PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
-                   'flip_direction'))
+        type="mmdet.PackDetInputs",
+        meta_keys=(
+            "img_id",
+            "img_path",
+            "ori_shape",
+            "img_shape",
+            "flip",
+            "flip_direction",
+        ),
+    ),
 ]
 
 train_dataloader = dict(
@@ -146,29 +163,32 @@
     num_workers=train_num_workers,
     persistent_workers=persistent_workers,
     pin_memory=True,
-    sampler=dict(type='DefaultSampler', shuffle=True),
-    collate_fn=dict(type='yolov5_collate', use_ms_training=True),
+    sampler=dict(type="DefaultSampler", shuffle=True),
+    collate_fn=dict(type="yolov5_collate", use_ms_training=True),
     dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        ann_file='annotations/instances_train2017.json',
-        data_prefix=dict(img='train2017/'),
+        ann_file="annotations/instances_train2017.json",
+        data_prefix=dict(img="train2017/"),
         filter_cfg=dict(filter_empty_gt=True, min_size=0),
-        pipeline=train_pipeline))
+        pipeline=train_pipeline,
+    ),
+)
 
 test_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
+    dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args),
     dict(
-        type='mmdet.FixShapeResize',
+        type="mmdet.FixShapeResize",
         width=img_scale[0],
         height=img_scale[1],
         keep_ratio=False,
-        interpolation='bicubic'),
-    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
+        interpolation="bicubic",
+    ),
+    dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"),
     dict(
-        type='mmdet.PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
-                   'scale_factor'))
+        type="mmdet.PackDetInputs",
+        meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"),
+    ),
 ]
 
 val_dataloader = dict(
@@ -177,63 +197,67 @@
     persistent_workers=persistent_workers,
     pin_memory=True,
     drop_last=False,
-    sampler=dict(type='DefaultSampler', shuffle=False),
+    sampler=dict(type="DefaultSampler", shuffle=False),
     dataset=dict(
         type=dataset_type,
         data_root=data_root,
         test_mode=True,
-        data_prefix=dict(img='val2017/'),
+        data_prefix=dict(img="val2017/"),
         filter_cfg=dict(filter_empty_gt=True, min_size=0),
ann_file='annotations/instances_val2017.json', - pipeline=test_pipeline)) + ann_file="annotations/instances_val2017.json", + pipeline=test_pipeline, + ), +) test_dataloader = val_dataloader param_scheduler = None optim_wrapper = dict( - type='OptimWrapper', + type="OptimWrapper", optimizer=dict( - type='SGD', - lr=base_lr, - momentum=0.9, - weight_decay=5e-4, - nesterov=False), - paramwise_cfg=dict(norm_decay_mult=0.)) + type="SGD", lr=base_lr, momentum=0.9, weight_decay=5e-4, nesterov=False + ), + paramwise_cfg=dict(norm_decay_mult=0.0), +) default_hooks = dict( param_scheduler=dict( - type='PPYOLOEParamSchedulerHook', + type="PPYOLOEParamSchedulerHook", warmup_min_iter=1000, - start_factor=0., + start_factor=0.0, warmup_epochs=5, min_lr_ratio=0.0, - total_epochs=int(max_epochs * 1.2)), + total_epochs=int(max_epochs * 1.2), + ), checkpoint=dict( - type='CheckpointHook', + type="CheckpointHook", interval=save_epoch_intervals, - save_best='auto', - max_keep_ckpts=3)) + save_best="auto", + max_keep_ckpts=3, + ), +) custom_hooks = [ dict( - type='EMAHook', - ema_type='ExpMomentumEMA', + type="EMAHook", + ema_type="ExpMomentumEMA", momentum=0.0002, update_buffers=True, strict_load=False, - priority=49) + priority=49, + ) ] val_evaluator = dict( - type='mmdet.CocoMetric', + type="mmdet.CocoMetric", proposal_nums=(100, 1, 10), - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') + ann_file=data_root + "annotations/instances_val2017.json", + metric="bbox", +) test_evaluator = val_evaluator train_cfg = dict( - type='EpochBasedTrainLoop', - max_epochs=max_epochs, - val_interval=save_epoch_intervals) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') + type="EpochBasedTrainLoop", max_epochs=max_epochs, val_interval=save_epoch_intervals +) +val_cfg = dict(type="ValLoop") +test_cfg = dict(type="TestLoop") diff --git a/mmyolo/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py index b8e61120..98a09b30 100644 --- a/mmyolo/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py +++ b/mmyolo/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py @@ -1,8 +1,8 @@ -_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py' +_base_ = "./ppyoloe_plus_s_fast_8xb8-80e_coco.py" # The pretrained model is geted and converted from official PPYOLOE. # https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md -load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_x_obj365_pretrained-43a8000d.pth' # noqa +load_from = "https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_x_obj365_pretrained-43a8000d.pth" # noqa deepen_factor = 1.33 widen_factor = 1.25 @@ -13,4 +13,5 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py index 62233289..9322c9e3 100644 --- a/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py +++ b/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py @@ -1,8 +1,8 @@ -_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py' +_base_ = "./ppyoloe_plus_s_fast_8xb8-80e_coco.py" # The pretrained model is geted and converted from official PPYOLOE. 
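The loss_weight=0.5 / 4 setting in the hunk above is the comment's point made explicit. One plausible reading (a toy illustration, not mmdet's actual code) is that the official implementation averages the DFL term over the four box sides while a summing implementation needs a quarter of the weight to produce the same gradient scale:

    # Hypothetical illustration of why a summed DFL term uses weight 0.5 / 4.
    per_side_dfl = [0.9, 1.1, 1.0, 1.2]  # made-up per-side (l, t, r, b) losses

    official_style = 0.5 * (sum(per_side_dfl) / 4)  # mean over sides, weight 0.5
    mmdet_style = (0.5 / 4) * sum(per_side_dfl)     # sum over sides, weight 0.5/4

    assert abs(official_style - mmdet_style) < 1e-9
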
diff --git a/mmyolo/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py
index b8e61120..98a09b30 100644
--- a/mmyolo/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py
+++ b/mmyolo/configs/ppyoloe/ppyoloe_plus_x_fast_8xb8-80e_coco.py
@@ -1,8 +1,8 @@
-_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py'
+_base_ = "./ppyoloe_plus_s_fast_8xb8-80e_coco.py"

 # The pretrained model is obtained and converted from official PPYOLOE.
 # https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
-load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_x_obj365_pretrained-43a8000d.pth'  # noqa
+load_from = "https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/ppyoloe_plus_x_obj365_pretrained-43a8000d.pth"  # noqa

 deepen_factor = 1.33
 widen_factor = 1.25
@@ -13,4 +13,5 @@
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
     ),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)
diff --git a/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py
index 62233289..9322c9e3 100644
--- a/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py
+++ b/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-300e_coco.py
@@ -1,8 +1,8 @@
-_base_ = './ppyoloe_plus_s_fast_8xb8-80e_coco.py'
+_base_ = "./ppyoloe_plus_s_fast_8xb8-80e_coco.py"

 # The pretrained model is obtained and converted from official PPYOLOE.
 # https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
-checkpoint = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_s_imagenet1k_pretrained-2be81763.pth'  # noqa
+checkpoint = "https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_s_imagenet1k_pretrained-2be81763.pth"  # noqa

 train_batch_size_per_gpu = 32
 max_epochs = 300
@@ -13,15 +13,19 @@
 model = dict(
     data_preprocessor=dict(
         mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
-        std=[0.229 * 255., 0.224 * 255., 0.225 * 255.]),
+        std=[0.229 * 255.0, 0.224 * 255.0, 0.225 * 255.0],
+    ),
     backbone=dict(
         block_cfg=dict(use_alpha=False),
         init_cfg=dict(
-            type='Pretrained',
-            prefix='backbone.',
+            type="Pretrained",
+            prefix="backbone.",
             checkpoint=checkpoint,
-            map_location='cpu')),
-    train_cfg=dict(initial_epoch=100))
+            map_location="cpu",
+        ),
+    ),
+    train_cfg=dict(initial_epoch=100),
+)

 train_dataloader = dict(batch_size=train_batch_size_per_gpu)
diff --git a/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-400e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-400e_coco.py
index bef9e913..4510c149 100644
--- a/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-400e_coco.py
+++ b/mmyolo/configs/ppyoloe/ppyoloe_s_fast_8xb32-400e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './ppyoloe_s_fast_8xb32-300e_coco.py'
+_base_ = "./ppyoloe_s_fast_8xb32-300e_coco.py"

 max_epochs = 400
diff --git a/mmyolo/configs/ppyoloe/ppyoloe_x_fast_8xb16-300e_coco.py b/mmyolo/configs/ppyoloe/ppyoloe_x_fast_8xb16-300e_coco.py
index fed594f0..6fff84c9 100644
--- a/mmyolo/configs/ppyoloe/ppyoloe_x_fast_8xb16-300e_coco.py
+++ b/mmyolo/configs/ppyoloe/ppyoloe_x_fast_8xb16-300e_coco.py
@@ -1,8 +1,8 @@
-_base_ = './ppyoloe_s_fast_8xb32-300e_coco.py'
+_base_ = "./ppyoloe_s_fast_8xb32-300e_coco.py"

 # The pretrained model is obtained and converted from official PPYOLOE.
 # https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/configs/ppyoloe/README.md
-checkpoint = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_x_imagenet1k_pretrained-81c33ccb.pth'  # noqa
+checkpoint = "https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_pretrain/cspresnet_x_imagenet1k_pretrained-81c33ccb.pth"  # noqa

 deepen_factor = 1.33
 widen_factor = 1.25
@@ -13,11 +13,13 @@
     backbone=dict(
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
-        init_cfg=dict(checkpoint=checkpoint)),
+        init_cfg=dict(checkpoint=checkpoint),
+    ),
     neck=dict(
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
     ),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)

 train_dataloader = dict(batch_size=train_batch_size_per_gpu)
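All the size variants in these configs differ only in deepen_factor and widen_factor. A rough sketch of how such factors typically turn into concrete channel and block counts (the helper names and rounding here are assumptions for illustration; the exact logic lives in mmyolo's backbone code):

    def scaled_channels(base_channels: int, widen_factor: float, divisor: int = 8) -> int:
        # Widen the channel count, then round to the nearest multiple of `divisor`.
        scaled = int(base_channels * widen_factor + divisor / 2) // divisor * divisor
        return max(divisor, scaled)

    def scaled_blocks(base_blocks: int, deepen_factor: float) -> int:
        # Deepen factor scales blocks per stage, keeping at least one block.
        return max(1, round(base_blocks * deepen_factor))

    # PPYOLOE-x above uses deepen_factor=1.33, widen_factor=1.25:
    print(scaled_channels(1024, 1.25))  # -> 1280 channels in the last stage
    print(scaled_blocks(3, 1.33))       # -> 4 blocks per CSP stage
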
diff --git a/mmyolo/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-s_8xb256-rsb-a1-600e_in1k.py b/mmyolo/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-s_8xb256-rsb-a1-600e_in1k.py
index 4281f9cd..0624c95a 100644
--- a/mmyolo/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-s_8xb256-rsb-a1-600e_in1k.py
+++ b/mmyolo/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-s_8xb256-rsb-a1-600e_in1k.py
@@ -1,67 +1,76 @@
 _base_ = [
-    'mmcls::_base_/datasets/imagenet_bs256_rsb_a12.py',
-    'mmcls::_base_/schedules/imagenet_bs2048_rsb.py',
-    'mmcls::_base_/default_runtime.py'
+    "mmcls::_base_/datasets/imagenet_bs256_rsb_a12.py",
+    "mmcls::_base_/schedules/imagenet_bs2048_rsb.py",
+    "mmcls::_base_/default_runtime.py",
 ]

 custom_imports = dict(
-    imports=['mmdet.models', 'mmyolo.models'], allow_failed_imports=False)
+    imports=["mmdet.models", "mmyolo.models"], allow_failed_imports=False
+)

 model = dict(
-    type='ImageClassifier',
+    type="ImageClassifier",
     backbone=dict(
-        type='mmyolo.CSPNeXt',
-        arch='P5',
-        out_indices=(4, ),
+        type="mmyolo.CSPNeXt",
+        arch="P5",
+        out_indices=(4,),
         expand_ratio=0.5,
         deepen_factor=0.33,
         widen_factor=0.5,
         channel_attention=True,
-        norm_cfg=dict(type='BN'),
-        act_cfg=dict(type='mmyolo.SiLU')),
-    neck=dict(type='GlobalAveragePooling'),
+        norm_cfg=dict(type="BN"),
+        act_cfg=dict(type="mmyolo.SiLU"),
+    ),
+    neck=dict(type="GlobalAveragePooling"),
     head=dict(
-        type='LinearClsHead',
+        type="LinearClsHead",
         num_classes=1000,
         in_channels=512,
         loss=dict(
-            type='LabelSmoothLoss',
+            type="LabelSmoothLoss",
             label_smooth_val=0.1,
-            mode='original',
-            loss_weight=1.0),
-        topk=(1, 5)),
-    train_cfg=dict(augments=[
-        dict(type='Mixup', alpha=0.2, num_classes=1000),
-        dict(type='CutMix', alpha=1.0, num_classes=1000)
-    ]))
+            mode="original",
+            loss_weight=1.0,
+        ),
+        topk=(1, 5),
+    ),
+    train_cfg=dict(
+        augments=[
+            dict(type="Mixup", alpha=0.2, num_classes=1000),
+            dict(type="CutMix", alpha=1.0, num_classes=1000),
+        ]
+    ),
+)

 # dataset settings
-train_dataloader = dict(sampler=dict(type='RepeatAugSampler', shuffle=True))
+train_dataloader = dict(sampler=dict(type="RepeatAugSampler", shuffle=True))

 # schedule settings
 optim_wrapper = dict(
     optimizer=dict(weight_decay=0.01),
-    paramwise_cfg=dict(bias_decay_mult=0., norm_decay_mult=0.),
+    paramwise_cfg=dict(bias_decay_mult=0.0, norm_decay_mult=0.0),
 )

 param_scheduler = [
     # warm up learning rate scheduler
     dict(
-        type='LinearLR',
+        type="LinearLR",
         start_factor=0.0001,
         by_epoch=True,
         begin=0,
         end=5,
         # update by iter
-        convert_to_iter_based=True),
+        convert_to_iter_based=True,
+    ),
     # main learning rate scheduler
     dict(
-        type='CosineAnnealingLR',
+        type="CosineAnnealingLR",
         T_max=595,
         eta_min=1.0e-6,
         by_epoch=True,
         begin=5,
-        end=600)
+        end=600,
+    ),
 ]

 train_cfg = dict(by_epoch=True, max_epochs=600)
diff --git a/mmyolo/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-tiny_8xb256-rsb-a1-600e_in1k.py b/mmyolo/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-tiny_8xb256-rsb-a1-600e_in1k.py
index af3170bd..b914f105 100644
--- a/mmyolo/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-tiny_8xb256-rsb-a1-600e_in1k.py
+++ b/mmyolo/configs/rtmdet/cspnext_imagenet_pretrain/cspnext-tiny_8xb256-rsb-a1-600e_in1k.py
@@ -1,5 +1,5 @@
-_base_ = './cspnext-s_8xb256-rsb-a1-600e_in1k.py'
+_base_ = "./cspnext-s_8xb256-rsb-a1-600e_in1k.py"

 model = dict(
-    backbone=dict(deepen_factor=0.167, widen_factor=0.375),
-    head=dict(in_channels=384))
+    backbone=dict(deepen_factor=0.167, widen_factor=0.375), head=dict(in_channels=384)
+)
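The pretraining schedule above is a 5-epoch linear warmup followed by cosine annealing over the remaining 595 epochs. A minimal sketch of the resulting learning-rate curve (the base LR is a placeholder; the real value comes from the inherited imagenet_bs2048_rsb schedule):

    import math

    def lr_at_epoch(e: float, base_lr: float = 0.01) -> float:
        # Epochs 0-5: LinearLR ramps from base_lr * 0.0001 up to base_lr.
        if e < 5:
            return base_lr * (0.0001 + (1 - 0.0001) * e / 5)
        # Epochs 5-600: CosineAnnealingLR decays to eta_min over T_max=595 epochs.
        eta_min = 1.0e-6
        t = min(e - 5, 595)
        return eta_min + (base_lr - eta_min) * 0.5 * (1 + math.cos(math.pi * t / 595))

    print(lr_at_epoch(0), lr_at_epoch(5), lr_at_epoch(600))  # warmup start, peak, floor
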
diff --git a/mmyolo/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py b/mmyolo/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py
index a5add2c5..6a74e48e 100644
--- a/mmyolo/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py
+++ b/mmyolo/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py
@@ -1,7 +1,7 @@
-_base_ = '../_base_/default_runtime.py'
+_base_ = "../_base_/default_runtime.py"

-data_root = 'data/coco/'
-dataset_type = 'YOLOv5CocoDataset'
+data_root = "data/coco/"
+dataset_type = "YOLOv5CocoDataset"

 img_scale = (640, 640)  # width, height
 deepen_factor = 1.0
@@ -26,130 +26,149 @@

 # only on Val
 batch_shapes_cfg = dict(
-    type='BatchShapePolicy',
+    type="BatchShapePolicy",
     batch_size=val_batch_size_per_gpu,
     img_size=img_scale[0],
     size_divisor=32,
-    extra_pad_ratio=0.5)
+    extra_pad_ratio=0.5,
+)

 model = dict(
-    type='YOLODetector',
+    type="YOLODetector",
     data_preprocessor=dict(
-        type='YOLOv5DetDataPreprocessor',
+        type="YOLOv5DetDataPreprocessor",
         mean=[103.53, 116.28, 123.675],
         std=[57.375, 57.12, 58.395],
-        bgr_to_rgb=False),
+        bgr_to_rgb=False,
+    ),
     backbone=dict(
-        type='CSPNeXt',
-        arch='P5',
+        type="CSPNeXt",
+        arch="P5",
         expand_ratio=0.5,
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
         channel_attention=True,
-        norm_cfg=dict(type='BN'),
-        act_cfg=dict(type='SiLU', inplace=True)),
+        norm_cfg=dict(type="BN"),
+        act_cfg=dict(type="SiLU", inplace=True),
+    ),
     neck=dict(
-        type='CSPNeXtPAFPN',
+        type="CSPNeXtPAFPN",
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
         in_channels=[256, 512, 1024],
         out_channels=256,
         num_csp_blocks=3,
         expand_ratio=0.5,
-        norm_cfg=dict(type='BN'),
-        act_cfg=dict(type='SiLU', inplace=True)),
+        norm_cfg=dict(type="BN"),
+        act_cfg=dict(type="SiLU", inplace=True),
+    ),
     bbox_head=dict(
-        type='RTMDetHead',
+        type="RTMDetHead",
         head_module=dict(
-            type='RTMDetSepBNHeadModule',
+            type="RTMDetSepBNHeadModule",
             num_classes=num_classes,
             in_channels=256,
             stacked_convs=2,
             feat_channels=256,
-            norm_cfg=dict(type='BN'),
-            act_cfg=dict(type='SiLU', inplace=True),
+            norm_cfg=dict(type="BN"),
+            act_cfg=dict(type="SiLU", inplace=True),
             share_conv=True,
             pred_kernel_size=1,
-            featmap_strides=strides),
+            featmap_strides=strides,
+        ),
         prior_generator=dict(
-            type='mmdet.MlvlPointGenerator', offset=0, strides=strides),
-        bbox_coder=dict(type='DistancePointBBoxCoder'),
+            type="mmdet.MlvlPointGenerator", offset=0, strides=strides
+        ),
+        bbox_coder=dict(type="DistancePointBBoxCoder"),
         loss_cls=dict(
-            type='mmdet.QualityFocalLoss',
-            use_sigmoid=True,
-            beta=2.0,
-            loss_weight=1.0),
-        loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0)),
+            type="mmdet.QualityFocalLoss", use_sigmoid=True, beta=2.0, loss_weight=1.0
+        ),
+        loss_bbox=dict(type="mmdet.GIoULoss", loss_weight=2.0),
+    ),
     train_cfg=dict(
         assigner=dict(
-            type='BatchDynamicSoftLabelAssigner',
+            type="BatchDynamicSoftLabelAssigner",
             num_classes=num_classes,
             topk=13,
-            iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
+            iou_calculator=dict(type="mmdet.BboxOverlaps2D"),
+        ),
         allowed_border=-1,
         pos_weight=-1,
-        debug=False),
+        debug=False,
+    ),
     test_cfg=dict(
         multi_label=True,
         nms_pre=30000,
         score_thr=0.001,
-        nms=dict(type='nms', iou_threshold=0.65),
-        max_per_img=300),
+        nms=dict(type="nms", iou_threshold=0.65),
+        max_per_img=300,
+    ),
 )

 train_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
-    dict(type='LoadAnnotations', with_bbox=True),
+    dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args),
+    dict(type="LoadAnnotations", with_bbox=True),
     dict(
-        type='Mosaic',
+        type="Mosaic",
         img_scale=img_scale,
         use_cached=True,
         max_cached_images=40,
-        pad_val=114.0),
+        pad_val=114.0,
+    ),
     dict(
-        type='mmdet.RandomResize',
+        type="mmdet.RandomResize",
         # img_scale is (width, height)
         scale=(img_scale[0] * 2, img_scale[1] * 2),
         ratio_range=(0.1, 2.0),
-        resize_type='mmdet.Resize',
-        keep_ratio=True),
-    dict(type='mmdet.RandomCrop', crop_size=img_scale),
-    dict(type='mmdet.YOLOXHSVRandomAug'),
-    dict(type='mmdet.RandomFlip', prob=0.5),
-    dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
-    dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20),
-    dict(type='mmdet.PackDetInputs')
+        resize_type="mmdet.Resize",
+        keep_ratio=True,
+    ),
+    dict(type="mmdet.RandomCrop", crop_size=img_scale),
+    dict(type="mmdet.YOLOXHSVRandomAug"),
+    dict(type="mmdet.RandomFlip", prob=0.5),
+    dict(type="mmdet.Pad", size=img_scale, pad_val=dict(img=(114, 114, 114))),
+    dict(type="YOLOv5MixUp", use_cached=True, max_cached_images=20),
+    dict(type="mmdet.PackDetInputs"),
 ]

 train_pipeline_stage2 = [
-    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
-    dict(type='LoadAnnotations', with_bbox=True),
+    dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args),
+    dict(type="LoadAnnotations", with_bbox=True),
     dict(
-        type='mmdet.RandomResize',
+        type="mmdet.RandomResize",
         scale=img_scale,
         ratio_range=(0.1, 2.0),
-        resize_type='mmdet.Resize',
-        keep_ratio=True),
-    dict(type='mmdet.RandomCrop', crop_size=img_scale),
-    dict(type='mmdet.YOLOXHSVRandomAug'),
-    dict(type='mmdet.RandomFlip', prob=0.5),
-    dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))),
-    dict(type='mmdet.PackDetInputs')
+        resize_type="mmdet.Resize",
+        keep_ratio=True,
+    ),
+    dict(type="mmdet.RandomCrop", crop_size=img_scale),
+    dict(type="mmdet.YOLOXHSVRandomAug"),
+    dict(type="mmdet.RandomFlip", prob=0.5),
+    dict(type="mmdet.Pad", size=img_scale, pad_val=dict(img=(114, 114, 114))),
+    dict(type="mmdet.PackDetInputs"),
 ]

 test_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
-    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
+    dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args),
+    dict(type="YOLOv5KeepRatioResize", scale=img_scale),
     dict(
-        type='LetterResize',
+        type="LetterResize",
         scale=img_scale,
         allow_scale_up=False,
-        pad_val=dict(img=114)),
-    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
+        pad_val=dict(img=114),
+    ),
+    dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"),
     dict(
-        type='mmdet.PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
-                   'scale_factor', 'pad_param'))
+        type="mmdet.PackDetInputs",
+        meta_keys=(
+            "img_id",
+            "img_path",
+            "ori_shape",
+            "img_shape",
+            "scale_factor",
+            "pad_param",
+        ),
+    ),
 ]

 train_dataloader = dict(
@@ -157,15 +176,17 @@
     num_workers=train_num_workers,
     persistent_workers=persistent_workers,
     pin_memory=True,
-    collate_fn=dict(type='yolov5_collate'),
-    sampler=dict(type='DefaultSampler', shuffle=True),
+    collate_fn=dict(type="yolov5_collate"),
+    sampler=dict(type="DefaultSampler", shuffle=True),
     dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        ann_file='annotations/instances_train2017.json',
-        data_prefix=dict(img='train2017/'),
+        ann_file="annotations/instances_train2017.json",
+        data_prefix=dict(img="train2017/"),
         filter_cfg=dict(filter_empty_gt=True, min_size=32),
-        pipeline=train_pipeline))
+        pipeline=train_pipeline,
+    ),
+)

 val_dataloader = dict(
     batch_size=val_batch_size_per_gpu,
@@ -173,79 +194,82 @@
     persistent_workers=persistent_workers,
     pin_memory=True,
     drop_last=False,
-    sampler=dict(type='DefaultSampler', shuffle=False),
+    sampler=dict(type="DefaultSampler", shuffle=False),
     dataset=dict(
         type=dataset_type,
         data_root=data_root,
-        ann_file='annotations/instances_val2017.json',
-        data_prefix=dict(img='val2017/'),
+        ann_file="annotations/instances_val2017.json",
+        data_prefix=dict(img="val2017/"),
         test_mode=True,
         batch_shapes_cfg=batch_shapes_cfg,
-        pipeline=test_pipeline))
+        pipeline=test_pipeline,
+    ),
+)

 test_dataloader = val_dataloader

 # Reduce evaluation time
 val_evaluator = dict(
-    type='mmdet.CocoMetric',
+    type="mmdet.CocoMetric",
     proposal_nums=(100, 1, 10),
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    metric='bbox')
+    ann_file=data_root + "annotations/instances_val2017.json",
+    metric="bbox",
+)
 test_evaluator = val_evaluator

 # optimizer
 optim_wrapper = dict(
-    type='OptimWrapper',
-    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
-    paramwise_cfg=dict(
-        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+    type="OptimWrapper",
+    optimizer=dict(type="AdamW", lr=base_lr, weight_decay=0.05),
+    paramwise_cfg=dict(norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True),
+)

 # learning rate
 param_scheduler = [
-    dict(
-        type='LinearLR',
-        start_factor=1.0e-5,
-        by_epoch=False,
-        begin=0,
-        end=1000),
+    dict(type="LinearLR", start_factor=1.0e-5, by_epoch=False, begin=0, end=1000),
     dict(
         # use cosine lr from 150 to 300 epoch
-        type='CosineAnnealingLR',
+        type="CosineAnnealingLR",
         eta_min=base_lr * 0.05,
         begin=max_epochs // 2,
         end=max_epochs,
         T_max=max_epochs // 2,
         by_epoch=True,
-        convert_to_iter_based=True),
+        convert_to_iter_based=True,
+    ),
 ]

 # hooks
 default_hooks = dict(
     checkpoint=dict(
-        type='CheckpointHook',
+        type="CheckpointHook",
         interval=interval,
-        max_keep_ckpts=3  # only keep latest 3 checkpoints
-    ))
+        max_keep_ckpts=3,  # only keep latest 3 checkpoints
+    )
+)

 custom_hooks = [
     dict(
-        type='EMAHook',
-        ema_type='ExpMomentumEMA',
+        type="EMAHook",
+        ema_type="ExpMomentumEMA",
         momentum=0.0002,
         update_buffers=True,
         strict_load=False,
-        priority=49),
+        priority=49,
+    ),
     dict(
-        type='mmdet.PipelineSwitchHook',
+        type="mmdet.PipelineSwitchHook",
         switch_epoch=max_epochs - stage2_num_epochs,
-        switch_pipeline=train_pipeline_stage2)
+        switch_pipeline=train_pipeline_stage2,
+    ),
 ]

 train_cfg = dict(
-    type='EpochBasedTrainLoop',
+    type="EpochBasedTrainLoop",
     max_epochs=max_epochs,
     val_interval=interval,
-    dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)])
+    dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)],
+)

-val_cfg = dict(type='ValLoop')
-test_cfg = dict(type='TestLoop')
+val_cfg = dict(type="ValLoop")
+test_cfg = dict(type="TestLoop")
diff --git a/mmyolo/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py b/mmyolo/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py
index 2e8e5a40..b4bc02ce 100644
--- a/mmyolo/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py
+++ b/mmyolo/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py'
+_base_ = "./rtmdet_l_syncbn_fast_8xb32-300e_coco.py"

 deepen_factor = 0.67
 widen_factor = 0.75
@@ -6,4 +6,5 @@
 model = dict(
     backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
     neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)
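The RTMDet configs keep an EMA copy of the weights through ExpMomentumEMA with a very small floor momentum. A loose sketch of the exponential-momentum update as I read it from mmengine/mmyolo (the warmup constant gamma=2000 is the library default and an assumption here):

    import math

    def ema_update(ema_w, model_w, step: int, momentum: float = 0.0002, gamma: int = 2000):
        # The effective momentum decays from ~1 toward the configured floor, so
        # early steps track the model quickly and late steps average slowly.
        m = (1 - momentum) * math.exp(-(1 + step) / gamma) + momentum
        return [(1 - m) * e + m * w for e, w in zip(ema_w, model_w)]
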
dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="LoadAnnotations", with_bbox=True), dict( - type='mmdet.RandomResize', + type="mmdet.RandomResize", scale=img_scale, ratio_range=(0.5, 2.0), # note - resize_type='mmdet.Resize', - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=img_scale), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), - dict(type='mmdet.PackDetInputs') + resize_type="mmdet.Resize", + keep_ratio=True, + ), + dict(type="mmdet.RandomCrop", crop_size=img_scale), + dict(type="mmdet.YOLOXHSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), + dict(type="mmdet.Pad", size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict(type="mmdet.PackDetInputs"), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) custom_hooks = [ dict( - type='EMAHook', - ema_type='ExpMomentumEMA', + type="EMAHook", + ema_type="ExpMomentumEMA", momentum=0.0002, update_buffers=True, strict_load=False, - priority=49), + priority=49, + ), dict( - type='mmdet.PipelineSwitchHook', + type="mmdet.PipelineSwitchHook", switch_epoch=_base_.max_epochs - _base_.stage2_num_epochs, - switch_pipeline=train_pipeline_stage2) + switch_pipeline=train_pipeline_stage2, + ), ] diff --git a/mmyolo/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py b/mmyolo/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py index 281062c1..41d71341 100644 --- a/mmyolo/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py +++ b/mmyolo/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py @@ -1,6 +1,6 @@ -_base_ = './rtmdet_s_syncbn_fast_8xb32-300e_coco.py' +_base_ = "./rtmdet_s_syncbn_fast_8xb32-300e_coco.py" -checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa +checkpoint = "https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth" # noqa deepen_factor = 0.167 widen_factor = 0.375 @@ -10,41 +10,46 @@ backbone=dict( deepen_factor=deepen_factor, widen_factor=widen_factor, - init_cfg=dict(checkpoint=checkpoint)), + init_cfg=dict(checkpoint=checkpoint), + ), neck=dict( deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) train_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='LoadAnnotations', with_bbox=True), + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="LoadAnnotations", with_bbox=True), dict( - type='Mosaic', + type="Mosaic", img_scale=img_scale, use_cached=True, max_cached_images=20, # note random_pop=False, # note - pad_val=114.0), + pad_val=114.0, + ), dict( - type='mmdet.RandomResize', + type="mmdet.RandomResize", # img_scale is (width, height) scale=(img_scale[0] * 2, img_scale[1] * 2), ratio_range=(0.5, 2.0), - resize_type='mmdet.Resize', - keep_ratio=True), - dict(type='mmdet.RandomCrop', crop_size=img_scale), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + resize_type="mmdet.Resize", + keep_ratio=True, + ), + dict(type="mmdet.RandomCrop", crop_size=img_scale), + dict(type="mmdet.YOLOXHSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), + dict(type="mmdet.Pad", 
size=img_scale, pad_val=dict(img=(114, 114, 114))), dict( - type='YOLOv5MixUp', + type="YOLOv5MixUp", use_cached=True, random_pop=False, max_cached_images=10, - prob=0.5), - dict(type='mmdet.PackDetInputs') + prob=0.5, + ), + dict(type="mmdet.PackDetInputs"), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/mmyolo/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py b/mmyolo/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py index 0978c787..5892047e 100644 --- a/mmyolo/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py +++ b/mmyolo/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py' +_base_ = "./rtmdet_l_syncbn_fast_8xb32-300e_coco.py" deepen_factor = 1.33 widen_factor = 1.25 @@ -6,4 +6,5 @@ model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py b/mmyolo/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py index 90ba758a..81d9caca 100644 --- a/mmyolo/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py +++ b/mmyolo/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py @@ -1,63 +1,70 @@ -_base_ = 'yolov5_s-v61_fast_8xb16-300e_crowdhuman.py' +_base_ = "yolov5_s-v61_fast_8xb16-300e_crowdhuman.py" model = dict( data_preprocessor=dict( _delete_=True, - type='mmdet.DetDataPreprocessor', - mean=[0., 0., 0.], - std=[255., 255., 255.], - bgr_to_rgb=True), - bbox_head=dict(ignore_iof_thr=0.5)) + type="mmdet.DetDataPreprocessor", + mean=[0.0, 0.0, 0.0], + std=[255.0, 255.0, 255.0], + bgr_to_rgb=True, + ), + bbox_head=dict(ignore_iof_thr=0.5), +) img_scale = _base_.img_scale albu_train_transforms = [ - dict(type='Blur', p=0.01), - dict(type='MedianBlur', p=0.01), - dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01) + dict(type="Blur", p=0.01), + dict(type="MedianBlur", p=0.01), + dict(type="ToGray", p=0.01), + dict(type="CLAHE", p=0.01), ] pre_transform = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), # only change this - dict(type='mmdet.LoadAnnotations', with_bbox=True) + dict(type="mmdet.LoadAnnotations", with_bbox=True), ] train_pipeline = [ *pre_transform, dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, scaling_ratio_range=(0.5, 1.5), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), + border_val=(114, 114, 114), + ), dict( - type='mmdet.Albu', + type="mmdet.Albu", transforms=albu_train_transforms, bbox_params=dict( - type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), + type="BboxParams", + format="pascal_voc", + label_fields=["gt_bboxes_labels", "gt_ignore_flags"], + ), + keymap={"img": "image", "gt_bboxes": "bboxes"}, + ), + 
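The tiny and s variants lean on the cached forms of Mosaic and MixUp (use_cached, max_cached_images, random_pop). The idea, sketched loosely here rather than as mmyolo's actual implementation, is to mix each new sample with images drawn from a small in-memory pool instead of re-reading and re-decoding extra images every iteration:

    import random

    class CachedMixSketch:
        """Loose sketch: mosaic/mixup partners come from a bounded cache."""

        def __init__(self, max_cached_images: int = 40, random_pop: bool = True):
            self.cache = []
            self.max_cached_images = max_cached_images
            self.random_pop = random_pop

        def __call__(self, sample):
            self.cache.append(sample)
            if len(self.cache) > self.max_cached_images:
                # Evict a random (or the oldest) cached sample once full.
                idx = random.randrange(len(self.cache)) if self.random_pop else 0
                self.cache.pop(idx)
            # Mosaic needs 3 partners; they come from the cache, not from disk.
            partners = random.sample(self.cache, k=min(3, len(self.cache)))
            return sample, partners
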
dict(type="YOLOv5HSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] train_dataloader = dict( - collate_fn=dict(type='pseudo_collate'), - dataset=dict(pipeline=train_pipeline)) + collate_fn=dict(type="pseudo_collate"), dataset=dict(pipeline=train_pipeline) +) diff --git a/mmyolo/configs/yolov5/crowdhuman/yolov5_s-v61_fast_8xb16-300e_crowdhuman.py b/mmyolo/configs/yolov5/crowdhuman/yolov5_s-v61_fast_8xb16-300e_crowdhuman.py index a61859fa..4e4bc82e 100644 --- a/mmyolo/configs/yolov5/crowdhuman/yolov5_s-v61_fast_8xb16-300e_crowdhuman.py +++ b/mmyolo/configs/yolov5/crowdhuman/yolov5_s-v61_fast_8xb16-300e_crowdhuman.py @@ -1,11 +1,11 @@ -_base_ = '../yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' +_base_ = "../yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py" # Use the model trained on the COCO as the pretrained model -load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa +load_from = "https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth" # noqa # dataset settings -data_root = 'data/CrowdHuman/' -dataset_type = 'YOLOv5CrowdHumanDataset' +data_root = "data/CrowdHuman/" +dataset_type = "YOLOv5CrowdHumanDataset" # parameters that often need to be modified num_classes = 1 @@ -13,35 +13,42 @@ anchors = [ [(6, 14), (12, 28), (19, 48)], # P3/8 [(29, 79), (46, 124), (142, 54)], # P4/16 - [(73, 198), (124, 330), (255, 504)] # P5/32 + [(73, 198), (124, 330), (255, 504)], # P5/32 ] model = dict( bbox_head=dict( head_module=dict(num_classes=num_classes), - prior_generator=dict(base_sizes=anchors))) + prior_generator=dict(base_sizes=anchors), + ) +) train_dataloader = dict( dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotation_train.odgt', - data_prefix=dict(img='Images/'))) + ann_file="annotation_train.odgt", + data_prefix=dict(img="Images/"), + ) +) val_dataloader = dict( dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotation_val.odgt', - data_prefix=dict(img='Images/'), + ann_file="annotation_val.odgt", + data_prefix=dict(img="Images/"), # CrowdHumanMetric does not support out-of-order output images # for the time being. batch_shapes_cfg does not support. 
- batch_shapes_cfg=None)) + batch_shapes_cfg=None, + ) +) test_dataloader = val_dataloader val_evaluator = dict( _delete_=True, - type='mmdet.CrowdHumanMetric', - ann_file=data_root + 'annotation_val.odgt', - metric=['AP', 'MR', 'JI']) + type="mmdet.CrowdHumanMetric", + ann_file=data_root + "annotation_val.odgt", + metric=["AP", "MR", "JI"], +) test_evaluator = val_evaluator diff --git a/mmyolo/configs/yolov5/voc/yolov5_l-v61_fast_1xb32-50e_voc.py b/mmyolo/configs/yolov5/voc/yolov5_l-v61_fast_1xb32-50e_voc.py index 4b470973..000942c9 100644 --- a/mmyolo/configs/yolov5/voc/yolov5_l-v61_fast_1xb32-50e_voc.py +++ b/mmyolo/configs/yolov5/voc/yolov5_l-v61_fast_1xb32-50e_voc.py @@ -1,11 +1,11 @@ -_base_ = './yolov5_s-v61_fast_1xb64-50e_voc.py' +_base_ = "./yolov5_s-v61_fast_1xb64-50e_voc.py" deepen_factor = 1.0 widen_factor = 1.0 train_batch_size_per_gpu = 32 train_num_workers = 8 -load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth' # noqa +load_from = "https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth" # noqa model = dict( backbone=dict( @@ -16,10 +16,11 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) train_dataloader = dict( - batch_size=train_batch_size_per_gpu, num_workers=train_num_workers) + batch_size=train_batch_size_per_gpu, num_workers=train_num_workers +) -optim_wrapper = dict( - optimizer=dict(batch_size_per_gpu=train_batch_size_per_gpu)) +optim_wrapper = dict(optimizer=dict(batch_size_per_gpu=train_batch_size_per_gpu)) diff --git a/mmyolo/configs/yolov5/voc/yolov5_m-v61_fast_1xb64-50e_voc.py b/mmyolo/configs/yolov5/voc/yolov5_m-v61_fast_1xb64-50e_voc.py index 2ed2127a..694aefc7 100644 --- a/mmyolo/configs/yolov5/voc/yolov5_m-v61_fast_1xb64-50e_voc.py +++ b/mmyolo/configs/yolov5/voc/yolov5_m-v61_fast_1xb64-50e_voc.py @@ -1,9 +1,9 @@ -_base_ = './yolov5_s-v61_fast_1xb64-50e_voc.py' +_base_ = "./yolov5_s-v61_fast_1xb64-50e_voc.py" deepen_factor = 0.67 widen_factor = 0.75 -load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth' # noqa +load_from = "https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth" # noqa model = dict( backbone=dict( @@ -14,4 +14,5 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py b/mmyolo/configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py index 041f6537..2fc7d18b 100644 --- a/mmyolo/configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py +++ b/mmyolo/configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py @@ -1,9 +1,9 @@ -_base_ = './yolov5_s-v61_fast_1xb64-50e_voc.py' +_base_ = "./yolov5_s-v61_fast_1xb64-50e_voc.py" deepen_factor = 0.33 widen_factor = 0.25 -load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth' # noqa +load_from = 
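The ignore variant above sets ignore_iof_thr=0.5 so that predictions falling mostly inside CrowdHuman's ignore regions are excluded from the loss. A hedged sketch of the intersection-over-foreground ratio being thresholded (the actual masking happens inside mmyolo's YOLOv5Head; this only illustrates the geometry):

    def iof(box, ignore_region):
        # Intersection over the *foreground* box area, not over the union.
        ix1, iy1 = max(box[0], ignore_region[0]), max(box[1], ignore_region[1])
        ix2, iy2 = min(box[2], ignore_region[2]), min(box[3], ignore_region[3])
        inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
        area = (box[2] - box[0]) * (box[3] - box[1])
        return inter / area if area > 0 else 0.0

    # A box half-covered by an ignore region meets the 0.5 threshold:
    print(iof((0, 0, 10, 10), (0, 0, 10, 5)) >= 0.5)  # True
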
"https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth" # noqa model = dict( backbone=dict( @@ -14,4 +14,5 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py b/mmyolo/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py index 54f6cdeb..571788c7 100644 --- a/mmyolo/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py +++ b/mmyolo/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py @@ -1,8 +1,8 @@ -_base_ = '../yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' +_base_ = "../yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py" # dataset settings -data_root = 'data/VOCdevkit/' -dataset_type = 'YOLOv5VOCDataset' +data_root = "data/VOCdevkit/" +dataset_type = "YOLOv5VOCDataset" # parameters that often need to be modified num_classes = 20 @@ -22,12 +22,14 @@ # only on Val batch_shapes_cfg = dict(img_size=img_scale[0]) -anchors = [[(26, 44), (67, 57), (61, 130)], [(121, 118), (120, 239), - (206, 182)], - [(376, 161), (234, 324), (428, 322)]] +anchors = [ + [(26, 44), (67, 57), (61, 130)], + [(121, 118), (120, 239), (206, 182)], + [(376, 161), (234, 324), (428, 322)], +] num_det_layers = 3 -load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa +load_from = "https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth" # noqa # Hyperparameter reference from: # https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.VOC.yaml @@ -37,102 +39,115 @@ prior_generator=dict(base_sizes=anchors), loss_cls=dict( loss_weight=0.21638 * (num_classes / 80 * 3 / num_det_layers), - class_weight=0.5), + class_weight=0.5, + ), loss_bbox=dict(loss_weight=0.02 * (3 / num_det_layers)), loss_obj=dict( - loss_weight=0.51728 * - ((img_scale[0] / 640)**2 * 3 / num_det_layers), - class_weight=0.67198), + loss_weight=0.51728 * ((img_scale[0] / 640) ** 2 * 3 / num_det_layers), + class_weight=0.67198, + ), # Different from COCO - prior_match_thr=3.3744), - test_cfg=dict(nms=dict(iou_threshold=0.6))) + prior_match_thr=3.3744, + ), + test_cfg=dict(nms=dict(iou_threshold=0.6)), +) albu_train_transforms = _base_.albu_train_transforms pre_transform = _base_.pre_transform with_mosiac_pipeline = [ dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_translate_ratio=0.04591, max_shear_degree=0.0, scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), + border_val=(114, 114, 114), + ), dict( - type='YOLOv5MixUp', + type="YOLOv5MixUp", prob=0.04266, pre_transform=[ *pre_transform, dict( - type='Mosaic', + type="Mosaic", img_scale=img_scale, pad_val=114.0, - pre_transform=pre_transform), + pre_transform=pre_transform, + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_translate_ratio=0.04591, max_shear_degree=0.0, scaling_ratio_range=(1 
- affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)) - ]) + border_val=(114, 114, 114), + ), + ], + ), ] without_mosaic_pipeline = [ dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_translate_ratio=0.04591, max_shear_degree=0.0, scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), border=(0, 0), - border_val=(114, 114, 114)), + border_val=(114, 114, 114), + ), dict( - type='LetterResize', - scale=img_scale, - allow_scale_up=True, - pad_val=dict(img=114)) + type="LetterResize", scale=img_scale, allow_scale_up=True, pad_val=dict(img=114) + ), ] # Because the border parameter is inconsistent when # using mosaic or not, `RandomChoice` is used here. randchoice_mosaic_pipeline = dict( - type='RandomChoice', + type="RandomChoice", transforms=[with_mosiac_pipeline, without_mosaic_pipeline], - prob=[0.85834, 0.14166]) + prob=[0.85834, 0.14166], +) train_pipeline = [ - *pre_transform, randchoice_mosaic_pipeline, + *pre_transform, + randchoice_mosaic_pipeline, dict( - type='mmdet.Albu', + type="mmdet.Albu", transforms=albu_train_transforms, bbox_params=dict( - type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), + type="BboxParams", + format="pascal_voc", + label_fields=["gt_bboxes_labels", "gt_ignore_flags"], + ), + keymap={"img": "image", "gt_bboxes": "bboxes"}, + ), dict( - type='YOLOv5HSVRandomAug', + type="YOLOv5HSVRandomAug", hue_delta=0.01041, saturation_delta=0.54703, - value_delta=0.27739), - dict(type='mmdet.RandomFlip', prob=0.5), + value_delta=0.27739, + ), + dict(type="mmdet.RandomFlip", prob=0.5), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] train_dataloader = dict( @@ -141,43 +156,55 @@ num_workers=train_num_workers, persistent_workers=persistent_workers, pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), + sampler=dict(type="DefaultSampler", shuffle=True), dataset=dict( - type='ConcatDataset', + type="ConcatDataset", datasets=[ dict( type=dataset_type, data_root=data_root, - ann_file='VOC2007/ImageSets/Main/trainval.txt', - data_prefix=dict(sub_data_root='VOC2007/'), + ann_file="VOC2007/ImageSets/Main/trainval.txt", + data_prefix=dict(sub_data_root="VOC2007/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline), + pipeline=train_pipeline, + ), dict( type=dataset_type, data_root=data_root, - ann_file='VOC2012/ImageSets/Main/trainval.txt', - data_prefix=dict(sub_data_root='VOC2012/'), + ann_file="VOC2012/ImageSets/Main/trainval.txt", + data_prefix=dict(sub_data_root="VOC2012/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline) + pipeline=train_pipeline, + ), ], # Use ignore_keys to avoid judging metainfo is # not equal in `ConcatDataset`. 
- ignore_keys='dataset_type'), - collate_fn=dict(type='yolov5_collate')) + ignore_keys="dataset_type", + ), + collate_fn=dict(type="yolov5_collate"), +) test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="YOLOv5KeepRatioResize", scale=img_scale), dict( - type='LetterResize', + type="LetterResize", scale=img_scale, allow_scale_up=False, - pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + pad_val=dict(img=114), + ), + dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + "pad_param", + ), + ), ] val_dataloader = dict( @@ -186,15 +213,17 @@ persistent_workers=persistent_workers, pin_memory=True, drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), + sampler=dict(type="DefaultSampler", shuffle=False), dataset=dict( type=dataset_type, data_root=data_root, - ann_file='VOC2007/ImageSets/Main/test.txt', - data_prefix=dict(sub_data_root='VOC2007/'), + ann_file="VOC2007/ImageSets/Main/test.txt", + data_prefix=dict(sub_data_root="VOC2007/"), test_mode=True, pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader @@ -204,7 +233,9 @@ lr=0.00334, momentum=0.74832, weight_decay=0.00025, - batch_size_per_gpu=train_batch_size_per_gpu)) + batch_size_per_gpu=train_batch_size_per_gpu, + ) +) default_hooks = dict( param_scheduler=dict( @@ -212,22 +243,26 @@ max_epochs=max_epochs, warmup_epochs=3.3835, warmup_momentum=0.59462, - warmup_bias_lr=0.18657)) + warmup_bias_lr=0.18657, + ) +) custom_hooks = [ dict( - type='EMAHook', - ema_type='ExpMomentumEMA', + type="EMAHook", + ema_type="ExpMomentumEMA", momentum=0.0001, update_buffers=True, # To load COCO pretrained model, need to set `strict_load=False` strict_load=False, - priority=49) + priority=49, + ) ] # TODO: Support using coco metric in voc dataset val_evaluator = dict( - _delete_=True, type='mmdet.VOCMetric', metric='mAP', eval_mode='area') + _delete_=True, type="mmdet.VOCMetric", metric="mAP", eval_mode="area" +) test_evaluator = val_evaluator diff --git a/mmyolo/configs/yolov5/voc/yolov5_x-v61_fast_1xb32-50e_voc.py b/mmyolo/configs/yolov5/voc/yolov5_x-v61_fast_1xb32-50e_voc.py index 2fc4d79f..4b46563f 100644 --- a/mmyolo/configs/yolov5/voc/yolov5_x-v61_fast_1xb32-50e_voc.py +++ b/mmyolo/configs/yolov5/voc/yolov5_x-v61_fast_1xb32-50e_voc.py @@ -1,4 +1,4 @@ -_base_ = './yolov5_s-v61_fast_1xb64-50e_voc.py' +_base_ = "./yolov5_s-v61_fast_1xb64-50e_voc.py" deepen_factor = 1.33 widen_factor = 1.25 @@ -17,10 +17,11 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) train_dataloader = dict( - batch_size=train_batch_size_per_gpu, num_workers=train_num_workers) + batch_size=train_batch_size_per_gpu, num_workers=train_num_workers +) -optim_wrapper = dict( - optimizer=dict(batch_size_per_gpu=train_batch_size_per_gpu)) +optim_wrapper = dict(optimizer=dict(batch_size_per_gpu=train_batch_size_per_gpu)) diff --git 
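The VOC head reuses the COCO loss structure but rescales every weight by class count and detection-layer count, exactly as written in the formulas above. A worked check of the numbers (img_scale comes from the base config and is not shown in this hunk; the 512 used below is an assumption for illustration):

    num_classes, num_det_layers = 20, 3
    img_w = 512  # assumed input width; taken from the base VOC config, not this hunk

    loss_cls_weight = 0.21638 * (num_classes / 80 * 3 / num_det_layers)
    loss_bbox_weight = 0.02 * (3 / num_det_layers)
    loss_obj_weight = 0.51728 * ((img_w / 640) ** 2 * 3 / num_det_layers)

    print(round(loss_cls_weight, 5))  # ~0.0541, cls loss shrinks with 20 classes
    print(loss_bbox_weight)           # 0.02
    print(round(loss_obj_weight, 5))  # ~0.33106, obj loss shrinks at a 512 input
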
diff --git a/mmyolo/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py
index 6a84fdbe..d5088601 100644
--- a/mmyolo/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py
+++ b/mmyolo/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py'
+_base_ = "./yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py"

 deepen_factor = 1.0
 widen_factor = 1.0
@@ -12,4 +12,5 @@
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
     ),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)
diff --git a/mmyolo/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py
index 60a11a37..96f47c56 100644
--- a/mmyolo/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py
+++ b/mmyolo/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py'
+_base_ = "./yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py"

 deepen_factor = 1.0
 widen_factor = 1.0
@@ -12,4 +12,5 @@
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
     ),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)
diff --git a/mmyolo/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py
index f2ccf787..a055f385 100644
--- a/mmyolo/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py
+++ b/mmyolo/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py'
+_base_ = "./yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py"

 deepen_factor = 0.67
 widen_factor = 0.75
@@ -20,54 +20,63 @@
     ),
     bbox_head=dict(
         head_module=dict(widen_factor=widen_factor),
-        loss_cls=dict(loss_weight=0.3 *
-                      (num_classes / 80 * 3 / num_det_layers)),
-        loss_obj=dict(loss_weight=0.7 *
-                      ((img_scale[0] / 640)**2 * 3 / num_det_layers))))
+        loss_cls=dict(loss_weight=0.3 * (num_classes / 80 * 3 / num_det_layers)),
+        loss_obj=dict(
+            loss_weight=0.7 * ((img_scale[0] / 640) ** 2 * 3 / num_det_layers)
+        ),
+    ),
+)

 pre_transform = _base_.pre_transform
 albu_train_transforms = _base_.albu_train_transforms

 mosaic_affine_pipeline = [
     dict(
-        type='Mosaic',
-        img_scale=img_scale,
-        pad_val=114.0,
-        pre_transform=pre_transform),
+        type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform
+    ),
     dict(
-        type='YOLOv5RandomAffine',
+        type="YOLOv5RandomAffine",
         max_rotate_degree=0.0,
         max_shear_degree=0.0,
         scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
         # img_scale is (width, height)
         border=(-img_scale[0] // 2, -img_scale[1] // 2),
-        border_val=(114, 114, 114))
+        border_val=(114, 114, 114),
+    ),
 ]

 # enable mixup
 train_pipeline = [
-    *pre_transform, *mosaic_affine_pipeline,
+    *pre_transform,
+    *mosaic_affine_pipeline,
     dict(
-        type='YOLOv5MixUp',
+        type="YOLOv5MixUp",
         prob=0.1,
-        pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
+        pre_transform=[*pre_transform, *mosaic_affine_pipeline],
+    ),
     dict(
-        type='mmdet.Albu',
+        type="mmdet.Albu",
         transforms=albu_train_transforms,
         bbox_params=dict(
-            type='BboxParams',
-            format='pascal_voc',
-            label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
-        keymap={
-            'img': 'image',
-            'gt_bboxes': 'bboxes'
-        }),
-    dict(type='YOLOv5HSVRandomAug'),
-    dict(type='mmdet.RandomFlip', prob=0.5),
+            type="BboxParams",
+            format="pascal_voc",
+            label_fields=["gt_bboxes_labels", "gt_ignore_flags"],
+        ),
+        keymap={"img": "image", "gt_bboxes": "bboxes"},
+    ),
+    dict(type="YOLOv5HSVRandomAug"),
+    dict(type="mmdet.RandomFlip", prob=0.5),
     dict(
-        type='mmdet.PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
-                   'flip_direction'))
+        type="mmdet.PackDetInputs",
+        meta_keys=(
+            "img_id",
+            "img_path",
+            "ori_shape",
+            "img_shape",
+            "flip",
+            "flip_direction",
+        ),
+    ),
 ]

 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
diff --git a/mmyolo/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py
index fdce9603..26285b03 100644
--- a/mmyolo/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py
+++ b/mmyolo/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
+_base_ = "./yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py"

 deepen_factor = 0.67
 widen_factor = 0.75
@@ -20,54 +20,63 @@
     ),
     bbox_head=dict(
         head_module=dict(widen_factor=widen_factor),
-        loss_cls=dict(loss_weight=0.3 *
-                      (num_classes / 80 * 3 / num_det_layers)),
-        loss_obj=dict(loss_weight=0.7 *
-                      ((img_scale[0] / 640)**2 * 3 / num_det_layers))))
+        loss_cls=dict(loss_weight=0.3 * (num_classes / 80 * 3 / num_det_layers)),
+        loss_obj=dict(
+            loss_weight=0.7 * ((img_scale[0] / 640) ** 2 * 3 / num_det_layers)
+        ),
+    ),
+)

 pre_transform = _base_.pre_transform
 albu_train_transforms = _base_.albu_train_transforms

 mosaic_affine_pipeline = [
     dict(
-        type='Mosaic',
-        img_scale=img_scale,
-        pad_val=114.0,
-        pre_transform=pre_transform),
+        type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform
+    ),
     dict(
-        type='YOLOv5RandomAffine',
+        type="YOLOv5RandomAffine",
         max_rotate_degree=0.0,
         max_shear_degree=0.0,
         scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
         # img_scale is (width, height)
         border=(-img_scale[0] // 2, -img_scale[1] // 2),
-        border_val=(114, 114, 114))
+        border_val=(114, 114, 114),
+    ),
 ]

 # enable mixup
 train_pipeline = [
-    *pre_transform, *mosaic_affine_pipeline,
+    *pre_transform,
+    *mosaic_affine_pipeline,
     dict(
-        type='YOLOv5MixUp',
+        type="YOLOv5MixUp",
         prob=0.1,
-        pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
+        pre_transform=[*pre_transform, *mosaic_affine_pipeline],
+    ),
     dict(
-        type='mmdet.Albu',
+        type="mmdet.Albu",
         transforms=albu_train_transforms,
         bbox_params=dict(
-            type='BboxParams',
-            format='pascal_voc',
-            label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
-        keymap={
-            'img': 'image',
-            'gt_bboxes': 'bboxes'
-        }),
-    dict(type='YOLOv5HSVRandomAug'),
-    dict(type='mmdet.RandomFlip', prob=0.5),
+            type="BboxParams",
+            format="pascal_voc",
+            label_fields=["gt_bboxes_labels", "gt_ignore_flags"],
+        ),
+        keymap={"img": "image", "gt_bboxes": "bboxes"},
+    ),
+    dict(type="YOLOv5HSVRandomAug"),
+    dict(type="mmdet.RandomFlip", prob=0.5),
     dict(
-        type='mmdet.PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
-                   'flip_direction'))
+        type="mmdet.PackDetInputs",
+        meta_keys=(
+            "img_id",
+            "img_path",
+            "ori_shape",
+            "img_shape",
+            "flip",
+            "flip_direction",
+        ),
+    ),
 ]

 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
diff --git a/mmyolo/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py
index 3cd2d6b7..fc05272b 100644
--- a/mmyolo/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py
+++ b/mmyolo/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = 'yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py'
+_base_ = "yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py"

 deepen_factor = 0.33
 widen_factor = 0.25
@@ -12,4 +12,5 @@
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
     ),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)
diff --git a/mmyolo/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py
index b6f93428..bb36ed13 100644
--- a/mmyolo/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py
+++ b/mmyolo/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
+_base_ = "./yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py"

 deepen_factor = 0.33
 widen_factor = 0.25
@@ -12,4 +12,5 @@
         deepen_factor=deepen_factor,
         widen_factor=widen_factor,
     ),
-    bbox_head=dict(head_module=dict(widen_factor=widen_factor)))
+    bbox_head=dict(head_module=dict(widen_factor=widen_factor)),
+)
diff --git a/mmyolo/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py
index d7cb0925..2af0976d 100644
--- a/mmyolo/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py
+++ b/mmyolo/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py
@@ -1,4 +1,4 @@
-_base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'
+_base_ = "yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py"

 img_scale = (1280, 1280)  # width, height
 num_classes = 80
@@ -9,26 +9,26 @@
     [(19, 27), (44, 40), (38, 94)],  # P3/8
     [(96, 68), (86, 152), (180, 137)],  # P4/16
     [(140, 301), (303, 264), (238, 542)],  # P5/32
-    [(436, 615), (739, 380), (925, 792)]  # P6/64
+    [(436, 615), (739, 380), (925, 792)],  # P6/64
 ]
 strides = [8, 16, 32, 64]
 num_det_layers = 4

 model = dict(
-    backbone=dict(arch='P6', out_indices=(2, 3, 4, 5)),
-    neck=dict(
-        in_channels=[256, 512, 768, 1024], out_channels=[256, 512, 768, 1024]),
+    backbone=dict(arch="P6", out_indices=(2, 3, 4, 5)),
+    neck=dict(in_channels=[256, 512, 768, 1024], out_channels=[256, 512, 768, 1024]),
     bbox_head=dict(
-        head_module=dict(
-            in_channels=[256, 512, 768, 1024], featmap_strides=strides),
+        head_module=dict(in_channels=[256, 512, 768, 1024], featmap_strides=strides),
         prior_generator=dict(base_sizes=anchors, strides=strides),
         # scaled based on number of detection layers
-        loss_cls=dict(loss_weight=0.5 *
-                      (num_classes / 80 * 3 / num_det_layers)),
+        loss_cls=dict(loss_weight=0.5 * (num_classes / 80 * 3 / num_det_layers)),
         loss_bbox=dict(loss_weight=0.05 * (3 / num_det_layers)),
-        loss_obj=dict(loss_weight=1.0 *
-                      ((img_scale[0] / 640)**2 * 3 / num_det_layers)),
-        obj_level_weights=[4.0, 1.0, 0.25, 0.06]))
+        loss_obj=dict(
+            loss_weight=1.0 * ((img_scale[0] / 640) ** 2 * 3 / num_det_layers)
+        ),
+        obj_level_weights=[4.0, 1.0, 0.25, 0.06],
+    ),
+)

 pre_transform = _base_.pre_transform
 albu_train_transforms = _base_.albu_train_transforms
@@ -36,55 +36,69 @@
 train_pipeline = [
     *pre_transform,
     dict(
-        type='Mosaic',
-        img_scale=img_scale,
-        pad_val=114.0,
-        pre_transform=pre_transform),
+        type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform
+    ),
     dict(
-        type='YOLOv5RandomAffine',
+        type="YOLOv5RandomAffine",
         max_rotate_degree=0.0,
         max_shear_degree=0.0,
         scaling_ratio_range=(0.5, 1.5),
         # img_scale is (width, height)
         border=(-img_scale[0] // 2, -img_scale[1] // 2),
-        border_val=(114, 114, 114)),
+        border_val=(114, 114, 114),
+    ),
     dict(
-        type='mmdet.Albu',
+        type="mmdet.Albu",
         transforms=albu_train_transforms,
         bbox_params=dict(
-            type='BboxParams',
-            format='pascal_voc',
-            label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
-        keymap={
-            'img': 'image',
-            'gt_bboxes': 'bboxes'
-        }),
-    dict(type='YOLOv5HSVRandomAug'),
-    dict(type='mmdet.RandomFlip', prob=0.5),
+            type="BboxParams",
+            format="pascal_voc",
+            label_fields=["gt_bboxes_labels", "gt_ignore_flags"],
+        ),
+        keymap={"img": "image", "gt_bboxes": "bboxes"},
+    ),
+    dict(type="YOLOv5HSVRandomAug"),
+    dict(type="mmdet.RandomFlip", prob=0.5),
     dict(
-        type='mmdet.PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
-                   'flip_direction'))
+        type="mmdet.PackDetInputs",
+        meta_keys=(
+            "img_id",
+            "img_path",
+            "ori_shape",
+            "img_shape",
+            "flip",
+            "flip_direction",
+        ),
+    ),
 ]

 train_dataloader = dict(dataset=dict(pipeline=train_pipeline))

 test_pipeline = [
-    dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args),
-    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
+    dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args),
+    dict(type="YOLOv5KeepRatioResize", scale=img_scale),
     dict(
-        type='LetterResize',
+        type="LetterResize",
         scale=img_scale,
         allow_scale_up=False,
-        pad_val=dict(img=114)),
-    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
+        pad_val=dict(img=114),
+    ),
+    dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"),
     dict(
-        type='mmdet.PackDetInputs',
-        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
-                   'scale_factor', 'pad_param'))
+        type="mmdet.PackDetInputs",
+        meta_keys=(
+            "img_id",
+            "img_path",
+            "ori_shape",
+            "img_shape",
+            "scale_factor",
+            "pad_param",
+        ),
+    ),
 ]

 val_dataloader = dict(
-    dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg))
+    dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)
+)

 test_dataloader = val_dataloader
score_thr=0.25, nms=dict(iou_threshold=0.45))) + test_cfg=dict(multi_label=False, score_thr=0.25, nms=dict(iou_threshold=0.45)) +) diff --git a/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py index d06f75c4..684a9dc6 100644 --- a/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py +++ b/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py @@ -1,8 +1,8 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = "../_base_/default_runtime.py" # dataset settings -data_root = 'data/coco/' -dataset_type = 'YOLOv5CocoDataset' +data_root = "data/coco/" +dataset_type = "YOLOv5CocoDataset" # parameters that often need to be modified num_classes = 80 @@ -24,16 +24,17 @@ # only on Val batch_shapes_cfg = dict( - type='BatchShapePolicy', + type="BatchShapePolicy", batch_size=val_batch_size_per_gpu, img_size=img_scale[0], size_divisor=32, - extra_pad_ratio=0.5) + extra_pad_ratio=0.5, +) anchors = [ [(10, 13), (16, 30), (33, 23)], # P3/8 [(30, 61), (62, 45), (59, 119)], # P4/16 - [(116, 90), (156, 198), (373, 326)] # P5/32 + [(116, 90), (156, 198), (373, 326)], # P5/32 ] strides = [8, 16, 32] num_det_layers = 3 @@ -43,112 +44,126 @@ env_cfg = dict(cudnn_benchmark=True) model = dict( - type='YOLODetector', + type="YOLODetector", data_preprocessor=dict( - type='mmdet.DetDataPreprocessor', - mean=[0., 0., 0.], - std=[255., 255., 255.], - bgr_to_rgb=True), + type="mmdet.DetDataPreprocessor", + mean=[0.0, 0.0, 0.0], + std=[255.0, 255.0, 255.0], + bgr_to_rgb=True, + ), backbone=dict( - type='YOLOv5CSPDarknet', + type="YOLOv5CSPDarknet", deepen_factor=deepen_factor, widen_factor=widen_factor, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True)), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), + ), neck=dict( - type='YOLOv5PAFPN', + type="YOLOv5PAFPN", deepen_factor=deepen_factor, widen_factor=widen_factor, in_channels=[256, 512, 1024], out_channels=[256, 512, 1024], num_csp_blocks=3, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True)), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), + ), bbox_head=dict( - type='YOLOv5Head', + type="YOLOv5Head", head_module=dict( - type='YOLOv5HeadModule', + type="YOLOv5HeadModule", num_classes=num_classes, in_channels=[256, 512, 1024], widen_factor=widen_factor, featmap_strides=strides, - num_base_priors=3), + num_base_priors=3, + ), prior_generator=dict( - type='mmdet.YOLOAnchorGenerator', - base_sizes=anchors, - strides=strides), + type="mmdet.YOLOAnchorGenerator", base_sizes=anchors, strides=strides + ), # scaled based on number of detection layers loss_cls=dict( - type='mmdet.CrossEntropyLoss', + type="mmdet.CrossEntropyLoss", use_sigmoid=True, - reduction='mean', - loss_weight=0.5 * (num_classes / 80 * 3 / num_det_layers)), + reduction="mean", + loss_weight=0.5 * (num_classes / 80 * 3 / num_det_layers), + ), loss_bbox=dict( - type='IoULoss', - iou_mode='ciou', - bbox_format='xywh', + type="IoULoss", + iou_mode="ciou", + bbox_format="xywh", eps=1e-7, - reduction='mean', + reduction="mean", loss_weight=0.05 * (3 / num_det_layers), - return_iou=True), + return_iou=True, + ), loss_obj=dict( - type='mmdet.CrossEntropyLoss', + type="mmdet.CrossEntropyLoss", use_sigmoid=True, - reduction='mean', - loss_weight=1.0 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)), - prior_match_thr=4., - obj_level_weights=[4., 
1., 0.4]), + reduction="mean", + loss_weight=1.0 * ((img_scale[0] / 640) ** 2 * 3 / num_det_layers), + ), + prior_match_thr=4.0, + obj_level_weights=[4.0, 1.0, 0.4], + ), test_cfg=dict( multi_label=True, nms_pre=30000, score_thr=0.001, - nms=dict(type='nms', iou_threshold=0.65), - max_per_img=300)) + nms=dict(type="nms", iou_threshold=0.65), + max_per_img=300, + ), +) albu_train_transforms = [ - dict(type='Blur', p=0.01), - dict(type='MedianBlur', p=0.01), - dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01) + dict(type="Blur", p=0.01), + dict(type="MedianBlur", p=0.01), + dict(type="ToGray", p=0.01), + dict(type="CLAHE", p=0.01), ] pre_transform = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='LoadAnnotations', with_bbox=True) + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="LoadAnnotations", with_bbox=True), ] train_pipeline = [ *pre_transform, dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, scaling_ratio_range=(0.5, 1.5), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), + border_val=(114, 114, 114), + ), dict( - type='mmdet.Albu', + type="mmdet.Albu", transforms=albu_train_transforms, bbox_params=dict( - type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), + type="BboxParams", + format="pascal_voc", + label_fields=["gt_bboxes_labels", "gt_ignore_flags"], + ), + keymap={"img": "image", "gt_bboxes": "bboxes"}, + ), + dict(type="YOLOv5HSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] train_dataloader = dict( @@ -156,28 +171,38 @@ num_workers=train_num_workers, persistent_workers=persistent_workers, pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), + sampler=dict(type="DefaultSampler", shuffle=True), dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file="annotations/instances_train2017.json", + data_prefix=dict(img="train2017/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + ), +) test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="YOLOv5KeepRatioResize", scale=img_scale), dict( - type='LetterResize', + type="LetterResize", scale=img_scale, allow_scale_up=False, - pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + pad_val=dict(img=114), + ), + dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + 
type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + "pad_param", + ), + ), ] val_dataloader = dict( @@ -186,62 +211,70 @@ persistent_workers=persistent_workers, pin_memory=True, drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), + sampler=dict(type="DefaultSampler", shuffle=False), dataset=dict( type=dataset_type, data_root=data_root, test_mode=True, - data_prefix=dict(img='val2017/'), - ann_file='annotations/instances_val2017.json', + data_prefix=dict(img="val2017/"), + ann_file="annotations/instances_val2017.json", pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader param_scheduler = None optim_wrapper = dict( - type='OptimWrapper', + type="OptimWrapper", optimizer=dict( - type='SGD', + type="SGD", lr=base_lr, momentum=0.937, weight_decay=0.0005, nesterov=True, - batch_size_per_gpu=train_batch_size_per_gpu), - constructor='YOLOv5OptimizerConstructor') + batch_size_per_gpu=train_batch_size_per_gpu, + ), + constructor="YOLOv5OptimizerConstructor", +) default_hooks = dict( param_scheduler=dict( - type='YOLOv5ParamSchedulerHook', - scheduler_type='linear', + type="YOLOv5ParamSchedulerHook", + scheduler_type="linear", lr_factor=0.01, - max_epochs=max_epochs), + max_epochs=max_epochs, + ), checkpoint=dict( - type='CheckpointHook', + type="CheckpointHook", interval=save_epoch_intervals, - save_best='auto', - max_keep_ckpts=3)) + save_best="auto", + max_keep_ckpts=3, + ), +) custom_hooks = [ dict( - type='EMAHook', - ema_type='ExpMomentumEMA', + type="EMAHook", + ema_type="ExpMomentumEMA", momentum=0.0001, update_buffers=True, strict_load=False, - priority=49) + priority=49, + ) ] val_evaluator = dict( - type='mmdet.CocoMetric', + type="mmdet.CocoMetric", proposal_nums=(100, 1, 10), - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') + ann_file=data_root + "annotations/instances_val2017.json", + metric="bbox", +) test_evaluator = val_evaluator train_cfg = dict( - type='EpochBasedTrainLoop', - max_epochs=max_epochs, - val_interval=save_epoch_intervals) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') + type="EpochBasedTrainLoop", max_epochs=max_epochs, val_interval=save_epoch_intervals +) +val_cfg = dict(type="ValLoop") +test_cfg = dict(type="TestLoop") diff --git a/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py b/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py index 529702e2..cdb90956 100644 --- a/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py +++ b/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py @@ -1,15 +1,15 @@ -_base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' +_base_ = "./yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py" -data_root = 'data/balloon/' +data_root = "data/balloon/" train_batch_size_per_gpu = 4 train_num_workers = 2 metainfo = { - 'classes': ('balloon', ), - 'palette': [ + "classes": ("balloon",), + "palette": [ (220, 20, 60), - ] + ], } train_dataloader = dict( @@ -18,19 +18,23 @@ dataset=dict( data_root=data_root, metainfo=metainfo, - data_prefix=dict(img='train/'), - ann_file='train.json')) + data_prefix=dict(img="train/"), + ann_file="train.json", + ), +) val_dataloader = dict( dataset=dict( data_root=data_root, metainfo=metainfo, - data_prefix=dict(img='val/'), - ann_file='val.json')) + data_prefix=dict(img="val/"), + ann_file="val.json", + ) +) test_dataloader = val_dataloader 
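# How the metainfo override above plugs in (a minimal, hypothetical sketch;
# the two classes below are invented, not part of this patch): `metainfo`
# replaces the dataset's default COCO class list, which is what lets a
# COCO-pretrained config be fine-tuned on a custom category set.
custom_metainfo = {
    "classes": ("helmet", "vest"),            # label ids follow tuple order
    "palette": [(220, 20, 60), (0, 0, 230)],  # one display color per class
}
custom_train_dataloader = dict(dataset=dict(metainfo=custom_metainfo))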
-val_evaluator = dict(ann_file=data_root + 'val.json') +val_evaluator = dict(ann_file=data_root + "val.json") test_evaluator = val_evaluator diff --git a/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py index 17b4a73b..2a429e4f 100644 --- a/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py +++ b/mmyolo/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py @@ -1,12 +1,14 @@ -_base_ = 'yolov5_s-v61_syncbn_8xb16-300e_coco.py' +_base_ = "yolov5_s-v61_syncbn_8xb16-300e_coco.py" # fast means faster training speed, # but less flexibility for multitasking model = dict( data_preprocessor=dict( - type='YOLOv5DetDataPreprocessor', - mean=[0., 0., 0.], - std=[255., 255., 255.], - bgr_to_rgb=True)) + type="YOLOv5DetDataPreprocessor", + mean=[0.0, 0.0, 0.0], + std=[255.0, 255.0, 255.0], + bgr_to_rgb=True, + ) +) -train_dataloader = dict(collate_fn=dict(type='yolov5_collate')) +train_dataloader = dict(collate_fn=dict(type="yolov5_collate")) diff --git a/mmyolo/configs/yolov5/yolov5_x-p6-v62_syncbn_fast_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_x-p6-v62_syncbn_fast_8xb16-300e_coco.py index 9fe5c010..fe7624b0 100644 --- a/mmyolo/configs/yolov5/yolov5_x-p6-v62_syncbn_fast_8xb16-300e_coco.py +++ b/mmyolo/configs/yolov5/yolov5_x-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py' +_base_ = "./yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py" deepen_factor = 1.33 widen_factor = 1.25 @@ -11,4 +11,5 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/yolov5/yolov5_x-v61_syncbn_fast_8xb16-300e_coco.py b/mmyolo/configs/yolov5/yolov5_x-v61_syncbn_fast_8xb16-300e_coco.py index 8782eed8..bd6f2105 100644 --- a/mmyolo/configs/yolov5/yolov5_x-v61_syncbn_fast_8xb16-300e_coco.py +++ b/mmyolo/configs/yolov5/yolov5_x-v61_syncbn_fast_8xb16-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py' +_base_ = "./yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py" deepen_factor = 1.33 widen_factor = 1.25 @@ -11,4 +11,5 @@ deepen_factor=deepen_factor, widen_factor=widen_factor, ), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py b/mmyolo/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py index 924f1075..7cf3ecff 100644 --- a/mmyolo/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py +++ b/mmyolo/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov6_m_syncbn_fast_8xb32-300e_coco.py' +_base_ = "./yolov6_m_syncbn_fast_8xb32-300e_coco.py" deepen_factor = 1 widen_factor = 1 @@ -7,17 +7,20 @@ backbone=dict( deepen_factor=deepen_factor, widen_factor=widen_factor, - hidden_ratio=1. / 2, + hidden_ratio=1.0 / 2, block_cfg=dict( - type='ConvWrapper', - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001)), - act_cfg=dict(type='SiLU', inplace=True)), + type="ConvWrapper", norm_cfg=dict(type="BN", momentum=0.03, eps=0.001) + ), + act_cfg=dict(type="SiLU", inplace=True), + ), neck=dict( deepen_factor=deepen_factor, widen_factor=widen_factor, - hidden_ratio=1. 
/ 2, + hidden_ratio=1.0 / 2, block_cfg=dict( - type='ConvWrapper', - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001)), - block_act_cfg=dict(type='SiLU', inplace=True)), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + type="ConvWrapper", norm_cfg=dict(type="BN", momentum=0.03, eps=0.001) + ), + block_act_cfg=dict(type="SiLU", inplace=True), + ), + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py b/mmyolo/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py index 4f8e33ab..b8336e65 100644 --- a/mmyolo/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py +++ b/mmyolo/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py' +_base_ = "./yolov6_s_syncbn_fast_8xb32-300e_coco.py" deepen_factor = 0.6 widen_factor = 0.75 @@ -6,50 +6,63 @@ model = dict( backbone=dict( - type='YOLOv6CSPBep', + type="YOLOv6CSPBep", deepen_factor=deepen_factor, widen_factor=widen_factor, - hidden_ratio=2. / 3, - block_cfg=dict(type='RepVGGBlock'), - act_cfg=dict(type='ReLU', inplace=True)), + hidden_ratio=2.0 / 3, + block_cfg=dict(type="RepVGGBlock"), + act_cfg=dict(type="ReLU", inplace=True), + ), neck=dict( - type='YOLOv6CSPRepPAFPN', + type="YOLOv6CSPRepPAFPN", deepen_factor=deepen_factor, widen_factor=widen_factor, - block_cfg=dict(type='RepVGGBlock'), - hidden_ratio=2. / 3, - block_act_cfg=dict(type='ReLU', inplace=True)), - bbox_head=dict( - type='YOLOv6Head', head_module=dict(widen_factor=widen_factor))) + block_cfg=dict(type="RepVGGBlock"), + hidden_ratio=2.0 / 3, + block_act_cfg=dict(type="ReLU", inplace=True), + ), + bbox_head=dict(type="YOLOv6Head", head_module=dict(widen_factor=widen_factor)), +) mosaic_affine_pipeline = [ dict( - type='Mosaic', + type="Mosaic", img_scale=_base_.img_scale, pad_val=114.0, - pre_transform=_base_.pre_transform), + pre_transform=_base_.pre_transform, + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2), - border_val=(114, 114, 114)) + border_val=(114, 114, 114), + ), ] train_pipeline = [ - *_base_.pre_transform, *mosaic_affine_pipeline, + *_base_.pre_transform, + *mosaic_affine_pipeline, dict( - type='YOLOv5MixUp', + type="YOLOv5MixUp", prob=0.1, - pre_transform=[*_base_.pre_transform, *mosaic_affine_pipeline]), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), + pre_transform=[*_base_.pre_transform, *mosaic_affine_pipeline], + ), + dict(type="YOLOv5HSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/mmyolo/configs/yolov6/yolov6_n_syncbn_fast_8xb32-300e_coco.py b/mmyolo/configs/yolov6/yolov6_n_syncbn_fast_8xb32-300e_coco.py index 4b992a55..f0f57ec9 100644 --- a/mmyolo/configs/yolov6/yolov6_n_syncbn_fast_8xb32-300e_coco.py +++ b/mmyolo/configs/yolov6/yolov6_n_syncbn_fast_8xb32-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py' +_base_ = "./yolov6_s_syncbn_fast_8xb32-300e_coco.py" 
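# What the two factors below actually scale (an illustrative sketch, not part
# of this patch; the real code additionally rounds channels to a multiple of
# 8): the nano model keeps the s-model layout but shrinks every stage.
def scale_stage(base_channels: int, base_blocks: int,
                widen_factor: float, deepen_factor: float):
    channels = max(int(base_channels * widen_factor), 1)  # width multiplier
    blocks = max(round(base_blocks * deepen_factor), 1)   # depth multiplier
    return channels, blocks

scale_stage(256, 6, widen_factor=0.25, deepen_factor=0.33)  # -> (64, 2)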
deepen_factor = 0.33
widen_factor = 0.25
@@ -7,7 +7,8 @@
 backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
 neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
 bbox_head=dict(
- head_module=dict(widen_factor=widen_factor),
- loss_bbox=dict(iou_mode='siou')))
+ head_module=dict(widen_factor=widen_factor), loss_bbox=dict(iou_mode="siou")
+ ),
+)
default_hooks = dict(param_scheduler=dict(lr_factor=0.02))
diff --git a/mmyolo/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py b/mmyolo/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py
index 36718f19..abda00b6 100644
--- a/mmyolo/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py
+++ b/mmyolo/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py
@@ -1,4 +1,4 @@
-_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py'
+_base_ = "./yolov6_s_syncbn_fast_8xb32-400e_coco.py"
deepen_factor = 0.33
widen_factor = 0.25
@@ -7,7 +7,8 @@
 backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
 neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor),
 bbox_head=dict(
- head_module=dict(widen_factor=widen_factor),
- loss_bbox=dict(iou_mode='siou')))
+ head_module=dict(widen_factor=widen_factor), loss_bbox=dict(iou_mode="siou")
+ ),
+)
default_hooks = dict(param_scheduler=dict(lr_factor=0.02))
diff --git a/mmyolo/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py b/mmyolo/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py
index a5201b32..bb1d0fcf 100644
--- a/mmyolo/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py
+++ b/mmyolo/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py
@@ -1,29 +1,33 @@
-_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py'
+_base_ = "./yolov6_s_syncbn_fast_8xb32-400e_coco.py"
max_epochs = 300
num_last_epochs = 15
default_hooks = dict(
 param_scheduler=dict(
- type='YOLOv5ParamSchedulerHook',
- scheduler_type='cosine',
+ type="YOLOv5ParamSchedulerHook",
+ scheduler_type="cosine",
 lr_factor=0.01,
- max_epochs=max_epochs))
+ max_epochs=max_epochs,
+ )
+)
custom_hooks = [
 dict(
- type='EMAHook',
- ema_type='ExpMomentumEMA',
+ type="EMAHook",
+ ema_type="ExpMomentumEMA",
 momentum=0.0001,
 update_buffers=True,
 strict_load=False,
- priority=49),
+ priority=49,
+ ),
 dict(
- type='mmdet.PipelineSwitchHook',
+ type="mmdet.PipelineSwitchHook",
 switch_epoch=max_epochs - num_last_epochs,
- switch_pipeline=_base_.train_pipeline_stage2)
+ switch_pipeline=_base_.train_pipeline_stage2,
+ ),
]
train_cfg = dict(
- max_epochs=max_epochs,
- dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
+ max_epochs=max_epochs, dynamic_intervals=[(max_epochs - num_last_epochs, 1)]
+)
diff --git a/mmyolo/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py b/mmyolo/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py
index f76f6b8b..286a4c00 100644
--- a/mmyolo/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py
+++ b/mmyolo/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py
@@ -1,8 +1,8 @@
-_base_ = '../_base_/default_runtime.py'
+_base_ = "../_base_/default_runtime.py"
# dataset settings
-data_root = 'data/coco/'
-dataset_type = 'YOLOv5CocoDataset'
+data_root = "data/coco/"
+dataset_type = "YOLOv5CocoDataset"
num_last_epochs = 15
max_epochs = 400
@@ -27,159 +27,190 @@
# only on Val
batch_shapes_cfg = dict(
- type='BatchShapePolicy',
+ type="BatchShapePolicy",
 batch_size=val_batch_size_per_gpu,
 img_size=img_scale[0],
 size_divisor=32,
- extra_pad_ratio=0.5)
+ extra_pad_ratio=0.5,
+)
# Enabling single-scale training is recommended, as it can speed up training.
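# Why fixed-scale training pairs with the flag set below (an illustrative
# note, not part of this patch): with static input shapes, cuDNN can
# benchmark its candidate convolution algorithms once and cache the fastest,
# so `env_cfg` here is the config-side equivalent of the plain PyTorch switch:
import torch

torch.backends.cudnn.benchmark = True  # only helps when shapes do not vary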
env_cfg = dict(cudnn_benchmark=True)
model = dict(
- type='YOLODetector',
+ type="YOLODetector",
 data_preprocessor=dict(
- type='YOLOv5DetDataPreprocessor',
- mean=[0., 0., 0.],
- std=[255., 255., 255.],
- bgr_to_rgb=True),
+ type="YOLOv5DetDataPreprocessor",
+ mean=[0.0, 0.0, 0.0],
+ std=[255.0, 255.0, 255.0],
+ bgr_to_rgb=True,
+ ),
 backbone=dict(
- type='YOLOv6EfficientRep',
+ type="YOLOv6EfficientRep",
 deepen_factor=deepen_factor,
 widen_factor=widen_factor,
- norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
- act_cfg=dict(type='ReLU', inplace=True)),
+ norm_cfg=dict(type="BN", momentum=0.03, eps=0.001),
+ act_cfg=dict(type="ReLU", inplace=True),
+ ),
 neck=dict(
- type='YOLOv6RepPAFPN',
+ type="YOLOv6RepPAFPN",
 deepen_factor=deepen_factor,
 widen_factor=widen_factor,
 in_channels=[256, 512, 1024],
 out_channels=[128, 256, 512],
 num_csp_blocks=12,
- norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
- act_cfg=dict(type='ReLU', inplace=True),
+ norm_cfg=dict(type="BN", momentum=0.03, eps=0.001),
+ act_cfg=dict(type="ReLU", inplace=True),
 ),
 bbox_head=dict(
- type='YOLOv6Head',
+ type="YOLOv6Head",
 head_module=dict(
- type='YOLOv6HeadModule',
+ type="YOLOv6HeadModule",
 num_classes=num_classes,
 in_channels=[128, 256, 512],
 widen_factor=widen_factor,
- norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
- act_cfg=dict(type='SiLU', inplace=True),
- featmap_strides=[8, 16, 32]),
+ norm_cfg=dict(type="BN", momentum=0.03, eps=0.001),
+ act_cfg=dict(type="SiLU", inplace=True),
+ featmap_strides=[8, 16, 32],
+ ),
 loss_bbox=dict(
- type='IoULoss',
- iou_mode='giou',
- bbox_format='xyxy',
- reduction='mean',
+ type="IoULoss",
+ iou_mode="giou",
+ bbox_format="xyxy",
+ reduction="mean",
 loss_weight=2.5,
- return_iou=False)),
+ return_iou=False,
+ ),
+ ),
 train_cfg=dict(
 initial_epoch=4,
 initial_assigner=dict(
- type='BatchATSSAssigner',
+ type="BatchATSSAssigner",
 num_classes=num_classes,
 topk=9,
- iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
+ iou_calculator=dict(type="mmdet.BboxOverlaps2D"),
+ ),
 assigner=dict(
- type='BatchTaskAlignedAssigner',
+ type="BatchTaskAlignedAssigner",
 num_classes=num_classes,
 topk=13,
 alpha=1,
- beta=6),
+ beta=6,
+ ),
 ),
 test_cfg=dict(
 multi_label=True,
 nms_pre=30000,
 score_thr=0.001,
- nms=dict(type='nms', iou_threshold=0.65),
- max_per_img=300))
+ nms=dict(type="nms", iou_threshold=0.65),
+ max_per_img=300,
+ ),
+)
# The training pipeline of YOLOv6 is basically the same as YOLOv5.
# The difference is that Mosaic and RandomAffine are disabled in the last 15 epochs.
# noqa pre_transform = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='LoadAnnotations', with_bbox=True) + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="LoadAnnotations", with_bbox=True), ] train_pipeline = [ *pre_transform, dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_translate_ratio=0.1, scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), border_val=(114, 114, 114), - max_shear_degree=0.0), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), + max_shear_degree=0.0, + ), + dict(type="YOLOv5HSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] train_pipeline_stage2 = [ *pre_transform, - dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict(type="YOLOv5KeepRatioResize", scale=img_scale), dict( - type='LetterResize', - scale=img_scale, - allow_scale_up=True, - pad_val=dict(img=114)), + type="LetterResize", scale=img_scale, allow_scale_up=True, pad_val=dict(img=114) + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_translate_ratio=0.1, scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), max_shear_degree=0.0, ), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), + dict(type="YOLOv5HSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] train_dataloader = dict( batch_size=train_batch_size_per_gpu, num_workers=train_num_workers, - collate_fn=dict(type='yolov5_collate'), + collate_fn=dict(type="yolov5_collate"), persistent_workers=persistent_workers, pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), + sampler=dict(type="DefaultSampler", shuffle=True), dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file="annotations/instances_train2017.json", + data_prefix=dict(img="train2017/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + ), +) test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="YOLOv5KeepRatioResize", scale=img_scale), dict( - type='LetterResize', + type="LetterResize", scale=img_scale, allow_scale_up=False, - pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + pad_val=dict(img=114), + ), + dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 
'scale_factor', 'pad_param')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + "pad_param", + ), + ), ] val_dataloader = dict( @@ -188,68 +219,79 @@ persistent_workers=persistent_workers, pin_memory=True, drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), + sampler=dict(type="DefaultSampler", shuffle=False), dataset=dict( type=dataset_type, data_root=data_root, test_mode=True, - data_prefix=dict(img='val2017/'), - ann_file='annotations/instances_val2017.json', + data_prefix=dict(img="val2017/"), + ann_file="annotations/instances_val2017.json", pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader # Optimizer and learning rate scheduler of YOLOv6 are basically the same as YOLOv5. # noqa # The difference is that the scheduler_type of YOLOv6 is cosine. optim_wrapper = dict( - type='OptimWrapper', + type="OptimWrapper", optimizer=dict( - type='SGD', + type="SGD", lr=base_lr, momentum=0.937, weight_decay=0.0005, nesterov=True, - batch_size_per_gpu=train_batch_size_per_gpu), - constructor='YOLOv5OptimizerConstructor') + batch_size_per_gpu=train_batch_size_per_gpu, + ), + constructor="YOLOv5OptimizerConstructor", +) default_hooks = dict( param_scheduler=dict( - type='YOLOv5ParamSchedulerHook', - scheduler_type='cosine', + type="YOLOv5ParamSchedulerHook", + scheduler_type="cosine", lr_factor=0.01, - max_epochs=max_epochs), + max_epochs=max_epochs, + ), checkpoint=dict( - type='CheckpointHook', + type="CheckpointHook", interval=save_epoch_intervals, max_keep_ckpts=3, - save_best='auto')) + save_best="auto", + ), +) custom_hooks = [ dict( - type='EMAHook', - ema_type='ExpMomentumEMA', + type="EMAHook", + ema_type="ExpMomentumEMA", momentum=0.0001, update_buffers=True, strict_load=False, - priority=49), + priority=49, + ), dict( - type='mmdet.PipelineSwitchHook', + type="mmdet.PipelineSwitchHook", switch_epoch=max_epochs - num_last_epochs, - switch_pipeline=train_pipeline_stage2) + switch_pipeline=train_pipeline_stage2, + ), ] val_evaluator = dict( - type='mmdet.CocoMetric', + type="mmdet.CocoMetric", proposal_nums=(100, 1, 10), - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') + ann_file=data_root + "annotations/instances_val2017.json", + metric="bbox", +) test_evaluator = val_evaluator train_cfg = dict( - type='EpochBasedTrainLoop', + type="EpochBasedTrainLoop", max_epochs=max_epochs, val_interval=save_epoch_intervals, - dynamic_intervals=[(max_epochs - num_last_epochs, 1)]) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') + dynamic_intervals=[(max_epochs - num_last_epochs, 1)], +) +val_cfg = dict(type="ValLoop") +test_cfg = dict(type="TestLoop") diff --git a/mmyolo/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py b/mmyolo/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py index d5a19e16..5d37d41d 100644 --- a/mmyolo/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py +++ b/mmyolo/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py' +_base_ = "./yolov6_s_syncbn_fast_8xb32-300e_coco.py" deepen_factor = 0.33 widen_factor = 0.375 @@ -7,6 +7,8 @@ backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), bbox_head=dict( - type='YOLOv6Head', + type="YOLOv6Head", head_module=dict(widen_factor=widen_factor), - 
loss_bbox=dict(iou_mode='siou'))) + loss_bbox=dict(iou_mode="siou"), + ), +) diff --git a/mmyolo/configs/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py b/mmyolo/configs/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py index e8592072..c11643a7 100644 --- a/mmyolo/configs/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py +++ b/mmyolo/configs/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py' +_base_ = "./yolov6_s_syncbn_fast_8xb32-400e_coco.py" deepen_factor = 0.33 widen_factor = 0.375 @@ -7,6 +7,8 @@ backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), bbox_head=dict( - type='YOLOv6Head', + type="YOLOv6Head", head_module=dict(widen_factor=widen_factor), - loss_bbox=dict(iou_mode='siou'))) + loss_bbox=dict(iou_mode="siou"), + ), +) diff --git a/mmyolo/configs/yolov7/yolov7_d-p6_syncbn_fast_8x16b-300e_coco.py b/mmyolo/configs/yolov7/yolov7_d-p6_syncbn_fast_8x16b-300e_coco.py index a6871526..16fc5e8e 100644 --- a/mmyolo/configs/yolov7/yolov7_d-p6_syncbn_fast_8x16b-300e_coco.py +++ b/mmyolo/configs/yolov7/yolov7_d-p6_syncbn_fast_8x16b-300e_coco.py @@ -1,21 +1,25 @@ -_base_ = './yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py' +_base_ = "./yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py" model = dict( - backbone=dict(arch='D'), + backbone=dict(arch="D"), neck=dict( use_maxpool_in_downsample=True, use_in_channels_in_downsample=True, block_cfg=dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=0.4, block_ratio=0.2, num_blocks=6, - num_convs_in_block=1), + num_convs_in_block=1, + ), in_channels=[384, 768, 1152, 1536], - out_channels=[192, 384, 576, 768]), + out_channels=[192, 384, 576, 768], + ), bbox_head=dict( head_module=dict( in_channels=[192, 384, 576, 768], main_out_channels=[384, 768, 1152, 1536], aux_out_channels=[384, 768, 1152, 1536], - ))) + ) + ), +) diff --git a/mmyolo/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py b/mmyolo/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py index 3d1463dc..c5f3f0ae 100644 --- a/mmyolo/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py +++ b/mmyolo/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py @@ -1,19 +1,23 @@ -_base_ = './yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py' +_base_ = "./yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py" model = dict( - backbone=dict(arch='E'), + backbone=dict(arch="E"), neck=dict( use_maxpool_in_downsample=True, use_in_channels_in_downsample=True, block_cfg=dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=0.4, block_ratio=0.2, num_blocks=6, - num_convs_in_block=1), + num_convs_in_block=1, + ), in_channels=[320, 640, 960, 1280], - out_channels=[160, 320, 480, 640]), + out_channels=[160, 320, 480, 640], + ), bbox_head=dict( head_module=dict( - in_channels=[160, 320, 480, 640], - main_out_channels=[320, 640, 960, 1280]))) + in_channels=[160, 320, 480, 640], main_out_channels=[320, 640, 960, 1280] + ) + ), +) diff --git a/mmyolo/configs/yolov7/yolov7_e2e-p6_syncbn_fast_8x16b-300e_coco.py b/mmyolo/configs/yolov7/yolov7_e2e-p6_syncbn_fast_8x16b-300e_coco.py index 6af81051..ae16d93c 100644 --- a/mmyolo/configs/yolov7/yolov7_e2e-p6_syncbn_fast_8x16b-300e_coco.py +++ b/mmyolo/configs/yolov7/yolov7_e2e-p6_syncbn_fast_8x16b-300e_coco.py @@ -1,20 +1,24 @@ -_base_ = './yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py' +_base_ = "./yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py" model = dict( - backbone=dict(arch='E2E'), + backbone=dict(arch="E2E"), neck=dict( 
use_maxpool_in_downsample=True, use_in_channels_in_downsample=True, block_cfg=dict( - type='EELANBlock', + type="EELANBlock", num_elan_block=2, middle_ratio=0.4, block_ratio=0.2, num_blocks=6, - num_convs_in_block=1), + num_convs_in_block=1, + ), in_channels=[320, 640, 960, 1280], - out_channels=[160, 320, 480, 640]), + out_channels=[160, 320, 480, 640], + ), bbox_head=dict( head_module=dict( - in_channels=[160, 320, 480, 640], - main_out_channels=[320, 640, 960, 1280]))) + in_channels=[160, 320, 480, 640], main_out_channels=[320, 640, 960, 1280] + ) + ), +) diff --git a/mmyolo/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py b/mmyolo/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py index 2bf8cb7f..650ad9e5 100644 --- a/mmyolo/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py +++ b/mmyolo/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py @@ -1,8 +1,8 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = "../_base_/default_runtime.py" # dataset settings -data_root = 'data/coco/' -dataset_type = 'YOLOv5CocoDataset' +data_root = "data/coco/" +dataset_type = "YOLOv5CocoDataset" # parameters that often need to be modified img_scale = (640, 640) # width, height @@ -17,17 +17,18 @@ # only on Val batch_shapes_cfg = dict( - type='BatchShapePolicy', + type="BatchShapePolicy", batch_size=val_batch_size_per_gpu, img_size=img_scale[0], size_divisor=32, - extra_pad_ratio=0.5) + extra_pad_ratio=0.5, +) # different from yolov5 anchors = [ [(12, 16), (19, 36), (40, 28)], # P3/8 [(36, 75), (76, 55), (72, 146)], # P4/16 - [(142, 110), (192, 243), (459, 401)] # P5/32 + [(142, 110), (192, 243), (459, 401)], # P5/32 ] strides = [8, 16, 32] num_det_layers = 3 @@ -38,133 +39,150 @@ env_cfg = dict(cudnn_benchmark=True) model = dict( - type='YOLODetector', + type="YOLODetector", data_preprocessor=dict( - type='YOLOv5DetDataPreprocessor', - mean=[0., 0., 0.], - std=[255., 255., 255.], - bgr_to_rgb=True), + type="YOLOv5DetDataPreprocessor", + mean=[0.0, 0.0, 0.0], + std=[255.0, 255.0, 255.0], + bgr_to_rgb=True, + ), backbone=dict( - type='YOLOv7Backbone', - arch='L', - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True)), + type="YOLOv7Backbone", + arch="L", + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), + ), neck=dict( - type='YOLOv7PAFPN', + type="YOLOv7PAFPN", block_cfg=dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=0.5, block_ratio=0.25, num_blocks=4, - num_convs_in_block=1), + num_convs_in_block=1, + ), upsample_feats_cat_first=False, in_channels=[512, 1024, 1024], # The real output channel will be multiplied by 2 out_channels=[128, 256, 512], - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True)), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), + ), bbox_head=dict( - type='YOLOv7Head', + type="YOLOv7Head", head_module=dict( - type='YOLOv7HeadModule', + type="YOLOv7HeadModule", num_classes=num_classes, in_channels=[256, 512, 1024], featmap_strides=strides, - num_base_priors=3), + num_base_priors=3, + ), prior_generator=dict( - type='mmdet.YOLOAnchorGenerator', - base_sizes=anchors, - strides=strides), + type="mmdet.YOLOAnchorGenerator", base_sizes=anchors, strides=strides + ), # scaled based on number of detection layers loss_cls=dict( - type='mmdet.CrossEntropyLoss', + type="mmdet.CrossEntropyLoss", use_sigmoid=True, - reduction='mean', - loss_weight=0.3 * (num_classes / 80 * 3 / num_det_layers)), 
+ reduction="mean", + loss_weight=0.3 * (num_classes / 80 * 3 / num_det_layers), + ), loss_bbox=dict( - type='IoULoss', - iou_mode='ciou', - bbox_format='xywh', - reduction='mean', + type="IoULoss", + iou_mode="ciou", + bbox_format="xywh", + reduction="mean", loss_weight=0.05 * (3 / num_det_layers), - return_iou=True), + return_iou=True, + ), loss_obj=dict( - type='mmdet.CrossEntropyLoss', + type="mmdet.CrossEntropyLoss", use_sigmoid=True, - reduction='mean', - loss_weight=0.7 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)), - obj_level_weights=[4., 1., 0.4], + reduction="mean", + loss_weight=0.7 * ((img_scale[0] / 640) ** 2 * 3 / num_det_layers), + ), + obj_level_weights=[4.0, 1.0, 0.4], # BatchYOLOv7Assigner params - prior_match_thr=4., + prior_match_thr=4.0, simota_candidate_topk=10, simota_iou_weight=3.0, - simota_cls_weight=1.0), + simota_cls_weight=1.0, + ), test_cfg=dict( multi_label=True, nms_pre=30000, score_thr=0.001, - nms=dict(type='nms', iou_threshold=0.65), - max_per_img=300)) + nms=dict(type="nms", iou_threshold=0.65), + max_per_img=300, + ), +) pre_transform = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='LoadAnnotations', with_bbox=True) + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="LoadAnnotations", with_bbox=True), ] mosiac4_pipeline = [ dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, max_translate_ratio=0.2, # note scaling_ratio_range=(0.1, 2.0), # note # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), + border_val=(114, 114, 114), + ), ] mosiac9_pipeline = [ dict( - type='Mosaic9', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic9", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, max_translate_ratio=0.2, # note scaling_ratio_range=(0.1, 2.0), # note # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), + border_val=(114, 114, 114), + ), ] randchoice_mosaic_pipeline = dict( - type='RandomChoice', + type="RandomChoice", transforms=[mosiac4_pipeline, mosiac9_pipeline], - prob=[0.8, 0.2]) + prob=[0.8, 0.2], +) train_pipeline = [ *pre_transform, randchoice_mosaic_pipeline, dict( - type='YOLOv5MixUp', + type="YOLOv5MixUp", alpha=8.0, # note beta=8.0, # note prob=0.15, - pre_transform=[*pre_transform, randchoice_mosaic_pipeline]), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), + pre_transform=[*pre_transform, randchoice_mosaic_pipeline], + ), + dict(type="YOLOv5HSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] train_dataloader = dict( @@ -172,29 +190,39 @@ num_workers=train_num_workers, persistent_workers=persistent_workers, pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - collate_fn=dict(type='yolov5_collate'), # FASTER + 
sampler=dict(type="DefaultSampler", shuffle=True), + collate_fn=dict(type="yolov5_collate"), # FASTER dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file="annotations/instances_train2017.json", + data_prefix=dict(img="train2017/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + ), +) test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="YOLOv5KeepRatioResize", scale=img_scale), dict( - type='LetterResize', + type="LetterResize", scale=img_scale, allow_scale_up=False, - pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + pad_val=dict(img=114), + ), + dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + "pad_param", + ), + ), ] val_dataloader = dict( @@ -203,67 +231,77 @@ persistent_workers=persistent_workers, pin_memory=True, drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), + sampler=dict(type="DefaultSampler", shuffle=False), dataset=dict( type=dataset_type, data_root=data_root, test_mode=True, - data_prefix=dict(img='val2017/'), - ann_file='annotations/instances_val2017.json', + data_prefix=dict(img="val2017/"), + ann_file="annotations/instances_val2017.json", pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader param_scheduler = None optim_wrapper = dict( - type='OptimWrapper', + type="OptimWrapper", optimizer=dict( - type='SGD', + type="SGD", lr=0.01, momentum=0.937, weight_decay=0.0005, nesterov=True, - batch_size_per_gpu=train_batch_size_per_gpu), - constructor='YOLOv7OptimWrapperConstructor') + batch_size_per_gpu=train_batch_size_per_gpu, + ), + constructor="YOLOv7OptimWrapperConstructor", +) default_hooks = dict( param_scheduler=dict( - type='YOLOv5ParamSchedulerHook', - scheduler_type='cosine', + type="YOLOv5ParamSchedulerHook", + scheduler_type="cosine", lr_factor=0.1, # note - max_epochs=max_epochs), + max_epochs=max_epochs, + ), checkpoint=dict( - type='CheckpointHook', + type="CheckpointHook", save_param_scheduler=False, interval=1, - save_best='auto', - max_keep_ckpts=3)) + save_best="auto", + max_keep_ckpts=3, + ), +) val_evaluator = dict( - type='mmdet.CocoMetric', + type="mmdet.CocoMetric", proposal_nums=(100, 1, 10), # Can be accelerated - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') + ann_file=data_root + "annotations/instances_val2017.json", + metric="bbox", +) test_evaluator = val_evaluator train_cfg = dict( - type='EpochBasedTrainLoop', + type="EpochBasedTrainLoop", max_epochs=max_epochs, val_interval=save_epoch_intervals, - dynamic_intervals=[(270, 1)]) + dynamic_intervals=[(270, 1)], +) custom_hooks = [ dict( - type='EMAHook', - ema_type='ExpMomentumEMA', + type="EMAHook", + ema_type="ExpMomentumEMA", momentum=0.0001, update_buffers=True, strict_load=False, - priority=49) + priority=49, + ) ] -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') +val_cfg = dict(type="ValLoop") 
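# Semantics of `dynamic_intervals=[(270, 1)]` in the train_cfg above (an
# explanatory sketch of the loop behaviour, not runnable config): validation
# runs every `val_interval` epochs until the milestone epoch is reached, then
# switches to the paired interval, i.e. every epoch from epoch 270 onwards.
def current_val_interval(epoch: int, default: int, milestones=((270, 1),)):
    interval = default
    for begin, new_interval in milestones:
        if epoch >= begin:
            interval = new_interval
    return interval

assert current_val_interval(100, default=10) == 10  # early training
assert current_val_interval(275, default=10) == 1   # final phase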
+test_cfg = dict(type="TestLoop") # randomness = dict(seed=1, deterministic=True) diff --git a/mmyolo/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py b/mmyolo/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py index afb00402..27c4e60b 100644 --- a/mmyolo/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py +++ b/mmyolo/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py' +_base_ = "./yolov7_l_syncbn_fast_8x16b-300e_coco.py" num_classes = _base_.num_classes num_det_layers = _base_.num_det_layers @@ -6,76 +6,84 @@ pre_transform = _base_.pre_transform model = dict( - backbone=dict( - arch='Tiny', act_cfg=dict(type='LeakyReLU', negative_slope=0.1)), + backbone=dict(arch="Tiny", act_cfg=dict(type="LeakyReLU", negative_slope=0.1)), neck=dict( is_tiny_version=True, in_channels=[128, 256, 512], out_channels=[64, 128, 256], - block_cfg=dict( - _delete_=True, type='TinyDownSampleBlock', middle_ratio=0.25), - act_cfg=dict(type='LeakyReLU', negative_slope=0.1), - use_repconv_outs=False), + block_cfg=dict(_delete_=True, type="TinyDownSampleBlock", middle_ratio=0.25), + act_cfg=dict(type="LeakyReLU", negative_slope=0.1), + use_repconv_outs=False, + ), bbox_head=dict( head_module=dict(in_channels=[128, 256, 512]), - loss_cls=dict(loss_weight=0.5 * - (num_classes / 80 * 3 / num_det_layers)), - loss_obj=dict(loss_weight=1.0 * - ((img_scale[0] / 640)**2 * 3 / num_det_layers)))) + loss_cls=dict(loss_weight=0.5 * (num_classes / 80 * 3 / num_det_layers)), + loss_obj=dict( + loss_weight=1.0 * ((img_scale[0] / 640) ** 2 * 3 / num_det_layers) + ), + ), +) mosiac4_pipeline = [ dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, max_translate_ratio=0.1, # change scaling_ratio_range=(0.5, 1.6), # change # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), + border_val=(114, 114, 114), + ), ] mosiac9_pipeline = [ dict( - type='Mosaic9', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic9", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, max_translate_ratio=0.1, # change scaling_ratio_range=(0.5, 1.6), # change border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), + border_val=(114, 114, 114), + ), ] randchoice_mosaic_pipeline = dict( - type='RandomChoice', + type="RandomChoice", transforms=[mosiac4_pipeline, mosiac9_pipeline], - prob=[0.8, 0.2]) + prob=[0.8, 0.2], +) train_pipeline = [ *pre_transform, randchoice_mosaic_pipeline, dict( - type='YOLOv5MixUp', + type="YOLOv5MixUp", alpha=8.0, beta=8.0, prob=0.05, # change - pre_transform=[*pre_transform, randchoice_mosaic_pipeline]), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), + pre_transform=[*pre_transform, randchoice_mosaic_pipeline], + ), + dict(type="YOLOv5HSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + 
"flip_direction", + ), + ), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/mmyolo/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py b/mmyolo/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py index 6536c093..721d8e06 100644 --- a/mmyolo/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py +++ b/mmyolo/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py' +_base_ = "./yolov7_l_syncbn_fast_8x16b-300e_coco.py" img_scale = (1280, 1280) # height, width num_classes = 80 @@ -9,110 +9,130 @@ [(19, 27), (44, 40), (38, 94)], # P3/8 [(96, 68), (86, 152), (180, 137)], # P4/16 [(140, 301), (303, 264), (238, 542)], # P5/32 - [(436, 615), (739, 380), (925, 792)] # P6/64 + [(436, 615), (739, 380), (925, 792)], # P6/64 ] strides = [8, 16, 32, 64] num_det_layers = 4 model = dict( - backbone=dict(arch='W', out_indices=(2, 3, 4, 5)), + backbone=dict(arch="W", out_indices=(2, 3, 4, 5)), neck=dict( in_channels=[256, 512, 768, 1024], out_channels=[128, 256, 384, 512], use_maxpool_in_downsample=False, - use_repconv_outs=False), + use_repconv_outs=False, + ), bbox_head=dict( head_module=dict( - type='YOLOv7p6HeadModule', + type="YOLOv7p6HeadModule", in_channels=[128, 256, 384, 512], featmap_strides=strides, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True)), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), + ), prior_generator=dict(base_sizes=anchors, strides=strides), simota_candidate_topk=20, # note # scaled based on number of detection layers - loss_cls=dict(loss_weight=0.3 * - (num_classes / 80 * 3 / num_det_layers)), + loss_cls=dict(loss_weight=0.3 * (num_classes / 80 * 3 / num_det_layers)), loss_bbox=dict(loss_weight=0.05 * (3 / num_det_layers)), - loss_obj=dict(loss_weight=0.7 * - ((img_scale[0] / 640)**2 * 3 / num_det_layers)), - obj_level_weights=[4.0, 1.0, 0.25, 0.06])) + loss_obj=dict( + loss_weight=0.7 * ((img_scale[0] / 640) ** 2 * 3 / num_det_layers) + ), + obj_level_weights=[4.0, 1.0, 0.25, 0.06], + ), +) pre_transform = _base_.pre_transform mosiac4_pipeline = [ dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, max_translate_ratio=0.2, # note scaling_ratio_range=(0.1, 2.0), # note # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), + border_val=(114, 114, 114), + ), ] mosiac9_pipeline = [ dict( - type='Mosaic9', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic9", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, max_translate_ratio=0.2, # note scaling_ratio_range=(0.1, 2.0), # note # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), + border_val=(114, 114, 114), + ), ] randchoice_mosaic_pipeline = dict( - type='RandomChoice', + type="RandomChoice", transforms=[mosiac4_pipeline, mosiac9_pipeline], - prob=[0.8, 0.2]) + prob=[0.8, 0.2], +) train_pipeline = [ *pre_transform, randchoice_mosaic_pipeline, dict( - type='YOLOv5MixUp', + type="YOLOv5MixUp", alpha=8.0, # note 
beta=8.0, # note prob=0.15, - pre_transform=[*pre_transform, randchoice_mosaic_pipeline]), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), + pre_transform=[*pre_transform, randchoice_mosaic_pipeline], + ), + dict(type="YOLOv5HSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="YOLOv5KeepRatioResize", scale=img_scale), dict( - type='LetterResize', + type="LetterResize", scale=img_scale, allow_scale_up=False, - pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + pad_val=dict(img=114), + ), + dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + "pad_param", + ), + ), ] val_dataloader = dict( - dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) + dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg) +) test_dataloader = val_dataloader # The only difference between P6 and P5 in terms of diff --git a/mmyolo/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py b/mmyolo/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py index 99297059..f159b20d 100644 --- a/mmyolo/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py +++ b/mmyolo/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py @@ -1,15 +1,18 @@ -_base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py' +_base_ = "./yolov7_l_syncbn_fast_8x16b-300e_coco.py" model = dict( - backbone=dict(arch='X'), + backbone=dict(arch="X"), neck=dict( in_channels=[640, 1280, 1280], out_channels=[160, 320, 640], block_cfg=dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=0.4, block_ratio=0.4, num_blocks=3, - num_convs_in_block=2), - use_repconv_outs=False), - bbox_head=dict(head_module=dict(in_channels=[320, 640, 1280]))) + num_convs_in_block=2, + ), + use_repconv_outs=False, + ), + bbox_head=dict(head_module=dict(in_channels=[320, 640, 1280])), +) diff --git a/mmyolo/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py b/mmyolo/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py index 425edaed..b034cb7d 100644 --- a/mmyolo/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py +++ b/mmyolo/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov8_m_syncbn_fast_8xb16-500e_coco.py' +_base_ = "./yolov8_m_syncbn_fast_8xb16-500e_coco.py" deepen_factor = 1.00 widen_factor = 1.00 @@ -9,16 +9,20 @@ backbone=dict( last_stage_out_channels=last_stage_out_channels, deepen_factor=deepen_factor, - widen_factor=widen_factor), + widen_factor=widen_factor, + ), neck=dict( deepen_factor=deepen_factor, widen_factor=widen_factor, in_channels=[256, 512, last_stage_out_channels], - out_channels=[256, 512, last_stage_out_channels]), + out_channels=[256, 512, last_stage_out_channels], + ), bbox_head=dict( 
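# Reference for the head widths below (an illustrative note, not part of this
# patch): the in_channels listed here are pre-scaling values; the head module
# multiplies them by widen_factor (with divisible-by-8 rounding), so at
# widen_factor=1.00 they are used as-is, while e.g. a 0.5 factor would yield
# roughly half as many real channels per level.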
head_module=dict( - widen_factor=widen_factor, - in_channels=[256, 512, last_stage_out_channels]))) + widen_factor=widen_factor, in_channels=[256, 512, last_stage_out_channels] + ) + ), +) pre_transform = _base_.pre_transform albu_train_transform = _base_.albu_train_transform @@ -26,12 +30,14 @@ last_transform = _base_.last_transform train_pipeline = [ - *pre_transform, *mosaic_affine_transform, + *pre_transform, + *mosaic_affine_transform, dict( - type='YOLOv5MixUp', + type="YOLOv5MixUp", prob=mixup_ratio, - pre_transform=[*pre_transform, *mosaic_affine_transform]), - *last_transform + pre_transform=[*pre_transform, *mosaic_affine_transform], + ), + *last_transform, ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/mmyolo/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py b/mmyolo/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py index ed350683..5d0256f8 100644 --- a/mmyolo/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py +++ b/mmyolo/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py' +_base_ = "./yolov8_s_syncbn_fast_8xb16-500e_coco.py" deepen_factor = 0.67 widen_factor = 0.75 @@ -15,16 +15,20 @@ backbone=dict( last_stage_out_channels=last_stage_out_channels, deepen_factor=deepen_factor, - widen_factor=widen_factor), + widen_factor=widen_factor, + ), neck=dict( deepen_factor=deepen_factor, widen_factor=widen_factor, in_channels=[256, 512, last_stage_out_channels], - out_channels=[256, 512, last_stage_out_channels]), + out_channels=[256, 512, last_stage_out_channels], + ), bbox_head=dict( head_module=dict( - widen_factor=widen_factor, - in_channels=[256, 512, last_stage_out_channels]))) + widen_factor=widen_factor, in_channels=[256, 512, last_stage_out_channels] + ) + ), +) pre_transform = _base_.pre_transform albu_train_transform = _base_.albu_train_transform @@ -32,59 +36,65 @@ mosaic_affine_transform = [ dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, max_aspect_ratio=100, scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)) + border_val=(114, 114, 114), + ), ] train_pipeline = [ - *pre_transform, *mosaic_affine_transform, + *pre_transform, + *mosaic_affine_transform, dict( - type='YOLOv5MixUp', + type="YOLOv5MixUp", prob=mixup_ratio, - pre_transform=[*pre_transform, *mosaic_affine_transform]), - *last_transform + pre_transform=[*pre_transform, *mosaic_affine_transform], + ), + *last_transform, ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) train_pipeline_stage2 = [ *pre_transform, - dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict(type="YOLOv5KeepRatioResize", scale=img_scale), dict( - type='LetterResize', + type="LetterResize", scale=img_scale, allow_scale_up=True, - pad_val=dict(img=114.0)), + pad_val=dict(img=114.0), + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), max_aspect_ratio=100, - border_val=(114, 114, 114)), *last_transform + border_val=(114, 114, 114), + ), + *last_transform, ] custom_hooks = [ dict( - type='EMAHook', - ema_type='ExpMomentumEMA', + 
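# NOTE (hedged sketch, not part of the patch): the EMAHook configured below
# keeps an exponential moving average of the model weights. ExpMomentumEMA
# additionally decays the momentum over early steps, which is omitted here;
# this shows only the assumed core update rule:
def ema_update(averaged, source, momentum=0.0001):
    # averaged <- (1 - m) * averaged + m * source
    return (1 - momentum) * averaged + momentum * source

print(ema_update(1.0, 0.0))  # 0.9999 -- the averaged weights drift slowly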
type="EMAHook", + ema_type="ExpMomentumEMA", momentum=0.0001, update_buffers=True, strict_load=False, - priority=49), + priority=49, + ), dict( - type='mmdet.PipelineSwitchHook', + type="mmdet.PipelineSwitchHook", switch_epoch=_base_.max_epochs - 10, - switch_pipeline=train_pipeline_stage2) + switch_pipeline=train_pipeline_stage2, + ), ] diff --git a/mmyolo/configs/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py b/mmyolo/configs/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py index 5833df3a..1632980e 100644 --- a/mmyolo/configs/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py +++ b/mmyolo/configs/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py' +_base_ = "./yolov8_s_syncbn_fast_8xb16-500e_coco.py" deepen_factor = 0.33 widen_factor = 0.25 @@ -6,4 +6,5 @@ model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py b/mmyolo/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py index 641862b8..5c08b84b 100644 --- a/mmyolo/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py +++ b/mmyolo/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py @@ -1,8 +1,8 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = "../_base_/default_runtime.py" # dataset settings -data_root = 'data/coco/' -dataset_type = 'YOLOv5CocoDataset' +data_root = "data/coco/" +dataset_type = "YOLOv5CocoDataset" # parameters that often need to be modified num_classes = 80 @@ -33,142 +33,161 @@ env_cfg = dict(cudnn_benchmark=True) model = dict( - type='YOLODetector', + type="YOLODetector", data_preprocessor=dict( - type='YOLOv5DetDataPreprocessor', - mean=[0., 0., 0.], - std=[255., 255., 255.], - bgr_to_rgb=True), + type="YOLOv5DetDataPreprocessor", + mean=[0.0, 0.0, 0.0], + std=[255.0, 255.0, 255.0], + bgr_to_rgb=True, + ), backbone=dict( - type='YOLOv8CSPDarknet', - arch='P5', + type="YOLOv8CSPDarknet", + arch="P5", last_stage_out_channels=last_stage_out_channels, deepen_factor=deepen_factor, widen_factor=widen_factor, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True)), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), + ), neck=dict( - type='YOLOv8PAFPN', + type="YOLOv8PAFPN", deepen_factor=deepen_factor, widen_factor=widen_factor, in_channels=[256, 512, last_stage_out_channels], out_channels=[256, 512, last_stage_out_channels], num_csp_blocks=3, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True)), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), + ), bbox_head=dict( - type='YOLOv8Head', + type="YOLOv8Head", head_module=dict( - type='YOLOv8HeadModule', + type="YOLOv8HeadModule", num_classes=num_classes, in_channels=[256, 512, last_stage_out_channels], widen_factor=widen_factor, reg_max=16, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True), - featmap_strides=[8, 16, 32]), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), + featmap_strides=[8, 16, 32], + ), prior_generator=dict( - type='mmdet.MlvlPointGenerator', offset=0.5, strides=[8, 16, 32]), - bbox_coder=dict(type='DistancePointBBoxCoder'), + 
type="mmdet.MlvlPointGenerator", offset=0.5, strides=[8, 16, 32] + ), + bbox_coder=dict(type="DistancePointBBoxCoder"), loss_cls=dict( - type='mmdet.CrossEntropyLoss', + type="mmdet.CrossEntropyLoss", use_sigmoid=True, - reduction='none', - loss_weight=0.5), + reduction="none", + loss_weight=0.5, + ), loss_bbox=dict( - type='IoULoss', - iou_mode='ciou', - bbox_format='xyxy', - reduction='sum', + type="IoULoss", + iou_mode="ciou", + bbox_format="xyxy", + reduction="sum", loss_weight=7.5, - return_iou=False), + return_iou=False, + ), # Since the dfloss is implemented differently in the official # and mmdet, we're going to divide loss_weight by 4. loss_dfl=dict( - type='mmdet.DistributionFocalLoss', - reduction='mean', - loss_weight=1.5 / 4)), + type="mmdet.DistributionFocalLoss", reduction="mean", loss_weight=1.5 / 4 + ), + ), train_cfg=dict( assigner=dict( - type='BatchTaskAlignedAssigner', + type="BatchTaskAlignedAssigner", num_classes=num_classes, use_ciou=True, topk=10, alpha=0.5, beta=6.0, - eps=1e-9)), + eps=1e-9, + ) + ), test_cfg=dict( multi_label=True, nms_pre=30000, score_thr=0.001, - nms=dict(type='nms', iou_threshold=0.7), - max_per_img=300)) + nms=dict(type="nms", iou_threshold=0.7), + max_per_img=300, + ), +) albu_train_transform = [ - dict(type='Blur', p=0.01), - dict(type='MedianBlur', p=0.01), - dict(type='ToGray', p=0.01), - dict(type='CLAHE', p=0.01) + dict(type="Blur", p=0.01), + dict(type="MedianBlur", p=0.01), + dict(type="ToGray", p=0.01), + dict(type="CLAHE", p=0.01), ] pre_transform = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='LoadAnnotations', with_bbox=True) + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="LoadAnnotations", with_bbox=True), ] last_transform = [ dict( - type='mmdet.Albu', + type="mmdet.Albu", transforms=albu_train_transform, bbox_params=dict( - type='BboxParams', - format='pascal_voc', - label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), - keymap={ - 'img': 'image', - 'gt_bboxes': 'bboxes' - }), - dict(type='YOLOv5HSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), + type="BboxParams", + format="pascal_voc", + label_fields=["gt_bboxes_labels", "gt_ignore_flags"], + ), + keymap={"img": "image", "gt_bboxes": "bboxes"}, + ), + dict(type="YOLOv5HSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] train_pipeline = [ *pre_transform, dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='YOLOv5RandomAffine', + type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, scaling_ratio_range=(0.5, 1.5), max_aspect_ratio=100, # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), - border_val=(114, 114, 114)), - *last_transform + border_val=(114, 114, 114), + ), + *last_transform, ] train_pipeline_stage2 = [ *pre_transform, - dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict(type="YOLOv5KeepRatioResize", scale=img_scale), dict( - type='LetterResize', + type="LetterResize", scale=img_scale, allow_scale_up=True, - pad_val=dict(img=114.0)), + pad_val=dict(img=114.0), + ), dict( - type='YOLOv5RandomAffine', + 
type="YOLOv5RandomAffine", max_rotate_degree=0.0, max_shear_degree=0.0, scaling_ratio_range=(0.5, 1.5), max_aspect_ratio=100, - border_val=(114, 114, 114)), *last_transform + border_val=(114, 114, 114), + ), + *last_transform, ] train_dataloader = dict( @@ -176,29 +195,39 @@ num_workers=train_num_workers, persistent_workers=persistent_workers, pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), - collate_fn=dict(type='yolov5_collate'), + sampler=dict(type="DefaultSampler", shuffle=True), + collate_fn=dict(type="yolov5_collate"), dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file="annotations/instances_train2017.json", + data_prefix=dict(img="train2017/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline)) + pipeline=train_pipeline, + ), +) test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="YOLOv5KeepRatioResize", scale=img_scale), dict( - type='LetterResize', + type="LetterResize", scale=img_scale, allow_scale_up=False, - pad_val=dict(img=114)), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + pad_val=dict(img=114), + ), + dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "scale_factor", + "pad_param", + ), + ), ] # only on Val @@ -218,69 +247,80 @@ persistent_workers=persistent_workers, pin_memory=True, drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), + sampler=dict(type="DefaultSampler", shuffle=False), dataset=dict( type=dataset_type, data_root=data_root, test_mode=True, - data_prefix=dict(img='val2017/'), - ann_file='annotations/instances_val2017.json', + data_prefix=dict(img="val2017/"), + ann_file="annotations/instances_val2017.json", pipeline=test_pipeline, - batch_shapes_cfg=batch_shapes_cfg)) + batch_shapes_cfg=batch_shapes_cfg, + ), +) test_dataloader = val_dataloader param_scheduler = None optim_wrapper = dict( - type='OptimWrapper', + type="OptimWrapper", clip_grad=dict(max_norm=10.0), optimizer=dict( - type='SGD', + type="SGD", lr=base_lr, momentum=0.937, weight_decay=0.0005, nesterov=True, - batch_size_per_gpu=train_batch_size_per_gpu), - constructor='YOLOv5OptimizerConstructor') + batch_size_per_gpu=train_batch_size_per_gpu, + ), + constructor="YOLOv5OptimizerConstructor", +) default_hooks = dict( param_scheduler=dict( - type='YOLOv5ParamSchedulerHook', - scheduler_type='linear', + type="YOLOv5ParamSchedulerHook", + scheduler_type="linear", lr_factor=lr_factor, - max_epochs=max_epochs), + max_epochs=max_epochs, + ), checkpoint=dict( - type='CheckpointHook', + type="CheckpointHook", interval=save_epoch_intervals, - save_best='auto', - max_keep_ckpts=2)) + save_best="auto", + max_keep_ckpts=2, + ), +) custom_hooks = [ dict( - type='EMAHook', - ema_type='ExpMomentumEMA', + type="EMAHook", + ema_type="ExpMomentumEMA", momentum=0.0001, update_buffers=True, strict_load=False, - priority=49), + priority=49, + ), dict( - type='mmdet.PipelineSwitchHook', + type="mmdet.PipelineSwitchHook", switch_epoch=max_epochs - 10, - switch_pipeline=train_pipeline_stage2) + 
switch_pipeline=train_pipeline_stage2, + ), ] val_evaluator = dict( - type='mmdet.CocoMetric', + type="mmdet.CocoMetric", proposal_nums=(100, 1, 10), - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') + ann_file=data_root + "annotations/instances_val2017.json", + metric="bbox", +) test_evaluator = val_evaluator train_cfg = dict( - type='EpochBasedTrainLoop', + type="EpochBasedTrainLoop", max_epochs=max_epochs, val_interval=save_epoch_intervals, - dynamic_intervals=[(max_epochs - 10, 1)]) + dynamic_intervals=[(max_epochs - 10, 1)], +) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') +val_cfg = dict(type="ValLoop") +test_cfg = dict(type="TestLoop") diff --git a/mmyolo/configs/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py b/mmyolo/configs/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py index 3d8e6653..56c39eba 100644 --- a/mmyolo/configs/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py +++ b/mmyolo/configs/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolov8_l_syncbn_fast_8xb16-500e_coco.py' +_base_ = "./yolov8_l_syncbn_fast_8xb16-500e_coco.py" deepen_factor = 1.00 widen_factor = 1.25 @@ -6,4 +6,5 @@ model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/yolox/yolox_l_8xb8-300e_coco.py b/mmyolo/configs/yolox/yolox_l_8xb8-300e_coco.py index e5e971d9..9191e05d 100644 --- a/mmyolo/configs/yolox/yolox_l_8xb8-300e_coco.py +++ b/mmyolo/configs/yolox/yolox_l_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolox_s_8xb8-300e_coco.py' +_base_ = "./yolox_s_8xb8-300e_coco.py" deepen_factor = 1.0 widen_factor = 1.0 @@ -7,4 +7,5 @@ model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/yolox/yolox_m_8xb8-300e_coco.py b/mmyolo/configs/yolox/yolox_m_8xb8-300e_coco.py index 2d869413..a2d5101f 100644 --- a/mmyolo/configs/yolox/yolox_m_8xb8-300e_coco.py +++ b/mmyolo/configs/yolox/yolox_m_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolox_s_8xb8-300e_coco.py' +_base_ = "./yolox_s_8xb8-300e_coco.py" deepen_factor = 0.67 widen_factor = 0.75 @@ -7,4 +7,5 @@ model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/configs/yolox/yolox_nano_8xb8-300e_coco.py b/mmyolo/configs/yolox/yolox_nano_8xb8-300e_coco.py index 2c94a5d9..dc223b99 100644 --- a/mmyolo/configs/yolox/yolox_nano_8xb8-300e_coco.py +++ b/mmyolo/configs/yolox/yolox_nano_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolox_tiny_8xb8-300e_coco.py' +_base_ = "./yolox_tiny_8xb8-300e_coco.py" deepen_factor = 0.33 widen_factor = 0.25 @@ -9,11 +9,14 @@ backbone=dict( deepen_factor=deepen_factor, widen_factor=widen_factor, - use_depthwise=use_depthwise), + use_depthwise=use_depthwise, + ), neck=dict( deepen_factor=deepen_factor, widen_factor=widen_factor, - use_depthwise=use_depthwise), + use_depthwise=use_depthwise, + ), bbox_head=dict( - 
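# NOTE (hedged sketch, not part of the patch): the dynamic_intervals entry in
# train_cfg above is assumed to mean "validate every val_interval epochs,
# then every epoch from the given milestone onwards":
def val_epochs(max_epochs, interval, milestones):
    epochs = set(range(interval, max_epochs + 1, interval))
    for start, new_interval in milestones:
        epochs |= set(range(start, max_epochs + 1, new_interval))
    return sorted(epochs)

print(val_epochs(500, 10, [(490, 1)])[-12:])  # 480, then 490..500 densely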
head_module=dict( - widen_factor=widen_factor, use_depthwise=use_depthwise))) + head_module=dict(widen_factor=widen_factor, use_depthwise=use_depthwise) + ), +) diff --git a/mmyolo/configs/yolox/yolox_s_8xb8-300e_coco.py b/mmyolo/configs/yolox/yolox_s_8xb8-300e_coco.py index 0cebbb0e..b8f7f065 100644 --- a/mmyolo/configs/yolox/yolox_s_8xb8-300e_coco.py +++ b/mmyolo/configs/yolox/yolox_s_8xb8-300e_coco.py @@ -1,7 +1,7 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = "../_base_/default_runtime.py" -data_root = 'data/coco/' -dataset_type = 'YOLOv5CocoDataset' +data_root = "data/coco/" +dataset_type = "YOLOv5CocoDataset" img_scale = (640, 640) # width, height deepen_factor = 0.33 @@ -18,47 +18,51 @@ # model settings model = dict( - type='YOLODetector', + type="YOLODetector", init_cfg=dict( - type='Kaiming', - layer='Conv2d', + type="Kaiming", + layer="Conv2d", a=2.23606797749979, # math.sqrt(5) - distribution='uniform', - mode='fan_in', - nonlinearity='leaky_relu'), + distribution="uniform", + mode="fan_in", + nonlinearity="leaky_relu", + ), # TODO: Waiting for mmengine support use_syncbn=False, data_preprocessor=dict( - type='mmdet.DetDataPreprocessor', + type="mmdet.DetDataPreprocessor", pad_size_divisor=32, batch_augments=[ dict( - type='mmdet.BatchSyncRandomResize', + type="mmdet.BatchSyncRandomResize", random_size_range=(480, 800), size_divisor=32, - interval=10) - ]), + interval=10, + ) + ], + ), backbone=dict( - type='YOLOXCSPDarknet', + type="YOLOXCSPDarknet", deepen_factor=deepen_factor, widen_factor=widen_factor, out_indices=(2, 3, 4), spp_kernal_sizes=(5, 9, 13), - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), ), neck=dict( - type='YOLOXPAFPN', + type="YOLOXPAFPN", deepen_factor=deepen_factor, widen_factor=widen_factor, in_channels=[256, 512, 1024], out_channels=256, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True)), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), + ), bbox_head=dict( - type='YOLOXHead', + type="YOLOXHead", head_module=dict( - type='YOLOXHeadModule', + type="YOLOXHeadModule", num_classes=80, in_channels=256, feat_channels=256, @@ -66,90 +70,99 @@ stacked_convs=2, featmap_strides=(8, 16, 32), use_depthwise=False, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True), + norm_cfg=dict(type="BN", momentum=0.03, eps=0.001), + act_cfg=dict(type="SiLU", inplace=True), ), loss_cls=dict( - type='mmdet.CrossEntropyLoss', + type="mmdet.CrossEntropyLoss", use_sigmoid=True, - reduction='sum', - loss_weight=1.0), + reduction="sum", + loss_weight=1.0, + ), loss_bbox=dict( - type='mmdet.IoULoss', - mode='square', + type="mmdet.IoULoss", + mode="square", eps=1e-16, - reduction='sum', - loss_weight=5.0), + reduction="sum", + loss_weight=5.0, + ), loss_obj=dict( - type='mmdet.CrossEntropyLoss', + type="mmdet.CrossEntropyLoss", use_sigmoid=True, - reduction='sum', - loss_weight=1.0), - loss_bbox_aux=dict( - type='mmdet.L1Loss', reduction='sum', loss_weight=1.0)), + reduction="sum", + loss_weight=1.0, + ), + loss_bbox_aux=dict(type="mmdet.L1Loss", reduction="sum", loss_weight=1.0), + ), train_cfg=dict( assigner=dict( - type='mmdet.SimOTAAssigner', + type="mmdet.SimOTAAssigner", center_radius=2.5, - iou_calculator=dict(type='mmdet.BboxOverlaps2D'))), + iou_calculator=dict(type="mmdet.BboxOverlaps2D"), + 
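# NOTE (hedged sketch, not part of the patch): the BatchSyncRandomResize
# augment above is assumed to redraw the training resolution every `interval`
# iterations, choosing a multiple of size_divisor inside random_size_range:
import random

def sample_train_size(low=480, high=800, divisor=32):
    return random.randint(low // divisor, high // divisor) * divisor

sizes = {sample_train_size() for _ in range(1000)}
assert all(480 <= s <= 800 and s % 32 == 0 for s in sizes)  # 480, 512, ... 800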
) + ), test_cfg=dict( yolox_style=True, # better multi_label=True, # 40.5 -> 40.7 score_thr=0.001, max_per_img=300, - nms=dict(type='nms', iou_threshold=0.65))) + nms=dict(type="nms", iou_threshold=0.65), + ), +) pre_transform = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='LoadAnnotations', with_bbox=True) + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="LoadAnnotations", with_bbox=True), ] train_pipeline_stage1 = [ *pre_transform, dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='mmdet.RandomAffine', + type="mmdet.RandomAffine", scaling_ratio_range=(0.1, 2), # img_scale is (width, height) - border=(-img_scale[0] // 2, -img_scale[1] // 2)), + border=(-img_scale[0] // 2, -img_scale[1] // 2), + ), dict( - type='YOLOXMixUp', + type="YOLOXMixUp", img_scale=img_scale, ratio_range=(0.8, 1.6), pad_val=114.0, - pre_transform=pre_transform), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.FilterAnnotations', - min_gt_bbox_wh=(1, 1), - keep_empty=False), + pre_transform=pre_transform, + ), + dict(type="mmdet.YOLOXHSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), + dict(type="mmdet.FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] train_pipeline_stage2 = [ *pre_transform, - dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True), + dict(type="mmdet.Resize", scale=img_scale, keep_ratio=True), dict( - type='mmdet.Pad', + type="mmdet.Pad", pad_to_square=True, # If the image is three-channel, the pad value needs # to be set separately for each channel. 
- pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), - dict( - type='mmdet.FilterAnnotations', - min_gt_bbox_wh=(1, 1), - keep_empty=False), - dict(type='mmdet.PackDetInputs') + pad_val=dict(img=(114.0, 114.0, 114.0)), + ), + dict(type="mmdet.YOLOXHSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), + dict(type="mmdet.FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), + dict(type="mmdet.PackDetInputs"), ] train_dataloader = dict( @@ -157,27 +170,26 @@ num_workers=train_num_workers, persistent_workers=True, pin_memory=True, - sampler=dict(type='DefaultSampler', shuffle=True), + sampler=dict(type="DefaultSampler", shuffle=True), dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file="annotations/instances_train2017.json", + data_prefix=dict(img="train2017/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=train_pipeline_stage1)) + pipeline=train_pipeline_stage1, + ), +) test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True), + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="mmdet.Resize", scale=img_scale, keep_ratio=True), + dict(type="mmdet.Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"), dict( - type='mmdet.Pad', - pad_to_square=True, - pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) + type="mmdet.PackDetInputs", + meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"), + ), ] val_dataloader = dict( @@ -186,22 +198,25 @@ persistent_workers=True, pin_memory=True, drop_last=False, - sampler=dict(type='DefaultSampler', shuffle=False), + sampler=dict(type="DefaultSampler", shuffle=False), dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), + ann_file="annotations/instances_val2017.json", + data_prefix=dict(img="val2017/"), test_mode=True, - pipeline=test_pipeline)) + pipeline=test_pipeline, + ), +) test_dataloader = val_dataloader # Reduce evaluation time val_evaluator = dict( - type='mmdet.CocoMetric', + type="mmdet.CocoMetric", proposal_nums=(100, 1, 10), - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') + ann_file=data_root + "annotations/instances_val2017.json", + metric="bbox", +) test_evaluator = val_evaluator @@ -209,11 +224,12 @@ # default 8 gpu base_lr = 0.01 optim_wrapper = dict( - type='OptimWrapper', + type="OptimWrapper", optimizer=dict( - type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4, - nesterov=True), - paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.)) + type="SGD", lr=base_lr, momentum=0.9, weight_decay=5e-4, nesterov=True + ), + paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0), +) # learning rate param_scheduler = [ @@ -221,56 +237,63 @@ # use quadratic formula to warm up 5 epochs # and lr is updated by iteration # TODO: fix default scope in get function - type='mmdet.QuadraticWarmupLR', + type="mmdet.QuadraticWarmupLR", by_epoch=True, begin=0, end=5, - convert_to_iter_based=True), + convert_to_iter_based=True, + ), 
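# NOTE (hedged sketch, not part of the patch): the quadratic warm-up above
# hands over to the cosine schedule below. These are assumed shapes, not
# mmengine's exact implementation:
import math

def lr_at(epoch, base_lr=0.01, warmup=5, t_max=285, eta_min=0.01 * 0.05):
    if epoch < warmup:
        return base_lr * (epoch / warmup) ** 2  # quadratic warm-up
    t = min(epoch - warmup, t_max)
    return eta_min + 0.5 * (base_lr - eta_min) * (1 + math.cos(math.pi * t / t_max))

print(lr_at(1), lr_at(5), lr_at(290))  # approx. 0.0004, 0.01, 0.0005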
dict( # use cosine lr from 5 to 285 epoch - type='CosineAnnealingLR', + type="CosineAnnealingLR", eta_min=base_lr * 0.05, begin=5, T_max=max_epochs - num_last_epochs, end=max_epochs - num_last_epochs, by_epoch=True, - convert_to_iter_based=True), + convert_to_iter_based=True, + ), dict( # use fixed lr during last 15 epochs - type='ConstantLR', + type="ConstantLR", by_epoch=True, factor=1, begin=max_epochs - num_last_epochs, end=max_epochs, - ) + ), ] default_hooks = dict( checkpoint=dict( - type='CheckpointHook', interval=1, max_keep_ckpts=3, save_best='auto')) + type="CheckpointHook", interval=1, max_keep_ckpts=3, save_best="auto" + ) +) custom_hooks = [ dict( - type='YOLOXModeSwitchHook', + type="YOLOXModeSwitchHook", num_last_epochs=num_last_epochs, new_train_pipeline=train_pipeline_stage2, - priority=48), - dict(type='mmdet.SyncNormHook', priority=48), + priority=48, + ), + dict(type="mmdet.SyncNormHook", priority=48), dict( - type='EMAHook', - ema_type='ExpMomentumEMA', + type="EMAHook", + ema_type="ExpMomentumEMA", momentum=0.0001, update_buffers=True, strict_load=False, - priority=49) + priority=49, + ), ] train_cfg = dict( - type='EpochBasedTrainLoop', + type="EpochBasedTrainLoop", max_epochs=max_epochs, val_interval=save_epoch_intervals, - dynamic_intervals=[(max_epochs - num_last_epochs, 1)]) + dynamic_intervals=[(max_epochs - num_last_epochs, 1)], +) auto_scale_lr = dict(base_batch_size=64) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') +val_cfg = dict(type="ValLoop") +test_cfg = dict(type="TestLoop") diff --git a/mmyolo/configs/yolox/yolox_tiny_8xb8-300e_coco.py b/mmyolo/configs/yolox/yolox_tiny_8xb8-300e_coco.py index 0fd0a15a..018c1bba 100644 --- a/mmyolo/configs/yolox/yolox_tiny_8xb8-300e_coco.py +++ b/mmyolo/configs/yolox/yolox_tiny_8xb8-300e_coco.py @@ -1,20 +1,24 @@ -_base_ = './yolox_s_8xb8-300e_coco.py' +_base_ = "./yolox_s_8xb8-300e_coco.py" deepen_factor = 0.33 widen_factor = 0.375 # model settings model = dict( - data_preprocessor=dict(batch_augments=[ - dict( - type='mmdet.BatchSyncRandomResize', - random_size_range=(320, 640), # note - size_divisor=32, - interval=10) - ]), + data_preprocessor=dict( + batch_augments=[ + dict( + type="mmdet.BatchSyncRandomResize", + random_size_range=(320, 640), # note + size_divisor=32, + interval=10, + ) + ] + ), backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) img_scale = _base_.img_scale pre_transform = _base_.pre_transform @@ -22,39 +26,39 @@ train_pipeline_stage1 = [ *pre_transform, dict( - type='Mosaic', - img_scale=img_scale, - pad_val=114.0, - pre_transform=pre_transform), + type="Mosaic", img_scale=img_scale, pad_val=114.0, pre_transform=pre_transform + ), dict( - type='mmdet.RandomAffine', + type="mmdet.RandomAffine", scaling_ratio_range=(0.5, 1.5), # note # img_scale is (width, height) - border=(-img_scale[0] // 2, -img_scale[1] // 2)), - dict(type='mmdet.YOLOXHSVRandomAug'), - dict(type='mmdet.RandomFlip', prob=0.5), + border=(-img_scale[0] // 2, -img_scale[1] // 2), + ), + dict(type="mmdet.YOLOXHSVRandomAug"), + dict(type="mmdet.RandomFlip", prob=0.5), + dict(type="mmdet.FilterAnnotations", min_gt_bbox_wh=(1, 1), keep_empty=False), dict( - type='mmdet.FilterAnnotations', - min_gt_bbox_wh=(1, 1), - keep_empty=False), - dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 
'ori_shape', 'img_shape', 'flip', - 'flip_direction')) + type="mmdet.PackDetInputs", + meta_keys=( + "img_id", + "img_path", + "ori_shape", + "img_shape", + "flip", + "flip_direction", + ), + ), ] test_pipeline = [ - dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='mmdet.Resize', scale=(416, 416), keep_ratio=True), # note - dict( - type='mmdet.Pad', - pad_to_square=True, - pad_val=dict(img=(114.0, 114.0, 114.0))), - dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + dict(type="LoadImageFromFile", file_client_args=_base_.file_client_args), + dict(type="mmdet.Resize", scale=(416, 416), keep_ratio=True), # note + dict(type="mmdet.Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type="LoadAnnotations", with_bbox=True, _scope_="mmdet"), dict( - type='mmdet.PackDetInputs', - meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor')) + type="mmdet.PackDetInputs", + meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"), + ), ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline_stage1)) diff --git a/mmyolo/configs/yolox/yolox_x_8xb8-300e_coco.py b/mmyolo/configs/yolox/yolox_x_8xb8-300e_coco.py index 56f1280a..c06164e7 100644 --- a/mmyolo/configs/yolox/yolox_x_8xb8-300e_coco.py +++ b/mmyolo/configs/yolox/yolox_x_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolox_s_8xb8-300e_coco.py' +_base_ = "./yolox_s_8xb8-300e_coco.py" deepen_factor = 1.33 widen_factor = 1.25 @@ -7,4 +7,5 @@ model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), - bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + bbox_head=dict(head_module=dict(widen_factor=widen_factor)), +) diff --git a/mmyolo/demo.py b/mmyolo/demo.py index 059afa50..4f2159d7 100644 --- a/mmyolo/demo.py +++ b/mmyolo/demo.py @@ -1,8 +1,13 @@ -from mmdet.apis import init_detector, inference_detector +from mmdet.apis import inference_detector, init_detector + from mmyolo.utils import register_all_modules register_all_modules() -config_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' -checkpoint_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' -model = init_detector(config_file, checkpoint_file, device='cuda:0') # or device='cuda:0' 'cpu' -inference_detector(model, 'demo/demo.jpg') \ No newline at end of file +config_file = "yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py" +checkpoint_file = ( + "yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth" +) +model = init_detector( + config_file, checkpoint_file, device="cuda:0" +) # or device='cuda:0' 'cpu' +inference_detector(model, "demo/demo.jpg") diff --git a/mmyolo/demo/boxam_vis_demo.py b/mmyolo/demo/boxam_vis_demo.py index 3672b727..8dc266e4 100644 --- a/mmyolo/demo/boxam_vis_demo.py +++ b/mmyolo/demo/boxam_vis_demo.py @@ -19,111 +19,118 @@ from mmengine.utils import ProgressBar from mmyolo.utils import register_all_modules -from mmyolo.utils.boxam_utils import (BoxAMDetectorVisualizer, - BoxAMDetectorWrapper, DetAblationLayer, - DetBoxScoreTarget, GradCAM, - GradCAMPlusPlus, reshape_transform) +from mmyolo.utils.boxam_utils import ( + BoxAMDetectorVisualizer, + BoxAMDetectorWrapper, + DetAblationLayer, + DetBoxScoreTarget, + GradCAM, + GradCAMPlusPlus, + reshape_transform, +) from mmyolo.utils.misc import get_file_list try: from pytorch_grad_cam import AblationCAM, EigenCAM except ImportError: - raise ImportError('Please run `pip 
install "grad-cam"` to install ' - 'pytorch_grad_cam package.') + raise ImportError( + 'Please run `pip install "grad-cam"` to install ' "pytorch_grad_cam package." + ) GRAD_FREE_METHOD_MAP = { - 'ablationcam': AblationCAM, - 'eigencam': EigenCAM, + "ablationcam": AblationCAM, + "eigencam": EigenCAM, # 'scorecam': ScoreCAM, # consumes too much memory } -GRAD_BASED_METHOD_MAP = {'gradcam': GradCAM, 'gradcam++': GradCAMPlusPlus} +GRAD_BASED_METHOD_MAP = {"gradcam": GradCAM, "gradcam++": GradCAMPlusPlus} -ALL_SUPPORT_METHODS = list(GRAD_FREE_METHOD_MAP.keys() - | GRAD_BASED_METHOD_MAP.keys()) +ALL_SUPPORT_METHODS = list(GRAD_FREE_METHOD_MAP.keys() | GRAD_BASED_METHOD_MAP.keys()) IGNORE_LOSS_PARAMS = { - 'yolov5': ['loss_obj'], - 'yolov6': ['loss_cls'], - 'yolox': ['loss_obj'], - 'rtmdet': ['loss_cls'], + "yolov5": ["loss_obj"], + "yolov6": ["loss_cls"], + "yolox": ["loss_obj"], + "rtmdet": ["loss_cls"], } # This parameter is required in some algorithms # for calculating Loss message_hub = MessageHub.get_current_instance() -message_hub.runtime_info['epoch'] = 0 +message_hub.runtime_info["epoch"] = 0 def parse_args(): - parser = argparse.ArgumentParser(description='Visualize Box AM') + parser = argparse.ArgumentParser(description="Visualize Box AM") + parser.add_argument("img", help="Image path, include image file, dir and URL.") + parser.add_argument("config", help="Config file") + parser.add_argument("checkpoint", help="Checkpoint file") parser.add_argument( - 'img', help='Image path, include image file, dir and URL.') - parser.add_argument('config', help='Config file') - parser.add_argument('checkpoint', help='Checkpoint file') - parser.add_argument( - '--method', - default='gradcam', + "--method", + default="gradcam", choices=ALL_SUPPORT_METHODS, - help='Type of method to use, supports ' - f'{", ".join(ALL_SUPPORT_METHODS)}.') + help="Type of method to use, supports " f'{", ".join(ALL_SUPPORT_METHODS)}.', + ) parser.add_argument( - '--target-layers', - default=['neck.out_layers[2]'], - nargs='+', + "--target-layers", + default=["neck.out_layers[2]"], + nargs="+", type=str, - help='The target layers to get Box AM, if not set, the tool will ' - 'specify the neck.out_layers[2]') - parser.add_argument( - '--out-dir', default='./output', help='Path to output file') - parser.add_argument( - '--show', action='store_true', help='Show the CAM results') + help="The target layers to get Box AM, if not set, the tool will " + "specify the neck.out_layers[2]", + ) + parser.add_argument("--out-dir", default="./output", help="Path to output file") + parser.add_argument("--show", action="store_true", help="Show the CAM results") + parser.add_argument("--device", default="cuda:0", help="Device used for inference") parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') + "--score-thr", type=float, default=0.3, help="Bbox score threshold" + ) parser.add_argument( - '--score-thr', type=float, default=0.3, help='Bbox score threshold') - parser.add_argument( - '--topk', + "--topk", type=int, default=-1, - help='Select topk predict resutls to show. -1 are mean all.') + help="Select topk predict resutls to show. -1 are mean all.", + ) parser.add_argument( - '--max-shape', - nargs='+', + "--max-shape", + nargs="+", type=int, default=-1, - help='max shapes. Its purpose is to save GPU memory. ' - 'The activation map is scaled and then evaluated. ' - 'If set to -1, it means no scaling.') + help="max shapes. Its purpose is to save GPU memory. 
" + "The activation map is scaled and then evaluated. " + "If set to -1, it means no scaling.", + ) parser.add_argument( - '--preview-model', + "--preview-model", default=False, - action='store_true', - help='To preview all the model layers') + action="store_true", + help="To preview all the model layers", + ) parser.add_argument( - '--norm-in-bbox', action='store_true', help='Norm in bbox of am image') + "--norm-in-bbox", action="store_true", help="Norm in bbox of am image" + ) parser.add_argument( - '--cfg-options', - nargs='+', + "--cfg-options", + nargs="+", action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' + help="override some settings in the used config, the key-value pair " + "in xxx=yyy format will be merged into config file. If the value to " 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') + "Note that the quotation marks are necessary and that no white space " + "is allowed.", + ) # Only used by AblationCAM parser.add_argument( - '--batch-size', - type=int, - default=1, - help='batch of inference of AblationCAM') + "--batch-size", type=int, default=1, help="batch of inference of AblationCAM" + ) parser.add_argument( - '--ratio-channels-to-ablate', + "--ratio-channels-to-ablate", type=int, default=0.5, - help='Making it much faster of AblationCAM. ' - 'The parameter controls how many channels should be ablated') + help="Making it much faster of AblationCAM. " + "The parameter controls how many channels should be ablated", + ) args = parser.parse_args() return args @@ -136,26 +143,26 @@ def init_detector_and_visualizer(args, cfg): assert len(max_shape) == 1 or len(max_shape) == 2 model_wrapper = BoxAMDetectorWrapper( - cfg, args.checkpoint, args.score_thr, device=args.device) + cfg, args.checkpoint, args.score_thr, device=args.device + ) if args.preview_model: print(model_wrapper.detector) - print('\n Please remove `--preview-model` to get the BoxAM.') + print("\n Please remove `--preview-model` to get the BoxAM.") return None, None target_layers = [] for target_layer in args.target_layers: try: - target_layers.append( - eval(f'model_wrapper.detector.{target_layer}')) + target_layers.append(eval(f"model_wrapper.detector.{target_layer}")) except Exception as e: print(model_wrapper.detector) - raise RuntimeError('layer does not exist', e) + raise RuntimeError("layer does not exist", e) ablationcam_extra_params = { - 'batch_size': args.batch_size, - 'ablation_layer': DetAblationLayer(), - 'ratio_channels_to_ablate': args.ratio_channels_to_ablate + "batch_size": args.batch_size, + "ablation_layer": DetAblationLayer(), + "ratio_channels_to_ablate": args.ratio_channels_to_ablate, } if args.method in GRAD_BASED_METHOD_MAP: @@ -170,9 +177,11 @@ def init_detector_and_visualizer(args, cfg): model_wrapper, target_layers, reshape_transform=partial( - reshape_transform, max_shape=max_shape, is_need_grad=is_need_grad), + reshape_transform, max_shape=max_shape, is_need_grad=is_need_grad + ), is_need_grad=is_need_grad, - extra_params=ablationcam_extra_params) + extra_params=ablationcam_extra_params, + ) return model_wrapper, boxam_detector_visualizer @@ -185,7 +194,7 @@ def main(): ignore_loss_params = None for param_keys in IGNORE_LOSS_PARAMS: if param_keys in args.config: - print(f'The algorithm currently 
used is {param_keys}') + print(f"The algorithm currently used is {param_keys}") ignore_loss_params = IGNORE_LOSS_PARAMS[param_keys] break @@ -196,8 +205,7 @@ def main(): if not os.path.exists(args.out_dir) and not args.show: os.mkdir(args.out_dir) - model_wrapper, boxam_detector_visualizer = init_detector_and_visualizer( - args, cfg) + model_wrapper, boxam_detector_visualizer = init_detector_and_visualizer(args, cfg) # get file list image_list, source_type = get_file_list(args.img) @@ -216,24 +224,24 @@ def main(): pred_instances = pred_instances[pred_instances.scores > args.score_thr] if len(pred_instances) == 0: - warnings.warn('empty detection results! skip this') + warnings.warn("empty detection results! skip this") continue if args.topk > 0: - pred_instances = pred_instances[:args.topk] + pred_instances = pred_instances[: args.topk] targets = [ DetBoxScoreTarget( pred_instances, device=args.device, - ignore_loss_params=ignore_loss_params) + ignore_loss_params=ignore_loss_params, + ) ] if args.method in GRAD_BASED_METHOD_MAP: model_wrapper.need_loss(True) model_wrapper.set_input_data(image, pred_instances) - boxam_detector_visualizer.switch_activations_and_grads( - model_wrapper) + boxam_detector_visualizer.switch_activations_and_grads(model_wrapper) # get box am image grayscale_boxam = boxam_detector_visualizer(image, targets=targets) @@ -244,10 +252,11 @@ def main(): image, pred_instances, grayscale_boxam, - with_norm_in_bboxes=args.norm_in_bbox) + with_norm_in_bboxes=args.norm_in_bbox, + ) - if source_type['is_dir']: - filename = os.path.relpath(image_path, args.img).replace('/', '_') + if source_type["is_dir"]: + filename = os.path.relpath(image_path, args.img).replace("/", "_") else: filename = os.path.basename(image_path) out_file = None if args.show else os.path.join(args.out_dir, filename) @@ -262,15 +271,15 @@ def main(): # switch if args.method in GRAD_BASED_METHOD_MAP: model_wrapper.need_loss(False) - boxam_detector_visualizer.switch_activations_and_grads( - model_wrapper) + boxam_detector_visualizer.switch_activations_and_grads(model_wrapper) progress_bar.update() if not args.show: - print(f'All done!' - f'\nResults have been saved at {os.path.abspath(args.out_dir)}') + print( + f"All done!" 
f"\nResults have been saved at {os.path.abspath(args.out_dir)}" + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/demo/deploy_demo.py b/mmyolo/demo/deploy_demo.py index f5d08df4..73d5cf76 100644 --- a/mmyolo/demo/deploy_demo.py +++ b/mmyolo/demo/deploy_demo.py @@ -37,26 +37,25 @@ from mmdeploy.utils import get_input_shape, load_config except ImportError: raise ImportError( - 'mmdeploy is not installed, please see ' - 'https://mmdeploy.readthedocs.io/en/1.x/01-how-to-build/build_from_source.html' # noqa + "mmdeploy is not installed, please see " + "https://mmdeploy.readthedocs.io/en/1.x/01-how-to-build/build_from_source.html" # noqa ) def parse_args(): - parser = argparse.ArgumentParser(description='For mmdeploy predict') + parser = argparse.ArgumentParser(description="For mmdeploy predict") + parser.add_argument("img", help="Image path, include image file, dir and URL.") + parser.add_argument("config", help="model config root") + parser.add_argument("checkpoint", help="checkpoint backend model path") + parser.add_argument("--deploy-cfg", help="deploy config path") + parser.add_argument("--device", default="cuda:0", help="device used for conversion") + parser.add_argument("--out-dir", default="./output", help="Path to output file") parser.add_argument( - 'img', help='Image path, include image file, dir and URL.') - parser.add_argument('config', help='model config root') - parser.add_argument('checkpoint', help='checkpoint backend model path') - parser.add_argument('--deploy-cfg', help='deploy config path') - parser.add_argument( - '--device', default='cuda:0', help='device used for conversion') - parser.add_argument( - '--out-dir', default='./output', help='Path to output file') - parser.add_argument( - '--show', action='store_true', help='Show the detection results') + "--show", action="store_true", help="Show the detection results" + ) parser.add_argument( - '--score-thr', type=float, default=0.3, help='Bbox score threshold') + "--score-thr", type=float, default=0.3, help="Bbox score threshold" + ) args = parser.parse_args() return args @@ -91,8 +90,8 @@ def main(): with torch.no_grad(): result = model.test_step(model_inputs) - if source_type['is_dir']: - filename = os.path.relpath(file, args.img).replace('/', '_') + if source_type["is_dir"]: + filename = os.path.relpath(file, args.img).replace("/", "_") else: filename = os.path.basename(file) out_file = None if args.show else os.path.join(args.out_dir, filename) @@ -100,7 +99,8 @@ def main(): # filter score result = result[0] result.pred_instances = result.pred_instances[ - result.pred_instances.scores > args.score_thr] + result.pred_instances.scores > args.score_thr + ] # visualize results task_processor.visualize( @@ -109,12 +109,13 @@ def main(): result=result, show_result=args.show, window_name=os.path.basename(filename), - output_file=out_file) + output_file=out_file, + ) progress_bar.update() - print('All done!') + print("All done!") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/demo/featmap_vis_demo.py b/mmyolo/demo/featmap_vis_demo.py index 2006c7af..f90ec32c 100644 --- a/mmyolo/demo/featmap_vis_demo.py +++ b/mmyolo/demo/featmap_vis_demo.py @@ -14,63 +14,65 @@ def parse_args(): - parser = argparse.ArgumentParser(description='Visualize feature map') + parser = argparse.ArgumentParser(description="Visualize feature map") + parser.add_argument("img", help="Image path, include image file, dir and URL.") + parser.add_argument("config", help="Config file") + 
parser.add_argument("checkpoint", help="Checkpoint file") + parser.add_argument("--out-dir", default="./output", help="Path to output file") parser.add_argument( - 'img', help='Image path, include image file, dir and URL.') - parser.add_argument('config', help='Config file') - parser.add_argument('checkpoint', help='Checkpoint file') - parser.add_argument( - '--out-dir', default='./output', help='Path to output file') - parser.add_argument( - '--target-layers', - default=['backbone'], - nargs='+', + "--target-layers", + default=["backbone"], + nargs="+", type=str, - help='The target layers to get feature map, if not set, the tool will ' - 'specify the backbone') + help="The target layers to get feature map, if not set, the tool will " + "specify the backbone", + ) parser.add_argument( - '--preview-model', + "--preview-model", default=False, - action='store_true', - help='To preview all the model layers') - parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') + action="store_true", + help="To preview all the model layers", + ) + parser.add_argument("--device", default="cuda:0", help="Device used for inference") parser.add_argument( - '--score-thr', type=float, default=0.3, help='Bbox score threshold') + "--score-thr", type=float, default=0.3, help="Bbox score threshold" + ) + parser.add_argument("--show", action="store_true", help="Show the featmap results") parser.add_argument( - '--show', action='store_true', help='Show the featmap results') + "--channel-reduction", + default="select_max", + help="Reduce multiple channels to a single channel", + ) parser.add_argument( - '--channel-reduction', - default='select_max', - help='Reduce multiple channels to a single channel') - parser.add_argument( - '--topk', + "--topk", type=int, default=4, - help='Select topk channel to show by the sum of each channel') + help="Select topk channel to show by the sum of each channel", + ) parser.add_argument( - '--arrangement', - nargs='+', + "--arrangement", + nargs="+", type=int, default=[2, 2], - help='The arrangement of featmap when channel_reduction is ' - 'not None and topk > 0') + help="The arrangement of featmap when channel_reduction is " + "not None and topk > 0", + ) parser.add_argument( - '--cfg-options', - nargs='+', + "--cfg-options", + nargs="+", action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' + help="override some settings in the used config, the key-value pair " + "in xxx=yyy format will be merged into config file. If the value to " 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') + "Note that the quotation marks are necessary and that no white space " + "is allowed.", + ) args = parser.parse_args() return args class ActivationsWrapper: - def __init__(self, model, target_layers): self.model = model self.activations = [] @@ -78,7 +80,8 @@ def __init__(self, model, target_layers): self.image = None for target_layer in target_layers: self.handles.append( - target_layer.register_forward_hook(self.save_activation)) + target_layer.register_forward_hook(self.save_activation) + ) def save_activation(self, module, input, output): self.activations.append(output) @@ -104,7 +107,7 @@ def main(): cfg.merge_from_dict(args.cfg_options) channel_reduction = args.channel_reduction - if channel_reduction == 'None': + if channel_reduction == "None": channel_reduction = None assert len(args.arrangement) == 2 @@ -115,17 +118,19 @@ def main(): if args.preview_model: print(model) - print('\n This flag is only show model, if you want to continue, ' - 'please remove `--preview-model` to get the feature map.') + print( + "\n This flag is only show model, if you want to continue, " + "please remove `--preview-model` to get the feature map." + ) return target_layers = [] for target_layer in args.target_layers: try: - target_layers.append(eval(f'model.{target_layer}')) + target_layers.append(eval(f"model.{target_layer}")) except Exception as e: print(model) - raise RuntimeError('layer does not exist', e) + raise RuntimeError("layer does not exist", e) activations_wrapper = ActivationsWrapper(model, target_layers) @@ -150,10 +155,10 @@ def main(): flatten_featmaps.append(featmap) img = mmcv.imread(image_path) - img = mmcv.imconvert(img, 'bgr', 'rgb') + img = mmcv.imconvert(img, "bgr", "rgb") - if source_type['is_dir']: - filename = os.path.relpath(image_path, args.img).replace('/', '_') + if source_type["is_dir"]: + filename = os.path.relpath(image_path, args.img).replace("/", "_") else: filename = os.path.basename(image_path) out_file = None if args.show else os.path.join(args.out_dir, filename) @@ -161,14 +166,15 @@ def main(): # show the results shown_imgs = [] visualizer.add_datasample( - 'result', + "result", img, data_sample=result, draw_gt=False, show=False, wait_time=0, out_file=None, - pred_score_thr=args.score_thr) + pred_score_thr=args.score_thr, + ) drawn_img = visualizer.get_image() for featmap in flatten_featmaps: @@ -177,7 +183,8 @@ def main(): drawn_img, channel_reduction=channel_reduction, topk=args.topk, - arrangement=args.arrangement) + arrangement=args.arrangement, + ) shown_imgs.append(shown_img) shown_imgs = auto_arrange_images(shown_imgs) @@ -190,11 +197,12 @@ def main(): visualizer.show(shown_imgs) if not args.show: - print(f'All done!' - f'\nResults have been saved at {os.path.abspath(args.out_dir)}') + print( + f"All done!" 
f"\nResults have been saved at {os.path.abspath(args.out_dir)}" + ) # Please refer to the usage tutorial: # https://github.com/open-mmlab/mmyolo/blob/main/docs/zh_cn/user_guides/visualization.md # noqa -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/demo/image_demo.py b/mmyolo/demo/image_demo.py index 8f9fde1a..f44d3ed2 100644 --- a/mmyolo/demo/image_demo.py +++ b/mmyolo/demo/image_demo.py @@ -15,31 +15,26 @@ def parse_args(): parser = ArgumentParser() + parser.add_argument("img", help="Image path, include image file, dir and URL.") + parser.add_argument("config", help="Config file") + parser.add_argument("checkpoint", help="Checkpoint file") + parser.add_argument("--out-dir", default="./output", help="Path to output file") + parser.add_argument("--device", default="cuda:0", help="Device used for inference") parser.add_argument( - 'img', help='Image path, include image file, dir and URL.') - parser.add_argument('config', help='Config file') - parser.add_argument('checkpoint', help='Checkpoint file') + "--show", action="store_true", help="Show the detection results" + ) parser.add_argument( - '--out-dir', default='./output', help='Path to output file') + "--deploy", action="store_true", help="Switch model to deployment mode" + ) parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') + "--score-thr", type=float, default=0.3, help="Bbox score threshold" + ) parser.add_argument( - '--show', action='store_true', help='Show the detection results') + "--class-name", nargs="+", type=str, help="Only Save those classes if set" + ) parser.add_argument( - '--deploy', - action='store_true', - help='Switch model to deployment mode') - parser.add_argument( - '--score-thr', type=float, default=0.3, help='Bbox score threshold') - parser.add_argument( - '--class-name', - nargs='+', - type=str, - help='Only Save those classes if set') - parser.add_argument( - '--to-labelme', - action='store_true', - help='Output labelme style label file') + "--to-labelme", action="store_true", help="Output labelme style label file" + ) args = parser.parse_args() return args @@ -48,8 +43,9 @@ def main(): args = parse_args() if args.to_labelme and args.show: - raise RuntimeError('`--to-labelme` or `--show` only ' - 'can choose one at the same time.') + raise RuntimeError( + "`--to-labelme` or `--show` only " "can choose one at the same time." 
+ ) # register all modules in mmdet into the registries register_all_modules() @@ -71,7 +67,7 @@ def main(): files, source_type = get_file_list(args.img) # get model class name - dataset_classes = model.dataset_meta.get('classes') + dataset_classes = model.dataset_meta.get("classes") # ready for labelme format if it is needed to_label_format = LabelmeFormat(classes=dataset_classes) @@ -83,8 +79,9 @@ def main(): continue show_data_classes(dataset_classes) raise RuntimeError( - 'Expected args.class_name to be one of the list, ' - f'but got "{class_name}"') + "Expected args.class_name to be one of the list, " + f'but got "{class_name}"' + ) # start detector inference progress_bar = ProgressBar(len(files)) @@ -92,10 +89,10 @@ def main(): result = inference_detector(model, file) img = mmcv.imread(file) - img = mmcv.imconvert(img, 'bgr', 'rgb') + img = mmcv.imconvert(img, "bgr", "rgb") - if source_type['is_dir']: - filename = os.path.relpath(file, args.img).replace('/', '_') + if source_type["is_dir"]: + filename = os.path.relpath(file, args.img).replace("/", "_") else: filename = os.path.basename(file) out_file = None if args.show else os.path.join(args.out_dir, filename) @@ -104,14 +101,13 @@ def main(): # Get candidate predict info with score threshold pred_instances = result.pred_instances[ - result.pred_instances.scores > args.score_thr] + result.pred_instances.scores > args.score_thr + ] if args.to_labelme: # save result to labelme files - out_file = out_file.replace( - os.path.splitext(out_file)[-1], '.json') - to_label_format(pred_instances, result.metainfo, out_file, - args.class_name) + out_file = out_file.replace(os.path.splitext(out_file)[-1], ".json") + to_label_format(pred_instances, result.metainfo, out_file, args.class_name) continue visualizer.add_datasample( @@ -122,16 +118,17 @@ def main(): show=args.show, wait_time=0, out_file=out_file, - pred_score_thr=args.score_thr) + pred_score_thr=args.score_thr, + ) if not args.show and not args.to_labelme: - print_log( - f'\nResults have been saved at {os.path.abspath(args.out_dir)}') + print_log(f"\nResults have been saved at {os.path.abspath(args.out_dir)}") elif args.to_labelme: - print_log('\nLabelme format label files ' - f'had all been saved in {args.out_dir}') + print_log( + "\nLabelme format label files " f"had all been saved in {args.out_dir}" + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/demo/large_image_demo.py b/mmyolo/demo/large_image_demo.py index 27d14949..cb4c88d1 100644 --- a/mmyolo/demo/large_image_demo.py +++ b/mmyolo/demo/large_image_demo.py @@ -21,68 +21,68 @@ from mmengine.logging import print_log from mmengine.utils import ProgressBar -try: - from sahi.slicing import slice_image -except ImportError: - raise ImportError('Please run "pip install -U sahi" ' - 'to install sahi first for large image inference.') - from mmyolo.registry import VISUALIZERS from mmyolo.utils import register_all_modules, switch_to_deploy from mmyolo.utils.large_image import merge_results_by_nms, shift_predictions from mmyolo.utils.misc import get_file_list +try: + from sahi.slicing import slice_image +except ImportError: + raise ImportError( + 'Please run "pip install -U sahi" ' + "to install sahi first for large image inference." 
+    )
+

def parse_args():
-    parser = ArgumentParser(
-        description='Perform MMYOLO inference on large images.')
-    parser.add_argument(
-        'img', help='Image path, include image file, dir and URL.')
-    parser.add_argument('config', help='Config file')
-    parser.add_argument('checkpoint', help='Checkpoint file')
+    parser = ArgumentParser(description="Perform MMYOLO inference on large images.")
+    parser.add_argument("img", help="Image path, including image file, dir and URL.")
+    parser.add_argument("config", help="Config file")
+    parser.add_argument("checkpoint", help="Checkpoint file")
+    parser.add_argument("--out-dir", default="./output", help="Path to output file")
+    parser.add_argument("--device", default="cuda:0", help="Device used for inference")
    parser.add_argument(
-        '--out-dir', default='./output', help='Path to output file')
+        "--show", action="store_true", help="Show the detection results"
+    )
    parser.add_argument(
-        '--device', default='cuda:0', help='Device used for inference')
+        "--deploy", action="store_true", help="Switch model to deployment mode"
+    )
    parser.add_argument(
-        '--show', action='store_true', help='Show the detection results')
+        "--score-thr", type=float, default=0.3, help="Bbox score threshold"
+    )
    parser.add_argument(
-        '--deploy',
-        action='store_true',
-        help='Switch model to deployment mode')
+        "--patch-size", type=int, default=640, help="The size of patches"
+    )
    parser.add_argument(
-        '--score-thr', type=float, default=0.3, help='Bbox score threshold')
-    parser.add_argument(
-        '--patch-size', type=int, default=640, help='The size of patches')
-    parser.add_argument(
-        '--patch-overlap-ratio',
+        "--patch-overlap-ratio",
        type=float,
        default=0.25,
-        help='Ratio of overlap between two patches')
+        help="Ratio of overlap between two patches",
+    )
    parser.add_argument(
-        '--merge-iou-thr',
+        "--merge-iou-thr",
        type=float,
        default=0.25,
-        help='IoU threshould for merging results')
+        help="IoU threshold for merging results",
+    )
    parser.add_argument(
-        '--merge-nms-type',
-        type=str,
-        default='nms',
-        help='NMS type for merging results')
+        "--merge-nms-type", type=str, default="nms", help="NMS type for merging results"
+    )
    parser.add_argument(
-        '--batch-size',
+        "--batch-size",
        type=int,
        default=1,
-        help='Batch size, must greater than or equal to 1')
+        help="Batch size, must be greater than or equal to 1",
+    )
    parser.add_argument(
-        '--debug',
-        action='store_true',
-        help='Export debug results before merging')
+        "--debug", action="store_true", help="Export debug results before merging"
+    )
    parser.add_argument(
-        '--save-patch',
-        action='store_true',
-        help='Save the results of each patch. '
-        'The `--debug` must be enabled.')
+        "--save-patch",
+        action="store_true",
+        help="Save the results of each patch. " "The `--debug` must be enabled.",
+    )
    args = parser.parse_args()
    return args
@@ -110,8 +110,7 @@ def main():
    files, source_type = get_file_list(args.img)

    # start detector inference
-    print(f'Performing inference on {len(files)} images.... '
-          'This may take a while.')
+    print(f"Performing inference on {len(files)} images... 
" "This may take a while.") progress_bar = ProgressBar(len(files)) for file in files: # read image @@ -145,12 +144,12 @@ def main(): break start += args.batch_size - if source_type['is_dir']: - filename = os.path.relpath(file, args.img).replace('/', '_') + if source_type["is_dir"]: + filename = os.path.relpath(file, args.img).replace("/", "_") else: filename = os.path.basename(file) - img = mmcv.imconvert(img, 'bgr', 'rgb') + img = mmcv.imconvert(img, "bgr", "rgb") out_file = None if args.show else os.path.join(args.out_dir, filename) # export debug images @@ -161,13 +160,15 @@ def main(): shifted_instances = shift_predictions( slice_results, sliced_image_object.starting_pixels, - src_image_shape=(height, width)) + src_image_shape=(height, width), + ) merged_result = slice_results[0].clone() merged_result.pred_instances = shifted_instances - debug_file_name = name + '_debug' + suffix - debug_out_file = None if args.show else os.path.join( - args.out_dir, debug_file_name) + debug_file_name = name + "_debug" + suffix + debug_out_file = ( + None if args.show else os.path.join(args.out_dir, debug_file_name) + ) visualizer.set_image(img.copy()) debug_grids = [] @@ -177,28 +178,23 @@ def main(): end_point_x = start_point_x + args.patch_size end_point_y = start_point_y + args.patch_size debug_grids.append( - [start_point_x, start_point_y, end_point_x, end_point_y]) + [start_point_x, start_point_y, end_point_x, end_point_y] + ) debug_grids = np.array(debug_grids) - debug_grids[:, 0::2] = np.clip(debug_grids[:, 0::2], 1, - img.shape[1] - 1) - debug_grids[:, 1::2] = np.clip(debug_grids[:, 1::2], 1, - img.shape[0] - 1) + debug_grids[:, 0::2] = np.clip(debug_grids[:, 0::2], 1, img.shape[1] - 1) + debug_grids[:, 1::2] = np.clip(debug_grids[:, 1::2], 1, img.shape[0] - 1) palette = np.random.randint(0, 256, size=(len(debug_grids), 3)) palette = [tuple(c) for c in palette] - line_styles = random.choices(['-', '-.', ':'], k=len(debug_grids)) - visualizer.draw_bboxes( - debug_grids, - edge_colors=palette, - alpha=1, - line_styles=line_styles) + line_styles = random.choices(["-", "-.", ":"], k=len(debug_grids)) visualizer.draw_bboxes( - debug_grids, face_colors=palette, alpha=0.15) + debug_grids, edge_colors=palette, alpha=1, line_styles=line_styles + ) + visualizer.draw_bboxes(debug_grids, face_colors=palette, alpha=0.15) visualizer.draw_texts( - list(range(len(debug_grids))), - debug_grids[:, :2] + 5, - colors='w') + list(range(len(debug_grids))), debug_grids[:, :2] + 5, colors="w" + ) visualizer.add_datasample( debug_file_name, @@ -212,17 +208,15 @@ def main(): ) if args.save_patch: - debug_patch_out_dir = os.path.join(args.out_dir, - f'{name}_patch') + debug_patch_out_dir = os.path.join(args.out_dir, f"{name}_patch") for i, slice_result in enumerate(slice_results): patch_out_file = os.path.join( - debug_patch_out_dir, - f'{filename}_slice_{i}_result.jpg') - image = mmcv.imconvert(sliced_image_object.images[i], - 'bgr', 'rgb') + debug_patch_out_dir, f"{filename}_slice_{i}_result.jpg" + ) + image = mmcv.imconvert(sliced_image_object.images[i], "bgr", "rgb") visualizer.add_datasample( - 'patch_result', + "patch_result", image, data_sample=slice_result, draw_gt=False, @@ -236,10 +230,8 @@ def main(): slice_results, sliced_image_object.starting_pixels, src_image_shape=(height, width), - nms_cfg={ - 'type': args.merge_nms_type, - 'iou_thr': args.merge_iou_thr - }) + nms_cfg={"type": args.merge_nms_type, "iou_thr": args.merge_iou_thr}, + ) visualizer.add_datasample( filename, @@ -254,9 +246,8 @@ def main(): 
        progress_bar.update()

    if not args.show or (args.debug and args.save_patch):
-        print_log(
-            f'\nResults have been saved at {os.path.abspath(args.out_dir)}')
+        print_log(f"\nResults have been saved at {os.path.abspath(args.out_dir)}")


-if __name__ == '__main__':
+if __name__ == "__main__":
    main()
diff --git a/mmyolo/demo/video_demo.py b/mmyolo/demo/video_demo.py
index 7020c9fe..ee1315d4 100644
--- a/mmyolo/demo/video_demo.py
+++ b/mmyolo/demo/video_demo.py
@@ -24,30 +24,32 @@

def parse_args():
-    parser = argparse.ArgumentParser(description='MMYOLO video demo')
-    parser.add_argument('video', help='Video file')
-    parser.add_argument('config', help='Config file')
-    parser.add_argument('checkpoint', help='Checkpoint file')
+    parser = argparse.ArgumentParser(description="MMYOLO video demo")
+    parser.add_argument("video", help="Video file")
+    parser.add_argument("config", help="Config file")
+    parser.add_argument("checkpoint", help="Checkpoint file")
+    parser.add_argument("--device", default="cuda:0", help="Device used for inference")
    parser.add_argument(
-        '--device', default='cuda:0', help='Device used for inference')
+        "--score-thr", type=float, default=0.3, help="Bbox score threshold"
+    )
+    parser.add_argument("--out", type=str, help="Output video file")
+    parser.add_argument("--show", action="store_true", help="Show video")
    parser.add_argument(
-        '--score-thr', type=float, default=0.3, help='Bbox score threshold')
-    parser.add_argument('--out', type=str, help='Output video file')
-    parser.add_argument('--show', action='store_true', help='Show video')
-    parser.add_argument(
-        '--wait-time',
+        "--wait-time",
        type=float,
        default=1,
-        help='The interval of show (s), 0 is block')
+        help="Display interval in seconds; 0 means blocking",
+    )
    args = parser.parse_args()
    return args


def main():
    args = parse_args()
-    assert args.out or args.show, \
-        ('Please specify at least one operation (save/show the '
-         'video) with the argument "--out" or "--show"')
+    assert args.out or args.show, (
+        "Please specify at least one operation (save/show the "
+        'video) with the argument "--out" or "--show"'
+    )

    # register all modules in mmdet into the registries
    register_all_modules()
@@ -56,8 +58,7 @@ def main():
    model = init_detector(args.config, args.checkpoint, device=args.device)

    # build test pipeline
-    model.cfg.test_dataloader.dataset.pipeline[
-        0].type = 'mmdet.LoadImageFromNDArray'
+    model.cfg.test_dataloader.dataset.pipeline[0].type = "mmdet.LoadImageFromNDArray"
    test_pipeline = Compose(model.cfg.test_dataloader.dataset.pipeline)

    # init visualizer
@@ -69,25 +70,29 @@ def main():
    video_reader = mmcv.VideoReader(args.video)
    video_writer = None
    if args.out:
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        video_writer = cv2.VideoWriter(
-            args.out, fourcc, video_reader.fps,
-            (video_reader.width, video_reader.height))
+            args.out,
+            fourcc,
+            video_reader.fps,
+            (video_reader.width, video_reader.height),
+        )

    for frame in track_iter_progress(video_reader):
        result = inference_detector(model, frame, test_pipeline=test_pipeline)
        visualizer.add_datasample(
-            name='video',
+            name="video",
            image=frame,
            data_sample=result,
            draw_gt=False,
            show=False,
-            pred_score_thr=args.score_thr)
+            pred_score_thr=args.score_thr,
+        )
        frame = visualizer.get_image()

        if args.show:
-            cv2.namedWindow('video', 0)
-            mmcv.imshow(frame, 'video', args.wait_time)
+            cv2.namedWindow("video", 0)
+            mmcv.imshow(frame, "video", args.wait_time)
        if args.out:
            video_writer.write(frame)

@@ -96,5 +101,5 @@
cv2.destroyAllWindows() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/docs/en/conf.py b/mmyolo/docs/en/conf.py index 437a257a..69c4523b 100644 --- a/mmyolo/docs/en/conf.py +++ b/mmyolo/docs/en/conf.py @@ -16,20 +16,20 @@ import pytorch_sphinx_theme -sys.path.insert(0, os.path.abspath('../../')) +sys.path.insert(0, os.path.abspath("../../")) # -- Project information ----------------------------------------------------- -project = 'MMYOLO' -copyright = '2022, OpenMMLab' -author = 'MMYOLO Authors' -version_file = '../../mmyolo/version.py' +project = "MMYOLO" +copyright = "2022, OpenMMLab" +author = "MMYOLO Authors" +version_file = "../../mmyolo/version.py" def get_version(): with open(version_file) as f: - exec(compile(f.read(), version_file, 'exec')) - return locals()['__version__'] + exec(compile(f.read(), version_file, "exec")) + return locals()["__version__"] # The full version, including alpha/beta/rc tags @@ -41,39 +41,43 @@ def get_version(): # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'myst_parser', - 'sphinx_markdown_tables', - 'sphinx_copybutton', + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "myst_parser", + "sphinx_markdown_tables", + "sphinx_copybutton", ] -myst_enable_extensions = ['colon_fence'] +myst_enable_extensions = ["colon_fence"] myst_heading_anchors = 3 autodoc_mock_imports = [ - 'matplotlib', 'pycocotools', 'terminaltables', 'mmyolo.version', 'mmcv.ops' + "matplotlib", + "pycocotools", + "terminaltables", + "mmyolo.version", + "mmcv.ops", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = { - '.rst': 'restructuredtext', - '.md': 'markdown', + ".rst": "restructuredtext", + ".md": "markdown", } # The master toctree document. -master_doc = 'index' +master_doc = "index" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- @@ -81,35 +85,32 @@ def get_version(): # a list of builtin themes. # # html_theme = 'sphinx_rtd_theme' -html_theme = 'pytorch_sphinx_theme' +html_theme = "pytorch_sphinx_theme" html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] html_theme_options = { - 'menu': [ - { - 'name': 'GitHub', - 'url': 'https://github.com/open-mmlab/mmyolo' - }, + "menu": [ + {"name": "GitHub", "url": "https://github.com/open-mmlab/mmyolo"}, ], # Specify the language of shared menu - 'menu_lang': 'en', + "menu_lang": "en", } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] -html_css_files = ['css/readthedocs.css'] +html_static_path = ["_static"] +html_css_files = ["css/readthedocs.css"] # -- Extension configuration ------------------------------------------------- # Ignore >>> when copying code -copybutton_prompt_text = r'>>> |\.\.\. 
' +copybutton_prompt_text = r">>> |\.\.\. " copybutton_prompt_is_regexp = True def builder_inited_handler(app): - subprocess.run(['./stat.py']) + subprocess.run(["./stat.py"]) def setup(app): - app.connect('builder-inited', builder_inited_handler) + app.connect("builder-inited", builder_inited_handler) diff --git a/mmyolo/docs/en/stat.py b/mmyolo/docs/en/stat.py index 6c8afcc7..4f3ef861 100755 --- a/mmyolo/docs/en/stat.py +++ b/mmyolo/docs/en/stat.py @@ -6,30 +6,27 @@ import numpy as np -url_prefix = 'https://github.com/open-mmlab/mmdetection/blob/3.x/configs' +url_prefix = "https://github.com/open-mmlab/mmdetection/blob/3.x/configs" -files = sorted(glob.glob('../../configs/*/README.md')) +files = sorted(glob.glob("../../configs/*/README.md")) stats = [] titles = [] num_ckpts = 0 for f in files: - url = osp.dirname(f.replace('../../configs', url_prefix)) + url = osp.dirname(f.replace("../../configs", url_prefix)) with open(f) as content_file: content = content_file.read() - title = content.split('\n')[0].replace('# ', '').strip() - ckpts = { - x.lower().strip() - for x in re.findall(r'\[model\]\((https?.*)\)', content) - } + title = content.split("\n")[0].replace("# ", "").strip() + ckpts = {x.lower().strip() for x in re.findall(r"\[model\]\((https?.*)\)", content)} if len(ckpts) == 0: continue - _papertype = [x for x in re.findall(r'\[([A-Z]+)\]', content)] + _papertype = [x for x in re.findall(r"\[([A-Z]+)\]", content)] assert len(_papertype) > 0 papertype = _papertype[0] @@ -44,12 +41,10 @@ stats.append((paper, ckpts, statsmsg)) allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats]) -msglist = '\n'.join(x for _, _, x in stats) +msglist = "\n".join(x for _, _, x in stats) -papertypes, papercounts = np.unique([t for t, _ in allpapers], - return_counts=True) -countstr = '\n'.join( - [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)]) +papertypes, papercounts = np.unique([t for t, _ in allpapers], return_counts=True) +countstr = "\n".join([f" - {t}: {c}" for t, c in zip(papertypes, papercounts)]) modelzoo = f""" # Model Zoo Statistics @@ -62,5 +57,5 @@ {msglist} """ -with open('modelzoo_statistics.md', 'w') as f: +with open("modelzoo_statistics.md", "w") as f: f.write(modelzoo) diff --git a/mmyolo/docs/zh_cn/conf.py b/mmyolo/docs/zh_cn/conf.py index 70eb0c0e..5ff0ce9c 100644 --- a/mmyolo/docs/zh_cn/conf.py +++ b/mmyolo/docs/zh_cn/conf.py @@ -16,20 +16,20 @@ import pytorch_sphinx_theme -sys.path.insert(0, os.path.abspath('../../')) +sys.path.insert(0, os.path.abspath("../../")) # -- Project information ----------------------------------------------------- -project = 'MMYOLO' -copyright = '2022, OpenMMLab' -author = 'MMYOLO Authors' -version_file = '../../mmyolo/version.py' +project = "MMYOLO" +copyright = "2022, OpenMMLab" +author = "MMYOLO Authors" +version_file = "../../mmyolo/version.py" def get_version(): with open(version_file) as f: - exec(compile(f.read(), version_file, 'exec')) - return locals()['__version__'] + exec(compile(f.read(), version_file, "exec")) + return locals()["__version__"] # The full version, including alpha/beta/rc tags @@ -41,39 +41,43 @@ def get_version(): # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'myst_parser', - 'sphinx_markdown_tables', - 'sphinx_copybutton', + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "myst_parser", + "sphinx_markdown_tables", + "sphinx_copybutton", ] -myst_enable_extensions = ['colon_fence'] +myst_enable_extensions = ["colon_fence"] myst_heading_anchors = 3 autodoc_mock_imports = [ - 'matplotlib', 'pycocotools', 'terminaltables', 'mmyolo.version', 'mmcv.ops' + "matplotlib", + "pycocotools", + "terminaltables", + "mmyolo.version", + "mmcv.ops", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = { - '.rst': 'restructuredtext', - '.md': 'markdown', + ".rst": "restructuredtext", + ".md": "markdown", } # The master toctree document. -master_doc = 'index' +master_doc = "index" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- @@ -81,37 +85,34 @@ def get_version(): # a list of builtin themes. # # html_theme = 'sphinx_rtd_theme' -html_theme = 'pytorch_sphinx_theme' +html_theme = "pytorch_sphinx_theme" html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] html_theme_options = { - 'menu': [ - { - 'name': 'GitHub', - 'url': 'https://github.com/open-mmlab/mmyolo' - }, + "menu": [ + {"name": "GitHub", "url": "https://github.com/open-mmlab/mmyolo"}, ], # Specify the language of shared menu - 'menu_lang': 'cn', + "menu_lang": "cn", } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] -html_css_files = ['css/readthedocs.css'] +html_static_path = ["_static"] +html_css_files = ["css/readthedocs.css"] -language = 'zh_CN' +language = "zh_CN" # -- Extension configuration ------------------------------------------------- # Ignore >>> when copying code -copybutton_prompt_text = r'>>> |\.\.\. ' +copybutton_prompt_text = r">>> |\.\.\. 
" copybutton_prompt_is_regexp = True def builder_inited_handler(app): - subprocess.run(['./stat.py']) + subprocess.run(["./stat.py"]) def setup(app): - app.connect('builder-inited', builder_inited_handler) + app.connect("builder-inited", builder_inited_handler) diff --git a/mmyolo/docs/zh_cn/stat.py b/mmyolo/docs/zh_cn/stat.py index 44505546..8ab95eb1 100755 --- a/mmyolo/docs/zh_cn/stat.py +++ b/mmyolo/docs/zh_cn/stat.py @@ -6,30 +6,27 @@ import numpy as np -url_prefix = 'https://github.com/open-mmlab/mmyolo/blob/main/' +url_prefix = "https://github.com/open-mmlab/mmyolo/blob/main/" -files = sorted(glob.glob('../configs/*/README.md')) +files = sorted(glob.glob("../configs/*/README.md")) stats = [] titles = [] num_ckpts = 0 for f in files: - url = osp.dirname(f.replace('../', url_prefix)) + url = osp.dirname(f.replace("../", url_prefix)) with open(f) as content_file: content = content_file.read() - title = content.split('\n')[0].replace('# ', '').strip() - ckpts = { - x.lower().strip() - for x in re.findall(r'\[model\]\((https?.*)\)', content) - } + title = content.split("\n")[0].replace("# ", "").strip() + ckpts = {x.lower().strip() for x in re.findall(r"\[model\]\((https?.*)\)", content)} if len(ckpts) == 0: continue - _papertype = [x for x in re.findall(r'\[([A-Z]+)\]', content)] + _papertype = [x for x in re.findall(r"\[([A-Z]+)\]", content)] assert len(_papertype) > 0 papertype = _papertype[0] @@ -44,12 +41,10 @@ stats.append((paper, ckpts, statsmsg)) allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats]) -msglist = '\n'.join(x for _, _, x in stats) +msglist = "\n".join(x for _, _, x in stats) -papertypes, papercounts = np.unique([t for t, _ in allpapers], - return_counts=True) -countstr = '\n'.join( - [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)]) +papertypes, papercounts = np.unique([t for t, _ in allpapers], return_counts=True) +countstr = "\n".join([f" - {t}: {c}" for t, c in zip(papertypes, papercounts)]) modelzoo = f""" # Model Zoo Statistics @@ -62,5 +57,5 @@ {msglist} """ -with open('modelzoo_statistics.md', 'w') as f: +with open("modelzoo_statistics.md", "w") as f: f.write(modelzoo) diff --git a/mmyolo/mmyolo/__init__.py b/mmyolo/mmyolo/__init__.py index 757c4084..16804a46 100644 --- a/mmyolo/mmyolo/__init__.py +++ b/mmyolo/mmyolo/__init__.py @@ -6,34 +6,40 @@ from .version import __version__, version_info -mmcv_minimum_version = '2.0.0rc0' -mmcv_maximum_version = '2.1.0' +mmcv_minimum_version = "2.0.0rc0" +mmcv_maximum_version = "2.1.0" mmcv_version = digit_version(mmcv.__version__) -mmengine_minimum_version = '0.3.1' -mmengine_maximum_version = '1.0.0' +mmengine_minimum_version = "0.3.1" +mmengine_maximum_version = "1.0.0" mmengine_version = digit_version(mmengine.__version__) -mmdet_minimum_version = '3.0.0rc5' -mmdet_maximum_version = '3.1.0' +mmdet_minimum_version = "3.0.0rc5" +mmdet_maximum_version = "3.1.0" mmdet_version = digit_version(mmdet.__version__) -assert (mmcv_version >= digit_version(mmcv_minimum_version) - and mmcv_version < digit_version(mmcv_maximum_version)), \ - f'MMCV=={mmcv.__version__} is used but incompatible. ' \ - f'Please install mmcv>={mmcv_minimum_version}, <{mmcv_maximum_version}.' - -assert (mmengine_version >= digit_version(mmengine_minimum_version) - and mmengine_version < digit_version(mmengine_maximum_version)), \ - f'MMEngine=={mmengine.__version__} is used but incompatible. ' \ - f'Please install mmengine>={mmengine_minimum_version}, ' \ - f'<{mmengine_maximum_version}.' 
- -assert (mmdet_version >= digit_version(mmdet_minimum_version) - and mmdet_version < digit_version(mmdet_maximum_version)), \ - f'MMDetection=={mmdet.__version__} is used but incompatible. ' \ - f'Please install mmdet>={mmdet_minimum_version}, ' \ - f'<{mmdet_maximum_version}.' - -__all__ = ['__version__', 'version_info', 'digit_version'] +assert mmcv_version >= digit_version( + mmcv_minimum_version +) and mmcv_version < digit_version(mmcv_maximum_version), ( + f"MMCV=={mmcv.__version__} is used but incompatible. " + f"Please install mmcv>={mmcv_minimum_version}, <{mmcv_maximum_version}." +) + +assert mmengine_version >= digit_version( + mmengine_minimum_version +) and mmengine_version < digit_version(mmengine_maximum_version), ( + f"MMEngine=={mmengine.__version__} is used but incompatible. " + f"Please install mmengine>={mmengine_minimum_version}, " + f"<{mmengine_maximum_version}." +) + +assert mmdet_version >= digit_version( + mmdet_minimum_version +) and mmdet_version < digit_version(mmdet_maximum_version), ( + f"MMDetection=={mmdet.__version__} is used but incompatible. " + f"Please install mmdet>={mmdet_minimum_version}, " + f"<{mmdet_maximum_version}." +) + +__all__ = ["__version__", "version_info", "digit_version"] diff --git a/mmyolo/mmyolo/datasets/__init__.py b/mmyolo/mmyolo/datasets/__init__.py index 592535eb..5d4de57f 100644 --- a/mmyolo/mmyolo/datasets/__init__.py +++ b/mmyolo/mmyolo/datasets/__init__.py @@ -6,6 +6,9 @@ from .yolov5_voc import YOLOv5VOCDataset __all__ = [ - 'YOLOv5CocoDataset', 'YOLOv5VOCDataset', 'BatchShapePolicy', - 'yolov5_collate', 'YOLOv5CrowdHumanDataset' + "YOLOv5CocoDataset", + "YOLOv5VOCDataset", + "BatchShapePolicy", + "yolov5_collate", + "YOLOv5CrowdHumanDataset", ] diff --git a/mmyolo/mmyolo/datasets/transforms/__init__.py b/mmyolo/mmyolo/datasets/transforms/__init__.py index ea1cd41e..516905d5 100644 --- a/mmyolo/mmyolo/datasets/transforms/__init__.py +++ b/mmyolo/mmyolo/datasets/transforms/__init__.py @@ -1,12 +1,25 @@ # Copyright (c) OpenMMLab. All rights reserved. from .mix_img_transforms import Mosaic, Mosaic9, YOLOv5MixUp, YOLOXMixUp -from .transforms import (LetterResize, LoadAnnotations, PPYOLOERandomCrop, - PPYOLOERandomDistort, YOLOv5HSVRandomAug, - YOLOv5KeepRatioResize, YOLOv5RandomAffine) +from .transforms import ( + LetterResize, + LoadAnnotations, + PPYOLOERandomCrop, + PPYOLOERandomDistort, + YOLOv5HSVRandomAug, + YOLOv5KeepRatioResize, + YOLOv5RandomAffine, +) __all__ = [ - 'YOLOv5KeepRatioResize', 'LetterResize', 'Mosaic', 'YOLOXMixUp', - 'YOLOv5MixUp', 'YOLOv5HSVRandomAug', 'LoadAnnotations', - 'YOLOv5RandomAffine', 'PPYOLOERandomDistort', 'PPYOLOERandomCrop', - 'Mosaic9' + "YOLOv5KeepRatioResize", + "LetterResize", + "Mosaic", + "YOLOXMixUp", + "YOLOv5MixUp", + "YOLOv5HSVRandomAug", + "LoadAnnotations", + "YOLOv5RandomAffine", + "PPYOLOERandomDistort", + "PPYOLOERandomCrop", + "Mosaic9", ] diff --git a/mmyolo/mmyolo/datasets/transforms/mix_img_transforms.py b/mmyolo/mmyolo/datasets/transforms/mix_img_transforms.py index 9cd5ad98..152b2088 100644 --- a/mmyolo/mmyolo/datasets/transforms/mix_img_transforms.py +++ b/mmyolo/mmyolo/datasets/transforms/mix_img_transforms.py @@ -42,13 +42,15 @@ class BaseMixImageTransform(BaseTransform, metaclass=ABCMeta): iteration is terminated and raise the error. Defaults to 15. 
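
    Example (editor's sketch, not part of the original docstring; it assumes
    a concrete subclass such as ``Mosaic``):

        >>> # With use_cached=True, up to ``max_cached_images`` previously
        >>> # seen results are kept in memory, so the mixing transform can
        >>> # sample from the cache instead of re-reading the dataset.
        >>> transform = Mosaic(use_cached=True, max_cached_images=40)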
""" - def __init__(self, - pre_transform: Optional[Sequence[str]] = None, - prob: float = 1.0, - use_cached: bool = False, - max_cached_images: int = 40, - random_pop: bool = True, - max_refetch: int = 15): + def __init__( + self, + pre_transform: Optional[Sequence[str]] = None, + prob: float = 1.0, + use_cached: bool = False, + max_cached_images: int = 40, + random_pop: bool = True, + max_refetch: int = 15, + ): self.max_refetch = max_refetch self.prob = prob @@ -64,8 +66,7 @@ def __init__(self, self.pre_transform = Compose(pre_transform) @abstractmethod - def get_indexes(self, dataset: Union[BaseDataset, - list]) -> Union[list, int]: + def get_indexes(self, dataset: Union[BaseDataset, list]) -> Union[list, int]: """Call function to collect indexes. Args: @@ -74,7 +75,6 @@ def get_indexes(self, dataset: Union[BaseDataset, Returns: list or int: indexes. """ - pass @abstractmethod def mix_img_transform(self, results: dict) -> dict: @@ -86,7 +86,6 @@ def mix_img_transform(self, results: dict) -> dict: Returns: results (dict): Updated result dict. """ - pass @autocast_box_type() def transform(self, results: dict) -> dict: @@ -114,7 +113,7 @@ def transform(self, results: dict) -> dict: if self.use_cached: # Be careful: deep copying can be very time-consuming # if results includes dataset. - dataset = results.pop('dataset', None) + dataset = results.pop("dataset", None) self.results_cache.append(copy.deepcopy(results)) if len(self.results_cache) > self.max_cached_images: if self.random_pop: @@ -126,10 +125,10 @@ def transform(self, results: dict) -> dict: if len(self.results_cache) <= 4: return results else: - assert 'dataset' in results + assert "dataset" in results # Be careful: deep copying can be very time-consuming # if results includes dataset. - dataset = results.pop('dataset', None) + dataset = results.pop("dataset", None) for _ in range(self.max_refetch): # get index of one or three other images @@ -142,42 +141,40 @@ def transform(self, results: dict) -> dict: indexes = [indexes] if self.use_cached: - mix_results = [ - copy.deepcopy(self.results_cache[i]) for i in indexes - ] + mix_results = [copy.deepcopy(self.results_cache[i]) for i in indexes] else: # get images information will be used for Mosaic or MixUp mix_results = [ - copy.deepcopy(dataset.get_data_info(index)) - for index in indexes + copy.deepcopy(dataset.get_data_info(index)) for index in indexes ] if self.pre_transform is not None: for i, data in enumerate(mix_results): # pre_transform may also require dataset - data.update({'dataset': dataset}) + data.update({"dataset": dataset}) # before Mosaic or MixUp need to go through # the necessary pre_transform _results = self.pre_transform(data) - _results.pop('dataset') + _results.pop("dataset") mix_results[i] = _results if None not in mix_results: - results['mix_results'] = mix_results + results["mix_results"] = mix_results break - print('Repeated calculation') + print("Repeated calculation") else: raise RuntimeError( - 'The loading pipeline of the original dataset' - ' always return None. Please check the correctness ' - 'of the dataset and its pipeline.') + "The loading pipeline of the original dataset" + " always return None. Please check the correctness " + "of the dataset and its pipeline." 
+            )

        # Mosaic or MixUp
        results = self.mix_img_transform(results)

-        if 'mix_results' in results:
-            results.pop('mix_results')
-        results['dataset'] = dataset
+        if "mix_results" in results:
+            results.pop("mix_results")
+        results["dataset"] = dataset

        return results

@@ -261,23 +258,27 @@ class Mosaic(BaseMixImageTransform):
            iteration is terminated and raise the error. Defaults to 15.
    """

-    def __init__(self,
-                 img_scale: Tuple[int, int] = (640, 640),
-                 center_ratio_range: Tuple[float, float] = (0.5, 1.5),
-                 bbox_clip_border: bool = True,
-                 pad_val: float = 114.0,
-                 pre_transform: Sequence[dict] = None,
-                 prob: float = 1.0,
-                 use_cached: bool = False,
-                 max_cached_images: int = 40,
-                 random_pop: bool = True,
-                 max_refetch: int = 15):
+    def __init__(
+        self,
+        img_scale: Tuple[int, int] = (640, 640),
+        center_ratio_range: Tuple[float, float] = (0.5, 1.5),
+        bbox_clip_border: bool = True,
+        pad_val: float = 114.0,
+        pre_transform: Sequence[dict] = None,
+        prob: float = 1.0,
+        use_cached: bool = False,
+        max_cached_images: int = 40,
+        random_pop: bool = True,
+        max_refetch: int = 15,
+    ):
        assert isinstance(img_scale, tuple)
-        assert 0 <= prob <= 1.0, 'The probability should be in range [0,1]. ' \
-                                 f'got {prob}.'
+        assert 0 <= prob <= 1.0, (
+            f"The probability should be in range [0,1], but got {prob}."
+        )
        if use_cached:
-            assert max_cached_images >= 4, 'The length of cache must >= 4, ' \
-                                           f'but got {max_cached_images}.'
+            assert max_cached_images >= 4, (
+                "The length of cache must be >= 4, " f"but got {max_cached_images}."
+            )

        super().__init__(
            pre_transform=pre_transform,
@@ -285,7 +286,8 @@ def __init__(self,
            use_cached=use_cached,
            max_cached_images=max_cached_images,
            random_pop=random_pop,
-            max_refetch=max_refetch)
+            max_refetch=max_refetch,
+        )

        self.img_scale = img_scale
        self.center_ratio_range = center_ratio_range
@@ -313,45 +315,50 @@ def mix_img_transform(self, results: dict) -> dict:

        Returns:
            results (dict): Updated result dict.
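
        Example (editor's sketch, not part of the original docstring): with
        the default ``img_scale=(640, 640)`` the mosaic canvas is twice that
        size, and the shared corner of the four sub-images is sampled from
        ``center_ratio_range``:

            >>> import random
            >>> img_scale_w = img_scale_h = 640
            >>> center_x = int(random.uniform(0.5, 1.5) * img_scale_w)
            >>> center_y = int(random.uniform(0.5, 1.5) * img_scale_h)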
""" - assert 'mix_results' in results + assert "mix_results" in results mosaic_bboxes = [] mosaic_bboxes_labels = [] mosaic_ignore_flags = [] # self.img_scale is wh format img_scale_w, img_scale_h = self.img_scale - if len(results['img'].shape) == 3: + if len(results["img"].shape) == 3: mosaic_img = np.full( (int(img_scale_h * 2), int(img_scale_w * 2), 3), self.pad_val, - dtype=results['img'].dtype) + dtype=results["img"].dtype, + ) else: - mosaic_img = np.full((int(img_scale_h * 2), int(img_scale_w * 2)), - self.pad_val, - dtype=results['img'].dtype) + mosaic_img = np.full( + (int(img_scale_h * 2), int(img_scale_w * 2)), + self.pad_val, + dtype=results["img"].dtype, + ) # mosaic center x, y center_x = int(random.uniform(*self.center_ratio_range) * img_scale_w) center_y = int(random.uniform(*self.center_ratio_range) * img_scale_h) center_position = (center_x, center_y) - loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right') + loc_strs = ("top_left", "top_right", "bottom_left", "bottom_right") for i, loc in enumerate(loc_strs): - if loc == 'top_left': + if loc == "top_left": results_patch = results else: - results_patch = results['mix_results'][i - 1] + results_patch = results["mix_results"][i - 1] - img_i = results_patch['img'] + img_i = results_patch["img"] h_i, w_i = img_i.shape[:2] # keep_ratio resize scale_ratio_i = min(img_scale_h / h_i, img_scale_w / w_i) img_i = mmcv.imresize( - img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i))) + img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)) + ) # compute the combine parameters paste_coord, crop_coord = self._mosaic_combine( - loc, center_position, img_i.shape[:2][::-1]) + loc, center_position, img_i.shape[:2][::-1] + ) x1_p, y1_p, x2_p, y2_p = paste_coord x1_c, y1_c, x2_c, y2_c = crop_coord @@ -359,9 +366,9 @@ def mix_img_transform(self, results: dict) -> dict: mosaic_img[y1_p:y2_p, x1_p:x2_p] = img_i[y1_c:y2_c, x1_c:x2_c] # adjust coordinate - gt_bboxes_i = results_patch['gt_bboxes'] - gt_bboxes_labels_i = results_patch['gt_bboxes_labels'] - gt_ignore_flags_i = results_patch['gt_ignore_flags'] + gt_bboxes_i = results_patch["gt_bboxes"] + gt_bboxes_labels_i = results_patch["gt_bboxes_labels"] + gt_ignore_flags_i = results_patch["gt_ignore_flags"] padw = x1_p - x1_c padh = y1_p - y1_c @@ -380,21 +387,22 @@ def mix_img_transform(self, results: dict) -> dict: else: # remove outside bboxes inside_inds = mosaic_bboxes.is_inside( - [2 * img_scale_h, 2 * img_scale_w]).numpy() + [2 * img_scale_h, 2 * img_scale_w] + ).numpy() mosaic_bboxes = mosaic_bboxes[inside_inds] mosaic_bboxes_labels = mosaic_bboxes_labels[inside_inds] mosaic_ignore_flags = mosaic_ignore_flags[inside_inds] - results['img'] = mosaic_img - results['img_shape'] = mosaic_img.shape - results['gt_bboxes'] = mosaic_bboxes - results['gt_bboxes_labels'] = mosaic_bboxes_labels - results['gt_ignore_flags'] = mosaic_ignore_flags + results["img"] = mosaic_img + results["img_shape"] = mosaic_img.shape + results["gt_bboxes"] = mosaic_bboxes + results["gt_bboxes_labels"] = mosaic_bboxes_labels + results["gt_ignore_flags"] = mosaic_ignore_flags return results def _mosaic_combine( - self, loc: str, center_position_xy: Sequence[float], - img_shape_wh: Sequence[int]) -> Tuple[Tuple[int], Tuple[int]]: + self, loc: str, center_position_xy: Sequence[float], img_shape_wh: Sequence[int] + ) -> Tuple[Tuple[int], Tuple[int]]: """Calculate global coordinate of mosaic image and local coordinate of cropped sub-image. 
@@ -411,56 +419,76 @@ def _mosaic_combine( - paste_coord (tuple): paste corner coordinate in mosaic image. - crop_coord (tuple): crop corner coordinate in mosaic image. """ - assert loc in ('top_left', 'top_right', 'bottom_left', 'bottom_right') - if loc == 'top_left': + assert loc in ("top_left", "top_right", "bottom_left", "bottom_right") + if loc == "top_left": # index0 to top left part of image - x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \ - max(center_position_xy[1] - img_shape_wh[1], 0), \ - center_position_xy[0], \ - center_position_xy[1] - crop_coord = img_shape_wh[0] - (x2 - x1), img_shape_wh[1] - ( - y2 - y1), img_shape_wh[0], img_shape_wh[1] - - elif loc == 'top_right': + x1, y1, x2, y2 = ( + max(center_position_xy[0] - img_shape_wh[0], 0), + max(center_position_xy[1] - img_shape_wh[1], 0), + center_position_xy[0], + center_position_xy[1], + ) + crop_coord = ( + img_shape_wh[0] - (x2 - x1), + img_shape_wh[1] - (y2 - y1), + img_shape_wh[0], + img_shape_wh[1], + ) + + elif loc == "top_right": # index1 to top right part of image - x1, y1, x2, y2 = center_position_xy[0], \ - max(center_position_xy[1] - img_shape_wh[1], 0), \ - min(center_position_xy[0] + img_shape_wh[0], - self.img_scale[0] * 2), \ - center_position_xy[1] - crop_coord = 0, img_shape_wh[1] - (y2 - y1), min( - img_shape_wh[0], x2 - x1), img_shape_wh[1] - - elif loc == 'bottom_left': + x1, y1, x2, y2 = ( + center_position_xy[0], + max(center_position_xy[1] - img_shape_wh[1], 0), + min(center_position_xy[0] + img_shape_wh[0], self.img_scale[0] * 2), + center_position_xy[1], + ) + crop_coord = ( + 0, + img_shape_wh[1] - (y2 - y1), + min(img_shape_wh[0], x2 - x1), + img_shape_wh[1], + ) + + elif loc == "bottom_left": # index2 to bottom left part of image - x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \ - center_position_xy[1], \ - center_position_xy[0], \ - min(self.img_scale[1] * 2, center_position_xy[1] + - img_shape_wh[1]) - crop_coord = img_shape_wh[0] - (x2 - x1), 0, img_shape_wh[0], min( - y2 - y1, img_shape_wh[1]) + x1, y1, x2, y2 = ( + max(center_position_xy[0] - img_shape_wh[0], 0), + center_position_xy[1], + center_position_xy[0], + min(self.img_scale[1] * 2, center_position_xy[1] + img_shape_wh[1]), + ) + crop_coord = ( + img_shape_wh[0] - (x2 - x1), + 0, + img_shape_wh[0], + min(y2 - y1, img_shape_wh[1]), + ) else: # index3 to bottom right part of image - x1, y1, x2, y2 = center_position_xy[0], \ - center_position_xy[1], \ - min(center_position_xy[0] + img_shape_wh[0], - self.img_scale[0] * 2), \ - min(self.img_scale[1] * 2, center_position_xy[1] + - img_shape_wh[1]) - crop_coord = 0, 0, min(img_shape_wh[0], - x2 - x1), min(y2 - y1, img_shape_wh[1]) + x1, y1, x2, y2 = ( + center_position_xy[0], + center_position_xy[1], + min(center_position_xy[0] + img_shape_wh[0], self.img_scale[0] * 2), + min(self.img_scale[1] * 2, center_position_xy[1] + img_shape_wh[1]), + ) + crop_coord = ( + 0, + 0, + min(img_shape_wh[0], x2 - x1), + min(y2 - y1, img_shape_wh[1]), + ) paste_coord = x1, y1, x2, y2 return paste_coord, crop_coord def __repr__(self) -> str: repr_str = self.__class__.__name__ - repr_str += f'(img_scale={self.img_scale}, ' - repr_str += f'center_ratio_range={self.center_ratio_range}, ' - repr_str += f'pad_val={self.pad_val}, ' - repr_str += f'prob={self.prob})' + repr_str += f"(img_scale={self.img_scale}, " + repr_str += f"center_ratio_range={self.center_ratio_range}, " + repr_str += f"pad_val={self.pad_val}, " + repr_str += f"prob={self.prob})" return repr_str @@ 
-543,22 +571,26 @@ class Mosaic9(BaseMixImageTransform):
            iteration is terminated and raise the error. Defaults to 15.
    """

-    def __init__(self,
-                 img_scale: Tuple[int, int] = (640, 640),
-                 bbox_clip_border: bool = True,
-                 pad_val: Union[float, int] = 114.0,
-                 pre_transform: Sequence[dict] = None,
-                 prob: float = 1.0,
-                 use_cached: bool = False,
-                 max_cached_images: int = 50,
-                 random_pop: bool = True,
-                 max_refetch: int = 15):
+    def __init__(
+        self,
+        img_scale: Tuple[int, int] = (640, 640),
+        bbox_clip_border: bool = True,
+        pad_val: Union[float, int] = 114.0,
+        pre_transform: Sequence[dict] = None,
+        prob: float = 1.0,
+        use_cached: bool = False,
+        max_cached_images: int = 50,
+        random_pop: bool = True,
+        max_refetch: int = 15,
+    ):
        assert isinstance(img_scale, tuple)
-        assert 0 <= prob <= 1.0, 'The probability should be in range [0,1]. ' \
-                                 f'got {prob}.'
+        assert 0 <= prob <= 1.0, (
+            f"The probability should be in range [0,1], but got {prob}."
+        )
        if use_cached:
-            assert max_cached_images >= 9, 'The length of cache must >= 9, ' \
-                                           f'but got {max_cached_images}.'
+            assert max_cached_images >= 9, (
+                "The length of cache must be >= 9, " f"but got {max_cached_images}."
+            )

        super().__init__(
            pre_transform=pre_transform,
@@ -566,7 +598,8 @@ def __init__(self,
            use_cached=use_cached,
            max_cached_images=max_cached_images,
            random_pop=random_pop,
-            max_refetch=max_refetch)
+            max_refetch=max_refetch,
+        )

        self.img_scale = img_scale
        self.bbox_clip_border = bbox_clip_border
@@ -598,7 +631,7 @@ def mix_img_transform(self, results: dict) -> dict:
        Returns:
            results (dict): Updated result dict.
        """
-        assert 'mix_results' in results
+        assert "mix_results" in results

        mosaic_bboxes = []
        mosaic_bboxes_labels = []
@@ -606,41 +639,52 @@ def mix_img_transform(self, results: dict) -> dict:

        img_scale_w, img_scale_h = self.img_scale

-        if len(results['img'].shape) == 3:
+        if len(results["img"].shape) == 3:
            mosaic_img = np.full(
                (int(img_scale_h * 3), int(img_scale_w * 3), 3),
                self.pad_val,
-                dtype=results['img'].dtype)
+                dtype=results["img"].dtype,
+            )
        else:
-            mosaic_img = np.full((int(img_scale_h * 3), int(img_scale_w * 3)),
-                                 self.pad_val,
-                                 dtype=results['img'].dtype)
+            mosaic_img = np.full(
+                (int(img_scale_h * 3), int(img_scale_w * 3)),
+                self.pad_val,
+                dtype=results["img"].dtype,
+            )

        # index = 0 means the original image
        # len(results['mix_results']) = 8
-        loc_strs = ('center', 'top', 'top_right', 'right', 'bottom_right',
-                    'bottom', 'bottom_left', 'left', 'top_left')
-
-        results_all = [results, *results['mix_results']]
+        loc_strs = (
+            "center",
+            "top",
+            "top_right",
+            "right",
+            "bottom_right",
+            "bottom",
+            "bottom_left",
+            "left",
+            "top_left",
+        )
+
+        results_all = [results, *results["mix_results"]]
        for index, results_patch in enumerate(results_all):
-            img_i = results_patch['img']
+            img_i = results_patch["img"]
            # keep_ratio resize
            img_i_h, img_i_w = img_i.shape[:2]
            scale_ratio_i = min(img_scale_h / img_i_h, img_scale_w / img_i_w)
            img_i = mmcv.imresize(
-                img_i,
-                (int(img_i_w * scale_ratio_i), int(img_i_h * scale_ratio_i)))
+                img_i, (int(img_i_w * scale_ratio_i), int(img_i_h * scale_ratio_i))
+            )

-            paste_coord = self._mosaic_combine(loc_strs[index],
-                                               img_i.shape[:2])
+            paste_coord = self._mosaic_combine(loc_strs[index], img_i.shape[:2])
            padw, padh = paste_coord[:2]

            x1, y1, x2, y2 = (max(x, 0) for x in paste_coord)
-            mosaic_img[y1:y2, x1:x2] = img_i[y1 - padh:, x1 - padw:]
+            mosaic_img[y1:y2, x1:x2] = img_i[y1 - padh :, x1 - padw :]

-            gt_bboxes_i = results_patch['gt_bboxes']
-            gt_bboxes_labels_i = 
results_patch['gt_bboxes_labels'] - gt_ignore_flags_i = results_patch['gt_ignore_flags'] + gt_bboxes_i = results_patch["gt_bboxes"] + gt_bboxes_labels_i = results_patch["gt_bboxes_labels"] + gt_ignore_flags_i = results_patch["gt_ignore_flags"] gt_bboxes_i.rescale_([scale_ratio_i, scale_ratio_i]) gt_bboxes_i.translate_([padw, padh]) @@ -651,8 +695,9 @@ def mix_img_transform(self, results: dict) -> dict: # Offset offset_x = int(random.uniform(0, img_scale_w)) offset_y = int(random.uniform(0, img_scale_h)) - mosaic_img = mosaic_img[offset_y:offset_y + 2 * img_scale_h, - offset_x:offset_x + 2 * img_scale_w] + mosaic_img = mosaic_img[ + offset_y : offset_y + 2 * img_scale_h, offset_x : offset_x + 2 * img_scale_w + ] mosaic_bboxes = mosaic_bboxes[0].cat(mosaic_bboxes, 0) mosaic_bboxes.translate_([-offset_x, -offset_y]) @@ -664,20 +709,22 @@ def mix_img_transform(self, results: dict) -> dict: else: # remove outside bboxes inside_inds = mosaic_bboxes.is_inside( - [2 * img_scale_h, 2 * img_scale_w]).numpy() + [2 * img_scale_h, 2 * img_scale_w] + ).numpy() mosaic_bboxes = mosaic_bboxes[inside_inds] mosaic_bboxes_labels = mosaic_bboxes_labels[inside_inds] mosaic_ignore_flags = mosaic_ignore_flags[inside_inds] - results['img'] = mosaic_img - results['img_shape'] = mosaic_img.shape - results['gt_bboxes'] = mosaic_bboxes - results['gt_bboxes_labels'] = mosaic_bboxes_labels - results['gt_ignore_flags'] = mosaic_ignore_flags + results["img"] = mosaic_img + results["img_shape"] = mosaic_img.shape + results["gt_bboxes"] = mosaic_bboxes + results["gt_bboxes_labels"] = mosaic_bboxes_labels + results["gt_ignore_flags"] = mosaic_ignore_flags return results - def _mosaic_combine(self, loc: str, - img_shape_hw: Tuple[int, int]) -> Tuple[int, ...]: + def _mosaic_combine( + self, loc: str, img_shape_hw: Tuple[int, int] + ) -> Tuple[int, ...]: """Calculate global coordinate of mosaic image. Args: @@ -687,8 +734,17 @@ def _mosaic_combine(self, loc: str, Returns: paste_coord (tuple): paste corner coordinate in mosaic image. 
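
        Note (editor's addition, not in the original docstring): the nine
        patches are pasted around the ``center`` patch on a canvas of size
        ``(3 * img_scale_h, 3 * img_scale_w)``; a ``2 * img_scale`` window
        with a random offset is then cropped from that canvas:

            >>> import random
            >>> offset_x = int(random.uniform(0, img_scale_w))
            >>> offset_y = int(random.uniform(0, img_scale_h))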
""" - assert loc in ('center', 'top', 'top_right', 'right', 'bottom_right', - 'bottom', 'bottom_left', 'left', 'top_left') + assert loc in ( + "center", + "top", + "top_right", + "right", + "bottom_right", + "bottom", + "bottom_left", + "left", + "top_left", + ) img_scale_w, img_scale_h = self.img_scale @@ -697,55 +753,71 @@ def _mosaic_combine(self, loc: str, previous_img_h, previous_img_w = self._previous_img_shape center_img_h, center_img_w = self._center_img_shape - if loc == 'center': + if loc == "center": self._center_img_shape = self._current_img_shape # xmin, ymin, xmax, ymax - paste_coord = img_scale_w, \ - img_scale_h, \ - img_scale_w + current_img_w, \ - img_scale_h + current_img_h - elif loc == 'top': - paste_coord = img_scale_w, \ - img_scale_h - current_img_h, \ - img_scale_w + current_img_w, \ - img_scale_h - elif loc == 'top_right': - paste_coord = img_scale_w + previous_img_w, \ - img_scale_h - current_img_h, \ - img_scale_w + previous_img_w + current_img_w, \ - img_scale_h - elif loc == 'right': - paste_coord = img_scale_w + center_img_w, \ - img_scale_h, \ - img_scale_w + center_img_w + current_img_w, \ - img_scale_h + current_img_h - elif loc == 'bottom_right': - paste_coord = img_scale_w + center_img_w, \ - img_scale_h + previous_img_h, \ - img_scale_w + center_img_w + current_img_w, \ - img_scale_h + previous_img_h + current_img_h - elif loc == 'bottom': - paste_coord = img_scale_w + center_img_w - current_img_w, \ - img_scale_h + center_img_h, \ - img_scale_w + center_img_w, \ - img_scale_h + center_img_h + current_img_h - elif loc == 'bottom_left': - paste_coord = img_scale_w + center_img_w - \ - previous_img_w - current_img_w, \ - img_scale_h + center_img_h, \ - img_scale_w + center_img_w - previous_img_w, \ - img_scale_h + center_img_h + current_img_h - elif loc == 'left': - paste_coord = img_scale_w - current_img_w, \ - img_scale_h + center_img_h - current_img_h, \ - img_scale_w, \ - img_scale_h + center_img_h - elif loc == 'top_left': - paste_coord = img_scale_w - current_img_w, \ - img_scale_h + center_img_h - \ - previous_img_h - current_img_h, \ - img_scale_w, \ - img_scale_h + center_img_h - previous_img_h + paste_coord = ( + img_scale_w, + img_scale_h, + img_scale_w + current_img_w, + img_scale_h + current_img_h, + ) + elif loc == "top": + paste_coord = ( + img_scale_w, + img_scale_h - current_img_h, + img_scale_w + current_img_w, + img_scale_h, + ) + elif loc == "top_right": + paste_coord = ( + img_scale_w + previous_img_w, + img_scale_h - current_img_h, + img_scale_w + previous_img_w + current_img_w, + img_scale_h, + ) + elif loc == "right": + paste_coord = ( + img_scale_w + center_img_w, + img_scale_h, + img_scale_w + center_img_w + current_img_w, + img_scale_h + current_img_h, + ) + elif loc == "bottom_right": + paste_coord = ( + img_scale_w + center_img_w, + img_scale_h + previous_img_h, + img_scale_w + center_img_w + current_img_w, + img_scale_h + previous_img_h + current_img_h, + ) + elif loc == "bottom": + paste_coord = ( + img_scale_w + center_img_w - current_img_w, + img_scale_h + center_img_h, + img_scale_w + center_img_w, + img_scale_h + center_img_h + current_img_h, + ) + elif loc == "bottom_left": + paste_coord = ( + img_scale_w + center_img_w - previous_img_w - current_img_w, + img_scale_h + center_img_h, + img_scale_w + center_img_w - previous_img_w, + img_scale_h + center_img_h + current_img_h, + ) + elif loc == "left": + paste_coord = ( + img_scale_w - current_img_w, + img_scale_h + center_img_h - current_img_h, + img_scale_w, + 
img_scale_h + center_img_h, + ) + elif loc == "top_left": + paste_coord = ( + img_scale_w - current_img_w, + img_scale_h + center_img_h - previous_img_h - current_img_h, + img_scale_w, + img_scale_h + center_img_h - previous_img_h, + ) self._previous_img_shape = self._current_img_shape # xmin, ymin, xmax, ymax @@ -753,9 +825,9 @@ def _mosaic_combine(self, loc: str, def __repr__(self) -> str: repr_str = self.__class__.__name__ - repr_str += f'(img_scale={self.img_scale}, ' - repr_str += f'pad_val={self.pad_val}, ' - repr_str += f'prob={self.prob})' + repr_str += f"(img_scale={self.img_scale}, " + repr_str += f"pad_val={self.pad_val}, " + repr_str += f"prob={self.prob})" return repr_str @@ -812,25 +884,29 @@ class YOLOv5MixUp(BaseMixImageTransform): empty, then the iteration is terminated. Defaults to 15. """ - def __init__(self, - alpha: float = 32.0, - beta: float = 32.0, - pre_transform: Sequence[dict] = None, - prob: float = 1.0, - use_cached: bool = False, - max_cached_images: int = 20, - random_pop: bool = True, - max_refetch: int = 15): + def __init__( + self, + alpha: float = 32.0, + beta: float = 32.0, + pre_transform: Sequence[dict] = None, + prob: float = 1.0, + use_cached: bool = False, + max_cached_images: int = 20, + random_pop: bool = True, + max_refetch: int = 15, + ): if use_cached: - assert max_cached_images >= 2, 'The length of cache must >= 2, ' \ - f'but got {max_cached_images}.' + assert max_cached_images >= 2, ( + "The length of cache must >= 2, " f"but got {max_cached_images}." + ) super().__init__( pre_transform=pre_transform, prob=prob, use_cached=use_cached, max_cached_images=max_cached_images, random_pop=random_pop, - max_refetch=max_refetch) + max_refetch=max_refetch, + ) self.alpha = alpha self.beta = beta @@ -854,34 +930,37 @@ def mix_img_transform(self, results: dict) -> dict: Returns: results (dict): Updated result dict. 
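
        Example (editor's sketch, not part of the original docstring): the
        blend ratio is drawn from ``Beta(alpha, beta)``; with the default
        ``alpha = beta = 32.0`` it concentrates around 0.5:

            >>> import numpy as np
            >>> ratio = np.random.beta(32.0, 32.0)
            >>> # mixup_img = ori_img * ratio + retrieve_img * (1 - ratio)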
""" - assert 'mix_results' in results + assert "mix_results" in results - retrieve_results = results['mix_results'][0] - retrieve_img = retrieve_results['img'] - ori_img = results['img'] + retrieve_results = results["mix_results"][0] + retrieve_img = retrieve_results["img"] + ori_img = results["img"] assert ori_img.shape == retrieve_img.shape # Randomly obtain the fusion ratio from the beta distribution, # which is around 0.5 ratio = np.random.beta(self.alpha, self.beta) - mixup_img = (ori_img * ratio + retrieve_img * (1 - ratio)) + mixup_img = ori_img * ratio + retrieve_img * (1 - ratio) - retrieve_gt_bboxes = retrieve_results['gt_bboxes'] - retrieve_gt_bboxes_labels = retrieve_results['gt_bboxes_labels'] - retrieve_gt_ignore_flags = retrieve_results['gt_ignore_flags'] + retrieve_gt_bboxes = retrieve_results["gt_bboxes"] + retrieve_gt_bboxes_labels = retrieve_results["gt_bboxes_labels"] + retrieve_gt_ignore_flags = retrieve_results["gt_ignore_flags"] mixup_gt_bboxes = retrieve_gt_bboxes.cat( - (results['gt_bboxes'], retrieve_gt_bboxes), dim=0) + (results["gt_bboxes"], retrieve_gt_bboxes), dim=0 + ) mixup_gt_bboxes_labels = np.concatenate( - (results['gt_bboxes_labels'], retrieve_gt_bboxes_labels), axis=0) + (results["gt_bboxes_labels"], retrieve_gt_bboxes_labels), axis=0 + ) mixup_gt_ignore_flags = np.concatenate( - (results['gt_ignore_flags'], retrieve_gt_ignore_flags), axis=0) + (results["gt_ignore_flags"], retrieve_gt_ignore_flags), axis=0 + ) - results['img'] = mixup_img.astype(np.uint8) - results['img_shape'] = mixup_img.shape - results['gt_bboxes'] = mixup_gt_bboxes - results['gt_bboxes_labels'] = mixup_gt_bboxes_labels - results['gt_ignore_flags'] = mixup_gt_ignore_flags + results["img"] = mixup_img.astype(np.uint8) + results["img_shape"] = mixup_img.shape + results["gt_bboxes"] = mixup_gt_bboxes + results["gt_bboxes_labels"] = mixup_gt_bboxes_labels + results["gt_ignore_flags"] = mixup_gt_ignore_flags return results @@ -960,29 +1039,33 @@ class YOLOXMixUp(BaseMixImageTransform): empty, then the iteration is terminated. Defaults to 15. """ - def __init__(self, - img_scale: Tuple[int, int] = (640, 640), - ratio_range: Tuple[float, float] = (0.5, 1.5), - flip_ratio: float = 0.5, - pad_val: float = 114.0, - bbox_clip_border: bool = True, - pre_transform: Sequence[dict] = None, - prob: float = 1.0, - use_cached: bool = False, - max_cached_images: int = 20, - random_pop: bool = True, - max_refetch: int = 15): + def __init__( + self, + img_scale: Tuple[int, int] = (640, 640), + ratio_range: Tuple[float, float] = (0.5, 1.5), + flip_ratio: float = 0.5, + pad_val: float = 114.0, + bbox_clip_border: bool = True, + pre_transform: Sequence[dict] = None, + prob: float = 1.0, + use_cached: bool = False, + max_cached_images: int = 20, + random_pop: bool = True, + max_refetch: int = 15, + ): assert isinstance(img_scale, tuple) if use_cached: - assert max_cached_images >= 2, 'The length of cache must >= 2, ' \ - f'but got {max_cached_images}.' + assert max_cached_images >= 2, ( + "The length of cache must >= 2, " f"but got {max_cached_images}." + ) super().__init__( pre_transform=pre_transform, prob=prob, use_cached=use_cached, max_cached_images=max_cached_images, random_pop=random_pop, - max_refetch=max_refetch) + max_refetch=max_refetch, + ) self.img_scale = img_scale self.ratio_range = ratio_range self.flip_ratio = flip_ratio @@ -1009,52 +1092,66 @@ def mix_img_transform(self, results: dict) -> dict: Returns: results (dict): Updated result dict. 
""" - assert 'mix_results' in results - assert len( - results['mix_results']) == 1, 'MixUp only support 2 images now !' + assert "mix_results" in results + assert len(results["mix_results"]) == 1, "MixUp only support 2 images now !" - if results['mix_results'][0]['gt_bboxes'].shape[0] == 0: + if results["mix_results"][0]["gt_bboxes"].shape[0] == 0: # empty bbox return results - retrieve_results = results['mix_results'][0] - retrieve_img = retrieve_results['img'] + retrieve_results = results["mix_results"][0] + retrieve_img = retrieve_results["img"] jit_factor = random.uniform(*self.ratio_range) is_filp = random.uniform(0, 1) > self.flip_ratio if len(retrieve_img.shape) == 3: - out_img = np.ones((self.img_scale[1], self.img_scale[0], 3), - dtype=retrieve_img.dtype) * self.pad_val + out_img = ( + np.ones( + (self.img_scale[1], self.img_scale[0], 3), dtype=retrieve_img.dtype + ) + * self.pad_val + ) else: - out_img = np.ones( - self.img_scale[::-1], dtype=retrieve_img.dtype) * self.pad_val + out_img = ( + np.ones(self.img_scale[::-1], dtype=retrieve_img.dtype) * self.pad_val + ) # 1. keep_ratio resize - scale_ratio = min(self.img_scale[1] / retrieve_img.shape[0], - self.img_scale[0] / retrieve_img.shape[1]) + scale_ratio = min( + self.img_scale[1] / retrieve_img.shape[0], + self.img_scale[0] / retrieve_img.shape[1], + ) retrieve_img = mmcv.imresize( - retrieve_img, (int(retrieve_img.shape[1] * scale_ratio), - int(retrieve_img.shape[0] * scale_ratio))) + retrieve_img, + ( + int(retrieve_img.shape[1] * scale_ratio), + int(retrieve_img.shape[0] * scale_ratio), + ), + ) # 2. paste - out_img[:retrieve_img.shape[0], :retrieve_img.shape[1]] = retrieve_img + out_img[: retrieve_img.shape[0], : retrieve_img.shape[1]] = retrieve_img # 3. scale jit scale_ratio *= jit_factor - out_img = mmcv.imresize(out_img, (int(out_img.shape[1] * jit_factor), - int(out_img.shape[0] * jit_factor))) + out_img = mmcv.imresize( + out_img, + (int(out_img.shape[1] * jit_factor), int(out_img.shape[0] * jit_factor)), + ) # 4. flip if is_filp: out_img = out_img[:, ::-1, :] # 5. random crop - ori_img = results['img'] + ori_img = results["img"] origin_h, origin_w = out_img.shape[:2] target_h, target_w = ori_img.shape[:2] - padded_img = np.ones((max(origin_h, target_h), max( - origin_w, target_w), 3)) * self.pad_val + padded_img = ( + np.ones((max(origin_h, target_h), max(origin_w, target_w), 3)) + * self.pad_val + ) padded_img = padded_img.astype(np.uint8) padded_img[:origin_h, :origin_w] = out_img @@ -1063,18 +1160,18 @@ def mix_img_transform(self, results: dict) -> dict: y_offset = random.randint(0, padded_img.shape[0] - target_h) if padded_img.shape[1] > target_w: x_offset = random.randint(0, padded_img.shape[1] - target_w) - padded_cropped_img = padded_img[y_offset:y_offset + target_h, - x_offset:x_offset + target_w] + padded_cropped_img = padded_img[ + y_offset : y_offset + target_h, x_offset : x_offset + target_w + ] # 6. adjust bbox - retrieve_gt_bboxes = retrieve_results['gt_bboxes'] + retrieve_gt_bboxes = retrieve_results["gt_bboxes"] retrieve_gt_bboxes.rescale_([scale_ratio, scale_ratio]) if self.bbox_clip_border: retrieve_gt_bboxes.clip_([origin_h, origin_w]) if is_filp: - retrieve_gt_bboxes.flip_([origin_h, origin_w], - direction='horizontal') + retrieve_gt_bboxes.flip_([origin_h, origin_w], direction="horizontal") # 7. filter cp_retrieve_gt_bboxes = retrieve_gt_bboxes.clone() @@ -1085,38 +1182,40 @@ def mix_img_transform(self, results: dict) -> dict: # 8. 
mix up mixup_img = 0.5 * ori_img + 0.5 * padded_cropped_img - retrieve_gt_bboxes_labels = retrieve_results['gt_bboxes_labels'] - retrieve_gt_ignore_flags = retrieve_results['gt_ignore_flags'] + retrieve_gt_bboxes_labels = retrieve_results["gt_bboxes_labels"] + retrieve_gt_ignore_flags = retrieve_results["gt_ignore_flags"] mixup_gt_bboxes = cp_retrieve_gt_bboxes.cat( - (results['gt_bboxes'], cp_retrieve_gt_bboxes), dim=0) + (results["gt_bboxes"], cp_retrieve_gt_bboxes), dim=0 + ) mixup_gt_bboxes_labels = np.concatenate( - (results['gt_bboxes_labels'], retrieve_gt_bboxes_labels), axis=0) + (results["gt_bboxes_labels"], retrieve_gt_bboxes_labels), axis=0 + ) mixup_gt_ignore_flags = np.concatenate( - (results['gt_ignore_flags'], retrieve_gt_ignore_flags), axis=0) + (results["gt_ignore_flags"], retrieve_gt_ignore_flags), axis=0 + ) if not self.bbox_clip_border: # remove outside bbox - inside_inds = mixup_gt_bboxes.is_inside([target_h, - target_w]).numpy() + inside_inds = mixup_gt_bboxes.is_inside([target_h, target_w]).numpy() mixup_gt_bboxes = mixup_gt_bboxes[inside_inds] mixup_gt_bboxes_labels = mixup_gt_bboxes_labels[inside_inds] mixup_gt_ignore_flags = mixup_gt_ignore_flags[inside_inds] - results['img'] = mixup_img.astype(np.uint8) - results['img_shape'] = mixup_img.shape - results['gt_bboxes'] = mixup_gt_bboxes - results['gt_bboxes_labels'] = mixup_gt_bboxes_labels - results['gt_ignore_flags'] = mixup_gt_ignore_flags + results["img"] = mixup_img.astype(np.uint8) + results["img_shape"] = mixup_img.shape + results["gt_bboxes"] = mixup_gt_bboxes + results["gt_bboxes_labels"] = mixup_gt_bboxes_labels + results["gt_ignore_flags"] = mixup_gt_ignore_flags return results def __repr__(self) -> str: repr_str = self.__class__.__name__ - repr_str += f'(img_scale={self.img_scale}, ' - repr_str += f'ratio_range={self.ratio_range}, ' - repr_str += f'flip_ratio={self.flip_ratio}, ' - repr_str += f'pad_val={self.pad_val}, ' - repr_str += f'max_refetch={self.max_refetch}, ' - repr_str += f'bbox_clip_border={self.bbox_clip_border})' + repr_str += f"(img_scale={self.img_scale}, " + repr_str += f"ratio_range={self.ratio_range}, " + repr_str += f"flip_ratio={self.flip_ratio}, " + repr_str += f"pad_val={self.pad_val}, " + repr_str += f"max_refetch={self.max_refetch}, " + repr_str += f"bbox_clip_border={self.bbox_clip_border})" return repr_str diff --git a/mmyolo/mmyolo/datasets/transforms/transforms.py b/mmyolo/mmyolo/datasets/transforms/transforms.py index 720f7756..b3ec325a 100644 --- a/mmyolo/mmyolo/datasets/transforms/transforms.py +++ b/mmyolo/mmyolo/datasets/transforms/transforms.py @@ -10,8 +10,7 @@ from mmcv.transforms.utils import cache_randomness from mmdet.datasets.transforms import LoadAnnotations as MMDET_LoadAnnotations from mmdet.datasets.transforms import Resize as MMDET_Resize -from mmdet.structures.bbox import (HorizontalBoxes, autocast_box_type, - get_box_type) +from mmdet.structures.bbox import HorizontalBoxes, autocast_box_type, get_box_type from numpy import random from mmyolo.registry import TRANSFORMS @@ -44,16 +43,16 @@ class YOLOv5KeepRatioResize(MMDET_Resize): scale (Union[int, Tuple[int, int]]): Images scales for resizing. 
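
    Example (editor's sketch, not part of the original docstring):

        >>> # With scale=(640, 640), a 1280x960 input is rescaled by
        >>> # min(640 / 1280, 640 / 960) = 0.5, preserving the aspect ratio.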
""" - def __init__(self, - scale: Union[int, Tuple[int, int]], - keep_ratio: bool = True, - **kwargs): + def __init__( + self, scale: Union[int, Tuple[int, int]], keep_ratio: bool = True, **kwargs + ): assert keep_ratio is True super().__init__(scale=scale, keep_ratio=True, **kwargs) @staticmethod - def _get_rescale_ratio(old_size: Tuple[int, int], - scale: Union[float, Tuple[int]]) -> float: + def _get_rescale_ratio( + old_size: Tuple[int, int], scale: Union[float, Tuple[int]] + ) -> float: """Calculate the ratio for rescaling. Args: @@ -70,16 +69,16 @@ def _get_rescale_ratio(old_size: Tuple[int, int], w, h = old_size if isinstance(scale, (float, int)): if scale <= 0: - raise ValueError(f'Invalid scale {scale}, must be positive.') + raise ValueError(f"Invalid scale {scale}, must be positive.") scale_factor = scale elif isinstance(scale, tuple): max_long_edge = max(scale) max_short_edge = min(scale) - scale_factor = min(max_long_edge / max(h, w), - max_short_edge / min(h, w)) + scale_factor = min(max_long_edge / max(h, w), max_short_edge / min(h, w)) else: - raise TypeError('Scale must be a number or tuple of int, ' - f'but got {type(scale)}') + raise TypeError( + "Scale must be a number or tuple of int, " f"but got {type(scale)}" + ) return scale_factor @@ -87,28 +86,28 @@ def _resize_img(self, results: dict): """Resize images with ``results['scale']``.""" assert self.keep_ratio is True - if results.get('img', None) is not None: - image = results['img'] + if results.get("img", None) is not None: + image = results["img"] original_h, original_w = image.shape[:2] - ratio = self._get_rescale_ratio((original_h, original_w), - self.scale) + ratio = self._get_rescale_ratio((original_h, original_w), self.scale) if ratio != 1: # resize image according to the ratio image = mmcv.imrescale( img=image, scale=ratio, - interpolation='area' if ratio < 1 else 'bilinear', - backend=self.backend) + interpolation="area" if ratio < 1 else "bilinear", + backend=self.backend, + ) resized_h, resized_w = image.shape[:2] scale_ratio = resized_h / original_h scale_factor = (scale_ratio, scale_ratio) - results['img'] = image - results['img_shape'] = image.shape[:2] - results['scale_factor'] = scale_factor + results["img"] = image + results["img_shape"] = image.shape[:2] + results["scale_factor"] = scale_factor @TRANSFORMS.register_module() @@ -139,20 +138,23 @@ class LetterResize(MMDET_Resize): allow_scale_up (bool): Allow scale up when ratio > 1. 

 @TRANSFORMS.register_module()
@@ -139,20 +138,23 @@ class LetterResize(MMDET_Resize):
         allow_scale_up (bool): Allow scale up when ratio > 1.
             Defaults to True
     """

-    def __init__(self,
-                 scale: Union[int, Tuple[int, int]],
-                 pad_val: dict = dict(img=0, mask=0, seg=255),
-                 use_mini_pad: bool = False,
-                 stretch_only: bool = False,
-                 allow_scale_up: bool = True,
-                 **kwargs):
+    def __init__(
+        self,
+        scale: Union[int, Tuple[int, int]],
+        pad_val: dict = dict(img=0, mask=0, seg=255),
+        use_mini_pad: bool = False,
+        stretch_only: bool = False,
+        allow_scale_up: bool = True,
+        **kwargs,
+    ):
         super().__init__(scale=scale, keep_ratio=True, **kwargs)

         self.pad_val = pad_val
         if isinstance(pad_val, (int, float)):
             pad_val = dict(img=pad_val, seg=255)
         assert isinstance(
-            pad_val, dict), f'pad_val must be dict, but got {type(pad_val)}'
+            pad_val, dict
+        ), f"pad_val must be dict, but got {type(pad_val)}"

         self.use_mini_pad = use_mini_pad
         self.stretch_only = stretch_only
@@ -160,13 +162,13 @@ def __init__(self,

     def _resize_img(self, results: dict):
         """Resize images with ``results['scale']``."""
-        image = results.get('img', None)
+        image = results.get("img", None)
         if image is None:
             return

         # Use batch_shape if a batch_shape policy is configured
-        if 'batch_shape' in results:
-            scale = tuple(results['batch_shape'])  # hw
+        if "batch_shape" in results:
+            scale = tuple(results["batch_shape"])  # hw
         else:
             scale = self.scale[::-1]  # wh -> hw

@@ -182,13 +184,13 @@ def _resize_img(self, results: dict):
             ratio = [ratio, ratio]  # float -> (float, float) for (height, width)

         # compute the best size of the image
-        no_pad_shape = (int(round(image_shape[0] * ratio[0])),
-                        int(round(image_shape[1] * ratio[1])))
+        no_pad_shape = (
+            int(round(image_shape[0] * ratio[0])),
+            int(round(image_shape[1] * ratio[1])),
+        )

         # padding height & width
-        padding_h, padding_w = [
-            scale[0] - no_pad_shape[0], scale[1] - no_pad_shape[1]
-        ]
+        padding_h, padding_w = [scale[0] - no_pad_shape[0], scale[1] - no_pad_shape[1]]
         if self.use_mini_pad:
             # minimum rectangle padding
             padding_w, padding_h = np.mod(padding_w, 32), np.mod(padding_h, 32)
@@ -197,109 +199,126 @@ def _resize_img(self, results: dict):
             # stretch to the specified size directly
             padding_h, padding_w = 0.0, 0.0
             no_pad_shape = (scale[0], scale[1])
-            ratio = [scale[0] / image_shape[0],
-                     scale[1] / image_shape[1]]  # height, width ratios
+            ratio = [
+                scale[0] / image_shape[0],
+                scale[1] / image_shape[1],
+            ]  # height, width ratios

         if image_shape != no_pad_shape:
             # compare with no resize and padding size
             image = mmcv.imresize(
-                image, (no_pad_shape[1], no_pad_shape[0]),
+                image,
+                (no_pad_shape[1], no_pad_shape[0]),
                 interpolation=self.interpolation,
-                backend=self.backend)
+                backend=self.backend,
+            )

         scale_factor = (ratio[1], ratio[0])  # mmcv scale factor is (w, h)

-        if 'scale_factor' in results:
-            results['scale_factor_origin'] = results['scale_factor']
-        results['scale_factor'] = scale_factor
+        if "scale_factor" in results:
+            results["scale_factor_origin"] = results["scale_factor"]
+        results["scale_factor"] = scale_factor

         # padding
         top_padding, left_padding = int(round(padding_h // 2 - 0.1)), int(
-            round(padding_w // 2 - 0.1))
+            round(padding_w // 2 - 0.1)
+        )
         bottom_padding = padding_h - top_padding
         right_padding = padding_w - left_padding

-        padding_list = [
-            top_padding, bottom_padding, left_padding, right_padding
-        ]
-        if top_padding != 0 or bottom_padding != 0 or \
-                left_padding != 0 or right_padding != 0:
+        padding_list = [top_padding, bottom_padding, left_padding, right_padding]
+        if (
+            top_padding != 0
+            or bottom_padding != 0
+            or left_padding != 0
+            or right_padding != 0
+        ):

-            pad_val = self.pad_val.get('img', 0)
+            pad_val = self.pad_val.get("img", 0)
             if isinstance(pad_val, int) and image.ndim == 3:
                 pad_val = tuple(pad_val for _ in range(image.shape[2]))

             image = mmcv.impad(
                 img=image,
-                padding=(padding_list[2], padding_list[0], padding_list[3],
-                         padding_list[1]),
+                padding=(
+                    padding_list[2],
+                    padding_list[0],
+                    padding_list[3],
+                    padding_list[1],
+                ),
                 pad_val=pad_val,
-                padding_mode='constant')
+                padding_mode="constant",
+            )

-        results['img'] = image
-        results['img_shape'] = image.shape
-        if 'pad_param' in results:
-            results['pad_param_origin'] = results['pad_param'] * \
-                np.repeat(ratio, 2)
-        results['pad_param'] = np.array(padding_list, dtype=np.float32)
+        results["img"] = image
+        results["img_shape"] = image.shape
+        if "pad_param" in results:
+            results["pad_param_origin"] = results["pad_param"] * np.repeat(ratio, 2)
+        results["pad_param"] = np.array(padding_list, dtype=np.float32)

     def _resize_masks(self, results: dict):
         """Resize masks with ``results['scale']``"""
-        if results.get('gt_masks', None) is None:
+        if results.get("gt_masks", None) is None:
             return

         # resize the gt_masks
-        gt_mask_height = results['gt_masks'].height * \
-            results['scale_factor'][1]
-        gt_mask_width = results['gt_masks'].width * \
-            results['scale_factor'][0]
-        gt_masks = results['gt_masks'].resize(
-            (int(round(gt_mask_height)), int(round(gt_mask_width))))
+        gt_mask_height = results["gt_masks"].height * results["scale_factor"][1]
+        gt_mask_width = results["gt_masks"].width * results["scale_factor"][0]
+        gt_masks = results["gt_masks"].resize(
+            (int(round(gt_mask_height)), int(round(gt_mask_width)))
+        )

         # padding the gt_masks
         if len(gt_masks) == 0:
-            padded_masks = np.empty((0, *results['img_shape'][:2]),
-                                    dtype=np.uint8)
+            padded_masks = np.empty((0, *results["img_shape"][:2]), dtype=np.uint8)
         else:
             # TODO: The function is incorrect. Because the mask may not
             #  be able to pad.
-            padded_masks = np.stack([
-                mmcv.impad(
-                    mask,
-                    padding=(int(results['pad_param'][2]),
-                             int(results['pad_param'][0]),
-                             int(results['pad_param'][3]),
-                             int(results['pad_param'][1])),
-                    pad_val=self.pad_val.get('masks', 0)) for mask in gt_masks
-            ])
-            results['gt_masks'] = type(results['gt_masks'])(
-                padded_masks, *results['img_shape'][:2])
+            padded_masks = np.stack(
+                [
+                    mmcv.impad(
+                        mask,
+                        padding=(
+                            int(results["pad_param"][2]),
+                            int(results["pad_param"][0]),
+                            int(results["pad_param"][3]),
+                            int(results["pad_param"][1]),
+                        ),
+                        pad_val=self.pad_val.get("masks", 0),
+                    )
+                    for mask in gt_masks
+                ]
+            )
+            results["gt_masks"] = type(results["gt_masks"])(
+                padded_masks, *results["img_shape"][:2]
+            )

     def _resize_bboxes(self, results: dict):
         """Resize bounding boxes with ``results['scale_factor']``."""
-        if results.get('gt_bboxes', None) is None:
+        if results.get("gt_bboxes", None) is None:
             return
-        results['gt_bboxes'].rescale_(results['scale_factor'])
+        results["gt_bboxes"].rescale_(results["scale_factor"])

-        if len(results['pad_param']) != 4:
+        if len(results["pad_param"]) != 4:
             return
-        results['gt_bboxes'].translate_(
-            (results['pad_param'][2], results['pad_param'][0]))
+        results["gt_bboxes"].translate_(
+            (results["pad_param"][2], results["pad_param"][0])
+        )

         if self.clip_object_border:
-            results['gt_bboxes'].clip_(results['img_shape'])
+            results["gt_bboxes"].clip_(results["img_shape"])

     def transform(self, results: dict) -> dict:
         results = super().transform(results)
-        if 'scale_factor_origin' in results:
-            scale_factor_origin = results.pop('scale_factor_origin')
-            results['scale_factor'] = (results['scale_factor'][0] *
-                                       scale_factor_origin[0],
-                                       results['scale_factor'][1] *
-                                       scale_factor_origin[1])
-        if 'pad_param_origin' in results:
-            pad_param_origin = results.pop('pad_param_origin')
-            results['pad_param'] += pad_param_origin
+        if "scale_factor_origin" in results:
+            scale_factor_origin = results.pop("scale_factor_origin")
+            results["scale_factor"] = (
+                results["scale_factor"][0] * scale_factor_origin[0],
+                results["scale_factor"][1] * scale_factor_origin[1],
+            )
+        if "pad_param_origin" in results:
+            pad_param_origin = results.pop("pad_param_origin")
+            results["pad_param"] += pad_param_origin
         return results
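A rough standalone sketch of the letterbox arithmetic reflowed in `_resize_img` above: resize to fit, then split the leftover padding between opposite sides (toy sizes, NumPy only; the -0.1 bias mirrors the hunk and nudges rounding toward the bottom/right side):

import numpy as np

scale = (640, 640)        # target (h, w)
image_shape = (475, 640)  # source (h, w)
ratio = min(scale[0] / image_shape[0], scale[1] / image_shape[1])  # 1.0
no_pad_shape = (int(round(image_shape[0] * ratio)),
                int(round(image_shape[1] * ratio)))                # (475, 640)
padding_h = scale[0] - no_pad_shape[0]                             # 165
padding_w = scale[1] - no_pad_shape[1]                             # 0
top = int(round(padding_h // 2 - 0.1))
bottom = padding_h - top
print(top, bottom)  # 82 83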
""" - hsv_gains = \ - random.uniform(-1, 1, 3) * \ - [self.hue_delta, self.saturation_delta, self.value_delta] + 1 - hue, sat, val = cv2.split( - cv2.cvtColor(results['img'], cv2.COLOR_BGR2HSV)) + hsv_gains = ( + random.uniform(-1, 1, 3) + * [self.hue_delta, self.saturation_delta, self.value_delta] + + 1 + ) + hue, sat, val = cv2.split(cv2.cvtColor(results["img"], cv2.COLOR_BGR2HSV)) table_list = np.arange(0, 256, dtype=hsv_gains.dtype) lut_hue = ((table_list * hsv_gains[0]) % 180).astype(np.uint8) @@ -351,9 +373,9 @@ def transform(self, results: dict) -> dict: lut_val = np.clip(table_list * hsv_gains[2], 0, 255).astype(np.uint8) im_hsv = cv2.merge( - (cv2.LUT(hue, lut_hue), cv2.LUT(sat, - lut_sat), cv2.LUT(val, lut_val))) - results['img'] = cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR) + (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)) + ) + results["img"] = cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR) return results @@ -377,18 +399,19 @@ def _load_bboxes(self, results: dict): """ gt_bboxes = [] gt_ignore_flags = [] - for instance in results.get('instances', []): - if instance['ignore_flag'] == 0: - gt_bboxes.append(instance['bbox']) - gt_ignore_flags.append(instance['ignore_flag']) - results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool) + for instance in results.get("instances", []): + if instance["ignore_flag"] == 0: + gt_bboxes.append(instance["bbox"]) + gt_ignore_flags.append(instance["ignore_flag"]) + results["gt_ignore_flags"] = np.array(gt_ignore_flags, dtype=bool) if self.box_type is None: - results['gt_bboxes'] = np.array( - gt_bboxes, dtype=np.float32).reshape((-1, 4)) + results["gt_bboxes"] = np.array(gt_bboxes, dtype=np.float32).reshape( + (-1, 4) + ) else: _, box_type_cls = get_box_type(self.box_type) - results['gt_bboxes'] = box_type_cls(gt_bboxes, dtype=torch.float32) + results["gt_bboxes"] = box_type_cls(gt_bboxes, dtype=torch.float32) def _load_labels(self, results: dict): """Private function to load label annotations. @@ -402,11 +425,10 @@ def _load_labels(self, results: dict): dict: The dict contains loaded label annotations. """ gt_bboxes_labels = [] - for instance in results.get('instances', []): - if instance['ignore_flag'] == 0: - gt_bboxes_labels.append(instance['bbox_label']) - results['gt_bboxes_labels'] = np.array( - gt_bboxes_labels, dtype=np.int64) + for instance in results.get("instances", []): + if instance["ignore_flag"] == 0: + gt_bboxes_labels.append(instance["bbox_label"]) + results["gt_bboxes_labels"] = np.array(gt_bboxes_labels, dtype=np.int64) @TRANSFORMS.register_module() @@ -452,17 +474,19 @@ class YOLOv5RandomAffine(BaseTransform): need to clip the gt bboxes in these cases. Defaults to True. 
""" - def __init__(self, - max_rotate_degree: float = 10.0, - max_translate_ratio: float = 0.1, - scaling_ratio_range: Tuple[float, float] = (0.5, 1.5), - max_shear_degree: float = 2.0, - border: Tuple[int, int] = (0, 0), - border_val: Tuple[int, int, int] = (114, 114, 114), - bbox_clip_border: bool = True, - min_bbox_size: int = 2, - min_area_ratio: float = 0.1, - max_aspect_ratio: int = 20): + def __init__( + self, + max_rotate_degree: float = 10.0, + max_translate_ratio: float = 0.1, + scaling_ratio_range: Tuple[float, float] = (0.5, 1.5), + max_shear_degree: float = 2.0, + border: Tuple[int, int] = (0, 0), + border_val: Tuple[int, int, int] = (114, 114, 114), + bbox_clip_border: bool = True, + min_bbox_size: int = 2, + min_area_ratio: float = 0.1, + max_aspect_ratio: int = 20, + ): assert 0 <= max_translate_ratio <= 1 assert scaling_ratio_range[0] <= scaling_ratio_range[1] assert scaling_ratio_range[0] > 0 @@ -479,8 +503,9 @@ def __init__(self, self.max_aspect_ratio = max_aspect_ratio @cache_randomness - def _get_random_homography_matrix(self, height: int, - width: int) -> Tuple[np.ndarray, float]: + def _get_random_homography_matrix( + self, height: int, width: int + ) -> Tuple[np.ndarray, float]: """Get random homography matrix. Args: @@ -492,30 +517,37 @@ def _get_random_homography_matrix(self, height: int, scaling_ratio. """ # Rotation - rotation_degree = random.uniform(-self.max_rotate_degree, - self.max_rotate_degree) + rotation_degree = random.uniform( + -self.max_rotate_degree, self.max_rotate_degree + ) rotation_matrix = self._get_rotation_matrix(rotation_degree) # Scaling - scaling_ratio = random.uniform(self.scaling_ratio_range[0], - self.scaling_ratio_range[1]) + scaling_ratio = random.uniform( + self.scaling_ratio_range[0], self.scaling_ratio_range[1] + ) scaling_matrix = self._get_scaling_matrix(scaling_ratio) # Shear - x_degree = random.uniform(-self.max_shear_degree, - self.max_shear_degree) - y_degree = random.uniform(-self.max_shear_degree, - self.max_shear_degree) + x_degree = random.uniform(-self.max_shear_degree, self.max_shear_degree) + y_degree = random.uniform(-self.max_shear_degree, self.max_shear_degree) shear_matrix = self._get_shear_matrix(x_degree, y_degree) # Translation - trans_x = random.uniform(0.5 - self.max_translate_ratio, - 0.5 + self.max_translate_ratio) * width - trans_y = random.uniform(0.5 - self.max_translate_ratio, - 0.5 + self.max_translate_ratio) * height + trans_x = ( + random.uniform( + 0.5 - self.max_translate_ratio, 0.5 + self.max_translate_ratio + ) + * width + ) + trans_y = ( + random.uniform( + 0.5 - self.max_translate_ratio, 0.5 + self.max_translate_ratio + ) + * height + ) translate_matrix = self._get_translation_matrix(trans_x, trans_y) - warp_matrix = ( - translate_matrix @ shear_matrix @ rotation_matrix @ scaling_matrix) + warp_matrix = translate_matrix @ shear_matrix @ rotation_matrix @ scaling_matrix return warp_matrix, scaling_ratio @autocast_box_type() @@ -528,7 +560,7 @@ def transform(self, results: dict) -> dict: Returns: dict: The result dict. 
""" - img = results['img'] + img = results["img"] # self.border is wh format height = img.shape[0] + self.border[1] * 2 width = img.shape[1] + self.border[0] * 2 @@ -538,19 +570,16 @@ def transform(self, results: dict) -> dict: center_matrix[0, 2] = -img.shape[1] / 2 center_matrix[1, 2] = -img.shape[0] / 2 - warp_matrix, scaling_ratio = self._get_random_homography_matrix( - height, width) + warp_matrix, scaling_ratio = self._get_random_homography_matrix(height, width) warp_matrix = warp_matrix @ center_matrix img = cv2.warpPerspective( - img, - warp_matrix, - dsize=(width, height), - borderValue=self.border_val) - results['img'] = img - results['img_shape'] = img.shape - - bboxes = results['gt_bboxes'] + img, warp_matrix, dsize=(width, height), borderValue=self.border_val + ) + results["img"] = img + results["img_shape"] = img.shape + + bboxes = results["gt_bboxes"] num_bboxes = len(bboxes) if num_bboxes: orig_bboxes = bboxes.clone() @@ -565,18 +594,17 @@ def transform(self, results: dict) -> dict: # Be careful: valid_index must convert to numpy, # otherwise it will raise out of bounds when len(valid_index)=1 valid_index = self.filter_gt_bboxes(orig_bboxes, bboxes).numpy() - results['gt_bboxes'] = bboxes[valid_index] - results['gt_bboxes_labels'] = results['gt_bboxes_labels'][ - valid_index] - results['gt_ignore_flags'] = results['gt_ignore_flags'][ - valid_index] - - if 'gt_masks' in results: - raise NotImplementedError('RandomAffine only supports bbox.') + results["gt_bboxes"] = bboxes[valid_index] + results["gt_bboxes_labels"] = results["gt_bboxes_labels"][valid_index] + results["gt_ignore_flags"] = results["gt_ignore_flags"][valid_index] + + if "gt_masks" in results: + raise NotImplementedError("RandomAffine only supports bbox.") return results - def filter_gt_bboxes(self, origin_bboxes: HorizontalBoxes, - wrapped_bboxes: HorizontalBoxes) -> torch.Tensor: + def filter_gt_bboxes( + self, origin_bboxes: HorizontalBoxes, wrapped_bboxes: HorizontalBoxes + ) -> torch.Tensor: """Filter gt bboxes. 
Args: @@ -590,25 +618,28 @@ def filter_gt_bboxes(self, origin_bboxes: HorizontalBoxes, origin_h = origin_bboxes.heights wrapped_w = wrapped_bboxes.widths wrapped_h = wrapped_bboxes.heights - aspect_ratio = np.maximum(wrapped_w / (wrapped_h + 1e-16), - wrapped_h / (wrapped_w + 1e-16)) - - wh_valid_idx = (wrapped_w > self.min_bbox_size) & \ - (wrapped_h > self.min_bbox_size) - area_valid_idx = wrapped_w * wrapped_h / (origin_w * origin_h + - 1e-16) > self.min_area_ratio + aspect_ratio = np.maximum( + wrapped_w / (wrapped_h + 1e-16), wrapped_h / (wrapped_w + 1e-16) + ) + + wh_valid_idx = (wrapped_w > self.min_bbox_size) & ( + wrapped_h > self.min_bbox_size + ) + area_valid_idx = ( + wrapped_w * wrapped_h / (origin_w * origin_h + 1e-16) > self.min_area_ratio + ) aspect_ratio_valid_idx = aspect_ratio < self.max_aspect_ratio return wh_valid_idx & area_valid_idx & aspect_ratio_valid_idx def __repr__(self) -> str: repr_str = self.__class__.__name__ - repr_str += f'(max_rotate_degree={self.max_rotate_degree}, ' - repr_str += f'max_translate_ratio={self.max_translate_ratio}, ' - repr_str += f'scaling_ratio_range={self.scaling_ratio_range}, ' - repr_str += f'max_shear_degree={self.max_shear_degree}, ' - repr_str += f'border={self.border}, ' - repr_str += f'border_val={self.border_val}, ' - repr_str += f'bbox_clip_border={self.bbox_clip_border})' + repr_str += f"(max_rotate_degree={self.max_rotate_degree}, " + repr_str += f"max_translate_ratio={self.max_translate_ratio}, " + repr_str += f"scaling_ratio_range={self.scaling_ratio_range}, " + repr_str += f"max_shear_degree={self.max_shear_degree}, " + repr_str += f"border={self.border}, " + repr_str += f"border_val={self.border_val}, " + repr_str += f"bbox_clip_border={self.bbox_clip_border})" return repr_str @staticmethod @@ -623,9 +654,13 @@ def _get_rotation_matrix(rotate_degrees: float) -> np.ndarray: """ radian = math.radians(rotate_degrees) rotation_matrix = np.array( - [[np.cos(radian), -np.sin(radian), 0.], - [np.sin(radian), np.cos(radian), 0.], [0., 0., 1.]], - dtype=np.float32) + [ + [np.cos(radian), -np.sin(radian), 0.0], + [np.sin(radian), np.cos(radian), 0.0], + [0.0, 0.0, 1.0], + ], + dtype=np.float32, + ) return rotation_matrix @staticmethod @@ -639,13 +674,13 @@ def _get_scaling_matrix(scale_ratio: float) -> np.ndarray: np.ndarray: The scaling matrix. """ scaling_matrix = np.array( - [[scale_ratio, 0., 0.], [0., scale_ratio, 0.], [0., 0., 1.]], - dtype=np.float32) + [[scale_ratio, 0.0, 0.0], [0.0, scale_ratio, 0.0], [0.0, 0.0, 1.0]], + dtype=np.float32, + ) return scaling_matrix @staticmethod - def _get_shear_matrix(x_shear_degrees: float, - y_shear_degrees: float) -> np.ndarray: + def _get_shear_matrix(x_shear_degrees: float, y_shear_degrees: float) -> np.ndarray: """Get shear matrix. Args: @@ -657,9 +692,10 @@ def _get_shear_matrix(x_shear_degrees: float, """ x_radian = math.radians(x_shear_degrees) y_radian = math.radians(y_shear_degrees) - shear_matrix = np.array([[1, np.tan(x_radian), 0.], - [np.tan(y_radian), 1, 0.], [0., 0., 1.]], - dtype=np.float32) + shear_matrix = np.array( + [[1, np.tan(x_radian), 0.0], [np.tan(y_radian), 1, 0.0], [0.0, 0.0, 1.0]], + dtype=np.float32, + ) return shear_matrix @staticmethod @@ -673,8 +709,9 @@ def _get_translation_matrix(x: float, y: float) -> np.ndarray: Returns: np.ndarray: The translation matrix. 
""" - translation_matrix = np.array([[1, 0., x], [0., 1, y], [0., 0., 1.]], - dtype=np.float32) + translation_matrix = np.array( + [[1, 0.0, x], [0.0, 1, y], [0.0, 0.0, 1.0]], dtype=np.float32 + ) return translation_matrix @@ -703,81 +740,82 @@ class PPYOLOERandomDistort(BaseTransform): to 4. """ - def __init__(self, - hue_cfg: dict = dict(min=-18, max=18, prob=0.5), - saturation_cfg: dict = dict(min=0.5, max=1.5, prob=0.5), - contrast_cfg: dict = dict(min=0.5, max=1.5, prob=0.5), - brightness_cfg: dict = dict(min=0.5, max=1.5, prob=0.5), - num_distort_func: int = 4): + def __init__( + self, + hue_cfg: dict = dict(min=-18, max=18, prob=0.5), + saturation_cfg: dict = dict(min=0.5, max=1.5, prob=0.5), + contrast_cfg: dict = dict(min=0.5, max=1.5, prob=0.5), + brightness_cfg: dict = dict(min=0.5, max=1.5, prob=0.5), + num_distort_func: int = 4, + ): self.hue_cfg = hue_cfg self.saturation_cfg = saturation_cfg self.contrast_cfg = contrast_cfg self.brightness_cfg = brightness_cfg self.num_distort_func = num_distort_func - assert 0 < self.num_distort_func <= 4,\ - 'num_distort_func must > 0 and <= 4' + assert 0 < self.num_distort_func <= 4, "num_distort_func must > 0 and <= 4" for cfg in [ - self.hue_cfg, self.saturation_cfg, self.contrast_cfg, - self.brightness_cfg + self.hue_cfg, + self.saturation_cfg, + self.contrast_cfg, + self.brightness_cfg, ]: - assert 0. <= cfg['prob'] <= 1., 'prob must >=0 and <=1' + assert 0.0 <= cfg["prob"] <= 1.0, "prob must >=0 and <=1" def transform_hue(self, results): """Transform hue randomly.""" - if random.uniform(0., 1.) >= self.hue_cfg['prob']: + if random.uniform(0.0, 1.0) >= self.hue_cfg["prob"]: return results - img = results['img'] - delta = random.uniform(self.hue_cfg['min'], self.hue_cfg['max']) + img = results["img"] + delta = random.uniform(self.hue_cfg["min"], self.hue_cfg["max"]) u = np.cos(delta * np.pi) w = np.sin(delta * np.pi) delta_iq = np.array([[1.0, 0.0, 0.0], [0.0, u, -w], [0.0, w, u]]) - rgb2yiq_matrix = np.array([[0.114, 0.587, 0.299], - [-0.321, -0.274, 0.596], - [0.311, -0.523, 0.211]]) - yiq2rgb_matric = np.array([[1.0, -1.107, 1.705], [1.0, -0.272, -0.647], - [1.0, 0.956, 0.621]]) + rgb2yiq_matrix = np.array( + [[0.114, 0.587, 0.299], [-0.321, -0.274, 0.596], [0.311, -0.523, 0.211]] + ) + yiq2rgb_matric = np.array( + [[1.0, -1.107, 1.705], [1.0, -0.272, -0.647], [1.0, 0.956, 0.621]] + ) t = np.dot(np.dot(yiq2rgb_matric, delta_iq), rgb2yiq_matrix).T img = np.dot(img, t) - results['img'] = img + results["img"] = img return results def transform_saturation(self, results): """Transform saturation randomly.""" - if random.uniform(0., 1.) >= self.saturation_cfg['prob']: + if random.uniform(0.0, 1.0) >= self.saturation_cfg["prob"]: return results - img = results['img'] - delta = random.uniform(self.saturation_cfg['min'], - self.saturation_cfg['max']) + img = results["img"] + delta = random.uniform(self.saturation_cfg["min"], self.saturation_cfg["max"]) # convert bgr img to gray img gray = img * np.array([[[0.114, 0.587, 0.299]]], dtype=np.float32) gray = gray.sum(axis=2, keepdims=True) - gray *= (1.0 - delta) + gray *= 1.0 - delta img *= delta img += gray - results['img'] = img + results["img"] = img return results def transform_contrast(self, results): """Transform contrast randomly.""" - if random.uniform(0., 1.) 
>= self.contrast_cfg['prob']: + if random.uniform(0.0, 1.0) >= self.contrast_cfg["prob"]: return results - img = results['img'] - delta = random.uniform(self.contrast_cfg['min'], - self.contrast_cfg['max']) + img = results["img"] + delta = random.uniform(self.contrast_cfg["min"], self.contrast_cfg["max"]) img *= delta - results['img'] = img + results["img"] = img return results def transform_brightness(self, results): """Transform brightness randomly.""" - if random.uniform(0., 1.) >= self.brightness_cfg['prob']: + if random.uniform(0.0, 1.0) >= self.brightness_cfg["prob"]: return results - img = results['img'] - delta = random.uniform(self.brightness_cfg['min'], - self.brightness_cfg['max']) + img = results["img"] + delta = random.uniform(self.brightness_cfg["min"], self.brightness_cfg["max"]) img += delta - results['img'] = img + results["img"] = img return results def transform(self, results: dict) -> dict: @@ -789,13 +827,15 @@ def transform(self, results: dict) -> dict: Returns: dict: The result dict. """ - results['img'] = results['img'].astype(np.float32) + results["img"] = results["img"].astype(np.float32) functions = [ - self.transform_brightness, self.transform_contrast, - self.transform_saturation, self.transform_hue + self.transform_brightness, + self.transform_contrast, + self.transform_saturation, + self.transform_hue, ] - distortions = random.permutation(functions)[:self.num_distort_func] + distortions = random.permutation(functions)[: self.num_distort_func] for func in distortions: results = func(results) return results @@ -840,13 +880,15 @@ class PPYOLOERandomCrop(BaseTransform): Default to False. """ - def __init__(self, - aspect_ratio: List[float] = [.5, 2.], - thresholds: List[float] = [.0, .1, .3, .5, .7, .9], - scaling: List[float] = [.3, 1.], - num_attempts: int = 50, - allow_no_crop: bool = True, - cover_all_box: bool = False): + def __init__( + self, + aspect_ratio: List[float] = [0.5, 2.0], + thresholds: List[float] = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9], + scaling: List[float] = [0.3, 1.0], + num_attempts: int = 50, + allow_no_crop: bool = True, + cover_all_box: bool = False, + ): self.aspect_ratio = aspect_ratio self.thresholds = thresholds self.scaling = scaling @@ -854,8 +896,9 @@ def __init__(self, self.allow_no_crop = allow_no_crop self.cover_all_box = cover_all_box - def _crop_data(self, results: dict, crop_box: Tuple[int, int, int, int], - valid_inds: np.ndarray) -> Union[dict, None]: + def _crop_data( + self, results: dict, crop_box: Tuple[int, int, int, int], valid_inds: np.ndarray + ) -> Union[dict, None]: """Function to randomly crop images, bounding boxes, masks, semantic segmentation maps. @@ -872,38 +915,37 @@ def _crop_data(self, results: dict, crop_box: Tuple[int, int, int, int], be returned when there is no valid bbox after cropping. """ # crop the image - img = results['img'] + img = results["img"] crop_x1, crop_y1, crop_x2, crop_y2 = crop_box img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] 
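The `transform` method above shuffles the four photometric ops and applies a random prefix of them. A toy sketch of that dispatch pattern (the op bodies here are placeholders, and numpy.random.default_rng stands in for the legacy numpy random module used in the hunk):

import numpy as np

def brightness(img): return img + 0.1
def contrast(img):   return img * 1.1
def saturation(img): return img  # placeholder
def hue(img):        return img  # placeholder

rng = np.random.default_rng(0)
functions = [brightness, contrast, saturation, hue]
num_distort_func = 4

img = np.zeros((2, 2, 3), dtype=np.float32)
for fn in rng.permutation(functions)[:num_distort_func]:
    img = fn(img)  # each op is applied in the shuffled order
print(img[0, 0])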

@@ -840,13 +880,15 @@ class PPYOLOERandomCrop(BaseTransform):
             Default to False.
     """

-    def __init__(self,
-                 aspect_ratio: List[float] = [.5, 2.],
-                 thresholds: List[float] = [.0, .1, .3, .5, .7, .9],
-                 scaling: List[float] = [.3, 1.],
-                 num_attempts: int = 50,
-                 allow_no_crop: bool = True,
-                 cover_all_box: bool = False):
+    def __init__(
+        self,
+        aspect_ratio: List[float] = [0.5, 2.0],
+        thresholds: List[float] = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9],
+        scaling: List[float] = [0.3, 1.0],
+        num_attempts: int = 50,
+        allow_no_crop: bool = True,
+        cover_all_box: bool = False,
+    ):
         self.aspect_ratio = aspect_ratio
         self.thresholds = thresholds
         self.scaling = scaling
@@ -854,8 +896,9 @@ def __init__(self,
         self.allow_no_crop = allow_no_crop
         self.cover_all_box = cover_all_box

-    def _crop_data(self, results: dict, crop_box: Tuple[int, int, int, int],
-                   valid_inds: np.ndarray) -> Union[dict, None]:
+    def _crop_data(
+        self, results: dict, crop_box: Tuple[int, int, int, int], valid_inds: np.ndarray
+    ) -> Union[dict, None]:
         """Function to randomly crop images, bounding boxes, masks, semantic
         segmentation maps.

@@ -872,38 +915,37 @@ def _crop_data(self, results: dict, crop_box: Tuple[int, int, int, int],
             be returned when there is no valid bbox after cropping.
         """
         # crop the image
-        img = results['img']
+        img = results["img"]
         crop_x1, crop_y1, crop_x2, crop_y2 = crop_box
         img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
-        results['img'] = img
+        results["img"] = img
         img_shape = img.shape
-        results['img_shape'] = img.shape
+        results["img_shape"] = img.shape

         # crop bboxes accordingly and clip to the image boundary
-        if results.get('gt_bboxes', None) is not None:
-            bboxes = results['gt_bboxes']
+        if results.get("gt_bboxes", None) is not None:
+            bboxes = results["gt_bboxes"]
             bboxes.translate_([-crop_x1, -crop_y1])
             bboxes.clip_(img_shape[:2])
-            results['gt_bboxes'] = bboxes[valid_inds]
+            results["gt_bboxes"] = bboxes[valid_inds]

-            if results.get('gt_ignore_flags', None) is not None:
-                results['gt_ignore_flags'] = \
-                    results['gt_ignore_flags'][valid_inds]
+            if results.get("gt_ignore_flags", None) is not None:
+                results["gt_ignore_flags"] = results["gt_ignore_flags"][valid_inds]

-            if results.get('gt_bboxes_labels', None) is not None:
-                results['gt_bboxes_labels'] = \
-                    results['gt_bboxes_labels'][valid_inds]
+            if results.get("gt_bboxes_labels", None) is not None:
+                results["gt_bboxes_labels"] = results["gt_bboxes_labels"][valid_inds]

-            if results.get('gt_masks', None) is not None:
-                results['gt_masks'] = results['gt_masks'][
-                    valid_inds.nonzero()[0]].crop(
-                        np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]))
+            if results.get("gt_masks", None) is not None:
+                results["gt_masks"] = results["gt_masks"][valid_inds.nonzero()[0]].crop(
+                    np.asarray([crop_x1, crop_y1, crop_x2, crop_y2])
+                )

         # crop semantic seg
-        if results.get('gt_seg_map', None) is not None:
-            results['gt_seg_map'] = results['gt_seg_map'][crop_y1:crop_y2,
-                                                          crop_x1:crop_x2]
+        if results.get("gt_seg_map", None) is not None:
+            results["gt_seg_map"] = results["gt_seg_map"][
+                crop_y1:crop_y2, crop_x1:crop_x2
+            ]

         return results

@@ -917,21 +959,20 @@ def transform(self, results: dict) -> Union[dict, None]:
         Returns:
             dict: The result dict.
         """
-        if results.get('gt_bboxes', None) is None or len(
-                results['gt_bboxes']) == 0:
+        if results.get("gt_bboxes", None) is None or len(results["gt_bboxes"]) == 0:
             return results

-        orig_img_h, orig_img_w = results['img'].shape[:2]
-        gt_bboxes = results['gt_bboxes']
+        orig_img_h, orig_img_w = results["img"].shape[:2]
+        gt_bboxes = results["gt_bboxes"]

         thresholds = list(self.thresholds)
         if self.allow_no_crop:
-            thresholds.append('no_crop')
+            thresholds.append("no_crop")
         random.shuffle(thresholds)

         for thresh in thresholds:
             # Determine the coordinates for cropping
-            if thresh == 'no_crop':
+            if thresh == "no_crop":
                 return results

             found = False
@@ -950,8 +991,9 @@ def transform(self, results: dict) -> Union[dict, None]:
                 crop_box = [crop_x1, crop_y1, crop_x2, crop_y2]

                 # Calculate the iou between gt_bboxes and crop_boxes
-                iou = self._iou_matrix(gt_bboxes,
-                                       np.array([crop_box], dtype=np.float32))
+                iou = self._iou_matrix(
+                    gt_bboxes, np.array([crop_box], dtype=np.float32)
+                )
                 # If the maximum value of the iou is less than thresh,
                 # the current crop_box is considered invalid.
                 if iou.max() < thresh:
@@ -965,7 +1007,8 @@ def transform(self, results: dict) -> Union[dict, None]:
                 # Get which gt_bboxes to keep after cropping.
                 valid_inds = self._get_valid_inds(
-                    gt_bboxes, np.array(crop_box, dtype=np.float32))
+                    gt_bboxes, np.array(crop_box, dtype=np.float32)
+                )
                 if valid_inds.size > 0:
                     found = True
                     break
@@ -1007,7 +1050,8 @@ def _get_crop_size(self, image_size: Tuple[int, int]) -> Tuple[int, int]:
         if self.aspect_ratio is not None:
             min_ar, max_ar = self.aspect_ratio
             aspect_ratio = random.uniform(
-                max(min_ar, scale**2), min(max_ar, scale**-2))
+                max(min_ar, scale**2), min(max_ar, scale**-2)
+            )
             h_scale = scale / np.sqrt(aspect_ratio)
             w_scale = scale * np.sqrt(aspect_ratio)
         else:
@@ -1017,10 +1061,9 @@ def _get_crop_size(self, image_size: Tuple[int, int]) -> Tuple[int, int]:
         crop_w = w * w_scale
         return int(crop_h), int(crop_w)

-    def _iou_matrix(self,
-                    gt_bbox: HorizontalBoxes,
-                    crop_bbox: np.ndarray,
-                    eps: float = 1e-10) -> np.ndarray:
+    def _iou_matrix(
+        self, gt_bbox: HorizontalBoxes, crop_bbox: np.ndarray, eps: float = 1e-10
+    ) -> np.ndarray:
         """Calculate iou between gt and image crop box.

         Args:
@@ -1035,16 +1078,17 @@ def _iou_matrix(self,
         lefttop = np.maximum(gt_bbox[:, np.newaxis, :2], crop_bbox[:, :2])
         rightbottom = np.minimum(gt_bbox[:, np.newaxis, 2:], crop_bbox[:, 2:])

-        overlap = np.prod(
-            rightbottom - lefttop,
-            axis=2) * (lefttop < rightbottom).all(axis=2)
+        overlap = np.prod(rightbottom - lefttop, axis=2) * (lefttop < rightbottom).all(
+            axis=2
+        )
         area_gt_bbox = np.prod(gt_bbox[:, 2:] - crop_bbox[:, :2], axis=1)
         area_crop_bbox = np.prod(gt_bbox[:, 2:] - crop_bbox[:, :2], axis=1)
-        area_o = (area_gt_bbox[:, np.newaxis] + area_crop_bbox - overlap)
+        area_o = area_gt_bbox[:, np.newaxis] + area_crop_bbox - overlap
         return overlap / (area_o + eps)

-    def _get_valid_inds(self, gt_bbox: HorizontalBoxes,
-                        img_crop_bbox: np.ndarray) -> np.ndarray:
+    def _get_valid_inds(
+        self, gt_bbox: HorizontalBoxes, img_crop_bbox: np.ndarray
+    ) -> np.ndarray:
         """Get which Bboxes to keep at the current cropping coordinates.

         Args:
@@ -1064,9 +1108,11 @@ def _get_valid_inds(self, gt_bbox: HorizontalBoxes,
         cropped_box[:, 2:] -= img_crop_bbox[:2]
         centers = (gt_bbox[:, :2] + gt_bbox[:, 2:]) / 2
-        valid = np.logical_and(img_crop_bbox[:2] <= centers,
-                               centers < img_crop_bbox[2:]).all(axis=1)
         valid = np.logical_and(
-            valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
+            img_crop_bbox[:2] <= centers, centers < img_crop_bbox[2:]
+        ).all(axis=1)
+        valid = np.logical_and(
+            valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1)
+        )

         return np.where(valid)[0]
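A standalone sketch of the broadcast IoU between N ground-truth boxes and one crop box, as used in `_iou_matrix` above. Note this sketch computes each box's own area for the union, the textbook formula; the hunk above derives both areas from `gt_bbox[:, 2:] - crop_bbox[:, :2]`, which it reproduces verbatim from the source:

import numpy as np

gt = np.array([[0, 0, 10, 10], [5, 5, 15, 15]], dtype=np.float32)  # [x1,y1,x2,y2]
crop = np.array([[5, 5, 20, 20]], dtype=np.float32)

lt = np.maximum(gt[:, np.newaxis, :2], crop[:, :2])
rb = np.minimum(gt[:, np.newaxis, 2:], crop[:, 2:])
overlap = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
area_gt = np.prod(gt[:, 2:] - gt[:, :2], axis=1)
area_crop = np.prod(crop[:, 2:] - crop[:, :2], axis=1)
iou = overlap / (area_gt[:, np.newaxis] + area_crop - overlap + 1e-10)
print(iou)  # ~[[0.083], [0.444]]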
diff --git a/mmyolo/mmyolo/datasets/utils.py b/mmyolo/mmyolo/datasets/utils.py
index 0cca341b..a08de73b 100644
--- a/mmyolo/mmyolo/datasets/utils.py
+++ b/mmyolo/mmyolo/datasets/utils.py
@@ -9,8 +9,7 @@


 @COLLATE_FUNCTIONS.register_module()
-def yolov5_collate(data_batch: Sequence,
-                   use_ms_training: bool = False) -> dict:
+def yolov5_collate(data_batch: Sequence, use_ms_training: bool = False) -> dict:
     """Rewrite collate_fn to get faster training speed.

     Args:
@@ -20,26 +19,22 @@ def yolov5_collate(data_batch: Sequence,
     batch_imgs = []
     batch_bboxes_labels = []
     for i in range(len(data_batch)):
-        datasamples = data_batch[i]['data_samples']
-        inputs = data_batch[i]['inputs']
+        datasamples = data_batch[i]["data_samples"]
+        inputs = data_batch[i]["inputs"]

         gt_bboxes = datasamples.gt_instances.bboxes.tensor
         gt_labels = datasamples.gt_instances.labels
         batch_idx = gt_labels.new_full((len(gt_labels), 1), i)
-        bboxes_labels = torch.cat((batch_idx, gt_labels[:, None], gt_bboxes),
-                                  dim=1)
+        bboxes_labels = torch.cat((batch_idx, gt_labels[:, None], gt_bboxes), dim=1)
         batch_bboxes_labels.append(bboxes_labels)

         batch_imgs.append(inputs)
     if use_ms_training:
-        return {
-            'inputs': batch_imgs,
-            'data_samples': torch.cat(batch_bboxes_labels, 0)
-        }
+        return {"inputs": batch_imgs, "data_samples": torch.cat(batch_bboxes_labels, 0)}
     else:
         return {
-            'inputs': torch.stack(batch_imgs, 0),
-            'data_samples': torch.cat(batch_bboxes_labels, 0)
+            "inputs": torch.stack(batch_imgs, 0),
+            "data_samples": torch.cat(batch_bboxes_labels, 0),
         }

@@ -57,11 +52,13 @@ class BatchShapePolicy:
         extra_pad_ratio (float): Extra pad ratio. Defaults to 0.5.
     """

-    def __init__(self,
-                 batch_size: int = 32,
-                 img_size: int = 640,
-                 size_divisor: int = 32,
-                 extra_pad_ratio: float = 0.5):
+    def __init__(
+        self,
+        batch_size: int = 32,
+        img_size: int = 640,
+        size_divisor: int = 32,
+        extra_pad_ratio: float = 0.5,
+    ):
         self.batch_size = batch_size
         self.img_size = img_size
         self.size_divisor = size_divisor
@@ -70,13 +67,14 @@ def __init__(self,
     def __call__(self, data_list: List[dict]) -> List[dict]:
         image_shapes = []
         for data_info in data_list:
-            image_shapes.append((data_info['width'], data_info['height']))
+            image_shapes.append((data_info["width"], data_info["height"]))

         image_shapes = np.array(image_shapes, dtype=np.float64)

         n = len(image_shapes)  # number of images
         batch_index = np.floor(np.arange(n) / self.batch_size).astype(
-            np.int64)  # batch index
+            np.int64
+        )  # batch index
         number_of_batches = batch_index[-1] + 1  # number of batches

         aspect_ratio = image_shapes[:, 1] / image_shapes[:, 0]  # aspect ratio
@@ -89,18 +87,21 @@ def __call__(self, data_list: List[dict]) -> List[dict]:
         shapes = [[1, 1]] * number_of_batches
         for i in range(number_of_batches):
             aspect_ratio_index = aspect_ratio[batch_index == i]
-            min_index, max_index = aspect_ratio_index.min(
-            ), aspect_ratio_index.max()
+            min_index, max_index = aspect_ratio_index.min(), aspect_ratio_index.max()
             if max_index < 1:
                 shapes[i] = [max_index, 1]
             elif min_index > 1:
                 shapes[i] = [1, 1 / min_index]

-        batch_shapes = np.ceil(
-            np.array(shapes) * self.img_size / self.size_divisor +
-            self.extra_pad_ratio).astype(np.int64) * self.size_divisor
+        batch_shapes = (
+            np.ceil(
+                np.array(shapes) * self.img_size / self.size_divisor
+                + self.extra_pad_ratio
+            ).astype(np.int64)
+            * self.size_divisor
+        )

         for i, data_info in enumerate(data_list):
-            data_info['batch_shape'] = batch_shapes[batch_index[i]]
+            data_info["batch_shape"] = batch_shapes[batch_index[i]]

         return data_list
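The batch-shape arithmetic at the end of `__call__` above turns per-batch aspect ratios into padded shapes that are multiples of size_divisor. A numeric sketch with illustrative values:

import numpy as np

img_size, size_divisor, extra_pad_ratio = 640, 32, 0.5
shapes = np.array([[0.75, 1.0]])  # one batch whose widest image has h/w = 0.75
batch_shapes = (
    np.ceil(shapes * img_size / size_divisor + extra_pad_ratio).astype(np.int64)
    * size_divisor
)
print(batch_shapes)  # [[512 672]] -- hw, rounded up to a /32 grid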
diff --git a/mmyolo/mmyolo/datasets/yolov5_coco.py b/mmyolo/mmyolo/datasets/yolov5_coco.py
index 55bc899a..05ec5312 100644
--- a/mmyolo/mmyolo/datasets/yolov5_coco.py
+++ b/mmyolo/mmyolo/datasets/yolov5_coco.py
@@ -11,10 +11,7 @@ class BatchShapePolicyDataset(BaseDetDataset):
     pixels during batch inference process, which does not require the image
     scales of all batches to be the same throughout validation."""

-    def __init__(self,
-                 *args,
-                 batch_shapes_cfg: Optional[dict] = None,
-                 **kwargs):
+    def __init__(self, *args, batch_shapes_cfg: Optional[dict] = None, **kwargs):
         self.batch_shapes_cfg = batch_shapes_cfg
         super().__init__(*args, **kwargs)

@@ -49,7 +46,7 @@ def prepare_data(self, idx: int) -> Any:
         data augmentation, such as Mosaic and MixUp."""
         if self.test_mode is False:
             data_info = self.get_data_info(idx)
-            data_info['dataset'] = self
+            data_info["dataset"] = self
             return self.pipeline(data_info)
         else:
             return super().prepare_data(idx)
@@ -62,4 +59,3 @@ class YOLOv5CocoDataset(BatchShapePolicyDataset, CocoDataset):
     We only add `BatchShapePolicy` function compared with CocoDataset.
     See `mmyolo/datasets/utils.py#BatchShapePolicy` for details
     """
-    pass
diff --git a/mmyolo/mmyolo/datasets/yolov5_crowdhuman.py b/mmyolo/mmyolo/datasets/yolov5_crowdhuman.py
index 486a8324..062300f6 100644
--- a/mmyolo/mmyolo/datasets/yolov5_crowdhuman.py
+++ b/mmyolo/mmyolo/datasets/yolov5_crowdhuman.py
@@ -12,4 +12,3 @@ class YOLOv5CrowdHumanDataset(BatchShapePolicyDataset, CrowdHumanDataset):
     We only add `BatchShapePolicy` function compared with CrowdHumanDataset.
     See `mmyolo/datasets/utils.py#BatchShapePolicy` for details
     """
-    pass
diff --git a/mmyolo/mmyolo/datasets/yolov5_voc.py b/mmyolo/mmyolo/datasets/yolov5_voc.py
index 5be764f1..1e79b29a 100644
--- a/mmyolo/mmyolo/datasets/yolov5_voc.py
+++ b/mmyolo/mmyolo/datasets/yolov5_voc.py
@@ -2,6 +2,7 @@
 from mmdet.datasets import VOCDataset

 from mmyolo.datasets.yolov5_coco import BatchShapePolicyDataset
+
 from ..registry import DATASETS

@@ -12,4 +13,3 @@ class YOLOv5VOCDataset(BatchShapePolicyDataset, VOCDataset):
     We only add `BatchShapePolicy` function compared with VOCDataset.
     See `mmyolo/datasets/utils.py#BatchShapePolicy` for details
     """
-    pass
diff --git a/mmyolo/mmyolo/deploy/__init__.py b/mmyolo/mmyolo/deploy/__init__.py
index 4904a905..75f2c9d3 100644
--- a/mmyolo/mmyolo/deploy/__init__.py
+++ b/mmyolo/mmyolo/deploy/__init__.py
@@ -4,4 +4,4 @@
 from .models import *  # noqa: F401,F403
 from .object_detection import MMYOLO, YOLOObjectDetection

-__all__ = ['MMCodebase', 'MMYOLO', 'YOLOObjectDetection']
+__all__ = ["MMCodebase", "MMYOLO", "YOLOObjectDetection"]
diff --git a/mmyolo/mmyolo/deploy/models/dense_heads/__init__.py b/mmyolo/mmyolo/deploy/models/dense_heads/__init__.py
index cc423af3..0ca0b289 100644
--- a/mmyolo/mmyolo/deploy/models/dense_heads/__init__.py
+++ b/mmyolo/mmyolo/deploy/models/dense_heads/__init__.py
@@ -1,4 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from . import yolov5_head  # noqa: F401,F403

-__all__ = ['yolov5_head']
+__all__ = ["yolov5_head"]
diff --git a/mmyolo/mmyolo/deploy/models/dense_heads/yolov5_head.py b/mmyolo/mmyolo/deploy/models/dense_heads/yolov5_head.py
index ac996ba4..98a016b2 100644
--- a/mmyolo/mmyolo/deploy/models/dense_heads/yolov5_head.py
+++ b/mmyolo/mmyolo/deploy/models/dense_heads/yolov5_head.py
@@ -15,8 +15,7 @@
 from mmyolo.models.dense_heads import YOLOv5Head


-def yolov5_bbox_decoder(priors: Tensor, bbox_preds: Tensor,
-                        stride: int) -> Tensor:
+def yolov5_bbox_decoder(priors: Tensor, bbox_preds: Tensor, stride: int) -> Tensor:
     """Decode YOLOv5 bounding boxes.

     Args:
@@ -36,26 +35,27 @@ def yolov5_bbox_decoder(priors: Tensor, bbox_preds: Tensor,
     x_center_pred = (bbox_preds[..., 0] - 0.5) * 2 * stride + x_center
     y_center_pred = (bbox_preds[..., 1] - 0.5) * 2 * stride + y_center
-    w_pred = (bbox_preds[..., 2] * 2)**2 * w
-    h_pred = (bbox_preds[..., 3] * 2)**2 * h
+    w_pred = (bbox_preds[..., 2] * 2) ** 2 * w
+    h_pred = (bbox_preds[..., 3] * 2) ** 2 * h

-    decoded_bboxes = torch.stack(
-        [x_center_pred, y_center_pred, w_pred, h_pred], dim=-1)
+    decoded_bboxes = torch.stack([x_center_pred, y_center_pred, w_pred, h_pred], dim=-1)

     return decoded_bboxes


 @FUNCTION_REWRITER.register_rewriter(
-    func_name='mmyolo.models.dense_heads.yolov5_head.'
-    'YOLOv5Head.predict_by_feat')
-def yolov5_head__predict_by_feat(self,
-                                 cls_scores: List[Tensor],
-                                 bbox_preds: List[Tensor],
-                                 objectnesses: Optional[List[Tensor]] = None,
-                                 batch_img_metas: Optional[List[dict]] = None,
-                                 cfg: Optional[ConfigDict] = None,
-                                 rescale: bool = False,
-                                 with_nms: bool = True) -> Tuple[InstanceData]:
+    func_name="mmyolo.models.dense_heads.yolov5_head." "YOLOv5Head.predict_by_feat"
+)
+def yolov5_head__predict_by_feat(
+    self,
+    cls_scores: List[Tensor],
+    bbox_preds: List[Tensor],
+    objectnesses: Optional[List[Tensor]] = None,
+    batch_img_metas: Optional[List[dict]] = None,
+    cfg: Optional[ConfigDict] = None,
+    rescale: bool = False,
+    with_nms: bool = True,
+) -> Tuple[InstanceData]:
     """Transform a batch of output features extracted by the head into bbox
     results.
     Args:
@@ -87,7 +87,7 @@ def yolov5_head__predict_by_feat(self,
     ctx = FUNCTION_REWRITER.get_context()
     detector_type = type(self)
     deploy_cfg = ctx.cfg
-    use_efficientnms = deploy_cfg.get('use_efficientnms', False)
+    use_efficientnms = deploy_cfg.get("use_efficientnms", False)
     dtype = cls_scores[0].dtype
     device = cls_scores[0].device
     bbox_decoder = self.bbox_coder.decode
@@ -107,14 +107,15 @@ def yolov5_head__predict_by_feat(self,
     featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores]

     mlvl_priors = self.prior_generator.grid_priors(
-        featmap_sizes, dtype=dtype, device=device)
+        featmap_sizes, dtype=dtype, device=device
+    )

     flatten_priors = torch.cat(mlvl_priors)

     mlvl_strides = [
         flatten_priors.new_full(
-            (featmap_size[0] * featmap_size[1] * self.num_base_priors, ),
-            stride)
+            (featmap_size[0] * featmap_size[1] * self.num_base_priors,), stride
+        )
         for featmap_size, stride in zip(featmap_sizes, self.featmap_strides)
     ]
     flatten_stride = torch.cat(mlvl_strides)
@@ -142,29 +143,36 @@ def yolov5_head__predict_by_feat(self,

     scores = cls_scores

-    bboxes = bbox_decoder(flatten_priors[None], flatten_bbox_preds,
-                          flatten_stride)
+    bboxes = bbox_decoder(flatten_priors[None], flatten_bbox_preds, flatten_stride)

     if not with_nms:
         return bboxes, scores

     post_params = get_post_processing_params(deploy_cfg)
     max_output_boxes_per_class = post_params.max_output_boxes_per_class
-    iou_threshold = cfg.nms.get('iou_threshold', post_params.iou_threshold)
-    score_threshold = cfg.get('score_thr', post_params.score_threshold)
+    iou_threshold = cfg.nms.get("iou_threshold", post_params.iou_threshold)
+    score_threshold = cfg.get("score_thr", post_params.score_threshold)
     pre_top_k = post_params.pre_top_k
-    keep_top_k = cfg.get('max_per_img', post_params.keep_top_k)
+    keep_top_k = cfg.get("max_per_img", post_params.keep_top_k)

-    return nms_func(bboxes, scores, max_output_boxes_per_class, iou_threshold,
-                    score_threshold, pre_top_k, keep_top_k)
+    return nms_func(
+        bboxes,
+        scores,
+        max_output_boxes_per_class,
+        iou_threshold,
+        score_threshold,
+        pre_top_k,
+        keep_top_k,
+    )
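A numeric sketch of the decode in `yolov5_bbox_decoder` above: offsets map to centres via (pred - 0.5) * 2 * stride, sizes via (pred * 2) ** 2 * prior size. This assumes the prior's centre and width/height have already been extracted, as the hunk does before the shown lines:

import torch

prior = torch.tensor([320.0, 320.0, 64.0, 64.0])  # cx, cy, w, h of one prior
pred = torch.tensor([0.6, 0.4, 0.55, 0.5])        # sigmoid outputs in [0, 1]
stride = 32

cx = (pred[0] - 0.5) * 2 * stride + prior[0]      # 326.4
cy = (pred[1] - 0.5) * 2 * stride + prior[1]      # 313.6
w = (pred[2] * 2) ** 2 * prior[2]                 # 77.44
h = (pred[3] * 2) ** 2 * prior[3]                 # 64.0
print(torch.stack([cx, cy, w, h]))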

 @FUNCTION_REWRITER.register_rewriter(
-    func_name='mmyolo.models.dense_heads.yolov5_head.'
-    'YOLOv5Head.predict',
-    backend='rknn')
-def yolov5_head__predict__rknn(self, x: Tuple[Tensor], *args,
-                               **kwargs) -> Tuple[Tensor, Tensor, Tensor]:
+    func_name="mmyolo.models.dense_heads.yolov5_head." "YOLOv5Head.predict",
+    backend="rknn",
+)
+def yolov5_head__predict__rknn(
+    self, x: Tuple[Tensor], *args, **kwargs
+) -> Tuple[Tensor, Tensor, Tensor]:
     """Perform forward propagation of the detection head and predict detection
     results on the features of the upstream network.

@@ -177,11 +185,12 @@ def yolov5_head__predict__rknn(self, x: Tuple[Tensor], *args,


 @FUNCTION_REWRITER.register_rewriter(
-    func_name='mmyolo.models.dense_heads.yolov5_head.'
-    'YOLOv5HeadModule.forward',
-    backend='rknn')
+    func_name="mmyolo.models.dense_heads.yolov5_head." "YOLOv5HeadModule.forward",
+    backend="rknn",
+)
 def yolov5_head_module__forward__rknn(
-        self, x: Tensor, *args, **kwargs) -> Tuple[Tensor, Tensor, Tensor]:
+    self, x: Tensor, *args, **kwargs
+) -> Tuple[Tensor, Tensor, Tensor]:
     """Forward feature of a single scale level."""
     out = []
     for i, feat in enumerate(x):
diff --git a/mmyolo/mmyolo/deploy/models/layers/__init__.py b/mmyolo/mmyolo/deploy/models/layers/__init__.py
index 6017cf83..28501e74 100644
--- a/mmyolo/mmyolo/deploy/models/layers/__init__.py
+++ b/mmyolo/mmyolo/deploy/models/layers/__init__.py
@@ -1,4 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .bbox_nms import efficient_nms

-__all__ = ['efficient_nms']
+__all__ = ["efficient_nms"]
diff --git a/mmyolo/mmyolo/deploy/models/layers/bbox_nms.py b/mmyolo/mmyolo/deploy/models/layers/bbox_nms.py
index 4db81c02..6e1fde6c 100644
--- a/mmyolo/mmyolo/deploy/models/layers/bbox_nms.py
+++ b/mmyolo/mmyolo/deploy/models/layers/bbox_nms.py
@@ -39,8 +39,16 @@ def _efficient_nms(
     """
     boxes = boxes if boxes.dim() == 4 else boxes.unsqueeze(2)
     _, det_boxes, det_scores, labels = TRTEfficientNMSop.apply(
-        boxes, scores, -1, box_coding, iou_threshold, keep_top_k, '1', 0,
-        score_threshold)
+        boxes,
+        scores,
+        -1,
+        box_coding,
+        iou_threshold,
+        keep_top_k,
+        "1",
+        0,
+        score_threshold,
+    )
     dets = torch.cat([det_boxes, det_scores.unsqueeze(2)], -1)

     # retain shape info
@@ -53,7 +61,7 @@ def _efficient_nms(
     return dets, labels


-@mark('efficient_nms', inputs=['boxes', 'scores'], outputs=['dets', 'labels'])
+@mark("efficient_nms", inputs=["boxes", "scores"], outputs=["dets", "labels"])
 def efficient_nms(*args, **kwargs):
     """Wrapper function for `_efficient_nms`."""
     return _efficient_nms(*args, **kwargs)
@@ -71,34 +79,36 @@ def forward(
         box_coding=0,
         iou_threshold=0.45,
         max_output_boxes=100,
-        plugin_version='1',
+        plugin_version="1",
         score_activation=0,
         score_threshold=0.25,
     ):
         """Forward function of TRTEfficientNMSop."""
         batch_size, num_boxes, num_classes = scores.shape
-        num_det = torch.randint(
-            0, max_output_boxes, (batch_size, 1), dtype=torch.int32)
+        num_det = torch.randint(0, max_output_boxes, (batch_size, 1), dtype=torch.int32)
         det_boxes = torch.randn(batch_size, max_output_boxes, 4)
         det_scores = torch.randn(batch_size, max_output_boxes)
         det_classes = torch.randint(
-            0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32)
+            0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32
+        )
         return num_det, det_boxes, det_scores, det_classes

     @staticmethod
-    def symbolic(g,
-                 boxes,
-                 scores,
-                 background_class=-1,
-                 box_coding=0,
-                 iou_threshold=0.45,
-                 max_output_boxes=100,
-                 plugin_version='1',
-                 score_activation=0,
-                 score_threshold=0.25):
+    def symbolic(
+        g,
+        boxes,
+        scores,
+        background_class=-1,
+        box_coding=0,
+        iou_threshold=0.45,
+        max_output_boxes=100,
+        plugin_version="1",
+        score_activation=0,
+        score_threshold=0.25,
+    ):
         """Symbolic function of TRTEfficientNMSop."""
         out = g.op(
-            'TRT::EfficientNMS_TRT',
+            "TRT::EfficientNMS_TRT",
             boxes,
             scores,
             background_class_i=background_class,
@@ -108,6 +118,7 @@ def symbolic(g,
             plugin_version_s=plugin_version,
             score_activation_i=score_activation,
             score_threshold_f=score_threshold,
-            outputs=4)
+            outputs=4,
+        )
         nums, boxes, scores, classes = out
         return nums, boxes, scores, classes
diff --git a/mmyolo/mmyolo/deploy/object_detection.py b/mmyolo/mmyolo/deploy/object_detection.py
index 7efdfcfb..7031a4ef 100644
--- a/mmyolo/mmyolo/deploy/object_detection.py
+++ b/mmyolo/mmyolo/deploy/object_detection.py
@@ -8,7 +8,7 @@
 from mmengine import Config
 from mmengine.registry import Registry

-MMYOLO_TASK = Registry('mmyolo_tasks')
+MMYOLO_TASK = Registry("mmyolo_tasks")


 @CODEBASE.register_module(Codebase.MMYOLO.value)
@@ -27,11 +27,13 @@ def register_deploy_modules(cls):
     @classmethod
     def register_all_modules(cls):
         """register all modules."""
-        from mmdet.utils.setup_env import \
-            register_all_modules as register_all_modules_mmdet
+        from mmdet.utils.setup_env import (
+            register_all_modules as register_all_modules_mmdet,
+        )

-        from mmyolo.utils.setup_env import \
-            register_all_modules as register_all_modules_mmyolo
+        from mmyolo.utils.setup_env import (
+            register_all_modules as register_all_modules_mmyolo,
+        )

         cls.register_deploy_modules()
         register_all_modules_mmyolo(True)
@@ -51,9 +53,7 @@ def _get_dataset_metainfo(model_cfg: Config):
     from mmyolo.registry import DATASETS

     module_dict = DATASETS.module_dict
-    for dataloader_name in [
-            'test_dataloader', 'val_dataloader', 'train_dataloader'
-    ]:
+    for dataloader_name in ["test_dataloader", "val_dataloader", "train_dataloader"]:
         if dataloader_name not in model_cfg:
             continue
         dataloader_cfg = model_cfg[dataloader_name]
@@ -61,13 +61,13 @@ def _get_dataset_metainfo(model_cfg: Config):
         dataset_cls = module_dict.get(dataset_cfg.type, None)
         if dataset_cls is None:
             continue
-        if hasattr(dataset_cls, '_load_metainfo') and isinstance(
-                dataset_cls._load_metainfo, Callable):
-            meta = dataset_cls._load_metainfo(
-                dataset_cfg.get('metainfo', None))
+        if hasattr(dataset_cls, "_load_metainfo") and isinstance(
+            dataset_cls._load_metainfo, Callable
+        ):
+            meta = dataset_cls._load_metainfo(dataset_cfg.get("metainfo", None))
             if meta is not None:
                 return meta
-        if hasattr(dataset_cls, 'METAINFO'):
+        if hasattr(dataset_cls, "METAINFO"):
             return dataset_cls.METAINFO

     return None
@@ -88,16 +88,19 @@ def get_visualizer(self, name: str, save_dir: str):
             Visualizer: A visualizer instance.
         """
         from mmdet.visualization import DetLocalVisualizer  # noqa: F401,F403
+
         metainfo = _get_dataset_metainfo(self.model_cfg)
         visualizer = super().get_visualizer(name, save_dir)
         if metainfo is not None:
             visualizer.dataset_meta = metainfo
         return visualizer
-    def build_pytorch_model(self,
-                            model_checkpoint: Optional[str] = None,
-                            cfg_options: Optional[Dict] = None,
-                            **kwargs) -> torch.nn.Module:
+    def build_pytorch_model(
+        self,
+        model_checkpoint: Optional[str] = None,
+        cfg_options: Optional[Dict] = None,
+        **kwargs,
+    ) -> torch.nn.Module:
         """Initialize torch model.

         Args:
@@ -116,13 +119,13 @@ def build_pytorch_model(self,
         from mmyolo.utils import switch_to_deploy

         model = deepcopy(self.model_cfg.model)
-        preprocess_cfg = deepcopy(self.model_cfg.get('preprocess_cfg', {}))
-        preprocess_cfg.update(
-            deepcopy(self.model_cfg.get('data_preprocessor', {})))
-        model.setdefault('data_preprocessor', preprocess_cfg)
+        preprocess_cfg = deepcopy(self.model_cfg.get("preprocess_cfg", {}))
+        preprocess_cfg.update(deepcopy(self.model_cfg.get("data_preprocessor", {})))
+        model.setdefault("data_preprocessor", preprocess_cfg)
         model = MODELS.build(model)
         if model_checkpoint is not None:
             from mmengine.runner.checkpoint import load_checkpoint
+
             load_checkpoint(model, model_checkpoint, map_location=self.device)

         model = revert_sync_batchnorm(model)
diff --git a/mmyolo/mmyolo/engine/hooks/__init__.py b/mmyolo/mmyolo/engine/hooks/__init__.py
index 0b8deebc..a181f017 100644
--- a/mmyolo/mmyolo/engine/hooks/__init__.py
+++ b/mmyolo/mmyolo/engine/hooks/__init__.py
@@ -5,6 +5,8 @@
 from .yolox_mode_switch_hook import YOLOXModeSwitchHook

 __all__ = [
-    'YOLOv5ParamSchedulerHook', 'YOLOXModeSwitchHook', 'SwitchToDeployHook',
-    'PPYOLOEParamSchedulerHook'
+    "YOLOv5ParamSchedulerHook",
+    "YOLOXModeSwitchHook",
+    "SwitchToDeployHook",
+    "PPYOLOEParamSchedulerHook",
 ]
diff --git a/mmyolo/mmyolo/engine/hooks/ppyoloe_param_scheduler_hook.py b/mmyolo/mmyolo/engine/hooks/ppyoloe_param_scheduler_hook.py
index 26dfe6ef..ecb37179 100644
--- a/mmyolo/mmyolo/engine/hooks/ppyoloe_param_scheduler_hook.py
+++ b/mmyolo/mmyolo/engine/hooks/ppyoloe_param_scheduler_hook.py
@@ -24,14 +24,17 @@ class PPYOLOEParamSchedulerHook(ParamSchedulerHook):
         total_epochs (int): In PPYOLOE, `total_epochs` is set to
             training_epochs x 1.2. Defaults to 360.
     """
+
     priority = 9

-    def __init__(self,
-                 warmup_min_iter: int = 1000,
-                 start_factor: float = 0.,
-                 warmup_epochs: int = 5,
-                 min_lr_ratio: float = 0.0,
-                 total_epochs: int = 360):
+    def __init__(
+        self,
+        warmup_min_iter: int = 1000,
+        start_factor: float = 0.0,
+        warmup_epochs: int = 5,
+        min_lr_ratio: float = 0.0,
+        total_epochs: int = 360,
+    ):

         self.warmup_min_iter = warmup_min_iter
         self.start_factor = start_factor
@@ -52,17 +55,14 @@ def before_train(self, runner: Runner):
         for group in optimizer.param_groups:
             # If the param is never be scheduled, record the current value
             # as the initial value.
-            group.setdefault('initial_lr', group['lr'])
+            group.setdefault("initial_lr", group["lr"])

-        self._base_lr = [
-            group['initial_lr'] for group in optimizer.param_groups
-        ]
+        self._base_lr = [group["initial_lr"] for group in optimizer.param_groups]
         self._min_lr = [i * self.min_lr_ratio for i in self._base_lr]
-    def before_train_iter(self,
-                          runner: Runner,
-                          batch_idx: int,
-                          data_batch: Optional[dict] = None):
+    def before_train_iter(
+        self, runner: Runner, batch_idx: int, data_batch: Optional[dict] = None
+    ):
         """Operations before each training iteration.

         Args:
@@ -76,7 +76,8 @@ def before_train_iter(self,

         # The minimum warmup is self.warmup_min_iter
         warmup_total_iters = max(
-            round(self.warmup_epochs * dataloader_len), self.warmup_min_iter)
+            round(self.warmup_epochs * dataloader_len), self.warmup_min_iter
+        )

         if cur_iters <= warmup_total_iters:
             # warm up
@@ -84,13 +85,18 @@ def before_train_iter(self,
             factor = self.start_factor * (1 - alpha) + alpha

             for group_idx, param in enumerate(optimizer.param_groups):
-                param['lr'] = self._base_lr[group_idx] * factor
+                param["lr"] = self._base_lr[group_idx] * factor
         else:
             for group_idx, param in enumerate(optimizer.param_groups):
                 total_iters = self.total_epochs * dataloader_len
                 lr = self._min_lr[group_idx] + (
-                    self._base_lr[group_idx] -
-                    self._min_lr[group_idx]) * 0.5 * (
-                        math.cos((cur_iters - warmup_total_iters) * math.pi /
-                                 (total_iters - warmup_total_iters)) + 1.0)
-                param['lr'] = lr
+                    self._base_lr[group_idx] - self._min_lr[group_idx]
+                ) * 0.5 * (
+                    math.cos(
+                        (cur_iters - warmup_total_iters)
+                        * math.pi
+                        / (total_iters - warmup_total_iters)
+                    )
+                    + 1.0
+                )
+                param["lr"] = lr
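A small sketch of the post-warmup schedule reflowed above: cosine decay from base_lr down to min_lr over the remaining iterations (values are illustrative):

import math

base_lr, min_lr = 0.01, 0.0
warmup_total_iters, total_iters = 1000, 100_000
for cur_iters in (1000, 50_500, 100_000):
    lr = min_lr + (base_lr - min_lr) * 0.5 * (
        math.cos((cur_iters - warmup_total_iters) * math.pi /
                 (total_iters - warmup_total_iters)) + 1.0)
    print(cur_iters, round(lr, 5))  # 0.01 at the start, 0.005 midway, 0.0 at the end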
- group.setdefault('initial_lr', group['lr']) - group.setdefault('initial_momentum', group.get('momentum', -1)) + group.setdefault("initial_lr", group["lr"]) + group.setdefault("initial_momentum", group.get("momentum", -1)) - self._base_lr = [ - group['initial_lr'] for group in optimizer.param_groups - ] + self._base_lr = [group["initial_lr"] for group in optimizer.param_groups] self._base_momentum = [ - group['initial_momentum'] for group in optimizer.param_groups + group["initial_momentum"] for group in optimizer.param_groups ] - def before_train_iter(self, - runner: Runner, - batch_idx: int, - data_batch: Optional[dict] = None): + def before_train_iter( + self, runner: Runner, batch_idx: int, data_batch: Optional[dict] = None + ): """Operations before each training iteration. Args: @@ -89,7 +90,8 @@ def before_train_iter, # The minimum warmup is self.warmup_mim_iter warmup_total_iters = max( round(self.warmup_epochs * len(runner.train_dataloader)), - self.warmup_mim_iter) + self.warmup_mim_iter, + ) if cur_iters <= warmup_total_iters: xp = [0, warmup_total_iters] @@ -98,19 +100,18 @@ def before_train_iter, # bias learning rate will be handled specially yp = [ self.warmup_bias_lr, - self._base_lr[group_idx] * self.scheduler_fn(cur_epoch) + self._base_lr[group_idx] * self.scheduler_fn(cur_epoch), ] else: - yp = [ - 0.0, - self._base_lr[group_idx] * self.scheduler_fn(cur_epoch) - ] - param['lr'] = np.interp(cur_iters, xp, yp) - - if 'momentum' in param: - param['momentum'] = np.interp( - cur_iters, xp, - [self.warmup_momentum, self._base_momentum[group_idx]]) + yp = [0.0, self._base_lr[group_idx] * self.scheduler_fn(cur_epoch)] + param["lr"] = np.interp(cur_iters, xp, yp) + + if "momentum" in param: + param["momentum"] = np.interp( + cur_iters, + xp, + [self.warmup_momentum, self._base_momentum[group_idx]], + ) else: self._warmup_end = True @@ -126,5 +127,4 @@ def after_train_epoch(self, runner: Runner): cur_epoch = runner.epoch optimizer = runner.optim_wrapper.optimizer for group_idx, param in enumerate(optimizer.param_groups): - param['lr'] = self._base_lr[group_idx] * self.scheduler_fn( - cur_epoch) + param["lr"] = self._base_lr[group_idx] * self.scheduler_fn(cur_epoch) diff --git a/mmyolo/mmyolo/engine/hooks/yolox_mode_switch_hook.py b/mmyolo/mmyolo/engine/hooks/yolox_mode_switch_hook.py index 27711768..b9cbdf78 100644 --- a/mmyolo/mmyolo/engine/hooks/yolox_mode_switch_hook.py +++ b/mmyolo/mmyolo/engine/hooks/yolox_mode_switch_hook.py @@ -22,9 +22,9 @@ class YOLOXModeSwitchHook(Hook): Defaults to 15. """ - def __init__(self, - num_last_epochs: int = 15, - new_train_pipeline: Sequence[dict] = None): + def __init__( + self, num_last_epochs: int = 15, new_train_pipeline: Sequence[dict] = None + ): self.num_last_epochs = num_last_epochs self.new_train_pipeline_cfg = new_train_pipeline @@ -36,7 +36,7 @@ def before_train_epoch(self, runner: Runner): model = model.module if (epoch + 1) == runner.max_epochs - self.num_last_epochs: - runner.logger.info(f'New Pipeline: {self.new_train_pipeline_cfg}') + runner.logger.info(f"New Pipeline: {self.new_train_pipeline_cfg}") train_dataloader_cfg = copy.deepcopy(runner.cfg.train_dataloader) train_dataloader_cfg.dataset.pipeline = self.new_train_pipeline_cfg @@ -45,10 +45,9 @@ def before_train_epoch(self, runner: Runner): # it will lead to potential risks, such as the data of the global # FileClient instance object becoming disordered. # This problem needs to be solved in the future.
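Both parameter-scheduler hooks above share one shape: a per-iteration warmup ramp (at least `warmup_min_iter`/`warmup_mim_iter` iterations long) feeding into an epoch-level decay curve. A minimal self-contained sketch of the YOLOv5 variant, reusing the `cosine_fn` definition from this patch; the harness around it (e.g. `base_lr=0.01`) is illustrative only:

import math

import numpy as np


def cosine_fn(lr_factor: float, max_epochs: int):
    # Verbatim from yolov5_param_scheduler_hook.py above: 1.0 at epoch 0,
    # decaying to lr_factor at max_epochs.
    return lambda x: ((1 - math.cos(x * math.pi / max_epochs)) / 2) * (lr_factor - 1) + 1


def yolov5_lr(cur_iter, cur_epoch, iters_per_epoch, base_lr=0.01, lr_factor=0.01,
              max_epochs=300, warmup_epochs=3, warmup_min_iter=1000,
              is_bias=False, warmup_bias_lr=0.1):
    scheduler_fn = cosine_fn(lr_factor, max_epochs)
    target = base_lr * scheduler_fn(cur_epoch)
    warmup_total_iters = max(round(warmup_epochs * iters_per_epoch), warmup_min_iter)
    if cur_iter <= warmup_total_iters:
        # Bias groups ramp down from warmup_bias_lr; all others ramp up from 0.
        start = warmup_bias_lr if is_bias else 0.0
        return float(np.interp(cur_iter, [0, warmup_total_iters], [start, target]))
    return target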
- new_train_dataloader = Runner.build_dataloader( - train_dataloader_cfg) + new_train_dataloader = Runner.build_dataloader(train_dataloader_cfg) runner.train_loop.dataloader = new_train_dataloader - runner.logger.info('recreate the dataloader!') - runner.logger.info('Add additional bbox reg loss now!') + runner.logger.info("recreate the dataloader!") + runner.logger.info("Add additional bbox reg loss now!") model.bbox_head.use_bbox_aux = True diff --git a/mmyolo/mmyolo/engine/optimizers/__init__.py b/mmyolo/mmyolo/engine/optimizers/__init__.py index b598020d..e875f192 100644 --- a/mmyolo/mmyolo/engine/optimizers/__init__.py +++ b/mmyolo/mmyolo/engine/optimizers/__init__.py @@ -2,4 +2,4 @@ from .yolov5_optim_constructor import YOLOv5OptimizerConstructor from .yolov7_optim_wrapper_constructor import YOLOv7OptimWrapperConstructor -__all__ = ['YOLOv5OptimizerConstructor', 'YOLOv7OptimWrapperConstructor'] +__all__ = ["YOLOv5OptimizerConstructor", "YOLOv7OptimWrapperConstructor"] diff --git a/mmyolo/mmyolo/engine/optimizers/yolov5_optim_constructor.py b/mmyolo/mmyolo/engine/optimizers/yolov5_optim_constructor.py index 5e5f42cb..6ca64cfd 100644 --- a/mmyolo/mmyolo/engine/optimizers/yolov5_optim_constructor.py +++ b/mmyolo/mmyolo/engine/optimizers/yolov5_optim_constructor.py @@ -7,8 +7,7 @@ from mmengine.model import is_model_wrapper from mmengine.optim import OptimWrapper -from mmyolo.registry import (OPTIM_WRAPPER_CONSTRUCTORS, OPTIM_WRAPPERS, - OPTIMIZERS) +from mmyolo.registry import OPTIM_WRAPPER_CONSTRUCTORS, OPTIM_WRAPPERS, OPTIMIZERS @OPTIM_WRAPPER_CONSTRUCTORS.register_module() @@ -60,73 +59,72 @@ class YOLOv5OptimizerConstructor: >>> optim_wrapper = optim_wrapper_builder(model) """ - def __init__(self, - optim_wrapper_cfg: dict, - paramwise_cfg: Optional[dict] = None): + def __init__(self, optim_wrapper_cfg: dict, paramwise_cfg: Optional[dict] = None): if paramwise_cfg is None: - paramwise_cfg = {'base_total_batch_size': 64} - assert 'base_total_batch_size' in paramwise_cfg + paramwise_cfg = {"base_total_batch_size": 64} + assert "base_total_batch_size" in paramwise_cfg if not isinstance(optim_wrapper_cfg, dict): - raise TypeError('optimizer_cfg should be a dict', - f'but got {type(optim_wrapper_cfg)}') - assert 'optimizer' in optim_wrapper_cfg, ( - '`optim_wrapper_cfg` must contain "optimizer" config') + raise TypeError( + "optimizer_cfg should be a dict", f"but got {type(optim_wrapper_cfg)}" + ) + assert ( + "optimizer" in optim_wrapper_cfg + ), '`optim_wrapper_cfg` must contain "optimizer" config' self.optim_wrapper_cfg = optim_wrapper_cfg - self.optimizer_cfg = self.optim_wrapper_cfg.pop('optimizer') - self.base_total_batch_size = paramwise_cfg['base_total_batch_size'] + self.optimizer_cfg = self.optim_wrapper_cfg.pop("optimizer") + self.base_total_batch_size = paramwise_cfg["base_total_batch_size"] def __call__(self, model: nn.Module) -> OptimWrapper: if is_model_wrapper(model): model = model.module optimizer_cfg = self.optimizer_cfg.copy() - weight_decay = optimizer_cfg.pop('weight_decay', 0) + weight_decay = optimizer_cfg.pop("weight_decay", 0) - if 'batch_size_per_gpu' in optimizer_cfg: - batch_size_per_gpu = optimizer_cfg.pop('batch_size_per_gpu') + if "batch_size_per_gpu" in optimizer_cfg: + batch_size_per_gpu = optimizer_cfg.pop("batch_size_per_gpu") # No scaling if total_batch_size is less than # base_total_batch_size, otherwise linear scaling. 
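The linear scaling rule named in the comment above, worked through as standalone arithmetic; the world size, per-GPU batch size, and 0.0005 base decay are illustrative values, not defaults taken from this patch:

world_size, batch_size_per_gpu = 8, 16  # illustrative training setup
base_total_batch_size = 64
total_batch_size = world_size * batch_size_per_gpu  # 128
# Fewer than 64 images per step would instead raise `accumulate` above 1.
accumulate = max(round(base_total_batch_size / total_batch_size), 1)  # 1
scale_factor = total_batch_size * accumulate / base_total_batch_size  # 2.0
weight_decay = 0.0005 * scale_factor  # 0.001: decay grows with the effective batch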
total_batch_size = get_world_size() * batch_size_per_gpu - accumulate = max( - round(self.base_total_batch_size / total_batch_size), 1) - scale_factor = total_batch_size * \ - accumulate / self.base_total_batch_size + accumulate = max(round(self.base_total_batch_size / total_batch_size), 1) + scale_factor = total_batch_size * accumulate / self.base_total_batch_size if scale_factor != 1: weight_decay *= scale_factor - print_log(f'Scaled weight_decay to {weight_decay}', 'current') + print_log(f"Scaled weight_decay to {weight_decay}", "current") params_groups = [], [], [] for v in model.modules(): - if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): + if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): params_groups[2].append(v.bias) # Includes SyncBatchNorm if isinstance(v, nn.modules.batchnorm._NormBase): params_groups[1].append(v.weight) - elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): + elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): params_groups[0].append(v.weight) # Note: Make sure bias is in the last parameter group - optimizer_cfg['params'] = [] + optimizer_cfg["params"] = [] # conv - optimizer_cfg['params'].append({ - 'params': params_groups[0], - 'weight_decay': weight_decay - }) + optimizer_cfg["params"].append( + {"params": params_groups[0], "weight_decay": weight_decay} + ) # bn - optimizer_cfg['params'].append({'params': params_groups[1]}) + optimizer_cfg["params"].append({"params": params_groups[1]}) # bias - optimizer_cfg['params'].append({'params': params_groups[2]}) + optimizer_cfg["params"].append({"params": params_groups[2]}) print_log( - 'Optimizer groups: %g .bias, %g conv.weight, %g other' % - (len(params_groups[2]), len(params_groups[0]), len( - params_groups[1])), 'current') + "Optimizer groups: %g .bias, %g conv.weight, %g other" + % (len(params_groups[2]), len(params_groups[0]), len(params_groups[1])), + "current", + ) del params_groups optimizer = OPTIMIZERS.build(optimizer_cfg) optim_wrapper = OPTIM_WRAPPERS.build( - self.optim_wrapper_cfg, default_args=dict(optimizer=optimizer)) + self.optim_wrapper_cfg, default_args=dict(optimizer=optimizer) + ) return optim_wrapper diff --git a/mmyolo/mmyolo/engine/optimizers/yolov7_optim_wrapper_constructor.py b/mmyolo/mmyolo/engine/optimizers/yolov7_optim_wrapper_constructor.py index 79ea8b69..980c26ae 100644 --- a/mmyolo/mmyolo/engine/optimizers/yolov7_optim_wrapper_constructor.py +++ b/mmyolo/mmyolo/engine/optimizers/yolov7_optim_wrapper_constructor.py @@ -8,8 +8,7 @@ from mmengine.optim import OptimWrapper from mmyolo.models.dense_heads.yolov7_head import ImplicitA, ImplicitM -from mmyolo.registry import (OPTIM_WRAPPER_CONSTRUCTORS, OPTIM_WRAPPERS, - OPTIMIZERS) +from mmyolo.registry import OPTIM_WRAPPER_CONSTRUCTORS, OPTIM_WRAPPERS, OPTIMIZERS # TODO: Consider merging into YOLOv5OptimizerConstructor @@ -62,42 +61,40 @@ class YOLOv7OptimWrapperConstructor: >>> optim_wrapper = optim_wrapper_builder(model) """ - def __init__(self, - optim_wrapper_cfg: dict, - paramwise_cfg: Optional[dict] = None): + def __init__(self, optim_wrapper_cfg: dict, paramwise_cfg: Optional[dict] = None): if paramwise_cfg is None: - paramwise_cfg = {'base_total_batch_size': 64} - assert 'base_total_batch_size' in paramwise_cfg + paramwise_cfg = {"base_total_batch_size": 64} + assert "base_total_batch_size" in paramwise_cfg if not isinstance(optim_wrapper_cfg, dict): - raise TypeError('optimizer_cfg should be a dict', - f'but got {type(optim_wrapper_cfg)}') - assert 'optimizer' in 
optim_wrapper_cfg, ( - '`optim_wrapper_cfg` must contain "optimizer" config') + raise TypeError( + "optimizer_cfg should be a dict", f"but got {type(optim_wrapper_cfg)}" + ) + assert ( + "optimizer" in optim_wrapper_cfg + ), '`optim_wrapper_cfg` must contain "optimizer" config' self.optim_wrapper_cfg = optim_wrapper_cfg - self.optimizer_cfg = self.optim_wrapper_cfg.pop('optimizer') - self.base_total_batch_size = paramwise_cfg['base_total_batch_size'] + self.optimizer_cfg = self.optim_wrapper_cfg.pop("optimizer") + self.base_total_batch_size = paramwise_cfg["base_total_batch_size"] def __call__(self, model: nn.Module) -> OptimWrapper: if is_model_wrapper(model): model = model.module optimizer_cfg = self.optimizer_cfg.copy() - weight_decay = optimizer_cfg.pop('weight_decay', 0) + weight_decay = optimizer_cfg.pop("weight_decay", 0) - if 'batch_size_per_gpu' in optimizer_cfg: - batch_size_per_gpu = optimizer_cfg.pop('batch_size_per_gpu') + if "batch_size_per_gpu" in optimizer_cfg: + batch_size_per_gpu = optimizer_cfg.pop("batch_size_per_gpu") # No scaling if total_batch_size is less than # base_total_batch_size, otherwise linear scaling. total_batch_size = get_world_size() * batch_size_per_gpu - accumulate = max( - round(self.base_total_batch_size / total_batch_size), 1) - scale_factor = total_batch_size * \ - accumulate / self.base_total_batch_size + accumulate = max(round(self.base_total_batch_size / total_batch_size), 1) + scale_factor = total_batch_size * accumulate / self.base_total_batch_size if scale_factor != 1: weight_decay *= scale_factor - print_log(f'Scaled weight_decay to {weight_decay}', 'current') + print_log(f"Scaled weight_decay to {weight_decay}", "current") params_groups = [], [], [] for v in model.modules(): @@ -108,32 +105,33 @@ def __call__(self, model: nn.Module) -> OptimWrapper: elif isinstance(v, nn.modules.batchnorm._NormBase): params_groups[0].append(v.weight) # apply decay - elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): + elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): params_groups[1].append(v.weight) # apply decay # biases, no decay - if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): + if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): params_groups[2].append(v.bias) # Note: Make sure bias is in the last parameter group - optimizer_cfg['params'] = [] + optimizer_cfg["params"] = [] # conv - optimizer_cfg['params'].append({ - 'params': params_groups[1], - 'weight_decay': weight_decay - }) + optimizer_cfg["params"].append( + {"params": params_groups[1], "weight_decay": weight_decay} + ) # bn ... 
- optimizer_cfg['params'].append({'params': params_groups[0]}) + optimizer_cfg["params"].append({"params": params_groups[0]}) # bias - optimizer_cfg['params'].append({'params': params_groups[2]}) + optimizer_cfg["params"].append({"params": params_groups[2]}) print_log( - 'Optimizer groups: %g .bias, %g conv.weight, %g other' % - (len(params_groups[2]), len(params_groups[1]), len( - params_groups[0])), 'current') + "Optimizer groups: %g .bias, %g conv.weight, %g other" + % (len(params_groups[2]), len(params_groups[1]), len(params_groups[0])), + "current", + ) del params_groups optimizer = OPTIMIZERS.build(optimizer_cfg) optim_wrapper = OPTIM_WRAPPERS.build( - self.optim_wrapper_cfg, default_args=dict(optimizer=optimizer)) + self.optim_wrapper_cfg, default_args=dict(optimizer=optimizer) + ) return optim_wrapper diff --git a/mmyolo/mmyolo/models/backbones/__init__.py b/mmyolo/mmyolo/models/backbones/__init__.py index 48c8e28b..d9894550 100644 --- a/mmyolo/mmyolo/models/backbones/__init__.py +++ b/mmyolo/mmyolo/models/backbones/__init__.py @@ -7,7 +7,13 @@ from .yolov7_backbone import YOLOv7Backbone __all__ = [ - 'YOLOv5CSPDarknet', 'BaseBackbone', 'YOLOv6EfficientRep', 'YOLOv6CSPBep', - 'YOLOXCSPDarknet', 'CSPNeXt', 'YOLOv7Backbone', 'PPYOLOECSPResNet', - 'YOLOv8CSPDarknet' + "YOLOv5CSPDarknet", + "BaseBackbone", + "YOLOv6EfficientRep", + "YOLOv6CSPBep", + "YOLOXCSPDarknet", + "CSPNeXt", + "YOLOv7Backbone", + "PPYOLOECSPResNet", + "YOLOv8CSPDarknet", ] diff --git a/mmyolo/mmyolo/models/backbones/base_backbone.py b/mmyolo/mmyolo/models/backbones/base_backbone.py index 730c7095..57f41e81 100644 --- a/mmyolo/mmyolo/models/backbones/base_backbone.py +++ b/mmyolo/mmyolo/models/backbones/base_backbone.py @@ -74,29 +74,32 @@ class BaseBackbone(BaseModule, metaclass=ABCMeta): Defaults to None. """ - def __init__(self, - arch_setting: list, - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - input_channels: int = 3, - out_indices: Sequence[int] = (2, 3, 4), - frozen_stages: int = -1, - plugins: Union[dict, List[dict]] = None, - norm_cfg: ConfigType = None, - act_cfg: ConfigType = None, - norm_eval: bool = False, - init_cfg: OptMultiConfig = None): + def __init__( + self, + arch_setting: list, + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + input_channels: int = 3, + out_indices: Sequence[int] = (2, 3, 4), + frozen_stages: int = -1, + plugins: Union[dict, List[dict]] = None, + norm_cfg: ConfigType = None, + act_cfg: ConfigType = None, + norm_eval: bool = False, + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg) self.num_stages = len(arch_setting) self.arch_setting = arch_setting - assert set(out_indices).issubset( - i for i in range(len(arch_setting) + 1)) + assert set(out_indices).issubset(i for i in range(len(arch_setting) + 1)) if frozen_stages not in range(-1, len(arch_setting) + 1): - raise ValueError('"frozen_stages" must be in range(-1, ' - 'len(arch_setting) + 1). But received ' - f'{frozen_stages}') + raise ValueError( + '"frozen_stages" must be in range(-1, ' + "len(arch_setting) + 1). 
But received " + f"{frozen_stages}" + ) self.input_channels = input_channels self.out_indices = out_indices @@ -109,20 +112,19 @@ def __init__(self, self.plugins = plugins self.stem = self.build_stem_layer() - self.layers = ['stem'] + self.layers = ["stem"] for idx, setting in enumerate(arch_setting): stage = [] stage += self.build_stage_layer(idx, setting) if plugins is not None: stage += self.make_stage_plugins(plugins, idx, setting) - self.add_module(f'stage{idx + 1}', nn.Sequential(*stage)) - self.layers.append(f'stage{idx + 1}') + self.add_module(f"stage{idx + 1}", nn.Sequential(*stage)) + self.layers.append(f"stage{idx + 1}") @abstractmethod def build_stem_layer(self): """Build a stem layer.""" - pass @abstractmethod def build_stage_layer(self, stage_idx: int, setting: list): @@ -132,7 +134,6 @@ def build_stage_layer(self, stage_idx: int, setting: list): stage_idx (int): The index of a stage layer. setting (list): The architecture setting of a stage layer. """ - pass def make_stage_plugins(self, plugins, stage_idx, setting): """Make plugins for backbone ``stage_idx`` th stage. @@ -185,11 +186,10 @@ def make_stage_plugins(self, plugins, stage_idx, setting): plugin_layers = [] for plugin in plugins: plugin = plugin.copy() - stages = plugin.pop('stages', None) + stages = plugin.pop("stages", None) assert stages is None or len(stages) == self.num_stages if stages is None or stages[stage_idx]: - name, layer = build_plugin_layer( - plugin['cfg'], in_channels=in_channels) + name, layer = build_plugin_layer(plugin["cfg"], in_channels=in_channels) plugin_layers.append(layer) return plugin_layers diff --git a/mmyolo/mmyolo/models/backbones/csp_darknet.py b/mmyolo/mmyolo/models/backbones/csp_darknet.py index 92bd69a5..2afff489 100644 --- a/mmyolo/mmyolo/models/backbones/csp_darknet.py +++ b/mmyolo/mmyolo/models/backbones/csp_darknet.py @@ -8,6 +8,7 @@ from mmdet.utils import ConfigType, OptMultiConfig from mmyolo.registry import MODELS + from ..layers import CSPLayerWithTwoConv, SPPFBottleneck from ..utils import make_divisible, make_round from .base_backbone import BaseBackbone @@ -55,29 +56,39 @@ class YOLOv5CSPDarknet(BaseBackbone): (1, 512, 26, 26) (1, 1024, 13, 13) """ + # From left to right: # in_channels, out_channels, num_blocks, add_identity, use_spp arch_settings = { - 'P5': [[64, 128, 3, True, False], [128, 256, 6, True, False], - [256, 512, 9, True, False], [512, 1024, 3, True, True]], - 'P6': [[64, 128, 3, True, False], [128, 256, 6, True, False], - [256, 512, 9, True, False], [512, 768, 3, True, False], - [768, 1024, 3, True, True]] + "P5": [ + [64, 128, 3, True, False], + [128, 256, 6, True, False], + [256, 512, 9, True, False], + [512, 1024, 3, True, True], + ], + "P6": [ + [64, 128, 3, True, False], + [128, 256, 6, True, False], + [256, 512, 9, True, False], + [512, 768, 3, True, False], + [768, 1024, 3, True, True], + ], } - def __init__(self, - arch: str = 'P5', - plugins: Union[dict, List[dict]] = None, - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - input_channels: int = 3, - out_indices: Tuple[int] = (2, 3, 4), - frozen_stages: int = -1, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - norm_eval: bool = False, - init_cfg: OptMultiConfig = None): + def __init__( + self, + arch: str = "P5", + plugins: Union[dict, List[dict]] = None, + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + input_channels: int = 3, + out_indices: Tuple[int] = (2, 3, 4), + frozen_stages: int = -1, 
+ norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + norm_eval: bool = False, + init_cfg: OptMultiConfig = None, + ): super().__init__( self.arch_settings[arch], deepen_factor, @@ -89,7 +100,8 @@ def __init__(self, norm_cfg=norm_cfg, act_cfg=act_cfg, norm_eval=norm_eval, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_stem_layer(self) -> nn.Module: """Build a stem layer.""" @@ -100,7 +112,8 @@ def build_stem_layer(self) -> nn.Module: stride=2, padding=2, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_stage_layer(self, stage_idx: int, setting: list) -> list: """Build a stage layer. @@ -122,7 +135,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(conv_layer) csp_layer = CSPLayer( out_channels, @@ -130,7 +144,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: num_blocks=num_blocks, add_identity=add_identity, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(csp_layer) if use_spp: spp = SPPFBottleneck( @@ -138,7 +153,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: out_channels, kernel_sizes=5, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(spp) return stage @@ -200,28 +216,34 @@ class YOLOv8CSPDarknet(BaseBackbone): (1, 512, 26, 26) (1, 1024, 13, 13) """ + # From left to right: # in_channels, out_channels, num_blocks, add_identity, use_spp # the final out_channels will be set according to the param. arch_settings = { - 'P5': [[64, 128, 3, True, False], [128, 256, 6, True, False], - [256, 512, 6, True, False], [512, None, 3, True, True]], + "P5": [ + [64, 128, 3, True, False], + [128, 256, 6, True, False], + [256, 512, 6, True, False], + [512, None, 3, True, True], + ], } - def __init__(self, - arch: str = 'P5', - last_stage_out_channels: int = 1024, - plugins: Union[dict, List[dict]] = None, - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - input_channels: int = 3, - out_indices: Tuple[int] = (2, 3, 4), - frozen_stages: int = -1, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - norm_eval: bool = False, - init_cfg: OptMultiConfig = None): + def __init__( + self, + arch: str = "P5", + last_stage_out_channels: int = 1024, + plugins: Union[dict, List[dict]] = None, + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + input_channels: int = 3, + out_indices: Tuple[int] = (2, 3, 4), + frozen_stages: int = -1, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + norm_eval: bool = False, + init_cfg: OptMultiConfig = None, + ): self.arch_settings[arch][-1][1] = last_stage_out_channels super().__init__( self.arch_settings[arch], @@ -234,7 +256,8 @@ def __init__(self, norm_cfg=norm_cfg, act_cfg=act_cfg, norm_eval=norm_eval, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_stem_layer(self) -> nn.Module: """Build a stem layer.""" @@ -245,7 +268,8 @@ def build_stem_layer(self) -> nn.Module: stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_stage_layer(self, stage_idx: int, setting: list) -> list: """Build a stage layer. 
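Both Darknet-style backbones above shrink channel widths with `make_divisible` and block counts with `make_round` before building each stage. A plausible sketch of those two helpers from mmyolo/models/utils (the exact rounding, e.g. the divisor of 8, is an assumption of this sketch), plus their effect on a typical "s"-scale model:

import math


def make_divisible(x: float, widen_factor: float = 1.0, divisor: int = 8) -> int:
    """Scale channels, rounding up to a multiple of `divisor`."""
    return math.ceil(x * widen_factor / divisor) * divisor


def make_round(x: float, deepen_factor: float = 1.0) -> int:
    """Scale repeat counts, keeping at least one block per stage."""
    return max(round(x * deepen_factor), 1) if x > 1 else x


# With widen_factor=0.5 and deepen_factor=0.33 (typical "s" scaling), the P5
# entry [64, 128, 3, ...] yields a 32 -> 64 channel stage with a single block:
assert make_divisible(64, 0.5) == 32
assert make_divisible(128, 0.5) == 64
assert make_round(3, 0.33) == 1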
@@ -267,7 +291,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(conv_layer) csp_layer = CSPLayerWithTwoConv( out_channels, @@ -275,7 +300,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: num_blocks=num_blocks, add_identity=add_identity, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(csp_layer) if use_spp: spp = SPPFBottleneck( @@ -283,7 +309,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: out_channels, kernel_sizes=5, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(spp) return stage @@ -347,33 +374,49 @@ class YOLOXCSPDarknet(BaseBackbone): (1, 512, 26, 26) (1, 1024, 13, 13) """ + # From left to right: # in_channels, out_channels, num_blocks, add_identity, use_spp arch_settings = { - 'P5': [[64, 128, 3, True, False], [128, 256, 9, True, False], - [256, 512, 9, True, False], [512, 1024, 3, False, True]], + "P5": [ + [64, 128, 3, True, False], + [128, 256, 9, True, False], + [256, 512, 9, True, False], + [512, 1024, 3, False, True], + ], } - def __init__(self, - arch: str = 'P5', - plugins: Union[dict, List[dict]] = None, - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - input_channels: int = 3, - out_indices: Tuple[int] = (2, 3, 4), - frozen_stages: int = -1, - use_depthwise: bool = False, - spp_kernal_sizes: Tuple[int] = (5, 9, 13), - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - norm_eval: bool = False, - init_cfg: OptMultiConfig = None): + def __init__( + self, + arch: str = "P5", + plugins: Union[dict, List[dict]] = None, + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + input_channels: int = 3, + out_indices: Tuple[int] = (2, 3, 4), + frozen_stages: int = -1, + use_depthwise: bool = False, + spp_kernal_sizes: Tuple[int] = (5, 9, 13), + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + norm_eval: bool = False, + init_cfg: OptMultiConfig = None, + ): self.use_depthwise = use_depthwise self.spp_kernal_sizes = spp_kernal_sizes - super().__init__(self.arch_settings[arch], deepen_factor, widen_factor, - input_channels, out_indices, frozen_stages, plugins, - norm_cfg, act_cfg, norm_eval, init_cfg) + super().__init__( + self.arch_settings[arch], + deepen_factor, + widen_factor, + input_channels, + out_indices, + frozen_stages, + plugins, + norm_cfg, + act_cfg, + norm_eval, + init_cfg, + ) def build_stem_layer(self) -> nn.Module: """Build a stem layer.""" @@ -382,7 +425,8 @@ def build_stem_layer(self) -> nn.Module: make_divisible(64, self.widen_factor), kernel_size=3, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_stage_layer(self, stage_idx: int, setting: list) -> list: """Build a stage layer. 
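The SPPFBottleneck used with `kernel_sizes=5` throughout these stages is the "fast" SPP formulation: one 5x5 max-pool applied three times in sequence, with every intermediate result concatenated, which reproduces the receptive fields of parallel 5/9/13 pooling at lower cost. A minimal PyTorch sketch (the halved hidden width is an assumption of the sketch):

import torch
import torch.nn as nn


class SPPFSketch(nn.Module):
    def __init__(self, c_in: int, c_out: int, k: int = 5):
        super().__init__()
        c_mid = c_in // 2                   # hidden channels (assumed ratio)
        self.conv1 = nn.Conv2d(c_in, c_mid, 1)
        self.pool = nn.MaxPool2d(k, stride=1, padding=k // 2)
        self.conv2 = nn.Conv2d(c_mid * 4, c_out, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.conv1(x)
        y1 = self.pool(x)                   # receptive field 5
        y2 = self.pool(y1)                  # equivalent to a 9x9 pool
        y3 = self.pool(y2)                  # equivalent to a 13x13 pool
        return self.conv2(torch.cat([x, y1, y2, y3], dim=1))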
@@ -397,8 +441,7 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: out_channels = make_divisible(out_channels, self.widen_factor) num_blocks = make_round(num_blocks, self.deepen_factor) stage = [] - conv = DepthwiseSeparableConvModule \ - if self.use_depthwise else ConvModule + conv = DepthwiseSeparableConvModule if self.use_depthwise else ConvModule conv_layer = conv( in_channels, out_channels, @@ -406,7 +449,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(conv_layer) if use_spp: spp = SPPFBottleneck( @@ -414,7 +458,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: out_channels, kernel_sizes=self.spp_kernal_sizes, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(spp) csp_layer = CSPLayer( out_channels, @@ -422,6 +467,7 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: num_blocks=num_blocks, add_identity=add_identity, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(csp_layer) return stage diff --git a/mmyolo/mmyolo/models/backbones/csp_resnet.py b/mmyolo/mmyolo/models/backbones/csp_resnet.py index a42ed489..e17a5b0b 100644 --- a/mmyolo/mmyolo/models/backbones/csp_resnet.py +++ b/mmyolo/mmyolo/models/backbones/csp_resnet.py @@ -49,39 +49,44 @@ class PPYOLOECSPResNet(BaseBackbone): use_large_stem (bool): Whether to use large stem layer. Defaults to False. """ + # From left to right: # in_channels, out_channels, num_blocks - arch_settings = { - 'P5': [[64, 128, 3], [128, 256, 6], [256, 512, 6], [512, 1024, 3]] - } + arch_settings = {"P5": [[64, 128, 3], [128, 256, 6], [256, 512, 6], [512, 1024, 3]]} - def __init__(self, - arch: str = 'P5', - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - input_channels: int = 3, - out_indices: Tuple[int] = (2, 3, 4), - frozen_stages: int = -1, - plugins: Union[dict, List[dict]] = None, - arch_ovewrite: dict = None, - block_cfg: ConfigType = dict( - type='PPYOLOEBasicBlock', shortcut=True, use_alpha=True), - norm_cfg: ConfigType = dict( - type='BN', momentum=0.1, eps=1e-5), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - attention_cfg: ConfigType = dict( - type='EffectiveSELayer', act_cfg=dict(type='HSigmoid')), - norm_eval: bool = False, - init_cfg: OptMultiConfig = None, - use_large_stem: bool = False): + def __init__( + self, + arch: str = "P5", + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + input_channels: int = 3, + out_indices: Tuple[int] = (2, 3, 4), + frozen_stages: int = -1, + plugins: Union[dict, List[dict]] = None, + arch_ovewrite: dict = None, + block_cfg: ConfigType = dict( + type="PPYOLOEBasicBlock", shortcut=True, use_alpha=True + ), + norm_cfg: ConfigType = dict(type="BN", momentum=0.1, eps=1e-5), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + attention_cfg: ConfigType = dict( + type="EffectiveSELayer", act_cfg=dict(type="HSigmoid") + ), + norm_eval: bool = False, + init_cfg: OptMultiConfig = None, + use_large_stem: bool = False, + ): arch_setting = self.arch_settings[arch] if arch_ovewrite: arch_setting = arch_ovewrite - arch_setting = [[ - int(in_channels * widen_factor), - int(out_channels * widen_factor), - round(num_blocks * deepen_factor) - ] for in_channels, out_channels, num_blocks in arch_setting] + arch_setting = [ + [ + int(in_channels * widen_factor), + int(out_channels * widen_factor), + round(num_blocks * 
deepen_factor), + ] + for in_channels, out_channels, num_blocks in arch_setting + ] self.block_cfg = block_cfg self.use_large_stem = use_large_stem self.attention_cfg = attention_cfg @@ -97,7 +102,8 @@ def __init__(self, norm_cfg=norm_cfg, act_cfg=act_cfg, norm_eval=norm_eval, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_stem_layer(self) -> nn.Module: """Build a stem layer.""" @@ -110,7 +116,8 @@ def build_stem_layer(self) -> nn.Module: stride=2, padding=1, act_cfg=self.act_cfg, - norm_cfg=self.norm_cfg), + norm_cfg=self.norm_cfg, + ), ConvModule( self.arch_setting[0][0] // 2, self.arch_setting[0][0] // 2, @@ -118,7 +125,8 @@ def build_stem_layer(self) -> nn.Module: stride=1, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( self.arch_setting[0][0] // 2, self.arch_setting[0][0], @@ -126,7 +134,9 @@ def build_stem_layer(self) -> nn.Module: stride=1, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ), + ) else: stem = nn.Sequential( ConvModule( @@ -136,7 +146,8 @@ def build_stem_layer(self) -> nn.Module: stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( self.arch_setting[0][0] // 2, self.arch_setting[0][0], @@ -144,7 +155,9 @@ def build_stem_layer(self) -> nn.Module: stride=1, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ), + ) return stem def build_stage_layer(self, stage_idx: int, setting: list) -> list: @@ -165,5 +178,6 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, attention_cfg=self.attention_cfg, - use_spp=False) + use_spp=False, + ) return [cspres_layer] diff --git a/mmyolo/mmyolo/models/backbones/cspnext.py b/mmyolo/mmyolo/models/backbones/cspnext.py index adca9dd9..1668134a 100644 --- a/mmyolo/mmyolo/models/backbones/cspnext.py +++ b/mmyolo/mmyolo/models/backbones/cspnext.py @@ -8,6 +8,7 @@ from mmdet.utils import ConfigType, OptConfigType, OptMultiConfig from mmyolo.registry import MODELS + from ..layers import SPPFBottleneck from .base_backbone import BaseBackbone @@ -51,19 +52,28 @@ class CSPNeXt(BaseBackbone): init_cfg (:obj:`ConfigDict` or dict or list[dict] or list[:obj:`ConfigDict`]): Initialization config dict. 
""" + # From left to right: # in_channels, out_channels, num_blocks, add_identity, use_spp arch_settings = { - 'P5': [[64, 128, 3, True, False], [128, 256, 6, True, False], - [256, 512, 6, True, False], [512, 1024, 3, False, True]], - 'P6': [[64, 128, 3, True, False], [128, 256, 6, True, False], - [256, 512, 6, True, False], [512, 768, 3, True, False], - [768, 1024, 3, False, True]] + "P5": [ + [64, 128, 3, True, False], + [128, 256, 6, True, False], + [256, 512, 6, True, False], + [512, 1024, 3, False, True], + ], + "P6": [ + [64, 128, 3, True, False], + [128, 256, 6, True, False], + [256, 512, 6, True, False], + [512, 768, 3, True, False], + [768, 1024, 3, False, True], + ], } def __init__( self, - arch: str = 'P5', + arch: str = "P5", deepen_factor: float = 1.0, widen_factor: float = 1.0, input_channels: int = 3, @@ -75,24 +85,24 @@ def __init__( arch_ovewrite: dict = None, channel_attention: bool = True, conv_cfg: OptConfigType = None, - norm_cfg: ConfigType = dict(type='BN'), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), + norm_cfg: ConfigType = dict(type="BN"), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), norm_eval: bool = False, init_cfg: OptMultiConfig = dict( - type='Kaiming', - layer='Conv2d', + type="Kaiming", + layer="Conv2d", a=math.sqrt(5), - distribution='uniform', - mode='fan_in', - nonlinearity='leaky_relu') + distribution="uniform", + mode="fan_in", + nonlinearity="leaky_relu", + ), ) -> None: arch_setting = self.arch_settings[arch] if arch_ovewrite: arch_setting = arch_ovewrite self.channel_attention = channel_attention self.use_depthwise = use_depthwise - self.conv = DepthwiseSeparableConvModule \ - if use_depthwise else ConvModule + self.conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule self.expand_ratio = expand_ratio self.conv_cfg = conv_cfg @@ -107,7 +117,8 @@ def __init__( norm_cfg=norm_cfg, act_cfg=act_cfg, norm_eval=norm_eval, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_stem_layer(self) -> nn.Module: """Build a stem layer.""" @@ -119,7 +130,8 @@ def build_stem_layer(self) -> nn.Module: padding=1, stride=2, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( int(self.arch_setting[0][0] * self.widen_factor // 2), int(self.arch_setting[0][0] * self.widen_factor // 2), @@ -127,7 +139,8 @@ def build_stem_layer(self) -> nn.Module: padding=1, stride=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( int(self.arch_setting[0][0] * self.widen_factor // 2), int(self.arch_setting[0][0] * self.widen_factor), @@ -135,7 +148,9 @@ def build_stem_layer(self) -> nn.Module: padding=1, stride=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ), + ) return stem def build_stage_layer(self, stage_idx: int, setting: list) -> list: @@ -160,7 +175,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(conv_layer) if use_spp: spp = SPPFBottleneck( @@ -169,7 +185,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: kernel_sizes=5, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(spp) csp_layer = CSPLayer( out_channels, @@ -182,6 +199,7 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: channel_attention=self.channel_attention, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - 
act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(csp_layer) return stage diff --git a/mmyolo/mmyolo/models/backbones/efficient_rep.py b/mmyolo/mmyolo/models/backbones/efficient_rep.py index 691c5b84..582fdb7b 100644 --- a/mmyolo/mmyolo/models/backbones/efficient_rep.py +++ b/mmyolo/mmyolo/models/backbones/efficient_rep.py @@ -8,6 +8,7 @@ from mmyolo.models.layers.yolo_bricks import SPPFBottleneck from mmyolo.registry import MODELS + from ..layers import BepC3StageBlock, RepStageBlock from ..utils import make_round from .base_backbone import BaseBackbone @@ -57,27 +58,33 @@ class YOLOv6EfficientRep(BaseBackbone): (1, 512, 26, 26) (1, 1024, 13, 13) """ + # From left to right: # in_channels, out_channels, num_blocks, use_spp arch_settings = { - 'P5': [[64, 128, 6, False], [128, 256, 12, False], - [256, 512, 18, False], [512, 1024, 6, True]] + "P5": [ + [64, 128, 6, False], + [128, 256, 12, False], + [256, 512, 18, False], + [512, 1024, 6, True], + ] } - def __init__(self, - arch: str = 'P5', - plugins: Union[dict, List[dict]] = None, - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - input_channels: int = 3, - out_indices: Tuple[int] = (2, 3, 4), - frozen_stages: int = -1, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='ReLU', inplace=True), - norm_eval: bool = False, - block_cfg: ConfigType = dict(type='RepVGGBlock'), - init_cfg: OptMultiConfig = None): + def __init__( + self, + arch: str = "P5", + plugins: Union[dict, List[dict]] = None, + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + input_channels: int = 3, + out_indices: Tuple[int] = (2, 3, 4), + frozen_stages: int = -1, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="ReLU", inplace=True), + norm_eval: bool = False, + block_cfg: ConfigType = dict(type="RepVGGBlock"), + init_cfg: OptMultiConfig = None, + ): self.block_cfg = block_cfg super().__init__( self.arch_settings[arch], @@ -90,7 +97,8 @@ def __init__(self, norm_cfg=norm_cfg, act_cfg=act_cfg, norm_eval=norm_eval, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_stem_layer(self) -> nn.Module: """Build a stem layer.""" @@ -102,7 +110,8 @@ def build_stem_layer(self) -> nn.Module: out_channels=int(self.arch_setting[0][0] * self.widen_factor), kernel_size=3, stride=2, - )) + ) + ) return MODELS.build(block_cfg) def build_stage_layer(self, stage_idx: int, setting: list) -> list: @@ -131,7 +140,9 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: in_channels=in_channels, out_channels=out_channels, kernel_size=3, - stride=2)) + stride=2, + ) + ) stage = [] ef_block = nn.Sequential(MODELS.build(block_cfg), rep_stage_block) @@ -144,7 +155,8 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: out_channels=out_channels, kernel_sizes=5, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(spp) return stage @@ -206,28 +218,34 @@ class YOLOv6CSPBep(YOLOv6EfficientRep): (1, 512, 26, 26) (1, 1024, 13, 13) """ + # From left to right: # in_channels, out_channels, num_blocks, use_spp arch_settings = { - 'P5': [[64, 128, 6, False], [128, 256, 12, False], - [256, 512, 18, False], [512, 1024, 6, True]] + "P5": [ + [64, 128, 6, False], + [128, 256, 12, False], + [256, 512, 18, False], + [512, 1024, 6, True], + ] } - def __init__(self, - arch: str = 'P5', - plugins: Union[dict, List[dict]] = None, - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - input_channels: int = 
3, - hidden_ratio: float = 0.5, - out_indices: Tuple[int] = (2, 3, 4), - frozen_stages: int = -1, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - norm_eval: bool = False, - block_cfg: ConfigType = dict(type='ConvWrapper'), - init_cfg: OptMultiConfig = None): + def __init__( + self, + arch: str = "P5", + plugins: Union[dict, List[dict]] = None, + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + input_channels: int = 3, + hidden_ratio: float = 0.5, + out_indices: Tuple[int] = (2, 3, 4), + frozen_stages: int = -1, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + norm_eval: bool = False, + block_cfg: ConfigType = dict(type="ConvWrapper"), + init_cfg: OptMultiConfig = None, + ): self.hidden_ratio = hidden_ratio super().__init__( arch=arch, @@ -241,7 +259,8 @@ def __init__(self, act_cfg=act_cfg, norm_eval=norm_eval, block_cfg=block_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_stage_layer(self, stage_idx: int, setting: list) -> list: """Build a stage layer. @@ -262,14 +281,17 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: hidden_ratio=self.hidden_ratio, block_cfg=self.block_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) block_cfg = self.block_cfg.copy() block_cfg.update( dict( in_channels=in_channels, out_channels=out_channels, kernel_size=3, - stride=2)) + stride=2, + ) + ) stage = [] ef_block = nn.Sequential(MODELS.build(block_cfg), rep_stage_block) @@ -282,6 +304,7 @@ def build_stage_layer(self, stage_idx: int, setting: list) -> list: out_channels=out_channels, kernel_sizes=5, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) stage.append(spp) return stage diff --git a/mmyolo/mmyolo/models/backbones/yolov7_backbone.py b/mmyolo/mmyolo/models/backbones/yolov7_backbone.py index bb9a5eed..ca2bff36 100644 --- a/mmyolo/mmyolo/models/backbones/yolov7_backbone.py +++ b/mmyolo/mmyolo/models/backbones/yolov7_backbone.py @@ -7,6 +7,7 @@ from mmdet.utils import ConfigType, OptMultiConfig from mmyolo.registry import MODELS + from ..layers import MaxPoolAndStrideConvBlock from .base_backbone import BaseBackbone @@ -40,105 +41,132 @@ class YOLOv7Backbone(BaseBackbone): init_cfg (:obj:`ConfigDict` or dict or list[dict] or list[:obj:`ConfigDict`]): Initialization config dict. 
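The EfficientRep family above instantiates its stage blocks through the config registry: copy the `block_cfg` template, fill in the per-stage shapes, and let `MODELS.build` resolve the type string to a class. A minimal sketch of that pattern with illustrative channel sizes (requires mmyolo to be importable):

import mmyolo.models  # noqa: F401  # importing populates the model registry

from mmyolo.registry import MODELS

block_cfg = dict(type="RepVGGBlock")  # template, as in YOLOv6EfficientRep
stage_cfg = block_cfg.copy()          # never mutate the shared template
stage_cfg.update(dict(in_channels=128, out_channels=256, kernel_size=3, stride=2))
block = MODELS.build(stage_cfg)       # -> an nn.Module looked up by its type name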
""" - _tiny_stage1_cfg = dict(type='TinyDownSampleBlock', middle_ratio=0.5) - _tiny_stage2_4_cfg = dict(type='TinyDownSampleBlock', middle_ratio=1.0) + + _tiny_stage1_cfg = dict(type="TinyDownSampleBlock", middle_ratio=0.5) + _tiny_stage2_4_cfg = dict(type="TinyDownSampleBlock", middle_ratio=1.0) _l_expand_channel_2x = dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=0.5, block_ratio=0.5, num_blocks=2, - num_convs_in_block=2) + num_convs_in_block=2, + ) _l_no_change_channel = dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=0.25, block_ratio=0.25, num_blocks=2, - num_convs_in_block=2) + num_convs_in_block=2, + ) _x_expand_channel_2x = dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=0.4, block_ratio=0.4, num_blocks=3, - num_convs_in_block=2) + num_convs_in_block=2, + ) _x_no_change_channel = dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=0.2, block_ratio=0.2, num_blocks=3, - num_convs_in_block=2) + num_convs_in_block=2, + ) _w_no_change_channel = dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=0.5, block_ratio=0.5, num_blocks=2, - num_convs_in_block=2) + num_convs_in_block=2, + ) _e_no_change_channel = dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=0.4, block_ratio=0.4, num_blocks=3, - num_convs_in_block=2) + num_convs_in_block=2, + ) _d_no_change_channel = dict( - type='ELANBlock', + type="ELANBlock", middle_ratio=1 / 3, block_ratio=1 / 3, num_blocks=4, - num_convs_in_block=2) + num_convs_in_block=2, + ) _e2e_no_change_channel = dict( - type='EELANBlock', + type="EELANBlock", num_elan_block=2, middle_ratio=0.4, block_ratio=0.4, num_blocks=3, - num_convs_in_block=2) + num_convs_in_block=2, + ) # From left to right: # in_channels, out_channels, Block_params arch_settings = { - 'Tiny': [[64, 64, _tiny_stage1_cfg], [64, 128, _tiny_stage2_4_cfg], - [128, 256, _tiny_stage2_4_cfg], - [256, 512, _tiny_stage2_4_cfg]], - 'L': [[64, 256, _l_expand_channel_2x], - [256, 512, _l_expand_channel_2x], - [512, 1024, _l_expand_channel_2x], - [1024, 1024, _l_no_change_channel]], - 'X': [[80, 320, _x_expand_channel_2x], - [320, 640, _x_expand_channel_2x], - [640, 1280, _x_expand_channel_2x], - [1280, 1280, _x_no_change_channel]], - 'W': - [[64, 128, _w_no_change_channel], [128, 256, _w_no_change_channel], - [256, 512, _w_no_change_channel], [512, 768, _w_no_change_channel], - [768, 1024, _w_no_change_channel]], - 'E': - [[80, 160, _e_no_change_channel], [160, 320, _e_no_change_channel], - [320, 640, _e_no_change_channel], [640, 960, _e_no_change_channel], - [960, 1280, _e_no_change_channel]], - 'D': [[96, 192, - _d_no_change_channel], [192, 384, _d_no_change_channel], - [384, 768, _d_no_change_channel], - [768, 1152, _d_no_change_channel], - [1152, 1536, _d_no_change_channel]], - 'E2E': [[80, 160, _e2e_no_change_channel], - [160, 320, _e2e_no_change_channel], - [320, 640, _e2e_no_change_channel], - [640, 960, _e2e_no_change_channel], - [960, 1280, _e2e_no_change_channel]], + "Tiny": [ + [64, 64, _tiny_stage1_cfg], + [64, 128, _tiny_stage2_4_cfg], + [128, 256, _tiny_stage2_4_cfg], + [256, 512, _tiny_stage2_4_cfg], + ], + "L": [ + [64, 256, _l_expand_channel_2x], + [256, 512, _l_expand_channel_2x], + [512, 1024, _l_expand_channel_2x], + [1024, 1024, _l_no_change_channel], + ], + "X": [ + [80, 320, _x_expand_channel_2x], + [320, 640, _x_expand_channel_2x], + [640, 1280, _x_expand_channel_2x], + [1280, 1280, _x_no_change_channel], + ], + "W": [ + [64, 128, _w_no_change_channel], + [128, 256, _w_no_change_channel], + [256, 512, _w_no_change_channel], 
+ [512, 768, _w_no_change_channel], + [768, 1024, _w_no_change_channel], + ], + "E": [ + [80, 160, _e_no_change_channel], + [160, 320, _e_no_change_channel], + [320, 640, _e_no_change_channel], + [640, 960, _e_no_change_channel], + [960, 1280, _e_no_change_channel], + ], + "D": [ + [96, 192, _d_no_change_channel], + [192, 384, _d_no_change_channel], + [384, 768, _d_no_change_channel], + [768, 1152, _d_no_change_channel], + [1152, 1536, _d_no_change_channel], + ], + "E2E": [ + [80, 160, _e2e_no_change_channel], + [160, 320, _e2e_no_change_channel], + [320, 640, _e2e_no_change_channel], + [640, 960, _e2e_no_change_channel], + [960, 1280, _e2e_no_change_channel], + ], } - def __init__(self, - arch: str = 'L', - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - input_channels: int = 3, - out_indices: Tuple[int] = (2, 3, 4), - frozen_stages: int = -1, - plugins: Union[dict, List[dict]] = None, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - norm_eval: bool = False, - init_cfg: OptMultiConfig = None): + def __init__( + self, + arch: str = "L", + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + input_channels: int = 3, + out_indices: Tuple[int] = (2, 3, 4), + frozen_stages: int = -1, + plugins: Union[dict, List[dict]] = None, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + norm_eval: bool = False, + init_cfg: OptMultiConfig = None, + ): assert arch in self.arch_settings.keys() self.arch = arch super().__init__( @@ -152,11 +180,12 @@ def __init__(self, norm_cfg=norm_cfg, act_cfg=act_cfg, norm_eval=norm_eval, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_stem_layer(self) -> nn.Module: """Build a stem layer.""" - if self.arch in ['L', 'X']: + if self.arch in ["L", "X"]: stem = nn.Sequential( ConvModule( 3, @@ -165,7 +194,8 @@ def build_stem_layer(self) -> nn.Module: padding=1, stride=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( int(self.arch_setting[0][0] * self.widen_factor // 2), int(self.arch_setting[0][0] * self.widen_factor), @@ -173,7 +203,8 @@ def build_stem_layer(self) -> nn.Module: padding=1, stride=2, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( int(self.arch_setting[0][0] * self.widen_factor), int(self.arch_setting[0][0] * self.widen_factor), @@ -181,8 +212,10 @@ def build_stem_layer(self) -> nn.Module: padding=1, stride=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) - elif self.arch == 'Tiny': + act_cfg=self.act_cfg, + ), + ) + elif self.arch == "Tiny": stem = nn.Sequential( ConvModule( 3, @@ -191,7 +224,8 @@ def build_stem_layer(self) -> nn.Module: padding=1, stride=2, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( int(self.arch_setting[0][0] * self.widen_factor // 2), int(self.arch_setting[0][0] * self.widen_factor), @@ -199,14 +233,17 @@ def build_stem_layer(self) -> nn.Module: padding=1, stride=2, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) - elif self.arch in ['W', 'E', 'D', 'E2E']: + act_cfg=self.act_cfg, + ), + ) + elif self.arch in ["W", "E", "D", "E2E"]: stem = Focus( 3, int(self.arch_setting[0][0] * self.widen_factor), kernel_size=3, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) return stem def build_stage_layer(self, stage_idx: int, setting: list) -> list: @@ -221,37 +258,40 @@ def build_stage_layer(self, stage_idx: 
int, setting: list) -> list: out_channels = int(out_channels * self.widen_factor) stage_block_cfg = stage_block_cfg.copy() - stage_block_cfg.setdefault('norm_cfg', self.norm_cfg) - stage_block_cfg.setdefault('act_cfg', self.act_cfg) + stage_block_cfg.setdefault("norm_cfg", self.norm_cfg) + stage_block_cfg.setdefault("act_cfg", self.act_cfg) - stage_block_cfg['in_channels'] = in_channels - stage_block_cfg['out_channels'] = out_channels + stage_block_cfg["in_channels"] = in_channels + stage_block_cfg["out_channels"] = out_channels stage = [] - if self.arch in ['W', 'E', 'D', 'E2E']: - stage_block_cfg['in_channels'] = out_channels - elif self.arch in ['L', 'X']: + if self.arch in ["W", "E", "D", "E2E"]: + stage_block_cfg["in_channels"] = out_channels + elif self.arch in ["L", "X"]: if stage_idx == 0: - stage_block_cfg['in_channels'] = out_channels // 2 + stage_block_cfg["in_channels"] = out_channels // 2 downsample_layer = self._build_downsample_layer( - stage_idx, in_channels, out_channels) + stage_idx, in_channels, out_channels + ) stage.append(MODELS.build(stage_block_cfg)) if downsample_layer is not None: stage.insert(0, downsample_layer) return stage - def _build_downsample_layer(self, stage_idx: int, in_channels: int, - out_channels: int) -> Optional[nn.Module]: + def _build_downsample_layer( + self, stage_idx: int, in_channels: int, out_channels: int + ) -> Optional[nn.Module]: """Build a downsample layer pre stage.""" - if self.arch in ['E', 'D', 'E2E']: + if self.arch in ["E", "D", "E2E"]: downsample_layer = MaxPoolAndStrideConvBlock( in_channels, out_channels, use_in_channels_of_middle=True, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) - elif self.arch == 'W': + act_cfg=self.act_cfg, + ) + elif self.arch == "W": downsample_layer = ConvModule( in_channels, out_channels, @@ -259,13 +299,14 @@ def _build_downsample_layer(self, stage_idx: int, in_channels: int, stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) - elif self.arch == 'Tiny': + act_cfg=self.act_cfg, + ) + elif self.arch == "Tiny": if stage_idx != 0: downsample_layer = nn.MaxPool2d(2, 2) else: downsample_layer = None - elif self.arch in ['L', 'X']: + elif self.arch in ["L", "X"]: if stage_idx == 0: downsample_layer = ConvModule( in_channels, @@ -274,12 +315,14 @@ def _build_downsample_layer(self, stage_idx: int, in_channels: int, stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) else: downsample_layer = MaxPoolAndStrideConvBlock( in_channels, in_channels, use_in_channels_of_middle=False, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) return downsample_layer diff --git a/mmyolo/mmyolo/models/data_preprocessors/__init__.py b/mmyolo/mmyolo/models/data_preprocessors/__init__.py index 4e31aa71..9ceeec47 100644 --- a/mmyolo/mmyolo/models/data_preprocessors/__init__.py +++ b/mmyolo/mmyolo/models/data_preprocessors/__init__.py @@ -1,9 +1,12 @@ # Copyright (c) OpenMMLab. All rights reserved. 
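Before the patch moves on to the data preprocessors: the arch-conditional downsampling in YOLOv7Backbone above reduces to a small dispatch, summarized here as a sketch (the returned strings are descriptive labels only, not mmyolo APIs):

def downsample_kind(arch: str, stage_idx: int) -> str:
    """Mirror _build_downsample_layer: which module precedes each stage block."""
    if arch in ("E", "D", "E2E"):
        return "MaxPoolAndStrideConvBlock (middle channels)"
    if arch == "W":
        return "3x3 stride-2 ConvModule"
    if arch == "Tiny":
        return "none" if stage_idx == 0 else "2x2 MaxPool2d"
    if arch in ("L", "X"):
        return ("3x3 stride-2 ConvModule" if stage_idx == 0
                else "MaxPoolAndStrideConvBlock (in_channels kept)")
    raise ValueError(f"unknown arch {arch}")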
-from .data_preprocessor import (PPYOLOEBatchRandomResize, - PPYOLOEDetDataPreprocessor, - YOLOv5DetDataPreprocessor) +from .data_preprocessor import ( + PPYOLOEBatchRandomResize, + PPYOLOEDetDataPreprocessor, + YOLOv5DetDataPreprocessor, +) __all__ = [ - 'YOLOv5DetDataPreprocessor', 'PPYOLOEDetDataPreprocessor', - 'PPYOLOEBatchRandomResize' + "YOLOv5DetDataPreprocessor", + "PPYOLOEDetDataPreprocessor", + "PPYOLOEBatchRandomResize", ] diff --git a/mmyolo/mmyolo/models/data_preprocessors/data_preprocessor.py b/mmyolo/mmyolo/models/data_preprocessors/data_preprocessor.py index c7281fa5..16d2063c 100644 --- a/mmyolo/mmyolo/models/data_preprocessors/data_preprocessor.py +++ b/mmyolo/mmyolo/models/data_preprocessors/data_preprocessor.py @@ -32,14 +32,15 @@ def forward(self, data: dict, training: bool = False) -> dict: """ if not training: return super().forward(data, training) - assert isinstance(data['data_samples'], torch.Tensor), \ - '"data_samples" should be a tensor, but got ' \ - f'{type(data["data_samples"])}. The possible reason for this ' \ - 'is that you are not using it with ' \ - '"mmyolo.datasets.utils.yolov5_collate". Please refer to ' \ + assert isinstance(data["data_samples"], torch.Tensor), ( + '"data_samples" should be a tensor, but got ' + f'{type(data["data_samples"])}. The possible reason for this ' + "is that you are not using it with " + '"mmyolo.datasets.utils.yolov5_collate". Please refer to ' '"configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py".' + ) - inputs = data['inputs'].to(self.device, non_blocking=True) + inputs = data["inputs"].to(self.device, non_blocking=True) if self._channel_conversion and inputs.shape[1] == 3: inputs = inputs[:, [2, 1, 0], ...] @@ -47,16 +48,16 @@ def forward(self, data: dict, training: bool = False) -> dict: if self._enable_normalize: inputs = (inputs - self.mean) / self.std - data_samples = data['data_samples'].to(self.device, non_blocking=True) + data_samples = data["data_samples"].to(self.device, non_blocking=True) if self.batch_augments is not None: for batch_aug in self.batch_augments: inputs, data_samples = batch_aug(inputs, data_samples) - img_metas = [{'batch_input_shape': inputs.shape[2:]}] * len(inputs) - data_samples = {'bboxes_labels': data_samples, 'img_metas': img_metas} + img_metas = [{"batch_input_shape": inputs.shape[2:]}] * len(inputs) + data_samples = {"bboxes_labels": data_samples, "img_metas": img_metas} - return {'inputs': inputs, 'data_samples': data_samples} + return {"inputs": inputs, "data_samples": data_samples} @MODELS.register_module() @@ -88,16 +89,18 @@ def forward(self, data: dict, training: bool = False) -> dict: if not training: return super().forward(data, training) - assert isinstance(data['inputs'], list) and is_list_of( - data['inputs'], torch.Tensor), \ - '"inputs" should be a list of Tensor, but got ' \ - f'{type(data["inputs"])}. The possible reason for this ' \ - 'is that you are not using it with ' \ - '"mmyolo.datasets.utils.yolov5_collate". Please refer to ' \ + assert isinstance(data["inputs"], list) and is_list_of( + data["inputs"], torch.Tensor + ), ( + '"inputs" should be a list of Tensor, but got ' + f'{type(data["inputs"])}. The possible reason for this ' + "is that you are not using it with " + '"mmyolo.datasets.utils.yolov5_collate". Please refer to ' '"configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py".' + ) data = self.cast_data(data) - inputs, data_samples = data['inputs'], data['data_samples'] + inputs, data_samples = data["inputs"], data["data_samples"] # Process data.
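The fused `bboxes_labels` tensor that both preprocessors emit packs every ground-truth instance of the batch into one 2-D tensor; the resize logic later in this patch indexes column 0 as the image index (`data_samples[:, 0] == i`). A sketch of that layout — the ordering of the remaining columns (class id, then four box values) is an assumption here:

import torch

# One row per ground-truth instance; column 0 is the image's index in the batch.
bboxes_labels = torch.tensor([
    [0, 17, 120.0, 80.0, 60.0, 40.0],  # instance of class 17 in image 0
    [0,  2,  30.0, 50.0, 20.0, 22.0],  # another instance in image 0
    [1, 17, 200.0, 90.0, 80.0, 64.0],  # instance in image 1
])
per_image = [bboxes_labels[bboxes_labels[:, 0] == i] for i in range(2)]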
batch_inputs = [] @@ -119,10 +122,10 @@ def forward(self, data: dict, training: bool = False) -> dict: if self._enable_normalize: inputs = (inputs - self.mean) / self.std - img_metas = [{'batch_input_shape': inputs.shape[2:]}] * len(inputs) - data_samples = {'bboxes_labels': data_samples, 'img_metas': img_metas} + img_metas = [{"batch_input_shape": inputs.shape[2:]}] * len(inputs) + data_samples = {"bboxes_labels": data_samples, "img_metas": img_metas} - return {'inputs': inputs, 'data_samples': data_samples} + return {"inputs": inputs, "data_samples": data_samples} # TODO: No generality. Its input data format is different @@ -149,37 +152,37 @@ class PPYOLOEBatchRandomResize(BatchSyncRandomResize): Defaults to False. """ - def __init__(self, - random_size_range: Tuple[int, int], - interval: int = 1, - size_divisor: int = 32, - random_interp=True, - interp_mode: Union[List[str], str] = [ - 'nearest', 'bilinear', 'bicubic', 'area' - ], - keep_ratio: bool = False) -> None: + def __init__( + self, + random_size_range: Tuple[int, int], + interval: int = 1, + size_divisor: int = 32, + random_interp=True, + interp_mode: Union[List[str], str] = ["nearest", "bilinear", "bicubic", "area"], + keep_ratio: bool = False, + ) -> None: super().__init__(random_size_range, interval, size_divisor) self.random_interp = random_interp self.keep_ratio = keep_ratio # TODO: need to support keep_ratio==True - assert not self.keep_ratio, 'We do not yet support keep_ratio=True' + assert not self.keep_ratio, "We do not yet support keep_ratio=True" if self.random_interp: - assert isinstance(interp_mode, list) and len(interp_mode) > 1,\ - 'While random_interp==True, the type of `interp_mode`' \ - ' must be list and len(interp_mode) must large than 1' + assert isinstance(interp_mode, list) and len(interp_mode) > 1, ( + "While random_interp==True, the type of `interp_mode`" + " must be list and len(interp_mode) must be larger than 1" + ) self.interp_mode_list = interp_mode self.interp_mode = None else: - assert isinstance(interp_mode, str),\ - 'While random_interp==False, the type of ' \ - '`interp_mode` must be str' - assert interp_mode in ['nearest', 'bilinear', 'bicubic', 'area'] + assert isinstance(interp_mode, str), ( + "While random_interp==False, the type of " "`interp_mode` must be str" + ) + assert interp_mode in ["nearest", "bilinear", "bicubic", "area"] self.interp_mode_list = None self.interp_mode = interp_mode - def forward(self, inputs: list, - data_samples: Tensor) -> Tuple[Tensor, Tensor]: + def forward(self, inputs: list, data_samples: Tensor) -> Tuple[Tensor, Tensor]: """Resize a batch of images and bboxes to shape ``self._input_size``. The inputs and data_samples should be list, and @@ -187,12 +190,13 @@ def forward(self, inputs: list, ``PPYOLOEDetDataPreprocessor`` and ``yolov5_collate`` with ``use_ms_training == True``. """ - assert isinstance(inputs, list),\ - 'The type of inputs must be list. The possible reason for this ' \ - 'is that you are not using it with `PPYOLOEDetDataPreprocessor` ' \ - 'and `yolov5_collate` with use_ms_training == True.' + assert isinstance(inputs, list), ( + "The type of inputs must be list. The possible reason for this " + "is that you are not using it with `PPYOLOEDetDataPreprocessor` " + "and `yolov5_collate` with use_ms_training == True."
+ ) message_hub = MessageHub.get_current_instance() - if (message_hub.get_info('iter') + 1) % self._interval == 0: + if (message_hub.get_info("iter") + 1) % self._interval == 0: # get current input size self._input_size, interp_mode = self._get_random_size_and_interp() if self.random_interp: @@ -206,8 +210,8 @@ def forward(self, inputs: list, h, w = _batch_input.shape[-2:] scale_y = self._input_size[0] / h scale_x = self._input_size[1] / w - if scale_x != 1. or scale_y != 1.: - if self.interp_mode in ('nearest', 'area'): + if scale_x != 1.0 or scale_y != 1.0: + if self.interp_mode in ("nearest", "area"): align_corners = None else: align_corners = False @@ -215,7 +219,8 @@ def forward(self, inputs: list, _batch_input.unsqueeze(0), size=self._input_size, mode=self.interp_mode, - align_corners=align_corners) + align_corners=align_corners, + ) # rescale boxes indexes = data_samples[:, 0] == i @@ -231,7 +236,7 @@ def forward(self, inputs: list, # convert to Tensor return torch.cat(outputs, dim=0), data_samples else: - raise NotImplementedError('Not implemented yet!') + raise NotImplementedError("Not implemented yet!") def _get_random_size_and_interp(self) -> Tuple[int, int]: """Randomly generate a shape in ``_random_size_range`` and a diff --git a/mmyolo/mmyolo/models/dense_heads/__init__.py b/mmyolo/mmyolo/models/dense_heads/__init__.py index 0b29f30b..1cc64e47 100644 --- a/mmyolo/mmyolo/models/dense_heads/__init__.py +++ b/mmyolo/mmyolo/models/dense_heads/__init__.py @@ -8,8 +8,19 @@ from .yolox_head import YOLOXHead, YOLOXHeadModule __all__ = [ - 'YOLOv5Head', 'YOLOv6Head', 'YOLOXHead', 'YOLOv5HeadModule', - 'YOLOv6HeadModule', 'YOLOXHeadModule', 'RTMDetHead', - 'RTMDetSepBNHeadModule', 'YOLOv7Head', 'PPYOLOEHead', 'PPYOLOEHeadModule', - 'YOLOv7HeadModule', 'YOLOv7p6HeadModule', 'YOLOv8Head', 'YOLOv8HeadModule' + "YOLOv5Head", + "YOLOv6Head", + "YOLOXHead", + "YOLOv5HeadModule", + "YOLOv6HeadModule", + "YOLOXHeadModule", + "RTMDetHead", + "RTMDetSepBNHeadModule", + "YOLOv7Head", + "PPYOLOEHead", + "PPYOLOEHeadModule", + "YOLOv7HeadModule", + "YOLOv7p6HeadModule", + "YOLOv8Head", + "YOLOv8HeadModule", ] diff --git a/mmyolo/mmyolo/models/dense_heads/ppyoloe_head.py b/mmyolo/mmyolo/models/dense_heads/ppyoloe_head.py index 67c1160b..7923e0c8 100644 --- a/mmyolo/mmyolo/models/dense_heads/ppyoloe_head.py +++ b/mmyolo/mmyolo/models/dense_heads/ppyoloe_head.py @@ -5,14 +5,20 @@ import torch.nn as nn import torch.nn.functional as F from mmdet.models.utils import multi_apply -from mmdet.utils import (ConfigType, OptConfigType, OptInstanceList, - OptMultiConfig, reduce_mean) +from mmdet.utils import ( + ConfigType, + OptConfigType, + OptInstanceList, + OptMultiConfig, + reduce_mean, +) from mmengine import MessageHub from mmengine.model import BaseModule, bias_init_with_prob from mmengine.structures import InstanceData from torch import Tensor from mmyolo.registry import MODELS + from ..layers.yolo_bricks import PPYOLOESELayer from .yolov6_head import YOLOv6Head @@ -43,17 +49,18 @@ class PPYOLOEHeadModule(BaseModule): Defaults to None. 
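Taken together, the PPYOLOEBatchRandomResize hunks above implement a simple multi-scale recipe: every `interval` iterations, draw a new square input size that is a multiple of `size_divisor` from `random_size_range` (plus, when `random_interp` is on, a fresh interpolation mode), then interpolate each image in the batch to that size and rescale its boxes by the same factors. Below is a minimal standalone sketch of that recipe, not the mmyolo implementation; the function name and the (batch_idx, label, x1, y1, x2, y2) row layout assumed for `boxes` are illustrative only:

    import random
    from typing import List, Tuple

    import torch
    import torch.nn.functional as F


    def random_resize_batch(
        images: List[torch.Tensor],  # each (3, H, W); H and W may differ per image
        boxes: torch.Tensor,         # (N, 6) float rows: batch_idx, label, x1, y1, x2, y2
        size_range: Tuple[int, int] = (320, 640),
        size_divisor: int = 32,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        # Draw a square target size that is a multiple of ``size_divisor``.
        target = random.randint(size_range[0] // size_divisor,
                                size_range[1] // size_divisor) * size_divisor
        # Only bilinear/bicubic take align_corners, mirroring the hunk above.
        interp = random.choice(["nearest", "bilinear", "bicubic", "area"])
        align_corners = None if interp in ("nearest", "area") else False

        outputs = []
        for i, img in enumerate(images):
            h, w = img.shape[-2:]
            scale_y, scale_x = target / h, target / w
            outputs.append(
                F.interpolate(img[None].float(), size=(target, target),
                              mode=interp, align_corners=align_corners))
            # Rescale the boxes belonging to image ``i`` in place.
            mask = boxes[:, 0] == i
            boxes[mask, 2::2] *= scale_x  # x1, x2
            boxes[mask, 3::2] *= scale_y  # y1, y2
        return torch.cat(outputs, dim=0), boxes

Because every image lands on the same (target, target) shape, the final torch.cat succeeds even when the inputs started at different resolutions, which is why the preprocessor can accept a list of tensors during multi-scale training.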
""" - def __init__(self, - num_classes: int, - in_channels: Union[int, Sequence], - widen_factor: float = 1.0, - num_base_priors: int = 1, - featmap_strides: Sequence[int] = (8, 16, 32), - reg_max: int = 16, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.1, eps=1e-5), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + def __init__( + self, + num_classes: int, + in_channels: Union[int, Sequence], + widen_factor: float = 1.0, + num_base_priors: int = 1, + featmap_strides: Sequence[int] = (8, 16, 32), + reg_max: int = 16, + norm_cfg: ConfigType = dict(type="BN", momentum=0.1, eps=1e-5), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg=init_cfg) self.num_classes = num_classes @@ -65,8 +72,7 @@ def __init__(self, self.reg_max = reg_max if isinstance(in_channels, int): - self.in_channels = [int(in_channels * widen_factor) - ] * self.num_levels + self.in_channels = [int(in_channels * widen_factor)] * self.num_levels else: self.in_channels = [int(i * widen_factor) for i in in_channels] @@ -77,11 +83,11 @@ def init_weights(self, prior_prob=0.01): super().init_weights() for conv in self.cls_preds: conv.bias.data.fill_(bias_init_with_prob(prior_prob)) - conv.weight.data.fill_(0.) + conv.weight.data.fill_(0.0) for conv in self.reg_preds: conv.bias.data.fill_(1.0) - conv.weight.data.fill_(0.) + conv.weight.data.fill_(0.0) def _init_layers(self): """initialize conv layers in PPYOLOE head.""" @@ -92,22 +98,23 @@ def _init_layers(self): for in_channel in self.in_channels: self.cls_stems.append( - PPYOLOESELayer( - in_channel, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg)) + PPYOLOESELayer(in_channel, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg) + ) self.reg_stems.append( - PPYOLOESELayer( - in_channel, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg)) + PPYOLOESELayer(in_channel, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg) + ) for in_channel in self.in_channels: - self.cls_preds.append( - nn.Conv2d(in_channel, self.num_classes, 3, padding=1)) + self.cls_preds.append(nn.Conv2d(in_channel, self.num_classes, 3, padding=1)) self.reg_preds.append( - nn.Conv2d(in_channel, 4 * (self.reg_max + 1), 3, padding=1)) + nn.Conv2d(in_channel, 4 * (self.reg_max + 1), 3, padding=1) + ) # init proj proj = torch.linspace(0, self.reg_max, self.reg_max + 1).view( - [1, self.reg_max + 1, 1, 1]) - self.register_buffer('proj', proj, persistent=False) + [1, self.reg_max + 1, 1, 1] + ) + self.register_buffer("proj", proj, persistent=False) def forward(self, x: Tuple[Tensor]) -> Tensor: """Forward features from the upstream network. 
@@ -121,12 +128,23 @@ def forward(self, x: Tuple[Tensor]) -> Tensor: """ assert len(x) == self.num_levels - return multi_apply(self.forward_single, x, self.cls_stems, - self.cls_preds, self.reg_stems, self.reg_preds) - - def forward_single(self, x: Tensor, cls_stem: nn.ModuleList, - cls_pred: nn.ModuleList, reg_stem: nn.ModuleList, - reg_pred: nn.ModuleList) -> Tensor: + return multi_apply( + self.forward_single, + x, + self.cls_stems, + self.cls_preds, + self.reg_stems, + self.reg_preds, + ) + + def forward_single( + self, + x: Tensor, + cls_stem: nn.ModuleList, + cls_pred: nn.ModuleList, + reg_stem: nn.ModuleList, + reg_pred: nn.ModuleList, + ) -> Tensor: """Forward feature of a single scale level.""" b, _, h, w = x.shape hw = h * w @@ -135,7 +153,8 @@ def forward_single(self, x: Tensor, cls_stem: nn.ModuleList, bbox_dist_preds = reg_pred(reg_stem(x, avg_feat)) # TODO: Test whether use matmul instead of conv can speed up training. bbox_dist_preds = bbox_dist_preds.reshape( - [-1, 4, self.reg_max + 1, hw]).permute(0, 2, 3, 1) + [-1, 4, self.reg_max + 1, hw] + ).permute(0, 2, 3, 1) bbox_preds = F.conv2d(F.softmax(bbox_dist_preds, dim=1), self.proj) @@ -169,35 +188,37 @@ class PPYOLOEHead(YOLOv6Head): Defaults to None. """ - def __init__(self, - head_module: ConfigType, - prior_generator: ConfigType = dict( - type='mmdet.MlvlPointGenerator', - offset=0.5, - strides=[8, 16, 32]), - bbox_coder: ConfigType = dict(type='DistancePointBBoxCoder'), - loss_cls: ConfigType = dict( - type='mmdet.VarifocalLoss', - use_sigmoid=True, - alpha=0.75, - gamma=2.0, - iou_weighted=True, - reduction='sum', - loss_weight=1.0), - loss_bbox: ConfigType = dict( - type='IoULoss', - iou_mode='giou', - bbox_format='xyxy', - reduction='mean', - loss_weight=2.5, - return_iou=False), - loss_dfl: ConfigType = dict( - type='mmdet.DistributionFocalLoss', - reduction='mean', - loss_weight=0.5 / 4), - train_cfg: OptConfigType = None, - test_cfg: OptConfigType = None, - init_cfg: OptMultiConfig = None): + def __init__( + self, + head_module: ConfigType, + prior_generator: ConfigType = dict( + type="mmdet.MlvlPointGenerator", offset=0.5, strides=[8, 16, 32] + ), + bbox_coder: ConfigType = dict(type="DistancePointBBoxCoder"), + loss_cls: ConfigType = dict( + type="mmdet.VarifocalLoss", + use_sigmoid=True, + alpha=0.75, + gamma=2.0, + iou_weighted=True, + reduction="sum", + loss_weight=1.0, + ), + loss_bbox: ConfigType = dict( + type="IoULoss", + iou_mode="giou", + bbox_format="xyxy", + reduction="mean", + loss_weight=2.5, + return_iou=False, + ), + loss_dfl: ConfigType = dict( + type="mmdet.DistributionFocalLoss", reduction="mean", loss_weight=0.5 / 4 + ), + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + init_cfg: OptMultiConfig = None, + ): super().__init__( head_module=head_module, prior_generator=prior_generator, @@ -206,19 +227,21 @@ def __init__(self, loss_bbox=loss_bbox, train_cfg=train_cfg, test_cfg=test_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) self.loss_dfl = MODELS.build(loss_dfl) # ppyoloe doesn't need loss_obj self.loss_obj = None def loss_by_feat( - self, - cls_scores: Sequence[Tensor], - bbox_preds: Sequence[Tensor], - bbox_dist_preds: Sequence[Tensor], - batch_gt_instances: Sequence[InstanceData], - batch_img_metas: Sequence[dict], - batch_gt_instances_ignore: OptInstanceList = None) -> dict: + self, + cls_scores: Sequence[Tensor], + bbox_preds: Sequence[Tensor], + bbox_dist_preds: Sequence[Tensor], + batch_gt_instances: Sequence[InstanceData], + batch_img_metas: Sequence[dict], + 
batch_gt_instances_ignore: OptInstanceList = None, + ) -> dict: """Calculate the loss based on the features extracted by the detection head. @@ -246,13 +269,11 @@ def loss_by_feat( # get epoch information from message hub message_hub = MessageHub.get_current_instance() - current_epoch = message_hub.get_info('epoch') + current_epoch = message_hub.get_info("epoch") num_imgs = len(batch_img_metas) - current_featmap_sizes = [ - cls_score.shape[2:] for cls_score in cls_scores - ] + current_featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] # If the shape does not equal, generate new one if current_featmap_sizes != self.featmap_sizes_train: self.featmap_sizes_train = current_featmap_sizes @@ -261,11 +282,11 @@ def loss_by_feat( self.featmap_sizes_train, dtype=cls_scores[0].dtype, device=cls_scores[0].device, - with_stride=True) + with_stride=True, + ) self.num_level_priors = [len(n) for n in mlvl_priors_with_stride] - self.flatten_priors_train = torch.cat( - mlvl_priors_with_stride, dim=0) + self.flatten_priors_train = torch.cat(mlvl_priors_with_stride, dim=0) self.stride_tensor = self.flatten_priors_train[..., [2]] # gt info @@ -276,8 +297,7 @@ def loss_by_feat( # pred info flatten_cls_preds = [ - cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, - self.num_classes) + cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.num_classes) for cls_pred in cls_scores ] flatten_pred_bboxes = [ @@ -287,7 +307,8 @@ def loss_by_feat( # (bs, reg_max+1, n, 4) -> (bs, n, 4, reg_max+1) flatten_pred_dists = [ bbox_pred_org.permute(0, 2, 3, 1).reshape( - num_imgs, -1, (self.head_module.reg_max + 1) * 4) + num_imgs, -1, (self.head_module.reg_max + 1) * 4 + ) for bbox_pred_org in bbox_dist_preds ] @@ -295,24 +316,34 @@ def loss_by_feat( flatten_cls_preds = torch.cat(flatten_cls_preds, dim=1) flatten_pred_bboxes = torch.cat(flatten_pred_bboxes, dim=1) flatten_pred_bboxes = self.bbox_coder.decode( - self.flatten_priors_train[..., :2], flatten_pred_bboxes, - self.stride_tensor[..., 0]) + self.flatten_priors_train[..., :2], + flatten_pred_bboxes, + self.stride_tensor[..., 0], + ) pred_scores = torch.sigmoid(flatten_cls_preds) if current_epoch < self.initial_epoch: assigned_result = self.initial_assigner( - flatten_pred_bboxes.detach(), self.flatten_priors_train, - self.num_level_priors, gt_labels, gt_bboxes, pad_bbox_flag) + flatten_pred_bboxes.detach(), + self.flatten_priors_train, + self.num_level_priors, + gt_labels, + gt_bboxes, + pad_bbox_flag, + ) else: - assigned_result = self.assigner(flatten_pred_bboxes.detach(), - pred_scores.detach(), - self.flatten_priors_train, - gt_labels, gt_bboxes, - pad_bbox_flag) - - assigned_bboxes = assigned_result['assigned_bboxes'] - assigned_scores = assigned_result['assigned_scores'] - fg_mask_pre_prior = assigned_result['fg_mask_pre_prior'] + assigned_result = self.assigner( + flatten_pred_bboxes.detach(), + pred_scores.detach(), + self.flatten_priors_train, + gt_labels, + gt_bboxes, + pad_bbox_flag, + ) + + assigned_bboxes = assigned_result["assigned_bboxes"] + assigned_scores = assigned_result["assigned_scores"] + fg_mask_pre_prior = assigned_result["fg_mask_pre_prior"] # cls loss with torch.cuda.amp.autocast(enabled=False): @@ -324,8 +355,7 @@ def loss_by_feat( assigned_scores_sum = assigned_scores.sum() # reduce_mean between all gpus - assigned_scores_sum = torch.clamp( - reduce_mean(assigned_scores_sum), min=1) + assigned_scores_sum = torch.clamp(reduce_mean(assigned_scores_sum), min=1) loss_cls /= assigned_scores_sum # select positive samples mask @@ 
-336,36 +366,44 @@ def loss_by_feat( # iou loss prior_bbox_mask = fg_mask_pre_prior.unsqueeze(-1).repeat([1, 1, 4]) pred_bboxes_pos = torch.masked_select( - flatten_pred_bboxes, prior_bbox_mask).reshape([-1, 4]) + flatten_pred_bboxes, prior_bbox_mask + ).reshape([-1, 4]) assigned_bboxes_pos = torch.masked_select( - assigned_bboxes, prior_bbox_mask).reshape([-1, 4]) + assigned_bboxes, prior_bbox_mask + ).reshape([-1, 4]) bbox_weight = torch.masked_select( - assigned_scores.sum(-1), fg_mask_pre_prior).unsqueeze(-1) + assigned_scores.sum(-1), fg_mask_pre_prior + ).unsqueeze(-1) loss_bbox = self.loss_bbox( pred_bboxes_pos, assigned_bboxes_pos, weight=bbox_weight, - avg_factor=assigned_scores_sum) + avg_factor=assigned_scores_sum, + ) # dfl loss dist_mask = fg_mask_pre_prior.unsqueeze(-1).repeat( - [1, 1, (self.head_module.reg_max + 1) * 4]) + [1, 1, (self.head_module.reg_max + 1) * 4] + ) - pred_dist_pos = torch.masked_select( - flatten_dist_preds, - dist_mask).reshape([-1, 4, self.head_module.reg_max + 1]) + pred_dist_pos = torch.masked_select(flatten_dist_preds, dist_mask).reshape( + [-1, 4, self.head_module.reg_max + 1] + ) assigned_ltrb = self.bbox_coder.encode( self.flatten_priors_train[..., :2] / self.stride_tensor, assigned_bboxes, max_dis=self.head_module.reg_max, - eps=0.01) + eps=0.01, + ) assigned_ltrb_pos = torch.masked_select( - assigned_ltrb, prior_bbox_mask).reshape([-1, 4]) + assigned_ltrb, prior_bbox_mask + ).reshape([-1, 4]) loss_dfl = self.loss_dfl( pred_dist_pos.reshape(-1, self.head_module.reg_max + 1), assigned_ltrb_pos.reshape(-1), weight=bbox_weight.expand(-1, 4).reshape(-1), - avg_factor=assigned_scores_sum) + avg_factor=assigned_scores_sum, + ) else: loss_bbox = flatten_pred_bboxes.sum() * 0 loss_dfl = flatten_pred_bboxes.sum() * 0 diff --git a/mmyolo/mmyolo/models/dense_heads/rtmdet_head.py b/mmyolo/mmyolo/models/dense_heads/rtmdet_head.py index 1547f276..47dfb356 100644 --- a/mmyolo/mmyolo/models/dense_heads/rtmdet_head.py +++ b/mmyolo/mmyolo/models/dense_heads/rtmdet_head.py @@ -6,13 +6,19 @@ from mmcv.cnn import ConvModule, is_norm from mmdet.models.task_modules.samplers import PseudoSampler from mmdet.structures.bbox import distance2bbox -from mmdet.utils import (ConfigType, InstanceList, OptConfigType, - OptInstanceList, OptMultiConfig, reduce_mean) -from mmengine.model import (BaseModule, bias_init_with_prob, constant_init, - normal_init) +from mmdet.utils import ( + ConfigType, + InstanceList, + OptConfigType, + OptInstanceList, + OptMultiConfig, + reduce_mean, +) +from mmengine.model import BaseModule, bias_init_with_prob, constant_init, normal_init from torch import Tensor from mmyolo.registry import MODELS, TASK_UTILS + from .yolov5_head import YOLOv5Head @@ -60,8 +66,8 @@ def __init__( share_conv: bool = True, pred_kernel_size: int = 1, conv_cfg: OptConfigType = None, - norm_cfg: ConfigType = dict(type='BN'), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), + norm_cfg: ConfigType = dict(type="BN"), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), init_cfg: OptMultiConfig = None, ): super().__init__(init_cfg=init_cfg) @@ -102,7 +108,9 @@ def _init_layers(self): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) reg_convs.append( ConvModule( chn, @@ -112,7 +120,9 @@ def _init_layers(self): padding=1, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) self.cls_convs.append(cls_convs) self.reg_convs.append(reg_convs) @@ -121,13 
+131,17 @@ def _init_layers(self): self.feat_channels, self.num_base_priors * self.num_classes, self.pred_kernel_size, - padding=self.pred_kernel_size // 2)) + padding=self.pred_kernel_size // 2, + ) + ) self.rtm_reg.append( nn.Conv2d( self.feat_channels, self.num_base_priors * 4, self.pred_kernel_size, - padding=self.pred_kernel_size // 2)) + padding=self.pred_kernel_size // 2, + ) + ) if self.share_conv: for n in range(len(self.featmap_strides)): @@ -206,28 +220,27 @@ class RTMDetHead(YOLOv5Head): Defaults to None. """ - def __init__(self, - head_module: ConfigType, - prior_generator: ConfigType = dict( - type='mmdet.MlvlPointGenerator', - offset=0, - strides=[8, 16, 32]), - bbox_coder: ConfigType = dict(type='DistancePointBBoxCoder'), - loss_cls: ConfigType = dict( - type='mmdet.QualityFocalLoss', - use_sigmoid=True, - beta=2.0, - loss_weight=1.0), - loss_bbox: ConfigType = dict( - type='mmdet.GIoULoss', loss_weight=2.0), - loss_obj: ConfigType = dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='sum', - loss_weight=1.0), - train_cfg: OptConfigType = None, - test_cfg: OptConfigType = None, - init_cfg: OptMultiConfig = None): + def __init__( + self, + head_module: ConfigType, + prior_generator: ConfigType = dict( + type="mmdet.MlvlPointGenerator", offset=0, strides=[8, 16, 32] + ), + bbox_coder: ConfigType = dict(type="DistancePointBBoxCoder"), + loss_cls: ConfigType = dict( + type="mmdet.QualityFocalLoss", use_sigmoid=True, beta=2.0, loss_weight=1.0 + ), + loss_bbox: ConfigType = dict(type="mmdet.GIoULoss", loss_weight=2.0), + loss_obj: ConfigType = dict( + type="mmdet.CrossEntropyLoss", + use_sigmoid=True, + reduction="sum", + loss_weight=1.0, + ), + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + init_cfg: OptMultiConfig = None, + ): super().__init__( head_module=head_module, @@ -238,9 +251,10 @@ def __init__(self, loss_obj=loss_obj, train_cfg=train_cfg, test_cfg=test_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) - self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) + self.use_sigmoid_cls = loss_cls.get("use_sigmoid", False) if self.use_sigmoid_cls: self.cls_out_channels = self.num_classes else: @@ -254,9 +268,10 @@ def special_init(self): """ if self.train_cfg: self.assigner = TASK_UTILS.build(self.train_cfg.assigner) - if self.train_cfg.get('sampler', None) is not None: + if self.train_cfg.get("sampler", None) is not None: self.sampler = TASK_UTILS.build( - self.train_cfg.sampler, default_args=dict(context=self)) + self.train_cfg.sampler, default_args=dict(context=self) + ) else: self.sampler = PseudoSampler(context=self) @@ -276,12 +291,13 @@ def forward(self, x: Tuple[Tensor]) -> Tuple[List]: return self.head_module(x) def loss_by_feat( - self, - cls_scores: List[Tensor], - bbox_preds: List[Tensor], - batch_gt_instances: InstanceList, - batch_img_metas: List[dict], - batch_gt_instances_ignore: OptInstanceList = None) -> dict: + self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + batch_gt_instances: InstanceList, + batch_img_metas: List[dict], + batch_gt_instances_ignore: OptInstanceList = None, + ) -> dict: """Compute losses of the head. 
Args: @@ -318,62 +334,73 @@ def loss_by_feat( if featmap_sizes != self.featmap_sizes_train: self.featmap_sizes_train = featmap_sizes mlvl_priors_with_stride = self.prior_generator.grid_priors( - featmap_sizes, device=device, with_stride=True) - self.flatten_priors_train = torch.cat( - mlvl_priors_with_stride, dim=0) - - flatten_cls_scores = torch.cat([ - cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, - self.cls_out_channels) - for cls_score in cls_scores - ], 1).contiguous() - - flatten_bboxes = torch.cat([ - bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) - for bbox_pred in bbox_preds - ], 1) - flatten_bboxes = flatten_bboxes * self.flatten_priors_train[..., -1, - None] - flatten_bboxes = distance2bbox(self.flatten_priors_train[..., :2], - flatten_bboxes) - - assigned_result = self.assigner(flatten_bboxes.detach(), - flatten_cls_scores.detach(), - self.flatten_priors_train, gt_labels, - gt_bboxes, pad_bbox_flag) - - labels = assigned_result['assigned_labels'].reshape(-1) - label_weights = assigned_result['assigned_labels_weights'].reshape(-1) - bbox_targets = assigned_result['assigned_bboxes'].reshape(-1, 4) - assign_metrics = assigned_result['assign_metrics'].reshape(-1) + featmap_sizes, device=device, with_stride=True + ) + self.flatten_priors_train = torch.cat(mlvl_priors_with_stride, dim=0) + + flatten_cls_scores = torch.cat( + [ + cls_score.permute(0, 2, 3, 1).reshape( + num_imgs, -1, self.cls_out_channels + ) + for cls_score in cls_scores + ], + 1, + ).contiguous() + + flatten_bboxes = torch.cat( + [ + bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) + for bbox_pred in bbox_preds + ], + 1, + ) + flatten_bboxes = flatten_bboxes * self.flatten_priors_train[..., -1, None] + flatten_bboxes = distance2bbox( + self.flatten_priors_train[..., :2], flatten_bboxes + ) + + assigned_result = self.assigner( + flatten_bboxes.detach(), + flatten_cls_scores.detach(), + self.flatten_priors_train, + gt_labels, + gt_bboxes, + pad_bbox_flag, + ) + + labels = assigned_result["assigned_labels"].reshape(-1) + label_weights = assigned_result["assigned_labels_weights"].reshape(-1) + bbox_targets = assigned_result["assigned_bboxes"].reshape(-1, 4) + assign_metrics = assigned_result["assign_metrics"].reshape(-1) cls_preds = flatten_cls_scores.reshape(-1, self.num_classes) bbox_preds = flatten_bboxes.reshape(-1, 4) # FG cat_id: [0, num_classes -1], BG cat_id: num_classes bg_class_ind = self.num_classes - pos_inds = ((labels >= 0) - & (labels < bg_class_ind)).nonzero().squeeze(1) + pos_inds = ((labels >= 0) & (labels < bg_class_ind)).nonzero().squeeze(1) avg_factor = reduce_mean(assign_metrics.sum()).clamp_(min=1).item() loss_cls = self.loss_cls( - cls_preds, (labels, assign_metrics), - label_weights, - avg_factor=avg_factor) + cls_preds, (labels, assign_metrics), label_weights, avg_factor=avg_factor + ) if len(pos_inds) > 0: loss_bbox = self.loss_bbox( bbox_preds[pos_inds], bbox_targets[pos_inds], weight=assign_metrics[pos_inds], - avg_factor=avg_factor) + avg_factor=avg_factor, + ) else: loss_bbox = bbox_preds.sum() * 0 return dict(loss_cls=loss_cls, loss_bbox=loss_bbox) @staticmethod - def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], - batch_size: int) -> Tensor: + def gt_instances_preprocess( + batch_gt_instances: Union[Tensor, Sequence], batch_size: int + ) -> Tensor: """Split batch_gt_instances with batch size, from [all_gt_bboxes, 6] to. 
@@ -390,24 +417,24 @@ def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], """ if isinstance(batch_gt_instances, Sequence): max_gt_bbox_len = max( - [len(gt_instances) for gt_instances in batch_gt_instances]) + [len(gt_instances) for gt_instances in batch_gt_instances] + ) # fill [-1., 0., 0., 0., 0.] if some shape of # single batch not equal max_gt_bbox_len batch_instance_list = [] for index, gt_instance in enumerate(batch_gt_instances): bboxes = gt_instance.bboxes labels = gt_instance.labels - batch_instance_list.append( - torch.cat((labels[:, None], bboxes), dim=-1)) + batch_instance_list.append(torch.cat((labels[:, None], bboxes), dim=-1)) if bboxes.shape[0] >= max_gt_bbox_len: continue - fill_tensor = bboxes.new_full( - [max_gt_bbox_len - bboxes.shape[0], 5], 0) - fill_tensor[:, 0] = -1. + fill_tensor = bboxes.new_full([max_gt_bbox_len - bboxes.shape[0], 5], 0) + fill_tensor[:, 0] = -1.0 batch_instance_list[index] = torch.cat( - (batch_instance_list[-1], fill_tensor), dim=0) + (batch_instance_list[-1], fill_tensor), dim=0 + ) return torch.stack(batch_instance_list) else: @@ -417,8 +444,9 @@ def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], batch_instance_list = [] max_gt_bbox_len = 0 for i in range(batch_size): - single_batch_instance = \ - batch_gt_instances[batch_gt_instances[:, 0] == i, :] + single_batch_instance = batch_gt_instances[ + batch_gt_instances[:, 0] == i, : + ] single_batch_instance = single_batch_instance[:, 1:] batch_instance_list.append(single_batch_instance) if len(single_batch_instance) > max_gt_bbox_len: @@ -430,9 +458,11 @@ def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], if gt_instance.shape[0] >= max_gt_bbox_len: continue fill_tensor = batch_gt_instances.new_full( - [max_gt_bbox_len - gt_instance.shape[0], 5], 0) - fill_tensor[:, 0] = -1. + [max_gt_bbox_len - gt_instance.shape[0], 5], 0 + ) + fill_tensor[:, 0] = -1.0 batch_instance_list[index] = torch.cat( - (batch_instance_list[index], fill_tensor), dim=0) + (batch_instance_list[index], fill_tensor), dim=0 + ) return torch.stack(batch_instance_list) diff --git a/mmyolo/mmyolo/models/dense_heads/yolov5_head.py b/mmyolo/mmyolo/models/dense_heads/yolov5_head.py index c49d0851..d892e8ba 100644 --- a/mmyolo/mmyolo/models/dense_heads/yolov5_head.py +++ b/mmyolo/mmyolo/models/dense_heads/yolov5_head.py @@ -8,8 +8,7 @@ from mmdet.models.dense_heads.base_dense_head import BaseDenseHead from mmdet.models.utils import filter_scores_and_topk, multi_apply from mmdet.structures.bbox import bbox_overlaps -from mmdet.utils import (ConfigType, OptConfigType, OptInstanceList, - OptMultiConfig) +from mmdet.utils import ConfigType, OptConfigType, OptInstanceList, OptMultiConfig from mmengine.config import ConfigDict from mmengine.dist import get_dist_info from mmengine.logging import print_log @@ -18,11 +17,13 @@ from torch import Tensor from mmyolo.registry import MODELS, TASK_UTILS + from ..utils import make_divisible -def get_prior_xy_info(index: int, num_base_priors: int, - featmap_sizes: int) -> Tuple[int, int, int]: +def get_prior_xy_info( + index: int, num_base_priors: int, featmap_sizes: int +) -> Tuple[int, int, int]: """Get prior index and xy index in feature map by flatten index.""" _, featmap_w = featmap_sizes priors = index % num_base_priors @@ -52,13 +53,15 @@ class YOLOv5HeadModule(BaseModule): Defaults to None. 
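The gt_instances_preprocess hunks above exist because the batched assigner wants one dense (batch, max_gt, 5) tensor even though images carry different numbers of boxes; shorter images are padded with rows whose label column is -1 so downstream code can mask them out. The padding step in isolation, a sketch under the assumption that each per-image tensor holds (label, x1, y1, x2, y2) rows; `pad_gt_instances` is an illustrative name, not mmyolo API:

    from typing import List

    import torch


    def pad_gt_instances(per_image: List[torch.Tensor]) -> torch.Tensor:
        # per_image[i] has shape (n_i, 5); return a dense (B, max_n, 5) tensor.
        max_n = max(t.shape[0] for t in per_image)
        padded = []
        for t in per_image:
            fill = t.new_zeros((max_n - t.shape[0], 5))
            fill[:, 0] = -1.0  # label -1 marks padding, never a real class
            padded.append(torch.cat((t, fill), dim=0))
        return torch.stack(padded)


    # Two images with 2 and 0 gt boxes -> one (2, 2, 5) tensor.
    imgs = [torch.tensor([[0., 1, 2, 3, 4], [3., 5, 6, 7, 8]]),
            torch.zeros((0, 5))]
    print(pad_gt_instances(imgs).shape)  # torch.Size([2, 2, 5])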
""" - def __init__(self, - num_classes: int, - in_channels: Union[int, Sequence], - widen_factor: float = 1.0, - num_base_priors: int = 3, - featmap_strides: Sequence[int] = (8, 16, 32), - init_cfg: OptMultiConfig = None): + def __init__( + self, + num_classes: int, + in_channels: Union[int, Sequence], + widen_factor: float = 1.0, + num_base_priors: int = 3, + featmap_strides: Sequence[int] = (8, 16, 32), + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg=init_cfg) self.num_classes = num_classes self.widen_factor = widen_factor @@ -69,12 +72,11 @@ def __init__(self, self.num_base_priors = num_base_priors if isinstance(in_channels, int): - self.in_channels = [make_divisible(in_channels, widen_factor) - ] * self.num_levels - else: self.in_channels = [ - make_divisible(i, widen_factor) for i in in_channels - ] + make_divisible(in_channels, widen_factor) + ] * self.num_levels + else: + self.in_channels = [make_divisible(i, widen_factor) for i in in_channels] self._init_layers() @@ -82,9 +84,9 @@ def _init_layers(self): """initialize conv layers in YOLOv5 head.""" self.convs_pred = nn.ModuleList() for i in range(self.num_levels): - conv_pred = nn.Conv2d(self.in_channels[i], - self.num_base_priors * self.num_out_attrib, - 1) + conv_pred = nn.Conv2d( + self.in_channels[i], self.num_base_priors * self.num_out_attrib, 1 + ) self.convs_pred.append(conv_pred) @@ -94,7 +96,7 @@ def init_weights(self): for mi, s in zip(self.convs_pred, self.featmap_strides): # from b = mi.bias.data.view(self.num_base_priors, -1) # obj (8 objects per 640 image) - b.data[:, 4] += math.log(8 / (640 / s)**2) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) b.data[:, 5:] += math.log(0.6 / (self.num_classes - 0.999999)) mi.bias.data = b.view(-1) @@ -112,14 +114,14 @@ def forward(self, x: Tuple[Tensor]) -> Tuple[List]: assert len(x) == self.num_levels return multi_apply(self.forward_single, x, self.convs_pred) - def forward_single(self, x: Tensor, - convs: nn.Module) -> Tuple[Tensor, Tensor, Tensor]: + def forward_single( + self, x: Tensor, convs: nn.Module + ) -> Tuple[Tensor, Tensor, Tensor]: """Forward feature of a single scale level.""" pred_map = convs(x) bs, _, ny, nx = pred_map.shape - pred_map = pred_map.view(bs, self.num_base_priors, self.num_out_attrib, - ny, nx) + pred_map = pred_map.view(bs, self.num_base_priors, self.num_out_attrib, ny, nx) cls_score = pred_map[:, :, 5:, ...].reshape(bs, -1, ny, nx) bbox_pred = pred_map[:, :, :4, ...].reshape(bs, -1, ny, nx) @@ -152,40 +154,48 @@ class YOLOv5Head(BaseDenseHead): Defaults to None. 
""" - def __init__(self, - head_module: ConfigType, - prior_generator: ConfigType = dict( - type='mmdet.YOLOAnchorGenerator', - base_sizes=[[(10, 13), (16, 30), (33, 23)], - [(30, 61), (62, 45), (59, 119)], - [(116, 90), (156, 198), (373, 326)]], - strides=[8, 16, 32]), - bbox_coder: ConfigType = dict(type='YOLOv5BBoxCoder'), - loss_cls: ConfigType = dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='mean', - loss_weight=0.5), - loss_bbox: ConfigType = dict( - type='IoULoss', - iou_mode='ciou', - bbox_format='xywh', - eps=1e-7, - reduction='mean', - loss_weight=0.05, - return_iou=True), - loss_obj: ConfigType = dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='mean', - loss_weight=1.0), - prior_match_thr: float = 4.0, - near_neighbor_thr: float = 0.5, - ignore_iof_thr: float = -1.0, - obj_level_weights: List[float] = [4.0, 1.0, 0.4], - train_cfg: OptConfigType = None, - test_cfg: OptConfigType = None, - init_cfg: OptMultiConfig = None): + def __init__( + self, + head_module: ConfigType, + prior_generator: ConfigType = dict( + type="mmdet.YOLOAnchorGenerator", + base_sizes=[ + [(10, 13), (16, 30), (33, 23)], + [(30, 61), (62, 45), (59, 119)], + [(116, 90), (156, 198), (373, 326)], + ], + strides=[8, 16, 32], + ), + bbox_coder: ConfigType = dict(type="YOLOv5BBoxCoder"), + loss_cls: ConfigType = dict( + type="mmdet.CrossEntropyLoss", + use_sigmoid=True, + reduction="mean", + loss_weight=0.5, + ), + loss_bbox: ConfigType = dict( + type="IoULoss", + iou_mode="ciou", + bbox_format="xywh", + eps=1e-7, + reduction="mean", + loss_weight=0.05, + return_iou=True, + ), + loss_obj: ConfigType = dict( + type="mmdet.CrossEntropyLoss", + use_sigmoid=True, + reduction="mean", + loss_weight=1.0, + ), + prior_match_thr: float = 4.0, + near_neighbor_thr: float = 0.5, + ignore_iof_thr: float = -1.0, + obj_level_weights: List[float] = [4.0, 1.0, 0.4], + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg=init_cfg) self.head_module = MODELS.build(head_module) @@ -219,42 +229,49 @@ def special_init(self): The special_init function is designed to deal with this situation. """ - assert len(self.obj_level_weights) == len( - self.featmap_strides) == self.num_levels + assert ( + len(self.obj_level_weights) == len(self.featmap_strides) == self.num_levels + ) if self.prior_match_thr != 4.0: print_log( "!!!Now, you've changed the prior_match_thr " - 'parameter to something other than 4.0. Please make sure ' - 'that you have modified both the regression formula in ' - 'bbox_coder and before loss_box computation, ' - 'otherwise the accuracy may be degraded!!!') + "parameter to something other than 4.0. Please make sure " + "that you have modified both the regression formula in " + "bbox_coder and before loss_box computation, " + "otherwise the accuracy may be degraded!!!" + ) if self.num_classes == 1: - print_log('!!!You are using `YOLOv5Head` with num_classes == 1.' - ' The loss_cls will be 0. This is a normal phenomenon.') + print_log( + "!!!You are using `YOLOv5Head` with num_classes == 1." + " The loss_cls will be 0. This is a normal phenomenon." 
+ ) priors_base_sizes = torch.tensor( - self.prior_generator.base_sizes, dtype=torch.float) - featmap_strides = torch.tensor( - self.featmap_strides, dtype=torch.float)[:, None, None] - self.register_buffer( - 'priors_base_sizes', - priors_base_sizes / featmap_strides, - persistent=False) - - grid_offset = torch.tensor([ - [0, 0], # center - [1, 0], # left - [0, 1], # up - [-1, 0], # right - [0, -1], # bottom - ]).float() + self.prior_generator.base_sizes, dtype=torch.float + ) + featmap_strides = torch.tensor(self.featmap_strides, dtype=torch.float)[ + :, None, None + ] self.register_buffer( - 'grid_offset', grid_offset[:, None], persistent=False) + "priors_base_sizes", priors_base_sizes / featmap_strides, persistent=False + ) + + grid_offset = torch.tensor( + [ + [0, 0], # center + [1, 0], # left + [0, 1], # up + [-1, 0], # right + [0, -1], # bottom + ] + ).float() + self.register_buffer("grid_offset", grid_offset[:, None], persistent=False) - prior_inds = torch.arange(self.num_base_priors).float().view( - self.num_base_priors, 1) - self.register_buffer('prior_inds', prior_inds, persistent=False) + prior_inds = ( + torch.arange(self.num_base_priors).float().view(self.num_base_priors, 1) + ) + self.register_buffer("prior_inds", prior_inds, persistent=False) def forward(self, x: Tuple[Tensor]) -> Tuple[List]: """Forward features from the upstream network. @@ -268,14 +285,16 @@ def forward(self, x: Tuple[Tensor]) -> Tuple[List]: """ return self.head_module(x) - def predict_by_feat(self, - cls_scores: List[Tensor], - bbox_preds: List[Tensor], - objectnesses: Optional[List[Tensor]] = None, - batch_img_metas: Optional[List[dict]] = None, - cfg: Optional[ConfigDict] = None, - rescale: bool = True, - with_nms: bool = True) -> List[InstanceData]: + def predict_by_feat( + self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + objectnesses: Optional[List[Tensor]] = None, + batch_img_metas: Optional[List[dict]] = None, + cfg: Optional[ConfigDict] = None, + rescale: bool = True, + with_nms: bool = True, + ) -> List[InstanceData]: """Transform a batch of output features extracted by the head into bbox results. 
Args: @@ -329,23 +348,22 @@ def predict_by_feat(self, # If the shape does not change, use the previous mlvl_priors if featmap_sizes != self.featmap_sizes: self.mlvl_priors = self.prior_generator.grid_priors( - featmap_sizes, - dtype=cls_scores[0].dtype, - device=cls_scores[0].device) + featmap_sizes, dtype=cls_scores[0].dtype, device=cls_scores[0].device + ) self.featmap_sizes = featmap_sizes flatten_priors = torch.cat(self.mlvl_priors) mlvl_strides = [ flatten_priors.new_full( - (featmap_size.numel() * self.num_base_priors, ), stride) for - featmap_size, stride in zip(featmap_sizes, self.featmap_strides) + (featmap_size.numel() * self.num_base_priors,), stride + ) + for featmap_size, stride in zip(featmap_sizes, self.featmap_strides) ] flatten_stride = torch.cat(mlvl_strides) # flatten cls_scores, bbox_preds and objectness flatten_cls_scores = [ - cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, - self.num_classes) + cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.num_classes) for cls_score in cls_scores ] flatten_bbox_preds = [ @@ -356,7 +374,8 @@ def predict_by_feat(self, flatten_cls_scores = torch.cat(flatten_cls_scores, dim=1).sigmoid() flatten_bbox_preds = torch.cat(flatten_bbox_preds, dim=1) flatten_decoded_bboxes = self.bbox_coder.decode( - flatten_priors[None], flatten_bbox_preds, flatten_stride) + flatten_priors[None], flatten_bbox_preds, flatten_stride + ) if with_objectnesses: flatten_objectness = [ @@ -368,20 +387,26 @@ def predict_by_feat(self, flatten_objectness = [None for _ in range(num_imgs)] results_list = [] - for (bboxes, scores, objectness, - img_meta) in zip(flatten_decoded_bboxes, flatten_cls_scores, - flatten_objectness, batch_img_metas): - ori_shape = img_meta['ori_shape'] - scale_factor = img_meta['scale_factor'] - if 'pad_param' in img_meta: - pad_param = img_meta['pad_param'] + for (bboxes, scores, objectness, img_meta) in zip( + flatten_decoded_bboxes, + flatten_cls_scores, + flatten_objectness, + batch_img_metas, + ): + ori_shape = img_meta["ori_shape"] + scale_factor = img_meta["scale_factor"] + if "pad_param" in img_meta: + pad_param = img_meta["pad_param"] else: pad_param = None - score_thr = cfg.get('score_thr', -1) + score_thr = cfg.get("score_thr", -1) # yolox_style does not require the following operations - if objectness is not None and score_thr > 0 and not cfg.get( - 'yolox_style', False): + if ( + objectness is not None + and score_thr > 0 + and not cfg.get("yolox_style", False) + ): conf_inds = objectness > score_thr bboxes = bboxes[conf_inds, :] scores = scores[conf_inds, :] @@ -399,31 +424,30 @@ def predict_by_feat(self, results_list.append(empty_results) continue - nms_pre = cfg.get('nms_pre', 100000) + nms_pre = cfg.get("nms_pre", 100000) if cfg.multi_label is False: scores, labels = scores.max(1, keepdim=True) scores, _, keep_idxs, results = filter_scores_and_topk( - scores, - score_thr, - nms_pre, - results=dict(labels=labels[:, 0])) - labels = results['labels'] + scores, score_thr, nms_pre, results=dict(labels=labels[:, 0]) + ) + labels = results["labels"] else: scores, labels, keep_idxs, _ = filter_scores_and_topk( - scores, score_thr, nms_pre) + scores, score_thr, nms_pre + ) results = InstanceData( - scores=scores, labels=labels, bboxes=bboxes[keep_idxs]) + scores=scores, labels=labels, bboxes=bboxes[keep_idxs] + ) if rescale: if pad_param is not None: - results.bboxes -= results.bboxes.new_tensor([ - pad_param[2], pad_param[0], pad_param[2], pad_param[0] - ]) - results.bboxes /= results.bboxes.new_tensor( - 
scale_factor).repeat((1, 2)) + results.bboxes -= results.bboxes.new_tensor( + [pad_param[2], pad_param[0], pad_param[2], pad_param[0]] + ) + results.bboxes /= results.bboxes.new_tensor(scale_factor).repeat((1, 2)) - if cfg.get('yolox_style', False): + if cfg.get("yolox_style", False): # do not need max_per_img cfg.max_per_img = len(results) @@ -432,15 +456,15 @@ def predict_by_feat(self, cfg=cfg, rescale=False, with_nms=with_nms, - img_meta=img_meta) + img_meta=img_meta, + ) results.bboxes[:, 0::2].clamp_(0, ori_shape[1]) results.bboxes[:, 1::2].clamp_(0, ori_shape[0]) results_list.append(results) return results_list - def loss(self, x: Tuple[Tensor], batch_data_samples: Union[list, - dict]) -> dict: + def loss(self, x: Tuple[Tensor], batch_data_samples: Union[list, dict]) -> dict: """Perform forward propagation and loss calculation of the detection head on the features of the upstream network. @@ -460,20 +484,23 @@ def loss(self, x: Tuple[Tensor], batch_data_samples: Union[list, else: outs = self(x) # Fast version - loss_inputs = outs + (batch_data_samples['bboxes_labels'], - batch_data_samples['img_metas']) + loss_inputs = outs + ( + batch_data_samples["bboxes_labels"], + batch_data_samples["img_metas"], + ) losses = self.loss_by_feat(*loss_inputs) return losses def loss_by_feat( - self, - cls_scores: Sequence[Tensor], - bbox_preds: Sequence[Tensor], - objectnesses: Sequence[Tensor], - batch_gt_instances: Sequence[InstanceData], - batch_img_metas: Sequence[dict], - batch_gt_instances_ignore: OptInstanceList = None) -> dict: + self, + cls_scores: Sequence[Tensor], + bbox_preds: Sequence[Tensor], + objectnesses: Sequence[Tensor], + batch_gt_instances: Sequence[InstanceData], + batch_img_metas: Sequence[dict], + batch_gt_instances_ignore: OptInstanceList = None, + ) -> dict: """Calculate the loss based on the features extracted by the detection head. @@ -508,13 +535,11 @@ def loss_by_feat( labels = gt_instances_ignore.labels index = bboxes.new_full((len(bboxes), 1), i) # (batch_idx, label, bboxes) - target = torch.cat((index, labels[:, None].float(), bboxes), - dim=1) + target = torch.cat((index, labels[:, None].float(), bboxes), dim=1) batch_target_ignore_list.append(target) # (num_bboxes, 6) - batch_gt_targets_ignore = torch.cat( - batch_target_ignore_list, dim=0) + batch_gt_targets_ignore = torch.cat(batch_target_ignore_list, dim=0) if batch_gt_targets_ignore.shape[0] != 0: # Consider regions with ignore in annotations return self._loss_by_feat_with_ignore( @@ -523,11 +548,13 @@ def loss_by_feat( objectnesses, batch_gt_instances=batch_gt_instances, batch_img_metas=batch_img_metas, - batch_gt_instances_ignore=batch_gt_targets_ignore) + batch_gt_instances_ignore=batch_gt_targets_ignore, + ) # 1. 
Convert gt to norm format batch_targets_normed = self._convert_gt_to_norm_format( - batch_gt_instances, batch_img_metas) + batch_gt_instances, batch_img_metas + ) device = cls_scores[0].device loss_cls = torch.zeros(1, device=device) @@ -543,31 +570,34 @@ def loss_by_feat( if batch_targets_normed.shape[1] == 0: loss_box += bbox_preds[i].sum() * 0 loss_cls += cls_scores[i].sum() * 0 - loss_obj += self.loss_obj( - objectnesses[i], target_obj) * self.obj_level_weights[i] + loss_obj += ( + self.loss_obj(objectnesses[i], target_obj) + * self.obj_level_weights[i] + ) continue priors_base_sizes_i = self.priors_base_sizes[i] # feature map scale whwh - scaled_factor[2:6] = torch.tensor( - bbox_preds[i].shape)[[3, 2, 3, 2]] + scaled_factor[2:6] = torch.tensor(bbox_preds[i].shape)[[3, 2, 3, 2]] # Scale batch_targets from range 0-1 to range 0-features_maps size. # (num_base_priors, num_bboxes, 7) batch_targets_scaled = batch_targets_normed * scaled_factor # 2. Shape match - wh_ratio = batch_targets_scaled[..., - 4:6] / priors_base_sizes_i[:, None] - match_inds = torch.max( - wh_ratio, 1 / wh_ratio).max(2)[0] < self.prior_match_thr + wh_ratio = batch_targets_scaled[..., 4:6] / priors_base_sizes_i[:, None] + match_inds = ( + torch.max(wh_ratio, 1 / wh_ratio).max(2)[0] < self.prior_match_thr + ) batch_targets_scaled = batch_targets_scaled[match_inds] # no gt bbox matches anchor if batch_targets_scaled.shape[0] == 0: loss_box += bbox_preds[i].sum() * 0 loss_cls += cls_scores[i].sum() * 0 - loss_obj += self.loss_obj( - objectnesses[i], target_obj) * self.obj_level_weights[i] + loss_obj += ( + self.loss_obj(objectnesses[i], target_obj) + * self.obj_level_weights[i] + ) continue # 3. Positive samples with additional neighbors @@ -577,57 +607,60 @@ def loss_by_feat( # them as positive samples as well. batch_targets_cxcy = batch_targets_scaled[:, 2:4] grid_xy = scaled_factor[[2, 3]] - batch_targets_cxcy - left, up = ((batch_targets_cxcy % 1 < self.near_neighbor_thr) & - (batch_targets_cxcy > 1)).T - right, bottom = ((grid_xy % 1 < self.near_neighbor_thr) & - (grid_xy > 1)).T - offset_inds = torch.stack( - (torch.ones_like(left), left, up, right, bottom)) - - batch_targets_scaled = batch_targets_scaled.repeat( - (5, 1, 1))[offset_inds] - retained_offsets = self.grid_offset.repeat(1, offset_inds.shape[1], - 1)[offset_inds] + left, up = ( + (batch_targets_cxcy % 1 < self.near_neighbor_thr) + & (batch_targets_cxcy > 1) + ).T + right, bottom = ((grid_xy % 1 < self.near_neighbor_thr) & (grid_xy > 1)).T + offset_inds = torch.stack((torch.ones_like(left), left, up, right, bottom)) + + batch_targets_scaled = batch_targets_scaled.repeat((5, 1, 1))[offset_inds] + retained_offsets = self.grid_offset.repeat(1, offset_inds.shape[1], 1)[ + offset_inds + ] # prepare pred results and positive sample indexes to # calculate class loss and bbox loss _chunk_targets = batch_targets_scaled.chunk(4, 1) img_class_inds, grid_xy, grid_wh, priors_inds = _chunk_targets - priors_inds, (img_inds, class_inds) = priors_inds.long().view( - -1), img_class_inds.long().T + priors_inds, (img_inds, class_inds) = ( + priors_inds.long().view(-1), + img_class_inds.long().T, + ) - grid_xy_long = (grid_xy - - retained_offsets * self.near_neighbor_thr).long() + grid_xy_long = (grid_xy - retained_offsets * self.near_neighbor_thr).long() grid_x_inds, grid_y_inds = grid_xy_long.T bboxes_targets = torch.cat((grid_xy - grid_xy_long, grid_wh), 1) # 4.
Calculate loss # bbox loss retained_bbox_pred = bbox_preds[i].reshape( - batch_size, self.num_base_priors, -1, h, - w)[img_inds, priors_inds, :, grid_y_inds, grid_x_inds] + batch_size, self.num_base_priors, -1, h, w + )[img_inds, priors_inds, :, grid_y_inds, grid_x_inds] priors_base_sizes_i = priors_base_sizes_i[priors_inds] decoded_bbox_pred = self._decode_bbox_to_xywh( - retained_bbox_pred, priors_base_sizes_i) + retained_bbox_pred, priors_base_sizes_i + ) loss_box_i, iou = self.loss_bbox(decoded_bbox_pred, bboxes_targets) loss_box += loss_box_i # obj loss iou = iou.detach().clamp(0) - target_obj[img_inds, priors_inds, grid_y_inds, - grid_x_inds] = iou.type(target_obj.dtype) - loss_obj += self.loss_obj(objectnesses[i], - target_obj) * self.obj_level_weights[i] + target_obj[img_inds, priors_inds, grid_y_inds, grid_x_inds] = iou.type( + target_obj.dtype + ) + loss_obj += ( + self.loss_obj(objectnesses[i], target_obj) * self.obj_level_weights[i] + ) # cls loss if self.num_classes > 1: pred_cls_scores = cls_scores[i].reshape( - batch_size, self.num_base_priors, -1, h, - w)[img_inds, priors_inds, :, grid_y_inds, grid_x_inds] + batch_size, self.num_base_priors, -1, h, w + )[img_inds, priors_inds, :, grid_y_inds, grid_x_inds] - target_class = torch.full_like(pred_cls_scores, 0.) - target_class[range(batch_targets_scaled.shape[0]), - class_inds] = 1. + target_class = torch.full_like(pred_cls_scores, 0.0) + target_class[range(batch_targets_scaled.shape[0]), class_inds] = 1.0 loss_cls += self.loss_cls(pred_cls_scores, target_class) else: loss_cls += cls_scores[i].sum() * 0 @@ -636,14 +669,17 @@ def loss_by_feat( return dict( loss_cls=loss_cls * batch_size * world_size, loss_obj=loss_obj * batch_size * world_size, - loss_bbox=loss_box * batch_size * world_size) - - def _convert_gt_to_norm_format(self, - batch_gt_instances: Sequence[InstanceData], - batch_img_metas: Sequence[dict]) -> Tensor: + loss_bbox=loss_box * batch_size * world_size, + ) + + def _convert_gt_to_norm_format( + self, + batch_gt_instances: Sequence[InstanceData], + batch_img_metas: Sequence[dict], + ) -> Tensor: if isinstance(batch_gt_instances, torch.Tensor): # fast version - img_shape = batch_img_metas[0]['batch_input_shape'] + img_shape = batch_img_metas[0]["batch_input_shape"] gt_bboxes_xyxy = batch_gt_instances[:, 2:] xy1, xy2 = gt_bboxes_xyxy.split((2, 2), dim=-1) gt_bboxes_xywh = torch.cat([(xy2 + xy1) / 2, (xy2 - xy1)], dim=-1) @@ -652,13 +688,12 @@ def _convert_gt_to_norm_format(self, batch_gt_instances[:, 2:] = gt_bboxes_xywh # (num_base_priors, num_bboxes, 6) - batch_targets_normed = batch_gt_instances.repeat( - self.num_base_priors, 1, 1) + batch_targets_normed = batch_gt_instances.repeat(self.num_base_priors, 1, 1) else: batch_target_list = [] # Convert xyxy bbox to yolo format. 
for i, gt_instances in enumerate(batch_gt_instances): - img_shape = batch_img_metas[i]['batch_input_shape'] + img_shape = batch_img_metas[i]["batch_input_shape"] bboxes = gt_instances.bboxes labels = gt_instances.labels @@ -670,36 +705,41 @@ def _convert_gt_to_norm_format(self, index = bboxes.new_full((len(bboxes), 1), i) # (batch_idx, label, normed_bbox) - target = torch.cat((index, labels[:, None].float(), bboxes), - dim=1) + target = torch.cat((index, labels[:, None].float(), bboxes), dim=1) batch_target_list.append(target) # (num_base_priors, num_bboxes, 6) - batch_targets_normed = torch.cat( - batch_target_list, dim=0).repeat(self.num_base_priors, 1, 1) + batch_targets_normed = torch.cat(batch_target_list, dim=0).repeat( + self.num_base_priors, 1, 1 + ) # (num_base_priors, num_bboxes, 1) batch_targets_prior_inds = self.prior_inds.repeat( - 1, batch_targets_normed.shape[1])[..., None] + 1, batch_targets_normed.shape[1] + )[..., None] # (num_base_priors, num_bboxes, 7) # (img_ind, labels, bbox_cx, bbox_cy, bbox_w, bbox_h, prior_ind) batch_targets_normed = torch.cat( - (batch_targets_normed, batch_targets_prior_inds), 2) + (batch_targets_normed, batch_targets_prior_inds), 2 + ) return batch_targets_normed def _decode_bbox_to_xywh(self, bbox_pred, priors_base_sizes) -> Tensor: bbox_pred = bbox_pred.sigmoid() pred_xy = bbox_pred[:, :2] * 2 - 0.5 - pred_wh = (bbox_pred[:, 2:] * 2)**2 * priors_base_sizes + pred_wh = (bbox_pred[:, 2:] * 2) ** 2 * priors_base_sizes decoded_bbox_pred = torch.cat((pred_xy, pred_wh), dim=-1) return decoded_bbox_pred def _loss_by_feat_with_ignore( - self, cls_scores: Sequence[Tensor], bbox_preds: Sequence[Tensor], - objectnesses: Sequence[Tensor], - batch_gt_instances: Sequence[InstanceData], - batch_img_metas: Sequence[dict], - batch_gt_instances_ignore: Sequence[Tensor]) -> dict: + self, + cls_scores: Sequence[Tensor], + bbox_preds: Sequence[Tensor], + objectnesses: Sequence[Tensor], + batch_gt_instances: Sequence[InstanceData], + batch_img_metas: Sequence[dict], + batch_gt_instances_ignore: Sequence[Tensor], + ) -> dict: """Calculate the loss based on the features extracted by the detection head. @@ -726,14 +766,14 @@ def _loss_by_feat_with_ignore( """ # 1. 
Convert gt to norm format batch_targets_normed = self._convert_gt_to_norm_format( - batch_gt_instances, batch_img_metas) + batch_gt_instances, batch_img_metas + ) featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] if featmap_sizes != self.featmap_sizes: self.mlvl_priors = self.prior_generator.grid_priors( - featmap_sizes, - dtype=cls_scores[0].dtype, - device=cls_scores[0].device) + featmap_sizes, dtype=cls_scores[0].dtype, device=cls_scores[0].device + ) self.featmap_sizes = featmap_sizes device = cls_scores[0].device @@ -746,63 +786,69 @@ def _loss_by_feat_with_ignore( batch_size, _, h, w = bbox_preds[i].shape target_obj = torch.zeros_like(objectnesses[i]) - not_ignore_flags = bbox_preds[i].new_ones(batch_size, - self.num_base_priors, h, - w) + not_ignore_flags = bbox_preds[i].new_ones( + batch_size, self.num_base_priors, h, w + ) - ignore_overlaps = bbox_overlaps(self.mlvl_priors[i], - batch_gt_instances_ignore[..., 2:], - 'iof') - ignore_max_overlaps, ignore_max_ignore_index = ignore_overlaps.max( - dim=1) + ignore_overlaps = bbox_overlaps( + self.mlvl_priors[i], batch_gt_instances_ignore[..., 2:], "iof" + ) + ignore_max_overlaps, ignore_max_ignore_index = ignore_overlaps.max(dim=1) - batch_inds = batch_gt_instances_ignore[:, - 0][ignore_max_ignore_index] + batch_inds = batch_gt_instances_ignore[:, 0][ignore_max_ignore_index] ignore_inds = (ignore_max_overlaps > self.ignore_iof_thr).nonzero( - as_tuple=True)[0] + as_tuple=True + )[0] batch_inds = batch_inds[ignore_inds].long() ignore_priors, ignore_grid_xs, ignore_grid_ys = get_prior_xy_info( - ignore_inds, self.num_base_priors, self.featmap_sizes[i]) - not_ignore_flags[batch_inds, ignore_priors, ignore_grid_ys, - ignore_grid_xs] = 0 + ignore_inds, self.num_base_priors, self.featmap_sizes[i] + ) + not_ignore_flags[ + batch_inds, ignore_priors, ignore_grid_ys, ignore_grid_xs + ] = 0 # empty gt bboxes if batch_targets_normed.shape[1] == 0: loss_box += bbox_preds[i].sum() * 0 loss_cls += cls_scores[i].sum() * 0 - loss_obj += self.loss_obj( - objectnesses[i], - target_obj, - weight=not_ignore_flags, - avg_factor=max(not_ignore_flags.sum(), - 1)) * self.obj_level_weights[i] + loss_obj += ( + self.loss_obj( + objectnesses[i], + target_obj, + weight=not_ignore_flags, + avg_factor=max(not_ignore_flags.sum(), 1), + ) + * self.obj_level_weights[i] + ) continue priors_base_sizes_i = self.priors_base_sizes[i] # feature map scale whwh - scaled_factor[2:6] = torch.tensor( - bbox_preds[i].shape)[[3, 2, 3, 2]] + scaled_factor[2:6] = torch.tensor(bbox_preds[i].shape)[[3, 2, 3, 2]] # Scale batch_targets from range 0-1 to range 0-features_maps size. # (num_base_priors, num_bboxes, 7) batch_targets_scaled = batch_targets_normed * scaled_factor # 2. 
Shape match - wh_ratio = batch_targets_scaled[..., - 4:6] / priors_base_sizes_i[:, None] - match_inds = torch.max( - wh_ratio, 1 / wh_ratio).max(2)[0] < self.prior_match_thr + wh_ratio = batch_targets_scaled[..., 4:6] / priors_base_sizes_i[:, None] + match_inds = ( + torch.max(wh_ratio, 1 / wh_ratio).max(2)[0] < self.prior_match_thr + ) batch_targets_scaled = batch_targets_scaled[match_inds] # no gt bbox matches anchor if batch_targets_scaled.shape[0] == 0: loss_box += bbox_preds[i].sum() * 0 loss_cls += cls_scores[i].sum() * 0 - loss_obj += self.loss_obj( - objectnesses[i], - target_obj, - weight=not_ignore_flags, - avg_factor=max(not_ignore_flags.sum(), - 1)) * self.obj_level_weights[i] + loss_obj += ( + self.loss_obj( + objectnesses[i], + target_obj, + weight=not_ignore_flags, + avg_factor=max(not_ignore_flags.sum(), 1), + ) + * self.obj_level_weights[i] + ) continue # 3. Positive samples with additional neighbors @@ -812,74 +858,81 @@ def _loss_by_feat_with_ignore( # them as positive samples as well. batch_targets_cxcy = batch_targets_scaled[:, 2:4] grid_xy = scaled_factor[[2, 3]] - batch_targets_cxcy - left, up = ((batch_targets_cxcy % 1 < self.near_neighbor_thr) & - (batch_targets_cxcy > 1)).T - right, bottom = ((grid_xy % 1 < self.near_neighbor_thr) & - (grid_xy > 1)).T - offset_inds = torch.stack( - (torch.ones_like(left), left, up, right, bottom)) - - batch_targets_scaled = batch_targets_scaled.repeat( - (5, 1, 1))[offset_inds] - retained_offsets = self.grid_offset.repeat(1, offset_inds.shape[1], - 1)[offset_inds] + left, up = ( + (batch_targets_cxcy % 1 < self.near_neighbor_thr) + & (batch_targets_cxcy > 1) + ).T + right, bottom = ((grid_xy % 1 < self.near_neighbor_thr) & (grid_xy > 1)).T + offset_inds = torch.stack((torch.ones_like(left), left, up, right, bottom)) + + batch_targets_scaled = batch_targets_scaled.repeat((5, 1, 1))[offset_inds] + retained_offsets = self.grid_offset.repeat(1, offset_inds.shape[1], 1)[ + offset_inds + ] # prepare pred results and positive sample indexes to # calculate class loss and bbox loss _chunk_targets = batch_targets_scaled.chunk(4, 1) img_class_inds, grid_xy, grid_wh, priors_inds = _chunk_targets - priors_inds, (img_inds, class_inds) = priors_inds.long().view( - -1), img_class_inds.long().T + priors_inds, (img_inds, class_inds) = ( + priors_inds.long().view(-1), + img_class_inds.long().T, + ) - grid_xy_long = (grid_xy - - retained_offsets * self.near_neighbor_thr).long() + grid_xy_long = (grid_xy - retained_offsets * self.near_neighbor_thr).long() grid_x_inds, grid_y_inds = grid_xy_long.T bboxes_targets = torch.cat((grid_xy - grid_xy_long, grid_wh), 1) # 4.
Calculate loss # bbox loss retained_bbox_pred = bbox_preds[i].reshape( - batch_size, self.num_base_priors, -1, h, - w)[img_inds, priors_inds, :, grid_y_inds, grid_x_inds] + batch_size, self.num_base_priors, -1, h, w + )[img_inds, priors_inds, :, grid_y_inds, grid_x_inds] priors_base_sizes_i = priors_base_sizes_i[priors_inds] decoded_bbox_pred = self._decode_bbox_to_xywh( - retained_bbox_pred, priors_base_sizes_i) + retained_bbox_pred, priors_base_sizes_i + ) - not_ignore_weights = not_ignore_flags[img_inds, priors_inds, - grid_y_inds, grid_x_inds] + not_ignore_weights = not_ignore_flags[ + img_inds, priors_inds, grid_y_inds, grid_x_inds + ] loss_box_i, iou = self.loss_bbox( decoded_bbox_pred, bboxes_targets, weight=not_ignore_weights, - avg_factor=max(not_ignore_weights.sum(), 1)) + avg_factor=max(not_ignore_weights.sum(), 1), + ) loss_box += loss_box_i # obj loss iou = iou.detach().clamp(0) - target_obj[img_inds, priors_inds, grid_y_inds, - grid_x_inds] = iou.type(target_obj.dtype) - loss_obj += self.loss_obj( - objectnesses[i], - target_obj, - weight=not_ignore_flags, - avg_factor=max(not_ignore_flags.sum(), - 1)) * self.obj_level_weights[i] + target_obj[img_inds, priors_inds, grid_y_inds, grid_x_inds] = iou.type( + target_obj.dtype + ) + loss_obj += ( + self.loss_obj( + objectnesses[i], + target_obj, + weight=not_ignore_flags, + avg_factor=max(not_ignore_flags.sum(), 1), + ) + * self.obj_level_weights[i] + ) # cls loss if self.num_classes > 1: pred_cls_scores = cls_scores[i].reshape( - batch_size, self.num_base_priors, -1, h, - w)[img_inds, priors_inds, :, grid_y_inds, grid_x_inds] + batch_size, self.num_base_priors, -1, h, w + )[img_inds, priors_inds, :, grid_y_inds, grid_x_inds] - target_class = torch.full_like(pred_cls_scores, 0.) - target_class[range(batch_targets_scaled.shape[0]), - class_inds] = 1. + target_class = torch.full_like(pred_cls_scores, 0.0) + target_class[range(batch_targets_scaled.shape[0]), class_inds] = 1.0 loss_cls += self.loss_cls( pred_cls_scores, target_class, - weight=not_ignore_weights[:, None].repeat( - 1, self.num_classes), - avg_factor=max(not_ignore_weights.sum(), 1)) + weight=not_ignore_weights[:, None].repeat(1, self.num_classes), + avg_factor=max(not_ignore_weights.sum(), 1), + ) else: loss_cls += cls_scores[i].sum() * 0 @@ -887,4 +940,5 @@ def _loss_by_feat_with_ignore( return dict( loss_cls=loss_cls * batch_size * world_size, loss_obj=loss_obj * batch_size * world_size, - loss_bbox=loss_box * batch_size * world_size) + loss_bbox=loss_box * batch_size * world_size, + ) diff --git a/mmyolo/mmyolo/models/dense_heads/yolov6_head.py b/mmyolo/mmyolo/models/dense_heads/yolov6_head.py index 60d39620..25794a1d 100644 --- a/mmyolo/mmyolo/models/dense_heads/yolov6_head.py +++ b/mmyolo/mmyolo/models/dense_heads/yolov6_head.py @@ -5,8 +5,7 @@ import torch.nn as nn from mmcv.cnn import ConvModule from mmdet.models.utils import multi_apply -from mmdet.utils import (ConfigType, OptConfigType, OptInstanceList, - OptMultiConfig) +from mmdet.utils import ConfigType, OptConfigType, OptInstanceList, OptMultiConfig from mmengine import MessageHub from mmengine.dist import get_dist_info from mmengine.model import BaseModule, bias_init_with_prob @@ -14,6 +13,7 @@ from torch import Tensor from mmyolo.registry import MODELS, TASK_UTILS + from .yolov5_head import YOLOv5Head @@ -44,16 +44,17 @@ class YOLOv6HeadModule(BaseModule): Defaults to None. 
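_convert_gt_to_norm_format in the yolov5_head.py hunks above reduces each gt box from pixel xyxy to cxcywh normalized by the padded batch input shape, which is what lets the loss scale targets back up per level with `scaled_factor`. The conversion on its own, a sketch with an illustrative function name:

    import torch


    def xyxy_to_normed_cxcywh(bboxes: torch.Tensor, img_h: int, img_w: int) -> torch.Tensor:
        # bboxes: (N, 4) pixel xyxy -> (N, 4) cx, cy, w, h in [0, 1].
        xy1, xy2 = bboxes.split((2, 2), dim=-1)
        cxcywh = torch.cat([(xy2 + xy1) / 2, xy2 - xy1], dim=-1)
        cxcywh[:, [0, 2]] /= img_w
        cxcywh[:, [1, 3]] /= img_h
        return cxcywh


    box = torch.tensor([[100., 200., 300., 400.]])
    print(xyxy_to_normed_cxcywh(box, img_h=640, img_w=640))
    # tensor([[0.3125, 0.4688, 0.3125, 0.3125]])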
""" - def __init__(self, - num_classes: int, - in_channels: Union[int, Sequence], - widen_factor: float = 1.0, - num_base_priors: int = 1, - featmap_strides: Sequence[int] = (8, 16, 32), - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + def __init__( + self, + num_classes: int, + in_channels: Union[int, Sequence], + widen_factor: float = 1.0, + num_base_priors: int = 1, + featmap_strides: Sequence[int] = (8, 16, 32), + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg=init_cfg) self.num_classes = num_classes @@ -64,8 +65,7 @@ def __init__(self, self.act_cfg = act_cfg if isinstance(in_channels, int): - self.in_channels = [int(in_channels * widen_factor) - ] * self.num_levels + self.in_channels = [int(in_channels * widen_factor)] * self.num_levels else: self.in_channels = [int(i * widen_factor) for i in in_channels] @@ -88,7 +88,9 @@ def _init_layers(self): stride=1, padding=1 // 2, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) self.cls_convs.append( ConvModule( in_channels=self.in_channels[i], @@ -97,7 +99,9 @@ def _init_layers(self): stride=1, padding=3 // 2, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) self.reg_convs.append( ConvModule( in_channels=self.in_channels[i], @@ -106,28 +110,34 @@ def _init_layers(self): stride=1, padding=3 // 2, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ) + ) self.cls_preds.append( nn.Conv2d( in_channels=self.in_channels[i], out_channels=self.num_base_priors * self.num_classes, - kernel_size=1)) + kernel_size=1, + ) + ) self.reg_preds.append( nn.Conv2d( in_channels=self.in_channels[i], out_channels=self.num_base_priors * 4, - kernel_size=1)) + kernel_size=1, + ) + ) def init_weights(self): super().init_weights() bias_init = bias_init_with_prob(0.01) for conv in self.cls_preds: conv.bias.data.fill_(bias_init) - conv.weight.data.fill_(0.) + conv.weight.data.fill_(0.0) for conv in self.reg_preds: conv.bias.data.fill_(1.0) - conv.weight.data.fill_(0.) + conv.weight.data.fill_(0.0) def forward(self, x: Tuple[Tensor]) -> Tuple[List]: """Forward features from the upstream network. @@ -140,12 +150,25 @@ def forward(self, x: Tuple[Tensor]) -> Tuple[List]: predictions. """ assert len(x) == self.num_levels - return multi_apply(self.forward_single, x, self.stems, self.cls_convs, - self.cls_preds, self.reg_convs, self.reg_preds) - - def forward_single(self, x: Tensor, stem: nn.Module, cls_conv: nn.Module, - cls_pred: nn.Module, reg_conv: nn.Module, - reg_pred: nn.Module) -> Tuple[Tensor, Tensor]: + return multi_apply( + self.forward_single, + x, + self.stems, + self.cls_convs, + self.cls_preds, + self.reg_convs, + self.reg_preds, + ) + + def forward_single( + self, + x: Tensor, + stem: nn.Module, + cls_conv: nn.Module, + cls_pred: nn.Module, + reg_conv: nn.Module, + reg_pred: nn.Module, + ) -> Tuple[Tensor, Tensor]: """Forward feature of a single scale level.""" y = stem(x) cls_x = y @@ -178,31 +201,34 @@ class YOLOv6Head(YOLOv5Head): Defaults to None. 
""" - def __init__(self, - head_module: ConfigType, - prior_generator: ConfigType = dict( - type='mmdet.MlvlPointGenerator', - offset=0.5, - strides=[8, 16, 32]), - bbox_coder: ConfigType = dict(type='DistancePointBBoxCoder'), - loss_cls: ConfigType = dict( - type='mmdet.VarifocalLoss', - use_sigmoid=True, - alpha=0.75, - gamma=2.0, - iou_weighted=True, - reduction='sum', - loss_weight=1.0), - loss_bbox: ConfigType = dict( - type='IoULoss', - iou_mode='giou', - bbox_format='xyxy', - reduction='mean', - loss_weight=2.5, - return_iou=False), - train_cfg: OptConfigType = None, - test_cfg: OptConfigType = None, - init_cfg: OptMultiConfig = None): + def __init__( + self, + head_module: ConfigType, + prior_generator: ConfigType = dict( + type="mmdet.MlvlPointGenerator", offset=0.5, strides=[8, 16, 32] + ), + bbox_coder: ConfigType = dict(type="DistancePointBBoxCoder"), + loss_cls: ConfigType = dict( + type="mmdet.VarifocalLoss", + use_sigmoid=True, + alpha=0.75, + gamma=2.0, + iou_weighted=True, + reduction="sum", + loss_weight=1.0, + ), + loss_bbox: ConfigType = dict( + type="IoULoss", + iou_mode="giou", + bbox_format="xyxy", + reduction="mean", + loss_weight=2.5, + return_iou=False, + ), + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + init_cfg: OptMultiConfig = None, + ): super().__init__( head_module=head_module, prior_generator=prior_generator, @@ -211,7 +237,8 @@ def __init__(self, loss_bbox=loss_bbox, train_cfg=train_cfg, test_cfg=test_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) # yolov6 doesn't need loss_obj self.loss_obj = None @@ -222,9 +249,8 @@ def special_init(self): The special_init function is designed to deal with this situation. """ if self.train_cfg: - self.initial_epoch = self.train_cfg['initial_epoch'] - self.initial_assigner = TASK_UTILS.build( - self.train_cfg.initial_assigner) + self.initial_epoch = self.train_cfg["initial_epoch"] + self.initial_assigner = TASK_UTILS.build(self.train_cfg.initial_assigner) self.assigner = TASK_UTILS.build(self.train_cfg.assigner) # Add common attributes to reduce calculation @@ -234,12 +260,13 @@ def special_init(self): self.stride_tensor = None def loss_by_feat( - self, - cls_scores: Sequence[Tensor], - bbox_preds: Sequence[Tensor], - batch_gt_instances: Sequence[InstanceData], - batch_img_metas: Sequence[dict], - batch_gt_instances_ignore: OptInstanceList = None) -> dict: + self, + cls_scores: Sequence[Tensor], + bbox_preds: Sequence[Tensor], + batch_gt_instances: Sequence[InstanceData], + batch_img_metas: Sequence[dict], + batch_gt_instances_ignore: OptInstanceList = None, + ) -> dict: """Calculate the loss based on the features extracted by the detection head. 
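# --- Illustrative sketch (editor's note, not part of the patch) ---
# YOLOv6 trains with a warm-up assigner for the first `initial_epoch` epochs
# (built from train_cfg.initial_assigner) and switches to the task-aligned
# assigner afterwards, which is what loss_by_feat below implements via the
# epoch read from the MessageHub. A minimal stand-in for that control flow,
# with toy placeholder values:
def pick_assigner(current_epoch, initial_epoch, initial_assigner, assigner):
    """Return the assigner to use this epoch: warm-up first, then aligned."""
    return initial_assigner if current_epoch < initial_epoch else assigner

assert pick_assigner(2, 4, "warmup", "aligned") == "warmup"
assert pick_assigner(4, 4, "warmup", "aligned") == "aligned"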
@@ -265,15 +292,13 @@ def loss_by_feat( # get epoch information from message hub message_hub = MessageHub.get_current_instance() - current_epoch = message_hub.get_info('epoch') + current_epoch = message_hub.get_info("epoch") num_imgs = len(batch_img_metas) if batch_gt_instances_ignore is None: batch_gt_instances_ignore = [None] * num_imgs - current_featmap_sizes = [ - cls_score.shape[2:] for cls_score in cls_scores - ] + current_featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] # If the shape does not equal, generate new one if current_featmap_sizes != self.featmap_sizes_train: self.featmap_sizes_train = current_featmap_sizes @@ -282,11 +307,11 @@ def loss_by_feat( self.featmap_sizes_train, dtype=cls_scores[0].dtype, device=cls_scores[0].device, - with_stride=True) + with_stride=True, + ) self.num_level_priors = [len(n) for n in mlvl_priors_with_stride] - self.flatten_priors_train = torch.cat( - mlvl_priors_with_stride, dim=0) + self.flatten_priors_train = torch.cat(mlvl_priors_with_stride, dim=0) self.stride_tensor = self.flatten_priors_train[..., [2]] # gt info @@ -297,8 +322,7 @@ def loss_by_feat( # pred info flatten_cls_preds = [ - cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, - self.num_classes) + cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.num_classes) for cls_pred in cls_scores ] @@ -310,24 +334,34 @@ def loss_by_feat( flatten_cls_preds = torch.cat(flatten_cls_preds, dim=1) flatten_pred_bboxes = torch.cat(flatten_pred_bboxes, dim=1) flatten_pred_bboxes = self.bbox_coder.decode( - self.flatten_priors_train[..., :2], flatten_pred_bboxes, - self.stride_tensor[:, 0]) + self.flatten_priors_train[..., :2], + flatten_pred_bboxes, + self.stride_tensor[:, 0], + ) pred_scores = torch.sigmoid(flatten_cls_preds) if current_epoch < self.initial_epoch: assigned_result = self.initial_assigner( - flatten_pred_bboxes.detach(), self.flatten_priors_train, - self.num_level_priors, gt_labels, gt_bboxes, pad_bbox_flag) + flatten_pred_bboxes.detach(), + self.flatten_priors_train, + self.num_level_priors, + gt_labels, + gt_bboxes, + pad_bbox_flag, + ) else: - assigned_result = self.assigner(flatten_pred_bboxes.detach(), - pred_scores.detach(), - self.flatten_priors_train, - gt_labels, gt_bboxes, - pad_bbox_flag) - - assigned_bboxes = assigned_result['assigned_bboxes'] - assigned_scores = assigned_result['assigned_scores'] - fg_mask_pre_prior = assigned_result['fg_mask_pre_prior'] + assigned_result = self.assigner( + flatten_pred_bboxes.detach(), + pred_scores.detach(), + self.flatten_priors_train, + gt_labels, + gt_bboxes, + pad_bbox_flag, + ) + + assigned_bboxes = assigned_result["assigned_bboxes"] + assigned_scores = assigned_result["assigned_scores"] + fg_mask_pre_prior = assigned_result["fg_mask_pre_prior"] # cls loss with torch.cuda.amp.autocast(enabled=False): @@ -350,26 +384,30 @@ def loss_by_feat( # iou loss prior_bbox_mask = fg_mask_pre_prior.unsqueeze(-1).repeat([1, 1, 4]) pred_bboxes_pos = torch.masked_select( - flatten_pred_bboxes, prior_bbox_mask).reshape([-1, 4]) + flatten_pred_bboxes, prior_bbox_mask + ).reshape([-1, 4]) assigned_bboxes_pos = torch.masked_select( - assigned_bboxes, prior_bbox_mask).reshape([-1, 4]) + assigned_bboxes, prior_bbox_mask + ).reshape([-1, 4]) bbox_weight = torch.masked_select( - assigned_scores.sum(-1), fg_mask_pre_prior).unsqueeze(-1) + assigned_scores.sum(-1), fg_mask_pre_prior + ).unsqueeze(-1) loss_bbox = self.loss_bbox( pred_bboxes_pos, assigned_bboxes_pos, weight=bbox_weight, - avg_factor=assigned_scores_sum) + 
avg_factor=assigned_scores_sum, + ) else: loss_bbox = flatten_pred_bboxes.sum() * 0 _, world_size = get_dist_info() - return dict( - loss_cls=loss_cls * world_size, loss_bbox=loss_bbox * world_size) + return dict(loss_cls=loss_cls * world_size, loss_bbox=loss_bbox * world_size) @staticmethod - def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], - batch_size: int) -> Tensor: + def gt_instances_preprocess( + batch_gt_instances: Union[Tensor, Sequence], batch_size: int + ) -> Tensor: """Split batch_gt_instances with batch size, from [all_gt_bboxes, 6] to. @@ -386,24 +424,24 @@ def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], """ if isinstance(batch_gt_instances, Sequence): max_gt_bbox_len = max( - [len(gt_instances) for gt_instances in batch_gt_instances]) + [len(gt_instances) for gt_instances in batch_gt_instances] + ) # fill [-1., 0., 0., 0., 0.] if some shape of # single batch not equal max_gt_bbox_len batch_instance_list = [] for index, gt_instance in enumerate(batch_gt_instances): bboxes = gt_instance.bboxes labels = gt_instance.labels - batch_instance_list.append( - torch.cat((labels[:, None], bboxes), dim=-1)) + batch_instance_list.append(torch.cat((labels[:, None], bboxes), dim=-1)) if bboxes.shape[0] >= max_gt_bbox_len: continue - fill_tensor = bboxes.new_full( - [max_gt_bbox_len - bboxes.shape[0], 5], 0) - fill_tensor[:, 0] = -1. + fill_tensor = bboxes.new_full([max_gt_bbox_len - bboxes.shape[0], 5], 0) + fill_tensor[:, 0] = -1.0 batch_instance_list[index] = torch.cat( - (batch_instance_list[-1], fill_tensor), dim=0) + (batch_instance_list[-1], fill_tensor), dim=0 + ) return torch.stack(batch_instance_list) else: @@ -413,8 +451,9 @@ def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], batch_instance_list = [] max_gt_bbox_len = 0 for i in range(batch_size): - single_batch_instance = \ - batch_gt_instances[batch_gt_instances[:, 0] == i, :] + single_batch_instance = batch_gt_instances[ + batch_gt_instances[:, 0] == i, : + ] single_batch_instance = single_batch_instance[:, 1:] batch_instance_list.append(single_batch_instance) if len(single_batch_instance) > max_gt_bbox_len: @@ -426,9 +465,11 @@ def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], if gt_instance.shape[0] >= max_gt_bbox_len: continue fill_tensor = batch_gt_instances.new_full( - [max_gt_bbox_len - gt_instance.shape[0], 5], 0) - fill_tensor[:, 0] = -1. 
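# --- Illustrative sketch (editor's note, not part of the patch) ---
# gt_instances_preprocess pads every image's (label, x1, y1, x2, y2) rows to
# the batch-wide maximum count with filler rows whose label is -1.0, so the
# per-image lists stack into one dense (batch, max_len, 5) tensor. A toy
# version of that padding, under the same filler convention:
import torch

def pad_gt(per_image_gts):
    """per_image_gts: list of (n_i, 5) tensors -> (batch, max_len, 5)."""
    max_len = max(g.shape[0] for g in per_image_gts)
    padded = []
    for g in per_image_gts:
        fill = g.new_zeros((max_len - g.shape[0], 5))
        fill[:, 0] = -1.0  # label -1 marks a padding row
        padded.append(torch.cat((g, fill), dim=0))
    return torch.stack(padded)

out = pad_gt([torch.ones(2, 5), torch.ones(3, 5)])
assert out.shape == (2, 3, 5) and out[0, -1, 0].item() == -1.0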
+ [max_gt_bbox_len - gt_instance.shape[0], 5], 0 + ) + fill_tensor[:, 0] = -1.0 batch_instance_list[index] = torch.cat( - (batch_instance_list[index], fill_tensor), dim=0) + (batch_instance_list[index], fill_tensor), dim=0 + ) return torch.stack(batch_instance_list) diff --git a/mmyolo/mmyolo/models/dense_heads/yolov7_head.py b/mmyolo/mmyolo/models/dense_heads/yolov7_head.py index 80e6aadd..1786999c 100644 --- a/mmyolo/mmyolo/models/dense_heads/yolov7_head.py +++ b/mmyolo/mmyolo/models/dense_heads/yolov7_head.py @@ -12,6 +12,7 @@ from torch import Tensor from mmyolo.registry import MODELS + from ..layers import ImplicitA, ImplicitM from ..task_modules.assigners.batch_yolov7_assigner import BatchYOLOv7Assigner from .yolov5_head import YOLOv5Head, YOLOv5HeadModule @@ -27,8 +28,9 @@ def _init_layers(self): for i in range(self.num_levels): conv_pred = nn.Sequential( ImplicitA(self.in_channels[i]), - nn.Conv2d(self.in_channels[i], - self.num_base_priors * self.num_out_attrib, 1), + nn.Conv2d( + self.in_channels[i], self.num_base_priors * self.num_out_attrib, 1 + ), ImplicitM(self.num_base_priors * self.num_out_attrib), ) self.convs_pred.append(conv_pred) @@ -41,7 +43,7 @@ def init_weights(self): b = mi.bias.data.view(3, -1) # obj (8 objects per 640 image) - b.data[:, 4] += math.log(8 / (640 / s)**2) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) b.data[:, 5:] += math.log(0.6 / (self.num_classes - 0.99)) mi.bias.data = b.view(-1) @@ -51,15 +53,16 @@ def init_weights(self): class YOLOv7p6HeadModule(YOLOv5HeadModule): """YOLOv7Head head module used in YOLOv7.""" - def __init__(self, - *args, - main_out_channels: Sequence[int] = [256, 512, 768, 1024], - aux_out_channels: Sequence[int] = [320, 640, 960, 1280], - use_aux: bool = True, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - **kwargs): + def __init__( + self, + *args, + main_out_channels: Sequence[int] = [256, 512, 768, 1024], + aux_out_channels: Sequence[int] = [320, 640, 960, 1280], + use_aux: bool = True, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + **kwargs, + ): self.main_out_channels = main_out_channels self.aux_out_channels = aux_out_channels self.use_aux = use_aux @@ -78,10 +81,14 @@ def _init_layers(self): 3, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ImplicitA(self.main_out_channels[i]), - nn.Conv2d(self.main_out_channels[i], - self.num_base_priors * self.num_out_attrib, 1), + nn.Conv2d( + self.main_out_channels[i], + self.num_base_priors * self.num_out_attrib, + 1, + ), ImplicitM(self.num_base_priors * self.num_out_attrib), ) self.main_convs_pred.append(conv_pred) @@ -96,9 +103,14 @@ def _init_layers(self): 3, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), - nn.Conv2d(self.aux_out_channels[i], - self.num_base_priors * self.num_out_attrib, 1)) + act_cfg=self.act_cfg, + ), + nn.Conv2d( + self.aux_out_channels[i], + self.num_base_priors * self.num_out_attrib, + 1, + ), + ) self.aux_convs_pred.append(aux_pred) else: self.aux_convs_pred = [None] * len(self.main_convs_pred) @@ -106,12 +118,13 @@ def _init_layers(self): def init_weights(self): """Initialize the bias of YOLOv5 head.""" super(YOLOv5HeadModule, self).init_weights() - for mi, aux, s in zip(self.main_convs_pred, self.aux_convs_pred, - self.featmap_strides): # from + for mi, aux, s in zip( + self.main_convs_pred, self.aux_convs_pred, self.featmap_strides + ): 
# from mi = mi[2] # nn.Conv2d b = mi.bias.data.view(3, -1) # obj (8 objects per 640 image) - b.data[:, 4] += math.log(8 / (640 / s)**2) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) b.data[:, 5:] += math.log(0.6 / (self.num_classes - 0.99)) mi.bias.data = b.view(-1) @@ -119,7 +132,7 @@ def init_weights(self): aux = aux[1] # nn.Conv2d b = aux.bias.data.view(3, -1) # obj (8 objects per 640 image) - b.data[:, 4] += math.log(8 / (640 / s)**2) + b.data[:, 4] += math.log(8 / (640 / s) ** 2) b.data[:, 5:] += math.log(0.6 / (self.num_classes - 0.99)) mi.bias.data = b.view(-1) @@ -134,19 +147,18 @@ def forward(self, x: Tuple[Tensor]) -> Tuple[List]: predictions, and objectnesses. """ assert len(x) == self.num_levels - return multi_apply(self.forward_single, x, self.main_convs_pred, - self.aux_convs_pred) + return multi_apply( + self.forward_single, x, self.main_convs_pred, self.aux_convs_pred + ) - def forward_single(self, x: Tensor, convs: nn.Module, - aux_convs: Optional[nn.Module]) \ - -> Tuple[Union[Tensor, List], Union[Tensor, List], - Union[Tensor, List]]: + def forward_single( + self, x: Tensor, convs: nn.Module, aux_convs: Optional[nn.Module] + ) -> Tuple[Union[Tensor, List], Union[Tensor, List], Union[Tensor, List]]: """Forward feature of a single scale level.""" pred_map = convs(x) bs, _, ny, nx = pred_map.shape - pred_map = pred_map.view(bs, self.num_base_priors, self.num_out_attrib, - ny, nx) + pred_map = pred_map.view(bs, self.num_base_priors, self.num_out_attrib, ny, nx) cls_score = pred_map[:, :, 5:, ...].reshape(bs, -1, ny, nx) bbox_pred = pred_map[:, :, :4, ...].reshape(bs, -1, ny, nx) @@ -156,16 +168,18 @@ def forward_single(self, x: Tensor, convs: nn.Module, return cls_score, bbox_pred, objectness else: aux_pred_map = aux_convs(x) - aux_pred_map = aux_pred_map.view(bs, self.num_base_priors, - self.num_out_attrib, ny, nx) + aux_pred_map = aux_pred_map.view( + bs, self.num_base_priors, self.num_out_attrib, ny, nx + ) aux_cls_score = aux_pred_map[:, :, 5:, ...].reshape(bs, -1, ny, nx) aux_bbox_pred = aux_pred_map[:, :, :4, ...].reshape(bs, -1, ny, nx) - aux_objectness = aux_pred_map[:, :, 4:5, - ...].reshape(bs, -1, ny, nx) + aux_objectness = aux_pred_map[:, :, 4:5, ...].reshape(bs, -1, ny, nx) - return [cls_score, - aux_cls_score], [bbox_pred, aux_bbox_pred - ], [objectness, aux_objectness] + return ( + [cls_score, aux_cls_score], + [bbox_pred, aux_bbox_pred], + [objectness, aux_objectness], + ) @MODELS.register_module() @@ -182,13 +196,15 @@ class YOLOv7Head(YOLOv5Head): cost in BatchYOLOv7Assigner. Defaults to 1.0. 
""" - def __init__(self, - *args, - simota_candidate_topk: int = 20, - simota_iou_weight: float = 3.0, - simota_cls_weight: float = 1.0, - aux_loss_weights: float = 0.25, - **kwargs): + def __init__( + self, + *args, + simota_candidate_topk: int = 20, + simota_iou_weight: float = 3.0, + simota_cls_weight: float = 1.0, + aux_loss_weights: float = 0.25, + **kwargs, + ): super().__init__(*args, **kwargs) self.aux_loss_weights = aux_loss_weights self.assigner = BatchYOLOv7Assigner( @@ -198,16 +214,18 @@ def __init__(self, prior_match_thr=self.prior_match_thr, candidate_topk=simota_candidate_topk, iou_weight=simota_iou_weight, - cls_weight=simota_cls_weight) + cls_weight=simota_cls_weight, + ) def loss_by_feat( - self, - cls_scores: Sequence[Union[Tensor, List]], - bbox_preds: Sequence[Union[Tensor, List]], - objectnesses: Sequence[Union[Tensor, List]], - batch_gt_instances: Sequence[InstanceData], - batch_img_metas: Sequence[dict], - batch_gt_instances_ignore: OptInstanceList = None) -> dict: + self, + cls_scores: Sequence[Union[Tensor, List]], + bbox_preds: Sequence[Union[Tensor, List]], + objectnesses: Sequence[Union[Tensor, List]], + batch_gt_instances: Sequence[InstanceData], + batch_img_metas: Sequence[dict], + batch_gt_instances_ignore: OptInstanceList = None, + ) -> dict: """Calculate the loss based on the features extracted by the detection head. @@ -243,25 +261,28 @@ def loss_by_feat( objectnesses_main, objectnesses_aux = zip(*objectnesses) cls_scores_main, cls_scores_aux = zip(*cls_scores) - head_preds = self._merge_predict_results(bbox_preds_main, - objectnesses_main, - cls_scores_main) + head_preds = self._merge_predict_results( + bbox_preds_main, objectnesses_main, cls_scores_main + ) head_preds_aux = self._merge_predict_results( - bbox_preds_aux, objectnesses_aux, cls_scores_aux) + bbox_preds_aux, objectnesses_aux, cls_scores_aux + ) else: with_aux = False batch_size = cls_scores[0].shape[0] device = cls_scores[0].device - head_preds = self._merge_predict_results(bbox_preds, objectnesses, - cls_scores) + head_preds = self._merge_predict_results( + bbox_preds, objectnesses, cls_scores + ) # Convert gt to norm xywh format # (num_base_priors, num_batch_gt, 7) # 7 is mean (batch_idx, cls_id, x_norm, y_norm, # w_norm, h_norm, prior_idx) batch_targets_normed = self._convert_gt_to_norm_format( - batch_gt_instances, batch_img_metas) + batch_gt_instances, batch_img_metas + ) scaled_factors = [ torch.tensor(head_pred.shape, device=device)[[3, 2, 3, 2]] @@ -275,7 +296,8 @@ def loss_by_feat( near_neighbor_thr=self.near_neighbor_thr, scaled_factors=scaled_factors, batch_img_metas=batch_img_metas, - device=device) + device=device, + ) if with_aux: loss_cls_aux, loss_obj_aux, loss_box_aux = self._calc_loss( @@ -285,7 +307,8 @@ def loss_by_feat( near_neighbor_thr=self.near_neighbor_thr * 2, scaled_factors=scaled_factors, batch_img_metas=batch_img_metas, - device=device) + device=device, + ) loss_cls += self.aux_loss_weights * loss_cls_aux loss_obj += self.aux_loss_weights * loss_obj_aux loss_box += self.aux_loss_weights * loss_box_aux @@ -294,10 +317,19 @@ def loss_by_feat( return dict( loss_cls=loss_cls * batch_size * world_size, loss_obj=loss_obj * batch_size * world_size, - loss_bbox=loss_box * batch_size * world_size) - - def _calc_loss(self, head_preds, head_preds_aux, batch_targets_normed, - near_neighbor_thr, scaled_factors, batch_img_metas, device): + loss_bbox=loss_box * batch_size * world_size, + ) + + def _calc_loss( + self, + head_preds, + head_preds_aux, + batch_targets_normed, + 
near_neighbor_thr, + scaled_factors, + batch_img_metas, + device, + ): loss_cls = torch.zeros(1, device=device) loss_box = torch.zeros(1, device=device) loss_obj = torch.zeros(1, device=device) @@ -305,14 +337,15 @@ def _calc_loss(self, head_preds, head_preds_aux, batch_targets_normed, assigner_results = self.assigner( head_preds, batch_targets_normed, - batch_img_metas[0]['batch_input_shape'], + batch_img_metas[0]["batch_input_shape"], self.priors_base_sizes, self.grid_offset, - near_neighbor_thr=near_neighbor_thr) + near_neighbor_thr=near_neighbor_thr, + ) # mlvl is mean multi_level - mlvl_positive_infos = assigner_results['mlvl_positive_infos'] - mlvl_priors = assigner_results['mlvl_priors'] - mlvl_targets_normed = assigner_results['mlvl_targets_normed'] + mlvl_positive_infos = assigner_results["mlvl_positive_infos"] + mlvl_priors = assigner_results["mlvl_priors"] + mlvl_targets_normed = assigner_results["mlvl_targets_normed"] if head_preds_aux is not None: # This is mean calc aux branch loss @@ -326,47 +359,53 @@ def _calc_loss(self, head_preds, head_preds_aux, batch_targets_normed, if num_pred_positive == 0: loss_box += head_pred[..., :4].sum() * 0 loss_cls += head_pred[..., 5:].sum() * 0 - loss_obj += self.loss_obj( - head_pred[..., 4], target_obj) * self.obj_level_weights[i] + loss_obj += ( + self.loss_obj(head_pred[..., 4], target_obj) + * self.obj_level_weights[i] + ) continue priors = mlvl_priors[i] targets_normed = mlvl_targets_normed[i] - head_pred_positive = head_pred[batch_inds, proir_idx, grid_y, - grid_x] + head_pred_positive = head_pred[batch_inds, proir_idx, grid_y, grid_x] # calc bbox loss grid_xy = torch.stack([grid_x, grid_y], dim=1) decoded_pred_bbox = self._decode_bbox_to_xywh( - head_pred_positive[:, :4], priors, grid_xy) + head_pred_positive[:, :4], priors, grid_xy + ) target_bbox_scaled = targets_normed[:, 2:6] * scaled_factors[i] - loss_box_i, iou = self.loss_bbox(decoded_pred_bbox, - target_bbox_scaled) + loss_box_i, iou = self.loss_bbox(decoded_pred_bbox, target_bbox_scaled) loss_box += loss_box_i # calc obj loss - target_obj[batch_inds, proir_idx, grid_y, - grid_x] = iou.detach().clamp(0).type(target_obj.dtype) - loss_obj += self.loss_obj(head_pred[..., 4], - target_obj) * self.obj_level_weights[i] + target_obj[batch_inds, proir_idx, grid_y, grid_x] = ( + iou.detach().clamp(0).type(target_obj.dtype) + ) + loss_obj += ( + self.loss_obj(head_pred[..., 4], target_obj) * self.obj_level_weights[i] + ) # calc cls loss if self.num_classes > 1: pred_cls_scores = targets_normed[:, 1].long() target_class = torch.full_like( - head_pred_positive[:, 5:], 0., device=device) - target_class[range(num_pred_positive), pred_cls_scores] = 1. - loss_cls += self.loss_cls(head_pred_positive[:, 5:], - target_class) + head_pred_positive[:, 5:], 0.0, device=device + ) + target_class[range(num_pred_positive), pred_cls_scores] = 1.0 + loss_cls += self.loss_cls(head_pred_positive[:, 5:], target_class) else: loss_cls += head_pred_positive[:, 5:].sum() * 0 return loss_cls, loss_obj, loss_box - def _merge_predict_results(self, bbox_preds: Sequence[Tensor], - objectnesses: Sequence[Tensor], - cls_scores: Sequence[Tensor]) -> List[Tensor]: + def _merge_predict_results( + self, + bbox_preds: Sequence[Tensor], + objectnesses: Sequence[Tensor], + cls_scores: Sequence[Tensor], + ) -> List[Tensor]: """Merge predict output from 3 heads. Args: @@ -384,21 +423,24 @@ def _merge_predict_results(self, bbox_preds: Sequence[Tensor], List[Tensor]: Merged output. 
""" head_preds = [] - for bbox_pred, objectness, cls_score in zip(bbox_preds, objectnesses, - cls_scores): + for bbox_pred, objectness, cls_score in zip( + bbox_preds, objectnesses, cls_scores + ): b, _, h, w = bbox_pred.shape bbox_pred = bbox_pred.reshape(b, self.num_base_priors, -1, h, w) objectness = objectness.reshape(b, self.num_base_priors, -1, h, w) cls_score = cls_score.reshape(b, self.num_base_priors, -1, h, w) - head_pred = torch.cat([bbox_pred, objectness, cls_score], - dim=2).permute(0, 1, 3, 4, 2).contiguous() + head_pred = ( + torch.cat([bbox_pred, objectness, cls_score], dim=2) + .permute(0, 1, 3, 4, 2) + .contiguous() + ) head_preds.append(head_pred) return head_preds - def _decode_bbox_to_xywh(self, bbox_pred, priors_base_sizes, - grid_xy) -> Tensor: + def _decode_bbox_to_xywh(self, bbox_pred, priors_base_sizes, grid_xy) -> Tensor: bbox_pred = bbox_pred.sigmoid() pred_xy = bbox_pred[:, :2] * 2 - 0.5 + grid_xy - pred_wh = (bbox_pred[:, 2:] * 2)**2 * priors_base_sizes + pred_wh = (bbox_pred[:, 2:] * 2) ** 2 * priors_base_sizes decoded_bbox_pred = torch.cat((pred_xy, pred_wh), dim=-1) return decoded_bbox_pred diff --git a/mmyolo/mmyolo/models/dense_heads/yolov8_head.py b/mmyolo/mmyolo/models/dense_heads/yolov8_head.py index 7e6bf52e..12914b81 100644 --- a/mmyolo/mmyolo/models/dense_heads/yolov8_head.py +++ b/mmyolo/mmyolo/models/dense_heads/yolov8_head.py @@ -6,14 +6,14 @@ import torch.nn as nn from mmcv.cnn import ConvModule from mmdet.models.utils import multi_apply -from mmdet.utils import (ConfigType, OptConfigType, OptInstanceList, - OptMultiConfig) +from mmdet.utils import ConfigType, OptConfigType, OptInstanceList, OptMultiConfig from mmengine.dist import get_dist_info from mmengine.model import BaseModule from mmengine.structures import InstanceData from torch import Tensor from mmyolo.registry import MODELS, TASK_UTILS + from ..utils import make_divisible from .yolov5_head import YOLOv5Head @@ -44,17 +44,18 @@ class YOLOv8HeadModule(BaseModule): Defaults to None. 
""" - def __init__(self, - num_classes: int, - in_channels: Union[int, Sequence], - widen_factor: float = 1.0, - num_base_priors: int = 1, - featmap_strides: Sequence[int] = (8, 16, 32), - reg_max: int = 16, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + def __init__( + self, + num_classes: int, + in_channels: Union[int, Sequence], + widen_factor: float = 1.0, + num_base_priors: int = 1, + featmap_strides: Sequence[int] = (8, 16, 32), + reg_max: int = 16, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg=init_cfg) self.num_classes = num_classes self.featmap_strides = featmap_strides @@ -76,12 +77,14 @@ def __init__(self, def init_weights(self, prior_prob=0.01): """Initialize the weight and bias of PPYOLOE head.""" super().init_weights() - for reg_pred, cls_pred, stride in zip(self.reg_preds, self.cls_preds, - self.featmap_strides): + for reg_pred, cls_pred, stride in zip( + self.reg_preds, self.cls_preds, self.featmap_strides + ): reg_pred[-1].bias.data[:] = 1.0 # box # cls (.01 objects, 80 classes, 640 img) - cls_pred[-1].bias.data[:self.num_classes] = math.log( - 5 / self.num_classes / (640 / stride)**2) + cls_pred[-1].bias.data[: self.num_classes] = math.log( + 5 / self.num_classes / (640 / stride) ** 2 + ) def _init_layers(self): """initialize conv layers in YOLOv8 head.""" @@ -89,8 +92,7 @@ def _init_layers(self): self.cls_preds = nn.ModuleList() self.reg_preds = nn.ModuleList() - reg_out_channels = max( - (16, self.in_channels[0] // 4, self.reg_max * 4)) + reg_out_channels = max((16, self.in_channels[0] // 4, self.reg_max * 4)) cls_out_channels = max(self.in_channels[0], self.num_classes) for i in range(self.num_levels): @@ -103,7 +105,8 @@ def _init_layers(self): stride=1, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( in_channels=reg_out_channels, out_channels=reg_out_channels, @@ -111,11 +114,15 @@ def _init_layers(self): stride=1, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), nn.Conv2d( in_channels=reg_out_channels, out_channels=4 * self.reg_max, - kernel_size=1))) + kernel_size=1, + ), + ) + ) self.cls_preds.append( nn.Sequential( ConvModule( @@ -125,7 +132,8 @@ def _init_layers(self): stride=1, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( in_channels=cls_out_channels, out_channels=cls_out_channels, @@ -133,14 +141,18 @@ def _init_layers(self): stride=1, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), nn.Conv2d( in_channels=cls_out_channels, out_channels=self.num_classes, - kernel_size=1))) + kernel_size=1, + ), + ) + ) proj = torch.arange(self.reg_max, dtype=torch.float) - self.register_buffer('proj', proj, persistent=False) + self.register_buffer("proj", proj, persistent=False) def forward(self, x: Tuple[Tensor]) -> Tuple[List]: """Forward features from the upstream network. 
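# --- Illustrative sketch (editor's note, not part of the patch) ---
# The `proj` buffer registered above implements the DFL-style regression:
# each box side is predicted as a categorical distribution over reg_max bins,
# and the expected bin index (softmax @ arange) is the regressed distance,
# as forward_single below computes. Toy demonstration:
import torch

reg_max = 16
proj = torch.arange(reg_max, dtype=torch.float)  # same as the buffer above
logits = torch.randn(8, 4, reg_max)              # toy: 8 anchors, 4 sides
dist = logits.softmax(-1).matmul(proj)           # expected distance per side
assert dist.shape == (8, 4)
assert bool((dist >= 0).all()) and bool((dist <= reg_max - 1).all())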
@@ -153,18 +165,19 @@ def forward(self, x: Tuple[Tensor]) -> Tuple[List]: predictions """ assert len(x) == self.num_levels - return multi_apply(self.forward_single, x, self.cls_preds, - self.reg_preds) + return multi_apply(self.forward_single, x, self.cls_preds, self.reg_preds) - def forward_single(self, x: torch.Tensor, cls_pred: nn.ModuleList, - reg_pred: nn.ModuleList) -> Tuple: + def forward_single( + self, x: torch.Tensor, cls_pred: nn.ModuleList, reg_pred: nn.ModuleList + ) -> Tuple: """Forward feature of a single scale level.""" b, _, h, w = x.shape cls_logit = cls_pred(x) bbox_dist_preds = reg_pred(x) if self.reg_max > 1: bbox_dist_preds = bbox_dist_preds.reshape( - [-1, 4, self.reg_max, h * w]).permute(0, 3, 1, 2) + [-1, 4, self.reg_max, h * w] + ).permute(0, 3, 1, 2) bbox_preds = bbox_dist_preds.softmax(3).matmul(self.proj) bbox_preds = bbox_preds.transpose(1, 2).reshape(b, -1, h, w) else: @@ -197,32 +210,34 @@ class YOLOv8Head(YOLOv5Head): Defaults to None. """ - def __init__(self, - head_module: ConfigType, - prior_generator: ConfigType = dict( - type='mmdet.MlvlPointGenerator', - offset=0.5, - strides=[8, 16, 32]), - bbox_coder: ConfigType = dict(type='DistancePointBBoxCoder'), - loss_cls: ConfigType = dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='none', - loss_weight=0.5), - loss_bbox: ConfigType = dict( - type='IoULoss', - iou_mode='ciou', - bbox_format='xyxy', - reduction='sum', - loss_weight=7.5, - return_iou=False), - loss_dfl=dict( - type='mmdet.DistributionFocalLoss', - reduction='mean', - loss_weight=1.5 / 4), - train_cfg: OptConfigType = None, - test_cfg: OptConfigType = None, - init_cfg: OptMultiConfig = None): + def __init__( + self, + head_module: ConfigType, + prior_generator: ConfigType = dict( + type="mmdet.MlvlPointGenerator", offset=0.5, strides=[8, 16, 32] + ), + bbox_coder: ConfigType = dict(type="DistancePointBBoxCoder"), + loss_cls: ConfigType = dict( + type="mmdet.CrossEntropyLoss", + use_sigmoid=True, + reduction="none", + loss_weight=0.5, + ), + loss_bbox: ConfigType = dict( + type="IoULoss", + iou_mode="ciou", + bbox_format="xyxy", + reduction="sum", + loss_weight=7.5, + return_iou=False, + ), + loss_dfl=dict( + type="mmdet.DistributionFocalLoss", reduction="mean", loss_weight=1.5 / 4 + ), + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + init_cfg: OptMultiConfig = None, + ): super().__init__( head_module=head_module, prior_generator=prior_generator, @@ -231,7 +246,8 @@ def __init__(self, loss_bbox=loss_bbox, train_cfg=train_cfg, test_cfg=test_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) self.loss_dfl = MODELS.build(loss_dfl) # YOLOv8 doesn't need loss_obj self.loss_obj = None @@ -252,13 +268,14 @@ def special_init(self): self.stride_tensor = None def loss_by_feat( - self, - cls_scores: Sequence[Tensor], - bbox_preds: Sequence[Tensor], - bbox_dist_preds: Sequence[Tensor], - batch_gt_instances: Sequence[InstanceData], - batch_img_metas: Sequence[dict], - batch_gt_instances_ignore: OptInstanceList = None) -> dict: + self, + cls_scores: Sequence[Tensor], + bbox_preds: Sequence[Tensor], + bbox_dist_preds: Sequence[Tensor], + batch_gt_instances: Sequence[InstanceData], + batch_img_metas: Sequence[dict], + batch_gt_instances_ignore: OptInstanceList = None, + ) -> dict: """Calculate the loss based on the features extracted by the detection head. 
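# --- Illustrative sketch (editor's note, not part of the patch) ---
# loss_by_feat below decodes (l, t, r, b) distance predictions against prior
# points via bbox_coder.decode(priors[..., :2], preds, stride). A toy decode
# with the semantics assumed here (distances scaled by the prior's stride and
# measured from the prior point); the real coder is DistancePointBBoxCoder:
import torch

def decode(points, ltrb, stride):
    d = ltrb * stride[..., None]
    return torch.cat((points - d[..., :2], points + d[..., 2:]), dim=-1)

box = decode(torch.tensor([[32.0, 32.0]]),
             torch.tensor([[1.0, 1.0, 2.0, 2.0]]),
             torch.tensor([8.0]))
assert box.tolist() == [[24.0, 24.0, 48.0, 48.0]]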
@@ -285,9 +302,7 @@ def loss_by_feat( """ num_imgs = len(batch_img_metas) - current_featmap_sizes = [ - cls_score.shape[2:] for cls_score in cls_scores - ] + current_featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] # If the shape does not equal, generate new one if current_featmap_sizes != self.featmap_sizes_train: self.featmap_sizes_train = current_featmap_sizes @@ -296,11 +311,11 @@ def loss_by_feat( self.featmap_sizes_train, dtype=cls_scores[0].dtype, device=cls_scores[0].device, - with_stride=True) + with_stride=True, + ) self.num_level_priors = [len(n) for n in mlvl_priors_with_stride] - self.flatten_priors_train = torch.cat( - mlvl_priors_with_stride, dim=0) + self.flatten_priors_train = torch.cat(mlvl_priors_with_stride, dim=0) self.stride_tensor = self.flatten_priors_train[..., [2]] # gt info @@ -311,8 +326,7 @@ def loss_by_feat( # pred info flatten_cls_preds = [ - cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, - self.num_classes) + cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.num_classes) for cls_pred in cls_scores ] flatten_pred_bboxes = [ @@ -329,17 +343,23 @@ def loss_by_feat( flatten_cls_preds = torch.cat(flatten_cls_preds, dim=1) flatten_pred_bboxes = torch.cat(flatten_pred_bboxes, dim=1) flatten_pred_bboxes = self.bbox_coder.decode( - self.flatten_priors_train[..., :2], flatten_pred_bboxes, - self.stride_tensor[..., 0]) + self.flatten_priors_train[..., :2], + flatten_pred_bboxes, + self.stride_tensor[..., 0], + ) assigned_result = self.assigner( (flatten_pred_bboxes.detach()).type(gt_bboxes.dtype), - flatten_cls_preds.detach().sigmoid(), self.flatten_priors_train, - gt_labels, gt_bboxes, pad_bbox_flag) + flatten_cls_preds.detach().sigmoid(), + self.flatten_priors_train, + gt_labels, + gt_bboxes, + pad_bbox_flag, + ) - assigned_bboxes = assigned_result['assigned_bboxes'] - assigned_scores = assigned_result['assigned_scores'] - fg_mask_pre_prior = assigned_result['fg_mask_pre_prior'] + assigned_bboxes = assigned_result["assigned_bboxes"] + assigned_scores = assigned_result["assigned_scores"] + fg_mask_pre_prior = assigned_result["fg_mask_pre_prior"] assigned_scores_sum = assigned_scores.sum().clamp(min=1) @@ -358,14 +378,18 @@ def loss_by_feat( # iou loss prior_bbox_mask = fg_mask_pre_prior.unsqueeze(-1).repeat([1, 1, 4]) pred_bboxes_pos = torch.masked_select( - flatten_pred_bboxes, prior_bbox_mask).reshape([-1, 4]) + flatten_pred_bboxes, prior_bbox_mask + ).reshape([-1, 4]) assigned_bboxes_pos = torch.masked_select( - assigned_bboxes, prior_bbox_mask).reshape([-1, 4]) + assigned_bboxes, prior_bbox_mask + ).reshape([-1, 4]) bbox_weight = torch.masked_select( - assigned_scores.sum(-1), fg_mask_pre_prior).unsqueeze(-1) - loss_bbox = self.loss_bbox( - pred_bboxes_pos, assigned_bboxes_pos, - weight=bbox_weight) / assigned_scores_sum + assigned_scores.sum(-1), fg_mask_pre_prior + ).unsqueeze(-1) + loss_bbox = ( + self.loss_bbox(pred_bboxes_pos, assigned_bboxes_pos, weight=bbox_weight) + / assigned_scores_sum + ) # dfl loss pred_dist_pos = flatten_dist_preds[fg_mask_pre_prior] @@ -373,14 +397,17 @@ def loss_by_feat( self.flatten_priors_train[..., :2] / self.stride_tensor, assigned_bboxes, max_dis=self.head_module.reg_max - 1, - eps=0.01) + eps=0.01, + ) assigned_ltrb_pos = torch.masked_select( - assigned_ltrb, prior_bbox_mask).reshape([-1, 4]) + assigned_ltrb, prior_bbox_mask + ).reshape([-1, 4]) loss_dfl = self.loss_dfl( pred_dist_pos.reshape(-1, self.head_module.reg_max), assigned_ltrb_pos.reshape(-1), weight=bbox_weight.expand(-1, 4).reshape(-1), - 
avg_factor=assigned_scores_sum) + avg_factor=assigned_scores_sum, + ) else: loss_bbox = flatten_pred_bboxes.sum() * 0 loss_dfl = flatten_pred_bboxes.sum() * 0 @@ -388,11 +415,13 @@ def loss_by_feat( return dict( loss_cls=loss_cls * num_imgs * world_size, loss_bbox=loss_bbox * num_imgs * world_size, - loss_dfl=loss_dfl * num_imgs * world_size) + loss_dfl=loss_dfl * num_imgs * world_size, + ) @staticmethod - def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], - batch_size: int) -> Tensor: + def gt_instances_preprocess( + batch_gt_instances: Union[Tensor, Sequence], batch_size: int + ) -> Tensor: """Split batch_gt_instances with batch size, from [all_gt_bboxes, 6] to. @@ -409,24 +438,24 @@ def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], """ if isinstance(batch_gt_instances, Sequence): max_gt_bbox_len = max( - [len(gt_instances) for gt_instances in batch_gt_instances]) + [len(gt_instances) for gt_instances in batch_gt_instances] + ) # fill [-1., 0., 0., 0., 0.] if some shape of # single batch not equal max_gt_bbox_len batch_instance_list = [] for index, gt_instance in enumerate(batch_gt_instances): bboxes = gt_instance.bboxes labels = gt_instance.labels - batch_instance_list.append( - torch.cat((labels[:, None], bboxes), dim=-1)) + batch_instance_list.append(torch.cat((labels[:, None], bboxes), dim=-1)) if bboxes.shape[0] >= max_gt_bbox_len: continue - fill_tensor = bboxes.new_full( - [max_gt_bbox_len - bboxes.shape[0], 5], 0) - fill_tensor[:, 0] = -1. + fill_tensor = bboxes.new_full([max_gt_bbox_len - bboxes.shape[0], 5], 0) + fill_tensor[:, 0] = -1.0 batch_instance_list[index] = torch.cat( - (batch_instance_list[-1], fill_tensor), dim=0) + (batch_instance_list[-1], fill_tensor), dim=0 + ) return torch.stack(batch_instance_list) else: diff --git a/mmyolo/mmyolo/models/dense_heads/yolox_head.py b/mmyolo/mmyolo/models/dense_heads/yolox_head.py index 9ab4c269..c34b4a17 100644 --- a/mmyolo/mmyolo/models/dense_heads/yolox_head.py +++ b/mmyolo/mmyolo/models/dense_heads/yolox_head.py @@ -8,13 +8,19 @@ from mmdet.models.task_modules.samplers import PseudoSampler from mmdet.models.utils import multi_apply from mmdet.structures.bbox import bbox_xyxy_to_cxcywh -from mmdet.utils import (ConfigType, OptConfigType, OptInstanceList, - OptMultiConfig, reduce_mean) +from mmdet.utils import ( + ConfigType, + OptConfigType, + OptInstanceList, + OptMultiConfig, + reduce_mean, +) from mmengine.model import BaseModule, bias_init_with_prob from mmengine.structures import InstanceData from torch import Tensor from mmyolo.registry import MODELS, TASK_UTILS + from .yolov5_head import YOLOv5Head @@ -66,10 +72,10 @@ def __init__( featmap_strides: Sequence[int] = [8, 16, 32], use_depthwise: bool = False, dcn_on_last_conv: bool = False, - conv_bias: Union[bool, str] = 'auto', + conv_bias: Union[bool, str] = "auto", conv_cfg: OptConfigType = None, - norm_cfg: ConfigType = dict(type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), init_cfg: OptMultiConfig = None, ): super().__init__(init_cfg=init_cfg) @@ -78,7 +84,7 @@ def __init__( self.stacked_convs = stacked_convs self.use_depthwise = use_depthwise self.dcn_on_last_conv = dcn_on_last_conv - assert conv_bias == 'auto' or isinstance(conv_bias, bool) + assert conv_bias == "auto" or isinstance(conv_bias, bool) self.conv_bias = conv_bias self.num_base_priors = 
num_base_priors @@ -110,13 +116,12 @@ def _init_layers(self): def _build_stacked_convs(self) -> nn.Sequential: """Initialize conv layers of a single level head.""" - conv = DepthwiseSeparableConvModule \ - if self.use_depthwise else ConvModule + conv = DepthwiseSeparableConvModule if self.use_depthwise else ConvModule stacked_convs = [] for i in range(self.stacked_convs): chn = self.in_channels if i == 0 else self.feat_channels if self.dcn_on_last_conv and i == self.stacked_convs - 1: - conv_cfg = dict(type='DCNv2') + conv_cfg = dict(type="DCNv2") else: conv_cfg = self.conv_cfg stacked_convs.append( @@ -129,7 +134,9 @@ def _build_stacked_convs(self) -> nn.Sequential: conv_cfg=conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, - bias=self.conv_bias)) + bias=self.conv_bias, + ) + ) return nn.Sequential(*stacked_convs) def _build_predictor(self) -> Tuple[nn.Module, nn.Module, nn.Module]: @@ -144,8 +151,9 @@ def init_weights(self): # Use prior in model initialization to improve stability super().init_weights() bias_init = bias_init_with_prob(0.01) - for conv_cls, conv_obj in zip(self.multi_level_conv_cls, - self.multi_level_conv_obj): + for conv_cls, conv_obj in zip( + self.multi_level_conv_cls, self.multi_level_conv_obj + ): conv_cls.bias.data.fill_(bias_init) conv_obj.bias.data.fill_(bias_init) @@ -160,16 +168,25 @@ def forward(self, x: Tuple[Tensor]) -> Tuple[List]: predictions, and objectnesses. """ - return multi_apply(self.forward_single, x, self.multi_level_cls_convs, - self.multi_level_reg_convs, - self.multi_level_conv_cls, - self.multi_level_conv_reg, - self.multi_level_conv_obj) - - def forward_single(self, x: Tensor, cls_convs: nn.Module, - reg_convs: nn.Module, conv_cls: nn.Module, - conv_reg: nn.Module, - conv_obj: nn.Module) -> Tuple[Tensor, Tensor, Tensor]: + return multi_apply( + self.forward_single, + x, + self.multi_level_cls_convs, + self.multi_level_reg_convs, + self.multi_level_conv_cls, + self.multi_level_conv_reg, + self.multi_level_conv_obj, + ) + + def forward_single( + self, + x: Tensor, + cls_convs: nn.Module, + reg_convs: nn.Module, + conv_cls: nn.Module, + conv_reg: nn.Module, + conv_obj: nn.Module, + ) -> Tuple[Tensor, Tensor, Tensor]: """Forward feature of a single scale level.""" cls_feat = cls_convs(x) @@ -203,34 +220,39 @@ class YOLOXHead(YOLOv5Head): Defaults to None. 
""" - def __init__(self, - head_module: ConfigType, - prior_generator: ConfigType = dict( - type='mmdet.MlvlPointGenerator', - offset=0, - strides=[8, 16, 32]), - bbox_coder: ConfigType = dict(type='YOLOXBBoxCoder'), - loss_cls: ConfigType = dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='sum', - loss_weight=1.0), - loss_bbox: ConfigType = dict( - type='mmdet.IoULoss', - mode='square', - eps=1e-16, - reduction='sum', - loss_weight=5.0), - loss_obj: ConfigType = dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='sum', - loss_weight=1.0), - loss_bbox_aux: ConfigType = dict( - type='mmdet.L1Loss', reduction='sum', loss_weight=1.0), - train_cfg: OptConfigType = None, - test_cfg: OptConfigType = None, - init_cfg: OptMultiConfig = None): + def __init__( + self, + head_module: ConfigType, + prior_generator: ConfigType = dict( + type="mmdet.MlvlPointGenerator", offset=0, strides=[8, 16, 32] + ), + bbox_coder: ConfigType = dict(type="YOLOXBBoxCoder"), + loss_cls: ConfigType = dict( + type="mmdet.CrossEntropyLoss", + use_sigmoid=True, + reduction="sum", + loss_weight=1.0, + ), + loss_bbox: ConfigType = dict( + type="mmdet.IoULoss", + mode="square", + eps=1e-16, + reduction="sum", + loss_weight=5.0, + ), + loss_obj: ConfigType = dict( + type="mmdet.CrossEntropyLoss", + use_sigmoid=True, + reduction="sum", + loss_weight=1.0, + ), + loss_bbox_aux: ConfigType = dict( + type="mmdet.L1Loss", reduction="sum", loss_weight=1.0 + ), + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + init_cfg: OptMultiConfig = None, + ): self.use_bbox_aux = False self.loss_bbox_aux = loss_bbox_aux @@ -243,7 +265,8 @@ def __init__(self, loss_obj=loss_obj, train_cfg=train_cfg, test_cfg=test_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def special_init(self): """Since YOLO series algorithms will inherit from YOLOv5Head, but @@ -261,13 +284,14 @@ def forward(self, x: Tuple[Tensor]) -> Tuple[List]: return self.head_module(x) def loss_by_feat( - self, - cls_scores: Sequence[Tensor], - bbox_preds: Sequence[Tensor], - objectnesses: Sequence[Tensor], - batch_gt_instances: Sequence[InstanceData], - batch_img_metas: Sequence[dict], - batch_gt_instances_ignore: OptInstanceList = None) -> dict: + self, + cls_scores: Sequence[Tensor], + bbox_preds: Sequence[Tensor], + objectnesses: Sequence[Tensor], + batch_gt_instances: Sequence[InstanceData], + batch_img_metas: Sequence[dict], + batch_gt_instances_ignore: OptInstanceList = None, + ) -> dict: """Calculate the loss based on the features extracted by the detection head. 
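# --- Illustrative sketch (editor's note, not part of the patch) ---
# The config above selects mmdet's IoULoss with mode='square', which (to the
# editor's understanding) penalises each positive pair by 1 - IoU**2, summed
# and scaled by loss_weight=5.0. Toy computation of that penalty:
import torch

def iou_xyxy(a, b):
    lt = torch.maximum(a[:2], b[:2])
    rb = torch.minimum(a[2:], b[2:])
    inter = (rb - lt).clamp(min=0).prod()
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

iou = iou_xyxy(torch.tensor([0.0, 0.0, 2.0, 2.0]),
               torch.tensor([1.0, 1.0, 3.0, 3.0]))
loss = 1 - iou ** 2  # mode='square'
assert abs(iou.item() - 1 / 7) < 1e-6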
@@ -302,11 +326,11 @@ def loss_by_feat( featmap_sizes, dtype=cls_scores[0].dtype, device=cls_scores[0].device, - with_stride=True) + with_stride=True, + ) flatten_cls_preds = [ - cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, - self.num_classes) + cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.num_classes) for cls_pred in cls_scores ] flatten_bbox_preds = [ @@ -322,24 +346,33 @@ def loss_by_feat( flatten_bbox_preds = torch.cat(flatten_bbox_preds, dim=1) flatten_objectness = torch.cat(flatten_objectness, dim=1) flatten_priors = torch.cat(mlvl_priors) - flatten_bboxes = self.bbox_coder.decode(flatten_priors[..., :2], - flatten_bbox_preds, - flatten_priors[..., 2]) - - (pos_masks, cls_targets, obj_targets, bbox_targets, bbox_aux_target, - num_fg_imgs) = multi_apply( - self._get_targets_single, - flatten_priors.unsqueeze(0).repeat(num_imgs, 1, 1), - flatten_cls_preds.detach(), flatten_bboxes.detach(), - flatten_objectness.detach(), batch_gt_instances, batch_img_metas, - batch_gt_instances_ignore) + flatten_bboxes = self.bbox_coder.decode( + flatten_priors[..., :2], flatten_bbox_preds, flatten_priors[..., 2] + ) + + ( + pos_masks, + cls_targets, + obj_targets, + bbox_targets, + bbox_aux_target, + num_fg_imgs, + ) = multi_apply( + self._get_targets_single, + flatten_priors.unsqueeze(0).repeat(num_imgs, 1, 1), + flatten_cls_preds.detach(), + flatten_bboxes.detach(), + flatten_objectness.detach(), + batch_gt_instances, + batch_img_metas, + batch_gt_instances_ignore, + ) # The experimental results show that 'reduce_mean' can improve # performance on the COCO dataset. num_pos = torch.tensor( - sum(num_fg_imgs), - dtype=torch.float, - device=flatten_cls_preds.device) + sum(num_fg_imgs), dtype=torch.float, device=flatten_cls_preds.device + ) num_total_samples = max(reduce_mean(num_pos), 1.0) pos_masks = torch.cat(pos_masks, 0) @@ -349,15 +382,21 @@ def loss_by_feat( if self.use_bbox_aux: bbox_aux_target = torch.cat(bbox_aux_target, 0) - loss_obj = self.loss_obj(flatten_objectness.view(-1, 1), - obj_targets) / num_total_samples + loss_obj = ( + self.loss_obj(flatten_objectness.view(-1, 1), obj_targets) + / num_total_samples + ) if num_pos > 0: - loss_cls = self.loss_cls( - flatten_cls_preds.view(-1, self.num_classes)[pos_masks], - cls_targets) / num_total_samples - loss_bbox = self.loss_bbox( - flatten_bboxes.view(-1, 4)[pos_masks], - bbox_targets) / num_total_samples + loss_cls = ( + self.loss_cls( + flatten_cls_preds.view(-1, self.num_classes)[pos_masks], cls_targets + ) + / num_total_samples + ) + loss_bbox = ( + self.loss_bbox(flatten_bboxes.view(-1, 4)[pos_masks], bbox_targets) + / num_total_samples + ) else: # Avoid cls and reg branch not participating in the gradient # propagation when there is no ground-truth in the images. @@ -366,14 +405,16 @@ def loss_by_feat( loss_cls = flatten_cls_preds.sum() * 0 loss_bbox = flatten_bboxes.sum() * 0 - loss_dict = dict( - loss_cls=loss_cls, loss_bbox=loss_bbox, loss_obj=loss_obj) + loss_dict = dict(loss_cls=loss_cls, loss_bbox=loss_bbox, loss_obj=loss_obj) if self.use_bbox_aux: if num_pos > 0: - loss_bbox_aux = self.loss_bbox_aux( - flatten_bbox_preds.view(-1, 4)[pos_masks], - bbox_aux_target) / num_total_samples + loss_bbox_aux = ( + self.loss_bbox_aux( + flatten_bbox_preds.view(-1, 4)[pos_masks], bbox_aux_target + ) + / num_total_samples + ) else: # Avoid cls and reg branch not participating in the gradient # propagation when there is no ground-truth in the images. 
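# --- Illustrative sketch (editor's note, not part of the patch) ---
# The `preds.sum() * 0` pattern used above keeps the cls/reg branches in the
# autograd graph (with exactly zero gradient) when an image has no ground
# truth, so DDP still receives a gradient for every parameter:
import torch

w = torch.randn(3, requires_grad=True)
preds = w * 2.0
(preds.sum() * 0).backward()  # contributes nothing, but stays differentiable
assert torch.equal(w.grad, torch.zeros(3))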
@@ -386,14 +427,15 @@ def loss_by_feat( @torch.no_grad() def _get_targets_single( - self, - priors: Tensor, - cls_preds: Tensor, - decoded_bboxes: Tensor, - objectness: Tensor, - gt_instances: InstanceData, - img_meta: dict, - gt_instances_ignore: Optional[InstanceData] = None) -> tuple: + self, + priors: Tensor, + cls_preds: Tensor, + decoded_bboxes: Tensor, + objectness: Tensor, + gt_instances: InstanceData, + img_meta: dict, + gt_instances_ignore: Optional[InstanceData] = None, + ) -> tuple: """Compute classification, regression, and objectness targets for priors in a single image. @@ -435,52 +477,70 @@ def _get_targets_single( bbox_aux_target = cls_preds.new_zeros((0, 4)) obj_target = cls_preds.new_zeros((num_priors, 1)) foreground_mask = cls_preds.new_zeros(num_priors).bool() - return (foreground_mask, cls_target, obj_target, bbox_target, - bbox_aux_target, 0) + return ( + foreground_mask, + cls_target, + obj_target, + bbox_target, + bbox_aux_target, + 0, + ) # YOLOX uses center priors with 0.5 offset to assign targets, # but use center priors without offset to regress bboxes. offset_priors = torch.cat( - [priors[:, :2] + priors[:, 2:] * 0.5, priors[:, 2:]], dim=-1) + [priors[:, :2] + priors[:, 2:] * 0.5, priors[:, 2:]], dim=-1 + ) scores = cls_preds.sigmoid() * objectness.unsqueeze(1).sigmoid() pred_instances = InstanceData( - bboxes=decoded_bboxes, scores=scores.sqrt_(), priors=offset_priors) + bboxes=decoded_bboxes, scores=scores.sqrt_(), priors=offset_priors + ) assign_result = self.assigner.assign( pred_instances=pred_instances, gt_instances=gt_instances, - gt_instances_ignore=gt_instances_ignore) + gt_instances_ignore=gt_instances_ignore, + ) - sampling_result = self.sampler.sample(assign_result, pred_instances, - gt_instances) + sampling_result = self.sampler.sample( + assign_result, pred_instances, gt_instances + ) pos_inds = sampling_result.pos_inds num_pos_per_img = pos_inds.size(0) pos_ious = assign_result.max_overlaps[pos_inds] # IOU aware classification score - cls_target = F.one_hot(sampling_result.pos_gt_labels, - self.num_classes) * pos_ious.unsqueeze(-1) + cls_target = F.one_hot( + sampling_result.pos_gt_labels, self.num_classes + ) * pos_ious.unsqueeze(-1) obj_target = torch.zeros_like(objectness).unsqueeze(-1) obj_target[pos_inds] = 1 bbox_target = sampling_result.pos_gt_bboxes bbox_aux_target = cls_preds.new_zeros((num_pos_per_img, 4)) if self.use_bbox_aux: bbox_aux_target = self._get_bbox_aux_target( - bbox_aux_target, bbox_target, priors[pos_inds]) + bbox_aux_target, bbox_target, priors[pos_inds] + ) foreground_mask = torch.zeros_like(objectness).to(torch.bool) foreground_mask[pos_inds] = 1 - return (foreground_mask, cls_target, obj_target, bbox_target, - bbox_aux_target, num_pos_per_img) - - def _get_bbox_aux_target(self, - bbox_aux_target: Tensor, - gt_bboxes: Tensor, - priors: Tensor, - eps: float = 1e-8) -> Tensor: + return ( + foreground_mask, + cls_target, + obj_target, + bbox_target, + bbox_aux_target, + num_pos_per_img, + ) + + def _get_bbox_aux_target( + self, + bbox_aux_target: Tensor, + gt_bboxes: Tensor, + priors: Tensor, + eps: float = 1e-8, + ) -> Tensor: """Convert gt bboxes to center offset and log width height.""" gt_cxcywh = bbox_xyxy_to_cxcywh(gt_bboxes) - bbox_aux_target[:, :2] = (gt_cxcywh[:, :2] - - priors[:, :2]) / priors[:, 2:] - bbox_aux_target[:, - 2:] = torch.log(gt_cxcywh[:, 2:] / priors[:, 2:] + eps) + bbox_aux_target[:, :2] = (gt_cxcywh[:, :2] - priors[:, :2]) / priors[:, 2:] + bbox_aux_target[:, 2:] = torch.log(gt_cxcywh[:, 2:] / 
priors[:, 2:] + eps) return bbox_aux_target diff --git a/mmyolo/mmyolo/models/detectors/__init__.py b/mmyolo/mmyolo/models/detectors/__init__.py index 74fb1c6c..cc0e02e8 100644 --- a/mmyolo/mmyolo/models/detectors/__init__.py +++ b/mmyolo/mmyolo/models/detectors/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. from .yolo_detector import YOLODetector -__all__ = ['YOLODetector'] +__all__ = ["YOLODetector"] diff --git a/mmyolo/mmyolo/models/detectors/yolo_detector.py b/mmyolo/mmyolo/models/detectors/yolo_detector.py index e6783fba..d775c35a 100644 --- a/mmyolo/mmyolo/models/detectors/yolo_detector.py +++ b/mmyolo/mmyolo/models/detectors/yolo_detector.py @@ -29,15 +29,17 @@ class YOLODetector(SingleStageDetector): use_syncbn (bool): whether to use SyncBatchNorm. Defaults to True. """ - def __init__(self, - backbone: ConfigType, - neck: ConfigType, - bbox_head: ConfigType, - train_cfg: OptConfigType = None, - test_cfg: OptConfigType = None, - data_preprocessor: OptConfigType = None, - init_cfg: OptMultiConfig = None, - use_syncbn: bool = True): + def __init__( + self, + backbone: ConfigType, + neck: ConfigType, + bbox_head: ConfigType, + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + data_preprocessor: OptConfigType = None, + init_cfg: OptMultiConfig = None, + use_syncbn: bool = True, + ): super().__init__( backbone=backbone, neck=neck, @@ -45,9 +47,10 @@ def __init__(self, train_cfg=train_cfg, test_cfg=test_cfg, data_preprocessor=data_preprocessor, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) # TODO: Waiting for mmengine support if use_syncbn and get_world_size() > 1: torch.nn.SyncBatchNorm.convert_sync_batchnorm(self) - print_log('Using SyncBatchNorm()', 'current') + print_log("Using SyncBatchNorm()", "current") diff --git a/mmyolo/mmyolo/models/layers/__init__.py b/mmyolo/mmyolo/models/layers/__init__.py index f709dbb7..4bcbd522 100644 --- a/mmyolo/mmyolo/models/layers/__init__.py +++ b/mmyolo/mmyolo/models/layers/__init__.py @@ -1,16 +1,38 @@ # Copyright (c) OpenMMLab. All rights reserved. 
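# --- Illustrative sketch (editor's note, not part of the patch) ---
# convert_sync_batchnorm (used by YOLODetector above when use_syncbn is set
# and the world size exceeds 1) walks the module tree and swaps every
# BatchNorm*d for SyncBatchNorm; because children are re-registered on the
# returned root, a non-BN root is effectively converted in place, which is
# (as far as the editor can tell) why the call works without reassignment:
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
nn.SyncBatchNorm.convert_sync_batchnorm(model)
assert isinstance(model[1], nn.SyncBatchNorm)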
from .ema import ExpMomentumEMA -from .yolo_bricks import (BepC3StageBlock, CSPLayerWithTwoConv, - DarknetBottleneck, EELANBlock, EffectiveSELayer, - ELANBlock, ImplicitA, ImplicitM, - MaxPoolAndStrideConvBlock, PPYOLOEBasicBlock, - RepStageBlock, RepVGGBlock, SPPFBottleneck, - SPPFCSPBlock, TinyDownSampleBlock) +from .yolo_bricks import ( + BepC3StageBlock, + CSPLayerWithTwoConv, + DarknetBottleneck, + EELANBlock, + EffectiveSELayer, + ELANBlock, + ImplicitA, + ImplicitM, + MaxPoolAndStrideConvBlock, + PPYOLOEBasicBlock, + RepStageBlock, + RepVGGBlock, + SPPFBottleneck, + SPPFCSPBlock, + TinyDownSampleBlock, +) __all__ = [ - 'SPPFBottleneck', 'RepVGGBlock', 'RepStageBlock', 'ExpMomentumEMA', - 'ELANBlock', 'MaxPoolAndStrideConvBlock', 'SPPFCSPBlock', - 'PPYOLOEBasicBlock', 'EffectiveSELayer', 'TinyDownSampleBlock', - 'EELANBlock', 'ImplicitA', 'ImplicitM', 'BepC3StageBlock', - 'CSPLayerWithTwoConv', 'DarknetBottleneck' + "SPPFBottleneck", + "RepVGGBlock", + "RepStageBlock", + "ExpMomentumEMA", + "ELANBlock", + "MaxPoolAndStrideConvBlock", + "SPPFCSPBlock", + "PPYOLOEBasicBlock", + "EffectiveSELayer", + "TinyDownSampleBlock", + "EELANBlock", + "ImplicitA", + "ImplicitM", + "BepC3StageBlock", + "CSPLayerWithTwoConv", + "DarknetBottleneck", ] diff --git a/mmyolo/mmyolo/models/layers/ema.py b/mmyolo/mmyolo/models/layers/ema.py index 02ed2041..79ffc45a 100644 --- a/mmyolo/mmyolo/models/layers/ema.py +++ b/mmyolo/mmyolo/models/layers/ema.py @@ -34,20 +34,23 @@ class ExpMomentumEMA(MMDET_ExpMomentumEMA): False. """ - def __init__(self, - model: nn.Module, - momentum: float = 0.0002, - gamma: int = 2000, - interval=1, - device: Optional[torch.device] = None, - update_buffers: bool = False): + def __init__( + self, + model: nn.Module, + momentum: float = 0.0002, + gamma: int = 2000, + interval=1, + device: Optional[torch.device] = None, + update_buffers: bool = False, + ): super().__init__( model=model, momentum=momentum, interval=interval, device=device, - update_buffers=update_buffers) - assert gamma > 0, f'gamma must be greater than 0, but got {gamma}' + update_buffers=update_buffers, + ) + assert gamma > 0, f"gamma must be greater than 0, but got {gamma}" self.gamma = gamma # Note: There is no need to re-fetch every update, @@ -55,12 +58,13 @@ def __init__(self, # during the training process. self.src_parameters = ( model.state_dict() - if self.update_buffers else dict(model.named_parameters())) + if self.update_buffers + else dict(model.named_parameters()) + ) if not self.update_buffers: self.src_buffers = model.buffers() - def avg_func(self, averaged_param: Tensor, source_param: Tensor, - steps: int): + def avg_func(self, averaged_param: Tensor, source_param: Tensor, steps: int): """Compute the moving average of the parameters using the exponential momentum strategy. @@ -71,7 +75,8 @@ def avg_func(self, averaged_param: Tensor, source_param: Tensor, updated. 
""" momentum = (1 - self.momentum) * math.exp( - -float(1 + steps) / self.gamma) + self.momentum + -float(1 + steps) / self.gamma + ) + self.momentum averaged_param.lerp_(source_param, momentum) def update_parameters(self, model: nn.Module): @@ -86,8 +91,7 @@ def update_parameters(self, model: nn.Module): elif self.steps % self.interval == 0: for k, p_avg in self.avg_parameters.items(): if p_avg.dtype.is_floating_point: - self.avg_func(p_avg.data, self.src_parameters[k].data, - self.steps) + self.avg_func(p_avg.data, self.src_parameters[k].data, self.steps) if not self.update_buffers: # If not update the buffers, # keep the buffers in sync with the source model. diff --git a/mmyolo/mmyolo/models/layers/yolo_bricks.py b/mmyolo/mmyolo/models/layers/yolo_bricks.py index 2e69d528..6c1007be 100644 --- a/mmyolo/mmyolo/models/layers/yolo_bricks.py +++ b/mmyolo/mmyolo/models/layers/yolo_bricks.py @@ -4,10 +4,13 @@ import numpy as np import torch import torch.nn as nn -from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, MaxPool2d, - build_norm_layer) -from mmdet.models.layers.csp_layer import \ - DarknetBottleneck as MMDET_DarknetBottleneck +from mmcv.cnn import ( + ConvModule, + DepthwiseSeparableConvModule, + MaxPool2d, + build_norm_layer, +) +from mmdet.models.layers.csp_layer import DarknetBottleneck as MMDET_DarknetBottleneck from mmdet.utils import ConfigType, OptConfigType, OptMultiConfig from mmengine.model import BaseModule from mmengine.utils import digit_version @@ -15,8 +18,8 @@ from mmyolo.registry import MODELS -if digit_version(torch.__version__) >= digit_version('1.7.0'): - MODELS.register_module(module=nn.SiLU, name='SiLU') +if digit_version(torch.__version__) >= digit_version("1.7.0"): + MODELS.register_module(module=nn.SiLU, name="SiLU") else: class SiLU(nn.Module): @@ -28,7 +31,7 @@ def __init__(self, inplace=True): def forward(self, inputs) -> Tensor: return inputs * torch.sigmoid(inputs) - MODELS.register_module(module=SiLU, name='SiLU') + MODELS.register_module(module=SiLU, name="SiLU") class SPPFBottleneck(BaseModule): @@ -57,17 +60,18 @@ class SPPFBottleneck(BaseModule): Defaults to None. 
""" - def __init__(self, - in_channels: int, - out_channels: int, - kernel_sizes: Union[int, Sequence[int]] = 5, - use_conv_first: bool = True, - mid_channels_scale: float = 0.5, - conv_cfg: ConfigType = None, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_sizes: Union[int, Sequence[int]] = 5, + use_conv_first: bool = True, + mid_channels_scale: float = 0.5, + conv_cfg: ConfigType = None, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg) if use_conv_first: @@ -79,20 +83,24 @@ def __init__(self, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) else: mid_channels = in_channels self.conv1 = None self.kernel_sizes = kernel_sizes if isinstance(kernel_sizes, int): self.poolings = nn.MaxPool2d( - kernel_size=kernel_sizes, stride=1, padding=kernel_sizes // 2) + kernel_size=kernel_sizes, stride=1, padding=kernel_sizes // 2 + ) conv2_in_channels = mid_channels * 4 else: - self.poolings = nn.ModuleList([ - nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) - for ks in kernel_sizes - ]) + self.poolings = nn.ModuleList( + [ + nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) + for ks in kernel_sizes + ] + ) conv2_in_channels = mid_channels * (len(kernel_sizes) + 1) self.conv2 = ConvModule( @@ -101,7 +109,8 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) def forward(self, x: Tensor) -> Tensor: """Forward process @@ -115,8 +124,7 @@ def forward(self, x: Tensor) -> Tensor: y2 = self.poolings(y1) x = torch.cat([x, y1, y2, self.poolings(y2)], dim=1) else: - x = torch.cat( - [x] + [pooling(x) for pooling in self.poolings], dim=1) + x = torch.cat([x] + [pooling(x) for pooling in self.poolings], dim=1) x = self.conv2(x) return x @@ -149,22 +157,23 @@ class RepVGGBlock(nn.Module): deploy (bool): Whether in deploy mode. 
Default: False """ - def __init__(self, - in_channels: int, - out_channels: int, - kernel_size: Union[int, Tuple[int]] = 3, - stride: Union[int, Tuple[int]] = 1, - padding: Union[int, Tuple[int]] = 1, - dilation: Union[int, Tuple[int]] = 1, - groups: Optional[int] = 1, - padding_mode: Optional[str] = 'zeros', - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='ReLU', inplace=True), - use_se: bool = False, - use_alpha: bool = False, - use_bn_first=True, - deploy: bool = False): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int]] = 3, + stride: Union[int, Tuple[int]] = 1, + padding: Union[int, Tuple[int]] = 1, + dilation: Union[int, Tuple[int]] = 1, + groups: Optional[int] = 1, + padding_mode: Optional[str] = "zeros", + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="ReLU", inplace=True), + use_se: bool = False, + use_alpha: bool = False, + use_bn_first=True, + deploy: bool = False, + ): super().__init__() self.deploy = deploy self.groups = groups @@ -179,14 +188,18 @@ def __init__(self, self.nonlinearity = MODELS.build(act_cfg) if use_se: - raise NotImplementedError('se block not supported yet') + raise NotImplementedError("se block not supported yet") else: self.se = nn.Identity() if use_alpha: - alpha = torch.ones([ - 1, - ], dtype=torch.float32, requires_grad=True) + alpha = torch.ones( + [ + 1, + ], + dtype=torch.float32, + requires_grad=True, + ) self.alpha = nn.Parameter(alpha, requires_grad=True) else: self.alpha = None @@ -201,12 +214,14 @@ def __init__(self, dilation=dilation, groups=groups, bias=True, - padding_mode=padding_mode) + padding_mode=padding_mode, + ) else: if use_bn_first and (out_channels == in_channels) and stride == 1: self.rbr_identity = build_norm_layer( - norm_cfg, num_features=in_channels)[1] + norm_cfg, num_features=in_channels + )[1] else: self.rbr_identity = None @@ -219,7 +234,8 @@ def __init__(self, groups=groups, bias=False, norm_cfg=norm_cfg, - act_cfg=None) + act_cfg=None, + ) self.rbr_1x1 = ConvModule( in_channels=in_channels, out_channels=out_channels, @@ -229,7 +245,8 @@ def __init__(self, groups=groups, bias=False, norm_cfg=norm_cfg, - act_cfg=None) + act_cfg=None, + ) def forward(self, inputs: Tensor) -> Tensor: """Forward process. @@ -239,7 +256,7 @@ def forward(self, inputs: Tensor) -> Tensor: Returns: Tensor: The output tensor. """ - if hasattr(self, 'rbr_reparam'): + if hasattr(self, "rbr_reparam"): return self.nonlinearity(self.se(self.rbr_reparam(inputs))) if self.rbr_identity is None: @@ -249,12 +266,13 @@ def forward(self, inputs: Tensor) -> Tensor: if self.alpha: return self.nonlinearity( self.se( - self.rbr_dense(inputs) + - self.alpha * self.rbr_1x1(inputs) + id_out)) + self.rbr_dense(inputs) + self.alpha * self.rbr_1x1(inputs) + id_out + ) + ) else: return self.nonlinearity( - self.se( - self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)) + self.se(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) + ) def get_equivalent_kernel_bias(self): """Derives the equivalent kernel and bias in a differentiable way. 
@@ -266,11 +284,17 @@ def get_equivalent_kernel_bias(self): kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) if self.alpha: - return kernel3x3 + self.alpha * self._pad_1x1_to_3x3_tensor( - kernel1x1) + kernelid, bias3x3 + self.alpha * bias1x1 + biasid + return ( + kernel3x3 + + self.alpha * self._pad_1x1_to_3x3_tensor(kernel1x1) + + kernelid, + bias3x3 + self.alpha * bias1x1 + biasid, + ) else: - return kernel3x3 + self._pad_1x1_to_3x3_tensor( - kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid + return ( + kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, + bias3x3 + bias1x1 + biasid, + ) def _pad_1x1_to_3x3_tensor(self, kernel1x1): """Pad 1x1 tensor to 3x3. @@ -306,14 +330,14 @@ def _fuse_bn_tensor(self, branch: nn.Module) -> Tuple[np.ndarray, Tensor]: eps = branch.bn.eps else: assert isinstance(branch, (nn.SyncBatchNorm, nn.BatchNorm2d)) - if not hasattr(self, 'id_tensor'): + if not hasattr(self, "id_tensor"): input_dim = self.in_channels // self.groups - kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), - dtype=np.float32) + kernel_value = np.zeros( + (self.in_channels, input_dim, 3, 3), dtype=np.float32 + ) for i in range(self.in_channels): kernel_value[i, i % input_dim, 1, 1] = 1 - self.id_tensor = torch.from_numpy(kernel_value).to( - branch.weight.device) + self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) kernel = self.id_tensor running_mean = branch.running_mean running_var = branch.running_var @@ -326,7 +350,7 @@ def _fuse_bn_tensor(self, branch: nn.Module) -> Tuple[np.ndarray, Tensor]: def switch_to_deploy(self): """Switch to deploy mode.""" - if hasattr(self, 'rbr_reparam'): + if hasattr(self, "rbr_reparam"): return kernel, bias = self.get_equivalent_kernel_bias() self.rbr_reparam = nn.Conv2d( @@ -337,17 +361,18 @@ def switch_to_deploy(self): padding=self.rbr_dense.conv.padding, dilation=self.rbr_dense.conv.dilation, groups=self.rbr_dense.conv.groups, - bias=True) + bias=True, + ) self.rbr_reparam.weight.data = kernel self.rbr_reparam.bias.data = bias for para in self.parameters(): para.detach_() - self.__delattr__('rbr_dense') - self.__delattr__('rbr_1x1') - if hasattr(self, 'rbr_identity'): - self.__delattr__('rbr_identity') - if hasattr(self, 'id_tensor'): - self.__delattr__('id_tensor') + self.__delattr__("rbr_dense") + self.__delattr__("rbr_1x1") + if hasattr(self, "rbr_identity"): + self.__delattr__("rbr_identity") + if hasattr(self, "id_tensor"): + self.__delattr__("id_tensor") self.deploy = True @@ -371,16 +396,17 @@ class BepC3StageBlock(nn.Module): Defaults to dict(type='ReLU', inplace=True). 
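_fuse_bn_tensor above folds a BatchNorm into its preceding conv via w' = w * gamma / sqrt(var + eps) and b' = beta - mean * gamma / sqrt(var + eps); switch_to_deploy then sums the fused 3x3, padded 1x1, and identity kernels into rbr_reparam. A minimal sketch of just the conv+BN folding identity (standalone toy tensors, not mmyolo code):

import torch
import torch.nn as nn

conv = nn.Conv2d(4, 4, 3, padding=1, bias=False)
bn = nn.BatchNorm2d(4).eval()  # eval(): fuse against running statistics
bn.running_mean.uniform_(-1, 1)
bn.running_var.uniform_(0.5, 2.0)
bn.weight.data.uniform_(0.5, 1.5)
bn.bias.data.uniform_(-1, 1)

std = (bn.running_var + bn.eps).sqrt()
fused = nn.Conv2d(4, 4, 3, padding=1, bias=True)
with torch.no_grad():
    fused.weight.copy_(conv.weight * (bn.weight / std).reshape(-1, 1, 1, 1))
    fused.bias.copy_(bn.bias - bn.running_mean * bn.weight / std)

x = torch.randn(2, 4, 8, 8)
assert torch.allclose(bn(conv(x)), fused(x), atol=1e-5)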
""" - def __init__(self, - in_channels: int, - out_channels: int, - num_blocks: int = 1, - hidden_ratio: float = 0.5, - concat_all_layer: bool = True, - block_cfg: ConfigType = dict(type='RepVGGBlock'), - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='ReLU', inplace=True)): + def __init__( + self, + in_channels: int, + out_channels: int, + num_blocks: int = 1, + hidden_ratio: float = 0.5, + concat_all_layer: bool = True, + block_cfg: ConfigType = dict(type="RepVGGBlock"), + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="ReLU", inplace=True), + ): super().__init__() hidden_channels = int(out_channels * hidden_ratio) @@ -392,7 +418,8 @@ def __init__(self, groups=1, bias=False, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.conv2 = ConvModule( in_channels, hidden_channels, @@ -401,7 +428,8 @@ def __init__(self, groups=1, bias=False, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.conv3 = ConvModule( 2 * hidden_channels, out_channels, @@ -410,13 +438,15 @@ def __init__(self, groups=1, bias=False, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.block = RepStageBlock( in_channels=hidden_channels, out_channels=hidden_channels, num_blocks=num_blocks, block_cfg=block_cfg, - bottle_block=BottleRep) + bottle_block=BottleRep, + ) self.concat_all_layer = concat_all_layer if not concat_all_layer: self.conv3 = ConvModule( @@ -427,12 +457,14 @@ def __init__(self, groups=1, bias=False, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) def forward(self, x): if self.concat_all_layer is True: return self.conv3( - torch.cat((self.block(self.conv1(x)), self.conv2(x)), dim=1)) + torch.cat((self.block(self.conv1(x)), self.conv2(x)), dim=1) + ) else: return self.conv3(self.block(self.conv1(x))) @@ -449,19 +481,19 @@ class BottleRep(nn.Module): Defaults False. """ - def __init__(self, - in_channels: int, - out_channels: int, - block_cfg: ConfigType = dict(type='RepVGGBlock'), - adaptive_weight: bool = False): + def __init__( + self, + in_channels: int, + out_channels: int, + block_cfg: ConfigType = dict(type="RepVGGBlock"), + adaptive_weight: bool = False, + ): super().__init__() conv1_cfg = block_cfg.copy() conv2_cfg = block_cfg.copy() - conv1_cfg.update( - dict(in_channels=in_channels, out_channels=out_channels)) - conv2_cfg.update( - dict(in_channels=out_channels, out_channels=out_channels)) + conv1_cfg.update(dict(in_channels=in_channels, out_channels=out_channels)) + conv2_cfg.update(dict(in_channels=out_channels, out_channels=out_channels)) self.conv1 = MODELS.build(conv1_cfg) self.conv2 = MODELS.build(conv2_cfg) @@ -499,15 +531,17 @@ class ConvWrapper(nn.Module): Defaults to dict(type='ReLU', inplace=True). 
""" - def __init__(self, - in_channels: int, - out_channels: int, - kernel_size: int = 3, - stride: int = 1, - groups: int = 1, - bias: bool = True, - norm_cfg: ConfigType = None, - act_cfg: ConfigType = dict(type='SiLU')): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + stride: int = 1, + groups: int = 1, + bias: bool = True, + norm_cfg: ConfigType = None, + act_cfg: ConfigType = dict(type="SiLU"), + ): super().__init__() self.block = ConvModule( in_channels, @@ -518,7 +552,8 @@ def __init__(self, groups=groups, bias=bias, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) def forward(self, x: Tensor) -> Tensor: return self.block(x) @@ -539,9 +574,7 @@ class EffectiveSELayer(nn.Module): Defaults to dict(type='HSigmoid'). """ - def __init__(self, - channels: int, - act_cfg: ConfigType = dict(type='HSigmoid')): + def __init__(self, channels: int, act_cfg: ConfigType = dict(type="HSigmoid")): super().__init__() assert isinstance(act_cfg, dict) self.fc = ConvModule(channels, channels, 1, act_cfg=None) @@ -551,9 +584,9 @@ def __init__(self, def forward(self, x: Tensor) -> Tensor: """Forward process - Args: - x (Tensor): The input tensor. - """ + Args: + x (Tensor): The input tensor. + """ x_se = x.mean((2, 3), keepdim=True) x_se = self.fc(x_se) return x * self.activate(x_se) @@ -576,20 +609,18 @@ class PPYOLOESELayer(nn.Module): Defaults to dict(type='SiLU', inplace=True). """ - def __init__(self, - feat_channels: int, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.1, eps=1e-5), - act_cfg: ConfigType = dict(type='SiLU', inplace=True)): + def __init__( + self, + feat_channels: int, + norm_cfg: ConfigType = dict(type="BN", momentum=0.1, eps=1e-5), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + ): super().__init__() self.fc = nn.Conv2d(feat_channels, feat_channels, 1) self.sig = nn.Sigmoid() self.conv = ConvModule( - feat_channels, - feat_channels, - 1, - norm_cfg=norm_cfg, - act_cfg=act_cfg) + feat_channels, feat_channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg + ) self._init_weights() @@ -599,10 +630,10 @@ def _init_weights(self): def forward(self, feat: Tensor, avg_feat: Tensor) -> Tensor: """Forward process - Args: - feat (Tensor): The input tensor. - avg_feat (Tensor): Average pooling feature tensor. - """ + Args: + feat (Tensor): The input tensor. + avg_feat (Tensor): Average pooling feature tensor. + """ weight = self.sig(self.fc(avg_feat)) return self.conv(feat * weight) @@ -632,26 +663,26 @@ class ELANBlock(BaseModule): Defaults to None. 
""" - def __init__(self, - in_channels: int, - out_channels: int, - middle_ratio: float, - block_ratio: float, - num_blocks: int = 2, - num_convs_in_block: int = 1, - conv_cfg: OptConfigType = None, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + def __init__( + self, + in_channels: int, + out_channels: int, + middle_ratio: float, + block_ratio: float, + num_blocks: int = 2, + num_convs_in_block: int = 1, + conv_cfg: OptConfigType = None, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg=init_cfg) assert num_blocks >= 1 assert num_convs_in_block >= 1 middle_channels = int(in_channels * middle_ratio) block_channels = int(in_channels * block_ratio) - final_conv_in_channels = int( - num_blocks * block_channels) + 2 * middle_channels + final_conv_in_channels = int(num_blocks * block_channels) + 2 * middle_channels self.main_conv = ConvModule( in_channels, @@ -659,7 +690,8 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.short_conv = ConvModule( in_channels, @@ -667,7 +699,8 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.blocks = nn.ModuleList() for _ in range(num_blocks): @@ -679,7 +712,8 @@ def __init__(self, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) else: internal_block = [] for _ in range(num_convs_in_block): @@ -691,7 +725,9 @@ def __init__(self, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) + act_cfg=act_cfg, + ) + ) middle_channels = block_channels internal_block = nn.Sequential(*internal_block) @@ -704,13 +740,14 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) def forward(self, x: Tensor) -> Tensor: """Forward process - Args: - x (Tensor): The input tensor. - """ + Args: + x (Tensor): The input tensor. + """ x_short = self.short_conv(x) x_main = self.main_conv(x) block_outs = [] @@ -764,31 +801,34 @@ class MaxPoolAndStrideConvBlock(BaseModule): Defaults to None. 
""" - def __init__(self, - in_channels: int, - out_channels: int, - maxpool_kernel_sizes: int = 2, - use_in_channels_of_middle: bool = False, - conv_cfg: OptConfigType = None, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + def __init__( + self, + in_channels: int, + out_channels: int, + maxpool_kernel_sizes: int = 2, + use_in_channels_of_middle: bool = False, + conv_cfg: OptConfigType = None, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg=init_cfg) - middle_channels = in_channels if use_in_channels_of_middle \ - else out_channels // 2 + middle_channels = ( + in_channels if use_in_channels_of_middle else out_channels // 2 + ) self.maxpool_branches = nn.Sequential( - MaxPool2d( - kernel_size=maxpool_kernel_sizes, stride=maxpool_kernel_sizes), + MaxPool2d(kernel_size=maxpool_kernel_sizes, stride=maxpool_kernel_sizes), ConvModule( in_channels, out_channels // 2, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) + act_cfg=act_cfg, + ), + ) self.stride_conv_branches = nn.Sequential( ConvModule( @@ -797,7 +837,8 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg), + act_cfg=act_cfg, + ), ConvModule( middle_channels, out_channels // 2, @@ -806,7 +847,9 @@ def __init__(self, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) + act_cfg=act_cfg, + ), + ) def forward(self, x: Tensor) -> Tensor: """Forward process @@ -840,15 +883,16 @@ class TinyDownSampleBlock(BaseModule): """ def __init__( - self, - in_channels: int, - out_channels: int, - middle_ratio: float = 1.0, - kernel_sizes: Union[int, Sequence[int]] = 3, - conv_cfg: OptConfigType = None, - norm_cfg: ConfigType = dict(type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='LeakyReLU', negative_slope=0.1), - init_cfg: OptMultiConfig = None): + self, + in_channels: int, + out_channels: int, + middle_ratio: float = 1.0, + kernel_sizes: Union[int, Sequence[int]] = 3, + conv_cfg: OptConfigType = None, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="LeakyReLU", negative_slope=0.1), + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg) middle_channels = int(in_channels * middle_ratio) @@ -859,7 +903,8 @@ def __init__( 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.main_convs = nn.ModuleList() for i in range(3): @@ -871,7 +916,9 @@ def __init__( 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) + act_cfg=act_cfg, + ) + ) else: self.main_convs.append( ConvModule( @@ -881,7 +928,9 @@ def __init__( padding=(kernel_sizes - 1) // 2, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) + act_cfg=act_cfg, + ) + ) self.final_conv = ConvModule( middle_channels * 4, @@ -889,7 +938,8 @@ def __init__( 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) def forward(self, x) -> Tensor: short_out = self.short_conv(x) @@ -906,38 +956,39 @@ def forward(self, x) -> Tensor: @MODELS.register_module() class SPPFCSPBlock(BaseModule): """Spatial pyramid pooling - Fast (SPPF) layer with CSP for - YOLOv7 + YOLOv7 - Args: - in_channels (int): The input channels of this Module. - out_channels (int): The output channels of this Module. - expand_ratio (float): Expand ratio of SPPCSPBlock. 
- Defaults to 0.5. - kernel_sizes (int, tuple[int]): Sequential or number of kernel - sizes of pooling layers. Defaults to 5. - is_tiny_version (bool): Is tiny version of SPPFCSPBlock. If True, - it means it is a yolov7 tiny model. Defaults to False. - conv_cfg (dict): Config dict for convolution layer. Defaults to None. - which means using conv2d. Defaults to None. - norm_cfg (dict): Config dict for normalization layer. - Defaults to dict(type='BN', momentum=0.03, eps=0.001). - act_cfg (dict): Config dict for activation layer. - Defaults to dict(type='SiLU', inplace=True). - init_cfg (dict or list[dict], optional): Initialization config dict. - Defaults to None. - """ - - def __init__(self, - in_channels: int, - out_channels: int, - expand_ratio: float = 0.5, - kernel_sizes: Union[int, Sequence[int]] = 5, - is_tiny_version: bool = False, - conv_cfg: OptConfigType = None, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + Args: + in_channels (int): The input channels of this Module. + out_channels (int): The output channels of this Module. + expand_ratio (float): Expand ratio of SPPCSPBlock. + Defaults to 0.5. + kernel_sizes (int, tuple[int]): Sequential or number of kernel + sizes of pooling layers. Defaults to 5. + is_tiny_version (bool): Is tiny version of SPPFCSPBlock. If True, + it means it is a yolov7 tiny model. Defaults to False. + conv_cfg (dict): Config dict for convolution layer. Defaults to None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Defaults to dict(type='BN', momentum=0.03, eps=0.001). + act_cfg (dict): Config dict for activation layer. + Defaults to dict(type='SiLU', inplace=True). + init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to None.
+ """ + + def __init__( + self, + in_channels: int, + out_channels: int, + expand_ratio: float = 0.5, + kernel_sizes: Union[int, Sequence[int]] = 5, + is_tiny_version: bool = False, + conv_cfg: OptConfigType = None, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg=init_cfg) self.is_tiny_version = is_tiny_version @@ -950,7 +1001,8 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) else: self.main_layers = nn.Sequential( ConvModule( @@ -959,7 +1011,8 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg), + act_cfg=act_cfg, + ), ConvModule( mid_channels, mid_channels, @@ -967,25 +1020,30 @@ def __init__(self, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg), + act_cfg=act_cfg, + ), ConvModule( mid_channels, mid_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg), + act_cfg=act_cfg, + ), ) self.kernel_sizes = kernel_sizes if isinstance(kernel_sizes, int): self.poolings = nn.MaxPool2d( - kernel_size=kernel_sizes, stride=1, padding=kernel_sizes // 2) + kernel_size=kernel_sizes, stride=1, padding=kernel_sizes // 2 + ) else: - self.poolings = nn.ModuleList([ - nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) - for ks in kernel_sizes - ]) + self.poolings = nn.ModuleList( + [ + nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) + for ks in kernel_sizes + ] + ) if is_tiny_version: self.fuse_layers = ConvModule( @@ -994,7 +1052,8 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) else: self.fuse_layers = nn.Sequential( ConvModule( @@ -1003,7 +1062,8 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg), + act_cfg=act_cfg, + ), ConvModule( mid_channels, mid_channels, @@ -1011,7 +1071,9 @@ def __init__(self, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg)) + act_cfg=act_cfg, + ), + ) self.short_layer = ConvModule( in_channels, @@ -1019,7 +1081,8 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.final_conv = ConvModule( 2 * mid_channels, @@ -1027,7 +1090,8 @@ def __init__(self, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) def forward(self, x) -> Tensor: """Forward process @@ -1063,7 +1127,7 @@ class ImplicitA(nn.Module): std (float): Std value of implicit module. Defaults to 0.02 """ - def __init__(self, in_channels: int, mean: float = 0., std: float = .02): + def __init__(self, in_channels: int, mean: float = 0.0, std: float = 0.02): super().__init__() self.implicit = nn.Parameter(torch.zeros(1, in_channels, 1, 1)) nn.init.normal_(self.implicit, mean=mean, std=std) @@ -1085,7 +1149,7 @@ class ImplicitM(nn.Module): std (float): Std value of implicit module. Defaults to 0.02. """ - def __init__(self, in_channels: int, mean: float = 1., std: float = .02): + def __init__(self, in_channels: int, mean: float = 1.0, std: float = 0.02): super().__init__() self.implicit = nn.Parameter(torch.ones(1, in_channels, 1, 1)) nn.init.normal_(self.implicit, mean=mean, std=std) @@ -1114,14 +1178,15 @@ class PPYOLOEBasicBlock(nn.Module): use_alpha (bool): Whether to use `alpha` parameter at 1x1 conv. 
""" - def __init__(self, - in_channels: int, - out_channels: int, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.1, eps=1e-5), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - shortcut: bool = True, - use_alpha: bool = False): + def __init__( + self, + in_channels: int, + out_channels: int, + norm_cfg: ConfigType = dict(type="BN", momentum=0.1, eps=1e-5), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + shortcut: bool = True, + use_alpha: bool = False, + ): super().__init__() assert act_cfg is None or isinstance(act_cfg, dict) self.conv1 = ConvModule( @@ -1131,7 +1196,8 @@ def __init__(self, stride=1, padding=1, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.conv2 = RepVGGBlock( out_channels, @@ -1139,7 +1205,8 @@ def __init__(self, use_alpha=use_alpha, act_cfg=act_cfg, norm_cfg=norm_cfg, - use_bn_first=False) + use_bn_first=False, + ) self.shortcut = shortcut def forward(self, x: Tensor) -> Tensor: @@ -1184,19 +1251,22 @@ class CSPResLayer(nn.Module): Defaults to False. """ - def __init__(self, - in_channels: int, - out_channels: int, - num_block: int, - block_cfg: ConfigType = dict( - type='PPYOLOEBasicBlock', shortcut=True, use_alpha=True), - stride: int = 1, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.1, eps=1e-5), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - attention_cfg: OptMultiConfig = dict( - type='EffectiveSELayer', act_cfg=dict(type='HSigmoid')), - use_spp: bool = False): + def __init__( + self, + in_channels: int, + out_channels: int, + num_block: int, + block_cfg: ConfigType = dict( + type="PPYOLOEBasicBlock", shortcut=True, use_alpha=True + ), + stride: int = 1, + norm_cfg: ConfigType = dict(type="BN", momentum=0.1, eps=1e-5), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + attention_cfg: OptMultiConfig = dict( + type="EffectiveSELayer", act_cfg=dict(type="HSigmoid") + ), + use_spp: bool = False, + ): super().__init__() self.num_block = num_block @@ -1208,7 +1278,8 @@ def __init__(self, if stride == 2: conv1_in_channels = conv2_in_channels = conv3_in_channels = ( - in_channels + out_channels) // 2 + in_channels + out_channels + ) // 2 blocks_channels = conv1_in_channels // 2 self.conv_down = ConvModule( in_channels, @@ -1217,7 +1288,8 @@ def __init__(self, stride=2, padding=1, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) else: conv1_in_channels = conv2_in_channels = in_channels conv3_in_channels = out_channels @@ -1225,31 +1297,22 @@ def __init__(self, self.conv_down = None self.conv1 = ConvModule( - conv1_in_channels, - blocks_channels, - 1, - norm_cfg=norm_cfg, - act_cfg=act_cfg) + conv1_in_channels, blocks_channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg + ) self.conv2 = ConvModule( - conv2_in_channels, - blocks_channels, - 1, - norm_cfg=norm_cfg, - act_cfg=act_cfg) + conv2_in_channels, blocks_channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg + ) self.blocks = self.build_blocks_layer(blocks_channels) self.conv3 = ConvModule( - conv3_in_channels, - out_channels, - 1, - norm_cfg=norm_cfg, - act_cfg=act_cfg) + conv3_in_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg + ) if attention_cfg: attention_cfg = attention_cfg.copy() - attention_cfg['channels'] = blocks_channels * 2 + attention_cfg["channels"] = blocks_channels * 2 self.attn = MODELS.build(attention_cfg) else: self.attn = None @@ -1263,16 +1326,17 @@ def build_blocks_layer(self, blocks_channels: int) -> nn.Module: blocks = nn.Sequential() block_cfg = self.block_cfg.copy() block_cfg.update( - 
dict(in_channels=blocks_channels, out_channels=blocks_channels)) - block_cfg.setdefault('norm_cfg', self.norm_cfg) - block_cfg.setdefault('act_cfg', self.act_cfg) + dict(in_channels=blocks_channels, out_channels=blocks_channels) + ) + block_cfg.setdefault("norm_cfg", self.norm_cfg) + block_cfg.setdefault("act_cfg", self.act_cfg) for i in range(self.num_block): blocks.add_module(str(i), MODELS.build(block_cfg)) if i == (self.num_block - 1) // 2 and self.use_spp: blocks.add_module( - 'spp', + "spp", SPPFBottleneck( blocks_channels, blocks_channels, @@ -1280,15 +1344,17 @@ def build_blocks_layer(self, blocks_channels: int) -> nn.Module: use_conv_first=False, conv_cfg=None, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ), + ) return blocks def forward(self, x: Tensor) -> Tensor: """Forward process - Args: - x (Tensor): The input tensor. - """ + Args: + x (Tensor): The input tensor. + """ if self.conv_down is not None: x = self.conv_down(x) y1 = self.conv1(x) @@ -1314,42 +1380,47 @@ class RepStageBlock(nn.Module): Defaults to 'RepVGGBlock'. """ - def __init__(self, - in_channels: int, - out_channels: int, - num_blocks: int = 1, - bottle_block: nn.Module = RepVGGBlock, - block_cfg: ConfigType = dict(type='RepVGGBlock')): + def __init__( + self, + in_channels: int, + out_channels: int, + num_blocks: int = 1, + bottle_block: nn.Module = RepVGGBlock, + block_cfg: ConfigType = dict(type="RepVGGBlock"), + ): super().__init__() block_cfg = block_cfg.copy() - block_cfg.update( - dict(in_channels=in_channels, out_channels=out_channels)) + block_cfg.update(dict(in_channels=in_channels, out_channels=out_channels)) self.conv1 = MODELS.build(block_cfg) - block_cfg.update( - dict(in_channels=out_channels, out_channels=out_channels)) + block_cfg.update(dict(in_channels=out_channels, out_channels=out_channels)) self.block = None if num_blocks > 1: - self.block = nn.Sequential(*(MODELS.build(block_cfg) - for _ in range(num_blocks - 1))) + self.block = nn.Sequential( + *(MODELS.build(block_cfg) for _ in range(num_blocks - 1)) + ) if bottle_block == BottleRep: self.conv1 = BottleRep( - in_channels, - out_channels, - block_cfg=block_cfg, - adaptive_weight=True) + in_channels, out_channels, block_cfg=block_cfg, adaptive_weight=True + ) num_blocks = num_blocks // 2 self.block = None if num_blocks > 1: - self.block = nn.Sequential(*(BottleRep( - out_channels, - out_channels, - block_cfg=block_cfg, - adaptive_weight=True) for _ in range(num_blocks - 1))) + self.block = nn.Sequential( + *( + BottleRep( + out_channels, + out_channels, + block_cfg=block_cfg, + adaptive_weight=True, + ) + for _ in range(num_blocks - 1) + ) + ) def forward(self, x: Tensor) -> Tensor: """Forward process. @@ -1399,19 +1470,20 @@ class DarknetBottleneck(MMDET_DarknetBottleneck): Defaults to dict(type='Swish'). 
""" - def __init__(self, - in_channels: int, - out_channels: int, - expansion: float = 0.5, - kernel_size: Sequence[int] = (1, 3), - padding: Sequence[int] = (0, 1), - add_identity: bool = True, - use_depthwise: bool = False, - conv_cfg: OptConfigType = None, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None) -> None: + def __init__( + self, + in_channels: int, + out_channels: int, + expansion: float = 0.5, + kernel_size: Sequence[int] = (1, 3), + padding: Sequence[int] = (0, 1), + add_identity: bool = True, + use_depthwise: bool = False, + conv_cfg: OptConfigType = None, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ) -> None: super().__init__(in_channels, out_channels, init_cfg=init_cfg) hidden_channels = int(out_channels * expansion) conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule @@ -1424,7 +1496,8 @@ def __init__(self, padding=padding[0], conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.conv2 = conv( hidden_channels, out_channels, @@ -1433,9 +1506,9 @@ def __init__(self, padding=padding[1], conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) - self.add_identity = \ - add_identity and in_channels == out_channels + act_cfg=act_cfg, + ) + self.add_identity = add_identity and in_channels == out_channels class CSPLayerWithTwoConv(BaseModule): @@ -1461,16 +1534,17 @@ class CSPLayerWithTwoConv(BaseModule): """ def __init__( - self, - in_channels: int, - out_channels: int, - expand_ratio: float = 0.5, - num_blocks: int = 1, - add_identity: bool = True, # shortcut - conv_cfg: OptConfigType = None, - norm_cfg: ConfigType = dict(type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None) -> None: + self, + in_channels: int, + out_channels: int, + expand_ratio: float = 0.5, + num_blocks: int = 1, + add_identity: bool = True, # shortcut + conv_cfg: OptConfigType = None, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ) -> None: super().__init__(init_cfg=init_cfg) self.mid_channels = int(out_channels * expand_ratio) @@ -1480,14 +1554,16 @@ def __init__( 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.final_conv = ConvModule( (2 + num_blocks) * self.mid_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) + act_cfg=act_cfg, + ) self.blocks = nn.ModuleList( DarknetBottleneck( @@ -1500,7 +1576,10 @@ def __init__( use_depthwise=False, conv_cfg=conv_cfg, norm_cfg=norm_cfg, - act_cfg=act_cfg) for _ in range(num_blocks)) + act_cfg=act_cfg, + ) + for _ in range(num_blocks) + ) def forward(self, x: Tensor) -> Tensor: """Forward process.""" diff --git a/mmyolo/mmyolo/models/losses/__init__.py b/mmyolo/mmyolo/models/losses/__init__.py index ee192921..2fe5739f 100644 --- a/mmyolo/mmyolo/models/losses/__init__.py +++ b/mmyolo/mmyolo/models/losses/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .iou_loss import IoULoss, bbox_overlaps -__all__ = ['IoULoss', 'bbox_overlaps'] +__all__ = ["IoULoss", "bbox_overlaps"] diff --git a/mmyolo/mmyolo/models/losses/iou_loss.py b/mmyolo/mmyolo/models/losses/iou_loss.py index e3d3dc40..8fa4b908 100644 --- a/mmyolo/mmyolo/models/losses/iou_loss.py +++ b/mmyolo/mmyolo/models/losses/iou_loss.py @@ -10,12 +10,14 @@ from mmyolo.registry import MODELS -def bbox_overlaps(pred: torch.Tensor, - target: torch.Tensor, - iou_mode: str = 'ciou', - bbox_format: str = 'xywh', - siou_theta: float = 4.0, - eps: float = 1e-7) -> torch.Tensor: +def bbox_overlaps( + pred: torch.Tensor, + target: torch.Tensor, + iou_mode: str = "ciou", + bbox_format: str = "xywh", + siou_theta: float = 4.0, + eps: float = 1e-7, +) -> torch.Tensor: r"""Calculate overlap between two set of bboxes. `Implementation of paper `Enhancing Geometric Factors into Model Learning and Inference for Object Detection and Instance @@ -44,9 +46,9 @@ def bbox_overlaps(pred: torch.Tensor, Returns: Tensor: shape (n, ). """ - assert iou_mode in ('iou', 'ciou', 'giou', 'siou') - assert bbox_format in ('xyxy', 'xywh') - if bbox_format == 'xywh': + assert iou_mode in ("iou", "ciou", "giou", "siou") + assert bbox_format in ("xyxy", "xywh") + if bbox_format == "xywh": pred = HorizontalBoxes.cxcywh_to_xyxy(pred) target = HorizontalBoxes.cxcywh_to_xyxy(target) @@ -56,10 +58,9 @@ def bbox_overlaps(pred: torch.Tensor, bbox2_x2, bbox2_y2 = target[..., 2], target[..., 3] # Overlap - overlap = (torch.min(bbox1_x2, bbox2_x2) - - torch.max(bbox1_x1, bbox2_x1)).clamp(0) * \ - (torch.min(bbox1_y2, bbox2_y2) - - torch.max(bbox1_y1, bbox2_y1)).clamp(0) + overlap = (torch.min(bbox1_x2, bbox2_x2) - torch.max(bbox1_x1, bbox2_x1)).clamp( + 0 + ) * (torch.min(bbox1_y2, bbox2_y2) - torch.max(bbox1_y1, bbox2_y1)).clamp(0) # Union w1, h1 = bbox1_x2 - bbox1_x1, bbox1_y2 - bbox1_y1 @@ -80,7 +81,7 @@ def bbox_overlaps(pred: torch.Tensor, enclose_w = enclose_wh[..., 0] # cw enclose_h = enclose_wh[..., 1] # ch - if iou_mode == 'ciou': + if iou_mode == "ciou": # CIoU = IoU - ( (ρ^2(b_pred,b_gt) / c^2) + (alpha x v) ) # calculate enclose area (c^2) @@ -90,14 +91,14 @@ def bbox_overlaps(pred: torch.Tensor, # euclidean distance between b_pred(bbox2) and b_gt(bbox1) # center point, because bbox format is xyxy -> left-top xy and # right-bottom xy, so need to / 4 to get center point. 
- rho2_left_item = ((bbox2_x1 + bbox2_x2) - (bbox1_x1 + bbox1_x2))**2 / 4 - rho2_right_item = ((bbox2_y1 + bbox2_y2) - - (bbox1_y1 + bbox1_y2))**2 / 4 + rho2_left_item = ((bbox2_x1 + bbox2_x2) - (bbox1_x1 + bbox1_x2)) ** 2 / 4 + rho2_right_item = ((bbox2_y1 + bbox2_y2) - (bbox1_y1 + bbox1_y2)) ** 2 / 4 rho2 = rho2_left_item + rho2_right_item # rho^2 (ρ^2) # Width and height ratio (v) wh_ratio = (4 / (math.pi**2)) * torch.pow( - torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + torch.atan(w2 / h2) - torch.atan(w1 / h1), 2 + ) with torch.no_grad(): alpha = wh_ratio / (wh_ratio - ious + (1 + eps)) @@ -105,12 +106,12 @@ def bbox_overlaps(pred: torch.Tensor, # CIoU ious = ious - ((rho2 / enclose_area) + (alpha * wh_ratio)) - elif iou_mode == 'giou': + elif iou_mode == "giou": # GIoU = IoU - ( (A_c - union) / A_c ) convex_area = enclose_w * enclose_h + eps # convex area (A_c) ious = ious - (convex_area - union) / convex_area - elif iou_mode == 'siou': + elif iou_mode == "siou": # SIoU: https://arxiv.org/pdf/2205.12740.pdf # SIoU = IoU - ( (Distance Cost + Shape Cost) / 2 ) @@ -126,25 +127,25 @@ def bbox_overlaps(pred: torch.Tensor, # choose minimize alpha, sin(alpha) sin_alpha = torch.abs(sigma_ch) / sigma sin_beta = torch.abs(sigma_cw) / sigma - sin_alpha = torch.where(sin_alpha <= math.sin(math.pi / 4), sin_alpha, - sin_beta) + sin_alpha = torch.where(sin_alpha <= math.sin(math.pi / 4), sin_alpha, sin_beta) # Angle cost = 1 - 2 * ( sin^2 ( arcsin(x) - (pi / 4) ) ) angle_cost = torch.cos(torch.arcsin(sin_alpha) * 2 - math.pi / 2) # Distance cost = Σ_(t=x,y) (1 - e ^ (- γ ρ_t)) - rho_x = (sigma_cw / enclose_w)**2 # ρ_x - rho_y = (sigma_ch / enclose_h)**2 # ρ_y + rho_x = (sigma_cw / enclose_w) ** 2 # ρ_x + rho_y = (sigma_ch / enclose_h) ** 2 # ρ_y gamma = 2 - angle_cost # γ distance_cost = (1 - torch.exp(-1 * gamma * rho_x)) + ( - 1 - torch.exp(-1 * gamma * rho_y)) + 1 - torch.exp(-1 * gamma * rho_y) + ) # Shape cost = Ω = Σ_(t=w,h) ( ( 1 - ( e ^ (-ω_t) ) ) ^ θ ) omiga_w = torch.abs(w1 - w2) / torch.max(w1, w2) # ω_w omiga_h = torch.abs(h1 - h2) / torch.max(h1, h2) # ω_h - shape_cost = torch.pow(1 - torch.exp(-1 * omiga_w), - siou_theta) + torch.pow( - 1 - torch.exp(-1 * omiga_h), siou_theta) + shape_cost = torch.pow(1 - torch.exp(-1 * omiga_w), siou_theta) + torch.pow( + 1 - torch.exp(-1 * omiga_h), siou_theta + ) ious = ious - ((distance_cost + shape_cost) * 0.5) @@ -167,16 +168,18 @@ class IoULoss(nn.Module): return_iou (bool): If True, return loss and iou. """ - def __init__(self, - iou_mode: str = 'ciou', - bbox_format: str = 'xywh', - eps: float = 1e-7, - reduction: str = 'mean', - loss_weight: float = 1.0, - return_iou: bool = True): + def __init__( + self, + iou_mode: str = "ciou", + bbox_format: str = "xywh", + eps: float = 1e-7, + reduction: str = "mean", + loss_weight: float = 1.0, + return_iou: bool = True, + ): super().__init__() - assert bbox_format in ('xywh', 'xyxy') - assert iou_mode in ('ciou', 'siou', 'giou') + assert bbox_format in ("xywh", "xyxy") + assert iou_mode in ("ciou", "siou", "giou") self.iou_mode = iou_mode self.bbox_format = bbox_format self.eps = eps @@ -190,7 +193,7 @@ def forward( target: torch.Tensor, weight: Optional[torch.Tensor] = None, avg_factor: Optional[float] = None, - reduction_override: Optional[Union[str, bool]] = None + reduction_override: Optional[Union[str, bool]] = None, ) -> Tuple[Union[torch.Tensor, torch.Tensor], torch.Tensor]: """Forward function. 
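All three modes of bbox_overlaps above (CIoU, GIoU, SIoU) start from the same overlap/union term before subtracting their respective penalty. A self-contained version of just that core for xyxy boxes (the helper name is illustrative):

import torch

def iou_xyxy(a: torch.Tensor, b: torch.Tensor, eps: float = 1e-7) -> torch.Tensor:
    lt = torch.max(a[..., :2], b[..., :2])  # intersection left-top
    rb = torch.min(a[..., 2:], b[..., 2:])  # intersection right-bottom
    wh = (rb - lt).clamp(min=0)
    overlap = wh[..., 0] * wh[..., 1]
    area_a = (a[..., 2] - a[..., 0]) * (a[..., 3] - a[..., 1])
    area_b = (b[..., 2] - b[..., 0]) * (b[..., 3] - b[..., 1])
    return overlap / (area_a + area_b - overlap + eps)  # eps keeps union > 0

a = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
b = torch.tensor([[1.0, 1.0, 3.0, 3.0]])
print(iou_xyxy(a, b))  # tensor([0.1429]): overlap 1, union 7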
@@ -210,9 +213,8 @@ def forward( if pred.dim() == weight.dim() + 1: weight = weight.unsqueeze(1) return (pred * weight).sum() # 0 - assert reduction_override in (None, 'none', 'mean', 'sum') - reduction = ( - reduction_override if reduction_override else self.reduction) + assert reduction_override in (None, "none", "mean", "sum") + reduction = reduction_override if reduction_override else self.reduction if weight is not None and weight.dim() > 1: weight = weight.mean(-1) @@ -222,9 +224,11 @@ def forward( target, iou_mode=self.iou_mode, bbox_format=self.bbox_format, - eps=self.eps) - loss = self.loss_weight * weight_reduce_loss(1.0 - iou, weight, - reduction, avg_factor) + eps=self.eps, + ) + loss = self.loss_weight * weight_reduce_loss( + 1.0 - iou, weight, reduction, avg_factor + ) if self.return_iou: return loss, iou diff --git a/mmyolo/mmyolo/models/necks/__init__.py b/mmyolo/mmyolo/models/necks/__init__.py index 6da9641c..5c5289f9 100644 --- a/mmyolo/mmyolo/models/necks/__init__.py +++ b/mmyolo/mmyolo/models/necks/__init__.py @@ -9,7 +9,13 @@ from .yolox_pafpn import YOLOXPAFPN __all__ = [ - 'YOLOv5PAFPN', 'BaseYOLONeck', 'YOLOv6RepPAFPN', 'YOLOXPAFPN', - 'CSPNeXtPAFPN', 'YOLOv7PAFPN', 'PPYOLOECSPPAFPN', 'YOLOv6CSPRepPAFPN', - 'YOLOv8PAFPN' + "YOLOv5PAFPN", + "BaseYOLONeck", + "YOLOv6RepPAFPN", + "YOLOXPAFPN", + "CSPNeXtPAFPN", + "YOLOv7PAFPN", + "PPYOLOECSPPAFPN", + "YOLOv6CSPRepPAFPN", + "YOLOv8PAFPN", ] diff --git a/mmyolo/mmyolo/models/necks/base_yolo_neck.py b/mmyolo/mmyolo/models/necks/base_yolo_neck.py index 8825b763..bc41ecc7 100644 --- a/mmyolo/mmyolo/models/necks/base_yolo_neck.py +++ b/mmyolo/mmyolo/models/necks/base_yolo_neck.py @@ -131,17 +131,19 @@ class BaseYOLONeck(BaseModule, metaclass=ABCMeta): Defaults to None. """ - def __init__(self, - in_channels: List[int], - out_channels: Union[int, List[int]], - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - upsample_feats_cat_first: bool = True, - freeze_all: bool = False, - norm_cfg: ConfigType = None, - act_cfg: ConfigType = None, - init_cfg: OptMultiConfig = None, - **kwargs): + def __init__( + self, + in_channels: List[int], + out_channels: Union[int, List[int]], + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + upsample_feats_cat_first: bool = True, + freeze_all: bool = False, + norm_cfg: ConfigType = None, + act_cfg: ConfigType = None, + init_cfg: OptMultiConfig = None, + **kwargs, + ): super().__init__(init_cfg) self.in_channels = in_channels self.out_channels = out_channels @@ -177,32 +179,26 @@ def __init__(self, @abstractmethod def build_reduce_layer(self, idx: int): """build reduce layer.""" - pass @abstractmethod def build_upsample_layer(self, idx: int): """build upsample layer.""" - pass @abstractmethod def build_top_down_layer(self, idx: int): """build top down layer.""" - pass @abstractmethod def build_downsample_layer(self, idx: int): """build downsample layer.""" - pass @abstractmethod def build_bottom_up_layer(self, idx: int): """build bottom up layer.""" - pass @abstractmethod def build_out_layer(self, idx: int): """build out layer.""" - pass def _freeze_all(self): """Freeze the model.""" @@ -232,15 +228,16 @@ def forward(self, inputs: List[torch.Tensor]) -> tuple: for idx in range(len(self.in_channels) - 1, 0, -1): feat_high = inner_outs[0] feat_low = reduce_outs[idx - 1] - upsample_feat = self.upsample_layers[len(self.in_channels) - 1 - - idx]( - feat_high) + upsample_feat = self.upsample_layers[len(self.in_channels) - 1 - idx]( + feat_high + ) if self.upsample_feats_cat_first: 
top_down_layer_inputs = torch.cat([upsample_feat, feat_low], 1) else: top_down_layer_inputs = torch.cat([feat_low, upsample_feat], 1) inner_out = self.top_down_layers[len(self.in_channels) - 1 - idx]( - top_down_layer_inputs) + top_down_layer_inputs + ) inner_outs.insert(0, inner_out) # bottom-up path @@ -249,8 +246,7 @@ def forward(self, inputs: List[torch.Tensor]) -> tuple: feat_low = outs[-1] feat_high = inner_outs[idx + 1] downsample_feat = self.downsample_layers[idx](feat_low) - out = self.bottom_up_layers[idx]( - torch.cat([downsample_feat, feat_high], 1)) + out = self.bottom_up_layers[idx](torch.cat([downsample_feat, feat_high], 1)) outs.append(out) # out_layers diff --git a/mmyolo/mmyolo/models/necks/cspnext_pafpn.py b/mmyolo/mmyolo/models/necks/cspnext_pafpn.py index 310126f6..e77eee18 100644 --- a/mmyolo/mmyolo/models/necks/cspnext_pafpn.py +++ b/mmyolo/mmyolo/models/necks/cspnext_pafpn.py @@ -8,6 +8,7 @@ from mmdet.utils import ConfigType, OptMultiConfig from mmyolo.registry import MODELS + from .base_yolo_neck import BaseYOLONeck @@ -50,36 +51,35 @@ def __init__( freeze_all: bool = False, use_depthwise: bool = False, expand_ratio: float = 0.5, - upsample_cfg: ConfigType = dict(scale_factor=2, mode='nearest'), + upsample_cfg: ConfigType = dict(scale_factor=2, mode="nearest"), conv_cfg: bool = None, - norm_cfg: ConfigType = dict(type='BN'), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), + norm_cfg: ConfigType = dict(type="BN"), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), init_cfg: OptMultiConfig = dict( - type='Kaiming', - layer='Conv2d', + type="Kaiming", + layer="Conv2d", a=math.sqrt(5), - distribution='uniform', - mode='fan_in', - nonlinearity='leaky_relu') + distribution="uniform", + mode="fan_in", + nonlinearity="leaky_relu", + ), ) -> None: self.num_csp_blocks = round(num_csp_blocks * deepen_factor) - self.conv = DepthwiseSeparableConvModule \ - if use_depthwise else ConvModule + self.conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule self.upsample_cfg = upsample_cfg self.expand_ratio = expand_ratio self.conv_cfg = conv_cfg super().__init__( - in_channels=[ - int(channel * widen_factor) for channel in in_channels - ], + in_channels=[int(channel * widen_factor) for channel in in_channels], out_channels=int(out_channels * widen_factor), deepen_factor=deepen_factor, widen_factor=widen_factor, freeze_all=freeze_all, norm_cfg=norm_cfg, act_cfg=act_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_reduce_layer(self, idx: int) -> nn.Module: """build reduce layer. @@ -96,7 +96,8 @@ def build_reduce_layer(self, idx: int) -> nn.Module: self.in_channels[idx - 1], 1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) else: layer = nn.Identity() @@ -125,7 +126,8 @@ def build_top_down_layer(self, idx: int) -> nn.Module: expand_ratio=self.expand_ratio, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) else: return nn.Sequential( CSPLayer( @@ -137,13 +139,16 @@ def build_top_down_layer(self, idx: int) -> nn.Module: expand_ratio=self.expand_ratio, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), self.conv( self.in_channels[idx - 1], self.in_channels[idx - 2], kernel_size=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ), + ) def build_downsample_layer(self, idx: int) -> nn.Module: """build downsample layer. 
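BaseYOLONeck.forward above runs a fixed top-down then bottom-up pass over the reduced inputs. A toy version of just the top-down loop for three feature maps, with identity modules standing in for the per-index reduce and top-down layers (upsample_feats_cat_first=True ordering assumed):

import torch
import torch.nn.functional as F

feats = [torch.randn(1, c, s, s) for c, s in [(64, 80), (128, 40), (256, 20)]]
inner = [feats[-1]]
for idx in range(2, 0, -1):
    # Upsample the deeper feature and concatenate with the shallower one.
    up = F.interpolate(inner[0], scale_factor=2, mode="nearest")
    inner.insert(0, torch.cat([up, feats[idx - 1]], dim=1))
print([tuple(t.shape) for t in inner])
# [(1, 448, 80, 80), (1, 384, 40, 40), (1, 256, 20, 20)]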
@@ -161,7 +166,8 @@ def build_downsample_layer(self, idx: int) -> nn.Module: stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_bottom_up_layer(self, idx: int) -> nn.Module: """build bottom up layer. @@ -181,7 +187,8 @@ def build_bottom_up_layer(self, idx: int) -> nn.Module: expand_ratio=self.expand_ratio, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_out_layer(self, idx: int) -> nn.Module: """build out layer. @@ -198,4 +205,5 @@ def build_out_layer(self, idx: int) -> nn.Module: 3, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) diff --git a/mmyolo/mmyolo/models/necks/ppyoloe_csppan.py b/mmyolo/mmyolo/models/necks/ppyoloe_csppan.py index 4e4ef720..93dbf2e5 100644 --- a/mmyolo/mmyolo/models/necks/ppyoloe_csppan.py +++ b/mmyolo/mmyolo/models/necks/ppyoloe_csppan.py @@ -44,23 +44,24 @@ class PPYOLOECSPPAFPN(BaseYOLONeck): Defaults to False. """ - def __init__(self, - in_channels: List[int] = [256, 512, 1024], - out_channels: List[int] = [256, 512, 1024], - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - freeze_all: bool = False, - num_csplayer: int = 1, - num_blocks_per_layer: int = 3, - block_cfg: ConfigType = dict( - type='PPYOLOEBasicBlock', shortcut=False, - use_alpha=False), - norm_cfg: ConfigType = dict( - type='BN', momentum=0.1, eps=1e-5), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - drop_block_cfg: ConfigType = None, - init_cfg: OptMultiConfig = None, - use_spp: bool = False): + def __init__( + self, + in_channels: List[int] = [256, 512, 1024], + out_channels: List[int] = [256, 512, 1024], + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + freeze_all: bool = False, + num_csplayer: int = 1, + num_blocks_per_layer: int = 3, + block_cfg: ConfigType = dict( + type="PPYOLOEBasicBlock", shortcut=False, use_alpha=False + ), + norm_cfg: ConfigType = dict(type="BN", momentum=0.1, eps=1e-5), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + drop_block_cfg: ConfigType = None, + init_cfg: OptMultiConfig = None, + use_spp: bool = False, + ): self.block_cfg = block_cfg self.num_csplayer = num_csplayer self.num_blocks_per_layer = round(num_blocks_per_layer * deepen_factor) @@ -70,18 +71,15 @@ def __init__(self, assert drop_block_cfg is None or isinstance(drop_block_cfg, dict) super().__init__( - in_channels=[ - int(channel * widen_factor) for channel in in_channels - ], - out_channels=[ - int(channel * widen_factor) for channel in out_channels - ], + in_channels=[int(channel * widen_factor) for channel in in_channels], + out_channels=[int(channel * widen_factor) for channel in out_channels], deepen_factor=deepen_factor, widen_factor=widen_factor, freeze_all=freeze_all, norm_cfg=norm_cfg, act_cfg=act_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_reduce_layer(self, idx: int): """build reduce layer. 
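PPYOLOECSPPAFPN.__init__ above applies widen_factor to both channel lists and deepen_factor to num_blocks_per_layer before calling the base class; for example, under a small-model setting such as widen_factor=0.5 and deepen_factor=0.33 (values illustrative):

in_channels = [256, 512, 1024]
widen_factor, deepen_factor = 0.5, 0.33
print([int(c * widen_factor) for c in in_channels])  # [128, 256, 512]
print(round(3 * deepen_factor))                      # 1 block per CSP layer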
@@ -106,7 +104,9 @@ def build_reduce_layer(self, idx: int): norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, attention_cfg=None, - use_spp=self.use_spp) for i in range(self.num_csplayer) + use_spp=self.use_spp, + ) + for i in range(self.num_csplayer) ] if self.drop_block_cfg: @@ -129,8 +129,10 @@ def build_upsample_layer(self, idx: int) -> nn.Module: stride=1, padding=0, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), - nn.Upsample(scale_factor=2, mode='nearest')) + act_cfg=self.act_cfg, + ), + nn.Upsample(scale_factor=2, mode="nearest"), + ) def build_top_down_layer(self, idx: int) -> nn.Module: """build top down layer. @@ -154,7 +156,9 @@ def build_top_down_layer(self, idx: int) -> nn.Module: norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, attention_cfg=None, - use_spp=False) for i in range(self.num_csplayer) + use_spp=False, + ) + for i in range(self.num_csplayer) ] if self.drop_block_cfg: @@ -179,7 +183,8 @@ def build_downsample_layer(self, idx: int) -> nn.Module: stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_bottom_up_layer(self, idx: int) -> nn.Module: """build bottom up layer. @@ -203,7 +208,9 @@ def build_bottom_up_layer(self, idx: int) -> nn.Module: norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, attention_cfg=None, - use_spp=False) for i in range(self.num_csplayer) + use_spp=False, + ) + for i in range(self.num_csplayer) ] if self.drop_block_cfg: diff --git a/mmyolo/mmyolo/models/necks/yolov5_pafpn.py b/mmyolo/mmyolo/models/necks/yolov5_pafpn.py index b95147fc..954918a6 100644 --- a/mmyolo/mmyolo/models/necks/yolov5_pafpn.py +++ b/mmyolo/mmyolo/models/necks/yolov5_pafpn.py @@ -8,6 +8,7 @@ from mmdet.utils import ConfigType, OptMultiConfig from mmyolo.registry import MODELS + from ..utils import make_divisible, make_round from .base_yolo_neck import BaseYOLONeck @@ -33,17 +34,18 @@ class YOLOv5PAFPN(BaseYOLONeck): Defaults to None. """ - def __init__(self, - in_channels: List[int], - out_channels: Union[List[int], int], - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - num_csp_blocks: int = 1, - freeze_all: bool = False, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + def __init__( + self, + in_channels: List[int], + out_channels: Union[List[int], int], + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + num_csp_blocks: int = 1, + freeze_all: bool = False, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): self.num_csp_blocks = num_csp_blocks super().__init__( in_channels=in_channels, @@ -53,7 +55,8 @@ def __init__(self, freeze_all=freeze_all, norm_cfg=norm_cfg, act_cfg=act_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def init_weights(self): if self.init_cfg is None: @@ -81,7 +84,8 @@ def build_reduce_layer(self, idx: int) -> nn.Module: make_divisible(self.in_channels[idx - 1], self.widen_factor), 1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) else: layer = nn.Identity() @@ -89,7 +93,7 @@ def build_reduce_layer(self, idx: int) -> nn.Module: def build_upsample_layer(self, *args, **kwargs) -> nn.Module: """build upsample layer.""" - return nn.Upsample(scale_factor=2, mode='nearest') + return nn.Upsample(scale_factor=2, mode="nearest") def build_top_down_layer(self, idx: int): """build top down layer. 
@@ -103,33 +107,31 @@ def build_top_down_layer(self, idx: int): if idx == 1: return CSPLayer( - make_divisible(self.in_channels[idx - 1] * 2, - self.widen_factor), + make_divisible(self.in_channels[idx - 1] * 2, self.widen_factor), make_divisible(self.in_channels[idx - 1], self.widen_factor), num_blocks=make_round(self.num_csp_blocks, self.deepen_factor), add_identity=False, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) else: return nn.Sequential( CSPLayer( - make_divisible(self.in_channels[idx - 1] * 2, - self.widen_factor), - make_divisible(self.in_channels[idx - 1], - self.widen_factor), - num_blocks=make_round(self.num_csp_blocks, - self.deepen_factor), + make_divisible(self.in_channels[idx - 1] * 2, self.widen_factor), + make_divisible(self.in_channels[idx - 1], self.widen_factor), + num_blocks=make_round(self.num_csp_blocks, self.deepen_factor), add_identity=False, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( - make_divisible(self.in_channels[idx - 1], - self.widen_factor), - make_divisible(self.in_channels[idx - 2], - self.widen_factor), + make_divisible(self.in_channels[idx - 1], self.widen_factor), + make_divisible(self.in_channels[idx - 2], self.widen_factor), kernel_size=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ), + ) def build_downsample_layer(self, idx: int) -> nn.Module: """build downsample layer. @@ -147,7 +149,8 @@ def build_downsample_layer(self, idx: int) -> nn.Module: stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_bottom_up_layer(self, idx: int) -> nn.Module: """build bottom up layer. @@ -164,7 +167,8 @@ def build_bottom_up_layer(self, idx: int) -> nn.Module: num_blocks=make_round(self.num_csp_blocks, self.deepen_factor), add_identity=False, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_out_layer(self, *args, **kwargs) -> nn.Module: """build out layer.""" diff --git a/mmyolo/mmyolo/models/necks/yolov6_pafpn.py b/mmyolo/mmyolo/models/necks/yolov6_pafpn.py index 74b7ce93..a1f24828 100644 --- a/mmyolo/mmyolo/models/necks/yolov6_pafpn.py +++ b/mmyolo/mmyolo/models/necks/yolov6_pafpn.py @@ -7,6 +7,7 @@ from mmdet.utils import ConfigType, OptMultiConfig from mmyolo.registry import MODELS + from ..layers import BepC3StageBlock, RepStageBlock from ..utils import make_round from .base_yolo_neck import BaseYOLONeck @@ -35,18 +36,19 @@ class YOLOv6RepPAFPN(BaseYOLONeck): Defaults to None. 
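YOLOv5PAFPN above defers all width and depth scaling to make_divisible and make_round at layer-build time instead of pre-scaling the channel lists. A sketch of what those helpers compute, re-implemented here for illustration (see mmyolo.models.utils for the real ones):

import math

def make_divisible(x: int, widen_factor: float = 1.0, divisor: int = 8) -> int:
    # Scale channels, rounding up to a multiple of `divisor`.
    return math.ceil(x * widen_factor / divisor) * divisor

def make_round(x: int, deepen_factor: float = 1.0) -> int:
    # Scale block counts, never dropping below one block.
    return max(round(x * deepen_factor), 1) if x > 1 else x

print(make_divisible(512, 0.5))  # 256 channels at half width
print(make_round(3, 0.33))       # 1 CSP block at a third of the depth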
""" - def __init__(self, - in_channels: List[int], - out_channels: int, - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - num_csp_blocks: int = 12, - freeze_all: bool = False, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='ReLU', inplace=True), - block_cfg: ConfigType = dict(type='RepVGGBlock'), - init_cfg: OptMultiConfig = None): + def __init__( + self, + in_channels: List[int], + out_channels: int, + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + num_csp_blocks: int = 12, + freeze_all: bool = False, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="ReLU", inplace=True), + block_cfg: ConfigType = dict(type="RepVGGBlock"), + init_cfg: OptMultiConfig = None, + ): self.num_csp_blocks = num_csp_blocks self.block_cfg = block_cfg super().__init__( @@ -57,7 +59,8 @@ def __init__(self, freeze_all=freeze_all, norm_cfg=norm_cfg, act_cfg=act_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_reduce_layer(self, idx: int) -> nn.Module: """build reduce layer. @@ -70,12 +73,12 @@ def build_reduce_layer(self, idx: int) -> nn.Module: if idx == 2: layer = ConvModule( in_channels=int(self.in_channels[idx] * self.widen_factor), - out_channels=int(self.out_channels[idx - 1] * - self.widen_factor), + out_channels=int(self.out_channels[idx - 1] * self.widen_factor), kernel_size=1, stride=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) else: layer = nn.Identity() @@ -94,7 +97,8 @@ def build_upsample_layer(self, idx: int) -> nn.Module: out_channels=int(self.out_channels[idx - 1] * self.widen_factor), kernel_size=2, stride=2, - bias=True) + bias=True, + ) def build_top_down_layer(self, idx: int) -> nn.Module: """build top down layer. @@ -108,24 +112,25 @@ def build_top_down_layer(self, idx: int) -> nn.Module: layer0 = RepStageBlock( in_channels=int( - (self.out_channels[idx - 1] + self.in_channels[idx - 1]) * - self.widen_factor), + (self.out_channels[idx - 1] + self.in_channels[idx - 1]) + * self.widen_factor + ), out_channels=int(self.out_channels[idx - 1] * self.widen_factor), num_blocks=make_round(self.num_csp_blocks, self.deepen_factor), - block_cfg=block_cfg) + block_cfg=block_cfg, + ) if idx == 1: return layer0 elif idx == 2: layer1 = ConvModule( - in_channels=int(self.out_channels[idx - 1] * - self.widen_factor), - out_channels=int(self.out_channels[idx - 2] * - self.widen_factor), + in_channels=int(self.out_channels[idx - 1] * self.widen_factor), + out_channels=int(self.out_channels[idx - 2] * self.widen_factor), kernel_size=1, stride=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) return nn.Sequential(layer0, layer1) def build_downsample_layer(self, idx: int) -> nn.Module: @@ -143,7 +148,8 @@ def build_downsample_layer(self, idx: int) -> nn.Module: stride=2, padding=3 // 2, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_bottom_up_layer(self, idx: int) -> nn.Module: """build bottom up layer. @@ -159,7 +165,8 @@ def build_bottom_up_layer(self, idx: int) -> nn.Module: in_channels=int(self.out_channels[idx] * 2 * self.widen_factor), out_channels=int(self.out_channels[idx + 1] * self.widen_factor), num_blocks=make_round(self.num_csp_blocks, self.deepen_factor), - block_cfg=block_cfg) + block_cfg=block_cfg, + ) def build_out_layer(self, *args, **kwargs) -> nn.Module: """build out layer.""" @@ -202,20 +209,21 @@ class YOLOv6CSPRepPAFPN(YOLOv6RepPAFPN): Defaults to None. 
""" - def __init__(self, - in_channels: List[int], - out_channels: int, - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - hidden_ratio: float = 0.5, - num_csp_blocks: int = 12, - freeze_all: bool = False, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='ReLU', inplace=True), - block_act_cfg: ConfigType = dict(type='SiLU', inplace=True), - block_cfg: ConfigType = dict(type='RepVGGBlock'), - init_cfg: OptMultiConfig = None): + def __init__( + self, + in_channels: List[int], + out_channels: int, + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + hidden_ratio: float = 0.5, + num_csp_blocks: int = 12, + freeze_all: bool = False, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="ReLU", inplace=True), + block_act_cfg: ConfigType = dict(type="SiLU", inplace=True), + block_cfg: ConfigType = dict(type="RepVGGBlock"), + init_cfg: OptMultiConfig = None, + ): self.hidden_ratio = hidden_ratio self.block_act_cfg = block_act_cfg super().__init__( @@ -228,7 +236,8 @@ def __init__(self, norm_cfg=norm_cfg, act_cfg=act_cfg, block_cfg=block_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_top_down_layer(self, idx: int) -> nn.Module: """build top down layer. @@ -242,27 +251,28 @@ def build_top_down_layer(self, idx: int) -> nn.Module: layer0 = BepC3StageBlock( in_channels=int( - (self.out_channels[idx - 1] + self.in_channels[idx - 1]) * - self.widen_factor), + (self.out_channels[idx - 1] + self.in_channels[idx - 1]) + * self.widen_factor + ), out_channels=int(self.out_channels[idx - 1] * self.widen_factor), num_blocks=make_round(self.num_csp_blocks, self.deepen_factor), block_cfg=block_cfg, hidden_ratio=self.hidden_ratio, norm_cfg=self.norm_cfg, - act_cfg=self.block_act_cfg) + act_cfg=self.block_act_cfg, + ) if idx == 1: return layer0 elif idx == 2: layer1 = ConvModule( - in_channels=int(self.out_channels[idx - 1] * - self.widen_factor), - out_channels=int(self.out_channels[idx - 2] * - self.widen_factor), + in_channels=int(self.out_channels[idx - 1] * self.widen_factor), + out_channels=int(self.out_channels[idx - 2] * self.widen_factor), kernel_size=1, stride=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) return nn.Sequential(layer0, layer1) def build_bottom_up_layer(self, idx: int) -> nn.Module: @@ -282,4 +292,5 @@ def build_bottom_up_layer(self, idx: int) -> nn.Module: block_cfg=block_cfg, hidden_ratio=self.hidden_ratio, norm_cfg=self.norm_cfg, - act_cfg=self.block_act_cfg) + act_cfg=self.block_act_cfg, + ) diff --git a/mmyolo/mmyolo/models/necks/yolov7_pafpn.py b/mmyolo/mmyolo/models/necks/yolov7_pafpn.py index 1d31f462..80b47406 100644 --- a/mmyolo/mmyolo/models/necks/yolov7_pafpn.py +++ b/mmyolo/mmyolo/models/necks/yolov7_pafpn.py @@ -6,6 +6,7 @@ from mmdet.utils import ConfigType, OptMultiConfig from mmyolo.registry import MODELS + from ..layers import MaxPoolAndStrideConvBlock, RepVGGBlock, SPPFCSPBlock from .base_yolo_neck import BaseYOLONeck @@ -44,28 +45,30 @@ class YOLOv7PAFPN(BaseYOLONeck): Defaults to None. 
""" - def __init__(self, - in_channels: List[int], - out_channels: List[int], - block_cfg: dict = dict( - type='ELANBlock', - middle_ratio=0.5, - block_ratio=0.25, - num_blocks=4, - num_convs_in_block=1), - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - spp_expand_ratio: float = 0.5, - is_tiny_version: bool = False, - use_maxpool_in_downsample: bool = True, - use_in_channels_in_downsample: bool = False, - use_repconv_outs: bool = True, - upsample_feats_cat_first: bool = False, - freeze_all: bool = False, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + def __init__( + self, + in_channels: List[int], + out_channels: List[int], + block_cfg: dict = dict( + type="ELANBlock", + middle_ratio=0.5, + block_ratio=0.25, + num_blocks=4, + num_convs_in_block=1, + ), + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + spp_expand_ratio: float = 0.5, + is_tiny_version: bool = False, + use_maxpool_in_downsample: bool = True, + use_in_channels_in_downsample: bool = False, + use_repconv_outs: bool = True, + upsample_feats_cat_first: bool = False, + freeze_all: bool = False, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): self.is_tiny_version = is_tiny_version self.use_maxpool_in_downsample = use_maxpool_in_downsample @@ -73,23 +76,20 @@ def __init__(self, self.spp_expand_ratio = spp_expand_ratio self.use_repconv_outs = use_repconv_outs self.block_cfg = block_cfg - self.block_cfg.setdefault('norm_cfg', norm_cfg) - self.block_cfg.setdefault('act_cfg', act_cfg) + self.block_cfg.setdefault("norm_cfg", norm_cfg) + self.block_cfg.setdefault("act_cfg", act_cfg) super().__init__( - in_channels=[ - int(channel * widen_factor) for channel in in_channels - ], - out_channels=[ - int(channel * widen_factor) for channel in out_channels - ], + in_channels=[int(channel * widen_factor) for channel in in_channels], + out_channels=[int(channel * widen_factor) for channel in out_channels], deepen_factor=deepen_factor, widen_factor=widen_factor, upsample_feats_cat_first=upsample_feats_cat_first, freeze_all=freeze_all, norm_cfg=norm_cfg, act_cfg=act_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_reduce_layer(self, idx: int) -> nn.Module: """build reduce layer. @@ -108,14 +108,16 @@ def build_reduce_layer(self, idx: int) -> nn.Module: is_tiny_version=self.is_tiny_version, kernel_sizes=5, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) else: layer = ConvModule( self.in_channels[idx], self.out_channels[idx], 1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) return layer @@ -127,8 +129,10 @@ def build_upsample_layer(self, idx: int) -> nn.Module: self.out_channels[idx - 1], 1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), - nn.Upsample(scale_factor=2, mode='nearest')) + act_cfg=self.act_cfg, + ), + nn.Upsample(scale_factor=2, mode="nearest"), + ) def build_top_down_layer(self, idx: int) -> nn.Module: """build top down layer. @@ -140,8 +144,8 @@ def build_top_down_layer(self, idx: int) -> nn.Module: nn.Module: The top down layer. 
""" block_cfg = self.block_cfg.copy() - block_cfg['in_channels'] = self.out_channels[idx - 1] * 2 - block_cfg['out_channels'] = self.out_channels[idx - 1] + block_cfg["in_channels"] = self.out_channels[idx - 1] * 2 + block_cfg["out_channels"] = self.out_channels[idx - 1] return MODELS.build(block_cfg) def build_downsample_layer(self, idx: int) -> nn.Module: @@ -159,7 +163,8 @@ def build_downsample_layer(self, idx: int) -> nn.Module: self.out_channels[idx + 1], use_in_channels_of_middle=self.use_in_channels_in_downsample, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) else: return ConvModule( self.out_channels[idx], @@ -168,7 +173,8 @@ def build_downsample_layer(self, idx: int) -> nn.Module: stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_bottom_up_layer(self, idx: int) -> nn.Module: """build bottom up layer. @@ -180,8 +186,8 @@ def build_bottom_up_layer(self, idx: int) -> nn.Module: nn.Module: The bottom up layer. """ block_cfg = self.block_cfg.copy() - block_cfg['in_channels'] = self.out_channels[idx + 1] * 2 - block_cfg['out_channels'] = self.out_channels[idx + 1] + block_cfg["in_channels"] = self.out_channels[idx + 1] * 2 + block_cfg["out_channels"] = self.out_channels[idx + 1] return MODELS.build(block_cfg) def build_out_layer(self, idx: int) -> nn.Module: @@ -205,7 +211,8 @@ def build_out_layer(self, idx: int) -> nn.Module: out_channels, 3, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) else: return ConvModule( self.out_channels[idx], @@ -213,4 +220,5 @@ def build_out_layer(self, idx: int) -> nn.Module: 3, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) diff --git a/mmyolo/mmyolo/models/necks/yolov8_pafpn.py b/mmyolo/mmyolo/models/necks/yolov8_pafpn.py index e26698bc..47e39733 100644 --- a/mmyolo/mmyolo/models/necks/yolov8_pafpn.py +++ b/mmyolo/mmyolo/models/necks/yolov8_pafpn.py @@ -5,6 +5,7 @@ from mmdet.utils import ConfigType, OptMultiConfig from mmyolo.registry import MODELS + from .. import CSPLayerWithTwoConv from ..utils import make_divisible, make_round from .yolov5_pafpn import YOLOv5PAFPN @@ -31,17 +32,18 @@ class YOLOv8PAFPN(YOLOv5PAFPN): Defaults to None. """ - def __init__(self, - in_channels: List[int], - out_channels: Union[List[int], int], - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - num_csp_blocks: int = 3, - freeze_all: bool = False, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + def __init__( + self, + in_channels: List[int], + out_channels: Union[List[int], int], + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + num_csp_blocks: int = 3, + freeze_all: bool = False, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): super().__init__( in_channels=in_channels, out_channels=out_channels, @@ -51,7 +53,8 @@ def __init__(self, freeze_all=freeze_all, norm_cfg=norm_cfg, act_cfg=act_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_reduce_layer(self, idx: int) -> nn.Module: """build reduce layer. @@ -74,13 +77,15 @@ def build_top_down_layer(self, idx: int) -> nn.Module: nn.Module: The top down layer. 
""" return CSPLayerWithTwoConv( - make_divisible((self.in_channels[idx - 1] + self.in_channels[idx]), - self.widen_factor), + make_divisible( + (self.in_channels[idx - 1] + self.in_channels[idx]), self.widen_factor + ), make_divisible(self.out_channels[idx - 1], self.widen_factor), num_blocks=make_round(self.num_csp_blocks, self.deepen_factor), add_identity=False, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_bottom_up_layer(self, idx: int) -> nn.Module: """build bottom up layer. @@ -93,10 +98,11 @@ def build_bottom_up_layer(self, idx: int) -> nn.Module: """ return CSPLayerWithTwoConv( make_divisible( - (self.out_channels[idx] + self.out_channels[idx + 1]), - self.widen_factor), + (self.out_channels[idx] + self.out_channels[idx + 1]), self.widen_factor + ), make_divisible(self.out_channels[idx + 1], self.widen_factor), num_blocks=make_round(self.num_csp_blocks, self.deepen_factor), add_identity=False, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) diff --git a/mmyolo/mmyolo/models/necks/yolox_pafpn.py b/mmyolo/mmyolo/models/necks/yolox_pafpn.py index bd2595e7..b31c37ef 100644 --- a/mmyolo/mmyolo/models/necks/yolox_pafpn.py +++ b/mmyolo/mmyolo/models/necks/yolox_pafpn.py @@ -7,6 +7,7 @@ from mmdet.utils import ConfigType, OptMultiConfig from mmyolo.registry import MODELS + from .base_yolo_neck import BaseYOLONeck @@ -33,32 +34,32 @@ class YOLOXPAFPN(BaseYOLONeck): Defaults to None. """ - def __init__(self, - in_channels: List[int], - out_channels: int, - deepen_factor: float = 1.0, - widen_factor: float = 1.0, - num_csp_blocks: int = 3, - use_depthwise: bool = False, - freeze_all: bool = False, - norm_cfg: ConfigType = dict( - type='BN', momentum=0.03, eps=0.001), - act_cfg: ConfigType = dict(type='SiLU', inplace=True), - init_cfg: OptMultiConfig = None): + def __init__( + self, + in_channels: List[int], + out_channels: int, + deepen_factor: float = 1.0, + widen_factor: float = 1.0, + num_csp_blocks: int = 3, + use_depthwise: bool = False, + freeze_all: bool = False, + norm_cfg: ConfigType = dict(type="BN", momentum=0.03, eps=0.001), + act_cfg: ConfigType = dict(type="SiLU", inplace=True), + init_cfg: OptMultiConfig = None, + ): self.num_csp_blocks = round(num_csp_blocks * deepen_factor) self.use_depthwise = use_depthwise super().__init__( - in_channels=[ - int(channel * widen_factor) for channel in in_channels - ], + in_channels=[int(channel * widen_factor) for channel in in_channels], out_channels=int(out_channels * widen_factor), deepen_factor=deepen_factor, widen_factor=widen_factor, freeze_all=freeze_all, norm_cfg=norm_cfg, act_cfg=act_cfg, - init_cfg=init_cfg) + init_cfg=init_cfg, + ) def build_reduce_layer(self, idx: int) -> nn.Module: """build reduce layer. @@ -75,7 +76,8 @@ def build_reduce_layer(self, idx: int) -> nn.Module: self.in_channels[idx - 1], 1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) else: layer = nn.Identity() @@ -83,7 +85,7 @@ def build_reduce_layer(self, idx: int) -> nn.Module: def build_upsample_layer(self, *args, **kwargs) -> nn.Module: """build upsample layer.""" - return nn.Upsample(scale_factor=2, mode='nearest') + return nn.Upsample(scale_factor=2, mode="nearest") def build_top_down_layer(self, idx: int) -> nn.Module: """build top down layer. 
@@ -101,7 +103,8 @@ def build_top_down_layer(self, idx: int) -> nn.Module: num_blocks=self.num_csp_blocks, add_identity=False, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) elif idx == 2: return nn.Sequential( CSPLayer( @@ -110,13 +113,16 @@ def build_top_down_layer(self, idx: int) -> nn.Module: num_blocks=self.num_csp_blocks, add_identity=False, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg), + act_cfg=self.act_cfg, + ), ConvModule( self.in_channels[idx - 1], self.in_channels[idx - 2], kernel_size=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg)) + act_cfg=self.act_cfg, + ), + ) def build_downsample_layer(self, idx: int) -> nn.Module: """build downsample layer. @@ -127,8 +133,7 @@ def build_downsample_layer(self, idx: int) -> nn.Module: Returns: nn.Module: The downsample layer. """ - conv = DepthwiseSeparableConvModule \ - if self.use_depthwise else ConvModule + conv = DepthwiseSeparableConvModule if self.use_depthwise else ConvModule return conv( self.in_channels[idx], self.in_channels[idx], @@ -136,7 +141,8 @@ def build_downsample_layer(self, idx: int) -> nn.Module: stride=2, padding=1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_bottom_up_layer(self, idx: int) -> nn.Module: """build bottom up layer. @@ -153,7 +159,8 @@ def build_bottom_up_layer(self, idx: int) -> nn.Module: num_blocks=self.num_csp_blocks, add_identity=False, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) def build_out_layer(self, idx: int) -> nn.Module: """build out layer. @@ -169,4 +176,5 @@ def build_out_layer(self, idx: int) -> nn.Module: self.out_channels, 1, norm_cfg=self.norm_cfg, - act_cfg=self.act_cfg) + act_cfg=self.act_cfg, + ) diff --git a/mmyolo/mmyolo/models/plugins/__init__.py b/mmyolo/mmyolo/models/plugins/__init__.py index 497233ac..4ae11f36 100644 --- a/mmyolo/mmyolo/models/plugins/__init__.py +++ b/mmyolo/mmyolo/models/plugins/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. from .cbam import CBAM -__all__ = ['CBAM'] +__all__ = ["CBAM"] diff --git a/mmyolo/mmyolo/models/plugins/cbam.py b/mmyolo/mmyolo/models/plugins/cbam.py index e9559f2e..cac9f65e 100644 --- a/mmyolo/mmyolo/models/plugins/cbam.py +++ b/mmyolo/mmyolo/models/plugins/cbam.py @@ -21,10 +21,9 @@ class ChannelAttention(BaseModule): Defaults to dict(type='ReLU'). """ - def __init__(self, - channels: int, - reduce_ratio: int = 16, - act_cfg: dict = dict(type='ReLU')): + def __init__( + self, channels: int, reduce_ratio: int = 16, act_cfg: dict = dict(type="ReLU") + ): super().__init__() self.avg_pool = nn.AdaptiveAvgPool2d(1) @@ -37,14 +36,17 @@ def __init__(self, kernel_size=1, stride=1, conv_cfg=None, - act_cfg=act_cfg), + act_cfg=act_cfg, + ), ConvModule( in_channels=int(channels / reduce_ratio), out_channels=channels, kernel_size=1, stride=1, conv_cfg=None, - act_cfg=None)) + act_cfg=None, + ), + ) self.sigmoid = nn.Sigmoid() def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -72,7 +74,8 @@ def __init__(self, kernel_size: int = 7): stride=1, padding=kernel_size // 2, conv_cfg=None, - act_cfg=dict(type='Sigmoid')) + act_cfg=dict(type="Sigmoid"), + ) def forward(self, x: torch.Tensor) -> torch.Tensor: """Forward function.""" @@ -100,15 +103,18 @@ class CBAM(BaseModule): Defaults to None. 
""" - def __init__(self, - in_channels: int, - reduce_ratio: int = 16, - kernel_size: int = 7, - act_cfg: dict = dict(type='ReLU'), - init_cfg: OptMultiConfig = None): + def __init__( + self, + in_channels: int, + reduce_ratio: int = 16, + kernel_size: int = 7, + act_cfg: dict = dict(type="ReLU"), + init_cfg: OptMultiConfig = None, + ): super().__init__(init_cfg) self.channel_attention = ChannelAttention( - channels=in_channels, reduce_ratio=reduce_ratio, act_cfg=act_cfg) + channels=in_channels, reduce_ratio=reduce_ratio, act_cfg=act_cfg + ) self.spatial_attention = SpatialAttention(kernel_size) diff --git a/mmyolo/mmyolo/models/task_modules/__init__.py b/mmyolo/mmyolo/models/task_modules/__init__.py index 7dbdc25f..b03fb21f 100644 --- a/mmyolo/mmyolo/models/task_modules/__init__.py +++ b/mmyolo/mmyolo/models/task_modules/__init__.py @@ -3,6 +3,8 @@ from .coders import YOLOv5BBoxCoder, YOLOXBBoxCoder __all__ = [ - 'YOLOv5BBoxCoder', 'YOLOXBBoxCoder', 'BatchATSSAssigner', - 'BatchTaskAlignedAssigner' + "YOLOv5BBoxCoder", + "YOLOXBBoxCoder", + "BatchATSSAssigner", + "BatchTaskAlignedAssigner", ] diff --git a/mmyolo/mmyolo/models/task_modules/assigners/__init__.py b/mmyolo/mmyolo/models/task_modules/assigners/__init__.py index e74ab728..e57f6eb7 100644 --- a/mmyolo/mmyolo/models/task_modules/assigners/__init__.py +++ b/mmyolo/mmyolo/models/task_modules/assigners/__init__.py @@ -2,11 +2,17 @@ from .batch_atss_assigner import BatchATSSAssigner from .batch_dsl_assigner import BatchDynamicSoftLabelAssigner from .batch_task_aligned_assigner import BatchTaskAlignedAssigner -from .utils import (select_candidates_in_gts, select_highest_overlaps, - yolov6_iou_calculator) +from .utils import ( + select_candidates_in_gts, + select_highest_overlaps, + yolov6_iou_calculator, +) __all__ = [ - 'BatchATSSAssigner', 'BatchTaskAlignedAssigner', - 'select_candidates_in_gts', 'select_highest_overlaps', - 'yolov6_iou_calculator', 'BatchDynamicSoftLabelAssigner' + "BatchATSSAssigner", + "BatchTaskAlignedAssigner", + "select_candidates_in_gts", + "select_highest_overlaps", + "yolov6_iou_calculator", + "BatchDynamicSoftLabelAssigner", ] diff --git a/mmyolo/mmyolo/models/task_modules/assigners/batch_atss_assigner.py b/mmyolo/mmyolo/models/task_modules/assigners/batch_atss_assigner.py index 45b3069a..b194af5e 100644 --- a/mmyolo/mmyolo/models/task_modules/assigners/batch_atss_assigner.py +++ b/mmyolo/mmyolo/models/task_modules/assigners/batch_atss_assigner.py @@ -8,12 +8,15 @@ from torch import Tensor from mmyolo.registry import TASK_UTILS -from .utils import (select_candidates_in_gts, select_highest_overlaps, - yolov6_iou_calculator) +from .utils import ( + select_candidates_in_gts, + select_highest_overlaps, + yolov6_iou_calculator, +) -def bbox_center_distance(bboxes: Tensor, - priors: Tensor) -> Tuple[Tensor, Tensor]: + +def bbox_center_distance(bboxes: Tensor, priors: Tensor) -> Tuple[Tensor, Tensor]: """Compute the center distance between bboxes and priors. 
Args: @@ -34,8 +37,9 @@ def bbox_center_distance(bboxes: Tensor, priors_cy = (priors[:, 1] + priors[:, 3]) / 2.0 priors_points = torch.stack((priors_cx, priors_cy), dim=1) - distances = (bbox_points[:, None, :] - - priors_points[None, :, :]).pow(2).sum(-1).sqrt() + distances = ( + (bbox_points[:, None, :] - priors_points[None, :, :]).pow(2).sum(-1).sqrt() + ) return distances, priors_points @@ -61,19 +65,26 @@ class BatchATSSAssigner(nn.Module): """ def __init__( - self, - num_classes: int, - iou_calculator: ConfigType = dict(type='mmdet.BboxOverlaps2D'), - topk: int = 9): + self, + num_classes: int, + iou_calculator: ConfigType = dict(type="mmdet.BboxOverlaps2D"), + topk: int = 9, + ): super().__init__() self.num_classes = num_classes self.iou_calculator = TASK_UTILS.build(iou_calculator) self.topk = topk @torch.no_grad() - def forward(self, pred_bboxes: Tensor, priors: Tensor, - num_level_priors: List, gt_labels: Tensor, gt_bboxes: Tensor, - pad_bbox_flag: Tensor) -> dict: + def forward( + self, + pred_bboxes: Tensor, + priors: Tensor, + num_level_priors: List, + gt_labels: Tensor, + gt_bboxes: Tensor, + pad_bbox_flag: Tensor, + ) -> dict: """Assign gt to priors. The assignment is done in following steps @@ -120,14 +131,14 @@ def forward(self, pred_bboxes: Tensor, priors: Tensor, num_gt, num_priors = gt_bboxes.size(1), priors.size(0) assigned_result = { - 'assigned_labels': - gt_bboxes.new_full([batch_size, num_priors], self.num_classes), - 'assigned_bboxes': - gt_bboxes.new_full([batch_size, num_priors, 4], 0), - 'assigned_scores': - gt_bboxes.new_full([batch_size, num_priors, self.num_classes], 0), - 'fg_mask_pre_prior': - gt_bboxes.new_full([batch_size, num_priors], 0) + "assigned_labels": gt_bboxes.new_full( + [batch_size, num_priors], self.num_classes + ), + "assigned_bboxes": gt_bboxes.new_full([batch_size, num_priors, 4], 0), + "assigned_scores": gt_bboxes.new_full( + [batch_size, num_priors, self.num_classes], 0 + ), + "fg_mask_pre_prior": gt_bboxes.new_full([batch_size, num_priors], 0), } if num_gt == 0: @@ -139,36 +150,47 @@ def forward(self, pred_bboxes: Tensor, priors: Tensor, # compute center distance between all prior and gt distances, priors_points = bbox_center_distance( - gt_bboxes.reshape([-1, 4]), priors) + gt_bboxes.reshape([-1, 4]), priors + ) distances = distances.reshape([batch_size, -1, num_priors]) # Selecting candidates based on the center distance is_in_candidate, candidate_idxs = self.select_topk_candidates( - distances, num_level_priors, pad_bbox_flag) + distances, num_level_priors, pad_bbox_flag + ) # get corresponding iou for the these candidates, and compute the # mean and std, set mean + std as the iou threshold overlaps_thr_per_gt, iou_candidates = self.threshold_calculator( - is_in_candidate, candidate_idxs, overlaps, num_priors, batch_size, - num_gt) + is_in_candidate, candidate_idxs, overlaps, num_priors, batch_size, num_gt + ) # select candidates iou >= threshold as positive is_pos = torch.where( iou_candidates > overlaps_thr_per_gt.repeat([1, 1, num_priors]), - is_in_candidate, torch.zeros_like(is_in_candidate)) + is_in_candidate, + torch.zeros_like(is_in_candidate), + ) is_in_gts = select_candidates_in_gts(priors_points, gt_bboxes) pos_mask = is_pos * is_in_gts * pad_bbox_flag # if an anchor box is assigned to multiple gts, # the one with the highest IoU will be selected. 
- gt_idx_pre_prior, fg_mask_pre_prior, pos_mask = \ - select_highest_overlaps(pos_mask, overlaps, num_gt) + gt_idx_pre_prior, fg_mask_pre_prior, pos_mask = select_highest_overlaps( + pos_mask, overlaps, num_gt + ) # assigned target assigned_labels, assigned_bboxes, assigned_scores = self.get_targets( - gt_labels, gt_bboxes, gt_idx_pre_prior, fg_mask_pre_prior, - num_priors, batch_size, num_gt) + gt_labels, + gt_bboxes, + gt_idx_pre_prior, + fg_mask_pre_prior, + num_priors, + batch_size, + num_gt, + ) # soft label with iou if pred_bboxes is not None: @@ -176,15 +198,15 @@ def forward(self, pred_bboxes: Tensor, priors: Tensor, ious = ious.max(axis=-2)[0].unsqueeze(-1) assigned_scores *= ious - assigned_result['assigned_labels'] = assigned_labels.long() - assigned_result['assigned_bboxes'] = assigned_bboxes - assigned_result['assigned_scores'] = assigned_scores - assigned_result['fg_mask_pre_prior'] = fg_mask_pre_prior.bool() + assigned_result["assigned_labels"] = assigned_labels.long() + assigned_result["assigned_bboxes"] = assigned_bboxes + assigned_result["assigned_scores"] = assigned_scores + assigned_result["fg_mask_pre_prior"] = fg_mask_pre_prior.bool() return assigned_result - def select_topk_candidates(self, distances: Tensor, - num_level_priors: List[int], - pad_bbox_flag: Tensor) -> Tuple[Tensor, Tensor]: + def select_topk_candidates( + self, distances: Tensor, num_level_priors: List[int], pad_bbox_flag: Tensor + ) -> Tuple[Tensor, Tensor]: """Selecting candidates based on the center distance. Args: @@ -209,26 +231,29 @@ def select_topk_candidates(self, distances: Tensor, distances = torch.split(distances, num_level_priors, dim=-1) pad_bbox_flag = pad_bbox_flag.repeat(1, 1, self.topk).bool() - for distances_per_level, priors_per_level in zip( - distances, num_level_priors): + for distances_per_level, priors_per_level in zip(distances, num_level_priors): # on each pyramid level, for each gt, # select k bbox whose center are closest to the gt center end_index = start_idx + priors_per_level selected_k = min(self.topk, priors_per_level) _, topk_idxs_per_level = distances_per_level.topk( - selected_k, dim=-1, largest=False) + selected_k, dim=-1, largest=False + ) candidate_idxs.append(topk_idxs_per_level + start_idx) topk_idxs_per_level = torch.where( - pad_bbox_flag, topk_idxs_per_level, - torch.zeros_like(topk_idxs_per_level)) - - is_in_candidate = F.one_hot(topk_idxs_per_level, - priors_per_level).sum(dim=-2) - is_in_candidate = torch.where(is_in_candidate > 1, - torch.zeros_like(is_in_candidate), - is_in_candidate) + pad_bbox_flag, + topk_idxs_per_level, + torch.zeros_like(topk_idxs_per_level), + ) + + is_in_candidate = F.one_hot(topk_idxs_per_level, priors_per_level).sum( + dim=-2 + ) + is_in_candidate = torch.where( + is_in_candidate > 1, torch.zeros_like(is_in_candidate), is_in_candidate + ) is_in_candidate_list.append(is_in_candidate.to(distances_dtype)) start_idx = end_index @@ -239,10 +264,14 @@ def select_topk_candidates(self, distances: Tensor, return is_in_candidate_list, candidate_idxs @staticmethod - def threshold_calculator(is_in_candidate: List, candidate_idxs: Tensor, - overlaps: Tensor, num_priors: int, - batch_size: int, - num_gt: int) -> Tuple[Tensor, Tensor]: + def threshold_calculator( + is_in_candidate: List, + candidate_idxs: Tensor, + overlaps: Tensor, + num_priors: int, + batch_size: int, + num_gt: int, + ) -> Tuple[Tensor, Tensor]: """Get corresponding iou for the these candidates, and compute the mean and std, set mean + std as the iou threshold. 
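The mean-plus-std rule named in this docstring gives each gt its own adaptive IoU cutoff: a gt whose distance-based candidates already overlap well demands more of its positives. A toy check with invented IoU values:

import torch

iou_candidates = torch.tensor([0.12, 0.35, 0.48, 0.52, 0.61])
iou_thr = iou_candidates.mean() + iou_candidates.std()  # ~0.606
is_pos = iou_candidates > iou_thr  # only the 0.61 candidate survives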
@@ -265,32 +294,38 @@ def threshold_calculator(is_in_candidate: List, candidate_idxs: Tensor, """ batch_size_num_gt = batch_size * num_gt - candidate_overlaps = torch.where(is_in_candidate > 0, overlaps, - torch.zeros_like(overlaps)) + candidate_overlaps = torch.where( + is_in_candidate > 0, overlaps, torch.zeros_like(overlaps) + ) candidate_idxs = candidate_idxs.reshape([batch_size_num_gt, -1]) assist_indexes = num_priors * torch.arange( - batch_size_num_gt, device=candidate_idxs.device) + batch_size_num_gt, device=candidate_idxs.device + ) assist_indexes = assist_indexes[:, None] flatten_indexes = candidate_idxs + assist_indexes - candidate_overlaps_reshape = candidate_overlaps.reshape( - -1)[flatten_indexes] + candidate_overlaps_reshape = candidate_overlaps.reshape(-1)[flatten_indexes] candidate_overlaps_reshape = candidate_overlaps_reshape.reshape( - [batch_size, num_gt, -1]) + [batch_size, num_gt, -1] + ) - overlaps_mean_per_gt = candidate_overlaps_reshape.mean( - axis=-1, keepdim=True) - overlaps_std_per_gt = candidate_overlaps_reshape.std( - axis=-1, keepdim=True) + overlaps_mean_per_gt = candidate_overlaps_reshape.mean(axis=-1, keepdim=True) + overlaps_std_per_gt = candidate_overlaps_reshape.std(axis=-1, keepdim=True) overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt return overlaps_thr_per_gt, candidate_overlaps - def get_targets(self, gt_labels: Tensor, gt_bboxes: Tensor, - assigned_gt_inds: Tensor, fg_mask_pre_prior: Tensor, - num_priors: int, batch_size: int, - num_gt: int) -> Tuple[Tensor, Tensor, Tensor]: + def get_targets( + self, + gt_labels: Tensor, + gt_bboxes: Tensor, + assigned_gt_inds: Tensor, + fg_mask_pre_prior: Tensor, + num_priors: int, + batch_size: int, + num_gt: int, + ) -> Tuple[Tensor, Tensor, Tensor]: """Get target info. 
Args: @@ -317,23 +352,26 @@ def get_targets(self, gt_labels: Tensor, gt_bboxes: Tensor, # assigned target labels batch_index = torch.arange( - batch_size, dtype=gt_labels.dtype, device=gt_labels.device) + batch_size, dtype=gt_labels.dtype, device=gt_labels.device + ) batch_index = batch_index[..., None] assigned_gt_inds = (assigned_gt_inds + batch_index * num_gt).long() assigned_labels = gt_labels.flatten()[assigned_gt_inds.flatten()] assigned_labels = assigned_labels.reshape([batch_size, num_priors]) assigned_labels = torch.where( - fg_mask_pre_prior > 0, assigned_labels, - torch.full_like(assigned_labels, self.num_classes)) + fg_mask_pre_prior > 0, + assigned_labels, + torch.full_like(assigned_labels, self.num_classes), + ) # assigned target boxes - assigned_bboxes = gt_bboxes.reshape([-1, - 4])[assigned_gt_inds.flatten()] + assigned_bboxes = gt_bboxes.reshape([-1, 4])[assigned_gt_inds.flatten()] assigned_bboxes = assigned_bboxes.reshape([batch_size, num_priors, 4]) # assigned target scores - assigned_scores = F.one_hot(assigned_labels.long(), - self.num_classes + 1).float() - assigned_scores = assigned_scores[:, :, :self.num_classes] + assigned_scores = F.one_hot( + assigned_labels.long(), self.num_classes + 1 + ).float() + assigned_scores = assigned_scores[:, :, : self.num_classes] return assigned_labels, assigned_bboxes, assigned_scores diff --git a/mmyolo/mmyolo/models/task_modules/assigners/batch_dsl_assigner.py b/mmyolo/mmyolo/models/task_modules/assigners/batch_dsl_assigner.py index 58337d73..377bfc02 100644 --- a/mmyolo/mmyolo/models/task_modules/assigners/batch_dsl_assigner.py +++ b/mmyolo/mmyolo/models/task_modules/assigners/batch_dsl_assigner.py @@ -36,7 +36,7 @@ def __init__( soft_center_radius: float = 3.0, topk: int = 13, iou_weight: float = 3.0, - iou_calculator: ConfigType = dict(type='mmdet.BboxOverlaps2D') + iou_calculator: ConfigType = dict(type="mmdet.BboxOverlaps2D"), ) -> None: super().__init__() self.num_classes = num_classes @@ -46,9 +46,15 @@ def __init__( self.iou_calculator = TASK_UTILS.build(iou_calculator) @torch.no_grad() - def forward(self, pred_bboxes: Tensor, pred_scores: Tensor, priors: Tensor, - gt_labels: Tensor, gt_bboxes: Tensor, - pad_bbox_flag: Tensor) -> dict: + def forward( + self, + pred_bboxes: Tensor, + pred_scores: Tensor, + priors: Tensor, + gt_labels: Tensor, + gt_bboxes: Tensor, + pad_bbox_flag: Tensor, + ) -> dict: num_gt = gt_bboxes.size(1) decoded_bboxes = pred_bboxes num_bboxes = decoded_bboxes.size(1) @@ -56,23 +62,21 @@ def forward(self, pred_bboxes: Tensor, pred_scores: Tensor, priors: Tensor, if num_gt == 0 or num_bboxes == 0: return { - 'assigned_labels': - gt_labels.new_full( - pred_scores[..., 0].shape, - self.num_classes, - dtype=torch.long), - 'assigned_labels_weights': - gt_bboxes.new_full(pred_scores[..., 0].shape, 1), - 'assigned_bboxes': - gt_bboxes.new_full(pred_bboxes.shape, 0), - 'assign_metrics': - gt_bboxes.new_full(pred_scores[..., 0].shape, 0) + "assigned_labels": gt_labels.new_full( + pred_scores[..., 0].shape, self.num_classes, dtype=torch.long + ), + "assigned_labels_weights": gt_bboxes.new_full( + pred_scores[..., 0].shape, 1 + ), + "assigned_bboxes": gt_bboxes.new_full(pred_bboxes.shape, 0), + "assign_metrics": gt_bboxes.new_full(pred_scores[..., 0].shape, 0), } prior_center = priors[:, :2] if isinstance(gt_bboxes, BaseBoxes): raise NotImplementedError( - f'type of {type(gt_bboxes)} are not implemented !') + f"type of {type(gt_bboxes)} are not implemented !" 
+ ) else: # Tensor boxes will be treated as horizontal boxes by defaults lt_ = prior_center[:, None, None] - gt_bboxes[..., :2] @@ -88,9 +92,9 @@ def forward(self, pred_bboxes: Tensor, pred_scores: Tensor, priors: Tensor, gt_center = (gt_bboxes[..., :2] + gt_bboxes[..., 2:]) / 2.0 strides = priors[..., 2] - distance = (priors[None].unsqueeze(2)[..., :2] - - gt_center[:, None, :, :] - ).pow(2).sum(-1).sqrt() / strides[None, :, None] + distance = (priors[None].unsqueeze(2)[..., :2] - gt_center[:, None, :, :]).pow( + 2 + ).sum(-1).sqrt() / strides[None, :, None] # prevent overflow distance = distance * valid_mask.unsqueeze(-1) @@ -104,40 +108,40 @@ def forward(self, pred_bboxes: Tensor, pred_scores: Tensor, priors: Tensor, idx = torch.zeros([2, batch_size, num_gt], dtype=torch.long) idx[0] = torch.arange(end=batch_size).view(-1, 1).repeat(1, num_gt) idx[1] = gt_labels.long().squeeze(-1) - pairwise_pred_scores = pairwise_pred_scores[idx[0], - idx[1]].permute(0, 2, 1) + pairwise_pred_scores = pairwise_pred_scores[idx[0], idx[1]].permute(0, 2, 1) # classification cost scale_factor = pairwise_ious - pairwise_pred_scores.sigmoid() pairwise_cls_cost = F.binary_cross_entropy_with_logits( - pairwise_pred_scores, pairwise_ious, - reduction='none') * scale_factor.abs().pow(2.0) + pairwise_pred_scores, pairwise_ious, reduction="none" + ) * scale_factor.abs().pow(2.0) cost_matrix = pairwise_cls_cost + iou_cost + soft_center_prior max_pad_value = torch.ones_like(cost_matrix) * INF - cost_matrix = torch.where(valid_mask[..., None].repeat(1, 1, num_gt), - cost_matrix, max_pad_value) + cost_matrix = torch.where( + valid_mask[..., None].repeat(1, 1, num_gt), cost_matrix, max_pad_value + ) - (matched_pred_ious, matched_gt_inds, - fg_mask_inboxes) = self.dynamic_k_matching(cost_matrix, pairwise_ious, - pad_bbox_flag) + (matched_pred_ious, matched_gt_inds, fg_mask_inboxes) = self.dynamic_k_matching( + cost_matrix, pairwise_ious, pad_bbox_flag + ) del pairwise_ious, cost_matrix batch_index = (fg_mask_inboxes > 0).nonzero(as_tuple=True)[0] - assigned_labels = gt_labels.new_full(pred_scores[..., 0].shape, - self.num_classes) + assigned_labels = gt_labels.new_full( + pred_scores[..., 0].shape, self.num_classes + ) assigned_labels[fg_mask_inboxes] = gt_labels[ - batch_index, matched_gt_inds].squeeze(-1) + batch_index, matched_gt_inds + ].squeeze(-1) assigned_labels = assigned_labels.long() - assigned_labels_weights = gt_bboxes.new_full(pred_scores[..., 0].shape, - 1) + assigned_labels_weights = gt_bboxes.new_full(pred_scores[..., 0].shape, 1) assigned_bboxes = gt_bboxes.new_full(pred_bboxes.shape, 0) - assigned_bboxes[fg_mask_inboxes] = gt_bboxes[batch_index, - matched_gt_inds] + assigned_bboxes[fg_mask_inboxes] = gt_bboxes[batch_index, matched_gt_inds] assign_metrics = gt_bboxes.new_full(pred_scores[..., 0].shape, 0) assign_metrics[fg_mask_inboxes] = matched_pred_ious @@ -146,10 +150,12 @@ def forward(self, pred_bboxes: Tensor, pred_scores: Tensor, priors: Tensor, assigned_labels=assigned_labels, assigned_labels_weights=assigned_labels_weights, assigned_bboxes=assigned_bboxes, - assign_metrics=assign_metrics) + assign_metrics=assign_metrics, + ) - def dynamic_k_matching(self, cost_matrix: Tensor, pairwise_ious: Tensor, - pad_bbox_flag: int) -> Tuple[Tensor, Tensor]: + def dynamic_k_matching( + self, cost_matrix: Tensor, pairwise_ious: Tensor, pad_bbox_flag: int + ) -> Tuple[Tensor, Tensor]: """Use IoU and matching cost to calculate the dynamic top-k positive targets. 
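dynamic_k_matching, continued in the next hunk, derives how many positives each gt receives from the IoU mass of its best candidates: the top self.topk IoUs are summed, truncated to an int, and clamped to at least 1 (the SimOTA assigner further below applies the same rule with candidate_topk). A toy illustration, simplified to one image with invented values:

import torch

pairwise_ious = torch.tensor([[0.60, 0.50, 0.20, 0.10],
                              [0.30, 0.10, 0.05, 0.00]])  # (num_gt, num_priors)
topk_ious, _ = pairwise_ious.topk(3, dim=1)       # top 3 candidates per gt
dynamic_ks = topk_ious.sum(1).int().clamp(min=1)  # tensor([1, 1])
# gt 0: 0.6 + 0.5 + 0.2 = 1.3 -> k = 1; gt 1: 0.45 -> int 0 -> clamped to 1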
@@ -173,7 +179,7 @@ def dynamic_k_matching(self, cost_matrix: Tensor, pairwise_ious: Tensor, _, sorted_indices = torch.sort(cost_matrix, dim=1) for b in range(pad_bbox_flag.shape[0]): for gt_idx in range(num_gts[b]): - topk_ids = sorted_indices[b, :dynamic_ks[b, gt_idx], gt_idx] + topk_ids = sorted_indices[b, : dynamic_ks[b, gt_idx], gt_idx] matching_matrix[b, :, gt_idx][topk_ids] = 1 del topk_ious, dynamic_ks @@ -181,13 +187,13 @@ def dynamic_k_matching(self, cost_matrix: Tensor, pairwise_ious: Tensor, prior_match_gt_mask = matching_matrix.sum(2) > 1 if prior_match_gt_mask.sum() > 0: cost_min, cost_argmin = torch.min( - cost_matrix[prior_match_gt_mask, :], dim=1) + cost_matrix[prior_match_gt_mask, :], dim=1 + ) matching_matrix[prior_match_gt_mask, :] *= 0 matching_matrix[prior_match_gt_mask, cost_argmin] = 1 # get foreground mask inside box and center prior fg_mask_inboxes = matching_matrix.sum(2) > 0 - matched_pred_ious = (matching_matrix * - pairwise_ious).sum(2)[fg_mask_inboxes] + matched_pred_ious = (matching_matrix * pairwise_ious).sum(2)[fg_mask_inboxes] matched_gt_inds = matching_matrix[fg_mask_inboxes, :].argmax(1) return matched_pred_ious, matched_gt_inds, fg_mask_inboxes diff --git a/mmyolo/mmyolo/models/task_modules/assigners/batch_task_aligned_assigner.py b/mmyolo/mmyolo/models/task_modules/assigners/batch_task_aligned_assigner.py index 202d6789..6b58aae4 100644 --- a/mmyolo/mmyolo/models/task_modules/assigners/batch_task_aligned_assigner.py +++ b/mmyolo/mmyolo/models/task_modules/assigners/batch_task_aligned_assigner.py @@ -8,8 +8,12 @@ from mmyolo.models.losses import bbox_overlaps from mmyolo.registry import TASK_UTILS -from .utils import (select_candidates_in_gts, select_highest_overlaps, - yolov6_iou_calculator) + +from .utils import ( + select_candidates_in_gts, + select_highest_overlaps, + yolov6_iou_calculator, +) @TASK_UTILS.register_module() @@ -37,13 +41,15 @@ class BatchTaskAlignedAssigner(nn.Module): Defaults to False. 
""" - def __init__(self, - num_classes: int, - topk: int = 13, - alpha: float = 1.0, - beta: float = 6.0, - eps: float = 1e-7, - use_ciou: bool = False): + def __init__( + self, + num_classes: int, + topk: int = 13, + alpha: float = 1.0, + beta: float = 6.0, + eps: float = 1e-7, + use_ciou: bool = False, + ): super().__init__() self.num_classes = num_classes self.topk = topk @@ -101,50 +107,70 @@ def forward( num_gt = gt_bboxes.size(1) assigned_result = { - 'assigned_labels': - gt_bboxes.new_full(pred_scores[..., 0].shape, self.num_classes), - 'assigned_bboxes': - gt_bboxes.new_full(pred_bboxes.shape, 0), - 'assigned_scores': - gt_bboxes.new_full(pred_scores.shape, 0), - 'fg_mask_pre_prior': - gt_bboxes.new_full(pred_scores[..., 0].shape, 0) + "assigned_labels": gt_bboxes.new_full( + pred_scores[..., 0].shape, self.num_classes + ), + "assigned_bboxes": gt_bboxes.new_full(pred_bboxes.shape, 0), + "assigned_scores": gt_bboxes.new_full(pred_scores.shape, 0), + "fg_mask_pre_prior": gt_bboxes.new_full(pred_scores[..., 0].shape, 0), } if num_gt == 0: return assigned_result pos_mask, alignment_metrics, overlaps = self.get_pos_mask( - pred_bboxes, pred_scores, priors, gt_labels, gt_bboxes, - pad_bbox_flag, batch_size, num_gt) - - (assigned_gt_idxs, fg_mask_pre_prior, - pos_mask) = select_highest_overlaps(pos_mask, overlaps, num_gt) + pred_bboxes, + pred_scores, + priors, + gt_labels, + gt_bboxes, + pad_bbox_flag, + batch_size, + num_gt, + ) + + (assigned_gt_idxs, fg_mask_pre_prior, pos_mask) = select_highest_overlaps( + pos_mask, overlaps, num_gt + ) # assigned target assigned_labels, assigned_bboxes, assigned_scores = self.get_targets( - gt_labels, gt_bboxes, assigned_gt_idxs, fg_mask_pre_prior, - batch_size, num_gt) + gt_labels, + gt_bboxes, + assigned_gt_idxs, + fg_mask_pre_prior, + batch_size, + num_gt, + ) # normalize alignment_metrics *= pos_mask pos_align_metrics = alignment_metrics.max(axis=-1, keepdim=True)[0] pos_overlaps = (overlaps * pos_mask).max(axis=-1, keepdim=True)[0] norm_align_metric = ( - alignment_metrics * pos_overlaps / - (pos_align_metrics + self.eps)).max(-2)[0].unsqueeze(-1) + (alignment_metrics * pos_overlaps / (pos_align_metrics + self.eps)) + .max(-2)[0] + .unsqueeze(-1) + ) assigned_scores = assigned_scores * norm_align_metric - assigned_result['assigned_labels'] = assigned_labels - assigned_result['assigned_bboxes'] = assigned_bboxes - assigned_result['assigned_scores'] = assigned_scores - assigned_result['fg_mask_pre_prior'] = fg_mask_pre_prior.bool() + assigned_result["assigned_labels"] = assigned_labels + assigned_result["assigned_bboxes"] = assigned_bboxes + assigned_result["assigned_scores"] = assigned_scores + assigned_result["fg_mask_pre_prior"] = fg_mask_pre_prior.bool() return assigned_result - def get_pos_mask(self, pred_bboxes: Tensor, pred_scores: Tensor, - priors: Tensor, gt_labels: Tensor, gt_bboxes: Tensor, - pad_bbox_flag: Tensor, batch_size: int, - num_gt: int) -> Tuple[Tensor, Tensor, Tensor]: + def get_pos_mask( + self, + pred_bboxes: Tensor, + pred_scores: Tensor, + priors: Tensor, + gt_labels: Tensor, + gt_bboxes: Tensor, + pad_bbox_flag: Tensor, + batch_size: int, + num_gt: int, + ) -> Tuple[Tensor, Tensor, Tensor]: """Get possible mask. 
Args: @@ -172,9 +198,9 @@ def get_pos_mask(self, pred_bboxes: Tensor, pred_scores: Tensor, """ # Compute alignment metric between all bbox and gt - alignment_metrics, overlaps = \ - self.get_box_metrics(pred_bboxes, pred_scores, gt_labels, - gt_bboxes, batch_size, num_gt) + alignment_metrics, overlaps = self.get_box_metrics( + pred_bboxes, pred_scores, gt_labels, gt_bboxes, batch_size, num_gt + ) # get is_in_gts mask is_in_gts = select_candidates_in_gts(priors, gt_bboxes) @@ -182,16 +208,23 @@ def get_pos_mask(self, pred_bboxes: Tensor, pred_scores: Tensor, # get topk_metric mask topk_metric = self.select_topk_candidates( alignment_metrics * is_in_gts, - topk_mask=pad_bbox_flag.repeat([1, 1, self.topk]).bool()) + topk_mask=pad_bbox_flag.repeat([1, 1, self.topk]).bool(), + ) # merge all mask to a final mask pos_mask = topk_metric * is_in_gts * pad_bbox_flag return pos_mask, alignment_metrics, overlaps - def get_box_metrics(self, pred_bboxes: Tensor, pred_scores: Tensor, - gt_labels: Tensor, gt_bboxes: Tensor, batch_size: int, - num_gt: int) -> Tuple[Tensor, Tensor]: + def get_box_metrics( + self, + pred_bboxes: Tensor, + pred_scores: Tensor, + gt_labels: Tensor, + gt_bboxes: Tensor, + batch_size: int, + num_gt: int, + ) -> Tuple[Tensor, Tensor]: """Compute alignment metric between all bbox and gt. Args: @@ -221,20 +254,22 @@ def get_box_metrics(self, pred_bboxes: Tensor, pred_scores: Tensor, overlaps = bbox_overlaps( pred_bboxes.unsqueeze(1), gt_bboxes.unsqueeze(2), - iou_mode='ciou', - bbox_format='xyxy').clamp(0) + iou_mode="ciou", + bbox_format="xyxy", + ).clamp(0) else: overlaps = yolov6_iou_calculator(gt_bboxes, pred_bboxes) - alignment_metrics = bbox_scores.pow(self.alpha) * overlaps.pow( - self.beta) + alignment_metrics = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta) return alignment_metrics, overlaps - def select_topk_candidates(self, - alignment_gt_metrics: Tensor, - using_largest_topk: bool = True, - topk_mask: Optional[Tensor] = None) -> Tensor: + def select_topk_candidates( + self, + alignment_gt_metrics: Tensor, + using_largest_topk: bool = True, + topk_mask: Optional[Tensor] = None, + ) -> Tensor: """Compute alignment metric between all bbox and gt. 
Args: @@ -250,24 +285,28 @@ def select_topk_candidates(self, """ num_priors = alignment_gt_metrics.shape[-1] topk_metrics, topk_idxs = torch.topk( - alignment_gt_metrics, - self.topk, - axis=-1, - largest=using_largest_topk) + alignment_gt_metrics, self.topk, axis=-1, largest=using_largest_topk + ) if topk_mask is None: - topk_mask = (topk_metrics.max(axis=-1, keepdim=True) > - self.eps).tile([1, 1, self.topk]) - topk_idxs = torch.where(topk_mask, topk_idxs, - torch.zeros_like(topk_idxs)) + topk_mask = (topk_metrics.max(axis=-1, keepdim=True) > self.eps).tile( + [1, 1, self.topk] + ) + topk_idxs = torch.where(topk_mask, topk_idxs, torch.zeros_like(topk_idxs)) is_in_topk = F.one_hot(topk_idxs, num_priors).sum(axis=-2) - is_in_topk = torch.where(is_in_topk > 1, torch.zeros_like(is_in_topk), - is_in_topk) + is_in_topk = torch.where( + is_in_topk > 1, torch.zeros_like(is_in_topk), is_in_topk + ) return is_in_topk.to(alignment_gt_metrics.dtype) - def get_targets(self, gt_labels: Tensor, gt_bboxes: Tensor, - assigned_gt_idxs: Tensor, fg_mask_pre_prior: Tensor, - batch_size: int, - num_gt: int) -> Tuple[Tensor, Tensor, Tensor]: + def get_targets( + self, + gt_labels: Tensor, + gt_bboxes: Tensor, + assigned_gt_idxs: Tensor, + fg_mask_pre_prior: Tensor, + batch_size: int, + num_gt: int, + ) -> Tuple[Tensor, Tensor, Tensor]: """Get assigner info. Args: @@ -291,8 +330,8 @@ def get_targets(self, gt_labels: Tensor, gt_bboxes: Tensor, """ # assigned target labels batch_ind = torch.arange( - end=batch_size, dtype=torch.int64, device=gt_labels.device)[..., - None] + end=batch_size, dtype=torch.int64, device=gt_labels.device + )[..., None] assigned_gt_idxs = assigned_gt_idxs + batch_ind * num_gt assigned_labels = gt_labels.long().flatten()[assigned_gt_idxs] @@ -303,9 +342,12 @@ def get_targets(self, gt_labels: Tensor, gt_bboxes: Tensor, assigned_labels[assigned_labels < 0] = 0 assigned_scores = F.one_hot(assigned_labels, self.num_classes) force_gt_scores_mask = fg_mask_pre_prior[:, :, None].repeat( - 1, 1, self.num_classes) - assigned_scores = torch.where(force_gt_scores_mask > 0, - assigned_scores, - torch.full_like(assigned_scores, 0)) + 1, 1, self.num_classes + ) + assigned_scores = torch.where( + force_gt_scores_mask > 0, + assigned_scores, + torch.full_like(assigned_scores, 0), + ) return assigned_labels, assigned_bboxes, assigned_scores diff --git a/mmyolo/mmyolo/models/task_modules/assigners/batch_yolov7_assigner.py b/mmyolo/mmyolo/models/task_modules/assigners/batch_yolov7_assigner.py index 6709968e..a239d698 100644 --- a/mmyolo/mmyolo/models/task_modules/assigners/batch_yolov7_assigner.py +++ b/mmyolo/mmyolo/models/task_modules/assigners/batch_yolov7_assigner.py @@ -40,14 +40,16 @@ class BatchYOLOv7Assigner(nn.Module): cls_weight (float): Class weight. Defaults to 1.0. 
""" - def __init__(self, - num_classes: int, - num_base_priors: int, - featmap_strides: Sequence[int], - prior_match_thr: float = 4.0, - candidate_topk: int = 10, - iou_weight: float = 3.0, - cls_weight: float = 1.0): + def __init__( + self, + num_classes: int, + num_base_priors: int, + featmap_strides: Sequence[int], + prior_match_thr: float = 4.0, + candidate_topk: int = 10, + iou_weight: float = 3.0, + cls_weight: float = 1.0, + ): super().__init__() self.num_classes = num_classes self.num_base_priors = num_base_priors @@ -60,13 +62,15 @@ def __init__(self, self.cls_weight = cls_weight @torch.no_grad() - def forward(self, - pred_results, - batch_targets_normed, - batch_input_shape, - priors_base_sizes, - grid_offset, - near_neighbor_thr=0.5) -> dict: + def forward( + self, + pred_results, + batch_targets_normed, + batch_input_shape, + priors_base_sizes, + grid_offset, + near_neighbor_thr=0.5, + ) -> dict: """Forward function.""" # (num_base_priors, num_batch_gt, 7) # 7 is mean (batch_idx, cls_id, x_norm, y_norm, @@ -77,10 +81,10 @@ def forward(self, # empty gt of batch num_levels = len(pred_results) return dict( - mlvl_positive_infos=[pred_results[0].new_empty( - (0, 4))] * num_levels, + mlvl_positive_infos=[pred_results[0].new_empty((0, 4))] * num_levels, mlvl_priors=[] * num_levels, - mlvl_targets_normed=[] * num_levels) + mlvl_targets_normed=[] * num_levels, + ) # if near_neighbor_thr = 0.5 are mean the nearest # 3 neighbors are also considered positive samples. @@ -91,31 +95,39 @@ def forward(self, batch_targets_normed, priors_base_sizes, grid_offset, - near_neighbor_thr=near_neighbor_thr) + near_neighbor_thr=near_neighbor_thr, + ) - mlvl_positive_infos, mlvl_priors, \ - mlvl_targets_normed = self.simota_assigner( - pred_results, batch_targets_normed, mlvl_positive_infos, - mlvl_priors, batch_input_shape) + mlvl_positive_infos, mlvl_priors, mlvl_targets_normed = self.simota_assigner( + pred_results, + batch_targets_normed, + mlvl_positive_infos, + mlvl_priors, + batch_input_shape, + ) place_hold_var = batch_targets_normed.new_empty((0, 4)) _cat_multi_level_tensor_in_place( mlvl_positive_infos, mlvl_priors, mlvl_targets_normed, - place_hold_var=place_hold_var) + place_hold_var=place_hold_var, + ) return dict( mlvl_positive_infos=mlvl_positive_infos, mlvl_priors=mlvl_priors, - mlvl_targets_normed=mlvl_targets_normed) - - def yolov5_assigner(self, - pred_results, - batch_targets_normed, - priors_base_sizes, - grid_offset, - near_neighbor_thr=0.5): + mlvl_targets_normed=mlvl_targets_normed, + ) + + def yolov5_assigner( + self, + pred_results, + batch_targets_normed, + priors_base_sizes, + grid_offset, + near_neighbor_thr=0.5, + ): """YOLOv5 cross-grid sample assigner.""" num_batch_gts = batch_targets_normed.shape[1] assert num_batch_gts > 0 @@ -126,41 +138,41 @@ def yolov5_assigner(self, for i in range(len(pred_results)): # lever priors_base_sizes_i = priors_base_sizes[i] # (1, 1, feat_shape_w, feat_shape_h, feat_shape_w, feat_shape_h) - scaled_factor[2:6] = torch.tensor( - pred_results[i].shape)[[3, 2, 3, 2]] + scaled_factor[2:6] = torch.tensor(pred_results[i].shape)[[3, 2, 3, 2]] # Scale batch_targets from range 0-1 to range 0-features_maps size. # (num_base_priors, num_batch_gts, 7) batch_targets_scaled = batch_targets_normed * scaled_factor # Shape match - wh_ratio = batch_targets_scaled[..., - 4:6] / priors_base_sizes_i[:, None] - match_inds = torch.max( - wh_ratio, 1. 
/ wh_ratio).max(2)[0] < self.prior_match_thr + wh_ratio = batch_targets_scaled[..., 4:6] / priors_base_sizes_i[:, None] + match_inds = ( + torch.max(wh_ratio, 1.0 / wh_ratio).max(2)[0] < self.prior_match_thr + ) batch_targets_scaled = batch_targets_scaled[ - match_inds] # (num_matched_target, 7) + match_inds + ] # (num_matched_target, 7) # no gt bbox matches anchor if batch_targets_scaled.shape[0] == 0: - mlvl_positive_infos.append( - batch_targets_scaled.new_empty((0, 4))) + mlvl_positive_infos.append(batch_targets_scaled.new_empty((0, 4))) mlvl_priors.append([]) continue # Positive samples with additional neighbors batch_targets_cxcy = batch_targets_scaled[:, 2:4] grid_xy = scaled_factor[[2, 3]] - batch_targets_cxcy - left, up = ((batch_targets_cxcy % 1 < near_neighbor_thr) & - (batch_targets_cxcy > 1)).T - right, bottom = ((grid_xy % 1 < near_neighbor_thr) & - (grid_xy > 1)).T - offset_inds = torch.stack( - (torch.ones_like(left), left, up, right, bottom)) - batch_targets_scaled = batch_targets_scaled.repeat( - (5, 1, 1))[offset_inds] # () - retained_offsets = grid_offset.repeat(1, offset_inds.shape[1], - 1)[offset_inds] + left, up = ( + (batch_targets_cxcy % 1 < near_neighbor_thr) & (batch_targets_cxcy > 1) + ).T + right, bottom = ((grid_xy % 1 < near_neighbor_thr) & (grid_xy > 1)).T + offset_inds = torch.stack((torch.ones_like(left), left, up, right, bottom)) + batch_targets_scaled = batch_targets_scaled.repeat((5, 1, 1))[ + offset_inds + ] # () + retained_offsets = grid_offset.repeat(1, offset_inds.shape[1], 1)[ + offset_inds + ] # batch_targets_scaled: (num_matched_target, 7) # 7 is mean (batch_idx, cls_id, x_scaled, @@ -170,9 +182,7 @@ def yolov5_assigner(self, # 4 is mean (batch_idx, prior_idx, x_scaled, y_scaled) mlvl_positive_info = batch_targets_scaled[:, [0, 6, 2, 3]] retained_offsets = retained_offsets * near_neighbor_thr - mlvl_positive_info[:, - 2:] = mlvl_positive_info[:, - 2:] - retained_offsets + mlvl_positive_info[:, 2:] = mlvl_positive_info[:, 2:] - retained_offsets mlvl_positive_info[:, 2].clamp_(0, scaled_factor[2] - 1) mlvl_positive_info[:, 3].clamp_(0, scaled_factor[3] - 1) mlvl_positive_info = mlvl_positive_info.long() @@ -183,8 +193,14 @@ def yolov5_assigner(self, return mlvl_positive_infos, mlvl_priors - def simota_assigner(self, pred_results, batch_targets_normed, - mlvl_positive_infos, mlvl_priors, batch_input_shape): + def simota_assigner( + self, + pred_results, + batch_targets_normed, + mlvl_positive_infos, + mlvl_priors, + batch_input_shape, + ): """SimOTA assigner.""" num_batch_gts = batch_targets_normed.shape[1] assert num_batch_gts > 0 @@ -219,7 +235,7 @@ def simota_assigner(self, pred_results, batch_targets_normed, if _mlvl_positive_info.shape[0] == 0: continue - idx = (_mlvl_positive_info[:, 0] == batch_idx) + idx = _mlvl_positive_info[:, 0] == batch_idx _mlvl_positive_info = _mlvl_positive_info[idx] _mlvl_positive_infos.append(_mlvl_positive_info) @@ -228,23 +244,28 @@ def simota_assigner(self, pred_results, batch_targets_normed, _from_which_layer.append( _mlvl_positive_info.new_full( - size=(_mlvl_positive_info.shape[0], ), fill_value=i)) + size=(_mlvl_positive_info.shape[0],), fill_value=i + ) + ) # (n,85) - level_batch_idx, prior_ind, \ - grid_x, grid_y = _mlvl_positive_info.T - pred_positive = head_pred[level_batch_idx, prior_ind, grid_y, - grid_x] + level_batch_idx, prior_ind, grid_x, grid_y = _mlvl_positive_info.T + pred_positive = head_pred[level_batch_idx, prior_ind, grid_y, grid_x] _mlvl_obj_cls.append(pred_positive[:, 4:]) # decoded grid = 
torch.stack([grid_x, grid_y], dim=1) - pred_positive_cxcy = (pred_positive[:, :2].sigmoid() * 2. - - 0.5 + grid) * self.featmap_strides[i] - pred_positive_wh = (pred_positive[:, 2:4].sigmoid() * 2) ** 2 \ - * priors * self.featmap_strides[i] + pred_positive_cxcy = ( + pred_positive[:, :2].sigmoid() * 2.0 - 0.5 + grid + ) * self.featmap_strides[i] + pred_positive_wh = ( + (pred_positive[:, 2:4].sigmoid() * 2) ** 2 + * priors + * self.featmap_strides[i] + ) pred_positive_xywh = torch.cat( - [pred_positive_cxcy, pred_positive_wh], dim=-1) + [pred_positive_cxcy, pred_positive_wh], dim=-1 + ) _mlvl_decoderd_bboxes.append(pred_positive_xywh) if len(_mlvl_decoderd_bboxes) == 0: @@ -258,14 +279,14 @@ def simota_assigner(self, pred_results, batch_targets_normed, continue # scaled xywh - batch_input_shape_wh = pred_results[0].new_tensor( - batch_input_shape[::-1]).repeat((1, 2)) + batch_input_shape_wh = ( + pred_results[0].new_tensor(batch_input_shape[::-1]).repeat((1, 2)) + ) targets_scaled_bbox = targets_normed[:, 2:6] * batch_input_shape_wh targets_scaled_bbox = bbox_cxcywh_to_xyxy(targets_scaled_bbox) _mlvl_decoderd_bboxes = bbox_cxcywh_to_xyxy(_mlvl_decoderd_bboxes) - pair_wise_iou = bbox_overlaps(targets_scaled_bbox, - _mlvl_decoderd_bboxes) + pair_wise_iou = bbox_overlaps(targets_scaled_bbox, _mlvl_decoderd_bboxes) pair_wise_iou_loss = -torch.log(pair_wise_iou + 1e-8) # 2 calc pair_wise_cls_loss @@ -275,39 +296,40 @@ def simota_assigner(self, pred_results, batch_targets_normed, _mlvl_priors = torch.cat(_mlvl_priors, dim=0) gt_cls_per_image = ( - F.one_hot(targets_normed[:, 1].to(torch.int64), - self.num_classes).float().unsqueeze(1).repeat( - 1, num_pred_positive, 1)) + F.one_hot(targets_normed[:, 1].to(torch.int64), self.num_classes) + .float() + .unsqueeze(1) + .repeat(1, num_pred_positive, 1) + ) # cls_score * obj - cls_preds_ = _mlvl_obj_cls[:, 1:]\ - .unsqueeze(0)\ - .repeat(num_gts, 1, 1) \ - * _mlvl_obj_cls[:, 0:1]\ - .unsqueeze(0).repeat(num_gts, 1, 1) + cls_preds_ = _mlvl_obj_cls[:, 1:].unsqueeze(0).repeat( + num_gts, 1, 1 + ) * _mlvl_obj_cls[:, 0:1].unsqueeze(0).repeat(num_gts, 1, 1) y = cls_preds_.sqrt_() pair_wise_cls_loss = F.binary_cross_entropy_with_logits( - torch.log(y / (1 - y)), gt_cls_per_image, - reduction='none').sum(-1) + torch.log(y / (1 - y)), gt_cls_per_image, reduction="none" + ).sum(-1) del cls_preds_ # calc cost cost = ( - self.cls_weight * pair_wise_cls_loss + - self.iou_weight * pair_wise_iou_loss) + self.cls_weight * pair_wise_cls_loss + + self.iou_weight * pair_wise_iou_loss + ) # num_gt, num_match_pred matching_matrix = torch.zeros_like(cost) top_k, _ = torch.topk( - pair_wise_iou, - min(self.candidate_topk, pair_wise_iou.shape[1]), - dim=1) + pair_wise_iou, min(self.candidate_topk, pair_wise_iou.shape[1]), dim=1 + ) dynamic_ks = torch.clamp(top_k.sum(1).int(), min=1) # Select only topk matches per gt for gt_idx in range(num_gts): _, pos_idx = torch.topk( - cost[gt_idx], k=dynamic_ks[gt_idx].item(), largest=False) + cost[gt_idx], k=dynamic_ks[gt_idx].item(), largest=False + ) matching_matrix[gt_idx][pos_idx] = 1.0 del top_k, dynamic_ks @@ -316,8 +338,7 @@ def simota_assigner(self, pred_results, batch_targets_normed, # only the least costly one can be taken anchor_matching_gt = matching_matrix.sum(0) if (anchor_matching_gt > 1).sum() > 0: - _, cost_argmin = torch.min( - cost[:, anchor_matching_gt > 1], dim=0) + _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0) matching_matrix[:, anchor_matching_gt > 1] *= 0.0 matching_matrix[cost_argmin, 
anchor_matching_gt > 1] = 1.0 fg_mask_inboxes = matching_matrix.sum(0) > 0.0 @@ -332,13 +353,13 @@ def simota_assigner(self, pred_results, batch_targets_normed, # to facilitate loss for i in range(num_levels): layer_idx = _from_which_layer == i - mlvl_positive_infos_matched[i].append( - _mlvl_positive_infos[layer_idx]) + mlvl_positive_infos_matched[i].append(_mlvl_positive_infos[layer_idx]) mlvl_priors_matched[i].append(_mlvl_priors[layer_idx]) - mlvl_targets_normed_matched[i].append( - targets_normed[layer_idx]) + mlvl_targets_normed_matched[i].append(targets_normed[layer_idx]) - results = mlvl_positive_infos_matched, \ - mlvl_priors_matched, \ - mlvl_targets_normed_matched + results = ( + mlvl_positive_infos_matched, + mlvl_priors_matched, + mlvl_targets_normed_matched, + ) return results diff --git a/mmyolo/mmyolo/models/task_modules/assigners/utils.py b/mmyolo/mmyolo/models/task_modules/assigners/utils.py index 58432009..4eaea2ba 100644 --- a/mmyolo/mmyolo/models/task_modules/assigners/utils.py +++ b/mmyolo/mmyolo/models/task_modules/assigners/utils.py @@ -7,9 +7,9 @@ from torch import Tensor -def select_candidates_in_gts(priors_points: Tensor, - gt_bboxes: Tensor, - eps: float = 1e-9) -> Tensor: +def select_candidates_in_gts( + priors_points: Tensor, gt_bboxes: Tensor, eps: float = 1e-9 +) -> Tensor: """Select the positive priors' center in gt. Args: @@ -25,22 +25,23 @@ def select_candidates_in_gts(priors_points: Tensor, gt_bboxes = gt_bboxes.reshape([-1, 4]) priors_number = priors_points.size(0) - priors_points = priors_points.unsqueeze(0).repeat(batch_size * num_gt, 1, - 1) + priors_points = priors_points.unsqueeze(0).repeat(batch_size * num_gt, 1, 1) # calculate the left, top, right, bottom distance between positive # prior center and gt side gt_bboxes_lt = gt_bboxes[:, 0:2].unsqueeze(1).repeat(1, priors_number, 1) gt_bboxes_rb = gt_bboxes[:, 2:4].unsqueeze(1).repeat(1, priors_number, 1) bbox_deltas = torch.cat( - [priors_points - gt_bboxes_lt, gt_bboxes_rb - priors_points], dim=-1) + [priors_points - gt_bboxes_lt, gt_bboxes_rb - priors_points], dim=-1 + ) bbox_deltas = bbox_deltas.reshape([batch_size, num_gt, priors_number, -1]) return (bbox_deltas.min(axis=-1)[0] > eps).to(gt_bboxes.dtype) -def select_highest_overlaps(pos_mask: Tensor, overlaps: Tensor, - num_gt: int) -> Tuple[Tensor, Tensor, Tensor]: +def select_highest_overlaps( + pos_mask: Tensor, overlaps: Tensor, num_gt: int +) -> Tuple[Tensor, Tensor, Tensor]: """If an anchor box is assigned to multiple gts, the one with the highest iou will be selected. @@ -62,12 +63,10 @@ def select_highest_overlaps(pos_mask: Tensor, overlaps: Tensor, # Make sure the positive sample matches the only one and is the largest IoU if fg_mask_pre_prior.max() > 1: - mask_multi_gts = (fg_mask_pre_prior.unsqueeze(1) > 1).repeat( - [1, num_gt, 1]) + mask_multi_gts = (fg_mask_pre_prior.unsqueeze(1) > 1).repeat([1, num_gt, 1]) index = overlaps.argmax(axis=1) is_max_overlaps = F.one_hot(index, num_gt) - is_max_overlaps = \ - is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) + is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) pos_mask = torch.where(mask_multi_gts, is_max_overlaps, pos_mask) fg_mask_pre_prior = pos_mask.sum(axis=-2) @@ -78,9 +77,7 @@ def select_highest_overlaps(pos_mask: Tensor, overlaps: Tensor, # TODO:'mmdet.BboxOverlaps2D' will cause gradient inconsistency, # which will be found and solved in a later version. 
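As a quick orientation for the two helpers above, here is a minimal sketch of how they compose in a task-aligned assigner. The tensor shapes follow the docstrings in this file; the IoU table and the assumed return order of `select_highest_overlaps` (assigned gt index, foreground mask, refined mask) are illustrative assumptions, not taken from the patch:

import torch
from mmyolo.models.task_modules.assigners.utils import (
    select_candidates_in_gts, select_highest_overlaps)

# One image, two gt boxes in xyxy format: shape (batch_size, num_gt, 4).
gt_bboxes = torch.tensor([[[0.0, 0.0, 40.0, 40.0], [20.0, 20.0, 80.0, 80.0]]])
# Three prior centers; the second one lies inside both gt boxes.
priors_points = torch.tensor([[10.0, 10.0], [30.0, 30.0], [90.0, 90.0]])

# (batch_size, num_gt, num_priors) mask of prior centers inside each gt.
pos_mask = select_candidates_in_gts(priors_points, gt_bboxes)

# Illustrative pairwise IoUs of the same shape; the real assigner computes
# these with an overlap function such as yolov6_iou_calculator below.
overlaps = torch.tensor([[[0.6, 0.3, 0.0], [0.2, 0.7, 0.1]]])

# The doubly-matched second prior is kept only for its highest-IoU gt.
assigned_gt, fg_mask, pos_mask = select_highest_overlaps(pos_mask, overlaps, num_gt=2)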
-def yolov6_iou_calculator(bbox1: Tensor, - bbox2: Tensor, - eps: float = 1e-9) -> Tensor: +def yolov6_iou_calculator(bbox1: Tensor, bbox2: Tensor, eps: float = 1e-9) -> Tensor: """Calculate iou for batch. Args: @@ -98,8 +95,11 @@ def yolov6_iou_calculator(bbox1: Tensor, bbox2_x1y1, bbox2_x2y2 = bbox2[:, :, :, 0:2], bbox2[:, :, :, 2:4] # calculate overlap area - overlap = (torch.minimum(bbox1_x2y2, bbox2_x2y2) - - torch.maximum(bbox1_x1y1, bbox2_x1y1)).clip(0).prod(-1) + overlap = ( + (torch.minimum(bbox1_x2y2, bbox2_x2y2) - torch.maximum(bbox1_x1y1, bbox2_x1y1)) + .clip(0) + .prod(-1) + ) # calculate bbox area bbox1_area = (bbox1_x2y2 - bbox1_x1y1).clip(0).prod(-1) diff --git a/mmyolo/mmyolo/models/task_modules/coders/__init__.py b/mmyolo/mmyolo/models/task_modules/coders/__init__.py index 6346387c..5a8910a5 100644 --- a/mmyolo/mmyolo/models/task_modules/coders/__init__.py +++ b/mmyolo/mmyolo/models/task_modules/coders/__init__.py @@ -3,4 +3,4 @@ from .yolov5_bbox_coder import YOLOv5BBoxCoder from .yolox_bbox_coder import YOLOXBBoxCoder -__all__ = ['YOLOv5BBoxCoder', 'YOLOXBBoxCoder', 'DistancePointBBoxCoder'] +__all__ = ["YOLOv5BBoxCoder", "YOLOXBBoxCoder", "DistancePointBBoxCoder"] diff --git a/mmyolo/mmyolo/models/task_modules/coders/distance_point_bbox_coder.py b/mmyolo/mmyolo/models/task_modules/coders/distance_point_bbox_coder.py index 16417b8a..287dc746 100644 --- a/mmyolo/mmyolo/models/task_modules/coders/distance_point_bbox_coder.py +++ b/mmyolo/mmyolo/models/task_modules/coders/distance_point_bbox_coder.py @@ -2,8 +2,9 @@ from typing import Optional, Sequence, Union import torch -from mmdet.models.task_modules.coders import \ - DistancePointBBoxCoder as MMDET_DistancePointBBoxCoder +from mmdet.models.task_modules.coders import ( + DistancePointBBoxCoder as MMDET_DistancePointBBoxCoder, +) from mmdet.structures.bbox import bbox2distance, distance2bbox from mmyolo.registry import TASK_UTILS @@ -22,8 +23,9 @@ def decode( points: torch.Tensor, pred_bboxes: torch.Tensor, stride: torch.Tensor, - max_shape: Optional[Union[Sequence[int], torch.Tensor, - Sequence[Sequence[int]]]] = None + max_shape: Optional[ + Union[Sequence[int], torch.Tensor, Sequence[Sequence[int]]] + ] = None, ) -> torch.Tensor: """Decode distance prediction to bounding box. @@ -52,11 +54,13 @@ def decode( return distance2bbox(points, pred_bboxes, max_shape) - def encode(self, - points: torch.Tensor, - gt_bboxes: torch.Tensor, - max_dis: float = 16., - eps: float = 0.01) -> torch.Tensor: + def encode( + self, + points: torch.Tensor, + gt_bboxes: torch.Tensor, + max_dis: float = 16.0, + eps: float = 0.01, + ) -> torch.Tensor: """Encode bounding box to distances. The rewrite is to support batch operations. diff --git a/mmyolo/mmyolo/models/task_modules/coders/yolov5_bbox_coder.py b/mmyolo/mmyolo/models/task_modules/coders/yolov5_bbox_coder.py index bab5f0e0..9acafde2 100644 --- a/mmyolo/mmyolo/models/task_modules/coders/yolov5_bbox_coder.py +++ b/mmyolo/mmyolo/models/task_modules/coders/yolov5_bbox_coder.py @@ -17,10 +17,13 @@ class YOLOv5BBoxCoder(BaseBBoxCoder): def encode(self, **kwargs): """Encode deltas between bboxes and ground truth boxes.""" - pass - def decode(self, priors: torch.Tensor, pred_bboxes: torch.Tensor, - stride: Union[torch.Tensor, int]) -> torch.Tensor: + def decode( + self, + priors: torch.Tensor, + pred_bboxes: torch.Tensor, + stride: Union[torch.Tensor, int], + ) -> torch.Tensor: """Decode regression results (delta_x, delta_x, w, h) to bboxes (tl_x, tl_y, br_x, br_y). 
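The decode arithmetic changed in the next hunk is easy to sanity-check on its own. Below is a self-contained sketch of the same parameterisation; `yolov5_decode` is a hypothetical helper written for illustration, assuming xyxy priors and sigmoid-activated predictions:

import torch

def yolov5_decode(priors: torch.Tensor, pred: torch.Tensor, stride: int) -> torch.Tensor:
    # Prior centers and sizes from xyxy anchors.
    x_center = (priors[..., 0] + priors[..., 2]) * 0.5
    y_center = (priors[..., 1] + priors[..., 3]) * 0.5
    w = priors[..., 2] - priors[..., 0]
    h = priors[..., 3] - priors[..., 1]
    # Centers move at most +/- one stride; sizes scale by (2 * pred) ** 2,
    # i.e. between 0x and 4x the prior, as in the hunk below.
    x_pred = (pred[..., 0] - 0.5) * 2 * stride + x_center
    y_pred = (pred[..., 1] - 0.5) * 2 * stride + y_center
    w_pred = (pred[..., 2] * 2) ** 2 * w
    h_pred = (pred[..., 3] * 2) ** 2 * h
    return torch.stack(
        (x_pred - w_pred / 2, y_pred - h_pred / 2,
         x_pred + w_pred / 2, y_pred + h_pred / 2), dim=-1)

# A neutral prediction (0.5 everywhere after sigmoid) reproduces the prior:
priors = torch.tensor([[8.0, 8.0, 24.0, 24.0]])
pred = torch.full((1, 4), 0.5)
print(yolov5_decode(priors, pred, stride=8))  # tensor([[ 8.,  8., 24., 24.]])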
@@ -44,12 +47,17 @@ def decode(self, priors: torch.Tensor, pred_bboxes: torch.Tensor, # The anchor of mmdet has been offset by 0.5 x_center_pred = (pred_bboxes[..., 0] - 0.5) * 2 * stride + x_center y_center_pred = (pred_bboxes[..., 1] - 0.5) * 2 * stride + y_center - w_pred = (pred_bboxes[..., 2] * 2)**2 * w - h_pred = (pred_bboxes[..., 3] * 2)**2 * h + w_pred = (pred_bboxes[..., 2] * 2) ** 2 * w + h_pred = (pred_bboxes[..., 3] * 2) ** 2 * h decoded_bboxes = torch.stack( - (x_center_pred - w_pred / 2, y_center_pred - h_pred / 2, - x_center_pred + w_pred / 2, y_center_pred + h_pred / 2), - dim=-1) + ( + x_center_pred - w_pred / 2, + y_center_pred - h_pred / 2, + x_center_pred + w_pred / 2, + y_center_pred + h_pred / 2, + ), + dim=-1, + ) return decoded_bboxes diff --git a/mmyolo/mmyolo/models/task_modules/coders/yolox_bbox_coder.py b/mmyolo/mmyolo/models/task_modules/coders/yolox_bbox_coder.py index 02c898d8..de4587d4 100644 --- a/mmyolo/mmyolo/models/task_modules/coders/yolox_bbox_coder.py +++ b/mmyolo/mmyolo/models/task_modules/coders/yolox_bbox_coder.py @@ -17,10 +17,13 @@ class YOLOXBBoxCoder(BaseBBoxCoder): def encode(self, **kwargs): """Encode deltas between bboxes and ground truth boxes.""" - pass - def decode(self, priors: torch.Tensor, pred_bboxes: torch.Tensor, - stride: Union[torch.Tensor, int]) -> torch.Tensor: + def decode( + self, + priors: torch.Tensor, + pred_bboxes: torch.Tensor, + stride: Union[torch.Tensor, int], + ) -> torch.Tensor: """Decode regression results (delta_x, delta_x, w, h) to bboxes (tl_x, tl_y, br_x, br_y). @@ -36,10 +39,10 @@ def decode(self, priors: torch.Tensor, pred_bboxes: torch.Tensor, xys = (pred_bboxes[..., :2] * stride) + priors whs = pred_bboxes[..., 2:].exp() * stride - tl_x = (xys[..., 0] - whs[..., 0] / 2) - tl_y = (xys[..., 1] - whs[..., 1] / 2) - br_x = (xys[..., 0] + whs[..., 0] / 2) - br_y = (xys[..., 1] + whs[..., 1] / 2) + tl_x = xys[..., 0] - whs[..., 0] / 2 + tl_y = xys[..., 1] - whs[..., 1] / 2 + br_x = xys[..., 0] + whs[..., 0] / 2 + br_y = xys[..., 1] + whs[..., 1] / 2 decoded_bboxes = torch.stack([tl_x, tl_y, br_x, br_y], -1) return decoded_bboxes diff --git a/mmyolo/mmyolo/models/utils/__init__.py b/mmyolo/mmyolo/models/utils/__init__.py index 89118283..50bb735e 100644 --- a/mmyolo/mmyolo/models/utils/__init__.py +++ b/mmyolo/mmyolo/models/utils/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .misc import make_divisible, make_round -__all__ = ['make_divisible', 'make_round'] +__all__ = ["make_divisible", "make_round"] diff --git a/mmyolo/mmyolo/models/utils/misc.py b/mmyolo/mmyolo/models/utils/misc.py index 6844ad37..14f3b0e3 100644 --- a/mmyolo/mmyolo/models/utils/misc.py +++ b/mmyolo/mmyolo/models/utils/misc.py @@ -2,9 +2,7 @@ import math -def make_divisible(x: float, - widen_factor: float = 1.0, - divisor: int = 8) -> int: +def make_divisible(x: float, widen_factor: float = 1.0, divisor: int = 8) -> int: """Make sure that x*widen_factor is divisible by divisor.""" return math.ceil(x * widen_factor / divisor) * divisor diff --git a/mmyolo/mmyolo/registry.py b/mmyolo/mmyolo/registry.py index 63967d86..f7efafca 100644 --- a/mmyolo/mmyolo/registry.py +++ b/mmyolo/mmyolo/registry.py @@ -13,61 +13,62 @@ from mmengine.registry import METRICS as MMENGINE_METRICS from mmengine.registry import MODEL_WRAPPERS as MMENGINE_MODEL_WRAPPERS from mmengine.registry import MODELS as MMENGINE_MODELS -from mmengine.registry import \ - OPTIM_WRAPPER_CONSTRUCTORS as MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS +from mmengine.registry import ( + OPTIM_WRAPPER_CONSTRUCTORS as MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS, +) from mmengine.registry import OPTIM_WRAPPERS as MMENGINE_OPTIM_WRAPPERS from mmengine.registry import OPTIMIZERS as MMENGINE_OPTIMIZERS from mmengine.registry import PARAM_SCHEDULERS as MMENGINE_PARAM_SCHEDULERS -from mmengine.registry import \ - RUNNER_CONSTRUCTORS as MMENGINE_RUNNER_CONSTRUCTORS +from mmengine.registry import RUNNER_CONSTRUCTORS as MMENGINE_RUNNER_CONSTRUCTORS from mmengine.registry import RUNNERS as MMENGINE_RUNNERS from mmengine.registry import TASK_UTILS as MMENGINE_TASK_UTILS from mmengine.registry import TRANSFORMS as MMENGINE_TRANSFORMS from mmengine.registry import VISBACKENDS as MMENGINE_VISBACKENDS from mmengine.registry import VISUALIZERS as MMENGINE_VISUALIZERS -from mmengine.registry import \ - WEIGHT_INITIALIZERS as MMENGINE_WEIGHT_INITIALIZERS +from mmengine.registry import WEIGHT_INITIALIZERS as MMENGINE_WEIGHT_INITIALIZERS from mmengine.registry import Registry # manage all kinds of runners like `EpochBasedRunner` and `IterBasedRunner` -RUNNERS = Registry('runner', parent=MMENGINE_RUNNERS) +RUNNERS = Registry("runner", parent=MMENGINE_RUNNERS) # manage runner constructors that define how to initialize runners RUNNER_CONSTRUCTORS = Registry( - 'runner constructor', parent=MMENGINE_RUNNER_CONSTRUCTORS) + "runner constructor", parent=MMENGINE_RUNNER_CONSTRUCTORS +) # manage all kinds of loops like `EpochBasedTrainLoop` -LOOPS = Registry('loop', parent=MMENGINE_LOOPS) +LOOPS = Registry("loop", parent=MMENGINE_LOOPS) # manage all kinds of hooks like `CheckpointHook` -HOOKS = Registry('hook', parent=MMENGINE_HOOKS) +HOOKS = Registry("hook", parent=MMENGINE_HOOKS) # manage data-related modules -DATASETS = Registry('dataset', parent=MMENGINE_DATASETS) -DATA_SAMPLERS = Registry('data sampler', parent=MMENGINE_DATA_SAMPLERS) -TRANSFORMS = Registry('transform', parent=MMENGINE_TRANSFORMS) +DATASETS = Registry("dataset", parent=MMENGINE_DATASETS) +DATA_SAMPLERS = Registry("data sampler", parent=MMENGINE_DATA_SAMPLERS) +TRANSFORMS = Registry("transform", parent=MMENGINE_TRANSFORMS) # manage all kinds of modules inheriting `nn.Module` -MODELS = Registry('model', parent=MMENGINE_MODELS) +MODELS = Registry("model", parent=MMENGINE_MODELS) # manage all kinds of model wrappers like 'MMDistributedDataParallel' -MODEL_WRAPPERS = Registry('model_wrapper', 
parent=MMENGINE_MODEL_WRAPPERS) +MODEL_WRAPPERS = Registry("model_wrapper", parent=MMENGINE_MODEL_WRAPPERS) # manage all kinds of weight initialization modules like `Uniform` WEIGHT_INITIALIZERS = Registry( - 'weight initializer', parent=MMENGINE_WEIGHT_INITIALIZERS) + "weight initializer", parent=MMENGINE_WEIGHT_INITIALIZERS +) # manage all kinds of optimizers like `SGD` and `Adam` -OPTIMIZERS = Registry('optimizer', parent=MMENGINE_OPTIMIZERS) -OPTIM_WRAPPERS = Registry('optim_wrapper', parent=MMENGINE_OPTIM_WRAPPERS) +OPTIMIZERS = Registry("optimizer", parent=MMENGINE_OPTIMIZERS) +OPTIM_WRAPPERS = Registry("optim_wrapper", parent=MMENGINE_OPTIM_WRAPPERS) # manage constructors that customize the optimization hyperparameters. OPTIM_WRAPPER_CONSTRUCTORS = Registry( - 'optimizer constructor', parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS) + "optimizer constructor", parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS +) # manage all kinds of parameter schedulers like `MultiStepLR` -PARAM_SCHEDULERS = Registry( - 'parameter scheduler', parent=MMENGINE_PARAM_SCHEDULERS) +PARAM_SCHEDULERS = Registry("parameter scheduler", parent=MMENGINE_PARAM_SCHEDULERS) # manage all kinds of metrics -METRICS = Registry('metric', parent=MMENGINE_METRICS) +METRICS = Registry("metric", parent=MMENGINE_METRICS) # manage task-specific modules like anchor generators and box coders -TASK_UTILS = Registry('task util', parent=MMENGINE_TASK_UTILS) +TASK_UTILS = Registry("task util", parent=MMENGINE_TASK_UTILS) # manage visualizer -VISUALIZERS = Registry('visualizer', parent=MMENGINE_VISUALIZERS) +VISUALIZERS = Registry("visualizer", parent=MMENGINE_VISUALIZERS) # manage visualizer backend -VISBACKENDS = Registry('vis_backend', parent=MMENGINE_VISBACKENDS) +VISBACKENDS = Registry("vis_backend", parent=MMENGINE_VISBACKENDS) diff --git a/mmyolo/mmyolo/testing/__init__.py b/mmyolo/mmyolo/testing/__init__.py index b6d7a010..c3bcd57c 100644 --- a/mmyolo/mmyolo/testing/__init__.py +++ b/mmyolo/mmyolo/testing/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from ._utils import get_detector_cfg -__all__ = ['get_detector_cfg'] +__all__ = ["get_detector_cfg"] diff --git a/mmyolo/mmyolo/testing/_utils.py b/mmyolo/mmyolo/testing/_utils.py index 9ccf2fe0..e97787a2 100644 --- a/mmyolo/mmyolo/testing/_utils.py +++ b/mmyolo/mmyolo/testing/_utils.py @@ -14,10 +14,11 @@ def _get_config_directory(): except NameError: # For IPython development when this __file__ is not defined import mmyolo + repo_dpath = dirname(dirname(mmyolo.__file__)) - config_dpath = join(repo_dpath, 'configs') + config_dpath = join(repo_dpath, "configs") if not exists(config_dpath): - raise Exception('Cannot find config path') + raise Exception("Cannot find config path") return config_dpath diff --git a/mmyolo/mmyolo/utils/__init__.py b/mmyolo/mmyolo/utils/__init__.py index e466c982..96b03995 100644 --- a/mmyolo/mmyolo/utils/__init__.py +++ b/mmyolo/mmyolo/utils/__init__.py @@ -3,4 +3,4 @@ from .misc import switch_to_deploy from .setup_env import register_all_modules -__all__ = ['register_all_modules', 'collect_env', 'switch_to_deploy'] +__all__ = ["register_all_modules", "collect_env", "switch_to_deploy"] diff --git a/mmyolo/mmyolo/utils/boxam_utils.py b/mmyolo/mmyolo/utils/boxam_utils.py index a0168b66..f43171d1 100644 --- a/mmyolo/mmyolo/utils/boxam_utils.py +++ b/mmyolo/mmyolo/utils/boxam_utils.py @@ -21,8 +21,7 @@ from mmyolo.registry import MODELS try: - from pytorch_grad_cam import (AblationCAM, AblationLayer, - ActivationsAndGradients) + from pytorch_grad_cam import AblationCAM, AblationLayer, ActivationsAndGradients from pytorch_grad_cam import GradCAM as Base_GradCAM from pytorch_grad_cam import GradCAMPlusPlus as Base_GradCAMPlusPlus from pytorch_grad_cam.base_cam import BaseCAM @@ -35,8 +34,8 @@ def init_detector( config: Union[str, Path, Config], checkpoint: Optional[str] = None, - palette: str = 'coco', - device: str = 'cuda:0', + palette: str = "coco", + device: str = "cuda:0", cfg_options: Optional[dict] = None, ) -> nn.Module: """Initialize a detector from config file. @@ -61,11 +60,12 @@ def init_detector( if isinstance(config, (str, Path)): config = Config.fromfile(config) elif not isinstance(config, Config): - raise TypeError('config must be a filename or Config object, ' - f'but got {type(config)}') + raise TypeError( + "config must be a filename or Config object, " f"but got {type(config)}" + ) if cfg_options is not None: config.merge_from_dict(cfg_options) - elif 'init_cfg' in config.model.backbone: + elif "init_cfg" in config.model.backbone: config.model.backbone.init_cfg = None # only change this @@ -74,29 +74,26 @@ def init_detector( model = MODELS.build(config.model) if checkpoint is not None: - checkpoint = load_checkpoint(model, checkpoint, map_location='cpu') + checkpoint = load_checkpoint(model, checkpoint, map_location="cpu") # Weights converted from elsewhere may not have meta fields. 
- checkpoint_meta = checkpoint.get('meta', {}) + checkpoint_meta = checkpoint.get("meta", {}) # save the dataset_meta in the model for convenience - if 'dataset_meta' in checkpoint_meta: + if "dataset_meta" in checkpoint_meta: # mmdet 3.x, all keys should be lowercase model.dataset_meta = { - k.lower(): v - for k, v in checkpoint_meta['dataset_meta'].items() + k.lower(): v for k, v in checkpoint_meta["dataset_meta"].items() } - elif 'CLASSES' in checkpoint_meta: + elif "CLASSES" in checkpoint_meta: # < mmdet 3.x - classes = checkpoint_meta['CLASSES'] - model.dataset_meta = {'classes': classes, 'palette': palette} + classes = checkpoint_meta["CLASSES"] + model.dataset_meta = {"classes": classes, "palette": palette} else: - warnings.simplefilter('once') + warnings.simplefilter("once") warnings.warn( - 'dataset_meta or class names are not saved in the ' - 'checkpoint\'s meta data, use COCO classes by default.') - model.dataset_meta = { - 'classes': get_classes('coco'), - 'palette': palette - } + "dataset_meta or class names are not saved in the " + "checkpoint's meta data, use COCO classes by default." + ) + model.dataset_meta = {"classes": get_classes("coco"), "palette": palette} model.cfg = config # save the config in the model for convenience model.to(device) @@ -104,9 +101,11 @@ def init_detector( return model -def reshape_transform(feats: Union[Tensor, List[Tensor]], - max_shape: Tuple[int, int] = (20, 20), - is_need_grad: bool = False): +def reshape_transform( + feats: Union[Tensor, List[Tensor]], + max_shape: Tuple[int, int] = (20, 20), + is_need_grad: bool = False, +): """Reshape and aggregate feature maps when the input is a multi-layer feature map. @@ -120,8 +119,10 @@ def reshape_transform(feats: Union[Tensor, List[Tensor]], feats = [feats] else: if is_need_grad: - raise NotImplementedError('The `grad_base` method does not ' - 'support output multi-activation layers') + raise NotImplementedError( + "The `grad_base` method does not " + "support output multi-activation layers" + ) max_h = max([im.shape[-2] for im in feats]) max_w = max([im.shape[-1] for im in feats]) @@ -133,8 +134,8 @@ def reshape_transform(feats: Union[Tensor, List[Tensor]], activations = [] for feat in feats: activations.append( - torch.nn.functional.interpolate( - torch.abs(feat), max_shape, mode='bilinear')) + torch.nn.functional.interpolate(torch.abs(feat), max_shape, mode="bilinear") + ) activations = torch.cat(activations, axis=1) return activations @@ -144,11 +145,9 @@ class BoxAMDetectorWrapper(nn.Module): """Wrap the mmdet model class to facilitate handling of non-tensor situations during inference.""" - def __init__(self, - cfg: ConfigType, - checkpoint: str, - score_thr: float, - device: str = 'cuda:0'): + def __init__( + self, cfg: ConfigType, checkpoint: str, score_thr: float, device: str = "cuda:0" + ): super().__init__() self.cfg = cfg self.device = device @@ -157,11 +156,11 @@ def __init__(self, self.detector = init_detector(self.cfg, self.checkpoint, device=device) pipeline_cfg = copy.deepcopy(self.cfg.test_dataloader.dataset.pipeline) - pipeline_cfg[0].type = 'mmdet.LoadImageFromNDArray' + pipeline_cfg[0].type = "mmdet.LoadImageFromNDArray" new_test_pipeline = [] for pipeline in pipeline_cfg: - if not pipeline['type'].endswith('LoadAnnotations'): + if not pipeline["type"].endswith("LoadAnnotations"): new_test_pipeline.append(pipeline) self.test_pipeline = Compose(new_test_pipeline) @@ -173,9 +172,9 @@ def need_loss(self, is_need_loss: bool): """Grad-based methods require loss.""" self.is_need_loss 
= is_need_loss - def set_input_data(self, - image: np.ndarray, - pred_instances: Optional[InstanceData] = None): + def set_input_data( + self, image: np.ndarray, pred_instances: Optional[InstanceData] = None + ): """Set the input data to be used in the next step.""" self.image = image @@ -186,13 +185,14 @@ def set_input_data(self, img=self.image, img_id=0, gt_bboxes=pred_instances.bboxes, - gt_bboxes_labels=pred_instances.labels) + gt_bboxes_labels=pred_instances.labels, + ) data = self.test_pipeline(data) else: data = dict(img=self.image, img_id=0) data = self.test_pipeline(data) - data['inputs'] = [data['inputs']] - data['data_samples'] = [data['data_samples']] + data["inputs"] = [data["inputs"]] + data["data_samples"] = [data["data_samples"]] self.input_data = data def __call__(self, *args, **kwargs): @@ -201,17 +201,17 @@ def __call__(self, *args, **kwargs): # Maybe this is a direction that can be optimized # self.detector.init_weights() - if hasattr(self.detector.bbox_head, 'featmap_sizes'): + if hasattr(self.detector.bbox_head, "featmap_sizes"): # Prevent the model algorithm error when calculating loss self.detector.bbox_head.featmap_sizes = None data_ = {} - data_['inputs'] = [self.input_data['inputs']] - data_['data_samples'] = [self.input_data['data_samples']] + data_["inputs"] = [self.input_data["inputs"]] + data_["data_samples"] = [self.input_data["data_samples"]] data = self.detector.data_preprocessor(data_, training=False) - loss = self.detector._run_forward(data, mode='loss') + loss = self.detector._run_forward(data, mode="loss") - if hasattr(self.detector.bbox_head, 'featmap_sizes'): + if hasattr(self.detector.bbox_head, "featmap_sizes"): self.detector.bbox_head.featmap_sizes = None return [loss] @@ -224,40 +224,42 @@ def __call__(self, *args, **kwargs): class BoxAMDetectorVisualizer: """Box AM visualization class.""" - def __init__(self, - method_class, - model: nn.Module, - target_layers: List, - reshape_transform: Optional[Callable] = None, - is_need_grad: bool = False, - extra_params: Optional[dict] = None): + def __init__( + self, + method_class, + model: nn.Module, + target_layers: List, + reshape_transform: Optional[Callable] = None, + is_need_grad: bool = False, + extra_params: Optional[dict] = None, + ): self.target_layers = target_layers self.reshape_transform = reshape_transform self.is_need_grad = is_need_grad - if method_class.__name__ == 'AblationCAM': - batch_size = extra_params.get('batch_size', 1) - ratio_channels_to_ablate = extra_params.get( - 'ratio_channels_to_ablate', 1.) 
+ if method_class.__name__ == "AblationCAM": + batch_size = extra_params.get("batch_size", 1) + ratio_channels_to_ablate = extra_params.get("ratio_channels_to_ablate", 1.0) self.cam = AblationCAM( model, target_layers, - use_cuda=True if 'cuda' in model.device else False, + use_cuda=True if "cuda" in model.device else False, reshape_transform=reshape_transform, batch_size=batch_size, - ablation_layer=extra_params['ablation_layer'], - ratio_channels_to_ablate=ratio_channels_to_ablate) + ablation_layer=extra_params["ablation_layer"], + ratio_channels_to_ablate=ratio_channels_to_ablate, + ) else: self.cam = method_class( model, target_layers, - use_cuda=True if 'cuda' in model.device else False, + use_cuda=True if "cuda" in model.device else False, reshape_transform=reshape_transform, ) if self.is_need_grad: self.cam.activations_and_grads.release() - self.classes = model.detector.dataset_meta['classes'] + self.classes = model.detector.dataset_meta["classes"] self.COLORS = np.random.uniform(0, 255, size=(len(self.classes), 3)) def switch_activations_and_grads(self, model) -> None: @@ -267,7 +269,8 @@ def switch_activations_and_grads(self, model) -> None: if self.is_need_grad is True: self.cam.activations_and_grads = ActivationsAndGradients( - model, self.target_layers, self.reshape_transform) + model, self.target_layers, self.reshape_transform + ) self.is_need_grad = False else: self.cam.activations_and_grads.release() @@ -277,11 +280,13 @@ def __call__(self, img, targets, aug_smooth=False, eigen_smooth=False): img = torch.from_numpy(img)[None].permute(0, 3, 1, 2) return self.cam(img, targets, aug_smooth, eigen_smooth)[0, :] - def show_am(self, - image: np.ndarray, - pred_instance: InstanceData, - grayscale_am: np.ndarray, - with_norm_in_bboxes: bool = False): + def show_am( + self, + image: np.ndarray, + pred_instance: InstanceData, + grayscale_am: np.ndarray, + with_norm_in_bboxes: bool = False, + ): """Normalize the AM to be in the range [0, 1] inside every bounding boxes, and zero outside of the bounding boxes.""" @@ -295,7 +300,8 @@ def show_am(self, for x1, y1, x2, y2 in boxes: img = renormalized_am * 0 img[y1:y2, x1:x2] = scale_cam_image( - [grayscale_am[y1:y2, x1:x2].copy()])[0] + [grayscale_am[y1:y2, x1:x2].copy()] + )[0] images.append(img) renormalized_am = np.max(np.float32(images), axis=0) @@ -304,38 +310,44 @@ def show_am(self, renormalized_am = grayscale_am am_image_renormalized = show_cam_on_image( - image / 255, renormalized_am, use_rgb=False) + image / 255, renormalized_am, use_rgb=False + ) image_with_bounding_boxes = self._draw_boxes( - boxes, labels, am_image_renormalized, pred_instance.get('scores')) + boxes, labels, am_image_renormalized, pred_instance.get("scores") + ) return image_with_bounding_boxes - def _draw_boxes(self, - boxes: List, - labels: List, - image: np.ndarray, - scores: Optional[List] = None): + def _draw_boxes( + self, + boxes: List, + labels: List, + image: np.ndarray, + scores: Optional[List] = None, + ): """draw boxes on image.""" for i, box in enumerate(boxes): label = labels[i] color = self.COLORS[label] - cv2.rectangle(image, (int(box[0]), int(box[1])), - (int(box[2]), int(box[3])), color, 2) + cv2.rectangle( + image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2 + ) if scores is not None: score = scores[i] - text = str(self.classes[label]) + ': ' + str( - round(score * 100, 1)) + text = str(self.classes[label]) + ": " + str(round(score * 100, 1)) else: text = self.classes[label] cv2.putText( image, - text, (int(box[0]), 
int(box[1] - 5)), + text, + (int(box[0]), int(box[1] - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, - lineType=cv2.LINE_AA) + lineType=cv2.LINE_AA, + ) return image @@ -346,20 +358,18 @@ def __init__(self): super().__init__() self.activations = None - def set_next_batch(self, input_batch_index, activations, - num_channels_to_ablate): + def set_next_batch(self, input_batch_index, activations, num_channels_to_ablate): """Extract the next batch member from activations, and repeat it num_channels_to_ablate times.""" if isinstance(activations, torch.Tensor): - return super().set_next_batch(input_batch_index, activations, - num_channels_to_ablate) + return super().set_next_batch( + input_batch_index, activations, num_channels_to_ablate + ) self.activations = [] for activation in activations: - activation = activation[ - input_batch_index, :, :, :].clone().unsqueeze(0) - self.activations.append( - activation.repeat(num_channels_to_ablate, 1, 1, 1)) + activation = activation[input_batch_index, :, :, :].clone().unsqueeze(0) + self.activations.append(activation.repeat(num_channels_to_ablate, 1, 1, 1)) def __call__(self, x): """Go over the activation indices to be ablated, stored in @@ -372,11 +382,11 @@ def __call__(self, x): channel_cumsum = np.cumsum([r.shape[1] for r in result]) num_channels_to_ablate = result[0].size(0) # batch for i in range(num_channels_to_ablate): - pyramid_layer = bisect.bisect_right(channel_cumsum, - self.indices[i]) + pyramid_layer = bisect.bisect_right(channel_cumsum, self.indices[i]) if pyramid_layer > 0: - index_in_pyramid_layer = self.indices[i] - channel_cumsum[ - pyramid_layer - 1] + index_in_pyramid_layer = ( + self.indices[i] - channel_cumsum[pyramid_layer - 1] + ) else: index_in_pyramid_layer = self.indices[i] result[pyramid_layer][i, index_in_pyramid_layer, :, :] = -1000 @@ -401,11 +411,13 @@ class DetBoxScoreTarget: the sum of losses after excluding a specific key. 
""" - def __init__(self, - pred_instance: InstanceData, - match_iou_thr: float = 0.5, - device: str = 'cuda:0', - ignore_loss_params: Optional[List] = None): + def __init__( + self, + pred_instance: InstanceData, + match_iou_thr: float = 0.5, + device: str = "cuda:0", + ignore_loss_params: Optional[List] = None, + ): self.focal_bboxes = pred_instance.bboxes self.focal_labels = pred_instance.labels self.match_iou_thr = match_iou_thr @@ -415,14 +427,13 @@ def __init__(self, assert isinstance(self.ignore_loss_params, list) def __call__(self, results): - output = torch.tensor([0.], device=self.device) + output = torch.tensor([0.0], device=self.device) - if 'loss_cls' in results: + if "loss_cls" in results: # grad-based method # results is dict for loss_key, loss_value in results.items(): - if 'loss' not in loss_key or \ - loss_key in self.ignore_loss_params: + if "loss" not in loss_key or loss_key in self.ignore_loss_params: continue if isinstance(loss_value, list): output += sum(loss_value) @@ -440,13 +451,13 @@ def __call__(self, results): pred_scores = pred_instances.scores pred_labels = pred_instances.labels - for focal_box, focal_label in zip(self.focal_bboxes, - self.focal_labels): - ious = torchvision.ops.box_iou(focal_box[None], - pred_bboxes[..., :4]) + for focal_box, focal_label in zip(self.focal_bboxes, self.focal_labels): + ious = torchvision.ops.box_iou(focal_box[None], pred_bboxes[..., :4]) index = ious.argmax() - if ious[0, index] > self.match_iou_thr and pred_labels[ - index] == focal_label: + if ( + ious[0, index] > self.match_iou_thr + and pred_labels[index] == focal_label + ): # TODO: Adaptive adjustment of weights based on algorithms score = ious[0, index] + pred_scores[index] output = output + score @@ -461,16 +472,19 @@ class SpatialBaseCAM(BaseCAM): tasks. There is no need to average the gradients in the detection task. 
""" - def get_cam_image(self, - input_tensor: torch.Tensor, - target_layer: torch.nn.Module, - targets: List[torch.nn.Module], - activations: torch.Tensor, - grads: torch.Tensor, - eigen_smooth: bool = False) -> np.ndarray: - - weights = self.get_cam_weights(input_tensor, target_layer, targets, - activations, grads) + def get_cam_image( + self, + input_tensor: torch.Tensor, + target_layer: torch.nn.Module, + targets: List[torch.nn.Module], + activations: torch.Tensor, + grads: torch.Tensor, + eigen_smooth: bool = False, + ) -> np.ndarray: + + weights = self.get_cam_weights( + input_tensor, target_layer, targets, activations, grads + ) weighted_activations = weights * activations if eigen_smooth: cam = get_2d_projection(weighted_activations) @@ -482,24 +496,26 @@ def get_cam_image(self, class GradCAM(SpatialBaseCAM, Base_GradCAM): """Gradients are no longer averaged over the spatial dimension.""" - def get_cam_weights(self, input_tensor, target_layer, target_category, - activations, grads): + def get_cam_weights( + self, input_tensor, target_layer, target_category, activations, grads + ): return grads class GradCAMPlusPlus(SpatialBaseCAM, Base_GradCAMPlusPlus): """Gradients are no longer averaged over the spatial dimension.""" - def get_cam_weights(self, input_tensor, target_layers, target_category, - activations, grads): + def get_cam_weights( + self, input_tensor, target_layers, target_category, activations, grads + ): grads_power_2 = grads**2 grads_power_3 = grads_power_2 * grads # Equation 19 in https://arxiv.org/abs/1710.11063 sum_activations = np.sum(activations, axis=(2, 3)) eps = 0.000001 aij = grads_power_2 / ( - 2 * grads_power_2 + - sum_activations[:, :, None, None] * grads_power_3 + eps) + 2 * grads_power_2 + sum_activations[:, :, None, None] * grads_power_3 + eps + ) # Now bring back the ReLU from eq.7 in the paper, # And zero out aijs where the activations are 0 aij = np.where(grads != 0, aij, 0) diff --git a/mmyolo/mmyolo/utils/collect_env.py b/mmyolo/mmyolo/utils/collect_env.py index 89bad658..0914129d 100644 --- a/mmyolo/mmyolo/utils/collect_env.py +++ b/mmyolo/mmyolo/utils/collect_env.py @@ -10,12 +10,12 @@ def collect_env() -> dict: """Collect the information of the running environments.""" env_info = collect_base_env() - env_info['MMCV'] = mmcv.__version__ - env_info['MMDetection'] = mmdet.__version__ - env_info['MMYOLO'] = mmyolo.__version__ + '+' + get_git_hash()[:7] + env_info["MMCV"] = mmcv.__version__ + env_info["MMDetection"] = mmdet.__version__ + env_info["MMYOLO"] = mmyolo.__version__ + "+" + get_git_hash()[:7] return env_info -if __name__ == '__main__': +if __name__ == "__main__": for name, val in collect_env().items(): - print(f'{name}: {val}') + print(f"{name}: {val}") diff --git a/mmyolo/mmyolo/utils/labelme_utils.py b/mmyolo/mmyolo/utils/labelme_utils.py index 09819197..6172e972 100644 --- a/mmyolo/mmyolo/utils/labelme_utils.py +++ b/mmyolo/mmyolo/utils/labelme_utils.py @@ -18,8 +18,13 @@ def __init__(self, classes: tuple): super().__init__() self.classes = classes - def __call__(self, pred_instances: InstanceData, metainfo: dict, - output_path: str, selected_classes: list): + def __call__( + self, + pred_instances: InstanceData, + metainfo: dict, + output_path: str, + selected_classes: list, + ): """Get image data field for labelme. 
Args: @@ -58,35 +63,34 @@ def __call__(self, pred_instances: InstanceData, metainfo: dict, } """ - image_path = os.path.abspath(metainfo['img_path']) + image_path = os.path.abspath(metainfo["img_path"]) json_info = { - 'version': '5.1.1', - 'flags': {}, - 'imagePath': image_path, - 'imageData': None, - 'imageHeight': metainfo['ori_shape'][0], - 'imageWidth': metainfo['ori_shape'][1], - 'shapes': [] + "version": "5.1.1", + "flags": {}, + "imagePath": image_path, + "imageData": None, + "imageHeight": metainfo["ori_shape"][0], + "imageWidth": metainfo["ori_shape"][1], + "shapes": [], } for pred_instance in pred_instances: pred_bbox = pred_instance.bboxes.cpu().numpy().tolist()[0] pred_label = self.classes[pred_instance.labels] - if selected_classes is not None and \ - pred_label not in selected_classes: + if selected_classes is not None and pred_label not in selected_classes: # filter class name continue sub_dict = { - 'label': pred_label, - 'points': [pred_bbox[:2], pred_bbox[2:]], - 'group_id': None, - 'shape_type': 'rectangle', - 'flags': {} + "label": pred_label, + "points": [pred_bbox[:2], pred_bbox[2:]], + "group_id": None, + "shape_type": "rectangle", + "flags": {}, } - json_info['shapes'].append(sub_dict) + json_info["shapes"].append(sub_dict) - with open(output_path, 'w', encoding='utf-8') as f_json: + with open(output_path, "w", encoding="utf-8") as f_json: json.dump(json_info, f_json, ensure_ascii=False, indent=2) diff --git a/mmyolo/mmyolo/utils/large_image.py b/mmyolo/mmyolo/utils/large_image.py index 68c6938e..42e7cd3f 100644 --- a/mmyolo/mmyolo/utils/large_image.py +++ b/mmyolo/mmyolo/utils/large_image.py @@ -6,9 +6,11 @@ from mmengine.structures import InstanceData -def shift_predictions(det_data_samples: SampleList, - offsets: Sequence[Tuple[int, int]], - src_image_shape: Tuple[int, int]) -> SampleList: +def shift_predictions( + det_data_samples: SampleList, + offsets: Sequence[Tuple[int, int]], + src_image_shape: Tuple[int, int], +) -> SampleList: """Shift predictions to the original image. Args: @@ -23,20 +25,22 @@ def shift_predictions(det_data_samples: SampleList, try: from sahi.slicing import shift_bboxes, shift_masks except ImportError: - raise ImportError('Please run "pip install -U sahi" ' - 'to install sahi first for large image inference.') + raise ImportError( + 'Please run "pip install -U sahi" ' + "to install sahi first for large image inference." + ) - assert len(det_data_samples) == len( - offsets), 'The `results` should has the ' 'same length with `offsets`.' + assert len(det_data_samples) == len(offsets), ( + "The `results` should have the " "same length as `offsets`."
+ ) shifted_predictions = [] for det_data_sample, offset in zip(det_data_samples, offsets): pred_inst = det_data_sample.pred_instances.clone() # shift bboxes and masks pred_inst.bboxes = shift_bboxes(pred_inst.bboxes, offset) - if 'masks' in det_data_sample: - pred_inst.masks = shift_masks(pred_inst.masks, offset, - src_image_shape) + if "masks" in det_data_sample: + pred_inst.masks = shift_masks(pred_inst.masks, offset, src_image_shape) shifted_predictions.append(pred_inst.clone()) @@ -45,10 +49,12 @@ def shift_predictions(det_data_samples: SampleList, return shifted_predictions -def merge_results_by_nms(results: SampleList, offsets: Sequence[Tuple[int, - int]], - src_image_shape: Tuple[int, int], - nms_cfg: dict) -> DetDataSample: +def merge_results_by_nms( + results: SampleList, + offsets: Sequence[Tuple[int, int]], + src_image_shape: Tuple[int, int], + nms_cfg: dict, +) -> DetDataSample: """Merge patch results by nms. Args: @@ -68,7 +74,8 @@ def merge_results_by_nms(results: SampleList, offsets: Sequence[Tuple[int, boxes=shifted_instances.bboxes, scores=shifted_instances.scores, idxs=shifted_instances.labels, - nms_cfg=nms_cfg) + nms_cfg=nms_cfg, + ) merged_instances = shifted_instances[keeps] merged_result = results[0].clone() diff --git a/mmyolo/mmyolo/utils/misc.py b/mmyolo/mmyolo/utils/misc.py index 5b5dd5d2..6d05bc23 100644 --- a/mmyolo/mmyolo/utils/misc.py +++ b/mmyolo/mmyolo/utils/misc.py @@ -9,8 +9,17 @@ from mmyolo.models import RepVGGBlock -IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', - '.tiff', '.webp') +IMG_EXTENSIONS = ( + ".jpg", + ".jpeg", + ".png", + ".ppm", + ".bmp", + ".pgm", + ".tif", + ".tiff", + ".webp", +) def switch_to_deploy(model): @@ -19,7 +28,7 @@ def switch_to_deploy(model): if isinstance(layer, RepVGGBlock): layer.switch_to_deploy() - print('Switch model to deploy modality.') + print("Switch model to deploy modality.") def auto_arrange_images(image_list: list, image_column: int = 2) -> np.ndarray: @@ -38,9 +47,9 @@ def auto_arrange_images(image_list: list, image_column: int = 2) -> np.ndarray: else: # arrange image according to image_column image_row = round(img_count / image_column) - fill_img_list = [np.ones(image_list[0].shape, dtype=np.uint8) * 255 - ] * ( - image_row * image_column - img_count) + fill_img_list = [np.ones(image_list[0].shape, dtype=np.uint8) * 255] * ( + image_row * image_column - img_count + ) image_list.extend(fill_img_list) merge_imgs_col = [] for i in range(image_row): @@ -66,7 +75,7 @@ def get_file_list(source_root: str) -> [list, dict]: source_type (dict): Source type: file or url or dir. 
""" is_dir = os.path.isdir(source_root) - is_url = source_root.startswith(('http:/', 'https:/')) + is_url = source_root.startswith(("http:/", "https:/")) is_file = os.path.splitext(source_root)[-1].lower() in IMG_EXTENSIONS source_file_path_list = [] @@ -76,17 +85,16 @@ def get_file_list(source_root: str) -> [list, dict]: source_file_path_list.append(os.path.join(source_root, file)) elif is_url: # when input source is url - filename = os.path.basename( - urllib.parse.unquote(source_root).split('?')[0]) + filename = os.path.basename(urllib.parse.unquote(source_root).split("?")[0]) file_save_path = os.path.join(os.getcwd(), filename) - print(f'Downloading source file to {file_save_path}') + print(f"Downloading source file to {file_save_path}") torch.hub.download_url_to_file(source_root, file_save_path) source_file_path_list = [file_save_path] elif is_file: # when input source is single image source_file_path_list = [source_root] else: - print('Cannot find image file.') + print("Cannot find image file.") source_type = dict(is_dir=is_dir, is_url=is_url, is_file=is_file) @@ -95,22 +103,21 @@ def get_file_list(source_root: str) -> [list, dict]: def show_data_classes(data_classes): """When printing an error, all class names of the dataset.""" - print('\n\nThe name of the class contained in the dataset:') + print("\n\nThe name of the class contained in the dataset:") data_classes_info = PrettyTable() - data_classes_info.title = 'Information of dataset class' + data_classes_info.title = "Information of dataset class" # List Print Settings # If the quantity is too large, 25 rows will be displayed in each column if len(data_classes) < 25: - data_classes_info.add_column('Class name', data_classes) + data_classes_info.add_column("Class name", data_classes) elif len(data_classes) % 25 != 0 and len(data_classes) > 25: col_num = int(len(data_classes) / 25) + 1 data_name_list = list(data_classes) for i in range(0, (col_num * 25) - len(data_classes)): - data_name_list.append('') + data_name_list.append("") for i in range(0, len(data_name_list), 25): - data_classes_info.add_column('Class name', - data_name_list[i:i + 25]) + data_classes_info.add_column("Class name", data_name_list[i : i + 25]) # Align display data to the left - data_classes_info.align['Class name'] = 'l' + data_classes_info.align["Class name"] = "l" print(data_classes_info) diff --git a/mmyolo/mmyolo/utils/setup_env.py b/mmyolo/mmyolo/utils/setup_env.py index f51ed928..456ada55 100644 --- a/mmyolo/mmyolo/utils/setup_env.py +++ b/mmyolo/mmyolo/utils/setup_env.py @@ -24,18 +24,22 @@ def register_all_modules(init_default_scope: bool = True): import mmyolo.models # noqa: F401,F403 if init_default_scope: - never_created = DefaultScope.get_current_instance() is None \ - or not DefaultScope.check_instance_created('mmyolo') + never_created = ( + DefaultScope.get_current_instance() is None + or not DefaultScope.check_instance_created("mmyolo") + ) if never_created: - DefaultScope.get_instance('mmyolo', scope_name='mmyolo') + DefaultScope.get_instance("mmyolo", scope_name="mmyolo") return current_scope = DefaultScope.get_current_instance() - if current_scope.scope_name != 'mmyolo': - warnings.warn('The current default scope ' - f'"{current_scope.scope_name}" is not "mmyolo", ' - '`register_all_modules` will force the current' - 'default scope to be "mmyolo". 
If this is not ' - 'expected, please set `init_default_scope=False`.') + if current_scope.scope_name != "mmyolo": + warnings.warn( + "The current default scope " + f'"{current_scope.scope_name}" is not "mmyolo", ' + "`register_all_modules` will force the current " + 'default scope to be "mmyolo". If this is not ' + "expected, please set `init_default_scope=False`." + ) # avoid name conflict - new_instance_name = f'mmyolo-{datetime.datetime.now()}' - DefaultScope.get_instance(new_instance_name, scope_name='mmyolo') + new_instance_name = f"mmyolo-{datetime.datetime.now()}" + DefaultScope.get_instance(new_instance_name, scope_name="mmyolo") diff --git a/mmyolo/mmyolo/version.py b/mmyolo/mmyolo/version.py index 92e8d704..9b7a5cea 100644 --- a/mmyolo/mmyolo/version.py +++ b/mmyolo/mmyolo/version.py @@ -1,8 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. +from typing import Tuple -__version__ = '0.4.0' +__version__ = "0.4.0" -from typing import Tuple short_version = __version__ @@ -10,13 +10,13 @@ def parse_version_info(version_str: str) -> Tuple: """Parse version info of MMYOLO.""" version_info = [] - for x in version_str.split('.'): + for x in version_str.split("."): if x.isdigit(): version_info.append(int(x)) - elif x.find('rc') != -1: - patch_version = x.split('rc') + elif x.find("rc") != -1: + patch_version = x.split("rc") version_info.append(int(patch_version[0])) - version_info.append(f'rc{patch_version[1]}') + version_info.append(f"rc{patch_version[1]}") return tuple(version_info) diff --git a/mmyolo/projects/assigner_visualization/assigner_visualization.py b/mmyolo/projects/assigner_visualization/assigner_visualization.py index 0086985f..14cbdfd3 100644 --- a/mmyolo/projects/assigner_visualization/assigner_visualization.py +++ b/mmyolo/projects/assigner_visualization/assigner_visualization.py @@ -11,56 +11,59 @@ from mmengine.config import Config, DictAction from mmengine.dataset import COLLATE_FUNCTIONS from numpy import random +from projects.assigner_visualization.dense_heads import YOLOv5HeadAssigner +from projects.assigner_visualization.visualization import YOLOAssignerVisualizer from mmyolo.registry import DATASETS, MODELS from mmyolo.utils import register_all_modules -from projects.assigner_visualization.dense_heads import YOLOv5HeadAssigner -from projects.assigner_visualization.visualization import \ YOLOAssignerVisualizer def parse_args(): parser = argparse.ArgumentParser( - description='MMYOLO show the positive sample assigning' - ' results.') - parser.add_argument('config', help='config file path') + description="MMYOLO show the positive sample assigning" " results." + ) + parser.add_argument("config", help="config file path") parser.add_argument( - '--show-number', - '-n', + "--show-number", + "-n", type=int, default=sys.maxsize, - help='number of images selected to save, ' - 'must bigger than 0. if the number is bigger than length ' - 'of dataset, show all the images in dataset; ' - 'default "sys.maxsize", show all images in dataset') + help="number of images selected to save, " + "must be bigger than 0.
If the number is bigger than length " + "of dataset, show all the images in dataset; " + 'default "sys.maxsize", show all images in dataset', + ) parser.add_argument( - '--output-dir', - default='assigned_results', + "--output-dir", + default="assigned_results", type=str, - help='The name of the folder where the image is saved.') - parser.add_argument( - '--device', default='cuda:0', help='Device used for inference.') + help="The name of the folder where the image is saved.", + ) + parser.add_argument("--device", default="cuda:0", help="Device used for inference.") parser.add_argument( - '--show-prior', + "--show-prior", default=False, - action='store_true', - help='Whether to show prior on image.') + action="store_true", + help="Whether to show prior on image.", + ) parser.add_argument( - '--not-show-label', + "--not-show-label", default=False, - action='store_true', - help='Whether to show label on image.') - parser.add_argument('--seed', default=-1, type=int, help='random seed') + action="store_true", + help="Whether to show label on image.", + ) + parser.add_argument("--seed", default=-1, type=int, help="random seed") parser.add_argument( - '--cfg-options', - nargs='+', + "--cfg-options", + nargs="+", action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' + help="override some settings in the used config, the key-value pair " + "in xxx=yyy format will be merged into config file. If the value to " 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') + "Note that the quotation marks are necessary and that no white space " + "is allowed.", + ) args = parser.parse_args() return args @@ -73,7 +76,7 @@ def main(): # set random seed seed = int(args.seed) if seed != -1: - print(f'Set the global seed: {seed}') + print(f"Set the global seed: {seed}") random.seed(int(args.seed)) cfg = Config.fromfile(args.config) @@ -82,36 +85,37 @@ def main(): # build model model = MODELS.build(cfg.model) - assert isinstance(model.bbox_head, YOLOv5HeadAssigner),\ - 'Now, this script only support yolov5, and bbox_head must use ' \ - '`YOLOv5HeadAssigner`. Please use `' \ - 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py' \ - '` as config file.' + assert isinstance(model.bbox_head, YOLOv5HeadAssigner), ( + "Now, this script only supports yolov5, and bbox_head must use " + "`YOLOv5HeadAssigner`. Please use `" + "yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py" + "` as config file."
+ ) model.eval() model.to(args.device) # build dataset - dataset_cfg = cfg.get('train_dataloader').get('dataset') + dataset_cfg = cfg.get("train_dataloader").get("dataset") dataset = DATASETS.build(dataset_cfg) # get collate_fn - collate_fn_cfg = cfg.get('train_dataloader').pop( - 'collate_fn', dict(type='pseudo_collate')) - collate_fn_type = collate_fn_cfg.pop('type') + collate_fn_cfg = cfg.get("train_dataloader").pop( + "collate_fn", dict(type="pseudo_collate") + ) + collate_fn_type = collate_fn_cfg.pop("type") collate_fn = COLLATE_FUNCTIONS.get(collate_fn_type) # init visualizer visualizer = YOLOAssignerVisualizer( - vis_backends=[{ - 'type': 'LocalVisBackend' - }], name='visualizer') + vis_backends=[{"type": "LocalVisBackend"}], name="visualizer" + ) visualizer.dataset_meta = dataset.metainfo # need priors size to draw priors visualizer.priors_size = model.bbox_head.prior_generator.base_anchors # make output dir os.makedirs(args.output_dir, exist_ok=True) - print('Results will save to ', args.output_dir) + print("Results will be saved to ", args.output_dir) # init visualization image number assert args.show_number > 0 @@ -126,21 +130,21 @@ def main(): with torch.no_grad(): assign_results = model.assign(batch_data) - img = data['inputs'].cpu().numpy().astype(np.uint8).transpose( - (1, 2, 0)) + img = data["inputs"].cpu().numpy().astype(np.uint8).transpose((1, 2, 0)) # bgr2rgb img = mmcv.bgr2rgb(img) - gt_instances = data['data_samples'].gt_instances + gt_instances = data["data_samples"].gt_instances - img_show = visualizer.draw_assign(img, assign_results, gt_instances, - args.show_prior, args.not_show_label) + img_show = visualizer.draw_assign( + img, assign_results, gt_instances, args.show_prior, args.not_show_label + ) - if hasattr(data['data_samples'], 'img_path'): - filename = osp.basename(data['data_samples'].img_path) + if hasattr(data["data_samples"], "img_path"): + filename = osp.basename(data["data_samples"].img_path) else: # some datasets have no image path - filename = f'{ind_img}.jpg' + filename = f"{ind_img}.jpg" out_file = osp.join(args.output_dir, filename) # convert rgb 2 bgr and save img @@ -148,5 +152,5 @@ def main(): progress_bar.update() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/projects/assigner_visualization/configs/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py b/mmyolo/projects/assigner_visualization/configs/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py index 1db799b5..67ec770d 100644 --- a/mmyolo/projects/assigner_visualization/configs/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py +++ b/mmyolo/projects/assigner_visualization/configs/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py @@ -1,11 +1,10 @@ -_base_ = [ - '../../../configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' -] +_base_ = ["../../../configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py"] -custom_imports = dict(imports=[ - 'projects.assigner_visualization.detectors', - 'projects.assigner_visualization.dense_heads' -]) +custom_imports = dict( + imports=[ + "projects.assigner_visualization.detectors", + "projects.assigner_visualization.dense_heads", + ] +) -model = dict( - type='YOLODetectorAssigner', bbox_head=dict(type='YOLOv5HeadAssigner')) +model = dict(type="YOLODetectorAssigner", bbox_head=dict(type="YOLOv5HeadAssigner")) diff --git a/mmyolo/projects/assigner_visualization/dense_heads/__init__.py b/mmyolo/projects/assigner_visualization/dense_heads/__init__.py index
c8e368d9..4e0d2e98 100644 --- a/mmyolo/projects/assigner_visualization/dense_heads/__init__.py +++ b/mmyolo/projects/assigner_visualization/dense_heads/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. from .yolov5_head_assigner import YOLOv5HeadAssigner -__all__ = ['YOLOv5HeadAssigner'] +__all__ = ["YOLOv5HeadAssigner"] diff --git a/mmyolo/projects/assigner_visualization/dense_heads/yolov5_head_assigner.py b/mmyolo/projects/assigner_visualization/dense_heads/yolov5_head_assigner.py index 599963fe..c4d4effc 100644 --- a/mmyolo/projects/assigner_visualization/dense_heads/yolov5_head_assigner.py +++ b/mmyolo/projects/assigner_visualization/dense_heads/yolov5_head_assigner.py @@ -12,12 +12,11 @@ @MODELS.register_module() class YOLOv5HeadAssigner(YOLOv5Head): - def assign_by_gt_and_feat( self, batch_gt_instances: Sequence[InstanceData], batch_img_metas: Sequence[dict], - inputs_hw: Union[Tensor, tuple] = (640, 640) + inputs_hw: Union[Tensor, tuple] = (640, 640), ) -> dict: """Calculate the assigning results based on the gt and features extracted by the detection head. @@ -38,15 +37,21 @@ def assign_by_gt_and_feat( """ # 1. Convert gt to norm format batch_targets_normed = self._convert_gt_to_norm_format( - batch_gt_instances, batch_img_metas) + batch_gt_instances, batch_img_metas + ) device = batch_targets_normed.device scaled_factor = torch.ones(7, device=device) - gt_inds = torch.arange( - batch_targets_normed.shape[1], - dtype=torch.long, - device=device, - requires_grad=False).unsqueeze(0).repeat((self.num_base_priors, 1)) + gt_inds = ( + torch.arange( + batch_targets_normed.shape[1], + dtype=torch.long, + device=device, + requires_grad=False, + ) + .unsqueeze(0) + .repeat((self.num_base_priors, 1)) + ) assign_results = [] for i in range(self.num_levels): @@ -57,22 +62,25 @@ def assign_by_gt_and_feat( # empty gt bboxes if batch_targets_normed.shape[1] == 0: for k in range(self.num_base_priors): - assign_results_feat.append({ - 'stride': - self.featmap_strides[i], - 'grid_x_inds': - torch.zeros([0], dtype=torch.int64).to(device), - 'grid_y_inds': - torch.zeros([0], dtype=torch.int64).to(device), - 'img_inds': - torch.zeros([0], dtype=torch.int64).to(device), - 'class_inds': - torch.zeros([0], dtype=torch.int64).to(device), - 'retained_gt_inds': - torch.zeros([0], dtype=torch.int64).to(device), - 'prior_ind': - k - }) + assign_results_feat.append( + { + "stride": self.featmap_strides[i], + "grid_x_inds": torch.zeros([0], dtype=torch.int64).to( + device + ), + "grid_y_inds": torch.zeros([0], dtype=torch.int64).to( + device + ), + "img_inds": torch.zeros([0], dtype=torch.int64).to(device), + "class_inds": torch.zeros([0], dtype=torch.int64).to( + device + ), + "retained_gt_inds": torch.zeros([0], dtype=torch.int64).to( + device + ), + "prior_ind": k, + } + ) assign_results.append(assign_results_feat) continue @@ -84,32 +92,35 @@ def assign_by_gt_and_feat( batch_targets_scaled = batch_targets_normed * scaled_factor # 2. 
Shape match - wh_ratio = batch_targets_scaled[..., - 4:6] / priors_base_sizes_i[:, None] - match_inds = torch.max( - wh_ratio, 1 / wh_ratio).max(2)[0] < self.prior_match_thr + wh_ratio = batch_targets_scaled[..., 4:6] / priors_base_sizes_i[:, None] + match_inds = ( + torch.max(wh_ratio, 1 / wh_ratio).max(2)[0] < self.prior_match_thr + ) batch_targets_scaled = batch_targets_scaled[match_inds] match_gt_inds = gt_inds[match_inds] # no gt bbox matches anchor if batch_targets_scaled.shape[0] == 0: for k in range(self.num_base_priors): - assign_results_feat.append({ - 'stride': - self.featmap_strides[i], - 'grid_x_inds': - torch.zeros([0], dtype=torch.int64).to(device), - 'grid_y_inds': - torch.zeros([0], dtype=torch.int64).to(device), - 'img_inds': - torch.zeros([0], dtype=torch.int64).to(device), - 'class_inds': - torch.zeros([0], dtype=torch.int64).to(device), - 'retained_gt_inds': - torch.zeros([0], dtype=torch.int64).to(device), - 'prior_ind': - k - }) + assign_results_feat.append( + { + "stride": self.featmap_strides[i], + "grid_x_inds": torch.zeros([0], dtype=torch.int64).to( + device + ), + "grid_y_inds": torch.zeros([0], dtype=torch.int64).to( + device + ), + "img_inds": torch.zeros([0], dtype=torch.int64).to(device), + "class_inds": torch.zeros([0], dtype=torch.int64).to( + device + ), + "retained_gt_inds": torch.zeros([0], dtype=torch.int64).to( + device + ), + "prior_ind": k, + } + ) assign_results.append(assign_results_feat) continue @@ -120,46 +131,48 @@ def assign_by_gt_and_feat( # them as positive samples as well. batch_targets_cxcy = batch_targets_scaled[:, 2:4] grid_xy = scaled_factor[[2, 3]] - batch_targets_cxcy - left, up = ((batch_targets_cxcy % 1 < self.near_neighbor_thr) & - (batch_targets_cxcy > 1)).T - right, bottom = ((grid_xy % 1 < self.near_neighbor_thr) & - (grid_xy > 1)).T - offset_inds = torch.stack( - (torch.ones_like(left), left, up, right, bottom)) - - batch_targets_scaled = batch_targets_scaled.repeat( - (5, 1, 1))[offset_inds] + left, up = ( + (batch_targets_cxcy % 1 < self.near_neighbor_thr) + & (batch_targets_cxcy > 1) + ).T + right, bottom = ((grid_xy % 1 < self.near_neighbor_thr) & (grid_xy > 1)).T + offset_inds = torch.stack((torch.ones_like(left), left, up, right, bottom)) + + batch_targets_scaled = batch_targets_scaled.repeat((5, 1, 1))[offset_inds] retained_gt_inds = match_gt_inds.repeat((5, 1))[offset_inds] - retained_offsets = self.grid_offset.repeat(1, offset_inds.shape[1], - 1)[offset_inds] + retained_offsets = self.grid_offset.repeat(1, offset_inds.shape[1], 1)[ + offset_inds + ] # prepare pred results and positive sample indexes to # calculate class loss and bbox lo _chunk_targets = batch_targets_scaled.chunk(4, 1) img_class_inds, grid_xy, grid_wh, priors_inds = _chunk_targets - priors_inds, (img_inds, class_inds) = priors_inds.long().view( - -1), img_class_inds.long().T + priors_inds, (img_inds, class_inds) = ( + priors_inds.long().view(-1), + img_class_inds.long().T, + ) - grid_xy_long = (grid_xy - - retained_offsets * self.near_neighbor_thr).long() + grid_xy_long = (grid_xy - retained_offsets * self.near_neighbor_thr).long() grid_x_inds, grid_y_inds = grid_xy_long.T for k in range(self.num_base_priors): retained_inds = priors_inds == k assign_results_prior = { - 'stride': self.featmap_strides[i], - 'grid_x_inds': grid_x_inds[retained_inds], - 'grid_y_inds': grid_y_inds[retained_inds], - 'img_inds': img_inds[retained_inds], - 'class_inds': class_inds[retained_inds], - 'retained_gt_inds': retained_gt_inds[retained_inds], - 'prior_ind': k + 
"stride": self.featmap_strides[i], + "grid_x_inds": grid_x_inds[retained_inds], + "grid_y_inds": grid_y_inds[retained_inds], + "img_inds": img_inds[retained_inds], + "class_inds": class_inds[retained_inds], + "retained_gt_inds": retained_gt_inds[retained_inds], + "prior_ind": k, } assign_results_feat.append(assign_results_prior) assign_results.append(assign_results_feat) return assign_results - def assign(self, batch_data_samples: Union[list, dict], - inputs_hw: Union[tuple, torch.Size]) -> dict: + def assign( + self, batch_data_samples: Union[list, dict], inputs_hw: Union[tuple, torch.Size] + ) -> dict: """Calculate assigning results. This function is provided to the `assigner_visualization.py` script. @@ -174,15 +187,21 @@ def assign(self, batch_data_samples: Union[list, dict], """ if isinstance(batch_data_samples, list): outputs = unpack_gt_instances(batch_data_samples) - (batch_gt_instances, batch_gt_instances_ignore, - batch_img_metas) = outputs - - assign_inputs = (batch_gt_instances, batch_img_metas, - batch_gt_instances_ignore, inputs_hw) + (batch_gt_instances, batch_gt_instances_ignore, batch_img_metas) = outputs + + assign_inputs = ( + batch_gt_instances, + batch_img_metas, + batch_gt_instances_ignore, + inputs_hw, + ) else: # Fast version - assign_inputs = (batch_data_samples['bboxes_labels'], - batch_data_samples['img_metas'], inputs_hw) + assign_inputs = ( + batch_data_samples["bboxes_labels"], + batch_data_samples["img_metas"], + inputs_hw, + ) assign_results = self.assign_by_gt_and_feat(*assign_inputs) return assign_results diff --git a/mmyolo/projects/assigner_visualization/detectors/__init__.py b/mmyolo/projects/assigner_visualization/detectors/__init__.py index 155606a0..aa4717ac 100644 --- a/mmyolo/projects/assigner_visualization/detectors/__init__.py +++ b/mmyolo/projects/assigner_visualization/detectors/__init__.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -from projects.assigner_visualization.detectors.yolo_detector_assigner import \ - YOLODetectorAssigner +from projects.assigner_visualization.detectors.yolo_detector_assigner import ( + YOLODetectorAssigner, +) -__all__ = ['YOLODetectorAssigner'] +__all__ = ["YOLODetectorAssigner"] diff --git a/mmyolo/projects/assigner_visualization/detectors/yolo_detector_assigner.py b/mmyolo/projects/assigner_visualization/detectors/yolo_detector_assigner.py index 394f8a06..4630c7a5 100644 --- a/mmyolo/projects/assigner_visualization/detectors/yolo_detector_assigner.py +++ b/mmyolo/projects/assigner_visualization/detectors/yolo_detector_assigner.py @@ -7,7 +7,6 @@ @MODELS.register_module() class YOLODetectorAssigner(YOLODetector): - def assign(self, data: dict) -> Union[dict, list]: """Calculate assigning results from a batch of inputs and data samples.This function is provided to the `assigner_visualization.py` @@ -20,8 +19,8 @@ def assign(self, data: dict) -> Union[dict, list]: dict: A dictionary of assigning components. 
""" assert isinstance(data, dict) - assert len(data['inputs']) == 1, 'Only support batchsize == 1' + assert len(data["inputs"]) == 1, "Only support batchsize == 1" data = self.data_preprocessor(data, True) - inputs_hw = data['inputs'].shape[-2:] - assign_results = self.bbox_head.assign(data['data_samples'], inputs_hw) + inputs_hw = data["inputs"].shape[-2:] + assign_results = self.bbox_head.assign(data["data_samples"], inputs_hw) return assign_results diff --git a/mmyolo/projects/assigner_visualization/visualization/__init__.py b/mmyolo/projects/assigner_visualization/visualization/__init__.py index 521a25b8..ab3694d3 100644 --- a/mmyolo/projects/assigner_visualization/visualization/__init__.py +++ b/mmyolo/projects/assigner_visualization/visualization/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. from .assigner_visualizer import YOLOAssignerVisualizer -__all__ = ['YOLOAssignerVisualizer'] +__all__ = ["YOLOAssignerVisualizer"] diff --git a/mmyolo/projects/assigner_visualization/visualization/assigner_visualizer.py b/mmyolo/projects/assigner_visualization/visualization/assigner_visualizer.py index a4e8ae50..2b5cda23 100644 --- a/mmyolo/projects/assigner_visualization/visualization/assigner_visualizer.py +++ b/mmyolo/projects/assigner_visualization/visualization/assigner_visualizer.py @@ -23,18 +23,18 @@ class YOLOAssignerVisualizer(DetLocalVisualizer): name (str): Name of the instance. Defaults to 'visualizer'. """ - def __init__(self, name: str = 'visualizer', *args, **kwargs): + def __init__(self, name: str = "visualizer", *args, **kwargs): super().__init__(name=name, *args, **kwargs) # need priors_size from config self.priors_size = None - def draw_grid(self, - stride: int = 8, - line_styles: Union[str, List[str]] = ':', - colors: Union[str, tuple, List[str], - List[tuple]] = (180, 180, 180), - line_widths: Union[Union[int, float], - List[Union[int, float]]] = 1): + def draw_grid( + self, + stride: int = 8, + line_styles: Union[str, List[str]] = ":", + colors: Union[str, tuple, List[str], List[tuple]] = (180, 180, 180), + line_widths: Union[Union[int, float], List[Union[int, float]]] = 1, + ): """Draw grids on image. Args: @@ -58,37 +58,47 @@ def draw_grid(self, If ``line_widths`` is single value, all the lines will have the same linewidth. Defaults to 1. 
""" - assert self._image is not None, 'Please set image using `set_image`' + assert self._image is not None, "Please set image using `set_image`" # draw vertical lines - x_datas_vertical = ((np.arange(self.width // stride - 1) + 1) * - stride).reshape((-1, 1)).repeat( - 2, axis=1) + x_datas_vertical = ( + ((np.arange(self.width // stride - 1) + 1) * stride) + .reshape((-1, 1)) + .repeat(2, axis=1) + ) y_datas_vertical = np.array([[0, self.height - 1]]).repeat( - self.width // stride - 1, axis=0) + self.width // stride - 1, axis=0 + ) self.draw_lines( x_datas_vertical, y_datas_vertical, colors=colors, line_styles=line_styles, - line_widths=line_widths) + line_widths=line_widths, + ) # draw horizontal lines x_datas_horizontal = np.array([[0, self.width - 1]]).repeat( - self.height // stride - 1, axis=0) - y_datas_horizontal = ((np.arange(self.height // stride - 1) + 1) * - stride).reshape((-1, 1)).repeat( - 2, axis=1) + self.height // stride - 1, axis=0 + ) + y_datas_horizontal = ( + ((np.arange(self.height // stride - 1) + 1) * stride) + .reshape((-1, 1)) + .repeat(2, axis=1) + ) self.draw_lines( x_datas_horizontal, y_datas_horizontal, colors=colors, line_styles=line_styles, - line_widths=line_widths) - - def draw_instances_assign(self, - instances: InstanceData, - retained_gt_inds: Tensor, - not_show_label: bool = False): + line_widths=line_widths, + ) + + def draw_instances_assign( + self, + instances: InstanceData, + retained_gt_inds: Tensor, + not_show_label: bool = False, + ): """Draw instances of GT. Args: @@ -99,13 +109,13 @@ def draw_instances_assign(self, not_show_label (bool): Whether to show gt labels on images. """ assert self.dataset_meta is not None - classes = self.dataset_meta['classes'] - palette = self.dataset_meta['palette'] + classes = self.dataset_meta["classes"] + palette = self.dataset_meta["palette"] if len(retained_gt_inds) == 0: return self.get_image() draw_gt_inds = torch.from_numpy( - np.array( - list(set(retained_gt_inds.cpu().numpy())), dtype=np.int64)) + np.array(list(set(retained_gt_inds.cpu().numpy())), dtype=np.int64) + ) bboxes = instances.bboxes[draw_gt_inds] labels = instances.labels[draw_gt_inds] @@ -122,37 +132,41 @@ def draw_instances_assign(self, bboxes, edge_colors=edge_colors, alpha=self.alpha, - line_widths=self.line_width) + line_widths=self.line_width, + ) if not not_show_label: positions = bboxes[:, :2] + self.line_width - areas = (bboxes[:, 3] - bboxes[:, 1]) * ( - bboxes[:, 2] - bboxes[:, 0]) + areas = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0]) scales = _get_adaptive_scales(areas) for i, (pos, label) in enumerate(zip(positions, labels)): - label_text = classes[ - label] if classes is not None else f'class {label}' + label_text = classes[label] if classes is not None else f"class {label}" self.draw_texts( label_text, pos, colors=text_colors[i], font_sizes=int(13 * scales[i]), - bboxes=[{ - 'facecolor': 'black', - 'alpha': 0.8, - 'pad': 0.7, - 'edgecolor': 'none' - }]) - - def draw_positive_assign(self, - grid_x_inds: Tensor, - grid_y_inds: Tensor, - class_inds: Tensor, - stride: int, - bboxes: Union[Tensor, HorizontalBoxes], - retained_gt_inds: Tensor, - offset: float = 0.5): + bboxes=[ + { + "facecolor": "black", + "alpha": 0.8, + "pad": 0.7, + "edgecolor": "none", + } + ], + ) + + def draw_positive_assign( + self, + grid_x_inds: Tensor, + grid_y_inds: Tensor, + class_inds: Tensor, + stride: int, + bboxes: Union[Tensor, HorizontalBoxes], + retained_gt_inds: Tensor, + offset: float = 0.5, + ): """ Args: @@ -175,7 +189,7 @@ 
def draw_positive_assign(self, # The PALETTE in the dataset_meta is required assert self.dataset_meta is not None - palette = self.dataset_meta['PALETTE'] + palette = self.dataset_meta["PALETTE"] x = ((grid_x_inds + offset) * stride).long() y = ((grid_y_inds + offset) * stride).long() center = torch.stack((x, y), dim=-1) @@ -187,21 +201,19 @@ def draw_positive_assign(self, colors = [palette[i] for i in class_inds] self.draw_circles( - center, - radius, - colors, - line_widths=0, - face_colors=colors, - alpha=1.0) - - def draw_prior(self, - grid_x_inds: Tensor, - grid_y_inds: Tensor, - class_inds: Tensor, - stride: int, - feat_ind: int, - prior_ind: int, - offset: float = 0.5): + center, radius, colors, line_widths=0, face_colors=colors, alpha=1.0 + ) + + def draw_prior( + self, + grid_x_inds: Tensor, + grid_y_inds: Tensor, + class_inds: Tensor, + stride: int, + feat_ind: int, + prior_ind: int, + offset: float = 0.5, + ): """Draw priors on image. Args: @@ -218,9 +230,9 @@ def draw_prior(self, with corresponding stride. Defaults to 0.5. """ - palette = self.dataset_meta['PALETTE'] - center_x = ((grid_x_inds + offset) * stride) - center_y = ((grid_y_inds + offset) * stride) + palette = self.dataset_meta["PALETTE"] + center_x = (grid_x_inds + offset) * stride + center_y = (grid_y_inds + offset) * stride xyxy = torch.stack((center_x, center_y, center_x, center_y), dim=1) assert self.priors_size is not None xyxy += self.priors_size[feat_ind][prior_ind] @@ -230,15 +242,18 @@ def draw_prior(self, xyxy, edge_colors=colors, alpha=self.alpha, - line_styles='--', - line_widths=math.ceil(self.line_width * 0.3)) - - def draw_assign(self, - image: np.ndarray, - assign_results: List[List[dict]], - gt_instances: InstanceData, - show_prior: bool = False, - not_show_label: bool = False) -> np.ndarray: + line_styles="--", + line_widths=math.ceil(self.line_width * 0.3), + ) + + def draw_assign( + self, + image: np.ndarray, + assign_results: List[List[dict]], + gt_instances: InstanceData, + show_prior: bool = False, + not_show_label: bool = False, + ) -> np.ndarray: """Draw assigning results. 
Args: @@ -255,42 +270,57 @@ def draw_assign(self, img_show_list = [] for feat_ind, assign_results_feat in enumerate(assign_results): img_show_list_feat = [] - for prior_ind, assign_results_prior in enumerate( - assign_results_feat): + for prior_ind, assign_results_prior in enumerate(assign_results_feat): self.set_image(image) h, w = image.shape[:2] # draw grid - stride = assign_results_prior['stride'] + stride = assign_results_prior["stride"] self.draw_grid(stride) # draw prior on matched gt - grid_x_inds = assign_results_prior['grid_x_inds'] - grid_y_inds = assign_results_prior['grid_y_inds'] - class_inds = assign_results_prior['class_inds'] - prior_ind = assign_results_prior['prior_ind'] + grid_x_inds = assign_results_prior["grid_x_inds"] + grid_y_inds = assign_results_prior["grid_y_inds"] + class_inds = assign_results_prior["class_inds"] + prior_ind = assign_results_prior["prior_ind"] if show_prior: - self.draw_prior(grid_x_inds, grid_y_inds, class_inds, - stride, feat_ind, prior_ind) + self.draw_prior( + grid_x_inds, + grid_y_inds, + class_inds, + stride, + feat_ind, + prior_ind, + ) # draw matched gt - retained_gt_inds = assign_results_prior['retained_gt_inds'] - self.draw_instances_assign(gt_instances, retained_gt_inds, - not_show_label) + retained_gt_inds = assign_results_prior["retained_gt_inds"] + self.draw_instances_assign( + gt_instances, retained_gt_inds, not_show_label + ) # draw positive - self.draw_positive_assign(grid_x_inds, grid_y_inds, class_inds, - stride, gt_instances.bboxes, - retained_gt_inds) + self.draw_positive_assign( + grid_x_inds, + grid_y_inds, + class_inds, + stride, + gt_instances.bboxes, + retained_gt_inds, + ) # draw title base_prior = self.priors_size[feat_ind][prior_ind] - prior_size = (base_prior[2] - base_prior[0], - base_prior[3] - base_prior[1]) + prior_size = ( + base_prior[2] - base_prior[0], + base_prior[3] - base_prior[1], + ) pos = np.array((20, 20)) - text = f'feat_ind: {feat_ind} ' \ - f'prior_ind: {prior_ind} ' \ - f'prior_size: ({prior_size[0]}, {prior_size[1]})' + text = ( + f"feat_ind: {feat_ind} " + f"prior_ind: {prior_ind} " + f"prior_size: ({prior_size[0]}, {prior_size[1]})" + ) scales = _get_adaptive_scales(np.array([h * w / 16])) font_sizes = int(13 * scales) self.draw_texts( @@ -298,12 +328,15 @@ def draw_assign(self, pos, colors=self.text_color, font_sizes=font_sizes, - bboxes=[{ - 'facecolor': 'black', - 'alpha': 0.8, - 'pad': 0.7, - 'edgecolor': 'none' - }]) + bboxes=[ + { + "facecolor": "black", + "alpha": 0.8, + "pad": 0.7, + "edgecolor": "none", + } + ], + ) img_show = self.get_image() img_show = mmcv.impad(img_show, padding=(5, 5, 5, 5)) diff --git a/mmyolo/projects/easydeploy/backbone/__init__.py b/mmyolo/projects/easydeploy/backbone/__init__.py index dc167f85..834cc90e 100644 --- a/mmyolo/projects/easydeploy/backbone/__init__.py +++ b/mmyolo/projects/easydeploy/backbone/__init__.py @@ -2,4 +2,4 @@ from .common import DeployC2f from .focus import DeployFocus, GConvFocus, NcnnFocus -__all__ = ['DeployFocus', 'NcnnFocus', 'GConvFocus', 'DeployC2f'] +__all__ = ["DeployFocus", "NcnnFocus", "GConvFocus", "DeployC2f"] diff --git a/mmyolo/projects/easydeploy/backbone/common.py b/mmyolo/projects/easydeploy/backbone/common.py index 617875bd..1c617587 100644 --- a/mmyolo/projects/easydeploy/backbone/common.py +++ b/mmyolo/projects/easydeploy/backbone/common.py @@ -4,13 +4,12 @@ class DeployC2f(nn.Module): - def __init__(self, *args, **kwargs): super().__init__() def forward(self, x: Tensor) -> Tensor: x_main = self.main_conv(x) - 
x_main = [x_main, x_main[:, self.mid_channels:, ...]] + x_main = [x_main, x_main[:, self.mid_channels :, ...]] x_main.extend(blocks(x_main[-1]) for blocks in self.blocks) x_main.pop(1) return self.final_conv(torch.cat(x_main, 1)) diff --git a/mmyolo/projects/easydeploy/backbone/focus.py b/mmyolo/projects/easydeploy/backbone/focus.py index 2a19afcc..5aace715 100644 --- a/mmyolo/projects/easydeploy/backbone/focus.py +++ b/mmyolo/projects/easydeploy/backbone/focus.py @@ -6,7 +6,6 @@ class DeployFocus(nn.Module): - def __init__(self, orin_Focus: nn.Module): super().__init__() self.__dict__.update(orin_Focus.__dict__) @@ -24,15 +23,16 @@ def forward(self, x: Tensor) -> Tensor: class NcnnFocus(nn.Module): - def __init__(self, orin_Focus: nn.Module): super().__init__() self.__dict__.update(orin_Focus.__dict__) def forward(self, x: Tensor) -> Tensor: batch_size, c, h, w = x.shape - assert h % 2 == 0 and w % 2 == 0, f'focus for yolox needs even feature\ - height and width, got {(h, w)}.' + assert ( + h % 2 == 0 and w % 2 == 0 + ), f"focus for yolox needs even feature\ + height and width, got {(h, w)}." x = x.reshape(batch_size, c * h, 1, w) _b, _c, _h, _w = x.shape @@ -57,18 +57,13 @@ def forward(self, x: Tensor) -> Tensor: class GConvFocus(nn.Module): - def __init__(self, orin_Focus: nn.Module): super().__init__() device = next(orin_Focus.parameters()).device - self.weight1 = torch.tensor([[1., 0], [0, 0]]).expand(3, 1, 2, - 2).to(device) - self.weight2 = torch.tensor([[0, 0], [1., 0]]).expand(3, 1, 2, - 2).to(device) - self.weight3 = torch.tensor([[0, 1.], [0, 0]]).expand(3, 1, 2, - 2).to(device) - self.weight4 = torch.tensor([[0, 0], [0, 1.]]).expand(3, 1, 2, - 2).to(device) + self.weight1 = torch.tensor([[1.0, 0], [0, 0]]).expand(3, 1, 2, 2).to(device) + self.weight2 = torch.tensor([[0, 0], [1.0, 0]]).expand(3, 1, 2, 2).to(device) + self.weight3 = torch.tensor([[0, 1.0], [0, 0]]).expand(3, 1, 2, 2).to(device) + self.weight4 = torch.tensor([[0, 0], [0, 1.0]]).expand(3, 1, 2, 2).to(device) self.__dict__.update(orin_Focus.__dict__) def forward(self, x: Tensor) -> Tensor: diff --git a/mmyolo/projects/easydeploy/bbox_code/__init__.py b/mmyolo/projects/easydeploy/bbox_code/__init__.py index b85a8155..458da6a3 100644 --- a/mmyolo/projects/easydeploy/bbox_code/__init__.py +++ b/mmyolo/projects/easydeploy/bbox_code/__init__.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
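For intuition on the Focus rewrites above: all three classes re-express YOLOv5's space-to-depth stem, which folds every 2x2 pixel patch into four channels so that (B, C, H, W) becomes (B, 4C, H/2, W/2). The plain slicing form, shown here only for shape intuition and not taken from this repo:

import torch

x = torch.randn(1, 3, 640, 640)
# Standard YOLOv5 Focus slicing: one slice per position in each 2x2 patch.
patched = torch.cat(
    [x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]],
    dim=1,
)
print(patched.shape)  # torch.Size([1, 12, 320, 320])

The four one-hot 2x2 kernels in GConvFocus pick out exactly these four positions with strided grouped convolutions, which some deployment backends handle better than tensor slicing.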
-from .bbox_coder import (rtmdet_bbox_decoder, yolov5_bbox_decoder, - yolox_bbox_decoder) +from .bbox_coder import rtmdet_bbox_decoder, yolov5_bbox_decoder, yolox_bbox_decoder -__all__ = ['yolov5_bbox_decoder', 'rtmdet_bbox_decoder', 'yolox_bbox_decoder'] +__all__ = ["yolov5_bbox_decoder", "rtmdet_bbox_decoder", "yolox_bbox_decoder"] diff --git a/mmyolo/projects/easydeploy/bbox_code/bbox_coder.py b/mmyolo/projects/easydeploy/bbox_code/bbox_coder.py index 1fef2a1f..f8b589ae 100644 --- a/mmyolo/projects/easydeploy/bbox_code/bbox_coder.py +++ b/mmyolo/projects/easydeploy/bbox_code/bbox_coder.py @@ -5,8 +5,7 @@ from torch import Tensor -def yolov5_bbox_decoder(priors: Tensor, bbox_preds: Tensor, - stride: Tensor) -> Tensor: +def yolov5_bbox_decoder(priors: Tensor, bbox_preds: Tensor, stride: Tensor) -> Tensor: bbox_preds = bbox_preds.sigmoid() x_center = (priors[..., 0] + priors[..., 2]) * 0.5 @@ -16,27 +15,28 @@ def yolov5_bbox_decoder(priors: Tensor, bbox_preds: Tensor, x_center_pred = (bbox_preds[..., 0] - 0.5) * 2 * stride + x_center y_center_pred = (bbox_preds[..., 1] - 0.5) * 2 * stride + y_center - w_pred = (bbox_preds[..., 2] * 2)**2 * w - h_pred = (bbox_preds[..., 3] * 2)**2 * h + w_pred = (bbox_preds[..., 2] * 2) ** 2 * w + h_pred = (bbox_preds[..., 3] * 2) ** 2 * h - decoded_bboxes = torch.stack( - [x_center_pred, y_center_pred, w_pred, h_pred], dim=-1) + decoded_bboxes = torch.stack([x_center_pred, y_center_pred, w_pred, h_pred], dim=-1) return decoded_bboxes -def rtmdet_bbox_decoder(priors: Tensor, bbox_preds: Tensor, - stride: Optional[Tensor]) -> Tensor: - tl_x = (priors[..., 0] - bbox_preds[..., 0]) - tl_y = (priors[..., 1] - bbox_preds[..., 1]) - br_x = (priors[..., 0] + bbox_preds[..., 2]) - br_y = (priors[..., 1] + bbox_preds[..., 3]) +def rtmdet_bbox_decoder( + priors: Tensor, bbox_preds: Tensor, stride: Optional[Tensor] +) -> Tensor: + tl_x = priors[..., 0] - bbox_preds[..., 0] + tl_y = priors[..., 1] - bbox_preds[..., 1] + br_x = priors[..., 0] + bbox_preds[..., 2] + br_y = priors[..., 1] + bbox_preds[..., 3] decoded_bboxes = torch.stack([tl_x, tl_y, br_x, br_y], -1) return decoded_bboxes -def yolox_bbox_decoder(priors: Tensor, bbox_preds: Tensor, - stride: Optional[Tensor]) -> Tensor: +def yolox_bbox_decoder( + priors: Tensor, bbox_preds: Tensor, stride: Optional[Tensor] +) -> Tensor: stride = stride[None, :, None] xys = (bbox_preds[..., :2] * stride) + priors whs = bbox_preds[..., 2:].exp() * stride diff --git a/mmyolo/projects/easydeploy/model/__init__.py b/mmyolo/projects/easydeploy/model/__init__.py index 52d6043e..8d0317a8 100644 --- a/mmyolo/projects/easydeploy/model/__init__.py +++ b/mmyolo/projects/easydeploy/model/__init__.py @@ -2,4 +2,4 @@ from .backendwrapper import ORTWrapper, TRTWrapper from .model import DeployModel -__all__ = ['DeployModel', 'TRTWrapper', 'ORTWrapper'] +__all__ = ["DeployModel", "TRTWrapper", "ORTWrapper"] diff --git a/mmyolo/projects/easydeploy/model/backendwrapper.py b/mmyolo/projects/easydeploy/model/backendwrapper.py index 2997d84e..07ad1d97 100644 --- a/mmyolo/projects/easydeploy/model/backendwrapper.py +++ b/mmyolo/projects/easydeploy/model/backendwrapper.py @@ -6,28 +6,27 @@ import numpy as np import onnxruntime +import torch try: import tensorrt as trt except Exception: trt = None -import torch -warnings.filterwarnings(action='ignore', category=DeprecationWarning) +warnings.filterwarnings(action="ignore", category=DeprecationWarning) class TRTWrapper(torch.nn.Module): dtype_mapping = {} - def __init__(self, weight: Union[str, 
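To make the YOLOv5 decode above concrete: with every raw prediction at the sigmoid midpoint, a prior decodes back to its own centre and size, since (0.5 - 0.5) * 2 * stride = 0 and (0.5 * 2) ** 2 = 1. A small worked check, values invented:

import torch

# One 16x16 xyxy prior centred at (8, 8); stride 8 at this level.
priors = torch.tensor([[0.0, 0.0, 16.0, 16.0]])
stride = torch.tensor([8.0])
preds = torch.zeros(1, 4).sigmoid()  # logits of 0 -> 0.5 everywhere

x_center = (priors[..., 0] + priors[..., 2]) * 0.5       # 8.0
w = priors[..., 2] - priors[..., 0]                      # 16.0
x_pred = (preds[..., 0] - 0.5) * 2 * stride + x_center   # -> 8.0
w_pred = (preds[..., 2] * 2) ** 2 * w                    # -> 16.0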
Path], - device: Optional[torch.device]): + def __init__(self, weight: Union[str, Path], device: Optional[torch.device]): super().__init__() weight = Path(weight) if isinstance(weight, str) else weight - assert weight.exists() and weight.suffix in ('.engine', '.plan') + assert weight.exists() and weight.suffix in (".engine", ".plan") if isinstance(device, str): device = torch.device(device) elif isinstance(device, int): - device = torch.device(f'cuda:{device}') + device = torch.device(f"cuda:{device}") self.weight = weight self.device = device self.stream = torch.cuda.Stream(device=device) @@ -36,18 +35,20 @@ def __init__(self, weight: Union[str, Path], self.__init_bindings() def __update_mapping(self): - self.dtype_mapping.update({ - trt.bool: torch.bool, - trt.int8: torch.int8, - trt.int32: torch.int32, - trt.float16: torch.float16, - trt.float32: torch.float32 - }) + self.dtype_mapping.update( + { + trt.bool: torch.bool, + trt.int8: torch.int8, + trt.int32: torch.int32, + trt.float16: torch.float16, + trt.float32: torch.float32, + } + ) def __init_engine(self): logger = trt.Logger(trt.Logger.ERROR) self.log = partial(logger.log, trt.Logger.ERROR) - trt.init_libnvinfer_plugins(logger, namespace='') + trt.init_libnvinfer_plugins(logger, namespace="") self.logger = logger with trt.Runtime(logger) as runtime: model = runtime.deserialize_cuda_engine(self.weight.read_bytes()) @@ -76,7 +77,7 @@ def __init_engine(self): self.bindings: List[int] = [0] * self.num_bindings def __init_bindings(self): - Binding = namedtuple('Binding', ('name', 'dtype', 'shape')) + Binding = namedtuple("Binding", ("name", "dtype", "shape")) inputs_info = [] outputs_info = [] @@ -104,15 +105,12 @@ def forward(self, *inputs): assert len(inputs) == self.num_inputs - contiguous_inputs: List[torch.Tensor] = [ - i.contiguous() for i in inputs - ] + contiguous_inputs: List[torch.Tensor] = [i.contiguous() for i in inputs] for i in range(self.num_inputs): self.bindings[i] = contiguous_inputs[i].data_ptr() if self.is_dynamic: - self.context.set_binding_shape( - i, tuple(contiguous_inputs[i].shape)) + self.context.set_binding_shape(i, tuple(contiguous_inputs[i].shape)) # create output tensors outputs: List[torch.Tensor] = [] @@ -122,9 +120,8 @@ def forward(self, *inputs): if self.is_dynamic: shape = tuple(self.context.get_binding_shape(j)) output = torch.empty( - size=shape, - dtype=self.output_dtypes[i], - device=self.device) + size=shape, dtype=self.output_dtypes[i], device=self.device + ) else: output = self.output_tensor[i] @@ -138,45 +135,40 @@ def forward(self, *inputs): class ORTWrapper(torch.nn.Module): - - def __init__(self, weight: Union[str, Path], - device: Optional[torch.device]): + def __init__(self, weight: Union[str, Path], device: Optional[torch.device]): super().__init__() weight = Path(weight) if isinstance(weight, str) else weight - assert weight.exists() and weight.suffix == '.onnx' + assert weight.exists() and weight.suffix == ".onnx" if isinstance(device, str): device = torch.device(device) elif isinstance(device, int): - device = torch.device(f'cuda:{device}') + device = torch.device(f"cuda:{device}") self.weight = weight self.device = device self.__init_session() self.__init_bindings() def __init_session(self): - providers = ['CPUExecutionProvider'] - if 'cuda' in self.device.type: - providers.insert(0, 'CUDAExecutionProvider') + providers = ["CPUExecutionProvider"] + if "cuda" in self.device.type: + providers.insert(0, "CUDAExecutionProvider") - session = onnxruntime.InferenceSession( - 
str(self.weight), providers=providers) + session = onnxruntime.InferenceSession(str(self.weight), providers=providers) self.session = session def __init_bindings(self): - Binding = namedtuple('Binding', ('name', 'dtype', 'shape')) + Binding = namedtuple("Binding", ("name", "dtype", "shape")) inputs_info = [] outputs_info = [] self.is_dynamic = False for i, tensor in enumerate(self.session.get_inputs()): if any(not isinstance(i, int) for i in tensor.shape): self.is_dynamic = True - inputs_info.append( - Binding(tensor.name, tensor.type, tuple(tensor.shape))) + inputs_info.append(Binding(tensor.name, tensor.type, tuple(tensor.shape))) for i, tensor in enumerate(self.session.get_outputs()): - outputs_info.append( - Binding(tensor.name, tensor.type, tuple(tensor.shape))) + outputs_info.append(Binding(tensor.name, tensor.type, tuple(tensor.shape))) self.inputs_info = inputs_info self.outputs_info = outputs_info self.num_inputs = len(inputs_info) @@ -194,9 +186,9 @@ def forward(self, *inputs): for i in range(self.num_inputs): assert contiguous_inputs[i].shape == self.inputs_info[i].shape - outputs = self.session.run([o.name for o in self.outputs_info], { - j.name: contiguous_inputs[i] - for i, j in enumerate(self.inputs_info) - }) + outputs = self.session.run( + [o.name for o in self.outputs_info], + {j.name: contiguous_inputs[i] for i, j in enumerate(self.inputs_info)}, + ) return tuple(torch.from_numpy(o).to(self.device) for o in outputs) diff --git a/mmyolo/projects/easydeploy/model/model.py b/mmyolo/projects/easydeploy/model/model.py index 0adcbbd2..3670591c 100644 --- a/mmyolo/projects/easydeploy/model/model.py +++ b/mmyolo/projects/easydeploy/model/model.py @@ -9,20 +9,18 @@ from torch import Tensor from mmyolo.models import RepVGGBlock -from mmyolo.models.dense_heads import (RTMDetHead, YOLOv5Head, YOLOv7Head, - YOLOXHead) +from mmyolo.models.dense_heads import RTMDetHead, YOLOv5Head, YOLOv7Head, YOLOXHead from mmyolo.models.layers import CSPLayerWithTwoConv + from ..backbone import DeployC2f, DeployFocus, GConvFocus, NcnnFocus -from ..bbox_code import (rtmdet_bbox_decoder, yolov5_bbox_decoder, - yolox_bbox_decoder) +from ..bbox_code import rtmdet_bbox_decoder, yolov5_bbox_decoder, yolox_bbox_decoder from ..nms import batched_nms, efficient_nms, onnx_nms class DeployModel(nn.Module): - - def __init__(self, - baseModel: nn.Module, - postprocess_cfg: Optional[ConfigDict] = None): + def __init__( + self, baseModel: nn.Module, postprocess_cfg: Optional[ConfigDict] = None + ): super().__init__() self.baseModel = baseModel if postprocess_cfg is None: @@ -32,11 +30,11 @@ def __init__(self, self.baseHead = baseModel.bbox_head self.__init_sub_attributes() self.detector_type = type(self.baseHead) - self.pre_top_k = postprocess_cfg.get('pre_top_k', 1000) - self.keep_top_k = postprocess_cfg.get('keep_top_k', 100) - self.iou_threshold = postprocess_cfg.get('iou_threshold', 0.65) - self.score_threshold = postprocess_cfg.get('score_threshold', 0.25) - self.backend = postprocess_cfg.get('backend', 1) + self.pre_top_k = postprocess_cfg.get("pre_top_k", 1000) + self.keep_top_k = postprocess_cfg.get("keep_top_k", 100) + self.iou_threshold = postprocess_cfg.get("iou_threshold", 0.65) + self.score_threshold = postprocess_cfg.get("score_threshold", 0.25) + self.backend = postprocess_cfg.get("backend", 1) self.__switch_deploy() def __init_sub_attributes(self): @@ -61,13 +59,15 @@ def __switch_deploy(self): else: self.baseModel.backbone.stem = GConvFocus(layer) elif isinstance(layer, CSPLayerWithTwoConv): - 
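Both backend wrappers above share one calling convention: construct with a weight path and a device, then call the wrapper like any `nn.Module`. A hedged usage sketch; 'end2end.onnx' is a placeholder for a file produced by the export script, and the four output names follow export.py further below:

import torch
from projects.easydeploy.model import ORTWrapper

model = ORTWrapper("end2end.onnx", torch.device("cpu"))
fake_input = torch.randn(1, 3, 640, 640)  # must match the exported static shape
num_dets, boxes, scores, labels = model(fake_input)

Swapping in TRTWrapper with an '.engine' or '.plan' file is the only change needed on a CUDA device.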
setattr(layer, '__class__', DeployC2f) - - def pred_by_feat(self, - cls_scores: List[Tensor], - bbox_preds: List[Tensor], - objectnesses: Optional[List[Tensor]] = None, - **kwargs): + setattr(layer, "__class__", DeployC2f) + + def pred_by_feat( + self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + objectnesses: Optional[List[Tensor]] = None, + **kwargs, + ): assert len(cls_scores) == len(bbox_preds) dtype = cls_scores[0].dtype device = cls_scores[0].device @@ -85,23 +85,21 @@ def pred_by_feat(self, num_imgs = cls_scores[0].shape[0] featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] - mlvl_priors = self.prior_generate( - featmap_sizes, dtype=dtype, device=device) + mlvl_priors = self.prior_generate(featmap_sizes, dtype=dtype, device=device) flatten_priors = torch.cat(mlvl_priors) mlvl_strides = [ flatten_priors.new_full( - (featmap_size[0] * featmap_size[1] * self.num_base_priors, ), - stride) for featmap_size, stride in zip( - featmap_sizes, self.featmap_strides) + (featmap_size[0] * featmap_size[1] * self.num_base_priors,), stride + ) + for featmap_size, stride in zip(featmap_sizes, self.featmap_strides) ] flatten_stride = torch.cat(mlvl_strides) # flatten cls_scores, bbox_preds and objectness flatten_cls_scores = [ - cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, - self.num_classes) + cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, self.num_classes) for cls_score in cls_scores ] cls_scores = torch.cat(flatten_cls_scores, dim=1).sigmoid() @@ -122,11 +120,17 @@ def pred_by_feat(self, scores = cls_scores - bboxes = bbox_decoder(flatten_priors[None], flatten_bbox_preds, - flatten_stride) - - return nms_func(bboxes, scores, self.keep_top_k, self.iou_threshold, - self.score_threshold, self.pre_top_k, self.keep_top_k) + bboxes = bbox_decoder(flatten_priors[None], flatten_bbox_preds, flatten_stride) + + return nms_func( + bboxes, + scores, + self.keep_top_k, + self.iou_threshold, + self.score_threshold, + self.pre_top_k, + self.keep_top_k, + ) def select_nms(self): if self.backend == 1: diff --git a/mmyolo/projects/easydeploy/nms/__init__.py b/mmyolo/projects/easydeploy/nms/__init__.py index 59c5cdbd..3381d909 100644 --- a/mmyolo/projects/easydeploy/nms/__init__.py +++ b/mmyolo/projects/easydeploy/nms/__init__.py @@ -2,4 +2,4 @@ from .ort_nms import onnx_nms from .trt_nms import batched_nms, efficient_nms -__all__ = ['efficient_nms', 'batched_nms', 'onnx_nms'] +__all__ = ["efficient_nms", "batched_nms", "onnx_nms"] diff --git a/mmyolo/projects/easydeploy/nms/ort_nms.py b/mmyolo/projects/easydeploy/nms/ort_nms.py index aad93cf0..9b58323c 100644 --- a/mmyolo/projects/easydeploy/nms/ort_nms.py +++ b/mmyolo/projects/easydeploy/nms/ort_nms.py @@ -2,16 +2,24 @@ import torch from torch import Tensor -_XYWH2XYXY = torch.tensor([[1.0, 0.0, 1.0, 0.0], [0.0, 1.0, 0.0, 1.0], - [-0.5, 0.0, 0.5, 0.0], [0.0, -0.5, 0.0, 0.5]], - dtype=torch.float32) - - -def select_nms_index(scores: Tensor, - boxes: Tensor, - nms_index: Tensor, - batch_size: int, - keep_top_k: int = -1): +_XYWH2XYXY = torch.tensor( + [ + [1.0, 0.0, 1.0, 0.0], + [0.0, 1.0, 0.0, 1.0], + [-0.5, 0.0, 0.5, 0.0], + [0.0, -0.5, 0.0, 0.5], + ], + dtype=torch.float32, +) + + +def select_nms_index( + scores: Tensor, + boxes: Tensor, + nms_index: Tensor, + batch_size: int, + keep_top_k: int = -1, +): batch_inds, cls_inds = nms_index[:, 0], nms_index[:, 1] box_inds = nms_index[:, 2] @@ -21,27 +29,27 @@ def select_nms_index(scores: Tensor, batched_dets = dets.unsqueeze(0).repeat(batch_size, 1, 1) batch_template = 
torch.arange( - 0, batch_size, dtype=batch_inds.dtype, device=batch_inds.device) + 0, batch_size, dtype=batch_inds.dtype, device=batch_inds.device + ) batched_dets = batched_dets.where( (batch_inds == batch_template.unsqueeze(1)).unsqueeze(-1), - batched_dets.new_zeros(1)) + batched_dets.new_zeros(1), + ) batched_labels = cls_inds.unsqueeze(0).repeat(batch_size, 1) batched_labels = batched_labels.where( - (batch_inds == batch_template.unsqueeze(1)), - batched_labels.new_ones(1) * -1) + (batch_inds == batch_template.unsqueeze(1)), batched_labels.new_ones(1) * -1 + ) N = batched_dets.shape[0] - batched_dets = torch.cat((batched_dets, batched_dets.new_zeros((N, 1, 5))), - 1) - batched_labels = torch.cat((batched_labels, -batched_labels.new_ones( - (N, 1))), 1) + batched_dets = torch.cat((batched_dets, batched_dets.new_zeros((N, 1, 5))), 1) + batched_labels = torch.cat((batched_labels, -batched_labels.new_ones((N, 1))), 1) _, topk_inds = batched_dets[:, :, -1].sort(dim=1, descending=True) topk_batch_inds = torch.arange( - batch_size, dtype=topk_inds.dtype, - device=topk_inds.device).view(-1, 1) + batch_size, dtype=topk_inds.dtype, device=topk_inds.device + ).view(-1, 1) batched_dets = batched_dets[topk_batch_inds, topk_inds, ...] batched_labels = batched_labels[topk_batch_inds, topk_inds, ...] batched_dets, batched_scores = batched_dets.split([4, 1], 2) @@ -52,7 +60,6 @@ def select_nms_index(scores: Tensor, class ONNXNMSop(torch.autograd.Function): - @staticmethod def forward( ctx, @@ -60,37 +67,39 @@ def forward( scores: Tensor, max_output_boxes_per_class: Tensor = torch.tensor([100]), iou_threshold: Tensor = torch.tensor([0.5]), - score_threshold: Tensor = torch.tensor([0.05]) + score_threshold: Tensor = torch.tensor([0.05]), ) -> Tensor: device = boxes.device batch = scores.shape[0] num_det = 20 - batches = torch.randint(0, batch, (num_det, )).sort()[0].to(device) + batches = torch.randint(0, batch, (num_det,)).sort()[0].to(device) idxs = torch.arange(100, 100 + num_det).to(device) - zeros = torch.zeros((num_det, ), dtype=torch.int64).to(device) - selected_indices = torch.cat([batches[None], zeros[None], idxs[None]], - 0).T.contiguous() + zeros = torch.zeros((num_det,), dtype=torch.int64).to(device) + selected_indices = torch.cat( + [batches[None], zeros[None], idxs[None]], 0 + ).T.contiguous() selected_indices = selected_indices.to(torch.int64) return selected_indices @staticmethod def symbolic( - g, - boxes: Tensor, - scores: Tensor, - max_output_boxes_per_class: Tensor = torch.tensor([100]), - iou_threshold: Tensor = torch.tensor([0.5]), - score_threshold: Tensor = torch.tensor([0.05]), + g, + boxes: Tensor, + scores: Tensor, + max_output_boxes_per_class: Tensor = torch.tensor([100]), + iou_threshold: Tensor = torch.tensor([0.5]), + score_threshold: Tensor = torch.tensor([0.05]), ): return g.op( - 'NonMaxSuppression', + "NonMaxSuppression", boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, - outputs=1) + outputs=1, + ) def onnx_nms( @@ -111,12 +120,12 @@ def onnx_nms( if box_coding == 1: boxes = boxes @ (_XYWH2XYXY.to(boxes.device)) scores = scores.transpose(1, 2).contiguous() - selected_indices = ONNXNMSop.apply(boxes, scores, - max_output_boxes_per_class, - iou_threshold, score_threshold) + selected_indices = ONNXNMSop.apply( + boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold + ) num_dets, batched_dets, batched_scores, batched_labels = select_nms_index( - scores, boxes, selected_indices, batch_size, keep_top_k=keep_top_k) + scores, 
boxes, selected_indices, batch_size, keep_top_k=keep_top_k + ) - return num_dets, batched_dets, batched_scores, batched_labels.to( - torch.int32) + return num_dets, batched_dets, batched_scores, batched_labels.to(torch.int32) diff --git a/mmyolo/projects/easydeploy/nms/trt_nms.py b/mmyolo/projects/easydeploy/nms/trt_nms.py index e0db1e21..070b6ac1 100644 --- a/mmyolo/projects/easydeploy/nms/trt_nms.py +++ b/mmyolo/projects/easydeploy/nms/trt_nms.py @@ -2,13 +2,18 @@ import torch from torch import Tensor -_XYWH2XYXY = torch.tensor([[1.0, 0.0, 1.0, 0.0], [0.0, 1.0, 0.0, 1.0], - [-0.5, 0.0, 0.5, 0.0], [0.0, -0.5, 0.0, 0.5]], - dtype=torch.float32) +_XYWH2XYXY = torch.tensor( + [ + [1.0, 0.0, 1.0, 0.0], + [0.0, 1.0, 0.0, 1.0], + [-0.5, 0.0, 0.5, 0.0], + [0.0, -0.5, 0.0, 0.5], + ], + dtype=torch.float32, +) class TRTEfficientNMSop(torch.autograd.Function): - @staticmethod def forward( ctx, @@ -18,32 +23,34 @@ def forward( box_coding: int = 0, iou_threshold: float = 0.45, max_output_boxes: int = 100, - plugin_version: str = '1', + plugin_version: str = "1", score_activation: int = 0, score_threshold: float = 0.25, ): batch_size, _, num_classes = scores.shape - num_det = torch.randint( - 0, max_output_boxes, (batch_size, 1), dtype=torch.int32) + num_det = torch.randint(0, max_output_boxes, (batch_size, 1), dtype=torch.int32) det_boxes = torch.randn(batch_size, max_output_boxes, 4) det_scores = torch.randn(batch_size, max_output_boxes) det_classes = torch.randint( - 0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32) + 0, num_classes, (batch_size, max_output_boxes), dtype=torch.int32 + ) return num_det, det_boxes, det_scores, det_classes @staticmethod - def symbolic(g, - boxes: Tensor, - scores: Tensor, - background_class: int = -1, - box_coding: int = 0, - iou_threshold: float = 0.45, - max_output_boxes: int = 100, - plugin_version: str = '1', - score_activation: int = 0, - score_threshold: float = 0.25): + def symbolic( + g, + boxes: Tensor, + scores: Tensor, + background_class: int = -1, + box_coding: int = 0, + iou_threshold: float = 0.45, + max_output_boxes: int = 100, + plugin_version: str = "1", + score_activation: int = 0, + score_threshold: float = 0.25, + ): out = g.op( - 'TRT::EfficientNMS_TRT', + "TRT::EfficientNMS_TRT", boxes, scores, background_class_i=background_class, @@ -53,7 +60,8 @@ def symbolic(g, plugin_version_s=plugin_version, score_activation_i=score_activation, score_threshold_f=score_threshold, - outputs=4) + outputs=4, + ) num_det, det_boxes, det_scores, det_classes = out return num_det, det_boxes, det_scores, det_classes @@ -66,7 +74,7 @@ def forward( ctx, boxes: Tensor, scores: Tensor, - plugin_version: str = '1', + plugin_version: str = "1", shareLocation: int = 1, backgroundLabelId: int = -1, numClasses: int = 80, @@ -80,12 +88,10 @@ def forward( caffeSemantics: int = 1, ): batch_size, _, numClasses = scores.shape - num_det = torch.randint( - 0, keepTopK, (batch_size, 1), dtype=torch.int32) + num_det = torch.randint(0, keepTopK, (batch_size, 1), dtype=torch.int32) det_boxes = torch.randn(batch_size, keepTopK, 4) det_scores = torch.randn(batch_size, keepTopK) - det_classes = torch.randint(0, numClasses, - (batch_size, keepTopK)).float() + det_classes = torch.randint(0, numClasses, (batch_size, keepTopK)).float() return num_det, det_boxes, det_scores, det_classes @staticmethod @@ -93,7 +99,7 @@ def symbolic( g, boxes: Tensor, scores: Tensor, - plugin_version: str = '1', + plugin_version: str = "1", shareLocation: int = 1, backgroundLabelId: int = -1, 
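Before the TensorRT variants continue: the ORT-side `select_nms_index` above turns the sparse (batch_idx, class_idx, box_idx) triples emitted by ONNX `NonMaxSuppression` into dense, batch-padded tensors. A tiny sketch with invented detections; note that `scores` uses the post-transpose (batch, classes, boxes) layout, as in `onnx_nms`:

import torch
from projects.easydeploy.nms.ort_nms import select_nms_index  # module path as used in this patch

boxes = torch.rand(2, 4, 4)   # 2 images, 4 candidate xyxy boxes each (invented)
scores = torch.rand(2, 3, 4)  # 3 classes, already transposed as in onnx_nms
nms_index = torch.tensor([[0, 1, 2], [1, 0, 3]])  # one surviving box per image

num_dets, dets, det_scores, det_labels = select_nms_index(
    scores, boxes, nms_index, batch_size=2, keep_top_k=2
)

Entries belonging to other images are zeroed out and labels are padded with -1, so every image yields fixed-size outputs regardless of how many boxes survive NMS.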
numClasses: int = 80, @@ -107,7 +113,7 @@ def symbolic( caffeSemantics: int = 1, ): out = g.op( - 'TRT::BatchedNMSDynamic_TRT', + "TRT::BatchedNMSDynamic_TRT", boxes, scores, shareLocation_i=shareLocation, @@ -122,7 +128,8 @@ def symbolic( clipBoxes_i=clipBoxes, scoreBits_i=scoreBits, caffeSemantics_i=caffeSemantics, - outputs=4) + outputs=4, + ) num_det, det_boxes, det_scores, det_classes = out return num_det, det_boxes, det_scores, det_classes @@ -163,8 +170,16 @@ def _efficient_nms( `det_classes` of shape [N, num_det] """ num_det, det_boxes, det_scores, det_classes = TRTEfficientNMSop.apply( - boxes, scores, -1, box_coding, iou_threshold, keep_top_k, '1', 0, - score_threshold) + boxes, + scores, + -1, + box_coding, + iou_threshold, + keep_top_k, + "1", + 0, + score_threshold, + ) return num_det, det_boxes, det_scores, det_classes @@ -209,8 +224,21 @@ def _batched_nms( _, _, numClasses = scores.shape num_det, det_boxes, det_scores, det_classes = TRTbatchedNMSop.apply( - boxes, scores, '1', 1, -1, int(numClasses), min(pre_top_k, 4096), - keep_top_k, score_threshold, iou_threshold, 0, 0, 16, 1) + boxes, + scores, + "1", + 1, + -1, + int(numClasses), + min(pre_top_k, 4096), + keep_top_k, + score_threshold, + iou_threshold, + 0, + 0, + 16, + 1, + ) det_classes = det_classes.int() return num_det, det_boxes, det_scores, det_classes diff --git a/mmyolo/projects/easydeploy/tools/build_engine.py b/mmyolo/projects/easydeploy/tools/build_engine.py index b400c9db..ea527ed1 100644 --- a/mmyolo/projects/easydeploy/tools/build_engine.py +++ b/mmyolo/projects/easydeploy/tools/build_engine.py @@ -1,54 +1,56 @@ import argparse +import warnings from pathlib import Path from typing import List, Optional, Tuple, Union +import numpy as np +import torch + try: import tensorrt as trt except Exception: trt = None -import warnings -import numpy as np -import torch -warnings.filterwarnings(action='ignore', category=DeprecationWarning) +warnings.filterwarnings(action="ignore", category=DeprecationWarning) class EngineBuilder: - def __init__( - self, - checkpoint: Union[str, Path], - opt_shape: Union[Tuple, List] = (1, 3, 640, 640), - device: Optional[Union[str, int, torch.device]] = None) -> None: - checkpoint = Path(checkpoint) if isinstance(checkpoint, - str) else checkpoint - assert checkpoint.exists() and checkpoint.suffix == '.onnx' + self, + checkpoint: Union[str, Path], + opt_shape: Union[Tuple, List] = (1, 3, 640, 640), + device: Optional[Union[str, int, torch.device]] = None, + ) -> None: + checkpoint = Path(checkpoint) if isinstance(checkpoint, str) else checkpoint + assert checkpoint.exists() and checkpoint.suffix == ".onnx" if isinstance(device, str): device = torch.device(device) elif isinstance(device, int): - device = torch.device(f'cuda:{device}') + device = torch.device(f"cuda:{device}") self.checkpoint = checkpoint self.opt_shape = np.array(opt_shape, dtype=np.float32) self.device = device - def __build_engine(self, - scale: Optional[List[List]] = None, - fp16: bool = True, - with_profiling: bool = True) -> None: + def __build_engine( + self, + scale: Optional[List[List]] = None, + fp16: bool = True, + with_profiling: bool = True, + ) -> None: logger = trt.Logger(trt.Logger.WARNING) - trt.init_libnvinfer_plugins(logger, namespace='') + trt.init_libnvinfer_plugins(logger, namespace="") builder = trt.Builder(logger) config = builder.create_builder_config() config.max_workspace_size = torch.cuda.get_device_properties( - self.device).total_memory - flag = (1 << 
int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + self.device + ).total_memory + flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) network = builder.create_network(flag) parser = trt.OnnxParser(network, logger) if not parser.parse_from_file(str(self.checkpoint)): - raise RuntimeError( - f'failed to load ONNX file: {str(self.checkpoint)}') + raise RuntimeError(f"failed to load ONNX file: {str(self.checkpoint)}") inputs = [network.get_input(i) for i in range(network.num_inputs)] outputs = [network.get_output(i) for i in range(network.num_outputs)] profile = None @@ -57,28 +59,30 @@ def __build_engine(self, profile = builder.create_optimization_profile() if scale is None: scale = np.array( - [[1, 1, 0.5, 0.5], [1, 1, 1, 1], [4, 1, 1.5, 1.5]], - dtype=np.float32) + [[1, 1, 0.5, 0.5], [1, 1, 1, 1], [4, 1, 1.5, 1.5]], dtype=np.float32 + ) scale = (self.opt_shape * scale).astype(np.int32) elif isinstance(scale, List): scale = np.array(scale, dtype=np.int32) - assert scale.shape[0] == 3, 'Input a wrong scale list' + assert scale.shape[0] == 3, "Input a wrong scale list" else: raise NotImplementedError for inp in inputs: logger.log( trt.Logger.WARNING, - f'input "{inp.name}" with shape{inp.shape} {inp.dtype}') + f'input "{inp.name}" with shape{inp.shape} {inp.dtype}', + ) if dshape: profile.set_shape(inp.name, *scale) for out in outputs: logger.log( trt.Logger.WARNING, - f'output "{out.name}" with shape{out.shape} {out.dtype}') + f'output "{out.name}" with shape{out.shape} {out.dtype}', + ) if fp16 and builder.platform_has_fast_fp16: config.set_flag(trt.BuilderFlag.FP16) - self.weight = self.checkpoint.with_suffix('.engine') + self.weight = self.checkpoint.with_suffix(".engine") if dshape: config.add_optimization_profile(profile) if with_profiling: @@ -86,34 +90,38 @@ def __build_engine(self, with builder.build_engine(network, config) as engine: self.weight.write_bytes(engine.serialize()) logger.log( - trt.Logger.WARNING, f'Build tensorrt engine finish.\n' - f'Save in {str(self.weight.absolute())}') + trt.Logger.WARNING, + f"Build tensorrt engine finish.\n" f"Save in {str(self.weight.absolute())}", + ) - def build(self, - scale: Optional[List[List]] = None, - fp16: bool = True, - with_profiling=True): + def build( + self, scale: Optional[List[List]] = None, fp16: bool = True, with_profiling=True + ): self.__build_engine(scale, fp16, with_profiling) def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument("checkpoint", help="Checkpoint file") parser.add_argument( - '--img-size', - nargs='+', + "--img-size", + nargs="+", type=int, default=[640, 640], - help='Image size of height and width') + help="Image size of height and width", + ) parser.add_argument( - '--device', type=str, default='cuda:0', help='TensorRT builder device') + "--device", type=str, default="cuda:0", help="TensorRT builder device" + ) parser.add_argument( - '--scales', + "--scales", type=str, - default='[[1,3,640,640],[1,3,640,640],[1,3,640,640]]', - help='Input scales for build dynamic input shape engine') + default="[[1,3,640,640],[1,3,640,640],[1,3,640,640]]", + help="Input scales for build dynamic input shape engine", + ) parser.add_argument( - '--fp16', action='store_true', help='Build model with fp16 mode') + "--fp16", action="store_true", help="Build model with fp16 mode" + ) args = parser.parse_args() args.img_size *= 2 if len(args.img_size) == 1 else 1 return args @@ -124,13 +132,13 @@ def main(args): try: scales = 
eval(args.scales) except Exception: - print('Input scales is not a python variable') - print('Set scales default None') + print("Input scales is not a python variable") + print("Set scales default None") scales = None builder = EngineBuilder(args.checkpoint, img_size, args.device) builder.build(scales, fp16=args.fp16) -if __name__ == '__main__': +if __name__ == "__main__": args = parse_args() main(args) diff --git a/mmyolo/projects/easydeploy/tools/export.py b/mmyolo/projects/easydeploy/tools/export.py index fb7419e1..2e163e64 100644 --- a/mmyolo/projects/easydeploy/tools/export.py +++ b/mmyolo/projects/easydeploy/tools/export.py @@ -8,62 +8,59 @@ from mmdet.apis import init_detector from mmengine.config import ConfigDict from mmengine.utils.path import mkdir_or_exist +from projects.easydeploy.model import DeployModel from mmyolo.utils import register_all_modules -from projects.easydeploy.model import DeployModel -warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning) -warnings.filterwarnings(action='ignore', category=torch.jit.ScriptWarning) -warnings.filterwarnings(action='ignore', category=UserWarning) -warnings.filterwarnings(action='ignore', category=FutureWarning) -warnings.filterwarnings(action='ignore', category=ResourceWarning) +warnings.filterwarnings(action="ignore", category=torch.jit.TracerWarning) +warnings.filterwarnings(action="ignore", category=torch.jit.ScriptWarning) +warnings.filterwarnings(action="ignore", category=UserWarning) +warnings.filterwarnings(action="ignore", category=FutureWarning) +warnings.filterwarnings(action="ignore", category=ResourceWarning) def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('config', help='Config file') - parser.add_argument('checkpoint', help='Checkpoint file') - parser.add_argument( - '--model-only', action='store_true', help='Export model only') + parser.add_argument("config", help="Config file") + parser.add_argument("checkpoint", help="Checkpoint file") + parser.add_argument("--model-only", action="store_true", help="Export model only") parser.add_argument( - '--work-dir', default='./work_dir', help='Path to save export model') + "--work-dir", default="./work_dir", help="Path to save export model" + ) parser.add_argument( - '--img-size', - nargs='+', + "--img-size", + nargs="+", type=int, default=[640, 640], - help='Image size of height and width') - parser.add_argument('--batch-size', type=int, default=1, help='Batch size') + help="Image size of height and width", + ) + parser.add_argument("--batch-size", type=int, default=1, help="Batch size") + parser.add_argument("--device", default="cuda:0", help="Device used for inference") parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') + "--simplify", action="store_true", help="Simplify onnx model by onnx-sim" + ) + parser.add_argument("--opset", type=int, default=11, help="ONNX opset version") parser.add_argument( - '--simplify', - action='store_true', - help='Simplify onnx model by onnx-sim') + "--backend", type=int, default=1, help="Backend for export onnx" + ) parser.add_argument( - '--opset', type=int, default=11, help='ONNX opset version') - parser.add_argument( - '--backend', type=int, default=1, help='Backend for export onnx') - parser.add_argument( - '--pre-topk', + "--pre-topk", type=int, default=1000, - help='Postprocess pre topk bboxes feed into NMS') + help="Postprocess pre topk bboxes feed into NMS", + ) parser.add_argument( - '--keep-topk', + "--keep-topk", type=int, default=100, - 
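Stepping back to `EngineBuilder` above before the export flags continue: programmatic use mirrors the CLI. A hedged sketch; the module path and ONNX filename are assumptions, and TensorRT plus a CUDA device are required:

from projects.easydeploy.tools.build_engine import EngineBuilder

# 'end2end.onnx' is a placeholder for a model exported by export.py.
builder = EngineBuilder("end2end.onnx", opt_shape=(1, 3, 640, 640), device="cuda:0")
builder.build(scale=None, fp16=True)  # writes end2end.engine beside the ONNX file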
help='Postprocess keep topk bboxes out of NMS') + help="Postprocess keep topk bboxes out of NMS", + ) parser.add_argument( - '--iou-threshold', - type=float, - default=0.65, - help='IoU threshold for NMS') + "--iou-threshold", type=float, default=0.65, help="IoU threshold for NMS" + ) parser.add_argument( - '--score-threshold', - type=float, - default=0.25, - help='Score threshold for NMS') + "--score-threshold", type=float, default=0.25, help="Score threshold for NMS" + ) args = parser.parse_args() args.img_size *= 2 if len(args.img_size) == 1 else 1 return args @@ -90,29 +87,29 @@ def main(): keep_top_k=args.keep_topk, iou_threshold=args.iou_threshold, score_threshold=args.score_threshold, - backend=args.backend) - output_names = ['num_dets', 'boxes', 'scores', 'labels'] + backend=args.backend, + ) + output_names = ["num_dets", "boxes", "scores", "labels"] baseModel = build_model_from_cfg(args.config, args.checkpoint, args.device) - deploy_model = DeployModel( - baseModel=baseModel, postprocess_cfg=postprocess_cfg) + deploy_model = DeployModel(baseModel=baseModel, postprocess_cfg=postprocess_cfg) deploy_model.eval() - fake_input = torch.randn(args.batch_size, 3, - *args.img_size).to(args.device) + fake_input = torch.randn(args.batch_size, 3, *args.img_size).to(args.device) # dry run deploy_model(fake_input) - save_onnx_path = os.path.join(args.work_dir, 'end2end.onnx') + save_onnx_path = os.path.join(args.work_dir, "end2end.onnx") # export onnx with BytesIO() as f: torch.onnx.export( deploy_model, fake_input, f, - input_names=['images'], + input_names=["images"], output_names=output_names, - opset_version=args.opset) + opset_version=args.opset, + ) f.seek(0) onnx_model = onnx.load(f) onnx.checker.check_model(onnx_model) @@ -120,9 +117,15 @@ def main(): # Fix tensorrt onnx output shape, just for view if args.backend in (2, 3): shapes = [ - args.batch_size, 1, args.batch_size, args.keep_topk, 4, - args.batch_size, args.keep_topk, args.batch_size, - args.keep_topk + args.batch_size, + 1, + args.batch_size, + args.keep_topk, + 4, + args.batch_size, + args.keep_topk, + args.batch_size, + args.keep_topk, ] for i in onnx_model.graph.output: for j in i.type.tensor_type.shape.dim: @@ -130,13 +133,14 @@ def main(): if args.simplify: try: import onnxsim + onnx_model, check = onnxsim.simplify(onnx_model) - assert check, 'assert check failed' + assert check, "assert check failed" except Exception as e: - print(f'Simplify failure: {e}') + print(f"Simplify failure: {e}") onnx.save(onnx_model, save_onnx_path) - print(f'ONNX export success, save into {save_onnx_path}') + print(f"ONNX export success, save into {save_onnx_path}") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/projects/easydeploy/tools/image-demo.py b/mmyolo/projects/easydeploy/tools/image-demo.py index c676949f..1cd4cfbf 100644 --- a/mmyolo/projects/easydeploy/tools/image-demo.py +++ b/mmyolo/projects/easydeploy/tools/image-demo.py @@ -19,29 +19,26 @@ def parse_args(): parser = ArgumentParser() + parser.add_argument("img", help="Image path, include image file, dir and URL.") + parser.add_argument("config", help="Config file") + parser.add_argument("checkpoint", help="Checkpoint file") + parser.add_argument("--out-dir", default="./output", help="Path to output file") + parser.add_argument("--device", default="cuda:0", help="Device used for inference") parser.add_argument( - 'img', help='Image path, include image file, dir and URL.') - parser.add_argument('config', help='Config file') - 
parser.add_argument('checkpoint', help='Checkpoint file') - parser.add_argument( - '--out-dir', default='./output', help='Path to output file') - parser.add_argument( - '--device', default='cuda:0', help='Device used for inference') - parser.add_argument( - '--show', action='store_true', help='Show the detection results') + "--show", action="store_true", help="Show the detection results" + ) args = parser.parse_args() return args def preprocess(config): - data_preprocess = config.get('model', {}).get('data_preprocessor', {}) - mean = data_preprocess.get('mean', [0., 0., 0.]) - std = data_preprocess.get('std', [1., 1., 1.]) + data_preprocess = config.get("model", {}).get("data_preprocessor", {}) + mean = data_preprocess.get("mean", [0.0, 0.0, 0.0]) + std = data_preprocess.get("std", [1.0, 1.0, 1.0]) mean = torch.tensor(mean, dtype=torch.float32).reshape(1, 3, 1, 1) std = torch.tensor(std, dtype=torch.float32).reshape(1, 3, 1, 1) class PreProcess(torch.nn.Module): - def __init__(self): super().__init__() @@ -63,10 +60,9 @@ def main(): colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(1000)] # build the model from a config file and a checkpoint file - if args.checkpoint.endswith('.onnx'): + if args.checkpoint.endswith(".onnx"): model = ORTWrapper(args.checkpoint, args.device) - elif args.checkpoint.endswith('.engine') or args.checkpoint.endswith( - '.plan'): + elif args.checkpoint.endswith(".engine") or args.checkpoint.endswith(".plan"): model = TRTWrapper(args.checkpoint, args.device) else: raise NotImplementedError @@ -76,7 +72,7 @@ def main(): cfg = Config.fromfile(args.config) test_pipeline = get_test_pipeline_cfg(cfg) - test_pipeline[0] = ConfigDict({'type': 'mmdet.LoadImageFromNDArray'}) + test_pipeline[0] = ConfigDict({"type": "mmdet.LoadImageFromNDArray"}) test_pipeline = Compose(test_pipeline) pre_pipeline = preprocess(cfg) @@ -91,21 +87,20 @@ def main(): progress_bar = ProgressBar(len(files)) for i, file in enumerate(files): bgr = mmcv.imread(file) - rgb = mmcv.imconvert(bgr, 'bgr', 'rgb') + rgb = mmcv.imconvert(bgr, "bgr", "rgb") data, samples = test_pipeline(dict(img=rgb, img_id=i)).values() - pad_param = samples.get('pad_param', - np.array([0, 0, 0, 0], dtype=np.float32)) - h, w = samples.get('ori_shape', rgb.shape[:2]) + pad_param = samples.get("pad_param", np.array([0, 0, 0, 0], dtype=np.float32)) + h, w = samples.get("ori_shape", rgb.shape[:2]) pad_param = torch.asarray( - [pad_param[2], pad_param[0], pad_param[2], pad_param[0]], - device=args.device) - scale_factor = samples.get('scale_factor', [1., 1]) + [pad_param[2], pad_param[0], pad_param[2], pad_param[0]], device=args.device + ) + scale_factor = samples.get("scale_factor", [1.0, 1]) scale_factor = torch.asarray(scale_factor * 2, device=args.device) data = pre_pipeline(data).to(args.device) result = model(data) - if source_type['is_dir']: - filename = os.path.relpath(file, args.img).replace('/', '_') + if source_type["is_dir"]: + filename = os.path.relpath(file, args.img).replace("/", "_") else: filename = os.path.basename(file) out_file = None if args.show else os.path.join(args.out_dir, filename) @@ -125,22 +120,25 @@ def main(): for (bbox, score, label) in zip(bboxes, scores, labels): bbox = bbox.tolist() color = colors[label] - name = f'cls:{label}_score:{score:0.4f}' + name = f"cls:{label}_score:{score:0.4f}" cv2.rectangle(bgr, bbox[:2], bbox[2:], color, 2) cv2.putText( bgr, - name, (bbox[0], bbox[1] - 2), + name, + (bbox[0], bbox[1] - 2), cv2.FONT_HERSHEY_SIMPLEX, - 0.75, [225, 255, 255], - 
thickness=2) + 0.75, + [225, 255, 255], + thickness=2, + ) if args.show: - mmcv.imshow(bgr, 'result', 0) + mmcv.imshow(bgr, "result", 0) else: mmcv.imwrite(bgr, out_file) progress_bar.update() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/projects/example_project/configs/yolov5_s_dummy-backbone_v61_syncbn_8xb16-300e_coco.py b/mmyolo/projects/example_project/configs/yolov5_s_dummy-backbone_v61_syncbn_8xb16-300e_coco.py index 55b43bb3..a1df78f4 100644 --- a/mmyolo/projects/example_project/configs/yolov5_s_dummy-backbone_v61_syncbn_8xb16-300e_coco.py +++ b/mmyolo/projects/example_project/configs/yolov5_s_dummy-backbone_v61_syncbn_8xb16-300e_coco.py @@ -1,5 +1,5 @@ -_base_ = '../../../configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' +_base_ = "../../../configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py" -custom_imports = dict(imports=['projects.example_project.dummy']) +custom_imports = dict(imports=["projects.example_project.dummy"]) -_base_.model.backbone.type = 'DummyYOLOv5CSPDarknet' +_base_.model.backbone.type = "DummyYOLOv5CSPDarknet" diff --git a/mmyolo/projects/example_project/dummy/__init__.py b/mmyolo/projects/example_project/dummy/__init__.py index ca1028c8..961a85d0 100644 --- a/mmyolo/projects/example_project/dummy/__init__.py +++ b/mmyolo/projects/example_project/dummy/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. from .dummy_yolov5cspdarknet import DummyYOLOv5CSPDarknet -__all__ = ['DummyYOLOv5CSPDarknet'] +__all__ = ["DummyYOLOv5CSPDarknet"] diff --git a/mmyolo/projects/example_project/dummy/dummy_yolov5cspdarknet.py b/mmyolo/projects/example_project/dummy/dummy_yolov5cspdarknet.py index c500abb4..f4d7a2cf 100644 --- a/mmyolo/projects/example_project/dummy/dummy_yolov5cspdarknet.py +++ b/mmyolo/projects/example_project/dummy/dummy_yolov5cspdarknet.py @@ -12,5 +12,5 @@ class DummyYOLOv5CSPDarknet(YOLOv5CSPDarknet): """ def __init__(self, **kwargs) -> None: - print('Hello world!') + print("Hello world!") super().__init__(**kwargs) diff --git a/mmyolo/projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py b/mmyolo/projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py index 1d6a9d3b..af9712e6 100644 --- a/mmyolo/projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py +++ b/mmyolo/projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py @@ -1,12 +1,12 @@ -_base_ = '../yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' +_base_ = "../yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py" max_epochs = 100 -data_root = './data/cat/' +data_root = "./data/cat/" # data_root = '/root/workspace/mmyolo/data/cat/' # Docker -work_dir = './work_dirs/yolov5_s-v61_syncbn_fast_1xb32-100e_cat' +work_dir = "./work_dirs/yolov5_s-v61_syncbn_fast_1xb32-100e_cat" -load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa +load_from = "https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth" # noqa train_batch_size_per_gpu = 32 train_num_workers = 4 @@ -19,58 +19,65 @@ anchors = [ [(68, 69), (154, 91), (143, 162)], # P3/8 [(242, 160), (189, 287), (391, 207)], # P4/16 - [(353, 337), (539, 341), (443, 432)] # P5/32 + [(353, 337), (539, 341), (443, 432)], # P5/32 ] -class_name = ('cat', ) +class_name = ("cat",) num_classes = len(class_name) metainfo 
= dict(classes=class_name, palette=[(220, 20, 60)]) -train_cfg = dict( - max_epochs=max_epochs, val_begin=20, val_interval=save_epoch_intervals) +train_cfg = dict(max_epochs=max_epochs, val_begin=20, val_interval=save_epoch_intervals) model = dict( bbox_head=dict( head_module=dict(num_classes=num_classes), prior_generator=dict(base_sizes=anchors), - loss_cls=dict(loss_weight=0.5 * - (num_classes / 80 * 3 / _base_.num_det_layers)))) + loss_cls=dict(loss_weight=0.5 * (num_classes / 80 * 3 / _base_.num_det_layers)), + ) +) train_dataloader = dict( batch_size=train_batch_size_per_gpu, num_workers=train_num_workers, dataset=dict( _delete_=True, - type='RepeatDataset', + type="RepeatDataset", times=5, dataset=dict( type=_base_.dataset_type, data_root=data_root, metainfo=metainfo, - ann_file='annotations/trainval.json', - data_prefix=dict(img='images/'), + ann_file="annotations/trainval.json", + data_prefix=dict(img="images/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=_base_.train_pipeline))) + pipeline=_base_.train_pipeline, + ), + ), +) val_dataloader = dict( dataset=dict( metainfo=metainfo, data_root=data_root, - ann_file='annotations/trainval.json', - data_prefix=dict(img='images/'))) + ann_file="annotations/trainval.json", + data_prefix=dict(img="images/"), + ) +) test_dataloader = val_dataloader -val_evaluator = dict(ann_file=data_root + 'annotations/trainval.json') +val_evaluator = dict(ann_file=data_root + "annotations/trainval.json") test_evaluator = val_evaluator optim_wrapper = dict(optimizer=dict(lr=base_lr)) default_hooks = dict( checkpoint=dict( - type='CheckpointHook', + type="CheckpointHook", interval=save_epoch_intervals, max_keep_ckpts=5, - save_best='auto'), + save_best="auto", + ), param_scheduler=dict(max_epochs=max_epochs), - logger=dict(type='LoggerHook', interval=10)) + logger=dict(type="LoggerHook", interval=10), +) diff --git a/mmyolo/projects/misc/custom_dataset/yolov6_s_syncbn_fast_1xb32-100e_cat.py b/mmyolo/projects/misc/custom_dataset/yolov6_s_syncbn_fast_1xb32-100e_cat.py index 67d5638a..b8068da7 100644 --- a/mmyolo/projects/misc/custom_dataset/yolov6_s_syncbn_fast_1xb32-100e_cat.py +++ b/mmyolo/projects/misc/custom_dataset/yolov6_s_syncbn_fast_1xb32-100e_cat.py @@ -1,11 +1,11 @@ -_base_ = '../yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py' +_base_ = "../yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py" max_epochs = 100 -data_root = './data/cat/' +data_root = "./data/cat/" -work_dir = './work_dirs/yolov6_s_syncbn_fast_1xb32-100e_cat' +work_dir = "./work_dirs/yolov6_s_syncbn_fast_1xb32-100e_cat" -load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth' # noqa +load_from = "https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth" # noqa train_batch_size_per_gpu = 32 train_num_workers = 4 # train_num_workers = nGPU x 4 @@ -15,7 +15,7 @@ # base_lr_default * (your_bs / default_bs) base_lr = _base_.base_lr / 8 -class_name = ('cat', ) +class_name = ("cat",) num_classes = len(class_name) metainfo = dict(classes=class_name, palette=[(220, 20, 60)]) @@ -23,63 +23,75 @@ max_epochs=max_epochs, val_begin=20, val_interval=save_epoch_intervals, - dynamic_intervals=[(max_epochs - _base_.num_last_epochs, 1)]) + dynamic_intervals=[(max_epochs - _base_.num_last_epochs, 1)], +) model = dict( bbox_head=dict(head_module=dict(num_classes=num_classes)), train_cfg=dict( 
initial_assigner=dict(num_classes=num_classes), - assigner=dict(num_classes=num_classes))) + assigner=dict(num_classes=num_classes), + ), +) train_dataloader = dict( batch_size=train_batch_size_per_gpu, num_workers=train_num_workers, dataset=dict( _delete_=True, - type='RepeatDataset', + type="RepeatDataset", times=5, dataset=dict( type=_base_.dataset_type, data_root=data_root, metainfo=metainfo, - ann_file='annotations/trainval.json', - data_prefix=dict(img='images/'), + ann_file="annotations/trainval.json", + data_prefix=dict(img="images/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=_base_.train_pipeline))) + pipeline=_base_.train_pipeline, + ), + ), +) val_dataloader = dict( dataset=dict( metainfo=metainfo, data_root=data_root, - ann_file='annotations/trainval.json', - data_prefix=dict(img='images/'))) + ann_file="annotations/trainval.json", + data_prefix=dict(img="images/"), + ) +) test_dataloader = val_dataloader -val_evaluator = dict(ann_file=data_root + 'annotations/trainval.json') +val_evaluator = dict(ann_file=data_root + "annotations/trainval.json") test_evaluator = val_evaluator optim_wrapper = dict(optimizer=dict(lr=base_lr)) default_hooks = dict( checkpoint=dict( - type='CheckpointHook', + type="CheckpointHook", interval=save_epoch_intervals, max_keep_ckpts=5, - save_best='auto'), + save_best="auto", + ), param_scheduler=dict(max_epochs=max_epochs), - logger=dict(type='LoggerHook', interval=10)) + logger=dict(type="LoggerHook", interval=10), +) custom_hooks = [ dict( - type='EMAHook', - ema_type='ExpMomentumEMA', + type="EMAHook", + ema_type="ExpMomentumEMA", momentum=0.0001, update_buffers=True, strict_load=False, - priority=49), + priority=49, + ), dict( - type='mmdet.PipelineSwitchHook', + type="mmdet.PipelineSwitchHook", switch_epoch=max_epochs - _base_.num_last_epochs, - switch_pipeline=_base_.train_pipeline_stage2) + switch_pipeline=_base_.train_pipeline_stage2, + ), ] diff --git a/mmyolo/projects/misc/custom_dataset/yolov7_tiny_syncbn_fast_1xb32-100e_cat.py b/mmyolo/projects/misc/custom_dataset/yolov7_tiny_syncbn_fast_1xb32-100e_cat.py index fff59cb3..74eb16b5 100644 --- a/mmyolo/projects/misc/custom_dataset/yolov7_tiny_syncbn_fast_1xb32-100e_cat.py +++ b/mmyolo/projects/misc/custom_dataset/yolov7_tiny_syncbn_fast_1xb32-100e_cat.py @@ -1,11 +1,11 @@ -_base_ = '../yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py' +_base_ = "../yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py" max_epochs = 100 -data_root = './data/cat/' +data_root = "./data/cat/" -work_dir = './work_dirs/yolov7_tiny_syncbn_fast_1xb32-100e_cat' +work_dir = "./work_dirs/yolov7_tiny_syncbn_fast_1xb32-100e_cat" -load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth' # noqa +load_from = "https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth" # noqa train_batch_size_per_gpu = 32 train_num_workers = 4 # train_num_workers = nGPU x 4 @@ -18,10 +18,10 @@ anchors = [ [(68, 69), (154, 91), (143, 162)], # P3/8 [(242, 160), (189, 287), (391, 207)], # P4/16 - [(353, 337), (539, 341), (443, 432)] # P5/32 + [(353, 337), (539, 341), (443, 432)], # P5/32 ] -class_name = ('cat', ) +class_name = ("cat",) num_classes = len(class_name) metainfo = dict(classes=class_name, palette=[(220, 20, 60)]) @@ -29,50 +29,59 @@ max_epochs=max_epochs, val_begin=20, val_interval=save_epoch_intervals, - 
dynamic_intervals=[(max_epochs - 10, 1)]) + dynamic_intervals=[(max_epochs - 10, 1)], +) model = dict( bbox_head=dict( head_module=dict(num_classes=num_classes), prior_generator=dict(base_sizes=anchors), - loss_cls=dict(loss_weight=0.5 * - (num_classes / 80 * 3 / _base_.num_det_layers)))) + loss_cls=dict(loss_weight=0.5 * (num_classes / 80 * 3 / _base_.num_det_layers)), + ) +) train_dataloader = dict( batch_size=train_batch_size_per_gpu, num_workers=train_num_workers, dataset=dict( _delete_=True, - type='RepeatDataset', + type="RepeatDataset", times=5, dataset=dict( type=_base_.dataset_type, data_root=data_root, metainfo=metainfo, - ann_file='annotations/trainval.json', - data_prefix=dict(img='images/'), + ann_file="annotations/trainval.json", + data_prefix=dict(img="images/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=_base_.train_pipeline))) + pipeline=_base_.train_pipeline, + ), + ), +) val_dataloader = dict( dataset=dict( metainfo=metainfo, data_root=data_root, - ann_file='annotations/trainval.json', - data_prefix=dict(img='images/'))) + ann_file="annotations/trainval.json", + data_prefix=dict(img="images/"), + ) +) test_dataloader = val_dataloader -val_evaluator = dict(ann_file=data_root + 'annotations/trainval.json') +val_evaluator = dict(ann_file=data_root + "annotations/trainval.json") test_evaluator = val_evaluator optim_wrapper = dict(optimizer=dict(lr=base_lr)) default_hooks = dict( checkpoint=dict( - type='CheckpointHook', + type="CheckpointHook", interval=save_epoch_intervals, max_keep_ckpts=2, - save_best='auto'), + save_best="auto", + ), param_scheduler=dict(max_epochs=max_epochs), - logger=dict(type='LoggerHook', interval=10)) + logger=dict(type="LoggerHook", interval=10), +) diff --git a/mmyolo/setup.py b/mmyolo/setup.py index f37c8979..0970846a 100755 --- a/mmyolo/setup.py +++ b/mmyolo/setup.py @@ -6,27 +6,27 @@ import shutil import sys import warnings -from setuptools import find_packages, setup +from setuptools import find_packages, setup from torch.utils.cpp_extension import BuildExtension def readme(): - with open('README.md', encoding='utf-8') as f: + with open("README.md", encoding="utf-8") as f: content = f.read() return content -version_file = 'mmyolo/version.py' +version_file = "mmyolo/version.py" def get_version(): with open(version_file) as f: - exec(compile(f.read(), version_file, 'exec')) - return locals()['__version__'] + exec(compile(f.read(), version_file, "exec")) + return locals()["__version__"] -def parse_requirements(fname='requirements.txt', with_version=True): +def parse_requirements(fname="requirements.txt", with_version=True): """Parse the package dependencies listed in a requirements file but strips specific versioning information. 
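Aside (not in the original patch): a minimal sketch of what the parse_requirements() helper in the surrounding hunks yields, assuming a purely hypothetical requirements file; the operator handling and the ';' platform-marker split are read off the hunk that follows.

requirements_lines = [
    "torch>=1.6.0",                                # pinned: operator and version kept
    "numpy",                                       # bare name: passed through as-is
    "pywin32==227; platform_system == 'Windows'",  # platform-specific dependency
]
# With with_version=True (the default), each entry round-trips, with any
# platform marker re-attached after a ';':
expected_with_version = [
    "torch>=1.6.0",
    "numpy",
    "pywin32==227;platform_system == 'Windows'",
]
# With with_version=False, every entry collapses to the bare package name:
expected_bare = ["torch", "numpy", "pywin32"]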
@@ -43,60 +43,60 @@ def parse_requirements(fname='requirements.txt', with_version=True): import re import sys from os.path import exists + require_fpath = fname def parse_line(line): """Parse information from a line in a requirements text file.""" - if line.startswith('-r '): + if line.startswith("-r "): # Allow specifying requirements in other files - target = line.split(' ')[1] + target = line.split(" ")[1] for info in parse_require_file(target): yield info else: - info = {'line': line} - if line.startswith('-e '): - info['package'] = line.split('#egg=')[1] - elif '@git+' in line: - info['package'] = line + info = {"line": line} + if line.startswith("-e "): + info["package"] = line.split("#egg=")[1] + elif "@git+" in line: + info["package"] = line else: # Remove versioning from the package - pat = '(' + '|'.join(['>=', '==', '>']) + ')' + pat = "(" + "|".join([">=", "==", ">"]) + ")" parts = re.split(pat, line, maxsplit=1) parts = [p.strip() for p in parts] - info['package'] = parts[0] + info["package"] = parts[0] if len(parts) > 1: op, rest = parts[1:] - if ';' in rest: + if ";" in rest: # Handle platform specific dependencies # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies - version, platform_deps = map(str.strip, - rest.split(';')) - info['platform_deps'] = platform_deps + version, platform_deps = map(str.strip, rest.split(";")) + info["platform_deps"] = platform_deps else: version = rest # NOQA - info['version'] = (op, version) + info["version"] = (op, version) yield info def parse_require_file(fpath): with open(fpath) as f: for line in f.readlines(): line = line.strip() - if line and not line.startswith('#'): + if line and not line.startswith("#"): yield from parse_line(line) def gen_packages_items(): if exists(require_fpath): for info in parse_require_file(require_fpath): - parts = [info['package']] - if with_version and 'version' in info: - parts.extend(info['version']) - if not sys.version.startswith('3.4'): + parts = [info["package"]] + if with_version and "version" in info: + parts.extend(info["version"]) + if not sys.version.startswith("3.4"): # apparently package_deps are broken in 3.4 - platform_deps = info.get('platform_deps') + platform_deps = info.get("platform_deps") if platform_deps is not None: - parts.append(';' + platform_deps) - item = ''.join(parts) + parts.append(";" + platform_deps) + item = "".join(parts) yield item packages = list(gen_packages_items()) @@ -112,23 +112,23 @@ def add_mim_extension(): """ # parse installment mode - if 'develop' in sys.argv: + if "develop" in sys.argv: # installed by `pip install -e .` - if platform.system() == 'Windows': + if platform.system() == "Windows": # set `copy` mode here since symlink fails on Windows. 
- mode = 'copy' + mode = "copy" else: - mode = 'symlink' - elif 'sdist' in sys.argv or 'bdist_wheel' in sys.argv: + mode = "symlink" + elif "sdist" in sys.argv or "bdist_wheel" in sys.argv: # installed by `pip install .` # or create source distribution by `python setup.py sdist` - mode = 'copy' + mode = "copy" else: return - filenames = ['tools', 'configs', 'demo', 'model-index.yml'] + filenames = ["tools", "configs", "demo", "model-index.yml"] repo_path = osp.dirname(__file__) - mim_path = osp.join(repo_path, 'mmyolo', '.mim') + mim_path = osp.join(repo_path, "mmyolo", ".mim") os.makedirs(mim_path, exist_ok=True) for filename in filenames: @@ -141,51 +141,52 @@ def add_mim_extension(): elif osp.isdir(tar_path): shutil.rmtree(tar_path) - if mode == 'symlink': + if mode == "symlink": src_relpath = osp.relpath(src_path, osp.dirname(tar_path)) os.symlink(src_relpath, tar_path) - elif mode == 'copy': + elif mode == "copy": if osp.isfile(src_path): shutil.copyfile(src_path, tar_path) elif osp.isdir(src_path): shutil.copytree(src_path, tar_path) else: - warnings.warn(f'Cannot copy file {src_path}.') + warnings.warn(f"Cannot copy file {src_path}.") else: - raise ValueError(f'Invalid mode {mode}') + raise ValueError(f"Invalid mode {mode}") -if __name__ == '__main__': +if __name__ == "__main__": add_mim_extension() setup( - name='mmyolo', + name="mmyolo", version=get_version(), - description='OpenMMLab Toolbox of YOLO', + description="OpenMMLab Toolbox of YOLO", long_description=readme(), - long_description_content_type='text/markdown', - author='MMYOLO Contributors', - author_email='openmmlab@gmail.com', - keywords='computer vision, object detection', - url='https://github.com/open-mmlab/mmyolo', - packages=find_packages(exclude=('configs', 'tools', 'demo')), + long_description_content_type="text/markdown", + author="MMYOLO Contributors", + author_email="openmmlab@gmail.com", + keywords="computer vision, object detection", + url="https://github.com/open-mmlab/mmyolo", + packages=find_packages(exclude=("configs", "tools", "demo")), include_package_data=True, classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], - license='GPL License 3.0', - install_requires=parse_requirements('requirements/runtime.txt'), + license="GPL License 3.0", + install_requires=parse_requirements("requirements/runtime.txt"), extras_require={ - 'all': parse_requirements('requirements.txt'), - 'tests': parse_requirements('requirements/tests.txt'), - 'build': parse_requirements('requirements/build.txt'), - 'mim': parse_requirements('requirements/mminstall.txt'), + "all": parse_requirements("requirements.txt"), + "tests": parse_requirements("requirements/tests.txt"), + "build": parse_requirements("requirements/build.txt"), + "mim": parse_requirements("requirements/mminstall.txt"), }, ext_modules=[], - cmdclass={'build_ext': BuildExtension}, - zip_safe=False) + cmdclass={"build_ext": BuildExtension}, + zip_safe=False, + ) diff --git 
a/mmyolo/tests/test_datasets/test_transforms/test_mix_img_transforms.py b/mmyolo/tests/test_datasets/test_transforms/test_mix_img_transforms.py index 253fd64b..b624dd97 100644 --- a/mmyolo/tests/test_datasets/test_transforms/test_mix_img_transforms.py +++ b/mmyolo/tests/test_datasets/test_transforms/test_mix_img_transforms.py @@ -16,7 +16,6 @@ class TestMosaic(unittest.TestCase): - def setUp(self): """Setup the data info which are used in every test method. @@ -25,34 +24,28 @@ def setUp(self): """ rng = np.random.RandomState(0) self.pre_transform = [ - dict( - type='LoadImageFromFile', - file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + dict(type="LoadImageFromFile", file_client_args=dict(backend="disk")), + dict(type="LoadAnnotations", with_bbox=True), ] self.dataset = YOLOv5CocoDataset( - data_prefix=dict( - img=osp.join(osp.dirname(__file__), '../../data')), + data_prefix=dict(img=osp.join(osp.dirname(__file__), "../../data")), ann_file=osp.join( - osp.dirname(__file__), '../../data/coco_sample_color.json'), + osp.dirname(__file__), "../../data/coco_sample_color.json" + ), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=[]) + pipeline=[], + ) self.results = { - 'img': - np.random.random((224, 224, 3)), - 'img_shape': (224, 224), - 'gt_bboxes_labels': - np.array([1, 2, 3], dtype=np.int64), - 'gt_bboxes': - np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], - dtype=np.float32), - 'gt_ignore_flags': - np.array([0, 0, 1], dtype=bool), - 'gt_masks': - BitmapMasks(rng.rand(3, 224, 224), height=224, width=224), - 'dataset': - self.dataset + "img": np.random.random((224, 224, 3)), + "img_shape": (224, 224), + "gt_bboxes_labels": np.array([1, 2, 3], dtype=np.int64), + "gt_bboxes": np.array( + [[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], dtype=np.float32 + ), + "gt_ignore_flags": np.array([0, 0, 1], dtype=bool), + "gt_masks": BitmapMasks(rng.rand(3, 224, 224), height=224, width=224), + "dataset": self.dataset, } def test_transform(self): @@ -68,48 +61,48 @@ def test_transform(self): with self.assertRaises(AssertionError): transform = Mosaic(use_cached=True, max_cached_images=1) - transform = Mosaic( - img_scale=(12, 10), pre_transform=self.pre_transform) + transform = Mosaic(img_scale=(12, 10), pre_transform=self.pre_transform) results = transform(copy.deepcopy(self.results)) - self.assertTrue(results['img'].shape[:2] == (20, 24)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == np.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (20, 24)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == np.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) def test_transform_with_no_gt(self): - self.results['gt_bboxes'] = np.empty((0, 4), dtype=np.float32) - self.results['gt_bboxes_labels'] = np.empty((0, ), dtype=np.int64) - self.results['gt_ignore_flags'] = np.empty((0, ), dtype=bool) - transform = Mosaic( - img_scale=(12, 10), pre_transform=self.pre_transform) + self.results["gt_bboxes"] = np.empty((0, 4), dtype=np.float32) + self.results["gt_bboxes_labels"] = np.empty((0,), dtype=np.int64) + self.results["gt_ignore_flags"] = np.empty((0,), 
dtype=bool) + transform = Mosaic(img_scale=(12, 10), pre_transform=self.pre_transform) results = transform(copy.deepcopy(self.results)) self.assertIsInstance(results, dict) - self.assertTrue(results['img'].shape[:2] == (20, 24)) + self.assertTrue(results["img"].shape[:2] == (20, 24)) self.assertTrue( - results['gt_bboxes_labels'].shape[0] == results['gt_bboxes']. - shape[0] == results['gt_ignore_flags'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == np.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + results["gt_bboxes_labels"].shape[0] + == results["gt_bboxes"].shape[0] + == results["gt_ignore_flags"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == np.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) def test_transform_with_box_list(self): - transform = Mosaic( - img_scale=(12, 10), pre_transform=self.pre_transform) + transform = Mosaic(img_scale=(12, 10), pre_transform=self.pre_transform) results = copy.deepcopy(self.results) - results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes']) + results["gt_bboxes"] = HorizontalBoxes(results["gt_bboxes"]) results = transform(results) - self.assertTrue(results['img'].shape[:2] == (20, 24)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == torch.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (20, 24)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == torch.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) class TestMosaic9(unittest.TestCase): - def setUp(self): """Setup the data info which are used in every test method. 
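Aside (not in the original patch): why the Mosaic tests above, and the Mosaic9 tests below, expect an output image of (20, 24). img_scale is given as (width, height) = (12, 10), and the mosaic canvas is twice the base scale on each side, so the asserted shape[:2], which is (height, width), works out as follows.

img_scale_w, img_scale_h = 12, 10               # img_scale=(12, 10) in the tests
mosaic_hw = (2 * img_scale_h, 2 * img_scale_w)  # canvas is 2x per dimension
assert mosaic_hw == (20, 24)                    # matches results["img"].shape[:2]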
@@ -118,34 +111,28 @@ def setUp(self): """ rng = np.random.RandomState(0) self.pre_transform = [ - dict( - type='LoadImageFromFile', - file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + dict(type="LoadImageFromFile", file_client_args=dict(backend="disk")), + dict(type="LoadAnnotations", with_bbox=True), ] self.dataset = YOLOv5CocoDataset( - data_prefix=dict( - img=osp.join(osp.dirname(__file__), '../../data')), + data_prefix=dict(img=osp.join(osp.dirname(__file__), "../../data")), ann_file=osp.join( - osp.dirname(__file__), '../../data/coco_sample_color.json'), + osp.dirname(__file__), "../../data/coco_sample_color.json" + ), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=[]) + pipeline=[], + ) self.results = { - 'img': - np.random.random((224, 224, 3)), - 'img_shape': (224, 224), - 'gt_bboxes_labels': - np.array([1, 2, 3], dtype=np.int64), - 'gt_bboxes': - np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], - dtype=np.float32), - 'gt_ignore_flags': - np.array([0, 0, 1], dtype=bool), - 'gt_masks': - BitmapMasks(rng.rand(3, 224, 224), height=224, width=224), - 'dataset': - self.dataset + "img": np.random.random((224, 224, 3)), + "img_shape": (224, 224), + "gt_bboxes_labels": np.array([1, 2, 3], dtype=np.int64), + "gt_bboxes": np.array( + [[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], dtype=np.float32 + ), + "gt_ignore_flags": np.array([0, 0, 1], dtype=bool), + "gt_masks": BitmapMasks(rng.rand(3, 224, 224), height=224, width=224), + "dataset": self.dataset, } def test_transform(self): @@ -161,48 +148,48 @@ def test_transform(self): with self.assertRaises(AssertionError): transform = Mosaic9(use_cached=True, max_cached_images=1) - transform = Mosaic9( - img_scale=(12, 10), pre_transform=self.pre_transform) + transform = Mosaic9(img_scale=(12, 10), pre_transform=self.pre_transform) results = transform(copy.deepcopy(self.results)) - self.assertTrue(results['img'].shape[:2] == (20, 24)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == np.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (20, 24)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == np.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) def test_transform_with_no_gt(self): - self.results['gt_bboxes'] = np.empty((0, 4), dtype=np.float32) - self.results['gt_bboxes_labels'] = np.empty((0, ), dtype=np.int64) - self.results['gt_ignore_flags'] = np.empty((0, ), dtype=bool) - transform = Mosaic9( - img_scale=(12, 10), pre_transform=self.pre_transform) + self.results["gt_bboxes"] = np.empty((0, 4), dtype=np.float32) + self.results["gt_bboxes_labels"] = np.empty((0,), dtype=np.int64) + self.results["gt_ignore_flags"] = np.empty((0,), dtype=bool) + transform = Mosaic9(img_scale=(12, 10), pre_transform=self.pre_transform) results = transform(copy.deepcopy(self.results)) self.assertIsInstance(results, dict) - self.assertTrue(results['img'].shape[:2] == (20, 24)) + self.assertTrue(results["img"].shape[:2] == (20, 24)) self.assertTrue( - results['gt_bboxes_labels'].shape[0] == results['gt_bboxes']. 
- shape[0] == results['gt_ignore_flags'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == np.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + results["gt_bboxes_labels"].shape[0] + == results["gt_bboxes"].shape[0] + == results["gt_ignore_flags"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == np.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) def test_transform_with_box_list(self): - transform = Mosaic9( - img_scale=(12, 10), pre_transform=self.pre_transform) + transform = Mosaic9(img_scale=(12, 10), pre_transform=self.pre_transform) results = copy.deepcopy(self.results) - results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes']) + results["gt_bboxes"] = HorizontalBoxes(results["gt_bboxes"]) results = transform(results) - self.assertTrue(results['img'].shape[:2] == (20, 24)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == torch.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (20, 24)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == torch.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) class TestYOLOv5MixUp(unittest.TestCase): - def setUp(self): """Setup the data info which are used in every test method. @@ -211,45 +198,40 @@ def setUp(self): """ rng = np.random.RandomState(0) self.pre_transform = [ - dict( - type='LoadImageFromFile', - file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + dict(type="LoadImageFromFile", file_client_args=dict(backend="disk")), + dict(type="LoadAnnotations", with_bbox=True), ] self.dataset = YOLOv5CocoDataset( - data_prefix=dict( - img=osp.join(osp.dirname(__file__), '../../data')), + data_prefix=dict(img=osp.join(osp.dirname(__file__), "../../data")), ann_file=osp.join( - osp.dirname(__file__), '../../data/coco_sample_color.json'), + osp.dirname(__file__), "../../data/coco_sample_color.json" + ), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=[]) + pipeline=[], + ) self.results = { - 'img': - np.random.random((288, 512, 3)), - 'img_shape': (288, 512), - 'gt_bboxes_labels': - np.array([1, 2, 3], dtype=np.int64), - 'gt_bboxes': - np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], - dtype=np.float32), - 'gt_ignore_flags': - np.array([0, 0, 1], dtype=bool), - 'gt_masks': - BitmapMasks(rng.rand(3, 288, 512), height=288, width=512), - 'dataset': - self.dataset + "img": np.random.random((288, 512, 3)), + "img_shape": (288, 512), + "gt_bboxes_labels": np.array([1, 2, 3], dtype=np.int64), + "gt_bboxes": np.array( + [[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], dtype=np.float32 + ), + "gt_ignore_flags": np.array([0, 0, 1], dtype=bool), + "gt_masks": BitmapMasks(rng.rand(3, 288, 512), height=288, width=512), + "dataset": self.dataset, } def test_transform(self): transform = YOLOv5MixUp(pre_transform=self.pre_transform) results = transform(copy.deepcopy(self.results)) - self.assertTrue(results['img'].shape[:2] == (288, 512)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - 
results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == np.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (288, 512)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == np.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) # test assertion for invalid max_cached_images with self.assertRaises(AssertionError): @@ -257,20 +239,20 @@ def test_transform(self): def test_transform_with_box_list(self): results = copy.deepcopy(self.results) - results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes']) + results["gt_bboxes"] = HorizontalBoxes(results["gt_bboxes"]) transform = YOLOv5MixUp(pre_transform=self.pre_transform) results = transform(results) - self.assertTrue(results['img'].shape[:2] == (288, 512)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == torch.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (288, 512)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == torch.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) class TestYOLOXMixUp(unittest.TestCase): - def setUp(self): """Setup the data info which are used in every test method. @@ -279,33 +261,27 @@ def setUp(self): """ rng = np.random.RandomState(0) self.pre_transform = [ - dict( - type='LoadImageFromFile', - file_client_args=dict(backend='disk')), - dict(type='LoadAnnotations', with_bbox=True) + dict(type="LoadImageFromFile", file_client_args=dict(backend="disk")), + dict(type="LoadAnnotations", with_bbox=True), ] self.dataset = YOLOv5CocoDataset( - data_prefix=dict( - img=osp.join(osp.dirname(__file__), '../../data')), + data_prefix=dict(img=osp.join(osp.dirname(__file__), "../../data")), ann_file=osp.join( - osp.dirname(__file__), '../../data/coco_sample_color.json'), + osp.dirname(__file__), "../../data/coco_sample_color.json" + ), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=[]) + pipeline=[], + ) self.results = { - 'img': - np.random.random((224, 224, 3)), - 'img_shape': (224, 224), - 'gt_bboxes_labels': - np.array([1, 2, 3], dtype=np.int64), - 'gt_bboxes': - np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], - dtype=np.float32), - 'gt_ignore_flags': - np.array([0, 0, 1], dtype=bool), - 'gt_masks': - BitmapMasks(rng.rand(3, 224, 224), height=224, width=224), - 'dataset': - self.dataset + "img": np.random.random((224, 224, 3)), + "img_shape": (224, 224), + "gt_bboxes_labels": np.array([1, 2, 3], dtype=np.int64), + "gt_bboxes": np.array( + [[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], dtype=np.float32 + ), + "gt_ignore_flags": np.array([0, 0, 1], dtype=bool), + "gt_masks": BitmapMasks(rng.rand(3, 224, 224), height=224, width=224), + "dataset": self.dataset, } def test_transform(self): @@ -321,30 +297,34 @@ def test_transform(self): img_scale=(10, 12), ratio_range=(0.8, 1.6), pad_val=114.0, - pre_transform=self.pre_transform) + pre_transform=self.pre_transform, + ) # 
self.results['mix_results'] = [copy.deepcopy(self.results)] results = transform(copy.deepcopy(self.results)) - self.assertTrue(results['img'].shape[:2] == (224, 224)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == np.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (224, 224)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == np.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) def test_transform_with_boxlist(self): results = copy.deepcopy(self.results) - results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes']) + results["gt_bboxes"] = HorizontalBoxes(results["gt_bboxes"]) transform = YOLOXMixUp( img_scale=(10, 12), ratio_range=(0.8, 1.6), pad_val=114.0, - pre_transform=self.pre_transform) + pre_transform=self.pre_transform, + ) results = transform(results) - self.assertTrue(results['img'].shape[:2] == (224, 224)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == torch.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (224, 224)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == torch.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) diff --git a/mmyolo/tests/test_datasets/test_transforms/test_transforms.py b/mmyolo/tests/test_datasets/test_transforms/test_transforms.py index d256dd9f..3031ce96 100644 --- a/mmyolo/tests/test_datasets/test_transforms/test_transforms.py +++ b/mmyolo/tests/test_datasets/test_transforms/test_transforms.py @@ -9,16 +9,20 @@ from mmdet.structures.bbox import HorizontalBoxes from mmdet.structures.mask import BitmapMasks -from mmyolo.datasets.transforms import (LetterResize, LoadAnnotations, - YOLOv5HSVRandomAug, - YOLOv5KeepRatioResize, - YOLOv5RandomAffine) -from mmyolo.datasets.transforms.transforms import (PPYOLOERandomCrop, - PPYOLOERandomDistort) +from mmyolo.datasets.transforms import ( + LetterResize, + LoadAnnotations, + YOLOv5HSVRandomAug, + YOLOv5KeepRatioResize, + YOLOv5RandomAffine, +) +from mmyolo.datasets.transforms.transforms import ( + PPYOLOERandomCrop, + PPYOLOERandomDistort, +) class TestLetterResize(unittest.TestCase): - def setUp(self): """Set up the data info which are used in every test method. 
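Aside (not in the original patch): the expected arrays in the LetterResize tests below can be reproduced by hand. A sketch for data_info1, a 300x400 image letterboxed into batch_shape (192, 672) with allow_scale_up=False; the rounding and the even left/right pad split are assumptions that happen to match the asserted numbers.

import numpy as np

h, w = 300, 400
target_h, target_w = 192, 672
ratio = min(target_h / h, target_w / w, 1.0)       # 0.64: never scaled up
new_h, new_w = round(h * ratio), round(w * ratio)  # (192, 256)
pad_left = (target_w - new_w) // 2                 # 208 on each side
assert (new_h, new_w, pad_left) == (192, 256, 208)
# A gt box [0, 0, 150, 150] scales by 0.64 and shifts by the left pad,
# which is exactly the [[208., 0., 304., 96.]] asserted below.
assert np.allclose(np.array([0, 0, 150, 150]) * ratio + [208, 0, 208, 0],
                   [208, 0, 304, 96])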
@@ -30,144 +34,157 @@ def setUp(self): img=np.random.random((300, 400, 3)), gt_bboxes=np.array([[0, 0, 150, 150]], dtype=np.float32), batch_shape=np.array([192, 672], dtype=np.int64), - gt_masks=BitmapMasks(rng.rand(1, 300, 400), height=300, width=400)) + gt_masks=BitmapMasks(rng.rand(1, 300, 400), height=300, width=400), + ) self.data_info2 = dict( img=np.random.random((300, 400, 3)), - gt_bboxes=np.array([[0, 0, 150, 150]], dtype=np.float32)) + gt_bboxes=np.array([[0, 0, 150, 150]], dtype=np.float32), + ) self.data_info3 = dict( img=np.random.random((300, 400, 3)), - batch_shape=np.array([192, 672], dtype=np.int64)) + batch_shape=np.array([192, 672], dtype=np.int64), + ) self.data_info4 = dict(img=np.random.random((300, 400, 3))) def test_letter_resize(self): # Test allow_scale_up transform = LetterResize(scale=(640, 640), allow_scale_up=False) results = transform(copy.deepcopy(self.data_info1)) - self.assertEqual(results['img_shape'], (192, 672, 3)) + self.assertEqual(results["img_shape"], (192, 672, 3)) self.assertTrue( - (results['gt_bboxes'] == np.array([[208., 0., 304., 96.]])).all()) - self.assertTrue((results['batch_shape'] == np.array([192, 672])).all()) - self.assertTrue((results['pad_param'] == np.array([0., 0., 208., - 208.])).all()) + (results["gt_bboxes"] == np.array([[208.0, 0.0, 304.0, 96.0]])).all() + ) + self.assertTrue((results["batch_shape"] == np.array([192, 672])).all()) self.assertTrue( - (np.array(results['scale_factor'], dtype=np.float32) <= 1.).all()) + (results["pad_param"] == np.array([0.0, 0.0, 208.0, 208.0])).all() + ) + self.assertTrue( + (np.array(results["scale_factor"], dtype=np.float32) <= 1.0).all() + ) # Test pad_val transform = LetterResize(scale=(640, 640), pad_val=dict(img=144)) results = transform(copy.deepcopy(self.data_info1)) - self.assertEqual(results['img_shape'], (192, 672, 3)) + self.assertEqual(results["img_shape"], (192, 672, 3)) self.assertTrue( - (results['gt_bboxes'] == np.array([[208., 0., 304., 96.]])).all()) - self.assertTrue((results['batch_shape'] == np.array([192, 672])).all()) - self.assertTrue((results['pad_param'] == np.array([0., 0., 208., - 208.])).all()) + (results["gt_bboxes"] == np.array([[208.0, 0.0, 304.0, 96.0]])).all() + ) + self.assertTrue((results["batch_shape"] == np.array([192, 672])).all()) + self.assertTrue( + (results["pad_param"] == np.array([0.0, 0.0, 208.0, 208.0])).all() + ) self.assertTrue( - (np.array(results['scale_factor'], dtype=np.float32) <= 1.).all()) + (np.array(results["scale_factor"], dtype=np.float32) <= 1.0).all() + ) # Test use_mini_pad transform = LetterResize(scale=(640, 640), use_mini_pad=True) results = transform(copy.deepcopy(self.data_info1)) - self.assertEqual(results['img_shape'], (192, 256, 3)) - self.assertTrue((results['gt_bboxes'] == np.array([[0., 0., 96., - 96.]])).all()) - self.assertTrue((results['batch_shape'] == np.array([192, 672])).all()) - self.assertTrue((results['pad_param'] == np.array([0., 0., 0., - 0.])).all()) + self.assertEqual(results["img_shape"], (192, 256, 3)) + self.assertTrue( + (results["gt_bboxes"] == np.array([[0.0, 0.0, 96.0, 96.0]])).all() + ) + self.assertTrue((results["batch_shape"] == np.array([192, 672])).all()) + self.assertTrue((results["pad_param"] == np.array([0.0, 0.0, 0.0, 0.0])).all()) self.assertTrue( - (np.array(results['scale_factor'], dtype=np.float32) <= 1.).all()) + (np.array(results["scale_factor"], dtype=np.float32) <= 1.0).all() + ) # Test stretch_only transform = LetterResize(scale=(640, 640), stretch_only=True) results = 
transform(copy.deepcopy(self.data_info1)) - self.assertEqual(results['img_shape'], (192, 672, 3)) - self.assertTrue((results['gt_bboxes'] == np.array( - [[0., 0., 251.99998474121094, 96.]])).all()) - self.assertTrue((results['batch_shape'] == np.array([192, 672])).all()) - self.assertTrue((results['pad_param'] == np.array([0., 0., 0., - 0.])).all()) + self.assertEqual(results["img_shape"], (192, 672, 3)) + self.assertTrue( + ( + results["gt_bboxes"] == np.array([[0.0, 0.0, 251.99998474121094, 96.0]]) + ).all() + ) + self.assertTrue((results["batch_shape"] == np.array([192, 672])).all()) + self.assertTrue((results["pad_param"] == np.array([0.0, 0.0, 0.0, 0.0])).all()) # Test transform = LetterResize(scale=(640, 640), pad_val=dict(img=144)) rng = np.random.RandomState(0) for _ in range(5): - input_h, input_w = np.random.randint(100, 700), np.random.randint( - 100, 700) - output_h, output_w = np.random.randint(100, - 700), np.random.randint( - 100, 700) + input_h, input_w = np.random.randint(100, 700), np.random.randint(100, 700) + output_h, output_w = np.random.randint(100, 700), np.random.randint( + 100, 700 + ) data_info = dict( img=np.random.random((input_h, input_w, 3)), gt_bboxes=np.array([[0, 0, 10, 10]], dtype=np.float32), batch_shape=np.array([output_h, output_w], dtype=np.int64), gt_masks=BitmapMasks( - rng.rand(1, input_h, input_w), - height=input_h, - width=input_w)) + rng.rand(1, input_h, input_w), height=input_h, width=input_w + ), + ) results = transform(data_info) - self.assertEqual(results['img_shape'], (output_h, output_w, 3)) + self.assertEqual(results["img_shape"], (output_h, output_w, 3)) self.assertTrue( - (results['batch_shape'] == np.array([output_h, - output_w])).all()) + (results["batch_shape"] == np.array([output_h, output_w])).all() + ) # Test without batchshape transform = LetterResize(scale=(640, 640), pad_val=dict(img=144)) rng = np.random.RandomState(0) for _ in range(5): - input_h, input_w = np.random.randint(100, 700), np.random.randint( - 100, 700) + input_h, input_w = np.random.randint(100, 700), np.random.randint(100, 700) data_info = dict( img=np.random.random((input_h, input_w, 3)), gt_bboxes=np.array([[0, 0, 10, 10]], dtype=np.float32), gt_masks=BitmapMasks( - rng.rand(1, input_h, input_w), - height=input_h, - width=input_w)) + rng.rand(1, input_h, input_w), height=input_h, width=input_w + ), + ) results = transform(data_info) - self.assertEqual(results['img_shape'], (640, 640, 3)) + self.assertEqual(results["img_shape"], (640, 640, 3)) # TODO: Testing the existence of multiple scale_factor and pad_param transform = [ YOLOv5KeepRatioResize(scale=(32, 32)), - LetterResize(scale=(64, 68), pad_val=dict(img=144)) + LetterResize(scale=(64, 68), pad_val=dict(img=144)), ] for _ in range(5): - input_h, input_w = np.random.randint(100, 700), np.random.randint( - 100, 700) - output_h, output_w = np.random.randint(100, - 700), np.random.randint( - 100, 700) + input_h, input_w = np.random.randint(100, 700), np.random.randint(100, 700) + output_h, output_w = np.random.randint(100, 700), np.random.randint( + 100, 700 + ) data_info = dict( img=np.random.random((input_h, input_w, 3)), gt_bboxes=np.array([[0, 0, 5, 5]], dtype=np.float32), - batch_shape=np.array([output_h, output_w], dtype=np.int64)) + batch_shape=np.array([output_h, output_w], dtype=np.int64), + ) for t in transform: data_info = t(data_info) # because of the "math.round" operation, # it is unable to strictly restore the original input shape # we just validate the correctness of scale_factor and 
pad_param - self.assertIn('scale_factor', data_info) - self.assertIn('pad_param', data_info) - pad_param = data_info['pad_param'].reshape(-1, 2).sum( - 1) # (top, b, l, r) -> (h, w) - scale_factor = np.asarray( - data_info['scale_factor'])[::-1] # (w, h) -> (h, w) - scale_factor_keepratio = np.min( - np.asarray((32, 32)) / (input_h, input_w)) + self.assertIn("scale_factor", data_info) + self.assertIn("pad_param", data_info) + pad_param = ( + data_info["pad_param"].reshape(-1, 2).sum(1) + ) # (top, b, l, r) -> (h, w) + scale_factor = np.asarray(data_info["scale_factor"])[ + ::-1 + ] # (w, h) -> (h, w) + scale_factor_keepratio = np.min(np.asarray((32, 32)) / (input_h, input_w)) validate_shape = np.floor( - np.asarray((input_h, input_w)) * scale_factor_keepratio + 0.5) - scale_factor_keepratio = np.floor(scale_factor_keepratio * - input_h + 0.5) / input_h + np.asarray((input_h, input_w)) * scale_factor_keepratio + 0.5 + ) + scale_factor_keepratio = ( + np.floor(scale_factor_keepratio * input_h + 0.5) / input_h + ) scale_factor_letter = (output_h, output_w) / validate_shape - scale_factor_letter = ( - scale_factor_letter - - (pad_param / validate_shape))[np.argmin(scale_factor_letter)] - self.assertTrue(data_info['img_shape'][:2] == (output_h, output_w)) - self.assertTrue((scale_factor == (scale_factor_keepratio * - scale_factor_letter)).all()) + scale_factor_letter = (scale_factor_letter - (pad_param / validate_shape))[ + np.argmin(scale_factor_letter) + ] + self.assertTrue(data_info["img_shape"][:2] == (output_h, output_w)) + self.assertTrue( + (scale_factor == (scale_factor_keepratio * scale_factor_letter)).all() + ) class TestYOLOv5KeepRatioResize(unittest.TestCase): - def setUp(self): """Set up the data info which are used in every test method. @@ -178,7 +195,8 @@ def setUp(self): self.data_info1 = dict( img=np.random.random((300, 400, 3)), gt_bboxes=np.array([[0, 0, 150, 150]], dtype=np.float32), - gt_masks=BitmapMasks(rng.rand(1, 300, 400), height=300, width=400)) + gt_masks=BitmapMasks(rng.rand(1, 300, 400), height=300, width=400), + ) self.data_info2 = dict(img=np.random.random((300, 400, 3))) def test_yolov5_keep_ratio_resize(self): @@ -192,22 +210,24 @@ def test_yolov5_keep_ratio_resize(self): transform = YOLOv5KeepRatioResize(scale=(640, 640)) results = transform(copy.deepcopy(self.data_info1)) self.assertTrue(transform.keep_ratio, True) - self.assertEqual(results['img_shape'], (480, 640)) + self.assertEqual(results["img_shape"], (480, 640)) + self.assertTrue( + (results["gt_bboxes"] == np.array([[0.0, 0.0, 240.0, 240.0]])).all() + ) self.assertTrue( - (results['gt_bboxes'] == np.array([[0., 0., 240., 240.]])).all()) - self.assertTrue((np.array(results['scale_factor'], - dtype=np.float32) == 1.6).all()) + (np.array(results["scale_factor"], dtype=np.float32) == 1.6).all() + ) # Test only img transform = YOLOv5KeepRatioResize(scale=(640, 640)) results = transform(copy.deepcopy(self.data_info2)) - self.assertEqual(results['img_shape'], (480, 640)) - self.assertTrue((np.array(results['scale_factor'], - dtype=np.float32) == 1.6).all()) + self.assertEqual(results["img_shape"], (480, 640)) + self.assertTrue( + (np.array(results["scale_factor"], dtype=np.float32) == 1.6).all() + ) class TestYOLOv5HSVRandomAug(unittest.TestCase): - def setUp(self): """Set up the data info which are used in every test method. 
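Aside (not in the original patch): the 1.6 scale factor asserted in the YOLOv5KeepRatioResize tests above is plain aspect-ratio arithmetic; unlike the letterbox case, this transform is allowed to scale up.

h, w = 300, 400
scale = 640
ratio = min(scale / h, scale / w)   # min(2.133..., 1.6) = 1.6
assert (round(h * ratio), round(w * ratio)) == (480, 640)
assert 150 * ratio == 240.0         # gt box corner, as asserted above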
@@ -216,48 +236,51 @@ def setUp(self): """ self.data_info = dict( img=mmcv.imread( - osp.join(osp.dirname(__file__), '../../data/color.jpg'), - 'color')) + osp.join(osp.dirname(__file__), "../../data/color.jpg"), "color" + ) + ) def test_yolov5_hsv_random_aug(self): # Test with gt_bboxes transform = YOLOv5HSVRandomAug( - hue_delta=0.015, saturation_delta=0.7, value_delta=0.4) + hue_delta=0.015, saturation_delta=0.7, value_delta=0.4 + ) results = transform(copy.deepcopy(self.data_info)) - self.assertTrue( - results['img'].shape[:2] == self.data_info['img'].shape[:2]) + self.assertTrue(results["img"].shape[:2] == self.data_info["img"].shape[:2]) class TestLoadAnnotations(unittest.TestCase): - def setUp(self): """Set up the data info which are used in every test method. TestCase calls functions in this order: setUp() -> testMethod() -> tearDown() -> cleanUp() """ - data_prefix = osp.join(osp.dirname(__file__), '../../data') - seg_map = osp.join(data_prefix, 'gray.jpg') + data_prefix = osp.join(osp.dirname(__file__), "../../data") + seg_map = osp.join(data_prefix, "gray.jpg") self.results = { - 'ori_shape': (300, 400), - 'seg_map_path': - seg_map, - 'instances': [{ - 'bbox': [0, 0, 10, 20], - 'bbox_label': 1, - 'mask': [[0, 0, 0, 20, 10, 20, 10, 0]], - 'ignore_flag': 0 - }, { - 'bbox': [10, 10, 110, 120], - 'bbox_label': 2, - 'mask': [[10, 10, 110, 10, 110, 120, 110, 10]], - 'ignore_flag': 0 - }, { - 'bbox': [50, 50, 60, 80], - 'bbox_label': 2, - 'mask': [[50, 50, 60, 50, 60, 80, 50, 80]], - 'ignore_flag': 1 - }] + "ori_shape": (300, 400), + "seg_map_path": seg_map, + "instances": [ + { + "bbox": [0, 0, 10, 20], + "bbox_label": 1, + "mask": [[0, 0, 0, 20, 10, 20, 10, 0]], + "ignore_flag": 0, + }, + { + "bbox": [10, 10, 110, 120], + "bbox_label": 2, + "mask": [[10, 10, 110, 10, 110, 120, 110, 10]], + "ignore_flag": 0, + }, + { + "bbox": [50, 50, 60, 80], + "bbox_label": 2, + "mask": [[50, 50, 60, 50, 60, 80, 50, 80]], + "ignore_flag": 1, + }, + ], } def test_load_bboxes(self): @@ -266,23 +289,25 @@ def test_load_bboxes(self): with_label=False, with_seg=False, with_mask=False, - box_type=None) + box_type=None, + ) results = transform(copy.deepcopy(self.results)) - self.assertIn('gt_bboxes', results) - self.assertTrue((results['gt_bboxes'] == np.array([[0, 0, 10, 20], - [10, 10, 110, - 120]])).all()) - self.assertEqual(results['gt_bboxes'].dtype, np.float32) + self.assertIn("gt_bboxes", results) self.assertTrue( - (results['gt_ignore_flags'] == np.array([False, False])).all()) - self.assertEqual(results['gt_ignore_flags'].dtype, bool) + ( + results["gt_bboxes"] == np.array([[0, 0, 10, 20], [10, 10, 110, 120]]) + ).all() + ) + self.assertEqual(results["gt_bboxes"].dtype, np.float32) + self.assertTrue((results["gt_ignore_flags"] == np.array([False, False])).all()) + self.assertEqual(results["gt_ignore_flags"].dtype, bool) # test empty instance results = transform({}) - self.assertIn('gt_bboxes', results) - self.assertTrue(results['gt_bboxes'].shape == (0, 4)) - self.assertIn('gt_ignore_flags', results) - self.assertTrue(results['gt_ignore_flags'].shape == (0, )) + self.assertIn("gt_bboxes", results) + self.assertTrue(results["gt_bboxes"].shape == (0, 4)) + self.assertIn("gt_ignore_flags", results) + self.assertTrue(results["gt_ignore_flags"].shape == (0,)) def test_load_labels(self): transform = LoadAnnotations( @@ -292,19 +317,17 @@ def test_load_labels(self): with_mask=False, ) results = transform(copy.deepcopy(self.results)) - self.assertIn('gt_bboxes_labels', results) - 
self.assertTrue((results['gt_bboxes_labels'] == np.array([1, - 2])).all()) - self.assertEqual(results['gt_bboxes_labels'].dtype, np.int64) + self.assertIn("gt_bboxes_labels", results) + self.assertTrue((results["gt_bboxes_labels"] == np.array([1, 2])).all()) + self.assertEqual(results["gt_bboxes_labels"].dtype, np.int64) # test empty instance results = transform({}) - self.assertIn('gt_bboxes_labels', results) - self.assertTrue(results['gt_bboxes_labels'].shape == (0, )) + self.assertIn("gt_bboxes_labels", results) + self.assertTrue(results["gt_bboxes_labels"].shape == (0,)) class TestYOLOv5RandomAffine(unittest.TestCase): - def setUp(self): """Setup the data info which are used in every test method. @@ -312,16 +335,13 @@ def setUp(self): tearDown() -> cleanUp() """ self.results = { - 'img': - np.random.random((224, 224, 3)), - 'img_shape': (224, 224), - 'gt_bboxes_labels': - np.array([1, 2, 3], dtype=np.int64), - 'gt_bboxes': - np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], - dtype=np.float32), - 'gt_ignore_flags': - np.array([0, 0, 1], dtype=bool), + "img": np.random.random((224, 224, 3)), + "img_shape": (224, 224), + "gt_bboxes_labels": np.array([1, 2, 3], dtype=np.int64), + "gt_bboxes": np.array( + [[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], dtype=np.float32 + ), + "gt_ignore_flags": np.array([0, 0, 1], dtype=bool), } def test_transform(self): @@ -338,29 +358,30 @@ def test_transform(self): transform = YOLOv5RandomAffine() results = transform(copy.deepcopy(self.results)) - self.assertTrue(results['img'].shape[:2] == (224, 224)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == np.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (224, 224)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == np.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) def test_transform_with_boxlist(self): results = copy.deepcopy(self.results) - results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes']) + results["gt_bboxes"] = HorizontalBoxes(results["gt_bboxes"]) transform = YOLOv5RandomAffine() results = transform(copy.deepcopy(results)) - self.assertTrue(results['img'].shape[:2] == (224, 224)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == torch.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (224, 224)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == torch.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) class TestPPYOLOERandomCrop(unittest.TestCase): - def setUp(self): """Setup the data info which are used in every test method. 
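Aside (not in the original patch): the paired dtype assertions in the tests above (np.float32 for raw arrays, torch.float32 once wrapped) follow from the box container: HorizontalBoxes stores its data as a torch.Tensor, so the same transform reports a torch dtype for wrapped input. A minimal sketch:

import numpy as np
import torch
from mmdet.structures.bbox import HorizontalBoxes

boxes = HorizontalBoxes(np.array([[10, 10, 20, 20]], dtype=np.float32))
assert boxes.dtype == torch.float32  # ndarray input is converted to a tensor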
@@ -368,42 +389,40 @@ def setUp(self): tearDown() -> cleanUp() """ self.results = { - 'img': - np.random.random((224, 224, 3)), - 'img_shape': (224, 224), - 'gt_bboxes_labels': - np.array([1, 2, 3], dtype=np.int64), - 'gt_bboxes': - np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], - dtype=np.float32), - 'gt_ignore_flags': - np.array([0, 0, 1], dtype=bool), + "img": np.random.random((224, 224, 3)), + "img_shape": (224, 224), + "gt_bboxes_labels": np.array([1, 2, 3], dtype=np.int64), + "gt_bboxes": np.array( + [[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], dtype=np.float32 + ), + "gt_ignore_flags": np.array([0, 0, 1], dtype=bool), } def test_transform(self): transform = PPYOLOERandomCrop() results = transform(copy.deepcopy(self.results)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == np.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == np.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) def test_transform_with_boxlist(self): results = copy.deepcopy(self.results) - results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes']) + results["gt_bboxes"] = HorizontalBoxes(results["gt_bboxes"]) transform = PPYOLOERandomCrop() results = transform(copy.deepcopy(results)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == torch.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == torch.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) class TestPPYOLOERandomDistort(unittest.TestCase): - def setUp(self): """Setup the data info which are used in every test method. 
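Aside (not in the original patch): the shape of the config that the PPYOLOERandomDistort tests below exercise. The min/max/prob numbers come from the test bodies; treating prob as a probability constrained to [0, 1] is the contract the first assertion checks, and the concrete values here are otherwise assumptions.

hue_cfg = dict(min=-18, max=18, prob=0.5)  # prob must lie in [0, 1];
                                           # prob=1.5 below is expected to
                                           # raise an AssertionError
assert 0 <= hue_cfg["prob"] <= 1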
@@ -411,23 +430,19 @@ def setUp(self): tearDown() -> cleanUp() """ self.results = { - 'img': - np.random.random((224, 224, 3)), - 'img_shape': (224, 224), - 'gt_bboxes_labels': - np.array([1, 2, 3], dtype=np.int64), - 'gt_bboxes': - np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], - dtype=np.float32), - 'gt_ignore_flags': - np.array([0, 0, 1], dtype=bool), + "img": np.random.random((224, 224, 3)), + "img_shape": (224, 224), + "gt_bboxes_labels": np.array([1, 2, 3], dtype=np.int64), + "gt_bboxes": np.array( + [[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], dtype=np.float32 + ), + "gt_ignore_flags": np.array([0, 0, 1], dtype=bool), } def test_transform(self): # test assertion for invalid prob with self.assertRaises(AssertionError): - transform = PPYOLOERandomDistort( - hue_cfg=dict(min=-18, max=18, prob=1.5)) + transform = PPYOLOERandomDistort(hue_cfg=dict(min=-18, max=18, prob=1.5)) # test assertion for invalid num_distort_func with self.assertRaises(AssertionError): @@ -435,22 +450,24 @@ def test_transform(self): transform = PPYOLOERandomDistort() results = transform(copy.deepcopy(self.results)) - self.assertTrue(results['img'].shape[:2] == (224, 224)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == np.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (224, 224)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == np.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) def test_transform_with_boxlist(self): results = copy.deepcopy(self.results) - results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes']) + results["gt_bboxes"] = HorizontalBoxes(results["gt_bboxes"]) transform = PPYOLOERandomDistort() results = transform(copy.deepcopy(results)) - self.assertTrue(results['img'].shape[:2] == (224, 224)) - self.assertTrue(results['gt_bboxes_labels'].shape[0] == - results['gt_bboxes'].shape[0]) - self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) - self.assertTrue(results['gt_bboxes'].dtype == torch.float32) - self.assertTrue(results['gt_ignore_flags'].dtype == bool) + self.assertTrue(results["img"].shape[:2] == (224, 224)) + self.assertTrue( + results["gt_bboxes_labels"].shape[0] == results["gt_bboxes"].shape[0] + ) + self.assertTrue(results["gt_bboxes_labels"].dtype == np.int64) + self.assertTrue(results["gt_bboxes"].dtype == torch.float32) + self.assertTrue(results["gt_ignore_flags"].dtype == bool) diff --git a/mmyolo/tests/test_datasets/test_utils.py b/mmyolo/tests/test_datasets/test_utils.py index 43c8e61f..33a853a2 100644 --- a/mmyolo/tests/test_datasets/test_utils.py +++ b/mmyolo/tests/test_datasets/test_utils.py @@ -23,7 +23,6 @@ def _rand_bboxes(rng, num_boxes, w, h): class TestYOLOv5Collate(unittest.TestCase): - def test_yolov5_collate(self): rng = np.random.RandomState(0) @@ -38,14 +37,13 @@ def test_yolov5_collate(self): out = yolov5_collate([dict(inputs=inputs, data_samples=data_samples)]) self.assertIsInstance(out, dict) - self.assertTrue(out['inputs'].shape == (1, 3, 10, 10)) - self.assertTrue(out['data_samples'].shape == (4, 6)) + self.assertTrue(out["inputs"].shape == (1, 3, 10, 10)) + self.assertTrue(out["data_samples"].shape == (4, 6)) - out = 
yolov5_collate([dict(inputs=inputs, data_samples=data_samples)] * - 2) + out = yolov5_collate([dict(inputs=inputs, data_samples=data_samples)] * 2) self.assertIsInstance(out, dict) - self.assertTrue(out['inputs'].shape == (2, 3, 10, 10)) - self.assertTrue(out['data_samples'].shape == (8, 6)) + self.assertTrue(out["inputs"].shape == (2, 3, 10, 10)) + self.assertTrue(out["data_samples"].shape == (8, 6)) def test_yolov5_collate_with_multi_scale(self): rng = np.random.RandomState(0) @@ -59,75 +57,70 @@ def test_yolov5_collate_with_multi_scale(self): gt_instances.labels = torch.LongTensor(labels) data_samples.gt_instances = gt_instances - out = yolov5_collate([dict(inputs=inputs, data_samples=data_samples)], - use_ms_training=True) + out = yolov5_collate( + [dict(inputs=inputs, data_samples=data_samples)], use_ms_training=True + ) self.assertIsInstance(out, dict) - self.assertTrue(out['inputs'][0].shape == (3, 10, 10)) - print(out['data_samples'].shape) - self.assertTrue(out['data_samples'].shape == (4, 6)) - self.assertIsInstance(out['inputs'], list) - self.assertIsInstance(out['data_samples'], torch.Tensor) + self.assertTrue(out["inputs"][0].shape == (3, 10, 10)) + print(out["data_samples"].shape) + self.assertTrue(out["data_samples"].shape == (4, 6)) + self.assertIsInstance(out["inputs"], list) + self.assertIsInstance(out["data_samples"], torch.Tensor) out = yolov5_collate( - [dict(inputs=inputs, data_samples=data_samples)] * 2, - use_ms_training=True) + [dict(inputs=inputs, data_samples=data_samples)] * 2, use_ms_training=True + ) self.assertIsInstance(out, dict) - self.assertTrue(out['inputs'][0].shape == (3, 10, 10)) - self.assertTrue(out['data_samples'].shape == (8, 6)) - self.assertIsInstance(out['inputs'], list) - self.assertIsInstance(out['data_samples'], torch.Tensor) + self.assertTrue(out["inputs"][0].shape == (3, 10, 10)) + self.assertTrue(out["data_samples"].shape == (8, 6)) + self.assertIsInstance(out["inputs"], list) + self.assertIsInstance(out["data_samples"], torch.Tensor) class TestBatchShapePolicy(unittest.TestCase): - def test_batch_shape_policy(self): - src_data_infos = [{ - 'height': 20, - 'width': 100, - }, { - 'height': 11, - 'width': 100, - }, { - 'height': 21, - 'width': 100, - }, { - 'height': 30, - 'width': 100, - }, { - 'height': 10, - 'width': 100, - }] - - expected_data_infos = [{ - 'height': 10, - 'width': 100, - 'batch_shape': np.array([96, 672]) - }, { - 'height': 11, - 'width': 100, - 'batch_shape': np.array([96, 672]) - }, { - 'height': 20, - 'width': 100, - 'batch_shape': np.array([160, 672]) - }, { - 'height': 21, - 'width': 100, - 'batch_shape': np.array([160, 672]) - }, { - 'height': 30, - 'width': 100, - 'batch_shape': np.array([224, 672]) - }] + src_data_infos = [ + { + "height": 20, + "width": 100, + }, + { + "height": 11, + "width": 100, + }, + { + "height": 21, + "width": 100, + }, + { + "height": 30, + "width": 100, + }, + { + "height": 10, + "width": 100, + }, + ] + + expected_data_infos = [ + {"height": 10, "width": 100, "batch_shape": np.array([96, 672])}, + {"height": 11, "width": 100, "batch_shape": np.array([96, 672])}, + {"height": 20, "width": 100, "batch_shape": np.array([160, 672])}, + {"height": 21, "width": 100, "batch_shape": np.array([160, 672])}, + {"height": 30, "width": 100, "batch_shape": np.array([224, 672])}, + ] batch_shapes_policy = BatchShapePolicy(batch_size=2) out_data_infos = batch_shapes_policy(src_data_infos) for i in range(5): self.assertEqual( - (expected_data_infos[i]['height'], - expected_data_infos[i]['width']), - 
(out_data_infos[i]['height'], out_data_infos[i]['width'])) + (expected_data_infos[i]["height"], expected_data_infos[i]["width"]), + (out_data_infos[i]["height"], out_data_infos[i]["width"]), + ) self.assertTrue( - np.allclose(expected_data_infos[i]['batch_shape'], - out_data_infos[i]['batch_shape'])) + np.allclose( + expected_data_infos[i]["batch_shape"], + out_data_infos[i]["batch_shape"], + ) + ) diff --git a/mmyolo/tests/test_datasets/test_yolov5_coco.py b/mmyolo/tests/test_datasets/test_yolov5_coco.py index b7e1c9a4..aa22379b 100644 --- a/mmyolo/tests/test_datasets/test_yolov5_coco.py +++ b/mmyolo/tests/test_datasets/test_yolov5_coco.py @@ -5,19 +5,19 @@ class TestYOLOv5CocoDataset(unittest.TestCase): - def test_batch_shapes_cfg(self): batch_shapes_cfg = dict( - type='BatchShapePolicy', + type="BatchShapePolicy", batch_size=2, img_size=640, size_divisor=32, - extra_pad_ratio=0.5) + extra_pad_ratio=0.5, + ) # test serialize_data=True dataset = YOLOv5CocoDataset( - data_prefix=dict(img='imgs'), - ann_file='tests/data/coco_sample.json', + data_prefix=dict(img="imgs"), + ann_file="tests/data/coco_sample.json", filter_cfg=dict(filter_empty_gt=False, min_size=0), pipeline=[], serialize_data=True, @@ -25,16 +25,15 @@ def test_batch_shapes_cfg(self): ) expected_img_ids = [3, 0, 2, 1] - expected_batch_shapes = [[512, 672], [512, 672], [672, 672], - [672, 672]] + expected_batch_shapes = [[512, 672], [512, 672], [672, 672], [672, 672]] for i, data in enumerate(dataset): - assert data['img_id'] == expected_img_ids[i] - assert data['batch_shape'].tolist() == expected_batch_shapes[i] + assert data["img_id"] == expected_img_ids[i] + assert data["batch_shape"].tolist() == expected_batch_shapes[i] # test serialize_data=True dataset = YOLOv5CocoDataset( - data_prefix=dict(img='imgs'), - ann_file='tests/data/coco_sample.json', + data_prefix=dict(img="imgs"), + ann_file="tests/data/coco_sample.json", filter_cfg=dict(filter_empty_gt=False, min_size=0), pipeline=[], serialize_data=False, @@ -42,30 +41,30 @@ def test_batch_shapes_cfg(self): ) expected_img_ids = [3, 0, 2, 1] - expected_batch_shapes = [[512, 672], [512, 672], [672, 672], - [672, 672]] + expected_batch_shapes = [[512, 672], [512, 672], [672, 672], [672, 672]] for i, data in enumerate(dataset): - assert data['img_id'] == expected_img_ids[i] - assert data['batch_shape'].tolist() == expected_batch_shapes[i] + assert data["img_id"] == expected_img_ids[i] + assert data["batch_shape"].tolist() == expected_batch_shapes[i] def test_prepare_data(self): dataset = YOLOv5CocoDataset( - data_prefix=dict(img='imgs'), - ann_file='tests/data/coco_sample.json', + data_prefix=dict(img="imgs"), + ann_file="tests/data/coco_sample.json", filter_cfg=dict(filter_empty_gt=False, min_size=0), pipeline=[], serialize_data=True, batch_shapes_cfg=None, ) for data in dataset: - assert 'dataset' in data + assert "dataset" in data # test with test_mode = True dataset = YOLOv5CocoDataset( - data_prefix=dict(img='imgs'), - ann_file='tests/data/coco_sample.json', + data_prefix=dict(img="imgs"), + ann_file="tests/data/coco_sample.json", test_mode=True, - pipeline=[]) + pipeline=[], + ) for data in dataset: - assert 'dataset' not in data + assert "dataset" not in data diff --git a/mmyolo/tests/test_datasets/test_yolov5_voc.py b/mmyolo/tests/test_datasets/test_yolov5_voc.py index f7e9b989..01544b58 100644 --- a/mmyolo/tests/test_datasets/test_yolov5_voc.py +++ b/mmyolo/tests/test_datasets/test_yolov5_voc.py @@ -10,77 +10,80 @@ class TestYOLOv5VocDataset(unittest.TestCase): - def 
test_batch_shapes_cfg(self): batch_shapes_cfg = dict( - type='BatchShapePolicy', + type="BatchShapePolicy", batch_size=2, img_size=640, size_divisor=32, - extra_pad_ratio=0.5) + extra_pad_ratio=0.5, + ) # test serialize_data=True dataset = YOLOv5VOCDataset( - data_root='tests/data/VOCdevkit/', - ann_file='VOC2007/ImageSets/Main/trainval.txt', - data_prefix=dict(sub_data_root='VOC2007/'), + data_root="tests/data/VOCdevkit/", + ann_file="VOC2007/ImageSets/Main/trainval.txt", + data_prefix=dict(sub_data_root="VOC2007/"), test_mode=True, pipeline=[], batch_shapes_cfg=batch_shapes_cfg, ) - expected_img_ids = ['000001'] + expected_img_ids = ["000001"] expected_batch_shapes = [[672, 480]] for i, data in enumerate(dataset): - assert data['img_id'] == expected_img_ids[i] - assert data['batch_shape'].tolist() == expected_batch_shapes[i] + assert data["img_id"] == expected_img_ids[i] + assert data["batch_shape"].tolist() == expected_batch_shapes[i] def test_prepare_data(self): dataset = YOLOv5VOCDataset( - data_root='tests/data/VOCdevkit/', - ann_file='VOC2007/ImageSets/Main/trainval.txt', - data_prefix=dict(sub_data_root='VOC2007/'), + data_root="tests/data/VOCdevkit/", + ann_file="VOC2007/ImageSets/Main/trainval.txt", + data_prefix=dict(sub_data_root="VOC2007/"), filter_cfg=dict(filter_empty_gt=False, min_size=0), pipeline=[], serialize_data=True, batch_shapes_cfg=None, ) for data in dataset: - assert 'dataset' in data + assert "dataset" in data # test with test_mode = True dataset = YOLOv5VOCDataset( - data_root='tests/data/VOCdevkit/', - ann_file='VOC2007/ImageSets/Main/trainval.txt', - data_prefix=dict(sub_data_root='VOC2007/'), - filter_cfg=dict( - filter_empty_gt=True, min_size=32, bbox_min_size=None), + data_root="tests/data/VOCdevkit/", + ann_file="VOC2007/ImageSets/Main/trainval.txt", + data_prefix=dict(sub_data_root="VOC2007/"), + filter_cfg=dict(filter_empty_gt=True, min_size=32, bbox_min_size=None), pipeline=[], test_mode=True, - batch_shapes_cfg=None) + batch_shapes_cfg=None, + ) for data in dataset: - assert 'dataset' not in data + assert "dataset" not in data def test_concat_dataset(self): dataset = ConcatDataset( datasets=[ dict( - type='YOLOv5VOCDataset', - data_root='tests/data/VOCdevkit/', - ann_file='VOC2007/ImageSets/Main/trainval.txt', - data_prefix=dict(sub_data_root='VOC2007/'), + type="YOLOv5VOCDataset", + data_root="tests/data/VOCdevkit/", + ann_file="VOC2007/ImageSets/Main/trainval.txt", + data_prefix=dict(sub_data_root="VOC2007/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=[]), + pipeline=[], + ), dict( - type='YOLOv5VOCDataset', - data_root='tests/data/VOCdevkit/', - ann_file='VOC2012/ImageSets/Main/trainval.txt', - data_prefix=dict(sub_data_root='VOC2012/'), + type="YOLOv5VOCDataset", + data_root="tests/data/VOCdevkit/", + ann_file="VOC2012/ImageSets/Main/trainval.txt", + data_prefix=dict(sub_data_root="VOC2012/"), filter_cfg=dict(filter_empty_gt=False, min_size=32), - pipeline=[]) + pipeline=[], + ), ], - ignore_keys='dataset_type') + ignore_keys="dataset_type", + ) dataset.full_init() self.assertEqual(len(dataset), 2) diff --git a/mmyolo/tests/test_deploy/test_mmyolo_models.py b/mmyolo/tests/test_deploy/test_mmyolo_models.py index 65394e53..62156fab 100644 --- a/mmyolo/tests/test_deploy/test_mmyolo_models.py +++ b/mmyolo/tests/test_deploy/test_mmyolo_models.py @@ -5,30 +5,35 @@ import numpy as np import pytest import torch +from mmdeploy.codebase import import_codebase +from mmdeploy.utils import Backend +from mmdeploy.utils.config_utils import 
register_codebase +from mmdeploy.utils.test import ( + WrapModel, + check_backend, + get_model_outputs, + get_rewrite_outputs, +) from mmengine import Config try: import importlib - importlib.import_module('mmdeploy') + + importlib.import_module("mmdeploy") except ImportError: - pytest.skip('mmdeploy is not installed.', allow_module_level=True) + pytest.skip("mmdeploy is not installed.", allow_module_level=True) -from mmdeploy.codebase import import_codebase -from mmdeploy.utils import Backend -from mmdeploy.utils.config_utils import register_codebase -from mmdeploy.utils.test import (WrapModel, check_backend, get_model_outputs, - get_rewrite_outputs) try: - codebase = register_codebase('mmyolo') - import_codebase(codebase, ['mmyolo.deploy']) + codebase = register_codebase("mmyolo") + import_codebase(codebase, ["mmyolo.deploy"]) except ImportError: - pytest.skip('mmyolo is not installed.', allow_module_level=True) + pytest.skip("mmyolo is not installed.", allow_module_level=True) def seed_everything(seed=1029): random.seed(seed) - os.environ['PYTHONHASHSEED'] = str(seed) + os.environ["PYTHONHASHSEED"] = str(seed) np.random.seed(seed) torch.manual_seed(seed) if torch.cuda.is_available(): @@ -46,16 +51,20 @@ def get_yolov5_head_model(): multi_label=True, nms_pre=30000, score_thr=0.001, - nms=dict(type='nms', iou_threshold=0.65), - max_per_img=300)) + nms=dict(type="nms", iou_threshold=0.65), + max_per_img=300, + ) + ) from mmyolo.models.dense_heads import YOLOv5Head + head_module = dict( - type='YOLOv5HeadModule', + type="YOLOv5HeadModule", num_classes=4, in_channels=[2, 4, 8], featmap_strides=[8, 16, 32], - num_base_priors=1) + num_base_priors=1, + ) model = YOLOv5Head(head_module, test_cfg=test_cfg) @@ -63,27 +72,29 @@ def get_yolov5_head_model(): return model -@pytest.mark.parametrize('backend_type', [Backend.ONNXRUNTIME]) +@pytest.mark.parametrize("backend_type", [Backend.ONNXRUNTIME]) def test_yolov5_head_predict_by_feat(backend_type: Backend): """Test predict_by_feat rewrite of YOLOv5Head.""" check_backend(backend_type) yolov5_head = get_yolov5_head_model() yolov5_head.cpu().eval() s = 256 - batch_img_metas = [{ - 'scale_factor': (1.0, 1.0), - 'pad_shape': (s, s, 3), - 'img_shape': (s, s, 3), - 'ori_shape': (s, s, 3) - }] - output_names = ['dets', 'labels'] + batch_img_metas = [ + { + "scale_factor": (1.0, 1.0), + "pad_shape": (s, s, 3), + "img_shape": (s, s, 3), + "ori_shape": (s, s, 3), + } + ] + output_names = ["dets", "labels"] deploy_cfg = Config( dict( backend_config=dict(type=backend_type.value), onnx_config=dict(output_names=output_names, input_shape=None), codebase_config=dict( - type='mmyolo', - task='ObjectDetection', + type="mmyolo", + task="ObjectDetection", post_processing=dict( score_threshold=0.05, iou_threshold=0.5, @@ -92,74 +103,84 @@ def test_yolov5_head_predict_by_feat(backend_type: Backend): keep_top_k=10, background_label_id=-1, ), - module=['mmyolo.deploy']))) + module=["mmyolo.deploy"], + ), + ) + ) seed_everything(1234) cls_scores = [ - torch.rand(1, yolov5_head.num_classes * yolov5_head.num_base_priors, - 4 * pow(2, i), 4 * pow(2, i)) for i in range(3, 0, -1) + torch.rand( + 1, + yolov5_head.num_classes * yolov5_head.num_base_priors, + 4 * pow(2, i), + 4 * pow(2, i), + ) + for i in range(3, 0, -1) ] seed_everything(5678) bbox_preds = [ - torch.rand(1, 4 * yolov5_head.num_base_priors, 4 * pow(2, i), - 4 * pow(2, i)) for i in range(3, 0, -1) + torch.rand(1, 4 * yolov5_head.num_base_priors, 4 * pow(2, i), 4 * pow(2, i)) + for i in range(3, 0, -1) ]
seed_everything(9101) objectnesses = [ - torch.rand(1, 1 * yolov5_head.num_base_priors, 4 * pow(2, i), - 4 * pow(2, i)) for i in range(3, 0, -1) + torch.rand(1, 1 * yolov5_head.num_base_priors, 4 * pow(2, i), 4 * pow(2, i)) + for i in range(3, 0, -1) ] # to get outputs of pytorch model model_inputs = { - 'cls_scores': cls_scores, - 'bbox_preds': bbox_preds, - 'objectnesses': objectnesses, - 'batch_img_metas': batch_img_metas, - 'with_nms': True + "cls_scores": cls_scores, + "bbox_preds": bbox_preds, + "objectnesses": objectnesses, + "batch_img_metas": batch_img_metas, + "with_nms": True, } - model_outputs = get_model_outputs(yolov5_head, 'predict_by_feat', - model_inputs) + model_outputs = get_model_outputs(yolov5_head, "predict_by_feat", model_inputs) # to get outputs of onnx model after rewrite wrapped_model = WrapModel( - yolov5_head, - 'predict_by_feat', - batch_img_metas=batch_img_metas, - with_nms=True) + yolov5_head, "predict_by_feat", batch_img_metas=batch_img_metas, with_nms=True + ) rewrite_inputs = { - 'cls_scores': cls_scores, - 'bbox_preds': bbox_preds, - 'objectnesses': objectnesses, + "cls_scores": cls_scores, + "bbox_preds": bbox_preds, + "objectnesses": objectnesses, } rewrite_outputs, is_backend_output = get_rewrite_outputs( - wrapped_model=wrapped_model, - model_inputs=rewrite_inputs, - deploy_cfg=deploy_cfg) + wrapped_model=wrapped_model, model_inputs=rewrite_inputs, deploy_cfg=deploy_cfg + ) if is_backend_output: # hard code to make two tensors with the same shape # rewrite and original codes applied different nms strategy - min_shape = min(model_outputs[0].bboxes.shape[0], - rewrite_outputs[0].shape[1], 5) + min_shape = min( + model_outputs[0].bboxes.shape[0], rewrite_outputs[0].shape[1], 5 + ) for i in range(len(model_outputs)): - rewrite_outputs[0][i, :min_shape, 0::2] = \ - rewrite_outputs[0][i, :min_shape, 0::2].clamp_(0, s) - rewrite_outputs[0][i, :min_shape, 1::2] = \ - rewrite_outputs[0][i, :min_shape, 1::2].clamp_(0, s) + rewrite_outputs[0][i, :min_shape, 0::2] = rewrite_outputs[0][ + i, :min_shape, 0::2 + ].clamp_(0, s) + rewrite_outputs[0][i, :min_shape, 1::2] = rewrite_outputs[0][ + i, :min_shape, 1::2 + ].clamp_(0, s) assert np.allclose( model_outputs[i].bboxes[:min_shape], rewrite_outputs[0][i, :min_shape, :4], rtol=1e-03, - atol=1e-05) + atol=1e-05, + ) assert np.allclose( model_outputs[i].scores[:min_shape], rewrite_outputs[0][i, :min_shape, 4], rtol=1e-03, - atol=1e-05) + atol=1e-05, + ) assert np.allclose( model_outputs[i].labels[:min_shape], rewrite_outputs[1][i, :min_shape], rtol=1e-03, - atol=1e-05) + atol=1e-05, + ) else: assert rewrite_outputs is not None diff --git a/mmyolo/tests/test_deploy/test_object_detection.py b/mmyolo/tests/test_deploy/test_object_detection.py index b701e255..eefd0c90 100644 --- a/mmyolo/tests/test_deploy/test_object_detection.py +++ b/mmyolo/tests/test_deploy/test_object_detection.py @@ -2,43 +2,42 @@ import os from tempfile import NamedTemporaryFile, TemporaryDirectory +import mmdeploy.backend.onnxruntime as ort_apis import numpy as np import pytest import torch +from mmdeploy.apis import build_task_processor +from mmdeploy.codebase import import_codebase +from mmdeploy.utils import load_config +from mmdeploy.utils.config_utils import register_codebase +from mmdeploy.utils.test import SwitchBackendWrapper from mmengine import Config try: import importlib - importlib.import_module('mmdeploy') + + importlib.import_module("mmdeploy") except ImportError: - pytest.skip('mmdeploy is not installed.', allow_module_level=True) + 
pytest.skip("mmdeploy is not installed.", allow_module_level=True) -import mmdeploy.backend.onnxruntime as ort_apis -from mmdeploy.apis import build_task_processor -from mmdeploy.codebase import import_codebase -from mmdeploy.utils import load_config -from mmdeploy.utils.config_utils import register_codebase -from mmdeploy.utils.test import SwitchBackendWrapper try: - codebase = register_codebase('mmyolo') - import_codebase(codebase, ['mmyolo.deploy']) + codebase = register_codebase("mmyolo") + import_codebase(codebase, ["mmyolo.deploy"]) except ImportError: - pytest.skip('mmyolo is not installed.', allow_module_level=True) + pytest.skip("mmyolo is not installed.", allow_module_level=True) -model_cfg_path = 'tests/test_deploy/data/model.py' +model_cfg_path = "tests/test_deploy/data/model.py" model_cfg = load_config(model_cfg_path)[0] -model_cfg.test_dataloader.dataset.data_root = \ - 'tests/data' -model_cfg.test_dataloader.dataset.ann_file = 'coco_sample.json' -model_cfg.test_evaluator.ann_file = \ - 'tests/coco_sample.json' +model_cfg.test_dataloader.dataset.data_root = "tests/data" +model_cfg.test_dataloader.dataset.ann_file = "coco_sample.json" +model_cfg.test_evaluator.ann_file = "tests/coco_sample.json" deploy_cfg = Config( dict( - backend_config=dict(type='onnxruntime'), + backend_config=dict(type="onnxruntime"), codebase_config=dict( - type='mmyolo', - task='ObjectDetection', + type="mmyolo", + task="ObjectDetection", post_processing=dict( score_threshold=0.05, confidence_threshold=0.005, # for YOLOv3 @@ -48,16 +47,20 @@ keep_top_k=100, background_label_id=-1, ), - module=['mmyolo.deploy']), + module=["mmyolo.deploy"], + ), onnx_config=dict( - type='onnx', + type="onnx", export_params=True, keep_initializers_as_inputs=False, opset_version=11, input_shape=None, - input_names=['input'], - output_names=['dets', 'labels']))) -onnx_file = NamedTemporaryFile(suffix='.onnx').name + input_names=["input"], + output_names=["dets", "labels"], + ), + ) +) +onnx_file = NamedTemporaryFile(suffix=".onnx").name task_processor = None img_shape = (32, 32) img = np.random.rand(*img_shape, 3) @@ -66,31 +69,32 @@ @pytest.fixture(autouse=True) def init_task_processor(): global task_processor - task_processor = build_task_processor(model_cfg, deploy_cfg, 'cpu') + task_processor = build_task_processor(model_cfg, deploy_cfg, "cpu") @pytest.fixture def backend_model(): from mmdeploy.backend.onnxruntime import ORTWrapper - ort_apis.__dict__.update({'ORTWrapper': ORTWrapper}) + + ort_apis.__dict__.update({"ORTWrapper": ORTWrapper}) wrapper = SwitchBackendWrapper(ORTWrapper) wrapper.set( outputs={ - 'dets': torch.rand(1, 10, 5).sort(2).values, - 'labels': torch.randint(0, 10, (1, 10)) - }) + "dets": torch.rand(1, 10, 5).sort(2).values, + "labels": torch.randint(0, 10, (1, 10)), + } + ) - yield task_processor.build_backend_model(['']) + yield task_processor.build_backend_model([""]) wrapper.recover() def test_visualize(backend_model): - img_path = 'tests/data/color.jpg' - input_dict, _ = task_processor.create_input( - img_path, input_shape=img_shape) + img_path = "tests/data/color.jpg" + input_dict, _ = task_processor.create_input(img_path, input_shape=img_shape) results = backend_model.test_step(input_dict)[0] with TemporaryDirectory() as dir: - filename = dir + 'tmp.jpg' - task_processor.visualize(img, results, filename, 'window') + filename = dir + "tmp.jpg" + task_processor.visualize(img, results, filename, "window") assert os.path.exists(filename) diff --git 
a/mmyolo/tests/test_engine/test_hooks/test_switch_to_deploy_hook.py b/mmyolo/tests/test_engine/test_hooks/test_switch_to_deploy_hook.py index 52d6e9f0..a741be58 100644 --- a/mmyolo/tests/test_engine/test_hooks/test_switch_to_deploy_hook.py +++ b/mmyolo/tests/test_engine/test_hooks/test_switch_to_deploy_hook.py @@ -10,7 +10,6 @@ class TestSwitchToDeployHook(TestCase): - def test(self): runner = Mock() diff --git a/mmyolo/tests/test_engine/test_hooks/test_yolov5_param_scheduler_hook.py b/mmyolo/tests/test_engine/test_hooks/test_yolov5_param_scheduler_hook.py index 1a527333..0993b243 100644 --- a/mmyolo/tests/test_engine/test_hooks/test_yolov5_param_scheduler_hook.py +++ b/mmyolo/tests/test_engine/test_hooks/test_yolov5_param_scheduler_hook.py @@ -14,18 +14,17 @@ class ToyModel(nn.Module): - def __init__(self): super().__init__() self.linear = nn.Linear(2, 1) - def forward(self, inputs, data_samples, mode='tensor'): + def forward(self, inputs, data_samples, mode="tensor"): labels = torch.stack(data_samples) inputs = torch.stack(inputs) outputs = self.linear(inputs) - if mode == 'tensor': + if mode == "tensor": return outputs - elif mode == 'loss': + elif mode == "loss": loss = (labels - outputs).sum() outputs = dict(loss=loss) return outputs @@ -50,28 +49,30 @@ def __getitem__(self, index): optim_wrapper = dict( - type='OptimWrapper', + type="OptimWrapper", optimizer=dict( - type='SGD', + type="SGD", lr=0.01, momentum=0.937, weight_decay=0.0005, nesterov=True, - batch_size_per_gpu=1), - constructor='YOLOv5OptimizerConstructor') + batch_size_per_gpu=1, + ), + constructor="YOLOv5OptimizerConstructor", +) register_all_modules() class TestYOLOv5ParamSchelerHook(TestCase): - def test(self): model = ToyModel() train_dataloader = dict( dataset=DummyDataset(), - sampler=dict(type='DefaultSampler', shuffle=True), + sampler=dict(type="DefaultSampler", shuffle=True), batch_size=3, - num_workers=0) + num_workers=0, + ) runner = Mock() runner.model = model @@ -80,7 +81,8 @@ def test(self): runner.train_dataloader = Runner.build_dataloader(train_dataloader) hook = YOLOv5ParamSchedulerHook( - scheduler_type='linear', lr_factor=0.01, max_epochs=300) + scheduler_type="linear", lr_factor=0.01, max_epochs=300 + ) # test before train runner.epoch = 0 @@ -88,8 +90,8 @@ def test(self): hook.before_train(runner) for group in runner.optim_wrapper.param_groups: - self.assertEqual(group['lr'], 0.01) - self.assertEqual(group['momentum'], 0.937) + self.assertEqual(group["lr"], 0.01) + self.assertEqual(group["momentum"], 0.937) self.assertFalse(hook._warmup_end) @@ -100,8 +102,8 @@ def test(self): for group_idx, group in enumerate(runner.optim_wrapper.param_groups): if group_idx == 2: - self.assertEqual(round(group['lr'], 5), 0.0991) - self.assertEqual(group['momentum'], 0.80137) + self.assertEqual(round(group["lr"], 5), 0.0991) + self.assertEqual(group["momentum"], 0.80137) self.assertFalse(hook._warmup_end) # test after warm up @@ -110,8 +112,8 @@ def test(self): self.assertFalse(hook._warmup_end) for group in runner.optim_wrapper.param_groups: - self.assertEqual(group['lr'], 0.01) - self.assertEqual(group['momentum'], 0.937) + self.assertEqual(group["lr"], 0.01) + self.assertEqual(group["momentum"], 0.937) runner.iter = 1001 hook.before_train_iter(runner, 0) @@ -120,5 +122,5 @@ def test(self): # test after train_epoch hook.after_train_epoch(runner) for group in runner.optim_wrapper.param_groups: - self.assertEqual(group['lr'], 0.01) - self.assertEqual(group['momentum'], 0.937) + self.assertEqual(group["lr"], 0.01) + 
self.assertEqual(group["momentum"], 0.937) diff --git a/mmyolo/tests/test_engine/test_hooks/test_yolox_mode_switch_hook.py b/mmyolo/tests/test_engine/test_hooks/test_yolox_mode_switch_hook.py index fbe13413..a17caf8b 100644 --- a/mmyolo/tests/test_engine/test_hooks/test_yolox_mode_switch_hook.py +++ b/mmyolo/tests/test_engine/test_hooks/test_yolox_mode_switch_hook.py @@ -28,23 +28,23 @@ def __getitem__(self, index): pipeline1 = [ - dict(type='mmdet.Resize'), + dict(type="mmdet.Resize"), ] pipeline2 = [ - dict(type='mmdet.RandomFlip'), + dict(type="mmdet.RandomFlip"), ] register_all_modules() class TestYOLOXModeSwitchHook(TestCase): - def test(self): train_dataloader = dict( dataset=DummyDataset(), - sampler=dict(type='DefaultSampler', shuffle=True), + sampler=dict(type="DefaultSampler", shuffle=True), batch_size=3, - num_workers=0) + num_workers=0, + ) runner = Mock() runner.model = Mock() @@ -55,13 +55,11 @@ def test(self): runner.train_dataloader = Runner.build_dataloader(train_dataloader) runner.train_dataloader.dataset.pipeline = pipeline1 - hook = YOLOXModeSwitchHook( - num_last_epochs=15, new_train_pipeline=pipeline2) + hook = YOLOXModeSwitchHook(num_last_epochs=15, new_train_pipeline=pipeline2) # test after change mode runner.epoch = 284 runner.max_epochs = 300 hook.before_train_epoch(runner) self.assertTrue(runner.model.bbox_head.use_bbox_aux) - self.assertEqual(runner.train_loop.dataloader.dataset.pipeline, - pipeline2) + self.assertEqual(runner.train_loop.dataloader.dataset.pipeline, pipeline2) diff --git a/mmyolo/tests/test_engine/test_optimizers/test_yolov5_optim_constructor.py b/mmyolo/tests/test_engine/test_optimizers/test_yolov5_optim_constructor.py index 4830e5cd..bda20c9a 100644 --- a/mmyolo/tests/test_engine/test_optimizers/test_yolov5_optim_constructor.py +++ b/mmyolo/tests/test_engine/test_optimizers/test_yolov5_optim_constructor.py @@ -14,7 +14,6 @@ class ExampleModel(nn.Module): - def __init__(self): super().__init__() self.param1 = nn.Parameter(torch.ones(1)) @@ -24,58 +23,64 @@ def __init__(self): class TestYOLOv5OptimizerConstructor(TestCase): - def setUp(self): self.model = ExampleModel() self.base_lr = 0.01 self.weight_decay = 0.0001 self.optim_wrapper_cfg = dict( - type='OptimWrapper', + type="OptimWrapper", optimizer=dict( - type='SGD', + type="SGD", lr=self.base_lr, momentum=0.9, weight_decay=self.weight_decay, - batch_size_per_gpu=16)) + batch_size_per_gpu=16, + ), + ) def test_init(self): YOLOv5OptimizerConstructor(copy.deepcopy(self.optim_wrapper_cfg)) YOLOv5OptimizerConstructor( copy.deepcopy(self.optim_wrapper_cfg), - paramwise_cfg={'base_total_batch_size': 64}) + paramwise_cfg={"base_total_batch_size": 64}, + ) # `paramwise_cfg` must include `base_total_batch_size` if not None. 
with self.assertRaises(AssertionError): YOLOv5OptimizerConstructor( - copy.deepcopy(self.optim_wrapper_cfg), paramwise_cfg={'a': 64}) + copy.deepcopy(self.optim_wrapper_cfg), paramwise_cfg={"a": 64} + ) def test_build(self): optim_wrapper = YOLOv5OptimizerConstructor( - copy.deepcopy(self.optim_wrapper_cfg))( - self.model) + copy.deepcopy(self.optim_wrapper_cfg) + )(self.model) # test param_groups assert len(optim_wrapper.optimizer.param_groups) == 3 for i in range(3): param_groups_i = optim_wrapper.optimizer.param_groups[i] - assert param_groups_i['lr'] == self.base_lr + assert param_groups_i["lr"] == self.base_lr if i == 0: - assert param_groups_i['weight_decay'] == self.weight_decay + assert param_groups_i["weight_decay"] == self.weight_decay else: - assert param_groups_i['weight_decay'] == 0 + assert param_groups_i["weight_decay"] == 0 # test weight_decay linear scaling optim_wrapper_cfg = copy.deepcopy(self.optim_wrapper_cfg) - optim_wrapper_cfg['optimizer']['batch_size_per_gpu'] = 128 - optim_wrapper = YOLOv5OptimizerConstructor(optim_wrapper_cfg)( - self.model) - assert optim_wrapper.optimizer.param_groups[0][ - 'weight_decay'] == self.weight_decay * 2 + optim_wrapper_cfg["optimizer"]["batch_size_per_gpu"] = 128 + optim_wrapper = YOLOv5OptimizerConstructor(optim_wrapper_cfg)(self.model) + assert ( + optim_wrapper.optimizer.param_groups[0]["weight_decay"] + == self.weight_decay * 2 + ) # test without batch_size_per_gpu optim_wrapper_cfg = copy.deepcopy(self.optim_wrapper_cfg) - optim_wrapper_cfg['optimizer'].pop('batch_size_per_gpu') + optim_wrapper_cfg["optimizer"].pop("batch_size_per_gpu") optim_wrapper = dict( - optim_wrapper_cfg, constructor='YOLOv5OptimizerConstructor') + optim_wrapper_cfg, constructor="YOLOv5OptimizerConstructor" + ) optim_wrapper = build_optim_wrapper(self.model, optim_wrapper) - assert optim_wrapper.optimizer.param_groups[0][ - 'weight_decay'] == self.weight_decay + assert ( + optim_wrapper.optimizer.param_groups[0]["weight_decay"] == self.weight_decay + ) diff --git a/mmyolo/tests/test_engine/test_optimizers/test_yolov7_optim_wrapper_constructor.py b/mmyolo/tests/test_engine/test_optimizers/test_yolov7_optim_wrapper_constructor.py index a2f445be..d67bcd27 100644 --- a/mmyolo/tests/test_engine/test_optimizers/test_yolov7_optim_wrapper_constructor.py +++ b/mmyolo/tests/test_engine/test_optimizers/test_yolov7_optim_wrapper_constructor.py @@ -14,7 +14,6 @@ class ExampleModel(nn.Module): - def __init__(self): super().__init__() self.param1 = nn.Parameter(torch.ones(1)) @@ -24,58 +23,64 @@ def __init__(self): class TestYOLOv7OptimWrapperConstructor(TestCase): - def setUp(self): self.model = ExampleModel() self.base_lr = 0.01 self.weight_decay = 0.0001 self.optim_wrapper_cfg = dict( - type='OptimWrapper', + type="OptimWrapper", optimizer=dict( - type='SGD', + type="SGD", lr=self.base_lr, momentum=0.9, weight_decay=self.weight_decay, - batch_size_per_gpu=16)) + batch_size_per_gpu=16, + ), + ) def test_init(self): YOLOv7OptimWrapperConstructor(copy.deepcopy(self.optim_wrapper_cfg)) YOLOv7OptimWrapperConstructor( copy.deepcopy(self.optim_wrapper_cfg), - paramwise_cfg={'base_total_batch_size': 64}) + paramwise_cfg={"base_total_batch_size": 64}, + ) # `paramwise_cfg` must include `base_total_batch_size` if not None. 
with self.assertRaises(AssertionError): YOLOv7OptimWrapperConstructor( - copy.deepcopy(self.optim_wrapper_cfg), paramwise_cfg={'a': 64}) + copy.deepcopy(self.optim_wrapper_cfg), paramwise_cfg={"a": 64} + ) def test_build(self): optim_wrapper = YOLOv7OptimWrapperConstructor( - copy.deepcopy(self.optim_wrapper_cfg))( - self.model) + copy.deepcopy(self.optim_wrapper_cfg) + )(self.model) # test param_groups assert len(optim_wrapper.optimizer.param_groups) == 3 for i in range(3): param_groups_i = optim_wrapper.optimizer.param_groups[i] - assert param_groups_i['lr'] == self.base_lr + assert param_groups_i["lr"] == self.base_lr if i == 0: - assert param_groups_i['weight_decay'] == self.weight_decay + assert param_groups_i["weight_decay"] == self.weight_decay else: - assert param_groups_i['weight_decay'] == 0 + assert param_groups_i["weight_decay"] == 0 # test weight_decay linear scaling optim_wrapper_cfg = copy.deepcopy(self.optim_wrapper_cfg) - optim_wrapper_cfg['optimizer']['batch_size_per_gpu'] = 128 - optim_wrapper = YOLOv7OptimWrapperConstructor(optim_wrapper_cfg)( - self.model) - assert optim_wrapper.optimizer.param_groups[0][ - 'weight_decay'] == self.weight_decay * 2 + optim_wrapper_cfg["optimizer"]["batch_size_per_gpu"] = 128 + optim_wrapper = YOLOv7OptimWrapperConstructor(optim_wrapper_cfg)(self.model) + assert ( + optim_wrapper.optimizer.param_groups[0]["weight_decay"] + == self.weight_decay * 2 + ) # test without batch_size_per_gpu optim_wrapper_cfg = copy.deepcopy(self.optim_wrapper_cfg) - optim_wrapper_cfg['optimizer'].pop('batch_size_per_gpu') + optim_wrapper_cfg["optimizer"].pop("batch_size_per_gpu") optim_wrapper = dict( - optim_wrapper_cfg, constructor='YOLOv7OptimWrapperConstructor') + optim_wrapper_cfg, constructor="YOLOv7OptimWrapperConstructor" + ) optim_wrapper = build_optim_wrapper(self.model, optim_wrapper) - assert optim_wrapper.optimizer.param_groups[0][ - 'weight_decay'] == self.weight_decay + assert ( + optim_wrapper.optimizer.param_groups[0]["weight_decay"] == self.weight_decay + ) diff --git a/mmyolo/tests/test_models/test_backbone/test_csp_darknet.py b/mmyolo/tests/test_models/test_backbone/test_csp_darknet.py index 82dceb55..2a226dd8 100644 --- a/mmyolo/tests/test_models/test_backbone/test_csp_darknet.py +++ b/mmyolo/tests/test_models/test_backbone/test_csp_darknet.py @@ -6,29 +6,30 @@ from parameterized import parameterized from torch.nn.modules.batchnorm import _BatchNorm -from mmyolo.models.backbones import (YOLOv5CSPDarknet, YOLOv8CSPDarknet, - YOLOXCSPDarknet) +from mmyolo.models.backbones import YOLOv5CSPDarknet, YOLOv8CSPDarknet, YOLOXCSPDarknet from mmyolo.utils import register_all_modules + from .utils import check_norm_state, is_norm register_all_modules() class TestCSPDarknet(TestCase): - - @parameterized.expand([(YOLOv5CSPDarknet, ), (YOLOXCSPDarknet, ), - (YOLOv8CSPDarknet, )]) + @parameterized.expand( + [(YOLOv5CSPDarknet,), (YOLOXCSPDarknet,), (YOLOv8CSPDarknet,)] + ) def test_init(self, module_class): # out_indices in range(len(arch_setting) + 1) with pytest.raises(AssertionError): - module_class(out_indices=(6, )) + module_class(out_indices=(6,)) with pytest.raises(ValueError): # frozen_stages must in range(-1, len(arch_setting) + 1) module_class(frozen_stages=6) - @parameterized.expand([(YOLOv5CSPDarknet, ), (YOLOXCSPDarknet, ), - (YOLOv8CSPDarknet, )]) + @parameterized.expand( + [(YOLOv5CSPDarknet,), (YOLOXCSPDarknet,), (YOLOv8CSPDarknet,)] + ) def test_forward(self, module_class): # Test CSPDarknet with first stage frozen frozen_stages = 1 @@ 
-40,7 +41,7 @@ def test_forward(self, module_class): for param in mod.parameters(): assert param.requires_grad is False for i in range(1, frozen_stages + 1): - layer = getattr(model, f'stage{i}') + layer = getattr(model, f"stage{i}") for mod in layer.modules(): if isinstance(mod, _BatchNorm): assert mod.training is False @@ -54,8 +55,7 @@ def test_forward(self, module_class): assert check_norm_state(model.modules(), False) # Test CSPDarknet-P5 forward with widen_factor=0.25 - model = module_class( - arch='P5', widen_factor=0.25, out_indices=range(0, 5)) + model = module_class(arch="P5", widen_factor=0.25, out_indices=range(0, 5)) model.train() imgs = torch.randn(1, 3, 64, 64) @@ -69,9 +69,8 @@ def test_forward(self, module_class): # Test CSPDarknet forward with dict(type='ReLU') model = module_class( - widen_factor=0.125, - act_cfg=dict(type='ReLU'), - out_indices=range(0, 5)) + widen_factor=0.125, act_cfg=dict(type="ReLU"), out_indices=range(0, 5) + ) model.train() imgs = torch.randn(1, 3, 64, 64) @@ -100,11 +99,14 @@ def test_forward(self, module_class): assert feat[4].shape == torch.Size((1, 128, 2, 2)) # Test CSPDarknet with Dropout Block - model = module_class(plugins=[ - dict( - cfg=dict(type='mmdet.DropBlock', drop_prob=0.1, block_size=3), - stages=(False, False, True, True)), - ]) + model = module_class( + plugins=[ + dict( + cfg=dict(type="mmdet.DropBlock", drop_prob=0.1, block_size=3), + stages=(False, False, True, True), + ), + ] + ) assert len(model.stage1) == 2 assert len(model.stage2) == 2 diff --git a/mmyolo/tests/test_models/test_backbone/test_csp_resnet.py b/mmyolo/tests/test_models/test_backbone/test_csp_resnet.py index dd0f3c47..77c2259a 100644 --- a/mmyolo/tests/test_models/test_backbone/test_csp_resnet.py +++ b/mmyolo/tests/test_models/test_backbone/test_csp_resnet.py @@ -7,17 +7,17 @@ from mmyolo.models import PPYOLOECSPResNet from mmyolo.utils import register_all_modules + from .utils import check_norm_state, is_norm register_all_modules() class TestPPYOLOECSPResNet(TestCase): - def test_init(self): # out_indices in range(len(arch_setting) + 1) with pytest.raises(AssertionError): - PPYOLOECSPResNet(out_indices=(6, )) + PPYOLOECSPResNet(out_indices=(6,)) with pytest.raises(ValueError): # frozen_stages must in range(-1, len(arch_setting) + 1) @@ -34,7 +34,7 @@ def test_forward(self): for param in mod.parameters(): assert param.requires_grad is False for i in range(1, frozen_stages + 1): - layer = getattr(model, f'stage{i}') + layer = getattr(model, f"stage{i}") for mod in layer.modules(): if isinstance(mod, _BatchNorm): assert mod.training is False @@ -48,8 +48,7 @@ def test_forward(self): assert check_norm_state(model.modules(), False) # Test PPYOLOECSPResNet-P5 forward with widen_factor=0.25 - model = PPYOLOECSPResNet( - arch='P5', widen_factor=0.25, out_indices=range(0, 5)) + model = PPYOLOECSPResNet(arch="P5", widen_factor=0.25, out_indices=range(0, 5)) model.train() imgs = torch.randn(1, 3, 64, 64) @@ -63,9 +62,8 @@ def test_forward(self): # Test PPYOLOECSPResNet forward with dict(type='ReLU') model = PPYOLOECSPResNet( - widen_factor=0.125, - act_cfg=dict(type='ReLU'), - out_indices=range(0, 5)) + widen_factor=0.125, act_cfg=dict(type="ReLU"), out_indices=range(0, 5) + ) model.train() imgs = torch.randn(1, 3, 64, 64) @@ -94,11 +92,14 @@ def test_forward(self): assert feat[4].shape == torch.Size((1, 128, 2, 2)) # Test PPYOLOECSPResNet with BatchNorm forward - model = PPYOLOECSPResNet(plugins=[ - dict( - cfg=dict(type='mmdet.DropBlock', drop_prob=0.1, block_size=3), 
- stages=(False, False, True, True)), - ]) + model = PPYOLOECSPResNet( + plugins=[ + dict( + cfg=dict(type="mmdet.DropBlock", drop_prob=0.1, block_size=3), + stages=(False, False, True, True), + ), + ] + ) assert len(model.stage1) == 1 assert len(model.stage2) == 1 diff --git a/mmyolo/tests/test_models/test_backbone/test_efficient_rep.py b/mmyolo/tests/test_models/test_backbone/test_efficient_rep.py index 53af2029..034506dc 100644 --- a/mmyolo/tests/test_models/test_backbone/test_efficient_rep.py +++ b/mmyolo/tests/test_models/test_backbone/test_efficient_rep.py @@ -7,17 +7,17 @@ from mmyolo.models.backbones import YOLOv6CSPBep, YOLOv6EfficientRep from mmyolo.utils import register_all_modules + from .utils import check_norm_state, is_norm register_all_modules() class TestYOLOv6EfficientRep(TestCase): - def test_init(self): # out_indices in range(len(arch_setting) + 1) with pytest.raises(AssertionError): - YOLOv6EfficientRep(out_indices=(6, )) + YOLOv6EfficientRep(out_indices=(6,)) with pytest.raises(ValueError): # frozen_stages must in range(-1, len(arch_setting) + 1) @@ -34,7 +34,7 @@ def test_YOLOv6EfficientRep_forward(self): for param in mod.parameters(): assert param.requires_grad is False for i in range(1, frozen_stages + 1): - layer = getattr(model, f'stage{i}') + layer = getattr(model, f"stage{i}") for mod in layer.modules(): if isinstance(mod, _BatchNorm): assert mod.training is False @@ -49,7 +49,8 @@ def test_YOLOv6EfficientRep_forward(self): # Test YOLOv6EfficientRep-P5 forward with widen_factor=0.25 model = YOLOv6EfficientRep( - arch='P5', widen_factor=0.25, out_indices=range(0, 5)) + arch="P5", widen_factor=0.25, out_indices=range(0, 5) + ) model.train() imgs = torch.randn(1, 3, 64, 64) @@ -63,9 +64,8 @@ def test_YOLOv6EfficientRep_forward(self): # Test YOLOv6EfficientRep forward with dict(type='ReLU') model = YOLOv6EfficientRep( - widen_factor=0.125, - act_cfg=dict(type='ReLU'), - out_indices=range(0, 5)) + widen_factor=0.125, act_cfg=dict(type="ReLU"), out_indices=range(0, 5) + ) model.train() imgs = torch.randn(1, 3, 64, 64) @@ -94,11 +94,14 @@ def test_YOLOv6EfficientRep_forward(self): assert feat[4].shape == torch.Size((1, 128, 2, 2)) # Test YOLOv6EfficientRep with BatchNorm forward - model = YOLOv6EfficientRep(plugins=[ - dict( - cfg=dict(type='mmdet.DropBlock', drop_prob=0.1, block_size=3), - stages=(False, False, True, True)), - ]) + model = YOLOv6EfficientRep( + plugins=[ + dict( + cfg=dict(type="mmdet.DropBlock", drop_prob=0.1, block_size=3), + stages=(False, False, True, True), + ), + ] + ) assert len(model.stage1) == 1 assert len(model.stage2) == 1 @@ -123,7 +126,7 @@ def test_YOLOv6CSPBep_forward(self): for param in mod.parameters(): assert param.requires_grad is False for i in range(1, frozen_stages + 1): - layer = getattr(model, f'stage{i}') + layer = getattr(model, f"stage{i}") for mod in layer.modules(): if isinstance(mod, _BatchNorm): assert mod.training is False @@ -137,8 +140,7 @@ def test_YOLOv6CSPBep_forward(self): assert check_norm_state(model.modules(), False) # Test YOLOv6CSPBep forward with widen_factor=0.25 - model = YOLOv6CSPBep( - arch='P5', widen_factor=0.25, out_indices=range(0, 5)) + model = YOLOv6CSPBep(arch="P5", widen_factor=0.25, out_indices=range(0, 5)) model.train() imgs = torch.randn(1, 3, 64, 64) @@ -152,9 +154,8 @@ def test_YOLOv6CSPBep_forward(self): # Test YOLOv6CSPBep forward with dict(type='ReLU') model = YOLOv6CSPBep( - widen_factor=0.125, - act_cfg=dict(type='ReLU'), - out_indices=range(0, 5)) + widen_factor=0.125, 
act_cfg=dict(type="ReLU"), out_indices=range(0, 5) + ) model.train() imgs = torch.randn(1, 3, 64, 64) @@ -183,11 +184,14 @@ def test_YOLOv6CSPBep_forward(self): assert feat[4].shape == torch.Size((1, 128, 2, 2)) # Test YOLOv6CSPBep with BatchNorm forward - model = YOLOv6CSPBep(plugins=[ - dict( - cfg=dict(type='mmdet.DropBlock', drop_prob=0.1, block_size=3), - stages=(False, False, True, True)), - ]) + model = YOLOv6CSPBep( + plugins=[ + dict( + cfg=dict(type="mmdet.DropBlock", drop_prob=0.1, block_size=3), + stages=(False, False, True, True), + ), + ] + ) assert len(model.stage1) == 1 assert len(model.stage2) == 1 diff --git a/mmyolo/tests/test_models/test_backbone/test_yolov7_backbone.py b/mmyolo/tests/test_models/test_backbone/test_yolov7_backbone.py index 76b40aa4..be09b91c 100644 --- a/mmyolo/tests/test_models/test_backbone/test_yolov7_backbone.py +++ b/mmyolo/tests/test_models/test_backbone/test_yolov7_backbone.py @@ -7,17 +7,17 @@ from mmyolo.models.backbones import YOLOv7Backbone from mmyolo.utils import register_all_modules + from .utils import check_norm_state register_all_modules() class TestYOLOv7Backbone(TestCase): - def test_init(self): # out_indices in range(len(arch_setting) + 1) with pytest.raises(AssertionError): - YOLOv7Backbone(out_indices=(6, )) + YOLOv7Backbone(out_indices=(6,)) with pytest.raises(ValueError): # frozen_stages must in range(-1, len(arch_setting) + 1) @@ -34,7 +34,7 @@ def test_forward(self): for param in mod.parameters(): assert param.requires_grad is False for i in range(1, frozen_stages + 1): - layer = getattr(model, f'stage{i}') + layer = getattr(model, f"stage{i}") for mod in layer.modules(): if isinstance(mod, _BatchNorm): assert mod.training is False @@ -48,8 +48,7 @@ def test_forward(self): assert check_norm_state(model.modules(), False) # Test YOLOv7Backbone-L forward with widen_factor=0.25 - model = YOLOv7Backbone( - widen_factor=0.25, out_indices=tuple(range(0, 5))) + model = YOLOv7Backbone(widen_factor=0.25, out_indices=tuple(range(0, 5))) model.train() imgs = torch.randn(1, 3, 64, 64) @@ -66,10 +65,11 @@ def test_forward(self): widen_factor=0.25, plugins=[ dict( - cfg=dict( - type='mmdet.DropBlock', drop_prob=0.1, block_size=3), - stages=(False, False, True, True)), - ]) + cfg=dict(type="mmdet.DropBlock", drop_prob=0.1, block_size=3), + stages=(False, False, True, True), + ), + ], + ) assert len(model.stage1) == 2 assert len(model.stage2) == 2 @@ -84,7 +84,7 @@ def test_forward(self): assert feat[2].shape == torch.Size((1, 256, 4, 4)) # Test YOLOv7Backbone-X forward with widen_factor=0.25 - model = YOLOv7Backbone(arch='X', widen_factor=0.25) + model = YOLOv7Backbone(arch="X", widen_factor=0.25) model.train() imgs = torch.randn(1, 3, 64, 64) @@ -95,7 +95,7 @@ def test_forward(self): assert feat[2].shape == torch.Size((1, 320, 2, 2)) # Test YOLOv7Backbone-tiny forward with widen_factor=0.25 - model = YOLOv7Backbone(arch='Tiny', widen_factor=0.25) + model = YOLOv7Backbone(arch="Tiny", widen_factor=0.25) model.train() feat = model(imgs) @@ -105,8 +105,7 @@ def test_forward(self): assert feat[2].shape == torch.Size((1, 128, 2, 2)) # Test YOLOv7Backbone-w forward with widen_factor=0.25 - model = YOLOv7Backbone( - arch='W', widen_factor=0.25, out_indices=(2, 3, 4, 5)) + model = YOLOv7Backbone(arch="W", widen_factor=0.25, out_indices=(2, 3, 4, 5)) model.train() imgs = torch.randn(1, 3, 128, 128) @@ -118,8 +117,7 @@ def test_forward(self): assert feat[3].shape == torch.Size((1, 256, 2, 2)) # Test YOLOv7Backbone-w forward with widen_factor=0.25 - 
model = YOLOv7Backbone( - arch='D', widen_factor=0.25, out_indices=(2, 3, 4, 5)) + model = YOLOv7Backbone(arch="D", widen_factor=0.25, out_indices=(2, 3, 4, 5)) model.train() feat = model(imgs) @@ -130,8 +128,7 @@ def test_forward(self): assert feat[3].shape == torch.Size((1, 384, 2, 2)) # Test YOLOv7Backbone-w forward with widen_factor=0.25 - model = YOLOv7Backbone( - arch='E', widen_factor=0.25, out_indices=(2, 3, 4, 5)) + model = YOLOv7Backbone(arch="E", widen_factor=0.25, out_indices=(2, 3, 4, 5)) model.train() feat = model(imgs) @@ -142,8 +139,7 @@ def test_forward(self): assert feat[3].shape == torch.Size((1, 320, 2, 2)) # Test YOLOv7Backbone-w forward with widen_factor=0.25 - model = YOLOv7Backbone( - arch='E2E', widen_factor=0.25, out_indices=(2, 3, 4, 5)) + model = YOLOv7Backbone(arch="E2E", widen_factor=0.25, out_indices=(2, 3, 4, 5)) model.train() feat = model(imgs) diff --git a/mmyolo/tests/test_models/test_backbone/utils.py b/mmyolo/tests/test_models/test_backbone/utils.py index d65db568..c78a433d 100644 --- a/mmyolo/tests/test_models/test_backbone/utils.py +++ b/mmyolo/tests/test_models/test_backbone/utils.py @@ -9,8 +9,10 @@ def is_block(modules): """Check if is ResNet building block.""" - if isinstance(modules, (BasicBlock, Bottleneck, BottleneckX, Bottle2neck, - SimplifiedBasicBlock)): + if isinstance( + modules, + (BasicBlock, Bottleneck, BottleneckX, Bottle2neck, SimplifiedBasicBlock), + ): return True return False diff --git a/mmyolo/tests/test_models/test_data_preprocessor/test_data_preprocessor.py b/mmyolo/tests/test_models/test_data_preprocessor/test_data_preprocessor.py index 203660ae..e614fbe8 100644 --- a/mmyolo/tests/test_models/test_data_preprocessor/test_data_preprocessor.py +++ b/mmyolo/tests/test_models/test_data_preprocessor/test_data_preprocessor.py @@ -13,63 +13,59 @@ class TestYOLOv5DetDataPreprocessor(TestCase): - def test_forward(self): processor = YOLOv5DetDataPreprocessor(mean=[0, 0, 0], std=[1, 1, 1]) data = { - 'inputs': [torch.randint(0, 256, (3, 11, 10))], - 'data_samples': [DetDataSample()] + "inputs": [torch.randint(0, 256, (3, 11, 10))], + "data_samples": [DetDataSample()], } out_data = processor(data, training=False) - batch_inputs, batch_data_samples = out_data['inputs'], out_data[ - 'data_samples'] + batch_inputs, batch_data_samples = out_data["inputs"], out_data["data_samples"] self.assertEqual(batch_inputs.shape, (1, 3, 11, 10)) self.assertEqual(len(batch_data_samples), 1) # test channel_conversion processor = YOLOv5DetDataPreprocessor( - mean=[0., 0., 0.], std=[1., 1., 1.], bgr_to_rgb=True) + mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], bgr_to_rgb=True + ) out_data = processor(data, training=False) - batch_inputs, batch_data_samples = out_data['inputs'], out_data[ - 'data_samples'] + batch_inputs, batch_data_samples = out_data["inputs"], out_data["data_samples"] self.assertEqual(batch_inputs.shape, (1, 3, 11, 10)) self.assertEqual(len(batch_data_samples), 1) # test padding, training=False data = { - 'inputs': [ + "inputs": [ torch.randint(0, 256, (3, 10, 11)), - torch.randint(0, 256, (3, 9, 14)) + torch.randint(0, 256, (3, 9, 14)), ] } processor = YOLOv5DetDataPreprocessor( - mean=[0., 0., 0.], std=[1., 1., 1.], bgr_to_rgb=True) + mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], bgr_to_rgb=True + ) out_data = processor(data, training=False) - batch_inputs, batch_data_samples = out_data['inputs'], out_data[ - 'data_samples'] + batch_inputs, batch_data_samples = out_data["inputs"], out_data["data_samples"] self.assertEqual(batch_inputs.shape, (2, 
3, 10, 14)) self.assertIsNone(batch_data_samples) # test training data = { - 'inputs': torch.randint(0, 256, (2, 3, 10, 11)), - 'data_samples': torch.randint(0, 11, (18, 6)), + "inputs": torch.randint(0, 256, (2, 3, 10, 11)), + "data_samples": torch.randint(0, 11, (18, 6)), } out_data = processor(data, training=True) - batch_inputs, batch_data_samples = out_data['inputs'], out_data[ - 'data_samples'] - self.assertIn('img_metas', batch_data_samples) - self.assertIn('bboxes_labels', batch_data_samples) + batch_inputs, batch_data_samples = out_data["inputs"], out_data["data_samples"] + self.assertIn("img_metas", batch_data_samples) + self.assertIn("bboxes_labels", batch_data_samples) self.assertEqual(batch_inputs.shape, (2, 3, 10, 11)) - self.assertIsInstance(batch_data_samples['bboxes_labels'], - torch.Tensor) - self.assertIsInstance(batch_data_samples['img_metas'], list) + self.assertIsInstance(batch_data_samples["bboxes_labels"], torch.Tensor) + self.assertIsInstance(batch_data_samples["img_metas"], list) data = { - 'inputs': [torch.randint(0, 256, (3, 11, 10))], - 'data_samples': [DetDataSample()] + "inputs": [torch.randint(0, 256, (3, 11, 10))], + "data_samples": [DetDataSample()], } # data_samples must be tensor with self.assertRaises(AssertionError): @@ -77,47 +73,47 @@ def test_forward(self): class TestPPYOLOEDetDataPreprocessor(TestCase): - def test_batch_random_resize(self): processor = PPYOLOEDetDataPreprocessor( pad_size_divisor=32, batch_augments=[ dict( - type='PPYOLOEBatchRandomResize', + type="PPYOLOEBatchRandomResize", random_size_range=(320, 480), interval=1, size_divisor=32, random_interp=True, - keep_ratio=False) + keep_ratio=False, + ) ], - mean=[0., 0., 0.], - std=[255., 255., 255.], - bgr_to_rgb=True) + mean=[0.0, 0.0, 0.0], + std=[255.0, 255.0, 255.0], + bgr_to_rgb=True, + ) self.assertTrue( - isinstance(processor.batch_augments[0], PPYOLOEBatchRandomResize)) - message_hub = MessageHub.get_instance('test_batch_random_resize') - message_hub.update_info('iter', 0) + isinstance(processor.batch_augments[0], PPYOLOEBatchRandomResize) + ) + message_hub = MessageHub.get_instance("test_batch_random_resize") + message_hub.update_info("iter", 0) # test training data = { - 'inputs': [ + "inputs": [ + torch.randint(0, 256, (3, 10, 11)), torch.randint(0, 256, (3, 10, 11)), - torch.randint(0, 256, (3, 10, 11)) ], - 'data_samples': - torch.randint(0, 11, (18, 6)).float(), + "data_samples": torch.randint(0, 11, (18, 6)).float(), } out_data = processor(data, training=True) - batch_data_samples = out_data['data_samples'] - self.assertIn('img_metas', batch_data_samples) - self.assertIn('bboxes_labels', batch_data_samples) - self.assertIsInstance(batch_data_samples['bboxes_labels'], - torch.Tensor) - self.assertIsInstance(batch_data_samples['img_metas'], list) + batch_data_samples = out_data["data_samples"] + self.assertIn("img_metas", batch_data_samples) + self.assertIn("bboxes_labels", batch_data_samples) + self.assertIsInstance(batch_data_samples["bboxes_labels"], torch.Tensor) + self.assertIsInstance(batch_data_samples["img_metas"], list) data = { - 'inputs': [torch.randint(0, 256, (3, 11, 10))], - 'data_samples': DetDataSample() + "inputs": [torch.randint(0, 256, (3, 11, 10))], + "data_samples": DetDataSample(), } # data_samples must be list with self.assertRaises(TypeError): diff --git a/mmyolo/tests/test_models/test_dense_heads/test_ppyoloe_head.py b/mmyolo/tests/test_models/test_dense_heads/test_ppyoloe_head.py index 20e0c457..8f7976e2 100644 --- 
a/mmyolo/tests/test_models/test_dense_heads/test_ppyoloe_head.py +++ b/mmyolo/tests/test_models/test_dense_heads/test_ppyoloe_head.py @@ -14,44 +14,51 @@ class TestPPYOLOEHead(TestCase): - def setUp(self): self.head_module = dict( - type='PPYOLOEHeadModule', + type="PPYOLOEHeadModule", num_classes=4, in_channels=[32, 64, 128], - featmap_strides=(8, 16, 32)) + featmap_strides=(8, 16, 32), + ) def test_init_weights(self): head = PPYOLOEHead(head_module=self.head_module) head.head_module.init_weights() bias_init = bias_init_with_prob(0.01) - for conv_cls, conv_reg in zip(head.head_module.cls_preds, - head.head_module.reg_preds): - assert_allclose(conv_cls.weight.data, - torch.zeros_like(conv_cls.weight.data)) - assert_allclose(conv_reg.weight.data, - torch.zeros_like(conv_reg.weight.data)) - - assert_allclose(conv_cls.bias.data, - torch.ones_like(conv_cls.bias.data) * bias_init) - assert_allclose(conv_reg.bias.data, - torch.ones_like(conv_reg.bias.data)) + for conv_cls, conv_reg in zip( + head.head_module.cls_preds, head.head_module.reg_preds + ): + assert_allclose( + conv_cls.weight.data, torch.zeros_like(conv_cls.weight.data) + ) + assert_allclose( + conv_reg.weight.data, torch.zeros_like(conv_reg.weight.data) + ) + + assert_allclose( + conv_cls.bias.data, torch.ones_like(conv_cls.bias.data) * bias_init + ) + assert_allclose(conv_reg.bias.data, torch.ones_like(conv_reg.bias.data)) def test_predict_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'ori_shape': (s, s, 3), - 'scale_factor': (1.0, 1.0), - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "ori_shape": (s, s, 3), + "scale_factor": (1.0, 1.0), + } + ] test_cfg = Config( dict( multi_label=True, nms_pre=1000, score_thr=0.01, - nms=dict(type='nms', iou_threshold=0.7), - max_per_img=300)) + nms=dict(type="nms", iou_threshold=0.7), + max_per_img=300, + ) + ) head = PPYOLOEHead(head_module=self.head_module, test_cfg=test_cfg) head.eval() @@ -67,7 +74,8 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=True, - with_nms=True) + with_nms=True, + ) head.predict_by_feat( cls_scores, bbox_preds, @@ -75,42 +83,48 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=False, - with_nms=False) + with_nms=False, + ) def test_loss_by_feat(self): - message_hub = MessageHub.get_instance('test_ppyoloe_loss_by_feat') - message_hub.update_info('epoch', 1) + message_hub = MessageHub.get_instance("test_ppyoloe_loss_by_feat") + message_hub.update_info("epoch", 1) s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'batch_input_shape': (s, s), - 'scale_factor': 1, - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "batch_input_shape": (s, s), + "scale_factor": 1, + } + ] head = PPYOLOEHead( head_module=self.head_module, train_cfg=ConfigDict( initial_epoch=31, initial_assigner=dict( - type='BatchATSSAssigner', + type="BatchATSSAssigner", num_classes=4, topk=9, - iou_calculator=dict(type='mmdet.BboxOverlaps2D')), + iou_calculator=dict(type="mmdet.BboxOverlaps2D"), + ), assigner=dict( - type='BatchTaskAlignedAssigner', + type="BatchTaskAlignedAssigner", num_classes=4, topk=13, alpha=1, - beta=6))) + beta=6, + ), + ), + ) head.train() feat = [] - for i in range(len(self.head_module['in_channels'])): - in_channel = self.head_module['in_channels'][i] - feat_size = self.head_module['featmap_strides'][i] - feat.append( - torch.rand(1, in_channel, s // feat_size, s // feat_size)) + for i in range(len(self.head_module["in_channels"])): + in_channel = self.head_module["in_channels"][i] + feat_size = 
self.head_module["featmap_strides"][i] + feat.append(torch.rand(1, in_channel, s // feat_size, s // feat_size)) cls_scores, bbox_preds, bbox_dist_preds = head.forward(feat) @@ -118,22 +132,25 @@ def test_loss_by_feat(self): # background gt_instances = torch.empty((0, 6), dtype=torch.float32) - empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - bbox_dist_preds, gt_instances, - img_metas) + empty_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, bbox_dist_preds, gt_instances, img_metas + ) # When there is no truth, the cls loss should be nonzero but there # should be no box loss. - empty_cls_loss = empty_gt_losses['loss_cls'].sum() - empty_box_loss = empty_gt_losses['loss_bbox'].sum() - empty_dfl_loss = empty_gt_losses['loss_dfl'].sum() - self.assertGreater(empty_cls_loss.item(), 0, - 'cls loss should be non-zero') + empty_cls_loss = empty_gt_losses["loss_cls"].sum() + empty_box_loss = empty_gt_losses["loss_bbox"].sum() + empty_dfl_loss = empty_gt_losses["loss_dfl"].sum() + self.assertGreater(empty_cls_loss.item(), 0, "cls loss should be non-zero") self.assertEqual( - empty_box_loss.item(), 0, - 'there should be no box loss when there are no true boxes') + empty_box_loss.item(), + 0, + "there should be no box loss when there are no true boxes", + ) self.assertEqual( - empty_dfl_loss.item(), 0, - 'there should be df loss when there are no true boxes') + empty_dfl_loss.item(), + 0, + "there should be no dfl loss when there are no true boxes", + ) # When truth is non-empty then both cls and box loss should be nonzero # for random inputs @@ -142,64 +159,64 @@ def test_loss_by_feat(self): train_cfg=ConfigDict( initial_epoch=31, initial_assigner=dict( - type='BatchATSSAssigner', + type="BatchATSSAssigner", num_classes=4, topk=9, - iou_calculator=dict(type='mmdet.BboxOverlaps2D')), + iou_calculator=dict(type="mmdet.BboxOverlaps2D"), + ), assigner=dict( - type='BatchTaskAlignedAssigner', + type="BatchTaskAlignedAssigner", num_classes=4, topk=13, alpha=1, - beta=6))) + beta=6, + ), + ), + ) head.train() - gt_instances = torch.Tensor( - [[0., 0., 23.6667, 23.8757, 238.6326, 151.8874]]) - - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - bbox_dist_preds, gt_instances, - img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_loss_dfl = one_gt_losses['loss_dfl'].sum() - self.assertGreater(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_loss_dfl.item(), 0, - 'obj loss should be non-zero') + gt_instances = torch.Tensor([[0.0, 0.0, 23.6667, 23.8757, 238.6326, 151.8874]]) + + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, bbox_dist_preds, gt_instances, img_metas + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_loss_dfl = one_gt_losses["loss_dfl"].sum() + self.assertGreater(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_loss_dfl.item(), 0, "dfl loss should be non-zero") # test num_class = 1 - self.head_module['num_classes'] = 1 + self.head_module["num_classes"] = 1 head = PPYOLOEHead( head_module=self.head_module, train_cfg=ConfigDict( initial_epoch=31, initial_assigner=dict( - type='BatchATSSAssigner', + type="BatchATSSAssigner", num_classes=1, topk=9, - iou_calculator=dict(type='mmdet.BboxOverlaps2D')), +
iou_calculator=dict(type="mmdet.BboxOverlaps2D"), + ), assigner=dict( - type='BatchTaskAlignedAssigner', + type="BatchTaskAlignedAssigner", num_classes=1, topk=13, alpha=1, - beta=6))) + beta=6, + ), + ), + ) head.train() - gt_instances = torch.Tensor( - [[0., 0., 23.6667, 23.8757, 238.6326, 151.8874]]) + gt_instances = torch.Tensor([[0.0, 0.0, 23.6667, 23.8757, 238.6326, 151.8874]]) cls_scores, bbox_preds, bbox_dist_preds = head.forward(feat) - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - bbox_dist_preds, gt_instances, - img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_loss_dfl = one_gt_losses['loss_dfl'].sum() - self.assertGreater(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_loss_dfl.item(), 0, - 'obj loss should be non-zero') + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, bbox_dist_preds, gt_instances, img_metas + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_loss_dfl = one_gt_losses["loss_dfl"].sum() + self.assertGreater(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_loss_dfl.item(), 0, "obj loss should be non-zero") diff --git a/mmyolo/tests/test_models/test_dense_heads/test_rtmdet_head.py b/mmyolo/tests/test_models/test_dense_heads/test_rtmdet_head.py index 3f3fccfa..ba95cb85 100644 --- a/mmyolo/tests/test_models/test_dense_heads/test_rtmdet_head.py +++ b/mmyolo/tests/test_models/test_dense_heads/test_rtmdet_head.py @@ -12,15 +12,15 @@ class TestRTMDetHead(TestCase): - def setUp(self): self.head_module = dict( - type='RTMDetSepBNHeadModule', + type="RTMDetSepBNHeadModule", num_classes=4, in_channels=1, stacked_convs=1, feat_channels=64, - featmap_strides=[4, 8, 16]) + featmap_strides=[4, 8, 16], + ) def test_init_weights(self): head = RTMDetHead(head_module=self.head_module) @@ -28,23 +28,25 @@ def test_init_weights(self): def test_predict_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'ori_shape': (s, s, 3), - 'scale_factor': (1.0, 1.0), - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "ori_shape": (s, s, 3), + "scale_factor": (1.0, 1.0), + } + ] test_cfg = dict( multi_label=True, nms_pre=30000, score_thr=0.001, - nms=dict(type='nms', iou_threshold=0.65), - max_per_img=300) + nms=dict(type="nms", iou_threshold=0.65), + max_per_img=300, + ) test_cfg = Config(test_cfg) head = RTMDetHead(head_module=self.head_module, test_cfg=test_cfg) feat = [ - torch.rand(1, 1, s // feat_size, s // feat_size) - for feat_size in [4, 8, 16] + torch.rand(1, 1, s // feat_size, s // feat_size) for feat_size in [4, 8, 16] ] cls_scores, bbox_preds = head.forward(feat) head.predict_by_feat( @@ -53,87 +55,97 @@ def test_predict_by_feat(self): batch_img_metas=img_metas, cfg=test_cfg, rescale=True, - with_nms=True) + with_nms=True, + ) head.predict_by_feat( cls_scores, bbox_preds, batch_img_metas=img_metas, cfg=test_cfg, rescale=False, - with_nms=False) + with_nms=False, + ) def test_loss_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'batch_input_shape': (s, s), - 'scale_factor': 1, - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "batch_input_shape": (s, s), + "scale_factor": 1, + } + ] train_cfg = dict( assigner=dict( num_classes=80, - 
type='BatchDynamicSoftLabelAssigner', + type="BatchDynamicSoftLabelAssigner", topk=13, - iou_calculator=dict(type='mmdet.BboxOverlaps2D')), + iou_calculator=dict(type="mmdet.BboxOverlaps2D"), + ), allowed_border=-1, pos_weight=-1, - debug=False) + debug=False, + ) train_cfg = Config(train_cfg) head = RTMDetHead(head_module=self.head_module, train_cfg=train_cfg) feat = [ - torch.rand(1, 1, s // feat_size, s // feat_size) - for feat_size in [4, 8, 16] + torch.rand(1, 1, s // feat_size, s // feat_size) for feat_size in [4, 8, 16] ] cls_scores, bbox_preds = head.forward(feat) # Test that empty ground truth encourages the network to predict # background gt_instances = InstanceData( - bboxes=torch.empty((0, 4)), labels=torch.LongTensor([])) + bboxes=torch.empty((0, 4)), labels=torch.LongTensor([]) + ) - empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - [gt_instances], img_metas) + empty_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, [gt_instances], img_metas + ) # When there is no truth, the cls loss should be nonzero but there # should be no box loss. - empty_cls_loss = empty_gt_losses['loss_cls'].sum() - empty_box_loss = empty_gt_losses['loss_bbox'].sum() - self.assertGreater(empty_cls_loss.item(), 0, - 'classification loss should be non-zero') + empty_cls_loss = empty_gt_losses["loss_cls"].sum() + empty_box_loss = empty_gt_losses["loss_bbox"].sum() + self.assertGreater( + empty_cls_loss.item(), 0, "classification loss should be non-zero" + ) self.assertEqual( - empty_box_loss.item(), 0, - 'there should be no box loss when there are no true boxes') + empty_box_loss.item(), + 0, + "there should be no box loss when there are no true boxes", + ) # When truth is non-empty then both cls and box loss should be nonzero # for random inputs head = RTMDetHead(head_module=self.head_module, train_cfg=train_cfg) gt_instances = InstanceData( bboxes=torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]), - labels=torch.LongTensor([1])) + labels=torch.LongTensor([1]), + ) - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - [gt_instances], img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - self.assertGreater(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, [gt_instances], img_metas + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + self.assertGreater(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") # test num_class = 1 - self.head_module['num_classes'] = 1 + self.head_module["num_classes"] = 1 head = RTMDetHead(head_module=self.head_module, train_cfg=train_cfg) gt_instances = InstanceData( bboxes=torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]), - labels=torch.LongTensor([0])) + labels=torch.LongTensor([0]), + ) cls_scores, bbox_preds = head.forward(feat) - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - [gt_instances], img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - self.assertGreater(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, [gt_instances], img_metas + ) + onegt_cls_loss = 
one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + self.assertGreater(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") diff --git a/mmyolo/tests/test_models/test_dense_heads/test_yolov5_head.py b/mmyolo/tests/test_models/test_dense_heads/test_yolov5_head.py index 31b399bf..c152e3f0 100644 --- a/mmyolo/tests/test_models/test_dense_heads/test_yolov5_head.py +++ b/mmyolo/tests/test_models/test_dense_heads/test_yolov5_head.py @@ -12,37 +12,40 @@ class TestYOLOv5Head(TestCase): - def setUp(self): self.head_module = dict( - type='YOLOv5HeadModule', + type="YOLOv5HeadModule", num_classes=2, in_channels=[32, 64, 128], featmap_strides=[8, 16, 32], - num_base_priors=3) + num_base_priors=3, + ) def test_predict_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'ori_shape': (s, s, 3), - 'scale_factor': (1.0, 1.0), - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "ori_shape": (s, s, 3), + "scale_factor": (1.0, 1.0), + } + ] test_cfg = Config( dict( multi_label=True, max_per_img=300, score_thr=0.01, - nms=dict(type='nms', iou_threshold=0.65))) + nms=dict(type="nms", iou_threshold=0.65), + ) + ) head = YOLOv5Head(head_module=self.head_module, test_cfg=test_cfg) feat = [] - for i in range(len(self.head_module['in_channels'])): - in_channel = self.head_module['in_channels'][i] - feat_size = self.head_module['featmap_strides'][i] - feat.append( - torch.rand(1, in_channel, s // feat_size, s // feat_size)) + for i in range(len(self.head_module["in_channels"])): + in_channel = self.head_module["in_channels"][i] + feat_size = self.head_module["featmap_strides"][i] + feat.append(torch.rand(1, in_channel, s // feat_size, s // feat_size)) cls_scores, bbox_preds, objectnesses = head.forward(feat) head.predict_by_feat( @@ -52,7 +55,8 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=True, - with_nms=True) + with_nms=True, + ) head.predict_by_feat( cls_scores, bbox_preds, @@ -60,177 +64,200 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=False, - with_nms=False) + with_nms=False, + ) def test_loss_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'batch_input_shape': (s, s), - 'scale_factor': 1, - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "batch_input_shape": (s, s), + "scale_factor": 1, + } + ] head = YOLOv5Head(head_module=self.head_module) feat = [] - for i in range(len(self.head_module['in_channels'])): - in_channel = self.head_module['in_channels'][i] - feat_size = self.head_module['featmap_strides'][i] - feat.append( - torch.rand(1, in_channel, s // feat_size, s // feat_size)) + for i in range(len(self.head_module["in_channels"])): + in_channel = self.head_module["in_channels"][i] + feat_size = self.head_module["featmap_strides"][i] + feat.append(torch.rand(1, in_channel, s // feat_size, s // feat_size)) cls_scores, bbox_preds, objectnesses = head.forward(feat) # Test that empty ground truth encourages the network to predict # background gt_instances = InstanceData( - bboxes=torch.empty((0, 4)), labels=torch.LongTensor([])) + bboxes=torch.empty((0, 4)), labels=torch.LongTensor([]) + ) - empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - objectnesses, [gt_instances], - img_metas) + empty_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, objectnesses, [gt_instances], img_metas + ) # When there is no truth, the cls loss should be nonzero but there # should be no box loss. 
- empty_cls_loss = empty_gt_losses['loss_cls'].sum() - empty_box_loss = empty_gt_losses['loss_bbox'].sum() - empty_obj_loss = empty_gt_losses['loss_obj'].sum() + empty_cls_loss = empty_gt_losses["loss_cls"].sum() + empty_box_loss = empty_gt_losses["loss_bbox"].sum() + empty_obj_loss = empty_gt_losses["loss_obj"].sum() self.assertEqual( - empty_cls_loss.item(), 0, - 'there should be no cls loss when there are no true boxes') + empty_cls_loss.item(), + 0, + "there should be no cls loss when there are no true boxes", + ) self.assertEqual( - empty_box_loss.item(), 0, - 'there should be no box loss when there are no true boxes') - self.assertGreater(empty_obj_loss.item(), 0, - 'objectness loss should be non-zero') + empty_box_loss.item(), + 0, + "there should be no box loss when there are no true boxes", + ) + self.assertGreater( + empty_obj_loss.item(), 0, "objectness loss should be non-zero" + ) # When truth is non-empty then both cls and box loss should be nonzero # for random inputs head = YOLOv5Head(head_module=self.head_module) gt_instances = InstanceData( bboxes=torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]), - labels=torch.LongTensor([1])) - - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, objectnesses, - [gt_instances], img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_obj_loss = one_gt_losses['loss_obj'].sum() - self.assertGreater(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_obj_loss.item(), 0, - 'obj loss should be non-zero') + labels=torch.LongTensor([1]), + ) + + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, objectnesses, [gt_instances], img_metas + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_obj_loss = one_gt_losses["loss_obj"].sum() + self.assertGreater(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_obj_loss.item(), 0, "obj loss should be non-zero") # test num_class = 1 - self.head_module['num_classes'] = 1 + self.head_module["num_classes"] = 1 head = YOLOv5Head(head_module=self.head_module) gt_instances = InstanceData( bboxes=torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]), - labels=torch.LongTensor([0])) - - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, objectnesses, - [gt_instances], img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_obj_loss = one_gt_losses['loss_obj'].sum() - self.assertEqual(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_obj_loss.item(), 0, - 'obj loss should be non-zero') + labels=torch.LongTensor([0]), + ) + + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, objectnesses, [gt_instances], img_metas + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_obj_loss = one_gt_losses["loss_obj"].sum() + self.assertEqual(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_obj_loss.item(), 0, "obj loss should be non-zero") def test_loss_by_feat_with_ignore(self): s = 256 - 
img_metas = [{ - 'img_shape': (s, s, 3), - 'batch_input_shape': (s, s), - 'scale_factor': 1, - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "batch_input_shape": (s, s), + "scale_factor": 1, + } + ] head = YOLOv5Head(head_module=self.head_module, ignore_iof_thr=0.8) feat = [] - for i in range(len(self.head_module['in_channels'])): - in_channel = self.head_module['in_channels'][i] - feat_size = self.head_module['featmap_strides'][i] - feat.append( - torch.rand(1, in_channel, s // feat_size, s // feat_size)) + for i in range(len(self.head_module["in_channels"])): + in_channel = self.head_module["in_channels"][i] + feat_size = self.head_module["featmap_strides"][i] + feat.append(torch.rand(1, in_channel, s // feat_size, s // feat_size)) cls_scores, bbox_preds, objectnesses = head.forward(feat) # Test that empty ground truth encourages the network to predict # background gt_instances = InstanceData( - bboxes=torch.empty((0, 4)), labels=torch.LongTensor([])) + bboxes=torch.empty((0, 4)), labels=torch.LongTensor([]) + ) # ignore boxes gt_instances_ignore = torch.tensor( - [[0, 0, 69.7688, 0, 619.3611, 62.2711]], dtype=torch.float32) + [[0, 0, 69.7688, 0, 619.3611, 62.2711]], dtype=torch.float32 + ) empty_gt_losses = head._loss_by_feat_with_ignore( - cls_scores, bbox_preds, objectnesses, [gt_instances], img_metas, - gt_instances_ignore) + cls_scores, + bbox_preds, + objectnesses, + [gt_instances], + img_metas, + gt_instances_ignore, + ) # When there is no truth, the cls loss should be nonzero but there # should be no box loss. - empty_cls_loss = empty_gt_losses['loss_cls'].sum() - empty_box_loss = empty_gt_losses['loss_bbox'].sum() - empty_obj_loss = empty_gt_losses['loss_obj'].sum() + empty_cls_loss = empty_gt_losses["loss_cls"].sum() + empty_box_loss = empty_gt_losses["loss_bbox"].sum() + empty_obj_loss = empty_gt_losses["loss_obj"].sum() self.assertEqual( - empty_cls_loss.item(), 0, - 'there should be no cls loss when there are no true boxes') + empty_cls_loss.item(), + 0, + "there should be no cls loss when there are no true boxes", + ) self.assertEqual( - empty_box_loss.item(), 0, - 'there should be no box loss when there are no true boxes') - self.assertGreater(empty_obj_loss.item(), 0, - 'objectness loss should be non-zero') + empty_box_loss.item(), + 0, + "there should be no box loss when there are no true boxes", + ) + self.assertGreater( + empty_obj_loss.item(), 0, "objectness loss should be non-zero" + ) # When truth is non-empty then both cls and box loss should be nonzero # for random inputs head = YOLOv5Head(head_module=self.head_module, ignore_iof_thr=0.8) gt_instances = InstanceData( bboxes=torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]), - labels=torch.LongTensor([1])) + labels=torch.LongTensor([1]), + ) gt_instances_ignore = torch.tensor( - [[0, 0, 69.7688, 0, 619.3611, 62.2711]], dtype=torch.float32) - - one_gt_losses = head._loss_by_feat_with_ignore(cls_scores, bbox_preds, - objectnesses, - [gt_instances], - img_metas, - gt_instances_ignore) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_obj_loss = one_gt_losses['loss_obj'].sum() - self.assertGreater(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_obj_loss.item(), 0, - 'obj loss should be non-zero') + [[0, 0, 69.7688, 0, 619.3611, 62.2711]], dtype=torch.float32 + ) + + one_gt_losses = head._loss_by_feat_with_ignore( + cls_scores, + 
bbox_preds, + objectnesses, + [gt_instances], + img_metas, + gt_instances_ignore, + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_obj_loss = one_gt_losses["loss_obj"].sum() + self.assertGreater(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_obj_loss.item(), 0, "obj loss should be non-zero") # test num_class = 1 - self.head_module['num_classes'] = 1 + self.head_module["num_classes"] = 1 head = YOLOv5Head(head_module=self.head_module, ignore_iof_thr=0.8) gt_instances = InstanceData( bboxes=torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]), - labels=torch.LongTensor([0])) + labels=torch.LongTensor([0]), + ) gt_instances_ignore = torch.tensor( - [[0, 0, 69.7688, 0, 619.3611, 62.2711]], dtype=torch.float32) - - one_gt_losses = head._loss_by_feat_with_ignore(cls_scores, bbox_preds, - objectnesses, - [gt_instances], - img_metas, - gt_instances_ignore) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_obj_loss = one_gt_losses['loss_obj'].sum() - self.assertEqual(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_obj_loss.item(), 0, - 'obj loss should be non-zero') + [[0, 0, 69.7688, 0, 619.3611, 62.2711]], dtype=torch.float32 + ) + + one_gt_losses = head._loss_by_feat_with_ignore( + cls_scores, + bbox_preds, + objectnesses, + [gt_instances], + img_metas, + gt_instances_ignore, + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_obj_loss = one_gt_losses["loss_obj"].sum() + self.assertEqual(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_obj_loss.item(), 0, "obj loss should be non-zero") diff --git a/mmyolo/tests/test_models/test_dense_heads/test_yolov6_head.py b/mmyolo/tests/test_models/test_dense_heads/test_yolov6_head.py index 47401d02..0e30113f 100644 --- a/mmyolo/tests/test_models/test_dense_heads/test_yolov6_head.py +++ b/mmyolo/tests/test_models/test_dense_heads/test_yolov6_head.py @@ -11,36 +11,39 @@ class TestYOLOv6Head(TestCase): - def setUp(self): self.head_module = dict( - type='YOLOv6HeadModule', + type="YOLOv6HeadModule", num_classes=2, in_channels=[32, 64, 128], - featmap_strides=[8, 16, 32]) + featmap_strides=[8, 16, 32], + ) def test_predict_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'ori_shape': (s, s, 3), - 'scale_factor': (1.0, 1.0), - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "ori_shape": (s, s, 3), + "scale_factor": (1.0, 1.0), + } + ] test_cfg = Config( dict( multi_label=True, max_per_img=300, score_thr=0.01, - nms=dict(type='nms', iou_threshold=0.65))) + nms=dict(type="nms", iou_threshold=0.65), + ) + ) head = YOLOv6Head(head_module=self.head_module, test_cfg=test_cfg) feat = [] - for i in range(len(self.head_module['in_channels'])): - in_channel = self.head_module['in_channels'][i] - feat_size = self.head_module['featmap_strides'][i] - feat.append( - torch.rand(1, in_channel, s // feat_size, s // feat_size)) + for i in range(len(self.head_module["in_channels"])): + in_channel = self.head_module["in_channels"][i] + feat_size = self.head_module["featmap_strides"][i] + feat.append(torch.rand(1, in_channel, s // 
feat_size, s // feat_size)) cls_scores, bbox_preds = head.forward(feat) head.predict_by_feat( @@ -50,7 +53,8 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=True, - with_nms=True) + with_nms=True, + ) head.predict_by_feat( cls_scores, bbox_preds, @@ -58,4 +62,5 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=False, - with_nms=False) + with_nms=False, + ) diff --git a/mmyolo/tests/test_models/test_dense_heads/test_yolov7_head.py b/mmyolo/tests/test_models/test_dense_heads/test_yolov7_head.py index 5033f97e..fdfe0341 100644 --- a/mmyolo/tests/test_models/test_dense_heads/test_yolov7_head.py +++ b/mmyolo/tests/test_models/test_dense_heads/test_yolov7_head.py @@ -13,37 +13,40 @@ # TODO: Test YOLOv7p6HeadModule class TestYOLOv7Head(TestCase): - def setUp(self): self.head_module = dict( - type='YOLOv7HeadModule', + type="YOLOv7HeadModule", num_classes=2, in_channels=[32, 64, 128], featmap_strides=[8, 16, 32], - num_base_priors=3) + num_base_priors=3, + ) def test_predict_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'ori_shape': (s, s, 3), - 'scale_factor': (1.0, 1.0), - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "ori_shape": (s, s, 3), + "scale_factor": (1.0, 1.0), + } + ] test_cfg = Config( dict( multi_label=True, max_per_img=300, score_thr=0.01, - nms=dict(type='nms', iou_threshold=0.65))) + nms=dict(type="nms", iou_threshold=0.65), + ) + ) head = YOLOv7Head(head_module=self.head_module, test_cfg=test_cfg) feat = [] - for i in range(len(self.head_module['in_channels'])): - in_channel = self.head_module['in_channels'][i] - feat_size = self.head_module['featmap_strides'][i] - feat.append( - torch.rand(1, in_channel, s // feat_size, s // feat_size)) + for i in range(len(self.head_module["in_channels"])): + in_channel = self.head_module["in_channels"][i] + feat_size = self.head_module["featmap_strides"][i] + feat.append(torch.rand(1, in_channel, s // feat_size, s // feat_size)) cls_scores, bbox_preds, objectnesses = head.forward(feat) head.predict_by_feat( @@ -53,7 +56,8 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=True, - with_nms=True) + with_nms=True, + ) head.predict_by_feat( cls_scores, bbox_preds, @@ -61,85 +65,91 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=False, - with_nms=False) + with_nms=False, + ) def test_loss_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'batch_input_shape': (s, s), - 'scale_factor': 1, - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "batch_input_shape": (s, s), + "scale_factor": 1, + } + ] head = YOLOv7Head(head_module=self.head_module) feat = [] - for i in range(len(self.head_module['in_channels'])): - in_channel = self.head_module['in_channels'][i] - feat_size = self.head_module['featmap_strides'][i] - feat.append( - torch.rand(1, in_channel, s // feat_size, s // feat_size)) + for i in range(len(self.head_module["in_channels"])): + in_channel = self.head_module["in_channels"][i] + feat_size = self.head_module["featmap_strides"][i] + feat.append(torch.rand(1, in_channel, s // feat_size, s // feat_size)) cls_scores, bbox_preds, objectnesses = head.forward(feat) # Test that empty ground truth encourages the network to predict # background gt_instances = InstanceData( - bboxes=torch.empty((0, 4)), labels=torch.LongTensor([])) + bboxes=torch.empty((0, 4)), labels=torch.LongTensor([]) + ) - empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - objectnesses, [gt_instances], - img_metas) + empty_gt_losses = 
head.loss_by_feat( + cls_scores, bbox_preds, objectnesses, [gt_instances], img_metas + ) # When there is no truth, the cls loss should be nonzero but there # should be no box loss. - empty_cls_loss = empty_gt_losses['loss_cls'].sum() - empty_box_loss = empty_gt_losses['loss_bbox'].sum() - empty_obj_loss = empty_gt_losses['loss_obj'].sum() + empty_cls_loss = empty_gt_losses["loss_cls"].sum() + empty_box_loss = empty_gt_losses["loss_bbox"].sum() + empty_obj_loss = empty_gt_losses["loss_obj"].sum() self.assertEqual( - empty_cls_loss.item(), 0, - 'there should be no cls loss when there are no true boxes') + empty_cls_loss.item(), + 0, + "there should be no cls loss when there are no true boxes", + ) self.assertEqual( - empty_box_loss.item(), 0, - 'there should be no box loss when there are no true boxes') - self.assertGreater(empty_obj_loss.item(), 0, - 'objectness loss should be non-zero') + empty_box_loss.item(), + 0, + "there should be no box loss when there are no true boxes", + ) + self.assertGreater( + empty_obj_loss.item(), 0, "objectness loss should be non-zero" + ) # When truth is non-empty then both cls and box loss should be nonzero # for random inputs head = YOLOv7Head(head_module=self.head_module) gt_instances = InstanceData( bboxes=torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]), - labels=torch.LongTensor([1])) - - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, objectnesses, - [gt_instances], img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_obj_loss = one_gt_losses['loss_obj'].sum() - self.assertGreater(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_obj_loss.item(), 0, - 'obj loss should be non-zero') + labels=torch.LongTensor([1]), + ) + + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, objectnesses, [gt_instances], img_metas + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_obj_loss = one_gt_losses["loss_obj"].sum() + self.assertGreater(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_obj_loss.item(), 0, "obj loss should be non-zero") # test num_class = 1 - self.head_module['num_classes'] = 1 + self.head_module["num_classes"] = 1 head = YOLOv7Head(head_module=self.head_module) gt_instances = InstanceData( bboxes=torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]), - labels=torch.LongTensor([0])) + labels=torch.LongTensor([0]), + ) cls_scores, bbox_preds, objectnesses = head.forward(feat) - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, objectnesses, - [gt_instances], img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_obj_loss = one_gt_losses['loss_obj'].sum() - self.assertEqual(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_obj_loss.item(), 0, - 'obj loss should be non-zero') + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, objectnesses, [gt_instances], img_metas + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_obj_loss = one_gt_losses["loss_obj"].sum() + self.assertEqual(onegt_cls_loss.item(), 0, "cls 
loss should be zero when num_classes == 1") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_obj_loss.item(), 0, "obj loss should be non-zero") diff --git a/mmyolo/tests/test_models/test_dense_heads/test_yolov8_head.py b/mmyolo/tests/test_models/test_dense_heads/test_yolov8_head.py index 8980387a..85ee05ae 100644 --- a/mmyolo/tests/test_models/test_dense_heads/test_yolov8_head.py +++ b/mmyolo/tests/test_models/test_dense_heads/test_yolov8_head.py @@ -12,37 +12,40 @@ class TestYOLOv8Head(TestCase): - def setUp(self): self.head_module = dict( - type='YOLOv8HeadModule', + type="YOLOv8HeadModule", num_classes=4, in_channels=[32, 64, 128], - featmap_strides=[8, 16, 32]) + featmap_strides=[8, 16, 32], + ) def test_predict_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'ori_shape': (s, s, 3), - 'scale_factor': (1.0, 1.0), - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "ori_shape": (s, s, 3), + "scale_factor": (1.0, 1.0), + } + ] test_cfg = Config( dict( multi_label=True, max_per_img=300, score_thr=0.01, - nms=dict(type='nms', iou_threshold=0.65))) + nms=dict(type="nms", iou_threshold=0.65), + ) + ) head = YOLOv8Head(head_module=self.head_module, test_cfg=test_cfg) head.eval() feat = [] - for i in range(len(self.head_module['in_channels'])): - in_channel = self.head_module['in_channels'][i] - feat_size = self.head_module['featmap_strides'][i] - feat.append( - torch.rand(1, in_channel, s // feat_size, s // feat_size)) + for i in range(len(self.head_module["in_channels"])): + in_channel = self.head_module["in_channels"][i] + feat_size = self.head_module["featmap_strides"][i] + feat.append(torch.rand(1, in_channel, s // feat_size, s // feat_size)) cls_scores, bbox_preds = head.forward(feat) head.predict_by_feat( @@ -52,7 +55,8 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=True, - with_nms=True) + with_nms=True, + ) head.predict_by_feat( cls_scores, bbox_preds, @@ -60,33 +64,38 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=False, - with_nms=False) + with_nms=False, + ) def test_loss_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'batch_input_shape': (s, s), - 'scale_factor': 1, - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "batch_input_shape": (s, s), + "scale_factor": 1, + } + ] head = YOLOv8Head( head_module=self.head_module, train_cfg=ConfigDict( assigner=dict( - type='BatchTaskAlignedAssigner', + type="BatchTaskAlignedAssigner", num_classes=4, topk=10, alpha=0.5, - beta=6))) + beta=6, + ) + ), + ) head.train() feat = [] - for i in range(len(self.head_module['in_channels'])): - in_channel = self.head_module['in_channels'][i] - feat_size = self.head_module['featmap_strides'][i] - feat.append( - torch.rand(1, in_channel, s // feat_size, s // feat_size)) + for i in range(len(self.head_module["in_channels"])): + in_channel = self.head_module["in_channels"][i] + feat_size = self.head_module["featmap_strides"][i] + feat.append(torch.rand(1, in_channel, s // feat_size, s // feat_size)) cls_scores, bbox_preds, bbox_dist_preds = head.forward(feat) @@ -94,68 +103,70 @@ def test_loss_by_feat(self): # background gt_instances = torch.empty((0, 6), dtype=torch.float32) - empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - bbox_dist_preds, gt_instances, - img_metas) + empty_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, bbox_dist_preds, gt_instances, img_metas + ) # When there is no truth, the cls loss should be nonzero but there # should be no box loss.
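# The anchor-free YOLOv8/PPYOLOE tests pass ground truth as a raw (N, 6) float
# tensor rather than an InstanceData. Judging from the values used in these
# tests, each row appears to be [batch_idx, class_label, x1, y1, x2, y2] in
# pixel coordinates; a hedged sketch of both fixtures:
import torch

empty_gt = torch.empty((0, 6), dtype=torch.float32)  # the no-object case
one_gt = torch.tensor(
    [[0.0, 0.0, 23.6667, 23.8757, 238.6326, 151.8874]]  # image 0, class 0, xyxy box
)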
- empty_cls_loss = empty_gt_losses['loss_cls'].sum() - empty_box_loss = empty_gt_losses['loss_bbox'].sum() - empty_dfl_loss = empty_gt_losses['loss_dfl'].sum() - self.assertGreater(empty_cls_loss.item(), 0, - 'cls loss should be non-zero') + empty_cls_loss = empty_gt_losses["loss_cls"].sum() + empty_box_loss = empty_gt_losses["loss_bbox"].sum() + empty_dfl_loss = empty_gt_losses["loss_dfl"].sum() + self.assertGreater(empty_cls_loss.item(), 0, "cls loss should be non-zero") self.assertEqual( - empty_box_loss.item(), 0, - 'there should be no box loss when there are no true boxes') + empty_box_loss.item(), + 0, + "there should be no box loss when there are no true boxes", + ) self.assertEqual( - empty_dfl_loss.item(), 0, - 'there should be df loss when there are no true boxes') + empty_dfl_loss.item(), + 0, + "there should be no dfl loss when there are no true boxes", + ) # When truth is non-empty then both cls and box loss should be nonzero # for random inputs - gt_instances = torch.Tensor( - [[0., 0., 23.6667, 23.8757, 238.6326, 151.8874]]) - - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - bbox_dist_preds, gt_instances, - img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_loss_dfl = one_gt_losses['loss_dfl'].sum() - self.assertGreater(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_loss_dfl.item(), 0, - 'obj loss should be non-zero') + gt_instances = torch.Tensor([[0.0, 0.0, 23.6667, 23.8757, 238.6326, 151.8874]]) + + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, bbox_dist_preds, gt_instances, img_metas + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_loss_dfl = one_gt_losses["loss_dfl"].sum() + self.assertGreater(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_loss_dfl.item(), 0, "dfl loss should be non-zero") # test num_class = 1 - self.head_module['num_classes'] = 1 + self.head_module["num_classes"] = 1 head = YOLOv8Head( head_module=self.head_module, train_cfg=ConfigDict( assigner=dict( - type='BatchTaskAlignedAssigner', + type="BatchTaskAlignedAssigner", num_classes=1, topk=10, alpha=0.5, - beta=6))) + beta=6, + ) + ), + ) head.train() gt_instances = torch.Tensor( - [[0., 0., 23.6667, 23.8757, 238.6326, 151.8874], - [1., 0., 24.6667, 27.8757, 28.6326, 51.8874]]) + [ + [0.0, 0.0, 23.6667, 23.8757, 238.6326, 151.8874], + [1.0, 0.0, 24.6667, 27.8757, 28.6326, 51.8874], + ] + ) cls_scores, bbox_preds, bbox_dist_preds = head.forward(feat) - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - bbox_dist_preds, gt_instances, - img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_loss_dfl = one_gt_losses['loss_dfl'].sum() - self.assertGreater(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_loss_dfl.item(), 0, - 'obj loss should be non-zero') + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, bbox_dist_preds, gt_instances, img_metas + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_loss_dfl = one_gt_losses["loss_dfl"].sum() +
self.assertGreater(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_loss_dfl.item(), 0, "dfl loss should be non-zero") diff --git a/mmyolo/tests/test_models/test_dense_heads/test_yolox_head.py b/mmyolo/tests/test_models/test_dense_heads/test_yolox_head.py index 74467302..2aab64a5 100644 --- a/mmyolo/tests/test_models/test_dense_heads/test_yolox_head.py +++ b/mmyolo/tests/test_models/test_dense_heads/test_yolox_head.py @@ -14,10 +14,9 @@ class TestYOLOXHead(TestCase): - def setUp(self): self.head_module = dict( - type='YOLOXHeadModule', + type="YOLOXHeadModule", num_classes=4, in_channels=1, stacked_convs=1, @@ -27,31 +26,37 @@ def test_init_weights(self): head = YOLOXHead(head_module=self.head_module) head.head_module.init_weights() bias_init = bias_init_with_prob(0.01) - for conv_cls, conv_obj in zip(head.head_module.multi_level_conv_cls, - head.head_module.multi_level_conv_obj): - assert_allclose(conv_cls.bias.data, - torch.ones_like(conv_cls.bias.data) * bias_init) - assert_allclose(conv_obj.bias.data, - torch.ones_like(conv_obj.bias.data) * bias_init) + for conv_cls, conv_obj in zip( + head.head_module.multi_level_conv_cls, head.head_module.multi_level_conv_obj + ): + assert_allclose( + conv_cls.bias.data, torch.ones_like(conv_cls.bias.data) * bias_init + ) + assert_allclose( + conv_obj.bias.data, torch.ones_like(conv_obj.bias.data) * bias_init + ) def test_predict_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'ori_shape': (s, s, 3), - 'scale_factor': (1.0, 1.0), - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "ori_shape": (s, s, 3), + "scale_factor": (1.0, 1.0), + } + ] test_cfg = Config( dict( multi_label=True, max_per_img=300, score_thr=0.01, - nms=dict(type='nms', iou_threshold=0.65))) + nms=dict(type="nms", iou_threshold=0.65), + ) + ) head = YOLOXHead(head_module=self.head_module, test_cfg=test_cfg) feat = [ - torch.rand(1, 1, s // feat_size, s // feat_size) - for feat_size in [4, 8, 16] + torch.rand(1, 1, s // feat_size, s // feat_size) for feat_size in [4, 8, 16] ] cls_scores, bbox_preds, objectnesses = head.forward(feat) head.predict_by_feat( @@ -61,7 +66,8 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=True, - with_nms=True) + with_nms=True, + ) head.predict_by_feat( cls_scores, bbox_preds, @@ -69,54 +75,65 @@ def test_predict_by_feat(self): img_metas, cfg=test_cfg, rescale=False, - with_nms=False) + with_nms=False, + ) def test_loss_by_feat(self): s = 256 - img_metas = [{ - 'img_shape': (s, s, 3), - 'scale_factor': 1, - }] + img_metas = [ + { + "img_shape": (s, s, 3), + "scale_factor": 1, + } + ] train_cfg = Config( dict( assigner=dict( - type='mmdet.SimOTAAssigner', - iou_calculator=dict(type='mmdet.BboxOverlaps2D'), + type="mmdet.SimOTAAssigner", + iou_calculator=dict(type="mmdet.BboxOverlaps2D"), center_radius=2.5, candidate_topk=10, iou_weight=3.0, - cls_weight=1.0))) + cls_weight=1.0, + ) + ) + ) head = YOLOXHead(head_module=self.head_module, train_cfg=train_cfg) assert not head.use_bbox_aux feat = [ - torch.rand(1, 1, s // feat_size, s // feat_size) - for feat_size in [4, 8, 16] + torch.rand(1, 1, s // feat_size, s // feat_size) for feat_size in [4, 8, 16] ] cls_scores, bbox_preds, objectnesses = head.forward(feat) # Test that empty ground truth encourages the network to predict # background gt_instances = InstanceData( - bboxes=torch.empty((0, 4)),
labels=torch.LongTensor([]) + ) - empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - objectnesses, [gt_instances], - img_metas) + empty_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, objectnesses, [gt_instances], img_metas + ) # When there is no truth, the cls and box losses should both be zero; # only the objectness loss should be non-zero. - empty_cls_loss = empty_gt_losses['loss_cls'].sum() - empty_box_loss = empty_gt_losses['loss_bbox'].sum() - empty_obj_loss = empty_gt_losses['loss_obj'].sum() + empty_cls_loss = empty_gt_losses["loss_cls"].sum() + empty_box_loss = empty_gt_losses["loss_bbox"].sum() + empty_obj_loss = empty_gt_losses["loss_obj"].sum() self.assertEqual( - empty_cls_loss.item(), 0, - 'there should be no cls loss when there are no true boxes') + empty_cls_loss.item(), + 0, + "there should be no cls loss when there are no true boxes", + ) self.assertEqual( - empty_box_loss.item(), 0, - 'there should be no box loss when there are no true boxes') + empty_box_loss.item(), + 0, + "there should be no box loss when there are no true boxes", + ) - self.assertGreater(empty_obj_loss.item(), 0, - 'objectness loss should be non-zero') + self.assertGreater( + empty_obj_loss.item(), 0, "objectness loss should be non-zero" + ) # When truth is non-empty then both cls and box loss should be nonzero # for random inputs @@ -124,40 +141,44 @@ def test_loss_by_feat(self): head.use_bbox_aux = True gt_instances = InstanceData( bboxes=torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]), - labels=torch.LongTensor([2])) - - one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, objectnesses, - [gt_instances], img_metas) - onegt_cls_loss = one_gt_losses['loss_cls'].sum() - onegt_box_loss = one_gt_losses['loss_bbox'].sum() - onegt_obj_loss = one_gt_losses['loss_obj'].sum() - onegt_l1_loss = one_gt_losses['loss_bbox_aux'].sum() - self.assertGreater(onegt_cls_loss.item(), 0, - 'cls loss should be non-zero') - self.assertGreater(onegt_box_loss.item(), 0, - 'box loss should be non-zero') - self.assertGreater(onegt_obj_loss.item(), 0, - 'obj loss should be non-zero') - self.assertGreater(onegt_l1_loss.item(), 0, - 'l1 loss should be non-zero') + labels=torch.LongTensor([2]), + ) + + one_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, objectnesses, [gt_instances], img_metas + ) + onegt_cls_loss = one_gt_losses["loss_cls"].sum() + onegt_box_loss = one_gt_losses["loss_bbox"].sum() + onegt_obj_loss = one_gt_losses["loss_obj"].sum() + onegt_l1_loss = one_gt_losses["loss_bbox_aux"].sum() + self.assertGreater(onegt_cls_loss.item(), 0, "cls loss should be non-zero") + self.assertGreater(onegt_box_loss.item(), 0, "box loss should be non-zero") + self.assertGreater(onegt_obj_loss.item(), 0, "obj loss should be non-zero") + self.assertGreater(onegt_l1_loss.item(), 0, "l1 loss should be non-zero") # Test ground truth out of bound gt_instances = InstanceData( bboxes=torch.Tensor([[s * 4, s * 4, s * 4 + 10, s * 4 + 10]]), - labels=torch.LongTensor([2])) - empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - objectnesses, [gt_instances], - img_metas) + labels=torch.LongTensor([2]), + ) + empty_gt_losses = head.loss_by_feat( + cls_scores, bbox_preds, objectnesses, [gt_instances], img_metas + ) # When gt_bboxes out of bound, the assign results should be empty, # so the cls and bbox loss should be zero.
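# The out-of-bound case can be checked numerically: a box placed at four times
# the input size shares no area with the image, so SimOTA finds no candidate
# priors and assigns nothing. A standalone sketch of just that containment
# test (not the assigner itself):
import torch

s = 256
gt = torch.tensor([s * 4.0, s * 4.0, s * 4.0 + 10.0, s * 4.0 + 10.0])  # x1, y1, x2, y2
overlaps_image = bool((gt[0] < s) & (gt[1] < s) & (gt[2] > 0) & (gt[3] > 0))
assert not overlaps_image  # no positives, hence zero cls and box losses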
- empty_cls_loss = empty_gt_losses['loss_cls'].sum() - empty_box_loss = empty_gt_losses['loss_bbox'].sum() - empty_obj_loss = empty_gt_losses['loss_obj'].sum() + empty_cls_loss = empty_gt_losses["loss_cls"].sum() + empty_box_loss = empty_gt_losses["loss_bbox"].sum() + empty_obj_loss = empty_gt_losses["loss_obj"].sum() self.assertEqual( - empty_cls_loss.item(), 0, - 'there should be no cls loss when gt_bboxes out of bound') + empty_cls_loss.item(), + 0, + "there should be no cls loss when gt_bboxes out of bound", + ) self.assertEqual( - empty_box_loss.item(), 0, - 'there should be no box loss when gt_bboxes out of bound') - self.assertGreater(empty_obj_loss.item(), 0, - 'objectness loss should be non-zero') + empty_box_loss.item(), + 0, + "there should be no box loss when gt_bboxes out of bound", + ) + self.assertGreater( + empty_obj_loss.item(), 0, "objectness loss should be non-zero" + ) diff --git a/mmyolo/tests/test_models/test_detectors/test_yolo_detector.py b/mmyolo/tests/test_models/test_detectors/test_yolo_detector.py index 0af0f0b3..a14e751a 100644 --- a/mmyolo/tests/test_models/test_detectors/test_yolo_detector.py +++ b/mmyolo/tests/test_models/test_detectors/test_yolo_detector.py @@ -14,89 +14,98 @@ class TestSingleStageDetector(TestCase): - def setUp(self): register_all_modules() - @parameterized.expand([ - 'yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py', - 'yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py', - 'yolox/yolox_tiny_8xb8-300e_coco.py', - 'rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py', - 'yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py', - 'yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py' - ]) + @parameterized.expand( + [ + "yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py", + "yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py", + "yolox/yolox_tiny_8xb8-300e_coco.py", + "rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py", + "yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py", + "yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py", + ] + ) def test_init(self, cfg_file): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmyolo.registry import MODELS + detector = MODELS.build(model) self.assertTrue(detector.backbone) self.assertTrue(detector.neck) self.assertTrue(detector.bbox_head) - @parameterized.expand([ - ('yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py', ('cuda', 'cpu')), - ('yolox/yolox_s_8xb8-300e_coco.py', ('cuda', 'cpu')), - ('yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py', ('cuda', 'cpu')), - ('rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py', ('cuda', 'cpu')), - ('yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py', ('cuda', 'cpu')) - ]) + @parameterized.expand( + [ + ("yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py", ("cuda", "cpu")), + ("yolox/yolox_s_8xb8-300e_coco.py", ("cuda", "cpu")), + ("yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py", ("cuda", "cpu")), + ("rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py", ("cuda", "cpu")), + ("yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py", ("cuda", "cpu")), + ] + ) def test_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( - f'test_single_stage_forward_loss_mode-{time.time()}') - message_hub.update_info('iter', 0) - message_hub.update_info('epoch', 0) + f"test_single_stage_forward_loss_mode-{time.time()}" + ) + message_hub.update_info("iter", 0) + message_hub.update_info("epoch", 0) model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None - if 'fast' in cfg_file: + if "fast" in cfg_file: model.data_preprocessor = dict( - 
type='mmdet.DetDataPreprocessor', - mean=[0., 0., 0.], - std=[255., 255., 255.], - bgr_to_rgb=True) + type="mmdet.DetDataPreprocessor", + mean=[0.0, 0.0, 0.0], + std=[255.0, 255.0, 255.0], + bgr_to_rgb=True, + ) from mmyolo.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + + assert all([device in ["cpu", "cuda"] for device in devices]) for device in devices: detector = MODELS.build(model) detector.init_weights() - if device == 'cuda': + if device == "cuda": if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip("test requires GPU and torch+cuda") detector = detector.cuda() packed_inputs = demo_mm_inputs(2, [[3, 320, 128], [3, 125, 320]]) data = detector.data_preprocessor(packed_inputs, True) - losses = detector.forward(**data, mode='loss') + losses = detector.forward(**data, mode="loss") self.assertIsInstance(losses, dict) - @parameterized.expand([ - ('yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py', ('cuda', - 'cpu')), - ('yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py', ('cuda', 'cpu')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cuda', 'cpu')), - ('yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py', ('cuda', 'cpu')), - ('rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py', ('cuda', 'cpu')), - ('yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py', ('cuda', 'cpu')) - ]) + @parameterized.expand( + [ + ("yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py", ("cuda", "cpu")), + ("yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py", ("cuda", "cpu")), + ("yolox/yolox_tiny_8xb8-300e_coco.py", ("cuda", "cpu")), + ("yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py", ("cuda", "cpu")), + ("rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py", ("cuda", "cpu")), + ("yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py", ("cuda", "cpu")), + ] + ) def test_forward_predict_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmyolo.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + + assert all([device in ["cpu", "cuda"] for device in devices]) for device in devices: detector = MODELS.build(model) - if device == 'cuda': + if device == "cuda": if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip("test requires GPU and torch+cuda") detector = detector.cuda() packed_inputs = demo_mm_inputs(2, [[3, 320, 128], [3, 125, 320]]) @@ -104,35 +113,37 @@ def test_forward_predict_mode(self, cfg_file, devices): # Test forward test detector.eval() with torch.no_grad(): - batch_results = detector.forward(**data, mode='predict') + batch_results = detector.forward(**data, mode="predict") self.assertEqual(len(batch_results), 2) self.assertIsInstance(batch_results[0], DetDataSample) - @parameterized.expand([ - ('yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py', ('cuda', - 'cpu')), - ('yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py', ('cuda', 'cpu')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cuda', 'cpu')), - ('yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py', ('cuda', 'cpu')), - ('rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py', ('cuda', 'cpu')), - ('yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py', ('cuda', 'cpu')) - ]) + @parameterized.expand( + [ + ("yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py", ("cuda", "cpu")), + ("yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py", ("cuda", "cpu")), + ("yolox/yolox_tiny_8xb8-300e_coco.py", ("cuda", "cpu")), + 
("yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py", ("cuda", "cpu")), + ("rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py", ("cuda", "cpu")), + ("yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py", ("cuda", "cpu")), + ] + ) def test_forward_tensor_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmyolo.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + + assert all([device in ["cpu", "cuda"] for device in devices]) for device in devices: detector = MODELS.build(model) - if device == 'cuda': + if device == "cuda": if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip("test requires GPU and torch+cuda") detector = detector.cuda() packed_inputs = demo_mm_inputs(2, [[3, 320, 128], [3, 125, 320]]) data = detector.data_preprocessor(packed_inputs, False) - batch_results = detector.forward(**data, mode='tensor') + batch_results = detector.forward(**data, mode="tensor") self.assertIsInstance(batch_results, tuple) diff --git a/mmyolo/tests/test_models/test_layers/test_ema.py b/mmyolo/tests/test_models/test_layers/test_ema.py index b3583828..f2b250e1 100644 --- a/mmyolo/tests/test_models/test_layers/test_ema.py +++ b/mmyolo/tests/test_models/test_layers/test_ema.py @@ -11,24 +11,21 @@ class TestEMA(TestCase): - def test_exp_momentum_ema(self): model = nn.Sequential(nn.Conv2d(1, 5, kernel_size=3), nn.Linear(5, 10)) # Test invalid gamma - with self.assertRaisesRegex(AssertionError, - 'gamma must be greater than 0'): + with self.assertRaisesRegex(AssertionError, "gamma must be greater than 0"): ExpMomentumEMA(model, gamma=-1) # Test EMA model = torch.nn.Sequential( - torch.nn.Conv2d(1, 5, kernel_size=3), torch.nn.Linear(5, 10)) + torch.nn.Conv2d(1, 5, kernel_size=3), torch.nn.Linear(5, 10) + ) momentum = 0.1 gamma = 4 ema_model = ExpMomentumEMA(model, momentum=momentum, gamma=gamma) - averaged_params = [ - torch.zeros_like(param) for param in model.parameters() - ] + averaged_params = [torch.zeros_like(param) for param in model.parameters()] n_updates = 10 for i in range(n_updates): updated_averaged_params = [] @@ -38,8 +35,7 @@ def test_exp_momentum_ema(self): updated_averaged_params.append(p.clone()) else: m = (1 - momentum) * math.exp(-(1 + i) / gamma) + momentum - updated_averaged_params.append( - (p_avg * (1 - m) + p * m).clone()) + updated_averaged_params.append((p_avg * (1 - m) + p * m).clone()) ema_model.update_parameters(model) averaged_params = updated_averaged_params @@ -48,11 +44,12 @@ def test_exp_momentum_ema(self): def test_exp_momentum_ema_update_buffer(self): model = nn.Sequential( - nn.Conv2d(1, 5, kernel_size=3), nn.BatchNorm2d(5, momentum=0.3), - nn.Linear(5, 10)) + nn.Conv2d(1, 5, kernel_size=3), + nn.BatchNorm2d(5, momentum=0.3), + nn.Linear(5, 10), + ) # Test invalid gamma - with self.assertRaisesRegex(AssertionError, - 'gamma must be greater than 0'): + with self.assertRaisesRegex(AssertionError, "gamma must be greater than 0"): ExpMomentumEMA(model, gamma=-1) # Test EMA with momentum annealing. 
@@ -60,7 +57,8 @@ def test_exp_momentum_ema_update_buffer(self): gamma = 4 ema_model = ExpMomentumEMA( - model, gamma=gamma, momentum=momentum, update_buffers=True) + model, gamma=gamma, momentum=momentum, update_buffers=True + ) averaged_params = [ torch.zeros_like(param) for param in itertools.chain(model.parameters(), model.buffers()) @@ -70,8 +68,8 @@ def test_exp_momentum_ema_update_buffer(self): for i in range(n_updates): updated_averaged_params = [] params = [ - param for param in itertools.chain(model.parameters(), - model.buffers()) + param + for param in itertools.chain(model.parameters(), model.buffers()) if param.size() != torch.Size([]) ] for p, p_avg in zip(params, averaged_params): @@ -80,14 +78,15 @@ def test_exp_momentum_ema_update_buffer(self): updated_averaged_params.append(p.clone()) else: m = (1 - momentum) * math.exp(-(1 + i) / gamma) + momentum - updated_averaged_params.append( - (p_avg * (1 - m) + p * m).clone()) + updated_averaged_params.append((p_avg * (1 - m) + p * m).clone()) ema_model.update_parameters(model) averaged_params = updated_averaged_params ema_params = [ - param for param in itertools.chain(ema_model.module.parameters(), - ema_model.module.buffers()) + param + for param in itertools.chain( + ema_model.module.parameters(), ema_model.module.buffers() + ) if param.size() != torch.Size([]) ] for p_target, p_ema in zip(averaged_params, ema_params): diff --git a/mmyolo/tests/test_models/test_layers/test_yolo_bricks.py b/mmyolo/tests/test_models/test_layers/test_yolo_bricks.py index 5331a4e0..b336730c 100644 --- a/mmyolo/tests/test_models/test_layers/test_yolo_bricks.py +++ b/mmyolo/tests/test_models/test_layers/test_yolo_bricks.py @@ -11,7 +11,6 @@ class TestSPPFBottleneck(TestCase): - def test_forward(self): input_tensor = torch.randn((1, 3, 20, 20)) bottleneck = SPPFBottleneck(3, 16) @@ -29,6 +28,7 @@ def test_forward(self): # set use_conv_first=False bottleneck = SPPFBottleneck( - 3, 16, use_conv_first=False, kernel_sizes=[3, 5, 7, 9]) + 3, 16, use_conv_first=False, kernel_sizes=[3, 5, 7, 9] + ) out_tensor = bottleneck(input_tensor) self.assertEqual(out_tensor.shape, (1, 16, 20, 20)) diff --git a/mmyolo/tests/test_models/test_necks/test_cspnext_pafpn.py b/mmyolo/tests/test_models/test_necks/test_cspnext_pafpn.py index b26c99aa..27dcc377 100644 --- a/mmyolo/tests/test_models/test_necks/test_cspnext_pafpn.py +++ b/mmyolo/tests/test_models/test_necks/test_cspnext_pafpn.py @@ -10,7 +10,6 @@ class TestCSPNeXtPAFPN(TestCase): - def test_forward(self): s = 64 in_channels = [8, 16, 32] @@ -29,9 +28,9 @@ def test_forward(self): # test depth-wise neck = CSPNeXtPAFPN( - in_channels=in_channels, - out_channels=out_channels, - use_depthwise=True) + in_channels=in_channels, out_channels=out_channels, use_depthwise=True + ) from mmcv.cnn.bricks import DepthwiseSeparableConvModule + self.assertTrue(neck.conv, DepthwiseSeparableConvModule) diff --git a/mmyolo/tests/test_models/test_necks/test_ppyoloe_csppan.py b/mmyolo/tests/test_models/test_necks/test_ppyoloe_csppan.py index b79c1ce5..86b09d14 100644 --- a/mmyolo/tests/test_models/test_necks/test_ppyoloe_csppan.py +++ b/mmyolo/tests/test_models/test_necks/test_ppyoloe_csppan.py @@ -10,7 +10,6 @@ class TestPPYOLOECSPPAFPN(TestCase): - def test_forward(self): s = 64 in_channels = [8, 16, 32] @@ -20,8 +19,7 @@ def test_forward(self): torch.rand(1, in_channels[i], feat_sizes[i], feat_sizes[i]) for i in range(len(in_channels)) ] - neck = PPYOLOECSPPAFPN( - in_channels=in_channels, out_channels=out_channels) + neck = 
PPYOLOECSPPAFPN(in_channels=in_channels, out_channels=out_channels) outs = neck(feats) assert len(outs) == len(feats) for i in range(len(feats)): @@ -41,10 +39,9 @@ def test_drop_block(self): in_channels=in_channels, out_channels=out_channels, drop_block_cfg=dict( - type='mmdet.DropBlock', - drop_prob=0.1, - block_size=3, - warm_iters=0)) + type="mmdet.DropBlock", drop_prob=0.1, block_size=3, warm_iters=0 + ), + ) neck.train() outs = neck(feats) assert len(outs) == len(feats) diff --git a/mmyolo/tests/test_models/test_necks/test_yolov5_pafpn.py b/mmyolo/tests/test_models/test_necks/test_yolov5_pafpn.py index 339621ec..623c410e 100644 --- a/mmyolo/tests/test_models/test_necks/test_yolov5_pafpn.py +++ b/mmyolo/tests/test_models/test_necks/test_yolov5_pafpn.py @@ -10,7 +10,6 @@ class TestYOLOv5PAFPN(TestCase): - def test_forward(self): s = 64 in_channels = [8, 16, 32] diff --git a/mmyolo/tests/test_models/test_necks/test_yolov6_pafpn.py b/mmyolo/tests/test_models/test_necks/test_yolov6_pafpn.py index bea49feb..b2781386 100644 --- a/mmyolo/tests/test_models/test_necks/test_yolov6_pafpn.py +++ b/mmyolo/tests/test_models/test_necks/test_yolov6_pafpn.py @@ -10,7 +10,6 @@ class TestYOLOv6PAFPN(TestCase): - def test_YOLOv6RepPAFP_forward(self): s = 64 in_channels = [8, 16, 32] @@ -20,8 +19,7 @@ def test_YOLOv6RepPAFP_forward(self): torch.rand(1, in_channels[i], feat_sizes[i], feat_sizes[i]) for i in range(len(in_channels)) ] - neck = YOLOv6RepPAFPN( - in_channels=in_channels, out_channels=out_channels) + neck = YOLOv6RepPAFPN(in_channels=in_channels, out_channels=out_channels) outs = neck(feats) assert len(outs) == len(feats) for i in range(len(feats)): @@ -37,8 +35,7 @@ def test_YOLOv6CSPRepPAFPN_forward(self): torch.rand(1, in_channels[i], feat_sizes[i], feat_sizes[i]) for i in range(len(in_channels)) ] - neck = YOLOv6CSPRepPAFPN( - in_channels=in_channels, out_channels=out_channels) + neck = YOLOv6CSPRepPAFPN(in_channels=in_channels, out_channels=out_channels) outs = neck(feats) assert len(outs) == len(feats) for i in range(len(feats)): diff --git a/mmyolo/tests/test_models/test_necks/test_yolov7_pafpn.py b/mmyolo/tests/test_models/test_necks/test_yolov7_pafpn.py index 17bf455c..8644f2e6 100644 --- a/mmyolo/tests/test_models/test_necks/test_yolov7_pafpn.py +++ b/mmyolo/tests/test_models/test_necks/test_yolov7_pafpn.py @@ -11,7 +11,6 @@ class TestYOLOv7PAFPN(TestCase): - def test_forward(self): # test P5 s = 64 @@ -31,9 +30,8 @@ def test_forward(self): # test is_tiny_version neck = YOLOv7PAFPN( - in_channels=in_channels, - out_channels=out_channels, - is_tiny_version=True) + in_channels=in_channels, out_channels=out_channels, is_tiny_version=True + ) outs = neck(feats) assert len(outs) == len(feats) for i in range(len(feats)): @@ -44,7 +42,8 @@ def test_forward(self): neck = YOLOv7PAFPN( in_channels=in_channels, out_channels=out_channels, - use_in_channels_in_downsample=True) + use_in_channels_in_downsample=True, + ) for f in feats: print(f.shape) outs = neck(feats) @@ -57,9 +56,8 @@ def test_forward(self): # test use_repconv_outs is False neck = YOLOv7PAFPN( - in_channels=in_channels, - out_channels=out_channels, - use_repconv_outs=False) + in_channels=in_channels, out_channels=out_channels, use_repconv_outs=False + ) self.assertIsInstance(neck.out_layers[0], ConvModule) # test P6 diff --git a/mmyolo/tests/test_models/test_necks/test_yolov8_pafpn.py b/mmyolo/tests/test_models/test_necks/test_yolov8_pafpn.py index 66d136d0..c8754f4b 100644 --- 
a/mmyolo/tests/test_models/test_necks/test_yolov8_pafpn.py +++ b/mmyolo/tests/test_models/test_necks/test_yolov8_pafpn.py @@ -10,7 +10,6 @@ class TestYOLOv8PAFPN(TestCase): - def test_YOLOv8PAFPN_forward(self): s = 64 in_channels = [8, 16, 32] diff --git a/mmyolo/tests/test_models/test_necks/test_yolox_pafpn.py b/mmyolo/tests/test_models/test_necks/test_yolox_pafpn.py index 25fe67a1..7518c3a5 100644 --- a/mmyolo/tests/test_models/test_necks/test_yolox_pafpn.py +++ b/mmyolo/tests/test_models/test_necks/test_yolox_pafpn.py @@ -10,7 +10,6 @@ class TestYOLOXPAFPN(TestCase): - def test_forward(self): s = 64 in_channels = [8, 16, 32] diff --git a/mmyolo/tests/test_models/test_plugins/test_cbam.py b/mmyolo/tests/test_models/test_plugins/test_cbam.py index 4af547c0..31b23344 100644 --- a/mmyolo/tests/test_models/test_plugins/test_cbam.py +++ b/mmyolo/tests/test_models/test_plugins/test_cbam.py @@ -11,7 +11,6 @@ class TestCBAM(TestCase): - def test_forward(self): tensor_shape = (2, 16, 20, 20) @@ -26,6 +25,6 @@ def test_forward(self): self.assertEqual(out.shape, tensor_shape) # test other act_cfg in ChannelAttention - cbam = CBAM(in_channels=16, act_cfg=dict(type='Sigmoid')) + cbam = CBAM(in_channels=16, act_cfg=dict(type="Sigmoid")) out = cbam(images) self.assertEqual(out.shape, tensor_shape) diff --git a/mmyolo/tests/test_models/test_task_modules/test_assigners/test_batch_atss_assigner.py b/mmyolo/tests/test_models/test_task_modules/test_assigners/test_batch_atss_assigner.py index a01e4fce..f1db4547 100644 --- a/mmyolo/tests/test_models/test_task_modules/test_assigners/test_batch_atss_assigner.py +++ b/mmyolo/tests/test_models/test_task_modules/test_assigners/test_batch_atss_assigner.py @@ -7,53 +7,79 @@ class TestBatchATSSAssigner(TestCase): - def test_batch_atss_assigner(self): num_classes = 2 batch_size = 2 batch_atss_assigner = BatchATSSAssigner( topk=3, - iou_calculator=dict(type='mmdet.BboxOverlaps2D'), - num_classes=num_classes) - priors = torch.FloatTensor([ - [4., 4., 8., 8.], - [12., 4., 8., 8.], - [20., 4., 8., 8.], - [28., 4., 8., 8.], - ]).repeat(21, 1) - gt_bboxes = torch.FloatTensor([ - [0, 0, 60, 93], - [229, 0, 532, 157], - ]).unsqueeze(0).repeat(batch_size, 1, 1) - gt_labels = torch.LongTensor([ - [0], - [11], - ]).unsqueeze(0).repeat(batch_size, 1, 1) + iou_calculator=dict(type="mmdet.BboxOverlaps2D"), + num_classes=num_classes, + ) + priors = torch.FloatTensor( + [ + [4.0, 4.0, 8.0, 8.0], + [12.0, 4.0, 8.0, 8.0], + [20.0, 4.0, 8.0, 8.0], + [28.0, 4.0, 8.0, 8.0], + ] + ).repeat(21, 1) + gt_bboxes = ( + torch.FloatTensor( + [ + [0, 0, 60, 93], + [229, 0, 532, 157], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 1, 1) + ) + gt_labels = ( + torch.LongTensor( + [ + [0], + [11], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 1, 1) + ) num_level_bboxes = [64, 16, 4] - pad_bbox_flag = torch.FloatTensor([ - [1], - [0], - ]).unsqueeze(0).repeat(batch_size, 1, 1) - pred_bboxes = torch.FloatTensor([ - [-4., -4., 12., 12.], - [4., -4., 20., 12.], - [12., -4., 28., 12.], - [20., -4., 36., 12.], - ]).unsqueeze(0).repeat(batch_size, 21, 1) + pad_bbox_flag = ( + torch.FloatTensor( + [ + [1], + [0], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 1, 1) + ) + pred_bboxes = ( + torch.FloatTensor( + [ + [-4.0, -4.0, 12.0, 12.0], + [4.0, -4.0, 20.0, 12.0], + [12.0, -4.0, 28.0, 12.0], + [20.0, -4.0, 36.0, 12.0], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 21, 1) + ) batch_assign_result = batch_atss_assigner.forward( - pred_bboxes, priors, num_level_bboxes, gt_labels, gt_bboxes, - pad_bbox_flag) + 
pred_bboxes, priors, num_level_bboxes, gt_labels, gt_bboxes, pad_bbox_flag + ) - assigned_labels = batch_assign_result['assigned_labels'] - assigned_bboxes = batch_assign_result['assigned_bboxes'] - assigned_scores = batch_assign_result['assigned_scores'] - fg_mask_pre_prior = batch_assign_result['fg_mask_pre_prior'] + assigned_labels = batch_assign_result["assigned_labels"] + assigned_bboxes = batch_assign_result["assigned_bboxes"] + assigned_scores = batch_assign_result["assigned_scores"] + fg_mask_pre_prior = batch_assign_result["fg_mask_pre_prior"] self.assertEqual(assigned_labels.shape, torch.Size([batch_size, 84])) - self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 84, - 4])) - self.assertEqual(assigned_scores.shape, - torch.Size([batch_size, 84, num_classes])) + self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 84, 4])) + self.assertEqual( + assigned_scores.shape, torch.Size([batch_size, 84, num_classes]) + ) self.assertEqual(fg_mask_pre_prior.shape, torch.Size([batch_size, 84])) def test_batch_atss_assigner_with_empty_gt(self): @@ -62,43 +88,58 @@ def test_batch_atss_assigner_with_empty_gt(self): batch_size = 2 batch_atss_assigner = BatchATSSAssigner( topk=3, - iou_calculator=dict(type='mmdet.BboxOverlaps2D'), - num_classes=num_classes) - priors = torch.FloatTensor([ - [4., 4., 8., 8.], - [12., 4., 8., 8.], - [20., 4., 8., 8.], - [28., 4., 8., 8.], - ]).repeat(21, 1) + iou_calculator=dict(type="mmdet.BboxOverlaps2D"), + num_classes=num_classes, + ) + priors = torch.FloatTensor( + [ + [4.0, 4.0, 8.0, 8.0], + [12.0, 4.0, 8.0, 8.0], + [20.0, 4.0, 8.0, 8.0], + [28.0, 4.0, 8.0, 8.0], + ] + ).repeat(21, 1) num_level_bboxes = [64, 16, 4] - pad_bbox_flag = torch.FloatTensor([ - [1], - [0], - ]).unsqueeze(0).repeat(batch_size, 1, 1) - pred_bboxes = torch.FloatTensor([ - [-4., -4., 12., 12.], - [4., -4., 20., 12.], - [12., -4., 28., 12.], - [20., -4., 36., 12.], - ]).unsqueeze(0).repeat(batch_size, 21, 1) + pad_bbox_flag = ( + torch.FloatTensor( + [ + [1], + [0], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 1, 1) + ) + pred_bboxes = ( + torch.FloatTensor( + [ + [-4.0, -4.0, 12.0, 12.0], + [4.0, -4.0, 20.0, 12.0], + [12.0, -4.0, 28.0, 12.0], + [20.0, -4.0, 36.0, 12.0], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 21, 1) + ) gt_bboxes = torch.zeros(batch_size, 0, 4) gt_labels = torch.zeros(batch_size, 0, 1) batch_assign_result = batch_atss_assigner.forward( - pred_bboxes, priors, num_level_bboxes, gt_labels, gt_bboxes, - pad_bbox_flag) + pred_bboxes, priors, num_level_bboxes, gt_labels, gt_bboxes, pad_bbox_flag + ) - assigned_labels = batch_assign_result['assigned_labels'] - assigned_bboxes = batch_assign_result['assigned_bboxes'] - assigned_scores = batch_assign_result['assigned_scores'] - fg_mask_pre_prior = batch_assign_result['fg_mask_pre_prior'] + assigned_labels = batch_assign_result["assigned_labels"] + assigned_bboxes = batch_assign_result["assigned_bboxes"] + assigned_scores = batch_assign_result["assigned_scores"] + fg_mask_pre_prior = batch_assign_result["fg_mask_pre_prior"] self.assertEqual(assigned_labels.shape, torch.Size([batch_size, 84])) - self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 84, - 4])) - self.assertEqual(assigned_scores.shape, - torch.Size([batch_size, 84, num_classes])) + self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 84, 4])) + self.assertEqual( + assigned_scores.shape, torch.Size([batch_size, 84, num_classes]) + ) self.assertEqual(fg_mask_pre_prior.shape, torch.Size([batch_size, 84])) def 
test_batch_atss_assigner_with_empty_boxs(self): @@ -107,40 +148,60 @@ def test_batch_atss_assigner_with_empty_boxs(self): batch_size = 2 batch_atss_assigner = BatchATSSAssigner( topk=3, - iou_calculator=dict(type='mmdet.BboxOverlaps2D'), - num_classes=num_classes) + iou_calculator=dict(type="mmdet.BboxOverlaps2D"), + num_classes=num_classes, + ) priors = torch.zeros(84, 4) - gt_bboxes = torch.FloatTensor([ - [0, 0, 60, 93], - [229, 0, 532, 157], - ]).unsqueeze(0).repeat(batch_size, 1, 1) - gt_labels = torch.LongTensor([ - [0], - [11], - ]).unsqueeze(0).repeat(batch_size, 1, 1) + gt_bboxes = ( + torch.FloatTensor( + [ + [0, 0, 60, 93], + [229, 0, 532, 157], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 1, 1) + ) + gt_labels = ( + torch.LongTensor( + [ + [0], + [11], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 1, 1) + ) num_level_bboxes = [64, 16, 4] - pad_bbox_flag = torch.FloatTensor([[1], [0]]).unsqueeze(0).repeat( - batch_size, 1, 1) - pred_bboxes = torch.FloatTensor([ - [-4., -4., 12., 12.], - [4., -4., 20., 12.], - [12., -4., 28., 12.], - [20., -4., 36., 12.], - ]).unsqueeze(0).repeat(batch_size, 21, 1) + pad_bbox_flag = ( + torch.FloatTensor([[1], [0]]).unsqueeze(0).repeat(batch_size, 1, 1) + ) + pred_bboxes = ( + torch.FloatTensor( + [ + [-4.0, -4.0, 12.0, 12.0], + [4.0, -4.0, 20.0, 12.0], + [12.0, -4.0, 28.0, 12.0], + [20.0, -4.0, 36.0, 12.0], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 21, 1) + ) batch_assign_result = batch_atss_assigner.forward( - pred_bboxes, priors, num_level_bboxes, gt_labels, gt_bboxes, - pad_bbox_flag) - assigned_labels = batch_assign_result['assigned_labels'] - assigned_bboxes = batch_assign_result['assigned_bboxes'] - assigned_scores = batch_assign_result['assigned_scores'] - fg_mask_pre_prior = batch_assign_result['fg_mask_pre_prior'] + pred_bboxes, priors, num_level_bboxes, gt_labels, gt_bboxes, pad_bbox_flag + ) + assigned_labels = batch_assign_result["assigned_labels"] + assigned_bboxes = batch_assign_result["assigned_bboxes"] + assigned_scores = batch_assign_result["assigned_scores"] + fg_mask_pre_prior = batch_assign_result["fg_mask_pre_prior"] self.assertEqual(assigned_labels.shape, torch.Size([batch_size, 84])) - self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 84, - 4])) - self.assertEqual(assigned_scores.shape, - torch.Size([batch_size, 84, num_classes])) + self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 84, 4])) + self.assertEqual( + assigned_scores.shape, torch.Size([batch_size, 84, num_classes]) + ) self.assertEqual(fg_mask_pre_prior.shape, torch.Size([batch_size, 84])) def test_batch_atss_assigner_with_empty_boxes_and_gt(self): @@ -150,8 +211,9 @@ def test_batch_atss_assigner_with_empty_boxes_and_gt(self): batch_size = 2 batch_atss_assigner = BatchATSSAssigner( topk=3, - iou_calculator=dict(type='mmdet.BboxOverlaps2D'), - num_classes=num_classes) + iou_calculator=dict(type="mmdet.BboxOverlaps2D"), + num_classes=num_classes, + ) priors = torch.zeros(84, 4) gt_bboxes = torch.zeros(batch_size, 0, 4) gt_labels = torch.zeros(batch_size, 0, 1) @@ -160,16 +222,16 @@ def test_batch_atss_assigner_with_empty_boxes_and_gt(self): pred_bboxes = torch.zeros(batch_size, 0, 4) batch_assign_result = batch_atss_assigner.forward( - pred_bboxes, priors, num_level_bboxes, gt_labels, gt_bboxes, - pad_bbox_flag) - assigned_labels = batch_assign_result['assigned_labels'] - assigned_bboxes = batch_assign_result['assigned_bboxes'] - assigned_scores = batch_assign_result['assigned_scores'] - fg_mask_pre_prior = 
batch_assign_result['fg_mask_pre_prior'] + pred_bboxes, priors, num_level_bboxes, gt_labels, gt_bboxes, pad_bbox_flag + ) + assigned_labels = batch_assign_result["assigned_labels"] + assigned_bboxes = batch_assign_result["assigned_bboxes"] + assigned_scores = batch_assign_result["assigned_scores"] + fg_mask_pre_prior = batch_assign_result["fg_mask_pre_prior"] self.assertEqual(assigned_labels.shape, torch.Size([batch_size, 84])) - self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 84, - 4])) - self.assertEqual(assigned_scores.shape, - torch.Size([batch_size, 84, num_classes])) + self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 84, 4])) + self.assertEqual( + assigned_scores.shape, torch.Size([batch_size, 84, num_classes]) + ) self.assertEqual(fg_mask_pre_prior.shape, torch.Size([batch_size, 84])) diff --git a/mmyolo/tests/test_models/test_task_modules/test_assigners/test_batch_task_aligned_assigner.py b/mmyolo/tests/test_models/test_task_modules/test_assigners/test_batch_task_aligned_assigner.py index fe474b53..f102a9f3 100644 --- a/mmyolo/tests/test_models/test_task_modules/test_assigners/test_batch_task_aligned_assigner.py +++ b/mmyolo/tests/test_models/test_task_modules/test_assigners/test_batch_task_aligned_assigner.py @@ -7,50 +7,71 @@ class TestBatchTaskAlignedAssigner(TestCase): - def test_batch_task_aligned_assigner(self): batch_size = 2 num_classes = 4 assigner = BatchTaskAlignedAssigner( - num_classes=num_classes, alpha=1, beta=6, topk=13, eps=1e-9) - pred_scores = torch.FloatTensor([ - [0.1, 0.2], - [0.2, 0.3], - [0.3, 0.4], - [0.4, 0.5], - ]).unsqueeze(0).repeat(batch_size, 21, 1) - priors = torch.FloatTensor([ - [0, 0, 4., 4.], - [0, 0, 12., 4.], - [0, 0, 20., 4.], - [0, 0, 28., 4.], - ]).repeat(21, 1) - gt_bboxes = torch.FloatTensor([ - [0, 0, 60, 93], - [229, 0, 532, 157], - ]).unsqueeze(0).repeat(batch_size, 1, 1) - gt_labels = torch.LongTensor([[0], [1] - ]).unsqueeze(0).repeat(batch_size, 1, 1) - pad_bbox_flag = torch.FloatTensor([[1], [0]]).unsqueeze(0).repeat( - batch_size, 1, 1) - pred_bboxes = torch.FloatTensor([ - [-4., -4., 12., 12.], - [4., -4., 20., 12.], - [12., -4., 28., 12.], - [20., -4., 36., 12.], - ]).unsqueeze(0).repeat(batch_size, 21, 1) + num_classes=num_classes, alpha=1, beta=6, topk=13, eps=1e-9 + ) + pred_scores = ( + torch.FloatTensor( + [ + [0.1, 0.2], + [0.2, 0.3], + [0.3, 0.4], + [0.4, 0.5], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 21, 1) + ) + priors = torch.FloatTensor( + [ + [0, 0, 4.0, 4.0], + [0, 0, 12.0, 4.0], + [0, 0, 20.0, 4.0], + [0, 0, 28.0, 4.0], + ] + ).repeat(21, 1) + gt_bboxes = ( + torch.FloatTensor( + [ + [0, 0, 60, 93], + [229, 0, 532, 157], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 1, 1) + ) + gt_labels = torch.LongTensor([[0], [1]]).unsqueeze(0).repeat(batch_size, 1, 1) + pad_bbox_flag = ( + torch.FloatTensor([[1], [0]]).unsqueeze(0).repeat(batch_size, 1, 1) + ) + pred_bboxes = ( + torch.FloatTensor( + [ + [-4.0, -4.0, 12.0, 12.0], + [4.0, -4.0, 20.0, 12.0], + [12.0, -4.0, 28.0, 12.0], + [20.0, -4.0, 36.0, 12.0], + ] + ) + .unsqueeze(0) + .repeat(batch_size, 21, 1) + ) - assign_result = assigner.forward(pred_bboxes, pred_scores, priors, - gt_labels, gt_bboxes, pad_bbox_flag) + assign_result = assigner.forward( + pred_bboxes, pred_scores, priors, gt_labels, gt_bboxes, pad_bbox_flag + ) - assigned_labels = assign_result['assigned_labels'] - assigned_bboxes = assign_result['assigned_bboxes'] - assigned_scores = assign_result['assigned_scores'] - fg_mask_pre_prior = 
assign_result['fg_mask_pre_prior'] + assigned_labels = assign_result["assigned_labels"] + assigned_bboxes = assign_result["assigned_bboxes"] + assigned_scores = assign_result["assigned_scores"] + fg_mask_pre_prior = assign_result["fg_mask_pre_prior"] self.assertEqual(assigned_labels.shape, torch.Size([batch_size, 84])) - self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 84, - 4])) - self.assertEqual(assigned_scores.shape, - torch.Size([batch_size, 84, num_classes])) + self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 84, 4])) + self.assertEqual( + assigned_scores.shape, torch.Size([batch_size, 84, num_classes]) + ) self.assertEqual(fg_mask_pre_prior.shape, torch.Size([batch_size, 84])) diff --git a/mmyolo/tests/test_models/test_task_modules/test_coders/test_distance_point_bbox_coder.py b/mmyolo/tests/test_models/test_task_modules/test_coders/test_distance_point_bbox_coder.py index 10b0215c..bad983a5 100644 --- a/mmyolo/tests/test_models/test_task_modules/test_coders/test_distance_point_bbox_coder.py +++ b/mmyolo/tests/test_models/test_task_modules/test_coders/test_distance_point_bbox_coder.py @@ -7,18 +7,23 @@ class TestDistancePointBBoxCoder(TestCase): - def test_decoder(self): coder = DistancePointBBoxCoder() - points = torch.Tensor([[74., 61.], [-29., 106.], [138., 61.], - [29., 170.]]) - pred_bboxes = torch.Tensor([[0, -1, 3, 3], [-1, -7, -4.8, 9], - [-23, -1, 12, 1], [14.5, -13, 10, 18.3]]) - expected_distance = torch.Tensor([[74, 63, 80, 67], - [-25, 134, -48.2, 142], - [276, 67, 210, 67], - [-58, 248, 89, 279.8]]) + points = torch.Tensor( + [[74.0, 61.0], [-29.0, 106.0], [138.0, 61.0], [29.0, 170.0]] + ) + pred_bboxes = torch.Tensor( + [[0, -1, 3, 3], [-1, -7, -4.8, 9], [-23, -1, 12, 1], [14.5, -13, 10, 18.3]] + ) + expected_distance = torch.Tensor( + [ + [74, 63, 80, 67], + [-25, 134, -48.2, 142], + [276, 67, 210, 67], + [-58, 248, 89, 279.8], + ] + ) strides = torch.Tensor([2, 4, 6, 6]) out_distance = coder.decode(points, pred_bboxes, strides) assert expected_distance.allclose(out_distance) diff --git a/mmyolo/tests/test_models/test_task_modules/test_coders/test_yolov5_bbox_coder.py b/mmyolo/tests/test_models/test_task_modules/test_coders/test_yolov5_bbox_coder.py index e1d4ebe1..0f437c1c 100644 --- a/mmyolo/tests/test_models/test_task_modules/test_coders/test_yolov5_bbox_coder.py +++ b/mmyolo/tests/test_models/test_task_modules/test_coders/test_yolov5_bbox_coder.py @@ -7,22 +7,34 @@ class TestYOLOv5Coder(TestCase): - def test_decoder(self): coder = YOLOv5BBoxCoder() - priors = torch.Tensor([[10., 10., 20., 20.], [10., 8., 10., 10.], - [15., 8., 20., 3.], [2., 5., 5., 8.]]) - pred_bboxes = torch.Tensor([[0.0000, 0.0000, 1.0000, 1.0000], - [0.1409, 0.1409, 2.8591, 2.8591], - [0.0000, 0.3161, 4.1945, 0.6839], - [1.0000, 5.0000, 9.0000, 5.0000]]) + priors = torch.Tensor( + [ + [10.0, 10.0, 20.0, 20.0], + [10.0, 8.0, 10.0, 10.0], + [15.0, 8.0, 20.0, 3.0], + [2.0, 5.0, 5.0, 8.0], + ] + ) + pred_bboxes = torch.Tensor( + [ + [0.0000, 0.0000, 1.0000, 1.0000], + [0.1409, 0.1409, 2.8591, 2.8591], + [0.0000, 0.3161, 4.1945, 0.6839], + [1.0000, 5.0000, 9.0000, 5.0000], + ] + ) strides = torch.Tensor([2, 4, 8, 8]) expected_decode_bboxes = torch.Tensor( - [[4.3111, 4.3111, 25.6889, 25.6889], - [10.2813, 5.7033, 10.2813, 12.8594], - [7.7949, 11.1710, 27.2051, 2.3369], - [1.1984, 8.4730, 13.1955, 20.3129]]) + [ + [4.3111, 4.3111, 25.6889, 25.6889], + [10.2813, 5.7033, 10.2813, 12.8594], + [7.7949, 11.1710, 27.2051, 2.3369], + [1.1984, 8.4730, 13.1955, 20.3129], + ] + ) 
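# A minimal sketch, with made-up numbers, of the tolerance-based comparison
# pattern these bbox-coder tests rely on: decoded boxes are floating point,
# so they are checked against hand-computed references via allclose with an
# absolute tolerance rather than exact equality.
import torch
ref = torch.tensor([[4.3111, 4.3111, 25.6889, 25.6889]])
val = torch.tensor([[4.31112, 4.31108, 25.68891, 25.68889]])
assert ref.allclose(val, atol=1e-4)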
out = coder.decode(priors, pred_bboxes, strides) assert expected_decode_bboxes.allclose(out, atol=1e-04) diff --git a/mmyolo/tests/test_models/test_task_modules/test_coders/test_yolox_bbox_coder.py b/mmyolo/tests/test_models/test_task_modules/test_coders/test_yolox_bbox_coder.py index 00d6c316..ee38c15a 100644 --- a/mmyolo/tests/test_models/test_task_modules/test_coders/test_yolox_bbox_coder.py +++ b/mmyolo/tests/test_models/test_task_modules/test_coders/test_yolox_bbox_coder.py @@ -7,21 +7,27 @@ class TestYOLOv5Coder(TestCase): - def test_decoder(self): coder = YOLOXBBoxCoder() - priors = torch.Tensor([[10., 10.], [8., 8.], [15., 8.], [2., 5.]]) - pred_bboxes = torch.Tensor([[0.0000, 0.0000, 1.0000, 1.0000], - [0.0409, 0.1409, 0.8591, 0.8591], - [0.0000, 0.3161, 0.1945, 0.6839], - [1.0000, 5.0000, 0.2000, 0.6000]]) + priors = torch.Tensor([[10.0, 10.0], [8.0, 8.0], [15.0, 8.0], [2.0, 5.0]]) + pred_bboxes = torch.Tensor( + [ + [0.0000, 0.0000, 1.0000, 1.0000], + [0.0409, 0.1409, 0.8591, 0.8591], + [0.0000, 0.3161, 0.1945, 0.6839], + [1.0000, 5.0000, 0.2000, 0.6000], + ] + ) strides = torch.Tensor([2, 4, 6, 6]) expected_decode_bboxes = torch.Tensor( - [[7.2817, 7.2817, 12.7183, 12.7183], - [3.4415, 3.8415, 12.8857, 13.2857], - [11.3559, 3.9518, 18.6441, 15.8414], - [4.3358, 29.5336, 11.6642, 40.4664]]) + [ + [7.2817, 7.2817, 12.7183, 12.7183], + [3.4415, 3.8415, 12.8857, 13.2857], + [11.3559, 3.9518, 18.6441, 15.8414], + [4.3358, 29.5336, 11.6642, 40.4664], + ] + ) out = coder.decode(priors, pred_bboxes, strides) assert expected_decode_bboxes.allclose(out, atol=1e-04) diff --git a/mmyolo/tests/test_utils/test_collect_env.py b/mmyolo/tests/test_utils/test_collect_env.py index 913f46fa..007dc38a 100644 --- a/mmyolo/tests/test_utils/test_collect_env.py +++ b/mmyolo/tests/test_utils/test_collect_env.py @@ -10,24 +10,29 @@ class TestCollectEnv(TestCase): - def test_collect_env(self): env_info = collect_env() print(env_info) expected_keys = [ - 'sys.platform', 'Python', 'CUDA available', 'PyTorch', - 'PyTorch compiling details', 'OpenCV', 'MMEngine', 'GCC' + "sys.platform", + "Python", + "CUDA available", + "PyTorch", + "PyTorch compiling details", + "OpenCV", + "MMEngine", + "GCC", ] for key in expected_keys: assert key in env_info - if env_info['CUDA available']: - for key in ['CUDA_HOME', 'NVCC']: + if env_info["CUDA available"]: + for key in ["CUDA_HOME", "NVCC"]: assert key in env_info - assert env_info['sys.platform'] == sys.platform - assert env_info['Python'] == sys.version.replace('\n', '') + assert env_info["sys.platform"] == sys.platform + assert env_info["Python"] == sys.version.replace("\n", "") - assert env_info['MMEngine'] == mmengine.__version__ - assert env_info['MMCV'] == mmcv.__version__ - assert env_info['MMDetection'] == mmdet.__version__ + assert env_info["MMEngine"] == mmengine.__version__ + assert env_info["MMCV"] == mmcv.__version__ + assert env_info["MMDetection"] == mmdet.__version__ diff --git a/mmyolo/tests/test_utils/test_setup_env.py b/mmyolo/tests/test_utils/test_setup_env.py index e6bd6890..1b1c77e6 100644 --- a/mmyolo/tests/test_utils/test_setup_env.py +++ b/mmyolo/tests/test_utils/test_setup_env.py @@ -9,31 +9,30 @@ class TestSetupEnv(TestCase): - def test_register_all_modules(self): from mmyolo.registry import DATASETS # not init default scope - sys.modules.pop('mmyolo.datasets', None) - sys.modules.pop('mmyolo.datasets.yolov5_coco', None) - DATASETS._module_dict.pop('YOLOv5CocoDataset', None) - self.assertFalse('YOLOv5CocoDataset' in DATASETS.module_dict) + 
sys.modules.pop("mmyolo.datasets", None) + sys.modules.pop("mmyolo.datasets.yolov5_coco", None) + DATASETS._module_dict.pop("YOLOv5CocoDataset", None) + self.assertFalse("YOLOv5CocoDataset" in DATASETS.module_dict) register_all_modules(init_default_scope=False) - self.assertTrue('YOLOv5CocoDataset' in DATASETS.module_dict) + self.assertTrue("YOLOv5CocoDataset" in DATASETS.module_dict) # init default scope - sys.modules.pop('mmyolo.datasets', None) - sys.modules.pop('mmyolo.datasets.yolov5_coco', None) - DATASETS._module_dict.pop('YOLOv5CocoDataset', None) - self.assertFalse('YOLOv5CocoDataset' in DATASETS.module_dict) + sys.modules.pop("mmyolo.datasets", None) + sys.modules.pop("mmyolo.datasets.yolov5_coco", None) + DATASETS._module_dict.pop("YOLOv5CocoDataset", None) + self.assertFalse("YOLOv5CocoDataset" in DATASETS.module_dict) register_all_modules(init_default_scope=True) - self.assertTrue('YOLOv5CocoDataset' in DATASETS.module_dict) - self.assertEqual(DefaultScope.get_current_instance().scope_name, - 'mmyolo') + self.assertTrue("YOLOv5CocoDataset" in DATASETS.module_dict) + self.assertEqual(DefaultScope.get_current_instance().scope_name, "mmyolo") # init default scope when another scope is init - name = f'test-{datetime.datetime.now()}' - DefaultScope.get_instance(name, scope_name='test') + name = f"test-{datetime.datetime.now()}" + DefaultScope.get_instance(name, scope_name="test") with self.assertWarnsRegex( - Warning, 'The current default scope "test" is not "mmyolo"'): + Warning, 'The current default scope "test" is not "mmyolo"' + ): register_all_modules(init_default_scope=True) diff --git a/mmyolo/tools/analysis_tools/benchmark.py b/mmyolo/tools/analysis_tools/benchmark.py index d4db67d7..36c422fc 100644 --- a/mmyolo/tools/analysis_tools/benchmark.py +++ b/mmyolo/tools/analysis_tools/benchmark.py @@ -20,70 +20,73 @@ # TODO: Refactoring and improving def parse_args(): - parser = argparse.ArgumentParser(description='MMYOLO benchmark a model') - parser.add_argument('config', help='test config file path') - parser.add_argument('checkpoint', help='checkpoint file') + parser = argparse.ArgumentParser(description="MMYOLO benchmark a model") + parser.add_argument("config", help="test config file path") + parser.add_argument("checkpoint", help="checkpoint file") parser.add_argument( - '--repeat-num', + "--repeat-num", type=int, default=1, - help='number of repeat times of measurement for averaging the results') + help="number of repeat times of measurement for averaging the results", + ) + parser.add_argument("--max-iter", type=int, default=2000, help="num of max iter") parser.add_argument( - '--max-iter', type=int, default=2000, help='num of max iter') + "--log-interval", type=int, default=50, help="interval of logging" + ) parser.add_argument( - '--log-interval', type=int, default=50, help='interval of logging') + "--work-dir", + help="the directory to save the file containing " "benchmark metrics", + ) parser.add_argument( - '--work-dir', - help='the directory to save the file containing ' - 'benchmark metrics') + "--fuse-conv-bn", + action="store_true", + help="Whether to fuse conv and bn, this will slightly increase" + "the inference speed", + ) parser.add_argument( - '--fuse-conv-bn', - action='store_true', - help='Whether to fuse conv and bn, this will slightly increase' - 'the inference speed') - parser.add_argument( - '--cfg-options', - nargs='+', + "--cfg-options", + nargs="+", action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in 
xxx=yyy format will be merged into config file. If the value to ' + help="override some settings in the used config, the key-value pair " + "in xxx=yyy format will be merged into config file. If the value to " 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') + "Note that the quotation marks are necessary and that no white space " + "is allowed.", + ) parser.add_argument( - '--launcher', - choices=['none', 'pytorch', 'slurm', 'mpi'], - default='none', - help='job launcher') - parser.add_argument('--local_rank', type=int, default=0) + "--launcher", + choices=["none", "pytorch", "slurm", "mpi"], + default="none", + help="job launcher", + ) + parser.add_argument("--local_rank", type=int, default=0) args = parser.parse_args() - if 'LOCAL_RANK' not in os.environ: - os.environ['LOCAL_RANK'] = str(args.local_rank) + if "LOCAL_RANK" not in os.environ: + os.environ["LOCAL_RANK"] = str(args.local_rank) return args -def measure_inference_speed(cfg, checkpoint, max_iter, log_interval, - is_fuse_conv_bn): - env_cfg = cfg.get('env_cfg') - if env_cfg.get('cudnn_benchmark'): +def measure_inference_speed(cfg, checkpoint, max_iter, log_interval, is_fuse_conv_bn): + env_cfg = cfg.get("env_cfg") + if env_cfg.get("cudnn_benchmark"): torch.backends.cudnn.benchmark = True - mp_cfg: dict = env_cfg.get('mp_cfg', {}) + mp_cfg: dict = env_cfg.get("mp_cfg", {}) set_multi_processing(**mp_cfg, distributed=cfg.distributed) # Because multiple processes will occupy additional CPU resources, # FPS statistics will be more unstable when num_workers is not 0. # It is reasonable to set num_workers to 0. dataloader_cfg = cfg.test_dataloader - dataloader_cfg['num_workers'] = 0 - dataloader_cfg['batch_size'] = 1 - dataloader_cfg['persistent_workers'] = False + dataloader_cfg["num_workers"] = 0 + dataloader_cfg["batch_size"] = 1 + dataloader_cfg["persistent_workers"] = False data_loader = Runner.build_dataloader(dataloader_cfg) # build the model and load checkpoint model = MODELS.build(cfg.model) - load_checkpoint(model, checkpoint, map_location='cpu') + load_checkpoint(model, checkpoint, map_location="cpu") model = model.cuda() model.eval() @@ -109,25 +112,26 @@ def measure_inference_speed(cfg, checkpoint, max_iter, log_interval, if (i + 1) % log_interval == 0: fps = (i + 1 - num_warmup) / pure_inf_time print_log( - f'Done image [{i + 1:<3}/ {max_iter}], ' - f'fps: {fps:.1f} img / s, ' - f'times per image: {1000 / fps:.1f} ms / img', 'current') + f"Done image [{i + 1:<3}/ {max_iter}], " + f"fps: {fps:.1f} img / s, " + f"times per image: {1000 / fps:.1f} ms / img", + "current", + ) if (i + 1) == max_iter: fps = (i + 1 - num_warmup) / pure_inf_time print_log( - f'Overall fps: {fps:.1f} img / s, ' - f'times per image: {1000 / fps:.1f} ms / img', 'current') + f"Overall fps: {fps:.1f} img / s, " + f"times per image: {1000 / fps:.1f} ms / img", + "current", + ) break return fps -def repeat_measure_inference_speed(cfg, - checkpoint, - max_iter, - log_interval, - is_fuse_conv_bn, - repeat_num=1): +def repeat_measure_inference_speed( + cfg, checkpoint, max_iter, log_interval, is_fuse_conv_bn, repeat_num=1 +): assert repeat_num >= 1 fps_list = [] @@ -136,20 +140,22 @@ def repeat_measure_inference_speed(cfg, cp_cfg = copy.deepcopy(cfg) fps_list.append( - measure_inference_speed(cp_cfg, checkpoint, max_iter, log_interval, - is_fuse_conv_bn)) + 
measure_inference_speed( + cp_cfg, checkpoint, max_iter, log_interval, is_fuse_conv_bn + ) + ) if repeat_num > 1: fps_list_ = [round(fps, 1) for fps in fps_list] times_pre_image_list_ = [round(1000 / fps, 1) for fps in fps_list] mean_fps_ = sum(fps_list_) / len(fps_list_) - mean_times_pre_image_ = sum(times_pre_image_list_) / len( - times_pre_image_list_) + mean_times_pre_image_ = sum(times_pre_image_list_) / len(times_pre_image_list_) print_log( - f'Overall fps: {fps_list_}[{mean_fps_:.1f}] img / s, ' - f'times per image: ' - f'{times_pre_image_list_}[{mean_times_pre_image_:.1f}] ms / img', - 'current') + f"Overall fps: {fps_list_}[{mean_fps_:.1f}] img / s, " + f"times per image: " + f"{times_pre_image_list_}[{mean_times_pre_image_:.1f}] ms / img", + "current", + ) return fps_list return fps_list[0] @@ -164,25 +170,31 @@ def main(): cfg.merge_from_dict(args.cfg_options) distributed = False - if args.launcher != 'none': - init_dist(args.launcher, **cfg.get('env_cfg', {}).get('dist_cfg', {})) + if args.launcher != "none": + init_dist(args.launcher, **cfg.get("env_cfg", {}).get("dist_cfg", {})) distributed = True - assert get_world_size( - ) == 1, 'Inference benchmark does not allow distributed multi-GPU' + assert ( + get_world_size() == 1 + ), "Inference benchmark does not allow distributed multi-GPU" cfg.distributed = distributed log_file = None if args.work_dir: - log_file = os.path.join(args.work_dir, 'benchmark.log') + log_file = os.path.join(args.work_dir, "benchmark.log") mkdir_or_exist(args.work_dir) - MMLogger.get_instance('mmyolo', log_file=log_file, log_level='INFO') + MMLogger.get_instance("mmyolo", log_file=log_file, log_level="INFO") - repeat_measure_inference_speed(cfg, args.checkpoint, args.max_iter, - args.log_interval, args.fuse_conv_bn, - args.repeat_num) + repeat_measure_inference_speed( + cfg, + args.checkpoint, + args.max_iter, + args.log_interval, + args.fuse_conv_bn, + args.repeat_num, + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/analysis_tools/browse_coco_json.py b/mmyolo/tools/analysis_tools/browse_coco_json.py index 71a2fc2a..304102cd 100644 --- a/mmyolo/tools/analysis_tools/browse_coco_json.py +++ b/mmyolo/tools/analysis_tools/browse_coco_json.py @@ -14,11 +14,11 @@ def show_coco_json(args): coco = COCO(osp.join(args.data_root, args.ann_file)) else: coco = COCO(args.ann_file) - print(f'Total number of images:{len(coco.getImgIds())}') + print(f"Total number of images:{len(coco.getImgIds())}") categories = coco.loadCats(coco.getCatIds()) - category_names = [category['name'] for category in categories] - print(f'Total number of Categories : {len(category_names)}') - print('Categories: \n{}\n'.format(' '.join(category_names))) + category_names = [category["name"] for category in categories] + print(f"Total number of Categories : {len(category_names)}") + print("Categories: \n{}\n".format(" ".join(category_names))) if args.category_names is None: category_ids = [] @@ -34,13 +34,13 @@ def show_coco_json(args): for i in range(len(image_ids)): image_data = coco.loadImgs(image_ids[i])[0] if args.data_root is not None: - image_path = osp.join(args.data_root, args.img_dir, - image_data['file_name']) + image_path = osp.join(args.data_root, args.img_dir, image_data["file_name"]) else: - image_path = osp.join(args.img_dir, image_data['file_name']) + image_path = osp.join(args.img_dir, image_data["file_name"]) annotation_ids = coco.getAnnIds( - imgIds=image_data['id'], catIds=category_ids, iscrowd=0) + imgIds=image_data["id"], 
catIds=category_ids, iscrowd=0 + ) annotations = coco.loadAnns(annotation_ids) image = cv2.imread(image_path) @@ -79,10 +79,14 @@ def show_bbox_only(coco, anns, show_label_bbox=True, is_filling=True): colors = [] for ann in anns: - color = image2color[ann['category_id']] - bbox_x, bbox_y, bbox_w, bbox_h = ann['bbox'] - poly = [[bbox_x, bbox_y], [bbox_x, bbox_y + bbox_h], - [bbox_x + bbox_w, bbox_y + bbox_h], [bbox_x + bbox_w, bbox_y]] + color = image2color[ann["category_id"]] + bbox_x, bbox_y, bbox_w, bbox_h = ann["bbox"] + poly = [ + [bbox_x, bbox_y], + [bbox_x, bbox_y + bbox_h], + [bbox_x + bbox_w, bbox_y + bbox_h], + [bbox_x + bbox_w, bbox_y], + ] polygons.append(Polygon(np.array(poly).reshape((4, 2)))) colors.append(color) @@ -94,46 +98,49 @@ def show_bbox_only(coco, anns, show_label_bbox=True, is_filling=True): ax.text( bbox_x, bbox_y, - '%s' % (coco.loadCats(ann['category_id'])[0]['name']), - color='white', - bbox=label_bbox) + "%s" % (coco.loadCats(ann["category_id"])[0]["name"]), + color="white", + bbox=label_bbox, + ) if is_filling: - p = PatchCollection( - polygons, facecolor=colors, linewidths=0, alpha=0.4) + p = PatchCollection(polygons, facecolor=colors, linewidths=0, alpha=0.4) ax.add_collection(p) - p = PatchCollection( - polygons, facecolor='none', edgecolors=colors, linewidths=2) + p = PatchCollection(polygons, facecolor="none", edgecolors=colors, linewidths=2) ax.add_collection(p) def parse_args(): - parser = argparse.ArgumentParser(description='Show coco json file') - parser.add_argument('--data-root', default=None, help='dataset root') + parser = argparse.ArgumentParser(description="Show coco json file") + parser.add_argument("--data-root", default=None, help="dataset root") parser.add_argument( - '--img-dir', default='data/coco/train2017', help='image folder path') + "--img-dir", default="data/coco/train2017", help="image folder path" + ) parser.add_argument( - '--ann-file', - default='data/coco/annotations/instances_train2017.json', - help='ann file path') + "--ann-file", + default="data/coco/annotations/instances_train2017.json", + help="ann file path", + ) parser.add_argument( - '--wait-time', type=float, default=2, help='the interval of show (s)') + "--wait-time", type=float, default=2, help="the interval of show (s)" + ) parser.add_argument( - '--disp-all', - action='store_true', - help='Whether to display all types of data, ' - 'such as bbox and mask.' - ' Default is to display only bbox') + "--disp-all", + action="store_true", + help="Whether to display all types of data, " + "such as bbox and mask." 
+ " Default is to display only bbox", + ) parser.add_argument( - '--category-names', + "--category-names", type=str, default=None, - nargs='+', - help='Display category-specific data, e.g., "bicycle", "person"') + nargs="+", + help='Display category-specific data, e.g., "bicycle", "person"', + ) parser.add_argument( - '--shuffle', - action='store_true', - help='Whether to display in disorder') + "--shuffle", action="store_true", help="Whether to display in disorder" + ) args = parser.parse_args() return args @@ -143,5 +150,5 @@ def main(): show_coco_json(args) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/analysis_tools/browse_dataset.py b/mmyolo/tools/analysis_tools/browse_dataset.py index 2a76ebce..b4b72c28 100644 --- a/mmyolo/tools/analysis_tools/browse_dataset.py +++ b/mmyolo/tools/analysis_tools/browse_dataset.py @@ -20,65 +20,67 @@ # TODO: Support for printing the change in key of results def parse_args(): - parser = argparse.ArgumentParser(description='Browse a dataset') - parser.add_argument('config', help='train config file path') + parser = argparse.ArgumentParser(description="Browse a dataset") + parser.add_argument("config", help="train config file path") parser.add_argument( - '--phase', - '-p', - default='train', + "--phase", + "-p", + default="train", type=str, - choices=['train', 'test', 'val'], + choices=["train", "test", "val"], help='phase of dataset to visualize, accept "train" "test" and "val".' - ' Defaults to "train".') + ' Defaults to "train".', + ) parser.add_argument( - '--mode', - '-m', - default='transformed', + "--mode", + "-m", + default="transformed", type=str, - choices=['original', 'transformed', 'pipeline'], - help='display mode; display original pictures or ' + choices=["original", "transformed", "pipeline"], + help="display mode; display original pictures or " 'transformed pictures or comparison pictures. "original" ' 'means show images load from disk; "transformed" means ' 'to show images after transformed; "pipeline" means show all ' - 'the intermediate images. Defaults to "transformed".') + 'the intermediate images. Defaults to "transformed".', + ) parser.add_argument( - '--out-dir', + "--out-dir", default=None, type=str, - help='If there is no display interface, you can save it.') - parser.add_argument('--not-show', default=False, action='store_true') + help="If there is no display interface, you can save it.", + ) + parser.add_argument("--not-show", default=False, action="store_true") parser.add_argument( - '--show-number', - '-n', + "--show-number", + "-n", type=int, default=sys.maxsize, - help='number of images selected to visualize, ' - 'must bigger than 0. if the number is bigger than length ' - 'of dataset, show all the images in dataset; ' - 'default "sys.maxsize", show all images in dataset') + help="number of images selected to visualize, " + "must bigger than 0. if the number is bigger than length " + "of dataset, show all the images in dataset; " + 'default "sys.maxsize", show all images in dataset', + ) parser.add_argument( - '--show-interval', - '-i', - type=float, - default=3, - help='the interval of show (s)') + "--show-interval", "-i", type=float, default=3, help="the interval of show (s)" + ) parser.add_argument( - '--cfg-options', - nargs='+', + "--cfg-options", + nargs="+", action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. 
If the value to ' + help="override some settings in the used config, the key-value pair " + "in xxx=yyy format will be merged into config file. If the value to " 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') + "Note that the quotation marks are necessary and that no white space " + "is allowed.", + ) args = parser.parse_args() return args -def _get_adaptive_scale(img_shape: Tuple[int, int], - min_scale: float = 0.3, - max_scale: float = 3.0) -> float: +def _get_adaptive_scale( + img_shape: Tuple[int, int], min_scale: float = 0.3, max_scale: float = 3.0 +) -> float: """Get adaptive scale according to image shape. The target scale depends on the the short edge length of the image. If the @@ -94,7 +96,7 @@ def _get_adaptive_scale(img_shape: Tuple[int, int], int: The adaptive scale. """ short_edge_length = min(img_shape) - scale = short_edge_length / 224. + scale = short_edge_length / 224.0 return min(max(scale, min_scale), max_scale) @@ -121,10 +123,10 @@ def make_grid(imgs, names): pad_width, pad_width, cv2.BORDER_CONSTANT, - value=(255, 255, 255)) + value=(255, 255, 255), + ) texts.append(f'{"execution: "}{i}\n{names[i]}\n{ori_shapes[i]}') - text_positions.append( - [start_x + img.shape[1] // 2 + pad_width, max_height]) + text_positions.append([start_x + img.shape[1] // 2 + pad_width, max_height]) start_x += img.shape[1] + horizontal_gap display_img = np.concatenate(imgs, axis=1) @@ -134,9 +136,10 @@ def make_grid(imgs, names): texts, positions=np.array(text_positions), font_sizes=img_scale * 7, - colors='black', - horizontal_alignments='center', - font_families='monospace') + colors="black", + horizontal_alignments="center", + font_families="monospace", + ) return visualizer.get_image() @@ -151,14 +154,11 @@ def __init__(self, transforms, intermediate_imgs): self.intermediate_imgs = intermediate_imgs def __call__(self, data): - if 'img' in data: - self.intermediate_imgs.append({ - 'name': 'original', - 'img': data['img'].copy() - }) - self.ptransforms = [ - self.transforms[i] for i in range(len(self.transforms) - 1) - ] + if "img" in data: + self.intermediate_imgs.append( + {"name": "original", "img": data["img"].copy()} + ) + self.ptransforms = [self.transforms[i] for i in range(len(self.transforms) - 1)] for t in self.ptransforms: data = t(data) # Keep the same meta_keys in the PackDetInputs @@ -166,13 +166,13 @@ def __call__(self, data): data_sample = self.transforms[-1](data) if data is None: return None - if 'img' in data: - self.intermediate_imgs.append({ - 'name': - t.__class__.__name__, - 'dataset_sample': - data_sample['data_samples'] - }) + if "img" in data: + self.intermediate_imgs.append( + { + "name": t.__class__.__name__, + "dataset_sample": data_sample["data_samples"], + } + ) return data @@ -185,20 +185,19 @@ def main(): # register all modules in mmyolo into the registries register_all_modules() - dataset_cfg = cfg.get(args.phase + '_dataloader').get('dataset') + dataset_cfg = cfg.get(args.phase + "_dataloader").get("dataset") dataset = DATASETS.build(dataset_cfg) visualizer = VISUALIZERS.build(cfg.visualizer) visualizer.dataset_meta = dataset.metainfo intermediate_imgs = [] - if not hasattr(dataset, 'pipeline'): + if not hasattr(dataset, "pipeline"): # for dataset_wrapper dataset = dataset.dataset # TODO: The dataset wrapper occasion is not considered here - dataset.pipeline = 
InspectCompose(dataset.pipeline.transforms, - intermediate_imgs) + dataset.pipeline = InspectCompose(dataset.pipeline.transforms, intermediate_imgs) # init visualization image number assert args.show_number > 0 @@ -207,56 +206,54 @@ def main(): progress_bar = ProgressBar(display_number) for i, item in zip(range(display_number), dataset): image_i = [] - result_i = [result['dataset_sample'] for result in intermediate_imgs] + result_i = [result["dataset_sample"] for result in intermediate_imgs] for k, datasample in enumerate(result_i): image = datasample.img gt_instances = datasample.gt_instances image = image[..., [2, 1, 0]] # bgr to rgb - gt_bboxes = gt_instances.get('bboxes', None) + gt_bboxes = gt_instances.get("bboxes", None) if gt_bboxes is not None and isinstance(gt_bboxes, BaseBoxes): gt_instances.bboxes = gt_bboxes.tensor - gt_masks = gt_instances.get('masks', None) + gt_masks = gt_instances.get("masks", None) if gt_masks is not None: masks = mask2ndarray(gt_masks) gt_instances.masks = masks.astype(bool) datasample.gt_instances = gt_instances # get filename from dataset or just use index as filename visualizer.add_datasample( - 'result', - image, - datasample, - draw_pred=False, - draw_gt=True, - show=False) + "result", image, datasample, draw_pred=False, draw_gt=True, show=False + ) image_show = visualizer.get_image() image_i.append(image_show) - if args.mode == 'original': + if args.mode == "original": image = image_i[0] - elif args.mode == 'transformed': + elif args.mode == "transformed": image = image_i[-1] else: - image = make_grid([result for result in image_i], - [result['name'] for result in intermediate_imgs]) + image = make_grid( + [result for result in image_i], + [result["name"] for result in intermediate_imgs], + ) - if hasattr(datasample, 'img_path'): + if hasattr(datasample, "img_path"): filename = osp.basename(datasample.img_path) else: # some dataset have not image path - filename = f'{i}.jpg' - out_file = osp.join(args.out_dir, - filename) if args.out_dir is not None else None + filename = f"{i}.jpg" + out_file = ( + osp.join(args.out_dir, filename) if args.out_dir is not None else None + ) if out_file is not None: mmcv.imwrite(image[..., ::-1], out_file) if not args.not_show: - visualizer.show( - image, win_name=filename, wait_time=args.show_interval) + visualizer.show(image, win_name=filename, wait_time=args.show_interval) intermediate_imgs.clear() progress_bar.update() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/analysis_tools/dataset_analysis.py b/mmyolo/tools/analysis_tools/dataset_analysis.py index a1fad78e..8ff259a3 100644 --- a/mmyolo/tools/analysis_tools/dataset_analysis.py +++ b/mmyolo/tools/analysis_tools/dataset_analysis.py @@ -17,41 +17,48 @@ def parse_args(): parser = argparse.ArgumentParser( - description='Distribution of categories and bbox instances') - parser.add_argument('config', help='config file path') + description="Distribution of categories and bbox instances" + ) + parser.add_argument("config", help="config file path") parser.add_argument( - '--val-dataset', + "--val-dataset", default=False, - action='store_true', - help='The default train_dataset.' - 'To change it to val_dataset, enter "--val-dataset"') + action="store_true", + help="The default train_dataset." 
+ ' To change it to val_dataset, enter "--val-dataset"', + ) parser.add_argument( - '--class-name', + "--class-name", default=None, type=str, - help='Display specific class, e.g., "bicycle"') + help='Display specific class, e.g., "bicycle"', + ) parser.add_argument( - '--area-rule', + "--area-rule", default=None, type=int, - nargs='+', - help='Redefine area rules,but no more than three numbers.' - ' e.g., 30 70 125') + nargs="+", + help="Redefine area rules, but no more than three numbers." " e.g., 30 70 125", + ) parser.add_argument( - '--func', + "--func", default=None, type=str, choices=[ - 'show_bbox_num', 'show_bbox_wh', 'show_bbox_wh_ratio', - 'show_bbox_area' + "show_bbox_num", + "show_bbox_wh", + "show_bbox_wh_ratio", + "show_bbox_area", ], - help='Dataset analysis function selection.') + help="Dataset analysis function selection.", + ) parser.add_argument( - '--out-dir', - default='./dataset_analysis', + "--out-dir", + default="./dataset_analysis", type=str, - help='Output directory of dataset analysis visualization results,' - ' Save in "./dataset_analysis/" by default') + help="Output directory of dataset analysis visualization results," + ' Save in "./dataset_analysis/" by default', + ) args = parser.parse_args() return args @@ -59,54 +66,51 @@ def parse_args(): def show_bbox_num(cfg, out_dir, fig_set, class_name, class_num): """Display the distribution map of categories and number of bbox instances.""" - print('\n\nDrawing bbox_num figure:') + print("\n\nDrawing bbox_num figure:") # Draw designs - fig = plt.figure( - figsize=(fig_set['figsize'][0], fig_set['figsize'][1]), dpi=300) - plt.bar(class_name, class_num, align='center') + fig = plt.figure(figsize=(fig_set["figsize"][0], fig_set["figsize"][1]), dpi=300) + plt.bar(class_name, class_num, align="center") # Draw titles, labels and so on for x, y in enumerate(class_num): - plt.text(x, y, '%s' % y, ha='center', fontsize=fig_set['fontsize'] + 3) - plt.xticks(rotation=fig_set['xticks_angle']) - plt.xlabel('Category Name') - plt.ylabel('Num of instances') + plt.text(x, y, "%s" % y, ha="center", fontsize=fig_set["fontsize"] + 3) + plt.xticks(rotation=fig_set["xticks_angle"]) + plt.xlabel("Category Name") + plt.ylabel("Num of instances") plt.title(cfg.dataset_type) # Save figure if not os.path.exists(out_dir): os.makedirs(out_dir) - out_name = fig_set['out_name'] + out_name = fig_set["out_name"] fig.savefig( - f'{out_dir}/{out_name}_bbox_num.jpg', - bbox_inches='tight', - pad_inches=0.1) # Save Image + f"{out_dir}/{out_name}_bbox_num.jpg", bbox_inches="tight", pad_inches=0.1 + ) # Save Image plt.close() - print(f'End and save in {out_dir}/{out_name}_bbox_num.jpg') + print(f"End and save in {out_dir}/{out_name}_bbox_num.jpg") def show_bbox_wh(out_dir, fig_set, class_bbox_w, class_bbox_h, class_name): """Display the width and height distribution of categories and bbox instances.""" - print('\n\nDrawing bbox_wh figure:') + print("\n\nDrawing bbox_wh figure:") # Draw designs fig, ax = plt.subplots( - figsize=(fig_set['figsize'][0], fig_set['figsize'][1]), dpi=300) + figsize=(fig_set["figsize"][0], fig_set["figsize"][1]), dpi=300 + ) # Set the position of the map and label on the x-axis positions_w = list(range(0, 12 * len(class_name), 12)) positions_h = list(range(6, 12 * len(class_name), 12)) positions_x_label = list(range(3, 12 * len(class_name) + 1, 12)) - ax.violinplot( - class_bbox_w, positions_w, showmeans=True, showmedians=True, widths=4) - ax.violinplot( - class_bbox_h, positions_h, showmeans=True, showmedians=True, widths=4)
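# A self-contained sketch, with synthetic data, of the ax.violinplot call
# pattern used throughout this file: one violin per class, drawn at explicit
# x positions so that the width and height distributions can be interleaved
# on a single axis. The sample values below are hypothetical.
import matplotlib.pyplot as plt
samples = [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]]
fig, ax = plt.subplots()
ax.violinplot(samples, [0, 12], showmeans=True, showmedians=True, widths=4)
+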
ax.violinplot(class_bbox_w, positions_w, showmeans=True, showmedians=True, widths=4) + ax.violinplot(class_bbox_h, positions_h, showmeans=True, showmedians=True, widths=4) # Draw titles, labels and so on - plt.xticks(rotation=fig_set['xticks_angle']) - plt.ylabel('The width or height of bbox') - plt.xlabel('Class name') - plt.title('Width or height distribution of classes and bbox instances') + plt.xticks(rotation=fig_set["xticks_angle"]) + plt.ylabel("The width or height of bbox") + plt.xlabel("Class name") + plt.title("Width or height distribution of classes and bbox instances") # Draw the max, min and median of wide data in violin chart for i in range(len(class_bbox_w)): @@ -114,20 +118,23 @@ def show_bbox_wh(out_dir, fig_set, class_bbox_w, class_bbox_h, class_name): positions_w[i], median(class_bbox_w[i]), f'{"%.2f" % median(class_bbox_w[i])}', - ha='center', - fontsize=fig_set['fontsize']) + ha="center", + fontsize=fig_set["fontsize"], + ) plt.text( positions_w[i], max(class_bbox_w[i]), f'{"%.2f" % max(class_bbox_w[i])}', - ha='center', - fontsize=fig_set['fontsize']) + ha="center", + fontsize=fig_set["fontsize"], + ) plt.text( positions_w[i], min(class_bbox_w[i]), f'{"%.2f" % min(class_bbox_w[i])}', - ha='center', - fontsize=fig_set['fontsize']) + ha="center", + fontsize=fig_set["fontsize"], + ) # Draw the max, min and median of height data in violin chart for i in range(len(positions_h)): @@ -135,68 +142,68 @@ def show_bbox_wh(out_dir, fig_set, class_bbox_w, class_bbox_h, class_name): positions_h[i], median(class_bbox_h[i]), f'{"%.2f" % median(class_bbox_h[i])}', - ha='center', - fontsize=fig_set['fontsize']) + ha="center", + fontsize=fig_set["fontsize"], + ) plt.text( positions_h[i], max(class_bbox_h[i]), f'{"%.2f" % max(class_bbox_h[i])}', - ha='center', - fontsize=fig_set['fontsize']) + ha="center", + fontsize=fig_set["fontsize"], + ) plt.text( positions_h[i], min(class_bbox_h[i]), f'{"%.2f" % min(class_bbox_h[i])}', - ha='center', - fontsize=fig_set['fontsize']) + ha="center", + fontsize=fig_set["fontsize"], + ) # Draw Legend plt.setp(ax, xticks=positions_x_label, xticklabels=class_name) - labels = ['bbox_w', 'bbox_h'] - colors = ['steelblue', 'darkorange'] + labels = ["bbox_w", "bbox_h"] + colors = ["steelblue", "darkorange"] patches = [ - mpatches.Patch(color=colors[i], label=f'{labels[i]:s}') + mpatches.Patch(color=colors[i], label=f"{labels[i]:s}") for i in range(len(colors)) ] ax = plt.gca() box = ax.get_position() ax.set_position([box.x0, box.y0, box.width, box.height * 0.8]) - ax.legend(loc='upper center', handles=patches, ncol=2) + ax.legend(loc="upper center", handles=patches, ncol=2) # Save figure if not os.path.exists(out_dir): os.makedirs(out_dir) - out_name = fig_set['out_name'] + out_name = fig_set["out_name"] fig.savefig( - f'{out_dir}/{out_name}_bbox_wh.jpg', - bbox_inches='tight', - pad_inches=0.1) # Save Image + f"{out_dir}/{out_name}_bbox_wh.jpg", bbox_inches="tight", pad_inches=0.1 + ) # Save Image plt.close() - print(f'End and save in {out_dir}/{out_name}_bbox_wh.jpg') + print(f"End and save in {out_dir}/{out_name}_bbox_wh.jpg") def show_bbox_wh_ratio(out_dir, fig_set, class_name, class_bbox_ratio): """Display the distribution map of category and bbox instance width and height ratio.""" - print('\n\nDrawing bbox_wh_ratio figure:') + print("\n\nDrawing bbox_wh_ratio figure:") # Draw designs fig, ax = plt.subplots( - figsize=(fig_set['figsize'][0], fig_set['figsize'][1]), dpi=300) + figsize=(fig_set["figsize"][0], fig_set["figsize"][1]), dpi=300 + ) # Set the 
position of the map and label on the x-axis positions = list(range(0, 6 * len(class_name), 6)) ax.violinplot( - class_bbox_ratio, - positions, - showmeans=True, - showmedians=True, - widths=5) + class_bbox_ratio, positions, showmeans=True, showmedians=True, widths=5 + ) # Draw titles, labels and so on - plt.xticks(rotation=fig_set['xticks_angle']) - plt.ylabel('Ratio of width to height of bbox') - plt.xlabel('Class name') - plt.title('Width to height ratio distribution of class and bbox instances') + plt.xticks(rotation=fig_set["xticks_angle"]) + plt.ylabel("Ratio of width to height of bbox") + plt.xlabel("Class name") + plt.title("Width to height ratio distribution of class and bbox instances") # Draw the max, min and median of wide data in violin chart for i in range(len(class_bbox_ratio)): @@ -204,20 +211,23 @@ def show_bbox_wh_ratio(out_dir, fig_set, class_name, class_bbox_ratio): positions[i], median(class_bbox_ratio[i]), f'{"%.2f" % median(class_bbox_ratio[i])}', - ha='center', - fontsize=fig_set['fontsize']) + ha="center", + fontsize=fig_set["fontsize"], + ) plt.text( positions[i], max(class_bbox_ratio[i]), f'{"%.2f" % max(class_bbox_ratio[i])}', - ha='center', - fontsize=fig_set['fontsize']) + ha="center", + fontsize=fig_set["fontsize"], + ) plt.text( positions[i], min(class_bbox_ratio[i]), f'{"%.2f" % min(class_bbox_ratio[i])}', - ha='center', - fontsize=fig_set['fontsize']) + ha="center", + fontsize=fig_set["fontsize"], + ) # Set the position of the map and label on the x-axis plt.setp(ax, xticks=positions, xticklabels=class_name) @@ -225,121 +235,109 @@ # Save figure if not os.path.exists(out_dir): os.makedirs(out_dir) - out_name = fig_set['out_name'] + out_name = fig_set["out_name"] fig.savefig( - f'{out_dir}/{out_name}_bbox_ratio.jpg', - bbox_inches='tight', - pad_inches=0.1) # Save Image + f"{out_dir}/{out_name}_bbox_ratio.jpg", bbox_inches="tight", pad_inches=0.1 + ) # Save Image plt.close() - print(f'End and save in {out_dir}/{out_name}_bbox_ratio.jpg') + print(f"End and save in {out_dir}/{out_name}_bbox_ratio.jpg") def show_bbox_area(out_dir, fig_set, area_rule, class_name, bbox_area_num): """Display the distribution map of category and bbox instance area based on the rules of large, medium and small objects.""" - print('\n\nDrawing bbox_area figure:') + print("\n\nDrawing bbox_area figure:") # Set the direct distance of each label and the width of each histogram # Set the required labels and colors positions = np.arange(0, 2 * len(class_name), 2) width = 0.4 - labels = ['Small', 'Mediun', 'Large', 'Huge'] - colors = ['#438675', '#F7B469', '#6BA6DA', '#913221'] + labels = ["Small", "Medium", "Large", "Huge"] + colors = ["#438675", "#F7B469", "#6BA6DA", "#913221"] # Draw designs - fig = plt.figure( - figsize=(fig_set['figsize'][0], fig_set['figsize'][1]), dpi=300) + fig = plt.figure(figsize=(fig_set["figsize"][0], fig_set["figsize"][1]), dpi=300) for i in range(len(area_rule) - 1): area_num = [bbox_area_num[idx][i] for idx in range(len(class_name))] plt.bar( - positions + width * i, - area_num, - width, - label=labels[i], - color=colors[i]) + positions + width * i, area_num, width, label=labels[i], color=colors[i] + ) for idx, (x, y) in enumerate(zip(positions.tolist(), area_num)): - plt.text( - x + width * i, - y, - y, - ha='center', - fontsize=fig_set['fontsize'] - 1) + plt.text(x + width * i, y, y, ha="center", fontsize=fig_set["fontsize"] - 1) # Draw titles, labels and so on
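# A self-contained sketch (rule values assumed, not taken from the repo) of
# the area binning this figure reports: a box counts toward bin i when its
# area lies in [area_rule[i] ** 2, area_rule[i + 1] ** 2), mirroring the
# np.logical_and computation later in this file.
import numpy as np
area_rule = [0, 32, 96, 1e5]
bbox_area = np.array([100.0, 5000.0, 20000.0])
bins = [
    np.logical_and(bbox_area >= lo**2, bbox_area < hi**2).sum()
    for lo, hi in zip(area_rule[:-1], area_rule[1:])
]
assert [int(b) for b in bins] == [1, 1, 1]
-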
plt.xticks(rotation=fig_set['xticks_angle']) + plt.xticks(rotation=fig_set["xticks_angle"]) plt.xticks(positions + width * ((len(area_rule) - 2) / 2), class_name) - plt.ylabel('Class Area') - plt.xlabel('Class Name') - plt.title( - 'Area and number of large, medium and small objects of each class') + plt.ylabel("Class Area") + plt.xlabel("Class Name") + plt.title("Area and number of large, medium and small objects of each class") # Set and Draw Legend patches = [ - mpatches.Patch(color=colors[i], label=f'{labels[i]:s}') + mpatches.Patch(color=colors[i], label=f"{labels[i]:s}") for i in range(len(area_rule) - 1) ] ax = plt.gca() box = ax.get_position() ax.set_position([box.x0, box.y0, box.width, box.height * 0.8]) - ax.legend(loc='upper center', handles=patches, ncol=len(area_rule) - 1) + ax.legend(loc="upper center", handles=patches, ncol=len(area_rule) - 1) # Save figure if not os.path.exists(out_dir): os.makedirs(out_dir) - out_name = fig_set['out_name'] + out_name = fig_set["out_name"] fig.savefig( - f'{out_dir}/{out_name}_bbox_area.jpg', - bbox_inches='tight', - pad_inches=0.1) # Save Image + f"{out_dir}/{out_name}_bbox_area.jpg", bbox_inches="tight", pad_inches=0.1 + ) # Save Image plt.close() - print(f'End and save in {out_dir}/{out_name}_bbox_area.jpg') + print(f"End and save in {out_dir}/{out_name}_bbox_area.jpg") def show_class_list(classes, class_num): """Print the data of the class obtained by the current run.""" - print('\n\nThe information obtained is as follows:') + print("\n\nThe information obtained is as follows:") class_info = PrettyTable() - class_info.title = 'Information of dataset class' + class_info.title = "Information of dataset class" # List Print Settings # If the quantity is too large, 25 rows will be displayed in each column if len(classes) < 25: - class_info.add_column('Class name', classes) - class_info.add_column('Bbox num', class_num) + class_info.add_column("Class name", classes) + class_info.add_column("Bbox num", class_num) elif len(classes) % 25 != 0 and len(classes) > 25: col_num = int(len(classes) / 25) + 1 class_nums = class_num.tolist() class_name_list = list(classes) for i in range(0, (col_num * 25) - len(classes)): - class_name_list.append('') - class_nums.append('') + class_name_list.append("") + class_nums.append("") for i in range(0, len(class_name_list), 25): - class_info.add_column('Class name', class_name_list[i:i + 25]) - class_info.add_column('Bbox num', class_nums[i:i + 25]) + class_info.add_column("Class name", class_name_list[i : i + 25]) + class_info.add_column("Bbox num", class_nums[i : i + 25]) # Align display data to the left - class_info.align['Class name'] = 'l' - class_info.align['Bbox num'] = 'l' + class_info.align["Class name"] = "l" + class_info.align["Bbox num"] = "l" print(class_info) def show_data_list(args, area_rule): """Print run setup information.""" - print('\n\nPrint current running information:') + print("\n\nPrint current running information:") data_info = PrettyTable() - data_info.title = 'Dataset information' + data_info.title = "Dataset information" # Print the corresponding information according to the settings if args.val_dataset is False: - data_info.add_column('Dataset type', ['train_dataset']) + data_info.add_column("Dataset type", ["train_dataset"]) elif args.val_dataset is True: - data_info.add_column('Dataset type', ['val_dataset']) + data_info.add_column("Dataset type", ["val_dataset"]) if args.class_name is None: - data_info.add_column('Class name', ['All classes']) + data_info.add_column("Class name", 
["All classes"]) else: - data_info.add_column('Class name', [args.class_name]) + data_info.add_column("Class name", [args.class_name]) if args.func is None: - data_info.add_column('Function', ['All function']) + data_info.add_column("Function", ["All function"]) else: - data_info.add_column('Function', [args.func]) - data_info.add_column('Area rule', [area_rule]) + data_info.add_column("Function", [args.func]) + data_info.add_column("Area rule", [area_rule]) print(data_info) @@ -359,14 +357,13 @@ def replace_pipeline_to_none(cfg): traversed """ - if cfg.get('dataset', None) is None and cfg.get('datasets', - None) is None: + if cfg.get("dataset", None) is None and cfg.get("datasets", None) is None: return - dataset = cfg.dataset if cfg.get('dataset', None) else cfg.datasets + dataset = cfg.dataset if cfg.get("dataset", None) else cfg.datasets if isinstance(dataset, list): for item in dataset: item.pipeline = None - elif dataset.get('pipeline', None): + elif dataset.get("pipeline", None): dataset.pipeline = None else: replace_pipeline_to_none(dataset) @@ -382,32 +379,34 @@ def replace_pipeline_to_none(cfg): # 2.Prepare data # Drawing settings fig_all_set = { - 'figsize': [35, 18], - 'fontsize': int(10 - 0.08 * len(dataset.metainfo['classes'])), - 'xticks_angle': 70, - 'out_name': cfg.dataset_type + "figsize": [35, 18], + "fontsize": int(10 - 0.08 * len(dataset.metainfo["classes"])), + "xticks_angle": 70, + "out_name": cfg.dataset_type, } fig_one_set = { - 'figsize': [15, 10], - 'fontsize': 10, - 'xticks_angle': 0, - 'out_name': args.class_name + "figsize": [15, 10], + "fontsize": 10, + "xticks_angle": 0, + "out_name": args.class_name, } # Call the category name and save address if args.class_name is None: - classes = dataset.metainfo['classes'] + classes = dataset.metainfo["classes"] classes_idx = [i for i in range(len(classes))] fig_set = fig_all_set - elif args.class_name in dataset.metainfo['classes']: + elif args.class_name in dataset.metainfo["classes"]: classes = [args.class_name] - classes_idx = [dataset.metainfo['classes'].index(args.class_name)] + classes_idx = [dataset.metainfo["classes"].index(args.class_name)] fig_set = fig_one_set else: - data_classes = dataset.metainfo['classes'] + data_classes = dataset.metainfo["classes"] show_data_classes(data_classes) - raise RuntimeError(f'Expected args.class_name to be one of the list,' - f'but got "{args.class_name}"') + raise RuntimeError( + f"Expected args.class_name to be one of the list," + f'but got "{args.class_name}"' + ) # Building Area Rules if args.area_rule is None: @@ -418,10 +417,11 @@ def replace_pipeline_to_none(cfg): else: raise RuntimeError( f'Expected the "{args.area_rule}" to be e.g. 30 60 120, ' - 'and no more than three numbers.') + "and no more than three numbers." 
+ ) # Build arrays or lists to store data for each category - class_num = np.zeros((len(classes), ), dtype=np.int64) + class_num = np.zeros((len(classes),), dtype=np.int64) class_bbox = [[] for _ in classes] class_name = [] class_bbox_w = [] @@ -431,25 +431,24 @@ def replace_pipeline_to_none(cfg): show_data_list(args, area_rule) # Get the quantity and bbox data corresponding to each category - print('\nRead the information of each picture in the dataset:') + print("\nRead the information of each picture in the dataset:") progress_bar = ProgressBar(len(dataset)) for index in range(len(dataset)): - for instance in dataset[index]['instances']: - if instance[ - 'bbox_label'] in classes_idx and args.class_name is None: - class_num[instance['bbox_label']] += 1 - class_bbox[instance['bbox_label']].append(instance['bbox']) - elif instance['bbox_label'] in classes_idx and args.class_name: + for instance in dataset[index]["instances"]: + if instance["bbox_label"] in classes_idx and args.class_name is None: + class_num[instance["bbox_label"]] += 1 + class_bbox[instance["bbox_label"]].append(instance["bbox"]) + elif instance["bbox_label"] in classes_idx and args.class_name: class_num[0] += 1 - class_bbox[0].append(instance['bbox']) + class_bbox[0].append(instance["bbox"]) progress_bar.update() show_class_list(classes, class_num) # Get the width, height and area of bbox corresponding to each category - print('\nRead bbox information in each class:') + print("\nRead bbox information in each class:") progress_bar_classes = ProgressBar(len(classes)) for idx, (classes, classes_idx) in enumerate(zip(classes, classes_idx)): bbox = np.array(class_bbox[idx]) - bbox_area_nums = np.zeros((len(area_rule) - 1, ), dtype=np.int64) + bbox_area_nums = np.zeros((len(area_rule) - 1,), dtype=np.int64) if len(bbox) > 0: bbox_wh = bbox[:, 2:4] - bbox[:, 0:2] bbox_ratio = bbox_wh[:, 0] / bbox_wh[:, 1] @@ -461,8 +460,8 @@ def replace_pipeline_to_none(cfg): # The area rule, there is an section between two numbers for i in range(len(area_rule) - 1): bbox_area_nums[i] = np.logical_and( - bbox_area >= area_rule[i]**2, - bbox_area < area_rule[i + 1]**2).sum() + bbox_area >= area_rule[i] ** 2, bbox_area < area_rule[i + 1] ** 2 + ).sum() elif len(bbox) == 0: class_bbox_w.append([0]) class_bbox_h.append([0]) @@ -475,25 +474,20 @@ def replace_pipeline_to_none(cfg): # 3.draw Dataset Information if args.func is None: show_bbox_num(cfg, args.out_dir, fig_set, class_name, class_num) - show_bbox_wh(args.out_dir, fig_set, class_bbox_w, class_bbox_h, - class_name) + show_bbox_wh(args.out_dir, fig_set, class_bbox_w, class_bbox_h, class_name) show_bbox_wh_ratio(args.out_dir, fig_set, class_name, class_bbox_ratio) - show_bbox_area(args.out_dir, fig_set, area_rule, class_name, - bbox_area_num) - elif args.func == 'show_bbox_num': + show_bbox_area(args.out_dir, fig_set, area_rule, class_name, bbox_area_num) + elif args.func == "show_bbox_num": show_bbox_num(cfg, args.out_dir, fig_set, class_name, class_num) - elif args.func == 'show_bbox_wh': - show_bbox_wh(args.out_dir, fig_set, class_bbox_w, class_bbox_h, - class_name) - elif args.func == 'show_bbox_wh_ratio': + elif args.func == "show_bbox_wh": + show_bbox_wh(args.out_dir, fig_set, class_bbox_w, class_bbox_h, class_name) + elif args.func == "show_bbox_wh_ratio": show_bbox_wh_ratio(args.out_dir, fig_set, class_name, class_bbox_ratio) - elif args.func == 'show_bbox_area': - show_bbox_area(args.out_dir, fig_set, area_rule, class_name, - bbox_area_num) + elif args.func == "show_bbox_area": + 
show_bbox_area(args.out_dir, fig_set, area_rule, class_name, bbox_area_num)
     else:
-        raise RuntimeError(
-            'Please enter the correct func name, e.g., show_bbox_num')
+        raise RuntimeError("Please enter the correct func name, e.g., show_bbox_num")


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/mmyolo/tools/analysis_tools/optimize_anchors.py b/mmyolo/tools/analysis_tools/optimize_anchors.py
index 70f15172..56b121e7 100644
--- a/mmyolo/tools/analysis_tools/optimize_anchors.py
+++ b/mmyolo/tools/analysis_tools/optimize_anchors.py
@@ -35,8 +35,11 @@
 import numpy as np
 import torch
-from mmdet.structures.bbox import (bbox_cxcywh_to_xyxy, bbox_overlaps,
-                                   bbox_xyxy_to_cxcywh)
+from mmdet.structures.bbox import (
+    bbox_cxcywh_to_xyxy,
+    bbox_overlaps,
+    bbox_xyxy_to_cxcywh,
+)
 from mmdet.utils import replace_cfg_vals, update_data_root
 from mmengine.config import Config
 from mmengine.fileio import dump
@@ -55,53 +58,55 @@
 def parse_args():
-    parser = argparse.ArgumentParser(description='Optimize anchor parameters.')
-    parser.add_argument('config', help='Train config file path.')
+    parser = argparse.ArgumentParser(description="Optimize anchor parameters.")
+    parser.add_argument("config", help="Train config file path.")
     parser.add_argument(
-        '--input-shape',
+        "--input-shape",
         type=int,
-        nargs='+',
+        nargs="+",
         default=[640, 640],
-        help='input image size, represent [width, height]')
+        help="input image size, representing [width, height]",
+    )
     parser.add_argument(
-        '--algorithm',
-        default='DE',
-        help='Algorithm used for anchor optimizing.'
-        'Support k-means and differential_evolution for YOLO,'
-        'and v5-k-means is special for YOLOV5.')
+        "--algorithm",
+        default="DE",
+        help="Algorithm used for anchor optimization. "
+        "Supports k-means and differential_evolution for YOLO; "
+        "v5-k-means is specific to YOLOv5.",
+    )
     parser.add_argument(
-        '--iters',
-        default=1000,
-        type=int,
-        help='Maximum iterations for optimizer.')
+        "--iters", default=1000, type=int, help="Maximum iterations for optimizer."
+    )
     parser.add_argument(
-        '--prior-match-thr',
+        "--prior-match-thr",
         default=4.0,
         type=float,
-        help='anchor-label `gt_filter_sizes` ratio threshold '
-        'hyperparameter used for training, default=4.0, this '
-        'parameter is unique to v5-k-means')
+        help="anchor-label `gt_filter_sizes` ratio threshold "
+        "hyperparameter used for training, default=4.0; this "
+        "parameter is unique to v5-k-means",
+    )
     parser.add_argument(
-        '--mutation-args',
+        "--mutation-args",
         type=float,
-        nargs='+',
+        nargs="+",
         default=[0.9, 0.1],
-        help='paramter of anchor optimize method genetic algorithm, '
-        'represent [prob, sigma], this parameter is unique to v5-k-means')
+        help="parameters of the genetic algorithm used for anchor "
+        "optimization, representing [prob, sigma]; unique to v5-k-means",
+    )
     parser.add_argument(
-        '--augment-args',
+        "--augment-args",
         type=float,
-        nargs='+',
+        nargs="+",
         default=[0.9, 1.1],
-        help='scale factor of box size augment when metric box and anchor, '
-        'represent [min, max], this parameter is unique to v5-k-means')
+        help="scale factors for box size augmentation when measuring boxes "
+        "against anchors, representing [min, max]; unique to v5-k-means",
+    )
     parser.add_argument(
-        '--device', default='cuda:0', help='Device used for calculating.')
+        "--device", default="cuda:0", help="Device used for calculating."
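# NOTE (editor's sketch, not part of the patch): example invocations of the
# flags defined above. Paths and the config name are placeholders.
#
#   python tools/analysis_tools/optimize_anchors.py ${CONFIG} \
#       --algorithm v5-k-means --input-shape 640 640 \
#       --prior-match-thr 4.0 --out-dir work_dirs/anchors
#
#   python tools/analysis_tools/optimize_anchors.py ${CONFIG} \
#       --algorithm DE --iters 1000 --device cuda:0
#
# --mutation-args and --augment-args only take effect with v5-k-means.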
+ ) parser.add_argument( - '--out-dir', - default=None, - type=str, - help='Path to save anchor optimize result.') + "--out-dir", default=None, type=str, help="Path to save anchor optimize result." + ) args = parser.parse_args() return args @@ -122,13 +127,15 @@ class BaseAnchorOptimizer: Default: None """ - def __init__(self, - dataset, - input_shape, - num_anchor_per_level, - logger, - device='cuda:0', - out_dir=None): + def __init__( + self, + dataset, + input_shape, + num_anchor_per_level, + logger, + device="cuda:0", + out_dir=None, + ): self.dataset = dataset self.input_shape = input_shape self.num_anchor_per_level = num_anchor_per_level @@ -149,25 +156,25 @@ def get_whs_and_shapes(self): tuple[np.ndarray]: Array of bbox shapes and array of image shapes with shape (num_bboxes, 2) in [width, height] format. """ - self.logger.info('Collecting bboxes from annotation...') + self.logger.info("Collecting bboxes from annotation...") bbox_whs = [] img_shapes = [] prog_bar = ProgressBar(len(self.dataset)) for idx in range(len(self.dataset)): data_info = self.dataset.get_data_info(idx) - img_shape = np.array([data_info['width'], data_info['height']]) - gt_instances = data_info['instances'] + img_shape = np.array([data_info["width"], data_info["height"]]) + gt_instances = data_info["instances"] for instance in gt_instances: - bbox = np.array(instance['bbox']) + bbox = np.array(instance["bbox"]) gt_filter_sizes = bbox[2:4] - bbox[0:2] img_shapes.append(img_shape) bbox_whs.append(gt_filter_sizes) prog_bar.update() - print('\n') + print("\n") bbox_whs = np.array(bbox_whs) img_shapes = np.array(img_shapes) - self.logger.info(f'Collected {bbox_whs.shape[0]} bboxes.') + self.logger.info(f"Collected {bbox_whs.shape[0]} bboxes.") return bbox_whs, img_shapes def get_zero_center_bbox_tensor(self): @@ -177,10 +184,8 @@ def get_zero_center_bbox_tensor(self): Tensor: Tensor of bboxes with shape (num_bboxes, 4) in [xmin, ymin, xmax, ymax] format. """ - whs = torch.from_numpy(self.bbox_whs).to( - self.device, dtype=torch.float32) - bboxes = bbox_cxcywh_to_xyxy( - torch.cat([torch.zeros_like(whs), whs], dim=1)) + whs = torch.from_numpy(self.bbox_whs).to(self.device, dtype=torch.float32) + bboxes = bbox_cxcywh_to_xyxy(torch.cat([torch.zeros_like(whs), whs], dim=1)) return bboxes def optimize(self): @@ -192,15 +197,14 @@ def save_result(self, anchors, path=None): start = 0 for num in self.num_anchor_per_level: end = num + start - anchor_results.append([(round(w), round(h)) - for w, h in anchors[start:end]]) + anchor_results.append([(round(w), round(h)) for w, h in anchors[start:end]]) start = end - self.logger.info(f'Anchor optimize result:{anchor_results}') + self.logger.info(f"Anchor optimize result:{anchor_results}") if path: - json_path = osp.join(path, 'anchor_optimize_result.json') + json_path = osp.join(path, "anchor_optimize_result.json") dump(anchor_results, json_path) - self.logger.info(f'Result saved in {json_path}') + self.logger.info(f"Result saved in {json_path}") class YOLOKMeansAnchorOptimizer(BaseAnchorOptimizer): @@ -222,16 +226,19 @@ def optimize(self): def kmeans_anchors(self): self.logger.info( - f'Start cluster {self.num_anchors} YOLO anchors with K-means...') + f"Start cluster {self.num_anchors} YOLO anchors with K-means..." 
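# NOTE (editor's sketch, not part of the patch): a self-contained sketch of one
# expectation/maximization round of the anchor k-means below, using the same
# zero-centered xyxy box convention. `em_step` is an assumed helper name, not
# an mmyolo function.
import torch
from mmdet.structures.bbox import bbox_overlaps


def em_step(bboxes: torch.Tensor, centers: torch.Tensor):
    """One E/M round: assign boxes to their highest-IoU center, then
    re-estimate each center as the mean of its assigned boxes."""
    ious = bbox_overlaps(bboxes, centers)  # (num_boxes, num_centers)
    assignments = ious.argmax(dim=1)  # E-step: best center per box
    new_centers = centers.clone()
    for i in range(centers.shape[0]):  # M-step: mean of assigned members
        mask = assignments == i
        if mask.sum():
            new_centers[i] = bboxes[mask].mean(0)
    return assignments, new_centers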
+ ) bboxes = self.get_zero_center_bbox_tensor() - cluster_center_idx = torch.randint( - 0, bboxes.shape[0], (self.num_anchors, )).to(self.device) + cluster_center_idx = torch.randint(0, bboxes.shape[0], (self.num_anchors,)).to( + self.device + ) - assignments = torch.zeros((bboxes.shape[0], )).to(self.device) + assignments = torch.zeros((bboxes.shape[0],)).to(self.device) cluster_centers = bboxes[cluster_center_idx] if self.num_anchors == 1: - cluster_centers = self.kmeans_maximization(bboxes, assignments, - cluster_centers) + cluster_centers = self.kmeans_maximization( + bboxes, assignments, cluster_centers + ) anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy() anchors = sorted(anchors, key=lambda x: x[0] * x[1]) return anchors @@ -239,20 +246,21 @@ def kmeans_anchors(self): prog_bar = ProgressBar(self.iters) for i in range(self.iters): converged, assignments = self.kmeans_expectation( - bboxes, assignments, cluster_centers) + bboxes, assignments, cluster_centers + ) if converged: - self.logger.info(f'K-means process has converged at iter {i}.') + self.logger.info(f"K-means process has converged at iter {i}.") break - cluster_centers = self.kmeans_maximization(bboxes, assignments, - cluster_centers) + cluster_centers = self.kmeans_maximization( + bboxes, assignments, cluster_centers + ) prog_bar.update() - print('\n') - avg_iou = bbox_overlaps(bboxes, - cluster_centers).max(1)[0].mean().item() + print("\n") + avg_iou = bbox_overlaps(bboxes, cluster_centers).max(1)[0].mean().item() anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy() anchors = sorted(anchors, key=lambda x: x[0] * x[1]) - self.logger.info(f'Anchor cluster finish. Average IOU: {avg_iou}') + self.logger.info(f"Anchor cluster finish. Average IOU: {avg_iou}") return anchors @@ -260,7 +268,7 @@ def kmeans_maximization(self, bboxes, assignments, centers): """Maximization part of EM algorithm(Expectation-Maximization)""" new_centers = torch.zeros_like(centers) for i in range(centers.shape[0]): - mask = (assignments == i) + mask = assignments == i if mask.sum(): new_centers[i, :] = bboxes[mask].mean(0) return new_centers @@ -284,12 +292,14 @@ class YOLOV5KMeansAnchorOptimizer(BaseAnchorOptimizer): ratio threshold hyperparameter. """ - def __init__(self, - iters, - prior_match_thr=4.0, - mutation_args=[0.9, 0.1], - augment_args=[0.9, 1.1], - **kwargs): + def __init__( + self, + iters, + prior_match_thr=4.0, + mutation_args=[0.9, 0.1], + augment_args=[0.9, 1.1], + **kwargs, + ): super().__init__(**kwargs) self.iters = iters @@ -299,27 +309,32 @@ def __init__(self, def optimize(self): self.logger.info( - f'Start cluster {self.num_anchors} YOLOv5 anchors with K-means...') + f"Start cluster {self.num_anchors} YOLOv5 anchors with K-means..." + ) - bbox_whs = torch.from_numpy(self.bbox_whs).to( - self.device, dtype=torch.float32) + bbox_whs = torch.from_numpy(self.bbox_whs).to(self.device, dtype=torch.float32) anchors = self.anchor_generate( bbox_whs, num=self.num_anchors, img_size=self.input_shape[0], prior_match_thr=self.prior_match_thr, - iters=self.iters) + iters=self.iters, + ) best_ratio, mean_matched = self.anchor_metric(bbox_whs, anchors) - self.logger.info(f'{mean_matched:.2f} anchors/target {best_ratio:.3f} ' - 'Best Possible Recall (BPR). ') + self.logger.info( + f"{mean_matched:.2f} anchors/target {best_ratio:.3f} " + "Best Possible Recall (BPR). 
" + ) self.save_result(anchors.tolist(), self.out_dir) - def anchor_generate(self, - box_size: Tensor, - num: int = 9, - img_size: int = 640, - prior_match_thr: float = 4.0, - iters: int = 1000) -> Tensor: + def anchor_generate( + self, + box_size: Tensor, + num: int = 9, + img_size: int = 640, + prior_match_thr: float = 4.0, + iters: int = 1000, + ) -> Tensor: """cluster boxes metric with anchors. Args: @@ -344,34 +359,35 @@ def anchor_generate(self, # step2: init anchors if kmeans: try: - self.logger.info( - 'beginning init anchors with scipy kmeans method') + self.logger.info("beginning init anchors with scipy kmeans method") # sigmas for whitening sigmas = box_size.std(0).cpu().numpy() - anchors = kmeans( - box_size.cpu().numpy() / sigmas, num, iter=30)[0] * sigmas + anchors = ( + kmeans(box_size.cpu().numpy() / sigmas, num, iter=30)[0] * sigmas + ) # kmeans may return fewer points than requested # if width/height is insufficient or too similar assert num == len(anchors) except Exception: self.logger.warning( - 'scipy kmeans method cannot get enough points ' - 'because of width/height is insufficient or too similar, ' - 'now switching strategies from kmeans to random init.') - anchors = np.sort(np.random.rand(num * 2)).reshape( - num, 2) * img_size + "scipy kmeans method cannot get enough points " + "because of width/height is insufficient or too similar, " + "now switching strategies from kmeans to random init." + ) + anchors = np.sort(np.random.rand(num * 2)).reshape(num, 2) * img_size else: self.logger.info( - 'cannot found scipy package, switching strategies from kmeans ' - 'to random init, you can install scipy package to ' - 'get better anchor init') - anchors = np.sort(np.random.rand(num * 2)).reshape(num, - 2) * img_size + "cannot found scipy package, switching strategies from kmeans " + "to random init, you can install scipy package to " + "get better anchor init" + ) + anchors = np.sort(np.random.rand(num * 2)).reshape(num, 2) * img_size - self.logger.info('init done, beginning evolve anchors...') + self.logger.info("init done, beginning evolve anchors...") # sort small to large anchors = torch.tensor(anchors[np.argsort(anchors.prod(1))]).to( - box_size.device, dtype=torch.float32) + box_size.device, dtype=torch.float32 + ) # step3: evolve anchors use Genetic Algorithm prog_bar = ProgressBar(iters) @@ -384,9 +400,12 @@ def anchor_generate(self, while (mutate_result == 1).all(): # mutate_result is scale factor of anchors, between 0.3 and 3 mutate_result = ( - (np.random.random(cluster_shape) < self.mutation_prob) * - random.random() * np.random.randn(*cluster_shape) * - self.mutation_sigma + 1).clip(0.3, 3.0) + (np.random.random(cluster_shape) < self.mutation_prob) + * random.random() + * np.random.randn(*cluster_shape) + * self.mutation_sigma + + 1 + ).clip(0.3, 3.0) mutate_result = torch.from_numpy(mutate_result).to(box_size.device) new_anchors = (anchors.clone() * mutate_result).clip(min=2.0) new_fitness = self._anchor_fitness(box_size, new_anchors, thr) @@ -395,17 +414,16 @@ def anchor_generate(self, anchors = new_anchors.clone() prog_bar.update() - print('\n') + print("\n") # sort small to large anchors = anchors[torch.argsort(anchors.prod(1))] - self.logger.info(f'Anchor cluster finish. fitness = {fitness:.4f}') + self.logger.info(f"Anchor cluster finish. 
fitness = {fitness:.4f}") return anchors - def anchor_metric(self, - box_size: Tensor, - anchors: Tensor, - threshold: float = 4.0) -> Tuple: + def anchor_metric( + self, box_size: Tensor, anchors: Tensor, threshold: float = 4.0 + ) -> Tuple: """compute boxes metric with anchors. Args: @@ -422,13 +440,19 @@ def anchor_metric(self, # According to the uniform distribution,the scaling scale between # augment_min and augment_max is randomly generated scale = np.random.uniform( - self.augment_min, self.augment_max, size=(box_size.shape[0], 1)) + self.augment_min, self.augment_max, size=(box_size.shape[0], 1) + ) box_size = torch.tensor( np.array( - [l[:, ] * s for s, l in zip(scale, - box_size.cpu().numpy())])).to( - box_size.device, - dtype=torch.float32) + [ + l[ + :, + ] + * s + for s, l in zip(scale, box_size.cpu().numpy()) + ] + ) + ).to(box_size.device, dtype=torch.float32) # step2: calculate ratio min_ratio, best_ratio = self._metric(box_size, anchors) mean_matched = (min_ratio > 1 / threshold).float().sum(1).mean() @@ -439,8 +463,9 @@ def _filter_box(self, box_size: Tensor) -> Tensor: small_cnt = (box_size < 3.0).any(1).sum() if small_cnt: self.logger.warning( - f'Extremely small objects found: {small_cnt} ' - f'of {len(box_size)} labels are <3 pixels in size') + f"Extremely small objects found: {small_cnt} " + f"of {len(box_size)} labels are <3 pixels in size" + ) # filter > 2 pixels filter_sizes = box_size[(box_size >= 2.0).any(1)] return filter_sizes @@ -514,14 +539,16 @@ class YOLODEAnchorOptimizer(BaseAnchorOptimizer): Default: 0.7. """ - def __init__(self, - iters, - strategy='best1bin', - population_size=15, - convergence_thr=0.0001, - mutation=(0.5, 1), - recombination=0.7, - **kwargs): + def __init__( + self, + iters, + strategy="best1bin", + population_size=15, + convergence_thr=0.0001, + mutation=(0.5, 1), + recombination=0.7, + **kwargs, + ): super().__init__(**kwargs) @@ -546,17 +573,17 @@ def differential_evolution(self): result = differential_evolution( func=self.avg_iou_cost, bounds=bounds, - args=(bboxes, ), + args=(bboxes,), strategy=self.strategy, maxiter=self.iters, popsize=self.population_size, tol=self.convergence_thr, mutation=self.mutation, recombination=self.recombination, - updating='immediate', - disp=True) - self.logger.info( - f'Anchor evolution finish. Average IOU: {1 - result.fun}') + updating="immediate", + disp=True, + ) + self.logger.info(f"Anchor evolution finish. Average IOU: {1 - result.fun}") anchors = [(w, h) for w, h in zip(result.x[::2], result.x[1::2])] anchors = sorted(anchors, key=lambda x: x[0] * x[1]) return anchors @@ -565,11 +592,11 @@ def differential_evolution(self): def avg_iou_cost(anchor_params, bboxes): assert len(anchor_params) % 2 == 0 anchor_whs = torch.tensor( - [[w, h] - for w, h in zip(anchor_params[::2], anchor_params[1::2])]).to( - bboxes.device, dtype=bboxes.dtype) + [[w, h] for w, h in zip(anchor_params[::2], anchor_params[1::2])] + ).to(bboxes.device, dtype=bboxes.dtype) anchor_boxes = bbox_cxcywh_to_xyxy( - torch.cat([torch.zeros_like(anchor_whs), anchor_whs], dim=1)) + torch.cat([torch.zeros_like(anchor_whs), anchor_whs], dim=1) + ) ious = bbox_overlaps(bboxes, anchor_boxes) max_ious, _ = ious.max(1) cost = 1 - max_ious.mean().item() @@ -593,18 +620,19 @@ def main(): assert len(input_shape) == 2 anchor_type = cfg.model.bbox_head.prior_generator.type - assert anchor_type == 'mmdet.YOLOAnchorGenerator', \ - f'Only support optimize YOLOAnchor, but get {anchor_type}.' 
+ assert ( + anchor_type == "mmdet.YOLOAnchorGenerator" + ), f"Only support optimize YOLOAnchor, but get {anchor_type}." base_sizes = cfg.model.bbox_head.prior_generator.base_sizes num_anchor_per_level = [len(sizes) for sizes in base_sizes] train_data_cfg = cfg.train_dataloader - while 'dataset' in train_data_cfg: - train_data_cfg = train_data_cfg['dataset'] + while "dataset" in train_data_cfg: + train_data_cfg = train_data_cfg["dataset"] dataset = DATASETS.build(train_data_cfg) - if args.algorithm == 'k-means': + if args.algorithm == "k-means": optimizer = YOLOKMeansAnchorOptimizer( dataset=dataset, input_shape=input_shape, @@ -612,8 +640,9 @@ def main(): num_anchor_per_level=num_anchor_per_level, iters=args.iters, logger=logger, - out_dir=args.out_dir) - elif args.algorithm == 'DE': + out_dir=args.out_dir, + ) + elif args.algorithm == "DE": optimizer = YOLODEAnchorOptimizer( dataset=dataset, input_shape=input_shape, @@ -621,8 +650,9 @@ def main(): num_anchor_per_level=num_anchor_per_level, iters=args.iters, logger=logger, - out_dir=args.out_dir) - elif args.algorithm == 'v5-k-means': + out_dir=args.out_dir, + ) + elif args.algorithm == "v5-k-means": optimizer = YOLOV5KMeansAnchorOptimizer( dataset=dataset, input_shape=input_shape, @@ -633,14 +663,16 @@ def main(): mutation_args=args.mutation_args, augment_args=args.augment_args, logger=logger, - out_dir=args.out_dir) + out_dir=args.out_dir, + ) else: raise NotImplementedError( - f'Only support k-means and differential_evolution, ' - f'but get {args.algorithm}') + f"Only support k-means and differential_evolution, " + f"but get {args.algorithm}" + ) optimizer.optimize() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/analysis_tools/vis_scheduler.py b/mmyolo/tools/analysis_tools/vis_scheduler.py index 91b8f5fe..e0caad6e 100644 --- a/mmyolo/tools/analysis_tools/vis_scheduler.py +++ b/mmyolo/tools/analysis_tools/vis_scheduler.py @@ -38,57 +38,62 @@ def parse_args(): parser = argparse.ArgumentParser( - description='Visualize a hyper-parameter scheduler') - parser.add_argument('config', help='config file path') + description="Visualize a hyper-parameter scheduler" + ) + parser.add_argument("config", help="config file path") parser.add_argument( - '-p', - '--parameter', + "-p", + "--parameter", type=str, - default='lr', - choices=['lr', 'momentum', 'wd'], - help='The parameter to visualize its change curve, choose from' - '"lr", "wd" and "momentum". Defaults to "lr".') + default="lr", + choices=["lr", "momentum", "wd"], + help="The parameter to visualize its change curve, choose from" + '"lr", "wd" and "momentum". Defaults to "lr".', + ) parser.add_argument( - '-d', - '--dataset-size', + "-d", + "--dataset-size", type=int, - help='The size of the dataset. If specify, `DATASETS.build` will ' - 'be skipped and use this size as the dataset size.') + help="The size of the dataset. If specify, `DATASETS.build` will " + "be skipped and use this size as the dataset size.", + ) parser.add_argument( - '-n', - '--ngpus', + "-n", + "--ngpus", type=int, default=1, - help='The number of GPUs used in training.') + help="The number of GPUs used in training.", + ) + parser.add_argument("-o", "--out-dir", type=Path, help="Path to output file") parser.add_argument( - '-o', '--out-dir', type=Path, help='Path to output file') + "--log-level", + default="WARNING", + help="The log level of the handler and logger. 
Defaults to " "WARNING.", + ) + parser.add_argument("--title", type=str, help="title of figure") + parser.add_argument("--style", type=str, default="whitegrid", help="style of plt") + parser.add_argument("--not-show", default=False, action="store_true") parser.add_argument( - '--log-level', - default='WARNING', - help='The log level of the handler and logger. Defaults to ' - 'WARNING.') - parser.add_argument('--title', type=str, help='title of figure') + "--window-size", + default="12*7", + help='Size of the window to display images, in format of "$W*$H".', + ) parser.add_argument( - '--style', type=str, default='whitegrid', help='style of plt') - parser.add_argument('--not-show', default=False, action='store_true') - parser.add_argument( - '--window-size', - default='12*7', - help='Size of the window to display images, in format of "$W*$H".') - parser.add_argument( - '--cfg-options', - nargs='+', + "--cfg-options", + nargs="+", action=DictAction, - help='override some settings in the used config, the key-value pair ' - 'in xxx=yyy format will be merged into config file. If the value to ' + help="override some settings in the used config, the key-value pair " + "in xxx=yyy format will be merged into config file. If the value to " 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' - 'Note that the quotation marks are necessary and that no white space ' - 'is allowed.') + "Note that the quotation marks are necessary and that no white space " + "is allowed.", + ) args = parser.parse_args() - if args.window_size != '': - assert re.match(r'\d+\*\d+', args.window_size), \ - "'window-size' must be in format 'W*H'." + if args.window_size != "": + assert re.match( + r"\d+\*\d+", args.window_size + ), "'window-size' must be in format 'W*H'." 
return args @@ -101,7 +106,7 @@ def __init__(self): self.data_preprocessor = nn.Identity() self.conv = nn.Conv2d(1, 1, 1) - def forward(self, inputs, data_samples, mode='tensor'): + def forward(self, inputs, data_samples, mode="tensor"): pass def train_step(self, data, optim_wrapper): @@ -109,7 +114,6 @@ def train_step(self, data, optim_wrapper): class ParamRecordHook(Hook): - def __init__(self, by_epoch): super().__init__() self.by_epoch = by_epoch @@ -117,18 +121,17 @@ def __init__(self, by_epoch): self.momentum_list = [] self.wd_list = [] self.task_id = 0 - self.progress = Progress(BarColumn(), MofNCompleteColumn(), - TextColumn('{task.description}')) + self.progress = Progress( + BarColumn(), MofNCompleteColumn(), TextColumn("{task.description}") + ) def before_train(self, runner): if self.by_epoch: total = runner.train_loop.max_epochs - self.task_id = self.progress.add_task( - 'epochs', start=True, total=total) + self.task_id = self.progress.add_task("epochs", start=True, total=total) else: total = runner.train_loop.max_iters - self.task_id = self.progress.add_task( - 'iters', start=True, total=total) + self.task_id = self.progress.add_task("iters", start=True, total=total) self.progress.start() def after_train_epoch(self, runner): @@ -139,11 +142,9 @@ def after_train_epoch(self, runner): def after_train_iter(self, runner, batch_idx, data_batch, outputs): if not self.by_epoch: self.progress.update(self.task_id, advance=1) - self.lr_list.append(runner.optim_wrapper.get_lr()['lr'][0]) - self.momentum_list.append( - runner.optim_wrapper.get_momentum()['momentum'][0]) - self.wd_list.append( - runner.optim_wrapper.param_groups[0]['weight_decay']) + self.lr_list.append(runner.optim_wrapper.get_lr()["lr"][0]) + self.momentum_list.append(runner.optim_wrapper.get_momentum()["momentum"][0]) + self.wd_list.append(runner.optim_wrapper.param_groups[0]["weight_decay"]) def after_train(self, runner): self.progress.stop() @@ -153,11 +154,12 @@ def plot_curve(lr_list, args, param_name, iters_per_epoch, by_epoch=True): """Plot learning rate vs iter graph.""" try: import seaborn as sns + sns.set_style(args.style) except ImportError: pass - wind_w, wind_h = args.window_size.split('*') + wind_w, wind_h = args.window_size.split("*") wind_w, wind_h = int(wind_w), int(wind_h) plt.figure(figsize=(wind_w, wind_h)) @@ -166,19 +168,19 @@ def plot_curve(lr_list, args, param_name, iters_per_epoch, by_epoch=True): if by_epoch: ax.xaxis.tick_top() - ax.set_xlabel('Iters') - ax.xaxis.set_label_position('top') + ax.set_xlabel("Iters") + ax.xaxis.set_label_position("top") sec_ax = ax.secondary_xaxis( - 'bottom', - functions=(lambda x: x / iters_per_epoch, - lambda y: y * iters_per_epoch)) - sec_ax.set_xlabel('Epochs') + "bottom", + functions=(lambda x: x / iters_per_epoch, lambda y: y * iters_per_epoch), + ) + sec_ax.set_xlabel("Epochs") else: - plt.xlabel('Iters') + plt.xlabel("Iters") plt.ylabel(param_name) if args.title is None: - plt.title(f'{osp.basename(args.config)} {param_name} curve') + plt.title(f"{osp.basename(args.config)} {param_name} curve") else: plt.title(args.title) @@ -187,13 +189,14 @@ def simulate_train(data_loader, cfg, by_epoch): model = SimpleModel() param_record_hook = ParamRecordHook(by_epoch=by_epoch) default_hooks = dict( - param_scheduler=cfg.default_hooks['param_scheduler'], + param_scheduler=cfg.default_hooks["param_scheduler"], runtime_info=None, timer=None, logger=None, checkpoint=None, sampler_seed=None, - param_record=param_record_hook) + param_record=param_record_hook, + ) runner = 
Runner( model=model, @@ -206,14 +209,16 @@ def simulate_train(data_loader, cfg, by_epoch): default_scope=cfg.default_scope, default_hooks=default_hooks, visualizer=MagicMock(spec=Visualizer), - custom_hooks=cfg.get('custom_hooks', None)) + custom_hooks=cfg.get("custom_hooks", None), + ) runner.train() param_dict = dict( lr=param_record_hook.lr_list, momentum=param_record_hook.momentum_list, - wd=param_record_hook.wd_list) + wd=param_record_hook.wd_list, + ) return param_dict @@ -223,31 +228,33 @@ def main(): cfg = Config.fromfile(args.config) if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) - if cfg.get('work_dir', None) is None: + if cfg.get("work_dir", None) is None: # use config filename as default work_dir if cfg.work_dir is None - cfg.work_dir = osp.join('./work_dirs', - osp.splitext(osp.basename(args.config))[0]) + cfg.work_dir = osp.join( + "./work_dirs", osp.splitext(osp.basename(args.config))[0] + ) cfg.log_level = args.log_level # register all modules in mmyolo into the registries register_all_modules() # init logger - print('Param_scheduler :') + print("Param_scheduler :") rich.print_json(json.dumps(cfg.param_scheduler)) # prepare data loader batch_size = cfg.train_dataloader.batch_size * args.ngpus - if 'by_epoch' in cfg.train_cfg: - by_epoch = cfg.train_cfg.get('by_epoch') - elif 'type' in cfg.train_cfg: - by_epoch = cfg.train_cfg.get('type') == 'EpochBasedTrainLoop' + if "by_epoch" in cfg.train_cfg: + by_epoch = cfg.train_cfg.get("by_epoch") + elif "type" in cfg.train_cfg: + by_epoch = cfg.train_cfg.get("type") == "EpochBasedTrainLoop" else: - raise ValueError('please set `train_cfg`.') + raise ValueError("please set `train_cfg`.") if args.dataset_size is None and by_epoch: from mmyolo.registry import DATASETS + dataset_size = len(DATASETS.build(cfg.train_dataloader.dataset)) else: dataset_size = args.dataset_size or batch_size @@ -257,25 +264,26 @@ class FakeDataloader(list): data_loader = FakeDataloader(range(dataset_size // batch_size)) dataset_info = ( - f'\nDataset infos:' - f'\n - Dataset size: {dataset_size}' - f'\n - Batch size per GPU: {cfg.train_dataloader.batch_size}' - f'\n - Number of GPUs: {args.ngpus}' - f'\n - Total batch size: {batch_size}') + f"\nDataset infos:" + f"\n - Dataset size: {dataset_size}" + f"\n - Batch size per GPU: {cfg.train_dataloader.batch_size}" + f"\n - Number of GPUs: {args.ngpus}" + f"\n - Total batch size: {batch_size}" + ) if by_epoch: - dataset_info += f'\n - Iterations per epoch: {len(data_loader)}' - rich.print(dataset_info + '\n') + dataset_info += f"\n - Iterations per epoch: {len(data_loader)}" + rich.print(dataset_info + "\n") # simulation training process param_dict = simulate_train(data_loader, cfg, by_epoch) param_list = param_dict[args.parameter] - if args.parameter == 'lr': - param_name = 'Learning Rate' - elif args.parameter == 'momentum': - param_name = 'Momentum' + if args.parameter == "lr": + param_name = "Learning Rate" + elif args.parameter == "momentum": + param_name = "Momentum" else: - param_name = 'Weight Decay' + param_name = "Weight Decay" plot_curve(param_list, args, param_name, len(data_loader), by_epoch) if args.out_dir: @@ -284,13 +292,14 @@ class FakeDataloader(list): # save the graph out_file = osp.join( - args.out_dir, f'{osp.basename(args.config)}-{args.parameter}.jpg') + args.out_dir, f"{osp.basename(args.config)}-{args.parameter}.jpg" + ) plt.savefig(out_file) - print(f'\nThe {param_name} graph is saved at {out_file}') + print(f"\nThe {param_name} graph is saved at {out_file}") if 
not args.not_show: plt.show() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/dataset_converters/balloon2coco.py b/mmyolo/tools/dataset_converters/balloon2coco.py index 65eb660c..743b6a74 100644 --- a/mmyolo/tools/dataset_converters/balloon2coco.py +++ b/mmyolo/tools/dataset_converters/balloon2coco.py @@ -12,18 +12,17 @@ def convert_balloon_to_coco(ann_file, out_file, image_prefix): images = [] obj_count = 0 for idx, v in enumerate(mmengine.track_iter_progress(data_infos.values())): - filename = v['filename'] + filename = v["filename"] img_path = osp.join(image_prefix, filename) height, width = mmcv.imread(img_path).shape[:2] - images.append( - dict(id=idx, file_name=filename, height=height, width=width)) + images.append(dict(id=idx, file_name=filename, height=height, width=width)) - for _, obj in v['regions'].items(): - assert not obj['region_attributes'] - obj = obj['shape_attributes'] - px = obj['all_points_x'] - py = obj['all_points_y'] + for _, obj in v["regions"].items(): + assert not obj["region_attributes"] + obj = obj["shape_attributes"] + px = obj["all_points_x"] + py = obj["all_points_y"] poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)] poly = [p for x in poly for p in x] @@ -36,23 +35,28 @@ def convert_balloon_to_coco(ann_file, out_file, image_prefix): bbox=[x_min, y_min, x_max - x_min, y_max - y_min], area=(x_max - x_min) * (y_max - y_min), segmentation=[poly], - iscrowd=0) + iscrowd=0, + ) annotations.append(data_anno) obj_count += 1 coco_format_json = dict( images=images, annotations=annotations, - categories=[{ - 'id': 0, - 'name': 'balloon' - }]) + categories=[{"id": 0, "name": "balloon"}], + ) mmengine.dump(coco_format_json, out_file) -if __name__ == '__main__': +if __name__ == "__main__": - convert_balloon_to_coco('data/balloon/train/via_region_data.json', - 'data/balloon/train.json', 'data/balloon/train/') - convert_balloon_to_coco('data/balloon/val/via_region_data.json', - 'data/balloon/val.json', 'data/balloon/val/') + convert_balloon_to_coco( + "data/balloon/train/via_region_data.json", + "data/balloon/train.json", + "data/balloon/train/", + ) + convert_balloon_to_coco( + "data/balloon/val/via_region_data.json", + "data/balloon/val.json", + "data/balloon/val/", + ) diff --git a/mmyolo/tools/dataset_converters/labelme2coco.py b/mmyolo/tools/dataset_converters/labelme2coco.py index e68b935d..54b73daa 100644 --- a/mmyolo/tools/dataset_converters/labelme2coco.py +++ b/mmyolo/tools/dataset_converters/labelme2coco.py @@ -49,18 +49,19 @@ def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('--img-dir', type=str, help='Dataset image directory') + parser.add_argument("--img-dir", type=str, help="Dataset image directory") + parser.add_argument("--labels-dir", type=str, help="Dataset labels directory") + parser.add_argument("--out", type=str, help="COCO label json output path") parser.add_argument( - '--labels-dir', type=str, help='Dataset labels directory') - parser.add_argument('--out', type=str, help='COCO label json output path') - parser.add_argument( - '--class-id-txt', default=None, type=str, help='All class id txt path') + "--class-id-txt", default=None, type=str, help="All class id txt path" + ) args = parser.parse_args() return args -def format_coco_annotations(points: list, image_id: int, annotations_id: int, - category_id: int) -> dict: +def format_coco_annotations( + points: list, image_id: int, annotations_id: int, category_id: int +) -> dict: """Gen COCO annotations format label from labelme format 
label. Args: @@ -73,31 +74,33 @@ def format_coco_annotations(points: list, image_id: int, annotations_id: int, annotation_info (dict): COCO annotation data. """ annotation_info = dict() - annotation_info['iscrowd'] = 0 - annotation_info['category_id'] = category_id - annotation_info['id'] = annotations_id - annotation_info['image_id'] = image_id + annotation_info["iscrowd"] = 0 + annotation_info["category_id"] = category_id + annotation_info["id"] = annotations_id + annotation_info["image_id"] = image_id # bbox is [x1, y1, w, h] - annotation_info['bbox'] = [ - points[0][0], points[0][1], points[1][0] - points[0][0], - points[1][1] - points[0][1] + annotation_info["bbox"] = [ + points[0][0], + points[0][1], + points[1][0] - points[0][0], + points[1][1] - points[0][1], ] - annotation_info['area'] = annotation_info['bbox'][2] * annotation_info[ - 'bbox'][3] # bbox w * h + annotation_info["area"] = ( + annotation_info["bbox"][2] * annotation_info["bbox"][3] + ) # bbox w * h segmentation_points = np.asarray(points).copy() segmentation_points[1, :] = np.asarray(points)[2, :] segmentation_points[2, :] = np.asarray(points)[1, :] - annotation_info['segmentation'] = [list(segmentation_points.flatten())] + annotation_info["segmentation"] = [list(segmentation_points.flatten())] return annotation_info def parse_labelme_to_coco( - image_dir: str, - labels_root: str, - all_classes_id: Optional[dict] = None) -> (dict, dict): + image_dir: str, labels_root: str, all_classes_id: Optional[dict] = None +) -> (dict, dict): """Gen COCO json format label from labelme format label. Args: @@ -160,7 +163,7 @@ def parse_labelme_to_coco( """ # init coco json field - coco_json = {'images': [], 'categories': [], 'annotations': []} + coco_json = {"images": [], "categories": [], "annotations": []} image_id = 0 annotations_id = 0 @@ -173,94 +176,92 @@ def parse_labelme_to_coco( # add class_ids and class_names to the categories list in coco_json for class_name, class_id in category_to_id.items(): - coco_json['categories'].append({ - 'id': class_id, - 'name': class_name - }) + coco_json["categories"].append({"id": class_id, "name": class_name}) # filter incorrect image file img_file_list = [ - img_file for img_file in Path(image_dir).iterdir() + img_file + for img_file in Path(image_dir).iterdir() if img_file.suffix.lower() in IMG_EXTENSIONS ] for img_file in track_iter_progress(img_file_list): # get label file according to the image file name - label_path = Path(labels_root).joinpath( - img_file.stem).with_suffix('.json') + label_path = Path(labels_root).joinpath(img_file.stem).with_suffix(".json") if not label_path.exists(): - print(f'Can not find label file: {label_path}, skip...') + print(f"Can not find label file: {label_path}, skip...") continue # load labelme label - with open(label_path, encoding='utf-8') as f: + with open(label_path, encoding="utf-8") as f: labelme_data = json.load(f) image_id = image_id + 1 # coco id begin from 1 # update coco 'images' field - coco_json['images'].append({ - 'height': - labelme_data['imageHeight'], - 'width': - labelme_data['imageWidth'], - 'id': - image_id, - 'file_name': - Path(labelme_data['imagePath']).name - }) - - for label_shapes in labelme_data['shapes']: + coco_json["images"].append( + { + "height": labelme_data["imageHeight"], + "width": labelme_data["imageWidth"], + "id": image_id, + "file_name": Path(labelme_data["imagePath"]).name, + } + ) + + for label_shapes in labelme_data["shapes"]: # Update coco 'categories' field - class_name = label_shapes['label'] + class_name = 
label_shapes["label"] - if (all_classes_id is None) and (class_name - not in categories_labels): + if (all_classes_id is None) and (class_name not in categories_labels): # only update when not been added before - coco_json['categories'].append({ - 'id': - len(categories_labels) + 1, # categories id start with 1 - 'name': class_name - }) + coco_json["categories"].append( + { + "id": len(categories_labels) + 1, # categories id start with 1 + "name": class_name, + } + ) categories_labels.append(class_name) category_to_id[class_name] = len(categories_labels) - elif (all_classes_id is not None) and (class_name - not in categories_labels): + elif (all_classes_id is not None) and (class_name not in categories_labels): # check class name - raise ValueError(f'Got unexpected class name {class_name}, ' - 'which is not in your `--class-id-txt`.') + raise ValueError( + f"Got unexpected class name {class_name}, " + "which is not in your `--class-id-txt`." + ) # get shape type and convert it to coco format - shape_type = label_shapes['shape_type'] - if shape_type != 'rectangle': - print(f'not support `{shape_type}` yet, skip...') + shape_type = label_shapes["shape_type"] + if shape_type != "rectangle": + print(f"not support `{shape_type}` yet, skip...") continue annotations_id = annotations_id + 1 # convert point from [xmin, ymin, xmax, ymax] to [x1, y1, w, h] - (x1, y1), (x2, y2) = label_shapes['points'] + (x1, y1), (x2, y2) = label_shapes["points"] x1, x2 = sorted([x1, x2]) # xmin, xmax y1, y2 = sorted([y1, y2]) # ymin, ymax points = [[x1, y1], [x2, y2], [x1, y2], [x2, y1]] coco_annotations = format_coco_annotations( - points, image_id, annotations_id, category_to_id[class_name]) - coco_json['annotations'].append(coco_annotations) + points, image_id, annotations_id, category_to_id[class_name] + ) + coco_json["annotations"].append(coco_annotations) - print(f'Total image = {image_id}') - print(f'Total annotations = {annotations_id}') - print(f'Number of categories = {len(categories_labels)}, ' - f'which is {categories_labels}') + print(f"Total image = {image_id}") + print(f"Total annotations = {annotations_id}") + print( + f"Number of categories = {len(categories_labels)}, " + f"which is {categories_labels}" + ) return coco_json, category_to_id -def convert_labelme_to_coco(image_dir: str, - labels_dir: str, - out_path: str, - class_id_txt: Optional[str] = None): +def convert_labelme_to_coco( + image_dir: str, labels_dir: str, out_path: str, class_id_txt: Optional[str] = None +): """Convert labelme format label to COCO json format label. Args: @@ -270,25 +271,27 @@ def convert_labelme_to_coco(image_dir: str, class_id_txt (Optional[str]): All class id txt file path. Default None. 
""" - assert Path(out_path).suffix == '.json' + assert Path(out_path).suffix == ".json" if class_id_txt is not None: - assert Path(class_id_txt).suffix == '.txt' + assert Path(class_id_txt).suffix == ".txt" all_classes_id = dict() - with open(class_id_txt, encoding='utf-8') as f: + with open(class_id_txt, encoding="utf-8") as f: txt_lines = f.read().splitlines() assert len(txt_lines) > 0 for txt_line in txt_lines: - class_info = txt_line.split(' ') + class_info = txt_line.split(" ") if len(class_info) != 2: - raise ValueError('Error parse "class_id_txt" file ' - f'{class_id_txt}, please check if some of ' - 'the class names is blank, like "1 " -> ' - '"1 blank", or class name has space between' - ' words, like "1 Big house" -> "1 ' - 'Big-house".') + raise ValueError( + 'Error parse "class_id_txt" file ' + f"{class_id_txt}, please check if some of " + 'the class names is blank, like "1 " -> ' + '"1 blank", or class name has space between' + ' words, like "1 Big house" -> "1 ' + 'Big-house".' + ) v, k = class_info all_classes_id.update({k: int(v)}) else: @@ -296,30 +299,32 @@ def convert_labelme_to_coco(image_dir: str, # convert to coco json coco_json_data, category_to_id = parse_labelme_to_coco( - image_dir, labels_dir, all_classes_id) + image_dir, labels_dir, all_classes_id + ) # save json result Path(out_path).parent.mkdir(exist_ok=True, parents=True) - print(f'Saving json to {out_path}') - json.dump(coco_json_data, open(out_path, 'w'), indent=2) + print(f"Saving json to {out_path}") + json.dump(coco_json_data, open(out_path, "w"), indent=2) if class_id_txt is None: - category_to_id_path = Path(out_path).with_name('class_with_id.txt') - print(f'Saving class id txt to {category_to_id_path}') - with open(category_to_id_path, 'w', encoding='utf-8') as f: + category_to_id_path = Path(out_path).with_name("class_with_id.txt") + print(f"Saving class id txt to {category_to_id_path}") + with open(category_to_id_path, "w", encoding="utf-8") as f: for k, v in category_to_id.items(): - f.write(f'{v} {k}\n') + f.write(f"{v} {k}\n") else: - print('Not Saving new class id txt, user should using ' - f'{class_id_txt} for training config') + print( + "Not Saving new class id txt, user should using " + f"{class_id_txt} for training config" + ) def main(): args = parse_args() - convert_labelme_to_coco(args.img_dir, args.labels_dir, args.out, - args.class_id_txt) - print('All done!') + convert_labelme_to_coco(args.img_dir, args.labels_dir, args.out, args.class_id_txt) + print("All done!") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/dataset_converters/yolo2coco.py b/mmyolo/tools/dataset_converters/yolo2coco.py index 19f13666..ad39f649 100644 --- a/mmyolo/tools/dataset_converters/yolo2coco.py +++ b/mmyolo/tools/dataset_converters/yolo2coco.py @@ -52,13 +52,13 @@ import mmcv import mmengine -IMG_EXTENSIONS = ('.jpg', '.png', '.jpeg') +IMG_EXTENSIONS = (".jpg", ".png", ".jpeg") def check_existence(file_path: str): """Check if target file is existed.""" if not osp.exists(file_path): - raise FileNotFoundError(f'{file_path} does not exist!') + raise FileNotFoundError(f"{file_path} does not exist!") def get_image_info(yolo_image_dir, idx, file_name): @@ -69,10 +69,10 @@ def get_image_info(yolo_image_dir, idx, file_name): img = mmcv.imread(img_path) height, width = img.shape[:2] img_info_dict = { - 'file_name': file_name, - 'id': idx, - 'width': width, - 'height': height + "file_name": file_name, + "id": idx, + "width": width, + "height": height, } return img_info_dict, 
height, width @@ -92,16 +92,16 @@ def convert_bbox_info(label, idx, obj_count, image_height, image_width): y2 = (y + h / 2) * image_height cls_id = int(label[0]) - width = max(0., x2 - x1) - height = max(0., y2 - y1) + width = max(0.0, x2 - x1) + height = max(0.0, y2 - y1) coco_format_info = { - 'image_id': idx, - 'id': obj_count, - 'category_id': cls_id, - 'bbox': [x1, y1, width, height], - 'area': width * height, - 'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]], - 'iscrowd': 0 + "image_id": idx, + "id": obj_count, + "category_id": cls_id, + "bbox": [x1, y1, width, height], + "area": width * height, + "segmentation": [[x1, y1, x2, y1, x2, y2, x1, y2]], + "iscrowd": 0, } obj_count += 1 return coco_format_info, obj_count @@ -109,20 +109,19 @@ def convert_bbox_info(label, idx, obj_count, image_height, image_width): def organize_by_existing_files(image_dir: str, existed_categories: list): """Format annotations by existing train/val/test files.""" - categories = ['train', 'val', 'test'] + categories = ["train", "val", "test"] image_list = [] for cat in categories: if cat in existed_categories: - txt_file = osp.join(image_dir, f'{cat}.txt') - print(f'Start to read {cat} dataset definition') + txt_file = osp.join(image_dir, f"{cat}.txt") + print(f"Start to read {cat} dataset definition") assert osp.exists(txt_file) with open(txt_file) as f: img_paths = f.readlines() img_paths = [ - os.path.split(img_path.strip())[1] - for img_path in img_paths + os.path.split(img_path.strip())[1] for img_path in img_paths ] # split the absolute path image_list.append(img_paths) else: @@ -137,35 +136,35 @@ def convert_yolo_to_coco(image_dir: str): image_dir (str): the root directory of your datasets which contains labels, images, classes.txt, etc """ - print(f'Start to load existing images and annotations from {image_dir}') + print(f"Start to load existing images and annotations from {image_dir}") check_existence(image_dir) # check local environment - yolo_label_dir = osp.join(image_dir, 'labels') - yolo_image_dir = osp.join(image_dir, 'images') - yolo_class_txt = osp.join(image_dir, 'classes.txt') + yolo_label_dir = osp.join(image_dir, "labels") + yolo_image_dir = osp.join(image_dir, "images") + yolo_class_txt = osp.join(image_dir, "classes.txt") check_existence(yolo_label_dir) check_existence(yolo_image_dir) check_existence(yolo_class_txt) - print(f'All necessary files are located at {image_dir}') + print(f"All necessary files are located at {image_dir}") - train_txt_path = osp.join(image_dir, 'train.txt') - val_txt_path = osp.join(image_dir, 'val.txt') - test_txt_path = osp.join(image_dir, 'test.txt') + train_txt_path = osp.join(image_dir, "train.txt") + val_txt_path = osp.join(image_dir, "val.txt") + test_txt_path = osp.join(image_dir, "test.txt") existed_categories = [] - print(f'Checking if train.txt, val.txt, and test.txt are in {image_dir}') + print(f"Checking if train.txt, val.txt, and test.txt are in {image_dir}") if osp.exists(train_txt_path): - print('Found train.txt') - existed_categories.append('train') + print("Found train.txt") + existed_categories.append("train") if osp.exists(val_txt_path): - print('Found val.txt') - existed_categories.append('val') + print("Found val.txt") + existed_categories.append("val") if osp.exists(test_txt_path): - print('Found test.txt') - existed_categories.append('test') + print("Found test.txt") + existed_categories.append("test") # prepare the output folders - output_folder = osp.join(image_dir, 'annotations') + output_folder = osp.join(image_dir, "annotations") 
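# NOTE (editor's sketch, not part of the patch): the coordinate math performed
# by `convert_bbox_info` above, on one assumed YOLO label line. YOLO labels
# store normalized [cx, cy, w, h]; COCO stores absolute [x1, y1, w, h].
image_width, image_height = 640, 480
label = ["0", "0.5", "0.5", "0.25", "0.5"]  # class cx cy w h (normalized)

x, y, w, h = (float(v) for v in label[1:])
x1 = (x - w / 2) * image_width  # 240.0
y1 = (y - h / 2) * image_height  # 120.0
x2 = (x + w / 2) * image_width  # 400.0
y2 = (y + h / 2) * image_height  # 360.0
coco_bbox = [x1, y1, max(0.0, x2 - x1), max(0.0, y2 - y1)]
print(coco_bbox)  # -> [240.0, 120.0, 160.0, 240.0]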
if not osp.exists(output_folder): os.makedirs(output_folder) check_existence(output_folder) @@ -177,31 +176,33 @@ def convert_yolo_to_coco(image_dir: str): indices = os.listdir(yolo_image_dir) total = len(indices) - dataset = {'images': [], 'annotations': [], 'categories': []} + dataset = {"images": [], "annotations": [], "categories": []} if existed_categories == []: - print('These files are not located, no need to organize separately.') + print("These files are not located, no need to organize separately.") for i, cls in enumerate(classes, 0): - dataset['categories'].append({'id': i, 'name': cls}) + dataset["categories"].append({"id": i, "name": cls}) else: - print('Need to organize the data accordingly.') - train_dataset = {'images': [], 'annotations': [], 'categories': []} - val_dataset = {'images': [], 'annotations': [], 'categories': []} - test_dataset = {'images': [], 'annotations': [], 'categories': []} + print("Need to organize the data accordingly.") + train_dataset = {"images": [], "annotations": [], "categories": []} + val_dataset = {"images": [], "annotations": [], "categories": []} + test_dataset = {"images": [], "annotations": [], "categories": []} # category id starts from 0 for i, cls in enumerate(classes, 0): - train_dataset['categories'].append({'id': i, 'name': cls}) - val_dataset['categories'].append({'id': i, 'name': cls}) - test_dataset['categories'].append({'id': i, 'name': cls}) + train_dataset["categories"].append({"id": i, "name": cls}) + val_dataset["categories"].append({"id": i, "name": cls}) + test_dataset["categories"].append({"id": i, "name": cls}) train_img, val_img, test_img = organize_by_existing_files( - image_dir, existed_categories) + image_dir, existed_categories + ) obj_count = 0 skipped = 0 converted = 0 for idx, image in enumerate(mmengine.track_iter_progress(indices)): img_info_dict, image_height, image_width = get_image_info( - yolo_image_dir, idx, image) + yolo_image_dir, idx, image + ) if existed_categories != []: if image in train_img: @@ -211,15 +212,13 @@ def convert_yolo_to_coco(image_dir: str): elif image in test_img: dataset = test_dataset - dataset['images'].append(img_info_dict) + dataset["images"].append(img_info_dict) img_name = osp.splitext(image)[0] - label_path = f'{osp.join(yolo_label_dir, img_name)}.txt' + label_path = f"{osp.join(yolo_label_dir, img_name)}.txt" if not osp.exists(label_path): # if current image is not annotated or the annotation file failed - print( - f'WARNING: {label_path} does not exist. Please check the file.' - ) + print(f"WARNING: {label_path} does not exist. 
Please check the file.") skipped += 1 continue @@ -227,38 +226,42 @@ def convert_yolo_to_coco(image_dir: str): labels = f.readlines() for label in labels: coco_info, obj_count = convert_bbox_info( - label, idx, obj_count, image_height, image_width) - dataset['annotations'].append(coco_info) + label, idx, obj_count, image_height, image_width + ) + dataset["annotations"].append(coco_info) converted += 1 # saving results to result json if existed_categories == []: - out_file = osp.join(image_dir, 'annotations/result.json') - print(f'Saving converted results to {out_file} ...') + out_file = osp.join(image_dir, "annotations/result.json") + print(f"Saving converted results to {out_file} ...") mmengine.dump(dataset, out_file) else: for category in existed_categories: - out_file = osp.join(output_folder, f'{category}.json') - print(f'Saving converted results to {out_file} ...') - if category == 'train': + out_file = osp.join(output_folder, f"{category}.json") + print(f"Saving converted results to {out_file} ...") + if category == "train": mmengine.dump(train_dataset, out_file) - elif category == 'val': + elif category == "val": mmengine.dump(val_dataset, out_file) - elif category == 'test': + elif category == "test": mmengine.dump(test_dataset, out_file) # simple statistics - print(f'Process finished! Please check at {output_folder} .') - print(f'Number of images found: {total}, converted: {converted},', - f'and skipped: {skipped}. Total annotation count: {obj_count}.') - print('You can use tools/analysis_tools/browse_coco_json.py to visualize!') + print(f"Process finished! Please check at {output_folder} .") + print( + f"Number of images found: {total}, converted: {converted},", + f"and skipped: {skipped}. Total annotation count: {obj_count}.", + ) + print("You can use tools/analysis_tools/browse_coco_json.py to visualize!") -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - 'image_dir', + "image_dir", type=str, - help='dataset directory with ./images and ./labels, classes.txt, etc.') + help="dataset directory with ./images and ./labels, classes.txt, etc.", + ) arg = parser.parse_args() convert_yolo_to_coco(arg.image_dir) diff --git a/mmyolo/tools/misc/coco_split.py b/mmyolo/tools/misc/coco_split.py index 8ce70349..3681bc7f 100644 --- a/mmyolo/tools/misc/coco_split.py +++ b/mmyolo/tools/misc/coco_split.py @@ -10,30 +10,29 @@ def parse_args(): parser = argparse.ArgumentParser() + parser.add_argument("--json", type=str, required=True, help="COCO json label path") + parser.add_argument("--out-dir", type=str, required=True, help="output path") parser.add_argument( - '--json', type=str, required=True, help='COCO json label path') - parser.add_argument( - '--out-dir', type=str, required=True, help='output path') - parser.add_argument( - '--ratios', - nargs='+', + "--ratios", + nargs="+", type=float, - help='ratio for sub dataset, if set 2 number then will generate ' + help="ratio for sub dataset, if set 2 number then will generate " 'trainval + test (eg. "0.8 0.1 0.1" or "2 1 1"), if set 3 number ' - 'then will generate train + val + test (eg. "0.85 0.15" or "2 1")') + 'then will generate train + val + test (eg. 
"0.85 0.15" or "2 1")', + ) parser.add_argument( - '--shuffle', - action='store_true', - help='Whether to display in disorder') - parser.add_argument('--seed', default=-1, type=int, help='seed') + "--shuffle", action="store_true", help="Whether to display in disorder" + ) + parser.add_argument("--seed", default=-1, type=int, help="seed") args = parser.parse_args() return args -def split_coco_dataset(coco_json_path: str, save_dir: str, ratios: list, - shuffle: bool, seed: int): +def split_coco_dataset( + coco_json_path: str, save_dir: str, ratios: list, shuffle: bool, seed: int +): if not Path(coco_json_path).exists(): - raise FileNotFoundError(f'Can not not found {coco_json_path}') + raise FileNotFoundError(f"Can not not found {coco_json_path}") if not Path(save_dir).exists(): Path(save_dir).mkdir(parents=True) @@ -44,12 +43,12 @@ def split_coco_dataset(coco_json_path: str, save_dir: str, ratios: list, if len(ratios) == 2: ratio_train, ratio_test = ratios ratio_val = 0 - train_type = 'trainval' + train_type = "trainval" elif len(ratios) == 3: ratio_train, ratio_val, ratio_test = ratios - train_type = 'train' + train_type = "train" else: - raise ValueError('ratios must set 2 or 3 group!') + raise ValueError("ratios must set 2 or 3 group!") # Read coco info coco = COCO(coco_json_path) @@ -59,28 +58,31 @@ def split_coco_dataset(coco_json_path: str, save_dir: str, ratios: list, val_image_num = int(len(coco_image_ids) * ratio_val) test_image_num = int(len(coco_image_ids) * ratio_test) train_image_num = len(coco_image_ids) - val_image_num - test_image_num - print('Split info: ====== \n' - f'Train ratio = {ratio_train}, number = {train_image_num}\n' - f'Val ratio = {ratio_val}, number = {val_image_num}\n' - f'Test ratio = {ratio_test}, number = {test_image_num}') + print( + "Split info: ====== \n" + f"Train ratio = {ratio_train}, number = {train_image_num}\n" + f"Val ratio = {ratio_val}, number = {val_image_num}\n" + f"Test ratio = {ratio_test}, number = {test_image_num}" + ) seed = int(seed) if seed != -1: - print(f'Set the global seed: {seed}') + print(f"Set the global seed: {seed}") np.random.seed(seed) if shuffle: - print('shuffle dataset.') + print("shuffle dataset.") random.shuffle(coco_image_ids) # split each dataset train_image_ids = coco_image_ids[:train_image_num] if val_image_num != 0: - val_image_ids = coco_image_ids[train_image_num:train_image_num + - val_image_num] + val_image_ids = coco_image_ids[ + train_image_num : train_image_num + val_image_num + ] else: val_image_ids = None - test_image_ids = coco_image_ids[train_image_num + val_image_num:] + test_image_ids = coco_image_ids[train_image_num + val_image_num :] # Save new json categories = coco.loadCats(coco.getCatIds()) @@ -90,33 +92,32 @@ def split_coco_dataset(coco_json_path: str, save_dir: str, ratios: list, # Gen new json img_dict = { - 'images': coco.loadImgs(ids=img_id_list), - 'categories': categories, - 'annotations': coco.loadAnns(coco.getAnnIds(imgIds=img_id_list)) + "images": coco.loadImgs(ids=img_id_list), + "categories": categories, + "annotations": coco.loadAnns(coco.getAnnIds(imgIds=img_id_list)), } # save json if img_id_list == train_image_ids: - json_file_path = Path(save_dir, f'{train_type}.json') + json_file_path = Path(save_dir, f"{train_type}.json") elif img_id_list == val_image_ids: - json_file_path = Path(save_dir, 'val.json') + json_file_path = Path(save_dir, "val.json") elif img_id_list == test_image_ids: - json_file_path = Path(save_dir, 'test.json') + json_file_path = Path(save_dir, "test.json") else: 
- raise ValueError('img_id_list ERROR!') + raise ValueError("img_id_list ERROR!") - print(f'Saving json to {json_file_path}') - with open(json_file_path, 'w') as f_json: + print(f"Saving json to {json_file_path}") + with open(json_file_path, "w") as f_json: json.dump(img_dict, f_json, ensure_ascii=False, indent=2) - print('All done!') + print("All done!") def main(): args = parse_args() - split_coco_dataset(args.json, args.out_dir, args.ratios, args.shuffle, - args.seed) + split_coco_dataset(args.json, args.out_dir, args.ratios, args.shuffle, args.seed) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/misc/download_dataset.py b/mmyolo/tools/misc/download_dataset.py index 7d1c64d8..7b8d46b1 100644 --- a/mmyolo/tools/misc/download_dataset.py +++ b/mmyolo/tools/misc/download_dataset.py @@ -9,47 +9,43 @@ def parse_args(): - parser = argparse.ArgumentParser( - description='Download datasets for training') + parser = argparse.ArgumentParser(description="Download datasets for training") parser.add_argument( - '--dataset-name', type=str, help='dataset name', default='coco2017') - parser.add_argument( - '--save-dir', - type=str, - help='the dir to save dataset', - default='data/coco') + "--dataset-name", type=str, help="dataset name", default="coco2017" + ) parser.add_argument( - '--unzip', - action='store_true', - help='whether unzip dataset or not, zipped files will be saved') + "--save-dir", type=str, help="the dir to save dataset", default="data/coco" + ) parser.add_argument( - '--delete', - action='store_true', - help='delete the download zipped files') + "--unzip", + action="store_true", + help="whether unzip dataset or not, zipped files will be saved", + ) parser.add_argument( - '--threads', type=int, help='number of threading', default=4) + "--delete", action="store_true", help="delete the download zipped files" + ) + parser.add_argument("--threads", type=int, help="number of threading", default=4) args = parser.parse_args() return args def download(url, dir, unzip=True, delete=False, threads=1): - def download_one(url, dir): f = dir / Path(url).name if Path(url).is_file(): Path(url).rename(f) elif not f.exists(): - print(f'Downloading {url} to {f}') + print(f"Downloading {url} to {f}") torch.hub.download_url_to_file(url, f, progress=True) - if unzip and f.suffix in ('.zip', '.tar'): - print(f'Unzipping {f.name}') - if f.suffix == '.zip': + if unzip and f.suffix in (".zip", ".tar"): + print(f"Unzipping {f.name}") + if f.suffix == ".zip": ZipFile(f).extractall(path=dir) - elif f.suffix == '.tar': + elif f.suffix == ".tar": TarFile(f).extractall(path=dir) if delete: f.unlink() - print(f'Delete {f}') + print(f"Delete {f}") dir = Path(dir) if threads > 1: @@ -70,43 +66,36 @@ def main(): data2url = dict( # TODO: Support for downloading Panoptic Segmentation of COCO coco2017=[ - 'http://images.cocodataset.org/zips/train2017.zip', - 'http://images.cocodataset.org/zips/val2017.zip', - 'http://images.cocodataset.org/zips/test2017.zip', - 'http://images.cocodataset.org/annotations/' + - 'annotations_trainval2017.zip' + "http://images.cocodataset.org/zips/train2017.zip", + "http://images.cocodataset.org/zips/val2017.zip", + "http://images.cocodataset.org/zips/test2017.zip", + "http://images.cocodataset.org/annotations/" + + "annotations_trainval2017.zip", ], lvis=[ - 'https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip', # noqa - 'https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip', # noqa + 
"https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip", # noqa + "https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip", # noqa ], voc2007=[ - 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar', # noqa - 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', # noqa - 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar', # noqa + "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar", # noqa + "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar", # noqa + "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar", # noqa ], voc2012=[ - 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', # noqa + "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar", # noqa ], balloon=[ # src link: https://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip # noqa - 'https://download.openmmlab.com/mmyolo/data/balloon_dataset.zip' - ], - cat=[ - 'https://download.openmmlab.com/mmyolo/data/cat_dataset.zip' # noqa + "https://download.openmmlab.com/mmyolo/data/balloon_dataset.zip" ], + cat=["https://download.openmmlab.com/mmyolo/data/cat_dataset.zip"], # noqa ) url = data2url.get(args.dataset_name, None) if url is None: - print('Only support COCO, VOC, balloon, cat and LVIS now!') + print("Only support COCO, VOC, balloon, cat and LVIS now!") return - download( - url, - dir=path, - unzip=args.unzip, - delete=args.delete, - threads=args.threads) + download(url, dir=path, unzip=args.unzip, delete=args.delete, threads=args.threads) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/misc/extract_subcoco.py b/mmyolo/tools/misc/extract_subcoco.py index 31528e0b..6acf7ad6 100644 --- a/mmyolo/tools/misc/extract_subcoco.py +++ b/mmyolo/tools/misc/extract_subcoco.py @@ -28,31 +28,30 @@ # TODO: Currently only supports coco2017 -def _process_data(args, - in_dataset_type: str, - out_dataset_type: str, - year: str = '2017'): - assert in_dataset_type in ('train', 'val') - assert out_dataset_type in ('train', 'val') +def _process_data( + args, in_dataset_type: str, out_dataset_type: str, year: str = "2017" +): + assert in_dataset_type in ("train", "val") + assert out_dataset_type in ("train", "val") - int_ann_file_name = f'annotations/instances_{in_dataset_type}{year}.json' - out_ann_file_name = f'annotations/instances_{out_dataset_type}{year}.json' + int_ann_file_name = f"annotations/instances_{in_dataset_type}{year}.json" + out_ann_file_name = f"annotations/instances_{out_dataset_type}{year}.json" ann_path = osp.join(args.root, int_ann_file_name) json_data = mmengine.load(ann_path) new_json_data = { - 'info': json_data['info'], - 'licenses': json_data['licenses'], - 'categories': json_data['categories'], - 'images': [], - 'annotations': [] + "info": json_data["info"], + "licenses": json_data["licenses"], + "categories": json_data["categories"], + "images": [], + "annotations": [], } area_dict = { - 'small': [0., 32 * 32], - 'medium': [32 * 32, 96 * 96], - 'large': [96 * 96, float('inf')] + "small": [0.0, 32 * 32], + "medium": [32 * 32, 96 * 96], + "large": [96 * 96, float("inf")], } coco = COCO(ann_path) @@ -64,7 +63,7 @@ def _process_data(args, ann_info = coco.loadAnns(ann_ids) # get image ids by anns set - filter_img_ids = {ann['image_id'] for ann in ann_info} + filter_img_ids = {ann["image_id"] for ann in ann_info} filter_img = 
coco.loadImgs(filter_img_ids) # shuffle @@ -73,26 +72,26 @@ def _process_data(args, num_img = args.num_img if args.num_img > 0 else len(filter_img) if num_img > len(filter_img): print( - f'num_img is too big, will be set to {len(filter_img)}, ' - 'because of not enough image after filter by classes and area_size' + f"num_img is too big, will be set to {len(filter_img)}, " + "because of not enough image after filter by classes and area_size" ) num_img = len(filter_img) progress_bar = mmengine.ProgressBar(num_img) for i in range(num_img): - file_name = filter_img[i]['file_name'] + file_name = filter_img[i]["file_name"] image_path = osp.join(args.root, in_dataset_type + year, file_name) ann_ids = coco.getAnnIds( - imgIds=[filter_img[i]['id']], catIds=catIds, areaRng=areaRng) + imgIds=[filter_img[i]["id"]], catIds=catIds, areaRng=areaRng + ) img_ann_info = coco.loadAnns(ann_ids) - new_json_data['images'].append(filter_img[i]) - new_json_data['annotations'].extend(img_ann_info) + new_json_data["images"].append(filter_img[i]) + new_json_data["annotations"].extend(img_ann_info) - shutil.copy(image_path, osp.join(args.out_dir, - out_dataset_type + year)) + shutil.copy(image_path, osp.join(args.out_dir, out_dataset_type + year)) progress_bar.update() @@ -101,60 +100,65 @@ def _process_data(args, def _make_dirs(out_dir): mmengine.mkdir_or_exist(out_dir) - mmengine.mkdir_or_exist(osp.join(out_dir, 'annotations')) - mmengine.mkdir_or_exist(osp.join(out_dir, 'train2017')) - mmengine.mkdir_or_exist(osp.join(out_dir, 'val2017')) + mmengine.mkdir_or_exist(osp.join(out_dir, "annotations")) + mmengine.mkdir_or_exist(osp.join(out_dir, "train2017")) + mmengine.mkdir_or_exist(osp.join(out_dir, "val2017")) def parse_args(): - parser = argparse.ArgumentParser(description='Extract coco subset') - parser.add_argument('root', help='root path') + parser = argparse.ArgumentParser(description="Extract coco subset") + parser.add_argument("root", help="root path") parser.add_argument( - 'out_dir', type=str, help='directory where subset coco will be saved.') + "out_dir", type=str, help="directory where subset coco will be saved." + ) parser.add_argument( - '--num-img', + "--num-img", default=50, type=int, - help='num of extract image, -1 means all images') + help="num of extract image, -1 means all images", + ) parser.add_argument( - '--area-size', - choices=['small', 'medium', 'large'], - help='filter ground-truth info by area size') + "--area-size", + choices=["small", "medium", "large"], + help="filter ground-truth info by area size", + ) parser.add_argument( - '--classes', nargs='+', help='filter ground-truth by class name') + "--classes", nargs="+", help="filter ground-truth by class name" + ) parser.add_argument( - '--use-training-set', - action='store_true', - help='Whether to use the training set when extract the training set. ' - 'The training subset is extracted from the validation set by ' - 'default which can speed up.') - parser.add_argument('--seed', default=-1, type=int, help='seed') + "--use-training-set", + action="store_true", + help="Whether to use the training set when extract the training set. " + "The training subset is extracted from the validation set by " + "default which can speed up.", + ) + parser.add_argument("--seed", default=-1, type=int, help="seed") args = parser.parse_args() return args def main(): args = parse_args() - assert args.out_dir != args.root, \ - 'The file will be overwritten in place, ' \ - 'so the same folder is not allowed !' 
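# ---------------------------------------------------------------------------
# [annotation, not part of the patch] A hedged usage sketch for
# extract_subcoco.py; the paths and class names are hypothetical. By default
# the training subset is sampled from val2017 to speed extraction up, unless
# --use-training-set is given, and out_dir must differ from root because
# images are copied into the new tree.
#
#   python tools/misc/extract_subcoco.py data/coco data/coco_subset \
#       --num-img 100 --classes person car --area-size small --seed 2023
#
# The area filter maps onto pycocotools' areaRng argument; e.g. counting
# "small" objects per the area_dict above:
from pycocotools.coco import COCO

def count_small_objects(ann_path: str) -> int:
    coco = COCO(ann_path)
    # [0.0, 32 * 32] is the "small" range from area_dict
    return len(coco.getAnnIds(areaRng=[0.0, 32 * 32]))
# ---------------------------------------------------------------------------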
+ assert args.out_dir != args.root, ( + "The file will be overwritten in place, " "so the same folder is not allowed !" + ) seed = int(args.seed) if seed != -1: - print(f'Set the global seed: {seed}') + print(f"Set the global seed: {seed}") np.random.seed(int(args.seed)) _make_dirs(args.out_dir) - print('====Start processing train dataset====') + print("====Start processing train dataset====") if args.use_training_set: - _process_data(args, 'train', 'train') + _process_data(args, "train", "train") else: - _process_data(args, 'val', 'train') - print('\n====Start processing val dataset====') - _process_data(args, 'val', 'val') - print(f'\n Result save to {args.out_dir}') + _process_data(args, "val", "train") + print("\n====Start processing val dataset====") + _process_data(args, "val", "val") + print(f"\n Result save to {args.out_dir}") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/model_converters/ppyoloe_to_mmyolo.py b/mmyolo/tools/model_converters/ppyoloe_to_mmyolo.py index 75c4af69..73cd3a13 100644 --- a/mmyolo/tools/model_converters/ppyoloe_to_mmyolo.py +++ b/mmyolo/tools/model_converters/ppyoloe_to_mmyolo.py @@ -6,179 +6,177 @@ def convert_bn(k: str): - name = k.replace('._mean', - '.running_mean').replace('._variance', '.running_var') + name = k.replace("._mean", ".running_mean").replace("._variance", ".running_var") return name def convert_repvgg(k: str): - if '.conv2.conv1.' in k: - name = k.replace('.conv2.conv1.', '.conv2.rbr_dense.') + if ".conv2.conv1." in k: + name = k.replace(".conv2.conv1.", ".conv2.rbr_dense.") return name - elif '.conv2.conv2.' in k: - name = k.replace('.conv2.conv2.', '.conv2.rbr_1x1.') + elif ".conv2.conv2." in k: + name = k.replace(".conv2.conv2.", ".conv2.rbr_1x1.") return name else: return k def convert(src: str, dst: str, imagenet_pretrain: bool = False): - with open(src, 'rb') as f: + with open(src, "rb") as f: model = pickle.load(f) new_state_dict = OrderedDict() if imagenet_pretrain: for k, v in model.items(): - if '@@' in k: + if "@@" in k: continue - if 'stem.' in k: + if "stem." in k: # backbone.stem.conv1.conv.weight # -> backbone.stem.0.conv.weight - org_ind = k.split('.')[1][-1] + org_ind = k.split(".")[1][-1] new_ind = str(int(org_ind) - 1) - name = k.replace('stem.conv%s.' % org_ind, - 'stem.%s.' % new_ind) + name = k.replace("stem.conv%s." % org_ind, "stem.%s." % new_ind) else: # backbone.stages.1.conv2.bn._variance # -> backbone.stage2.0.conv2.bn.running_var - org_stage_ind = k.split('.')[1] + org_stage_ind = k.split(".")[1] new_stage_ind = str(int(org_stage_ind) + 1) - name = k.replace('stages.%s.' % org_stage_ind, - 'stage%s.0.' % new_stage_ind) + name = k.replace( + "stages.%s." % org_stage_ind, "stage%s.0." % new_stage_ind + ) name = convert_repvgg(name) - if '.attn.' in k: - name = name.replace('.attn.fc.', '.attn.fc.conv.') + if ".attn." in k: + name = name.replace(".attn.fc.", ".attn.fc.conv.") name = convert_bn(name) - name = 'backbone.' + name + name = "backbone." + name new_state_dict[name] = torch.from_numpy(v) else: for k, v in model.items(): name = k - if k.startswith('backbone.'): - if '.stem.' in k: + if k.startswith("backbone."): + if ".stem." in k: # backbone.stem.conv1.conv.weight # -> backbone.stem.0.conv.weight - org_ind = k.split('.')[2][-1] + org_ind = k.split(".")[2][-1] new_ind = str(int(org_ind) - 1) - name = k.replace('.stem.conv%s.' % org_ind, - '.stem.%s.' % new_ind) + name = k.replace(".stem.conv%s." % org_ind, ".stem.%s." 
% new_ind) else: # backbone.stages.1.conv2.bn._variance # -> backbone.stage2.0.conv2.bn.running_var - org_stage_ind = k.split('.')[2] + org_stage_ind = k.split(".")[2] new_stage_ind = str(int(org_stage_ind) + 1) - name = k.replace('.stages.%s.' % org_stage_ind, - '.stage%s.0.' % new_stage_ind) + name = k.replace( + ".stages.%s." % org_stage_ind, ".stage%s.0." % new_stage_ind + ) name = convert_repvgg(name) - if '.attn.' in k: - name = name.replace('.attn.fc.', '.attn.fc.conv.') + if ".attn." in k: + name = name.replace(".attn.fc.", ".attn.fc.conv.") name = convert_bn(name) - elif k.startswith('neck.'): + elif k.startswith("neck."): # fpn_stages - if k.startswith('neck.fpn_stages.'): + if k.startswith("neck.fpn_stages."): # neck.fpn_stages.0.0.conv1.conv.weight # -> neck.reduce_layers.2.0.conv1.conv.weight - if k.startswith('neck.fpn_stages.0.0.'): - name = k.replace('neck.fpn_stages.0.0.', - 'neck.reduce_layers.2.0.') - if '.spp.' in name: - name = name.replace('.spp.conv.', '.spp.conv2.') + if k.startswith("neck.fpn_stages.0.0."): + name = k.replace( + "neck.fpn_stages.0.0.", "neck.reduce_layers.2.0." + ) + if ".spp." in name: + name = name.replace(".spp.conv.", ".spp.conv2.") # neck.fpn_stages.1.0.conv1.conv.weight # -> neck.top_down_layers.0.0.conv1.conv.weight - elif k.startswith('neck.fpn_stages.1.0.'): - name = k.replace('neck.fpn_stages.1.0.', - 'neck.top_down_layers.0.0.') - elif k.startswith('neck.fpn_stages.2.0.'): - name = k.replace('neck.fpn_stages.2.0.', - 'neck.top_down_layers.1.0.') + elif k.startswith("neck.fpn_stages.1.0."): + name = k.replace( + "neck.fpn_stages.1.0.", "neck.top_down_layers.0.0." + ) + elif k.startswith("neck.fpn_stages.2.0."): + name = k.replace( + "neck.fpn_stages.2.0.", "neck.top_down_layers.1.0." + ) else: - raise NotImplementedError('Not implemented.') - name = name.replace('.0.convs.', '.0.blocks.') - elif k.startswith('neck.fpn_routes.'): + raise NotImplementedError("Not implemented.") + name = name.replace(".0.convs.", ".0.blocks.") + elif k.startswith("neck.fpn_routes."): # neck.fpn_routes.0.conv.weight # -> neck.upsample_layers.0.0.conv.weight - index = k.split('.')[2] - name = 'neck.upsample_layers.' + index + '.0.' + '.'.join( - k.split('.')[-2:]) - name = name.replace('.0.convs.', '.0.blocks.') - elif k.startswith('neck.pan_stages.'): + index = k.split(".")[2] + name = ( + "neck.upsample_layers." + + index + + ".0." + + ".".join(k.split(".")[-2:]) + ) + name = name.replace(".0.convs.", ".0.blocks.") + elif k.startswith("neck.pan_stages."): # neck.pan_stages.0.0.conv1.conv.weight # -> neck.bottom_up_layers.1.0.conv1.conv.weight - ind = k.split('.')[2] + ind = k.split(".")[2] name = k.replace( - 'neck.pan_stages.' + ind, 'neck.bottom_up_layers.' + - ('0' if ind == '1' else '1')) - name = name.replace('.0.convs.', '.0.blocks.') - elif k.startswith('neck.pan_routes.'): + "neck.pan_stages." + ind, + "neck.bottom_up_layers." + ("0" if ind == "1" else "1"), + ) + name = name.replace(".0.convs.", ".0.blocks.") + elif k.startswith("neck.pan_routes."): # neck.pan_routes.0.conv.weight # -> neck.downsample_layers.0.conv.weight - ind = k.split('.')[2] + ind = k.split(".")[2] name = k.replace( - 'neck.pan_routes.' + ind, 'neck.downsample_layers.' + - ('0' if ind == '1' else '1')) - name = name.replace('.0.convs.', '.0.blocks.') + "neck.pan_routes." + ind, + "neck.downsample_layers." 
+ ("0" if ind == "1" else "1"), + ) + name = name.replace(".0.convs.", ".0.blocks.") else: - raise NotImplementedError('Not implement.') + raise NotImplementedError("Not implement.") name = convert_repvgg(name) name = convert_bn(name) - elif k.startswith('yolo_head.'): - if ('anchor_points' in k) or ('stride_tensor' in k): + elif k.startswith("yolo_head."): + if ("anchor_points" in k) or ("stride_tensor" in k): continue - if 'proj_conv' in k: - name = k.replace('yolo_head.proj_conv.', - 'bbox_head.head_module.proj_conv.') + if "proj_conv" in k: + name = k.replace( + "yolo_head.proj_conv.", "bbox_head.head_module.proj_conv." + ) else: for org_key, rep_key in [ - [ - 'yolo_head.stem_cls.', - 'bbox_head.head_module.cls_stems.' - ], - [ - 'yolo_head.stem_reg.', - 'bbox_head.head_module.reg_stems.' - ], - [ - 'yolo_head.pred_cls.', - 'bbox_head.head_module.cls_preds.' - ], - [ - 'yolo_head.pred_reg.', - 'bbox_head.head_module.reg_preds.' - ] + ["yolo_head.stem_cls.", "bbox_head.head_module.cls_stems."], + ["yolo_head.stem_reg.", "bbox_head.head_module.reg_stems."], + ["yolo_head.pred_cls.", "bbox_head.head_module.cls_preds."], + ["yolo_head.pred_reg.", "bbox_head.head_module.reg_preds."], ]: name = name.replace(org_key, rep_key) - name = name.split('.') + name = name.split(".") ind = name[3] name[3] = str(2 - int(ind)) - name = '.'.join(name) + name = ".".join(name) name = convert_bn(name) else: continue new_state_dict[name] = torch.from_numpy(v) - data = {'state_dict': new_state_dict} + data = {"state_dict": new_state_dict} torch.save(data, dst) def main(): - parser = argparse.ArgumentParser(description='Convert model keys') - parser.add_argument( - '--src', - default='ppyoloe_plus_crn_s_80e_coco.pdparams', - help='src ppyoloe model path') + parser = argparse.ArgumentParser(description="Convert model keys") parser.add_argument( - '--dst', default='mmppyoloe_plus_s.pt', help='save path') + "--src", + default="ppyoloe_plus_crn_s_80e_coco.pdparams", + help="src ppyoloe model path", + ) + parser.add_argument("--dst", default="mmppyoloe_plus_s.pt", help="save path") parser.add_argument( - '--imagenet-pretrain', - action='store_true', + "--imagenet-pretrain", + action="store_true", default=False, - help='Load model pretrained on imagenet dataset which only ' - 'have weight for backbone.') + help="Load model pretrained on imagenet dataset which only " + "have weight for backbone.", + ) args = parser.parse_args() convert(args.src, args.dst, args.imagenet_pretrain) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/model_converters/rtmdet_to_mmyolo.py b/mmyolo/tools/model_converters/rtmdet_to_mmyolo.py index 35f24dbf..f39b75e8 100644 --- a/mmyolo/tools/model_converters/rtmdet_to_mmyolo.py +++ b/mmyolo/tools/model_converters/rtmdet_to_mmyolo.py @@ -7,55 +7,55 @@ def convert(src, dst): """Convert keys in pretrained RTMDet models to MMYOLO style.""" - blobs = torch.load(src)['state_dict'] + blobs = torch.load(src)["state_dict"] state_dict = OrderedDict() for key, weight in blobs.items(): - if 'neck.reduce_layers.0' in key: - new_key = key.replace('.0', '.2') + if "neck.reduce_layers.0" in key: + new_key = key.replace(".0", ".2") state_dict[new_key] = weight - elif 'neck.reduce_layers.1' in key: - new_key = key.replace('reduce_layers.1', 'top_down_layers.0.1') + elif "neck.reduce_layers.1" in key: + new_key = key.replace("reduce_layers.1", "top_down_layers.0.1") state_dict[new_key] = weight - elif 'neck.top_down_blocks.0' in key: - new_key = key.replace('down_blocks', 
'down_layers.0') + elif "neck.top_down_blocks.0" in key: + new_key = key.replace("down_blocks", "down_layers.0") state_dict[new_key] = weight - elif 'neck.top_down_blocks.1' in key: - new_key = key.replace('down_blocks', 'down_layers') + elif "neck.top_down_blocks.1" in key: + new_key = key.replace("down_blocks", "down_layers") state_dict[new_key] = weight - elif 'downsamples' in key: - new_key = key.replace('downsamples', 'downsample_layers') + elif "downsamples" in key: + new_key = key.replace("downsamples", "downsample_layers") state_dict[new_key] = weight - elif 'bottom_up_blocks' in key: - new_key = key.replace('bottom_up_blocks', 'bottom_up_layers') + elif "bottom_up_blocks" in key: + new_key = key.replace("bottom_up_blocks", "bottom_up_layers") state_dict[new_key] = weight - elif 'out_convs' in key: - new_key = key.replace('out_convs', 'out_layers') + elif "out_convs" in key: + new_key = key.replace("out_convs", "out_layers") state_dict[new_key] = weight - elif 'bbox_head' in key: - new_key = key.replace('bbox_head', 'bbox_head.head_module') + elif "bbox_head" in key: + new_key = key.replace("bbox_head", "bbox_head.head_module") state_dict[new_key] = weight - elif 'data_preprocessor' in key: + elif "data_preprocessor" in key: continue else: new_key = key state_dict[new_key] = weight - print(f'Convert {key} to {new_key}') + print(f"Convert {key} to {new_key}") # save checkpoint checkpoint = dict() - checkpoint['state_dict'] = state_dict - checkpoint['meta'] = blobs['meta'] + checkpoint["state_dict"] = state_dict + checkpoint["meta"] = blobs["meta"] torch.save(checkpoint, dst) def main(): - parser = argparse.ArgumentParser(description='Convert model keys') - parser.add_argument('src', help='src rtm model path') - parser.add_argument('dst', help='save path') + parser = argparse.ArgumentParser(description="Convert model keys") + parser.add_argument("src", help="src rtm model path") + parser.add_argument("dst", help="save path") args = parser.parse_args() convert(args.src, args.dst) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/model_converters/yolov5_to_mmyolo.py b/mmyolo/tools/model_converters/yolov5_to_mmyolo.py index c1d4e41d..bbe2fa9e 100644 --- a/mmyolo/tools/model_converters/yolov5_to_mmyolo.py +++ b/mmyolo/tools/model_converters/yolov5_to_mmyolo.py @@ -5,118 +5,118 @@ import torch convert_dict_p5 = { - 'model.0': 'backbone.stem', - 'model.1': 'backbone.stage1.0', - 'model.2': 'backbone.stage1.1', - 'model.3': 'backbone.stage2.0', - 'model.4': 'backbone.stage2.1', - 'model.5': 'backbone.stage3.0', - 'model.6': 'backbone.stage3.1', - 'model.7': 'backbone.stage4.0', - 'model.8': 'backbone.stage4.1', - 'model.9.cv1': 'backbone.stage4.2.conv1', - 'model.9.cv2': 'backbone.stage4.2.conv2', - 'model.10': 'neck.reduce_layers.2', - 'model.13': 'neck.top_down_layers.0.0', - 'model.14': 'neck.top_down_layers.0.1', - 'model.17': 'neck.top_down_layers.1', - 'model.18': 'neck.downsample_layers.0', - 'model.20': 'neck.bottom_up_layers.0', - 'model.21': 'neck.downsample_layers.1', - 'model.23': 'neck.bottom_up_layers.1', - 'model.24.m': 'bbox_head.head_module.convs_pred', + "model.0": "backbone.stem", + "model.1": "backbone.stage1.0", + "model.2": "backbone.stage1.1", + "model.3": "backbone.stage2.0", + "model.4": "backbone.stage2.1", + "model.5": "backbone.stage3.0", + "model.6": "backbone.stage3.1", + "model.7": "backbone.stage4.0", + "model.8": "backbone.stage4.1", + "model.9.cv1": "backbone.stage4.2.conv1", + "model.9.cv2": "backbone.stage4.2.conv2", 
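    # [annotation, not part of the patch] Reading the mapping: ultralytics
    # checkpoints index modules by position in one flat sequence
    # ("model.<idx>"), while MMYOLO names them structurally. model.0-9 above
    # are the backbone (model.9 being the final SPP-style block, split into
    # conv1/conv2); the entries below cover the PAFPN neck and, last, the
    # per-scale prediction convs of the head.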
+ "model.10": "neck.reduce_layers.2", + "model.13": "neck.top_down_layers.0.0", + "model.14": "neck.top_down_layers.0.1", + "model.17": "neck.top_down_layers.1", + "model.18": "neck.downsample_layers.0", + "model.20": "neck.bottom_up_layers.0", + "model.21": "neck.downsample_layers.1", + "model.23": "neck.bottom_up_layers.1", + "model.24.m": "bbox_head.head_module.convs_pred", } convert_dict_p6 = { - 'model.0': 'backbone.stem', - 'model.1': 'backbone.stage1.0', - 'model.2': 'backbone.stage1.1', - 'model.3': 'backbone.stage2.0', - 'model.4': 'backbone.stage2.1', - 'model.5': 'backbone.stage3.0', - 'model.6': 'backbone.stage3.1', - 'model.7': 'backbone.stage4.0', - 'model.8': 'backbone.stage4.1', - 'model.9': 'backbone.stage5.0', - 'model.10': 'backbone.stage5.1', - 'model.11.cv1': 'backbone.stage5.2.conv1', - 'model.11.cv2': 'backbone.stage5.2.conv2', - 'model.12': 'neck.reduce_layers.3', - 'model.15': 'neck.top_down_layers.0.0', - 'model.16': 'neck.top_down_layers.0.1', - 'model.19': 'neck.top_down_layers.1.0', - 'model.20': 'neck.top_down_layers.1.1', - 'model.23': 'neck.top_down_layers.2', - 'model.24': 'neck.downsample_layers.0', - 'model.26': 'neck.bottom_up_layers.0', - 'model.27': 'neck.downsample_layers.1', - 'model.29': 'neck.bottom_up_layers.1', - 'model.30': 'neck.downsample_layers.2', - 'model.32': 'neck.bottom_up_layers.2', - 'model.33.m': 'bbox_head.head_module.convs_pred', + "model.0": "backbone.stem", + "model.1": "backbone.stage1.0", + "model.2": "backbone.stage1.1", + "model.3": "backbone.stage2.0", + "model.4": "backbone.stage2.1", + "model.5": "backbone.stage3.0", + "model.6": "backbone.stage3.1", + "model.7": "backbone.stage4.0", + "model.8": "backbone.stage4.1", + "model.9": "backbone.stage5.0", + "model.10": "backbone.stage5.1", + "model.11.cv1": "backbone.stage5.2.conv1", + "model.11.cv2": "backbone.stage5.2.conv2", + "model.12": "neck.reduce_layers.3", + "model.15": "neck.top_down_layers.0.0", + "model.16": "neck.top_down_layers.0.1", + "model.19": "neck.top_down_layers.1.0", + "model.20": "neck.top_down_layers.1.1", + "model.23": "neck.top_down_layers.2", + "model.24": "neck.downsample_layers.0", + "model.26": "neck.bottom_up_layers.0", + "model.27": "neck.downsample_layers.1", + "model.29": "neck.bottom_up_layers.1", + "model.30": "neck.downsample_layers.2", + "model.32": "neck.bottom_up_layers.2", + "model.33.m": "bbox_head.head_module.convs_pred", } def convert(src, dst): """Convert keys in pretrained YOLOv5 models to mmyolo style.""" - if src.endswith('6.pt'): + if src.endswith("6.pt"): convert_dict = convert_dict_p6 is_p6_model = True - print('Converting P6 model') + print("Converting P6 model") else: convert_dict = convert_dict_p5 is_p6_model = False - print('Converting P5 model') + print("Converting P5 model") try: - yolov5_model = torch.load(src)['model'] + yolov5_model = torch.load(src)["model"] blobs = yolov5_model.state_dict() except ModuleNotFoundError: raise RuntimeError( - 'This script must be placed under the ultralytics/yolov5 repo,' - ' because loading the official pretrained model need' - ' `model.py` to build model.') + "This script must be placed under the ultralytics/yolov5 repo," + " because loading the official pretrained model need" + " `model.py` to build model." 
+ ) state_dict = OrderedDict() for key, weight in blobs.items(): - num, module = key.split('.')[1:3] - if (is_p6_model and - (num == '11' or num == '33')) or (not is_p6_model and - (num == '9' or num == '24')): - if module == 'anchors': + num, module = key.split(".")[1:3] + if (is_p6_model and (num == "11" or num == "33")) or ( + not is_p6_model and (num == "9" or num == "24") + ): + if module == "anchors": continue - prefix = f'model.{num}.{module}' + prefix = f"model.{num}.{module}" else: - prefix = f'model.{num}' + prefix = f"model.{num}" new_key = key.replace(prefix, convert_dict[prefix]) - if '.m.' in new_key: - new_key = new_key.replace('.m.', '.blocks.') - new_key = new_key.replace('.cv', '.conv') + if ".m." in new_key: + new_key = new_key.replace(".m.", ".blocks.") + new_key = new_key.replace(".cv", ".conv") else: - new_key = new_key.replace('.cv1', '.main_conv') - new_key = new_key.replace('.cv2', '.short_conv') - new_key = new_key.replace('.cv3', '.final_conv') + new_key = new_key.replace(".cv1", ".main_conv") + new_key = new_key.replace(".cv2", ".short_conv") + new_key = new_key.replace(".cv3", ".final_conv") state_dict[new_key] = weight - print(f'Convert {key} to {new_key}') + print(f"Convert {key} to {new_key}") # save checkpoint checkpoint = dict() - checkpoint['state_dict'] = state_dict + checkpoint["state_dict"] = state_dict torch.save(checkpoint, dst) # Note: This script must be placed under the yolov5 repo to run. def main(): - parser = argparse.ArgumentParser(description='Convert model keys') - parser.add_argument( - '--src', default='yolov5s.pt', help='src yolov5 model path') - parser.add_argument('--dst', default='mmyolov5s.pt', help='save path') + parser = argparse.ArgumentParser(description="Convert model keys") + parser.add_argument("--src", default="yolov5s.pt", help="src yolov5 model path") + parser.add_argument("--dst", default="mmyolov5s.pt", help="save path") args = parser.parse_args() convert(args.src, args.dst) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/model_converters/yolov6_to_mmyolo.py b/mmyolo/tools/model_converters/yolov6_to_mmyolo.py index e9e86ab4..d8f2d3e3 100644 --- a/mmyolo/tools/model_converters/yolov6_to_mmyolo.py +++ b/mmyolo/tools/model_converters/yolov6_to_mmyolo.py @@ -6,110 +6,109 @@ def convert(src, dst): import sys - sys.path.append('yolov6') + + sys.path.append("yolov6") try: - ckpt = torch.load(src, map_location=torch.device('cpu')) + ckpt = torch.load(src, map_location=torch.device("cpu")) except ModuleNotFoundError: raise RuntimeError( - 'This script must be placed under the meituan/YOLOv6 repo,' - ' because loading the official pretrained model need' - ' some python files to build model.') + "This script must be placed under the meituan/YOLOv6 repo," + " because loading the official pretrained model need" + " some python files to build model." 
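# [annotation, not part of the patch] Unlike the YOLOv5 converter, this one
# prefers the EMA weights when the checkpoint carries them; the line below
# reads, in effect:
#
#   state = ckpt["ema"] if ckpt.get("ema") else ckpt["model"]
#
# and, per the script's own comment, the checkpoint is saved before
# reparameterization, so multi-branch block weights are renamed as-is rather
# than fused.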
+ ) # The saved model is the model before reparameterization - model = ckpt['ema' if ckpt.get('ema') else 'model'].float() + model = ckpt["ema" if ckpt.get("ema") else "model"].float() new_state_dict = OrderedDict() for k, v in model.state_dict().items(): name = k - if 'detect' in k: - if 'proj' in k: + if "detect" in k: + if "proj" in k: continue - name = k.replace('detect', 'bbox_head.head_module') - if k.find('anchors') >= 0 or k.find('anchor_grid') >= 0: + name = k.replace("detect", "bbox_head.head_module") + if k.find("anchors") >= 0 or k.find("anchor_grid") >= 0: continue - if 'ERBlock_2' in k: - name = k.replace('ERBlock_2', 'stage1.0') - if '.cv' in k: - name = name.replace('.cv', '.conv') - if '.m.' in k: - name = name.replace('.m.', '.block.') - elif 'ERBlock_3' in k: - name = k.replace('ERBlock_3', 'stage2.0') - if '.cv' in k: - name = name.replace('.cv', '.conv') - if '.m.' in k: - name = name.replace('.m.', '.block.') - elif 'ERBlock_4' in k: - name = k.replace('ERBlock_4', 'stage3.0') - if '.cv' in k: - name = name.replace('.cv', '.conv') - if '.m.' in k: - name = name.replace('.m.', '.block.') - elif 'ERBlock_5' in k: - name = k.replace('ERBlock_5', 'stage4.0') - if '.cv' in k: - name = name.replace('.cv', '.conv') - if '.m.' in k: - name = name.replace('.m.', '.block.') - if 'stage4.0.2' in name: - name = name.replace('stage4.0.2', 'stage4.1') - name = name.replace('cv', 'conv') - elif 'reduce_layer0' in k: - name = k.replace('reduce_layer0', 'reduce_layers.2') - elif 'Rep_p4' in k: - name = k.replace('Rep_p4', 'top_down_layers.0.0') - if '.cv' in k: - name = name.replace('.cv', '.conv') - if '.m.' in k: - name = name.replace('.m.', '.block.') - elif 'reduce_layer1' in k: - name = k.replace('reduce_layer1', 'top_down_layers.0.1') - if '.cv' in k: - name = name.replace('.cv', '.conv') - if '.m.' in k: - name = name.replace('.m.', '.block.') - elif 'Rep_p3' in k: - name = k.replace('Rep_p3', 'top_down_layers.1') - if '.cv' in k: - name = name.replace('.cv', '.conv') - if '.m.' in k: - name = name.replace('.m.', '.block.') - elif 'upsample0' in k: - name = k.replace('upsample0.upsample_transpose', - 'upsample_layers.0') - elif 'upsample1' in k: - name = k.replace('upsample1.upsample_transpose', - 'upsample_layers.1') - elif 'Rep_n3' in k: - name = k.replace('Rep_n3', 'bottom_up_layers.0') - if '.cv' in k: - name = name.replace('.cv', '.conv') - if '.m.' in k: - name = name.replace('.m.', '.block.') - elif 'Rep_n4' in k: - name = k.replace('Rep_n4', 'bottom_up_layers.1') - if '.cv' in k: - name = name.replace('.cv', '.conv') - if '.m.' in k: - name = name.replace('.m.', '.block.') - elif 'downsample2' in k: - name = k.replace('downsample2', 'downsample_layers.0') - elif 'downsample1' in k: - name = k.replace('downsample1', 'downsample_layers.1') + if "ERBlock_2" in k: + name = k.replace("ERBlock_2", "stage1.0") + if ".cv" in k: + name = name.replace(".cv", ".conv") + if ".m." in k: + name = name.replace(".m.", ".block.") + elif "ERBlock_3" in k: + name = k.replace("ERBlock_3", "stage2.0") + if ".cv" in k: + name = name.replace(".cv", ".conv") + if ".m." in k: + name = name.replace(".m.", ".block.") + elif "ERBlock_4" in k: + name = k.replace("ERBlock_4", "stage3.0") + if ".cv" in k: + name = name.replace(".cv", ".conv") + if ".m." in k: + name = name.replace(".m.", ".block.") + elif "ERBlock_5" in k: + name = k.replace("ERBlock_5", "stage4.0") + if ".cv" in k: + name = name.replace(".cv", ".conv") + if ".m." 
in k: + name = name.replace(".m.", ".block.") + if "stage4.0.2" in name: + name = name.replace("stage4.0.2", "stage4.1") + name = name.replace("cv", "conv") + elif "reduce_layer0" in k: + name = k.replace("reduce_layer0", "reduce_layers.2") + elif "Rep_p4" in k: + name = k.replace("Rep_p4", "top_down_layers.0.0") + if ".cv" in k: + name = name.replace(".cv", ".conv") + if ".m." in k: + name = name.replace(".m.", ".block.") + elif "reduce_layer1" in k: + name = k.replace("reduce_layer1", "top_down_layers.0.1") + if ".cv" in k: + name = name.replace(".cv", ".conv") + if ".m." in k: + name = name.replace(".m.", ".block.") + elif "Rep_p3" in k: + name = k.replace("Rep_p3", "top_down_layers.1") + if ".cv" in k: + name = name.replace(".cv", ".conv") + if ".m." in k: + name = name.replace(".m.", ".block.") + elif "upsample0" in k: + name = k.replace("upsample0.upsample_transpose", "upsample_layers.0") + elif "upsample1" in k: + name = k.replace("upsample1.upsample_transpose", "upsample_layers.1") + elif "Rep_n3" in k: + name = k.replace("Rep_n3", "bottom_up_layers.0") + if ".cv" in k: + name = name.replace(".cv", ".conv") + if ".m." in k: + name = name.replace(".m.", ".block.") + elif "Rep_n4" in k: + name = k.replace("Rep_n4", "bottom_up_layers.1") + if ".cv" in k: + name = name.replace(".cv", ".conv") + if ".m." in k: + name = name.replace(".m.", ".block.") + elif "downsample2" in k: + name = k.replace("downsample2", "downsample_layers.0") + elif "downsample1" in k: + name = k.replace("downsample1", "downsample_layers.1") new_state_dict[name] = v - data = {'state_dict': new_state_dict} + data = {"state_dict": new_state_dict} torch.save(data, dst) # Note: This script must be placed under the yolov6 repo to run. def main(): - parser = argparse.ArgumentParser(description='Convert model keys') - parser.add_argument( - '--src', default='yolov6s.pt', help='src yolov6 model path') - parser.add_argument('--dst', default='mmyolov6.pt', help='save path') + parser = argparse.ArgumentParser(description="Convert model keys") + parser.add_argument("--src", default="yolov6s.pt", help="src yolov6 model path") + parser.add_argument("--dst", default="mmyolov6.pt", help="save path") args = parser.parse_args() convert(args.src, args.dst) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/mmyolo/tools/model_converters/yolov7_to_mmyolo.py b/mmyolo/tools/model_converters/yolov7_to_mmyolo.py index f8bff947..c08c6286 100644 --- a/mmyolo/tools/model_converters/yolov7_to_mmyolo.py +++ b/mmyolo/tools/model_converters/yolov7_to_mmyolo.py @@ -7,702 +7,636 @@ convert_dict_tiny = { # stem - 'model.0': 'backbone.stem.0', - 'model.1': 'backbone.stem.1', - + "model.0": "backbone.stem.0", + "model.1": "backbone.stem.1", # stage1 TinyDownSampleBlock - 'model.2': 'backbone.stage1.0.short_conv', - 'model.3': 'backbone.stage1.0.main_convs.0', - 'model.4': 'backbone.stage1.0.main_convs.1', - 'model.5': 'backbone.stage1.0.main_convs.2', - 'model.7': 'backbone.stage1.0.final_conv', - + "model.2": "backbone.stage1.0.short_conv", + "model.3": "backbone.stage1.0.main_convs.0", + "model.4": "backbone.stage1.0.main_convs.1", + "model.5": "backbone.stage1.0.main_convs.2", + "model.7": "backbone.stage1.0.final_conv", # stage2 TinyDownSampleBlock - 'model.9': 'backbone.stage2.1.short_conv', - 'model.10': 'backbone.stage2.1.main_convs.0', - 'model.11': 'backbone.stage2.1.main_convs.1', - 'model.12': 'backbone.stage2.1.main_convs.2', - 'model.14': 'backbone.stage2.1.final_conv', - + "model.9": 
"backbone.stage2.1.short_conv", + "model.10": "backbone.stage2.1.main_convs.0", + "model.11": "backbone.stage2.1.main_convs.1", + "model.12": "backbone.stage2.1.main_convs.2", + "model.14": "backbone.stage2.1.final_conv", # stage3 TinyDownSampleBlock - 'model.16': 'backbone.stage3.1.short_conv', - 'model.17': 'backbone.stage3.1.main_convs.0', - 'model.18': 'backbone.stage3.1.main_convs.1', - 'model.19': 'backbone.stage3.1.main_convs.2', - 'model.21': 'backbone.stage3.1.final_conv', - + "model.16": "backbone.stage3.1.short_conv", + "model.17": "backbone.stage3.1.main_convs.0", + "model.18": "backbone.stage3.1.main_convs.1", + "model.19": "backbone.stage3.1.main_convs.2", + "model.21": "backbone.stage3.1.final_conv", # stage4 TinyDownSampleBlock - 'model.23': 'backbone.stage4.1.short_conv', - 'model.24': 'backbone.stage4.1.main_convs.0', - 'model.25': 'backbone.stage4.1.main_convs.1', - 'model.26': 'backbone.stage4.1.main_convs.2', - 'model.28': 'backbone.stage4.1.final_conv', - + "model.23": "backbone.stage4.1.short_conv", + "model.24": "backbone.stage4.1.main_convs.0", + "model.25": "backbone.stage4.1.main_convs.1", + "model.26": "backbone.stage4.1.main_convs.2", + "model.28": "backbone.stage4.1.final_conv", # neck SPPCSPBlock - 'model.29': 'neck.reduce_layers.2.short_layer', - 'model.30': 'neck.reduce_layers.2.main_layers', - 'model.35': 'neck.reduce_layers.2.fuse_layers', - 'model.37': 'neck.reduce_layers.2.final_conv', - 'model.38': 'neck.upsample_layers.0.0', - 'model.40': 'neck.reduce_layers.1', - 'model.42': 'neck.top_down_layers.0.short_conv', - 'model.43': 'neck.top_down_layers.0.main_convs.0', - 'model.44': 'neck.top_down_layers.0.main_convs.1', - 'model.45': 'neck.top_down_layers.0.main_convs.2', - 'model.47': 'neck.top_down_layers.0.final_conv', - 'model.48': 'neck.upsample_layers.1.0', - 'model.50': 'neck.reduce_layers.0', - 'model.52': 'neck.top_down_layers.1.short_conv', - 'model.53': 'neck.top_down_layers.1.main_convs.0', - 'model.54': 'neck.top_down_layers.1.main_convs.1', - 'model.55': 'neck.top_down_layers.1.main_convs.2', - 'model.57': 'neck.top_down_layers.1.final_conv', - 'model.58': 'neck.downsample_layers.0', - 'model.60': 'neck.bottom_up_layers.0.short_conv', - 'model.61': 'neck.bottom_up_layers.0.main_convs.0', - 'model.62': 'neck.bottom_up_layers.0.main_convs.1', - 'model.63': 'neck.bottom_up_layers.0.main_convs.2', - 'model.65': 'neck.bottom_up_layers.0.final_conv', - 'model.66': 'neck.downsample_layers.1', - 'model.68': 'neck.bottom_up_layers.1.short_conv', - 'model.69': 'neck.bottom_up_layers.1.main_convs.0', - 'model.70': 'neck.bottom_up_layers.1.main_convs.1', - 'model.71': 'neck.bottom_up_layers.1.main_convs.2', - 'model.73': 'neck.bottom_up_layers.1.final_conv', - 'model.74': 'neck.out_layers.0', - 'model.75': 'neck.out_layers.1', - 'model.76': 'neck.out_layers.2', - + "model.29": "neck.reduce_layers.2.short_layer", + "model.30": "neck.reduce_layers.2.main_layers", + "model.35": "neck.reduce_layers.2.fuse_layers", + "model.37": "neck.reduce_layers.2.final_conv", + "model.38": "neck.upsample_layers.0.0", + "model.40": "neck.reduce_layers.1", + "model.42": "neck.top_down_layers.0.short_conv", + "model.43": "neck.top_down_layers.0.main_convs.0", + "model.44": "neck.top_down_layers.0.main_convs.1", + "model.45": "neck.top_down_layers.0.main_convs.2", + "model.47": "neck.top_down_layers.0.final_conv", + "model.48": "neck.upsample_layers.1.0", + "model.50": "neck.reduce_layers.0", + "model.52": "neck.top_down_layers.1.short_conv", + "model.53": 
"neck.top_down_layers.1.main_convs.0", + "model.54": "neck.top_down_layers.1.main_convs.1", + "model.55": "neck.top_down_layers.1.main_convs.2", + "model.57": "neck.top_down_layers.1.final_conv", + "model.58": "neck.downsample_layers.0", + "model.60": "neck.bottom_up_layers.0.short_conv", + "model.61": "neck.bottom_up_layers.0.main_convs.0", + "model.62": "neck.bottom_up_layers.0.main_convs.1", + "model.63": "neck.bottom_up_layers.0.main_convs.2", + "model.65": "neck.bottom_up_layers.0.final_conv", + "model.66": "neck.downsample_layers.1", + "model.68": "neck.bottom_up_layers.1.short_conv", + "model.69": "neck.bottom_up_layers.1.main_convs.0", + "model.70": "neck.bottom_up_layers.1.main_convs.1", + "model.71": "neck.bottom_up_layers.1.main_convs.2", + "model.73": "neck.bottom_up_layers.1.final_conv", + "model.74": "neck.out_layers.0", + "model.75": "neck.out_layers.1", + "model.76": "neck.out_layers.2", # head - 'model.77.m.0': 'bbox_head.head_module.convs_pred.0.1', - 'model.77.m.1': 'bbox_head.head_module.convs_pred.1.1', - 'model.77.m.2': 'bbox_head.head_module.convs_pred.2.1' + "model.77.m.0": "bbox_head.head_module.convs_pred.0.1", + "model.77.m.1": "bbox_head.head_module.convs_pred.1.1", + "model.77.m.2": "bbox_head.head_module.convs_pred.2.1", } convert_dict_l = { # stem - 'model.0': 'backbone.stem.0', - 'model.1': 'backbone.stem.1', - 'model.2': 'backbone.stem.2', - + "model.0": "backbone.stem.0", + "model.1": "backbone.stem.1", + "model.2": "backbone.stem.2", # stage1 # ConvModule - 'model.3': 'backbone.stage1.0', + "model.3": "backbone.stage1.0", # ELANBlock expand_channel_2x - 'model.4': 'backbone.stage1.1.short_conv', - 'model.5': 'backbone.stage1.1.main_conv', - 'model.6': 'backbone.stage1.1.blocks.0.0', - 'model.7': 'backbone.stage1.1.blocks.0.1', - 'model.8': 'backbone.stage1.1.blocks.1.0', - 'model.9': 'backbone.stage1.1.blocks.1.1', - 'model.11': 'backbone.stage1.1.final_conv', - + "model.4": "backbone.stage1.1.short_conv", + "model.5": "backbone.stage1.1.main_conv", + "model.6": "backbone.stage1.1.blocks.0.0", + "model.7": "backbone.stage1.1.blocks.0.1", + "model.8": "backbone.stage1.1.blocks.1.0", + "model.9": "backbone.stage1.1.blocks.1.1", + "model.11": "backbone.stage1.1.final_conv", # stage2 # MaxPoolBlock reduce_channel_2x - 'model.13': 'backbone.stage2.0.maxpool_branches.1', - 'model.14': 'backbone.stage2.0.stride_conv_branches.0', - 'model.15': 'backbone.stage2.0.stride_conv_branches.1', + "model.13": "backbone.stage2.0.maxpool_branches.1", + "model.14": "backbone.stage2.0.stride_conv_branches.0", + "model.15": "backbone.stage2.0.stride_conv_branches.1", # ELANBlock expand_channel_2x - 'model.17': 'backbone.stage2.1.short_conv', - 'model.18': 'backbone.stage2.1.main_conv', - 'model.19': 'backbone.stage2.1.blocks.0.0', - 'model.20': 'backbone.stage2.1.blocks.0.1', - 'model.21': 'backbone.stage2.1.blocks.1.0', - 'model.22': 'backbone.stage2.1.blocks.1.1', - 'model.24': 'backbone.stage2.1.final_conv', - + "model.17": "backbone.stage2.1.short_conv", + "model.18": "backbone.stage2.1.main_conv", + "model.19": "backbone.stage2.1.blocks.0.0", + "model.20": "backbone.stage2.1.blocks.0.1", + "model.21": "backbone.stage2.1.blocks.1.0", + "model.22": "backbone.stage2.1.blocks.1.1", + "model.24": "backbone.stage2.1.final_conv", # stage3 # MaxPoolBlock reduce_channel_2x - 'model.26': 'backbone.stage3.0.maxpool_branches.1', - 'model.27': 'backbone.stage3.0.stride_conv_branches.0', - 'model.28': 'backbone.stage3.0.stride_conv_branches.1', + "model.26": 
"backbone.stage3.0.maxpool_branches.1", + "model.27": "backbone.stage3.0.stride_conv_branches.0", + "model.28": "backbone.stage3.0.stride_conv_branches.1", # ELANBlock expand_channel_2x - 'model.30': 'backbone.stage3.1.short_conv', - 'model.31': 'backbone.stage3.1.main_conv', - 'model.32': 'backbone.stage3.1.blocks.0.0', - 'model.33': 'backbone.stage3.1.blocks.0.1', - 'model.34': 'backbone.stage3.1.blocks.1.0', - 'model.35': 'backbone.stage3.1.blocks.1.1', - 'model.37': 'backbone.stage3.1.final_conv', - + "model.30": "backbone.stage3.1.short_conv", + "model.31": "backbone.stage3.1.main_conv", + "model.32": "backbone.stage3.1.blocks.0.0", + "model.33": "backbone.stage3.1.blocks.0.1", + "model.34": "backbone.stage3.1.blocks.1.0", + "model.35": "backbone.stage3.1.blocks.1.1", + "model.37": "backbone.stage3.1.final_conv", # stage4 # MaxPoolBlock reduce_channel_2x - 'model.39': 'backbone.stage4.0.maxpool_branches.1', - 'model.40': 'backbone.stage4.0.stride_conv_branches.0', - 'model.41': 'backbone.stage4.0.stride_conv_branches.1', + "model.39": "backbone.stage4.0.maxpool_branches.1", + "model.40": "backbone.stage4.0.stride_conv_branches.0", + "model.41": "backbone.stage4.0.stride_conv_branches.1", # ELANBlock no_change_channel - 'model.43': 'backbone.stage4.1.short_conv', - 'model.44': 'backbone.stage4.1.main_conv', - 'model.45': 'backbone.stage4.1.blocks.0.0', - 'model.46': 'backbone.stage4.1.blocks.0.1', - 'model.47': 'backbone.stage4.1.blocks.1.0', - 'model.48': 'backbone.stage4.1.blocks.1.1', - 'model.50': 'backbone.stage4.1.final_conv', - + "model.43": "backbone.stage4.1.short_conv", + "model.44": "backbone.stage4.1.main_conv", + "model.45": "backbone.stage4.1.blocks.0.0", + "model.46": "backbone.stage4.1.blocks.0.1", + "model.47": "backbone.stage4.1.blocks.1.0", + "model.48": "backbone.stage4.1.blocks.1.1", + "model.50": "backbone.stage4.1.final_conv", # neck SPPCSPBlock - 'model.51.cv1': 'neck.reduce_layers.2.main_layers.0', - 'model.51.cv3': 'neck.reduce_layers.2.main_layers.1', - 'model.51.cv4': 'neck.reduce_layers.2.main_layers.2', - 'model.51.cv5': 'neck.reduce_layers.2.fuse_layers.0', - 'model.51.cv6': 'neck.reduce_layers.2.fuse_layers.1', - 'model.51.cv2': 'neck.reduce_layers.2.short_layer', - 'model.51.cv7': 'neck.reduce_layers.2.final_conv', - + "model.51.cv1": "neck.reduce_layers.2.main_layers.0", + "model.51.cv3": "neck.reduce_layers.2.main_layers.1", + "model.51.cv4": "neck.reduce_layers.2.main_layers.2", + "model.51.cv5": "neck.reduce_layers.2.fuse_layers.0", + "model.51.cv6": "neck.reduce_layers.2.fuse_layers.1", + "model.51.cv2": "neck.reduce_layers.2.short_layer", + "model.51.cv7": "neck.reduce_layers.2.final_conv", # neck - 'model.52': 'neck.upsample_layers.0.0', - 'model.54': 'neck.reduce_layers.1', - + "model.52": "neck.upsample_layers.0.0", + "model.54": "neck.reduce_layers.1", # neck ELANBlock reduce_channel_2x - 'model.56': 'neck.top_down_layers.0.short_conv', - 'model.57': 'neck.top_down_layers.0.main_conv', - 'model.58': 'neck.top_down_layers.0.blocks.0', - 'model.59': 'neck.top_down_layers.0.blocks.1', - 'model.60': 'neck.top_down_layers.0.blocks.2', - 'model.61': 'neck.top_down_layers.0.blocks.3', - 'model.63': 'neck.top_down_layers.0.final_conv', - 'model.64': 'neck.upsample_layers.1.0', - 'model.66': 'neck.reduce_layers.0', - + "model.56": "neck.top_down_layers.0.short_conv", + "model.57": "neck.top_down_layers.0.main_conv", + "model.58": "neck.top_down_layers.0.blocks.0", + "model.59": "neck.top_down_layers.0.blocks.1", + "model.60": 
"neck.top_down_layers.0.blocks.2", + "model.61": "neck.top_down_layers.0.blocks.3", + "model.63": "neck.top_down_layers.0.final_conv", + "model.64": "neck.upsample_layers.1.0", + "model.66": "neck.reduce_layers.0", # neck ELANBlock reduce_channel_2x - 'model.68': 'neck.top_down_layers.1.short_conv', - 'model.69': 'neck.top_down_layers.1.main_conv', - 'model.70': 'neck.top_down_layers.1.blocks.0', - 'model.71': 'neck.top_down_layers.1.blocks.1', - 'model.72': 'neck.top_down_layers.1.blocks.2', - 'model.73': 'neck.top_down_layers.1.blocks.3', - 'model.75': 'neck.top_down_layers.1.final_conv', - + "model.68": "neck.top_down_layers.1.short_conv", + "model.69": "neck.top_down_layers.1.main_conv", + "model.70": "neck.top_down_layers.1.blocks.0", + "model.71": "neck.top_down_layers.1.blocks.1", + "model.72": "neck.top_down_layers.1.blocks.2", + "model.73": "neck.top_down_layers.1.blocks.3", + "model.75": "neck.top_down_layers.1.final_conv", # neck MaxPoolBlock no_change_channel - 'model.77': 'neck.downsample_layers.0.maxpool_branches.1', - 'model.78': 'neck.downsample_layers.0.stride_conv_branches.0', - 'model.79': 'neck.downsample_layers.0.stride_conv_branches.1', - + "model.77": "neck.downsample_layers.0.maxpool_branches.1", + "model.78": "neck.downsample_layers.0.stride_conv_branches.0", + "model.79": "neck.downsample_layers.0.stride_conv_branches.1", # neck ELANBlock reduce_channel_2x - 'model.81': 'neck.bottom_up_layers.0.short_conv', - 'model.82': 'neck.bottom_up_layers.0.main_conv', - 'model.83': 'neck.bottom_up_layers.0.blocks.0', - 'model.84': 'neck.bottom_up_layers.0.blocks.1', - 'model.85': 'neck.bottom_up_layers.0.blocks.2', - 'model.86': 'neck.bottom_up_layers.0.blocks.3', - 'model.88': 'neck.bottom_up_layers.0.final_conv', - + "model.81": "neck.bottom_up_layers.0.short_conv", + "model.82": "neck.bottom_up_layers.0.main_conv", + "model.83": "neck.bottom_up_layers.0.blocks.0", + "model.84": "neck.bottom_up_layers.0.blocks.1", + "model.85": "neck.bottom_up_layers.0.blocks.2", + "model.86": "neck.bottom_up_layers.0.blocks.3", + "model.88": "neck.bottom_up_layers.0.final_conv", # neck MaxPoolBlock no_change_channel - 'model.90': 'neck.downsample_layers.1.maxpool_branches.1', - 'model.91': 'neck.downsample_layers.1.stride_conv_branches.0', - 'model.92': 'neck.downsample_layers.1.stride_conv_branches.1', - + "model.90": "neck.downsample_layers.1.maxpool_branches.1", + "model.91": "neck.downsample_layers.1.stride_conv_branches.0", + "model.92": "neck.downsample_layers.1.stride_conv_branches.1", # neck ELANBlock reduce_channel_2x - 'model.94': 'neck.bottom_up_layers.1.short_conv', - 'model.95': 'neck.bottom_up_layers.1.main_conv', - 'model.96': 'neck.bottom_up_layers.1.blocks.0', - 'model.97': 'neck.bottom_up_layers.1.blocks.1', - 'model.98': 'neck.bottom_up_layers.1.blocks.2', - 'model.99': 'neck.bottom_up_layers.1.blocks.3', - 'model.101': 'neck.bottom_up_layers.1.final_conv', - + "model.94": "neck.bottom_up_layers.1.short_conv", + "model.95": "neck.bottom_up_layers.1.main_conv", + "model.96": "neck.bottom_up_layers.1.blocks.0", + "model.97": "neck.bottom_up_layers.1.blocks.1", + "model.98": "neck.bottom_up_layers.1.blocks.2", + "model.99": "neck.bottom_up_layers.1.blocks.3", + "model.101": "neck.bottom_up_layers.1.final_conv", # RepVGGBlock - 'model.102.rbr_dense.0': 'neck.out_layers.0.rbr_dense.conv', - 'model.102.rbr_dense.1': 'neck.out_layers.0.rbr_dense.bn', - 'model.102.rbr_1x1.0': 'neck.out_layers.0.rbr_1x1.conv', - 'model.102.rbr_1x1.1': 'neck.out_layers.0.rbr_1x1.bn', - 
'model.103.rbr_dense.0': 'neck.out_layers.1.rbr_dense.conv', - 'model.103.rbr_dense.1': 'neck.out_layers.1.rbr_dense.bn', - 'model.103.rbr_1x1.0': 'neck.out_layers.1.rbr_1x1.conv', - 'model.103.rbr_1x1.1': 'neck.out_layers.1.rbr_1x1.bn', - 'model.104.rbr_dense.0': 'neck.out_layers.2.rbr_dense.conv', - 'model.104.rbr_dense.1': 'neck.out_layers.2.rbr_dense.bn', - 'model.104.rbr_1x1.0': 'neck.out_layers.2.rbr_1x1.conv', - 'model.104.rbr_1x1.1': 'neck.out_layers.2.rbr_1x1.bn', - + "model.102.rbr_dense.0": "neck.out_layers.0.rbr_dense.conv", + "model.102.rbr_dense.1": "neck.out_layers.0.rbr_dense.bn", + "model.102.rbr_1x1.0": "neck.out_layers.0.rbr_1x1.conv", + "model.102.rbr_1x1.1": "neck.out_layers.0.rbr_1x1.bn", + "model.103.rbr_dense.0": "neck.out_layers.1.rbr_dense.conv", + "model.103.rbr_dense.1": "neck.out_layers.1.rbr_dense.bn", + "model.103.rbr_1x1.0": "neck.out_layers.1.rbr_1x1.conv", + "model.103.rbr_1x1.1": "neck.out_layers.1.rbr_1x1.bn", + "model.104.rbr_dense.0": "neck.out_layers.2.rbr_dense.conv", + "model.104.rbr_dense.1": "neck.out_layers.2.rbr_dense.bn", + "model.104.rbr_1x1.0": "neck.out_layers.2.rbr_1x1.conv", + "model.104.rbr_1x1.1": "neck.out_layers.2.rbr_1x1.bn", # head - 'model.105.m.0': 'bbox_head.head_module.convs_pred.0.1', - 'model.105.m.1': 'bbox_head.head_module.convs_pred.1.1', - 'model.105.m.2': 'bbox_head.head_module.convs_pred.2.1' + "model.105.m.0": "bbox_head.head_module.convs_pred.0.1", + "model.105.m.1": "bbox_head.head_module.convs_pred.1.1", + "model.105.m.2": "bbox_head.head_module.convs_pred.2.1", } convert_dict_x = { # stem - 'model.0': 'backbone.stem.0', - 'model.1': 'backbone.stem.1', - 'model.2': 'backbone.stem.2', - + "model.0": "backbone.stem.0", + "model.1": "backbone.stem.1", + "model.2": "backbone.stem.2", # stage1 # ConvModule - 'model.3': 'backbone.stage1.0', + "model.3": "backbone.stage1.0", # ELANBlock expand_channel_2x - 'model.4': 'backbone.stage1.1.short_conv', - 'model.5': 'backbone.stage1.1.main_conv', - 'model.6': 'backbone.stage1.1.blocks.0.0', - 'model.7': 'backbone.stage1.1.blocks.0.1', - 'model.8': 'backbone.stage1.1.blocks.1.0', - 'model.9': 'backbone.stage1.1.blocks.1.1', - 'model.10': 'backbone.stage1.1.blocks.2.0', - 'model.11': 'backbone.stage1.1.blocks.2.1', - 'model.13': 'backbone.stage1.1.final_conv', - + "model.4": "backbone.stage1.1.short_conv", + "model.5": "backbone.stage1.1.main_conv", + "model.6": "backbone.stage1.1.blocks.0.0", + "model.7": "backbone.stage1.1.blocks.0.1", + "model.8": "backbone.stage1.1.blocks.1.0", + "model.9": "backbone.stage1.1.blocks.1.1", + "model.10": "backbone.stage1.1.blocks.2.0", + "model.11": "backbone.stage1.1.blocks.2.1", + "model.13": "backbone.stage1.1.final_conv", # stage2 # MaxPoolBlock reduce_channel_2x - 'model.15': 'backbone.stage2.0.maxpool_branches.1', - 'model.16': 'backbone.stage2.0.stride_conv_branches.0', - 'model.17': 'backbone.stage2.0.stride_conv_branches.1', - + "model.15": "backbone.stage2.0.maxpool_branches.1", + "model.16": "backbone.stage2.0.stride_conv_branches.0", + "model.17": "backbone.stage2.0.stride_conv_branches.1", # ELANBlock expand_channel_2x - 'model.19': 'backbone.stage2.1.short_conv', - 'model.20': 'backbone.stage2.1.main_conv', - 'model.21': 'backbone.stage2.1.blocks.0.0', - 'model.22': 'backbone.stage2.1.blocks.0.1', - 'model.23': 'backbone.stage2.1.blocks.1.0', - 'model.24': 'backbone.stage2.1.blocks.1.1', - 'model.25': 'backbone.stage2.1.blocks.2.0', - 'model.26': 'backbone.stage2.1.blocks.2.1', - 'model.28': 'backbone.stage2.1.final_conv', - + 
"model.19": "backbone.stage2.1.short_conv", + "model.20": "backbone.stage2.1.main_conv", + "model.21": "backbone.stage2.1.blocks.0.0", + "model.22": "backbone.stage2.1.blocks.0.1", + "model.23": "backbone.stage2.1.blocks.1.0", + "model.24": "backbone.stage2.1.blocks.1.1", + "model.25": "backbone.stage2.1.blocks.2.0", + "model.26": "backbone.stage2.1.blocks.2.1", + "model.28": "backbone.stage2.1.final_conv", # stage3 # MaxPoolBlock reduce_channel_2x - 'model.30': 'backbone.stage3.0.maxpool_branches.1', - 'model.31': 'backbone.stage3.0.stride_conv_branches.0', - 'model.32': 'backbone.stage3.0.stride_conv_branches.1', + "model.30": "backbone.stage3.0.maxpool_branches.1", + "model.31": "backbone.stage3.0.stride_conv_branches.0", + "model.32": "backbone.stage3.0.stride_conv_branches.1", # ELANBlock expand_channel_2x - 'model.34': 'backbone.stage3.1.short_conv', - 'model.35': 'backbone.stage3.1.main_conv', - 'model.36': 'backbone.stage3.1.blocks.0.0', - 'model.37': 'backbone.stage3.1.blocks.0.1', - 'model.38': 'backbone.stage3.1.blocks.1.0', - 'model.39': 'backbone.stage3.1.blocks.1.1', - 'model.40': 'backbone.stage3.1.blocks.2.0', - 'model.41': 'backbone.stage3.1.blocks.2.1', - 'model.43': 'backbone.stage3.1.final_conv', - + "model.34": "backbone.stage3.1.short_conv", + "model.35": "backbone.stage3.1.main_conv", + "model.36": "backbone.stage3.1.blocks.0.0", + "model.37": "backbone.stage3.1.blocks.0.1", + "model.38": "backbone.stage3.1.blocks.1.0", + "model.39": "backbone.stage3.1.blocks.1.1", + "model.40": "backbone.stage3.1.blocks.2.0", + "model.41": "backbone.stage3.1.blocks.2.1", + "model.43": "backbone.stage3.1.final_conv", # stage4 # MaxPoolBlock reduce_channel_2x - 'model.45': 'backbone.stage4.0.maxpool_branches.1', - 'model.46': 'backbone.stage4.0.stride_conv_branches.0', - 'model.47': 'backbone.stage4.0.stride_conv_branches.1', + "model.45": "backbone.stage4.0.maxpool_branches.1", + "model.46": "backbone.stage4.0.stride_conv_branches.0", + "model.47": "backbone.stage4.0.stride_conv_branches.1", # ELANBlock no_change_channel - 'model.49': 'backbone.stage4.1.short_conv', - 'model.50': 'backbone.stage4.1.main_conv', - 'model.51': 'backbone.stage4.1.blocks.0.0', - 'model.52': 'backbone.stage4.1.blocks.0.1', - 'model.53': 'backbone.stage4.1.blocks.1.0', - 'model.54': 'backbone.stage4.1.blocks.1.1', - 'model.55': 'backbone.stage4.1.blocks.2.0', - 'model.56': 'backbone.stage4.1.blocks.2.1', - 'model.58': 'backbone.stage4.1.final_conv', - + "model.49": "backbone.stage4.1.short_conv", + "model.50": "backbone.stage4.1.main_conv", + "model.51": "backbone.stage4.1.blocks.0.0", + "model.52": "backbone.stage4.1.blocks.0.1", + "model.53": "backbone.stage4.1.blocks.1.0", + "model.54": "backbone.stage4.1.blocks.1.1", + "model.55": "backbone.stage4.1.blocks.2.0", + "model.56": "backbone.stage4.1.blocks.2.1", + "model.58": "backbone.stage4.1.final_conv", # neck SPPCSPBlock - 'model.59.cv1': 'neck.reduce_layers.2.main_layers.0', - 'model.59.cv3': 'neck.reduce_layers.2.main_layers.1', - 'model.59.cv4': 'neck.reduce_layers.2.main_layers.2', - 'model.59.cv5': 'neck.reduce_layers.2.fuse_layers.0', - 'model.59.cv6': 'neck.reduce_layers.2.fuse_layers.1', - 'model.59.cv2': 'neck.reduce_layers.2.short_layer', - 'model.59.cv7': 'neck.reduce_layers.2.final_conv', - + "model.59.cv1": "neck.reduce_layers.2.main_layers.0", + "model.59.cv3": "neck.reduce_layers.2.main_layers.1", + "model.59.cv4": "neck.reduce_layers.2.main_layers.2", + "model.59.cv5": "neck.reduce_layers.2.fuse_layers.0", + "model.59.cv6": 
"neck.reduce_layers.2.fuse_layers.1", + "model.59.cv2": "neck.reduce_layers.2.short_layer", + "model.59.cv7": "neck.reduce_layers.2.final_conv", # neck - 'model.60': 'neck.upsample_layers.0.0', - 'model.62': 'neck.reduce_layers.1', - + "model.60": "neck.upsample_layers.0.0", + "model.62": "neck.reduce_layers.1", # neck ELANBlock reduce_channel_2x - 'model.64': 'neck.top_down_layers.0.short_conv', - 'model.65': 'neck.top_down_layers.0.main_conv', - 'model.66': 'neck.top_down_layers.0.blocks.0.0', - 'model.67': 'neck.top_down_layers.0.blocks.0.1', - 'model.68': 'neck.top_down_layers.0.blocks.1.0', - 'model.69': 'neck.top_down_layers.0.blocks.1.1', - 'model.70': 'neck.top_down_layers.0.blocks.2.0', - 'model.71': 'neck.top_down_layers.0.blocks.2.1', - 'model.73': 'neck.top_down_layers.0.final_conv', - 'model.74': 'neck.upsample_layers.1.0', - 'model.76': 'neck.reduce_layers.0', - + "model.64": "neck.top_down_layers.0.short_conv", + "model.65": "neck.top_down_layers.0.main_conv", + "model.66": "neck.top_down_layers.0.blocks.0.0", + "model.67": "neck.top_down_layers.0.blocks.0.1", + "model.68": "neck.top_down_layers.0.blocks.1.0", + "model.69": "neck.top_down_layers.0.blocks.1.1", + "model.70": "neck.top_down_layers.0.blocks.2.0", + "model.71": "neck.top_down_layers.0.blocks.2.1", + "model.73": "neck.top_down_layers.0.final_conv", + "model.74": "neck.upsample_layers.1.0", + "model.76": "neck.reduce_layers.0", # neck ELANBlock reduce_channel_2x - 'model.78': 'neck.top_down_layers.1.short_conv', - 'model.79': 'neck.top_down_layers.1.main_conv', - 'model.80': 'neck.top_down_layers.1.blocks.0.0', - 'model.81': 'neck.top_down_layers.1.blocks.0.1', - 'model.82': 'neck.top_down_layers.1.blocks.1.0', - 'model.83': 'neck.top_down_layers.1.blocks.1.1', - 'model.84': 'neck.top_down_layers.1.blocks.2.0', - 'model.85': 'neck.top_down_layers.1.blocks.2.1', - 'model.87': 'neck.top_down_layers.1.final_conv', - + "model.78": "neck.top_down_layers.1.short_conv", + "model.79": "neck.top_down_layers.1.main_conv", + "model.80": "neck.top_down_layers.1.blocks.0.0", + "model.81": "neck.top_down_layers.1.blocks.0.1", + "model.82": "neck.top_down_layers.1.blocks.1.0", + "model.83": "neck.top_down_layers.1.blocks.1.1", + "model.84": "neck.top_down_layers.1.blocks.2.0", + "model.85": "neck.top_down_layers.1.blocks.2.1", + "model.87": "neck.top_down_layers.1.final_conv", # neck MaxPoolBlock no_change_channel - 'model.89': 'neck.downsample_layers.0.maxpool_branches.1', - 'model.90': 'neck.downsample_layers.0.stride_conv_branches.0', - 'model.91': 'neck.downsample_layers.0.stride_conv_branches.1', - + "model.89": "neck.downsample_layers.0.maxpool_branches.1", + "model.90": "neck.downsample_layers.0.stride_conv_branches.0", + "model.91": "neck.downsample_layers.0.stride_conv_branches.1", # neck ELANBlock reduce_channel_2x - 'model.93': 'neck.bottom_up_layers.0.short_conv', - 'model.94': 'neck.bottom_up_layers.0.main_conv', - 'model.95': 'neck.bottom_up_layers.0.blocks.0.0', - 'model.96': 'neck.bottom_up_layers.0.blocks.0.1', - 'model.97': 'neck.bottom_up_layers.0.blocks.1.0', - 'model.98': 'neck.bottom_up_layers.0.blocks.1.1', - 'model.99': 'neck.bottom_up_layers.0.blocks.2.0', - 'model.100': 'neck.bottom_up_layers.0.blocks.2.1', - 'model.102': 'neck.bottom_up_layers.0.final_conv', - + "model.93": "neck.bottom_up_layers.0.short_conv", + "model.94": "neck.bottom_up_layers.0.main_conv", + "model.95": "neck.bottom_up_layers.0.blocks.0.0", + "model.96": "neck.bottom_up_layers.0.blocks.0.1", + "model.97": 
"neck.bottom_up_layers.0.blocks.1.0", + "model.98": "neck.bottom_up_layers.0.blocks.1.1", + "model.99": "neck.bottom_up_layers.0.blocks.2.0", + "model.100": "neck.bottom_up_layers.0.blocks.2.1", + "model.102": "neck.bottom_up_layers.0.final_conv", # neck MaxPoolBlock no_change_channel - 'model.104': 'neck.downsample_layers.1.maxpool_branches.1', - 'model.105': 'neck.downsample_layers.1.stride_conv_branches.0', - 'model.106': 'neck.downsample_layers.1.stride_conv_branches.1', - + "model.104": "neck.downsample_layers.1.maxpool_branches.1", + "model.105": "neck.downsample_layers.1.stride_conv_branches.0", + "model.106": "neck.downsample_layers.1.stride_conv_branches.1", # neck ELANBlock reduce_channel_2x - 'model.108': 'neck.bottom_up_layers.1.short_conv', - 'model.109': 'neck.bottom_up_layers.1.main_conv', - 'model.110': 'neck.bottom_up_layers.1.blocks.0.0', - 'model.111': 'neck.bottom_up_layers.1.blocks.0.1', - 'model.112': 'neck.bottom_up_layers.1.blocks.1.0', - 'model.113': 'neck.bottom_up_layers.1.blocks.1.1', - 'model.114': 'neck.bottom_up_layers.1.blocks.2.0', - 'model.115': 'neck.bottom_up_layers.1.blocks.2.1', - 'model.117': 'neck.bottom_up_layers.1.final_conv', - + "model.108": "neck.bottom_up_layers.1.short_conv", + "model.109": "neck.bottom_up_layers.1.main_conv", + "model.110": "neck.bottom_up_layers.1.blocks.0.0", + "model.111": "neck.bottom_up_layers.1.blocks.0.1", + "model.112": "neck.bottom_up_layers.1.blocks.1.0", + "model.113": "neck.bottom_up_layers.1.blocks.1.1", + "model.114": "neck.bottom_up_layers.1.blocks.2.0", + "model.115": "neck.bottom_up_layers.1.blocks.2.1", + "model.117": "neck.bottom_up_layers.1.final_conv", # Conv - 'model.118': 'neck.out_layers.0', - 'model.119': 'neck.out_layers.1', - 'model.120': 'neck.out_layers.2', - + "model.118": "neck.out_layers.0", + "model.119": "neck.out_layers.1", + "model.120": "neck.out_layers.2", # head - 'model.121.m.0': 'bbox_head.head_module.convs_pred.0.1', - 'model.121.m.1': 'bbox_head.head_module.convs_pred.1.1', - 'model.121.m.2': 'bbox_head.head_module.convs_pred.2.1' + "model.121.m.0": "bbox_head.head_module.convs_pred.0.1", + "model.121.m.1": "bbox_head.head_module.convs_pred.1.1", + "model.121.m.2": "bbox_head.head_module.convs_pred.2.1", } convert_dict_w = { # stem - 'model.1': 'backbone.stem.conv', - + "model.1": "backbone.stem.conv", # stage1 # ConvModule - 'model.2': 'backbone.stage1.0', + "model.2": "backbone.stage1.0", # ELANBlock - 'model.3': 'backbone.stage1.1.short_conv', - 'model.4': 'backbone.stage1.1.main_conv', - 'model.5': 'backbone.stage1.1.blocks.0.0', - 'model.6': 'backbone.stage1.1.blocks.0.1', - 'model.7': 'backbone.stage1.1.blocks.1.0', - 'model.8': 'backbone.stage1.1.blocks.1.1', - 'model.10': 'backbone.stage1.1.final_conv', - + "model.3": "backbone.stage1.1.short_conv", + "model.4": "backbone.stage1.1.main_conv", + "model.5": "backbone.stage1.1.blocks.0.0", + "model.6": "backbone.stage1.1.blocks.0.1", + "model.7": "backbone.stage1.1.blocks.1.0", + "model.8": "backbone.stage1.1.blocks.1.1", + "model.10": "backbone.stage1.1.final_conv", # stage2 - 'model.11': 'backbone.stage2.0', + "model.11": "backbone.stage2.0", # ELANBlock - 'model.12': 'backbone.stage2.1.short_conv', - 'model.13': 'backbone.stage2.1.main_conv', - 'model.14': 'backbone.stage2.1.blocks.0.0', - 'model.15': 'backbone.stage2.1.blocks.0.1', - 'model.16': 'backbone.stage2.1.blocks.1.0', - 'model.17': 'backbone.stage2.1.blocks.1.1', - 'model.19': 'backbone.stage2.1.final_conv', - + "model.12": "backbone.stage2.1.short_conv", + 
"model.13": "backbone.stage2.1.main_conv", + "model.14": "backbone.stage2.1.blocks.0.0", + "model.15": "backbone.stage2.1.blocks.0.1", + "model.16": "backbone.stage2.1.blocks.1.0", + "model.17": "backbone.stage2.1.blocks.1.1", + "model.19": "backbone.stage2.1.final_conv", # stage3 - 'model.20': 'backbone.stage3.0', + "model.20": "backbone.stage3.0", # ELANBlock - 'model.21': 'backbone.stage3.1.short_conv', - 'model.22': 'backbone.stage3.1.main_conv', - 'model.23': 'backbone.stage3.1.blocks.0.0', - 'model.24': 'backbone.stage3.1.blocks.0.1', - 'model.25': 'backbone.stage3.1.blocks.1.0', - 'model.26': 'backbone.stage3.1.blocks.1.1', - 'model.28': 'backbone.stage3.1.final_conv', - + "model.21": "backbone.stage3.1.short_conv", + "model.22": "backbone.stage3.1.main_conv", + "model.23": "backbone.stage3.1.blocks.0.0", + "model.24": "backbone.stage3.1.blocks.0.1", + "model.25": "backbone.stage3.1.blocks.1.0", + "model.26": "backbone.stage3.1.blocks.1.1", + "model.28": "backbone.stage3.1.final_conv", # stage4 - 'model.29': 'backbone.stage4.0', + "model.29": "backbone.stage4.0", # ELANBlock - 'model.30': 'backbone.stage4.1.short_conv', - 'model.31': 'backbone.stage4.1.main_conv', - 'model.32': 'backbone.stage4.1.blocks.0.0', - 'model.33': 'backbone.stage4.1.blocks.0.1', - 'model.34': 'backbone.stage4.1.blocks.1.0', - 'model.35': 'backbone.stage4.1.blocks.1.1', - 'model.37': 'backbone.stage4.1.final_conv', - + "model.30": "backbone.stage4.1.short_conv", + "model.31": "backbone.stage4.1.main_conv", + "model.32": "backbone.stage4.1.blocks.0.0", + "model.33": "backbone.stage4.1.blocks.0.1", + "model.34": "backbone.stage4.1.blocks.1.0", + "model.35": "backbone.stage4.1.blocks.1.1", + "model.37": "backbone.stage4.1.final_conv", # stage5 - 'model.38': 'backbone.stage5.0', + "model.38": "backbone.stage5.0", # ELANBlock - 'model.39': 'backbone.stage5.1.short_conv', - 'model.40': 'backbone.stage5.1.main_conv', - 'model.41': 'backbone.stage5.1.blocks.0.0', - 'model.42': 'backbone.stage5.1.blocks.0.1', - 'model.43': 'backbone.stage5.1.blocks.1.0', - 'model.44': 'backbone.stage5.1.blocks.1.1', - 'model.46': 'backbone.stage5.1.final_conv', - + "model.39": "backbone.stage5.1.short_conv", + "model.40": "backbone.stage5.1.main_conv", + "model.41": "backbone.stage5.1.blocks.0.0", + "model.42": "backbone.stage5.1.blocks.0.1", + "model.43": "backbone.stage5.1.blocks.1.0", + "model.44": "backbone.stage5.1.blocks.1.1", + "model.46": "backbone.stage5.1.final_conv", # neck SPPCSPBlock - 'model.47.cv1': 'neck.reduce_layers.3.main_layers.0', - 'model.47.cv3': 'neck.reduce_layers.3.main_layers.1', - 'model.47.cv4': 'neck.reduce_layers.3.main_layers.2', - 'model.47.cv5': 'neck.reduce_layers.3.fuse_layers.0', - 'model.47.cv6': 'neck.reduce_layers.3.fuse_layers.1', - 'model.47.cv2': 'neck.reduce_layers.3.short_layer', - 'model.47.cv7': 'neck.reduce_layers.3.final_conv', - + "model.47.cv1": "neck.reduce_layers.3.main_layers.0", + "model.47.cv3": "neck.reduce_layers.3.main_layers.1", + "model.47.cv4": "neck.reduce_layers.3.main_layers.2", + "model.47.cv5": "neck.reduce_layers.3.fuse_layers.0", + "model.47.cv6": "neck.reduce_layers.3.fuse_layers.1", + "model.47.cv2": "neck.reduce_layers.3.short_layer", + "model.47.cv7": "neck.reduce_layers.3.final_conv", # neck - 'model.48': 'neck.upsample_layers.0.0', - 'model.50': 'neck.reduce_layers.2', - + "model.48": "neck.upsample_layers.0.0", + "model.50": "neck.reduce_layers.2", # neck ELANBlock - 'model.52': 'neck.top_down_layers.0.short_conv', - 'model.53': 
'neck.top_down_layers.0.main_conv', - 'model.54': 'neck.top_down_layers.0.blocks.0', - 'model.55': 'neck.top_down_layers.0.blocks.1', - 'model.56': 'neck.top_down_layers.0.blocks.2', - 'model.57': 'neck.top_down_layers.0.blocks.3', - 'model.59': 'neck.top_down_layers.0.final_conv', - 'model.60': 'neck.upsample_layers.1.0', - 'model.62': 'neck.reduce_layers.1', - + "model.52": "neck.top_down_layers.0.short_conv", + "model.53": "neck.top_down_layers.0.main_conv", + "model.54": "neck.top_down_layers.0.blocks.0", + "model.55": "neck.top_down_layers.0.blocks.1", + "model.56": "neck.top_down_layers.0.blocks.2", + "model.57": "neck.top_down_layers.0.blocks.3", + "model.59": "neck.top_down_layers.0.final_conv", + "model.60": "neck.upsample_layers.1.0", + "model.62": "neck.reduce_layers.1", # neck ELANBlock reduce_channel_2x - 'model.64': 'neck.top_down_layers.1.short_conv', - 'model.65': 'neck.top_down_layers.1.main_conv', - 'model.66': 'neck.top_down_layers.1.blocks.0', - 'model.67': 'neck.top_down_layers.1.blocks.1', - 'model.68': 'neck.top_down_layers.1.blocks.2', - 'model.69': 'neck.top_down_layers.1.blocks.3', - 'model.71': 'neck.top_down_layers.1.final_conv', - 'model.72': 'neck.upsample_layers.2.0', - 'model.74': 'neck.reduce_layers.0', - 'model.76': 'neck.top_down_layers.2.short_conv', - 'model.77': 'neck.top_down_layers.2.main_conv', - 'model.78': 'neck.top_down_layers.2.blocks.0', - 'model.79': 'neck.top_down_layers.2.blocks.1', - 'model.80': 'neck.top_down_layers.2.blocks.2', - 'model.81': 'neck.top_down_layers.2.blocks.3', - 'model.83': 'neck.top_down_layers.2.final_conv', - 'model.84': 'neck.downsample_layers.0', - + "model.64": "neck.top_down_layers.1.short_conv", + "model.65": "neck.top_down_layers.1.main_conv", + "model.66": "neck.top_down_layers.1.blocks.0", + "model.67": "neck.top_down_layers.1.blocks.1", + "model.68": "neck.top_down_layers.1.blocks.2", + "model.69": "neck.top_down_layers.1.blocks.3", + "model.71": "neck.top_down_layers.1.final_conv", + "model.72": "neck.upsample_layers.2.0", + "model.74": "neck.reduce_layers.0", + "model.76": "neck.top_down_layers.2.short_conv", + "model.77": "neck.top_down_layers.2.main_conv", + "model.78": "neck.top_down_layers.2.blocks.0", + "model.79": "neck.top_down_layers.2.blocks.1", + "model.80": "neck.top_down_layers.2.blocks.2", + "model.81": "neck.top_down_layers.2.blocks.3", + "model.83": "neck.top_down_layers.2.final_conv", + "model.84": "neck.downsample_layers.0", # neck ELANBlock - 'model.86': 'neck.bottom_up_layers.0.short_conv', - 'model.87': 'neck.bottom_up_layers.0.main_conv', - 'model.88': 'neck.bottom_up_layers.0.blocks.0', - 'model.89': 'neck.bottom_up_layers.0.blocks.1', - 'model.90': 'neck.bottom_up_layers.0.blocks.2', - 'model.91': 'neck.bottom_up_layers.0.blocks.3', - 'model.93': 'neck.bottom_up_layers.0.final_conv', - 'model.94': 'neck.downsample_layers.1', - + "model.86": "neck.bottom_up_layers.0.short_conv", + "model.87": "neck.bottom_up_layers.0.main_conv", + "model.88": "neck.bottom_up_layers.0.blocks.0", + "model.89": "neck.bottom_up_layers.0.blocks.1", + "model.90": "neck.bottom_up_layers.0.blocks.2", + "model.91": "neck.bottom_up_layers.0.blocks.3", + "model.93": "neck.bottom_up_layers.0.final_conv", + "model.94": "neck.downsample_layers.1", # neck ELANBlock reduce_channel_2x - 'model.96': 'neck.bottom_up_layers.1.short_conv', - 'model.97': 'neck.bottom_up_layers.1.main_conv', - 'model.98': 'neck.bottom_up_layers.1.blocks.0', - 'model.99': 'neck.bottom_up_layers.1.blocks.1', - 'model.100': 
'neck.bottom_up_layers.1.blocks.2', - 'model.101': 'neck.bottom_up_layers.1.blocks.3', - 'model.103': 'neck.bottom_up_layers.1.final_conv', - 'model.104': 'neck.downsample_layers.2', - + "model.96": "neck.bottom_up_layers.1.short_conv", + "model.97": "neck.bottom_up_layers.1.main_conv", + "model.98": "neck.bottom_up_layers.1.blocks.0", + "model.99": "neck.bottom_up_layers.1.blocks.1", + "model.100": "neck.bottom_up_layers.1.blocks.2", + "model.101": "neck.bottom_up_layers.1.blocks.3", + "model.103": "neck.bottom_up_layers.1.final_conv", + "model.104": "neck.downsample_layers.2", # neck ELANBlock reduce_channel_2x - 'model.106': 'neck.bottom_up_layers.2.short_conv', - 'model.107': 'neck.bottom_up_layers.2.main_conv', - 'model.108': 'neck.bottom_up_layers.2.blocks.0', - 'model.109': 'neck.bottom_up_layers.2.blocks.1', - 'model.110': 'neck.bottom_up_layers.2.blocks.2', - 'model.111': 'neck.bottom_up_layers.2.blocks.3', - 'model.113': 'neck.bottom_up_layers.2.final_conv', - 'model.114': 'bbox_head.head_module.main_convs_pred.0.0', - 'model.115': 'bbox_head.head_module.main_convs_pred.1.0', - 'model.116': 'bbox_head.head_module.main_convs_pred.2.0', - 'model.117': 'bbox_head.head_module.main_convs_pred.3.0', - + "model.106": "neck.bottom_up_layers.2.short_conv", + "model.107": "neck.bottom_up_layers.2.main_conv", + "model.108": "neck.bottom_up_layers.2.blocks.0", + "model.109": "neck.bottom_up_layers.2.blocks.1", + "model.110": "neck.bottom_up_layers.2.blocks.2", + "model.111": "neck.bottom_up_layers.2.blocks.3", + "model.113": "neck.bottom_up_layers.2.final_conv", + "model.114": "bbox_head.head_module.main_convs_pred.0.0", + "model.115": "bbox_head.head_module.main_convs_pred.1.0", + "model.116": "bbox_head.head_module.main_convs_pred.2.0", + "model.117": "bbox_head.head_module.main_convs_pred.3.0", # head - 'model.118.m.0': 'bbox_head.head_module.main_convs_pred.0.2', - 'model.118.m.1': 'bbox_head.head_module.main_convs_pred.1.2', - 'model.118.m.2': 'bbox_head.head_module.main_convs_pred.2.2', - 'model.118.m.3': 'bbox_head.head_module.main_convs_pred.3.2' + "model.118.m.0": "bbox_head.head_module.main_convs_pred.0.2", + "model.118.m.1": "bbox_head.head_module.main_convs_pred.1.2", + "model.118.m.2": "bbox_head.head_module.main_convs_pred.2.2", + "model.118.m.3": "bbox_head.head_module.main_convs_pred.3.2", } convert_dict_e = { # stem - 'model.1': 'backbone.stem.conv', - + "model.1": "backbone.stem.conv", # stage1 - 'model.2.cv1': 'backbone.stage1.0.stride_conv_branches.0', - 'model.2.cv2': 'backbone.stage1.0.stride_conv_branches.1', - 'model.2.cv3': 'backbone.stage1.0.maxpool_branches.1', - + "model.2.cv1": "backbone.stage1.0.stride_conv_branches.0", + "model.2.cv2": "backbone.stage1.0.stride_conv_branches.1", + "model.2.cv3": "backbone.stage1.0.maxpool_branches.1", # ELANBlock - 'model.3': 'backbone.stage1.1.short_conv', - 'model.4': 'backbone.stage1.1.main_conv', - 'model.5': 'backbone.stage1.1.blocks.0.0', - 'model.6': 'backbone.stage1.1.blocks.0.1', - 'model.7': 'backbone.stage1.1.blocks.1.0', - 'model.8': 'backbone.stage1.1.blocks.1.1', - 'model.9': 'backbone.stage1.1.blocks.2.0', - 'model.10': 'backbone.stage1.1.blocks.2.1', - 'model.12': 'backbone.stage1.1.final_conv', - + "model.3": "backbone.stage1.1.short_conv", + "model.4": "backbone.stage1.1.main_conv", + "model.5": "backbone.stage1.1.blocks.0.0", + "model.6": "backbone.stage1.1.blocks.0.1", + "model.7": "backbone.stage1.1.blocks.1.0", + "model.8": "backbone.stage1.1.blocks.1.1", + "model.9": "backbone.stage1.1.blocks.2.0", + 
"model.10": "backbone.stage1.1.blocks.2.1", + "model.12": "backbone.stage1.1.final_conv", # stage2 - 'model.13.cv1': 'backbone.stage2.0.stride_conv_branches.0', - 'model.13.cv2': 'backbone.stage2.0.stride_conv_branches.1', - 'model.13.cv3': 'backbone.stage2.0.maxpool_branches.1', - + "model.13.cv1": "backbone.stage2.0.stride_conv_branches.0", + "model.13.cv2": "backbone.stage2.0.stride_conv_branches.1", + "model.13.cv3": "backbone.stage2.0.maxpool_branches.1", # ELANBlock - 'model.14': 'backbone.stage2.1.short_conv', - 'model.15': 'backbone.stage2.1.main_conv', - 'model.16': 'backbone.stage2.1.blocks.0.0', - 'model.17': 'backbone.stage2.1.blocks.0.1', - 'model.18': 'backbone.stage2.1.blocks.1.0', - 'model.19': 'backbone.stage2.1.blocks.1.1', - 'model.20': 'backbone.stage2.1.blocks.2.0', - 'model.21': 'backbone.stage2.1.blocks.2.1', - 'model.23': 'backbone.stage2.1.final_conv', - + "model.14": "backbone.stage2.1.short_conv", + "model.15": "backbone.stage2.1.main_conv", + "model.16": "backbone.stage2.1.blocks.0.0", + "model.17": "backbone.stage2.1.blocks.0.1", + "model.18": "backbone.stage2.1.blocks.1.0", + "model.19": "backbone.stage2.1.blocks.1.1", + "model.20": "backbone.stage2.1.blocks.2.0", + "model.21": "backbone.stage2.1.blocks.2.1", + "model.23": "backbone.stage2.1.final_conv", # stage3 - 'model.24.cv1': 'backbone.stage3.0.stride_conv_branches.0', - 'model.24.cv2': 'backbone.stage3.0.stride_conv_branches.1', - 'model.24.cv3': 'backbone.stage3.0.maxpool_branches.1', - + "model.24.cv1": "backbone.stage3.0.stride_conv_branches.0", + "model.24.cv2": "backbone.stage3.0.stride_conv_branches.1", + "model.24.cv3": "backbone.stage3.0.maxpool_branches.1", # ELANBlock - 'model.25': 'backbone.stage3.1.short_conv', - 'model.26': 'backbone.stage3.1.main_conv', - 'model.27': 'backbone.stage3.1.blocks.0.0', - 'model.28': 'backbone.stage3.1.blocks.0.1', - 'model.29': 'backbone.stage3.1.blocks.1.0', - 'model.30': 'backbone.stage3.1.blocks.1.1', - 'model.31': 'backbone.stage3.1.blocks.2.0', - 'model.32': 'backbone.stage3.1.blocks.2.1', - 'model.34': 'backbone.stage3.1.final_conv', - + "model.25": "backbone.stage3.1.short_conv", + "model.26": "backbone.stage3.1.main_conv", + "model.27": "backbone.stage3.1.blocks.0.0", + "model.28": "backbone.stage3.1.blocks.0.1", + "model.29": "backbone.stage3.1.blocks.1.0", + "model.30": "backbone.stage3.1.blocks.1.1", + "model.31": "backbone.stage3.1.blocks.2.0", + "model.32": "backbone.stage3.1.blocks.2.1", + "model.34": "backbone.stage3.1.final_conv", # stage4 - 'model.35.cv1': 'backbone.stage4.0.stride_conv_branches.0', - 'model.35.cv2': 'backbone.stage4.0.stride_conv_branches.1', - 'model.35.cv3': 'backbone.stage4.0.maxpool_branches.1', - + "model.35.cv1": "backbone.stage4.0.stride_conv_branches.0", + "model.35.cv2": "backbone.stage4.0.stride_conv_branches.1", + "model.35.cv3": "backbone.stage4.0.maxpool_branches.1", # ELANBlock - 'model.36': 'backbone.stage4.1.short_conv', - 'model.37': 'backbone.stage4.1.main_conv', - 'model.38': 'backbone.stage4.1.blocks.0.0', - 'model.39': 'backbone.stage4.1.blocks.0.1', - 'model.40': 'backbone.stage4.1.blocks.1.0', - 'model.41': 'backbone.stage4.1.blocks.1.1', - 'model.42': 'backbone.stage4.1.blocks.2.0', - 'model.43': 'backbone.stage4.1.blocks.2.1', - 'model.45': 'backbone.stage4.1.final_conv', - + "model.36": "backbone.stage4.1.short_conv", + "model.37": "backbone.stage4.1.main_conv", + "model.38": "backbone.stage4.1.blocks.0.0", + "model.39": "backbone.stage4.1.blocks.0.1", + "model.40": "backbone.stage4.1.blocks.1.0", + 
"model.41": "backbone.stage4.1.blocks.1.1", + "model.42": "backbone.stage4.1.blocks.2.0", + "model.43": "backbone.stage4.1.blocks.2.1", + "model.45": "backbone.stage4.1.final_conv", # stage5 - 'model.46.cv1': 'backbone.stage5.0.stride_conv_branches.0', - 'model.46.cv2': 'backbone.stage5.0.stride_conv_branches.1', - 'model.46.cv3': 'backbone.stage5.0.maxpool_branches.1', - + "model.46.cv1": "backbone.stage5.0.stride_conv_branches.0", + "model.46.cv2": "backbone.stage5.0.stride_conv_branches.1", + "model.46.cv3": "backbone.stage5.0.maxpool_branches.1", # ELANBlock - 'model.47': 'backbone.stage5.1.short_conv', - 'model.48': 'backbone.stage5.1.main_conv', - 'model.49': 'backbone.stage5.1.blocks.0.0', - 'model.50': 'backbone.stage5.1.blocks.0.1', - 'model.51': 'backbone.stage5.1.blocks.1.0', - 'model.52': 'backbone.stage5.1.blocks.1.1', - 'model.53': 'backbone.stage5.1.blocks.2.0', - 'model.54': 'backbone.stage5.1.blocks.2.1', - 'model.56': 'backbone.stage5.1.final_conv', - + "model.47": "backbone.stage5.1.short_conv", + "model.48": "backbone.stage5.1.main_conv", + "model.49": "backbone.stage5.1.blocks.0.0", + "model.50": "backbone.stage5.1.blocks.0.1", + "model.51": "backbone.stage5.1.blocks.1.0", + "model.52": "backbone.stage5.1.blocks.1.1", + "model.53": "backbone.stage5.1.blocks.2.0", + "model.54": "backbone.stage5.1.blocks.2.1", + "model.56": "backbone.stage5.1.final_conv", # neck SPPCSPBlock - 'model.57.cv1': 'neck.reduce_layers.3.main_layers.0', - 'model.57.cv3': 'neck.reduce_layers.3.main_layers.1', - 'model.57.cv4': 'neck.reduce_layers.3.main_layers.2', - 'model.57.cv5': 'neck.reduce_layers.3.fuse_layers.0', - 'model.57.cv6': 'neck.reduce_layers.3.fuse_layers.1', - 'model.57.cv2': 'neck.reduce_layers.3.short_layer', - 'model.57.cv7': 'neck.reduce_layers.3.final_conv', - + "model.57.cv1": "neck.reduce_layers.3.main_layers.0", + "model.57.cv3": "neck.reduce_layers.3.main_layers.1", + "model.57.cv4": "neck.reduce_layers.3.main_layers.2", + "model.57.cv5": "neck.reduce_layers.3.fuse_layers.0", + "model.57.cv6": "neck.reduce_layers.3.fuse_layers.1", + "model.57.cv2": "neck.reduce_layers.3.short_layer", + "model.57.cv7": "neck.reduce_layers.3.final_conv", # neck - 'model.58': 'neck.upsample_layers.0.0', - 'model.60': 'neck.reduce_layers.2', - + "model.58": "neck.upsample_layers.0.0", + "model.60": "neck.reduce_layers.2", # neck ELANBlock - 'model.62': 'neck.top_down_layers.0.short_conv', - 'model.63': 'neck.top_down_layers.0.main_conv', - 'model.64': 'neck.top_down_layers.0.blocks.0', - 'model.65': 'neck.top_down_layers.0.blocks.1', - 'model.66': 'neck.top_down_layers.0.blocks.2', - 'model.67': 'neck.top_down_layers.0.blocks.3', - 'model.68': 'neck.top_down_layers.0.blocks.4', - 'model.69': 'neck.top_down_layers.0.blocks.5', - 'model.71': 'neck.top_down_layers.0.final_conv', - 'model.72': 'neck.upsample_layers.1.0', - 'model.74': 'neck.reduce_layers.1', - + "model.62": "neck.top_down_layers.0.short_conv", + "model.63": "neck.top_down_layers.0.main_conv", + "model.64": "neck.top_down_layers.0.blocks.0", + "model.65": "neck.top_down_layers.0.blocks.1", + "model.66": "neck.top_down_layers.0.blocks.2", + "model.67": "neck.top_down_layers.0.blocks.3", + "model.68": "neck.top_down_layers.0.blocks.4", + "model.69": "neck.top_down_layers.0.blocks.5", + "model.71": "neck.top_down_layers.0.final_conv", + "model.72": "neck.upsample_layers.1.0", + "model.74": "neck.reduce_layers.1", # neck ELANBlock - 'model.76': 'neck.top_down_layers.1.short_conv', - 'model.77': 'neck.top_down_layers.1.main_conv', - 
'model.78': 'neck.top_down_layers.1.blocks.0', - 'model.79': 'neck.top_down_layers.1.blocks.1', - 'model.80': 'neck.top_down_layers.1.blocks.2', - 'model.81': 'neck.top_down_layers.1.blocks.3', - 'model.82': 'neck.top_down_layers.1.blocks.4', - 'model.83': 'neck.top_down_layers.1.blocks.5', - 'model.85': 'neck.top_down_layers.1.final_conv', - 'model.86': 'neck.upsample_layers.2.0', - 'model.88': 'neck.reduce_layers.0', - 'model.90': 'neck.top_down_layers.2.short_conv', - 'model.91': 'neck.top_down_layers.2.main_conv', - 'model.92': 'neck.top_down_layers.2.blocks.0', - 'model.93': 'neck.top_down_layers.2.blocks.1', - 'model.94': 'neck.top_down_layers.2.blocks.2', - 'model.95': 'neck.top_down_layers.2.blocks.3', - 'model.96': 'neck.top_down_layers.2.blocks.4', - 'model.97': 'neck.top_down_layers.2.blocks.5', - 'model.99': 'neck.top_down_layers.2.final_conv', - 'model.100.cv1': 'neck.downsample_layers.0.stride_conv_branches.0', - 'model.100.cv2': 'neck.downsample_layers.0.stride_conv_branches.1', - 'model.100.cv3': 'neck.downsample_layers.0.maxpool_branches.1', - + "model.76": "neck.top_down_layers.1.short_conv", + "model.77": "neck.top_down_layers.1.main_conv", + "model.78": "neck.top_down_layers.1.blocks.0", + "model.79": "neck.top_down_layers.1.blocks.1", + "model.80": "neck.top_down_layers.1.blocks.2", + "model.81": "neck.top_down_layers.1.blocks.3", + "model.82": "neck.top_down_layers.1.blocks.4", + "model.83": "neck.top_down_layers.1.blocks.5", + "model.85": "neck.top_down_layers.1.final_conv", + "model.86": "neck.upsample_layers.2.0", + "model.88": "neck.reduce_layers.0", + "model.90": "neck.top_down_layers.2.short_conv", + "model.91": "neck.top_down_layers.2.main_conv", + "model.92": "neck.top_down_layers.2.blocks.0", + "model.93": "neck.top_down_layers.2.blocks.1", + "model.94": "neck.top_down_layers.2.blocks.2", + "model.95": "neck.top_down_layers.2.blocks.3", + "model.96": "neck.top_down_layers.2.blocks.4", + "model.97": "neck.top_down_layers.2.blocks.5", + "model.99": "neck.top_down_layers.2.final_conv", + "model.100.cv1": "neck.downsample_layers.0.stride_conv_branches.0", + "model.100.cv2": "neck.downsample_layers.0.stride_conv_branches.1", + "model.100.cv3": "neck.downsample_layers.0.maxpool_branches.1", # neck ELANBlock - 'model.102': 'neck.bottom_up_layers.0.short_conv', - 'model.103': 'neck.bottom_up_layers.0.main_conv', - 'model.104': 'neck.bottom_up_layers.0.blocks.0', - 'model.105': 'neck.bottom_up_layers.0.blocks.1', - 'model.106': 'neck.bottom_up_layers.0.blocks.2', - 'model.107': 'neck.bottom_up_layers.0.blocks.3', - 'model.108': 'neck.bottom_up_layers.0.blocks.4', - 'model.109': 'neck.bottom_up_layers.0.blocks.5', - 'model.111': 'neck.bottom_up_layers.0.final_conv', - 'model.112.cv1': 'neck.downsample_layers.1.stride_conv_branches.0', - 'model.112.cv2': 'neck.downsample_layers.1.stride_conv_branches.1', - 'model.112.cv3': 'neck.downsample_layers.1.maxpool_branches.1', - + "model.102": "neck.bottom_up_layers.0.short_conv", + "model.103": "neck.bottom_up_layers.0.main_conv", + "model.104": "neck.bottom_up_layers.0.blocks.0", + "model.105": "neck.bottom_up_layers.0.blocks.1", + "model.106": "neck.bottom_up_layers.0.blocks.2", + "model.107": "neck.bottom_up_layers.0.blocks.3", + "model.108": "neck.bottom_up_layers.0.blocks.4", + "model.109": "neck.bottom_up_layers.0.blocks.5", + "model.111": "neck.bottom_up_layers.0.final_conv", + "model.112.cv1": "neck.downsample_layers.1.stride_conv_branches.0", + "model.112.cv2": "neck.downsample_layers.1.stride_conv_branches.1", + 
"model.112.cv3": "neck.downsample_layers.1.maxpool_branches.1", # neck ELANBlock - 'model.114': 'neck.bottom_up_layers.1.short_conv', - 'model.115': 'neck.bottom_up_layers.1.main_conv', - 'model.116': 'neck.bottom_up_layers.1.blocks.0', - 'model.117': 'neck.bottom_up_layers.1.blocks.1', - 'model.118': 'neck.bottom_up_layers.1.blocks.2', - 'model.119': 'neck.bottom_up_layers.1.blocks.3', - 'model.120': 'neck.bottom_up_layers.1.blocks.4', - 'model.121': 'neck.bottom_up_layers.1.blocks.5', - 'model.123': 'neck.bottom_up_layers.1.final_conv', - 'model.124.cv1': 'neck.downsample_layers.2.stride_conv_branches.0', - 'model.124.cv2': 'neck.downsample_layers.2.stride_conv_branches.1', - 'model.124.cv3': 'neck.downsample_layers.2.maxpool_branches.1', - + "model.114": "neck.bottom_up_layers.1.short_conv", + "model.115": "neck.bottom_up_layers.1.main_conv", + "model.116": "neck.bottom_up_layers.1.blocks.0", + "model.117": "neck.bottom_up_layers.1.blocks.1", + "model.118": "neck.bottom_up_layers.1.blocks.2", + "model.119": "neck.bottom_up_layers.1.blocks.3", + "model.120": "neck.bottom_up_layers.1.blocks.4", + "model.121": "neck.bottom_up_layers.1.blocks.5", + "model.123": "neck.bottom_up_layers.1.final_conv", + "model.124.cv1": "neck.downsample_layers.2.stride_conv_branches.0", + "model.124.cv2": "neck.downsample_layers.2.stride_conv_branches.1", + "model.124.cv3": "neck.downsample_layers.2.maxpool_branches.1", # neck ELANBlock - 'model.126': 'neck.bottom_up_layers.2.short_conv', - 'model.127': 'neck.bottom_up_layers.2.main_conv', - 'model.128': 'neck.bottom_up_layers.2.blocks.0', - 'model.129': 'neck.bottom_up_layers.2.blocks.1', - 'model.130': 'neck.bottom_up_layers.2.blocks.2', - 'model.131': 'neck.bottom_up_layers.2.blocks.3', - 'model.132': 'neck.bottom_up_layers.2.blocks.4', - 'model.133': 'neck.bottom_up_layers.2.blocks.5', - 'model.135': 'neck.bottom_up_layers.2.final_conv', - 'model.136': 'bbox_head.head_module.main_convs_pred.0.0', - 'model.137': 'bbox_head.head_module.main_convs_pred.1.0', - 'model.138': 'bbox_head.head_module.main_convs_pred.2.0', - 'model.139': 'bbox_head.head_module.main_convs_pred.3.0', - + "model.126": "neck.bottom_up_layers.2.short_conv", + "model.127": "neck.bottom_up_layers.2.main_conv", + "model.128": "neck.bottom_up_layers.2.blocks.0", + "model.129": "neck.bottom_up_layers.2.blocks.1", + "model.130": "neck.bottom_up_layers.2.blocks.2", + "model.131": "neck.bottom_up_layers.2.blocks.3", + "model.132": "neck.bottom_up_layers.2.blocks.4", + "model.133": "neck.bottom_up_layers.2.blocks.5", + "model.135": "neck.bottom_up_layers.2.final_conv", + "model.136": "bbox_head.head_module.main_convs_pred.0.0", + "model.137": "bbox_head.head_module.main_convs_pred.1.0", + "model.138": "bbox_head.head_module.main_convs_pred.2.0", + "model.139": "bbox_head.head_module.main_convs_pred.3.0", # head - 'model.140.m.0': 'bbox_head.head_module.main_convs_pred.0.2', - 'model.140.m.1': 'bbox_head.head_module.main_convs_pred.1.2', - 'model.140.m.2': 'bbox_head.head_module.main_convs_pred.2.2', - 'model.140.m.3': 'bbox_head.head_module.main_convs_pred.3.2' + "model.140.m.0": "bbox_head.head_module.main_convs_pred.0.2", + "model.140.m.1": "bbox_head.head_module.main_convs_pred.1.2", + "model.140.m.2": "bbox_head.head_module.main_convs_pred.2.2", + "model.140.m.3": "bbox_head.head_module.main_convs_pred.3.2", } convert_dicts = { - 'yolov7-tiny.pt': convert_dict_tiny, - 'yolov7-w6.pt': convert_dict_w, - 'yolov7-e6.pt': convert_dict_e, - 'yolov7.pt': convert_dict_l, - 'yolov7x.pt': 
+    "yolov7-tiny.pt": convert_dict_tiny,
+    "yolov7-w6.pt": convert_dict_w,
+    "yolov7-e6.pt": convert_dict_e,
+    "yolov7.pt": convert_dict_l,
+    "yolov7x.pt": convert_dict_x,
 }
@@ -711,20 +645,20 @@ def convert(src, dst):
     convert_dict = convert_dicts[osp.basename(src)]
 
     num_levels = 3
-    if src_key == 'yolov7.pt':
+    if src_key == "yolov7.pt":
         indexes = [102, 51]
         in_channels = [256, 512, 1024]
-    elif src_key == 'yolov7x.pt':
+    elif src_key == "yolov7x.pt":
         indexes = [121, 59]
         in_channels = [320, 640, 1280]
-    elif src_key == 'yolov7-tiny.pt':
+    elif src_key == "yolov7-tiny.pt":
         indexes = [77, 1000]
         in_channels = [128, 256, 512]
-    elif src_key == 'yolov7-w6.pt':
+    elif src_key == "yolov7-w6.pt":
         indexes = [118, 47]
         in_channels = [256, 512, 768, 1024]
         num_levels = 4
-    elif src_key == 'yolov7-e6.pt':
+    elif src_key == "yolov7-e6.pt":
         indexes = [140, [2, 13, 24, 35, 46, 57, 100, 112, 124]]
         in_channels = 320, 640, 960, 1280
         num_levels = 4
@@ -733,70 +667,64 @@ def convert(src, dst):
         indexes[1] = [indexes[1]]
     """Convert keys in detectron pretrained YOLOv7 models to mmyolo style."""
     try:
-        yolov7_model = torch.load(src)['model'].float()
+        yolov7_model = torch.load(src)["model"].float()
         blobs = yolov7_model.state_dict()
     except ModuleNotFoundError:
         raise RuntimeError(
-            'This script must be placed under the WongKinYiu/yolov7 repo,'
-            ' because loading the official pretrained model need'
-            ' `model.py` to build model.')
+            "This script must be placed under the WongKinYiu/yolov7 repo,"
+            " because loading the official pretrained model needs"
+            " `model.py` to build the model."
+        )
     state_dict = OrderedDict()
     for key, weight in blobs.items():
-        if key.find('anchors') >= 0 or key.find('anchor_grid') >= 0:
+        if key.find("anchors") >= 0 or key.find("anchor_grid") >= 0:
             continue
-        num, module = key.split('.')[1:3]
+        num, module = key.split(".")[1:3]
         if int(num) < indexes[0] and int(num) not in indexes[1]:
-            prefix = f'model.{num}'
+            prefix = f"model.{num}"
             new_key = key.replace(prefix, convert_dict[prefix])
             state_dict[new_key] = weight
-            print(f'Convert {key} to {new_key}')
+            print(f"Convert {key} to {new_key}")
         elif int(num) in indexes[1]:
-            strs_key = key.split('.')[:3]
-            new_key = key.replace('.'.join(strs_key),
-                                  convert_dict['.'.join(strs_key)])
+            strs_key = key.split(".")[:3]
+            new_key = key.replace(".".join(strs_key), convert_dict[".".join(strs_key)])
             state_dict[new_key] = weight
-            print(f'Convert {key} to {new_key}')
+            print(f"Convert {key} to {new_key}")
         else:
-            strs_key = key.split('.')[:4]
-            new_key = key.replace('.'.join(strs_key),
-                                  convert_dict['.'.join(strs_key)])
+            strs_key = key.split(".")[:4]
+            new_key = key.replace(".".join(strs_key), convert_dict[".".join(strs_key)])
             state_dict[new_key] = weight
-            print(f'Convert {key} to {new_key}')
+            print(f"Convert {key} to {new_key}")
 
     # Add ImplicitA and ImplicitM
     for i in range(num_levels):
         if num_levels == 3:
-            implicit_a = f'bbox_head.head_module.' \
-                f'convs_pred.{i}.0.implicit'
+            implicit_a = f"bbox_head.head_module." f"convs_pred.{i}.0.implicit"
             state_dict[implicit_a] = torch.zeros((1, in_channels[i], 1, 1))
-            implicit_m = f'bbox_head.head_module.' \
-                f'convs_pred.{i}.2.implicit'
+            implicit_m = f"bbox_head.head_module." f"convs_pred.{i}.2.implicit"
             state_dict[implicit_m] = torch.ones((1, 3 * 85, 1, 1))
         else:
-            implicit_a = f'bbox_head.head_module.' \
-                f'main_convs_pred.{i}.1.implicit'
+            implicit_a = f"bbox_head.head_module." f"main_convs_pred.{i}.1.implicit"
             state_dict[implicit_a] = torch.zeros((1, in_channels[i], 1, 1))
-            implicit_m = f'bbox_head.head_module.' \
-                f'main_convs_pred.{i}.3.implicit'
+            implicit_m = f"bbox_head.head_module." f"main_convs_pred.{i}.3.implicit"
             state_dict[implicit_m] = torch.ones((1, 3 * 85, 1, 1))
 
     # save checkpoint
     checkpoint = dict()
-    checkpoint['state_dict'] = state_dict
+    checkpoint["state_dict"] = state_dict
     torch.save(checkpoint, dst)
 
 
 # Note: This script must be placed under the yolov7 repo to run.
 def main():
-    parser = argparse.ArgumentParser(description='Convert model keys')
-    parser.add_argument(
-        'src', default='yolov7.pt', help='src yolov7 model path')
-    parser.add_argument('dst', default='mm_yolov7l.pt', help='save path')
+    parser = argparse.ArgumentParser(description="Convert model keys")
+    parser.add_argument("src", default="yolov7.pt", help="src yolov7 model path")
+    parser.add_argument("dst", default="mm_yolov7l.pt", help="save path")
     args = parser.parse_args()
     convert(args.src, args.dst)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/mmyolo/tools/model_converters/yolov8_to_mmyolo.py b/mmyolo/tools/model_converters/yolov8_to_mmyolo.py
index d1541367..75dfa2d7 100644
--- a/mmyolo/tools/model_converters/yolov8_to_mmyolo.py
+++ b/mmyolo/tools/model_converters/yolov8_to_mmyolo.py
@@ -6,27 +6,25 @@
 convert_dict_s = {
     # backbone
-    'model.0': 'backbone.stem',
-    'model.1': 'backbone.stage1.0',
-    'model.2': 'backbone.stage1.1',
-    'model.3': 'backbone.stage2.0',
-    'model.4': 'backbone.stage2.1',
-    'model.5': 'backbone.stage3.0',
-    'model.6': 'backbone.stage3.1',
-    'model.7': 'backbone.stage4.0',
-    'model.8': 'backbone.stage4.1',
-    'model.9': 'backbone.stage4.2',
-
+    "model.0": "backbone.stem",
+    "model.1": "backbone.stage1.0",
+    "model.2": "backbone.stage1.1",
+    "model.3": "backbone.stage2.0",
+    "model.4": "backbone.stage2.1",
+    "model.5": "backbone.stage3.0",
+    "model.6": "backbone.stage3.1",
+    "model.7": "backbone.stage4.0",
+    "model.8": "backbone.stage4.1",
+    "model.9": "backbone.stage4.2",
     # neck
-    'model.12': 'neck.top_down_layers.0',
-    'model.15': 'neck.top_down_layers.1',
-    'model.16': 'neck.downsample_layers.0',
-    'model.18': 'neck.bottom_up_layers.0',
-    'model.19': 'neck.downsample_layers.1',
-    'model.21': 'neck.bottom_up_layers.1',
-
+    "model.12": "neck.top_down_layers.0",
+    "model.15": "neck.top_down_layers.1",
+    "model.16": "neck.downsample_layers.0",
+    "model.18": "neck.bottom_up_layers.0",
+    "model.19": "neck.downsample_layers.1",
+    "model.21": "neck.bottom_up_layers.1",
     # Detector
-    'model.22': 'bbox_head.head_module',
+    "model.22": "bbox_head.head_module",
 }
@@ -35,55 +33,56 @@ def convert(src, dst):
     convert_dict = convert_dict_s
     try:
-        yolov8_model = torch.load(src)['model']
+        yolov8_model = torch.load(src)["model"]
         blobs = yolov8_model.state_dict()
     except ModuleNotFoundError:
         raise RuntimeError(
-            'This script must be placed under the ultralytics repo,'
-            ' because loading the official pretrained model need'
-            ' `model.py` to build model.'
-            'Also need to install hydra-core>=1.2.0 and thop>=0.1.1')
+            "This script must be placed under the ultralytics repo,"
+            " because loading the official pretrained model needs"
+            " `model.py` to build the model."
+            " Also need to install hydra-core>=1.2.0 and thop>=0.1.1"
+        )
     state_dict = OrderedDict()
     for key, weight in blobs.items():
-        num, module = key.split('.')[1:3]
-        prefix = f'model.{num}'
+        num, module = key.split(".")[1:3]
+        prefix = f"model.{num}"
         new_key = key.replace(prefix, convert_dict[prefix])
-        if '.m.' in new_key:
-            new_key = new_key.replace('.m.', '.blocks.')
-            new_key = new_key.replace('.cv', '.conv')
-        elif 'bbox_head.head_module' in new_key:
-            new_key = new_key.replace('.cv2', '.reg_preds')
-            new_key = new_key.replace('.cv3', '.cls_preds')
-        elif 'backbone.stage4.2' in new_key:
-            new_key = new_key.replace('.cv', '.conv')
+        if ".m." in new_key:
+            new_key = new_key.replace(".m.", ".blocks.")
+            new_key = new_key.replace(".cv", ".conv")
+        elif "bbox_head.head_module" in new_key:
+            new_key = new_key.replace(".cv2", ".reg_preds")
+            new_key = new_key.replace(".cv3", ".cls_preds")
+        elif "backbone.stage4.2" in new_key:
+            new_key = new_key.replace(".cv", ".conv")
        else:
-            new_key = new_key.replace('.cv1', '.main_conv')
-            new_key = new_key.replace('.cv2', '.final_conv')
+            new_key = new_key.replace(".cv1", ".main_conv")
+            new_key = new_key.replace(".cv2", ".final_conv")
 
-        if 'bbox_head.head_module.dfl.conv.weight' == new_key:
-            print('Drop "bbox_head.head_module.dfl.conv.weight", '
-                  'because it is useless')
+        if "bbox_head.head_module.dfl.conv.weight" == new_key:
+            print(
+                'Drop "bbox_head.head_module.dfl.conv.weight", ' "because it is useless"
+            )
             continue
         state_dict[new_key] = weight
-        print(f'Convert {key} to {new_key}')
+        print(f"Convert {key} to {new_key}")
 
     # save checkpoint
     checkpoint = dict()
-    checkpoint['state_dict'] = state_dict
+    checkpoint["state_dict"] = state_dict
     torch.save(checkpoint, dst)
 
 
 # Note: This script must be placed under the YOLOv8 repo to run.
 def main():
-    parser = argparse.ArgumentParser(description='Convert model keys')
-    parser.add_argument(
-        '--src', default='yolov8s.pt', help='src YOLOv8 model path')
-    parser.add_argument('--dst', default='mmyolov8s.pth', help='save path')
+    parser = argparse.ArgumentParser(description="Convert model keys")
+    parser.add_argument("--src", default="yolov8s.pt", help="src YOLOv8 model path")
+    parser.add_argument("--dst", default="mmyolov8s.pth", help="save path")
     args = parser.parse_args()
     convert(args.src, args.dst)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/mmyolo/tools/model_converters/yolox_to_mmyolo.py b/mmyolo/tools/model_converters/yolox_to_mmyolo.py
index 5fcc7356..6ea7b224 100644
--- a/mmyolo/tools/model_converters/yolox_to_mmyolo.py
+++ b/mmyolo/tools/model_converters/yolox_to_mmyolo.py
@@ -5,18 +5,18 @@
 import torch
 
 neck_dict = {
-    'backbone.lateral_conv0': 'neck.reduce_layers.2',
-    'backbone.C3_p4.conv': 'neck.top_down_layers.0.0.cv',
-    'backbone.C3_p4.m.0.': 'neck.top_down_layers.0.0.m.0.',
-    'backbone.reduce_conv1': 'neck.top_down_layers.0.1',
-    'backbone.C3_p3.conv': 'neck.top_down_layers.1.cv',
-    'backbone.C3_p3.m.0.': 'neck.top_down_layers.1.m.0.',
-    'backbone.bu_conv2': 'neck.downsample_layers.0',
-    'backbone.C3_n3.conv': 'neck.bottom_up_layers.0.cv',
-    'backbone.C3_n3.m.0.': 'neck.bottom_up_layers.0.m.0.',
-    'backbone.bu_conv1': 'neck.downsample_layers.1',
-    'backbone.C3_n4.conv': 'neck.bottom_up_layers.1.cv',
-    'backbone.C3_n4.m.0.': 'neck.bottom_up_layers.1.m.0.',
+    "backbone.lateral_conv0": "neck.reduce_layers.2",
+    "backbone.C3_p4.conv": "neck.top_down_layers.0.0.cv",
+    "backbone.C3_p4.m.0.": "neck.top_down_layers.0.0.m.0.",
+    "backbone.reduce_conv1": "neck.top_down_layers.0.1",
+    "backbone.C3_p3.conv": "neck.top_down_layers.1.cv",
+    "backbone.C3_p3.m.0.": "neck.top_down_layers.1.m.0.",
+    "backbone.bu_conv2": "neck.downsample_layers.0",
+    "backbone.C3_n3.conv": "neck.bottom_up_layers.0.cv",
+    "backbone.C3_n3.m.0.": "neck.bottom_up_layers.0.m.0.",
+    "backbone.bu_conv1": "neck.downsample_layers.1",
+    "backbone.C3_n4.conv": "neck.bottom_up_layers.1.cv",
+    "backbone.C3_n4.m.0.": "neck.bottom_up_layers.1.m.0.",
 }
@@ -24,87 +24,87 @@ def convert_stem(model_key, model_weight, state_dict, converted_names):
     new_key = model_key[9:]
     state_dict[new_key] = model_weight
     converted_names.add(model_key)
-    print(f'Convert {model_key} to {new_key}')
+    print(f"Convert {model_key} to {new_key}")
 
 
 def convert_backbone(model_key, model_weight, state_dict, converted_names):
-    new_key = model_key.replace('backbone.dark', 'stage')
+    new_key = model_key.replace("backbone.dark", "stage")
     num = int(new_key[14]) - 1
     new_key = new_key[:14] + str(num) + new_key[15:]
-    if '.m.' in model_key:
-        new_key = new_key.replace('.m.', '.blocks.')
-    elif not new_key[16] == '0' and 'stage4.1' not in new_key:
-        new_key = new_key.replace('conv1', 'main_conv')
-        new_key = new_key.replace('conv2', 'short_conv')
-        new_key = new_key.replace('conv3', 'final_conv')
+    if ".m." in model_key:
+        new_key = new_key.replace(".m.", ".blocks.")
+    elif not new_key[16] == "0" and "stage4.1" not in new_key:
+        new_key = new_key.replace("conv1", "main_conv")
+        new_key = new_key.replace("conv2", "short_conv")
+        new_key = new_key.replace("conv3", "final_conv")
     state_dict[new_key] = model_weight
     converted_names.add(model_key)
-    print(f'Convert {model_key} to {new_key}')
+    print(f"Convert {model_key} to {new_key}")
 
 
 def convert_neck(model_key, model_weight, state_dict, converted_names):
     for old, new in neck_dict.items():
         if old in model_key:
             new_key = model_key.replace(old, new)
-            if '.m.' in model_key:
-                new_key = new_key.replace('.m.', '.blocks.')
-            elif '.C' in model_key:
-                new_key = new_key.replace('cv1', 'main_conv')
-                new_key = new_key.replace('cv2', 'short_conv')
-                new_key = new_key.replace('cv3', 'final_conv')
+            if ".m." in model_key:
+                new_key = new_key.replace(".m.", ".blocks.")
+            elif ".C" in model_key:
+                new_key = new_key.replace("cv1", "main_conv")
+                new_key = new_key.replace("cv2", "short_conv")
+                new_key = new_key.replace("cv3", "final_conv")
             state_dict[new_key] = model_weight
             converted_names.add(model_key)
-            print(f'Convert {model_key} to {new_key}')
+            print(f"Convert {model_key} to {new_key}")
 
 
 def convert_head(model_key, model_weight, state_dict, converted_names):
-    if 'stem' in model_key:
-        new_key = model_key.replace('head.stem', 'neck.out_layer')
-    elif 'cls_convs' in model_key:
+    if "stem" in model_key:
+        new_key = model_key.replace("head.stem", "neck.out_layer")
+    elif "cls_convs" in model_key:
         new_key = model_key.replace(
-            'head.cls_convs', 'bbox_head.head_module.multi_level_cls_convs')
-    elif 'reg_convs' in model_key:
+            "head.cls_convs", "bbox_head.head_module.multi_level_cls_convs"
+        )
+    elif "reg_convs" in model_key:
         new_key = model_key.replace(
-            'head.reg_convs', 'bbox_head.head_module.multi_level_reg_convs')
-    elif 'preds' in model_key:
-        new_key = model_key.replace('head.',
-                                    'bbox_head.head_module.multi_level_conv_')
-        new_key = new_key.replace('_preds', '')
+            "head.reg_convs", "bbox_head.head_module.multi_level_reg_convs"
+        )
+    elif "preds" in model_key:
+        new_key = model_key.replace("head.", "bbox_head.head_module.multi_level_conv_")
+        new_key = new_key.replace("_preds", "")
     state_dict[new_key] = model_weight
     converted_names.add(model_key)
-    print(f'Convert {model_key} to {new_key}')
 
 
 def convert(src, dst):
     """Convert keys in detectron pretrained YOLOX models to mmyolo style."""
-    blobs = torch.load(src)['model']
+    blobs = torch.load(src)["model"]
     state_dict = OrderedDict()
     converted_names = set()
 
     for key, weight in blobs.items():
-        if 'backbone.stem' in key:
+        if "backbone.stem" in key:
             convert_stem(key, weight, state_dict, converted_names)
-        elif 'backbone.backbone' in key:
+        elif "backbone.backbone" in key:
             convert_backbone(key, weight, state_dict, converted_names)
-        elif 'backbone.neck' not in key and 'head' not in key:
+        elif "backbone.neck" not in key and "head" not in key:
             convert_neck(key, weight, state_dict, converted_names)
-        elif 'head' in key:
+        elif "head" in key:
             convert_head(key, weight, state_dict, converted_names)
 
     # save checkpoint
     checkpoint = dict()
-    checkpoint['state_dict'] = state_dict
+    checkpoint["state_dict"] = state_dict
     torch.save(checkpoint, dst)
 
 
 def main():
-    parser = argparse.ArgumentParser(description='Convert model keys')
-    parser.add_argument(
-        '--src', default='yolox_s.pth', help='src yolox model path')
-    parser.add_argument('--dst', default='mmyoloxs.pt', help='save path')
+    parser = argparse.ArgumentParser(description="Convert model keys")
+    parser.add_argument("--src", default="yolox_s.pth", help="src yolox model path")
+    parser.add_argument("--dst", default="mmyoloxs.pt", help="save path")
     args = parser.parse_args()
     convert(args.src, args.dst)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/mmyolo/tools/test.py b/mmyolo/tools/test.py
index 0c5b89b8..db36a9dc 100644
--- a/mmyolo/tools/test.py
+++ b/mmyolo/tools/test.py
@@ -14,55 +14,59 @@
 # TODO: support fuse_conv_bn
 def parse_args():
-    parser = argparse.ArgumentParser(
-        description='MMYOLO test (and eval) a model')
-    parser.add_argument('config', help='test config file path')
-    parser.add_argument('checkpoint', help='checkpoint file')
+    parser = argparse.ArgumentParser(description="MMYOLO test (and eval) a model")
+    parser.add_argument("config", help="test config file path")
+    parser.add_argument("checkpoint", help="checkpoint file")
     parser.add_argument(
-        '--work-dir',
-        help='the directory to save the file containing evaluation metrics')
+        "--work-dir",
+        help="the directory to save the file containing evaluation metrics",
+    )
     parser.add_argument(
-        '--out',
+        "--out",
         type=str,
-        help='output result file (must be a .pkl file) in pickle format')
+        help="output result file (must be a .pkl file) in pickle format",
+    )
     parser.add_argument(
-        '--json-prefix',
+        "--json-prefix",
         type=str,
-        help='the prefix of the output json file without perform evaluation, '
-        'which is useful when you want to format the result to a specific '
-        'format and submit it to the test server')
+        help="the prefix of the output json file without performing evaluation, "
+        "which is useful when you want to format the result to a specific "
+        "format and submit it to the test server",
+    )
+    parser.add_argument("--show", action="store_true", help="show prediction results")
     parser.add_argument(
-        '--show', action='store_true', help='show prediction results')
+        "--deploy", action="store_true", help="Switch model to deployment mode"
+    )
     parser.add_argument(
-        '--deploy',
-        action='store_true',
-        help='Switch model to deployment mode')
+        "--show-dir",
+        help="directory where painted images will be saved. "
+        "If specified, it will be automatically saved "
+        "to the work_dir/timestamp/show_dir",
+    )
     parser.add_argument(
-        '--show-dir',
-        help='directory where painted images will be saved. '
-        'If specified, it will be automatically saved '
-        'to the work_dir/timestamp/show_dir')
+        "--wait-time", type=float, default=2, help="the interval of show (s)"
+    )
     parser.add_argument(
-        '--wait-time', type=float, default=2, help='the interval of show (s)')
-    parser.add_argument(
-        '--cfg-options',
-        nargs='+',
+        "--cfg-options",
+        nargs="+",
         action=DictAction,
-        help='override some settings in the used config, the key-value pair '
-        'in xxx=yyy format will be merged into config file. If the value to '
+        help="override some settings in the used config, the key-value pair "
+        "in xxx=yyy format will be merged into config file. If the value to "
         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
-        'Note that the quotation marks are necessary and that no white space '
-        'is allowed.')
+        "Note that the quotation marks are necessary and that no white space "
+        "is allowed.",
+    )
     parser.add_argument(
-        '--launcher',
-        choices=['none', 'pytorch', 'slurm', 'mpi'],
-        default='none',
-        help='job launcher')
-    parser.add_argument('--local_rank', type=int, default=0)
+        "--launcher",
+        choices=["none", "pytorch", "slurm", "mpi"],
+        default="none",
+        help="job launcher",
+    )
+    parser.add_argument("--local_rank", type=int, default=0)
     args = parser.parse_args()
-    if 'LOCAL_RANK' not in os.environ:
-        os.environ['LOCAL_RANK'] = str(args.local_rank)
+    if "LOCAL_RANK" not in os.environ:
+        os.environ["LOCAL_RANK"] = str(args.local_rank)
     return args
@@ -85,10 +89,11 @@ def main():
     if args.work_dir is not None:
         # update configs according to CLI args if args.work_dir is not None
         cfg.work_dir = args.work_dir
-    elif cfg.get('work_dir', None) is None:
+    elif cfg.get("work_dir", None) is None:
         # use config filename as default work_dir if cfg.work_dir is None
-        cfg.work_dir = osp.join('./work_dirs',
-                                osp.splitext(osp.basename(args.config))[0])
+        cfg.work_dir = osp.join(
+            "./work_dirs", osp.splitext(osp.basename(args.config))[0]
+        )
 
     cfg.load_from = args.checkpoint
@@ -96,18 +101,18 @@ def main():
         cfg = trigger_visualization_hook(cfg, args)
 
     if args.deploy:
-        cfg.custom_hooks.append(dict(type='SwitchToDeployHook'))
+        cfg.custom_hooks.append(dict(type="SwitchToDeployHook"))
 
     # add `format_only` and `outfile_prefix` into cfg
     if args.json_prefix is not None:
         cfg_json = {
-            'test_evaluator.format_only': True,
-            'test_evaluator.outfile_prefix': args.json_prefix
+            "test_evaluator.format_only": True,
+            "test_evaluator.outfile_prefix": args.json_prefix,
         }
         cfg.merge_from_dict(cfg_json)
 
     # build the runner from config
-    if 'runner_type' not in cfg:
+    if "runner_type" not in cfg:
         # build the default runner
         runner = Runner.from_cfg(cfg)
     else:
@@ -117,14 +122,14 @@ def main():
 
     # add `DumpResults` dummy metric
     if args.out is not None:
-        assert args.out.endswith(('.pkl', '.pickle')), \
-            'The dump file must be a pkl file.'
-        runner.test_evaluator.metrics.append(
-            DumpResults(out_file_path=args.out))
+        assert args.out.endswith(
+            (".pkl", ".pickle")
+        ), "The dump file must be a pkl file."
+        runner.test_evaluator.metrics.append(DumpResults(out_file_path=args.out))
 
     # start testing
     runner.test()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/mmyolo/tools/train.py b/mmyolo/tools/train.py
index e6ba9ebc..46268a54 100644
--- a/mmyolo/tools/train.py
+++ b/mmyolo/tools/train.py
@@ -13,41 +13,45 @@
 def parse_args():
-    parser = argparse.ArgumentParser(description='Train a detector')
-    parser.add_argument('config', help='train config file path')
-    parser.add_argument('--work-dir', help='the dir to save logs and models')
+    parser = argparse.ArgumentParser(description="Train a detector")
+    parser.add_argument("config", help="train config file path")
+    parser.add_argument("--work-dir", help="the dir to save logs and models")
     parser.add_argument(
-        '--amp',
-        action='store_true',
+        "--amp",
+        action="store_true",
         default=False,
-        help='enable automatic-mixed-precision training')
+        help="enable automatic-mixed-precision training",
+    )
     parser.add_argument(
-        '--resume',
-        nargs='?',
+        "--resume",
+        nargs="?",
        type=str,
-        const='auto',
-        help='If specify checkpoint path, resume from it, while if not '
-        'specify, try to auto resume from the latest checkpoint '
-        'in the work directory.')
+        const="auto",
+        help="If a checkpoint path is specified, resume from it; if not "
+        "specified, try to auto-resume from the latest checkpoint "
+        "in the work directory.",
+    )
     parser.add_argument(
-        '--cfg-options',
-        nargs='+',
+        "--cfg-options",
+        nargs="+",
         action=DictAction,
-        help='override some settings in the used config, the key-value pair '
-        'in xxx=yyy format will be merged into config file. If the value to '
+        help="override some settings in the used config, the key-value pair "
+        "in xxx=yyy format will be merged into config file. If the value to "
         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
-        'Note that the quotation marks are necessary and that no white space '
-        'is allowed.')
+        "Note that the quotation marks are necessary and that no white space "
+        "is allowed.",
+    )
     parser.add_argument(
-        '--launcher',
-        choices=['none', 'pytorch', 'slurm', 'mpi'],
-        default='none',
-        help='job launcher')
-    parser.add_argument('--local_rank', type=int, default=0)
+        "--launcher",
+        choices=["none", "pytorch", "slurm", "mpi"],
+        default="none",
+        help="job launcher",
+    )
+    parser.add_argument("--local_rank", type=int, default=0)
     args = parser.parse_args()
-    if 'LOCAL_RANK' not in os.environ:
-        os.environ['LOCAL_RANK'] = str(args.local_rank)
+    if "LOCAL_RANK" not in os.environ:
+        os.environ["LOCAL_RANK"] = str(args.local_rank)
     return args
@@ -71,28 +75,31 @@ def main():
     if args.work_dir is not None:
         # update configs according to CLI args if args.work_dir is not None
         cfg.work_dir = args.work_dir
-    elif cfg.get('work_dir', None) is None:
+    elif cfg.get("work_dir", None) is None:
         # use config filename as default work_dir if cfg.work_dir is None
-        cfg.work_dir = osp.join('./work_dirs',
-                                osp.splitext(osp.basename(args.config))[0])
+        cfg.work_dir = osp.join(
+            "./work_dirs", osp.splitext(osp.basename(args.config))[0]
+        )
 
     # enable automatic-mixed-precision training
     if args.amp is True:
         optim_wrapper = cfg.optim_wrapper.type
-        if optim_wrapper == 'AmpOptimWrapper':
+        if optim_wrapper == "AmpOptimWrapper":
             print_log(
-                'AMP training is already enabled in your config.',
-                logger='current',
-                level=logging.WARNING)
+                "AMP training is already enabled in your config.",
+                logger="current",
+                level=logging.WARNING,
+            )
         else:
-            assert optim_wrapper == 'OptimWrapper', (
-                '`--amp` is only supported when the optimizer wrapper type is '
-                f'`OptimWrapper` but got {optim_wrapper}.')
-            cfg.optim_wrapper.type = 'AmpOptimWrapper'
-            cfg.optim_wrapper.loss_scale = 'dynamic'
+            assert optim_wrapper == "OptimWrapper", (
+                "`--amp` is only supported when the optimizer wrapper type is "
+                f"`OptimWrapper` but got {optim_wrapper}."
+            )
+            cfg.optim_wrapper.type = "AmpOptimWrapper"
+            cfg.optim_wrapper.loss_scale = "dynamic"
 
     # resume is determined in this priority: resume from > auto_resume
-    if args.resume == 'auto':
+    if args.resume == "auto":
         cfg.resume = True
         cfg.load_from = None
     elif args.resume is not None:
@@ -100,7 +107,7 @@ def main():
         cfg.load_from = args.resume
 
     # build the runner from config
-    if 'runner_type' not in cfg:
+    if "runner_type" not in cfg:
         # build the default runner
         runner = Runner.from_cfg(cfg)
     else:
@@ -112,5 +119,5 @@ def main():
     runner.train()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()