From bf0838d09329d6eb95369d3fd0f9063b74d7db32 Mon Sep 17 00:00:00 2001
From: XixinYang <121591093+XixinYang@users.noreply.github.com>
Date: Tue, 26 Sep 2023 14:09:11 +0800
Subject: [PATCH] docs: add tutorials of fine-tune on a custom dataset (#711)
---
benchmark_results.md | 100 ++---
.../finetune_with_a_custom_dataset.md | 359 ++++++++++++++++
.../finetune_with_a_custom_dataset.md | 365 +++++++++++++++++
examples/README.md | 8 -
examples/finetune.py | 108 -----
examples/finetune/README.md | 17 +
examples/finetune/finetune.py | 386 ++++++++++++++++++
examples/finetune/read_images_online.py | 47 +++
examples/finetune/split_files.py | 41 ++
examples/scripts/train_densenet_multigpus.sh | 3 -
examples/scripts/train_densenet_standalone.sh | 2 -
mindcv/utils/callbacks.py | 6 +-
mkdocs.yml | 1 +
13 files changed, 1272 insertions(+), 171 deletions(-)
create mode 100644 docs/en/how_to_guides/finetune_with_a_custom_dataset.md
create mode 100644 docs/zh/how_to_guides/finetune_with_a_custom_dataset.md
delete mode 100644 examples/finetune.py
create mode 100644 examples/finetune/README.md
create mode 100644 examples/finetune/finetune.py
create mode 100644 examples/finetune/read_images_online.py
create mode 100644 examples/finetune/split_files.py
delete mode 100644 examples/scripts/train_densenet_multigpus.sh
delete mode 100644 examples/scripts/train_densenet_standalone.sh
diff --git a/benchmark_results.md b/benchmark_results.md
index d8be98502..bbd374264 100644
--- a/benchmark_results.md
+++ b/benchmark_results.md
@@ -1,8 +1,9 @@
| Model | Context | Top-1 (%) | Top-5 (%) | Params(M) | Recipe | Download |
| -------------- | -------- | --------- | --------- | --------- | ------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
-| BiT_resnet50 | D910x8-G | 76.81 | 93.17 | 25.55 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/bit/bit_resnet50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/bit/BiT_resnet50-1e4795a4.ckpt) |
-| BiT_resnet50x3 | D910x8-G | 80.63 | 95.12 | 217.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/bit/bit_resnet50x3_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/bit/BiT_resnet50x3-a960f91f.ckpt) |
-| BiT_resnet101 | D910x8-G | 77.93 | 93.75 | 44.54 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/bit/bit_resnet101_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/bit/BiT_resnet101-2efa9106.ckpt) |
+| bit_resnet50 | D910x8-G | 76.81 | 93.17 | 25.55 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/bit/bit_resnet50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/bit/BiT_resnet50-1e4795a4.ckpt) |
+| bit_resnet50x3 | D910x8-G | 80.63 | 95.12 | 217.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/bit/bit_resnet50x3_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/bit/BiT_resnet50x3-a960f91f.ckpt) |
+| bit_resnet101 | D910x8-G | 77.93 | 93.75 | 44.54 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/bit/bit_resnet101_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/bit/BiT_resnet101-2efa9106.ckpt) |
+| cmt_small | D910x8-G | 83.24 | 96.41 | 26.09 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/cmt/cmt_small_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/cmt/cmt_small-6858ee22.ckpt) |
| coat_lite_tiny | D910x8-G | 77.35 | 93.43 | 5.72 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/coat/coat_lite_tiny_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/coat/coat_lite_tiny-fa7bf894.ckpt) |
| coat_lite_mini | D910x8-G | 78.51 | 93.84 | 11.01 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/coat/coat_lite_mini_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/coat/coat_lite_mini-55a52f05.ckpt) |
| coat_tiny | D910x8-G | 79.67 | 94.88 | 5.50 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/coat/coat_tiny_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/coat/coat_tiny-071cb792.ckpt) |
@@ -20,10 +21,10 @@
| crossvit_9 | D910x8-G | 73.56 | 91.79 | 8.55 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/crossvit/crossvit_9_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/crossvit/crossvit_9-e74c8e18.ckpt) |
| crossvit_15 | D910x8-G | 81.08 | 95.33 | 27.27 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/crossvit/crossvit_15_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/crossvit/crossvit_15-eaa43c02.ckpt) |
| crossvit_18 | D910x8-G | 81.93 | 95.75 | 43.27 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/crossvit/crossvit_18_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/crossvit/crossvit_18-ca0a2e43.ckpt) |
-| densenet121 | D910x8-G | 75.64 | 92.84 | 8.06 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/densenet/densenet121_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/densenet/densenet121-120_5004_Ascend.ckpt) |
-| densenet161 | D910x8-G | 79.09 | 94.66 | 28.90 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/densenet/densenet161_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/densenet/densenet161-120_5004_Ascend.ckpt) |
-| densenet169 | D910x8-G | 77.26 | 93.71 | 14.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/densenet/densenet169_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/densenet/densenet169-120_5004_Ascend.ckpt) |
-| densenet201 | D910x8-G | 78.14 | 94.08 | 20.24 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/densenet/densenet201_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/densenet/densenet201-120_5004_Ascend.ckpt) |
+| densenet121 | D910x8-G | 75.64 | 92.84 | 8.06 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/densenet/densenet_121_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/densenet/densenet121-120_5004_Ascend.ckpt) |
+| densenet161 | D910x8-G | 79.09 | 94.66 | 28.90 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/densenet/densenet_161_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/densenet/densenet161-120_5004_Ascend.ckpt) |
+| densenet169 | D910x8-G | 77.26 | 93.71 | 14.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/densenet/densenet_169_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/densenet/densenet169-120_5004_Ascend.ckpt) |
+| densenet201 | D910x8-G | 78.14 | 94.08 | 20.24 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/densenet/densenet_201_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/densenet/densenet201-120_5004_Ascend.ckpt) |
| dpn92 | D910x8-G | 79.46 | 94.49 | 37.79 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/dpn/dpn92_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/dpn/dpn92-e3e0fca.ckpt) |
| dpn98 | D910x8-G | 79.94 | 94.57 | 61.74 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/dpn/dpn98_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/dpn/dpn98-119a8207.ckpt) |
| dpn107 | D910x8-G | 80.05 | 94.74 | 87.13 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/dpn/dpn107_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/dpn/dpn107-7d7df07b.ckpt) |
@@ -38,6 +39,7 @@
| ghostnet_100 | D910x8-G | 73.78 | 91.66 | 5.20 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/ghostnet/ghostnet_100_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/ghostnet/ghostnet_100-bef8025a.ckpt) |
| ghostnet_130 | D910x8-G | 75.50 | 92.56 | 7.39 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/ghostnet/ghostnet_130_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/ghostnet/ghostnet_130-cf4c235c.ckpt) |
| googlenet | D910x8-G | 72.68 | 90.89 | 6.99 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/googlenet/googlenet_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/googlenet/googlenet-5552fcd3.ckpt) |
+| halonet_50t | D910X8-G | 79.53 | 94.79 | 22.79 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/halonet/halonet_50t_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/halonet/halonet_50t-533da6be.ckpt) |
| hrnet_w32 | D910x8-G | 80.64 | 95.44 | 41.30 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/hrnet/hrnet_w32_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/hrnet/hrnet_w32-cc4fbd91.ckpt) |
| hrnet_w48 | D910x8-G | 81.19 | 95.69 | 77.57 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/hrnet/hrnet_w48_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/hrnet/hrnet_w48-2e3399cd.ckpt) |
| inception_v3 | D910x8-G | 79.11 | 94.40 | 27.20 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/inceptionv3/inception_v3_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/inception_v3/inception_v3-38f67890.ckpt) |
@@ -45,20 +47,20 @@
| mixnet_s | D910x8-G | 75.52 | 92.52 | 4.17 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mixnet/mixnet_s_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mixnet/mixnet_s-2a5ef3a3.ckpt) |
| mixnet_m | D910x8-G | 76.64 | 93.05 | 5.06 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mixnet/mixnet_m_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mixnet/mixnet_m-74cc4cb1.ckpt) |
| mixnet_l | D910x8-G | 78.73 | 94.31 | 7.38 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mixnet/mixnet_l_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mixnet/mixnet_l-978edf2b.ckpt) |
-| mnasnet_050 | D910x8-G | 68.07 | 88.09 | 2.14 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_050_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_050-7d8bf4db.ckpt) |
-| mnasnet_075 | D910x8-G | 71.81 | 90.53 | 3.20 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_075_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_075-465d366d.ckpt) |
-| mnasnet_100 | D910x8-G | 74.28 | 91.70 | 4.42 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_100_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_100-1bcf43f8.ckpt) |
-| mnasnet_130 | D910x8-G | 75.65 | 92.64 | 6.33 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_130_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_130-a43a150a.ckpt) |
-| mnasnet_140 | D910x8-G | 76.01 | 92.83 | 7.16 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_140_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_140-7e20bb30.ckpt) |
-| mobilenet_v1_025 | D910x8-G | 53.87 | 77.66 | 0.47 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv1/mobilenet_v1_025_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv1/mobilenet_v1_025-d3377fba.ckpt) |
-| mobilenet_v1_050 | D910x8-G | 65.94 | 86.51 | 1.34 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv1/mobilenet_v1_050_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv1/mobilenet_v1_050-23e9ddbe.ckpt) |
-| mobilenet_v1_075 | D910x8-G | 70.44 | 89.49 | 2.60 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv1/mobilenet_v1_075_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv1/mobilenet_v1_075-5bed0c73.ckpt) |
-| mobilenet_v1_100 | D910x8-G | 72.95 | 91.01 | 4.25 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv1/mobilenet_v1_100_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv1/mobilenet_v1_100-91c7b206.ckpt) |
-| mobilenet_v2_075 | D910x8-G | 69.98 | 89.32 | 2.66 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv2/mobilenet_v2_075_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv2/mobilenet_v2_075-bd7bd4c4.ckpt) |
-| mobilenet_v2_100 | D910x8-G | 72.27 | 90.72 | 3.54 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv2/mobilenet_v2_100_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv2/mobilenet_v2_100-d5532038.ckpt) |
-| mobilenet_v2_140 | D910x8-G | 75.56 | 92.56 | 6.15 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv2/mobilenet_v2_140_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv2/mobilenet_v2_140-98776171.ckpt) |
-| mobilenet_v3_small_100 | D910x8-G | 68.10 | 87.86 | 2.55 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_small_100_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_small_100-509c6047.ckpt) |
-| mobilenet_v3_large_100 | D910x8-G | 75.23 | 92.31 | 5.51 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_large_100_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_large_100-1279ad5f.ckpt) |
+| mnasnet_050 | D910x8-G | 68.07 | 88.09 | 2.14 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_0.5_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_050-7d8bf4db.ckpt) |
+| mnasnet_075 | D910x8-G | 71.81 | 90.53 | 3.20 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_0.75_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_075-465d366d.ckpt) |
+| mnasnet_100 | D910x8-G | 74.28 | 91.70 | 4.42 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_1.0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_100-1bcf43f8.ckpt) |
+| mnasnet_130 | D910x8-G | 75.65 | 92.64 | 6.33 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_1.3_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_130-a43a150a.ckpt) |
+| mnasnet_140 | D910x8-G | 76.01 | 92.83 | 7.16 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mnasnet/mnasnet_1.4_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mnasnet/mnasnet_140-7e20bb30.ckpt) |
+| mobilenet_v1_025 | D910x8-G | 53.87 | 77.66 | 0.47 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv1/mobilenet_v1_0.25_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv1/mobilenet_v1_025-d3377fba.ckpt) |
+| mobilenet_v1_050 | D910x8-G | 65.94 | 86.51 | 1.34 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv1/mobilenet_v1_0.5_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv1/mobilenet_v1_050-23e9ddbe.ckpt) |
+| mobilenet_v1_075 | D910x8-G | 70.44 | 89.49 | 2.60 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv1/mobilenet_v1_0.75_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv1/mobilenet_v1_075-5bed0c73.ckpt) |
+| mobilenet_v1_100 | D910x8-G | 72.95 | 91.01 | 4.25 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv1/mobilenet_v1_1.0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv1/mobilenet_v1_100-91c7b206.ckpt) |
+| mobilenet_v2_075 | D910x8-G | 69.98 | 89.32 | 2.66 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv2/mobilenet_v2_0.75_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv2/mobilenet_v2_075-bd7bd4c4.ckpt) |
+| mobilenet_v2_100 | D910x8-G | 72.27 | 90.72 | 3.54 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv2/mobilenet_v2_1.0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv2/mobilenet_v2_100-d5532038.ckpt) |
+| mobilenet_v2_140 | D910x8-G | 75.56 | 92.56 | 6.15 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv2/mobilenet_v2_1.4_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv2/mobilenet_v2_140-98776171.ckpt) |
+| mobilenet_v3_small_100 | D910x8-G | 68.10 | 87.86 | 2.55 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_small_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_small_100-509c6047.ckpt) |
+| mobilenet_v3_large_100 | D910x8-G | 75.23 | 92.31 | 5.51 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilenetv3/mobilenet_v3_large_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilenet/mobilenetv3/mobilenet_v3_large_100-1279ad5f.ckpt) |
| mobilevit_xx_small | D910x8-G | 68.91 | 88.91 | 1.27 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilevit/mobilevit_xx_small_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilevit/mobilevit_xx_small-af9da8a0.ckpt) |
| mobilevit_x_small | D910x8-G | 74.99 | 92.32 | 2.32 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilevit/mobilevit_x_small_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilevit/mobilevit_x_small-673fc6f2.ckpt) |
| mobilevit_small | D910x8-G | 78.47 | 94.18 | 5.59 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/mobilevit/mobilevit_small_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/mobilevit/mobilevit_small-caf79638.ckpt) |
@@ -97,46 +99,46 @@
| repvgg_b1g2 | D910x8-G | 78.03 | 94.09 | 45.85 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_b1g2_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/repvgg/repvgg_b1g2-f0dc714f.ckpt) |
| repvgg_b1g4 | D910x8-G | 77.64 | 94.03 | 40.03 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_b1g4_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/repvgg/repvgg_b1g4-bd93230e.ckpt) |
| repvgg_b2g4 | D910x8-G | 78.8 | 94.36 | 61.84 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/repvgg/repvgg_b2g4_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/repvgg/repvgg_b2g4-e79eeadd.ckpt) |
-| res2net50 | D910x8-G | 79.35 | 94.64 | 25.76 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/res2net/res2net50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/res2net/res2net50-f42cf71b.ckpt) |
-| res2net101 | D910x8-G | 79.56 | 94.70 | 45.33 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/res2net/res2net101_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/res2net/res2net101-8ae60132.ckpt) |
-| res2net50_v1b | D910x8-G | 80.32 | 95.09 | 25.77 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/res2net/res2net50_v1b_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/res2net/res2net50_v1b-99304e92.ckpt) |
-| res2net101_v1b | D910x8-G | 81.14 | 95.41 | 45.35 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/res2net/res2net101_v1b_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/res2net/res2net101_v1b-7e6db001.ckpt) |
+| res2net50 | D910x8-G | 79.35 | 94.64 | 25.76 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/res2net/res2net_50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/res2net/res2net50-f42cf71b.ckpt) |
+| res2net101 | D910x8-G | 79.56 | 94.70 | 45.33 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/res2net/res2net_101_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/res2net/res2net101-8ae60132.ckpt) |
+| res2net50_v1b | D910x8-G | 80.32 | 95.09 | 25.77 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/res2net/res2net_50_v1b_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/res2net/res2net50_v1b-99304e92.ckpt) |
+| res2net101_v1b | D910x8-G | 81.14 | 95.41 | 45.35 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/res2net/res2net_101_v1b_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/res2net/res2net101_v1b-7e6db001.ckpt) |
| resnest50 | D910x8-G | 80.81 | 95.16 | 27.55 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnest/resnest50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnest/resnest50-f2e7fc9c.ckpt) |
| resnest101 | D910x8-G | 82.90 | 96.12 | 48.41 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnest/resnest101_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnest/resnest101-7cc5c258.ckpt) |
-| resnet18 | D910x8-G | 70.21 | 89.62 | 11.70 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet18_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet18-1e65cd21.ckpt) |
-| resnet34 | D910x8-G | 74.15 | 91.98 | 21.81 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet34_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet34-f297d27e.ckpt) |
-| resnet50 | D910x8-G | 76.69 | 93.50 | 25.61 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet50-e0733ab8.ckpt) |
-| resnet101 | D910x8-G | 78.24 | 94.09 |44.65 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet101_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet101-689c5e77.ckpt) |
-| resnet152 | D910x8-G | 78.72 | 94.45 | 60.34| [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet152_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet152-beb689d8.ckpt) |
+| resnet18 | D910x8-G | 70.21 | 89.62 | 11.70 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet_18_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet18-1e65cd21.ckpt) |
+| resnet34 | D910x8-G | 74.15 | 91.98 | 21.81 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet_34_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet34-f297d27e.ckpt) |
+| resnet50 | D910x8-G | 76.69 | 93.50 | 25.61 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet_50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet50-e0733ab8.ckpt) |
+| resnet101 | D910x8-G | 78.24 | 94.09 |44.65 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet_101_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet101-689c5e77.ckpt) |
+| resnet152 | D910x8-G | 78.72 | 94.45 | 60.34| [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnet/resnet_152_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnet/resnet152-beb689d8.ckpt) |
| resnetv2_50 | D910x8-G | 76.90 | 93.37 | 25.60 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnetv2/resnetv2_50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnetv2/resnetv2_50-3c2f143b.ckpt) |
| resnetv2_101 | D910x8-G | 78.48 | 94.23 | 44.55 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnetv2/resnetv2_101_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnetv2/resnetv2_101-5d4c49a1.ckpt) |
| resnext50_32x4d | D910x8-G | 78.53 | 94.10 | 25.10 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnext/resnext50_32x4d_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnext/resnext50_32x4d-af8aba16.ckpt) |
| resnext101_32x4d | D910x8-G | 79.83 | 94.80 | 44.32 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnext/resnext101_32x4d_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnext/resnext101_32x4d-3c1e9c51.ckpt) |
| resnext101_64x4d | D910x8-G | 80.30 | 94.82 | 83.66 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnext/resnext101_64x4d_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnext/resnext101_64x4d-8929255b.ckpt) |
| resnext152_64x4d | D910x8-G | 80.52 | 95.00 | 115.27 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/resnext/resnext152_64x4d_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/resnext/resnext152_64x4d-3aba275c.ckpt) |
-| rexnet_09 | D910x8-G | 77.06 | 93.41 | 4.13 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/rexnet/rexnet_09_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/rexnet/rexnet_09-da498331.ckpt) |
-| rexnet_10 | D910x8-G | 77.38 | 93.60 | 4.84 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/rexnet/rexnet_10_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/rexnet/rexnet_10-c5fb2dc7.ckpt) |
-| rexnet_13 | D910x8-G | 79.06 | 94.28 | 7.61 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/rexnet/rexnet_13_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/rexnet/rexnet_13-a49c41e5.ckpt) |
-| rexnet_15 | D910x8-G | 79.95 | 94.74 | 9.79 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/rexnet/rexnet_15_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/rexnet/rexnet_15-37a931d3.ckpt) |
-| rexnet_20 | D910x8-G | 80.64 | 94.99 | 16.45 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/rexnet/rexnet_20_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/rexnet/rexnet_20-c5810914.ckpt) |
+| rexnet_09 | D910x8-G | 77.06 | 93.41 | 4.13 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/rexnet/rexnet_x09_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/rexnet/rexnet_09-da498331.ckpt) |
+| rexnet_10 | D910x8-G | 77.38 | 93.60 | 4.84 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/rexnet/rexnet_x10_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/rexnet/rexnet_10-c5fb2dc7.ckpt) |
+| rexnet_13 | D910x8-G | 79.06 | 94.28 | 7.61 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/rexnet/rexnet_x13_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/rexnet/rexnet_13-a49c41e5.ckpt) |
+| rexnet_15 | D910x8-G | 79.95 | 94.74 | 9.79 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/rexnet/rexnet_x15_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/rexnet/rexnet_15-37a931d3.ckpt) |
+| rexnet_20 | D910x8-G | 80.64 | 94.99 | 16.45 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/rexnet/rexnet_x20_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/rexnet/rexnet_20-c5810914.ckpt) |
| seresnet18 | D910x8-G | 71.81 | 90.49 | 11.80 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/senet/seresnet18_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/senet/seresnet18-7880643b.ckpt) |
| seresnet34 | D910x8-G | 75.38 | 92.50 | 21.98 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/senet/seresnet34_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/senet/seresnet34-8179d3c9.ckpt) |
| seresnet50 | D910x8-G | 78.32 | 94.07 | 28.14 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/senet/seresnet50_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/senet/seresnet50-ff9cd214.ckpt) |
| seresnext26_32x4d | D910x8-G | 77.17 | 93.42 | 16.83 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/senet/seresnext26_32x4d_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/senet/seresnext26_32x4d-5361f5b6.ckpt) |
| seresnext50_32x4d | D910x8-G | 78.71 | 94.36 | 27.63 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/senet/seresnext50_32x4d_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/senet/seresnext50_32x4d-fdc35aca.ckpt) |
-| shufflenet_v1_g3_05 | D910x8-G | 57.05 | 79.73 | 0.73 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv1/shufflenet_v1_g3_05_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv1/shufflenet_v1_g3_05-42cfe109.ckpt) |
-| shufflenet_v1_g3_10 | D910x8-G | 67.77 | 87.73 | 1.89 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv1/shufflenet_v1_g3_10_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv1/shufflenet_v1_g3_10-245f0ccf.ckpt) |
-| shufflenet_v2_x0_5 | D910x8-G | 60.53 | 82.11 | 1.37 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv2/shufflenet_v2_x0_5_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv2/shufflenet_v2_x0_5-8c841061.ckpt) |
-| shufflenet_v2_x1_0 | D910x8-G | 69.47 | 88.88 | 2.29 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv2/shufflenet_v2_x1_0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv2/shufflenet_v2_x1_0-0da4b7fa.ckpt) |
-| shufflenet_v2_x1_5 | D910x8-G | 72.79 | 90.93 | 3.53 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv2/shufflenet_v2_x1_5_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv2/shufflenet_v2_x1_5-00b56131.ckpt) |
-| shufflenet_v2_x2_0 | D910x8-G | 75.07 | 92.08 | 7.44 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv2/shufflenet_v2_x2_0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv2/shufflenet_v2_x2_0-ed8e698d.ckpt) |
+| shufflenet_v1_g3_05 | D910x8-G | 57.05 | 79.73 | 0.73 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv1/shufflenet_v1_0.5_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv1/shufflenet_v1_g3_05-42cfe109.ckpt) |
+| shufflenet_v1_g3_10 | D910x8-G | 67.77 | 87.73 | 1.89 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv1/shufflenet_v1_1.0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv1/shufflenet_v1_g3_10-245f0ccf.ckpt) |
+| shufflenet_v2_x0_5 | D910x8-G | 60.53 | 82.11 | 1.37 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv2/shufflenet_v2_0.5_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv2/shufflenet_v2_x0_5-8c841061.ckpt) |
+| shufflenet_v2_x1_0 | D910x8-G | 69.47 | 88.88 | 2.29 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv2/shufflenet_v2_1.0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv2/shufflenet_v2_x1_0-0da4b7fa.ckpt) |
+| shufflenet_v2_x1_5 | D910x8-G | 72.79 | 90.93 | 3.53 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv2/shufflenet_v2_1.5_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv2/shufflenet_v2_x1_5-00b56131.ckpt) |
+| shufflenet_v2_x2_0 | D910x8-G | 75.07 | 92.08 | 7.44 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/shufflenetv2/shufflenet_v2_2.0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/shufflenet/shufflenetv2/shufflenet_v2_x2_0-ed8e698d.ckpt) |
| skresnet18 | D910x8-G | 73.09 | 91.20 | 11.97 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/sknet/skresnet18_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/sknet/skresnet18-868228e5.ckpt) |
| skresnet34 | D910x8-G | 76.71 | 93.10 | 22.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/sknet/skresnet34_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/sknet/skresnet34-d668b629.ckpt) |
| skresnext50_32x4d | D910x8-G | 79.08 | 94.60 | 37.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/sknet/skresnext50_32x4d_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/sknet/skresnext50_32x4d-395413a2.ckpt) |
-| squeezenet1_0 | D910x8-G | 59.01 | 81.01 | 1.25 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/squeezenet/squeezenet1_0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/squeezenet/squeezenet1_0-e2d78c4a.ckpt) |
-| squeezenet1_0 | GPUx8-G | 58.83 | 81.08 | 1.25 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/squeezenet/squeezenet1_0_gpu.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/squeezenet/squeezenet1_0_gpu-685f5941.ckpt) |
-| squeezenet1_1 | D910x8-G | 58.44 | 80.84 | 1.24 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/squeezenet/squeezenet1_1_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/squeezenet/squeezenet1_1-da256d3a.ckpt) |
-| squeezenet1_1 | GPUx8-G | 59.18 | 81.41 | 1.24 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/squeezenet/squeezenet1_1_gpu.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/squeezenet/squeezenet1_1_gpu-0e33234a.ckpt) |
+| squeezenet1_0 | D910x8-G | 59.01 | 81.01 | 1.25 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/squeezenet/squeezenet_1.0_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/squeezenet/squeezenet1_0-e2d78c4a.ckpt) |
+| squeezenet1_0 | GPUx8-G | 58.83 | 81.08 | 1.25 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/squeezenet/squeezenet_1.0_gpu.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/squeezenet/squeezenet1_0_gpu-685f5941.ckpt) |
+| squeezenet1_1 | D910x8-G | 58.44 | 80.84 | 1.24 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/squeezenet/squeezenet_1.1_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/squeezenet/squeezenet1_1-da256d3a.ckpt) |
+| squeezenet1_1 | GPUx8-G | 59.18 | 81.41 | 1.24 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/squeezenet/squeezenet_1.1_gpu.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/squeezenet/squeezenet1_1_gpu-0e33234a.ckpt) |
| swin_tiny | D910x8-G | 80.82 | 94.80 | 33.38 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/swintransformer/swin_tiny_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/swin/swin_tiny-0ff2f96d.ckpt) |
| swinv2_tiny_window8 | D910x8-G | 81.42 | 95.43 | 28.78 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/swintransformerv2/swinv2_tiny_window8_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/swinv2/swinv2_tiny_window8-3ef8b787.ckpt) |
| vgg11 | D910x8-G | 71.86 | 90.50 | 132.86 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vgg/vgg11_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vgg/vgg11-ef31d161.ckpt) |
@@ -147,12 +149,12 @@
| visformer_tiny_v2 | D910x8-G | 78.82 | 94.41 | 9.38 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/visformer/visformer_tiny_v2_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/visformer/visformer_tiny_v2-6711a758.ckpt) |
| visformer_small | D910x8-G | 81.76 | 95.88 | 40.25 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/visformer/visformer_small_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/visformer/visformer_small-6c83b6db.ckpt) |
| visformer_small_v2 | D910x8-G | 82.17 | 95.90 | 23.52 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/visformer/visformer_small_v2_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/visformer/visformer_small_v2-63674ade.ckpt) |
-| vit_b_32_224 | D910x8-G | 75.86 | 92.08 | 87.46 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vit/vit_b_32_224_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vit/vit_b_32_224-7553218f.ckpt) |
-| vit_l_16_224 | D910x8-G | 76.34 | 92.79 | 303.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vit/vit_l_16_224_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vit/vit_l_16_224-f02b2487.ckpt) |
-| vit_l_32_224 | D910x8-G | 73.71 | 90.92 | 305.52 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vit/vit_b_32_224_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vit/vit_l_32_224-3a961018.ckpt) |
+| vit_b_32_224 | D910x8-G | 75.86 | 92.08 | 87.46 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vit/vit_b32_224_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vit/vit_b_32_224-7553218f.ckpt) |
+| vit_l_16_224 | D910x8-G | 76.34 | 92.79 | 303.31 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vit/vit_l16_224_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vit/vit_l_16_224-f02b2487.ckpt) |
+| vit_l_32_224 | D910x8-G | 73.71 | 90.92 | 305.52 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/vit/vit_l32_224_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/vit/vit_l_32_224-3a961018.ckpt) |
| volo_d1 | D910x8-G | 82.59 | 95.99 | 27 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/volo/volo_d1_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/volo/volo_d1-c7efada9.ckpt) |
| xception | D910x8-G | 79.01 | 94.25 | 22.91 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/xception/xception_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/xception/xception-2c1e711df.ckpt) |
-| xcit_tiny_12_p16_224 | D910x8-G | 77.67 | 93.79 | 7.00 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/xcit/xcit_tiny_12_p16_224_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/xcit/xcit_tiny_12_p16_224-1b1c9301.ckpt) |
+| xcit_tiny_12_p16_224 | D910x8-G | 77.67 | 93.79 | 7.00 | [yaml](https://github.com/mindspore-lab/mindcv/blob/main/configs/xcit/xcit_tiny_12_p16_ascend.yaml) | [weights](https://download.mindspore.cn/toolkits/mindcv/xcit/xcit_tiny_12_p16_224-1b1c9301.ckpt) |
#### Notes
- Context: Training context denoted as {device}x{pieces}-{MS mode}, where mindspore mode can be G - graph mode or F - pynative mode with ms function. For example, D910x8-G is for training on 8 pieces of Ascend 910 NPU using graph mode.
diff --git a/docs/en/how_to_guides/finetune_with_a_custom_dataset.md b/docs/en/how_to_guides/finetune_with_a_custom_dataset.md
new file mode 100644
index 000000000..cfc4e88ee
--- /dev/null
+++ b/docs/en/how_to_guides/finetune_with_a_custom_dataset.md
@@ -0,0 +1,359 @@
+# Fine-tune with a Custom Dataset
+
+This document introduces the process of fine-tuning a pre-trained model on a custom dataset in MindCV, as well as the implementation of fine-tuning techniques such as reading the dataset online, setting the learning rate for specific layers, and freezing part of the parameters. The main code is in ./examples/finetune/finetune.py; you can modify it based on this tutorial as needed.
+
+Next, we will use the FGVC-Aircraft dataset as an example to show how to fine-tune the pre-trained model mobilenet v3-small. [Fine-Grained Visual Classification of Aircraft](https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/) is a commonly used fine-grained image classification benchmark dataset, which contains 10,000 aircraft images from 100 different aircraft types (a.k.a. variants), i.e. 100 images per type.
+
+First, extract the downloaded dataset into the ./aircraft folder. The directory structure of the Aircraft dataset is:
+
+```text
+aircraft
+└── data
+ ├── images
+ │ ├── image1.jpg
+ │ ├── image2.jpg
+ │ └── ....
+ ├── images_variant_test.txt
+ ├── images_variant_trainval.txt
+ └── ....
+```
+
+The folder "images" contains all the 10,000 images, and the airplane types and subset names of each image are recorded in images_variant_*.txt. When this dataset is used for fine-tuning, the training set is usually set by annotation file: images_variant_trainval.txt. Hence, the training set should contain 6667 images and the test set should contain 3333 images after the dataset has been split.
+
+## Data Preprocessing
+
+### Read Custom Dataset
+
+For a custom dataset, you can either organize the dataset directory locally into an ImageNet-like tree structure and then read it with the function `create_dataset` (the offline way), or, if your dataset is medium-scale or larger and copying files locally is impractical, directly [read all the images into a mappable or iterable object](https://www.mindspore.cn/tutorials/en/r2.1/beginner/dataset.html#customizing-dataset), which replaces both the file-splitting and the `create_dataset` steps (the online way).
+
+#### Read Dataset Offline
+
+The function `create_dataset` builds a dataset object with [`mindspore.dataset.ImageFolderDataset`](https://www.mindspore.cn/docs/zh-CN/r2.0/api_python/dataset/mindspore.dataset.ImageFolderDataset.html#mindspore.dataset.ImageFolderDataset), which assigns all images in the same folder the same label, namely the folder name. Therefore, the prerequisite for using this function is that the source dataset directory follows this tree structure:
+
+```text
+DATASET_NAME
+ ├── split1(e.g. train)/
+ │ ├── class1/
+ │ │ ├── 000001.jpg
+ │ │ ├── 000002.jpg
+ │ │ └── ....
+ │ └── class2/
+ │ ├── 000001.jpg
+ │ ├── 000002.jpg
+ │ └── ....
+ └── split2/
+ ├── class1/
+ │ ├── 000001.jpg
+ │ ├── 000002.jpg
+ │ └── ....
+ └── class2/
+ ├── 000001.jpg
+ ├── 000002.jpg
+ └── ....
+```
+
+Next, taking the annotation file ./aircraft/data/images_variant_trainval.txt as an example, we locally generate the training-set folder ./aircraft/data/images/trainval/, which satisfies the tree-structure requirement above.
+
+```python
+import shutil
+import os
+
+# only for the Aircraft dataset, not a general-purpose helper
+def extract_images(images_path, subset_name, annotation_file_path, copy=True):
+ # read the annotation file to get the label of each image
+ def annotations(annotation_file_path):
+ image_label = {}
+ for i in open(annotation_file_path, "r"):
+ label = " ".join(i.split(" ")[1:]).replace("\n", "").replace("/", "_")
+ if label not in image_label.keys():
+ image_label[label] = []
+ image_label[label].append(i.split(" ")[0])
+ else:
+ image_label[label].append(i.split(" ")[0])
+ return image_label
+
+ # make a new folder for subset
+ subset_path = images_path + subset_name
+ os.mkdir(subset_path)
+
+    # extract and copy/move images to the new folder
+    image_label = annotations(annotation_file_path)
+    for label in image_label.keys():
+        label_folder = subset_path + "/" + label
+        os.mkdir(label_folder)
+        for image in image_label[label]:
+            image_name = image + ".jpg"
+            if copy:
+                # join with "/" so the image keeps its own name inside the label folder
+                shutil.copy(images_path + image_name, label_folder + "/" + image_name)
+            else:
+                shutil.move(images_path + image_name, label_folder)
+
+
+images_path = "./aircraft/data/images/"
+subset_name = "trainval"
+annotation_file_path = "./aircraft/data/images_variant_trainval.txt"
+extract_images(images_path, subset_name, annotation_file_path)
+```
+
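+The test set is extracted in exactly the same way, for example by reusing the helper defined above with the test annotation file:
+
+```python
+images_path = "./aircraft/data/images/"
+subset_name = "test"
+annotation_file_path = "./aircraft/data/images_variant_test.txt"
+extract_images(images_path, subset_name, annotation_file_path)
+```
+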
+After both subsets have been extracted, the file structure of the whole Aircraft dataset should be:
+
+```text
+aircraft
+└── data
+ └── images
+ ├── trainval
+ │ ├── 707-320
+ │ │ ├── 0056978.jpg
+ │ │ └── ....
+ │ ├── 727-200
+ │ │ ├── 0048341.jpg
+ │ │ └── ....
+ │ └── ....
+ └── test
+ ├── 707-320
+ │ ├── 0062765.jpg
+ │ └── ....
+ ├── 727-200
+ │ ├── 0061581.jpg
+ │ └── ....
+ └── ....
+```
+
+./examples/finetune/finetune.py integrates the whole training pipeline, from preprocessing to building and training the model: `create_dataset` -> `create_transforms` -> `create_loader` -> `create_model` -> .... Therefore, once the dataset has the required directory structure, it can be passed directly to the fine-tuning script, and the subsequent steps of loading the dataset and training the model are started by running `python ./examples/finetune/finetune.py --data_dir=./aircraft/data/images/`. For custom datasets, please note that the `dataset` parameter in the configuration file must be set to an empty string `""` in advance.
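+
+Inside finetune.py, the offline reading then boils down to a call roughly like the sketch below. The keyword names `name`, `root` and `split` are assumptions based on the MindCV data API and the configuration keys (`dataset`, `data_dir`), so check the actual `create_dataset` signature before relying on them:
+
+```python
+from mindcv.data import create_dataset
+
+# read the ImageNet-style folder tree built above; an empty name means a custom dataset
+dataset_train = create_dataset(
+    name="",                         # matches dataset: "" in the configuration file
+    root="./aircraft/data/images/",  # matches --data_dir
+    split="trainval",                # sub-folder that holds the training images
+    shuffle=True,
+)
+```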
+
+#### Read Dataset Online
+
+Reading the dataset offline takes up extra local disk space to store the newly generated data files. When local storage is insufficient, or the data cannot be copied to the local environment and therefore cannot be read with `create_dataset` directly, you can instead write your own class to read the dataset in an online way.
+
+Here is how we generate a random-access dataset object that stores the images of the training set and realizes a mapping from indices to data samples:
+
+- First, we define a class `ImageClsDataset` that reads the raw data and turns it into a random-access dataset:
+
+    - In the initialization function `__init__()`, the paths of the annotation file (e.g. ./aircraft/data/images_variant_trainval.txt) and of the image folder are taken as input and used to generate a dictionary `self.annotation` that stores the one-to-one correspondence between images and labels;
+    - Since `create_loader` will perform a map operation on this object, which does not support string labels, we also need to build `self.label2id` to convert the string labels in `self.annotation` to integers;
+    - Based on the information stored in `self.annotation`, we then read each training image from the folder ./aircraft/data/images/ as a one-dimensional array (the image data must be read as a one-dimensional array due to map operation restrictions in `create_loader`); the image data and labels are stored in `self._data` and `self._label` respectively.
+    - Finally, the `__getitem__` method makes the object mappable.
+- After writing the `ImageClsDataset` class, we instantiate it with the paths of the annotation file and the image folder, and load it as a dataset readable by the model through [`mindspore.dataset.GeneratorDataset`](https://www.mindspore.cn/docs/zh-CN/r2.0/api_python/dataset/mindspore.dataset.GeneratorDataset.html#mindspore.dataset.GeneratorDataset). Note that the parameter `column_names` must be set to ["image", "label"] for subsequent reading by other functions. The result is equivalent to what `create_dataset` generates.
+
+
+```python
+import numpy as np
+from mindspore.dataset import GeneratorDataset
+
+
+class ImageClsDataset:
+ def __init__(self, annotation_dir, images_dir):
+ # Read annotations
+ self.annotation = {}
+ for i in open(annotation_dir, "r"):
+ image_label = i.replace("\n", "").replace("/", "_").split(" ")
+ image = image_label[0] + ".jpg"
+ label = " ".join(image_label[1:])
+ self.annotation[image] = label
+
+ # Transfer string-type label to int-type label
+ self.label2id = {}
+ labels = sorted(list(set(self.annotation.values())))
+ for i in labels:
+ self.label2id[i] = labels.index(i)
+
+ for image, label in self.annotation.items():
+ self.annotation[image] = self.label2id[label]
+
+        # Read image-label pairs into a mappable object
+        # (build an independent list per label id; dict.fromkeys(..., []) would make every key share one list)
+        images = {label_id: [] for label_id in self.label2id.values()}
+ for image, label in self.annotation.items():
+ read_image = np.fromfile(images_dir + image, dtype=np.uint8)
+ images[label].append(read_image)
+
+ self._data = sum(list(images.values()), [])
+ self._label = sum([[i] * len(images[i]) for i in images.keys()], [])
+
+ # make class ImageClsDataset a mappable object
+ def __getitem__(self, index):
+ return self._data[index], self._label[index]
+
+ def __len__(self):
+ return len(self._data)
+
+annotation_dir = "./aircraft/data/images_variant_trainval.txt"
+images_dir = "./aircraft/data/images/"
+dataset = ImageClsDataset(annotation_dir, images_dir)
+dataset_train = GeneratorDataset(source=dataset, column_names=["image", "label"], shuffle=True)
+```
+
+Compared with the offline way, the online way skips splitting the data files locally and reading them with the `create_dataset` function. For the subsequent training, simply **replace the part of finetune.py that uses `create_dataset` with the code above**; you can then start training by running finetune.py directly, just as you would after reading the dataset offline.
+
+### Augmentation and Batching
+
+MindCV uses the `create_loader` function to perform data augmentation and batching on the dataset read in the previous chapter. The augmentation strategy is defined in advance by the `create_transforms` function, and batching is controlled by the `batch_size` parameter of `create_loader`. All the hyper-parameters mentioned above can be passed through the model configuration file; for their detailed usage, see the [API documentation](https://mindspore-lab.github.io/mindcv/zh/reference/data/).
+
+For small custom datasets, it is suggested to apply data augmentation to the training set to improve the generalization of the model and prevent overfitting. For fine-grained image classification datasets such as the Aircraft dataset in this tutorial, where accuracy may suffer from the large intra-class variance, the image size can be enlarged by adjusting the hyper-parameter `image_resize` (e.g. 448, 512 or 600).
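+
+As a rough sketch of how these pieces fit together for the training set (the keyword arguments shown are assumptions based on the configuration keys above; check the `create_transforms` / `create_loader` API documentation before use):
+
+```python
+from mindcv.data import create_transforms, create_loader
+
+# augmentation pipeline for the training set, with a larger input size for fine-grained data
+trans = create_transforms(
+    dataset_name="",                      # custom dataset
+    image_resize=600,
+    is_training=True,
+    auto_augment="randaug-m7-mstd0.5",
+)
+
+# batch the dataset object obtained in the previous chapter
+loader_train = create_loader(
+    dataset=dataset_train,
+    batch_size=8,
+    is_training=True,
+    num_classes=100,
+    transform=trans,
+)
+```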
+
+## Fine-tuning
+
+Referring to [Stanford University CS231n](https://cs231n.github.io/transfer-learning/#tf), **fine-tuning all the parameters**, **freezing the feature network**, and **setting learning rates for specific layers** are commonly used fine-tuning techniques. The first one uses the pre-trained weights to initialize the parameters of the target model and then updates all of them on the new dataset, so it is usually time-consuming but yields high accuracy. Freezing the feature network comes in two forms: freezing the whole feature network (linear probing) and freezing only part of it. The former uses the pre-trained model as a feature extractor and only updates the parameters of the fully connected layer, which takes little time but gives lower accuracy; the latter generally freezes the parameters of the shallow layers, which only learn basic image features, and updates only the parameters of the deep layers and the fully connected layer. Setting learning rates for specific layers is similar but more fine-grained: it specifies the learning rates used by certain layers during training.
+
+For the hyper-parameters used in fine-tuning, you can start from the configuration files in ./configs that were used for pre-training on the ImageNet-1k dataset. Note that for fine-tuning, the hyper-parameter `pretrained` should be set to `True` to load the pre-trained weights, `num_classes` should be set to the number of classes of the custom dataset (e.g. 100 for the Aircraft dataset here), and `batch_size` and `epoch_size` should be reduced according to the size of the custom dataset. In addition, since the pre-trained weights already contain a lot of information for identifying images, the learning rate `lr` should be reduced so as not to destroy this information too much; it is recommended to start from at most one-tenth of the pre-training learning rate, or from 0.0001, and tune from there. These parameters can be modified in the configuration file or passed on the command line as shown below. The training results can be viewed in the file ./ckpt/results.txt.
+
+```bash
+python ./examples/finetune/finetune.py --config=./configs/mobilenetv3/mobilenet_v3_small_ascend.yaml --data_dir=./aircraft/data/images --pretrained=True
+```
+
+When fine-tuning mobilenet v3-small on the Aircraft dataset, this tutorial mainly makes the following changes to the hyper-parameters:
+
+| Hyper-parameter | Pretrain | Fine-tune |
+| --------------- | ---------- | -------------------- |
+| dataset | "imagenet" | "" |
+| batch_size | 75 | 8 |
+| image_resize | 224 | 600 |
+| auto_augment | - | "randaug-m7-mstd0.5" |
+| num_classes | 1000 | 100 |
+| pretrained | False | True |
+| epoch_size | 470 | 50 |
+| lr | 0.77 | 0.002 |
+
+### Fine-tuning All the Parameters
+
+Since the procedure of this type of fine-tuning is the same as training from scratch, simply **start the training by running finetune.py** and adjust the hyper-parameters as you would when training from scratch.
+
+### Freeze Feature Network
+
+#### Linear Probe
+
+We prevent parameters from being updated by setting `requires_grad=False` for all parameters except those in the fully connected layer. In finetune.py, add the following code after `create_model`:
+
+```python
+from mindcv.models.registry import _model_pretrained_cfgs
+
+# ...create_model()
+
+# number of parameters to be updated
+num_params = 2
+
+# read names of parameters in FC layer
+classifier_names = [_model_pretrained_cfgs[args.model]["classifier"] + ".weight",
+ _model_pretrained_cfgs[args.model]["classifier"] + ".bias"]
+
+# prevent parameters in network(except the classifier) from updating
+for param in network.trainable_params():
+ if param.name not in classifier_names:
+ param.requires_grad = False
+```
+
+#### Freeze Part of the Feature Network
+
+To balance the speed and accuracy of fine-tuning, we can also freeze part of the backbone parameters and train only those in the deep layers. We need to extract the names of the parameters in the layers to be frozen and slightly modify the code from the previous section. By printing `network`, the result of `create_model`, we can see that in MindCV each layer of mobilenet v3-small is named `features.*`. Suppose we freeze only the first 7 layers of the network; add the following code after `create_model`:
+
+```python
+# ...create_model()
+
+# names of the first 7 network layers
+freeze_layer = ["features." + str(i) for i in range(7)]
+
+# prevent parameters in the first 7 layers of the network from updating
+# (match the full prefix so that e.g. "features.1" does not also catch "features.10")
+for param in network.trainable_params():
+    for layer in freeze_layer:
+        if param.name.startswith(layer + "."):
+            param.requires_grad = False
+```
+
+### Set Learning Rate for Specific Layers
+
+To further improve the accuracy of a fine-tuned model, we can set different learning rates for different layers of the network. The shallow part of the network generally recognizes common contours and features, so even if its parameters are updated, the learning rate should be relatively small; the deep part generally recognizes the detailed, object-specific characteristics, so its learning rate can be relatively large; and compared with the feature network, which should retain the pre-trained information as much as possible, the classifier has to be trained from scratch, so its learning rate can be increased appropriately. Since this operation is fine-grained, we need to edit finetune.py to specify the parameter names of the relevant layers and their corresponding learning rates.
+
+MindCV uses [`create_optimizer`](https://mindspore-lab.github.io/mindcv/zh/reference/optim/#mindcv.optim.optim_factory.create_optimizer) to generate the optimizer and passes the learning rate to it. To set a layer-wise learning rate, simply **change the `params` argument of the `create_optimizer` call in finetune.py from `network.trainable_params()` to a list of dictionaries containing the specific parameters and their corresponding learning rates**; see the [API documentation of optimizers](https://www.mindspore.cn/docs/zh-CN/r2.0/api_python/mindspore.nn.html#%E4%BC%98%E5%8C%96%E5%99%A8) for the expected format. The specific structure of the network and the parameter names in each layer can be viewed by printing `network`, the result of `create_model`.
+
+> Tip: You can also use the same mechanism to set a different `weight_decay` for groups of parameters, as sketched below.
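+
+A minimal sketch, mirroring the learning-rate groups shown in the following subsections: weight decay is turned off for bias and BatchNorm parameters and kept at `args.weight_decay` elsewhere. The name-matching rule here is only illustrative:
+
+```python
+# ...
+
+# split parameters by name: bias/BatchNorm parameters get no weight decay
+no_decay_params = [p for p in network.trainable_params()
+                   if p.name.endswith(".bias") or p.name.endswith(".beta") or p.name.endswith(".gamma")]
+decay_params = [p for p in network.trainable_params()
+                if not (p.name.endswith(".bias") or p.name.endswith(".beta") or p.name.endswith(".gamma"))]
+
+params_wd_group = [{"params": decay_params, "weight_decay": args.weight_decay},
+                   {"params": no_decay_params, "weight_decay": 0.0},
+                   {"order_params": network.trainable_params()}]
+
+optimizer = create_optimizer(params_wd_group,
+                             opt=args.opt,
+                             lr=lr_scheduler,
+                             ...)
+```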
+
+#### Set Learning Rate for Classifier
+
+Taking mobilenet v3-small as an example, the classifier parameter names start with "classifier", so to increase only the learning rate of the classifier we need to specify its learning rate at every training step. `lr_scheduler` is the learning-rate list generated by `create_scheduler`, which contains the learning rate for each step of training. Suppose we raise the learning rate of the classifier to 1.2 times that of the feature network; the changes to finetune.py are as follows:
+
+```python
+# ...
+
+
+# Note: a) the params-lr dicts must cover all the parameters. b) It is also recommended to add a dict with the key "order_params" to make sure the parameters are updated in the right order.
+params_lr_group = [{"params": list(filter(lambda x: 'classifier' in x.name, network.trainable_params())),
+ "lr": [i*1.2 for i in lr_scheduler]},
+ {"params": list(filter(lambda x: 'classifier' not in x.name, network.trainable_params())),
+ "lr": lr_scheduler},
+ {"order_params": network.trainable_params()}]
+
+optimizer = create_optimizer(params_lr_group,
+ opt=args.opt,
+ lr=lr_scheduler,
+ ...)
+```
+
+#### Set Learning Rate for Any Layers in Feature Network
+
+Similar to adjusting the learning rate of the classifier alone, setting the learning rate of specific layers in the feature network requires a list specifying the learning rate for each of those layers. Assuming we only increase the learning rates of the last three layers of the feature network (with prefixes features.13, features.14, features.15), the code that creates the optimizer in finetune.py is changed as follows:
+
+```python
+# ...
+
+
+# Note: a) the params-lr dicts must cover all the parameters. b) It is also recommended to add a dict with the key "order_params" to make sure the parameters are updated in the right order.
+params_lr_group = [{"params": list(filter(lambda x: 'features.13' in x.name, network.trainable_params())),
+ "lr": [i * 1.05 for i in lr_scheduler]},
+ {"params": list(filter(lambda x: 'features.14' in x.name, network.trainable_params())),
+ "lr": [i * 1.1 for i in lr_scheduler]},
+ {"params": list(filter(lambda x: 'features.15' in x.name, network.trainable_params())),
+ "lr": [i * 1.15 for i in lr_scheduler]},
+ {"params": list(filter(
+ lambda x: ".".join(x.name.split(".")[:2]) not in ["features.13", "features.14", "features.15"],
+ network.trainable_params())),
+ "lr": lr_scheduler},
+ {"order_params": network.trainable_params()}]
+
+optimizer = create_optimizer(params_lr_group,
+ opt=args.opt,
+ lr=lr_scheduler,
+ ...)
+```
+
+
+## Evaluation
+
+After training, use the model weights stored as `*_best.ckpt` in the ./ckpt folder to evaluate the performance of the network on the test set. Just **run validate.py** and pass it the paths of the model configuration file and of the model weights:
+
+```bash
+python validate.py --config=./configs/mobilenetv3/mobilnet_v3_small_ascend.yaml --data_dir=./aircraft/data --ckpt_path=./ckpt/mobilenet_v3_small_100_best.ckpt
+```
+
+The following table summarizes the Top-1 accuracy of the fine-tuned mobilenet v3-small on the Aircraft dataset with the same training configuration but different fine-tuning techniques:
+
+| Network | Freeze the Whole Feature Network | Freeze the Shallow Part of the Feature Network | Full Fine-tuning | Full Fine-tuning with Increased Learning Rate for the Classifier | Full Fine-tuning with Increased Learning Rate for Deep Layers |
+| ------------------ | --------------------------- | -------------------------------------- | ---------------- | ------------------------------------------------------------ | ------------------------------------------------------------ |
+| mobilenet v3-small | 48.66% | 76.83% | 88.35% | 88.89% | 88.68% |
+
+## Prediction
+
+Refer to the [Visualize Model Inference Results](https://mindspore-lab.github.io/mindcv/zh/tutorials/finetune/#12) section of the MindCV fine-tuning tutorial, or add the following code to validate.py to generate a text file ./ckpt/pred.txt that stores the true and predicted labels of a batch of the test set:
+
+```python
+# ... after model.eval()
+
+# take one batch of the test set and its real labels
+images, labels = next(loader_eval.create_tuple_iterator())
+
+# predicted labels
+pred = np.argmax(model.predict(images).asnumpy(), axis=1)
+
+# write pred.txt
+prediction = np.array([pred, labels.asnumpy()]).transpose()
+np.savetxt("./ckpt/pred.txt", prediction, fmt="%s", header="pred \t real")
+```
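+
+If needed, the saved file can be read back to compute the Top-1 accuracy of this batch; since the header line starts with `#`, `np.loadtxt` skips it automatically (a minimal sketch):
+
+```python
+# recompute the Top-1 accuracy of this batch from pred.txt
+result = np.loadtxt("./ckpt/pred.txt")
+print("Top-1 accuracy of this batch:", (result[:, 0] == result[:, 1]).mean())
+```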
+
+## Appendix
+
+The following table shows the Top-1 accuracy (%) of full-model fine-tuning of several CNNs on the Aircraft dataset. For the classification accuracy that has been achieved on this dataset, see the [Aircraft Leaderboard](https://fgvc.org/leaderboard/Aircraft.html) and [Papers With Code](https://paperswithcode.com/sota/fine-grained-image-classification-on-fgvc).
+
+| Network | Full Fine-tuning Accuracy with MindCV | Accuracy in Papers |
+| ------------------ | ------------------------------------- | ------------------------------------------------------------ |
+| mobilenet v3-small | 88.35% | - |
+| mobilenet v3-large | 92.22% | [83.8%](https://opus.lib.uts.edu.au/handle/10453/156186) |
+| convnext-tiny | 93.69% | [84.23%](http://ise.thss.tsinghua.edu.cn/~mlong/doc/hub-pathway-transfer-learning-nips22.pdf) |
+| resnest50 | 86.82% | - |
diff --git a/docs/zh/how_to_guides/finetune_with_a_custom_dataset.md b/docs/zh/how_to_guides/finetune_with_a_custom_dataset.md
new file mode 100644
index 000000000..f76b29489
--- /dev/null
+++ b/docs/zh/how_to_guides/finetune_with_a_custom_dataset.md
@@ -0,0 +1,365 @@
+# 自定义数据集的模型微调指南
+
+本文档提供了使用MindCV在自定义数据集上微调的参考流程,以及在线读取数据集、分层设置学习率、冻结部分特征网络等微调技巧的实现方法,主要代码集成在./examples/finetune/finetune.py中,您可以基于此教程根据需要自行改动。
+
+接下来将以FGVC-Aircraft数据集为例展示如何对预训练模型mobilenet v3-small进行微调。[Fine-Grained Visual Classification of Aircraft](https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/)是常用的细粒度图像分类基准数据集,包含 10000 张飞机图片,100 种不同的飞机型号(variant),其中每种飞机型号均有 100 张图片。
+
+首先将下载后的数据集解压到./data文件夹下,Aircraft数据集的目录为:
+
+```text
+aircraft
+└── data
+ ├── images
+ │ ├── image1.jpg
+ │ ├── image2.jpg
+ │ └── ....
+ ├── images_variant_test.txt
+ ├── images_variant_trainval.txt
+ └── ....
+```
+
+其中images文件夹包含全部10000张图片,每张图片所属的飞机型号和子集由images_variant_*.txt标注。在模型微调阶段,训练集一般由images_variant_trainval.txt 确定。经过拆分后,训练集应当包含6667张图片,测试集包含3333张图片。
+
+## 数据预处理
+
+### 读取数据集
+
+对于自定义数据集而言,既可以先在本地将数据文件目录整理成与ImageNet类似的树状结构,再使用`create_dataset`读取数据集(离线方式,仅适用于小型数据集),又可以直接[将原始数据集读取成可迭代/可映射对象](https://www.mindspore.cn/tutorials/en/r2.1/beginner/dataset.html#customizing-dataset),替代文件拆分与`create_dataset`步骤(在线方式)。
+
+#### 离线方式
+
+MindCV的`create_dataset`接口使用[`mindspore.dataset.ImageFolderDataset`](https://www.mindspore.cn/docs/zh-CN/r2.0/api_python/dataset/mindspore.dataset.ImageFolderDataset.html#mindspore.dataset.ImageFolderDataset)函数构建数据对象,同一个文件夹内的所有图片将会根据文件夹名字被分配相同的标签。因此,使用该流程的前提条件是源数据集的文件目录应当遵循如下树状结构:
+
+```text
+DATASET_NAME
+ ├── split1(e.g. train)/
+ │ ├── class1/
+ │ │ ├── 000001.jpg
+ │ │ ├── 000002.jpg
+ │ │ └── ....
+ │ └── class2/
+ │ ├── 000001.jpg
+ │ ├── 000002.jpg
+ │ └── ....
+ └── split2/
+ ├── class1/
+ │ ├── 000001.jpg
+ │ ├── 000002.jpg
+ │ └── ....
+ └── class2/
+ ├── 000001.jpg
+ ├── 000002.jpg
+ └── ....
+```
+
+接下来以说明文件./aircraft/data/images_variant_trainval.txt 为例,在本地生成满足前述树状结构的训练集文件 ./aircraft/data/images/trainval/。
+
+```python
+import shutil
+import os
+
+# only for Aircraft dataset but not a general one
+def extract_images(images_path, subset_name, annotation_file_path, copy=True):
+ # read the annotation file to get the label of each image
+ def annotations(annotation_file_path):
+ image_label = {}
+ for i in open(annotation_file_path, "r"):
+ label = " ".join(i.split(" ")[1:]).replace("\n", "").replace("/", "_")
+ if label not in image_label.keys():
+ image_label[label] = []
+ image_label[label].append(i.split(" ")[0])
+ else:
+ image_label[label].append(i.split(" ")[0])
+ return image_label
+
+ # make a new folder for subset
+ subset_path = images_path + subset_name
+ os.mkdir(subset_path)
+
+ # extract and copy/move images to the new folder
+ image_label = annotations(annotation_file_path)
+ for label in image_label.keys():
+ label_folder = subset_path + "/" + label
+ os.mkdir(label_folder)
+ for image in image_label[label]:
+ image_name = image + ".jpg"
+ if copy:
+                shutil.copy(images_path + image_name, label_folder + "/" + image_name)
+ else:
+ shutil.move(images_path + image_name, label_folder)
+
+
+images_path = "./aircraft/data/images/"
+subset_name = "trainval"
+annotation_file_path = "./aircraft/data/images_variant_trainval.txt"
+extract_images(images_path, subset_name, annotation_file_path)
+```
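+
+复用上面的`extract_images`函数即可按同样方式生成测试集目录,例如(仅作示意):
+
+```python
+# the test split uses images_variant_test.txt as its annotation file
+extract_images(images_path, "test", "./aircraft/data/images_variant_test.txt")
+```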
+
+测试集的拆分方式与训练集一致,整理完成的Aircraft数据集文件结构应为:
+
+```text
+aircraft
+└── data
+ └── images
+ ├── trainval
+ │ ├── 707-320
+ │ │ ├── 0056978.jpg
+ │ │ └── ....
+ │ ├── 727-200
+ │ │ ├── 0048341.jpg
+ │ │ └── ....
+ │ └── ....
+ └── test
+ ├── 707-320
+ │ ├── 0062765.jpg
+ │ └── ....
+ ├── 727-200
+ │ ├── 0061581.jpg
+ │ └── ....
+ └── ....
+```
+
+由于模型微调文件./examples/finetune/finetune.py中集成了`create_dataset`->`create_transforms`->`create_loader`->`create_model`->...等所有从预处理到建立、验证模型的训练流程,使用离线方式整理完文件目录结构的数据集可以**直接通过运行`python ./examples/finetune/finetune.py`命令完成后续读取数据与训练模型**这一整套操作。对于自定义数据集而言,还需注意提前将配置文件中的`dataset`参数设置为空字符串`""`。
+
+#### 在线方式
+
+离线方式的数据读取会在本地占用额外的磁盘空间存储新生成的数据文件,因此在本地存储空间不足或无法将数据备份到本地等其他特殊情况下,无法直接使用`create_dataset`接口读取本地数据文件时,可以采用在线方式自行编写函数读取数据集。
+
+以生成储存训练集图片和索引到图片样本映射的可随机访问数据集为例:
+
+- 首先定义一个读取原始数据并将其转换成可随机访问的数据集对象`ImageClsDataset`:
+
+ - 在该类的初始化函数`__init__()`中,以./aircraft/data/images_variant_trainval.txt为例的标注文件的文件路径将被当做输入,用于生成储存图片与标签一一对应关系的字典`self.annotation`;
+
+ - 由于在`create_loader`中将会对此对象进行map操作,而该操作不支持字符串格式的标签,因此还需要生成`self.label2id`并将`self.annotation`中字符串格式的标签转换成整数格式;
+
+ - 根据`self.annotation`中储存的信息,从文件夹./aircraft/data/images/中将训练集图片读取成一维数组形式(由于`create_loader`中map操作限制,此处图片数据必须被读取为一维格式),并将图片信息与标签分别存放到`self._data`与`self._label`中;
+
+ - 接下来使用`__getitem__`方法构造可随机访问的数据集对象。
+
+- 构造完`ImageClsDataset`类之后,向其传入标注文件与图片文件夹的路径以实例化该类,并通过[`mindspore.dataset.GeneratorDataset`](https://www.mindspore.cn/docs/zh-CN/r2.0/api_python/dataset/mindspore.dataset.GeneratorDataset.html#mindspore.dataset.GeneratorDataset)函数将该可映射对象加载成数据集即可。
+  注意该函数的参数`column_names`必须被设置为["image", "label"]以便后续其他接口读取数据,此时得到的`dataset_train`应当与通过`create_dataset`读取的训练集完全一致。
+
+
+```python
+import numpy as np
+from mindspore.dataset import GeneratorDataset
+
+
+class ImageClsDataset:
+ def __init__(self, annotation_dir, images_dir):
+ # Read annotations
+ self.annotation = {}
+ for i in open(annotation_dir, "r"):
+ image_label = i.replace("\n", "").replace("/", "_").split(" ")
+ image = image_label[0] + ".jpg"
+ label = " ".join(image_label[1:])
+ self.annotation[image] = label
+
+ # Transfer string-type label to int-type label
+ self.label2id = {}
+ labels = sorted(list(set(self.annotation.values())))
+ for i in labels:
+ self.label2id[i] = labels.index(i)
+
+ for image, label in self.annotation.items():
+ self.annotation[image] = self.label2id[label]
+
+ # Read image-labels as mappable object
+        images = {label: [] for label in self.label2id.values()}  # use an independent list for each label
+ for image, label in self.annotation.items():
+ read_image = np.fromfile(images_dir + image, dtype=np.uint8)
+ images[label].append(read_image)
+
+ self._data = sum(list(images.values()), [])
+ self._label = sum([[i] * len(images[i]) for i in images.keys()], [])
+
+ # make class ImageClsDataset a mappable object
+ def __getitem__(self, index):
+ return self._data[index], self._label[index]
+
+ def __len__(self):
+ return len(self._data)
+
+
+annotation_dir = "./aircraft/data/images_variant_trainval.txt"
+images_dir = "./aircraft/data/iamges/"
+dataset = ImageClsDataset(annotation_dir)
+dataset_train = GeneratorDataset(source=dataset, column_names=["image", "label"], shuffle=True)
+
+```
+
+与离线方式读取数据集相比,在线读取方式省略了在本地拆分数据文件并用`create_dataset`接口读取本地文件的步骤,因此在后续的训练中,只需**将finetune.py中使用`create_dataset`接口的部分替换成上述代码**,就可以与离线方式一样,直接运行finetune.py开始训练。
+
+### 数据增强与分批
+
+MindCV使用`create_loader`函数对上一章节读取的数据集进行图像增强与分批处理,图像增强策略通过`create_transforms`函数事先定义,分批处理操作通过`create_loader`函数中的参数`batch_size`定义,以上涉及到的**所有超参数均可以通过模型配置文件传递**,超参数具体使用方法见[API说明](https://mindspore-lab.github.io/mindcv/zh/reference/data/)。
+
+对于规模较小的自定义数据集,建议可以在这一部分对训练集做额外的数据增强处理,以增强模型的泛化性,防止过拟合。对于细粒度图像分类任务的数据集,比如本文中的Aircraft数据集,由于数据类内方差较大可能导致分类效果较差,还可以通过调整超参数`image_resize`适当增大图片尺寸(如:448、512、600等等)。
+
+## 模型微调
+
+参考[Stanford University CS231n](https://cs231n.github.io/transfer-learning/#tf),**整体微调**、**冻结特征网络微调**、与**分层设置学习率微调**是常用的微调模式。模型的整体微调使用预训练权重初始化目标模型的参数并在此基础上针对新数据集继续训练、更新所有参数,因此计算量较大,耗时较长但一般精度较高;冻结特征网络则分为冻结所有特征网络与冻结部分特征网络两种,前者将预训练模型作为特征提取器,仅更新全连接层参数,耗时短但精度低,后者一般固定学习基础特征的浅层参数,只更新学习精细特征的深层网络参数与全连接层参数;分层设置学习率与之相似,但是更加精细地指定了网络内部某些特定层在训练中更新参数所使用的学习率。
+
+对于实际微调训练中所使用的超参数配置,可以参考./configs中基于ImageNet-1k数据集预训练的配置文件。注意对模型微调而言,应事先将超参数`pretrained`设置为`True`以加载预训练权重,将`num_classes`设置为自定义数据集的标签个数(比如Aircraft数据集是100),还可以基于自定义数据集规模,适当调小`batch_size`与`epoch_size`。此外,由于预训练权重中已经包含了许多识别图像的初始信息,为了不过分破坏这些信息,还需将学习率`lr`调小,建议至多从预训练学习率的十分之一或0.0001开始训练、调参。这些参数都可以在配置文件中修改,也可以如下所示在shell命令中添加,训练结果可在./ckpt/results.txt文件中查看。
+
+```bash
+python ./examples/finetune/finetune.py --config=./configs/mobilenetv3/mobilnet_v3_small_ascend.yaml --data_dir=./aircraft/data --pretrained=True
+```
+
+本文在基于Aircraft数据集对mobilenet v3-small微调时主要对超参数做了如下改动:
+
+| Hyper-parameter | Pretrain | Fine-tune |
+| --------------- |------------|----------------------|
+| dataset | "imagenet" | "" |
+| batch_size | 75 | 8 |
+| image_resize | 224 | 600 |
+| auto_augment | - | "randaug-m7-mstd0.5" |
+| num_classes | 1000 | 100 |
+| pretrained | False | True |
+| epoch_size | 470 | 50 |
+| lr | 0.77 | 0.002 |
+
+### 整体微调
+
+由于整体微调的训练流程与从头训练一致,因此只需通过**运行finetune.py启动训练**并跟从头训练一样调参即可。
+
+### 冻结特征网络
+
+#### 冻结所有特征网络
+
+我们通过对除全连接层外的所有参数设置`requires_grad=False`来防止其参数更新。在finetune.py中,只需在创建模型`create_model`之后加入如下代码即可实现:
+
+```python
+from mindcv.models.registry import _model_pretrained_cfgs
+
+# ...create_model()
+
+# number of parameters to be updated
+num_params = 2
+
+# read names of parameters in FC layer
+classifier_names = [_model_pretrained_cfgs[args.model]["classifier"] + ".weight",
+ _model_pretrained_cfgs[args.model]["classifier"] + ".bias"]
+
+# prevent parameters in network(except the classifier) from updating
+for param in network.trainable_params():
+ if param.name not in classifier_names:
+ param.requires_grad = False
+```
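+
+由于`trainable_params()`只返回`requires_grad=True`的参数,冻结后可以用下面的可选检查确认只剩分类器的权重和偏置会被更新(仅作示意):
+
+```python
+# optional check: after freezing, only the classifier weight and bias should remain trainable
+remaining = [param.name for param in network.trainable_params()]
+print(remaining)
+assert len(remaining) == num_params
+```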
+
+#### 冻结部分特征网络
+
+为了平衡微调训练的速度和精度,我们还可以固定部分目标网络参数,有针对性地训练网络中的深层参数。实现这一操作只需要提取出要冻结的层中的参数名称,并在上述冻结所有特征网络的代码基础上稍作修改即可。通过打印`create_model`的结果——`network`可知,MindCV中对mobilenet v3-small的每层网络命名为`"features.*"`,假设我们仅冻结网络前7层,在finetune.py中创建模型`create_model`后加入如下代码即可:
+
+```python
+# ...create_model()
+
+# read names of the layers to freeze (the trailing dot prevents "features.1"
+# from also matching "features.10" ~ "features.15")
+freeze_layer = ["features." + str(i) + "." for i in range(7)]
+
+# prevent parameters in the first 7 layers of the network from updating
+for param in network.trainable_params():
+    if any(param.name.startswith(layer) for layer in freeze_layer):
+        param.requires_grad = False
+```
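+
+同样地,可以打印剩余的可训练参数名,确认前7层已被冻结(可选,仅作示意):
+
+```python
+# optional check: parameters of features.0 ~ features.6 should no longer be listed
+print([param.name for param in network.trainable_params()])
+```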
+
+### 分层设置学习率
+
+为了进一步提升微调网络的训练效果,还可以分层设置训练中的学习率。这是由于浅层网络一般是识别通用的轮廓特征,所以即便重新更新该部分参数,学习率也应该被设置得比较小;深层部分一般识别物体精细的个性特征,学习率也因此可以设置得比较大;而相对于需要尽量保留预训练信息的特征网络而言,分类器需要从头开始训练,也可以适当将学习率调大。由于针对特定网络层的学习率调整操作比较精细,我们需要进入finetune.py中自行指定参数名与对应的学习率。
+
+MindCV使用[`create_optimizer`](https://mindspore-lab.github.io/mindcv/zh/reference/optim/#mindcv.optim.optim_factory.create_optimizer)函数构造优化器,并将学习率传到优化器中去。要设置分层学习率,只需**将finetune.py中`create_optimizer`函数的`params`参数从`network.trainable_params()`改为包含特定层参数名与对应学习率的列表即可**,参考[MindSpore各优化器说明文档](https://www.mindspore.cn/docs/zh-CN/r2.0/api_python/mindspore.nn.html#%E4%BC%98%E5%8C%96%E5%99%A8),其中网络具体结构与每层中的参数名均可以通过打印`create_model`的结果——`network`查看。
+> Tips: 您还可以使用同样的操作分层设置weight_decay.
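+
+例如,下面的示意代码使用与上文相同的参数分组格式,为特征网络设置比分类器更小的`weight_decay`(数值仅作举例):
+
+```python
+# a sketch only: group parameters by weight decay instead of (or in addition to) learning rate
+params_wd_group = [{"params": list(filter(lambda x: 'classifier' in x.name, network.trainable_params())),
+                    "weight_decay": 1e-4},
+                   {"params": list(filter(lambda x: 'classifier' not in x.name, network.trainable_params())),
+                    "weight_decay": 1e-5},
+                   {"order_params": network.trainable_params()}]
+
+optimizer = create_optimizer(params_wd_group,
+                             opt=args.opt,
+                             lr=lr_scheduler,
+                             ...)
+```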
+
+#### 单独调整分类器的学习率
+
+以mobilenet v3-small为例,该模型分类器名称以“classifier”开头,因此如果仅调大分类器的学习率,我们需要指定分类器在每一步训练中的学习率。`lr_scheduler`是由`create_scheduler`生成的学习率调整策略,是一个包含网络每步训练中具体学习率值的列表,假设我们将分类器的学习率调整至特征网络学习率的1.2倍,finetune.py中创建优化器部分代码的改动如下:
+
+```python
+# ...
+
+
+# Note: a) the params-lr dicts must cover all the parameters. b) It is also recommended to add a dict with the key "order_params" to make sure the parameters are updated in the right order.
+params_lr_group = [{"params": list(filter(lambda x: 'classifier' in x.name, network.trainable_params())),
+ "lr": [i*1.2 for i in lr_scheduler]},
+ {"params": list(filter(lambda x: 'classifier' not in x.name, network.trainable_params())),
+ "lr": lr_scheduler},
+ {"order_params": network.trainable_params()}]
+
+optimizer = create_optimizer(params_lr_group,
+ opt=args.opt,
+ lr=lr_scheduler,
+ ...)
+```
+
+#### 设置特征网络任意层的学习率
+
+与单独调整分类器的学习率类似,分层设置特征网络学习率需要指定特定层的学习率变化列表。假设我们仅增大特征网络最后三层参数(features.13, features.14, features.15)更新的学习率,对finetune.py中创建优化器部分代码的改动如下:
+
+```python
+# ...
+
+
+# Note: a) the params-lr dicts must cover all the parameters. b) It is also recommended to add a dict with the key "order_params" to make sure the parameters are updated in the right order.
+params_lr_group = [{"params": list(filter(lambda x: 'features.13' in x.name, network.trainable_params())),
+ "lr": [i * 1.05 for i in lr_scheduler]},
+ {"params": list(filter(lambda x: 'features.14' in x.name, network.trainable_params())),
+ "lr": [i * 1.1 for i in lr_scheduler]},
+ {"params": list(filter(lambda x: 'features.15' in x.name, network.trainable_params())),
+ "lr": [i * 1.15 for i in lr_scheduler]},
+ {"params": list(filter(
+ lambda x: ".".join(x.name.split(".")[:2]) not in ["features.13", "features.14", "features.15"],
+ network.trainable_params())),
+ "lr": lr_scheduler},
+ {"order_params": network.trainable_params()}]
+
+optimizer = create_optimizer(params_lr_group,
+ opt=args.opt,
+ lr=lr_scheduler,
+ ...)
+```
+
+
+## 模型评估
+
+训练结束后,使用./ckpt文件夹中以`*_best.ckpt`格式储存的模型权重来评估模型在测试集上的最优表现,只需**直接运行validate.py**并向其传入模型配置文件路径与权重的文件路径即可:
+
+```bash
+python validate.py --config=./configs/mobilenetv3/mobilnet_v3_small_ascend.yaml --data_dir=./aircraft/data --ckpt_path=./ckpt/mobilenet_v3_small_100_best.ckpt
+```
+
+模型微调章节展示了多种微调技巧,下表总结了在使用相同训练配置不同微调方式下mobilenet v3-small模型在Aircraft数据集上的Top-1 精度表现:
+
+| 模型 | 冻结所有特征网络 | 冻结浅层特征网络 | 全量微调+固定学习率 | 全量微调+调大分类器学习率 | 全量微调+调大深层网络学习率 |
+| ------------------ | ---------------- | ---------------- | ------------------- | ------------------------- | --------------------------- |
+| mobilenet v3-small | 48.66% | 76.83% | 88.35% | 88.89% | 88.68% |
+
+## 模型预测
+
+参考MindCV微调教程中[可视化模型推理结果](https://mindspore-lab.github.io/mindcv/zh/tutorials/finetune/#_12)小节,或是在validate.py中加入如下代码生成储存测试集真实值与预测值的文本文件./ckpt/pred.txt:
+
+```python
+# ... after model.eval()
+
+# take one batch of the test set and its real labels
+images, labels = next(loader_eval.create_tuple_iterator())
+
+# predicted labels
+pred = np.argmax(model.predict(images).asnumpy(), axis=1)
+
+# write pred.txt
+prediction = np.array([pred, labels.asnumpy()]).transpose()
+np.savetxt("./ckpt/pred.txt", prediction, fmt="%s", header="pred \t real")
+```
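+
+如有需要,也可以读回该文件,快速计算这一批数据上的Top-1精度(仅作示意,`np.loadtxt`会自动跳过以`#`开头的表头):
+
+```python
+# recompute the Top-1 accuracy of this batch from pred.txt
+result = np.loadtxt("./ckpt/pred.txt")
+print("Top-1 accuracy of this batch:", (result[:, 0] == result[:, 1]).mean())
+```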
+
+## 附录
+
+以下表格展示了使用MindCV在多个CNN模型上对Aircraft数据集进行全量微调的Top-1精度(%)对比信息,该数据集上可实现的分类精度参见[Aircraft Leaderboard](https://fgvc.org/leaderboard/Aircraft.html)和[Papers With Code](https://paperswithcode.com/sota/fine-grained-image-classification-on-fgvc)。
+
+| 模型 | MindCV全量微调精度 | 参考精度 |
+| ------------------ | ------------------ | ------------------------------------------------------------ |
+| mobilenet v3-small | 88.35% | - |
+| mobilenet v3-large | 92.22% | [83.8%](https://opus.lib.uts.edu.au/handle/10453/156186) |
+| convnext-tiny | 93.69% | [84.23%](http://ise.thss.tsinghua.edu.cn/~mlong/doc/hub-pathway-transfer-learning-nips22.pdf) |
+| resnest50 | 86.82% | - |
diff --git a/examples/README.md b/examples/README.md
index 2b59e5f4d..74a5e40b6 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,13 +1,5 @@
This folder contains examples for various tasks, which users can run easily.
-### Finetune
-```
-python examples/finetune.py
-
-```
-This example shows how to finetune a pretrained model on your own dataset. You can also specifiy `--freeze_backbone` to choose whether to freeze the backbone and finetune the classifier head only.
-
-
### Single process with model training and evaluation
```
python examples/train_with_func_example.py
diff --git a/examples/finetune.py b/examples/finetune.py
deleted file mode 100644
index 1f3927bfb..000000000
--- a/examples/finetune.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import sys
-
-sys.path.append(".")
-
-import matplotlib.pyplot as plt
-import numpy as np
-
-from mindspore import LossMonitor, Model, TimeMonitor
-
-from mindcv.data import create_dataset, create_loader, create_transforms, get_dataset_download_root
-from mindcv.loss import create_loss
-from mindcv.models import create_model
-from mindcv.optim import create_optimizer
-from mindcv.utils.download import DownLoad
-
-freeze_backbone = False
-visualize = False
-
-dataset_url = (
- "https://mindspore-website.obs.cn-north-4.myhuaweicloud.com/notebook/datasets/intermediate/Canidae_data.zip"
-)
-root_dir = os.path.join(get_dataset_download_root(), "Canidae")
-data_dir = os.path.join(root_dir, "data", "Canidae") # Canidae has prefix path "data/Canidae" in unzipped file.
-if not os.path.exists(data_dir):
- DownLoad().download_and_extract_archive(dataset_url, root_dir)
-
-num_workers = 8
-
-# 加载自定义数据集
-dataset_train = create_dataset(root=data_dir, split="train", num_parallel_workers=num_workers)
-dataset_val = create_dataset(root=data_dir, split="val", num_parallel_workers=num_workers)
-
-# 定义和获取数据处理及增强操作
-trans_train = create_transforms(dataset_name="ImageNet", is_training=True)
-trans_val = create_transforms(dataset_name="ImageNet", is_training=False)
-
-loader_train = create_loader(
- dataset=dataset_train,
- batch_size=16,
- is_training=True,
- num_classes=2,
- transform=trans_train,
- num_parallel_workers=num_workers,
-)
-
-
-loader_val = create_loader(
- dataset=dataset_val,
- batch_size=5,
- is_training=True,
- num_classes=2,
- transform=trans_val,
- num_parallel_workers=num_workers,
-)
-
-images, labels = next(loader_train.create_tuple_iterator())
-# images = data["image"]
-# labels = data["label"]
-
-print("Tensor of image", images.shape)
-print("Labels:", labels)
-
-# class_name对应label,按文件夹字符串从小到大的顺序标记label
-class_name = {0: "dogs", 1: "wolves"}
-
-if visualize:
- plt.figure(figsize=(15, 7))
- for i in range(len(labels)):
- # 获取图像及其对应的label
- data_image = images[i].asnumpy()
- data_label = labels[i]
- # 处理图像供展示使用
- data_image = np.transpose(data_image, (1, 2, 0))
- mean = np.array([0.485, 0.456, 0.406])
- std = np.array([0.229, 0.224, 0.225])
- data_image = std * data_image + mean
- data_image = np.clip(data_image, 0, 1)
- # 显示图像
- plt.subplot(3, 6, i + 1)
- plt.imshow(data_image)
- plt.title(class_name[int(labels[i].asnumpy())])
- plt.axis("off")
-
- plt.show()
-
-network = create_model(model_name="densenet121", num_classes=2, pretrained=True)
-
-
-# 定义优化器和损失函数
-lr = 1e-3 if freeze_backbone else 1e-4
-opt = create_optimizer(network.trainable_params(), opt="adam", lr=lr)
-loss = create_loss(name="CE")
-
-if freeze_backbone:
- # freeze backbone
- for param in network.get_parameters():
- if param.name not in ["classifier.weight", "classifier.bias"]:
- param.requires_grad = False
-
-
-# 实例化模型
-model = Model(network, loss_fn=loss, optimizer=opt, metrics={"accuracy"})
-print("Training...")
-model.train(10, loader_train, callbacks=[LossMonitor(5), TimeMonitor(5)], dataset_sink_mode=False)
-print("Evaluating...")
-res = model.eval(loader_val)
-print(res)
diff --git a/examples/finetune/README.md b/examples/finetune/README.md
new file mode 100644
index 000000000..e2f33b2bf
--- /dev/null
+++ b/examples/finetune/README.md
@@ -0,0 +1,17 @@
+This folder contains scripts for fine-tuning on a custom dataset. Refer to the [tutorial](https://mindspore-lab.github.io/mindcv/how_to_guides/finetune_with_a_custom_dataset/) for details.
+
+### split_files.py
+```shell
+python examples/finetune/split_files.py
+```
+Taking the Aircraft dataset as an example, this script shows how to manually reorganize data into a tree-structured directory according to an annotation file. Note that it is written specifically for the Aircraft dataset rather than a general one, so check its content before running it.
+
+### read_images_online.py
+
+This is an example demonstrating how to read raw images and their labels into a `GeneratorDataset` object, a mappable dataset that models can consume directly. It is recommended to insert this snippet into your data preprocessing or training script rather than running it on its own.
+
+### finetune.py
+```shell
+python examples/finetune/finetune.py --config=./configs/mobilenetv3/mobilnet_v3_small_ascend.yaml
+```
+A fine-tuning script that includes example code for the fine-tuning techniques covered in the tutorial mentioned above.
diff --git a/examples/finetune/finetune.py b/examples/finetune/finetune.py
new file mode 100644
index 000000000..d3af1d87a
--- /dev/null
+++ b/examples/finetune/finetune.py
@@ -0,0 +1,386 @@
+""" Model training pipeline """
+import logging
+import os
+
+import mindspore as ms
+from mindspore import Tensor
+from mindspore.communication import get_group_size, get_rank, init
+
+from mindcv.data import create_dataset, create_loader, create_transforms
+from mindcv.loss import create_loss
+from mindcv.models import create_model
+from mindcv.optim import create_optimizer
+from mindcv.scheduler import create_scheduler
+from mindcv.utils import (
+ AllReduceSum,
+ StateMonitor,
+ create_trainer,
+ get_metrics,
+ require_customized_train_step,
+ set_logger,
+ set_seed,
+)
+
+from config import parse_args, save_args # isort: skip
+
+logger = logging.getLogger("mindcv.train")
+
+
+def finetune_train(args):
+ """main train function"""
+
+ ms.set_context(mode=args.mode)
+ if args.distribute:
+ init()
+ device_num = get_group_size()
+ rank_id = get_rank()
+ ms.set_auto_parallel_context(
+ device_num=device_num,
+ parallel_mode="data_parallel",
+ gradients_mean=True,
+ # we should but cannot set parameter_broadcast=True, which will cause error on gpu.
+ )
+ else:
+ device_num = None
+ rank_id = None
+
+ set_seed(args.seed)
+ set_logger(name="mindcv", output_dir=args.ckpt_save_dir, rank=0 if not rank_id else rank_id, color=False)
+ logger.info(
+ "We recommend installing `termcolor` via `pip install termcolor` "
+ "and setup logger by `set_logger(..., color=True)`"
+ )
+
+    # check the directory structure of the dataset (only used for the offline way of reading the dataset)
+ # for data_split in [args.train_split, args.val_split]:
+ # path = [i for i in os.listdir(args.data_dir + "/" + data_split + "/") if
+ # os.path.isdir(args.data_dir + "/" + data_split + "/" + i + "/")]
+ # file_num = len(path)
+ # if file_num != args.num_classes:
+ # raise ValueError("The directory structure of the custom dataset should be the same as ImageNet, "
+ # "which is, the hierarchy of root -> split -> class -> image. \n "
+ # "Please check your directory structure of dataset.")
+
+ # create dataset
+ dataset_train = create_dataset(
+ name=args.dataset,
+ root=args.data_dir,
+ split=args.train_split,
+ shuffle=args.shuffle,
+ num_samples=args.num_samples,
+ num_shards=device_num,
+ shard_id=rank_id,
+ num_parallel_workers=args.num_parallel_workers,
+ download=args.dataset_download,
+ num_aug_repeats=args.aug_repeats,
+ )
+
+ if args.num_classes is None:
+ num_classes = dataset_train.num_classes()
+ else:
+ num_classes = args.num_classes
+
+ # create transforms
+ num_aug_splits = 0
+ if args.aug_splits > 0:
+ assert args.aug_splits == 3, "Currently, only support 3 splits of augmentation"
+ assert args.auto_augment is not None, "aug_splits should be set with one auto_augment"
+ num_aug_splits = args.aug_splits
+
+ transform_list = create_transforms(
+ dataset_name=args.dataset,
+ is_training=True,
+ image_resize=args.image_resize,
+ scale=args.scale,
+ ratio=args.ratio,
+ hflip=args.hflip,
+ vflip=args.vflip,
+ color_jitter=args.color_jitter,
+ interpolation=args.interpolation,
+ auto_augment=args.auto_augment,
+ mean=args.mean,
+ std=args.std,
+ re_prob=args.re_prob,
+ re_scale=args.re_scale,
+ re_ratio=args.re_ratio,
+ re_value=args.re_value,
+ re_max_attempts=args.re_max_attempts,
+ separate=num_aug_splits > 0,
+ )
+
+ # load dataset
+ loader_train = create_loader(
+ dataset=dataset_train,
+ batch_size=args.batch_size,
+ drop_remainder=args.drop_remainder,
+ is_training=True,
+ mixup=args.mixup,
+ cutmix=args.cutmix,
+ cutmix_prob=args.cutmix_prob,
+ num_classes=num_classes,
+ transform=transform_list,
+ num_parallel_workers=args.num_parallel_workers,
+ separate=num_aug_splits > 0,
+ )
+
+ if args.val_while_train:
+ dataset_eval = create_dataset(
+ name=args.dataset,
+ root=args.data_dir,
+ split=args.val_split,
+ num_shards=device_num,
+ shard_id=rank_id,
+ num_parallel_workers=args.num_parallel_workers,
+ download=args.dataset_download,
+ )
+
+ transform_list_eval = create_transforms(
+ dataset_name=args.dataset,
+ is_training=False,
+ image_resize=args.image_resize,
+ crop_pct=args.crop_pct,
+ interpolation=args.interpolation,
+ mean=args.mean,
+ std=args.std,
+ )
+
+ loader_eval = create_loader(
+ dataset=dataset_eval,
+ batch_size=args.batch_size,
+ drop_remainder=False,
+ is_training=False,
+ transform=transform_list_eval,
+ num_parallel_workers=args.num_parallel_workers,
+ )
+ # validation dataset count
+ eval_count = dataset_eval.get_dataset_size()
+ if args.distribute:
+ all_reduce = AllReduceSum()
+ eval_count = all_reduce(Tensor(eval_count, ms.int32))
+ else:
+ loader_eval = None
+ eval_count = None
+
+ num_batches = loader_train.get_dataset_size()
+ # Train dataset count
+ train_count = dataset_train.get_dataset_size()
+ if args.distribute:
+ all_reduce = AllReduceSum()
+ train_count = all_reduce(Tensor(train_count, ms.int32))
+
+ # create model
+ network = create_model(
+ model_name=args.model,
+ num_classes=num_classes,
+ in_channels=args.in_channels,
+ drop_rate=args.drop_rate,
+ drop_path_rate=args.drop_path_rate,
+ pretrained=args.pretrained,
+ checkpoint_path=args.ckpt_path,
+ ema=args.ema,
+ )
+
+ num_params = sum([param.size for param in network.get_parameters()])
+
+ # # if you want to freeze all the feature network:
+ # from mindcv.models.registry import _model_pretrained_cfgs
+ # # number of parameters to be updated
+ # num_params = 2
+ #
+ # # read names of parameters in FC layer
+ # classifier_names = [_model_pretrained_cfgs[args.model]["classifier"] + ".weight",
+ # _model_pretrained_cfgs[args.model]["classifier"] + ".bias"]
+ #
+ # # prevent parameters in network(except the classifier) from updating
+ # for param in network.trainable_params():
+ # if param.name not in classifier_names:
+ # param.requires_grad = False
+ #
+ #
+ # # if you only want to freeze part of the network (for example, first 7 layers):
+    # # read names of the layers to freeze (the trailing dot prevents "features.1"
+    # # from also matching "features.10" ~ "features.15")
+    # freeze_layer = ["features." + str(i) + "." for i in range(7)]
+    #
+    # # prevent parameters in the first 7 layers of the network from updating
+    # for param in network.trainable_params():
+    #     if any(param.name.startswith(layer) for layer in freeze_layer):
+    #         param.requires_grad = False
+
+ # create loss
+ loss = create_loss(
+ name=args.loss,
+ reduction=args.reduction,
+ label_smoothing=args.label_smoothing,
+ aux_factor=args.aux_factor,
+ )
+
+ # create learning rate schedule
+ lr_scheduler = create_scheduler(
+ num_batches,
+ scheduler=args.scheduler,
+ lr=args.lr,
+ min_lr=args.min_lr,
+ warmup_epochs=args.warmup_epochs,
+ warmup_factor=args.warmup_factor,
+ decay_epochs=args.decay_epochs,
+ decay_rate=args.decay_rate,
+ milestones=args.multi_step_decay_milestones,
+ num_epochs=args.epoch_size,
+ num_cycles=args.num_cycles,
+ cycle_decay=args.cycle_decay,
+ lr_epoch_stair=args.lr_epoch_stair,
+ )
+
+ # resume training if ckpt_path is given
+ if args.ckpt_path != "" and args.resume_opt:
+ opt_ckpt_path = os.path.join(args.ckpt_save_dir, f"optim_{args.model}.ckpt")
+ else:
+ opt_ckpt_path = ""
+
+ # create optimizer
+ # TODO: consistent naming opt, name, dataset_name
+ if (
+ args.loss_scale_type == "fixed"
+ and args.drop_overflow_update is False
+ and not require_customized_train_step(
+ args.ema,
+ args.clip_grad,
+ args.gradient_accumulation_steps,
+ args.amp_cast_list,
+ )
+ ):
+ optimizer_loss_scale = args.loss_scale
+ else:
+ optimizer_loss_scale = 1.0
+
+ # # set learning rate for specific layer:
+ # # Note: a)the params-lr dict must contain all the parameters.
+ # # b)Also, you're recommended to set a dict with a key "order_params" to make sure the
+ # # parameters will be updated in a right order.
+ # params_lr_group = [{"params": list(filter(lambda x: 'features.13' in x.name, network.trainable_params())),
+ # "lr": [i * 1.05 for i in lr_scheduler]},
+ # {"params": list(filter(lambda x: 'features.14' in x.name, network.trainable_params())),
+ # "lr": [i * 1.1 for i in lr_scheduler]},
+ # {"params": list(filter(lambda x: 'features.15' in x.name, network.trainable_params())),
+ # "lr": [i * 1.15 for i in lr_scheduler]},
+ # {"params": list(filter(
+ # lambda x: ".".join(x.name.split(".")[:2]) not in ["features.13", "features.14",
+ # "features.15"],
+ # network.trainable_params())),
+ # "lr": lr_scheduler},
+ # {"order_params": network.trainable_params()}]
+ #
+ # optimizer = create_optimizer(params_lr_group,
+ # opt=args.opt,
+ # lr=lr_scheduler,
+ # ...)
+
+ optimizer = create_optimizer(
+ network.trainable_params(),
+ opt=args.opt,
+ lr=lr_scheduler,
+ weight_decay=args.weight_decay,
+ momentum=args.momentum,
+ nesterov=args.use_nesterov,
+ filter_bias_and_bn=args.filter_bias_and_bn,
+ loss_scale=optimizer_loss_scale,
+ checkpoint_path=opt_ckpt_path,
+ eps=args.eps,
+ )
+
+ # Define eval metrics.
+ metrics = get_metrics(num_classes)
+
+ # create trainer
+ trainer = create_trainer(
+ network,
+ loss,
+ optimizer,
+ metrics,
+ amp_level=args.amp_level,
+ amp_cast_list=args.amp_cast_list,
+ loss_scale_type=args.loss_scale_type,
+ loss_scale=args.loss_scale,
+ drop_overflow_update=args.drop_overflow_update,
+ ema=args.ema,
+ ema_decay=args.ema_decay,
+ clip_grad=args.clip_grad,
+ clip_value=args.clip_value,
+ gradient_accumulation_steps=args.gradient_accumulation_steps,
+ )
+
+ # callback
+ # save checkpoint, summary training loss
+ # record val acc and do model selection if val dataset is available
+ begin_step = 0
+ begin_epoch = 0
+ if args.ckpt_path != "":
+ begin_step = optimizer.global_step.asnumpy()[0]
+ begin_epoch = args.ckpt_path.split("/")[-1].split("-")[1].split("_")[0]
+ begin_epoch = int(begin_epoch)
+
+ summary_dir = f"./{args.ckpt_save_dir}/summary"
+ assert (
+ args.ckpt_save_policy != "top_k" or args.val_while_train is True
+ ), "ckpt_save_policy is top_k, val_while_train must be True."
+ state_cb = StateMonitor(
+ trainer,
+ model_name=args.model,
+ model_ema=args.ema,
+ last_epoch=begin_epoch,
+ dataset_sink_mode=args.dataset_sink_mode,
+ dataset_val=loader_eval,
+ metric_name=list(metrics.keys()),
+ val_interval=args.val_interval,
+ ckpt_save_dir=args.ckpt_save_dir,
+ ckpt_save_interval=args.ckpt_save_interval,
+ ckpt_save_policy=args.ckpt_save_policy,
+ ckpt_keep_max=args.keep_checkpoint_max,
+ summary_dir=summary_dir,
+ log_interval=args.log_interval,
+ rank_id=rank_id,
+ device_num=device_num,
+ )
+
+ callbacks = [state_cb]
+ essential_cfg_msg = "\n".join(
+ [
+ "Essential Experiment Configurations:",
+ f"MindSpore mode[GRAPH(0)/PYNATIVE(1)]: {args.mode}",
+ f"Distributed mode: {args.distribute}",
+ f"Number of devices: {device_num if device_num is not None else 1}",
+ f"Number of training samples: {train_count}",
+ f"Number of validation samples: {eval_count}",
+ f"Number of classes: {num_classes}",
+ f"Number of batches: {num_batches}",
+ f"Batch size: {args.batch_size}",
+ f"Auto augment: {args.auto_augment}",
+ f"MixUp: {args.mixup}",
+ f"CutMix: {args.cutmix}",
+ f"Model: {args.model}",
+ f"Model parameters: {num_params}",
+ f"Number of epochs: {args.epoch_size}",
+ f"Optimizer: {args.opt}",
+ f"Learning rate: {args.lr}",
+ f"LR Scheduler: {args.scheduler}",
+ f"Momentum: {args.momentum}",
+ f"Weight decay: {args.weight_decay}",
+ f"Auto mixed precision: {args.amp_level}",
+ f"Loss scale: {args.loss_scale}({args.loss_scale_type})",
+ ]
+ )
+ logger.info(essential_cfg_msg)
+ save_args(args, os.path.join(args.ckpt_save_dir, f"{args.model}.yaml"), rank_id)
+
+ if args.ckpt_path != "":
+ logger.info(f"Resume training from {args.ckpt_path}, last step: {begin_step}, last epoch: {begin_epoch}")
+ else:
+ logger.info("Start training")
+
+ trainer.train(args.epoch_size, loader_train, callbacks=callbacks, dataset_sink_mode=args.dataset_sink_mode)
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ finetune_train(args)
diff --git a/examples/finetune/read_images_online.py b/examples/finetune/read_images_online.py
new file mode 100644
index 000000000..ab413bdbf
--- /dev/null
+++ b/examples/finetune/read_images_online.py
@@ -0,0 +1,47 @@
+""" Read images online """
+import numpy as np
+
+from mindspore.dataset import GeneratorDataset
+
+
+class ImageClsDataset:
+ def __init__(self, annotation_dir, images_dir):
+ # Read annotations
+ self.annotation = {}
+ for i in open(annotation_dir, "r"):
+ image_label = i.replace("\n", "").replace("/", "_").split(" ")
+ image = image_label[0] + ".jpg"
+ label = " ".join(image_label[1:])
+ self.annotation[image] = label
+
+ # Transfer string-type label to int-type label
+ self.label2id = {}
+ labels = sorted(list(set(self.annotation.values())))
+ for i in labels:
+ self.label2id[i] = labels.index(i)
+
+ for image, label in self.annotation.items():
+ self.annotation[image] = self.label2id[label]
+
+        # Read image-labels as a mappable object
+        images = {label: [] for label in self.label2id.values()}  # use an independent list for each label
+ for image, label in self.annotation.items():
+ read_image = np.fromfile(images_dir + image, dtype=np.uint8)
+ images[label].append(read_image)
+
+ self._data = sum(list(images.values()), [])
+ self._label = sum([[i] * len(images[i]) for i in images.keys()], [])
+
+ # make class ImageClsDataset an iterable object
+ def __getitem__(self, index):
+ return self._data[index], self._label[index]
+
+ def __len__(self):
+ return len(self._data)
+
+
+# take aircraft dataset as an example
+annotation_dir = "./aircraft/data/images_variant_trainval.txt"
+images_dir = "./aircraft/data/iamges/"
+dataset = ImageClsDataset(annotation_dir)
+dataset_train = GeneratorDataset(source=dataset, column_names=["image", "label"], shuffle=True)
diff --git a/examples/finetune/split_files.py b/examples/finetune/split_files.py
new file mode 100644
index 000000000..9398136fd
--- /dev/null
+++ b/examples/finetune/split_files.py
@@ -0,0 +1,41 @@
+""" Extract images and generate ImageNet-style dataset directory """
+import os
+import shutil
+
+
+# only for Aircraft dataset but not a general one
+def extract_images(images_path, subset_name, annotation_file_path, copy=True):
+ # read the annotation file to get the label of each image
+ def annotations(annotation_file_path):
+ image_label = {}
+ for i in open(annotation_file_path, "r"):
+ label = " ".join(i.split(" ")[1:]).replace("\n", "").replace("/", "_")
+ if label not in image_label.keys():
+ image_label[label] = []
+ image_label[label].append(i.split(" ")[0])
+ else:
+ image_label[label].append(i.split(" ")[0])
+ return image_label
+
+ # make a new folder for subset
+ subset_path = images_path + subset_name
+ os.mkdir(subset_path)
+
+ # extract and copy/move images to the new folder
+ image_label = annotations(annotation_file_path)
+ for label in image_label.keys():
+ label_folder = subset_path + "/" + label
+ os.mkdir(label_folder)
+ for image in image_label[label]:
+ image_name = image + ".jpg"
+ if copy:
+                shutil.copy(images_path + image_name, label_folder + "/" + image_name)
+ else:
+ shutil.move(images_path + image_name, label_folder)
+
+
+# take train set of aircraft dataset as an example
+images_path = "./aircraft/data/images/"
+subset_name = "trainval"
+annotation_file_path = "./aircraft/data/images_variant_trainval.txt"
+extract_images(images_path, subset_name, annotation_file_path)
diff --git a/examples/scripts/train_densenet_multigpus.sh b/examples/scripts/train_densenet_multigpus.sh
deleted file mode 100644
index 63e25c2da..000000000
--- a/examples/scripts/train_densenet_multigpus.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-mpirun --allow-run-as-root -n 4 python train.py --distribute --model=densenet121 --pretrained --epoch_size=5 --dataset=cifar10 --dataset_download
-#mpirun --allow-run-as-root -n 4 python train.py --distribute --model=densenet121 --pretrained --epoch_size=5 --dataset=cifar10 --data_dir=./datasets/cifar/cifar-10-batches-bin
diff --git a/examples/scripts/train_densenet_standalone.sh b/examples/scripts/train_densenet_standalone.sh
deleted file mode 100644
index 892c608ac..000000000
--- a/examples/scripts/train_densenet_standalone.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-python train.py --model=densenet121 --optimizer=adam --lr=0.001 --dataset=cifar10 --num_classes=10 --dataset_download
-#python train.py --model=densenet121 --opt=adam --lr=0.001 --dataset=cifar10 --num_classes=10 --data_dir=./datasets/cifar/cifar-10-batches-bin
diff --git a/mindcv/utils/callbacks.py b/mindcv/utils/callbacks.py
index b99ebf24c..5cce910e9 100644
--- a/mindcv/utils/callbacks.py
+++ b/mindcv/utils/callbacks.py
@@ -278,7 +278,11 @@ def _get_lr_from_cbp(self, cb_params):
else: # if the optimizer is successfully called, the global_step will actually be the value of next step.
optim_step = optimizer.global_step - 1
if optimizer.dynamic_lr:
- lr = optimizer.learning_rate(optim_step)[0]
+ if isinstance(optimizer.learning_rate, ms.nn.CellList):
+ # return the learning rates of the first parameter if dynamic_lr
+ lr = optimizer.learning_rate[0](optim_step)[0]
+ else:
+ lr = optimizer.learning_rate(optim_step)[0]
else:
lr = optimizer.learning_rate
return lr
diff --git a/mkdocs.yml b/mkdocs.yml
index 7931303e7..02fe96dac 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -17,6 +17,7 @@ nav:
- How-To Guides: # Problem Oriented
- Write A New Model: how_to_guides/write_a_new_model.md
- Multi-Scale Feature Extraction: how_to_guides/feature_extraction.md
+ - Fine-tune with A Custom Dataset: how_to_guides/finetune_with_a_custom_dataset.md
- Reference:
- data: reference/data.md
- loss: reference/loss.md