mindspore-lab · WongGawa · Oct 14, 2024
diff --git a/configs/yolov10/hyp.scratch.yaml b/configs/yolov10/hyp.scratch.yaml
@@ -0,0 +1,60 @@
+optimizer:  # optimizer, warmup and parameter-group hyperparameters
+  optimizer: momentum  # optimizer type
+  lr_init: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
+  momentum: 0.9  # SGD momentum/Adam beta1
+  nesterov: True  # update gradients with the NAG (Nesterov Accelerated Gradient) algorithm
+  loss_scale: 1.0  # loss scale for optimizer
+  warmup_epochs: 3  # warmup epochs (fractions ok)
+  warmup_momentum: 0.8  # warmup initial momentum
+  warmup_bias_lr: 0.0  # warmup initial bias lr
+  min_warmup_step: 1000  # minimum warmup step
+  group_param: yolov8  # group param strategy (the yolov8 strategy is selected here)
+  gp_weight_decay: 0.0005  # group param weight decay 5e-4
+  start_factor: 1.0  # NOTE(review): presumably the LR multiplier at the start of the schedule - confirm against the LR scheduler
+  end_factor: 0.01  # NOTE(review): presumably the LR multiplier at the end of the schedule - confirm against the LR scheduler
+
+loss:  # loss selection and per-term gains
+  name: YOLOv10Loss
+  box: 7.5  # box loss gain
+  cls: 0.5  # class loss gain
+  dfl: 1.5  # DFL loss gain
+  reg_max: 16  # same value as network.reg_max in the yolov10*.yaml model configs
+
+data:  # data loading and train/test transform pipelines
+  num_parallel_workers: 4
+
+  # multi-stage data augmentation
+  train_transforms: {
+    stage_epochs: [ 490, 10 ],  # presumably epochs per stage, paired by index with trans_list (confirm); 490 + 10 = 500 = `epochs` in the model configs
+    trans_list: [
+      [  # first pipeline: starts with mosaic
+        {func_name: mosaic, prob: 1.0},
+        {func_name: resample_segments},
+        {func_name: random_perspective, prob: 1.0, degrees: 0.0, translate: 0.1, scale: 0.5, shear: 0.0},
+        {func_name: albumentations},
+        {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4},
+        {func_name: fliplr, prob: 0.5},
+        {func_name: label_norm, xyxy2xywh_: True},
+        {func_name: label_pad, padding_size: 160, padding_value: -1},
+        {func_name: image_norm, scale: 255.},
+        {func_name: image_transpose, bgr2rgb: True, hwc2chw: True}
+      ],
+      [  # second pipeline: identical steps except letterbox replaces mosaic
+        {func_name: letterbox, scaleup: True},
+        {func_name: resample_segments},
+        {func_name: random_perspective, prob: 1.0, degrees: 0.0, translate: 0.1, scale: 0.5, shear: 0.0},
+        {func_name: albumentations},
+        {func_name: hsv_augment, prob: 1.0, hgain: 0.015, sgain: 0.7, vgain: 0.4},
+        {func_name: fliplr, prob: 0.5},
+        {func_name: label_norm, xyxy2xywh_: True},
+        {func_name: label_pad, padding_size: 160, padding_value: -1},
+        {func_name: image_norm, scale: 255.},
+        {func_name: image_transpose, bgr2rgb: True, hwc2chw: True}
+      ]]
+  }
+
+  test_transforms: [  # no augmentation: image-only letterbox without scale-up, then normalize and transpose
+    {func_name: letterbox, scaleup: False, only_image: True},
+    {func_name: image_norm, scale: 255.},
+    {func_name: image_transpose, bgr2rgb: True, hwc2chw: True}
+  ]
diff --git a/configs/yolov10/yolov10b.yaml b/configs/yolov10/yolov10b.yaml
@@ -0,0 +1,61 @@
+__BASE__: [  # configs this file builds on; paths are relative to this file
+  '../coco.yaml',
+  './hyp.scratch.yaml',
+]
+
+epochs: 500  # total train epochs; stage_epochs [490, 10] in hyp.scratch.yaml sums to this
+per_batch_size: 32  # 32 * 8 = 256 (presumably per-device batch size x 8 devices - confirm)
+img_size: 640
+iou_thres: 0.7
+overflow_still_update: False
+ms_loss_scaler: dynamic
+ms_loss_scaler_value: 65536.0  # 2^16
+clip_grad: True
+anchor_base: False
+opencv_threads_num: 0  # opencv: disable threading optimizations
+
+network:  # YOLOv10b model definition: scaling factors plus backbone/head layer lists
+  model_name: yolov10
+  nc: 80  # number of classes
+  reg_max: 16  # same value as loss.reg_max in hyp.scratch.yaml
+
+  depth_multiple: 0.67  # model depth multiple
+  width_multiple: 1.00  # layer channel multiple
+  max_channels: 512  # NOTE(review): presumably the cap on scaled layer channels - confirm
+  stride: [8, 16, 32]  # matches the P3/8, P4/16, P5/32 levels noted in the layer comments
+
+  # YOLOv10b backbone
+  backbone:
+    # [from, repeats, module, args]
+    - [-1, 1, ConvNormAct, [64, 3, 2]]            # 0-P1/2
+    - [-1, 1, ConvNormAct, [128, 3, 2]]           # 1-P2/4
+    - [-1, 3, C2f, [128, True]]                   # 2
+    - [-1, 1, ConvNormAct, [256, 3, 2]]           # 3-P3/8
+    - [-1, 6, C2f, [256, True]]                   # 4
+    - [-1, 1, SCDown, [512, 3, 2]]                # 5-P4/16
+    - [-1, 6, C2f, [512, True]]                   # 6
+    - [-1, 1, SCDown, [1024, 3, 2]]               # 7-P5/32
+    - [-1, 3, C2fCIB, [1024, True, True]]         # 8; NOTE(review): the third arg (True) is absent from every head C2fCIB in this file - confirm it is intended for this scale
+    - [-1, 1, SPPF, [1024, 5]]                    # 9
+    - [-1, 1, PSA, [1024]]                        # 10
+
+  # YOLOv10b head
+  head:
+    - [-1, 1, Upsample, [None, 2, 'nearest']]     # 11
+    - [[-1, 6], 1, Concat, [1]]                   # cat backbone P4
+    - [-1, 3, C2fCIB, [512, True]]                # 13
+
+    - [-1, 1, Upsample, [None, 2, 'nearest']]     # 14
+    - [[-1, 4], 1, Concat, [1] ]                  # cat backbone P3
+    - [-1, 3, C2f, [256]]                         # 16 (P3/8-small)
+
+    - [-1, 1, ConvNormAct, [256, 3, 2]]           # 17
+    - [[ -1, 13], 1, Concat, [1]]                 # cat head P4
+    - [-1, 3, C2fCIB, [512, True]]                # 19 (P4/16-medium)
+
+    - [-1, 1, SCDown, [512, 3, 2]]                # 20
+    - [[-1, 10], 1, Concat, [1]]                  # cat head P5
+    - [-1, 3, C2fCIB, [1024, True]]               # 22 (P5/32-large)
+
+    - [[16, 19, 22], 1, YOLOv10Head, [nc, reg_max, stride]]  # Detect(P3, P4, P5)
+
diff --git a/configs/yolov10/yolov10l.yaml b/configs/yolov10/yolov10l.yaml
@@ -0,0 +1,61 @@
+__BASE__: [  # configs this file builds on; paths are relative to this file
+  '../coco.yaml',
+  './hyp.scratch.yaml',
+]
+
+epochs: 500  # total train epochs; stage_epochs [490, 10] in hyp.scratch.yaml sums to this
+per_batch_size: 32  # 32 * 8 = 256 (presumably per-device batch size x 8 devices - confirm)
+img_size: 640
+iou_thres: 0.7
+overflow_still_update: False
+ms_loss_scaler: dynamic
+ms_loss_scaler_value: 65536.0  # 2^16
+clip_grad: True
+anchor_base: False
+opencv_threads_num: 0  # opencv: disable threading optimizations
+
+network:  # YOLOv10l model definition: scaling factors plus backbone/head layer lists
+  model_name: yolov10
+  nc: 80  # number of classes
+  reg_max: 16  # same value as loss.reg_max in hyp.scratch.yaml
+
+  depth_multiple: 1.00  # model depth multiple
+  width_multiple: 1.00  # layer channel multiple
+  max_channels: 512  # NOTE(review): presumably the cap on scaled layer channels - confirm
+  stride: [8, 16, 32]  # matches the P3/8, P4/16, P5/32 levels noted in the layer comments
+
+  # YOLOv10l backbone
+  backbone:
+    # [from, repeats, module, args]
+    - [-1, 1, ConvNormAct, [64, 3, 2]]            # 0-P1/2
+    - [-1, 1, ConvNormAct, [128, 3, 2]]           # 1-P2/4
+    - [-1, 3, C2f, [128, True]]                   # 2
+    - [-1, 1, ConvNormAct, [256, 3, 2]]           # 3-P3/8
+    - [-1, 6, C2f, [256, True]]                   # 4
+    - [-1, 1, SCDown, [512, 3, 2]]                # 5-P4/16
+    - [-1, 6, C2f, [512, True]]                   # 6
+    - [-1, 1, SCDown, [1024, 3, 2]]               # 7-P5/32
+    - [-1, 3, C2fCIB, [1024, True, True]]         # 8; NOTE(review): the third arg (True) is absent from every head C2fCIB in this file - confirm it is intended for this scale
+    - [-1, 1, SPPF, [1024, 5]]                    # 9
+    - [-1, 1, PSA, [1024]]                        # 10
+
+  # YOLOv10l head
+  head:
+    - [-1, 1, Upsample, [None, 2, 'nearest']]     # 11
+    - [[-1, 6], 1, Concat, [1]]                   # cat backbone P4
+    - [-1, 3, C2fCIB, [512, True]]                # 13
+
+    - [-1, 1, Upsample, [None, 2, 'nearest']]     # 14
+    - [[-1, 4], 1, Concat, [1] ]                  # cat backbone P3
+    - [-1, 3, C2f, [256]]                         # 16 (P3/8-small)
+
+    - [-1, 1, ConvNormAct, [256, 3, 2]]           # 17
+    - [[ -1, 13], 1, Concat, [1]]                 # cat head P4
+    - [-1, 3, C2fCIB, [512, True]]                # 19 (P4/16-medium)
+
+    - [-1, 1, SCDown, [512, 3, 2]]                # 20
+    - [[-1, 10], 1, Concat, [1]]                  # cat head P5
+    - [-1, 3, C2fCIB, [1024, True]]               # 22 (P5/32-large)
+
+    - [[16, 19, 22], 1, YOLOv10Head, [nc, reg_max, stride]]  # Detect(P3, P4, P5)
+
diff --git a/configs/yolov10/yolov10m.yaml b/configs/yolov10/yolov10m.yaml
@@ -0,0 +1,61 @@
+__BASE__: [  # configs this file builds on; paths are relative to this file
+  '../coco.yaml',
+  './hyp.scratch.yaml',
+]
+
+epochs: 500  # total train epochs; stage_epochs [490, 10] in hyp.scratch.yaml sums to this
+per_batch_size: 32  # 32 * 8 = 256 (presumably per-device batch size x 8 devices - confirm)
+img_size: 640
+iou_thres: 0.7
+overflow_still_update: False
+ms_loss_scaler: dynamic
+ms_loss_scaler_value: 65536.0  # 2^16
+clip_grad: True
+anchor_base: False
+opencv_threads_num: 0  # opencv: disable threading optimizations
+
+network:  # YOLOv10m model definition: scaling factors plus backbone/head layer lists
+  model_name: yolov10
+  nc: 80  # number of classes
+  reg_max: 16  # same value as loss.reg_max in hyp.scratch.yaml
+
+  depth_multiple: 0.67  # model depth multiple
+  width_multiple: 0.75  # layer channel multiple
+  max_channels: 768  # NOTE(review): presumably the cap on scaled layer channels - confirm
+  stride: [8, 16, 32]  # matches the P3/8, P4/16, P5/32 levels noted in the layer comments
+
+  # YOLOv10m backbone
+  backbone:
+    # [from, repeats, module, args]
+    - [-1, 1, ConvNormAct, [64, 3, 2]]            # 0-P1/2
+    - [-1, 1, ConvNormAct, [128, 3, 2]]           # 1-P2/4
+    - [-1, 3, C2f, [128, True]]                   # 2
+    - [-1, 1, ConvNormAct, [256, 3, 2]]           # 3-P3/8
+    - [-1, 6, C2f, [256, True]]                   # 4
+    - [-1, 1, SCDown, [512, 3, 2]]                # 5-P4/16
+    - [-1, 6, C2f, [512, True]]                   # 6
+    - [-1, 1, SCDown, [1024, 3, 2]]               # 7-P5/32
+    - [-1, 3, C2fCIB, [1024, True, True]]         # 8; NOTE(review): the third arg (True) is absent from every head C2fCIB in this file - confirm it is intended for this scale
+    - [-1, 1, SPPF, [1024, 5]]                    # 9
+    - [-1, 1, PSA, [1024]]                        # 10
+
+  # YOLOv10m head
+  head:
+    - [-1, 1, Upsample, [None, 2, 'nearest']]     # 11
+    - [[-1, 6], 1, Concat, [1]]                   # cat backbone P4
+    - [-1, 3, C2f, [512]]                         # 13
+
+    - [-1, 1, Upsample, [None, 2, 'nearest']]     # 14
+    - [[-1, 4], 1, Concat, [1] ]                  # cat backbone P3
+    - [-1, 3, C2f, [256]]                         # 16 (P3/8-small)
+
+    - [-1, 1, ConvNormAct, [256, 3, 2]]           # 17
+    - [[ -1, 13], 1, Concat, [1]]                 # cat head P4
+    - [-1, 3, C2fCIB, [512, True]]                # 19 (P4/16-medium)
+
+    - [-1, 1, SCDown, [512, 3, 2]]                # 20
+    - [[-1, 10], 1, Concat, [1]]                  # cat head P5
+    - [-1, 3, C2fCIB, [1024, True]]               # 22 (P5/32-large)
+
+    - [[16, 19, 22], 1, YOLOv10Head, [nc, reg_max, stride]]  # Detect(P3, P4, P5)
+
diff --git a/configs/yolov10/yolov10n.yaml b/configs/yolov10/yolov10n.yaml
@@ -0,0 +1,61 @@
+__BASE__: [  # configs this file builds on; paths are relative to this file
+  '../coco.yaml',
+  './hyp.scratch.yaml',
+]
+
+epochs: 500  # total train epochs; stage_epochs [490, 10] in hyp.scratch.yaml sums to this
+per_batch_size: 32  # 32 * 8 = 256 (presumably per-device batch size x 8 devices - confirm)
+img_size: 640
+iou_thres: 0.7
+overflow_still_update: False
+ms_loss_scaler: dynamic
+ms_loss_scaler_value: 65536.0  # 2^16
+clip_grad: True
+anchor_base: False
+opencv_threads_num: 0  # opencv: disable threading optimizations
+
+network:  # YOLOv10n model definition: scaling factors plus backbone/head layer lists
+  model_name: yolov10
+  nc: 80  # number of classes
+  reg_max: 16  # same value as loss.reg_max in hyp.scratch.yaml
+
+  depth_multiple: 0.33  # model depth multiple
+  width_multiple: 0.25  # layer channel multiple
+  max_channels: 1024  # NOTE(review): presumably the cap on scaled layer channels - confirm
+  stride: [8, 16, 32]  # matches the P3/8, P4/16, P5/32 levels noted in the layer comments
+
+  # YOLOv10n backbone
+  backbone:
+    # [from, repeats, module, args]
+    - [-1, 1, ConvNormAct, [64, 3, 2]]     # 0-P1/2
+    - [-1, 1, ConvNormAct, [128, 3, 2]]    # 1-P2/4
+    - [-1, 3, C2f, [128, True]]            # 2
+    - [-1, 1, ConvNormAct, [256, 3, 2]]    # 3-P3/8
+    - [-1, 6, C2f, [256, True]]            # 4
+    - [-1, 1, SCDown, [512, 3, 2]]         # 5-P4/16
+    - [-1, 6, C2f, [512, True]]            # 6
+    - [-1, 1, SCDown, [1024, 3, 2]]        # 7-P5/32
+    - [-1, 3, C2f, [1024, True]]           # 8
+    - [-1, 1, SPPF, [1024, 5]]             # 9
+    - [-1, 1, PSA, [1024]]                 # 10
+
+  # YOLOv10n head
+  head:
+    - [-1, 1, Upsample, [None, 2, 'nearest']]  # 11
+    - [[-1, 6], 1, Concat, [1]]            # cat backbone P4
+    - [-1, 3, C2f, [512]]                  # 13
+
+    - [-1, 1, Upsample, [None, 2, 'nearest']]  # 14
+    - [[-1, 4], 1, Concat, [1] ]           # cat backbone P3
+    - [-1, 3, C2f, [256]]                  # 16 (P3/8-small)
+
+    - [-1, 1, ConvNormAct, [256, 3, 2]]    # 17
+    - [[ -1, 13], 1, Concat, [1]]          # cat head P4
+    - [-1, 3, C2f, [512]]                  # 19 (P4/16-medium)
+
+    - [-1, 1, SCDown, [512, 3, 2]]         # 20
+    - [[-1, 10], 1, Concat, [1]]           # cat head P5
+    - [-1, 3, C2fCIB, [1024, True, True]]  # 22 (P5/32-large)
+
+    - [[16, 19, 22], 1, YOLOv10Head, [nc, reg_max, stride]]  # Detect(P3, P4, P5)
+
diff --git a/configs/yolov10/yolov10s.yaml b/configs/yolov10/yolov10s.yaml
@@ -0,0 +1,61 @@
+__BASE__: [  # configs this file builds on; paths are relative to this file
+  '../coco.yaml',
+  './hyp.scratch.yaml',
+]
+
+epochs: 500  # total train epochs; stage_epochs [490, 10] in hyp.scratch.yaml sums to this
+per_batch_size: 32  # 32 * 8 = 256 (presumably per-device batch size x 8 devices - confirm)
+img_size: 640
+iou_thres: 0.7
+overflow_still_update: False
+ms_loss_scaler: dynamic
+ms_loss_scaler_value: 65536.0  # 2^16
+clip_grad: True
+anchor_base: False
+opencv_threads_num: 0  # opencv: disable threading optimizations
+
+network:  # YOLOv10s model definition: scaling factors plus backbone/head layer lists
+  model_name: yolov10
+  nc: 80  # number of classes
+  reg_max: 16  # same value as loss.reg_max in hyp.scratch.yaml
+
+  depth_multiple: 0.33  # model depth multiple
+  width_multiple: 0.50  # layer channel multiple
+  max_channels: 1024  # NOTE(review): presumably the cap on scaled layer channels - confirm
+  stride: [8, 16, 32]  # matches the P3/8, P4/16, P5/32 levels noted in the layer comments
+
+  # YOLOv10s backbone
+  backbone:
+    # [from, repeats, module, args]
+    - [-1, 1, ConvNormAct, [64, 3, 2]]          # 0-P1/2
+    - [-1, 1, ConvNormAct, [128, 3, 2]]         # 1-P2/4
+    - [-1, 3, C2f, [128, True]]                 # 2
+    - [-1, 1, ConvNormAct, [256, 3, 2]]         # 3-P3/8
+    - [-1, 6, C2f, [256, True]]                 # 4
+    - [-1, 1, SCDown, [512, 3, 2]]              # 5-P4/16
+    - [-1, 6, C2f, [512, True]]                 # 6
+    - [-1, 1, SCDown, [1024, 3, 2]]             # 7-P5/32
+    - [-1, 3, C2fCIB, [1024, True, True]]       # 8
+    - [-1, 1, SPPF, [1024, 5]]                  # 9
+    - [-1, 1, PSA, [1024]]                      # 10
+
+  # YOLOv10s head
+  head:
+    - [-1, 1, Upsample, [None, 2, 'nearest']]   # 11
+    - [[-1, 6], 1, Concat, [1]]                 # cat backbone P4
+    - [-1, 3, C2f, [512]]                       # 13
+
+    - [-1, 1, Upsample, [None, 2, 'nearest']]   # 14
+    - [[-1, 4], 1, Concat, [1] ]                # cat backbone P3
+    - [-1, 3, C2f, [256]]                       # 16 (P3/8-small)
+
+    - [-1, 1, ConvNormAct, [256, 3, 2]]         # 17
+    - [[ -1, 13], 1, Concat, [1]]               # cat head P4
+    - [-1, 3, C2f, [512]]                       # 19 (P4/16-medium)
+
+    - [-1, 1, SCDown, [512, 3, 2]]              # 20
+    - [[-1, 10], 1, Concat, [1]]                # cat head P5
+    - [-1, 3, C2fCIB, [1024, True, True]]       # 22 (P5/32-large)
+
+    - [[16, 19, 22], 1, YOLOv10Head, [nc, reg_max, stride]]  # Detect(P3, P4, P5)
+