forked from open-mmlab/mmpretrain
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CodeCamp2023-339] New Version of `config` Adapting Vision Transformer Algorithm (open-mmlab#1727)
* add old config * add old config * add old config * renew vit-base-p16_64xb64_in1k.py * rename * finish vit_base_p16_64xb64_in1k_384px.py * finish vit_base_p32_64xb64_in1k.py and 384px * finish 4 vit_large*.py * finish vit_base_p16_32xb128_mae_in1k.py * add vit_base_p16_4xb544_ipu_in1k.py * modify data_root * using to modify cfg * pre-commit check * ignore ipu * keep other files no change * remove redefinition * only keep vit_base_p16.py * move init_cfg into model.update
- Loading branch information
Showing
15 changed files
with
645 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
60 changes: 60 additions & 0 deletions
60
mmpretrain/configs/_base_/datasets/imagenet_bs64_pil_resize.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Copyright (c) OpenMMLab. All rights reserved. | ||
# This is a BETA new format config file; its usage may change soon. | ||
from mmengine.dataset import DefaultSampler | ||
|
||
from mmpretrain.datasets import (CenterCrop, ImageNet, LoadImageFromFile, | ||
PackInputs, RandomFlip, RandomResizedCrop, | ||
ResizeEdge) | ||
from mmpretrain.evaluation import Accuracy | ||
|
||
# dataset settings | ||
dataset_type = ImageNet | ||
data_preprocessor = dict( | ||
num_classes=1000, | ||
# RGB format normalization parameters | ||
mean=[123.675, 116.28, 103.53], | ||
std=[58.395, 57.12, 57.375], | ||
# convert image from BGR to RGB | ||
to_rgb=True, | ||
) | ||
|
||
train_pipeline = [ | ||
dict(type=LoadImageFromFile), | ||
dict(type=RandomResizedCrop, scale=224, backend='pillow'), | ||
dict(type=RandomFlip, prob=0.5, direction='horizontal'), | ||
dict(type=PackInputs), | ||
] | ||
|
||
test_pipeline = [ | ||
dict(type=LoadImageFromFile), | ||
dict(type=ResizeEdge, scale=256, edge='short', backend='pillow'), | ||
dict(type=CenterCrop, crop_size=224), | ||
dict(type=PackInputs), | ||
] | ||
|
||
train_dataloader = dict( | ||
batch_size=64, | ||
num_workers=5, | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root='data/imagenet', | ||
split='train', | ||
pipeline=train_pipeline), | ||
sampler=dict(type=DefaultSampler, shuffle=True), | ||
) | ||
|
||
val_dataloader = dict( | ||
batch_size=64, | ||
num_workers=5, | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root='data/imagenet', | ||
split='val', | ||
pipeline=test_pipeline), | ||
sampler=dict(type=DefaultSampler, shuffle=False), | ||
) | ||
val_evaluator = dict(type=Accuracy, topk=(1, 5)) | ||
|
||
# If you want a standard test, please manually configure the test dataset | ||
test_dataloader = val_dataloader | ||
test_evaluator = val_evaluator |
78 changes: 78 additions & 0 deletions
78
mmpretrain/configs/_base_/datasets/imagenet_bs64_pil_resize_autoaug.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# Copyright (c) OpenMMLab. All rights reserved. | ||
# This is a BETA new format config file; its usage may change soon. | ||
from mmengine.dataset import DefaultSampler | ||
|
||
from mmpretrain.datasets import (CenterCrop, ImageNet, LoadImageFromFile, | ||
PackInputs, RandomFlip, RandomResizedCrop, | ||
ResizeEdge) | ||
from mmpretrain.datasets.transforms import AutoAugment | ||
from mmpretrain.evaluation import Accuracy | ||
|
||
# dataset settings | ||
dataset_type = ImageNet | ||
data_preprocessor = dict( | ||
num_classes=1000, | ||
# RGB format normalization parameters | ||
mean=[123.675, 116.28, 103.53], | ||
std=[58.395, 57.12, 57.375], | ||
# convert image from BGR to RGB | ||
to_rgb=True, | ||
) | ||
|
||
bgr_mean = data_preprocessor['mean'][::-1] | ||
bgr_std = data_preprocessor['std'][::-1] | ||
|
||
train_pipeline = [ | ||
dict(type=LoadImageFromFile), | ||
dict( | ||
type=RandomResizedCrop, | ||
scale=224, | ||
backend='pillow', | ||
interpolation='bicubic'), | ||
dict(type=RandomFlip, prob=0.5, direction='horizontal'), | ||
dict( | ||
type=AutoAugment, | ||
policies='imagenet', | ||
hparams=dict( | ||
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')), | ||
dict(type=PackInputs), | ||
] | ||
|
||
test_pipeline = [ | ||
dict(type=LoadImageFromFile), | ||
dict( | ||
type=ResizeEdge, | ||
scale=256, | ||
edge='short', | ||
backend='pillow', | ||
interpolation='bicubic'), | ||
dict(type=CenterCrop, crop_size=224), | ||
dict(type=PackInputs), | ||
] | ||
|
||
train_dataloader = dict( | ||
batch_size=64, | ||
num_workers=5, | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root='data/imagenet', | ||
split='train', | ||
pipeline=train_pipeline), | ||
sampler=dict(type=DefaultSampler, shuffle=True), | ||
) | ||
|
||
val_dataloader = dict( | ||
batch_size=64, | ||
num_workers=5, | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root='data/imagenet', | ||
split='val', | ||
pipeline=test_pipeline), | ||
sampler=dict(type=DefaultSampler, shuffle=False), | ||
) | ||
val_evaluator = dict(type=Accuracy, topk=(1, 5)) | ||
|
||
# If you want a standard test, please manually configure the test dataset | ||
test_dataloader = val_dataloader | ||
test_evaluator = val_evaluator |
89 changes: 89 additions & 0 deletions
89
mmpretrain/configs/_base_/datasets/imagenet_bs64_swin_224.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# Copyright (c) OpenMMLab. All rights reserved. | ||
# This is a BETA new format config file; its usage may change soon. | ||
from mmengine.dataset import DefaultSampler | ||
|
||
from mmpretrain.datasets import (CenterCrop, ImageNet, LoadImageFromFile, | ||
PackInputs, RandAugment, RandomErasing, | ||
RandomFlip, RandomResizedCrop, ResizeEdge) | ||
from mmpretrain.evaluation import Accuracy | ||
|
||
# dataset settings | ||
dataset_type = ImageNet | ||
data_preprocessor = dict( | ||
num_classes=1000, | ||
# RGB format normalization parameters | ||
mean=[123.675, 116.28, 103.53], | ||
std=[58.395, 57.12, 57.375], | ||
# convert image from BGR to RGB | ||
to_rgb=True, | ||
) | ||
|
||
bgr_mean = data_preprocessor['mean'][::-1] | ||
bgr_std = data_preprocessor['std'][::-1] | ||
|
||
train_pipeline = [ | ||
dict(type=LoadImageFromFile), | ||
dict( | ||
type=RandomResizedCrop, | ||
scale=224, | ||
backend='pillow', | ||
interpolation='bicubic'), | ||
dict(type=RandomFlip, prob=0.5, direction='horizontal'), | ||
dict( | ||
type=RandAugment, | ||
policies='timm_increasing', | ||
num_policies=2, | ||
total_level=10, | ||
magnitude_level=9, | ||
magnitude_std=0.5, | ||
hparams=dict( | ||
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')), | ||
dict( | ||
type=RandomErasing, | ||
erase_prob=0.25, | ||
mode='rand', | ||
min_area_ratio=0.02, | ||
max_area_ratio=1 / 3, | ||
fill_color=bgr_mean, | ||
fill_std=bgr_std), | ||
dict(type=PackInputs), | ||
] | ||
|
||
test_pipeline = [ | ||
dict(type=LoadImageFromFile), | ||
dict( | ||
type=ResizeEdge, | ||
scale=256, | ||
edge='short', | ||
backend='pillow', | ||
interpolation='bicubic'), | ||
dict(type=CenterCrop, crop_size=224), | ||
dict(type=PackInputs), | ||
] | ||
|
||
train_dataloader = dict( | ||
batch_size=64, | ||
num_workers=5, | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root='data/imagenet', | ||
split='train', | ||
pipeline=train_pipeline), | ||
sampler=dict(type=DefaultSampler, shuffle=True), | ||
) | ||
|
||
val_dataloader = dict( | ||
batch_size=64, | ||
num_workers=5, | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root='data/imagenet', | ||
split='val', | ||
pipeline=test_pipeline), | ||
sampler=dict(type=DefaultSampler, shuffle=False), | ||
) | ||
val_evaluator = dict(type=Accuracy, topk=(1, 5)) | ||
|
||
# If you want a standard test, please manually configure the test dataset | ||
test_dataloader = val_dataloader | ||
test_evaluator = val_evaluator |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Copyright (c) OpenMMLab. All rights reserved. | ||
# This is a BETA new format config file; its usage may change soon. | ||
from mmengine.model.weight_init import KaimingInit | ||
|
||
from mmpretrain.models import (ImageClassifier, LabelSmoothLoss, | ||
VisionTransformer, VisionTransformerClsHead) | ||
|
||
# model settings | ||
model = dict( | ||
type=ImageClassifier, | ||
backbone=dict( | ||
type=VisionTransformer, | ||
arch='b', | ||
img_size=224, | ||
patch_size=16, | ||
drop_rate=0.1, | ||
init_cfg=[ | ||
dict( | ||
type=KaimingInit, | ||
layer='Conv2d', | ||
mode='fan_in', | ||
nonlinearity='linear') | ||
]), | ||
neck=None, | ||
head=dict( | ||
type=VisionTransformerClsHead, | ||
num_classes=1000, | ||
in_channels=768, | ||
loss=dict( | ||
type=LabelSmoothLoss, label_smooth_val=0.1, mode='classy_vision'), | ||
)) |
44 changes: 44 additions & 0 deletions
44
mmpretrain/configs/_base_/schedules/imagenet_bs4096_adamw.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright (c) OpenMMLab. All rights reserved. | ||
# This is a BETA new format config file; its usage may change soon. | ||
from mmengine.optim import CosineAnnealingLR, LinearLR | ||
from torch.optim import AdamW | ||
|
||
# optimizer | ||
optim_wrapper = dict( | ||
optimizer=dict(type=AdamW, lr=0.003, weight_decay=0.3), | ||
# specific to vit pretrain | ||
paramwise_cfg=dict(custom_keys={ | ||
'.cls_token': dict(decay_mult=0.0), | ||
'.pos_embed': dict(decay_mult=0.0) | ||
}), | ||
) | ||
|
||
# learning policy | ||
param_scheduler = [ | ||
# warm up learning rate scheduler | ||
dict( | ||
type=LinearLR, | ||
start_factor=1e-4, | ||
by_epoch=True, | ||
begin=0, | ||
end=30, | ||
# update by iter | ||
convert_to_iter_based=True), | ||
# main learning rate scheduler | ||
dict( | ||
type=CosineAnnealingLR, | ||
T_max=270, | ||
by_epoch=True, | ||
begin=30, | ||
end=300, | ||
) | ||
] | ||
|
||
# train, val, test setting | ||
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1) | ||
val_cfg = dict() | ||
test_cfg = dict() | ||
|
||
# NOTE: `auto_scale_lr` is for automatically scaling LR, | ||
# based on the actual training batch size. | ||
auto_scale_lr = dict(base_batch_size=4096) |
52 changes: 52 additions & 0 deletions
52
mmpretrain/configs/vision_transformer/vit_base_p16_32xb128_mae_in1k.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Copyright (c) OpenMMLab. All rights reserved. | ||
# This is a BETA new format config file; its usage may change soon. | ||
from mmengine.config import read_base | ||
from mmengine.model import ConstantInit, TruncNormalInit | ||
from torch.optim import AdamW | ||
|
||
from mmpretrain.engine import EMAHook | ||
from mmpretrain.models import CutMix, Mixup | ||
|
||
with read_base(): | ||
from .._base_.datasets.imagenet_bs64_swin_224 import * | ||
from .._base_.default_runtime import * | ||
from .._base_.models.vit_base_p16 import * | ||
from .._base_.schedules.imagenet_bs1024_adamw_swin import * | ||
|
||
model.update( | ||
backbone=dict(drop_rate=0, drop_path_rate=0.1, init_cfg=None), | ||
head=dict(loss=dict(mode='original')), | ||
init_cfg=[ | ||
dict(type=TruncNormalInit, layer='Linear', std=.02), | ||
dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.), | ||
], | ||
train_cfg=dict( | ||
augments=[dict(type=Mixup, alpha=0.8), | ||
dict(type=CutMix, alpha=1.0)])) | ||
|
||
# dataset settings | ||
train_dataloader.update(batch_size=128) | ||
|
||
# schedule settings | ||
optim_wrapper.update( | ||
optimizer=dict( | ||
type=AdamW, | ||
lr=1e-4 * 4096 / 256, | ||
weight_decay=0.3, | ||
eps=1e-8, | ||
betas=(0.9, 0.95)), | ||
paramwise_cfg=dict( | ||
norm_decay_mult=0.0, | ||
bias_decay_mult=0.0, | ||
custom_keys={ | ||
'.cls_token': dict(decay_mult=0.0), | ||
'.pos_embed': dict(decay_mult=0.0) | ||
})) | ||
|
||
# runtime settings | ||
custom_hooks = [dict(type=EMAHook, momentum=1e-4)] | ||
|
||
# NOTE: `auto_scale_lr` is for automatically scaling LR | ||
# based on the actual training batch size. | ||
# base_batch_size = (32 GPUs) x (128 samples per GPU) | ||
auto_scale_lr.update(base_batch_size=4096) |
20 changes: 20 additions & 0 deletions
20
mmpretrain/configs/vision_transformer/vit_base_p16_64xb64_in1k.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Copyright (c) OpenMMLab. All rights reserved. | ||
# This is a BETA new format config file; its usage may change soon. | ||
from mmengine.config import read_base | ||
|
||
from mmpretrain.models import Mixup | ||
|
||
with read_base(): | ||
from .._base_.datasets.imagenet_bs64_pil_resize_autoaug import * | ||
from .._base_.default_runtime import * | ||
from .._base_.models.vit_base_p16 import * | ||
from .._base_.schedules.imagenet_bs4096_adamw import * | ||
|
||
# model setting | ||
model.update( | ||
head=dict(hidden_dim=3072), | ||
train_cfg=dict(augments=dict(type=Mixup, alpha=0.2)), | ||
) | ||
|
||
# schedule setting | ||
optim_wrapper.update(clip_grad=dict(max_norm=1.0)) |
Oops, something went wrong.