Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add dptr pretrain #22

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 104 additions & 0 deletions configs/rec/dptr/dptr_parseq_finetune.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Run-wide settings for the DPTR (DptrParseq decoder) fine-tuning config.
Global:
  device: gpu
  epoch_num: 20
  log_smooth_window: 20
  print_batch_step: 10
  # NOTE(review): machine-specific absolute path; the same directory is also
  # used by dptr_parseq_pretrain.yml — confirm the two runs should share it.
  output_dir: /share/ckpt/zhaoshuai/openocr/dptr_parseq/
  eval_epoch_step: [0, 1]
  eval_batch_step: [0, 500]
  cal_metric_during_train: true
  # NOTE(review): left unset although this is the fine-tune config —
  # presumably it should point at the pre-training checkpoint; confirm.
  pretrained_model: null
  checkpoints: null
  use_tensorboard: false
  infer_img: null
  # for data or label process
  # The three anchors below are aliased by the PostProcess, Train and Eval
  # sections of this config.
  character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
  max_text_length: &max_text_length 25
  use_space_char: &use_space_char false
  use_amp: true
  save_res_path: /share/ckpt/zhaoshuai/openocr/dptr_parseq/predicts_dptr_parseq.txt
  grad_clip_val: 20

# Optimizer settings.
Optimizer:
  name: AdamW
  # NOTE(review): the rate is annotated for 2 GPUs x 384 samples each, but
  # Train.loader.batch_size_per_card in this config is 4 — confirm the
  # intended batch size / learning rate pairing.
  lr: 0.001485  # 2gpus 384bs/gpu
  weight_decay: 0.0
  filter_bias_and_bn: false

# Learning-rate schedule.
LRScheduler:
  name: OneCycleLR
  warmup_epoch: 1.5  # pct_start 0.075*20 = 1.5ep
  cycle_momentum: false

# Model definition: a ViT encoder followed by the DptrParseq decoder,
# with the decoder's pre-training mode switched off.
Architecture:
  model_type: rec
  algorithm: DPTR
  # No input transform module is configured.
  Transform: null
  Encoder:
    name: ViT
    embed_dim: 512
    num_heads: 8
  Decoder:
    name: DptrParseq
    decode_ar: true
    refine_iters: 1
    is_pretrain: false

# Training loss.
Loss:
  name: PARSeqLoss

# Prediction decoding; reuses the dictionary settings anchored in Global.
PostProcess:
  name: ARLabelDecode
  character_dict_path: *character_dict_path
  use_space_char: *use_space_char

# Evaluation metric; `acc` is the indicator named as the main one.
Metric:
  name: RecMetric
  main_indicator: acc
  is_filter: true

# Training data pipeline: LMDB images -> augmentation -> label encoding ->
# resize to 32x128 -> (image, label) batches.
Train:
  dataset:
    name: LMDBDataSet
    # NOTE(review): machine-specific absolute path.
    data_dir: /share/test/zhaoshuai/parseq-data/data/train/real/ArT
    transforms:
      - DecodeImagePIL:  # load image
          img_mode: RGB
      # Augmentation op, configured without arguments.
      - PARSeqAugPIL: null
      - DPTRLabelEncode:  # Class handling label
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          max_text_length: *max_text_length
      - RecTVResize:
          image_shape: [32, 128]
          padding: false
      - KeepKeys:
          keep_keys: ['image', 'label']  # dataloader will return list in this order
  loader:
    shuffle: true
    batch_size_per_card: 4
    drop_last: true
    num_workers: 4

# Evaluation data pipeline: LMDB images -> label encoding -> resize to
# 32x128 -> (image, label) batches.
Eval:
  dataset:
    name: LMDBDataSet
    # NOTE(review): machine-specific absolute path.
    data_dir: /share/test/zhaoshuai/parseq-data/data/val
    transforms:
      - DecodeImagePIL:  # load image
          img_mode: RGB
      # The PARSeqAugPIL augmentation op is intentionally NOT applied here:
      # augmenting validation images would distort the reported accuracy.
      - DPTRLabelEncode:  # Class handling label
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          max_text_length: *max_text_length
      - RecTVResize:
          image_shape: [32, 128]
          padding: false
      - KeepKeys:
          keep_keys: ['image', 'label']  # dataloader will return list in this order
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 4
    num_workers: 2
88 changes: 88 additions & 0 deletions configs/rec/dptr/dptr_parseq_pretrain.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Run-wide settings for the DPTR (DptrParseq decoder) pre-training config.
Global:
  device: gpu
  epoch_num: 20
  log_smooth_window: 20
  print_batch_step: 10
  # NOTE(review): machine-specific absolute path; output_dir and
  # save_res_path are identical to those in dptr_parseq_finetune.yml —
  # confirm the two stages are meant to write to the same location.
  output_dir: /share/ckpt/zhaoshuai/openocr/dptr_parseq/
  eval_epoch_step: [0, 1]
  eval_batch_step: [0, 500]
  cal_metric_during_train: true
  pretrained_model: null
  checkpoints: null
  use_tensorboard: false
  infer_img: null
  # for data or label process
  # The three anchors below are aliased by the PostProcess, Train and Eval
  # sections of this config.
  character_dict_path: &character_dict_path ./tools/utils/EN_symbol_dict.txt
  max_text_length: &max_text_length 25
  use_space_char: &use_space_char false
  use_amp: true
  save_res_path: /share/ckpt/zhaoshuai/openocr/dptr_parseq/predicts_dptr_parseq.txt
  grad_clip_val: 20

# Optimizer settings.
Optimizer:
  name: AdamW
  # NOTE(review): the rate is annotated for 2 GPUs x 384 samples each, but
  # Train.loader.batch_size_per_card in this config is 256 — confirm the
  # intended batch size / learning rate pairing.
  lr: 0.001485  # 2gpus 384bs/gpu
  weight_decay: 0.0
  filter_bias_and_bn: false

# Learning-rate schedule.
LRScheduler:
  name: OneCycleLR
  warmup_epoch: 1.5  # pct_start 0.075*20 = 1.5ep
  cycle_momentum: false

# Model definition for the pre-training stage: only the DptrParseq decoder
# is configured (no Encoder entry), with its pre-training mode switched on.
Architecture:
  model_type: rec
  algorithm: DPTR
  Decoder:
    name: DptrParseq
    decode_ar: true
    refine_iters: 1
    is_pretrain: true
    # NOTE(review): machine-specific absolute path; presumably a tensor file
    # consumed by the decoder during pre-training — verify against the
    # DptrParseq implementation.
    ORP_path: /share/ckpt/zhaoshuai/parseq/clip_background.pth

# Training loss.
Loss:
  name: PARSeqLoss

# Prediction decoding; reuses the dictionary settings anchored in Global.
PostProcess:
  name: ARLabelDecode
  character_dict_path: *character_dict_path
  use_space_char: *use_space_char

# Evaluation metric; `acc` is the indicator named as the main one.
Metric:
  name: RecMetric
  main_indicator: acc
  is_filter: true

# Pre-training data pipeline: text-only LMDB records -> label encoding ->
# (clip_label, label) batches. No image decoding or resizing is configured.
Train:
  dataset:
    name: TextLMDBDataSet
    # NOTE(review): machine-specific absolute path.
    data_dir: /share/test/zhaoshuai/parseq-data/data/train/real/ArT
    transforms:
      - DPTRLabelEncode:  # Class handling label
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          max_text_length: *max_text_length
      - KeepKeys:
          keep_keys: ['clip_label', 'label']  # dataloader will return list in this order
  loader:
    shuffle: true
    batch_size_per_card: 256
    drop_last: true
    num_workers: 4

# Pre-training evaluation pipeline: text-only LMDB records -> label
# encoding -> (clip_label, label) batches.
Eval:
  dataset:
    name: TextLMDBDataSet
    # NOTE(review): machine-specific absolute path.
    data_dir: /share/test/zhaoshuai/parseq-data/data/val
    transforms:
      - DPTRLabelEncode:  # Class handling label
          character_dict_path: *character_dict_path
          use_space_char: *use_space_char
          max_text_length: *max_text_length
      - KeepKeys:
          keep_keys: ['clip_label', 'label']  # dataloader will return list in this order
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 256
    num_workers: 2
Binary file not shown.
Loading