Skip to content

Commit

Permalink
arg change
Browse files Browse the repository at this point in the history
  • Loading branch information
wtmlon committed Jan 25, 2024
1 parent 8b6d037 commit dc6df8c
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,22 @@


# Assembles `param` as a single string of space-separated key=value settings:
# the first line assigns it, every following line appends one more pair
# (each pair carries its own trailing space as the separator).
# How `param` is consumed is not shown in this fragment.
#
# NOTE(review): several keys below occur twice with different values
# (per_device_train_batch_size, tensor_parallel_degree, pipeline_parallel_degree,
# sharding_parallel_degree, sharding, run_mode, gradient_accumulation_steps).
# This looks like removed and added diff lines shown together without +/-
# markers -- confirm against the real script before trusting any single value.
param="model_name_or_path=qwen/qwen-7b "
param+="per_device_train_batch_size=1 "
param+="per_device_train_batch_size=2 "
param+="data_parallel_degree=1 "
param+="tensor_parallel_degree=8 "
param+="pipeline_parallel_degree=1 "
param+="tensor_parallel_degree=2 "
param+="pipeline_parallel_degree=2 "
# NOTE(review): virtual_pp_degree appears only once (5) while
# pipeline_parallel_degree appears as both 1 and 2 -- confirm 5 is still
# valid for the pipeline degree that is actually in effect.
param+="virtual_pp_degree=5 "
param+="sequence_parallel=0 "
param+="sharding_parallel_degree=1 "
param+="sharding=stage3 "
param+="sharding_parallel_degree=2 "
param+="sharding=stage1 "
param+="recompute=1 "
# run_mode is a free-form label; the two variants below spell out MP/PP/mbs/acc
# values that match the two sets of duplicated settings above.
param+="run_mode=MP8-PP1-mbs1-acc16 "
param+="recompute_granularity=core_attn "
param+="run_mode=MP2-PP2-mbs2-acc4 "
param+="device_num=N1C8 "
param+="global_batch_size=16 "
param+="model_item=qwen-qwen-7b_seqlen2048_pretrain "
param+="max_steps=150 "
param+="gradient_accumulation_steps=16 "
param+="gradient_accumulation_steps=4 "
param+="pp_recompute_interval=1 "
# NOTE(review): this value ends with a trailing comma before the separator
# space -- confirm the consumer tolerates an empty last list entry.
param+="tensor_parallel_config=enable_mp_async_allreduce,enable_mp_skip_c_identity, "
param+="recompute_use_reentrant=true "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,22 @@


# Assembles `param` as a single string of space-separated key=value settings:
# the first line assigns it, every following line appends one more pair
# (each pair carries its own trailing space as the separator).
# How `param` is consumed is not shown in this fragment.
#
# NOTE(review): per_device_train_batch_size, pipeline_parallel_degree,
# sharding_parallel_degree and gradient_accumulation_steps each occur twice
# with different values. This looks like removed and added diff lines shown
# together without +/- markers -- confirm against the real script.
param="model_name_or_path=qwen/qwen-7b "
param+="per_device_train_batch_size=1 "
param+="per_device_train_batch_size=2 "
param+="data_parallel_degree=1 "
param+="tensor_parallel_degree=4 "
param+="pipeline_parallel_degree=2 "
param+="pipeline_parallel_degree=1 "
param+="virtual_pp_degree=1 "
param+="sequence_parallel=0 "
param+="sharding_parallel_degree=1 "
param+="sharding_parallel_degree=2 "
param+="sharding=stage1 "
param+="recompute=1 "
param+="recompute_granularity=core_attn "
# NOTE(review): run_mode appears only once and still reads PP2 / mbs1 / acc16,
# while this fragment also shows pipeline_parallel_degree=1,
# per_device_train_batch_size=2 and gradient_accumulation_steps=4 -- confirm
# whether the label was meant to be updated along with those settings.
param+="run_mode=MP4-PP2-vpp1-mbs1-acc16-recompute "
param+="device_num=N1C8 "
param+="global_batch_size=16 "
param+="model_item=qwen-qwen-7b_seqlen2048_pretrain "
param+="max_steps=150 "
param+="gradient_accumulation_steps=16 "
param+="gradient_accumulation_steps=4 "
param+="pp_recompute_interval=1 "
param+="tensor_parallel_config=enable_mp_async_allreduce,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add "
param+="recompute_use_reentrant=true "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ function _set_params(){
tensor_parallel_config=${tensor_parallel_config:-"enable_mp_async_allreduce,enable_mp_skip_c_identity,enable_mp_fused_linear_param_grad_add"}
pipeline_parallel_config=${pipeline_parallel_config:-""}
recompute_use_reentrant=${recompute_use_reentrant:-"true"}
recompute_granularity=${recompute_granularity:-"full"}

base_batch_size=${global_batch_size}

Expand Down

0 comments on commit dc6df8c

Please sign in to comment.