Experiment to reproduce the execution-accuracy (ex ac) 0.79 param set in train_sft.sh
wangzaistone committed Nov 2, 2023
1 parent def504d commit a709bed
Showing 1 changed file with 17 additions and 16 deletions.
dbgpt_hub/scripts/train_sft.sh: 17 additions & 16 deletions
@@ -2,31 +2,32 @@ wandb offline # Close wandb
 # V100, single GPU
 current_date=$(date +"%Y%m%d_%H%M%S")
 train_log="dbgpt_hub/output/train_${current_date}.log"

 CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
     --quantization_bit 4 \
-    --model_name_or_path /home/model/Baichuan2-13B-Chat \
+    --model_name_or_path Your_download_CodeLlama-13b-Instruct-hf_path \
     --do_train \
-    --dataset example_text2sql \
-    --max_source_length 1024 \
+    --dataset example_text2sql_train \
+    --max_source_length 2048 \
     --max_target_length 512 \
-    --template baichuan2 \
     --finetuning_type lora \
-    --lora_rank 32 \
-    --lora_alpha 8 \
-    --lora_target W_pack \
-    --output_dir dbgpt_hub/output/adapter/baichuan2-13b-qlora \
+    --lora_target q_proj,v_proj \
+    --template llama2 \
+    --lora_rank 64 \
+    --lora_alpha 32 \
+    --output_dir dbgpt_hub/output/adapter/CodeLlama-13b-sql-lora \
     --overwrite_cache \
     --overwrite_output_dir \
     --per_device_train_batch_size 1 \
-    --gradient_accumulation_steps 4 \
+    --gradient_accumulation_steps 16 \
     --lr_scheduler_type cosine_with_restarts \
-    --logging_steps 10 \
-    --save_steps 10 \
-    --learning_rate 5e-5 \
-    --num_train_epochs 0.2 \
-    --plot_loss 2>&1 | tee ${train_log}
+    --logging_steps 50 \
+    --save_steps 2000 \
+    --learning_rate 2e-4 \
+    --num_train_epochs 8 \
+    --plot_loss \
+    --bf16 2>&1 | tee ${train_log}
-# --bf16  # V100 does not support bf16
-# test num_train_epochs set to 0.1
+# test num_train_epochs could be set to 0.1

 # multi-GPU, launched with deepspeed, A100
 # deepspeed --num_gpus 2 dbgpt_hub/train/sft_train.py \
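A note on what the gradient_accumulation_steps change actually does: on a single GPU, the effective batch size per optimizer step is per_device_train_batch_size multiplied by gradient_accumulation_steps. As plain shell arithmetic (nothing here is repo-specific), a quick before/after check of the values in this diff:

# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
echo "before: $(( 1 * 4  * 1 ))"   # old settings -> 4
echo "after:  $(( 1 * 16 * 1 ))"   # new settings -> 16

So each optimizer step now averages gradients over four times as many samples, which goes together with the other changes in this commit: a higher learning rate (5e-5 to 2e-4) and a much longer run (0.2 to 8 epochs).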

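For anyone still on a V100, the hardware the removed "# --bf16" trailing comment was written for, here is a minimal sketch of the same configuration with fp16 substituted for bf16. It assumes sft_train.py forwards these flags to the standard HuggingFace TrainingArguments, where --fp16 is the half-precision option V100 does support; this variant is an illustration, not part of the commit:

# V100 variant (sketch): identical to the committed configuration,
# except --fp16 replaces --bf16, since V100 GPUs have no bf16 support.
current_date=$(date +"%Y%m%d_%H%M%S")
train_log="dbgpt_hub/output/train_${current_date}.log"

CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
    --quantization_bit 4 \
    --model_name_or_path Your_download_CodeLlama-13b-Instruct-hf_path \
    --do_train \
    --dataset example_text2sql_train \
    --max_source_length 2048 \
    --max_target_length 512 \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --template llama2 \
    --lora_rank 64 \
    --lora_alpha 32 \
    --output_dir dbgpt_hub/output/adapter/CodeLlama-13b-sql-lora \
    --overwrite_cache \
    --overwrite_output_dir \
    --per_device_train_batch_size 1 \
    --gradient_accumulation_steps 16 \
    --lr_scheduler_type cosine_with_restarts \
    --logging_steps 50 \
    --save_steps 2000 \
    --learning_rate 2e-4 \
    --num_train_epochs 8 \
    --plot_loss \
    --fp16 2>&1 | tee ${train_log}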