Merge pull request #86 from eosphoros-ai/refactor
export merged model, and fix multi-card run error
zhanghy-sketchzh authored Oct 7, 2023
2 parents 09dfb08 + f704967 commit 4fcec76
Showing 8 changed files with 70 additions and 11 deletions.
14 changes: 12 additions & 2 deletions README.md
@@ -185,8 +185,18 @@ sh ./dbgpt_hub/scripts/predict_sft.sh
By default the script includes the --quantization_bit parameter, so prediction runs with QLoRA; removing it switches prediction to LoRA.

### 3.5 Model Weights
You can find weights from huggingface. [hg-eosphoros-ai
](https://huggingface.co/eosphoros)
You can find the corresponding model weights we uploaded in August on Hugging Face: [hg-eosphoros-ai](https://huggingface.co/eosphoros)

We will release a better version of the new weights as soon as possible.
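
The uploaded weights can also be fetched programmatically with `huggingface_hub`; a minimal sketch, assuming a placeholder repo id (pick the actual repository from the eosphoros organization page):

```python
from huggingface_hub import snapshot_download

# The repo_id below is a placeholder; choose the actual weight repository
# from https://huggingface.co/eosphoros.
local_dir = snapshot_download(repo_id="eosphoros/your-chosen-weights")
print(f"Weights downloaded to: {local_dir}")
```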

### 3.5.2 Merging the base model and fine-tuned weights

Run the following script, making sure to replace the relevant parameter paths in the script with the paths for your own project.

```bash
sh ./dbgpt_hub/scripts/export_merge.sh
```
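
For reference, merging a LoRA adapter back into its base model conceptually amounts to the following sketch using `peft`; the project's actual logic lives in `dbgpt_hub/train/export_model.py` and may differ, and every path below is a placeholder:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Placeholders; use the same values you pass in export_merge.sh.
base_path = "Baichuan2-13B-Chat"
adapter_path = "dbgpt_hub/output/adapter/checkpoint-100"
output_path = "output_merge_model"

base = AutoModelForCausalLM.from_pretrained(base_path, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(base_path, trust_remote_code=True)

# Load the fine-tuned LoRA adapter on top of the frozen base weights.
model = PeftModel.from_pretrained(base, adapter_path)

# Fold the adapter deltas into the base weights and save a standalone checkpoint.
merged = model.merge_and_unload()
merged.save_pretrained(output_path)
tokenizer.save_pretrained(output_path)
```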

### 3.6 Model Evaluation
To evaluate model performance on a dataset; the default is the Spider dataset.
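
For intuition, Spider-style execution accuracy is commonly computed by running the gold and predicted SQL against the same database and comparing result sets; a simplified sketch (not the project's actual evaluator, and the database path is up to you):

```python
import sqlite3
from collections import Counter

def execution_match(db_path: str, gold_sql: str, pred_sql: str) -> bool:
    """Simplified check: gold and predicted SQL 'match' when they return the
    same multiset of rows on the same Spider SQLite database."""
    conn = sqlite3.connect(db_path)
    try:
        gold_rows = Counter(conn.execute(gold_sql).fetchall())
        try:
            pred_rows = Counter(conn.execute(pred_sql).fetchall())
        except sqlite3.Error:
            return False  # the predicted SQL does not even execute
        return gold_rows == pred_rows
    finally:
        conn.close()
```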
9 changes: 8 additions & 1 deletion README.zh.md
@@ -182,7 +182,14 @@ sh ./dbgpt_hub/scripts/predict_sft.sh


# 3.5. Model Weights
You can view the corresponding model weights on Huggingface. [huggingface link](https://huggingface.co/eosphoros)
You can view the corresponding model weights we uploaded back in August on Huggingface. [huggingface link](https://huggingface.co/eosphoros)
We will release a new version of the weights with better results as soon as possible.
## 3.5.2 Merging the model and fine-tuned weights
Run the following script, making sure to replace the relevant parameter paths in the script with the paths for your own project.
```bash
sh ./dbgpt_hub/scripts/export_merge.sh
```


### 3.6. Model Evaluation
For evaluating the model's performance on a dataset, the default is the `spider` dataset.
2 changes: 1 addition & 1 deletion dbgpt_hub/configs/data_args.py
@@ -76,7 +76,7 @@ class DataArguments:
}
)
dataset: Optional[str] = field(
default="alpaca_en",
default="example_text2sql",
metadata={
"help": "The name of provided dataset(s) to use. Use commas to separate multiple datasets."
},
8 changes: 8 additions & 0 deletions dbgpt_hub/scripts/export_merge.sh
@@ -0,0 +1,8 @@
python dbgpt_hub/train/export_model.py \
    --model_name_or_path Your_base_model_path_like_Baichuan2-13B-Chat \
    --template Your_template_like_baichuan2_eval \
    --finetuning_type lora \
    --checkpoint_dir Your_ckpt_path_checkpoint-100 \
    --output_dir Your_export_model_like_output_merge_model_baichuan2-13b-qlora_merge \
    --fp16
    # --bf16
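
Once exported, the merged directory behaves like a standalone Hugging Face checkpoint. A usage sketch, assuming the directory name matches whatever --output_dir was set to, with a purely illustrative prompt:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Must match the --output_dir passed to export_model.py above.
merged_dir = "output_merge_model_baichuan2-13b-qlora_merge"

tokenizer = AutoTokenizer.from_pretrained(merged_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    merged_dir, trust_remote_code=True, device_map="auto"
)

# Illustrative text-to-SQL style prompt; the real prompt template comes from the project.
prompt = "Question: List the names of all singers.\nSchema: singer(singer_id, name, age)\nSQL:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```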
37 changes: 32 additions & 5 deletions dbgpt_hub/scripts/train_sft.sh
@@ -1,7 +1,6 @@
wandb offline
# v100
# v100, single GPU
CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
--deepspeed dbgpt_hub/configs/ds_config.json \
--quantization_bit 4 \
--model_name_or_path /home/model/Baichuan2-13B-Chat \
--do_train \
@@ -19,9 +18,37 @@ CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
--per_device_train_batch_size 1 \
--gradient_accumulation_steps 4 \
--lr_scheduler_type cosine_with_restarts \
--logging_steps 10 \
--logging_steps 250 \
--save_steps 500 \
--learning_rate 5e-5 \
--num_train_epochs 1 \
--num_train_epochs 2 \
--plot_loss
# --bf16  # v100 does not support bf16
# --bf16  # v100 does not support bf16
# test num_train_epochs set to 0.1

# Multi-GPU, launched with deepspeed, A100
# deepspeed --num_gpus 2 dbgpt_hub/train/sft_train.py \
# --deepspeed dbgpt_hub/configs/ds_config.json \
# --quantization_bit 4 \
# --model_name_or_path /home/model_files/Llama-2-13b-chat-hf \
# --do_train \
# --dataset example_text2sql_train \
# --max_source_length 1024 \
# --max_target_length 512 \
# --template llama2 \
# --finetuning_type lora \
# --lora_rank 64 \
# --lora_alpha 32 \
# --lora_target q_proj,v_proj \
# --output_dir dbgpt_hub/output/adapter/llama2-13b-qlora_1024_epoch1_debug1008_withDeepseed_mulitCard \
# --overwrite_cache \
# --overwrite_output_dir \
# --per_device_train_batch_size 1 \
# --gradient_accumulation_steps 16 \
# --lr_scheduler_type cosine_with_restarts \
# --logging_steps 25 \
# --save_steps 20 \
# --learning_rate 2e-4 \
# --num_train_epochs 0.1 \
# --plot_loss \
# --bf16
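
As a quick sanity check on these flags, the effective global batch size implied by each launch can be computed as below (a small sketch based only on the values in this script):

```python
# Effective global batch = per-device batch * gradient accumulation steps * number of GPUs.
def effective_batch_size(per_device: int, grad_accum: int, num_gpus: int) -> int:
    return per_device * grad_accum * num_gpus

print(effective_batch_size(1, 4, 1))   # single-V100 run above -> 4
print(effective_batch_size(1, 16, 2))  # commented-out 2x A100 deepspeed run -> 32
```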
5 changes: 5 additions & 0 deletions dbgpt_hub/train/export_model.py
@@ -1,6 +1,11 @@
import os
import sys
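# Add the repository root to sys.path so `dbgpt_hub` can be imported when this script is run directly.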
ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(ROOT_PATH)
from dbgpt_hub.llm_base.model_trainer import export_model



def main():
    export_model()

4 changes: 3 additions & 1 deletion dbgpt_hub/train/sft_train.py
@@ -1,5 +1,7 @@
import os
import sys
sys.path.append('/home/zw/explained/DB-GPT-Hub')
ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(ROOT_PATH)
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from transformers import DataCollatorForSeq2Seq, Seq2SeqTrainingArguments

2 changes: 1 addition & 1 deletion docs/eval_llm_result.md
@@ -9,7 +9,7 @@ This doc aims to summarize the performance of publicly available big language mo
| GPT 4 | 0.762 | [quote](https://www.numbersstation.ai/post/nsql-llama-2-7b) |
| wizardcoder | 0.610 | [quote](https://github.com/cuplv/text-to-sql-wizardcoder/tree/main) |
| llama2_13b_hf | 0.252 | run in this project, default param set |
| llama2_13b_hf_lora | 0.622 | run in this project, default param set |
| llama2_13b_hf_lora | 0.697 | run in this project, default param set |
| CodeLlama-7b-Instruct-hf_qlora | 0.649 | run in this project, in refactor branch, with qlora and nf4, bit4 SFT, only 500 steps |

We will support CodeLlama with LoRA soon, and share more experiment results.
