From d60822a9c3b49582184e38e8b508f515bb878b4f Mon Sep 17 00:00:00 2001
From: luchun <71970539+zhanghy-sketchzh@users.noreply.github.com>
Date: Thu, 2 Nov 2023 10:15:07 +0800
Subject: [PATCH 1/7] Update predict.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

增加tqdm显示
---
 dbgpt_hub/predict/predict.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/dbgpt_hub/predict/predict.py b/dbgpt_hub/predict/predict.py
index c382980..e4653b7 100644
--- a/dbgpt_hub/predict/predict.py
+++ b/dbgpt_hub/predict/predict.py
@@ -1,6 +1,7 @@
 import os
 import json
 import sys
+from tqdm import tqdm
 
 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 sys.path.append(ROOT_PATH)
@@ -26,7 +27,7 @@ def inference(model: ChatModel, predict_data: List[Dict], **input_kwargs):
     res = []
     # test
     # for item in predict_data[:20]:
-    for item in predict_data:
+    for item in tqdm(predict_data, desc="Inference Progress", unit="item"):
         response, _ = model.chat(query=item["input"], history=[], **input_kwargs)
         res.append(response)
     return res

From 61caf9443ff1461aef0cb916b2f358e01b74a109 Mon Sep 17 00:00:00 2001
From: luchun <71970539+zhanghy-sketchzh@users.noreply.github.com>
Date: Thu, 2 Nov 2023 10:27:53 +0800
Subject: [PATCH 2/7] Update train_sft.sh

---
 dbgpt_hub/scripts/train_sft.sh | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/dbgpt_hub/scripts/train_sft.sh b/dbgpt_hub/scripts/train_sft.sh
index cbe2187..7ff9230 100644
--- a/dbgpt_hub/scripts/train_sft.sh
+++ b/dbgpt_hub/scripts/train_sft.sh
@@ -1,5 +1,7 @@
-wandb offline
+wandb offline # Close wandb
 # v100 ,单卡
+current_date=$(date +"%Y%m%d_%H%M%S")
+train_log="outputs/train_${current_date}.log"
 CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
     --quantization_bit 4 \
     --model_name_or_path /home/model/Baichuan2-13B-Chat \
@@ -22,7 +24,7 @@ CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
     --save_steps 10 \
     --learning_rate 5e-5 \
     --num_train_epochs 0.2 \
-    --plot_loss 
+    --plot_loss 2>&1 | tee ${train_log}
     # --bf16#v100不支持bf16
     # test  num_train_epochs set to 0.1
 
@@ -51,4 +53,27 @@ CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
 #     --learning_rate 2e-4 \
 #     --num_train_epochs 0.1 \
 #     --plot_loss \
-#     --bf16
\ No newline at end of file
+#     --bf16 2>&1 | tee ${train_log}
+
+
+# Multi-GPU, DeepSpeed, full-parameter fine-tuning
+# deepspeed --include localhost:4,5,6,7  dbgpt_hub/train/sft_train.py \
+#     --dataset example_text2sql_train \
+#     --model_name_or_path CodeLlama-7b-Instruct-hf \
+#     --do_train \
+#     --finetuning_type full \
+#     --max_source_length 2048 \
+#     --max_target_length 512 \
+#     --template llama2 \
+#     --output_dir dbgpt_hub/output/adapter/code_llama-7b-2048_epoch4_full \
+#     --overwrite_cache \
+#     --overwrite_output_dir \
+#     --per_device_train_batch_size 4 \
+#     --gradient_accumulation_steps 16 \
+#     --lr_scheduler_type cosine_with_restarts \
+#     --logging_steps 50 \
+#     --learning_rate 2e-5 \
+#     --num_train_epochs 4 \
+#     --plot_loss \
+#     --bf16 True \
+#     --deepspeed dbgpt_hub/configs/stage3.json 2>&1 | tee ${train_log}

From b42fd01045afafc2fd66613bf79ae1f82c860689 Mon Sep 17 00:00:00 2001
From: luchun <71970539+zhanghy-sketchzh@users.noreply.github.com>
Date: Thu, 2 Nov 2023 10:28:50 +0800
Subject: [PATCH 3/7] Update train_sft.sh

---
 dbgpt_hub/scripts/train_sft.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbgpt_hub/scripts/train_sft.sh b/dbgpt_hub/scripts/train_sft.sh
index 7ff9230..24f36c7 100644
--- a/dbgpt_hub/scripts/train_sft.sh
+++ b/dbgpt_hub/scripts/train_sft.sh
@@ -1,7 +1,7 @@
 wandb offline # Close wandb
 # v100 ,单卡
 current_date=$(date +"%Y%m%d_%H%M%S")
-train_log="outputs/train_${current_date}.log"
+train_log="dbgpt_hub/output/train_${current_date}.log"
 CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
     --quantization_bit 4 \
     --model_name_or_path /home/model/Baichuan2-13B-Chat \
@@ -65,7 +65,7 @@ CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
 #     --max_source_length 2048 \
 #     --max_target_length 512 \
 #     --template llama2 \
-#     --output_dir dbgpt_hub/output/adapter/code_llama-7b-2048_epoch4_full \
+#     --output_dir dbgpt_hub/output/adapter/code-llama-7b-2048_epoch4_full \
 #     --overwrite_cache \
 #     --overwrite_output_dir \
 #     --per_device_train_batch_size 4 \

From 940a31c905d7d7afb8f80b077c2155e0ab131412 Mon Sep 17 00:00:00 2001
From: luchun <71970539+zhanghy-sketchzh@users.noreply.github.com>
Date: Thu, 2 Nov 2023 10:30:37 +0800
Subject: [PATCH 4/7] Create satge3.json

---
 dbgpt_hub/configs/satge3.json | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 dbgpt_hub/configs/satge3.json

diff --git a/dbgpt_hub/configs/satge3.json b/dbgpt_hub/configs/satge3.json
new file mode 100644
index 0000000..ea17347
--- /dev/null
+++ b/dbgpt_hub/configs/satge3.json
@@ -0,0 +1,32 @@
+{
+  "fp16": {
+      "enabled": "auto",
+      "loss_scale": 0,
+      "loss_scale_window": 1000,
+      "initial_scale_power": 16,
+      "hysteresis": 2,
+      "min_loss_scale": 1
+  },
+  "zero_optimization": {
+      "stage": 3,
+      "offload_optimizer": {
+          "device": "cpu",
+          "pin_memory": true
+      },
+      "offload_param": {
+          "device": "cpu",
+          "pin_memory": true
+      },
+      "overlap_comm": true,
+      "contiguous_gradients": true,
+      "stage3_max_live_parameters" : 1e9,
+      "stage3_max_reuse_distance" : 1e9,
+      "stage3_prefetch_bucket_size" : 5e8,
+      "stage3_param_persistence_threshold" : 1e6,
+      "sub_group_size" : 1e12,
+      "stage3_gather_16bit_weights_on_model_save": true
+  },
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto"
+}

From fff1bf655f0ef2ed936ec283bfcc59af1a00681d Mon Sep 17 00:00:00 2001
From: luchun <71970539+zhanghy-sketchzh@users.noreply.github.com>
Date: Thu, 2 Nov 2023 10:31:20 +0800
Subject: [PATCH 5/7] Rename ds_config.json to stage2.json

---
 dbgpt_hub/configs/{ds_config.json => stage2.json} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename dbgpt_hub/configs/{ds_config.json => stage2.json} (99%)

diff --git a/dbgpt_hub/configs/ds_config.json b/dbgpt_hub/configs/stage2.json
similarity index 99%
rename from dbgpt_hub/configs/ds_config.json
rename to dbgpt_hub/configs/stage2.json
index 04158ad..e96d4d9 100644
--- a/dbgpt_hub/configs/ds_config.json
+++ b/dbgpt_hub/configs/stage2.json
@@ -20,4 +20,4 @@
       "overlap_comm": false,
       "contiguous_gradients": true
     }
-  }
\ No newline at end of file
+  }

From 3c6edc0d8ce9c33b8052374c9e5e850692e174f6 Mon Sep 17 00:00:00 2001
From: luchun <71970539+zhanghy-sketchzh@users.noreply.github.com>
Date: Thu, 2 Nov 2023 10:31:35 +0800
Subject: [PATCH 6/7] Rename satge3.json to stage3.json

---
 dbgpt_hub/configs/{satge3.json => stage3.json} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename dbgpt_hub/configs/{satge3.json => stage3.json} (100%)

diff --git a/dbgpt_hub/configs/satge3.json b/dbgpt_hub/configs/stage3.json
similarity index 100%
rename from dbgpt_hub/configs/satge3.json
rename to dbgpt_hub/configs/stage3.json

From 9d95c65445444ad8e94f01f8a8d639ddd78c3cd4 Mon Sep 17 00:00:00 2001
From: luchun <71970539+zhanghy-sketchzh@users.noreply.github.com>
Date: Thu, 2 Nov 2023 10:33:49 +0800
Subject: [PATCH 7/7] Update train_sft.sh

---
 dbgpt_hub/scripts/train_sft.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbgpt_hub/scripts/train_sft.sh b/dbgpt_hub/scripts/train_sft.sh
index 24f36c7..ca1112e 100644
--- a/dbgpt_hub/scripts/train_sft.sh
+++ b/dbgpt_hub/scripts/train_sft.sh
@@ -30,7 +30,7 @@ CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
 
 # 多卡，deepseed启动，A100
 # deepspeed --num_gpus 2  dbgpt_hub/train/sft_train.py \
-#     --deepspeed dbgpt_hub/configs/ds_config.json \
+#     --deepspeed dbgpt_hub/configs/stage2.json \
 #     --quantization_bit 4 \
 #     --model_name_or_path /home/model_files/Llama-2-13b-chat-hf \
 #     --do_train \