diff --git a/dbgpt_hub/llm_base/__init__.py b/dbgpt_hub/llm_base/__init__.py
index e7dd7f8..8b13789 100644
--- a/dbgpt_hub/llm_base/__init__.py
+++ b/dbgpt_hub/llm_base/__init__.py
@@ -1,4 +1 @@
-# from .load_tokenizer import get_accelerate_model
-# from .save_peft_model_callback import SavePeftModelCallback
-# __all__ = ["get_accelerate_model", "SavePeftModelCallback"]

diff --git a/dbgpt_hub/llm_base/chat_model.py b/dbgpt_hub/llm_base/chat_model.py
index 1d14384..d2272ec 100644
--- a/dbgpt_hub/llm_base/chat_model.py
+++ b/dbgpt_hub/llm_base/chat_model.py
@@ -9,8 +9,6 @@
 from dbgpt_hub.llm_base.load_tokenizer import dispatch_model, load_model_and_tokenizer
 from dbgpt_hub.llm_base.model_trainer import get_logits_processor
 from dbgpt_hub.data_process.data_utils import get_template_and_fix_tokenizer
-from dbgpt_hub.data_process.data_utils import extract_sql_prompt_dataset
-## TODO: refer to src/llmtuner/chat/stream_chat.py


 class ChatModel:
diff --git a/dbgpt_hub/llm_base/config_parser.py b/dbgpt_hub/llm_base/config_parser.py
index fc1b17e..044152a 100644
--- a/dbgpt_hub/llm_base/config_parser.py
+++ b/dbgpt_hub/llm_base/config_parser.py
@@ -1,17 +1,14 @@
 import os
 import sys
 import torch
-from dbgpt_hub.llm_base.loggings import get_logger
+import transformers
+import datasets
 from transformers.trainer import WEIGHTS_NAME
 from transformers.modeling_utils import load_sharded_checkpoint
 from transformers.trainer import WEIGHTS_NAME, WEIGHTS_INDEX_NAME
-from typing import Dict
-import datasets
-import transformers
-from typing import Any, Dict, Optional, Tuple
 from transformers import HfArgumentParser, Seq2SeqTrainingArguments
 from transformers.trainer_utils import get_last_checkpoint
-
+from typing import Any, Dict, Optional, Tuple
 from dbgpt_hub.llm_base.loggings import get_logger
 from dbgpt_hub.configs.model_args import (
     ModelArguments,
diff --git a/dbgpt_hub/llm_base/load_tokenizer.py b/dbgpt_hub/llm_base/load_tokenizer.py
index 665565e..5f11699 100644
--- a/dbgpt_hub/llm_base/load_tokenizer.py
+++ b/dbgpt_hub/llm_base/load_tokenizer.py
@@ -1,29 +1,18 @@
-import argparse
 import os
-import warnings
-import importlib
 import torch
-from packaging import version
-from os.path import join
-from typing import Optional, Tuple,Dict
-import bitsandbytes as bnb
-from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
-from transformers import PreTrainedModel, PreTrainedTokenizer
-from peft.tuners.lora import LoraLayer
-from transformers import (
-    AutoTokenizer,
-    AutoModelForCausalLM,
-    BitsAndBytesConfig,
-    LlamaTokenizer,
-)
-
-
-import os
 import math
-import torch
+from typing import Optional, Tuple,Dict,TYPE_CHECKING, Literal,List
 from types import MethodType
-from typing import TYPE_CHECKING, Literal, Optional, Tuple,List
+from trl import AutoModelForCausalLMWithValueHead
+from dbgpt_hub.llm_base.loggings import reset_logging, get_logger
+from dbgpt_hub.configs.model_args import FinetuningArguments
+from dbgpt_hub.llm_base.adapter import init_adapter
+from dbgpt_hub.configs.config import LAYERNORM_NAMES,VALUE_HEAD_FILE_NAME
+from transformers import PreTrainedModel, PreTrainedTokenizer
+from transformers.utils import check_min_version
+from transformers.utils.versions import require_version
+from transformers.deepspeed import is_deepspeed_zero3_enabled

 from transformers import (
     AutoConfig,
     AutoModelForCausalLM,
@@ -33,16 +22,6 @@
     PreTrainedModel,
     PreTrainedTokenizerBase,
 )
-from transformers.utils import check_min_version
-from transformers.utils.versions import require_version
-from transformers.deepspeed import is_deepspeed_zero3_enabled
-from trl import AutoModelForCausalLMWithValueHead
-
-from dbgpt_hub.llm_base.loggings import reset_logging, get_logger
-# from llmtuner.extras.misc import count_parameters, prepare_model_for_training
-from dbgpt_hub.configs.model_args import FinetuningArguments
-from dbgpt_hub.llm_base.adapter import init_adapter
-from dbgpt_hub.configs.config import LAYERNORM_NAMES,VALUE_HEAD_FILE_NAME

 if TYPE_CHECKING:
     from transformers import PreTrainedTokenizer
@@ -59,280 +38,6 @@
 require_version("trl>=0.5.0", "To fix: pip install trl>=0.5.0")
-
-
-
-# from dbgpt_hub.utils.model_utils import (
-#     smart_tokenizer_and_embedding_resize,
-#     find_all_linear_names,
-# )
-
-
-
-def is_ipex_available():
-    def get_major_and_minor_from_version(full_version):
-        return (
-            str(version.parse(full_version).major)
-            + "."
-            + str(version.parse(full_version).minor)
-        )
-
-    _torch_version = importlib.metadata.version("torch")
-    if importlib.util.find_spec("intel_extension_for_pytorch") is None:
-        return False
-    _ipex_version = "N/A"
-    try:
-        _ipex_version = importlib.metadata.version("intel_extension_for_pytorch")
-    except importlib.metadata.PackageNotFoundError:
-        return False
-    torch_major_and_minor = get_major_and_minor_from_version(_torch_version)
-    ipex_major_and_minor = get_major_and_minor_from_version(_ipex_version)
-    if torch_major_and_minor != ipex_major_and_minor:
-        warnings.warn(
-            f"Intel Extension for PyTorch {ipex_major_and_minor} needs to work with PyTorch {ipex_major_and_minor}.*,"
-            f" but PyTorch {_torch_version} is found. Please switch to the matching version and run again."
-        )
-        return False
-    return True
-
-
-def smart_tokenizer_and_embedding_resize(
-    special_tokens_dict: Dict[str, str],
-    tokenizer: PreTrainedTokenizer,
-    model: PreTrainedModel,
-) -> None:
-    """Resize tokenizer and embedding to accommodate new special tokens.
-    Resizes the tokenizer and the embedding matrix.
-    The sizes are usually set to a multiple of 64 to make GPU acceleration easier.
-
-    Args:
-        special_tokens_dict (Dict[str, str]): A dictionary of special tokens to be added to the tokenizer.
-        tokenizer (PreTrainedTokenizer): The tokenizer object to be resized.
-        model (PreTrainedModel): The model object whose token embeddings are to be resized.
-
-    Returns:
-        None
-
-    Note: This function resizes the tokenizer to accommodate additional special tokens and the
-    embedding matrix of the model to match the new size of the tokenizer. If any new special tokens
-    have been added, the function computes the average embedding values of the existing embeddings
-    and sets those values for the new special token embeddings. This is done separately for the input
-    embeddings and output embeddings of the model.
- """ - - num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict) - model.resize_token_embeddings(len(tokenizer)) - - if num_new_tokens > 0: - input_embeddings_data = model.get_input_embeddings().weight.data - output_embeddings_data = model.get_output_embeddings().weight.data - - # Compute average embeddings of existing tokens - input_embeddings_avg = input_embeddings_data[:-num_new_tokens].mean( - dim=0, keepdim=True - ) - output_embeddings_avg = output_embeddings_data[:-num_new_tokens].mean( - dim=0, keepdim=True - ) - - input_embeddings_data[-num_new_tokens:] = input_embeddings_avg - output_embeddings_data[-num_new_tokens:] = output_embeddings_avg - - -def find_all_linear_names( - args: argparse.Namespace, model: torch.nn.Module -) -> List[str]: - """ - Returns a list of names of all linear layers present in the given model. - Args: - args (argparse.Namespace): A namespace containing arguments of the script. - model (torch.nn.Module): The PyTorch model to extract linear layer names from. - - Returns: - List[str]: A list of names of all linear layers present in the given model. - - Raises: - TypeError: If `args` is not an instance of `argparse.Namespace`, or if `model` is not an instance \ - of `torch.nn.Module`. - ValueError: If `args.bits` is not 4 or 8. - - Example Usage: - >>> import argparse - >>> parser = argparse.ArgumentParser() - >>> parser.add_argument('--bits', type=int) - >>> args = parser.parse_args(['--bits', '4']) - >>> model = torch.nn.Sequential(torch.nn.Linear(10, 5), torch.nn.Linear(5, 1)) - >>> find_all_linear_names(args, model) - ['0', '1'] - """ - # Determine the correct linear layer class based on the value of `args.bits` - if args.bits == 4: - cls = bnb.nn.Linear4bit - elif args.bits == 8: - cls = bnb.nn.Linear8bitLt - else: - torch.nn.Linear - - lora_module_names = set() - for name, module in model.named_modules(): - # Check if the current module is an instance of the linear layer class - if isinstance(module, cls): - # If yes, split the name of the module into its component parts and add the first or last part to the set - names = name.split(".") - lora_module_names.add(names[0] if len(names) == 1 else names[-1]) - - # Remove 'lm_head' from the set if present (needed for 16-bit) - if "lm_head" in lora_module_names: - lora_module_names.remove("lm_head") - - # Convert the set into a list and return it - return list(lora_module_names) - - - - -## TODO 待将此处的所有调用都替换掉,过去在train_qlora和predict_qlora中用了,待替换,然后删除此处历史代码。 -def get_accelerate_model( - args: argparse.Namespace = None, checkpoint_dir: Optional[str] = None -): - if torch.cuda.is_available(): - n_gpus = torch.cuda.device_count() - if is_ipex_available() and torch.xpu.is_available(): - n_gpus = torch.xpu.device_count() - - max_memory = f"{args.max_memory_MB}MB" - max_memory = {i: max_memory for i in range(n_gpus)} - device_map = "auto" - - # if we are in a distributed setting, we need to set the device map and max memory per device - if os.environ.get("LOCAL_RANK") is not None: - local_rank = int(os.environ.get("LOCAL_RANK", "0")) - device_map = {"": local_rank} - max_memory = {"": max_memory[local_rank]} - - if args.full_finetune: - assert args.bits in [16, 32] - - print(f"loading base model {args.model_name_or_path}...") - compute_dtype = ( - torch.float16 if args.fp16 else (torch.bfloat16 if args.bf16 else torch.float32) - ) - model = AutoModelForCausalLM.from_pretrained( - args.model_name_or_path, - cache_dir=args.cache_dir, - load_in_4bit=args.bits == 4, - load_in_8bit=args.bits == 8, - 
-        device_map=device_map,
-        max_memory=max_memory,
-        quantization_config=BitsAndBytesConfig(
-            load_in_4bit=args.bits == 4,
-            load_in_8bit=args.bits == 8,
-            llm_int8_threshold=6.0,
-            llm_int8_has_fp16_weight=False,
-            bnb_4bit_compute_dtype=compute_dtype,
-            bnb_4bit_use_double_quant=args.double_quant,
-            bnb_4bit_quant_type=args.quant_type,
-        ),
-        torch_dtype=(
-            torch.float32
-            if args.fp16
-            else (torch.bfloat16 if args.bf16 else torch.float32)
-        ),
-        trust_remote_code=args.trust_remote_code,
-        use_auth_token=args.use_auth_token,
-    )
-    if compute_dtype == torch.float16 and args.bits == 4:
-        if torch.cuda.is_bf16_supported():
-            print("=" * 80)
-            print(
-                "Your GPU supports bfloat16, you can accelerate training with the argument --bf16"
-            )
-            print("=" * 80)
-
-    if compute_dtype == torch.float16 and (
-        is_ipex_available() and torch.xpu.is_available()
-    ):
-        compute_dtype = torch.bfloat16
-        print("Intel XPU does not support float16 yet, so switching to bfloat16")
-
-    setattr(model, "model_parallel", True)
-    setattr(model, "is_parallelizable", True)
-
-    model.config.torch_dtype = (
-        torch.float32 if args.fp16 else (torch.bfloat16 if args.bf16 else torch.float32)
-    )
-
-    # Tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(
-        args.model_name_or_path,
-        cache_dir=args.cache_dir,
-        padding_side="right",
-        use_fast=False,  # Fast tokenizer giving issues.
-        tokenizer_type="llama"
-        if (
-            "llama" in args.model_name_or_path or "CodeLlama" in args.model_name_or_path
-        )
-        else None,  # Needed for HF name change
-        trust_remote_code=args.trust_remote_code,
-        use_auth_token=args.use_auth_token,
-    )
-    if tokenizer._pad_token is None:
-        smart_tokenizer_and_embedding_resize(
-            special_tokens_dict=dict(pad_token="[PAD]"),
-            tokenizer=tokenizer,
-            model=model,
-        )
-    if "llama" in args.model_name_or_path or isinstance(tokenizer, LlamaTokenizer):
-        # LLaMA tokenizer may not have correct special tokens set.
-        # Check and add them if missing to prevent them from being parsed into different tokens.
-        # Note that these are present in the vocabulary.
-        # Note also that `model.config.pad_token_id` is 0 which corresponds to `<unk>` token.
- print("Adding special tokens.") - tokenizer.add_special_tokens( - { - "eos_token": tokenizer.convert_ids_to_tokens(model.config.eos_token_id), - "bos_token": tokenizer.convert_ids_to_tokens(model.config.bos_token_id), - "unk_token": tokenizer.convert_ids_to_tokens( - model.config.pad_token_id - if model.config.pad_token_id != -1 - else tokenizer.pad_token_id - ), - } - ) - - if not args.full_finetune: - model = prepare_model_for_kbit_training( - model, use_gradient_checkpointing=args.gradient_checkpointing - ) - - if not args.full_finetune: - if checkpoint_dir is not None: - print("Loading adapters from checkpoint.") - model = PeftModel.from_pretrained( - model, join(checkpoint_dir, "adapter_model"), is_trainable=True - ) - else: - print(f"adding LoRA modules...") - modules = find_all_linear_names(args, model) - config = LoraConfig( - r=args.lora_r, - lora_alpha=args.lora_alpha, - target_modules=modules, - lora_dropout=args.lora_dropout, - bias="none", - task_type="CAUSAL_LM", - ) - model = get_peft_model(model, config) - - for name, module in model.named_modules(): - if isinstance(module, LoraLayer): - if args.bf16: - module = module.to(torch.bfloat16) - if "norm" in name: - module = module.to(torch.float32) - if "lm_head" in name or "embed_tokens" in name: - if hasattr(module, "weight"): - if args.bf16 and module.weight.dtype == torch.float32: - module = module.to(torch.bfloat16) - return model, tokenizer - def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: r""" @@ -401,6 +106,27 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: +def load_valuehead_params(model: torch.nn.Module, checkpoint_dir: os.PathLike) -> bool: + valuehead_file = os.path.join(checkpoint_dir, VALUE_HEAD_FILE_NAME) + if not os.path.exists(valuehead_file): + logger.warning( + "Provided path ({}) does not contain valuehead weights.".format( + checkpoint_dir + ) + ) + return False + valuehead_state_dict = torch.load(valuehead_file, map_location="cpu") + model.register_buffer("reward_head_weight", valuehead_state_dict["summary.weight"]) + model.register_buffer("reward_head_bias", valuehead_state_dict["summary.bias"]) + model.register_buffer( + "default_head_weight", torch.zeros_like(valuehead_state_dict["summary.weight"]) + ) + model.register_buffer( + "default_head_bias", torch.zeros_like(valuehead_state_dict["summary.bias"]) + ) + return True + + def load_model_and_tokenizer( model_args: "ModelArguments", finetuning_args: "FinetuningArguments", @@ -616,25 +342,6 @@ def load_model_and_tokenizer( return model, tokenizer -def load_valuehead_params(model: torch.nn.Module, checkpoint_dir: os.PathLike) -> bool: - valuehead_file = os.path.join(checkpoint_dir, VALUE_HEAD_FILE_NAME) - if not os.path.exists(valuehead_file): - logger.warning( - "Provided path ({}) does not contain valuehead weights.".format( - checkpoint_dir - ) - ) - return False - valuehead_state_dict = torch.load(valuehead_file, map_location="cpu") - model.register_buffer("reward_head_weight", valuehead_state_dict["summary.weight"]) - model.register_buffer("reward_head_bias", valuehead_state_dict["summary.bias"]) - model.register_buffer( - "default_head_weight", torch.zeros_like(valuehead_state_dict["summary.weight"]) - ) - model.register_buffer( - "default_head_bias", torch.zeros_like(valuehead_state_dict["summary.bias"]) - ) - return True def dispatch_model(model: "PreTrainedModel") -> "PreTrainedModel": r""" diff --git a/dbgpt_hub/llm_base/loggings.py b/dbgpt_hub/llm_base/loggings.py index a7e7c06..f253f2a 100644 --- 
--- a/dbgpt_hub/llm_base/loggings.py
+++ b/dbgpt_hub/llm_base/loggings.py
@@ -5,10 +5,8 @@
 import time
 from typing import TYPE_CHECKING
 from datetime import timedelta
-
 from transformers import TrainerCallback
 from transformers.trainer_utils import has_length
-
 from dbgpt_hub.configs.config import LOG_FILE_NAME

 if TYPE_CHECKING:
diff --git a/dbgpt_hub/llm_base/model_trainer.py b/dbgpt_hub/llm_base/model_trainer.py
index a7a8493..bef69c1 100644
--- a/dbgpt_hub/llm_base/model_trainer.py
+++ b/dbgpt_hub/llm_base/model_trainer.py
@@ -13,7 +13,6 @@
 from dbgpt_hub.llm_base.loggings import get_logger
 from dbgpt_hub.llm_base.config_parser import get_train_args, get_state_dict,load_trainable_params
 from dbgpt_hub.llm_base.load_tokenizer import load_model_and_tokenizer
-
 from dbgpt_hub.configs.config import VALUE_HEAD_FILE_NAME,FINETUNING_ARGS_NAME
 from transformers import Seq2SeqTrainer
 from transformers.trainer import TRAINING_ARGS_NAME, WEIGHTS_NAME
@@ -22,7 +21,6 @@
 from transformers.generation.logits_process import LogitsProcessor
 from transformers.generation.utils import LogitsProcessorList
-
 from peft import PeftModel
 from trl import PreTrainedModelWrapper
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union,Sequence
diff --git a/dbgpt_hub/scripts/train_sft.sh b/dbgpt_hub/scripts/train_sft.sh
index df07c22..cbe2187 100644
--- a/dbgpt_hub/scripts/train_sft.sh
+++ b/dbgpt_hub/scripts/train_sft.sh
@@ -18,10 +18,10 @@ CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
     --per_device_train_batch_size 1 \
     --gradient_accumulation_steps 4 \
     --lr_scheduler_type cosine_with_restarts \
-    --logging_steps 250 \
-    --save_steps 500 \
+    --logging_steps 10 \
+    --save_steps 10 \
     --learning_rate 5e-5 \
-    --num_train_epochs 2 \
+    --num_train_epochs 0.2 \
     --plot_loss
     # --bf16  # V100 does not support bf16
     # test num_train_epochs set to 0.1