diff --git a/dbgpt_hub/llm_base/__init__.py b/dbgpt_hub/llm_base/__init__.py
index e7dd7f8..8b13789 100644
--- a/dbgpt_hub/llm_base/__init__.py
+++ b/dbgpt_hub/llm_base/__init__.py
@@ -1,4 +1 @@
-# from .load_tokenizer import get_accelerate_model
-# from .save_peft_model_callback import SavePeftModelCallback
-# __all__ = ["get_accelerate_model", "SavePeftModelCallback"]

diff --git a/dbgpt_hub/llm_base/chat_model.py b/dbgpt_hub/llm_base/chat_model.py
index 1d14384..d2272ec 100644
--- a/dbgpt_hub/llm_base/chat_model.py
+++ b/dbgpt_hub/llm_base/chat_model.py
@@ -9,8 +9,6 @@
 from dbgpt_hub.llm_base.load_tokenizer import dispatch_model, load_model_and_tokenizer
 from dbgpt_hub.llm_base.model_trainer import get_logits_processor
 from dbgpt_hub.data_process.data_utils import get_template_and_fix_tokenizer
-from dbgpt_hub.data_process.data_utils import extract_sql_prompt_dataset
-## TODO: refer to src/llmtuner/chat/stream_chat.py


 class ChatModel:
diff --git a/dbgpt_hub/llm_base/config_parser.py b/dbgpt_hub/llm_base/config_parser.py
index fc1b17e..044152a 100644
--- a/dbgpt_hub/llm_base/config_parser.py
+++ b/dbgpt_hub/llm_base/config_parser.py
@@ -1,17 +1,14 @@
 import os
 import sys
 import torch
-from dbgpt_hub.llm_base.loggings import get_logger
+import transformers
+import datasets
 from transformers.trainer import WEIGHTS_NAME
 from transformers.modeling_utils import load_sharded_checkpoint
 from transformers.trainer import WEIGHTS_NAME, WEIGHTS_INDEX_NAME
-from typing import Dict
-import datasets
-import transformers
-from typing import Any, Dict, Optional, Tuple
 from transformers import HfArgumentParser, Seq2SeqTrainingArguments
 from transformers.trainer_utils import get_last_checkpoint
-
+from typing import Any, Dict, Optional, Tuple
 from dbgpt_hub.llm_base.loggings import get_logger
 from dbgpt_hub.configs.model_args import (
     ModelArguments,
diff --git a/dbgpt_hub/llm_base/load_tokenizer.py b/dbgpt_hub/llm_base/load_tokenizer.py
index 665565e..5f11699 100644
--- a/dbgpt_hub/llm_base/load_tokenizer.py
+++ b/dbgpt_hub/llm_base/load_tokenizer.py
@@ -1,29 +1,18 @@
-import argparse
 import os
-import warnings
-import importlib
 import torch
-from packaging import version
-from os.path import join
-from typing import Optional, Tuple,Dict
-import bitsandbytes as bnb
-from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
-from transformers import PreTrainedModel, PreTrainedTokenizer
-from peft.tuners.lora import LoraLayer
-from transformers import (
-    AutoTokenizer,
-    AutoModelForCausalLM,
-    BitsAndBytesConfig,
-    LlamaTokenizer,
-)
-
-
-import os
 import math
-import torch
+from typing import Optional, Tuple,Dict,TYPE_CHECKING, Literal,List
 from types import MethodType
-from typing import TYPE_CHECKING, Literal, Optional, Tuple,List
+from trl import AutoModelForCausalLMWithValueHead
+from dbgpt_hub.llm_base.loggings import reset_logging, get_logger
+from dbgpt_hub.configs.model_args import FinetuningArguments
+from dbgpt_hub.llm_base.adapter import init_adapter
+from dbgpt_hub.configs.config import LAYERNORM_NAMES,VALUE_HEAD_FILE_NAME
+from transformers import PreTrainedModel, PreTrainedTokenizer
+from transformers.utils import check_min_version
+from transformers.utils.versions import require_version
+from transformers.deepspeed import is_deepspeed_zero3_enabled

 from transformers import (
     AutoConfig,
     AutoModelForCausalLM,
@@ -33,16 +22,6 @@
     PreTrainedModel,
     PreTrainedTokenizerBase,
 )
-from transformers.utils import check_min_version
-from transformers.utils.versions import require_version
-from transformers.deepspeed import is_deepspeed_zero3_enabled
-from trl import AutoModelForCausalLMWithValueHead
-
-from dbgpt_hub.llm_base.loggings import reset_logging, get_logger
-# from llmtuner.extras.misc import count_parameters, prepare_model_for_training
-from dbgpt_hub.configs.model_args import FinetuningArguments
-from dbgpt_hub.llm_base.adapter import init_adapter
-from dbgpt_hub.configs.config import LAYERNORM_NAMES,VALUE_HEAD_FILE_NAME

 if TYPE_CHECKING:
     from transformers import PreTrainedTokenizer
@@ -59,280 +38,6 @@
 require_version("trl>=0.5.0", "To fix: pip install trl>=0.5.0")
-
-
-
-# from dbgpt_hub.utils.model_utils import (
-#     smart_tokenizer_and_embedding_resize,
-#     find_all_linear_names,
-# )
-
-
-
-def is_ipex_available():
-    def get_major_and_minor_from_version(full_version):
-        return (
-            str(version.parse(full_version).major)
-            + "."
-            + str(version.parse(full_version).minor)
-        )
-
-    _torch_version = importlib.metadata.version("torch")
-    if importlib.util.find_spec("intel_extension_for_pytorch") is None:
-        return False
-    _ipex_version = "N/A"
-    try:
-        _ipex_version = importlib.metadata.version("intel_extension_for_pytorch")
-    except importlib.metadata.PackageNotFoundError:
-        return False
-    torch_major_and_minor = get_major_and_minor_from_version(_torch_version)
-    ipex_major_and_minor = get_major_and_minor_from_version(_ipex_version)
-    if torch_major_and_minor != ipex_major_and_minor:
-        warnings.warn(
-            f"Intel Extension for PyTorch {ipex_major_and_minor} needs to work with PyTorch {ipex_major_and_minor}.*,"
-            f" but PyTorch {_torch_version} is found. Please switch to the matching version and run again."
-        )
-        return False
-    return True
-
-
-def smart_tokenizer_and_embedding_resize(
-    special_tokens_dict: Dict[str, str],
-    tokenizer: PreTrainedTokenizer,
-    model: PreTrainedModel,
-) -> None:
-    """Resize tokenizer and embedding to accommodate new special tokens.
-    Resizes the tokenizer and the embedding matrix.
-    The sizes are usually set to a multiple of 64 to make GPU acceleration easier.
-
-    Args:
-        special_tokens_dict (Dict[str, str]): A dictionary of special tokens to be added to the tokenizer.
-        tokenizer (PreTrainedTokenizer): The tokenizer object to be resized.
-        model (PreTrainedModel): The model object whose token embeddings are to be resized.
-
-    Returns:
-        None
-
-    Note: This function resizes the tokenizer to accommodate additional special tokens and the
-    embedding matrix of the model to match the new size of the tokenizer. If any new special tokens
-    have been added, the function computes the average embedding values of the existing embeddings
-    and sets those values for the new special token embeddings. This is done separately for the input
-    embeddings and output embeddings of the model.
- """ - - num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict) - model.resize_token_embeddings(len(tokenizer)) - - if num_new_tokens > 0: - input_embeddings_data = model.get_input_embeddings().weight.data - output_embeddings_data = model.get_output_embeddings().weight.data - - # Compute average embeddings of existing tokens - input_embeddings_avg = input_embeddings_data[:-num_new_tokens].mean( - dim=0, keepdim=True - ) - output_embeddings_avg = output_embeddings_data[:-num_new_tokens].mean( - dim=0, keepdim=True - ) - - input_embeddings_data[-num_new_tokens:] = input_embeddings_avg - output_embeddings_data[-num_new_tokens:] = output_embeddings_avg - - -def find_all_linear_names( - args: argparse.Namespace, model: torch.nn.Module -) -> List[str]: - """ - Returns a list of names of all linear layers present in the given model. - Args: - args (argparse.Namespace): A namespace containing arguments of the script. - model (torch.nn.Module): The PyTorch model to extract linear layer names from. - - Returns: - List[str]: A list of names of all linear layers present in the given model. - - Raises: - TypeError: If `args` is not an instance of `argparse.Namespace`, or if `model` is not an instance \ - of `torch.nn.Module`. - ValueError: If `args.bits` is not 4 or 8. - - Example Usage: - >>> import argparse - >>> parser = argparse.ArgumentParser() - >>> parser.add_argument('--bits', type=int) - >>> args = parser.parse_args(['--bits', '4']) - >>> model = torch.nn.Sequential(torch.nn.Linear(10, 5), torch.nn.Linear(5, 1)) - >>> find_all_linear_names(args, model) - ['0', '1'] - """ - # Determine the correct linear layer class based on the value of `args.bits` - if args.bits == 4: - cls = bnb.nn.Linear4bit - elif args.bits == 8: - cls = bnb.nn.Linear8bitLt - else: - torch.nn.Linear - - lora_module_names = set() - for name, module in model.named_modules(): - # Check if the current module is an instance of the linear layer class - if isinstance(module, cls): - # If yes, split the name of the module into its component parts and add the first or last part to the set - names = name.split(".") - lora_module_names.add(names[0] if len(names) == 1 else names[-1]) - - # Remove 'lm_head' from the set if present (needed for 16-bit) - if "lm_head" in lora_module_names: - lora_module_names.remove("lm_head") - - # Convert the set into a list and return it - return list(lora_module_names) - - - - -## TODO 待将此处的所有调用都替换掉,过去在train_qlora和predict_qlora中用了,待替换,然后删除此处历史代码。 -def get_accelerate_model( - args: argparse.Namespace = None, checkpoint_dir: Optional[str] = None -): - if torch.cuda.is_available(): - n_gpus = torch.cuda.device_count() - if is_ipex_available() and torch.xpu.is_available(): - n_gpus = torch.xpu.device_count() - - max_memory = f"{args.max_memory_MB}MB" - max_memory = {i: max_memory for i in range(n_gpus)} - device_map = "auto" - - # if we are in a distributed setting, we need to set the device map and max memory per device - if os.environ.get("LOCAL_RANK") is not None: - local_rank = int(os.environ.get("LOCAL_RANK", "0")) - device_map = {"": local_rank} - max_memory = {"": max_memory[local_rank]} - - if args.full_finetune: - assert args.bits in [16, 32] - - print(f"loading base model {args.model_name_or_path}...") - compute_dtype = ( - torch.float16 if args.fp16 else (torch.bfloat16 if args.bf16 else torch.float32) - ) - model = AutoModelForCausalLM.from_pretrained( - args.model_name_or_path, - cache_dir=args.cache_dir, - load_in_4bit=args.bits == 4, - load_in_8bit=args.bits == 8, - 
-        device_map=device_map,
-        max_memory=max_memory,
-        quantization_config=BitsAndBytesConfig(
-            load_in_4bit=args.bits == 4,
-            load_in_8bit=args.bits == 8,
-            llm_int8_threshold=6.0,
-            llm_int8_has_fp16_weight=False,
-            bnb_4bit_compute_dtype=compute_dtype,
-            bnb_4bit_use_double_quant=args.double_quant,
-            bnb_4bit_quant_type=args.quant_type,
-        ),
-        torch_dtype=(
-            torch.float32
-            if args.fp16
-            else (torch.bfloat16 if args.bf16 else torch.float32)
-        ),
-        trust_remote_code=args.trust_remote_code,
-        use_auth_token=args.use_auth_token,
-    )
-    if compute_dtype == torch.float16 and args.bits == 4:
-        if torch.cuda.is_bf16_supported():
-            print("=" * 80)
-            print(
-                "Your GPU supports bfloat16, you can accelerate training with the argument --bf16"
-            )
-            print("=" * 80)
-
-    if compute_dtype == torch.float16 and (
-        is_ipex_available() and torch.xpu.is_available()
-    ):
-        compute_dtype = torch.bfloat16
-        print("Intel XPU does not support float16 yet, so switching to bfloat16")
-
-    setattr(model, "model_parallel", True)
-    setattr(model, "is_parallelizable", True)
-
-    model.config.torch_dtype = (
-        torch.float32 if args.fp16 else (torch.bfloat16 if args.bf16 else torch.float32)
-    )
-
-    # Tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(
-        args.model_name_or_path,
-        cache_dir=args.cache_dir,
-        padding_side="right",
-        use_fast=False,  # Fast tokenizer giving issues.
-        tokenizer_type="llama"
-        if (
-            "llama" in args.model_name_or_path or "CodeLlama" in args.model_name_or_path
-        )
-        else None,  # Needed for HF name change
-        trust_remote_code=args.trust_remote_code,
-        use_auth_token=args.use_auth_token,
-    )
-    if tokenizer._pad_token is None:
-        smart_tokenizer_and_embedding_resize(
-            special_tokens_dict=dict(pad_token="[PAD]"),
-            tokenizer=tokenizer,
-            model=model,
-        )
-    if "llama" in args.model_name_or_path or isinstance(tokenizer, LlamaTokenizer):
-        # LLaMA tokenizer may not have correct special tokens set.
-        # Check and add them if missing to prevent them from being parsed into different tokens.
-        # Note that these are present in the vocabulary.
-        # Note also that `model.config.pad_token_id` is 0 which corresponds to `<unk>` token.
- print("Adding special tokens.") - tokenizer.add_special_tokens( - { - "eos_token": tokenizer.convert_ids_to_tokens(model.config.eos_token_id), - "bos_token": tokenizer.convert_ids_to_tokens(model.config.bos_token_id), - "unk_token": tokenizer.convert_ids_to_tokens( - model.config.pad_token_id - if model.config.pad_token_id != -1 - else tokenizer.pad_token_id - ), - } - ) - - if not args.full_finetune: - model = prepare_model_for_kbit_training( - model, use_gradient_checkpointing=args.gradient_checkpointing - ) - - if not args.full_finetune: - if checkpoint_dir is not None: - print("Loading adapters from checkpoint.") - model = PeftModel.from_pretrained( - model, join(checkpoint_dir, "adapter_model"), is_trainable=True - ) - else: - print(f"adding LoRA modules...") - modules = find_all_linear_names(args, model) - config = LoraConfig( - r=args.lora_r, - lora_alpha=args.lora_alpha, - target_modules=modules, - lora_dropout=args.lora_dropout, - bias="none", - task_type="CAUSAL_LM", - ) - model = get_peft_model(model, config) - - for name, module in model.named_modules(): - if isinstance(module, LoraLayer): - if args.bf16: - module = module.to(torch.bfloat16) - if "norm" in name: - module = module.to(torch.float32) - if "lm_head" in name or "embed_tokens" in name: - if hasattr(module, "weight"): - if args.bf16 and module.weight.dtype == torch.float32: - module = module.to(torch.bfloat16) - return model, tokenizer - def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: r""" @@ -401,6 +106,27 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: +def load_valuehead_params(model: torch.nn.Module, checkpoint_dir: os.PathLike) -> bool: + valuehead_file = os.path.join(checkpoint_dir, VALUE_HEAD_FILE_NAME) + if not os.path.exists(valuehead_file): + logger.warning( + "Provided path ({}) does not contain valuehead weights.".format( + checkpoint_dir + ) + ) + return False + valuehead_state_dict = torch.load(valuehead_file, map_location="cpu") + model.register_buffer("reward_head_weight", valuehead_state_dict["summary.weight"]) + model.register_buffer("reward_head_bias", valuehead_state_dict["summary.bias"]) + model.register_buffer( + "default_head_weight", torch.zeros_like(valuehead_state_dict["summary.weight"]) + ) + model.register_buffer( + "default_head_bias", torch.zeros_like(valuehead_state_dict["summary.bias"]) + ) + return True + + def load_model_and_tokenizer( model_args: "ModelArguments", finetuning_args: "FinetuningArguments", @@ -616,25 +342,6 @@ def load_model_and_tokenizer( return model, tokenizer -def load_valuehead_params(model: torch.nn.Module, checkpoint_dir: os.PathLike) -> bool: - valuehead_file = os.path.join(checkpoint_dir, VALUE_HEAD_FILE_NAME) - if not os.path.exists(valuehead_file): - logger.warning( - "Provided path ({}) does not contain valuehead weights.".format( - checkpoint_dir - ) - ) - return False - valuehead_state_dict = torch.load(valuehead_file, map_location="cpu") - model.register_buffer("reward_head_weight", valuehead_state_dict["summary.weight"]) - model.register_buffer("reward_head_bias", valuehead_state_dict["summary.bias"]) - model.register_buffer( - "default_head_weight", torch.zeros_like(valuehead_state_dict["summary.weight"]) - ) - model.register_buffer( - "default_head_bias", torch.zeros_like(valuehead_state_dict["summary.bias"]) - ) - return True def dispatch_model(model: "PreTrainedModel") -> "PreTrainedModel": r""" diff --git a/dbgpt_hub/llm_base/loggings.py b/dbgpt_hub/llm_base/loggings.py index a7e7c06..f253f2a 100644 --- 
--- a/dbgpt_hub/llm_base/loggings.py
+++ b/dbgpt_hub/llm_base/loggings.py
@@ -5,10 +5,8 @@
 import time
 from typing import TYPE_CHECKING
 from datetime import timedelta
-
 from transformers import TrainerCallback
 from transformers.trainer_utils import has_length
-
 from dbgpt_hub.configs.config import LOG_FILE_NAME

 if TYPE_CHECKING:
diff --git a/dbgpt_hub/llm_base/model_trainer.py b/dbgpt_hub/llm_base/model_trainer.py
index a7a8493..bef69c1 100644
--- a/dbgpt_hub/llm_base/model_trainer.py
+++ b/dbgpt_hub/llm_base/model_trainer.py
@@ -13,7 +13,6 @@
 from dbgpt_hub.llm_base.loggings import get_logger
 from dbgpt_hub.llm_base.config_parser import get_train_args, get_state_dict,load_trainable_params
 from dbgpt_hub.llm_base.load_tokenizer import load_model_and_tokenizer
-
 from dbgpt_hub.configs.config import VALUE_HEAD_FILE_NAME,FINETUNING_ARGS_NAME
 from transformers import Seq2SeqTrainer
 from transformers.trainer import TRAINING_ARGS_NAME, WEIGHTS_NAME
@@ -22,7 +21,6 @@
 from transformers.generation.logits_process import LogitsProcessor
 from transformers.generation.utils import LogitsProcessorList
-
 from peft import PeftModel
 from trl import PreTrainedModelWrapper
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union,Sequence
diff --git a/dbgpt_hub/scripts/train_sft.sh b/dbgpt_hub/scripts/train_sft.sh
index df07c22..cbe2187 100644
--- a/dbgpt_hub/scripts/train_sft.sh
+++ b/dbgpt_hub/scripts/train_sft.sh
@@ -18,10 +18,10 @@ CUDA_VISIBLE_DEVICES=0 python dbgpt_hub/train/sft_train.py \
     --per_device_train_batch_size 1 \
     --gradient_accumulation_steps 4 \
     --lr_scheduler_type cosine_with_restarts \
-    --logging_steps 250 \
-    --save_steps 500 \
+    --logging_steps 10 \
+    --save_steps 10 \
     --learning_rate 5e-5 \
-    --num_train_epochs 2 \
+    --num_train_epochs 0.2 \
     --plot_loss
     # --bf16  # V100 does not support bf16
     # test num_train_epochs set to 0.1