diff --git a/README.md b/README.md
index 3d9aa4fb..fcd72979 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,7 @@ pip install git+https://github.com/Tongjilibo/bert4torch
 ### 4.1 Version history
 |Release date| bert4torch | torch4keras | Release notes |
 |------| ---------------- | ----------------- |----------- |
+|20240619| 0.5.1 | 0.2.4 | Add Qwen1.5, Qwen2, glm4; add SWA/convert_lm_logits_dtype; rework the trainers (DPOTrainer in particular); segment_ids in generation; repetition_penalty now requires the query; fix the dtype-cast bug in RMSNorm|
 |20240418| 0.5.0 | 0.2.2 | Fix the chatglm3 bug; fix the multi-file bug in save_pretrained; add CausalLMLoss; rework how arguments are passed to deepspeed; fix the Text2Vec bug; polish the openai client; add get_weight_decay_optim_groups|
 |20240317| 0.4.9.post2 | 0.2.1.post2 |Add the get_weight_decay_optim_groups function; allow is_causal in attention; fix the repetition_penalty bug; split baichuan out of llama; fix the config_path bug; allow the num_key_value_heads parameter; [torch4keras-v0.2.1.post2](https://github.com/Tongjilibo/torch4keras/releases/tag/v0.2.1.post2) feature updates|
 |20240221| 0.4.8 | 0.2.0|The fastapi service can offload to cpu when idle; `build_transformer_model` can download from hf; add the `FillMask` pipeline; add `SequenceClassificationTrainer`|
diff --git a/bert4torch/snippets/import_utils.py b/bert4torch/snippets/import_utils.py
index 21ef7bb3..9b7f8681 100644
--- a/bert4torch/snippets/import_utils.py
+++ b/bert4torch/snippets/import_utils.py
@@ -10,18 +10,6 @@
 import importlib.metadata as importlib_metadata
 
 
-def is_accelerate_available(check_partial_state=False):
-    '''Whether accelerate is available'''
-    accelerate_available = importlib.util.find_spec("accelerate") is not None
-    if accelerate_available:
-        if check_partial_state:
-            return version.parse(importlib_metadata.version("accelerate")) >= version.parse("0.17.0")
-        else:
-            return True
-    else:
-        return False
-
-
 def is_flash_attn_available():
     '''Whether the flash_attn package is available'''
     _flash_attn_available = is_package_available("flash_attn") and \
diff --git a/bert4torch/trainer/__init__.py b/bert4torch/trainer/__init__.py
index be02f68a..95217bcf 100644
--- a/bert4torch/trainer/__init__.py
+++ b/bert4torch/trainer/__init__.py
@@ -4,6 +4,6 @@
 from torch4keras.trainer import *  # torch4keras>=0.1.2.post2
 from .ppo_trainer import PPOTrainer
-from .dpo_trainer import DPOTrainer
-from .ptuningv2_trainer import PtuningV2Trainer
-from .sequence_classification_trainer import SequenceClassificationTrainer
\ No newline at end of file
+from .dpo_trainer import DPOTrainer, DPOModel
+from .ptuningv2_trainer import PtuningV2Trainer, PtuningV2Model
+from .sequence_classification_trainer import SequenceClassificationTrainer, SequenceClassificationModel
\ No newline at end of file
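With the export change above, the Model classes become importable alongside their Trainer wrappers. A minimal sketch of the new public surface (the import is implied by the patch; how you use the classes afterwards is up to you):

```python
# The classes re-exported above can now be imported directly, e.g. when you
# want the bare model rather than the Trainer wrapper around it.
from bert4torch.trainer import DPOModel, PtuningV2Model, SequenceClassificationModel
```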
diff --git a/bert4torch/trainer/dpo_trainer.py b/bert4torch/trainer/dpo_trainer.py
index c3d2e141..b8a38b49 100644
--- a/bert4torch/trainer/dpo_trainer.py
+++ b/bert4torch/trainer/dpo_trainer.py
@@ -7,6 +7,7 @@
 from contextlib import contextmanager, nullcontext
 import warnings
 import inspect
+from torch.nn.modules import Module
 from torch4keras.trainer import AutoTrainer, Trainer
 from bert4torch.models import BaseModel, build_transformer_model
 from bert4torch.snippets import is_peft_available, disable_dropout_in_model, peft_module_casting_to_bf16
@@ -20,6 +21,14 @@ class DPOModel(BaseModel):
 
     :param model: the model to be trained
     :param ref_model: the reference model
+    :param args: some of the dpo training arguments
+    :param model_init_kwargs: build_transformer_model kwargs for model
+    :param ref_model_init_kwargs: build_transformer_model kwargs for ref_model
+    :param model_adapter_name: adapter_name for model
+    :param ref_adapter_name: adapter_name for ref_model
+    :param peft_config: peft configuration
+    :param disable_dropout: whether to disable dropout
+    :param force_use_ref_model: force the use of ref_model
     '''
     def __init__(
         self,
@@ -163,6 +172,14 @@ class DPOTrainer(AutoTrainer):
     '''DPOTrainer
     :param model: the model to be trained
     :param ref_model: the reference model
+    :param args: some of the dpo training arguments
+    :param model_init_kwargs: build_transformer_model kwargs for model
+    :param ref_model_init_kwargs: build_transformer_model kwargs for ref_model
+    :param model_adapter_name: adapter_name for model
+    :param ref_adapter_name: adapter_name for ref_model
+    :param peft_config: peft configuration
+    :param disable_dropout: whether to disable dropout
+    :param force_use_ref_model: force the use of ref_model
 
     Examples
     ```python
@@ -175,11 +192,26 @@ class DPOTrainer(AutoTrainer):
     >>> model.to('cuda')
     ```
     '''
+    def __init__(self,
+                 model: Optional[Union[BaseModel, str]],
+                 *trainer_args,
+                 ref_model:BaseModel=None,
+                 args: Optional[DottableDict] = DottableDict(),
+                 model_init_kwargs: Optional[Dict] = None,
+                 ref_model_init_kwargs: Optional[Dict] = None,
+                 model_adapter_name: Optional[str] = None,
+                 ref_adapter_name: Optional[str] = None,
+                 peft_config: Optional[Dict] = None,
+                 disable_dropout: bool = True,
+                 force_use_ref_model: bool = False,
+                 **kwargs):
+        pass
+
     def __new__(cls,
                 model: Optional[Union[BaseModel, str]],
-                *args,
+                *trainer_args,
                 ref_model:BaseModel=None,
-                dpo_args: Optional[DottableDict] = DottableDict(),
+                args: Optional[DottableDict] = DottableDict(),
                 model_init_kwargs: Optional[Dict] = None,
                 ref_model_init_kwargs: Optional[Dict] = None,
                 model_adapter_name: Optional[str] = None,
@@ -189,7 +221,7 @@ def __new__(cls,
                 force_use_ref_model: bool = False,
                 **kwargs
                 ) -> Trainer:
-        module = DPOModel(model, ref_model, dpo_args, model_init_kwargs, ref_model_init_kwargs,
+        module = DPOModel(model, ref_model, args, model_init_kwargs, ref_model_init_kwargs,
                           model_adapter_name, ref_adapter_name, peft_config, disable_dropout, force_use_ref_model)
         module.to(model.device)
-        return super().__new__(cls, module, *args, **kwargs)
\ No newline at end of file
+        return super().__new__(cls, module, *trainer_args, **kwargs)
\ No newline at end of file
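The caller-visible effect of the dpo_trainer.py hunks is the keyword rename `dpo_args` → `args` (plus `*args` → `*trainer_args` internally, and an explicit `__init__` that only documents the signature). A construction sketch under stated assumptions: the paths are placeholders, the import location of `DottableDict` is an assumption, and `beta` is an assumed hyperparameter key, not taken from this diff:

```python
from bert4torch.models import build_transformer_model
from bert4torch.snippets import DottableDict  # assumed import location
from bert4torch.trainer import DPOTrainer

config_path = ''      # placeholder: path to bert4torch_config.json
checkpoint_path = ''  # placeholder: path to the model folder

model = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path)
ref_model = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path)

# The keyword renamed by this patch: pass `args=...` where `dpo_args=...` was used before.
trainer = DPOTrainer(model, ref_model=ref_model, args=DottableDict({'beta': 0.1}))
trainer.to('cuda')
```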
diff --git a/bert4torch/trainer/ptuningv2_trainer.py b/bert4torch/trainer/ptuningv2_trainer.py
index 6d5b8059..6f32b1c8 100644
--- a/bert4torch/trainer/ptuningv2_trainer.py
+++ b/bert4torch/trainer/ptuningv2_trainer.py
@@ -133,6 +133,10 @@ class PtuningV2Trainer(AutoTrainer):
     >>> model = PtuningV2Trainer(encoder).to('cuda')
     ```
     '''
+    def __init__(self, encoder:nn.Module, *args, pre_seq_len:int=128, prefix_projection:bool=False, **kwargs):
+        pass
+
     def __new__(cls, encoder:nn.Module, *args, pre_seq_len:int=128, prefix_projection:bool=False, **kwargs) -> Trainer:
         module = PtuningV2Model(encoder, *args, pre_seq_len=pre_seq_len, prefix_projection=prefix_projection, **kwargs)
+        module.to(encoder.device)
         return super().__new__(cls, module, *args, **kwargs)
\ No newline at end of file
diff --git a/bert4torch/trainer/sequence_classification_trainer.py b/bert4torch/trainer/sequence_classification_trainer.py
index abd43064..bf68f3bd 100644
--- a/bert4torch/trainer/sequence_classification_trainer.py
+++ b/bert4torch/trainer/sequence_classification_trainer.py
@@ -61,12 +61,15 @@ class SequenceClassificationTrainer(AutoTrainer):
     >>> config_path = ''  # path to bert4torch_config.json
     >>> checkpoint_path = ''  # path to the model folder
     >>> bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True)
-    >>> model = SequenceClassificationTrainer(bert)
-    >>> model.to('cuda')
+    >>> model = SequenceClassificationTrainer(bert).to('cuda')
     ```
     '''
+    def __init__(self, module:BaseModel, *args, num_labels:int=2, classifier_dropout:float=None,
+                 pool_strategy:Literal['pooler', 'cls', 'last-avg', 'mean', 'last-max', 'max', 'first-last-avg', 'custom']='cls', **kwargs):
+        pass
+
     def __new__(cls, module:BaseModel, *args, num_labels:int=2, classifier_dropout:float=None,
                 pool_strategy:Literal['pooler', 'cls', 'last-avg', 'mean', 'last-max', 'max', 'first-last-avg', 'custom']='cls', **kwargs) -> Trainer:
-        module = SequenceClassificationModel(module, num_labels, classifier_dropout, pool_strategy, **kwargs)
-        module.to(model.device)
-        return super().__new__(cls, module, *args, **kwargs)
\ No newline at end of file
+        model = SequenceClassificationModel(module, num_labels, classifier_dropout, pool_strategy, **kwargs)
+        model.to(module.device)
+        return super().__new__(cls, model, *args, **kwargs)
\ No newline at end of file
diff --git a/docs/History.md b/docs/History.md
index ac16253d..240c1379 100644
--- a/docs/History.md
+++ b/docs/History.md
@@ -1,5 +1,6 @@
 ## Update history
 
+- **20240619**: Add Qwen1.5, Qwen2, glm4; add SWA/convert_lm_logits_dtype; rework the trainers (DPOTrainer in particular); segment_ids in generation; repetition_penalty now requires the query
 - **20240426**: Simplify the LLM demos; read generation_config from config; add Qwen2 and SWA; fix the dtype-cast bug in RMSNorm
 - **20240418**: Fix the Text2Vec bug; polish the openai client; add get_weight_decay_optim_groups
 - **20240331**: Fix the chatglm3 bug; fix the multi-file bug in save_pretrained; add CausalLMLoss; rework how arguments are passed to deepspeed
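Besides wiring up the explicit `__init__` signatures, the sequence_classification_trainer.py hunk fixes a genuine name-shadowing bug: the old code rebound `module` to the freshly built model and then read `model.device`, a name that did not exist in that scope; the patched code keeps the incoming `module` and moves the new `model` onto its device. Usage after the fix, following the updated docstring (paths are placeholders; the keyword values shown just echo the defaults):

```python
from bert4torch.models import build_transformer_model
from bert4torch.trainer import SequenceClassificationTrainer

config_path = ''      # placeholder: path to bert4torch_config.json
checkpoint_path = ''  # placeholder: path to the model folder

bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True)
# pool_strategy accepts one of the Literal values from the signature above.
model = SequenceClassificationTrainer(bert, num_labels=2, pool_strategy='cls').to('cuda')
```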
diff --git a/examples/sentence_classfication/task_sentiment_classification.py b/examples/sentence_classfication/task_sentiment_classification.py
index 037f6932..6f4cf3f4 100644
--- a/examples/sentence_classfication/task_sentiment_classification.py
+++ b/examples/sentence_classfication/task_sentiment_classification.py
@@ -63,27 +63,29 @@ def collate_fn(batch):
 valid_dataloader = DataLoader(MyDataset([f'{data_dir}/sentiment.valid.data']), batch_size=batch_size, collate_fn=collate_fn)
 test_dataloader = DataLoader(MyDataset([f'{data_dir}/sentiment.test.data']), batch_size=batch_size, collate_fn=collate_fn)
 
-# Option 1
-class Model(BaseModel):
-    def __init__(self, pool_method='cls') -> None:
-        super().__init__()
-        self.pool_method = pool_method
-        self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
-        self.dropout = nn.Dropout(0.1)
-        self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)
-
-    def forward(self, token_ids, segment_ids):
-        hidden_states, pooling = self.bert([token_ids, segment_ids])
-        pooled_output = get_pool_emb(hidden_states, pooling, token_ids.gt(0).long(), self.pool_method)
-        output = self.dropout(pooled_output)
-        output = self.dense(output)
-        return output
-model = Model().to(device)
-
-# Option 2
-# from bert4torch.trainer import SequenceClassificationTrainer
-# bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
-# model = SequenceClassificationTrainer(bert).to(device)
+if False:
+    # Option 1
+    class Model(BaseModel):
+        def __init__(self, pool_method='cls') -> None:
+            super().__init__()
+            self.pool_method = pool_method
+            self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
+            self.dropout = nn.Dropout(0.1)
+            self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)
+
+        def forward(self, token_ids, segment_ids):
+            hidden_states, pooling = self.bert([token_ids, segment_ids])
+            pooled_output = get_pool_emb(hidden_states, pooling, token_ids.gt(0).long(), self.pool_method)
+            output = self.dropout(pooled_output)
+            output = self.dense(output)
+            return output
+    model = Model().to(device)
+
+else:
+    # Option 2
+    from bert4torch.trainer import SequenceClassificationTrainer
+    bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
+    model = SequenceClassificationTrainer(bert).to(device)
 
 # Define the loss and optimizer to use; custom ones are supported
 model.compile(
diff --git a/setup.py b/setup.py
index 92a7735c..167c362d 100644
--- a/setup.py
+++ b/setup.py
@@ -14,6 +14,6 @@
     license='MIT Licence',
     url='https://github.com/Tongjilibo/bert4torch',
     author='Tongjilibo',
-    install_requires=['numpy', 'tqdm', 'torch>1.6', 'torch4keras==0.2.3', 'six'],
+    install_requires=['numpy', 'tqdm', 'torch>1.6', 'torch4keras==0.2.4', 'six'],
     packages=find_packages()
 )
\ No newline at end of file
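The setup.py change bumps the hard pin on torch4keras from 0.2.3 to 0.2.4, matching the 0.5.1 row added to the README table above. A hedged install line, assuming the 0.5.1 release is published to PyPI as usual:

```shell
pip install bert4torch==0.5.1   # install_requires pulls in torch4keras==0.2.4
```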