Commit v0.5.1
Tongjilibo committed Jun 19, 2024
1 parent 706b7da commit f8eb45f
Showing 9 changed files with 77 additions and 46 deletions.
README.md (1 addition, 0 deletions)

@@ -93,6 +93,7 @@ pip install git+https://github.com/Tongjilibo/bert4torch
### 4.1 Version history
|Update date| bert4torch | torch4keras | Release notes |
|------| ---------------- | ----------------- |----------- |
+|20240619| 0.5.1 | 0.2.4 | Add Qwen1.5, Qwen2, glm4; add SWA/convert_lm_logits_dtype; rework the trainers (DPOTrainer in particular); segment_ids in generation; repetition_penalty now includes the query; fix the dtype-cast bug in RMSNorm|
|20240418| 0.5.0 | 0.2.2 | Fix chatglm3 bugs; fix the multi-file bug in save_pretrained; add CausalLMLoss; rework the deepspeed argument-passing logic; fix the Text2Vec bug; polish the openai client; add get_weight_decay_optim_groups|
|20240317| 0.4.9.post2 | 0.2.1.post2 |Add the get_weight_decay_optim_groups function; allow is_causal in attention; fix the repetition_penalty bug; split baichuan out of llama; fix the config_path bug; allow the num_key_value_heads parameter; feature updates in [torch4keras-v0.2.1.post2](https://github.com/Tongjilibo/torch4keras/releases/tag/v0.2.1.post2)|
|20240221| 0.4.8 | 0.2.0|fastapi serving allows offloading to cpu when idle; `build_transformer_model` allows downloading from hf; add a `FillMask` pipeline; add `SequenceClassificationTrainer`|
bert4torch/snippets/import_utils.py (0 additions, 12 deletions)

@@ -10,18 +10,6 @@
import importlib.metadata as importlib_metadata


-def is_accelerate_available(check_partial_state=False):
-    '''Whether accelerate is available'''
-    accelerate_available = importlib.util.find_spec("accelerate") is not None
-    if accelerate_available:
-        if check_partial_state:
-            return version.parse(importlib_metadata.version("accelerate")) >= version.parse("0.17.0")
-        else:
-            return True
-    else:
-        return False
-
-
def is_flash_attn_available():
    '''Whether the flash_attn package is available'''
    _flash_attn_available = is_package_available("flash_attn") and \

bert4torch/trainer/__init__.py (3 additions, 3 deletions)

@@ -4,6 +4,6 @@

from torch4keras.trainer import * # torch4keras>=0.1.2.post2
from .ppo_trainer import PPOTrainer
-from .dpo_trainer import DPOTrainer
-from .ptuningv2_trainer import PtuningV2Trainer
-from .sequence_classification_trainer import SequenceClassificationTrainer
+from .dpo_trainer import DPOTrainer, DPOModel
+from .ptuningv2_trainer import PtuningV2Trainer, PtuningV2Model
+from .sequence_classification_trainer import SequenceClassificationTrainer, SequenceClassificationModel
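After this change the Model classes are exported alongside their Trainers; a minimal import sketch, using only names visible in the diff above:

```python
# The Model classes are now exported next to their Trainers,
# so both can be imported directly from bert4torch.trainer.
from bert4torch.trainer import (
    DPOTrainer, DPOModel,
    PtuningV2Trainer, PtuningV2Model,
    SequenceClassificationTrainer, SequenceClassificationModel,
)
```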
bert4torch/trainer/dpo_trainer.py (36 additions, 4 deletions)

@@ -7,6 +7,7 @@
from contextlib import contextmanager, nullcontext
import warnings
import inspect
+from torch.nn.modules import Module
from torch4keras.trainer import AutoTrainer, Trainer
from bert4torch.models import BaseModel, build_transformer_model
from bert4torch.snippets import is_peft_available, disable_dropout_in_model, peft_module_casting_to_bf16
@@ -20,6 +21,14 @@ class DPOModel(BaseModel):
    :param model: the model to be trained
    :param ref_model: the reference model
+    :param args: partial arguments for DPO training
+    :param model_init_kwargs: build_transformer_model kwargs for model
+    :param ref_model_init_kwargs: build_transformer_model kwargs for ref_model
+    :param model_adapter_name: adapter_name for model
+    :param ref_adapter_name: adapter_name for ref_model
+    :param peft_config: peft configuration
+    :param disable_dropout: whether to disable dropout
+    :param force_use_ref_model: force the use of ref_model
    '''
    def __init__(
            self,
@@ -163,6 +172,14 @@ class DPOTrainer(AutoTrainer):
    '''DPOTrainer
    :param model: the model to be trained
    :param ref_model: the reference model
+    :param args: partial arguments for DPO training
+    :param model_init_kwargs: build_transformer_model kwargs for model
+    :param ref_model_init_kwargs: build_transformer_model kwargs for ref_model
+    :param model_adapter_name: adapter_name for model
+    :param ref_adapter_name: adapter_name for ref_model
+    :param peft_config: peft configuration
+    :param disable_dropout: whether to disable dropout
+    :param force_use_ref_model: force the use of ref_model
    Examples
    ```python
@@ -175,11 +192,26 @@
    >>> model.to('cuda')
    ```
    '''
+    def __init__(self,
+                 model: Optional[Union[BaseModel, str]],
+                 *trainer_args,
+                 ref_model: BaseModel = None,
+                 args: Optional[DottableDict] = DottableDict(),
+                 model_init_kwargs: Optional[Dict] = None,
+                 ref_model_init_kwargs: Optional[Dict] = None,
+                 model_adapter_name: Optional[str] = None,
+                 ref_adapter_name: Optional[str] = None,
+                 peft_config: Optional[Dict] = None,
+                 disable_dropout: bool = True,
+                 force_use_ref_model: bool = False,
+                 **kwargs):
+        pass  # signature only: the actual instance is assembled in __new__ below
+
    def __new__(cls,
                model: Optional[Union[BaseModel, str]],
-                *args,
+                *trainer_args,
                ref_model: BaseModel = None,
-                dpo_args: Optional[DottableDict] = DottableDict(),
+                args: Optional[DottableDict] = DottableDict(),
                model_init_kwargs: Optional[Dict] = None,
                ref_model_init_kwargs: Optional[Dict] = None,
                model_adapter_name: Optional[str] = None,
@@ -189,7 +221,7 @@ def __new__(cls,
                force_use_ref_model: bool = False,
                **kwargs
                ) -> Trainer:
-        module = DPOModel(model, ref_model, dpo_args, model_init_kwargs, ref_model_init_kwargs,
+        module = DPOModel(model, ref_model, args, model_init_kwargs, ref_model_init_kwargs,
                          model_adapter_name, ref_adapter_name, peft_config, disable_dropout, force_use_ref_model)
        module.to(model.device)
-        return super().__new__(cls, module, *args, **kwargs)
+        return super().__new__(cls, module, *trainer_args, **kwargs)
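A minimal usage sketch of the renamed keywords: the old `dpo_args` keyword is now `args`, and positional trainer arguments flow through `*trainer_args`. The config and checkpoint paths are hypothetical placeholders, not part of this diff:

```python
# Sketch only; paths are placeholder assumptions.
from bert4torch.models import build_transformer_model
from bert4torch.trainer import DPOTrainer

model = build_transformer_model(config_path='config.json', checkpoint_path='ckpt')      # policy model
ref_model = build_transformer_model(config_path='config.json', checkpoint_path='ckpt')  # frozen reference
trainer = DPOTrainer(model, ref_model=ref_model)  # DPO options now go in `args=` (formerly `dpo_args=`)
trainer.to('cuda')
```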
bert4torch/trainer/ptuningv2_trainer.py (4 additions, 0 deletions)

@@ -133,6 +133,10 @@ class PtuningV2Trainer(AutoTrainer):
    >>> model = PtuningV2Trainer(encoder).to('cuda')
    ```
    '''
+    def __init__(self, encoder: nn.Module, *args, pre_seq_len: int = 128, prefix_projection: bool = False, **kwargs):
+        pass  # signature only: construction happens in __new__ below
+
    def __new__(cls, encoder: nn.Module, *args, pre_seq_len: int = 128, prefix_projection: bool = False, **kwargs) -> Trainer:
        module = PtuningV2Model(encoder, *args, pre_seq_len=pre_seq_len, prefix_projection=prefix_projection, **kwargs)
        module.to(encoder.device)
        return super().__new__(cls, module, *args, **kwargs)
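Echoing the docstring example above, a slightly fuller sketch; the paths are placeholder assumptions:

```python
# Sketch only; paths are placeholder assumptions.
from bert4torch.models import build_transformer_model
from bert4torch.trainer import PtuningV2Trainer

encoder = build_transformer_model(config_path='config.json', checkpoint_path='ckpt')
model = PtuningV2Trainer(encoder, pre_seq_len=128, prefix_projection=False).to('cuda')
```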
bert4torch/trainer/sequence_classification_trainer.py (8 additions, 5 deletions)

@@ -61,12 +61,15 @@ class SequenceClassificationTrainer(AutoTrainer):
    >>> config_path = ''  # path to bert4torch_config.json
    >>> checkpoint_path = ''  # path to the model folder
    >>> bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True)
-    >>> model = SequenceClassificationTrainer(bert)
-    >>> model.to('cuda')
+    >>> model = SequenceClassificationTrainer(bert).to('cuda')
    ```
    '''
+    def __init__(self, module: BaseModel, *args, num_labels: int = 2, classifier_dropout: float = None,
+                 pool_strategy: Literal['pooler', 'cls', 'last-avg', 'mean', 'last-max', 'max', 'first-last-avg', 'custom'] = 'cls', **kwargs):
+        pass  # signature only: construction happens in __new__ below
+
    def __new__(cls, module: BaseModel, *args, num_labels: int = 2, classifier_dropout: float = None,
                pool_strategy: Literal['pooler', 'cls', 'last-avg', 'mean', 'last-max', 'max', 'first-last-avg', 'custom'] = 'cls', **kwargs) -> Trainer:
-        module = SequenceClassificationModel(module, num_labels, classifier_dropout, pool_strategy, **kwargs)
-        module.to(model.device)
-        return super().__new__(cls, module, *args, **kwargs)
+        model = SequenceClassificationModel(module, num_labels, classifier_dropout, pool_strategy, **kwargs)
+        model.to(module.device)
+        return super().__new__(cls, model, *args, **kwargs)
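A minimal sketch of the fixed construction path; note that the wrapped classification model, not the raw backbone, is what gets moved to the backbone's device. Paths are placeholder assumptions:

```python
# Sketch only; paths are placeholder assumptions.
from bert4torch.models import build_transformer_model
from bert4torch.trainer import SequenceClassificationTrainer

bert = build_transformer_model(config_path='config.json', checkpoint_path='ckpt', with_pool=True)
model = SequenceClassificationTrainer(bert, num_labels=2, pool_strategy='cls').to('cuda')
```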
docs/History.md (1 addition, 0 deletions)

@@ -1,5 +1,6 @@
## Update history

+- **20240619**: Add Qwen1.5, Qwen2, glm4; add SWA/convert_lm_logits_dtype; rework the trainers (DPOTrainer in particular); segment_ids in generation; repetition_penalty now includes the query
- **20240426**: Simplify the LLM demos; read generation_config from config; add Qwen2 and SWA; fix the dtype-cast bug in RMSNorm
- **20240418**: Fix the Text2Vec bug; polish the openai client; add get_weight_decay_optim_groups
- **20240331**: Fix chatglm3 bugs; fix the multi-file bug in save_pretrained; add CausalLMLoss; rework the deepspeed argument-passing logic

examples/sentence_classfication/task_sentiment_classification.py (23 additions, 21 deletions)

@@ -63,27 +63,29 @@ def collate_fn(batch):
valid_dataloader = DataLoader(MyDataset([f'{data_dir}/sentiment.valid.data']), batch_size=batch_size, collate_fn=collate_fn)
test_dataloader = DataLoader(MyDataset([f'{data_dir}/sentiment.test.data']), batch_size=batch_size, collate_fn=collate_fn)

-# Approach 1
-class Model(BaseModel):
-    def __init__(self, pool_method='cls') -> None:
-        super().__init__()
-        self.pool_method = pool_method
-        self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
-        self.dropout = nn.Dropout(0.1)
-        self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)
-
-    def forward(self, token_ids, segment_ids):
-        hidden_states, pooling = self.bert([token_ids, segment_ids])
-        pooled_output = get_pool_emb(hidden_states, pooling, token_ids.gt(0).long(), self.pool_method)
-        output = self.dropout(pooled_output)
-        output = self.dense(output)
-        return output
-model = Model().to(device)
-
-# Approach 2
-# from bert4torch.trainer import SequenceClassificationTrainer
-# bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
-# model = SequenceClassificationTrainer(bert).to(device)
+if False:  # set to True to use the hand-written model below
+    # Approach 1
+    class Model(BaseModel):
+        def __init__(self, pool_method='cls') -> None:
+            super().__init__()
+            self.pool_method = pool_method
+            self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
+            self.dropout = nn.Dropout(0.1)
+            self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)
+
+        def forward(self, token_ids, segment_ids):
+            hidden_states, pooling = self.bert([token_ids, segment_ids])
+            pooled_output = get_pool_emb(hidden_states, pooling, token_ids.gt(0).long(), self.pool_method)
+            output = self.dropout(pooled_output)
+            output = self.dense(output)
+            return output
+    model = Model().to(device)
+
+else:
+    # Approach 2
+    from bert4torch.trainer import SequenceClassificationTrainer
+    bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
+    model = SequenceClassificationTrainer(bert).to(device)

# Define the loss and optimizer to use; custom ones are supported
model.compile(

setup.py (1 addition, 1 deletion)

@@ -14,6 +14,6 @@
    license='MIT Licence',
    url='https://github.com/Tongjilibo/bert4torch',
    author='Tongjilibo',
-    install_requires=['numpy', 'tqdm', 'torch>1.6', 'torch4keras==0.2.3', 'six'],
+    install_requires=['numpy', 'tqdm', 'torch>1.6', 'torch4keras==0.2.4', 'six'],
    packages=find_packages()
)
