Commit v0.5.1
Tongjilibo committed Jun 19, 2024
1 parent 706b7da commit f8eb45f
Showing 9 changed files with 77 additions and 46 deletions.
README.md (1 addition, 0 deletions)

@@ -93,6 +93,7 @@ pip install git+https://github.com/Tongjilibo/bert4torch
### 4.1 Version history
|Update date| bert4torch | torch4keras | Release notes |
|------| ---------------- | ----------------- |----------- |
+|20240619| 0.5.1 | 0.2.4 | Add Qwen1.5, Qwen2, glm4; add SWA/convert_lm_logits_dtype; rework the trainers (DPOTrainer in particular); segment_ids in generation; repetition_penalty now includes the query; fix the dtype-cast bug in RMSNorm|
|20240418| 0.5.0 | 0.2.2 | Fix chatglm3 bugs; fix the multi-file bug in save_pretrained; add CausalLMLoss; rework the deepspeed argument-passing logic; fix the Text2Vec bug; polish the openai client; add get_weight_decay_optim_groups|
|20240317| 0.4.9.post2 | 0.2.1.post2 |Add the get_weight_decay_optim_groups function; allow is_causal in attention; fix the repetition_penalty bug; split baichuan out of llama; fix the config_path bug; allow the num_key_value_heads parameter; feature updates in [torch4keras-v0.2.1.post2](https://github.com/Tongjilibo/torch4keras/releases/tag/v0.2.1.post2)|
|20240221| 0.4.8 | 0.2.0|fastapi serving allows offloading to cpu when idle; `build_transformer_model` allows downloading from hf; add a `FillMask` pipeline; add `SequenceClassificationTrainer`|
bert4torch/snippets/import_utils.py (0 additions, 12 deletions)

@@ -10,18 +10,6 @@
import importlib.metadata as importlib_metadata


-def is_accelerate_available(check_partial_state=False):
-    '''Whether accelerate is available'''
-    accelerate_available = importlib.util.find_spec("accelerate") is not None
-    if accelerate_available:
-        if check_partial_state:
-            return version.parse(importlib_metadata.version("accelerate")) >= version.parse("0.17.0")
-        else:
-            return True
-    else:
-        return False
-
-
def is_flash_attn_available():
    '''Whether the flash_attn package is available'''
    _flash_attn_available = is_package_available("flash_attn") and \

bert4torch/trainer/__init__.py (3 additions, 3 deletions)

@@ -4,6 +4,6 @@

from torch4keras.trainer import * # torch4keras>=0.1.2.post2
from .ppo_trainer import PPOTrainer
-from .dpo_trainer import DPOTrainer
-from .ptuningv2_trainer import PtuningV2Trainer
-from .sequence_classification_trainer import SequenceClassificationTrainer
+from .dpo_trainer import DPOTrainer, DPOModel
+from .ptuningv2_trainer import PtuningV2Trainer, PtuningV2Model
+from .sequence_classification_trainer import SequenceClassificationTrainer, SequenceClassificationModel
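After this change the Model classes are exported alongside their Trainers; a minimal import sketch, using only names visible in the diff above:

```python
# The Model classes are now exported next to their Trainers,
# so both can be imported directly from bert4torch.trainer.
from bert4torch.trainer import (
    DPOTrainer, DPOModel,
    PtuningV2Trainer, PtuningV2Model,
    SequenceClassificationTrainer, SequenceClassificationModel,
)
```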
bert4torch/trainer/dpo_trainer.py (36 additions, 4 deletions)

@@ -7,6 +7,7 @@
from contextlib import contextmanager, nullcontext
import warnings
import inspect
+from torch.nn.modules import Module
from torch4keras.trainer import AutoTrainer, Trainer
from bert4torch.models import BaseModel, build_transformer_model
from bert4torch.snippets import is_peft_available, disable_dropout_in_model, peft_module_casting_to_bf16
@@ -20,6 +21,14 @@ class DPOModel(BaseModel):
    :param model: the model to be trained
    :param ref_model: the reference model
+    :param args: partial arguments for DPO training
+    :param model_init_kwargs: build_transformer_model kwargs for model
+    :param ref_model_init_kwargs: build_transformer_model kwargs for ref_model
+    :param model_adapter_name: adapter_name for model
+    :param ref_adapter_name: adapter_name for ref_model
+    :param peft_config: peft configuration
+    :param disable_dropout: whether to disable dropout
+    :param force_use_ref_model: force the use of ref_model
    '''
    def __init__(
            self,
@@ -163,6 +172,14 @@ class DPOTrainer(AutoTrainer):
    '''DPOTrainer
    :param model: the model to be trained
    :param ref_model: the reference model
+    :param args: partial arguments for DPO training
+    :param model_init_kwargs: build_transformer_model kwargs for model
+    :param ref_model_init_kwargs: build_transformer_model kwargs for ref_model
+    :param model_adapter_name: adapter_name for model
+    :param ref_adapter_name: adapter_name for ref_model
+    :param peft_config: peft configuration
+    :param disable_dropout: whether to disable dropout
+    :param force_use_ref_model: force the use of ref_model
    Examples
    ```python
@@ -175,11 +192,26 @@
    >>> model.to('cuda')
    ```
    '''
+    def __init__(self,
+                 model: Optional[Union[BaseModel, str]],
+                 *trainer_args,
+                 ref_model: BaseModel = None,
+                 args: Optional[DottableDict] = DottableDict(),
+                 model_init_kwargs: Optional[Dict] = None,
+                 ref_model_init_kwargs: Optional[Dict] = None,
+                 model_adapter_name: Optional[str] = None,
+                 ref_adapter_name: Optional[str] = None,
+                 peft_config: Optional[Dict] = None,
+                 disable_dropout: bool = True,
+                 force_use_ref_model: bool = False,
+                 **kwargs):
+        pass  # signature only: the actual instance is assembled in __new__ below
+
    def __new__(cls,
                model: Optional[Union[BaseModel, str]],
-                *args,
+                *trainer_args,
                ref_model: BaseModel = None,
-                dpo_args: Optional[DottableDict] = DottableDict(),
+                args: Optional[DottableDict] = DottableDict(),
                model_init_kwargs: Optional[Dict] = None,
                ref_model_init_kwargs: Optional[Dict] = None,
                model_adapter_name: Optional[str] = None,
@@ -189,7 +221,7 @@ def __new__(cls,
                force_use_ref_model: bool = False,
                **kwargs
                ) -> Trainer:
-        module = DPOModel(model, ref_model, dpo_args, model_init_kwargs, ref_model_init_kwargs,
+        module = DPOModel(model, ref_model, args, model_init_kwargs, ref_model_init_kwargs,
                          model_adapter_name, ref_adapter_name, peft_config, disable_dropout, force_use_ref_model)
        module.to(model.device)
-        return super().__new__(cls, module, *args, **kwargs)
+        return super().__new__(cls, module, *trainer_args, **kwargs)
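A minimal usage sketch of the renamed keywords: the old `dpo_args` keyword is now `args`, and positional trainer arguments flow through `*trainer_args`. The config and checkpoint paths are hypothetical placeholders, not part of this diff:

```python
# Sketch only; paths are placeholder assumptions.
from bert4torch.models import build_transformer_model
from bert4torch.trainer import DPOTrainer

model = build_transformer_model(config_path='config.json', checkpoint_path='ckpt')      # policy model
ref_model = build_transformer_model(config_path='config.json', checkpoint_path='ckpt')  # frozen reference
trainer = DPOTrainer(model, ref_model=ref_model)  # DPO options now go in `args=` (formerly `dpo_args=`)
trainer.to('cuda')
```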
bert4torch/trainer/ptuningv2_trainer.py (4 additions, 0 deletions)

@@ -133,6 +133,10 @@ class PtuningV2Trainer(AutoTrainer):
    >>> model = PtuningV2Trainer(encoder).to('cuda')
    ```
    '''
+    def __init__(self, encoder: nn.Module, *args, pre_seq_len: int = 128, prefix_projection: bool = False, **kwargs):
+        pass  # signature only: construction happens in __new__ below
+
    def __new__(cls, encoder: nn.Module, *args, pre_seq_len: int = 128, prefix_projection: bool = False, **kwargs) -> Trainer:
        module = PtuningV2Model(encoder, *args, pre_seq_len=pre_seq_len, prefix_projection=prefix_projection, **kwargs)
        module.to(encoder.device)
        return super().__new__(cls, module, *args, **kwargs)
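Echoing the docstring example above, a slightly fuller sketch; the paths are placeholder assumptions:

```python
# Sketch only; paths are placeholder assumptions.
from bert4torch.models import build_transformer_model
from bert4torch.trainer import PtuningV2Trainer

encoder = build_transformer_model(config_path='config.json', checkpoint_path='ckpt')
model = PtuningV2Trainer(encoder, pre_seq_len=128, prefix_projection=False).to('cuda')
```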
bert4torch/trainer/sequence_classification_trainer.py (8 additions, 5 deletions)

@@ -61,12 +61,15 @@ class SequenceClassificationTrainer(AutoTrainer):
    >>> config_path = ''  # path to bert4torch_config.json
    >>> checkpoint_path = ''  # path to the model folder
    >>> bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True)
-    >>> model = SequenceClassificationTrainer(bert)
-    >>> model.to('cuda')
+    >>> model = SequenceClassificationTrainer(bert).to('cuda')
    ```
    '''
+    def __init__(self, module: BaseModel, *args, num_labels: int = 2, classifier_dropout: float = None,
+                 pool_strategy: Literal['pooler', 'cls', 'last-avg', 'mean', 'last-max', 'max', 'first-last-avg', 'custom'] = 'cls', **kwargs):
+        pass  # signature only: construction happens in __new__ below
+
    def __new__(cls, module: BaseModel, *args, num_labels: int = 2, classifier_dropout: float = None,
                pool_strategy: Literal['pooler', 'cls', 'last-avg', 'mean', 'last-max', 'max', 'first-last-avg', 'custom'] = 'cls', **kwargs) -> Trainer:
-        module = SequenceClassificationModel(module, num_labels, classifier_dropout, pool_strategy, **kwargs)
-        module.to(model.device)
-        return super().__new__(cls, module, *args, **kwargs)
+        model = SequenceClassificationModel(module, num_labels, classifier_dropout, pool_strategy, **kwargs)
+        model.to(module.device)
+        return super().__new__(cls, model, *args, **kwargs)
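A minimal sketch of the fixed construction path; note that the wrapped classification model, not the raw backbone, is what gets moved to the backbone's device. Paths are placeholder assumptions:

```python
# Sketch only; paths are placeholder assumptions.
from bert4torch.models import build_transformer_model
from bert4torch.trainer import SequenceClassificationTrainer

bert = build_transformer_model(config_path='config.json', checkpoint_path='ckpt', with_pool=True)
model = SequenceClassificationTrainer(bert, num_labels=2, pool_strategy='cls').to('cuda')
```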
docs/History.md (1 addition, 0 deletions)

@@ -1,5 +1,6 @@
## Update history

+- **20240619**: Add Qwen1.5, Qwen2, glm4; add SWA/convert_lm_logits_dtype; rework the trainers (DPOTrainer in particular); segment_ids in generation; repetition_penalty now includes the query
- **20240426**: Simplify the LLM demos; read generation_config from config; add Qwen2 and SWA; fix the dtype-cast bug in RMSNorm
- **20240418**: Fix the Text2Vec bug; polish the openai client; add get_weight_decay_optim_groups
- **20240331**: Fix chatglm3 bugs; fix the multi-file bug in save_pretrained; add CausalLMLoss; rework the deepspeed argument-passing logic

examples/sentence_classfication/task_sentiment_classification.py (23 additions, 21 deletions)

@@ -63,27 +63,29 @@ def collate_fn(batch):
valid_dataloader = DataLoader(MyDataset([f'{data_dir}/sentiment.valid.data']), batch_size=batch_size, collate_fn=collate_fn)
test_dataloader = DataLoader(MyDataset([f'{data_dir}/sentiment.test.data']), batch_size=batch_size, collate_fn=collate_fn)

-# Approach 1
-class Model(BaseModel):
-    def __init__(self, pool_method='cls') -> None:
-        super().__init__()
-        self.pool_method = pool_method
-        self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
-        self.dropout = nn.Dropout(0.1)
-        self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)
-
-    def forward(self, token_ids, segment_ids):
-        hidden_states, pooling = self.bert([token_ids, segment_ids])
-        pooled_output = get_pool_emb(hidden_states, pooling, token_ids.gt(0).long(), self.pool_method)
-        output = self.dropout(pooled_output)
-        output = self.dense(output)
-        return output
-model = Model().to(device)
-
-# Approach 2
-# from bert4torch.trainer import SequenceClassificationTrainer
-# bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
-# model = SequenceClassificationTrainer(bert).to(device)
+if False:  # set to True to use the hand-written model below
+    # Approach 1
+    class Model(BaseModel):
+        def __init__(self, pool_method='cls') -> None:
+            super().__init__()
+            self.pool_method = pool_method
+            self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
+            self.dropout = nn.Dropout(0.1)
+            self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)
+
+        def forward(self, token_ids, segment_ids):
+            hidden_states, pooling = self.bert([token_ids, segment_ids])
+            pooled_output = get_pool_emb(hidden_states, pooling, token_ids.gt(0).long(), self.pool_method)
+            output = self.dropout(pooled_output)
+            output = self.dense(output)
+            return output
+    model = Model().to(device)
+
+else:
+    # Approach 2
+    from bert4torch.trainer import SequenceClassificationTrainer
+    bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True, gradient_checkpoint=True)
+    model = SequenceClassificationTrainer(bert).to(device)

# Define the loss and optimizer to use; custom ones are supported
model.compile(

setup.py (1 addition, 1 deletion)

@@ -14,6 +14,6 @@
    license='MIT Licence',
    url='https://github.com/Tongjilibo/bert4torch',
    author='Tongjilibo',
-    install_requires=['numpy', 'tqdm', 'torch>1.6', 'torch4keras==0.2.3', 'six'],
+    install_requires=['numpy', 'tqdm', 'torch>1.6', 'torch4keras==0.2.4', 'six'],
    packages=find_packages()
)
