Skip to content

Commit

Permalink
make telechat2 config compatible with Llama
Browse files Browse the repository at this point in the history
Signed-off-by: Isotr0py <[email protected]>
  • Loading branch information
Isotr0py committed Nov 27, 2024
1 parent e5757a2 commit 84224ba
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 1 deletion.
4 changes: 3 additions & 1 deletion vllm/transformers_utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
MLPSpeculatorConfig, MPTConfig,
NemotronConfig, NVLM_D_Config,
Olmo2Config, RWConfig,
SolarConfig, UltravoxConfig)
SolarConfig, Telechat2Config,
UltravoxConfig)
# yapf: enable
from vllm.transformers_utils.utils import check_gguf_file
from vllm.utils import resolve_obj_by_qualname
Expand Down Expand Up @@ -64,6 +65,7 @@
"NVLM_D": NVLM_D_Config,
"olmo2": Olmo2Config,
"solar": SolarConfig,
"telechat": Telechat2Config,
"ultravox": UltravoxConfig,
**_CONFIG_REGISTRY_OVERRIDE_HF
}
Expand Down
2 changes: 2 additions & 0 deletions vllm/transformers_utils/configs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from vllm.transformers_utils.configs.nvlm_d import NVLM_D_Config
from vllm.transformers_utils.configs.olmo2 import Olmo2Config
from vllm.transformers_utils.configs.solar import SolarConfig
from vllm.transformers_utils.configs.telechat2 import Telechat2Config
from vllm.transformers_utils.configs.ultravox import UltravoxConfig

__all__ = [
Expand All @@ -36,5 +37,6 @@
"NVLM_D_Config",
"Olmo2Config",
"SolarConfig",
"Telechat2Config",
"UltravoxConfig",
]
76 changes: 76 additions & 0 deletions vllm/transformers_utils/configs/telechat2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# adapted from https://www.modelscope.cn/models/TeleAI/TeleChat2-3B/resolve/master/configuration_telechat2.py
""" Telechat configuration compatible with LlamaConfig. """

from transformers.configuration_utils import PretrainedConfig


class Telechat2Config(PretrainedConfig):
    """Configuration for TeleChat2, kept attribute-compatible with
    ``LlamaConfig`` via :attr:`attribute_map` so Llama-oriented code paths
    can read the standard names.

    Args:
        vocab_size (`int`, *optional*, defaults to 160256):
            Vocabulary size of the Telechat model.
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimensionality of the embeddings and hidden states.
        ffn_hidden_size (`int`, *optional*, defaults to 12288):
            Dimensionality of the feed-forward hidden states.
        n_layer (`int`, *optional*, defaults to 30):
            Number of hidden layers in the Transformer.
        n_head (`int`, *optional*, defaults to 32):
            Number of attention heads for each attention layer.
        layer_norm_epsilon (`float`, *optional*, defaults to 1e-5):
            The epsilon to use in the layer normalization layers.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer
            for initializing all weight matrices.
        apply_residual_connection_post_layernorm (`bool`, *optional*,
            defaults to `False`):
            If enabled, use the layer norm of the hidden states as the
            residual in the transformer blocks.
        hidden_dropout (`float`, *optional*, defaults to 0.0):
            Dropout rate of the dropout function on the bias dropout.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            Dropout rate applied to the attention probs.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values
            attentions.
        training_seqlen (`int`, *optional*, defaults to 8192):
            Sequence length during last finetuning.
        logn (`bool`, *optional*, defaults to `True`):
            Whether or not to use logN during extrapolation.
        embed_layernorm (`bool`, *optional*, defaults to `False`):
            Whether or not to use embedding layernorm.
    """

    model_type = "telechat"
    keys_to_ignore_at_inference = ["past_key_values"]
    # Expose Llama-style attribute names backed by Telechat's native
    # config keys, so consumers written against LlamaConfig work as-is.
    attribute_map = {
        "num_hidden_layers": "n_layer",
        "num_attention_heads": "n_head",
        "intermediate_size": "ffn_hidden_size",
        "rms_norm_eps": "layer_norm_epsilon",
    }

    def __init__(
        self,
        vocab_size=160256,
        hidden_size=4096,
        n_layer=30,
        n_head=32,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        use_cache=True,
        bos_token_id=1,
        eos_token_id=2,
        apply_residual_connection_post_layernorm=False,
        hidden_dropout=0.0,
        attention_dropout=0.0,
        ffn_hidden_size=12288,
        training_seqlen=8192,
        logn=True,
        embed_layernorm=False,
        hidden_act="silu",
        **kwargs,
    ):
        self.vocab_size = vocab_size
        # Some checkpoints carry the legacy "n_embed" key instead of
        # "hidden_size"; prefer it when present.
        n_embed = kwargs.pop("n_embed", None)
        self.hidden_size = hidden_size if n_embed is None else n_embed
        self.n_layer = n_layer
        self.n_head = n_head
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range
        self.use_cache = use_cache
        self.apply_residual_connection_post_layernorm = (
            apply_residual_connection_post_layernorm)
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.logn = logn
        self.training_seqlen = training_seqlen
        self.embed_layernorm = embed_layernorm
        # NOTE(review): absent from many configs; left as None when the
        # checkpoint does not specify it — confirm downstream fallback.
        self.num_key_value_heads = kwargs.pop("num_key_value_heads", None)
        self.ffn_hidden_size = ffn_hidden_size
        self.hidden_act = hidden_act
        super().__init__(bos_token_id=bos_token_id,
                         eos_token_id=eos_token_id,
                         **kwargs)

0 comments on commit 84224ba

Please sign in to comment.