diff --git a/tests/model_executor/test_model_load_with_params.py b/tests/model_executor/test_model_load_with_params.py
index 0517cc397602c..7e5e2780d3916 100644
--- a/tests/model_executor/test_model_load_with_params.py
+++ b/tests/model_executor/test_model_load_with_params.py
@@ -29,8 +29,8 @@ def test_model_loading_with_params(vllm_runner):
         model_tokenizer = model.model.llm_engine.tokenizer

         # asserts on the bert model config file
-        assert model_config.bert_config["max_seq_length"] == 512
-        assert model_config.bert_config["do_lower_case"]
+        assert model_config.encoder_config["max_seq_length"] == 512
+        assert model_config.encoder_config["do_lower_case"]

         # asserts on the pooling config files
         assert model_config.pooler_config.pooling_type == PoolingType.CLS.name
diff --git a/tests/test_config.py b/tests/test_config.py
index b8aa8caca7164..68950106e8898 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -168,7 +168,7 @@ def test_get_bert_tokenization_sentence_transformer_config():
         revision=None,
     )

-    bert_bge_model_config = bge_model_config._get_bert_config()
+    bert_bge_model_config = bge_model_config._get_encoder_config()

    assert bert_bge_model_config["max_seq_length"] == 512
    assert bert_bge_model_config["do_lower_case"]
diff --git a/vllm/config.py b/vllm/config.py
index 46d0ae3a0dc40..a80fb726a5ec6 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -197,7 +197,7 @@ def __init__(
                                     code_revision, rope_scaling, rope_theta,
                                     config_format)
         self.hf_text_config = get_hf_text_config(self.hf_config)
-        self.bert_config = self._get_bert_config()
+        self.encoder_config = self._get_encoder_config()
         self.hf_image_processor_config = get_hf_image_processor_config(
             self.model, revision)
         self.dtype = _get_and_verify_dtype(self.hf_text_config, dtype)
@@ -231,7 +231,7 @@ def __init__(
             disable_sliding_window=self.disable_sliding_window,
             sliding_window_len=self.get_hf_config_sliding_window(),
             spec_target_max_model_len=spec_target_max_model_len,
-            bert_config=self.bert_config)
+            encoder_config=self.encoder_config)
         self.served_model_name = get_served_model_name(model,
                                                        served_model_name)
         self.multimodal_config = self._init_multimodal_config(
@@ -275,7 +275,7 @@ def _init_multimodal_config(

         return None

-    def _get_bert_config(self):
+    def _get_encoder_config(self):
         return get_sentence_transformer_tokenizer_config(
             self.model, self.revision)

@@ -1808,7 +1808,7 @@ def _get_and_verify_max_len(
     disable_sliding_window: bool,
     sliding_window_len: Optional[Union[int, List[Optional[int]]]],
     spec_target_max_model_len: Optional[int] = None,
-    bert_config: Optional[Any] = None,
+    encoder_config: Optional[Any] = None,
 ) -> int:
     """Get and verify the model's maximum length."""
     derived_max_model_len = float("inf")
@@ -1891,8 +1891,8 @@ def _get_and_verify_max_len(
                 "original_max_position_embeddings"]
             derived_max_model_len *= scaling_factor

-    if bert_config and "max_seq_length" in bert_config:
-        derived_max_model_len = bert_config["max_seq_length"]
+    if encoder_config and "max_seq_length" in encoder_config:
+        derived_max_model_len = encoder_config["max_seq_length"]

     # If the user specified a max length, make sure it is smaller than the
     # derived length from the HF model config.
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 6b371343fe714..78bca391c028e 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -377,15 +377,15 @@ def get_sentence_transformer_tokenizer_config(model: str,
             "sentence_xlm-roberta_config.json",
             "sentence_xlnet_config.json",
     ]:
-        bert_dict = get_hf_file_to_dict(config_name, model, revision, token)
-        if bert_dict:
+        encoder_dict = get_hf_file_to_dict(config_name, model, revision, token)
+        if encoder_dict:
             break

-    if not bert_dict:
+    if not encoder_dict:
         return None

-    if all(k in bert_dict for k in ("max_seq_length", "do_lower_case")):
-        return bert_dict
+    if all(k in encoder_dict for k in ("max_seq_length", "do_lower_case")):
+        return encoder_dict

     return None

diff --git a/vllm/transformers_utils/tokenizer_group/__init__.py b/vllm/transformers_utils/tokenizer_group/__init__.py
index 95ac1d4e6baf7..6a114b513f382 100644
--- a/vllm/transformers_utils/tokenizer_group/__init__.py
+++ b/vllm/transformers_utils/tokenizer_group/__init__.py
@@ -25,9 +25,9 @@ def init_tokenizer_from_configs(model_config: ModelConfig,
         trust_remote_code=model_config.trust_remote_code,
         revision=model_config.tokenizer_revision)

-    if (model_config.bert_config is not None
-            and "do_lower_case" in model_config.bert_config):
-        init_kwargs["do_lower_case"] = model_config.bert_config[
-            "do_lower_case"]
+    if (model_config.encoder_config is not None
+            and "do_lower_case" in model_config.encoder_config):
+        init_kwargs["do_lower_case"] = model_config.encoder_config[
+            "do_lower_case"]

     return get_tokenizer_group(parallel_config.tokenizer_pool_config,