diff --git a/vllm/config.py b/vllm/config.py index 0b2deb0cbb07a..ae3d5d19285c5 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -108,8 +108,6 @@ class ModelConfig: can not be gathered from the vllm arguments. config_format: The config format which shall be loaded. Defaults to 'auto' which defaults to 'hf'. - bert_config: tokenizationconfiguration dictionary for a given - Sentence Transformer BERT model. mm_processor_kwargs: Arguments to be forwarded to the model's processor for multi-modal data, e.g., image processor. pooling_type: Used to configure the pooling method in the embedding diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 43ab68a5de4f9..d8f1ef60f3eba 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -15,6 +15,7 @@ SpeculativeConfig, TaskOption, TokenizerPoolConfig) from vllm.executor.executor_base import ExecutorBase from vllm.logger import init_logger +from vllm.model_executor.layers.pooler import PoolingType from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS from vllm.transformers_utils.config import ( maybe_register_config_serialize_by_value) @@ -850,7 +851,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: parser.add_argument( '--pooling-type', - choices=['LAST', 'ALL', 'CLS', 'STEP', 'MEAN'], + choices=[pt.name for pt in PoolingType], default=None, help='Used to configure the pooling method in the embedding model.' )