diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index d1d4ed4577d62..d3c1ab0a98dcd 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -269,7 +269,7 @@ def __init__(
             model_config.trust_remote_code,
             model_config.dtype,
             model_config.max_model_len,
-            load_config.download_dir,
+            load_config.download_dir,
             load_config.load_format,
             parallel_config.tensor_parallel_size,
             parallel_config.pipeline_parallel_size,
diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index d322838a9367c..12a9fa0aee6ee 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -148,18 +148,17 @@ def _get_model_initialization_kwargs(
     return extra_kwargs
 
 
-def build_model(model_class: Type[nn.Module],
-                hf_config: PretrainedConfig,
+def build_model(model_class: Type[nn.Module], hf_config: PretrainedConfig,
                 cache_config: Optional[CacheConfig],
-                quant_config: Optional[QuantizationConfig],
-                *,
+                quant_config: Optional[QuantizationConfig], *,
                 lora_config: Optional[LoRAConfig],
                 multimodal_config: Optional[MultiModalConfig],
                 scheduler_config: Optional[SchedulerConfig],
                 pooling_config: Optional[PoolingConfig] = None) -> nn.Module:
     extra_kwargs = _get_model_initialization_kwargs(model_class, lora_config,
                                                     multimodal_config,
-                                                    scheduler_config)
+                                                    scheduler_config
+                                                    )
     return model_class(config=hf_config,
                        cache_config=cache_config,