From 1455144cb8a7ff5751f64df1f9c2d704ee17a8d3 Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Wed, 23 Oct 2024 11:31:57 +0000 Subject: [PATCH 1/2] Fix `_init_vision_model` in `nvlm_d` model --- vllm/model_executor/models/nvlm_d.py | 37 +++++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/vllm/model_executor/models/nvlm_d.py b/vllm/model_executor/models/nvlm_d.py index 3e3c3b05879fb..692ee51a81bc2 100644 --- a/vllm/model_executor/models/nvlm_d.py +++ b/vllm/model_executor/models/nvlm_d.py @@ -58,12 +58,31 @@ def _init_mlp1(self, config: PretrainedConfig) -> nn.Sequential: nn.Linear(llm_intermediate_size, llm_hidden_size, bias=False), ) - def _init_vision_model(self, config: PretrainedConfig, - quant_config: Optional[QuantizationConfig], - num_hidden_layers: int): - # We added additional dummy heads to the original num of heads to make - # the number of heads divisible by 8. - return InternVisionModel(config.vision_config, - quant_config=quant_config, - num_hidden_layers_override=num_hidden_layers, - num_dummy_heads=7) + def _init_vision_model( + self, + config: PretrainedConfig, + quant_config: Optional[QuantizationConfig], + *, + is_mono: bool, + prefix: str, + ): + if is_mono: + vision_feature_layer = config.select_layer + if vision_feature_layer < 0: + num_hidden_layers = config.vision_config.num_hidden_layers \ + + vision_feature_layer + 1 + else: + num_hidden_layers = vision_feature_layer + 1 + + # We added additional dummy heads to the original num of heads to + # make the number of heads divisible by 8. + return InternVisionModel( + config.vision_config, + quant_config=quant_config, + num_hidden_layers_override=num_hidden_layers, + num_dummy_heads=7, + prefix=prefix, + ) + else: + msg = "Monolith mode is not applicable to NVLM_D" + raise NotImplementedError(msg) From 48ebebc10ba8faec84c9e3c8f11dcd07257d3e8a Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Wed, 23 Oct 2024 20:03:03 +0800 Subject: [PATCH 2/2] Update vllm/model_executor/models/nvlm_d.py Co-authored-by: Isotr0py <2037008807@qq.com> --- vllm/model_executor/models/nvlm_d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/nvlm_d.py b/vllm/model_executor/models/nvlm_d.py index 692ee51a81bc2..df4fd0a3256e9 100644 --- a/vllm/model_executor/models/nvlm_d.py +++ b/vllm/model_executor/models/nvlm_d.py @@ -66,7 +66,7 @@ def _init_vision_model( is_mono: bool, prefix: str, ): - if is_mono: + if not is_mono: vision_feature_layer = config.select_layer if vision_feature_layer < 0: num_hidden_layers = config.vision_config.num_hidden_layers \