From 687e365ac55c3f9f1face8f39368d432940908de Mon Sep 17 00:00:00 2001
From: Jeff Cook
Date: Mon, 9 Dec 2024 09:01:51 -0700
Subject: [PATCH 1/3] Quick fix to make Pixtral-HF load correctly again after 39e227c7ae.

---
 vllm/model_executor/models/llava.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py
index 65c6bd07bfff0..38b27f68567eb 100644
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -218,11 +218,11 @@ def _get_dummy_mm_kwargs(
         image_processor = hf_processor.image_processor  # type: ignore
         hf_inputs = image_processor.preprocess(data['image'],
                                                return_tensors="pt")
-        is_pixtral = isinstance(hf_processor, PixtralProcessor)
+        if 'is_pixtral' not in hf_inputs:
+            hf_inputs['is_pixtral'] = isinstance(hf_processor, PixtralProcessor)
 
         return MultiModalKwargs(
-            **hf_inputs,
-            is_pixtral=torch.tensor(is_pixtral),
+            **hf_inputs
         )
 
 

From 14703eac2e5e1157c823ec42596586a797ccbc87 Mon Sep 17 00:00:00 2001
From: Jeff Cook
Date: Mon, 9 Dec 2024 09:32:11 -0700
Subject: [PATCH 2/3] format.sh fix for previous commit.

---
 vllm/model_executor/models/llava.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py
index 38b27f68567eb..9f17a0dfe82ff 100644
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -219,11 +219,10 @@ def _get_dummy_mm_kwargs(
         hf_inputs = image_processor.preprocess(data['image'],
                                                return_tensors="pt")
         if 'is_pixtral' not in hf_inputs:
-            hf_inputs['is_pixtral'] = isinstance(hf_processor, PixtralProcessor)
+            hf_inputs['is_pixtral'] = isinstance(hf_processor,
+                                                 PixtralProcessor)
 
-        return MultiModalKwargs(
-            **hf_inputs
-        )
+        return MultiModalKwargs(**hf_inputs)
 
 
 class LlavaLikeConfig(Protocol):

From dd1a08bc79157b94fbc57bf2a3349d3946ebefa1 Mon Sep 17 00:00:00 2001
From: Jeff Cook
Date: Thu, 12 Dec 2024 08:46:13 -0700
Subject: [PATCH 3/3] Update vllm/model_executor/models/llava.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the extra check ensuring `is_pixtral` is available; apparently
reasonable confidence that it'll be there on all relevant codepaths 🤷🏻

Co-authored-by: Isotr0py <2037008807@qq.com>
---
 vllm/model_executor/models/llava.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py
index 9f17a0dfe82ff..53eef72dd5f91 100644
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -218,9 +218,6 @@ def _get_dummy_mm_kwargs(
         image_processor = hf_processor.image_processor  # type: ignore
         hf_inputs = image_processor.preprocess(data['image'],
                                                return_tensors="pt")
-        if 'is_pixtral' not in hf_inputs:
-            hf_inputs['is_pixtral'] = isinstance(hf_processor,
-                                                 PixtralProcessor)
 
         return MultiModalKwargs(**hf_inputs)
 