From 178c2141d4090b581ac69027d20a1b5d1f202b8d Mon Sep 17 00:00:00 2001 From: AlpinDale <52078762+AlpinDale@users.noreply.github.com> Date: Wed, 18 Dec 2024 08:31:01 -0800 Subject: [PATCH] fix: phi3v crash with unusual image sizes (#916) --- aphrodite/modeling/models/phi3v.py | 1 - tests/models/test_phi3v.py | 27 ++++++++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/aphrodite/modeling/models/phi3v.py b/aphrodite/modeling/models/phi3v.py index eeec38c4b..7314c6d91 100644 --- a/aphrodite/modeling/models/phi3v.py +++ b/aphrodite/modeling/models/phi3v.py @@ -396,7 +396,6 @@ def input_processor_for_phi3v(ctx: InputContext, llm_inputs: LLMInputs): image_data = multi_modal_data["image"] if isinstance(image_data, Image.Image): w, h = image_data.size - w, h = _calc_hd_transform_size(width=w, height=h) image_feature_size = get_phi3v_image_feature_size(hf_config, input_width=w, diff --git a/tests/models/test_phi3v.py b/tests/models/test_phi3v.py index 9d496eb46..1b3324a30 100644 --- a/tests/models/test_phi3v.py +++ b/tests/models/test_phi3v.py @@ -3,13 +3,14 @@ from typing import List, Optional, Tuple, Type import pytest +from PIL import Image from transformers import AutoTokenizer from aphrodite.common.sequence import SampleLogprobs from aphrodite.common.utils import is_cpu, is_hip from aphrodite.multimodal.utils import rescale_image_size -from ..conftest import IMAGE_ASSETS, AphroditeRunner, HfRunner, _ImageAssets +from ..conftest import IMAGE_ASSETS, AphroditeRunner, HfRunner from .utils import check_logprobs_close pytestmark = pytest.mark.vlm @@ -58,7 +59,7 @@ def aphrodite_to_hf_output(aphrodite_output: Tuple[List[int], str, def run_test( hf_runner: Type[HfRunner], aphrodite_runner: Type[AphroditeRunner], - image_assets: _ImageAssets, + images: List[Image.Image], model: str, *, size_factors: List[float], @@ -77,7 +78,6 @@ def run_test( Note, the text input is also adjusted to abide by aphrodite contract. The text output is sanitized to be able to compare with hf. """ - images = [asset.pil_image for asset in image_assets] inputs_per_image = [( [prompt for _ in size_factors], @@ -112,7 +112,7 @@ def run_test( hf_model_kwargs = {"_attn_implementation": "eager"} with hf_runner(model, dtype=dtype, model_kwargs=hf_model_kwargs) as hf_model: - eos_token_id = hf_model.processor.tokenizer.eos_token_id + eos_token_id = hf_model.processor.eos_token_id hf_outputs_per_image = [ hf_model.generate_greedy_logprobs_limit(prompts, max_tokens, @@ -159,7 +159,7 @@ def test_models(hf_runner, aphrodite_runner, image_assets, model, size_factors, run_test( hf_runner, aphrodite_runner, - image_assets, + [asset.pil_image for asset in image_assets], model, size_factors=size_factors, dtype=dtype, @@ -167,3 +167,20 @@ def test_models(hf_runner, aphrodite_runner, image_assets, model, size_factors, num_logprobs=num_logprobs, tensor_parallel_size=1, ) + + +@pytest.mark.parametrize("model", models) +@pytest.mark.parametrize("dtype", [target_dtype]) +def test_regression_7840(hf_runner, aphrodite_runner, image_assets, model, + dtype) -> None: + run_test( + hf_runner, + aphrodite_runner, + [image_assets[0].pil_image.resize((465, 226))], + model, + size_factors=[1.0], + dtype=dtype, + max_tokens=128, + num_logprobs=10, + tensor_parallel_size=1, + )