From 723d7275026d2c6877e0555af7deca266dd5e6fb Mon Sep 17 00:00:00 2001 From: zifeitong Date: Wed, 21 Aug 2024 09:40:20 -0700 Subject: [PATCH] Update vllm/multimodal/image.py Co-authored-by: Cyrus Leung --- tests/models/test_llava_next.py | 17 ++++++----------- vllm/multimodal/image.py | 4 ++-- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/tests/models/test_llava_next.py b/tests/models/test_llava_next.py index e03ad115b93bb..9640e827b4151 100644 --- a/tests/models/test_llava_next.py +++ b/tests/models/test_llava_next.py @@ -14,16 +14,11 @@ _LIMIT_IMAGE_PER_PROMPT = 4 -_PREFACE = ( - "A chat between a curious human and an artificial intelligence assistant. " - "The assistant gives helpful, detailed, and polite answers to the human's " - "questions.") - HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ "stop_sign": - f"{_PREFACE} USER: \nWhat's the content of the image? ASSISTANT:", + "[INST] \nWhat's the content of the image? [/INST]", "cherry_blossom": - f"{_PREFACE} USER: \nWhat is the season? ASSISTANT:", + "[INST] \nWhat is the season? [/INST]", }) models = ["llava-hf/llava-v1.6-mistral-7b-hf"] @@ -256,10 +251,10 @@ def test_models_multiple_image_inputs(hf_runner, vllm_runner, image_assets, inputs = [( [ - f"{_PREFACE} USER: \nDescribe the 2 images. ASSISTANT:", # noqa: E501 - f"{_PREFACE} USER: \nDescribe the 2 images. ASSISTANT:", # noqa: E501 - f"{_PREFACE} USER: \nDescribe the 4 images. ASSISTANT:", # noqa: E501 - f"{_PREFACE} USER: \nWhat is the season? ASSISTANT:" + "[INST] \nDescribe 2 images. [/INST]", + "[INST] \nDescribe 2 images. [/INST]", + "[INST] \nDescribe 4 images. [/INST]", + "[INST] \nWhat is the season? [/INST]" ], [ [stop_sign, cherry_blossom], diff --git a/vllm/multimodal/image.py b/vllm/multimodal/image.py index 4dd6d42f24051..a91d93494f0db 100644 --- a/vllm/multimodal/image.py +++ b/vllm/multimodal/image.py @@ -77,11 +77,11 @@ def repeat_and_pad_image_tokens( prompt_parts = prompt.split(image_token_str, maxsplit=len(repeat_count)) new_prompt = "" - for i in range(len(repeat_count)): + for i, repeat_count_item in enumerate(repeat_count): replacement_str = "".join( repeat_and_pad_token( image_token_str, - repeat_count=repeat_count[i], + repeat_count=repeat_count_item, pad_token_left=pad_token_str_left, pad_token_right=pad_token_str_right, ))