diff --git a/examples/minicpmv_example.py b/examples/minicpmv_example.py
index 52366a7030ad0..bf20a7ea04ad4 100644
--- a/examples/minicpmv_example.py
+++ b/examples/minicpmv_example.py
@@ -4,6 +4,8 @@
 from vllm.assets.image import ImageAsset
 
 # 2.0
+# The official repo doesn't work yet, so we need to use a fork for now.
+# For more details, see: https://github.com/vllm-project/vllm/pull/4087#issuecomment-2250397630
 # MODEL_NAME = "HwwwH/MiniCPM-V-2"
 # 2.5
 MODEL_NAME = "openbmb/MiniCPM-Llama3-V-2_5"
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index dca4523d1a27d..1f6d77b828459 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -100,7 +100,9 @@ def _image_token_str(model_config: ModelConfig,
     if model_type == "phi3_v":
         # Workaround since this token is not defined in the tokenizer
         return "<|image_1|>"
-    if model_type in ("blip-2", "chatglm", "fuyu", "minicpmv", "paligemma"):
+    if model_type == "minicpmv":
+        return "(./)"
+    if model_type in ("blip-2", "chatglm", "fuyu", "paligemma"):
        # These models do not use image tokens in the prompt
         return None
     if model_type.startswith("llava"):
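
For context, a minimal standalone sketch of the dispatch logic the second hunk implements in `_image_token_str`: minicpmv is split out of the "no image token" group and given the literal placeholder `"(./)"`. The function name `resolve_image_token` and the trailing `NotImplementedError` fallback below are illustrative assumptions, not vLLM's actual API.

```python
from typing import Optional


def resolve_image_token(model_type: str) -> Optional[str]:
    """Sketch of the per-model image-token dispatch in _image_token_str."""
    if model_type == "phi3_v":
        # Workaround since this token is not defined in the tokenizer.
        return "<|image_1|>"
    if model_type == "minicpmv":
        # Per this diff, MiniCPM-V uses this literal placeholder
        # in the prompt text.
        return "(./)"
    if model_type in ("blip-2", "chatglm", "fuyu", "paligemma"):
        # These models do not use image tokens in the prompt.
        return None
    # Assumption: the real function handles more families (e.g. llava);
    # this sketch only covers the branches visible in the hunk.
    raise NotImplementedError(f"unhandled model type: {model_type}")


# Quick checks mirroring the behavior change in this diff:
assert resolve_image_token("minicpmv") == "(./)"
assert resolve_image_token("blip-2") is None
```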