From 1e7b03544956edacd14de8137e852368c80edb4f Mon Sep 17 00:00:00 2001
From: Alphi <52458637+HwwwwwwwH@users.noreply.github.com>
Date: Fri, 26 Jul 2024 00:42:49 +0800
Subject: [PATCH] [Bugfix] Add image placeholder for OpenAI Compatible Server
 of MiniCPM-V (#6787)

Co-authored-by: hezhihui
Co-authored-by: Cyrus Leung
---
 examples/minicpmv_example.py   | 2 ++
 vllm/entrypoints/chat_utils.py | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/examples/minicpmv_example.py b/examples/minicpmv_example.py
index 52366a7030ad0..bf20a7ea04ad4 100644
--- a/examples/minicpmv_example.py
+++ b/examples/minicpmv_example.py
@@ -4,6 +4,8 @@
 from vllm.assets.image import ImageAsset
 
 # 2.0
+# The official repo doesn't work yet, so we need to use a fork for now
+# For more details, please see: https://github.com/vllm-project/vllm/pull/4087#issuecomment-2250397630
 # MODEL_NAME = "HwwwH/MiniCPM-V-2"
 # 2.5
 MODEL_NAME = "openbmb/MiniCPM-Llama3-V-2_5"
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index dca4523d1a27d..1f6d77b828459 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -100,7 +100,9 @@ def _image_token_str(model_config: ModelConfig,
     if model_type == "phi3_v":
         # Workaround since this token is not defined in the tokenizer
         return "<|image_1|>"
-    if model_type in ("blip-2", "chatglm", "fuyu", "minicpmv", "paligemma"):
+    if model_type == "minicpmv":
+        return "(./)"
+    if model_type in ("blip-2", "chatglm", "fuyu", "paligemma"):
        # These models do not use image tokens in the prompt
         return None
     if model_type.startswith("llava"):
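
Note (not part of the patch): "(./)" is the image placeholder that MiniCPM-V's
chat template expects, and with this change the OpenAI-compatible server inserts
it into the prompt automatically whenever a chat message carries an image. Below
is a minimal sketch of exercising the fixed path with the openai client; the
server address, image URL, and prompt text are illustrative assumptions, not
taken from the patch.

# Sketch: send an image request to a locally running vLLM OpenAI server.
# Assumes the server was launched with something like:
#   python -m vllm.entrypoints.openai.api_server \
#       --model openbmb/MiniCPM-Llama3-V-2_5 --trust-remote-code
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",  # vLLM's default serving address
    api_key="EMPTY",  # vLLM accepts any key unless one is configured
)

response = client.chat.completions.create(
    model="openbmb/MiniCPM-Llama3-V-2_5",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            # With this patch, the server adds the "(./)" placeholder to the
            # prompt for the image instead of omitting it (the bug being fixed).
            {"type": "image_url",
             "image_url": {"url": "https://example.com/sample.jpg"}},  # illustrative URL
        ],
    }],
)
print(response.choices[0].message.content)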