[Model] Molmo vLLM Integration (vllm-project#9016)

Co-authored-by: sanghol <[email protected]> Co-authored-by: Roger Wang <[email protected]> Co-authored-by: Roger Wang <[email protected]>
rasmith · Oct 14, 2024 · dfe43a2
1 parent 16b24e7
commit dfe43a2
Show file tree

Hide file tree

Showing 7 changed files with 1,319 additions and 3 deletions.
diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst
@@ -399,6 +399,12 @@ Text Generation
     - :code:`meta-llama/Llama-3.2-90B-Vision-Instruct`, :code:`meta-llama/Llama-3.2-11B-Vision`, etc.
     -
     -
+  * - :code:`MolmoForCausalLM`
+    - Molmo
+    - Image
+    - :code:`allenai/Molmo-7B-D-0924`, :code:`allenai/Molmo-72B-0924`, etc.
+    -
+    - ✅︎
   * - :code:`NVLM_D_Model`
     - NVLM-D 1.0
     - Image\ :sup:`E+`

diff --git a/examples/offline_inference_vision_language.py b/examples/offline_inference_vision_language.py
@@ -300,6 +300,23 @@ def run_mllama(question: str, modality: str):
     return llm, prompt, stop_token_ids
 
 
+# Molmo
+def run_molmo(question: str, modality: str):
+    assert modality == "image"  # this example only handles image inputs
+
+    model_name = "allenai/Molmo-7B-D-0924"
+
+    llm = LLM(
+        model=model_name,
+        trust_remote_code=True,
+        dtype="bfloat16",
+    )
+
+    prompt = question  # the question is used verbatim as the prompt
+    stop_token_ids = None  # no stop token ids are set for this model
+    return llm, prompt, stop_token_ids
+
+
 # GLM-4v
 def run_glm4v(question: str, modality: str):
     assert modality == "image"
@@ -331,6 +348,7 @@ def run_glm4v(question: str, modality: str):
     "qwen_vl": run_qwen_vl,
     "qwen2_vl": run_qwen2_vl,
     "mllama": run_mllama,
+    "molmo": run_molmo,
     "glm4v": run_glm4v,
 }
 

diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
@@ -163,6 +163,8 @@ def _placeholder_str(self, modality: ModalityStr,
                 return "<|image|>"
             if model_type == "qwen2_vl":
                 return "<|vision_start|><|image_pad|><|vision_end|>"
+            if model_type == "molmo":
+                return ""
 
             raise TypeError(f"Unknown model type: {model_type}")
         elif modality == "audio":

diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py
@@ -20,4 +20,4 @@
     "supports_multimodal",
     "SupportsPP",
     "supports_pp",
-]
+]