Include partial support warning for Phi3V

vllm-project · Jul 19, 2024 · cf793c0
1 parent 0f30145
commit cf793c0
Showing 1 changed file with 5 additions and 1 deletion.
diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py
@@ -100,7 +100,11 @@
     _ROCM_SWA_REASON,
     "PaliGemmaForConditionalGeneration":
     ("ROCm flash attention does not yet "
-     "fully support 32-bit precision on PaliGemma")
+     "fully support 32-bit precision on PaliGemma"),
+    "Phi3VForCausalLM":
+    ("ROCm Triton flash attention may run into compilation errors due to "
+     "excessive use of shared memory. If this happens, disable Triton FA "
+     "by setting `VLLM_USE_TRITON_FLASH_ATTN=0`")
 }