From cf793c007d9947185b19f95cd08c82199755d3ef Mon Sep 17 00:00:00 2001 From: Matthew Wong Date: Fri, 19 Jul 2024 18:20:19 +0000 Subject: [PATCH] Include partial support warning for Phi3V --- vllm/model_executor/models/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py index 00781ae4eb2c5..f3c3fe31c68a3 100644 --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -100,7 +100,11 @@ _ROCM_SWA_REASON, "PaliGemmaForConditionalGeneration": ("ROCm flash attention does not yet " - "fully support 32-bit precision on PaliGemma") + "fully support 32-bit precision on PaliGemma"), + "Phi3VForCausalLM": + ("ROCm Triton flash attention may run into compilation errors due to " + "excessive use of shared memory. If this happens, disable Triton FA " + "by setting `VLLM_USE_TRITON_FLASH_ATTN=0`") }