From 8f4c6fa8add71754b96b0c0a21c6bc83198a57da Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg Date: Thu, 12 Dec 2024 13:56:40 -0600 Subject: [PATCH] Disable auto-enabling of chunked prefill on the ROCm platform for long contexts due to poor performance Signed-off-by: Gregory Shtrasberg --- vllm/engine/arg_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 3db069ec64ee4..36c3cfec24915 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1063,7 +1063,8 @@ def create_engine_config(self, if (is_gpu and not use_sliding_window and not use_spec_decode and not self.enable_lora and not self.enable_prompt_adapter - and model_config.task != "embedding"): + and model_config.task != "embedding" + and not current_platform.is_rocm()): self.enable_chunked_prefill = True logger.warning( "Chunked prefill is enabled by default for models with "