Disable auto-enabling chunked prefill on the ROCm platform for long contexts due to poor performance (#324)

Signed-off-by: Gregory Shtrasberg <[email protected]>
ROCm · Dec 12, 2024 · 405e730 · 405e730
1 parent 7efa6e0
commit 405e730
Showing 1 changed file with 2 additions and 1 deletion.
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
@@ -1063,7 +1063,8 @@ def create_engine_config(self,
                 if (is_gpu and not use_sliding_window and not use_spec_decode
                         and not self.enable_lora
                         and not self.enable_prompt_adapter
-                        and model_config.task != "embedding"):
+                        and model_config.task != "embedding"
+                        and not current_platform.is_rocm()):
                     self.enable_chunked_prefill = True
                     logger.warning(
                         "Chunked prefill is enabled by default for models with "