From 7c1b18ac79108d8e4e0b4222a6655579950fffb1 Mon Sep 17 00:00:00 2001 From: chenqianfzh <51831990+chenqianfzh@users.noreply.github.com> Date: Fri, 26 Jul 2024 18:32:20 -0700 Subject: [PATCH] enforce eager mode with bnb quantization temporarily (#6846) --- vllm/config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/config.py b/vllm/config.py index 6403a53f86281..92fde449b43fd 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -282,6 +282,10 @@ def verify_with_parallel_config( raise ValueError( "BitAndBytes quantization with TP or PP is not supported yet.") + if self.quantization == "bitsandbytes" and self.enforce_eager is False: + raise ValueError( + "BitAndBytes with enforce_eager = False is not supported yet.") + def get_hf_config_sliding_window(self) -> Optional[int]: """Get the sliding window size, or None if disabled."""