From 429d17e428932083e739ad51e3f49661fd38ff9c Mon Sep 17 00:00:00 2001 From: yansh97 Date: Wed, 27 Nov 2024 13:55:23 +0800 Subject: [PATCH] [bugfix] fix the default value of llm_int8_threshold in BitsAndBytesConfig (#10657) Signed-off-by: Andrew Feldman --- vllm/model_executor/layers/quantization/bitsandbytes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/quantization/bitsandbytes.py b/vllm/model_executor/layers/quantization/bitsandbytes.py index 6a0de3034142a..e01c713dd14db 100644 --- a/vllm/model_executor/layers/quantization/bitsandbytes.py +++ b/vllm/model_executor/layers/quantization/bitsandbytes.py @@ -26,7 +26,7 @@ def __init__( llm_int8_enable_fp32_cpu_offload: bool = False, llm_int8_has_fp16_weight: bool = False, llm_int8_skip_modules: Optional[List[str]] = None, - llm_int8_threshold: float = 0.0, + llm_int8_threshold: float = 6.0, ) -> None: self.load_in_8bit = load_in_8bit @@ -103,7 +103,7 @@ def get_safe_value(config, keys, default_value=None): ["llm_int8_skip_modules"], default_value=[]) llm_int8_threshold = get_safe_value(config, ["llm_int8_threshold"], - default_value=0.0) + default_value=6.0) return cls( load_in_8bit=load_in_8bit,