update

Signed-off-by: yan ma <[email protected]>
vllm-project · Nov 15, 2024 · 0c2ece4 · 0c2ece4
1 parent 855b506
commit 0c2ece4
Showing 1 changed file with 16 additions and 12 deletions.
diff --git a/vllm/model_executor/layers/quantization/ipex_quant.py b/vllm/model_executor/layers/quantization/ipex_quant.py
@@ -11,6 +11,8 @@
 from vllm.model_executor.layers.quantization.gptq import GPTQLinearMethod
 from vllm.platforms import current_platform
 
+MIN_IPEX_VERSION = "2.5.0"
+
 
 class IPEXConfig(QuantizationConfig):
     """INT8 quantization config class using IPEX for the CPU/XPU backend,
@@ -128,15 +130,16 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
 
         try:
             import intel_extension_for_pytorch as ipex
-            if ipex.__version__ < "2.5.0":
-                raise ImportError("intel_extension_for_pytorch version is "
-                                  "wrong. Please install "
-                                  "intel_extension_for_pytorch>=2.5.0.")
+            if ipex.__version__ < MIN_IPEX_VERSION:
+                raise ImportError(
+                    "intel_extension_for_pytorch version is "
+                    "wrong. Please install "
+                    f"intel_extension_for_pytorch>={MIN_IPEX_VERSION}.")
         except ImportError as err:
             raise ImportError(
                 "Please install "
-                "intel_extension_for_pytorch>=2.5.0 via "
-                "`pip install intel_extension_for_pytorch>=2.5.0`"
+                f"intel_extension_for_pytorch>={MIN_IPEX_VERSION} via "
+                f"`pip install intel_extension_for_pytorch>={MIN_IPEX_VERSION}`"
                 " to use IPEX-AWQ linear method.") from err
         # Using the compute dtype (lowp_mode) as INT8 to leverage instructions
         # with better performance.
@@ -193,15 +196,16 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
 
         try:
             import intel_extension_for_pytorch as ipex
-            if ipex.__version__ < "2.5.0":
-                raise ImportError("intel_extension_for_pytorch version is "
-                                  "wrong. Please install "
-                                  "intel_extension_for_pytorch>=2.5.0.")
+            if ipex.__version__ < MIN_IPEX_VERSION:
+                raise ImportError(
+                    "intel_extension_for_pytorch version is "
+                    "wrong. Please install "
+                    f"intel_extension_for_pytorch>={MIN_IPEX_VERSION}.")
         except ImportError as err:
             raise ImportError(
                 "Please install "
-                "intel_extension_for_pytorch>=2.5.0 via "
-                "`pip install intel_extension_for_pytorch>=2.5.0`"
+                f"intel_extension_for_pytorch>={MIN_IPEX_VERSION} via "
+                f"`pip install intel_extension_for_pytorch>={MIN_IPEX_VERSION}`"
                 " to use IPEX-AWQ linear method.") from err
 
         # Using the compute dtype (lowp_mode) as INT8 to leverage instructions