vllm-project · simon-mo · Oct 18, 2024 · Oct 15, 2024 · Oct 16, 2024 · Oct 16, 2024
diff --git a/requirements-common.txt b/requirements-common.txt
@@ -31,4 +31,4 @@ pyyaml
 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
 setuptools>=74.1.1; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
 einops # Required for Qwen2-VL.
-compressed-tensors == 0.6.0 # required for compressed-tensors
+compressed-tensors == 0.7.0 # required for compressed-tensors quantization support
@@ -100,11 +100,12 @@ def from_config(cls, config: Dict[str, Any]) -> "CompressedTensorsConfig":
                 target_scheme_map[target][
                     "weights"] = QuantizationArgs.parse_obj(
                         quant_config.get("weights"))
-                try:
+
+                if is_activation_quantization_format(quant_format):
                     target_scheme_map[target][
                         "input_activations"] = QuantizationArgs.parse_obj(
                             quant_config.get("input_activations"))
-                except Exception:
+                else:
                     target_scheme_map[target]["input_activations"] = None
 
         return cls(target_scheme_map=target_scheme_map,
@@ -244,9 +245,8 @@ def _get_scheme_from_parts(
                     group_size=weight_quant.group_size,
                     actorder=weight_quant.actorder)
 
-        # Detect If Activation Quantization.
-        # TODO @dsikka: clean-up conditions
-        if is_activation_quantization_format(self.quant_format):
+        # input_quant is non-None only when is_activation_quantization_format is True
+        if input_quant:
             if self._is_fp8_w8a8(weight_quant, input_quant):
                 is_fp8_w8a8_supported = self._check_scheme_supported(
                     CompressedTensorsW8A8Fp8.get_min_capability(), error=False)