From a6b612163e44b3dee3a0e879c9cc5326fe2536fd Mon Sep 17 00:00:00 2001 From: Dipika Date: Tue, 15 Oct 2024 16:47:33 +0000 Subject: [PATCH] stop silently failing when input_act args are incorrect --- .../compressed_tensors/compressed_tensors.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py index a371f1f4ad2cb..658303a01fb3d 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py @@ -100,11 +100,12 @@ def from_config(cls, config: Dict[str, Any]) -> "CompressedTensorsConfig": target_scheme_map[target][ "weights"] = QuantizationArgs.parse_obj( quant_config.get("weights")) - try: + + if is_activation_quantization_format(quant_format): target_scheme_map[target][ "input_activations"] = QuantizationArgs.parse_obj( quant_config.get("input_activations")) - except Exception: + else: target_scheme_map[target]["input_activations"] = None return cls(target_scheme_map=target_scheme_map, @@ -244,9 +245,8 @@ def _get_scheme_from_parts( group_size=weight_quant.group_size, actorder=weight_quant.actorder) - # Detect If Activation Quantization. - # TODO @dsikka: clean-up conditions - if is_activation_quantization_format(self.quant_format): + # input_quant is None unless is_activation_quantization_format is True + if input_quant: if self._is_fp8_w8a8(weight_quant, input_quant): is_fp8_w8a8_supported = self._check_scheme_supported( CompressedTensorsW8A8Fp8.get_min_capability(), error=False)