diff --git a/vllm/model_executor/layers/quantization/awq_marlin.py b/vllm/model_executor/layers/quantization/awq_marlin.py index 8bd7a0a12135b..5ffbb8e854e87 100644 --- a/vllm/model_executor/layers/quantization/awq_marlin.py +++ b/vllm/model_executor/layers/quantization/awq_marlin.py @@ -25,7 +25,7 @@ class AWQMarlinConfig(QuantizationConfig): def __init__(self, weight_bits: int, group_size: int, has_zp: bool, lm_head_quantized: bool) -> None: self.weight_bits = weight_bits - self.pack_factor = 32 // self.weight_bits # packed into int32 + self.pack_factor = 32 // self.weight_bits # packed into 32bits self.group_size = group_size self.has_zp = has_zp self.lm_head_quantized = lm_head_quantized