diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_statictensor.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_statictensor.py index d16e570d12202..64a88b01cd260 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_statictensor.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_statictensor.py @@ -89,23 +89,34 @@ def create_weights(self, layer: torch.nn.Module, requires_grad=False) layer.register_parameter("weight", weight) - set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0}) - - set_weight_attrs(weight, {"weight_loader": weight_loader}) - + set_weight_attrs(weight, { + "weight_loader": weight_loader, + "input_dim": 1, + "output_dim": 0, + }) layer.register_parameter("input_scale", input_scale) - set_weight_attrs(input_scale, {"weight_loader": weight_loader}) + set_weight_attrs(input_scale, { + "weight_loader": weight_loader, + "ignore_warning": True, + }) layer.register_parameter("input_zero_point", input_zero_point) - set_weight_attrs(input_zero_point, {"weight_loader": weight_loader}) + set_weight_attrs(input_zero_point, { + "weight_loader": weight_loader, + "ignore_warning": True, + }) layer.register_parameter("weight_scale", weight_scale) - set_weight_attrs(weight_scale, {"weight_loader": weight_loader}) set_weight_attrs( weight_scale, { + "weight_loader": weight_loader, "shard_splitter": self.scales_shard_splitter, - "logical_widths": output_partition_sizes + "logical_widths": output_partition_sizes, + "ignore_warning": True, }) layer.register_parameter("weight_zero_point", weight_zero_point) - set_weight_attrs(weight_zero_point, {"weight_loader": weight_loader}) + set_weight_attrs(weight_zero_point, { + "weight_loader": weight_loader, + "ignore_warning": True + }) def apply_weights(self, layer: torch.nn.Module, x: torch.Tensor): weight = layer.weight