From b35be5403f3cf8631aefe02a35d97013657e2e47 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Thu, 30 May 2024 17:04:37 -0700 Subject: [PATCH] [Bugfix] Avoid Warnings in SparseML Activation Quantization (#5120) --- .../compressed_tensors_w8a8_statictensor.py | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_statictensor.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_statictensor.py index d16e570d12202..64a88b01cd260 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_statictensor.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_statictensor.py @@ -89,23 +89,34 @@ def create_weights(self, layer: torch.nn.Module, requires_grad=False) layer.register_parameter("weight", weight) - set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0}) - - set_weight_attrs(weight, {"weight_loader": weight_loader}) - + set_weight_attrs(weight, { + "weight_loader": weight_loader, + "input_dim": 1, + "output_dim": 0, + }) layer.register_parameter("input_scale", input_scale) - set_weight_attrs(input_scale, {"weight_loader": weight_loader}) + set_weight_attrs(input_scale, { + "weight_loader": weight_loader, + "ignore_warning": True, + }) layer.register_parameter("input_zero_point", input_zero_point) - set_weight_attrs(input_zero_point, {"weight_loader": weight_loader}) + set_weight_attrs(input_zero_point, { + "weight_loader": weight_loader, + "ignore_warning": True, + }) layer.register_parameter("weight_scale", weight_scale) - set_weight_attrs(weight_scale, {"weight_loader": weight_loader}) set_weight_attrs( weight_scale, { + "weight_loader": weight_loader, "shard_splitter": self.scales_shard_splitter, - "logical_widths": output_partition_sizes + 
+                "logical_widths": output_partition_sizes,
+                "ignore_warning": True,
             })
         layer.register_parameter("weight_zero_point", weight_zero_point)
-        set_weight_attrs(weight_zero_point, {"weight_loader": weight_loader})
+        set_weight_attrs(weight_zero_point, {
+            "weight_loader": weight_loader,
+            "ignore_warning": True
+        })
 
     def apply_weights(self, layer: torch.nn.Module, x: torch.Tensor):
         weight = layer.weight