From 176f6d39832a9a7523c8e8a2aa3c8a5c4301cced Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Thu, 8 May 2025 16:54:43 +0800
Subject: [PATCH 1/2] update g_idx

Signed-off-by: Kaihui-intel
---
 neural_compressor/transformers/quantization/utils.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/neural_compressor/transformers/quantization/utils.py b/neural_compressor/transformers/quantization/utils.py
index df7785183c2..de939cf2652 100644
--- a/neural_compressor/transformers/quantization/utils.py
+++ b/neural_compressor/transformers/quantization/utils.py
@@ -206,14 +206,8 @@ def _replace_linear(
                         device=device,
                         use_optimum_format=getattr(module, "use_optimum_format", True),
                     )
-                    if quantization_config.quant_method.value == "gptq":
-                        g_idx = getattr(
-                            module,
-                            "g_idx",
-                            torch.zeros(in_features, dtype=torch.int32).to(device),
-                        )
-                    else:
-                        g_idx = None
+                    # g_idx is only present when using GPTQ quantization method
+                    g_idx = module.g_idx if hasattr(module, "g_idx") else None
                     model._modules[name].set_scales_zps_gidx(
                         (
                             module.scales

From 97f8b6222784255fd92f801c5a4bcadf7e413ee8 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Fri, 9 May 2025 17:04:55 +0800
Subject: [PATCH 2/2] update build woq model

Signed-off-by: Kaihui-intel
---
 neural_compressor/transformers/models/modeling_auto.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/transformers/models/modeling_auto.py b/neural_compressor/transformers/models/modeling_auto.py
index 20f207eb34d..b38fa35d8bf 100644
--- a/neural_compressor/transformers/models/modeling_auto.py
+++ b/neural_compressor/transformers/models/modeling_auto.py
@@ -70,6 +70,7 @@ def build_woq_model(model, quantization_config):
                 not getattr(quantization_config, "sym", False),
             )
             use_optimum_format = True
+            g_idx = hasattr(m, "g_idx") and m.g_idx is not None

             with init_empty_weights():
                 new_module = INCWeightOnlyLinear(
@@ -80,7 +81,7 @@
                     group_size=quantization_config.group_size,
                     zp=zp,
                     bias=m.bias is not None,
-                    g_idx=True,
+                    g_idx=g_idx,
                     use_optimum_format=use_optimum_format,
                 )
             set_module(model, n, new_module)