From b4232c396be0d52100ce4296d1aeff01d92d074e Mon Sep 17 00:00:00 2001
From: changwangss <chang1.wang@intel.com>
Date: Tue, 3 Dec 2024 22:17:11 -0800
Subject: [PATCH] Fix load_empty_model: forward loading_kwargs for layer-wise loading

Signed-off-by: changwangss <chang1.wang@intel.com>
---
 optimum/intel/neural_compressor/quantization.py | 5 +++--
 setup.py                                        | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/optimum/intel/neural_compressor/quantization.py b/optimum/intel/neural_compressor/quantization.py
index cf3f8dc07e..8d6d44698d 100644
--- a/optimum/intel/neural_compressor/quantization.py
+++ b/optimum/intel/neural_compressor/quantization.py
@@ -375,9 +375,10 @@ def _weight_only_quantization(
 
     low_cpu_mem_usage = True
 
-    if getattr(quantization_config, "use_layer_wise", False) and token is None and subfolder == "":
+    if getattr(quantization_config, "use_layer_wise", False):
         from neural_compressor.torch import load_empty_model
-        model = load_empty_model(model_id, cls=model_class, trust_remote_code=trust_remote_code)
+
+        model = load_empty_model(model_id, cls=model_class, **loading_kwargs)
     else:
         model = model_class.from_pretrained(model_id, low_cpu_mem_usage=low_cpu_mem_usage, **loading_kwargs)
 
diff --git a/setup.py b/setup.py
index 3a15828223..4e28426cc6 100644
--- a/setup.py
+++ b/setup.py
@@ -64,7 +64,7 @@
     "nncf": ["nncf>=2.14.0"],
     "openvino": ["nncf>=2.14.0", "openvino>=2024.5.0", "openvino-tokenizers>=2024.5.0"],
     "neural-compressor": [
-        "neural_compressor[pt]@git+https://github.com/intel/neural-compressor.git@5c72158a6799bdf0334ef36fbd493eeed3b62d9f",
+        "neural_compressor[pt]@git+https://github.com/intel/neural-compressor.git@3bc8e4d0035445c51b2bd5ff6196b9b19e92b3dd",
         "accelerate",
         "transformers<4.46",
     ],