From b4232c396be0d52100ce4296d1aeff01d92d074e Mon Sep 17 00:00:00 2001 From: changwangss Date: Tue, 3 Dec 2024 22:17:11 -0800 Subject: [PATCH] fix load_empty_model Signed-off-by: changwangss --- optimum/intel/neural_compressor/quantization.py | 5 +++-- setup.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/optimum/intel/neural_compressor/quantization.py b/optimum/intel/neural_compressor/quantization.py index cf3f8dc07e..8d6d44698d 100644 --- a/optimum/intel/neural_compressor/quantization.py +++ b/optimum/intel/neural_compressor/quantization.py @@ -375,9 +375,10 @@ def _weight_only_quantization( low_cpu_mem_usage = True - if getattr(quantization_config, "use_layer_wise", False) and token is None and subfolder == "": + if getattr(quantization_config, "use_layer_wise", False): from neural_compressor.torch import load_empty_model - model = load_empty_model(model_id, cls=model_class, trust_remote_code=trust_remote_code) + + model = load_empty_model(model_id, cls=model_class, **loading_kwargs) else: model = model_class.from_pretrained(model_id, low_cpu_mem_usage=low_cpu_mem_usage, **loading_kwargs) diff --git a/setup.py b/setup.py index 3a15828223..4e28426cc6 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ "nncf": ["nncf>=2.14.0"], "openvino": ["nncf>=2.14.0", "openvino>=2024.5.0", "openvino-tokenizers>=2024.5.0"], "neural-compressor": [ - "neural_compressor[pt]@git+https://github.com/intel/neural-compressor.git@5c72158a6799bdf0334ef36fbd493eeed3b62d9f", + "neural_compressor[pt]@git+https://github.com/intel/neural-compressor.git@3bc8e4d0035445c51b2bd5ff6196b9b19e92b3dd", "accelerate", "transformers<4.46", ],