diff --git a/intel_extension_for_transformers/transformers/llm/quantization/utils.py b/intel_extension_for_transformers/transformers/llm/quantization/utils.py
index 78df8ffcdee..a8a5b88baf9 100644
--- a/intel_extension_for_transformers/transformers/llm/quantization/utils.py
+++ b/intel_extension_for_transformers/transformers/llm/quantization/utils.py
@@ -23,7 +23,7 @@
 from ....tools.utils import _ipex_version
 from accelerate import init_empty_weights
 from datasets import load_dataset
-from neural_compressor.torch.algorithms.weight_only.modules import WeightOnlyLinear
+from neural_compressor.torch.algorithms.weight_only.modules import INCWeightOnlyLinear as WeightOnlyLinear
 from neural_compressor.torch.quantization import (
     AutoRoundConfig,
     AWQConfig,
diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
index 63540e11a74..26aa9c38ee4 100644
--- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
+++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
@@ -70,7 +70,7 @@
 from ...tools.utils import is_intel_gpu_available, is_ipex_available, _neural_compressor_version
 from accelerate import init_empty_weights
 from huggingface_hub import hf_hub_download
-from neural_compressor.torch.algorithms.weight_only.modules import WeightOnlyLinear
+from neural_compressor.torch.algorithms.weight_only.modules import INCWeightOnlyLinear as WeightOnlyLinear
 from neural_compressor.model.torch_model import PyTorchFXModel
 from packaging import version
 from threading import Thread
diff --git a/tests/requirements.txt b/tests/requirements.txt
index f1c2c289e09..31aad19f9f3 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -15,6 +15,7 @@ intel-tensorflow==2.14.0
 lm-eval==0.4.3
 mlflow
 nlpaug==1.1.9
+nltk==3.8.1
 onnx
 onnxruntime
 peft==0.6.2
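
Note: the diff pins the new class name (`INCWeightOnlyLinear`, aliased back to `WeightOnlyLinear`) at import time, which assumes a neural-compressor release that already ships the renamed class. A minimal sketch of an alternative, version-tolerant import is shown below; it is not part of this PR, and the fallback branch assumes older neural-compressor versions still expose the original `WeightOnlyLinear` name.

```python
# Hedged sketch, not what this PR does: accept either class name so the code
# also imports against older neural-compressor releases.
try:
    # Newer neural-compressor exposes the renamed class.
    from neural_compressor.torch.algorithms.weight_only.modules import (
        INCWeightOnlyLinear as WeightOnlyLinear,
    )
except ImportError:
    # Assumed fallback: older releases still provide the original name.
    from neural_compressor.torch.algorithms.weight_only.modules import WeightOnlyLinear
```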