From b0dc0de8325ee9686900d904c62bb5f36626f3ed Mon Sep 17 00:00:00 2001
From: yuwenzho
Date: Sun, 10 Mar 2024 01:37:54 -0800
Subject: [PATCH 1/5] Fix onnxrt example bug: bart export failure (#1658)

Signed-off-by: yuwenzho
---
 .../quantization/ptq_dynamic/prepare_model.py | 9 ++++++---
 .../quantization/ptq_dynamic/requirements.txt | 3 ++-
 .../quantization/ptq_static/prepare_model.py  | 9 ++++++---
 .../quantization/ptq_static/requirements.txt  | 2 +-
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py
index d020e8aecb6..be05479d9e3 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py
+++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/prepare_model.py
@@ -88,7 +88,10 @@ def export_onnx_model(args, model):
         config=AutoConfig.from_pretrained(args.input_model))

     if args.input_model == 'Intel/bart-large-mrpc':
-        import os
-        os.system('python -m transformers.onnx --model=Intel/bart-large-mrpc --feature=sequence-classification --export_with_transformers bart-large-mrpc/')
+        import shutil
+        from optimum.exporters.onnx import main_export
+
+        main_export(args.input_model, output="bart-large-mrpc", task="text-classification")
+        shutil.move("bart-large-mrpc/model.onnx", args.output_model)
     else:
-        export_onnx_model(args, model)
\ No newline at end of file
+        export_onnx_model(args, model)
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt
index 416ff1b96e0..9988cdf0329 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt
+++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt
@@ -8,4 +8,5 @@ sympy
 onnxruntime-extensions; python_version < '3.11'
 numpy==1.23.5
 sentencepiece
-protobuf<=3.20.3
\ No newline at end of file
+protobuf<=3.20.3
+optimum
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py
index 6d8d774887b..a8272021d5a 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py
+++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/prepare_model.py
@@ -87,7 +87,10 @@ def export_onnx_model(args, model):
         config=AutoConfig.from_pretrained(args.input_model))

     if args.input_model == 'Intel/bart-large-mrpc':
-        import os
-        os.system('python -m transformers.onnx --model=Intel/bart-large-mrpc --feature=sequence-classification --export_with_transformers bart-large-mrpc/')
+        import shutil
+        from optimum.exporters.onnx import main_export
+
+        main_export(args.input_model, output="bart-large-mrpc", task="text-classification")
+        shutil.move("bart-large-mrpc/model.onnx", args.output_model)
     else:
-        export_onnx_model(args, model)
\ No newline at end of file
+        export_onnx_model(args, model)
diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt
index bc078ee513b..9988cdf0329 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt
+++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt
@@ -9,4 +9,4 @@ onnxruntime-extensions; python_version < '3.11'
 numpy==1.23.5
 sentencepiece
 protobuf<=3.20.3
-optimum[exporters]
\ No newline at end of file
+optimum

From c4de1982961e604e698729fb153cd330d4139777 Mon Sep 17 00:00:00 2001
From: Zixuan Cheng <110808245+violetch24@users.noreply.github.com>
Date: Mon, 11 Mar 2024 13:39:13 +0800
Subject: [PATCH 2/5] fix QA IPEX example fp32 input issue (#1661)

Signed-off-by: Cheng, Zixuan
---
 .../quantization/ptq_static/ipex/run_qa.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_qa.py b/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_qa.py
index ce5646bbe34..12e69f13f3f 100644
--- a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_qa.py
+++ b/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_qa.py
@@ -687,7 +687,10 @@ def eval_func(model):
         example_inputs = get_example_inputs(model, eval_dataloader)
         model = ipex.optimize(model)
         with torch.no_grad():
-            model = torch.jit.trace(model, example_kwarg_inputs=example_inputs, strict=False)
+            if isinstance(example_inputs, dict):
+                model = torch.jit.trace(model, example_kwarg_inputs=example_inputs, strict=False)
+            else:
+                model = torch.jit.trace(model, example_inputs, strict=False)
             model = torch.jit.freeze(model)

     if model_args.benchmark or model_args.accuracy_only:

From c214f903a47bf895b0124d4aa5de80453b1487cf Mon Sep 17 00:00:00 2001
From: Zixuan Cheng <110808245+violetch24@users.noreply.github.com>
Date: Mon, 11 Mar 2024 21:05:36 +0800
Subject: [PATCH 3/5] add more ut to smooth_quant for 3.x API (#1657)

Signed-off-by: Cheng, Zixuan
---
 .../algorithms/smooth_quant/smooth_quant.py |  3 +-
 .../torch/quantization/test_smooth_quant.py | 36 +++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py b/neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py
index 30fdb8f532e..bd26dcdfc3b 100644
--- a/neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py
+++ b/neural_compressor/torch/algorithms/smooth_quant/smooth_quant.py
@@ -179,7 +179,8 @@ def qdq_quantize(
     if ipex_ver.release > Version("2.1.0").release:
         update_sq_scale(ipex_config_path, smoothquant_scale_info)
         model.load_qconf_summary(qconf_summary=ipex_config_path)
-    _ipex_post_quant_process(model, example_inputs, inplace=inplace)
+        model.save_qconf_summary(qconf_summary=ipex_config_path)
+    model = _ipex_post_quant_process(model, example_inputs, inplace=inplace)

     with open(ipex_config_path, "r") as f:
         model.tune_cfg = json.load(f)
diff --git a/test/3x/torch/quantization/test_smooth_quant.py b/test/3x/torch/quantization/test_smooth_quant.py
index 1e2b581dcc6..66dab93c36c 100644
--- a/test/3x/torch/quantization/test_smooth_quant.py
+++ b/test/3x/torch/quantization/test_smooth_quant.py
@@ -82,3 +82,39 @@ def run_fn(model):
         output1 = fp32_model(example_inputs)
         output2 = q_model(example_inputs)
         assert torch.allclose(output1, output2, atol=2e-2), "Accuracy gap atol > 0.02 is unexpected. Please check."
+
+    @pytest.mark.skipif(not is_ipex_available(), reason="Requires IPEX")
+    def test_sq_ipex_save_load(self):
+        from intel_extension_for_pytorch.quantization import convert, prepare
+
+        example_inputs = torch.zeros([1, 3])
+        qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5)
+        user_model = copy.deepcopy(model)
+        user_model = prepare(user_model.eval(), qconfig, example_inputs=example_inputs, inplace=True)
+
+        def run_fn(model):
+            model(example_inputs)
+
+        run_fn(user_model)
+        with torch.no_grad():
+            user_model = convert(user_model.eval(), inplace=True).eval()
+            user_model(example_inputs)
+            user_model = torch.jit.trace(user_model.eval(), example_inputs, strict=False)
+            user_model = torch.jit.freeze(user_model.eval())
+            user_model(example_inputs)
+            user_model(example_inputs)
+        ipex_out = user_model(example_inputs)
+
+        fp32_model = copy.deepcopy(model)
+        quant_config = get_default_sq_config()
+        q_model = quantize(fp32_model, quant_config=quant_config, run_fn=run_fn, example_inputs=example_inputs)
+        assert q_model is not None, "Quantization failed!"
+        inc_out = q_model(example_inputs)
+        q_model.save("saved")
+
+        # load
+        loaded_model = torch.jit.load("saved")
+        loaded_out = loaded_model(example_inputs)
+        assert torch.allclose(inc_out, ipex_out, atol=1e-05), "Unexpected result. Please double check."
+
+        assert torch.allclose(inc_out, loaded_out, atol=1e-05), "Unexpected result. Please double check."

From d07175c39cd796c17582e986268a3a7179683763 Mon Sep 17 00:00:00 2001
From: zehao-intel
Date: Tue, 12 Mar 2024 14:56:50 +0800
Subject: [PATCH 4/5] Update Conditions of Getting min-max during TF MatMul Requantize (#1660)

Signed-off-by: zehao-intel
---
 .../graph_rewriter/int8/fuse_matmul_requantize.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py
index e2a4843d329..c0113c1bc4d 100644
--- a/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py
+++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/fuse_matmul_requantize.py
@@ -263,10 +263,14 @@ def do_transformation(self):
             weight_node = self.graph_info[new_node.input[1]].node
             bias_node = self.graph_info[new_node.input[2]].node

-            max_input_node = self.graph_info[last_node.input[-1]].node
-            min_input_node = self.graph_info[last_node.input[-2]].node
-            if max_input_node.op == "Enter":  # pragma: no cover
+            max_input_node = None
+            min_input_node = None
+            if last_node.op.find("Requantize") != -1 or last_node.op.find("QuantizeV2") != -1:
+                max_input_node = self.graph_info[last_node.input[-1]].node
+                min_input_node = self.graph_info[last_node.input[-2]].node
+
+            if max_input_node and max_input_node.op == "Enter":  # pragma: no cover
                 min_input_parent_name = Helper.node_name_from_input(min_input_node.input[0])
                 max_input_parent_name = Helper.node_name_from_input(max_input_node.input[0])
                 min_input_parent_node = self.graph_info[min_input_parent_name].node

From d8e60b8eda59098bd29d6e314ed3383300c0f642 Mon Sep 17 00:00:00 2001
From: zehao-intel
Date: Tue, 12 Mar 2024 15:00:36 +0800
Subject: [PATCH 5/5] Fix TF saved_model issues (#1659)

Signed-off-by: zehao-intel
---
 neural_compressor/model/tensorflow_model.py | 171 +++++++++++++++-----
 1 file changed, 131 insertions(+), 40 deletions(-)

diff --git a/neural_compressor/model/tensorflow_model.py b/neural_compressor/model/tensorflow_model.py
index 063b95ab052..e4809863a55 100644
--- a/neural_compressor/model/tensorflow_model.py
+++ b/neural_compressor/model/tensorflow_model.py
@@ -310,7 +310,45 @@ def load_saved_model(model, saved_model_tags, input_tensor_names, output_tensor_
     return opt, input_tensor_names, output_tensor_names


+def _get_graph_from_saved_model_v3(model, input_tensor_names, output_tensor_names):
+    """The version 3 function that get graph from saved_model.
+
+    Args:
+        model (string or tf.keras.Model): model path or tf.keras.Model object.
+        input_tensor_names (list of string): input tensor names of the model.
+        output_tensor_names (list of string): output tensor names of the model.
+
+    Returns:
+        graph_def (tf.compat.v1.Session): tf.compat.v1.Session object.
+        inputs (list of string): validated input names.
+        outputs (list of string): validated output names.
+    """
+    from neural_compressor.adaptor.tf_utils.util import parse_saved_model
+
+    if isinstance(model, tf.keras.Model):
+        tmp_dir = cfg.default_workspace + "/saved_model"
+        model.save(tmp_dir)
+        model = tmp_dir
+    graph_def, _, _, _, input_names, output_names = parse_saved_model(
+        model, True, input_tensor_names, output_tensor_names
+    )
+
+    return graph_def, input_names, output_names
+
+
 def _get_graph_from_saved_model_v2(saved_model_dir, input_tensor_names, output_tensor_names):
+    """The version 2 function that get graph from the original keras model.
+
+    Args:
+        saved_model_dir (string): model path of a temporary saved_model.
+        input_tensor_names (list of string): input tensor names of the model.
+        output_tensor_names (list of string): output tensor names of the model.
+
+    Returns:
+        graph_def (tf.compat.v1.Session): tf.compat.v1.Session object.
+        input_names (list of string): validated input names.
+        output_names (list of string): validated output names.
+    """
     from tensorflow.python.saved_model import signature_constants, tag_constants

     saved_model_exported_names = [signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
@@ -319,7 +357,17 @@ def _get_graph_from_saved_model_v2(saved_model_dir, input_tensor_names, output_t
     return load_saved_model(saved_model_dir, saved_model_tags, input_tensor_names, output_tensor_names)


-def _get_graph_from_original_keras_v2(model, output_dir):
+def _get_graph_from_original_keras_v2(model):
+    """The version 2 function that get graph from the original keras model.
+
+    Args:
+        model (string or tf.keras.Model): model path or tf.keras.Model object.
+
+    Returns:
+        graph_def (tf.compat.v1.Session): tf.compat.v1.Session object.
+        input_names (list of string): validated input names.
+        output_names (list of string): validated output names.
+    """
     from tensorflow.lite.python.convert import OpsSet
     from tensorflow.lite.python.util import (
         get_grappler_config,
@@ -364,6 +412,17 @@ def _check_keras_format(model, saved_model_dir):


 def _check_keras_format(model, saved_model_dir):
+    """Decide which method will be used to get graph from the saved_model .
+
+    Args:
+        model (string or tf.keras.Model): model path or tf.keras.Model object.
+        saved_model_dir (string): the path to save a temporary saved_model.
+
+    Returns:
+        graph_def (tf.compat.v1.Session): tf.compat.v1.Session object.
+        inputs (list of string): validated input names.
+        outputs (list of string): validated output names.
+ """ from tensorflow.python import saved_model from tensorflow.python.saved_model import save_options from tensorflow.python.saved_model.load import load @@ -384,6 +443,16 @@ def _check_keras_format(model, saved_model_dir): def _get_graph_from_saved_model_v1(model): + """The version 1 function that get graph from saved_model. + + Args: + model (string or tf.keras.Model): model path or tf.keras.Model object. + + Returns: + graph_def (tf.compat.v1.Session): tf.compat.v1.Session object. + inputs (list of string): validated input names. + outputs (list of string): validated output names. + """ from tensorflow.lite.python.convert_saved_model import get_inputs_outputs, get_meta_graph_def, get_signature_def from tensorflow.python.client import session from tensorflow.python.framework import ops @@ -424,6 +493,51 @@ def _get_graph_from_saved_model_v1(model): return graph_def, inputs, outputs +def try_loading_keras(model, input_tensor_names, output_tensor_names): + """Try different ways of loading keras models. + + Args: + model (string or tf.keras.Model): model path or tf.keras.Model object. + input_tensor_names (list of string): input tensor names of the model. + output_tensor_names (list of string): output tensor names of the model. + + Returns: + graph_def (tf.compat.v1.Session): tf.compat.v1.Session object. + input_names (list of string): validated input names. + output_names (list of string): validated output names. + """ + temp_dir = tempfile.mkdtemp() + if not isinstance(model, tf.keras.Model): + model = tf.keras.models.load_model(model) + keras_format = _check_keras_format(model, temp_dir) + + if keras_format == "saved_model_v2": + try: + graph_def, input_names, output_names = _get_graph_from_saved_model_v2( + temp_dir, input_tensor_names, output_tensor_names + ) + if "_FusedBatchNormEx" in [node.op for node in graph_def.node]: + keras_format = "trackable_object" + except: + keras_format = "trackable_object" + + if keras_format == "trackable_object": + try: + graph_def, input_names, output_names = _get_graph_from_original_keras_v2(model) + except: + keras_format = "saved_model_v1" + + if keras_format == "saved_model_v1": # pragma: no cover + try: + tf.keras.backend.set_learning_phase(0) + graph_def, input_names, output_names = _get_graph_from_saved_model_v1(model) + except: + raise ValueError("Not supported keras model type...") + + shutil.rmtree(temp_dir, True) + return graph_def, input_names, output_names + + def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with keras model. @@ -434,49 +548,19 @@ def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): Returns: sess (tf.compat.v1.Session): tf.compat.v1.Session object. - input_tensor_names (list of string): validated input_tensor_names. - output_tensor_names (list of string): validated output_tensor_names. 
""" - temp_dir = tempfile.mkdtemp() if tf.version.VERSION > "2.1.0": - if not isinstance(model, tf.keras.Model): - model = tf.keras.models.load_model(model) - keras_format = _check_keras_format(model, temp_dir) - if keras_format == "saved_model_v2": - try: - graph_def, input_names, output_names = _get_graph_from_saved_model_v2( - temp_dir, input_tensor_names, output_tensor_names - ) - if "_FusedBatchNormEx" in [node.op for node in graph_def.node]: - keras_format = "trackable_object" - except: - keras_format = "trackable_object" - if keras_format == "trackable_object": - try: - graph_def, input_names, output_names = _get_graph_from_original_keras_v2(model, temp_dir) - except: - keras_format = "saved_model_v1" - if keras_format == "saved_model_v1": # pragma: no cover - try: - tf.keras.backend.set_learning_phase(0) - graph_def, input_names, output_names = _get_graph_from_saved_model_v1(model) - except: - keras_format = "saved_model_general" - if keras_format == "saved_model_general": # pargma: no cover - try: - from neural_compressor.adaptor.tf_utils.util import parse_saved_model - - graph_def, _saved_model, _, _, input_names, output_names = parse_saved_model( - temp_dir, True, input_tensor_names, output_tensor_names - ) - except: - raise ValueError("Not supported keras model type...") - + try: + graph_def, input_names, output_names = _get_graph_from_saved_model_v3( + model, input_tensor_names, output_tensor_names + ) + except: + graph_def, input_names, output_names = try_loading_keras(model, input_tensor_names, output_tensor_names) # tensorflow 1.x use v1 convert method else: tf.keras.backend.set_learning_phase(0) graph_def, input_names, output_names = _get_graph_from_saved_model_v1(model) - shutil.rmtree(temp_dir, True) + return graph_def_session(graph_def, input_names, output_names, **kwargs) @@ -645,12 +729,19 @@ def saved_model_session(model, input_tensor_names, output_tensor_names, **kwargs output_tensor_names (list of string): validated output_tensor_names. """ try: - graph_def, input_names, output_names = _get_graph_from_saved_model_v2( + graph_def, input_names, output_names = _get_graph_from_saved_model_v3( model, input_tensor_names, output_tensor_names ) except: - graph_def, input_names, output_names = _get_graph_from_saved_model_v1(model) + try: + graph_def, input_names, output_names = _get_graph_from_saved_model_v2( + model, input_tensor_names, output_tensor_names + ) + except: + graph_def, input_names, output_names = _get_graph_from_saved_model_v1(model) + assert graph_def is not None, "Can not parse the saved model..." + return graph_def_session(graph_def, input_names, output_names, **kwargs)