From c760cf1de3541be98ecf4cde5a233cabcb2193e7 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 29 Jul 2024 14:03:32 +0800
Subject: [PATCH] fix nlp benchmark

Signed-off-by: Kaihui-intel
---
 .../static_quant/pt2e/run_clm_no_trainer.py | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
index 7b4c9a46630..395bc6f9b57 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
@@ -90,18 +90,10 @@ def get_example_inputs(tokenizer):
         prepare_model(*example_inputs)
     # convert
     converted_model = convert(prepare_model)
-    # inference
-    from torch._inductor import config
-
-    config.freezing = True
-    opt_model = torch.compile(converted_model)
-
-    opt_model.config = user_model.config # for lm eval
-    user_model = opt_model
-
+
     # save
     if args.output_dir:
-        user_model.save(example_inputs=example_inputs, output_dir = args.output_dir)
+        converted_model.save(example_inputs=example_inputs, output_dir = args.output_dir)
 
 
 
@@ -112,7 +104,15 @@ def get_example_inputs(tokenizer):
         model = load(args.output_dir)
 
         model.config = user_model.config # for lm eval
-        user_model = model
+
+        # Compile the quantized model and replace the Q/DQ pattern with Q-operator
+        from torch._inductor import config
+
+        config.freezing = True
+        opt_model = torch.compile(model)
+
+        opt_model.config = user_model.config # for lm eval
+        user_model = opt_model
 
 
 if args.accuracy: