From c760cf1de3541be98ecf4cde5a233cabcb2193e7 Mon Sep 17 00:00:00 2001
From: Kaihui-intel <kaihui.tang@intel.com>
Date: Mon, 29 Jul 2024 14:03:32 +0800
Subject: [PATCH] Fix NLP benchmark: compile the PT2E model after loading, not before saving

Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
---
 .../static_quant/pt2e/run_clm_no_trainer.py   | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
index 7b4c9a46630..395bc6f9b57 100644
--- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
+++ b/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
@@ -90,18 +90,10 @@ def get_example_inputs(tokenizer):
         prepare_model(*example_inputs)
     # convert
     converted_model = convert(prepare_model)
-    # inference
-    from torch._inductor import config
-
-    config.freezing = True
-    opt_model = torch.compile(converted_model)
-
-    opt_model.config = user_model.config # for lm eval
-    user_model = opt_model
-
+    
     # save
     if args.output_dir:
-        user_model.save(example_inputs=example_inputs, output_dir = args.output_dir)
+        converted_model.save(example_inputs=example_inputs, output_dir = args.output_dir)
 
 
 
@@ -112,7 +104,15 @@ def get_example_inputs(tokenizer):
         model = load(args.output_dir)
 
         model.config = user_model.config # for lm eval
-        user_model = model
+        
+        # Compile the loaded quantized model so that Q/DQ patterns are replaced with quantized operators
+        from torch._inductor import config
+
+        config.freezing = True
+        opt_model = torch.compile(model)
+
+        opt_model.config = user_model.config # for lm eval
+        user_model = opt_model
 
 if args.accuracy: