Fix NLP benchmark: save the converted model and run torch.compile after loading it

Signed-off-by: Kaihui-intel <[email protected]>
intel · Jul 29, 2024 · c760cf1 · c760cf1
1 parent 61d9325
commit c760cf1
Showing 1 changed file with 11 additions and 11 deletions.
diff --git a/...huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py b/...huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py
@@ -90,18 +90,10 @@ def get_example_inputs(tokenizer):
         prepare_model(*example_inputs)
     # convert
     converted_model = convert(prepare_model)
-    # inference
-    from torch._inductor import config
-
-    config.freezing = True
-    opt_model = torch.compile(converted_model)
-
-    opt_model.config = user_model.config # for lm eval
-    user_model = opt_model
-
+
     # save
     if args.output_dir:
-        user_model.save(example_inputs=example_inputs, output_dir = args.output_dir)
+        converted_model.save(example_inputs=example_inputs, output_dir = args.output_dir)
 
 
 
@@ -112,7 +104,15 @@ def get_example_inputs(tokenizer):
         model = load(args.output_dir)
 
         model.config = user_model.config # for lm eval
-        user_model = model
+
+        # Compile the quantized model and replace the Q/DQ pattern with Q-operator
+        from torch._inductor import config
+
+        config.freezing = True
+        opt_model = torch.compile(model)
+
+        opt_model.config = user_model.config # for lm eval
+        user_model = opt_model
 
 if args.accuracy: