Refactor CUDA UT to facilitate automation #559

Open · wants to merge 6 commits into main
13 changes: 7 additions & 6 deletions .azure-pipelines/scripts/ut/run_ut.sh
@@ -2,16 +2,17 @@
 set -xe

 # install requirements
-echo "set up UT env..."
+echo "##[group]set up UT env..."
+export TQDM_MININTERVAL=60
+export TQDM_POSITION=-1
 pip install pytest-cov pytest-html
-pip install -r /auto-round/test/requirements.txt
-pip list
+pip install -r /auto-round/test/test_cpu/requirements.txt

 # install latest gguf for ut test
 git clone https://github.com/ggml-org/llama.cpp.git && cd llama.cpp/gguf-py && pip install .
+echo "##[endgroup]"
+pip list

-cd /auto-round/test || exit 1
+cd /auto-round/test/test_cpu || exit 1
 find . -type f -exec sed -i '/sys\.path\.insert(0, "\.\.")/d' {} +

 export LD_LIBRARY_PATH=${HOME}/.local/lib/:$LD_LIBRARY_PATH
@@ -31,7 +31,7 @@ cp report.html ${LOG_DIR}/
 cp coverage.xml ${LOG_DIR}/

 if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
-    echo "Find errors in pytest case, please check the output..."
+    echo "##[error]Find errors in pytest case, please check the output..."
     exit 1
 fi

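The `##[group]`/`##[endgroup]`/`##[error]` strings added above are Azure Pipelines logging commands: the build agent scans stdout for the `##[...]` prefix and turns the enclosed lines into collapsible log sections or highlighted error annotations, which is what makes the UT logs easier to scan in automation. A minimal sketch of the mechanism (the messages are illustrative, not part of the PR):

```python
# Azure Pipelines formatting commands are plain strings on stdout, so any
# language can emit them; print() here plays the role of echo in run_ut.sh.
print("##[group]set up UT env...")   # opens a collapsible section in the pipeline log
print("pip install ...")             # lines in between are folded under the group
print("##[endgroup]")                # closes the section
print("##[error]Find errors in pytest case, please check the output...")  # shown as an error annotation
```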
4 changes: 2 additions & 2 deletions .azure-pipelines/scripts/ut/run_ut_hpu.sh
@@ -6,7 +6,7 @@ echo "set up UT env..."
 pip install pytest-cov pytest-html
 pip list

-cd /auto-round/test || exit 1
+cd /auto-round/test/test_cpu || exit 1
 find . -type f -exec sed -i '/sys\.path\.insert(0, "\.\.")/d' {} +

 export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
@@ -31,7 +31,7 @@ cp report.html ${LOG_DIR}/
 cp coverage.xml ${LOG_DIR}/

 if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
-    echo "Find errors in pytest case, please check the output..."
+    echo "##[error]Find errors in pytest case, please check the output..."
     exit 1
 fi

10 changes: 5 additions & 5 deletions auto_round/script/llm.py
@@ -370,7 +370,7 @@ def tune(args):

     if args.enable_torch_compile:
         logger.info("`torch.compile` is enabled to reduce tuning costs. "
-                    "If it causes issues, you can disable it by remove `--enable_torch_compile` argument.")
+                    "If it causes issues, you can disable it by removing `--enable_torch_compile` argument.")

     model_name = args.model
     if model_name[-1] == "/":
@@ -582,7 +582,7 @@ def tune(args):
                 device=device_str,
                 eval_model_dtype=eval_model_dtype)
             print(make_table(res))
-            print("evaluation running time=", time.time() - st)
+            print("evaluation running time=%ds" % (time.time() - st))
     else:
         if args.eval_task_by_task:
             eval_task_by_task(
@@ -599,7 +599,7 @@ def tune(args):
             res = simple_evaluate(
                 model="hf", model_args=model_args, tasks=tasks, device=device_str, batch_size=args.eval_bs)
             print(make_table(res))
-            print("evaluation running time=", time.time() - st)
+            print("evaluation running time=%ds" % (time.time() - st))


 def _eval_init(tasks, model_path, device, disable_trust_remote_code=False, dtype="auto"):
@@ -657,14 +657,14 @@ def eval(args):
         res = simple_evaluate_user_model(
             model, tokenizer, tasks=tasks, batch_size=batch_size, device=device_str)
         print(make_table(res))
-        print("evaluation running time=", time.time() - st)
+        print("evaluation running time=%ds" % (time.time() - st))
     else:
         st = time.time()
         res = simple_evaluate(
             model="hf", model_args=model_args, tasks=tasks, device=device_str, batch_size=args.eval_bs)
         from lm_eval.utils import make_table  # pylint: disable=E0401
         print(make_table(res))
-        print("evaluation running time=", time.time() - st)
+        print("evaluation running time=%ds" % (time.time() - st))


 def eval_task_by_task(
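The repeated print change in this file swaps the two-argument `print` for `%d` formatting, which truncates the elapsed float to whole seconds and appends a unit, giving log parsers a single stable token instead of a free-form float. A small sketch of the difference (values are illustrative):

```python
import time

st = time.time()
time.sleep(1.2)  # stand-in for the evaluation step
elapsed = time.time() - st
print("evaluation running time=", elapsed)      # old: "evaluation running time= 1.2001..."
print("evaluation running time=%ds" % elapsed)  # new: "evaluation running time=1s"
```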
193 changes: 193 additions & 0 deletions auto_round/testing_utils.py
@@ -0,0 +1,193 @@
# Copyright (c) 2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import importlib.util

import torch

from transformers.utils.versions import require_version


def is_gguf_available():
    return importlib.util.find_spec("gguf") is not None


def is_autogptq_available():
    return importlib.util.find_spec("auto_gptq") is not None


def is_awq_available():
    return importlib.util.find_spec("awq") is not None


def is_optimum_available():
    return importlib.util.find_spec("optimum") is not None


def is_ipex_available():
    try:
        require_version("intel-extension-for-pytorch>=2.5")
        return True
    except ImportError:
        return False


def is_itrex_available():
    return importlib.util.find_spec("intel_extension_for_transformers") is not None


def is_flash_attn_avaliable():
    return importlib.util.find_spec("flash_attn") is not None


def is_gptqmodel_available():
    try:
        require_version("gptqmodel>=2.0")
        return True
    except ImportError:
        return False


def greater_than_050():
    try:
        require_version("auto-round>=0.5.0")
        return True
    except ImportError:
        return False


def require_gguf(test_case):
    """
    Decorator marking a test that requires gguf.

    These tests are skipped when gguf isn't installed.

    """
    return unittest.skipUnless(is_gguf_available(), "test requires gguf")(test_case)


def require_autogptq(test_case):
    """
    Decorator marking a test that requires auto-gptq.

    These tests are skipped when auto-gptq isn't installed.

    """
    return unittest.skipUnless(is_autogptq_available(), "test requires auto-gptq")(test_case)


def require_gptqmodel(test_case):
    """
    Decorator marking a test that requires gptqmodel.

    These tests are skipped when gptqmodel isn't installed.

    """
    return unittest.skipUnless(is_gptqmodel_available(), "test requires gptqmodel>=2.0")(test_case)


def require_awq(test_case):
    """
    Decorator marking a test that requires autoawq.

    These tests are skipped when autoawq isn't installed.

    """
    return unittest.skipUnless(is_awq_available(), "test requires autoawq")(test_case)


def require_ipex(test_case):
    """
    Decorator marking a test that requires intel-extension-for-pytorch.

    These tests are skipped when intel-extension-for-pytorch isn't installed.

    """
    return unittest.skipUnless(is_ipex_available(), "test requires intel-extension-for-pytorch>=2.5")(test_case)


def require_itrex(test_case):
    """
    Decorator marking a test that requires intel-extension-for-transformers.

    These tests are skipped when intel-extension-for-transformers isn't installed.

    """
    return unittest.skipUnless(is_itrex_available(), "test requires intel-extension-for-transformers")(test_case)


def require_optimum(test_case):
    """
    Decorator marking a test that requires optimum.

    These tests are skipped when optimum isn't installed.

    """
    return unittest.skipUnless(is_optimum_available(), "test requires optimum")(test_case)


def require_greater_than_050(test_case):
    """
    Decorator marking a test that requires auto-round>=0.5.0.

    These tests are skipped when auto-round<0.5.0.

    """
    return unittest.skipUnless(greater_than_050(), "test requires auto-round>=0.5.0")(test_case)


def multi_card(test_case):
    """
    Decorator marking a test that requires multiple cards.

    These tests are skipped when only one card or the CPU is available.

    """
    return unittest.skipUnless(
        torch.cuda.is_available() and torch.cuda.device_count() > 1, "test requires multiple cards.")(test_case)


def require_old_version(test_case):
    """
    Decorator marking a test that requires an older environment (torch<2.7.0).

    These tests are skipped when the installed torch is too new.

    """
    env_check = True
    try:
        require_version("torch<2.7.0")
        env_check &= True
    except ImportError:
        env_check &= False
    return unittest.skipUnless(env_check, "Environment is not satisfactory")(test_case)


def require_vlm_env(test_case):
    """
    Decorator marking a test that requires a special environment to load a VLM model.

    These tests are skipped when the environment requirements are not met.

    """
    env_check = True
    # pip install flash-attn --no-build-isolation
    env_check &= is_flash_attn_avaliable()

    # pip install git+https://github.com/haotian-liu/[email protected]
    env_check &= importlib.util.find_spec("llava") is not None

    # pip install git+https://github.com/deepseek-ai/DeepSeek-VL2.git
    env_check &= importlib.util.find_spec("deepseek_vl2") is not None

    env_check &= importlib.util.find_spec("xformers") is not None

    return unittest.skipUnless(env_check, "Environment is not satisfactory")(test_case)
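Taken together, these helpers let tests declare their environment requirements up front instead of failing on import. A hypothetical test module sketching the intended usage (the class and test names are assumptions, not part of the PR):

```python
import unittest

from auto_round.testing_utils import multi_card, require_gguf, require_gptqmodel


class TestCudaExport(unittest.TestCase):  # hypothetical test class

    @require_gguf
    def test_gguf_export(self):
        ...  # skipped automatically when the gguf package is not importable

    @require_gptqmodel
    def test_gptq_format(self):
        ...  # skipped unless gptqmodel>=2.0 is installed

    @multi_card
    def test_device_map(self):
        ...  # skipped on CPU-only hosts and single-GPU machines


if __name__ == "__main__":
    unittest.main()
```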
2 changes: 1 addition & 1 deletion docs/step_by_step.md
@@ -101,7 +101,7 @@ CPU, Intel GPU, HPU and CUDA for both quantization and inference.
 This setting provides the best accuracy in most scenarios but is 4–5× slower than the standard AutoRound recipe. It is especially recommended for 2-bit quantization and is a good choice if sufficient resources are available.

 ```bash
-auto-round-best --model facebook/opt-125m --bits 4 --group_size 128 --format "auto_gptq,auto_awq,auto_round"
+auto-round-best --model facebook/opt-125m --bits 4 --group_size 128 --format "auto_gptq,auto_awq,auto_round"
 ```

 - **Light Settings:**
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -3,7 +3,7 @@
 import sys
 import unittest

-sys.path.insert(0, "..")
+sys.path.insert(0, "../..")
 import torch
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
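The same one-line change repeats in each renamed test below: the files moved from test/ into test/test_cpu/, one directory deeper, so the shim that makes a source checkout importable must now climb two levels to reach the repository root. A minimal sketch of the path arithmetic (the file path is illustrative):

```python
import os

test_file = "/auto-round/test/test_cpu/test_autoround.py"  # hypothetical location after the move
repo_root = os.path.abspath(os.path.join(os.path.dirname(test_file), "../.."))
print(repo_root)  # -> /auto-round, so `import auto_round` resolves in a source checkout
```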
2 changes: 1 addition & 1 deletion test/test_autoopt.py → test/test_cpu/test_autoopt.py
@@ -3,7 +3,7 @@
 import sys
 import unittest

-sys.path.insert(0, "..")
+sys.path.insert(0, "../..")
 import torch
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
2 changes: 1 addition & 1 deletion test/test_autoround.py → test/test_cpu/test_autoround.py
@@ -5,7 +5,7 @@

 from auto_round.eval.evaluation import simple_evaluate_user_model

-sys.path.insert(0, "..")
+sys.path.insert(0, "../..")
 import torch
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -3,7 +3,7 @@
 import shutil
 import sys
 import unittest
-sys.path.insert(0, "..")
+sys.path.insert(0, "../..")
 import torch
 import transformers
 from math import isclose
@@ -3,7 +3,7 @@
 import sys
 import unittest

-sys.path.insert(0, "..")
+sys.path.insert(0, "../..")
 import torch
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer