Refactor CUDA UT to facilitate automation #559

Open · wants to merge 6 commits into main
13 changes: 7 additions & 6 deletions .azure-pipelines/scripts/ut/run_ut.sh
@@ -2,16 +2,17 @@
 set -xe

 # install requirements
-echo "set up UT env..."
+echo "##[group]set up UT env..."
+export TQDM_MININTERVAL=60
+export TQDM_POSITION=-1
 pip install pytest-cov pytest-html
-pip install -r /auto-round/test/requirements.txt
-pip list
+pip install -r /auto-round/test/test_cpu/requirements.txt

 # install latest gguf for ut test
 git clone https://github.com/ggml-org/llama.cpp.git && cd llama.cpp/gguf-py && pip install .
+echo "##[endgroup]"
+pip list

-cd /auto-round/test || exit 1
+cd /auto-round/test/test_cpu || exit 1
 find . -type f -exec sed -i '/sys\.path\.insert(0, "\.\.")/d' {} +

 export LD_LIBRARY_PATH=${HOME}/.local/lib/:$LD_LIBRARY_PATH
@@ -31,7 +31,7 @@ cp report.html ${LOG_DIR}/
 cp coverage.xml ${LOG_DIR}/

 if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
-    echo "Find errors in pytest case, please check the output..."
+    echo "##[error]Find errors in pytest case, please check the output..."
     exit 1
 fi

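The `##[group]`/`##[endgroup]`/`##[error]` strings added above are Azure Pipelines logging commands: the build agent scans stdout for the `##[...]` prefix and turns the enclosed lines into collapsible log sections or highlighted error annotations, which is what makes the UT logs easier to scan in automation. A minimal sketch of the mechanism (the messages are illustrative, not part of the PR):

```python
# Azure Pipelines formatting commands are plain strings on stdout, so any
# language can emit them; print() here plays the role of echo in run_ut.sh.
print("##[group]set up UT env...")   # opens a collapsible section in the pipeline log
print("pip install ...")             # lines in between are folded under the group
print("##[endgroup]")                # closes the section
print("##[error]Find errors in pytest case, please check the output...")  # shown as an error annotation
```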
4 changes: 2 additions & 2 deletions .azure-pipelines/scripts/ut/run_ut_hpu.sh
@@ -6,7 +6,7 @@ echo "set up UT env..."
 pip install pytest-cov pytest-html
 pip list

-cd /auto-round/test || exit 1
+cd /auto-round/test/test_cpu || exit 1
 find . -type f -exec sed -i '/sys\.path\.insert(0, "\.\.")/d' {} +

 export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
@@ -31,7 +31,7 @@ cp report.html ${LOG_DIR}/
 cp coverage.xml ${LOG_DIR}/

 if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
-    echo "Find errors in pytest case, please check the output..."
+    echo "##[error]Find errors in pytest case, please check the output..."
     exit 1
 fi

10 changes: 5 additions & 5 deletions auto_round/script/llm.py
@@ -370,7 +370,7 @@ def tune(args):

     if args.enable_torch_compile:
         logger.info("`torch.compile` is enabled to reduce tuning costs. "
-                    "If it causes issues, you can disable it by remove `--enable_torch_compile` argument.")
+                    "If it causes issues, you can disable it by removing `--enable_torch_compile` argument.")

     model_name = args.model
     if model_name[-1] == "/":
@@ -582,7 +582,7 @@ def tune(args):
                 device=device_str,
                 eval_model_dtype=eval_model_dtype)
             print(make_table(res))
-            print("evaluation running time=", time.time() - st)
+            print("evaluation running time=%ds" % (time.time() - st))
     else:
         if args.eval_task_by_task:
             eval_task_by_task(
@@ -599,7 +599,7 @@ def tune(args):
             res = simple_evaluate(
                 model="hf", model_args=model_args, tasks=tasks, device=device_str, batch_size=args.eval_bs)
             print(make_table(res))
-            print("evaluation running time=", time.time() - st)
+            print("evaluation running time=%ds" % (time.time() - st))


 def _eval_init(tasks, model_path, device, disable_trust_remote_code=False, dtype="auto"):
@@ -657,14 +657,14 @@ def eval(args):
         res = simple_evaluate_user_model(
             model, tokenizer, tasks=tasks, batch_size=batch_size, device=device_str)
         print(make_table(res))
-        print("evaluation running time=", time.time() - st)
+        print("evaluation running time=%ds" % (time.time() - st))
     else:
         st = time.time()
         res = simple_evaluate(
             model="hf", model_args=model_args, tasks=tasks, device=device_str, batch_size=args.eval_bs)
         from lm_eval.utils import make_table  # pylint: disable=E0401
         print(make_table(res))
-        print("evaluation running time=", time.time() - st)
+        print("evaluation running time=%ds" % (time.time() - st))


 def eval_task_by_task(
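The repeated print change in this file swaps the two-argument `print` for `%d` formatting, which truncates the elapsed float to whole seconds and appends a unit, giving log parsers a single stable token instead of a free-form float. A small sketch of the difference (values are illustrative):

```python
import time

st = time.time()
time.sleep(1.2)  # stand-in for the evaluation step
elapsed = time.time() - st
print("evaluation running time=", elapsed)      # old: "evaluation running time= 1.2001..."
print("evaluation running time=%ds" % elapsed)  # new: "evaluation running time=1s"
```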
193 changes: 193 additions & 0 deletions auto_round/testing_utils.py
@@ -0,0 +1,193 @@
# Copyright (c) 2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import importlib.util

import torch

from transformers.utils.versions import require_version


def is_gguf_available():
    return importlib.util.find_spec("gguf") is not None


def is_autogptq_available():
    return importlib.util.find_spec("auto_gptq") is not None


def is_awq_available():
    return importlib.util.find_spec("awq") is not None


def is_optimum_available():
    return importlib.util.find_spec("optimum") is not None


def is_ipex_available():
    try:
        require_version("intel-extension-for-pytorch>=2.5")
        return True
    except ImportError:
        return False


def is_itrex_available():
    return importlib.util.find_spec("intel_extension_for_transformers") is not None


def is_flash_attn_avaliable():
    return importlib.util.find_spec("flash_attn") is not None


def is_gptqmodel_available():
    try:
        require_version("gptqmodel>=2.0")
        return True
    except ImportError:
        return False


def greater_than_050():
    try:
        require_version("auto-round>=0.5.0")
        return True
    except ImportError:
        return False


def require_gguf(test_case):
    """
    Decorator marking a test that requires gguf.

    These tests are skipped when gguf isn't installed.

    """
    return unittest.skipUnless(is_gguf_available(), "test requires gguf")(test_case)


def require_autogptq(test_case):
    """
    Decorator marking a test that requires auto-gptq.

    These tests are skipped when auto-gptq isn't installed.

    """
    return unittest.skipUnless(is_autogptq_available(), "test requires auto-gptq")(test_case)


def require_gptqmodel(test_case):
    """
    Decorator marking a test that requires gptqmodel.

    These tests are skipped when gptqmodel isn't installed.

    """
    return unittest.skipUnless(is_gptqmodel_available(), "test requires gptqmodel>=2.0")(test_case)


def require_awq(test_case):
    """
    Decorator marking a test that requires autoawq.

    These tests are skipped when autoawq isn't installed.

    """
    return unittest.skipUnless(is_awq_available(), "test requires autoawq")(test_case)


def require_ipex(test_case):
    """
    Decorator marking a test that requires intel-extension-for-pytorch.

    These tests are skipped when intel-extension-for-pytorch isn't installed.

    """
    return unittest.skipUnless(is_ipex_available(), "test requires intel-extension-for-pytorch>=2.5")(test_case)


def require_itrex(test_case):
    """
    Decorator marking a test that requires intel-extension-for-transformers.

    These tests are skipped when intel-extension-for-transformers isn't installed.

    """
    return unittest.skipUnless(is_itrex_available(), "test requires intel-extension-for-transformers")(test_case)


def require_optimum(test_case):
    """
    Decorator marking a test that requires optimum.

    These tests are skipped when optimum isn't installed.

    """
    return unittest.skipUnless(is_optimum_available(), "test requires optimum")(test_case)


def require_greater_than_050(test_case):
    """
    Decorator marking a test that requires auto-round>=0.5.0.

    These tests are skipped when auto-round<0.5.0.

    """
    return unittest.skipUnless(greater_than_050(), "test requires auto-round>=0.5.0")(test_case)


def multi_card(test_case):
    """
    Decorator marking a test that requires multiple cards.

    These tests are skipped when only one card or the CPU is available.

    """
    return unittest.skipUnless(
        torch.cuda.is_available() and torch.cuda.device_count() > 1, "test requires multiple cards.")(test_case)


def require_old_version(test_case):
    """
    Decorator marking a test that requires an older environment (torch<2.7.0).

    These tests are skipped when the installed torch is too new.

    """
    env_check = True
    try:
        require_version("torch<2.7.0")
        env_check &= True
    except ImportError:
        env_check &= False
    return unittest.skipUnless(env_check, "Environment is not satisfactory")(test_case)


def require_vlm_env(test_case):
    """
    Decorator marking a test that requires a special environment to load a VLM model.

    These tests are skipped when the environment requirements are not met.

    """
    env_check = True
    # pip install flash-attn --no-build-isolation
    env_check &= is_flash_attn_avaliable()

    # pip install git+https://github.com/haotian-liu/[email protected]
    env_check &= importlib.util.find_spec("llava") is not None

    # pip install git+https://github.com/deepseek-ai/DeepSeek-VL2.git
    env_check &= importlib.util.find_spec("deepseek_vl2") is not None

    env_check &= importlib.util.find_spec("xformers") is not None

    return unittest.skipUnless(env_check, "Environment is not satisfactory")(test_case)
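Taken together, these helpers let tests declare their environment requirements up front instead of failing on import. A hypothetical test module sketching the intended usage (the class and test names are assumptions, not part of the PR):

```python
import unittest

from auto_round.testing_utils import multi_card, require_gguf, require_gptqmodel


class TestCudaExport(unittest.TestCase):  # hypothetical test class

    @require_gguf
    def test_gguf_export(self):
        ...  # skipped automatically when the gguf package is not importable

    @require_gptqmodel
    def test_gptq_format(self):
        ...  # skipped unless gptqmodel>=2.0 is installed

    @multi_card
    def test_device_map(self):
        ...  # skipped on CPU-only hosts and single-GPU machines


if __name__ == "__main__":
    unittest.main()
```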
2 changes: 1 addition & 1 deletion docs/step_by_step.md
@@ -101,7 +101,7 @@ CPU, Intel GPU, HPU and CUDA for both quantization and inference.
 This setting provides the best accuracy in most scenarios but is 4–5× slower than the standard AutoRound recipe. It is especially recommended for 2-bit quantization and is a good choice if sufficient resources are available.

 ```bash
-auto-round-best --model facebook/opt-125m --bits 4 --group_size 128 --format "auto_gptq,auto_awq,auto_round"
+auto-round-best --model facebook/opt-125m --bits 4 --group_size 128 --format "auto_gptq,auto_awq,auto_round"
 ```

 - **Light Settings:**
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -3,7 +3,7 @@
 import sys
 import unittest

-sys.path.insert(0, "..")
+sys.path.insert(0, "../..")
 import torch
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
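The same one-line change repeats in each renamed test below: the files moved from test/ into test/test_cpu/, one directory deeper, so the shim that makes a source checkout importable must now climb two levels to reach the repository root. A minimal sketch of the path arithmetic (the file path is illustrative):

```python
import os

test_file = "/auto-round/test/test_cpu/test_autoround.py"  # hypothetical location after the move
repo_root = os.path.abspath(os.path.join(os.path.dirname(test_file), "../.."))
print(repo_root)  # -> /auto-round, so `import auto_round` resolves in a source checkout
```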
2 changes: 1 addition & 1 deletion test/test_autoopt.py → test/test_cpu/test_autoopt.py
@@ -3,7 +3,7 @@
 import sys
 import unittest

-sys.path.insert(0, "..")
+sys.path.insert(0, "../..")
 import torch
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
2 changes: 1 addition & 1 deletion test/test_autoround.py → test/test_cpu/test_autoround.py
@@ -5,7 +5,7 @@

 from auto_round.eval.evaluation import simple_evaluate_user_model

-sys.path.insert(0, "..")
+sys.path.insert(0, "../..")
 import torch
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -3,7 +3,7 @@
 import shutil
 import sys
 import unittest
-sys.path.insert(0, "..")
+sys.path.insert(0, "../..")
 import torch
 import transformers
 from math import isclose
@@ -3,7 +3,7 @@
 import sys
 import unittest

-sys.path.insert(0, "..")
+sys.path.insert(0, "../..")
 import torch
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer