Unit Tests #1300

Workflow file for this run

.github/workflows/unit_tests.yml at 5da5115

	name: Unit Tests

	run-name: "${{ github.event.inputs.title }}"

	defaults:
	run:
	shell: bash -le {0}

	on:
	repository_dispatch:
	workflow_dispatch:
	inputs:
	title:
	description: 'set a title for this run'
	required: false
	default: ''
	repo:
	description: 'GitHub repo {owner}/{repo}'
	required: false
	default: ''
	ref:
	description: 'GitHub ref: Branch, Tag or Commit SHA'
	required: false
	default: ''
	pr_number:
	description: 'PR Number'
	required: false
	type: number
	test_names:
	description: 'Input Test(s) to Run (default all)'
	required: false
	default: ''
	test_regex:
	description: 'Regex to filter test files'
	required: false
	default: ''
	artifact_id:
	description: 'Run id for artifact to be downloaded'
	required: false
	default: ''
	max-parallel:
	description: 'Parallel jobs'
	required: false
	default: '20'
	exclusive-gpu:
	description: 'One Test Per GPU'
	type: boolean
	required: false
	default: true
	server:
	description: 'Wheel Build Server'
	type: choice
	options:
	- '["self-hosted", "zen4"]'
	- '["self-hosted", "xeon5"]'

	env:
	CUDA_DEVICE_ORDER: PCI_BUS_ID
	CUDA_VISIBLE_DEVICES: 0
	TORCH_CUDA_ARCH_LIST: '8.6 8.9'
	PYTORCH_CUDA_ALLOC_CONF: 'expandable_segments:True'
	MAX_JOBS: 8
	RUNNER: 10.0.13.31
	XEON5: 10.0.14.248
	LEGACY_TESTS: "models/test_internlm.py,models/test_internlm2_5.py,models/test_xverse.py"
	IGNORED_TEST_FILES: "test_tgi.py,test_gptneox.py,models/test_mixtral.py,models/test_phi_3_moe.py,test_bits_new.py"
	GPTQMODEL_FORCE_BUILD: 1
	repo: ${{ github.event.inputs.repo \|\| github.repository }}
	ref: ${{ github.event.inputs.ref \|\| github.ref }}

	concurrency:
	group: ${{ github.event.inputs.ref \|\| github.ref }}-workflow-unit-tests-${{ github.event.inputs.test_names }}
	cancel-in-progress: true

	jobs:
	check-vm:
	runs-on: ubuntu-24.04
	outputs:
	ip: ${{ steps.get_ip.outputs.ip }}
	run_id: ${{ steps.get_ip.outputs.run_id }}
	max-parallel: ${{ steps.get_ip.outputs.max-parallel }}
	steps:
	- name: Print env
	run: \|
	echo "repo: ${{ env.repo }}"
	echo "ref: ${{ env.ref }}"
	echo "artifact_id: ${{ github.event.inputs.artifact_id }}"
	echo "test_names: ${{ github.event.inputs.test_names }}"
	echo "exclusive-gpu: ${{ github.event.inputs.exclusive-gpu }}"
	echo "selected server: ${{ github.event.inputs.server }}"

	- name: Select server
	id: get_ip
	run: \|
	echo "ip=$RUNNER" >> "$GITHUB_OUTPUT"

	echo "ip: $ip"

	if [ -n "${{ github.event.inputs.artifact_id }}" ]; then
	run_id="${{ github.event.inputs.artifact_id }}"
	else
	run_id="${{ github.run_id }}"
	fi
	echo "run_id=$run_id" >> "$GITHUB_OUTPUT"
	echo "artifact_id=$run_id"

	max_p=${{ github.event.inputs.max-parallel }}
	max_p="{\"size\": ${max_p:-20}}"
	echo "max-parallel=$max_p" >> "$GITHUB_OUTPUT"
	echo "max-parallel=$max_p"

	list-test-files:
	runs-on: ubuntu-24.04
	outputs:
	torch-files: ${{ steps.files.outputs.torch-files }}
	transformers-files: ${{ steps.files.outputs.transformers-files }}
	m4-files: ${{ steps.files.outputs.m4-files }}

	steps:
	- name: Checkout Codes
	uses: actions/checkout@v4
	with:
	repository: ${{ env.repo }}
	ref: ${{ env.ref }}

	- name: Fetch PR by number
	if: ${{ github.event.inputs.pr_number != 0 }}
	run: \|
	PR_NUMBER=${{ github.event.inputs.pr_number }}
	echo "pr number $PR_NUMBER"
	git config --global --add safe.directory $(pwd)
	git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
	git checkout pr-${PR_NUMBER}

	- name: List files
	id: files
	run: \|
	script="
	import json
	import os
	import re

	LEGACY_TESTS = '${LEGACY_TESTS}'
	IGNORED_TEST_FILES = '${IGNORED_TEST_FILES}'

	TEST_NAMES='${{ github.event.inputs.test_names }}'
	TEST_REGEX='${{ github.event.inputs.test_regex }}'

	input_test_files_list = [f.strip().removesuffix('.py') for f in TEST_NAMES.split(',') if f.strip()]

	transformers_test_files = [f.strip().removesuffix('.py') for f in f'{LEGACY_TESTS}'.split(',') if f.strip()]
	transformers_test_files = [f for f in transformers_test_files if not input_test_files_list or f in input_test_files_list]

	all_tests = [f.removesuffix('.py') for f in os.listdir('tests/') if f.startswith('test_') and f.endswith('.py') and f.strip().removesuffix('py') not in f'{IGNORED_TEST_FILES}']
	all_tests_models = ['models/'+f.removesuffix('.py') for f in os.listdir('tests/models') if f.startswith('test_') and f.endswith('.py') and f.strip().removesuffix('py') not in f'{IGNORED_TEST_FILES}']

	torch_test_files = [f for f in all_tests+all_tests_models if (not input_test_files_list or f in input_test_files_list) and f not in transformers_test_files and 'mlx' not in f]

	torch_test_files = [test for test in torch_test_files if re.match(f'{TEST_REGEX}', test)]
	transformers_test_files = [test for test in transformers_test_files if re.match(f'{TEST_REGEX}', test)]

	m4_test_files = [f for f in all_tests if ('mlx' in f or 'apple' in f) and (f.strip().removesuffix('.py') in input_test_files_list if input_test_files_list else True)]

	print(f'{json.dumps(torch_test_files)}\|{json.dumps(transformers_test_files)}\|{json.dumps(m4_test_files)}')
	"

	test_files=$(python3 -c "$script")
	IFS='\|' read -r torch_test_files transformers_test_files mlx_files <<< "$test_files"

	echo "torch-files=$torch_test_files" >> "$GITHUB_OUTPUT"
	echo "transformers-files=$transformers_test_files" >> "$GITHUB_OUTPUT"
	echo "m4-files=$mlx_files" >> "$GITHUB_OUTPUT"

	echo "Test files: $test_files"
	echo "Torch Test files: $torch_test_files"
	echo "Transformers Test files: $transformers_test_files"
	echo "MLX Test files: $mlx_files"
	echo "Ignored Test files: $IGNORED_TEST_FILES"

	- name: Print conditions and parameters
	run: \|
	echo "Parameters:"
	echo "needs.list-test-files.outputs.torch-files: ${{ needs.list-test-files.outputs.torch-files }}"
	echo "needs.list-test-files.outputs.transformers-files: ${{ needs.list-test-files.outputs.transformers-files }}"
	echo "needs.list-test-files.outputs.m4-files: ${{ needs.list-test-files.outputs.m4-files }}"
	echo "github.event.inputs.artifact_id: ${{ github.event.inputs.artifact_id }}"
	echo "needs.build.result: ${{ needs.build.result }}"
	echo "github.event.inputs.test_names: ${{ github.event.inputs.test_names }}"
	echo ""

	echo "Conditions:"
	echo "will build run: ${{ needs.list-test-files.outputs.torch-files != '[]' && needs.list-test-files.outputs.transformers-files != '[]' && !(needs.list-test-files.outputs.m4-files == '[]' && needs.list-test-files.outputs.m4-files == '[]') }}"
	echo "will legacy run: ${{ (needs.build.result == 'success' \|\| github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.transformers-files != '[]' }}"
	echo "will torch run: ${{ (needs.build.result == 'success' \|\| github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.torch-files != '[]' }}"
	echo "will m4 run: ${{ (github.event.inputs.test_names == '' \|\| contains(github.event.inputs.test_names, 'apple') \|\| contains(github.event.inputs.test_names, 'mlx') ) && (needs.list-test-files.outputs.m4-files != '' \|\| needs.list-test-files.outputs.m4-files != '[]') }}"

	build:
	runs-on: ${{ fromJSON(github.event.inputs.server ) }}
	needs:
	- check-vm
	- list-test-files
	if: needs.list-test-files.outputs.torch-files != '[]' \|\| needs.list-test-files.outputs.transformers-files != '[]'
	container:
	image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v7
	options: --device /dev/dri --ipc=host --runtime=nvidia --gpus all
	volumes:
	- /dev/dri/by-path:/dev/dri/by-path
	- /home/ci/models:/monster/data/model
	- /home/ci/models/huggingface:/github/home/.cache/huggingface

	steps:
	- name: Checkout Codes
	uses: actions/checkout@v4
	with:
	repository: ${{ env.repo }}
	ref: ${{ env.ref }}

	- name: Fetch PR by number
	if: ${{ github.event.inputs.pr_number != 0 }}
	run: \|
	PR_NUMBER=${{ github.event.inputs.pr_number }}
	echo "pr number $PR_NUMBER"
	git config --global --add safe.directory $(pwd)
	git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
	git checkout pr-${PR_NUMBER}

	- name: Print Env
	run: \|
	echo "== pyenv =="
	pyenv versions
	echo "== python =="
	python --version
	echo "== nvcc =="
	nvcc --version
	echo "== torch =="
	pip show torch
	echo "##### pip list #####"
	pip list

	- name: Compress dir
	run: \|
	mkdir dist \|\| true
	rm -rf dist/* \|\| true
	tar -zcf ../gptqmodel_source.tar.gz ./
	mv ../gptqmodel_source.tar.gz dist/
	sha256=$(sha256sum dist/gptqmodel_source.tar.gz)
	echo "hash=$sha256"
	echo "SOURCE_HASH=$sha256" >> $GITHUB_ENV

	- name: Upload source to local
	continue-on-error: true
	run: curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.SOURCE_HASH }}" -F "file=@dist/gptqmodel_source.tar.gz" http://$RUNNER/gpu/whl/upload

	- name: Upload source to github artifact
	uses: actions/upload-artifact@v4
	with:
	name: source
	path: dist/gptqmodel_source.tar.gz

	- name: Compile
	if: github.event.inputs.artifact_id == '' && !cancelled()
	timeout-minutes: 35
	run: python setup.py bdist_wheel

	- name: Test install
	if: github.event.inputs.artifact_id == '' && !cancelled()
	run: \|
	ls -ahl dist
	whl=$(ls -t dist/*.whl \| head -n 1 \| xargs basename)
	sha256=$(sha256sum dist/$whl)
	echo "hash=$sha256"

	echo "WHL_HASH=$sha256" >> $GITHUB_ENV
	echo "WHL_NAME=$whl" >> $GITHUB_ENV

	twine check dist/$whl
	uv pip install dist/$whl

	- name: Upload wheel to local
	if: github.event.inputs.artifact_id == '' && !cancelled()
	continue-on-error: true
	run: curl -s -F "runid=${{ github.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "sha256=${{ env.WHL_HASH }}" -F "file=@dist/${{ env.WHL_NAME }}" http://$RUNNER/gpu/whl/upload

	- name: Upload wheel to github artifact
	if: github.event.inputs.artifact_id == '' && !cancelled()
	uses: actions/upload-artifact@v4
	with:
	name: whl
	path: dist/${{ env.WHL_NAME }}

	- name: Clean cache
	if: always()
	run: pip cache purge && uv cache clean && rm -rf ./* ./.*

	legacy:
	needs:
	- build
	- list-test-files
	- check-vm
	runs-on: [ self-hosted, xeon5 ]
	if: always() && !cancelled() && (needs.build.result == 'success' \|\| github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.transformers-files != '[]'
	container:
	image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v7
	volumes:
	- /home/ci/models:/monster/data/model
	- /home/ci/models/huggingface:/github/home/.cache/huggingface
	strategy:
	fail-fast: false
	max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size \|\| 20 }}
	matrix:
	test_script: ${{ fromJSON(needs.list-test-files.outputs.transformers-files) }}
	steps:
	- name: Checkout Codes
	uses: actions/checkout@v4
	with:
	repository: ${{ github.event.inputs.repo }}
	ref: ${{ github.event.inputs.ref }}

	- name: Fetch PR by number
	if: ${{ github.event.inputs.pr_number != 0 }}
	run: \|
	PR_NUMBER=${{ github.event.inputs.pr_number }}
	echo "pr number $PR_NUMBER"
	git config --global --add safe.directory $(pwd)
	git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
	git checkout pr-${PR_NUMBER}

	- name: Print Env
	run: \|
	echo "== pyenv =="
	pyenv versions
	echo "== python =="
	python --version
	echo "== nvcc =="
	nvcc --version
	echo "== torch =="
	pip show torch
	echo "== pip list =="
	pip list

	- name: Download source from local
	continue-on-error: true
	run: \|
	curl -s -O http://$RUNNER/whl/${{ env.repo }}/${{ github.run_id }}/gptqmodel_source.tar.gz
	ls -ahl .
	sha256=$(sha256sum $file_name)
	echo "sha256=$sha256"
	echo "SOURCE_DOWNLOADED=1" >> $GITHUB_ENV

	- name: Download source from github
	if: env.SOURCE_DOWNLOADED == '' && !cancelled()
	uses: actions/download-artifact@v4
	with:
	name: source
	path: dist
	run-id: ${{ github.run_id }}

	- name: Uncompress source
	continue-on-error: true
	run: \|
	find . -mindepth 1 ! -name "gptqmodel_source.tar.gz" -exec rm -rf {} +
	ls -ahl .
	tar -zxf gptqmodel_source.tar.gz

	- name: Download wheel from local
	continue-on-error: true
	run: \|
	file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://$RUNNER/gpu/whl/download")

	echo "file_name=$file_name"

	if echo "$file_name" \| grep -q "gptqmodel"; then
	mkdir dist \|\| true
	cd dist
	curl -s -O http://$RUNNER/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
	ls -ahl .
	sha256=$(sha256sum $file_name)
	echo "sha256=$sha256"
	echo "WHL_DOWNLOADED=1" >> $GITHUB_ENV
	fi

	- name: Download artifact from github
	if: env.WHL_DOWNLOADED == '' && !cancelled()
	uses: actions/download-artifact@v4
	with:
	name: whl
	path: dist
	run-id: ${{ needs.check-vm.outputs.run_id }}

	- name: Install wheel
	run: \|
	uv pip install colorlog logbar git+https://github.com/ModelCloud/Tokenicer -U
	echo "===== install optimum bitblas parameterized uvicorn ====="
	uv pip install optimum bitblas==0.0.1.dev13 parameterized uvicorn -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	echo "===== install dist/whl ====="
	uv pip install dist/*.whl -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	echo "===== init test env ====="
	echo "===== install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 ====="
	uv pip install transformers==4.38.2 typing-extensions numpy==1.26.4 peft==0.13.2 -U -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	if [ "${{ matrix.test_script }}" == "models/test_xverse" ]; then
	echo "===== install tokenizers==0.15.2 ====="
	uv pip install tokenizers==0.15.2 -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	fi
	if [ "${{ matrix.test_script }}" == "test_quant_formats" ] \|\| [ "${{ matrix.test_script }}" == "test_perplexity" ]; then
	echo "===== install auto_round ====="
	uv pip install auto_round -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	fi

	- name: Find suitable GPU
	if: ${{ !contains(matrix.test_script, 'ipex') && !cancelled() }}
	run: \|
	timestamp=$(date +%s%3N)
	gpu_id=-1

	while [ "$gpu_id" -lt 0 ]; do
	gpu_id=$(curl -s "http://$RUNNER/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }}")

	if [ "$gpu_id" -lt 0 ]; then
	echo "http://$RUNNER/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }} returned $gpu_id"
	echo "No available GPU, waiting 5 seconds..."
	curl http://$XEON5/gpu/status2
	sleep 5
	else
	echo "Allocated GPU ID: $gpu_id"
	fi
	done
	if [[ ! "$gpu_id" =~ ^[0-9]+$ ]]; then
	echo "gpu_id: $gpu_id is not a number"
	fi
	echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
	echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
	echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"
	curl http://$XEON5/gpu/status2

	- name: Run tests
	if: ${{ (!github.event.inputs.test_names \|\| contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
	run: \|
	start_time=$(date +%s)
	pytest --durations=0 tests/${{ matrix.test_script }}.py \|\| { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }
	execution_time=$(( $(date +%s) - start_time ))
	echo "$((execution_time / 60))m $((execution_time % 60))s"
	curl "http://$RUNNER/gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&name=${{ matrix.test_script }}"

	- name: Release GPU
	if: always() && !contains(matrix.test_script, 'ipex')
	run: curl -X GET "http://$RUNNER/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}"

	- name: Clean cache
	if: always()
	run: pip cache purge && uv cache clean && rm -rf ./* ./.*

	torch:
	needs:
	- build
	- list-test-files
	- check-vm
	runs-on: [ self-hosted, xeon5 ]
	if: always() && !cancelled() && (needs.build.result == 'success' \|\| github.event.inputs.artifact_id != '') && needs.list-test-files.outputs.torch-files != '[]'
	container:
	image: ${{ needs.check-vm.outputs.ip }}:5000/modelcloud/gptqmodel:github-ci-v7
	options: --device /dev/dri --ipc=host --runtime=nvidia --gpus all
	volumes:
	- /dev/dri/by-path:/dev/dri/by-path
	- /home/ci/models:/monster/data/model
	- /home/ci/models/huggingface:/github/home/.cache/huggingface
	strategy:
	fail-fast: false
	max-parallel: ${{ fromJson(needs.check-vm.outputs.max-parallel).size \|\| 20 }}
	matrix:
	test_script: ${{ fromJSON(needs.list-test-files.outputs.torch-files) }}
	steps:
	- name: Checkout Codes
	uses: actions/checkout@v4
	with:
	repository: ${{ env.repo }}
	ref: ${{ env.ref }}

	- name: Fetch PR by number
	if: ${{ github.event.inputs.pr_number != 0 }}
	run: \|
	PR_NUMBER=${{ github.event.inputs.pr_number }}
	echo "pr number $PR_NUMBER"
	git config --global --add safe.directory $(pwd)
	git fetch origin pull/${PR_NUMBER}/head:pr-${PR_NUMBER}
	git checkout pr-${PR_NUMBER}

	- name: Print Env
	run: \|
	echo "== pyenv =="
	pyenv versions
	echo "== python =="
	python --version
	echo "== nvcc =="
	nvcc --version
	echo "== torch =="
	pip show torch
	echo "== pip list =="
	pip list


	- name: Download source from local
	continue-on-error: true
	run: \|
	curl -s -O http://$RUNNER/whl/${{ env.repo }}/${{ github.run_id }}/gptqmodel_source.tar.gz
	ls -ahl .
	sha256=$(sha256sum $file_name)
	echo "sha256=$sha256"
	echo "SOURCE_DOWNLOADED=1" >> $GITHUB_ENV

	- name: Download source from github
	if: env.SOURCE_DOWNLOADED == '' && !cancelled()
	uses: actions/download-artifact@v4
	with:
	name: source
	path: dist
	run-id: ${{ github.run_id }}

	- name: Uncompress source
	continue-on-error: true
	run: \|
	find . -mindepth 1 ! -name "gptqmodel_source.tar.gz" -exec rm -rf {} +
	ls -ahl .
	tar -zxf gptqmodel_source.tar.gz

	- name: Download wheel from local
	continue-on-error: true
	run: \|
	file_name=$(curl -s -F "runid=${{ needs.check-vm.outputs.run_id }}" -F "repo=${{ env.repo }}" -F "ref=${{ env.ref }}" -F "fuzz=1" "http://$RUNNER/gpu/whl/download")

	echo "file_name=$file_name"

	if echo "$file_name" \| grep -q "gptqmodel"; then
	mkdir dist \|\| true
	cd dist
	curl -s -O http://$RUNNER/whl/${{ env.repo }}/${{ needs.check-vm.outputs.run_id }}/$file_name
	ls -ahl .
	sha256=$(sha256sum $file_name)
	echo "sha256=$sha256"
	echo "WHL_DOWNLOADED=1" >> $GITHUB_ENV
	fi

	- name: Download artifact from github
	if: env.WHL_DOWNLOADED == '' && !cancelled()
	uses: actions/download-artifact@v4
	with:
	name: whl
	path: dist
	run-id: ${{ needs.check-vm.outputs.run_id }}

	- name: Install wheel
	run: \|
	uv pip install -U transformers colorlog logbar
	if [ "${{ matrix.test_script }}" == "test_perplexity" ] \|\| \
	[ "${{ matrix.test_script }}" == "test_q4_bitblas" ] \|\| \
	[ "${{ matrix.test_script }}" == "test_save_loaded_quantized_model" ]; then
	echo "===== install bitblas==0.0.1.dev13 ====="
	uv pip install bitblas==0.0.1.dev13 -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	fi

	if [[ "${{ matrix.test_script }}" == auto_round ]]; then
	uv pip install auto_round
	fi

	if [ "${{ matrix.test_script }}" == "models/test_cohere2" ] \|\| [ "${{ matrix.test_script }}" == "models/test_gemma" ]; then
	echo "===== install transformers from git ====="
	uv pip install -U transformers -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	fi

	if [[ "${{ matrix.test_script }}" == xpu ]]; then
	echo "===== switching to xpu env ====="
	source /etc/profile.d/pyenv.sh && pyenv activate xpu
	uv pip install colorlog
	fi

	if [[ "${{ matrix.test_script }}" == ipex ]] && [[ "${{ matrix.test_script }}" != xpu ]]; then
	uv pip uninstall torchvision torch flash_attn # fix ipex can't be used with torch+cu126
	uv pip install torchvision torch
	uv pip install -U intel_extension_for_pytorch -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	fi

	if [[ "${{ matrix.test_script }}" == "mlx" ]]; then
	uv pip install mlx_lm --no-build-isolation -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	fi

	if [[ "${{ matrix.test_script }}" == "test_modelscope" ]]; then
	echo "===== installing modelscope ====="
	uv pip install modelscope --no-build-isolation -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	fi

	uv pip install git+https://github.com/ModelCloud/Tokenicer -U

	# ipex doesn't need to compile kernels. xpu can't install cuda package
	if [[ "${{ matrix.test_script }}" != ipex && "${{ matrix.test_script }}" != xpu ]]; then
	echo "===== install dist/whl ====="
	uv pip install dist/*.whl -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple
	else
	echo "===== install with local files for xpu env ====="
	export CUDA_VISIBLE_DEVICES=""
	unset TORCH_CUDA_ARCH_LIST
	uv pip install . --no-build-isolation
	fi

	if [ "${{ matrix.test_script }}" == "test_transformers" ]; then
	echo "===== install optimum from git ====="
	uv pip install -U git+https://github.com/huggingface/optimum.git -i http://$RUNNER/simple/ --trusted-host $RUNNER
	fi

	if [[ "${{ matrix.test_script }}" == "test_sglang" ]]; then
	uv pip install transformers==4.48.3
	uv pip install numpy==1.26.3
	fi

	- name: Find suitable GPU
	if: ${{ !contains(matrix.test_script, 'ipex') && !contains(matrix.test_script, 'xpu') && !cancelled() }}
	run: \|
	timestamp=$(date +%s%3N)
	gpu_id=-1

	while [ "$gpu_id" -lt 0 ]; do
	gpu_id=$(curl -s "http://$RUNNER/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }}")

	if [ "$gpu_id" -lt 0 ]; then
	echo "http://$RUNNER/gpu/get?id=${{ github.run_id }}&timestamp=$timestamp&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}&exclusive=${{ github.event.inputs.exclusive-gpu }} returned $gpu_id"
	echo "No available GPU, waiting 5 seconds..."
	curl http://$XEON5/gpu/status2
	sleep 5
	else
	echo "Allocated GPU ID: $gpu_id"
	fi
	done
	if [[ ! "$gpu_id" =~ ^[0-9]+$ ]]; then
	echo "gpu_id: $gpu_id is not a number"
	fi
	echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV
	echo "STEP_TIMESTAMP=$timestamp" >> $GITHUB_ENV
	echo "CUDA_VISIBLE_DEVICES set to $gpu_id, timestamp=$timestamp"
	curl http://$XEON5/gpu/status2

	- name: Run tests
	if: ${{ (!github.event.inputs.test_names \|\| contains(github.event.inputs.test_names, matrix.test_script)) && !cancelled() }}
	run: \|
	if [[ "${{ matrix.test_script }}" == ipex ]]; then
	export CUDA_VISIBLE_DEVICES=""
	fi
	if [[ "${{ matrix.test_script }}" == xpu ]]; then
	export CUDA_VISIBLE_DEVICES=""
	source /etc/profile.d/pyenv.sh && pyenv activate xpu
	pip uninstall vllm -y
	pip list
	fi

	start_time=$(date +%s)
	pytest --durations=0 tests/${{ matrix.test_script }}.py \|\| { echo "ERROR=1" >> $GITHUB_ENV; exit 1; }
	execution_time=$(( $(date +%s) - start_time ))
	echo "$((execution_time / 60))m $((execution_time % 60))s"
	curl "http://$RUNNER/gpu/log_test_vram?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&range=$execution_time&unit=second&test=${{ matrix.test_script }}"

	- name: Release GPU
	if: always() && !contains(matrix.test_script, 'ipex') && !contains(matrix.test_script, 'xpu')
	run: curl -X GET "http://$RUNNER/gpu/release?id=${{ github.run_id }}&gpu=${{ env.CUDA_VISIBLE_DEVICES }}&timestamp=${{ env.STEP_TIMESTAMP }}&test=${{ matrix.test_script }}&runner=${RUNNER_NAME}"

	- name: Clean cache
	if: always()
	run: \|
	# rm ~/.cache/evalplus/*pkl \|\| true
	pip cache purge && uv cache clean && rm -rf ./* ./.*

	show-statistics:
	runs-on: [ self-hosted, xeon5 ]
	if: github.event.inputs.exclusive-gpu
	container:
	image: modelcloud/gptqmodel:alpine-ci-v1
	needs:
	- legacy
	- torch
	steps:
	- name: Print statistics
	run: curl "http://$RUNNER/gpu/get_vram_logs?id=${{ github.run_id }}"

	m4:
	runs-on: [ self-hosted, m4 ]
	needs:
	- check-vm
	- list-test-files
	if: (github.event.inputs.test_names == '' \|\| contains(github.event.inputs.test_names, 'apple') \|\| contains(github.event.inputs.test_names, 'mlx') ) && (needs.list-test-files.outputs.m4-files != '' && needs.list-test-files.outputs.m4-files != '[]') && !cancelled()
	strategy:
	fail-fast: false
	matrix:
	test_script: ${{ fromJSON(needs.list-test-files.outputs.m4-files) }}
	steps:
	- name: Print Env
	run: \|
	echo "repo: ${{ env.repo }}"
	echo "ref: ${{ env.ref }}"
	ls -ahl .

	- name: Checkout Codes
	uses: actions/checkout@v4
	with:
	repository: ${{ env.repo }}
	ref: ${{ env.ref }}

	- name: Run test
	run: \|
	export PATH="/opt/homebrew/bin:$PATH" && eval "$(pyenv init -)"
	rm -rf venv \|\| true

	echo "=== checking models dir is mounted"
	ls ../../../monster

	echo "=== activating venv"
	pyenv global 3.11.11 && python -m venv venv
	source venv/bin/activate

	rm profile.sb \|\| true

	curl -O http://$RUNNER/scripts/m4/profile.sb

	echo "=== installing uv setuptools build"
	pip install uv setuptools build -U -i http://$RUNNER/simple --trusted-host $RUNNER --extra-index-url https://pypi.org/simple

	echo "=== installing test tools"
	uv pip install pytest parameterized vllm lm-eval device-smi mlx-lm -U -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple

	echo "=== installing gptqmodel"
	uv pip install . --no-build-isolation -i http://$RUNNER/simple/ --trusted-host $RUNNER --extra-index-url https://pypi.org/simple

	echo "replacing model path"
	find tests -name "*.py" -exec sed -i '' 's/\/monster\/data\/model/..\/..\/..\/monster/g' {} +

	TEST=${{ matrix.test_script }}
	if [[ ! "$TEST" == *.py ]]; then
	TEST="$TEST.py"
	fi
	echo "=== running test: $TEST"
	pytest tests/$TEST

	- name: Clean cache
	if: always()
	run: \|
	source venv/bin/activate && pip cache purge && uv cache clean \|\| true
	rm -rf ../GPTQModel && mkdir ../GPTQModel

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Unit Tests #1300

Workflow file

Unit Tests #1300

Jobs

Run details

Workflow file for this run