Skip to content

[Nightly] Enable bisect search #1849

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions .github/scripts/bisect_search.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/bin/bash
set -xe
export GIT_PAGER=cat

# Init params
WORKSPACE=$(realpath ${WORKSPACE:-"/tmp"})
PYTORCH_VERSION=${PYTORCH_VERSION:-"main"}
TORCH_XPU_OPS_VERSION=${TORCH_XPU_OPS_VERSION:-"main"}
for var; do
eval "export $(echo ${var@Q} |sed "s/^'-*//g;s/=/='/")"
done

# Clean WORKSPACE
mkdir -p ${WORKSPACE}
rm -rf "${WORKSPACE:?}/"* || sudo rm -rf "${WORKSPACE:?}/"*

# Build pytorch
pip uninstall -y torch
source $(dirname $(realpath $0))/env.sh 2> /dev/null
build_status="$($(dirname $(realpath $0))/build.sh \
--WORKSPACE="${WORKSPACE}" \
--PYTORCH_VERSION="${PYTORCH_VERSION}" \
--TORCH_XPU_OPS_VERSION="${TORCH_XPU_OPS_VERSION}" \
> ${GITHUB_WORKSPACE}/gs-logs/build-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
if [ ${build_status} -ne 0 ];then
tail -n 100 ${GITHUB_WORKSPACE}/gs-logs/build-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log
echo "Build got failed"
exit 1
fi
pip list |grep torch

# Test
test_result=1
if [ "${SEARCH_CHECK}" == "accuracy" ];then
cd ${WORKSPACE}/pytorch
rm -rf torch
test_status="$(eval "${SEARCH_CASE} --output=${WORKSPACE}/tmp.csv" \
> ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
if [ ${test_status} -eq 0 ];then
acc_result=$(tail -n 1 ${WORKSPACE}/tmp.csv |awk -F, '{print $4}')
if [[ "${acc_result}" == "pass"* ]];then
test_result=0
fi
fi
elif [ "${SEARCH_CHECK}" == "performance" ];then
cd ${WORKSPACE}/pytorch
rm -rf torch
test_status="$(eval "${SEARCH_CASE} --output=${WORKSPACE}/tmp.csv" \
> ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
if [ ${test_status} -eq 0 ];then
perf_result=$(tail -n 1 ${WORKSPACE}/tmp.csv |awk -F, '{print $5}')
test_result=$(echo "${perf_result},${SEARCH_GOOD_VALUE:-"0.00001"},${SEARCH_CRITERIA}" |awk -F, '{
if ($1/$2 > (1 - $3)){
print "0";
}else{
print "1";
}
}')
fi
elif [ "${SEARCH_CHECK}" == "ut_regressions" ];then
cd ${WORKSPACE}/pytorch/third_party/torch-xpu-ops/test/regressions
test_status="$(eval "${SEARCH_CASE}" \
> ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
if [ ${test_status} -eq 0 ];then
test_result=0
fi
elif [ "${SEARCH_CHECK}" == "ut_extended" ];then
cd ${WORKSPACE}/pytorch/third_party/torch-xpu-ops/test/xpu/extended
test_status="$(eval "${SEARCH_CASE}" \
> ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
if [ ${test_status} -eq 0 ];then
test_result=0
fi
elif [ "${SEARCH_CHECK}" == "ut_xpu" ];then
cd ${WORKSPACE}/pytorch/third_party/torch-xpu-ops/test/xpu
test_status="$(eval "${SEARCH_CASE}" \
> ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
if [ ${test_status} -eq 0 ];then
test_result=0
fi
else
test_status="$(eval "${SEARCH_CASE}" \
> ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log 2>&1 && echo $? || echo $?)"
if [ ${test_status} -eq 0 ];then
test_result=0
fi
fi

# Test result
cat ${GITHUB_WORKSPACE}/gs-logs/test-${PYTORCH_VERSION}-${TORCH_XPU_OPS_VERSION}.log
echo "${test_result},${acc_result},${perf_result},${PYTORCH_VERSION},${TORCH_XPU_OPS_VERSION}" |\
tee -a ${GITHUB_WORKSPACE}/gs-logs/summary.csv |tee -a ${WORKSPACE}/result.csv
exit ${test_result}
244 changes: 244 additions & 0 deletions .github/workflows/_bisect_search.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
name: Bisect Search

on:
workflow_dispatch:
inputs:
runner:
required: true
type: string
default: 'pvc_rolling'
description: Test node
search_commits:
required: true
type: string
default: ''
description: Target commits, such as 'pytorch=old/new,xpu-ops=old/new'
search_check:
type: string
default: ''
description: Test case type, 'performance, accuracy, <ut_regressions/ut_extended/ut_xpu> or others'
search_case:
required: true
type: string
default: ''
description: Test case, such as 'python xxx.py or pytest -k xxx'
search_criteria:
type: string
default: '0.1'
description: Criteria for performance check, default is 10%
triton:
type: string
default: 'pinned'
description: Triton pinned by pytorch by default, or 'commit/branch', or 'repo@commit/repo@branch'
oneapi:
type: string
default: 'installed'
description: Installed oneAPI DLE on host by default, fill offline.sh url if needed
python:
type: string
default: '3.10'
description: Python version

permissions: read-all

jobs:
get_runner:
runs-on: ${{ inputs.runner }}
outputs:
test_host: ${{ steps.runner-info.outputs.test_host }}
test_user: ${{ steps.runner-info.outputs.test_user }}
test_group: ${{ steps.runner-info.outputs.test_group }}
steps:
- name: Get runner info
id: runner-info
run: |
# get test runner
echo "test_host=${RUNNER_NAME}" |tee -a ${GITHUB_OUTPUT}
echo "test_user=$(id -u)" |tee -a ${GITHUB_OUTPUT}
echo "test_group=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT}
# show host info
cat /etc/os-release
uname -a
source /opt/intel/oneapi/setvars.sh
sycl-ls
dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev'
- name: Cleanup workspace
if: ${{ always() }}
run: |
# clean docker cache
docker stop $(docker ps -aq) || true
docker system prune -af || true
# clean files
ls -al
sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf

biisect-search:
needs: get_runner
runs-on: ${{ needs.get_runner.outputs.test_host }}
container:
image: mengfeili/intel-pvc-driver:1146-1136
volumes:
- ${{ github.workspace }}:${{ github.workspace }}
options: --device=/dev/mem --device=/dev/dri --group-add video --privileged --shm-size=8g
-u ${{ needs.get_runner.outputs.test_user }}:${{ needs.get_runner.outputs.test_group }}
env:
AGENT_TOOLSDIRECTORY: /tmp/_tools
GH_TOKEN: ${{ github.token }}
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
SEARCH_COMMITS: ${{ inputs.search_commits }}
SEARCH_CHECK: ${{ inputs.search_check }}
SEARCH_CASE: ${{ inputs.search_case }}
SEARCH_CRITERIA: ${{ inputs.search_criteria }}
defaults:
run:
shell: bash -xe {0}
steps:
- name: Setup python-${{ inputs.python }}
uses: actions/setup-python@v5
with:
python-version: ${{ inputs.python }}
- name: Check runner
run: |
ls -al
find ./ |grep -v "^\./$" |xargs rm -rf
hostname && whoami && id
clinfo --list
gcc -v && g++ -v
which python && which pip
python -V
pip install -U pip wheel setuptools
pip list
uname -a
dpkg -l |grep -E 'libigc-dev|libze-dev|level-zero-dev'
pip install cmake ninja pandas psutil scipy requests pybind11
mkdir gs-logs gs-search
echo "Status,Acc,Perf,PyTorch,Torch-xpu-ops" > gs-logs/summary.csv
- name: Install oneAPI DLE
if: ${{ inputs.oneapi != 'installed' }}
run: |
rm -rf ~/intel ~/.intel
wget -q -O oneapi.sh "${{ inputs.oneapi }}"
bash oneapi.sh -a -s --eula accept --action install --install-dir ${HOME}/intel/oneapi
echo "XPU_ONEAPI_PATH=${HOME}/intel/oneapi" >> ${GITHUB_ENV}
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
with:
path: gs-scripts
- name: Prepare source code
run: |
git clone https://github.com/pytorch/pytorch gs-pytorch
cd gs-pytorch
LATEST_PT_COMMIT="$(git rev-parse HEAD)"
cd ..
git clone https://github.com/intel/torch-xpu-ops gs-torch-xpu-ops
cd gs-torch-xpu-ops
LATEST_XPU_COMMIT="$(git rev-parse HEAD)"
cd ..
echo "LATEST_PT_COMMIT=${LATEST_PT_COMMIT}" >> ${GITHUB_ENV}
echo "LATEST_XPU_COMMIT=${LATEST_XPU_COMMIT}" >> ${GITHUB_ENV}
- name: Prepare test env
run: |
if [[ "${{ inputs.search_case }}" == *"benchmarks/dynamo/huggingface.py"* ]];then
pip install transformers==4.44.2
elif [[ "${{ inputs.search_case }}" == *"benchmarks/dynamo/timm_models.py"* ]];then
pip install --no-deps git+https://github.com/huggingface/[email protected]
pip install $(curl -sSL https://raw.githubusercontent.com/huggingface/pytorch-image-models/v1.0.14/requirements.txt | grep -vE torch)
elif [[ "${{ inputs.search_case }}" == *"benchmarks/dynamo/torchbench.py"* ]];then
model_name="$(echo ${{ inputs.search_case }} |sed 's+.*\--only *++;s/ .*//')"
pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/xpu
git clone https://github.com/pytorch/benchmark gs-benchmark
cd gs-benchmark
echo "PYTHONPATH=${PWD}:${PYTHONPATH}" >> ${GITHUB_ENV}
python install.py ${model_name}
pip uninstall -y torch
else
pip install -r gs-pytorch/.ci/docker/requirements-ci.txt
fi
- name: Triton Installation
run: |
cd gs-pytorch
rm -rf pytorch_triton_xpu-*.whl
if [ "${{ inputs.triton }}" != "pinned" ];then
TRITON_COMMIT_ID="${{ inputs.triton }}"
else
TRITON_COMMIT_ID="$(cat .ci/docker/ci_commit_pins/triton-xpu.txt)"
fi
TRITON_VERSION_NAME="$(
curl -sSL https://raw.githubusercontent.com/intel/intel-xpu-backend-for-triton/${TRITON_COMMIT_ID}/python/triton/__init__.py 2>&1 |\
grep '__version__' |head -n 1 |awk -F "'" '{print $2}'
)"
python .github/scripts/build_triton_wheel.py --device xpu --commit-hash ${TRITON_COMMIT_ID} --triton-version ${TRITON_VERSION_NAME}
pip install pytorch_triton_xpu-*.whl
- name: Bisect search pytorch
if: ${{ contains(inputs.search_commits, 'pytorch') }}
run: |
pytorch_commits="$(echo ${{ inputs.search_commits }} |sed 's+.*pytorch=++;s+,.*++')"
old_commit="$(echo ${pytorch_commits} |awk -F '/' '{print $1}')"
new_commit="$(echo ${pytorch_commits} |awk -F '/' '{print $2}')"
old_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \
--WORKSPACE="${{ github.workspace }}/gs-search" \
--PYTORCH_VERSION="${old_commit}" \
--TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \
> ${{ github.workspace }}/gs-logs/search-${old_commit}-${LATEST_XPU_COMMIT}.log 2>&1 && echo $? || echo $?)"
old_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)"
export SEARCH_GOOD_VALUE="$(echo ${old_result} |awk -F, '{print $3}')"
new_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \
--WORKSPACE="${{ github.workspace }}/gs-search" \
--PYTORCH_VERSION="${new_commit}" \
--TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \
> ${{ github.workspace }}/gs-logs/search-${new_commit}-${LATEST_XPU_COMMIT}.log 2>&1 && echo $? || echo $?)"
new_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)"
if [ "${old_status}" != "${new_status}" ];then
cd gs-pytorch
git reset --hard
bisect_status="$(git bisect start ${new_commit} ${old_commit} \
${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \
--WORKSPACE="${{ github.workspace }}/gs-search" \
--PYTORCH_VERSION="$(git rev-parse HEAD)" \
--TORCH_XPU_OPS_VERSION="${LATEST_XPU_COMMIT}" \
> ${{ github.workspace }}/gs-logs/bisect-pytorch.log 2>&1 && echo $? || echo $?)"
git bisect log |tee ${{ github.workspace }}/gs-logs/result-pytorch.log
else
echo "Checked and no regression !"
fi
- name: Bisect search torch-xpu-ops
if: ${{ contains(inputs.search_commits, 'xpu-ops') }}
run: |
xpu_ops_commits="$(echo ${{ inputs.search_commits }} |sed 's+.*xpu-ops=++;s+,.*++')"
old_commit="$(echo ${xpu_ops_commits} |awk -F '/' '{print $1}')"
new_commit="$(echo ${xpu_ops_commits} |awk -F '/' '{print $2}')"
old_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \
--WORKSPACE="${{ github.workspace }}/gs-search" \
--PYTORCH_VERSION="${LATEST_PT_COMMIT}" \
--TORCH_XPU_OPS_VERSION="${old_commit}" \
> ${{ github.workspace }}/gs-logs/search-${LATEST_PT_COMMIT}-${old_commit}.log && echo $? || echo $?)"
old_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)"
export SEARCH_GOOD_VALUE="$(echo ${old_result} |awk -F, '{print $3}')"
new_status="$(${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \
--WORKSPACE="${{ github.workspace }}/gs-search" \
--PYTORCH_VERSION="${LATEST_PT_COMMIT}" \
--TORCH_XPU_OPS_VERSION="${new_commit}" \
> ${{ github.workspace }}/gs-logs/search-${LATEST_PT_COMMIT}-${new_commit}.log && echo $? || echo $?)"
new_result="$(tail -n 1 ${{ github.workspace }}/gs-search/result.csv)"
if [ "${old_status}" != "${new_status}" ];then
cd gs-pytorch
git reset --hard
bisect_status="$(
git bisect start ${new_commit} ${old_commit} \
${{ github.workspace }}/gs-scripts/.github/scripts/bisect_search.sh \
--WORKSPACE="${{ github.workspace }}/gs-search" \
--PYTORCH_VERSION="${LATEST_PT_COMMIT}" \
--TORCH_XPU_OPS_VERSION="$(git rev-parse HEAD)" \
> ${{ github.workspace }}/gs-logs/bisect-torch-xpu-ops.log 2>&1 && echo $? || echo $?)"
git bisect log |tee ${{ github.workspace }}/gs-logs/result-torch-xpu-ops.log
else
echo "Checked and no regression !"
fi
- name: Summary
run: |
cat gs-logs/summary.csv |tee -a ${GITHUB_STEP_SUMMARY}
for reulst_log in $(find gs-logs -name "result-*.log")
do
echo -e "\n\n\n${reulst_log}" |tee -a ${GITHUB_STEP_SUMMARY}
cat ${reulst_log} |tee -a ${GITHUB_STEP_SUMMARY}
done