-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9041a72
commit e29775b
Showing
3 changed files
with
302 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,299 @@ | ||
name: CI Linux HSA | ||
|
||
on: | ||
workflow_call: | ||
workflow_dispatch: | ||
pull_request: | ||
merge_group: | ||
push: | ||
branches: | ||
- main | ||
|
||
concurrency: | ||
group: ci-build-test-cpp-linux-hsa-${{ github.event.number || github.sha }} | ||
cancel-in-progress: true | ||
|
||
jobs: | ||
build_hsa: | ||
name: Build HSA (linux) | ||
runs-on: nod-ai-shared-cpubuilder-manylinux-x86_64 | ||
steps: | ||
- name: Configure local git mirrors | ||
run: | | ||
/gitmirror/scripts/trigger_update_mirrors.sh | ||
/gitmirror/scripts/git_config.sh | ||
- name: "Checking out repository" | ||
env: | ||
BRANCH_NAME: ${{ github.ref }} | ||
REPO_ADDRESS: ${{ github.server_url }}/${{ github.repository }} | ||
run: | | ||
git init | ||
git remote add origin $REPO_ADDRESS | ||
git -c protocol.version=2 fetch --depth 1 origin $BRANCH_NAME | ||
git reset --hard FETCH_HEAD | ||
git -c submodule."third_party/torch-mlir".update=none -c submodule."third_party/stablehlo".update=none -c submodule."src/runtime_src/core/common/aiebu".update=none submodule update --init --recursive --depth 1 --single-branch -j 10 | ||
- name: Install deps | ||
run: | | ||
dnf install -y almalinux-release-devel | ||
yum install -y elfutils-libelf-devel p7zip p7zip-plugins \ | ||
sudo ncurses-compat-libs openssh vim-common | ||
- name: Build and install libnuma | ||
working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime | ||
run: | | ||
curl --silent -L \ | ||
https://github.com/numactl/numactl/releases/download/v2.0.18/numactl-2.0.18.tar.gz \ | ||
-o numactl-2.0.18.tar.gz | ||
tar -xf numactl-2.0.18.tar.gz | ||
pushd numactl-2.0.18 | ||
./configure | ||
# i have no idea why this is necessary | ||
# but without it you get something about "can't cd into dir" | ||
sed -i '7563s/`cd "$dir" && pwd`/$dir/g' libtool | ||
make install | ||
popd | ||
- name: Hack ROCR | ||
working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime | ||
run: | | ||
sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/core/runtime/blit_shaders/CMakeLists.txt | ||
sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/core/runtime/trap_handler/CMakeLists.txt | ||
sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/image/blit_src/CMakeLists.txt | ||
- name: Get compatible Clang | ||
working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime | ||
run: | | ||
pip download mlir==20.0.0.2024090301+amdgpu.b6597f52 -f https://makslevental.github.io/wheels | ||
unzip -q mlir-*.whl | ||
- name: Build ROCR distro | ||
working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime | ||
run: | | ||
rocr_dir="$PWD" | ||
build_rocr_dir="$PWD/rocr-build" | ||
mkdir -p "$build_rocr_dir" | ||
build_rocr_dir="$(cd $build_rocr_dir && pwd)" | ||
rocr_install_dir="$PWD/rocr-install" | ||
cmake -GNinja \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
-DCMAKE_INSTALL_PREFIX="$rocr_install_dir" \ | ||
-DClang_DIR=$PWD/mlir/lib/cmake/clang \ | ||
-DLLVM_DIR=$PWD/mlir/lib/cmake/mlir \ | ||
-DIMAGE_SUPPORT=OFF \ | ||
-S "$rocr_dir" -B "$build_rocr_dir" | ||
cmake --build "$build_rocr_dir" --target install | ||
tar -cf rocr-${GITHUB_SHA::8}.tar rocr-install | ||
- name: Upload artifacts | ||
uses: actions/upload-artifact@v4 | ||
if: ${{ !cancelled() }} | ||
with: | ||
name: linux_hsa_x86_64_release_packages | ||
path: ${{ github.workspace }}/third_party/ROCR-Runtime/rocr-*.tar | ||
if-no-files-found: error | ||
|
||
build_and_ctest: | ||
name: Build and Test with HSA (linux, ASSERTIONS) | ||
needs: [build_hsa] | ||
runs-on: nod-ai-shared-cpubuilder-manylinux-x86_64 | ||
strategy: | ||
fail-fast: true | ||
env: | ||
CACHE_DIR: ${{ github.workspace }}/.container-cache | ||
# either the PR number or `branch-N` where N always increments | ||
CACHE_KEY: linux-build-test-cpp-asserts-manylinux-v2-${{ format('{0}-{1}', github.ref_name, github.run_number) }} | ||
steps: | ||
- name: Set unified TZ | ||
uses: szenius/[email protected] | ||
with: | ||
# this is an arbitrary choice | ||
timezoneLinux: "Asia/Singapore" | ||
timezoneMacos: "Asia/Singapore" | ||
timezoneWindows: "Singapore Standard Time" | ||
|
||
- name: Configure local git mirrors | ||
run: | | ||
/gitmirror/scripts/trigger_update_mirrors.sh | ||
/gitmirror/scripts/git_config.sh | ||
- name: "Checking out repository" | ||
env: | ||
BRANCH_NAME: ${{ github.ref }} | ||
REPO_ADDRESS: ${{ github.server_url }}/${{ github.repository }} | ||
run: | | ||
git init | ||
git remote add origin $REPO_ADDRESS | ||
git -c protocol.version=2 fetch --depth 1 origin $BRANCH_NAME | ||
git reset --hard FETCH_HEAD | ||
git -c submodule."third_party/torch-mlir".update=none -c submodule."third_party/stablehlo".update=none -c submodule."src/runtime_src/core/common/aiebu".update=none submodule update --init --recursive --depth 1 --single-branch -j 10 | ||
- name: Install deps | ||
run: | | ||
dnf install -y almalinux-release-devel epel-release | ||
yum remove -y openssl-devel zlib-devel || true | ||
yum install -y protobuf-devel protobuf-compiler tmate | ||
- name: Python deps | ||
run: | | ||
pip install "numpy<2" pyyaml "pybind11[global]==2.10.3" nanobind | ||
- name: Enable cache | ||
uses: actions/cache/restore@v3 | ||
with: | ||
path: ${{ env.CACHE_DIR }} | ||
key: ${{ env.CACHE_KEY }} | ||
restore-keys: linux-build-test-cpp- | ||
|
||
- name: Download artifacts | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: linux_hsa_x86_64_release_packages | ||
|
||
- name: Extract artifact | ||
run: | | ||
tar -xvf rocr-*.tar | ||
echo "hsa-runtime64_ROOT=$PWD/rocr-install" >> $GITHUB_ENV | ||
echo IREE_EXTERNAL_HAL_DRIVER=hsa >> $GITHUB_ENV | ||
- name: Build packages | ||
run: | | ||
export cache_dir="${{ env.CACHE_DIR }}" | ||
export CCACHE_COMPILERCHECK="string:$(clang --version)" | ||
bash build_tools/ci/build_test_cpp.sh | ||
- name: Create artifacts | ||
if: ${{ !cancelled() }} | ||
run: | | ||
tar cf iree-dist-linux.tar iree-install | ||
- name: Upload artifacts | ||
uses: actions/upload-artifact@v4 | ||
if: ${{ !cancelled() }} | ||
with: | ||
name: linux_x86_64_release_packages | ||
path: iree-dist-linux.tar | ||
if-no-files-found: warn | ||
|
||
- name: Save cache | ||
uses: actions/cache/save@v3 | ||
if: ${{ !cancelled() && github.event_name == 'push' && github.ref_name == 'main' }} | ||
with: | ||
path: ${{ env.CACHE_DIR }} | ||
key: ${{ env.CACHE_KEY }} | ||
|
||
test_linux: | ||
name: E2E Test linux with HSA | ||
needs: build_and_ctest | ||
strategy: | ||
fail-fast: false | ||
matrix: | ||
runs-on: [linux-phoenix] | ||
runs-on: ${{ matrix.runs-on }} | ||
env: | ||
XILINXD_LICENSE_FILE: /opt/xilinx/Xilinx.lic | ||
steps: | ||
- name: "Checking out repository" # for test scripts | ||
uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 | ||
with: | ||
submodules: false # not required for testbench | ||
|
||
- name: Download artifacts | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: linux_x86_64_release_packages | ||
|
||
- name: Extract artifact | ||
run: | | ||
tar -xvf iree-dist-linux.tar | ||
bash build_tools/download_peano.sh | ||
- name: Create venv and install dependencies | ||
run: | | ||
python3 -m venv .venv | ||
source .venv/bin/activate | ||
pip install -r tests/matmul/requirements.txt | ||
- name: E2E correctness matmul test | ||
run: | | ||
source .venv/bin/activate | ||
# Without this additional line an error like | ||
# | ||
# [XRT] ERROR: Failed to allocate host memory buffer (mmap(len=10616832, prot=3, flags=8193, offset=4294967296) | ||
# failed (err=11): Resource temporarily unavailable), make sure host bank is enabled (see xbutil configure --host-mem) | ||
# iree-amd-aie/runtime/src/iree-amd-aie/driver/xrt/direct_allocator.cc:179: RESOURCE_EXHAUSTED; could not allocate | ||
# memory for buffer; while invoking C++ function matmul_test.generate_random_matrix; while calling import; | ||
# | ||
# might be observed when too much memory is allocated. For example this | ||
# error was seen when running a bf16->f32 matmul with m=n=k=2304. | ||
# | ||
# This line was suggested at https://github.com/Xilinx/mlir-air/issues/566 | ||
# | ||
# Note that this is only half of the fix. It is also necessary that | ||
# the machine that CI is running on has permission to run this line. | ||
# | ||
# This permission can be adding by adding the line | ||
# ``` | ||
# %github ALL=(ALL) NOPASSWD: /usr/bin/prlimit * | ||
# ``` | ||
# | ||
# to the file /etc/sudoers.d/github, which can be done by running | ||
# ``` | ||
# sudo visudo -f /etc/sudoers.d/github | ||
# ``` | ||
# on the guthub CI machine. | ||
sudo prlimit -lunlimited --pid $$ | ||
source /opt/xilinx/xrt/setup.sh | ||
bash build_tools/ci/run_matmul_test.sh \ | ||
test_matmuls \ | ||
iree-install \ | ||
$PWD/llvm-aie \ | ||
/opt/xilinx/xrt \ | ||
/opt/Xilinx/Vitis/2024.2 | ||
- name : Smoke E2E comparison flag test | ||
run: | | ||
source .venv/bin/activate | ||
source /opt/xilinx/xrt/setup.sh | ||
python3 build_tools/ci/cpu_comparison/run_test.py \ | ||
test_aie_vs_cpu \ | ||
iree-install \ | ||
$PWD/llvm-aie \ | ||
--xrt-dir /opt/xilinx/xrt \ | ||
--test-set='Smoke' \ | ||
--do-not-run-aie | ||
# Assert that output.log is empty (because verbose=0) | ||
if [ -s output.log ]; then | ||
echo "output.log is not empty:" | ||
cat output.log | ||
exit 1 | ||
else | ||
echo "output.log is empty" | ||
fi | ||
- name : E2E comparison of AIE to llvm-cpu | ||
run: | | ||
source .venv/bin/activate | ||
source /opt/xilinx/xrt/setup.sh | ||
python3 build_tools/ci/cpu_comparison/run_test.py \ | ||
test_aie_vs_cpu \ | ||
$PWD/iree-install \ | ||
$PWD/llvm-aie \ | ||
--xrt-dir /opt/xilinx/xrt \ | ||
--vitis-dir /opt/Xilinx/Vitis/2024.2 \ | ||
--reset-npu-between-runs -v | ||
- name: Printing IR from aie2xclbin | ||
run: | | ||
source .venv/bin/activate | ||
source /opt/xilinx/xrt/setup.sh | ||
bash build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh \ | ||
iree-install \ | ||
print_ir_aie2xclbin_results \ | ||
$PWD/llvm-aie |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters