Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] HSA HAL #723

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
299 changes: 299 additions & 0 deletions .github/workflows/ci-hsa-linux.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,299 @@
# Workflow identity, triggers and concurrency policy for the Linux HSA CI.
name: CI Linux HSA

# Runs when called from another workflow, when dispatched manually, on pull
# requests, on merge-queue groups, and on pushes to main.
on:
  workflow_call:
  workflow_dispatch:
  pull_request:
  merge_group:
  push:
    branches: [main]

# Runs are grouped by `github.event.number`, falling back to the commit SHA
# when that is empty; a newer run in the same group cancels the one in flight.
concurrency:
  group: ci-build-test-cpp-linux-hsa-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
# Builds the ROCR (HSA) runtime from the third_party/ROCR-Runtime submodule and
# publishes the install tree as the `linux_hsa_x86_64_release_packages` artifact.
build_hsa:
  name: Build HSA (linux)
  runs-on: nod-ai-shared-cpubuilder-manylinux-x86_64
  steps:
    - name: Configure local git mirrors
      run: |
        /gitmirror/scripts/trigger_update_mirrors.sh
        /gitmirror/scripts/git_config.sh

    # Manual shallow checkout of the triggering ref. The torch-mlir, stablehlo
    # and aiebu submodules are excluded from the recursive update.
    - name: "Checking out repository"
      env:
        BRANCH_NAME: ${{ github.ref }}
        REPO_ADDRESS: ${{ github.server_url }}/${{ github.repository }}
      run: |
        git init
        git remote add origin "$REPO_ADDRESS"
        git -c protocol.version=2 fetch --depth 1 origin "$BRANCH_NAME"
        git reset --hard FETCH_HEAD
        git \
          -c submodule."third_party/torch-mlir".update=none \
          -c submodule."third_party/stablehlo".update=none \
          -c submodule."src/runtime_src/core/common/aiebu".update=none \
          submodule update --init --recursive --depth 1 --single-branch -j 10

    - name: Install deps
      run: |
        dnf install -y almalinux-release-devel
        yum install -y elfutils-libelf-devel p7zip p7zip-plugins \
          sudo ncurses-compat-libs openssh vim-common

    - name: Build and install libnuma
      working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
      run: |
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the error page as the tarball.
        curl --fail --silent --show-error -L \
          https://github.com/numactl/numactl/releases/download/v2.0.18/numactl-2.0.18.tar.gz \
          -o numactl-2.0.18.tar.gz
        tar -xf numactl-2.0.18.tar.gz
        pushd numactl-2.0.18
        ./configure
        # Workaround: without this patch to the generated libtool script the
        # build reports an error along the lines of "can't cd into dir". The
        # root cause is unknown.
        # NOTE(review): the hard-coded line number 7563 presumably only matches
        # the libtool generated for numactl 2.0.18 - re-check when bumping.
        sed -i '7563s/`cd "$dir" && pwd`/$dir/g' libtool
        make install
        popd

    # Appends -nogpulib to the amdgcn-amd-amdhsa target flag in the ROCR
    # CMake files that compile device code.
    - name: Hack ROCR
      working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
      run: |
        for cmake_lists in \
          runtime/hsa-runtime/core/runtime/blit_shaders/CMakeLists.txt \
          runtime/hsa-runtime/core/runtime/trap_handler/CMakeLists.txt \
          runtime/hsa-runtime/image/blit_src/CMakeLists.txt; do
          sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' "$cmake_lists"
        done

    # Downloads a pinned mlir wheel and unpacks it in place; the next step
    # points Clang_DIR / LLVM_DIR into the resulting ./mlir directory.
    - name: Get compatible Clang
      working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
      run: |
        pip download mlir==20.0.0.2024090301+amdgpu.b6597f52 -f https://makslevental.github.io/wheels
        unzip -q mlir-*.whl

    - name: Build ROCR distro
      working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
      run: |
        rocr_dir="$PWD"
        build_rocr_dir="$PWD/rocr-build"
        mkdir -p "$build_rocr_dir"
        build_rocr_dir="$(cd "$build_rocr_dir" && pwd)"
        rocr_install_dir="$PWD/rocr-install"

        # NOTE(review): LLVM_DIR points at the wheel's cmake/mlir directory,
        # not cmake/llvm - confirm this is intended.
        cmake -GNinja \
          -DCMAKE_BUILD_TYPE=Release \
          -DCMAKE_INSTALL_PREFIX="$rocr_install_dir" \
          -DClang_DIR="$PWD/mlir/lib/cmake/clang" \
          -DLLVM_DIR="$PWD/mlir/lib/cmake/mlir" \
          -DIMAGE_SUPPORT=OFF \
          -S "$rocr_dir" -B "$build_rocr_dir"

        cmake --build "$build_rocr_dir" --target install
        # The archive is named after the first 8 characters of the commit SHA.
        tar -cf "rocr-${GITHUB_SHA::8}.tar" rocr-install

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      if: ${{ !cancelled() }}
      with:
        name: linux_hsa_x86_64_release_packages
        path: ${{ github.workspace }}/third_party/ROCR-Runtime/rocr-*.tar
        if-no-files-found: error

# Builds and tests IREE with the HSA HAL driver enabled, consuming the ROCR
# install tree produced by `build_hsa`, and publishes the IREE install tree as
# the `linux_x86_64_release_packages` artifact.
build_and_ctest:
  name: Build and Test with HSA (linux, ASSERTIONS)
  needs: [build_hsa]
  runs-on: nod-ai-shared-cpubuilder-manylinux-x86_64
  strategy:
    fail-fast: true
  env:
    CACHE_DIR: ${{ github.workspace }}/.container-cache
    # The key ends in `<ref_name>-<run_number>`, so each run saves under its
    # own key; restores rely on the `restore-keys` prefix below.
    CACHE_KEY: linux-build-test-cpp-asserts-manylinux-v2-${{ format('{0}-{1}', github.ref_name, github.run_number) }}
  steps:
    - name: Set unified TZ
      # NOTE(review): the action ref was garbled in the reviewed source
      # ("szenius/[email protected]"); restored as set-timezone@v1.2 - confirm.
      uses: szenius/set-timezone@v1.2
      with:
        # this is an arbitrary choice
        timezoneLinux: "Asia/Singapore"
        timezoneMacos: "Asia/Singapore"
        timezoneWindows: "Singapore Standard Time"

    - name: Configure local git mirrors
      run: |
        /gitmirror/scripts/trigger_update_mirrors.sh
        /gitmirror/scripts/git_config.sh

    # Manual shallow checkout of the triggering ref. The torch-mlir, stablehlo
    # and aiebu submodules are excluded from the recursive update.
    - name: "Checking out repository"
      env:
        BRANCH_NAME: ${{ github.ref }}
        REPO_ADDRESS: ${{ github.server_url }}/${{ github.repository }}
      run: |
        git init
        git remote add origin "$REPO_ADDRESS"
        git -c protocol.version=2 fetch --depth 1 origin "$BRANCH_NAME"
        git reset --hard FETCH_HEAD
        git \
          -c submodule."third_party/torch-mlir".update=none \
          -c submodule."third_party/stablehlo".update=none \
          -c submodule."src/runtime_src/core/common/aiebu".update=none \
          submodule update --init --recursive --depth 1 --single-branch -j 10

    - name: Install deps
      run: |
        dnf install -y almalinux-release-devel epel-release
        yum remove -y openssl-devel zlib-devel || true
        yum install -y protobuf-devel protobuf-compiler tmate

    - name: Python deps
      run: |
        pip install "numpy<2" pyyaml "pybind11[global]==2.10.3" nanobind

    - name: Enable cache
      uses: actions/cache/restore@v3
      with:
        path: ${{ env.CACHE_DIR }}
        key: ${{ env.CACHE_KEY }}
        restore-keys: linux-build-test-cpp-

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        name: linux_hsa_x86_64_release_packages

    # Unpacks the ROCR install tree and exports, for all later steps, the
    # package root used to locate hsa-runtime64 and the HAL driver selection
    # read by the build script.
    - name: Extract artifact
      run: |
        tar -xvf rocr-*.tar
        echo "hsa-runtime64_ROOT=$PWD/rocr-install" >> "$GITHUB_ENV"
        echo IREE_EXTERNAL_HAL_DRIVER=hsa >> "$GITHUB_ENV"

    - name: Build packages
      run: |
        export cache_dir="${{ env.CACHE_DIR }}"
        export CCACHE_COMPILERCHECK="string:$(clang --version)"
        bash build_tools/ci/build_test_cpp.sh

    - name: Create artifacts
      if: ${{ !cancelled() }}
      run: |
        tar cf iree-dist-linux.tar iree-install

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      if: ${{ !cancelled() }}
      with:
        name: linux_x86_64_release_packages
        path: iree-dist-linux.tar
        if-no-files-found: warn

    # The cache is only saved for pushes to main.
    - name: Save cache
      uses: actions/cache/save@v3
      if: ${{ !cancelled() && github.event_name == 'push' && github.ref_name == 'main' }}
      with:
        path: ${{ env.CACHE_DIR }}
        key: ${{ env.CACHE_KEY }}

# Runs the end-to-end test suites on the `linux-phoenix` runner against the
# IREE install tree produced by `build_and_ctest`.
test_linux:
  name: E2E Test linux with HSA
  needs: build_and_ctest
  strategy:
    fail-fast: false
    matrix:
      runs-on: [linux-phoenix]
  runs-on: ${{ matrix.runs-on }}
  env:
    XILINXD_LICENSE_FILE: /opt/xilinx/Xilinx.lic
  steps:
    - name: "Checking out repository" # for test scripts
      uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
      with:
        submodules: false # not required for testbench

    - name: Download artifacts
      uses: actions/download-artifact@v4
      with:
        name: linux_x86_64_release_packages

    - name: Extract artifact
      run: |
        tar -xvf iree-dist-linux.tar
        bash build_tools/download_peano.sh

    - name: Create venv and install dependencies
      run: |
        python3 -m venv .venv
        source .venv/bin/activate
        pip install -r tests/matmul/requirements.txt

    - name: E2E correctness matmul test
      run: |
        source .venv/bin/activate
        # Without the `prlimit` line below, an error like
        #
        # [XRT] ERROR: Failed to allocate host memory buffer (mmap(len=10616832, prot=3, flags=8193, offset=4294967296)
        # failed (err=11): Resource temporarily unavailable), make sure host bank is enabled (see xbutil configure --host-mem)
        # iree-amd-aie/runtime/src/iree-amd-aie/driver/xrt/direct_allocator.cc:179: RESOURCE_EXHAUSTED; could not allocate
        # memory for buffer; while invoking C++ function matmul_test.generate_random_matrix; while calling import;
        #
        # might be observed when too much memory is allocated. For example, this
        # error was seen when running a bf16->f32 matmul with m=n=k=2304.
        #
        # This line was suggested at https://github.com/Xilinx/mlir-air/issues/566
        #
        # Note that this is only half of the fix. It is also necessary that
        # the machine that CI is running on has permission to run this line.
        #
        # This permission can be granted by adding the line
        # ```
        # %github ALL=(ALL) NOPASSWD: /usr/bin/prlimit *
        # ```
        #
        # to the file /etc/sudoers.d/github, which can be done by running
        # ```
        # sudo visudo -f /etc/sudoers.d/github
        # ```
        # on the GitHub CI machine.
        sudo prlimit -lunlimited --pid $$

        source /opt/xilinx/xrt/setup.sh
        bash build_tools/ci/run_matmul_test.sh \
          test_matmuls \
          iree-install \
          "$PWD/llvm-aie" \
          /opt/xilinx/xrt \
          /opt/Xilinx/Vitis/2024.2

    - name: Smoke E2E comparison flag test
      run: |
        source .venv/bin/activate
        source /opt/xilinx/xrt/setup.sh
        python3 build_tools/ci/cpu_comparison/run_test.py \
          test_aie_vs_cpu \
          iree-install \
          "$PWD/llvm-aie" \
          --xrt-dir /opt/xilinx/xrt \
          --test-set='Smoke' \
          --do-not-run-aie

        # Assert that output.log is empty (because verbose=0)
        if [ -s output.log ]; then
          echo "output.log is not empty:"
          cat output.log
          exit 1
        else
          echo "output.log is empty"
        fi

    - name: E2E comparison of AIE to llvm-cpu
      run: |
        source .venv/bin/activate
        source /opt/xilinx/xrt/setup.sh
        python3 build_tools/ci/cpu_comparison/run_test.py \
          test_aie_vs_cpu \
          "$PWD/iree-install" \
          "$PWD/llvm-aie" \
          --xrt-dir /opt/xilinx/xrt \
          --vitis-dir /opt/Xilinx/Vitis/2024.2 \
          --reset-npu-between-runs -v

    - name: Printing IR from aie2xclbin
      run: |
        source .venv/bin/activate
        source /opt/xilinx/xrt/setup.sh
        bash build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh \
          iree-install \
          print_ir_aie2xclbin_results \
          "$PWD/llvm-aie"
5 changes: 5 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,8 @@
url = https://github.com/iree-org/iree.git
shallow = true
ignore = dirty
# ROCR-Runtime comes from the nod-ai fork, on its `iree-aie` branch, and is
# marked for shallow cloning.
[submodule "third_party/ROCR-Runtime"]
path = third_party/ROCR-Runtime
url = https://github.com/nod-ai/ROCR-Runtime.git
shallow = true
branch = iree-aie
2 changes: 1 addition & 1 deletion build_tools/build_test_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ if [[ "$OSTYPE" == "linux-gnu"* ]]; then
-DCMAKE_CXX_COMPILER="${CXX}" \
-DLLVM_TARGET_ARCH=X86 \
-DLLVM_TARGETS_TO_BUILD=X86 \
-DIREE_EXTERNAL_HAL_DRIVERS=xrt \
-DIREE_EXTERNAL_HAL_DRIVERS=${IREE_EXTERNAL_HAL_DRIVER:-xrt} \
-S $iree_dir -B $build_dir
elif [[ "$OSTYPE" == "darwin"* ]]; then
cmake $CMAKE_ARGS \
Expand Down
54 changes: 54 additions & 0 deletions build_tools/ci/build_roct_rocr.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash
#
# Builds and installs ROCT (third_party/ROCT-Thunk-Interface) and then ROCR
# (third_party/ROCR-Runtime), in that order, so that ROCR can find the ROCT
# install through CMAKE_PREFIX_PATH.
#
# Outputs, relative to the repository root:
#   roct-build/, roct-install/   ROCT build and install trees
#   rocr-build/, rocr-install/   ROCR build and install trees
#
# Environment:
#   cache_dir   Optional. Directory that holds the ccache store; defaults to
#               <repo_root>/.build-cache.
#
# All path expansions are quoted so the script also works when the checkout
# path contains whitespace.

set -eux -o errtrace

this_dir="$(cd "$(dirname "$0")" && pwd)"
repo_root="$(cd "$this_dir/../.." && pwd)"

# Under `set -e` these assignments abort the script if a source directory is
# missing.
roct_dir="$(cd "$repo_root/third_party/ROCT-Thunk-Interface" && pwd)"
rocr_dir="$(cd "$repo_root/third_party/ROCR-Runtime" && pwd)"

build_roct_dir="$repo_root/roct-build"
roct_install_dir="$repo_root/roct-install"
mkdir -p "$build_roct_dir"
build_roct_dir="$(cd "$build_roct_dir" && pwd)"

build_rocr_dir="$repo_root/rocr-build"
rocr_install_dir="$repo_root/rocr-install"
mkdir -p "$build_rocr_dir"
build_rocr_dir="$(cd "$build_rocr_dir" && pwd)"

# Default the variable first so that `set -u` does not trip when the caller
# did not export cache_dir.
cache_dir="${cache_dir:-}"

if [ -z "${cache_dir}" ]; then
  cache_dir="${repo_root}/.build-cache"
  mkdir -p "${cache_dir}"
  cache_dir="$(cd "${cache_dir}" && pwd)"
fi
echo "Caching to ${cache_dir}"
mkdir -p "${cache_dir}/ccache"

if [[ "$OSTYPE" == "msys"* ]]; then
  export CC=clang-cl.exe
  export CXX=clang-cl.exe
fi
# Route C and C++ compilations through ccache, with the store kept under
# cache_dir.
export CCACHE_DIR="${cache_dir}/ccache"
export CCACHE_MAXSIZE="700M"
export CMAKE_C_COMPILER_LAUNCHER=ccache
export CMAKE_CXX_COMPILER_LAUNCHER=ccache

# ROCT: configure, build and install.
cd "$roct_dir"
cmake -GNinja \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$roct_install_dir" \
  -S "$roct_dir" -B "$build_roct_dir"
cmake --build "$build_roct_dir" --target install

# ROCR: configure against the ROCT install tree, then build and install.
cd "$rocr_dir"
cmake -GNinja \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$rocr_install_dir" \
  -DCMAKE_PREFIX_PATH="$roct_install_dir" \
  -DIMAGE_SUPPORT=OFF \
  -S "$rocr_dir/src" -B "$build_rocr_dir"
cmake --build "$build_rocr_dir" --target install
11 changes: 11 additions & 0 deletions iree_runtime_plugin.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,16 @@ if(IREE_AMD_AIE_ENABLE_XRT_DRIVER)
include(iree_aie_bootgen)
endif()

# The HSA driver is enabled exactly when "hsa" is listed in
# IREE_EXTERNAL_HAL_DRIVERS; otherwise the flag is explicitly set to OFF.
if("hsa" IN_LIST IREE_EXTERNAL_HAL_DRIVERS)
  message(STATUS "Enabling HSA build because it is an enabled HAL driver")
  set(IREE_AMD_AIE_ENABLE_HSA_DRIVER ON)
else()
  set(IREE_AMD_AIE_ENABLE_HSA_DRIVER OFF)
endif()

# With the driver enabled, the HSA runtime's CMake package config is required.
# Both spellings of the package name are tried, and configuration fails if
# neither is found.
if(IREE_AMD_AIE_ENABLE_HSA_DRIVER)
  find_package(hsa-runtime64 CONFIG REQUIRED NAMES hsa-runtime64 hsa_runtime64)
endif()

add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/runtime/src AMD-AIE)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/experimental AMD-AIE-experimental)
Loading
Loading