CI: build the ROCR/HSA runtime in its own job and hand it over as an artifact.

* Adds .github/workflows/ci-hsa-linux.yml, a standalone workflow that builds
  libnuma and ROCR and packs the install tree into rocr-<sha>.tar.
* Adds the same build_hsa job to .github/workflows/ci-linux.yml and uploads the
  tarball as the linux_hsa_x86_64_release_packages artifact.
* build_and_ctest no longer runs build_tools/ci/build_roct_rocr.sh; it downloads
  and extracts that artifact instead, and declares `needs: build_hsa` so the
  artifact exists before the download step runs.

diff --git a/.github/workflows/ci-hsa-linux.yml b/.github/workflows/ci-hsa-linux.yml
new file mode 100644
index 000000000..a398d1b46
--- /dev/null
+++ b/.github/workflows/ci-hsa-linux.yml
@@ -0,0 +1,104 @@
+name: CI HSA Linux
+
+on:
+  workflow_dispatch:
+    inputs:
+      force_debug_with_tmate:
+        type: boolean
+        description: 'Run the build with tmate session'
+        required: false
+        default: false
+      debug_with_tmate:
+        type: boolean
+        description: 'Run the build with a tmate session ONLY in case of failure'
+        required: false
+        default: false
+      release:
+        type: boolean
+        description: 'Release to latest'
+        required: false
+        default: false
+  pull_request:
+  merge_group:
+  push:
+    branches:
+      - amd-staging
+
+concurrency:
+  group: ci-build-test-cpp-hsa-linux-${{ github.event.number || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  build_hsa:
+    name: Build HSA (linux)
+    runs-on: nod-ai-shared-cpubuilder-manylinux-x86_64
+    steps:
+      - name: Configure local git mirrors
+        run: |
+          /gitmirror/scripts/trigger_update_mirrors.sh
+          /gitmirror/scripts/git_config.sh
+
+      - name: "Checking out repository"
+        env:
+          BRANCH_NAME: ${{ github.ref }}
+          REPO_ADDRESS: ${{ github.server_url }}/${{ github.repository }}
+        run: |
+          git init
+          git remote add origin $REPO_ADDRESS
+          git -c protocol.version=2 fetch --depth 1 origin $BRANCH_NAME
+          git reset --hard FETCH_HEAD
+          git -c submodule."third_party/torch-mlir".update=none -c submodule."third_party/stablehlo".update=none -c submodule."src/runtime_src/core/common/aiebu".update=none submodule update --init --recursive --depth 1 --single-branch -j 10
+
+      - name: Install deps
+        run: |
+          dnf install -y almalinux-release-devel
+          yum install -y elfutils-libelf-devel p7zip p7zip-plugins \
+            sudo ncurses-compat-libs openssh vim-common
+
+      - name: Build and install libnuma
+        working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
+        run: |
+          curl --silent -L \
+            https://github.com/numactl/numactl/releases/download/v2.0.18/numactl-2.0.18.tar.gz \
+            -o numactl-2.0.18.tar.gz
+          tar -xf numactl-2.0.18.tar.gz
+          pushd numactl-2.0.18
+          ./configure
+          # i have no idea why this is necessary
+          # but without it you get something about "can't cd into dir"
+          sed -i '7563s/`cd "$dir" && pwd`/$dir/g' libtool
+          make install
+          popd
+
+      - name: Hack ROCR
+        working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
+        run: |
+          sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/core/runtime/blit_shaders/CMakeLists.txt
+          sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/core/runtime/trap_handler/CMakeLists.txt
+          sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/image/blit_src/CMakeLists.txt
+
+      - name: Get Clang
+        working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
+        run: |
+          pip download mlir==20.0.0.2024090301+amdgpu.b6597f52 -f https://makslevental.github.io/wheels
+          unzip -q mlir-*.whl
+
+      - name: Build ROCR distro
+        working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
+        run: |
+          rocr_dir="$PWD"
+          build_rocr_dir="$PWD/rocr-build"
+          mkdir -p "$build_rocr_dir"
+          build_rocr_dir="$(cd $build_rocr_dir && pwd)"
+          rocr_install_dir="$PWD/rocr-install"
+
+          cmake -GNinja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_INSTALL_PREFIX="$rocr_install_dir" \
+            -DClang_DIR=$PWD/mlir/lib/cmake/clang \
+            -DLLVM_DIR=$PWD/mlir/lib/cmake/mlir \
+            -DIMAGE_SUPPORT=OFF \
+            -S "$rocr_dir" -B "$build_rocr_dir"
+
+          cmake --build "$build_rocr_dir" --target install
+          tar -cf rocr-${GITHUB_SHA::8}.tar rocr-install
diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml
index f3b1e83bc..895732a34 100644
--- a/.github/workflows/ci-linux.yml
+++ b/.github/workflows/ci-linux.yml
@@ -17,6 +17,90 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  build_hsa:
+    name: Build HSA (linux)
+    runs-on: nod-ai-shared-cpubuilder-manylinux-x86_64
+    steps:
+      - name: Configure local git mirrors
+        run: |
+          /gitmirror/scripts/trigger_update_mirrors.sh
+          /gitmirror/scripts/git_config.sh
+
+      - name: "Checking out repository"
+        env:
+          BRANCH_NAME: ${{ github.ref }}
+          REPO_ADDRESS: ${{ github.server_url }}/${{ github.repository }}
+        run: |
+          git init
+          git remote add origin $REPO_ADDRESS
+          git -c protocol.version=2 fetch --depth 1 origin $BRANCH_NAME
+          git reset --hard FETCH_HEAD
+          git -c submodule."third_party/torch-mlir".update=none -c submodule."third_party/stablehlo".update=none -c submodule."src/runtime_src/core/common/aiebu".update=none submodule update --init --recursive --depth 1 --single-branch -j 10
+
+      - name: Install deps
+        run: |
+          dnf install -y almalinux-release-devel
+          yum install -y elfutils-libelf-devel p7zip p7zip-plugins \
+            sudo ncurses-compat-libs openssh vim-common
+
+      - name: Build and install libnuma
+        working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
+        run: |
+          curl --silent -L \
+            https://github.com/numactl/numactl/releases/download/v2.0.18/numactl-2.0.18.tar.gz \
+            -o numactl-2.0.18.tar.gz
+          tar -xf numactl-2.0.18.tar.gz
+          pushd numactl-2.0.18
+          ./configure
+          # i have no idea why this is necessary
+          # but without it you get something about "can't cd into dir"
+          sed -i '7563s/`cd "$dir" && pwd`/$dir/g' libtool
+          make install
+          popd
+
+      - name: Hack ROCR
+        working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
+        run: |
+          sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/core/runtime/blit_shaders/CMakeLists.txt
+          sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/core/runtime/trap_handler/CMakeLists.txt
+          sed -i 's/amdgcn-amd-amdhsa/amdgcn-amd-amdhsa -nogpulib/g' runtime/hsa-runtime/image/blit_src/CMakeLists.txt
+
+      - name: Get compatible Clang
+        working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
+        run: |
+          pip download mlir==20.0.0.2024090301+amdgpu.b6597f52 -f https://makslevental.github.io/wheels
+          unzip -q mlir-*.whl
+
+      - name: Build ROCR distro
+        working-directory: ${{ github.workspace }}/third_party/ROCR-Runtime
+        run: |
+          rocr_dir="$PWD"
+          build_rocr_dir="$PWD/rocr-build"
+          mkdir -p "$build_rocr_dir"
+          build_rocr_dir="$(cd $build_rocr_dir && pwd)"
+          rocr_install_dir="$PWD/rocr-install"
+
+          cmake -GNinja \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCMAKE_INSTALL_PREFIX="$rocr_install_dir" \
+            -DClang_DIR=$PWD/mlir/lib/cmake/clang \
+            -DLLVM_DIR=$PWD/mlir/lib/cmake/mlir \
+            -DIMAGE_SUPPORT=OFF \
+            -S "$rocr_dir" -B "$build_rocr_dir"
+
+          cmake --build "$build_rocr_dir" --target install
+          tar -cf rocr-${GITHUB_SHA::8}.tar rocr-install
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        if: ${{ !cancelled() }}
+        with:
+          name: linux_hsa_x86_64_release_packages
+          path: ${{ github.workspace }}/third_party/ROCR-Runtime/rocr-*.tar
+          if-no-files-found: error
+
   build_and_ctest:
     name: Build and Test (linux, ASSERTIONS)
+    # Downloads the ROCR tarball uploaded by build_hsa, so it must wait for that job.
+    needs: build_hsa
     runs-on: nod-ai-shared-cpubuilder-manylinux-x86_64
@@ -55,7 +139,7 @@ jobs:
         run: |
           dnf install -y almalinux-release-devel epel-release
           yum remove -y openssl-devel zlib-devel || true
-          yum install -y protobuf-devel protobuf-compiler libnuma-devel tmate
+          yum install -y protobuf-devel protobuf-compiler tmate
 
       - name: Python deps
         run: |
@@ -68,10 +152,14 @@ jobs:
           key: ${{ env.CACHE_KEY }}
           restore-keys: linux-build-test-cpp-
 
-      - name: Build ROCT/ROCR
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: linux_hsa_x86_64_release_packages
+
+      - name: Extract artifact
         run: |
-          export cache_dir="${{ env.CACHE_DIR }}"
-          bash build_tools/ci/build_roct_rocr.sh
+          tar -xvf rocr-*.tar
           echo "hsa-runtime64_ROOT=$PWD/rocr-install" >> $GITHUB_ENV
 
       - name: Build packages