# Workflow file for the CI run "Fix GHA caches" (#288).

name: CI Linux

# Events that start this workflow: reuse from other workflows, manual
# dispatch, pull requests, merge-queue groups, pushes to main, and a schedule.
on:
  workflow_call:
  workflow_dispatch:
  pull_request:
  merge_group:
  push:
    branches:
      - main
  schedule:
    # At minute 0 past every 6th hour. (see https://crontab.guru)
    # this job is to keep the ccache cache warm
    - cron: '0 */6 * * *'

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit).
  group: ci-build-test-cpp-linux-${{ github.event.number || github.sha }}
  cancel-in-progress: true

permissions:
  contents: read
  # required to delete cache
  # https://docs.github.com/en/rest/actions/cache?apiVersion=2022-11-28#delete-github-actions-caches-for-a-repository-using-a-cache-key
  actions: write
jobs:
  # Builds the project, runs the C++ tests via build_test_cpp.sh, and uploads
  # the install tree as an artifact for the downstream E2E job.
  build_and_ctest:
    name: Build and Test (linux, ASSERTIONS)
    runs-on: nod-ai-shared-cpubuilder-manylinux-x86_64
    strategy:
      fail-fast: true
    env:
      CACHE_DIR: ${{ github.workspace }}/.container-cache
      # either the PR number or `branch-N` where N always increments
      CACHE_KEY: linux-build-test-cpp-asserts-manylinux-v2-${{ github.event.number || format('{0}-{1}', github.ref_name, github.run_number) }}
    steps:
      - name: Set unified TZ
        # NOTE(review): the action reference was garbled in this copy of the
        # file (e-mail obfuscation of `owner/repo@ref`). It is reconstructed
        # here as szenius/set-timezone@v2.0 -- confirm the intended version.
        uses: szenius/set-timezone@v2.0
        with:
          # this is an arbitrary choice
          timezoneLinux: "Asia/Singapore"
          timezoneMacos: "Asia/Singapore"
          timezoneWindows: "Singapore Standard Time"

      - name: Configure local git mirrors
        run: |
          /gitmirror/scripts/trigger_update_mirrors.sh
          /gitmirror/scripts/git_config.sh

      - name: "Checking out repository"
        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
        with:
          submodules: recursive

      - name: Install deps
        run: |
          yum remove -y openssl-devel zlib-devel || true
          dnf install -y almalinux-release-devel
          type -p yum-config-manager >/dev/null || yum -y install yum-utils
          yum-config-manager -y --add-repo https://cli.github.com/packages/rpm/gh-cli.repo
          yum install -y protobuf-devel protobuf-compiler gh

      - name: Sync source deps
        run: |
          python ./sync_deps.py

      - name: Python deps
        run: |
          pip install "numpy<2" pyyaml "pybind11[global]==2.10.3" nanobind

      - name: Enable cache
        uses: actions/cache/restore@v3
        with:
          path: ${{ env.CACHE_DIR }}
          key: ${{ env.CACHE_KEY }}
          # Fall back to the most recent cache sharing this prefix when there
          # is no exact match for the key.
          restore-keys: linux-build-test-cpp-

      - name: Build packages
        run: |
          # CACHE_DIR is exported by the job-level `env`; read it from the
          # environment instead of interpolating an expression into the script.
          export cache_dir="$CACHE_DIR"
          bash build_tools/ci/build_test_cpp.sh

      - name: Create artifacts
        if: ${{ !cancelled() }}
        run: |
          tar cf iree-dist-linux.tar -C iree-install .

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        if: ${{ !cancelled() }}
        with:
          name: linux_x86_64_release_packages
          path: iree-dist-linux.tar
          if-no-files-found: warn

      # An existing cache entry cannot be updated in place, so delete any entry
      # stored under this key before the save step below. Failure here (e.g.
      # no such entry) is tolerated via continue-on-error.
      - name: Overwrite cache
        if: ${{ !cancelled() }}
        continue-on-error: true
        env:
          # required to run gh with privileges
          GH_TOKEN: ${{ github.token }}
        run: |
          gh extension install actions/gh-actions-cache
          # Pass the key and repository through quoted environment variables:
          # the key embeds the ref name, which must not be spliced into the
          # script text unquoted.
          gh actions-cache delete "$CACHE_KEY" --confirm -R "$GITHUB_REPOSITORY"

      - name: Save cache
        uses: actions/cache/save@v3
        if: ${{ !cancelled() }}
        with:
          path: ${{ env.CACHE_DIR }}
          key: ${{ env.CACHE_KEY }}

  # Downloads the install tree built by build_and_ctest and runs the
  # end-to-end test scripts on the runners listed in the matrix.
  test_linux:
    name: E2E Test linux
    needs: build_and_ctest
    strategy:
      fail-fast: false
      matrix:
        runs-on: [linux-phoenix]
    runs-on: ${{ matrix.runs-on }}
    env:
      XILINXD_LICENSE_FILE: /opt/xilinx/Xilinx.lic
    steps:
      - name: "Checking out repository" # for test scripts
        uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0
        with:
          submodules: false # not required for testbench

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          name: linux_x86_64_release_packages

      - name: Extract artifact
        run: |
          mkdir iree-install
          tar -xf iree-dist-linux.tar -C iree-install
          bash build_tools/download_peano.sh

      - name: Create venv and install dependencies
        run: |
          python3 -m venv .venv
          source .venv/bin/activate
          pip install -r tests/matmul/requirements.txt

      - name: E2E correctness matmul test
        run: |
          source .venv/bin/activate
          # Without this additional line an error like
          #
          # [XRT] ERROR: Failed to allocate host memory buffer (mmap(len=10616832, prot=3, flags=8193, offset=4294967296)
          # failed (err=11): Resource temporarily unavailable), make sure host bank is enabled (see xbutil configure --host-mem)
          # iree-amd-aie/runtime/src/iree-amd-aie/driver/xrt/direct_allocator.cc:179: RESOURCE_EXHAUSTED; could not allocate
          # memory for buffer; while invoking C++ function matmul_test.generate_random_matrix; while calling import;
          #
          # might be observed when too much memory is allocated. For example this
          # error was seen when running a bf16->f32 matmul with m=n=k=2304.
          #
          # This line was suggested at https://github.com/Xilinx/mlir-air/issues/566
          #
          # Note that this is only half of the fix. It is also necessary that
          # the machine that CI is running on has permission to run this line.
          #
          # This permission can be added by adding the line
          # ```
          # %github ALL=(ALL) NOPASSWD: /usr/bin/prlimit *
          # ```
          #
          # to the file /etc/sudoers.d/github, which can be done by running
          # ```
          # sudo visudo -f /etc/sudoers.d/github
          # ```
          # on the github CI machine.
          sudo prlimit -lunlimited --pid $$
          source /opt/xilinx/xrt/setup.sh
          bash build_tools/ci/run_matmul_test.sh \
            test_matmuls \
            iree-install \
            $PWD/llvm-aie \
            /opt/xilinx/xrt \
            /opt/Xilinx/Vitis/2024.2

      - name: Smoke E2E comparison flag test
        run: |
          source .venv/bin/activate
          source /opt/xilinx/xrt/setup.sh
          python3 build_tools/ci/cpu_comparison/run_test.py \
            test_aie_vs_cpu \
            iree-install \
            $PWD/llvm-aie \
            --xrt-dir /opt/xilinx/xrt \
            --test-set='Smoke' \
            --do-not-run-aie
          # Assert that output.log is empty (because verbose=0)
          if [ -s output.log ]; then
            echo "output.log is not empty:"
            cat output.log
            exit 1
          else
            echo "output.log is empty"
          fi

      - name: E2E comparison of AIE to llvm-cpu
        run: |
          source .venv/bin/activate
          source /opt/xilinx/xrt/setup.sh
          python3 build_tools/ci/cpu_comparison/run_test.py \
            test_aie_vs_cpu \
            $PWD/iree-install \
            $PWD/llvm-aie \
            --xrt-dir /opt/xilinx/xrt \
            --vitis-dir /opt/Xilinx/Vitis/2024.2 \
            --reset-npu-between-runs -v

      - name: Printing IR from aie2xclbin
        run: |
          source .venv/bin/activate
          source /opt/xilinx/xrt/setup.sh
          bash build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh \
            iree-install \
            print_ir_aie2xclbin_results \
            $PWD/llvm-aie