diff --git a/.github/workflows/ci-linux-san.yml b/.github/workflows/ci-linux-san.yml new file mode 100644 index 000000000..94ccf4400 --- /dev/null +++ b/.github/workflows/ci-linux-san.yml @@ -0,0 +1,190 @@ +name: CI Linux Sanitizers + +on: + workflow_call: + workflow_dispatch: + pull_request: + merge_group: + push: + branches: + - main + +concurrency: + # A PR number if a pull request and otherwise the commit hash. This cancels + # queued and in-progress runs for the same PR (presubmit) or commit + # (postsubmit). + group: ci-build-test-cpp-linux-sanitizers-${{ github.event.number || github.sha }} + cancel-in-progress: true + +jobs: + build_and_ctest: + name: Build and Test + runs-on: nod-ai-shared-cpubuilder-manylinux-x86_64 + strategy: + fail-fast: false + matrix: + san: [ASAN, MSAN, TSAN, UBSAN] + env: + CACHE_DIR: ${{ github.workspace }}/.container-cache + # either the PR number or `branch-N` where N always increments + CACHE_KEY: linux-build-test-cpp-asserts-manylinux-v2-${{ format('{0}-{1}', github.ref_name, github.run_number) }} + steps: + - name: Set unified TZ + uses: szenius/set-timezone@v2.0 + with: + # this is an arbitrary choice + timezoneLinux: "Asia/Singapore" + timezoneMacos: "Asia/Singapore" + timezoneWindows: "Singapore Standard Time" + + - name: Configure local git mirrors + run: | + /gitmirror/scripts/trigger_update_mirrors.sh + /gitmirror/scripts/git_config.sh + + - name: "Checking out repository" + env: + BRANCH_NAME: ${{ github.ref }} + REPO_ADDRESS: ${{ github.server_url }}/${{ github.repository }} + run: | + git init + git remote add origin $REPO_ADDRESS + git -c protocol.version=2 fetch --depth 1 origin $BRANCH_NAME + git reset --hard FETCH_HEAD + git -c submodule."third_party/torch-mlir".update=none -c submodule."third_party/stablehlo".update=none -c submodule."src/runtime_src/core/common/aiebu".update=none submodule update --init --recursive --depth 1 --single-branch -j 10 + + - name: Install deps + run: | + dnf install -y almalinux-release-devel epel-release + yum remove -y openssl-devel zlib-devel || true + yum install -y protobuf-devel protobuf-compiler tmate + + - name: Python deps + run: | + pip install "numpy<2" pyyaml "pybind11[global]==2.10.3" nanobind pytest + + - name: Run Pytest + run: | + pytest build_tools/ci + + - name: Enable cache + uses: actions/cache/restore@v3 + with: + path: ${{ env.CACHE_DIR }} + key: ${{ env.CACHE_KEY }} + restore-keys: linux-build-test-cpp- + + - name: Build packages + run: | + export cache_dir="${{ env.CACHE_DIR }}" + export CCACHE_COMPILERCHECK="string:$(clang --version)" + bash build_tools/build_llvm.sh + rm -rf llvm-build + export llvm_install_dir=$PWD/llvm-install + export san=${{ matrix.san }} + bash build_tools/build_test_cpp.sh + + - name: Create artifacts + if: ${{ !cancelled() }} + run: | + pushd third_party/iree/third_party/llvm-project && llvm_sha_short=$(git rev-parse --short HEAD) && popd + tar cf llvm-dist-linux-$llvm_sha_short.tar llvm-install + tar cf iree-dist-linux.tar iree-install + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: linux_x86_64_llvm_packages + path: llvm-dist-*.tar + if-no-files-found: warn + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: linux_${{ matrix.san }}_x86_64_iree_packages + path: iree-dist-linux.tar + if-no-files-found: warn + + test_linux: + name: E2E Test + needs: build_and_ctest + strategy: + fail-fast: false + matrix: + san: [ASAN, MSAN, TSAN, UBSAN] + runs-on: linux-phoenix + env: + XILINXD_LICENSE_FILE: /opt/xilinx/Xilinx.lic + steps: + - name: "Checking out repository" # for test scripts + uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + with: + submodules: false # not required for testbench + + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + name: linux_${{ matrix.san }}_x86_64_iree_packages + + - name: Extract artifact + run: | + tar -xvf iree-dist-linux.tar + bash build_tools/download_peano.sh + + - name: Create venv and install dependencies + run: | + python3 -m venv .venv + source .venv/bin/activate + pip install -r build_tools/ci/requirements.txt + + + - name : E2E comparison of AIE to llvm-cpu + run: | + source .venv/bin/activate + source /opt/xilinx/xrt/setup.sh + python3 build_tools/ci/cpu_comparison/run.py \ + test_aie_vs_cpu \ + $PWD/iree-install \ + $PWD/llvm-aie \ + --xrt-dir /opt/xilinx/xrt \ + --vitis-dir /opt/Xilinx/Vitis/2024.2 \ + --reset-npu-between-runs -v + + - name: E2E correctness matmul test + run: | + # Without this additional line an error like + # + # [XRT] ERROR: Failed to allocate host memory buffer (mmap(len=10616832, prot=3, flags=8193, offset=4294967296) + # failed (err=11): Resource temporarily unavailable), make sure host bank is enabled (see xbutil configure --host-mem) + # iree-amd-aie/runtime/src/iree-amd-aie/driver/xrt/direct_allocator.cc:179: RESOURCE_EXHAUSTED; could not allocate + # memory for buffer; while invoking C++ function matmul_test.generate_random_matrix; while calling import; + # + # might be observed when too much memory is allocated. This + # error was seen when running a bf16->f32 matmul with m=n=k=2304. + # + # This line was suggested at https://github.com/Xilinx/mlir-air/issues/566 + # + # Note that this is only half of the fix. It is also necessary that + # the machine that CI is running on has permission to run this line. + # + # This permission can be adding by adding the line + # ``` + # %github ALL=(ALL) NOPASSWD: /usr/bin/prlimit * + # ``` + # + # to the file /etc/sudoers.d/github, which can be done by running + # ``` + # sudo visudo -f /etc/sudoers.d/github + # ``` + # on the github CI machine. + sudo prlimit -lunlimited --pid $$ + source .venv/bin/activate + source /opt/xilinx/xrt/setup.sh + bash build_tools/ci/run_matmul_test.sh \ + test_matmuls \ + iree-install \ + $PWD/llvm-aie \ + /opt/xilinx/xrt \ + /opt/Xilinx/Vitis/2024.2 diff --git a/build_tools/build_test_cpp.sh b/build_tools/build_test_cpp.sh index ddda26a0d..f3867cd60 100644 --- a/build_tools/build_test_cpp.sh +++ b/build_tools/build_test_cpp.sh @@ -82,6 +82,16 @@ CMAKE_ARGS="\ -DCMAKE_OBJECT_PATH_MAX=4096 \ -DIREE_CMAKE_PLUGIN_PATHS=$repo_root" +san="${san:-}" + +if [ "$san" != "" ];then + CMAKE_ARGS="$CMAKE_ARGS -DIREE_ENABLE_$san=ON -DHAVE_STD_REGEX=ON" + export ASAN_OPTIONS=detect_leaks=0 + export MSAN_OPTIONS=halt_on_error=0 + export TSAN_OPTIONS=halt_on_error=0 + export UBSAN_OPTIONS=halt_on_error=0 +fi + if [ -d "${llvm_install_dir}" ]; then CMAKE_ARGS="$CMAKE_ARGS \ -DIREE_BUILD_BUNDLED_LLVM=OFF \ @@ -118,6 +128,13 @@ echo "----------" echo "Install to: $install_dir" cmake --build "$build_dir" --target iree-install-dist +if [ "$san" != "" ];then + export ASAN_OPTIONS=detect_leaks=1:verbosity=1:log_threads=1:print_stacktrace=1 + export MSAN_OPTIONS=halt_on_error=1:verbosity=1:log_threads=1:print_stacktrace=1 + export TSAN_OPTIONS=halt_on_error=1:verbosity=1:log_threads=1:print_stacktrace=1 + export UBSAN_OPTIONS=halt_on_error=1:verbosity=1:log_threads=1:print_stacktrace=1 +fi + echo "CTest" echo "-----" if [[ "$OSTYPE" == "linux-gnu"* ]]; then