diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index 100ea0eca..968cf4630 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -9,6 +9,11 @@ on: branches: - main + schedule: + # At minute 0 past every 6th hour. (see https://crontab.guru) + # this job is to keep the ccache cache warm + - cron: '0 */6 * * *' + concurrency: # A PR number if a pull request and otherwise the commit hash. This cancels # queued and in-progress runs for the same PR (presubmit) or commit @@ -24,6 +29,8 @@ jobs: fail-fast: true env: CACHE_DIR: ${{ github.workspace }}/.container-cache + # either the PR number or `branch-N` where N always increments + CACHE_KEY: linux-build-test-cpp-asserts-manylinux-v2-${{ github.event.number || format('{0}-{1}', github.ref_name, github.run_number) }} steps: - name: Set unified TZ uses: szenius/set-timezone@v2.0 @@ -42,11 +49,11 @@ jobs: with: submodules: recursive - - name: Install static libs + - name: Install deps run: | - dnf install -y almalinux-release-devel yum remove -y openssl-devel zlib-devel || true yum install -y protobuf-devel protobuf-compiler + - name: Sync source deps run: | python ./sync_deps.py @@ -59,12 +66,13 @@ jobs: uses: actions/cache/restore@v3 with: path: ${{ env.CACHE_DIR }} - key: linux-build-test-cpp-asserts-manylinux-v2-${{ github.sha }} + key: ${{ env.CACHE_KEY }} restore-keys: linux-build-test-cpp- - name: Build packages run: | export cache_dir="${{ env.CACHE_DIR }}" + export CCACHE_COMPILERCHECK="string:$(clang --version)" bash build_tools/ci/build_test_cpp.sh - name: Create artifacts @@ -82,10 +90,10 @@ jobs: - name: Save cache uses: actions/cache/save@v3 - if: ${{ !cancelled() }} + if: ${{ !cancelled() && github.event_name != 'pull_request' }} with: path: ${{ env.CACHE_DIR }} - key: linux-build-test-cpp-asserts-manylinux-v2-${{ github.sha }} + key: ${{ env.CACHE_KEY }} test_linux: name: E2E Test linux diff --git a/.github/workflows/ci-macos.yml b/.github/workflows/ci-macos.yml index eda8ee471..cf05c9376 100644 --- a/.github/workflows/ci-macos.yml +++ b/.github/workflows/ci-macos.yml @@ -9,10 +9,10 @@ on: branches: - main + schedule: + - cron: '0 */6 * * *' + concurrency: - # A PR number if a pull request and otherwise the commit hash. This cancels - # queued and in-progress runs for the same PR (presubmit) or commit - # (postsubmit). group: ci-build-test-cpp-macos-${{ github.event.number || github.sha }} cancel-in-progress: true @@ -26,6 +26,7 @@ jobs: runs-on: [macos-12, macos-14] env: CACHE_DIR: ${{ github.workspace }}/.container-cache + CACHE_KEY: ${{ matrix.runs-on }}-build-test-cpp-asserts-v1-${{ github.event.number || format('{0}-{1}', github.ref_name, github.run_number) }} steps: - name: Set unified TZ uses: szenius/set-timezone@v2.0 @@ -60,22 +61,19 @@ jobs: uses: actions/cache/restore@v3 with: path: ${{ env.CACHE_DIR }} - # without datetime stamps you'll get collisions for the cache warming runs - # ("Failed to save: Unable to reserve cache with key ..., another job may be creating this cache.") - key: ${{ matrix.runs-on }}-build-test-cpp-asserts-v1-${{ github.sha }}-${{ github.event.repository.updated_at }} + key: ${{ env.CACHE_KEY }} restore-keys: ${{ matrix.runs-on }}-build-test-cpp- - name: Build packages run: | export cache_dir="${{ env.CACHE_DIR }}" + export CCACHE_COMPILERCHECK="string:$(clang --version)" bash build_tools/ci/build_test_cpp.sh - name: Create artifacts if: ${{ !cancelled() }} run: | - rm -f iree-install/bin/clang* - rm -f iree-install/bin/llvm-link* - tar cf iree-dist-${{ matrix.runs-on }}.tar -C iree-install . -C ../iree-build tools/testing/e2e/iree-e2e-matmul-test + tar cf iree-dist-${{ matrix.runs-on }}.tar -C iree-install . - name: Upload artifacts uses: actions/upload-artifact@v4 @@ -87,7 +85,7 @@ jobs: - name: Save cache uses: actions/cache/save@v3 - if: ${{ !cancelled() }} + if: ${{ !cancelled() && github.event_name != 'pull_request' }} with: path: ${{ env.CACHE_DIR }} - key: ${{ matrix.runs-on }}-build-test-cpp-asserts-v1-${{ github.sha }}-${{ github.event.repository.updated_at }} + key: ${{ env.CACHE_KEY }} diff --git a/.github/workflows/ci-windows.yml b/.github/workflows/ci-windows.yml index 4804d14eb..de71a4f9a 100644 --- a/.github/workflows/ci-windows.yml +++ b/.github/workflows/ci-windows.yml @@ -10,14 +10,9 @@ on: - main schedule: - # At minute 0 past every 12th hour. (see https://crontab.guru) - # this job is to keep the ccache cache warm - - cron: '0 */12 * * *' + - cron: '0 */6 * * *' concurrency: - # A PR number if a pull request and otherwise the commit hash. This cancels - # queued and in-progress runs for the same PR (presubmit) or commit - # (postsubmit). group: ci-build-test-cpp-windows-${{ github.event.number || github.sha }} cancel-in-progress: true @@ -34,6 +29,7 @@ jobs: fail-fast: true env: CACHE_DIR: ${{ github.workspace }}/.container-cache + CACHE_KEY: windows-build-test-cpp-asserts-v1-${{ github.event.number || format('{0}-{1}', github.ref_name, github.run_number) }} steps: - name: Set unified TZ uses: szenius/set-timezone@v2.0 @@ -73,14 +69,13 @@ jobs: uses: actions/cache/restore@v3 with: path: ${{ env.CACHE_DIR }} - # without datetime stamps you'll get collisions for the cache warming runs - # ("Failed to save: Unable to reserve cache with key ..., another job may be creating this cache.") - key: windows-build-test-cpp-asserts-v1-${{ github.sha }}-${{ github.event.repository.updated_at }} + key: ${{ env.CACHE_KEY }} restore-keys: windows-build-test-cpp- - name: Build packages run: | export cache_dir="${{ env.CACHE_DIR }}" + export CCACHE_COMPILERCHECK="string:$(clang-cl.exe --version)" bash build_tools/ci/build_test_cpp.sh - name: Create artifacts @@ -98,10 +93,10 @@ jobs: - name: Save cache uses: actions/cache/save@v3 - if: ${{ !cancelled() }} + if: ${{ !cancelled() && github.event_name != 'pull_request' }} with: path: ${{ env.CACHE_DIR }} - key: windows-build-test-cpp-asserts-v1-${{ github.sha }}-${{ github.event.repository.updated_at }} + key: ${{ env.CACHE_KEY }} test_windows: name: E2E Test windows diff --git a/build_tools/ci/build_test_cpp.sh b/build_tools/ci/build_test_cpp.sh index 65c61332a..0382601aa 100644 --- a/build_tools/ci/build_test_cpp.sh +++ b/build_tools/ci/build_test_cpp.sh @@ -36,10 +36,12 @@ elif [[ "$OSTYPE" == "msys"* ]]; then export CC=clang-cl.exe export CXX=clang-cl.exe fi + export CCACHE_DIR="${cache_dir}/ccache" export CCACHE_MAXSIZE="700M" export CMAKE_C_COMPILER_LAUNCHER=ccache export CMAKE_CXX_COMPILER_LAUNCHER=ccache +export CCACHE_SLOPPINESS=include_file_ctime,include_file_mtime,time_macros # Clear ccache stats. ccache -z @@ -64,6 +66,7 @@ CMAKE_ARGS="\ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$install_dir \ -DCMAKE_INSTALL_LIBDIR=lib \ + -DIREE_ERROR_ON_MISSING_SUBMODULES=OFF \ -DIREE_ENABLE_ASSERTIONS=ON \ -DIREE_BUILD_SAMPLES=OFF \ -DIREE_BUILD_PYTHON_BINDINGS=ON \ @@ -109,15 +112,12 @@ echo "-----" if [[ "$OSTYPE" == "linux-gnu"* ]]; then ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j elif [[ "$OSTYPE" == "darwin"* ]]; then - ctest --test-dir "$build_dir" -R amd-aie -E "pack_peel_pipeline_matmul|conv_fill_spec_pad" --output-on-failure -j --repeat until-pass:5 + ctest --test-dir "$build_dir" -R amd-aie -E "matmul_pack_peel_air_e2e|matmul_elementwise_pack_peel_air_e2e|conv_fill_spec_pad" --output-on-failure -j --repeat until-pass:5 elif [[ "$OSTYPE" == "msys"* ]]; then # hack while windows is flaky to get past failing tests ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j --repeat until-pass:5 fi -# Show ccache stats. -ccache --show-stats - rm -f "$install_dir"/bin/clang* rm -f "$install_dir"/bin/llvm-link* cp "$build_dir"/tools/testing/e2e/iree-e2e-matmul-test "$install_dir"/bin diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py index 3ff4a05a5..09223a36e 100755 --- a/build_tools/ci/cpu_comparison/run_test.py +++ b/build_tools/ci/cpu_comparison/run_test.py @@ -627,7 +627,7 @@ def run(self, config): test_name = output_dir / "test_from_template_full_bias.mlir" template_name = matmul_template_dir / "matmul_bias_MxK_KxN_MxN.mlir" generate_matmul_test(test_name, template_name, 128, 128, 256, "i32", "i32") - aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", rtol=0, atol=0) + aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", rtol=0, atol=0) if config.xdna_datetime and config.xdna_datetime < 20240801: for name in [ @@ -657,10 +657,10 @@ def run(self, config): ) if config.vitis_dir: aie_vs_llvm_cpu( - config, test_name, tile_pipeline="pack-peel", use_ukernel=True + config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", use_ukernel=True ) aie_vs_llvm_cpu( - config, test_name, tile_pipeline="pack-peel", use_ukernel=False + config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", use_ukernel=False ) diff --git a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh index edc70df08..39ea12f1d 100755 --- a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh +++ b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh @@ -9,25 +9,17 @@ set -euo pipefail # Check for the number of provided arguments -if [ "$#" -ne 3 ] && [ "$#" -ne 5 ]; then +if [ "$#" -ne 3 ]; then echo -e "Illegal number of parameters: $#." \ "\n For 2 parameters:" \ "\n 1) " \ "\n 2) " \ "\n 3) " \ - "\n For 5 parameters:" \ - "\n 1) " \ - "\n 2) " \ - "\n 3) " \ - "\n 4) " \ - "\n 5) " \ "\n Example (dependent on environment variables):" \ "\n ./print_ir_aie2xclbin.sh " \ "\$IREE_BUILD_DIR/tools " \ "results_dir_tmp "\ "\$PEANO_INSTALL_DIR "\ - "/opt/xilinx/xrt "\ - "\$VITIS_INSTALL_PATH" exit 1 fi @@ -39,17 +31,6 @@ mkdir -p ${OUTPUT} if [ "$#" -eq 3 ]; then echo "Assuming that this is the 'CI case' as 3 parameters were provided." PEANO="$3" - XRT=/opt/xilinx/xrt - VITIS=/opt/Xilinx/Vitis/2024.2 -fi - -echo "chess-clang: $(find $VITIS -name chess-clang)" -echo "xchesscc: $(find $VITIS -name xchesscc)" - -# The local set-paths-manually case: -if [ "$#" -eq 5 ]; then - XRT="$4" - VITIS="$5" fi IREE_INSTALL_DIR="$1" @@ -83,15 +64,6 @@ else exit 1 fi -if [ -d "${XRT}" ]; then - XRT=`realpath "${XRT}"` - source $XRT/setup.sh -fi - -if [ -d "${VITIS}" ]; then - VITIS=${VITIS} -fi - # There might be a FileCheck program in the IREE_INSTALL_DIR. Check. # Do not fail if it is not there, we can also check if it already on PATH. if [ -x "${IREE_INSTALL_DIR}/bin/FileCheck" ]; then @@ -113,7 +85,6 @@ ${SOURCE_MLIR_FILE} \ --iree-hal-target-backends=amd-aie \ --iree-amd-aie-peano-install-dir=${PEANO} \ --iree-amd-aie-install-dir=${IREE_INSTALL_DIR} \ ---iree-amd-aie-vitis-install-dir=${VITIS} \ --iree-hal-dump-executable-files-to=${OUTPUT} \ --aie2xclbin-print-ir-after-all \ --aie2xclbin-print-ir-before-all \ @@ -122,7 +93,6 @@ ${SOURCE_MLIR_FILE} \ --mlir-print-ir-after-all \ --mlir-print-ir-module-scope \ --mlir-disable-threading \ ---iree-amdaie-tile-pipeline=pad-pack \ -o ${OUTPUT}/test_artefact.vmfb \ --iree-amd-aie-show-invoked-commands" @@ -176,9 +146,9 @@ IREE_COMPILE_COMMAND="${IREE_COMPILE_EXE} \ ${SOURCE_MLIR_FILE} \ --compile-mode=hal-executable \ --iree-hal-target-backends=amd-aie \ +--iree-amdaie-lower-to-aie-pipeline=air \ --iree-amd-aie-peano-install-dir=${PEANO} \ --iree-amd-aie-install-dir=${IREE_INSTALL_DIR} \ ---iree-amd-aie-vitis-install-dir=${VITIS} \ --iree-hal-dump-executable-intermediates-to=${OUTPUT} \ --iree-hal-dump-executable-files-to=${OUTPUT} \ --mlir-disable-threading \ @@ -199,9 +169,9 @@ IREE_COMPILE_COMMAND="${IREE_COMPILE_EXE} \ ${SOURCE_MLIR_FILE} \ --compile-mode=hal-executable \ --iree-hal-target-backends=amd-aie \ +--iree-amdaie-lower-to-aie-pipeline=air \ --iree-amd-aie-peano-install-dir=${PEANO} \ --iree-amd-aie-install-dir=${IREE_INSTALL_DIR} \ ---iree-amd-aie-vitis-install-dir=${VITIS} \ --iree-hal-dump-executable-intermediates-to=${OUTPUT} \ --iree-hal-dump-executable-files-to=${OUTPUT} \ --mlir-disable-threading \ diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh index ad7bead47..32fcac59f 100755 --- a/build_tools/ci/run_matmul_test.sh +++ b/build_tools/ci/run_matmul_test.sh @@ -536,74 +536,29 @@ run_matmul_test \ --use_ukernel "0" \ --num_repeat_runs "2" +################################################################### +# MLIR-AIR Matmul tests +################################################################### + if [ -d "$VITIS" ]; then run_matmul_test \ --name_prefix "ukern" \ + --lower_to_aie_pipeline "air" \ + --tile_pipeline "pad-pack" \ --lhs_rhs_type "bf16" \ --acc_type "f32" \ --m "256" --k "256" --n "256" \ --use_ukernel "1" fi -# Disabled until the following issue is resolved: -# https://github.com/Xilinx/llvm-aie/issues/102 -# -# run_matmul_test \ -# --name_prefix "transpose_int32" \ -# --lhs_rhs_type "i32" \ -# --acc_type "i32" \ -# --m "8" --n "16" --k "32" \ -# --do_transpose_rhs "1" - - -run_matmul_test \ - --name_prefix "transpose_i8_i32" \ - --lhs_rhs_type "i8" \ - --acc_type "i32" \ - --m "16" --n "32" --k "64" \ - --do_transpose_rhs "1" - -run_matmul_test \ - --name_prefix "transpose_bf16" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "256" --n "256" --k "256" \ - --do_transpose_rhs "1" - -# The below matmul case passes with -# tile_sizes = [[1, 1], [0, 0, 250], [1, 1], [0, 0, 2]], packedSizes = [1, 1, 5] -# but fails with tile_sizes = [[1, 1], [0, 0, 200], [1, 1], [0, 0, 1]], packedSizes = [1, 1, 8], -# with the error LLVM ERROR: unable to legalize instruction: %152:_(<2 x s32>) = G_FMUL %148:_, %150:_ (in function: core_0_2) -# The later is what a more vectorization friendly packing looks like so this test is expected failing the test here. -# TODO: check if the test will pass with a more recent llvm-aie and if it doesnt, report it upstream. -# Disabled until the following issue is resolved: -# https://github.com/Xilinx/llvm-aie/issues/102 -# run_matmul_test \ -# --name_prefix "failure_0" \ -# --lhs_rhs_type "i32" \ -# --acc_type "i32" \ -# --m "1" --n "1" --k "1000" \ -# --expect_compile_failure "1" - -# The below matmul case passes with -# tile_sizes = [52, 52], [0, 0, 63], [26, 26], [0, 0, 3], packedSizes = [2, 2, 7] -# but fails with tile_sizes = [[52, 52], [0, 0, 63], [4, 4], [0, 0, 3]], packedSizes = [4, 4, 7], -# in AIRHerdPlacementPass with the error No valid placement found -# The later is what a more vectorization friendly packing looks like so we are expected failing the test here. -# We should fix this failure. -# run_matmul_test \ -# --name_prefix "failure_0" \ -# --lhs_rhs_type "i32" \ -# --acc_type "i32" \ -# --m "52" --n "52" --k "63" \ -# --expect_compile_failure "1" - # Example of a run with a group of 2+ matmuls. Currently this test is passed # the flag '--num_repeat_runs 0" as there is currently an issue with the runtime if # multiple matmuls are run in the same test. TODO(newling/nmeshram): Document # this issue. run_matmul_test \ --name_prefix "multiple_matmuls" \ + --lower_to_aie_pipeline "air" \ + --tile_pipeline "pad-pack" \ --lhs_rhs_type "i32" \ --acc_type "i32" \ --m "512,8,16" \ @@ -612,105 +567,28 @@ run_matmul_test \ --num_repeat_runs "0" run_matmul_test \ - --name_prefix "small" \ - --lhs_rhs_type "i32" \ - --acc_type "i32" \ - --m "16" --n "16" --k "8" - -run_matmul_test \ - --name_prefix "small" \ - --lhs_rhs_type "i32" \ - --acc_type "i32" \ - --m "8" --n "32" --k "16" - -# Disabled until the following issue is resolved: -# https://github.com/Xilinx/llvm-aie/issues/102 -# run_matmul_test \ -# --name_prefix "small" \ -# --lhs_rhs_type "i32" \ -# --acc_type "i32" \ -# --m "9" --n "7" --k "16" - -run_matmul_test \ - --name_prefix "large" \ - --lhs_rhs_type "i32" \ - --acc_type "i32" \ - --m "64" --n "64" --k "128" - -run_matmul_test \ - --name_prefix "large" \ - --lhs_rhs_type "i32" \ - --acc_type "i32" \ - --m "512" --n "512" --k "512" - -run_matmul_test \ - --name_prefix "int8" \ - --lhs_rhs_type "i8" \ - --acc_type "i32" \ - --m "64" --n "64" --k "64" - -run_matmul_test \ - --name_prefix "bf16_2304" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "128" --n "128" --k "2304" + --name_prefix "transpose_i8_i32" \ + --lower_to_aie_pipeline "air" \ + --tile_pipeline "pad-pack" \ + --lhs_rhs_type "i8" \ + --acc_type "i32" \ + --m "16" --n "32" --k "64" \ + --do_transpose_rhs "1" run_matmul_test \ - --name_prefix "packPeel" \ + --name_prefix "packPeel_i32" \ --tile_pipeline "pack-peel" \ --lhs_rhs_type "i32" \ --acc_type "i32" \ --m "64" --n "64" --k "128" -# We're seeing intermittent numerical errors in these 3 tests, -# needs investigation. TODO(newling/yzhang93): Add more info. -# Appears to be only pack-peel pipeline with bf16->f32. -# Using 'num_repeat_runs=0' flag to avoid running the numerical test. -################################################################# - - -# TODO: compilation error with the below test. -# -# error: 'aie.dma_bd' op Cannot give more than 3 dimensions for step sizes and wraps in this tile (got 4 dimensions). -# -# The config generated with the current strategy is: -# -# packing_config = #amdaie.packing_config -# } -run_matmul_test \ - --name_prefix "packPeel" \ - --tile_pipeline "pack-peel" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "64" --n "64" --k "128" \ - --num_repeat_runs "0" - run_matmul_test \ - --name_prefix "packPeelLarge" \ + --name_prefix "packPeel_bf16" \ --tile_pipeline "pack-peel" \ --lhs_rhs_type "bf16" \ --acc_type "f32" \ --m "512" --n "512" --k "512" -run_matmul_test \ - --name_prefix "packPeel2304" \ - --tile_pipeline "pack-peel" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "128" --n "128" --k "2304" - - run_matmul_test \ --name_prefix "packPeel_t_bf16" \ --tile_pipeline "pack-peel" \ @@ -719,56 +597,6 @@ run_matmul_test \ --m "128" --n "256" --k "512" \ --do_transpose_rhs "1" -################################################################### - -run_matmul_test \ - --name_prefix "mm2" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "308" --k "9728" --n "2432" - -run_matmul_test \ - --name_prefix "mm3" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "308" --k "2432" --n "2432" - -run_matmul_test \ - --name_prefix "mm4" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "308" --k "2432" --n "7296" - -run_matmul_test \ - --name_prefix "mm5" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "8192" --k "2432" --n "9728" - -run_matmul_test \ - --name_prefix "mm6" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "308" --k "2432" --n "9728" - -run_matmul_test \ - --name_prefix "mm7" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "8192" --k "2432" --n "2432" - -run_matmul_test \ - --name_prefix "mm8" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "8192" --k "9728" --n "2432" - -run_matmul_test \ - --name_prefix "mm9" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "8192" --k "2432" --n "7296" - ################################################################### # ObjectFifo Matmul tests ################################################################### @@ -805,7 +633,7 @@ i32_shapes_medium=( ) run_matmul_test_on_shapes ${i32_shapes_small[@]} \ - --name_prefix "small" \ + --name_prefix "small_i32" \ --lower_to_aie_pipeline "objectFifo" \ --tile_pipeline "pack-peel" \ --lhs_rhs_type "i32" \ @@ -820,7 +648,7 @@ if [ "$OSTYPE" != "msys" ]; then fi run_matmul_test_on_shapes ${i32_shapes_medium[@]} \ - --name_prefix "medium" \ + --name_prefix "medium_i32" \ --lower_to_aie_pipeline "objectFifo" \ --tile_pipeline "pack-peel" \ --lhs_rhs_type "i32" \ @@ -852,7 +680,7 @@ bf16_ukernel_shapes_medium=( ) run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \ - --name_prefix "small" \ + --name_prefix "small_bf16" \ --lower_to_aie_pipeline "objectFifo" \ --tile_pipeline "pack-peel" \ --lhs_rhs_type "bf16" \ @@ -860,7 +688,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \ --num_repeat_runs "2" run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \ - --name_prefix "medium" \ + --name_prefix "medium_bf16" \ --lower_to_aie_pipeline "objectFifo" \ --tile_pipeline "pack-peel" \ --lhs_rhs_type "bf16" \ @@ -869,7 +697,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \ # i8 Matmul tests. run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \ - --name_prefix "small" \ + --name_prefix "small_i8" \ --lower_to_aie_pipeline "objectFifo" \ --tile_pipeline "pack-peel" \ --lhs_rhs_type "i8" \ @@ -877,7 +705,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \ --num_repeat_runs "2" run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \ - --name_prefix "medium" \ + --name_prefix "medium_i8" \ --lower_to_aie_pipeline "objectFifo" \ --tile_pipeline "pack-peel" \ --lhs_rhs_type "i8" \ @@ -886,7 +714,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \ if [ -d "$VITIS" ]; then run_matmul_test_on_shapes ${bf16_ukernel_shapes_small[@]} \ - --name_prefix "small" \ + --name_prefix "small_ukern" \ --lower_to_aie_pipeline "objectFifo" \ --tile_pipeline "pack-peel" \ --lhs_rhs_type "bf16" \ @@ -895,7 +723,7 @@ if [ -d "$VITIS" ]; then --use_ukernel "1" run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \ - --name_prefix "medium" \ + --name_prefix "medium_ukern" \ --lower_to_aie_pipeline "objectFifo" \ --tile_pipeline "pack-peel" \ --lhs_rhs_type "bf16" \ diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp index 4ca8649f7..19f9fa9d4 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp @@ -64,13 +64,10 @@ void AMDAIELoweringStrategyPass::runOnOperation() { } } - // To simplify development, the number of cores can be passed as a flag during - // compilation. In the future these parameters could be read from file. - struct AIEConfig cfg = {numCores}; for (auto funcOp : moduleOp.getOps()) { // Set the strategy with default heuristics. if (failed(initAIELaunchConfig(funcOp, usePassPipeline, - useLowerToAIEPipeline, cfg))) { + useLowerToAIEPipeline))) { funcOp.emitOpError("failed to have a lowering configuration set for it."); return signalPassFailure(); } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECreateLogicalObjectFifoLink.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECreateLogicalObjectFifoLink.cpp index cfd347313..3ffcd6e6f 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECreateLogicalObjectFifoLink.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECreateLogicalObjectFifoLink.cpp @@ -32,9 +32,9 @@ namespace mlir::iree_compiler::AMDAIE { /// these two access patterns overlap by both accessing elements in range [32, /// 63]. template -LogicalResult checkForNoOverlappingAccessPatterns( - const SmallVector> - &stridedOps) { +LogicalResult checkForContiguousAccessPatterns( + ArrayRef> stridedOps) { + for (auto &&[i, stridedOpAndOffset] : llvm::enumerate(stridedOps)) { DoublyStridedCopyOpInterface stridedOp = stridedOpAndOffset.first; std::optional extent; @@ -45,14 +45,26 @@ LogicalResult checkForNoOverlappingAccessPatterns( } if (!extent) { return stridedOp.emitOpError() - << "non-constant access extent is not supported"; + << "has a non-constant access extent, which is not supported"; } int64_t offset = stridedOpAndOffset.second; - if (i < (stridedOps.size() - 1) && - (offset + extent.value()) > stridedOps[i + 1].second) { - return stridedOp.emitOpError() - << "access pattern of strided operation overlaps with next one, " - "which is not supported for now"; + if (i < (stridedOps.size() - 1)) { + if (offset + extent.value() != stridedOps[i + 1].second) { + // TODO(newling) my understanding from the code is that the link + // operation effectively replaces the cumulative offset of each + // circular_dma_cpy_nd with the differential offset with + // the previous circular_dma_cpy_nd in the 'link' list. + // + // This however is hardcoded to a zero offset (later in the pass where + // discardAllNonZeroOffsets is called, offsets are set to zero). This + // effectively is constraining the link operation to only work with + // contiguous access patterns. + // + // Is this a bug? + return stridedOp.emitOpError() + << "has access pattern of which isn't contiguous with next one " + "-- not currently supported."; + } } } return success(); @@ -81,9 +93,8 @@ LogicalResult createLogicalObjectFifoLink( for (Operation *userOp : logicalObjectFifo->getUsers()) { if (auto stridedOp = dyn_cast(userOp)) { if (lastUserOp && stridedOp->getBlock() != lastUserOp->getBlock()) { - logicalObjectFifo->emitError( - "does have copy-like users not residing in the same block"); - return failure(); + return logicalObjectFifo->emitOpError( + "has copy-like users not residing in the same block"); } auto sourceLogicalObjectFifo = dyn_cast( @@ -130,11 +141,11 @@ LogicalResult createLogicalObjectFifoLink( // Check that access patterns are not overlapping between consumers // respectively producers. if (failed( - checkForNoOverlappingAccessPatterns(ins))) { + checkForContiguousAccessPatterns(ins))) { return failure(); } if (failed( - checkForNoOverlappingAccessPatterns(outs))) { + checkForContiguousAccessPatterns(outs))) { return failure(); } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp index f8f4773ef..1d564be37 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp @@ -312,8 +312,7 @@ static SmallVector setInnerPermB(bool isMatmulTransposeB) { static LogicalResult setRootConfigForPackPeelPipeline( mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp, - LowerToAIEPassPipeline useLowerToAIEPipeline, AIEConfig cfg, - bool isMatmulTransposeB) { + LowerToAIEPassPipeline useLowerToAIEPipeline, bool isMatmulTransposeB) { bool isObjectFifo = useLowerToAIEPipeline == LowerToAIEPassPipeline::ObjectFifo; auto maybePackPeelTiling = @@ -389,7 +388,7 @@ static LogicalResult setRootConfigForPackPeelPipeline( static LogicalResult setRootConfigForPadPackPipeline( mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp, - AIEConfig cfg, bool isMatmulTransposeB) { + bool isMatmulTransposeB) { auto maybePadPackTiling = ParameterSetting::create( linalgOp, /*isPackPeel=*/false, /*isObjectFifo=*/false); if (failed(maybePadPackTiling)) return failure(); @@ -445,8 +444,7 @@ static LogicalResult setRootConfigForPadPackPipeline( //===----------------------------------------------------------------------===// static LogicalResult setRootConfigForConvDecomposePipeline( - mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp, - AIEConfig cfg) { + mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp) { FailureOr> maybeInstructionSize = getMatmulInstructionSize(linalgOp); int64_t OW = 4; @@ -606,13 +604,13 @@ static bool isMatmulTransposeB(linalg::GenericOp genericOp) { /// transposition. static LogicalResult setTransposeLikeOpRootConfig( mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp, - TilePassPipeline passPipeline, LowerToAIEPassPipeline useLowerToAIEPipeline, - AIEConfig cfg) { + TilePassPipeline passPipeline, + LowerToAIEPassPipeline useLowerToAIEPipeline) { if (passPipeline == TilePassPipeline::PackPeelPipeline) return setRootConfigForPackPeelPipeline(entryPointFn, linalgOp, - useLowerToAIEPipeline, cfg, true); + useLowerToAIEPipeline, true); else if (passPipeline == TilePassPipeline::PadPackPipeline) - return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, cfg, true); + return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, true); return linalgOp.emitError( "Unhandled pass pipeline in setTransposeLikeOpRootConfig."); } @@ -621,17 +619,16 @@ static LogicalResult setTransposeLikeOpRootConfig( // Root Configurations //===----------------------------------------------------------------------===// -static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, - linalg::GenericOp genericOp, - TilePassPipeline passPipeline, - LowerToAIEPassPipeline useLowerToAIEPipeline, - AIEConfig cfg) { +static LogicalResult setRootConfig( + mlir::FunctionOpInterface entryPointFn, linalg::GenericOp genericOp, + TilePassPipeline passPipeline, + LowerToAIEPassPipeline useLowerToAIEPipeline) { assert(!getLoweringConfig(genericOp) && "expected lowering_config is not set"); if (isMatmulTransposeB(genericOp) && succeeded(setTransposeLikeOpRootConfig( - entryPointFn, genericOp, passPipeline, useLowerToAIEPipeline, cfg))) { + entryPointFn, genericOp, passPipeline, useLowerToAIEPipeline))) { return success(); } @@ -640,18 +637,16 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, /// Sets the lowering configuration for dispatch region with root op that /// implements the contraction operation interface. -static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, - linalg::ContractionOpInterface contractionOp, - TilePassPipeline passPipeline, - LowerToAIEPassPipeline useLowerToAIEPipeline, - AIEConfig cfg) { +static LogicalResult setRootConfig( + mlir::FunctionOpInterface entryPointFn, + linalg::ContractionOpInterface contractionOp, TilePassPipeline passPipeline, + LowerToAIEPassPipeline useLowerToAIEPipeline) { assert(!getLoweringConfig(contractionOp) && "expected lowering_config is not set"); auto linalgOp = cast(contractionOp.getOperation()); if (isa(linalgOp)) { - if (succeeded(setTransposeLikeOpRootConfig(entryPointFn, linalgOp, - passPipeline, - useLowerToAIEPipeline, cfg))) { + if (succeeded(setTransposeLikeOpRootConfig( + entryPointFn, linalgOp, passPipeline, useLowerToAIEPipeline))) { return success(); } return failure(); @@ -672,31 +667,30 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn, // approach which will have different tile sizes and pass pipelines if (passPipeline == TilePassPipeline::PackPeelPipeline) return setRootConfigForPackPeelPipeline(entryPointFn, linalgOp, - useLowerToAIEPipeline, cfg, false); + useLowerToAIEPipeline, false); if (passPipeline == TilePassPipeline::PadPackPipeline) - return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, cfg, false); + return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, false); return linalgOp.emitError("Unhandled pass pipeline in setRootConfig."); } static LogicalResult setConvRootConfig(mlir::FunctionOpInterface entryPointFn, linalg::ConvolutionOpInterface convOp, - TilePassPipeline passPipeline, - AIEConfig cfg) { + TilePassPipeline passPipeline) { assert(!getLoweringConfig(convOp) && "expected lowering_config is not set"); auto linalgOp = cast(convOp.getOperation()); // Current tiling strategy is based on llvm-cpu ConvTileAndDecomposeExpert. if (passPipeline == TilePassPipeline::ConvDecomposePipeline) - return setRootConfigForConvDecomposePipeline(entryPointFn, linalgOp, cfg); + return setRootConfigForConvDecomposePipeline(entryPointFn, linalgOp); return linalgOp.emitError("Unhandled pass pipeline in setConvRootConfig."); } /// Redirects to methods that set the configuration based on operation type. static LogicalResult setRootConfigImpl( mlir::FunctionOpInterface entryPointFn, Operation *op, - TilePassPipeline passPipeline, LowerToAIEPassPipeline useLowerToAIEPipeline, - AIEConfig cfg) { + TilePassPipeline passPipeline, + LowerToAIEPassPipeline useLowerToAIEPipeline) { auto setRootConfigFn = [&](Operation *op) -> LogicalResult { return TypeSwitch(op) // TODO (nmeshram): This is very limited for now, plan is to @@ -706,15 +700,15 @@ static LogicalResult setRootConfigImpl( .Case( [&](auto op) { - return setConvRootConfig(entryPointFn, op, passPipeline, cfg); + return setConvRootConfig(entryPointFn, op, passPipeline); }) .Case([&](auto op) { return setRootConfig(entryPointFn, op, passPipeline, - useLowerToAIEPipeline, cfg); + useLowerToAIEPipeline); }) .Case([&](auto op) { return setRootConfig(entryPointFn, op, passPipeline, - useLowerToAIEPipeline, cfg); + useLowerToAIEPipeline); }) .Default([&](Operation *op) { return success(); }); }; @@ -724,8 +718,8 @@ static LogicalResult setRootConfigImpl( /// Sets the translation information to use for a dispatch region. static LogicalResult setTranslationInfoAndRootConfig( mlir::FunctionOpInterface entryPointFn, ArrayRef computeOps, - TilePassPipeline passPipeline, LowerToAIEPassPipeline useLowerToAIEPipeline, - AIEConfig cfg) { + TilePassPipeline passPipeline, + LowerToAIEPassPipeline useLowerToAIEPipeline) { // Make sure that lowering_config is not preset on any compute ops. for (auto computeOp : computeOps) { if (getLoweringConfig(computeOp)) @@ -741,7 +735,7 @@ static LogicalResult setTranslationInfoAndRootConfig( return entryPointFn.emitError("Case with no root ops not yet supported."); if (failed(setRootConfigImpl(entryPointFn, rootOperation, passPipeline, - useLowerToAIEPipeline, cfg))) + useLowerToAIEPipeline))) return failure(); return success(); } @@ -750,10 +744,9 @@ static LogicalResult setTranslationInfoAndRootConfig( // Entry Point //===----------------------------------------------------------------------===// -LogicalResult initAIELaunchConfig(FunctionOpInterface funcOp, - TilePassPipeline passPipeline, - LowerToAIEPassPipeline useLowerToAIEPipeline, - AIEConfig cfg) { +LogicalResult initAIELaunchConfig( + FunctionOpInterface funcOp, TilePassPipeline passPipeline, + LowerToAIEPassPipeline useLowerToAIEPipeline) { if (getTranslationInfo(funcOp)) return success(); // TODO (nmeshram): Need a default pipeline for control flow cases. @@ -762,7 +755,7 @@ LogicalResult initAIELaunchConfig(FunctionOpInterface funcOp, SmallVector computeOps = getComputeOps(funcOp); if (failed(setTranslationInfoAndRootConfig(funcOp, computeOps, passPipeline, - useLowerToAIEPipeline, cfg))) + useLowerToAIEPipeline))) return failure(); // The root configuration setting introduces `tensor.dim` operations. diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h index 3afec4f7d..879f0882c 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h @@ -35,16 +35,9 @@ enum class PeelingType { First, Last, FirstLast }; /// Enum for operands to be bufferized to allocation. enum class BufferizeOperand { InputOutput, Input, Output, DefOp }; -/// Struct specifying the number of cores to use. This will be replaced -/// by a more versatile handling in the future. -struct AIEConfig { - int32_t num_cores; -}; - LogicalResult initAIELaunchConfig(FunctionOpInterface funcOp, TilePassPipeline usePassPipeline, - LowerToAIEPassPipeline useLowerToAIEPipeline, - AIEConfig cfg); + LowerToAIEPassPipeline useLowerToAIEPipeline); } // namespace mlir::iree_compiler::AMDAIE diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index 06cf2171a..f1ac91a26 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -52,7 +52,7 @@ static llvm::cl::opt clUseLowerToAIEPipeline( clEnumValN(LowerToAIEPassPipeline::ObjectFifo, "objectFifo", "Use the IREE lowering to objectFifos")), - llvm::cl::init(LowerToAIEPassPipeline::AIR)); + llvm::cl::init(LowerToAIEPassPipeline::ObjectFifo)); /// Command line option for selecting the lowering pipeline to use tiling /// computations and packing data. @@ -69,11 +69,7 @@ static llvm::cl::opt clUseTilePipeline( clEnumValN(TilePassPipeline::ConvDecomposePipeline, "conv-decompose", "Use the conv-decompose based lowering strategy for " "convolution interface ops")), - llvm::cl::init(TilePassPipeline::PadPackPipeline)); - -static llvm::cl::opt clNumCores( - "iree-amdaie-num-cores", - llvm::cl::desc("Choose the number of cores to use"), llvm::cl::init(1)); + llvm::cl::init(TilePassPipeline::PackPeelPipeline)); static llvm::cl::opt clPathToUkernels( "iree-amdaie-path-to-ukernels", @@ -553,7 +549,6 @@ void buildAMDAIETransformPassPipeline(OpPassManager &variantPassManager, AMDAIELoweringStrategyOptions options; options.usePassPipeline = clUseTilePipeline; options.useLowerToAIEPipeline = clUseLowerToAIEPipeline; - options.numCores = clNumCores; modulePassManager.addPass(createAMDAIELoweringStrategyPass(options)); } modulePassManager.addPass(createLowerExecutableUsingTransformDialectPass()); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td index 9f6560870..c1ec4c15c 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td @@ -290,7 +290,7 @@ def AMDAIELowerExecutableTarget : let options = [ Option<"usePassPipeline", "use-pass-pipeline", "mlir::iree_compiler::AMDAIE::TilePassPipeline", - /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PadPackPipeline", + /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline", "Pass pipeline to use while lowering to AIR dialect", [{::llvm::cl::values( clEnumValN(mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline, "pack-peel", @@ -310,7 +310,7 @@ def AMDAIELoweringStrategy : let options = [ Option<"usePassPipeline", "use-pass-pipeline", "mlir::iree_compiler::AMDAIE::TilePassPipeline", - /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PadPackPipeline", + /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline", "Pass pipeline to use while lowering to AIR dialect", [{::llvm::cl::values( clEnumValN(mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline, "pack-peel", @@ -320,11 +320,9 @@ def AMDAIELoweringStrategy : clEnumValN(mlir::iree_compiler::AMDAIE::TilePassPipeline::ConvDecomposePipeline, "conv-decompose", "Use the conv-decompose based lowering strategy for convolution interface ops.") )}]>, - Option<"numCores", "num-cores", "int32_t", /*default=*/"1", - "Choose the number of cores to use">, Option<"useLowerToAIEPipeline", "use-lower-to-aie-pipeline", "mlir::iree_compiler::AMDAIE::LowerToAIEPassPipeline", - /*default=*/"mlir::iree_compiler::AMDAIE::LowerToAIEPassPipeline::AIR", + /*default=*/"mlir::iree_compiler::AMDAIE::LowerToAIEPassPipeline::ObjectFifo", "Lowering pass pipeline to use", [{::llvm::cl::values( clEnumValN(mlir::iree_compiler::AMDAIE::LowerToAIEPassPipeline::ObjectFifo, "objectFifo", diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir index 2f915931c..67577eff1 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir @@ -79,7 +79,7 @@ func.func @link_multiple_inputs_with_overlapping_access(%arg0: memref<32x1024xi3 %2 = amdaie.logicalobjectfifo.from_memref %arg2, {} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> %3 = amdaie.circular_dma_cpy_nd(%1[0] [1024] [1], %0[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %4 = amdaie.circular_dma_cpy_nd(%1[1, 0] [1, 1024] [2048, 1], %0[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - // expected-error @+1 {{access pattern of strided operation overlaps with next one}} + // expected-error @+1 {{op has access pattern of which isn't contiguous with next one}} %5 = amdaie.circular_dma_cpy_nd(%1[1, 0] [1, 1025] [1024, 1], %0[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %6 = amdaie.circular_dma_cpy_nd(%2[] [] [], %1[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) return @@ -151,7 +151,7 @@ func.func @link_multiple_outputs_with_overlapping_access(%arg0: memref<32x1024xi %3 = amdaie.circular_dma_cpy_nd(%1[] [] [], %0[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %4 = amdaie.circular_dma_cpy_nd(%2[] [] [], %1[1, 0] [1, 1024] [2048, 1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %5 = amdaie.circular_dma_cpy_nd(%2[] [] [], %1[1, 0] [1, 1024] [1024, 1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - // expected-error @+1 {{access pattern of strided operation overlaps with next one}} + // expected-error @+1 {{op has access pattern of which isn't contiguous with next one}} %6 = amdaie.circular_dma_cpy_nd(%2[] [] [], %1[0, 0] [32, 32] [64, 1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) return } @@ -243,7 +243,7 @@ func.func @ensure_no_removal_of_offsets(%arg0: memref<32x1024xi32>, %arg1: memre func.func @link_different_blocks(%arg0: memref<32x1024xi32>, %arg1: memref<32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { %0 = amdaie.logicalobjectfifo.from_memref %arg0, {} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - // expected-error @+2 {{does have copy-like users not residing in the same block}} + // expected-error @+2 {{has copy-like users not residing in the same block}} // expected-error @+1 {{couldn't create a link operation}} %1 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<32x64xi32, 1> -> !amdaie.logicalobjectfifo> %2 = amdaie.logicalobjectfifo.from_memref %arg2, {} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy.mlir index 872d627a2..8c7546d78 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy.mlir @@ -1,5 +1,5 @@ -// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{use-pass-pipeline=pad-pack})' %s | FileCheck %s --check-prefix=CHECK-PAD-PACK -// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{use-pass-pipeline=pack-peel})' %s | FileCheck %s --check-prefix=CHECK-PACK-PEEL +// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{use-lower-to-aie-pipeline=air use-pass-pipeline=pad-pack})' %s | FileCheck %s --check-prefix=CHECK-PAD-PACK +// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{use-lower-to-aie-pipeline=air use-pass-pipeline=pack-peel})' %s | FileCheck %s --check-prefix=CHECK-PACK-PEEL // CHECK-PAD-PACK{LITERAL}: #config = #iree_codegen.lowering_config // CHECK-PAD-PACK{LITERAL}: #packingConfig = #amdaie.packing_config diff --git a/sync_deps.py b/sync_deps.py index 0d0e470c7..149b11533 100644 --- a/sync_deps.py +++ b/sync_deps.py @@ -2,122 +2,121 @@ ### AUTO-GENERATED: DO NOT EDIT ### Casual developers and CI bots invoke this to do the most ### efficient checkout of dependencies. -### Cross-repo project development should use the +### Cross-repo project development should use the ### 'shark-workspace' dev tool for more full featured setup. ### Update with: shark-workspace pin PINNED_VERSIONS = { - "iree": "60b65f30c932eaf967922785253a85a1aa14cebb", + "iree": "60b65f30c932eaf967922785253a85a1aa14cebb", } ORIGINS = { - "iree": "https://github.com/iree-org/iree.git", + "iree": "https://github.com/iree-org/iree.git", } -SUBMODULES = { - "iree": 1 -} +SUBMODULES = {"iree"} +EXCLUDED_SUBMODULES = {"third_party/torch-mlir"} ### Update support: import argparse -from pathlib import Path import re import shlex import subprocess +from pathlib import Path def main(): - parser = argparse.ArgumentParser(description="Source deps sync") - parser.add_argument( - "--exclude-submodule", - nargs="*", - help="Exclude submodules by regex (relative to '{project}:{path})") - parser.add_argument("--exclude-dep", - nargs="*", - help="Excludes dependencies by regex") - parser.add_argument("--depth", - type=int, - default=0, - help="Fetch revisions with --depth") - parser.add_argument("--submodules-depth", - type=int, - default=0, - help="Update submodules with --depth") - args = parser.parse_args() - - workspace_dir = Path(__file__).resolve().parent.parent - for repo_name, revision in PINNED_VERSIONS.items(): - # Exclude this dep? - exclude_repo = False - for exclude_pattern in (args.exclude_dep or ()): - if re.search(exclude_pattern, repo_name): - exclude_repo = True - if exclude_repo: - print(f"Excluding {repo_name} based on --exclude-dep") - continue - - print(f"Syncing {repo_name}") - repo_dir = workspace_dir / repo_name - if not repo_dir.exists(): - # Shallow clone - print(f" Cloning {repo_name}...") - repo_dir.mkdir() - run(["init"], repo_dir) - run(["remote", "add", "origin", ORIGINS[repo_name]], repo_dir) - # Checkout detached head. - fetch_args = ["fetch"] - if args.depth > 0: - fetch_args.extend(["--depth=1"]) - fetch_args.extend(["origin", revision]) - run(fetch_args, repo_dir) - run(["-c", "advice.detachedHead=false", "checkout", revision], repo_dir) - if SUBMODULES.get(repo_name): - print(f" Initializing submodules for {repo_name}") - cp = run(["submodule", "status"], - repo_dir, - silent=True, - capture_output=True) - submodules = [] - for submodule_status_line in cp.stdout.decode().splitlines(): - submodule_status_parts = submodule_status_line.split() - submodule_path = submodule_status_parts[1] - exclude_submodule = False - for exclude_pattern in (args.exclude_submodule or ()): - if re.search(exclude_pattern, f"{repo_name}:{submodule_path}"): - exclude_submodule = True - if exclude_submodule: - print(f" Excluding {submodule_path} based on --exclude-submodule") - continue - submodules.append(submodule_path) - - update_args = ["submodule", "update", "--init"] - if args.submodules_depth > 0: - update_args.extend(["--depth", "1"]) - update_args.extend(["--"]) - update_args.extend(submodules) - run(update_args, repo_dir) - - -def run(args, - cwd, - *, - capture_output: bool = False, - check: bool = True, - silent: bool = False): - args = ["git"] + args - args_text = ' '.join([shlex.quote(arg) for arg in args]) - if not silent: - print(f" [{cwd}]$ {args_text}") - cp = subprocess.run(args, cwd=str(cwd), capture_output=capture_output) - if check and cp.returncode != 0: - addl_info = f":\n({cp.stderr.decode()})" if capture_output else "" - raise RuntimeError(f"Git command failed: {args_text} (from {cwd})" - f"{addl_info}") - return cp + parser = argparse.ArgumentParser(description="Source deps sync") + parser.add_argument( + "--exclude-submodule", + nargs="*", + help="Exclude submodules by regex (relative to '{project}:{path})", + default=(), + ) + parser.add_argument( + "--exclude-dep", nargs="*", help="Excludes dependencies by regex" + ) + parser.add_argument( + "--depth", type=int, default=0, help="Fetch revisions with --depth" + ) + parser.add_argument( + "--submodules-depth", type=int, default=0, help="Update submodules with --depth" + ) + args = parser.parse_args() + + workspace_dir = Path(__file__).resolve().parent.parent + for repo_name, revision in PINNED_VERSIONS.items(): + # Exclude this dep? + exclude_repo = False + for exclude_pattern in args.exclude_dep or (): + if re.search(exclude_pattern, repo_name): + exclude_repo = True + if exclude_repo: + print(f"Excluding {repo_name} based on --exclude-dep") + continue + + print(f"Syncing {repo_name}") + repo_dir = workspace_dir / repo_name + if not repo_dir.exists(): + # Shallow clone + print(f" Cloning {repo_name}...") + repo_dir.mkdir() + run(["init"], repo_dir) + run(["remote", "add", "origin", ORIGINS[repo_name]], repo_dir) + # Checkout detached head. + fetch_args = ["fetch"] + if args.depth > 0: + fetch_args.extend(["--depth=1"]) + fetch_args.extend(["origin", revision]) + run(fetch_args, repo_dir) + run(["-c", "advice.detachedHead=false", "checkout", revision], repo_dir) + if repo_name in SUBMODULES: + print(f" Initializing submodules for {repo_name}") + cp = run( + ["submodule", "status"], repo_dir, silent=True, capture_output=True + ) + submodules = [] + for submodule_status_line in cp.stdout.decode().splitlines(): + submodule_status_parts = submodule_status_line.split() + submodule_path = submodule_status_parts[1] + exclude_submodule = False + for exclude_pattern in args.exclude_submodule + tuple( + EXCLUDED_SUBMODULES + ): + if re.search(exclude_pattern, f"{repo_name}:{submodule_path}"): + exclude_submodule = True + if exclude_submodule: + print(f" Excluding {submodule_path} based on --exclude-submodule") + continue + submodules.append(submodule_path) + + update_args = ["submodule", "update", "--init"] + if args.submodules_depth > 0: + update_args.extend(["--depth", "1"]) + update_args.extend(["--"]) + update_args.extend(submodules) + print(update_args) + run(update_args, repo_dir) + + +def run( + args, cwd, *, capture_output: bool = False, check: bool = True, silent: bool = False +): + args = ["git"] + args + args_text = " ".join([shlex.quote(arg) for arg in args]) + if not silent: + print(f" [{cwd}]$ {args_text}") + cp = subprocess.run(args, cwd=str(cwd), capture_output=capture_output) + if check and cp.returncode != 0: + addl_info = f":\n({cp.stderr.decode()})" if capture_output else "" + raise RuntimeError( + f"Git command failed: {args_text} (from {cwd})" f"{addl_info}" + ) + return cp if __name__ == "__main__": - main() + main() diff --git a/tests/samples/CMakeLists.txt b/tests/samples/CMakeLists.txt index 33d0e5646..618409664 100644 --- a/tests/samples/CMakeLists.txt +++ b/tests/samples/CMakeLists.txt @@ -8,12 +8,12 @@ iree_lit_test_suite( NAME lit SRCS - "conv_pipeline_e2e.mlir" - "matmul_peeled_objectfifo.mlir" - "matmul_peeled_objectfifo_e2e.mlir" - "pack_peel_pipeline_matmul.mlir" - "pack_peel_pipeline_matmul_elementwise.mlir" - "pad_pack_pipeline_e2e.mlir" + "conv2d_nhwc_air_e2e.mlir" + "matmul_elementwise_pack_peel_air_e2e.mlir" + "matmul_pack_peel_air_e2e.mlir" + "matmul_pack_peel_objectfifo.mlir" + "matmul_pack_peel_objectfifo_e2e.mlir" + "matmul_pad_pack_air_e2e.mlir" "xdna_oplib_plugin.mlir" TOOLS ${IREE_LLD_TARGET} diff --git a/tests/samples/conv_pipeline_e2e.mlir b/tests/samples/conv2d_nhwc_air_e2e.mlir similarity index 95% rename from tests/samples/conv_pipeline_e2e.mlir rename to tests/samples/conv2d_nhwc_air_e2e.mlir index ffe8222f8..2b005150a 100644 --- a/tests/samples/conv_pipeline_e2e.mlir +++ b/tests/samples/conv2d_nhwc_air_e2e.mlir @@ -1,4 +1,4 @@ -// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=conv-decompose --split-input-file | FileCheck %s +// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=conv-decompose --iree-amdaie-lower-to-aie-pipeline=air --split-input-file | FileCheck %s func.func @conv_2d_nhwc_hwcf(%arg0: tensor<2x14x14x32xi32>, %arg1: tensor<3x3x32x64xi32>) -> tensor<2x12x12x64xi32> { %cst = arith.constant 0 : i32 diff --git a/tests/samples/pack_peel_pipeline_matmul_elementwise.mlir b/tests/samples/matmul_elementwise_pack_peel_air_e2e.mlir similarity index 95% rename from tests/samples/pack_peel_pipeline_matmul_elementwise.mlir rename to tests/samples/matmul_elementwise_pack_peel_air_e2e.mlir index c99b3b269..2f666db91 100644 --- a/tests/samples/pack_peel_pipeline_matmul_elementwise.mlir +++ b/tests/samples/matmul_elementwise_pack_peel_air_e2e.mlir @@ -1,4 +1,4 @@ -// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pack-peel --iree-amdaie-matmul-elementwise-fusion --split-input-file | FileCheck %s +// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-lower-to-aie-pipeline=air --iree-amdaie-tile-pipeline=pack-peel --iree-amdaie-matmul-elementwise-fusion --split-input-file | FileCheck %s func.func @matmul_elementwise_i32(%lhs: tensor<1024x512xi32>, %rhs: tensor<512x1024xi32>, %ele: tensor<1024x1024xi32>) -> tensor<1024x1024xi32> { diff --git a/tests/samples/pack_peel_pipeline_matmul.mlir b/tests/samples/matmul_pack_peel_air_e2e.mlir similarity index 91% rename from tests/samples/pack_peel_pipeline_matmul.mlir rename to tests/samples/matmul_pack_peel_air_e2e.mlir index a626a2132..e29ded73e 100644 --- a/tests/samples/pack_peel_pipeline_matmul.mlir +++ b/tests/samples/matmul_pack_peel_air_e2e.mlir @@ -1,4 +1,4 @@ -// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pack-peel --split-input-file | FileCheck %s +// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-lower-to-aie-pipeline=air --iree-amdaie-tile-pipeline=pack-peel --split-input-file | FileCheck %s func.func @matmul_i8_i32(%lhs: tensor<32x16xi8>, %rhs: tensor<16x32xi8>) -> tensor<32x32xi32> { diff --git a/tests/samples/matmul_peeled_objectfifo.mlir b/tests/samples/matmul_pack_peel_objectfifo.mlir similarity index 100% rename from tests/samples/matmul_peeled_objectfifo.mlir rename to tests/samples/matmul_pack_peel_objectfifo.mlir diff --git a/tests/samples/matmul_peeled_objectfifo_e2e.mlir b/tests/samples/matmul_pack_peel_objectfifo_e2e.mlir similarity index 100% rename from tests/samples/matmul_peeled_objectfifo_e2e.mlir rename to tests/samples/matmul_pack_peel_objectfifo_e2e.mlir diff --git a/tests/samples/pad_pack_pipeline_e2e.mlir b/tests/samples/matmul_pad_pack_air_e2e.mlir similarity index 97% rename from tests/samples/pad_pack_pipeline_e2e.mlir rename to tests/samples/matmul_pad_pack_air_e2e.mlir index 14bdcb04c..90ef20392 100644 --- a/tests/samples/pad_pack_pipeline_e2e.mlir +++ b/tests/samples/matmul_pad_pack_air_e2e.mlir @@ -1,4 +1,4 @@ -// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pad-pack --split-input-file | FileCheck %s --check-prefix=CPP +// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pad-pack --iree-amdaie-lower-to-aie-pipeline=air --split-input-file | FileCheck %s --check-prefix=CPP // This test demonstrates Pad-Pack pipeline based e2e lowering.