diff --git a/build_tools/ci/build_test_cpp.sh b/build_tools/ci/build_test_cpp.sh
index 38c98ca9d..0382601aa 100644
--- a/build_tools/ci/build_test_cpp.sh
+++ b/build_tools/ci/build_test_cpp.sh
@@ -112,7 +112,7 @@ echo "-----"
 if [[ "$OSTYPE" == "linux-gnu"* ]]; then
   ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j
 elif [[ "$OSTYPE" == "darwin"* ]]; then
-  ctest --test-dir "$build_dir" -R amd-aie -E "pack_peel_pipeline_matmul|conv_fill_spec_pad" --output-on-failure -j --repeat until-pass:5
+  ctest --test-dir "$build_dir" -R amd-aie -E "matmul_pack_peel_air_e2e|matmul_elementwise_pack_peel_air_e2e|conv_fill_spec_pad" --output-on-failure -j --repeat until-pass:5
 elif [[ "$OSTYPE" == "msys"* ]]; then
   # hack while windows is flaky to get past failing tests
   ctest --test-dir "$build_dir" -R amd-aie --output-on-failure -j --repeat until-pass:5
diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index 3ff4a05a5..09223a36e 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -627,7 +627,7 @@ def run(self, config):
         test_name = output_dir / "test_from_template_full_bias.mlir"
         template_name = matmul_template_dir / "matmul_bias_MxK_KxN_MxN.mlir"
         generate_matmul_test(test_name, template_name, 128, 128, 256, "i32", "i32")
-        aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", rtol=0, atol=0)
+        aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", rtol=0, atol=0)
 
         if config.xdna_datetime and config.xdna_datetime < 20240801:
             for name in [
@@ -657,10 +657,10 @@ def run(self, config):
             )
 
             if config.vitis_dir:
                 aie_vs_llvm_cpu(
-                    config, test_name, tile_pipeline="pack-peel", use_ukernel=True
+                    config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", use_ukernel=True
                 )
             aie_vs_llvm_cpu(
-                config, test_name, tile_pipeline="pack-peel", use_ukernel=False
+                config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", use_ukernel=False
             )
diff --git a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh
index 76b06bb3a..39ea12f1d 100755
--- a/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh
+++ b/build_tools/ci/print_ir_aie2xclbin/print_ir_aie2xclbin.sh
@@ -93,7 +93,6 @@ ${SOURCE_MLIR_FILE} \
 --mlir-print-ir-after-all \
 --mlir-print-ir-module-scope \
 --mlir-disable-threading \
---iree-amdaie-tile-pipeline=pad-pack \
 -o ${OUTPUT}/test_artefact.vmfb \
 --iree-amd-aie-show-invoked-commands"
 
@@ -147,6 +146,7 @@ IREE_COMPILE_COMMAND="${IREE_COMPILE_EXE} \
 ${SOURCE_MLIR_FILE} \
 --compile-mode=hal-executable \
 --iree-hal-target-backends=amd-aie \
+--iree-amdaie-lower-to-aie-pipeline=air \
 --iree-amd-aie-peano-install-dir=${PEANO} \
 --iree-amd-aie-install-dir=${IREE_INSTALL_DIR} \
 --iree-hal-dump-executable-intermediates-to=${OUTPUT} \
@@ -169,6 +169,7 @@ IREE_COMPILE_COMMAND="${IREE_COMPILE_EXE} \
 ${SOURCE_MLIR_FILE} \
 --compile-mode=hal-executable \
 --iree-hal-target-backends=amd-aie \
+--iree-amdaie-lower-to-aie-pipeline=air \
 --iree-amd-aie-peano-install-dir=${PEANO} \
 --iree-amd-aie-install-dir=${IREE_INSTALL_DIR} \
 --iree-hal-dump-executable-intermediates-to=${OUTPUT} \
diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh
index ad7bead47..32fcac59f 100755
--- a/build_tools/ci/run_matmul_test.sh
+++ b/build_tools/ci/run_matmul_test.sh
@@ -536,74 +536,29 @@ run_matmul_test \
     --use_ukernel "0" \
     --num_repeat_runs "2"
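# With this patch the compiler's default lowering pipeline becomes objectFifo
# (see the Passes.cpp and Passes.td hunks further down), so any test that
# still wants the MLIR-AIR path now opts in explicitly, as the hunks below do.
# A minimal sketch of such an opt-in invocation of this script's helper; the
# prefix, types, and shape are illustrative placeholders, not part of the
# patch:
#
#   run_matmul_test \
#     --name_prefix "air_example" \
#     --lower_to_aie_pipeline "air" \
#     --tile_pipeline "pad-pack" \
#     --lhs_rhs_type "i32" \
#     --acc_type "i32" \
#     --m "64" --n "64" --k "64"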
+###################################################################
+# MLIR-AIR Matmul tests
+###################################################################
+
 if [ -d "$VITIS" ]; then
   run_matmul_test \
     --name_prefix "ukern" \
+    --lower_to_aie_pipeline "air" \
+    --tile_pipeline "pad-pack" \
     --lhs_rhs_type "bf16" \
     --acc_type "f32" \
     --m "256" --k "256" --n "256" \
     --use_ukernel "1"
 fi
 
-# Disabled until the following issue is resolved:
-# https://github.com/Xilinx/llvm-aie/issues/102
-#
-# run_matmul_test \
-#   --name_prefix "transpose_int32" \
-#   --lhs_rhs_type "i32" \
-#   --acc_type "i32" \
-#   --m "8" --n "16" --k "32" \
-#   --do_transpose_rhs "1"
-
-
-run_matmul_test \
-    --name_prefix "transpose_i8_i32" \
-    --lhs_rhs_type "i8" \
-    --acc_type "i32" \
-    --m "16" --n "32" --k "64" \
-    --do_transpose_rhs "1"
-
-run_matmul_test \
-    --name_prefix "transpose_bf16" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "256" --n "256" --k "256" \
-    --do_transpose_rhs "1"
-
-# The below matmul case passes with
-# tile_sizes = [[1, 1], [0, 0, 250], [1, 1], [0, 0, 2]], packedSizes = [1, 1, 5]
-# but fails with tile_sizes = [[1, 1], [0, 0, 200], [1, 1], [0, 0, 1]], packedSizes = [1, 1, 8],
-# with the error LLVM ERROR: unable to legalize instruction: %152:_(<2 x s32>) = G_FMUL %148:_, %150:_ (in function: core_0_2)
-# The latter is what a more vectorization-friendly packing looks like, so the test is expected to fail here.
-# TODO: check if the test will pass with a more recent llvm-aie and if it doesn't, report it upstream.
-# Disabled until the following issue is resolved:
-# https://github.com/Xilinx/llvm-aie/issues/102
-# run_matmul_test \
-#   --name_prefix "failure_0" \
-#   --lhs_rhs_type "i32" \
-#   --acc_type "i32" \
-#   --m "1" --n "1" --k "1000" \
-#   --expect_compile_failure "1"
-
-# The below matmul case passes with
-# tile_sizes = [52, 52], [0, 0, 63], [26, 26], [0, 0, 3], packedSizes = [2, 2, 7]
-# but fails with tile_sizes = [[52, 52], [0, 0, 63], [4, 4], [0, 0, 3]], packedSizes = [4, 4, 7],
-# in AIRHerdPlacementPass with the error No valid placement found
-# The latter is what a more vectorization-friendly packing looks like, so the test is expected to fail here.
-# We should fix this failure.
-# run_matmul_test \
-#   --name_prefix "failure_0" \
-#   --lhs_rhs_type "i32" \
-#   --acc_type "i32" \
-#   --m "52" --n "52" --k "63" \
-#   --expect_compile_failure "1"
-
 # Example of a run with a group of 2+ matmuls. This test is currently passed
 # the flag '--num_repeat_runs 0' because there is an issue with the runtime when
 # multiple matmuls are run in the same test. TODO(newling/nmeshram): Document
 # this issue.
 run_matmul_test \
     --name_prefix "multiple_matmuls" \
+    --lower_to_aie_pipeline "air" \
+    --tile_pipeline "pad-pack" \
     --lhs_rhs_type "i32" \
     --acc_type "i32" \
     --m "512,8,16" \
@@ -612,105 +567,28 @@ run_matmul_test \
     --num_repeat_runs "0"
 
 run_matmul_test \
-    --name_prefix "small" \
-    --lhs_rhs_type "i32" \
-    --acc_type "i32" \
-    --m "16" --n "16" --k "8"
-
-run_matmul_test \
-    --name_prefix "small" \
-    --lhs_rhs_type "i32" \
-    --acc_type "i32" \
-    --m "8" --n "32" --k "16"
-
-# Disabled until the following issue is resolved:
-# https://github.com/Xilinx/llvm-aie/issues/102
-# run_matmul_test \
-#   --name_prefix "small" \
-#   --lhs_rhs_type "i32" \
-#   --acc_type "i32" \
-#   --m "9" --n "7" --k "16"
-
-run_matmul_test \
-    --name_prefix "large" \
-    --lhs_rhs_type "i32" \
-    --acc_type "i32" \
-    --m "64" --n "64" --k "128"
-
-run_matmul_test \
-    --name_prefix "large" \
-    --lhs_rhs_type "i32" \
-    --acc_type "i32" \
-    --m "512" --n "512" --k "512"
-
-run_matmul_test \
-    --name_prefix "int8" \
-    --lhs_rhs_type "i8" \
-    --acc_type "i32" \
-    --m "64" --n "64" --k "64"
-
-run_matmul_test \
-    --name_prefix "bf16_2304" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "128" --n "128" --k "2304"
+    --name_prefix "transpose_i8_i32" \
+    --lower_to_aie_pipeline "air" \
+    --tile_pipeline "pad-pack" \
+    --lhs_rhs_type "i8" \
+    --acc_type "i32" \
+    --m "16" --n "32" --k "64" \
+    --do_transpose_rhs "1"
 
 run_matmul_test \
-    --name_prefix "packPeel" \
+    --name_prefix "packPeel_i32" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "i32" \
     --acc_type "i32" \
     --m "64" --n "64" --k "128"
 
-# We're seeing intermittent numerical errors in these 3 tests,
-# needs investigation. TODO(newling/yzhang93): Add more info.
-# Appears to be only pack-peel pipeline with bf16->f32.
-# Using 'num_repeat_runs=0' flag to avoid running the numerical test.
-#################################################################
-
-
-# TODO: compilation error with the below test.
-#
-# error: 'aie.dma_bd' op Cannot give more than 3 dimensions for step sizes and wraps in this tile (got 4 dimensions).
-#
-# The config generated with the current strategy is:
-#
-# packing_config = #amdaie.packing_config
-# }
 run_matmul_test \
-    --name_prefix "packPeel" \
-    --tile_pipeline "pack-peel" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "64" --n "64" --k "128" \
-    --num_repeat_runs "0"
-
 run_matmul_test \
-    --name_prefix "packPeelLarge" \
+    --name_prefix "packPeel_bf16" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "bf16" \
     --acc_type "f32" \
     --m "512" --n "512" --k "512"
 
-run_matmul_test \
-    --name_prefix "packPeel2304" \
-    --tile_pipeline "pack-peel" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "128" --n "128" --k "2304"
-
-
 run_matmul_test \
     --name_prefix "packPeel_t_bf16" \
     --tile_pipeline "pack-peel" \
@@ -719,56 +597,6 @@ run_matmul_test \
     --m "128" --n "256" --k "512" \
     --do_transpose_rhs "1"
 
-###################################################################
-
-run_matmul_test \
-    --name_prefix "mm2" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "308" --k "9728" --n "2432"
-
-run_matmul_test \
-    --name_prefix "mm3" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "308" --k "2432" --n "2432"
-
-run_matmul_test \
-    --name_prefix "mm4" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "308" --k "2432" --n "7296"
-
-run_matmul_test \
-    --name_prefix "mm5" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "8192" --k "2432" --n "9728"
-
-run_matmul_test \
-    --name_prefix "mm6" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "308" --k "2432" --n "9728"
-
-run_matmul_test \
-    --name_prefix "mm7" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "8192" --k "2432" --n "2432"
-
-run_matmul_test \
-    --name_prefix "mm8" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "8192" --k "9728" --n "2432"
-
-run_matmul_test \
-    --name_prefix "mm9" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "8192" --k "2432" --n "7296"
-
 ###################################################################
 # ObjectFifo Matmul tests
 ###################################################################
@@ -805,7 +633,7 @@ i32_shapes_medium=(
 )
 
 run_matmul_test_on_shapes ${i32_shapes_small[@]} \
-    --name_prefix "small" \
+    --name_prefix "small_i32" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "i32" \
@@ -820,7 +648,7 @@ if [ "$OSTYPE" != "msys" ]; then
 fi
 
 run_matmul_test_on_shapes ${i32_shapes_medium[@]} \
-    --name_prefix "medium" \
+    --name_prefix "medium_i32" \
    --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "i32" \
@@ -852,7 +680,7 @@ bf16_ukernel_shapes_medium=(
 )
 
 run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
-    --name_prefix "small" \
+    --name_prefix "small_bf16" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "bf16" \
@@ -860,7 +688,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
     --num_repeat_runs "2"
 
 run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
-    --name_prefix "medium" \
+    --name_prefix "medium_bf16" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "bf16" \
@@ -869,7 +697,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
 
 # i8 Matmul tests.
 
 run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
-    --name_prefix "small" \
+    --name_prefix "small_i8" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "i8" \
@@ -877,7 +705,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
     --num_repeat_runs "2"
 
 run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
-    --name_prefix "medium" \
+    --name_prefix "medium_i8" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "i8" \
@@ -886,7 +714,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
 
 if [ -d "$VITIS" ]; then
   run_matmul_test_on_shapes ${bf16_ukernel_shapes_small[@]} \
-    --name_prefix "small" \
+    --name_prefix "small_ukern" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "bf16" \
@@ -895,7 +723,7 @@ if [ -d "$VITIS" ]; then
     --use_ukernel "1"
 
   run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \
-    --name_prefix "medium" \
+    --name_prefix "medium_ukern" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "bf16" \
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp
index 4ca8649f7..19f9fa9d4 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp
@@ -64,13 +64,10 @@ void AMDAIELoweringStrategyPass::runOnOperation() {
     }
   }
 
-  // To simplify development, the number of cores can be passed as a flag during
-  // compilation. In the future these parameters could be read from file.
-  struct AIEConfig cfg = {numCores};
   for (auto funcOp : moduleOp.getOps<FunctionOpInterface>()) {
     // Set the strategy with default heuristics.
     if (failed(initAIELaunchConfig(funcOp, usePassPipeline,
-                                   useLowerToAIEPipeline, cfg))) {
+                                   useLowerToAIEPipeline))) {
       funcOp.emitOpError("failed to have a lowering configuration set for it.");
       return signalPassFailure();
     }
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp
index f8f4773ef..1d564be37 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp
@@ -312,8 +312,7 @@ static SmallVector<int64_t> setInnerPermB(bool isMatmulTransposeB) {
 
 static LogicalResult setRootConfigForPackPeelPipeline(
     mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp,
-    LowerToAIEPassPipeline useLowerToAIEPipeline, AIEConfig cfg,
-    bool isMatmulTransposeB) {
+    LowerToAIEPassPipeline useLowerToAIEPipeline, bool isMatmulTransposeB) {
   bool isObjectFifo =
       useLowerToAIEPipeline == LowerToAIEPassPipeline::ObjectFifo;
   auto maybePackPeelTiling =
@@ -389,7 +388,7 @@ static LogicalResult setRootConfigForPackPeelPipeline(
 
 static LogicalResult setRootConfigForPadPackPipeline(
     mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp,
-    AIEConfig cfg, bool isMatmulTransposeB) {
+    bool isMatmulTransposeB) {
   auto maybePadPackTiling = ParameterSetting::create(
       linalgOp, /*isPackPeel=*/false, /*isObjectFifo=*/false);
   if (failed(maybePadPackTiling)) return failure();
@@ -445,8 +444,7 @@ static LogicalResult setRootConfigForPadPackPipeline(
 //===----------------------------------------------------------------------===//
 
 static LogicalResult setRootConfigForConvDecomposePipeline(
-    mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp,
-    AIEConfig cfg) {
+    mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp) {
   FailureOr<std::array<uint32_t, 3>> maybeInstructionSize =
       getMatmulInstructionSize(linalgOp);
   int64_t OW = 4;
@@ -606,13 +604,13 @@ static bool isMatmulTransposeB(linalg::GenericOp genericOp) {
 /// transposition.
 static LogicalResult setTransposeLikeOpRootConfig(
     mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp,
-    TilePassPipeline passPipeline, LowerToAIEPassPipeline useLowerToAIEPipeline,
-    AIEConfig cfg) {
+    TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   if (passPipeline == TilePassPipeline::PackPeelPipeline)
     return setRootConfigForPackPeelPipeline(entryPointFn, linalgOp,
-                                            useLowerToAIEPipeline, cfg, true);
+                                            useLowerToAIEPipeline, true);
   else if (passPipeline == TilePassPipeline::PadPackPipeline)
-    return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, cfg, true);
+    return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, true);
   return linalgOp.emitError(
       "Unhandled pass pipeline in setTransposeLikeOpRootConfig.");
 }
@@ -621,17 +619,16 @@ static LogicalResult setTransposeLikeOpRootConfig(
 // Root Configurations
 //===----------------------------------------------------------------------===//
 
-static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
-                                   linalg::GenericOp genericOp,
-                                   TilePassPipeline passPipeline,
-                                   LowerToAIEPassPipeline useLowerToAIEPipeline,
-                                   AIEConfig cfg) {
+static LogicalResult setRootConfig(
+    mlir::FunctionOpInterface entryPointFn, linalg::GenericOp genericOp,
+    TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   assert(!getLoweringConfig(genericOp) &&
          "expected lowering_config is not set");
 
   if (isMatmulTransposeB(genericOp) &&
       succeeded(setTransposeLikeOpRootConfig(
-          entryPointFn, genericOp, passPipeline, useLowerToAIEPipeline, cfg))) {
+          entryPointFn, genericOp, passPipeline, useLowerToAIEPipeline))) {
     return success();
   }
 
@@ -640,18 +637,16 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
 
 /// Sets the lowering configuration for dispatch region with root op that
 /// implements the contraction operation interface.
-static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
-                                   linalg::ContractionOpInterface contractionOp,
-                                   TilePassPipeline passPipeline,
-                                   LowerToAIEPassPipeline useLowerToAIEPipeline,
-                                   AIEConfig cfg) {
+static LogicalResult setRootConfig(
+    mlir::FunctionOpInterface entryPointFn,
+    linalg::ContractionOpInterface contractionOp, TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   assert(!getLoweringConfig(contractionOp) &&
          "expected lowering_config is not set");
   auto linalgOp = cast<linalg::LinalgOp>(contractionOp.getOperation());
   if (isa<linalg::MatmulTransposeBOp>(linalgOp)) {
-    if (succeeded(setTransposeLikeOpRootConfig(entryPointFn, linalgOp,
-                                               passPipeline,
-                                               useLowerToAIEPipeline, cfg))) {
+    if (succeeded(setTransposeLikeOpRootConfig(
+            entryPointFn, linalgOp, passPipeline, useLowerToAIEPipeline))) {
       return success();
     }
     return failure();
   }
@@ -672,31 +667,30 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
   // approach which will have different tile sizes and pass pipelines
   if (passPipeline == TilePassPipeline::PackPeelPipeline)
     return setRootConfigForPackPeelPipeline(entryPointFn, linalgOp,
-                                            useLowerToAIEPipeline, cfg, false);
+                                            useLowerToAIEPipeline, false);
   if (passPipeline == TilePassPipeline::PadPackPipeline)
-    return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, cfg, false);
+    return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, false);
   return linalgOp.emitError("Unhandled pass pipeline in setRootConfig.");
 }
 
 static LogicalResult setConvRootConfig(mlir::FunctionOpInterface entryPointFn,
                                        linalg::ConvolutionOpInterface convOp,
-                                       TilePassPipeline passPipeline,
-                                       AIEConfig cfg) {
+                                       TilePassPipeline passPipeline) {
   assert(!getLoweringConfig(convOp) && "expected lowering_config is not set");
   auto linalgOp = cast<linalg::LinalgOp>(convOp.getOperation());
 
   // Current tiling strategy is based on llvm-cpu ConvTileAndDecomposeExpert.
   if (passPipeline == TilePassPipeline::ConvDecomposePipeline)
-    return setRootConfigForConvDecomposePipeline(entryPointFn, linalgOp, cfg);
+    return setRootConfigForConvDecomposePipeline(entryPointFn, linalgOp);
   return linalgOp.emitError("Unhandled pass pipeline in setConvRootConfig.");
 }
 
 /// Redirects to methods that set the configuration based on operation type.
 static LogicalResult setRootConfigImpl(
     mlir::FunctionOpInterface entryPointFn, Operation *op,
-    TilePassPipeline passPipeline, LowerToAIEPassPipeline useLowerToAIEPipeline,
-    AIEConfig cfg) {
+    TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   auto setRootConfigFn = [&](Operation *op) -> LogicalResult {
     return TypeSwitch<Operation *, LogicalResult>(op)
         // TODO (nmeshram): This is very limited for now, plan is to
@@ -706,15 +700,15 @@ static LogicalResult setRootConfigImpl(
         .Case(
             [&](auto op) {
-              return setConvRootConfig(entryPointFn, op, passPipeline, cfg);
+              return setConvRootConfig(entryPointFn, op, passPipeline);
             })
         .Case<linalg::GenericOp>([&](auto op) {
           return setRootConfig(entryPointFn, op, passPipeline,
-                               useLowerToAIEPipeline, cfg);
+                               useLowerToAIEPipeline);
         })
        .Case<linalg::ContractionOpInterface>([&](auto op) {
          return setRootConfig(entryPointFn, op, passPipeline,
-                               useLowerToAIEPipeline, cfg);
+                               useLowerToAIEPipeline);
        })
         .Default([&](Operation *op) { return success(); });
   };
@@ -724,8 +718,8 @@ static LogicalResult setRootConfigImpl(
 
 /// Sets the translation information to use for a dispatch region.
 static LogicalResult setTranslationInfoAndRootConfig(
     mlir::FunctionOpInterface entryPointFn, ArrayRef<Operation *> computeOps,
-    TilePassPipeline passPipeline, LowerToAIEPassPipeline useLowerToAIEPipeline,
-    AIEConfig cfg) {
+    TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   // Make sure that lowering_config is not preset on any compute ops.
   for (auto computeOp : computeOps) {
     if (getLoweringConfig(computeOp))
@@ -741,7 +735,7 @@ static LogicalResult setTranslationInfoAndRootConfig(
     return entryPointFn.emitError("Case with no root ops not yet supported.");
 
   if (failed(setRootConfigImpl(entryPointFn, rootOperation, passPipeline,
-                               useLowerToAIEPipeline, cfg)))
+                               useLowerToAIEPipeline)))
     return failure();
   return success();
 }
@@ -750,10 +744,9 @@
 // Entry Point
 //===----------------------------------------------------------------------===//
 
-LogicalResult initAIELaunchConfig(FunctionOpInterface funcOp,
-                                  TilePassPipeline passPipeline,
-                                  LowerToAIEPassPipeline useLowerToAIEPipeline,
-                                  AIEConfig cfg) {
+LogicalResult initAIELaunchConfig(
+    FunctionOpInterface funcOp, TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   if (getTranslationInfo(funcOp)) return success();
 
   // TODO (nmeshram): Need a default pipeline for control flow cases.
@@ -762,7 +755,7 @@ LogicalResult initAIELaunchConfig(FunctionOpInterface funcOp,
   SmallVector<Operation *> computeOps = getComputeOps(funcOp);
 
   if (failed(setTranslationInfoAndRootConfig(funcOp, computeOps, passPipeline,
-                                             useLowerToAIEPipeline, cfg)))
+                                             useLowerToAIEPipeline)))
     return failure();
 
   // The root configuration setting introduces `tensor.dim` operations.
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h
index 3afec4f7d..879f0882c 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h
@@ -35,16 +35,9 @@ enum class PeelingType { First, Last, FirstLast };
 /// Enum for operands to be bufferized to allocation.
 enum class BufferizeOperand { InputOutput, Input, Output, DefOp };
 
-/// Struct specifying the number of cores to use. This will be replaced
-/// by a more versatile handling in the future.
-struct AIEConfig {
-  int32_t num_cores;
-};
-
 LogicalResult initAIELaunchConfig(FunctionOpInterface funcOp,
                                   TilePassPipeline usePassPipeline,
-                                  LowerToAIEPassPipeline useLowerToAIEPipeline,
-                                  AIEConfig cfg);
+                                  LowerToAIEPassPipeline useLowerToAIEPipeline);
 
 } // namespace mlir::iree_compiler::AMDAIE
 
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp
index cb3d87425..d5243c86d 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp
@@ -52,7 +52,7 @@ static llvm::cl::opt<LowerToAIEPassPipeline> clUseLowerToAIEPipeline(
       clEnumValN(LowerToAIEPassPipeline::ObjectFifo, "objectFifo",
                  "Use the IREE lowering to objectFifos")),
-    llvm::cl::init(LowerToAIEPassPipeline::AIR));
+    llvm::cl::init(LowerToAIEPassPipeline::ObjectFifo));
 
 /// Command line option for selecting the lowering pipeline to use for tiling
 /// computations and packing data.
@@ -69,11 +69,7 @@ static llvm::cl::opt<TilePassPipeline> clUseTilePipeline(
       clEnumValN(TilePassPipeline::ConvDecomposePipeline, "conv-decompose",
                  "Use the conv-decompose based lowering strategy for "
                  "convolution interface ops")),
-    llvm::cl::init(TilePassPipeline::PadPackPipeline));
-
-static llvm::cl::opt<int32_t> clNumCores(
-    "iree-amdaie-num-cores",
-    llvm::cl::desc("Choose the number of cores to use"), llvm::cl::init(1));
+    llvm::cl::init(TilePassPipeline::PackPeelPipeline));
 
 static llvm::cl::opt<std::string> clPathToUkernels(
     "iree-amdaie-path-to-ukernels",
@@ -553,7 +549,6 @@ void buildAMDAIETransformPassPipeline(OpPassManager &variantPassManager,
     AMDAIELoweringStrategyOptions options;
     options.usePassPipeline = clUseTilePipeline;
     options.useLowerToAIEPipeline = clUseLowerToAIEPipeline;
-    options.numCores = clNumCores;
     modulePassManager.addPass(createAMDAIELoweringStrategyPass(options));
   }
   modulePassManager.addPass(createLowerExecutableUsingTransformDialectPass());
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td
index 9f6560870..c1ec4c15c 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td
@@ -290,7 +290,7 @@ def AMDAIELowerExecutableTarget :
   let options = [
     Option<"usePassPipeline", "use-pass-pipeline",
       "mlir::iree_compiler::AMDAIE::TilePassPipeline",
-      /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PadPackPipeline",
+      /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline",
       "Pass pipeline to use while lowering to AIR dialect",
       [{::llvm::cl::values(
         clEnumValN(mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline, "pack-peel",
@@ -310,7 +310,7 @@ def AMDAIELoweringStrategy :
   let options = [
     Option<"usePassPipeline", "use-pass-pipeline",
      "mlir::iree_compiler::AMDAIE::TilePassPipeline",
-      /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PadPackPipeline",
+      /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline",
       "Pass pipeline to use while lowering to AIR dialect",
       [{::llvm::cl::values(
         clEnumValN(mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline, "pack-peel",
@@ -320,11 +320,9 @@ def AMDAIELoweringStrategy :
         clEnumValN(mlir::iree_compiler::AMDAIE::TilePassPipeline::ConvDecomposePipeline, "conv-decompose",
                    "Use the conv-decompose based lowering strategy for convolution interface ops.")
       )}]>,
-    Option<"numCores", "num-cores", "int32_t", /*default=*/"1",
-      "Choose the number of cores to use">,
     Option<"useLowerToAIEPipeline", "use-lower-to-aie-pipeline",
       "mlir::iree_compiler::AMDAIE::LowerToAIEPassPipeline",
-      /*default=*/"mlir::iree_compiler::AMDAIE::LowerToAIEPassPipeline::AIR",
+      /*default=*/"mlir::iree_compiler::AMDAIE::LowerToAIEPassPipeline::ObjectFifo",
       "Lowering pass pipeline to use",
       [{::llvm::cl::values(
         clEnumValN(mlir::iree_compiler::AMDAIE::LowerToAIEPassPipeline::ObjectFifo, "objectFifo",
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy.mlir
index 872d627a2..8c7546d78 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy.mlir
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy.mlir
@@ -1,5 +1,5 @@
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{use-pass-pipeline=pad-pack})' %s | FileCheck %s --check-prefix=CHECK-PAD-PACK
-// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{use-pass-pipeline=pack-peel})' %s | FileCheck %s --check-prefix=CHECK-PACK-PEEL
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{use-lower-to-aie-pipeline=air use-pass-pipeline=pad-pack})' %s | FileCheck %s --check-prefix=CHECK-PAD-PACK
+// RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{use-lower-to-aie-pipeline=air use-pass-pipeline=pack-peel})' %s | FileCheck %s --check-prefix=CHECK-PACK-PEEL
 
 // CHECK-PAD-PACK{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-PAD-PACK{LITERAL}: #packingConfig = #amdaie.packing_config
diff --git a/tests/samples/CMakeLists.txt b/tests/samples/CMakeLists.txt
index 33d0e5646..618409664 100644
--- a/tests/samples/CMakeLists.txt
+++ b/tests/samples/CMakeLists.txt
@@ -8,12 +8,12 @@ iree_lit_test_suite(
   NAME
     lit
   SRCS
-    "conv_pipeline_e2e.mlir"
-    "matmul_peeled_objectfifo.mlir"
-    "matmul_peeled_objectfifo_e2e.mlir"
-    "pack_peel_pipeline_matmul.mlir"
-    "pack_peel_pipeline_matmul_elementwise.mlir"
-    "pad_pack_pipeline_e2e.mlir"
+    "conv2d_nhwc_air_e2e.mlir"
+    "matmul_elementwise_pack_peel_air_e2e.mlir"
+    "matmul_pack_peel_air_e2e.mlir"
+    "matmul_pack_peel_objectfifo.mlir"
+    "matmul_pack_peel_objectfifo_e2e.mlir"
+    "matmul_pad_pack_air_e2e.mlir"
     "xdna_oplib_plugin.mlir"
   TOOLS
     ${IREE_LLD_TARGET}
diff --git a/tests/samples/conv_pipeline_e2e.mlir b/tests/samples/conv2d_nhwc_air_e2e.mlir
similarity index 95%
rename from tests/samples/conv_pipeline_e2e.mlir
rename to tests/samples/conv2d_nhwc_air_e2e.mlir
index ffe8222f8..2b005150a 100644
--- a/tests/samples/conv_pipeline_e2e.mlir
+++ b/tests/samples/conv2d_nhwc_air_e2e.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=conv-decompose --split-input-file | FileCheck %s
+// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=conv-decompose --iree-amdaie-lower-to-aie-pipeline=air --split-input-file | FileCheck %s
 
 func.func @conv_2d_nhwc_hwcf(%arg0: tensor<2x14x14x32xi32>, %arg1: tensor<3x3x32x64xi32>) -> tensor<2x12x12x64xi32> {
   %cst = arith.constant 0 : i32
diff --git a/tests/samples/pack_peel_pipeline_matmul_elementwise.mlir b/tests/samples/matmul_elementwise_pack_peel_air_e2e.mlir
similarity index 95%
rename from tests/samples/pack_peel_pipeline_matmul_elementwise.mlir
rename to tests/samples/matmul_elementwise_pack_peel_air_e2e.mlir
index c99b3b269..2f666db91 100644
--- a/tests/samples/pack_peel_pipeline_matmul_elementwise.mlir
+++ b/tests/samples/matmul_elementwise_pack_peel_air_e2e.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pack-peel --iree-amdaie-matmul-elementwise-fusion --split-input-file | FileCheck %s
+// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-lower-to-aie-pipeline=air --iree-amdaie-tile-pipeline=pack-peel --iree-amdaie-matmul-elementwise-fusion --split-input-file | FileCheck %s
 
 func.func @matmul_elementwise_i32(%lhs: tensor<1024x512xi32>, %rhs: tensor<512x1024xi32>, %ele: tensor<1024x1024xi32>) -> tensor<1024x1024xi32> {
diff --git a/tests/samples/pack_peel_pipeline_matmul.mlir b/tests/samples/matmul_pack_peel_air_e2e.mlir
similarity index 91%
rename from tests/samples/pack_peel_pipeline_matmul.mlir
rename to tests/samples/matmul_pack_peel_air_e2e.mlir
index a626a2132..e29ded73e 100644
--- a/tests/samples/pack_peel_pipeline_matmul.mlir
+++ b/tests/samples/matmul_pack_peel_air_e2e.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pack-peel --split-input-file | FileCheck %s
+// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-lower-to-aie-pipeline=air --iree-amdaie-tile-pipeline=pack-peel --split-input-file | FileCheck %s
 
 func.func @matmul_i8_i32(%lhs: tensor<32x16xi8>, %rhs: tensor<16x32xi8>) -> tensor<32x32xi32> {
diff --git a/tests/samples/matmul_peeled_objectfifo.mlir b/tests/samples/matmul_pack_peel_objectfifo.mlir
similarity index 100%
rename from tests/samples/matmul_peeled_objectfifo.mlir
rename to tests/samples/matmul_pack_peel_objectfifo.mlir
diff --git a/tests/samples/matmul_peeled_objectfifo_e2e.mlir b/tests/samples/matmul_pack_peel_objectfifo_e2e.mlir
similarity index 100%
rename from tests/samples/matmul_peeled_objectfifo_e2e.mlir
rename to tests/samples/matmul_pack_peel_objectfifo_e2e.mlir
diff --git a/tests/samples/pad_pack_pipeline_e2e.mlir b/tests/samples/matmul_pad_pack_air_e2e.mlir
similarity index 97%
rename from tests/samples/pad_pack_pipeline_e2e.mlir
rename to tests/samples/matmul_pad_pack_air_e2e.mlir
index 14bdcb04c..90ef20392 100644
--- a/tests/samples/pad_pack_pipeline_e2e.mlir
+++ b/tests/samples/matmul_pad_pack_air_e2e.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pad-pack --split-input-file | FileCheck %s --check-prefix=CPP
+// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pad-pack --iree-amdaie-lower-to-aie-pipeline=air --split-input-file | FileCheck %s --check-prefix=CPP
 
 // This test demonstrates Pad-Pack pipeline based e2e lowering.
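Net effect of the defaults flipped in this patch: an iree-compile invocation with no pipeline flags now takes the objectFifo lowering with pack-peel tiling. The previous AIR/pad-pack behavior stays reachable by passing the flags explicitly; a minimal sketch using only flags that appear in this diff (input.mlir and output.vmfb are placeholder names):

  iree-compile --iree-hal-target-backends=amd-aie \
    --iree-amdaie-lower-to-aie-pipeline=air \
    --iree-amdaie-tile-pipeline=pad-pack \
    input.mlir -o output.vmfb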