diff --git a/build_tools/ci/cpu_comparison/run_test.py b/build_tools/ci/cpu_comparison/run_test.py
index 1cc3b53fa..677b45b3f 100755
--- a/build_tools/ci/cpu_comparison/run_test.py
+++ b/build_tools/ci/cpu_comparison/run_test.py
@@ -602,7 +602,7 @@ def run(self, config):
         test_name = output_dir / "test_from_template_full_bias.mlir"
         template_name = matmul_template_dir / "matmul_bias_MxK_KxN_MxN.mlir"
         generate_matmul_test(test_name, template_name, 128, 128, 256, "i32", "i32")
-        aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", rtol=0, atol=0)
+        aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", rtol=0, atol=0)
 
         if config.xdna_datetime and config.xdna_datetime < 20240819:
             for name in [
@@ -631,10 +631,10 @@ def run(self, config):
             test_name, template_name, 1024, 1024, 512, "bf16", "f32"
         )
         aie_vs_llvm_cpu(
-            config, test_name, tile_pipeline="pack-peel", use_ukernel=True
+            config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", use_ukernel=True
         )
         aie_vs_llvm_cpu(
-            config, test_name, tile_pipeline="pack-peel", use_ukernel=False
+            config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", use_ukernel=False
         )
diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh
index c195f876f..54032a93d 100755
--- a/build_tools/ci/run_matmul_test.sh
+++ b/build_tools/ci/run_matmul_test.sh
@@ -527,72 +527,23 @@ run_matmul_test \
     --use_ukernel "0" \
     --num_repeat_runs "2"
 
+###################################################################
+# MLIR-AIR Matmul tests
+###################################################################
+
 run_matmul_test \
     --name_prefix "ukern" \
+    --lower_to_aie_pipeline "air" \
+    --tile_pipeline "pad-pack" \
     --lhs_rhs_type "bf16" \
     --acc_type "f32" \
     --m "256" --k "256" --n "256" \
     --use_ukernel "1"
 
-# Disabled until the following issue is resolved:
-# https://github.com/Xilinx/llvm-aie/issues/102
-#
-# run_matmul_test \
-#     --name_prefix "transpose_int32" \
-#     --lhs_rhs_type "i32" \
-#     --acc_type "i32" \
-#     --m "8" --n "16" --k "32" \
-#     --do_transpose_rhs "1"
-
-
-run_matmul_test \
-    --name_prefix "transpose_i8_i32" \
-    --lhs_rhs_type "i8" \
-    --acc_type "i32" \
-    --m "16" --n "32" --k "64" \
-    --do_transpose_rhs "1"
-
-run_matmul_test \
-    --name_prefix "transpose_bf16" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "256" --n "256" --k "256" \
-    --do_transpose_rhs "1"
-
-# The below matmul case passes with
-# tile_sizes = [[1, 1], [0, 0, 250], [1, 1], [0, 0, 2]], packedSizes = [1, 1, 5]
-# but fails with tile_sizes = [[1, 1], [0, 0, 200], [1, 1], [0, 0, 1]], packedSizes = [1, 1, 8],
-# with the error LLVM ERROR: unable to legalize instruction: %152:_(<2 x s32>) = G_FMUL %148:_, %150:_ (in function: core_0_2)
-# The later is what a more vectorization friendly packing looks like so this test is expected failing the test here.
-# TODO: check if the test will pass with a more recent llvm-aie and if it doesnt, report it upstream.
-# Disabled until the following issue is resolved:
-# https://github.com/Xilinx/llvm-aie/issues/102
-# run_matmul_test \
-#     --name_prefix "failure_0" \
-#     --lhs_rhs_type "i32" \
-#     --acc_type "i32" \
-#     --m "1" --n "1" --k "1000" \
-#     --expect_compile_failure "1"
-
-# The below matmul case passes with
-# tile_sizes = [52, 52], [0, 0, 63], [26, 26], [0, 0, 3], packedSizes = [2, 2, 7]
-# but fails with tile_sizes = [[52, 52], [0, 0, 63], [4, 4], [0, 0, 3]], packedSizes = [4, 4, 7],
-# in AIRHerdPlacementPass with the error No valid placement found
-# The later is what a more vectorization friendly packing looks like so we are expected failing the test here.
-# We should fix this failure.
-# run_matmul_test \
-#     --name_prefix "failure_0" \
-#     --lhs_rhs_type "i32" \
-#     --acc_type "i32" \
-#     --m "52" --n "52" --k "63" \
-#     --expect_compile_failure "1"
-
-# Example of a run with a group of 2+ matmuls. Currently this test is passed
-# the flag '--num_repeat_runs 0" as there is currently an issue with the runtime if
-# multiple matmuls are run in the same test. TODO(newling/nmeshram): Document
-# this issue.
 run_matmul_test \
     --name_prefix "multiple_matmuls" \
+    --lower_to_aie_pipeline "air" \
+    --tile_pipeline "pad-pack" \
     --lhs_rhs_type "i32" \
     --acc_type "i32" \
     --m "512,8,16" \
@@ -601,105 +552,28 @@ run_matmul_test \
     --num_repeat_runs "0"
 
 run_matmul_test \
-    --name_prefix "small" \
-    --lhs_rhs_type "i32" \
-    --acc_type "i32" \
-    --m "16" --n "16" --k "8"
-
-run_matmul_test \
-    --name_prefix "small" \
-    --lhs_rhs_type "i32" \
-    --acc_type "i32" \
-    --m "8" --n "32" --k "16"
-
-# Disabled until the following issue is resolved:
-# https://github.com/Xilinx/llvm-aie/issues/102
-# run_matmul_test \
-#     --name_prefix "small" \
-#     --lhs_rhs_type "i32" \
-#     --acc_type "i32" \
-#     --m "9" --n "7" --k "16"
-
-run_matmul_test \
-    --name_prefix "large" \
-    --lhs_rhs_type "i32" \
-    --acc_type "i32" \
-    --m "64" --n "64" --k "128"
-
-run_matmul_test \
-    --name_prefix "large" \
-    --lhs_rhs_type "i32" \
-    --acc_type "i32" \
-    --m "512" --n "512" --k "512"
-
-run_matmul_test \
-    --name_prefix "int8" \
-    --lhs_rhs_type "i8" \
-    --acc_type "i32" \
-    --m "64" --n "64" --k "64"
-
-run_matmul_test \
-    --name_prefix "bf16_2304" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "128" --n "128" --k "2304"
+    --name_prefix "transpose_i8_i32" \
+    --lower_to_aie_pipeline "air" \
+    --tile_pipeline "pad-pack" \
+    --lhs_rhs_type "i8" \
+    --acc_type "i32" \
+    --m "16" --n "32" --k "64" \
+    --do_transpose_rhs "1"
 
 run_matmul_test \
-    --name_prefix "packPeel" \
+    --name_prefix "packPeel_i32" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "i32" \
     --acc_type "i32" \
     --m "64" --n "64" --k "128"
 
-# We're seeing intermittent numerical errors in these 3 tests,
-# needs investigation. TODO(newling/yzhang93): Add more info.
-# Appears to be only pack-peel pipeline with bf16->f32.
-# Using 'num_repeat_runs=0' flag to avoid running the numerical test.
-#################################################################
-
-
-# TODO: compilation error with the below test.
-#
-# error: 'aie.dma_bd' op Cannot give more than 3 dimensions for step sizes and wraps in this tile (got 4 dimensions).
-#
-# The config generated with the current strategy is:
-#
-# packing_config = #amdaie.packing_config
-# }
-run_matmul_test \
-    --name_prefix "packPeel" \
-    --tile_pipeline "pack-peel" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "64" --n "64" --k "128" \
-    --num_repeat_runs "0"
-
 run_matmul_test \
-    --name_prefix "packPeelLarge" \
+    --name_prefix "packPeel_bf16" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "bf16" \
     --acc_type "f32" \
     --m "512" --n "512" --k "512"
 
-run_matmul_test \
-    --name_prefix "packPeel2304" \
-    --tile_pipeline "pack-peel" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "128" --n "128" --k "2304"
-
-
 run_matmul_test \
     --name_prefix "packPeel_t_bf16" \
     --tile_pipeline "pack-peel" \
@@ -708,56 +582,6 @@ run_matmul_test \
     --m "128" --n "256" --k "512" \
     --do_transpose_rhs "1"
 
-###################################################################
-
-run_matmul_test \
-    --name_prefix "mm2" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "308" --k "9728" --n "2432"
-
-run_matmul_test \
-    --name_prefix "mm3" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "308" --k "2432" --n "2432"
-
-run_matmul_test \
-    --name_prefix "mm4" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "308" --k "2432" --n "7296"
-
-run_matmul_test \
-    --name_prefix "mm5" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "8192" --k "2432" --n "9728"
-
-run_matmul_test \
-    --name_prefix "mm6" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "308" --k "2432" --n "9728"
-
-run_matmul_test \
-    --name_prefix "mm7" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "8192" --k "2432" --n "2432"
-
-run_matmul_test \
-    --name_prefix "mm8" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "8192" --k "9728" --n "2432"
-
-run_matmul_test \
-    --name_prefix "mm9" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --m "8192" --k "2432" --n "7296"
-
 ###################################################################
 # ObjectFifo Matmul tests
 ###################################################################
@@ -794,7 +618,7 @@ i32_shapes_medium=(
 )
 
 run_matmul_test_on_shapes ${i32_shapes_small[@]} \
-    --name_prefix "small" \
+    --name_prefix "small_i32" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "i32" \
@@ -802,7 +626,7 @@ run_matmul_test_on_shapes ${i32_shapes_small[@]} \
     --num_repeat_runs "10"
 
 run_matmul_test_on_shapes ${i32_shapes_medium[@]} \
-    --name_prefix "medium" \
+    --name_prefix "medium_i32" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "i32" \
@@ -834,7 +658,7 @@ bf16_ukernel_shapes_medium=(
 )
 
 run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
-    --name_prefix "small" \
+    --name_prefix "small_bf16" \
    --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "bf16" \
@@ -842,7 +666,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
     --num_repeat_runs "2"
 
 run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
-    --name_prefix "medium" \
+    --name_prefix "medium_bf16" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "bf16" \
@@ -851,7 +675,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
 
 # i8 Matmul tests.
 run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
-    --name_prefix "small" \
+    --name_prefix "small_i8" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "i8" \
@@ -859,7 +683,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
     --num_repeat_runs "2"
 
 run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
-    --name_prefix "medium" \
+    --name_prefix "medium_i8" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "i8" \
@@ -867,7 +691,7 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
     --num_repeat_runs "2"
 
 run_matmul_test_on_shapes ${bf16_ukernel_shapes_small[@]} \
-    --name_prefix "small" \
+    --name_prefix "small_ukern" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "bf16" \
@@ -876,7 +700,7 @@ run_matmul_test_on_shapes ${bf16_ukernel_shapes_small[@]} \
     --use_ukernel "1"
 
 run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \
-    --name_prefix "medium" \
+    --name_prefix "medium_ukern" \
     --lower_to_aie_pipeline "objectFifo" \
     --tile_pipeline "pack-peel" \
     --lhs_rhs_type "bf16" \
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp
index 4ca8649f7..19f9fa9d4 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAddLoweringStrategy.cpp
@@ -64,13 +64,10 @@ void AMDAIELoweringStrategyPass::runOnOperation() {
     }
   }
 
-  // To simplify development, the number of cores can be passed as a flag during
-  // compilation. In the future these parameters could be read from file.
-  struct AIEConfig cfg = {numCores};
   for (auto funcOp : moduleOp.getOps<FunctionOpInterface>()) {
     // Set the strategy with default heuristics.
     if (failed(initAIELaunchConfig(funcOp, usePassPipeline,
-                                   useLowerToAIEPipeline, cfg))) {
+                                   useLowerToAIEPipeline))) {
       funcOp.emitOpError("failed to have a lowering configuration set for it.");
       return signalPassFailure();
     }
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp
index f8f4773ef..1d564be37 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp
@@ -312,8 +312,7 @@ static SmallVector<int64_t> setInnerPermB(bool isMatmulTransposeB) {
 
 static LogicalResult setRootConfigForPackPeelPipeline(
     mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp,
-    LowerToAIEPassPipeline useLowerToAIEPipeline, AIEConfig cfg,
-    bool isMatmulTransposeB) {
+    LowerToAIEPassPipeline useLowerToAIEPipeline, bool isMatmulTransposeB) {
   bool isObjectFifo =
       useLowerToAIEPipeline == LowerToAIEPassPipeline::ObjectFifo;
   auto maybePackPeelTiling =
@@ -389,7 +388,7 @@ static LogicalResult setRootConfigForPackPeelPipeline(
 
 static LogicalResult setRootConfigForPadPackPipeline(
     mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp,
-    AIEConfig cfg, bool isMatmulTransposeB) {
+    bool isMatmulTransposeB) {
   auto maybePadPackTiling = ParameterSetting::create(
       linalgOp, /*isPackPeel=*/false, /*isObjectFifo=*/false);
   if (failed(maybePadPackTiling)) return failure();
@@ -445,8 +444,7 @@ static LogicalResult setRootConfigForPadPackPipeline(
 //===----------------------------------------------------------------------===//
 
 static LogicalResult setRootConfigForConvDecomposePipeline(
-    mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp,
-    AIEConfig cfg) {
+    mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp) {
   FailureOr<std::array<uint32_t, 3>> maybeInstructionSize =
       getMatmulInstructionSize(linalgOp);
   int64_t OW = 4;
@@ -606,13 +604,13 @@ static bool isMatmulTransposeB(linalg::GenericOp genericOp) {
 /// transposition.
 static LogicalResult setTransposeLikeOpRootConfig(
     mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp,
-    TilePassPipeline passPipeline, LowerToAIEPassPipeline useLowerToAIEPipeline,
-    AIEConfig cfg) {
+    TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   if (passPipeline == TilePassPipeline::PackPeelPipeline)
     return setRootConfigForPackPeelPipeline(entryPointFn, linalgOp,
-                                            useLowerToAIEPipeline, cfg, true);
+                                            useLowerToAIEPipeline, true);
   else if (passPipeline == TilePassPipeline::PadPackPipeline)
-    return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, cfg, true);
+    return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, true);
   return linalgOp.emitError(
       "Unhandled pass pipeline in setTransposeLikeOpRootConfig.");
 }
@@ -621,17 +619,16 @@ static LogicalResult setTransposeLikeOpRootConfig(
 // Root Configurations
 //===----------------------------------------------------------------------===//
 
-static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
-                                   linalg::GenericOp genericOp,
-                                   TilePassPipeline passPipeline,
-                                   LowerToAIEPassPipeline useLowerToAIEPipeline,
-                                   AIEConfig cfg) {
+static LogicalResult setRootConfig(
+    mlir::FunctionOpInterface entryPointFn, linalg::GenericOp genericOp,
+    TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   assert(!getLoweringConfig(genericOp) &&
          "expected lowering_config is not set");
 
   if (isMatmulTransposeB(genericOp) &&
       succeeded(setTransposeLikeOpRootConfig(
-          entryPointFn, genericOp, passPipeline, useLowerToAIEPipeline, cfg))) {
+          entryPointFn, genericOp, passPipeline, useLowerToAIEPipeline))) {
     return success();
   }
 
@@ -640,18 +637,16 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
 
 /// Sets the lowering configuration for dispatch region with root op that
 /// implements the contraction operation interface.
-static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
-                                   linalg::ContractionOpInterface contractionOp,
-                                   TilePassPipeline passPipeline,
-                                   LowerToAIEPassPipeline useLowerToAIEPipeline,
-                                   AIEConfig cfg) {
+static LogicalResult setRootConfig(
+    mlir::FunctionOpInterface entryPointFn,
+    linalg::ContractionOpInterface contractionOp, TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   assert(!getLoweringConfig(contractionOp) &&
          "expected lowering_config is not set");
   auto linalgOp = cast<linalg::LinalgOp>(contractionOp.getOperation());
   if (isa<linalg::MatmulTransposeBOp>(linalgOp)) {
-    if (succeeded(setTransposeLikeOpRootConfig(entryPointFn, linalgOp,
-                                               passPipeline,
-                                               useLowerToAIEPipeline, cfg))) {
+    if (succeeded(setTransposeLikeOpRootConfig(
+            entryPointFn, linalgOp, passPipeline, useLowerToAIEPipeline))) {
       return success();
     }
     return failure();
@@ -672,31 +667,30 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
   // approach which will have different tile sizes and pass pipelines
   if (passPipeline == TilePassPipeline::PackPeelPipeline)
     return setRootConfigForPackPeelPipeline(entryPointFn, linalgOp,
-                                            useLowerToAIEPipeline, cfg, false);
+                                            useLowerToAIEPipeline, false);
   if (passPipeline == TilePassPipeline::PadPackPipeline)
-    return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, cfg, false);
+    return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, false);
   return linalgOp.emitError("Unhandled pass pipeline in setRootConfig.");
 }
 
 static LogicalResult setConvRootConfig(mlir::FunctionOpInterface entryPointFn,
                                        linalg::ConvolutionOpInterface convOp,
-                                       TilePassPipeline passPipeline,
-                                       AIEConfig cfg) {
+                                       TilePassPipeline passPipeline) {
   assert(!getLoweringConfig(convOp) && "expected lowering_config is not set");
   auto linalgOp = cast<linalg::LinalgOp>(convOp.getOperation());
 
   // Current tiling strategy is based on llvm-cpu ConvTileAndDecomposeExpert.
   if (passPipeline == TilePassPipeline::ConvDecomposePipeline)
-    return setRootConfigForConvDecomposePipeline(entryPointFn, linalgOp, cfg);
+    return setRootConfigForConvDecomposePipeline(entryPointFn, linalgOp);
   return linalgOp.emitError("Unhandled pass pipeline in setConvRootConfig.");
 }
 
 /// Redirects to methods that set the configuration based on operation type.
 static LogicalResult setRootConfigImpl(
     mlir::FunctionOpInterface entryPointFn, Operation *op,
-    TilePassPipeline passPipeline, LowerToAIEPassPipeline useLowerToAIEPipeline,
-    AIEConfig cfg) {
+    TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   auto setRootConfigFn = [&](Operation *op) -> LogicalResult {
     return TypeSwitch<Operation *, LogicalResult>(op)
         // TODO (nmeshram): This is very limited for now, plan is to
@@ -706,15 +700,15 @@ static LogicalResult setRootConfigImpl(
         .Case<linalg::Conv2DNhwcHwcfOp, linalg::DepthwiseConv2DNhwcHwcOp>(
             [&](auto op) {
-              return setConvRootConfig(entryPointFn, op, passPipeline, cfg);
+              return setConvRootConfig(entryPointFn, op, passPipeline);
             })
         .Case<linalg::GenericOp>([&](auto op) {
           return setRootConfig(entryPointFn, op, passPipeline,
-                               useLowerToAIEPipeline, cfg);
+                               useLowerToAIEPipeline);
         })
         .Case<linalg::ContractionOpInterface>([&](auto op) {
           return setRootConfig(entryPointFn, op, passPipeline,
-                               useLowerToAIEPipeline, cfg);
+                               useLowerToAIEPipeline);
        })
         .Default([&](Operation *op) { return success(); });
   };
@@ -724,8 +718,8 @@ static LogicalResult setRootConfigImpl(
 
 /// Sets the translation information to use for a dispatch region.
 static LogicalResult setTranslationInfoAndRootConfig(
     mlir::FunctionOpInterface entryPointFn, ArrayRef<Operation *> computeOps,
-    TilePassPipeline passPipeline, LowerToAIEPassPipeline useLowerToAIEPipeline,
-    AIEConfig cfg) {
+    TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   // Make sure that lowering_config is not preset on any compute ops.
   for (auto computeOp : computeOps) {
     if (getLoweringConfig(computeOp))
@@ -741,7 +735,7 @@ static LogicalResult setTranslationInfoAndRootConfig(
     return entryPointFn.emitError("Case with no root ops not yet supported.");
 
   if (failed(setRootConfigImpl(entryPointFn, rootOperation, passPipeline,
-                               useLowerToAIEPipeline, cfg)))
+                               useLowerToAIEPipeline)))
     return failure();
   return success();
 }
@@ -750,10 +744,9 @@ static LogicalResult setTranslationInfoAndRootConfig(
 // Entry Point
 //===----------------------------------------------------------------------===//
 
-LogicalResult initAIELaunchConfig(FunctionOpInterface funcOp,
-                                  TilePassPipeline passPipeline,
-                                  LowerToAIEPassPipeline useLowerToAIEPipeline,
-                                  AIEConfig cfg) {
+LogicalResult initAIELaunchConfig(
+    FunctionOpInterface funcOp, TilePassPipeline passPipeline,
+    LowerToAIEPassPipeline useLowerToAIEPipeline) {
   if (getTranslationInfo(funcOp)) return success();
 
   // TODO (nmeshram): Need a default pipeline for control flow cases.
@@ -762,7 +755,7 @@ LogicalResult initAIELaunchConfig(
   SmallVector<Operation *> computeOps = getComputeOps(funcOp);
 
   if (failed(setTranslationInfoAndRootConfig(funcOp, computeOps, passPipeline,
-                                             useLowerToAIEPipeline, cfg)))
+                                             useLowerToAIEPipeline)))
     return failure();
 
   // The root configuration setting introduces `tensor.dim` operations.
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h
index 3afec4f7d..879f0882c 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.h
@@ -35,16 +35,9 @@ enum class PeelingType { First, Last, FirstLast };
 /// Enum for operands to be bufferized to allocation.
 enum class BufferizeOperand { InputOutput, Input, Output, DefOp };
 
-/// Struct specifying the number of cores to use. This will be replaced
-/// by a more versatile handling in the future.
-struct AIEConfig {
-  int32_t num_cores;
-};
-
 LogicalResult initAIELaunchConfig(FunctionOpInterface funcOp,
                                   TilePassPipeline usePassPipeline,
-                                  LowerToAIEPassPipeline useLowerToAIEPipeline,
-                                  AIEConfig cfg);
+                                  LowerToAIEPassPipeline useLowerToAIEPipeline);
 
 } // namespace mlir::iree_compiler::AMDAIE
 
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp
index 81c54b413..e5deb65e2 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp
@@ -44,7 +44,7 @@ static llvm::cl::opt<LowerToAIEPassPipeline> clUseLowerToAIEPipeline(
         clEnumValN(LowerToAIEPassPipeline::ObjectFifo, "objectFifo",
                    "Use the IREE lowering to objectFifos")),
-    llvm::cl::init(LowerToAIEPassPipeline::AIR));
+    llvm::cl::init(LowerToAIEPassPipeline::ObjectFifo));
 
 /// Command line option for selecting the lowering pipeline to use tiling
 /// computations and packing data.
@@ -61,11 +61,7 @@ static llvm::cl::opt<TilePassPipeline> clUseTilePipeline(
         clEnumValN(TilePassPipeline::ConvDecomposePipeline, "conv-decompose",
                    "Use the conv-decompose based lowering strategy for "
                    "convolution interface ops")),
-    llvm::cl::init(TilePassPipeline::PadPackPipeline));
-
-static llvm::cl::opt<int32_t> clNumCores(
-    "iree-amdaie-num-cores",
-    llvm::cl::desc("Choose the number of cores to use"), llvm::cl::init(1));
+    llvm::cl::init(TilePassPipeline::PackPeelPipeline));
 
 static llvm::cl::opt<std::string> clPathToUkernels(
     "iree-amdaie-path-to-ukernels",
@@ -536,7 +532,6 @@ void buildAMDAIETransformPassPipeline(OpPassManager &variantPassManager,
     AMDAIELoweringStrategyOptions options;
     options.usePassPipeline = clUseTilePipeline;
     options.useLowerToAIEPipeline = clUseLowerToAIEPipeline;
-    options.numCores = clNumCores;
     modulePassManager.addPass(createAMDAIELoweringStrategyPass(options));
   }
   modulePassManager.addPass(createLowerExecutableUsingTransformDialectPass());
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td
index 2dec5f951..b2617a0a0 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td
@@ -272,7 +272,7 @@ def AMDAIELowerExecutableTarget :
   let options = [
     Option<"usePassPipeline", "use-pass-pipeline",
       "mlir::iree_compiler::AMDAIE::TilePassPipeline",
-      /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PadPackPipeline",
+      /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline",
      "Pass pipeline to use while lowering to AIR dialect",
      [{::llvm::cl::values(
        clEnumValN(mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline, "pack-peel",
@@ -292,7 +292,7 @@ def AMDAIELoweringStrategy :
   let options = [
     Option<"usePassPipeline", "use-pass-pipeline",
      "mlir::iree_compiler::AMDAIE::TilePassPipeline",
-      /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PadPackPipeline",
+      /*default=*/"mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline",
       "Pass pipeline to use while lowering to AIR dialect",
       [{::llvm::cl::values(
         clEnumValN(mlir::iree_compiler::AMDAIE::TilePassPipeline::PackPeelPipeline, "pack-peel",
@@ -302,8 +302,6 @@ def AMDAIELoweringStrategy :
         clEnumValN(mlir::iree_compiler::AMDAIE::TilePassPipeline::ConvDecomposePipeline, "conv-decompose",
                    "Use the conv-decompose based lowering strategy for convolution interface ops.")
       )}]>,
-      Option<"numCores", "num-cores", "int32_t", /*default=*/"1",
-        "Choose the number of cores to use">,
       Option<"useLowerToAIEPipeline", "use-lower-to-aie-pipeline",
         "mlir::iree_compiler::AMDAIE::LowerToAIEPassPipeline",
         /*default=*/"mlir::iree_compiler::AMDAIE::LowerToAIEPassPipeline::AIR",
diff --git a/tests/samples/CMakeLists.txt b/tests/samples/CMakeLists.txt
index 33d0e5646..618409664 100644
--- a/tests/samples/CMakeLists.txt
+++ b/tests/samples/CMakeLists.txt
@@ -8,12 +8,12 @@ iree_lit_test_suite(
   NAME
     lit
   SRCS
-    "conv_pipeline_e2e.mlir"
-    "matmul_peeled_objectfifo.mlir"
-    "matmul_peeled_objectfifo_e2e.mlir"
-    "pack_peel_pipeline_matmul.mlir"
-    "pack_peel_pipeline_matmul_elementwise.mlir"
-    "pad_pack_pipeline_e2e.mlir"
+    "conv2d_nhwc_air_e2e.mlir"
+    "matmul_elementwise_pack_peel_air_e2e.mlir"
+    "matmul_pack_peel_air_e2e.mlir"
+    "matmul_pack_peel_objectfifo.mlir"
+    "matmul_pack_peel_objectfifo_e2e.mlir"
+    "matmul_pad_pack_air_e2e.mlir"
     "xdna_oplib_plugin.mlir"
   TOOLS
     ${IREE_LLD_TARGET}
diff --git a/tests/samples/conv_pipeline_e2e.mlir b/tests/samples/conv2d_nhwc_air_e2e.mlir
similarity index 95%
rename from tests/samples/conv_pipeline_e2e.mlir
rename to tests/samples/conv2d_nhwc_air_e2e.mlir
index 7c6957017..832274db1 100644
--- a/tests/samples/conv_pipeline_e2e.mlir
+++ b/tests/samples/conv2d_nhwc_air_e2e.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=conv-decompose --split-input-file | FileCheck %s
+// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=conv-decompose --iree-amdaie-lower-to-aie-pipeline=air --split-input-file | FileCheck %s
 
 func.func @conv_2d_nhwc_hwcf(%arg0: tensor<2x14x14x32xi32>, %arg1: tensor<3x3x32x64xi32>) -> tensor<2x12x12x64xi32> {
   %cst = arith.constant 0 : i32
diff --git a/tests/samples/pack_peel_pipeline_matmul_elementwise.mlir b/tests/samples/matmul_elementwise_pack_peel_air_e2e.mlir
similarity index 96%
rename from tests/samples/pack_peel_pipeline_matmul_elementwise.mlir
rename to tests/samples/matmul_elementwise_pack_peel_air_e2e.mlir
index c99b3b269..e2ac7dd5c 100644
--- a/tests/samples/pack_peel_pipeline_matmul_elementwise.mlir
+++ b/tests/samples/matmul_elementwise_pack_peel_air_e2e.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pack-peel --iree-amdaie-matmul-elementwise-fusion --split-input-file | FileCheck %s
+// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pack-peel --iree-amdaie-lower-to-aie-pipeline=air --iree-amdaie-matmul-elementwise-fusion --split-input-file | FileCheck %s
 
 func.func @matmul_elementwise_i32(%lhs: tensor<1024x512xi32>, %rhs: tensor<512x1024xi32>, %ele: tensor<1024x1024xi32>) -> tensor<1024x1024xi32> {
diff --git a/tests/samples/pack_peel_pipeline_matmul.mlir b/tests/samples/matmul_pack_peel_air_e2e.mlir
similarity index 94%
rename from tests/samples/pack_peel_pipeline_matmul.mlir
rename to tests/samples/matmul_pack_peel_air_e2e.mlir
index a626a2132..d86645b48 100644
--- a/tests/samples/pack_peel_pipeline_matmul.mlir
+++ b/tests/samples/matmul_pack_peel_air_e2e.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pack-peel --split-input-file | FileCheck %s
+// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pack-peel --iree-amdaie-lower-to-aie-pipeline=air --split-input-file | FileCheck %s
 
 func.func @matmul_i8_i32(%lhs: tensor<32x16xi8>, %rhs: tensor<16x32xi8>) -> tensor<32x32xi32> {
diff --git a/tests/samples/matmul_peeled_objectfifo.mlir b/tests/samples/matmul_pack_peel_objectfifo.mlir
similarity index 100%
rename from tests/samples/matmul_peeled_objectfifo.mlir
rename to tests/samples/matmul_pack_peel_objectfifo.mlir
diff --git a/tests/samples/matmul_peeled_objectfifo_e2e.mlir b/tests/samples/matmul_pack_peel_objectfifo_e2e.mlir
similarity index 100%
rename from tests/samples/matmul_peeled_objectfifo_e2e.mlir
rename to tests/samples/matmul_pack_peel_objectfifo_e2e.mlir
diff --git a/tests/samples/pad_pack_pipeline_e2e.mlir b/tests/samples/matmul_pad_pack_air_e2e.mlir
similarity index 97%
rename from tests/samples/pad_pack_pipeline_e2e.mlir
rename to tests/samples/matmul_pad_pack_air_e2e.mlir
index 14bdcb04c..90ef20392 100644
--- a/tests/samples/pad_pack_pipeline_e2e.mlir
+++ b/tests/samples/matmul_pad_pack_air_e2e.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pad-pack --split-input-file | FileCheck %s --check-prefix=CPP
+// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-tile-pipeline=pad-pack --iree-amdaie-lower-to-aie-pipeline=air --split-input-file | FileCheck %s --check-prefix=CPP
 
 // This test demonstrates Pad-Pack pipeline based e2e lowering.
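
Reviewer note: this patch flips the compiler defaults from the AIR/pad-pack path to the objectFifo/pack-peel path. A minimal sketch of how each path is selected after the change, using only the flag names that appear in the RUN lines above (the input file matmul.mlir and the output names are hypothetical):

    # New defaults: pack-peel tiling + objectFifo lowering, no extra flags needed.
    iree-compile --iree-hal-target-backends=amd-aie matmul.mlir -o matmul_objectfifo.vmfb

    # The previous default behaviour must now be requested explicitly.
    iree-compile --iree-hal-target-backends=amd-aie \
      --iree-amdaie-tile-pipeline=pad-pack \
      --iree-amdaie-lower-to-aie-pipeline=air \
      matmul.mlir -o matmul_air.vmfb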
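Because the new default no longer pins the AIR lowering, every renamed *_air_e2e.mlir sample is expected to carry --iree-amdaie-lower-to-aie-pipeline=air in its RUN line. A quick sanity check along these lines (a sketch, run from the repository root) lists any renamed sample that is missing the flag:

    # grep -L prints files that do NOT contain the pattern.
    grep -L 'iree-amdaie-lower-to-aie-pipeline=air' tests/samples/*_air_e2e.mlir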