Set ObjectFifo pipeline as default and some cleanup

yzhang93 committed Aug 29, 2024
1 parent a5fbf9d commit 21661c9
Showing 14 changed files with 78 additions and 278 deletions.
build_tools/ci/cpu_comparison/run_test.py (6 changes: 3 additions & 3 deletions)

@@ -602,7 +602,7 @@ def run(self, config):
test_name = output_dir / "test_from_template_full_bias.mlir"
template_name = matmul_template_dir / "matmul_bias_MxK_KxN_MxN.mlir"
generate_matmul_test(test_name, template_name, 128, 128, 256, "i32", "i32")
-aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", rtol=0, atol=0)
+aie_vs_llvm_cpu(config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", rtol=0, atol=0)

if config.xdna_datetime and config.xdna_datetime < 20240819:
for name in [
@@ -631,10 +631,10 @@ def run(self, config):
test_name, template_name, 1024, 1024, 512, "bf16", "f32"
)
aie_vs_llvm_cpu(
-    config, test_name, tile_pipeline="pack-peel", use_ukernel=True
+    config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", use_ukernel=True
)
aie_vs_llvm_cpu(
-    config, test_name, tile_pipeline="pack-peel", use_ukernel=False
+    config, test_name, tile_pipeline="pack-peel", lower_to_aie_pipeline="air", use_ukernel=False
)
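These comparisons previously inherited the compiler's default lowering pipeline; since this commit makes ObjectFifo the default, tests that still want the AIR lowering must now request it explicitly. Below is a minimal sketch of the same contrast in terms of the run_matmul_test helper from the shell harness changed later in this commit. The name prefixes are hypothetical, and the assumption that omitting --lower_to_aie_pipeline now selects the ObjectFifo path follows from the commit title rather than from code shown here.

# Hedged sketch, not part of this diff: exercises the new default pipeline
# by omitting --lower_to_aie_pipeline entirely.
run_matmul_test \
  --name_prefix "default_pipeline" \
  --tile_pipeline "pack-peel" \
  --lhs_rhs_type "i32" \
  --acc_type "i32" \
  --m "64" --n "64" --k "128"

# Opting back in to the previous AIR lowering now takes both flags,
# mirroring the explicit flags added throughout this commit.
run_matmul_test \
  --name_prefix "explicit_air" \
  --lower_to_aie_pipeline "air" \
  --tile_pipeline "pad-pack" \
  --lhs_rhs_type "i32" \
  --acc_type "i32" \
  --m "64" --n "64" --k "128"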


build_tools/ci/run_matmul_test.sh (226 changes: 25 additions & 201 deletions)

@@ -527,72 +527,23 @@ run_matmul_test \
--use_ukernel "0" \
--num_repeat_runs "2"

-###################################################################
-# MLIR-AIR Matmul tests
-###################################################################

run_matmul_test \
--name_prefix "ukern" \
--lower_to_aie_pipeline "air" \
--tile_pipeline "pad-pack" \
--lhs_rhs_type "bf16" \
--acc_type "f32" \
--m "256" --k "256" --n "256" \
--use_ukernel "1"

-# Disabled until the following issue is resolved:
-# https://github.com/Xilinx/llvm-aie/issues/102
-#
-# run_matmul_test \
-#   --name_prefix "transpose_int32" \
-#   --lhs_rhs_type "i32" \
-#   --acc_type "i32" \
-#   --m "8" --n "16" --k "32" \
-#   --do_transpose_rhs "1"


-run_matmul_test \
-  --name_prefix "transpose_i8_i32" \
-  --lhs_rhs_type "i8" \
-  --acc_type "i32" \
-  --m "16" --n "32" --k "64" \
-  --do_transpose_rhs "1"

-run_matmul_test \
-  --name_prefix "transpose_bf16" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "256" --n "256" --k "256" \
-  --do_transpose_rhs "1"

-# The below matmul case passes with
-# tile_sizes = [[1, 1], [0, 0, 250], [1, 1], [0, 0, 2]], packedSizes = [1, 1, 5]
-# but fails with tile_sizes = [[1, 1], [0, 0, 200], [1, 1], [0, 0, 1]], packedSizes = [1, 1, 8],
-# with the error: LLVM ERROR: unable to legalize instruction: %152:_(<2 x s32>) = G_FMUL %148:_, %150:_ (in function: core_0_2)
-# The latter is what a more vectorization-friendly packing looks like, so the test is expected to fail here.
-# TODO: check if the test passes with a more recent llvm-aie and, if it doesn't, report it upstream.
-# Disabled until the following issue is resolved:
-# https://github.com/Xilinx/llvm-aie/issues/102
-# run_matmul_test \
-#   --name_prefix "failure_0" \
-#   --lhs_rhs_type "i32" \
-#   --acc_type "i32" \
-#   --m "1" --n "1" --k "1000" \
-#   --expect_compile_failure "1"

-# The below matmul case passes with
-# tile_sizes = [[52, 52], [0, 0, 63], [26, 26], [0, 0, 3]], packedSizes = [2, 2, 7]
-# but fails with tile_sizes = [[52, 52], [0, 0, 63], [4, 4], [0, 0, 3]], packedSizes = [4, 4, 7],
-# in AIRHerdPlacementPass with the error: No valid placement found.
-# The latter is what a more vectorization-friendly packing looks like, so we expect the test to fail here.
-# We should fix this failure.
-# run_matmul_test \
-#   --name_prefix "failure_0" \
-#   --lhs_rhs_type "i32" \
-#   --acc_type "i32" \
-#   --m "52" --n "52" --k "63" \
-#   --expect_compile_failure "1"

# Example of a run with a group of 2+ matmuls. This test is currently passed
# the flag '--num_repeat_runs 0' because there is an issue with the runtime
# when multiple matmuls are run in the same test. TODO(newling/nmeshram):
# Document this issue.
run_matmul_test \
--name_prefix "multiple_matmuls" \
--lower_to_aie_pipeline "air" \
--tile_pipeline "pad-pack" \
--lhs_rhs_type "i32" \
--acc_type "i32" \
--m "512,8,16" \
@@ -601,105 +552,28 @@ run_matmul_test \
--num_repeat_runs "0"

-run_matmul_test \
-  --name_prefix "small" \
-  --lhs_rhs_type "i32" \
-  --acc_type "i32" \
-  --m "16" --n "16" --k "8"

-run_matmul_test \
-  --name_prefix "small" \
-  --lhs_rhs_type "i32" \
-  --acc_type "i32" \
-  --m "8" --n "32" --k "16"

-# Disabled until the following issue is resolved:
-# https://github.com/Xilinx/llvm-aie/issues/102
-# run_matmul_test \
-#   --name_prefix "small" \
-#   --lhs_rhs_type "i32" \
-#   --acc_type "i32" \
-#   --m "9" --n "7" --k "16"

-run_matmul_test \
-  --name_prefix "large" \
-  --lhs_rhs_type "i32" \
-  --acc_type "i32" \
-  --m "64" --n "64" --k "128"

-run_matmul_test \
-  --name_prefix "large" \
-  --lhs_rhs_type "i32" \
-  --acc_type "i32" \
-  --m "512" --n "512" --k "512"

-run_matmul_test \
-  --name_prefix "int8" \
-  --lhs_rhs_type "i8" \
-  --acc_type "i32" \
-  --m "64" --n "64" --k "64"

run_matmul_test \
--name_prefix "bf16_2304" \
--lhs_rhs_type "bf16" \
--acc_type "f32" \
--m "128" --n "128" --k "2304"
--name_prefix "transpose_i8_i32" \
--lower_to_aie_pipeline "air" \
--tile_pipeline "pad-pack" \
--lhs_rhs_type "i8" \
--acc_type "i32" \
--m "16" --n "32" --k "64" \
--do_transpose_rhs "1"

run_matmul_test \
--name_prefix "packPeel" \
--name_prefix "packPeel_i32" \
--tile_pipeline "pack-peel" \
--lhs_rhs_type "i32" \
--acc_type "i32" \
--m "64" --n "64" --k "128"

-# We're seeing intermittent numerical errors in these 3 tests; they need
-# investigation. TODO(newling/yzhang93): Add more info.
-# This appears to affect only the pack-peel pipeline with bf16 -> f32.
-# The 'num_repeat_runs=0' flag is used to avoid running the numerical test.
-#################################################################


-# TODO: compilation error with the below test.
-#
-# error: 'aie.dma_bd' op Cannot give more than 3 dimensions for step sizes and wraps in this tile (got 4 dimensions).
-#
-# The config generated with the current strategy is:
-#
-# packing_config = #amdaie.packing_config<packing_config =
-#   [{packedSizes = [64, 64, 64],
-#     transposePackIndices = [1],
-#     unpackEmpty = [false],
-#     innerPerm = [[1, 0]],
-#     outerPerm = [[0, 1]]},
-#    {packedSizes = [0, 0, 0, 4, 4, 8],
-#     transposePackIndices = [0, 1, 2],
-#     unpackEmpty = [false, false, true],
-#     innerPerm = [[0, 1], [1, 0], [0, 1]],
-#     outerPerm = [[0, 1, 3, 2], [0, 1, 3, 2], [0, 1, 3, 2]]}]>
-run_matmul_test \
-  --name_prefix "packPeel" \
-  --tile_pipeline "pack-peel" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "64" --n "64" --k "128" \
-  --num_repeat_runs "0"

run_matmul_test \
--name_prefix "packPeelLarge" \
--name_prefix "packPeel_bf16" \
--tile_pipeline "pack-peel" \
--lhs_rhs_type "bf16" \
--acc_type "f32" \
--m "512" --n "512" --k "512"

-run_matmul_test \
-  --name_prefix "packPeel2304" \
-  --tile_pipeline "pack-peel" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "128" --n "128" --k "2304"


run_matmul_test \
--name_prefix "packPeel_t_bf16" \
--tile_pipeline "pack-peel" \
Expand All @@ -708,56 +582,6 @@ run_matmul_test \
--m "128" --n "256" --k "512" \
--do_transpose_rhs "1"

-###################################################################

-run_matmul_test \
-  --name_prefix "mm2" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "308" --k "9728" --n "2432"

-run_matmul_test \
-  --name_prefix "mm3" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "308" --k "2432" --n "2432"

-run_matmul_test \
-  --name_prefix "mm4" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "308" --k "2432" --n "7296"

-run_matmul_test \
-  --name_prefix "mm5" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "8192" --k "2432" --n "9728"

-run_matmul_test \
-  --name_prefix "mm6" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "308" --k "2432" --n "9728"

-run_matmul_test \
-  --name_prefix "mm7" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "8192" --k "2432" --n "2432"

-run_matmul_test \
-  --name_prefix "mm8" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "8192" --k "9728" --n "2432"

-run_matmul_test \
-  --name_prefix "mm9" \
-  --lhs_rhs_type "bf16" \
-  --acc_type "f32" \
-  --m "8192" --k "2432" --n "7296"

###################################################################
# ObjectFifo Matmul tests
###################################################################
@@ -794,15 +618,15 @@ i32_shapes_medium=(
)

run_matmul_test_on_shapes ${i32_shapes_small[@]} \
--name_prefix "small" \
--name_prefix "small_i32" \
--lower_to_aie_pipeline "objectFifo" \
--tile_pipeline "pack-peel" \
--lhs_rhs_type "i32" \
--acc_type "i32" \
--num_repeat_runs "10"

run_matmul_test_on_shapes ${i32_shapes_medium[@]} \
--name_prefix "medium" \
--name_prefix "medium_i32" \
--lower_to_aie_pipeline "objectFifo" \
--tile_pipeline "pack-peel" \
--lhs_rhs_type "i32" \
@@ -834,15 +658,15 @@ bf16_ukernel_shapes_medium=(
)

run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
--name_prefix "small" \
--name_prefix "small_bf16" \
--lower_to_aie_pipeline "objectFifo" \
--tile_pipeline "pack-peel" \
--lhs_rhs_type "bf16" \
--acc_type "f32" \
--num_repeat_runs "2"

run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
--name_prefix "medium" \
--name_prefix "medium_bf16" \
--lower_to_aie_pipeline "objectFifo" \
--tile_pipeline "pack-peel" \
--lhs_rhs_type "bf16" \
@@ -851,23 +675,23 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \

# i8 Matmul tests.
run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
--name_prefix "small" \
--name_prefix "small_i8" \
--lower_to_aie_pipeline "objectFifo" \
--tile_pipeline "pack-peel" \
--lhs_rhs_type "i8" \
--acc_type "i32" \
--num_repeat_runs "2"

run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
--name_prefix "medium" \
--name_prefix "medium_i8" \
--lower_to_aie_pipeline "objectFifo" \
--tile_pipeline "pack-peel" \
--lhs_rhs_type "i8" \
--acc_type "i32" \
--num_repeat_runs "2"

run_matmul_test_on_shapes ${bf16_ukernel_shapes_small[@]} \
--name_prefix "small" \
--name_prefix "small_ukern" \
--lower_to_aie_pipeline "objectFifo" \
--tile_pipeline "pack-peel" \
--lhs_rhs_type "bf16" \
Expand All @@ -876,7 +700,7 @@ run_matmul_test_on_shapes ${bf16_ukernel_shapes_small[@]} \
--use_ukernel "1"

run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \
--name_prefix "medium" \
--name_prefix "medium_ukern" \
--lower_to_aie_pipeline "objectFifo" \
--tile_pipeline "pack-peel" \
--lhs_rhs_type "bf16" \
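For reference, run_matmul_test_on_shapes (used throughout the ObjectFifo section above) expands an array of shapes into one run_matmul_test invocation per shape, forwarding the remaining flags. A hedged sketch of defining and consuming such an array follows; the real i32_shapes_*/bf16_* array contents are collapsed in this view, so the element format here is an illustrative assumption, not taken from the diff.

# Hypothetical shapes array; the actual arrays and their element format are
# collapsed in the diff view above and may differ.
example_shapes=(
  '32x32x32'
  '64x64x64'
)

run_matmul_test_on_shapes ${example_shapes[@]} \
  --name_prefix "example_i32" \
  --lower_to_aie_pipeline "objectFifo" \
  --tile_pipeline "pack-peel" \
  --lhs_rhs_type "i32" \
  --acc_type "i32" \
  --num_repeat_runs "1"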
@@ -64,13 +64,10 @@ void AMDAIELoweringStrategyPass::runOnOperation() {
}
}

-// To simplify development, the number of cores can be passed as a flag during
-// compilation. In the future these parameters could be read from file.
-struct AIEConfig cfg = {numCores};
for (auto funcOp : moduleOp.getOps<FunctionOpInterface>()) {
// Set the strategy with default heuristics.
if (failed(initAIELaunchConfig(funcOp, usePassPipeline,
-    useLowerToAIEPipeline, cfg))) {
+    useLowerToAIEPipeline))) {
funcOp.emitOpError("failed to have a lowering configuration set for it.");
return signalPassFailure();
}
