Add matmul_transpose_b tests to run.py
yzhang93 committed Dec 13, 2024
1 parent 33d19a2 commit 93065a1
Showing 3 changed files with 62 additions and 31 deletions.
12 changes: 12 additions & 0 deletions build_tools/ci/cpu_comparison/matmul_template/matmul_transpose_b_MxK_NxK.mlir
@@ -0,0 +1,12 @@
// input ${M}x${K}x${TYPE1}
// input ${N}x${K}x${TYPE1}

func.func @matmul(%arg0: tensor<${M}x${K}x${TYPE1}>, %arg1: tensor<${N}x${K}x${TYPE1}>) -> tensor<${M}x${N}x${TYPE2}>
{
%cst = arith.constant ${ZERO} : ${TYPE2}
%0 = tensor.empty() : tensor<${M}x${N}x${TYPE2}>
%1 = linalg.fill ins(%cst : ${TYPE2}) outs(%0 : tensor<${M}x${N}x${TYPE2}>) -> tensor<${M}x${N}x${TYPE2}>
%2 = linalg.matmul_transpose_b ins(%arg0, %arg1 : tensor<${M}x${K}x${TYPE1}>, tensor<${N}x${K}x${TYPE1}>)
outs(%1: tensor<${M}x${N}x${TYPE2}>) -> tensor<${M}x${N}x${TYPE2}>
return %2: tensor<${M}x${N}x${TYPE2}>
}
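
For reference, the `${...}` placeholders match Python's `string.Template` syntax, so a concrete test case can be produced by plain substitution. A minimal sketch (illustrative only; the actual generator in `run.py` may work differently):

```python
# Illustrative sketch: expand one template line the way string.Template would.
from string import Template

line = Template(
    "func.func @matmul(%arg0: tensor<${M}x${K}x${TYPE1}>, "
    "%arg1: tensor<${N}x${K}x${TYPE1}>) -> tensor<${M}x${N}x${TYPE2}>"
)
print(line.substitute(M=32, N=32, K=32, TYPE1="bf16", TYPE2="f32"))
# func.func @matmul(%arg0: tensor<32x32xbf16>, %arg1: tensor<32x32xbf16>) -> tensor<32x32xf32>
```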
51 changes: 47 additions & 4 deletions build_tools/ci/cpu_comparison/run.py
@@ -404,6 +404,43 @@ def _execute(self, config):
        return self.benchmark(config)


class MatmulTransposeB(BaseMatmul):
    """
    A test of the form matmul_transpose_b(A,B) where A:MxK, B:NxK
    """

    def __init__(
        self,
        M,
        N,
        K,
        input_type,
        acc_type,
        use_ukernel=False,
        run_on_target=["npu1_4col"],
    ):
        super().__init__(
            run_on_target=run_on_target,
            aie_compilation_flags=None,
            M=M,
            N=N,
            K=K,
            input_type=input_type,
            acc_type=acc_type,
        )
        self.labels.append("MatmulTransposeB")

        self.name = f"matmul_transpose_b_{M}_{N}_{K}_{input_type}_{acc_type}"

    def _execute(self, config):
        matmul_template_dir = config.file_dir / "matmul_template"
        template_name = matmul_template_dir / "matmul_transpose_b_MxK_NxK.mlir"
        self.generate(config, template_name)
        self.vs_cpu(config)

        return True
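
The key check is `vs_cpu`, which presumably generates test inputs, runs the compiled AIE module, and compares against a CPU reference. For intuition, the computation being verified is `C = A @ B.T` (a hedged numpy sketch, not the harness's actual reference implementation):

```python
# matmul_transpose_b contracts the shared K dimension of both operands:
# C[m, n] = sum over k of A[m, k] * B[n, k], i.e. C = A @ B.T for A:MxK, B:NxK.
import numpy as np

M, N, K = 32, 32, 32
A = np.random.rand(M, K).astype(np.float32)
B = np.random.rand(N, K).astype(np.float32)
C = A @ B.T  # result has shape (M, N)
assert C.shape == (M, N)
```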


class MatmulThinBias(BaseMatmul):
"""
A test of the form matmul(A,B) + C where A:MxK, B:KxN, C:N
@@ -1400,6 +1437,15 @@ def __init__(self):
        self.register(MatmulThinBias(1024, 1024, 512, "bf16", "f32", use_ukernel=True))
        self.register(MatmulThinBias(1024, 1024, 512, "bf16", "f32"))

        # MatmulFullBias test:
        self.register(MatmulFullBias(128, 128, 256, "i32", "i32"))

        # MatmulTransposeB test(s):
        for input_type, acc_type in zip(["i8", "bf16"], ["i32", "f32"]):
            self.register(MatmulTransposeB(32, 32, 32, input_type, acc_type))
            self.register(MatmulTransposeB(128, 256, 128, input_type, acc_type))
            self.register(MatmulTransposeB(1536, 1536, 2048, input_type, acc_type))
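
Note that `zip` pairs each input type with its matching accumulator type, so the loop registers six tests in total:

```python
# zip pairs element-wise: i8 accumulates into i32, bf16 into f32.
print(list(zip(["i8", "bf16"], ["i32", "f32"])))
# [('i8', 'i32'), ('bf16', 'f32')]
```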

        # Matmul test(s):
        self.register(
            Matmul(
@@ -1659,9 +1705,6 @@ def __init__(self):
        for name in ["two_matmul_switching", "matmul_f32_8_8_4", "matmul_f32_8_4_8"]:
            self.register(MultipleDispatches(name))

-        # MatmulFullBias test:
-        self.register(MatmulFullBias(128, 128, 256, "i32", "i32"))

        # Convolution NHWCQ test:
        self.register(ConvolutionNHWCQ())

@@ -1722,7 +1765,7 @@ def all_tests(
that directory.
3) create a new matmul template in `./matmul_template`, for example if you
-want to add a new variant with tranposed operands or unary elementwise
want to add a new variant with transposed operands or unary elementwise
operations.
4) create a new template generator, duplicating the directory structure of
30 changes: 3 additions & 27 deletions build_tools/ci/run_matmul_test.sh
@@ -629,6 +629,8 @@ run_matmul_test_on_shapes ${i32_shapes_medium[@]} \
--acc_type "i32" \
--num_repeat_runs "2"

# bf16 Matmul tests.

bf16_i8_shapes_small=(
'64x64x64'
'128x256x128'
Expand All @@ -641,7 +643,7 @@ bf16_i8_shapes_medium=(
'4096x2048x4096'
)

-# bf16 Matmul tests.

run_matmul_test_on_shapes ${bf16_i8_shapes_small[@]} \
--name_prefix "small_bf16" \
--lower_to_aie_pipeline "objectFifo" \
@@ -675,32 +677,6 @@ run_matmul_test_on_shapes ${bf16_i8_shapes_medium[@]} \
--acc_type "i32" \
--num_repeat_runs "2"
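
The shape strings follow an `MxNxK` convention. A hypothetical helper (not part of `run_matmul_test.sh`, which handles shapes internally) makes the encoding explicit:

```python
# Hypothetical illustration: '128x32x256' encodes M=128, N=32, K=256.
def parse_shape(shape: str) -> tuple[int, int, int]:
    m, n, k = (int(d) for d in shape.split("x"))
    return m, n, k

assert parse_shape("128x32x256") == (128, 32, 256)
```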

-# matmul_transpose_b tests.
-transpose_shapes=(
-'64x64x64'
-'128x32x256'
-'512x128x256'
-'1536x2048x1536'
-)

-run_matmul_test_on_shapes ${transpose_shapes[@]} \
-    --name_prefix "transpose_bf16" \
-    --lower_to_aie_pipeline "objectFifo" \
-    --tile_pipeline "pack-peel" \
-    --lhs_rhs_type "bf16" \
-    --acc_type "f32" \
-    --num_repeat_runs "2" \
-    --do_transpose_rhs "1"

-run_matmul_test_on_shapes ${transpose_shapes[@]} \
-    --name_prefix "transpose_i8" \
-    --lower_to_aie_pipeline "objectFifo" \
-    --tile_pipeline "pack-peel" \
-    --lhs_rhs_type "i8" \
-    --acc_type "i32" \
-    --num_repeat_runs "2" \
-    --do_transpose_rhs "1"


# note this will not actually show any devices because --xrt_lite_n_core_rows --xrt_lite_n_core_cols are not passed
# which i have omitted to make the conditional slightly more succinct
