From b7fbe2b9de8c3d2a485c42433b40b1151df5dc23 Mon Sep 17 00:00:00 2001
From: Avimitin <dev@avimit.in>
Date: Wed, 12 Jul 2023 16:54:19 +0800
Subject: [PATCH] [examples][MLIRSparseTensor] Add example to show how sparse
 vectorization rewrite ForOp

Signed-off-by: Avimitin <dev@avimit.in>
---
 examples/MLIRSparseTensor/makefile            | 21 ++++++++
 .../sparse-tensor-vectorization.mlir          | 52 +++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 examples/MLIRSparseTensor/sparse-tensor-vectorization.mlir
diff --git a/examples/MLIRSparseTensor/makefile b/examples/MLIRSparseTensor/makefile
index 6afd5f5e51..f31e3adf19 100644
--- a/examples/MLIRSparseTensor/makefile
+++ b/examples/MLIRSparseTensor/makefile
@@ -105,3 +105,24 @@ sparse-tensor-expand-lower:
 		--linalg-generalize-named-ops \
 		--linalg-fuse-elementwise-ops \
 		--sparsification -o log.mlir
+
+# Lower the example through sparsification only, with no vectorization pass.
+# The resulting plain loop is the baseline to compare the vectorized output against.
+sparse-tensor-vectorization-linalg-lower:
+	@$(MLIR_OPT) -o log.mlir \
+		--linalg-generalize-named-ops \
+		--linalg-fuse-elementwise-ops \
+		--sparsification \
+		./sparse-tensor-vectorization.mlir
+# Run sparsification followed by sparse vectorization with vector length 16 (vl=16).
+sparse-tensor-vectorization-lower:
+	@$(MLIR_OPT) -o log.mlir --sparsification --cse \
+		--sparse-vectorization="vl=16" --cse \
+		./sparse-tensor-vectorization.mlir
+# Same pipeline as sparse-tensor-vectorization-lower, but with VLA vectorization enabled.
+# For inspecting the generated code only: no Arm SVE machine is currently available to run it.
+sparse-tensor-vla-vectorization-lower:
+	@$(MLIR_OPT) -o log.mlir \
+		--sparsification --cse \
+		--sparse-vectorization="vl=16 enable-vla-vectorization=true" --cse \
+		./sparse-tensor-vectorization.mlir
diff --git a/examples/MLIRSparseTensor/sparse-tensor-vectorization.mlir b/examples/MLIRSparseTensor/sparse-tensor-vectorization.mlir
new file mode 100644
index 0000000000..5ed3c14bc0
--- /dev/null
+++ b/examples/MLIRSparseTensor/sparse-tensor-vectorization.mlir
@@ -0,0 +1,52 @@
+#SparseVector = #sparse_tensor.encoding<{
+  dimLevelType = ["compressed"]  // 1-D encoding: the only dimension uses the "compressed" level type
+}>
+
+#trait_mul = {
+  doc = "x(i) = a(i) * b(i)",
+  iterator_types = ["parallel"],
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a: first input
+    affine_map<(i) -> (i)>,  // b: second input
+    affine_map<(i) -> (i)>   // x: output
+  ]
+}
+
+// Parallel-loop example: element-wise product x(i) = a(i) * b(i) of sparse %arga and dense %argb.
+func.func @sparse_mul(
+    %arga: tensor<1024xf32, #SparseVector>, %argb: tensor<1024xf32>,
+    %argx: tensor<1024xf32>) -> tensor<1024xf32> {
+  %result = linalg.generic #trait_mul
+    ins(%arga, %argb : tensor<1024xf32, #SparseVector>, tensor<1024xf32>)
+    outs(%argx : tensor<1024xf32>) {
+  ^bb0(%lhs: f32, %rhs: f32, %out: f32):
+    %product = arith.mulf %lhs, %rhs : f32
+    linalg.yield %product : f32
+  } -> tensor<1024xf32>
+  return %result : tensor<1024xf32>
+}
+
+#trait_reduction = {
+  doc = "x += a(i) * b(i)",
+  iterator_types = ["reduction"],
+  indexing_maps = [
+    affine_map<(i) -> (i)>,  // a: first input
+    affine_map<(i) -> (i)>,  // b: second input
+    affine_map<(i) -> ()>    // x: output, indexed by no loop dimension
+  ]
+}
+
+// Reduction-loop example: x += a(i) * b(i), with sparse %arga, dense %argb and a rank-0 %argx.
+func.func @sparse_reduction(
+    %arga: tensor<1024xf32, #SparseVector>, %argb: tensor<1024xf32>,
+    %argx: tensor<f32>) -> tensor<f32> {
+  %result = linalg.generic #trait_reduction
+    ins(%arga, %argb : tensor<1024xf32, #SparseVector>, tensor<1024xf32>)
+    outs(%argx : tensor<f32>) {
+  ^bb0(%lhs: f32, %rhs: f32, %acc: f32):
+    %product = arith.mulf %lhs, %rhs : f32
+    %sum = arith.addf %acc, %product : f32
+    linalg.yield %sum : f32
+  } -> tensor<f32>
+  return %result : tensor<f32>
+}