opencompl · superlopuh · Oct 3, 2024 · Sep 2, 2024 · Oct 3, 2024 · Oct 3, 2024
diff --git a/Snakefile b/Snakefile
@@ -653,6 +653,7 @@ rule kernel_generate_data_c:
     input:
         json="kernels/{kernel}/{shape}/params.json",
         h="kernels/{kernel}/{shape}/data.h",
+        gendata="kernels/{kernel}/gendata.py"
     output:
         "kernels/{kernel}/{shape}/data.c",
     wildcard_constraints:

diff --git a/kernels/pooling_nchw_max_d1_s2_3x3/baseline.c.template b/kernels/pooling_nchw_max_d1_s2_3x3/baseline.c.template
@@ -3,10 +3,12 @@
 #include <stdint.h>
 
 void pooling_nchw_max_d1_s2_3x3(const double* restrict x, double* restrict y) {
-    for (int row = 0; row < H - 3 + 1; row += 2) {
-        for (int col = 0; col < W - 3 + 1; col += 2) {
-            int y_row = row / 2;
-            int y_col = col / 2;
+    // Bound the stride-1 sweep by the output extent: H and W carry one spare
+    // row/column, so `H - 3 + 1` would index one row/column past NEW_H x NEW_W.
+    for (int row = 0; row < NEW_H; row += 1) {
+        for (int col = 0; col < NEW_W; col += 1) {
+            int y_row = row;
+            int y_col = col;
             int y_index = (y_row * NEW_W) + y_col;
             // Load initial value in y
             double max_value = -10000.0;

diff --git a/kernels/pooling_nchw_max_d1_s2_3x3/data.h.template b/kernels/pooling_nchw_max_d1_s2_3x3/data.h.template
@@ -2,8 +2,8 @@
 
 #define N 1
 #define C 1
-#define H {{(M - 1) * 2 + 3 + 1}}
-#define W {{(N - 1) * 2 + 3 + 1}}
+#define H {{(M - 1) * 1 + 3 + 1}}
+#define W {{(N - 1) * 1 + 3 + 1}}
 #define NEW_H {{M}}
 #define NEW_W {{N}}
 

diff --git a/kernels/pooling_nchw_max_d1_s2_3x3/gendata.py b/kernels/pooling_nchw_max_d1_s2_3x3/gendata.py
@@ -16,7 +16,7 @@ def sum_pool_data(
 
     # Define the pooling parameters
     pool_size = (3, 3)
-    stride = 2
+    stride = 1
 
     new_h = M
     new_w = N

diff --git a/kernels/pooling_nchw_max_d1_s2_3x3/linalg.mlir.template b/kernels/pooling_nchw_max_d1_s2_3x3/linalg.mlir.template
@@ -1,7 +1,7 @@
-  func.func public @pooling_nchw_max_d1_s2_3x3(%X : tensor<1x1x{{(M - 1) * 2 + 3 + 1}}x{{(N - 1) * 2 + 3 + 1}}xf64> {"llvm.noalias"}, %Y : tensor<1x1x{{M}}x{{N}}xf64> {"llvm.noalias"}) -> tensor<1x1x{{M}}x{{N}}xf64> {
+  func.func public @pooling_nchw_max_d1_s2_3x3(%X : tensor<1x1x{{(M - 1) * 1 + 3 + 1}}x{{(N - 1) * 1 + 3 + 1}}xf64> {"llvm.noalias"}, %Y : tensor<1x1x{{M}}x{{N}}xf64> {"llvm.noalias"}) -> tensor<1x1x{{M}}x{{N}}xf64> {
     %min_val = arith.constant -10000.0 : f64
     %zeros = linalg.fill ins(%min_val : f64) outs(%Y : tensor<1x1x{{M}}x{{N}}xf64>) -> tensor<1x1x{{M}}x{{N}}xf64>
     %kernel = tensor.empty() : tensor<3x3xf64>
-    %res = linalg.pooling_nchw_max {"dilations" = dense<1> : vector<2xi64>, "strides" = dense<2> : vector<2xi64>} ins(%X, %kernel : tensor<1x1x{{(M - 1) * 2 + 3 + 1}}x{{(N - 1) * 2 + 3 + 1}}xf64>, tensor<3x3xf64>) outs(%zeros : tensor<1x1x{{M}}x{{N}}xf64>) -> tensor<1x1x{{M}}x{{N}}xf64>
+    %res = linalg.pooling_nchw_max {"dilations" = dense<1> : vector<2xi64>, "strides" = dense<1> : vector<2xi64>} ins(%X, %kernel : tensor<1x1x{{(M - 1) * 1 + 3 + 1}}x{{(N - 1) * 1 + 3 + 1}}xf64>, tensor<3x3xf64>) outs(%zeros : tensor<1x1x{{M}}x{{N}}xf64>) -> tensor<1x1x{{M}}x{{N}}xf64>
     func.return %res : tensor<1x1x{{M}}x{{N}}xf64>
   }
diff --git a/kernels/pooling_nchw_sum_d1_s2_3x3/baseline.c.template b/kernels/pooling_nchw_sum_d1_s2_3x3/baseline.c.template
@@ -3,12 +3,14 @@
 #include <stdint.h>
 
 void pooling_nchw_sum_d1_s2_3x3(const double* restrict x, double* restrict y) {
-    for (int row = 0; row < H - 3 + 1; row += 2) {
-        for (int col = 0; col < W - 3 + 1; col += 2) {
+    // Bound the stride-1 sweep by the output extent: H and W carry one spare
+    // row/column, so `H - 3 + 1` would index one row/column past NEW_H x NEW_W.
+    for (int row = 0; row < NEW_H; row += 1) {
+        for (int col = 0; col < NEW_W; col += 1) {
             for (int i = 0; i < N; i++) {
                 for (int j = 0; j < C; j++) {
-                    int y_row = row / 2;
-                    int y_col = col / 2;
+                    int y_row = row;
+                    int y_col = col;
                     int y_index = (i * (C * NEW_H * NEW_W)) + (j * (NEW_H * NEW_W)) +
                                   (y_row * NEW_W) + y_col;
                     double sum = 0.0;

diff --git a/kernels/pooling_nchw_sum_d1_s2_3x3/data.h.template b/kernels/pooling_nchw_sum_d1_s2_3x3/data.h.template
@@ -2,8 +2,8 @@
 
 #define N 1
 #define C 1
-#define H {{(M - 1) * 2 + 3 + 1}}
-#define W {{(N - 1) * 2 + 3 + 1}}
+#define H {{(M - 1) * 1 + 3 + 1}}
+#define W {{(N - 1) * 1 + 3 + 1}}
 #define NEW_H {{M}}
 #define NEW_W {{N}}
 

diff --git a/kernels/pooling_nchw_sum_d1_s2_3x3/gendata.py b/kernels/pooling_nchw_sum_d1_s2_3x3/gendata.py
@@ -16,7 +16,7 @@ def sum_pool_data(
 
     # Define the pooling parameters
     pool_size = (3, 3)
-    stride = 2
+    stride = 1
 
     new_h = M
     new_w = N

diff --git a/kernels/pooling_nchw_sum_d1_s2_3x3/linalg.mlir.template b/kernels/pooling_nchw_sum_d1_s2_3x3/linalg.mlir.template
@@ -1,7 +1,7 @@
-  func.func public @pooling_nchw_sum_d1_s2_3x3(%X : tensor<1x1x{{(M - 1) * 2 + 3 + 1}}x{{(N - 1) * 2 + 3 + 1}}xf64> {"llvm.noalias"}, %Y : tensor<1x1x{{M}}x{{N}}xf64> {"llvm.noalias"}) -> tensor<1x1x{{M}}x{{N}}xf64> {
+  func.func public @pooling_nchw_sum_d1_s2_3x3(%X : tensor<1x1x{{(M - 1) * 1 + 3 + 1}}x{{(N - 1) * 1 + 3 + 1}}xf64> {"llvm.noalias"}, %Y : tensor<1x1x{{M}}x{{N}}xf64> {"llvm.noalias"}) -> tensor<1x1x{{M}}x{{N}}xf64> {
     %zero = arith.constant 0.0 : f64
     %zeros = linalg.fill ins(%zero : f64) outs(%Y : tensor<1x1x{{M}}x{{N}}xf64>) -> tensor<1x1x{{M}}x{{N}}xf64>
     %kernel = tensor.empty() : tensor<3x3xf64>
-    %res = linalg.pooling_nchw_sum {"dilations" = dense<1> : vector<2xi64>, "strides" = dense<2> : vector<2xi64>} ins(%X, %kernel : tensor<1x1x{{(M - 1) * 2 + 3 + 1}}x{{(N - 1) * 2 + 3 + 1}}xf64>, tensor<3x3xf64>) outs(%zeros : tensor<1x1x{{M}}x{{N}}xf64>) -> tensor<1x1x{{M}}x{{N}}xf64>
+    %res = linalg.pooling_nchw_sum {"dilations" = dense<1> : vector<2xi64>, "strides" = dense<1> : vector<2xi64>} ins(%X, %kernel : tensor<1x1x{{(M - 1) * 1 + 3 + 1}}x{{(N - 1) * 1 + 3 + 1}}xf64>, tensor<3x3xf64>) outs(%zeros : tensor<1x1x{{M}}x{{N}}xf64>) -> tensor<1x1x{{M}}x{{N}}xf64>
     func.return %res : tensor<1x1x{{M}}x{{N}}xf64>
   }