for test

nod-ai · Sep 23, 2024 · 514eca5 · 514eca5
1 parent 326b930
commit 514eca5
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 109 deletions.
diff --git a/...ler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIETransferStridedAccessPattern.cpp b/...ler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIETransferStridedAccessPattern.cpp
@@ -240,6 +240,7 @@ static LogicalResult createNewAddressing(
 static LogicalResult transferDmaAddressing(MLIRContext *ctx,
                                            AMDAIE::ConnectionOp connectionOp) {
   IRRewriter rewriter(ctx);
+  OpBuilder::InsertionGuard guard(rewriter);
 
   FailureOr<AMDAIE::NpuCircularDmaCpyNdOp> maybeNpuDmaUserOp =
       connectionOp.getNpuCircularDmaCpyNdUser();
@@ -263,8 +264,11 @@ static LogicalResult transferDmaAddressing(MLIRContext *ctx,
       circularDma.getTargetMixedStrides();
 
   // Change the source/target addressing of all users from a connection op.
-  for (Operation *user : connectionOp->getUsers()) {
+  llvm::SmallVector<Operation *> users(connectionOp->getUsers());
+  assert(users.size() == 3 && "Expect 3 users");
+  for (Operation *user : users) {
     if (auto dmaOp = dyn_cast<AMDAIE::NpuDmaCpyNdOp>(user)) {
+      OpBuilder::InsertionGuard guard(rewriter);
       SmallVector<OpFoldResult> srcOffsets = dmaOp.getSourceMixedOffsets();
       SmallVector<OpFoldResult> srcSizes = dmaOp.getSourceMixedSizes();
       SmallVector<OpFoldResult> srcStrides = dmaOp.getSourceMixedStrides();

diff --git a/.../plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/transfer_strided_access_pattern.mlir b/.../plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/transfer_strided_access_pattern.mlir
@@ -1,54 +1,4 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-amdaie-transfer-strided-access-pattern))" --split-input-file %s --verify-diagnostics | FileCheck %s
-
-// CHECK-LABEL: @single_dma_l3_source
-// CHECK:       %[[APPLY:.+]] = affine.apply
-// CHECK:       amdaie.npu.circular_dma_cpy_nd %{{.*}}([0, 0, 0, 0] [4, 32, 2, 32] [2048, 32, 1024, 1], [] [] [])
-// CHECK:       amdaie.npu.dma_cpy_nd %{{.*}}([] [] [], [0, 0, %[[APPLY]]] [4, 32, 64] [4096, 128, 1])
-#map = affine_map<(d0) -> (d0 * 64)>
-module {
-  func.func @single_dma_l3_source(%arg0: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, %arg1: !amdaie.logicalobjectfifo<memref<128x128xi32>>) {
-    amdaie.workgroup {
-      %0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, !amdaie.logicalobjectfifo<memref<128x128xi32>>)
-      amdaie.controlcode {
-        scf.forall (%arg2, %arg3) in (2, 2) {
-          %1 = affine.apply #map(%arg3)
-          %2 = amdaie.npu.circular_dma_cpy_nd %0([0] [2048] [1], [] [] [])
-          %3 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 0, 0, %1] [4, 2, 32, 32] [4096, 32, 128, 1])
-          amdaie.npu.dma_wait(%3, MM2S)
-        }
-        amdaie.end
-      }
-    }
-    return
-  }
-}
-
-// -----
-
-// CHECK-LABEL: @single_dma_l3_target
-// CHECK:       %[[APPLY:.+]] = affine.apply
-// CHECK:       amdaie.npu.circular_dma_cpy_nd %{{.*}}([] [] [], [0, 0, 0, 0] [4, 32, 2, 32] [2048, 32, 1024, 1])
-// CHECK:       amdaie.npu.dma_cpy_nd %{{.*}}([0, 0, %[[APPLY]]] [4, 32, 64] [4096, 128, 1], [] [] [])
-#map = affine_map<(d0) -> (d0 * 64)>
-module {
-  func.func @single_dma_l3_target(%arg0: !amdaie.logicalobjectfifo<memref<128x128xi32>>, %arg1: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>) {
-    amdaie.workgroup {
-      %0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<128x128xi32>>, !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>)
-      amdaie.controlcode {
-        scf.forall (%arg2, %arg3) in (2, 2) {
-          %1 = affine.apply #map(%arg3)
-          %2 = amdaie.npu.circular_dma_cpy_nd %0([] [] [], [0] [2048] [1])
-          %3 = amdaie.npu.dma_cpy_nd %0([0, 0, 0, %1] [4, 2, 32, 32] [4096, 32, 128, 1], [] [] [])
-          amdaie.npu.dma_wait(%3, S2MM)
-        }
-        amdaie.end
-      }
-    }
-    return
-  }
-}
-
-// -----
+// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-amdaie-transfer-strided-access-pattern))" --split-input-file %s | FileCheck %s
 
 // CHECK-LABEL: @multiple_dma_l3_source
 // CHECK:       %[[APPLY:.+]] = affine.apply
@@ -58,6 +8,9 @@ module {
 #map = affine_map<(d0) -> (d0 * 64)>
 module {
   func.func @multiple_dma_l3_source(%arg0: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, %arg1: !amdaie.logicalobjectfifo<memref<256x128xi32>>) {
+    %c1 = arith.constant 1 : index
+    %c0 = arith.constant 0 : index
+    %c2 = arith.constant 2 : index
     amdaie.workgroup {
       %0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, !amdaie.logicalobjectfifo<memref<256x128xi32>>)
       amdaie.controlcode {
@@ -66,8 +19,10 @@ module {
           %2 = amdaie.npu.circular_dma_cpy_nd %0([0] [2048] [1], [] [] [])
           %3 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 0, %1] [2, 32, 32] [32, 128, 1])
           amdaie.npu.dma_wait(%3, MM2S)
-          %4 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 224, %1] [2, 32, 32] [32, 128, 1])
-          amdaie.npu.dma_wait(%4, MM2S)
+          scf.for %arg4 = %c0 to %c2 step %c1 {
+            %4 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 224, %1] [2, 32, 32] [32, 128, 1])
+            amdaie.npu.dma_wait(%4, MM2S)
+          }
         }
         amdaie.end
       }
@@ -102,58 +57,3 @@ module {
     return
   }
 }
-
-// -----
-
-// This test is supposed not to have any change, because the L2 addressing is not linear.
-// CHECK-LABEL: @no_transfer_l2_not_linear
-// CHECK:       %[[APPLY:.+]] = affine.apply
-// CHECK:       amdaie.npu.circular_dma_cpy_nd %{{.*}}([0, 0, 0, 0] [4, 32, 2, 32] [2048, 32, 1024, 1], [] [] [])
-// CHECK:       amdaie.npu.dma_cpy_nd %{{.*}}([] [] [], [0, 0, 0, %[[APPLY]]] [4, 2, 32, 32] [4096, 32, 128, 1])
-#map = affine_map<(d0) -> (d0 * 64)>
-module {
-  func.func @no_transfer_l2_not_linear(%arg0: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, %arg1: !amdaie.logicalobjectfifo<memref<128x128xi32>>) {
-    amdaie.workgroup {
-      %0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, !amdaie.logicalobjectfifo<memref<128x128xi32>>)
-      amdaie.controlcode {
-        scf.forall (%arg2, %arg3) in (2, 2) {
-          %1 = affine.apply #map(%arg3)
-          %2 = amdaie.npu.circular_dma_cpy_nd %0([0, 0, 0, 0] [4, 32, 2, 32] [2048, 32, 1024, 1], [] [] [])
-          %3 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 0, 0, %1] [4, 2, 32, 32] [4096, 32, 128, 1])
-          amdaie.npu.dma_wait(%3, MM2S)
-        }
-        amdaie.end
-      }
-    }
-    return
-  }
-}
-
-// -----
-
-// This test is supposed not to have any change, because one of the L3 addressings is not combinable.
-// CHECK-LABEL: @no_transfer_l3_not_combinable
-// CHECK:       %[[APPLY:.+]] = affine.apply
-// CHECK:       amdaie.npu.circular_dma_cpy_nd %{{.*}}([0] [2048] [1], [] [] [])
-// CHECK:       amdaie.npu.dma_cpy_nd %{{.*}}([] [] [], [0, 0, 0, %[[APPLY]]] [4, 2, 32, 32] [4096, 32, 128, 1])
-// CHECK:       amdaie.npu.dma_cpy_nd %{{.*}}([] [] [], [0, 32, 0, %[[APPLY]]] [4, 2, 32, 32] [4096, 32, 128, 1])
-#map = affine_map<(d0) -> (d0 * 64)>
-module {
-  func.func @no_transfer_l3_not_combinable(%arg0: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, %arg1: !amdaie.logicalobjectfifo<memref<128x128xi32>>) {
-    amdaie.workgroup {
-      %0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, !amdaie.logicalobjectfifo<memref<128x128xi32>>)
-      amdaie.controlcode {
-        scf.forall (%arg2, %arg3) in (2, 2) {
-          %1 = affine.apply #map(%arg3)
-          %2 = amdaie.npu.circular_dma_cpy_nd %0([0] [2048] [1], [] [] [])
-          %3 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 0, 0, %1] [4, 2, 32, 32] [4096, 32, 128, 1])
-          amdaie.npu.dma_wait(%3, MM2S)
-          %4 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 32, 0, %1] [4, 2, 32, 32] [4096, 32, 128, 1])
-          amdaie.npu.dma_wait(%3, MM2S)
-        }
-        amdaie.end
-      }
-    }
-    return
-  }
-}