Skip to content

Commit

Permalink
for test
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhang93 committed Sep 23, 2024
1 parent 326b930 commit 514eca5
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 109 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ static LogicalResult createNewAddressing(
static LogicalResult transferDmaAddressing(MLIRContext *ctx,
AMDAIE::ConnectionOp connectionOp) {
IRRewriter rewriter(ctx);
OpBuilder::InsertionGuard guard(rewriter);

FailureOr<AMDAIE::NpuCircularDmaCpyNdOp> maybeNpuDmaUserOp =
connectionOp.getNpuCircularDmaCpyNdUser();
Expand All @@ -263,8 +264,11 @@ static LogicalResult transferDmaAddressing(MLIRContext *ctx,
circularDma.getTargetMixedStrides();

// Change the source/target addressing of all users from a connection op.
for (Operation *user : connectionOp->getUsers()) {
llvm::SmallVector<Operation *> users(connectionOp->getUsers());
assert(users.size() == 3 && "Expect 3 users");
for (Operation *user : users) {
if (auto dmaOp = dyn_cast<AMDAIE::NpuDmaCpyNdOp>(user)) {
OpBuilder::InsertionGuard guard(rewriter);
SmallVector<OpFoldResult> srcOffsets = dmaOp.getSourceMixedOffsets();
SmallVector<OpFoldResult> srcSizes = dmaOp.getSourceMixedSizes();
SmallVector<OpFoldResult> srcStrides = dmaOp.getSourceMixedStrides();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,54 +1,4 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-amdaie-transfer-strided-access-pattern))" --split-input-file %s --verify-diagnostics | FileCheck %s

// CHECK-LABEL: @single_dma_l3_source
// CHECK: %[[APPLY:.+]] = affine.apply
// CHECK: amdaie.npu.circular_dma_cpy_nd %{{.*}}([0, 0, 0, 0] [4, 32, 2, 32] [2048, 32, 1024, 1], [] [] [])
// CHECK: amdaie.npu.dma_cpy_nd %{{.*}}([] [] [], [0, 0, %[[APPLY]]] [4, 32, 64] [4096, 128, 1])
#map = affine_map<(d0) -> (d0 * 64)>
// Test input: a single npu.dma_cpy_nd user of a connection whose circular DMA
// starts with a 1-D access pattern ([0] [2048] [1]). The FileCheck
// expectations placed before this module require the circular DMA to end up
// with a 4-D pattern and the npu.dma_cpy_nd pattern to shrink from 4-D to 3-D.
module {
func.func @single_dma_l3_source(%arg0: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, %arg1: !amdaie.logicalobjectfifo<memref<128x128xi32>>) {
amdaie.workgroup {
// Connection between the two function arguments; both DMA ops below use it.
%0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, !amdaie.logicalobjectfifo<memref<128x128xi32>>)
amdaie.controlcode {
scf.forall (%arg2, %arg3) in (2, 2) {
// Dynamic innermost offset derived from the second forall induction variable via #map.
%1 = affine.apply #map(%arg3)
// Circular DMA: only the first access-pattern group is populated.
%2 = amdaie.npu.circular_dma_cpy_nd %0([0] [2048] [1], [] [] [])
// NPU DMA: only the second access-pattern group is populated (4-D, strided).
%3 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 0, 0, %1] [4, 2, 32, 32] [4096, 32, 128, 1])
amdaie.npu.dma_wait(%3, MM2S)
}
amdaie.end
}
}
return
}
}

// -----

// CHECK-LABEL: @single_dma_l3_target
// CHECK: %[[APPLY:.+]] = affine.apply
// CHECK: amdaie.npu.circular_dma_cpy_nd %{{.*}}([] [] [], [0, 0, 0, 0] [4, 32, 2, 32] [2048, 32, 1024, 1])
// CHECK: amdaie.npu.dma_cpy_nd %{{.*}}([0, 0, %[[APPLY]]] [4, 32, 64] [4096, 128, 1], [] [] [])
#map = affine_map<(d0) -> (d0 * 64)>
// Test input: counterpart of the single-DMA source case with the populated
// access-pattern groups swapped. The circular DMA populates its second group
// and the npu.dma_cpy_nd populates its first group; the wait uses S2MM.
// The FileCheck expectations placed before this module require the 4-D
// pattern to move onto the circular DMA and the npu.dma_cpy_nd to become 3-D.
module {
func.func @single_dma_l3_target(%arg0: !amdaie.logicalobjectfifo<memref<128x128xi32>>, %arg1: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>) {
amdaie.workgroup {
// Connection between the two function arguments; both DMA ops below use it.
%0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<128x128xi32>>, !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>)
amdaie.controlcode {
scf.forall (%arg2, %arg3) in (2, 2) {
// Dynamic innermost offset derived from the second forall induction variable via #map.
%1 = affine.apply #map(%arg3)
// Circular DMA: only the second access-pattern group is populated (1-D).
%2 = amdaie.npu.circular_dma_cpy_nd %0([] [] [], [0] [2048] [1])
// NPU DMA: only the first access-pattern group is populated (4-D, strided).
%3 = amdaie.npu.dma_cpy_nd %0([0, 0, 0, %1] [4, 2, 32, 32] [4096, 32, 128, 1], [] [] [])
amdaie.npu.dma_wait(%3, S2MM)
}
amdaie.end
}
}
return
}
}

// -----
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-amdaie-transfer-strided-access-pattern))" --split-input-file %s | FileCheck %s

// CHECK-LABEL: @multiple_dma_l3_source
// CHECK: %[[APPLY:.+]] = affine.apply
Expand All @@ -58,6 +8,9 @@ module {
#map = affine_map<(d0) -> (d0 * 64)>
module {
func.func @multiple_dma_l3_source(%arg0: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, %arg1: !amdaie.logicalobjectfifo<memref<256x128xi32>>) {
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
amdaie.workgroup {
%0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, !amdaie.logicalobjectfifo<memref<256x128xi32>>)
amdaie.controlcode {
Expand All @@ -66,8 +19,10 @@ module {
%2 = amdaie.npu.circular_dma_cpy_nd %0([0] [2048] [1], [] [] [])
%3 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 0, %1] [2, 32, 32] [32, 128, 1])
amdaie.npu.dma_wait(%3, MM2S)
%4 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 224, %1] [2, 32, 32] [32, 128, 1])
amdaie.npu.dma_wait(%4, MM2S)
scf.for %arg4 = %c0 to %c2 step %c1 {
%4 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 224, %1] [2, 32, 32] [32, 128, 1])
amdaie.npu.dma_wait(%4, MM2S)
}
}
amdaie.end
}
Expand Down Expand Up @@ -102,58 +57,3 @@ module {
return
}
}

// -----

// This test is expected to remain unchanged, because the L2 addressing is not linear.
// CHECK-LABEL: @no_transfer_l2_not_linear
// CHECK: %[[APPLY:.+]] = affine.apply
// CHECK: amdaie.npu.circular_dma_cpy_nd %{{.*}}([0, 0, 0, 0] [4, 32, 2, 32] [2048, 32, 1024, 1], [] [] [])
// CHECK: amdaie.npu.dma_cpy_nd %{{.*}}([] [] [], [0, 0, 0, %[[APPLY]]] [4, 2, 32, 32] [4096, 32, 128, 1])
#map = affine_map<(d0) -> (d0 * 64)>
// Negative test input: the circular DMA already carries a 4-D strided access
// pattern rather than a 1-D one. The FileCheck expectations placed before
// this module require both DMA ops to be printed with the same patterns as
// written here.
module {
func.func @no_transfer_l2_not_linear(%arg0: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, %arg1: !amdaie.logicalobjectfifo<memref<128x128xi32>>) {
amdaie.workgroup {
// Connection between the two function arguments; both DMA ops below use it.
%0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, !amdaie.logicalobjectfifo<memref<128x128xi32>>)
amdaie.controlcode {
scf.forall (%arg2, %arg3) in (2, 2) {
// Dynamic innermost offset derived from the second forall induction variable via #map.
%1 = affine.apply #map(%arg3)
// Circular DMA whose first access-pattern group is already 4-D.
%2 = amdaie.npu.circular_dma_cpy_nd %0([0, 0, 0, 0] [4, 32, 2, 32] [2048, 32, 1024, 1], [] [] [])
// NPU DMA with a 4-D pattern in its second access-pattern group.
%3 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 0, 0, %1] [4, 2, 32, 32] [4096, 32, 128, 1])
amdaie.npu.dma_wait(%3, MM2S)
}
amdaie.end
}
}
return
}
}

// -----

// This test is expected to remain unchanged, because one of the L3 addressing patterns is not combinable.
// CHECK-LABEL: @no_transfer_l3_not_combinable
// CHECK: %[[APPLY:.+]] = affine.apply
// CHECK: amdaie.npu.circular_dma_cpy_nd %{{.*}}([0] [2048] [1], [] [] [])
// CHECK: amdaie.npu.dma_cpy_nd %{{.*}}([] [] [], [0, 0, 0, %[[APPLY]]] [4, 2, 32, 32] [4096, 32, 128, 1])
// CHECK: amdaie.npu.dma_cpy_nd %{{.*}}([] [] [], [0, 32, 0, %[[APPLY]]] [4, 2, 32, 32] [4096, 32, 128, 1])
#map = affine_map<(d0) -> (d0 * 64)>
// Negative test input: two npu.dma_cpy_nd users of the same connection whose
// access patterns differ only in the second offset (0 vs 32). The FileCheck
// expectations placed before this module require the circular DMA and both
// npu.dma_cpy_nd ops to be printed with the same patterns as written here.
module {
func.func @no_transfer_l3_not_combinable(%arg0: !amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, %arg1: !amdaie.logicalobjectfifo<memref<128x128xi32>>) {
amdaie.workgroup {
// Connection between the two function arguments; all DMA ops below use it.
%0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo<memref<2048xi32, 1 : i32>, 2>, !amdaie.logicalobjectfifo<memref<128x128xi32>>)
amdaie.controlcode {
scf.forall (%arg2, %arg3) in (2, 2) {
// Dynamic innermost offset derived from the second forall induction variable via #map.
%1 = affine.apply #map(%arg3)
// Circular DMA: only the first access-pattern group is populated (1-D).
%2 = amdaie.npu.circular_dma_cpy_nd %0([0] [2048] [1], [] [] [])
// First NPU DMA: second offset is 0.
%3 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 0, 0, %1] [4, 2, 32, 32] [4096, 32, 128, 1])
amdaie.npu.dma_wait(%3, MM2S)
// Second NPU DMA: same sizes and strides, but second offset is 32.
%4 = amdaie.npu.dma_cpy_nd %0([] [] [], [0, 32, 0, %1] [4, 2, 32, 32] [4096, 32, 128, 1])
// NOTE(review): this waits on %3 a second time and %4 is never waited on;
// presumably %4 was intended here - confirm before changing.
amdaie.npu.dma_wait(%3, MM2S)
}
amdaie.end
}
}
return
}
}

0 comments on commit 514eca5

Please sign in to comment.