From 489f3ca104d17eba8a01aea9d055c48611c96e7a Mon Sep 17 00:00:00 2001 From: Jeff Fifield Date: Thu, 7 Mar 2024 15:34:32 -0700 Subject: [PATCH] Fix write lock placement (#467) Place write locks at the start of the block instead of at dma op location --- mlir/lib/Conversion/AIRToAIEPass.cpp | 7 +++-- .../air_channel_to_locks_ping_pong.mlir | 8 ++--- .../AIRToAIE/air_shimcpy_to_aie.mlir | 20 +++++++------ .../AIRToAIE/air_shimcpy_to_aie2.mlir | 30 ++++++++++--------- ...air_shimcpy_to_aie2_with_shim_dma_bds.mlir | 6 ++-- .../air_shimcpy_to_aie_with_shim_dma_bds.mlir | 7 +++-- .../AIRToAIE/air_to_ipu_add_one.mlir | 10 +++---- .../async_gemm_w_pingpong_to_locks.mlir | 8 ++--- .../async_gemm_w_pingpong_to_locks_aie2.mlir | 8 ++--- .../AIRToAIE/lower_herd_air_regions.mlir | 2 +- 10 files changed, 57 insertions(+), 49 deletions(-) diff --git a/mlir/lib/Conversion/AIRToAIEPass.cpp b/mlir/lib/Conversion/AIRToAIEPass.cpp index f64e8dbb2..2da8bfbcb 100644 --- a/mlir/lib/Conversion/AIRToAIEPass.cpp +++ b/mlir/lib/Conversion/AIRToAIEPass.cpp @@ -2276,7 +2276,8 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase { int64_t lockAqValue = -1; int64_t lockRelValue = -1; Value alloc = nullptr; - if (isTileInbound(memcpyOpIf, (int)air::MemorySpace::L1)) { + auto tileInbound = isTileInbound(memcpyOpIf, (int)air::MemorySpace::L1); + if (tileInbound) { lockAqValue = isAIE2 ? 1 : 1; lockRelValue = isAIE2 ? 
1 : 0; alloc = memcpyOpIf.getDstMemref(); @@ -2288,8 +2289,10 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase { if (auto bco = dyn_cast(alloc.getDefiningOp())) builder.setInsertionPoint(bco.getOperand().getDefiningOp()); - else if (auto a = dyn_cast(alloc.getDefiningOp())) + else if (isa(alloc.getDefiningOp())) builder.setInsertionPoint(alloc.getDefiningOp()); + else if (!tileInbound && isa(alloc.getDefiningOp())) + builder.setInsertionPointToStart(memcpyOpIf->getBlock()); else builder.setInsertionPoint(memcpyOpIf); diff --git a/mlir/test/Conversion/AIRToAIE/air_channel_to_locks_ping_pong.mlir b/mlir/test/Conversion/AIRToAIE/air_channel_to_locks_ping_pong.mlir index ccebcc70b..f4bac81d6 100644 --- a/mlir/test/Conversion/AIRToAIE/air_channel_to_locks_ping_pong.mlir +++ b/mlir/test/Conversion/AIRToAIE/air_channel_to_locks_ping_pong.mlir @@ -55,7 +55,7 @@ // CHECK: ^bb2: // CHECK: aie.end // CHECK: } - +// CHECK: @multi_memcpys_over_time #set = affine_set<()[s0, s1] : (s0 >= 0, -s0 + 1 >= 0, s1 == 0)> air.channel @channel_0 [1, 1] func.func @multi_memcpys_over_time() { @@ -161,7 +161,7 @@ func.func @multi_memcpys_over_time() { // CHECK: } // CHECK: aie.flow(%[[VAL_1]], DMA : 0, %[[VAL_2]], DMA : 0) - +// CHECK: @core_to_core_ping_pong #set1 = affine_set<()[s0, s1] : (s0 >= 0, -s0 + 1 >= 0, s1 == 0)> air.channel @channel_1 [1, 1] func.func @core_to_core_ping_pong() { @@ -262,14 +262,14 @@ func.func @core_to_core_ping_pong() { // CHECK: aie.core(%[[VAL_1]]) { // CHECK: aie.use_lock(%[[VAL_3]], AcquireGreaterEqual, 1) -// CHECK: aie.use_lock(%[[VAL_4]], Release, 1) // CHECK: aie.use_lock(%[[VAL_3]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_4]], Release, 1) +// CHECK: aie.use_lock(%[[VAL_4]], Release, 1) // CHECK: aie.end // CHECK: } // CHECK: aie.flow(%[[VAL_1]], DMA : 0, %[[VAL_2]], DMA : 0) - +// cHECK: @core_to_core_ping_pong #set1 = affine_set<()[s0, s1] : (s0 >= 0, -s0 + 1 >= 0, s1 == 0)> air.channel @channel_1 [1, 1] func.func 
@core_to_core_ping_pong() { diff --git a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie.mlir b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie.mlir index 3a344ad83..0ccc89697 100644 --- a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie.mlir +++ b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie.mlir @@ -33,6 +33,7 @@ // CHECK: } // CHECK: aie.flow(%[[VAL_10]], DMA : 0, %[[VAL_12]], DMA : 0) +// CHECK: @func1 func.func @func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { %herd_cols = arith.constant 1 : index %herd_rows = arith.constant 1 : index @@ -86,6 +87,7 @@ func.func @func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.flow(%[[VAL_10]], DMA : 0, %[[VAL_12]], DMA : 0) // CHECK: aie.flow(%[[VAL_10]], DMA : 1, %[[VAL_12]], DMA : 1) +// CHECK: @func2 func.func @func2(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { %herd_cols = arith.constant 1 : index %herd_rows = arith.constant 1 : index @@ -135,8 +137,8 @@ func.func @func2(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: } // CHECK: aie.core(%[[VAL_1]]) { -// CHECK: aie.use_lock(%[[VAL_2]], Acquire, 1) // CHECK: aie.use_lock(%[[VAL_3]], Acquire, 0) +// CHECK: aie.use_lock(%[[VAL_2]], Acquire, 1) // CHECK: aie.use_lock(%[[VAL_2]], Release, 0) // CHECK: aie.use_lock(%[[VAL_3]], Release, 1) // CHECK: aie.end @@ -144,7 +146,7 @@ func.func @func2(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.flow(%[[VAL_0]], DMA : 0, %[[VAL_1]], DMA : 0) // CHECK: aie.flow(%[[VAL_1]], DMA : 0, %[[VAL_0]], DMA : 0) - +// CHECK: @func3 air.channel @channel_0 [1, 1] air.channel @channel_1 [1, 1] func.func @func3(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { @@ -206,7 +208,7 @@ func.func @func3(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.flow(%[[VAL_0]], DMA : 0, %[[VAL_1]], DMA : 0) // CHECK: aie.flow(%[[VAL_0]], DMA : 1, %[[VAL_1]], DMA : 1) - +// CHECK: @func4 air.channel @channel_2 [1, 
1] air.channel @channel_3 [1, 1] func.func @func4(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { @@ -269,7 +271,7 @@ func.func @func4(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.flow(%[[VAL_0]], DMA : 0, %[[VAL_1]], DMA : 0) // CHECK: aie.flow(%[[VAL_0]], DMA : 1, %[[VAL_1]], DMA : 1) - +// CHECK: @func5 air.channel @channel_4 [1, 1] air.channel @channel_5 [1, 1] func.func @func5(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { @@ -333,7 +335,7 @@ func.func @func5(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.flow(%[[VAL_0]], DMA : 1, %[[VAL_2]], DMA : 0) // CHECK: aie.flow(%[[VAL_0]], DMA : 1, %[[VAL_3]], DMA : 0) // CHECK: aie.flow(%[[VAL_0]], DMA : 1, %[[VAL_4]], DMA : 0) - +// CHECK: @func6 #set = affine_set<()[s0, s1] : (s0 == 0, s1 >= 0, -s1 + 3 >= 0)> #set1 = affine_set<()[s0, s1] : (s0 >= 0, -s0 + 3 >= 0, s1 == 0)> air.channel @channel_6 [1, 1] {broadcast_shape = [1, 4]} @@ -426,6 +428,7 @@ func.func @func6(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: } // CHECK: aie.core(%[[VAL_0]]) { +// CHECK: aie.use_lock(%[[VAL_5]], Acquire, 0) // CHECK: aie.use_lock(%[[VAL_4]], Acquire, 1) // CHECK: scf.for // CHECK: aie.use_lock(%[[VAL_3]], Acquire, 1) @@ -433,7 +436,6 @@ func.func @func6(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.use_lock(%[[VAL_2]], Release, 0) // CHECK: aie.use_lock(%[[VAL_3]], Release, 0) // CHECK: } -// CHECK: aie.use_lock(%[[VAL_5]], Acquire, 0) // CHECK: aie.use_lock(%[[VAL_5]], Release, 1) // CHECK: aie.use_lock(%[[VAL_4]], Release, 0) // CHECK: aie.end @@ -442,7 +444,7 @@ func.func @func6(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.flow(%[[VAL_1]], DMA : 0, %[[VAL_0]], DMA : 0) // CHECK: aie.flow(%[[VAL_1]], DMA : 1, %[[VAL_0]], DMA : 1) // CHECK: aie.flow(%[[VAL_0]], DMA : 0, %[[VAL_1]], DMA : 0) - +// CHECK: @func7 air.channel @channel_8 [1, 1] air.channel @channel_9 [1, 1] 
air.channel @channel_10 [1, 1] @@ -529,10 +531,10 @@ func.func @func7(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>, %arg2 : mem // CHECK: aie.use_lock(%[[VAL_3]], Release, 0) // CHECK: aie.next_bd ^bb4 // CHECK: } - +// CHECK: @func8 module { - func.func @graph(%arg0: memref<32x16xi32>, %arg1: memref<32x16xi32>) { + func.func @func8(%arg0: memref<32x16xi32>, %arg1: memref<32x16xi32>) { %c1 = arith.constant 1 : index air.herd @herd_0 tile (%arg2, %arg3) in (%arg4=%c1, %arg5=%c1) args(%arg6=%arg0, %arg7=%arg1) : memref<32x16xi32>, memref<32x16xi32> attributes {x_loc = 5 : i64, y_loc = 4 : i64} { %c0 = arith.constant 0 : index diff --git a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2.mlir b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2.mlir index c764a6d8c..a4f87fd79 100644 --- a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2.mlir +++ b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2.mlir @@ -34,6 +34,7 @@ // CHECK: aie.end // CHECK: aie.flow(%[[VAL_1]], DMA : 0, %[[VAL_0]], DMA : 0) // CHECK: aie.shim_dma_allocation @airMemcpyId0(MM2S, 0, 2) +// CHECK: @func1 func.func @func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { %herd_cols = arith.constant 1 : index %herd_rows = arith.constant 1 : index @@ -82,8 +83,8 @@ func.func @func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: ^bb1: // CHECK: cf.br ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[VAL_3]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) +// CHECK: aie.use_lock(%[[VAL_3]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_2]], Release, 1) // CHECK: aie.use_lock(%[[VAL_5]], Release, 1) // CHECK: aie.end @@ -93,7 +94,8 @@ func.func @func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: memref.global "public" @airMemcpyId2 : memref<512xi32, 2> // CHECK: aie.shim_dma_allocation @airMemcpyId1(MM2S, 0, 2) // CHECK: memref.global "public" @airMemcpyId1 : memref<1024xi32, 2> -func.func 
@func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { +// CHECK: @func2 +func.func @func2(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { %herd_cols = arith.constant 1 : index %herd_rows = arith.constant 1 : index air.herd tile(%tx, %ty) in (%size_x = %herd_cols, %size_y = %herd_rows) args(%ext0 = %arg0, %ext1 = %arg1) : memref<1024xi32>, memref<1024xi32> attributes { sym_name="herd1"} { @@ -148,8 +150,8 @@ func.func @func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: ^bb1: // CHECK: cf.br ^bb2 // CHECK: ^bb2: -// CHECK: aie.use_lock(%[[VAL_3]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_4]], AcquireGreaterEqual, 1) +// CHECK: aie.use_lock(%[[VAL_3]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_2]], Release, 1) // CHECK: aie.use_lock(%[[VAL_5]], Release, 1) // CHECK: aie.end @@ -161,7 +163,7 @@ func.func @func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: memref.global "public" @airMemcpyId3 : memref<512xi32, 2> // CHECK: aie.shim_dma_allocation @airMemcpyId2(MM2S, 0, 2) // CHECK: memref.global "public" @airMemcpyId2 : memref<1024xi32, 2> - +// CHECK: @func3 air.channel @channel_0 [1, 1] air.channel @channel_1 [1, 1] func.func @func3(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { @@ -223,8 +225,8 @@ func.func @func3(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: } // CHECK: aie.core(%[[VAL_3]]) { -// CHECK: aie.use_lock(%[[VAL_18]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_19]], AcquireGreaterEqual, 1) +// CHECK: aie.use_lock(%[[VAL_18]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_20]], Release, 1) // CHECK: aie.use_lock(%[[VAL_17]], Release, 1) // CHECK: aie.end @@ -270,7 +272,7 @@ func.func @func3(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: memref.global "public" @airMemcpyId7 : memref<1024xi32, 1> // CHECK: aie.shim_dma_allocation @airMemcpyId2(MM2S, 0, 2) // CHECK: memref.global 
"public" @airMemcpyId2 : memref<1024xi32, 1> - +// CHECK: @func4 air.channel @channel_2 [1, 1] air.channel @channel_3 [1, 1] air.channel @channel_4 [1, 1] @@ -340,7 +342,7 @@ func.func @func4(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.shim_dma_allocation @airMemcpyId6(MM2S, 0, 2) // CHECK: memref.global "public" @airMemcpyId6 : memref<1024xi32, 1> - +// CHECK: @func5 #set = affine_set<()[s0, s1] : (s0 == 0, s1 >= 0, -s1 + 3 >= 0)> #set1 = affine_set<()[s0, s1] : (s0 >= 0, -s0 + 3 >= 0, s1 == 0)> air.channel @channel_6 [1, 1] {broadcast_shape = [1, 4]} @@ -424,8 +426,8 @@ func.func @func5(%arg0 : memref<1024xi32>) -> () { // CHECK: aie.shim_dma_allocation @airMemcpyId14_3(S2MM, 1, 3) // CHECK: memref.global "public" @airMemcpyId14_3 : memref<4x4xi32, 2> +// CHECK: @func6 // CHECK: air.channel.get{{.*}}metadata = @airMemcpyId14} : (memref<8x8xi32>) - #map1 = affine_map<()[s0] -> (s0 * 4)> air.channel @channel_0 [2, 2] func.func @func6(%arg5 : memref<8x8xi32>) { @@ -498,7 +500,7 @@ func.func @func6(%arg5 : memref<8x8xi32>) { // CHECK: aie.dma_bd({{.*}} : memref<4x4xi32, 1>, 0, 16) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.next_bd ^bb6 - +// CHECK: @func7 air.channel @channel_0 [1, 1] air.channel @channel_1 [1, 1] air.channel @channel_2 [1, 1] @@ -544,7 +546,7 @@ func.func @func7(%arg0 : memref<8x16xi32>, %arg1 : memref<16x8xi32>){ // CHECK: aie.next_bd ^bb1 // CHECK: ^bb2: // pred: ^bb0 // CHECK: aie.end - +// CHECK: @func8 air.channel @channel_0 [1, 1] func.func @func8(%arg0 : memref<8x16xi32>, %arg1 : memref<16x8xi32>){ air.segment args(%ext0 = %arg0, %ext1 = %arg1) : memref<8x16xi32>, memref<16x8xi32> attributes {sym_name="segment", id = 2 : i32, x_loc = 0 : i64, x_size = 1 : i64, y_loc = 3 : i64, y_size = 1 : i64} { @@ -585,7 +587,7 @@ func.func @func8(%arg0 : memref<8x16xi32>, %arg1 : memref<16x8xi32>){ // CHECK: aie.dma_bd({{.*}} : memref<64xf32, 1>, 0, 32, []) // CHECK: aie.dma_start(MM2S, 1, ^bb4, ^bb2) // CHECK: 
aie.dma_bd({{.*}} : memref<64xf32, 1>, 128, 32, []) - +// CHECK: @func9 #map = affine_map<()[s0] -> (s0 * 32)> air.channel @channel_1 [2, 1] func.func @func9(%arg0: memref<128xf32>, %arg1: memref<128xf32>) { @@ -650,7 +652,7 @@ func.func @func9(%arg0: memref<128xf32>, %arg1: memref<128xf32>) { // CHECK: aie.dma_bd({{.*}} : memref<32x256xi32, 1>, 0, 8192) // CHECK: aie.dma_start(MM2S, 1, ^bb4, ^bb2, repeat_count = 32) // CHECK: aie.dma_bd({{.*}} : memref<32x256xi32, 1>, 0, 8192) - +// CHECK: @func10 #map = affine_map<()[s0] -> (s0 * 32)> air.channel @channel_1 [2, 1] func.func @func10(%arg0: memref<128xf32>, %arg1: memref<128xf32>) { @@ -715,10 +717,10 @@ func.func @func10(%arg0: memref<128xf32>, %arg1: memref<128xf32>) { // CHECK: memref<32x256xbf16, 1>, 0, 65536, [, , ]) // CHECK: aie.dma_start(MM2S, 1, ^bb4, ^bb2) // CHECK: memref<32x256xbf16, 1>, 0, 65536, [, , ]) - +// CHECK: @func11 #map = affine_map<()[s0] -> (s0 * 32)> air.channel @channel_1 [2, 1] -func.func @func10(%arg0: memref<128xbf16>, %arg1: memref<128xbf16>) { +func.func @func11(%arg0: memref<128xbf16>, %arg1: memref<128xbf16>) { %c2 = arith.constant 2 : index %0 = air.launch async (%arg2) in (%arg3=%c2) attributes {id = 1 : i32} { %1 = air.segment @segment_0 async attributes {id = 2 : i32, x_loc = 0 : i64, x_size = 1 : i64, y_loc = 3 : i64, y_size = 2 : i64} { diff --git a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2_with_shim_dma_bds.mlir b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2_with_shim_dma_bds.mlir index 00a5e5e46..1cfc77ca0 100644 --- a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2_with_shim_dma_bds.mlir +++ b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie2_with_shim_dma_bds.mlir @@ -91,8 +91,8 @@ func.func @func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.next_bd ^bb4 // CHECK: } // CHECK: aie.core(%[[VAL_2]]) { -// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_10]], AcquireGreaterEqual, 1) +// CHECK: 
aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_8]], Release, 1) // CHECK: aie.use_lock(%[[VAL_11]], Release, 1) // CHECK: aie.end @@ -171,8 +171,8 @@ func.func @func2(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: } // CHECK: aie.core(%[[VAL_7]]) { -// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_10]], AcquireGreaterEqual, 1) +// CHECK: aie.use_lock(%[[VAL_9]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_8]], Release, 1) // CHECK: aie.use_lock(%[[VAL_11]], Release, 1) // CHECK: aie.end @@ -264,8 +264,8 @@ func.func @func3(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: } // CHECK: aie.core(%[[VAL_3]]) { -// CHECK: aie.use_lock(%[[VAL_18]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_19]], AcquireGreaterEqual, 1) +// CHECK: aie.use_lock(%[[VAL_18]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL_20]], Release, 1) // CHECK: aie.use_lock(%[[VAL_17]], Release, 1) // CHECK: aie.end diff --git a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie_with_shim_dma_bds.mlir b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie_with_shim_dma_bds.mlir index cd8205ea7..d5ff86518 100644 --- a/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie_with_shim_dma_bds.mlir +++ b/mlir/test/Conversion/AIRToAIE/air_shimcpy_to_aie_with_shim_dma_bds.mlir @@ -41,6 +41,7 @@ // CHECK: ^bb2: // CHECK: aie.end // CHECK: } +// CHECK: @func1 func.func @func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { %herd_cols = arith.constant 1 : index %herd_rows = arith.constant 1 : index @@ -116,7 +117,7 @@ func.func @func1(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.use_lock(%[[VAL_4]], Release, 0) // CHECK: aie.next_bd ^bb4 // CHECK: } - +// CHECK: @func2 func.func @func2(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { %herd_cols = arith.constant 1 : index %herd_rows = arith.constant 1 : index @@ -170,8 +171,8 @@ 
func.func @func2(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: } // CHECK: aie.core(%[[VAL_5]]) { -// CHECK: aie.use_lock(%[[VAL_6]], Acquire, 1) // CHECK: aie.use_lock(%[[VAL_7]], Acquire, 0) +// CHECK: aie.use_lock(%[[VAL_6]], Acquire, 1) // CHECK: aie.use_lock(%[[VAL_6]], Release, 0) // CHECK: aie.use_lock(%[[VAL_7]], Release, 1) // CHECK: aie.end @@ -197,7 +198,7 @@ func.func @func2(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { // CHECK: aie.use_lock(%[[VAL_3]], Release, 0) // CHECK: aie.next_bd ^bb4 // CHECK: } - +// CHECK: @func3 air.channel @channel_0 [1, 1] air.channel @channel_1 [1, 1] func.func @func3(%arg0 : memref<1024xi32>, %arg1 : memref<1024xi32>) -> () { diff --git a/mlir/test/Conversion/AIRToAIE/air_to_ipu_add_one.mlir b/mlir/test/Conversion/AIRToAIE/air_to_ipu_add_one.mlir index 299cd1da6..32c30f3b6 100644 --- a/mlir/test/Conversion/AIRToAIE/air_to_ipu_add_one.mlir +++ b/mlir/test/Conversion/AIRToAIE/air_to_ipu_add_one.mlir @@ -44,13 +44,13 @@ // CHECK: ^bb1: // CHECK: cf.br ^bb2 // CHECK: ^bb2: +// CHECK: aie.use_lock(%[[VAL9]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL8]], AcquireGreaterEqual, 1) // CHECK: affine.for %[[VAL16:.*]] = 0 to 64 { // CHECK: %[[VAL17:.*]] = affine.load %[[VAL13]][%[[VAL16]]] : memref<64xi32, 2> // CHECK: %[[VAL18:.*]] = arith.addi %[[VAL17]], %[[VAL15]] : i32 // CHECK: affine.store %[[VAL18]], %[[VAL14]][%[[VAL16]]] : memref<64xi32, 2> // CHECK: } -// CHECK: aie.use_lock(%[[VAL9]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL7]], Release, 1) // CHECK: aie.use_lock(%[[VAL10]], Release, 1) // CHECK: aie.end @@ -92,7 +92,7 @@ // CHECK: memref.global "public" @airMemcpyId7 : memref<64xi32, 1> // CHECK: aie.shim_dma_allocation @airMemcpyId2(MM2S, 0, 0) // CHECK: memref.global "public" @airMemcpyId2 : memref<64xi32, 1> - +// CHECK: @func0 #map2 = affine_map<(d0) -> (d0)> air.channel @channel_0 [1, 1] air.channel @channel_1 [1, 1] @@ -176,13 +176,13 @@ func.func 
@func0(%arg0 : memref<64xi32>, %arg1 : memref<64xi32>) -> () { // CHECK: ^bb1: // CHECK: cf.br ^bb2 // CHECK: ^bb2: +// CHECK: aie.use_lock(%[[VAL9]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL8]], AcquireGreaterEqual, 1) // CHECK: affine.for %[[VAL16:.*]] = 0 to 64 { // CHECK: %[[VAL17:.*]] = affine.load %[[VAL13]][%[[VAL16]]] : memref<64xi32, 2> // CHECK: %[[VAL18:.*]] = arith.addi %[[VAL17]], %[[VAL15]] : i32 // CHECK: affine.store %[[VAL18]], %[[VAL14]][%[[VAL16]]] : memref<64xi32, 2> // CHECK: } -// CHECK: aie.use_lock(%[[VAL9]], AcquireGreaterEqual, 1) // CHECK: aie.use_lock(%[[VAL7]], Release, 1) // CHECK: aie.use_lock(%[[VAL10]], Release, 1) // CHECK: aie.end @@ -224,14 +224,14 @@ func.func @func0(%arg0 : memref<64xi32>, %arg1 : memref<64xi32>) -> () { // CHECK: memref.global "public" @airMemcpyId7 : memref<64xi32, 1> // CHECK: aie.shim_dma_allocation @airMemcpyId2(MM2S, 0, 0) // CHECK: memref.global "public" @airMemcpyId2 : memref<64xi32, 1> - +// CHECK: @func1 #map = affine_map<(d0) -> (d0)> module { air.channel @channel_0 [1, 1] air.channel @channel_1 [1, 1] air.channel @channel_2 [1, 1] air.channel @channel_3 [1, 1] - func.func @func0(%arg0: memref<64xi32>, %arg1: memref<64xi32>) { + func.func @func1(%arg0: memref<64xi32>, %arg1: memref<64xi32>) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c512 = arith.constant 512 : index diff --git a/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks.mlir b/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks.mlir index 810953097..255969b05 100644 --- a/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks.mlir +++ b/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks.mlir @@ -24,6 +24,7 @@ // CHECK-COUNT-5: aie.buffer(%[[VAL_2]]) {sym_name = {{.*}}} : memref<32x32xi32, 2> // CHECK: aie.mem(%[[VAL_5]]) // CHECK: aie.core(%[[VAL_5]]) { +// CHECK: aie.use_lock({{.*}}, Acquire, 0) // CHECK: aie.use_lock({{.*}}, Acquire, 1) // CHECK: scf.for // CHECK: 
aie.use_lock({{.*}}, Acquire, 1) @@ -37,12 +38,12 @@ // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: } -// CHECK: aie.use_lock({{.*}}, Acquire, 0) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: } {elf_file = // CHECK: aie.mem(%[[VAL_4]]) // CHECK: aie.core(%[[VAL_4]]) +// CHECK: aie.use_lock({{.*}}, Acquire, 0) // CHECK: aie.use_lock({{.*}}, Acquire, 1) // CHECK: scf.for // CHECK: aie.use_lock({{.*}}, Acquire, 1) @@ -56,12 +57,12 @@ // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: } -// CHECK: aie.use_lock({{.*}}, Acquire, 0) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: } {elf_file = // CHECK: aie.mem(%[[VAL_3]]) // CHECK: aie.core(%[[VAL_3]]) +// CHECK: aie.use_lock({{.*}}, Acquire, 0) // CHECK: aie.use_lock({{.*}}, Acquire, 1) // CHECK: scf.for // CHECK: aie.use_lock({{.*}}, Acquire, 1) @@ -75,12 +76,12 @@ // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: } -// CHECK: aie.use_lock({{.*}}, Acquire, 0) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: } {elf_file = // CHECK: aie.mem(%[[VAL_2]]) // CHECK: aie.core(%[[VAL_2]]) +// CHECK: aie.use_lock({{.*}}, Acquire, 0) // CHECK: aie.use_lock({{.*}}, Acquire, 1) // CHECK: scf.for // CHECK: aie.use_lock({{.*}}, Acquire, 1) @@ -94,7 +95,6 @@ // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: } -// CHECK: aie.use_lock({{.*}}, Acquire, 0) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: } {elf_file = diff --git a/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_aie2.mlir b/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_aie2.mlir index 8a7c052c0..ac718ef57 100644 --- 
a/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_aie2.mlir +++ b/mlir/test/Conversion/AIRToAIE/async_gemm_w_pingpong_to_locks_aie2.mlir @@ -29,6 +29,7 @@ // CHECK: aie.mem(%[[VAL_7]]) // CHECK: aie.core(%[[VAL_7]]) { // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) +// CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: scf.for // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) @@ -41,13 +42,13 @@ // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: } -// CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: } {elf_file = // CHECK: aie.mem(%[[VAL_6]]) // CHECK: aie.core(%[[VAL_6]]) // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) +// CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: scf.for // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) @@ -60,13 +61,13 @@ // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: } -// CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: } {elf_file = // CHECK: aie.mem(%[[VAL_5]]) // CHECK: aie.core(%[[VAL_5]]) // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) +// CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: scf.for // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) @@ -79,13 +80,13 @@ // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: } -// CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: } {elf_file = // CHECK: aie.mem(%[[VAL_4]]) // CHECK: aie.core(%[[VAL_4]]) // CHECK: 
aie.use_lock({{.*}}, AcquireGreaterEqual, 1) +// CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: scf.for // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) @@ -98,7 +99,6 @@ // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: } -// CHECK: aie.use_lock({{.*}}, AcquireGreaterEqual, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: } {elf_file = diff --git a/mlir/test/Conversion/AIRToAIE/lower_herd_air_regions.mlir b/mlir/test/Conversion/AIRToAIE/lower_herd_air_regions.mlir index cc54d8888..715d94892 100644 --- a/mlir/test/Conversion/AIRToAIE/lower_herd_air_regions.mlir +++ b/mlir/test/Conversion/AIRToAIE/lower_herd_air_regions.mlir @@ -8,6 +8,7 @@ // RUN: air-opt %s -air-to-aie | FileCheck %s // CHECK: aie.core({{.*}}) { +// CHECK: aie.use_lock({{.*}}, Acquire, 0) // CHECK: aie.use_lock({{.*}}, Acquire, 1) // CHECK: scf.for {{.*}} = {{.*}} to {{.*}} step {{.*}} { // CHECK: aie.use_lock({{.*}}, Acquire, 1) @@ -16,7 +17,6 @@ // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: aie.use_lock({{.*}}, Release, 0) // CHECK: } -// CHECK: aie.use_lock({{.*}}, Acquire, 0) // CHECK: aie.use_lock({{.*}}, Release, 1) // CHECK: aie.use_lock({{.*}}, Release, 0) #map = affine_map<()[s0] -> (s0 * 32)>