diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp index 0c501fb3e..206e7ada6 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp @@ -262,14 +262,8 @@ DoublyStridedOpInterface DmaCpyNdOp::createDoublyStridedOp( SmallVector &newSourceStrides) { Location loc = (*this)->getLoc(); auto newOp = rewriter.create( - loc, getTarget(), - getValueOrCreateConstantIndexOp(rewriter, loc, newTargetOffsets), - getValueOrCreateConstantIndexOp(rewriter, loc, newTargetSizes), - getValueOrCreateConstantIndexOp(rewriter, loc, newTargetStrides), - getSource(), - getValueOrCreateConstantIndexOp(rewriter, loc, newSourceOffsets), - getValueOrCreateConstantIndexOp(rewriter, loc, newSourceSizes), - getValueOrCreateConstantIndexOp(rewriter, loc, newSourceStrides)); + loc, getTarget(), newTargetOffsets, newTargetSizes, newTargetStrides, + getSource(), newSourceOffsets, newSourceSizes, newSourceStrides); return cast(newOp.getOperation()); } @@ -395,14 +389,8 @@ DoublyStridedOpInterface CircularDmaCpyNdOp::createDoublyStridedOp( SmallVector &newSourceStrides) { Location loc = (*this)->getLoc(); auto newOp = rewriter.create( - loc, getTarget(), - getValueOrCreateConstantIndexOp(rewriter, loc, newTargetOffsets), - getValueOrCreateConstantIndexOp(rewriter, loc, newTargetSizes), - getValueOrCreateConstantIndexOp(rewriter, loc, newTargetStrides), - getSource(), - getValueOrCreateConstantIndexOp(rewriter, loc, newSourceOffsets), - getValueOrCreateConstantIndexOp(rewriter, loc, newSourceSizes), - getValueOrCreateConstantIndexOp(rewriter, loc, newSourceStrides)); + loc, getTarget(), newTargetOffsets, newTargetSizes, newTargetStrides, + getSource(), newSourceOffsets, newSourceSizes, newSourceStrides); return cast(newOp.getOperation()); } @@ -835,15 +823,9 @@ DoublyStridedOpInterface NpuDmaCpyNdOp::createDoublyStridedOp( ::llvm::SmallVector &newSourceStrides) { Location loc = (*this)->getLoc(); auto newOp = rewriter.create( - loc, getDma(), getTarget(), - getValueOrCreateConstantIndexOp(rewriter, loc, newTargetOffsets), - getValueOrCreateConstantIndexOp(rewriter, loc, newTargetSizes), - getValueOrCreateConstantIndexOp(rewriter, loc, newTargetStrides), - getTargetBdId(), getSource(), - getValueOrCreateConstantIndexOp(rewriter, loc, newSourceOffsets), - getValueOrCreateConstantIndexOp(rewriter, loc, newSourceSizes), - getValueOrCreateConstantIndexOp(rewriter, loc, newSourceStrides), - getSourceBdId()); + loc, getDma(), getTarget(), newTargetOffsets, newTargetSizes, + newTargetStrides, getTargetBdId(), getSource(), newSourceOffsets, + newSourceSizes, newSourceStrides, getSourceBdId()); return cast(newOp.getOperation()); } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECanonicalizeDoublyStridedOp.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECanonicalizeDoublyStridedOp.cpp index e6b919eb0..bfe55ea42 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECanonicalizeDoublyStridedOp.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECanonicalizeDoublyStridedOp.cpp @@ -50,12 +50,12 @@ LogicalResult foldDmaOpLinearDims(RewriterBase &rewriter, LogicalResult foldDmaOpSingleDims(RewriterBase &rewriter, AMDAIE::DoublyStridedOpInterface op) { OpBuilder::InsertionGuard guard(rewriter); - SmallVector sourceOffsets = op.getSourceOffsets(); - SmallVector sourceSizes = op.getSourceSizes(); - SmallVector sourceStrides = op.getSourceStrides(); - SmallVector targetOffsets = op.getTargetOffsets(); - SmallVector targetSizes = op.getTargetSizes(); - SmallVector targetStrides = op.getTargetStrides(); + SmallVector sourceOffsets = op.getSourceMixedOffsets(); + SmallVector sourceSizes = op.getSourceMixedSizes(); + SmallVector sourceStrides = op.getSourceMixedStrides(); + SmallVector targetOffsets = op.getTargetMixedOffsets(); + SmallVector targetSizes = op.getTargetMixedSizes(); + SmallVector targetStrides = op.getTargetMixedStrides(); LogicalResult sourceRes = foldSingleDim(sourceOffsets, sourceSizes, sourceStrides); LogicalResult targetRes = @@ -145,7 +145,8 @@ void AMDAIECanonicalizeDoublyStridedOpPass::runOnOperation() { } // namespace -std::unique_ptr createAMDAIECanonicalizeDoublyStridedOpPass(AMDAIECanonicalizeDoublyStridedOpOptions options) { +std::unique_ptr createAMDAIECanonicalizeDoublyStridedOpPass( + AMDAIECanonicalizeDoublyStridedOpOptions options) { return std::make_unique(options); } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEDmaUtils.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEDmaUtils.cpp index 1ff0b3eec..dcba79e2b 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEDmaUtils.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEDmaUtils.cpp @@ -289,19 +289,17 @@ LogicalResult foldLinearDims(MLIRContext *ctx, LogicalResult foldSingleDim(SmallVector &offsets, SmallVector &sizes, SmallVector &strides) { - if (offsets.size() == 0) { - return failure(); - } - if (offsets.size() == 1 && getConstantIntValue(offsets[0]) && - getConstantIntValue(offsets[0]).value() == 0 && - getConstantIntValue(strides[0]) && - getConstantIntValue(strides[0]).value() == 1) { - offsets.clear(); - sizes.clear(); - strides.clear(); - return success(); - } - return failure(); + assert(offsets.size() == sizes.size() && offsets.size() == strides.size() && + "expected same number of source offsets and sizes"); + + if (offsets.size() != 1) return failure(); + if (!isConstantIntValue(offsets[0], 0)) return failure(); + if (!isConstantIntValue(strides[0], 1)) return failure(); + + offsets.clear(); + sizes.clear(); + strides.clear(); + return success(); } /// Fold unit dimensions within a strided access pattern. diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index 729da7457..b759c2a27 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -631,8 +631,6 @@ void addAMDAIEObjectFifoLoweringPasses(OpPassManager &passManager) { addAMDAIEToAIEPasses(passManager); - passManager.addPass(createCanonicalizerPass()); - // Now lower using the AIE passes from MLIR-AIE. addMLIRAIELoweringPasses(passManager); } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir index 67577eff1..c59dedd93 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir @@ -222,13 +222,10 @@ func.func @link_multiple_inputs_and_outputs_with_offsets(%arg0: memref<32x1024xi // Make sure offsets on the non-link side are not removed. // CHECK-LABEL: func.func @ensure_no_removal_of_offsets -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C1024:.+]] = arith.constant 1024 : index // CHECK: %[[DMA0:.+]] = amdaie.circular_dma_cpy_nd -// CHECK-SAME: [%[[C1]]] [%[[C1]]] [%[[C1024]]] -// CHECK-DAG: %[[C2048:.+]] = arith.constant 2048 : index +// CHECK-SAME: [1] [1] [1024] // CHECK: %[[DMA1:.+]] = amdaie.circular_dma_cpy_nd -// CHECK-SAME: [%[[C1]]] [%[[C1]]] [%[[C2048]]] +// CHECK-SAME: [1] [1] [2048] // CHECK: amdaie.logicalobjectfifo.link[%[[DMA0]]] -> [%[[DMA1]]] () func.func @ensure_no_removal_of_offsets(%arg0: memref<32x1024xi32>, %arg1: memref<32x64xi32, 1>, %arg2: memref<2x8x8x4x8xi32, 2>) { %0 = amdaie.logicalobjectfifo.from_memref %arg0, {} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/distribute_cores_and_objectfifos.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/distribute_cores_and_objectfifos.mlir index 9e592aafd..5269c9d15 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/distribute_cores_and_objectfifos.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/distribute_cores_and_objectfifos.mlir @@ -579,7 +579,6 @@ module { // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index // CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index -// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index // CHECK-DAG: %[[ALLOC_0:.+]] = memref.alloc() : memref<1x1x32x32xi32, 2> // CHECK-DAG: %[[ALLOC_1:.+]] = memref.alloc() : memref<2x2x32x32xi32, 1> // CHECK-DAG: %[[ALLOC_2:.+]] = memref.alloc() : memref<64x64xi32> @@ -596,19 +595,19 @@ module { // CHECK-DAG: %[[FROM_MEMREF_3:.+]] = amdaie.logicalobjectfifo.from_memref %[[ALLOC_0]], {%[[TILE_1_3]]} // CHECK-DAG: %[[FROM_MEMREF_4:.+]] = amdaie.logicalobjectfifo.from_memref %[[ALLOC_1]], {%[[TILE_0_1]]} // CHECK-DAG: %[[FROM_MEMREF_5:.+]] = amdaie.logicalobjectfifo.from_memref %[[ALLOC_2]], {%[[TILE_0_0]]} -// CHECK-DAG: %[[DMA_0:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c0, %c0] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_0]][%c0, %c0] [%c32, %c32] [%c32, %c1] +// CHECK-DAG: %[[DMA_0:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c0, %c0] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_0]][0, 0] [32, 32] [32, 1] // CHECK-DAG: %[[CORE_0_2:.*]] = amdaie.core(%[[TILE_0_2]], in : [], out : [%[[DMA_0]]]) // CHECK-DAG: %[[VAL_0:.+]] = amdaie.logicalobjectfifo.access(%[[FROM_MEMREF_0]], Write) // CHECK-DAG: linalg.fill ins(%{{.+}} : i32) outs(%[[VAL_0]] : memref<1x1x32x32xi32, 2>) -// CHECK-DAG: %[[DMA_1:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c0, %c1] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_1]][%c0, %c0] [%c32, %c32] [%c32, %c1] +// CHECK-DAG: %[[DMA_1:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c0, %c1] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_1]][0, 0] [32, 32] [32, 1] // CHECK-DAG: %[[CORE_1_2:.*]] = amdaie.core(%[[TILE_1_2]], in : [], out : [%[[DMA_1]]]) // CHECK-DAG: %[[VAL_0:.+]] = amdaie.logicalobjectfifo.access(%[[FROM_MEMREF_1]], Write) // CHECK-DAG: linalg.fill ins(%{{.+}} : i32) outs(%[[VAL_0]] : memref<1x1x32x32xi32, 2>) -// CHECK-DAG: %[[DMA_2:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c1, %c0] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_2]][%c0, %c0] [%c32, %c32] [%c32, %c1] +// CHECK-DAG: %[[DMA_2:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c1, %c0] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_2]][0, 0] [32, 32] [32, 1] // CHECK-DAG: %[[CORE_0_3:.*]] = amdaie.core(%[[TILE_0_3]], in : [], out : [%[[DMA_2]]]) // CHECK-DAG: %[[VAL_0:.+]] = amdaie.logicalobjectfifo.access(%[[FROM_MEMREF_2]], Write) // CHECK-DAG: linalg.fill ins(%{{.+}} : i32) outs(%[[VAL_0]] : memref<1x1x32x32xi32, 2>) -// CHECK-DAG: %[[DMA_3:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c1, %c1] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_3]][%c0, %c0] [%c32, %c32] [%c32, %c1] +// CHECK-DAG: %[[DMA_3:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c1, %c1] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_3]][0, 0] [32, 32] [32, 1] // CHECK-DAG: %[[CORE_1_3:.*]] = amdaie.core(%[[TILE_1_3]], in : [], out : [%[[DMA_3]]]) // CHECK-DAG: %[[VAL_0:.+]] = amdaie.logicalobjectfifo.access(%[[FROM_MEMREF_3]], Write) // CHECK-DAG: linalg.fill ins(%{{.+}} : i32) outs(%[[VAL_0]] : memref<1x1x32x32xi32, 2>)