Skip to content

Commit

Permalink
[CanonicalizeDoublyStridedOp] Fix single dimension folder (#737)
Browse files Browse the repository at this point in the history
I'm starting to look deeper into creating links when the
`circular_dma_cpy_nd` ops being linked are not mutually 'contiguous'.
I noticed that Attributes unnecessarily become Constant ops in
`create-logical-objectfifo-link`, so I fixed this by removing
`getValueOrCreateConstantIndexOp`. I then noticed that the test
`canonicalize_doubly_strided_op.mlir` was failing, and this was because
`getTargetStrides` was being used where `getTargetMixedStrides`
should have been, so I fixed that. I also removed a redundant canonicalization
call (print-ir-after-all showed 2 back-to-back canonicalizations).
  • Loading branch information
newling authored Sep 3, 2024
1 parent a0c372a commit 07ea41d
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 57 deletions.
32 changes: 7 additions & 25 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,14 +262,8 @@ DoublyStridedOpInterface DmaCpyNdOp::createDoublyStridedOp(
SmallVector<OpFoldResult> &newSourceStrides) {
Location loc = (*this)->getLoc();
auto newOp = rewriter.create<AMDAIE::DmaCpyNdOp>(
loc, getTarget(),
getValueOrCreateConstantIndexOp(rewriter, loc, newTargetOffsets),
getValueOrCreateConstantIndexOp(rewriter, loc, newTargetSizes),
getValueOrCreateConstantIndexOp(rewriter, loc, newTargetStrides),
getSource(),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceOffsets),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceSizes),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceStrides));
loc, getTarget(), newTargetOffsets, newTargetSizes, newTargetStrides,
getSource(), newSourceOffsets, newSourceSizes, newSourceStrides);
return cast<DoublyStridedOpInterface>(newOp.getOperation());
}

Expand Down Expand Up @@ -395,14 +389,8 @@ DoublyStridedOpInterface CircularDmaCpyNdOp::createDoublyStridedOp(
SmallVector<OpFoldResult> &newSourceStrides) {
Location loc = (*this)->getLoc();
auto newOp = rewriter.create<AMDAIE::CircularDmaCpyNdOp>(
loc, getTarget(),
getValueOrCreateConstantIndexOp(rewriter, loc, newTargetOffsets),
getValueOrCreateConstantIndexOp(rewriter, loc, newTargetSizes),
getValueOrCreateConstantIndexOp(rewriter, loc, newTargetStrides),
getSource(),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceOffsets),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceSizes),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceStrides));
loc, getTarget(), newTargetOffsets, newTargetSizes, newTargetStrides,
getSource(), newSourceOffsets, newSourceSizes, newSourceStrides);
return cast<DoublyStridedOpInterface>(newOp.getOperation());
}

Expand Down Expand Up @@ -835,15 +823,9 @@ DoublyStridedOpInterface NpuDmaCpyNdOp::createDoublyStridedOp(
::llvm::SmallVector<OpFoldResult> &newSourceStrides) {
Location loc = (*this)->getLoc();
auto newOp = rewriter.create<AMDAIE::NpuDmaCpyNdOp>(
loc, getDma(), getTarget(),
getValueOrCreateConstantIndexOp(rewriter, loc, newTargetOffsets),
getValueOrCreateConstantIndexOp(rewriter, loc, newTargetSizes),
getValueOrCreateConstantIndexOp(rewriter, loc, newTargetStrides),
getTargetBdId(), getSource(),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceOffsets),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceSizes),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceStrides),
getSourceBdId());
loc, getDma(), getTarget(), newTargetOffsets, newTargetSizes,
newTargetStrides, getTargetBdId(), getSource(), newSourceOffsets,
newSourceSizes, newSourceStrides, getSourceBdId());
return cast<DoublyStridedOpInterface>(newOp.getOperation());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ LogicalResult foldDmaOpLinearDims(RewriterBase &rewriter,
LogicalResult foldDmaOpSingleDims(RewriterBase &rewriter,
AMDAIE::DoublyStridedOpInterface op) {
OpBuilder::InsertionGuard guard(rewriter);
SmallVector<OpFoldResult> sourceOffsets = op.getSourceOffsets();
SmallVector<OpFoldResult> sourceSizes = op.getSourceSizes();
SmallVector<OpFoldResult> sourceStrides = op.getSourceStrides();
SmallVector<OpFoldResult> targetOffsets = op.getTargetOffsets();
SmallVector<OpFoldResult> targetSizes = op.getTargetSizes();
SmallVector<OpFoldResult> targetStrides = op.getTargetStrides();
SmallVector<OpFoldResult> sourceOffsets = op.getSourceMixedOffsets();
SmallVector<OpFoldResult> sourceSizes = op.getSourceMixedSizes();
SmallVector<OpFoldResult> sourceStrides = op.getSourceMixedStrides();
SmallVector<OpFoldResult> targetOffsets = op.getTargetMixedOffsets();
SmallVector<OpFoldResult> targetSizes = op.getTargetMixedSizes();
SmallVector<OpFoldResult> targetStrides = op.getTargetMixedStrides();
LogicalResult sourceRes =
foldSingleDim(sourceOffsets, sourceSizes, sourceStrides);
LogicalResult targetRes =
Expand Down Expand Up @@ -145,7 +145,8 @@ void AMDAIECanonicalizeDoublyStridedOpPass::runOnOperation() {

} // namespace

std::unique_ptr<Pass> createAMDAIECanonicalizeDoublyStridedOpPass(AMDAIECanonicalizeDoublyStridedOpOptions options) {
std::unique_ptr<Pass> createAMDAIECanonicalizeDoublyStridedOpPass(
AMDAIECanonicalizeDoublyStridedOpOptions options) {
return std::make_unique<AMDAIECanonicalizeDoublyStridedOpPass>(options);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,19 +289,17 @@ LogicalResult foldLinearDims(MLIRContext *ctx,
LogicalResult foldSingleDim(SmallVector<OpFoldResult> &offsets,
SmallVector<OpFoldResult> &sizes,
SmallVector<OpFoldResult> &strides) {
if (offsets.size() == 0) {
return failure();
}
if (offsets.size() == 1 && getConstantIntValue(offsets[0]) &&
getConstantIntValue(offsets[0]).value() == 0 &&
getConstantIntValue(strides[0]) &&
getConstantIntValue(strides[0]).value() == 1) {
offsets.clear();
sizes.clear();
strides.clear();
return success();
}
return failure();
assert(offsets.size() == sizes.size() && offsets.size() == strides.size() &&
"expected same number of source offsets and sizes");

if (offsets.size() != 1) return failure();
if (!isConstantIntValue(offsets[0], 0)) return failure();
if (!isConstantIntValue(strides[0], 1)) return failure();

offsets.clear();
sizes.clear();
strides.clear();
return success();
}

/// Fold unit dimensions within a strided access pattern.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -631,8 +631,6 @@ void addAMDAIEObjectFifoLoweringPasses(OpPassManager &passManager) {

addAMDAIEToAIEPasses(passManager);

passManager.addPass(createCanonicalizerPass());

// Now lower using the AIE passes from MLIR-AIE.
addMLIRAIELoweringPasses(passManager);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,10 @@ func.func @link_multiple_inputs_and_outputs_with_offsets(%arg0: memref<32x1024xi

// Make sure offsets on the non-link side are not removed.
// CHECK-LABEL: func.func @ensure_no_removal_of_offsets
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C1024:.+]] = arith.constant 1024 : index
// CHECK: %[[DMA0:.+]] = amdaie.circular_dma_cpy_nd
// CHECK-SAME: [%[[C1]]] [%[[C1]]] [%[[C1024]]]
// CHECK-DAG: %[[C2048:.+]] = arith.constant 2048 : index
// CHECK-SAME: [1] [1] [1024]
// CHECK: %[[DMA1:.+]] = amdaie.circular_dma_cpy_nd
// CHECK-SAME: [%[[C1]]] [%[[C1]]] [%[[C2048]]]
// CHECK-SAME: [1] [1] [2048]
// CHECK: amdaie.logicalobjectfifo.link[%[[DMA0]]] -> [%[[DMA1]]] ()
func.func @ensure_no_removal_of_offsets(%arg0: memref<32x1024xi32>, %arg1: memref<32x64xi32, 1>, %arg2: memref<2x8x8x4x8xi32, 2>) {
%0 = amdaie.logicalobjectfifo.from_memref %arg0, {} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo<memref<32x1024xi32>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,6 @@ module {
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index
// CHECK-DAG: %[[ALLOC_0:.+]] = memref.alloc() : memref<1x1x32x32xi32, 2>
// CHECK-DAG: %[[ALLOC_1:.+]] = memref.alloc() : memref<2x2x32x32xi32, 1>
// CHECK-DAG: %[[ALLOC_2:.+]] = memref.alloc() : memref<64x64xi32>
Expand All @@ -596,19 +595,19 @@ module {
// CHECK-DAG: %[[FROM_MEMREF_3:.+]] = amdaie.logicalobjectfifo.from_memref %[[ALLOC_0]], {%[[TILE_1_3]]}
// CHECK-DAG: %[[FROM_MEMREF_4:.+]] = amdaie.logicalobjectfifo.from_memref %[[ALLOC_1]], {%[[TILE_0_1]]}
// CHECK-DAG: %[[FROM_MEMREF_5:.+]] = amdaie.logicalobjectfifo.from_memref %[[ALLOC_2]], {%[[TILE_0_0]]}
// CHECK-DAG: %[[DMA_0:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c0, %c0] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_0]][%c0, %c0] [%c32, %c32] [%c32, %c1]
// CHECK-DAG: %[[DMA_0:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c0, %c0] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_0]][0, 0] [32, 32] [32, 1]
// CHECK-DAG: %[[CORE_0_2:.*]] = amdaie.core(%[[TILE_0_2]], in : [], out : [%[[DMA_0]]])
// CHECK-DAG: %[[VAL_0:.+]] = amdaie.logicalobjectfifo.access(%[[FROM_MEMREF_0]], Write)
// CHECK-DAG: linalg.fill ins(%{{.+}} : i32) outs(%[[VAL_0]] : memref<1x1x32x32xi32, 2>)
// CHECK-DAG: %[[DMA_1:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c0, %c1] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_1]][%c0, %c0] [%c32, %c32] [%c32, %c1]
// CHECK-DAG: %[[DMA_1:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c0, %c1] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_1]][0, 0] [32, 32] [32, 1]
// CHECK-DAG: %[[CORE_1_2:.*]] = amdaie.core(%[[TILE_1_2]], in : [], out : [%[[DMA_1]]])
// CHECK-DAG: %[[VAL_0:.+]] = amdaie.logicalobjectfifo.access(%[[FROM_MEMREF_1]], Write)
// CHECK-DAG: linalg.fill ins(%{{.+}} : i32) outs(%[[VAL_0]] : memref<1x1x32x32xi32, 2>)
// CHECK-DAG: %[[DMA_2:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c1, %c0] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_2]][%c0, %c0] [%c32, %c32] [%c32, %c1]
// CHECK-DAG: %[[DMA_2:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c1, %c0] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_2]][0, 0] [32, 32] [32, 1]
// CHECK-DAG: %[[CORE_0_3:.*]] = amdaie.core(%[[TILE_0_3]], in : [], out : [%[[DMA_2]]])
// CHECK-DAG: %[[VAL_0:.+]] = amdaie.logicalobjectfifo.access(%[[FROM_MEMREF_2]], Write)
// CHECK-DAG: linalg.fill ins(%{{.+}} : i32) outs(%[[VAL_0]] : memref<1x1x32x32xi32, 2>)
// CHECK-DAG: %[[DMA_3:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c1, %c1] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_3]][%c0, %c0] [%c32, %c32] [%c32, %c1]
// CHECK-DAG: %[[DMA_3:.*]] = amdaie.dma_cpy_nd(%[[FROM_MEMREF_4]][%c1, %c1] [%c1, %c1] [%c1, %c1], %[[FROM_MEMREF_3]][0, 0] [32, 32] [32, 1]
// CHECK-DAG: %[[CORE_1_3:.*]] = amdaie.core(%[[TILE_1_3]], in : [], out : [%[[DMA_3]]])
// CHECK-DAG: %[[VAL_0:.+]] = amdaie.logicalobjectfifo.access(%[[FROM_MEMREF_3]], Write)
// CHECK-DAG: linalg.fill ins(%{{.+}} : i32) outs(%[[VAL_0]] : memref<1x1x32x32xi32, 2>)
Expand Down

0 comments on commit 07ea41d

Please sign in to comment.