25
25
#include " iree-amd-aie/Transforms/AMDAIEDmaUtils.h"
26
26
#include " iree-amd-aie/Transforms/AMDAIEUtils.h"
27
27
#include " iree-amd-aie/Transforms/Passes.h"
28
+ #include " iree-amd-aie/Transforms/Transforms.h"
28
29
#include " iree-amd-aie/aie_runtime/iree_aie_runtime.h"
29
30
#include " mlir/Dialect/Affine/IR/AffineOps.h"
30
31
#include " mlir/Dialect/SCF/IR/SCF.h"
@@ -49,15 +50,6 @@ int64_t calculateNbIterations(int64_t lowerBound, int64_t upperBound,
49
50
50
51
namespace {
51
52
52
- /// Return 'op' itself or its closest ancestor located directly in 'block', or nullptr if 'op'/'block' is null or no such ancestor exists.
53
- Operation *getAncestorInBlock (Operation *op, Block *block) {
54
- if (!op || !block) return nullptr ; // Nothing to search for without both an op and a target block.
55
- auto parent = op; // Start from 'op' itself, so 'op' is returned when it already sits in 'block'.
56
- while (parent && (parent->getBlock () != block)) // Stop on a match, or once the parent chain yields null.
57
- parent = parent->getParentOp (); // Step to the enclosing op.
58
- return parent; // Either the op found in 'block', or null when the chain ran out.
59
- }
60
-
61
53
/// Utility affine expression visitor to retrieve the scale and optional bias
62
54
/// from the expression.
63
55
struct RetrieveScaleAndBias
@@ -112,31 +104,6 @@ struct RetrieveScaleAndBias
112
104
}
113
105
};
114
106
115
- /// Utility to clean up the DMA users after loop subsumption + hoisting. This
116
- /// walks every `amdaie.npu.dma_wait` nested under `parentOp` and moves it into the block of the DMA op it waits on.
117
- LogicalResult moveUsersToHoistedDMAScope (Operation *parentOp) {
118
- IRRewriter rewriter (parentOp->getContext ());
119
- // Move `amdaie.npu.dma_wait` operation after the parent op in the same block
120
- // as the input `amdaie.npu.dma_cpy_nd` operation. This parent op will
121
- // typically be a loop out of which the DMA operation has been hoisted. Moving
122
- // the wait operation after this loop is important to avoid a deadlock with
123
- // whatever operations are still remaining inside the loop's scope.
124
- WalkResult res = parentOp->walk ([&](AMDAIE::NpuDmaWaitOp npuDmaWaitOp) {
125
- Operation *dmaOp = npuDmaWaitOp.getDma ().getDefiningOp (); // NOTE(review): dmaOp is dereferenced below without a null check; confirm getDma() is always an op result.
126
- Operation *ancestorInSameBlock =
127
- getAncestorInBlock (npuDmaWaitOp, dmaOp->getBlock ()); // The wait op itself, or the enclosing op that lives in the DMA op's block.
128
- if (!ancestorInSameBlock) {
129
- npuDmaWaitOp->emitOpError (
130
- " doesn't have an ancestor in the same scope as the source DMA op" );
131
- return WalkResult::interrupt (); // Abort the walk; reported as failure() below.
132
- }
133
- rewriter.moveOpAfter (npuDmaWaitOp, ancestorInSameBlock); // Place the wait right behind that ancestor.
134
- return WalkResult::advance ();
135
- });
136
- if (res.wasInterrupted ()) return failure (); // Some wait op had no ancestor in its DMA op's block; the error was emitted on that op.
137
- return success ();
138
- }
139
-
140
107
struct SubsumeLoopIntoDMA
141
108
: public OpInterfaceRewritePattern<AMDAIE::DoublyStridedOpInterface> {
142
109
using OpInterfaceRewritePattern::OpInterfaceRewritePattern;
@@ -594,7 +561,7 @@ class AMDAIEDmaLoopSubsumptionPass
594
561
}
595
562
596
563
AMDAIEDmaLoopSubsumptionPass () = default ;
597
- AMDAIEDmaLoopSubsumptionPass (const AMDAIEDmaLoopSubsumptionPass &pass) {};
564
+ AMDAIEDmaLoopSubsumptionPass (const AMDAIEDmaLoopSubsumptionPass &pass){};
598
565
AMDAIEDmaLoopSubsumptionPass (const AMDAIEDmaLoopSubsumptionOptions &options)
599
566
: AMDAIEDmaLoopSubsumptionBase(options) {}
600
567
void runOnOperation () override ;
@@ -605,7 +572,6 @@ void AMDAIEDmaLoopSubsumptionPass::runOnOperation() {
605
572
MLIRContext *context = &getContext ();
606
573
607
574
RewritePatternSet patterns (context);
608
-
609
575
{
610
576
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup (parentOp);
611
577
std::optional<AMDAIEDevice> maybeDevice = getConfigAMDAIEDevice (targetAttr);
@@ -619,19 +585,17 @@ void AMDAIEDmaLoopSubsumptionPass::runOnOperation() {
619
585
}
620
586
AMDAIE::AMDAIEDeviceModel deviceModel =
621
587
AMDAIE::getDeviceModel (maybeDevice.value ());
622
-
623
- SubsumeLoopIntoDMA pattern (context, std::move (deviceModel),
624
- onlyZeroStrideOnOuterDim);
625
-
626
- patterns.insert <SubsumeLoopIntoDMA>(std::move (pattern));
588
+ populateDmaLoopSubsumptionPattern (patterns, std::move (deviceModel),
589
+ onlyZeroStrideOnOuterDim);
627
590
}
628
591
629
592
if (failed (applyPatternsAndFoldGreedily (parentOp, std::move (patterns)))) {
630
593
parentOp->emitOpError (" failed to subsume some loops into DMA operations" );
631
594
return signalPassFailure ();
632
595
}
633
596
634
- if (failed (moveUsersToHoistedDMAScope (parentOp))) {
597
+ IRRewriter rewriter (parentOp->getContext ());
598
+ if (failed (moveNpuDmaSyncUsersAfterAncestorInSameBlock (rewriter, parentOp))) {
635
599
parentOp->emitOpError (
636
600
" failed to move DMA users to correct scope after loop subsumption" );
637
601
return signalPassFailure ();
@@ -640,6 +604,14 @@ void AMDAIEDmaLoopSubsumptionPass::runOnOperation() {
640
604
641
605
} // namespace
642
606
607
/// Adds a `SubsumeLoopIntoDMA` rewrite pattern to `patterns`.
/// The pattern is constructed from the context of `patterns`, the given
/// `deviceModel` (moved from, so the caller's object must not be reused) and
/// the `onlyZeroStrideOnOuterDim` flag, which is forwarded unchanged.
+ void populateDmaLoopSubsumptionPattern (RewritePatternSet &patterns,
608
+ AMDAIE::AMDAIEDeviceModel &&deviceModel,
609
+ bool onlyZeroStrideOnOuterDim) {
610
+ SubsumeLoopIntoDMA pattern (patterns.getContext (), std::move (deviceModel),
611
+ onlyZeroStrideOnOuterDim);
612
+ patterns.insert <SubsumeLoopIntoDMA>(std::move (pattern)); // Hand the locally built pattern over to the set.
613
+ }
614
+
643
615
std::unique_ptr<Pass> createAMDAIEDmaLoopSubsumptionPass (
644
616
AMDAIEDmaLoopSubsumptionOptions options) {
645
617
return std::make_unique<AMDAIEDmaLoopSubsumptionPass>(options);
0 commit comments