diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEFusePackIntoLoop.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEFusePackIntoLoop.cpp index cead3226e..1d087b373 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEFusePackIntoLoop.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEFusePackIntoLoop.cpp @@ -128,23 +128,34 @@ void AMDAIEFusePackIntoLoopPass::runOnOperation() { getTensorExtractSliceDefiningOp(operand); if (!failed(sliceOp)) { sliceOps.push_back(sliceOp.value()); + } else { + sliceOps.push_back({}); } } - if (sliceOps.empty()) { - LLVM_DEBUG(llvm::dbgs() << "----- Pack ops are already fused or no slice " - "ops were found.-----\n"); - return; - } - // Materialize each slice of the producer in place. - for (auto sliceOp : sliceOps) { - std::optional fusedProducer = - scf::tileAndFuseProducerOfSlice(rewriter, sliceOp, - MutableArrayRef(&loops, 1)); - if (!fusedProducer) { - funcOp->emitOpError("Failed to fuse pack ops into for loop."); - return signalPassFailure(); + for (auto iter : llvm::enumerate(sliceOps)) { + tensor::ExtractSliceOp sliceOp = iter.value(); + if (!sliceOp) { + Value operand = genericOp.getOperand(iter.index()); + auto parent = + dyn_cast_if_present(operand.getDefiningOp()); + if (parent) { + // Move `parent` to start of the block that generic is in: + Block *block = genericOp->getBlock(); + Operation *firstOpInBlock = &block->front(); + rewriter.moveOpBefore(parent, firstOpInBlock); + // return signalPassFailure(); + } + continue; + } else { + std::optional fusedProducer = + scf::tileAndFuseProducerOfSlice(rewriter, sliceOp, + MutableArrayRef(&loops, 1)); + if (!fusedProducer) { + funcOp->emitOpError("Failed to fuse pack ops into for loop."); + return signalPassFailure(); + } } } }