diff --git a/compiler/plugins/target/AMD-AIE/aie/AIEOps.td b/compiler/plugins/target/AMD-AIE/aie/AIEOps.td index 87fd27e22..8d8a4f929 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AIEOps.td +++ b/compiler/plugins/target/AMD-AIE/aie/AIEOps.td @@ -23,6 +23,7 @@ class AIE_Op traits = []> : Op; def AIE_DeviceOp: AIE_Op<"device", [ + HasParent<"mlir::ModuleOp">, SymbolTable, SingleBlock, NoTerminator, IsolatedFromAbove ]> { let summary = "Define an AIE design targetting a complete device"; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index fcba2923b..515cc5be1 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -349,6 +349,20 @@ LogicalResult AIETargetBackend::serializeExecutable( ParserConfig pcfg(variantOp->getContext()); llvm::SourceMgr srcMgr; + // Move DeviceOp into its own ModuleOp, if there are multiple DeviceOps. + // Required as core-to-standard pass will move all ops in DeviceOps into + // the parent ModuleOp, so if they're not separated, core code between + // DeviceOps gets incorrectly concatenated. There's probably a simpler + // workaround, to be reviewed as we continue to remove layers of crust. + if (deviceOps.size() > 1) { + OpBuilder opBuilder(deviceOps[i].getContext()); + auto moduleWithOneDevice = + opBuilder.create(deviceOps[i].getLoc()); + opBuilder.setInsertionPointToStart(moduleWithOneDevice.getBody()); + Operation *repl = opBuilder.clone(*deviceOps[i].getOperation()); + deviceOps[i] = cast(repl); + } + if (failed(aie2xclbin( /*ctx=*/variantOp->getContext(), deviceOps[i], /*outputNPU=*/npuInstPath.str().str(), diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index e7285642f..b7c2b1578 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -647,7 +647,10 @@ static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp, bool printIRBeforeAll, bool printIRAfterAll, bool printIRModuleScope, bool timing, const Path &tempDir) { - AIE::DeviceOp copy = deviceOp.clone(); + + auto copy = cast(deviceOp.getParentOp()->clone()); + deviceOp = *copy.getOps().begin(); + std::string errorMessage; PassManager passManager(context, AIE::DeviceOp::getOperationName()); applyConfigToPassManager(passManager, printIRBeforeAll, printIRAfterAll, @@ -655,13 +658,13 @@ static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp, passManager.addPass( mlir::iree_compiler::AMDAIE::createAMDAIEPathfinderPass()); - if (failed(passManager.run(copy))) { + if (failed(passManager.run(deviceOp))) { llvm::errs() << "failed to run passes to prepare for XCLBin generation"; return failure(); } if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToCDODirect( - copy, tempDir.string()))) { + deviceOp, tempDir.string()))) { llvm::errs() << "failed to emit CDO"; return failure(); } @@ -1032,16 +1035,12 @@ static LogicalResult generateUnifiedObject( bool timing, bool useChess, bool verbose, Path tempDir, std::optional vitisDir, const std::string &targetArch, Path peanoDir) { - // TODO(newling) to avoid nesting the DeviceOp in a ModuleOp, - // we need to make changes to core-to-standard-pass. - ModuleOp moduleWithOneDevice; - { - OpBuilder opBuilder(deviceOp.getContext()); - moduleWithOneDevice = opBuilder.create(deviceOp.getLoc()); - opBuilder.setInsertionPointToStart(moduleWithOneDevice.getBody()); - opBuilder.clone(*deviceOp.getOperation()); - } - PassManager pm(context, moduleWithOneDevice.getOperationName()); + assert(deviceOp->getParentOp() && isa(deviceOp->getParentOp()) && + "DeviceOp must be in a module parent"); + + ModuleOp moduleOpCopy = cast(deviceOp->getParentOp()).clone(); + + PassManager pm(context, moduleOpCopy.getOperationName()); applyConfigToPassManager(pm, printIRBeforeAll, printIRAfterAll, printIRModuleScope, timing); @@ -1059,12 +1058,11 @@ static LogicalResult generateUnifiedObject( llvm::outs() << "\n"; } - // AIE::DeviceOp copy = deviceOp.clone(); - if (failed(pm.run(moduleWithOneDevice))) + if (failed(pm.run(moduleOpCopy))) return deviceOp.emitOpError("Failed to lower to LLVM"); llvm::LLVMContext llvmContext; - auto llvmModule = translateModuleToLLVMIR(moduleWithOneDevice, llvmContext); + auto llvmModule = translateModuleToLLVMIR(moduleOpCopy, llvmContext); if (!llvmModule) { return deviceOp.emitOpError("Failed to translate module to LLVMIR"); } @@ -1127,7 +1125,8 @@ static LogicalResult generateUnifiedObject( return failure(); } } - moduleWithOneDevice->erase(); + + moduleOpCopy->erase(); return success(); }