Skip to content

Commit

Permalink
squashed commit
Browse files Browse the repository at this point in the history
  • Loading branch information
newling committed Aug 20, 2024
1 parent d107305 commit cf909f3
Show file tree
Hide file tree
Showing 23 changed files with 204 additions and 397 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,12 @@

#include <algorithm>
#include <cassert>
#include <list>
#include <set>

#include "AIEDialect.h"
#include "Passes.h"
#include "iree-amd-aie/aie_runtime/iree_aie_router.h"
#include "iree-amd-aie/aie_runtime/iree_aie_runtime.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/raw_os_ostream.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"

Expand Down
36 changes: 18 additions & 18 deletions compiler/plugins/target/AMD-AIE/aie/AMDAIEDmaToNpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "iree-amd-aie/aie_runtime/iree_aie_runtime.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Format.h"
#include "mlir/IR/AsmState.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
Expand Down Expand Up @@ -512,6 +511,7 @@ struct AMDAIEDmaToNpuPass : mlir::OperationPass<DeviceOp> {

instructions[2] = count;
instructions[3] = instructions.size() * sizeof(uint32_t);

ArrayRef<uint32_t> instsArrRef(instructions.data(), instructions.size());
device->setAttr(
"npu_instructions",
Expand All @@ -521,24 +521,24 @@ struct AMDAIEDmaToNpuPass : mlir::OperationPass<DeviceOp> {
IntegerType::get(&getContext(), 32, IntegerType::Unsigned)),
"npu_instructions",
HeapAsmResourceBlob::allocateAndCopyInferAlign(instsArrRef)));
// The LX instructions for the entry point function are already generated by
// the pass hence we can safely delete the function as it is of no use to
// us. A reason to do this is that otherwise it is unceseccarily lowered to
// llvm where it can have a chance to crash in case the argument list is not
// lowerable for reasons such as memref's with dynamic offsets.
auto symName = dyn_cast_or_null<StringAttr>(device->getAttr("sym_name"));

SmallVector<RuntimeSequenceOp> seqOps;
device->walk([&](RuntimeSequenceOp seqOp) {
// if the deviceOp has a symbol name attached to it we look for the
// sequence op that partically matches that symbol, if not we collect all
// sequenceOps.
if (!symName ||
symName.str().find(seqOp.getSymName()->str()) != std::string::npos)
seqOps.push_back(seqOp);
});
// If exactly one entry point function is found we can delete it. For any
// other result we do not make any change.
if (seqOps.size() == 1) seqOps[0].erase();
device->walk([&](RuntimeSequenceOp seqOp) { seqOps.push_back(seqOp); });

if (seqOps.size() > 1) {
device->emitOpError("has ")
<< seqOps.size()
<< " aiex.runtime_sequence ops. Expected no more than 1.";
signalPassFailure();
}

if (seqOps.size() == 1) {
auto seqOp = seqOps[0];
StringRef name = seqOp.getSymName().value();
device->setAttr("runtime_sequence_name",
StringAttr::get(&getContext(), name));
seqOp.erase();
}
}
};

Expand Down
89 changes: 0 additions & 89 deletions compiler/plugins/target/AMD-AIE/aie/AMDAIEXToStandard.cpp

This file was deleted.

1 change: 0 additions & 1 deletion compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ iree_cc_library(
AMDAIELocalizeLocks.cpp
AMDAIENormalizeAddressSpaces.cpp
AMDAIEObjectFifoStatefulTransform.cpp
AMDAIEXToStandard.cpp
DEPS
iree-amd-aie::aie_runtime::iree_aie_runtime_static
::AIEDialectIR
Expand Down
4 changes: 1 addition & 3 deletions compiler/plugins/target/AMD-AIE/aie/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ createAMDAIEPathfinderPass();
std::unique_ptr<OperationPass<ModuleOp>> createAMDAIECoreToStandardPass();
std::unique_ptr<OperationPass<xilinx::AIE::DeviceOp>>
createAMDAIEDmaToNpuPass();
std::unique_ptr<OperationPass<ModuleOp>> createAMDAIEXToStandardPass();

void registerAMDAIEAssignBufferAddressesBasic();
void registerAMDAIEAssignBufferDescriptorIDs();
Expand All @@ -44,9 +43,8 @@ void registerAMDAIELocalizeLocks();
void registerAMDAIENormalizeAddressSpaces();
void registerAMDAIEObjectFifoStatefulTransform();
void registerAMDAIERoutePathfinderFlows();

void registerAMDAIEDmaToNpu();
void registerAMDAIEXToStandardPass();

} // namespace mlir::iree_compiler::AMDAIE

#endif // AMDAIE_PASSES_H_
2 changes: 1 addition & 1 deletion compiler/plugins/target/AMD-AIE/aie/test/aiert_insts.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// CHECK: memref.global "public" @of_fromMem : memref<32xi32>
// CHECK: aie.shim_dma_allocation @of_fromMem(MM2S, 0, 0)
// CHECK: aie.shim_dma_allocation @of_toMem(S2MM, 0, 0)
// CHECK: } {npu_instructions = dense_resource<npu_instructions> : tensor<64xui32>}
// CHECK: } {npu_instructions = dense_resource<npu_instructions> : tensor<64xui32>, runtime_sequence_name = "sequence"}

// CHECK: {-#
// CHECK: dialect_resources: {
Expand Down

This file was deleted.

23 changes: 0 additions & 23 deletions compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -77,26 +77,3 @@ module {
} {sym_name = "explicit_sym_name_0"}
}

// -----

// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK: memref.global "public" @toMem : memref<16xi32>
// CHECK: func.func @pretend_microkernel
// CHECK: aiex.runtime_sequence @explicit_sym_name
// CHECK: aie.shim_dma_allocation @toMem(MM2S, 1, 1)

module {
aie.device(npu1_4col) {
memref.global "public" @toMem : memref<16xi32>
func.func @pretend_microkernel(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
return
}

aiex.runtime_sequence @explicit_sym_name(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_wait {symbol = @toMem}
}
aie.shim_dma_allocation @toMem (MM2S, 1, 1)
} {sym_name = "wrong_sym_name"}
}

Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ struct AMDAIESession
AMDAIE::registerAMDAIEObjectFifoStatefulTransform();
AMDAIE::registerAMDAIERoutePathfinderFlows();
AMDAIE::registerAMDAIEDmaToNpu();
AMDAIE::registerAMDAIEXToStandardPass();
AMDAIE::registerAIRConversionPasses();
AMDAIE::registerAIRTransformPasses();
aievec::registerConvertAIEVecToLLVMPass();
Expand Down
93 changes: 39 additions & 54 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include "aievec/XLLVMDialect.h"
#include "air/Dialect/AIR/AIRDialect.h"
#include "air/Dialect/AIRRt/AIRRtDialect.h"
#include "iree-amd-aie/IR/AMDAIEAttrs.h"
#include "iree-amd-aie/IR/AMDAIEDialect.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h"
Expand All @@ -27,9 +26,12 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
#include "mlir/Conversion/Passes.h"
#include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/EmitC/IR/EmitC.h"
#include "mlir/Dialect/Func/Extensions/AllExtensions.h"
Expand Down Expand Up @@ -75,42 +77,27 @@ static llvm::cl::opt<std::string> clEnableAMDAIEUkernels(
"unprefixed microkernels to enable, e.g. `matmul`."),
llvm::cl::init("none"));

// Utility to find aie.device Op corresponding to the export Op.
// For example, we have
// hal.executable.variant {
// hal.executable.export symbol1
// hal.executable.export symbol2
// module {
// aie.device {
// ...
// aiex.runtime_sequence symbol1
// }
// aie.device {
// ...
// aiex.runtime_sequence symbol2
// }
// }
// }
// Hence we need to find the aiex.runtime_sequence that coresponds to the export
// op symbol and return its parent aie.device Op. This is what we will pass to
// the `aie2xclbin` tool for artifact generation per entry point.
static xilinx::AIE::DeviceOp getDeviceOpFromEntryPoint(ModuleOp moduleOp,
StringRef exportOpName) {
static xilinx::AIE::DeviceOp getDeviceOpWithName(ModuleOp moduleOp,
StringRef targetName) {
xilinx::AIE::DeviceOp deviceOp;

moduleOp.walk([&](xilinx::AIEX::RuntimeSequenceOp sequenceOp) {
if (sequenceOp.getSymName() == exportOpName) {
deviceOp =
dyn_cast_or_null<xilinx::AIE::DeviceOp>(sequenceOp->getParentOp());
return WalkResult::interrupt();
}
return WalkResult::advance();
uint32_t nDeviceOpsVisited = 0;
moduleOp.walk([&](xilinx::AIE::DeviceOp d) {
++nDeviceOpsVisited;
// This attribute should've been set in the dma-to-npu pass.
auto maybeName = d->getAttrOfType<StringAttr>("runtime_sequence_name");
if (!maybeName) return WalkResult::advance();
auto name = maybeName.getValue();
if (name != targetName) return WalkResult::advance();
deviceOp = d;
return WalkResult::interrupt();
});
if (!deviceOp) {
moduleOp.emitError()
<< "failed to find aie.device containing func.func with symbol "
<< exportOpName;
}

if (!deviceOp)
moduleOp.emitError() << "visited " << nDeviceOpsVisited
<< " aie.device ops, and failed to find one with name "
<< targetName;

return deviceOp;
}

Expand Down Expand Up @@ -291,7 +278,7 @@ LogicalResult AIETargetBackend::serializeExecutable(
}

StringRef exportOpName = exportOp.getSymName();
deviceOps.push_back(getDeviceOpFromEntryPoint(moduleOp, exportOpName));
deviceOps.push_back(getDeviceOpWithName(moduleOp, exportOpName));

// The xclbin kernel name, appended with instance name suffix (`:MLIRAIEV1`,
// 10 chars) is required by the xclbinutil to have a length smaller or equal
Expand Down Expand Up @@ -334,21 +321,8 @@ LogicalResult AIETargetBackend::serializeExecutable(
uint64_t ordinal = entryPointOrdinals.at(entryPointNames[i]);

entryPointNamesFb[ordinal] = entryPointNames[i];

SmallString<128> inputMlirPath(workDir);
llvm::sys::path::append(inputMlirPath,
entryPointNamesFb[ordinal] + ".aiecc.mlir");

std::string errorMessage;
{
auto inputMlirOut = openOutputFile(inputMlirPath, &errorMessage);
if (!inputMlirOut) {
return moduleOp.emitOpError()
<< "Failed to write MLIR: " << errorMessage;
}
deviceOps[i].print(inputMlirOut->os(), OpPrintingFlags().useLocalScope());
inputMlirOut->keep();
}

// we add the entry point to the working directory for xclbin artifacts if
// there are multiple entry points so that we dont overwrite the xclbinutil
// generated artifacts e.g kernels.json, for different entry points which
Expand All @@ -375,11 +349,22 @@ LogicalResult AIETargetBackend::serializeExecutable(
ParserConfig pcfg(variantOp->getContext());
llvm::SourceMgr srcMgr;

OwningOpRef<ModuleOp> owningModuleOp =
parseSourceFile<ModuleOp>(inputMlirPath, srcMgr, pcfg);
// Move DeviceOp into its own ModuleOp, if there are multiple DeviceOps.
// Required as core-to-standard pass will move all ops in DeviceOps into
// the parent ModuleOp, so if they're not separated, core code between
// DeviceOps gets incorrectly concatenated. There's probably a simpler
// workaround, to be reviewed as we continue to remove layers of crust.
if (deviceOps.size() > 1) {
OpBuilder opBuilder(deviceOps[i].getContext());
auto moduleWithOneDevice =
opBuilder.create<ModuleOp>(deviceOps[i].getLoc());
opBuilder.setInsertionPointToStart(moduleWithOneDevice.getBody());
Operation *repl = opBuilder.clone(*deviceOps[i].getOperation());
deviceOps[i] = cast<xilinx::AIE::DeviceOp>(repl);
}

if (failed(aie2xclbin(
/*ctx=*/variantOp->getContext(), /*moduleOp=*/*owningModuleOp,
/*ctx=*/variantOp->getContext(), deviceOps[i],
/*outputNPU=*/npuInstPath.str().str(),
/*outputXCLBin=*/xclbinPath.str().str(),
/*printIRBeforeAll=*/options.aie2xclbinPrintIrBeforeAll,
Expand Down
Loading

0 comments on commit cf909f3

Please sign in to comment.