Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve compile time (AIRRtToNpuPass) #551

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 37 additions & 3 deletions mlir/lib/Conversion/AIRRtToNpuPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1162,10 +1162,27 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase<AIRRtToNpuPass> {
SmallVector<AIEX::NpuDmaMemcpyNdOp> dmas;
f.walk([&](AIEX::NpuDmaMemcpyNdOp dma) { dmas.push_back(dma); });
auto d = f->getParentOfType<AIE::DeviceOp>();

// Compile-time optimization: memoize getAllocOpForSymbol per symbol name.
// The first query for a given name calls getAllocOpForSymbol on `d` and
// records whatever it returned (an empty optional is recorded too); every
// later query for that name is answered straight from the map. This
// dramatically improves compile time for some designs.
llvm::DenseMap<StringRef, std::optional<AIE::ShimDMAAllocationOp>>
    allocationCache;
auto getAllocOpForSymbolWithCaching = [&](StringRef sym_name) {
  // Claim the map slot up front; `isNewEntry` is true only when this name
  // has not been seen before and the slot still has to be filled in.
  auto [slot, isNewEntry] = allocationCache.try_emplace(sym_name);
  if (isNewEntry)
    slot->second = getAllocOpForSymbol(d, sym_name);
  return slot->second;
};

if (!d)
return;
for (auto dma : dmas) {
if (auto infoOp = getAllocOpForSymbol(d, dma.getMetadata())) {
if (auto infoOp = getAllocOpForSymbolWithCaching(dma.getMetadata())) {
if (infoOp->getChannelDir() == AIE::DMAChannelDir::S2MM) {
// Found dma op copying results to host
OpBuilder builder(dma);
Expand All @@ -1189,7 +1206,7 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase<AIRRtToNpuPass> {
if (auto sync = dyn_cast<AIEX::NpuSyncOp>(op))
previsouSyncs.push_back(sync);
else if (auto dma = dyn_cast<AIEX::NpuDmaMemcpyNdOp>(op)) {
auto infoOp = getAllocOpForSymbol(d, dma.getMetadata());
auto infoOp = getAllocOpForSymbolWithCaching(dma.getMetadata());
if (infoOp && infoOp->getChannelDir() == AIE::DMAChannelDir::S2MM &&
!previsouSyncs.empty()) {
for (auto prevSync : previsouSyncs)
Expand All @@ -1209,12 +1226,29 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase<AIRRtToNpuPass> {
std::map<int, int> chanToIdMap;
AIE::DeviceOp d = nullptr;
blk->walk([&](AIE::DeviceOp op) { d = op; });

// Compile-time optimization: remember the answer of getAllocOpForSymbol for
// each symbol name queried during this walk. Only the first query for a
// name reaches getAllocOpForSymbol; its result (empty optionals included)
// is stored in the map and handed back for all subsequent queries. This
// dramatically improves compile time for some designs.
llvm::DenseMap<StringRef, std::optional<AIE::ShimDMAAllocationOp>>
    allocationCache;
auto getAllocOpForSymbolWithCaching = [&](StringRef sym_name) {
  // One hash lookup: either finds the existing entry or creates an empty
  // one that is populated right below.
  auto [entry, wasInserted] = allocationCache.try_emplace(sym_name);
  if (wasInserted)
    entry->second = getAllocOpForSymbol(d, sym_name);
  return entry->second;
};

blk->walk([&](Operation *op) {
if (auto dma = dyn_cast<AIEX::NpuDmaMemcpyNdOp>(op)) {
OpBuilder builder(dma);
int col = -1;
if (d) {
if (auto infoOp = getAllocOpForSymbol(d, dma.getMetadata())) {
if (auto infoOp = getAllocOpForSymbolWithCaching(dma.getMetadata())) {
col = infoOp->getCol();
} else if (auto objFifoCreateOp =
getObjectFifoCreateOpForSymbol(d, dma.getMetadata())) {
Expand Down
Loading