-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[compiler] Implement DMA code specialization (#55)
Up until now, we've been performing memory copies on the compute cores using `memref.copy`, which is very slow. This PR instead starts making use of the DMA core's DMA hardware to perform any copying from L1 to L3. As only the DMA core can execute these instructions and only the compute core can execute kernels, this PR also implements a "dma code specialization" pass which copies the original compute core containing DMA instructions, removes all compute instructions from the copy, and inserts the required synchronization between the compute and DMA functions.
- Loading branch information
Showing
8 changed files
with
150 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
86 changes: 86 additions & 0 deletions
86
codegen/compiler/src/Quidditch/Dialect/Snitch/Transforms/SpecializeDMACode.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#include "Passes.h" | ||
|
||
#include "Quidditch/Dialect/Snitch/IR/QuidditchSnitchDialect.h" | ||
#include "Quidditch/Dialect/Snitch/IR/QuidditchSnitchOps.h" | ||
#include "mlir/Interfaces/FunctionInterfaces.h" | ||
|
||
namespace quidditch::Snitch { | ||
#define GEN_PASS_DEF_SPECIALIZEDMACODEPASS | ||
#include "Quidditch/Dialect/Snitch/Transforms/Passes.h.inc" | ||
} // namespace quidditch::Snitch | ||
|
||
namespace { | ||
class SpecializeDMACode | ||
: public quidditch::Snitch::impl::SpecializeDMACodePassBase< | ||
SpecializeDMACode> { | ||
public: | ||
using Base::Base; | ||
|
||
protected: | ||
void runOnOperation() override; | ||
|
||
private: | ||
}; | ||
|
||
} // namespace | ||
|
||
using namespace mlir; | ||
using namespace quidditch::Snitch; | ||
|
||
/// Erases every `MemRefMicrokernelOp` nested anywhere inside `dmaCode`.
///
/// The ops are erased directly from within the walk callback.
static void removeComputeOps(FunctionOpInterface dmaCode) {
  // TODO: These can have results in theory which would make this crash!
  dmaCode->walk([](MemRefMicrokernelOp kernel) { kernel->erase(); });
}
|
||
/// Deletes every `WaitForDMATransfersOp` and `StartDMATransferOp` nested
/// inside `computeCode`.
///
/// The ops are gathered first and erased in a second step, so nothing is
/// removed while the walk is still running.
static void removeDmaCode(FunctionOpInterface computeCode) {
  SmallVector<Operation *> dmaOps;
  computeCode->walk([&dmaOps](Operation *candidate) {
    if (!isa<WaitForDMATransfersOp, StartDMATransferOp>(candidate))
      return;
    dmaOps.push_back(candidate);
  });

  for (Operation *dmaOp : dmaOps) {
    // Detach the results from any remaining users before erasing the op.
    dmaOp->dropAllUses();
    dmaOp->erase();
  }
}
|
||
/// Creates a `BarrierOp` next to every DMA operation nested in `function`:
/// directly after each `WaitForDMATransfersOp` and directly before each
/// `StartDMATransferOp`. All other operations are left untouched.
static void insertBarriers(FunctionOpInterface function) {
  function->walk([](Operation *current) {
    const bool isWait = isa<WaitForDMATransfersOp>(current);
    const bool isStart = !isWait && isa<StartDMATransferOp>(current);
    if (!isWait && !isStart)
      return;

    OpBuilder barrierBuilder(current->getContext());
    if (isWait) {
      // Barrier needs to be after the wait to signal to compute ops the
      // transfer is done.
      barrierBuilder.setInsertionPointAfter(current);
    } else {
      // Barrier needs to be before the transfer for compute ops to signal
      // that a computation is done.
      // TODO: This is overly conservative and could be optimized somewhere.
      barrierBuilder.setInsertionPoint(current);
    }

    barrierBuilder.create<BarrierOp>(current->getLoc());
  });
}
|
||
void SpecializeDMACode::runOnOperation() { | ||
auto *dialect = getContext().getLoadedDialect<QuidditchSnitchDialect>(); | ||
SymbolTable table(getOperation()); | ||
for (auto function : getOperation().getOps<FunctionOpInterface>()) { | ||
if (function.isDeclaration()) | ||
continue; | ||
|
||
insertBarriers(function); | ||
|
||
FunctionOpInterface clone = function.clone(); | ||
clone.setName((clone.getName() + "$dma").str()); | ||
table.insert(clone, function->getIterator()); | ||
dialect->getDmaSpecializationAttrHelper().setAttr( | ||
function, FlatSymbolRefAttr::get(clone)); | ||
|
||
removeComputeOps(clone); | ||
removeDmaCode(function); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters