Skip to content

Commit

Permalink
Replace logical objectfifo produce/consume with core in/out DMA operands
Browse files Browse the repository at this point in the history
  • Loading branch information
jtuyls committed Aug 8, 2024
1 parent 91a13bb commit 7d4f418
Show file tree
Hide file tree
Showing 17 changed files with 260 additions and 418 deletions.
15 changes: 13 additions & 2 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,27 @@ LogicalResult ControlCodeOp::verify() {
// AMDAIE_CoreOp
//===----------------------------------------------------------------------===//


/// Builds a `CoreOp` for the tile produced by `tileOp`, forwarding `inputDmas`
/// and `outputDmas` as the op's input and output DMA operands.
///
/// This overload delegates to another `build` overload, supplying an index
/// type and a trailing `nullptr`.
void CoreOp::build(OpBuilder &b, OperationState &result, AMDAIE::TileOp tileOp,
ValueRange inputDmas, ValueRange outputDmas) {
// NOTE(review): the trailing `nullptr` presumably leaves the optional
// `link_with` attribute unset - confirm against the generated builder.
build(b, result, b.getIndexType(), tileOp, inputDmas, outputDmas, nullptr);
}

/// Hardcoded row_offset == 2 -> AIE core rows start from 2
/// TODO(jornt): avoid hardcoding here. Add a device model/identifier to look up
/// core offset. This will be handled in a follow-up.
void CoreOp::build(OpBuilder &b, OperationState &result, Value coreCol,
Value coreRow) {
Value coreRow, ValueRange inputDmas, ValueRange outputDmas) {
auto rowOffset = b.create<arith::ConstantIndexOp>(b.getUnknownLoc(), 2);
auto row =
b.createOrFold<arith::AddIOp>(b.getUnknownLoc(), rowOffset, coreRow);
auto tileOp = b.create<AMDAIE::TileOp>(b.getUnknownLoc(), coreCol, row);
build(b, result, b.getIndexType(), tileOp, nullptr);
build(b, result, tileOp, inputDmas, outputDmas, nullptr);
}

/// Convenience overload that builds a `CoreOp` from a core column and row
/// without any DMA operands. It forwards to the `build` overload taking
/// (coreCol, coreRow, inputDmas, outputDmas) with both DMA lists empty.
void CoreOp::build(OpBuilder &b, OperationState &result, Value coreCol,
Value coreRow) {
// The two `{}` arguments are the empty input and output DMA operand lists.
build(b, result, coreCol, coreRow, {}, {});
}

LogicalResult CoreOp::verify() {
Expand Down
96 changes: 9 additions & 87 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def AMDAIE_ControlCodeOp : AMDAIE_Op<"controlcode", [HasParent<"WorkgroupOp">,
let hasVerifier = 1;
}

def AMDAIE_CoreOp: AMDAIE_Op<"core", [SingleBlock]>, Results<(outs Index)> {
def AMDAIE_CoreOp: AMDAIE_Op<"core", [SingleBlock, AttrSizedOperandSegments]>, Results<(outs Index)> {
let summary = "The AIE core operator";
let description = [{
This operation represents an AIE core op, containing a sequence of operations
Expand All @@ -62,15 +62,20 @@ def AMDAIE_CoreOp: AMDAIE_Op<"core", [SingleBlock]>, Results<(outs Index)> {

let arguments = (
ins Index:$tile,
OptionalAttr<StrAttr>:$link_with
Variadic<Index>:$input_dmas,
Variadic<Index>:$output_dmas,
OptionalAttr<StrAttr>:$link_with
);

let regions = (region SizedRegion<1>:$region);

let assemblyFormat = [{ `(` $tile `)` regions attr-dict }];
let assemblyFormat = [{ `(` $tile `,` `in` `:` `[` $input_dmas `]` `,` `out` `:` `[` $output_dmas `]` `)` regions attr-dict }];

let builders = [
OpBuilder<(ins "mlir::Value":$coreCol, "mlir::Value":$coreRow)>,
OpBuilder<(ins "mlir::Value":$coreCol, "mlir::Value":$coreRow,
"ValueRange":$input_dmas, "ValueRange":$output_dmas)>,
OpBuilder<(ins "TileOp":$tile, "ValueRange":$input_dmas, "ValueRange":$output_dmas)>
];

let extraClassDeclaration = [{
Expand Down Expand Up @@ -438,7 +443,7 @@ def AMDAIE_LogicalObjectFifoAccessOp : AMDAIE_Op<"logicalobjectfifo.access"> {
%alloc = memref.alloc() : memref<8x16xi32, 2>
%0 = amdaie.logicalobjectfifo.from_memref %alloc, {%tile} : memref<8x16xi32, 2>
-> !amdaie.logicalobjectfifo<memref<8x16xi32, 2>>
%core = amdaie.core(%tile) {
%core = amdaie.core(%tile, in : [], out : []) {
%1 = amdaie.logicalobjectfifo.access(%0, Read) :
!amdaie.logicalobjectfifo<memref<8x16xi32, 2>> -> memref<8x16xi32, 2>
```
Expand Down Expand Up @@ -511,47 +516,6 @@ def AMDAIE_LogicalObjectFifoAcquire:
];
}

// Op that consumes the result of a DMA operation. It takes a single index
// operand `$dma` and exposes helpers to reach the defining `DmaCpyNdOp` and
// its target. The helpers tolerate a `$dma` operand that is not defined by a
// `DmaCpyNdOp` (e.g. a block argument) by returning null instead of crashing.
def AMDAIE_LogicalObjectFifoConsume: AMDAIE_Op<"logicalobjectfifo.consume", []> {
let summary = "Consume a DMA logical objectFifo result.";
let description = [{
Consumes the result of a DMA operation. This is a blocking operation,
waiting for the DMA to produce data. Typically, this operation will reside
inside a `CoreOp` to synchronize with external DMA operations producing data
into the respective core's local memory.

Example:
```mlir
%2 = amdaie.dma_cpy_nd(
%1[%c0, %c0, %c0, %c0] [%c1, %c1, %c8, %c16] [%c128, %c128, %c16, %c1],
%0[%c0, %c0, %c0, %c0] [%c1, %c1, %c8, %c16] [%c128, %c16, %c16, %c1])
: (!amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>>,
!amdaie.logicalobjectfifo<memref<8x16xi32, 1>>)
%3 = amdaie.logicalobjectfifo.consume(%2)
```
}];

let arguments = (
ins Index:$dma
);

let assemblyFormat = [{
`(` $dma `)` attr-dict
}];

let extraClassDeclaration = [{
// Return the `DmaCpyNdOp` defining the `dma` operand, or a null op if the
// operand has no defining op or is defined by a different operation.
DmaCpyNdOp getDmaCpyNdOp() {
// `getDefiningOp<OpTy>()` is null-tolerant, unlike `dyn_cast` applied to
// the possibly-null result of `getDefiningOp()`.
return getDma().getDefiningOp<DmaCpyNdOp>();
}
// Return the target of the defining `DmaCpyNdOp`, or a null `Value` if the
// `dma` operand is not defined by a `DmaCpyNdOp`.
Value getLogicalObjectfifo() {
if (DmaCpyNdOp dmaOp = getDmaCpyNdOp()) return dmaOp.getTarget();
return Value();
}
// Return the port of this operation.
LogicalObjectFifoPort getPort() {
return LogicalObjectFifoPort::Consume;
}
}];
}

def AMDAIE_LogicalObjectFifoFromMemrefOp
: AMDAIE_Op<"logicalobjectfifo.from_memref", [Pure]> {
let summary = "Create a logical objectFifo from a memref";
Expand Down Expand Up @@ -654,48 +618,6 @@ def AMDAIE_LogicalObjectFifoLink
}];
}

// Op that produces the input of a DMA operation. It takes a single index
// operand `$dma` and exposes helpers to reach the defining `DmaCpyNdOp` and
// its source. The helpers tolerate a `$dma` operand that is not defined by a
// `DmaCpyNdOp` (e.g. a block argument) by returning null instead of crashing.
def AMDAIE_LogicalObjectFifoProduce: AMDAIE_Op<"logicalobjectfifo.produce", []> {
let summary = "Produce a DMA logicalobjectfifo input.";
let description = [{
Produces the input of a DMA operation. This is a release-type operation,
where the DMA will be waiting for the data to be produced. Typically, this
operation will reside inside a `CoreOp` to synchronize with external DMA
operations waiting for data from the respective core's local memory to be
released.

Example:
```mlir
%2 = amdaie.dma_cpy_nd(
%1[%c0, %c0, %c0, %c0] [%c1, %c1, %c8, %c16] [%c128, %c128, %c16, %c1],
%0[%c0, %c0, %c0, %c0] [%c1, %c1, %c8, %c16] [%c128, %c16, %c16, %c1])
: (!amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>>,
!amdaie.logicalobjectfifo<memref<8x16xi32, 1>>)
%3 = amdaie.logicalobjectfifo.produce(%2)
```
}];

let arguments = (
ins Index:$dma
);

let assemblyFormat = [{
`(` $dma `)` attr-dict
}];

let extraClassDeclaration = [{
// Return the `DmaCpyNdOp` defining the `dma` operand, or a null op if the
// operand has no defining op or is defined by a different operation.
DmaCpyNdOp getDmaCpyNdOp() {
// `getDefiningOp<OpTy>()` is null-tolerant, unlike `dyn_cast` applied to
// the possibly-null result of `getDefiningOp()`.
return getDma().getDefiningOp<DmaCpyNdOp>();
}
// Return the source of the defining `DmaCpyNdOp`, or a null `Value` if the
// `dma` operand is not defined by a `DmaCpyNdOp`.
Value getLogicalObjectfifo() {
if (DmaCpyNdOp dmaOp = getDmaCpyNdOp()) return dmaOp.getSource();
return Value();
}
// Return the port of this operation.
LogicalObjectFifoPort getPort() {
return LogicalObjectFifoPort::Produce;
}
}];
}

def AMDAIE_LogicalObjectFifoRelease:
AMDAIE_Op<"logicalobjectfifo.release", []> {
let summary = "Semaphore operation to release objects from a logical"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ func.func @core_invalid_terminator() {
%tile = amdaie.tile(%c0, %c0)
// expected-note @+2 {{in custom textual format, the absence of terminator implies 'amdaie.end'}}
// expected-error @+1 {{'amdaie.core' op expects regions to end with 'amdaie.end', found 'arith.constant'}}
%core = amdaie.core(%tile) {
%core = amdaie.core(%tile, in : [], out : []) {
%c1 = arith.constant 0 : index
}
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ func.func @bd_id() {
// CHECK-LABEL: func.func @core
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[TILE_0:.*]] = amdaie.tile(%[[C0]], %[[C0]])
// CHECK: %[[CORE_0:.*]] = amdaie.core(%[[TILE_0]])
// CHECK: %[[CORE_0:.*]] = amdaie.core(%[[TILE_0]], in : [], out : [])
// CHECK: amdaie.end
func.func @core() {
%c0 = arith.constant 0 : index
%tile = amdaie.tile(%c0, %c0)
%core = amdaie.core(%tile) {
%core = amdaie.core(%tile, in : [], out : []) {
amdaie.end
}
return
Expand Down Expand Up @@ -150,17 +150,6 @@ func.func @logicalobjectfifo_acquire(%arg0: !amdaie.logicalobjectfifo<memref<1x1

// -----

// Test input: a DMA copy between two logical objectFifos whose result is
// passed to `amdaie.logicalobjectfifo.consume`.
// CHECK-LABEL: func.func @logicalobjectfifo_consume
// CHECK: amdaie.dma_cpy_nd
// CHECK: amdaie.logicalobjectfifo.consume
func.func @logicalobjectfifo_consume(
    %lof0: !amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>>,
    %lof1: !amdaie.logicalobjectfifo<memref<8x16xi32, 1>>) {
  %dma = amdaie.dma_cpy_nd(
      %lof0[0, 0, 0, 0] [1, 1, 8, 16] [128, 128, 16, 1],
      %lof1[0, 0, 0, 0] [1, 1, 8, 16] [128, 16, 16, 1])
      : (!amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>>,
         !amdaie.logicalobjectfifo<memref<8x16xi32, 1>>)
  amdaie.logicalobjectfifo.consume(%dma)
  return
}

// -----

// CHECK-LABEL: func.func @logicalobjectfifo_link
// CHECK: %[[DMA0:.+]] = amdaie.circular_dma_cpy_nd
// CHECK: %[[DMA1:.+]] = amdaie.circular_dma_cpy_nd
Expand All @@ -176,17 +165,6 @@ func.func @logicalobjectfifo_link(%arg0: !amdaie.logicalobjectfifo<memref<32x102

// -----

// Test input: a DMA copy between two logical objectFifos whose result is
// passed to `amdaie.logicalobjectfifo.produce`.
// CHECK-LABEL: func.func @logicalobjectfifo_produce
// CHECK: amdaie.dma_cpy_nd
// CHECK: amdaie.logicalobjectfifo.produce
func.func @logicalobjectfifo_produce(
    %lof0: !amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>>,
    %lof1: !amdaie.logicalobjectfifo<memref<8x16xi32, 1>>) {
  %dma = amdaie.dma_cpy_nd(
      %lof0[0, 0, 0, 0] [1, 1, 8, 16] [128, 128, 16, 1],
      %lof1[0, 0, 0, 0] [1, 1, 8, 16] [128, 16, 16, 1])
      : (!amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>>,
         !amdaie.logicalobjectfifo<memref<8x16xi32, 1>>)
  amdaie.logicalobjectfifo.produce(%dma)
  return
}

// -----

// CHECK-LABEL: func.func @logicalobjectfifo_release
// CHECK: %[[DMA:.+]] = amdaie.dma_cpy_nd
// CHECK: amdaie.logicalobjectfifo.release
Expand Down Expand Up @@ -300,11 +278,11 @@ func.func @workgroup() {
%c1 = arith.constant 1 : index
amdaie.workgroup {
%tile_0_0 = amdaie.tile(%c0, %c0)
%core_0 = amdaie.core(%tile_0_0) {
%core_0 = amdaie.core(%tile_0_0, in : [], out : []) {
amdaie.end
}
%tile_0_1 = amdaie.tile(%c0, %c1)
%core_1 = amdaie.core(%tile_0_1) {
%core_1 = amdaie.core(%tile_0_1, in : [], out : []) {
amdaie.end
}
amdaie.controlcode {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,35 @@

namespace mlir::iree_compiler::AMDAIE {

/// Merge the 'source' core operations in the end of the 'dest' core operation.
void CoreContext::mergeCoreOps(AMDAIE::CoreOp source, AMDAIE::CoreOp dest) {
/// Merge the 'source' and 'dest' core operations into a new `amdaie.core`
/// operation and combine the input and output DMAs.
AMDAIE::CoreOp CoreContext::mergeCoreOps(AMDAIE::CoreOp source,
AMDAIE::CoreOp dest) {
OpBuilder::InsertionGuard guard(rewriter);
Block::iterator insertIt = dest.getBody()->getTerminator()->getIterator();
Block::iterator sourceBegin = source.getBody()->begin();
Block::iterator sourceEnd = source.getBody()->getTerminator()->getIterator();
dest.getBody()->getOperations().splice(
insertIt, source.getBody()->getOperations(), sourceBegin, sourceEnd);
rewriter.moveOpBefore(dest, source);
rewriter.replaceOp(source, dest);
AMDAIE::TileOp tile = dest.getTileOp();
SmallVector<Value> sourceInputDmas = source.getInputDmas();
SmallVector<Value> destInputDmas = dest.getInputDmas();
llvm::SmallSetVector<Value, 4> inputDmas(destInputDmas.begin(),
destInputDmas.end());
inputDmas.insert(sourceInputDmas.begin(), sourceInputDmas.end());
SmallVector<Value> sourceOutputDmas = source.getOutputDmas();
SmallVector<Value> destOutputDmas = dest.getOutputDmas();
llvm::SmallSetVector<Value, 4> outputDmas(destOutputDmas.begin(),
destOutputDmas.end());
outputDmas.insert(sourceOutputDmas.begin(), sourceOutputDmas.end());
rewriter.setInsertionPoint(source);
auto newCoreOp = rewriter.create<AMDAIE::CoreOp>(rewriter.getUnknownLoc(),
tile, inputDmas.takeVector(),
outputDmas.takeVector());
Region &region = newCoreOp.getRegion();
Block *newBlock = rewriter.createBlock(&region);
rewriter.setInsertionPointToStart(newBlock);
rewriter.eraseOp(dest.getBody()->getTerminator());
rewriter.mergeBlocks(dest.getBody(), newBlock);
rewriter.mergeBlocks(source.getBody(), newBlock);
rewriter.eraseOp(dest);
rewriter.eraseOp(source);
return newCoreOp;
}

/// Clone CoreOp and add to or merge with coreContext.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class CoreContext {
if (!existingCoreOp) {
coreMap[coordinate] = coreOp;
} else {
mergeCoreOps(coreOp, existingCoreOp);
coreMap[coordinate] = mergeCoreOps(coreOp, existingCoreOp);
}
}

Expand All @@ -179,9 +179,8 @@ class CoreContext {
}

private:
/// Merge the 'source' core operations in the end of the 'dest' core
/// operation.
void mergeCoreOps(AMDAIE::CoreOp source, AMDAIE::CoreOp dest);
/// Merge the 'source' and 'dest' core operations into a new one.
AMDAIE::CoreOp mergeCoreOps(AMDAIE::CoreOp source, AMDAIE::CoreOp dest);

/// The rewriter to be used.
IRRewriterAndMapper &rewriter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -527,25 +527,26 @@ LogicalResult insertLogicalObjectFifoAccess(ModuleOp moduleOp) {
DenseMap<Value, std::tuple<AMDAIE::LogicalObjectFifoFromMemrefOp,
AMDAIE::MemoryAccess>>
memrefToLogicalObjectFifo;
// First walk to collect consume/produce DMA accesses and map respective
// memrefs to logical objectFifos.
coreOp->walk([&](Operation *op) {
// TODO(jornt): can we avoid produce/consume?
if (auto consumeOp = dyn_cast<AMDAIE::LogicalObjectFifoConsume>(op)) {
Value targetMemref =
consumeOp.getDmaCpyNdOp().getTargetObjectFifo().getMemref();
memrefToLogicalObjectFifo[targetMemref] =
std::make_pair(consumeOp.getDmaCpyNdOp().getTargetObjectFifo(),
AMDAIE::MemoryAccess::Read);
} else if (auto produceOp =
dyn_cast<AMDAIE::LogicalObjectFifoProduce>(op)) {
Value sourceMemref =
produceOp.getDmaCpyNdOp().getSourceObjectFifo().getMemref();

SmallVector<AMDAIE::DmaCpyNdOp> inputDmaOps =
llvm::map_to_vector(coreOp.getInputDmas(), [](Value inputDma) {
return cast<AMDAIE::DmaCpyNdOp>(inputDma.getDefiningOp());
});
for (AMDAIE::DmaCpyNdOp inputDmaOp : inputDmaOps) {
Value targetMemref = inputDmaOp.getTargetObjectFifo().getMemref();
memrefToLogicalObjectFifo[targetMemref] = std::make_pair(
inputDmaOp.getTargetObjectFifo(), AMDAIE::MemoryAccess::Read);
}
SmallVector<AMDAIE::DmaCpyNdOp> outputDmaOps =
llvm::map_to_vector(coreOp.getOutputDmas(), [](Value outputDma) {
return cast<AMDAIE::DmaCpyNdOp>(outputDma.getDefiningOp());
});
for (AMDAIE::DmaCpyNdOp outputDmaOp : outputDmaOps) {
Value sourceMemref = outputDmaOp.getSourceObjectFifo().getMemref();
memrefToLogicalObjectFifo[sourceMemref] =
std::make_pair(produceOp.getDmaCpyNdOp().getSourceObjectFifo(),
std::make_pair(outputDmaOp.getSourceObjectFifo(),
AMDAIE::MemoryAccess::Write);
}
});
}

// We maintain a map from AllocOp to LogicalObjectFifoAccessOp in order to
// avoid creating a new LogicalObjectFifoAccessOp for the same AllocOp being
Expand Down
Loading

0 comments on commit 7d4f418

Please sign in to comment.