diff --git a/compiler/plugins/target/AMD-AIE/aie/AIEDialect.cpp b/compiler/plugins/target/AMD-AIE/aie/AIEDialect.cpp index af05998aa..fa155ceb0 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AIEDialect.cpp +++ b/compiler/plugins/target/AMD-AIE/aie/AIEDialect.cpp @@ -577,4 +577,17 @@ LogicalResult DMABDOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// AIE_FlowOp +//===----------------------------------------------------------------------===// + +void FlowOp::build(OpBuilder &b, OperationState &result, Value source, + mlir::iree_compiler::AMDAIE::StrmSwPortType source_bundle, + uint8_t source_channel, Value dest, + mlir::iree_compiler::AMDAIE::StrmSwPortType dest_bundle, + uint8_t dest_channel) { + build(b, result, source, source_bundle, source_channel, dest, dest_bundle, + dest_channel, nullptr); +} + } // namespace xilinx::AIE diff --git a/compiler/plugins/target/AMD-AIE/aie/AIEOps.td b/compiler/plugins/target/AMD-AIE/aie/AIEOps.td index 8d8a4f929..4e21442fd 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AIEOps.td +++ b/compiler/plugins/target/AMD-AIE/aie/AIEOps.td @@ -165,12 +165,22 @@ def AIE_FlowOp: AIE_Op<"flow"> { ConfinedAttr]>:$source_channel, Index:$dest, StrmSwPortTypeAttr:$dest_bundle, - ConfinedAttr]>:$dest_channel + ConfinedAttr]>:$dest_channel, + OptionalAttr:$symbol ); let summary = "A logical circuit-switched connection between cores"; let assemblyFormat = [{ `(` $source `,` $source_bundle `:` $source_channel `,` $dest `,` $dest_bundle `:` $dest_channel `)` attr-dict }]; + let builders = [ + OpBuilder<( + ins "::mlir::Value":$source, + "::mlir::iree_compiler::AMDAIE::StrmSwPortType":$source_bundle, + "uint8_t":$source_channel, + "::mlir::Value":$dest, + "::mlir::iree_compiler::AMDAIE::StrmSwPortType":$dest_bundle, + "uint8_t":$dest_channel)> + ]; } def AIE_AMSelOp: AIE_Op<"amsel", [ diff --git a/compiler/plugins/target/AMD-AIE/aie/AMDAIEObjectFifoStatefulTransform.cpp 
b/compiler/plugins/target/AMD-AIE/aie/AMDAIEObjectFifoStatefulTransform.cpp index 1f52fb8a4..2b1158660 100644 --- a/compiler/plugins/target/AMD-AIE/aie/AMDAIEObjectFifoStatefulTransform.cpp +++ b/compiler/plugins/target/AMD-AIE/aie/AMDAIEObjectFifoStatefulTransform.cpp @@ -8,6 +8,7 @@ #include "Passes.h" #include "iree-amd-aie/aie_runtime/iree_aie_runtime.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" @@ -114,24 +115,6 @@ std::optional getOptionalSharedTile(ObjectFifoLinkOp op) { } // namespace -class DMAChannelAnalysis { - DenseMap producerChannelsPerTile; - DenseMap consumerChannelsPerTile; - - public: - DMAChannelAnalysis() {} - - /// Given an AIE tile, returns its next usable producer channel. - SwitchDMAConnection getProducerDMAChannel(Value tile) { - return {DMAChannelDir::MM2S, producerChannelsPerTile[tile]++}; - } - - /// Given an AIE tile, returns its next usable consumer channel. - SwitchDMAConnection getConsumerDMAChannel(Value tile) { - return {DMAChannelDir::S2MM, consumerChannelsPerTile[tile]++}; - } -}; - enum SharedMemoryDirection { LHS = -1, RHS = 1, NONE = 0 }; /// Retrieve ObjectFifoLinkOp of ObjectFifoCreateOp, @@ -800,55 +783,78 @@ void createBuffersAndLocks( /// Translate ObjectFifoCreateOp ops into routing primitives (Flows) and DMA /// primitives (DMABD, DMAStart, Buffer, UseLock). 
-void createFlowsAndTileDMAs( +LogicalResult createFlowsAndTileDMAs( OpBuilder builder, DeviceOp device, ObjectFifoCreateOp producer, - const std::vector &consumers, - DMAChannelAnalysis &dmaAnalysis, + std::vector &consumers, const DenseMap> &locksPerFifo, const DenseMap &objFifoLinks, - const DenseMap> &buffersPerFifo) { + const DenseMap> &buffersPerFifo, + const DenseMap> &symbolToFlowOps) { AMDAIEDeviceModel deviceModel = getDeviceModel(static_cast(device.getDevice())); auto createDMA = [&deviceModel, &device, &builder, &locksPerFifo, &objFifoLinks, &buffersPerFifo]( ObjectFifoCreateOp op, DMAChannelDir channelDir, - int channelIndex, BDDimLayoutArrayAttr dims) { + uint8_t channelIndex, BDDimLayoutArrayAttr dims) { TileOp producerOp = cast(op.getProducerTile().getDefiningOp()); - if (deviceModel.isShimTile(producerOp.getCol(), producerOp.getRow())) + if (deviceModel.isShimTile(producerOp.getCol(), producerOp.getRow())) { return; - else if (deviceModel.isMemTile(producerOp.getCol(), producerOp.getRow())) + } else if (deviceModel.isMemTile(producerOp.getCol(), + producerOp.getRow())) { createMemTileDMA(device, builder, op, channelDir, channelIndex, dims, objFifoLinks, buffersPerFifo, locksPerFifo); - else + } else { createAMDAIETileDMA(device, builder, op, channelDir, channelIndex, dims, objFifoLinks, buffersPerFifo, locksPerFifo); + } }; - // create producer tile DMA + // Collect producer and consumer DMA channels + if (!symbolToFlowOps.contains(producer.getSymName())) { + return producer.emitOpError() + << "symbol name not found in symbol to flow ops map"; + } + SmallVector flowOps = symbolToFlowOps.at(producer.getSymName()); + SmallVector producerChannelsVec = llvm::map_to_vector( + flowOps, [](FlowOp flowOp) { return flowOp.getSourceChannel(); }); + llvm::SmallSetVector producerChannels(producerChannelsVec.begin(), + producerChannelsVec.end()); + if (producerChannels.size() != 1) + return producer.emitOpError() << "expected a single producer channel"; + 
DenseMap consumerChannelsMap; + for (FlowOp flowOp : flowOps) + consumerChannelsMap[flowOp.getDest()] = flowOp.getDestChannel(); + if (consumerChannelsMap.size() != consumers.size()) { + return producer.emitOpError() << "expected same number of consumers as the " + "number of consumer objectfifos provided"; + } + + // create producer tile DMA TileOp producerProducerTileOp = cast(producer.getProducerTile().getDefiningOp()); - SwitchDMAConnection producerChan = - dmaAnalysis.getProducerDMAChannel(producer.getProducerTile()); - createDMA(producer, static_cast(producerChan.direction), - producerChan.channel, producer.getDimensionsToStreamAttr()); + createDMA(producer, DMAChannelDir::MM2S, producerChannels[0], + producer.getDimensionsToStreamAttr()); // generate objectFifo allocation info OpBuilder::InsertionGuard g(builder); builder.setInsertionPoint(&device.getBody()->back()); if (deviceModel.isShimTile(producerProducerTileOp.getCol(), - producerProducerTileOp.getRow())) + producerProducerTileOp.getRow())) { builder.create( - builder.getUnknownLoc(), producer.getName(), - static_cast(producerChan.direction), - producerChan.channel, producerProducerTileOp.getCol()); + builder.getUnknownLoc(), producer.getName(), DMAChannelDir::MM2S, + producerChannels[0], producerProducerTileOp.getCol()); + } + + for (ObjectFifoCreateOp consumer : consumers) { + if (!consumerChannelsMap.contains(consumer.getProducerTile())) { + return consumer.emitOpError() + << "did not find producer tile in consumerChannelsMap"; + } + uint8_t consumerChannel = consumerChannelsMap[consumer.getProducerTile()]; - for (auto consumer : consumers) { // create consumer tile DMA - SwitchDMAConnection consumerChan = - dmaAnalysis.getConsumerDMAChannel(consumer.getProducerTile()); BDDimLayoutArrayAttr consumerDims = consumer.getDimensionsFromStreamPerConsumer()[0]; - createDMA(consumer, static_cast(consumerChan.direction), - consumerChan.channel, consumerDims); + createDMA(consumer, DMAChannelDir::S2MM, 
consumerChannel, consumerDims); // generate objectFifo allocation info OpBuilder::InsertionGuard gg(builder); builder.setInsertionPoint(&device.getBody()->back()); @@ -856,22 +862,13 @@ void createFlowsAndTileDMAs( TileOp consumerProducerTileOp = cast(consumer.getProducerTile().getDefiningOp()); if (deviceModel.isShimTile(consumerProducerTileOp.getCol(), - consumerProducerTileOp.getRow())) + consumerProducerTileOp.getRow())) { builder.create( - builder.getUnknownLoc(), producer.getName(), - static_cast(consumerChan.direction), - consumerChan.channel, consumerProducerTileOp.getCol()); - - // create flow - { - OpBuilder::InsertionGuard ggg(builder); - builder.setInsertionPointAfter(producer); - builder.create(builder.getUnknownLoc(), - producer.getProducerTile(), WireBundle::DMA, - producerChan.channel, consumer.getProducerTile(), - WireBundle::DMA, consumerChan.channel); + builder.getUnknownLoc(), producer.getName(), DMAChannelDir::S2MM, + consumerChannel, consumerProducerTileOp.getCol()); } } + return success(); } namespace mlir::iree_compiler::AMDAIE { @@ -905,7 +902,6 @@ struct AMDAIEObjectFifoStatefulTransformPass : mlir::OperationPass { void runOnOperation() override { DeviceOp device = getOperation(); - DMAChannelAnalysis dmaAnalysis; OpBuilder builder = OpBuilder::atBlockEnd(device.getBody()); // maps each objFifo to its corresponding buffer DenseMap> buffersPerFifo; @@ -926,20 +922,32 @@ struct AMDAIEObjectFifoStatefulTransformPass : mlir::OperationPass { llvm::to_vector(device.getOps()); for (ObjectFifoCreateOp createOp : createFifoOps) { if (auto _shareDirection = NONE; - !requiresDMAs(createOp, _shareDirection, splitBecauseLink)) + !requiresDMAs(createOp, _shareDirection, splitBecauseLink)) { continue; + } splitFifo(device, createOp, builder, splitFifos); } - for (ObjectFifoCreateOp createOp : device.getOps()) + for (ObjectFifoCreateOp createOp : device.getOps()) { createBuffersAndLocks(builder, device, createOp, splitBecauseLink, objFifoLinks, 
buffersPerFifo, locksPerFifo); + } + + DenseMap> symbolToFlowOps; + device.walk([&](FlowOp op) { + std::optional symbolAttr = op.getSymbol(); + if (symbolAttr) symbolToFlowOps[symbolAttr.value()].push_back(op); + }); // Only the objectFifos we split above require DMA communication; the others // rely on shared memory and share the same buffers. - for (auto &[producer, consumers] : splitFifos) - createFlowsAndTileDMAs(builder, device, producer, consumers, dmaAnalysis, - locksPerFifo, objFifoLinks, buffersPerFifo); + for (auto &[producer, consumers] : splitFifos) { + if (failed(createFlowsAndTileDMAs(builder, device, producer, consumers, + locksPerFifo, objFifoLinks, + buffersPerFifo, symbolToFlowOps))) { + return signalPassFailure(); + } + } // Replace ops for (auto coreOp : device.getOps()) { diff --git a/compiler/plugins/target/AMD-AIE/aie/test/AIE2_cyclostatic_dma.mlir b/compiler/plugins/target/AMD-AIE/aie/test/AIE2_cyclostatic_dma.mlir index 65036eca3..0cf9d9c55 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/AIE2_cyclostatic_dma.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/AIE2_cyclostatic_dma.mlir @@ -99,6 +99,7 @@ module @aie2_cyclostatic_dma { %tile22 = aie.tile(2, 2) // producer tile %tile83 = aie.tile(8, 3) // consumer tile %buf83 = aie.buffer(%tile83) {sym_name = "buf83"} : memref<4xi32> + aie.flow(%tile22, DMA : 0, %tile83, DMA : 0) {symbol = @fifo} // ObjectFifo that can hold 4 memrefs, populated by tile22 and // consumed by tile23 aie.objectfifo @fifo (%tile22, {%tile83}, 4 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/test/AIE2_cyclostatic_l2.mlir b/compiler/plugins/target/AMD-AIE/aie/test/AIE2_cyclostatic_l2.mlir index 063bbd18c..1654a257d 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/AIE2_cyclostatic_l2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/AIE2_cyclostatic_l2.mlir @@ -164,6 +164,8 @@ module @aie2_cyclostatic_l2 { %memtile = aie.tile(2, 1) // mem tile %tile83 = aie.tile(8, 3) // 
consumer tile %buf83 = aie.buffer(%tile83) {sym_name = "buf83"} : memref<1xi32> + aie.flow(%tile22, DMA : 0, %memtile, DMA : 0) {symbol = @fifo0} + aie.flow(%memtile, DMA : 0, %tile83, DMA : 0) {symbol = @fifo1} // ObjectFifo that can hold 4 memref<1xi32>s, populated by tile22 and // consumed by tile23 aie.objectfifo @fifo0 (%tile22, {%memtile}, 4 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/test/allocation_info_test.mlir b/compiler/plugins/target/AMD-AIE/aie/test/allocation_info_test.mlir index 9aef2676c..65ea008e8 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/allocation_info_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/allocation_info_test.mlir @@ -106,6 +106,10 @@ module @alloc { %tile20 = aie.tile(2, 0) %tile22 = aie.tile(2, 2) %tile23 = aie.tile(2, 3) + aie.flow(%tile20, DMA : 0, %tile22, DMA : 0) {symbol = @of_in_0} + aie.flow(%tile22, DMA : 0, %tile20, DMA : 0) {symbol = @of_out_0} + aie.flow(%tile20, DMA : 1, %tile23, DMA : 0) {symbol = @of_in_1} + aie.flow(%tile23, DMA : 0, %tile20, DMA : 1) {symbol = @of_out_1} aie.objectfifo @of_in_0 (%tile20, {%tile22}, 2 : i32) : !aie.objectfifo> aie.objectfifo @of_out_0 (%tile22, {%tile20}, 2 : i32) : !aie.objectfifo> aie.objectfifo @of_in_1 (%tile20, {%tile23}, 2 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/test/base_test_AIE1.mlir b/compiler/plugins/target/AMD-AIE/aie/test/base_test_AIE1.mlir index 109b6bff3..222df9204 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/base_test_AIE1.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/base_test_AIE1.mlir @@ -60,6 +60,8 @@ module @elementGenerationAIE1 { %tile12 = aie.tile(1, 2) %tile13 = aie.tile(1, 3) %tile33 = aie.tile(3, 3) + aie.flow(%tile12, DMA : 0, %tile33, DMA : 0) {symbol = @of1} + aie.flow(%tile12, DMA : 1, %tile13, DMA : 0) {symbol = @of0} // In the shared memory case, the number of elements does not change. 
aie.objectfifo @of0 (%tile12, {%tile13}, 4 : i32) : !aie.objectfifo> // In the non-adjacent memory case, the number of elements depends on the max amount acquired by diff --git a/compiler/plugins/target/AMD-AIE/aie/test/base_test_AIE2.mlir b/compiler/plugins/target/AMD-AIE/aie/test/base_test_AIE2.mlir index 0997616bb..4cfd3507c 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/base_test_AIE2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/base_test_AIE2.mlir @@ -56,14 +56,16 @@ // CHECK: } module @elementGenerationAIE2 { - aie.device(xcve2302) { + aie.device(xcve2302) { %tile12 = aie.tile(1, 2) %tile13 = aie.tile(1, 3) %tile33 = aie.tile(3, 3) + aie.flow(%tile12, DMA : 0, %tile33, DMA : 0) {symbol = @of1} + aie.flow(%tile12, DMA : 1, %tile13, DMA : 0) {symbol = @of0} // In the shared memory case, the number of elements does not change. aie.objectfifo @of0 (%tile12, {%tile13}, 4 : i32) : !aie.objectfifo> // In the non-adjacent memory case, the number of elements depends on the max amount acquired by // the processes running on each core (here nothing is specified so it cannot be derived). 
aie.objectfifo @of1 (%tile12, {%tile33}, 2 : i32) : !aie.objectfifo> - } + } } diff --git a/compiler/plugins/target/AMD-AIE/aie/test/broadcast_test.mlir b/compiler/plugins/target/AMD-AIE/aie/test/broadcast_test.mlir index 7b4b42253..70eb759ea 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/broadcast_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/broadcast_test.mlir @@ -247,6 +247,10 @@ module @broadcast { %tile14 = aie.tile(1, 4) %tile32 = aie.tile(3, 2) %tile33 = aie.tile(3, 3) + aie.flow(%tile13, DMA : 0, %tile33, DMA : 0) {symbol = @broadcast_of} + aie.flow(%tile13, DMA : 0, %tile32, DMA : 0) {symbol = @broadcast_of} + aie.flow(%tile13, DMA : 0, %tile14, DMA : 0) {symbol = @broadcast_of} + aie.flow(%tile13, DMA : 0, %tile12, DMA : 0) {symbol = @broadcast_of} aie.objectfifo @broadcast_of (%tile13, {%tile12, %tile14, %tile32, %tile33}, [2, 2, 3, 4, 3]) : !aie.objectfifo> func.func @some_work(%lineOut : memref<16xi32>) -> () { return diff --git a/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE1.mlir b/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE1.mlir index 2d58b49de..1d4a0219f 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE1.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE1.mlir @@ -72,6 +72,8 @@ module @link_AIE1 { %tile20 = aie.tile(2, 0) %tile12 = aie.tile(1, 2) %tile22 = aie.tile(2, 2) + aie.flow(%tile20, DMA : 0, %tile12, DMA : 0) {symbol = @of1} + aie.flow(%tile12, DMA : 0, %tile22, DMA : 0) {symbol = @of2} aie.objectfifo @of1 (%tile20, {%tile12}, 2 : i32) : !aie.objectfifo> aie.objectfifo @of2 (%tile12, {%tile22}, 2 : i32) : !aie.objectfifo> aie.objectfifo.link [@of1] -> [@of2] () diff --git a/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE2.mlir b/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE2.mlir index 371eb63f3..b61eb7d90 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE2.mlir @@ 
-175,6 +175,9 @@ module @link_AIE2 { %tile01 = aie.tile(2, 1) %tile02 = aie.tile(2, 2) %tile03 = aie.tile(2, 3) + aie.flow(%tile00, DMA : 0, %tile01, DMA : 0) {symbol = @mem_in} + aie.flow(%tile00, DMA : 0, %tile02, DMA : 0) {symbol = @mem_in} + aie.flow(%tile01, DMA : 0, %tile03, DMA : 0) {symbol = @mem_out} aie.objectfifo @mem_in (%tile00, {%tile02, %tile01}, [2,2,7]) : !aie.objectfifo> aie.objectfifo @mem_out (%tile01, {%tile03}, 7 : i32) : !aie.objectfifo> aie.objectfifo.link [@mem_in] -> [@mem_out] () diff --git a/compiler/plugins/target/AMD-AIE/aie/test/link_test_DDR_to_L1.mlir b/compiler/plugins/target/AMD-AIE/aie/test/link_test_DDR_to_L1.mlir index ad0fe6bfb..eacf274ff 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/link_test_DDR_to_L1.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/link_test_DDR_to_L1.mlir @@ -72,6 +72,8 @@ module @link_DDR_L1 { %tile20 = aie.tile(2, 0) %tile21 = aie.tile(2, 1) %tile22 = aie.tile(2, 2) + aie.flow(%tile20, DMA : 0, %tile21, DMA : 0) {symbol = @to_memTile} + aie.flow(%tile21, DMA : 0, %tile22, DMA : 0) {symbol = @from_memTile} aie.objectfifo @to_memTile (%tile20, {%tile21}, 2 : i32) : !aie.objectfifo> aie.objectfifo @from_memTile (%tile21, {%tile22}, 2 : i32) : !aie.objectfifo> aie.objectfifo.link [@to_memTile] -> [@from_memTile] () diff --git a/compiler/plugins/target/AMD-AIE/aie/test/link_test_L1_to_DDR.mlir b/compiler/plugins/target/AMD-AIE/aie/test/link_test_L1_to_DDR.mlir index b4af26063..e0b3bdd4a 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/link_test_L1_to_DDR.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/link_test_L1_to_DDR.mlir @@ -72,6 +72,8 @@ module @link_L1_DDR { %tile20 = aie.tile(2, 0) %tile21 = aie.tile(2, 1) %tile22 = aie.tile(2, 2) + aie.flow(%tile22, DMA : 0, %tile21, DMA : 0) {symbol = @to_memTile} + aie.flow(%tile21, DMA : 0, %tile20, DMA : 0) {symbol = @from_memTile} aie.objectfifo @to_memTile (%tile22, {%tile21}, 2 : i32) : !aie.objectfifo> aie.objectfifo @from_memTile 
(%tile21, {%tile20}, 2 : i32) : !aie.objectfifo> aie.objectfifo.link [@to_memTile] -> [@from_memTile] () diff --git a/compiler/plugins/target/AMD-AIE/aie/test/link_test_broadcast.mlir b/compiler/plugins/target/AMD-AIE/aie/test/link_test_broadcast.mlir index 5a8f867c5..167047bdb 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/link_test_broadcast.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/link_test_broadcast.mlir @@ -135,6 +135,10 @@ module @link_broadcast { %tile21 = aie.tile(2, 1) %tile22 = aie.tile(2, 2) %tile33 = aie.tile(3, 3) + aie.flow(%tile20, DMA : 0, %tile21, DMA : 0) {symbol = @link1} + aie.flow(%tile21, DMA : 0, %tile33, DMA : 0) {symbol = @link2} + aie.flow(%tile21, DMA : 0, %tile22, DMA : 0) {symbol = @link2} + aie.flow(%tile22, DMA : 0, %tile33, DMA : 1) {symbol = @skip_connection} aie.objectfifo @link1 (%tile20, {%tile21}, 2 : i32) : !aie.objectfifo> aie.objectfifo @link2 (%tile21, {%tile22, %tile33}, [2, 2, 3]) : !aie.objectfifo> aie.objectfifo @skip_connection (%tile22, {%tile33}, 2 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/test/link_test_distribute.mlir b/compiler/plugins/target/AMD-AIE/aie/test/link_test_distribute.mlir index 70fba47a3..fe2d7526a 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/link_test_distribute.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/link_test_distribute.mlir @@ -144,6 +144,10 @@ module @link_distribute { %tile22 = aie.tile(2, 2) %tile23 = aie.tile(2, 3) %tile33 = aie.tile(3, 3) + aie.flow(%tile20, DMA : 0, %tile21, DMA : 0) {symbol = @link1} + aie.flow(%tile21, DMA : 0, %tile22, DMA : 0) {symbol = @link2} + aie.flow(%tile21, DMA : 1, %tile23, DMA : 0) {symbol = @link3} + aie.flow(%tile21, DMA : 2, %tile33, DMA : 0) {symbol = @link4} aie.objectfifo @link1 (%tile20, {%tile21}, 2 : i32) : !aie.objectfifo> aie.objectfifo @link2 (%tile21, {%tile22}, 2 : i32) : !aie.objectfifo> aie.objectfifo @link3 (%tile21, {%tile23}, 2 : i32) : !aie.objectfifo> diff --git 
a/compiler/plugins/target/AMD-AIE/aie/test/link_test_join.mlir b/compiler/plugins/target/AMD-AIE/aie/test/link_test_join.mlir index a59741eda..e52ae3d13 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/link_test_join.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/link_test_join.mlir @@ -173,20 +173,25 @@ // CHECK: } module @link_join { - aie.device(xcve2302) { - %tile20 = aie.tile(2, 0) - %tile21 = aie.tile(2, 1) - %tile12 = aie.tile(1, 2) - %tile22 = aie.tile(2, 2) - %tile23 = aie.tile(2, 3) - %tile33 = aie.tile(3, 3) - aie.objectfifo @link1 (%tile12, {%tile21}, 2 : i32) : !aie.objectfifo> - aie.objectfifo @link2 (%tile22, {%tile21}, 2 : i32) : !aie.objectfifo> - aie.objectfifo @link3 (%tile23, {%tile21}, 2 : i32) : !aie.objectfifo> - aie.objectfifo @link4 (%tile33, {%tile21}, 2 : i32) : !aie.objectfifo> - aie.objectfifo @link5 (%tile21, {%tile20}, 2 : i32) : !aie.objectfifo> - %ext_buffer_in = aie.external_buffer {sym_name = "ext_buffer_in"}: memref<512xi8> - aie.objectfifo.register_external_buffers @link5 (%tile20, {%ext_buffer_in}) : (memref<512xi8>) - aie.objectfifo.link [@link1, @link2, @link3, @link4] -> [@link5] () - } + aie.device(xcve2302) { + %tile20 = aie.tile(2, 0) + %tile21 = aie.tile(2, 1) + %tile12 = aie.tile(1, 2) + %tile22 = aie.tile(2, 2) + %tile23 = aie.tile(2, 3) + %tile33 = aie.tile(3, 3) + aie.flow(%tile12, DMA : 0, %tile21, DMA : 0) {symbol = @link1} + aie.flow(%tile22, DMA : 0, %tile21, DMA : 1) {symbol = @link2} + aie.flow(%tile23, DMA : 0, %tile21, DMA : 2) {symbol = @link3} + aie.flow(%tile33, DMA : 0, %tile21, DMA : 3) {symbol = @link4} + aie.flow(%tile21, DMA : 0, %tile20, DMA : 0) {symbol = @link5} + aie.objectfifo @link1 (%tile12, {%tile21}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @link2 (%tile22, {%tile21}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @link3 (%tile23, {%tile21}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @link4 (%tile33, {%tile21}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @link5 (%tile21, 
{%tile20}, 2 : i32) : !aie.objectfifo> + %ext_buffer_in = aie.external_buffer {sym_name = "ext_buffer_in"}: memref<512xi8> + aie.objectfifo.register_external_buffers @link5 (%tile20, {%ext_buffer_in}) : (memref<512xi8>) + aie.objectfifo.link [@link1, @link2, @link3, @link4] -> [@link5] () + } } diff --git a/compiler/plugins/target/AMD-AIE/aie/test/matmul_test.mlir b/compiler/plugins/target/AMD-AIE/aie/test/matmul_test.mlir index 122471be1..33ff36451 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/matmul_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/matmul_test.mlir @@ -127,6 +127,9 @@ module @matmul { aie.device(xcve2302) { %t00 = aie.tile(2, 0) %t02 = aie.tile(2, 2) + aie.flow(%t00, DMA : 0, %t02, DMA : 0) {symbol = @inA} + aie.flow(%t00, DMA : 1, %t02, DMA : 1) {symbol = @inB} + aie.flow(%t02, DMA : 0, %t00, DMA : 0) {symbol = @outC} aie.objectfifo @inA (%t00, { %t02 }, 2 : i32) : !aie.objectfifo> aie.objectfifo @inB (%t00, { %t02 }, 2 : i32) : !aie.objectfifo> aie.objectfifo @outC (%t02, { %t00 }, 2 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/test/memTile_test.mlir b/compiler/plugins/target/AMD-AIE/aie/test/memTile_test.mlir index 18b1ed433..b201d419b 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/memTile_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/memTile_test.mlir @@ -51,6 +51,7 @@ module @memTile { aie.device(xcve2302) { %tile11 = aie.tile(2, 1) %tile12 = aie.tile(2, 2) + aie.flow(%tile11, DMA : 0, %tile12, DMA : 0) {symbol = @of} aie.objectfifo @of (%tile11, {%tile12}, 2 : i32) : !aie.objectfifo> } } diff --git a/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_base_AIE2.mlir b/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_base_AIE2.mlir index ad58eb549..47bdc1b8a 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_base_AIE2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_base_AIE2.mlir @@ -118,6 +118,8 @@ module @ndDMAObjFifoAIE2 { // Even if an objectFifo could be 
implemented in shared memory, as with // this case between two adjacent tiles, we need to use DMAs if a data // layout transformation with toStream and fromStream was specified. + aie.flow(%tile12, DMA : 0, %tile13, DMA : 0) {symbol = @of0} + aie.flow(%tile12, DMA : 1, %tile33, DMA : 0) {symbol = @of1} aie.objectfifo @of0 (%tile12 toStream [, , ], // transpose {%tile13 fromStream []}, 4 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_distribute_AIE2.mlir b/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_distribute_AIE2.mlir index da6dcd9f4..eec8cdf86 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_distribute_AIE2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_distribute_AIE2.mlir @@ -107,6 +107,9 @@ module @ndDMAObjFifoAIE2 { %tile11 = aie.tile(2, 1) %tile22 = aie.tile(3, 2) %tile23 = aie.tile(3, 3) + aie.flow(%tile10, DMA : 0, %tile11, DMA : 0) {symbol = @of0} + aie.flow(%tile11, DMA : 0, %tile22, DMA : 0) {symbol = @of1} + aie.flow(%tile11, DMA : 1, %tile23, DMA : 0) {symbol = @of2} aie.objectfifo @of0 (%tile10, {%tile11}, 2 : i32) : !aie.objectfifo> aie.objectfifo @of1 (%tile11 toStream [, diff --git a/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_multiple_consumers_AIE2.mlir b/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_multiple_consumers_AIE2.mlir index 6fbce4ca6..de7dffab5 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_multiple_consumers_AIE2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/nd_dma_multiple_consumers_AIE2.mlir @@ -184,19 +184,23 @@ // CHECK: } module @ndDMAObjFifoAIE2 { - aie.device(xcve2302) { + aie.device(xcve2302) { %tile12 = aie.tile(1, 2) %tile13 = aie.tile(1, 3) %tile33 = aie.tile(3, 3) %tile22 = aie.tile(2, 2) %tile23 = aie.tile(2, 3) + aie.flow(%tile12, DMA : 0, %tile33, DMA : 0) {symbol = @of0} + aie.flow(%tile12, DMA : 0, %tile13, DMA : 0) {symbol = @of0} + aie.flow(%tile12, DMA : 1, %tile33, DMA : 1) {symbol = @of1} + aie.flow(%tile22, DMA : 0, 
%tile23, DMA : 0) {symbol = @of3} aie.objectfifo @of0 (%tile12 toStream [, , ], // transpose - {%tile13 fromStream [], + {%tile13 fromStream [], %tile33 fromStream []}, - 4 : i32) : !aie.objectfifo> + 4 : i32) : !aie.objectfifo> aie.objectfifo @of1 (%tile12 toStream [], {%tile33}, - 2 : i32) : !aie.objectfifo> + 2 : i32) : !aie.objectfifo> aie.objectfifo @of3 (%tile22, {%tile23 fromStream []}, - 2 : i32) : !aie.objectfifo> - } + 2 : i32) : !aie.objectfifo> + } } diff --git a/compiler/plugins/target/AMD-AIE/aie/test/nested_loop_test.mlir b/compiler/plugins/target/AMD-AIE/aie/test/nested_loop_test.mlir index 8075dad93..93ca70c90 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/nested_loop_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/nested_loop_test.mlir @@ -255,6 +255,10 @@ aie.device(npu1_4col) { %tile_0_1 = aie.tile(0, 1) %tile_1_2 = aie.tile(1, 2) %tile_0_2 = aie.tile(0, 2) + aie.flow(%tile_0_1, DMA : 0, %tile_1_2, DMA : 0) {symbol = @in2} + aie.flow(%tile_0_1, DMA : 0, %tile_0_2, DMA : 0) {symbol = @in2} + aie.flow(%tile_0_1, DMA : 1, %tile_1_2, DMA : 1) {symbol = @in7} + aie.flow(%tile_1_2, DMA : 0, %tile_0_1, DMA : 0) {symbol = @in8} aie.objectfifo @in2(%tile_0_1, {%tile_0_2, %tile_1_2}, 4 : i32) : !aie.objectfifo> aie.objectfifo @in7(%tile_0_1, {%tile_1_2}, 4 : i32) : !aie.objectfifo> aie.objectfifo @in8(%tile_1_2, {%tile_0_1}, 4 : i32) : !aie.objectfifo> diff --git a/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_1.mlir b/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_1.mlir index 20b3faef7..43c55037d 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_1.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_1.mlir @@ -84,6 +84,7 @@ module @non_adjacency { aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile33 = aie.tile(3, 3) + aie.flow(%tile12, DMA : 0, %tile33, DMA : 0) {symbol = @objfifo} aie.objectfifo @objfifo (%tile12, {%tile33}, 2 : i32) : !aie.objectfifo> func.func 
@some_work(%lineOut : memref<16xi32>) -> () { return diff --git a/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_2.mlir b/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_2.mlir index a1d712fd0..4dcb43ee7 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_2.mlir @@ -100,6 +100,7 @@ module @non_adjacency { aie.device(npu1_4col) { %tile12 = aie.tile(1, 2) %tile33 = aie.tile(3, 3) + aie.flow(%tile12, DMA : 0, %tile33, DMA : 0) {symbol = @objfifo} aie.objectfifo @objfifo (%tile12, {%tile33}, 2 : i32) : !aie.objectfifo> func.func @some_work(%lineOut : memref<16xi32>) -> () { return diff --git a/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_AIE2.mlir b/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_AIE2.mlir index 2c2f738f7..790d410b9 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_AIE2.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/non_adjacency_test_AIE2.mlir @@ -82,6 +82,7 @@ module @non_adjacency_AIE2 { aie.device(xcve2302) { %tile12 = aie.tile(1, 2) %tile33 = aie.tile(3, 3) + aie.flow(%tile12, DMA : 0, %tile33, DMA : 0) {symbol = @of} aie.objectfifo @of (%tile12, {%tile33}, 2 : i32) : !aie.objectfifo> func.func @some_work(%lineOut : memref<16xi32>) -> () { return diff --git a/compiler/plugins/target/AMD-AIE/aie/test/register_external_buffers_test.mlir b/compiler/plugins/target/AMD-AIE/aie/test/register_external_buffers_test.mlir index bb87d0200..f7b52ccd8 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/register_external_buffers_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/register_external_buffers_test.mlir @@ -54,6 +54,7 @@ module @register_external_buffers { aie.device(npu1_4col) { %tile32 = aie.tile(3, 2) %tile30 = aie.tile(3, 0) + aie.flow(%tile30, DMA : 0, %tile32, DMA : 0) {symbol = @ext_of} aie.objectfifo @ext_of (%tile30, {%tile32}, 3 : i32) : !aie.objectfifo> 
%ext_buffer_in = aie.external_buffer {sym_name = "ext_buffer_in"}: memref<64xi32> aie.objectfifo.register_external_buffers @ext_of (%tile30, {%ext_buffer_in}) : (memref<64xi32>) diff --git a/compiler/plugins/target/AMD-AIE/aie/test/shimRow_mem_test.mlir b/compiler/plugins/target/AMD-AIE/aie/test/shimRow_mem_test.mlir index 4cce02d77..5ad08e2be 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/shimRow_mem_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/shimRow_mem_test.mlir @@ -54,6 +54,7 @@ module @shimRow_mem { aie.device(npu1_4col) { %tile32 = aie.tile(3, 2) %tile30 = aie.tile(3, 0) + aie.flow(%tile30, DMA : 0, %tile32, DMA : 0) {symbol = @objfifo} aie.objectfifo @objfifo (%tile30, {%tile32}, 3 : i32) : !aie.objectfifo> %ext_buffer_in = aie.external_buffer {sym_name = "ext_buffer_in"}: memref<64xi32> aie.objectfifo.register_external_buffers @objfifo (%tile30, {%ext_buffer_in}) : (memref<64xi32>) diff --git a/compiler/plugins/target/AMD-AIE/aie/test/shim_AIE2_test.mlir b/compiler/plugins/target/AMD-AIE/aie/test/shim_AIE2_test.mlir index cd48a2674..88b3a8daa 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/shim_AIE2_test.mlir +++ b/compiler/plugins/target/AMD-AIE/aie/test/shim_AIE2_test.mlir @@ -59,6 +59,8 @@ module @shim_AIE2 { aie.device(xcve2302) { %tile22 = aie.tile(2, 2) %tile20 = aie.tile(2, 0) + aie.flow(%tile20, DMA : 0, %tile22, DMA : 0) {symbol = @of_in} + aie.flow(%tile22, DMA : 0, %tile20, DMA : 0) {symbol = @of_out} aie.objectfifo @of_in (%tile20, {%tile22}, 2 : i32) : !aie.objectfifo> aie.objectfifo @of_out (%tile22, {%tile20}, 2 : i32) : !aie.objectfifo> %ext_buffer_in = aie.external_buffer {sym_name = "ext_buffer_in"}: memref<64xi32> diff --git a/compiler/plugins/target/AMD-AIE/aie/test/shim_broadcast_test.mlir b/compiler/plugins/target/AMD-AIE/aie/test/shim_broadcast_test.mlir index 7a0889d6d..6ea4846ef 100644 --- a/compiler/plugins/target/AMD-AIE/aie/test/shim_broadcast_test.mlir +++ 
b/compiler/plugins/target/AMD-AIE/aie/test/shim_broadcast_test.mlir @@ -82,6 +82,9 @@ module @shim_broadcast { %tile22 = aie.tile(2, 2) %tile23 = aie.tile(2, 3) %tile33 = aie.tile(3, 3) + aie.flow(%tile20, DMA : 0, %tile33, DMA : 0) {symbol = @of_in} + aie.flow(%tile20, DMA : 0, %tile23, DMA : 0) {symbol = @of_in} + aie.flow(%tile20, DMA : 0, %tile22, DMA : 0) {symbol = @of_in} aie.objectfifo @of_in (%tile20, {%tile22, %tile23, %tile33}, 2 : i32) : !aie.objectfifo> %ext_buffer_in = aie.external_buffer {sym_name = "ext_buffer_in"}: memref<64xi32> aie.objectfifo.register_external_buffers @of_in (%tile20, {%ext_buffer_in}) : (memref<64xi32>) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp index 04ce32382..01f37a95d 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp @@ -32,6 +32,15 @@ void BdIdOp::getAsmResultNames(function_ref setNameFn) { setNameFn(getResult(), "bd_id"); } +//===----------------------------------------------------------------------===// +// AMDAIE_ChannelOp +//===----------------------------------------------------------------------===// + +void ChannelOp::getAsmResultNames( + function_ref setNameFn) { + setNameFn(getResult(), "channel"); +} + //===----------------------------------------------------------------------===// // AMDAIE_ControlCodeOp //===----------------------------------------------------------------------===// @@ -417,6 +426,11 @@ void CircularDmaCpyNdOp::getCanonicalizationPatterns(RewritePatternSet &results, // AMDAIE_ConnectionOp //===----------------------------------------------------------------------===// +void ConnectionOp::build(mlir::OpBuilder &b, mlir::OperationState &result, + Value target, Value source) { + build(b, result, target, {}, source, {}); +} + FailureOr ConnectionOp::getNpuCircularDmaCpyNdUser() { SmallVector npuDmaUsers; diff --git 
a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td index 13f06a6f9..d76110e06 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td @@ -205,6 +205,35 @@ def AMDAIE_BdIdOp: AMDAIE_Op<"bd_id", [ let assemblyFormat = [{ `(` $tile `,` $value `)` attr-dict }]; } +def AMDAIE_ChannelOp: AMDAIE_Op<"channel", [ + Pure, + DeclareOpInterfaceMethods + ]>, Results<(outs Index)> { + let summary = "Represents a physical input or output channel/port on an AIE " + "tile."; + let description = [{ + This operation represents a channel (or DMA port) on an AIE tile. The + operation is fully specified through a tile and channel ID value, + designating the exact physical DMA channel/port to be used. This op helps + with guaranteeing/verifying correct reuse of the same channel across + different DMA operations, for example for packet routing. + + Example: + + ```mlir + %tile = amdaie.tile(%c0, %c0) + %channel = amdaie.channel(%tile, 0) + ``` + }]; + + let arguments = ( + ins Index:$tile, + ConfinedAttr]>:$value + ); + + let assemblyFormat = [{ `(` $tile `,` $value `)` attr-dict }]; +} + //===----------------------------------------------------------------------===// // IREE AMDAIE Npu Ops //===----------------------------------------------------------------------===// @@ -579,7 +608,8 @@ def AMDAIE_NpuDmaWaitOp: AMDAIE_Op<"npu.dma_wait", []> { // IREE AMDAIE LogicalObjectFifo Ops //===----------------------------------------------------------------------===// -def AMDAIE_ConnectionOp: AMDAIE_Op<"connection", [Pure, CopyOpInterface]> { +def AMDAIE_ConnectionOp: AMDAIE_Op<"connection", + [Pure, CopyOpInterface, AttrSizedOperandSegments]> { let summary = "A connection between two logical objectFifos."; let description = [{ Represents a connection between logical objectFifos. 
This connection can be @@ -599,15 +629,29 @@ def AMDAIE_ConnectionOp: AMDAIE_Op<"connection", [Pure, CopyOpInterface]> { let arguments = ( ins AnyAMDAIELogicalObjectFifoType:$target, - AnyAMDAIELogicalObjectFifoType:$source + Variadic:$target_channels, + AnyAMDAIELogicalObjectFifoType:$source, + Variadic:$source_channels ); let results = (outs Index:$result); let assemblyFormat = [{ - `(` $target `,` $source `)` attr-dict `:` `(` type($target) `,` type($source) `)` + `(` + $target + ( ` ` `{` $target_channels^ `}` )? + `,` + $source + ( ` ` `{` $source_channels^ `}` )? + `)` + attr-dict + `:` `(` type($target) `,` type($source) `)` }]; + let builders = [ + OpBuilder<(ins "Value":$target, "::mlir::Value":$source)> + ]; + let extraClassDeclaration = [{ Value getSourceMemref() { return getSource(); } Value getTargetMemref() { return getTarget(); } diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAssignChannels.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAssignChannels.cpp new file mode 100644 index 000000000..d70c71129 --- /dev/null +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEAssignChannels.cpp @@ -0,0 +1,92 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "iree-amd-aie/IR/AMDAIEOps.h" +#include "iree-amd-aie/Transforms/AMDAIEUtils.h" +#include "iree-amd-aie/Transforms/Passes.h" +#include "iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h" +#include "iree-amd-aie/aie_runtime/iree_aie_runtime.h" + +#define DEBUG_TYPE "iree-amdaie-assign-channels" + +namespace mlir::iree_compiler::AMDAIE { + +namespace { + +/// Assign channels to `amdaie.connection` ops. 
+LogicalResult assignChannels(AMDAIE::WorkgroupOp workgroupOp) { + IRRewriter rewriter(workgroupOp->getContext()); + ChannelGenerator generator; + SmallVector connectionOps; + workgroupOp->walk([&](AMDAIE::ConnectionOp connectionOp) { + connectionOps.push_back(connectionOp); + }); + for (AMDAIE::ConnectionOp connectionOp : connectionOps) { + auto sourceLogicalObjFifo = + dyn_cast_if_present( + connectionOp.getSource().getDefiningOp()); + if (!sourceLogicalObjFifo) { + return connectionOp.emitOpError() + << "expected a `LogicalObjFifoOpInterface` source"; + } + auto targetLogicalObjFifo = + dyn_cast_if_present( + connectionOp.getTarget().getDefiningOp()); + if (!targetLogicalObjFifo) { + return connectionOp.emitOpError() + << "expected a `LogicalObjFifoOpInterface` target"; + } + + rewriter.setInsertionPoint(connectionOp); + SmallVector sourceChannels; + for (Value tile : sourceLogicalObjFifo.getTiles()) { + uint8_t channel = generator.getProducerDMAChannel(tile); + auto channelOp = rewriter.create( + rewriter.getUnknownLoc(), tile, channel); + sourceChannels.push_back(channelOp.getResult()); + } + SmallVector targetChannels; + for (Value tile : targetLogicalObjFifo.getTiles()) { + uint8_t channel = generator.getConsumerDMAChannel(tile); + auto channelOp = rewriter.create( + rewriter.getUnknownLoc(), tile, channel); + targetChannels.push_back(channelOp.getResult()); + } + rewriter.replaceOpWithNewOp( + connectionOp, connectionOp.getTarget(), targetChannels, + connectionOp.getSource(), sourceChannels); + } + return success(); +} + +class AMDAIEAssignChannelsPass + : public impl::AMDAIEAssignChannelsBase { + public: + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + void runOnOperation() override; +}; + +void AMDAIEAssignChannelsPass::runOnOperation() { + Operation *parentOp = getOperation(); + SmallVector workgroupOps; + parentOp->walk([&](AMDAIE::WorkgroupOp workgroupOp) { + workgroupOps.push_back(workgroupOp); + }); + 
for (AMDAIE::WorkgroupOp workgroupOp : workgroupOps) {
+    if (failed(assignChannels(workgroupOp))) return signalPassFailure();
+  }
+}
+
+}  // namespace
+
+std::unique_ptr createAMDAIEAssignChannelsPass() {
+  return std::make_unique();
+}
+
+}  // namespace mlir::iree_compiler::AMDAIE
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp
index 7f1e23be9..5b22fe5cc 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp
@@ -93,9 +93,10 @@ AIE::BDDimLayoutArrayAttr convertSizeStrideToBDDimLayoutArrayAttr(
 /// Utility to create an `aie.objectfifo` operation from
 /// `amdaie.circular_dma_cpy_nd`.
 FailureOr createObjectFifo(
-    IRRewriter &rewriter, AMDAIE::ConnectionOp connectionOp,
+    IRRewriter &rewriter, AMDAIE::ConnectionOp connectionOp, IRMapping &mapper,
     AMDAIE::NpuCircularDmaCpyNdOp dmaOp, Value srcTile, ValueRange dstTiles,
     StringAttr &symName) {
+  OpBuilder::InsertionGuard guard(rewriter);
   auto sourceType =
       cast(connectionOp.getSource().getType());
   auto targetType =
@@ -119,6 +120,29 @@ FailureOr createObjectFifo(
     depth = sourceDepth;
   }
 
+  // Resolve the channel operands to their defining `amdaie.channel` ops. A
+  // channel with no defining op or a different defining op is an error.
+  SmallVector producerChannels;
+  SmallVector consumerChannels;
+  for (Value producerChannel : connectionOp.getSourceChannels()) {
+    auto channelOp =
+        dyn_cast_if_present<AMDAIE::ChannelOp>(producerChannel.getDefiningOp());
+    if (!channelOp) {
+      return connectionOp.emitOpError()
+             << "found non-`amdaie.channel` source channel";
+    }
+    producerChannels.push_back(channelOp);
+  }
+  for (Value consumerChannel : connectionOp.getTargetChannels()) {
+    auto channelOp =
+        dyn_cast_if_present<AMDAIE::ChannelOp>(consumerChannel.getDefiningOp());
+    if (!channelOp) {
+      return connectionOp.emitOpError()
+             << "found non-`amdaie.channel` target channel";
+    }
+    consumerChannels.push_back(channelOp);
+  }
+
   // Convert source and target sizes and strides to `BDDimLayoutArrayAttr`s,
   // which the `aie.objectfifo` works with.
   AIE::BDDimLayoutArrayAttr sourceDims =
@@ -167,6 +191,20 @@ FailureOr createObjectFifo(
       rewriter.getUnknownLoc(), symName, srcTile, dstTiles,
       rewriter.getIntegerAttr(rewriter.getI32Type(), depth), dtype,
       sourceDims, targetDims);
+
+  // Insert flow ops
+  rewriter.setInsertionPoint(fifo);
+  for (AMDAIE::ChannelOp producerChannel : producerChannels) {
+    for (AMDAIE::ChannelOp consumerChannel : consumerChannels) {
+      Value aieProducerTile = mapper.lookup(producerChannel.getTile());
+      Value aieConsumerTile = mapper.lookup(consumerChannel.getTile());
+      rewriter.create(
+          rewriter.getUnknownLoc(), aieProducerTile, AIE::WireBundle::DMA,
+          producerChannel.getValue(), aieConsumerTile, AIE::WireBundle::DMA,
+          consumerChannel.getValue(), FlatSymbolRefAttr::get(fifo->getContext(), fifo.getName()));
+    }
+  }
+
   return fifo;
 }
 
@@ -231,8 +269,8 @@ LogicalResult acquireOpToAIE(IRRewriter &rewriter,
   OpBuilder::InsertionGuard guard(rewriter);
   rewriter.setInsertionPoint(acquireOp);
 
-  auto connectionOp =
-      dyn_cast_if_present(acquireOp.getDma().getDefiningOp());
+  auto connectionOp = dyn_cast_if_present(
+      acquireOp.getDma().getDefiningOp());
   if (!connectionOp) {
-    return connectionOp.emitError()
+    return acquireOp.emitError()
            << "acquire doesn't operate on a `amdaie.connection`";
@@ -480,7 +518,7 @@ LogicalResult flowToAIE(IRRewriter &rewriter, AMDAIE::ConnectionOp connectionOp,
   auto symName = "obj" + std::to_string(dmaId++);
   StringAttr symAttr = rewriter.getStringAttr(symName);
   FailureOr objFifo =
-      createObjectFifo(rewriter, connectionOp, npuDmaUserOp.value(),
+      createObjectFifo(rewriter, connectionOp, mapper, npuDmaUserOp.value(),
                        newSourceTile, newTargetTiles, symAttr);
   if (failed(objFifo)) return failure();
   mapper.map(connectionOp.getOperation(), objFifo.value().getOperation());
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt
index e5afdc73b..2979c71ef 100644
---
a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt @@ -46,6 +46,7 @@ iree_cc_library( "AMDAIEAccessToAcquireRelease.cpp" "AMDAIEAddLoweringStrategy.cpp" "AMDAIEAIRDmaToAMDAIEDma.cpp" + "AMDAIEAssignChannels.cpp" "AMDAIEAssignLogicalObjectFifoDepth.cpp" "AMDAIEAssignNpuDmaBdIds.cpp" "AMDAIEBufferizeToAllocation.cpp" diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h index 62db55e72..8912db52d 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h @@ -22,6 +22,7 @@ namespace mlir::iree_compiler::AMDAIE { #define GEN_PASS_DECL #define GEN_PASS_DEF_AMDAIEACCESSTOACQUIRERELEASE #define GEN_PASS_DEF_AMDAIEAIRDMATOAMDAIEDMA +#define GEN_PASS_DEF_AMDAIEASSIGNCHANNELS #define GEN_PASS_DEF_AMDAIEASSIGNLOGICALOBJECTFIFODEPTH #define GEN_PASS_DEF_AMDAIEASSIGNNPUDMABDIDS #define GEN_PASS_DEF_AMDAIEBRIDGETOAIR diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index 33ab6864b..c1aa45c0b 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -632,6 +632,10 @@ void addAMDAIEObjectFifoLoweringPasses(OpPassManager &passManager) { passManager.addPass(createCanonicalizerPass()); passManager.addPass(createAMDAIECoreLoopUnrollPass()); + passManager.addPass(createAMDAIEAssignChannelsPass()); + passManager.addPass(createCSEPass()); + passManager.addPass(createCanonicalizerPass()); + addAMDAIEToAIEPasses(passManager); // Now lower using the AIE passes from MLIR-AIE. 
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h index 905bbb709..fe5670067 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h @@ -57,6 +57,9 @@ std::unique_ptr createAMDAIEAccessToAcquireReleasePass(); /// logical objectFifos. std::unique_ptr createAMDAIEAIRDmaAMDAIEDmaPass(); +/// Create a pass to assign channels to connections. +std::unique_ptr createAMDAIEAssignChannelsPass(); + /// Create a pass to assign a buffer depth to /// `amdaie.logicalobjectfifo.from_memref` ops. std::unique_ptr createAMDAIEAssignLogicalObjectFifoDepthPass( diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td index c4cde8dad..73ceee040 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td @@ -23,6 +23,12 @@ def AMDAIEAIRDmaToAMDAIEDma : let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIEAIRDmaAMDAIEDmaPass()"; } +def AMDAIEAssignChannels : + Pass<"iree-amdaie-assign-channels", ""> { + let summary = "Assign channels to `amdaie.connection` ops."; + let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIEAssignChannelsPass()"; +} + def AMDAIEAssignLogicalObjectFifoDepth : Pass<"iree-amdaie-assign-logical-objectfifo-depth", ""> { let summary = "Assign a buffer depth of the logical objectfifos."; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt index 7d99ee216..ba4380860 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt @@ -11,6 +11,7 @@ iree_lit_test_suite( 
"access_to_acquire_release.mlir" "aie_link_executables.mlir" "air_dma_to_amdaie_dma.mlir" + "assign_channels.mlir" "assign_logical_objectfifo_depth.mlir" "assign_npu_dma_bd_ids.mlir" "bridge_to_air.mlir" diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir new file mode 100644 index 000000000..80855c059 --- /dev/null +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/assign_channels.mlir @@ -0,0 +1,74 @@ +// RUN: iree-opt --pass-pipeline="builtin.module(iree-amdaie-assign-channels)" --split-input-file --verify-diagnostics %s | FileCheck %s + +// CHECK-LABEL: @assign_channels +// CHECK: %[[C0:.+]] = arith.constant 0 : index +// CHECK: %[[C1:.+]] = arith.constant 1 : index +// CHECK: amdaie.workgroup +// CHECK: %[[tile_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) +// CHECK: %[[tile_0_1:.+]] = amdaie.tile(%[[C0]], %[[C1]]) +// CHECK: %[[CHANNEL_0:.+]] = amdaie.channel(%[[tile_0_0]], 0) +// CHECK: %[[CHANNEL_1:.+]] = amdaie.channel(%[[tile_0_1]], 0) +// CHECK: amdaie.connection(%{{.+}} {%[[CHANNEL_1]]}, %{{.+}} {%[[CHANNEL_0]]}) +// CHECK: %[[CHANNEL_2:.+]] = amdaie.channel(%[[tile_0_0]], 1) +// CHECK: %[[CHANNEL_3:.+]] = amdaie.channel(%[[tile_0_1]], 1) +// CHECK: amdaie.connection(%{{.+}} {%[[CHANNEL_3]]}, %{{.+}} {%[[CHANNEL_2]]}) +// CHECK: %[[CHANNEL_4:.+]] = amdaie.channel(%[[tile_0_0]], 2) +// CHECK: %[[CHANNEL_5:.+]] = amdaie.channel(%[[tile_0_1]], 2) +// CHECK: amdaie.connection(%{{.+}} {%[[CHANNEL_5]]}, %{{.+}} {%[[CHANNEL_4]]}) +module { + func.func @assign_channels(%arg0: memref<1x1x8x16xi32, 1>, %arg1: memref<8x16xi32>) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + amdaie.workgroup { + %tile_0_0 = amdaie.tile(%c0, %c0) + %tile_0_1 = amdaie.tile(%c0, %c1) + %0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_1} : memref<1x1x8x16xi32, 1> -> !amdaie.logicalobjectfifo> + %1 = 
amdaie.logicalobjectfifo.from_memref %arg1, {%tile_0_0} : memref<8x16xi32> -> !amdaie.logicalobjectfifo> + %2 = amdaie.connection(%0, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) + %3 = amdaie.connection(%0, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) + %4 = amdaie.connection(%0, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) + amdaie.controlcode { + amdaie.end + } + } + return + } +} + +// ----- + +module { + func.func @no_source(%arg0: memref<1x1x8x16xi32, 1>, %arg1: !amdaie.logicalobjectfifo>) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + amdaie.workgroup { + %tile_0_1 = amdaie.tile(%c0, %c1) + %0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0_1} : memref<1x1x8x16xi32, 1> -> !amdaie.logicalobjectfifo> + // expected-error @+1 {{expected a `LogicalObjFifoOpInterface` source}} + %1 = amdaie.connection(%0, %arg1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) + amdaie.controlcode { + amdaie.end + } + } + return + } +} + +// ----- + +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { + func.func @no_target(%arg0: !amdaie.logicalobjectfifo>, %arg1: memref<8x16xi32>) { + %c0 = arith.constant 0 : index + amdaie.workgroup { + %tile_0_0 = amdaie.tile(%c0, %c0) + %0 = amdaie.logicalobjectfifo.from_memref %arg1, {%tile_0_0} : memref<8x16xi32> -> !amdaie.logicalobjectfifo> + // expected-error @+1 {{expected a `LogicalObjFifoOpInterface` target}} + %1 = amdaie.connection(%arg0, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) + amdaie.controlcode { + amdaie.end + } + } + return + } +} diff --git a/runtime/src/iree-amd-aie/aie_runtime/Utils/CMakeLists.txt b/runtime/src/iree-amd-aie/aie_runtime/Utils/CMakeLists.txt index c3f6c0e26..7e0017dd4 100644 --- 
a/runtime/src/iree-amd-aie/aie_runtime/Utils/CMakeLists.txt
+++ b/runtime/src/iree-amd-aie/aie_runtime/Utils/CMakeLists.txt
@@ -13,6 +13,7 @@ iree_cc_library(
     Utils
   HDRS
     "ChannelBdIdGenerator.h"
+    "ChannelGenerator.h"
   SRCS
     "ChannelBdIdGenerator.cpp"
   DEPS
diff --git a/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h b/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h
new file mode 100644
index 000000000..3fca74e7c
--- /dev/null
+++ b/runtime/src/iree-amd-aie/aie_runtime/Utils/ChannelGenerator.h
@@ -0,0 +1,51 @@
+// Copyright 2020 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_COMPILER_AMDAIE_UTILS_CHANNEL_GENERATOR_H_
+#define IREE_COMPILER_AMDAIE_UTILS_CHANNEL_GENERATOR_H_
+
+#include <cstdint>
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Support/LogicalResult.h"
+
+namespace mlir::iree_compiler::AMDAIE {
+
+/// Hands out DMA channel indices per tile. Producer and consumer channels are
+/// counted independently; each counter starts at 0 the first time a tile is
+/// seen and increases by one on every request.
+/// NOTE: the returned index is a `uint8_t` and is not range-checked, so it
+/// wraps around after 255 requests for the same tile and direction.
+/// TODO(jornt): add physical feasibility checks on channels.
+class ChannelGenerator {
+ public:
+  ChannelGenerator() {}
+
+  /// Returns the next unused producer channel index for `tile` and advances
+  /// that tile's producer counter.
+  uint8_t getProducerDMAChannel(Value tile) {
+    return producerChannelsPerTile[tile]++;
+  }
+
+  /// Returns the next unused consumer channel index for `tile` and advances
+  /// that tile's consumer counter.
+  uint8_t getConsumerDMAChannel(Value tile) {
+    return consumerChannelsPerTile[tile]++;
+  }
+
+ private:
+  /// Next producer channel index to hand out, keyed by tile.
+  llvm::DenseMap<Value, uint8_t> producerChannelsPerTile;
+  /// Next consumer channel index to hand out, keyed by tile.
+  llvm::DenseMap<Value, uint8_t> consumerChannelsPerTile;
+};
+
+}  // namespace mlir::iree_compiler::AMDAIE
+
+#endif  // IREE_COMPILER_AMDAIE_UTILS_CHANNEL_GENERATOR_H_