Skip to content

Commit

Permalink
incorporate comments
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental committed Jul 17, 2024
1 parent a6e806b commit cfa3c9d
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 114 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -159,34 +159,45 @@ LogicalResult configureBdInBlock(XAie_DmaDesc &dmaTileBd, Block &block,
std::optional<llvm::ArrayRef<BDDimLayoutAttr>> dims = bdOp.getDimensions();
int lenInBytes = bdOp.getLenInBytes();
int basePlusOffsetInBytes = baseAddr + bdOp.getOffsetInBytes();
int32_t bufferElementTypeWidthInBytes =
bdOp.getBufferElementTypeWidthInBytes();
// aie-rt expects multiples of 32b words (see docstring on
// XAie_DmaSetMultiDimAddr). Thus, elementWidthIn32bWords is possibly a
// fraction, e.g. bf16 => elementWidthIn32bWords == 0.5 so that size = 10 => 5
// 32b words
double elementWidthIn32bWords =
static_cast<double>(bufferElementTypeWidthInBytes) / 4.0;

if (!dims) {
TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetAddrLen, &dmaTileBd,
basePlusOffsetInBytes, lenInBytes);
} else {
XAie_DmaTensor dmaTileBdTensor = {};
dmaTileBdTensor.NumDim = dims->size();
dmaTileBdTensor.Dim = static_cast<XAie_DmaDimDesc *>(
calloc(dmaTileBdTensor.NumDim, sizeof(XAie_DmaDimDesc)));
if (!dmaTileBdTensor.Dim)
return bdOp.emitError("couldn't allocate array of XAie_DmaDimDesc");
// libxaie requires stride in multiples of 32b
double elementWidthIn32bWords =
static_cast<double>(bdOp.getBufferElementTypeWidthInBytes()) / 4.0;
dmaTileBdTensor.Dim = new XAie_DmaDimDesc[dmaTileBdTensor.NumDim];
for (size_t i = 0; i < dims->size(); i++) {
// Pass down dimensions in reverse order; in the MLIR, this allows
// us to specify step sizes/wraps in the same order as we would
// access a multi-dim C array, with the highest dimension first.
int j = dims->size() - i - 1;
uint16_t size;
uint32_t stride;
// us to specify step sizes/strides in the same order as we would for
// RankedTensorType/MemRefType.
uint16_t size = dims.value()[i].getSize();
uint32_t stride = dims.value()[i].getStride();
size_t j = dims->size() - i - 1;
if (j > 0) {
stride = static_cast<uint32_t>(dims.value()[i].getStride() *
elementWidthIn32bWords);
size = dims.value()[i].getSize();
if (stride * bufferElementTypeWidthInBytes % 4 != 0) {
return bdOp.emitOpError("`stride` on dim ")
<< i
<< ", times element width (in bytes), should "
"be a multiple of 4 bytes";
}
stride = static_cast<uint32_t>(stride * elementWidthIn32bWords);
} else {
stride = dims.value()[i].getStride();
size = static_cast<uint16_t>(dims.value()[i].getSize() *
elementWidthIn32bWords);
if (size * bufferElementTypeWidthInBytes % 4 != 0) {
return bdOp.emitOpError("`size` on dim ")
<< i
<< ", times element width (in bytes), should "
"be a multiple of 4 bytes";
}
size = static_cast<uint16_t>(size * elementWidthIn32bWords);
}
stride = stride > 0 ? stride : 1;
// Assume AIE-ML architecture (ie use AieMlDimDesc instead of AieDimDesc);
Expand All @@ -204,26 +215,24 @@ LogicalResult configureBdInBlock(XAie_DmaDesc &dmaTileBd, Block &block,
if (padDims) {
XAie_DmaPadTensor dmaPadTensor = {};
dmaPadTensor.NumDim = padDims->size();
dmaPadTensor.PadDesc = static_cast<XAie_PadDesc *>(
calloc(dmaPadTensor.NumDim, sizeof(XAie_PadDesc)));
if (!dmaPadTensor.PadDesc)
return bdOp.emitError("couldn't allocate array of XAie_PadDesc");
// libxaie requires stride in multiples of 32b
double elementWidthIn32bWords =
static_cast<double>(bdOp.getBufferElementTypeWidthInBytes()) / 4.0;
dmaPadTensor.PadDesc = new XAie_PadDesc[dmaPadTensor.NumDim];
for (size_t i = 0; i < padDims->size(); i++) {
// Pass down dimensions in reverse order.
int j = padDims->size() - i - 1;
uint8_t before;
uint8_t after;
if (j > 0) {
before = static_cast<uint8_t>(padDims.value()[i].getConstPadBefore());
after = static_cast<uint8_t>(padDims.value()[i].getConstPadAfter());
} else {
before = static_cast<uint8_t>(padDims.value()[i].getConstPadBefore() *
elementWidthIn32bWords);
after = static_cast<uint8_t>(padDims.value()[i].getConstPadAfter() *
elementWidthIn32bWords);
uint8_t before = padDims.value()[i].getConstPadBefore();
uint8_t after = padDims.value()[i].getConstPadAfter();
size_t j = padDims->size() - i - 1;
if (j == 0) {
if (before * bufferElementTypeWidthInBytes % 4 != 0) {
return bdOp.emitOpError(
"`before` padding on inner-most dim, times element width (in "
"bytes), should be a multiple of 4 bytes");
}
if (after * bufferElementTypeWidthInBytes % 4 != 0) {
return bdOp.emitOpError(
"`after` padding on inner-most dim, times element width (in "
"bytes), should be a multiple of 4 bytes");
}
before = static_cast<uint8_t>(before * elementWidthIn32bWords);
after = static_cast<uint8_t>(after * elementWidthIn32bWords);
}
dmaPadTensor.PadDesc[j] = {before, after};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,8 @@

module {
aie.device(npu1_1col) {
memref.global "public" @objFifo_in0 : memref<56x56xi8>
memref.global "public" @objFifo_out0 : memref<64x64xi8>
%tile_0_0 = aie.tile(0, 0)
%tile_0_1 = aie.tile(0, 1)
%tile_0_2 = aie.tile(0, 2)
%objFifo_in1_cons_buff_0 = aie.buffer(%tile_0_2) {address = 0 : i32, sym_name = "objFifo_in1_cons_buff_0"} : memref<64x64xi8>
%objFifo_in1_cons_buff_1 = aie.buffer(%tile_0_2) {address = 4096 : i32, sym_name = "objFifo_in1_cons_buff_1"} : memref<64x64xi8>
%objFifo_out1_buff_0 = aie.buffer(%tile_0_2) {address = 8192 : i32, sym_name = "objFifo_out1_buff_0"} : memref<64x64xi8>
%objFifo_out1_buff_1 = aie.buffer(%tile_0_2) {address = 12288 : i32, sym_name = "objFifo_out1_buff_1"} : memref<64x64xi8>
aie.flow(%tile_0_0, DMA : 0, %tile_0_1, DMA : 0)
aie.flow(%tile_0_1, DMA : 0, %tile_0_2, DMA : 0)
aie.flow(%tile_0_1, DMA : 1, %tile_0_0, DMA : 0)
aie.flow(%tile_0_2, DMA : 0, %tile_0_1, DMA : 1)
aie.shim_dma_allocation @objFifo_in0(MM2S, 0, 0)
func.func @bobsyouruncle(%arg0: memref<61x56xi8>, %arg1: memref<32xi8>, %arg2: memref<64x64xi8>) {
%c0_i64 = arith.constant 0 : i64
%c1_i64 = arith.constant 1 : i64
%c56_i64 = arith.constant 56 : i64
%c61_i64 = arith.constant 61 : i64
%c64_i64 = arith.constant 64 : i64
aiex.npu.dma_memcpy_nd(0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c61_i64, %c56_i64][%c0_i64, %c0_i64, %c56_i64, %c1_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<61x56xi8>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c64_i64, %c64_i64][%c0_i64, %c0_i64, %c64_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64x64xi8>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
%objFifo_in0_cons_buff_0 = aie.buffer(%tile_0_1) {address = 0 : i32, sym_name = "objFifo_in0_cons_buff_0"} : memref<64x64xi8>
%objFifo_in0_cons_buff_1 = aie.buffer(%tile_0_1) {address = 4096 : i32, sym_name = "objFifo_in0_cons_buff_1"} : memref<64x64xi8>
%objFifo_out0_buff_0 = aie.buffer(%tile_0_1) {address = 8192 : i32, sym_name = "objFifo_out0_buff_0"} : memref<64x64xi8>
%objFifo_out0_buff_1 = aie.buffer(%tile_0_1) {address = 12288 : i32, sym_name = "objFifo_out0_buff_1"} : memref<64x64xi8>
%objFifo_in0_cons_prod_lock = aie.lock(%tile_0_1, 0) {init = 1 : i32, sym_name = "objFifo_in0_cons_prod_lock"}
%objFifo_in0_cons_cons_lock = aie.lock(%tile_0_1, 1) {init = 0 : i32, sym_name = "objFifo_in0_cons_cons_lock"}
%memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,9 @@

module {
aie.device(npu1_1col) {
memref.global "public" @objFifo_in0 : memref<16xi8>
memref.global "public" @objFifo_out0 : memref<16xi8>
%tile_0_0 = aie.tile(0, 0)
%tile_0_1 = aie.tile(0, 1)
%tile_0_2 = aie.tile(0, 2)
%objFifo_in1_cons_buff_0 = aie.buffer(%tile_0_2) {address = 0 : i32, sym_name = "objFifo_in1_cons_buff_0"} : memref<8xi8>
%objFifo_in1_cons_buff_1 = aie.buffer(%tile_0_2) {address = 8 : i32, sym_name = "objFifo_in1_cons_buff_1"} : memref<8xi8>
%objFifo_out1_buff_0 = aie.buffer(%tile_0_2) {address = 16 : i32, sym_name = "objFifo_out1_buff_0"} : memref<8xi8>
%objFifo_out1_buff_1 = aie.buffer(%tile_0_2) {address = 24 : i32, sym_name = "objFifo_out1_buff_1"} : memref<8xi8>
aie.flow(%tile_0_0, DMA : 0, %tile_0_1, DMA : 0)
aie.flow(%tile_0_1, DMA : 0, %tile_0_2, DMA : 0)
aie.flow(%tile_0_1, DMA : 1, %tile_0_0, DMA : 0)
aie.flow(%tile_0_2, DMA : 0, %tile_0_1, DMA : 1)
aie.shim_dma_allocation @objFifo_in0(MM2S, 0, 0)
func.func @bobsyouruncle(%arg0: memref<64xi8>, %arg1: memref<32xi8>, %arg2: memref<64xi8>) {
%c0_i64 = arith.constant 0 : i64
%c1_i64 = arith.constant 1 : i64
%c32_i64 = arith.constant 32 : i64
%c64_i64 = arith.constant 64 : i64
aiex.npu.dma_memcpy_nd(0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c32_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<64xi8>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64xi8>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
%objFifo_in0_cons_buff_0 = aie.buffer(%tile_0_1) {address = 0 : i32, sym_name = "objFifo_in0_cons_buff_0"} : memref<16xi8>
%objFifo_in0_cons_buff_1 = aie.buffer(%tile_0_1) {address = 16 : i32, sym_name = "objFifo_in0_cons_buff_1"} : memref<16xi8>
%objFifo_out0_buff_0 = aie.buffer(%tile_0_1) {address = 32 : i32, sym_name = "objFifo_out0_buff_0"} : memref<16xi8>
%objFifo_out0_buff_1 = aie.buffer(%tile_0_1) {address = 48 : i32, sym_name = "objFifo_out0_buff_1"} : memref<16xi8>
%objFifo_in0_cons_prod_lock = aie.lock(%tile_0_1, 0) {init = 2 : i32, sym_name = "objFifo_in0_cons_prod_lock"}
%objFifo_in0_cons_cons_lock = aie.lock(%tile_0_1, 1) {init = 0 : i32, sym_name = "objFifo_in0_cons_cons_lock"}
%memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,9 @@

module {
aie.device(npu1_1col) {
memref.global "public" @objFifo_in0 : memref<16xi32>
memref.global "public" @objFifo_out0 : memref<16xi32>
%tile_0_0 = aie.tile(0, 0)
%tile_0_1 = aie.tile(0, 1)
%tile_0_2 = aie.tile(0, 2)
%objFifo_in1_cons_buff_0 = aie.buffer(%tile_0_2) {address = 0 : i32, sym_name = "objFifo_in1_cons_buff_0"} : memref<8xi32>
%objFifo_in1_cons_buff_1 = aie.buffer(%tile_0_2) {address = 32 : i32, sym_name = "objFifo_in1_cons_buff_1"} : memref<8xi32>
%objFifo_out1_buff_0 = aie.buffer(%tile_0_2) {address = 64 : i32, sym_name = "objFifo_out1_buff_0"} : memref<8xi32>
%objFifo_out1_buff_1 = aie.buffer(%tile_0_2) {address = 96 : i32, sym_name = "objFifo_out1_buff_1"} : memref<8xi32>
aie.flow(%tile_0_0, DMA : 0, %tile_0_1, DMA : 0)
aie.flow(%tile_0_1, DMA : 0, %tile_0_2, DMA : 0)
aie.flow(%tile_0_1, DMA : 1, %tile_0_0, DMA : 0)
aie.flow(%tile_0_2, DMA : 0, %tile_0_1, DMA : 1)
aie.shim_dma_allocation @objFifo_in0(MM2S, 0, 0)
func.func @bobsyouruncle(%arg0: memref<64xi32>, %arg1: memref<32xi32>, %arg2: memref<64xi32>) {
%c0_i64 = arith.constant 0 : i64
%c1_i64 = arith.constant 1 : i64
%c52_i64 = arith.constant 52 : i64
%c64_i64 = arith.constant 64 : i64
aiex.npu.dma_memcpy_nd(0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c52_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<64xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64, %c1_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
%objFifo_in0_cons_buff_0 = aie.buffer(%tile_0_1) {address = 0 : i32, sym_name = "objFifo_in0_cons_buff_0"} : memref<16xi32>
%objFifo_in0_cons_buff_1 = aie.buffer(%tile_0_1) {address = 64 : i32, sym_name = "objFifo_in0_cons_buff_1"} : memref<16xi32>
%objFifo_out0_buff_0 = aie.buffer(%tile_0_1) {address = 128 : i32, sym_name = "objFifo_out0_buff_0"} : memref<16xi32>
%objFifo_out0_buff_1 = aie.buffer(%tile_0_1) {address = 192 : i32, sym_name = "objFifo_out0_buff_1"} : memref<16xi32>
%objFifo_in0_cons_prod_lock = aie.lock(%tile_0_1, 0) {init = 2 : i32, sym_name = "objFifo_in0_cons_prod_lock"}
%objFifo_in0_cons_cons_lock = aie.lock(%tile_0_1, 1) {init = 0 : i32, sym_name = "objFifo_in0_cons_cons_lock"}
%memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// RUN: iree-opt --verify-diagnostics --split-input-file %s

// Negative test: i8 buffer with inner-most const_pad_before = 2.
// Two i8 elements are 2 bytes, which is not a whole number of 32-bit words,
// so the op is expected to be rejected with the padding-before diagnostic
// checked below. (const_pad_after = 2 is also not word-aligned, but the
// diagnostic this test expects is the padding-before one.)
module {
aie.device(xcve2802) {
%t1 = aie.tile(1, 1)
%buf = aie.buffer(%t1) : memref<256xi8>
%mem = aie.memtile_dma(%t1) {
aie.dma_start("MM2S", 0, ^bd0, ^end)
^bd0:
// expected-error@+1 {{'aie.dma_bd' op Inner-most padding-before count must result in padding in 32-bit words.}}
aie.dma_bd(%buf : memref<256xi8>, 0, 8, [<size = 4, stride = 1>], [<const_pad_before = 2, const_pad_after = 2>], pad_value = 0)
aie.next_bd ^end
^end:
aie.end
}
}
}

// -----

// Negative test: i32 buffer, so every padding count is already a whole number
// of 32-bit words and the word-alignment checks cannot fire. The op is expected
// to be rejected with the "Data exceeds len after padding" diagnostic instead.
// NOTE(review): presumably size + const_pad_before + const_pad_after
// (2 + 2 + 1 = 5) exceeds the length operand (4) -- confirm against the
// aie.dma_bd verifier.
module {
aie.device(xcve2802) {
%t1 = aie.tile(1, 1)
%buf = aie.buffer(%t1) : memref<256xi32>
%mem = aie.memtile_dma(%t1) {
aie.dma_start("MM2S", 0, ^bd0, ^end)
^bd0:
// expected-error@+1 {{'aie.dma_bd' op Data exceeds len after padding.}}
aie.dma_bd(%buf : memref<256xi32>, 0, 4, [<size = 2, stride = 128>], [<const_pad_before = 2, const_pad_after = 1>], pad_value = 0)
aie.next_bd ^end
^end:
aie.end
}
}
}

// -----

// Negative test: bf16 buffer (2-byte elements) with inner-most
// const_pad_before = 3. Three bf16 elements are 6 bytes, which is not a whole
// number of 32-bit words, while const_pad_after = 2 (4 bytes) is aligned.
// The op is expected to be rejected with the padding-before diagnostic.
module {
aie.device(xcve2802) {
%t1 = aie.tile(1, 1)
%buf = aie.buffer(%t1) : memref<256xbf16>
%mem = aie.memtile_dma(%t1) {
aie.dma_start("MM2S", 0, ^bd0, ^end)
^bd0:
// expected-error@+1 {{'aie.dma_bd' op Inner-most padding-before count must result in padding in 32-bit words.}}
aie.dma_bd(%buf : memref<256xbf16>, 0, 256, [<size = 4, stride = 1>], [<const_pad_before = 3, const_pad_after = 2>], pad_value = 0)
aie.next_bd ^end
^end:
aie.end
}
}
}

// -----

// Negative test: bf16 buffer (2-byte elements) with inner-most
// const_pad_after = 3. const_pad_before = 2 (4 bytes) is word-aligned, but
// three bf16 elements of trailing padding are 6 bytes, which is not a whole
// number of 32-bit words. The op is expected to be rejected with the
// padding-after diagnostic.
module {
aie.device(xcve2802) {
%t1 = aie.tile(1, 1)
%buf = aie.buffer(%t1) : memref<256xbf16>
%mem = aie.memtile_dma(%t1) {
aie.dma_start("MM2S", 0, ^bd0, ^end)
^bd0:
// expected-error@+1 {{'aie.dma_bd' op Inner-most padding-after count must result in padding in 32-bit words.}}
aie.dma_bd(%buf : memref<256xbf16>, 0, 256, [<size = 4, stride = 1>], [<const_pad_before = 2, const_pad_after = 3>], pad_value = 0)
aie.next_bd ^end
^end:
aie.end
}
}
}

0 comments on commit cfa3c9d

Please sign in to comment.