Skip to content

Commit

Permalink
Purge assertions part 2: propagate errors to signalPassFailure() (#…
Browse files Browse the repository at this point in the history
…925)

* Propagate errors to signalPassFailure() so that failures are reported to the user

* Propagate error to signalPassFailure

* Propagate error to signalPassFailure

* Propagate error to signalPassFailure

* Propagate error to signalPassFailure

* Refactor AIRToAIESchedulingUtils to propagate an error if any hardware resource fails to be allocated
  • Loading branch information
erwei-xilinx authored Mar 10, 2025
1 parent f37e448 commit 72f10a6
Show file tree
Hide file tree
Showing 10 changed files with 406 additions and 461 deletions.
80 changes: 39 additions & 41 deletions mlir/include/air/Conversion/AIRToAIESchedulingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ using namespace mlir;
namespace xilinx {
namespace air {

bool isTileInbound(air::MemcpyInterface memcpyOp, int tileMemSpaceAsInt);
bool isTileOutbound(air::MemcpyInterface memcpyOp, int tileMemSpaceAsInt);
FailureOr<bool> isTileInbound(air::MemcpyInterface memcpyOp,
int tileMemSpaceAsInt);
FailureOr<bool> isTileOutbound(air::MemcpyInterface memcpyOp,
int tileMemSpaceAsInt);

AIE::TileOp getPhysTileOpOrNull(AIE::DeviceOp aie_device, int col, int row);

Expand Down Expand Up @@ -96,10 +98,10 @@ struct MemcpyBundleAsFlow {
int S2MM_memspace_as_int;
int numMM2SAllocs = 0;
int numS2MMAllocs = 0;
void pushBackMemcpyOpToBundle(air::DmaMemcpyNdOp memcpyOp);
void pushBackMemcpyOpToBundle(air::ChannelGetOp memcpyOp);
void pushBackMemcpyOpToBundle(air::ChannelPutOp memcpyOp);
void pushBackMemcpyOpToBundle(air::ChannelInterface memcpyOp);
LogicalResult pushBackMemcpyOpToBundle(air::DmaMemcpyNdOp memcpyOp);
LogicalResult pushBackMemcpyOpToBundle(air::ChannelGetOp memcpyOp);
LogicalResult pushBackMemcpyOpToBundle(air::ChannelPutOp memcpyOp);
LogicalResult pushBackMemcpyOpToBundle(air::ChannelInterface memcpyOp);
MemcpyBundleAsFlow(air::DmaMemcpyNdOp dmaMemcpyOp);
MemcpyBundleAsFlow(air::ChannelOp chan);
};
Expand All @@ -111,14 +113,14 @@ class DMAAllocator {
DMAAllocator(AIE::DeviceOp device, int dmaMemorySpaceAsInt)
: device(device), DMAMemorySpaceAsInt(dmaMemorySpaceAsInt) {}

allocation_info_t lookupDMAAllocation(int64_t col, int64_t row,
air::MemcpyInterface &memcpyOp);
std::pair<AIE::LockOp, AIE::LockOp>
FailureOr<allocation_info_t>
lookupDMAAllocation(int64_t col, int64_t row, air::MemcpyInterface &memcpyOp);
FailureOr<std::pair<AIE::LockOp, AIE::LockOp>>
getLockForDMA(air::MemcpyInterface &memcpyOp, int col, int row,
Operation *bufferOp);
allocation_info_t allocNewDmaChannel(air::MemcpyInterface &memcpyOp,
AIE::TileOp tile, int chan, int col,
int row, std::vector<int> dma_id);
FailureOr<allocation_info_t>
allocNewDmaChannel(air::MemcpyInterface &memcpyOp, AIE::TileOp tile, int chan,
int col, int row, std::vector<int> dma_id);
void sortMemcpyOps(std::vector<Operation *> dma_memcpy_ops);

protected:
Expand All @@ -140,11 +142,12 @@ class TileDMAAllocator : public DMAAllocator {

// A very simple scheme to allocate channels for dma operations:
// <description>
allocation_info_t simpleDmaChannelAlloc(air::MemcpyInterface &memcpyOp,
int col, int row, int chan);
FailureOr<allocation_info_t>
simpleDmaChannelAlloc(air::MemcpyInterface &memcpyOp, int col, int row,
int chan);

AIE::BufferOp getBuffer(uint64_t, int64_t col, int64_t row,
air::MemcpyInterface &memcpyOp);
FailureOr<AIE::BufferOp> getBuffer(uint64_t, int64_t col, int64_t row,
air::MemcpyInterface &memcpyOp);
};

class ShimDMAAllocator : public DMAAllocator {
Expand All @@ -155,19 +158,21 @@ class ShimDMAAllocator : public DMAAllocator {

ShimDMAAllocator(AIE::DeviceOp device);

allocation_info_t allocNewDmaChannel(air::MemcpyInterface &memcpyOp, int col,
int row,
std::vector<Operation *> &dma_ops,
std::string colAllocConstraint);
FailureOr<allocation_info_t>
allocNewDmaChannel(air::MemcpyInterface &memcpyOp, int col, int row,
std::vector<Operation *> &dma_ops,
std::string colAllocConstraint);

allocation_info_t allocNewDmaChannel(air::MemcpyInterface &memcpyOp,
allocation_info_t existing_alloc,
std::vector<Operation *> &dma_ops);
FailureOr<allocation_info_t>
allocNewDmaChannel(air::MemcpyInterface &memcpyOp,
allocation_info_t existing_alloc,
std::vector<Operation *> &dma_ops);

AIE::ExternalBufferOp getBuffer(uint64_t &BufferId, int64_t col, int64_t row,
air::MemcpyInterface &memcpyOp);
FailureOr<AIE::ExternalBufferOp> getBuffer(uint64_t &BufferId, int64_t col,
int64_t row,
air::MemcpyInterface &memcpyOp);

std::optional<air::allocation_info_t>
FailureOr<air::allocation_info_t>
foundFlowReuseOpportunity(std::vector<MemcpyBundleAsFlow> memcpy_flows,
air::allocation_info_t alloc, bool isMM2S);
};
Expand All @@ -179,17 +184,16 @@ class MemTileDMAAllocator : public DMAAllocator {

MemTileDMAAllocator(AIE::DeviceOp device);

allocation_info_t simpleDmaChannelAlloc(air::MemcpyInterface &memcpyOp,
int chan);
allocation_info_t simpleDmaChannelAlloc(air::MemcpyInterface &memcpyOp,
allocation_info_t &existing_alloc);
FailureOr<allocation_info_t>
simpleDmaChannelAlloc(air::MemcpyInterface &memcpyOp, int chan);
FailureOr<allocation_info_t>
simpleDmaChannelAlloc(air::MemcpyInterface &memcpyOp,
allocation_info_t &existing_alloc);

int forecastChannelAlloc(air::MemcpyInterface &memcpyOp);
FailureOr<AIE::BufferOp> getBuffer(uint64_t, int64_t col, int64_t row,
air::MemcpyInterface &memcpyOp);

AIE::BufferOp getBuffer(uint64_t, int64_t col, int64_t row,
air::MemcpyInterface &memcpyOp);

std::optional<air::allocation_info_t>
FailureOr<air::allocation_info_t>
foundFlowReuseOpportunity(std::vector<MemcpyBundleAsFlow> memcpy_flows,
air::allocation_info_t alloc, bool isMM2S);
};
Expand All @@ -204,12 +208,6 @@ int foundInVector(T item, std::vector<T> vec);
int getSCFForLoopDepth(Operation *o);
bool groupingMemcpysByLoop(std::vector<MemcpyBundleAsFlow> &memcpy_flows);

LogicalResult
groupedByLoopDMAChannelAllocation(std::vector<MemcpyBundleAsFlow> &memcpy_flows,
ShimDMAAllocator &shim_dma_alloc,
MemTileDMAAllocator &memtile_dma_alloc,
TileDMAAllocator &tile_dma_alloc);

} // namespace air
} // namespace xilinx

Expand Down
11 changes: 6 additions & 5 deletions mlir/include/air/Util/Dependency.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ class dependencyCanonicalizer {
std::string toPositionString(std::vector<unsigned> position);
unsigned getIteratorFromPosition(std::vector<unsigned> position,
Operation *hier_op);
void redoDepTraceIfDepOnHier(func::FuncOp func);
LogicalResult redoDepTraceIfDepOnHier(func::FuncOp func);

private:
void addVerticesInHerd(std::vector<dependencyGraph> &herd_subgraphs,
Expand Down Expand Up @@ -356,8 +356,8 @@ class dependencyTracer {

// Trace dependency from op
template <typename T>
void traceDependencyFromOp(SmallVector<partialMemref, 1> operands,
T sink_air_op, std::string dep_type) {
LogicalResult traceDependencyFromOp(SmallVector<partialMemref, 1> operands,
T sink_air_op, std::string dep_type) {

char dep_tracing_mode = 'n';
if (dep_type == "RAW")
Expand All @@ -366,7 +366,7 @@ class dependencyTracer {
dep_tracing_mode = 'n';
else {
sink_air_op->emitOpError("Unknown dependency type.");
return;
return failure();
}

// Detect deps
Expand All @@ -380,11 +380,12 @@ class dependencyTracer {
pushDepsAtCurrentScope(operand.memrefValue, async_op, dep_tracing_mode,
&operand);
}
return success();
}

// Re-establish async dependency from an scf.for op to all other async ops in
// the module.
void traceDependencyFromScfForOp(scf::ForOp &forOp);
LogicalResult traceDependencyFromScfForOp(scf::ForOp &forOp);

// Recursively reconnect loop-carried dependency in scf loop nest
void reconnectLoopCarriedDependencyFromOp(Operation *op);
Expand Down
33 changes: 20 additions & 13 deletions mlir/lib/Conversion/AIRLoweringPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ class AIRDmaMemcpyNdToAIRRtConversion
};

// AIR channel to AIRRT impl.
Operation *
FailureOr<Operation *>
AIRChannelInterfaceToAIRRtConversionImpl(OpBuilder builder,
air::ChannelInterface thisOp,
air::ChannelInterface theOtherOp) {
Expand Down Expand Up @@ -488,7 +488,7 @@ AIRChannelInterfaceToAIRRtConversionImpl(OpBuilder builder,
if (!theOtherOp->hasAttr("tile")) {
theOtherOp->emitOpError(
"missing 'tile' attribute as compile-time flag.");
return nullptr;
return failure();
}
ArrayAttr tiles = theOtherOp->getAttrOfType<ArrayAttr>("tile");
auto tile_dict = llvm::cast<DictionaryAttr>(tiles[0]);
Expand Down Expand Up @@ -534,7 +534,7 @@ AIRChannelInterfaceToAIRRtConversionImpl(OpBuilder builder,
auto lastStrideConst = getConstantIntValue(strides.back());
if (!lastStrideConst) {
thisOp->emitOpError("last stride is not static.");
return nullptr;
return failure();
}

strides.pop_back();
Expand Down Expand Up @@ -597,11 +597,14 @@ class AIRChannelGetPutToAIRRtConversion : public OpConversionPattern<OpT> {
return op->emitOpError("failed to find the other side of air.channel");
auto otherOp = otherOps[0];

Operation *airrtOp =
auto airrtOp =
AIRChannelInterfaceToAIRRtConversionImpl(rewriter, op, otherOp);

if (airrtOp) {
rewriter.replaceOp(op, airrtOp);
if (failed(airrtOp))
return failure();

if (*airrtOp != nullptr) {
rewriter.replaceOp(op, *airrtOp);
return success();
}

Expand Down Expand Up @@ -869,7 +872,9 @@ LogicalResult ScfParToAffineForConversion(Operation *op) {
return failure();

llvm::SmallSet<Operation *, 8> erased;
f.walk([&](scf::ParallelOp scf_par) {
SmallVector<scf::ParallelOp> scf_pars;
f.walk([&](scf::ParallelOp scf_par) { scf_pars.push_back(scf_par); });
for (auto scf_par : scf_pars) {
if (!llvm::all_of(scf_par.getLowerBound(), [](Value v) {
auto constV = getConstantIntValue(v);
if (!constV)
Expand All @@ -879,7 +884,7 @@ LogicalResult ScfParToAffineForConversion(Operation *op) {
return true;
})) {
scf_par->emitOpError("has non-zero lower bound.");
return;
return failure();
}
if (!llvm::all_of(scf_par.getStep(), [](Value v) {
auto constV = getConstantIntValue(v);
Expand All @@ -890,7 +895,7 @@ LogicalResult ScfParToAffineForConversion(Operation *op) {
return true;
})) {
scf_par->emitOpError("has non-unit step size.");
return;
return failure();
}
std::vector<int> par_sizes = {};
for (auto v : scf_par.getUpperBound())
Expand Down Expand Up @@ -921,7 +926,7 @@ LogicalResult ScfParToAffineForConversion(Operation *op) {
}
}
erased.insert(scf_par);
});
}
for (auto a : erased) {
if (a->getNumResults())
for (auto token : a->getResults())
Expand Down Expand Up @@ -1072,7 +1077,8 @@ class AIRLoweringPass : public air::impl::AIRLoweringBase<AIRLoweringPass> {
[&](airrt::DmaMemcpyNdOp c) { hasCandidateSCFParallel = true; });
}
if (hasCandidateSCFParallel)
serializeAsyncControlFlows(f);
if (failed(serializeAsyncControlFlows(f)))
signalPassFailure();

// SCF parallel to affine for conversion
if (failed(ScfParToAffineForConversion(f))) {
Expand Down Expand Up @@ -1248,7 +1254,7 @@ class AIRLoweringPass : public air::impl::AIRLoweringBase<AIRLoweringPass> {

// Workaround for vck190, which has only a single control processor: all of
// the asynchronous control programs are serialized by this function.
void serializeAsyncControlFlows(func::FuncOp func) const {
LogicalResult serializeAsyncControlFlows(func::FuncOp func) const {

// Collect async scf loops in line-by-line order
std::vector<Operation *> scf_loops;
Expand Down Expand Up @@ -1306,7 +1312,7 @@ class AIRLoweringPass : public air::impl::AIRLoweringBase<AIRLoweringPass> {
Operation *chan_op = getInnerMostMemcpyFromLoopNest(bucket[i]);
if (!chan_op) {
func->emitOpError("memcpy in innermost loop body not found.");
return;
return failure();
}
auto src_loop_nest = getParentLoopNest(chan_op, bucket[i]);
for (auto [src_loop, dst_loop] :
Expand Down Expand Up @@ -1339,6 +1345,7 @@ class AIRLoweringPass : public air::impl::AIRLoweringBase<AIRLoweringPass> {
bucket[i]->erase();
}
}
return success();
}
};

Expand Down
16 changes: 9 additions & 7 deletions mlir/lib/Conversion/AIRRtToNpuPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -964,7 +964,8 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase<AIRRtToNpuPass> {

// Configure the tile trace units and the shimDMA
if (clTraceSize > 0)
insertNpuWrite32ForTrace(module, clTraceSize, clTraceOffset);
if (failed(insertNpuWrite32ForTrace(module, clTraceSize, clTraceOffset)))
signalPassFailure();

RewritePatternSet funcToSeqPatterns(ctx);
funcToSeqPatterns.add<ControlFuncConversion>(ctx);
Expand Down Expand Up @@ -1311,8 +1312,8 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase<AIRRtToNpuPass> {
}

// configure events to monitor
void insertNpuWrite32ForTrace(ModuleOp module, int64_t trace_size,
int64_t trace_offset) {
LogicalResult insertNpuWrite32ForTrace(ModuleOp module, int64_t trace_size,
int64_t trace_offset) {
SmallVector<mlir::func::FuncOp> funcOps;
module.walk([&](mlir::func::FuncOp f) { funcOps.push_back(f); });

Expand Down Expand Up @@ -1354,11 +1355,11 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase<AIRRtToNpuPass> {
if (!target_model.isCoreTile(srcColIndex, srcRowIndex) &&
!target_model.isMemTile(srcColIndex, srcRowIndex)) {
pktFlow->emitOpError("unsupported trace src.");
return;
return failure();
}
if (!target_model.isShimNOCTile(dstColIndex, dstRowIndex)) {
pktFlow->emitOpError("unsupported trace dest.");
return;
return failure();
}
int pkt_type = 0;
if (target_model.isMemTile(srcColIndex, srcRowIndex))
Expand Down Expand Up @@ -1445,7 +1446,7 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase<AIRRtToNpuPass> {
int bdID = chanToIdMap[dstColIndex];
if (bdID < 4) {
pktFlow->emitOpError("runs out of bd_id.");
return;
return failure();
}

builder.create<AIEX::NpuWriteBdOp>(
Expand Down Expand Up @@ -1475,7 +1476,7 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase<AIRRtToNpuPass> {
address = 0x1D20C;
else {
pktFlow->emitOpError("unknown trace dest.");
return;
return failure();
}
builder.create<AIEX::NpuWrite32Op>(
builder.getUnknownLoc(), address, bdID, nullptr,
Expand All @@ -1493,6 +1494,7 @@ struct AIRRtToNpuPass : public impl::AIRRtToNpuBase<AIRRtToNpuPass> {
builder.create<AIEX::NpuWrite32Op>(builder.getUnknownLoc(), 0x34008, 127,
nullptr, zero, zero);
}
return success();
}

// Renumber aiex.npu.dma_memcpy_nd ops per column of AIEs.
Expand Down
Loading

0 comments on commit 72f10a6

Please sign in to comment.