Skip to content

Commit

Permalink
squash
Browse files Browse the repository at this point in the history
  • Loading branch information
newling committed Aug 28, 2024
1 parent a5fbf9d commit ade1af2
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "iree-amd-aie/Transforms/Passes.h"
#include "iree/compiler/Codegen/Utils/Utils.h"
#include "llvm/ADT/StringExtras.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h"
Expand All @@ -16,16 +17,97 @@
#include "mlir/Dialect/SCF/Transforms/TileUsingInterface.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/IR/Iterators.h"
#include "mlir/IR/PatternMatch.h"

#define DEBUG_TYPE "iree-amdaie-tile-and-fuse"


namespace mlir::iree_compiler::AMDAIE {

namespace {

/// Selects which family of GPU mapping attributes is attached to the
/// induction variables of a tiled scf.forall: block mappings or thread
/// mappings.
enum class GPUMappingType { Block, Thread };

/// Builds the GPU mapping attributes for the scf.forall produced by tiling
/// `op` with `tileSizesVal`.
///
/// One attribute is produced per non-zero entry of `tileSizesVal` (a tile size
/// of 0 means 'do not tile' and therefore creates no induction variable). The
/// attributes are `gpu::GPUBlockMappingAttr` or `gpu::GPUThreadMappingAttr`
/// depending on `mappingType`.
///
/// Returns failure, after emitting an error on `op`, if
///  - there are more induction variables than
///    `mlir::gpu::getMaxEnumValForMappingId()`, or
///  - any tile size after the first 2 non-zero tile sizes is greater than 1.
FailureOr<SmallVector<Attribute>> getGPUMappingAttributes(
    ArrayRef<int64_t> tileSizesVal, GPUMappingType mappingType,
    TilingInterface op) {
  MLIRContext *context = op.getContext();

  // There is one induction variable in the scf.forall for each of the
  // non-zero tile sizes. Recall that a '0' tile size corresponds to 'do
  // not tile'.
  const uint32_t nbIndVars =
      std::count_if(tileSizesVal.begin(), tileSizesVal.end(),
                    [](int64_t t) { return t != 0; });

  // See mlir::gpu::MappingId enum. Currently 13 dimensions are supported.
  // clang-format off
  // https://github.com/llvm/llvm-project/blob/e8063702cfbbf39f0b92283d0588dee264b5eb2b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td#L37.
  // clang-format on
  if (nbIndVars > mlir::gpu::getMaxEnumValForMappingId()) {
    return op->emitOpError("has too many dimensions to tile, ")
           << "there are only " << mlir::gpu::getMaxEnumValForMappingId()
           << " dimensions available in the mlir::gpu dialect (require "
           << nbIndVars << " here).";
  }

  // Currently we expect only the first 2 tiled dimensions to be more than 1.
  // TODO(newling) if there are 3+ dimensions, we need to collapse them,
  // a follow-up pass. Instead of
  //   (i,j,k) in (2,3,5)
  // for example, could be
  //   (i,l) in (2,15)
  // with then
  //   j=l/5 and k=l%5.
  // Once the above is implemented, we can safely remove the following check.
  //
  // Example of failure (0,2,3,1,4) : the 4 is not one of the first 2 non-zeros
  // Example of failure (0,1,1,2)   : the 2 is not one of the first 2 non-zeros
  // Example of success (0,2,3,1,1) : there are no entries above 1 after 3.
  //
  // This validation runs before any attribute is created so that the failing
  // path does no unnecessary work.
  if (nbIndVars > 2) {
    uint32_t indVarCount = 0;
    for (int64_t tileSize : tileSizesVal) {
      if (indVarCount >= 2 && tileSize > 1) {
        // Render the tile sizes as "a,b,c". Note that llvm::join_items takes
        // the separator as its FIRST argument and joins a parameter pack, not
        // a range, so it cannot be used directly on `tileSizesVal`.
        std::string tileSizesStr;
        for (int64_t t : tileSizesVal) {
          if (!tileSizesStr.empty()) tileSizesStr += ",";
          tileSizesStr += std::to_string(t);
        }
        return op->emitOpError("has requested tile sizes (")
               << tileSizesStr
               << "). Currently we only support tiling where the first 2 "
                  "non-zero tile sizes are greater than 1. We must implement "
                  "a pass to merge tiling dimensions to be able to lower to "
                  "the 2-D AIE array. ";
      }
      // Count induction variables exactly as `nbIndVars` does (non-zero).
      if (tileSize != 0) {
        ++indVarCount;
      }
    }
  }

  // Creates the block/thread mapping attribute for mapping id `i`. Every path
  // returns a value, so there is no fall-through in builds where `assert` is
  // compiled out.
  auto getMappingAttributeForDimension = [&](uint32_t i) -> Attribute {
    auto id = static_cast<gpu::MappingId>(i);
    if (mappingType == GPUMappingType::Block)
      return gpu::GPUBlockMappingAttr::get(context, id);
    assert(mappingType == GPUMappingType::Thread && "unhandled mapping type");
    return gpu::GPUThreadMappingAttr::get(context, id);
  };

  SmallVector<Attribute> mapping;
  mapping.reserve(nbIndVars);
  for (uint32_t i = 0; i < nbIndVars; ++i) {
    // The first two induction variables get mapping ids 1 and 0 respectively,
    // i.e. they are swapped relative to the enum order; all later induction
    // variables keep their own index.
    // NOTE(review): the intent is to line DimX/DimY up with the AIE array
    // columns/rows; which of the two lands on columns is not established
    // here -- confirm.
    if (i == 0)
      mapping.push_back(getMappingAttributeForDimension(1));
    else if (i == 1)
      mapping.push_back(getMappingAttributeForDimension(0));
    else
      mapping.push_back(getMappingAttributeForDimension(i));
  }

  return mapping;
}

/// Utility function to check if any of the reduction dimension is being tiled.
static bool isTilingReductionDimension(TilingInterface consumerOp,
SmallVector<int64_t> tileSizesVal) {
Expand Down Expand Up @@ -157,27 +239,20 @@ void AMDAIETileAndFusePass::runOnOperation() {

SmallVector<OpFoldResult> tileSizes =
getAsIndexOpFoldResult(context, tileSizesVal);

auto options = scf::SCFTilingOptions().setTileSizes(tileSizes);

// When tiling using scf.for we do not need to set any mapping.
if (!useSCFFor) {
options.setLoopType(scf::SCFTilingOptions::LoopType::ForallOp);
// Here we assume there are always two levels of parallel (scf.forall)
// loops, and the first level of tiling is always using scf.forall and
// mapped to blocks. Currently we are not using mapping attributes for
// Conv2d ops, because there could be four parallel tiling dimensions.
// TODO (vivian): create AIE specific mapping attributes.
if (!isa<linalg::ConvolutionOpInterface>(consumerOp.getOperation())) {
if (tilingLevel == 0) {
options.setMapping(
{gpu::GPUBlockMappingAttr::get(context, gpu::MappingId::DimY),
gpu::GPUBlockMappingAttr::get(context, gpu::MappingId::DimX)});
} else {
options.setMapping(
{gpu::GPUThreadMappingAttr::get(context, gpu::MappingId::DimY),
gpu::GPUThreadMappingAttr::get(context, gpu::MappingId::DimX)});
}
auto maybeMapping = getGPUMappingAttributes(
tileSizesVal,
tilingLevel == 0 ? GPUMappingType::Block : GPUMappingType::Thread,
consumerOp);
if (failed(maybeMapping)) {
return signalPassFailure();
}
options.setMapping(maybeMapping.value());
}

IRRewriter rewriter(context);
Expand Down
18 changes: 18 additions & 0 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,24 @@ void addMLIRAIRLoweringPasses(OpPassManager &passManager, AMDAIEDevice device) {
passManager.addPass(createEraseHALDescriptorTypeFromMemRefPass());
passManager.addPass(memref::createFoldMemRefAliasOpsPass());
passManager.addPass(createAMDAIEBridgeToAIRPass());

// Running canonicalization for all pipelines here results in failures.
// Example
// ```
// 'memref.cast' op is an unsupported operation. This pass currently only
// supports AllocOp and SubViewOp as inputs.
// ```
// It is currently required for the convolution pipeline though, to remove the
// extra (size-1) thread- and group- dimensions.
//
// TODO(newling) there are better solutions like:
// 1) make canonicalization work for scf.forall
// 2) pass to collapse rank-4 scf.foralls to rank-2 scf.foralls.
// 3) resolve above 'unsupported operation' error.
if (clUseTilePipeline == TilePassPipeline::ConvDecomposePipeline) {
passManager.addPass(createCanonicalizerPass());
}

// TODO (Erwei): Figure out a way to work with AMDAIEPackToDmaPass.
if (clUseTilePipeline == TilePassPipeline::PackPeelPipeline)
passManager.addPass(createAMDAIEDecomposeLinalgExtPackUnPackToAIRPass());
Expand Down

0 comments on commit ade1af2

Please sign in to comment.