Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AMDAIEDevice to the executable target config #528

Merged
merged 1 commit into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions build_tools/ci/run_matmul_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ function run_matmul_test() {

local target_backend="amd-aie"

local target_device="npu1_4col"

local device="xrt"

local peano_install_path="${PEANO}"
Expand Down Expand Up @@ -256,6 +258,10 @@ function run_matmul_test() {
use_ukernel="$2"
shift 2
;;
--target_device)
target_device="$2"
shift 2
;;
--target_backend)
target_backend="$2"
shift 2
Expand Down Expand Up @@ -381,6 +387,7 @@ function run_matmul_test() {
set +e

compilation_flags="--iree-hal-target-backends=${target_backend} \
--iree-amdaie-target-device=${target_device} \
--iree-amdaie-lower-to-aie-pipeline=${lower_to_aie_pipeline} \
--iree-amdaie-tile-pipeline=${tile_pipeline} \
--iree-amd-aie-peano-install-dir=${peano_install_path} \
Expand Down Expand Up @@ -495,6 +502,7 @@ run_matmul_test \
--lhs_rhs_type "bf16" \
--acc_type "f32" \
--target_backend "amd-aie" \
--target_device "npu1_4col" \
--device "xrt" \
--peano_install_path "${PEANO}" \
--mlir_aie_install_path "${MLIR_AIE_INSTALL}" \
Expand Down
16 changes: 16 additions & 0 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,22 @@ include "iree-amd-aie/IR/AMDAIEDialect.td"
include "mlir/IR/AttrTypeBase.td"
include "mlir/IR/EnumAttr.td"

// Enumerates the AIE devices that can be named as a compilation target.
// Each case pairs the device name with an explicit 32-bit integer value.
// NOTE(review): lowerToAIE converts an AMDAIEDevice to xilinx::AIE::AIEDevice
// with a plain numeric static_cast, so these case values presumably have to
// stay identical to the values of that enum - confirm against mlir-aie before
// adding, removing or renumbering cases.
def AMDAIE_AMDAIEDevice: I32EnumAttr<"AMDAIEDevice",
"Enum with target AMDAIE devices.",
[
I32EnumAttrCase<"xcvc1902", 1>,
I32EnumAttrCase<"xcve2302", 2>,
I32EnumAttrCase<"xcve2802", 3>,
I32EnumAttrCase<"npu1", 4>,
I32EnumAttrCase<"npu1_1col", 5>,
I32EnumAttrCase<"npu1_2col", 6>,
I32EnumAttrCase<"npu1_3col", 7>,
I32EnumAttrCase<"npu1_4col", 8>
]>
{
// C++ namespace in which the enum and its helpers are emitted.
let cppNamespace = "mlir::iree_compiler::AMDAIE";
}

def AMDAIE_CopyOpOperateOn: I32EnumAttr<"CopyOpOperateOn",
"Enables templated functions that operate on either source or target of "
"copy/dma operations",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "aie/Target/LLVMIR/Dialect/XLLVM/XLLVMToLLVMIRTranslation.h"
#include "air/Dialect/AIR/AIRDialect.h"
#include "air/Dialect/AIRRt/AIRRtDialect.h"
#include "iree-amd-aie/IR/AMDAIEAttrs.h"
#include "iree-amd-aie/IR/AMDAIEDialect.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "iree-dialects/Dialect/LinalgTransform/Passes.h"
Expand Down Expand Up @@ -48,6 +49,25 @@

namespace mlir::iree_compiler::AMDAIE {

/// Command line option `--iree-amdaie-target-device` for selecting the target
/// AIE device. Accepts one of the AMDAIEDevice names registered below and
/// falls back to `npu1_4col` when the flag is not passed.
static llvm::cl::opt<AMDAIEDevice> clAMDAIETargetDevice(
"iree-amdaie-target-device",
llvm::cl::desc("Sets the target device architecture."),
llvm::cl::values(
clEnumValN(AMDAIEDevice::xcvc1902, "xcvc1902", "The xcvc1902 device"),
clEnumValN(AMDAIEDevice::xcve2302, "xcve2302", "The xcve2302 device"),
clEnumValN(AMDAIEDevice::xcve2802, "xcve2802", "The xcve2802 device"),
clEnumValN(AMDAIEDevice::npu1, "npu1", "Default Phoenix NPU"),
clEnumValN(AMDAIEDevice::npu1_1col, "npu1_1col",
"Phoenix NPU with a single column"),
clEnumValN(AMDAIEDevice::npu1_2col, "npu1_2col",
"Phoenix NPU with two columns"),
clEnumValN(AMDAIEDevice::npu1_3col, "npu1_3col",
"Phoenix NPU with three columns"),
clEnumValN(AMDAIEDevice::npu1_4col, "npu1_4col",
"Phoenix NPU with four columns")),
llvm::cl::init(AMDAIEDevice::npu1_4col));
nirvedhmeshram marked this conversation as resolved.
Show resolved Hide resolved

static llvm::cl::opt<std::string> clEnableAMDAIEUkernels(
"iree-amdaie-enable-ukernels",
llvm::cl::desc("Enables microkernels in the amdaie backend. May be "
Expand Down Expand Up @@ -142,8 +162,10 @@ class AIETargetBackend final : public IREE::HAL::TargetBackend {
auto addConfig = [&](StringRef name, Attribute value) {
configItems.emplace_back(StringAttr::get(context, name), value);
};
// Set target arch
addConfig("target_arch", StringAttr::get(context, "chip-tbd"));
// Set target device
addConfig(
"target_device",
StringAttr::get(context, AMDAIE::stringifyEnum(clAMDAIETargetDevice)));
// Set microkernel enabling flag.
addConfig("ukernels", StringAttr::get(context, clEnableAMDAIEUkernels));
auto configAttr = b.getDictionaryAttr(configItems);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "aie/Dialect/XLLVM/XLLVMDialect.h"
#include "aie/Passes.h"
#include "aie/Target/LLVMIR/Dialect/XLLVM/XLLVMToLLVMIRTranslation.h"
#include "iree-amd-aie/IR/AMDAIEAttrs.h"
#include "iree-amd-aie/IR/AMDAIEDialect.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "iree-dialects/Dialect/LinalgTransform/Passes.h"
Expand Down Expand Up @@ -141,8 +142,10 @@ class AIETargetDirectBackend final : public IREE::HAL::TargetBackend {
auto addConfig = [&](StringRef name, Attribute value) {
configItems.emplace_back(StringAttr::get(context, name), value);
};
// Set target arch
addConfig("target_arch", StringAttr::get(context, "chip-tbd"));
// Set target device
addConfig("target_device",
StringAttr::get(context,
AMDAIE::stringifyEnum(AMDAIEDevice::npu1_4col)));
// Set microkernel enabling flag.
addConfig("ukernels",
StringAttr::get(context, /*clEnableAMDAIEUkernels*/ ""));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-targets %s | FileCheck %s --check-prefix=DEFAULT
// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-targets --iree-amdaie-enable-ukernels=all %s | FileCheck %s --check-prefix=ENABLE_UKERNEL

// DEFAULT: hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = "none"}>) {
// ENABLE_UKERNEL: hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = "all"}>) {
// DEFAULT: hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>) {
// ENABLE_UKERNEL: hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "all"}>) {
func.func @matmul_small(%lhs : tensor<8x16xi32>,
%rhs : tensor<16x32xi32>) -> tensor<8x32xi32> {
%empty = tensor.empty() : tensor<8x32xi32>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "aie/Dialect/AIEX/IR/AIEXDialect.h"
#include "iree-amd-aie/IR/AMDAIEDialect.h"
#include "iree-amd-aie/IR/AMDAIEOps.h"
#include "iree-amd-aie/Transforms/AMDAIEUtils.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "iree-amd-aie/Transforms/Transforms.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
Expand Down Expand Up @@ -729,13 +730,23 @@ LogicalResult workgroupToAIE(IRRewriter &rewriter,
LogicalResult lowerToAIE(ModuleOp moduleOp) {
IRRewriter rewriter(moduleOp.getContext());
Block *moduleBlock = &moduleOp->getRegion(0).front();

// Retrieve the AMDAIEDevice from the executable target attribute.
auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(moduleOp);
auto test = IREE::HAL::DeviceTargetAttr::lookup(moduleOp);
std::optional<AMDAIEDevice> device = getConfigAMDAIEDevice(targetAttr);
if (!device)
return moduleOp.emitOpError()
<< "No AMDAIEDevice found in the target attribute configuration";
xilinx::AIE::AIEDevice aieDevice = static_cast<xilinx::AIE::AIEDevice>(
static_cast<uint32_t>(device.value()));

auto funcRes = moduleOp.walk([&](func::FuncOp funcOp) {
// Insert AIE DeviceOp
rewriter.setInsertionPoint(moduleBlock, moduleBlock->begin());
auto deviceOp = rewriter.create<xilinx::AIE::DeviceOp>(
rewriter.getUnknownLoc(),
xilinx::AIE::AIEDeviceAttr::get(rewriter.getContext(),
xilinx::AIE::AIEDevice::npu1_4col));
xilinx::AIE::AIEDeviceAttr::get(rewriter.getContext(), aieDevice));
deviceOp.getRegion().emplaceBlock();
Block *deviceBlock = &deviceOp.getRegion().front();

Expand Down Expand Up @@ -868,7 +879,7 @@ class AMDAIELowerToAIEPass
}

AMDAIELowerToAIEPass() = default;
AMDAIELowerToAIEPass(const AMDAIELowerToAIEPass &pass) {};
AMDAIELowerToAIEPass(const AMDAIELowerToAIEPass &pass){};
void runOnOperation() override;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@

namespace mlir::iree_compiler::AMDAIE {

std::optional<AMDAIEDevice> getConfigAMDAIEDevice(
IREE::HAL::ExecutableTargetAttr targetAttr) {
if (!targetAttr) return std::nullopt;
auto config = targetAttr.getConfiguration();
if (!config) return std::nullopt;
std::optional<StringAttr> attr = config.getAs<StringAttr>("target_device");
if (!attr) return std::nullopt;
return AMDAIE::symbolizeEnum<AMDAIEDevice>(attr.value().getValue());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😎 yea I forgot mlir gives symbolize and stringify.

}

namespace {

/// Generate a DenseMap key we can use for the element types (alternatives
Expand All @@ -23,7 +33,7 @@ constexpr uint32_t getElementTypeKey(uint32_t a, uint32_t b, uint32_t c) {

/// Map from (LHS bitwidth, RHS bitwidth, Accumulator bitwidth) to the AIE
/// instruction size (m, n, k) for the integer types with those bitwidths.
const auto& getIntegerMatmulInstructionSizeMap() {
const auto &getIntegerMatmulInstructionSizeMap() {
// Sanity check.
static_assert(getElementTypeKey(1, 2, 3) == 1 + 2 * 256 + 3 * 65536);

Expand Down Expand Up @@ -60,7 +70,7 @@ const auto& getIntegerMatmulInstructionSizeMap() {

FailureOr<std::array<uint32_t, 3>> getAIEIntegerMatmulInstructionSize(
uint32_t nBitsLhs, uint32_t nBitsRhs, uint32_t nBitsAcc) {
const auto& mapForIntTypes = getIntegerMatmulInstructionSizeMap();
const auto &mapForIntTypes = getIntegerMatmulInstructionSizeMap();
auto it =
mapForIntTypes.find(getElementTypeKey(nBitsLhs, nBitsRhs, nBitsAcc));
if (it == mapForIntTypes.end()) {
Expand All @@ -77,8 +87,7 @@ FailureOr<std::array<uint32_t, 3>> getAIEMatmulInstructionSize(Type elTypeLhs,
isa<FloatType>(elTypeAcc);

bool allInteger = isa<IntegerType>(elTypeLhs) &&
isa<IntegerType>(elTypeRhs) &&
isa<IntegerType>(elTypeAcc);
isa<IntegerType>(elTypeRhs) && isa<IntegerType>(elTypeAcc);

if (!allInteger && !allFloatingPoint) {
return failure();
Expand All @@ -103,7 +112,7 @@ FailureOr<std::array<uint32_t, 3>> getAIEMatmulInstructionSize(Type elTypeLhs,

/// Returns `64 / bitWidth`, where `bitWidth` is the int-or-float bit width of
/// `elemType`. Fails when the bit width is not a multiple of 8 or is larger
/// than 64.
/// NOTE(review): a bit width of 0 would pass both checks below and reach the
/// division - confirm that no zero-width type can be passed in here.
FailureOr<unsigned> getTilingScaleFactor(Type elemType) {
unsigned bitWidth = elemType.getIntOrFloatBitWidth();
if (bitWidth %8 != 0) return failure();
if (bitWidth % 8 != 0) return failure();
if (bitWidth > 64) return failure();
return 64 / bitWidth;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,17 @@

#include <array>

#include "iree-amd-aie/IR/AMDAIEAttrs.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
#include "mlir/Dialect/Linalg/Utils/Utils.h"
#include "mlir/IR/Types.h"

namespace mlir::iree_compiler::AMDAIE {

/// Returns the target AMDAIE device stored as a string attribute under the
/// "target_device" key of `targetAttr`'s configuration dictionary.
/// Returns std::nullopt when `targetAttr` is null, when it carries no
/// configuration, or when the configuration has no string attribute under
/// that key. The stored string is converted with symbolizeEnum and that
/// result is returned as-is, so an unrecognized device name presumably also
/// yields std::nullopt - confirm against the generated enum helpers.
std::optional<AMDAIEDevice> getConfigAMDAIEDevice(
IREE::HAL::ExecutableTargetAttr targetAttr);

// This function is based on the following table pulled from the
// AIEVec_MatMulOp documentation in
// mlir-aie/include/aie/Dialect/AIEVec/IR/AIEVecOps.td
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(iree-amdaie-lower-to-aie)" --verify-diagnostics %s | FileCheck %s

// CHECK: module
// expected-error @+1 {{No AMDAIEDevice found in the target attribute configuration}}
module {
}

// -----

// CHECK: module
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
}

// -----

// CHECK: module
// CHECK: aie.device
// CHECK: func.func @empty_func
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @empty_func() {
return
}
Expand All @@ -20,7 +28,8 @@ module {
// CHECK: module
// CHECK: aie.device
// CHECK: func.func @workgroup
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @workgroup() {
amdaie.workgroup {
amdaie.controlcode {
Expand All @@ -38,7 +47,8 @@ module {
// CHECK-SAME: %{{.+}}: memref<1024x64xi32>
// CHECK-SAME: %{{.+}}: memref<32x64xi32>
// CHECK-NOT: memref.assume_alignment
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @hal_bindings() {
%c0 = arith.constant 0 : index
%0 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1024x64xi32>
Expand Down Expand Up @@ -67,7 +77,8 @@ module {
// CHECK-SAME: @[[OBJ0]]
// CHECK-SAME: @[[OBJ1]]
// CHECK: func.func @circular_dma_cpy_nd_and_link
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @circular_dma_cpy_nd_and_link() {
amdaie.workgroup {
%c0 = arith.constant 0 : index
Expand Down Expand Up @@ -112,7 +123,8 @@ module {
// CHECK-SAME: @[[OBJ0]]
// CHECK-SAME: @[[OBJ1]]
// CHECK: func.func @circular_dma_cpy_sizes_and_strides
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @circular_dma_cpy_sizes_and_strides() {
amdaie.workgroup {
%c0 = arith.constant 0 : index
Expand Down Expand Up @@ -161,7 +173,8 @@ module {
// CHECK: %[[REINTERPRET:.+]] = memref.reinterpret_cast %[[ACCESS]]
// CHECK: linalg.fill ins(%{{.+}} : i32) outs(%[[REINTERPRET]] : memref<32x32xi32, 1>)
// CHECK: func.func @tile_and_core_and_acquire
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @tile_and_core_and_acquire() {
amdaie.workgroup {
%c0_i32 = arith.constant 0 : i32
Expand Down Expand Up @@ -217,7 +230,8 @@ module {
// CHECK: aie.objectfifo.subview.access
// CHECK-SAME: %[[ACQUIRE_1]]
// CHECK: func.func @tile_and_core_and_acquire_broadcast
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @tile_and_core_and_acquire_broadcast() {
amdaie.workgroup {
%c0 = arith.constant 0 : index
Expand Down Expand Up @@ -266,7 +280,8 @@ module {
// CHECK: aie.core(%[[TILE_0_2]])
// CHECK: aie.objectfifo.release
// CHECK: func.func @tile_and_core_and_release
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @tile_and_core_and_release() {
amdaie.workgroup {
%c0 = arith.constant 0 : index
Expand Down Expand Up @@ -331,7 +346,8 @@ module {
// CHECK-SAME: issue_token = true
// CHECK-SAME: metadata = @[[OBJ2]]
// CHECK-NEXT: aiex.npu.dma_wait {symbol = @[[OBJ2]]}
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @controlcode() {
amdaie.workgroup {
%c0 = arith.constant 0 : index
Expand Down Expand Up @@ -377,7 +393,8 @@ module {

// Test to demonstrate invalid implicit L3 memref type that has rank greater than that
// expected for static offsets/sizes/strides.
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @controlcode_invalid_implicit_l3_memref() {
amdaie.workgroup {
%c0 = arith.constant 0 : index
Expand Down Expand Up @@ -462,7 +479,8 @@ module {
// CHECK-SAME: @[[OBJ0]]
// CHECK-NEXT: aiex.npu.dma_wait
// CHECK-SAME: @[[OBJ0]]
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @large_example() {
amdaie.workgroup {
%c0 = arith.constant 0 : index
Expand Down
3 changes: 2 additions & 1 deletion tests/samples/matmul_peeled_objectfifo.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d0, d3, d5)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d5, d4)>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d0, d3, d4)>
module {
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @matmul_i32() {
%c64 = arith.constant 64 : index
%c960 = arith.constant 960 : index
Expand Down
2 changes: 1 addition & 1 deletion tests/samples/matmul_peeled_objectfifo_e2e.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-lower-to-aie-pipeline=objectFifo --iree-amdaie-tile-pipeline=pack-peel --split-input-file | FileCheck %s
// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources --iree-amdaie-target-device=npu1_4col %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-lower-to-aie-pipeline=objectFifo --iree-amdaie-tile-pipeline=pack-peel --split-input-file | FileCheck %s

nirvedhmeshram marked this conversation as resolved.
Show resolved Hide resolved
// CHECK-LABEL: hal.executable.export public @matmul_i32_dispatch_0_matmul_128x128x256_i32
// CHECK-DAG: %[[TILE_0_2:.+]] = aie.tile(0, 2)
Expand Down
Loading