diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh index db5f8055c..908be0dac 100755 --- a/build_tools/ci/run_matmul_test.sh +++ b/build_tools/ci/run_matmul_test.sh @@ -182,6 +182,8 @@ function run_matmul_test() { local target_backend="amd-aie" + local target_device="npu1_4col" + local device="xrt" local peano_install_path="${PEANO}" @@ -256,6 +258,10 @@ function run_matmul_test() { use_ukernel="$2" shift 2 ;; + --target_device) + target_device="$2" + shift 2 + ;; --target_backend) target_backend="$2" shift 2 @@ -381,6 +387,7 @@ function run_matmul_test() { set +e compilation_flags="--iree-hal-target-backends=${target_backend} \ + --iree-amdaie-target-device=${target_device} \ --iree-amdaie-lower-to-aie-pipeline=${lower_to_aie_pipeline} \ --iree-amdaie-tile-pipeline=${tile_pipeline} \ --iree-amd-aie-peano-install-dir=${peano_install_path} \ @@ -495,6 +502,7 @@ run_matmul_test \ --lhs_rhs_type "bf16" \ --acc_type "f32" \ --target_backend "amd-aie" \ + --target_device "npu1_4col" \ --device "xrt" \ --peano_install_path "${PEANO}" \ --mlir_aie_install_path "${MLIR_AIE_INSTALL}" \ diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.td index 1ad7d9506..362d0aaf7 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.td +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEAttrs.td @@ -11,6 +11,22 @@ include "iree-amd-aie/IR/AMDAIEDialect.td" include "mlir/IR/AttrTypeBase.td" include "mlir/IR/EnumAttr.td" +def AMDAIE_AMDAIEDevice: I32EnumAttr<"AMDAIEDevice", + "Enum with target AMDAIE devices.", + [ + I32EnumAttrCase<"xcvc1902", 1>, + I32EnumAttrCase<"xcve2302", 2>, + I32EnumAttrCase<"xcve2802", 3>, + I32EnumAttrCase<"npu1", 4>, + I32EnumAttrCase<"npu1_1col", 5>, + I32EnumAttrCase<"npu1_2col", 6>, + I32EnumAttrCase<"npu1_3col", 7>, + I32EnumAttrCase<"npu1_4col", 8> + ]> +{ + let cppNamespace = "mlir::iree_compiler::AMDAIE"; +} + 
def AMDAIE_CopyOpOperateOn: I32EnumAttr<"CopyOpOperateOn", "Enables templated functions that operate on either source or target of " "copy/dma operations", diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp index 529a01bbb..8f823ff61 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.cpp @@ -16,6 +16,7 @@ #include "aie/Target/LLVMIR/Dialect/XLLVM/XLLVMToLLVMIRTranslation.h" #include "air/Dialect/AIR/AIRDialect.h" #include "air/Dialect/AIRRt/AIRRtDialect.h" +#include "iree-amd-aie/IR/AMDAIEAttrs.h" #include "iree-amd-aie/IR/AMDAIEDialect.h" #include "iree-amd-aie/Transforms/Passes.h" #include "iree-dialects/Dialect/LinalgTransform/Passes.h" @@ -48,6 +49,25 @@ namespace mlir::iree_compiler::AMDAIE { +/// Command line option for selecting the target AIE device. +static llvm::cl::opt clAMDAIETargetDevice( + "iree-amdaie-target-device", + llvm::cl::desc("Sets the target device architecture."), + llvm::cl::values( + clEnumValN(AMDAIEDevice::xcvc1902, "xcvc1902", "The xcvc1902 device"), + clEnumValN(AMDAIEDevice::xcve2302, "xcve2302", "The xcve2302 device"), + clEnumValN(AMDAIEDevice::xcve2802, "xcve2802", "The xcve2802 device"), + clEnumValN(AMDAIEDevice::npu1, "npu1", "Default Phoenix NPU"), + clEnumValN(AMDAIEDevice::npu1_1col, "npu1_1col", + "Phoenix NPU with a single column"), + clEnumValN(AMDAIEDevice::npu1_2col, "npu1_2col", + "Phoenix NPU with two columns"), + clEnumValN(AMDAIEDevice::npu1_3col, "npu1_3col", + "Phoenix NPU with three columns"), + clEnumValN(AMDAIEDevice::npu1_4col, "npu1_4col", + "Phoenix NPU with four columns")), + llvm::cl::init(AMDAIEDevice::npu1_4col)); + static llvm::cl::opt clEnableAMDAIEUkernels( "iree-amdaie-enable-ukernels", llvm::cl::desc("Enables microkernels in the amdaie backend. 
May be " @@ -142,8 +162,10 @@ class AIETargetBackend final : public IREE::HAL::TargetBackend { auto addConfig = [&](StringRef name, Attribute value) { configItems.emplace_back(StringAttr::get(context, name), value); }; - // Set target arch - addConfig("target_arch", StringAttr::get(context, "chip-tbd")); + // Set target device + addConfig( + "target_device", + StringAttr::get(context, AMDAIE::stringifyEnum(clAMDAIETargetDevice))); // Set microkernel enabling flag. addConfig("ukernels", StringAttr::get(context, clEnableAMDAIEUkernels)); auto configAttr = b.getDictionaryAttr(configItems); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp index a2da62c9b..e6b625a28 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETargetDirect.cpp @@ -15,6 +15,7 @@ #include "aie/Dialect/XLLVM/XLLVMDialect.h" #include "aie/Passes.h" #include "aie/Target/LLVMIR/Dialect/XLLVM/XLLVMToLLVMIRTranslation.h" +#include "iree-amd-aie/IR/AMDAIEAttrs.h" #include "iree-amd-aie/IR/AMDAIEDialect.h" #include "iree-amd-aie/Transforms/Passes.h" #include "iree-dialects/Dialect/LinalgTransform/Passes.h" @@ -141,8 +142,10 @@ class AIETargetDirectBackend final : public IREE::HAL::TargetBackend { auto addConfig = [&](StringRef name, Attribute value) { configItems.emplace_back(StringAttr::get(context, name), value); }; - // Set target arch - addConfig("target_arch", StringAttr::get(context, "chip-tbd")); + // Set target device + addConfig("target_device", + StringAttr::get(context, + AMDAIE::stringifyEnum(AMDAIEDevice::npu1_4col))); // Set microkernel enabling flag. 
addConfig("ukernels", StringAttr::get(context, /*clEnableAMDAIEUkernels*/ "")); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/tests/amd_aie_target_backend.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/tests/amd_aie_target_backend.mlir index a372daf86..3b1e4a2a8 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/tests/amd_aie_target_backend.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/tests/amd_aie_target_backend.mlir @@ -1,8 +1,8 @@ // RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-targets %s | FileCheck %s --check-prefix=DEFAULT // RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-targets --iree-amdaie-enable-ukernels=all %s | FileCheck %s --check-prefix=ENABLE_UKERNEL -// DEFAULT: hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = "none"}>) { -// ENABLE_UKERNEL: hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie", "amdaie-xclbin-fb", {target_arch = "chip-tbd", ukernels = "all"}>) { +// DEFAULT: hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>) { +// ENABLE_UKERNEL: hal.executable.variant public @amdaie_xclbin_fb target(<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "all"}>) { func.func @matmul_small(%lhs : tensor<8x16xi32>, %rhs : tensor<16x32xi32>) -> tensor<8x32xi32> { %empty = tensor.empty() : tensor<8x32xi32> diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp index b5b328c60..0ee21193a 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIELowerToAIE.cpp @@ -17,6 +17,7 @@ #include "aie/Dialect/AIEX/IR/AIEXDialect.h" #include 
"iree-amd-aie/IR/AMDAIEDialect.h" #include "iree-amd-aie/IR/AMDAIEOps.h" +#include "iree-amd-aie/Transforms/AMDAIEUtils.h" #include "iree-amd-aie/Transforms/Passes.h" #include "iree-amd-aie/Transforms/Transforms.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" @@ -729,13 +730,24 @@ LogicalResult workgroupToAIE(IRRewriter &rewriter, LogicalResult lowerToAIE(ModuleOp moduleOp) { IRRewriter rewriter(moduleOp.getContext()); Block *moduleBlock = &moduleOp->getRegion(0).front(); + + // Retrieve the AMDAIEDevice from the executable target attribute. + auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(moduleOp); + std::optional<AMDAIEDevice> device = getConfigAMDAIEDevice(targetAttr); + if (!device) + return moduleOp.emitOpError() + << "No AMDAIEDevice found in the target attribute configuration"; + // The numeric enum value is forwarded unchanged; this assumes AMDAIEDevice + // mirrors xilinx::AIE::AIEDevice case-for-case (TODO confirm / keep in sync). + xilinx::AIE::AIEDevice aieDevice = static_cast<xilinx::AIE::AIEDevice>( + static_cast<uint32_t>(device.value())); + auto funcRes = moduleOp.walk([&](func::FuncOp funcOp) { // Insert AIE DeviceOp rewriter.setInsertionPoint(moduleBlock, moduleBlock->begin()); auto deviceOp = rewriter.create( rewriter.getUnknownLoc(), - xilinx::AIE::AIEDeviceAttr::get(rewriter.getContext(), - xilinx::AIE::AIEDevice::npu1_4col)); + xilinx::AIE::AIEDeviceAttr::get(rewriter.getContext(), aieDevice)); deviceOp.getRegion().emplaceBlock(); Block *deviceBlock = &deviceOp.getRegion().front(); @@ -868,7 +880,7 @@ class AMDAIELowerToAIEPass } AMDAIELowerToAIEPass() = default; - AMDAIELowerToAIEPass(const AMDAIELowerToAIEPass &pass) {}; + AMDAIELowerToAIEPass(const AMDAIELowerToAIEPass &pass){}; void runOnOperation() override; }; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEUtils.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEUtils.cpp index 86ba2b983..20edef734 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEUtils.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEUtils.cpp @@ -12,6 +12,16 @@
namespace mlir::iree_compiler::AMDAIE { +std::optional<AMDAIEDevice> getConfigAMDAIEDevice( + IREE::HAL::ExecutableTargetAttr targetAttr) { + if (!targetAttr) return std::nullopt; + auto config = targetAttr.getConfiguration(); + if (!config) return std::nullopt; + auto attr = config.getAs<StringAttr>("target_device");  // null if absent + if (!attr) return std::nullopt; + return AMDAIE::symbolizeEnum<AMDAIEDevice>(attr.getValue()); +} + namespace { /// Generate a DenseMap key we can use for the element types (alternatives @@ -23,7 +33,7 @@ constexpr uint32_t getElementTypeKey(uint32_t a, uint32_t b, uint32_t c) { /// Map from (LHS bitwidth, RHS bitwidth, Accumulator bitwidth) to the AIE /// instruction size (m, n, k) for the integer types with those bitwidths. -const auto& getIntegerMatmulInstructionSizeMap() { +const auto &getIntegerMatmulInstructionSizeMap() { // Sanity check. static_assert(getElementTypeKey(1, 2, 3) == 1 + 2 * 256 + 3 * 65536); @@ -60,7 +70,7 @@ const auto& getIntegerMatmulInstructionSizeMap() { FailureOr> getAIEIntegerMatmulInstructionSize( uint32_t nBitsLhs, uint32_t nBitsRhs, uint32_t nBitsAcc) { - const auto& mapForIntTypes = getIntegerMatmulInstructionSizeMap(); + const auto &mapForIntTypes = getIntegerMatmulInstructionSizeMap(); auto it = mapForIntTypes.find(getElementTypeKey(nBitsLhs, nBitsRhs, nBitsAcc)); if (it == mapForIntTypes.end()) { @@ -77,8 +87,7 @@ FailureOr> getAIEMatmulInstructionSize(Type elTypeLhs, isa(elTypeAcc); bool allInteger = isa(elTypeLhs) && - isa(elTypeRhs) && - isa(elTypeAcc); + isa(elTypeRhs) && isa(elTypeAcc); if (!allInteger && !allFloatingPoint) { return failure(); @@ -103,7 +112,7 @@ FailureOr> getAIEMatmulInstructionSize(Type elTypeLhs, FailureOr getTilingScaleFactor(Type elemType) { unsigned bitWidth = elemType.getIntOrFloatBitWidth(); - if (bitWidth %8 != 0) return failure(); + if (bitWidth % 8 != 0) return failure(); if (bitWidth > 64) return failure(); return 64 / bitWidth; } diff --git
a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEUtils.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEUtils.h index 84c935796..cab6e4e1e 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEUtils.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIEUtils.h @@ -9,11 +9,17 @@ #include +#include "iree-amd-aie/IR/AMDAIEAttrs.h" +#include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/IR/Types.h" namespace mlir::iree_compiler::AMDAIE { +/// Returns the target AMDAIE device. +std::optional getConfigAMDAIEDevice( + IREE::HAL::ExecutableTargetAttr targetAttr); + // This function is based on the following table pulled from the // AIEVec_MatMulOp documentation in // mlir-aie/include/aie/Dialect/AIEVec/IR/AIEVecOps.td diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lower_to_aie.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lower_to_aie.mlir index d9bd51c10..2b214e363 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lower_to_aie.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lower_to_aie.mlir @@ -1,15 +1,23 @@ // RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(iree-amdaie-lower-to-aie)" --verify-diagnostics %s | FileCheck %s -// CHECK: module +// expected-error @+1 {{No AMDAIEDevice found in the target attribute configuration}} module { } // ----- +// CHECK: module +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { +} + +// ----- + // CHECK: module // CHECK: aie.device // CHECK: func.func @empty_func -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes 
{hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @empty_func() { return } @@ -20,7 +28,8 @@ module { // CHECK: module // CHECK: aie.device // CHECK: func.func @workgroup -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @workgroup() { amdaie.workgroup { amdaie.controlcode { @@ -38,7 +47,8 @@ module { // CHECK-SAME: %{{.+}}: memref<1024x64xi32> // CHECK-SAME: %{{.+}}: memref<32x64xi32> // CHECK-NOT: memref.assume_alignment -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @hal_bindings() { %c0 = arith.constant 0 : index %0 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<1024x64xi32> @@ -67,7 +77,8 @@ module { // CHECK-SAME: @[[OBJ0]] // CHECK-SAME: @[[OBJ1]] // CHECK: func.func @circular_dma_cpy_nd_and_link -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @circular_dma_cpy_nd_and_link() { amdaie.workgroup { %c0 = arith.constant 0 : index @@ -112,7 +123,8 @@ module { // CHECK-SAME: @[[OBJ0]] // CHECK-SAME: @[[OBJ1]] // CHECK: func.func @circular_dma_cpy_sizes_and_strides -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @circular_dma_cpy_sizes_and_strides() { amdaie.workgroup { %c0 = arith.constant 0 
: index @@ -161,7 +173,8 @@ module { // CHECK: %[[REINTERPRET:.+]] = memref.reinterpret_cast %[[ACCESS]] // CHECK: linalg.fill ins(%{{.+}} : i32) outs(%[[REINTERPRET]] : memref<32x32xi32, 1>) // CHECK: func.func @tile_and_core_and_acquire -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @tile_and_core_and_acquire() { amdaie.workgroup { %c0_i32 = arith.constant 0 : i32 @@ -217,7 +230,8 @@ module { // CHECK: aie.objectfifo.subview.access // CHECK-SAME: %[[ACQUIRE_1]] // CHECK: func.func @tile_and_core_and_acquire_broadcast -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @tile_and_core_and_acquire_broadcast() { amdaie.workgroup { %c0 = arith.constant 0 : index @@ -266,7 +280,8 @@ module { // CHECK: aie.core(%[[TILE_0_2]]) // CHECK: aie.objectfifo.release // CHECK: func.func @tile_and_core_and_release -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @tile_and_core_and_release() { amdaie.workgroup { %c0 = arith.constant 0 : index @@ -331,7 +346,8 @@ module { // CHECK-SAME: issue_token = true // CHECK-SAME: metadata = @[[OBJ2]] // CHECK-NEXT: aiex.npu.dma_wait {symbol = @[[OBJ2]]} -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @controlcode() { amdaie.workgroup { %c0 = arith.constant 0 : index @@ 
-377,7 +393,8 @@ module { // Test to demonstrate invalid implicit L3 memref type that has rank greater than that // expected for static offsets/sizes/strides. -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @controlcode_invalid_implicit_l3_memref() { amdaie.workgroup { %c0 = arith.constant 0 : index @@ -462,7 +479,8 @@ module { // CHECK-SAME: @[[OBJ0]] // CHECK-NEXT: aiex.npu.dma_wait // CHECK-SAME: @[[OBJ0]] -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @large_example() { amdaie.workgroup { %c0 = arith.constant 0 : index diff --git a/tests/samples/matmul_peeled_objectfifo.mlir b/tests/samples/matmul_peeled_objectfifo.mlir index 70fd8b66a..2d6be1d88 100644 --- a/tests/samples/matmul_peeled_objectfifo.mlir +++ b/tests/samples/matmul_peeled_objectfifo.mlir @@ -21,7 +21,8 @@ #map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d0, d3, d5)> #map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d5, d4)> #map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d0, d3, d4)> -module { +#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}> +module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} { func.func @matmul_i32() { %c64 = arith.constant 64 : index %c960 = arith.constant 960 : index diff --git a/tests/samples/matmul_peeled_objectfifo_e2e.mlir b/tests/samples/matmul_peeled_objectfifo_e2e.mlir index 52d571073..9cafe77f5 100644 --- a/tests/samples/matmul_peeled_objectfifo_e2e.mlir +++ b/tests/samples/matmul_peeled_objectfifo_e2e.mlir @@ -1,4 +1,4 @@ -// RUN: iree-compile 
--iree-hal-target-backends=amd-aie --compile-to=executable-sources %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-lower-to-aie-pipeline=objectFifo --iree-amdaie-tile-pipeline=pack-peel --split-input-file | FileCheck %s +// RUN: iree-compile --iree-hal-target-backends=amd-aie --compile-to=executable-sources --iree-amdaie-target-device=npu1_4col %s | iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(iree-hal-translate-target-executable-variants{target=amd-aie})))" --iree-amdaie-lower-to-aie-pipeline=objectFifo --iree-amdaie-tile-pipeline=pack-peel --split-input-file | FileCheck %s // CHECK-LABEL: hal.executable.export public @matmul_i32_dispatch_0_matmul_128x128x256_i32 // CHECK-DAG: %[[TILE_0_2:.+]] = aie.tile(0, 2)