Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IREE Bump to Oct 1, 2024 #788

Merged
merged 4 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ def generate_aie_vmfb(
f"--iree-amd-aie-vitis-install-dir={config.vitis_dir}",
f"--iree-hal-dump-executable-files-to={config.output_dir}",
"--iree-scheduling-optimize-bindings=false",
"--iree-hal-memoization=false",
"--iree-hal-indirect-command-buffers=false",
f"--mlir-disable-threading",
"--mlir-elide-resource-strings-if-larger=10",
]
Expand Down
4 changes: 4 additions & 0 deletions build_tools/ci/run_matmul_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,8 @@ function run_matmul_test() {
--iree-amd-aie-enable-chess=${use_chess} \
--iree-amdaie-enable-packet-flow=${enable_packet_flow} \
--iree-hal-dump-executable-files-to=$PWD \
--iree-hal-memoization=false \
--iree-hal-indirect-command-buffers=false \
--mlir-elide-resource-strings-if-larger=10 \
--iree-amd-aie-show-invoked-commands"

Expand All @@ -416,6 +418,8 @@ function run_matmul_test() {
set +e

echo "**** Generating matmul .vmfb file for ${name} ****"
${IREE_COMPILE_EXE} "${matmul_ir}" \
${compilation_flags} --compile-to=vm -o "${matmul_vmfb}.vm"
${IREE_COMPILE_EXE} "${matmul_ir}" \
${compilation_flags} -o "${matmul_vmfb}"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ namespace {

/// Converts `scf.forall` into nested `scf.for` and then coalesce the `scf.for`
/// loops.
LogicalResult coreForallToFor(RewriterBase &rewriter,
AMDAIE::CoreOp coreOp) {
LogicalResult coreForallToFor(RewriterBase &rewriter, AMDAIE::CoreOp coreOp) {
WalkResult res = coreOp->walk([&](scf::ForallOp forallOp) {
SmallVector<Operation *> forOpResults;
if (failed(scf::forallToForLoop(rewriter, forallOp, &forOpResults))) {
Expand All @@ -55,12 +54,12 @@ class AMDAIEConvertCoreForallToForPass
AMDAIEConvertCoreForallToForPass> {
public:
void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<scf::SCFDialect>();
registry.insert<scf::SCFDialect, affine::AffineDialect>();
}

AMDAIEConvertCoreForallToForPass() = default;
AMDAIEConvertCoreForallToForPass(
const AMDAIEConvertCoreForallToForPass &pass){};
const AMDAIEConvertCoreForallToForPass &pass) {};
void runOnOperation() override;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,13 +343,15 @@ void AMDAIETileAndFusePass::runOnOperation() {
if (isTilingReductionDimension(consumerOp, tileSizesVal)) {
tileAndFuseOptions.setFusionControlFn(
[&](tensor::ExtractSliceOp sliceOp, OpResult originalProducer,
bool isDestinationOperand) -> std::tuple<bool, bool> {
return {false, false};
bool isDestinationOperand)
-> std::optional<scf::SCFTileAndFuseOptions::ControlFnResult> {
return std::nullopt;
});
} else {
tileAndFuseOptions.setFusionControlFn(
[&](tensor::ExtractSliceOp sliceOp, OpResult originalProducer,
bool isDestinationOperand) -> std::tuple<bool, bool> {
bool isDestinationOperand)
-> std::optional<scf::SCFTileAndFuseOptions::ControlFnResult> {
bool fusableOp =
TypeSwitch<Operation *, bool>(originalProducer.getOwner())
// List ops that shouldn't be fused.
Expand All @@ -360,7 +362,8 @@ void AMDAIETileAndFusePass::runOnOperation() {
return op->getDialect() ==
context->getLoadedDialect<linalg::LinalgDialect>();
});
return {fusableOp, false};
if (!fusableOp) return std::nullopt;
return scf::SCFTileAndFuseOptions::ControlFnResult{false};
});
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
// RUN: iree-opt --pass-pipeline="builtin.module(iree-amdaie-convert-core-forall-to-for,canonicalize)" --split-input-file %s | FileCheck %s

// CHECK-LABEL: @test_single
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK-DAG: amdaie.core
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK: scf.for %[[ARG0:.+]] = %[[C0]] to %[[C4]] step %[[C1]] {
// CHECK-DAG: %[[REM:.+]] = arith.remsi %[[ARG0]], %[[C2]] : index
// CHECK-DAG: %[[DIV:.+]] = arith.divsi %[[ARG0]], %[[C2]] : index
// CHECK-DAG: func.call @callee(%[[DIV]], %[[REM]]) : (index, index) -> ()
// CHECK-DAG: %[[D:.+]]:2 = affine.delinearize_index %[[ARG0]] into (%[[C2]], %[[C2]]) : index, index
// CHECK-DAG: func.call @callee(%[[D]]#0, %[[D]]#1) : (index, index) -> ()
module @test_single {
func.func private @callee(%i: index, %j: index)
%c0 = arith.constant 0 : index
Expand All @@ -28,20 +27,18 @@ module @test_single {
// -----

// CHECK-LABEL: @test_multi
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
// CHECK-DAG: amdaie.core
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
// CHECK: scf.for %[[ARG0:.+]] = %[[C0]] to %[[C4]] step %[[C1]] {
// CHECK-DAG: %[[REM:.+]] = arith.remsi %[[ARG0]], %[[C2]] : index
// CHECK-DAG: %[[DIV:.+]] = arith.divsi %[[ARG0]], %[[C2]] : index
// CHECK-DAG: func.call @callee(%[[DIV]], %[[REM]]) : (index, index) -> ()
// CHECK-DAG: %[[D:.+]]:2 = affine.delinearize_index %[[ARG0]] into (%[[C2]], %[[C2]]) : index, index
// CHECK-DAG: func.call @callee(%[[D]]#0, %[[D]]#1) : (index, index) -> ()
// CHECK-DAG: scf.for %[[ARG1:.+]] = %[[C0]] to %[[C16]] step %[[C1]] {
// CHECK-DAG: %[[REM1:.+]] = arith.remsi %[[ARG1]], %[[C4]] : index
// CHECK-DAG: %[[DIV1:.+]] = arith.divsi %[[ARG1]], %[[C4]] : index
// CHECK-DAG: func.call @callee(%[[DIV1]], %[[REM1]]) : (index, index) -> ()
// CHECK-DAG: %[[D:.+]]:2 = affine.delinearize_index %[[ARG0]] into (%[[C4]], %[[C4]]) : index, index
// CHECK-DAG: func.call @callee(%[[D]]#0, %[[D]]#1) : (index, index) -> ()
module @test_multi {
func.func private @callee(%i: index, %j: index)
%c0 = arith.constant 0 : index
Expand All @@ -63,19 +60,17 @@ module @test_multi {
// -----

// CHECK-LABEL: @test_nested
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
// CHECK-DAG: amdaie.core
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index
// CHECK-DAG: %[[C16:.+]] = arith.constant 16 : index
// CHECK: scf.for %[[ARG0:.+]] = %[[C0]] to %[[C16]] step %[[C1]] {
// CHECK-DAG: %[[REM0:.+]] = arith.remsi %[[ARG0]], %[[C4]] : index
// CHECK-DAG: %[[DIV0:.+]] = arith.divsi %[[ARG0]], %[[C4]] : index
// CHECK-DAG: %[[D1:.+]]:2 = affine.delinearize_index %[[ARG0]] into (%[[C4]], %[[C4]]) : index, index
// CHECK-DAG: scf.for %[[ARG1:.+]] = %[[C0]] to %[[C4]] step %[[C1]] {
// CHECK-DAG: %[[REM1:.+]] = arith.remsi %[[ARG1]], %[[C2]] : index
// CHECK-DAG: %[[DIV1:.+]] = arith.divsi %[[ARG1]], %[[C2]] : index
// CHECK-DAG: func.call @callee(%[[DIV0]], %[[REM0]], %[[DIV1]], %[[REM1]]) : (index, index, index, index) -> ()
// CHECK-DAG: %[[D2:.+]]:2 = affine.delinearize_index %[[ARG1]] into (%[[C2]], %[[C2]]) : index, index
// CHECK-DAG: func.call @callee(%[[D1]]#0, %[[D1]]#1, %[[D2]]#0, %[[D2]]#1) : (index, index, index, index) -> ()
module @test_nested {
func.func private @callee(%i: index, %j: index, %k: index, %l: index)
%c0 = arith.constant 0 : index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,12 @@ module {
// CHECK: %[[SECOND_LOOP:.*]]:2 = scf.for %[[IV0:.*]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITER_ARG_1:.*]] = %[[FIRST_LOOP]], %[[ITER_ARG_3:.*]] = %[[UNPACK_OUT]])
// CHECK: {
// CHECK: %[[MATMUL:.*]] = linalg.generic
// CHECK: affine.apply
// CHECK: affine.apply
// CHECK: %[[iv0:.*]] = affine.apply #[[UNPACK_RESULT_MAP0]](%[[IV0]])
// CHECK: %[[iv1:.*]] = affine.apply #[[UNPACK_RESULT_MAP1]](%[[IV0]])
// CHECK: %[[TILED_UNPACK_DEST:.*]] = tensor.extract_slice %[[ITER_ARG_3]][0, 0, %[[iv0]], %[[iv1]]] [1, 1, 32, 32] [1, 1, 1, 1]
// CHECK: %[[TILED_UNPACK:.*]] = tensor.unpack %[[MATMUL]] outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %[[TILED_UNPACK_DEST]]
// CHECK: %[[iv0:.*]] = affine.apply #[[UNPACK_RESULT_MAP0]](%[[IV0]])
// CHECK: %[[iv1:.*]] = affine.apply #[[UNPACK_RESULT_MAP1]](%[[IV0]])
// CHECK: %[[YIELD_MATMUL:.*]] = tensor.insert_slice %[[MATMUL]] into %[[ITER_ARG_1]]
// CHECK: %[[YIELD_UNPACK:.*]] = tensor.insert_slice %[[TILED_UNPACK]] into %[[ITER_ARG_3]]
// CHECK: scf.yield %[[YIELD_MATMUL]], %[[YIELD_UNPACK]]
Expand Down Expand Up @@ -159,12 +159,12 @@ module {
// CHECK: arith.addi
// CHECK: }
// CHECK: %[[YIELD_MATMUL:.*]] = tensor.insert_slice %[[MATMUL]] into %[[ITER_ARG_1]]
// CHECK: affine.apply
// CHECK: affine.apply
// CHECK: %[[iv0:.*]] = affine.apply #[[UNPACK_RESULT_MAP0]](%[[IV0]])
// CHECK: %[[iv1:.*]] = affine.apply #[[UNPACK_RESULT_MAP1]](%[[IV0]])
// CHECK: %[[TILED_UNPACK_DEST:.*]] = tensor.extract_slice %[[ITER_ARG_3]][0, 0, %[[iv0]], %[[iv1]]] [1, 1, 32, 32] [1, 1, 1, 1]
// CHECK: %[[TILED_UNPACK:.*]] = tensor.unpack %[[FUSED_CONSUMER]] outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %[[TILED_UNPACK_DEST]]
// CHECK: %[[iv0:.*]] = affine.apply #[[UNPACK_RESULT_MAP0]](%[[IV0]])
// CHECK: %[[iv1:.*]] = affine.apply #[[UNPACK_RESULT_MAP1]](%[[IV0]])
// CHECK: %[[YIELD_ELEM:.*]] = tensor.insert_slice %[[FUSED_CONSUMER]] into %[[ITER_ARG_2]]
// CHECK: %[[YIELD_UNPACK:.*]] = tensor.insert_slice %[[TILED_UNPACK]] into %[[ITER_ARG_3]]
// CHECK: scf.yield %[[YIELD_MATMUL]], %[[YIELD_ELEM]], %[[YIELD_UNPACK]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,15 @@ module {
// CHECK: %[[SECOND_LOOP:.*]]:2 = scf.forall (%[[IV0:.*]], %[[IV1:.*]]) in (2, 2) shared_outs(%[[ITER_ARG_1:.*]] = %[[FIRST_LOOP]], %[[ITER_ARG_3:.*]] = %[[UNPACK_OUT]])
// CHECK: {
// CHECK: %[[MATMUL:.*]] = linalg.generic
// CHECK: affine.apply
// CHECK: affine.apply
// CHECK: %[[iv0:.*]] = affine.apply #[[UNPACK_RESULT_MAP0]](%[[IV0]])
// CHECK: %[[iv1:.*]] = affine.apply #[[UNPACK_RESULT_MAP1]](%[[IV1]])
// CHECK: %[[TILED_UNPACK_DEST:.*]] = tensor.extract_slice %[[ITER_ARG_3]][0, 0, %[[iv0]], %[[iv1]]] [1, 1, 32, 32] [1, 1, 1, 1]
// CHECK: %[[TILED_UNPACK:.*]] = tensor.unpack %[[MATMUL]] outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %[[TILED_UNPACK_DEST]]
// CHECK: %[[iv0:.*]] = affine.apply #[[UNPACK_RESULT_MAP0]](%[[IV0]])
// CHECK: %[[iv1:.*]] = affine.apply #[[UNPACK_RESULT_MAP1]](%[[IV1]])
// CHECK: scf.forall.in_parallel {
// CHECK: tensor.parallel_insert_slice %[[TILED_UNPACK]] into %[[ITER_ARG_3]][0, 0, %[[iv0]], %[[iv1]]] [1, 1, 32, 32] [1, 1, 1, 1]
// CHECK: tensor.parallel_insert_slice %[[MATMUL]] into %[[ITER_ARG_1]][0, 0, %[[IV1]], %[[IV0]], 0, 0] [1, 1, 4, 8, 4, 8] [1, 1, 1, 1, 1, 1]
// CHECK: tensor.parallel_insert_slice %[[TILED_UNPACK]] into %[[ITER_ARG_3]][0, 0, %[[iv0]], %[[iv1]]] [1, 1, 32, 32] [1, 1, 1, 1]
// CHECK: }
// CHECK: }
// CHECK: %[[SECOND_UNPACK:.*]] = tensor.unpack %[[SECOND_LOOP]]#1 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %[[SECOND_UNPACK_OUT]] :
Expand Down Expand Up @@ -153,16 +153,16 @@ module {
// CHECK: {
// CHECK: arith.addi
// CHECK: }
// CHECK: affine.apply
// CHECK: affine.apply
// CHECK: %[[iv0:.*]] = affine.apply #[[UNPACK_RESULT_MAP0]](%[[IV0]])
// CHECK: %[[iv1:.*]] = affine.apply #[[UNPACK_RESULT_MAP1]](%[[IV1]])
// CHECK: %[[TILED_UNPACK_DEST:.*]] = tensor.extract_slice %[[ITER_ARG_3]][0, 0, %[[iv0]], %[[iv1]]] [1, 1, 32, 32] [1, 1, 1, 1]
// CHECK: %[[TILED_UNPACK:.*]] = tensor.unpack %[[FUSED_CONSUMER]] outer_dims_perm = [0, 1, 3, 2] inner_dims_pos = [2, 3] inner_tiles = [4, 8] into %[[TILED_UNPACK_DEST]]
// CHECK: %[[iv0:.*]] = affine.apply #[[UNPACK_RESULT_MAP0]](%[[IV0]])
// CHECK: %[[iv1:.*]] = affine.apply #[[UNPACK_RESULT_MAP1]](%[[IV1]])
// CHECK: scf.forall.in_parallel {
// CHECK: tensor.parallel_insert_slice %[[TILED_UNPACK]] into %[[ITER_ARG_3]][0, 0, %[[iv0]], %[[iv1]]] [1, 1, 32, 32] [1, 1, 1, 1]
// CHECK: tensor.parallel_insert_slice %[[FUSED_CONSUMER]] into %[[ITER_ARG_2]][0, 0, %[[IV1]], %[[IV0]], 0, 0] [1, 1, 4, 8, 4, 8] [1, 1, 1, 1, 1, 1]
// CHECK: tensor.parallel_insert_slice %[[MATMUL]] into %[[ITER_ARG_1]][0, 0, %[[IV1]], %[[IV0]], 0, 0] [1, 1, 4, 8, 4, 8] [1, 1, 1, 1, 1, 1]
// CHECK: tensor.parallel_insert_slice %[[FUSED_CONSUMER]] into %[[ITER_ARG_2]][0, 0, %[[IV1]], %[[IV0]], 0, 0] [1, 1, 4, 8, 4, 8] [1, 1, 1, 1, 1, 1]
// CHECK: tensor.parallel_insert_slice %[[TILED_UNPACK]] into %[[ITER_ARG_3]][0, 0, %[[iv0]], %[[iv1]]] [1, 1, 32, 32] [1, 1, 1, 1]
// CHECK: }
// CHECK: }
// CHECK: %[[SECOND_UNPACK:.*]] = tensor.unpack %[[SECOND_LOOP]]#2 inner_dims_pos = [0, 1] inner_tiles = [64, 64] into %[[SECOND_UNPACK_OUT]] :
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,9 @@ static iree_status_t iree_hal_xrt_direct_command_buffer_dispatch(
std::vector<xrt::bo> bos;
// TODO(max): do we need multiple descriptor sets ever for AIE?
uint32_t set = 0;
iree_hal_xrt_direct_command_buffer_push_descriptor_set(
base_command_buffer, set, bindings.count, bindings.values);
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_xrt_direct_command_buffer_push_descriptor_set(
base_command_buffer, set, bindings.count, bindings.values));
for (iree_host_size_t j = 0; j < bindings.count; ++j) {
xrt::bo arg_buffer =
xrt::bo(*command_buffer->descriptor_sets[set].bindings[j],
Expand Down
12 changes: 10 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from iree.compiler import ir
from iree.compiler._mlir_libs import get_dialect_registry
from iree.compiler.api import Session, Output, Source
from iree.compiler.api import Session, Output, Source, _initializeGlobalCL
from iree.compiler.extras import types as T
from iree.runtime import VmModule
from iree.runtime import get_driver, Config, SystemContext
Expand Down Expand Up @@ -49,8 +49,16 @@ def pytest_addoption(parser):
parser.addoption("--iree-aie-debug", action="store_true")


@pytest.fixture(scope="session")
def global_cl_args(request):
_initializeGlobalCL(
"--iree-hal-memoization=false",
"--iree-hal-indirect-command-buffers=false",
)


@pytest.fixture
def iree_session(request, pytestconfig) -> Session:
def iree_session(request, pytestconfig, global_cl_args) -> Session:
s = Session()
s.context.append_dialect_registry(get_dialect_registry())
s.context.load_all_available_dialects()
Expand Down
7 changes: 2 additions & 5 deletions tests/test_matmul.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,5 @@ def test_matmul(
arg0 = np.random.randint(-1, 3, (M, K), dtype=lhs_rhs_type)
arg1 = np.random.randint(-1, 3, (K, N), dtype=lhs_rhs_type)
with invokable_module(session, module, device) as module:
for i in range(num_repeat_runs):
results = module[matmul_name](arg0, arg1).to_host()
assert np.array_equal(
results, (arg0.astype(acc_type) @ arg1.astype(acc_type))
)
results = module[matmul_name](arg0, arg1).to_host()
assert np.array_equal(results, (arg0.astype(acc_type) @ arg1.astype(acc_type)))
2 changes: 1 addition & 1 deletion third_party/iree
Submodule iree updated 344 files
Loading