From 05ce39f3fba4b5cc6eee18a431f8f8e16fa9b5d2 Mon Sep 17 00:00:00 2001
From: Han-Chung Wang
Date: Mon, 16 Dec 2024 02:12:26 -0800
Subject: [PATCH] [DT] Unify encoding materialization pass into a single pass. (#19454)

The revision creates a generic materialization pass and uses it for the backends that implement data-tiling. After months of development, we identified that the needs of the GPU path are a superset of the needs of the CPU path; specifically, GPU layouts carry an additional "swizzle" field. This means that the GPU set_encoding/unset_encoding lowering patterns already cover the CPU path. The lowering of contraction ops differs: CPU lowers them to mmt4d ops, while GPU lowers them to multi_mma ops. However, the contraction lowering is implemented through an attribute interface, so a single generic pattern can lower contraction ops for both backends (sketched below).

To make the review process easier, the revision is split into 5 commits:

1. Create the MaterializeEncoding pass and copy-paste the GPU patterns: SetEncodingOpLoweringConversion, UnsetEncodingOpLoweringConversion, and MaterializeContractionOp. This commit also updates the GPU tests to use the new pass.
2. The GPU data-tiling path does not support element-wise generic op lowering at the moment, so move that pattern into the shared pattern set and bail out when a swizzle is present. This is an NFC for both pipelines.
3. Replace the existing materialization passes with the generic pass and delete all the legacy passes.
4. Move the lit tests from `Common/[CPU|GPU]/test` to `Common/test`.
5. At this point there are duplicate patterns for the set_encoding, unset_encoding, and contraction op lowerings. Delete the legacy patterns and move the new patterns from MaterializeEncoding.cpp to where the legacy patterns lived, renaming that file to `MaterializeEncodingPatterns.cpp`.

The revision retains the MaterializeEncodingIntoNop pass (with a TODO item) because it is still used by the MaterializeHomogeneousEncodings pass; it can be deleted once we deprecate the early materialization path.
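For context, the sketch below condenses the backend-agnostic contraction lowering this patch relies on. It is illustrative only and abridged from the MaterializeContractionOp / GPUConvertToMultiMma pattern in the diff, not the verbatim code: the pattern knows nothing about the backend; it converts the result types and asks the layout attribute held by the type converter to emit the backend-specific op (linalg.mmt4d for CPU, iree_gpu.multi_mma for GPU).

```cpp
// Illustrative sketch (abridged from the pattern added in this patch).
class MaterializeContractionOp final
    : public OpInterfaceConversionPattern<linalg::ContractionOpInterface> {
public:
  using OpInterfaceConversionPattern::OpInterfaceConversionPattern;

  LogicalResult
  matchAndRewrite(linalg::ContractionOpInterface op, ArrayRef<Value> operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto *converter = static_cast<const MaterializeEncodingTypeConverter *>(
        getTypeConverter());
    // Convert the result (init) types with the same encoding type converter
    // that was used for the operands.
    SmallVector<Type> convertedResTypes;
    auto linalgOp = cast<linalg::LinalgOp>(op.getOperation());
    for (Value init : linalgOp.getDpsInits())
      convertedResTypes.push_back(converter->convertType(init.getType()));
    // The layout attribute interface performs the backend-specific lowering,
    // so this single pattern serves both the CPU and GPU pipelines.
    Operation *newOp = converter->getLayoutAttr().lowerOp(
        rewriter, op, convertedResTypes, operands);
    rewriter.replaceOp(op, newOp->getResults());
    return success();
  }
};
```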
--------- Signed-off-by: hanhanW --- .../iree/compiler/Codegen/Common/BUILD.bazel | 7 +- .../compiler/Codegen/Common/CMakeLists.txt | 7 +- .../compiler/Codegen/Common/CPU/BUILD.bazel | 4 - .../Codegen/Common/CPU/CMakeLists.txt | 4 - .../compiler/Codegen/Common/CPU/Passes.td | 20 - .../Codegen/Common/CPU/test/BUILD.bazel | 2 - .../Codegen/Common/CPU/test/CMakeLists.txt | 2 - .../compiler/Codegen/Common/EncodingUtils.h | 14 +- .../compiler/Codegen/Common/GPU/BUILD.bazel | 4 - .../Codegen/Common/GPU/CMakeLists.txt | 4 - .../Common/GPU/GPUMaterializeEncoding.cpp | 398 ------------------ .../compiler/Codegen/Common/GPU/Passes.td | 10 - .../Codegen/Common/GPU/test/BUILD.bazel | 4 - .../Codegen/Common/GPU/test/CMakeLists.txt | 4 - ...eEncodings.cpp => MaterializeEncoding.cpp} | 131 +++--- .../Common/MaterializeEncodingIntoNop.cpp | 8 +- ...ck.cpp => MaterializeEncodingPatterns.cpp} | 238 ++++++++--- .../iree/compiler/Codegen/Common/Passes.td | 15 + .../compiler/Codegen/Common/test/BUILD.bazel | 10 +- .../Codegen/Common/test/CMakeLists.txt | 6 + .../gpu_materialize_encoding_gfx1100.mlir | 2 +- .../test/gpu_materialize_encoding_gfx908.mlir | 2 +- .../test/gpu_materialize_encoding_gfx90a.mlir | 2 +- .../test/gpu_materialize_encoding_gfx942.mlir | 2 +- .../test/llvmcpu_materialize_encoding.mlir | 106 ++--- .../test/vmvx_materialize_encoding.mlir | 2 +- .../iree/compiler/Codegen/LLVMCPU/Passes.cpp | 2 +- .../src/iree/compiler/Codegen/Utils/Utils.cpp | 4 + .../src/iree/compiler/Codegen/Utils/Utils.h | 3 +- .../Dialect/VMVX/Transforms/Passes.cpp | 2 +- .../compiler/GlobalOptimization/BUILD.bazel | 2 - .../GlobalOptimization/CMakeLists.txt | 2 - .../MaterializeHomogeneousEncodings.cpp | 6 +- 33 files changed, 357 insertions(+), 672 deletions(-) delete mode 100644 compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp rename compiler/src/iree/compiler/Codegen/Common/{CPU/CPUMaterializeEncodings.cpp => MaterializeEncoding.cpp} (64%) rename compiler/src/iree/compiler/Codegen/Common/{MaterializeEncodingIntoPackUnPack.cpp => MaterializeEncodingPatterns.cpp} (85%) rename compiler/src/iree/compiler/Codegen/Common/{GPU => }/test/gpu_materialize_encoding_gfx1100.mlir (98%) rename compiler/src/iree/compiler/Codegen/Common/{GPU => }/test/gpu_materialize_encoding_gfx908.mlir (98%) rename compiler/src/iree/compiler/Codegen/Common/{GPU => }/test/gpu_materialize_encoding_gfx90a.mlir (99%) rename compiler/src/iree/compiler/Codegen/Common/{GPU => }/test/gpu_materialize_encoding_gfx942.mlir (99%) rename compiler/src/iree/compiler/Codegen/Common/{CPU => }/test/llvmcpu_materialize_encoding.mlir (97%) rename compiler/src/iree/compiler/Codegen/Common/{CPU => }/test/vmvx_materialize_encoding.mlir (99%) diff --git a/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel index e3513ba69d29..f95b0fa81551 100644 --- a/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel @@ -125,8 +125,9 @@ iree_compiler_cc_library( "LinkTuningSpecsPass.cpp", "LowerExecutableUsingTransformDialect.cpp", "LowerUKernelsToCalls.cpp", + "MaterializeEncoding.cpp", "MaterializeEncodingIntoNop.cpp", - "MaterializeEncodingIntoPackUnPack.cpp", + "MaterializeEncodingPatterns.cpp", "MaterializeTuningSpecsPass.cpp", "MemrefCopyToLinalg.cpp", "NormalizeLoopBounds.cpp", @@ -173,8 +174,10 @@ iree_compiler_cc_library( ":PassHeaders", ":PassesIncGen", "//compiler/src/iree/compiler/Codegen/Common:FoldTensorExtractOpIncGen", + 
"//compiler/src/iree/compiler/Codegen/Dialect/CPU/IR:IREECPUDialect", "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect", "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/Utils", + "//compiler/src/iree/compiler/Codegen/Dialect/GPU/IR:IREEGPUDialect", "//compiler/src/iree/compiler/Codegen/Dialect/VectorExt/IR:IREEVectorExtDialect", "//compiler/src/iree/compiler/Codegen/Interfaces:BufferizationInterfaces", "//compiler/src/iree/compiler/Codegen/Interfaces:PartitionableLoopsInterface", @@ -183,9 +186,11 @@ iree_compiler_cc_library( "//compiler/src/iree/compiler/Codegen/Utils", "//compiler/src/iree/compiler/Dialect/Encoding/IR", "//compiler/src/iree/compiler/Dialect/Flow/IR", + "//compiler/src/iree/compiler/Dialect/HAL/Analysis", "//compiler/src/iree/compiler/Dialect/HAL/IR", "//compiler/src/iree/compiler/Dialect/LinalgExt/IR", "//compiler/src/iree/compiler/Dialect/LinalgExt/Transforms", + "//compiler/src/iree/compiler/Dialect/Stream/Analysis", "//compiler/src/iree/compiler/Dialect/Util/Analysis", "//compiler/src/iree/compiler/Dialect/Util/IR", "//compiler/src/iree/compiler/Utils", diff --git a/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt index adec8aad7583..af3c55725838 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt @@ -117,8 +117,9 @@ iree_cc_library( "LinkTuningSpecsPass.cpp" "LowerExecutableUsingTransformDialect.cpp" "LowerUKernelsToCalls.cpp" + "MaterializeEncoding.cpp" "MaterializeEncodingIntoNop.cpp" - "MaterializeEncodingIntoPackUnPack.cpp" + "MaterializeEncodingPatterns.cpp" "MaterializeTuningSpecsPass.cpp" "MemrefCopyToLinalg.cpp" "NormalizeLoopBounds.cpp" @@ -203,8 +204,10 @@ iree_cc_library( MLIRVectorTransforms MLIRViewLikeInterface iree::compiler::Codegen::Common::FoldTensorExtractOpIncGen + iree::compiler::Codegen::Dialect::CPU::IR::IREECPUDialect iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect iree::compiler::Codegen::Dialect::Codegen::Utils + iree::compiler::Codegen::Dialect::GPU::IR::IREEGPUDialect iree::compiler::Codegen::Dialect::VectorExt::IR::IREEVectorExtDialect iree::compiler::Codegen::Interfaces::BufferizationInterfaces iree::compiler::Codegen::Interfaces::PartitionableLoopsInterface @@ -213,9 +216,11 @@ iree_cc_library( iree::compiler::Codegen::Utils iree::compiler::Dialect::Encoding::IR iree::compiler::Dialect::Flow::IR + iree::compiler::Dialect::HAL::Analysis iree::compiler::Dialect::HAL::IR iree::compiler::Dialect::LinalgExt::IR iree::compiler::Dialect::LinalgExt::Transforms + iree::compiler::Dialect::Stream::Analysis iree::compiler::Dialect::Util::Analysis iree::compiler::Dialect::Util::IR iree::compiler::Utils diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/CPU/BUILD.bazel index f1053da29240..05fb9bed4203 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/BUILD.bazel @@ -45,7 +45,6 @@ iree_compiler_cc_library( name = "CommonCPUPasses", srcs = [ "CPULowerToUKernels.cpp", - "CPUMaterializeEncodings.cpp", "CPUPrepareUkernels.cpp", "Passes.cpp", ], @@ -56,16 +55,13 @@ iree_compiler_cc_library( ":PassHeaders", ":PassesIncGen", "//compiler/src/iree/compiler/Codegen/Common", - "//compiler/src/iree/compiler/Codegen/Dialect/CPU/IR:IREECPUDialect", "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect", 
"//compiler/src/iree/compiler/Codegen/Dialect/Codegen/Utils", "//compiler/src/iree/compiler/Codegen/Interfaces:UKernelOpInterface", "//compiler/src/iree/compiler/Codegen/Transforms", "//compiler/src/iree/compiler/Codegen/Utils", "//compiler/src/iree/compiler/Dialect/Encoding/IR", - "//compiler/src/iree/compiler/Dialect/HAL/Analysis", "//compiler/src/iree/compiler/Dialect/HAL/IR", - "//compiler/src/iree/compiler/Dialect/Stream/Analysis", "//runtime/src/iree/builtins/ukernel:exported_bits", "@llvm-project//llvm:Support", "@llvm-project//mlir:AffineDialect", diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/CPU/CMakeLists.txt index 75db95e43291..419c4b0878c9 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/CMakeLists.txt @@ -42,7 +42,6 @@ iree_cc_library( "Passes.h" SRCS "CPULowerToUKernels.cpp" - "CPUMaterializeEncodings.cpp" "CPUPrepareUkernels.cpp" "Passes.cpp" DEPS @@ -78,16 +77,13 @@ iree_cc_library( MLIRVectorTransforms iree::builtins::ukernel::exported_bits iree::compiler::Codegen::Common - iree::compiler::Codegen::Dialect::CPU::IR::IREECPUDialect iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect iree::compiler::Codegen::Dialect::Codegen::Utils iree::compiler::Codegen::Interfaces::UKernelOpInterface iree::compiler::Codegen::Transforms iree::compiler::Codegen::Utils iree::compiler::Dialect::Encoding::IR - iree::compiler::Dialect::HAL::Analysis iree::compiler::Dialect::HAL::IR - iree::compiler::Dialect::Stream::Analysis PUBLIC ) diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td index 8c73c5bca4a9..394de5414ea1 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/Passes.td @@ -13,26 +13,6 @@ include "mlir/Pass/PassBase.td" // Common Passes used for CPU-like backends (keep alphabetical) //===---------------------------------------------------------------------===// -def CPUMaterializeHostEncodingPass : - Pass<"iree-codegen-cpu-materialize-host-encoding", "mlir::ModuleOp"> { - let summary = "Convert encoding-specific operations based on target attributes."; - let description = [{ - Examples: - encoding.set_encoding -> tensor.pack - encoding.unset_encoding -> tensor.unpack - linalg.matmul -> linalg.mmt4d "}]; -} - -def CPUMaterializeDeviceEncodingPass : - InterfacePass<"iree-codegen-cpu-materialize-device-encoding", "mlir::FunctionOpInterface"> { - let summary = "Convert encoding-specific operations based on target attributes."; - let description = [{ - Examples: - encoding.set_encoding -> tensor.pack - encoding.unset_encoding -> tensor.unpack - linalg.matmul -> linalg.mmt4d "}]; -} - def CPULowerToUKernelsPass : Pass<"iree-codegen-cpu-lower-to-ukernels", ""> { let summary = diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/CPU/test/BUILD.bazel index b2d6b916f713..fe5caa3434e2 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/BUILD.bazel @@ -19,10 +19,8 @@ iree_lit_test_suite( srcs = enforce_glob( # keep sorted [ - "llvmcpu_materialize_encoding.mlir", "lower_to_ukernel_ops.mlir", "prepare_ukernels.mlir", - "vmvx_materialize_encoding.mlir", ], include = ["*.mlir"], ), diff --git 
a/compiler/src/iree/compiler/Codegen/Common/CPU/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/CPU/test/CMakeLists.txt index 3dd9de7f98cc..100058fea35a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/Common/CPU/test/CMakeLists.txt @@ -14,10 +14,8 @@ iree_lit_test_suite( NAME lit SRCS - "llvmcpu_materialize_encoding.mlir" "lower_to_ukernel_ops.mlir" "prepare_ukernels.mlir" - "vmvx_materialize_encoding.mlir" TOOLS FileCheck iree-opt diff --git a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h index da9aa8a41731..bf188b66cf54 100644 --- a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h +++ b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h @@ -93,17 +93,9 @@ FailureOr lowerUnsetEncodingToUnpackOp( Value packedValue, const MaterializeEncodingTypeConverter &typeConverter, MaterializeEncodingValueFn materializeEncodingValueFn); -/// Pouplates the set of patterns that lowers set_encoding, unset_encoding, and -/// upstream dialect ops with encoding types to pack/unpack ops. -void populateMaterializeEncodingIntoPackUnPackPatterns( - RewritePatternSet &patterns, - MaterializeEncodingTypeConverter &typeConverter, - MaterializeEncodingValueFn materializeEncodingValueFn); - -/// Pouplates the set of patterns that lowers shape-like operations (e.g., Flow -/// ops, Hal ops, tensor.empty, linalg.fill, etc) with encoding types to the -/// same op with materialized shapes. -void populateShapeIndependentMaterializeEncodingPatterns( +/// Pouplates the set of patterns that lowers operations with encoding types to +/// operations without encodings. +void populateMaterializeEncodingPatterns( RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target, MaterializeEncodingTypeConverter &typeConverter, MaterializeEncodingValueFn materializeEncodingValueFn); diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel index 128ffa9fc46e..66177778f683 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/BUILD.bazel @@ -65,7 +65,6 @@ iree_compiler_cc_library( "GPUGreedilyDistributeToThreads.cpp", "GPUInferMemorySpace.cpp", "GPULowerToUKernels.cpp", - "GPUMaterializeEncoding.cpp", "GPUMultiBuffering.cpp", "GPUNestedLayoutDistributionPatterns.cpp", "GPUPackToIntrinsics.cpp", @@ -107,10 +106,7 @@ iree_compiler_cc_library( "//compiler/src/iree/compiler/Codegen/Transforms", "//compiler/src/iree/compiler/Codegen/Utils", "//compiler/src/iree/compiler/Codegen/Utils:VectorOpUtils", - "//compiler/src/iree/compiler/Dialect/Encoding/IR", - "//compiler/src/iree/compiler/Dialect/HAL/Analysis", "//compiler/src/iree/compiler/Dialect/HAL/IR", - "//compiler/src/iree/compiler/Dialect/Stream/Analysis", "//compiler/src/iree/compiler/Utils", "@llvm-project//llvm:Support", "@llvm-project//mlir:AMDGPUDialect", diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt index 97d324042e2c..2f065df2bb52 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/CMakeLists.txt @@ -63,7 +63,6 @@ iree_cc_library( "GPUGreedilyDistributeToThreads.cpp" "GPUInferMemorySpace.cpp" "GPULowerToUKernels.cpp" - "GPUMaterializeEncoding.cpp" "GPUMultiBuffering.cpp" 
"GPUNestedLayoutDistributionPatterns.cpp" "GPUPackToIntrinsics.cpp" @@ -140,10 +139,7 @@ iree_cc_library( iree::compiler::Codegen::Transforms iree::compiler::Codegen::Utils iree::compiler::Codegen::Utils::VectorOpUtils - iree::compiler::Dialect::Encoding::IR - iree::compiler::Dialect::HAL::Analysis iree::compiler::Dialect::HAL::IR - iree::compiler::Dialect::Stream::Analysis iree::compiler::Utils PUBLIC ) diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp deleted file mode 100644 index 32536085576b..000000000000 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp +++ /dev/null @@ -1,398 +0,0 @@ -// Copyright 2024 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "iree/compiler/Codegen/Common/EncodingUtils.h" -#include "iree/compiler/Codegen/Common/GPU/Passes.h" -#include "iree/compiler/Codegen/Dialect/Codegen/Utils/Utils.h" -#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h" -#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h" -#include "iree/compiler/Codegen/Utils/GPUUtils.h" -#include "iree/compiler/Dialect/Encoding/IR/EncodingDialect.h" -#include "iree/compiler/Dialect/Encoding/IR/EncodingOps.h" -#include "iree/compiler/Dialect/HAL/Analysis/DeviceAnalysis.h" -#include "iree/compiler/Dialect/HAL/IR/HALTypes.h" -#include "iree/compiler/Dialect/Stream/Analysis/Affinity.h" -#include "llvm/ADT/SmallVector.h" -#include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/MemRef/Transforms/Transforms.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Tensor/Transforms/Transforms.h" -#include "mlir/Dialect/Utils/IndexingUtils.h" -#include "mlir/Dialect/Utils/ReshapeOpsUtils.h" -#include "mlir/IR/BuiltinTypes.h" -#include "mlir/IR/MLIRContext.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" - -#define DEBUG_TYPE "iree-codegen-gpu-materialize-encoding" - -namespace mlir::iree_compiler { - -#define GEN_PASS_DEF_GPUMATERIALIZEDEVICEENCODINGPASS -#define GEN_PASS_DEF_GPUMATERIALIZEHOSTENCODINGPASS -#include "iree/compiler/Codegen/Common/GPU/Passes.h.inc" - -using IREE::Codegen::MaterializeEncodingInfo; -using IREE::Codegen::TileSwizzle; - -namespace { - -// TODO(hanchung): Delete this pass and rely on tensor-based analysis to -// materialize encodings based on where tensors are used. This pass is not able -// to handle that. 
-struct GPUMaterializeHostEncodingPass - : public impl::GPUMaterializeHostEncodingPassBase< - GPUMaterializeHostEncodingPass> { - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } - - void runOnOperation() override; -}; - -struct GPUMaterializeDeviceEncodingPass final - : impl::GPUMaterializeDeviceEncodingPassBase< - GPUMaterializeDeviceEncodingPass> { - using GPUMaterializeDeviceEncodingPassBase:: - GPUMaterializeDeviceEncodingPassBase; - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } - void runOnOperation() override; -}; - -SmallVector -getReassociationIndices(int outerDims, - const TileSwizzle::ExpandShapeType &expandShape) { - SmallVector result; - int expandedIdx = 0; - for (int i = 0; i < outerDims; ++i) { - result.push_back({expandedIdx++}); - } - for (auto expandShapeDim : expandShape) { - result.push_back({}); - for (int i = 0, e = expandShapeDim.size(); i < e; ++i) { - result.back().push_back(expandedIdx++); - } - } - return result; -} - -/// Convert iree_linalg_ext.set_encoding op to pack + tile swizzling ops. We use -/// expand_shape + linalg.transpose to represent a tile swizzling op. -struct GPUSetEncodingOpLoweringConversion - : public OpMaterializeEncodingPattern { - using OpMaterializeEncodingPattern< - IREE::Encoding::SetEncodingOp>::OpMaterializeEncodingPattern; - - LogicalResult - matchAndRewrite(IREE::Encoding::SetEncodingOp encodingOp, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - auto converter = static_cast( - getTypeConverter()); - auto packedValue = lowerSetEncodingOpToPackOp( - rewriter, encodingOp, adaptor.getSource(), *converter, - this->materializeEncodingValueFn); - if (failed(packedValue)) { - Type targetType = - getTypeConverter()->convertType(encodingOp.getResultType()); - Value result = rewriter.createOrFold( - encodingOp.getLoc(), targetType, adaptor.getSource()); - rewriter.replaceOp(encodingOp, result); - return success(); - } - - MaterializeEncodingInfo encodingInfo = - converter->getEncodingInfo(encodingOp.getResultType()); - if (!encodingInfo.swizzle) { - rewriter.replaceOp(encodingOp, packedValue.value()); - return success(); - } - - Location loc = encodingOp.getLoc(); - - // Create expand_shape op to tile the innermost two dimensions. 
- int origRank = encodingOp.getSourceType().getRank(); - SmallVector expandShapeShape( - cast(packedValue->getType()) - .getShape() - .take_front(origRank)); - expandShapeShape.append( - getExpandedTileShape(encodingInfo.swizzle->expandShape)); - RankedTensorType expandShapeType = - encodingOp.getSourceType().clone(expandShapeShape); - - SmallVector reassociation = - getReassociationIndices(origRank, encodingInfo.swizzle->expandShape); - auto expandShapeOp = rewriter.create( - loc, expandShapeType, packedValue.value(), reassociation); - - SmallVector transposePerm = - llvm::to_vector(llvm::seq(0, origRank)); - for (auto perm : encodingInfo.swizzle->permutation) { - transposePerm.push_back(origRank + perm); - } - SmallVector transposeResultDims = - tensor::getMixedSizes(rewriter, loc, expandShapeOp.getResult()); - applyPermutationToVector(transposeResultDims, transposePerm); - - auto emptyTensor = rewriter.create( - loc, transposeResultDims, encodingOp.getSourceType().getElementType()); - auto transposeOp = rewriter.create( - loc, expandShapeOp, emptyTensor, transposePerm); - rewriter.replaceOp(encodingOp, transposeOp->getResult(0)); - - return success(); - } -}; - -struct GPUUnsetEncodingOpLoweringConversion - : public OpMaterializeEncodingPattern { - using OpMaterializeEncodingPattern< - IREE::Encoding::UnsetEncodingOp>::OpMaterializeEncodingPattern; - - LogicalResult - matchAndRewrite(IREE::Encoding::UnsetEncodingOp unsetEncodingOp, - OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - auto converter = static_cast( - getTypeConverter()); - - MaterializeEncodingInfo encodingInfo = - converter->getEncodingInfo(unsetEncodingOp.getSource().getType()); - if (IREE::Codegen::isIdentityLayout(encodingInfo)) { - Type targetType = - getTypeConverter()->convertType(unsetEncodingOp.getSourceType()); - Value result = rewriter.createOrFold( - unsetEncodingOp.getLoc(), targetType, adaptor.getSource()); - rewriter.replaceOp(unsetEncodingOp, result); - return success(); - } - - Location loc = unsetEncodingOp.getLoc(); - Value unpackSrc = adaptor.getSource(); - if (encodingInfo.swizzle) { - int targetRank = unsetEncodingOp.getResultType().getRank(); - auto srcConvertedType = - cast(adaptor.getSource().getType()); - SmallVector emptyShape = - tensor::getMixedSizes(rewriter, loc, adaptor.getSource()); - emptyShape.resize(targetRank); - for (auto i : getExpandedTileShape(encodingInfo.swizzle->expandShape)) { - emptyShape.push_back(rewriter.getIndexAttr(i)); - } - auto emptyTensor = rewriter.create( - loc, emptyShape, unsetEncodingOp.getSourceType().getElementType()); - - SmallVector transposePerm = - llvm::to_vector(llvm::seq(0, targetRank)); - for (auto perm : encodingInfo.swizzle->permutation) { - transposePerm.push_back(targetRank + perm); - } - auto invertedTransposePerm = invertPermutationVector(transposePerm); - auto transposeOp = rewriter.create( - loc, adaptor.getSource(), emptyTensor, invertedTransposePerm); - - SmallVector reassociation = getReassociationIndices( - targetRank, encodingInfo.swizzle->expandShape); - SmallVector unpackSrcShape( - srcConvertedType.getShape().take_front(targetRank)); - unpackSrcShape.append(encodingInfo.innerTileSizes.begin(), - encodingInfo.innerTileSizes.end()); - RankedTensorType unpackSrcType = - unsetEncodingOp.getResultType().clone(unpackSrcShape); - unpackSrc = rewriter.create( - loc, unpackSrcType, transposeOp->getResult(0), reassociation); - } - - auto unpackedValue = lowerUnsetEncodingToUnpackOp( - rewriter, unsetEncodingOp, unpackSrc, 
*converter, - this->materializeEncodingValueFn); - if (failed(unpackedValue)) { - Type targetType = - getTypeConverter()->convertType(unsetEncodingOp.getResultType()); - Value result = rewriter.createOrFold(loc, targetType, - adaptor.getSource()); - rewriter.replaceOp(unsetEncodingOp, result); - return success(); - } - rewriter.replaceOp(unsetEncodingOp, unpackedValue.value()); - return success(); - } -}; - -class GPUConvertToMultiMma final - : public OpInterfaceConversionPattern { -public: - using OpInterfaceConversionPattern< - linalg::ContractionOpInterface>::OpInterfaceConversionPattern; - - GPUConvertToMultiMma( - MLIRContext *context, - const MaterializeEncodingTypeConverter &typeConverter, - MaterializeEncodingValueFn materializeEncodingValueFn = {}, - PatternBenefit benefit = 1) - : OpInterfaceConversionPattern( - typeConverter, context, benefit), - materializeEncodingValueFn(materializeEncodingValueFn) {} - - LogicalResult - matchAndRewrite(linalg::ContractionOpInterface op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - auto converter = static_cast( - this->getTypeConverter()); - auto layoutAttr = converter->getLayoutAttr(); - assert(layoutAttr && "layoutAttr is not set, which is not expected. Are " - "you adding new arch support?"); - SmallVector convertedResTypes; - auto linalgOp = cast(op.getOperation()); - for (auto init : linalgOp.getDpsInits()) { - convertedResTypes.push_back(converter->convertType(init.getType())); - } - Operation *newOp = - layoutAttr.lowerOp(rewriter, op, convertedResTypes, operands); - rewriter.replaceOp(op, newOp->getResults()); - return success(); - } - -protected: - const MaterializeEncodingValueFn materializeEncodingValueFn; -}; - -static LogicalResult -materializeFuncOpEncodings(FunctionOpInterface funcOp, - IREE::HAL::ExecutableTargetAttr targetAttr) { - MLIRContext *ctx = funcOp.getContext(); - { - RewritePatternSet patterns(ctx); - IREE::GPU::TargetAttr gpuTargetAttr; - if (targetAttr) { - gpuTargetAttr = getGPUTargetAttr(targetAttr); - } else { - gpuTargetAttr = getCLGPUTarget(ctx); - } - MaterializeEncodingTypeConverter typeConverter( - cast( - IREE::GPU::GPUEncodingLayoutAttr::get(ctx, gpuTargetAttr))); - MaterializeEncodingConversionTarget target(*ctx); - MaterializeEncodingValueFn materializeEncodingValueFn = - [](RankedTensorType, OpBuilder, - Location) -> FailureOr { return {}; }; - populateShapeIndependentMaterializeEncodingPatterns( - patterns, target, typeConverter, materializeEncodingValueFn); - - patterns.insert( - ctx, typeConverter, materializeEncodingValueFn); - - memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns); - if (failed(applyPartialConversion(funcOp, target, std::move(patterns)))) { - funcOp.emitOpError("materialization failed"); - return failure(); - } - } - - // Add patterns to fold pack/unpack ops with pad/extract_slice ops and - // resolve dims ops. 
- { - RewritePatternSet patterns(ctx); - tensor::CastOp::getCanonicalizationPatterns(patterns, ctx); - tensor::populateFoldIntoPackAndUnpackPatterns(patterns); - memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns); - if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns)))) { - funcOp.emitOpError("folding patterns failed"); - return failure(); - } - } - - return success(); -} - -static std::optional> -getFuncExecutableTargetAttrs(FunctionOpInterface funcOp, - IREE::Stream::AffinityAnalysis &affinityAnalysis, - IREE::HAL::DeviceAnalysis &deviceAnalysis) { - // Get a set of all unique affinities used by resources within the function. - SetVector uniqueAffinityAttrs; - SmallVector lookupAffinityAttrs; - funcOp.walk([&](Operation *op) { - if (affinityAnalysis.tryLookupExecutionAffinity(op, lookupAffinityAttrs)) { - uniqueAffinityAttrs.insert(lookupAffinityAttrs.begin(), - lookupAffinityAttrs.end()); - } - lookupAffinityAttrs.clear(); - }); - - // Resolve affinities to executable targets. - SetVector executableTargetAttrs; - for (auto affinityAttr : uniqueAffinityAttrs) { - deviceAnalysis.gatherRequiredExecutableTargets(affinityAttr, funcOp, - executableTargetAttrs); - } - return executableTargetAttrs; -} - -} // namespace - -void GPUMaterializeHostEncodingPass::runOnOperation() { - auto moduleOp = getOperation(); - - // Run required analysis passes. - IREE::Stream::AffinityAnalysis affinityAnalysis(moduleOp); - if (failed(affinityAnalysis.run())) { - return signalPassFailure(); - } - IREE::HAL::DeviceAnalysis deviceAnalysis(moduleOp); - if (failed(deviceAnalysis.run())) { - return signalPassFailure(); - } - - for (auto funcOp : moduleOp.getOps()) { - // Gather the required executable targets for the function. Note that it's - // possible there are more required for ops nested within the function but - // this pass is a hack and can't handle that :shrug:. - auto executableTargets = - getFuncExecutableTargetAttrs(funcOp, affinityAnalysis, deviceAnalysis); - if (!executableTargets) { - funcOp.emitOpError() - << "could not determine executable targets for the function"; - return signalPassFailure(); - } else if (executableTargets->empty()) { - // Probably no tensors. - continue; - } - - // HACK: this pass is run on the host _but shouldn't be_. Because it's - // run on the host and IREE is a compiler capable of multi-targeting there - // may be multiple executable targets at any point in the host program. - // This pass can't handle that and assumes it's been checked earlier by - // spooky action at a distance. This needs to be fixed. - if (executableTargets->size() != 1) { - funcOp.emitOpError() << "has multiple executable targets and CPU data " - "tiling isn't built to support that"; - return signalPassFailure(); - } - - // Materialize encodings within the function. 
- if (failed( - materializeFuncOpEncodings(funcOp, executableTargets->front()))) { - return signalPassFailure(); - } - } -} - -void GPUMaterializeDeviceEncodingPass::runOnOperation() { - FunctionOpInterface funcOp = getOperation(); - auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(funcOp); - if (failed(materializeFuncOpEncodings(funcOp, targetAttr))) { - return signalPassFailure(); - } -} - -} // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td index 2c25e02852f4..ff2b2b94f9b2 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td @@ -247,16 +247,6 @@ def GPUApplyTilingLevelPass : ]; } -def GPUMaterializeHostEncodingPass : - Pass<"iree-codegen-gpu-materialize-host-encoding", "mlir::ModuleOp"> { - let summary = "Materialize the encoding for tensor as specified by the backend."; -} - -def GPUMaterializeDeviceEncodingPass : - InterfacePass<"iree-codegen-gpu-materialize-device-encoding", "mlir::FunctionOpInterface"> { - let summary = "Materialize the encoding for tensor as specified by the backend."; -} - def GPUTensorTileToSerialLoopsPass : InterfacePass<"iree-codegen-gpu-tensor-tile-to-serial-loops", "mlir::FunctionOpInterface"> { let summary = "Pass to tile reduction dimensions for certain GPU ops"; diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel index 030e6f4de497..2f3b092d5676 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel @@ -32,10 +32,6 @@ iree_lit_test_suite( "gpu_infer_memory_space.mlir", "gpu_combine_value_barriers.mlir", "gpu_lower_to_ukernels.mlir", - "gpu_materialize_encoding_gfx908.mlir", - "gpu_materialize_encoding_gfx90a.mlir", - "gpu_materialize_encoding_gfx942.mlir", - "gpu_materialize_encoding_gfx1100.mlir", "gpu_nested_layout_contract_amdgpu.mlir", "gpu_nested_layout_vector_distribution.mlir", "gpu_nested_layout_vector_distribution_step.mlir", diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt index 6d1f540f420a..50be391693cc 100644 --- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt @@ -27,10 +27,6 @@ iree_lit_test_suite( "gpu_greedily_distribute_to_threads.mlir" "gpu_infer_memory_space.mlir" "gpu_lower_to_ukernels.mlir" - "gpu_materialize_encoding_gfx1100.mlir" - "gpu_materialize_encoding_gfx908.mlir" - "gpu_materialize_encoding_gfx90a.mlir" - "gpu_materialize_encoding_gfx942.mlir" "gpu_nested_layout_contract_amdgpu.mlir" "gpu_nested_layout_vector_distribution.mlir" "gpu_nested_layout_vector_distribution_step.mlir" diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp similarity index 64% rename from compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp rename to compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp index d182649f64ac..f1776b90f74e 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp @@ -1,47 +1,46 @@ -// Copyright 2023 The IREE Authors +// Copyright 2024 The 
IREE Authors // // Licensed under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "iree/compiler/Codegen/Common/CPU/Passes.h" #include "iree/compiler/Codegen/Common/EncodingUtils.h" +#include "iree/compiler/Codegen/Common/PassUtils.h" +#include "iree/compiler/Codegen/Common/Passes.h" #include "iree/compiler/Codegen/Dialect/CPU/IR/IREECPUDialect.h" #include "iree/compiler/Codegen/Dialect/CPU/IR/IREECPUTypes.h" #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h" #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h" #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.h" -#include "iree/compiler/Codegen/Dialect/Codegen/Utils/Utils.h" +#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenTypes.h" +#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUAttrs.h" +#include "iree/compiler/Codegen/Dialect/GPU/IR/IREEGPUDialect.h" +#include "iree/compiler/Codegen/Utils/GPUUtils.h" #include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/Encoding/IR/EncodingOps.h" #include "iree/compiler/Dialect/HAL/Analysis/DeviceAnalysis.h" #include "iree/compiler/Dialect/HAL/IR/HALTypes.h" #include "iree/compiler/Dialect/Stream/Analysis/Affinity.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MathExtras.h" -#include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h" #include "mlir/Dialect/MemRef/Transforms/Transforms.h" -#include "mlir/Dialect/Tensor/Transforms/Transforms.h" -#include "mlir/IR/BuiltinAttributes.h" -#include "mlir/IR/BuiltinTypeInterfaces.h" -#include "mlir/IR/BuiltinTypes.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" -#define DEBUG_TYPE "cpu-materialize-encoding" +#define DEBUG_TYPE "iree-codegen--materialize-encoding" #define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ") #define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n") namespace mlir::iree_compiler { -using IREE::Codegen::MaterializeEncodingInfo; -using IREE::Codegen::TileMxNxK; +#define GEN_PASS_DEF_MATERIALIZEDEVICEENCODINGPASS +#define GEN_PASS_DEF_MATERIALIZEHOSTENCODINGPASS +#include "iree/compiler/Codegen/Common/Passes.h.inc" -#define GEN_PASS_DEF_CPUMATERIALIZEDEVICEENCODINGPASS -#define GEN_PASS_DEF_CPUMATERIALIZEHOSTENCODINGPASS -#include "iree/compiler/Codegen/Common/CPU/Passes.h.inc" +using namespace IREE::Encoding; + +namespace { static FailureOr chooseDynamicEncodingInfoVMVXMicrokernels(RankedTensorType tensorType, @@ -64,33 +63,46 @@ getMaterializeEncodingValueFn(IREE::HAL::ExecutableTargetAttr targetAttr) { static LogicalResult materializeFuncOpEncodings(FunctionOpInterface funcOp, - IREE::HAL::ExecutableTargetAttr targetAttr) { + IREE::HAL::ExecutableTargetAttr targetAttr, + bool testCLGPUTarget = false) { MLIRContext *ctx = funcOp.getContext(); - RewritePatternSet materializeEncodingPattern(ctx); - DictionaryAttr targetConfig = targetAttr.getConfiguration(); - IREE::Codegen::LayoutAttrInterface layoutAttr; - if (isVMVXBackend(targetAttr)) { - LDBG("Select VMVXEncodingLayoutAttr attribute as the layout attribute."); - layoutAttr = cast( - IREE::CPU::VMVXEncodingLayoutAttr::get(ctx, targetConfig)); - } else { - LDBG("Select CPUEncodingLayoutAttr attribute as the layout 
attribute."); - layoutAttr = cast( - IREE::CPU::CPUEncodingLayoutAttr::get(ctx, targetConfig)); - } - MaterializeEncodingTypeConverter typeConverter(layoutAttr); - MaterializeEncodingConversionTarget target(*ctx); - auto materializeEncodingValueFn = getMaterializeEncodingValueFn(targetAttr); - populateMaterializeEncodingIntoPackUnPackPatterns( - materializeEncodingPattern, typeConverter, materializeEncodingValueFn); - populateShapeIndependentMaterializeEncodingPatterns( - materializeEncodingPattern, target, typeConverter, - materializeEncodingValueFn); - - if (failed(applyPartialConversion(funcOp, target, - std::move(materializeEncodingPattern)))) { - funcOp.emitOpError("materialization failed"); - return failure(); + { + RewritePatternSet patterns(ctx); + IREE::Codegen::LayoutAttrInterface layoutAttr; + if (isVMVXBackend(targetAttr)) { + LDBG("Select VMVXEncodingLayoutAttr attribute as the layout attribute."); + layoutAttr = cast( + IREE::CPU::VMVXEncodingLayoutAttr::get( + ctx, targetAttr.getConfiguration())); + } else if (isLLVMCPUBackend(targetAttr)) { + LDBG("Select CPUEncodingLayoutAttr attribute as the layout attribute."); + layoutAttr = cast( + IREE::CPU::CPUEncodingLayoutAttr::get(ctx, + targetAttr.getConfiguration())); + } else if (isROCMBackend(targetAttr)) { + LDBG("Select GPUEncodingLayoutAttr attribute as the layout attribute."); + layoutAttr = cast( + IREE::GPU::GPUEncodingLayoutAttr::get(ctx, + getGPUTargetAttr(targetAttr))); + } else if (testCLGPUTarget) { + LDBG("Select GPUEncodingLayoutAttr attribute as the layout attribute. " + "(testCLGPUTarget)"); + layoutAttr = cast( + IREE::GPU::GPUEncodingLayoutAttr::get(ctx, getCLGPUTarget(ctx))); + } else { + LDBG("Select EncodingNopLayoutAttr attribute as the layout attribute."); + layoutAttr = IREE::Codegen::EncodingNopLayoutAttr::get(ctx); + } + MaterializeEncodingTypeConverter typeConverter(layoutAttr); + MaterializeEncodingConversionTarget target(*ctx); + auto materializeEncodingValueFn = getMaterializeEncodingValueFn(targetAttr); + populateMaterializeEncodingPatterns(patterns, target, typeConverter, + materializeEncodingValueFn); + + if (failed(applyPartialConversion(funcOp, target, std::move(patterns)))) { + funcOp.emitOpError("materialization failed"); + return failure(); + } } // Add patterns to fold pack/unpack ops with pad/extract_slice ops and @@ -138,13 +150,13 @@ getFuncExecutableTargetAttrs(FunctionOpInterface funcOp, return executableTargetAttrs; } -struct CPUMaterializeHostEncodingPass - : public impl::CPUMaterializeHostEncodingPassBase< - CPUMaterializeHostEncodingPass> { +struct MaterializeHostEncodingPass + : public impl::MaterializeHostEncodingPassBase< + MaterializeHostEncodingPass> { void getDependentDialects(DialectRegistry ®istry) const override { - registry - .insert(); + registry.insert(); } void runOnOperation() override { @@ -199,22 +211,27 @@ struct CPUMaterializeHostEncodingPass // that. It should _not_ be running on both - target-specific codegen passes // are not allowed on host programs and it's a big violation of layering that // this exists. 
-struct CPUMaterializeDeviceEncodingPass - : public impl::CPUMaterializeDeviceEncodingPassBase< - CPUMaterializeDeviceEncodingPass> { +struct MaterializeDeviceEncodingPass + : public impl::MaterializeDeviceEncodingPassBase< + MaterializeDeviceEncodingPass> { + using impl::MaterializeDeviceEncodingPassBase< + MaterializeDeviceEncodingPass>::MaterializeDeviceEncodingPassBase; + void getDependentDialects(DialectRegistry ®istry) const override { - registry - .insert(); + registry.insert(); } void runOnOperation() override { auto funcOp = getOperation(); auto executableTargetAttr = IREE::HAL::ExecutableTargetAttr::lookup(funcOp); - if (failed(materializeFuncOpEncodings(funcOp, executableTargetAttr))) { + if (failed(materializeFuncOpEncodings(funcOp, executableTargetAttr, + testCLGPUTarget))) { return signalPassFailure(); } } }; +} // namespace } // namespace mlir::iree_compiler diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp index 4de4b454478a..d93cb98014de 100644 --- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp @@ -48,11 +48,9 @@ struct MaterializeEncodingIntoNopPass final MaterializeEncodingTypeConverter typeConverter( IREE::Codegen::EncodingNopLayoutAttr::get(context)); MaterializeEncodingConversionTarget target(*context); - populateMaterializeEncodingIntoPackUnPackPatterns( - materializeEncodingPattern, typeConverter, materializeEncodingValueFn); - populateShapeIndependentMaterializeEncodingPatterns( - materializeEncodingPattern, target, typeConverter, - materializeEncodingValueFn); + populateMaterializeEncodingPatterns(materializeEncodingPattern, target, + typeConverter, + materializeEncodingValueFn); if (failed(applyPartialConversion(operation, target, std::move(materializeEncodingPattern)))) { diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp similarity index 85% rename from compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp rename to compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp index 087d91dccf41..cd3d27e5c7f9 100644 --- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp +++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingPatterns.cpp @@ -32,6 +32,7 @@ namespace mlir::iree_compiler { using IREE::Codegen::MaterializeEncodingInfo; +using IREE::Codegen::TileSwizzle; //===---------------------------------------------------------------------===// // Utility methods @@ -237,6 +238,10 @@ static FailureOr lowerGenericOpWithEncoding( return rewriter.notifyMatchFailure( genericOp, "MaterializeEncodingInfo failed for output"); } + if (outMaterializeEncodingInfo.swizzle) { + return rewriter.notifyMatchFailure( + genericOp, "generic op lowering does not support swizzle yet"); + } auto convertedResultType = cast(convertedOutputOperands[0].getType()); @@ -561,60 +566,6 @@ struct MaterializeFlowDispatchTensorStoreOp // the core conversion utilities. //===---------------------------------------------------------------------===// -/// Convert `set_encoding` op to `pack` op. 
-struct SetEncodingOpToPackOpConversion - : public OpMaterializeEncodingPattern { - using OpMaterializeEncodingPattern< - IREE::Encoding::SetEncodingOp>::OpMaterializeEncodingPattern; - - LogicalResult - matchAndRewrite(IREE::Encoding::SetEncodingOp encodingOp, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - auto converter = static_cast( - getTypeConverter()); - auto packOp = lowerSetEncodingOpToPackOp(rewriter, encodingOp, - adaptor.getSource(), *converter, - this->materializeEncodingValueFn); - if (failed(packOp)) { - Type targetType = - getTypeConverter()->convertType(encodingOp.getResultType()); - Value result = rewriter.createOrFold( - encodingOp.getLoc(), targetType, adaptor.getSource()); - rewriter.replaceOp(encodingOp, result); - return success(); - } - rewriter.replaceOp(encodingOp, packOp.value()); - return success(); - } -}; - -/// Convert `unset_encoding` op to `unpack` op. -struct UnsetEncodingOpToUnPackOpConversion - : public OpMaterializeEncodingPattern { - using OpMaterializeEncodingPattern< - IREE::Encoding::UnsetEncodingOp>::OpMaterializeEncodingPattern; - - LogicalResult - matchAndRewrite(IREE::Encoding::UnsetEncodingOp encodingOp, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - auto converter = static_cast( - this->getTypeConverter()); - auto unpackedValue = lowerUnsetEncodingToUnpackOp( - rewriter, encodingOp, adaptor.getSource(), *converter, - this->materializeEncodingValueFn); - if (failed(unpackedValue)) { - Type targetType = - getTypeConverter()->convertType(encodingOp.getResultType()); - Value result = rewriter.createOrFold( - encodingOp.getLoc(), targetType, adaptor.getSource()); - rewriter.replaceOp(encodingOp, result); - return success(); - } - rewriter.replaceOp(encodingOp, unpackedValue.value()); - return success(); - } -}; - /// Generic pattern to convert operation that is in Destination Passing Style. template struct MaterializeDPSOperation : public OpMaterializeEncodingPattern { @@ -685,6 +636,166 @@ struct MaterializeOptimizationBarrierOp } }; +static SmallVector +getReassociationIndices(int outerDims, + const TileSwizzle::ExpandShapeType &expandShape) { + SmallVector result; + int expandedIdx = 0; + for (int i = 0; i < outerDims; ++i) { + result.push_back({expandedIdx++}); + } + for (auto expandShapeDim : expandShape) { + result.push_back({}); + for (int i = 0, e = expandShapeDim.size(); i < e; ++i) { + result.back().push_back(expandedIdx++); + } + } + return result; +} + +/// Convert iree_linalg_ext.set_encoding op to pack + tile swizzling ops. We use +/// expand_shape + linalg.transpose to represent a tile swizzling op. 
+struct SetEncodingOpLoweringConversion + : public OpMaterializeEncodingPattern { + using OpMaterializeEncodingPattern< + IREE::Encoding::SetEncodingOp>::OpMaterializeEncodingPattern; + + LogicalResult + matchAndRewrite(IREE::Encoding::SetEncodingOp encodingOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto converter = static_cast( + getTypeConverter()); + auto packedValue = lowerSetEncodingOpToPackOp( + rewriter, encodingOp, adaptor.getSource(), *converter, + this->materializeEncodingValueFn); + if (failed(packedValue)) { + Type targetType = + getTypeConverter()->convertType(encodingOp.getResultType()); + Value result = rewriter.createOrFold( + encodingOp.getLoc(), targetType, adaptor.getSource()); + rewriter.replaceOp(encodingOp, result); + return success(); + } + + MaterializeEncodingInfo encodingInfo = + converter->getEncodingInfo(encodingOp.getResultType()); + if (!encodingInfo.swizzle) { + rewriter.replaceOp(encodingOp, packedValue.value()); + return success(); + } + + Location loc = encodingOp.getLoc(); + + // Create expand_shape op to tile the innermost two dimensions. + int origRank = encodingOp.getSourceType().getRank(); + SmallVector expandShapeShape( + cast(packedValue->getType()) + .getShape() + .take_front(origRank)); + expandShapeShape.append( + getExpandedTileShape(encodingInfo.swizzle->expandShape)); + RankedTensorType expandShapeType = + encodingOp.getSourceType().clone(expandShapeShape); + + SmallVector reassociation = + getReassociationIndices(origRank, encodingInfo.swizzle->expandShape); + auto expandShapeOp = rewriter.create( + loc, expandShapeType, packedValue.value(), reassociation); + + SmallVector transposePerm = + llvm::to_vector(llvm::seq(0, origRank)); + for (auto perm : encodingInfo.swizzle->permutation) { + transposePerm.push_back(origRank + perm); + } + SmallVector transposeResultDims = + tensor::getMixedSizes(rewriter, loc, expandShapeOp.getResult()); + applyPermutationToVector(transposeResultDims, transposePerm); + + auto emptyTensor = rewriter.create( + loc, transposeResultDims, encodingOp.getSourceType().getElementType()); + auto transposeOp = rewriter.create( + loc, expandShapeOp, emptyTensor, transposePerm); + rewriter.replaceOp(encodingOp, transposeOp->getResult(0)); + + return success(); + } +}; + +struct UnsetEncodingOpLoweringConversion + : public OpMaterializeEncodingPattern { + using OpMaterializeEncodingPattern< + IREE::Encoding::UnsetEncodingOp>::OpMaterializeEncodingPattern; + + LogicalResult + matchAndRewrite(IREE::Encoding::UnsetEncodingOp unsetEncodingOp, + OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto converter = static_cast( + getTypeConverter()); + + MaterializeEncodingInfo encodingInfo = + converter->getEncodingInfo(unsetEncodingOp.getSource().getType()); + if (IREE::Codegen::isIdentityLayout(encodingInfo)) { + Type targetType = + getTypeConverter()->convertType(unsetEncodingOp.getSourceType()); + Value result = rewriter.createOrFold( + unsetEncodingOp.getLoc(), targetType, adaptor.getSource()); + rewriter.replaceOp(unsetEncodingOp, result); + return success(); + } + + Location loc = unsetEncodingOp.getLoc(); + Value unpackSrc = adaptor.getSource(); + if (encodingInfo.swizzle) { + int targetRank = unsetEncodingOp.getResultType().getRank(); + auto srcConvertedType = + cast(adaptor.getSource().getType()); + SmallVector emptyShape = + tensor::getMixedSizes(rewriter, loc, adaptor.getSource()); + emptyShape.resize(targetRank); + for (auto i : 
getExpandedTileShape(encodingInfo.swizzle->expandShape)) { + emptyShape.push_back(rewriter.getIndexAttr(i)); + } + auto emptyTensor = rewriter.create( + loc, emptyShape, unsetEncodingOp.getSourceType().getElementType()); + + SmallVector transposePerm = + llvm::to_vector(llvm::seq(0, targetRank)); + for (auto perm : encodingInfo.swizzle->permutation) { + transposePerm.push_back(targetRank + perm); + } + auto invertedTransposePerm = invertPermutationVector(transposePerm); + auto transposeOp = rewriter.create( + loc, adaptor.getSource(), emptyTensor, invertedTransposePerm); + + SmallVector reassociation = getReassociationIndices( + targetRank, encodingInfo.swizzle->expandShape); + SmallVector unpackSrcShape( + srcConvertedType.getShape().take_front(targetRank)); + unpackSrcShape.append(encodingInfo.innerTileSizes.begin(), + encodingInfo.innerTileSizes.end()); + RankedTensorType unpackSrcType = + unsetEncodingOp.getResultType().clone(unpackSrcShape); + unpackSrc = rewriter.create( + loc, unpackSrcType, transposeOp->getResult(0), reassociation); + } + + auto unpackedValue = lowerUnsetEncodingToUnpackOp( + rewriter, unsetEncodingOp, unpackSrc, *converter, + this->materializeEncodingValueFn); + if (failed(unpackedValue)) { + Type targetType = + getTypeConverter()->convertType(unsetEncodingOp.getResultType()); + Value result = rewriter.createOrFold(loc, targetType, + adaptor.getSource()); + rewriter.replaceOp(unsetEncodingOp, result); + return success(); + } + rewriter.replaceOp(unsetEncodingOp, unpackedValue.value()); + return success(); + } +}; + /// Pattern to convert contraction operations. class MaterializeContractionOp : public OpInterfaceConversionPattern { @@ -726,21 +837,7 @@ class MaterializeContractionOp } // namespace -void populateMaterializeEncodingIntoPackUnPackPatterns( - RewritePatternSet &patterns, - MaterializeEncodingTypeConverter &typeConverter, - MaterializeEncodingValueFn materializeEncodingValueFn) { - MLIRContext *context = patterns.getContext(); - // TODO(hanchung): Move the generic op pattern to ShapeIndependent category - // after we add the support for tile swizzling variants. 
- patterns.insert, - MaterializeContractionOp, SetEncodingOpToPackOpConversion, - UnsetEncodingOpToUnPackOpConversion>( - context, typeConverter, materializeEncodingValueFn); - memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns); -} - -void populateShapeIndependentMaterializeEncodingPatterns( +void populateMaterializeEncodingPatterns( RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target, MaterializeEncodingTypeConverter &typeConverter, MaterializeEncodingValueFn materializeEncodingValueFn) { @@ -767,7 +864,10 @@ void populateShapeIndependentMaterializeEncodingPatterns( }); patterns.insert< + MaterializeContractionOp, SetEncodingOpLoweringConversion, + UnsetEncodingOpLoweringConversion, MaterializeDPSOperation, + MaterializeDPSOperation, MaterializeOperation, MaterializeOptimizationBarrierOp, MaterializeFlowDispatchTensorLoadOp, MaterializeFlowDispatchTensorStoreOp, MaterializeInterfaceBindingEncoding>(context, typeConverter, diff --git a/compiler/src/iree/compiler/Codegen/Common/Passes.td b/compiler/src/iree/compiler/Codegen/Common/Passes.td index 5571aba9b1e4..5cc0d555ec24 100644 --- a/compiler/src/iree/compiler/Codegen/Common/Passes.td +++ b/compiler/src/iree/compiler/Codegen/Common/Passes.td @@ -431,6 +431,21 @@ def LowerUKernelOpsToCallsPass : let summary = "Lower micro-kernel wrapper ops into function calls"; } +def MaterializeHostEncodingPass : + Pass<"iree-codegen-materialize-host-encoding", "mlir::ModuleOp"> { + let summary = "Materialize the encoding for tensor as specified by the backend."; +} + +def MaterializeDeviceEncodingPass : + InterfacePass<"iree-codegen-materialize-device-encoding", "mlir::FunctionOpInterface"> { + let summary = "Materialize the encoding for tensor as specified by the backend."; + let options = [ + Option<"testCLGPUTarget", "test-cl-gpu-target", "bool", /*default=*/"false", + "Flag used for lit-testing GPU target only. Not for general usage">, + ]; +} + +// TODO(hanchung): Remove the pass after we deprecate MaterializeHomogeneousEncodingsPass. 
 def MaterializeEncodingIntoNopPass :
     InterfacePass<"iree-codegen-materialize-encoding-into-nop", "mlir::FunctionOpInterface"> {
   let summary = "Drop the encodings from tensor types with encodings.";
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
index 5de2e3d6b95e..f0652d2c3636 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel
@@ -47,12 +47,17 @@ iree_lit_test_suite(
         "fold_tensor_extract_op.mlir",
         "forop_canonicalization.mlir",
         "generic_vectorization.mlir",
+        "gpu_materialize_encoding_gfx1100.mlir",
+        "gpu_materialize_encoding_gfx908.mlir",
+        "gpu_materialize_encoding_gfx90a.mlir",
+        "gpu_materialize_encoding_gfx942.mlir",
         "hoist_statically_bound_allocations.mlir",
         "hoist_unrolled_vector_extract_insert_slice.mlir",
         "iree_comprehensive_bufferize.mlir",
         "iree_expand_strided_metadata.mlir",
         "iree_loop_invariant_code_motion.mlir",
         "link_tuning_specs.mlir",
+        "llvmcpu_materialize_encoding.mlir",
         "lower_ukernel_to_calls.mlir",
         "materialize_encoding_into_nop.mlir",
         "materialize_tuning_specs.mlir",
@@ -74,8 +79,8 @@ iree_lit_test_suite(
         "replace_slow_min_max_ops.mlir",
         "strip_compilation_info.mlir",
         "test_partitionable_loops_interface.mlir",
-        "tile_and_distribute_to_workgroups_func_scope.mlir",
         "tile_and_distribute_to_workgroups.mlir",
+        "tile_and_distribute_to_workgroups_func_scope.mlir",
         "tile_and_distribute_workgroups_using_forall.mlir",
         "tile_large_tensors.mlir",
         "transform_buffer_opt.mlir",
@@ -88,10 +93,11 @@ iree_lit_test_suite(
         "type_propagation.mlir",
         "type_propagation_packing.mlir",
         "unroll_annotated_loops.mlir",
+        "vector_layout_analysis.mlir",
         "vectorize_memref_copy.mlir",
         "vectorize_tensor_pad.mlir",
-        "vector_layout_analysis.mlir",
         "verify_workgroup_distribution.mlir",
+        "vmvx_materialize_encoding.mlir",
     ],
     include = ["*.mlir"],
     exclude = [
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
index 4dc774caa54a..2d707f68c3aa 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt
@@ -43,12 +43,17 @@ iree_lit_test_suite(
     "fold_tensor_extract_op.mlir"
     "forop_canonicalization.mlir"
     "generic_vectorization.mlir"
+    "gpu_materialize_encoding_gfx1100.mlir"
+    "gpu_materialize_encoding_gfx908.mlir"
+    "gpu_materialize_encoding_gfx90a.mlir"
+    "gpu_materialize_encoding_gfx942.mlir"
     "hoist_statically_bound_allocations.mlir"
     "hoist_unrolled_vector_extract_insert_slice.mlir"
     "iree_comprehensive_bufferize.mlir"
     "iree_expand_strided_metadata.mlir"
     "iree_loop_invariant_code_motion.mlir"
     "link_tuning_specs.mlir"
+    "llvmcpu_materialize_encoding.mlir"
     "lower_ukernel_to_calls.mlir"
     "materialize_encoding_into_nop.mlir"
     "materialize_tuning_specs.mlir"
@@ -88,6 +93,7 @@ iree_lit_test_suite(
     "vectorize_memref_copy.mlir"
     "vectorize_tensor_pad.mlir"
     "verify_workgroup_distribution.mlir"
+    "vmvx_materialize_encoding.mlir"
   TOOLS
     FileCheck
     iree-opt
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx1100.mlir b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx1100.mlir
similarity index 98%
rename from compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx1100.mlir
rename to compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx1100.mlir
index bb0c61072bd3..645fd712442a 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx1100.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx1100.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-materialize-device-encoding))" \
+// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-materialize-device-encoding{test-cl-gpu-target}))" \
 // RUN:   --iree-gpu-test-target=gfx1100 \
 // RUN:   --split-input-file %s | FileCheck %s
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx908.mlir b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx908.mlir
similarity index 98%
rename from compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx908.mlir
rename to compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx908.mlir
index 4fca56365659..a9fc2bc66f62 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx908.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx908.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-materialize-device-encoding))" \
+// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-materialize-device-encoding{test-cl-gpu-target}))" \
 // RUN:   --iree-gpu-test-target=gfx908 \
 // RUN:   --split-input-file %s | FileCheck %s
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx90a.mlir b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx90a.mlir
similarity index 99%
rename from compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx90a.mlir
rename to compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx90a.mlir
index cc9cd9d30dbe..89fe357ba33b 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx90a.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx90a.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-materialize-device-encoding))" \
+// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-materialize-device-encoding{test-cl-gpu-target}))" \
 // RUN:   --iree-gpu-test-target=gfx90a \
 // RUN:   --split-input-file %s | FileCheck %s
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx942.mlir b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir
similarity index 99%
rename from compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx942.mlir
rename to compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir
index 3338de98ebbf..2544fc127f89 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding_gfx942.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/gpu_materialize_encoding_gfx942.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-materialize-device-encoding))" \
+// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-materialize-device-encoding{test-cl-gpu-target}))" \
 // RUN:   --iree-gpu-test-target=gfx942 \
 // RUN:   --split-input-file %s | FileCheck %s
diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir
b/compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir similarity index 97% rename from compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir rename to compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir index 553c134b9f78..25b69a7e31e2 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/llvmcpu_materialize_encoding.mlir +++ b/compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-cpu-materialize-device-encoding),canonicalize,cse)" --split-input-file %s | FileCheck %s +// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-materialize-device-encoding),canonicalize,cse)" --split-input-file %s | FileCheck %s #pipeline_layout = #hal.pipeline.layout, @@ -6,7 +6,7 @@ ]> #encoding = #iree_encoding.encoding (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], round_dims_to = array> func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> }{ %c0 = arith.constant 0 : index %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> @@ -44,7 +44,7 @@ func.func @set_encoding_with_padding_semantics_bf16_x86_64_avx512f() attributes #map2 = affine_map<(d0, d1, d2) -> (d0, d1)> #encoding = #iree_encoding.encoding> func.func @set_encoding_7x7x7_matmul_LHS() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> } { %c0 = arith.constant 0 : index %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> @@ -74,7 +74,7 @@ func.func @set_encoding_7x7x7_matmul_LHS() attributes { #map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> #encoding = #iree_encoding.encoding> func.func @set_encoding_128x80x32_batch_matmul_LHS() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> } { %c0 = arith.constant 0 : index %8 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> @@ -105,7 +105,7 @@ func.func @set_encoding_128x80x32_batch_matmul_LHS() attributes { #map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> #encoding = #iree_encoding.encoding> func.func @set_encoding_128x32x320_batch_matmul_RHS() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> } { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 @@ -138,7 +138,7 @@ func.func 
@set_encoding_128x32x320_batch_matmul_RHS() attributes { #map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> #encoding = #iree_encoding.encoding> func.func @unset_encoding_128x80x320_batch_matmul_RESULT() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> } { %c0 = arith.constant 0 : index %0 = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : i32 @@ -176,7 +176,7 @@ func.func @unset_encoding_128x80x320_batch_matmul_RESULT() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @pack_gemm_fill_dynamic(%arg0 : tensor, %arg1 : tensor) -> tensor attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx,+avx2,+fma"}> } { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -224,7 +224,7 @@ func.func @pack_gemm_fill_dynamic(%arg0 : tensor, %arg1 : tensor (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], round_dims_to = array> #encoding_result = #iree_encoding.encoding (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>], round_dims_to = array> func.func @matvec_shaped_matmul_lowering_f32f32f32_aarch64(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz"}> } { %c0 = arith.constant 0 : index %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<16x16xf32> @@ -257,7 +257,7 @@ func.func @matvec_shaped_matmul_lowering_f32f32f32_aarch64(%arg0: !hal.buffer_vi #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_f32f32f32_aarch64() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz", ukernels = "all"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz", ukernels = "all"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -323,7 +323,7 @@ func.func @matmul_lowering_f32f32f32_aarch64() attributes { #encoding_rhs = #iree_encoding.encoding (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0)>], round_dims_to = array> #encoding_result = #iree_encoding.encoding (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0)>], round_dims_to = array> func.func @matvec_lowering_f32f32f32_aarch64(%arg0: tensor<16x16xf32>, %arg1: tensor<16xf32>, %arg2: tensor<16xf32>) -> tensor<16xf32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz"}> } { %c0 = arith.constant 0 : index %3 = iree_encoding.set_encoding %arg0 : tensor<16x16xf32> -> tensor<16x16xf32, #encoding_lhs> @@ -352,7 +352,7 @@ func.func @matvec_lowering_f32f32f32_aarch64(%arg0: tensor<16x16xf32>, %arg1: te 
#encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matvec_lowering_f32f32f32_aarch64() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz"}> } { %c0 = arith.constant 0 : index %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) @@ -414,7 +414,7 @@ func.func @matvec_lowering_f32f32f32_aarch64() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_f16f16f16_aarch64() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz", ukernels = "all"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz", ukernels = "all"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -485,7 +485,7 @@ func.func @matmul_lowering_f16f16f16_aarch64() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_f32f32f32_x86_64() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -557,7 +557,7 @@ func.func @matmul_lowering_f32f32f32_x86_64() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_f32f32f32_x86_64_avx2() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -628,7 +628,7 @@ func.func @matmul_lowering_f32f32f32_x86_64_avx2() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_f32f32f32_x86_64_avx512f() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -699,7 +699,7 @@ func.func @matmul_lowering_f32f32f32_x86_64_avx512f() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_f16f16f32_x86_64_avx512f() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -770,7 +770,7 @@ func.func @matmul_lowering_f16f16f32_x86_64_avx512f() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func 
@matmul_lowering_f16f16f16_x86_64_avx512f() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -841,7 +841,7 @@ func.func @matmul_lowering_f16f16f16_x86_64_avx512f() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_bf16bf16f32_x86_64_avx512f() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -912,7 +912,7 @@ func.func @matmul_lowering_bf16bf16f32_x86_64_avx512f() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_bf16bf16bf16_x86_64_avx512f() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -983,7 +983,7 @@ func.func @matmul_lowering_bf16bf16bf16_x86_64_avx512f() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_bf16bf16f32_x86_64_avx512bf16() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f,+avx512bf16"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f,+avx512bf16"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1056,7 +1056,7 @@ func.func @matmul_lowering_bf16bf16f32_x86_64_avx512bf16() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_bf16bf16bf16_x86_64_avx512bf16() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f,+avx512bf16"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f,+avx512bf16"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1129,7 +1129,7 @@ func.func @matmul_lowering_bf16bf16bf16_x86_64_avx512bf16() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_f32f16f16_aarch64() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz", ukernels = "all"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz", ukernels = "all"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1202,7 +1202,7 @@ func.func @matmul_lowering_f32f16f16_aarch64() 
attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_f32f16f16_x86_64_avx512f() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f,+avx512bf16"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f,+avx512bf16"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1276,7 +1276,7 @@ func.func @matmul_lowering_f32f16f16_x86_64_avx512f() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i8i8i32_aarch64() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1344,7 +1344,7 @@ func.func @matmul_lowering_i8i8i32_aarch64() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i8i8i32_aarch64_dotprod() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz", cpu_features="+dotprod", ukernels = "all"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz", cpu_features="+dotprod", ukernels = "all"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1417,7 +1417,7 @@ func.func @matmul_lowering_i8i8i32_aarch64_dotprod() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i8i8i32_aarch64_i8mm() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz", cpu_features="+dotprod,+i8mm", ukernels = "all"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz", cpu_features="+dotprod,+i8mm", ukernels = "all"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1489,7 +1489,7 @@ func.func @matmul_lowering_i8i8i32_aarch64_i8mm() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i8i4i32_aarch64() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1563,7 +1563,7 @@ func.func @matmul_lowering_i8i4i32_aarch64() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i8i4i32_aarch64_dotprod() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz", cpu_features="+dotprod", ukernels = "all"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz", cpu_features="+dotprod", ukernels = "all"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1635,7 
+1635,7 @@ func.func @matmul_lowering_i8i4i32_aarch64_dotprod() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i8i4i32_aarch64_i8mm() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="aarch64-xyz-xyz", cpu_features="+dotprod,+i8mm", ukernels = "all"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="aarch64-xyz-xyz", cpu_features="+dotprod,+i8mm", ukernels = "all"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1704,7 +1704,7 @@ func.func @matmul_lowering_i8i4i32_aarch64_i8mm() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_f32f32f32_aarch64_sve(%lhs: tensor, %rhs: tensor, %acc: tensor) -> tensor attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {cpu_features = "+sve", target_triple="aarch64-xyz-xyz"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {cpu_features = "+sve", target_triple="aarch64-xyz-xyz"}> } { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -1736,7 +1736,7 @@ func.func @matmul_lowering_f32f32f32_aarch64_sve(%lhs: tensor, %rhs: te #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_f32f32f32_riscv(%lhs: tensor, %rhs: tensor, %acc: tensor) -> tensor attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="riscv32-xyz-xyz"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="riscv32-xyz-xyz"}> } { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -1772,7 +1772,7 @@ func.func @matmul_lowering_f32f32f32_riscv(%lhs: tensor, %rhs: tensor> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i8i8i32_riscv32_ukernel() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="riscv32-xyz-xyz", ukernels = "all"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="riscv32-xyz-xyz", ukernels = "all"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1845,7 +1845,7 @@ func.func @matmul_lowering_i8i8i32_riscv32_ukernel() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i8i8i32_x86_64_avx2() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx2"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx2"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1918,7 +1918,7 @@ func.func @matmul_lowering_i8i8i32_x86_64_avx2() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i8i8i32_x86_64_avx512bw() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512bw"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512bw"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -1991,7 +1991,7 @@ 
func.func @matmul_lowering_i8i8i32_x86_64_avx512bw() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i8i8i32_x86_64_avx512vnni() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -2059,7 +2059,7 @@ func.func @matmul_lowering_i8i8i32_x86_64_avx512vnni() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @extend_batch_vecmat_explicit_unit_dim(%arg0: tensor<32x1x128xi8>, %arg1: tensor<32x128x11008xi8>) -> tensor<32x1x11008xi32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c0_i32 = arith.constant 0 : i32 %4 = iree_encoding.set_encoding %arg0 : tensor<32x1x128xi8> -> tensor<32x1x128xi8, #encoding_lhs> @@ -2122,7 +2122,7 @@ func.func @extend_batch_vecmat_explicit_unit_dim(%arg0: tensor<32x1x128xi8>, %ar #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i16i16i32_x86_64_avx2() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx2"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx2"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -2195,7 +2195,7 @@ func.func @matmul_lowering_i16i16i32_x86_64_avx2() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_lowering_i16ui4i32_x86_64_avx512vnni() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c0 = arith.constant 0 : index %M = hal.interface.constant.load layout(#pipeline_layout) ordinal(0) : index @@ -2263,7 +2263,7 @@ func.func @matmul_lowering_i16ui4i32_x86_64_avx512vnni() attributes { #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @vecmat(%arg0: tensor<128xi8>, %arg1: tensor<128x11008xi8>) -> tensor<11008xi32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c0_i32 = arith.constant 0 : i32 %4 = iree_encoding.set_encoding %arg0 : tensor<128xi8> -> tensor<128xi8, #encoding_lhs> @@ -2325,7 +2325,7 @@ func.func @vecmat(%arg0: tensor<128xi8>, %arg1: tensor<128x11008xi8>) -> tensor< #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matvec(%arg0: tensor<11008x128xi8>, %arg1: tensor<128xi8>) -> tensor<11008xi32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", 
{target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c0_i32 = arith.constant 0 : i32 %4 = iree_encoding.set_encoding %arg0 : tensor<11008x128xi8> -> tensor<11008x128xi8, #encoding_lhs> @@ -2387,7 +2387,7 @@ func.func @matvec(%arg0: tensor<11008x128xi8>, %arg1: tensor<128xi8>) -> tensor< #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matvec_with_narrow_M(%arg0: tensor<15x128xi8>, %arg1: tensor<128xi8>) -> tensor<15xi32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c0_i32 = arith.constant 0 : i32 %4 = iree_encoding.set_encoding %arg0 : tensor<15x128xi8> -> tensor<15x128xi8, #encoding_lhs> @@ -2450,7 +2450,7 @@ func.func @matvec_with_narrow_M(%arg0: tensor<15x128xi8>, %arg1: tensor<128xi8>) #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @batch_vecmat(%arg0: tensor<32x128xi8>, %arg1: tensor<32x128x11008xi8>) -> tensor<32x11008xi32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c0_i32 = arith.constant 0 : i32 %4 = iree_encoding.set_encoding %arg0 : tensor<32x128xi8> -> tensor<32x128xi8, #encoding_lhs> @@ -2509,7 +2509,7 @@ func.func @batch_vecmat(%arg0: tensor<32x128xi8>, %arg1: tensor<32x128x11008xi8> #encoding_rhs = #iree_encoding.encoding (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], round_dims_to = array> #encoding_result = #iree_encoding.encoding (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], round_dims_to = array> func.func @batch_matvec(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: !hal.buffer_view) -> !hal.buffer_view attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %0 = hal.tensor.import %arg0 "input0" : !hal.buffer_view -> tensor<32x11008x128xi8> %1 = hal.tensor.import %arg1 "input1" : !hal.buffer_view -> tensor<32x128xi8> @@ -2535,7 +2535,7 @@ func.func @batch_matvec(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view, %arg2: #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @matmul_transpose_a_f32f32f32(%arg0: tensor<256x128xf32>, %arg1: tensor<256x512xf32>, %arg2: tensor<128x512xf32>) -> tensor<128x512xf32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c256 = arith.constant 256 : index %c128 = arith.constant 128 : index @@ -2574,7 +2574,7 @@ func.func @matmul_transpose_a_f32f32f32(%arg0: tensor<256x128xf32>, %arg1: tenso #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func 
@matmul_transpose_b_f32f32f32(%arg0: tensor<128x256xf32>, %arg1: tensor<512x256xf32>, %arg2: tensor<128x512xf32>) -> tensor<128x512xf32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c128 = arith.constant 128 : index %c256 = arith.constant 256 : index @@ -2612,7 +2612,7 @@ func.func @matmul_transpose_b_f32f32f32(%arg0: tensor<128x256xf32>, %arg1: tenso #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @batch_matmul_transpose_a_f32f32f32(%arg0: tensor<2x256x128xf32>, %arg1: tensor<2x256x512xf32>, %arg2: tensor<2x128x512xf32>) -> tensor<2x128x512xf32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c2 = arith.constant 2 : index %c256 = arith.constant 256 : index @@ -2651,7 +2651,7 @@ func.func @batch_matmul_transpose_a_f32f32f32(%arg0: tensor<2x256x128xf32>, %arg #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @batch_matmul_transpose_b_f32f32f32(%arg0: tensor<2x128x256xf32>, %arg1: tensor<2x512x256xf32>, %arg2: tensor<2x128x512xf32>) -> tensor<2x128x512xf32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c2 = arith.constant 2 : index %c128 = arith.constant 128 : index @@ -2690,7 +2690,7 @@ func.func @batch_matmul_transpose_b_f32f32f32(%arg0: tensor<2x128x256xf32>, %arg #encoding_rhs = #iree_encoding.encoding> #encoding_result = #iree_encoding.encoding> func.func @generic_batch_vecmat_transposed_i16u4i32(%arg0: tensor<32x128xi16>, %arg1: tensor<4096x32x128xi4>, %arg2: tensor<4096x32xi32>) -> tensor<4096x32xi32> attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512vnni"}> } { %c0_i32 = arith.constant 0 : i32 %c0_i4 = arith.constant 0 : i4 @@ -2747,7 +2747,7 @@ func.func @generic_batch_vecmat_transposed_i16u4i32(%arg0: tensor<32x128xi16>, % #encoding = #iree_encoding.encoding (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array> #encoding_bcast = #iree_encoding.encoding (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d2)>, round_dims_to = array> func.func @dequantization() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 @@ -2802,7 +2802,7 @@ func.func @dequantization() attributes { #encoding = #iree_encoding.encoding (d0, 
d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array> #encoding_bcast = #iree_encoding.encoding (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d1, d2)>, round_dims_to = array> func.func @broadcast_batch() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 @@ -2841,7 +2841,7 @@ func.func @broadcast_batch() attributes { #encoding = #iree_encoding.encoding (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array> #encoding_bcast = #iree_encoding.encoding (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1)>, round_dims_to = array> func.func @broadcast_M() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 @@ -2880,7 +2880,7 @@ func.func @broadcast_M() attributes { #encoding = #iree_encoding.encoding (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array> #encoding_bcast = #iree_encoding.encoding (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d2)>, round_dims_to = array> func.func @broadcast_N() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 @@ -2919,7 +2919,7 @@ func.func @broadcast_N() attributes { #encoding = #iree_encoding.encoding (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>, round_dims_to = array> #encoding_bcast = #iree_encoding.encoding (d0, d3, d2)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>], bcast_map = affine_map<(d0, d1, d2) -> (d0, d2)>, round_dims_to = array> func.func @broadcast_K() attributes { - hal.executable.target = #hal.executable.target<"xyz", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> + hal.executable.target = #hal.executable.target<"llvm-cpu", "xyz", {target_triple="x86_64-xyz-xyz", cpu_features="+avx512f"}> } { %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir 
similarity index 99%
rename from compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir
rename to compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir
index 85dd416a8153..2f3b91ff7255 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CPU/test/vmvx_materialize_encoding.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/vmvx_materialize_encoding.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-cpu-materialize-device-encoding),canonicalize,cse)" --split-input-file %s | FileCheck %s
+// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-materialize-device-encoding),canonicalize,cse)" --split-input-file %s | FileCheck %s
 
 #pipeline_layout = #hal.pipeline.layout,
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
index 76b2745dbc45..1d2b66ee634e 100644
--- a/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
+++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/Passes.cpp
@@ -788,7 +788,7 @@ void buildLLVMCPUCodegenConfigurationPassPipelineImpl(
       // TODO(#13888): This(createExpandF16OpToF32Pass()) pass is being added
       // way to late and should insted be be done during lowering to LLVM.
       .addPass(createExpandF16OpToF32Pass)
-      .addPass(createCPUMaterializeDeviceEncodingPass)
+      .addPass(createMaterializeDeviceEncodingPass)
       // TODO: Remove the following pass the plumb support for
       // #hal.descriptor_type memory space through the stack.
       .addPass(createEraseHALDescriptorTypeFromMemRefPass);
diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
index f17a353afcc2..812bc9bc2f5e 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
+++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.cpp
@@ -161,6 +161,10 @@ const char *getIreeArchNameForTargetTriple(llvm::Triple triple) {
   return "unknown";
 }
 
+bool isLLVMCPUBackend(IREE::HAL::ExecutableTargetAttr targetAttr) {
+  return targetAttr && targetAttr.getBackend().getValue() == "llvm-cpu";
+}
+
 bool isVMVXBackend(IREE::HAL::ExecutableTargetAttr targetAttr) {
   return targetAttr && targetAttr.getBackend().getValue().starts_with("vmvx");
 }
diff --git a/compiler/src/iree/compiler/Codegen/Utils/Utils.h b/compiler/src/iree/compiler/Codegen/Utils/Utils.h
index d8f96de94213..ea3d06956a27 100644
--- a/compiler/src/iree/compiler/Codegen/Utils/Utils.h
+++ b/compiler/src/iree/compiler/Codegen/Utils/Utils.h
@@ -61,9 +61,8 @@ std::optional getTargetTriple(Attribute attr);
 const char *getIreeArchNameForTargetTriple(llvm::Triple triple);
 
 /// Methods to get target information.
+bool isLLVMCPUBackend(IREE::HAL::ExecutableTargetAttr targetAttr);
 bool isVMVXBackend(IREE::HAL::ExecutableTargetAttr targetAttr);
-
-/// Methods to get target information.
 bool isROCMBackend(IREE::HAL::ExecutableTargetAttr targetAttr);
 
 // Returns true if the ukernel with given `ukernelName` is enabled.
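Aside (not part of the patch): the backend predicates above are what callers use to decide whether a real data-tiled layout can be materialized or whether the nop path applies. A minimal sketch, assuming only the declarations in Utils.h; backendSupportsDataTiling is a hypothetical helper name, not an API introduced by this change.

// Hypothetical convenience wrapper around the predicates declared in Utils.h.
// llvm-cpu, vmvx, and rocm (the latter behind an experimental flag) are the
// backends that currently implement data-tiled encodings; every other backend
// keeps the nop materialization.
static bool backendSupportsDataTiling(IREE::HAL::ExecutableTargetAttr target) {
  return isLLVMCPUBackend(target) || isVMVXBackend(target) ||
         isROCMBackend(target);
}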
diff --git a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.cpp b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.cpp
index 00c5c9f9637b..a196e3121894 100644
--- a/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.cpp
+++ b/compiler/src/iree/compiler/Dialect/VMVX/Transforms/Passes.cpp
@@ -44,7 +44,7 @@ void buildVMVXConfigurationPassPipeline(OpPassManager &variantPassManager) {
   }
   modulePassManager.addPass(createMaterializeUserConfigsPass());
   FunctionLikeNest(modulePassManager)
-      .addPass(createCPUMaterializeDeviceEncodingPass)
+      .addPass(createMaterializeDeviceEncodingPass)
       // TODO: Remove the following pass the plumb support for
       // #hal.descriptor_type memory space through the stack.
      .addPass(createEraseHALDescriptorTypeFromMemRefPass);
diff --git a/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel b/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel
index d85310e8dfe4..50ff8a6fad2b 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel
+++ b/compiler/src/iree/compiler/GlobalOptimization/BUILD.bazel
@@ -76,8 +76,6 @@ iree_compiler_cc_library(
         ":PassHeaders",
         ":PassesIncGen",
         "//compiler/src/iree/compiler/Codegen/Common",
-        "//compiler/src/iree/compiler/Codegen/Common/CPU:CommonCPUPasses",
-        "//compiler/src/iree/compiler/Codegen/Common/GPU:CommonGPUPasses",
         "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
         "//compiler/src/iree/compiler/Dialect/Encoding/IR",
         "//compiler/src/iree/compiler/Dialect/Flow/Conversion/TensorToFlow",
diff --git a/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt b/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt
index 9ca16eed433d..6650602f8c98 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt
+++ b/compiler/src/iree/compiler/GlobalOptimization/CMakeLists.txt
@@ -91,8 +91,6 @@ iree_cc_library(
     MLIRTransformUtils
     MLIRTransforms
     iree::compiler::Codegen::Common
-    iree::compiler::Codegen::Common::CPU::CommonCPUPasses
-    iree::compiler::Codegen::Common::GPU::CommonGPUPasses
     iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect
     iree::compiler::Dialect::Encoding::IR
     iree::compiler::Dialect::Flow::Conversion::TensorToFlow
diff --git a/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp b/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp
index adcc12977bad..f7aeb8225d0b 100644
--- a/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp
+++ b/compiler/src/iree/compiler/GlobalOptimization/MaterializeHomogeneousEncodings.cpp
@@ -4,8 +4,6 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-#include "iree/compiler/Codegen/Common/CPU/Passes.h"
-#include "iree/compiler/Codegen/Common/GPU/Passes.h"
 #include "iree/compiler/Codegen/Common/Passes.h"
 #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h"
 #include "iree/compiler/Dialect/HAL/Analysis/DeviceAnalysis.h"
@@ -82,10 +80,10 @@ class MaterializeHomogeneousEncodingsPass
     // Only llvm-cpu and rocm backends handle encodings for now, others just go
     // with nop.
     if (executableTarget.getBackend() == "llvm-cpu") {
-      passManager.addPass(createCPUMaterializeHostEncodingPass());
+      passManager.addPass(createMaterializeHostEncodingPass());
     } else if (clEnableExperimentalRocmDataTiling &&
                executableTarget.getBackend() == "rocm") {
-      passManager.addPass(createGPUMaterializeHostEncodingPass());
+      passManager.addPass(createMaterializeHostEncodingPass());
       FunctionLikeNest(passManager).addPass([&]() {
         return createDecomposePackUnPackOpsPass(
             DecomposePackUnPackOpsPassOptions{/*tileOuterToOne=*/false,