diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp index bd3c1f4614ac..77b99a902cc9 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp @@ -2922,10 +2922,14 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn, // loads and stores will have a performance impact. auto resultTypes = rootOperation->getResultTypes(); if (commonVecTileSizes.size() != 0 && !resultTypes.empty()) { - auto elementTypeSize = - cast(rootOperation->getResultTypes().front()) - .getElementType() - .getIntOrFloatBitWidth(); + Type elementType = cast(resultTypes[0]).getElementType(); + unsigned int elementTypeSize; + if (auto complexType = llvm::dyn_cast(elementType)) { + elementTypeSize = + 2 * complexType.getElementType().getIntOrFloatBitWidth(); + } else { + elementTypeSize = elementType.getIntOrFloatBitWidth(); + } // for now just enable for i1 if (elementTypeSize == 1) { auto innermostTileSize = commonVecTileSizes.back(); diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir index 9161c810aa23..22a288062bc2 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir @@ -1983,3 +1983,52 @@ func.func @i1_type() attributes {hal.executable.target = #executable_target_emb // CHECK: func @i1_type() // CHECK: linalg.generic { // CHECK-SAME: {lowering_config = #[[CONFIG]]} + +// ----- +#pipeline_layout = #hal.pipeline.layout, + #hal.pipeline.binding, + #hal.pipeline.binding, + #hal.pipeline.binding +]> + +#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-unknown-linux-gnu"}> +#map = affine_map<(d0, d1, d2) -> (d1)> +#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +func.func @complex_view_as_real() attributes {hal.executable.target = #executable_target_embedded_elf_x86_64_} { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(0) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %1 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor>> + %2 = hal.interface.binding.subspan layout(#pipeline_layout) binding(2) alignment(64) offset(%c0) flags(ReadOnly) : !flow.dispatch.tensor> + %3 = hal.interface.binding.subspan layout(#pipeline_layout) binding(3) alignment(64) offset(%c0) : !flow.dispatch.tensor> + %4 = flow.dispatch.tensor.load %0, offsets = [0], sizes = [1], strides = [1] : !flow.dispatch.tensor> -> tensor<1xi32> + %5 = flow.dispatch.tensor.load %2, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 32, 50, 2], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor> -> tensor<1x1x32x50x2xf32> + %6 = tensor.empty() : tensor<32x50x2xf32> + %extracted = tensor.extract %4[%c0] : tensor<1xi32> + %7 = arith.extsi %extracted : i32 to i64 + %8 = arith.index_cast %7 : i64 to index + %9 = flow.dispatch.tensor.load %1, offsets = [%8, 0], sizes = [1, 50], strides = [1, 1] : !flow.dispatch.tensor>> -> tensor<50xcomplex> + %10 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%9 : tensor<50xcomplex>) outs(%6 : tensor<32x50x2xf32>) { + ^bb0(%in: complex, %out: f32): + %11 = linalg.index 0 : index + %12 = linalg.index 1 : index + %extracted_0 = tensor.extract %5[%c0, %c0, %11, %12, %c0] : tensor<1x1x32x50x2xf32> + %extracted_1 = tensor.extract %5[%c0, %c0, %11, %12, %c1] : tensor<1x1x32x50x2xf32> + %13 = complex.create %extracted_0, %extracted_1 : complex + %14 = complex.mul %13, %in : complex + %15 = complex.re %14 : complex + %16 = complex.im %14 : complex + %17 = linalg.index 2 : index + %18 = arith.cmpi eq, %17, %c0 : index + %19 = arith.select %18, %15, %16 : f32 + linalg.yield %19 : f32 + } -> tensor<32x50x2xf32> + flow.dispatch.tensor.store %10, %3, offsets = [0, 0, 0], sizes = [32, 50, 2], strides = [1, 1, 1] : tensor<32x50x2xf32> -> !flow.dispatch.tensor> + return +} + +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config +// CHECK: func.func @complex_view_as_real() +// CHECK: linalg.generic +// CHECK-SAME: lowering_config = #[[CONFIG]]