// Patch summary (preamble text ahead of the first `diff --git` header).
//
// 1. xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir.cc
//    In the hunk labelled `EmitLoopNestImpl`, directly after the
//    `scf::buildLoopNest(...)` call, an early `return {};` is added for the
//    case where `loop_nest.results` is empty. The pre-existing code that
//    follows calls `loop_nest.results.front()`, so the new guard runs before
//    that access.
//    NOTE(review): presumably this covers loop nests built with no iteration
//    arguments -- confirm against the callers of EmitLoopNestImpl.
//
// 2. xla/service/gpu/fusions/tests/reduce_column/horizontal_fusion_two_groups.hlo
//    New test file (19 added lines). Its RUN lines invoke `test_correctness`
//    and `fusion_to_mlir ... | FileCheck`. The fused computation returns a
//    tuple of (a) a reduce of p0 over dimension 0 using the `region` add
//    computation and (b) the untouched parameter p1. The single CHECK line
//    expects `scf.index_switch` in the emitted MLIR.
//
// NOTE(review): in this copy the whole patch sits on one physical line; the
// original line breaks would have to be restored before it can be applied.
diff --git a/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir.cc b/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir.cc index 7aef584334ba4..d94c6d5a03846 100644 --- a/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir.cc +++ b/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir.cc @@ -1528,6 +1528,9 @@ ValueRange EmitLoopNestImpl( }; scf::LoopNest loop_nest = scf::buildLoopNest(b, b.getLoc(), lbs, ubs, steps, iter_args_inits, bb); + if (loop_nest.results.empty()) { + return {}; + } ValueRange result_range = loop_nest.results.front().getDefiningOp()->getResults(); CHECK_EQ(result_range.size(), loop_nest.results.size()) diff --git a/xla/service/gpu/fusions/tests/reduce_column/horizontal_fusion_two_groups.hlo b/xla/service/gpu/fusions/tests/reduce_column/horizontal_fusion_two_groups.hlo new file mode 100644 index 0000000000000..145ab0b91e181 --- /dev/null +++ b/xla/service/gpu/fusions/tests/reduce_column/horizontal_fusion_two_groups.hlo @@ -0,0 +1,19 @@ +// RUN: test_correctness %s +// RUN: fusion_to_mlir %s | FileCheck %s + +region { + param_0 = f32[] parameter(0) + param_1 = f32[] parameter(1) + ROOT add = f32[] add(param_0, param_1) +} + +fused_computation { + p0 = f32[512,1280] parameter(0) + constant = f32[] constant(0) + reduce = f32[1280] reduce(p0, constant), dimensions={0}, to_apply=region + p1 = f32[512,1280] parameter(1) + ROOT tuple = (f32[1280], f32[512,1280]) tuple(reduce, p1) +} + +// The two outputs are completely unrelated, so they're put in separate groups. +// CHECK: scf.index_switch