diff --git a/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir.cc b/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir.cc
index 7aef584334ba4..d94c6d5a03846 100644
--- a/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir.cc
+++ b/xla/service/gpu/fusions/mlir/elemental_hlo_to_mlir.cc
@@ -1528,6 +1528,9 @@ ValueRange EmitLoopNestImpl(
   };
   scf::LoopNest loop_nest =
       scf::buildLoopNest(b, b.getLoc(), lbs, ubs, steps, iter_args_inits, bb);
+  if (loop_nest.results.empty()) {
+    return {};
+  }
   ValueRange result_range =
       loop_nest.results.front().getDefiningOp()->getResults();
   CHECK_EQ(result_range.size(), loop_nest.results.size())
diff --git a/xla/service/gpu/fusions/tests/reduce_column/horizontal_fusion_two_groups.hlo b/xla/service/gpu/fusions/tests/reduce_column/horizontal_fusion_two_groups.hlo
new file mode 100644
index 0000000000000..145ab0b91e181
--- /dev/null
+++ b/xla/service/gpu/fusions/tests/reduce_column/horizontal_fusion_two_groups.hlo
@@ -0,0 +1,19 @@
+// RUN: test_correctness %s
+// RUN: fusion_to_mlir %s | FileCheck %s
+
+region {  // Scalar f32 addition; referenced by `reduce` via to_apply.
+  param_0 = f32[] parameter(0)
+  param_1 = f32[] parameter(1)
+  ROOT add = f32[] add(param_0, param_1)  // Returns param_0 + param_1.
+}
+
+fused_computation {  // Two roots: a dim-0 sum of p0, and p1 passed through.
+  p0 = f32[512,1280] parameter(0)
+  constant = f32[] constant(0)  // Init value for the reduction.
+  reduce = f32[1280] reduce(p0, constant), dimensions={0}, to_apply=region  // Reduces away the 512-sized dim 0.
+  p1 = f32[512,1280] parameter(1)  // Only forwarded to the root tuple.
+  ROOT tuple = (f32[1280], f32[512,1280]) tuple(reduce, p1)
+}
+
+// The two outputs are completely unrelated, so they're put in separate groups.
+// CHECK: scf.index_switch