diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 981c5271fb3f6..7f605be976549 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2146,13 +2146,39 @@ CommonPointerBase CommonPointerBase::compute(Value *LHS, Value *RHS) {
   return Base;
 }
 
+/// Whether expanding the GEP chains is expensive. The LHS and RHS chains are
+/// walked separately; in each chain every multi-use GEP except the first
+/// multi-use one is counted. The expansion is treated as expensive once the
+/// combined count exceeds two.
+bool CommonPointerBase::isExpensive() const {
+  unsigned NumGEPs = 0;
+  auto ProcessGEPs = [&NumGEPs](ArrayRef<GEPOperator *> GEPs) {
+    // Per-chain state: the first multi-use GEP of each chain is not counted.
+    bool SeenMultiUse = false;
+    for (GEPOperator *GEP : GEPs) {
+      // Only count multi-use GEPs, excluding the first one. For the first one,
+      // we will directly reuse the offset. For one-use GEPs, their offset will
+      // be folded into a multi-use GEP.
+      if (!GEP->hasOneUse()) {
+        if (SeenMultiUse)
+          ++NumGEPs;
+        SeenMultiUse = true;
+      }
+    }
+  };
+  ProcessGEPs(LHSGEPs);
+  ProcessGEPs(RHSGEPs);
+  // Allow at most two counted GEPs across both chains.
+  return NumGEPs > 2;
+}
+
 /// Optimize pointer differences into the same array into a size. Consider:
 /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
 /// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
 Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
                                                    Type *Ty, bool IsNUW) {
   CommonPointerBase Base = CommonPointerBase::compute(LHS, RHS);
-  if (!Base.Ptr)
+  if (!Base.Ptr || Base.isExpensive())
     return nullptr;
 
   // To avoid duplicating the offset arithmetic, rewrite the GEP to use the
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c90ff2a868d4c..c5e1b04002545 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -712,7 +712,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
   };
 
   CommonPointerBase Base = CommonPointerBase::compute(GEPLHS, RHS);
-  if (Base.Ptr == RHS && CanFold(Base.LHSNW)) {
+  if (Base.Ptr == RHS && CanFold(Base.LHSNW) && !Base.isExpensive()) {
     // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
     Type *IdxTy = DL.getIndexType(GEPLHS->getType());
     Value *Offset =
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index f7fbf0815df03..c67e27e5b3e7c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -910,6 +910,9 @@ struct CommonPointerBase {
   GEPNoWrapFlags RHSNW = GEPNoWrapFlags::all();
 
   static CommonPointerBase compute(Value *LHS, Value *RHS);
+
+  /// Whether expanding the GEP chains is expensive.
+  bool isExpensive() const;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll
index 3f104056fb1f2..938ec64c2bcad 100644
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -849,3 +849,143 @@ define i1 @gep_mugtiple_ugt_inbounds_nusw(ptr %base, i64 %idx, i64 %idx2) {
   %cmp = icmp ugt ptr %gep2, %base
   ret i1 %cmp
 }
+
+; Three multi-use GEPs: the first is not counted, so two count towards the
+; limit and the compare is still folded into offset arithmetic.
+define i1 @gep_multiple_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit(
+; CHECK-NEXT:    [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2:%.*]], i64 [[GEP3_IDX]]
+; CHECK-NEXT:    call void @use(ptr [[GEP3]])
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr i8, ptr [[GEP3]], i64 [[GEP2_IDX]]
+; CHECK-NEXT:    call void @use(ptr [[GEP4]])
+; CHECK-NEXT:    [[GEP3_IDX1:%.*]] = shl i64 [[IDX4:%.*]], 2
+; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr i8, ptr [[GEP4]], i64 [[GEP3_IDX1]]
+; CHECK-NEXT:    call void @use(ptr [[GEP5]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[GEP3_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[GEP3_IDX1]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 %idx1
+  call void @use(ptr %gep1)
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  call void @use(ptr %gep2)
+  %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+  call void @use(ptr %gep3)
+  %cmp = icmp eq ptr %gep3, %base
+  ret i1 %cmp
+}
+
+; A trailing one-use GEP is not counted, so this stays within the limit.
+define i1 @gep_multiple_multi_use_below_limit_extra_one_use_gep1(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit_extra_one_use_gep1(
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
+; CHECK-NEXT:    call void @use(ptr [[GEP1]])
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
+; CHECK-NEXT:    call void @use(ptr [[GEP2]])
+; CHECK-NEXT:    [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
+; CHECK-NEXT:    call void @use(ptr [[GEP3]])
+; CHECK-NEXT:    [[GEP4_IDX_NEG:%.*]] = mul i64 [[IDX4:%.*]], -4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[GEP3_IDX]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP2]], [[GEP4_IDX_NEG]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 %idx1
+  call void @use(ptr %gep1)
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  call void @use(ptr %gep2)
+  %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+  call void @use(ptr %gep3)
+  %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+  %cmp = icmp eq ptr %gep4, %base
+  ret i1 %cmp
+}
+
+; A leading one-use GEP is not counted, so this stays within the limit.
+define i1 @gep_multiple_multi_use_below_limit_extra_one_use_gep2(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_multiple_multi_use_below_limit_extra_one_use_gep2(
+; CHECK-NEXT:    [[GEP1_IDX1:%.*]] = add i64 [[IDX1:%.*]], [[IDX2:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[GEP1_IDX1]], 2
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    call void @use(ptr [[GEP2]])
+; CHECK-NEXT:    [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
+; CHECK-NEXT:    call void @use(ptr [[GEP3]])
+; CHECK-NEXT:    [[GEP4_IDX:%.*]] = shl i64 [[IDX4:%.*]], 2
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr i8, ptr [[GEP3]], i64 [[GEP4_IDX]]
+; CHECK-NEXT:    call void @use(ptr [[GEP4]])
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[TMP1]], [[GEP3_IDX]]
+; CHECK-NEXT:    [[GEP4_IDX_NEG:%.*]] = sub i64 0, [[GEP4_IDX]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP2]], [[GEP4_IDX_NEG]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 %idx1
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  call void @use(ptr %gep2)
+  %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+  call void @use(ptr %gep3)
+  %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+  call void @use(ptr %gep4)
+  %cmp = icmp eq ptr %gep4, %base
+  ret i1 %cmp
+}
+
+; GEPs with constant offsets are counted too: four multi-use GEPs give a count
+; of three, so the pointer compare is kept.
+define i1 @gep_multiple_multi_above_below_limit_consts(ptr %base, i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @gep_multiple_multi_above_below_limit_consts(
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 16
+; CHECK-NEXT:    call void @use(ptr [[GEP1]])
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[IDX1:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[GEP2]])
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 16
+; CHECK-NEXT:    call void @use(ptr [[GEP3]])
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr i32, ptr [[GEP3]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[GEP4]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP4]], [[BASE]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 4
+  call void @use(ptr %gep1)
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx1
+  call void @use(ptr %gep2)
+  %gep3 = getelementptr i32, ptr %gep2, i64 4
+  call void @use(ptr %gep3)
+  %gep4 = getelementptr i32, ptr %gep3, i64 %idx2
+  call void @use(ptr %gep4)
+  %cmp = icmp eq ptr %gep4, %base
+  ret i1 %cmp
+}
+
+; Four multi-use GEPs give a count of three, which is above the limit, so the
+; pointer compare is kept.
+define i1 @gep_multiple_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @gep_multiple_multi_use_above_limit(
+; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[GEP4]])
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr i32, ptr [[GEP4]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[GEP3]])
+; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr i32, ptr [[GEP3]], i64 [[IDX3:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[GEP5]])
+; CHECK-NEXT:    [[GEP6:%.*]] = getelementptr i32, ptr [[GEP5]], i64 [[IDX4:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[GEP6]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP6]], [[BASE]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %gep1 = getelementptr i32, ptr %base, i64 %idx1
+  call void @use(ptr %gep1)
+  %gep2 = getelementptr i32, ptr %gep1, i64 %idx2
+  call void @use(ptr %gep2)
+  %gep3 = getelementptr i32, ptr %gep2, i64 %idx3
+  call void @use(ptr %gep3)
+  %gep4 = getelementptr i32, ptr %gep3, i64 %idx4
+  call void @use(ptr %gep4)
+  %cmp = icmp eq ptr %gep4, %base
+  ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll
index 84e570395e03b..45e5686ad70e7 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -1172,3 +1172,69 @@ define i64 @nuw_ptrdiff_mul_nsw_nneg_scale_multiuse(ptr %base, i64 %idx) {
   %diff = sub nuw i64 %lhs, %rhs
   ret i64 %diff
 }
+
+; Each chain has two multi-use GEPs and the first one per chain is not counted,
+; so the total count is two and the difference is still folded.
+define i64 @multiple_geps_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
+; CHECK-LABEL: @multiple_geps_multi_use_below_limit(
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1:%.*]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P2]])
+; CHECK-NEXT:    [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[IDX5:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P4]])
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX3:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P3]])
+; CHECK-NEXT:    [[P5:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P5]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[IDX2]], [[IDX5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[IDX3]], [[IDX4]]
+; CHECK-NEXT:    [[GEPDIFF:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i64 [[GEPDIFF]]
+;
+  %p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
+  call void @use(ptr %p1)
+  %p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
+  call void @use(ptr %p2)
+  %p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
+  call void @use(ptr %p3)
+  %p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
+  call void @use(ptr %p4)
+  %i1 = ptrtoint ptr %p4 to i64
+  %i2 = ptrtoint ptr %p2 to i64
+  %d = sub i64 %i2, %i1
+  ret i64 %d
+}
+
+; The %p5 chain has three multi-use GEPs, so the total count across both chains
+; is three and the ptrtoint/sub sequence is kept.
+define i64 @multiple_geps_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4, i64 %idx5) {
+; CHECK-LABEL: @multiple_geps_multi_use_above_limit(
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1:%.*]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P2]])
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[IDX6:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P3]])
+; CHECK-NEXT:    [[P5:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[TMP3:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P5]])
+; CHECK-NEXT:    [[P6:%.*]] = getelementptr inbounds nuw i8, ptr [[P5]], i64 [[IDX7:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P6]])
+; CHECK-NEXT:    [[P7:%.*]] = getelementptr inbounds nuw i8, ptr [[P6]], i64 [[IDX5:%.*]]
+; CHECK-NEXT:    call void @use(ptr [[P7]])
+; CHECK-NEXT:    [[I1:%.*]] = ptrtoint ptr [[P7]] to i64
+; CHECK-NEXT:    [[I2:%.*]] = ptrtoint ptr [[P3]] to i64
+; CHECK-NEXT:    [[D:%.*]] = sub i64 [[I2]], [[I1]]
+; CHECK-NEXT:    ret i64 [[D]]
+;
+  %p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
+  call void @use(ptr %p1)
+  %p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
+  call void @use(ptr %p2)
+  %p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
+  call void @use(ptr %p3)
+  %p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
+  call void @use(ptr %p4)
+  %p5 = getelementptr inbounds nuw i8, ptr %p4, i64 %idx5
+  call void @use(ptr %p5)
+  %i1 = ptrtoint ptr %p5 to i64
+  %i2 = ptrtoint ptr %p2 to i64
+  %d = sub i64 %i2, %i1
+  ret i64 %d
+}