Skip to content

Commit 415c5ee

Browse files
committed
[LV] Ensure getScaledReductions only matches extends inside the loop
In getScaledReductions, for the case where we try to match a partial reduction of the form `%phi = phi i32 ...` ; `...` ; `%add = add i32 %phi, %zext`, where `%zext = zext i8 %some_val to i32`, we should ensure that %zext is actually inside the loop. Fixes #148260.
1 parent 9544bb5 commit 415c5ee

File tree

2 files changed

+253
-1
lines changed

2 files changed

+253
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8109,14 +8109,19 @@ bool VPRecipeBuilder::getScaledReductions(
81098109
std::optional<unsigned> BinOpc;
81108110
Type *ExtOpTypes[2] = {nullptr};
81118111

8112-
auto CollectExtInfo = [&Exts,
8112+
auto CollectExtInfo = [this, &Exts,
81138113
&ExtOpTypes](SmallVectorImpl<Value *> &Ops) -> bool {
81148114
unsigned I = 0;
81158115
for (Value *OpI : Ops) {
81168116
Value *ExtOp;
81178117
if (!match(OpI, m_ZExtOrSExt(m_Value(ExtOp))))
81188118
return false;
81198119
Exts[I] = cast<Instruction>(OpI);
8120+
8121+
// Other operand should live inside the loop
8122+
if (!CM.TheLoop->contains(Exts[I]))
8123+
return false;
8124+
81208125
ExtOpTypes[I] = ExtOp->getType();
81218126
I++;
81228127
}

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,253 @@ for.exit: ; preds = %for.body
911911
ret i32 %add
912912
}
913913

914+
define void @add_of_zext_outside_loop(ptr noalias %a, ptr noalias %b, i8 %c, i8 %d) #0 {
915+
; CHECK-INTERLEAVE1-LABEL: define void @add_of_zext_outside_loop(
916+
; CHECK-INTERLEAVE1-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
917+
; CHECK-INTERLEAVE1-NEXT: entry:
918+
; CHECK-INTERLEAVE1-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
919+
; CHECK-INTERLEAVE1-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
920+
; CHECK-INTERLEAVE1-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
921+
; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]]
922+
; CHECK-INTERLEAVE1: for.body:
923+
; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i32 [ [[CONV]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
924+
; CHECK-INTERLEAVE1-NEXT: [[RDX:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY]] ], [ [[RDX_NEXT:%.*]], [[FOR_BODY]] ]
925+
; CHECK-INTERLEAVE1-NEXT: [[IDXPROM:%.*]] = sext i32 [[IV]] to i64
926+
; CHECK-INTERLEAVE1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[IDXPROM]]
927+
; CHECK-INTERLEAVE1-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
928+
; CHECK-INTERLEAVE1-NEXT: [[RDX_NEXT]] = add nsw i32 [[RDX]], [[CONV1]]
929+
; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add i32 [[IV]], 4
930+
; CHECK-INTERLEAVE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 0
931+
; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
932+
; CHECK-INTERLEAVE1: exit:
933+
; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], [[FOR_BODY]] ]
934+
; CHECK-INTERLEAVE1-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4
935+
; CHECK-INTERLEAVE1-NEXT: ret void
936+
;
937+
; CHECK-INTERLEAVED-LABEL: define void @add_of_zext_outside_loop(
938+
; CHECK-INTERLEAVED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
939+
; CHECK-INTERLEAVED-NEXT: entry:
940+
; CHECK-INTERLEAVED-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
941+
; CHECK-INTERLEAVED-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
942+
; CHECK-INTERLEAVED-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
943+
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = sub i32 -4, [[CONV]]
944+
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
945+
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
946+
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
947+
; CHECK-INTERLEAVED: vector.scevcheck:
948+
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = trunc i8 [[D]] to i2
949+
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sub i2 0, [[TMP3]]
950+
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext i2 [[TMP4]] to i32
951+
; CHECK-INTERLEAVED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[TMP5]], 0
952+
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sub i32 -4, [[CONV]]
953+
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 2
954+
; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 4, i32 [[TMP7]])
955+
; CHECK-INTERLEAVED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
956+
; CHECK-INTERLEAVED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
957+
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i32 [[CONV]], [[MUL_RESULT]]
958+
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], [[CONV]]
959+
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
960+
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = or i1 [[IDENT_CHECK]], [[TMP10]]
961+
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
962+
; CHECK-INTERLEAVED: vector.ph:
963+
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
964+
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
965+
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul i32 [[N_VEC]], 4
966+
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i32 [[CONV]], [[TMP12]]
967+
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
968+
; CHECK-INTERLEAVED: vector.body:
969+
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
970+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[A_PROMOTED]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
971+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
972+
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = mul i32 [[INDEX]], 4
973+
; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[CONV]], [[TMP14]]
974+
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 4
975+
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET_IDX]] to i64
976+
; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = sext i32 [[TMP15]] to i64
977+
; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[TMP16]]
978+
; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[TMP17]]
979+
; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP18]], align 1
980+
; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP19]], align 1
981+
; CHECK-INTERLEAVED-NEXT: [[TMP20]] = add i32 [[VEC_PHI]], [[CONV1]]
982+
; CHECK-INTERLEAVED-NEXT: [[TMP21]] = add i32 [[VEC_PHI1]], [[CONV1]]
983+
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
984+
; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
985+
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
986+
; CHECK-INTERLEAVED: middle.block:
987+
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP21]], [[TMP20]]
988+
; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
989+
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
990+
; CHECK-INTERLEAVED: scalar.ph:
991+
;
992+
; CHECK-MAXBW-LABEL: define void @add_of_zext_outside_loop(
993+
; CHECK-MAXBW-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
994+
; CHECK-MAXBW-NEXT: entry:
995+
; CHECK-MAXBW-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
996+
; CHECK-MAXBW-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
997+
; CHECK-MAXBW-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
998+
; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]]
999+
; CHECK-MAXBW: for.body:
1000+
; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i32 [ [[CONV]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1001+
; CHECK-MAXBW-NEXT: [[RDX:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY]] ], [ [[RDX_NEXT:%.*]], [[FOR_BODY]] ]
1002+
; CHECK-MAXBW-NEXT: [[IDXPROM:%.*]] = sext i32 [[IV]] to i64
1003+
; CHECK-MAXBW-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[IDXPROM]]
1004+
; CHECK-MAXBW-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
1005+
; CHECK-MAXBW-NEXT: [[RDX_NEXT]] = add nsw i32 [[RDX]], [[CONV1]]
1006+
; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add i32 [[IV]], 4
1007+
; CHECK-MAXBW-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 0
1008+
; CHECK-MAXBW-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
1009+
; CHECK-MAXBW: exit:
1010+
; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], [[FOR_BODY]] ]
1011+
; CHECK-MAXBW-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4
1012+
; CHECK-MAXBW-NEXT: ret void
1013+
;
1014+
entry:
1015+
%conv = zext i8 %d to i32
1016+
%a.promoted = load i32, ptr %a, align 1
1017+
%conv1 = zext i8 %c to i32
1018+
br label %for.body
1019+
1020+
for.body:
1021+
%iv = phi i32 [ %conv, %entry ], [ %iv.next, %for.body ]
1022+
%rdx = phi i32 [ %a.promoted, %entry ], [ %rdx.next, %for.body ]
1023+
%idxprom = sext i32 %iv to i64
1024+
%arrayidx = getelementptr inbounds [0 x i8], ptr %b, i64 0, i64 %idxprom
1025+
store i8 0, ptr %arrayidx, align 1
1026+
%rdx.next = add nsw i32 %rdx, %conv1
1027+
%iv.next = add i32 %iv, 4
1028+
%cmp = icmp eq i32 %iv.next, 0
1029+
br i1 %cmp, label %exit, label %for.body
1030+
1031+
exit:
1032+
%add.lcssa = phi i32 [ %rdx.next, %for.body ]
1033+
store i32 %add.lcssa, ptr %a, align 4
1034+
ret void
1035+
}
1036+
1037+
define void @add_of_loop_invariant_zext(ptr noalias %a, ptr noalias %b, i8 %c, i8 %d) #0 {
1038+
; CHECK-INTERLEAVE1-LABEL: define void @add_of_loop_invariant_zext(
1039+
; CHECK-INTERLEAVE1-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
1040+
; CHECK-INTERLEAVE1-NEXT: entry:
1041+
; CHECK-INTERLEAVE1-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
1042+
; CHECK-INTERLEAVE1-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
1043+
; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]]
1044+
; CHECK-INTERLEAVE1: for.body:
1045+
; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i32 [ [[CONV]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1046+
; CHECK-INTERLEAVE1-NEXT: [[RDX:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY]] ], [ [[RDX_NEXT:%.*]], [[FOR_BODY]] ]
1047+
; CHECK-INTERLEAVE1-NEXT: [[IDXPROM:%.*]] = sext i32 [[IV]] to i64
1048+
; CHECK-INTERLEAVE1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[IDXPROM]]
1049+
; CHECK-INTERLEAVE1-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
1050+
; CHECK-INTERLEAVE1-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
1051+
; CHECK-INTERLEAVE1-NEXT: [[RDX_NEXT]] = add nsw i32 [[RDX]], [[CONV1]]
1052+
; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add i32 [[IV]], 4
1053+
; CHECK-INTERLEAVE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 0
1054+
; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
1055+
; CHECK-INTERLEAVE1: exit:
1056+
; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], [[FOR_BODY]] ]
1057+
; CHECK-INTERLEAVE1-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4
1058+
; CHECK-INTERLEAVE1-NEXT: ret void
1059+
;
1060+
; CHECK-INTERLEAVED-LABEL: define void @add_of_loop_invariant_zext(
1061+
; CHECK-INTERLEAVED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
1062+
; CHECK-INTERLEAVED-NEXT: entry:
1063+
; CHECK-INTERLEAVED-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
1064+
; CHECK-INTERLEAVED-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
1065+
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = sub i32 -4, [[CONV]]
1066+
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
1067+
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
1068+
; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
1069+
; CHECK-INTERLEAVED: vector.scevcheck:
1070+
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = trunc i8 [[D]] to i2
1071+
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sub i2 0, [[TMP3]]
1072+
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext i2 [[TMP4]] to i32
1073+
; CHECK-INTERLEAVED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[TMP5]], 0
1074+
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sub i32 -4, [[CONV]]
1075+
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 2
1076+
; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 4, i32 [[TMP7]])
1077+
; CHECK-INTERLEAVED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
1078+
; CHECK-INTERLEAVED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
1079+
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i32 [[CONV]], [[MUL_RESULT]]
1080+
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], [[CONV]]
1081+
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
1082+
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = or i1 [[IDENT_CHECK]], [[TMP10]]
1083+
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1084+
; CHECK-INTERLEAVED: vector.ph:
1085+
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
1086+
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
1087+
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul i32 [[N_VEC]], 4
1088+
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i32 [[CONV]], [[TMP12]]
1089+
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext i8 [[C]] to i32
1090+
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
1091+
; CHECK-INTERLEAVED: vector.body:
1092+
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1093+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[A_PROMOTED]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
1094+
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
1095+
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = mul i32 [[INDEX]], 4
1096+
; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[CONV]], [[TMP15]]
1097+
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 4
1098+
; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = sext i32 [[OFFSET_IDX]] to i64
1099+
; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = sext i32 [[TMP16]] to i64
1100+
; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[TMP17]]
1101+
; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[TMP18]]
1102+
; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP19]], align 1
1103+
; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP20]], align 1
1104+
; CHECK-INTERLEAVED-NEXT: [[TMP21]] = add i32 [[VEC_PHI]], [[TMP14]]
1105+
; CHECK-INTERLEAVED-NEXT: [[TMP22]] = add i32 [[VEC_PHI1]], [[TMP14]]
1106+
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
1107+
; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
1108+
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1109+
; CHECK-INTERLEAVED: middle.block:
1110+
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP22]], [[TMP21]]
1111+
; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
1112+
; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1113+
; CHECK-INTERLEAVED: scalar.ph:
1114+
;
1115+
; CHECK-MAXBW-LABEL: define void @add_of_loop_invariant_zext(
1116+
; CHECK-MAXBW-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
1117+
; CHECK-MAXBW-NEXT: entry:
1118+
; CHECK-MAXBW-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
1119+
; CHECK-MAXBW-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
1120+
; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]]
1121+
; CHECK-MAXBW: for.body:
1122+
; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i32 [ [[CONV]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1123+
; CHECK-MAXBW-NEXT: [[RDX:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY]] ], [ [[RDX_NEXT:%.*]], [[FOR_BODY]] ]
1124+
; CHECK-MAXBW-NEXT: [[IDXPROM:%.*]] = sext i32 [[IV]] to i64
1125+
; CHECK-MAXBW-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[IDXPROM]]
1126+
; CHECK-MAXBW-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
1127+
; CHECK-MAXBW-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
1128+
; CHECK-MAXBW-NEXT: [[RDX_NEXT]] = add nsw i32 [[RDX]], [[CONV1]]
1129+
; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add i32 [[IV]], 4
1130+
; CHECK-MAXBW-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 0
1131+
; CHECK-MAXBW-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
1132+
; CHECK-MAXBW: exit:
1133+
; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], [[FOR_BODY]] ]
1134+
; CHECK-MAXBW-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4
1135+
; CHECK-MAXBW-NEXT: ret void
1136+
;
1137+
entry:
1138+
%conv = zext i8 %d to i32
1139+
%a.promoted = load i32, ptr %a, align 1
1140+
br label %for.body
1141+
1142+
for.body:
1143+
%iv = phi i32 [ %conv, %entry ], [ %iv.next, %for.body ]
1144+
%rdx = phi i32 [ %a.promoted, %entry ], [ %rdx.next, %for.body ]
1145+
%idxprom = sext i32 %iv to i64
1146+
%arrayidx = getelementptr inbounds [0 x i8], ptr %b, i64 0, i64 %idxprom
1147+
store i8 0, ptr %arrayidx, align 1
1148+
%conv1 = zext i8 %c to i32
1149+
%rdx.next = add nsw i32 %rdx, %conv1
1150+
%iv.next = add i32 %iv, 4
1151+
%cmp = icmp eq i32 %iv.next, 0
1152+
br i1 %cmp, label %exit, label %for.body
1153+
1154+
exit:
1155+
%add.lcssa = phi i32 [ %rdx.next, %for.body ]
1156+
store i32 %add.lcssa, ptr %a, align 4
1157+
ret void
1158+
}
1159+
1160+
9141161
!0 = distinct !{!0, !1}
9151162
!1 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
9161163
attributes #0 = { vscale_range(1,16) "target-features"="+sve" }

0 commit comments

Comments
 (0)