@@ -911,6 +911,253 @@ for.exit: ; preds = %for.body
911
911
ret i32 %add
912
912
}
913
913
914
; Test: add reduction whose addend is a zext computed in the entry block,
; i.e. outside the loop. Each iteration stores 0 to %b[%iv] and adds %conv1
; (the zext of %c) to a running sum that is loaded from %a before the loop
; and stored back to %a after it. %iv starts at zext(%d), steps by 4, and
; the loop exits when %iv.next == 0.
; Expected output according to the checks below:
;  - CHECK-INTERLEAVE1 and CHECK-MAXBW: the scalar loop is left as written
;    (entry / for.body / exit, no vector.body).
;  - CHECK-INTERLEAVED: a vector.body holding two scalar copies of the
;    store and the add (index step 2), reached through a vector.scevcheck
;    block; both adds use the entry-block zext directly.
; NOTE(review): presumably this guards against mis-handling an add-of-extend
; reduction when the extend is not inside the loop -- confirm against the
; RUN lines and the commit message.
+ define void @add_of_zext_outside_loop (ptr noalias %a , ptr noalias %b , i8 %c , i8 %d ) #0 {
915
+ ; CHECK-INTERLEAVE1-LABEL: define void @add_of_zext_outside_loop(
916
+ ; CHECK-INTERLEAVE1-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
917
+ ; CHECK-INTERLEAVE1-NEXT: entry:
918
+ ; CHECK-INTERLEAVE1-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
919
+ ; CHECK-INTERLEAVE1-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
920
+ ; CHECK-INTERLEAVE1-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
921
+ ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]]
922
+ ; CHECK-INTERLEAVE1: for.body:
923
+ ; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i32 [ [[CONV]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
924
+ ; CHECK-INTERLEAVE1-NEXT: [[RDX:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY]] ], [ [[RDX_NEXT:%.*]], [[FOR_BODY]] ]
925
+ ; CHECK-INTERLEAVE1-NEXT: [[IDXPROM:%.*]] = sext i32 [[IV]] to i64
926
+ ; CHECK-INTERLEAVE1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[IDXPROM]]
927
+ ; CHECK-INTERLEAVE1-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
928
+ ; CHECK-INTERLEAVE1-NEXT: [[RDX_NEXT]] = add nsw i32 [[RDX]], [[CONV1]]
929
+ ; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add i32 [[IV]], 4
930
+ ; CHECK-INTERLEAVE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 0
931
+ ; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
932
+ ; CHECK-INTERLEAVE1: exit:
933
+ ; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], [[FOR_BODY]] ]
934
+ ; CHECK-INTERLEAVE1-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4
935
+ ; CHECK-INTERLEAVE1-NEXT: ret void
936
+ ;
937
+ ; CHECK-INTERLEAVED-LABEL: define void @add_of_zext_outside_loop(
938
+ ; CHECK-INTERLEAVED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
939
+ ; CHECK-INTERLEAVED-NEXT: entry:
940
+ ; CHECK-INTERLEAVED-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
941
+ ; CHECK-INTERLEAVED-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
942
+ ; CHECK-INTERLEAVED-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
943
+ ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = sub i32 -4, [[CONV]]
944
+ ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
945
+ ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
946
+ ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
947
+ ; CHECK-INTERLEAVED: vector.scevcheck:
948
+ ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = trunc i8 [[D]] to i2
949
+ ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sub i2 0, [[TMP3]]
950
+ ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext i2 [[TMP4]] to i32
951
+ ; CHECK-INTERLEAVED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[TMP5]], 0
952
+ ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sub i32 -4, [[CONV]]
953
+ ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 2
954
+ ; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 4, i32 [[TMP7]])
955
+ ; CHECK-INTERLEAVED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
956
+ ; CHECK-INTERLEAVED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
957
+ ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i32 [[CONV]], [[MUL_RESULT]]
958
+ ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], [[CONV]]
959
+ ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
960
+ ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = or i1 [[IDENT_CHECK]], [[TMP10]]
961
+ ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
962
+ ; CHECK-INTERLEAVED: vector.ph:
963
+ ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
964
+ ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
965
+ ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul i32 [[N_VEC]], 4
966
+ ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i32 [[CONV]], [[TMP12]]
967
+ ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
968
+ ; CHECK-INTERLEAVED: vector.body:
969
+ ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
970
+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[A_PROMOTED]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
971
+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
972
+ ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = mul i32 [[INDEX]], 4
973
+ ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[CONV]], [[TMP14]]
974
+ ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 4
975
+ ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET_IDX]] to i64
976
+ ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = sext i32 [[TMP15]] to i64
977
+ ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[TMP16]]
978
+ ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[TMP17]]
979
+ ; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP18]], align 1
980
+ ; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP19]], align 1
981
+ ; CHECK-INTERLEAVED-NEXT: [[TMP20]] = add i32 [[VEC_PHI]], [[CONV1]]
982
+ ; CHECK-INTERLEAVED-NEXT: [[TMP21]] = add i32 [[VEC_PHI1]], [[CONV1]]
983
+ ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
984
+ ; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
985
+ ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
986
+ ; CHECK-INTERLEAVED: middle.block:
987
+ ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP21]], [[TMP20]]
988
+ ; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
989
+ ; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
990
+ ; CHECK-INTERLEAVED: scalar.ph:
991
+ ;
992
+ ; CHECK-MAXBW-LABEL: define void @add_of_zext_outside_loop(
993
+ ; CHECK-MAXBW-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
994
+ ; CHECK-MAXBW-NEXT: entry:
995
+ ; CHECK-MAXBW-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
996
+ ; CHECK-MAXBW-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
997
+ ; CHECK-MAXBW-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
998
+ ; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]]
999
+ ; CHECK-MAXBW: for.body:
1000
+ ; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i32 [ [[CONV]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1001
+ ; CHECK-MAXBW-NEXT: [[RDX:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY]] ], [ [[RDX_NEXT:%.*]], [[FOR_BODY]] ]
1002
+ ; CHECK-MAXBW-NEXT: [[IDXPROM:%.*]] = sext i32 [[IV]] to i64
1003
+ ; CHECK-MAXBW-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[IDXPROM]]
1004
+ ; CHECK-MAXBW-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
1005
+ ; CHECK-MAXBW-NEXT: [[RDX_NEXT]] = add nsw i32 [[RDX]], [[CONV1]]
1006
+ ; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add i32 [[IV]], 4
1007
+ ; CHECK-MAXBW-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 0
1008
+ ; CHECK-MAXBW-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
1009
+ ; CHECK-MAXBW: exit:
1010
+ ; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], [[FOR_BODY]] ]
1011
+ ; CHECK-MAXBW-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4
1012
+ ; CHECK-MAXBW-NEXT: ret void
1013
+ ;
1014
+ entry:
1015
+ %conv = zext i8 %d to i32
1016
+ %a.promoted = load i32 , ptr %a , align 1
1017
; The extend of %c is computed once here, in the entry block, not in for.body.
+ %conv1 = zext i8 %c to i32
1018
+ br label %for.body
1019
+
1020
+ for.body:
1021
+ %iv = phi i32 [ %conv , %entry ], [ %iv.next , %for.body ]
1022
+ %rdx = phi i32 [ %a.promoted , %entry ], [ %rdx.next , %for.body ]
1023
+ %idxprom = sext i32 %iv to i64
1024
+ %arrayidx = getelementptr inbounds [0 x i8 ], ptr %b , i64 0 , i64 %idxprom
1025
+ store i8 0 , ptr %arrayidx , align 1
1026
; Reduction step: accumulate the value that was extended before the loop.
+ %rdx.next = add nsw i32 %rdx , %conv1
1027
+ %iv.next = add i32 %iv , 4
1028
; Only loop exit: taken when the incremented induction variable equals 0.
+ %cmp = icmp eq i32 %iv.next , 0
1029
+ br i1 %cmp , label %exit , label %for.body
1030
+
1031
+ exit:
1032
+ %add.lcssa = phi i32 [ %rdx.next , %for.body ]
1033
+ store i32 %add.lcssa , ptr %a , align 4
1034
+ ret void
1035
+ }
1036
+
1037
; Test: add reduction whose addend is a zext located inside the loop body
; but whose operand (%c) is a function argument, so the extended value is
; loop-invariant. Each iteration stores 0 to %b[%iv] and adds zext(%c) to a
; running sum that is loaded from %a before the loop and stored back to %a
; after it. %iv starts at zext(%d), steps by 4, and the loop exits when
; %iv.next == 0.
; Expected output according to the checks below:
;  - CHECK-INTERLEAVE1 and CHECK-MAXBW: the scalar loop is left as written,
;    with the zext still inside for.body (no vector.body).
;  - CHECK-INTERLEAVED: a vector.body holding two scalar copies of the
;    store and the add (index step 2), reached through a vector.scevcheck
;    block; the zext of %c is emitted once in vector.ph and both adds
;    reuse it.
; NOTE(review): presumably this guards against mis-handling an add-of-extend
; reduction when the extend is loop-invariant -- confirm against the RUN
; lines and the commit message.
+ define void @add_of_loop_invariant_zext (ptr noalias %a , ptr noalias %b , i8 %c , i8 %d ) #0 {
1038
+ ; CHECK-INTERLEAVE1-LABEL: define void @add_of_loop_invariant_zext(
1039
+ ; CHECK-INTERLEAVE1-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
1040
+ ; CHECK-INTERLEAVE1-NEXT: entry:
1041
+ ; CHECK-INTERLEAVE1-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
1042
+ ; CHECK-INTERLEAVE1-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
1043
+ ; CHECK-INTERLEAVE1-NEXT: br label [[FOR_BODY:%.*]]
1044
+ ; CHECK-INTERLEAVE1: for.body:
1045
+ ; CHECK-INTERLEAVE1-NEXT: [[IV:%.*]] = phi i32 [ [[CONV]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1046
+ ; CHECK-INTERLEAVE1-NEXT: [[RDX:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY]] ], [ [[RDX_NEXT:%.*]], [[FOR_BODY]] ]
1047
+ ; CHECK-INTERLEAVE1-NEXT: [[IDXPROM:%.*]] = sext i32 [[IV]] to i64
1048
+ ; CHECK-INTERLEAVE1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[IDXPROM]]
1049
+ ; CHECK-INTERLEAVE1-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
1050
+ ; CHECK-INTERLEAVE1-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
1051
+ ; CHECK-INTERLEAVE1-NEXT: [[RDX_NEXT]] = add nsw i32 [[RDX]], [[CONV1]]
1052
+ ; CHECK-INTERLEAVE1-NEXT: [[IV_NEXT]] = add i32 [[IV]], 4
1053
+ ; CHECK-INTERLEAVE1-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 0
1054
+ ; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
1055
+ ; CHECK-INTERLEAVE1: exit:
1056
+ ; CHECK-INTERLEAVE1-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], [[FOR_BODY]] ]
1057
+ ; CHECK-INTERLEAVE1-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4
1058
+ ; CHECK-INTERLEAVE1-NEXT: ret void
1059
+ ;
1060
+ ; CHECK-INTERLEAVED-LABEL: define void @add_of_loop_invariant_zext(
1061
+ ; CHECK-INTERLEAVED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
1062
+ ; CHECK-INTERLEAVED-NEXT: entry:
1063
+ ; CHECK-INTERLEAVED-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
1064
+ ; CHECK-INTERLEAVED-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
1065
+ ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = sub i32 -4, [[CONV]]
1066
+ ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
1067
+ ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
1068
+ ; CHECK-INTERLEAVED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
1069
+ ; CHECK-INTERLEAVED: vector.scevcheck:
1070
+ ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = trunc i8 [[D]] to i2
1071
+ ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sub i2 0, [[TMP3]]
1072
+ ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext i2 [[TMP4]] to i32
1073
+ ; CHECK-INTERLEAVED-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[TMP5]], 0
1074
+ ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sub i32 -4, [[CONV]]
1075
+ ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 2
1076
+ ; CHECK-INTERLEAVED-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 4, i32 [[TMP7]])
1077
+ ; CHECK-INTERLEAVED-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
1078
+ ; CHECK-INTERLEAVED-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
1079
+ ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = add i32 [[CONV]], [[MUL_RESULT]]
1080
+ ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], [[CONV]]
1081
+ ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
1082
+ ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = or i1 [[IDENT_CHECK]], [[TMP10]]
1083
+ ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1084
+ ; CHECK-INTERLEAVED: vector.ph:
1085
+ ; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
1086
+ ; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
1087
+ ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul i32 [[N_VEC]], 4
1088
+ ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = add i32 [[CONV]], [[TMP12]]
1089
+ ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext i8 [[C]] to i32
1090
+ ; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
1091
+ ; CHECK-INTERLEAVED: vector.body:
1092
+ ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1093
+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[A_PROMOTED]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
1094
+ ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
1095
+ ; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = mul i32 [[INDEX]], 4
1096
+ ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[CONV]], [[TMP15]]
1097
+ ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 4
1098
+ ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = sext i32 [[OFFSET_IDX]] to i64
1099
+ ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = sext i32 [[TMP16]] to i64
1100
+ ; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[TMP17]]
1101
+ ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[TMP18]]
1102
+ ; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP19]], align 1
1103
+ ; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP20]], align 1
1104
+ ; CHECK-INTERLEAVED-NEXT: [[TMP21]] = add i32 [[VEC_PHI]], [[TMP14]]
1105
+ ; CHECK-INTERLEAVED-NEXT: [[TMP22]] = add i32 [[VEC_PHI1]], [[TMP14]]
1106
+ ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
1107
+ ; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
1108
+ ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1109
+ ; CHECK-INTERLEAVED: middle.block:
1110
+ ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP22]], [[TMP21]]
1111
+ ; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
1112
+ ; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1113
+ ; CHECK-INTERLEAVED: scalar.ph:
1114
+ ;
1115
+ ; CHECK-MAXBW-LABEL: define void @add_of_loop_invariant_zext(
1116
+ ; CHECK-MAXBW-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i8 [[C:%.*]], i8 [[D:%.*]]) #[[ATTR0]] {
1117
+ ; CHECK-MAXBW-NEXT: entry:
1118
+ ; CHECK-MAXBW-NEXT: [[CONV:%.*]] = zext i8 [[D]] to i32
1119
+ ; CHECK-MAXBW-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1
1120
+ ; CHECK-MAXBW-NEXT: br label [[FOR_BODY:%.*]]
1121
+ ; CHECK-MAXBW: for.body:
1122
+ ; CHECK-MAXBW-NEXT: [[IV:%.*]] = phi i32 [ [[CONV]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1123
+ ; CHECK-MAXBW-NEXT: [[RDX:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY]] ], [ [[RDX_NEXT:%.*]], [[FOR_BODY]] ]
1124
+ ; CHECK-MAXBW-NEXT: [[IDXPROM:%.*]] = sext i32 [[IV]] to i64
1125
+ ; CHECK-MAXBW-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [0 x i8], ptr [[B]], i64 0, i64 [[IDXPROM]]
1126
+ ; CHECK-MAXBW-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
1127
+ ; CHECK-MAXBW-NEXT: [[CONV1:%.*]] = zext i8 [[C]] to i32
1128
+ ; CHECK-MAXBW-NEXT: [[RDX_NEXT]] = add nsw i32 [[RDX]], [[CONV1]]
1129
+ ; CHECK-MAXBW-NEXT: [[IV_NEXT]] = add i32 [[IV]], 4
1130
+ ; CHECK-MAXBW-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV_NEXT]], 0
1131
+ ; CHECK-MAXBW-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
1132
+ ; CHECK-MAXBW: exit:
1133
+ ; CHECK-MAXBW-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], [[FOR_BODY]] ]
1134
+ ; CHECK-MAXBW-NEXT: store i32 [[ADD_LCSSA]], ptr [[A]], align 4
1135
+ ; CHECK-MAXBW-NEXT: ret void
1136
+ ;
1137
+ entry:
1138
+ %conv = zext i8 %d to i32
1139
+ %a.promoted = load i32 , ptr %a , align 1
1140
+ br label %for.body
1141
+
1142
+ for.body:
1143
+ %iv = phi i32 [ %conv , %entry ], [ %iv.next , %for.body ]
1144
+ %rdx = phi i32 [ %a.promoted , %entry ], [ %rdx.next , %for.body ]
1145
+ %idxprom = sext i32 %iv to i64
1146
+ %arrayidx = getelementptr inbounds [0 x i8 ], ptr %b , i64 0 , i64 %idxprom
1147
+ store i8 0 , ptr %arrayidx , align 1
1148
; The extend sits in the loop body, but its operand %c is a function
; argument, so the result is the same on every iteration.
+ %conv1 = zext i8 %c to i32
1149
+ %rdx.next = add nsw i32 %rdx , %conv1
1150
+ %iv.next = add i32 %iv , 4
1151
; Only loop exit: taken when the incremented induction variable equals 0.
+ %cmp = icmp eq i32 %iv.next , 0
1152
+ br i1 %cmp , label %exit , label %for.body
1153
+
1154
+ exit:
1155
+ %add.lcssa = phi i32 [ %rdx.next , %for.body ]
1156
+ store i32 %add.lcssa , ptr %a , align 4
1157
+ ret void
1158
+ }
1159
+
1160
+
914
1161
; Loop metadata: !0 is a distinct, self-referential node whose second operand
; !1 sets "llvm.loop.vectorize.predicate.enable" to true. The two functions
; directly above (@add_of_zext_outside_loop, @add_of_loop_invariant_zext) do
; not attach !0 to their loop branches.
; Attribute group #0, used by those two functions, sets vscale_range(1,16)
; and enables the "+sve" target feature.
!0 = distinct !{!0 , !1 }
915
1162
!1 = !{!"llvm.loop.vectorize.predicate.enable" , i1 true }
916
1163
attributes #0 = { vscale_range(1 ,16 ) "target-features" ="+sve" }
0 commit comments