@@ -128,6 +128,34 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
128128 ret <1 x bfloat> %ret
129129}
130130
; Test: acquire atomic load of <1 x ptr> from %x, declared with align 4.
; Both prefixes expect a ___atomic_load call with $8 in %edi, the source
; pointer copied from %rdi to %rsi, %rsp (the slot made by pushq %rax) in %rdx
; and $2 in %ecx; the result is then reloaded from (%rsp) into %rax.
; CHECK3 and CHECK0 differ only in the order of the argument-setup moves.
; NOTE(review): the libcall is presumably selected because align 4 is below
; the 8-byte size of the loaded type -- confirm against the RUN lines.
131+ define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
132+ ; CHECK3-LABEL: atomic_vec1_ptr:
133+ ; CHECK3: ## %bb.0:
134+ ; CHECK3-NEXT: pushq %rax
135+ ; CHECK3-NEXT: movq %rdi, %rsi
136+ ; CHECK3-NEXT: movq %rsp, %rdx
137+ ; CHECK3-NEXT: movl $8, %edi
138+ ; CHECK3-NEXT: movl $2, %ecx
139+ ; CHECK3-NEXT: callq ___atomic_load
140+ ; CHECK3-NEXT: movq (%rsp), %rax
141+ ; CHECK3-NEXT: popq %rcx
142+ ; CHECK3-NEXT: retq
143+ ;
144+ ; CHECK0-LABEL: atomic_vec1_ptr:
145+ ; CHECK0: ## %bb.0:
146+ ; CHECK0-NEXT: pushq %rax
147+ ; CHECK0-NEXT: movq %rdi, %rsi
148+ ; CHECK0-NEXT: movl $8, %edi
149+ ; CHECK0-NEXT: movq %rsp, %rdx
150+ ; CHECK0-NEXT: movl $2, %ecx
151+ ; CHECK0-NEXT: callq ___atomic_load
152+ ; CHECK0-NEXT: movq (%rsp), %rax
153+ ; CHECK0-NEXT: popq %rcx
154+ ; CHECK0-NEXT: retq
155+ %ret = load atomic <1 x ptr >, ptr %x acquire , align 4
156+ ret <1 x ptr > %ret
157+ }
158+
131159define <1 x half > @atomic_vec1_half (ptr %x ) {
132160; CHECK3-LABEL: atomic_vec1_half:
133161; CHECK3: ## %bb.0:
@@ -155,3 +183,214 @@ define <1 x float> @atomic_vec1_float(ptr %x) {
155183 %ret = load atomic <1 x float >, ptr %x acquire , align 4
156184 ret <1 x float > %ret
157185}
186+
; Test: acquire atomic load of <1 x i64> from %x, declared with align 4.
; Both prefixes expect a ___atomic_load call with $8 in %edi, the source
; pointer in %rsi, %rsp (the slot made by pushq %rax) in %rdx and $2 in %ecx;
; the loaded value is returned in %rax via a reload from (%rsp).
; CHECK3 and CHECK0 differ only in the order of the argument-setup moves.
; NOTE(review): the libcall is presumably selected because align 4 is below
; the 8-byte size of the loaded type -- confirm against the RUN lines.
187+ define <1 x i64 > @atomic_vec1_i64 (ptr %x ) nounwind {
188+ ; CHECK3-LABEL: atomic_vec1_i64:
189+ ; CHECK3: ## %bb.0:
190+ ; CHECK3-NEXT: pushq %rax
191+ ; CHECK3-NEXT: movq %rdi, %rsi
192+ ; CHECK3-NEXT: movq %rsp, %rdx
193+ ; CHECK3-NEXT: movl $8, %edi
194+ ; CHECK3-NEXT: movl $2, %ecx
195+ ; CHECK3-NEXT: callq ___atomic_load
196+ ; CHECK3-NEXT: movq (%rsp), %rax
197+ ; CHECK3-NEXT: popq %rcx
198+ ; CHECK3-NEXT: retq
199+ ;
200+ ; CHECK0-LABEL: atomic_vec1_i64:
201+ ; CHECK0: ## %bb.0:
202+ ; CHECK0-NEXT: pushq %rax
203+ ; CHECK0-NEXT: movq %rdi, %rsi
204+ ; CHECK0-NEXT: movl $8, %edi
205+ ; CHECK0-NEXT: movq %rsp, %rdx
206+ ; CHECK0-NEXT: movl $2, %ecx
207+ ; CHECK0-NEXT: callq ___atomic_load
208+ ; CHECK0-NEXT: movq (%rsp), %rax
209+ ; CHECK0-NEXT: popq %rcx
210+ ; CHECK0-NEXT: retq
211+ %ret = load atomic <1 x i64 >, ptr %x acquire , align 4
212+ ret <1 x i64 > %ret
213+ }
214+
; Test: acquire atomic load of <1 x double> from %x, declared with align 4.
; Both prefixes expect a ___atomic_load call with $8 in %edi, the source
; pointer in %rsi, %rsp (the slot made by pushq %rax) in %rdx and $2 in %ecx.
; Unlike the integer/pointer variants, the result is reloaded into %xmm0 with
; movsd and the stack slot is popped into %rax.
; CHECK3 and CHECK0 differ only in the order of the argument-setup moves.
; NOTE(review): the libcall is presumably selected because align 4 is below
; the 8-byte size of the loaded type -- confirm against the RUN lines.
215+ define <1 x double > @atomic_vec1_double (ptr %x ) nounwind {
216+ ; CHECK3-LABEL: atomic_vec1_double:
217+ ; CHECK3: ## %bb.0:
218+ ; CHECK3-NEXT: pushq %rax
219+ ; CHECK3-NEXT: movq %rdi, %rsi
220+ ; CHECK3-NEXT: movq %rsp, %rdx
221+ ; CHECK3-NEXT: movl $8, %edi
222+ ; CHECK3-NEXT: movl $2, %ecx
223+ ; CHECK3-NEXT: callq ___atomic_load
224+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
225+ ; CHECK3-NEXT: popq %rax
226+ ; CHECK3-NEXT: retq
227+ ;
228+ ; CHECK0-LABEL: atomic_vec1_double:
229+ ; CHECK0: ## %bb.0:
230+ ; CHECK0-NEXT: pushq %rax
231+ ; CHECK0-NEXT: movq %rdi, %rsi
232+ ; CHECK0-NEXT: movl $8, %edi
233+ ; CHECK0-NEXT: movq %rsp, %rdx
234+ ; CHECK0-NEXT: movl $2, %ecx
235+ ; CHECK0-NEXT: callq ___atomic_load
236+ ; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
237+ ; CHECK0-NEXT: popq %rax
238+ ; CHECK0-NEXT: retq
239+ %ret = load atomic <1 x double >, ptr %x acquire , align 4
240+ ret <1 x double > %ret
241+ }
242+
; Test: acquire atomic load of <2 x i32> from %x, declared with align 4.
; Both prefixes expect a ___atomic_load call with $8 in %edi, the source
; pointer in %rsi, %rsp (the slot made by pushq %rax) in %rdx and $2 in %ecx,
; followed by a reload of the 8-byte result into %xmm0.
; The prefixes differ in argument-setup order and in the reload mnemonic:
; CHECK3 expects movsd, CHECK0 expects movq.
; NOTE(review): the libcall is presumably selected because align 4 is below
; the 8-byte size of the loaded type -- confirm against the RUN lines.
243+ define <2 x i32 > @atomic_vec2_i32 (ptr %x ) nounwind {
244+ ; CHECK3-LABEL: atomic_vec2_i32:
245+ ; CHECK3: ## %bb.0:
246+ ; CHECK3-NEXT: pushq %rax
247+ ; CHECK3-NEXT: movq %rdi, %rsi
248+ ; CHECK3-NEXT: movq %rsp, %rdx
249+ ; CHECK3-NEXT: movl $8, %edi
250+ ; CHECK3-NEXT: movl $2, %ecx
251+ ; CHECK3-NEXT: callq ___atomic_load
252+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
253+ ; CHECK3-NEXT: popq %rax
254+ ; CHECK3-NEXT: retq
255+ ;
256+ ; CHECK0-LABEL: atomic_vec2_i32:
257+ ; CHECK0: ## %bb.0:
258+ ; CHECK0-NEXT: pushq %rax
259+ ; CHECK0-NEXT: movq %rdi, %rsi
260+ ; CHECK0-NEXT: movl $8, %edi
261+ ; CHECK0-NEXT: movq %rsp, %rdx
262+ ; CHECK0-NEXT: movl $2, %ecx
263+ ; CHECK0-NEXT: callq ___atomic_load
264+ ; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
265+ ; CHECK0-NEXT: popq %rax
266+ ; CHECK0-NEXT: retq
267+ %ret = load atomic <2 x i32 >, ptr %x acquire , align 4
268+ ret <2 x i32 > %ret
269+ }
270+
; Test: acquire atomic load of <4 x float> from %x, declared with align 4.
; Both prefixes expect 24 bytes of stack to be reserved, then a
; ___atomic_load call with $16 in %edi, the source pointer in %rsi, %rsp in
; %rdx and $2 in %ecx; the result is reloaded from (%rsp) into %xmm0 with
; movaps before the stack is restored.
; CHECK3 and CHECK0 differ only in the order of the argument-setup moves.
; NOTE(review): the libcall is presumably selected because align 4 is below
; the 16-byte size of the loaded type -- confirm against the RUN lines.
271+ define <4 x float > @atomic_vec4_float (ptr %x ) nounwind {
272+ ; CHECK3-LABEL: atomic_vec4_float:
273+ ; CHECK3: ## %bb.0:
274+ ; CHECK3-NEXT: subq $24, %rsp
275+ ; CHECK3-NEXT: movq %rdi, %rsi
276+ ; CHECK3-NEXT: movq %rsp, %rdx
277+ ; CHECK3-NEXT: movl $16, %edi
278+ ; CHECK3-NEXT: movl $2, %ecx
279+ ; CHECK3-NEXT: callq ___atomic_load
280+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
281+ ; CHECK3-NEXT: addq $24, %rsp
282+ ; CHECK3-NEXT: retq
283+ ;
284+ ; CHECK0-LABEL: atomic_vec4_float:
285+ ; CHECK0: ## %bb.0:
286+ ; CHECK0-NEXT: subq $24, %rsp
287+ ; CHECK0-NEXT: movq %rdi, %rsi
288+ ; CHECK0-NEXT: movl $16, %edi
289+ ; CHECK0-NEXT: movq %rsp, %rdx
290+ ; CHECK0-NEXT: movl $2, %ecx
291+ ; CHECK0-NEXT: callq ___atomic_load
292+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
293+ ; CHECK0-NEXT: addq $24, %rsp
294+ ; CHECK0-NEXT: retq
295+ %ret = load atomic <4 x float >, ptr %x acquire , align 4
296+ ret <4 x float > %ret
297+ }
298+
; Test: acquire atomic load of <8 x double> from %x, declared with align 4.
; Both prefixes expect 72 bytes of stack to be reserved, then a
; ___atomic_load call with $64 in %edi, the source pointer in %rsi, %rsp in
; %rdx and $2 in %ecx; the result is reloaded from the stack into
; %xmm0..%xmm3 (offsets matched by regex) before the stack is restored.
; The prefixes differ in argument-setup order and in the reload mnemonic:
; CHECK3 expects movaps, CHECK0 expects movapd.
; NOTE(review): the libcall is presumably selected because align 4 is below
; the 64-byte size of the loaded type -- confirm against the RUN lines.
299+ define <8 x double > @atomic_vec8_double (ptr %x ) nounwind {
300+ ; CHECK3-LABEL: atomic_vec8_double:
301+ ; CHECK3: ## %bb.0:
302+ ; CHECK3-NEXT: subq $72, %rsp
303+ ; CHECK3-NEXT: movq %rdi, %rsi
304+ ; CHECK3-NEXT: movq %rsp, %rdx
305+ ; CHECK3-NEXT: movl $64, %edi
306+ ; CHECK3-NEXT: movl $2, %ecx
307+ ; CHECK3-NEXT: callq ___atomic_load
308+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
309+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
310+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
311+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
312+ ; CHECK3-NEXT: addq $72, %rsp
313+ ; CHECK3-NEXT: retq
314+ ;
315+ ; CHECK0-LABEL: atomic_vec8_double:
316+ ; CHECK0: ## %bb.0:
317+ ; CHECK0-NEXT: subq $72, %rsp
318+ ; CHECK0-NEXT: movq %rdi, %rsi
319+ ; CHECK0-NEXT: movl $64, %edi
320+ ; CHECK0-NEXT: movq %rsp, %rdx
321+ ; CHECK0-NEXT: movl $2, %ecx
322+ ; CHECK0-NEXT: callq ___atomic_load
323+ ; CHECK0-NEXT: movapd (%rsp), %xmm0
324+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
325+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
326+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
327+ ; CHECK0-NEXT: addq $72, %rsp
328+ ; CHECK0-NEXT: retq
329+ %ret = load atomic <8 x double >, ptr %x acquire , align 4
330+ ret <8 x double > %ret
331+ }
332+
; Test: acquire atomic load of <16 x bfloat> from %x, declared with align 4.
; Both prefixes expect 40 bytes of stack to be reserved, then a
; ___atomic_load call with $32 in %edi, the source pointer in %rsi, %rsp in
; %rdx and $2 in %ecx; the result is reloaded from the stack into %xmm0 and
; %xmm1 with movaps before the stack is restored.
; CHECK3 and CHECK0 differ only in the order of the argument-setup moves.
; NOTE(review): the libcall is presumably selected because align 4 is below
; the 32-byte size of the loaded type -- confirm against the RUN lines.
333+ define <16 x bfloat> @atomic_vec16_bfloat (ptr %x ) nounwind {
334+ ; CHECK3-LABEL: atomic_vec16_bfloat:
335+ ; CHECK3: ## %bb.0:
336+ ; CHECK3-NEXT: subq $40, %rsp
337+ ; CHECK3-NEXT: movq %rdi, %rsi
338+ ; CHECK3-NEXT: movq %rsp, %rdx
339+ ; CHECK3-NEXT: movl $32, %edi
340+ ; CHECK3-NEXT: movl $2, %ecx
341+ ; CHECK3-NEXT: callq ___atomic_load
342+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
343+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
344+ ; CHECK3-NEXT: addq $40, %rsp
345+ ; CHECK3-NEXT: retq
346+ ;
347+ ; CHECK0-LABEL: atomic_vec16_bfloat:
348+ ; CHECK0: ## %bb.0:
349+ ; CHECK0-NEXT: subq $40, %rsp
350+ ; CHECK0-NEXT: movq %rdi, %rsi
351+ ; CHECK0-NEXT: movl $32, %edi
352+ ; CHECK0-NEXT: movq %rsp, %rdx
353+ ; CHECK0-NEXT: movl $2, %ecx
354+ ; CHECK0-NEXT: callq ___atomic_load
355+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
356+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
357+ ; CHECK0-NEXT: addq $40, %rsp
358+ ; CHECK0-NEXT: retq
359+ %ret = load atomic <16 x bfloat>, ptr %x acquire , align 4
360+ ret <16 x bfloat> %ret
361+ }
362+
; Test: acquire atomic load of <32 x half> from %x, declared with align 4.
; Both prefixes expect 72 bytes of stack to be reserved, then a
; ___atomic_load call with $64 in %edi, the source pointer in %rsi, %rsp in
; %rdx and $2 in %ecx; the result is reloaded from the stack into
; %xmm0..%xmm3 with movaps (offsets matched by regex) before the stack is
; restored.
; CHECK3 and CHECK0 differ only in the order of the argument-setup moves.
; NOTE(review): the libcall is presumably selected because align 4 is below
; the 64-byte size of the loaded type -- confirm against the RUN lines.
363+ define <32 x half > @atomic_vec32_half (ptr %x ) nounwind {
364+ ; CHECK3-LABEL: atomic_vec32_half:
365+ ; CHECK3: ## %bb.0:
366+ ; CHECK3-NEXT: subq $72, %rsp
367+ ; CHECK3-NEXT: movq %rdi, %rsi
368+ ; CHECK3-NEXT: movq %rsp, %rdx
369+ ; CHECK3-NEXT: movl $64, %edi
370+ ; CHECK3-NEXT: movl $2, %ecx
371+ ; CHECK3-NEXT: callq ___atomic_load
372+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
373+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
374+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
375+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
376+ ; CHECK3-NEXT: addq $72, %rsp
377+ ; CHECK3-NEXT: retq
378+ ;
379+ ; CHECK0-LABEL: atomic_vec32_half:
380+ ; CHECK0: ## %bb.0:
381+ ; CHECK0-NEXT: subq $72, %rsp
382+ ; CHECK0-NEXT: movq %rdi, %rsi
383+ ; CHECK0-NEXT: movl $64, %edi
384+ ; CHECK0-NEXT: movq %rsp, %rdx
385+ ; CHECK0-NEXT: movl $2, %ecx
386+ ; CHECK0-NEXT: callq ___atomic_load
387+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
388+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
389+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
390+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
391+ ; CHECK0-NEXT: addq $72, %rsp
392+ ; CHECK0-NEXT: retq
393+ %ret = load atomic <32 x half >, ptr %x acquire , align 4
394+ ret <32 x half > %ret
395+ }
396+
0 commit comments