@@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
146146  ret  <1  x i64 > %ret 
147147}
148148
; Acquire atomic load of <1 x ptr> from %x with a stated alignment of 4.
; Both expectation sets below call ___atomic_load with 8 in %edi, the source
; pointer in %rsi and %rsp as the third argument, then read the result back
; from (%rsp) into %rax. The two sets differ only in the order of the
; "movl $8, %edi" and "movq %rsp, %rdx" instructions.
; NOTE(review): the 2 in %ecx is presumably the acquire ordering constant - confirm.
149+ define  <1  x ptr > @atomic_vec1_ptr (ptr  %x ) nounwind  {
150+ ; CHECK3-LABEL: atomic_vec1_ptr: 
151+ ; CHECK3:       ## %bb.0: 
152+ ; CHECK3-NEXT:    pushq %rax 
153+ ; CHECK3-NEXT:    movq %rdi, %rsi 
154+ ; CHECK3-NEXT:    movq %rsp, %rdx 
155+ ; CHECK3-NEXT:    movl $8, %edi 
156+ ; CHECK3-NEXT:    movl $2, %ecx 
157+ ; CHECK3-NEXT:    callq ___atomic_load 
158+ ; CHECK3-NEXT:    movq (%rsp), %rax 
159+ ; CHECK3-NEXT:    popq %rcx 
160+ ; CHECK3-NEXT:    retq 
161+ ; 
162+ ; CHECK0-LABEL: atomic_vec1_ptr: 
163+ ; CHECK0:       ## %bb.0: 
164+ ; CHECK0-NEXT:    pushq %rax 
165+ ; CHECK0-NEXT:    movq %rdi, %rsi 
166+ ; CHECK0-NEXT:    movl $8, %edi 
167+ ; CHECK0-NEXT:    movq %rsp, %rdx 
168+ ; CHECK0-NEXT:    movl $2, %ecx 
169+ ; CHECK0-NEXT:    callq ___atomic_load 
170+ ; CHECK0-NEXT:    movq (%rsp), %rax 
171+ ; CHECK0-NEXT:    popq %rcx 
172+ ; CHECK0-NEXT:    retq 
173+   %ret  = load  atomic  <1  x ptr >, ptr  %x  acquire , align  4 
174+   ret  <1  x ptr > %ret 
175+ }
176+ 
149177define  <1  x half > @atomic_vec1_half (ptr  %x ) {
150178; CHECK3-LABEL: atomic_vec1_half: 
151179; CHECK3:       ## %bb.0: 
@@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
182210  %ret  = load  atomic  <1  x double >, ptr  %x  acquire , align  8 
183211  ret  <1  x double > %ret 
184212}
213+ 
; Acquire atomic load of <1 x i64> from %x with a stated alignment of 4.
; Both expectation sets call ___atomic_load with 8 in %edi, the source pointer
; in %rsi and %rsp as the third argument, then reload the value from (%rsp)
; into %rax. Only the order of the %edi/%rdx setup differs between the sets.
; NOTE(review): the 2 in %ecx is presumably the acquire ordering constant - confirm.
214+ define  <1  x i64 > @atomic_vec1_i64 (ptr  %x ) nounwind  {
215+ ; CHECK3-LABEL: atomic_vec1_i64: 
216+ ; CHECK3:       ## %bb.0: 
217+ ; CHECK3-NEXT:    pushq %rax 
218+ ; CHECK3-NEXT:    movq %rdi, %rsi 
219+ ; CHECK3-NEXT:    movq %rsp, %rdx 
220+ ; CHECK3-NEXT:    movl $8, %edi 
221+ ; CHECK3-NEXT:    movl $2, %ecx 
222+ ; CHECK3-NEXT:    callq ___atomic_load 
223+ ; CHECK3-NEXT:    movq (%rsp), %rax 
224+ ; CHECK3-NEXT:    popq %rcx 
225+ ; CHECK3-NEXT:    retq 
226+ ; 
227+ ; CHECK0-LABEL: atomic_vec1_i64: 
228+ ; CHECK0:       ## %bb.0: 
229+ ; CHECK0-NEXT:    pushq %rax 
230+ ; CHECK0-NEXT:    movq %rdi, %rsi 
231+ ; CHECK0-NEXT:    movl $8, %edi 
232+ ; CHECK0-NEXT:    movq %rsp, %rdx 
233+ ; CHECK0-NEXT:    movl $2, %ecx 
234+ ; CHECK0-NEXT:    callq ___atomic_load 
235+ ; CHECK0-NEXT:    movq (%rsp), %rax 
236+ ; CHECK0-NEXT:    popq %rcx 
237+ ; CHECK0-NEXT:    retq 
238+   %ret  = load  atomic  <1  x i64 >, ptr  %x  acquire , align  4 
239+   ret  <1  x i64 > %ret 
240+ }
241+ 
; Acquire atomic load of <1 x double> from %x with a stated alignment of 4
; (atomic_vec1_double_align performs the same load with align 8).
; Both expectation sets call ___atomic_load with 8 in %edi and %rsp as the
; third argument, then fill %xmm0 with a movsd from memory; the exact memory
; operand is left unmatched by the {{.*#+}} pattern.
; NOTE(review): the 2 in %ecx is presumably the acquire ordering constant - confirm.
242+ define  <1  x double > @atomic_vec1_double (ptr  %x ) nounwind  {
243+ ; CHECK3-LABEL: atomic_vec1_double: 
244+ ; CHECK3:       ## %bb.0: 
245+ ; CHECK3-NEXT:    pushq %rax 
246+ ; CHECK3-NEXT:    movq %rdi, %rsi 
247+ ; CHECK3-NEXT:    movq %rsp, %rdx 
248+ ; CHECK3-NEXT:    movl $8, %edi 
249+ ; CHECK3-NEXT:    movl $2, %ecx 
250+ ; CHECK3-NEXT:    callq ___atomic_load 
251+ ; CHECK3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero 
252+ ; CHECK3-NEXT:    popq %rax 
253+ ; CHECK3-NEXT:    retq 
254+ ; 
255+ ; CHECK0-LABEL: atomic_vec1_double: 
256+ ; CHECK0:       ## %bb.0: 
257+ ; CHECK0-NEXT:    pushq %rax 
258+ ; CHECK0-NEXT:    movq %rdi, %rsi 
259+ ; CHECK0-NEXT:    movl $8, %edi 
260+ ; CHECK0-NEXT:    movq %rsp, %rdx 
261+ ; CHECK0-NEXT:    movl $2, %ecx 
262+ ; CHECK0-NEXT:    callq ___atomic_load 
263+ ; CHECK0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero 
264+ ; CHECK0-NEXT:    popq %rax 
265+ ; CHECK0-NEXT:    retq 
266+   %ret  = load  atomic  <1  x double >, ptr  %x  acquire , align  4 
267+   ret  <1  x double > %ret 
268+ }
269+ 
; Acquire atomic load of <2 x i32> from %x with a stated alignment of 4.
; Both expectation sets call ___atomic_load with 8 in %edi and %rsp as the
; third argument, then fill %xmm0 from memory. The reload mnemonic differs:
; movsd in the first set, movq in the second.
; NOTE(review): the 2 in %ecx is presumably the acquire ordering constant - confirm.
270+ define  <2  x i32 > @atomic_vec2_i32 (ptr  %x ) nounwind  {
271+ ; CHECK3-LABEL: atomic_vec2_i32: 
272+ ; CHECK3:       ## %bb.0: 
273+ ; CHECK3-NEXT:    pushq %rax 
274+ ; CHECK3-NEXT:    movq %rdi, %rsi 
275+ ; CHECK3-NEXT:    movq %rsp, %rdx 
276+ ; CHECK3-NEXT:    movl $8, %edi 
277+ ; CHECK3-NEXT:    movl $2, %ecx 
278+ ; CHECK3-NEXT:    callq ___atomic_load 
279+ ; CHECK3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero 
280+ ; CHECK3-NEXT:    popq %rax 
281+ ; CHECK3-NEXT:    retq 
282+ ; 
283+ ; CHECK0-LABEL: atomic_vec2_i32: 
284+ ; CHECK0:       ## %bb.0: 
285+ ; CHECK0-NEXT:    pushq %rax 
286+ ; CHECK0-NEXT:    movq %rdi, %rsi 
287+ ; CHECK0-NEXT:    movl $8, %edi 
288+ ; CHECK0-NEXT:    movq %rsp, %rdx 
289+ ; CHECK0-NEXT:    movl $2, %ecx 
290+ ; CHECK0-NEXT:    callq ___atomic_load 
291+ ; CHECK0-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero 
292+ ; CHECK0-NEXT:    popq %rax 
293+ ; CHECK0-NEXT:    retq 
294+   %ret  = load  atomic  <2  x i32 >, ptr  %x  acquire , align  4 
295+   ret  <2  x i32 > %ret 
296+ }
297+ 
; Acquire atomic load of <4 x float> from %x with a stated alignment of 16.
; A single shared expectation set is used here: the load becomes a call to
; ___atomic_load_16 (incoming %rdi untouched, 2 in %esi), and the vector is
; rebuilt in %xmm0 by moving %rax and %rdx into xmm registers and joining
; them with punpcklqdq.
; NOTE(review): the 2 in %esi is presumably the acquire ordering constant - confirm.
298+ define  <4  x float > @atomic_vec4_float_align (ptr  %x ) nounwind  {
299+ ; CHECK-LABEL: atomic_vec4_float_align: 
300+ ; CHECK:       ## %bb.0: 
301+ ; CHECK-NEXT:    pushq %rax 
302+ ; CHECK-NEXT:    movl $2, %esi 
303+ ; CHECK-NEXT:    callq ___atomic_load_16 
304+ ; CHECK-NEXT:    movq %rdx, %xmm1 
305+ ; CHECK-NEXT:    movq %rax, %xmm0 
306+ ; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 
307+ ; CHECK-NEXT:    popq %rax 
308+ ; CHECK-NEXT:    retq 
309+   %ret  = load  atomic  <4  x float >, ptr  %x  acquire , align  16 
310+   ret  <4  x float > %ret 
311+ }
312+ 
; Acquire atomic load of <4 x float> from %x with a stated alignment of 4
; (atomic_vec4_float_align covers the align 16 form of the same load).
; Both expectation sets reserve 24 bytes of stack, call ___atomic_load with
; 16 in %edi and %rsp as the third argument, and reload the result from
; (%rsp) into %xmm0 with movaps.
; NOTE(review): the 2 in %ecx is presumably the acquire ordering constant - confirm.
313+ define  <4  x float > @atomic_vec4_float (ptr  %x ) nounwind  {
314+ ; CHECK3-LABEL: atomic_vec4_float: 
315+ ; CHECK3:       ## %bb.0: 
316+ ; CHECK3-NEXT:    subq $24, %rsp 
317+ ; CHECK3-NEXT:    movq %rdi, %rsi 
318+ ; CHECK3-NEXT:    movq %rsp, %rdx 
319+ ; CHECK3-NEXT:    movl $16, %edi 
320+ ; CHECK3-NEXT:    movl $2, %ecx 
321+ ; CHECK3-NEXT:    callq ___atomic_load 
322+ ; CHECK3-NEXT:    movaps (%rsp), %xmm0 
323+ ; CHECK3-NEXT:    addq $24, %rsp 
324+ ; CHECK3-NEXT:    retq 
325+ ; 
326+ ; CHECK0-LABEL: atomic_vec4_float: 
327+ ; CHECK0:       ## %bb.0: 
328+ ; CHECK0-NEXT:    subq $24, %rsp 
329+ ; CHECK0-NEXT:    movq %rdi, %rsi 
330+ ; CHECK0-NEXT:    movl $16, %edi 
331+ ; CHECK0-NEXT:    movq %rsp, %rdx 
332+ ; CHECK0-NEXT:    movl $2, %ecx 
333+ ; CHECK0-NEXT:    callq ___atomic_load 
334+ ; CHECK0-NEXT:    movaps (%rsp), %xmm0 
335+ ; CHECK0-NEXT:    addq $24, %rsp 
336+ ; CHECK0-NEXT:    retq 
337+   %ret  = load  atomic  <4  x float >, ptr  %x  acquire , align  4 
338+   ret  <4  x float > %ret 
339+ }
340+ 
; Acquire atomic load of <8 x double> from %x with a stated alignment of 4.
; Both expectation sets reserve 72 bytes of stack, call ___atomic_load with
; 64 in %edi and %rsp as the third argument, and reload the result from the
; stack into %xmm0-%xmm3. The reload mnemonic differs: movaps in the first
; set, movapd in the second.
; NOTE(review): the 2 in %ecx is presumably the acquire ordering constant - confirm.
341+ define  <8  x double > @atomic_vec8_double (ptr  %x ) nounwind  {
342+ ; CHECK3-LABEL: atomic_vec8_double: 
343+ ; CHECK3:       ## %bb.0: 
344+ ; CHECK3-NEXT:    subq $72, %rsp 
345+ ; CHECK3-NEXT:    movq %rdi, %rsi 
346+ ; CHECK3-NEXT:    movq %rsp, %rdx 
347+ ; CHECK3-NEXT:    movl $64, %edi 
348+ ; CHECK3-NEXT:    movl $2, %ecx 
349+ ; CHECK3-NEXT:    callq ___atomic_load 
350+ ; CHECK3-NEXT:    movaps (%rsp), %xmm0 
351+ ; CHECK3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1 
352+ ; CHECK3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 
353+ ; CHECK3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3 
354+ ; CHECK3-NEXT:    addq $72, %rsp 
355+ ; CHECK3-NEXT:    retq 
356+ ; 
357+ ; CHECK0-LABEL: atomic_vec8_double: 
358+ ; CHECK0:       ## %bb.0: 
359+ ; CHECK0-NEXT:    subq $72, %rsp 
360+ ; CHECK0-NEXT:    movq %rdi, %rsi 
361+ ; CHECK0-NEXT:    movl $64, %edi 
362+ ; CHECK0-NEXT:    movq %rsp, %rdx 
363+ ; CHECK0-NEXT:    movl $2, %ecx 
364+ ; CHECK0-NEXT:    callq ___atomic_load 
365+ ; CHECK0-NEXT:    movapd (%rsp), %xmm0 
366+ ; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm1 
367+ ; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2 
368+ ; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3 
369+ ; CHECK0-NEXT:    addq $72, %rsp 
370+ ; CHECK0-NEXT:    retq 
371+   %ret  = load  atomic  <8  x double >, ptr  %x  acquire , align  4 
372+   ret  <8  x double > %ret 
373+ }
374+ 
; Acquire atomic load of <16 x bfloat> from %x with a stated alignment of 4.
; Both expectation sets reserve 40 bytes of stack, call ___atomic_load with
; 32 in %edi and %rsp as the third argument, and reload the result from the
; stack into %xmm0 and %xmm1 with movaps.
; NOTE(review): the 2 in %ecx is presumably the acquire ordering constant - confirm.
375+ define  <16  x bfloat> @atomic_vec16_bfloat (ptr  %x ) nounwind  {
376+ ; CHECK3-LABEL: atomic_vec16_bfloat: 
377+ ; CHECK3:       ## %bb.0: 
378+ ; CHECK3-NEXT:    subq $40, %rsp 
379+ ; CHECK3-NEXT:    movq %rdi, %rsi 
380+ ; CHECK3-NEXT:    movq %rsp, %rdx 
381+ ; CHECK3-NEXT:    movl $32, %edi 
382+ ; CHECK3-NEXT:    movl $2, %ecx 
383+ ; CHECK3-NEXT:    callq ___atomic_load 
384+ ; CHECK3-NEXT:    movaps (%rsp), %xmm0 
385+ ; CHECK3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1 
386+ ; CHECK3-NEXT:    addq $40, %rsp 
387+ ; CHECK3-NEXT:    retq 
388+ ; 
389+ ; CHECK0-LABEL: atomic_vec16_bfloat: 
390+ ; CHECK0:       ## %bb.0: 
391+ ; CHECK0-NEXT:    subq $40, %rsp 
392+ ; CHECK0-NEXT:    movq %rdi, %rsi 
393+ ; CHECK0-NEXT:    movl $32, %edi 
394+ ; CHECK0-NEXT:    movq %rsp, %rdx 
395+ ; CHECK0-NEXT:    movl $2, %ecx 
396+ ; CHECK0-NEXT:    callq ___atomic_load 
397+ ; CHECK0-NEXT:    movaps (%rsp), %xmm0 
398+ ; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1 
399+ ; CHECK0-NEXT:    addq $40, %rsp 
400+ ; CHECK0-NEXT:    retq 
401+   %ret  = load  atomic  <16  x bfloat>, ptr  %x  acquire , align  4 
402+   ret  <16  x bfloat> %ret 
403+ }
404+ 
; Acquire atomic load of <32 x half> from %x with a stated alignment of 4.
; Both expectation sets reserve 72 bytes of stack, call ___atomic_load with
; 64 in %edi and %rsp as the third argument, and reload the result from the
; stack into %xmm0-%xmm3 with movaps.
; NOTE(review): the 2 in %ecx is presumably the acquire ordering constant - confirm.
405+ define  <32  x half > @atomic_vec32_half (ptr  %x ) nounwind  {
406+ ; CHECK3-LABEL: atomic_vec32_half: 
407+ ; CHECK3:       ## %bb.0: 
408+ ; CHECK3-NEXT:    subq $72, %rsp 
409+ ; CHECK3-NEXT:    movq %rdi, %rsi 
410+ ; CHECK3-NEXT:    movq %rsp, %rdx 
411+ ; CHECK3-NEXT:    movl $64, %edi 
412+ ; CHECK3-NEXT:    movl $2, %ecx 
413+ ; CHECK3-NEXT:    callq ___atomic_load 
414+ ; CHECK3-NEXT:    movaps (%rsp), %xmm0 
415+ ; CHECK3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1 
416+ ; CHECK3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 
417+ ; CHECK3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3 
418+ ; CHECK3-NEXT:    addq $72, %rsp 
419+ ; CHECK3-NEXT:    retq 
420+ ; 
421+ ; CHECK0-LABEL: atomic_vec32_half: 
422+ ; CHECK0:       ## %bb.0: 
423+ ; CHECK0-NEXT:    subq $72, %rsp 
424+ ; CHECK0-NEXT:    movq %rdi, %rsi 
425+ ; CHECK0-NEXT:    movl $64, %edi 
426+ ; CHECK0-NEXT:    movq %rsp, %rdx 
427+ ; CHECK0-NEXT:    movl $2, %ecx 
428+ ; CHECK0-NEXT:    callq ___atomic_load 
429+ ; CHECK0-NEXT:    movaps (%rsp), %xmm0 
430+ ; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1 
431+ ; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2 
432+ ; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3 
433+ ; CHECK0-NEXT:    addq $72, %rsp 
434+ ; CHECK0-NEXT:    retq 
435+   %ret  = load  atomic  <32  x half >, ptr  %x  acquire , align  4 
436+   ret  <32  x half > %ret 
437+ }
0 commit comments