@@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
146146 ret <1 x i64 > %ret
147147}
148148
; Acquire atomic load of <1 x ptr> declared with only 4-byte alignment.
; Both check prefixes expect a call to the generic ___atomic_load libcall:
; size 8 in %edi, the incoming pointer moved from %rdi to %rsi, a stack slot
; (%rsp, reserved by the pushq) in %rdx, and the value 2 in %ecx (presumably
; the acquire ordering code -- confirm against the libcall ABI). The result is
; then reloaded from the stack slot into %rax. The two prefixes differ only in
; the order of the argument-setup moves.
149+ define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
150+ ; CHECK3-LABEL: atomic_vec1_ptr:
151+ ; CHECK3: ## %bb.0:
152+ ; CHECK3-NEXT: pushq %rax
153+ ; CHECK3-NEXT: movq %rdi, %rsi
154+ ; CHECK3-NEXT: movq %rsp, %rdx
155+ ; CHECK3-NEXT: movl $8, %edi
156+ ; CHECK3-NEXT: movl $2, %ecx
157+ ; CHECK3-NEXT: callq ___atomic_load
158+ ; CHECK3-NEXT: movq (%rsp), %rax
159+ ; CHECK3-NEXT: popq %rcx
160+ ; CHECK3-NEXT: retq
161+ ;
162+ ; CHECK0-LABEL: atomic_vec1_ptr:
163+ ; CHECK0: ## %bb.0:
164+ ; CHECK0-NEXT: pushq %rax
165+ ; CHECK0-NEXT: movq %rdi, %rsi
166+ ; CHECK0-NEXT: movl $8, %edi
167+ ; CHECK0-NEXT: movq %rsp, %rdx
168+ ; CHECK0-NEXT: movl $2, %ecx
169+ ; CHECK0-NEXT: callq ___atomic_load
170+ ; CHECK0-NEXT: movq (%rsp), %rax
171+ ; CHECK0-NEXT: popq %rcx
172+ ; CHECK0-NEXT: retq
173+ %ret = load atomic <1 x ptr >, ptr %x acquire , align 4
174+ ret <1 x ptr > %ret
175+ }
176+
149177define <1 x half > @atomic_vec1_half (ptr %x ) {
150178; CHECK3-LABEL: atomic_vec1_half:
151179; CHECK3: ## %bb.0:
@@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
182210 %ret = load atomic <1 x double >, ptr %x acquire , align 8
183211 ret <1 x double > %ret
184212}
213+
; Acquire atomic load of <1 x i64> declared with only 4-byte alignment.
; Both check prefixes expect the generic ___atomic_load libcall with size 8
; in %edi, the source pointer in %rsi, a stack slot (%rsp) as the destination
; in %rdx and the value 2 in %ecx (presumably the acquire ordering code --
; confirm). The loaded value is read back from the stack slot into %rax.
; The two prefixes differ only in the order of the argument-setup moves.
214+ define <1 x i64 > @atomic_vec1_i64 (ptr %x ) nounwind {
215+ ; CHECK3-LABEL: atomic_vec1_i64:
216+ ; CHECK3: ## %bb.0:
217+ ; CHECK3-NEXT: pushq %rax
218+ ; CHECK3-NEXT: movq %rdi, %rsi
219+ ; CHECK3-NEXT: movq %rsp, %rdx
220+ ; CHECK3-NEXT: movl $8, %edi
221+ ; CHECK3-NEXT: movl $2, %ecx
222+ ; CHECK3-NEXT: callq ___atomic_load
223+ ; CHECK3-NEXT: movq (%rsp), %rax
224+ ; CHECK3-NEXT: popq %rcx
225+ ; CHECK3-NEXT: retq
226+ ;
227+ ; CHECK0-LABEL: atomic_vec1_i64:
228+ ; CHECK0: ## %bb.0:
229+ ; CHECK0-NEXT: pushq %rax
230+ ; CHECK0-NEXT: movq %rdi, %rsi
231+ ; CHECK0-NEXT: movl $8, %edi
232+ ; CHECK0-NEXT: movq %rsp, %rdx
233+ ; CHECK0-NEXT: movl $2, %ecx
234+ ; CHECK0-NEXT: callq ___atomic_load
235+ ; CHECK0-NEXT: movq (%rsp), %rax
236+ ; CHECK0-NEXT: popq %rcx
237+ ; CHECK0-NEXT: retq
238+ %ret = load atomic <1 x i64 >, ptr %x acquire , align 4
239+ ret <1 x i64 > %ret
240+ }
241+
; Acquire atomic load of <1 x double> declared with only 4-byte alignment.
; Both check prefixes expect the generic ___atomic_load libcall with size 8
; in %edi, the source pointer in %rsi, a stack slot (%rsp) as the destination
; in %rdx and the value 2 in %ecx (presumably the acquire ordering code --
; confirm). The result is returned in %xmm0 via a movsd from memory.
; The two prefixes differ only in the order of the argument-setup moves.
242+ define <1 x double > @atomic_vec1_double (ptr %x ) nounwind {
243+ ; CHECK3-LABEL: atomic_vec1_double:
244+ ; CHECK3: ## %bb.0:
245+ ; CHECK3-NEXT: pushq %rax
246+ ; CHECK3-NEXT: movq %rdi, %rsi
247+ ; CHECK3-NEXT: movq %rsp, %rdx
248+ ; CHECK3-NEXT: movl $8, %edi
249+ ; CHECK3-NEXT: movl $2, %ecx
250+ ; CHECK3-NEXT: callq ___atomic_load
251+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
252+ ; CHECK3-NEXT: popq %rax
253+ ; CHECK3-NEXT: retq
254+ ;
255+ ; CHECK0-LABEL: atomic_vec1_double:
256+ ; CHECK0: ## %bb.0:
257+ ; CHECK0-NEXT: pushq %rax
258+ ; CHECK0-NEXT: movq %rdi, %rsi
259+ ; CHECK0-NEXT: movl $8, %edi
260+ ; CHECK0-NEXT: movq %rsp, %rdx
261+ ; CHECK0-NEXT: movl $2, %ecx
262+ ; CHECK0-NEXT: callq ___atomic_load
263+ ; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
264+ ; CHECK0-NEXT: popq %rax
265+ ; CHECK0-NEXT: retq
266+ %ret = load atomic <1 x double >, ptr %x acquire , align 4
267+ ret <1 x double > %ret
268+ }
269+
; Acquire atomic load of <2 x i32> declared with 4-byte alignment.
; Both check prefixes expect the generic ___atomic_load libcall with size 8
; in %edi, the source pointer in %rsi, a stack slot (%rsp) as the destination
; in %rdx and the value 2 in %ecx (presumably the acquire ordering code --
; confirm). The result is returned in %xmm0; the first prefix expects a movsd
; for the reload while the second expects a movq, and the argument-setup
; moves are ordered differently.
270+ define <2 x i32 > @atomic_vec2_i32 (ptr %x ) nounwind {
271+ ; CHECK3-LABEL: atomic_vec2_i32:
272+ ; CHECK3: ## %bb.0:
273+ ; CHECK3-NEXT: pushq %rax
274+ ; CHECK3-NEXT: movq %rdi, %rsi
275+ ; CHECK3-NEXT: movq %rsp, %rdx
276+ ; CHECK3-NEXT: movl $8, %edi
277+ ; CHECK3-NEXT: movl $2, %ecx
278+ ; CHECK3-NEXT: callq ___atomic_load
279+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
280+ ; CHECK3-NEXT: popq %rax
281+ ; CHECK3-NEXT: retq
282+ ;
283+ ; CHECK0-LABEL: atomic_vec2_i32:
284+ ; CHECK0: ## %bb.0:
285+ ; CHECK0-NEXT: pushq %rax
286+ ; CHECK0-NEXT: movq %rdi, %rsi
287+ ; CHECK0-NEXT: movl $8, %edi
288+ ; CHECK0-NEXT: movq %rsp, %rdx
289+ ; CHECK0-NEXT: movl $2, %ecx
290+ ; CHECK0-NEXT: callq ___atomic_load
291+ ; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
292+ ; CHECK0-NEXT: popq %rax
293+ ; CHECK0-NEXT: retq
294+ %ret = load atomic <2 x i32 >, ptr %x acquire , align 4
295+ ret <2 x i32 > %ret
296+ }
297+
; Acquire atomic load of <4 x float> declared with 16-byte alignment.
; Unlike the 4-byte-aligned variants, the expected code calls the sized
; ___atomic_load_16 libcall with the value 2 in %esi (presumably the acquire
; ordering code -- confirm); no move of %rdi is expected before the call.
; The two 64-bit halves returned in %rax and %rdx are moved into %xmm0 and
; %xmm1 and combined into %xmm0 with punpcklqdq. A single shared check prefix
; covers this function.
298+ define <4 x float > @atomic_vec4_float_align (ptr %x ) nounwind {
299+ ; CHECK-LABEL: atomic_vec4_float_align:
300+ ; CHECK: ## %bb.0:
301+ ; CHECK-NEXT: pushq %rax
302+ ; CHECK-NEXT: movl $2, %esi
303+ ; CHECK-NEXT: callq ___atomic_load_16
304+ ; CHECK-NEXT: movq %rdx, %xmm1
305+ ; CHECK-NEXT: movq %rax, %xmm0
306+ ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
307+ ; CHECK-NEXT: popq %rax
308+ ; CHECK-NEXT: retq
309+ %ret = load atomic <4 x float >, ptr %x acquire , align 16
310+ ret <4 x float > %ret
311+ }
312+
; Acquire atomic load of <4 x float> declared with only 4-byte alignment.
; Both check prefixes expect a 24-byte stack adjustment and a call to the
; generic ___atomic_load libcall with size 16 in %edi, the source pointer in
; %rsi, the stack buffer (%rsp) as the destination in %rdx and the value 2 in
; %ecx (presumably the acquire ordering code -- confirm). The result is
; reloaded from the stack buffer into %xmm0 with movaps. The two prefixes
; differ only in the order of the argument-setup moves.
313+ define <4 x float > @atomic_vec4_float (ptr %x ) nounwind {
314+ ; CHECK3-LABEL: atomic_vec4_float:
315+ ; CHECK3: ## %bb.0:
316+ ; CHECK3-NEXT: subq $24, %rsp
317+ ; CHECK3-NEXT: movq %rdi, %rsi
318+ ; CHECK3-NEXT: movq %rsp, %rdx
319+ ; CHECK3-NEXT: movl $16, %edi
320+ ; CHECK3-NEXT: movl $2, %ecx
321+ ; CHECK3-NEXT: callq ___atomic_load
322+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
323+ ; CHECK3-NEXT: addq $24, %rsp
324+ ; CHECK3-NEXT: retq
325+ ;
326+ ; CHECK0-LABEL: atomic_vec4_float:
327+ ; CHECK0: ## %bb.0:
328+ ; CHECK0-NEXT: subq $24, %rsp
329+ ; CHECK0-NEXT: movq %rdi, %rsi
330+ ; CHECK0-NEXT: movl $16, %edi
331+ ; CHECK0-NEXT: movq %rsp, %rdx
332+ ; CHECK0-NEXT: movl $2, %ecx
333+ ; CHECK0-NEXT: callq ___atomic_load
334+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
335+ ; CHECK0-NEXT: addq $24, %rsp
336+ ; CHECK0-NEXT: retq
337+ %ret = load atomic <4 x float >, ptr %x acquire , align 4
338+ ret <4 x float > %ret
339+ }
340+
; Acquire atomic load of <8 x double> declared with only 4-byte alignment.
; Both check prefixes expect a 72-byte stack adjustment and a call to the
; generic ___atomic_load libcall with size 64 in %edi, the source pointer in
; %rsi, the stack buffer (%rsp) as the destination in %rdx and the value 2 in
; %ecx (presumably the acquire ordering code -- confirm). The result is
; reloaded from the stack buffer into %xmm0-%xmm3; the first prefix expects
; movaps for those reloads while the second expects movapd, and the
; argument-setup moves are ordered differently.
341+ define <8 x double > @atomic_vec8_double (ptr %x ) nounwind {
342+ ; CHECK3-LABEL: atomic_vec8_double:
343+ ; CHECK3: ## %bb.0:
344+ ; CHECK3-NEXT: subq $72, %rsp
345+ ; CHECK3-NEXT: movq %rdi, %rsi
346+ ; CHECK3-NEXT: movq %rsp, %rdx
347+ ; CHECK3-NEXT: movl $64, %edi
348+ ; CHECK3-NEXT: movl $2, %ecx
349+ ; CHECK3-NEXT: callq ___atomic_load
350+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
351+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
352+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
353+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
354+ ; CHECK3-NEXT: addq $72, %rsp
355+ ; CHECK3-NEXT: retq
356+ ;
357+ ; CHECK0-LABEL: atomic_vec8_double:
358+ ; CHECK0: ## %bb.0:
359+ ; CHECK0-NEXT: subq $72, %rsp
360+ ; CHECK0-NEXT: movq %rdi, %rsi
361+ ; CHECK0-NEXT: movl $64, %edi
362+ ; CHECK0-NEXT: movq %rsp, %rdx
363+ ; CHECK0-NEXT: movl $2, %ecx
364+ ; CHECK0-NEXT: callq ___atomic_load
365+ ; CHECK0-NEXT: movapd (%rsp), %xmm0
366+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
367+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
368+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
369+ ; CHECK0-NEXT: addq $72, %rsp
370+ ; CHECK0-NEXT: retq
371+ %ret = load atomic <8 x double >, ptr %x acquire , align 4
372+ ret <8 x double > %ret
373+ }
374+
; Acquire atomic load of <16 x bfloat> declared with 4-byte alignment.
; Both check prefixes expect a 40-byte stack adjustment and a call to the
; generic ___atomic_load libcall with size 32 in %edi, the source pointer in
; %rsi, the stack buffer (%rsp) as the destination in %rdx and the value 2 in
; %ecx (presumably the acquire ordering code -- confirm). The result is
; reloaded from the stack buffer into %xmm0 and %xmm1 with movaps. The two
; prefixes differ only in the order of the argument-setup moves.
375+ define <16 x bfloat> @atomic_vec16_bfloat (ptr %x ) nounwind {
376+ ; CHECK3-LABEL: atomic_vec16_bfloat:
377+ ; CHECK3: ## %bb.0:
378+ ; CHECK3-NEXT: subq $40, %rsp
379+ ; CHECK3-NEXT: movq %rdi, %rsi
380+ ; CHECK3-NEXT: movq %rsp, %rdx
381+ ; CHECK3-NEXT: movl $32, %edi
382+ ; CHECK3-NEXT: movl $2, %ecx
383+ ; CHECK3-NEXT: callq ___atomic_load
384+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
385+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
386+ ; CHECK3-NEXT: addq $40, %rsp
387+ ; CHECK3-NEXT: retq
388+ ;
389+ ; CHECK0-LABEL: atomic_vec16_bfloat:
390+ ; CHECK0: ## %bb.0:
391+ ; CHECK0-NEXT: subq $40, %rsp
392+ ; CHECK0-NEXT: movq %rdi, %rsi
393+ ; CHECK0-NEXT: movl $32, %edi
394+ ; CHECK0-NEXT: movq %rsp, %rdx
395+ ; CHECK0-NEXT: movl $2, %ecx
396+ ; CHECK0-NEXT: callq ___atomic_load
397+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
398+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
399+ ; CHECK0-NEXT: addq $40, %rsp
400+ ; CHECK0-NEXT: retq
401+ %ret = load atomic <16 x bfloat>, ptr %x acquire , align 4
402+ ret <16 x bfloat> %ret
403+ }
404+
; Acquire atomic load of <32 x half> declared with 4-byte alignment.
; Both check prefixes expect a 72-byte stack adjustment and a call to the
; generic ___atomic_load libcall with size 64 in %edi, the source pointer in
; %rsi, the stack buffer (%rsp) as the destination in %rdx and the value 2 in
; %ecx (presumably the acquire ordering code -- confirm). The result is
; reloaded from the stack buffer into %xmm0-%xmm3 with movaps. The two
; prefixes differ only in the order of the argument-setup moves.
405+ define <32 x half > @atomic_vec32_half (ptr %x ) nounwind {
406+ ; CHECK3-LABEL: atomic_vec32_half:
407+ ; CHECK3: ## %bb.0:
408+ ; CHECK3-NEXT: subq $72, %rsp
409+ ; CHECK3-NEXT: movq %rdi, %rsi
410+ ; CHECK3-NEXT: movq %rsp, %rdx
411+ ; CHECK3-NEXT: movl $64, %edi
412+ ; CHECK3-NEXT: movl $2, %ecx
413+ ; CHECK3-NEXT: callq ___atomic_load
414+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
415+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
416+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
417+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
418+ ; CHECK3-NEXT: addq $72, %rsp
419+ ; CHECK3-NEXT: retq
420+ ;
421+ ; CHECK0-LABEL: atomic_vec32_half:
422+ ; CHECK0: ## %bb.0:
423+ ; CHECK0-NEXT: subq $72, %rsp
424+ ; CHECK0-NEXT: movq %rdi, %rsi
425+ ; CHECK0-NEXT: movl $64, %edi
426+ ; CHECK0-NEXT: movq %rsp, %rdx
427+ ; CHECK0-NEXT: movl $2, %ecx
428+ ; CHECK0-NEXT: callq ___atomic_load
429+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
430+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
431+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
432+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
433+ ; CHECK0-NEXT: addq $72, %rsp
434+ ; CHECK0-NEXT: retq
435+ %ret = load atomic <32 x half >, ptr %x acquire , align 4
436+ ret <32 x half > %ret
437+ }
0 commit comments