@@ -207,46 +207,43 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
207207; CHECK-O3-LABEL: atomic_vec1_bfloat:
208208; CHECK-O3: # %bb.0:
209209; CHECK-O3-NEXT: movzwl (%rdi), %eax
210- ; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
210+ ; CHECK-O3-NEXT: movd %eax, %xmm0
211211; CHECK-O3-NEXT: retq
212212;
213213; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
214214; CHECK-SSE-O3: # %bb.0:
215215; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
216- ; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
216+ ; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
217217; CHECK-SSE-O3-NEXT: retq
218218;
219219; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
220220; CHECK-AVX-O3: # %bb.0:
221221; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
222- ; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0 , %xmm0
222+ ; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
223223; CHECK-AVX-O3-NEXT: retq
224224;
225225; CHECK-O0-LABEL: atomic_vec1_bfloat:
226226; CHECK-O0: # %bb.0:
227227; CHECK-O0-NEXT: movw (%rdi), %cx
228228; CHECK-O0-NEXT: # implicit-def: $eax
229229; CHECK-O0-NEXT: movw %cx, %ax
230- ; CHECK-O0-NEXT: # implicit-def: $xmm0
231- ; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
230+ ; CHECK-O0-NEXT: movd %eax, %xmm0
232231; CHECK-O0-NEXT: retq
233232;
234233; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
235234; CHECK-SSE-O0: # %bb.0:
236235; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
237236; CHECK-SSE-O0-NEXT: # implicit-def: $eax
238237; CHECK-SSE-O0-NEXT: movw %cx, %ax
239- ; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
240- ; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
238+ ; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
241239; CHECK-SSE-O0-NEXT: retq
242240;
243241; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
244242; CHECK-AVX-O0: # %bb.0:
245243; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
246244; CHECK-AVX-O0-NEXT: # implicit-def: $eax
247245; CHECK-AVX-O0-NEXT: movw %cx, %ax
248- ; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
249- ; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
246+ ; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
250247; CHECK-AVX-O0-NEXT: retq
251248 %ret = load atomic <1 x bfloat>, ptr %x acquire , align 2
252249 ret <1 x bfloat> %ret
@@ -377,6 +374,74 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
377374 ret <2 x float > %ret
378375}
379376
377+ define <2 x half > @atomic_vec2_half (ptr %x ) {
378+ ; CHECK-O3-LABEL: atomic_vec2_half:
379+ ; CHECK-O3: # %bb.0:
380+ ; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
381+ ; CHECK-O3-NEXT: retq
382+ ;
383+ ; CHECK-SSE-O3-LABEL: atomic_vec2_half:
384+ ; CHECK-SSE-O3: # %bb.0:
385+ ; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
386+ ; CHECK-SSE-O3-NEXT: retq
387+ ;
388+ ; CHECK-AVX-O3-LABEL: atomic_vec2_half:
389+ ; CHECK-AVX-O3: # %bb.0:
390+ ; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
391+ ; CHECK-AVX-O3-NEXT: retq
392+ ;
393+ ; CHECK-O0-LABEL: atomic_vec2_half:
394+ ; CHECK-O0: # %bb.0:
395+ ; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
396+ ; CHECK-O0-NEXT: retq
397+ ;
398+ ; CHECK-SSE-O0-LABEL: atomic_vec2_half:
399+ ; CHECK-SSE-O0: # %bb.0:
400+ ; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
401+ ; CHECK-SSE-O0-NEXT: retq
402+ ;
403+ ; CHECK-AVX-O0-LABEL: atomic_vec2_half:
404+ ; CHECK-AVX-O0: # %bb.0:
405+ ; CHECK-AVX-O0-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
406+ ; CHECK-AVX-O0-NEXT: retq
407+ %ret = load atomic <2 x half >, ptr %x acquire , align 4 ; 4-byte-aligned acquire load of a 32-bit vector; each configuration above expects a single 32-bit move from memory into xmm0 (VEX-encoded v-prefixed form under the AVX prefixes)
408+ ret <2 x half > %ret
409+ }
410+
411+ define <2 x bfloat> @atomic_vec2_bfloat (ptr %x ) {
412+ ; CHECK-O3-LABEL: atomic_vec2_bfloat:
413+ ; CHECK-O3: # %bb.0:
414+ ; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
415+ ; CHECK-O3-NEXT: retq
416+ ;
417+ ; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat:
418+ ; CHECK-SSE-O3: # %bb.0:
419+ ; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
420+ ; CHECK-SSE-O3-NEXT: retq
421+ ;
422+ ; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat:
423+ ; CHECK-AVX-O3: # %bb.0:
424+ ; CHECK-AVX-O3-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
425+ ; CHECK-AVX-O3-NEXT: retq
426+ ;
427+ ; CHECK-O0-LABEL: atomic_vec2_bfloat:
428+ ; CHECK-O0: # %bb.0:
429+ ; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
430+ ; CHECK-O0-NEXT: retq
431+ ;
432+ ; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat:
433+ ; CHECK-SSE-O0: # %bb.0:
434+ ; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
435+ ; CHECK-SSE-O0-NEXT: retq
436+ ;
437+ ; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat:
438+ ; CHECK-AVX-O0: # %bb.0:
439+ ; CHECK-AVX-O0-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
440+ ; CHECK-AVX-O0-NEXT: retq
441+ %ret = load atomic <2 x bfloat>, ptr %x acquire , align 4 ; 4-byte-aligned acquire load of a 32-bit vector; each configuration above expects a single 32-bit move from memory into xmm0 (VEX-encoded v-prefixed form under the AVX prefixes)
442+ ret <2 x bfloat> %ret
443+ }
444+
380445define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
381446; CHECK-O3-LABEL: atomic_vec1_ptr:
382447; CHECK-O3: # %bb.0:
@@ -457,46 +522,43 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
457522; CHECK-O3-LABEL: atomic_vec1_half:
458523; CHECK-O3: # %bb.0:
459524; CHECK-O3-NEXT: movzwl (%rdi), %eax
460- ; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
525+ ; CHECK-O3-NEXT: movd %eax, %xmm0
461526; CHECK-O3-NEXT: retq
462527;
463528; CHECK-SSE-O3-LABEL: atomic_vec1_half:
464529; CHECK-SSE-O3: # %bb.0:
465530; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
466- ; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
531+ ; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
467532; CHECK-SSE-O3-NEXT: retq
468533;
469534; CHECK-AVX-O3-LABEL: atomic_vec1_half:
470535; CHECK-AVX-O3: # %bb.0:
471536; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
472- ; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0 , %xmm0
537+ ; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
473538; CHECK-AVX-O3-NEXT: retq
474539;
475540; CHECK-O0-LABEL: atomic_vec1_half:
476541; CHECK-O0: # %bb.0:
477542; CHECK-O0-NEXT: movw (%rdi), %cx
478543; CHECK-O0-NEXT: # implicit-def: $eax
479544; CHECK-O0-NEXT: movw %cx, %ax
480- ; CHECK-O0-NEXT: # implicit-def: $xmm0
481- ; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
545+ ; CHECK-O0-NEXT: movd %eax, %xmm0
482546; CHECK-O0-NEXT: retq
483547;
484548; CHECK-SSE-O0-LABEL: atomic_vec1_half:
485549; CHECK-SSE-O0: # %bb.0:
486550; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
487551; CHECK-SSE-O0-NEXT: # implicit-def: $eax
488552; CHECK-SSE-O0-NEXT: movw %cx, %ax
489- ; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
490- ; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
553+ ; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
491554; CHECK-SSE-O0-NEXT: retq
492555;
493556; CHECK-AVX-O0-LABEL: atomic_vec1_half:
494557; CHECK-AVX-O0: # %bb.0:
495558; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
496559; CHECK-AVX-O0-NEXT: # implicit-def: $eax
497560; CHECK-AVX-O0-NEXT: movw %cx, %ax
498- ; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
499- ; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
561+ ; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
500562; CHECK-AVX-O0-NEXT: retq
501563 %ret = load atomic <1 x half >, ptr %x acquire , align 2
502564 ret <1 x half > %ret
@@ -841,6 +903,89 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
841903 ret <4 x i16 > %ret
842904}
843905
906+ define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
907+ ; CHECK-LABEL: atomic_vec4_half:
908+ ; CHECK: # %bb.0:
909+ ; CHECK-NEXT: movq (%rdi), %xmm0
910+ ; CHECK-NEXT: retq
911+ %ret = load atomic <4 x half >, ptr %x acquire , align 8 ; 8-byte-aligned acquire load of a 64-bit vector; the shared-prefix assertions above expect one movq from memory straight into xmm0
912+ ret <4 x half > %ret
913+ }
914+
915+ define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
916+ ; CHECK-LABEL: atomic_vec4_bfloat:
917+ ; CHECK: # %bb.0:
918+ ; CHECK-NEXT: movq (%rdi), %xmm0
919+ ; CHECK-NEXT: retq
920+ %ret = load atomic <4 x bfloat>, ptr %x acquire , align 8 ; 8-byte-aligned acquire load of a 64-bit vector; the shared-prefix assertions above expect one movq from memory straight into xmm0
921+ ret <4 x bfloat> %ret
922+ }
923+
924+ define <4 x float > @atomic_vec4_float_align (ptr %x ) nounwind {
925+ ; CHECK-O3-LABEL: atomic_vec4_float_align:
926+ ; CHECK-O3: # %bb.0:
927+ ; CHECK-O3-NEXT: pushq %rax
928+ ; CHECK-O3-NEXT: movl $2, %esi
929+ ; CHECK-O3-NEXT: callq __atomic_load_16@PLT
930+ ; CHECK-O3-NEXT: movq %rdx, %xmm1
931+ ; CHECK-O3-NEXT: movq %rax, %xmm0
932+ ; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
933+ ; CHECK-O3-NEXT: popq %rax
934+ ; CHECK-O3-NEXT: retq
935+ ;
936+ ; CHECK-SSE-O3-LABEL: atomic_vec4_float_align:
937+ ; CHECK-SSE-O3: # %bb.0:
938+ ; CHECK-SSE-O3-NEXT: pushq %rbx
939+ ; CHECK-SSE-O3-NEXT: xorl %eax, %eax
940+ ; CHECK-SSE-O3-NEXT: xorl %edx, %edx
941+ ; CHECK-SSE-O3-NEXT: xorl %ecx, %ecx
942+ ; CHECK-SSE-O3-NEXT: xorl %ebx, %ebx
943+ ; CHECK-SSE-O3-NEXT: lock cmpxchg16b (%rdi)
944+ ; CHECK-SSE-O3-NEXT: movq %rdx, %xmm1
945+ ; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
946+ ; CHECK-SSE-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
947+ ; CHECK-SSE-O3-NEXT: popq %rbx
948+ ; CHECK-SSE-O3-NEXT: retq
949+ ;
950+ ; CHECK-AVX-O3-LABEL: atomic_vec4_float_align:
951+ ; CHECK-AVX-O3: # %bb.0:
952+ ; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
953+ ; CHECK-AVX-O3-NEXT: retq
954+ ;
955+ ; CHECK-O0-LABEL: atomic_vec4_float_align:
956+ ; CHECK-O0: # %bb.0:
957+ ; CHECK-O0-NEXT: pushq %rax
958+ ; CHECK-O0-NEXT: movl $2, %esi
959+ ; CHECK-O0-NEXT: callq __atomic_load_16@PLT
960+ ; CHECK-O0-NEXT: movq %rdx, %xmm1
961+ ; CHECK-O0-NEXT: movq %rax, %xmm0
962+ ; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
963+ ; CHECK-O0-NEXT: popq %rax
964+ ; CHECK-O0-NEXT: retq
965+ ;
966+ ; CHECK-SSE-O0-LABEL: atomic_vec4_float_align:
967+ ; CHECK-SSE-O0: # %bb.0:
968+ ; CHECK-SSE-O0-NEXT: pushq %rbx
969+ ; CHECK-SSE-O0-NEXT: xorl %eax, %eax
970+ ; CHECK-SSE-O0-NEXT: movl %eax, %ebx
971+ ; CHECK-SSE-O0-NEXT: movq %rbx, %rax
972+ ; CHECK-SSE-O0-NEXT: movq %rbx, %rdx
973+ ; CHECK-SSE-O0-NEXT: movq %rbx, %rcx
974+ ; CHECK-SSE-O0-NEXT: lock cmpxchg16b (%rdi)
975+ ; CHECK-SSE-O0-NEXT: movq %rdx, %xmm1
976+ ; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
977+ ; CHECK-SSE-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
978+ ; CHECK-SSE-O0-NEXT: popq %rbx
979+ ; CHECK-SSE-O0-NEXT: retq
980+ ;
981+ ; CHECK-AVX-O0-LABEL: atomic_vec4_float_align:
982+ ; CHECK-AVX-O0: # %bb.0:
983+ ; CHECK-AVX-O0-NEXT: vmovaps (%rdi), %xmm0
984+ ; CHECK-AVX-O0-NEXT: retq
985+ %ret = load atomic <4 x float >, ptr %x acquire , align 16 ; 16-byte-aligned acquire load of a 128-bit vector; the assertions above expect three lowerings: a __atomic_load_16 libcall whose rdx:rax result is repacked into xmm0 (plain O3/O0 prefixes; the $2 in esi is presumably the acquire memory-order argument), a lock cmpxchg16b with all four operand registers zeroed (SSE prefixes), and a single vmovaps (AVX prefixes)
986+ ret <4 x float > %ret
987+ }
988+
844989define <4 x float > @atomic_vec4_float (ptr %x ) nounwind {
845990; CHECK-O3-LABEL: atomic_vec4_float:
846991; CHECK-O3: # %bb.0:
0 commit comments