@@ -204,6 +204,76 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
204204  ret  <2  x float > %ret 
205205}
206206
; Test: acquire atomic load of a <2 x half> vector (align 4), returned by value.
; The CHECK3 / CHECK0 blocks pin the expected x86 output for two llc
; configurations (presumably the -O3 and -O0 RUN lines -- TODO confirm against
; the RUN lines at the top of the file). In both blocks memory is read exactly
; once, with a 32-bit `movl (%rdi), %eax`; the upper 16 bits are then split off
; with `shrl $16` and the two 16-bit fields are recombined in xmm0 through the
; pand / pslld / pandn / por mask sequence.
; NOTE(review): the CHECK lines look autogenerated (presumably by
; update_llc_test_checks.py -- confirm in the file header); regenerate them
; instead of editing them by hand.
207+ define  <2  x half > @atomic_vec2_half (ptr  %x ) {
208+ ; CHECK3-LABEL: atomic_vec2_half: 
209+ ; CHECK3:       ## %bb.0: 
210+ ; CHECK3-NEXT:    movl (%rdi), %eax 
211+ ; CHECK3-NEXT:    movd %eax, %xmm1 
212+ ; CHECK3-NEXT:    shrl $16, %eax 
213+ ; CHECK3-NEXT:    pinsrw $0, %eax, %xmm2 
214+ ; CHECK3-NEXT:    movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535] 
215+ ; CHECK3-NEXT:    pand %xmm0, %xmm1 
216+ ; CHECK3-NEXT:    pslld $16, %xmm2 
217+ ; CHECK3-NEXT:    pandn %xmm2, %xmm0 
218+ ; CHECK3-NEXT:    por %xmm1, %xmm0 
219+ ; CHECK3-NEXT:    retq 
220+ ; 
221+ ; CHECK0-LABEL: atomic_vec2_half: 
222+ ; CHECK0:       ## %bb.0: 
223+ ; CHECK0-NEXT:    movl (%rdi), %eax 
224+ ; CHECK0-NEXT:    movl %eax, %ecx 
225+ ; CHECK0-NEXT:    shrl $16, %ecx 
226+ ; CHECK0-NEXT:    movw %cx, %dx 
227+ ; CHECK0-NEXT:    ## implicit-def: $ecx 
228+ ; CHECK0-NEXT:    movw %dx, %cx 
229+ ; CHECK0-NEXT:    ## implicit-def: $xmm2 
230+ ; CHECK0-NEXT:    pinsrw $0, %ecx, %xmm2 
231+ ; CHECK0-NEXT:    movd %eax, %xmm0 
232+ ; CHECK0-NEXT:    movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535] 
233+ ; CHECK0-NEXT:    pand %xmm1, %xmm0 
234+ ; CHECK0-NEXT:    pslld $16, %xmm2 
235+ ; CHECK0-NEXT:    pandn %xmm2, %xmm1 
236+ ; CHECK0-NEXT:    por %xmm1, %xmm0 
237+ ; CHECK0-NEXT:    retq 
; IR under test: one atomic load of the whole vector with acquire ordering.
238+   %ret  = load  atomic  <2  x half >, ptr  %x  acquire , align  4 
239+   ret  <2  x half > %ret 
240+ }
241+ 
; Test: acquire atomic load of a <2 x bfloat> vector (align 4), returned by
; value. The CHECK3 / CHECK0 blocks pin the expected x86 output for two llc
; configurations (presumably the -O3 and -O0 RUN lines -- TODO confirm against
; the RUN lines at the top of the file). In both blocks memory is read exactly
; once, with a 32-bit `movl (%rdi), %eax`; the upper 16 bits are split off with
; `shrl $16`, inserted into xmm2 with `pinsrw`, and merged with the low field in
; xmm0 through the pand / pslld / pandn / por mask sequence.
; NOTE(review): the CHECK lines look autogenerated (presumably by
; update_llc_test_checks.py -- confirm in the file header); regenerate them
; instead of editing them by hand.
242+ define  <2  x bfloat> @atomic_vec2_bfloat (ptr  %x ) {
243+ ; CHECK3-LABEL: atomic_vec2_bfloat: 
244+ ; CHECK3:       ## %bb.0: 
245+ ; CHECK3-NEXT:    movl (%rdi), %eax 
246+ ; CHECK3-NEXT:    movd %eax, %xmm1 
247+ ; CHECK3-NEXT:    shrl $16, %eax 
248+ ; CHECK3-NEXT:    movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535] 
249+ ; CHECK3-NEXT:    pand %xmm0, %xmm1 
250+ ; CHECK3-NEXT:    pinsrw $0, %eax, %xmm2 
251+ ; CHECK3-NEXT:    pslld $16, %xmm2 
252+ ; CHECK3-NEXT:    pandn %xmm2, %xmm0 
253+ ; CHECK3-NEXT:    por %xmm1, %xmm0 
254+ ; CHECK3-NEXT:    retq 
255+ ; 
256+ ; CHECK0-LABEL: atomic_vec2_bfloat: 
257+ ; CHECK0:       ## %bb.0: 
258+ ; CHECK0-NEXT:    movl (%rdi), %eax 
259+ ; CHECK0-NEXT:    movl %eax, %ecx 
260+ ; CHECK0-NEXT:    shrl $16, %ecx 
261+ ; CHECK0-NEXT:    ## kill: def $cx killed $cx killed $ecx 
262+ ; CHECK0-NEXT:    movd %eax, %xmm0 
263+ ; CHECK0-NEXT:    movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535] 
264+ ; CHECK0-NEXT:    pand %xmm1, %xmm0 
265+ ; CHECK0-NEXT:    ## implicit-def: $eax 
266+ ; CHECK0-NEXT:    movw %cx, %ax 
267+ ; CHECK0-NEXT:    ## implicit-def: $xmm2 
268+ ; CHECK0-NEXT:    pinsrw $0, %eax, %xmm2 
269+ ; CHECK0-NEXT:    pslld $16, %xmm2 
270+ ; CHECK0-NEXT:    pandn %xmm2, %xmm1 
271+ ; CHECK0-NEXT:    por %xmm1, %xmm0 
272+ ; CHECK0-NEXT:    retq 
; IR under test: one atomic load of the whole vector with acquire ordering.
273+   %ret  = load  atomic  <2  x bfloat>, ptr  %x  acquire , align  4 
274+   ret  <2  x bfloat> %ret 
275+ }
276+ 
207277define  <1  x ptr > @atomic_vec1_ptr (ptr  %x ) nounwind  {
208278; CHECK3-LABEL: atomic_vec1_ptr: 
209279; CHECK3:       ## %bb.0: 
@@ -376,6 +446,107 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
376446  ret  <4  x i16 > %ret 
377447}
378448
; Test: acquire atomic load of a <4 x half> vector (align 8), returned by value;
; the function is marked nounwind. The CHECK3 / CHECK0 blocks pin the expected
; x86 output for two llc configurations (presumably the -O3 and -O0 RUN lines --
; TODO confirm against the RUN lines at the top of the file). In both blocks
; memory is read exactly once, with a 64-bit `movq (%rdi), %rax`; the 16-bit
; fields at bit offsets 16, 32 and 48 are extracted with shifts and placed in
; separate xmm registers via `pinsrw`, then the result is rebuilt in xmm0 with
; two `punpcklwd` interleaves followed by `punpckldq` (CHECK3) or `unpcklps`
; (CHECK0).
; NOTE(review): the CHECK lines look autogenerated (presumably by
; update_llc_test_checks.py -- confirm in the file header); regenerate them
; instead of editing them by hand.
449+ define  <4  x half > @atomic_vec4_half (ptr  %x ) nounwind  {
450+ ; CHECK3-LABEL: atomic_vec4_half: 
451+ ; CHECK3:       ## %bb.0: 
452+ ; CHECK3-NEXT:    movq (%rdi), %rax 
453+ ; CHECK3-NEXT:    movl %eax, %ecx 
454+ ; CHECK3-NEXT:    shrl $16, %ecx 
455+ ; CHECK3-NEXT:    pinsrw $0, %ecx, %xmm1 
456+ ; CHECK3-NEXT:    movq %rax, %rcx 
457+ ; CHECK3-NEXT:    shrq $32, %rcx 
458+ ; CHECK3-NEXT:    pinsrw $0, %ecx, %xmm2 
459+ ; CHECK3-NEXT:    movq %rax, %xmm0 
460+ ; CHECK3-NEXT:    shrq $48, %rax 
461+ ; CHECK3-NEXT:    pinsrw $0, %eax, %xmm3 
462+ ; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3] 
463+ ; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 
464+ ; CHECK3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 
465+ ; CHECK3-NEXT:    retq 
466+ ; 
467+ ; CHECK0-LABEL: atomic_vec4_half: 
468+ ; CHECK0:       ## %bb.0: 
469+ ; CHECK0-NEXT:    movq (%rdi), %rax 
470+ ; CHECK0-NEXT:    movl %eax, %ecx 
471+ ; CHECK0-NEXT:    shrl $16, %ecx 
472+ ; CHECK0-NEXT:    movw %cx, %dx 
473+ ; CHECK0-NEXT:    ## implicit-def: $ecx 
474+ ; CHECK0-NEXT:    movw %dx, %cx 
475+ ; CHECK0-NEXT:    ## implicit-def: $xmm2 
476+ ; CHECK0-NEXT:    pinsrw $0, %ecx, %xmm2 
477+ ; CHECK0-NEXT:    movq %rax, %rcx 
478+ ; CHECK0-NEXT:    shrq $32, %rcx 
479+ ; CHECK0-NEXT:    movw %cx, %dx 
480+ ; CHECK0-NEXT:    ## implicit-def: $ecx 
481+ ; CHECK0-NEXT:    movw %dx, %cx 
482+ ; CHECK0-NEXT:    ## implicit-def: $xmm1 
483+ ; CHECK0-NEXT:    pinsrw $0, %ecx, %xmm1 
484+ ; CHECK0-NEXT:    movq %rax, %rcx 
485+ ; CHECK0-NEXT:    shrq $48, %rcx 
486+ ; CHECK0-NEXT:    movw %cx, %dx 
487+ ; CHECK0-NEXT:    ## implicit-def: $ecx 
488+ ; CHECK0-NEXT:    movw %dx, %cx 
489+ ; CHECK0-NEXT:    ## implicit-def: $xmm3 
490+ ; CHECK0-NEXT:    pinsrw $0, %ecx, %xmm3 
491+ ; CHECK0-NEXT:    movq %rax, %xmm0 
492+ ; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 
493+ ; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 
494+ ; CHECK0-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 
495+ ; CHECK0-NEXT:    retq 
; IR under test: one atomic load of the whole vector with acquire ordering.
496+   %ret  = load  atomic  <4  x half >, ptr  %x  acquire , align  8 
497+   ret  <4  x half > %ret 
498+ }
499+ 
; Test: acquire atomic load of a <4 x bfloat> vector (align 8), returned by
; value; the function is marked nounwind. The CHECK3 / CHECK0 blocks pin the
; expected x86 output for two llc configurations (presumably the -O3 and -O0 RUN
; lines -- TODO confirm against the RUN lines at the top of the file). In both
; blocks memory is read exactly once, with a 64-bit `movq (%rdi), %rax`; the
; 16-bit fields at bit offsets 16, 32 and 48 are extracted with shifts and
; inserted into xmm registers via `pinsrw`, then the result is rebuilt in xmm0
; with two `punpcklwd` interleaves followed by `punpckldq` (CHECK3) or
; `unpcklps` (CHECK0).
; NOTE(review): the CHECK lines look autogenerated (presumably by
; update_llc_test_checks.py -- confirm in the file header); regenerate them
; instead of editing them by hand.
500+ define  <4  x bfloat> @atomic_vec4_bfloat (ptr  %x ) nounwind  {
501+ ; CHECK3-LABEL: atomic_vec4_bfloat: 
502+ ; CHECK3:       ## %bb.0: 
503+ ; CHECK3-NEXT:    movq (%rdi), %rax 
504+ ; CHECK3-NEXT:    movq %rax, %xmm0 
505+ ; CHECK3-NEXT:    movl %eax, %ecx 
506+ ; CHECK3-NEXT:    shrl $16, %ecx 
507+ ; CHECK3-NEXT:    movq %rax, %rdx 
508+ ; CHECK3-NEXT:    shrq $32, %rdx 
509+ ; CHECK3-NEXT:    shrq $48, %rax 
510+ ; CHECK3-NEXT:    pinsrw $0, %eax, %xmm1 
511+ ; CHECK3-NEXT:    pinsrw $0, %edx, %xmm2 
512+ ; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 
513+ ; CHECK3-NEXT:    pinsrw $0, %ecx, %xmm1 
514+ ; CHECK3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 
515+ ; CHECK3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 
516+ ; CHECK3-NEXT:    retq 
517+ ; 
518+ ; CHECK0-LABEL: atomic_vec4_bfloat: 
519+ ; CHECK0:       ## %bb.0: 
520+ ; CHECK0-NEXT:    movq (%rdi), %rax 
521+ ; CHECK0-NEXT:    movq %rax, %xmm0 
522+ ; CHECK0-NEXT:    movl %eax, %ecx 
523+ ; CHECK0-NEXT:    shrl $16, %ecx 
524+ ; CHECK0-NEXT:    ## kill: def $cx killed $cx killed $ecx 
525+ ; CHECK0-NEXT:    movq %rax, %rdx 
526+ ; CHECK0-NEXT:    shrq $32, %rdx 
527+ ; CHECK0-NEXT:    ## kill: def $dx killed $dx killed $rdx 
528+ ; CHECK0-NEXT:    shrq $48, %rax 
529+ ; CHECK0-NEXT:    movw %ax, %si 
530+ ; CHECK0-NEXT:    ## implicit-def: $eax 
531+ ; CHECK0-NEXT:    movw %si, %ax 
532+ ; CHECK0-NEXT:    ## implicit-def: $xmm2 
533+ ; CHECK0-NEXT:    pinsrw $0, %eax, %xmm2 
534+ ; CHECK0-NEXT:    ## implicit-def: $eax 
535+ ; CHECK0-NEXT:    movw %dx, %ax 
536+ ; CHECK0-NEXT:    ## implicit-def: $xmm1 
537+ ; CHECK0-NEXT:    pinsrw $0, %eax, %xmm1 
538+ ; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 
539+ ; CHECK0-NEXT:    ## implicit-def: $eax 
540+ ; CHECK0-NEXT:    movw %cx, %ax 
541+ ; CHECK0-NEXT:    ## implicit-def: $xmm2 
542+ ; CHECK0-NEXT:    pinsrw $0, %eax, %xmm2 
543+ ; CHECK0-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 
544+ ; CHECK0-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 
545+ ; CHECK0-NEXT:    retq 
; IR under test: one atomic load of the whole vector with acquire ordering.
546+   %ret  = load  atomic  <4  x bfloat>, ptr  %x  acquire , align  8 
547+   ret  <4  x bfloat> %ret 
548+ }
549+ 
379550define  <4  x float > @atomic_vec4_float_align (ptr  %x ) nounwind  {
380551; CHECK-LABEL: atomic_vec4_float_align: 
381552; CHECK:       ## %bb.0: 
0 commit comments