@@ -204,6 +204,76 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
204204 ret <2 x float > %ret
205205}
206206
; Test: atomic acquire load of a <2 x half> vector through %x (align 4); the
; loaded vector is returned unchanged.
; Both the CHECK3 and CHECK0 expectations begin with a single 32-bit
; `movl (%rdi), %eax` and then split the value in registers: `shrl $16`
; isolates the upper 16 bits, `pinsrw $0` places them in an XMM register, and
; the pand/pslld/pandn/por sequence with the [65535,0,...] mask merges both
; 16-bit pieces into %xmm0.
; NOTE(review): CHECK3/CHECK0 presumably correspond to different llc RUN lines
; that are outside this excerpt — confirm. The check lines look
; auto-generated; TODO confirm and regenerate them rather than editing by hand.
207+ define <2 x half > @atomic_vec2_half (ptr %x ) {
208+ ; CHECK3-LABEL: atomic_vec2_half:
209+ ; CHECK3: ## %bb.0:
210+ ; CHECK3-NEXT: movl (%rdi), %eax
211+ ; CHECK3-NEXT: movd %eax, %xmm1
212+ ; CHECK3-NEXT: shrl $16, %eax
213+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm2
214+ ; CHECK3-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535]
215+ ; CHECK3-NEXT: pand %xmm0, %xmm1
216+ ; CHECK3-NEXT: pslld $16, %xmm2
217+ ; CHECK3-NEXT: pandn %xmm2, %xmm0
218+ ; CHECK3-NEXT: por %xmm1, %xmm0
219+ ; CHECK3-NEXT: retq
220+ ;
221+ ; CHECK0-LABEL: atomic_vec2_half:
222+ ; CHECK0: ## %bb.0:
223+ ; CHECK0-NEXT: movl (%rdi), %eax
224+ ; CHECK0-NEXT: movl %eax, %ecx
225+ ; CHECK0-NEXT: shrl $16, %ecx
226+ ; CHECK0-NEXT: movw %cx, %dx
227+ ; CHECK0-NEXT: ## implicit-def: $ecx
228+ ; CHECK0-NEXT: movw %dx, %cx
229+ ; CHECK0-NEXT: ## implicit-def: $xmm2
230+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
231+ ; CHECK0-NEXT: movd %eax, %xmm0
232+ ; CHECK0-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535]
233+ ; CHECK0-NEXT: pand %xmm1, %xmm0
234+ ; CHECK0-NEXT: pslld $16, %xmm2
235+ ; CHECK0-NEXT: pandn %xmm2, %xmm1
236+ ; CHECK0-NEXT: por %xmm1, %xmm0
237+ ; CHECK0-NEXT: retq
238+ %ret = load atomic <2 x half >, ptr %x acquire , align 4
239+ ret <2 x half > %ret
240+ }
241+
; Test: atomic acquire load of a <2 x bfloat> vector through %x (align 4); the
; loaded vector is returned unchanged.
; Both the CHECK3 and CHECK0 expectations begin with a single 32-bit
; `movl (%rdi), %eax`; the upper 16 bits are isolated with `shrl $16`, placed
; in %xmm2 via `pinsrw $0`, shifted back up with `pslld $16`, and merged with
; the masked low half (mask [65535,0,...]) through pand/pandn/por into %xmm0.
; NOTE(review): CHECK3/CHECK0 presumably correspond to different llc RUN lines
; that are outside this excerpt — confirm. The check lines look
; auto-generated; TODO confirm and regenerate them rather than editing by hand.
242+ define <2 x bfloat> @atomic_vec2_bfloat (ptr %x ) {
243+ ; CHECK3-LABEL: atomic_vec2_bfloat:
244+ ; CHECK3: ## %bb.0:
245+ ; CHECK3-NEXT: movl (%rdi), %eax
246+ ; CHECK3-NEXT: movd %eax, %xmm1
247+ ; CHECK3-NEXT: shrl $16, %eax
248+ ; CHECK3-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535]
249+ ; CHECK3-NEXT: pand %xmm0, %xmm1
250+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm2
251+ ; CHECK3-NEXT: pslld $16, %xmm2
252+ ; CHECK3-NEXT: pandn %xmm2, %xmm0
253+ ; CHECK3-NEXT: por %xmm1, %xmm0
254+ ; CHECK3-NEXT: retq
255+ ;
256+ ; CHECK0-LABEL: atomic_vec2_bfloat:
257+ ; CHECK0: ## %bb.0:
258+ ; CHECK0-NEXT: movl (%rdi), %eax
259+ ; CHECK0-NEXT: movl %eax, %ecx
260+ ; CHECK0-NEXT: shrl $16, %ecx
261+ ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
262+ ; CHECK0-NEXT: movd %eax, %xmm0
263+ ; CHECK0-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535]
264+ ; CHECK0-NEXT: pand %xmm1, %xmm0
265+ ; CHECK0-NEXT: ## implicit-def: $eax
266+ ; CHECK0-NEXT: movw %cx, %ax
267+ ; CHECK0-NEXT: ## implicit-def: $xmm2
268+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
269+ ; CHECK0-NEXT: pslld $16, %xmm2
270+ ; CHECK0-NEXT: pandn %xmm2, %xmm1
271+ ; CHECK0-NEXT: por %xmm1, %xmm0
272+ ; CHECK0-NEXT: retq
273+ %ret = load atomic <2 x bfloat>, ptr %x acquire , align 4
274+ ret <2 x bfloat> %ret
275+ }
276+
207277define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
208278; CHECK3-LABEL: atomic_vec1_ptr:
209279; CHECK3: ## %bb.0:
@@ -376,6 +446,107 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
376446 ret <4 x i16 > %ret
377447}
378448
; Test: atomic acquire load of a <4 x half> vector through %x (align 8); the
; loaded vector is returned unchanged. The function is marked nounwind.
; Both the CHECK3 and CHECK0 expectations begin with a single 64-bit
; `movq (%rdi), %rax`. The 16-bit pieces are then peeled off in registers with
; right shifts by 16, 32 and 48, each inserted into its own XMM register with
; `pinsrw $0`, and recombined into %xmm0 by two `punpcklwd` steps followed by
; `punpckldq` (CHECK3) or `unpcklps` (CHECK0).
; NOTE(review): CHECK3/CHECK0 presumably correspond to different llc RUN lines
; that are outside this excerpt — confirm. The check lines look
; auto-generated; TODO confirm and regenerate them rather than editing by hand.
449+ define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
450+ ; CHECK3-LABEL: atomic_vec4_half:
451+ ; CHECK3: ## %bb.0:
452+ ; CHECK3-NEXT: movq (%rdi), %rax
453+ ; CHECK3-NEXT: movl %eax, %ecx
454+ ; CHECK3-NEXT: shrl $16, %ecx
455+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
456+ ; CHECK3-NEXT: movq %rax, %rcx
457+ ; CHECK3-NEXT: shrq $32, %rcx
458+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
459+ ; CHECK3-NEXT: movq %rax, %xmm0
460+ ; CHECK3-NEXT: shrq $48, %rax
461+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
462+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
463+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
464+ ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
465+ ; CHECK3-NEXT: retq
466+ ;
467+ ; CHECK0-LABEL: atomic_vec4_half:
468+ ; CHECK0: ## %bb.0:
469+ ; CHECK0-NEXT: movq (%rdi), %rax
470+ ; CHECK0-NEXT: movl %eax, %ecx
471+ ; CHECK0-NEXT: shrl $16, %ecx
472+ ; CHECK0-NEXT: movw %cx, %dx
473+ ; CHECK0-NEXT: ## implicit-def: $ecx
474+ ; CHECK0-NEXT: movw %dx, %cx
475+ ; CHECK0-NEXT: ## implicit-def: $xmm2
476+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
477+ ; CHECK0-NEXT: movq %rax, %rcx
478+ ; CHECK0-NEXT: shrq $32, %rcx
479+ ; CHECK0-NEXT: movw %cx, %dx
480+ ; CHECK0-NEXT: ## implicit-def: $ecx
481+ ; CHECK0-NEXT: movw %dx, %cx
482+ ; CHECK0-NEXT: ## implicit-def: $xmm1
483+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
484+ ; CHECK0-NEXT: movq %rax, %rcx
485+ ; CHECK0-NEXT: shrq $48, %rcx
486+ ; CHECK0-NEXT: movw %cx, %dx
487+ ; CHECK0-NEXT: ## implicit-def: $ecx
488+ ; CHECK0-NEXT: movw %dx, %cx
489+ ; CHECK0-NEXT: ## implicit-def: $xmm3
490+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm3
491+ ; CHECK0-NEXT: movq %rax, %xmm0
492+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
493+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
494+ ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
495+ ; CHECK0-NEXT: retq
496+ %ret = load atomic <4 x half >, ptr %x acquire , align 8
497+ ret <4 x half > %ret
498+ }
499+
; Test: atomic acquire load of a <4 x bfloat> vector through %x (align 8); the
; loaded vector is returned unchanged. The function is marked nounwind.
; Both the CHECK3 and CHECK0 expectations begin with a single 64-bit
; `movq (%rdi), %rax`. The 16-bit pieces are then peeled off in registers with
; right shifts by 16, 32 and 48, inserted into XMM registers with `pinsrw $0`,
; and recombined into %xmm0 by two `punpcklwd` steps followed by `punpckldq`
; (CHECK3) or `unpcklps` (CHECK0).
; NOTE(review): CHECK3/CHECK0 presumably correspond to different llc RUN lines
; that are outside this excerpt — confirm. The check lines look
; auto-generated; TODO confirm and regenerate them rather than editing by hand.
500+ define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
501+ ; CHECK3-LABEL: atomic_vec4_bfloat:
502+ ; CHECK3: ## %bb.0:
503+ ; CHECK3-NEXT: movq (%rdi), %rax
504+ ; CHECK3-NEXT: movq %rax, %xmm0
505+ ; CHECK3-NEXT: movl %eax, %ecx
506+ ; CHECK3-NEXT: shrl $16, %ecx
507+ ; CHECK3-NEXT: movq %rax, %rdx
508+ ; CHECK3-NEXT: shrq $32, %rdx
509+ ; CHECK3-NEXT: shrq $48, %rax
510+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
511+ ; CHECK3-NEXT: pinsrw $0, %edx, %xmm2
512+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
513+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
514+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
515+ ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
516+ ; CHECK3-NEXT: retq
517+ ;
518+ ; CHECK0-LABEL: atomic_vec4_bfloat:
519+ ; CHECK0: ## %bb.0:
520+ ; CHECK0-NEXT: movq (%rdi), %rax
521+ ; CHECK0-NEXT: movq %rax, %xmm0
522+ ; CHECK0-NEXT: movl %eax, %ecx
523+ ; CHECK0-NEXT: shrl $16, %ecx
524+ ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
525+ ; CHECK0-NEXT: movq %rax, %rdx
526+ ; CHECK0-NEXT: shrq $32, %rdx
527+ ; CHECK0-NEXT: ## kill: def $dx killed $dx killed $rdx
528+ ; CHECK0-NEXT: shrq $48, %rax
529+ ; CHECK0-NEXT: movw %ax, %si
530+ ; CHECK0-NEXT: ## implicit-def: $eax
531+ ; CHECK0-NEXT: movw %si, %ax
532+ ; CHECK0-NEXT: ## implicit-def: $xmm2
533+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
534+ ; CHECK0-NEXT: ## implicit-def: $eax
535+ ; CHECK0-NEXT: movw %dx, %ax
536+ ; CHECK0-NEXT: ## implicit-def: $xmm1
537+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
538+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
539+ ; CHECK0-NEXT: ## implicit-def: $eax
540+ ; CHECK0-NEXT: movw %cx, %ax
541+ ; CHECK0-NEXT: ## implicit-def: $xmm2
542+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
543+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
544+ ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
545+ ; CHECK0-NEXT: retq
546+ %ret = load atomic <4 x bfloat>, ptr %x acquire , align 8
547+ ret <4 x bfloat> %ret
548+ }
549+
379550define <4 x float > @atomic_vec4_float_align (ptr %x ) nounwind {
380551; CHECK-LABEL: atomic_vec4_float_align:
381552; CHECK: ## %bb.0:
0 commit comments