@@ -204,6 +204,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
204204 ret <2 x float > %ret
205205}
206206
; Test: acquire atomic load of a <2 x half> vector (align 4) from %x, returned
; directly. Both check prefixes expect the memory access to be a single 32-bit
; `movl (%rdi), %eax`. The two 16-bit lanes are then split apart with a
; `shrl $16`, inserted into xmm0/xmm1 via `pinsrw $0`, and interleaved with
; `punpcklwd` to form the result in xmm0.
; NOTE(review): CHECK3 / CHECK0 presumably map to an optimized and an -O0 RUN
; line; the RUN lines are outside this excerpt -- confirm there.
; NOTE(review): the assertions appear to be autogenerated; regenerate them with
; the update script rather than editing by hand.
207+ define <2 x half > @atomic_vec2_half (ptr %x ) {
208+ ; CHECK3-LABEL: atomic_vec2_half:
209+ ; CHECK3: ## %bb.0:
210+ ; CHECK3-NEXT: movl (%rdi), %eax
211+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
212+ ; CHECK3-NEXT: shrl $16, %eax
213+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
214+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
215+ ; CHECK3-NEXT: retq
216+ ;
217+ ; CHECK0-LABEL: atomic_vec2_half:
218+ ; CHECK0: ## %bb.0:
219+ ; CHECK0-NEXT: movl (%rdi), %eax
220+ ; CHECK0-NEXT: movl %eax, %ecx
221+ ; CHECK0-NEXT: shrl $16, %ecx
222+ ; CHECK0-NEXT: movw %cx, %dx
223+ ; CHECK0-NEXT: ## implicit-def: $ecx
224+ ; CHECK0-NEXT: movw %dx, %cx
225+ ; CHECK0-NEXT: ## implicit-def: $xmm1
226+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
227+ ; CHECK0-NEXT: movw %ax, %cx
228+ ; CHECK0-NEXT: ## implicit-def: $eax
229+ ; CHECK0-NEXT: movw %cx, %ax
230+ ; CHECK0-NEXT: ## implicit-def: $xmm0
231+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
232+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
233+ ; CHECK0-NEXT: retq
234+ %ret = load atomic <2 x half >, ptr %x acquire , align 4
235+ ret <2 x half > %ret
236+ }
237+
; Test: acquire atomic load of a <2 x bfloat> vector (align 4) from %x,
; returned directly. Both check prefixes expect one 32-bit
; `movl (%rdi), %eax` for the memory access. The low 16 bits and the bits
; shifted down by `shrl $16` are inserted into xmm0 and xmm1 with `pinsrw $0`,
; then combined by `punpcklwd` into xmm0.
; In the CHECK0 sequence the low lane is materialized into xmm0 before the
; high lane goes into xmm1.
; NOTE(review): CHECK3 / CHECK0 presumably map to an optimized and an -O0 RUN
; line; the RUN lines are outside this excerpt -- confirm there.
; NOTE(review): the assertions appear to be autogenerated; regenerate them with
; the update script rather than editing by hand.
238+ define <2 x bfloat> @atomic_vec2_bfloat (ptr %x ) {
239+ ; CHECK3-LABEL: atomic_vec2_bfloat:
240+ ; CHECK3: ## %bb.0:
241+ ; CHECK3-NEXT: movl (%rdi), %eax
242+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
243+ ; CHECK3-NEXT: shrl $16, %eax
244+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
245+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
246+ ; CHECK3-NEXT: retq
247+ ;
248+ ; CHECK0-LABEL: atomic_vec2_bfloat:
249+ ; CHECK0: ## %bb.0:
250+ ; CHECK0-NEXT: movl (%rdi), %eax
251+ ; CHECK0-NEXT: movl %eax, %ecx
252+ ; CHECK0-NEXT: shrl $16, %ecx
253+ ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
254+ ; CHECK0-NEXT: movw %ax, %dx
255+ ; CHECK0-NEXT: ## implicit-def: $eax
256+ ; CHECK0-NEXT: movw %dx, %ax
257+ ; CHECK0-NEXT: ## implicit-def: $xmm0
258+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
259+ ; CHECK0-NEXT: ## implicit-def: $eax
260+ ; CHECK0-NEXT: movw %cx, %ax
261+ ; CHECK0-NEXT: ## implicit-def: $xmm1
262+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
263+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
264+ ; CHECK0-NEXT: retq
265+ %ret = load atomic <2 x bfloat>, ptr %x acquire , align 4
266+ ret <2 x bfloat> %ret
267+ }
268+
207269define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
208270; CHECK3-LABEL: atomic_vec1_ptr:
209271; CHECK3: ## %bb.0:
@@ -376,6 +438,115 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
376438 ret <4 x i16 > %ret
377439}
378440
; Test: acquire atomic load of a <4 x half> vector (align 8) from %x, returned
; directly; the function is marked nounwind. Both check prefixes expect the
; memory access to be a single 64-bit `movq (%rdi), %rax`. The four 16-bit
; lanes are extracted from that value (unshifted, `shrl $16`, `shrq $32`,
; `shrq $48`), each inserted into its own xmm register with `pinsrw $0`, paired
; up by two `punpcklwd` instructions, and the two pairs are merged into xmm0
; (`punpckldq` under CHECK3, `unpcklps` under CHECK0).
; NOTE(review): CHECK3 / CHECK0 presumably map to an optimized and an -O0 RUN
; line; the RUN lines are outside this excerpt -- confirm there.
; NOTE(review): the assertions appear to be autogenerated; regenerate them with
; the update script rather than editing by hand.
441+ define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
442+ ; CHECK3-LABEL: atomic_vec4_half:
443+ ; CHECK3: ## %bb.0:
444+ ; CHECK3-NEXT: movq (%rdi), %rax
445+ ; CHECK3-NEXT: movl %eax, %ecx
446+ ; CHECK3-NEXT: shrl $16, %ecx
447+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
448+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
449+ ; CHECK3-NEXT: movq %rax, %rcx
450+ ; CHECK3-NEXT: shrq $32, %rcx
451+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
452+ ; CHECK3-NEXT: shrq $48, %rax
453+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
454+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
455+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
456+ ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
457+ ; CHECK3-NEXT: retq
458+ ;
459+ ; CHECK0-LABEL: atomic_vec4_half:
460+ ; CHECK0: ## %bb.0:
461+ ; CHECK0-NEXT: movq (%rdi), %rax
462+ ; CHECK0-NEXT: movl %eax, %ecx
463+ ; CHECK0-NEXT: shrl $16, %ecx
464+ ; CHECK0-NEXT: movw %cx, %dx
465+ ; CHECK0-NEXT: ## implicit-def: $ecx
466+ ; CHECK0-NEXT: movw %dx, %cx
467+ ; CHECK0-NEXT: ## implicit-def: $xmm2
468+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
469+ ; CHECK0-NEXT: movw %ax, %dx
470+ ; CHECK0-NEXT: ## implicit-def: $ecx
471+ ; CHECK0-NEXT: movw %dx, %cx
472+ ; CHECK0-NEXT: ## implicit-def: $xmm0
473+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm0
474+ ; CHECK0-NEXT: movq %rax, %rcx
475+ ; CHECK0-NEXT: shrq $32, %rcx
476+ ; CHECK0-NEXT: movw %cx, %dx
477+ ; CHECK0-NEXT: ## implicit-def: $ecx
478+ ; CHECK0-NEXT: movw %dx, %cx
479+ ; CHECK0-NEXT: ## implicit-def: $xmm1
480+ ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
481+ ; CHECK0-NEXT: shrq $48, %rax
482+ ; CHECK0-NEXT: movw %ax, %cx
483+ ; CHECK0-NEXT: ## implicit-def: $eax
484+ ; CHECK0-NEXT: movw %cx, %ax
485+ ; CHECK0-NEXT: ## implicit-def: $xmm3
486+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm3
487+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
488+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
489+ ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
490+ ; CHECK0-NEXT: retq
491+ %ret = load atomic <4 x half >, ptr %x acquire , align 8
492+ ret <4 x half > %ret
493+ }
494+
; Test: acquire atomic load of a <4 x bfloat> vector (align 8) from %x,
; returned directly; the function is marked nounwind. Both check prefixes
; expect the memory access to be a single 64-bit `movq (%rdi), %rax`. The four
; 16-bit lanes are taken from that value (unshifted, `shrl $16`, `shrq $32`,
; `shrq $48`), inserted into xmm registers with `pinsrw $0`, paired by two
; `punpcklwd` instructions, and the pairs are merged into xmm0 (`punpckldq`
; under CHECK3, `unpcklps` under CHECK0).
; The expected sequences reuse a scratch register after the first `punpcklwd`:
; xmm1 under CHECK3 and xmm0 under CHECK0 are written a second time.
; NOTE(review): CHECK3 / CHECK0 presumably map to an optimized and an -O0 RUN
; line; the RUN lines are outside this excerpt -- confirm there.
; NOTE(review): the assertions appear to be autogenerated; regenerate them with
; the update script rather than editing by hand.
495+ define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
496+ ; CHECK3-LABEL: atomic_vec4_bfloat:
497+ ; CHECK3: ## %bb.0:
498+ ; CHECK3-NEXT: movq (%rdi), %rax
499+ ; CHECK3-NEXT: movq %rax, %rcx
500+ ; CHECK3-NEXT: movq %rax, %rdx
501+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
502+ ; CHECK3-NEXT: ## kill: def $eax killed $eax killed $rax
503+ ; CHECK3-NEXT: shrl $16, %eax
504+ ; CHECK3-NEXT: shrq $32, %rcx
505+ ; CHECK3-NEXT: shrq $48, %rdx
506+ ; CHECK3-NEXT: pinsrw $0, %edx, %xmm1
507+ ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
508+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
509+ ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
510+ ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
511+ ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
512+ ; CHECK3-NEXT: retq
513+ ;
514+ ; CHECK0-LABEL: atomic_vec4_bfloat:
515+ ; CHECK0: ## %bb.0:
516+ ; CHECK0-NEXT: movq (%rdi), %rax
517+ ; CHECK0-NEXT: movl %eax, %ecx
518+ ; CHECK0-NEXT: shrl $16, %ecx
519+ ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
520+ ; CHECK0-NEXT: movw %ax, %dx
521+ ; CHECK0-NEXT: movq %rax, %rsi
522+ ; CHECK0-NEXT: shrq $32, %rsi
523+ ; CHECK0-NEXT: ## kill: def $si killed $si killed $rsi
524+ ; CHECK0-NEXT: shrq $48, %rax
525+ ; CHECK0-NEXT: movw %ax, %di
526+ ; CHECK0-NEXT: ## implicit-def: $eax
527+ ; CHECK0-NEXT: movw %di, %ax
528+ ; CHECK0-NEXT: ## implicit-def: $xmm0
529+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
530+ ; CHECK0-NEXT: ## implicit-def: $eax
531+ ; CHECK0-NEXT: movw %si, %ax
532+ ; CHECK0-NEXT: ## implicit-def: $xmm1
533+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
534+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
535+ ; CHECK0-NEXT: ## implicit-def: $eax
536+ ; CHECK0-NEXT: movw %dx, %ax
537+ ; CHECK0-NEXT: ## implicit-def: $xmm0
538+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
539+ ; CHECK0-NEXT: ## implicit-def: $eax
540+ ; CHECK0-NEXT: movw %cx, %ax
541+ ; CHECK0-NEXT: ## implicit-def: $xmm2
542+ ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
543+ ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
544+ ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
545+ ; CHECK0-NEXT: retq
546+ %ret = load atomic <4 x bfloat>, ptr %x acquire , align 8
547+ ret <4 x bfloat> %ret
548+ }
549+
379550define <4 x float > @atomic_vec4_float_align (ptr %x ) nounwind {
380551; CHECK-LABEL: atomic_vec4_float_align:
381552; CHECK: ## %bb.0:
0 commit comments