@@ -205,71 +205,19 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
205205}
206206
; Test function: atomically loads a <2 x half> vector from %x (acquire
; ordering, align 4) and returns it unchanged.
; In this diff view, the lines marked '-' are the former expectations, kept
; separately under the CHECK0 and CHECK3 prefixes (a 32-bit integer load
; followed by shift, pinsrw and mask/merge instructions). The lines marked '+'
; replace both with one shared prefix that expects a single movss load of
; mem[0] with the remaining lanes zero, followed by retq.
; NOTE(review): the RUN lines that define these prefixes are not visible in
; this chunk -- confirm which llc configurations each prefix covers.
207207define <2 x half > @atomic_vec2_half (ptr %x ) {
208- ; CHECK3-LABEL: atomic_vec2_half:
209- ; CHECK3: ## %bb.0:
210- ; CHECK3-NEXT: movl (%rdi), %eax
211- ; CHECK3-NEXT: movd %eax, %xmm1
212- ; CHECK3-NEXT: shrl $16, %eax
213- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm2
214- ; CHECK3-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535]
215- ; CHECK3-NEXT: pand %xmm0, %xmm1
216- ; CHECK3-NEXT: pslld $16, %xmm2
217- ; CHECK3-NEXT: pandn %xmm2, %xmm0
218- ; CHECK3-NEXT: por %xmm1, %xmm0
219- ; CHECK3-NEXT: retq
220- ;
221- ; CHECK0-LABEL: atomic_vec2_half:
222- ; CHECK0: ## %bb.0:
223- ; CHECK0-NEXT: movl (%rdi), %eax
224- ; CHECK0-NEXT: movl %eax, %ecx
225- ; CHECK0-NEXT: shrl $16, %ecx
226- ; CHECK0-NEXT: movw %cx, %dx
227- ; CHECK0-NEXT: ## implicit-def: $ecx
228- ; CHECK0-NEXT: movw %dx, %cx
229- ; CHECK0-NEXT: ## implicit-def: $xmm2
230- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
231- ; CHECK0-NEXT: movd %eax, %xmm0
232- ; CHECK0-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535]
233- ; CHECK0-NEXT: pand %xmm1, %xmm0
234- ; CHECK0-NEXT: pslld $16, %xmm2
235- ; CHECK0-NEXT: pandn %xmm2, %xmm1
236- ; CHECK0-NEXT: por %xmm1, %xmm0
237- ; CHECK0-NEXT: retq
208+ ; CHECK-LABEL: atomic_vec2_half:
209+ ; CHECK: ## %bb.0:
210+ ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
211+ ; CHECK-NEXT: retq
238212 %ret = load atomic <2 x half >, ptr %x acquire , align 4
239213 ret <2 x half > %ret
240214}
241215
; Test function: atomically loads a <2 x bfloat> vector from %x (acquire
; ordering, align 4) and returns it unchanged.
; In this diff view, the lines marked '-' are the former expectations, kept
; separately under the CHECK0 and CHECK3 prefixes (a 32-bit integer load
; followed by shift, pinsrw and mask/merge instructions). The lines marked '+'
; replace both with one shared prefix that expects a single movss load of
; mem[0] with the remaining lanes zero, followed by retq.
; NOTE(review): the RUN lines that define these prefixes are not visible in
; this chunk -- confirm which llc configurations each prefix covers.
242216define <2 x bfloat> @atomic_vec2_bfloat (ptr %x ) {
243- ; CHECK3-LABEL: atomic_vec2_bfloat:
244- ; CHECK3: ## %bb.0:
245- ; CHECK3-NEXT: movl (%rdi), %eax
246- ; CHECK3-NEXT: movd %eax, %xmm1
247- ; CHECK3-NEXT: shrl $16, %eax
248- ; CHECK3-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,65535,65535,65535,65535,65535]
249- ; CHECK3-NEXT: pand %xmm0, %xmm1
250- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm2
251- ; CHECK3-NEXT: pslld $16, %xmm2
252- ; CHECK3-NEXT: pandn %xmm2, %xmm0
253- ; CHECK3-NEXT: por %xmm1, %xmm0
254- ; CHECK3-NEXT: retq
255- ;
256- ; CHECK0-LABEL: atomic_vec2_bfloat:
257- ; CHECK0: ## %bb.0:
258- ; CHECK0-NEXT: movl (%rdi), %eax
259- ; CHECK0-NEXT: movl %eax, %ecx
260- ; CHECK0-NEXT: shrl $16, %ecx
261- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
262- ; CHECK0-NEXT: movd %eax, %xmm0
263- ; CHECK0-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,65535,65535,65535,65535,65535]
264- ; CHECK0-NEXT: pand %xmm1, %xmm0
265- ; CHECK0-NEXT: ## implicit-def: $eax
266- ; CHECK0-NEXT: movw %cx, %ax
267- ; CHECK0-NEXT: ## implicit-def: $xmm2
268- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
269- ; CHECK0-NEXT: pslld $16, %xmm2
270- ; CHECK0-NEXT: pandn %xmm2, %xmm1
271- ; CHECK0-NEXT: por %xmm1, %xmm0
272- ; CHECK0-NEXT: retq
217+ ; CHECK-LABEL: atomic_vec2_bfloat:
218+ ; CHECK: ## %bb.0:
219+ ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220+ ; CHECK-NEXT: retq
273221 %ret = load atomic <2 x bfloat>, ptr %x acquire , align 4
274222 ret <2 x bfloat> %ret
275223}
@@ -447,102 +395,19 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
447395}
448396
; Test function (nounwind): atomically loads a <4 x half> vector from %x
; (acquire ordering, align 8) and returns it unchanged.
; In this diff view, the lines marked '-' are the former expectations, kept
; separately under the CHECK0 and CHECK3 prefixes (a 64-bit integer load
; followed by shifts, pinsrw inserts and unpack shuffles). The lines marked '+'
; replace both with one shared prefix that expects a single movsd load of
; mem[0] with the upper lane zero, followed by retq.
; NOTE(review): the RUN lines that define these prefixes are not visible in
; this chunk -- confirm which llc configurations each prefix covers.
449397define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
450- ; CHECK3-LABEL: atomic_vec4_half:
451- ; CHECK3: ## %bb.0:
452- ; CHECK3-NEXT: movq (%rdi), %rax
453- ; CHECK3-NEXT: movl %eax, %ecx
454- ; CHECK3-NEXT: shrl $16, %ecx
455- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
456- ; CHECK3-NEXT: movq %rax, %rcx
457- ; CHECK3-NEXT: shrq $32, %rcx
458- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
459- ; CHECK3-NEXT: movq %rax, %xmm0
460- ; CHECK3-NEXT: shrq $48, %rax
461- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
462- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
463- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
464- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
465- ; CHECK3-NEXT: retq
466- ;
467- ; CHECK0-LABEL: atomic_vec4_half:
468- ; CHECK0: ## %bb.0:
469- ; CHECK0-NEXT: movq (%rdi), %rax
470- ; CHECK0-NEXT: movl %eax, %ecx
471- ; CHECK0-NEXT: shrl $16, %ecx
472- ; CHECK0-NEXT: movw %cx, %dx
473- ; CHECK0-NEXT: ## implicit-def: $ecx
474- ; CHECK0-NEXT: movw %dx, %cx
475- ; CHECK0-NEXT: ## implicit-def: $xmm2
476- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
477- ; CHECK0-NEXT: movq %rax, %rcx
478- ; CHECK0-NEXT: shrq $32, %rcx
479- ; CHECK0-NEXT: movw %cx, %dx
480- ; CHECK0-NEXT: ## implicit-def: $ecx
481- ; CHECK0-NEXT: movw %dx, %cx
482- ; CHECK0-NEXT: ## implicit-def: $xmm1
483- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
484- ; CHECK0-NEXT: movq %rax, %rcx
485- ; CHECK0-NEXT: shrq $48, %rcx
486- ; CHECK0-NEXT: movw %cx, %dx
487- ; CHECK0-NEXT: ## implicit-def: $ecx
488- ; CHECK0-NEXT: movw %dx, %cx
489- ; CHECK0-NEXT: ## implicit-def: $xmm3
490- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm3
491- ; CHECK0-NEXT: movq %rax, %xmm0
492- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
493- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
494- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
495- ; CHECK0-NEXT: retq
398+ ; CHECK-LABEL: atomic_vec4_half:
399+ ; CHECK: ## %bb.0:
400+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
401+ ; CHECK-NEXT: retq
496402 %ret = load atomic <4 x half >, ptr %x acquire , align 8
497403 ret <4 x half > %ret
498404}
499405
; Test function (nounwind): atomically loads a <4 x bfloat> vector from %x
; (acquire ordering, align 8) and returns it unchanged.
; In this diff view, the lines marked '-' are the former expectations, kept
; separately under the CHECK0 and CHECK3 prefixes (a 64-bit integer load
; followed by shifts, pinsrw inserts and unpack shuffles). The lines marked '+'
; replace both with one shared prefix that expects a single movsd load of
; mem[0] with the upper lane zero, followed by retq.
; NOTE(review): the RUN lines that define these prefixes are not visible in
; this chunk -- confirm which llc configurations each prefix covers.
500406define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
501- ; CHECK3-LABEL: atomic_vec4_bfloat:
502- ; CHECK3: ## %bb.0:
503- ; CHECK3-NEXT: movq (%rdi), %rax
504- ; CHECK3-NEXT: movq %rax, %xmm0
505- ; CHECK3-NEXT: movl %eax, %ecx
506- ; CHECK3-NEXT: shrl $16, %ecx
507- ; CHECK3-NEXT: movq %rax, %rdx
508- ; CHECK3-NEXT: shrq $32, %rdx
509- ; CHECK3-NEXT: shrq $48, %rax
510- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
511- ; CHECK3-NEXT: pinsrw $0, %edx, %xmm2
512- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
513- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
514- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
515- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
516- ; CHECK3-NEXT: retq
517- ;
518- ; CHECK0-LABEL: atomic_vec4_bfloat:
519- ; CHECK0: ## %bb.0:
520- ; CHECK0-NEXT: movq (%rdi), %rax
521- ; CHECK0-NEXT: movq %rax, %xmm0
522- ; CHECK0-NEXT: movl %eax, %ecx
523- ; CHECK0-NEXT: shrl $16, %ecx
524- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
525- ; CHECK0-NEXT: movq %rax, %rdx
526- ; CHECK0-NEXT: shrq $32, %rdx
527- ; CHECK0-NEXT: ## kill: def $dx killed $dx killed $rdx
528- ; CHECK0-NEXT: shrq $48, %rax
529- ; CHECK0-NEXT: movw %ax, %si
530- ; CHECK0-NEXT: ## implicit-def: $eax
531- ; CHECK0-NEXT: movw %si, %ax
532- ; CHECK0-NEXT: ## implicit-def: $xmm2
533- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
534- ; CHECK0-NEXT: ## implicit-def: $eax
535- ; CHECK0-NEXT: movw %dx, %ax
536- ; CHECK0-NEXT: ## implicit-def: $xmm1
537- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
538- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
539- ; CHECK0-NEXT: ## implicit-def: $eax
540- ; CHECK0-NEXT: movw %cx, %ax
541- ; CHECK0-NEXT: ## implicit-def: $xmm2
542- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
543- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
544- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
545- ; CHECK0-NEXT: retq
407+ ; CHECK-LABEL: atomic_vec4_bfloat:
408+ ; CHECK: ## %bb.0:
409+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
410+ ; CHECK-NEXT: retq
546411 %ret = load atomic <4 x bfloat>, ptr %x acquire , align 8
547412 ret <4 x bfloat> %ret
548413}
0 commit comments