@@ -208,29 +208,12 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
208208define <2 x half > @atomic_vec2_half (ptr %x ) {
209209; CHECK3-LABEL: atomic_vec2_half:
210210; CHECK3: ## %bb.0:
211- ; CHECK3-NEXT: movl (%rdi), %eax
212- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
213- ; CHECK3-NEXT: shrl $16, %eax
214- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
215- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
211+ ; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
216212; CHECK3-NEXT: retq
217213;
218214; CHECK0-LABEL: atomic_vec2_half:
219215; CHECK0: ## %bb.0:
220- ; CHECK0-NEXT: movl (%rdi), %eax
221- ; CHECK0-NEXT: movl %eax, %ecx
222- ; CHECK0-NEXT: shrl $16, %ecx
223- ; CHECK0-NEXT: movw %cx, %dx
224- ; CHECK0-NEXT: ## implicit-def: $ecx
225- ; CHECK0-NEXT: movw %dx, %cx
226- ; CHECK0-NEXT: ## implicit-def: $xmm1
227- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
228- ; CHECK0-NEXT: movw %ax, %cx
229- ; CHECK0-NEXT: ## implicit-def: $eax
230- ; CHECK0-NEXT: movw %cx, %ax
231- ; CHECK0-NEXT: ## implicit-def: $xmm0
232- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
233- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
216+ ; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
234217; CHECK0-NEXT: retq
235218 %ret = load atomic <2 x half >, ptr %x acquire , align 4
236219 ret <2 x half > %ret
@@ -239,29 +222,12 @@ define <2 x half> @atomic_vec2_half(ptr %x) {
239222define <2 x bfloat> @atomic_vec2_bfloat (ptr %x ) {
240223; CHECK3-LABEL: atomic_vec2_bfloat:
241224; CHECK3: ## %bb.0:
242- ; CHECK3-NEXT: movl (%rdi), %eax
243- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
244- ; CHECK3-NEXT: shrl $16, %eax
245- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
246- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
225+ ; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
247226; CHECK3-NEXT: retq
248227;
249228; CHECK0-LABEL: atomic_vec2_bfloat:
250229; CHECK0: ## %bb.0:
251- ; CHECK0-NEXT: movl (%rdi), %eax
252- ; CHECK0-NEXT: movl %eax, %ecx
253- ; CHECK0-NEXT: shrl $16, %ecx
254- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
255- ; CHECK0-NEXT: movw %ax, %dx
256- ; CHECK0-NEXT: ## implicit-def: $eax
257- ; CHECK0-NEXT: movw %dx, %ax
258- ; CHECK0-NEXT: ## implicit-def: $xmm0
259- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
260- ; CHECK0-NEXT: ## implicit-def: $eax
261- ; CHECK0-NEXT: movw %cx, %ax
262- ; CHECK0-NEXT: ## implicit-def: $xmm1
263- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
264- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
230+ ; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
265231; CHECK0-NEXT: retq
266232 %ret = load atomic <2 x bfloat>, ptr %x acquire , align 4
267233 ret <2 x bfloat> %ret
@@ -440,110 +406,19 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
440406}
441407
442408define <4 x half > @atomic_vec4_half (ptr %x ) nounwind {
443- ; CHECK3-LABEL: atomic_vec4_half:
444- ; CHECK3: ## %bb.0:
445- ; CHECK3-NEXT: movq (%rdi), %rax
446- ; CHECK3-NEXT: movl %eax, %ecx
447- ; CHECK3-NEXT: shrl $16, %ecx
448- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
449- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
450- ; CHECK3-NEXT: movq %rax, %rcx
451- ; CHECK3-NEXT: shrq $32, %rcx
452- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
453- ; CHECK3-NEXT: shrq $48, %rax
454- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
455- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
456- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
457- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
458- ; CHECK3-NEXT: retq
459- ;
460- ; CHECK0-LABEL: atomic_vec4_half:
461- ; CHECK0: ## %bb.0:
462- ; CHECK0-NEXT: movq (%rdi), %rax
463- ; CHECK0-NEXT: movl %eax, %ecx
464- ; CHECK0-NEXT: shrl $16, %ecx
465- ; CHECK0-NEXT: movw %cx, %dx
466- ; CHECK0-NEXT: ## implicit-def: $ecx
467- ; CHECK0-NEXT: movw %dx, %cx
468- ; CHECK0-NEXT: ## implicit-def: $xmm2
469- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
470- ; CHECK0-NEXT: movw %ax, %dx
471- ; CHECK0-NEXT: ## implicit-def: $ecx
472- ; CHECK0-NEXT: movw %dx, %cx
473- ; CHECK0-NEXT: ## implicit-def: $xmm0
474- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm0
475- ; CHECK0-NEXT: movq %rax, %rcx
476- ; CHECK0-NEXT: shrq $32, %rcx
477- ; CHECK0-NEXT: movw %cx, %dx
478- ; CHECK0-NEXT: ## implicit-def: $ecx
479- ; CHECK0-NEXT: movw %dx, %cx
480- ; CHECK0-NEXT: ## implicit-def: $xmm1
481- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
482- ; CHECK0-NEXT: shrq $48, %rax
483- ; CHECK0-NEXT: movw %ax, %cx
484- ; CHECK0-NEXT: ## implicit-def: $eax
485- ; CHECK0-NEXT: movw %cx, %ax
486- ; CHECK0-NEXT: ## implicit-def: $xmm3
487- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm3
488- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
489- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
490- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
491- ; CHECK0-NEXT: retq
409+ ; CHECK-LABEL: atomic_vec4_half:
410+ ; CHECK: ## %bb.0:
411+ ; CHECK-NEXT: movq (%rdi), %xmm0
412+ ; CHECK-NEXT: retq
492413 %ret = load atomic <4 x half >, ptr %x acquire , align 8
493414 ret <4 x half > %ret
494415}
495416
496417define <4 x bfloat> @atomic_vec4_bfloat (ptr %x ) nounwind {
497- ; CHECK3-LABEL: atomic_vec4_bfloat:
498- ; CHECK3: ## %bb.0:
499- ; CHECK3-NEXT: movq (%rdi), %rax
500- ; CHECK3-NEXT: movq %rax, %rcx
501- ; CHECK3-NEXT: movq %rax, %rdx
502- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
503- ; CHECK3-NEXT: ## kill: def $eax killed $eax killed $rax
504- ; CHECK3-NEXT: shrl $16, %eax
505- ; CHECK3-NEXT: shrq $32, %rcx
506- ; CHECK3-NEXT: shrq $48, %rdx
507- ; CHECK3-NEXT: pinsrw $0, %edx, %xmm1
508- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
509- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
510- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
511- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
512- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
513- ; CHECK3-NEXT: retq
514- ;
515- ; CHECK0-LABEL: atomic_vec4_bfloat:
516- ; CHECK0: ## %bb.0:
517- ; CHECK0-NEXT: movq (%rdi), %rax
518- ; CHECK0-NEXT: movl %eax, %ecx
519- ; CHECK0-NEXT: shrl $16, %ecx
520- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
521- ; CHECK0-NEXT: movw %ax, %dx
522- ; CHECK0-NEXT: movq %rax, %rsi
523- ; CHECK0-NEXT: shrq $32, %rsi
524- ; CHECK0-NEXT: ## kill: def $si killed $si killed $rsi
525- ; CHECK0-NEXT: shrq $48, %rax
526- ; CHECK0-NEXT: movw %ax, %di
527- ; CHECK0-NEXT: ## implicit-def: $eax
528- ; CHECK0-NEXT: movw %di, %ax
529- ; CHECK0-NEXT: ## implicit-def: $xmm0
530- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
531- ; CHECK0-NEXT: ## implicit-def: $eax
532- ; CHECK0-NEXT: movw %si, %ax
533- ; CHECK0-NEXT: ## implicit-def: $xmm1
534- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
535- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
536- ; CHECK0-NEXT: ## implicit-def: $eax
537- ; CHECK0-NEXT: movw %dx, %ax
538- ; CHECK0-NEXT: ## implicit-def: $xmm0
539- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
540- ; CHECK0-NEXT: ## implicit-def: $eax
541- ; CHECK0-NEXT: movw %cx, %ax
542- ; CHECK0-NEXT: ## implicit-def: $xmm2
543- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
544- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
545- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
546- ; CHECK0-NEXT: retq
418+ ; CHECK-LABEL: atomic_vec4_bfloat:
419+ ; CHECK: ## %bb.0:
420+ ; CHECK-NEXT: movq (%rdi), %xmm0
421+ ; CHECK-NEXT: retq
547422 %ret = load atomic <4 x bfloat>, ptr %x acquire , align 8
548423 ret <4 x bfloat> %ret
549424}
0 commit comments