diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 79bdd25c18f1fd..32ba5ebdec6d37 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -10956,8 +10956,8 @@ If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering
 <ordering>` and optional ``syncscope("<target-scope>")`` argument. The
 ``release`` and ``acq_rel`` orderings are not valid on ``load`` instructions.
 Atomic loads produce :ref:`defined <memmodel>` results when they may see
-multiple atomic stores. The type of the pointee must be an integer, pointer, or
-floating-point type whose bit width is a power of two greater than or equal to
+multiple atomic stores. The type of the pointee must be an integer, pointer,
+floating-point, or vector type whose bit width is a power of two greater than or equal to
 eight and less than or equal to a target-specific size limit. ``align`` must be
 explicitly specified on atomic loads. Note: if the alignment is not greater or
 equal to the size of the `<value>` type, the atomic operation is likely to
@@ -11097,8 +11097,8 @@ If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering
 <ordering>` and optional ``syncscope("<target-scope>")`` argument. The
 ``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` instructions.
 Atomic loads produce :ref:`defined <memmodel>` results when they may see
-multiple atomic stores. The type of the pointee must be an integer, pointer, or
-floating-point type whose bit width is a power of two greater than or equal to
+multiple atomic stores. The type of the pointee must be an integer, pointer,
+floating-point, or vector type whose bit width is a power of two greater than or equal to
 eight and less than or equal to a target-specific size limit. ``align`` must be
 explicitly specified on atomic stores. Note: if the alignment is not greater or
 equal to the size of the `<value>` type, the atomic operation is likely to
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 55de486e90e190..6f847e3b3fc70c 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4255,9 +4255,10 @@ void Verifier::visitLoadInst(LoadInst &LI) {
     Check(LI.getOrdering() != AtomicOrdering::Release &&
               LI.getOrdering() != AtomicOrdering::AcquireRelease,
           "Load cannot have Release ordering", &LI);
-    Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
-          "atomic load operand must have integer, pointer, or floating point "
-          "type!",
+    Check(ElTy->getScalarType()->isIntOrPtrTy() ||
+              ElTy->getScalarType()->isFloatingPointTy(),
+          "atomic load operand must have integer, pointer, floating point, "
+          "or vector type!",
           ElTy, &LI);
     checkAtomicMemAccessSize(ElTy, &LI);
   } else {
@@ -4281,9 +4282,10 @@ void Verifier::visitStoreInst(StoreInst &SI) {
     Check(SI.getOrdering() != AtomicOrdering::Acquire &&
               SI.getOrdering() != AtomicOrdering::AcquireRelease,
           "Store cannot have Acquire ordering", &SI);
-    Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
-          "atomic store operand must have integer, pointer, or floating point "
-          "type!",
+    Check(ElTy->getScalarType()->isIntOrPtrTy() ||
+              ElTy->getScalarType()->isFloatingPointTy(),
+          "atomic store operand must have integer, pointer, floating point, "
+          "or vector type!",
           ElTy, &SI);
     checkAtomicMemAccessSize(ElTy, &SI);
   } else {
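[Illustrative note, not part of the patch: because the check now looks through
ElTy->getScalarType(), the verifier accepts any vector whose element type is an
integer, pointer, or floating-point type, subject to the usual size checks. A
minimal IR sketch of a newly accepted integer-vector case (the function name is
hypothetical):

  define <2 x i32> @load_atomic_vector2_i32(ptr %src) {
    %ret = load atomic <2 x i32>, ptr %src acquire, align 8
    ret <2 x i32> %ret
  }

Aggregate pointees such as { i32 } remain rejected, as the updated
llvm/test/Verifier/atomics.ll below verifies.]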
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9048d1d83f1874..3e0134cb6852a4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31158,6 +31158,14 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
   return false;
 }
 
+TargetLoweringBase::AtomicExpansionKind
+X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
+  if (const auto VT = dyn_cast<VectorType>(LI->getType()))
+    if (VT->getElementType()->isFloatingPointTy())
+      return AtomicExpansionKind::CastToInteger;
+  return TargetLowering::shouldCastAtomicLoadInIR(LI);
+}
+
 TargetLoweringBase::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   Type *MemType = SI->getValueOperand()->getType();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 2db25d6dda061a..b4abb92822b70e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1808,6 +1808,8 @@ namespace llvm {
     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
     ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
 
+    TargetLoweringBase::AtomicExpansionKind
+    shouldCastAtomicLoadInIR(LoadInst *LI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     TargetLoweringBase::AtomicExpansionKind
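[Illustrative sketch, not part of the patch: returning CastToInteger asks the
atomic-expand pass to load the value as an integer of the same width and
bitcast back to the vector type, when the integer atomic load is otherwise
legal. For a 4-byte floating-point vector,

  %ret = load atomic <2 x half>, ptr %src acquire, align 4

becomes, roughly:

  %1 = load atomic i32, ptr %src acquire, align 4
  %2 = bitcast i32 %1 to <2 x half>

Under-aligned or oversized cases still fall back to the __atomic_load libcall,
as the tests below show.]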
diff --git a/llvm/test/CodeGen/X86/atomicvec-float.ll b/llvm/test/CodeGen/X86/atomicvec-float.ll
new file mode 100644
index 00000000000000..39e6772c8fd01b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomicvec-float.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s --mtriple=x86_64 | FileCheck %s
+
+define <1 x float> @load_atomic_vector1_float(ptr %src) {
+; CHECK-LABEL: load_atomic_vector1_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+  %ret = load atomic <1 x float>, ptr %src acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <2 x float> @load_atomic_vector2_float(ptr %src) {
+; CHECK-LABEL: load_atomic_vector2_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $8, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq __atomic_load@PLT
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %ret = load atomic <2 x float>, ptr %src acquire, align 4
+  ret <2 x float> %ret
+}
+
+define <1 x double> @load_atomic_vector1_double(ptr %src) {
+; CHECK-LABEL: load_atomic_vector1_double:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $8, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq __atomic_load@PLT
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %ret = load atomic <1 x double>, ptr %src acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x double> @load_atomic_vector2_double(ptr %src) {
+; CHECK-LABEL: load_atomic_vector2_double:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $16, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq __atomic_load@PLT
+; CHECK-NEXT:    movaps (%rsp), %xmm0
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %ret = load atomic <2 x double>, ptr %src acquire, align 4
+  ret <2 x double> %ret
+}
+
+define <2 x half> @load_atomic_vector_half(ptr %src) {
+; CHECK-LABEL: load_atomic_vector_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    retq
+  %ret = load atomic <2 x half>, ptr %src acquire, align 4
+  ret <2 x half> %ret
+}
+
+define <2 x bfloat> @load_atomic_vector_bfloat(ptr %src) {
+; CHECK-LABEL: load_atomic_vector_bfloat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    retq
+  %ret = load atomic <2 x bfloat>, ptr %src acquire, align 4
+  ret <2 x bfloat> %ret
+}
+
+define <2 x fp128> @load_atomic_vector_fp128(ptr %src) {
+; CHECK-LABEL: load_atomic_vector_fp128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $32, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq __atomic_load@PLT
+; CHECK-NEXT:    movaps (%rsp), %xmm0
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %ret = load atomic <2 x fp128>, ptr %src acquire, align 4
+  ret <2 x fp128> %ret
+}
diff --git a/llvm/test/Transforms/AtomicExpand/atomicvec-float.ll b/llvm/test/Transforms/AtomicExpand/atomicvec-float.ll
new file mode 100644
index 00000000000000..b2a32178d6780c
--- /dev/null
+++ b/llvm/test/Transforms/AtomicExpand/atomicvec-float.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s --mtriple=x86_64 --passes=atomic-expand -S -o - | FileCheck %s
+
+define <1 x float> @load_atomic_vector1_float(ptr %src) {
+; CHECK-LABEL: define <1 x float> @load_atomic_vector1_float(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[SRC]] acquire, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <1 x float>
+; CHECK-NEXT:    ret <1 x float> [[TMP2]]
+;
+  %ret = load atomic <1 x float>, ptr %src acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <2 x float> @load_atomic_vector2_float(ptr %src) {
+; CHECK-LABEL: define <2 x float> @load_atomic_vector2_float(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x float>, align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP1]])
+; CHECK-NEXT:    call void @__atomic_load(i64 8, ptr [[SRC]], ptr [[TMP1]], i32 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP1]])
+; CHECK-NEXT:    ret <2 x float> [[TMP2]]
+;
+  %ret = load atomic <2 x float>, ptr %src acquire, align 4
+  ret <2 x float> %ret
+}
+
+define <1 x double> @load_atomic_vector1_double(ptr %src) {
+; CHECK-LABEL: define <1 x double> @load_atomic_vector1_double(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca <1 x double>, align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP1]])
+; CHECK-NEXT:    call void @__atomic_load(i64 8, ptr [[SRC]], ptr [[TMP1]], i32 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x double>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP1]])
+; CHECK-NEXT:    ret <1 x double> [[TMP2]]
+;
+  %ret = load atomic <1 x double>, ptr %src acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x double> @load_atomic_vector2_double(ptr %src) {
+; CHECK-LABEL: define <2 x double> @load_atomic_vector2_double(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x double>, align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr [[TMP1]])
+; CHECK-NEXT:    call void @__atomic_load(i64 16, ptr [[SRC]], ptr [[TMP1]], i32 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[TMP1]], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr [[TMP1]])
+; CHECK-NEXT:    ret <2 x double> [[TMP2]]
+;
+  %ret = load atomic <2 x double>, ptr %src acquire, align 4
+  ret <2 x double> %ret
+}
+
+define <2 x half> @load_atomic_vector_half(ptr %src) {
+; CHECK-LABEL: define <2 x half> @load_atomic_vector_half(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[SRC]] acquire, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <2 x half>
+; CHECK-NEXT:    ret <2 x half> [[TMP2]]
+;
+  %ret = load atomic <2 x half>, ptr %src acquire, align 4
+  ret <2 x half> %ret
+}
+
+define <2 x bfloat> @load_atomic_vector_bfloat(ptr %src) {
+; CHECK-LABEL: define <2 x bfloat> @load_atomic_vector_bfloat(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[SRC]] acquire, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <2 x bfloat>
+; CHECK-NEXT:    ret <2 x bfloat> [[TMP2]]
+;
+  %ret = load atomic <2 x bfloat>, ptr %src acquire, align 4
+  ret <2 x bfloat> %ret
+}
+
+define <2 x fp128> @load_atomic_vector_fp128(ptr %src) {
+; CHECK-LABEL: define <2 x fp128> @load_atomic_vector_fp128(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x fp128>, align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[TMP1]])
+; CHECK-NEXT:    call void @__atomic_load(i64 32, ptr [[SRC]], ptr [[TMP1]], i32 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x fp128>, ptr [[TMP1]], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[TMP1]])
+; CHECK-NEXT:    ret <2 x fp128> [[TMP2]]
+;
+  %ret = load atomic <2 x fp128>, ptr %src acquire, align 4
+  ret <2 x fp128> %ret
+}
diff --git a/llvm/test/Verifier/atomics.ll b/llvm/test/Verifier/atomics.ll
index f835b98b243456..17bf5a0528d738 100644
--- a/llvm/test/Verifier/atomics.ll
+++ b/llvm/test/Verifier/atomics.ll
@@ -1,14 +1,15 @@
 ; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+; CHECK: atomic store operand must have integer, pointer, floating point, or vector type!
+; CHECK: atomic load operand must have integer, pointer, floating point, or vector type!
 
-; CHECK: atomic store operand must have integer, pointer, or floating point type!
-; CHECK: atomic load operand must have integer, pointer, or floating point type!
+%ty = type { i32 };
 
-define void @foo(ptr %P, <1 x i64> %v) {
-  store atomic <1 x i64> %v, ptr %P unordered, align 8
+define void @foo(ptr %P, %ty %v) {
+  store atomic %ty %v, ptr %P unordered, align 8
   ret void
 }
 
-define <1 x i64> @bar(ptr %P) {
-  %v = load atomic <1 x i64>, ptr %P unordered, align 8
-  ret <1 x i64> %v
+define %ty @bar(ptr %P) {
+  %v = load atomic %ty, ptr %P unordered, align 8
+  ret %ty %v
 }
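[Illustrative note, not part of the patch: the relaxed verifier rule covers
atomic stores as well, so IR like the following (hypothetical function name)
now passes the verifier; X86 lowering of vector atomic stores is unchanged
here, since only the load hook is overridden:

  define void @store_atomic_vector1_float(ptr %dst, <1 x float> %v) {
    store atomic <1 x float> %v, ptr %dst release, align 4
    ret void
  }
]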