From c0027e693951f706cc12f122ec3c87b77a3454ba Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 28 Nov 2023 14:07:49 +0100 Subject: [PATCH] use hip atomic intrinsics --- hip/components/memory.hip.hpp | 109 +++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 42 deletions(-) diff --git a/hip/components/memory.hip.hpp b/hip/components/memory.hip.hpp index a138c755824..c2959ada779 100644 --- a/hip/components/memory.hip.hpp +++ b/hip/components/memory.hip.hpp @@ -58,117 +58,142 @@ struct gcc_atomic_intrinsic_type_map { }; -template +template __device__ __forceinline__ ValueType load_generic(const ValueType* ptr) { using atomic_type = typename gcc_atomic_intrinsic_type_map::type; static_assert(sizeof(atomic_type) == sizeof(ValueType), "invalid map"); static_assert(alignof(atomic_type) == sizeof(ValueType), "invalid map"); - auto cast_value = - __atomic_load_n(reinterpret_cast(ptr), memorder); + auto cast_value = __hip_atomic_load( + reinterpret_cast(ptr), memorder, scope); ValueType result{}; memcpy(&result, &cast_value, sizeof(ValueType)); return result; } +template +__device__ __forceinline__ void store_generic(ValueType* ptr, ValueType value) +{ + using atomic_type = typename gcc_atomic_intrinsic_type_map::type; + static_assert(sizeof(atomic_type) == sizeof(ValueType), "invalid map"); + static_assert(alignof(atomic_type) == sizeof(ValueType), "invalid map"); + atomic_type cast_value{}; + memcpy(&cast_value, &value, sizeof(ValueType)); + return __hip_atomic_store(reinterpret_cast(ptr), cast_value, + memorder, scope); +} + + template * = nullptr> __device__ __forceinline__ ValueType load_relaxed(const ValueType* ptr) { - return load_generic<__ATOMIC_RELAXED>(ptr); + return load_generic<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(ptr); } template * = nullptr> -__device__ __forceinline__ ValueType load_acquire(const ValueType* ptr) +__device__ __forceinline__ ValueType load_relaxed_shared(const ValueType* ptr) { - return load_generic<__ATOMIC_ACQUIRE>(ptr); + return load_generic<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP>(ptr); } -template -__device__ __forceinline__ void store_generic(ValueType* ptr, ValueType value) +template * = nullptr> +__device__ __forceinline__ ValueType load_acquire(const ValueType* ptr) { - using atomic_type = typename gcc_atomic_intrinsic_type_map::type; - static_assert(sizeof(atomic_type) == sizeof(ValueType), "invalid map"); - static_assert(alignof(atomic_type) == sizeof(ValueType), "invalid map"); - atomic_type cast_value{}; - memcpy(&cast_value, &value, sizeof(ValueType)); - return __atomic_store_n(reinterpret_cast(ptr), cast_value, - memorder); + return load_generic<__ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_AGENT>(ptr); } template * = nullptr> -__device__ __forceinline__ void store_relaxed(ValueType* ptr, ValueType value) +__device__ __forceinline__ ValueType load_acquire_shared(const ValueType* ptr) { - return store_generic<__ATOMIC_RELAXED>(ptr, value); + return load_generic<__ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_WORKGROUP>(ptr); } template * = nullptr> -__device__ __forceinline__ void store_release(ValueType* ptr, ValueType value) +__device__ __forceinline__ void store_relaxed(ValueType* ptr, ValueType value) { - return store_generic<__ATOMIC_RELEASE>(ptr, value); + return store_generic<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(ptr, + value); } -template -__device__ __forceinline__ thrust::complex load_relaxed( - const thrust::complex* ptr) +template * = nullptr> +__device__ __forceinline__ void store_relaxed_shared(ValueType* ptr, + ValueType value) { - auto real_ptr = reinterpret_cast(ptr); - auto real = load_relaxed(real_ptr); - auto imag = load_relaxed(real_ptr + 1); - return {real, imag}; + return store_generic<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP>(ptr, + value); } template * = nullptr> -__device__ __forceinline__ void store_relaxed(thrust::complex* ptr, - thrust::complex value) +__device__ __forceinline__ void store_release(ValueType* ptr, ValueType value) { - auto real_ptr = reinterpret_cast(ptr); - store_relaxed(real_ptr, value.real()); - store_relaxed(real_ptr + 1, value.imag()); + return store_generic<__ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_AGENT>(ptr, + value); } -// we can't annotate pointers with shared easily, so we don't try to be clever +template * = nullptr> +__device__ __forceinline__ void store_release_shared(ValueType* ptr, + ValueType value) +{ + return store_generic<__ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_WORKGROUP>(ptr, + value); +} template -__device__ __forceinline__ ValueType load_relaxed_shared(const ValueType* ptr) +__device__ __forceinline__ thrust::complex load_relaxed( + const thrust::complex* ptr) { - return load_relaxed(ptr); + auto real_ptr = reinterpret_cast(ptr); + auto real = load_relaxed(real_ptr); + auto imag = load_relaxed(real_ptr + 1); + return {real, imag}; } template -__device__ __forceinline__ ValueType load_acquire_shared(const ValueType* ptr) +__device__ __forceinline__ thrust::complex load_relaxed_shared( + const thrust::complex* ptr) { - return load_acquire(ptr); + auto real_ptr = reinterpret_cast(ptr); + auto real = load_relaxed_shared(real_ptr); + auto imag = load_relaxed_shared(real_ptr + 1); + return {real, imag}; } template -__device__ __forceinline__ void store_relaxed_shared(ValueType* ptr, - ValueType value) +__device__ __forceinline__ void store_relaxed(thrust::complex* ptr, + thrust::complex value) { - store_relaxed(ptr, value); + auto real_ptr = reinterpret_cast(ptr); + store_relaxed(real_ptr, value.real()); + store_relaxed(real_ptr + 1, value.imag()); } template -__device__ __forceinline__ void store_release_shared(ValueType* ptr, - ValueType value) +__device__ __forceinline__ void store_relaxed_shared( + thrust::complex* ptr, thrust::complex value) { - store_release(ptr, value); + auto real_ptr = reinterpret_cast(ptr); + store_relaxed_shared(real_ptr, value.real()); + store_relaxed_shared(real_ptr + 1, value.imag()); }