Commit

use hip atomic intrinsics
upsj committed Nov 28, 2023
1 parent e749292 · commit c0027e6
Showing 1 changed file with 67 additions and 42 deletions.
109 changes: 67 additions & 42 deletions hip/components/memory.hip.hpp
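
The core of the change, shown here as a standalone sketch rather than as part of the commit (the function names below are illustrative only): the GCC __atomic_* builtins take just a memory order, while the __hip_atomic_* intrinsics additionally take an explicit memory scope, which is what enables the workgroup-scoped *_shared variants in the diff below.

#include <hip/hip_runtime.h>

// Illustrative only: the pre-commit code went through the GCC builtin,
// which has no scope argument; device-wide visibility is implied.
__device__ int load_relaxed_gcc_builtin(const int* ptr)
{
    return __atomic_load_n(ptr, __ATOMIC_RELAXED);
}

// Post-commit, the HIP intrinsic is used with an explicit scope:
// __HIP_MEMORY_SCOPE_AGENT for device-wide visibility,
// __HIP_MEMORY_SCOPE_WORKGROUP for the *_shared helpers.
__device__ int load_relaxed_hip_intrinsic(const int* ptr)
{
    return __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
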
@@ -58,117 +58,142 @@ struct gcc_atomic_intrinsic_type_map<double> {
 };
 
 
-template <int memorder, typename ValueType>
+template <int memorder, int scope, typename ValueType>
 __device__ __forceinline__ ValueType load_generic(const ValueType* ptr)
 {
     using atomic_type = typename gcc_atomic_intrinsic_type_map<ValueType>::type;
     static_assert(sizeof(atomic_type) == sizeof(ValueType), "invalid map");
     static_assert(alignof(atomic_type) == sizeof(ValueType), "invalid map");
-    auto cast_value =
-        __atomic_load_n(reinterpret_cast<const atomic_type*>(ptr), memorder);
+    auto cast_value = __hip_atomic_load(
+        reinterpret_cast<const atomic_type*>(ptr), memorder, scope);
     ValueType result{};
     memcpy(&result, &cast_value, sizeof(ValueType));
     return result;
 }
 
 
+template <int memorder, int scope, typename ValueType>
+__device__ __forceinline__ void store_generic(ValueType* ptr, ValueType value)
+{
+    using atomic_type = typename gcc_atomic_intrinsic_type_map<ValueType>::type;
+    static_assert(sizeof(atomic_type) == sizeof(ValueType), "invalid map");
+    static_assert(alignof(atomic_type) == sizeof(ValueType), "invalid map");
+    atomic_type cast_value{};
+    memcpy(&cast_value, &value, sizeof(ValueType));
+    return __hip_atomic_store(reinterpret_cast<atomic_type*>(ptr), cast_value,
+                              memorder, scope);
+}
+
+
 template <typename ValueType,
           gcc_atomic_intrinsic_type_map<ValueType>* = nullptr>
 __device__ __forceinline__ ValueType load_relaxed(const ValueType* ptr)
 {
-    return load_generic<__ATOMIC_RELAXED>(ptr);
+    return load_generic<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(ptr);
 }
 
 
 template <typename ValueType,
           gcc_atomic_intrinsic_type_map<ValueType>* = nullptr>
-__device__ __forceinline__ ValueType load_acquire(const ValueType* ptr)
+__device__ __forceinline__ ValueType load_relaxed_shared(const ValueType* ptr)
 {
-    return load_generic<__ATOMIC_ACQUIRE>(ptr);
+    return load_generic<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP>(ptr);
 }
 
 
-template <int memorder, typename ValueType>
-__device__ __forceinline__ void store_generic(ValueType* ptr, ValueType value)
+template <typename ValueType,
+          gcc_atomic_intrinsic_type_map<ValueType>* = nullptr>
+__device__ __forceinline__ ValueType load_acquire(const ValueType* ptr)
 {
-    using atomic_type = typename gcc_atomic_intrinsic_type_map<ValueType>::type;
-    static_assert(sizeof(atomic_type) == sizeof(ValueType), "invalid map");
-    static_assert(alignof(atomic_type) == sizeof(ValueType), "invalid map");
-    atomic_type cast_value{};
-    memcpy(&cast_value, &value, sizeof(ValueType));
-    return __atomic_store_n(reinterpret_cast<atomic_type*>(ptr), cast_value,
-                            memorder);
+    return load_generic<__ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_AGENT>(ptr);
 }
 
 
 template <typename ValueType,
           gcc_atomic_intrinsic_type_map<ValueType>* = nullptr>
-__device__ __forceinline__ void store_relaxed(ValueType* ptr, ValueType value)
+__device__ __forceinline__ ValueType load_acquire_shared(const ValueType* ptr)
 {
-    return store_generic<__ATOMIC_RELAXED>(ptr, value);
+    return load_generic<__ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_WORKGROUP>(ptr);
 }
 
 
 template <typename ValueType,
           gcc_atomic_intrinsic_type_map<ValueType>* = nullptr>
-__device__ __forceinline__ void store_release(ValueType* ptr, ValueType value)
+__device__ __forceinline__ void store_relaxed(ValueType* ptr, ValueType value)
 {
-    return store_generic<__ATOMIC_RELEASE>(ptr, value);
+    return store_generic<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT>(ptr,
+                                                                     value);
 }
 
 
-template <typename ValueType>
-__device__ __forceinline__ thrust::complex<ValueType> load_relaxed(
-    const thrust::complex<ValueType>* ptr)
+template <typename ValueType,
+          gcc_atomic_intrinsic_type_map<ValueType>* = nullptr>
+__device__ __forceinline__ void store_relaxed_shared(ValueType* ptr,
+                                                     ValueType value)
 {
-    auto real_ptr = reinterpret_cast<const ValueType*>(ptr);
-    auto real = load_relaxed(real_ptr);
-    auto imag = load_relaxed(real_ptr + 1);
-    return {real, imag};
+    return store_generic<__ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP>(ptr,
+                                                                         value);
 }
 
 
-template <typename ValueType>
-__device__ __forceinline__ void store_relaxed(thrust::complex<ValueType>* ptr,
-                                              thrust::complex<ValueType> value)
+template <typename ValueType,
+          gcc_atomic_intrinsic_type_map<ValueType>* = nullptr>
+__device__ __forceinline__ void store_release(ValueType* ptr, ValueType value)
 {
-    auto real_ptr = reinterpret_cast<ValueType*>(ptr);
-    store_relaxed(real_ptr, value.real());
-    store_relaxed(real_ptr + 1, value.imag());
+    return store_generic<__ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_AGENT>(ptr,
+                                                                     value);
 }
 
 
-// we can't annotate pointers with shared easily, so we don't try to be clever
+template <typename ValueType,
+          gcc_atomic_intrinsic_type_map<ValueType>* = nullptr>
+__device__ __forceinline__ void store_release_shared(ValueType* ptr,
+                                                     ValueType value)
+{
+    return store_generic<__ATOMIC_RELEASE, __HIP_MEMORY_SCOPE_WORKGROUP>(ptr,
+                                                                         value);
+}
+
+
 template <typename ValueType>
-__device__ __forceinline__ ValueType load_relaxed_shared(const ValueType* ptr)
+__device__ __forceinline__ thrust::complex<ValueType> load_relaxed(
+    const thrust::complex<ValueType>* ptr)
 {
-    return load_relaxed(ptr);
+    auto real_ptr = reinterpret_cast<const ValueType*>(ptr);
+    auto real = load_relaxed(real_ptr);
+    auto imag = load_relaxed(real_ptr + 1);
+    return {real, imag};
 }
 
 
 template <typename ValueType>
-__device__ __forceinline__ ValueType load_acquire_shared(const ValueType* ptr)
+__device__ __forceinline__ thrust::complex<ValueType> load_relaxed_shared(
+    const thrust::complex<ValueType>* ptr)
 {
-    return load_acquire(ptr);
+    auto real_ptr = reinterpret_cast<const ValueType*>(ptr);
+    auto real = load_relaxed_shared(real_ptr);
+    auto imag = load_relaxed_shared(real_ptr + 1);
+    return {real, imag};
 }
 
 
 template <typename ValueType>
-__device__ __forceinline__ void store_relaxed_shared(ValueType* ptr,
-                                                     ValueType value)
+__device__ __forceinline__ void store_relaxed(thrust::complex<ValueType>* ptr,
+                                              thrust::complex<ValueType> value)
 {
-    store_relaxed(ptr, value);
+    auto real_ptr = reinterpret_cast<ValueType*>(ptr);
+    store_relaxed(real_ptr, value.real());
+    store_relaxed(real_ptr + 1, value.imag());
 }
 
 
 template <typename ValueType>
-__device__ __forceinline__ void store_release_shared(ValueType* ptr,
-                                                     ValueType value)
+__device__ __forceinline__ void store_relaxed_shared(
+    thrust::complex<ValueType>* ptr, thrust::complex<ValueType> value)
 {
-    store_release(ptr, value);
+    auto real_ptr = reinterpret_cast<ValueType*>(ptr);
+    store_relaxed_shared(real_ptr, value.real());
+    store_relaxed_shared(real_ptr + 1, value.imag());
 }
 
 
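For context, a hypothetical usage sketch (not part of this commit) of the helpers defined above: a single block hands a value from one wavefront to another through __shared__ memory, pairing store_release_shared with load_acquire_shared so that the relaxed payload load is guaranteed to observe the published value. It assumes memory.hip.hpp and its namespace are in scope, that the value type has a gcc_atomic_intrinsic_type_map specialization, and that the block is launched with more than 64 threads so producer and consumer sit in different wavefronts.

#include <hip/hip_runtime.h>

__global__ void block_handshake(int* out)
{
    __shared__ int payload;
    __shared__ int flag;
    if (threadIdx.x == 0) {
        flag = 0;
    }
    __syncthreads();
    if (threadIdx.x == 0) {
        // producer: write the payload, then publish it with release semantics
        store_relaxed_shared(&payload, 42);
        store_release_shared(&flag, 1);
    }
    if (threadIdx.x == 64) {
        // consumer in another wavefront: spin with acquire semantics, after
        // which the relaxed load is guaranteed to see the payload
        while (load_acquire_shared(&flag) != 1) {
        }
        out[blockIdx.x] = load_relaxed_shared(&payload);
    }
}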