Skip to content

Commit

Permalink
GPU: Switch integer types to <cstdint> types
Browse files Browse the repository at this point in the history
  • Loading branch information
davidrohr committed Oct 7, 2024
1 parent 40f15f3 commit 42247aa
Show file tree
Hide file tree
Showing 419 changed files with 9,275 additions and 9,264 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ inline void RecoInputContainer::fillGPUIOPtr(o2::gpu::GPUTrackingInOutPointers*
ptrs->nTRDTriggerRecords = mNTriggerRecords;
ptrs->trdTriggerTimes = &(trdTriggerTimes[0]);
ptrs->trdTrackletIdxFirst = &(trdTriggerIndices[0]);
ptrs->trdTrigRecMask = reinterpret_cast<const char*>(mTrigRecMask.data());
ptrs->trdTrigRecMask = reinterpret_cast<const uint8_t*>(mTrigRecMask.data());
ptrs->nTRDTracklets = mNTracklets;
ptrs->trdTracklets = reinterpret_cast<const o2::gpu::GPUTRDTrackletWord*>(mTracklets.data());
ptrs->trdSpacePoints = reinterpret_cast<const o2::gpu::GPUTRDSpacePoint*>(mSpacePoints.data());
Expand Down
4 changes: 2 additions & 2 deletions Detectors/TPC/workflow/src/ZSSpec.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ DataProcessorSpec getZSEncoderSpec(std::vector<int> const& tpcSectors, bool outR
using DigitArray = std::array<gsl::span<const o2::tpc::Digit>, NSectors>;

struct ProcessAttributes {
std::unique_ptr<unsigned long long int[]> zsoutput;
std::unique_ptr<unsigned long[]> zsoutput;
std::unique_ptr<IonTailCorrection> itcorr;
std::vector<unsigned int> sizes;
std::vector<int> tpcSectors;
Expand Down Expand Up @@ -216,7 +216,7 @@ DataProcessorSpec getZStoDigitsSpec(std::vector<int> const& tpcSectors)

struct ProcessAttributes {
std::array<std::vector<Digit>, NSectors> outDigits;
std::unique_ptr<unsigned long long int[]> zsinput;
std::unique_ptr<unsigned long[]> zsinput;
std::vector<unsigned int> sizes;
std::unique_ptr<o2::tpc::ZeroSuppress> decoder;
std::vector<int> tpcSectors;
Expand Down
26 changes: 13 additions & 13 deletions GPU/Common/GPUCommonAlgorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ GPUdi() void GPUCommonAlgorithm::QuickSort(I f, I l, Cmp cmp) noexcept
if (f == l) {
return;
}
using IndexType = unsigned short;
using IndexType = uint16_t;

struct pair {
IndexType first;
Expand All @@ -166,7 +166,7 @@ GPUdi() void GPUCommonAlgorithm::QuickSort(I f, I l, Cmp cmp) noexcept

struct Stack {
pair data[11];
unsigned char n{0};
uint8_t n{0};

GPUd() void emplace(IndexType x, IndexType y)
{
Expand Down Expand Up @@ -295,12 +295,12 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp)
#ifndef GPUCA_GPUCODE
GPUCommonAlgorithm::sort(begin, end, comp);
#else
int n = end - begin;
for (int i = 0; i < n; i++) {
for (int tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) {
int offset = i % 2;
int curPos = 2 * tIdx + offset;
int nextPos = curPos + 1;
int32_t n = end - begin;
for (int32_t i = 0; i < n; i++) {
for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) {
int32_t offset = i % 2;
int32_t curPos = 2 * tIdx + offset;
int32_t nextPos = curPos + 1;

if (nextPos < n) {
if (!comp(begin[curPos], begin[nextPos])) {
Expand Down Expand Up @@ -363,9 +363,9 @@ GPUdi() T work_group_scan_inclusive_add_FUNC(T v, S& smem)

#define work_group_broadcast(v, i) work_group_broadcast_FUNC(v, i, smem)
template <class T, class S>
GPUdi() T work_group_broadcast_FUNC(T v, int i, S& smem)
GPUdi() T work_group_broadcast_FUNC(T v, int32_t i, S& smem)
{
if ((int)threadIdx.x == i) {
if ((int32_t)threadIdx.x == i) {
smem.tmpBroadcast = v;
}
__syncthreads();
Expand Down Expand Up @@ -394,7 +394,7 @@ GPUdi() T warp_scan_inclusive_add_FUNC(T v, S& smem)

#define warp_broadcast(v, i) warp_broadcast_FUNC(v, i)
template <class T>
GPUdi() T warp_broadcast_FUNC(T v, int i)
GPUdi() T warp_broadcast_FUNC(T v, int32_t i)
{
#ifdef __CUDACC__
return __shfl_sync(0xFFFFFFFF, v, i);
Expand All @@ -419,7 +419,7 @@ GPUdi() T work_group_reduce_add(T v)
}

template <class T>
GPUdi() T work_group_broadcast(T v, int i)
GPUdi() T work_group_broadcast(T v, int32_t i)
{
return v;
}
Expand All @@ -431,7 +431,7 @@ GPUdi() T warp_scan_inclusive_add(T v)
}

template <class T>
GPUdi() T warp_broadcast(T v, int i)
GPUdi() T warp_broadcast(T v, int32_t i)
{
return v;
}
Expand Down
34 changes: 19 additions & 15 deletions GPU/Common/GPUCommonDefAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
#error Please include GPUCommonDef.h!
#endif

#ifndef GPUCA_GPUCODE_DEVICE
#include <cstdint>
#endif

//Define macros for GPU keywords. i-version defines inline functions.
//All host-functions in GPU code are automatically inlined, to avoid duplicate symbols.
//For non-inline host only functions, use no keyword at all!
Expand Down Expand Up @@ -54,21 +58,21 @@
#define GPUconstantref() // reference / ptr to constant memory
#define GPUconstexprref() // reference / ptr to variable declared as GPUconstexpr()

#ifndef __VECTOR_TYPES_H__ // ROOT will pull in these CUDA definitions if built against CUDA, so we have to add an ugly protection here
#ifndef __VECTOR_TYPES_H__ // FIXME: ROOT will pull in these CUDA definitions if built against CUDA, so we have to add an ugly protection here
struct float4 { float x, y, z, w; };
struct float3 { float x, y, z; };
struct float2 { float x; float y; };
struct uchar2 { unsigned char x, y; };
struct short2 { short x, y; };
struct ushort2 { unsigned short x, y; };
struct int2 { int x, y; };
struct int3 { int x, y, z; };
struct int4 { int x, y, z, w; };
struct uint1 { unsigned int x; };
struct uint2 { unsigned int x, y; };
struct uint3 { unsigned int x, y, z; };
struct uint4 { unsigned int x, y, z, w; };
struct dim3 { unsigned int x, y, z; };
struct uchar2 { uint8_t x, y; };
struct short2 { int16_t x, y; };
struct ushort2 { uint16_t x, y; };
struct int2 { int32_t x, y; };
struct int3 { int32_t x, y, z; };
struct int4 { int32_t x, y, z, w; };
struct uint1 { uint32_t x; };
struct uint2 { uint32_t x, y; };
struct uint3 { uint32_t x, y, z; };
struct uint4 { uint32_t x, y, z, w; };
struct dim3 { uint32_t x, y, z; };
#endif
#elif defined(__OPENCL__) // Defines for OpenCL
#define GPUd()
Expand All @@ -95,15 +99,15 @@
#define GPUbarrier() work_group_barrier(mem_fence::global | mem_fence::local);
#define GPUbarrierWarp()
#define GPUAtomic(type) atomic<type>
static_assert(sizeof(atomic<unsigned int>) == sizeof(unsigned int), "Invalid size of atomic type");
static_assert(sizeof(atomic<uint32_t>) == sizeof(uint32_t), "Invalid size of atomic type");
#else
#define GPUbarrier() barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)
#define GPUbarrierWarp()
#if defined(__OPENCLCPP__) && defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS)
namespace GPUCA_NAMESPACE { namespace gpu {
template <class T> struct oclAtomic;
template <> struct oclAtomic<unsigned int> {typedef atomic_uint t;};
static_assert(sizeof(oclAtomic<unsigned int>::t) == sizeof(unsigned int), "Invalid size of atomic type");
template <> struct oclAtomic<uint32_t> {typedef atomic_uint t;};
static_assert(sizeof(oclAtomic<uint32_t>::t) == sizeof(uint32_t), "Invalid size of atomic type");
}}
#define GPUAtomic(type) GPUCA_NAMESPACE::gpu::oclAtomic<type>::t
#else
Expand Down
58 changes: 31 additions & 27 deletions GPU/Common/GPUCommonMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
#include <atomic>
#endif

#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__)
#include <cstdint>
#endif

#if !defined(__OPENCL__) || defined(__OPENCLCPP__)
namespace GPUCA_NAMESPACE
{
Expand Down Expand Up @@ -75,13 +79,13 @@ class GPUCommonMath
GPUd() static CONSTEXPR float Pi() { return 3.1415927f; }
GPUd() static float Round(float x);
GPUd() static float Floor(float x);
GPUd() static unsigned int Float2UIntReint(const float& x);
GPUd() static unsigned int Float2UIntRn(float x);
GPUd() static int Float2IntRn(float x);
GPUd() static uint32_t Float2UIntReint(const float& x);
GPUd() static uint32_t Float2UIntRn(float x);
GPUd() static int32_t Float2IntRn(float x);
GPUd() static float Modf(float x, float y);
GPUd() static bool Finite(float x);
GPUd() static unsigned int Clz(unsigned int val);
GPUd() static unsigned int Popcount(unsigned int val);
GPUd() static uint32_t Clz(uint32_t val);
GPUd() static uint32_t Popcount(uint32_t val);

GPUhdni() static float Hypot(float x, float y);
GPUhdni() static float Hypot(float x, float y, float z);
Expand Down Expand Up @@ -137,10 +141,10 @@ class GPUCommonMath
{
GPUCommonMath::AtomicMinInternal(addr, val);
}
GPUd() static int Mul24(int a, int b);
GPUd() static int32_t Mul24(int32_t a, int32_t b);
GPUd() static float FMulRZ(float a, float b);

template <int I, class T>
template <int32_t I, class T>
GPUd() CONSTEXPR static T nextMultipleOf(T val);

#ifdef GPUCA_NOCOMPAT
Expand All @@ -163,11 +167,11 @@ class GPUCommonMath

private:
template <class S, class T>
GPUd() static unsigned int AtomicExchInternal(S* addr, T val);
GPUd() static uint32_t AtomicExchInternal(S* addr, T val);
template <class S, class T>
GPUd() static bool AtomicCASInternal(S* addr, T cmp, T val);
template <class S, class T>
GPUd() static unsigned int AtomicAddInternal(S* addr, T val);
GPUd() static uint32_t AtomicAddInternal(S* addr, T val);
template <class S, class T>
GPUd() static void AtomicMaxInternal(S* addr, T val);
template <class S, class T>
Expand All @@ -185,7 +189,7 @@ typedef GPUCommonMath CAMath;
#define CHOICE(c1, c2, c3) (c1) // Select first option for Host
#endif // clang-format on

template <int I, class T>
template <int32_t I, class T>
GPUdi() CONSTEXPR T GPUCommonMath::nextMultipleOf(T val)
{
if CONSTEXPR (I & (I - 1)) {
Expand All @@ -212,23 +216,23 @@ GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y)

GPUdi() float GPUCommonMath::Modf(float x, float y) { return CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); }

GPUdi() unsigned int GPUCommonMath::Float2UIntReint(const float& x)
GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x)
{
#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__))
return __float_as_uint(x);
#elif defined(GPUCA_GPUCODE_DEVICE) && (defined(__OPENCL__) || defined(__OPENCLCPP__))
return as_uint(x);
#else
return reinterpret_cast<const unsigned int&>(x);
return reinterpret_cast<const uint32_t&>(x);
#endif
}

GPUdi() unsigned int GPUCommonMath::Float2UIntRn(float x) { return (unsigned int)(int)(x + 0.5f); }
GPUdi() uint32_t GPUCommonMath::Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); }
GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x), floor(x)); }

#ifdef GPUCA_NO_FAST_MATH
GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); }
GPUdi() int GPUCommonMath::Float2IntRn(float x) { return (int)Round(x); }
GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); }
GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); }
GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); }
GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); }
Expand All @@ -243,7 +247,7 @@ GPUdi() float GPUCommonMath::Log(float x) { return CHOICE((float)log((double)x),
GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); }
#else
GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), rintf(x), rint(x)); }
GPUdi() int GPUCommonMath::Float2IntRn(float x) { return CHOICE((int)Round(x), __float2int_rn(x), (int)Round(x)); }
GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); }
GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), true, true); }
GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); }
GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE(atanf(x), atanf(x), atan(x)); }
Expand Down Expand Up @@ -283,12 +287,12 @@ GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c)
#endif
}

GPUdi() unsigned int GPUCommonMath::Clz(unsigned int x)
GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x)
{
#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && (!defined(__OPENCL__) || defined(__OPENCLCPP__))
return x == 0 ? 32 : CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available
#else
for (int i = 31; i >= 0; i--) {
for (int32_t i = 31; i >= 0; i--) {
if (x & (1u << i)) {
return (31 - i);
}
Expand All @@ -297,7 +301,7 @@ GPUdi() unsigned int GPUCommonMath::Clz(unsigned int x)
#endif
}

GPUdi() unsigned int GPUCommonMath::Popcount(unsigned int x)
GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x)
{
#if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && (!defined(__OPENCL__) /*|| defined(__OPENCLCPP__)*/) // TODO: remove OPENCLCPP workaround when reported SPIR-V bug is fixed
// use builtin if available
Expand Down Expand Up @@ -404,7 +408,7 @@ GPUdi() float GPUCommonMath::InvSqrt(float _x)
#else
union {
float f;
int i;
int32_t i;
} x = {_x};
const float xhalf = 0.5f * x.f;
x.i = 0x5f3759df - (x.i >> 1);
Expand All @@ -428,7 +432,7 @@ GPUhdi() double GPUCommonMath::Abs<double>(double x)
#endif

template <>
GPUhdi() int GPUCommonMath::Abs<int>(int x)
GPUhdi() int32_t GPUCommonMath::Abs<int32_t>(int32_t x)
{
return CHOICE(abs(x), abs(x), abs(x));
}
Expand All @@ -448,7 +452,7 @@ GPUhdi() float GPUCommonMath::Copysign(float x, float y)
}

template <class S, class T>
GPUdi() unsigned int GPUCommonMath::AtomicExchInternal(S* addr, T val)
GPUdi() uint32_t GPUCommonMath::AtomicExchInternal(S* addr, T val)
{
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
return ::atomic_exchange(addr, val);
Expand All @@ -457,7 +461,7 @@ GPUdi() unsigned int GPUCommonMath::AtomicExchInternal(S* addr, T val)
#elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
return ::atomicExch(addr, val);
#elif defined(WITH_OPENMP)
unsigned int old;
uint32_t old;
__atomic_exchange(addr, &val, &old, __ATOMIC_SEQ_CST);
return old;
#else
Expand All @@ -482,7 +486,7 @@ GPUdi() bool GPUCommonMath::AtomicCASInternal(S* addr, T cmp, T val)
}

template <class S, class T>
GPUdi() unsigned int GPUCommonMath::AtomicAddInternal(S* addr, T val)
GPUdi() uint32_t GPUCommonMath::AtomicAddInternal(S* addr, T val)
{
#if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
return ::atomic_fetch_add(addr, val);
Expand Down Expand Up @@ -538,9 +542,9 @@ GPUdii() void GPUCommonMath::AtomicMaxInternal(GPUglobalref() GPUgeneric() GPUAt
val = 0.f;
}
if (val >= 0) {
AtomicMaxInternal((GPUAtomic(int)*)addr, __float_as_int(val));
AtomicMaxInternal((GPUAtomic(int32_t)*)addr, __float_as_int(val));
} else {
AtomicMinInternal((GPUAtomic(unsigned int)*)addr, __float_as_uint(val));
AtomicMinInternal((GPUAtomic(uint32_t)*)addr, __float_as_uint(val));
}
}
template <>
Expand All @@ -550,9 +554,9 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt
val = 0.f;
}
if (val >= 0) {
AtomicMinInternal((GPUAtomic(int)*)addr, __float_as_int(val));
AtomicMinInternal((GPUAtomic(int32_t)*)addr, __float_as_int(val));
} else {
AtomicMaxInternal((GPUAtomic(unsigned int)*)addr, __float_as_uint(val));
AtomicMaxInternal((GPUAtomic(uint32_t)*)addr, __float_as_uint(val));
}
}
#endif
Expand Down
4 changes: 2 additions & 2 deletions GPU/Common/GPUCommonRtypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#define ClassImp(name)
#define templateClassImp(name)
#ifndef GPUCA_GPUCODE_DEVICE
// typedef unsigned long long ULong64_t;
// typedef unsigned int UInt_t;
// typedef uint64_t ULong64_t;
// typedef uint32_t UInt_t;
#include <iostream>
#endif
#endif
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/GPUCommonTransform3D.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Transform3D
Transform3D() = default;
Transform3D(float* v)
{
for (int i = 0; i < 12; i++) {
for (int32_t i = 0; i < 12; i++) {
m[i] = v[i];
}
}
Expand Down
Loading

0 comments on commit 42247aa

Please sign in to comment.