GPU: Switch integer types to <cstdint> types

AliceO2Group · Oct 7, 2024 · 42247aa · 42247aa
1 parent 40f15f3
commit 42247aa
Show file tree

Hide file tree

Showing 419 changed files with 9,275 additions and 9,264 deletions.
diff --git a/DataFormats/Detectors/TRD/include/DataFormatsTRD/RecoInputContainer.h b/DataFormats/Detectors/TRD/include/DataFormatsTRD/RecoInputContainer.h
@@ -88,7 +88,7 @@ inline void RecoInputContainer::fillGPUIOPtr(o2::gpu::GPUTrackingInOutPointers*
   ptrs->nTRDTriggerRecords = mNTriggerRecords;
   ptrs->trdTriggerTimes = &(trdTriggerTimes[0]);
   ptrs->trdTrackletIdxFirst = &(trdTriggerIndices[0]);
-  ptrs->trdTrigRecMask = reinterpret_cast<const char*>(mTrigRecMask.data());
+  ptrs->trdTrigRecMask = reinterpret_cast<const uint8_t*>(mTrigRecMask.data());
   ptrs->nTRDTracklets = mNTracklets;
   ptrs->trdTracklets = reinterpret_cast<const o2::gpu::GPUTRDTrackletWord*>(mTracklets.data());
   ptrs->trdSpacePoints = reinterpret_cast<const o2::gpu::GPUTRDSpacePoint*>(mSpacePoints.data());

diff --git a/Detectors/TPC/workflow/src/ZSSpec.cxx b/Detectors/TPC/workflow/src/ZSSpec.cxx
@@ -66,7 +66,7 @@ DataProcessorSpec getZSEncoderSpec(std::vector<int> const& tpcSectors, bool outR
   using DigitArray = std::array<gsl::span<const o2::tpc::Digit>, NSectors>;
 
   struct ProcessAttributes {
-    std::unique_ptr<unsigned long long int[]> zsoutput;
+    std::unique_ptr<unsigned long[]> zsoutput;
     std::unique_ptr<IonTailCorrection> itcorr;
     std::vector<unsigned int> sizes;
     std::vector<int> tpcSectors;
@@ -216,7 +216,7 @@ DataProcessorSpec getZStoDigitsSpec(std::vector<int> const& tpcSectors)
 
   struct ProcessAttributes {
     std::array<std::vector<Digit>, NSectors> outDigits;
-    std::unique_ptr<unsigned long long int[]> zsinput;
+    std::unique_ptr<unsigned long[]> zsinput;
     std::vector<unsigned int> sizes;
     std::unique_ptr<o2::tpc::ZeroSuppress> decoder;
     std::vector<int> tpcSectors;

diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h
@@ -157,7 +157,7 @@ GPUdi() void GPUCommonAlgorithm::QuickSort(I f, I l, Cmp cmp) noexcept
   if (f == l) {
     return;
   }
-  using IndexType = unsigned short;
+  using IndexType = uint16_t;
 
   struct pair {
     IndexType first;
@@ -166,7 +166,7 @@ GPUdi() void GPUCommonAlgorithm::QuickSort(I f, I l, Cmp cmp) noexcept
 
   struct Stack {
     pair data[11];
-    unsigned char n{0};
+    uint8_t n{0};
 
     GPUd() void emplace(IndexType x, IndexType y)
     {
@@ -295,12 +295,12 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp)
 #ifndef GPUCA_GPUCODE
   GPUCommonAlgorithm::sort(begin, end, comp);
 #else
-  int n = end - begin;
-  for (int i = 0; i < n; i++) {
-    for (int tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) {
-      int offset = i % 2;
-      int curPos = 2 * tIdx + offset;
-      int nextPos = curPos + 1;
+  int32_t n = end - begin;
+  for (int32_t i = 0; i < n; i++) {
+    for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) {
+      int32_t offset = i % 2;
+      int32_t curPos = 2 * tIdx + offset;
+      int32_t nextPos = curPos + 1;
 
       if (nextPos < n) {
         if (!comp(begin[curPos], begin[nextPos])) {
@@ -363,9 +363,9 @@ GPUdi() T work_group_scan_inclusive_add_FUNC(T v, S& smem)
 
 #define work_group_broadcast(v, i) work_group_broadcast_FUNC(v, i, smem)
 template <class T, class S>
-GPUdi() T work_group_broadcast_FUNC(T v, int i, S& smem)
+GPUdi() T work_group_broadcast_FUNC(T v, int32_t i, S& smem)
 {
-  if ((int)threadIdx.x == i) {
+  if ((int32_t)threadIdx.x == i) {
     smem.tmpBroadcast = v;
   }
   __syncthreads();
@@ -394,7 +394,7 @@ GPUdi() T warp_scan_inclusive_add_FUNC(T v, S& smem)
 
 #define warp_broadcast(v, i) warp_broadcast_FUNC(v, i)
 template <class T>
-GPUdi() T warp_broadcast_FUNC(T v, int i)
+GPUdi() T warp_broadcast_FUNC(T v, int32_t i)
 {
 #ifdef __CUDACC__
   return __shfl_sync(0xFFFFFFFF, v, i);
@@ -419,7 +419,7 @@ GPUdi() T work_group_reduce_add(T v)
 }
 
 template <class T>
-GPUdi() T work_group_broadcast(T v, int i)
+GPUdi() T work_group_broadcast(T v, int32_t i)
 {
   return v;
 }
@@ -431,7 +431,7 @@ GPUdi() T warp_scan_inclusive_add(T v)
 }
 
 template <class T>
-GPUdi() T warp_broadcast(T v, int i)
+GPUdi() T warp_broadcast(T v, int32_t i)
 {
   return v;
 }

diff --git a/GPU/Common/GPUCommonDefAPI.h b/GPU/Common/GPUCommonDefAPI.h
@@ -20,6 +20,10 @@
   #error Please include GPUCommonDef.h!
 #endif
 
+#ifndef GPUCA_GPUCODE_DEVICE
+#include <cstdint>
+#endif
+
 //Define macros for GPU keywords. i-version defines inline functions.
 //All host-functions in GPU code are automatically inlined, to avoid duplicate symbols.
 //For non-inline host only functions, use no keyword at all!
@@ -54,21 +58,21 @@
   #define GPUconstantref()                          // reference / ptr to constant memory
   #define GPUconstexprref()                         // reference / ptr to variable declared as GPUconstexpr()
 
-  #ifndef __VECTOR_TYPES_H__ // ROOT will pull in these CUDA definitions if built against CUDA, so we have to add an ugly protection here
+  #ifndef __VECTOR_TYPES_H__ // FIXME: ROOT will pull in these CUDA definitions if built against CUDA, so we have to add an ugly protection here
     struct float4 { float x, y, z, w; };
     struct float3 { float x, y, z; };
     struct float2 { float x; float y; };
-    struct uchar2 { unsigned char x, y; };
-    struct short2 { short x, y; };
-    struct ushort2 { unsigned short x, y; };
-    struct int2 { int x, y; };
-    struct int3 { int x, y, z; };
-    struct int4 { int x, y, z, w; };
-    struct uint1 { unsigned int x; };
-    struct uint2 { unsigned int x, y; };
-    struct uint3 { unsigned int x, y, z; };
-    struct uint4 { unsigned int x, y, z, w; };
-    struct dim3 { unsigned int x, y, z; };
+    struct uchar2 { uint8_t x, y; };
+    struct short2 { int16_t x, y; };
+    struct ushort2 { uint16_t x, y; };
+    struct int2 { int32_t x, y; };
+    struct int3 { int32_t x, y, z; };
+    struct int4 { int32_t x, y, z, w; };
+    struct uint1 { uint32_t x; };
+    struct uint2 { uint32_t x, y; };
+    struct uint3 { uint32_t x, y, z; };
+    struct uint4 { uint32_t x, y, z, w; };
+    struct dim3 { uint32_t x, y, z; };
   #endif
 #elif defined(__OPENCL__) // Defines for OpenCL
   #define GPUd()
@@ -95,15 +99,15 @@
     #define GPUbarrier() work_group_barrier(mem_fence::global | mem_fence::local);
     #define GPUbarrierWarp()
     #define GPUAtomic(type) atomic<type>
-    static_assert(sizeof(atomic<unsigned int>) == sizeof(unsigned int), "Invalid size of atomic type");
+    static_assert(sizeof(atomic<uint32_t>) == sizeof(uint32_t), "Invalid size of atomic type");
   #else
     #define GPUbarrier() barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)
     #define GPUbarrierWarp()
     #if defined(__OPENCLCPP__) && defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS)
       namespace GPUCA_NAMESPACE { namespace gpu {
       template <class T> struct oclAtomic;
-      template <> struct oclAtomic<unsigned int> {typedef atomic_uint t;};
-      static_assert(sizeof(oclAtomic<unsigned int>::t) == sizeof(unsigned int), "Invalid size of atomic type");
+      template <> struct oclAtomic<uint32_t> {typedef atomic_uint t;};
+      static_assert(sizeof(oclAtomic<uint32_t>::t) == sizeof(uint32_t), "Invalid size of atomic type");
       }}
       #define GPUAtomic(type) GPUCA_NAMESPACE::gpu::oclAtomic<type>::t
     #else

diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h
@@ -27,6 +27,10 @@
 #include <atomic>
 #endif
 
+#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__)
+#include <cstdint>
+#endif
+
 #if !defined(__OPENCL__) || defined(__OPENCLCPP__)
 namespace GPUCA_NAMESPACE
 {
@@ -75,13 +79,13 @@ class GPUCommonMath
   GPUd() static CONSTEXPR float Pi() { return 3.1415927f; }
   GPUd() static float Round(float x);
   GPUd() static float Floor(float x);
-  GPUd() static unsigned int Float2UIntReint(const float& x);
-  GPUd() static unsigned int Float2UIntRn(float x);
-  GPUd() static int Float2IntRn(float x);
+  GPUd() static uint32_t Float2UIntReint(const float& x);
+  GPUd() static uint32_t Float2UIntRn(float x);
+  GPUd() static int32_t Float2IntRn(float x);
   GPUd() static float Modf(float x, float y);
   GPUd() static bool Finite(float x);
-  GPUd() static unsigned int Clz(unsigned int val);
-  GPUd() static unsigned int Popcount(unsigned int val);
+  GPUd() static uint32_t Clz(uint32_t val);
+  GPUd() static uint32_t Popcount(uint32_t val);
 
   GPUhdni() static float Hypot(float x, float y);
   GPUhdni() static float Hypot(float x, float y, float z);
@@ -137,10 +141,10 @@ class GPUCommonMath
   {
     GPUCommonMath::AtomicMinInternal(addr, val);
   }
-  GPUd() static int Mul24(int a, int b);
+  GPUd() static int32_t Mul24(int32_t a, int32_t b);
   GPUd() static float FMulRZ(float a, float b);
 
-  template <int I, class T>
+  template <int32_t I, class T>
   GPUd() CONSTEXPR static T nextMultipleOf(T val);
 
 #ifdef GPUCA_NOCOMPAT
@@ -163,11 +167,11 @@ class GPUCommonMath
 
  private:
   template <class S, class T>
-  GPUd() static unsigned int AtomicExchInternal(S* addr, T val);
+  GPUd() static uint32_t AtomicExchInternal(S* addr, T val);
   template <class S, class T>
   GPUd() static bool AtomicCASInternal(S* addr, T cmp, T val);
   template <class S, class T>
-  GPUd() static unsigned int AtomicAddInternal(S* addr, T val);
+  GPUd() static uint32_t AtomicAddInternal(S* addr, T val);
   template <class S, class T>
   GPUd() static void AtomicMaxInternal(S* addr, T val);
   template <class S, class T>
@@ -185,7 +189,7 @@ typedef GPUCommonMath CAMath;
     #define CHOICE(c1, c2, c3) (c1) // Select first option for Host
 #endif // clang-format on
 
-template <int I, class T>
+template <int32_t I, class T>
 GPUdi() CONSTEXPR T GPUCommonMath::nextMultipleOf(T val)
 {
   if CONSTEXPR (I & (I - 1)) {
@@ -212,23 +216,23 @@ GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y)
 
 GPUdi() float GPUCommonMath::Modf(float x, float y) { return CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); }
 
-GPUdi() unsigned int GPUCommonMath::Float2UIntReint(const float& x)
+GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x)
 {
 #if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__))
   return __float_as_uint(x);
 #elif defined(GPUCA_GPUCODE_DEVICE) && (defined(__OPENCL__) || defined(__OPENCLCPP__))
   return as_uint(x);
 #else
-  return reinterpret_cast<const unsigned int&>(x);
+  return reinterpret_cast<const uint32_t&>(x);
 #endif
 }
 
-GPUdi() unsigned int GPUCommonMath::Float2UIntRn(float x) { return (unsigned int)(int)(x + 0.5f); }
+GPUdi() uint32_t GPUCommonMath::Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); }
 GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x), floor(x)); }
 
 #ifdef GPUCA_NO_FAST_MATH
 GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); }
-GPUdi() int GPUCommonMath::Float2IntRn(float x) { return (int)Round(x); }
+GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); }
 GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); }
 GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); }
 GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); }
@@ -243,7 +247,7 @@ GPUdi() float GPUCommonMath::Log(float x) { return CHOICE((float)log((double)x),
 GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); }
 #else
 GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), rintf(x), rint(x)); }
-GPUdi() int GPUCommonMath::Float2IntRn(float x) { return CHOICE((int)Round(x), __float2int_rn(x), (int)Round(x)); }
+GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); }
 GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), true, true); }
 GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); }
 GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE(atanf(x), atanf(x), atan(x)); }
@@ -283,12 +287,12 @@ GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c)
 #endif
 }
 
-GPUdi() unsigned int GPUCommonMath::Clz(unsigned int x)
+GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x)
 {
 #if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && (!defined(__OPENCL__) || defined(__OPENCLCPP__))
   return x == 0 ? 32 : CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available
 #else
-  for (int i = 31; i >= 0; i--) {
+  for (int32_t i = 31; i >= 0; i--) {
     if (x & (1u << i)) {
       return (31 - i);
     }
@@ -297,7 +301,7 @@ GPUdi() unsigned int GPUCommonMath::Clz(unsigned int x)
 #endif
 }
 
-GPUdi() unsigned int GPUCommonMath::Popcount(unsigned int x)
+GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x)
 {
 #if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && (!defined(__OPENCL__) /*|| defined(__OPENCLCPP__)*/) // TODO: remove OPENCLCPP workaround when reported SPIR-V bug is fixed
   // use builtin if available
@@ -404,7 +408,7 @@ GPUdi() float GPUCommonMath::InvSqrt(float _x)
 #else
   union {
     float f;
-    int i;
+    int32_t i;
   } x = {_x};
   const float xhalf = 0.5f * x.f;
   x.i = 0x5f3759df - (x.i >> 1);
@@ -428,7 +432,7 @@ GPUhdi() double GPUCommonMath::Abs<double>(double x)
 #endif
 
 template <>
-GPUhdi() int GPUCommonMath::Abs<int>(int x)
+GPUhdi() int32_t GPUCommonMath::Abs<int32_t>(int32_t x)
 {
   return CHOICE(abs(x), abs(x), abs(x));
 }
@@ -448,7 +452,7 @@ GPUhdi() float GPUCommonMath::Copysign(float x, float y)
 }
 
 template <class S, class T>
-GPUdi() unsigned int GPUCommonMath::AtomicExchInternal(S* addr, T val)
+GPUdi() uint32_t GPUCommonMath::AtomicExchInternal(S* addr, T val)
 {
 #if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
   return ::atomic_exchange(addr, val);
@@ -457,7 +461,7 @@ GPUdi() unsigned int GPUCommonMath::AtomicExchInternal(S* addr, T val)
 #elif defined(GPUCA_GPUCODE) && (defined(__CUDACC__) || defined(__HIPCC__))
   return ::atomicExch(addr, val);
 #elif defined(WITH_OPENMP)
-  unsigned int old;
+  uint32_t old;
   __atomic_exchange(addr, &val, &old, __ATOMIC_SEQ_CST);
   return old;
 #else
@@ -482,7 +486,7 @@ GPUdi() bool GPUCommonMath::AtomicCASInternal(S* addr, T cmp, T val)
 }
 
 template <class S, class T>
-GPUdi() unsigned int GPUCommonMath::AtomicAddInternal(S* addr, T val)
+GPUdi() uint32_t GPUCommonMath::AtomicAddInternal(S* addr, T val)
 {
 #if defined(GPUCA_GPUCODE) && defined(__OPENCLCPP__) && (!defined(__clang__) || defined(GPUCA_OPENCL_CPP_CLANG_C11_ATOMICS))
   return ::atomic_fetch_add(addr, val);
@@ -538,9 +542,9 @@ GPUdii() void GPUCommonMath::AtomicMaxInternal(GPUglobalref() GPUgeneric() GPUAt
     val = 0.f;
   }
   if (val >= 0) {
-    AtomicMaxInternal((GPUAtomic(int)*)addr, __float_as_int(val));
+    AtomicMaxInternal((GPUAtomic(int32_t)*)addr, __float_as_int(val));
   } else {
-    AtomicMinInternal((GPUAtomic(unsigned int)*)addr, __float_as_uint(val));
+    AtomicMinInternal((GPUAtomic(uint32_t)*)addr, __float_as_uint(val));
   }
 }
 template <>
@@ -550,9 +554,9 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt
     val = 0.f;
   }
   if (val >= 0) {
-    AtomicMinInternal((GPUAtomic(int)*)addr, __float_as_int(val));
+    AtomicMinInternal((GPUAtomic(int32_t)*)addr, __float_as_int(val));
   } else {
-    AtomicMaxInternal((GPUAtomic(unsigned int)*)addr, __float_as_uint(val));
+    AtomicMaxInternal((GPUAtomic(uint32_t)*)addr, __float_as_uint(val));
   }
 }
 #endif

diff --git a/GPU/Common/GPUCommonRtypes.h b/GPU/Common/GPUCommonRtypes.h
@@ -26,8 +26,8 @@
     #define ClassImp(name)
     #define templateClassImp(name)
     #ifndef GPUCA_GPUCODE_DEVICE
-//      typedef unsigned long long ULong64_t;
-//      typedef unsigned int UInt_t;
+//      typedef uint64_t ULong64_t;
+//      typedef uint32_t UInt_t;
       #include <iostream>
     #endif
   #endif

diff --git a/GPU/Common/GPUCommonTransform3D.h b/GPU/Common/GPUCommonTransform3D.h
@@ -27,7 +27,7 @@ class Transform3D
   Transform3D() = default;
   Transform3D(float* v)
   {
-    for (int i = 0; i < 12; i++) {
+    for (int32_t i = 0; i < 12; i++) {
       m[i] = v[i];
     }
   }