Skip to content

Commit

Permalink
GPU: Get rid of all long long types
Browse files Browse the repository at this point in the history
  • Loading branch information
davidrohr committed Oct 7, 2024
1 parent 9e6bd6d commit 40f15f3
Show file tree
Hide file tree
Showing 44 changed files with 158 additions and 158 deletions.
4 changes: 2 additions & 2 deletions GPU/Common/GPUCommonRtypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#define ClassImp(name)
#define templateClassImp(name)
#ifndef GPUCA_GPUCODE_DEVICE
typedef unsigned long long int ULong64_t;
typedef unsigned int UInt_t;
// typedef unsigned long long ULong64_t;
// typedef unsigned int UInt_t;
#include <iostream>
#endif
#endif
Expand Down
18 changes: 9 additions & 9 deletions GPU/GPUTracking/Base/GPUReconstruction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ constexpr const char* const GPUReconstruction::GEOMETRY_TYPE_NAMES[];
constexpr const char* const GPUReconstruction::IOTYPENAMES[];
constexpr GPUReconstruction::GeometryType GPUReconstruction::geometryType;

// Signed byte distance a - b, measured on char* and returned as long long int.
static long long int ptrDiff(void* a, void* b)
{
  const char* const lhs = static_cast<char*>(a);
  const char* const rhs = static_cast<char*>(b);
  return static_cast<long long int>(lhs - rhs);
}
// Signed byte distance a - b, measured on char* and returned as long.
static long ptrDiff(void* a, void* b)
{
  const char* const lhs = static_cast<char*>(a);
  const char* const rhs = static_cast<char*>(b);
  return static_cast<long>(lhs - rhs);
}

GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHostConstantMem(new GPUConstantMem), mDeviceBackendSettings(cfg)
{
Expand Down Expand Up @@ -600,7 +600,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res,
res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
if (res->mReuse >= 0) {
if (res->mSize > mMemoryResources[res->mReuse].mSize) {
GPUError("Invalid reuse, insufficient size: %lld < %lld", (long long int)mMemoryResources[res->mReuse].mSize, (long long int)res->mSize);
GPUError("Invalid reuse, insufficient size: %ld < %ld", (long)mMemoryResources[res->mReuse].mSize, (long)res->mSize);
throw std::bad_alloc();
}
res->mPtrDevice = mMemoryResources[res->mReuse].mPtrDevice;
Expand Down Expand Up @@ -834,9 +834,9 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, unsigned long tag)
}
if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) {
if (IsGPU()) {
printf("Allocated Device memory after %30s (%8s): %'13lld (non temporary %'13lld, blocked %'13lld)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked == nullptr ? 0ll : ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked));
printf("Allocated Device memory after %30s (%8s): %'13ld (non temporary %'13ld, blocked %'13ld)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked == nullptr ? 0l : ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked));
}
printf("Allocated Host memory after %30s (%8s): %'13lld (non temporary %'13lld, blocked %'13lld)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked == nullptr ? 0ll : ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked));
printf("Allocated Host memory after %30s (%8s): %'13ld (non temporary %'13ld, blocked %'13ld)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked == nullptr ? 0l : ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked));
printf("%16s", "");
PrintMemoryMax();
}
Expand Down Expand Up @@ -904,15 +904,15 @@ void GPUReconstruction::UpdateMaxMemoryUsed()

void GPUReconstruction::PrintMemoryMax()
{
printf("Maximum Memory Allocation: Host %'lld / Device %'lld\n", (long long int)mHostMemoryUsedMax, (long long int)mDeviceMemoryUsedMax);
printf("Maximum Memory Allocation: Host %'ld / Device %'ld\n", (long)mHostMemoryUsedMax, (long)mDeviceMemoryUsedMax);
}

void GPUReconstruction::PrintMemoryOverview()
{
if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
printf("Memory Allocation: Host %'lld / %'lld (Permanent %'lld), Device %'lld / %'lld, (Permanent %'lld) %d chunks\n",
ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), (long long int)mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase),
ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), (long long int)mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), (int)mMemoryResources.size());
printf("Memory Allocation: Host %'ld / %'ld (Permanent %'ld), Device %'ld / %'ld, (Permanent %'ld) %d chunks\n",
ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), (long)mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase),
ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), (long)mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), (int)mMemoryResources.size());
}
}

Expand All @@ -937,7 +937,7 @@ void GPUReconstruction::PrintMemoryStatistics()
}
printf("%59s CPU / %9s GPU\n", "", "");
for (auto it = sizes.begin(); it != sizes.end(); it++) {
printf("Allocation %30s %s: Size %'14lld / %'14lld\n", it->first.c_str(), it->second[2] ? "P" : " ", (long long int)it->second[0], (long long int)it->second[1]);
printf("Allocation %30s %s: Size %'14ld / %'14ld\n", it->first.c_str(), it->second[2] ? "P" : " ", (long)it->second[0], (long)it->second[1]);
}
PrintMemoryOverview();
for (unsigned int i = 0; i < mChains.size(); i++) {
Expand Down
16 changes: 8 additions & 8 deletions GPU/GPUTracking/Base/GPUReconstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ inline T* GPUReconstruction::AllocateIOMemoryHelper(size_t n, const T*& ptr, std
retVal = u.get();
if (mProcessingSettings.registerStandaloneInputMemory) {
if (registerMemoryForGPU(u.get(), n * sizeof(T))) {
GPUError("Error registering memory for GPU: %p - %lld bytes\n", (void*)u.get(), (long long int)(n * sizeof(T)));
GPUError("Error registering memory for GPU: %p - %ld bytes\n", (void*)u.get(), (long)(n * sizeof(T)));
throw std::bad_alloc();
}
}
Expand Down Expand Up @@ -505,7 +505,7 @@ inline unsigned int GPUReconstruction::DumpData(FILE* fp, const T* const* entrie
}
}
if (mProcessingSettings.debugLevel >= 2) {
GPUInfo("Dumped %lld %s", (long long int)numTotal, IOTYPENAMES[type]);
GPUInfo("Dumped %ld %s", (long)numTotal, IOTYPENAMES[type]);
}
return numTotal;
}
Expand Down Expand Up @@ -539,7 +539,7 @@ inline size_t GPUReconstruction::ReadData(FILE* fp, const T** entries, S* num, s
}
(void)r;
if (mProcessingSettings.debugLevel >= 2) {
GPUInfo("Read %lld %s", (long long int)numTotal, IOTYPENAMES[type]);
GPUInfo("Read %ld %s", (long)numTotal, IOTYPENAMES[type]);
}
return numTotal;
}
Expand Down Expand Up @@ -569,7 +569,7 @@ inline std::unique_ptr<T> GPUReconstruction::ReadFlatObjectFromFile(const char*
r = fread(size, sizeof(size[0]), 2, fp);
if (r == 0 || size[0] != sizeof(T)) {
fclose(fp);
GPUError("ERROR reading %s, invalid size: %lld (%lld expected)", file, (long long int)size[0], (long long int)sizeof(T));
GPUError("ERROR reading %s, invalid size: %ld (%ld expected)", file, (long)size[0], (long)sizeof(T));
throw std::runtime_error("invalid size");
}
std::unique_ptr<T> retVal(new T);
Expand All @@ -579,7 +579,7 @@ inline std::unique_ptr<T> GPUReconstruction::ReadFlatObjectFromFile(const char*
r = fread(buf, 1, size[1], fp);
fclose(fp);
if (mProcessingSettings.debugLevel >= 2) {
GPUInfo("Read %lld bytes from %s", (long long int)r, file);
GPUInfo("Read %ld bytes from %s", (long)r, file);
}
retVal->clearInternalBufferPtr();
retVal->setActualBufferAddress(buf);
Expand Down Expand Up @@ -611,14 +611,14 @@ inline std::unique_ptr<T> GPUReconstruction::ReadStructFromFile(const char* file
r = fread(&size, sizeof(size), 1, fp);
if (r == 0 || size != sizeof(T)) {
fclose(fp);
GPUError("ERROR reading %s, invalid size: %lld (%lld expected)", file, (long long int)size, (long long int)sizeof(T));
GPUError("ERROR reading %s, invalid size: %ld (%ld expected)", file, (long)size, (long)sizeof(T));
throw std::runtime_error("invalid size");
}
std::unique_ptr<T> newObj(new T);
r = fread(newObj.get(), 1, size, fp);
fclose(fp);
if (mProcessingSettings.debugLevel >= 2) {
GPUInfo("Read %lld bytes from %s", (long long int)r, file);
GPUInfo("Read %ld bytes from %s", (long)r, file);
}
return newObj;
}
Expand All @@ -639,7 +639,7 @@ inline int GPUReconstruction::ReadStructFromFile(const char* file, T* obj)
r = fread(obj, 1, size, fp);
fclose(fp);
if (mProcessingSettings.debugLevel >= 2) {
GPUInfo("Read %lld bytes from %s", (long long int)r, file);
GPUInfo("Read %ld bytes from %s", (long)r, file);
}
return 0;
}
Expand Down
20 changes: 10 additions & 10 deletions GPU/GPUTracking/Base/GPUReconstructionConvert.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ namespace // anonymous

// ------------------------------------------------- TPC ZS General -------------------------------------------------

typedef std::array<long long int, TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(long long int)> zsPage;
typedef std::array<long, TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(long)> zsPage;

struct zsEncoder {
int curRegion = 0, outputRegion = 0;
Expand All @@ -219,7 +219,7 @@ struct zsEncoder {
unsigned int pageCounter = 0;
void ZSfillEmpty(void* ptr, int shift, unsigned int feeId, int orbit, int linkid);
static void ZSstreamOut(unsigned short* bufIn, unsigned int& lenIn, unsigned char* bufOut, unsigned int& lenOut, unsigned int nBits);
long int getHbf(long int timestamp) { return (timestamp * LHCBCPERTIMEBIN + bcShiftInFirstHBF) / o2::constants::lhc::LHCMaxBunches; }
// Returns (timestamp * LHCBCPERTIMEBIN + bcShiftInFirstHBF) / LHCMaxBunches.
// NOTE(review): presumably maps a time-bin timestamp to its heartbeat-frame index — confirm against callers.
long getHbf(long timestamp) { return (timestamp * LHCBCPERTIMEBIN + bcShiftInFirstHBF) / o2::constants::lhc::LHCMaxBunches; }
};

inline void zsEncoder::ZSfillEmpty(void* ptr, int shift, unsigned int feeId, int orbit, int linkid)
Expand Down Expand Up @@ -1325,7 +1325,7 @@ size_t zsEncoderRun<T>::compare(std::vector<zsPage>* buffer, std::vector<o2::tpc
#endif // GPUCA_TPC_GEOMETRY_O2

template <class S>
void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr<unsigned long long int[]>* outBuffer, unsigned int* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, int version, bool verify, float threshold, bool padding, std::function<void(std::vector<o2::tpc::Digit>&)> digitsFilter)
void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr<unsigned long[]>* outBuffer, unsigned int* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, int version, bool verify, float threshold, bool padding, std::function<void(std::vector<o2::tpc::Digit>&)> digitsFilter)
{
// Pass in either outBuffer / outSizes, to fill standalone output buffers, or raw to use RawFileWriter
// ir is the interaction record for time bin 0
Expand Down Expand Up @@ -1394,8 +1394,8 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr<unsigne
}

if (outBuffer) {
outBuffer->reset(new unsigned long long int[totalPages * TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(unsigned long long int)]);
unsigned long long int offset = 0;
outBuffer->reset(new unsigned long[totalPages * TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(unsigned long)]);
unsigned long offset = 0;
for (unsigned int i = 0; i < NSLICES; i++) {
for (unsigned int j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
memcpy((char*)outBuffer->get() + offset, buffer[i][j].data(), buffer[i][j].size() * TPCZSHDR::TPC_ZS_PAGE_SIZE);
Expand All @@ -1405,7 +1405,7 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr<unsigne
}
}
if (nErrors) {
GPUError("ERROR: %lld INCORRECT SAMPLES DURING ZS ENCODING VERIFICATION!!!", (long long int)nErrors);
GPUError("ERROR: %ld INCORRECT SAMPLES DURING ZS ENCODING VERIFICATION!!!", (long)nErrors);
} else if (verify) {
GPUInfo("ENCODING VERIFICATION PASSED");
}
Expand All @@ -1414,15 +1414,15 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr<unsigne
}

#ifdef GPUCA_HAVE_O2HEADERS
template void GPUReconstructionConvert::RunZSEncoder<GPUTrackingInOutDigits>(const GPUTrackingInOutDigits&, std::unique_ptr<unsigned long long int[]>*, unsigned int*, o2::raw::RawFileWriter*, const o2::InteractionRecord*, const GPUParam&, int, bool, float, bool, std::function<void(std::vector<o2::tpc::Digit>&)> digitsFilter);
template void GPUReconstructionConvert::RunZSEncoder<GPUTrackingInOutDigits>(const GPUTrackingInOutDigits&, std::unique_ptr<unsigned long[]>*, unsigned int*, o2::raw::RawFileWriter*, const o2::InteractionRecord*, const GPUParam&, int, bool, float, bool, std::function<void(std::vector<o2::tpc::Digit>&)> digitsFilter);
#ifdef GPUCA_O2_LIB
template void GPUReconstructionConvert::RunZSEncoder<DigitArray>(const DigitArray&, std::unique_ptr<unsigned long long int[]>*, unsigned int*, o2::raw::RawFileWriter*, const o2::InteractionRecord*, const GPUParam&, int, bool, float, bool, std::function<void(std::vector<o2::tpc::Digit>&)> digitsFilter);
template void GPUReconstructionConvert::RunZSEncoder<DigitArray>(const DigitArray&, std::unique_ptr<unsigned long[]>*, unsigned int*, o2::raw::RawFileWriter*, const o2::InteractionRecord*, const GPUParam&, int, bool, float, bool, std::function<void(std::vector<o2::tpc::Digit>&)> digitsFilter);
#endif
#endif

void GPUReconstructionConvert::RunZSEncoderCreateMeta(const unsigned long long int* buffer, const unsigned int* sizes, void** ptrs, GPUTrackingInOutZS* out)
void GPUReconstructionConvert::RunZSEncoderCreateMeta(const unsigned long* buffer, const unsigned int* sizes, void** ptrs, GPUTrackingInOutZS* out)
{
unsigned long long int offset = 0;
unsigned long offset = 0;
for (unsigned int i = 0; i < NSLICES; i++) {
for (unsigned int j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
ptrs[i * GPUTrackingInOutZS::NENDPOINTS + j] = (char*)buffer + offset;
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/GPUReconstructionConvert.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ class GPUReconstructionConvert
static void ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess* native, std::unique_ptr<GPUTPCClusterData[]>* clusters, unsigned int* nClusters, const TPCFastTransform* transform, int continuousMaxTimeBin = 0);
static void ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess& native, std::unique_ptr<o2::tpc::ClusterNative[]>& nativeBuffer, const AliHLTTPCRawCluster** rawClusters, unsigned int* nRawClusters);
template <class S>
static void RunZSEncoder(const S& in, std::unique_ptr<unsigned long long int[]>* outBuffer, unsigned int* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, int version, bool verify, float threshold = 0.f, bool padding = false, std::function<void(std::vector<o2::tpc::Digit>&)> digitsFilter = nullptr);
static void RunZSEncoderCreateMeta(const unsigned long long int* buffer, const unsigned int* sizes, void** ptrs, GPUTrackingInOutZS* out);
static void RunZSEncoder(const S& in, std::unique_ptr<unsigned long[]>* outBuffer, unsigned int* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, int version, bool verify, float threshold = 0.f, bool padding = false, std::function<void(std::vector<o2::tpc::Digit>&)> digitsFilter = nullptr);
static void RunZSEncoderCreateMeta(const unsigned long* buffer, const unsigned int* sizes, void** ptrs, GPUTrackingInOutZS* out);
static void RunZSFilter(std::unique_ptr<o2::tpc::Digit[]>* buffers, const o2::tpc::Digit* const* ptrs, size_t* nsb, const size_t* ns, const GPUParam& param, bool zs12bit, float threshold);
static int GetMaxTimeBin(const o2::tpc::ClusterNativeAccess& native);
static int GetMaxTimeBin(const GPUTrackingInOutDigits& digits);
Expand Down
8 changes: 4 additions & 4 deletions GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,9 @@ int GPUReconstructionTimeframe::LoadCreateTimeFrame(int iEvent)
return (2);
}

long long int nBunch = -DRIFT_TIME / config.bunchSpacing;
long long int lastBunch = config.timeFrameLen / config.bunchSpacing;
long long int lastTFBunch = lastBunch - DRIFT_TIME / config.bunchSpacing;
long nBunch = -DRIFT_TIME / config.bunchSpacing;
long lastBunch = config.timeFrameLen / config.bunchSpacing;
long lastTFBunch = lastBunch - DRIFT_TIME / config.bunchSpacing;
int nCollisions = 0, nBorderCollisions = 0, nTrainCollissions = 0, nMultipleCollisions = 0, nTrainMultipleCollisions = 0;
int nTrain = 0;
int mcMin = -1, mcMax = -1;
Expand Down Expand Up @@ -288,7 +288,7 @@ int GPUReconstructionTimeframe::LoadCreateTimeFrame(int iEvent)
return (1);
}
nTotalClusters += nClusters;
printf("Placing event %4d+%d (ID %4d) at z %7.3f (time %'dns) %s(collisions %4d, bunch %6lld, train %3d) (%'10d clusters, %'10d MC labels, %'10d track MC info)\n", nCollisions, nBorderCollisions, useEvent, shift, (int)(nBunch * config.bunchSpacing), inTF ? " inside" : "outside",
printf("Placing event %4d+%d (ID %4d) at z %7.3f (time %'dns) %s(collisions %4d, bunch %6ld, train %3d) (%'10d clusters, %'10d MC labels, %'10d track MC info)\n", nCollisions, nBorderCollisions, useEvent, shift, (int)(nBunch * config.bunchSpacing), inTF ? " inside" : "outside",
nCollisions, nBunch, nTrain, nClusters, mChain->mIOPtrs.nMCLabelsTPC, mChain->mIOPtrs.nMCInfosTPC);
nInBunchPileUp++;
nCollisionsInTrain++;
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/GPUReconstructionTimeframe.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class GPUReconstructionTimeframe
int mNEventsInDirectory;

std::uniform_real_distribution<double> mDisUniReal;
std::uniform_int_distribution<unsigned long long int> mDisUniInt;
std::uniform_int_distribution<unsigned long> mDisUniInt;
std::mt19937_64 mRndGen1;
std::mt19937_64 mRndGen2;

Expand All @@ -68,7 +68,7 @@ class GPUReconstructionTimeframe

int mNTotalCollisions = 0;

long long int mEventStride;
long mEventStride;
int mSimBunchNoRepeatEvent;
std::vector<char> mEventUsed;
std::vector<std::tuple<GPUTrackingInOutPointers, GPUChainTracking::InOutMemory, o2::tpc::ClusterNativeAccess>> mShiftedEvents;
Expand Down
Loading

0 comments on commit 40f15f3

Please sign in to comment.