Skip to content

Commit

Permalink
GPU: use ptrDiff function
Browse files (browse the repository at this point in the history)
  • Loading branch information
davidrohr committed Oct 7, 2024
1 parent 42247aa commit f405bdd
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 28 deletions.
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUParam.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ void GPUParam::LoadClusterErrors(bool Print)

void GPUParamRTC::setFrom(const GPUParam& param)
{
memcpy((char*)this, (char*)&param, sizeof(param));
memcpy((void*)this, (void*)&param, sizeof(param));
}

std::string GPUParamRTC::generateRTCCode(const GPUParam& param, bool useConstexpr)
Expand Down
6 changes: 3 additions & 3 deletions GPU/GPUTracking/Base/GPUParam.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,12 @@ struct GPUParam : public internal::GPUParam_t<GPUSettingsRec, GPUSettingsParam>
return 0.174533f + par.dAlpha * iSlice;
}
GPUd() float GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult) const;
GPUd() void GetClusterErrorsSeeding2(char sector, int32_t row, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const;
GPUd() void GetClusterErrorsSeeding2(uint8_t sector, int32_t row, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const;
GPUd() float GetSystematicClusterErrorIFC2(float trackX, float trackY, float z, bool sideC) const;
GPUd() float GetSystematicClusterErrorC122(float trackX, float trackY, char sector) const;
GPUd() float GetSystematicClusterErrorC122(float trackX, float trackY, uint8_t sector) const;

GPUd() float GetClusterError2(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult, float scaledAvgInvCharge, float scaledInvCharge) const;
GPUd() void GetClusterErrors2(char sector, int32_t row, float z, float sinPhi, float DzDs, float time, float avgInvCharge, float invCharge, float& ErrY2, float& ErrZ2) const;
GPUd() void GetClusterErrors2(uint8_t sector, int32_t row, float z, float sinPhi, float DzDs, float time, float avgInvCharge, float invCharge, float& ErrY2, float& ErrZ2) const;
GPUd() void UpdateClusterError2ByState(int16_t clusterState, float& ErrY2, float& ErrZ2) const;
GPUd() float GetUnscaledMult(float time) const;

Expand Down
10 changes: 5 additions & 5 deletions GPU/GPUTracking/Base/GPUParam.inc
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ GPUdi() void MEM_LG(GPUParam)::Global2Slice(int32_t iSlice, float X, float Y, fl
#ifdef GPUCA_TPC_GEOMETRY_O2

MEM_CLASS_PRE()
GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(char sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const
GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const
{
const int32_t rowType = tpcGeometry.GetROC(iRow);
z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z));
Expand Down Expand Up @@ -119,7 +119,7 @@ GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorIFC2(float x, float y,
}

MEM_CLASS_PRE()
GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorC122(float x, float y, char sector) const
GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorC122(float x, float y, uint8_t sector) const
{
const float dx = x - 83.f;
if (dx > occupancyTotal * rec.tpc.sysClusErrorC12Box) {
Expand All @@ -143,7 +143,7 @@ GPUdi() float MEM_LG(GPUParam)::GetClusterErrorSeeding(int32_t yz, int32_t type,
}

MEM_CLASS_PRE()
GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(char sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const
GPUdi() void MEM_LG(GPUParam)::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const
{
int32_t rowType = tpcGeometry.GetROC(iRow);
z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z));
Expand Down Expand Up @@ -179,15 +179,15 @@ GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorIFC2(float trackX, floa
}

MEM_CLASS_PRE()
GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorC122(float trackX, float trackY, char sector) const
GPUdi() float MEM_LG(GPUParam)::GetSystematicClusterErrorC122(float trackX, float trackY, uint8_t sector) const
{
return 0;
}

#endif // !GPUCA_TPC_GEOMETRY_O2

MEM_CLASS_PRE()
GPUdi() void MEM_LG(GPUParam)::GetClusterErrors2(char sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float avgInvCharge, float invCharge, float& ErrY2, float& ErrZ2) const
GPUdi() void MEM_LG(GPUParam)::GetClusterErrors2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float avgInvCharge, float invCharge, float& ErrY2, float& ErrZ2) const
{
const int32_t rowType = tpcGeometry.GetROC(iRow);
z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z));
Expand Down
38 changes: 19 additions & 19 deletions GPU/GPUTracking/Base/GPUReconstruction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,8 @@ int32_t GPUReconstruction::Init()
for (uint32_t i = 0; i < mSlaves.size(); i++) {
mSlaves[i]->mDeviceMemoryBase = mDeviceMemoryPermanent;
mSlaves[i]->mHostMemoryBase = mHostMemoryPermanent;
mSlaves[i]->mDeviceMemorySize = mDeviceMemorySize - ((char*)mSlaves[i]->mDeviceMemoryBase - (char*)mDeviceMemoryBase);
mSlaves[i]->mHostMemorySize = mHostMemorySize - ((char*)mSlaves[i]->mHostMemoryBase - (char*)mHostMemoryBase);
mSlaves[i]->mDeviceMemorySize = mDeviceMemorySize - ptrDiff(mSlaves[i]->mDeviceMemoryBase, mDeviceMemoryBase);
mSlaves[i]->mHostMemorySize = mHostMemorySize - ptrDiff(mSlaves[i]->mHostMemoryBase, mHostMemoryBase);
mSlaves[i]->mHostMemoryPoolEnd = mHostMemoryPoolEnd;
mSlaves[i]->mDeviceMemoryPoolEnd = mDeviceMemoryPoolEnd;
if (mSlaves[i]->InitDevice()) {
Expand Down Expand Up @@ -437,7 +437,7 @@ void GPUReconstruction::WriteConstantParams()
{
if (IsGPU()) {
const auto threadContext = GetThreadContext();
WriteToConstantMemory((char*)&processors()->param - (char*)processors(), &param(), sizeof(param()), -1);
WriteToConstantMemory(ptrDiff(&processors()->param, processors()), &param(), sizeof(param()), -1);
}
}

Expand Down Expand Up @@ -491,7 +491,7 @@ void GPUReconstruction::ComputeReuseMax(GPUProcessor* proc)
resMain.mOverrideSize = 0;
for (uint32_t i = 0; i < re.res.size(); i++) {
GPUMemoryResource& res = mMemoryResources[re.res[i]];
resMain.mOverrideSize = std::max<size_t>(resMain.mOverrideSize, (char*)res.SetPointers((void*)1) - (char*)1);
resMain.mOverrideSize = std::max<size_t>(resMain.mOverrideSize, ptrDiff(res.SetPointers((void*)1), (char*)1));
}
}
}
Expand Down Expand Up @@ -545,7 +545,7 @@ size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res,
GPUError("Invalid reuse ptr (%s)", res->mName);
throw std::bad_alloc();
}
size_t retVal = (char*)((res->*setPtr)(ptr)) - (char*)(ptr);
size_t retVal = ptrDiff((res->*setPtr)(ptr), ptr);
if (retVal > mMemoryResources[res->mReuse].mSize) {
GPUError("Insufficient reuse memory %lu < %lu (%s) (%s)", mMemoryResources[res->mReuse].mSize, retVal, res->mName, device);
throw std::bad_alloc();
Expand All @@ -561,31 +561,31 @@ size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res,
}
size_t retVal;
if ((res->mType & GPUMemoryResource::MEMORY_STACK) && memorypoolend) {
retVal = (char*)((res->*setPtr)((char*)1)) - (char*)(1);
retVal = ptrDiff((res->*setPtr)((char*)1), (char*)(1));
memorypoolend = (void*)((char*)memorypoolend - GPUProcessor::getAlignmentMod<GPUCA_MEMALIGN>(memorypoolend));
if (retVal < res->mOverrideSize) {
retVal = res->mOverrideSize;
}
retVal += GPUProcessor::getAlignment<GPUCA_MEMALIGN>(retVal);
memorypoolend = (char*)memorypoolend - retVal;
ptr = memorypoolend;
retVal = std::max<size_t>((char*)((res->*setPtr)(ptr)) - (char*)ptr, res->mOverrideSize);
retVal = std::max<size_t>(ptrDiff((res->*setPtr)(ptr), ptr), res->mOverrideSize);
} else {
ptr = memorypool;
memorypool = (char*)((res->*setPtr)(ptr));
retVal = (char*)memorypool - (char*)ptr;
retVal = ptrDiff(memorypool, ptr);
if (retVal < res->mOverrideSize) {
retVal = res->mOverrideSize;
memorypool = (char*)ptr + res->mOverrideSize;
}
memorypool = (void*)((char*)memorypool + GPUProcessor::getAlignment<GPUCA_MEMALIGN>(memorypool));
}
if (memorypoolend ? (memorypool > memorypoolend) : ((size_t)((char*)memorypool - (char*)memorybase) > memorysize)) {
std::cerr << "Memory pool size exceeded (" << device << ") (" << res->mName << ": " << (memorypoolend ? (memorysize + ((char*)memorypool - (char*)memorypoolend)) : (char*)memorypool - (char*)memorybase) << " < " << memorysize << "\n";
if (memorypoolend ? (memorypool > memorypoolend) : ((size_t)ptrDiff(memorypool, memorybase) > memorysize)) {
std::cerr << "Memory pool size exceeded (" << device << ") (" << res->mName << ": " << (memorypoolend ? (memorysize + ptrDiff(memorypool, memorypoolend)) : ptrDiff(memorypool, memorybase)) << " < " << memorysize << "\n";
throw std::bad_alloc();
}
if (mProcessingSettings.allocDebugLevel >= 2) {
std::cout << "Allocated (" << device << ") " << res->mName << ": " << retVal << " - available: " << (memorypoolend ? ((char*)memorypoolend - (char*)memorypool) : (memorysize - ((char*)memorypool - (char*)memorybase))) << "\n";
std::cout << "Allocated (" << device << ") " << res->mName << ": " << retVal << " - available: " << (memorypoolend ? ptrDiff(memorypoolend, memorypool) : (memorysize - ptrDiff(memorypool, memorybase))) << "\n";
}
return retVal;
}
Expand Down Expand Up @@ -633,7 +633,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res,
if (control->allocator) {
res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize);
res->mPtr = control->allocator(CAMath::nextMultipleOf<GPUCA_BUFFER_ALIGNMENT>(res->mSize));
res->mSize = std::max<size_t>((char*)res->SetPointers(res->mPtr) - (char*)res->mPtr, res->mOverrideSize);
res->mSize = std::max<size_t>(ptrDiff(res->SetPointers(res->mPtr), res->mPtr), res->mOverrideSize);
if (mProcessingSettings.allocDebugLevel >= 2) {
std::cout << "Allocated (from callback) " << res->mName << ": " << res->mSize << "\n";
}
Expand Down Expand Up @@ -701,12 +701,12 @@ void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type)
char* retVal;
GPUProcessor::computePointerWithAlignment(pool, retVal, size);
if (pool > poolend) {
GPUError("Insufficient unmanaged memory: missing %lu bytes", (size_t)((char*)pool - (char*)poolend));
GPUError("Insufficient unmanaged memory: missing %ld bytes", ptrDiff(pool, poolend));
throw std::bad_alloc();
}
UpdateMaxMemoryUsed();
if (mProcessingSettings.allocDebugLevel >= 2) {
std::cout << "Allocated (unmanaged " << (type == GPUMemoryResource::MEMORY_GPU ? "gpu" : "host") << "): " << size << " - available: " << ((char*)poolend - (char*)pool) << "\n";
std::cout << "Allocated (unmanaged " << (type == GPUMemoryResource::MEMORY_GPU ? "gpu" : "host") << "): " << size << " - available: " << ptrDiff(poolend, pool) << "\n";
}
return retVal;
}
Expand All @@ -723,12 +723,12 @@ void* GPUReconstruction::AllocateVolatileDeviceMemory(size_t size)
char* retVal;
GPUProcessor::computePointerWithAlignment(mDeviceMemoryPool, retVal, size);
if (mDeviceMemoryPool > mDeviceMemoryPoolEnd) {
GPUError("Insufficient volatile device memory: missing %lu", (size_t)((char*)mDeviceMemoryPool - (char*)mDeviceMemoryPoolEnd));
GPUError("Insufficient volatile device memory: missing %ld", ptrDiff(mDeviceMemoryPool, mDeviceMemoryPoolEnd));
throw std::bad_alloc();
}
UpdateMaxMemoryUsed();
if (mProcessingSettings.allocDebugLevel >= 2) {
std::cout << "Allocated (volatile GPU): " << size << " - available: " << ((char*)mDeviceMemoryPoolEnd - (char*)mDeviceMemoryPool) << "\n";
std::cout << "Allocated (volatile GPU): " << size << " - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n";
}

return retVal;
Expand Down Expand Up @@ -757,15 +757,15 @@ void GPUReconstruction::ResetRegisteredMemoryPointers(int16_t ires)
GPUMemoryResource* res = &mMemoryResources[ires];
if (!(res->mType & GPUMemoryResource::MEMORY_EXTERNAL) && (res->mType & GPUMemoryResource::MEMORY_HOST)) {
void* basePtr = res->mReuse >= 0 ? mMemoryResources[res->mReuse].mPtr : res->mPtr;
size_t size = (char*)res->SetPointers(basePtr) - (char*)basePtr;
size_t size = ptrDiff(res->SetPointers(basePtr), basePtr);
if (basePtr && size > std::max(res->mSize, res->mOverrideSize)) {
std::cerr << "Updated pointers exceed available memory size: " << size << " > " << std::max(res->mSize, res->mOverrideSize) << " - host - " << res->mName << "\n";
throw std::bad_alloc();
}
}
if (IsGPU() && (res->mType & GPUMemoryResource::MEMORY_GPU)) {
void* basePtr = res->mReuse >= 0 ? mMemoryResources[res->mReuse].mPtrDevice : res->mPtrDevice;
size_t size = (char*)res->SetDevicePointers(basePtr) - (char*)basePtr;
size_t size = ptrDiff(res->SetDevicePointers(basePtr), basePtr);
if (basePtr && size > std::max(res->mSize, res->mOverrideSize)) {
std::cerr << "Updated pointers exceed available memory size: " << size << " > " << std::max(res->mSize, res->mOverrideSize) << " - GPU - " << res->mName << "\n";
throw std::bad_alloc();
Expand Down Expand Up @@ -806,7 +806,7 @@ void GPUReconstruction::ReturnVolatileDeviceMemory()
mVolatileMemoryStart = nullptr;
}
if (mProcessingSettings.allocDebugLevel >= 2) {
std::cout << "Freed (volatile GPU) - available: " << ((char*)mDeviceMemoryPoolEnd - (char*)mDeviceMemoryPool) << "\n";
std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n";
}
}

Expand Down

0 comments on commit f405bdd

Please sign in to comment.