Skip to content

Commit

Permalink
Fix printf syntax / improve printf(size_t) usage / simplify syntax for MacOS
Browse files Browse the repository at this point in the history
  • Loading branch information
davidrohr committed Oct 7, 2024
1 parent 29c9ab6 commit 15a87f3
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 15 deletions.
14 changes: 7 additions & 7 deletions GPU/GPUTracking/Base/GPUReconstruction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ constexpr const char* const GPUReconstruction::GEOMETRY_TYPE_NAMES[];
constexpr const char* const GPUReconstruction::IOTYPENAMES[];
constexpr GPUReconstruction::GeometryType GPUReconstruction::geometryType;

// Returns the signed distance in bytes from b to a (address of a minus address of b), as int64_t.
static int64_t ptrDiff(void* a, void* b)
{
  const char* const lhs = static_cast<char*>(a);
  const char* const rhs = static_cast<char*>(b);
  return static_cast<int64_t>(lhs - rhs);
}
// Returns the signed distance in bytes from b to a (address of a minus address of b), as ptrdiff_t.
static ptrdiff_t ptrDiff(void* a, void* b)
{
  const char* const lhs = static_cast<char*>(a);
  const char* const rhs = static_cast<char*>(b);
  return lhs - rhs;
}

GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHostConstantMem(new GPUConstantMem), mDeviceBackendSettings(cfg)
{
Expand Down Expand Up @@ -834,9 +834,9 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag)
}
if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) {
if (IsGPU()) {
printf("Allocated Device memory after %30s (%8s): %'13ld (non temporary %'13ld, blocked %'13ld)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked == nullptr ? 0l : ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked));
printf("Allocated Device memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked ? ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked) : 0);
}
printf("Allocated Host memory after %30s (%8s): %'13ld (non temporary %'13ld, blocked %'13ld)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked == nullptr ? 0l : ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked));
printf("Allocated Host memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked ? ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked) : 0);
printf("%16s", "");
PrintMemoryMax();
}
Expand Down Expand Up @@ -910,9 +910,9 @@ void GPUReconstruction::PrintMemoryMax()
// Prints a one-line summary of host and device memory usage to stdout.
// Nothing is printed unless the memory allocation strategy is ALLOCATION_GLOBAL.
// For host and device alike, the reported values are:
//   - (pool - base) + (base + size - poolEnd), i.e. the bytes taken from both ends of the region,
//   - the total region size,
//   - (permanent - base), the offset of the "permanent" marker from the base,
// followed by the number of entries in mMemoryResources.
void GPUReconstruction::PrintMemoryOverview()
{
if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) {
// Variant 1: sizes are cast to int64_t and printed with %'ld, the resource count is cast to int32_t and printed with %d.
// NOTE(review): %ld matches int64_t only where int64_t is `long`; also depends on which ptrDiff return type is in effect - confirm.
printf("Memory Allocation: Host %'ld / %'ld (Permanent %'ld), Device %'ld / %'ld, (Permanent %'ld) %d chunks\n",
ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), (int64_t)mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase),
ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), (int64_t)mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), (int32_t)mMemoryResources.size());
// Variant 2: the same values passed without casts, using %'zd for the ptrDiff results and %'zu / %zu for the sizes and the count.
// NOTE(review): the two printf calls print the same line twice; they look like the removed/added
// lines of a diff shown without +/- markers - presumably only this second call is meant to remain. Confirm.
printf("Memory Allocation: Host %'zd / %'zu (Permanent %'zd), Device %'zd / %'zu, (Permanent %'zd) %zu chunks\n",
ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase),
ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), mMemoryResources.size());
}
}

Expand All @@ -937,7 +937,7 @@ void GPUReconstruction::PrintMemoryStatistics()
}
printf("%59s CPU / %9s GPU\n", "", "");
for (auto it = sizes.begin(); it != sizes.end(); it++) {
printf("Allocation %30s %s: Size %'14ld / %'14ld\n", it->first.c_str(), it->second[2] ? "P" : " ", (int64_t)it->second[0], (int64_t)it->second[1]);
printf("Allocation %30s %s: Size %'14zu / %'14zu\n", it->first.c_str(), it->second[2] ? "P" : " ", it->second[0], it->second[1]);
}
PrintMemoryOverview();
for (uint32_t i = 0; i < mChains.size(); i++) {
Expand Down
16 changes: 8 additions & 8 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -268,24 +268,24 @@ int32_t GPUReconstructionCPU::RunChains()
}
char bandwidth[256] = "";
if (mTimers[i]->memSize && mStatNEvents && time != 0.) {
snprintf(bandwidth, 256, " (%6.3f GB/s - %'14lu bytes)", mTimers[i]->memSize / time * 1e-9, (uint64_t)(mTimers[i]->memSize / mStatNEvents));
snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", mTimers[i]->memSize / time * 1e-9, mTimers[i]->memSize / mStatNEvents, mTimers[i]->memSize / mStatNEvents / mTimers[i]->count);
}
printf("Execution Time: Task (%c %8ux): %50s Time: %'10lu us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), (uint64_t)(time * 1000000 / mStatNEvents), bandwidth);
printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth);
if (mProcessingSettings.resetTimers) {
mTimers[i]->count = 0;
mTimers[i]->memSize = 0;
}
}
for (int32_t i = 0; i < GPUDataTypes::N_RECO_STEPS; i++) {
if (kernelStepTimes[i] != 0. || mTimersRecoSteps[i].timerTotal.GetElapsedTime() != 0.) {
printf("Execution Time: Step : %11s %38s Time: %'10lu us ( Total Time : %'14lu us)\n", "Tasks", GPUDataTypes::RECO_STEP_NAMES[i], (uint64_t)(kernelStepTimes[i] * 1000000 / mStatNEvents), (uint64_t)(mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents));
printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us)\n", "Tasks", GPUDataTypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents);
}
if (mTimersRecoSteps[i].bytesToGPU) {
printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10lu us (%6.3f GB/s - %'14lu bytes - %'14lu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", GPUDataTypes::RECO_STEP_NAMES[i], (uint64_t)(mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents),
printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", GPUDataTypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents,
mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToGPU / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].countToGPU);
}
if (mTimersRecoSteps[i].bytesToHost) {
printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10lu us (%6.3f GB/s - %'14lu bytes - %'14lu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", GPUDataTypes::RECO_STEP_NAMES[i], (uint64_t)(mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents),
printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", GPUDataTypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents,
mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost);
}
if (mProcessingSettings.resetTimers) {
Expand All @@ -299,12 +299,12 @@ int32_t GPUReconstructionCPU::RunChains()
}
for (int32_t i = 0; i < GPUDataTypes::N_GENERAL_STEPS; i++) {
if (mTimersGeneralSteps[i].GetElapsedTime() != 0.) {
printf("Execution Time: General Step : %50s Time: %'10lu us\n", GPUDataTypes::GENERAL_STEP_NAMES[i], (uint64_t)(mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents));
printf("Execution Time: General Step : %50s Time: %'10.0f us\n", GPUDataTypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents);
}
}
mStatKernelTime = kernelTotal * 1000000 / mStatNEvents;
printf("Execution Time: Total : %50s Time: %'10lu us%s\n", "Total Kernel", (uint64_t)mStatKernelTime, nEventReport.c_str());
printf("Execution Time: Total : %50s Time: %'10lu us%s\n", "Total Wall", (uint64_t)mStatWallTime, nEventReport.c_str());
printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str());
printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Wall", mStatWallTime, nEventReport.c_str());
} else if (GetProcessingSettings().debugLevel >= 0) {
GPUInfo("Total Wall Time: %lu us%s", (uint64_t)mStatWallTime, nEventReport.c_str());
}
Expand Down

0 comments on commit 15a87f3

Please sign in to comment.