From 15a87f3891c673ee4e35c65338aed90daac4335e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 7 Oct 2024 22:44:42 +0200 Subject: [PATCH] Fix printf syntax / improve printf(size_t) usage / simplify syntax for MacOS --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 14 +++++++------- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 16 ++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 90fc5216a943d..6c28f229cb07c 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -74,7 +74,7 @@ constexpr const char* const GPUReconstruction::GEOMETRY_TYPE_NAMES[]; constexpr const char* const GPUReconstruction::IOTYPENAMES[]; constexpr GPUReconstruction::GeometryType GPUReconstruction::geometryType; -static int64_t ptrDiff(void* a, void* b) { return (int64_t)((char*)a - (char*)b); } +static ptrdiff_t ptrDiff(void* a, void* b) { return (char*)a - (char*)b; } GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHostConstantMem(new GPUConstantMem), mDeviceBackendSettings(cfg) { @@ -834,9 +834,9 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) } if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) { if (IsGPU()) { - printf("Allocated Device memory after %30s (%8s): %'13ld (non temporary %'13ld, blocked %'13ld)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked == nullptr ? 0l : ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked)); + printf("Allocated Device memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked ? ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked) : 0); } - printf("Allocated Host memory after %30s (%8s): %'13ld (non temporary %'13ld, blocked %'13ld)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked == nullptr ? 0l : ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked)); + printf("Allocated Host memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked ? ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked) : 0); printf("%16s", ""); PrintMemoryMax(); } @@ -910,9 +910,9 @@ void GPUReconstruction::PrintMemoryMax() void GPUReconstruction::PrintMemoryOverview() { if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { - printf("Memory Allocation: Host %'ld / %'ld (Permanent %'ld), Device %'ld / %'ld, (Permanent %'ld) %d chunks\n", - ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), (int64_t)mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase), - ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), (int64_t)mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), (int32_t)mMemoryResources.size()); + printf("Memory Allocation: Host %'zd / %'zu (Permanent %'zd), Device %'zd / %'zu, (Permanent %'zd) %zu chunks\n", + ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase), + ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), mMemoryResources.size()); } } @@ -937,7 +937,7 @@ void GPUReconstruction::PrintMemoryStatistics() } printf("%59s CPU / %9s GPU\n", "", ""); for (auto it = sizes.begin(); it != sizes.end(); it++) { - printf("Allocation %30s %s: Size %'14ld / %'14ld\n", it->first.c_str(), it->second[2] ? "P" : " ", (int64_t)it->second[0], (int64_t)it->second[1]); + printf("Allocation %30s %s: Size %'14zu / %'14zu\n", it->first.c_str(), it->second[2] ? "P" : " ", it->second[0], it->second[1]); } PrintMemoryOverview(); for (uint32_t i = 0; i < mChains.size(); i++) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 6bf164c08aa3e..1acbb99973b43 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -268,9 +268,9 @@ int32_t GPUReconstructionCPU::RunChains() } char bandwidth[256] = ""; if (mTimers[i]->memSize && mStatNEvents && time != 0.) { - snprintf(bandwidth, 256, " (%6.3f GB/s - %'14lu bytes)", mTimers[i]->memSize / time * 1e-9, (uint64_t)(mTimers[i]->memSize / mStatNEvents)); + snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", mTimers[i]->memSize / time * 1e-9, mTimers[i]->memSize / mStatNEvents, mTimers[i]->memSize / mStatNEvents / mTimers[i]->count); } - printf("Execution Time: Task (%c %8ux): %50s Time: %'10lu us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), (uint64_t)(time * 1000000 / mStatNEvents), bandwidth); + printf("Execution Time: Task (%c %8ux): %50s Time: %'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth); if (mProcessingSettings.resetTimers) { mTimers[i]->count = 0; mTimers[i]->memSize = 0; @@ -278,14 +278,14 @@ int32_t GPUReconstructionCPU::RunChains() } for (int32_t i = 0; i < GPUDataTypes::N_RECO_STEPS; i++) { if (kernelStepTimes[i] != 0. || mTimersRecoSteps[i].timerTotal.GetElapsedTime() != 0.) { - printf("Execution Time: Step : %11s %38s Time: %'10lu us ( Total Time : %'14lu us)\n", "Tasks", GPUDataTypes::RECO_STEP_NAMES[i], (uint64_t)(kernelStepTimes[i] * 1000000 / mStatNEvents), (uint64_t)(mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents)); + printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us)\n", "Tasks", GPUDataTypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents); } if (mTimersRecoSteps[i].bytesToGPU) { - printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10lu us (%6.3f GB/s - %'14lu bytes - %'14lu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", GPUDataTypes::RECO_STEP_NAMES[i], (uint64_t)(mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents), + printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", GPUDataTypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToGPU / mStatNEvents, mTimersRecoSteps[i].bytesToGPU / mTimersRecoSteps[i].countToGPU); } if (mTimersRecoSteps[i].bytesToHost) { - printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10lu us (%6.3f GB/s - %'14lu bytes - %'14lu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", GPUDataTypes::RECO_STEP_NAMES[i], (uint64_t)(mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents), + printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", GPUDataTypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost); } if (mProcessingSettings.resetTimers) { @@ -299,12 +299,12 @@ int32_t GPUReconstructionCPU::RunChains() } for (int32_t i = 0; i < GPUDataTypes::N_GENERAL_STEPS; i++) { if (mTimersGeneralSteps[i].GetElapsedTime() != 0.) { - printf("Execution Time: General Step : %50s Time: %'10lu us\n", GPUDataTypes::GENERAL_STEP_NAMES[i], (uint64_t)(mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents)); + printf("Execution Time: General Step : %50s Time: %'10.0f us\n", GPUDataTypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents); } } mStatKernelTime = kernelTotal * 1000000 / mStatNEvents; - printf("Execution Time: Total : %50s Time: %'10lu us%s\n", "Total Kernel", (uint64_t)mStatKernelTime, nEventReport.c_str()); - printf("Execution Time: Total : %50s Time: %'10lu us%s\n", "Total Wall", (uint64_t)mStatWallTime, nEventReport.c_str()); + printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str()); + printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Wall", mStatWallTime, nEventReport.c_str()); } else if (GetProcessingSettings().debugLevel >= 0) { GPUInfo("Total Wall Time: %lu us%s", (uint64_t)mStatWallTime, nEventReport.c_str()); }