diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h
index 960f2fcec3f2c..bcb568d893007 100644
--- a/GPU/GPUTracking/Definitions/GPUSettingsList.h
+++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h
@@ -285,6 +285,7 @@ AddOption(tpcSingleSector, int32_t, -1, "", 0, "Restrict TPC processing to a sin
 AddOption(tpcDownscaledEdx, uint8_t, 0, "", 0, "If != 0, downscale dEdx processing (if enabled) to x %")
 AddOption(tpcMaxAttachedClustersPerSectorRow, uint32_t, 51000, "", 0, "Maximum number of TPC attached clusters which can be decoded per SectorRow")
 AddOption(tpcUseOldCPUDecoding, bool, false, "", 0, "Enable old CPU-based TPC decoding")
+AddOption(tpcApplyCFCutsAtDecoding, bool, false, "", 0, "Apply cluster cuts from clusterization during decoding of compressed clusters")
 AddOption(RTCcacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the cache file is stored")
 AddOption(RTCprependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string")
 AddOption(RTCoverrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line")
diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h
index e65396cbce21a..7a36355bf843d 100644
--- a/GPU/GPUTracking/Global/GPUChain.h
+++ b/GPU/GPUTracking/Global/GPUChain.h
@@ -59,31 +59,30 @@ class GPUChain
   const GPUParam& GetParam() const { return mRec->mHostConstantMem->param; }
   const GPUSettingsGRP& GetGRPSettings() const { return mRec->mGRPSettings; }
-  const GPUSettingsDeviceBackend& GetDeviceBackendSettings() const { return mRec->mDeviceBackendSettings; }
-  const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->mProcessingSettings; }
   const GPUCalibObjectsConst& calib() const { return processors()->calibObjects; }
   GPUReconstruction* rec() { return mRec; }
   const GPUReconstruction* rec() const { return mRec; }
   inline const GPUConstantMem* GetProcessors() { return mRec->processors(); }
+  // Make functions from GPUReconstruction*** available
   GPUReconstruction::RecoStepField GetRecoSteps() const { return mRec->GetRecoSteps(); }
   GPUReconstruction::RecoStepField GetRecoStepsGPU() const { return mRec->GetRecoStepsGPU(); }
   GPUReconstruction::InOutTypeField GetRecoStepsInputs() const { return mRec->GetRecoStepsInputs(); }
   GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const { return mRec->GetRecoStepsOutputs(); }
+  inline const GPUSettingsDeviceBackend& GetDeviceBackendSettings() const { return mRec->mDeviceBackendSettings; }
+  inline const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->mProcessingSettings; }
 
 protected:
   GPUReconstructionCPU* mRec;
   GPUChain(GPUReconstruction* rec) : mRec((GPUReconstructionCPU*)rec) {}
   int32_t GetThread();
 
-  // Make functions from GPUReconstruction*** available
   inline GPUConstantMem* processors() { return mRec->processors(); }
   inline GPUConstantMem* processorsShadow() { return mRec->mProcessorsShadow; }
   inline GPUConstantMem* processorsDevice() { return mRec->mDeviceConstantMem; }
   inline GPUParam& param() { return mRec->param(); }
   inline const GPUConstantMem* processors() const { return mRec->processors(); }
-  inline GPUSettingsProcessing& ProcessingSettings() { return mRec->mProcessingSettings; }
   inline void SynchronizeStream(int32_t stream) { mRec->SynchronizeStream(stream); }
   inline void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) { mRec->SynchronizeEvents(evList, nEvents); }
   inline void SynchronizeEventAndRelease(deviceEvent& ev, bool doGPU = true)
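
Illustrative note (not part of the patch): GPUSettingsList.h registers the new tpcApplyCFCutsAtDecoding processing option, and GPUChain.h drops the protected non-const ProcessingSettings() accessor in favour of a public const GetProcessingSettings(), which is why call sites in the files below are renamed. A minimal sketch of the resulting read-only accessor pattern, with illustrative names rather than the actual O2 classes:

struct SettingsSketch {
  bool tpcApplyCFCutsAtDecoding = false; // mirrors the new option registered above
};

class ChainSketch {
 public:
  // Settings are exposed read-only; there is no longer a mutable counterpart,
  // so call sites cannot silently modify processing settings after configuration.
  const SettingsSketch& GetProcessingSettings() const { return mSettings; }

 private:
  SettingsSketch mSettings;
};
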
diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx
index d6fd370b3b330..528c683944ef1 100644
--- a/GPU/GPUTracking/Global/GPUChainTracking.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx
@@ -309,7 +309,7 @@ bool GPUChainTracking::ValidateSettings()
       GPUError("Must use external output for double pipeline mode");
       return false;
     }
-    if (ProcessingSettings().tpcCompressionGatherMode == 1) {
+    if (GetProcessingSettings().tpcCompressionGatherMode == 1) {
      GPUError("Double pipeline incompatible to compression mode 1");
      return false;
    }
@@ -318,7 +318,11 @@ bool GPUChainTracking::ValidateSettings()
      return false;
    }
  }
-  if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (ProcessingSettings().tpcCompressionGatherMode == 1 || ProcessingSettings().tpcCompressionGatherMode == 3)) {
+  if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCDecompression) && GetProcessingSettings().tpcApplyCFCutsAtDecoding && !GetProcessingSettings().tpcUseOldCPUDecoding) {
+    GPUError("tpcApplyCFCutsAtDecoding currently requires tpcUseOldCPUDecoding");
+    return false;
+  }
+  if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (GetProcessingSettings().tpcCompressionGatherMode == 1 || GetProcessingSettings().tpcCompressionGatherMode == 3)) {
     GPUError("Invalid tpcCompressionGatherMode for compression on CPU");
     return false;
   }
@@ -888,7 +892,7 @@ int32_t GPUChainTracking::RunChainFinalize()
     if (GetProcessingSettings().eventDisplay->getDisplayControl() == 2) {
       mDisplayRunning = false;
       GetProcessingSettings().eventDisplay->DisplayExit();
-      ProcessingSettings().eventDisplay = nullptr;
+      const_cast<GPUSettingsProcessing&>(GetProcessingSettings()).eventDisplay = nullptr; // TODO: fixme - eventDisplay should probably not be put into ProcessingSettings in the first place
       return (2);
     }
     GetProcessingSettings().eventDisplay->setDisplayControl(0);
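
Illustrative restatement (not part of the patch) of the new consistency check added to ValidateSettings(): applying the clusterizer cuts at decoding currently works only in the old CPU-based decoding path, so other configurations are rejected. Names below are stand-ins, not the O2 API:

#include <cstdio>

struct ProcSettingsSketch {
  bool tpcApplyCFCutsAtDecoding = false;
  bool tpcUseOldCPUDecoding = false;
};

// Returns false (invalid) when CF cuts at decoding are requested while the
// decompression step is enabled but the old CPU decoding path is switched off.
bool validateDecodingCutSettings(bool decompressionStepEnabled, const ProcSettingsSketch& s)
{
  if (decompressionStepEnabled && s.tpcApplyCFCutsAtDecoding && !s.tpcUseOldCPUDecoding) {
    std::printf("tpcApplyCFCutsAtDecoding currently requires tpcUseOldCPUDecoding\n");
    return false; // reject the configuration, as the patch does
  }
  return true;
}
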
diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
index 9d27a42720613..be89c82889753 100644
--- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx
@@ -40,7 +40,7 @@ int32_t GPUChainTracking::RunTPCCompression()
     RecordMarker(mEvents->single, 0);
   }
-  if (ProcessingSettings().tpcCompressionGatherMode == 3) {
+  if (GetProcessingSettings().tpcCompressionGatherMode == 3) {
     mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile
   }
   SetupGPUProcessor(&Compressor, true);
@@ -73,19 +73,19 @@ int32_t GPUChainTracking::RunTPCCompression()
   Compressor.mOutputFlat->set(outputSize, *Compressor.mOutput);
   char* hostFlatPtr = (char*)Compressor.mOutput->qTotU; // First array as allocated in GPUTPCCompression::SetPointersCompressedClusters
   size_t copySize = 0;
-  if (ProcessingSettings().tpcCompressionGatherMode == 3) {
+  if (GetProcessingSettings().tpcCompressionGatherMode == 3) {
     CompressorShadow.mOutputA = Compressor.mOutput;
     copySize = AllocateRegisteredMemory(Compressor.mMemoryResOutputGPU); // We overwrite Compressor.mOutput with the allocated output pointers on the GPU
   }
   const o2::tpc::CompressedClustersPtrs* P = nullptr;
   HighResTimer* gatherTimer = nullptr;
   int32_t outputStream = 0;
-  if (ProcessingSettings().doublePipeline) {
+  if (GetProcessingSettings().doublePipeline) {
     SynchronizeStream(OutputStream()); // Synchronize output copies running in parallel from memory that might be released, only the following async copy from stacked memory is safe after the chain finishes.
     outputStream = OutputStream();
   }
-  if (ProcessingSettings().tpcCompressionGatherMode >= 2) {
-    if (ProcessingSettings().tpcCompressionGatherMode == 2) {
+  if (GetProcessingSettings().tpcCompressionGatherMode >= 2) {
+    if (GetProcessingSettings().tpcCompressionGatherMode == 2) {
       void* devicePtr = mRec->getGPUPointer(Compressor.mOutputFlat);
       if (devicePtr != Compressor.mOutputFlat) {
         CompressedClustersPtrs& ptrs = *Compressor.mOutput; // We need to update the ptrs with the gpu-mapped version of the host address space
@@ -97,7 +97,7 @@ int32_t GPUChainTracking::RunTPCCompression()
     TransferMemoryResourcesToGPU(myStep, &Compressor, outputStream);
     constexpr uint32_t nBlocksDefault = 2;
     constexpr uint32_t nBlocksMulti = 1 + 2 * 200;
-    switch (ProcessingSettings().tpcCompressionGatherModeKernel) {
+    switch (GetProcessingSettings().tpcCompressionGatherModeKernel) {
      case 0:
        runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::unbuffered>(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression));
        getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::unbuffered>(RecoStep::TPCCompression, 0, outputSize, false);
@@ -120,10 +120,10 @@ int32_t GPUChainTracking::RunTPCCompression()
        getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::multiBlock>(RecoStep::TPCCompression, 0, outputSize, false);
        break;
      default:
-        GPUError("Invalid compression kernel %d selected.", (int32_t)ProcessingSettings().tpcCompressionGatherModeKernel);
+        GPUError("Invalid compression kernel %d selected.", (int32_t)GetProcessingSettings().tpcCompressionGatherModeKernel);
        return 1;
    }
-    if (ProcessingSettings().tpcCompressionGatherMode == 3) {
+    if (GetProcessingSettings().tpcCompressionGatherMode == 3) {
      RecordMarker(mEvents->stream[outputStream], outputStream);
      char* deviceFlatPts = (char*)Compressor.mOutput->qTotU;
      if (GetProcessingSettings().doublePipeline) {
@@ -138,9 +138,9 @@ int32_t GPUChainTracking::RunTPCCompression()
    }
  } else {
    int8_t direction = 0;
-    if (ProcessingSettings().tpcCompressionGatherMode == 0) {
+    if (GetProcessingSettings().tpcCompressionGatherMode == 0) {
      P = &CompressorShadow.mPtrs;
-    } else if (ProcessingSettings().tpcCompressionGatherMode == 1) {
+    } else if (GetProcessingSettings().tpcCompressionGatherMode == 1) {
      P = &Compressor.mPtrs;
      direction = -1;
      gatherTimer = &getTimer("GPUTPCCompression_GatherOnCPU", 0);
@@ -184,11 +184,11 @@ int32_t GPUChainTracking::RunTPCCompression()
     GPUMemCpyAlways(myStep, O->timeA, P->timeA, O->nTracks * sizeof(O->timeA[0]), outputStream, direction);
     GPUMemCpyAlways(myStep, O->padA, P->padA, O->nTracks * sizeof(O->padA[0]), outputStream, direction);
   }
-  if (ProcessingSettings().tpcCompressionGatherMode == 1) {
+  if (GetProcessingSettings().tpcCompressionGatherMode == 1) {
     gatherTimer->Stop();
   }
   mIOPtrs.tpcCompressedClusters = Compressor.mOutputFlat;
-  if (ProcessingSettings().tpcCompressionGatherMode == 3) {
+  if (GetProcessingSettings().tpcCompressionGatherMode == 3) {
     SynchronizeEventAndRelease(mEvents->stream[outputStream]);
     mRec->ReturnVolatileDeviceMemory();
   }
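
Illustrative note (not part of the patch): the decompression hunk below replaces the single output allocator with two interchangeable ones, so the decoder can write either directly into the final output buffer or into a scratch buffer that is filtered afterwards. A minimal sketch of that selection, with a stand-in type Cl instead of the O2 ClusterNative:

#include <cstddef>
#include <functional>
#include <memory>
#include <vector>

struct Cl {
};

int main()
{
  bool applyCutsAtDecoding = true;
  std::vector<Cl> finalOutput;
  std::unique_ptr<Cl[]> tmpBuffer;

  // Allocates the buffer that is handed to downstream consumers.
  std::function<Cl*(size_t)> allocatorFinal = [&finalOutput](size_t n) { finalOutput.resize(n); return finalOutput.data(); };
  // Allocates a scratch buffer that will be filtered before the final copy.
  std::function<Cl*(size_t)> allocatorTmp = [&tmpBuffer](size_t n) { tmpBuffer = std::make_unique<Cl[]>(n); return tmpBuffer.get(); };

  // Both share the signature Cl*(size_t), so the choice can be made at run time.
  std::function<Cl*(size_t)> allocatorUse = applyCutsAtDecoding ? allocatorTmp : allocatorFinal;
  Cl* buf = allocatorUse(128);
  (void)buf;
}
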
@@ -209,18 +209,52 @@ int32_t GPUChainTracking::RunTPCDecompression()
   if (GetProcessingSettings().tpcUseOldCPUDecoding) {
     const auto& threadContext = GetThreadContext();
     TPCClusterDecompressor decomp;
-    auto allocator = [this](size_t size) {
+    auto allocatorFinal = [this](size_t size) {
       this->mInputsHost->mNClusterNative = this->mInputsShadow->mNClusterNative = size;
       this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
       return this->mInputsHost->mPclusterNativeOutput;
     };
-    auto& gatherTimer = getTimer("TPCDecompression", 0);
-    gatherTimer.Start();
-    if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocator, param(), GetProcessingSettings().deterministicGPUReconstruction)) {
+    std::unique_ptr<ClusterNative[]> tmpBuffer;
+    auto allocatorTmp = [&tmpBuffer](size_t size) {
+      return ((tmpBuffer = std::make_unique<ClusterNative[]>(size))).get();
+    };
+    auto& decompressTimer = getTimer("TPCDecompression", 0);
+    auto allocatorUse = GetProcessingSettings().tpcApplyCFCutsAtDecoding ? std::function{allocatorTmp} : std::function{allocatorFinal};
+    decompressTimer.Start();
+    if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocatorUse, param(), GetProcessingSettings().deterministicGPUReconstruction)) {
       GPUError("Error decompressing clusters");
       return 1;
     }
-    gatherTimer.Stop();
+    if (GetProcessingSettings().tpcApplyCFCutsAtDecoding) {
+      ClusterNative* outputBuffer;
+      for (int32_t iPhase = 0; iPhase < 2; iPhase++) {
+        uint32_t countTotal = 0;
+        for (uint32_t iSector = 0; iSector < GPUCA_NSLICES; iSector++) {
+          for (uint32_t iRow = 0; iRow < GPUCA_ROW_COUNT; iRow++) {
+            uint32_t count = 0;
+            for (uint32_t k = 0; k < mClusterNativeAccess->nClusters[iSector][iRow]; k++) {
+              const ClusterNative& cl = mClusterNativeAccess->clusters[iSector][iRow][k];
+              bool keep = cl.qTot > param().rec.tpc.cfQTotCutoff && cl.qMax > param().rec.tpc.cfQMaxCutoff && (cl.sigmaPadPacked || cl.qMax > param().rec.tpc.cfQMaxCutoffSinglePad) && (cl.sigmaTimePacked || cl.qMax > param().rec.tpc.cfQMaxCutoffSingleTime);
+              if (iPhase && keep) {
+                outputBuffer[countTotal] = cl;
+              }
+              count += keep;
+              countTotal += keep;
+            }
+            if (iPhase) {
+              mClusterNativeAccess->nClusters[iSector][iRow] = count;
+            }
+          }
+        }
+        if (iPhase) {
+          mClusterNativeAccess->clustersLinear = outputBuffer;
+          mClusterNativeAccess->setOffsetPtrs();
+        } else {
+          outputBuffer = allocatorFinal(countTotal);
+        }
+      }
+    }
+    decompressTimer.Stop();
     mIOPtrs.clustersNative = mClusterNativeAccess.get();
     if (mRec->IsGPU()) {
       AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
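
The new block above makes two passes over the decoded clusters: phase 0 counts the clusters surviving the clusterizer cuts and allocates the final output exactly once via allocatorFinal; phase 1 copies the survivors and updates the per-row counts and offset pointers. A standalone sketch of the same count-then-copy idea (not part of the patch), reduced to a flat array and only the qTot/qMax cuts, with stand-in types rather than the O2 ClusterNative:

#include <cstddef>
#include <cstdint>
#include <vector>

struct Cluster {
  uint16_t qTot;
  uint16_t qMax;
};

std::vector<Cluster> applyCuts(const std::vector<Cluster>& in, uint16_t qTotCutoff, uint16_t qMaxCutoff)
{
  std::vector<Cluster> out;
  for (int phase = 0; phase < 2; phase++) {
    size_t count = 0;
    for (const Cluster& cl : in) {
      bool keep = cl.qTot > qTotCutoff && cl.qMax > qMaxCutoff; // simplified cut
      if (phase && keep) {
        out[count] = cl; // phase 1: copy survivors into the exactly-sized buffer
      }
      count += keep;
    }
    if (!phase) {
      out.resize(count); // phase 0 result: allocate once, no reallocation while copying
    }
  }
  return out;
}
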
"O2" : "GPU", trdText); + GPUInfo("Output Tracks: %d (%d / %d / %d / %d clusters (fitted / attached / adjacent / total) - %s format)%s", nTracks, nAttachedClustersFitted, nAttachedClusters, nAdjacentClusters, nCls, GetProcessingSettings().createO2Output > 1 ? "O2" : "GPU", trdText); } void GPUChainTracking::SanityCheck()