diff --git a/CMakeLists.txt b/CMakeLists.txt index 36bcc155a9b29..deacb50958b84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,9 +12,7 @@ # Preamble cmake_minimum_required(VERSION 3.27.1 FATAL_ERROR) -add_definitions(-DGPUCA_NO_FAST_MATH=1) -set(GPUCA_NO_FAST_MATH 1) -set(GPUCA_NO_FAST_MATH_WHOLEO2 1) + # it's important to specify accurately the list of languages. for instance C and # C++ as we _do_ have some C files to compile explicitely as C (e.g. gl3w.c) project(O2 LANGUAGES C CXX VERSION 1.2.0) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/PID.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/PID.h index 432bfdefba497..3f6ab06f6b2f2 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/PID.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/PID.h @@ -94,8 +94,8 @@ class PID static constexpr ID Alpha = 8; static constexpr ID First = Electron; - static constexpr ID Last = Alpha; ///< if extra IDs added, update this !!! - static constexpr ID NIDs = Last + 1; ///< number of defined IDs + static constexpr ID Last = Alpha; ///< if extra IDs added, update this !!! + static constexpr ID NIDs = Last + 1; ///< number of defined IDs // PID for derived particles static constexpr ID PI0 = 9; @@ -136,6 +136,7 @@ class PID } GPUd() static const char* getName(ID id) { return pid_constants::sNames[id]; } #endif + private: ID mID = Pion; @@ -148,8 +149,7 @@ class PID GPUdi() static constexpr ID nameToID(char const* name, ID id) { - return id > LastExt ? id : sameStr(name, pid_constants::sNames[id]) ? id - : nameToID(name, id + 1); + return id > LastExt ? id : sameStr(name, pid_constants::sNames[id]) ? id : nameToID(name, id + 1); } #endif diff --git a/DataFormats/Reconstruction/src/TrackParametrization.cxx b/DataFormats/Reconstruction/src/TrackParametrization.cxx index 738548c1ce5eb..a30d74d940977 100644 --- a/DataFormats/Reconstruction/src/TrackParametrization.cxx +++ b/DataFormats/Reconstruction/src/TrackParametrization.cxx @@ -592,7 +592,7 @@ GPUd() void TrackParametrization::printParam() const #else printf("X:%+.4e Alp:%+.3e Par: %+.4e %+.4e %+.4e %+.4e %+.4e |Q|:%d %s", getX(), getAlpha(), getY(), getZ(), getSnp(), getTgl(), getQ2Pt(), getAbsCharge(), -#ifndef __OPENCL__ +#if !defined(__OPENCL__) && defined(GPUCA_GPU_DEBUG_PRINT) getPID().getName() #else "" @@ -618,7 +618,7 @@ GPUd() void TrackParametrization::printParamHexadecimal() gpu::CAMath::Float2UIntReint(getTgl()), gpu::CAMath::Float2UIntReint(getQ2Pt()), gpu::CAMath::Float2UIntReint(getAbsCharge()), -#ifndef __OPENCL__ +#if !defined(__OPENCL__) && defined(GPUCA_GPU_DEBUG_PRINT) getPID().getName() #else "" diff --git a/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx b/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx index f1ba9a6798842..b07fd8d6c3e6d 100644 --- a/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx +++ b/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx @@ -1188,7 +1188,7 @@ template std::string TrackParametrizationWithError::asString() const { return TrackParametrization::asString() + - fmt::format(" \nCovMat: [{:+.3e}]\n [{:+.3e} {:+.3e}]\n [{:+.3e} {:+.3e} {:+.3e}]\n [{:+.3e} {:+.3e} {:+.3e} {:+.3e}]\n [{:+.3e} {:+.3e} {:+.3e} {:+.3e} {:+.3e}]", + fmt::format(" Cov: [{:+.3e}] [{:+.3e} {:+.3e}] [{:+.3e} {:+.3e} {:+.3e}] [{:+.3e} {:+.3e} {:+.3e} {:+.3e}] [{:+.3e} {:+.3e} {:+.3e} {:+.3e} {:+.3e}]", mC[kSigY2], mC[kSigZY], mC[kSigZ2], mC[kSigSnpY], mC[kSigSnpZ], mC[kSigSnp2], mC[kSigTglY], mC[kSigTglZ], mC[kSigTglSnp], mC[kSigTgl2], mC[kSigQ2PtY], mC[kSigQ2PtZ], mC[kSigQ2PtSnp], mC[kSigQ2PtTgl], mC[kSigQ2Pt2]); @@ -1198,7 +1198,7 @@ template std::string TrackParametrizationWithError::asStringHexadecimal() { return TrackParametrization::asStringHexadecimal() + - fmt::format(" \n<> CovMat: [{:x}]\n<> [{:x} {:x}]\n<> [{:x} {:x} {:x}]\n<> [{:x} {:x} {:x} {:x}]\n<> [{:x} {:x} {:x} {:x} {:x}]", + fmt::format(" Cov: [{:x}] [{:x} {:x}] [{:x} {:x} {:x}] [{:x} {:x} {:x} {:x}] [{:x} {:x} {:x} {:x} {:x}]", reinterpret_cast(mC[kSigY2]), reinterpret_cast(mC[kSigZY]), reinterpret_cast(mC[kSigZ2]), reinterpret_cast(mC[kSigSnpY]), reinterpret_cast(mC[kSigSnpZ]), reinterpret_cast(mC[kSigSnp2]), reinterpret_cast(mC[kSigTglY]), reinterpret_cast(mC[kSigTglZ]), reinterpret_cast(mC[kSigTglSnp]), @@ -1217,13 +1217,9 @@ GPUd() void TrackParametrizationWithError::print() const #else TrackParametrization::printParam(); printf( - "\n%7s [%+.3e]\n" - "%7s [%+.3e %+.3e]\n" - "%7s [%+.3e %+.3e %+.3e]\n" - "%7s [%+.3e %+.3e %+.3e %+.3e]\n" - "%7s [%+.3e %+.3e %+.3e %+.3e %+.3e]\n", - "CovMat:", mC[kSigY2], "", mC[kSigZY], mC[kSigZ2], "", mC[kSigSnpY], mC[kSigSnpZ], mC[kSigSnp2], "", mC[kSigTglY], - mC[kSigTglZ], mC[kSigTglSnp], mC[kSigTgl2], "", mC[kSigQ2PtY], mC[kSigQ2PtZ], mC[kSigQ2PtSnp], mC[kSigQ2PtTgl], + "\nCov: [%+.3e] [%+.3e %+.3e] [%+.3e %+.3e %+.3e] [%+.3e %+.3e %+.3e %+.3e] [%+.3e %+.3e %+.3e %+.3e %+.3e]", + mC[kSigY2], mC[kSigZY], mC[kSigZ2], mC[kSigSnpY], mC[kSigSnpZ], mC[kSigSnp2], mC[kSigTglY], + mC[kSigTglZ], mC[kSigTglSnp], mC[kSigTgl2], mC[kSigQ2PtY], mC[kSigQ2PtZ], mC[kSigQ2PtSnp], mC[kSigQ2PtTgl], mC[kSigQ2Pt2]); #endif } @@ -1238,16 +1234,11 @@ GPUd() void TrackParametrizationWithError::printHexadecimal() #else TrackParametrization::printParamHexadecimal(); printf( - "\n<> %7s [%x]\n" - "<> %7s [%x %x]\n" - "<> %7s [%x %x %x]\n" - "<> %7s [%x %x %x %x]\n" - "<> %7s [%x %x %x %x %x]\n", - "<> CovMat:", - gpu::CAMath::Float2UIntReint(mC[kSigY2]), "", - gpu::CAMath::Float2UIntReint(mC[kSigZY]), gpu::CAMath::Float2UIntReint(mC[kSigZ2]), "", - gpu::CAMath::Float2UIntReint(mC[kSigSnpY]), gpu::CAMath::Float2UIntReint(mC[kSigSnpZ]), gpu::CAMath::Float2UIntReint(mC[kSigSnp2]), "", - gpu::CAMath::Float2UIntReint(mC[kSigTglY]), gpu::CAMath::Float2UIntReint(mC[kSigTglZ]), gpu::CAMath::Float2UIntReint(mC[kSigTglSnp]), gpu::CAMath::Float2UIntReint(mC[kSigTgl2]), "", + "\nCov: [%x] [%x %x] [%x %x %x] [%x %x %x %x] [%x %x %x %x %x]", + gpu::CAMath::Float2UIntReint(mC[kSigY2]), + gpu::CAMath::Float2UIntReint(mC[kSigZY]), gpu::CAMath::Float2UIntReint(mC[kSigZ2]), + gpu::CAMath::Float2UIntReint(mC[kSigSnpY]), gpu::CAMath::Float2UIntReint(mC[kSigSnpZ]), gpu::CAMath::Float2UIntReint(mC[kSigSnp2]), + gpu::CAMath::Float2UIntReint(mC[kSigTglY]), gpu::CAMath::Float2UIntReint(mC[kSigTglZ]), gpu::CAMath::Float2UIntReint(mC[kSigTglSnp]), gpu::CAMath::Float2UIntReint(mC[kSigTgl2]), gpu::CAMath::Float2UIntReint(mC[kSigQ2PtY]), gpu::CAMath::Float2UIntReint(mC[kSigQ2PtZ]), gpu::CAMath::Float2UIntReint(mC[kSigQ2PtSnp]), gpu::CAMath::Float2UIntReint(mC[kSigQ2PtTgl]), gpu::CAMath::Float2UIntReint(mC[kSigQ2Pt2])); #endif } diff --git a/Detectors/Base/src/MatLayerCylSet.cxx b/Detectors/Base/src/MatLayerCylSet.cxx index afb103f70fee4..12156fc55b381 100644 --- a/Detectors/Base/src/MatLayerCylSet.cxx +++ b/Detectors/Base/src/MatLayerCylSet.cxx @@ -18,7 +18,7 @@ #include "GPUCommonLogger.h" #include #include "CommonUtils/TreeStreamRedirector.h" -// #define _DBG_LOC_ // for local debugging only +//#define _DBG_LOC_ // for local debugging only #endif // !GPUCA_ALIGPUCODE #undef NDEBUG @@ -256,7 +256,7 @@ void MatLayerCylSet::print(bool data) const float(getFlatBufferSize()) / 1024 / 1024); } -#endif //! GPUCA_ALIGPUCODE +#endif //!GPUCA_ALIGPUCODE #ifndef GPUCA_GPUCODE //________________________________________________________________________________ @@ -391,8 +391,8 @@ GPUd() MatBudget MatLayerCylSet::getMatBudget(float x0, float y0, float z0, floa } // loop over layers if (rval.length != 0.f) { - rval.meanRho /= rval.length; // average - rval.meanX2X0 *= ray.getDist(); // normalize + rval.meanRho /= rval.length; // average + rval.meanX2X0 *= ray.getDist(); // normalize } rval.length = ray.getDist(); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu index 9b71d95188aa5..532e4d1492d0b 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu @@ -499,20 +499,18 @@ void TimeFrameGPU::loadClustersDevice() template void TimeFrameGPU::loadTrackingFrameInfoDevice(const int iteration) { - for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { - LOGP(debug, "gpu-transfer: loading {} tfinfo on layer {}, for {} MB.", mTrackingFrameInfo[iLayer].size(), iLayer, mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo) / MB); - allocMemAsync(reinterpret_cast(&mTrackingFrameInfoDevice[iLayer]), mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), nullptr, getExtAllocator()); - // Register and move data - if (!iteration) { + if (!iteration) { + for (auto iLayer{0}; iLayer < nLayers; ++iLayer) { + LOGP(info, "gpu-transfer: loading {} tfinfo on layer {}, for {} MB.", mTrackingFrameInfo[iLayer].size(), iLayer, mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo) / MB); + allocMemAsync(reinterpret_cast(&mTrackingFrameInfoDevice[iLayer]), mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), nullptr, getExtAllocator()); + // Register and move data checkGPUError(cudaHostRegister(mTrackingFrameInfo[iLayer].data(), mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), cudaHostRegisterPortable)); + checkGPUError(cudaMemcpyAsync(mTrackingFrameInfoDevice[iLayer], mTrackingFrameInfo[iLayer].data(), mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), cudaMemcpyHostToDevice, mGpuStreams[0].get())); } - checkGPUError(cudaMemcpyAsync(mTrackingFrameInfoDevice[iLayer], mTrackingFrameInfo[iLayer].data(), mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), cudaMemcpyHostToDevice, mGpuStreams[0].get())); - } - allocMemAsync(reinterpret_cast(&mTrackingFrameInfoDeviceArray), nLayers * sizeof(TrackingFrameInfo*), nullptr, getExtAllocator()); - if (!iteration) { + allocMemAsync(reinterpret_cast(&mTrackingFrameInfoDeviceArray), nLayers * sizeof(TrackingFrameInfo*), nullptr, getExtAllocator()); checkGPUError(cudaHostRegister(mTrackingFrameInfoDevice.data(), nLayers * sizeof(TrackingFrameInfo*), cudaHostRegisterPortable)); + checkGPUError(cudaMemcpyAsync(mTrackingFrameInfoDeviceArray, mTrackingFrameInfoDevice.data(), nLayers * sizeof(TrackingFrameInfo*), cudaMemcpyHostToDevice, mGpuStreams[0].get())); } - checkGPUError(cudaMemcpyAsync(mTrackingFrameInfoDeviceArray, mTrackingFrameInfoDevice.data(), nLayers * sizeof(TrackingFrameInfo*), cudaMemcpyHostToDevice, mGpuStreams[0].get())); } template diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 4af0781b5cdb2..274c0b573e454 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -86,11 +86,7 @@ GPUd() bool fitTrack(TrackITSExt& track, if (!track.o2::track::TrackParCovF::rotate(trackingHit.alphaTrackingFrame)) { return false; } -#ifdef __HIPCC__ - if (!track.propagateTo(trackingHit.xTrackingFrame, Bz)) { - return false; - } -#else + if (!prop->propagateToX(track, trackingHit.xTrackingFrame, Bz, @@ -99,10 +95,8 @@ GPUd() bool fitTrack(TrackITSExt& track, matCorrType)) { return false; } -#endif -#ifndef __HIPCC__ + if (matCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) { -#endif track.setChi2(track.getChi2() + track.getPredictedChi2(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)); if (!track.TrackParCov::update(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)) { return false; @@ -113,9 +107,7 @@ GPUd() bool fitTrack(TrackITSExt& track, if (!track.correctForMaterial(xx0, xx0 * radiationLength * density, true)) { return false; } -#ifndef __HIPCC__ } -#endif auto predChi2{track.getPredictedChi2(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)}; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx index b52c20eaef83a..6e7114d9ca54d 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx @@ -39,7 +39,7 @@ struct ClusterHelper { float MSangle(float mass, float p, float xX0) { - float beta = p / std::hypot(mass, p); + float beta = p / o2::gpu::CAMath::Hypot(mass, p); return 0.0136f * o2::gpu::CAMath::Sqrt(xX0) * (1.f + 0.038f * o2::gpu::CAMath::Log(xX0)) / (beta * p); } @@ -376,12 +376,11 @@ void TimeFrame::initialise(const int iteration, const TrackingParameters& trkPar for (unsigned int iLayer{0}; iLayer < mClusters.size(); ++iLayer) { mMSangles[iLayer] = MSangle(0.14f, trkParam.TrackletMinPt, trkParam.LayerxX0[iLayer]); mPositionResolution[iLayer] = o2::gpu::CAMath::Sqrt(0.5f * (trkParam.SystErrorZ2[iLayer] + trkParam.SystErrorY2[iLayer]) + trkParam.LayerResolution[iLayer] * trkParam.LayerResolution[iLayer]); - if (iLayer < mClusters.size() - 1) { const float& r1 = trkParam.LayerRadii[iLayer]; const float& r2 = trkParam.LayerRadii[iLayer + 1]; - const float res1 = std::hypot(trkParam.PVres, mPositionResolution[iLayer]); - const float res2 = std::hypot(trkParam.PVres, mPositionResolution[iLayer + 1]); + const float res1 = o2::gpu::CAMath::Hypot(trkParam.PVres, mPositionResolution[iLayer]); + const float res2 = o2::gpu::CAMath::Hypot(trkParam.PVres, mPositionResolution[iLayer + 1]); const float cosTheta1half = o2::gpu::CAMath::Sqrt(1.f - Sq(0.5f * r1 * oneOverR)); const float cosTheta2half = o2::gpu::CAMath::Sqrt(1.f - Sq(0.5f * r2 * oneOverR)); float x = r2 * cosTheta1half - r1 * cosTheta2half; @@ -430,7 +429,7 @@ void TimeFrame::fillPrimaryVerticesXandAlpha() } mPValphaX.reserve(mPrimaryVertices.size()); for (auto& pv : mPrimaryVertices) { - mPValphaX.emplace_back(std::array{std::hypot(pv.getX(), pv.getY()), math_utils::computePhi(pv.getX(), pv.getY())}); + mPValphaX.emplace_back(std::array{o2::gpu::CAMath::Hypot(pv.getX(), pv.getY()), math_utils::computePhi(pv.getX(), pv.getY())}); } } diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index fbf67c1369b19..cf15d9ee0409b 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -211,10 +211,12 @@ GPUdi() float GPUCommonMath::Modf(float x, float y) { return CHOICE(fmodf(x, y), GPUdi() unsigned int GPUCommonMath::Float2UIntReint(const float& x) { -#if !defined(GPUCA_GPUCODE) || defined(__OPENCL__) || defined(__OPENCL_HOST__) - return reinterpret_cast(x); -#else +#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) return __float_as_uint(x); +#elif defined(GPUCA_GPUCODE_DEVICE) && (defined(__OPENCL__) || defined(__OPENCLCPP__)) + return as_uint(x); +#else + return reinterpret_cast(x); #endif }