Skip to content

Commit

Permalink
Use CAMath in TimeFrame.cxx hypot
Browse files Browse the repository at this point in the history
  • Loading branch information
mconcas committed Mar 28, 2024
1 parent 192ebdd commit 6965591
Show file tree
Hide file tree
Showing 9 changed files with 40 additions and 60 deletions.
4 changes: 1 addition & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
# Preamble

cmake_minimum_required(VERSION 3.27.1 FATAL_ERROR)
add_definitions(-DGPUCA_NO_FAST_MATH=1)
set(GPUCA_NO_FAST_MATH 1)
set(GPUCA_NO_FAST_MATH_WHOLEO2 1)

# it's important to specify accurately the list of languages. for instance C and
# C++ as we _do_ have some C files to compile explicitely as C (e.g. gl3w.c)
project(O2 LANGUAGES C CXX VERSION 1.2.0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ class PID
static constexpr ID Alpha = 8;

static constexpr ID First = Electron;
static constexpr ID Last = Alpha; ///< if extra IDs added, update this !!!
static constexpr ID NIDs = Last + 1; ///< number of defined IDs
static constexpr ID Last = Alpha; ///< if extra IDs added, update this !!!
static constexpr ID NIDs = Last + 1; ///< number of defined IDs

// PID for derived particles
static constexpr ID PI0 = 9;
Expand Down Expand Up @@ -136,6 +136,7 @@ class PID
}
GPUd() static const char* getName(ID id) { return pid_constants::sNames[id]; }
#endif

private:
ID mID = Pion;

Expand All @@ -148,8 +149,7 @@ class PID

GPUdi() static constexpr ID nameToID(char const* name, ID id)
{
return id > LastExt ? id : sameStr(name, pid_constants::sNames[id]) ? id
: nameToID(name, id + 1);
return id > LastExt ? id : sameStr(name, pid_constants::sNames[id]) ? id : nameToID(name, id + 1);
}
#endif

Expand Down
4 changes: 2 additions & 2 deletions DataFormats/Reconstruction/src/TrackParametrization.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ GPUd() void TrackParametrization<value_T>::printParam() const
#else
printf("X:%+.4e Alp:%+.3e Par: %+.4e %+.4e %+.4e %+.4e %+.4e |Q|:%d %s",
getX(), getAlpha(), getY(), getZ(), getSnp(), getTgl(), getQ2Pt(), getAbsCharge(),
#ifndef __OPENCL__
#if !defined(__OPENCL__) && defined(GPUCA_GPU_DEBUG_PRINT)
getPID().getName()
#else
""
Expand All @@ -618,7 +618,7 @@ GPUd() void TrackParametrization<value_T>::printParamHexadecimal()
gpu::CAMath::Float2UIntReint(getTgl()),
gpu::CAMath::Float2UIntReint(getQ2Pt()),
gpu::CAMath::Float2UIntReint(getAbsCharge()),
#ifndef __OPENCL__
#if !defined(__OPENCL__) && defined(GPUCA_GPU_DEBUG_PRINT)
getPID().getName()
#else
""
Expand Down
29 changes: 10 additions & 19 deletions DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1188,7 +1188,7 @@ template <typename value_T>
std::string TrackParametrizationWithError<value_T>::asString() const
{
return TrackParametrization<value_t>::asString() +
fmt::format(" \nCovMat: [{:+.3e}]\n [{:+.3e} {:+.3e}]\n [{:+.3e} {:+.3e} {:+.3e}]\n [{:+.3e} {:+.3e} {:+.3e} {:+.3e}]\n [{:+.3e} {:+.3e} {:+.3e} {:+.3e} {:+.3e}]",
fmt::format(" Cov: [{:+.3e}] [{:+.3e} {:+.3e}] [{:+.3e} {:+.3e} {:+.3e}] [{:+.3e} {:+.3e} {:+.3e} {:+.3e}] [{:+.3e} {:+.3e} {:+.3e} {:+.3e} {:+.3e}]",
mC[kSigY2], mC[kSigZY], mC[kSigZ2], mC[kSigSnpY], mC[kSigSnpZ], mC[kSigSnp2], mC[kSigTglY],
mC[kSigTglZ], mC[kSigTglSnp], mC[kSigTgl2], mC[kSigQ2PtY], mC[kSigQ2PtZ], mC[kSigQ2PtSnp], mC[kSigQ2PtTgl],
mC[kSigQ2Pt2]);
Expand All @@ -1198,7 +1198,7 @@ template <typename value_T>
std::string TrackParametrizationWithError<value_T>::asStringHexadecimal()
{
return TrackParametrization<value_t>::asStringHexadecimal() +
fmt::format(" \n<> CovMat: [{:x}]\n<> [{:x} {:x}]\n<> [{:x} {:x} {:x}]\n<> [{:x} {:x} {:x} {:x}]\n<> [{:x} {:x} {:x} {:x} {:x}]",
fmt::format(" Cov: [{:x}] [{:x} {:x}] [{:x} {:x} {:x}] [{:x} {:x} {:x} {:x}] [{:x} {:x} {:x} {:x} {:x}]",
reinterpret_cast<const unsigned int&>(mC[kSigY2]), reinterpret_cast<const unsigned int&>(mC[kSigZY]), reinterpret_cast<const unsigned int&>(mC[kSigZ2]),
reinterpret_cast<const unsigned int&>(mC[kSigSnpY]), reinterpret_cast<const unsigned int&>(mC[kSigSnpZ]), reinterpret_cast<const unsigned int&>(mC[kSigSnp2]),
reinterpret_cast<const unsigned int&>(mC[kSigTglY]), reinterpret_cast<const unsigned int&>(mC[kSigTglZ]), reinterpret_cast<const unsigned int&>(mC[kSigTglSnp]),
Expand All @@ -1217,13 +1217,9 @@ GPUd() void TrackParametrizationWithError<value_T>::print() const
#else
TrackParametrization<value_T>::printParam();
printf(
"\n%7s [%+.3e]\n"
"%7s [%+.3e %+.3e]\n"
"%7s [%+.3e %+.3e %+.3e]\n"
"%7s [%+.3e %+.3e %+.3e %+.3e]\n"
"%7s [%+.3e %+.3e %+.3e %+.3e %+.3e]\n",
"CovMat:", mC[kSigY2], "", mC[kSigZY], mC[kSigZ2], "", mC[kSigSnpY], mC[kSigSnpZ], mC[kSigSnp2], "", mC[kSigTglY],
mC[kSigTglZ], mC[kSigTglSnp], mC[kSigTgl2], "", mC[kSigQ2PtY], mC[kSigQ2PtZ], mC[kSigQ2PtSnp], mC[kSigQ2PtTgl],
"\nCov: [%+.3e] [%+.3e %+.3e] [%+.3e %+.3e %+.3e] [%+.3e %+.3e %+.3e %+.3e] [%+.3e %+.3e %+.3e %+.3e %+.3e]",
mC[kSigY2], mC[kSigZY], mC[kSigZ2], mC[kSigSnpY], mC[kSigSnpZ], mC[kSigSnp2], mC[kSigTglY],
mC[kSigTglZ], mC[kSigTglSnp], mC[kSigTgl2], mC[kSigQ2PtY], mC[kSigQ2PtZ], mC[kSigQ2PtSnp], mC[kSigQ2PtTgl],
mC[kSigQ2Pt2]);
#endif
}
Expand All @@ -1238,16 +1234,11 @@ GPUd() void TrackParametrizationWithError<value_T>::printHexadecimal()
#else
TrackParametrization<value_T>::printParamHexadecimal();
printf(
"\n<> %7s [%x]\n"
"<> %7s [%x %x]\n"
"<> %7s [%x %x %x]\n"
"<> %7s [%x %x %x %x]\n"
"<> %7s [%x %x %x %x %x]\n",
"<> CovMat:",
gpu::CAMath::Float2UIntReint(mC[kSigY2]), "",
gpu::CAMath::Float2UIntReint(mC[kSigZY]), gpu::CAMath::Float2UIntReint(mC[kSigZ2]), "",
gpu::CAMath::Float2UIntReint(mC[kSigSnpY]), gpu::CAMath::Float2UIntReint(mC[kSigSnpZ]), gpu::CAMath::Float2UIntReint(mC[kSigSnp2]), "",
gpu::CAMath::Float2UIntReint(mC[kSigTglY]), gpu::CAMath::Float2UIntReint(mC[kSigTglZ]), gpu::CAMath::Float2UIntReint(mC[kSigTglSnp]), gpu::CAMath::Float2UIntReint(mC[kSigTgl2]), "",
"\nCov: [%x] [%x %x] [%x %x %x] [%x %x %x %x] [%x %x %x %x %x]",
gpu::CAMath::Float2UIntReint(mC[kSigY2]),
gpu::CAMath::Float2UIntReint(mC[kSigZY]), gpu::CAMath::Float2UIntReint(mC[kSigZ2]),
gpu::CAMath::Float2UIntReint(mC[kSigSnpY]), gpu::CAMath::Float2UIntReint(mC[kSigSnpZ]), gpu::CAMath::Float2UIntReint(mC[kSigSnp2]),
gpu::CAMath::Float2UIntReint(mC[kSigTglY]), gpu::CAMath::Float2UIntReint(mC[kSigTglZ]), gpu::CAMath::Float2UIntReint(mC[kSigTglSnp]), gpu::CAMath::Float2UIntReint(mC[kSigTgl2]),
gpu::CAMath::Float2UIntReint(mC[kSigQ2PtY]), gpu::CAMath::Float2UIntReint(mC[kSigQ2PtZ]), gpu::CAMath::Float2UIntReint(mC[kSigQ2PtSnp]), gpu::CAMath::Float2UIntReint(mC[kSigQ2PtTgl]), gpu::CAMath::Float2UIntReint(mC[kSigQ2Pt2]));
#endif
}
Expand Down
8 changes: 4 additions & 4 deletions Detectors/Base/src/MatLayerCylSet.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "GPUCommonLogger.h"
#include <TFile.h>
#include "CommonUtils/TreeStreamRedirector.h"
// #define _DBG_LOC_ // for local debugging only
//#define _DBG_LOC_ // for local debugging only

#endif // !GPUCA_ALIGPUCODE
#undef NDEBUG
Expand Down Expand Up @@ -256,7 +256,7 @@ void MatLayerCylSet::print(bool data) const
float(getFlatBufferSize()) / 1024 / 1024);
}

#endif //! GPUCA_ALIGPUCODE
#endif //!GPUCA_ALIGPUCODE

#ifndef GPUCA_GPUCODE
//________________________________________________________________________________
Expand Down Expand Up @@ -391,8 +391,8 @@ GPUd() MatBudget MatLayerCylSet::getMatBudget(float x0, float y0, float z0, floa
} // loop over layers

if (rval.length != 0.f) {
rval.meanRho /= rval.length; // average
rval.meanX2X0 *= ray.getDist(); // normalize
rval.meanRho /= rval.length; // average
rval.meanX2X0 *= ray.getDist(); // normalize
}
rval.length = ray.getDist();

Expand Down
18 changes: 8 additions & 10 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -499,20 +499,18 @@ void TimeFrameGPU<nLayers>::loadClustersDevice()
template <int nLayers>
void TimeFrameGPU<nLayers>::loadTrackingFrameInfoDevice(const int iteration)
{
for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
LOGP(debug, "gpu-transfer: loading {} tfinfo on layer {}, for {} MB.", mTrackingFrameInfo[iLayer].size(), iLayer, mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo) / MB);
allocMemAsync(reinterpret_cast<void**>(&mTrackingFrameInfoDevice[iLayer]), mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), nullptr, getExtAllocator());
// Register and move data
if (!iteration) {
if (!iteration) {
for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
LOGP(info, "gpu-transfer: loading {} tfinfo on layer {}, for {} MB.", mTrackingFrameInfo[iLayer].size(), iLayer, mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo) / MB);
allocMemAsync(reinterpret_cast<void**>(&mTrackingFrameInfoDevice[iLayer]), mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), nullptr, getExtAllocator());
// Register and move data
checkGPUError(cudaHostRegister(mTrackingFrameInfo[iLayer].data(), mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), cudaHostRegisterPortable));
checkGPUError(cudaMemcpyAsync(mTrackingFrameInfoDevice[iLayer], mTrackingFrameInfo[iLayer].data(), mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
}
checkGPUError(cudaMemcpyAsync(mTrackingFrameInfoDevice[iLayer], mTrackingFrameInfo[iLayer].data(), mTrackingFrameInfo[iLayer].size() * sizeof(TrackingFrameInfo), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
}
allocMemAsync(reinterpret_cast<void**>(&mTrackingFrameInfoDeviceArray), nLayers * sizeof(TrackingFrameInfo*), nullptr, getExtAllocator());
if (!iteration) {
allocMemAsync(reinterpret_cast<void**>(&mTrackingFrameInfoDeviceArray), nLayers * sizeof(TrackingFrameInfo*), nullptr, getExtAllocator());
checkGPUError(cudaHostRegister(mTrackingFrameInfoDevice.data(), nLayers * sizeof(TrackingFrameInfo*), cudaHostRegisterPortable));
checkGPUError(cudaMemcpyAsync(mTrackingFrameInfoDeviceArray, mTrackingFrameInfoDevice.data(), nLayers * sizeof(TrackingFrameInfo*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
}
checkGPUError(cudaMemcpyAsync(mTrackingFrameInfoDeviceArray, mTrackingFrameInfoDevice.data(), nLayers * sizeof(TrackingFrameInfo*), cudaMemcpyHostToDevice, mGpuStreams[0].get()));
}

template <int nLayers>
Expand Down
12 changes: 2 additions & 10 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,7 @@ GPUd() bool fitTrack(TrackITSExt& track,
if (!track.o2::track::TrackParCovF::rotate(trackingHit.alphaTrackingFrame)) {
return false;
}
#ifdef __HIPCC__
if (!track.propagateTo(trackingHit.xTrackingFrame, Bz)) {
return false;
}
#else

if (!prop->propagateToX(track,
trackingHit.xTrackingFrame,
Bz,
Expand All @@ -99,10 +95,8 @@ GPUd() bool fitTrack(TrackITSExt& track,
matCorrType)) {
return false;
}
#endif
#ifndef __HIPCC__

if (matCorrType == o2::base::PropagatorF::MatCorrType::USEMatCorrNONE) {
#endif
track.setChi2(track.getChi2() + track.getPredictedChi2(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame));
if (!track.TrackParCov::update(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)) {
return false;
Expand All @@ -113,9 +107,7 @@ GPUd() bool fitTrack(TrackITSExt& track,
if (!track.correctForMaterial(xx0, xx0 * radiationLength * density, true)) {
return false;
}
#ifndef __HIPCC__
}
#endif

auto predChi2{track.getPredictedChi2(trackingHit.positionTrackingFrame, trackingHit.covarianceTrackingFrame)};

Expand Down
9 changes: 4 additions & 5 deletions Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct ClusterHelper {

float MSangle(float mass, float p, float xX0)
{
float beta = p / std::hypot(mass, p);
float beta = p / o2::gpu::CAMath::Hypot(mass, p);
return 0.0136f * o2::gpu::CAMath::Sqrt(xX0) * (1.f + 0.038f * o2::gpu::CAMath::Log(xX0)) / (beta * p);
}

Expand Down Expand Up @@ -376,12 +376,11 @@ void TimeFrame::initialise(const int iteration, const TrackingParameters& trkPar
for (unsigned int iLayer{0}; iLayer < mClusters.size(); ++iLayer) {
mMSangles[iLayer] = MSangle(0.14f, trkParam.TrackletMinPt, trkParam.LayerxX0[iLayer]);
mPositionResolution[iLayer] = o2::gpu::CAMath::Sqrt(0.5f * (trkParam.SystErrorZ2[iLayer] + trkParam.SystErrorY2[iLayer]) + trkParam.LayerResolution[iLayer] * trkParam.LayerResolution[iLayer]);

if (iLayer < mClusters.size() - 1) {
const float& r1 = trkParam.LayerRadii[iLayer];
const float& r2 = trkParam.LayerRadii[iLayer + 1];
const float res1 = std::hypot(trkParam.PVres, mPositionResolution[iLayer]);
const float res2 = std::hypot(trkParam.PVres, mPositionResolution[iLayer + 1]);
const float res1 = o2::gpu::CAMath::Hypot(trkParam.PVres, mPositionResolution[iLayer]);
const float res2 = o2::gpu::CAMath::Hypot(trkParam.PVres, mPositionResolution[iLayer + 1]);
const float cosTheta1half = o2::gpu::CAMath::Sqrt(1.f - Sq(0.5f * r1 * oneOverR));
const float cosTheta2half = o2::gpu::CAMath::Sqrt(1.f - Sq(0.5f * r2 * oneOverR));
float x = r2 * cosTheta1half - r1 * cosTheta2half;
Expand Down Expand Up @@ -430,7 +429,7 @@ void TimeFrame::fillPrimaryVerticesXandAlpha()
}
mPValphaX.reserve(mPrimaryVertices.size());
for (auto& pv : mPrimaryVertices) {
mPValphaX.emplace_back(std::array<float, 2>{std::hypot(pv.getX(), pv.getY()), math_utils::computePhi(pv.getX(), pv.getY())});
mPValphaX.emplace_back(std::array<float, 2>{o2::gpu::CAMath::Hypot(pv.getX(), pv.getY()), math_utils::computePhi(pv.getX(), pv.getY())});
}
}

Expand Down
8 changes: 5 additions & 3 deletions GPU/Common/GPUCommonMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,12 @@ GPUdi() float GPUCommonMath::Modf(float x, float y) { return CHOICE(fmodf(x, y),

GPUdi() unsigned int GPUCommonMath::Float2UIntReint(const float& x)
{
#if !defined(GPUCA_GPUCODE) || defined(__OPENCL__) || defined(__OPENCL_HOST__)
return reinterpret_cast<const unsigned int&>(x);
#else
#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__))
return __float_as_uint(x);
#elif defined(GPUCA_GPUCODE_DEVICE) && (defined(__OPENCL__) || defined(__OPENCLCPP__))
return as_uint(x);
#else
return reinterpret_cast<const unsigned int&>(x);
#endif
}

Expand Down

0 comments on commit 6965591

Please sign in to comment.