GPU clusterizer with neural networks #13981

Draft pull request: wants to merge 43 commits into base branch dev from the gpu_clusterizer branch (github.com:ChSonnabend/AliceO2). Changes from all commits are shown below.

Commits (43):
- d4dc46e Copying kernels to implement NN clusterizer (ChSonnabend, May 16, 2024)
- c191885 Merge branch 'dev' into gpu_clusterizer (ChSonnabend, May 24, 2024)
- 05831ef First version of clusterizer in GPU code (ChSonnabend, May 27, 2024)
- 8515290 Merge branch 'gpu_clusterizer' of github.com:ChSonnabend/AliceO2 into… (ChSonnabend, May 27, 2024)
- 3f6c934 Adding a compiling and running version with single-threaded ONNX mode… (ChSonnabend, May 29, 2024)
- 8ba6805 Clusters now working by a hack (ChSonnabend, May 29, 2024)
- 6ec3c46 Working implementation of settings via GPUSettings.h and --configKeyV… (ChSonnabend, Jun 6, 2024)
- 626a46f Merge branch 'AliceO2Group:dev' into gpu_clusterizer (ChSonnabend, Jun 24, 2024)
- ab4653a Modifying the onnx_interface to include the right headers (ChSonnabend, Jun 24, 2024)
- 04084c8 Adjusting initialization for new ONNXRuntime version (ChSonnabend, Jun 24, 2024)
- 01dc4a1 Adjusting global settings and CF code for several settings (ChSonnabend, Jun 26, 2024)
- accd7ab Adding return statement if cluster is rejected (ChSonnabend, Jul 3, 2024)
- 019b388 Merge branch 'AliceO2Group:dev' into gpu_clusterizer (ChSonnabend, Jul 3, 2024)
- 3473a06 Adding some statements back (ChSonnabend, Jul 4, 2024)
- dfffdf5 Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Oct 16, 2024)
- df21c96 Update to latest status of gpu clusterization (ChSonnabend, Oct 17, 2024)
- 06737fd Fixing uchar -> uint8_t (ChSonnabend, Oct 18, 2024)
- b148449 Adding utils header (ChSonnabend, Oct 18, 2024)
- 534da50 Updating kernels.cmake to uint8_t (ChSonnabend, Oct 21, 2024)
- bb2cb6e Please consider the following formatting changes (alibuild, Oct 21, 2024)
- 027e225 Merge pull request #6 from alibuild/alibot-cleanup-13610 (ChSonnabend, Nov 4, 2024)
- 25093b3 Adding an ONNX CPU library in the O2 framework (ChSonnabend, Nov 18, 2024)
- 74cf0e7 Merge branch 'AliceO2Group:dev' into onnxruntime-cpu (ChSonnabend, Nov 18, 2024)
- 9232328 Please consider the following formatting changes (alibuild, Nov 18, 2024)
- 9a6a9e8 Merge pull request #7 from alibuild/alibot-cleanup-13709 (ChSonnabend, Nov 18, 2024)
- 7251c5c Fixing macOS build issues with calling O*.data() (ChSonnabend, Nov 19, 2024)
- d0f4dd8 Fixing compiler issues and char -> uint8_t (ChSonnabend, Nov 19, 2024)
- 7859ab2 Fixing curly braces (ChSonnabend, Nov 19, 2024)
- c6cb3e6 Fixing std::make_shared (ChSonnabend, Nov 19, 2024)
- 55621f0 Merge branch 'onnxruntime-cpu' into gpu_clusterizer (ChSonnabend, Nov 20, 2024)
- a00a54b Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Nov 20, 2024)
- 40bc437 Changing order for <CommonUtils/StringUtils.h> (ChSonnabend, Nov 20, 2024)
- f0a8cc2 Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Nov 22, 2024)
- d3aede4 Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Dec 17, 2024)
- 52b033f Bug-fixing file name (ChSonnabend, Dec 17, 2024)
- 314a0ce Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Jan 17, 2025)
- 684eb56 Making NN clusterizer more efficient (ChSonnabend, Feb 6, 2025)
- 9bd1ce4 Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Feb 7, 2025)
- 639b895 Changing constexpr (ChSonnabend, Feb 7, 2025)
- 3c4c587 Fixing build issues (ChSonnabend, Feb 7, 2025)
- 95bb2ff Major changes to make clusterizer parallelizable. Problem remains: di… (ChSonnabend, Feb 17, 2025)
- 857f27d Adjusting for default CF regression (ChSonnabend, Feb 19, 2025)
- 89c0105 Bug-fix for application of CF regression and logging message (ChSonnabend, Feb 20, 2025)
4 changes: 3 additions & 1 deletion Common/ML/include/ML/OrtInterface.h
@@ -41,6 +41,7 @@ class OrtModel
OrtModel(std::unordered_map<std::string, std::string> optionsMap) { reset(optionsMap); }
void init(std::unordered_map<std::string, std::string> optionsMap) { reset(optionsMap); }
void reset(std::unordered_map<std::string, std::string>);
bool isInitialized() { return mInitialized; }

virtual ~OrtModel() = default;

@@ -79,6 +80,7 @@
std::vector<std::vector<int64_t>> mInputShapes, mOutputShapes;

// Environment settings
bool mInitialized = false;
std::string modelPath, device = "cpu", dtype = "float"; // device options should be cpu, rocm, migraphx, cuda
int intraOpNumThreads = 0, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;

@@ -89,4 +91,4 @@

} // namespace o2

#endif // O2_ML_ORTINTERFACE_H
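For context on the new accessor: `isInitialized()` exposes the `mInitialized` flag so callers can verify that `reset()` actually built an ONNX session before using the model. A minimal guard sketch (the surrounding function and its use are illustrative, not part of this diff):

```cpp
#include "ML/OrtInterface.h"

// Sketch: guard any use of the model on successful initialization.
// mInitialized is only set at the end of reset(), after the ONNX session
// was created, so a model constructed with an empty "model-path" value
// reports false here.
void useModel(o2::ml::OrtModel& model)
{
  if (!model.isInitialized()) {
    return; // no session was created; nothing to run
  }
  // ... safe to dispatch inference on the model here
}
```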
168 changes: 83 additions & 85 deletions Common/ML/src/OrtInterface.cxx
@@ -44,17 +44,19 @@
if (!optionsMap.contains("model-path")) {
LOG(fatal) << "(ORT) Model path cannot be empty!";
}
modelPath = optionsMap["model-path"];
device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU");
dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float");
deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0);
allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0);
intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 2);
enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0);
enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0);

std::string dev_mem_str = "Hip";

[CI check failure on line 47 in Common/ML/src/OrtInterface.cxx (GitHub Actions / PR formatting / whitespace): trailing spaces; remove the trailing spaces at the end of the line.]
if (!optionsMap["model-path"].empty()) {
modelPath = optionsMap["model-path"];
device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU");
dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float");
deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0);
allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0);
intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0);
enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0);
enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0);

std::string dev_mem_str = "Hip";
#if defined(ORT_ROCM_BUILD)
#if ORT_ROCM_BUILD == 1
if (device == "ROCM") {
@@ -81,89 +83,85 @@
#endif
#endif

if (allocateDeviceMemory) {
pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
LOG(info) << "(ORT) Memory info set to on-device memory";
}
if (allocateDeviceMemory) {
pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
LOG(info) << "(ORT) Memory info set to on-device memory";
}

if (device == "CPU") {
(pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);
if (intraOpNumThreads > 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL);
} else if (intraOpNumThreads == 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
if (device == "CPU") {
(pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);
if (intraOpNumThreads > 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL);
} else if (intraOpNumThreads == 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
}
if (loggingLevel < 2) {
LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " threads";
}
}
LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " threads";
}

(pImplOrt->sessionOptions).DisableMemPattern();
(pImplOrt->sessionOptions).DisableCpuMemArena();
(pImplOrt->sessionOptions).DisableMemPattern();
(pImplOrt->sessionOptions).DisableCpuMemArena();

if (enableProfiling) {
if (optionsMap.contains("profiling-output-path")) {
(pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str());
if (enableProfiling) {
if (optionsMap.contains("profiling-output-path")) {
(pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str());
} else {
LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now.";
(pImplOrt->sessionOptions).DisableProfiling();
}
} else {
LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now.";
(pImplOrt->sessionOptions).DisableProfiling();
}
} else {
(pImplOrt->sessionOptions).DisableProfiling();
}
(pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations));
(pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel));

pImplOrt->env = std::make_shared<Ort::Env>(
OrtLoggingLevel(loggingLevel),
(optionsMap["onnx-environment-name"].empty() ? "onnx_model_inference" : optionsMap["onnx-environment-name"].c_str()),
// Integrate ORT logging into Fairlogger
[](void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location, const char* message) {
if (severity == ORT_LOGGING_LEVEL_VERBOSE) {
LOG(debug) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_INFO) {
LOG(info) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_WARNING) {
LOG(warning) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_ERROR) {
LOG(error) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_FATAL) {
LOG(fatal) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else {
LOG(info) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
}
},
(void*)3);
(pImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events
pImplOrt->session = std::make_shared<Ort::Session>(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions);

for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}
mInitialized = true;

inputNamesChar.resize(mInputNames.size(), nullptr);
std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar),
[&](const std::string& str) { return str.c_str(); });
outputNamesChar.resize(mOutputNames.size(), nullptr);
std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar),
[&](const std::string& str) { return str.c_str(); });

// Print names
LOG(info) << "\tInput Nodes:";
for (size_t i = 0; i < mInputNames.size(); i++) {
LOG(info) << "\t\t" << mInputNames[i] << " : " << printShape(mInputShapes[i]);
}
(pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations));
(pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel));

pImplOrt->env = std::make_shared<Ort::Env>(
OrtLoggingLevel(loggingLevel),
(optionsMap["onnx-environment-name"].empty() ? "onnx_model_inference" : optionsMap["onnx-environment-name"].c_str()),
// Integrate ORT logging into Fairlogger
[](void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location, const char* message) {
if (severity == ORT_LOGGING_LEVEL_VERBOSE) {
LOG(debug) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_INFO) {
LOG(info) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_WARNING) {
LOG(warning) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_ERROR) {
LOG(error) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_FATAL) {
LOG(fatal) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else {
LOG(info) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
}
},
(void*)3);
(pImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events
pImplOrt->session = std::make_shared<Ort::Session>(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions);

for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}

inputNamesChar.resize(mInputNames.size(), nullptr);
std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar),
[&](const std::string& str) { return str.c_str(); });
outputNamesChar.resize(mOutputNames.size(), nullptr);
std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar),
[&](const std::string& str) { return str.c_str(); });

LOG(info) << "\tOutput Nodes:";
for (size_t i = 0; i < mOutputNames.size(); i++) {
LOG(info) << "\t\t" << mOutputNames[i] << " : " << printShape(mOutputShapes[i]);
}
}

@@ -301,4 +299,4 @@

} // namespace ml

} // namespace o2
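Taken together, the `reset()` rewrite above means the option keys are only parsed when `model-path` is non-empty: a missing key is still fatal, while an empty value now leaves the model uninitialized instead of crashing. A hedged sketch of a complete options map with representative values; the file name is illustrative, and the defaults follow the fallbacks visible in `reset()`:

```cpp
#include "ML/OrtInterface.h"

#include <string>
#include <unordered_map>

void configureModel()
{
  // All keys below are optional except "model-path"; absent keys take the
  // defaults shown in reset(). Values arrive as strings and are converted
  // with std::stoi where needed.
  std::unordered_map<std::string, std::string> opts{
    {"model-path", "network_class.onnx"}, // illustrative file name
    {"device", "CPU"},                    // ROCM/MIGraphX/CUDA paths are build-dependent
    {"dtype", "float"},
    {"device-id", "0"},
    {"allocate-device-memory", "0"},
    {"intra-op-num-threads", "1"},        // 1 -> ORT_SEQUENTIAL, >1 -> ORT_PARALLEL
    {"logging-level", "0"},
    {"enable-profiling", "0"},            // requires "profiling-output-path" when enabled
    {"enable-optimizations", "0"},
  };
  o2::ml::OrtModel model(opts);
  // model.isInitialized() is now true if "model-path" pointed at a loadable model
}
```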
62 changes: 32 additions & 30 deletions GPU/GPUTracking/CMakeLists.txt
@@ -159,36 +159,37 @@ set(HDRS_INSTALL

set(SRCS_NO_CINT ${SRCS_NO_CINT} display/GPUDisplayInterface.cxx)
set(SRCS_NO_CINT
${SRCS_NO_CINT}
Global/GPUChainITS.cxx
ITS/GPUITSFitter.cxx
ITS/GPUITSFitterKernels.cxx
dEdx/GPUdEdx.cxx
TPCConvert/GPUTPCConvert.cxx
TPCConvert/GPUTPCConvertKernel.cxx
DataCompression/GPUTPCCompression.cxx
DataCompression/GPUTPCCompressionTrackModel.cxx
DataCompression/GPUTPCCompressionKernels.cxx
DataCompression/GPUTPCDecompression.cxx
DataCompression/GPUTPCDecompressionKernels.cxx
DataCompression/TPCClusterDecompressor.cxx
DataCompression/GPUTPCClusterStatistics.cxx
TPCClusterFinder/GPUTPCClusterFinder.cxx
TPCClusterFinder/ClusterAccumulator.cxx
TPCClusterFinder/MCLabelAccumulator.cxx
TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
TPCClusterFinder/GPUTPCCFStreamCompaction.cxx
TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx
TPCClusterFinder/GPUTPCCFPeakFinder.cxx
TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx
TPCClusterFinder/GPUTPCCFClusterizer.cxx
TPCClusterFinder/GPUTPCCFDeconvolution.cxx
TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx
TPCClusterFinder/GPUTPCCFDecodeZS.cxx
TPCClusterFinder/GPUTPCCFGather.cxx
Refit/GPUTrackingRefit.cxx
Refit/GPUTrackingRefitKernel.cxx
Merger/GPUTPCGMO2Output.cxx)
${SRCS_NO_CINT}
Global/GPUChainITS.cxx
ITS/GPUITSFitter.cxx
ITS/GPUITSFitterKernels.cxx
dEdx/GPUdEdx.cxx
TPCConvert/GPUTPCConvert.cxx
TPCConvert/GPUTPCConvertKernel.cxx
DataCompression/GPUTPCCompression.cxx
DataCompression/GPUTPCCompressionTrackModel.cxx
DataCompression/GPUTPCCompressionKernels.cxx
DataCompression/GPUTPCDecompression.cxx
DataCompression/GPUTPCDecompressionKernels.cxx
DataCompression/TPCClusterDecompressor.cxx
DataCompression/GPUTPCClusterStatistics.cxx
TPCClusterFinder/GPUTPCClusterFinder.cxx
TPCClusterFinder/ClusterAccumulator.cxx
TPCClusterFinder/MCLabelAccumulator.cxx
TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
TPCClusterFinder/GPUTPCCFStreamCompaction.cxx
TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx
TPCClusterFinder/GPUTPCCFPeakFinder.cxx
TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx
TPCClusterFinder/GPUTPCCFClusterizer.cxx
TPCClusterFinder/GPUTPCNNClusterizer.cxx
TPCClusterFinder/GPUTPCCFDeconvolution.cxx
TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx
TPCClusterFinder/GPUTPCCFDecodeZS.cxx
TPCClusterFinder/GPUTPCCFGather.cxx
Refit/GPUTrackingRefit.cxx
Refit/GPUTrackingRefitKernel.cxx
Merger/GPUTPCGMO2Output.cxx)

set(SRCS_DATATYPES
${SRCS_DATATYPES}
@@ -269,6 +270,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
O2::GPUCommon
O2::ReconstructionDataFormats
O2::TPCFastTransformation
O2::ML
PRIVATE_LINK_LIBRARIES O2::DataFormatsTPC
SOURCES ${SRCS_DATATYPES})
target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2)
6 changes: 6 additions & 0 deletions GPU/GPUTracking/Definitions/GPUDefGPUParameters.h
@@ -81,6 +81,7 @@
#define GPUCA_LB_GPUTPCCFNoiseSuppression 512
#define GPUCA_LB_GPUTPCCFDeconvolution 512
#define GPUCA_LB_GPUTPCCFClusterizer 448
#define GPUCA_LB_GPUTPCNNClusterizer 448
#define GPUCA_LB_COMPRESSION_GATHER 1024
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
@@ -147,6 +148,7 @@
#define GPUCA_LB_GPUTPCCFNoiseSuppression 512
#define GPUCA_LB_GPUTPCCFDeconvolution 512
#define GPUCA_LB_GPUTPCCFClusterizer 512
#define GPUCA_LB_GPUTPCNNClusterizer 512
#define GPUCA_LB_COMPRESSION_GATHER 1024
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
@@ -213,6 +215,7 @@
#define GPUCA_LB_GPUTPCCFNoiseSuppression 448
#define GPUCA_LB_GPUTPCCFDeconvolution 384
#define GPUCA_LB_GPUTPCCFClusterizer 448
#define GPUCA_LB_GPUTPCNNClusterizer 448
#define GPUCA_LB_COMPRESSION_GATHER 1024
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
@@ -489,6 +492,9 @@
#ifndef GPUCA_LB_GPUTPCCFClusterizer
#define GPUCA_LB_GPUTPCCFClusterizer 512
#endif
#ifndef GPUCA_LB_GPUTPCNNClusterizer
#define GPUCA_LB_GPUTPCNNClusterizer 512
#endif
#ifndef GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU
#define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 256
#endif
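The backend-specific values and the generic `#ifndef` fallback give `GPUTPCNNClusterizer` the same launch-bound treatment as the existing `GPUTPCCFClusterizer`. As a rough, purely illustrative sketch of how such a per-kernel constant is typically consumed in the CUDA/HIP builds (the real kernels are declared through the framework's kernel macros, which are not part of this diff):

```cpp
// Illustrative only: a GPUCA_LB_* macro conventionally fixes the kernel's
// maximum block size at compile time via __launch_bounds__, letting the
// compiler tune register usage for that block size.
__global__ __launch_bounds__(GPUCA_LB_GPUTPCNNClusterizer) void nnClusterizerKernel(/* ... */)
{
  // one thread per charge-map entry / cluster candidate (assumption)
}
```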
22 changes: 22 additions & 0 deletions GPU/GPUTracking/Definitions/GPUSettingsList.h
@@ -298,6 +298,28 @@ AddOption(printSettings, bool, false, "", 0, "Print all settings when initializi
AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr)
AddSubConfig(GPUSettingsProcessingRTC, rtc)
AddSubConfig(GPUSettingsProcessingParam, param)
AddOption(applyNNclusterizer, int, 0, "", 0, "(bool, default = 0) If set, the neural network clusterizer is used.")
AddOption(nnInferenceDevice, std::string, "CPU", "", 0, "(std::string) Specify inference device (cpu (default), rocm, cuda)")
AddOption(nnInferenceDeviceId, unsigned int, 0, "", 0, "(unsigned int) Specify inference device id")
AddOption(nnInferenceAllocateDevMem, int, 0, "", 0, "(bool, default = 0) If set, device memory is allocated for inference")
AddOption(nnInferenceDtype, std::string, "fp32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // fp32 or fp16
AddOption(nnInferenceThreadsPerNN, int, 0, "", 0, "Number of threads used to evaluate one neural network")
AddOption(nnInferenceEnableOrtOptimization, unsigned int, 1, "", 0, "Enables graph optimizations in ONNX Runtime. Can be greater than 1!")
AddOption(nnInferenceOrtProfiling, int, 0, "", 0, "Enables profiling of model execution in ONNX Runtime")
AddOption(nnInferenceOrtProfilingPath, std::string, ".", "", 0, "If nnInferenceOrtProfiling is set, the path to store the profiling data")
AddOption(nnInferenceVerbosity, int, 1, "", 0, "0: No messages; 1: Warnings; 2: Warnings + major debugs; >3: All debugs")
AddOption(nnClusterizerAddIndexData, int, 1, "", 0, "If set, normalized index data (sector, row, pad) are appended to the input")
AddOption(nnClusterizerSizeInputRow, int, 3, "", 0, "Size of the input to the NN (currently calculated as (length-1)/2)")
AddOption(nnClusterizerSizeInputPad, int, 3, "", 0, "Size of the input to the NN (currently calculated as (length-1)/2)")
AddOption(nnClusterizerSizeInputTime, int, 3, "", 0, "Size of the input to the NN (currently calculated as (length-1)/2)")
AddOption(nnClusterizerUseCFregression, int, 0, "", 0, "(bool, default = false) If true, use the regression from the native clusterizer and not the NN")
AddOption(nnClusterizerBatchedMode, unsigned int, 1, "", 0, "(int, default = 1) If >1, the NN is evaluated on batched input of size specified in this variable")
AddOption(nnClusterizerVerbosity, int, -1, "", 0, "(int, default = -1) If >0, logging messages of the clusterizer will be displayed")
AddOption(nnClusterizerBoundaryFillValue, int, -1, "", 0, "Fill value for the boundary of the input to the NN")
AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The classification network path")
AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.")
AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path")
AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as activation function in the last layer or not).")
AddHelp("help", 'h')
EndConfig()
#endif // __OPENCL__
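One option above deserves a worked example: with `nnSigmoidTrafoClassThreshold` enabled, the user-facing cut `nnClassThreshold` (a probability) is mapped through the inverse sigmoid so it can be compared against the network's raw, pre-activation output. A minimal sketch of that transformation, under the assumption that this is all the option does (the clusterizer code applying it is not shown in this excerpt):

```cpp
#include <cmath>

// The inverse of the sigmoid s(x) = 1 / (1 + exp(-x)) is ln(p / (1 - p)).
// Comparing the raw network output against this logit is equivalent to
// comparing the sigmoid-activated output against p, but skips the
// activation at inference time.
float effectiveClassThreshold(float nnClassThreshold, bool sigmoidTrafo)
{
  return sigmoidTrafo ? std::log(nnClassThreshold / (1.f - nnClassThreshold))
                      : nnClassThreshold;
}
```

For the default `nnClassThreshold = 0.5` this yields a logit cut of 0.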