GPU clusterizer with neural networks #13981

Draft pull request: wants to merge 43 commits into base branch dev from the gpu_clusterizer branch (github.com:ChSonnabend/AliceO2). Changes from all commits are shown below.

Commits (43):
- d4dc46e Copying kernels to implement NN clusterizer (ChSonnabend, May 16, 2024)
- c191885 Merge branch 'dev' into gpu_clusterizer (ChSonnabend, May 24, 2024)
- 05831ef First version of clusterizer in GPU code (ChSonnabend, May 27, 2024)
- 8515290 Merge branch 'gpu_clusterizer' of github.com:ChSonnabend/AliceO2 into… (ChSonnabend, May 27, 2024)
- 3f6c934 Adding a compiling and running version with single-threaded ONNX mode… (ChSonnabend, May 29, 2024)
- 8ba6805 Clusters now working by a hack (ChSonnabend, May 29, 2024)
- 6ec3c46 Working implementation of settings via GPUSettings.h and --configKeyV… (ChSonnabend, Jun 6, 2024)
- 626a46f Merge branch 'AliceO2Group:dev' into gpu_clusterizer (ChSonnabend, Jun 24, 2024)
- ab4653a Modifying the onnx_interface to include the right headers (ChSonnabend, Jun 24, 2024)
- 04084c8 Adjusting initialization for new ONNXRuntime version (ChSonnabend, Jun 24, 2024)
- 01dc4a1 Adjusting global settings and CF code for several settings (ChSonnabend, Jun 26, 2024)
- accd7ab Adding return statement if cluster is rejected (ChSonnabend, Jul 3, 2024)
- 019b388 Merge branch 'AliceO2Group:dev' into gpu_clusterizer (ChSonnabend, Jul 3, 2024)
- 3473a06 Adding some statements back (ChSonnabend, Jul 4, 2024)
- dfffdf5 Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Oct 16, 2024)
- df21c96 Update to latest status of gpu clusterization (ChSonnabend, Oct 17, 2024)
- 06737fd Fixing uchar -> uint8_t (ChSonnabend, Oct 18, 2024)
- b148449 Adding utils header (ChSonnabend, Oct 18, 2024)
- 534da50 Updating kernels.cmake to uint8_t (ChSonnabend, Oct 21, 2024)
- bb2cb6e Please consider the following formatting changes (alibuild, Oct 21, 2024)
- 027e225 Merge pull request #6 from alibuild/alibot-cleanup-13610 (ChSonnabend, Nov 4, 2024)
- 25093b3 Adding an ONNX CPU library in the O2 framework (ChSonnabend, Nov 18, 2024)
- 74cf0e7 Merge branch 'AliceO2Group:dev' into onnxruntime-cpu (ChSonnabend, Nov 18, 2024)
- 9232328 Please consider the following formatting changes (alibuild, Nov 18, 2024)
- 9a6a9e8 Merge pull request #7 from alibuild/alibot-cleanup-13709 (ChSonnabend, Nov 18, 2024)
- 7251c5c Fixing macOS build issues with calling O*.data() (ChSonnabend, Nov 19, 2024)
- d0f4dd8 Fixing compiler issues and char -> uint8_t (ChSonnabend, Nov 19, 2024)
- 7859ab2 Fixing curly braces (ChSonnabend, Nov 19, 2024)
- c6cb3e6 Fixing std::make_shared (ChSonnabend, Nov 19, 2024)
- 55621f0 Merge branch 'onnxruntime-cpu' into gpu_clusterizer (ChSonnabend, Nov 20, 2024)
- a00a54b Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Nov 20, 2024)
- 40bc437 Changing order for <CommonUtils/StringUtils.h> (ChSonnabend, Nov 20, 2024)
- f0a8cc2 Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Nov 22, 2024)
- d3aede4 Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Dec 17, 2024)
- 52b033f Bug-fixing file name (ChSonnabend, Dec 17, 2024)
- 314a0ce Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Jan 17, 2025)
- 684eb56 Making NN clusterizer more efficient (ChSonnabend, Feb 6, 2025)
- 9bd1ce4 Merge branch 'dev' into gpu_clusterizer (ChSonnabend, Feb 7, 2025)
- 639b895 Changing constexpr (ChSonnabend, Feb 7, 2025)
- 3c4c587 Fixing build issues (ChSonnabend, Feb 7, 2025)
- 95bb2ff Major changes to make clusterizer parallelizable. Problem remains: di… (ChSonnabend, Feb 17, 2025)
- 857f27d Adjusting for default CF regression (ChSonnabend, Feb 19, 2025)
- 89c0105 Bug-fix for application of CF regression and logging message (ChSonnabend, Feb 20, 2025)
4 changes: 3 additions & 1 deletion Common/ML/include/ML/OrtInterface.h
@@ -41,6 +41,7 @@ class OrtModel
OrtModel(std::unordered_map<std::string, std::string> optionsMap) { reset(optionsMap); }
void init(std::unordered_map<std::string, std::string> optionsMap) { reset(optionsMap); }
void reset(std::unordered_map<std::string, std::string>);
bool isInitialized() { return mInitialized; }

virtual ~OrtModel() = default;

@@ -79,6 +80,7 @@
std::vector<std::vector<int64_t>> mInputShapes, mOutputShapes;

// Environment settings
bool mInitialized = false;
std::string modelPath, device = "cpu", dtype = "float"; // device options should be cpu, rocm, migraphx, cuda
int intraOpNumThreads = 0, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0;

@@ -89,4 +91,4 @@

} // namespace o2

#endif // O2_ML_ORTINTERFACE_H
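For context on the new accessor: `isInitialized()` exposes the `mInitialized` flag so callers can verify that `reset()` actually built an ONNX session before using the model. A minimal guard sketch (the surrounding function and its use are illustrative, not part of this diff):

```cpp
#include "ML/OrtInterface.h"

// Sketch: guard any use of the model on successful initialization.
// mInitialized is only set at the end of reset(), after the ONNX session
// was created, so a model constructed with an empty "model-path" value
// reports false here.
void useModel(o2::ml::OrtModel& model)
{
  if (!model.isInitialized()) {
    return; // no session was created; nothing to run
  }
  // ... safe to dispatch inference on the model here
}
```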
168 changes: 83 additions & 85 deletions Common/ML/src/OrtInterface.cxx
@@ -44,17 +44,19 @@
if (!optionsMap.contains("model-path")) {
LOG(fatal) << "(ORT) Model path cannot be empty!";
}
modelPath = optionsMap["model-path"];
device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU");
dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float");
deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0);
allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0);
intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 2);
enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0);
enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0);

std::string dev_mem_str = "Hip";

[CI check failure on line 47 in Common/ML/src/OrtInterface.cxx (GitHub Actions / PR formatting / whitespace): trailing spaces; remove the trailing spaces at the end of the line.]
if (!optionsMap["model-path"].empty()) {
modelPath = optionsMap["model-path"];
device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU");
dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float");
deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0);
allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0);
intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0);
loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0);
enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0);
enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0);

std::string dev_mem_str = "Hip";
#if defined(ORT_ROCM_BUILD)
#if ORT_ROCM_BUILD == 1
if (device == "ROCM") {
@@ -81,89 +83,85 @@
#endif
#endif

if (allocateDeviceMemory) {
pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
LOG(info) << "(ORT) Memory info set to on-device memory";
}
if (allocateDeviceMemory) {
pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault);
LOG(info) << "(ORT) Memory info set to on-device memory";
}

if (device == "CPU") {
(pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);
if (intraOpNumThreads > 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL);
} else if (intraOpNumThreads == 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
if (device == "CPU") {
(pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);
if (intraOpNumThreads > 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL);
} else if (intraOpNumThreads == 1) {
(pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
}
if (loggingLevel < 2) {
LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " threads";
}
}
LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " threads";
}

(pImplOrt->sessionOptions).DisableMemPattern();
(pImplOrt->sessionOptions).DisableCpuMemArena();
(pImplOrt->sessionOptions).DisableMemPattern();
(pImplOrt->sessionOptions).DisableCpuMemArena();

if (enableProfiling) {
if (optionsMap.contains("profiling-output-path")) {
(pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str());
if (enableProfiling) {
if (optionsMap.contains("profiling-output-path")) {
(pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str());
} else {
LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now.";
(pImplOrt->sessionOptions).DisableProfiling();
}
} else {
LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now.";
(pImplOrt->sessionOptions).DisableProfiling();
}
} else {
(pImplOrt->sessionOptions).DisableProfiling();
}
(pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations));
(pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel));

pImplOrt->env = std::make_shared<Ort::Env>(
OrtLoggingLevel(loggingLevel),
(optionsMap["onnx-environment-name"].empty() ? "onnx_model_inference" : optionsMap["onnx-environment-name"].c_str()),
// Integrate ORT logging into Fairlogger
[](void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location, const char* message) {
if (severity == ORT_LOGGING_LEVEL_VERBOSE) {
LOG(debug) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_INFO) {
LOG(info) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_WARNING) {
LOG(warning) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_ERROR) {
LOG(error) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_FATAL) {
LOG(fatal) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else {
LOG(info) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
}
},
(void*)3);
(pImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events
pImplOrt->session = std::make_shared<Ort::Session>(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions);

for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}
mInitialized = true;

inputNamesChar.resize(mInputNames.size(), nullptr);
std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar),
[&](const std::string& str) { return str.c_str(); });
outputNamesChar.resize(mOutputNames.size(), nullptr);
std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar),
[&](const std::string& str) { return str.c_str(); });

// Print names
LOG(info) << "\tInput Nodes:";
for (size_t i = 0; i < mInputNames.size(); i++) {
LOG(info) << "\t\t" << mInputNames[i] << " : " << printShape(mInputShapes[i]);
}
(pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations));
(pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel));

pImplOrt->env = std::make_shared<Ort::Env>(
OrtLoggingLevel(loggingLevel),
(optionsMap["onnx-environment-name"].empty() ? "onnx_model_inference" : optionsMap["onnx-environment-name"].c_str()),
// Integrate ORT logging into Fairlogger
[](void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location, const char* message) {
if (severity == ORT_LOGGING_LEVEL_VERBOSE) {
LOG(debug) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_INFO) {
LOG(info) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_WARNING) {
LOG(warning) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_ERROR) {
LOG(error) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else if (severity == ORT_LOGGING_LEVEL_FATAL) {
LOG(fatal) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
} else {
LOG(info) << "(ORT) [" << logid << "|" << category << "|" << code_location << "]: " << message;
}
},
(void*)3);
(pImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events
pImplOrt->session = std::make_shared<Ort::Session>(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions);

for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) {
mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get());
}
for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) {
mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape());
}

inputNamesChar.resize(mInputNames.size(), nullptr);
std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar),
[&](const std::string& str) { return str.c_str(); });
outputNamesChar.resize(mOutputNames.size(), nullptr);
std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar),
[&](const std::string& str) { return str.c_str(); });

LOG(info) << "\tOutput Nodes:";
for (size_t i = 0; i < mOutputNames.size(); i++) {
LOG(info) << "\t\t" << mOutputNames[i] << " : " << printShape(mOutputShapes[i]);
}
}

@@ -301,4 +299,4 @@

} // namespace ml

} // namespace o2
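Taken together, the `reset()` rewrite above means the option keys are only parsed when `model-path` is non-empty: a missing key is still fatal, while an empty value now leaves the model uninitialized instead of crashing. A hedged sketch of a complete options map with representative values; the file name is illustrative, and the defaults follow the fallbacks visible in `reset()`:

```cpp
#include "ML/OrtInterface.h"

#include <string>
#include <unordered_map>

void configureModel()
{
  // All keys below are optional except "model-path"; absent keys take the
  // defaults shown in reset(). Values arrive as strings and are converted
  // with std::stoi where needed.
  std::unordered_map<std::string, std::string> opts{
    {"model-path", "network_class.onnx"}, // illustrative file name
    {"device", "CPU"},                    // ROCM/MIGraphX/CUDA paths are build-dependent
    {"dtype", "float"},
    {"device-id", "0"},
    {"allocate-device-memory", "0"},
    {"intra-op-num-threads", "1"},        // 1 -> ORT_SEQUENTIAL, >1 -> ORT_PARALLEL
    {"logging-level", "0"},
    {"enable-profiling", "0"},            // requires "profiling-output-path" when enabled
    {"enable-optimizations", "0"},
  };
  o2::ml::OrtModel model(opts);
  // model.isInitialized() is now true if "model-path" pointed at a loadable model
}
```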
62 changes: 32 additions & 30 deletions GPU/GPUTracking/CMakeLists.txt
@@ -159,36 +159,37 @@ set(HDRS_INSTALL

set(SRCS_NO_CINT ${SRCS_NO_CINT} display/GPUDisplayInterface.cxx)
set(SRCS_NO_CINT
${SRCS_NO_CINT}
Global/GPUChainITS.cxx
ITS/GPUITSFitter.cxx
ITS/GPUITSFitterKernels.cxx
dEdx/GPUdEdx.cxx
TPCConvert/GPUTPCConvert.cxx
TPCConvert/GPUTPCConvertKernel.cxx
DataCompression/GPUTPCCompression.cxx
DataCompression/GPUTPCCompressionTrackModel.cxx
DataCompression/GPUTPCCompressionKernels.cxx
DataCompression/GPUTPCDecompression.cxx
DataCompression/GPUTPCDecompressionKernels.cxx
DataCompression/TPCClusterDecompressor.cxx
DataCompression/GPUTPCClusterStatistics.cxx
TPCClusterFinder/GPUTPCClusterFinder.cxx
TPCClusterFinder/ClusterAccumulator.cxx
TPCClusterFinder/MCLabelAccumulator.cxx
TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
TPCClusterFinder/GPUTPCCFStreamCompaction.cxx
TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx
TPCClusterFinder/GPUTPCCFPeakFinder.cxx
TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx
TPCClusterFinder/GPUTPCCFClusterizer.cxx
TPCClusterFinder/GPUTPCCFDeconvolution.cxx
TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx
TPCClusterFinder/GPUTPCCFDecodeZS.cxx
TPCClusterFinder/GPUTPCCFGather.cxx
Refit/GPUTrackingRefit.cxx
Refit/GPUTrackingRefitKernel.cxx
Merger/GPUTPCGMO2Output.cxx)
${SRCS_NO_CINT}
Global/GPUChainITS.cxx
ITS/GPUITSFitter.cxx
ITS/GPUITSFitterKernels.cxx
dEdx/GPUdEdx.cxx
TPCConvert/GPUTPCConvert.cxx
TPCConvert/GPUTPCConvertKernel.cxx
DataCompression/GPUTPCCompression.cxx
DataCompression/GPUTPCCompressionTrackModel.cxx
DataCompression/GPUTPCCompressionKernels.cxx
DataCompression/GPUTPCDecompression.cxx
DataCompression/GPUTPCDecompressionKernels.cxx
DataCompression/TPCClusterDecompressor.cxx
DataCompression/GPUTPCClusterStatistics.cxx
TPCClusterFinder/GPUTPCClusterFinder.cxx
TPCClusterFinder/ClusterAccumulator.cxx
TPCClusterFinder/MCLabelAccumulator.cxx
TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx
TPCClusterFinder/GPUTPCCFStreamCompaction.cxx
TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx
TPCClusterFinder/GPUTPCCFPeakFinder.cxx
TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx
TPCClusterFinder/GPUTPCCFClusterizer.cxx
TPCClusterFinder/GPUTPCNNClusterizer.cxx
TPCClusterFinder/GPUTPCCFDeconvolution.cxx
TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx
TPCClusterFinder/GPUTPCCFDecodeZS.cxx
TPCClusterFinder/GPUTPCCFGather.cxx
Refit/GPUTrackingRefit.cxx
Refit/GPUTrackingRefitKernel.cxx
Merger/GPUTPCGMO2Output.cxx)

set(SRCS_DATATYPES
${SRCS_DATATYPES}
@@ -269,6 +270,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
O2::GPUCommon
O2::ReconstructionDataFormats
O2::TPCFastTransformation
O2::ML
PRIVATE_LINK_LIBRARIES O2::DataFormatsTPC
SOURCES ${SRCS_DATATYPES})
target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2)
6 changes: 6 additions & 0 deletions GPU/GPUTracking/Definitions/GPUDefGPUParameters.h
@@ -81,6 +81,7 @@
#define GPUCA_LB_GPUTPCCFNoiseSuppression 512
#define GPUCA_LB_GPUTPCCFDeconvolution 512
#define GPUCA_LB_GPUTPCCFClusterizer 448
#define GPUCA_LB_GPUTPCNNClusterizer 448
#define GPUCA_LB_COMPRESSION_GATHER 1024
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
@@ -147,6 +148,7 @@
#define GPUCA_LB_GPUTPCCFNoiseSuppression 512
#define GPUCA_LB_GPUTPCCFDeconvolution 512
#define GPUCA_LB_GPUTPCCFClusterizer 512
#define GPUCA_LB_GPUTPCNNClusterizer 512
#define GPUCA_LB_COMPRESSION_GATHER 1024
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
@@ -213,6 +215,7 @@
#define GPUCA_LB_GPUTPCCFNoiseSuppression 448
#define GPUCA_LB_GPUTPCCFDeconvolution 384
#define GPUCA_LB_GPUTPCCFClusterizer 448
#define GPUCA_LB_GPUTPCNNClusterizer 448
#define GPUCA_LB_COMPRESSION_GATHER 1024
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
@@ -489,6 +492,9 @@
#ifndef GPUCA_LB_GPUTPCCFClusterizer
#define GPUCA_LB_GPUTPCCFClusterizer 512
#endif
#ifndef GPUCA_LB_GPUTPCNNClusterizer
#define GPUCA_LB_GPUTPCNNClusterizer 512
#endif
#ifndef GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU
#define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 256
#endif
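The backend-specific values and the generic `#ifndef` fallback give `GPUTPCNNClusterizer` the same launch-bound treatment as the existing `GPUTPCCFClusterizer`. As a rough, purely illustrative sketch of how such a per-kernel constant is typically consumed in the CUDA/HIP builds (the real kernels are declared through the framework's kernel macros, which are not part of this diff):

```cpp
// Illustrative only: a GPUCA_LB_* macro conventionally fixes the kernel's
// maximum block size at compile time via __launch_bounds__, letting the
// compiler tune register usage for that block size.
__global__ __launch_bounds__(GPUCA_LB_GPUTPCNNClusterizer) void nnClusterizerKernel(/* ... */)
{
  // one thread per charge-map entry / cluster candidate (assumption)
}
```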
22 changes: 22 additions & 0 deletions GPU/GPUTracking/Definitions/GPUSettingsList.h
@@ -298,6 +298,28 @@ AddOption(printSettings, bool, false, "", 0, "Print all settings when initializi
AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr)
AddSubConfig(GPUSettingsProcessingRTC, rtc)
AddSubConfig(GPUSettingsProcessingParam, param)
AddOption(applyNNclusterizer, int, 0, "", 0, "(bool, default = 0) If set, the neural network clusterizer is used.")
AddOption(nnInferenceDevice, std::string, "CPU", "", 0, "(std::string) Specify inference device (cpu (default), rocm, cuda)")
AddOption(nnInferenceDeviceId, unsigned int, 0, "", 0, "(unsigned int) Specify inference device id")
AddOption(nnInferenceAllocateDevMem, int, 0, "", 0, "(bool, default = 0) If set, device memory is allocated for inference")
AddOption(nnInferenceDtype, std::string, "fp32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // fp32 or fp16
AddOption(nnInferenceThreadsPerNN, int, 0, "", 0, "Number of threads used to evaluate one neural network")
AddOption(nnInferenceEnableOrtOptimization, unsigned int, 1, "", 0, "Enables graph optimizations in ONNX Runtime. Can be greater than 1!")
AddOption(nnInferenceOrtProfiling, int, 0, "", 0, "Enables profiling of model execution in ONNX Runtime")
AddOption(nnInferenceOrtProfilingPath, std::string, ".", "", 0, "If nnInferenceOrtProfiling is set, the path to store the profiling data")
AddOption(nnInferenceVerbosity, int, 1, "", 0, "0: No messages; 1: Warnings; 2: Warnings + major debugs; >3: All debugs")
AddOption(nnClusterizerAddIndexData, int, 1, "", 0, "If set, normalized index data (sector, row, pad) are appended to the input")
AddOption(nnClusterizerSizeInputRow, int, 3, "", 0, "Size of the input to the NN (currently calculated as (length-1)/2)")
AddOption(nnClusterizerSizeInputPad, int, 3, "", 0, "Size of the input to the NN (currently calculated as (length-1)/2)")
AddOption(nnClusterizerSizeInputTime, int, 3, "", 0, "Size of the input to the NN (currently calculated as (length-1)/2)")
AddOption(nnClusterizerUseCFregression, int, 0, "", 0, "(bool, default = false) If true, use the regression from the native clusterizer and not the NN")
AddOption(nnClusterizerBatchedMode, unsigned int, 1, "", 0, "(int, default = 1) If >1, the NN is evaluated on batched input of size specified in this variable")
AddOption(nnClusterizerVerbosity, int, -1, "", 0, "(int, default = -1) If >0, logging messages of the clusterizer will be displayed")
AddOption(nnClusterizerBoundaryFillValue, int, -1, "", 0, "Fill value for the boundary of the input to the NN")
AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The classification network path")
AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.")
AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path")
AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as activation function in the last layer or not).")
AddHelp("help", 'h')
EndConfig()
#endif // __OPENCL__
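One option above deserves a worked example: with `nnSigmoidTrafoClassThreshold` enabled, the user-facing cut `nnClassThreshold` (a probability) is mapped through the inverse sigmoid so it can be compared against the network's raw, pre-activation output. A minimal sketch of that transformation, under the assumption that this is all the option does (the clusterizer code applying it is not shown in this excerpt):

```cpp
#include <cmath>

// The inverse of the sigmoid s(x) = 1 / (1 + exp(-x)) is ln(p / (1 - p)).
// Comparing the raw network output against this logit is equivalent to
// comparing the sigmoid-activated output against p, but skips the
// activation at inference time.
float effectiveClassThreshold(float nnClassThreshold, bool sigmoidTrafo)
{
  return sigmoidTrafo ? std::log(nnClassThreshold / (1.f - nnClassThreshold))
                      : nnClassThreshold;
}
```

For the default `nnClassThreshold = 0.5` this yields a logit cut of 0.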