From 4e0361a52289b79c671cfc6bd44da91c2844cbdb Mon Sep 17 00:00:00 2001
From: Stephan Hageboeck <stephan.hageboeck@cern.ch>
Date: Thu, 18 Apr 2024 10:06:06 +0200
Subject: [PATCH] Various small updates (#287)

While rebasing the AsyncExample onto master, I made the following
small fixes:
- Fixed some compiler warnings
- Fixed a link error with CUDA
- Worked around the "ptx assembly failed" bug in CUDA < 12.6 (I reported
  it to NVIDIA; 12.6 is the first version in which it will be fixed)
- Corrected a few docstrings
- Removed `-G` from `RelWithDebInfo` (replaced by `--source-in-ptx`)

I cherry-picked these changes out of the branch so they can be shared right away.
---
 .clang-format                             |  1 +
 CMakeLists.txt                            | 29 ++++++++++++-----------
 include/AdePT/base/MParrayT.h             |  2 +-
 include/AdePT/benchmarking/NVTX.h         |  6 ++---
 include/AdePT/copcore/CopCore.h           |  2 +-
 include/AdePT/copcore/Global.h            |  2 +-
 include/AdePT/copcore/Macros.h            |  4 ++--
 include/AdePT/copcore/PhysicalConstants.h |  2 +-
 include/AdePT/copcore/VariableSizeObj.h   |  2 +-
 include/AdePT/kernels/electrons.cuh       |  3 ++-
 src/AdePTTrackingManager.cu               |  3 ++-
 11 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/.clang-format b/.clang-format
index 1e76086f..85173142 100644
--- a/.clang-format
+++ b/.clang-format
@@ -58,6 +58,7 @@ IncludeCategories:
 IndentCaseLabels: false
 IndentWidth:     2
 IndentWrappedFunctionNames: false
+InsertNewlineAtEOF: true
 KeepEmptyLinesAtTheStartOfBlocks: true
 MacroBlockBegin: ''
 MacroBlockEnd:   ''
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7743ca5a..86536452 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,7 @@ project(AdePT
 #----------------------------------------------------------------------------#
 # - Include needed custom/core modules
 set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
-include(CMakeSettings) 
+include(CMakeSettings)
 include(CTest)
 include(CheckCXXSourceCompiles)
 
@@ -124,12 +124,12 @@ add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:RelWithDebInfo>>:
 # - For Debug, generate full debug information - this completely disables optimizations!
 add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<CONFIG:Debug>>:--device-debug>")
 # - For both, interleave the source in PTX to enhance the debugging experience.
-add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<OR:$<CONFIG:RelWithDebInfo>,$<CONFIG:Debug>>>:-G>")
+add_compile_options("$<$<AND:$<COMPILE_LANGUAGE:CUDA>,$<OR:$<CONFIG:RelWithDebInfo>,$<CONFIG:Debug>>>:--source-in-ptx>")
 
 # Disable warnings from the CUDA frontend about unknown GCC pragmas - let the compiler decide what it likes.
 add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe;--diag_suppress=unrecognized_gcc_pragma>")
 
-find_package(G4HepEm)
+find_package(G4HepEm CONFIG REQUIRED)
 if(G4HepEm_FOUND)
   message(STATUS "G4HepEm found ${G4HepEm_INCLUDE_DIR}")
 endif()
@@ -154,13 +154,13 @@ target_include_directories(CopCore
 )
 
 add_library(AdePT_G4_integration SHARED ${ADEPT_G4_INTEGRATION_SRCS})
-target_include_directories(AdePT_G4_integration 
-  PUBLIC 
+target_include_directories(AdePT_G4_integration
+  PUBLIC
     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
     $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}>
 )
-target_link_libraries(AdePT_G4_integration 
-  PUBLIC 
+target_link_libraries(AdePT_G4_integration
+  PUBLIC
     CopCore
     VecGeom::vecgeom
     VecGeom::vecgeomcuda_static
@@ -170,11 +170,12 @@ target_link_libraries(AdePT_G4_integration
     G4HepEm::g4HepEmData
     G4HepEm::g4HepEmInit
     G4HepEm::g4HepEmRun
+    CUDA::cudart
 )
 
 set_target_properties(AdePT_G4_integration
-  PROPERTIES 
-    CUDA_SEPARABLE_COMPILATION ON 
+  PROPERTIES
+    CUDA_SEPARABLE_COMPILATION ON
     CUDA_RESOLVE_DEVICE_SYMBOLS ON
 )
 
@@ -196,14 +197,14 @@ endif()
 
 include(CMakePackageConfigHelpers)
 #Generate the configuration file from the template and save it to the build directory
-configure_package_config_file(cmake/${PROJECT_NAME}Config.cmake.in 
+configure_package_config_file(cmake/${PROJECT_NAME}Config.cmake.in
   "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${PROJECT_NAME}Config.cmake"
   INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
   PATH_VARS CMAKE_INSTALL_INCLUDEDIR
 )
 
 #Install the libraries
-install(TARGETS CopCore AdePT_G4_integration 
+install(TARGETS CopCore AdePT_G4_integration
   EXPORT ${PROJECT_NAME}Targets
   ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
   LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
@@ -211,12 +212,12 @@ install(TARGETS CopCore AdePT_G4_integration
 )
 
 #Install the headers
-install(DIRECTORY include/AdePT 
+install(DIRECTORY include/AdePT
   DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
 )
 
 #Install the configuration file
-install(FILES "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/AdePTConfig.cmake" 
+install(FILES "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/AdePTConfig.cmake"
   DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
 )
 
@@ -229,4 +230,4 @@ export(TARGETS CopCore AdePT_G4_integration
 install(EXPORT ${PROJECT_NAME}Targets
   NAMESPACE ${PROJECT_NAME}::
   DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
-)
\ No newline at end of file
+)
diff --git a/include/AdePT/base/MParrayT.h b/include/AdePT/base/MParrayT.h
index c6ab13d9..a458d9b6 100644
--- a/include/AdePT/base/MParrayT.h
+++ b/include/AdePT/base/MParrayT.h
@@ -89,7 +89,7 @@ class MParrayT : protected copcore::VariableSizeObjectInterface<MParrayT<T>, T>
   {
     // Operation may fail if the max size is exceeded. Has to be checked by the user.
     int index = fNbooked.fetch_add(1);
-    if (index >= fCapacity) return false;
+    if (static_cast<adept::MParrayT<T>::size_t>(index) >= fCapacity) return false;
     fData[index] = val;
     fNused++;
     return true;
diff --git a/include/AdePT/benchmarking/NVTX.h b/include/AdePT/benchmarking/NVTX.h
index 2e68deae..60c3669b 100644
--- a/include/AdePT/benchmarking/NVTX.h
+++ b/include/AdePT/benchmarking/NVTX.h
@@ -37,7 +37,7 @@ class NVTXTracer {
     _name = name;
 
     if (!first) nvtxRangeEnd(_id);
-    nvtxEventAttributes_t eventAttrib = {0};
+    nvtxEventAttributes_t eventAttrib;
     eventAttrib.version               = NVTX_VERSION;
     eventAttrib.size                  = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
     eventAttrib.colorType             = NVTX_COLOR_ARGB;
@@ -51,8 +51,8 @@ class NVTXTracer {
   {
     // Require the occupancy to be larger than the majority of previous iterations to call it rising
     const bool rising = 2 * std::count_if(_lastOccups.begin(), _lastOccups.end(),
-                                          [occupancy](auto elm) { return occupancy > elm + 1; }) >
-                        _lastOccups.size();
+                                          [occupancy](auto const elm) { return occupancy > elm + 1; }) >
+                        static_cast<ptrdiff_t>(_lastOccups.size());
 
     if (rising) {
       setTag("occupancy rising");
diff --git a/include/AdePT/copcore/CopCore.h b/include/AdePT/copcore/CopCore.h
index c49ca482..15e7ea1b 100644
--- a/include/AdePT/copcore/CopCore.h
+++ b/include/AdePT/copcore/CopCore.h
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 /**
- * @file CopCore/CopCore.h
+ * @file AdePT/copcore/CopCore.h
  * @brief Main front end to CopCore
  */
 
diff --git a/include/AdePT/copcore/Global.h b/include/AdePT/copcore/Global.h
index 6bdbb9b0..7267c126 100644
--- a/include/AdePT/copcore/Global.h
+++ b/include/AdePT/copcore/Global.h
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 /**
- * @file CopCore/Global.h
+ * @file AdePT/copcore/Global.h
  * @brief CopCore global macros and types
  */
 
diff --git a/include/AdePT/copcore/Macros.h b/include/AdePT/copcore/Macros.h
index 4db9ee85..b66e5ebb 100644
--- a/include/AdePT/copcore/Macros.h
+++ b/include/AdePT/copcore/Macros.h
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 /**
- * @file CopCore/Macros.h
+ * @file AdePT/copcore/Macros.h
  * @brief CopCore global macros
  *
  * @details Compiling mixtures of C/C++/CUDA, we run into the cases of compiling for
@@ -12,7 +12,7 @@
  * assist this markup process. Only C/C++ and NVidia CUDA are currently supported,
  * but other "backends" such as HIP can be added as time goes on.
  *
- * CopCore/AdePT follow LHCb's Allen project in using macros for host/device/inline
+ * CopCore/AdePT follow LHCb's Allen project in using macros for host/device/inline
  * functions that match the CUDA keywords.
  */
 /**
diff --git a/include/AdePT/copcore/PhysicalConstants.h b/include/AdePT/copcore/PhysicalConstants.h
index 54d2a31b..0faefe07 100644
--- a/include/AdePT/copcore/PhysicalConstants.h
+++ b/include/AdePT/copcore/PhysicalConstants.h
@@ -3,7 +3,7 @@
 
 /**
  * @brief   Physical constants in internal units.
- * @file    CopCore/PhysicalConstants.h
+ * @file    AdePT/copcore/PhysicalConstants.h
  * @author  M Novak, A Ribon
  * @date    december 2015
  *
diff --git a/include/AdePT/copcore/VariableSizeObj.h b/include/AdePT/copcore/VariableSizeObj.h
index 6ce5b140..ceaf6ef0 100644
--- a/include/AdePT/copcore/VariableSizeObj.h
+++ b/include/AdePT/copcore/VariableSizeObj.h
@@ -291,7 +291,7 @@ class VariableSizeObjectInterface {
   }
 
   // Size of the allocated derived type data members that are also variable size
-  __host__ __device__ static constexpr size_t SizeOfExtra(size_t nvalues) { return 0; }
+  __host__ __device__ static constexpr size_t SizeOfExtra(size_t /*nvalues*/) { return 0; }
 
   // equivalent of sizeof function taking into account padding for alignment
   // this function should be used when making arrays of VariableSizeObjects
diff --git a/include/AdePT/kernels/electrons.cuh b/include/AdePT/kernels/electrons.cuh
index 9ce7ed9e..b00649f8 100644
--- a/include/AdePT/kernels/electrons.cuh
+++ b/include/AdePT/kernels/electrons.cuh
@@ -442,4 +442,5 @@ __global__ void TransportPositrons(adept::TrackManager<Track> *positrons, Second
                                    MParrayTracks *leakedQueue, Scoring *userScoring, VolAuxData const *auxDataArray)
 {
   TransportElectrons</*IsElectron*/ false, Scoring>(positrons, secondaries, leakedQueue, userScoring, auxDataArray);
-}
\ No newline at end of file
+}
+
diff --git a/src/AdePTTrackingManager.cu b/src/AdePTTrackingManager.cu
index 8a753113..615d2fb9 100644
--- a/src/AdePTTrackingManager.cu
+++ b/src/AdePTTrackingManager.cu
@@ -7,4 +7,5 @@
 // Explicit instantiation of the ShowerGPU<AdePTGeant4Integration> function
 namespace adept_impl {
     template void ShowerGPU<AdePTGeant4Integration>(AdePTGeant4Integration&, int, adeptint::TrackBuffer&, GPUstate&, HostScoring*, HostScoring*);
-}
\ No newline at end of file
+}
+