From bebc9ad28930e807573d75aea2518d1d00a4a3f8 Mon Sep 17 00:00:00 2001 From: mtaylo12 Date: Thu, 8 Aug 2024 07:47:45 -0500 Subject: [PATCH 1/7] fixing cmake cuda build: renaming cuda cmake option --- CMakeLists.txt | 9 ++++----- buildcmake | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b6f699574..a90cbf042b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -209,8 +209,7 @@ endif() option(BUILD_SHARED "Build Charm++ dynamic libraries" OFF) # Other options -option(CUDA "Build with CUDA support" OFF) - +option(BUILD_CUDA "Build with CUDA support" OFF) option(PXSHM "Build with PXSHM" OFF) # LRTS PMI options @@ -224,8 +223,6 @@ string(REPLACE ";" " " MY_EXTRA_OPTS "${MY_EXTRA_OPTS}") set(OPTS "${OPTS} ${MY_EXTRA_OPTS}") set(OPTSATBUILDTIME "${OPTSATBUILDTIME} ${MY_EXTRA_OPTS}") -# We need both BUILD_CUDA and CUDA -set(BUILD_CUDA ${CUDA}) # Also build shared Charm++ libraries in lib_so/ if(BUILD_SHARED) @@ -689,7 +686,8 @@ configure_file(src/scripts/conv-config.sh include/ COPYONLY) configure_file(src/arch/${VDIR}/conv-mach.sh include/ COPYONLY) set(CUDA_DIR "") -if(CUDA) +if(BUILD_CUDA) + file(GLOB_RECURSE hybridAPI-h-sources ${CMAKE_SOURCE_DIR}/src/arch/cuda/*.h) file(GLOB_RECURSE hybridAPI-cxx-sources ${CMAKE_SOURCE_DIR}/src/arch/cuda/*.cpp) foreach(file ${hybridAPI-h-sources}) @@ -1096,6 +1094,7 @@ foreach(l CUDA_DIR BUILD_CUDA CMK_AMPI_WITH_ROMIO CMK_MACOSX CMK_BUILD_PYTHON endforeach(l) # Add options +set(CUDA ${BUILD_CUDA}) # need CUDA to match conv-mach file name foreach(opt SMP OMP TCP PTHREADS SYNCFT PXSHM PERSISTENT OOC CUDA PAPI CXI) if(${opt}) string(TOLOWER ${opt} optl) diff --git a/buildcmake b/buildcmake index ab65c2407b..e7071b7fe4 100755 --- a/buildcmake +++ b/buildcmake @@ -647,7 +647,7 @@ CC=$opt_CC CXX=$opt_CXX FC=$opt_FC cmake "$my_srcdir" \ -DCCS="$opt_ccs" \ -DCHARMDEBUG="$opt_charmdebug" \ -DCONTROLPOINT="$opt_controlpoint" \ - -DCUDA="$opt_cuda" \ + -DBUILD_CUDA="$opt_cuda" \ -DDISABLE_TLS="$opt_disabletls" \ -DDRONE_MODE="$opt_drone_mode" \ -DENABLE_FORTRAN=$opt_enable_fortran \ From 2a07a17a984dcd82edc8a89697718af56a642a8c Mon Sep 17 00:00:00 2001 From: mtaylo12 Date: Mon, 12 Aug 2024 07:32:09 -0500 Subject: [PATCH 2/7] linking cudart into charmlib.so --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index a90cbf042b..50fcd28a0d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -993,6 +993,10 @@ if(${TARGET} STREQUAL "charm4py") target_link_libraries(charm ck converse memory-default threads-default ldb-rand "-Llib/ -standalone -whole-archive -c++stl -shared") endif() + if (${BUILD_CUDA}) + target_link_libraries(charm cudart cudahybridapi) + endif() + add_dependencies(charm hwloc) endif() From 5400c919141a1db4154050288d592e307369208d Mon Sep 17 00:00:00 2001 From: mtaylo12 Date: Wed, 2 Oct 2024 16:23:08 -0500 Subject: [PATCH 3/7] first impl for hapi charm4py --- src/ck-core/charm.h | 9 +++++++++ src/ck-core/ck.C | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/ck-core/charm.h b/src/ck-core/charm.h index cee65d539a..50615f0ec6 100644 --- a/src/ck-core/charm.h +++ b/src/ck-core/charm.h @@ -8,6 +8,10 @@ #include "converse.h" #include /* for size_t */ +#if CMK_CHARM4PY && CMK_CUDA +#include +#endif + #ifdef __cplusplus #include "conv-rdma.h" #include "pup.h" @@ -510,6 +514,11 @@ extern int CkDisableTracing(int epIdx); extern void CkEnableTracing(int epIdx); extern void CkCallWhenIdle(int epIdx, void* obj); + +#if CMK_CHARM4PY +extern void CkHapiAddCallback(cudaStream_t stream, void* cb, void* cb_msg); +#endif + #ifdef __cplusplus } #endif diff --git a/src/ck-core/ck.C b/src/ck-core/ck.C index 33bfc82bcf..9108e49195 100644 --- a/src/ck-core/ck.C +++ b/src/ck-core/ck.C @@ -12,6 +12,8 @@ clients, including the rest of Charm++, are actually C++. #include "pathHistory.h" + + #if CMK_LBDB_ON #include "LBManager.h" #endif // CMK_LBDB_ON @@ -46,6 +48,16 @@ int CkIndex_ArrayBase::__idx=-1; extern int _defaultObjectQ; + +#include "hapi.h" + + +void CkHapiAddCallback(cudaStream_t stream, void *fn, void* cb_msg) +{ + CkPrintf("CkHapiAddCallback is not implemented yet, but its being called!\n"); + hapiAddCallback(stream, (CkCallback*)fn, cb_msg); +} + void _initChareTables() { #ifndef CMK_CHARE_USE_PTR From a442c9638f94b9bf1a10fd1a8f4b76919542fb35 Mon Sep 17 00:00:00 2001 From: mtaylo12 Date: Thu, 3 Oct 2024 17:54:01 -0500 Subject: [PATCH 4/7] first working commit (in cython) --- src/ck-core/charm.h | 2 +- src/ck-core/ck.C | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/ck-core/charm.h b/src/ck-core/charm.h index 50615f0ec6..696709e275 100644 --- a/src/ck-core/charm.h +++ b/src/ck-core/charm.h @@ -516,7 +516,7 @@ extern void CkCallWhenIdle(int epIdx, void* obj); #if CMK_CHARM4PY -extern void CkHapiAddCallback(cudaStream_t stream, void* cb, void* cb_msg); +extern void CkHapiAddCallback(long stream, void (*cb)(void*, void*), void* cb_msg); #endif #ifdef __cplusplus diff --git a/src/ck-core/ck.C b/src/ck-core/ck.C index 9108e49195..84cb9347ef 100644 --- a/src/ck-core/ck.C +++ b/src/ck-core/ck.C @@ -50,12 +50,13 @@ extern int _defaultObjectQ; #include "hapi.h" - - -void CkHapiAddCallback(cudaStream_t stream, void *fn, void* cb_msg) +void CkHapiAddCallback(long stream, void (*cb)(void*, void*), void* cb_msg) { - CkPrintf("CkHapiAddCallback is not implemented yet, but its being called!\n"); - hapiAddCallback(stream, (CkCallback*)fn, cb_msg); + + cudaStream_t stream_ptr = (cudaStream_t)stream; + CkCallback callback(cb, cb_msg); + + hapiAddCallback(stream_ptr, callback, cb_msg); } void _initChareTables() From 47623e5d1aeed0351c8fe34d0ea3ac15757bee20 Mon Sep 17 00:00:00 2001 From: mtaylo12 Date: Fri, 4 Oct 2024 17:03:57 -0500 Subject: [PATCH 5/7] switch to future --- src/ck-core/charm.h | 2 +- src/ck-core/ck.C | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ck-core/charm.h b/src/ck-core/charm.h index 696709e275..befaa6282c 100644 --- a/src/ck-core/charm.h +++ b/src/ck-core/charm.h @@ -516,7 +516,7 @@ extern void CkCallWhenIdle(int epIdx, void* obj); #if CMK_CHARM4PY -extern void CkHapiAddCallback(long stream, void (*cb)(void*, void*), void* cb_msg); +extern void CkHapiAddCallback(long stream, void (*cb)(void*, void*), int fid); #endif #ifdef __cplusplus diff --git a/src/ck-core/ck.C b/src/ck-core/ck.C index 84cb9347ef..e457272fdc 100644 --- a/src/ck-core/ck.C +++ b/src/ck-core/ck.C @@ -50,13 +50,13 @@ extern int _defaultObjectQ; #include "hapi.h" -void CkHapiAddCallback(long stream, void (*cb)(void*, void*), void* cb_msg) +void CkHapiAddCallback(long stream, void (*cb)(void*, void*), int fid) { cudaStream_t stream_ptr = (cudaStream_t)stream; - CkCallback callback(cb, cb_msg); + CkCallback callback(cb, (void *) fid); - hapiAddCallback(stream_ptr, callback, cb_msg); + hapiAddCallback(stream_ptr, callback, NULL); } void _initChareTables() From dcaed6406d2ee1dde557077a2487fdb1b6431caf Mon Sep 17 00:00:00 2001 From: mtaylo12 Date: Wed, 9 Oct 2024 09:16:35 -0500 Subject: [PATCH 6/7] rearrange pop to (temporarily) handle race condition? --- src/arch/cuda/hybridAPI/hapi_impl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arch/cuda/hybridAPI/hapi_impl.cpp b/src/arch/cuda/hybridAPI/hapi_impl.cpp index beeea38d24..cfa106e8e2 100644 --- a/src/arch/cuda/hybridAPI/hapi_impl.cpp +++ b/src/arch/cuda/hybridAPI/hapi_impl.cpp @@ -1367,16 +1367,16 @@ void hapiPollEvents(void* param) { std::queue& queue = CpvAccess(hapi_event_queue); while (!queue.empty()) { hapiEvent hev = queue.front(); + queue.pop(); // TODO: investigate possible race condition with charm4py futures - temporarily resolved by popping here if (cudaEventQuery(hev.event) == cudaSuccess) { // invoke Charm++ callback if one was given - hev.cb.send(hev.cb_msg); + hev.cb.send(hev.cb_msg); // clean up hapiWorkRequest if (hev.wr) { hapiWorkRequestCleanup(hev.wr); } cudaEventDestroy(hev.event); - queue.pop(); CpvAccess(n_hapi_events)--; // inform QD that an event was processed From a9e0674909d60955fade28f0c846ac6ca2d9ebee Mon Sep 17 00:00:00 2001 From: mtaylo12 Date: Wed, 30 Oct 2024 10:21:18 -0500 Subject: [PATCH 7/7] cleanup --- src/arch/cuda/hybridAPI/hapi_impl.cpp | 2 +- src/ck-core/charm.h | 6 +----- src/ck-core/ck.C | 25 ++++++++++++------------- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/src/arch/cuda/hybridAPI/hapi_impl.cpp b/src/arch/cuda/hybridAPI/hapi_impl.cpp index cfa106e8e2..309bd6e23b 100644 --- a/src/arch/cuda/hybridAPI/hapi_impl.cpp +++ b/src/arch/cuda/hybridAPI/hapi_impl.cpp @@ -1370,7 +1370,7 @@ void hapiPollEvents(void* param) { queue.pop(); // TODO: investigate possible race condition with charm4py futures - temporarily resolved by popping here if (cudaEventQuery(hev.event) == cudaSuccess) { // invoke Charm++ callback if one was given - hev.cb.send(hev.cb_msg); + hev.cb.send(hev.cb_msg); // clean up hapiWorkRequest if (hev.wr) { diff --git a/src/ck-core/charm.h b/src/ck-core/charm.h index befaa6282c..b645027626 100644 --- a/src/ck-core/charm.h +++ b/src/ck-core/charm.h @@ -8,10 +8,6 @@ #include "converse.h" #include /* for size_t */ -#if CMK_CHARM4PY && CMK_CUDA -#include -#endif - #ifdef __cplusplus #include "conv-rdma.h" #include "pup.h" @@ -515,7 +511,7 @@ extern void CkEnableTracing(int epIdx); extern void CkCallWhenIdle(int epIdx, void* obj); -#if CMK_CHARM4PY +#if CMK_CHARM4PY && CMK_CUDA extern void CkHapiAddCallback(long stream, void (*cb)(void*, void*), int fid); #endif diff --git a/src/ck-core/ck.C b/src/ck-core/ck.C index e457272fdc..8e6aa5e470 100644 --- a/src/ck-core/ck.C +++ b/src/ck-core/ck.C @@ -12,8 +12,6 @@ clients, including the rest of Charm++, are actually C++. #include "pathHistory.h" - - #if CMK_LBDB_ON #include "LBManager.h" #endif // CMK_LBDB_ON @@ -48,17 +46,6 @@ int CkIndex_ArrayBase::__idx=-1; extern int _defaultObjectQ; - -#include "hapi.h" -void CkHapiAddCallback(long stream, void (*cb)(void*, void*), int fid) -{ - - cudaStream_t stream_ptr = (cudaStream_t)stream; - CkCallback callback(cb, (void *) fid); - - hapiAddCallback(stream_ptr, callback, NULL); -} - void _initChareTables() { #ifndef CMK_CHARE_USE_PTR @@ -2650,6 +2637,18 @@ void CkArrayExtSend_multi(int aid, int *idx, int ndims, int epIdx, int num_bufs, } } + +#if CMK_CUDA +#include "hapi.h" +void CkHapiAddCallback(long stream, void (*cb)(void*, void*), int fid) +{ + cudaStream_t stream_ptr = (cudaStream_t)stream; + CkCallback callback(cb, (void *) fid); + + hapiAddCallback(stream_ptr, callback, NULL); +} +#endif // CMK_CUDA + #endif //------------------- Message Watcher (record/replay) ----------------