diff --git a/src/comm/HALO_EXCHANGE_FUSED-Cuda.cpp b/src/comm/HALO_EXCHANGE_FUSED-Cuda.cpp index 6dc0d371a..e82795553 100644 --- a/src/comm/HALO_EXCHANGE_FUSED-Cuda.cpp +++ b/src/comm/HALO_EXCHANGE_FUSED-Cuda.cpp @@ -22,33 +22,33 @@ namespace rajaperf namespace comm { -#define HALO_EXCHANGE_FUSED_MANUAL_FUSER_SETUP_CUDA \ +#define HALO_EXCHANGE_FUSED_MANUAL_FUSER_SETUP_CUDA(vid) \ Real_ptr* pack_buffer_ptrs; \ Int_ptr* pack_list_ptrs; \ Real_ptr* pack_var_ptrs; \ Index_type* pack_len_ptrs; \ - allocData(DataSpace::CudaPinned, pack_buffer_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, pack_list_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, pack_var_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, pack_len_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_buffer_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_list_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_var_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_len_ptrs, num_neighbors * num_vars); \ Real_ptr* unpack_buffer_ptrs; \ Int_ptr* unpack_list_ptrs; \ Real_ptr* unpack_var_ptrs; \ Index_type* unpack_len_ptrs; \ - allocData(DataSpace::CudaPinned, unpack_buffer_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, unpack_list_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, unpack_var_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, unpack_len_ptrs, num_neighbors * num_vars); - -#define HALO_EXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN_CUDA \ - deallocData(DataSpace::CudaPinned, pack_buffer_ptrs); \ - deallocData(DataSpace::CudaPinned, pack_list_ptrs); \ - deallocData(DataSpace::CudaPinned, pack_var_ptrs); \ - deallocData(DataSpace::CudaPinned, pack_len_ptrs); \ - deallocData(DataSpace::CudaPinned, unpack_buffer_ptrs); \ - deallocData(DataSpace::CudaPinned, unpack_list_ptrs); \ - deallocData(DataSpace::CudaPinned, unpack_var_ptrs); \ - deallocData(DataSpace::CudaPinned, unpack_len_ptrs); + allocData(getFuserDataSpace(vid), unpack_buffer_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_list_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_var_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_len_ptrs, num_neighbors * num_vars); + +#define HALO_EXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN_CUDA(vid) \ + deallocData(getFuserDataSpace(vid), pack_buffer_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_list_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_var_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_len_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_buffer_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_list_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_var_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_len_ptrs); template < size_t block_size > __launch_bounds__(block_size) @@ -100,7 +100,7 @@ void HALO_EXCHANGE_FUSED::runCudaVariantDirect(VariantID vid) if ( vid == Base_CUDA ) { - HALO_EXCHANGE_FUSED_MANUAL_FUSER_SETUP_CUDA; + HALO_EXCHANGE_FUSED_MANUAL_FUSER_SETUP_CUDA(Base_CUDA); startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -199,7 +199,7 @@ void HALO_EXCHANGE_FUSED::runCudaVariantDirect(VariantID vid) } stopTimer(); - HALO_EXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN_CUDA; + HALO_EXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN_CUDA(Base_CUDA); } else { getCout() << "\n HALO_EXCHANGE_FUSED : Unknown Cuda variant id = " << vid << std::endl; diff --git a/src/comm/HALO_EXCHANGE_FUSED-Hip.cpp b/src/comm/HALO_EXCHANGE_FUSED-Hip.cpp index 614c0deaf..e297eede2 100644 --- a/src/comm/HALO_EXCHANGE_FUSED-Hip.cpp +++ b/src/comm/HALO_EXCHANGE_FUSED-Hip.cpp @@ -22,33 +22,33 @@ namespace rajaperf namespace comm { -#define HALO_EXCHANGE_FUSED_MANUAL_FUSER_SETUP_HIP \ +#define HALO_EXCHANGE_FUSED_MANUAL_FUSER_SETUP_HIP(vid) \ Real_ptr* pack_buffer_ptrs; \ Int_ptr* pack_list_ptrs; \ Real_ptr* pack_var_ptrs; \ Index_type* pack_len_ptrs; \ - allocData(DataSpace::HipPinnedCoarse, pack_buffer_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, pack_list_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, pack_var_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, pack_len_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_buffer_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_list_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_var_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_len_ptrs, num_neighbors * num_vars); \ Real_ptr* unpack_buffer_ptrs; \ Int_ptr* unpack_list_ptrs; \ Real_ptr* unpack_var_ptrs; \ Index_type* unpack_len_ptrs; \ - allocData(DataSpace::HipPinnedCoarse, unpack_buffer_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, unpack_list_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, unpack_var_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, unpack_len_ptrs, num_neighbors * num_vars); - -#define HALO_EXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN_HIP \ - deallocData(DataSpace::HipPinnedCoarse, pack_buffer_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, pack_list_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, pack_var_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, pack_len_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, unpack_buffer_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, unpack_list_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, unpack_var_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, unpack_len_ptrs); + allocData(getFuserDataSpace(vid), unpack_buffer_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_list_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_var_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_len_ptrs, num_neighbors * num_vars); + +#define HALO_EXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN_HIP(vid) \ + deallocData(getFuserDataSpace(vid), pack_buffer_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_list_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_var_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_len_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_buffer_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_list_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_var_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_len_ptrs); template < size_t block_size > __launch_bounds__(block_size) @@ -100,7 +100,7 @@ void HALO_EXCHANGE_FUSED::runHipVariantDirect(VariantID vid) if ( vid == Base_HIP ) { - HALO_EXCHANGE_FUSED_MANUAL_FUSER_SETUP_HIP; + HALO_EXCHANGE_FUSED_MANUAL_FUSER_SETUP_HIP(Base_HIP); startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -199,7 +199,7 @@ void HALO_EXCHANGE_FUSED::runHipVariantDirect(VariantID vid) } stopTimer(); - HALO_EXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN_HIP; + HALO_EXCHANGE_FUSED_MANUAL_FUSER_TEARDOWN_HIP(Base_HIP); } else { getCout() << "\n HALO_EXCHANGE_FUSED : Unknown Hip variant id = " << vid << std::endl; diff --git a/src/comm/HALO_PACKING_FUSED-Cuda.cpp b/src/comm/HALO_PACKING_FUSED-Cuda.cpp index 54cf2e782..de4cb8252 100644 --- a/src/comm/HALO_PACKING_FUSED-Cuda.cpp +++ b/src/comm/HALO_PACKING_FUSED-Cuda.cpp @@ -22,33 +22,33 @@ namespace rajaperf namespace comm { -#define HALO_PACKING_FUSED_MANUAL_FUSER_SETUP_CUDA \ +#define HALO_PACKING_FUSED_MANUAL_FUSER_SETUP_CUDA(vid) \ Real_ptr* pack_buffer_ptrs; \ Int_ptr* pack_list_ptrs; \ Real_ptr* pack_var_ptrs; \ Index_type* pack_len_ptrs; \ - allocData(DataSpace::CudaPinned, pack_buffer_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, pack_list_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, pack_var_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, pack_len_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_buffer_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_list_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_var_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_len_ptrs, num_neighbors * num_vars); \ Real_ptr* unpack_buffer_ptrs; \ Int_ptr* unpack_list_ptrs; \ Real_ptr* unpack_var_ptrs; \ Index_type* unpack_len_ptrs; \ - allocData(DataSpace::CudaPinned, unpack_buffer_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, unpack_list_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, unpack_var_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::CudaPinned, unpack_len_ptrs, num_neighbors * num_vars); - -#define HALO_PACKING_FUSED_MANUAL_FUSER_TEARDOWN_CUDA \ - deallocData(DataSpace::CudaPinned, pack_buffer_ptrs); \ - deallocData(DataSpace::CudaPinned, pack_list_ptrs); \ - deallocData(DataSpace::CudaPinned, pack_var_ptrs); \ - deallocData(DataSpace::CudaPinned, pack_len_ptrs); \ - deallocData(DataSpace::CudaPinned, unpack_buffer_ptrs); \ - deallocData(DataSpace::CudaPinned, unpack_list_ptrs); \ - deallocData(DataSpace::CudaPinned, unpack_var_ptrs); \ - deallocData(DataSpace::CudaPinned, unpack_len_ptrs); + allocData(getFuserDataSpace(vid), unpack_buffer_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_list_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_var_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_len_ptrs, num_neighbors * num_vars); + +#define HALO_PACKING_FUSED_MANUAL_FUSER_TEARDOWN_CUDA(vid) \ + deallocData(getFuserDataSpace(vid), pack_buffer_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_list_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_var_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_len_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_buffer_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_list_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_var_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_len_ptrs); template < size_t block_size > __launch_bounds__(block_size) @@ -104,7 +104,7 @@ void HALO_PACKING_FUSED::runCudaVariantDirect(VariantID vid) if ( vid == Base_CUDA ) { - HALO_PACKING_FUSED_MANUAL_FUSER_SETUP_CUDA; + HALO_PACKING_FUSED_MANUAL_FUSER_SETUP_CUDA(Base_CUDA); startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -190,7 +190,7 @@ void HALO_PACKING_FUSED::runCudaVariantDirect(VariantID vid) } stopTimer(); - HALO_PACKING_FUSED_MANUAL_FUSER_TEARDOWN_CUDA; + HALO_PACKING_FUSED_MANUAL_FUSER_TEARDOWN_CUDA(Base_CUDA); } else { getCout() << "\n HALO_PACKING_FUSED : Unknown Cuda variant id = " << vid << std::endl; diff --git a/src/comm/HALO_PACKING_FUSED-Hip.cpp b/src/comm/HALO_PACKING_FUSED-Hip.cpp index ac71ad40a..43c131144 100644 --- a/src/comm/HALO_PACKING_FUSED-Hip.cpp +++ b/src/comm/HALO_PACKING_FUSED-Hip.cpp @@ -22,33 +22,33 @@ namespace rajaperf namespace comm { -#define HALO_PACKING_FUSED_MANUAL_FUSER_SETUP_HIP \ +#define HALO_PACKING_FUSED_MANUAL_FUSER_SETUP_HIP(vid) \ Real_ptr* pack_buffer_ptrs; \ Int_ptr* pack_list_ptrs; \ Real_ptr* pack_var_ptrs; \ Index_type* pack_len_ptrs; \ - allocData(DataSpace::HipPinnedCoarse, pack_buffer_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, pack_list_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, pack_var_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, pack_len_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_buffer_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_list_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_var_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), pack_len_ptrs, num_neighbors * num_vars); \ Real_ptr* unpack_buffer_ptrs; \ Int_ptr* unpack_list_ptrs; \ Real_ptr* unpack_var_ptrs; \ Index_type* unpack_len_ptrs; \ - allocData(DataSpace::HipPinnedCoarse, unpack_buffer_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, unpack_list_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, unpack_var_ptrs, num_neighbors * num_vars); \ - allocData(DataSpace::HipPinnedCoarse, unpack_len_ptrs, num_neighbors * num_vars); - -#define HALO_PACKING_FUSED_MANUAL_FUSER_TEARDOWN_HIP \ - deallocData(DataSpace::HipPinnedCoarse, pack_buffer_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, pack_list_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, pack_var_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, pack_len_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, unpack_buffer_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, unpack_list_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, unpack_var_ptrs); \ - deallocData(DataSpace::HipPinnedCoarse, unpack_len_ptrs); + allocData(getFuserDataSpace(vid), unpack_buffer_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_list_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_var_ptrs, num_neighbors * num_vars); \ + allocData(getFuserDataSpace(vid), unpack_len_ptrs, num_neighbors * num_vars); + +#define HALO_PACKING_FUSED_MANUAL_FUSER_TEARDOWN_HIP(vid) \ + deallocData(getFuserDataSpace(vid), pack_buffer_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_list_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_var_ptrs); \ + deallocData(getFuserDataSpace(vid), pack_len_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_buffer_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_list_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_var_ptrs); \ + deallocData(getFuserDataSpace(vid), unpack_len_ptrs); template < size_t block_size > __launch_bounds__(block_size) @@ -104,7 +104,7 @@ void HALO_PACKING_FUSED::runHipVariantDirect(VariantID vid) if ( vid == Base_HIP ) { - HALO_PACKING_FUSED_MANUAL_FUSER_SETUP_HIP; + HALO_PACKING_FUSED_MANUAL_FUSER_SETUP_HIP(Base_HIP); startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { @@ -190,7 +190,7 @@ void HALO_PACKING_FUSED::runHipVariantDirect(VariantID vid) } stopTimer(); - HALO_PACKING_FUSED_MANUAL_FUSER_TEARDOWN_HIP; + HALO_PACKING_FUSED_MANUAL_FUSER_TEARDOWN_HIP(Base_HIP); } else { getCout() << "\n HALO_PACKING_FUSED : Unknown Hip variant id = " << vid << std::endl;