From 89cb815a14cf68060b00544efa029804e64860cf Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Mon, 26 Aug 2024 11:34:58 +0000 Subject: [PATCH] Add size argument to deallocate_managedmem --- src/atlas/array/SVector.h | 2 +- src/atlas/parallel/HaloExchange.h | 16 ++++----- src/atlas/util/Allocate.cc | 39 +++++++++++++--------- src/atlas/util/Allocate.h | 29 ++++++++-------- src/atlas_f/util/atlas_allocate_module.F90 | 16 ++++----- 5 files changed, 57 insertions(+), 45 deletions(-) diff --git a/src/atlas/array/SVector.h b/src/atlas/array/SVector.h index 83234cdd2..1d6861a96 100644 --- a/src/atlas/array/SVector.h +++ b/src/atlas/array/SVector.h @@ -161,7 +161,7 @@ class SVector { for (idx_t c = 0; c < size; ++c) { ptr[c].~T(); } - util::delete_managedmem(ptr); + util::delete_managedmem(ptr, size); } } diff --git a/src/atlas/parallel/HaloExchange.h b/src/atlas/parallel/HaloExchange.h index ea03dc53b..2c4931007 100644 --- a/src/atlas/parallel/HaloExchange.h +++ b/src/atlas/parallel/HaloExchange.h @@ -90,7 +90,7 @@ class HaloExchange : public util::Object { DATA_TYPE* allocate_buffer(const int buffer_size, const bool on_device) const; template - void deallocate_buffer(DATA_TYPE* buffer, const bool on_device) const; + void deallocate_buffer(DATA_TYPE* buffer, const int buffer_size, const bool on_device) const; template void pack_send_buffer(const array::ArrayView& hfield, @@ -198,8 +198,8 @@ void HaloExchange::execute(array::Array& field, bool on_device) const { wait_for_send(inner_counts_init, inner_req); - deallocate_buffer(inner_buffer, on_device); - deallocate_buffer(halo_buffer, on_device); + deallocate_buffer(inner_buffer, inner_size, on_device); + deallocate_buffer(halo_buffer, halo_size, on_device); } template @@ -249,8 +249,8 @@ void HaloExchange::execute_adjoint(array::Array& field, bool on_device) const { zero_halos(field_hv, field_dv, halo_buffer, halo_size, on_device); - deallocate_buffer(halo_buffer, on_device); - deallocate_buffer(inner_buffer, on_device); + deallocate_buffer(halo_buffer, halo_size, on_device); + deallocate_buffer(inner_buffer, inner_size, on_device); } template @@ -269,12 +269,12 @@ DATA_TYPE* HaloExchange::allocate_buffer(const int buffer_size, const bool on_de template -void HaloExchange::deallocate_buffer(DATA_TYPE* buffer, const bool on_device) const { +void HaloExchange::deallocate_buffer(DATA_TYPE* buffer, const int buffer_size, const bool on_device) const { if (on_device) { - util::delete_devicemem(buffer); + util::delete_devicemem(buffer, buffer_size); } else { - util::delete_hostmem(buffer); + util::delete_hostmem(buffer, buffer_size); } } diff --git a/src/atlas/util/Allocate.cc b/src/atlas/util/Allocate.cc index fcdce0781..9a22f8161 100644 --- a/src/atlas/util/Allocate.cc +++ b/src/atlas/util/Allocate.cc @@ -25,41 +25,41 @@ namespace util { namespace detail { //------------------------------------------------------------------------------ -void allocate_managed(void** ptr, size_t size) { +void allocate_managed(void** ptr, size_t bytes) { if constexpr (not ATLAS_HAVE_GPU) { - return allocate_host(ptr, size); + return allocate_host(ptr, bytes); } - HIC_CALL(hicMallocManaged(ptr, size)); + HIC_CALL(hicMallocManaged(ptr, bytes)); } -void deallocate_managed(void* ptr) { +void deallocate_managed(void* ptr, size_t bytes) { if constexpr (not ATLAS_HAVE_GPU) { - return deallocate_host(ptr); + return deallocate_host(ptr, bytes); } HIC_CALL(hicDeviceSynchronize()); HIC_CALL(hicFree(ptr)); } -void allocate_device(void** ptr, size_t size) { +void allocate_device(void** ptr, size_t bytes) { if constexpr (not ATLAS_HAVE_GPU) { - return allocate_host(ptr, size); + return allocate_host(ptr, bytes); } - HIC_CALL(hicMalloc(ptr, size)); + HIC_CALL(hicMalloc(ptr, bytes)); } -void deallocate_device(void* ptr) { +void deallocate_device(void* ptr, size_t bytes) { if constexpr (not ATLAS_HAVE_GPU) { - return deallocate_host(ptr); + return deallocate_host(ptr, bytes); } HIC_CALL(hicDeviceSynchronize()); HIC_CALL(hicFree(ptr)); } -void allocate_host(void** ptr, size_t size) { - *ptr = malloc(size); +void allocate_host(void** ptr, size_t bytes) { + *ptr = malloc(bytes); } -void deallocate_host(void* ptr) { +void deallocate_host(void* ptr, size_t /*bytes*/) { free(ptr); } @@ -80,8 +80,17 @@ void atlas__allocate_managedmem_int(int*& a, size_t N) { void atlas__allocate_managedmem_long(long*& a, size_t N) { allocate_managedmem(a, N); } -void atlas__deallocate_managedmem(void*& a) { - delete_managedmem(a); +void atlas__deallocate_managedmem_double(double*& a, size_t N) { + delete_managedmem(a, N); +} +void atlas__deallocate_managedmem_float(float*& a, size_t N) { + delete_managedmem(a, N); +} +void atlas__deallocate_managedmem_int(int*& a, size_t N) { + delete_managedmem(a, N); +} +void atlas__deallocate_managedmem_long(long*& a, size_t N) { + delete_managedmem(a, N); } } diff --git a/src/atlas/util/Allocate.h b/src/atlas/util/Allocate.h index 7d24b2432..b8fac7179 100644 --- a/src/atlas/util/Allocate.h +++ b/src/atlas/util/Allocate.h @@ -18,14 +18,14 @@ namespace util { //------------------------------------------------------------------------------ namespace detail { -void allocate_managed(void** ptr, size_t size); -void deallocate_managed(void* ptr); +void allocate_managed(void** ptr, size_t bytes); +void deallocate_managed(void* ptr, size_t bytes); -void allocate_device(void** ptr, size_t size); -void deallocate_device(void* ptr); +void allocate_device(void** ptr, size_t bytes); +void deallocate_device(void* ptr, size_t bytes); -void allocate_host(void** ptr, size_t size); -void deallocate_host(void* ptr); +void allocate_host(void** ptr, size_t bytes); +void deallocate_host(void* ptr, size_t bytes); } // namespace detail @@ -37,9 +37,9 @@ void allocate_managedmem(T*& data, size_t N) { } template -void delete_managedmem(T*& data) { +void delete_managedmem(T*& data, size_t N) { if (data) { - detail::deallocate_managed(data); + detail::deallocate_managed(data, N * sizeof(T)); data = nullptr; } } @@ -52,9 +52,9 @@ void allocate_devicemem(T*& data, size_t N) { } template -void delete_devicemem(T*& data) { +void delete_devicemem(T*& data, size_t N) { if (data) { - detail::deallocate_device(data); + detail::deallocate_device(data, N * sizeof(T)); data = nullptr; } } @@ -67,9 +67,9 @@ void allocate_hostmem(T*& data, size_t N) { } template -void delete_hostmem(T*& data) { +void delete_hostmem(T*& data, size_t N) { if (data) { - detail::deallocate_host(data); + detail::deallocate_host(data, N * sizeof(T)); data = nullptr; } } @@ -82,7 +82,10 @@ void atlas__allocate_managedmem_double(double*& a, size_t N); void atlas__allocate_managedmem_float(float*& a, size_t N); void atlas__allocate_managedmem_int(int*& a, size_t N); void atlas__allocate_managedmem_long(long*& a, size_t N); -void atlas__deallocate_managedmem(void*& a); +void atlas__deallocate_managedmem_double(double*& a, size_t N); +void atlas__deallocate_managedmem_float(float*& a, size_t N); +void atlas__deallocate_managedmem_int(int*& a, size_t N); +void atlas__deallocate_managedmem_long(long*& a, size_t N); } //------------------------------------------------------------------------------ diff --git a/src/atlas_f/util/atlas_allocate_module.F90 b/src/atlas_f/util/atlas_allocate_module.F90 index 3cc1d3a7a..1bda17e81 100644 --- a/src/atlas_f/util/atlas_allocate_module.F90 +++ b/src/atlas_f/util/atlas_allocate_module.F90 @@ -231,7 +231,7 @@ subroutine atlas_deallocate_managedmem_real64_r1( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding real(c_double), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_real64(A(1)) ) + call atlas__deallocate_managedmem_double( c_loc_real64(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -239,7 +239,7 @@ subroutine atlas_deallocate_managedmem_real32_r1( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding real(c_float), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_real32(A(1)) ) + call atlas__deallocate_managedmem_float( c_loc_real32(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -247,7 +247,7 @@ subroutine atlas_deallocate_managedmem_int32_r1( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_int), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_int32(A(1)) ) + call atlas__deallocate_managedmem_int( c_loc_int32(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -255,7 +255,7 @@ subroutine atlas_deallocate_managedmem_int64_r1( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_long), pointer :: a(:) - call atlas__deallocate_managedmem( c_loc_int64(A(1)) ) + call atlas__deallocate_managedmem_long( c_loc_int64(A(1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -263,7 +263,7 @@ subroutine atlas_deallocate_managedmem_real64_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding real(c_double), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_real64(A(1,1)) ) + call atlas__deallocate_managedmem_double( c_loc_real64(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -271,7 +271,7 @@ subroutine atlas_deallocate_managedmem_real32_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding real(c_float), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_real32(A(1,1)) ) + call atlas__deallocate_managedmem_float( c_loc_real32(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -279,7 +279,7 @@ subroutine atlas_deallocate_managedmem_int32_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_int), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_int32(A(1,1)) ) + call atlas__deallocate_managedmem_int( c_loc_int32(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine @@ -287,7 +287,7 @@ subroutine atlas_deallocate_managedmem_int64_r2( A ) use, intrinsic :: iso_c_binding use atlas_allocate_c_binding integer(c_long), pointer :: a(:,:) - call atlas__deallocate_managedmem( c_loc_int64(A(1,1)) ) + call atlas__deallocate_managedmem_long( c_loc_int64(A(1,1)), size(A,KIND=c_size_t) ) nullify( a ) end subroutine