Skip to content

Commit

Permalink
Merge branch 'develop' into feature/array_view_variant
Browse files Browse the repository at this point in the history
  • Loading branch information
odlomax authored Sep 13, 2024
2 parents 44d7f2b + b6a4e60 commit 97eadff
Show file tree
Hide file tree
Showing 11 changed files with 99 additions and 53 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ jobs:

- name: macos
# Xcode compiler requires empty environment variables, so we pass null (~) here
os: macos-13
compiler: clang-14
os: macos-14
compiler: clang-15
compiler_cc: ~
compiler_cxx: ~
compiler_fc: gfortran-13
Expand Down
2 changes: 1 addition & 1 deletion src/atlas/array/SVector.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ class SVector {
for (idx_t c = 0; c < size; ++c) {
ptr[c].~T();
}
util::delete_managedmem(ptr);
util::delete_managedmem(ptr, size);
}
}

Expand Down
23 changes: 23 additions & 0 deletions src/atlas/array/native/NativeDataStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

#if ATLAS_HAVE_ACC
#include "atlas_acc_support/atlas_acc_map_data.h"
#define ATLAS_ACC_DEBUG 0
#endif

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -213,7 +214,13 @@ class DataStore : public ArrayDataStore {
#if ATLAS_HAVE_ACC
if (not acc_mapped_) {
ATLAS_ASSERT(deviceAllocated(),"Could not accMap as device data is not allocated");
ATLAS_ASSERT(!atlas_acc_is_present(host_data_, size_ * sizeof(Value)));
if constexpr(ATLAS_ACC_DEBUG) {
std::cout << " + acc_map_data(hostptr:"<<host_data_<<", device:"<<device_data_<<", bytes:"<<footprint()<<")" <<std::endl;
}
atlas_acc_map_data((void*)host_data_, (void*)device_data_, size_ * sizeof(Value));
ATLAS_ASSERT(atlas_acc_is_present(host_data_, size_ * sizeof(Value)));
ATLAS_ASSERT(atlas_acc_deviceptr(host_data_) == device_data_);
acc_mapped_ = true;
}
#endif
Expand All @@ -226,7 +233,12 @@ class DataStore : public ArrayDataStore {
void accUnmap() const override {
#if ATLAS_HAVE_ACC
if (acc_mapped_) {
if constexpr(ATLAS_ACC_DEBUG) {
std::cout << " - acc_unmap_data(hostptr:"<<host_data_<<", device:"<<device_data_<<", bytes:"<<footprint()<<")" <<std::endl;
}
ATLAS_ASSERT(atlas_acc_is_present(host_data_, size_ * sizeof(Value)));
atlas_acc_unmap_data(host_data_);
ATLAS_ASSERT(!atlas_acc_is_present(host_data_, size_ * sizeof(Value)));
acc_mapped_ = false;
}
#endif
Expand Down Expand Up @@ -398,7 +410,13 @@ class WrappedDataStore : public ArrayDataStore {
#if ATLAS_HAVE_ACC
if (not acc_mapped_) {
ATLAS_ASSERT(deviceAllocated(),"Could not accMap as device data is not allocated");
ATLAS_ASSERT(!atlas_acc_is_present(host_data_, size_ * sizeof(Value)));
if constexpr(ATLAS_ACC_DEBUG) {
std::cout << " + acc_map_data(hostptr:"<<host_data_<<", device:"<<device_data_<<", bytes:"<<size_ * sizeof(Value)<<")" <<std::endl;
}
atlas_acc_map_data((void*)host_data_, (void*)device_data_, size_ * sizeof(Value));
ATLAS_ASSERT(atlas_acc_is_present(host_data_, size_ * sizeof(Value)));
ATLAS_ASSERT(atlas_acc_deviceptr(host_data_) == device_data_);
acc_mapped_ = true;
}
#endif
Expand All @@ -411,7 +429,12 @@ class WrappedDataStore : public ArrayDataStore {
void accUnmap() const override {
#if ATLAS_HAVE_ACC
if (acc_mapped_) {
ATLAS_ASSERT(atlas_acc_is_present(host_data_, size_ * sizeof(Value)));
if constexpr(ATLAS_ACC_DEBUG) {
std::cout << " - acc_unmap_data(hostptr:"<<host_data_<<", device:"<<device_data_<<", bytes:"<<size_ * sizeof(Value)<<")" <<std::endl;
}
atlas_acc_unmap_data(host_data_);
ATLAS_ASSERT(!atlas_acc_is_present(host_data_, size_ * sizeof(Value)));
acc_mapped_ = false;
}
#endif
Expand Down
3 changes: 2 additions & 1 deletion src/atlas/functionspace/Spectral.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@ class Spectral : public functionspace::FunctionSpaceImpl {
n = total wavenumber
const auto zonal_wavenumbers = Spectral::zonal_wavenumbers();
const int truncation = Spectral::truncation();
idx_t jc=0;
for( int jm=0; jm<zonal_wavenumbers.size(); ++jm ) {
int m = zonal_wavenumbers(m);
int m = zonal_wavenumbers(jm);
for( int n=m; n<=truncation; ++n ) {
data( jc++, jfld ) = func_real_part(m,n);
data( jc++, jfld ) = func_imag_part(m,n);
Expand Down
16 changes: 8 additions & 8 deletions src/atlas/parallel/HaloExchange.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class HaloExchange : public util::Object {
DATA_TYPE* allocate_buffer(const int buffer_size, const bool on_device) const;

template <typename DATA_TYPE>
void deallocate_buffer(DATA_TYPE* buffer, const bool on_device) const;
void deallocate_buffer(DATA_TYPE* buffer, const int buffer_size, const bool on_device) const;

template <int ParallelDim, typename DATA_TYPE, int RANK>
void pack_send_buffer(const array::ArrayView<DATA_TYPE, RANK>& hfield,
Expand Down Expand Up @@ -198,8 +198,8 @@ void HaloExchange::execute(array::Array& field, bool on_device) const {

wait_for_send(inner_counts_init, inner_req);

deallocate_buffer<DATA_TYPE>(inner_buffer, on_device);
deallocate_buffer<DATA_TYPE>(halo_buffer, on_device);
deallocate_buffer<DATA_TYPE>(inner_buffer, inner_size, on_device);
deallocate_buffer<DATA_TYPE>(halo_buffer, halo_size, on_device);
}

template <typename DATA_TYPE, int RANK, typename ParallelDim>
Expand Down Expand Up @@ -249,8 +249,8 @@ void HaloExchange::execute_adjoint(array::Array& field, bool on_device) const {

zero_halos<parallelDim>(field_hv, field_dv, halo_buffer, halo_size, on_device);

deallocate_buffer<DATA_TYPE>(halo_buffer, on_device);
deallocate_buffer<DATA_TYPE>(inner_buffer, on_device);
deallocate_buffer<DATA_TYPE>(halo_buffer, halo_size, on_device);
deallocate_buffer<DATA_TYPE>(inner_buffer, inner_size, on_device);
}

template <typename DATA_TYPE>
Expand All @@ -269,12 +269,12 @@ DATA_TYPE* HaloExchange::allocate_buffer(const int buffer_size, const bool on_de


template <typename DATA_TYPE>
void HaloExchange::deallocate_buffer(DATA_TYPE* buffer, const bool on_device) const {
void HaloExchange::deallocate_buffer(DATA_TYPE* buffer, const int buffer_size, const bool on_device) const {
if (on_device) {
util::delete_devicemem(buffer);
util::delete_devicemem(buffer, buffer_size);
}
else {
util::delete_hostmem(buffer);
util::delete_hostmem(buffer, buffer_size);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/atlas/runtime/trace/Timings.cc
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,8 @@ void TimingsRegistry::report(std::ostream& out, const eckit::Configuration& conf
}
}

size_t max_title_length(0);
size_t max_location_length(0);
size_t max_title_length(2);
size_t max_location_length(9);
size_t max_nest(0);
long max_count(0);
double max_seconds(0);
Expand Down
39 changes: 24 additions & 15 deletions src/atlas/util/Allocate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,41 +25,41 @@ namespace util {
namespace detail {
//------------------------------------------------------------------------------

void allocate_managed(void** ptr, size_t size) {
void allocate_managed(void** ptr, size_t bytes) {
if constexpr (not ATLAS_HAVE_GPU) {
return allocate_host(ptr, size);
return allocate_host(ptr, bytes);
}
HIC_CALL(hicMallocManaged(ptr, size));
HIC_CALL(hicMallocManaged(ptr, bytes));
}

void deallocate_managed(void* ptr) {
void deallocate_managed(void* ptr, size_t bytes) {
if constexpr (not ATLAS_HAVE_GPU) {
return deallocate_host(ptr);
return deallocate_host(ptr, bytes);
}
HIC_CALL(hicDeviceSynchronize());
HIC_CALL(hicFree(ptr));
}

void allocate_device(void** ptr, size_t size) {
void allocate_device(void** ptr, size_t bytes) {
if constexpr (not ATLAS_HAVE_GPU) {
return allocate_host(ptr, size);
return allocate_host(ptr, bytes);
}
HIC_CALL(hicMalloc(ptr, size));
HIC_CALL(hicMalloc(ptr, bytes));
}

void deallocate_device(void* ptr) {
void deallocate_device(void* ptr, size_t bytes) {
if constexpr (not ATLAS_HAVE_GPU) {
return deallocate_host(ptr);
return deallocate_host(ptr, bytes);
}
HIC_CALL(hicDeviceSynchronize());
HIC_CALL(hicFree(ptr));
}

void allocate_host(void** ptr, size_t size) {
*ptr = malloc(size);
void allocate_host(void** ptr, size_t bytes) {
*ptr = malloc(bytes);
}

void deallocate_host(void* ptr) {
void deallocate_host(void* ptr, size_t /*bytes*/) {
free(ptr);
}

Expand All @@ -80,8 +80,17 @@ void atlas__allocate_managedmem_int(int*& a, size_t N) {
void atlas__allocate_managedmem_long(long*& a, size_t N) {
allocate_managedmem(a, N);
}
void atlas__deallocate_managedmem(void*& a) {
delete_managedmem(a);
void atlas__deallocate_managedmem_double(double*& a, size_t N) {
delete_managedmem(a, N);
}
// Releases a float array previously obtained as managed memory by forwarding
// to delete_managedmem(a, N). N is the element count (not bytes); the
// delete_managedmem template multiplies it by sizeof(float) itself.
// `a` is passed by reference because delete_managedmem resets a non-null
// pointer to nullptr after freeing it.
void atlas__deallocate_managedmem_float(float*& a, size_t N) {
delete_managedmem(a, N);
}
// Releases an int array previously obtained as managed memory by forwarding
// to delete_managedmem(a, N). N is the element count (not bytes); the
// delete_managedmem template multiplies it by sizeof(int) itself.
// `a` is passed by reference because delete_managedmem resets a non-null
// pointer to nullptr after freeing it.
void atlas__deallocate_managedmem_int(int*& a, size_t N) {
delete_managedmem(a, N);
}
// Releases a long array previously obtained as managed memory by forwarding
// to delete_managedmem(a, N). N is the element count (not bytes); the
// delete_managedmem template multiplies it by sizeof(long) itself.
// `a` is passed by reference because delete_managedmem resets a non-null
// pointer to nullptr after freeing it.
void atlas__deallocate_managedmem_long(long*& a, size_t N) {
delete_managedmem(a, N);
}
}

Expand Down
29 changes: 16 additions & 13 deletions src/atlas/util/Allocate.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ namespace util {
//------------------------------------------------------------------------------

namespace detail {
void allocate_managed(void** ptr, size_t size);
void deallocate_managed(void* ptr);
void allocate_managed(void** ptr, size_t bytes);
void deallocate_managed(void* ptr, size_t bytes);

void allocate_device(void** ptr, size_t size);
void deallocate_device(void* ptr);
void allocate_device(void** ptr, size_t bytes);
void deallocate_device(void* ptr, size_t bytes);

void allocate_host(void** ptr, size_t size);
void deallocate_host(void* ptr);
void allocate_host(void** ptr, size_t bytes);
void deallocate_host(void* ptr, size_t bytes);

} // namespace detail

Expand All @@ -37,9 +37,9 @@ void allocate_managedmem(T*& data, size_t N) {
}

template <typename T>
void delete_managedmem(T*& data) {
void delete_managedmem(T*& data, size_t N) {
if (data) {
detail::deallocate_managed(data);
detail::deallocate_managed(data, N * sizeof(T));
data = nullptr;
}
}
Expand All @@ -52,9 +52,9 @@ void allocate_devicemem(T*& data, size_t N) {
}

template <typename T>
void delete_devicemem(T*& data) {
void delete_devicemem(T*& data, size_t N) {
if (data) {
detail::deallocate_device(data);
detail::deallocate_device(data, N * sizeof(T));
data = nullptr;
}
}
Expand All @@ -67,9 +67,9 @@ void allocate_hostmem(T*& data, size_t N) {
}

template <typename T>
void delete_hostmem(T*& data) {
void delete_hostmem(T*& data, size_t N) {
if (data) {
detail::deallocate_host(data);
detail::deallocate_host(data, N * sizeof(T));
data = nullptr;
}
}
Expand All @@ -82,7 +82,10 @@ void atlas__allocate_managedmem_double(double*& a, size_t N);
void atlas__allocate_managedmem_float(float*& a, size_t N);
void atlas__allocate_managedmem_int(int*& a, size_t N);
void atlas__allocate_managedmem_long(long*& a, size_t N);
void atlas__deallocate_managedmem(void*& a);
void atlas__deallocate_managedmem_double(double*& a, size_t N);
void atlas__deallocate_managedmem_float(float*& a, size_t N);
void atlas__deallocate_managedmem_int(int*& a, size_t N);
void atlas__deallocate_managedmem_long(long*& a, size_t N);
}

//------------------------------------------------------------------------------
Expand Down
12 changes: 10 additions & 2 deletions src/atlas_acc_support/atlas_acc_map_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,19 @@

#include <openacc.h>

void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long size) {
acc_map_data(cpu_ptr, gpu_ptr, size);
void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long bytes) {
acc_map_data(cpu_ptr, gpu_ptr, bytes);
}


/* Thin wrapper that forwards cpu_ptr unchanged to the OpenACC runtime routine
 * acc_unmap_data(). Presumably intended to undo a mapping created with
 * atlas_acc_map_data() -- confirm against callers. */
void atlas_acc_unmap_data(void* cpu_ptr) {
acc_unmap_data(cpu_ptr);
}

/* Thin wrapper around the OpenACC runtime routine acc_is_present().
 * Forwards the host pointer and the byte length unchanged and returns the
 * runtime's result as-is (per the OpenACC specification, nonzero when the
 * given host range is present on the current device). */
int atlas_acc_is_present(void* cpu_ptr, unsigned long bytes) {
return acc_is_present(cpu_ptr, bytes);
}

/* Thin wrapper around the OpenACC runtime routine acc_deviceptr().
 * Returns whatever the runtime reports as the device address associated with
 * the host pointer cpu_ptr; no additional checking is done here. */
void* atlas_acc_deviceptr(void* cpu_ptr) {
return acc_deviceptr(cpu_ptr);
}
4 changes: 3 additions & 1 deletion src/atlas_acc_support/atlas_acc_map_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
extern "C" {
#endif

void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long size);
void atlas_acc_map_data(void* cpu_ptr, void* gpu_ptr, unsigned long bytes);
void atlas_acc_unmap_data(void* cpu_ptr);
int atlas_acc_is_present(void* cpu_ptr, unsigned long bytes);
void* atlas_acc_deviceptr(void* cpu_ptr);

#ifdef __cplusplus
}
Expand Down
Loading

0 comments on commit 97eadff

Please sign in to comment.