diff --git a/.circleci/config.yml b/.circleci/config.yml index 774844647..61a214465 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,7 +13,7 @@ jobs: steps: - checkout - run: bash dash/scripts/dash-ci.sh Release - - run: bash -c 'export DART_FORCE_C_STD=99; dash/scripts/dash-ci.sh Minimal' + #- run: bash -c 'export DART_FORCE_C_STD=99; dash/scripts/dash-ci.sh Minimal' - run: command: bash dash/scripts/circleci/collect-artifacts.sh when: always @@ -36,7 +36,7 @@ jobs: steps: - checkout - run: bash dash/scripts/dash-ci.sh Release - - run: bash -c 'export DART_FORCE_C_STD=99; dash/scripts/dash-ci.sh Minimal' + #- run: bash -c 'export DART_FORCE_C_STD=99; dash/scripts/dash-ci.sh Minimal' - run: command: bash dash/scripts/circleci/collect-artifacts.sh when: always @@ -59,7 +59,7 @@ jobs: steps: - checkout - run: bash dash/scripts/dash-ci.sh Release - - run: bash -c 'export DART_FORCE_C_STD=99; dash/scripts/dash-ci.sh Minimal' + #- run: bash -c 'export DART_FORCE_C_STD=99; dash/scripts/dash-ci.sh Minimal' - run: command: bash dash/scripts/circleci/collect-artifacts.sh when: always @@ -81,7 +81,7 @@ jobs: steps: - checkout - run: bash dash/scripts/dash-ci.sh Release - - run: bash -c 'export DART_FORCE_C_STD=99; dash/scripts/dash-ci.sh Minimal' + #- run: bash -c 'export DART_FORCE_C_STD=99; dash/scripts/dash-ci.sh Minimal' - run: command: bash dash/scripts/circleci/collect-artifacts.sh when: always diff --git a/dart-impl/mpi/src/dart_globmem.c b/dart-impl/mpi/src/dart_globmem.c index 1b83cda57..0609009eb 100644 --- a/dart-impl/mpi/src/dart_globmem.c +++ b/dart-impl/mpi/src/dart_globmem.c @@ -24,6 +24,8 @@ #include #include +#include +#include /* For PRIu64, uint64_t in printf */ #define __STDC_FORMAT_MACROS @@ -225,6 +227,13 @@ dart_team_memalloc_aligned_dynamic( dtype_size, nelem, nbytes); + /** + * Work around a bug in Open MPI where registrations are expected to be page + * aligned: https://github.com/open-mpi/ompi/issues/7384 + */ + size_t page_size 
= sysconf(_SC_PAGE_SIZE); + nbytes = (nbytes + ((page_size)-1)) & ~((page_size)-1); + dart_team_data_t *team_data = dart_adapt_teamlist_get(teamid); if (team_data == NULL) { DART_LOG_ERROR( @@ -240,7 +249,7 @@ dart_team_memalloc_aligned_dynamic( #if !defined(DART_MPI_DISABLE_SHARED_WINDOWS) char ** baseptr_set = NULL; - /* Allocate shared memory on sharedmem_comm, and create the related + /* Allocate shared memory on sharedmem_comm, and create the related * sharedmem_win */ /* NOTE: * Windows should definitely be optimized for the concrete value type i.e. @@ -331,15 +340,24 @@ dart_team_memalloc_aligned_dynamic( } else { baseptr_set[i] = sub_mem; } - } -#else - if (MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &sub_mem) != MPI_SUCCESS) { + } +#else // DART_MPI_DISABLE_SHARED_WINDOWS +#ifdef DYNAMIC_MEM_USE_POSIX_MEMALIGN + if (0 != posix_memalign((void**)&sub_mem, page_size, nbytes)) { + DART_LOG_ERROR( + "dart_team_memalloc_aligned_dynamic: bytes:%lu posix_memalign failed", + nbytes); + return DART_ERR_OTHER; /* allocation failed: bail out like the MPI_Alloc_mem path below */ + } +#else // DYNAMIC_MEM_USE_POSIX_MEMALIGN + if (MPI_Alloc_mem(nbytes, MPI_INFO_NULL, &sub_mem) != MPI_SUCCESS) { DART_LOG_ERROR( "dart_team_memalloc_aligned_dynamic: bytes:%lu MPI_Alloc_mem failed", nbytes); return DART_ERR_OTHER; } -#endif +#endif // DYNAMIC_MEM_USE_POSIX_MEMALIGN +#endif // DART_MPI_DISABLE_SHARED_WINDOWS MPI_Aint disp; MPI_Win win = team_data->window; diff --git a/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp b/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp index 2cfa618fc..ae86f5d9a 100644 --- a/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp +++ b/dash/examples/ex.02.matrix.halo.heat_equation/main.cpp @@ -123,26 +123,9 @@ int main(int argc, char *argv[]) // Update Halos asynchroniously current_halo->update_async(); - - // optimized calculation of inner matrix elements - auto* current_begin = current_matrix.lbegin(); + auto* new_begin = new_matrix.lbegin(); -#if 0 - for (auto i = inner_start; i < inner_end; i += offset) { - auto* center = 
current_begin + i; - auto* center_y_plus = center + offset; - auto* center_y_minus = center - offset; - for (auto j = 0; j < offset - 2; - ++j, ++center, ++center_y_plus, ++center_y_minus) { - /*auto dtheta = - (*(center - 1) + *(center + 1) - 2 * (*center)) / (dx * dx) + - (*(center_y_minus) + *(center_y_plus) - 2 * (*center)) / (dy * dy); - *(new_begin + i + j) = *center + k * dtheta * dt;*/ - *(new_begin + i + j) = calc(center, center_y_minus, center_y_plus, center - 1, center + 1); - } - } -#endif - // slow version + auto it_end = current_op->inner.end(); for(auto it = current_op->inner.begin(); it != it_end; ++it) { diff --git a/dash/include/dash/Dimensional.h b/dash/include/dash/Dimensional.h index 823abc78b..0ce7841ea 100644 --- a/dash/include/dash/Dimensional.h +++ b/dash/include/dash/Dimensional.h @@ -441,6 +441,9 @@ class ViewSpec typedef ViewRegion region_type; typedef ViewRange range_type; + using index_type = IndexType; + using size_type = SizeType; + public: template friend std::ostream& operator<<( @@ -520,6 +523,8 @@ class ViewSpec */ self_t & operator=(self_t && other) = default; + static constexpr auto ndim() { return NumDimensions; } + /** * Equality comparison operator. */ diff --git a/dash/include/dash/Matrix.h b/dash/include/dash/Matrix.h index 1d4c49f94..90509712c 100644 --- a/dash/include/dash/Matrix.h +++ b/dash/include/dash/Matrix.h @@ -116,11 +116,6 @@ template < typename LocalMemSpaceT> class LocalMatrixRef; -namespace halo { - template - class HaloMatrixWrapper; -} - /** * An n-dimensional array supporting subranges and sub-dimensional * projection. 
@@ -167,8 +162,6 @@ class Matrix typedef PatternT Pattern_t; - template - friend class halo::HaloMatrixWrapper; public: typedef GlobStaticMem GlobMem_t; diff --git a/dash/include/dash/TeamSpec.h b/dash/include/dash/TeamSpec.h index fd39e5036..4af37ca92 100644 --- a/dash/include/dash/TeamSpec.h +++ b/dash/include/dash/TeamSpec.h @@ -307,7 +307,20 @@ class TeamSpec : } ++d; } - return this->at(neighbor_coords); + return team_unit_t(static_cast(this->at(neighbor_coords))); + } + + team_unit_t neighbor(const std::array& offsets) const + { + auto neighbor_coords = this->coords(_myid); + for (dim_t d = 0; d < MaxDimensions; ++d) { + neighbor_coords[d] += offsets[d]; + if (neighbor_coords[d] < 0 || + neighbor_coords[d] >= this->_extents[d]) { + return UNDEFINED_TEAM_UNIT_ID; + } + } + return team_unit_t(static_cast(this->at(neighbor_coords))); } /** @@ -340,13 +353,29 @@ class TeamSpec : dim_t d = 0; for (auto offset_d : offsets) { neighbor_coords[d] += offset_d; - if (neighbor_coords[d] < 0 || - neighbor_coords[d] >= this->_extents[d]) { + if (neighbor_coords[d] < 0) { + neighbor_coords[d] += this->_extents[d]; /* wrap the already-offset coordinate; extent + offset is only right for a unit at coordinate 0 */ + } else if(neighbor_coords[d] >= this->_extents[d]) { neighbor_coords[d] %= this->_extents[d]; } ++d; } - return at(neighbor_coords); + return team_unit_t(static_cast(this->at(neighbor_coords))); + } + + team_unit_t periodic_neighbor(const std::array& offsets) const + { + auto neighbor_coords = this->coords(_myid); + + for (dim_t d = 0; d < MaxDimensions; ++d) { + neighbor_coords[d] += offsets[d]; + if (neighbor_coords[d] < 0) { + neighbor_coords[d] += this->_extents[d]; /* wrap the already-offset coordinate; extent + offset is only right for a unit at coordinate 0 */ + } else if(neighbor_coords[d] >= this->_extents[d]) { + neighbor_coords[d] %= this->_extents[d]; + } + } + return team_unit_t(static_cast(this->at(neighbor_coords))); } /** diff --git a/dash/include/dash/halo/CoordinateAccess.h b/dash/include/dash/halo/CoordinateAccess.h new file mode 100644 index 000000000..4ab0640eb --- /dev/null +++ 
b/dash/include/dash/halo/CoordinateAccess.h @@ -0,0 +1,412 @@ +#ifndef DASH__HALO_HALOCOORDINATEACCESS_H +#define DASH__HALO_HALOCOORDINATEACCESS_H + +#include + +namespace dash { + +namespace halo { + +using namespace internal; + +// forward declaration +template +class CoordinateAccess; + +template +class CoordinateInnerAccess; + +template +class DataInnerAccess { +private: + using Self_t = DataInnerAccess; + + static constexpr auto NumDimensions = CoordinateInnerAccessT::ndim(); + +public: + using Offsets_t = typename CoordinateInnerAccessT::Offsets_t; + using Element_t = typename CoordinateInnerAccessT::Element_t; + using index_t = typename CoordinateInnerAccessT::index_t; + +public: + + DataInnerAccess(const Offsets_t* offsets, Element_t* mem) + : _offsets(offsets), _mem(mem) { + } + + template + std::enable_if_t<(_CurrentDimension != NumDimensions), DataInnerAccess> + operator[](index_t pos) { + + return DataInnerAccess(_offsets, _mem + pos * (*_offsets)[CurrentDimension]); + } + + template + std::enable_if_t<(_CurrentDimension == NumDimensions), Element_t>& + operator[](index_t pos) { + return _mem[pos]; + } + +private: + const Offsets_t* _offsets; + Element_t* _mem; +}; + +template +class CoordinateInnerAccess { +private: + using Self_t = CoordinateInnerAccess; + + static constexpr auto NumDimensions = CoordinateAccessT::ndim(); + static constexpr auto MemoryArrange = CoordinateAccessT::memory_order(); + +public: + using Element_t = typename CoordinateAccessT::Element_t; + using index_t = typename CoordinateAccessT::index_t; + using ViewSpec_t = typename CoordinateAccessT::ViewSpec_t; + using Offsets_t = typename CoordinateAccessT::Offsets_t; + using DataInnerAccess_t = DataInnerAccess; + using ViewRange_t = typename CoordinateAccessT::ViewRange_t; + using AllViewRanges_t = typename CoordinateAccessT::AllViewRanges_t; + +public: + + CoordinateInnerAccess(const AllViewRanges_t& ranges, Element_t* mem, const Offsets_t* offsets) + : _ranges(ranges) + , 
_mem(mem) + , _offsets(offsets) + , _data_access(DataInnerAccess_t(_offsets, _mem)) { + } + + static constexpr decltype(auto) ndim() { return NumDimensions; } + + static constexpr decltype(auto) memory_order() { return MemoryArrange; } + + ViewRange_t range_dim(dim_t dim) { + return _ranges[dim]; /* std::array is subscripted; it has no call operator */ + } + + AllViewRanges_t ranges() { + return _ranges; + } + + decltype(auto) operator[] (index_t pos) { + return _data_access[pos]; + } + + decltype(auto) operator[] (index_t pos) const { + return _data_access[pos]; + } + +private: + AllViewRanges_t _ranges; + Element_t* _mem; + const Offsets_t* _offsets; + DataInnerAccess_t _data_access; +}; + + +template +class DataAccess { +private: + using Self_t = DataAccess; + + static constexpr auto NumDimensions = CoordinateAccessT::ndim(); + static constexpr auto MemoryArrange = CoordinateAccessT::memory_order(); + + using RegCoords_t = RegionCoords; + static constexpr auto RegIndexCenter = RegCoords_t::center_index(); + +public: + using Element_t = typename CoordinateAccessT::Element_t; + using index_t = typename CoordinateAccessT::index_t; + using Coords_t = typename CoordinateAccessT::Coords_t; + +public: + + DataAccess(const CoordinateAccessT* access, Element_t* mem, const Coords_t& coords, region_index_t reg_index, bool halo) + : _access(access), _mem(mem), _coords(coords), _reg_index(reg_index), _halo(halo) { + } + + template + std::enable_if_t<(_CurrentDimension != NumDimensions), DataAccess> + operator[](index_t pos) { + _coords[CurrentDimension] = pos; + _reg_index *= REGION_INDEX_BASE; + if(_halo || pos < 0 || pos >= static_cast(_access->_view_local->extent(CurrentDimension))) { + if(pos < 0) { + return DataAccess(_access, _mem + pos * _access->_offsets[CurrentDimension], _coords, _reg_index, true); + } + + if(pos >= static_cast(_access->_view_local->extent(CurrentDimension))) { + return DataAccess(_access, _mem + pos * _access->_offsets[CurrentDimension], _coords, _reg_index + 2, true); + } + + return DataAccess(_access, 
_mem + pos * _access->_offsets[CurrentDimension], _coords, _reg_index + 1, true); + } + + return DataAccess(_access, _mem + pos * _access->_offsets[CurrentDimension], _coords, _reg_index + 1, false); + } + + template + std::enable_if_t<(_CurrentDimension == NumDimensions), Element_t>& + operator[](index_t pos) { + if(_halo || pos < 0 || pos >= static_cast(_access->_view_local->extent(CurrentDimension))) { + _reg_index *= REGION_INDEX_BASE; + + if(pos >= 0) { + ++_reg_index; + } + + if(pos >= static_cast(_access->_view_local->extent(CurrentDimension))) { + ++_reg_index; + } + + _coords[CurrentDimension] = pos; + auto halo_memory = _access->_halo_memory; + halo_memory->to_halo_mem_coords(_reg_index, _coords); + + return *(halo_memory->first_element_at(_reg_index) + + halo_memory->offset(_reg_index, _coords)); + } + + return _mem[pos]; + } + +private: + const CoordinateAccessT* _access; + Element_t* _mem; + Coords_t _coords; + region_index_t _reg_index; + bool _halo; +}; + +template +class CoordinateHaloAccess { +private: + using Self_t = CoordinateHaloAccess; + + static constexpr auto NumDimensions = CoordinateAccessT::ndim(); + static constexpr auto MemoryArrange = CoordinateAccessT::memory_order(); + +public: + using Element_t = typename CoordinateAccessT::Element_t; + using index_t = typename CoordinateAccessT::index_t; + using ViewSpec_t = typename CoordinateAccessT::ViewSpec_t; + using Offsets_t = typename CoordinateAccessT::Offsets_t; + using DataAccess_t = DataAccess; + using ViewRange_t = typename CoordinateAccessT::ViewRange_t; + using AllViewRanges_t = typename CoordinateAccessT::AllViewRanges_t; + using AllBndViewRanges = std::vector; + + using HaloBlock_t = typename CoordinateAccessT::HaloBlock_t; + using HaloMemory_t = typename CoordinateAccessT::HaloMemory_t; + using Coords_t = typename CoordinateAccessT::Coords_t; + +public: + CoordinateHaloAccess(const CoordinateAccessT* _access) + : _access(_access) + , _data_access(_access->_data_access) + , 
_ranges(set_ranges(_access->_halo_block)) { + } + + static constexpr decltype(auto) ndim() { return NumDimensions; } + + static constexpr decltype(auto) memory_order() { return MemoryArrange; } + + AllBndViewRanges ranges() { + return _ranges; + } + + decltype(auto) operator[] (index_t pos) { + return _data_access[pos]; + } + + decltype(auto) operator[] (index_t pos) const { + return _data_access[pos]; + } + +private: + + AllBndViewRanges set_ranges(const HaloBlock_t* halo_block) const { + AllBndViewRanges all_ranges; + const auto& bnd_views = halo_block->boundary_views(); + all_ranges.reserve(bnd_views.size()); + + for(const auto& view : bnd_views) { + AllViewRanges_t ranges; + for(dim_t d = 0; d < NumDimensions; ++d) { + ranges[d] = {static_cast(view.offset(d)), + static_cast(view.offset(d) + view.extent(d))}; + } + all_ranges.push_back(ranges); + } + + return all_ranges; + } + + +private: + const CoordinateAccessT* _access; + DataAccess_t _data_access; + AllBndViewRanges _ranges; +}; + +template +class CoordinateAccess { +private: + using Self_t = CoordinateAccess; + using Pattern_t = typename HaloBlockT::Pattern_t; + + static constexpr auto NumDimensions = Pattern_t::ndim(); + static constexpr auto MemoryArrange = Pattern_t::memory_order(); + + template + friend class CoordinateInnerAccess; + + template + friend class CoordinateHaloAccess; + + template + friend class DataInnerAccess; + + template + friend class DataAccess; + +public: + using Element_t = typename HaloBlockT::Element_t; + using HaloBlock_t = HaloBlockT; + using HaloMemory_t = HaloMemory; + using index_t = typename std::make_signed::type; + using uindex_t = typename std::make_unsigned::type; + using ViewSpec_t = typename HaloBlockT::ViewSpec_t; + using Offsets_t = std::array; + using DataAccess_t = DataAccess; + using Coords_t = typename HaloMemory_t::ElementCoords_t; + using ViewRange_t = ViewRange; + using AllViewRanges_t = std::array; + + using CoordInnerAcc_t = CoordinateInnerAccess; + using 
CoordHaloAcc_t = CoordinateHaloAccess; + +public: + CoordinateAccess(const HaloBlockT* haloblock, + Element_t* local_memory, + HaloMemory_t* halomemory) + : _halo_block(haloblock) + , _local_memory(local_memory) + , _halo_memory(halomemory) + , _view_local(&(haloblock->view_local())) + , _offsets(set_offsets()) + , _data_access(DataAccess_t(this, _local_memory, Coords_t(), 0, false)) + , _ranges(set_ranges(_halo_block->view_inner_with_boundaries())) + , _ranges_local(set_ranges(_halo_block->view_local())) + , _ranges_halo(set_ranges_halo(_halo_block->view_inner_with_boundaries())) + , inner(set_ranges(_halo_block->view_inner()), _local_memory, &_offsets) + , boundary(this) { + } + + static constexpr decltype(auto) ndim() { return NumDimensions; } + + static constexpr decltype(auto) memory_order() { return MemoryArrange; } + + ViewRange_t range_dim(dim_t dim) { + return _ranges[dim]; /* std::array is subscripted; it has no call operator */ + } + + AllViewRanges_t ranges() { + return _ranges; + } + + ViewRange_t range_local_dim(dim_t dim) { + return _ranges_local[dim]; /* std::array is subscripted; it has no call operator */ + } + + AllViewRanges_t ranges_local() { + return _ranges_local; + } + + ViewRange_t range_halo_dim(dim_t dim) { + return _ranges_halo[dim]; /* std::array is subscripted; it has no call operator */ + } + + AllViewRanges_t ranges_halo() { + return _ranges_halo; + } + + decltype(auto) operator[] (index_t pos) { + return _data_access[pos]; + } + + decltype(auto) operator[] (index_t pos) const { + return _data_access[pos]; + } + +private: + Offsets_t set_offsets() { + Offsets_t offsets; + if(MemoryArrange == ROW_MAJOR) { + offsets[NumDimensions - 1] = 1; + for(dim_t d = NumDimensions - 1; d > 0;) { + --d; + offsets[d] = 1; + for(dim_t d_tmp = d + 1; d_tmp < NumDimensions; ++d_tmp) { + offsets[d] *= _view_local->extent(d_tmp); + } + } + } else { + offsets[0] = 1; + for(dim_t d = 1; d < NumDimensions; ++d) { + offsets[d] = 1; + for(dim_t d_tmp = 0; d_tmp < d; ++d_tmp) { + offsets[d] *= _view_local->extent(d_tmp); + } + } + } + + return offsets; + } + + AllViewRanges_t set_ranges(ViewSpec_t view) const { + 
AllViewRanges_t ranges; + for(dim_t d = 0; d < NumDimensions; ++d) { + ranges[d] = {static_cast(view.offset(d)), + static_cast(view.offset(d) + view.extent(d))}; + } + + return ranges; + } + + AllViewRanges_t set_ranges_halo(ViewSpec_t view) const { + AllViewRanges_t ranges; + for(dim_t d = 0; d < NumDimensions; ++d) { + const auto& ext_max = _halo_block->halo_spec().halo_extension_max(d); + ranges[d] = {static_cast(view.offset(d)) - ext_max.first, + static_cast(view.offset(d) + view.extent(d)) + ext_max.second}; + } + + return ranges; + } + + +private: + const HaloBlock_t* _halo_block; + Element_t* _local_memory; + HaloMemory_t* _halo_memory; + const ViewSpec_t* _view_local; + Offsets_t _offsets; + DataAccess_t _data_access; + AllViewRanges_t _ranges; + AllViewRanges_t _ranges_local; + AllViewRanges_t _ranges_halo; + +public: + CoordInnerAcc_t inner; + CoordHaloAcc_t boundary; + +}; + +} // namespace halo + +} // namespace dash +#endif // DASH__HALO_HALOCOORDINATEACCESS_H \ No newline at end of file diff --git a/dash/include/dash/halo/Halo.h b/dash/include/dash/halo/Halo.h index 446ebc27b..4dfe7fd45 100644 --- a/dash/include/dash/halo/Halo.h +++ b/dash/include/dash/halo/Halo.h @@ -1,10 +1,12 @@ #ifndef DASH__HALO__HALO_H__ #define DASH__HALO__HALO_H__ -#include - #include -#include + + +#include +#include +#include #include @@ -12,316 +14,7 @@ namespace dash { namespace halo { -/** - * Stencil point with raletive coordinates for N dimensions - * e.g. StencilPoint<2>(-1,-1) -> north west - */ -template -class StencilPoint : public Dimensional { -public: - using point_value_t = int16_t; - using coefficient_t = CoeffT; - -private: - using Base_t = Dimensional; - -public: - // TODO constexpr - /** - * Default Contructor - * - * All stencil point values are 0 and default coefficient = 1.0. 
- */ - StencilPoint() { - for(dim_t i(0); i < NumDimensions; ++i) { - this->_values[i] = 0; - } - } - - /** - * Constructor - * - * Custom stencil point values for all dimensions and default - * coefficient = 1.0. - */ - template - constexpr StencilPoint( - typename std::enable_if::type value, - Values... values) - : Base_t::Dimensional(value, (point_value_t) values...) {} - - /** - * Constructor - * - * Custom values and custom coefficient. - */ - template - constexpr StencilPoint( - typename std::enable_if::type coefficient, - point_value_t value, Values... values) - : Base_t::Dimensional(value, (point_value_t) values...), - _coefficient(coefficient) {} - - // TODO as constexpr - /** - * Returns maximum distance to center over all dimensions - */ - int max() const { - int max = 0; - for(dim_t i(0); i < NumDimensions; ++i) - max = std::max(max, (int) std::abs(this->_values[i])); - return max; - } - - /** - * Returns coordinates adjusted by stencil point - */ - template - ElementCoordsT stencil_coords(ElementCoordsT& coords) const { - return StencilPoint::stencil_coords(coords, this); - } - - /** - * Returns coordinates adjusted by a given stencil point - */ - template - static ElementCoordsT stencil_coords( - ElementCoordsT coords, - const StencilPoint& stencilp) { - for(dim_t d = 0; d < NumDimensions; ++d) { - coords[d] += stencilp[d]; - } - - return coords; - } - - /** - * Returns coordinates adjusted by a stencil point and a boolean to indicate - * a if the adjusted coordinate points to elements out of the given - * \ref ViewSpecpossible (inside: true, else: false). 
- */ - template - std::pair stencil_coords_check( - ElementCoordsT coords, const ViewSpecT& view) const { - bool halo = false; - for(dim_t d = 0; d < NumDimensions; ++d) { - coords[d] += this->_values[d]; - if(coords[d] < 0 || coords[d] >= view.extent(d)) - halo = true; - } - - return std::make_pair(coords, halo); - } - - /** - * Returns coordinates adjusted by a stencil point and a boolean to indicate - * a if the adjusted coordinate points to elements out of the given - * \ref ViewSpecpossible (inside: true, else: false). - * If one dimension points to an element outside the \ref ViewSpec this method - * returns immediately the unfinished adjusted coordinate and true. Otherwise - * the adjusted coordinate and false is returned, - */ - template - std::pair stencil_coords_check_abort( - ElementCoordsT coords, const ViewSpecT& view) const { - for(dim_t d = 0; d < NumDimensions; ++d) { - coords[d] += this->_values[d]; - if(coords[d] < 0 || coords[d] >= view.extent(d)) - return std::make_pair(coords, true); - } - - return std::make_pair(coords, false); - } - - /** - * Returns the coefficient for this stencil point - */ - CoeffT coefficient() const { return _coefficient; } - -private: - CoeffT _coefficient = 1.0; -}; // StencilPoint - -template -std::ostream& operator<<( - std::ostream& os, const StencilPoint& stencil_point) { - os << "dash::halo::StencilPoint<" << NumDimensions << ">" - << "(coefficient = " << stencil_point.coefficient() << " - points: "; - for(auto d = 0; d < NumDimensions; ++d) { - if(d > 0) { - os << ","; - } - os << stencil_point[d]; - } - os << ")"; - - return os; -} - -/** - * A collection of stencil points (\ref Stencil) - * e.g. 
StencilSpec, 2,2>({StencilPoint<2>(-1,0), - * StencilPoint<2>(1,0)}) -> north and south - */ -template -class StencilSpec { -private: - using Self_t = StencilSpec; - static constexpr auto NumDimensions = StencilPointT::ndim(); - -public: - using stencil_size_t = std::size_t; - using stencil_index_t = std::size_t; - using StencilArray_t = std::array; - using StencilPoint_t = StencilPointT; - using point_value_t = typename StencilPoint_t::point_value_t; - using MaxDistanceDim_t = std::pair; - using MaxDistanceAll_t = std::array; - -public: - /** - * Constructor - * - * Takes a list of \ref StencilPoint - */ - constexpr StencilSpec(const StencilArray_t& specs) : _specs(specs) {} - - /** - * Constructor - * - * Takes all given \ref StencilPoint. The number of arguments has to be the - * same as the given number of stencil points via the template argument. - */ - template - constexpr StencilSpec(const StencilPointT& value, const Values&... values) - : _specs{ { value, (StencilPointT) values... } } { - static_assert(sizeof...(values) == NumStencilPoints - 1, - "Invalid number of stencil point arguments"); - } - - // TODO constexpr - /** - * Copy Constructor - */ - StencilSpec(const Self_t& other) { _specs = other._specs; } - - /** - * \return container storing all stencil points - */ - constexpr const StencilArray_t& specs() const { return _specs; } - - /** - * \return number of stencil points - */ - static constexpr stencil_size_t num_stencil_points() { - return NumStencilPoints; - } - - /** - * Returns the stencil point index for a given \ref StencilPoint - * - * \return The index and true if the given stecil point was found, - * else the index 0 and false. 
- * Keep in mind that index 0 is only a valid index, if the returned - * bool is true - */ - const std::pair index(StencilPointT stencil) const { - for(auto i = 0; i < _specs.size(); ++i) { - if(_specs[i] == stencil) - return std::make_pair(i, true); - } - - return std::make_pair(0, false); - } - - /** - * Returns the minimal and maximal distances of all stencil points for all - * dimensions. - */ - MaxDistanceAll_t minmax_distances() const { - MaxDistanceAll_t max_dist{}; - for(const auto& stencil_point : _specs) { - for(auto d = 0; d < NumDimensions; ++d) { - if(stencil_point[d] < max_dist[d].first) { - max_dist[d].first = stencil_point[d]; - continue; - } - if(stencil_point[d] > max_dist[d].second) - max_dist[d].second = stencil_point[d]; - } - } - - return max_dist; - } - - /** - * Returns the minimal and maximal distances of all stencil points for the - * given dimension. - */ - MaxDistanceDim_t minmax_distances(dim_t dim) const { - MaxDistanceDim_t max_dist{}; - for(const auto& stencil_point : _specs) { - if(stencil_point[dim] < max_dist.first) { - max_dist.first = stencil_point[dim]; - continue; - } - if(stencil_point[dim] > max_dist.second) - max_dist.second = stencil_point[dim]; - } - - return max_dist; - } - /** - * \return stencil point for a given index - */ - constexpr const StencilPointT& operator[](stencil_index_t index) const { - return _specs[index]; - } - -private: - StencilArray_t _specs{}; -}; // StencilSpec - -template -std::ostream& operator<<( - std::ostream& os, const StencilSpec& specs) { - os << "dash::halo::StencilSpec<" << NumStencilPoints << ">" - << "("; - for(auto i = 0; i < NumStencilPoints; ++i) { - if(i > 0) { - os << ","; - } - os << specs[i]; - } - os << ")"; - - return os; -} - -/** - * Global boundary Halo properties - */ -enum class BoundaryProp : uint8_t { - /// No global boundary Halos - NONE, - /// Global boundary Halos with values from the opposite boundary - CYCLIC, - /// Global boundary Halos with predefined custom 
values - CUSTOM -}; - -inline std::ostream& operator<<(std::ostream& os, const BoundaryProp& prop) { - if(prop == BoundaryProp::NONE) - os << "NONE"; - else if(prop == BoundaryProp::CYCLIC) - os << "CYCLIC"; - else - os << "CUSTOM"; - - return os; -} +using namespace internal; /** * Global boundary property specification for every dimension @@ -367,295 +60,7 @@ std::ostream& operator<<(std::ostream& os, return os; } -/** - * Position of a \ref Region in one dimension relating to the center - */ -enum class RegionPos : bool { - /// Region before center - PRE, - /// Region behind center - POST -}; - -inline std::ostream& operator<<(std::ostream& os, const RegionPos& pos) { - if(pos == RegionPos::PRE) - os << "PRE"; - else - os << "POST"; - return os; -} - -/** - * N-Dimensional region coordinates and associated indices for all possible - * Halo/Boundary regions of a \ref HaloBlock. The center (all values = 1) is the - * local NArray memory block used by the \ref HaloBlock. - * - * Example for 2-D - * - * .-------..-------..-------. - * | 0 || 1 || 2 <-|-- region index - * | (0,0) || (0,1) || (0,2)<|-- region coordinates - * | NW || N || NE <|-- north east (only for explanation) - * '-------''-------''-------' - * .-------..-------..-------. - * | 3 || 4 || 5 | - * | (1,0) || (1,1) || (1,2) | - * | W || C || E | - * '-------''-------''-------' - * .-------..-------..-------. 
- * | 6 || 7 || 8 | - * | (2,0) || (2,1) || (2,2) | - * | SW || S || SE | - * '-------''-------''-------' - */ -template -class RegionCoords : public Dimensional { -private: - using Self_t = RegionCoords; - using Base_t = Dimensional; - using udim_t = std::make_unsigned::type; - -public: - using region_coord_t = uint8_t; - using region_index_t = uint32_t; - using region_size_t = uint32_t; - using Coords_t = std::array; - - /// index calculation base - 3^N regions for N-Dimensions - static constexpr uint8_t REGION_INDEX_BASE = 3; - - /// number of maximal possible regions - static constexpr auto MaxIndex = - ce::pow(REGION_INDEX_BASE, static_cast(NumDimensions)); - -public: - /** - * Default Constructor - * - * All region coordinate values are 1 and pointing to the center. - */ - RegionCoords() { - for(dim_t i = 0; i < NumDimensions; ++i) { - this->_values[i] = 1; - } - _index = index(this->_values); - } - - /** - * Constructor allows custom coordinate values and calculates the fitting - * region index. - */ - template - RegionCoords(region_coord_t value, Values... values) - : Base_t::Dimensional(value, values...) { - _index = index(this->_values); - } - - /** - * Constructor takes a region index to set up the region coordinates - */ - RegionCoords(region_index_t index) : _index(index) { - this->_values = coords(index); - } - - /** - * \return region index - */ - constexpr region_index_t index() const { return _index; } - - /** - * Returns a region index for a given dimension and \ref RegionPos - */ - static region_index_t index(dim_t dim, RegionPos pos) { - region_coord_t coord = (pos == RegionPos::PRE) ? 
0 : 2; - - region_index_t index = 0; - for(dim_t d = 0; d < NumDimensions; ++d) - if(dim == d) - index = coord + index * REGION_INDEX_BASE; - else - index = 1 + index * REGION_INDEX_BASE; - - return index; - } - - /** - * Returns the region index for a given \ref RegionCoords - * - * \return region index - */ - static region_index_t index(const Coords_t& coords) { - region_index_t index = coords[0]; - for(auto i = 1; i < NumDimensions; ++i) - index = coords[i] + index * REGION_INDEX_BASE; - - return index; - } - - /** - * \param index region index - * - * \return region coordinates - */ - static Coords_t coords(const region_index_t index) { - Coords_t coords{}; - region_index_t index_tmp = static_cast(index); - for(auto i = (NumDimensions - 1); i >= 1; --i) { - auto res = std::div(index_tmp, REGION_INDEX_BASE); - coords[i] = res.rem; - index_tmp = res.quot; - } - coords[0] = index_tmp; - - return coords; - } - - constexpr bool operator==(const Self_t& other) const { - return _index == other._index && this->_values == other._values; - } - - constexpr bool operator!=(const Self_t& other) const { - return !(*this == other); - } - -private: - region_index_t _index; -}; // RegionCoords - -/** - * Region specification connecting \ref RegionCoords with an extent. - * The region extent applies to all dimensions. 
- */ -template -class RegionSpec : public Dimensional { -private: - using Self_t = RegionSpec; - -public: - using RegionCoords_t = RegionCoords; - using region_index_t = typename RegionCoords_t::region_index_t; - using region_extent_t = uint16_t; - using region_coord_t = typename RegionCoords_t::region_coord_t; - -public: - /** - * Constructor using RegionCoords and the extent - */ - RegionSpec(const RegionCoords_t& coords, const region_extent_t extent) - : _coords(coords), _extent(extent), _rel_dim(init_rel_dim()) { - init_level(); - } - - /** - * Constructor using a region index and an extent - */ - RegionSpec(region_index_t index, const region_extent_t extent) - : _coords(RegionCoords_t(index)), _extent(extent), _rel_dim(init_rel_dim()) { - init_level(); - } - - RegionSpec() = default; - - /** - * Returns the region index for a given \ref StencilPoint - */ - template - static region_index_t index(const StencilT& stencil) { - region_index_t index = 0; - if(stencil[0] == 0) - index = 1; - else if(stencil[0] > 0) - index = 2; - for(auto d(1); d < NumDimensions; ++d) { - if(stencil[d] < 0) - index *= RegionCoords_t::REGION_INDEX_BASE; - else if(stencil[d] == 0) - index = 1 + index * RegionCoords_t::REGION_INDEX_BASE; - else - index = 2 + index * RegionCoords_t::REGION_INDEX_BASE; - } - - return index; - } - - /** - * Returns the region index - */ - constexpr region_index_t index() const { return _coords.index(); } - - /** - * Returns the \ref RegionCoords - */ - constexpr const RegionCoords_t& coords() const { return _coords; } - - /** - * Returns the extent - */ - constexpr region_extent_t extent() const { return _extent; } - - /** - * Returns the \ref RegionCoords for a given region index - */ - constexpr region_coord_t operator[](const region_index_t index) const { - return _coords[index]; - } - - constexpr bool operator==(const Self_t& other) const { - return _coords.index() == other._coords.index() && _extent == other._extent; - } - - constexpr bool 
operator!=(const Self_t& other) const { - return !(*this == other); - } - - /** - * Returns the highest dimension with region values != 1 - */ - dim_t relevant_dim() const { return _rel_dim; } - - /** - * returns the number of coordinates unequal the center (1) for all - * dimensions - */ - dim_t level() const { return _level; } - -private: - // TODO put init_rel_dim and level together - dim_t init_rel_dim() { - dim_t dim = 1; - for(auto d = 1; d < NumDimensions; ++d) { - if(_coords[d] != 1) - dim = d + 1; - } - - return dim; - } - - void init_level() { - for(auto d = 0; d < NumDimensions; ++d) { - if(_coords[d] != 1) - ++_level; - } - } - -private: - RegionCoords_t _coords{}; - region_extent_t _extent = 0; - dim_t _rel_dim = 1; - dim_t _level = 0; -}; // RegionSpec - -template -std::ostream& operator<<(std::ostream& os, - const RegionSpec& rs) { - os << "dash::halo::RegionSpec<" << NumDimensions << ">(" << (uint32_t) rs[0]; - for(auto i = 1; i < NumDimensions; ++i) - os << "," << (uint32_t) rs[i]; - os << "), Extent:" << rs.extent(); - - return os; -} /** * Contains all specified Halo regions. HaloSpec can be build with @@ -666,19 +71,21 @@ class HaloSpec { private: using Self_t = HaloSpec; using RegionCoords_t = RegionCoords; + static constexpr auto RegionsMax = NumRegionsMax; public: using RegionSpec_t = RegionSpec; - using Specs_t = std::array; - using region_index_t = typename RegionCoords_t::region_index_t; - using region_size_t = typename RegionCoords_t::region_index_t; + using Specs_t = std::array; using region_extent_t = typename RegionSpec_t::region_extent_t; + using HaloExtsMaxPair_t = std::pair; + using HaloExtsMax_t = std::array; public: constexpr HaloSpec(const Specs_t& specs) : _specs(specs) {} template HaloSpec(const StencilSpecT& stencil_spec) { + init_region_specs(); read_stencil_points(stencil_spec); } @@ -690,15 +97,25 @@ class HaloSpec { template HaloSpec(const RegionSpec_t& region_spec, const ARGS&... 
args) { + init_region_specs(); std::array tmp{ region_spec, args... }; for(auto& spec : tmp) { - _specs[spec.index()] = spec; - ++_num_regions; + auto& current_spec = _specs[spec.index()]; + if(current_spec.extent() == 0 && spec.extent() > 0) { + ++_num_regions; + } + + if(current_spec.extent() < spec.extent()) { + current_spec = spec; + set_max_halo_dist(current_spec.coords(), current_spec.extent()); + } } } HaloSpec(const Self_t& other) { _specs = other._specs; } + static constexpr dim_t ndim() { return NumDimensions; } + /** * Matching \ref RegionSpec for a given region index */ @@ -723,7 +140,25 @@ class HaloSpec { */ const Specs_t& specs() const { return _specs; } + /** + * Returns the maximal extension for a specific dimension + */ + const HaloExtsMaxPair_t& halo_extension_max(dim_t dim) const { + return _halo_extents_max[dim]; + } + + /** + * Returns the maximal halo extension for every dimension + */ + const HaloExtsMax_t& halo_extension_max() const { return _halo_extents_max; } + private: + void init_region_specs() { + for(region_index_t r = 0; r < RegionsMax; ++r) { + _specs[r] = RegionSpec_t(r, 0); + } + } + /* * Reads all stencil points of the given stencil spec and sets the region * specification. 
@@ -747,12 +182,16 @@ class HaloSpec { void set_region_spec(const StencilPointT& stencil) { auto index = RegionSpec_t::index(stencil); - if(_specs[index].extent() == 0) + auto max = stencil.max(); + auto reg_extent = _specs[index].extent(); + if(reg_extent == 0 && max > 0) { ++_num_regions; + } - auto max = stencil.max(); - if(max > _specs[index].extent()) + if(max > reg_extent) { _specs[index] = RegionSpec_t(index, max); + set_max_halo_dist(_specs[index].coords(), max); + } } /* @@ -763,7 +202,7 @@ class HaloSpec { template bool next_region(const StencilPointT& stencil, StencilPointT& stencil_combination) { - for(auto d = 0; d < NumDimensions; ++d) { + for(dim_t d = 0; d < NumDimensions; ++d) { if(stencil[d] == 0) continue; stencil_combination[d] = (stencil_combination[d] == 0) ? stencil[d] : 0; @@ -775,11 +214,27 @@ class HaloSpec { return false; } + void set_max_halo_dist(RegionCoords_t reg_coords, region_extent_t extent) { + for(dim_t d = 0; d < NumDimensions; ++d) { + if(reg_coords[d] == 1) { + continue; + } + + if(reg_coords[d] < 1) { + _halo_extents_max[d].first = std::max(_halo_extents_max[d].first, extent); + continue; + } + _halo_extents_max[d].second = std::max(_halo_extents_max[d].second, extent); + } + } + private: Specs_t _specs{}; + HaloExtsMax_t _halo_extents_max{}; region_size_t _num_regions{ 0 }; }; // HaloSpec + template std::ostream& operator<<(std::ostream& os, const HaloSpec& hs) { os << "dash::halo::HaloSpec<" << NumDimensions << ">("; @@ -800,412 +255,566 @@ std::ostream& operator<<(std::ostream& os, const HaloSpec& hs) { return os; } -/** - * Iterator to iterate over all region elements defined by \ref Region - */ -template < - typename ElementT, - typename PatternT, - typename GlobMemT, - typename PointerT = - typename GlobMemT::void_pointer::template rebind, - typename ReferenceT = GlobRef> -class RegionIter { -private: - using Self_t = RegionIter; +template +class BoundaryRegionCheck { + static constexpr auto NumDimensions = 
ViewSpecT::ndim(); + static constexpr auto CenterIndex = RegionCoords::center_index(); +public: + using GlobalBndSpec_t = GlobalBoundarySpec; + using EnvRegInfo_t = EnvironmentRegionInfo; + using RegionBorders_t = typename EnvRegInfo_t::RegionBorders_t; + using RegionData_t = typename EnvRegInfo_t::RegionData_t; + using RegionSpec_t = RegionSpec; + using MaxDistPair_t = std::pair; + using MaxDist_t = std::array; + using BlockViewSpec_t = BlockViewSpec; - static const auto NumDimensions = PatternT::ndim(); public: - // Iterator traits - using iterator_category = std::random_access_iterator_tag; - using value_type = ElementT; - using difference_type = typename PatternT::index_type; - using pointer = PointerT; - using local_pointer = typename pointer::local_type; - using reference = ReferenceT; + BoundaryRegionCheck(const ViewSpecT& view, const MaxDist_t& max_dist, const GlobalBndSpec_t& glob_bnd_spec, const RegionBorders_t& borders) + : _view(&view), _max_dist(max_dist) { + std::array to_small; - using const_reference = const reference; - using const_pointer = const pointer; + const auto& view_extents = _view->extents(); - using GlobMem_t = GlobMemT; + for(dim_t d = 0; d < NumDimensions; ++d) { + auto minmax_dim = max_dist[d]; + auto dist = minmax_dim.first + minmax_dim.second; - using ViewSpec_t = typename PatternT::viewspec_type; - using pattern_index_t = typename PatternT::index_type; - using pattern_size_t = typename PatternT::size_type; + to_small[d] = BS::BIGGER; + if(view_extents[d] <= dist) { + to_small[d] = (view_extents[d] > minmax_dim.first) ? BS::EQUALS_LESS : BS::PRE_ONLY; + } + } -public: - /** - * Constructor, creates a region iterator. 
- */ - RegionIter( - GlobMem_t* globmem, - const PatternT* pattern, - const ViewSpec_t& _region_view, - pattern_index_t pos, - pattern_size_t size) - : _globmem(globmem) - , _pattern(pattern) - , _region_view(_region_view) - , _idx(pos) - , _max_idx(size - 1) - , _myid(pattern->team().myid()) - , _lbegin(dash::local_begin( - static_cast(globmem->begin()), pattern->team().myid())) - { + for(int d = 0; d < NumDimensions; ++d) { + bool test_small = false; + for(int d_tmp = 0; d_tmp < d; ++d_tmp) { + if(to_small[d_tmp] != BS::BIGGER) { + test_small = true; + break; + } + } + + if(test_small) { + if(borders[d].first && glob_bnd_spec[d] == BoundaryProp::NONE) { + _valid_main[d].first = {false, REASON::BORDER}; + } else { + _valid_main[d].first = {false, REASON::TO_SMALL}; + } + + if(borders[d].second && glob_bnd_spec[d] == BoundaryProp::NONE) { + _valid_main[d].second = {false, REASON::BORDER}; + } else { + _valid_main[d].second = {false, REASON::TO_SMALL}; + } + continue; + } + if(borders[d].first && glob_bnd_spec[d] == BoundaryProp::NONE) { + _valid_main[d].first = {false, REASON::BORDER}; + } else { + _valid_main[d].first = {true, REASON::NONE}; + } + + if(borders[d].second && glob_bnd_spec[d] == BoundaryProp::NONE) { + _valid_main[d].second = {false, REASON::BORDER}; + } else { + if(_valid_main[d].first.valid && to_small[d] == BS::PRE_ONLY) { + _valid_main[d].second = {false, REASON::TO_SMALL}; + } else { + _valid_main[d].second = {true, REASON::NONE}; + } + } + } } - /** - * Copy constructor. - */ - RegionIter(const Self_t& other) = default; + BlockViewSpec_t block_views() { + // TODO PRE and POST Region true but not full POST region possible + auto offsets_inner = _view->offsets(); + auto extents_inner = _view->extents(); + auto offsets_inner_bnd = _view->offsets(); + auto extents_inner_bnd = _view->extents(); - /** - * Move constructor - */ - RegionIter(Self_t&& other) = default; + for(int d = 0; d < NumDimensions; ++d) { - /** - * Assignment operator. 
- * - * \see DashGlobalIteratorConcept - */ - Self_t& operator=(const Self_t& other) = default; + offsets_inner[d] = _max_dist[d].first; + DASH_ASSERT_MSG(extents_inner[d] >= _max_dist[d].first, + "Inner view to small for the given Stencil."); + auto sum_dist = _max_dist[d].first + _max_dist[d].second; + extents_inner[d] -= (extents_inner[d] < sum_dist) ? _max_dist[d].first : sum_dist; - /** - * Move assignment operator - */ - Self_t& operator=(Self_t&& other) = default; + offsets_inner_bnd[d] = 0; - /** - * The number of dimensions of the iterator's underlying pattern. - * - * \see DashGlobalIteratorConcept - */ - static constexpr dim_t ndim() { return NumDimensions; } + if(!_valid_main[d].first.valid) { + offsets_inner_bnd[d] = _max_dist[d].first; + extents_inner_bnd[d] -= _max_dist[d].first; + } - /** - * Dereference operator. - * - * \return A global reference to the element at the iterator's position. - */ - reference operator*() const { return operator[](_idx); } + if(!_valid_main[d].second.valid) { + if(extents_inner_bnd[d] >= _max_dist[d].second) { + extents_inner_bnd[d] -= _max_dist[d].second; + } + } + } - /** - * Subscript operator, returns global reference to element at given - * global index. 
- * - * \see DashGlobalIteratorConcept - */ - reference operator[](pattern_index_t n) const { - auto coords = glob_coords(_idx + n); - auto local_pos = _pattern->local_index(coords); + return {ViewSpecT(offsets_inner, extents_inner), ViewSpecT(offsets_inner_bnd, extents_inner_bnd)}; + } + + bool is_bnd_region_valid(const RegionSpec_t& region) { + + auto& coords = region.coords(); + for(int d = 0; d < NumDimensions; ++d) { + if(coords[d] == 0 && !_valid_main[d].first.valid) { + return false; + } - auto p = static_cast(_globmem->begin()); - p.set_unit(local_pos.unit); - p += local_pos.index; - return *p; + if(coords[d] > 1 && !_valid_main[d].second.valid) { + return false; + } + } + return true; } - dart_gptr_t dart_gptr() const { return operator[](_idx).dart_gptr(); } + RegionData_t region_data(const RegionSpec_t& region, bool local_offsets = true) { - /** - * Checks whether the element referenced by this global iterator is in - * the calling unit's local memory. - */ - bool is_local() const { return (_myid == lpos().unit); } + if(region.index() == CenterIndex) { + return {ViewSpecT(), false}; + } + + RegionData_t region_data; + auto& coords = region.coords(); + for(dim_t d = 0; d < NumDimensions; ++d) { + if(coords[d] == 0 && !_valid_main[d].first.valid) { + return {ViewSpecT(), false}; + } + + if(coords[d] > 1 && !_valid_main[d].second.valid) { + return {ViewSpecT(), false}; + } + } + + auto offsets = _view->offsets(); + auto extents = _view->extents(); + + if(local_offsets) { + std::fill(offsets.begin(), offsets.end(), 0); + } + + for(dim_t d = 0; d < NumDimensions; ++d) { + + if(coords[d] < 1) { + extents[d] = (region.extent() == 0) ? 
_max_dist[d].first : region.extent(); + continue; + } + + if(coords[d] == 1) { + if(_valid_main[d].first.valid || + (!_valid_main[d].first.valid && _valid_main[d].first.reason == REASON::BORDER)) { + extents[d] -= _max_dist[d].first; + offsets[d] += _max_dist[d].first; + } + + if(_valid_main[d].second.valid || + (!_valid_main[d].second.valid && _valid_main[d].second.reason == REASON::BORDER)) { + extents[d] -= _max_dist[d].second; + } + continue; + } + + offsets[d] = extents[d] - _max_dist[d].second; + extents[d] = (region.extent() == 0) ? _max_dist[d].second : region.extent(); + } + + region_data.valid = true; + region_data.view = ViewSpecT(offsets, extents); + return region_data; + } + + RegionData_t region_data_duplicate(const RegionSpec_t& region, bool local_offsets = true) { + if(region.extent() == 0) { + return {ViewSpecT(), false}; + } + + RegionData_t region_data; + auto& coords = region.coords(); + for(dim_t d = 0; d < NumDimensions; ++d) { + if(coords[d] == 0 && !_valid_main[d].first.valid) { + return {ViewSpecT(), false}; + } + + if(coords[d] > 1 && !_valid_main[d].second.valid) { + return {ViewSpecT(), false}; + } + } + + auto offsets = _view->offsets(); + auto extents = _view->extents(); + + if(local_offsets) { + std::fill(offsets.begin(), offsets.end(), 0); + } + + // TODO PRE and POST Region true but not full POST region possible + for(dim_t d = 0; d < NumDimensions; ++d) { + + if(coords[d] < 1) { + extents[d] = region.extent(); + continue; + } + + if(coords[d] == 1) { + continue; + } + + offsets[d] = extents[d] - _max_dist[d].second; + extents[d] = region.extent(); + } + + region_data.valid = true; + region_data.view = ViewSpecT(offsets, extents); + return region_data; + } + +private: + /* + * Defines the relation between block extent and stencil distance + * PRE_ONLY -> only the boundary on the pre center side is valid + * EQUALS_LESS -> both bboundaries (pre and post center) are valid, but are equal or less than matrix extent + * BIGGER -> matrix 
extent is bigger than stencil distance + */ + enum class BS{ + PRE_ONLY, + EQUALS_LESS, + BIGGER + }; + + enum class REASON { + NONE, + TO_SMALL, + BORDER + }; + + struct valid_region { + bool valid{}; + REASON reason{}; + + }; + + +private: + const ViewSpecT* _view; + const MaxDist_t _max_dist; + std::array,NumDimensions> _valid_main; +}; + + +template +class BlockEnvironment { + static constexpr auto NumDimensions = PatternT::ndim(); + static constexpr auto RegionsMax = NumRegionsMax; + +public: + using ViewSpec_t = typename PatternT::viewspec_type; + +private: + using RegionData_t = RegionData; + using BndInfos_t = std::array; + using EnvRegInfo_t = EnvironmentRegionInfo; + using BlockEnv_t = std::array; + using HaloSpec_t = HaloSpec; + using HaloExtsMaxPair_t = typename HaloSpec_t::HaloExtsMaxPair_t; + using HaloExtsMax_t = typename HaloSpec_t::HaloExtsMax_t; + using RegionCoords_t = RegionCoords; + +public: + using GlobalBndSpec_t = GlobalBoundarySpec; + + using RegionBorders_t = typename EnvRegInfo_t::RegionBorders_t; + using BndRegCheck_t = BoundaryRegionCheck; + using RegIdxMain_t = std::array; + using BlockViewSpec_t = BlockViewSpec; + + + BlockEnvironment(const PatternT& pattern, const HaloSpec_t& halo_spec, + const ViewSpec_t& view_glob, const GlobalBndSpec_t& glob_bound_spec) + : _view(&view_glob), _glob_bnd_spec(&glob_bound_spec) { + set_environment(pattern, halo_spec); + } + + BndRegCheck_t boundary_region_check(const HaloSpec_t& halo_spec) const { + return BndRegCheck_t(*_view, halo_spec.halo_extension_max(), *_glob_bnd_spec, _borders); + } + + template + BndRegCheck_t boundary_region_check(const StencilSpec& stencil_spec) const { + auto minmax = stencil_spec.minmax_distances(); + HaloExtsMax_t max_dist{}; + for(dim_t d = 0; d < NumDimensions; ++d) { + max_dist[d] = {std::abs(minmax[d].first), minmax[d].second}; + } - GlobIter global() const { - auto g_idx = gpos(); - return GlobIter(_globmem, *_pattern, g_idx); + return BndRegCheck_t(*_view, 
max_dist, *_glob_bnd_spec, _borders); } - ElementT* local() const { - auto local_pos = lpos(); - - if(_myid != local_pos.unit) - return nullptr; - - // - return _lbegin + local_pos.index; + auto info_dim(dim_t dim) const { + return std::make_pair(std::ref(_block_env[_reg_idx_main[dim].first]), std::ref(_block_env[_reg_idx_main[dim].second])); } - /** - * Position of the iterator in global storage order. - * - * \see DashGlobalIteratorConcept - */ - pattern_index_t pos() const { return gpos(); } + const EnvRegInfo_t& info(region_index_t region_index) const { + return _block_env[region_index]; + } - /** - * Position of the iterator in its view's iteration space, disregarding - * the view's offset in global index space. - * - * \see DashViewIteratorConcept - */ - pattern_index_t rpos() const { return _idx; } + const BlockEnv_t& info() const { + return _block_env; + } - /** - * Position of the iterator in global index range. - * Projects iterator position from its view spec to global index domain. - * - * \see DashGlobalIteratorConcept - */ - pattern_index_t gpos() const { - return _pattern->global_at(glob_coords(_idx)); + const auto& view_inner() const { + return _block_views.inner; } - std::array gcoords() const { - return glob_coords(_idx); + const auto& view_inner_boundary() const { + return _block_views.inner_bound; } - typename PatternT::local_index_t lpos() const { - return _pattern->local_index(glob_coords(_idx)); + const auto& views() const { + return _block_views; } - const ViewSpec_t view() const { return _region_view; } +private: - inline bool is_relative() const noexcept { return true; } + void set_environment(const PatternT& pattern, const HaloSpec_t& halo_spec) { + const auto& view_offsets = _view->offsets(); + const auto& view_extents = _view->extents(); - /** - * The instance of \c GlobStaticMem used by this iterator to resolve addresses - * in global memory. 
- * - * \see DashGlobalIteratorConcept - */ - const GlobMem_t& globmem() const { return *_globmem; } + const auto& glob_extent = pattern.extents(); + for(dim_t d = 0; d < NumDimensions; ++d) { + _reg_idx_main[d] = RegionCoords_t::index(d); + if(view_offsets[d] == 0) { + _borders[d].first = true; + } + if(view_offsets[d] + view_extents[d] == glob_extent[d]) { + _borders[d].second = true; + } + } - /** - * Prefix increment operator. - */ - Self_t& operator++() { - ++_idx; - return *this; - } + BndRegCheck_t bnd_check(*_view, halo_spec.halo_extension_max(), *_glob_bnd_spec, _borders); + _block_views = bnd_check.block_views(); - /** - * Postfix increment operator. - */ - Self_t operator++(int) { - Self_t result = *this; - ++_idx; - return result; - } + const auto& team_spec = pattern.teamspec(); + for(const auto& spec : halo_spec.specs()) { + auto halo_extent = spec.extent(); + if(!halo_extent) { + continue; + } - /** - * Prefix decrement operator. - */ - Self_t& operator--() { - --_idx; - return *this; - } + auto& env_md = _block_env[spec.index()]; - /** - * Postfix decrement operator. 
- */ - Self_t operator--(int) { - Self_t result = *this; - --_idx; - return result; - } + env_md.bnd_reg_data = bnd_check.region_data(spec); - Self_t& operator+=(pattern_index_t n) { - _idx += n; - return *this; - } + std::array neighbor_coords_rem; + std::array neighbor_coords; + auto reg_coords = spec.coords(); + auto reg_coords_rem = RegionCoords_t::coords(RegionsMax - 1 - spec.index()); - Self_t& operator-=(pattern_index_t n) { - _idx -= n; - return *this; - } + env_md.boundary_prop = BoundaryProp::CYCLIC; + BoundaryProp bnd_prop_to = BoundaryProp::CYCLIC; - Self_t operator+(pattern_index_t n) const { - Self_t res{ *this }; - res += n; + const auto& halo_ext_max = halo_spec.halo_extension_max(); - return res; - } + auto halo_region_offsets = _view->offsets(); + auto halo_region_extents = _view->extents(); + for(dim_t d = 0; d < NumDimensions; ++d) { - Self_t operator-(pattern_index_t n) const { - Self_t res{ *this }; - res -= n; + // region coords uses 1 for the center position, while \ref TeamSpec use 0 + neighbor_coords[d] = static_cast(reg_coords[d]) - 1; + neighbor_coords_rem[d] = static_cast(reg_coords_rem[d]) - 1; - return res; - } - bool operator<(const Self_t& other) const { - return compare(other, std::less()); - } + if(spec[d] == 1) { + continue; + } - bool operator<=(const Self_t& other) const { - return compare(other, std::less_equal()); - } + halo_region_extents[d] = halo_extent; - bool operator>(const Self_t& other) const { - return compare(other, std::greater()); - } + if(spec[d] < 1) { + if(_borders[d].first) { + halo_region_offsets[d] = pattern.extent(d) - halo_extent; + env_md.boundary_prop = test_bound_prop(env_md.boundary_prop, (*_glob_bnd_spec)[d]); + env_md.border_region = true; + env_md.region_borders[d].first = true; + }else { + halo_region_offsets[d] -= halo_extent; + } - bool operator>=(const Self_t& other) const { - return compare(other, std::greater_equal()); - } + if(_borders[d].second) { + bnd_prop_to = test_bound_prop(bnd_prop_to, 
(*_glob_bnd_spec)[d]); + } + continue; + } - bool operator==(const Self_t& other) const { - return compare(other, std::equal_to()); - } + // spec[d] > 1 + if(_borders[d].second) { + halo_region_offsets[d] = 0; + env_md.boundary_prop = test_bound_prop(env_md.boundary_prop, (*_glob_bnd_spec)[d]); + env_md.border_region = true; + env_md.region_borders[d].second = true; + } else { + halo_region_offsets[d] += _view->extent(d); + } - bool operator!=(const Self_t& other) const { - return compare(other, std::not_equal_to()); - } + if(_borders[d].first) { + bnd_prop_to = test_bound_prop(bnd_prop_to, (*_glob_bnd_spec)[d]); + } + } - const PatternT& pattern() const { return *_pattern; } + env_md.halo_reg_data = {ViewSpec_t(halo_region_offsets, halo_region_extents), true}; + if(env_md.boundary_prop != BoundaryProp::NONE) { + if(env_md.boundary_prop == BoundaryProp::CYCLIC) { + env_md.neighbor_id_from = static_cast(team_spec.periodic_neighbor(neighbor_coords)); + } else { + env_md.neighbor_id_from = static_cast(team_spec.neighbor(neighbor_coords)); + } + } else { + env_md.neighbor_id_from = static_cast(team_spec.neighbor(neighbor_coords)); + env_md.halo_reg_data.valid = false; + } -private: - /** - * Compare position of this global iterator to the position of another - * global iterator with respect to viewspec projection. 
- */ - template - bool compare(const Self_t& other, const GlobIndexCmpFunc& gidx_cmp) const { -#if __REMARK__ - // Usually this is a best practice check, but it's an infrequent case - // so we rather avoid this comparison: - if(this == &other) { - return true; - } -#endif - if(&_region_view == &(other._region_view) - || _region_view == other._region_view) { - return gidx_cmp(_idx, other._idx); + if(bnd_prop_to == BoundaryProp::CYCLIC) { + env_md.neighbor_id_to = static_cast(team_spec.periodic_neighbor(neighbor_coords_rem)); + } else { + env_md.neighbor_id_to = static_cast(team_spec.neighbor(neighbor_coords_rem)); + } } - // TODO not the best solution - return false; } - std::array glob_coords( - pattern_index_t idx) const { - return _pattern->coords(idx, _region_view); + BoundaryProp test_bound_prop(const BoundaryProp& current_prop, const BoundaryProp& new_prop) { + if(current_prop == BoundaryProp::NONE || new_prop == BoundaryProp::NONE) { + return BoundaryProp::NONE; + } + + if(current_prop == BoundaryProp::CUSTOM || new_prop == BoundaryProp::CUSTOM) { + return BoundaryProp::CUSTOM; + } + + return BoundaryProp::CYCLIC; } private: - /// Global memory used to dereference iterated values. - GlobMem_t* _globmem; - /// Pattern that created the encapsulated block. - const PatternT* _pattern; - - const ViewSpec_t _region_view; - /// Iterator's position relative to the block border's iteration space. - pattern_index_t _idx{ 0 }; - /// Maximum iterator position in the block border's iteration space. 
- pattern_index_t _max_idx{ 0 }; - /// Unit id of the active unit - team_unit_t _myid; + const ViewSpec_t* _view; + const GlobalBndSpec_t* _glob_bnd_spec; + BlockEnv_t _block_env; + RegionBorders_t _borders{}; + RegIdxMain_t _reg_idx_main; + BlockViewSpec_t _block_views; +}; - local_pointer _lbegin; +template +std::ostream& operator<<(std::ostream& os, const BlockEnvironment& env_info) { + static constexpr auto NumDimensions = PatternT::ndim(); + static constexpr auto RegionsMax = NumRegionsMax; -}; // class HaloBlockIter + const auto& env_mds = env_info.info(); -template -std::ostream& operator<<( - std::ostream& os, - const RegionIter& it) { - os << "dash::halo::RegionIter<" << typeid(ElementT).name() << ">(" - << "; idx: " << it.rpos() << "; view: " << it.view() << ")"; + os << "dash::halo::BlockEnvironment { "; + for(region_index_t r = 0; r < RegionsMax; ++r) { + const auto& env_md = env_mds[r]; + os << dash::myid() << " -> "; + os << "region_index: " << r << ";" + << env_md << ")\n"; + } + os << "}"; return os; } -template -auto distance( - /// Global iterator to the initial position in the global sequence - const RegionIter& first, - /// Global iterator to the final position in the global sequence - const RegionIter& last) -> - typename PatternT::index_type { - return last - first; -} - /** - * Provides \ref RegionIter and some region metadata like \ref RegionSpec, - * size etc. + * Adapts all views \ref HaloBlock provides to the given \ref StencilSpec. 
*/ -template -class Region { +template +class StencilSpecificViews { private: - static constexpr auto NumDimensions = PatternT::ndim(); + static constexpr auto NumDimensions = HaloBlockT::ndim(); -public: - using iterator = RegionIter; - using const_iterator = const iterator; - using RegionSpec_t = RegionSpec; - using GlobMem_t = GlobMemT; - using ViewSpec_t = typename PatternT::viewspec_type; - using Border_t = std::array; - using region_index_t = typename RegionSpec_t::region_index_t; - using pattern_size_t = typename PatternT::size_type; + using Pattern_t = typename HaloBlockT::Pattern_t; + using HaloSpec_t = typename HaloBlockT::HaloSpec_t; public: - Region(const RegionSpec_t& region_spec, const ViewSpec_t& region, - GlobMem_t& globmem, const PatternT& pattern, const Border_t& border, - bool custom_region) - : _region_spec(region_spec), _region(region), _border(border), - _border_region( - std::any_of(border.begin(), border.end(), - [](bool border_dim) { return border_dim == true; })), - _custom_region(custom_region), - _beg(&globmem, &pattern, _region, 0, _region.size()), - _end(&globmem, &pattern, _region, _region.size(), _region.size()) {} - - const region_index_t index() const { return _region_spec.index(); } - - const RegionSpec_t& spec() const { return _region_spec; } + using HaloBlock_t = HaloBlockT; + using ViewSpec_t = typename HaloBlockT::ViewSpec_t; + using BoundaryViews_t = typename HaloBlockT::BoundaryViews_t; + using pattern_size_t = typename Pattern_t::size_type; + using StencilSpec_t = StencilSpecT; - const ViewSpec_t& view() const { return _region; } - - constexpr pattern_size_t size() const { return _region.size(); } +public: + StencilSpecificViews(const HaloBlockT& halo_block, + const StencilSpec_t& stencil_spec, + const ViewSpec_t* view_local) + : _stencil_spec(&stencil_spec), _view_local(view_local) { + HaloSpec_t halo_spec(stencil_spec); + auto bnd_region_check = halo_block.block_env().boundary_region_check(halo_spec); + + auto block_views 
= bnd_region_check.block_views(); + _view_inner = block_views.inner; + _view_inner_with_boundaries = block_views.inner_bound; + for(const auto& region : halo_spec.specs()) { + auto bnd_region_data = bnd_region_check.region_data(region); + _size_bnd_elems += bnd_region_data.view.size(); + _boundary_views.push_back(std::move(bnd_region_data.view)); + } + } - constexpr Border_t border() const { return _border; } + /** + * Returns \ref StencilSpec + */ + const StencilSpec_t& stencil_spec() const { return *_stencil_spec; } - bool is_border_region() const { return _border_region; }; + /** + * Returns \ref ViewSpec including all elements (locally) + */ + const ViewSpec_t& view() const { return *_view_local; } - bool is_custom_region() const { return _custom_region; }; + /** + * Returns \ref ViewSpec including all inner elements + */ + const ViewSpec_t& inner() const { return _view_inner; } - constexpr bool border_dim(dim_t dim) const { return _border[dim]; } + /** + * Returns \ref ViewSpec including all inner and boundary elements + */ + const ViewSpec_t& inner_with_boundaries() const { + return _view_inner_with_boundaries; + } - iterator begin() const { return _beg; } + /** + * Returns all boundary views including all boundary elements (no dublicates) + */ + const BoundaryViews_t& boundary_views() const { return _boundary_views; } - iterator end() const { return _end; } + /** + * Returns the number of all boundary elements (no dublicates) + */ + pattern_size_t boundary_size() const { return _size_bnd_elems; } private: - const RegionSpec_t _region_spec; - const ViewSpec_t _region; - Border_t _border; - bool _border_region; - bool _custom_region; - iterator _beg; - iterator _end; -}; // Region + const StencilSpec_t* _stencil_spec; + const ViewSpec_t* _view_local; + ViewSpec_t _view_inner; + ViewSpec_t _view_inner_with_boundaries; + BoundaryViews_t _boundary_views; + pattern_size_t _size_bnd_elems = 0; +}; -template -std::ostream& operator<<(std::ostream& os, - const 
Region& region) { - os << "dash::halo::Region<" << typeid(ElementT).name() << ">" - << "( view: " << region.view() << "; region spec: " << region.spec() - << "; border regions: {"; - const auto& border = region.border(); - for(auto d = 0; d < border.size(); ++d) { - if(d == 0) - os << border[d]; - else - os << "," << border[d]; - } - os << "}" - << "; is border: " << region.is_border_region() - << "; is custom: " << region.is_custom_region() - << "; begin iterator: " << region.begin() - << "; end iterator: " << region.begin() << ")"; +template +std::ostream& operator<<( + std::ostream& os, + const StencilSpecificViews& stencil_views) { + os << "dash::halo::StencilSpecificViews" + << "(local: " << stencil_views.view() + << "; inner: " << stencil_views.inner() + << "; inner_bound: " << stencil_views.inner_with_boundaries() + << "; boundary_views: " << stencil_views.boundary_views() + << "; boundary elems: " << stencil_views.boundary_size() << ")"; return os; } @@ -1218,12 +827,14 @@ template class HaloBlock { private: static constexpr auto NumDimensions = PatternT::ndim(); + static constexpr auto RegionsMax = NumRegionsMax; using Self_t = HaloBlock; using pattern_index_t = typename PatternT::index_type; using RegionSpec_t = RegionSpec; using Region_t = Region; using RegionCoords_t = RegionCoords; + using Coords_t = typename RegionCoords_t::Coords_t; using region_extent_t = typename RegionSpec_t::region_extent_t; public: @@ -1236,11 +847,10 @@ class HaloBlock { using BoundaryViews_t = std::vector; using HaloSpec_t = HaloSpec; using RegionVector_t = std::vector; - using region_index_t = typename RegionSpec_t::region_index_t; using ElementCoords_t = std::array; - - using HaloExtsMaxPair_t = std::pair; - using HaloExtsMax_t = std::array; + using HaloExtsMax_t = typename HaloSpec_t::HaloExtsMax_t; + using RegIndDepVec_t = typename RegionCoords_t::RegIndDepVec_t; + using BlockEnv_t = BlockEnvironment; public: /** @@ -1248,159 +858,54 @@ class HaloBlock { */ 
HaloBlock(GlobMem_t& globmem, const PatternT& pattern, const ViewSpec_t& view, const HaloSpec_t& halo_reg_spec, - const GlobBoundSpec_t& bound_spec = GlobBoundSpec_t{}) + const GlobBoundSpec_t& bound_spec) : _globmem(globmem), _pattern(pattern), _view(view), - _halo_reg_spec(halo_reg_spec), _view_local(_view.extents()) { - // setup local views - _view_inner = _view_local; - _view_inner_with_boundaries = _view_local; + _halo_reg_spec(halo_reg_spec), _view_local(_view.extents()), + _glob_bound_spec(bound_spec), + _block_env(pattern, _halo_reg_spec, _view, _glob_bound_spec) { // TODO put functionallity to HaloSpec _halo_regions.reserve(_halo_reg_spec.num_regions()); _boundary_regions.reserve(_halo_reg_spec.num_regions()); + + _view_inner = _block_env.view_inner(); + _view_inner_with_boundaries = _block_env.view_inner_boundary(); + /* * Setup for all halo and boundary regions and properties like: * is the region a global boundary region and is the region custom or not */ - for(const auto& spec : _halo_reg_spec.specs()) { - auto halo_extent = spec.extent(); - if(!halo_extent) - continue; - - std::array border{}; - bool custom_region = false; - auto halo_region_offsets = view.offsets(); - auto halo_region_extents = view.extents(); - auto bnd_region_offsets = view.offsets(); - auto bnd_region_extents = view.extents(); + auto bnd_check = _block_env.boundary_region_check(halo_reg_spec); + for(region_index_t r = 0; r < RegionsMax; ++r) { + const auto& env_reg_info = _block_env.info(r); + const auto& spec = _halo_reg_spec.specs()[r]; - for(dim_t d = 0; d < NumDimensions; ++d) { - if(spec[d] == 1) - continue; - - auto view_offset = view.offset(d); - auto view_extent = view.extent(d); - - if(spec[d] < 1) { - _halo_extents_max[d].first = - std::max(_halo_extents_max[d].first, halo_extent); - if(view_offset < _halo_extents_max[d].first) { - border[d] = true; - - if(bound_spec[d] == BoundaryProp::NONE) { - halo_region_offsets[d] = 0; - halo_region_extents[d] = 0; - 
bnd_region_offsets[d] = 0; - bnd_region_extents[d] = 0; - } else { - if(bound_spec[d] == BoundaryProp::CUSTOM) - custom_region = true; - halo_region_offsets[d] = _pattern.extent(d) - halo_extent; - halo_region_extents[d] = halo_extent; - bnd_region_extents[d] = halo_extent; - } - - } else { - halo_region_offsets[d] -= halo_extent; - halo_region_extents[d] = halo_extent; - bnd_region_extents[d] = halo_extent; - } - } else { - _halo_extents_max[d].second = - std::max(_halo_extents_max[d].second, halo_extent); - auto check_extent = - view_offset + view_extent + _halo_extents_max[d].second; - if(check_extent > _pattern.extent(d)) { - border[d] = true; - - if(bound_spec[d] == BoundaryProp::NONE) { - halo_region_offsets[d] = 0; - halo_region_extents[d] = 0; - bnd_region_offsets[d] = 0; - bnd_region_extents[d] = 0; - } else { - if(bound_spec[d] == BoundaryProp::CUSTOM) - custom_region = true; - halo_region_offsets[d] = 0; - halo_region_extents[d] = halo_extent; - bnd_region_offsets[d] += view_extent - halo_extent; - bnd_region_extents[d] = halo_extent; - } - } else { - halo_region_offsets[d] += halo_region_extents[d]; - halo_region_extents[d] = halo_extent; - bnd_region_offsets[d] += view_extent - halo_extent; - bnd_region_extents[d] = halo_extent; - } - } - } - auto index = spec.index(); - _halo_regions.push_back( - Region_t(spec, ViewSpec_t(halo_region_offsets, halo_region_extents), - _globmem, _pattern, border, custom_region)); - auto& region_tmp = _halo_regions.back(); - _size_halo_elems += region_tmp.size(); - _halo_reg_mapping[index] = ®ion_tmp; - _boundary_regions.push_back( - Region_t(spec, ViewSpec_t(bnd_region_offsets, bnd_region_extents), - _globmem, _pattern, border, custom_region)); - _boundary_reg_mapping[index] = &_boundary_regions.back(); - } + _boundary_views.push_back(env_reg_info.bnd_reg_data.view); + _size_bnd_elems += env_reg_info.bnd_reg_data.view.size(); - /* - * Setup for the non duplicate boundary elements and the views: inner, - * boundary and 
inner + boundary - */ - for(dim_t d = 0; d < NumDimensions; ++d) { - const auto global_offset = view.offset(d); - const auto view_extent = _view_local.extent(d); - - auto bnd_elem_offsets = _view.offsets(); - auto bnd_elem_extents = _view_local.extents(); - bnd_elem_extents[d] = _halo_extents_max[d].first; - for(auto d_tmp = 0; d_tmp < d; ++d_tmp) { - bnd_elem_offsets[d_tmp] -= - _view.offset(d_tmp) - _halo_extents_max[d_tmp].first; - bnd_elem_extents[d_tmp] -= - _halo_extents_max[d_tmp].first + _halo_extents_max[d_tmp].second; + auto halo_extent = spec.extent(); + if(!halo_extent) { + continue; } - _view_inner.resize_dim( - d, _halo_extents_max[d].first, - view_extent - _halo_extents_max[d].first - _halo_extents_max[d].second); - if(bound_spec[d] == BoundaryProp::NONE) { - auto safe_offset = global_offset; - auto safe_extent = view_extent; - if(global_offset < _halo_extents_max[d].first) { - safe_offset = _halo_extents_max[d].first; - safe_extent -= _halo_extents_max[d].first - global_offset; - } else { - bnd_elem_offsets[d] -= global_offset; - push_bnd_elems(d, bnd_elem_offsets, bnd_elem_extents, - _halo_extents_max, bound_spec); - } - auto check_extent = - global_offset + view_extent + _halo_extents_max[d].second; - if(check_extent > _pattern.extent(d)) { - safe_extent -= check_extent - _pattern.extent(d); - } else { - bnd_elem_offsets[d] += view_extent - _halo_extents_max[d].first; - bnd_elem_extents[d] = _halo_extents_max[d].second; - push_bnd_elems(d, bnd_elem_offsets, bnd_elem_extents, - _halo_extents_max, bound_spec); - } - _view_inner_with_boundaries.resize_dim(d, safe_offset - global_offset, - safe_extent); + if(env_reg_info.halo_reg_data.valid) { + _halo_regions.push_back( + Region_t(spec, env_reg_info.halo_reg_data.view, + _globmem, _pattern, env_reg_info)); + _halo_reg_mapping[r] = &_halo_regions.back(); + _size_halo_elems += env_reg_info.halo_reg_data.view.size(); } else { - bnd_elem_offsets[d] -= global_offset; - push_bnd_elems(d, bnd_elem_offsets, 
bnd_elem_extents, _halo_extents_max, - bound_spec); - bnd_elem_offsets[d] += view_extent - _halo_extents_max[d].first; - bnd_elem_extents[d] = _halo_extents_max[d].second; - push_bnd_elems(d, bnd_elem_offsets, bnd_elem_extents, _halo_extents_max, - bound_spec); + _halo_regions.push_back( + Region_t(spec, ViewSpec_t(), + _globmem, _pattern, env_reg_info)); + _halo_reg_mapping[r] = &_halo_regions.back(); } + auto bnd_reg_data = bnd_check.region_data_duplicate(spec, false); + _boundary_regions.push_back( + Region_t(spec, bnd_reg_data.view, + _globmem, _pattern, env_reg_info)); + _boundary_reg_mapping[r] = &_boundary_regions.back(); } } @@ -1421,40 +926,47 @@ class HaloBlock { /** * The pattern instance that created the encapsulated block. */ - const PatternT& pattern() const { return _pattern; } + const Pattern_t& pattern() const { return _pattern; } /** * The global memory instance that created the encapsulated block. */ const GlobMem_t& globmem() const { return _globmem; } + /** + * Returns the \ref GlobalBoundarySpec used by the HaloBlock instance. 
+ */ + const GlobBoundSpec_t& global_boundary_spec() const { + return _glob_bound_spec; + } + /** * Returns used \ref HaloSpec */ const HaloSpec_t& halo_spec() const { return _halo_reg_spec; } - /** - * Returns a specific halo region and nullptr if no region exists + +/** + * Returns the environment information object \ref BlockEnvironment */ - const Region_t* halo_region(const region_index_t index) const { - return _halo_reg_mapping[index]; - } + BlockEnv_t block_env() { return _block_env; } /** - * Returns all halo regions + * Returns the environment information object \ref BlockEnvironment */ - const RegionVector_t& halo_regions() const { return _halo_regions; } + const BlockEnv_t& block_env() const { return _block_env; } + /** - * Returns the maximal extension for a specific dimension + * Returns a specific halo region and nullptr if no region exists */ - const HaloExtsMaxPair_t& halo_extension_max(dim_t dim) const { - return _halo_extents_max[dim]; + const Region_t* halo_region(const region_index_t index) const { + return _halo_reg_mapping[index]; } /** - * Returns the maximal halo extension for every dimension + * Returns all halo regions */ - const HaloExtsMax_t& halo_extension_max() const { return _halo_extents_max; } + const RegionVector_t& halo_regions() const { return _halo_regions; } /** * Returns a specific region and nullptr if no region exists @@ -1469,6 +981,18 @@ class HaloBlock { */ const RegionVector_t& boundary_regions() const { return _boundary_regions; } + RegIndDepVec_t boundary_dependencies(region_index_t index) const { + RegIndDepVec_t index_dep{}; + for(auto reg_index : RegionCoords_t::boundary_dependencies(index)) { + auto region = halo_region(reg_index); + if(region != nullptr) { + index_dep.push_back(reg_index); + } + } + + return index_dep; + } + /** * Returns the initial global \ref ViewSpec */ @@ -1487,14 +1011,14 @@ class HaloBlock { } /** - * Returns the inner view with global offsets depending on the used + * Returns the inner 
\ref ViewSpec with local offsets depending on the used * \ref HaloSpec. */ const ViewSpec_t& view_inner() const { return _view_inner; } /** * Returns a set of local views that contains all boundary elements. - * No duplicates of elements included. + * No duplicated elements included. */ const BoundaryViews_t& boundary_views() const { return _boundary_views; } @@ -1509,7 +1033,7 @@ class HaloBlock { pattern_size_t boundary_size() const { return _size_bnd_elems; } /** - * Returns the index belonging to the given coordinates and \ref ViewSpec + * Returns the region index belonging to the given coordinates and \ref ViewSpec */ region_index_t index_at(const ViewSpec_t& view, const ElementCoords_t& coords) const { @@ -1525,44 +1049,16 @@ class HaloBlock { index = 2; for(auto d = 1; d < NumDimensions; ++d) { if(coords[d] < offsets[d]) - index *= RegionCoords_t::REGION_INDEX_BASE; + index *= REGION_INDEX_BASE; else if(coords[d] < static_cast(extents[d])) - index = 1 + index * RegionCoords_t::REGION_INDEX_BASE; + index = 1 + index * REGION_INDEX_BASE; else - index = 2 + index * RegionCoords_t::REGION_INDEX_BASE; + index = 2 + index * REGION_INDEX_BASE; } return index; } -private: - void push_bnd_elems(dim_t dim, - std::array& offsets, - std::array& extents, - const HaloExtsMax_t& halo_exts_max, - const GlobBoundSpec_t& bound_spec) { - auto tmp = offsets; - for(auto d_tmp = dim + 1; d_tmp < NumDimensions; ++d_tmp) { - if(bound_spec[d_tmp] == BoundaryProp::NONE) { - if(offsets[d_tmp] < halo_exts_max[d_tmp].first) { - offsets[d_tmp] = halo_exts_max[d_tmp].first; - tmp[d_tmp] = halo_exts_max[d_tmp].first; - extents[d_tmp] -= halo_exts_max[d_tmp].first; - } - auto check_extent_tmp = - offsets[d_tmp] + extents[d_tmp] + halo_exts_max[d_tmp].second; - if(check_extent_tmp > _pattern.extent(d_tmp)) - extents[d_tmp] -= halo_exts_max[d_tmp].second; - } - - tmp[d_tmp] -= _view.offset(d_tmp); - } - - ViewSpec_t boundary_next(tmp, extents); - _size_bnd_elems += boundary_next.size(); - 
_boundary_views.push_back(std::move(boundary_next)); - } - private: GlobMem_t& _globmem; @@ -1574,25 +1070,27 @@ class HaloBlock { const ViewSpec_t _view_local; + const GlobBoundSpec_t _glob_bound_spec; + + BlockEnv_t _block_env; + ViewSpec_t _view_inner_with_boundaries; ViewSpec_t _view_inner; RegionVector_t _halo_regions; - std::array _halo_reg_mapping{}; + std::array _halo_reg_mapping{}; RegionVector_t _boundary_regions; - std::array _boundary_reg_mapping{}; + std::array _boundary_reg_mapping{}; BoundaryViews_t _boundary_views; pattern_size_t _size_bnd_elems = 0; pattern_size_t _size_halo_elems = 0; - - HaloExtsMax_t _halo_extents_max{}; }; // class HaloBlock template @@ -1631,174 +1129,7 @@ std::ostream& operator<<(std::ostream& os, return os; } -/** - * Mangages the memory for all halo regions provided by the given - * \ref HaloBlock - */ -template -class HaloMemory { -private: - static constexpr auto NumDimensions = HaloBlockT::ndim(); - - using RegionCoords_t = RegionCoords; - using Pattern_t = typename HaloBlockT::Pattern_t; - - static constexpr auto MaxIndex = RegionCoords_t::MaxIndex; - static constexpr auto MemoryArrange = Pattern_t::memory_order(); -public: - using Element_t = typename HaloBlockT::Element_t; - using ElementCoords_t = - std::array; - using HaloBuffer_t = std::vector; - using region_index_t = typename RegionCoords_t::region_index_t; - using pattern_size_t = typename Pattern_t::size_type; - - using iterator = typename HaloBuffer_t::iterator; - using const_iterator = const iterator; - - using MemRange_t = std::pair; - -public: - /** - * Constructor - */ - HaloMemory(const HaloBlockT& haloblock) : _haloblock(haloblock) { - _halobuffer.resize(haloblock.halo_size()); - auto it = _halobuffer.begin(); - - for(const auto& region : haloblock.halo_regions()) { - _halo_offsets[region.index()] = it; - it += region.size(); - } - } - - /** - * Iterator to the first halo element for the given region index - * \param index halo region index - * \return 
Iterator to the first halo element. If no region exists the - * end iterator will be returned. - */ - iterator first_element_at(region_index_t index) { - return _halo_offsets[index]; - } - - /** - * iReturns the range of all halo elements for the given region index. - * \param index halo region index - * \return Pair of iterator. First points ot the beginning and second to the - * end. - */ - MemRange_t range_at(region_index_t index) { - auto it = _halo_offsets[index]; - if(it == _halobuffer.end()) - return std::make_pair(it, it); - - auto* region = _haloblock.halo_region(index); - - DASH_ASSERT_MSG( - region != nullptr, - "HaloMemory manages memory for a region that seemed to be empty."); - - return std::make_pair(it, it + region->size()); - } - - /** - * Returns an iterator to the first halo element - */ - iterator begin() { return _halobuffer.begin(); } - - /** - * Returns a const iterator to the first halo element - */ - const_iterator begin() const { return _halobuffer.begin(); } - - /** - * Returns an iterator to the end of the halo elements - */ - iterator end() { return _halobuffer.end(); } - - /** - * Returns a const iterator to the end of the halo elements - */ - const_iterator end() const { return _halobuffer.end(); } - - /** - * Container storing all halo elements - * - * \return Reference to the container storing all halo elements - */ - const HaloBuffer_t& buffer() const { return _halobuffer; } - - /** - * Converts coordinates to halo memory coordinates for a given - * region index and returns true if the coordinates are valid and - * false if not. 
- */ - bool to_halo_mem_coords_check(const region_index_t region_index, - ElementCoords_t& coords) const { - const auto& extents = - _haloblock.halo_region(region_index)->view().extents(); - for(auto d = 0; d < NumDimensions; ++d) { - if(coords[d] < 0) - coords[d] += extents[d]; - else if(coords[d] >= _haloblock.view().extent(d)) - coords[d] -= _haloblock.view().extent(d); - - if(coords[d] >= extents[d] || coords[d] < 0) - return false; - } - - return true; - } - - /** - * Converts coordinates to halo memory coordinates for a given region index. - */ - void to_halo_mem_coords(const region_index_t region_index, - ElementCoords_t& coords) const { - const auto& extents = - _haloblock.halo_region(region_index)->view().extents(); - for(auto d = 0; d < NumDimensions; ++d) { - if(coords[d] < 0) { - coords[d] += extents[d]; - continue; - } - - if(coords[d] >= _haloblock.view().extent(d)) - coords[d] -= _haloblock.view().extent(d); - } - } - - /* - * Retuns the offset for a given region index and coordinates within the - * region. 
- */ - pattern_size_t offset(const region_index_t region_index, - const ElementCoords_t& coords) const { - const auto& extents = - _haloblock.halo_region(region_index)->view().extents(); - pattern_size_t off = 0; - if(MemoryArrange == ROW_MAJOR) { - off = coords[0]; - for(dim_t d = 1; d < NumDimensions; ++d) - off = off * extents[d] + coords[d]; - } else { - off = coords[NumDimensions - 1]; - for(dim_t d = NumDimensions - 1; d > 0;) { - --d; - off = off * extents[d] + coords[d]; - } - } - - return off; - } - -private: - const HaloBlockT& _haloblock; - HaloBuffer_t _halobuffer; - std::array _halo_offsets{}; -}; // class HaloMemory } // namespace halo diff --git a/dash/include/dash/halo/HaloMatrixWrapper.h b/dash/include/dash/halo/HaloMatrixWrapper.h index 4506df332..1b88b9822 100644 --- a/dash/include/dash/halo/HaloMatrixWrapper.h +++ b/dash/include/dash/halo/HaloMatrixWrapper.h @@ -6,10 +6,14 @@ #include #include #include +#include +#include #include #include + + namespace dash { namespace halo { @@ -45,7 +49,7 @@ namespace halo { * halo region 3 '- halo region 7 */ -template +template class HaloMatrixWrapper { private: using Pattern_t = typename MatrixT::pattern_type; @@ -61,8 +65,10 @@ class HaloMatrixWrapper { using GlobBoundSpec_t = GlobalBoundarySpec; using HaloBlock_t = HaloBlock; using HaloMemory_t = HaloMemory; + using HaloUpdateEnv_t = HaloUpdateEnv; using ElementCoords_t = std::array; - using region_index_t = typename RegionCoords::region_index_t; + using region_index_t = internal::region_index_t; + using stencil_dist_t = internal::spoint_value_t; private: static constexpr auto MemoryArrange = Pattern_t::memory_order(); @@ -77,164 +83,41 @@ class HaloMatrixWrapper { * Constructor that takes \ref Matrix, a \ref GlobalBoundarySpec and a user * defined number of stencil specifications (\ref StencilSpec) */ - template - HaloMatrixWrapper(MatrixT& matrix, const GlobBoundSpec_t& cycle_spec, - const StencilSpecT&... 
stencil_spec) - : _matrix(matrix), _cycle_spec(cycle_spec), _halo_spec(stencil_spec...), + template + HaloMatrixWrapper(MatrixT& matrix, const GlobBoundSpec_t& glob_bnd_spec, + const StencilSpec, NumStencilPointsFirst>& stencil_spec_first, + const StencilSpecRestT&... stencil_spec) + : _matrix(matrix), _glob_bnd_spec(glob_bnd_spec), + _halo_spec(stencil_spec_first, stencil_spec...), _view_global(matrix.local.offsets(), matrix.local.extents()), _haloblock(matrix.begin().globmem(), matrix.pattern(), _view_global, - _halo_spec, cycle_spec), - _view_local(_haloblock.view_local()), _halomemory(_haloblock) { - for(const auto& region : _haloblock.halo_regions()) { - if(region.size() == 0) - continue; - // number of contiguous elements - pattern_size_t num_blocks = 1; - pattern_size_t num_elems_block = 1; - auto rel_dim = region.spec().relevant_dim(); - auto level = region.spec().level(); - auto* off = &*(_halomemory.first_element_at(region.index())); - auto it = region.begin(); - - if(MemoryArrange == ROW_MAJOR) { - if(level == 1) { //|| (level == 2 && region.regionSpec()[0] != 1)) { - for(auto i = rel_dim - 1; i < NumDimensions; ++i) - num_elems_block *= region.view().extent(i); - - size_t region_size = region.size(); - auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region_size / num_elems_block; - auto it_dist = it + num_elems_block; - pattern_size_t stride = - (num_blocks > 1) ? 
std::abs(it_dist.lpos().index - it.lpos().index) - : 1; - auto ds_stride = dart_storage(stride); - dart_datatype_t stride_type; - dart_type_create_strided(ds_num_elems_block.dtype, ds_stride.nelem, - ds_num_elems_block.nelem, &stride_type); - _dart_types.push_back(stride_type); - - _region_data.insert(std::make_pair( - region.index(), Data{ region, - [off, it, region_size, ds_num_elems_block, - stride_type](dart_handle_t& handle) { - dart_get_handle(off, it.dart_gptr(), - region_size, stride_type, - ds_num_elems_block.dtype, - &handle); - }, - DART_HANDLE_NULL })); + _halo_spec, glob_bnd_spec), + _view_local(_haloblock.view_local()), + //_halomemory(_haloblock), + _halo_env(_haloblock, matrix.lbegin(), matrix.team(), matrix.pattern().teamspec()) { + } - } - // TODO more optimizations - else { - num_elems_block *= region.view().extent(NumDimensions - 1); - size_t region_size = region.size(); - auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region_size / num_elems_block; - auto it_tmp = it; - auto start_index = it.lpos().index; - std::vector block_sizes(num_blocks); - std::vector block_offsets(num_blocks); - std::fill(block_sizes.begin(), block_sizes.end(), - ds_num_elems_block.nelem); - for(auto& index : block_offsets) { - index = - dart_storage(it_tmp.lpos().index - start_index).nelem; - it_tmp += num_elems_block; - } - dart_datatype_t index_type; - dart_type_create_indexed( - ds_num_elems_block.dtype, - num_blocks, // number of blocks - block_sizes.data(), // size of each block - block_offsets.data(), // offset of first element of each block - &index_type); - _dart_types.push_back(index_type); - _region_data.insert(std::make_pair( - region.index(), Data{ region, - [off, it, ds_num_elems_block, region_size, - index_type](dart_handle_t& handle) { - dart_get_handle(off, it.dart_gptr(), - region_size, index_type, - ds_num_elems_block.dtype, - &handle); - }, - DART_HANDLE_NULL })); - } - } else { - if(level == 1) { //|| (level == 2 && - // 
region.regionSpec()[NumDimensions - 1] != 1)) { - for(auto i = 0; i < rel_dim; ++i) - num_elems_block *= region.view().extent(i); - - size_t region_size = region.size(); - auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region_size / num_elems_block; - auto it_dist = it + num_elems_block; - pattern_size_t stride = - (num_blocks > 1) ? std::abs(it_dist.lpos().index - it.lpos().index) - : 1; - auto ds_stride = dart_storage(stride); - - dart_datatype_t stride_type; - dart_type_create_strided(ds_num_elems_block.dtype, ds_stride.nelem, - ds_num_elems_block.nelem, &stride_type); - _dart_types.push_back(stride_type); - - _region_data.insert(std::make_pair( - region.index(), Data{ region, - [off, it, region_size, ds_num_elems_block, - stride_type](dart_handle_t& handle) { - dart_get_handle(off, it.dart_gptr(), - region_size, stride_type, - ds_num_elems_block.dtype, - &handle); - }, - DART_HANDLE_NULL })); - } - // TODO more optimizations - else { - num_elems_block *= region.view().extent(0); - size_t region_size = region.size(); - auto ds_num_elems_block = dart_storage(num_elems_block); - num_blocks = region_size / num_elems_block; - auto it_tmp = it; - std::vector block_sizes(num_blocks); - std::vector block_offsets(num_blocks); - std::fill(block_sizes.begin(), block_sizes.end(), - ds_num_elems_block.nelem); - auto start_index = it.lpos().index; - for(auto& index : block_offsets) { - index = - dart_storage(it_tmp.lpos().index - start_index).nelem; - it_tmp += num_elems_block; - } - - dart_datatype_t index_type; - dart_type_create_indexed( - ds_num_elems_block.dtype, - num_blocks, // number of blocks - block_sizes.data(), // size of each block - block_offsets.data(), // offset of first element of each block - &index_type); - _dart_types.push_back(index_type); - - _region_data.insert(std::make_pair( - region.index(), Data{ region, - [off, it, index_type, region_size, - ds_num_elems_block](dart_handle_t& handle) { - dart_get_handle(off, 
it.dart_gptr(), - region_size, index_type, - ds_num_elems_block.dtype, - &handle); - }, - DART_HANDLE_NULL })); - } + /** + * Constructor that takes \ref Matrix and a stencil point distance + * to create a \ref HaloMatrixWrapper with a full stencil with the + * given width. + * The \ref GlobalBoundarySpec is set to default. + */ + template > + HaloMatrixWrapper(MatrixT& matrix, const GlobBoundSpec_t& glob_bnd_spec, + stencil_dist_t dist, std::enable_if_t::value, std::nullptr_t> = nullptr ) + : HaloMatrixWrapper(matrix, glob_bnd_spec, StencilSpecFactory::full_stencil_spec(dist)) { + } - num_elems_block = region.view().extent(0); - } - } + /** + * Constructor that takes \ref Matrix and a stencil point distance + * to create a \ref HaloMatrixWrapper with a full stencil with the + * given width. + * The \ref GlobalBoundarySpec is set to default. + */ + template > + HaloMatrixWrapper(MatrixT& matrix, stencil_dist_t dist, std::enable_if_t::value, std::nullptr_t> = nullptr ) + : HaloMatrixWrapper(matrix, GlobBoundSpec_t(), StencilSpecFactory::full_stencil_spec(dist)) { } /** @@ -242,19 +125,21 @@ class HaloMatrixWrapper { * defined number of stencil specifications (\ref StencilSpec). * The \ref GlobalBoundarySpec is set to default. */ - template + template + HaloMatrixWrapper(MatrixT& matrix, const StencilSpec stencil_spec) + : HaloMatrixWrapper(matrix, GlobBoundSpec_t(), stencil_spec) {} + + /** + * Constructor that takes \ref Matrix and a user + * defined number of stencil specifications (\ref StencilSpec). + * The \ref GlobalBoundarySpec is set to default. + */ + template = 2, bool>> HaloMatrixWrapper(MatrixT& matrix, const StencilSpecT&... stencil_spec) : HaloMatrixWrapper(matrix, GlobBoundSpec_t(), stencil_spec...) 
{} HaloMatrixWrapper() = delete; - ~HaloMatrixWrapper() { - for(auto& dart_type : _dart_types) { - dart_type_destroy(&dart_type); - } - _dart_types.clear(); - } - /** * Returns the underlying \ref HaloBlock */ @@ -264,10 +149,7 @@ class HaloMatrixWrapper { * Initiates a blocking halo region update for all halo elements. */ void update() { - for(auto& region : _region_data) { - update_halo_intern(region.second); - } - wait(); + _halo_env.update(); } /** @@ -275,20 +157,14 @@ class HaloMatrixWrapper { * the given region. */ void update_at(region_index_t index) { - auto it_find = _region_data.find(index); - if(it_find != _region_data.end()) { - update_halo_intern(it_find->second); - dart_wait_local(&it_find->second.handle); - } + _halo_env.update_at(index); } /** * Initiates an asychronous halo region update for all halo elements. */ void update_async() { - for(auto& region : _region_data) { - update_halo_intern(region.second); - } + _halo_env.update_async(); } /** @@ -296,10 +172,7 @@ class HaloMatrixWrapper { * the given region. */ void update_async_at(region_index_t index) { - auto it_find = _region_data.find(index); - if(it_find != _region_data.end()) { - update_halo_intern(it_find->second); - } + _halo_env.update_async_at(index); } /** @@ -307,9 +180,7 @@ class HaloMatrixWrapper { * halo updates. */ void wait() { - for(auto& region : _region_data) { - dart_wait_local(®ion.second.handle); - } + _halo_env.wait(); } /** @@ -317,9 +188,7 @@ class HaloMatrixWrapper { * Only useful for asynchronous halo updates. 
*/ void wait(region_index_t index) { - auto it_find = _region_data.find(index); - if(it_find != _region_data.end()) - dart_wait_local(&it_find->second.handle); + _halo_env.wait(index); } /** @@ -329,14 +198,14 @@ class HaloMatrixWrapper { const ViewSpec_t& view_local() const { return _view_local; } /** - * Returns the halo memory management object \ref HaloMemory + * Returns the halo environment management object \ref HaloUpdateEnv */ - HaloMemory_t& halo_memory() { return _halomemory; } + HaloUpdateEnv_t& halo_env() { return _halo_env; } /** - * Returns the halo memory management object \ref HaloMemory + * Returns the halo environment management object \ref HaloUpdateEnv */ - const HaloMemory_t& halo_memory() const { return _halomemory; } + const HaloUpdateEnv_t& halo_env() const { return _halo_env; } /** * Returns the underlying NArray @@ -374,34 +243,36 @@ class HaloMatrixWrapper { void set_custom_halos(FunctionT f) { using signed_extent_t = typename std::make_signed::type; for(const auto& region : _haloblock.boundary_regions()) { - if(region.is_custom_region()) { - const auto& spec = region.spec(); - std::array coords_offset{}; - const auto& reg_ext = region.view().extents(); - for(auto d = 0; d < NumDimensions; ++d) { - if(spec[d] == 0) { - coords_offset[d] -= reg_ext[d]; - continue; - } - if(spec[d] == 2) - coords_offset[d] = reg_ext[d]; - } - - auto range_mem = _halomemory.range_at(region.index()); - auto it_mem = range_mem.first; - auto it_reg_end = region.end(); - DASH_ASSERT_MSG( - std::distance(range_mem.first, range_mem.second) == region.size(), - "Range distance of the HaloMemory is unequal region size"); + if(!region.is_custom_region()) { + continue; + } - for(auto it = region.begin(); it != it_reg_end; ++it, ++it_mem) { - auto coords = it.gcoords(); - for(auto d = 0; d < NumDimensions; ++d) { - coords[d] += coords_offset[d]; - } + const auto& spec = region.spec(); + std::array coords_offset{}; + const auto& reg_ext = region.view().extents(); + 
for(auto d = 0; d < NumDimensions; ++d) { + if(spec[d] == 0) { + coords_offset[d] -= reg_ext[d]; + continue; + } + if(spec[d] == 2) + coords_offset[d] = reg_ext[d]; + } - *it_mem = f(coords); + auto range_mem = _halo_env.halo_memory().range_at(region.index()); + auto it_mem = range_mem.first; + auto it_reg_end = region.end(); + DASH_ASSERT_MSG( + std::distance(range_mem.first, range_mem.second) == region.size(), + "Range distance of the HaloMemory is unequal region size"); + const auto& pattern = _matrix.pattern(); + for(auto it = region.begin(); it != it_reg_end; ++it, ++it_mem) { + auto coords = pattern.coords(it.rpos(), region.view()); + for(auto d = 0; d < NumDimensions; ++d) { + coords[d] += coords_offset[d]; } + + *it_mem = f(coords); } } } @@ -433,7 +304,7 @@ class HaloMatrixWrapper { * Asserts whether the StencilSpec fits in the provided halo regions. */ template - StencilOperator stencil_operator( + StencilOperator stencil_operator( const StencilSpecT& stencil_spec) { for(const auto& stencil : stencil_spec.specs()) { DASH_ASSERT_MSG( @@ -442,47 +313,38 @@ class HaloMatrixWrapper { "Stencil point extent higher than halo region extent."); } - return StencilOperator( - &_haloblock, &_halomemory, stencil_spec, &_view_local); + return StencilOperator( + &_haloblock, _matrix.lbegin(), &_halo_env.halo_memory(), stencil_spec); } -private: - struct Data { - const Region_t& region; - std::function get_halos; - dart_handle_t handle{}; - }; - - void update_halo_intern(Data& data) { - if(data.region.is_custom_region()) - return; - - data.get_halos(data.handle); + CoordinateAccess coordinate_access() { + return CoordinateAccess(&_haloblock, _matrix.lbegin(),&_halo_env.halo_memory()); } +private: + Element_t* halo_element_at(ElementCoords_t& coords) { auto index = _haloblock.index_at(_view_local, coords); const auto& spec = _halo_spec.spec(index); - auto range_mem = _halomemory.range_at(index); + auto& halo_memory = _halo_env.halo_memory(); + auto range_mem = 
halo_memory.range_at(index); if(spec.level() == 0 || range_mem.first == range_mem.second) return nullptr; - if(!_halomemory.to_halo_mem_coords_check(index, coords)) + if(!halo_memory.to_halo_mem_coords_check(index, coords)) return nullptr; - return &*(range_mem.first + _halomemory.offset(index, coords)); + return &*(range_mem.first + halo_memory.offset(index, coords)); } private: MatrixT& _matrix; - const GlobBoundSpec_t _cycle_spec; + const GlobBoundSpec_t _glob_bnd_spec; const HaloSpec_t _halo_spec; const ViewSpec_t _view_global; const HaloBlock_t _haloblock; const ViewSpec_t& _view_local; - HaloMemory_t _halomemory; - std::map _region_data; - std::vector _dart_types; + HaloUpdateEnv_t _halo_env; }; } // namespace halo diff --git a/dash/include/dash/halo/HaloMemory.h b/dash/include/dash/halo/HaloMemory.h new file mode 100644 index 000000000..f718c78bf --- /dev/null +++ b/dash/include/dash/halo/HaloMemory.h @@ -0,0 +1,731 @@ +#ifndef DASH__HALO_HALOMEMORY_H +#define DASH__HALO_HALOMEMORY_H + +#include +#include +#include +#include + +namespace dash { + +namespace halo { + +using namespace internal; + +/** + * Manages the memory for all halo regions provided by the given + * \ref HaloBlock + * + * All halo elements live in one contiguous buffer; every region index maps + * to an iterator pointing at the first element of that region. + */ +template +class HaloMemory { +private: + static constexpr auto NumDimensions = HaloBlockT::ndim(); + static constexpr auto RegionsMax = NumRegionsMax; + + using RegionCoords_t = RegionCoords; + using Pattern_t = typename HaloBlockT::Pattern_t; + using ViewSpec_t = typename HaloBlockT::ViewSpec_t; + using extent_t = typename ViewSpec_t::size_type; + static constexpr auto MemoryArrange = Pattern_t::memory_order(); + +public: + using Element_t = typename HaloBlockT::Element_t; + using ElementCoords_t = + std::array; + using HaloBuffer_t = std::vector; + using pattern_size_t = typename Pattern_t::size_type; + + using iterator = typename HaloBuffer_t::iterator; + /* iterator over const elements: the type the buffer yields in the const begin()/end() overloads */ + using const_iterator = typename HaloBuffer_t::const_iterator; + + using MemRange_t = std::pair; + +public: + /** + * Constructor + */ +
HaloMemory(const HaloBlockT& haloblock) : _haloblock(haloblock) { + _halobuffer.resize(haloblock.halo_size()); + auto it = _halobuffer.begin(); + std::fill(_halo_offsets.begin(), _halo_offsets.end(), _halobuffer.end()); /* every slot defaults to end(); slots of existing regions are overwritten below */ + for(const auto& region : haloblock.halo_regions()) { + _halo_offsets[region.index()] = it; + it += region.size(); /* regions are laid out back to back in the buffer */ + } + } + + /** + * Iterator to the first halo element for the given region index + * \param index halo region index + * \return Iterator to the first halo element. If no region exists the + * end iterator will be returned. + */ + iterator first_element_at(region_index_t index) { + return _halo_offsets[index]; + } + + /** + * Returns the range of all halo elements for the given region index. + * \param index halo region index + * \return Pair of iterators. The first points to the beginning and the + * second to the end. If the stored offset equals the end of the + * buffer, the empty range (end, end) is returned. + */ + MemRange_t range_at(region_index_t index) { + auto it = _halo_offsets[index]; + if(it == _halobuffer.end()) + return std::make_pair(it, it); + + auto* region = _haloblock.halo_region(index); + + DASH_ASSERT_MSG( + region != nullptr, + "HaloMemory manages memory for a region that seemed to be empty."); + + return std::make_pair(it, it + region->size()); + } + + /** + * Returns an iterator to the first halo element + */ + iterator begin() { return _halobuffer.begin(); } + + /** + * Returns a const iterator to the first halo element + */ + const_iterator begin() const { return _halobuffer.begin(); } + + /** + * Returns an iterator to the end of the halo elements + */ + iterator end() { return _halobuffer.end(); } + + /** + * Returns a const iterator to the end of the halo elements + */ + const_iterator end() const { return _halobuffer.end(); } + + /** + * Container storing all halo elements + * + * \return Reference to the container storing all halo elements + */ + const HaloBuffer_t& buffer() const { return _halobuffer; } + + /** + * Converts coordinates to halo memory coordinates for a given + * region index and returns
true if the coordinates are valid and + * false if not. + */ + bool to_halo_mem_coords_check(const region_index_t region_index, + ElementCoords_t& coords) const { + const auto& extents = + _haloblock.halo_region(region_index)->view().extents(); + for(auto d = 0; d < NumDimensions; ++d) { + if(coords[d] < 0) + coords[d] += extents[d]; + else if(static_cast(coords[d]) >= _haloblock.view().extent(d)) + coords[d] -= _haloblock.view().extent(d); + + if(static_cast(coords[d]) >= extents[d] || coords[d] < 0) + return false; + } + + return true; + } + + /** + * Converts coordinates to halo memory coordinates for a given region index. + * \param region_index index of the halo region + * \param coords coordinates to convert; modified in place. Negative values + * are shifted up by the region extent, values at or beyond + * the block view extent are reduced by that view extent. + * Unlike to_halo_mem_coords_check, no range check is done on the result. + * NOTE(review): halo_region(region_index) is dereferenced without a null + * check - presumably callers only pass indices of existing regions; confirm. + */ + void to_halo_mem_coords(const region_index_t region_index, + ElementCoords_t& coords) const { + const auto& extents = + _haloblock.halo_region(region_index)->view().extents(); + for(dim_t d = 0; d < NumDimensions; ++d) { + if(coords[d] < 0) { + coords[d] += extents[d]; + continue; + } + + if(static_cast(coords[d]) >= _haloblock.view().extent(d)) + coords[d] -= _haloblock.view().extent(d); + } + } + + /* + * Returns the offset for a given region index and coordinates within the + * region.
+ */ + pattern_size_t offset(const region_index_t region_index, + const ElementCoords_t& coords) const { + const auto& extents = + _haloblock.halo_region(region_index)->view().extents(); + pattern_size_t off = 0; + if(MemoryArrange == ROW_MAJOR) { /* linearize so that the last dimension varies fastest */ + off = coords[0]; + for(dim_t d = 1; d < NumDimensions; ++d) + off = off * extents[d] + coords[d]; + } else { /* otherwise the first dimension varies fastest */ + off = coords[NumDimensions - 1]; + for(dim_t d = NumDimensions - 1; d > 0;) { + --d; + off = off * extents[d] + coords[d]; + } + } + + return off; + } + +private: + const HaloBlockT& _haloblock; + HaloBuffer_t _halobuffer; + std::array _halo_offsets{}; +}; // class HaloMemory + +template +class SignalEnv { + + struct SignalData { + bool signal_used{false}; /* when false, the put/wait helpers return without communicating */ + dart_gptr_t gptr{DART_GPTR_NULL}; /* global pointer the flag is written to or read from */ + }; + + static constexpr auto NumDimensions = HaloBlockT::ndim(); + static constexpr auto RegionsMax = NumRegionsMax; + + using signal_t = bool; + using HaloSignalBuffer_t = dash::Array; + using SignalDataSet_t = std::array; + using SignalHandles_t = std::vector; + using Pattern_t = typename HaloBlockT::Pattern_t; + +public: + using Team_t = dash::Team; + +public: + SignalEnv(const HaloBlockT& halo_block, Team_t& team) + : _signal_buffer(RegionsMax * team.size(), team), + _signal_ready_buffer(RegionsMax * team.size(), team) { + + for(region_index_t r = 0; r < RegionsMax; ++r) { + _signal_buffer.local[r] = 0; /* signal flags start cleared */ + _signal_ready_buffer.local[r] = 1; /* ready flags start set */ + } + + init_signal_env(halo_block); + } + + void put_signal_async(region_index_t region_index) { + auto& put_sig = _put_signals[region_index]; + if(!put_sig.signal_used) { + return; + } + + dart_handle_t handle; + dash::internal::put_handle(put_sig.gptr, &_signal, 1, &handle); + _signal_handles.push_back(std::move(handle)); /* kept so wait_put_signals() can wait on it */ + } + + void put_signal_blocking(region_index_t region_index) { + auto& put_sig = _put_signals[region_index]; + if(!put_sig.signal_used) { + return; + } + + dash::internal::put_blocking(put_sig.gptr, &_signal, 1); + } + + void
put_ready_signal_async(region_index_t region_index) { + auto& put_sig = _put_ready_signals[region_index]; + if(!put_sig.signal_used) { + return; + } + + dart_handle_t handle; + dash::internal::put_handle(put_sig.gptr, &_signal, 1, &handle); + _signal_ready_handles.push_back(std::move(handle)); /* kept so wait_put_ready_signals() can wait on it */ + } + + void put_ready_signal_blocking(region_index_t region_index) { + auto& put_sig = _put_ready_signals[region_index]; + if(!put_sig.signal_used) { + return; + } + + dash::internal::put_blocking(put_sig.gptr, &_signal, 1); + } + + void ready_to_update(region_index_t region_index) { + auto& get_data = _get_ready_signals[region_index]; + if(!get_data.signal_used) { + return; + } + + signal_t signal = false; + while(!signal) { /* re-read the ready flag until it is set */ + dash::internal::get_blocking(get_data.gptr, &signal, 1); + } + _signal_ready_buffer.lbegin()[region_index] = 0; /* clear the local flag again */ + } + + void wait_put_signals() { + dart_waitall_local(_signal_handles.data(), _signal_handles.size()); + _signal_handles.clear(); + } + + void wait_put_ready_signals() { + dart_waitall_local(_signal_ready_handles.data(), _signal_ready_handles.size()); + _signal_ready_handles.clear(); + } + + void wait_signal(region_index_t region_index) { + auto& get_data = _get_signals[region_index]; + if(!get_data.signal_used) { + return; + } + + signal_t signal = false; + while(!signal) { /* re-read the signal flag until it is set */ + dash::internal::get_blocking(get_data.gptr, &signal, 1); + } + _signal_buffer.lbegin()[region_index] = 0; /* clear the local flag again */ + } + +private: + /* Fills the per-region put/get signal descriptors from the block environment. The halo block is only read here, so it is taken by const reference instead of being copied. */ + void init_signal_env(const HaloBlockT& halo_block) { + const auto& env_info_md = halo_block.block_env(); + + long count_put_signals = 0; + long count_put_ready_signals = 0; + auto my_team_id = halo_block.pattern().team().myid(); + auto signal_gptr = _signal_buffer.begin().dart_gptr(); + auto signal_ready_gptr = _signal_ready_buffer.begin().dart_gptr(); + + for(auto r = 0; r < RegionsMax; ++r) { + auto signal_offset = r * sizeof(bool); /* byte offset of region r's flag within a unit's part of the buffer */ + + const auto& env_md = env_info_md.info(r); + + if(env_md.neighbor_id_to >= 0) { + auto& put_signal = _put_signals[r];
+ put_signal.signal_used = true; + put_signal.gptr = signal_gptr; + put_signal.gptr.unitid = env_md.neighbor_id_to; + put_signal.gptr.addr_or_offs.offset = signal_offset; + + auto& get_ready_signal = _get_ready_signals[r]; + get_ready_signal.signal_used = true; + get_ready_signal.gptr = signal_ready_gptr; + get_ready_signal.gptr.unitid = my_team_id; + get_ready_signal.gptr.addr_or_offs.offset = signal_offset; + + ++count_put_signals; + } + + auto region = halo_block.halo_region(r); + if(region != nullptr && region->size() > 0 && env_md.neighbor_id_from >= 0) { + auto& get_signal = _get_signals[r]; + // sets local signal gptr -> necessary for dart_get + get_signal.signal_used = true; + get_signal.gptr = signal_gptr; + get_signal.gptr.unitid = my_team_id; + get_signal.gptr.addr_or_offs.offset = signal_offset; + + auto& put_ready_signal = _put_ready_signals[r]; + put_ready_signal.signal_used = true; + put_ready_signal.gptr = signal_ready_gptr; + put_ready_signal.gptr.unitid = env_md.neighbor_id_from; + put_ready_signal.gptr.addr_or_offs.offset = signal_offset; + + ++count_put_ready_signals; + } + } + _signal_handles.reserve(count_put_signals); + _signal_ready_handles.reserve(count_put_ready_signals); + } + +private: + HaloSignalBuffer_t _signal_buffer; + HaloSignalBuffer_t _signal_ready_buffer; + signal_t _signal = true; + SignalDataSet_t _get_signals{}; + SignalDataSet_t _put_signals{}; + SignalDataSet_t _get_ready_signals{}; + SignalDataSet_t _put_ready_signals{}; + SignalHandles_t _signal_handles; + SignalHandles_t _signal_ready_handles; +}; + +template +struct PackMetaData { + bool needs_packing{false}; + std::vector block_pos{}; + LengthSizeT block_len{0}; + ElementT* buffer_pos{nullptr}; + std::function pack_func = [](){}; +}; + +template +std::ostream& operator<<(std::ostream& os, const PackMetaData& pack) { + os << "packing:" << std::boolalpha << pack.needs_packing + << ", block_len " << pack.block_len + << ", buffer_pos" << pack.buffer_pos; + + return os; +} 
+ +template +class PackEnv { + static constexpr auto NumDimensions = HaloBlockT::ndim(); + static constexpr auto RegionsMax = NumRegionsMax; + + using Pattern_t = typename HaloBlockT::Pattern_t; + using pattern_size_t = std::make_signed_t; + using upattern_size_t = std::make_unsigned_t; + using Element_t = typename HaloBlockT::Element_t; + + static constexpr auto MemoryArrange = Pattern_t::memory_order(); + // value not related to array index + static constexpr auto FastestDim = + MemoryArrange == ROW_MAJOR ? NumDimensions - 1 : 0; + static constexpr auto ContiguousDim = + MemoryArrange == ROW_MAJOR ? 1 : NumDimensions; + + using HaloBuffer_t = dash::Array; + using HaloPosAll_t = std::array; + using PackMData_t = PackMetaData; + using PackMDataAll_t = std::array; + using PackOffs_t = std::array; + +public: + using Team_t = dash::Team; + +public: + PackEnv(const HaloBlockT& halo_block, Element_t* local_memory, Team_t& team) + : _local_memory(local_memory), + _pack_buffer() { + auto pack_info = info_pack_buffer(halo_block); + _pack_buffer.allocate(pack_info.first * team.size(), team); + init_block_data(halo_block, pack_info.second); + } + + void pack(region_index_t region) { + _pack_md_all[region].pack_func(); + } + + dart_gptr_t halo_gptr(region_index_t region_index) { + return _get_halos[region_index]; + } + + const dart_gptr_t& halo_gptr(region_index_t region_index) const { + return _get_halos[region_index]; + } + +private: + auto info_pack_buffer(const HaloBlockT& halo_block) { + const auto& halo_spec = halo_block.halo_spec(); + team_unit_t rank_0(0); + auto max_local_extents = halo_block.pattern().local_extents(rank_0); + PackOffs_t packed_offs; + + pattern_size_t num_pack_elems = 0; + pattern_size_t current_offset = 0; + for(auto r = 0; r < RegionsMax; ++r) { + const auto& region_spec = halo_spec.spec(r); + if(region_spec.extent() == 0 || + (region_spec.level() == 1 && region_spec.relevant_dim() == ContiguousDim)) { + continue; + } + + pattern_size_t reg_size = 
1; + for(auto d = 0; d < NumDimensions; ++d) { + if(region_spec[d] != 1) { + reg_size *= region_spec.extent(); + } else { + reg_size *= max_local_extents[d]; + } + } + num_pack_elems += reg_size; + packed_offs[r] = current_offset; + current_offset += reg_size; + } + + return std::make_pair(num_pack_elems, packed_offs); + } + + + void init_block_data(const HaloBlockT& halo_block, const PackOffs_t& packed_offs) { + using ViewSpec_t = typename Pattern_t::viewspec_type; + + const auto& env_info_md = halo_block.block_env(); + for(auto r = 0; r < RegionsMax; ++r) { + const auto& env_md = env_info_md.info(r); + + auto region = halo_block.halo_region(r); + auto& halo_gptr = _get_halos[r]; + if(region != nullptr && region->size() > 0) { + // Halo elements can be updated with one request + if(region->spec().relevant_dim() == ContiguousDim && region->spec().level() == 1) { + halo_gptr = region->begin().dart_gptr(); + } else { + halo_gptr = _pack_buffer.begin().dart_gptr(); + halo_gptr.unitid = region->begin().dart_gptr().unitid; + halo_gptr.addr_or_offs.offset = packed_offs[r] * sizeof(Element_t); + } + } else { + halo_gptr = DART_GPTR_NULL; + } + + if(env_md.neighbor_id_to < 0) { + continue; + } + + // Setting all packing data + // no packing needed -> all elements are contiguous + auto& pack_md = _pack_md_all[r]; + const auto& reg_spec = halo_block.halo_spec().spec(r); + if(reg_spec.relevant_dim() == ContiguousDim && reg_spec.level() == 1) { + continue; + } + + pack_md.needs_packing = true; + pack_md.buffer_pos = _pack_buffer.lbegin() + packed_offs[r]; + + const auto& view_glob = halo_block.view(); + auto reg_offsets = view_glob.offsets(); + + const auto& region_extents = env_md.halo_reg_data.view.extents(); + for(dim_t d = 0; d < NumDimensions; ++d) { + if(reg_spec[d] == 1) { + continue; + } + + if(reg_spec[d] == 0) { + reg_offsets[d] += view_glob.extent(d) - region_extents[d]; + } else { + reg_offsets[d] = view_glob.offset(d); + } + } + ViewSpec_t view_pack(reg_offsets, 
region_extents); + pattern_size_t num_elems_block = region_extents[FastestDim]; + pattern_size_t num_blocks = view_pack.size() / num_elems_block; + + pack_md.block_len = num_elems_block; + pack_md.block_pos.resize(num_blocks); + + auto it_region = region->begin(); + decltype(it_region) it_pack_data(&(it_region.globmem()), it_region.pattern(), view_pack); + for(auto& pos : pack_md.block_pos) { + pos = _local_memory + it_pack_data.lpos().index; + it_pack_data += num_elems_block; + } + auto pack = &pack_md; + pack_md.pack_func = [pack](){ + auto buffer_offset = pack->buffer_pos; + for(auto& pos : pack->block_pos) { + std::copy(pos, pos + pack->block_len, buffer_offset); + buffer_offset += pack->block_len; + } + }; + } + } + +private: + Element_t* _local_memory; + HaloBuffer_t _pack_buffer; + HaloPosAll_t _get_halos; + PackMDataAll_t _pack_md_all; +}; + +template +class HaloUpdateEnv { + struct UpdateData { + std::function get_halos; + dart_handle_t handle{}; + }; + + static constexpr auto NumDimensions = HaloBlockT::ndim(); + static constexpr auto RegionsMax = NumRegionsMax; + + using TeamSpec_t = TeamSpec; + using HaloSpec_t = typename HaloBlockT::HaloSpec_t; + using ViewSpec_t = typename HaloBlockT::ViewSpec_t; + using Pattern_t = typename HaloBlockT::Pattern_t; + using BlockEnv_t = BlockEnvironment; + using SignalEnv_t = SignalEnv; + using PackEnv_t = PackEnv; + + + +public: + using HaloMemory_t = HaloMemory; + using Element_t = typename HaloBlockT::Element_t; + using Team_t = dash::Team; + +public: + HaloUpdateEnv(const HaloBlockT& halo_block, Element_t* local_memory, Team_t& team, const TeamSpec_t& tspec) + : _halo_block(halo_block), + //_env_info_md(tspec, halo_block), + _halo_memory(halo_block), + _signal_env(halo_block, team), + _pack_env(_halo_block, local_memory, team) { + init_update_data(); + } + + /** + * Initiates a blocking halo region update for all halo elements. 
+ */ + void update() { + prepare_update(); + for(auto& data : _region_data) { + update_halo_intern(data.first, data.second); + } + wait(); + } + + /** + * Initiates a blocking halo region update for all halo elements within + * the given region. + * + * TODO: find a solution for prepare update + */ + void update_at(region_index_t index) { + auto it_find = _region_data.find(index); + if(it_find != _region_data.end()) { + update_halo_intern(it_find->first, it_find->second); + dart_wait_local(&it_find->second.handle); + if(SigReady == SignalReady::ON) { + _signal_env.put_ready_signal_blocking(it_find->first); + } + } + } + + /** + * Initiates an asynchronous halo region update for all halo elements. + */ + void update_async() { + prepare_update(); + for(auto& data : _region_data) { + update_halo_intern(data.first, data.second); + } + } + + /** + * Initiates an asynchronous halo region update for all halo elements within + * the given region. + * + * TODO: find a solution for prepare update + */ + void update_async_at(region_index_t index) { + auto it_find = _region_data.find(index); + if(it_find != _region_data.end()) { + update_halo_intern(it_find->first, it_find->second); + } + } + + /** + * Waits until all halo updates are finished. Only useful for asynchronous + * halo updates. + */ + void wait() { + for(auto& region : _region_data) { + dart_wait_local(&region.second.handle); + if(SigReady == SignalReady::ON) { + _signal_env.put_ready_signal_async(region.first); + } + } + if(SigReady == SignalReady::ON) { + _signal_env.wait_put_ready_signals(); + } + } + + /** + * Waits until the halo update for the given halo region is finished. + * Only useful for asynchronous halo updates. 
+ */ + void wait(region_index_t index) { + auto it_find = _region_data.find(index); + if(it_find == _region_data.end()) { + return; + } + + dart_wait_local(&it_find->second.handle); + if(SigReady == SignalReady::ON) { + _signal_env.put_ready_signal_blocking(it_find->first); + } + } + + // prepares the halo elements for update -> packing and sending signals to all relevant neighbors + void prepare_update() { + for(region_index_t r = 0; r < RegionsMax; ++r) { + if(SigReady == SignalReady::ON) { + _signal_env.ready_to_update(r); + } + _pack_env.pack(r); + _signal_env.put_signal_async(r); + } + _signal_env.wait_put_signals(); + } + + /** + * Returns the halo memory management object \ref HaloMemory + */ + HaloMemory_t& halo_memory() { return _halo_memory; } + + /** + * Returns the halo memory management object \ref HaloMemory + */ + const HaloMemory_t& halo_memory() const { return _halo_memory; } + + /** + * Returns the halo environment information object \ref BlockEnvironment + */ + BlockEnv_t block_env() { return _halo_block.block_env(); } + + /** + * Returns the halo environment information object \ref BlockEnvironment + */ + const BlockEnv_t& block_env() const { return _halo_block.block_env() ; } + +private: + void init_update_data() { + for(const auto& region : _halo_block.halo_regions()) { + size_t region_size = region.size(); + if(region_size == 0) { + continue; + } + + auto gptr = _pack_env.halo_gptr(region.index()); + if(region.is_custom_region()) { + _region_data.insert(std::make_pair( + region.index(), UpdateData{ [](dart_handle_t& handle) {}, + DART_HANDLE_NULL })); + } else { + auto* pos = &*(_halo_memory.first_element_at(region.index())); + const auto& gptr = _pack_env.halo_gptr(region.index()); + _region_data.insert(std::make_pair( + region.index(), UpdateData{ [pos, gptr, region_size](dart_handle_t& handle) { + dash::internal::get_handle(gptr, pos, region_size, &handle); + }, + DART_HANDLE_NULL })); + } + } + } + + void update_halo_intern(region_index_t 
region_index, UpdateData& data) { + _signal_env.wait_signal(region_index); + data.get_halos(data.handle); + } + +private: + const HaloBlockT& _halo_block; + HaloMemory_t _halo_memory; + SignalEnv_t _signal_env; + PackEnv_t _pack_env; + std::map _region_data; +}; // HaloUpdateEnv + +} // namespace halo + +} // namespace dash + +#endif // DASH__HALO_HALOMEMORY_H diff --git a/dash/include/dash/halo/Region.h b/dash/include/dash/halo/Region.h new file mode 100644 index 000000000..d0cfb3ccc --- /dev/null +++ b/dash/include/dash/halo/Region.h @@ -0,0 +1,555 @@ +#ifndef DASH__HALO_REGION_H +#define DASH__HALO_REGION_H + +#include +#include +#include + +#include + +namespace dash { + +namespace halo { + +using namespace internal; + +/** + * N-Dimensional region coordinates and associated indices for all possible + * Halo/Boundary regions of a \ref HaloBlock. The center (all values = 1) is the + * local NArray memory block used by the \ref HaloBlock. + * + * Example for 2-D + * + * .-------..-------..-------. + * | 0 || 1 || 2 <-|-- region index + * | (0,0) || (0,1) || (0,2)<|-- region coordinates + * | NW || N || NE <|-- north east (only for explanation) + * '-------''-------''-------' + * .-------..-------..-------. + * | 3 || 4 || 5 | + * | (1,0) || (1,1) || (1,2) | + * | W || C || E | + * '-------''-------''-------' + * .-------..-------..-------. + * | 6 || 7 || 8 | + * | (2,0) || (2,1) || (2,2) | + * | SW || S || SE | + * '-------''-------''-------' + */ +template +class RegionCoords : public Dimensional { +private: + using Self_t = RegionCoords; + using Base_t = Dimensional; + + static constexpr auto RegionsMax = NumRegionsMax; + +public: + using Coords_t = std::array; + using CoordsVec_t = std::vector; + using RegIndDepVec_t = std::vector; + using RegIndexDim_t = std::pair; + + + +public: + /** + * Default Constructor + * + * All region coordinate values are 1 and pointing to the center. 
+ */ + RegionCoords() { + for(dim_t i = 0; i < NumDimensions; ++i) { + this->_values[i] = 1; + } + } + + /** + * Constructor allows custom coordinate values and calculates the fitting + * region index. + */ + template + RegionCoords(region_coord_t value, Values... values) + : Base_t::Dimensional(value, values...) { + } + + /** + * Constructor takes a region index to set up the region coordinates + */ + RegionCoords(region_index_t index) { + this->_values = Self_t::coords(index); + } + + static constexpr region_index_t center_index() { + return NumRegionsMax / 2; + } + + static Coords_t center_coords() { + Coords_t reg_coords; + reg_coords.fill(1); + + return reg_coords; + } + + /** + * \return region index + */ + constexpr region_index_t index() const { return index(this->_values); } + + /** + * Returns both region indices of a given dimension as a pair (first: coordinate 0, second: coordinate 2 in that dimension; all other dimensions use coordinate 1) + */ + static constexpr RegIndexDim_t index(dim_t dim) { + RegIndexDim_t index_dim = std::make_pair(0,0); + + for(dim_t d = 0; d < NumDimensions; ++d) + if(dim == d) { + index_dim.first = index_dim.first * REGION_INDEX_BASE; + index_dim.second = 2 + index_dim.second * REGION_INDEX_BASE; + } + else { + index_dim.first = 1 + index_dim.first * REGION_INDEX_BASE; + index_dim.second = 1 + index_dim.second * REGION_INDEX_BASE; + } + + return index_dim; + } + + /** + * Returns a region index for a given dimension and \ref RegionPos + */ + static constexpr region_index_t index(dim_t dim, RegionPos pos) { + region_coord_t coord = (pos == RegionPos::PRE) ? 
0 : 2; + + region_index_t index = 0; + for(dim_t d = 0; d < NumDimensions; ++d) + if(dim == d) + index = coord + index * REGION_INDEX_BASE; + else + index = 1 + index * REGION_INDEX_BASE; + + return index; + } + + /** + * Returns the region index for a given stencil point (per dimension: value < 0 -> coordinate 0, value == 0 -> coordinate 1, value > 0 -> coordinate 2) + */ + template + static constexpr region_index_t index(const StencilPointT& stencil) { + region_index_t index = 0; + for(dim_t d = 0; d < NumDimensions; ++d) { + if(stencil[d] < 0) { + index *= REGION_INDEX_BASE; + continue; + } + + if(stencil[d] > 0) { + index = 2 + index * REGION_INDEX_BASE; + continue; + } + + index = 1 + index * REGION_INDEX_BASE; + } + + return index; + } + + /** + * Returns the region index for a given \ref RegionCoords + * + * \return region index + */ + static region_index_t index(const Coords_t& coords) { + region_index_t index = coords[0]; + for(dim_t d = 1; d < NumDimensions; ++d) { + // NOTE(review): meant to handle a wrong region coordinate (> 2), but both branches are currently identical + if(coords[d] > 2) { + index = coords[d] + index * REGION_INDEX_BASE; + } else { + index = coords[d] + index * REGION_INDEX_BASE; + } + } + + return index; + } + + /** + * \param index region index + * + * \return region coordinates + */ + static Coords_t coords(const region_index_t index) { + Coords_t coords{}; + region_index_t index_tmp =index; + for(auto i = (NumDimensions - 1); i >= 1; --i) { + auto res = std::div(static_cast(index_tmp), static_cast(REGION_INDEX_BASE)); + coords[i] = res.rem; + index_tmp = res.quot; + } + coords[0] = index_tmp; + + return coords; + } + + /** + * Returns the highest dimension (1-based) with region values != 1 + */ + static dim_t relevant_dim(const Coords_t& coords) { + dim_t dim = 1; + for(auto d = 1; d < NumDimensions; ++d) { + if(coords[d] != 1) + dim = d + 1; + } + + return dim; + } + + /** + * level = 0 -> center (1,1) + * level = 1 -> main regions (e.g. 2D: (0,1) (2,1) (1,0) (1,2) + * level = 2 e.g. 2D corner regions or 3D edge regions + * ... 
for dimensions higher than 2D relevant + */ + static dim_t level(const Coords_t& coords) { + dim_t level = 0; + for(auto d = 0; d < NumDimensions; ++d) { + if(coords[d] != 1) + ++level; + } + return level; + } + + /** + * returns the number of coordinates unequal to the center (1) for + * all dimensions + * + * level = 0 -> center (1,1) + * level = 1 -> main regions (e.g. 2D: (0,1) (2,1) (1,0) (1,2) + * level = 2 e.g. 2D corner regions or 3D edge regions + * ... for dimensions higher than 2D relevant + */ + dim_t level() { return level(this->_values); } + + static RegIndDepVec_t boundary_dependencies(region_index_t index) { + RegIndDepVec_t index_dep{}; + + if(index >= RegionsMax) { + DASH_LOG_ERROR("Invalid region index: %d", index); + + return index_dep; + } + + auto region_coords = Self_t(index); + auto level = region_coords.level(); + + if(level == 0) { + return index_dep; + } + + if(level == 1) { + index_dep.push_back(index); + + return index_dep; + } + + CoordsVec_t found_coords{}; + find_dep_regions(0, region_coords, found_coords); + + for(auto& reg_coords : found_coords) { + index_dep.push_back(reg_coords.index()); + } + + return index_dep; + } + + constexpr bool operator==(const Self_t& other) const { + + return this->_values == other._values; + } + + constexpr bool operator!=(const Self_t& other) const { + return !(*this == other); + } + +private: + + static void find_dep_regions(dim_t dim_change, const Self_t& current_coords, CoordsVec_t& dep_coords) { + dep_coords.push_back(current_coords); + + for(dim_t d = dim_change; d < NumDimensions; ++d) { + if(current_coords[d] != 1) { + auto new_coords = current_coords; + new_coords[d] = 1; + find_dep_regions(d+1, new_coords, dep_coords); + } + } + } +}; // RegionCoords + +/** + * Region specification connecting \ref RegionCoords with an extent. + * The region extent applies to all dimensions. 
+ */ +template +class RegionSpec { +private: + using Self_t = RegionSpec; + +public: + using RegionCoords_t = RegionCoords; + using region_extent_t = uint16_t; + +public: + /** + * Constructor using RegionCoords and the extent + */ + RegionSpec(const RegionCoords_t& coords, const region_extent_t extent) + : _coords(coords), _index(coords.index()), _extent(extent), + _rel_dim(RegionCoords_t::relevant_dim(coords.values())), + _level(RegionCoords_t::level(coords.values())) {} + + /** + * Constructor using a region index and an extent + */ + RegionSpec(region_index_t index, const region_extent_t extent) + : _coords(RegionCoords_t(index)), _index(index), _extent(extent), + _rel_dim(RegionCoords_t::relevant_dim(_coords.values())), + _level(RegionCoords_t::level(_coords.values())) {} + + RegionSpec() + : _coords(), _index(_coords.index()), _extent(0), + _rel_dim(RegionCoords_t::relevant_dim(_coords.values())), + _level(RegionCoords_t::level(_coords.values())) {} + + /** + * Returns the region index for a given \ref StencilPoint + */ + template + static region_index_t index(const StencilT& stencil) { + region_index_t index = 0; + if(stencil[0] == 0) + index = 1; + else if(stencil[0] > 0) + index = 2; + for(auto d(1); d < NumDimensions; ++d) { + if(stencil[d] < 0) + index *= REGION_INDEX_BASE; + else if(stencil[d] == 0) + index = 1 + index * REGION_INDEX_BASE; + else + index = 2 + index * REGION_INDEX_BASE; + } + + return index; + } + + /** + * Returns the region index + */ + constexpr region_index_t index() const { return _index; } + + /** + * Returns the \ref RegionCoords + */ + constexpr const RegionCoords_t& coords() const { return _coords; } + + /** + * Returns the extent + */ + constexpr region_extent_t extent() const { return _extent; } + + /** + * Returns the \ref RegionCoords for a given region index + */ + constexpr region_coord_t operator[](const region_index_t index) const { + return _coords[index]; + } + + constexpr bool operator==(const Self_t& other) const { 
+ return _coords.index() == other._coords.index() && _extent == other._extent; + } + + constexpr bool operator!=(const Self_t& other) const { + return !(*this == other); + } + + /** + * Returns the highest dimension with region values != 1 + */ + dim_t relevant_dim() const { return _rel_dim; } + + /** + * returns the number of coordinates unequal to the center (1) for all + * dimensions + */ + dim_t level() const { return _level; } + +private: + RegionCoords_t _coords{}; + region_index_t _index; + region_extent_t _extent = 0; + dim_t _rel_dim = 1; + dim_t _level = 0; +}; // RegionSpec + +template +std::ostream& operator<<(std::ostream& os, + const RegionSpec& rs) { + os << "dash::halo::RegionSpec<" << NumDimensions << ">(" << (uint32_t) rs[0]; + for(auto i = 1; i < NumDimensions; ++i) + os << "," << (uint32_t) rs[i]; + os << "), Extent:" << rs.extent(); + + return os; +} + +/** + * Provides \ref RegionIter and some region metadata like \ref RegionSpec, + * size etc. + */ +template +class Region { +private: + using Self_t = Region; + static constexpr auto NumDimensions = PatternT::ndim(); + +public: + using iterator = GlobViewIter; + using const_iterator = const iterator; + using RegionSpec_t = RegionSpec; + using GlobMem_t = GlobMemT; + using ViewSpec_t = typename PatternT::viewspec_type; + using extent_size_t = typename ViewSpec_t::size_type; + using pattern_size_t = typename PatternT::size_type; + using EnvRegInfo_t = EnvironmentRegionInfo; + using BorderPair_t = typename EnvRegInfo_t::PrePostBool_t; + using RegBorders_t = typename EnvRegInfo_t::RegionBorders_t; + +public: + Region(const RegionSpec_t& region_spec, const ViewSpec_t& view, + GlobMem_t& globmem, const PatternT& pattern, + const EnvRegInfo_t& env_reg_info) + : _region_spec(&region_spec), _view(view), + _globmem(&globmem), _pattern(&pattern), + _env_reg_info(&env_reg_info), + _beg(&globmem, *_pattern, _view, 0), + _end(&globmem, *_pattern, _view, _view.size()) { + } + + Region(const Self_t& other) + : 
_region_spec(other._region_spec), + _view (other._view), + _globmem(other._globmem), + _pattern(other._pattern), + _env_reg_info(other._env_reg_info), + _beg(_globmem, *_pattern, _view, 0), + _end(_globmem, *_pattern, _view, _view.size()) { + } + + Region(const Self_t&& other) + : _region_spec(std::move(other._region_spec)), + _view (std::move(other._view)), + _globmem(std::move(other._globmem)), + _pattern(std::move(other._pattern)), + _env_reg_info(std::move(other._env_reg_info)), + _beg(_globmem, *_pattern, _view, 0), + _end(_globmem, *_pattern, _view, _view.size()) { + } + + Self_t& operator=(const Self_t& other) { + _region_spec = other._region_spec; + _view = other._view; + _globmem = other._globmem; + _pattern = other._pattern; + _env_reg_info = other._env_reg_info; + _beg = iterator(_globmem, *_pattern, _view, 0); + _end = iterator(_globmem, *_pattern, _view, _view.size()); + + return *this; + } + + Self_t& operator=(const Self_t&& other) { + _region_spec = std::move(other._region_spec); + _view = std::move(other._view); + _globmem = std::move(other._globmem); + _pattern = std::move(other._pattern); + _env_reg_info = std::move(other._env_reg_info); + _beg = iterator(_globmem, *_pattern, _view, 0); + _end = iterator(_globmem, *_pattern, _view, _view.size()); + + return *this; + } + + const region_index_t index() const { return _region_spec->index(); } + + const RegionSpec_t& spec() const { return *_region_spec; } + + const ViewSpec_t& view() const { return _view; } + + pattern_size_t size() const { return _view.size(); } + + const RegBorders_t& border() const { return _env_reg_info->region_borders; } + + bool is_border_region() const { return _env_reg_info->border_region; } + + bool is_custom_region() const { + return _env_reg_info->border_region && _env_reg_info->boundary_prop == BoundaryProp::CUSTOM; + } + + /** + * Returns a pair of two booleans for a given dimension. 
+ * In case the region is the global border in this dimension + * the value is true, otherwise false + * first -> Pre center position; second -> Post center position + */ + BorderPair_t border_dim(dim_t dim) const { return _env_reg_info->region_borders[dim]; } + + bool border_dim(dim_t dim, RegionPos pos) const { + if(pos == RegionPos::PRE) { + return _env_reg_info->region_borders[dim].first; + } + + return _env_reg_info->region_borders[dim].second; + } + + iterator begin() const { return _beg; } + + iterator end() const { return _end; } + +private: + const RegionSpec_t* _region_spec; + ViewSpec_t _view; + GlobMemT* _globmem; + const PatternT* _pattern; + const EnvRegInfo_t* _env_reg_info; + iterator _beg; + iterator _end; +}; // Region + +template +std::ostream& operator<<(std::ostream& os, + const Region& region) { + os << "dash::halo::Region<" << typeid(ElementT).name() << ">" + << "( view: " << region.view() << "; region spec: " << region.spec() + << "; env_reg_info: {"; + const auto& border = region.border(); + for(auto d = 0; d < border.size(); ++d) { + if(d > 0) { + os << ","; + } + os << "(" << border[d].first << border[d].second << ")"; + } + os << "}" + << "; is border: " << region.is_border_region() + << "; is custom: " << region.is_custom_region(); + //<< "; begin iterator: " << region.begin() + //<< "; end iterator: " << region.begin() << ")"; + + return os; +} + +} // namespace halo + +} // namespace dash + +#endif // DASH__HALO_REGION_H \ No newline at end of file diff --git a/dash/include/dash/halo/Stencil.h b/dash/include/dash/halo/Stencil.h new file mode 100644 index 000000000..4e82cef28 --- /dev/null +++ b/dash/include/dash/halo/Stencil.h @@ -0,0 +1,389 @@ +#ifndef DASH__HALO_STENCIL_H +#define DASH__HALO_STENCIL_H + +#include + +#include + +namespace dash { + +namespace halo { + +using namespace internal; + +/** + * Stencil point with raletive coordinates for N dimensions + * e.g. 
StencilPoint<2>(-1,-1) -> north west + */ +template +class StencilPoint : public Dimensional { +public: + using coefficient_t = CoeffT; + +private: + using Base_t = Dimensional; + +public: + // TODO constexpr + /** + * Default Constructor + * + * All stencil point values are 0 and default coefficient = 1.0. + */ + StencilPoint() { + for(dim_t d = 0; d < NumDimensions; ++d) { + this->_values[d] = 0; + } + } + + /** + * Constructor + * + * Custom stencil point values for all dimensions and default + * coefficient = 1.0. + */ + template + constexpr StencilPoint( + typename std::enable_if::type value, + Values... values) + : Base_t::Dimensional(value, (spoint_value_t) values...) {} + + /** + * Constructor + * + * Custom values and custom coefficient. + */ + template + constexpr StencilPoint( + typename std::enable_if::type coefficient, + spoint_value_t value, Values... values) + : Base_t::Dimensional(value, (spoint_value_t) values...), + _coefficient(coefficient) {} + + // TODO as constexpr + /** + * Returns maximum distance to center over all dimensions + */ + int max() const { + int max = 0; + for(dim_t d = 0; d < NumDimensions; ++d) + max = std::max(max, (int) std::abs(this->_values[d])); + return max; + } + + /** + * Returns coordinates adjusted by stencil point + */ + template + ElementCoordsT stencil_coords(ElementCoordsT& coords) const { + return StencilPoint::stencil_coords(coords, *this); + } + + /** + * Returns coordinates adjusted by a given stencil point + */ + template + static ElementCoordsT stencil_coords( + ElementCoordsT coords, + const StencilPoint& stencilp) { + for(dim_t d = 0; d < NumDimensions; ++d) { + coords[d] += stencilp[d]; + } + + return coords; + } + + /** + * Returns coordinates adjusted by a stencil point and a boolean to indicate + * if the adjusted coordinate points to elements out of the given + * \ref ViewSpec (outside: true, inside: false). 
+ */ + template + std::pair stencil_coords_check( + ElementCoordsT coords, const ViewSpecT& view) const { + bool halo = false; + for(dim_t d = 0; d < NumDimensions; ++d) { + coords[d] += this->_values[d]; + if(coords[d] < view.offset(d) || coords[d] >= view.offset(d) + view.extent(d)) + halo = true; + } + + return std::make_pair(coords, halo); + } + + /** + * Returns coordinates adjusted by a stencil point and a boolean to indicate + * if the adjusted coordinate points to elements out of the given + * \ref ViewSpec (outside: true, inside: false). + * If one dimension points to an element outside the \ref ViewSpec this method + * returns immediately the unfinished adjusted coordinate and true. Otherwise + * the adjusted coordinate and false are returned. + */ + template + std::pair stencil_coords_check_abort( + ElementCoordsT coords, const ViewSpecT& view) const { + for(dim_t d = 0; d < NumDimensions; ++d) { + coords[d] += this->_values[d]; + if(coords[d] < view.offset(d) || coords[d] >= view.offset(d) + view.extent(d)) + return std::make_pair(coords, true); + } + + return std::make_pair(coords, false); + } + + /** + * Returns the coefficient for this stencil point + */ + CoeffT coefficient() const { return _coefficient; } + +private: + CoeffT _coefficient = 1.0; +}; // StencilPoint + +template +std::ostream& operator<<( + std::ostream& os, const StencilPoint& stencil_point) { + os << "dash::halo::StencilPoint<" << NumDimensions << ">" + << "(coefficient = " << stencil_point.coefficient() << " - points: "; + for(auto d = 0; d < NumDimensions; ++d) { + if(d > 0) { + os << ","; + } + os << stencil_point[d]; + } + os << ")"; + + return os; +} + +/** + * A collection of stencil points (\ref Stencil) + * e.g. 
StencilSpec, 2,2>({StencilPoint<2>(-1,0), + * StencilPoint<2>(1,0)}) -> north and south + */ +template +class StencilSpec { +private: + using Self_t = StencilSpec; + static constexpr auto NumDimensions = StencilPointT::ndim(); + +public: + using stencil_size_t = std::size_t; + using stencil_index_t = std::size_t; + using StencilArray_t = std::array; + using StencilPoint_t = StencilPointT; + using DistanceDim_t = std::pair; + using DistanceAll_t = std::array; + using DistanceTotal_t = std::array; + +public: + /** + * Constructor + * + * Takes a list of \ref StencilPoint + */ + constexpr StencilSpec(const StencilArray_t& specs) : _specs(specs) {} + + /** + * Constructor + * + * Takes all given \ref StencilPoint. The number of arguments has to be the + * same as the given number of stencil points via the template argument. + */ + template + constexpr StencilSpec(const StencilPointT& value, const Values&... values) + : _specs{ { value, (StencilPointT) values... } } { + static_assert(sizeof...(values) == NumStencilPoints - 1, + "Invalid number of stencil point arguments"); + } + + // TODO constexpr + /** + * Copy Constructor + */ + StencilSpec(const Self_t& other) { _specs = other._specs; } + + /** + * \return container storing all stencil points + */ + constexpr const StencilArray_t& specs() const { return _specs; } + + /** + * \return number of stencil points + */ + static constexpr stencil_size_t num_stencil_points() { + return NumStencilPoints; + } + + /** + * Returns the stencil point index for a given \ref StencilPoint + * + * \return The index and true if the given stecil point was found, + * else the index 0 and false. 
+ * Keep in mind that index 0 is only a valid index, if the returned + * bool is true + */ + const std::pair index(StencilPointT stencil) const { + for(auto i = 0; i < _specs.size(); ++i) { + if(_specs[i] == stencil) + return std::make_pair(i, true); + } + + return std::make_pair(0, false); + } + + /** + * Returns the total distances of all stencil points for all + * dimensions. total distance = distance bewteen min and max of + * a dimension + */ + DistanceTotal_t total_distances() const { + DistanceTotal_t total_dist{}; + auto minmax = minmax_distances(); + for(auto d = 0; d < NumDimensions; ++d) { + total_dist[d] = (-1) * minmax.first() + minmax.second; + } + + return total_dist; + } + + /** + * Returns the total distances of all stencil points for all + * dimensions. total distance = distance bewteen min and max of + * a dimension + */ + spoint_distance_t total_distances(dim_t dim) const { + auto minmax = minmax_distances(); + + return (-1) * minmax.first() + minmax.second; + } + + /** + * Returns the minimal and maximal distances of all stencil points for all + * dimensions. (minimum (first) <= 0 and maximum (second) >= 0) + */ + DistanceAll_t minmax_distances() const { + DistanceAll_t max_dist{}; + for(const auto& stencil_point : _specs) { + for(auto d = 0; d < NumDimensions; ++d) { + if(stencil_point[d] < max_dist[d].first) { + max_dist[d].first = stencil_point[d]; + continue; + } + if(stencil_point[d] > max_dist[d].second) + max_dist[d].second = stencil_point[d]; + } + } + + return max_dist; + } + + /** + * Returns the minimal and maximal distances of all stencil points for the + * given dimension. 
(minimum (first) <= 0 and maximum (second) >= 0) + */ + DistanceDim_t minmax_distances(dim_t dim) const { + DistanceDim_t max_dist{}; + for(const auto& stencil_point : _specs) { + if(stencil_point[dim] < max_dist.first) { + max_dist.first = stencil_point[dim]; + continue; + } + if(stencil_point[dim] > max_dist.second) + max_dist.second = stencil_point[dim]; + } + + return max_dist; + } + /** + * \return stencil point for a given index + */ + constexpr const StencilPointT& operator[](stencil_index_t index) const { + return _specs[index]; + } + +private: + StencilArray_t _specs{}; +}; // StencilSpec + +template +std::ostream& operator<<( + std::ostream& os, const StencilSpec& specs) { + os << "dash::halo::StencilSpec<" << NumStencilPoints << ">" + << "("; + for(auto i = 0; i < NumStencilPoints; ++i) { + if(i > 0) { + os << ","; + } + os << specs[i]; + } + os << ")"; + + return os; +} + +template +class StencilSpecFactory { +private: + using stencil_dist_t = spoint_value_t; + using StencilPerm_t = std::vector; + + static constexpr auto NumDimensions = StencilPointT::ndim(); + +public: + + static constexpr decltype(auto) full_stencil_spec(stencil_dist_t dist) { + using StencilSpec_t = StencilSpec-1>; + + using StencilArray_t = typename StencilSpec_t::StencilArray_t; + + StencilPerm_t stencil_perms; + StencilArray_t points; + StencilPointT start_stencil; + for(dim_t d = 0; d < NumDimensions; ++d) { + start_stencil[d] = std::abs(dist); + } + permutate_stencil_points(0, start_stencil, stencil_perms, dist); + + size_t count = 0; + for(const auto& elem : stencil_perms) { + bool center = true; + for(dim_t d = 0; d < NumDimensions; ++d) { + if(elem[d] != 0 ) { + center = false; + break; + } + } + if(!center) { + points[count] = elem; + ++count; + } + } + + return StencilSpec_t(points); + } + +private: + static void permutate_stencil_points(dim_t dim_change, const StencilPointT& current_stencil, StencilPerm_t& perm_stencil, stencil_dist_t dist) { + 
perm_stencil.push_back(current_stencil); + + for(dim_t d = dim_change; d < NumDimensions; ++d) { + if(current_stencil[d] != 0) { + auto new_stencil = current_stencil; + new_stencil[d] = 0; + permutate_stencil_points(d+1, new_stencil, perm_stencil, dist); + new_stencil[d] = -dist; + permutate_stencil_points(d+1, new_stencil, perm_stencil, dist); + } + } + } + +}; + +} // namespace halo + +} // namespace dash + +#endif // DASH__HALO_STENCIL_H \ No newline at end of file diff --git a/dash/include/dash/halo/StencilOperator.h b/dash/include/dash/halo/StencilOperator.h index 10ce46999..31d8c0818 100644 --- a/dash/include/dash/halo/StencilOperator.h +++ b/dash/include/dash/halo/StencilOperator.h @@ -17,32 +17,33 @@ struct replace { } // namespace internal +using namespace internal; + // Forward declaration -template +template class StencilOperator; /** * Proxy StencilOperator for inner elements only */ -template +template class StencilOperatorInner { private: - static constexpr auto NumStencilPoints = StencilSpecT::num_stencil_points(); - static constexpr auto NumDimensions = PatternT::ndim(); - using StencilOperator_t = StencilOperator; - using pattern_size_t = typename StencilOperator_t::pattern_size_t; + static constexpr auto NumStencilPoints = StencilOperatorT::num_stencil_points(); + static constexpr auto NumDimensions = StencilOperatorT::ndim(); public: - using ViewSpec_t = typename StencilOperator_t::ViewSpec_t; - using ElementCoords_t = typename StencilOperator_t::ElementCoords_t; - using iterator = typename StencilOperator_t::iterator_inner; + using Element_t = typename StencilOperatorT::Element_t; + using ViewSpec_t = typename StencilOperatorT::ViewSpec_t; + using Coords_t = typename StencilOperatorT::Coords_t; + using iterator = typename StencilOperatorT::iterator_inner; using const_iterator = const iterator; - using StencilOffsets_t = typename iterator::StencilOffsets_t; + using StencilOffsets_t = typename StencilOperatorT::StencilOffsets_t; public: - 
StencilOperatorInner(StencilOperator_t* stencil_op) + StencilOperatorInner(StencilOperatorT* stencil_op) : _stencil_op(stencil_op) {} /** @@ -82,9 +83,9 @@ class StencilOperatorInner { * \param coefficient for center * \param op operation to use (e.g. std::plus). default: replace */ - template > - void set_values_at(const ElementCoords_t& coords, ElementT value, - ElementT coefficient_center, + template > + void set_values_at(const Coords_t& coords, Element_t value, + Element_t coefficient_center, BinaryFunc op = BinaryFunc()) { auto* center = _stencil_op->_local_memory + _stencil_op->get_offset(coords); @@ -109,12 +110,12 @@ class StencilOperatorInner { * \param coefficient for center * \param op operation to use (e.g. std::plus). default: std::plus */ - template > - ElementT get_value_at(const ElementCoords_t& coords, - ElementT coefficient_center, + template > + Element_t get_value_at(const Coords_t& coords, + Element_t coefficient_center, BinaryFunc op = BinaryFunc()) const { auto* center = _stencil_op->_local_memory + _stencil_op->get_offset(coords); - ElementT value = *center * coefficient_center; + Element_t value = *center * coefficient_center; for(auto i = 0; i < NumStencilPoints; ++i) { auto& stencil_point_value = center[_stencil_op->_stencil_offsets[i]]; @@ -132,7 +133,7 @@ class StencilOperatorInner { * \param operation User-definied operation for updating all inner elements */ template - void update(ElementT* begin_dst, Op operation) { + void update(Element_t* begin_dst, Op operation) { update(begin(), end(), begin_dst, operation); } @@ -146,12 +147,149 @@ class StencilOperatorInner { * \param operation User-definied operation for updating all inner elements */ template - void update(iterator begin, iterator end, ElementT* begin_dst, Op operation) { - if(end == this->end()) - end -= 1; + void update(iterator begin, iterator end, Element_t* begin_dst, Op operation) { + if(begin < this->begin() || end < this->begin() || + begin > this->end() || end > 
this->end()) { + + DASH_LOG_ERROR("Begin or End iterator located outside of inner view."); + + return; + } + + end -= 1; - auto& begin_coords = begin.coords(); - auto& end_coords = end.coords(); + const auto& view = this->view(); + const auto& offsets_view = view.offsets(); + const auto& extents_view = view.extents(); + auto end_coords_view = offsets_view; + for(auto d = 0; d < NumDimensions; ++d) { + end_coords_view[d] += extents_view[d]; + } + + auto begin_coords = begin.coords(); + auto end_coords = end.coords(); + + auto center = _stencil_op->_local_memory; + auto offsets = _stencil_op->set_dimension_offsets(); + auto offset = 0; + for(dim_t d = 0; d < NumDimensions; ++d) { + offset += offsets[d] * begin_coords[d]; + } + center += offset; + auto center_dst = begin_dst + offset; + + // specialization for 2-D + if(NumDimensions == 2) { + if(begin_coords[0] == end_coords[0]) { + auto center_i = center; + auto center_dst_i = center_dst; + auto offset_i = offset; + for(int j = begin_coords[1]; j <= end_coords[1]; + ++j, ++center_i, ++center_dst_i, ++offset_i) { + operation(center_i, center_dst_i, offset_i, + _stencil_op->_stencil_offsets); + } + + return; + } + + auto center_i = center; + auto center_dst_i = center_dst; + auto offset_i = offset; + auto align_offset = begin_coords[1] - offsets_view[1]; + for(int j = begin_coords[1]; j < end_coords_view[1]; + ++j, ++center_i, ++center_dst_i, ++offset_i) { + operation(center_i, center_dst_i, offset_i, + _stencil_op->_stencil_offsets); + } + + center += offsets[0] - align_offset; + center_dst += offsets[0] - align_offset; + offset += offsets[0] - align_offset; + for(int i = begin_coords[0] + 1; i < end_coords[0]; ++i, + center += offsets[0], center_dst += offsets[0], + offset += offsets[0]) { + center_i = center; + center_dst_i = center_dst; + offset_i = offset; + for(int j = offsets_view[1]; j < end_coords_view[1]; + ++j, ++center_i, ++center_dst_i, ++offset_i) { + operation(center_i, center_dst_i, offset_i, + 
_stencil_op->_stencil_offsets); + } + } + + center_i = center; + center_dst_i = center_dst; + offset_i = offset; + for(int j = offsets_view[1]; j <= end_coords[1]; + ++j, ++center_i, ++center_dst_i, ++offset_i) { + operation(center_i, center_dst_i, offset_i, + _stencil_op->_stencil_offsets); + } + return; + } + + // specialization for 3-D + if(NumDimensions == 3) { + for(int i = begin_coords[0]; i <= end_coords[0]; ++i, + center += offsets[0], center_dst += offsets[0], + offset += offsets[0]) { + auto center_i = center; + auto center_dst_i = center_dst; + auto offset_i = offset; + for(int j = begin_coords[1]; j <= end_coords[1]; ++j, + center_i += offsets[1], center_dst_i += offsets[1], + offset_i += offsets[1]) { + auto center_j = center_i; + auto center_dst_j = center_dst_i; + auto offset_j = offset_i; + for(int k = begin_coords[2]; k <= end_coords[2]; + ++k, ++center_j, ++center_dst_j, ++offset_j) { + operation(center_j, center_dst_j, offset_j, + _stencil_op->_stencil_offsets); + } + } + } + + return; + } + + // dimensions above 3-D + for(int i = begin_coords[0]; i <= end_coords[0]; ++i, center += offsets[0], + center_dst += offsets[0], offset += offsets[0]) { + Loop<1, Op>()(_stencil_op->_stencil_offsets, offsets, begin_coords, + end_coords, center, center_dst, offset, operation); + } + } + + /** + * Updates all inner elements within a user defined range using a user-defined + * stencil operation. 
+ * + * \param begin Iterator of the beginnning inner data element + * \param end Iterator of the last inner data element + * \param begin_dst Pointer to the beginning of the destination memory + * \param operation User-definied operation for updating all inner elements + */ + template + void update_blocked(const Coords_t& begin_coords, const Coords_t& end_coords, Element_t* begin_dst, Op operation) { + const auto& view = this->view(); + const auto& offsets_view = view.offsets(); + const auto& extents_view = view.extents(); + + for(auto d = 0; d < NumDimensions; ++d) { + auto end_coord = offsets_view[d] + extents_view[d]; + if(begin_coords[d] < offsets_view[d] || + end_coords[d] < offsets_view[d] || + begin_coords[d] >= end_coord || + end_coords[d] >= end_coord) { + + DASH_LOG_ERROR("Begin or End coordinates located outside of inner view."); + + return; + } + } auto center = _stencil_op->_local_memory; auto offsets = _stencil_op->set_dimension_offsets(); @@ -218,9 +356,9 @@ class StencilOperatorInner { template constexpr void operator()(const StencilOffsets_t& stencil_offs, const StencilOffsets_t& dim_offs, - const ElementCoords_t& begin, - const ElementCoords_t& end, ElementT* center, - ElementT* center_dst, OffsetT offset, Op op) { + const Coords_t& begin, + const Coords_t& end, Element_t* center, + Element_t* center_dst, OffsetT offset, Op op) { for(int i = begin[dim]; i <= end[dim]; ++i, center += dim_offs[dim], center_dst += dim_offs[dim], offset += dim_offs[dim]) { Loop()(stencil_offs, dim_offs, begin, end, center, @@ -234,9 +372,9 @@ class StencilOperatorInner { template constexpr void operator()(const StencilOffsets_t& stencil_offs, const StencilOffsets_t& dim_offs, - const ElementCoords_t& begin, - const ElementCoords_t& end, ElementT* center, - ElementT* center_dst, OffsetT offset, Op op) { + const Coords_t& begin, + const Coords_t& end, Element_t* center, + Element_t* center_dst, OffsetT offset, Op op) { for(int i = begin[NumDimensions - 1]; i <= 
end[NumDimensions - 1]; ++i, ++center, ++center_dst, ++offset) { op(center, center_dst, offset, stencil_offs); @@ -245,31 +383,33 @@ class StencilOperatorInner { }; private: - StencilOperator_t* _stencil_op; + StencilOperatorT* _stencil_op; }; /** * Proxy StencilOperator for boundary elements only */ -template +template class StencilOperatorBoundary { private: - static constexpr auto NumStencilPoints = StencilSpecT::num_stencil_points(); - static constexpr auto NumDimensions = PatternT::ndim(); - using StencilOperator_t = StencilOperator; - using pattern_size_t = typename StencilOperator_t::pattern_size_t; - using StencilSpecViews_t = typename StencilOperator_t::StencilSpecViews_t; + static constexpr auto NumStencilPoints = StencilOperatorT::num_stencil_points(); + static constexpr auto NumDimensions = StencilOperatorT::ndim(); + + using StencilSpecViews_t = typename StencilOperatorT::StencilSpecViews_t; public: - using ViewSpec_t = typename StencilOperator_t::ViewSpec_t; - using ElementCoords_t = typename StencilOperator_t::ElementCoords_t; - using iterator = typename StencilOperator_t::iterator_bnd; + using Element_t = typename StencilOperatorT::Element_t; + using uindex_t = typename StencilOperatorT::uindex_t; + using ViewSpec_t = typename StencilOperatorT::ViewSpec_t; + using Coords_t = typename StencilOperatorT::Coords_t; + using iterator = typename StencilOperatorT::iterator_bnd; using const_iterator = const iterator; using BoundaryViews_t = typename StencilSpecViews_t::BoundaryViews_t; + using RegionCoords_t = RegionCoords; public: - StencilOperatorBoundary(const StencilOperator_t* stencil_op) + StencilOperatorBoundary(const StencilOperatorT* stencil_op) : _stencil_op(stencil_op) {} /** @@ -305,7 +445,7 @@ class StencilOperatorBoundary { /** * Returns the number of all boundary elements (no dublicates) */ - pattern_size_t boundary_size() const { + uindex_t boundary_size() const { return _stencil_op->_spec_views.boundary_size(); } @@ -323,9 +463,9 @@ class 
StencilOperatorBoundary { * \param coefficient for center * \param op operation to use (e.g. std::plus). default: replace */ - template > - void set_values_at(const ElementCoords_t& coords, ElementT value, - ElementT coefficient_center, + template > + void set_values_at(const Coords_t& coords, Element_t value, + Element_t coefficient_center, BinaryFunc op = BinaryFunc()) { auto* center = _stencil_op->_local_memory + _stencil_op->get_offset(coords); @@ -361,7 +501,7 @@ class StencilOperatorBoundary { std::pair iterator_at(dim_t dim, RegionPos pos) { DASH_ASSERT_LT(dim, NumDimensions, "Given dimension to great"); const auto& bnd_views = _stencil_op->_spec_views.boundary_views(); - pattern_size_t offset = 0; + uindex_t offset = 0; auto it_views = std::begin(bnd_views); for(dim_t d = 0; d < dim; ++d, ++it_views) offset += it_views->size() + (++it_views)->size(); @@ -376,6 +516,21 @@ class StencilOperatorBoundary { return std::make_pair(it_begin, it_begin + it_views->size()); } + std::pair iterator_at(region_index_t index) { + DASH_ASSERT_LT(index, NumRegionsMax, "Given index out of range"); + const auto& bnd_views = _stencil_op->_spec_views.boundary_views(); + uindex_t offset = 0; + for(region_index_t r = 0; r < index; ++r) { + offset += bnd_views[r].size(); + } + + auto it_begin = _stencil_op->_bbegin + offset; + + return std::make_pair(it_begin, it_begin + bnd_views[index].size()); + } + + + /** * Returns the result of the given operation done on all stencil point * elements and the center. @@ -388,12 +543,12 @@ class StencilOperatorBoundary { * \param coefficient for center * \param op operation to use (e.g. std::plus). 
default: std::plus */ - template > - ElementT get_value_at(const ElementCoords_t& coords, - ElementT coefficient_center, + template > + Element_t get_value_at(const Coords_t& coords, + Element_t coefficient_center, BinaryFunc op = BinaryFunc()) const { auto* center = _stencil_op->_local_memory + _stencil_op->get_offset(coords); - ElementT value = *center * coefficient_center; + Element_t value = *center * coefficient_center; auto& stencil_spec = _stencil_op->_stencil_spec; for(auto i = 0; i < NumStencilPoints; ++i) { bool halo = false; @@ -431,7 +586,7 @@ class StencilOperatorBoundary { * \param operation User-definied operation for updating all boundary elements */ template - void update(ElementT* begin_dst, Op operation) { + void update(Element_t* begin_dst, Op operation) { update(begin(), end(), begin_dst, operation); } @@ -446,7 +601,7 @@ class StencilOperatorBoundary { * \param operation User-definied operation for updating all inner elements */ template - void update(const iterator& begin, const iterator& end, ElementT* begin_out, + void update(const iterator& begin, const iterator& end, Element_t* begin_out, Op operation) { for(auto it = begin; it != end; ++it) { begin_out[it.lpos()] = operation(it); @@ -454,7 +609,7 @@ class StencilOperatorBoundary { } private: - const StencilOperator_t* _stencil_op; + const StencilOperatorT* _stencil_op; }; /** @@ -489,69 +644,74 @@ class StencilOperatorBoundary { * boundary region 3 boundary region 8 * */ -template +template class StencilOperator { private: + using Self_t = StencilOperator; + using Pattern_t = typename HaloBlockT::Pattern_t; + static constexpr auto NumStencilPoints = StencilSpecT::num_stencil_points(); - static constexpr auto NumDimensions = PatternT::ndim(); - static constexpr auto MemoryArrange = PatternT::memory_order(); + static constexpr auto NumDimensions = Pattern_t::ndim(); + static constexpr auto MemoryArrange = Pattern_t::memory_order(); + - using pattern_size_t = typename PatternT::size_type; - 
using pattern_index_t = typename PatternT::index_type; - template + template friend class StencilOperatorInner; - template + template friend class StencilOperatorBoundary; public: - using Inner_t = StencilOperatorInner; - using Bnd_t = StencilOperatorBoundary; + using Inner_t = StencilOperatorInner; + using Bnd_t = StencilOperatorBoundary; - using iterator = - StencilIterator; - using const_iterator = const iterator; - using iterator_inner = - StencilIterator; - using const_iterator_inner = const iterator; - using iterator_bnd = StencilIterator; - using const_iterator_bnd = const iterator; - using signed_pattern_size_t = typename iterator::signed_pattern_size_t; - using StencilOffsets_t = typename iterator::StencilOffsets_t; - using HaloBlock_t = HaloBlock; + using Element_t = typename HaloBlockT::Element_t; + using index_t = typename std::make_signed::type; + using uindex_t = typename std::make_unsigned::type; + using StencilOffsets_t = std::array; + using HaloBlock_t = HaloBlockT; using HaloMemory_t = HaloMemory; - using ViewSpec_t = ViewSpec; - using ElementCoords_t = std::array; + using ViewSpec_t = typename HaloBlockT::ViewSpec_t; + using Coords_t = std::array; + using StencilSpec_t = StencilSpecT; using StencilSpecViews_t = StencilSpecificViews; - using region_index_t = typename RegionSpec::region_index_t; using stencil_index_t = typename StencilSpecT::stencil_index_t; + using iterator = + StencilIterator; + using const_iterator = const iterator; + /*using iterator_inner = + StencilIterator; + using const_iterator_inner = const iterator;*/ + using CoordsIdxManagerInner_t = CoordsIdxManagerInner; + using iterator_inner = StencilIteratorTest; + using const_iterator_inner = const iterator; + using CoordsIdxManagerBoundary_t = CoordsIdxManagerBoundary; + using iterator_bnd = StencilIteratorTest; + using const_iterator_bnd = const iterator; + public: /** * Constructor that takes a \ref HaloBlock, a \ref HaloMemory, * a \ref StencilSpec and a local \ref ViewSpec */ 
StencilOperator( - const HaloBlock_t* haloblock, + const HaloBlockT* haloblock, + Element_t* local_memory, HaloMemory_t* halomemory, - const StencilSpecT& stencil_spec, - const ViewSpec_t* view_local) + const StencilSpecT& stencil_spec) : inner(this) , boundary(this) , _halo_block(haloblock) + , _local_memory(local_memory) , _halo_memory(halomemory) , _stencil_spec(stencil_spec) - , _view_local(view_local) + , _view_local(&haloblock->view_local()) , _stencil_offsets(set_stencil_offsets()) - , _local_memory(static_cast( - const_cast(dash::local_begin( - _halo_block->globmem().begin(), - _halo_block->globmem().team().myid())))) , _spec_views(*_halo_block, _stencil_spec, _view_local) , _begin( _local_memory, @@ -569,41 +729,20 @@ class StencilOperator { *_view_local, _spec_views.inner_with_boundaries(), _spec_views.inner_with_boundaries().size()) - , _ibegin( - _local_memory, - _halo_memory, - &_stencil_spec, - &_stencil_offsets, - *_view_local, - _spec_views.inner(), - 0) - , _iend( - _local_memory, - _halo_memory, - &_stencil_spec, - &_stencil_offsets, - *_view_local, - _spec_views.inner(), - _spec_views.inner().size()) - , _bbegin( - _local_memory, - _halo_memory, - &_stencil_spec, - &_stencil_offsets, - *_view_local, - _spec_views.boundary_views(), - 0) - , _bend( - _local_memory, - _halo_memory, - &_stencil_spec, - &_stencil_offsets, - *_view_local, - _spec_views.boundary_views(), - _spec_views.boundary_size()) - { + , _ibegin(CoordsIdxManagerInner_t(*this)) + , _iend(CoordsIdxManagerInner_t(*this, + _spec_views.inner().size())) + , _bbegin(CoordsIdxManagerBoundary_t(*this)) + , _bend(CoordsIdxManagerBoundary_t(*this, + _spec_views.boundary_size())) { } + static constexpr decltype(auto) ndim() { return NumDimensions; } + + static constexpr decltype(auto) memory_order() { return MemoryArrange; } + + static constexpr decltype(auto) num_stencil_points() { return NumStencilPoints; } + /** * Returns the begin iterator for all relevant elements (inner + boundary) */ @@ 
-641,6 +780,11 @@ class StencilOperator { */ HaloMemory_t& halo_memory() { return *_halo_memory; } + /** + * Returns the halo memory management object \ref HaloMemory + */ + Element_t* local_memory() { return _local_memory; } + /** * Returns the \ref StencilSpecificView */ @@ -664,15 +808,15 @@ class StencilOperator { /** * Returns the offset for specific stencil point. */ - const signed_pattern_size_t stencil_offset_at(std::size_t pos) const { + const index_t stencil_offset_at(std::size_t pos) const { return _stencil_offsets[pos]; } /** * Returns the local memory offset for a given coordinate */ - pattern_index_t get_offset(const ElementCoords_t& coords) const { - pattern_index_t offset = 0; + index_t get_offset(const Coords_t& coords) const { + index_t offset = 0; if(MemoryArrange == ROW_MAJOR) { offset = coords[0]; @@ -692,8 +836,8 @@ class StencilOperator { private: StencilOffsets_t set_stencil_offsets() { StencilOffsets_t stencil_offs; - for(auto i = 0; i < NumStencilPoints; ++i) { - signed_pattern_size_t offset = 0; + for(auto i = 0u; i < NumStencilPoints; ++i) { + index_t offset = 0; if(MemoryArrange == ROW_MAJOR) { offset = _stencil_spec[i][0]; for(auto d = 1; d < NumDimensions; ++d) @@ -713,7 +857,7 @@ class StencilOperator { StencilOffsets_t set_dimension_offsets() { StencilOffsets_t dim_offs; - signed_pattern_size_t offset = 0; + index_t offset = 0; if(MemoryArrange == ROW_MAJOR) { dim_offs[NumDimensions - 1] = 1; for(auto d = NumDimensions - 1; d > 0;) { @@ -735,11 +879,11 @@ class StencilOperator { private: const HaloBlock_t* _halo_block; + Element_t* _local_memory; HaloMemory_t* _halo_memory; const StencilSpecT _stencil_spec; const ViewSpec_t* _view_local; StencilOffsets_t _stencil_offsets; - ElementT* _local_memory; StencilSpecViews_t _spec_views; iterator _begin; diff --git a/dash/include/dash/halo/Types.h b/dash/include/dash/halo/Types.h new file mode 100644 index 000000000..9b3ed84e2 --- /dev/null +++ b/dash/include/dash/halo/Types.h @@ -0,0 
+1,218 @@ +#ifndef DASH__HALO_TYPES_H +#define DASH__HALO_TYPES_H + +#include +#include +#include + +namespace dash { + +namespace halo { + +namespace internal { + +// Stencil point type +using spoint_value_t = int16_t; +using spoint_distance_t = std::make_unsigned_t; + + +using region_coord_t = uint8_t; +using region_index_t = uint32_t; +using region_size_t = region_index_t; + +using region_extent_t = spoint_distance_t; + +/// index calculation base - 3^N regions for N-Dimensions +static constexpr region_index_t REGION_INDEX_BASE = 3; + +/// number of maximal possible regions +template +static constexpr region_index_t NumRegionsMax = + ce::pow(REGION_INDEX_BASE, static_cast::type>(NumDimensions)); + +/** + * View property of the StencilIterator + */ +enum class StencilViewScope : std::uint8_t { + /// inner elements only + INNER, + /// Boundary elements only + BOUNDARY, + /// Inner and boundary elements + ALL +}; + +inline std::ostream& operator<<(std::ostream& os, + const StencilViewScope& scope) { + if(scope == StencilViewScope::INNER) + os << "INNER"; + else if(scope == StencilViewScope::BOUNDARY) + os << "BOUNDARY"; + else + os << "ALL"; + + return os; +} + +} // namespace internal + +/** + * Global boundary Halo properties + */ +enum class BoundaryProp : uint8_t { + /// No global boundary Halos + NONE, + /// Global boundary Halos with values from the opposite boundary + CYCLIC, + /// Global boundary Halos with predefined custom values + CUSTOM +}; + +inline std::ostream& operator<<(std::ostream& os, const BoundaryProp& prop) { + if(prop == BoundaryProp::NONE) + os << "NONE"; + else if(prop == BoundaryProp::CYCLIC) + os << "CYCLIC"; + else + os << "CUSTOM"; + + return os; +} + +/** + * Position of a \ref Region in one dimension relating to the center + */ +enum class RegionPos : bool { + /// Region before center + PRE, + /// Region behind center + POST +}; + +inline std::ostream& operator<<(std::ostream& os, const RegionPos& pos) { + if(pos == RegionPos::PRE) + 
os << "PRE"; + else + os << "POST"; + + return os; +} + +/** + * Switch to turn on halo update signaling in both directions + */ +enum class SignalReady : bool { + /// Region before center + ON, + /// Region behind center + OFF +}; + +inline std::ostream& operator<<(std::ostream& os, const SignalReady& pos) { + if(pos == SignalReady::ON) + os << "ON"; + else + os << "OFF"; + + return os; +} + +namespace internal { + +template +struct BlockViewSpec { + ViewSpecT inner; + ViewSpecT inner_bound; +}; + +/** + * Region information describing the global border connections and + * direct neighbor ids + */ +template +struct RegionData { + ViewSpecT view{}; + /// while neighbor_id_from is DART_UNDEFINED_UNIT_ID this flag shows the + /// status of this region + bool valid{false}; +}; + +template +std::ostream& operator<<(std::ostream& os, const RegionData& region_data) { + os << "RegionData("; + os << "valid_region: " << std::boolalpha << region_data.valid << "; "; + os << region_data.view << ")"; + + return os; +} + +/** + * Region information describing the global border connections and + * direct neighbor ids + */ +template +struct EnvironmentRegionInfo { + static constexpr auto NumDimensions = ViewSpecT::ndim(); + + using PrePostBool_t = std::pair; + using RegionBorders_t = std::array; + using RegionData_t = RegionData; + using region_extent_t = std::array; + + + /// neighbor id of the region the halo data got from + /// if id is DART_UNDEFINED_UNIT_ID, no neighbor is defined + dart_unit_t neighbor_id_from{DART_UNDEFINED_UNIT_ID}; + /// neighbor id of the region the halo data need to prepared for + /// if id is DART_UNDEFINED_UNIT_ID, no neighbor is defined + dart_unit_t neighbor_id_to{DART_UNDEFINED_UNIT_ID}; + /// halo extents and validation for halo preparation + RegionData_t halo_reg_data{}; + /// defines the \ref BoundaryProp in case this region is a border region + BoundaryProp boundary_prop{BoundaryProp::NONE}; + /// defines whether a region is located at the 
narray global border + bool border_region{false}; + /// stores all borders the region is connected to + /// -> each dimension has to possible border locations pre and post center + RegionBorders_t region_borders{}; + /// halo extents and validation for halo preparation + RegionData_t bnd_reg_data; +}; + +template +std::ostream& operator<<(std::ostream& os, const EnvironmentRegionInfo& env_reg_info) { + static constexpr auto NumDimensions = ViewSpecT::ndim(); + + os << "neighbor_id_from: " << env_reg_info.neighbor_id_from << "; " + << "neighbor_id_to: " << std::boolalpha << env_reg_info.neighbor_id_to << "; "; + os << "boundary_prop: "; + if(env_reg_info.border_region) { + os << env_reg_info.boundary_prop; + } else if(env_reg_info.halo_reg_data.valid) { + os << "INNER"; + } else { + os << "UNUSED"; + } + os << "; "; + os << "is border region: " << std::boolalpha << env_reg_info.border_region << ";" + << "region_borders[" << std::boolalpha; + for(dim_t d = 0; d < NumDimensions; ++d) { + os << "(" << std::boolalpha + << env_reg_info.region_borders[d].first << "," + << env_reg_info.region_borders[d].second << ") "; + } + + os << "];"; + os << "halo region: " << env_reg_info.halo_reg_data << "; "; + os << "boundary region: " << env_reg_info.bnd_reg_data << "; "; + + return os; +} + +} // namespace internal + +} // namespace halo + +} // namespace dash + +#endif // DASH__HALO_TYPES_H \ No newline at end of file diff --git a/dash/include/dash/halo/iterator/StencilIterator.h b/dash/include/dash/halo/iterator/StencilIterator.h index ebfe34401..dec4d0198 100644 --- a/dash/include/dash/halo/iterator/StencilIterator.h +++ b/dash/include/dash/halo/iterator/StencilIterator.h @@ -4,6 +4,7 @@ #include #include +#include #include @@ -11,185 +12,854 @@ namespace dash { namespace halo { -/** - * View property of the StencilIterator - */ -enum class StencilViewScope : std::uint8_t { - /// inner elements only - INNER, - /// Boundary elements only - BOUNDARY, - /// Inner and boundary 
elements - ALL +using namespace internal; + +template +class CoordsIdxManagerInner { +private: + using Self_t = CoordsIdxManagerInner; + using StencilSpec_t = typename StencilOpT::StencilSpec_t; + + static constexpr auto NumDimensions = StencilOpT::ndim(); + static constexpr auto NumStencilPoints = StencilOpT::num_stencil_points(); + static constexpr auto MemoryArrange = StencilOpT::memory_order(); + static constexpr auto FastestDimension = + MemoryArrange == ROW_MAJOR ? NumDimensions - 1 : 0; + +public: + using Element_t = typename StencilOpT::Element_t; + using ViewSpec_t = typename StencilOpT::ViewSpec_t; + using index_t = typename StencilOpT::index_t; + using uindex_t = typename StencilOpT::uindex_t; + using StencilP_t = StencilPoint; + using Coords_t = typename StencilOpT::Coords_t; + using stencil_index_t = typename StencilSpec_t::stencil_index_t; + +private: + using RangeDim_t = std::pair; + using Ranges_t = std::array; + using StencilOffsPtrs_t = std::array; + using OffsetsDim_t = std::array; + using viewspec_index_t = typename ViewSpec_t::index_type; + using LocalLayout_t = + CartesianIndexSpace; + +public: + + CoordsIdxManagerInner(StencilOpT& stencil_op, + uindex_t start_idx = 0, const ViewSpec_t* sub_view = nullptr) + : _stencil_op(&stencil_op), + _sub_view((sub_view != nullptr) ? 
sub_view : &(stencil_op.inner.view())), + _size(_sub_view->size()), + _local_layout(stencil_op.view_local().extents()) { + // initializes ranges, coordinates depending on the index, offsets for all dimensions and all stencil pointer + init_ranges(); + set(start_idx); + } + + static constexpr decltype(auto) ndim() { return NumDimensions; } + + const ViewSpec_t& view() const { return _stencil_op->view_local(); } + + const ViewSpec_t& sub_view() const { return *_sub_view; } + + const Coords_t& coords() const { return _coords; } + + Coords_t coords(uindex_t idx) const { return _local_layout.coords(idx, *_sub_view); } + + const uindex_t& index() const { return _idx; } + + const uindex_t& offset() const { return _offset; } + + Element_t& value() const { return *_current_lmemory_addr; } + + Element_t& value_at(const stencil_index_t index_stencil ) const { return *_stencil_mem_ptr[index_stencil]; } + + Element_t& value_at(const StencilP_t& stencil) { + const auto index_stencil = _stencil_op->stencil_spec().index(stencil); + + DASH_ASSERT_MSG(index_stencil.second, + "No valid region index for given stencil point found"); + + return value_at(index_stencil.first); + } + + void set(uindex_t idx) { + if(idx >=_size) { + _idx = _size; + + return; + } + + _idx = idx; + init_coords(); + init_offset(); + init_stencil_points(); + } + + Element_t operator[](index_t n) const { + auto index = _idx + n; + auto new_coords = coords(index); + + return _stencil_op->local_memory()[_local_layout.at(new_coords)]; + } + + Element_t& operator[](index_t n) { + return operator[](n); + } + + void next_element() { + ++_idx; + ++_coords[FastestDimension]; + if(static_cast(_coords[FastestDimension]) < _ranges[FastestDimension].second) { + for(auto i = 0u; i < NumStencilPoints; ++i) + ++_stencil_mem_ptr[i]; + + ++_current_lmemory_addr; + ++_offset; + + return; + } + _coords[FastestDimension] = _sub_view->offset(FastestDimension); + uindex_t add = 0; + if(MemoryArrange == ROW_MAJOR) { + for(dim_t d = 
NumDimensions-1; d > 0;) { + --d; + ++_coords[d]; + if(static_cast(_coords[d]) < _ranges[d].second) { + add = _offsets_dim[d]; + break; + } else { + _coords[d] = _ranges[d].first; + + } + } + } else { + for(dim_t d = 1; d < NumDimensions; ++d) { + ++_coords[d]; + if(static_cast(_coords[d]) < _ranges[d].second) { + add = _offsets_dim[d]; + break; + } else { + _coords[d] = _ranges[d].first; + } + } + } + _current_lmemory_addr += add; + _offset += add; + for(auto i = 0u; i < NumStencilPoints; ++i) { + _stencil_mem_ptr[i] += add; + } + } + + +private: + void init_ranges() { + for(dim_t d = 0; d < NumDimensions; ++d) { + _ranges[d] = std::make_pair(_sub_view->offset(d), _sub_view->offset(d) + _sub_view->extent(d)); + } + } + + void init_coords() { + _coords = _local_layout.coords(_idx, *_sub_view); + } + + void init_offset() { + if(MemoryArrange == ROW_MAJOR) { + _offset = _coords[0]; + for(dim_t d = 1; d < NumDimensions; ++d) + _offset = _offset * _local_layout.extent(d) + _coords[d]; + } else { + _offset = _coords[NumDimensions - 1]; + for(dim_t d = NumDimensions - 1; d > 0;) { + --d; + _offset = _offset * _local_layout.extent(d) + _coords[d]; + } + } + + const auto& view = _stencil_op->view_local(); + _offsets_dim[FastestDimension] = 1; + if(MemoryArrange == ROW_MAJOR) { + if(FastestDimension > 0) { + _offsets_dim[FastestDimension - 1] = (view.extent(FastestDimension) - _sub_view->extent(FastestDimension)) + 1; + } + for(dim_t d = FastestDimension - 1; d > 0;) { + --d; + _offsets_dim[d] = (view.extent(d+1) - _sub_view->extent(d+1)) * view.extent(d+2) + _offsets_dim[d+1]; + } + } else { + if(NumDimensions > 1) { + _offsets_dim[FastestDimension + 1] = (view.extent(FastestDimension) - _sub_view->extent(FastestDimension)) + 1; + } + for(dim_t d = 2; d < NumDimensions; ++d) { + _offsets_dim[d] = (view.extent(d-1) - _sub_view->extent(d-1)) * view.extent(d-2) + _offsets_dim[d-1]; + } + } + } + + void init_stencil_points() { + _current_lmemory_addr = 
_stencil_op->local_memory() + _offset; + for(auto i = 0u; i < NumStencilPoints; ++i) { + _stencil_mem_ptr[i] = _current_lmemory_addr + _stencil_op->stencil_offsets()[i]; + } + } + +private: + StencilOpT* _stencil_op; + const ViewSpec_t* _sub_view; + uindex_t _size; + LocalLayout_t _local_layout; + uindex_t _idx; + Element_t* _current_lmemory_addr; + StencilOffsPtrs_t _stencil_mem_ptr; + Ranges_t _ranges; + Coords_t _coords; + uindex_t _offset; + OffsetsDim_t _offsets_dim; }; -inline std::ostream& operator<<(std::ostream& os, - const StencilViewScope& scope) { - if(scope == StencilViewScope::INNER) - os << "INNER"; - else if(scope == StencilViewScope::BOUNDARY) - os << "BOUNDARY"; - else - os << "ALL"; +template +std::ostream& operator<<( + std::ostream& os, + const CoordsIdxManagerInner& helper) { + os << "dash::halo::CoordsHelper" + << "(view: " << helper.view() + << "; sub_view: " << helper.sub_view() + << "; index: " << helper.index() + << "; offset: " << helper.offset() + << "; coords: { "; + for(const auto& elem : helper.coords()) { + os << elem << " "; + } + os << "})"; return os; } -/** - * Adapts all views \ref HaloBlock provides to the given \ref StencilSpec. - */ -template -class StencilSpecificViews { +template +class CoordsIdxManagerBoundary { + private: - static constexpr auto NumDimensions = HaloBlockT::ndim(); + using Self_t = CoordsIdxManagerBoundary; + using StencilSpec_t = typename StencilOpT::StencilSpec_t; + using StencilSpecViews_t = typename StencilOpT::StencilSpecViews_t; - using Pattern_t = typename HaloBlockT::Pattern_t; + static constexpr auto NumDimensions = StencilOpT::ndim(); + static constexpr auto NumStencilPoints = StencilOpT::num_stencil_points(); + static constexpr auto MemoryArrange = StencilOpT::memory_order(); + static constexpr auto FastestDimension = + MemoryArrange == ROW_MAJOR ? 
NumDimensions - 1 : 0; public: - using ViewSpec_t = typename HaloBlockT::ViewSpec_t; - using BoundaryViews_t = typename HaloBlockT::BoundaryViews_t; - using pattern_size_t = typename Pattern_t::size_type; + using Element_t = typename StencilOpT::Element_t; + using ViewSpec_t = typename StencilOpT::ViewSpec_t; + using index_t = typename StencilOpT::index_t; + using uindex_t = typename StencilOpT::uindex_t; + using StencilP_t = StencilPoint; + using Coords_t = typename StencilOpT::Coords_t; + using stencil_index_t = typename StencilSpec_t::stencil_index_t; + + using RegionCoords_t = RegionCoords; +private: + using BoundaryViews_t = typename StencilSpecViews_t::BoundaryViews_t; + using viewspec_index_t = typename ViewSpec_t::index_type; + using LocalLayout_t = + CartesianIndexSpace; + using ViewIndexPair_t = std::pair; + using StencilOffsPtrs_t = std::array; + using OffsetsDim_t = std::array; + + struct RangeDim_t { + uindex_t begin = 0; + uindex_t end = 0; + }; + using Ranges_t = std::array; + + struct HaloPointProp_t { + bool possible; + bool always; + region_index_t index; + }; + using HaloPoints_t = std::array; + public: - StencilSpecificViews(const HaloBlockT& haloblock, - const StencilSpecT& stencil_spec, - const ViewSpec_t* view_local) - : _view_local(view_local) { - auto minmax_dist = stencil_spec.minmax_distances(); - for(auto& dist : minmax_dist) - dist.first = std::abs(dist.first); - - auto inner_off = haloblock.view_inner().offsets(); - auto inner_ext = haloblock.view_inner().extents(); - auto inner_bound_off = haloblock.view_inner_with_boundaries().offsets(); - auto inner_bound_ext = haloblock.view_inner_with_boundaries().extents(); - for(auto d = 0; d < NumDimensions; ++d) { - resize_offset(inner_off[d], inner_ext[d], minmax_dist[d].first); - resize_extent(inner_off[d], inner_ext[d], _view_local->extent(d), - minmax_dist[d].second); - resize_offset(inner_bound_off[d], inner_bound_ext[d], - minmax_dist[d].first); - resize_extent(inner_bound_off[d], 
inner_bound_ext[d], - _view_local->extent(d), minmax_dist[d].second); + + CoordsIdxManagerBoundary(StencilOpT& stencil_op, uindex_t start_idx = 0) + : _stencil_op(&stencil_op), + _size(stencil_op.spec_views().boundary_size()), + _region_number(0), + _local_layout(stencil_op.view_local().extents()), + _idx(0) { + const auto& ext_max = stencil_op.stencil_spec().minmax_distances(FastestDimension); + + _ext_dim_reduced = {static_cast(std::abs(ext_max.first)), + _local_layout.extent(FastestDimension) - static_cast(ext_max.second)}; + set(start_idx); + } + + static constexpr decltype(auto) ndim() { return NumDimensions; } + + const ViewSpec_t& view() const { return _stencil_op->view_local(); } + + const ViewSpec_t& sub_view() const { return *(_current_view.first); } + + const Coords_t& coords() const { return _coords; } + + const uindex_t& index() const { return _idx; } + + const uindex_t& offset() const { return _offset; } + + Element_t& value() const { return *_current_lmemory_addr; } + + Element_t& value_at(const stencil_index_t index_stencil ) const { return *_stencil_mem_ptr[index_stencil]; } + + Element_t& value_at(const StencilP_t& stencil) { + const auto index_stencil = _stencil_op->stencil_spec().index(stencil); + + DASH_ASSERT_MSG(index_stencil.second, + "No valid region index for given stencil point found"); + + return value_at(index_stencil.first); + } + + void set(uindex_t idx) { + if(idx >=_size) { + _idx = _size; + + return; + } + + _idx = idx; + _current_view = get_current_view(_idx); + init_ranges(); + init_coords(); + init_offset(); + init_stencil_points(); + + } + + Element_t operator[](index_t n) const { + auto index = _idx + n; + auto new_coords = coords(index); + + return _stencil_op->local_memory()[_local_layout.at(new_coords)]; + } + + Element_t& operator[](index_t n) { + return operator[](n); + } + + const region_index_t region_id() const { return _region_number; } + + const uindex_t& size() const { return _size; } + + void next_element() { + 
++_idx; + ++_current_view.second; + ++_coords[FastestDimension]; + uindex_t add = 1; + if(static_cast(_coords[FastestDimension]) < _ranges[FastestDimension].end) { + if(static_cast(_coords[FastestDimension]) >= _ext_dim_reduced.begin + && static_cast(_coords[FastestDimension]) < _ext_dim_reduced.end) { + ++_current_lmemory_addr; + ++_offset; + for(auto i = 0u; i < NumStencilPoints; ++i) { + ++_stencil_mem_ptr[i]; + } + + return; + } + } else { + + if(_current_view.second == (*_current_view.first).size()) { + + auto& bnd_views = _stencil_op->boundary.view(); + + do { + ++_region_number; + if(_region_number >= bnd_views.size()) { + _region_number = bnd_views.size(); + + return; + } + + } while(bnd_views[_region_number].size() == 0); + + if(_idx < _size) { + _current_view = {&bnd_views[_region_number],0}; + init_ranges(); + init_coords(); + init_offset(); + init_stencil_points(); + } + + return; + } + + if(static_cast(_coords[FastestDimension]) >= _ranges[FastestDimension].end) { + _coords[FastestDimension] = _ranges[FastestDimension].begin; + if(MemoryArrange == ROW_MAJOR) { + for(dim_t d = NumDimensions-1; d > 0;) { + --d; + ++_coords[d]; + if(static_cast(_coords[d]) < _ranges[d].end) { + add = _offsets_dim[d]; + break; + } else { + _coords[d] = _ranges[d].begin; + } + } + } + + if(MemoryArrange == COL_MAJOR) { + for(dim_t d = 1; d < NumDimensions; ++d) { + ++_coords[d]; + if(static_cast(_coords[d]) < _ranges[d].end) { + add = _offsets_dim[d]; + break; + } else { + _coords[d] = _ranges[d].begin; + } + } + } + } } - _view_inner = ViewSpec_t(inner_off, inner_ext); - _view_inner_with_boundaries = ViewSpec_t(inner_bound_off, inner_bound_ext); - using RegionCoords_t = RegionCoords; - using region_index_t = typename RegionCoords_t::region_index_t; + _current_lmemory_addr += add; + _offset += add; + const auto& extents = _local_layout.extents(); + const auto& specs = _stencil_op->stencil_spec(); + const auto& stencil_offs = _stencil_op->stencil_offsets(); + for(auto i = 
0u; i < NumStencilPoints; ++i) { + if(_spoint_is_halo[i].possible) { + auto& stencil = specs[i]; + auto coords = _coords; + + if(_spoint_is_halo[i].always) { + for(dim_t d = 0; d < NumDimensions; ++d) + coords[d] += stencil[d]; + _stencil_mem_ptr[i] = value_halo_at(_spoint_is_halo[i].index, coords); + continue; + } + + bool is_halo = false; + region_index_t index = 0; + for(dim_t d = 0; d < NumDimensions; ++d) { + auto stencil_off = stencil[d]; + if(stencil_off == 0) { + index = 1 + index * REGION_INDEX_BASE; + continue; + } + coords[d] += stencil_off; + if(coords[d] < 0) { + index *= REGION_INDEX_BASE; + is_halo = true; + continue; + } + + if(static_cast(coords[d]) < extents[d]) { + index = 1 + index * REGION_INDEX_BASE; + continue; + } + + index = 2 + index * REGION_INDEX_BASE; + is_halo = true; + } + if(is_halo) { + _stencil_mem_ptr[i] = value_halo_at(index, coords); + continue; + } + + _stencil_mem_ptr[i] = _current_lmemory_addr + stencil_offs[i]; + } else { + _stencil_mem_ptr[i] += add; + } + } + } - const auto& bnd_elems = haloblock.boundary_views(); - const auto& halo_ext_max = haloblock.halo_extension_max(); - _boundary_views.reserve(NumDimensions * 2); - auto it_views = std::begin(bnd_elems); +private: + + void init_ranges() { for(dim_t d = 0; d < NumDimensions; ++d) { - region_index_t index = RegionCoords_t::index(d, RegionPos::PRE); - auto* region = haloblock.boundary_region(index); - if(region == nullptr || (region != nullptr && region->size() == 0)) - _boundary_views.push_back(ViewSpec_t()); - else { - push_boundary_views(*it_views, halo_ext_max, minmax_dist); - ++it_views; + auto sub_view = _current_view.first; + _ranges[d] = {static_cast(sub_view->offset(d)), + static_cast(sub_view->offset(d)) + sub_view->extent(d)}; + } + } + + void init_coords() { + _coords = _local_layout.coords(_current_view.second, *_current_view.first); + } + + void init_offset() { + if(MemoryArrange == ROW_MAJOR) { + _offset = _coords[0]; + for(dim_t d = 1; d < NumDimensions; 
++d) + _offset = _offset * _local_layout.extent(d) + _coords[d]; + } else { + _offset = _coords[NumDimensions - 1]; + for(dim_t d = NumDimensions - 1; d > 0;) { + --d; + _offset = _offset * _local_layout.extent(d) + _coords[d]; } - index = RegionCoords_t::index(d, RegionPos::POST); - region = haloblock.boundary_region(index); - if(region == nullptr || (region != nullptr && region->size() == 0)) - _boundary_views.push_back(ViewSpec_t()); - else { - push_boundary_views(*it_views, halo_ext_max, minmax_dist); - ++it_views; + } + + auto& sub_view = *(_current_view.first); + _offsets_dim[FastestDimension] = 1; + const auto& view = _stencil_op->view_local(); + if(MemoryArrange == ROW_MAJOR) { + if(FastestDimension > 0) { + _offsets_dim[FastestDimension - 1] = (view.extent(FastestDimension) - sub_view.extent(FastestDimension)) + 1; } + for(dim_t d = FastestDimension - 1; d > 0;) { + --d; + _offsets_dim[d] = (view.extent(d+1) - sub_view.extent(d+1)) * view.extent(d+2) + _offsets_dim[d+1]; + } + } else { + if(NumDimensions > 1) { + _offsets_dim[FastestDimension + 1] = (view.extent(FastestDimension) - sub_view.extent(FastestDimension)) + 1; + } + for(dim_t d = 2; d < NumDimensions; ++d) { + _offsets_dim[d] = (view.extent(d-1) - sub_view.extent(d-1)) * view.extent(d-2) + _offsets_dim[d-1]; + } + } + } + + ViewIndexPair_t get_current_view(uindex_t idx) { + _region_number = 0; + const auto& bnd_views = _stencil_op->boundary.view(); + for(const auto& region : bnd_views) { + if(idx < region.size()) { + return std::make_pair(®ion, idx); + } + ++_region_number; + idx -= region.size(); } + + auto& last_region = bnd_views.back(); + return std::make_pair(&last_region, last_region.size()); + } + + void init_stencil_points() { + _current_lmemory_addr = _stencil_op->local_memory() + _offset; + const auto& specs = _stencil_op->stencil_spec(); + const auto& stencil_offs = _stencil_op->stencil_offsets(); + auto minmax = specs.minmax_distances(); + const auto& extents = 
_local_layout.extents(); + + for(auto i = 0u; i < NumStencilPoints; ++i) { + auto& spoint_halo =_spoint_is_halo[i]; + spoint_halo = {false, true, 0}; + + auto halo_coord = _coords; + bool is_halo = false; + for(dim_t d = 0; d < NumDimensions; ++d) { + auto stencil_off = specs[i][d]; + if(stencil_off == 0) { + spoint_halo.index = 1 + spoint_halo.index * REGION_INDEX_BASE; + continue; + } + + halo_coord[d] += stencil_off; + if(halo_coord[d] < 0) { + spoint_halo.index *= REGION_INDEX_BASE; + spoint_halo.possible = true; + is_halo = true; + if( halo_coord[d] > minmax[d].first) { + spoint_halo.always = false; + } + continue; + } + + if(static_cast(halo_coord[d]) < extents[d]) { + spoint_halo.index = 1 + spoint_halo.index * REGION_INDEX_BASE; + if(_coords[d] < std::abs(minmax[d].first) || + (extents[d] - static_cast(_coords[d])) <= static_cast(minmax[d].second)) { + spoint_halo.always = false; + spoint_halo.possible = true; + } + + continue; + } + + spoint_halo.index = 2 + spoint_halo.index * REGION_INDEX_BASE; + spoint_halo.possible = true; + is_halo = true; + if(minmax[d].second != stencil_off) { + spoint_halo.always = false; + } + } + + if(is_halo) + _stencil_mem_ptr[i] = value_halo_at(spoint_halo.index, halo_coord); + else + _stencil_mem_ptr[i] = _current_lmemory_addr + stencil_offs[i]; + } + } + + Element_t* value_halo_at(region_index_t region_index, + Coords_t& halo_coords) { + auto& halo_memory = _stencil_op->halo_memory(); + halo_memory.to_halo_mem_coords(region_index, halo_coords); + + return &*(halo_memory.first_element_at(region_index) + + halo_memory.offset(region_index, halo_coords)); + } + +private: + StencilOpT* _stencil_op; + uindex_t _size; + region_index_t _region_number{ 0 }; + LocalLayout_t _local_layout; + uindex_t _idx; + Coords_t _coords; + ViewIndexPair_t _current_view; + Element_t* _current_lmemory_addr; + StencilOffsPtrs_t _stencil_mem_ptr; + HaloPoints_t _spoint_is_halo; + + uindex_t _offset; + OffsetsDim_t _offsets_dim; + Ranges_t _ranges; + 
RangeDim_t _ext_dim_reduced; +}; + +template +std::ostream& operator<<( + std::ostream& os, + const CoordsIdxManagerBoundary& helper) { + os << "dash::halo::CoordsHelper" + << "(view: " << helper.view() + << "; region id: " << helper.region_id() + << "; sub_view: " << helper.sub_view() + << "; index: " << helper.index() + << "; offset: " << helper.offset() + << "; coords: { "; + for(const auto& elem : helper.coords()) { + os << elem << " "; + } + os << "})"; + + return os; +} + + +/* + * Stencil specific iterator to iterate over a given scope of elements. + * The iterator provides element access via stencil points and for boundary + * elements halo element access. + */ +template +class StencilIteratorTest { +private: + using Element_t = typename CoordsIdxManagerT::Element_t; + + using Self_t = StencilIteratorTest; + using ViewSpec_t = typename CoordsIdxManagerT::ViewSpec_t; + + static constexpr auto NumDimensions = CoordsIdxManagerT::ndim(); + +public: + // Iterator traits + using iterator_category = std::random_access_iterator_tag; + using value_type = Element_t; + using difference_type = typename CoordsIdxManagerT::uindex_t; + using pointer = Element_t*; + using reference = Element_t&; + + using index_t = typename CoordsIdxManagerT::index_t; + using uindex_t = typename CoordsIdxManagerT::uindex_t; + using StencilP_t = StencilPoint; + using Coords_t = typename CoordsIdxManagerT::Coords_t; + using stencil_index_t = typename CoordsIdxManagerT::stencil_index_t; + +public: + //TODO anpassen + /** + * Constructor + * + * \param + */ + StencilIteratorTest(CoordsIdxManagerT coords_mng) + : _coords_mng(coords_mng) { + } + + /** + * Copy constructor. + */ + StencilIteratorTest(const Self_t& other) = default; + + /** + * Assignment operator. + * + * \see DashGlobalIteratorConcept + */ + Self_t& operator=(const Self_t& other) = default; + + /** + * The number of dimensions of the iterator's underlying pattern. 
+ * + * \see DashGlobalIteratorConcept + */ + static constexpr dim_t ndim() { return NumDimensions; } + + /** + * Dereference operator. + * + * \return A global reference to the element at the iterator's position. + */ + reference operator*() const { return _coords_mng.value(); } + + /** + * Subscript operator, returns global reference to element at given + * global index. + * + * \see DashGlobalIteratorConcept + */ + Element_t operator[](index_t n) const { + return _coords_mng[n]; } + reference operator[](index_t n) { + return _coords_mng[n]; + } + + uindex_t rpos() const { return _coords_mng.index(); } + + uindex_t lpos() const { return _coords_mng.offset(); } + + Coords_t coords() const { return _coords_mng.coords(); } + + CoordsIdxManagerT& helper() {return _coords_mng;} + /** - * Returns \ref ViewSpec including all elements (locally) + * Returns the value for a given stencil point index (index postion in + * \ref StencilSpec) */ - const ViewSpec_t& view() const { return *_view_local; } + Element_t value_at(const stencil_index_t index_stencil) { + return _coords_mng.value_at(index_stencil); + } + + /* returns the value of a given stencil point (not as efficient as + * stencil point index ) + */ + Element_t value_at(const StencilP_t& stencil) { + return _coords_mng.value_at(stencil); + } /** - * Returns \ref ViewSpec including all inner elements + * Prefix increment operator. */ - const ViewSpec_t& inner() const { return _view_inner; } + Self_t& operator++() { + _coords_mng.next_element(); + + return *this; + } /** - * Returns \ref ViewSpec including all inner and boundary elements + * Postfix increment operator. */ - const ViewSpec_t& inner_with_boundaries() const { - return _view_inner_with_boundaries; + Self_t operator++(int) { + Self_t result = *this; + + _coords_mng.next_element(); + + return result; } /** - * Returns all boundary views including all boundary elements (no dublicates) + * Prefix decrement operator. 
*/ - const BoundaryViews_t& boundary_views() const { return _boundary_views; } + Self_t& operator--() { + _coords_mng.set(_coords_mng.index()-1); + + return *this; + } /** - * Returns the number of all boundary elements (no dublicates) + * Postfix decrement operator. */ - pattern_size_t boundary_size() const { return _size_bnd_elems; } + Self_t operator--(int) { + Self_t result = *this; -private: - template - void push_boundary_views(const ViewSpec_t& view, const MaxExtT& max_ext, - const MaxDistT& max_dist) { - auto view_off = view.offsets(); - auto view_ext = view.extents(); - for(auto d = 0; d < NumDimensions; ++d) { - if(view_off[d] < max_ext[d].first && view_ext[d] == max_ext[d].first) { - view_ext[d] = max_dist[d].first; - } else if(view_ext[d] == max_ext[d].second) { - view_ext[d] = max_dist[d].second; - view_off[d] += max_ext[d].second - max_dist[d].second; - } else { - resize_offset(view_off[d], view_ext[d], max_dist[d].first); - resize_extent(view_off[d], view_ext[d], _view_local->extent(d), - max_dist[d].second); - } - } - ViewSpec_t tmp(view_off, view_ext); - _size_bnd_elems += tmp.size(); - _boundary_views.push_back(std::move(tmp)); + _coords_mng.set(_coords_mng.index()-1); + + return result; } - template - void resize_offset(OffT& offset, ExtT& extent, MaxT max) { - if(offset > max) { - extent += offset - max; - offset = max; - } + Self_t& operator+=(index_t n) { + auto index = _coords_mng.index() + n; + //if(index < _coords_mng.size()) + _coords_mng.set(index); + + return *this; + } + + Self_t& operator-=(index_t n) { + auto index = _coords_mng.index(); + if(index >= n) + _coords_mng.set(index - n); + + return *this; } - template - void resize_extent(OffT& offset, ExtT& extent, ExtT extent_local, MinT max) { - auto diff_ext = extent_local - offset - extent; - if(diff_ext > max) - extent += diff_ext - max; + Self_t operator+(index_t n) const { + auto res( *this ); + res += n; + + return res; + } + + Self_t operator-(index_t n) const { + auto res( 
*this ); + res -= n; + + return res; + } + + difference_type operator-(const Self_t& other) const { return _coords_mng.index() - other._coords_mng.index(); } + + bool operator<(const Self_t& other) const { + return compare(other, std::less()); + } + + bool operator<=(const Self_t& other) const { + return compare(other, std::less_equal()); + } + + bool operator>(const Self_t& other) const { + return compare(other, std::greater()); + } + + bool operator>=(const Self_t& other) const { + return compare(other, std::greater_equal()); + } + + bool operator==(const Self_t& other) const { + return compare(other, std::equal_to()); + } + + bool operator!=(const Self_t& other) const { + return compare(other, std::not_equal_to()); } private: - const ViewSpec_t* _view_local; - ViewSpec_t _view_inner; - ViewSpec_t _view_inner_with_boundaries; - BoundaryViews_t _boundary_views; - pattern_size_t _size_bnd_elems = 0; -}; + /** + * Compare position of this global iterator to the position of another + * global iterator with respect to viewspec projection. + */ + template + bool compare(const Self_t& other, const GlobIndexCmpFunc& gidx_cmp) const { + + return gidx_cmp(_coords_mng.index(), other._coords_mng.index()); + } + +private: + CoordsIdxManagerT _coords_mng; +}; // class StencilIterator -template -std::ostream& operator<<( - std::ostream& os, - const StencilSpecificViews& stencil_views) { - std::ostringstream ss; - ss << "dash::halo::StencilSpecificViews" - << "(local: " << stencil_views.local() - << "; inner: " << stencil_views.inner() - << "; inner_bound: " << stencil_views.inner_with_boundaries() - << "; boundary_views: " << stencil_views.boundary_views() - << "; boundary elems: " << stencil_views.boundary_size() << ")"; - - return operator<<(os, ss.str()); -} /* * Stencil specific iterator to iterate over a given scope of elements. 
@@ -222,7 +892,6 @@ class StencilIterator { using HaloMemory_t = HaloMemory; using pattern_index_t = typename PatternT::index_type; - using region_index_t = typename RegionCoords_t::region_index_t; using LocalLayout_t = CartesianIndexSpace; using StencilP_t = StencilPoint; @@ -316,8 +985,7 @@ class StencilIterator { * * \see DashGlobalIteratorConcept */ - //Self_t& operator=(const Self_t& other) = default; - Self_t& operator=(const Self_t& other) = delete; + Self_t& operator=(const Self_t& other) = default; /** * The number of dimensions of the iterator's underlying pattern. @@ -348,7 +1016,7 @@ class StencilIterator { pattern_index_t lpos() const { return _offset; } - const ElementCoords_t& coords() const { return _coords; }; + ElementCoords_t coords() const { return _coords; }; bool is_halo_value(const region_index_t index_stencil) { if(Scope == StencilViewScope::INNER) @@ -464,7 +1132,7 @@ class StencilIterator { return res; } - difference_type operator-(Self_t& other) const { return _idx - other._idx; } + difference_type operator-(const Self_t& other) const { return _idx - other._idx; } bool operator<(const Self_t& other) const { return compare(other, std::less()); @@ -504,9 +1172,8 @@ class StencilIterator { return true; } #endif - if(&_view == &(other._view) || _view == other._view) { - return gidx_cmp(_idx, other._idx); - } + return gidx_cmp(_idx, other._idx); + // TODO not the best solution return false; } @@ -641,17 +1308,17 @@ class StencilIterator { auto& halo_coord = halo_coords[i][d]; halo_coord = _coords[d] + (*_stencil_spec)[i][d]; if(halo_coord < 0) { - indexes[i] *= RegionCoords_t::REGION_INDEX_BASE; + indexes[i] *= REGION_INDEX_BASE; is_halo[i] = true; continue; } if(halo_coord < static_cast(extent)) { - indexes[i] = 1 + indexes[i] * RegionCoords_t::REGION_INDEX_BASE; + indexes[i] = 1 + indexes[i] * REGION_INDEX_BASE; continue; } - indexes[i] = 2 + indexes[i] * RegionCoords_t::REGION_INDEX_BASE; + indexes[i] = 2 + indexes[i] * REGION_INDEX_BASE; 
is_halo[i] = true; } } @@ -700,11 +1367,11 @@ class StencilIterator { HaloMemory_t* _halomemory; const StencilSpecT* _stencil_spec; const StencilOffsets_t* _stencil_offsets; - const ViewSpec_t _view; - const BoundaryViews_t _boundary_views{}; + ViewSpec_t _view; + BoundaryViews_t _boundary_views{}; ElementT* _local_memory; std::array _stencil_mem_ptr; - const LocalLayout_t _local_layout; + LocalLayout_t _local_layout; pattern_index_t _idx{ 0 }; // extension of the fastest index dimension minus the halo extension std::pair _ext_dim_reduced; @@ -716,9 +1383,9 @@ class StencilIterator { pattern_index_t _size; }; // class StencilIterator + } // namespace halo } // namespace dash #endif // DASH__HALO__ITERATOR__STENCILITERATOR_H - diff --git a/dash/include/dash/iterator/GlobViewIter.h b/dash/include/dash/iterator/GlobViewIter.h index cd21a3382..7afb7eebf 100644 --- a/dash/include/dash/iterator/GlobViewIter.h +++ b/dash/include/dash/iterator/GlobViewIter.h @@ -262,13 +262,13 @@ class GlobViewIter constexpr GlobViewIter( // GlobViewIter && other) GlobViewIter && other) - : _globmem (other._globmem) - , _pattern (other._pattern) - , _viewspec (other._viewspec) - , _idx (other._idx) - , _view_idx_offset(other._view_idx_offset) - , _max_idx (other._max_idx) - , _myid (other._myid) + : _globmem (std::move(other._globmem)) + , _pattern (std::move(other._pattern)) + , _viewspec (std::move(other._viewspec)) + , _idx (std::move(other._idx)) + , _view_idx_offset(std::move(other._view_idx_offset)) + , _max_idx (std::move(other._max_idx)) + , _myid (std::move(other._myid)) { } /** @@ -290,6 +290,8 @@ class GlobViewIter _view_idx_offset = other._view_idx_offset; _max_idx = other._max_idx; _myid = other._myid; + + return *this; } /** @@ -304,13 +306,13 @@ class GlobViewIter self_t & operator=( GlobViewIter && other) { - _globmem = other._globmem; - _pattern = other._pattern; - _viewspec = other._viewspec; - _idx = other._idx; - _view_idx_offset = other._view_idx_offset; - _max_idx = 
other._max_idx; - _myid = other._myid; + _globmem = std::move(other._globmem); + _pattern = std::move(other._pattern); + _viewspec = std::move(other._viewspec); + _idx = std::move(other._idx); + _view_idx_offset = std::move(other._view_idx_offset); + _max_idx = std::move(other._max_idx); + _myid = std::move(other._myid); // no ownership to transfer return *this; } diff --git a/dash/test/halo/HaloTest.cc b/dash/test/halo/HaloTest.cc index 7d21c5b4f..3b922e635 100644 --- a/dash/test/halo/HaloTest.cc +++ b/dash/test/halo/HaloTest.cc @@ -1,4 +1,3 @@ - #include "HaloTest.h" #include @@ -11,6 +10,8 @@ using namespace dash; using namespace dash::halo; +using namespace dash::halo::internal; + TEST_F(HaloTest, GlobalBoundarySpec) { using GlobBoundSpec_t = GlobalBoundarySpec<3>; @@ -74,6 +75,8 @@ TEST_F(HaloTest, HaloSpecStencils) using RCoords_t = RegionCoords<3>; using StencilP_t = StencilPoint<3>; + static constexpr auto RegionsMax = NumRegionsMax<3>; + { using StencilSpec_t = StencilSpec; StencilSpec_t stencil_spec( @@ -107,7 +110,7 @@ TEST_F(HaloTest, HaloSpecStencils) EXPECT_EQ(halo_spec.spec(16).coords(), RCoords_t({1,2,1})); EXPECT_EQ((uint32_t)halo_spec.extent(16), 2); - for(auto i = 0; i < RCoords_t::MaxIndex; ++i) { + for(auto i = 0; i < RegionsMax; ++i) { if (i != 16) { EXPECT_EQ((uint32_t)halo_spec.extent(i), 0); } @@ -125,7 +128,7 @@ TEST_F(HaloTest, HaloSpecStencils) EXPECT_EQ((uint32_t)halo_spec.extent(3), 2); EXPECT_EQ((uint32_t)halo_spec.extent(4), 2); EXPECT_EQ((uint32_t)halo_spec.extent(12), 1); - for(auto i = 0; i < RCoords_t::MaxIndex; ++i) { + for(auto i = 0; i < RegionsMax; ++i) { if(i != 3 && i != 4 && i != 12) { EXPECT_EQ((uint32_t)halo_spec.extent(i), 0); } @@ -151,7 +154,7 @@ TEST_F(HaloTest, HaloSpecStencils) EXPECT_EQ((uint32_t)halo_spec.extent(9), 2); EXPECT_EQ((uint32_t)halo_spec.extent(10), 2); EXPECT_EQ((uint32_t)halo_spec.extent(12), 1); - for(auto i = 0; i < RCoords_t::MaxIndex; ++i) { + for(auto i = 0; i < RegionsMax; ++i) { if (i != 0 && 
i != 1 && i != 3 && i != 4 && i != 9 && i != 10 && i != 12) { EXPECT_EQ((uint32_t)halo_spec.extent(i), 0); @@ -178,7 +181,7 @@ TEST_F(HaloTest, HaloSpecStencils) EXPECT_EQ((uint32_t)halo_spec.extent(23), 3); EXPECT_EQ((uint32_t)halo_spec.extent(25), 3); EXPECT_EQ((uint32_t)halo_spec.extent(26), 3); - for(auto i = 0; i < RCoords_t::MaxIndex; ++i) { + for(auto i = 0; i < RegionsMax; ++i) { if (i != 14 && i != 16 && i != 17 && i != 22 && i != 23 && i != 25 && i != 26) { EXPECT_EQ((uint32_t)halo_spec.extent(i), 0); @@ -187,7 +190,7 @@ TEST_F(HaloTest, HaloSpecStencils) } } -TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) +TEST_F(HaloTest, HaloRegionDependencies2D) { using Pattern_t = dash::Pattern<2>; using index_type = typename Pattern_t::index_type; @@ -207,6 +210,112 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) team_spec.balance_extents(); Pattern_t pattern(SizeSpec_t(ext_per_dim,ext_per_dim), dist_spec, team_spec, dash::Team::All()); + Matrix_t matrix_halo(pattern); + + dash::Team::All().barrier(); + + StencilSpec_t stencil_spec( + StencilP_t(-1,-1), StencilP_t(-1, 0), StencilP_t(-1, 1), + StencilP_t( 0,-1), StencilP_t( 0, 1), + StencilP_t( 1,-1), StencilP_t( 1, 0), StencilP_t( 1, 1)); + + HaloMatrixWrapper halo_wrapper(matrix_halo, stencil_spec); + auto& halo_block = halo_wrapper.halo_block(); + + // center + auto reg_dep_indices = halo_block.boundary_dependencies(4); + EXPECT_EQ(0, reg_dep_indices.size()); + // main regions + reg_dep_indices = halo_block.boundary_dependencies(1); + EXPECT_EQ(1, reg_dep_indices.size()); + EXPECT_EQ(1, reg_dep_indices[0]); + reg_dep_indices = halo_block.boundary_dependencies(3); + EXPECT_EQ(1, reg_dep_indices.size()); + EXPECT_EQ(3, reg_dep_indices[0]); + reg_dep_indices = halo_block.boundary_dependencies(5); + EXPECT_EQ(1, reg_dep_indices.size()); + EXPECT_EQ(5, reg_dep_indices[0]); + reg_dep_indices = halo_block.boundary_dependencies(7); + EXPECT_EQ(1, reg_dep_indices.size()); + EXPECT_EQ(7, reg_dep_indices[0]); + + 
reg_dep_indices = halo_block.boundary_dependencies(0); + EXPECT_EQ(3, reg_dep_indices.size()); + EXPECT_EQ(0, reg_dep_indices[0]); + EXPECT_EQ(3, reg_dep_indices[1]); + EXPECT_EQ(1, reg_dep_indices[2]); + reg_dep_indices = halo_block.boundary_dependencies(2); + EXPECT_EQ(3, reg_dep_indices.size()); + EXPECT_EQ(2, reg_dep_indices[0]); + EXPECT_EQ(5, reg_dep_indices[1]); + EXPECT_EQ(1, reg_dep_indices[2]); + reg_dep_indices = halo_block.boundary_dependencies(6); + EXPECT_EQ(3, reg_dep_indices.size()); + EXPECT_EQ(6, reg_dep_indices[0]); + EXPECT_EQ(3, reg_dep_indices[1]); + EXPECT_EQ(7, reg_dep_indices[2]); + reg_dep_indices = halo_block.boundary_dependencies(8); + EXPECT_EQ(3, reg_dep_indices.size()); + EXPECT_EQ(8, reg_dep_indices[0]); + EXPECT_EQ(5, reg_dep_indices[1]); + EXPECT_EQ(7, reg_dep_indices[2]); + + StencilSpec stencil_spec_2( StencilP_t(-1, 0), StencilP_t( 1, 0)); + + HaloMatrixWrapper halo_wrapper_2(matrix_halo, stencil_spec_2); + auto& halo_block_2 = halo_wrapper_2.halo_block(); + + // center + reg_dep_indices = halo_block_2.boundary_dependencies(4); + EXPECT_EQ(0, reg_dep_indices.size()); + // main regions + reg_dep_indices = halo_block_2.boundary_dependencies(1); + EXPECT_EQ(1, reg_dep_indices.size()); + EXPECT_EQ(1, reg_dep_indices[0]); + reg_dep_indices = halo_block_2.boundary_dependencies(3); + EXPECT_EQ(0, reg_dep_indices.size()); + reg_dep_indices = halo_block_2.boundary_dependencies(5); + EXPECT_EQ(0, reg_dep_indices.size()); + reg_dep_indices = halo_block_2.boundary_dependencies(7); + EXPECT_EQ(1, reg_dep_indices.size()); + EXPECT_EQ(7, reg_dep_indices[0]); + + reg_dep_indices = halo_block_2.boundary_dependencies(0); + EXPECT_EQ(1, reg_dep_indices.size()); + EXPECT_EQ(1, reg_dep_indices[0]); + reg_dep_indices = halo_block_2.boundary_dependencies(2); + EXPECT_EQ(1, reg_dep_indices.size()); + EXPECT_EQ(1, reg_dep_indices[0]); + reg_dep_indices = halo_block_2.boundary_dependencies(6); + EXPECT_EQ(1, reg_dep_indices.size()); + EXPECT_EQ(7, 
reg_dep_indices[0]); + reg_dep_indices = halo_block_2.boundary_dependencies(8); + EXPECT_EQ(1, reg_dep_indices.size()); + EXPECT_EQ(7, reg_dep_indices[0]); + + dash::Team::All().barrier(); +} + +TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) +{ + using Pattern_t = dash::Pattern<2>; + using index_type = typename Pattern_t::index_type; + using Matrix_t = dash::Matrix; + using DistSpec_t = dash::DistributionSpec<2>; + using TeamSpec_t = dash::TeamSpec<2>; + using SizeSpec_t = dash::SizeSpec<2>; + + using GlobBoundSpec_t = GlobalBoundarySpec<2>; + using StencilP_t = StencilPoint<2>; + using StencilSpec_t = StencilSpec; + + auto myid = dash::myid(); + + DistSpec_t dist_spec(dash::BLOCKED, dash::BLOCKED); + TeamSpec_t team_spec{}; + team_spec.balance_extents(); + Pattern_t pattern(SizeSpec_t(ext_per_dim,ext_per_dim), dist_spec, team_spec, dash::Team::All()); + Matrix_t matrix_halo(pattern); dash::fill(matrix_halo.begin(), matrix_halo.end(), 1); @@ -273,14 +382,15 @@ TEST_F(HaloTest, HaloMatrixWrapperNonCyclic2D) } halo_wrapper.update(); + auto it_bend = stencil_op.boundary.end(); for(auto it = stencil_op.boundary.begin(); it != it_bend; ++it) { - for(auto i = 0; i < stencil_spec.num_stencil_points(); ++i) + for(auto i = 0; i < stencil_spec.num_stencil_points(); ++i){ *sum_local += it.value_at(i); + } *sum_local += *it; } - sum_halo.barrier(); unsigned long sum_halo_total = 0; @@ -337,24 +447,17 @@ unsigned long calc_sum_halo(HaloWrapperT& halo_wrapper, StencilOpT stencil_op, b *sum_local += *it; } - halo_wrapper.wait(); - if(region_wise) { - for( auto d = 0; d < 3; ++d) { - auto it_bnd = stencil_op.boundary.iterator_at(d, RegionPos::PRE); + for( auto r = 0; r < NumRegionsMax<3>; ++r) { + auto it_bnd = stencil_op.boundary.iterator_at(r); + if(it_bnd.first == it_bnd.second) + continue; for(auto it = it_bnd.first; it != it_bnd.second; ++it) { for(auto i = 0; i < num_stencil_points; ++i) *sum_local += it.value_at(i); *sum_local += *it; } - auto it_bnd_2 = 
stencil_op.boundary.iterator_at(d, RegionPos::POST); - for(auto it = it_bnd_2.first; it != it_bnd_2.second; ++it) { - for(auto i = 0; i < num_stencil_points; ++i) - *sum_local += it.value_at(i); - - *sum_local += *it; - } } } else { auto it_bend = stencil_op.boundary.end(); @@ -939,7 +1042,6 @@ TEST_F(HaloTest, HaloMatrixWrapperBigMix3D) } } - for(auto i = 0; i < ext_per_dim; ++i) { for(auto j = 0; j < ext_per_dim_check; ++j) delete[] matrix_check[i][j]; @@ -974,6 +1076,7 @@ TEST_F(HaloTest, HaloMatrixWrapperBigMix3D) }); auto stencil_op = halo_wrapper.stencil_operator(stencil_spec); + auto sum_halo = calc_sum_halo(halo_wrapper, stencil_op); auto sum_halo_region = calc_sum_halo(halo_wrapper, stencil_op, true); auto sum_halo_via_stencil = calc_sum_halo_via_stencil(halo_wrapper, stencil_op); @@ -1268,7 +1371,6 @@ TEST_F(HaloTest, HaloMatrixWrapperBigMultiStencil) ); GlobBoundSpec_t bound_spec(BoundaryProp::NONE, BoundaryProp::CYCLIC, BoundaryProp::CUSTOM); HaloMatrixWrapper halo_wrapper(matrix_halo, bound_spec, stencil_spec_1, stencil_spec_2, stencil_spec_3); - halo_wrapper.set_custom_halos([](const std::array& coords) { return 20; });