diff --git a/src/cartesian_grid.hpp b/src/cartesian_grid.hpp index 65f3d47..60ce745 100644 --- a/src/cartesian_grid.hpp +++ b/src/cartesian_grid.hpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace fsgrid { struct CartesianGrid { @@ -35,7 +36,8 @@ struct CartesianGrid { using GlobalID = FsGridTools::GlobalID; using Task_t = FsGridTools::Task_t; -public: + //!< Width of the ghost cells frame + int32_t stencilSize = 0; //!< Global size of the simulation space, in cells std::array globalSize = {}; //!< Information about whether a given direction is periodic @@ -65,11 +67,12 @@ struct CartesianGrid { //!< Local size of simulation space handled by this task (including ghost cells) std::array storageSize = {}; +public: CartesianGrid(std::array globalSize, MPI_Comm parentComm, std::array periodic, const std::array& decomposition, int32_t stencilSize) - : globalSize(globalSize), periodic(periodic), parentRank(FsGridTools::getCommRank(parentComm)), - parentSize(FsGridTools::getCommSize(parentComm)), size(FsGridTools::getNumFsGridProcs(parentSize)), - colourFs(FsGridTools::computeColourFs(parentRank, size)), + : stencilSize(stencilSize), globalSize(globalSize), periodic(periodic), + parentRank(FsGridTools::getCommRank(parentComm)), parentSize(FsGridTools::getCommSize(parentComm)), + size(FsGridTools::getNumFsGridProcs(parentSize)), colourFs(FsGridTools::computeColourFs(parentRank, size)), colourAux(FsGridTools::computeColourAux(parentRank, parentSize, size)), numTasksPerDim(FsGridTools::computeNumTasksPerDim(globalSize, decomposition, size, stencilSize)), cartesian3d(FsGridTools::createCartesianCommunicator(parentComm, colourFs, colourAux, parentRank, @@ -87,5 +90,156 @@ struct CartesianGrid { FSGRID_MPI_CHECK(MPI_Comm_free(&cartesian3d), "Failed to free MPI comm"); } } + + /*! Returns the task responsible, and its localID for handling the cell with the given GlobalID + * \param id GlobalID of the cell for which task is to be determined + * \return a task for the grid's cartesian communicator + */ + std::pair getTaskForGlobalID(GlobalID id) { + // Transform globalID to global cell coordinate + std::array cell = FsGridTools::globalIDtoCellCoord(id, globalSize); + + // Find the index in the task grid this Cell belongs to + std::array taskIndex; + for (int32_t i = 0; i < 3; i++) { + int32_t n_per_task = globalSize[i] / numTasksPerDim[i]; + int32_t remainder = globalSize[i] % numTasksPerDim[i]; + + if (cell[i] < remainder * (n_per_task + 1)) { + taskIndex[i] = cell[i] / (n_per_task + 1); + } else { + taskIndex[i] = remainder + (cell[i] - remainder * (n_per_task + 1)) / n_per_task; + } + } + + // Get the task number from the communicator + std::pair retVal; + FSGRID_MPI_CHECK(MPI_Cart_rank(cartesian3d, taskIndex.data(), &retVal.first), + "Unable to find FsGrid rank for global ID ", id, "(coordinates [", cell[0], ", ", cell[1], ", ", + cell[2], "])"); + + // Determine localID of that cell within the target task + std::array thatTasksStart; + std::array thatTaskStorageSize; + for (int32_t i = 0; i < 3; i++) { + thatTasksStart[i] = FsGridTools::calcLocalStart(globalSize[i], numTasksPerDim[i], taskIndex[i]); + thatTaskStorageSize[i] = + FsGridTools::calcLocalSize(globalSize[i], numTasksPerDim[i], taskIndex[i]) + 2 * stencilSize; + } + + retVal.second = 0; + int32_t stride = 1; + for (int32_t i = 0; i < 3; i++) { + if (globalSize[i] <= 1) { + // Collapsed dimension, doesn't contribute. 
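+                // The stride is left unchanged as well, so a collapsed axis drops out of the linear index entirely.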
+ retVal.second += 0; + } else { + retVal.second += stride * (cell[i] - thatTasksStart[i] + stencilSize); + stride *= thatTaskStorageSize[i]; + } + } + + return retVal; + } + + /*! Transform global cell coordinates into the local domain. + * If the coordinates are out of bounds, (-1,-1,-1) is returned. + * \param x The cell's global x coordinate + * \param y The cell's global y coordinate + * \param z The cell's global z coordinate + */ + std::array globalToLocal(FsSize_t x, FsSize_t y, FsSize_t z) { + std::array retval{(FsIndex_t)x - localStart[0], (FsIndex_t)y - localStart[1], + (FsIndex_t)z - localStart[2]}; + + if (retval[0] >= localSize[0] || retval[1] >= localSize[1] || retval[2] >= localSize[2] || retval[0] < 0 || + retval[1] < 0 || retval[2] < 0) { + return {-1, -1, -1}; + } + + return retval; + } + + /*! Determine the cell's GlobalID from its local x,y,z coordinates + * \param x The cell's task-local x coordinate + * \param y The cell's task-local y coordinate + * \param z The cell's task-local z coordinate + */ + GlobalID getGlobalIDForCoords(int32_t x, int32_t y, int32_t z) { + return x + localStart[0] + globalSize[0] * (y + localStart[1]) + + globalSize[0] * globalSize[1] * (z + localStart[2]); + } + + /*! Determine the cell's LocalID from its local x,y,z coordinates + * \param x The cell's task-local x coordinate + * \param y The cell's task-local y coordinate + * \param z The cell's task-local z coordinate + */ + LocalID getLocalIDForCoords(int32_t x, int32_t y, int32_t z) { + return static_cast(globalSize[2] > 1) * storageSize[0] * storageSize[1] * (stencilSize + z) + + static_cast(globalSize[1] > 1) * storageSize[0] * (stencilSize + y) + + static_cast(globalSize[0] > 1) * (stencilSize + x); + } + + std::array& getTaskPosition() { return taskPosition; } + const std::array& getTaskPosition() const { return taskPosition; } + + /*! Get the size of the local domain handled by this grid. + */ + std::array& getLocalSize() { return localSize; } + const std::array& getLocalSize() const { return localSize; } + + /*! Get the start coordinates of the local domain handled by this grid. + */ + std::array& getLocalStart() { return localStart; } + const std::array& getLocalStart() const { return localStart; } + + /*! Get global size of the fsgrid domain + */ + std::array& getGlobalSize() { return globalSize; } + const std::array& getGlobalSize() const { return globalSize; } + + /*! Calculate global cell position (XYZ in global cell space) from local cell coordinates. + * + * \param x x-Coordinate, in cells + * \param y y-Coordinate, in cells + * \param z z-Coordinate, in cells + * + * \return Global cell coordinates + */ + std::array getGlobalIndices(int64_t x, int64_t y, int64_t z) { + return {static_cast(localStart[0] + x), static_cast(localStart[1] + y), + static_cast(localStart[2] + z)}; + } + + /*! Get the rank of this CPU in the FsGrid communicator */ + int32_t getRank() const { return rank; } + + /*! Get the number of ranks in the FsGrid communicator */ + int32_t getSize() const { return numTasksPerDim[0] * numTasksPerDim[1] * numTasksPerDim[2]; } + + int32_t getStencilSize() const { return stencilSize; } + + /*! Get in which directions, if any, this grid is periodic */ + std::array& getPeriodic() { return periodic; } + const std::array& getPeriodic() const { return periodic; } + + /*! 
Get the decomposition array*/ + std::array& getDecomposition() { return numTasksPerDim; } + const std::array& getDecomposition() const { return numTasksPerDim; } + + std::array& getStorageSize() { return storageSize; } + const std::array& getStorageSize() const { return storageSize; } + + size_t getNumTotalStored() const { + return std::accumulate(storageSize.cbegin(), storageSize.cend(), 1, std::multiplies<>()); + } + + MPI_Comm getComm() const { return cartesian3d; } + + constexpr int32_t computeNeighbourIndex(int32_t x, int32_t y, int32_t z) const { + return 13 - (x < 0) * 9 + (x >= localSize[0]) * 9 - (y < 0) * 3 + (y >= localSize[1]) * 3 - (z < 0) + + (z >= localSize[2]); + } }; } diff --git a/src/grid.hpp b/src/grid.hpp index ebb1ba4..c52ed76 100644 --- a/src/grid.hpp +++ b/src/grid.hpp @@ -21,21 +21,15 @@ along with fsgrid. If not, see . */ #include "cartesian_grid.hpp" +#include "neighbourhood.hpp" #include "tools.hpp" -#include #include #include #include #include -#include -#include -#include #include #include -#include -#include -#include #include /*! Simple cartesian, non-loadbalancing MPI Grid for use with the fieldsolver @@ -59,153 +53,13 @@ template class FsGrid { FsGrid(std::array globalSize, MPI_Comm parentComm, std::array periodic, const std::array& decomposition = {0, 0, 0}) : cartesianGrid(fsgrid::CartesianGrid(globalSize, parentComm, periodic, decomposition, stencil)), - neighbourIndexToRank(mapNeigbourIndexToRank(cartesianGrid.taskPosition, cartesianGrid.numTasksPerDim, periodic, - cartesianGrid.cartesian3d, cartesianGrid.rank)), - neighbourRankToIndex(mapNeighbourRankToIndex(neighbourIndexToRank, cartesianGrid.size)), - data(cartesianGrid.rank == -1 ? 0 - : std::accumulate(cartesianGrid.storageSize.cbegin(), - cartesianGrid.storageSize.cend(), 1, std::multiplies<>())), - neighbourSendType(generateMPITypes(cartesianGrid.storageSize, cartesianGrid.localSize, stencil, true)), - neighbourReceiveType(generateMPITypes(cartesianGrid.storageSize, cartesianGrid.localSize, stencil, false)) {} + neighbourhood(fsgrid::Neighbourhood(cartesianGrid)), + data(cartesianGrid.getRank() == -1 ? 0 : cartesianGrid.getNumTotalStored()) {} - /*! Finalize instead of destructor, as the MPI calls fail after the main program called MPI_Finalize(). - * Cleans up the cartesian communicator - */ + //!< Finalize instead of destructor, as the MPI calls fail after the main program called MPI_Finalize(). void finalize() { - // If not a non-FS process - if (cartesianGrid.rank != -1) { - for (int32_t i = 0; i < 27; i++) { - if (neighbourReceiveType[i] != MPI_DATATYPE_NULL) - FSGRID_MPI_CHECK(MPI_Type_free(&(neighbourReceiveType[i])), "Failed to free MPI type"); - if (neighbourSendType[i] != MPI_DATATYPE_NULL) - FSGRID_MPI_CHECK(MPI_Type_free(&(neighbourSendType[i])), "Failed to free MPI type"); - } - } - } - - static std::array mapNeigbourIndexToRank(const std::array& taskPosition, - const std::array& numTasksPerDim, - const std::array& periodic, MPI_Comm comm, - int32_t rank) { - auto calculateNeighbourRank = [&](int32_t neighbourIndex) { - auto calculateNeighbourPosition = [&](int32_t neighbourIndex, uint32_t i) { - const auto pos3D = - i == 0 ? linearToX(neighbourIndex) : (i == 1 ? linearToY(neighbourIndex) : linearToZ(neighbourIndex)); - const auto nonPeriodicPos = taskPosition[i] + pos3D; - return periodic[i] ? 
(nonPeriodicPos + numTasksPerDim[i]) % numTasksPerDim[i] : nonPeriodicPos; - }; - - const std::array neighbourPosition = { - calculateNeighbourPosition(neighbourIndex, 0), - calculateNeighbourPosition(neighbourIndex, 1), - calculateNeighbourPosition(neighbourIndex, 2), - }; - - const bool taskPositionWithinLimits = numTasksPerDim[0] > neighbourPosition[0] && neighbourPosition[0] >= 0 && - numTasksPerDim[1] > neighbourPosition[1] && neighbourPosition[1] >= 0 && - numTasksPerDim[2] > neighbourPosition[2] && neighbourPosition[2] >= 0; - - if (taskPositionWithinLimits) { - int32_t neighbourRank; - FSGRID_MPI_CHECK(MPI_Cart_rank(comm, neighbourPosition.data(), &neighbourRank), "Rank ", rank, - " can't determine neighbour rank at position [", neighbourPosition[0], ", ", - neighbourPosition[1], ", ", neighbourPosition[2], "]"); - return neighbourRank; - } else { - return MPI_PROC_NULL; - } - }; - - std::array ranks; - if (rank == -1) { - ranks.fill(MPI_PROC_NULL); - } else { - std::generate(ranks.begin(), ranks.end(), - [&calculateNeighbourRank, n = 0]() mutable { return calculateNeighbourRank(n++); }); - } - return ranks; - } - - static std::vector mapNeighbourRankToIndex(const std::array& indexToRankMap, FsSize_t numRanks) { - std::vector indices(numRanks, MPI_PROC_NULL); - std::for_each(indexToRankMap.cbegin(), indexToRankMap.cend(), [&indices, &numRanks, n = 0](auto rank) mutable { - if (numRanks > rank && rank >= 0) { - indices[rank] = n; - } - n++; - }); - return indices; - } - - // Assumes x, y and z to belong to set [-1, 0, 1] - // returns a value in (inclusive) range [0, 26] - constexpr static int32_t xyzToLinear(int32_t x, int32_t y, int32_t z) { return (x + 1) * 9 + (y + 1) * 3 + (z + 1); } - - // These assume i to be in (inclusive) range [0, 26] - // returns a value from the set [-1, 0, 1] - constexpr static int32_t linearToX(int32_t i) { return i / 9 - 1; } - constexpr static int32_t linearToY(int32_t i) { return (i % 9) / 3 - 1; } - constexpr static int32_t linearToZ(int32_t i) { return i % 3 - 1; } - - static std::array generateMPITypes(const std::array& storageSize, - const std::array& localSize, int32_t stencilSize, - bool generateForSend) { - MPI_Datatype baseType; - FSGRID_MPI_CHECK(MPI_Type_contiguous(sizeof(T), MPI_BYTE, &baseType), "Failed to create a contiguous data type"); - const std::array reverseStorageSize = { - storageSize[2], - storageSize[1], - storageSize[0], - }; - std::array types; - types.fill(MPI_DATATYPE_NULL); - - for (int32_t i = 0; i < 27; i++) { - const auto x = linearToX(i); - const auto y = linearToY(i); - const auto z = linearToZ(i); - - const bool self = x == 0 && y == 0 && z == 0; - const bool flatX = storageSize[0] == 1 && x != 0; - const bool flatY = storageSize[1] == 1 && y != 0; - const bool flatZ = storageSize[2] == 1 && z != 0; - const bool skip = flatX || flatY || flatZ || self; - - if (skip) { - continue; - } - - const std::array reverseSubarraySize = { - (z == 0) ? localSize[2] : stencil, - (y == 0) ? localSize[1] : stencil, - (x == 0) ? localSize[0] : stencil, - }; - - const std::array reverseSubarrayStart = [&]() { - if (generateForSend) { - return std::array{ - storageSize[2] == 1 ? 0 : (z == 1 ? storageSize[2] - 2 * stencil : stencil), - storageSize[1] == 1 ? 0 : (y == 1 ? storageSize[1] - 2 * stencil : stencil), - storageSize[0] == 1 ? 0 : (x == 1 ? storageSize[0] - 2 * stencil : stencil), - }; - } else { - return std::array{ - storageSize[2] == 1 ? 0 : (z == -1 ? storageSize[2] - stencil : (z == 0 ? 
stencil : 0)), - storageSize[1] == 1 ? 0 : (y == -1 ? storageSize[1] - stencil : (y == 0 ? stencil : 0)), - storageSize[0] == 1 ? 0 : (x == -1 ? storageSize[0] - stencil : (x == 0 ? stencil : 0)), - }; - } - }(); - - FSGRID_MPI_CHECK(MPI_Type_create_subarray(3, reverseStorageSize.data(), reverseSubarraySize.data(), - reverseSubarrayStart.data(), MPI_ORDER_C, baseType, &(types[i])), - "Failed to create a subarray type"); - FSGRID_MPI_CHECK(MPI_Type_commit(&(types[i])), "Failed to commit MPI type"); - } - - FSGRID_MPI_CHECK(MPI_Type_free(&baseType), "Couldn't free the basetype used to create the sendTypes"); - - return types; + cartesianGrid.finalize(); + neighbourhood.finalize(); } // ============================ @@ -218,189 +72,20 @@ template class FsGrid { data = other.getData(); } - /*! Returns the task responsible, and its localID for handling the cell with the given GlobalID - * \param id GlobalID of the cell for which task is to be determined - * \return a task for the grid's cartesian communicator - */ - std::pair getTaskForGlobalID(GlobalID id) { - // Transform globalID to global cell coordinate - std::array cell = FsGridTools::globalIDtoCellCoord(id, cartesianGrid.globalSize); - - // Find the index in the task grid this Cell belongs to - std::array taskIndex; - for (int32_t i = 0; i < 3; i++) { - int32_t n_per_task = cartesianGrid.globalSize[i] / cartesianGrid.numTasksPerDim[i]; - int32_t remainder = cartesianGrid.globalSize[i] % cartesianGrid.numTasksPerDim[i]; - - if (cell[i] < remainder * (n_per_task + 1)) { - taskIndex[i] = cell[i] / (n_per_task + 1); - } else { - taskIndex[i] = remainder + (cell[i] - remainder * (n_per_task + 1)) / n_per_task; - } - } - - // Get the task number from the communicator - std::pair retVal; - FSGRID_MPI_CHECK(MPI_Cart_rank(cartesianGrid.cartesian3d, taskIndex.data(), &retVal.first), - "Unable to find FsGrid rank for global ID ", id, "(coordinates [", cell[0], ", ", cell[1], ", ", - cell[2], "])"); - - // Determine localID of that cell within the target task - std::array thatTasksStart; - std::array thatTaskStorageSize; - for (int32_t i = 0; i < 3; i++) { - thatTasksStart[i] = - FsGridTools::calcLocalStart(cartesianGrid.globalSize[i], cartesianGrid.numTasksPerDim[i], taskIndex[i]); - thatTaskStorageSize[i] = - FsGridTools::calcLocalSize(cartesianGrid.globalSize[i], cartesianGrid.numTasksPerDim[i], taskIndex[i]) + - 2 * stencil; - } - - retVal.second = 0; - int32_t stride = 1; - for (int32_t i = 0; i < 3; i++) { - if (cartesianGrid.globalSize[i] <= 1) { - // Collapsed dimension, doesn't contribute. - retVal.second += 0; - } else { - retVal.second += stride * (cell[i] - thatTasksStart[i] + stencil); - stride *= thatTaskStorageSize[i]; - } - } - - return retVal; - } - - /*! Transform global cell coordinates into the local domain. - * If the coordinates are out of bounds, (-1,-1,-1) is returned. - * \param x The cell's global x coordinate - * \param y The cell's global y coordinate - * \param z The cell's global z coordinate - */ - std::array globalToLocal(FsSize_t x, FsSize_t y, FsSize_t z) { - std::array retval; - retval[0] = (FsIndex_t)x - cartesianGrid.localStart[0]; - retval[1] = (FsIndex_t)y - cartesianGrid.localStart[1]; - retval[2] = (FsIndex_t)z - cartesianGrid.localStart[2]; - - if (retval[0] >= cartesianGrid.localSize[0] || retval[1] >= cartesianGrid.localSize[1] || - retval[2] >= cartesianGrid.localSize[2] || retval[0] < 0 || retval[1] < 0 || retval[2] < 0) { - return {-1, -1, -1}; - } - - return retval; - } - - /*! 
Determine the cell's GlobalID from its local x,y,z coordinates - * \param x The cell's task-local x coordinate - * \param y The cell's task-local y coordinate - * \param z The cell's task-local z coordinate - */ - GlobalID GlobalIDForCoords(int32_t x, int32_t y, int32_t z) { - return x + cartesianGrid.localStart[0] + cartesianGrid.globalSize[0] * (y + cartesianGrid.localStart[1]) + - cartesianGrid.globalSize[0] * cartesianGrid.globalSize[1] * (z + cartesianGrid.localStart[2]); - } - - /*! Determine the cell's LocalID from its local x,y,z coordinates - * \param x The cell's task-local x coordinate - * \param y The cell's task-local y coordinate - * \param z The cell's task-local z coordinate - */ - LocalID LocalIDForCoords(int32_t x, int32_t y, int32_t z) { - LocalID index = 0; - if (cartesianGrid.globalSize[2] > 1) { - index += cartesianGrid.storageSize[0] * cartesianGrid.storageSize[1] * (stencil + z); - } - if (cartesianGrid.globalSize[1] > 1) { - index += cartesianGrid.storageSize[0] * (stencil + y); - } - if (cartesianGrid.globalSize[0] > 1) { - index += stencil + x; - } - - return index; - } - + // ------------------------------------------------------------ + // Neighbourhood stuff + // ------------------------------------------------------------ /*! Perform ghost cell communication. */ void updateGhostCells() { + const auto rank = cartesianGrid.getRank(); + const auto comm = cartesianGrid.getComm(); - if (cartesianGrid.rank == -1) + if (rank == -1) { return; - - // TODO, faster with simultaneous isends& ireceives? - std::array receiveRequests; - std::array sendRequests; - - for (int32_t i = 0; i < 27; i++) { - receiveRequests[i] = MPI_REQUEST_NULL; - sendRequests[i] = MPI_REQUEST_NULL; - } - - for (int32_t x = -1; x <= 1; x++) { - for (int32_t y = -1; y <= 1; y++) { - for (int32_t z = -1; z <= 1; z++) { - int32_t shiftId = (x + 1) * 9 + (y + 1) * 3 + (z + 1); - int32_t receiveId = (1 - x) * 9 + (1 - y) * 3 + (1 - z); - if (neighbourIndexToRank[receiveId] != MPI_PROC_NULL && - neighbourSendType[shiftId] != MPI_DATATYPE_NULL) { - FSGRID_MPI_CHECK(MPI_Irecv(data.data(), 1, neighbourReceiveType[shiftId], - neighbourIndexToRank[receiveId], shiftId, cartesianGrid.cartesian3d, - &(receiveRequests[shiftId])), - "Rank ", cartesianGrid.rank, " failed to receive data from neighbor ", receiveId, - " with rank ", neighbourIndexToRank[receiveId]); - } - } - } } - for (int32_t x = -1; x <= 1; x++) { - for (int32_t y = -1; y <= 1; y++) { - for (int32_t z = -1; z <= 1; z++) { - int32_t shiftId = (x + 1) * 9 + (y + 1) * 3 + (z + 1); - int32_t sendId = shiftId; - if (neighbourIndexToRank[sendId] != MPI_PROC_NULL && neighbourSendType[shiftId] != MPI_DATATYPE_NULL) { - FSGRID_MPI_CHECK(MPI_Isend(data.data(), 1, neighbourSendType[shiftId], neighbourIndexToRank[sendId], - shiftId, cartesianGrid.cartesian3d, &(sendRequests[shiftId])), - "Rank ", cartesianGrid.rank, " failed to send data to neighbor ", sendId, - " with rank ", neighbourIndexToRank[sendId]); - } - } - } - } - FSGRID_MPI_CHECK(MPI_Waitall(27, receiveRequests.data(), MPI_STATUSES_IGNORE), - "Synchronization at ghost cell update failed"); - FSGRID_MPI_CHECK(MPI_Waitall(27, sendRequests.data(), MPI_STATUSES_IGNORE), - "Synchronization at ghost cell update failed"); - } - - /*! Get the size of the local domain handled by this grid. - */ - std::array& getLocalSize() { return cartesianGrid.localSize; } - - /*! Get the start coordinates of the local domain handled by this grid. 
- */ - std::array& getLocalStart() { return cartesianGrid.localStart; } - - /*! Get global size of the fsgrid domain - */ - std::array& getGlobalSize() { return cartesianGrid.globalSize; } - - /*! Calculate global cell position (XYZ in global cell space) from local cell coordinates. - * - * \param x x-Coordinate, in cells - * \param y y-Coordinate, in cells - * \param z z-Coordinate, in cells - * - * \return Global cell coordinates - */ - std::array getGlobalIndices(int64_t x, int64_t y, int64_t z) { - std::array retval; - retval[0] = cartesianGrid.localStart[0] + x; - retval[1] = cartesianGrid.localStart[1] + y; - retval[2] = cartesianGrid.localStart[2] + z; - - return retval; + neighbourhood.updateGhostCells(rank, comm, data.data()); } /*! Get a reference to the field data in a cell @@ -412,99 +97,46 @@ template class FsGrid { T* get(int32_t x, int32_t y, int32_t z) { // Keep track which neighbour this cell actually belongs to (13 = ourself) - int32_t isInNeighbourDomain = 13; + // TODO: compute in cartesian grid int32_t coord_shift[3] = {0, 0, 0}; if (x < 0) { - isInNeighbourDomain -= 9; coord_shift[0] = 1; } - if (x >= cartesianGrid.localSize[0]) { - isInNeighbourDomain += 9; + if (x >= cartesianGrid.getLocalSize()[0]) { coord_shift[0] = -1; } if (y < 0) { - isInNeighbourDomain -= 3; coord_shift[1] = 1; } - if (y >= cartesianGrid.localSize[1]) { - isInNeighbourDomain += 3; + if (y >= cartesianGrid.getLocalSize()[1]) { coord_shift[1] = -1; } if (z < 0) { - isInNeighbourDomain -= 1; coord_shift[2] = 1; } - if (z >= cartesianGrid.localSize[2]) { - isInNeighbourDomain += 1; + if (z >= cartesianGrid.getLocalSize()[2]) { coord_shift[2] = -1; } - // Santiy-Check that the requested cell is actually inside our domain - // TODO: ugh, this is ugly. -#ifdef FSGRID_DEBUG - bool inside = true; - if (localSize[0] <= 1 && !periodic[0]) { - if (x != 0) { - std::cerr << "x != 0 despite non-periodic x-axis with only one cell." << std::endl; - inside = false; - } - } else { - if (x < -stencil || x >= localSize[0] + stencil) { - std::cerr << "x = " << x << " is outside of [ " << -stencil << ", " << localSize[0] + stencil << "[!" - << std::endl; - inside = false; - } - } - - if (localSize[1] <= 1 && !periodic[1]) { - if (y != 0) { - std::cerr << "y != 0 despite non-periodic y-axis with only one cell." << std::endl; - inside = false; - } - } else { - if (y < -stencil || y >= localSize[1] + stencil) { - std::cerr << "y = " << y << " is outside of [ " << -stencil << ", " << localSize[1] + stencil << "[!" - << std::endl; - inside = false; - } - } - - if (localSize[2] <= 1 && !periodic[2]) { - if (z != 0) { - std::cerr << "z != 0 despite non-periodic z-axis with only one cell." << std::endl; - inside = false; - } - } else { - if (z < -stencil || z >= localSize[2] + stencil) { - inside = false; - std::cerr << "z = " << z << " is outside of [ " << -stencil << ", " << localSize[2] + stencil << "[!" - << std::endl; - } - } - if (!inside) { - std::cerr << "Out-of bounds access in FsGrid::get! Expect weirdness." << std::endl; - return NULL; - } -#endif // FSGRID_DEBUG - - if (isInNeighbourDomain != 13) { - + const int32_t neighbourIndex = cartesianGrid.computeNeighbourIndex(x, y, z); + const auto neigbourRank = neighbourhood.getNeighbourRank(neighbourIndex); + if (neighbourIndex != 13) { // Check if the corresponding neighbour exists - if (neighbourIndexToRank[isInNeighbourDomain] == MPI_PROC_NULL) { + if (neigbourRank == MPI_PROC_NULL) { + // TODO: change to assert? 
// Neighbour doesn't exist, we must be an outer boundary cell // (or something is quite wrong) return NULL; - - } else if (neighbourIndexToRank[isInNeighbourDomain] == cartesianGrid.rank) { + } else if (neigbourRank == cartesianGrid.getRank()) { // For periodic boundaries, where the neighbour is actually ourself, // return our own actual cell instead of the ghost - x += coord_shift[0] * cartesianGrid.localSize[0]; - y += coord_shift[1] * cartesianGrid.localSize[1]; - z += coord_shift[2] * cartesianGrid.localSize[2]; + x += coord_shift[0] * cartesianGrid.getLocalSize()[0]; + y += coord_shift[1] * cartesianGrid.getLocalSize()[1]; + z += coord_shift[2] * cartesianGrid.getLocalSize()[2]; } // Otherwise we return the ghost cell } - LocalID index = LocalIDForCoords(x, y, z); + LocalID index = cartesianGrid.getLocalIDForCoords(x, y, z); return &data[index]; } @@ -527,14 +159,32 @@ template class FsGrid { * \param z local z-Coordinate, in cells */ std::array getPhysicalCoords(int32_t x, int32_t y, int32_t z) { - std::array coords; - coords[0] = physicalGlobalStart[0] + (cartesianGrid.localStart[0] + x) * DX; - coords[1] = physicalGlobalStart[1] + (cartesianGrid.localStart[1] + y) * DY; - coords[2] = physicalGlobalStart[2] + (cartesianGrid.localStart[2] + z) * DZ; + return {physicalGlobalStart[0] + (cartesianGrid.getLocalStart()[0] + x) * DX, + physicalGlobalStart[1] + (cartesianGrid.getLocalStart()[1] + y) * DY, + physicalGlobalStart[2] + (cartesianGrid.getLocalStart()[2] + z) * DZ}; + } - return coords; + /*! Perform an MPI_Allreduce with this grid's internal communicator + * Function syntax is identical to MPI_Allreduce, except the final (communicator + * argument will not be needed) */ + int32_t Allreduce(void* sendbuf, void* recvbuf, int32_t count, MPI_Datatype datatype, MPI_Op op) { + // If a normal FS-rank + if (cartesianGrid.getRank() != -1) { + return MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, cartesianGrid.getComm()); + } + // If a non-FS rank, no need to communicate + else { + int32_t datatypeSize; + MPI_Type_size(datatype, &datatypeSize); + for (int32_t i = 0; i < count * datatypeSize; ++i) + ((char*)recvbuf)[i] = ((char*)sendbuf)[i]; + return MPI_ERR_RANK; // This is ok for a non-FS rank + } } + // ------------------------------------------------------------ + // Debug stuff + // ------------------------------------------------------------ /*! Debugging output helper function. Allows for nicely formatted printing * of grid contents. Since the grid data format is varying, the actual * printing should be done in a lambda passed to this function. Example usage @@ -551,17 +201,17 @@ template class FsGrid { int32_t xmin = 0, xmax = 1; int32_t ymin = 0, ymax = 1; int32_t zmin = 0, zmax = 1; - if (cartesianGrid.localSize[0] > 1) { + if (cartesianGrid.getLocalSize()[0] > 1) { xmin = -stencil; - xmax = cartesianGrid.localSize[0] + stencil; + xmax = cartesianGrid.getLocalSize()[0] + stencil; } - if (cartesianGrid.localSize[1] > 1) { + if (cartesianGrid.getLocalSize()[1] > 1) { ymin = -stencil; - ymax = cartesianGrid.localSize[1] + stencil; + ymax = cartesianGrid.getLocalSize()[1] + stencil; } - if (cartesianGrid.localSize[2] > 1) { + if (cartesianGrid.getLocalSize()[2] > 1) { zmin = -stencil; - zmax = cartesianGrid.localSize[2] + stencil; + zmax = cartesianGrid.getLocalSize()[2] + stencil; } for (int32_t z = zmin; z < zmax; z++) { for (int32_t y = ymin; y < ymax; y++) { @@ -574,368 +224,7 @@ template class FsGrid { } } - /*! 
Get the rank of this CPU in the FsGrid communicator */ - int32_t getRank() { return cartesianGrid.rank; } - - /*! Get the number of ranks in the FsGrid communicator */ - int32_t getSize() { - return cartesianGrid.numTasksPerDim[0] * cartesianGrid.numTasksPerDim[1] * cartesianGrid.numTasksPerDim[2]; - } - - /*! Get in which directions, if any, this grid is periodic */ - std::array& getPeriodic() { return cartesianGrid.periodic; } - - /*! Perform an MPI_Allreduce with this grid's internal communicator - * Function syntax is identical to MPI_Allreduce, except the final (communicator - * argument will not be needed) */ - int32_t Allreduce(void* sendbuf, void* recvbuf, int32_t count, MPI_Datatype datatype, MPI_Op op) { - - // If a normal FS-rank - if (cartesianGrid.rank != -1) { - return MPI_Allreduce(sendbuf, recvbuf, count, datatype, op, cartesianGrid.cartesian3d); - } - // If a non-FS rank, no need to communicate - else { - int32_t datatypeSize; - MPI_Type_size(datatype, &datatypeSize); - for (int32_t i = 0; i < count * datatypeSize; ++i) - ((char*)recvbuf)[i] = ((char*)sendbuf)[i]; - return MPI_ERR_RANK; // This is ok for a non-FS rank - } - } - - /*! Get the decomposition array*/ - std::array& getDecomposition() { return cartesianGrid.numTasksPerDim; } - - // Debug helper types, can be removed once fsgrid is split to different structs - std::string display() const { - std::stringstream ss; - std::ios defaultState(nullptr); - defaultState.copyfmt(ss); - - auto pushContainerValues = [&ss, &defaultState](auto container, bool asByteStr = false, uint32_t nPerLine = 0, - uint32_t numTabs = 2) { - nPerLine = nPerLine == 0 ? container.size() : nPerLine; - const uint32_t numBytes = sizeof(decltype(container[0])); - if (asByteStr) { - ss << std::hex << std::setfill('0'); - } - - uint32_t i = 1; - for (const auto& v : container) { - if (asByteStr) { - ss << "0x" << std::setw(2 * numBytes); - if constexpr (std::is_integral_v::type>) { - ss << static_cast( - static_cast::type>>(v)); - } else { - ss << v; - } - } else { - ss << v; - } - - if (i < container.size()) { - ss << ", "; - } - if (i % nPerLine == 0 && i < container.size()) { - ss << "\n"; - for (uint32_t j = 0; j < numTabs; j++) { - ss << "\t"; - } - } - - i++; - } - - ss.copyfmt(defaultState); - }; - - auto pushMPIComm = [&ss, &defaultState](auto prefix, auto comm, auto newliner) { - ss << prefix; - if (comm == MPI_COMM_NULL) { - ss << "MPI_COMM_NULL"; - } else { - int rank = 0; - FSGRID_MPI_CHECK(MPI_Comm_rank(comm, &rank), "Failed to get rank from comm ", comm); - int size = 0; - FSGRID_MPI_CHECK(MPI_Comm_size(comm, &size), "Failed to get size from comm ", comm); - ss << newliner; - ss << "comm rank: "; - if (rank != MPI_UNDEFINED) { - ss << rank; - } else { - ss << "MPI_UNDEFINED"; - } - ss << newliner; - ss << "comm size: " << size; - - MPI_Group group = MPI_GROUP_NULL; - FSGRID_MPI_CHECK(MPI_Comm_group(comm, &group), "Failed to get group from comm ", comm); - if (group != MPI_GROUP_NULL) { - int rank = 0; - FSGRID_MPI_CHECK(MPI_Group_rank(group, &rank), "Failed to get rank from group ", group); - int size = 0; - FSGRID_MPI_CHECK(MPI_Group_size(group, &size), "Failed to get size from group ", group); - - ss << newliner; - ss << "group rank: "; - if (rank != MPI_UNDEFINED) { - ss << rank; - } else { - ss << "MPI_UNDEFINED"; - } - ss << newliner; - ss << "group size: " << size; - FSGRID_MPI_CHECK(MPI_Group_free(&group), "Failed to free group"); - } - - int isInterComm = 0; - FSGRID_MPI_CHECK(MPI_Comm_test_inter(comm, &isInterComm), "Failed 
to get intecomm flag from comm ", comm); - ss << newliner; - ss << "is intercomm: " << isInterComm; - if (isInterComm) { - MPI_Group remotegroup = MPI_GROUP_NULL; - FSGRID_MPI_CHECK(MPI_Comm_remote_group(comm, &remotegroup), "Failed to get remotegroup from comm ", - comm); - if (remotegroup != MPI_GROUP_NULL) { - int rank = 0; - FSGRID_MPI_CHECK(MPI_Group_rank(remotegroup, &rank), "Failed to get rank from remotegroup ", - remotegroup); - int size = 0; - FSGRID_MPI_CHECK(MPI_Group_size(remotegroup, &size), "Failed to get size from remotegroup ", - remotegroup); - - ss << newliner; - ss << "remotegroup rank: "; - if (rank != MPI_UNDEFINED) { - ss << rank; - } else { - ss << "MPI_UNDEFINED"; - } - ss << newliner; - ss << "remotegroup size: " << size; - FSGRID_MPI_CHECK(MPI_Group_free(&remotegroup), "Failed to free remotegroup"); - } - - int remotesize = 0; - FSGRID_MPI_CHECK(MPI_Comm_remote_size(comm, &remotesize), "Failed to get remotesize from comm ", comm); - ss << newliner; - ss << "remotesize: " << remotesize; - } - } - }; - - ss << "{"; - - pushMPIComm("\n\tcomm3d: ", cartesianGrid.cartesian3d, "\n\t\t"); - ss << "\n\trank: " << cartesianGrid.rank; - ss << "\n\tneigbour: [\n\t\t"; - pushContainerValues(neighbourIndexToRank, true, 9); - ss << "\n\t]"; - ss << "\n\tneigbour_index: [\n\t\t"; - pushContainerValues(neighbourRankToIndex, true, 9); - ss << "\n\t]"; - ss << "\n\tntasksPerDim: [\n\t\t"; - pushContainerValues(cartesianGrid.numTasksPerDim); - ss << "\n\t]"; - ss << "\n\ttaskPosition: [\n\t\t"; - pushContainerValues(cartesianGrid.taskPosition); - ss << "\n\t]"; - ss << "\n\tperiodic: [\n\t\t"; - pushContainerValues(cartesianGrid.periodic); - ss << "\n\t]"; - ss << "\n\tglobalSize: [\n\t\t"; - pushContainerValues(cartesianGrid.globalSize); - ss << "\n\t]"; - ss << "\n\tlocalSize: [\n\t\t"; - pushContainerValues(cartesianGrid.localSize); - ss << "\n\t]"; - ss << "\n\tlocalStart: [\n\t\t"; - pushContainerValues(cartesianGrid.localStart); - ss << "\n\t]"; - ss << "\n\tneigbourSendType: ["; - for (const auto& v : getMPITypes(neighbourSendType)) { - ss << "\n\t\t" << v.display("\n\t\t"); - } - ss << "\n\t]"; - ss << "\n\tneighbourReceiveType: ["; - for (const auto& v : getMPITypes(neighbourReceiveType)) { - ss << "\n\t\t" << v.display("\n\t\t"); - } - ss << "\n\t]"; - ss << "\n\tinfo on data:"; - ss << "\n\t\tcapacity: " << data.capacity(); - ss << "\n\t\tsize: " << data.size(); - ss << "\n\t\tdata.front: [\n\t\t\t"; - pushContainerValues(data.front()); - ss << "\n\t\t]"; - ss << "\n\t\tdata.back: [\n\t\t\t"; - pushContainerValues(data.back()); - ss << "\n\t\t]"; - - ss << "\n}"; - - return ss.str(); - } - - struct MPITypeMetaData { - int combiner = -1; - std::vector integers; - std::vector addresses; - std::vector metaDatas; - - std::string display(std::string newliner) const { - std::stringstream ss; - std::ios defaultState(nullptr); - defaultState.copyfmt(ss); - - auto pushContainerValues = [&ss, &defaultState, &newliner](auto container, bool asByteStr = false, - uint32_t nPerLine = 0, uint32_t numTabs = 2) { - nPerLine = nPerLine == 0 ? 
container.size() : nPerLine; - const uint32_t numBytes = sizeof(decltype(container[0])); - if (asByteStr) { - ss << std::hex << std::setfill('0') << std::uppercase; - } - - uint32_t i = 1; - for (const auto& v : container) { - if (asByteStr) { - ss << "0x" << std::setw(2 * numBytes); - if constexpr (std::is_integral_v::type>) { - ss << static_cast( - static_cast::type>>(v)); - } else { - ss << v; - } - } else { - ss << v; - } - - if (i < container.size()) { - ss << ", "; - } - if (i % nPerLine == 0 && i < container.size()) { - ss << newliner; - for (uint32_t j = 0; j < numTabs; j++) { - ss << "\t"; - } - } - - i++; - } - - ss.copyfmt(defaultState); - }; - - auto pushCombiner = [&ss](auto combiner) { - switch (combiner) { - case MPI_COMBINER_NAMED: - ss << "MPI_COMBINER_NAMED"; - return; - case MPI_COMBINER_DUP: - ss << "MPI_COMBINER_DUP"; - return; - case MPI_COMBINER_CONTIGUOUS: - ss << "MPI_COMBINER_CONTIGUOUS"; - return; - case MPI_COMBINER_VECTOR: - ss << "MPI_COMBINER_VECTOR"; - return; - case MPI_COMBINER_HVECTOR: - ss << "MPI_COMBINER_HVECTOR"; - return; - case MPI_COMBINER_INDEXED: - ss << "MPI_COMBINER_INDEXED"; - return; - case MPI_COMBINER_HINDEXED: - ss << "MPI_COMBINER_HINDEXED"; - return; - case MPI_COMBINER_INDEXED_BLOCK: - ss << "MPI_COMBINER_INDEXED_BLOCK"; - return; - case MPI_COMBINER_STRUCT: - ss << "MPI_COMBINER_STRUCT"; - return; - case MPI_COMBINER_SUBARRAY: - ss << "MPI_COMBINER_SUBARRAY"; - return; - case MPI_COMBINER_DARRAY: - ss << "MPI_COMBINER_DARRAY"; - return; - case MPI_COMBINER_F90_REAL: - ss << "MPI_COMBINER_F90_REAL"; - return; - case MPI_COMBINER_F90_COMPLEX: - ss << "MPI_COMBINER_F90_COMPLEX"; - return; - case MPI_COMBINER_F90_INTEGER: - ss << "MPI_COMBINER_F90_INTEGER"; - return; - case MPI_COMBINER_RESIZED: - ss << "MPI_COMBINER_RESIZED"; - return; - default: - ss << "NO_SUCH_COMBINER"; - return; - } - }; - - ss << "{"; - ss << newliner << "\tcombiner: "; - pushCombiner(combiner); - ss << newliner << "\tintegers: [" << newliner << "\t\t"; - pushContainerValues(integers, false, 9); - ss << newliner << "\t]"; - ss << newliner << "\taddresses: [" << newliner << "\t\t"; - pushContainerValues(addresses, true, 9); - ss << newliner << "\t]"; - ss << newliner << "\tdata types: [" << newliner << "\t\t"; - for (const auto& mt : metaDatas) { - ss << mt.display(newliner + "\t\t"); - } - ss << newliner << "\t]"; - ss << newliner << "}"; - - return ss.str(); - } - }; - - template std::vector getMPITypes(const U& typeVec) const { - std::vector metadatas; - metadatas.reserve(typeVec.size()); - for (const auto& mpiType : typeVec) { - if (mpiType == MPI_DATATYPE_NULL) { - continue; - } - - int numIntegers = 0; - int numAddresses = 0; - int numDataTypes = 0; - int combiner = 0; - FSGRID_MPI_CHECK(MPI_Type_get_envelope(mpiType, &numIntegers, &numAddresses, &numDataTypes, &combiner), - "Failed to get envelope for type ", mpiType); - - if (combiner == MPI_COMBINER_NAMED) { - continue; - } - - metadatas.push_back(MPITypeMetaData{combiner, std::vector(numIntegers), - std::vector(numAddresses), std::vector()}); - std::vector dataTypes(numDataTypes); - FSGRID_MPI_CHECK(MPI_Type_get_contents(mpiType, numIntegers, numAddresses, numDataTypes, - metadatas.back().integers.data(), metadatas.back().addresses.data(), - dataTypes.data()), - "Failed to get type contents for type ", mpiType); - - if (numDataTypes != 0) { - metadatas.back().metaDatas = getMPITypes(dataTypes); - } - } - - return metadatas; - } - + // TODO: move these somewhere? /*! 
Physical grid spacing and physical coordinate space start. */ double DX = 0.0; @@ -944,21 +233,9 @@ template class FsGrid { std::array physicalGlobalStart = {}; private: - //!< Cartesian grid fsgrid::CartesianGrid cartesianGrid = {}; - //!< Lookup table from index to rank in the neighbour array (includes self) - std::array neighbourIndexToRank = { - MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, - MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, - MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, - MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, - }; - //!< Lookup table from rank to index in the neighbour array - std::vector neighbourRankToIndex = {}; + fsgrid::Neighbourhood neighbourhood = {}; + //! Actual storage of field data std::vector data = {}; - //!< Datatype for sending data - std::array neighbourSendType = {}; - //!< Datatype for receiving data - std::array neighbourReceiveType = {}; }; diff --git a/src/neighbourhood.hpp b/src/neighbourhood.hpp new file mode 100644 index 0000000..028383c --- /dev/null +++ b/src/neighbourhood.hpp @@ -0,0 +1,128 @@ +#pragma once + +/* + Copyright (C) 2016 Finnish Meteorological Institute + Copyright (C) 2016-2024 CSC -IT Center for Science + + This file is part of fsgrid + + fsgrid is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + fsgrid is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; + without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with fsgrid. If not, see . 
+*/ +#include "cartesian_grid.hpp" +#include "tools.hpp" +#include +#include +#include +#include + +namespace fsgrid { +template struct Neighbourhood { +private: + //!< Lookup table from index to rank in the neighbour array (includes self) + std::array neighbourIndexToRank = { + MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, + MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, + MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, + MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, MPI_PROC_NULL, + }; + //!< Lookup table from rank to index in the neighbour array + std::vector neighbourRankToIndex = {}; + //!< Datatype for sending data + std::array neighbourSendType = { + MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, + MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, + MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, + MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, + MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, + }; + //!< Datatype for receiving data + std::array neighbourReceiveType = { + MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, + MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, + MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, + MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, + MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, MPI_DATATYPE_NULL, + }; + +public: + Neighbourhood(const CartesianGrid& cartesianGrid) + : neighbourIndexToRank(FsGridTools::mapNeigbourIndexToRank( + cartesianGrid.getTaskPosition(), cartesianGrid.getDecomposition(), cartesianGrid.getPeriodic(), + cartesianGrid.getComm(), cartesianGrid.getRank())), + neighbourRankToIndex(FsGridTools::mapNeighbourRankToIndex(neighbourIndexToRank, cartesianGrid.getSize())), + neighbourSendType(FsGridTools::generateMPITypes( + cartesianGrid.getStorageSize(), cartesianGrid.getLocalSize(), cartesianGrid.getStencilSize(), true)), + neighbourReceiveType(FsGridTools::generateMPITypes( + cartesianGrid.getStorageSize(), cartesianGrid.getLocalSize(), cartesianGrid.getStencilSize(), false)) {} + + void finalize() { + for (size_t i = 0; i < neighbourReceiveType.size(); i++) { + if (neighbourReceiveType[i] != MPI_DATATYPE_NULL) { + FSGRID_MPI_CHECK(MPI_Type_free(&neighbourReceiveType[i]), "Failed to free MPI type"); + } + } + + for (size_t i = 0; i < neighbourSendType.size(); i++) { + if (neighbourSendType[i] != MPI_DATATYPE_NULL) { + FSGRID_MPI_CHECK(MPI_Type_free(&neighbourSendType[i]), "Failed to free MPI type"); + } + } + } + + int32_t getNeighbourRank(int32_t neighbourIndex) { return neighbourIndexToRank[neighbourIndex]; } + + void updateGhostCells(int32_t rank, MPI_Comm comm, T* data) { + std::array receiveRequests; + std::array sendRequests; + receiveRequests.fill(MPI_REQUEST_NULL); + sendRequests.fill(MPI_REQUEST_NULL); + + for (int32_t x = -1; x <= 1; x++) { + for (int32_t y = -1; y <= 1; y++) { + for (int32_t z = -1; z <= 1; 
z++) { + const int32_t shiftId = (x + 1) * 9 + (y + 1) * 3 + (z + 1); + const int32_t receiveId = (1 - x) * 9 + (1 - y) * 3 + (1 - z); + if (neighbourIndexToRank[receiveId] != MPI_PROC_NULL && + neighbourSendType[shiftId] != MPI_DATATYPE_NULL) { + FSGRID_MPI_CHECK(MPI_Irecv(data, 1, neighbourReceiveType[shiftId], neighbourIndexToRank[receiveId], + shiftId, comm, &(receiveRequests[shiftId])), + "Rank ", rank, " failed to receive data from neighbor ", receiveId, " with rank ", + neighbourIndexToRank[receiveId]); + } + } + } + } + + for (int32_t x = -1; x <= 1; x++) { + for (int32_t y = -1; y <= 1; y++) { + for (int32_t z = -1; z <= 1; z++) { + const int32_t shiftId = (x + 1) * 9 + (y + 1) * 3 + (z + 1); + const int32_t sendId = shiftId; + if (neighbourIndexToRank[sendId] != MPI_PROC_NULL && neighbourSendType[shiftId] != MPI_DATATYPE_NULL) { + FSGRID_MPI_CHECK(MPI_Isend(data, 1, neighbourSendType[shiftId], neighbourIndexToRank[sendId], shiftId, + comm, &(sendRequests[shiftId])), + "Rank ", rank, " failed to send data to neighbor ", sendId, " with rank ", + neighbourIndexToRank[sendId]); + } + } + } + } + FSGRID_MPI_CHECK(MPI_Waitall(27, receiveRequests.data(), MPI_STATUSES_IGNORE), + "Synchronization at ghost cell update failed"); + FSGRID_MPI_CHECK(MPI_Waitall(27, sendRequests.data(), MPI_STATUSES_IGNORE), + "Synchronization at ghost cell update failed"); + } +}; +} diff --git a/src/tools.hpp b/src/tools.hpp index d64d50c..1cf8eaa 100644 --- a/src/tools.hpp +++ b/src/tools.hpp @@ -27,6 +27,7 @@ #include #include #include +#include #define FSGRID_MPI_CHECK(status, ...) FsGridTools::writeToCerrAndThrowIfFailed(status != MPI_SUCCESS, __VA_ARGS__) @@ -297,4 +298,131 @@ static std::array calculateStorageSize(const std::array mapNeigbourIndexToRank(const std::array& taskPosition, + const std::array& numTasksPerDim, + const std::array& periodic, MPI_Comm comm, + int32_t rank) { + auto calculateNeighbourRank = [&](int32_t neighbourIndex) { + auto calculateNeighbourPosition = [&](int32_t neighbourIndex, uint32_t i) { + const auto pos3D = + i == 0 ? linearToX(neighbourIndex) : (i == 1 ? linearToY(neighbourIndex) : linearToZ(neighbourIndex)); + const auto nonPeriodicPos = taskPosition[i] + pos3D; + return periodic[i] ? 
(nonPeriodicPos + numTasksPerDim[i]) % numTasksPerDim[i] : nonPeriodicPos; + }; + + const std::array neighbourPosition = { + calculateNeighbourPosition(neighbourIndex, 0), + calculateNeighbourPosition(neighbourIndex, 1), + calculateNeighbourPosition(neighbourIndex, 2), + }; + + const bool taskPositionWithinLimits = numTasksPerDim[0] > neighbourPosition[0] && neighbourPosition[0] >= 0 && + numTasksPerDim[1] > neighbourPosition[1] && neighbourPosition[1] >= 0 && + numTasksPerDim[2] > neighbourPosition[2] && neighbourPosition[2] >= 0; + + if (taskPositionWithinLimits) { + int32_t neighbourRank; + FSGRID_MPI_CHECK(MPI_Cart_rank(comm, neighbourPosition.data(), &neighbourRank), "Rank ", rank, + " can't determine neighbour rank at position [", neighbourPosition[0], ", ", + neighbourPosition[1], ", ", neighbourPosition[2], "]"); + return neighbourRank; + } else { + return MPI_PROC_NULL; + } + }; + + std::array ranks; + if (rank == -1) { + ranks.fill(MPI_PROC_NULL); + } else { + std::generate(ranks.begin(), ranks.end(), + [&calculateNeighbourRank, n = 0]() mutable { return calculateNeighbourRank(n++); }); + } + return ranks; +} + +static std::vector mapNeighbourRankToIndex(const std::array& indexToRankMap, FsSize_t numRanks) { + std::vector indices(numRanks, MPI_PROC_NULL); + std::for_each(indexToRankMap.cbegin(), indexToRankMap.cend(), [&indices, &numRanks, n = 0](auto rank) mutable { + if (numRanks > rank && rank >= 0) { + indices[rank] = n; + } + n++; + }); + return indices; +} + +template +static std::array generateMPITypes(const std::array& storageSize, + const std::array& localSize, int32_t stencilSize, + bool generateForSend) { + MPI_Datatype baseType; + FSGRID_MPI_CHECK(MPI_Type_contiguous(sizeof(T), MPI_BYTE, &baseType), "Failed to create a contiguous data type"); + const std::array reverseStorageSize = { + storageSize[2], + storageSize[1], + storageSize[0], + }; + std::array types; + types.fill(MPI_DATATYPE_NULL); + + for (int32_t i = 0; i < 27; i++) { + const auto x = linearToX(i); + const auto y = linearToY(i); + const auto z = linearToZ(i); + + const bool self = x == 0 && y == 0 && z == 0; + const bool flatX = storageSize[0] == 1 && x != 0; + const bool flatY = storageSize[1] == 1 && y != 0; + const bool flatZ = storageSize[2] == 1 && z != 0; + const bool skip = flatX || flatY || flatZ || self; + + if (skip) { + continue; + } + + const std::array reverseSubarraySize = { + (z == 0) ? localSize[2] : stencilSize, + (y == 0) ? localSize[1] : stencilSize, + (x == 0) ? localSize[0] : stencilSize, + }; + + const std::array reverseSubarrayStart = [&]() { + if (generateForSend) { + return std::array{ + storageSize[2] == 1 ? 0 : (z == 1 ? storageSize[2] - 2 * stencilSize : stencilSize), + storageSize[1] == 1 ? 0 : (y == 1 ? storageSize[1] - 2 * stencilSize : stencilSize), + storageSize[0] == 1 ? 0 : (x == 1 ? storageSize[0] - 2 * stencilSize : stencilSize), + }; + } else { + return std::array{ + storageSize[2] == 1 ? 0 : (z == -1 ? storageSize[2] - stencilSize : (z == 0 ? stencilSize : 0)), + storageSize[1] == 1 ? 0 : (y == -1 ? storageSize[1] - stencilSize : (y == 0 ? stencilSize : 0)), + storageSize[0] == 1 ? 0 : (x == -1 ? storageSize[0] - stencilSize : (x == 0 ? 
stencilSize : 0)), + }; + } + }(); + + FSGRID_MPI_CHECK(MPI_Type_create_subarray(3, reverseStorageSize.data(), reverseSubarraySize.data(), + reverseSubarrayStart.data(), MPI_ORDER_C, baseType, &(types[i])), + "Failed to create a subarray type"); + FSGRID_MPI_CHECK(MPI_Type_commit(&(types[i])), "Failed to commit MPI type"); + } + + FSGRID_MPI_CHECK(MPI_Type_free(&baseType), "Couldn't free the basetype used to create the sendTypes"); + + return types; +} + } // namespace FsGridTools
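
For reference, a minimal usage sketch of the refactored interface, assuming the original FsGrid<T, stencil> template signature and that MPI is initialized by the caller; the element type, grid dimensions and stencil width below are illustrative assumptions, not part of this patch:

#include "grid.hpp"
#include <mpi.h>

int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);
   {
      // A 32^3 periodic grid of doubles with a one-cell ghost frame; the decomposition is chosen automatically.
      // (Sketch assumes every rank ends up in the FsGrid communicator, i.e. getRank() != -1.)
      FsGrid<double, 1> grid({32, 32, 32}, MPI_COMM_WORLD, {true, true, true});

      *grid.get(0, 0, 0) = 42.0;   // write into the first cell of the local domain
      grid.updateGhostCells();     // ghost exchange now delegates to fsgrid::Neighbourhood

      double local = *grid.get(0, 0, 0);
      double sum = 0.0;
      grid.Allreduce(&local, &sum, 1, MPI_DOUBLE, MPI_SUM);  // reduction over the grid's communicator

      grid.finalize();             // must run before MPI_Finalize(); frees the MPI types and the cartesian communicator
   }
   MPI_Finalize();
   return 0;
}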