Skip to content

Commit

Permalink
Faster comms (#160)
Browse files Browse the repository at this point in the history
* First pass

* compiling and able to communicate. need to verify

* passing all tests.

* Whoops, not passing.

* debugging ng > 1

* Now passing all orientation tests

* Update whole view comm to match.
  • Loading branch information
kaschau authored Dec 27, 2022
1 parent 166a3e6 commit 78a97bb
Show file tree
Hide file tree
Showing 10 changed files with 473 additions and 357 deletions.
36 changes: 30 additions & 6 deletions src/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,12 +167,36 @@ PYBIND11_MODULE(compute, m) {

.def_readwrite("qBcVals", &face_::qBcVals)
.def_readwrite("QBcVals", &face_::QBcVals)
.def_readwrite("sendBuffer3", &face_::sendBuffer3)
.def_readwrite("sendBuffer4", &face_::sendBuffer4)
.def_readwrite("recvBuffer3", &face_::recvBuffer3)
.def_readwrite("recvBuffer4", &face_::recvBuffer4)
.def_readwrite("tempRecvBuffer3", &face_::tempRecvBuffer3)
.def_readwrite("tempRecvBuffer4", &face_::tempRecvBuffer4)
.def_readwrite("sendBuffer_x", &face_::sendBuffer_x)
.def_readwrite("sendBuffer_y", &face_::sendBuffer_y)
.def_readwrite("sendBuffer_z", &face_::sendBuffer_z)
.def_readwrite("sendBuffer_q", &face_::sendBuffer_q)
.def_readwrite("sendBuffer_Q", &face_::sendBuffer_Q)
.def_readwrite("sendBuffer_dqdx", &face_::sendBuffer_dqdx)
.def_readwrite("sendBuffer_dqdy", &face_::sendBuffer_dqdy)
.def_readwrite("sendBuffer_dqdz", &face_::sendBuffer_dqdz)
.def_readwrite("sendBuffer_phi", &face_::sendBuffer_phi)

.def_readwrite("recvBuffer_x", &face_::recvBuffer_x)
.def_readwrite("recvBuffer_y", &face_::recvBuffer_y)
.def_readwrite("recvBuffer_z", &face_::recvBuffer_z)
.def_readwrite("recvBuffer_q", &face_::recvBuffer_q)
.def_readwrite("recvBuffer_Q", &face_::recvBuffer_Q)
.def_readwrite("recvBuffer_dqdx", &face_::recvBuffer_dqdx)
.def_readwrite("recvBuffer_dqdy", &face_::recvBuffer_dqdy)
.def_readwrite("recvBuffer_dqdz", &face_::recvBuffer_dqdz)
.def_readwrite("recvBuffer_phi", &face_::recvBuffer_phi)

.def_readwrite("tempRecvBuffer_x", &face_::tempRecvBuffer_x)
.def_readwrite("tempRecvBuffer_y", &face_::tempRecvBuffer_y)
.def_readwrite("tempRecvBuffer_z", &face_::tempRecvBuffer_z)
.def_readwrite("tempRecvBuffer_q", &face_::tempRecvBuffer_q)
.def_readwrite("tempRecvBuffer_Q", &face_::tempRecvBuffer_Q)
.def_readwrite("tempRecvBuffer_dqdx", &face_::tempRecvBuffer_dqdx)
.def_readwrite("tempRecvBuffer_dqdy", &face_::tempRecvBuffer_dqdy)
.def_readwrite("tempRecvBuffer_dqdz", &face_::tempRecvBuffer_dqdz)
.def_readwrite("tempRecvBuffer_phi", &face_::tempRecvBuffer_phi)

.def_readwrite("periodicRotMatrixUp", &face_::periodicRotMatrixUp)
.def_readwrite("periodicRotMatrixDown", &face_::periodicRotMatrixDown)

Expand Down
17 changes: 15 additions & 2 deletions src/compute/face_.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,21 @@ struct face_ {
threeDview qBcVals, QBcVals;

// MPI send and recv buffers
threeDview sendBuffer3, recvBuffer3, tempRecvBuffer3;
fourDview sendBuffer4, recvBuffer4, tempRecvBuffer4;
// send
threeDview sendBuffer_x, sendBuffer_y, sendBuffer_z;
fourDview sendBuffer_q, sendBuffer_Q;
fourDview sendBuffer_dqdx, sendBuffer_dqdy, sendBuffer_dqdz;
fourDview sendBuffer_phi;
// recv
threeDview recvBuffer_x, recvBuffer_y, recvBuffer_z;
fourDview recvBuffer_q, recvBuffer_Q;
fourDview recvBuffer_dqdx, recvBuffer_dqdy, recvBuffer_dqdz;
fourDview recvBuffer_phi;
// temps
threeDview tempRecvBuffer_x, tempRecvBuffer_y, tempRecvBuffer_z;
fourDview tempRecvBuffer_q, tempRecvBuffer_Q;
fourDview tempRecvBuffer_dqdx, tempRecvBuffer_dqdy, tempRecvBuffer_dqdz;
fourDview tempRecvBuffer_phi;

// For cubic spline inlets
fiveDviewHost cubicSplineAlphas;
Expand Down
28 changes: 20 additions & 8 deletions src/compute/utils/bindingsUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,30 @@ void bindUtils(py::module_ &m) {
utils.def("checkNan", &checkNan, "Check for any nans/infs in the Q array",
py::arg("std::vector<block_ object>"));
// |------> sendRecvBuffer
utils.def("extract_sendBuffer3", &extract_sendBuffer3,
utils.def("extractSendBuffer",
py::overload_cast<threeDview &, threeDview &, face_ &,
const std::vector<int> &>(&extractSendBuffer),
"Extract the send buffer of a view", py::arg("kokkos view"),
py::arg("face object"), py::arg("lists of slices"));
utils.def("extract_sendBuffer4", &extract_sendBuffer4,
py::arg("buffer"), py::arg("face object"),
py::arg("lists of slices"));
utils.def("extractSendBuffer",
py::overload_cast<fourDview &, fourDview &, face_ &,
const std::vector<int> &>(&extractSendBuffer),
"Extract the send buffer of a view", py::arg("kokkos view"),
py::arg("face object"), py::arg("lists of slices"));
utils.def("place_recvBuffer3", &place_recvBuffer3,
py::arg("buffer"), py::arg("face object"),
py::arg("lists of slices"));
utils.def("placeRecvBuffer",
py::overload_cast<threeDview &, threeDview &, face_ &,
const std::vector<int> &>(&placeRecvBuffer),
"Place the recv buffer of a view", py::arg("kokkos view"),
py::arg("face object"), py::arg("lists of slices"));
utils.def("place_recvBuffer4", &place_recvBuffer4,
py::arg("buffer"), py::arg("face object"),
py::arg("lists of slices"));
utils.def("placeRecvBuffer",
py::overload_cast<fourDview &, fourDview &, face_ &,
const std::vector<int> &>(&placeRecvBuffer),
"Place the recv buffer of a view", py::arg("kokkos view"),
py::arg("face object"), py::arg("lists of slices"));
py::arg("buffer"), py::arg("face object"),
py::arg("lists of slices"));
// |------> viscousSponge
utils.def("viscousSponge", &viscousSponge, "Compute viscous multiplier",
py::arg("block_"), py::arg("origin"), py::arg("ending"),
Expand Down
80 changes: 16 additions & 64 deletions src/compute/utils/sendRecvBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,118 +3,70 @@
#include "face_.hpp"
#include "kokkosTypes.hpp"

void extract_sendBuffer3(threeDview &view, face_ &face,
const std::vector<int> &slices) {
void extractSendBuffer(threeDview &view, threeDview &buffer, face_ &face,
const std::vector<int> &slices) {

int &nface = face._nface;
int &ng = face._ng;

threeDview &buffer = face.tempRecvBuffer3;
int nLayer = slices.size();

// MDRange2 range_face = MDRange2({0, 0}, {buffer.extent(0),
// buffer.extent(1)});

for (int g = 0; g < ng; g++) {
for (int g = 0; g < nLayer; g++) {
int s = slices[g];

twoDsubview viewSlice = getHaloSlice(view, nface, s);
twoDsubview bufferSlice =
Kokkos::subview(buffer, g, Kokkos::ALL, Kokkos::ALL);

Kokkos::deep_copy(bufferSlice, viewSlice);

// Kokkos::parallel_for(
// "Copy buffer data", range_face,
// KOKKOS_LAMBDA(const int i, const int j) {
// // set pressure
// bufferSlice(i, j) = viewSlice(i,j)
// });
}
}

void extract_sendBuffer4(fourDview &view, face_ &face,
const std::vector<int> &slices) {
void extractSendBuffer(fourDview &view, fourDview &buffer, face_ &face,
const std::vector<int> &slices) {

int &nface = face._nface;
int &ng = face._ng;

fourDview &buffer = face.tempRecvBuffer4;
int nLayer = slices.size();

// MDRange3 range_face = MDRange3({0, 0, 0}, {buffer.extent(0),
// buffer.extent(1), buffer.extent(1)});

for (int g = 0; g < ng; g++) {
for (int g = 0; g < nLayer; g++) {
int s = slices[g];

threeDsubview viewSlice = getHaloSlice(view, nface, s);
threeDsubview bufferSlice =
Kokkos::subview(buffer, g, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL);

Kokkos::deep_copy(bufferSlice, viewSlice);

// Kokkos::parallel_for(
// "Copy buffer data", range_face,
// KOKKOS_LAMBDA(const int i, const int j, const int l) {
// // C
// bufferSlice(i, j, l) = viewSlice(i, j, l)
// });
}
}

void place_recvBuffer3(threeDview &view, face_ &face,
const std::vector<int> &slices) {
void placeRecvBuffer(threeDview &view, threeDview &buffer, face_ &face,
const std::vector<int> &slices) {

int &nface = face._nface;
int &ng = face._ng;

threeDview &buffer = face.recvBuffer3;
int nLayer = slices.size();

// MDRange2 range_face = MDRange2({0, 0}, {buffer.extent(0),
// buffer.extent(1)});

for (int g = 0; g < ng; g++) {
for (int g = 0; g < nLayer; g++) {
int s = slices[g];

twoDsubview viewSlice = getHaloSlice(view, nface, s);
twoDsubview bufferSlice =
Kokkos::subview(buffer, g, Kokkos::ALL, Kokkos::ALL);

Kokkos::deep_copy(viewSlice, bufferSlice);

// Kokkos::parallel_for(
// "Copy buffer data", range_face,
// KOKKOS_LAMBDA(const int i, const int j) {
// // set pressure
// viewSlice(i, j) = bufferSlice(i,j)
// });
}
}

void place_recvBuffer4(fourDview &view, face_ &face,
const std::vector<int> &slices) {
void placeRecvBuffer(fourDview &view, fourDview &buffer, face_ &face,
const std::vector<int> &slices) {

int &nface = face._nface;
int &ng = face._ng;

fourDview &buffer = face.recvBuffer4;
int nLayer = slices.size();

// MDRange3 range_face = MDRange3({0, 0, 0}, {buffer.extent(0),
// buffer.extent(1), buffer.extent(1)});

for (int g = 0; g < ng; g++) {
for (int g = 0; g < nLayer; g++) {
int s = slices[g];

threeDsubview viewSlice = getHaloSlice(view, nface, s);
threeDsubview bufferSlice =
Kokkos::subview(buffer, g, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL);

Kokkos::deep_copy(viewSlice, bufferSlice);

// Kokkos::parallel_for(
// "Copy buffer data", range_face,
// KOKKOS_LAMBDA(const int i, const int j, const int l) {
// // C
// viewSlice(i, j, l) = bufferSlice(i, j, l)
// });
}
}
12 changes: 6 additions & 6 deletions src/compute/utils/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ std::array<double, 3> CFLmax(const std::vector<block_> &mb);
int checkNan(const std::vector<block_> &mb);

// |------> sendRecvBuffer
void extract_sendBuffer3(threeDview &view, face_ &face,
const std::vector<int> &slices);
void extract_sendBuffer4(fourDview &view, face_ &face,
const std::vector<int> &slices);
void place_recvBuffer3(threeDview &view, face_ &face,
void extractSendBuffer(threeDview &view, threeDview &buffer, face_ &face,
const std::vector<int> &slices);
void place_recvBuffer4(fourDview &view, face_ &face,
void extractSendBuffer(fourDview &view, fourDview &buffer, face_ &face,
const std::vector<int> &slices);
void placeRecvBuffer(threeDview &view, threeDview &buffer, face_ &face,
const std::vector<int> &slices);
void placeRecvBuffer(fourDview &view, fourDview &buffer, face_ &face,
const std::vector<int> &slices);

// |------> viscousSponge
void viscousSponge(block_ &b, const std::array<double, 3> &origin,
Expand Down
68 changes: 31 additions & 37 deletions src/peregrinepy/mpiComm/communicate.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
from .mpiUtils import getCommRankSize
from mpi4py.MPI import DOUBLE as MPIDOUBLE
from mpi4py.MPI import Request
from ..compute.utils import (
extract_sendBuffer3,
extract_sendBuffer4,
place_recvBuffer3,
place_recvBuffer4,
)
from ..compute.utils import extractSendBuffer, placeRecvBuffer


def communicate(mb, varis):
Expand All @@ -18,16 +13,11 @@ def communicate(mb, varis):
reqs = []
# Post non-blocking receives
for blk in mb:
ndim = blk.array[var].ndim
for face in blk.faces:
if face.neighbor is None:
continue

recv = (
face.array["recvBuffer4"]
if ndim == 4
else face.array["recvBuffer3"]
)
recv = face.array["recvBuffer_" + var]
ssize = recv.size
reqs.append(
comm.Irecv(
Expand All @@ -37,34 +27,35 @@ def communicate(mb, varis):

# Post the sends (comm.Send below is a blocking call)
for blk in mb:
ndim = blk.array[var].ndim
for face in blk.faces:
if face.neighbor is None:
continue

if ndim == 4:
send = face.array["sendBuffer4"]
recv = "tempRecvBuffer4"
sliceS = face.sliceS4
extract = extract_sendBuffer4
else:
send = face.array["sendBuffer3"]
recv = "tempRecvBuffer3"
sliceS = face.sliceS3
extract = extract_sendBuffer3
send = face.array["sendBuffer_" + var]
recvName = "tempRecvBuffer_" + var
if var in ["Q", "q"]:
sliceS = face.ccSendAllSlices
elif var in ["dqdx", "dqdy", "dqdz", "phi"]:
sliceS = face.ccSendFirstHaloSlice
elif var in ["x", "y", "z"]:
sliceS = face.nodeSendSlices

# Get the indices of the send slices from the numpy slice object
sliceIndxs = [s for f in sliceS for s in f if type(s) is int]
# populate the temp recv array with the unoriented send data, since it's
# the correct size and shape
extract(getattr(blk, var), face, sliceIndxs)
extractSendBuffer(
getattr(blk, var), getattr(face, recvName), face, sliceIndxs
)
# update the device temp recv buffer
face.updateHostView(recv)
face.updateHostView(recvName)
# Now, orient each send slice and place in send buffer
for i in range(face.ng):
send[i] = face.orient(face.array[recv][i])
for i in range(len(sliceIndxs)):
send[i] = face.orient(face.array[recvName][i])
ssize = send.size
comm.Send([send, ssize, MPIDOUBLE], dest=face.commRank, tag=face.tagS)

# Post non-blocking sends
# wait and assign
reqs = iter(reqs)
for blk in mb:
Expand All @@ -73,19 +64,22 @@ def communicate(mb, varis):
if face.neighbor is None:
continue
Request.Wait(reqs.__next__())
if ndim == 4:
recv = "recvBuffer4"
sliceR = face.sliceR4
place = place_recvBuffer4
else:
recv = "recvBuffer3"
sliceR = face.sliceR3
place = place_recvBuffer3

recvName = "recvBuffer_" + var
if var in ["Q", "q"]:
sliceR = face.ccRecvAllSlices
elif var in ["dqdx", "dqdy", "dqdz", "phi"]:
sliceR = face.ccRecvFirstHaloSlice
elif var in ["x", "y", "z"]:
sliceR = face.nodeRecvSlices

# Push back up to the device
face.updateDeviceView(recv)
face.updateDeviceView(recvName)
# Get the indices of the recv slices from the numpy slice object
sliceIndxs = [s for f in sliceR for s in f if type(s) is int]
# Place the recv in the view
place(getattr(blk, var), face, sliceIndxs)
placeRecvBuffer(
getattr(blk, var), getattr(face, recvName), face, sliceIndxs
)

comm.Barrier()
Loading

0 comments on commit 78a97bb

Please sign in to comment.