Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement TeamMDRange Policy #151

Open
kaschau opened this issue Dec 5, 2022 · 5 comments
Open

Implement TeamMDRange Policy #151

kaschau opened this issue Dec 5, 2022 · 5 comments
Labels
enhancement New feature or request

Comments

@kaschau
Copy link
Owner

kaschau commented Dec 5, 2022

kokkos/kokkos#5238

Parallelize over a group of structured blocks at once, and much performance is gained.

@kaschau kaschau added the enhancement New feature or request label Dec 5, 2022
@kaschau
Copy link
Owner Author

kaschau commented Dec 5, 2022

https://kokkos.github.io/kokkos-core-wiki/API/core/spaces/partition_space.html

Christian Trott suggests using concurrent execution space instances instead.

@kaschau
Copy link
Owner Author

kaschau commented Dec 5, 2022

image

@kaschau
Copy link
Owner Author

kaschau commented Dec 5, 2022

image

@kaschau
Copy link
Owner Author

kaschau commented Dec 5, 2022

image

@kaschau
Copy link
Owner Author

kaschau commented Dec 5, 2022

diff --git a/src/compute/advFlux/advFlux.hpp b/src/compute/advFlux/advFlux.hpp
index 0af10f8..1a2c3f2 100644
--- a/src/compute/advFlux/advFlux.hpp
+++ b/src/compute/advFlux/advFlux.hpp
@@ -2,10 +2,11 @@
 #define __advFlux_H__
 
 #include "block_.hpp"
+#include <vector>
 
 // ./advFlux
 //    |------> secondOrderKEEP
-void secondOrderKEEP(block_ &b);
+void secondOrderKEEP(std::vector<block_> &mb);
 //    |------> centralDifference
 void centralDifference(block_ &b);
 //    |------> fourthOrderKEEP
diff --git a/src/compute/advFlux/bindingsAdvFlux.cpp b/src/compute/advFlux/bindingsAdvFlux.cpp
index caf7e67..637d0c1 100644
--- a/src/compute/advFlux/bindingsAdvFlux.cpp
+++ b/src/compute/advFlux/bindingsAdvFlux.cpp
@@ -10,7 +10,7 @@ void bindAdvFlux(py::module_ &m) {
   //  |----> secondOrderKEEP.cpp
   advFlux.def("secondOrderKEEP", &secondOrderKEEP,
               "Compute centeral difference euler fluxes via second order KEEP",
-              py::arg("block_ object"));
+              py::arg("mb"));
   //  |----> centeredDifference.cpp
   advFlux.def("centralDifference", &centralDifference,
               "Compute central difference euler fluxes",
diff --git a/src/compute/advFlux/secondOrderKEEP.cpp b/src/compute/advFlux/secondOrderKEEP.cpp
index db9d25f..52cee4b 100644
--- a/src/compute/advFlux/secondOrderKEEP.cpp
+++ b/src/compute/advFlux/secondOrderKEEP.cpp
@@ -2,77 +2,96 @@
 #include "block_.hpp"
 #include "kokkosTypes.hpp"
 #include "thtrdat_.hpp"
-
-void secondOrderKEEP(block_ &b) {
+#include <vector>
+
+void secondOrderKEEP(std::vector<block_> &mb) {
+  int nblks = mb.size();
+  std::vector<double> weight(nblks);
+  double sum = 0;
+  for (int nblki = 0; nblki < nblks; nblki++) {
+    double w = static_cast<double>(mb[nblki].ni * mb[nblki].nj * mb[nblki].nk);
+    weight[nblki] = w;
+    sum += w;
+  };
+  for (int nblki = 0; nblki < nblks; nblki++) {
+    weight[nblki] /= sum;
+  };
+  auto instances = Kokkos::Experimental::partition_space(execSpace(), weight);
 
   //-------------------------------------------------------------------------------------------|
   // i flux face range
   //-------------------------------------------------------------------------------------------|
-  MDRange3 range_i({b.ng, b.ng, b.ng},
-                   {b.ni + b.ng, b.nj + b.ng - 1, b.nk + b.ng - 1});
-  Kokkos::parallel_for(
-      "2nd order KEEP i face conv fluxes", range_i,
-      KOKKOS_LAMBDA(const int i, const int j, const int k) {
-        double U;
-        double uf;
-        double vf;
-        double wf;
-        double pf;
-
-        // Compute face normal volume flux vector
-        uf = 0.5 * (b.q(i, j, k, 1) + b.q(i - 1, j, k, 1));
-        vf = 0.5 * (b.q(i, j, k, 2) + b.q(i - 1, j, k, 2));
-        wf = 0.5 * (b.q(i, j, k, 3) + b.q(i - 1, j, k, 3));
-
-        U = b.isx(i, j, k) * uf + b.isy(i, j, k) * vf + b.isz(i, j, k) * wf;
-
-        pf = 0.5 * (b.q(i, j, k, 0) + b.q(i - 1, j, k, 0));
-
-        // Compute fluxes
-        double rho;
-        rho = 0.5 * (b.Q(i, j, k, 0) + b.Q(i - 1, j, k, 0));
-
-        // Continuity rho*Ui
-        b.iF(i, j, k, 0) = rho * U;
-
-        // x momentum rho*u*Ui+ p*Ax
-        b.iF(i, j, k, 1) = rho * uf * U + pf * b.isx(i, j, k);
-
-        // y momentum rho*v*Ui+ p*Ay
-        b.iF(i, j, k, 2) = rho * vf * U + pf * b.isy(i, j, k);
-
-        // w momentum rho*w*Ui+ p*Az
-        b.iF(i, j, k, 3) = rho * wf * U + pf * b.isz(i, j, k);
-
-        // Total energy (rhoE+ p)*Ui)
-        double e;
-        double em;
-
-        e = b.qh(i, j, k, 4);
-        em = b.qh(i - 1, j, k, 4);
-
-        b.iF(i, j, k, 4) =
-            ((0.5 * (e + em) + rho * 0.5 *
-                                   (b.q(i, j, k, 1) * b.q(i - 1, j, k, 1) +
-                                    b.q(i, j, k, 2) * b.q(i - 1, j, k, 2) +
-                                    b.q(i, j, k, 3) * b.q(i - 1, j, k, 3)))) *
-            U;
-
-        b.iF(i, j, k, 4) +=
-            0.5 * (b.q(i - 1, j, k, 0) * (b.q(i, j, k, 1) * b.isx(i, j, k) +
-                                          b.q(i, j, k, 2) * b.isy(i, j, k) +
-                                          b.q(i, j, k, 3) * b.isz(i, j, k)) +
-                   b.q(i, j, k, 0) * (b.q(i - 1, j, k, 1) * b.isx(i, j, k) +
-                                      b.q(i - 1, j, k, 2) * b.isy(i, j, k) +
-                                      b.q(i - 1, j, k, 3) * b.isz(i, j, k)));
-        // Species
-        for (int n = 0; n < b.ne - 5; n++) {
-          b.iF(i, j, k, 5 + n) =
-              0.5 * (b.Q(i, j, k, 5 + n) + b.Q(i - 1, j, k, 5 + n)) * U;
-        }
-      });
-
-  //-------------------------------------------------------------------------------------------|
+  for (int nblki = 0; nblki < nblks; nblki++) {
+    block_ b = mb[nblki];
+    MDRange3 range_i(instances[nblki], {b.ng, b.ng, b.ng},
+                     {b.ni + b.ng, b.nj + b.ng - 1, b.nk + b.ng - 1});
+    Kokkos::parallel_for(
+        "2nd order KEEP i face conv fluxes", range_i,
+        KOKKOS_LAMBDA(const int i, const int j, const int k) {
+          double U;
+          double uf;
+          double vf;
+          double wf;
+          double pf;
+
+          // Compute face normal volume flux vector
+          uf = 0.5 * (b.q(i, j, k, 1) + b.q(i - 1, j, k, 1));
+          vf = 0.5 * (b.q(i, j, k, 2) + b.q(i - 1, j, k, 2));
+          wf = 0.5 * (b.q(i, j, k, 3) + b.q(i - 1, j, k, 3));
+
+          U = b.isx(i, j, k) * uf + b.isy(i, j, k) * vf + b.isz(i, j, k) * wf;
+
+          pf = 0.5 * (b.q(i, j, k, 0) + b.q(i - 1, j, k, 0));
+
+          // Compute fluxes
+          double rho;
+          rho = 0.5 * (b.Q(i, j, k, 0) + b.Q(i - 1, j, k, 0));
+
+          // Continuity rho*Ui
+          b.iF(i, j, k, 0) = rho * U;
+
+          // x momentum rho*u*Ui+ p*Ax
+          b.iF(i, j, k, 1) = rho * uf * U + pf * b.isx(i, j, k);
+
+          // y momentum rho*v*Ui+ p*Ay
+          b.iF(i, j, k, 2) = rho * vf * U + pf * b.isy(i, j, k);
+
+          // w momentum rho*w*Ui+ p*Az
+          b.iF(i, j, k, 3) = rho * wf * U + pf * b.isz(i, j, k);
+
+          // Total energy (rhoE+ p)*Ui)
+          double e;
+          double em;
+
+          e = b.qh(i, j, k, 4);
+          em = b.qh(i - 1, j, k, 4);
+
+          b.iF(i, j, k, 4) =
+              ((0.5 * (e + em) + rho * 0.5 *
+                                     (b.q(i, j, k, 1) * b.q(i - 1, j, k, 1) +
+                                      b.q(i, j, k, 2) * b.q(i - 1, j, k, 2) +
+                                      b.q(i, j, k, 3) * b.q(i - 1, j, k, 3)))) *
+              U;
+
+          b.iF(i, j, k, 4) +=
+              0.5 * (b.q(i - 1, j, k, 0) * (b.q(i, j, k, 1) * b.isx(i, j, k) +
+                                            b.q(i, j, k, 2) * b.isy(i, j, k) +
+                                            b.q(i, j, k, 3) * b.isz(i, j, k)) +
+                     b.q(i, j, k, 0) * (b.q(i - 1, j, k, 1) * b.isx(i, j, k) +
+                                        b.q(i - 1, j, k, 2) * b.isy(i, j, k) +
+                                        b.q(i - 1, j, k, 3) * b.isz(i, j, k)));
+          // Species
+          for (int n = 0; n < b.ne - 5; n++) {
+            b.iF(i, j, k, 5 + n) =
+                0.5 * (b.Q(i, j, k, 5 + n) + b.Q(i - 1, j, k, 5 + n)) * U;
+          }
+        });
+  }
+  for (int nblki = 0; nblki < nblks; nblki++) {
+    instance[0].fence();
+  };
+
+  //   //-------------------------------------------------------------------------------------------|
   // j flux face range
   //-------------------------------------------------------------------------------------------|
   MDRange3 range_j({b.ng, b.ng, b.ng},

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Projects
None yet
Development

No branches or pull requests

1 participant