Skip to content

Commit

Permalink
Timesteppers (RK2, Euler, etc) on GPU. Inter species collisions uses …
Browse files Browse the repository at this point in the history
…the updated RK2 to run on GPU
  • Loading branch information
EmilyBourne committed Jan 24, 2024
1 parent 9201c93 commit 2d82add
Show file tree
Hide file tree
Showing 18 changed files with 569 additions and 159 deletions.
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ cmake_tests_Release_GPU:
-DCMAKE_CXX_FLAGS="-Wall -Wno-sign-compare -Xcudafe --diag_suppress=unsigned_compare_with_zero -Xcudafe --diag_suppress=integer_sign_change" \
-DCMAKE_BUILD_TYPE=Release $GENERAL_CMAKE_OPTIONS -DKokkos_ARCH_VOLTA70=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON ..
make -j 8
ctest --output-on-failure --timeout 5 --output-junit tests.xml -LE ResultsNotTested
ctest -j 2 --output-on-failure --timeout 5 --output-junit tests.xml -LE ResultsNotTested
ls ./tests/geometryXVx/landau/fft/growthrate_t0.0to45.0.png
ls ./tests/geometryXVx/landau/fft/frequency_t0.0to45.0.png
artifacts:
Expand Down
2 changes: 1 addition & 1 deletion src/geometryRTheta/advection/spline_foot_finder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ class SplineFootFinder : public IFootFinder


// Solve the characteristic equation
m_time_stepper.update(feet, dt, dy, update_function);
m_time_stepper.update(Kokkos::Serial(), feet, dt, dy, update_function);

is_unified(feet);
}
Expand Down
7 changes: 6 additions & 1 deletion src/geometryRTheta/time_solver/bsl_predcorr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,12 @@ class BslPredCorrRP : public ITimeSolverRP

start_time = std::chrono::system_clock::now();
for (int iter(0); iter < steps; ++iter) {
time_stepper.update(allfdistribu, dt, define_advection_field, advect_allfdistribu);
time_stepper
.update(Kokkos::Serial(),
allfdistribu,
dt,
define_advection_field,
advect_allfdistribu);

DFieldRP electrical_potential(grid);
VectorDFieldRP<RDimX, RDimY> electric_field(grid);
Expand Down
55 changes: 25 additions & 30 deletions src/geometryXVx/rhs/collisions_inter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,14 @@ CollisionsInter::CollisionsInter(IDomainSpXVx const& mesh, double nustar0)
ddc::expose_to_pdi("collinter_nustar0", m_nustar0);
}

void CollisionsInter::get_derivative(DSpanSpXVx const df, DViewSpXVx const allfdistribu) const
void CollisionsInter::get_derivative(
device_t<DSpanSpXVx> const df,
device_t<DViewSpXVx> const allfdistribu) const
{
auto allfdistribu_device_alloc = ddc::
create_mirror_view_and_copy(Kokkos::DefaultExecutionSpace(), allfdistribu.span_view());
auto allfdistribu_device = allfdistribu_device_alloc.span_view();

device_t<DFieldSpX> density_f(ddc::get_domain<IDimSp, IDimX>(allfdistribu));
device_t<DFieldSpX> fluid_velocity_f(ddc::get_domain<IDimSp, IDimX>(allfdistribu));
device_t<DFieldSpX> temperature_f(ddc::get_domain<IDimSp, IDimX>(allfdistribu));
IDomainSpX grid_sp_x = allfdistribu.domain<IDimSp, IDimX>();
device_t<DFieldSpX> density_f(grid_sp_x);
device_t<DFieldSpX> fluid_velocity_f(grid_sp_x);
device_t<DFieldSpX> temperature_f(grid_sp_x);
auto density = density_f.span_view();
auto fluid_velocity = fluid_velocity_f.span_view();
auto temperature = temperature_f.span_view();
Expand All @@ -48,16 +47,15 @@ void CollisionsInter::get_derivative(DSpanSpXVx const df, DViewSpXVx const allfd
ddc::fill(density, 0.);
ddc::for_each(
ddc::policies::parallel_device,
ddc::get_domain<IDimSp, IDimX>(allfdistribu),
grid_sp_x,
KOKKOS_LAMBDA(IndexSpX const ispx) {
IndexSp isp(ddc::select<IDimSp>(ispx));
IndexX ix(ddc::select<IDimX>(ispx));
double particle_flux(0);
double momentum_flux(0);
for (IndexVx ivx : allfdistribu.domain<IDimVx>()) {
CoordVx const coordv = ddc::coordinate(ivx);
double const val(
quadrature_coeffs_device(ivx) * allfdistribu_device(isp, ix, ivx));
double const val(quadrature_coeffs_device(ivx) * allfdistribu(isp, ix, ivx));
density(isp, ix) += val;
particle_flux += val * coordv;
momentum_flux += val * coordv * coordv;
Expand All @@ -69,11 +67,11 @@ void CollisionsInter::get_derivative(DSpanSpXVx const df, DViewSpXVx const allfd


//Collision frequencies, momentum and energy exchange terms
device_t<DFieldSpX> nustar_profile(ddc::get_domain<IDimSp, IDimX>(allfdistribu));
device_t<DFieldSpX> nustar_profile(grid_sp_x);
ddc::deepcopy(nustar_profile, m_nustar_profile);
device_t<DFieldSpX> collfreq_ab(ddc::get_domain<IDimSp, IDimX>(allfdistribu));
device_t<DFieldSpX> momentum_exchange_ab_f(ddc::get_domain<IDimSp, IDimX>(allfdistribu));
device_t<DFieldSpX> energy_exchange_ab_f(ddc::get_domain<IDimSp, IDimX>(allfdistribu));
device_t<DFieldSpX> collfreq_ab(grid_sp_x);
device_t<DFieldSpX> momentum_exchange_ab_f(grid_sp_x);
device_t<DFieldSpX> energy_exchange_ab_f(grid_sp_x);
auto momentum_exchange_ab = momentum_exchange_ab_f.span_view();
auto energy_exchange_ab = energy_exchange_ab_f.span_view();
compute_collfreq_ab(collfreq_ab.span_view(), nustar_profile, density, temperature);
Expand Down Expand Up @@ -103,8 +101,6 @@ void CollisionsInter::get_derivative(DSpanSpXVx const df, DViewSpXVx const allfd
/ (2. * temperature(isp, ix)));
});

device_t<DFieldSpXVx> df_device_f(allfdistribu.domain());
auto df_device = df_device_f.span_view();

ddc::for_each(
ddc::policies::parallel_device,
Expand All @@ -115,13 +111,12 @@ void CollisionsInter::get_derivative(DSpanSpXVx const df, DViewSpXVx const allfd
IndexVx ivx(ddc::select<IDimVx>(ispxvx));
double const coordv = ddc::coordinate(ivx);
double const term_v(coordv - fluid_velocity(isp, ix));
df_device(isp, ix, ivx)
= (2. * energy_exchange_ab(isp, ix)
* (0.5 / temperature(isp, ix) * term_v * term_v - 0.5)
+ momentum_exchange_ab(isp, ix) * term_v)
* fmaxwellian(isp, ix, ivx) / (density(isp, ix) * temperature(isp, ix));
df(isp, ix, ivx) = (2. * energy_exchange_ab(isp, ix)
* (0.5 / temperature(isp, ix) * term_v * term_v - 0.5)
+ momentum_exchange_ab(isp, ix) * term_v)
* fmaxwellian(isp, ix, ivx)
/ (density(isp, ix) * temperature(isp, ix));
});
ddc::deepcopy(df, df_device);
}


Expand All @@ -130,13 +125,13 @@ device_t<DSpanSpXVx> CollisionsInter::operator()(
double dt) const
{
Kokkos::Profiling::pushRegion("CollisionsInter");
auto allfdistribu_alloc = ddc::create_mirror_view_and_copy(allfdistribu_device);
ddc::ChunkSpan allfdistribu = allfdistribu_alloc.span_view();
RK2<DFieldSpXVx> timestepper(allfdistribu.domain());
timestepper.update(allfdistribu, dt, [&](DSpanSpXVx dy, DViewSpXVx y) {
get_derivative(dy, y);
});
ddc::deepcopy(allfdistribu_device, allfdistribu);
RK2<device_t<DFieldSpXVx>> timestepper(allfdistribu_device.domain());

timestepper
.update(allfdistribu_device, dt, [&](device_t<DSpanSpXVx> dy, device_t<DViewSpXVx> y) {
get_derivative(dy, y);
});

Kokkos::Profiling::popRegion();
return allfdistribu_device;
}
2 changes: 1 addition & 1 deletion src/geometryXVx/rhs/collisions_inter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,5 @@ class CollisionsInter : public IRightHandSide
* @param[inout] df The time derivative.
* @param[in] allfdistribu The distribution function.
*/
void get_derivative(DSpanSpXVx df, DViewSpXVx allfdistribu) const;
void get_derivative(device_t<DSpanSpXVx> df, device_t<DViewSpXVx> allfdistribu) const;
};
162 changes: 123 additions & 39 deletions src/timestepper/crank_nicolson.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,57 @@ class CrankNicolson : public ITimeStepper
* The function describing how the derivative of the evolve function is calculated.
*/
void update(ValSpan y, double dt, std::function<void(DerivSpan, ValView)> dy) const
{
using ExecSpace = typename ValChunk::memory_space::execution_space;
update(ExecSpace(), y, dt, dy);
}

/**
* @brief Carry out one step of the Crank-Nicolson scheme.
*
* This function is a wrapper around the update function below. The values of the function are
* updated using the trivial method $f += df * dt$. This is the standard method however some
* cases may need a more complex update function which is why the more explicit method is
* also provided.
*
* @param[in] exec_space
* The space on which the function is executed (CPU/GPU).
* @param[inout] y
* The value(s) which should be evolved over time defined on each of the dimensions at each point
* of the domain.
* @param[in] dt
* The time step over which the values should be evolved.
* @param[in] dy
* The function describing how the derivative of the evolve function is calculated.
*/
template <class ExecSpace>
void update(
ExecSpace const& exec_space,
ValSpan y,
double dt,
std::function<void(DerivSpan, ValView)> dy) const
{
static_assert(ddc::is_chunk_v<ValChunk>);
update(y, dt, dy, [&](ValSpan y, DerivView dy, double dt) {
ddc::for_each(y.domain(), [&](Index const idx) { y(idx) = y(idx) + dy(idx) * dt; });
static_assert(
Kokkos::SpaceAccessibility<ExecSpace, typename ValChunk::memory_space>::accessible,
"MemorySpace has to be accessible for ExecutionSpace.");
static_assert(
Kokkos::SpaceAccessibility<ExecSpace, typename DerivChunk::memory_space>::
accessible,
"MemorySpace has to be accessible for ExecutionSpace.");
update(exec_space, y, dt, dy, [&](ValSpan y, DerivView dy, double dt) {
ddc::for_each(
ddc::policies::policy(exec_space),
y.domain(),
KOKKOS_LAMBDA(Index const idx) { y(idx) = y(idx) + dy(idx) * dt; });
});
}

/**
* @brief Carry out one step of the Crank-Nicolson scheme.
*
* @param[in] exec_space
* The space on which the function is executed (CPU/GPU).
* @param[inout] y
* The value(s) which should be evolved over time defined on each of the dimensions at each point
* of the domain.
Expand All @@ -105,17 +146,31 @@ class CrankNicolson : public ITimeStepper
* @param[in] y_update
* The function describing how the value(s) are updated using the derivative.
*/
template <class ExecSpace>
void update(
ExecSpace const& exec_space,
ValSpan y,
double dt,
std::function<void(DerivSpan, ValView)> dy,
std::function<void(ValSpan, DerivView, double)> y_update) const
{
ValChunk m_y_init(m_dom);
ValChunk m_y_old(m_dom);
DerivChunk m_k1(m_dom);
DerivChunk m_k_new(m_dom);
DerivChunk m_k_total(m_dom);
static_assert(
Kokkos::SpaceAccessibility<ExecSpace, typename ValChunk::memory_space>::accessible,
"MemorySpace has to be accessible for ExecutionSpace.");
static_assert(
Kokkos::SpaceAccessibility<ExecSpace, typename DerivChunk::memory_space>::
accessible,
"MemorySpace has to be accessible for ExecutionSpace.");
ValChunk m_y_init_alloc(m_dom);
ValChunk m_y_old_alloc(m_dom);
DerivChunk m_k1_alloc(m_dom);
DerivChunk m_k_new_alloc(m_dom);
DerivChunk m_k_total_alloc(m_dom);
ValSpan m_y_init = m_y_init_alloc.span_view();
ValSpan m_y_old = m_y_old_alloc.span_view();
DerivSpan m_k1 = m_k1_alloc.span_view();
DerivSpan m_k_new = m_k_new_alloc.span_view();
DerivSpan m_k_total = m_k_total_alloc.span_view();


copy(m_y_init, y);
Expand All @@ -134,14 +189,23 @@ class CrankNicolson : public ITimeStepper
dy(m_k_new, y);

// Calculation of step
ddc::for_each(m_k_total.domain(), [&](Index const i) {
// k_total = k1 + k_new
if constexpr (is_field_v<DerivChunk>) {
fill_k_total(i, m_k_total, m_k1(i) + m_k_new(i));
} else {
m_k_total(i) = m_k1(i) + m_k_new(i);
}
});
if constexpr (is_field_v<DerivChunk>) {
ddc::for_each(
ddc::policies::policy(exec_space),
m_k_total.domain(),
KOKKOS_CLASS_LAMBDA(Index const i) {
// k_total = k1 + k_new
fill_k_total(i, m_k_total, m_k1(i) + m_k_new(i));
});
} else {
ddc::for_each(
ddc::policies::policy(exec_space),
m_k_total.domain(),
KOKKOS_CLASS_LAMBDA(Index const i) {
// k_total = k1 + k_new
m_k_total(i) = m_k1(i) + m_k_new(i);
});
}

// Save the old characteristic feet
copy(m_y_old, y);
Expand All @@ -154,12 +218,51 @@ class CrankNicolson : public ITimeStepper


// Check convergence
not_converged = not have_converged(m_y_old, y);
not_converged = not have_converged(exec_space, m_y_old, y);


} while (not_converged and (counter < m_max_counter));
};


/**
* Check if the relative difference of the function between
* two time steps is below epsilon.
*
* This function should be private. It is not due to the inclusion of a KOKKOS_LAMBDA
* function.
*
* @param[in] exec_space
* The space on which the function is executed (CPU/GPU).
* @param[in] y_old
* The value of the function at the previous time step.
* @param[in] y_new
* The updated value of the function at the new time step.
* @returns
* True if converged, False otherwise.
*/
template <class ExecSpace>
bool have_converged(ExecSpace const& exec_space, ValView y_old, ValView y_new) const
{
auto const dom = y_old.domain();

double norm_old = ddc::transform_reduce(
ddc::policies::policy(exec_space),
dom,
0.,
ddc::reducer::max<double>(),
KOKKOS_LAMBDA(Index const idx) { return norm_inf(y_old(idx)); });

double max_diff = ddc::transform_reduce(
ddc::policies::policy(exec_space),
dom,
0.,
ddc::reducer::max<double>(),
KOKKOS_LAMBDA(Index const idx) { return norm_inf(y_old(idx) - y_new(idx)); });

return (max_diff / norm_old) < m_epsilon;
}

private:
void copy(ValSpan copy_to, ValView copy_from) const
{
Expand All @@ -171,30 +274,11 @@ class CrankNicolson : public ITimeStepper
}

template <class... DDims>
void fill_k_total(Index i, DerivSpan m_k_total, ddc::Coordinate<DDims...> new_val) const
KOKKOS_FUNCTION void fill_k_total(
Index i,
DerivSpan m_k_total,
ddc::Coordinate<DDims...> new_val) const
{
((ddcHelper::get<DDims>(m_k_total)(i) = ddc::get<DDims>(new_val)), ...);
}


// Check if the relative difference of the function between
// two times steps is below epsilon.
bool have_converged(ValView y_old, ValView y_new) const
{
auto const dom = y_old.domain();

double norm_old = 0.;
ddc::for_each(dom, [&](Index const idx) {
const double abs_val = norm_inf(y_old(idx));
norm_old = norm_old > abs_val ? norm_old : abs_val;
});

double max_diff = 0.;
ddc::for_each(dom, [&](Index const idx) {
const double abs_diff = norm_inf(y_old(idx) - y_new(idx));
max_diff = max_diff > abs_diff ? max_diff : abs_diff;
});

return (max_diff / norm_old) < m_epsilon;
}
};
Loading

0 comments on commit 2d82add

Please sign in to comment.