Skip to content

Commit

Permalink
Merge pull request spdomin#26 from alanw0/kokkos
Browse files Browse the repository at this point in the history
Add unit-tests that show simple kokkos usage.
  • Loading branch information
spdomin authored Oct 27, 2016
2 parents d9e9967 + 25d588e commit af1c440
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 0 deletions.
16 changes: 16 additions & 0 deletions unit_tests.C
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@

#include <gtest/gtest.h> // for InitGoogleTest, etc
#include <mpi.h> // for MPI_Comm_rank, MPI_Finalize, etc
#include <Kokkos_Core.hpp>
#include <stk_util/parallel/Parallel.hpp>

// NOTE: stk_unit_test_utils cannot be used until Trilinos/stk is updated and the build configuration is changed.
// #include <stk_unit_test_utils/ParallelGtestOutput.hpp>

#include "include/NaluEnv.h"

Expand All @@ -16,6 +21,9 @@ char** gl_argv = 0;
int main(int argc, char **argv)
{
MPI_Init(&argc, &argv);

Kokkos::initialize(argc, argv);

//NaluEnv will call MPI_Finalize for us.
sierra::nalu::NaluEnv::self();

Expand All @@ -24,8 +32,16 @@ int main(int argc, char **argv)
gl_argc = argc;
gl_argv = argv;

// NOTE: stk_unit_test_utils cannot be used until Trilinos/stk is updated and the build configuration is changed.
// int procId = stk::parallel_machine_rank(MPI_COMM_WORLD);
// stk::unit_test_util::create_parallel_output(procId);

int returnVal = RUN_ALL_TESTS();

Kokkos::finalize_all();

//NaluEnv will call MPI_Finalize when the NaluEnv singleton is cleaned up,
//which is after we return.
return returnVal;
}

160 changes: 160 additions & 0 deletions unit_tests/UnitTestBasicKokkos.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#include <gtest/gtest.h>
#include <limits>

#include <stk_util/parallel/Parallel.hpp>
#include <Kokkos_Core.hpp>

// Informational test: on MPI rank 0 only, report which Kokkos execution spaces
// this build was compiled with and print the configuration of the default
// execution space. Makes no assertions.
TEST(BasicKokkos, discover_execution_space)
{
  const stk::ParallelMachine world = MPI_COMM_WORLD;
  const int myRank = stk::parallel_machine_rank(world);

  // Every other rank stays silent so the report is printed exactly once.
  if (myRank != 0) {
    return;
  }

  std::cout << std::endl;

#if defined(KOKKOS_HAVE_SERIAL)
  std::cout << "Kokkos::Serial is available." << std::endl;
#endif

#if defined(KOKKOS_HAVE_OPENMP)
  std::cout << "Kokkos::OpenMP is available. (Control num-threads via env-var OMP_NUM_THREADS)" << std::endl;
#endif

#if defined(KOKKOS_HAVE_CUDA)
  std::cout << "Kokkos::Cuda is available." << std::endl;
#endif

  std::cout << "Default execution space info: ";
  Kokkos::DefaultExecutionSpace::print_configuration(std::cout);

  std::cout << std::endl;
}

// Round-trips a rank-1 array host -> device -> host and verifies that the data
// which comes back matches what was sent.
TEST(BasicKokkos, simple_views_1D)
{
  const double tolerance = 0.0000001;
  const size_t N = 10;

  // Allocate the view in the default (device) memory space first and derive
  // the host view from it. Going the other way round -- calling
  // create_mirror_view on a host view -- yields another host-space view, which
  // is not assignable to Kokkos::View<double*> when the default memory space
  // is not host-accessible (e.g. Cuda).
  Kokkos::View<double*> device_view1D("device_view1D", N);
  Kokkos::View<double*>::HostMirror host_view1D = Kokkos::create_mirror_view(device_view1D);

  for (size_t i = 0; i < N; ++i) {
    host_view1D(i) = i+1;
  }

  // When host and device share a memory space, host_view1D aliases
  // device_view1D and this deep_copy is a no-op.
  Kokkos::deep_copy(device_view1D, host_view1D);

  // Independent host allocation, so the comparison below is meaningful even
  // when host_view1D aliases the device view.
  Kokkos::View<double*>::HostMirror host_view1D_2("host_view1D_2", N);
  Kokkos::deep_copy(host_view1D_2, device_view1D);

  for (size_t i = 0; i < N; ++i) {
    EXPECT_NEAR(host_view1D(i), host_view1D_2(i), tolerance);
  }
}

// Round-trips a rank-2 array host -> device -> host and verifies that the data
// which comes back matches what was sent.
TEST(BasicKokkos, simple_views_2D)
{
  const double tolerance = 0.0000001;
  const size_t N = 10;
  const size_t M = 20;

  // Allocate the view in the default (device) memory space first and derive
  // the host view from it. Calling create_mirror_view on a host view would
  // yield another host-space view, which is not assignable to
  // Kokkos::View<double**> when the default memory space is not
  // host-accessible (e.g. Cuda).
  Kokkos::View<double**> device_view2D("device_view2D", N, M);
  Kokkos::View<double**>::HostMirror host_view2D = Kokkos::create_mirror_view(device_view2D);

  for (size_t i = 0; i < N; ++i) {
    for (size_t j = 0; j < M; ++j) {
      host_view2D(i,j) = i+j+1;
    }
  }

  // When host and device share a memory space, host_view2D aliases
  // device_view2D and this deep_copy is a no-op.
  Kokkos::deep_copy(device_view2D, host_view2D);

  // Independent host allocation, so the comparison below is meaningful even
  // when host_view2D aliases the device view.
  Kokkos::View<double**>::HostMirror host_view2D_2("host_view2D_2", N, M);
  Kokkos::deep_copy(host_view2D_2, device_view2D);

  for (size_t i = 0; i < N; ++i) {
    for (size_t j = 0; j < M; ++j) {
      EXPECT_NEAR(host_view2D(i,j), host_view2D_2(i,j), tolerance);
    }
  }
}

// Fills a rank-2 array on the host, doubles every entry with a flat
// Kokkos::parallel_for over the rows, and verifies the doubled values after
// copying the data back to the host.
TEST(BasicKokkos, parallel_for)
{
  const double tolerance = 0.0000001;
  const size_t N = 10;
  const size_t M = 20;

  // Allocate the view in the default (device) memory space first and derive
  // the host view from it. Calling create_mirror_view on a host view would
  // yield another host-space view, which is not assignable to
  // Kokkos::View<double**> when the default memory space is not
  // host-accessible (e.g. Cuda).
  Kokkos::View<double**> device_view2D("device_view2D", N, M);
  Kokkos::View<double**>::HostMirror host_view2D = Kokkos::create_mirror_view(device_view2D);

  for (size_t i = 0; i < N; ++i) {
    for (size_t j = 0; j < M; ++j) {
      host_view2D(i,j) = i+j+1;
    }
  }

  Kokkos::deep_copy(device_view2D, host_view2D);

  //Important note: when the 'host' and 'device' share the same memory space, (as is the case for OpenMP),
  //host_view2D is semantically just a pointer to device_view2D, and the deep_copy is a no-op.
  //That means that the parallel_for which comes next, is also updating the values of host_view2D.
  Kokkos::parallel_for(N, KOKKOS_LAMBDA(const size_t& i) {
    for (size_t j = 0; j < M; ++j) {
      device_view2D(i, j) *= 2;
    }
  });

  //This deep_copy is a no-op for OpenMP, but for Cuda it is necessary; otherwise the values
  //in host_view2D would not be updated and the following EXPECT_NEAR checks would fail.
  Kokkos::deep_copy(host_view2D, device_view2D);

  Kokkos::View<double**>::HostMirror host_result("host_result", N, M);
  Kokkos::deep_copy(host_result, device_view2D);

  for (size_t i = 0; i < N; ++i) {
    for (size_t j = 0; j < M; ++j) {
      // Both host copies come from the same device view, so comparing them
      // alone cannot detect a kernel that never ran. Also check the value the
      // kernel is supposed to have produced.
      const double expected = 2.0 * static_cast<double>(i+j+1);
      EXPECT_NEAR(expected, host_result(i,j), tolerance);
      EXPECT_NEAR(host_result(i,j), host_view2D(i,j), tolerance);
    }
  }
}

// Same computation as a flat parallel_for that doubles every entry of a rank-2
// array, but expressed with hierarchical parallelism: one team per row
// (league rank == row index) and the team's threads sharing the columns.
TEST(BasicKokkos, nested_parallel_for_thread_teams)
{
  const double tolerance = 0.0000001;
  const size_t N = 8;
  const size_t M = 8;

  // Allocate the view in the default (device) memory space first and derive
  // the host view from it. Calling create_mirror_view on a host view would
  // yield another host-space view, which is not assignable to
  // Kokkos::View<double**> when the default memory space is not
  // host-accessible (e.g. Cuda).
  Kokkos::View<double**> device_view2D("device_view2D", N, M);
  Kokkos::View<double**>::HostMirror host_view2D = Kokkos::create_mirror_view(device_view2D);

  for (size_t i = 0; i < N; ++i) {
    for (size_t j = 0; j < M; ++j) {
      host_view2D(i,j) = i+j+1;
    }
  }

  Kokkos::deep_copy(device_view2D, host_view2D);

  // Take the team-handle type from the very policy type that is launched, so
  // the functor's argument type always matches what the policy passes in.
  typedef Kokkos::TeamPolicy<Kokkos::DefaultExecutionSpace> TeamPolicyType;
  typedef TeamPolicyType::member_type TeamHandleType;

  //Important note: when the 'host' and 'device' share the same memory space, (as is the case for OpenMP),
  //host_view2D is semantically just a pointer to device_view2D, and the deep_copy is a no-op.
  //That means that the parallel_for which comes next, is also updating the values of host_view2D.
  Kokkos::parallel_for(TeamPolicyType(N, Kokkos::AUTO),
    KOKKOS_LAMBDA(const TeamHandleType& team) {
      const size_t i = team.league_rank();
      // The nested lambda uses a plain capture rather than KOKKOS_LAMBDA:
      // KOKKOS_LAMBDA here would nest one extended host/device lambda inside
      // another, which CUDA does not allow.
      Kokkos::parallel_for(Kokkos::TeamThreadRange(team, static_cast<size_t>(0), M), [&](const size_t& j) {
        device_view2D(i, j) *= 2;
      });
    });

  //This deep_copy is a no-op for OpenMP, but for Cuda it is necessary; otherwise the values
  //in host_view2D would not be updated and the following EXPECT_NEAR checks would fail.
  Kokkos::deep_copy(host_view2D, device_view2D);

  Kokkos::View<double**>::HostMirror host_result("host_result", N, M);
  Kokkos::deep_copy(host_result, device_view2D);

  for (size_t i = 0; i < N; ++i) {
    for (size_t j = 0; j < M; ++j) {
      // Both host copies come from the same device view, so comparing them
      // alone cannot detect a kernel that never ran. Also check the value the
      // kernel is supposed to have produced.
      const double expected = 2.0 * static_cast<double>(i+j+1);
      EXPECT_NEAR(expected, host_result(i,j), tolerance);
      EXPECT_NEAR(host_result(i,j), host_view2D(i,j), tolerance);
    }
  }
}

0 comments on commit af1c440

Please sign in to comment.