diff --git a/CHANGELOG.md b/CHANGELOG.md index 44eb259cbf..6dbf013aef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # SUNDIALS Changelog +## Changes to SUNDIALS in release 6.6.0 + +Added the second order IMEX method from Giraldo, Kelly, and Constantinescu 2013 +as the default second order IMEX method in ARKStep. The explicit table is given +by `ARKODE_ARK2_ERK_3_1_2` and the implicit table by `ARKODE_ARK2_DIRK_3_1_2`. + +Updated the F2003 utility routines `SUNDIALSFileOpen` and `SUNDIALSFileClose` +to support user specification of `stdout` and `stderr` strings for the output +file names. + +Updated CVODE, CVODES and ARKODE default behavior when returning the solution when +the internal time has reached a user-specified stop time. Previously, the output +solution was interpolated to the value of `tstop`; the default is now to copy the +internal solution vector. Users who wish to revert to interpolation may call a new +routine `CVodeSetInterpolateStopTime`, `ARKStepSetInterpolateStopTime`, +`ERKStepSetInterpolateStopTime`, or `MRIStepSetInterpolateStopTime`. 
+ ## Changes to SUNDIALS in release 6.5.1 Added the functions `ARKStepClearStopTime`, `ERKStepClearStopTime`, diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 793ffbb0a4..e9fd4648c1 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -26,12 +26,9 @@ sundials_option(BENCHMARK_NVECTOR BOOL "NVector benchmarks are on" ON) # Add specific benchmarks #---------------------------------------- -if(ENABLE_MPI AND ENABLE_RAJA) - add_subdirectory(advection_reaction_3D) -endif() - if(ENABLE_MPI) - add_subdirectory(diffusion_2D) +add_subdirectory(diffusion_2D) +add_subdirectory(advection_reaction_3D) endif() # Add the nvector benchmarks diff --git a/benchmarks/advection_reaction_3D/CMakeLists.txt b/benchmarks/advection_reaction_3D/CMakeLists.txt index e51a95155a..7469a6a10a 100644 --- a/benchmarks/advection_reaction_3D/CMakeLists.txt +++ b/benchmarks/advection_reaction_3D/CMakeLists.txt @@ -1,5 +1,5 @@ # --------------------------------------------------------------- -# Programmer(s): Cody J. Balos @ LLNL +# Programmer(s): Daniel R. 
Reynolds @ SMU # --------------------------------------------------------------- # SUNDIALS Copyright Start # Copyright (c) 2002-2023, Lawrence Livermore National Security @@ -12,135 +12,10 @@ # SUNDIALS Copyright End # --------------------------------------------------------------- -if(BUILD_ARKODE AND BUILD_CVODE AND BUILD_IDA) - - if((RAJA_BACKENDS MATCHES "TARGET_OPENMP") OR (RAJA_BACKENDS MATCHES "OPENMP")) - set(OTHER_LIBS OpenMP::OpenMP_CXX) - endif() - - # ---------------------------------------------------------------------------- - # MPI only - # ---------------------------------------------------------------------------- - - add_executable(advection_reaction_3D - advection_reaction_3D.cpp - arkode_driver.cpp - cvode_driver.cpp - ida_driver.cpp - rhs3D.hpp - ParallelGrid.hpp - backends.hpp) - - # ensure the linker language is reset to CXX - set_target_properties(advection_reaction_3D PROPERTIES LINKER_LANGUAGE CXX) - - target_include_directories(advection_reaction_3D - PRIVATE - ${PROJECT_SOURCE_DIR}/utilities - ${MPI_CXX_INCLUDE_DIRS}) - - target_link_libraries(advection_reaction_3D - PRIVATE - sundials_arkode - sundials_cvode - sundials_ida - sundials_nvecmpiplusx - sundials_nvecserial - RAJA - ${MPI_CXX_LIBRARIES} - ${OTHER_LIBS}) - - install(TARGETS advection_reaction_3D - DESTINATION "${BENCHMARKS_INSTALL_PATH}/advection_reaction_3D") - - install(FILES README.md - DESTINATION "${BENCHMARKS_INSTALL_PATH}/advection_reaction_3D") - - # ---------------------------------------------------------------------------- - # MPI + CUDA - # ---------------------------------------------------------------------------- - - if(BUILD_NVECTOR_CUDA) - - set_source_files_properties(advection_reaction_3D.cpp - PROPERTIES LANGUAGE CUDA) - set_source_files_properties(arkode_driver.cpp PROPERTIES LANGUAGE CUDA) - set_source_files_properties(cvode_driver.cpp PROPERTIES LANGUAGE CUDA) - set_source_files_properties(ida_driver.cpp PROPERTIES LANGUAGE CUDA) - - 
add_executable(advection_reaction_3D_mpicuda - advection_reaction_3D.cpp - arkode_driver.cpp - cvode_driver.cpp - ida_driver.cpp - rhs3D.hpp - ParallelGrid.hpp - backends.hpp) - - # ensure the linker language is reset to CXX - set_target_properties(advection_reaction_3D_mpicuda - PROPERTIES LINKER_LANGUAGE CXX) - - target_include_directories(advection_reaction_3D_mpicuda - PRIVATE - ${PROJECT_SOURCE_DIR}/utilities - ${MPI_CXX_INCLUDE_DIRS}) - - target_link_libraries(advection_reaction_3D_mpicuda - PRIVATE - sundials_arkode - sundials_cvode - sundials_ida - sundials_nvecmpiplusx - sundials_nveccuda - RAJA - ${MPI_CXX_LIBRARIES} - ${OTHER_LIBS}) - - target_compile_definitions(advection_reaction_3D_mpicuda PRIVATE USE_CUDA_NVEC) - - install(TARGETS advection_reaction_3D_mpicuda - DESTINATION "${BENCHMARKS_INSTALL_PATH}/advection_reaction_3D") - - endif() - - # ---------------------------------------------------------------------------- - # MPI + HIP - # ---------------------------------------------------------------------------- - - if(BUILD_NVECTOR_HIP) - - add_executable(advection_reaction_3D_mpihip - advection_reaction_3D.cpp - arkode_driver.cpp - cvode_driver.cpp - ida_driver.cpp - rhs3D.hpp - ParallelGrid.hpp - backends.hpp) - - target_include_directories(advection_reaction_3D_mpihip - PRIVATE - ${PROJECT_SOURCE_DIR}/utilities - ${MPI_CXX_INCLUDE_DIRS}) - - target_link_libraries(advection_reaction_3D_mpihip - PRIVATE - sundials_arkode - sundials_cvode - sundials_ida - sundials_nvecmpiplusx - sundials_nvechip - RAJA - hip::device - ${MPI_CXX_LIBRARIES} - ${OTHER_LIBS}) - - target_compile_definitions(advection_reaction_3D_mpihip PRIVATE USE_HIP_NVEC) - - install(TARGETS advection_reaction_3D_mpihip - DESTINATION "${BENCHMARKS_INSTALL_PATH}/advection_reaction_3D") - - endif() +if(ENABLE_RAJA) + add_subdirectory(raja) +endif() +if(ENABLE_KOKKOS AND BUILD_NVECTOR_KOKKOS) + add_subdirectory(kokkos) endif() diff --git 
a/benchmarks/advection_reaction_3D/kokkos/CMakeLists.txt b/benchmarks/advection_reaction_3D/kokkos/CMakeLists.txt new file mode 100644 index 0000000000..2d58e5fe4c --- /dev/null +++ b/benchmarks/advection_reaction_3D/kokkos/CMakeLists.txt @@ -0,0 +1,61 @@ +# --------------------------------------------------------------- +# Programmer(s): Daniel R. Reynolds @ SMU +# --------------------------------------------------------------- +# SUNDIALS Copyright Start +# Copyright (c) 2002-2023, Lawrence Livermore National Security +# and Southern Methodist University. +# All rights reserved. +# +# See the top-level LICENSE and NOTICE files for details. +# +# SPDX-License-Identifier: BSD-3-Clause +# SUNDIALS Copyright End +# --------------------------------------------------------------- + +# Add the build targets for each backend +if(BUILD_ARKODE AND BUILD_CVODE AND BUILD_IDA) + foreach(backend ${KOKKOS_EXAMPLES_BACKENDS}) + + # set benchmark target name + set(benchmark_target "advection_reaction_3D_kokkos.${backend}") + + # benchmark source files + add_executable(${benchmark_target} + advection_reaction_3D.cpp + arkode_driver.cpp + cvode_driver.cpp + ida_driver.cpp + rhs3D.hpp + ParallelGrid.hpp + check_retval.h) + + # which backend to use + target_compile_definitions(${benchmark_target} PRIVATE USE_${backend}) + + # directories to include + target_include_directories(${benchmark_target} + PRIVATE + ${PROJECT_SOURCE_DIR}/utilities + ${MPI_CXX_INCLUDE_DIRS} + ) + + # libraries to link against + target_link_libraries(${benchmark_target} + PRIVATE + sundials_arkode + sundials_cvode + sundials_ida + sundials_nvecmpiplusx + sundials_nveckokkos + ${MPI_CXX_LIBRARIES} + ${EXE_EXTRA_LINK_LIBS} + ) + + install(TARGETS ${benchmark_target} + DESTINATION "${BENCHMARKS_INSTALL_PATH}/advection_reaction_3D/kokkos") + + install(FILES README.md ../scripts/compare_error.py ../scripts/compute_error.py ../scripts/pickle_solution_output.py + DESTINATION 
"${BENCHMARKS_INSTALL_PATH}/advection_reaction_3D/kokkos") + + endforeach() +endif() diff --git a/benchmarks/advection_reaction_3D/kokkos/ParallelGrid.hpp b/benchmarks/advection_reaction_3D/kokkos/ParallelGrid.hpp new file mode 100644 index 0000000000..c324105b02 --- /dev/null +++ b/benchmarks/advection_reaction_3D/kokkos/ParallelGrid.hpp @@ -0,0 +1,593 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Daniel R. Reynolds @ SMU + * Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * A simple implementation of a parallel structured Cartesian mesh class that + * supports up to 3 spatial dimensions and an arbitrary number of degrees of + * freedom, and that uses Kokkos views to store communication buffer data. 
+ * ----------------------------------------------------------------------------*/ + +#ifndef _KOKKOSPARGRID_H +#define _KOKKOSPARGRID_H + +#include +#include +#include +#include +#include +#include + + +/* Set Kokkos execution space and type shortcuts */ +#if defined(USE_CUDA) +using ExecSpace = Kokkos::Cuda; +using MemSpace = Kokkos::CudaSpace; +#elif defined(USE_HIP) +#if KOKKOS_VERSION / 10000 > 3 +using ExecSpace = Kokkos::HIP; +using MemSpace = Kokkos::HIPSpace; +#else +using ExecSpace = Kokkos::Experimental::HIP; +using MemSpace = Kokkos::Experimental::HIPSpace; +#endif +#elif defined(USE_OPENMP) +using ExecSpace = Kokkos::OpenMP; +using MemSpace = Kokkos::HostSpace; +#else +using ExecSpace = Kokkos::Serial; +using MemSpace = Kokkos::HostSpace; +#endif +using Vec1D = Kokkos::View; +using Vec4D = Kokkos::View; +using Vec1DHost = Vec1D::HostMirror; +using Vec4DHost = Vec4D::HostMirror; +using Range3D = Kokkos::MDRangePolicy>; + + +namespace sundials_tools +{ + +// Types of boundaries supported. +enum class BoundaryType +{ + PERIODIC +}; + +// Types of stencils supported. +enum class StencilType +{ + UPWIND +}; + +template +class ParallelGrid +{ +public: + // Constructor that creates a new ParallelGrid object. 
+ // [in,out] comm - on input, the overall MPI communicator; on output, the Cartesian communicator + // [in] a[] - an array of length 3 which defines the domain [a,b] + // [in] b[] - an array of length 3 which defines the domain [a,b] + // [in] npts[] - an array of length 3 which defines the number of mesh points in each dimension + // [in] dof - the number of degrees of freedom per mesh node + // [in] bc - the type of boundary conditions (see BoundaryType) + // [in] st - the stencil to use (see StencilType) + // [in] c - value whose sign sets the upwind direction (c > 0: right, otherwise left) + // [in] npxyz - the number of processors in each dimension; defaults to nullptr (all zeros), which means MPI will choose + // [in] reorder - should MPI_Cart_create do process reordering to optimize or not; defaults to false (some MPI implementations ignore this) + ParallelGrid(MPI_Comm* comm, const realtype a[], const realtype b[], const GLOBALINT npts[], + int dof, BoundaryType bc, StencilType st, const realtype c, + const int npxyz[] = nullptr, bool reorder = false) + : nx(1), ny(1), nz(1), + nxl(1), nyl(1), nzl(1), + npx(1), npy(1), npz(1), + dx(0.0), dy(0.0), dz(0.0), + ax(0.0), ay(0.0), az(0.0), + bx(0.0), by(0.0), bz(0.0), + dof(dof), dims{0,0,0}, coords{0,0,0}, + bc(bc), st(st), upwindRight(true) + { + assert(st == StencilType::UPWIND); + + /* Set up MPI Cartesian communicator */ + if (npxyz) + { + dims[0] = npxyz[0]; + dims[1] = npxyz[1]; + dims[2] = npxyz[2]; + } + + int retval, nprocs; + MPI_Comm_size(*comm, &nprocs); + retval = MPI_Dims_create(nprocs, 3, dims); + assert(retval == MPI_SUCCESS); + + int periods[] = { bc == BoundaryType::PERIODIC, + bc == BoundaryType::PERIODIC, + bc == BoundaryType::PERIODIC }; + retval = MPI_Cart_create(*comm, 3, dims, periods, reorder, comm); + assert(retval == MPI_SUCCESS); + + retval = MPI_Cart_get(*comm, 3, dims, periods, coords); + assert(retval == MPI_SUCCESS); + + cart_comm = *comm; + + /* Set upwinding direction */ + upwindRight = (c > 0.0); + + /* Set up 
information for the first spatial dimension */ + npx = dims[0]; + nx = npts[0]; + ax = a[0]; + bx = b[0]; + dx = (bx-ax) / (realtype) nx; + int is = nx*(coords[0])/npx; + int ie = nx*(coords[0]+1)/npx-1; + nxl = ie-is+1; + neq = dof * nxl; + + /* Set up information for the second spatial dimension */ + npy = dims[1]; + ny = npts[1]; + ay = a[1]; + by = b[1]; + dy = (by-ay) / (realtype) ny; + int js = ny*(coords[1])/npy; + int je = ny*(coords[1]+1)/npy-1; + nyl = je-js+1; + neq *= nyl; + + /* Set up information for the third spatial dimension */ + npz = dims[2]; + nz = npts[2]; + az = a[2]; + bz = b[2]; + dz = (bz-az) / (realtype) nz; + int ks = nz*(coords[2])/npz; + int ke = nz*(coords[2]+1)/npz-1; + nzl = ke-ks+1; + neq *= nzl; + + /* Allocate buffers for nearest-neighbor exchange */ + if (st == StencilType::UPWIND) + AllocateBuffersUpwind(); + + } + + // TODO: + // - support non-periodic boundary conditions + // For all faces where neighbors exist: determine neighbor process indices. + // For all faces: allocate upwind exchange buffers. 
+ void AllocateBuffersUpwind() + { + + /* Allocate send/receive buffers and determine ID for communication West */ + if (upwindRight) { + Wrecv_ = Vec1D("Wrecv", dof*nyl*nzl); + WrecvH_ = Kokkos::create_mirror_view(Wrecv_); + } else { + Wsend_ = Vec1D("Wsend", dof*nyl*nzl); + WsendH_ = Kokkos::create_mirror_view(Wsend_); + } + ipW = MPI_PROC_NULL; + if ((coords[0] > 0) || (bc == BoundaryType::PERIODIC)) { + int nbcoords[] = {coords[0]-1, coords[1], coords[2]}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipW); + assert(retval == MPI_SUCCESS); + } + + /* Allocate send/receive buffers and determine ID for communication East */ + if (upwindRight) { + Esend_ = Vec1D("Esend", dof*nyl*nzl); + EsendH_ = Kokkos::create_mirror_view(Esend_); + } else { + Erecv_ = Vec1D("Erecv", dof*nyl*nzl); + ErecvH_ = Kokkos::create_mirror_view(Erecv_); + } + ipE = MPI_PROC_NULL; + if ((coords[0] < dims[0]-1) || (bc == BoundaryType::PERIODIC)) { + int nbcoords[] = {coords[0]+1, coords[1], coords[2]}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipE); + assert(retval == MPI_SUCCESS); + } + + /* Allocate send/receive buffers and determine ID for communication South */ + if (upwindRight) { + Srecv_ = Vec1D("Srecv", dof*nxl*nzl); + SrecvH_ = Kokkos::create_mirror_view(Srecv_); + } else { + Ssend_ = Vec1D("Ssend", dof*nxl*nzl); + SsendH_ = Kokkos::create_mirror_view(Ssend_); + } + ipS = MPI_PROC_NULL; + if ((coords[1] > 0) || (bc == BoundaryType::PERIODIC)) { + int nbcoords[] = {coords[0], coords[1]-1, coords[2]}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipS); + assert(retval == MPI_SUCCESS); + } + + /* Allocate send/receive buffers and determine ID for communication North */ + if (upwindRight) { + Nsend_ = Vec1D("Nsend", dof*nxl*nzl); + NsendH_ = Kokkos::create_mirror_view(Nsend_); + } else { + Nrecv_ = Vec1D("Nrecv", dof*nxl*nzl); + NrecvH_ = Kokkos::create_mirror_view(Nrecv_); + } + ipN = MPI_PROC_NULL; + if ((coords[1] < dims[1]-1) || (bc == BoundaryType::PERIODIC)) { 
+ int nbcoords[] = {coords[0], coords[1]+1, coords[2]}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipN); + assert(retval == MPI_SUCCESS); + } + + /* Allocate send/receive buffers and determine ID for communication Back */ + if (upwindRight) { + Brecv_ = Vec1D("Brecv", dof*nxl*nyl); + BrecvH_ = Kokkos::create_mirror_view(Brecv_); + } else { + Bsend_ = Vec1D("Bsend", dof*nxl*nyl); + BsendH_ = Kokkos::create_mirror_view(Bsend_); + } + ipB = MPI_PROC_NULL; + if ((coords[2] > 0) || (bc == BoundaryType::PERIODIC)) { + int nbcoords[] = {coords[0], coords[1], coords[2]-1}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipB); + assert(retval == MPI_SUCCESS); + } + + /* Allocate send/receive buffers and determine ID for communication Front */ + if (upwindRight) { + Fsend_ = Vec1D("Fsend", dof*nxl*nyl); + FsendH_ = Kokkos::create_mirror_view(Fsend_); + } else { + Frecv_ = Vec1D("Frecv", dof*nxl*nyl); + FrecvH_ = Kokkos::create_mirror_view(Frecv_); + } + ipF = MPI_PROC_NULL; + if ((coords[2] < dims[2]-1) || (bc == BoundaryType::PERIODIC)) { + int nbcoords[] = {coords[0], coords[1], coords[2]+1}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipF); + assert(retval == MPI_SUCCESS); + } + + } + + // Initiate non-blocking neighbor communication + int ExchangeStart() + { + int retval = 0; + nreq = 0; + + // Initialize all requests in array + for (int i=0; i<12; i++) + req[i] = MPI_REQUEST_NULL; + + // Open an Irecv buffer on host for each neighbor + if ((ipW != MPI_PROC_NULL) && (upwindRight)) + { + retval = MPI_Irecv(WrecvH_.data(), dof*nyl*nzl, MPI_SUNREALTYPE, ipW, + 1, cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipE != MPI_PROC_NULL) && (!upwindRight)) + { + retval = MPI_Irecv(ErecvH_.data(), dof*nyl*nzl, MPI_SUNREALTYPE, ipE, + 0, cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipS != MPI_PROC_NULL) && (upwindRight)) + { + retval = MPI_Irecv(SrecvH_.data(), dof*nxl*nzl, MPI_SUNREALTYPE, ipS, + 3, 
cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipN != MPI_PROC_NULL) && (!upwindRight)) + { + retval = MPI_Irecv(NrecvH_.data(), dof*nxl*nzl, MPI_SUNREALTYPE, ipN, + 2, cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipB != MPI_PROC_NULL) && (upwindRight)) + { + retval = MPI_Irecv(BrecvH_.data(), dof*nxl*nyl, MPI_SUNREALTYPE, ipB, + 5, cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipF != MPI_PROC_NULL) && (!upwindRight)) + { + retval = MPI_Irecv(FrecvH_.data(), dof*nxl*nyl, MPI_SUNREALTYPE, ipF, + 4, cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + // Send data to neighbors, first copying from device to host buffers + if ((ipW != MPI_PROC_NULL) && (!upwindRight)) + { + Kokkos::deep_copy(WsendH_, Wsend_); + retval = MPI_Isend(WsendH_.data(), dof*nyl*nzl, MPI_SUNREALTYPE, ipW, 0, + cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipE != MPI_PROC_NULL) && (upwindRight)) + { + Kokkos::deep_copy(EsendH_, Esend_); + retval = MPI_Isend(EsendH_.data(), dof*nyl*nzl, MPI_SUNREALTYPE, ipE, 1, + cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipS != MPI_PROC_NULL) && (!upwindRight)) + { + Kokkos::deep_copy(SsendH_, Ssend_); + retval = MPI_Isend(SsendH_.data(), dof*nxl*nzl, MPI_SUNREALTYPE, ipS, 2, + cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipN != MPI_PROC_NULL) && (upwindRight)) + { + Kokkos::deep_copy(NsendH_, Nsend_); + retval = MPI_Isend(NsendH_.data(), dof*nxl*nzl, MPI_SUNREALTYPE, ipN, 3, + cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipB != MPI_PROC_NULL) && (!upwindRight)) + { + Kokkos::deep_copy(BsendH_, Bsend_); + retval = MPI_Isend(BsendH_.data(), dof*nxl*nyl, MPI_SUNREALTYPE, ipB, 4, + cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipF != MPI_PROC_NULL) && (upwindRight)) + { + 
Kokkos::deep_copy(FsendH_, Fsend_); + retval = MPI_Isend(FsendH_.data(), dof*nxl*nyl, MPI_SUNREALTYPE, ipF, 5, + cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + return retval; + } + + // Waits for neighbor exchange to finish. + int ExchangeEnd() + { + MPI_Status stat[12]; + int retval; + + // return automatically with success if there are no outstanding requests + if (nreq == 0) + return(0); + + // Wait for messages to finish send/receive + retval = MPI_Waitall(nreq, req, stat); + assert(retval == MPI_SUCCESS); + + // Copy data from host to device buffers + if ((ipW != MPI_PROC_NULL) && (upwindRight)) + Kokkos::deep_copy(Wrecv_, WrecvH_); + if ((ipE != MPI_PROC_NULL) && (!upwindRight)) + Kokkos::deep_copy(Erecv_, ErecvH_); + if ((ipS != MPI_PROC_NULL) && (upwindRight)) + Kokkos::deep_copy(Srecv_, SrecvH_); + if ((ipN != MPI_PROC_NULL) && (!upwindRight)) + Kokkos::deep_copy(Nrecv_, NrecvH_); + if ((ipB != MPI_PROC_NULL) && (upwindRight)) + Kokkos::deep_copy(Brecv_, BrecvH_); + if ((ipF != MPI_PROC_NULL) && (!upwindRight)) + Kokkos::deep_copy(Frecv_, FrecvH_); + + return retval; + } + + // Prints out information about the ParallelGrid to stdout. + void PrintInfo() + { + printf("ParallelGrid Info:\n"); + printf(" dimensions = %d\n", 3); + printf(" processors = {%d, %d, %d}\n", npx, npy, npz); + printf(" domain = {[%g,%g], [%g,%g], [%g,%g]}\n", ax, bx, ay, by, az, bz); + printf(" global npts = {%li, %li, %li}\n", (long int) nx, (long int) ny, (long int) nz); + printf(" local npts = {%d, %d, %d}\n", nxl, nyl, nzl); + printf(" mesh spacing = {%g, %g, %g}\n", dx, dy, dz); + if (upwindRight) + printf(" upwind dir = right\n"); + else + printf(" upwind dir = left\n"); + } + + // Saves the mesh to a file. + // First row is x. Second row is y. Third row is z. 
+ // Can be loaded into MATLAB like so: + // mesh = loadtxt('mesh.txt'); + // [X,Y,Z] = meshgrid(mesh(1,:),mesh(2,:),mesh(3,:)); + void MeshToFile(const std::string& fname) + { + std::ofstream mesh_file; + mesh_file.open(fname); + mesh_file << std::setprecision(16); + for (GLOBALINT i = 0; i < nx; i++) + mesh_file << " " << dx*i; + mesh_file << std::endl; + for (GLOBALINT i = 0; i < ny; i++) + mesh_file << " " << dy*i; + mesh_file << std::endl; + for (GLOBALINT i = 0; i < nz; i++) + mesh_file << " " << dz*i; + mesh_file << std::endl; + mesh_file.close(); + } + + int nprocs() const + { + return npx*npy*npz; + } + + GLOBALINT npts() const + { + return nx*ny*nz; + } + + GLOBALINT nptsl() const + { + return nxl*nyl*nzl; + } + + GLOBALINT neql() const + { + return dof*nptsl(); + } + + realtype* GetRecvView(const std::string& direction) + { + if (direction == "WEST") + { + return static_cast(Wrecv_.data()); + } + else if (direction == "EAST") + { + return static_cast(Erecv_.data()); + } + else if (direction == "NORTH") + { + return static_cast(Nrecv_.data()); + } + else if (direction == "SOUTH") + { + return static_cast(Srecv_.data()); + } + else if (direction == "FRONT") + { + return static_cast(Frecv_.data()); + } + else if (direction == "BACK") + { + return static_cast(Brecv_.data()); + } + else + { + assert(direction == "ILLEGAL"); + return nullptr; + } + } + + realtype* GetSendView(const std::string& direction) + { + if (direction == "WEST") + { + return static_cast(Wsend_.data()); + } + else if (direction == "EAST") + { + return static_cast(Esend_.data()); + } + else if (direction == "NORTH") + { + return static_cast(Nsend_.data()); + } + else if (direction == "SOUTH") + { + return static_cast(Ssend_.data()); + } + else if (direction == "FRONT") + { + return static_cast(Fsend_.data()); + } + else if (direction == "BACK") + { + return static_cast(Bsend_.data()); + } + else + { + assert(direction == "ILLEGAL"); + return nullptr; + } + } + + GLOBALINT nx, ny, nz; /* 
number of intervals globally */ + int nxl, nyl, nzl; /* number of intervals locally */ + int npx, npy, npz; /* number of processes */ + realtype dx, dy, dz; /* mesh spacing */ + realtype ax, ay, az; /* domain in [a, b] */ + realtype bx, by, bz; + int dof; /* degrees of freedom per node */ + int neq; /* total number of equations locally */ + + int ipW, ipE; /* MPI ranks for neighbor procs */ + int ipS, ipN; + int ipB, ipF; + bool upwindRight; /* Upwind dir: true/false == R/L */ + + int dims[3]; + int coords[3]; + + +private: + MPI_Comm cart_comm; /* MPI cartesian communicator */ + MPI_Request req[12]; + int nreq; + + BoundaryType bc; + StencilType st; + + Vec1D Wsend_; /* MPI send/recv buffers */ + Vec1D Esend_; + Vec1D Ssend_; + Vec1D Nsend_; + Vec1D Bsend_; + Vec1D Fsend_; + Vec1D Wrecv_; + Vec1D Erecv_; + Vec1D Srecv_; + Vec1D Nrecv_; + Vec1D Brecv_; + Vec1D Frecv_; + Vec1DHost WsendH_; /* MPI send/recv buffers (host) */ + Vec1DHost EsendH_; + Vec1DHost SsendH_; + Vec1DHost NsendH_; + Vec1DHost BsendH_; + Vec1DHost FsendH_; + Vec1DHost WrecvH_; + Vec1DHost ErecvH_; + Vec1DHost SrecvH_; + Vec1DHost NrecvH_; + Vec1DHost BrecvH_; + Vec1DHost FrecvH_; + +}; + +} + +#endif diff --git a/benchmarks/advection_reaction_3D/kokkos/README.md b/benchmarks/advection_reaction_3D/kokkos/README.md new file mode 100644 index 0000000000..f27484385f --- /dev/null +++ b/benchmarks/advection_reaction_3D/kokkos/README.md @@ -0,0 +1,113 @@ +# Benchmark: 3D Advection-Reaction + +This benchmark problem implements a 3D advection-reaction equation using the +Kokkos performance portability layer with serial, OpenMP, CUDA, or HIP backends. + +## Problem description + +This code simulates the advection and reaction of three chemical species where +the reaction mechanism is a variation of the Brusselator problem from chemical +kinetics. 
The PDE system is given by +```math +\begin{align} + u_t &= -c \nabla u + A - (w+1) u + v u^2 \\ + v_t &= -c \nabla v + w u - v u^2 \\ + w_t &= -c \nabla w + (B - w) / \epsilon - w u +\end{align} +``` +where $u$, $v$, and $w$ are chemical concentrations, $c$ is the advection speed, +$A$ and $B$ are the concentrations of chemical species that remain constant over +space and time, and $\epsilon$ is a parameter that varies the stiffness of the +system. The problem is solved on the domain $(x,y,z) = X$ in $[0, X_{\text{max}}]^3$, +for times $t$ in $[0,t_f]$. The initial condition is +```math +\begin{align} + u(0,X) &= A + p(X) \\ + v(0,X) &= B / A + p(X) \\ + w(0,X) &= 3.0 + p(X) +\end{align} +``` +where the perturbation function is +```math + p(X) = \alpha \exp\left( -\frac{(X-\mu)^T \sigma^{-1} (X-\mu)}{2 \sqrt{|\sigma| 8 \pi^3}} \right) +``` +with $\alpha = 0.1$, $\mu = 0.5 X_{\text{max}}$, and $\sigma$ a diagonal +matrix with entries $0.25 X_{\text{max}}$. + +Spatial derivatives are discretized with first-order upwind finite differences +on a uniform spatial grid. The system can be evolved in time using explicit, +implicit, or IMEX methods from ARKODE, Adams or BDF methods from CVODE, or BDF +methods from IDA. When using an IMEX method, advection is treated explicitly and +reactions implicitly. + +The nonlinear system(s) that arise in each time step may be solved using a +global Newton method with a matrix-free GMRES linear solver or an Anderson +accelerated fixed-point method. When using an IMEX method, a custom task-local +nonlinear solver that leverages the locality of the reaction systems may also be +used. + +## Options + +Several command line options are available to change the problem parameters +as well as the integrator and solver options. A summary of the options is +listed below. 
+ +| Option | Description | Default | +|:----------------------------|:------------------------------------------------------------------------------|:------------| +| `--help` | Print the command line options and description | -- | +| `--dont-save` | Do not save the solution to disk | Save | +| `--output-dir ` | Directory where all output files will be written | `.` | +| `--nout ` | Number of output times | 40 | +| `--npts ` | Number of mesh points in each direction | 100 | +| `--npxyz ` | Number of MPI tasks in each direction (0 forces MPI to decide) | 0 0 0 | +| `--xmax ` | Maximum value of `x`, `y`, and `z` in $X_{\text{max}}$ | 1.0 | +| `--A ` | Constant concentration of species `A` | 1.0 | +| `--B ` | Constant concentration of species `B` | 3.5 | +| `--c ` | Advection speed `c` | 0.01 | +| `--order ` | Integration method order | 3 | +| `--method ` | Integrator to use: `ERK`, `ARK-DIRK`, `ARK-IMEX`, `CV-BDF`, `CV-ADAMS`, `IDA` | `ARK-DIRK` | +| `--nls ` | Nonlinear Solver Method: `newton`, `tl-newton`, `fixedpoint`, `none` | `newton` | +| `--fpaccel ` | Number of fixed-point acceleration vectors | 3 | +| `--nopre` | Disable preconditioning | False | +| `--fused` | Enable fused operations | Off | +| `--tf ` | Final integration time `t_f` | 10.0 | +| `--rtol ` | Relative tolerance | 1.0e-6 | +| `--atol ` | Absolute tolerance | 1.0e-9 | + +## Building and Running + +To build the benchmark executables SUNDIALS must be configured with ARKODE, +CVODE, and IDA enabled and with MPI and Kokkos support on. Additionally, either +CUDA or HIP support must be on to build executables utilizing NVIDIA or AMD +GPUs. See the installation guide for more details on configuring, building, +and installing SUNDIALS. 
+ +Based on the configuration the following executables will be built and installed +in the `/advection_reaction_3D/kokkos` directory: + +* `advection_reaction_3D_kokkos.SERIAL` -- MPI parallelism +* `advection_reaction_3D_kokkos.OPENMP` -- MPI + OpenMP parallelism +* `advection_reaction_3D_kokkos.CUDA` -- MPI + CUDA parallelism +* `advection_reaction_3D_kokkos.HIP` -- MPI + HIP parallelism + +On Summit, with the default environment +``` + Compiler: xl/16.1.1-5 + MPI: spectrum-mpi/10.3.1.2-20200121 + CUDA: cuda/10.1.243 +``` +an example `jsrun` command is +``` +jsrun -n 2 -a 1 -c 1 -g 1 ./advection_reaction_3D_kokkos.CUDA +``` + +On Lassen, with the environment +``` + Compiler: gcc/8.3.1 + MPI: mvapich2/2021.05.28-cuda-11.1.1 + CUDA: cuda/11.1.1 +``` +an example `jsrun` command is +``` +jsrun -n 2 -a 1 -c 1 -g 1 ./advection_reaction_3D_kokkos.CUDA +``` diff --git a/benchmarks/advection_reaction_3D/kokkos/advection_reaction_3D.cpp b/benchmarks/advection_reaction_3D/kokkos/advection_reaction_3D.cpp new file mode 100644 index 0000000000..fa9f2bcc94 --- /dev/null +++ b/benchmarks/advection_reaction_3D/kokkos/advection_reaction_3D.cpp @@ -0,0 +1,711 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Daniel R. Reynolds @ SMU + * David J. Gardner, Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * This benchmark problem simulates the advection and reaction of three + * chemical species, u, v, and w, in a three dimensional domain. 
The reaction + * mechanism is a variation of the Brusselator problem from chemical kinetics. + * This is a PDE system with 3 components, Y = [u,v,w], satisfying the + * equations, + * + * u_t = -c * dot(grad,u) + A - (w+1) * u + v * u^2 + * v_t = -c * dot(grad,v) + w * u - v * u^2 + * w_t = -c * dot(grad,w) + (B - w) / ep - w * u + * + * for t in [0,tf] and X = (x,y,z) in [0,xmax]^3, with periodic + * boundary conditions. The initial condition is + * + * u(0,X) = k1 * A / k4 + p(X) + * v(0,X) = k2 * k4 * B / (k1 * k3 * A) + p(X) + * w(0,X) = 3.0 + p(X) + * p(X) = alpha * e^( -((X - mu)^T Sigma^{-1} (X - mu)) / (2*sqrt(|Sigma|*(2pi)^3)) ) + * + * alpha = 0.1, mu = (xmax/2.0, xmax/2.0, xmax/2.0), and Sigma = diag(xmax/4.0). + * The reaction rates are set so k_1 = k_2 = k_3 = k_4 = k, and k_5 = k_6 + * = 1/5e-6. The spatial derivatives are discretized with first-order upwind + * finite differences. NOUT outputs are printed at equal intervals, and run + * statistics are printed at the end. 
+ * + * Command line options: + * --help prints this message + * --dont-save do not save the solution to the filesystem at the nout interval (default is to save) + * --output-dir the directory where all output files will be written + * --nout number of output times + * --method ERK, ARK-DIRK, ARK-IMEX (default), CV-BDF, CV-ADAMS, IDA + * --nls nonlinear solver to use; options are newton, + * tl-newton (task-local newton), or fixedpoint + * --fpaccel the number of fixed-point acceleration vectors to use + * (only valid when using fixedpoint nonlinear solver) + * --nopre turn off preconditioning + * --order the method order to use + * --npts number of mesh points in each direction + * --xmax maximum value of x (size of domain) + * --tf final time + * --A A parameter value + * --B B parameter value + * --k reaction rate + * --c advection speed + * --rtol relative tolerance + * --atol absolute tolerance + * --------------------------------------------------------------------------*/ + +#include "advection_reaction_3D.hpp" + + +/* Main Program */ +int main(int argc, char *argv[]) +{ + + SUNContext ctx; + + /* Initialize MPI */ + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Init(&argc, &argv); + + /* Create SUNDIALS context */ + SUNContext_Create((void*) &comm, &ctx); + + /* Initialize Kokkos */ + Kokkos::initialize(argc, argv); + { + + /* General problem variables */ + N_Vector y = NULL; /* empty solution vector */ + UserData udata(ctx); /* user data */ + UserOptions uopt; /* user options */ + int retval; /* reusable error-checking flag */ + + SUNDIALS_CXX_MARK_FUNCTION(udata.prof); + + /* Process input arguments and set up the problem */ + retval = SetupProblem(argc, argv, &udata, &uopt, ctx); + if (check_retval(&retval, "SetupProblem", 1, udata.myid)) MPI_Abort(comm, 1); + + /* Create solution vector (on-node and MPI-parallel versions) */ + SUNVector yloc{(unsigned int)udata.grid->neq, ctx}; + y = N_VMake_MPIPlusX(udata.comm, yloc, ctx); + if (check_retval((void *) y, 
"N_VMake_MPIPlusX", 0, udata.myid)) MPI_Abort(comm, 1); + + /* Set the initial condition */ + retval = SetIC(y, &udata); + if (check_retval(&retval, "SetIC", 1, udata.myid)) MPI_Abort(comm, 1); + + /* Output spatial mesh to disk (add extra point for periodic BC) */ + if (udata.myid == 0 && uopt.nout > 0) + { + char fname[MXSTR]; + snprintf(fname, MXSTR, "%s/mesh.txt", uopt.outputdir); + udata.grid->MeshToFile(fname); + } + + /* Integrate in time */ + if (uopt.method == "ERK") retval = EvolveProblemExplicit(y, &udata, &uopt); + else if (uopt.method == "ARK-DIRK") retval = EvolveProblemDIRK(y, &udata, &uopt); + else if (uopt.method == "ARK-IMEX") retval = EvolveProblemIMEX(y, &udata, &uopt); + else if (uopt.method == "CV-BDF") retval = EvolveProblemBDF(y, &udata, &uopt); + else if (uopt.method == "CV-ADAMS") retval = EvolveProblemAdams(y, &udata, &uopt); + else if (uopt.method == "IDA") retval = EvolveDAEProblem(y, &udata, &uopt); + if (check_retval(&retval, "Evolve", 1, udata.myid)) MPI_Abort(comm, 1); + + /* Clean up */ + N_VDestroy(y); + } + Kokkos::finalize(); + SUNContext_Free(&ctx); + MPI_Finalize(); + return(0); +} + + +/* Destructor for problem data */ +UserData::~UserData() +{ + /* close output streams */ + if (uopt->nout > 0) + { + if (UFID) fclose(UFID); + if (VFID) fclose(VFID); + if (WFID) fclose(WFID); + if (TFID && myid == 0) fclose(TFID); + } + + /* free solution masks */ + if (umask != nullptr) { + N_VDestroy(umask); + umask = nullptr; + } + if (vmask != nullptr) { + N_VDestroy(vmask); + vmask = nullptr; + } + if (wmask != nullptr) { + N_VDestroy(wmask); + wmask = nullptr; + } + + /* free the parallel grid */ + delete grid; +} + + +/* -------------------------------------------------------------- + * Communication functions + * --------------------------------------------------------------*/ + +/* Fills send buffers before exchanging neighbor information */ +int FillSendBuffers(N_Vector y, UserData* udata) +{ + + /* Shortcuts */ + const realtype c = 
udata->c; + const int nxl = udata->grid->nxl; + const int nyl = udata->grid->nyl; + const int nzl = udata->grid->nzl; + const int dof = udata->grid->dof; + + /* Create 4D view of the vector */ + Vec4D Yview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(y)), nxl, nyl, nzl, dof); + + if (c > 0.0) + { + + /* Flow moving in the positive directions uses backward difference. */ + + /* Create 4D views of send buffers */ + Vec4D Esend(udata->grid->GetSendView("EAST"), 1, nyl, nzl, dof); + Vec4D Nsend(udata->grid->GetSendView("NORTH"), nxl, 1, nzl, dof); + Vec4D Fsend(udata->grid->GetSendView("FRONT"), nxl, nyl, 1, dof); + + /* Fill buffers on device */ + Kokkos::parallel_for("FillEastBuffer", + Range3D({0,0,0},{nyl,nzl,dof}), + KOKKOS_LAMBDA (int j, int k, int l) { + Esend(0,j,k,l) = Yview(nxl-1,j,k,l); + }); + Kokkos::parallel_for("FillNorthBuffer", + Range3D({0,0,0},{nxl,nzl,dof}), + KOKKOS_LAMBDA (int i, int k, int l) { + Nsend(i,0,k,l) = Yview(i,nyl-1,k,l); + }); + Kokkos::parallel_for("FillFrontBuffer", + Range3D({0,0,0},{nxl,nyl,dof}), + KOKKOS_LAMBDA (int i, int j, int l) { + Fsend(i,j,0,l) = Yview(i,j,nzl-1,l); + }); + + } + else if (c < 0.0) + { + + /* Flow moving in the negative directions uses forward difference. 
*/ + + /* Create 4D views of send buffers */ + Vec4D Wsend(udata->grid->GetSendView("WEST"), 1, nyl, nzl, dof); + Vec4D Ssend(udata->grid->GetSendView("SOUTH"), nxl, 1, nzl, dof); + Vec4D Bsend(udata->grid->GetSendView("BACK"), nxl, nyl, 1, dof); + + /* Fill buffers on device */ + Kokkos::parallel_for("FillWestBuffer", + Range3D({0,0,0},{nyl,nzl,dof}), + KOKKOS_LAMBDA (int j, int k, int l) { + Wsend(0,j,k,l) = Yview(0,j,k,l); + }); + Kokkos::parallel_for("FillSouthBuffer", + Range3D({0,0,0},{nxl,nzl,dof}), + KOKKOS_LAMBDA (int i, int k, int l) { + Ssend(i,0,k,l) = Yview(i,0,k,l); + }); + Kokkos::parallel_for("FillBackBuffer", + Range3D({0,0,0},{nxl,nyl,dof}), + KOKKOS_LAMBDA (int i, int j, int l) { + Bsend(i,j,0,l) = Yview(i,j,0,l); + }); + + } + + return(0); +} + + +/* -------------------------------------------------------------- + * Problem setup + * --------------------------------------------------------------*/ + +/* Parses the CLI arguments */ +int ParseArgs(int argc, char *argv[], UserData* udata, UserOptions* uopt) +{ + /* check for input args */ + if (argc > 1) + { + /* loop over input args and get value */ + for (int i = 1; i < argc; i++) + { + string argvi(argv[i]); + + if (argvi.compare("--help") == 0) + { + InputError(argv[0]); + return(-1); + } + else if (argvi.compare("--nout") == 0) + { + uopt->nout = atoi(argv[++i]); + } + else if (argvi.compare("--dont-save") == 0) + { + uopt->save = 0; + } + else if (argvi.compare("--output-dir") == 0) + { + if (strlen(argv[i+1]) > MXSTR) + { + if (udata->myid == 0) + fprintf(stderr, "ERROR: output directory string is too long\n"); + return(-1); + } + uopt->outputdir = argv[++i]; + } + else if (argvi.compare("--npts") == 0) + { + uopt->npts = atoi(argv[++i]); + } + else if (argvi.compare("--npxyz") == 0) + { + uopt->npxyz[0] = atoi(argv[++i]); + uopt->npxyz[1] = atoi(argv[++i]); + uopt->npxyz[2] = atoi(argv[++i]); + } + else if (argvi.compare("--xmax") == 0) + { + udata->xmax = strtod(argv[++i], NULL); + } + 
else if (argvi.compare("--A") == 0) + { + udata->A = strtod(argv[++i], NULL); + } + else if (argvi.compare("--B") == 0) + { + udata->B = strtod(argv[++i], NULL); + } + else if (argvi.compare("--k") == 0) + { + udata->k1 = strtod(argv[++i], NULL); + udata->k2 = strtod(argv[++i], NULL); + udata->k3 = strtod(argv[++i], NULL); + udata->k4 = strtod(argv[++i], NULL); + } + else if (argvi.compare("--c") == 0) + { + udata->c = strtod(argv[++i], NULL); + } + else if (argvi.compare("--order") == 0) + { + uopt->order = atoi(argv[++i]); + } + else if (argvi.compare("--method") == 0) + { + uopt->method = string(argv[++i]); + if (uopt->method != "ERK" && + uopt->method != "ARK-DIRK" && + uopt->method != "ARK-IMEX" && + uopt->method != "CV-BDF" && + uopt->method != "CV-ADAMS" && + uopt->method != "IDA") + { + fprintf(stderr, "ERROR: unknown method\n"); + InputError(argv[0]); + return(-1); + } + } + else if (argvi.compare("--fpaccel") == 0) + { + uopt->fpaccel = atoi(argv[++i]); + } + else if (argvi.compare("--nls") == 0) + { + uopt->nls = string(argv[++i]); + if (uopt->nls != "newton" && + uopt->nls != "tl-newton" && + uopt->nls != "fixedpoint" && + uopt->nls != "none") + { + fprintf(stderr, "ERROR: unknown nls\n"); + InputError(argv[0]); + return(-1); + } + } + else if (argvi.compare("--nopre") == 0) + { + uopt->precond = 0; + } + else if (argvi.compare("--fused") == 0) + { + uopt->fused = 1; + } + else if (argvi.compare("--tf") == 0) + { + uopt->tf = strtod(argv[++i], NULL); + } + else if (argvi.compare("--rtol") == 0) + { + uopt->rtol = strtod(argv[++i], NULL); + } + else if (argvi.compare("--atol") == 0) + { + uopt->atol = strtod(argv[++i], NULL); + } + else + { + InputError(argv[0]); + return(-1); + } + } + } + + /* Explicit method uses no nonlinear solver */ + if (uopt->method == "ERK") + uopt->nls = "none"; + + /* CV Adams method only uses fixedpoint nonlinear solver */ + if (uopt->method == "CV-ADAMS") + uopt->nls = "fixedpoint"; + + return(0); +} + + +/* Fills the mask 
vector for the component so that + u = y .* umask, v = y .* vmask, w = y .* wmask */ +int ComponentMask(N_Vector mask, const int component, const UserData* udata) +{ + SUNDIALS_CXX_MARK_FUNCTION(udata->prof); + + /* Shortcuts */ + const int nxl = udata->grid->nxl; + const int nyl = udata->grid->nyl; + const int nzl = udata->grid->nzl; + const int dof = udata->grid->dof; + + /* Create 4D view of mask data */ + Vec4D maskview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(mask)), nxl, nyl, nzl, dof); + + /* Fill mask data */ + N_VConst(0.0, mask); + Kokkos::parallel_for("Fill_mask", + Range3D({0,0,0},{nxl,nyl,nzl}), + KOKKOS_LAMBDA (int i, int j, int k) + { + maskview(i,j,k,component) = 1.0; + }); + + return 0; +} + + +/* Parses the CLI arguments and sets up the problem */ +int SetupProblem(int argc, char *argv[], UserData* udata, UserOptions* uopt, + SUNContext ctx) +{ + + SUNDIALS_CXX_MARK_FUNCTION(udata->prof); + + /* MPI variables */ + udata->comm = MPI_COMM_WORLD; + MPI_Comm_rank(udata->comm, &udata->myid); + MPI_Comm_size(udata->comm, &udata->nprocs); + + /* Default problem parameters */ + udata->add_reactions = true; + udata->xmax = 1.0; + udata->A = 1.0; + udata->B = 3.5; + udata->k1 = 1.0; + udata->k2 = 1.0; + udata->k3 = 1.0; + udata->k4 = 1.0; + udata->k5 = 1.0/5.0e-6; + udata->k6 = 1.0/5.0e-6; + udata->c = 0.01; + udata->uopt = uopt; + udata->TFID = NULL; + udata->UFID = NULL; + udata->VFID = NULL; + udata->WFID = NULL; + udata->nnlfi = 0; + + /* Set default integrator options */ + uopt->npxyz[0] = 0; /* number of processesors in x */ + uopt->npxyz[1] = 0; /* number of processesors in y */ + uopt->npxyz[2] = 0; /* number of processesors in z */ + uopt->npts = 100; /* number of mesh points in each direction */ + uopt->order = 3; /* method order */ + uopt->method = "ARK-DIRK"; /* stepper/method */ + uopt->t0 = 0.0; /* initial time */ + uopt->tf = 10.0; /* final time */ + uopt->rtol = 1.0e-6; /* relative tolerance */ + uopt->atol = 1.0e-9; /* absolute 
tolerance */ + uopt->nls = "newton"; /* default to newton, when appropriate */ + uopt->fpaccel = 3; /* default number of fixed point acceleration vectors */ + uopt->precond = 1; /* by default, precondition when appropriate */ + uopt->fused = 0; /* use fused vector ops */ + uopt->save = 1; /* save solution to disk */ + uopt->nout = 10; /* number of output times */ + uopt->outputdir = (char *) "."; /* output directory */ + + /* Parse CLI args and set udata/uopt appropriately */ + int retval = ParseArgs(argc, argv, udata, uopt); + if (check_retval((void*)&retval, "ParseArgs", 1, udata->myid)) return -1; + + /* Setup the parallel decomposition */ + const sunindextype npts[] = {uopt->npts, uopt->npts, uopt->npts}; + const realtype amax[] = {0.0, 0.0, 0.0}; + const realtype bmax[] = {udata->xmax, udata->xmax, udata->xmax}; + udata->grid = new ParallelGrid(&udata->comm, amax, bmax, npts, + 3, BoundaryType::PERIODIC, StencilType::UPWIND, udata->c, uopt->npxyz); + + /* Create the solution masks */ + SUNVector *umaskloc = new SUNVector((unsigned int)udata->grid->neq, ctx); + udata->umask = N_VMake_MPIPlusX(udata->comm, *umaskloc, ctx); + if (check_retval((void *) udata->umask, "N_VMake_MPIPlusX", 0, udata->myid)) MPI_Abort(udata->comm, 1); + SUNVector *vmaskloc = new SUNVector((unsigned int)udata->grid->neq, ctx); + udata->vmask = N_VMake_MPIPlusX(udata->comm, *vmaskloc, ctx); + if (check_retval((void *) udata->vmask, "N_VMake_MPIPlusX", 0, udata->myid)) MPI_Abort(udata->comm, 1); + SUNVector *wmaskloc = new SUNVector((unsigned int)udata->grid->neq, ctx); + udata->wmask = N_VMake_MPIPlusX(udata->comm, *wmaskloc, ctx); + if (check_retval((void *) udata->wmask, "N_VMake_MPIPlusX", 0, udata->myid)) MPI_Abort(udata->comm, 1); + ComponentMask(udata->umask, 0, udata); + ComponentMask(udata->vmask, 1, udata); + ComponentMask(udata->wmask, 2, udata); + + /* Open output files for results */ + if (uopt->save) + { + char fname[MXSTR]; + if (udata->myid == 0) + { + sprintf(fname, 
"%s/t.%06d.txt", uopt->outputdir, udata->myid); + udata->TFID = fopen(fname, "w"); + } + + sprintf(fname, "%s/u.%06d.txt", uopt->outputdir, udata->myid); + udata->UFID = fopen(fname, "w"); + + sprintf(fname, "%s/v.%06d.txt", uopt->outputdir, udata->myid); + udata->VFID = fopen(fname, "w"); + + sprintf(fname, "%s/w.%06d.txt", uopt->outputdir, udata->myid); + udata->WFID = fopen(fname, "w"); + } + + /* Print problem setup */ + if (udata->myid == 0) + { + printf("\n\t\tAdvection-Reaction Test Problem\n\n"); + printf("Using the MPI+Kokkos NVECTOR"); +#if defined(USE_CUDA) + printf(" with the CUDA back-end\n"); +#elif defined(USE_HIP) + printf(" with the HIP back-end\n"); +#elif defined(USE_OPENMP) + printf(" with the OpenMP back-end and %i threads\n", omp_get_max_threads()); +#else + printf(" with the serial back-end\n"); +#endif + printf("Number of Processors = %li\n", (long int) udata->nprocs); + udata->grid->PrintInfo(); + printf("Problem Parameters:\n"); + printf(" A = %g\n", udata->A); + printf(" B = %g\n", udata->B); + printf(" k = %g\n", udata->k1); + printf(" c = %g\n", udata->c); + printf("Integrator Options:\n"); + printf(" order = %d\n", uopt->order); + printf(" method = %s\n", uopt->method.c_str()); + printf(" nonlinear solver = %s\n", uopt->nls.c_str()); + printf(" fpaccel = %d\n", uopt->fpaccel); + printf(" preconditioner = %d\n", uopt->precond); + printf(" fused vector ops = %d\n", uopt->fused); + printf(" t0 = %g\n", uopt->t0); + printf(" tf = %g\n", uopt->tf); + printf(" reltol = %.1e\n", uopt->rtol); + printf(" abstol = %.1e\n", uopt->atol); + printf(" nout = %d\n", uopt->nout); + printf("Output directory: %s\n", uopt->outputdir); + } + + + /* return success */ + return(0); +} + + +/* Compute the 3D Gaussian function. 
*/ +KOKKOS_FUNCTION +void Gaussian3D(realtype& x, realtype& y, realtype& z, realtype xmax) +{ + /* Gaussian distribution defaults */ + const realtype alpha = 0.1; + const realtype mu[] = { xmax/RCONST(2.0), xmax/RCONST(2.0), xmax/RCONST(2.0) }; + const realtype sigma[] = { xmax/RCONST(4.0), xmax/RCONST(4.0), xmax/RCONST(4.0) }; // Sigma = diag(sigma) + + /* denominator = 2*sqrt(|Sigma|*(2pi)^3) */ + const realtype denom = 2.0 * sqrt((sigma[0]*sigma[1]*sigma[2])*pow(2*M_PI,3)); + x = alpha * exp( -((x - mu[0])*(x - mu[0])*(1.0/sigma[0])) / denom ); + y = alpha * exp( -((y - mu[1])*(y - mu[1])*(1.0/sigma[1])) / denom ); + z = alpha * exp( -((z - mu[2])*(z - mu[2])*(1.0/sigma[2])) / denom ); +} + + +/* Initial condition function */ +int SetIC(N_Vector y, UserData* udata) +{ + SUNDIALS_CXX_MARK_FUNCTION(udata->prof); + + /* Variable shortcuts */ + const int nxl = udata->grid->nxl; + const int nyl = udata->grid->nyl; + const int nzl = udata->grid->nzl; + const int dof = udata->grid->dof; + const realtype dx = udata->grid->dx; + const realtype dy = udata->grid->dy; + const realtype dz = udata->grid->dz; + const realtype xmax = udata->xmax; + const realtype A = udata->A; + const realtype B = udata->B; + const realtype k1 = udata->k1; + const realtype k2 = udata->k2; + const realtype k3 = udata->k3; + const realtype k4 = udata->k4; + const int xcrd = udata->grid->coords[0]; + const int ycrd = udata->grid->coords[1]; + const int zcrd = udata->grid->coords[2]; + + /* Steady state solution */ + const realtype us = k1 * A / k4; + const realtype vs = k2 * k4 * B / (k1 * k3 * A); + const realtype ws = 3.0; + + /* Create 4D view of y */ + Vec4D yview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(y)), nxl, nyl, nzl, dof); + + /* Gaussian perturbation of the steady state solution */ + Kokkos::parallel_for("SetIC", + Range3D({0,0,0},{nxl,nyl,nzl}), + KOKKOS_LAMBDA (int i, int j, int k) + { + realtype x = (xcrd * nxl + i) * dx; + realtype y = (ycrd * nyl + j) * dy; + realtype z 
= (zcrd * nzl + k) * dz; + Gaussian3D(x,y,z,xmax); + const realtype p = x + y + z; + yview(i,j,k,0) = us + p; + yview(i,j,k,1) = vs + p; + yview(i,j,k,2) = ws + p; + }); + + /* Return success */ + return(0); +} + + +/* Write time and solution to disk */ +int WriteOutput(realtype t, N_Vector y, UserData* udata, UserOptions* uopt) +{ + SUNDIALS_CXX_MARK_FUNCTION(udata->prof); + + /* output current solution norm to screen */ + realtype N = (realtype) udata->grid->npts(); + realtype u = N_VWL2Norm(y, udata->umask); + u = sqrt(u*u/N); + realtype v = N_VWL2Norm(y, udata->vmask); + v = sqrt(v*v/N); + realtype w = N_VWL2Norm(y, udata->wmask); + w = sqrt(w*w/N); + if (udata->myid == 0) { + printf(" %10.6f %10.6f %10.6f %10.6f\n", t, u, v, w); + std::fflush(stdout); + } + + if (uopt->save) + { + /* Copy solution data to host mirror view */ + SUNVector* ylocal = sundials::kokkos::GetVec(N_VGetLocalVector_MPIPlusX(y)); + sundials::kokkos::CopyFromDevice(*ylocal); + + /* output the times to disk */ + if (udata->myid == 0 && udata->TFID) { + fprintf(udata->TFID," %.16e\n", t); + std::fflush(udata->TFID); + } + + /* create 4D view of host data */ + const int nxl = udata->grid->nxl; + const int nyl = udata->grid->nyl; + const int nzl = udata->grid->nzl; + const int dof = udata->grid->dof; + Vec4DHost yview(N_VGetArrayPointer(N_VGetLocalVector_MPIPlusX(y)), nxl, nyl, nzl, dof); + + /* output results to disk */ + for (int i = 0; i < nxl; i++) + for (int j = 0; j < nyl; j++) + for (int k = 0; k < nzl; k++) { + fprintf(udata->UFID," %.16e", yview(i,j,k,0)); + fprintf(udata->VFID," %.16e", yview(i,j,k,1)); + fprintf(udata->WFID," %.16e", yview(i,j,k,2)); + } + + fprintf(udata->UFID,"\n"); + fprintf(udata->VFID,"\n"); + fprintf(udata->WFID,"\n"); + std::fflush(udata->UFID); + std::fflush(udata->VFID); + std::fflush(udata->WFID); + } + + return(0); +} + + +void InputError(char *name) +{ + int myid; + + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + + if (myid == 0) + { + fprintf(stderr, 
"\nERROR: Invalid command line input\n"); + fprintf(stderr, "\nCommand line options for %s\n",name); + fprintf(stderr, " --help prints this message\n"); + fprintf(stderr, " --output-dir the directory where all output files will be written (default is the CWD)\n"); + fprintf(stderr, " --nout number of output times to print (default is 10)\n"); + fprintf(stderr, " --dont-save do not save the solution to the filesystem at the nout interval (default is to save)\n"); + fprintf(stderr, " --method ERK, ARK-DIRK, ARK-IMEX (default), CV-BDF, CV-ADAMS, IDA\n"); + fprintf(stderr, " --fpaccel the number of fixed-point acceleration vectors to use (only valid when using fixedpoint nonlinear solver)\n"); + fprintf(stderr, " --nls nonlinear solver to use (newton, tl-newton (task-local newton), fixedpoint)\n"); + fprintf(stderr, " --nopre do not precondition the linear system\n"); + fprintf(stderr, " --order the method order to use\n"); + fprintf(stderr, " --npts number of mesh points in each direction\n"); + fprintf(stderr, " --npxyz number of processors in each direction (0 forces MPI to decide)\n"); + fprintf(stderr, " --xmax maximum value of x (size of domain)\n"); + fprintf(stderr, " --tf final time\n"); + fprintf(stderr, " --A A parameter value\n"); + fprintf(stderr, " --B B parameter value\n"); + fprintf(stderr, " --k reaction rate\n"); + fprintf(stderr, " --c advection speed\n"); + fprintf(stderr, " --rtol relative tolerance\n"); + fprintf(stderr, " --atol absolute tolerance\n"); + } + + MPI_Barrier(MPI_COMM_WORLD); +} diff --git a/benchmarks/advection_reaction_3D/kokkos/advection_reaction_3D.hpp b/benchmarks/advection_reaction_3D/kokkos/advection_reaction_3D.hpp new file mode 100644 index 0000000000..cb0dceea64 --- /dev/null +++ b/benchmarks/advection_reaction_3D/kokkos/advection_reaction_3D.hpp @@ -0,0 +1,171 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Daniel R. Reynolds @ SMU + * David J. Gardner, Cody J. 
Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ---------------------------------------------------------------------------*/ + +#ifndef ADVECTION_REACTION_3D_HPP +#define ADVECTION_REACTION_3D_HPP + +#include +#include +#include +#include +#include + +#include +#include +#include "nvector/nvector_kokkos.hpp" +#include "check_retval.h" +#include "ParallelGrid.hpp" + +/* Set SUNDIALS Kokkos vector shortcut */ +using SUNVector = sundials::kokkos::Vector; + +using sundials_tools::ParallelGrid; +using sundials_tools::BoundaryType; +using sundials_tools::StencilType; +using std::string; + +/* Maximum size of output directory string */ +constexpr int MXSTR = 2048; + +/* + * Data structure for problem options + */ + +struct UserOptions +{ + int npxyz[3]; /* number of processors in x,y,z */ + sunindextype npts; /* number of spatial mesh points */ + realtype t0; /* initial time */ + realtype tf; /* final time */ + realtype rtol; /* relative tolerance */ + realtype atol; /* absolute tolerance */ + int order; /* method order */ + string method; /* method string */ + string nls; /* nonlinear solver to use */ + int fpaccel; /* number of fixedpoint vectors */ + int precond; /* to precondition or not */ + int fused; /* use fused vector ops */ + int nout; /* number of outputs */ + int save; /* save solution to disk */ + char* outputdir; +}; + + +/* + * Data structure for problem specific data + */ + +struct UserData +{ + SUNContext ctx; + SUNProfiler prof; + + /* MPI data */ + MPI_Comm comm; + int myid; + int nprocs; + MPI_Request req[2]; + + /* Should reactions be added to the advection or not */ + bool add_reactions; + + /* File handles 
for output */ + FILE* TFID; /* time output file pointer */ + FILE* UFID; /* solution output file pointer */ + FILE* VFID; + FILE* WFID; + + /* Solution masks */ + N_Vector umask; + N_Vector vmask; + N_Vector wmask; + + /* Problem parameters */ + realtype xmax; /* maximum x value */ + realtype A; /* concentration of species A */ + realtype B; /* w source rate */ + realtype k1; /* reaction rates */ + realtype k2; + realtype k3; + realtype k4; + realtype k5; + realtype k6; + realtype c; /* advection coefficient */ + + /* Parallel mesh */ + ParallelGrid* grid; + + /* Count of implicit function evals by the task local nonlinear solver */ + long int nnlfi; + + /* Integrator options */ + UserOptions* uopt; + + /* Constructor that takes the context */ + UserData(SUNContext ctx) + : ctx(ctx), umask(nullptr), vmask(nullptr), wmask(nullptr), uopt(nullptr), + TFID(nullptr), UFID(nullptr), VFID(nullptr), WFID(nullptr) + { + SUNContext_GetProfiler(ctx, &prof); + } + + /* destructor frees the problem data */ + ~UserData(); +}; + + +/* + * Functions to evolve the solution (defined by the drivers) + */ + +/* function that does ARKStep setup and evolves the solution with a DIRK method */ +extern int EvolveProblemDIRK(N_Vector y, UserData* udata, UserOptions* uopt); + +/* function that does ARKStep setup and evolves the solution with an IMEX method */ +extern int EvolveProblemIMEX(N_Vector y, UserData* udata, UserOptions* uopt); + +/* function that does ERKStep setup and evolves the solution */ +extern int EvolveProblemExplicit(N_Vector y, UserData* udata, UserOptions* uopt); + +/* function that does CVODE BDF setup and evolves the solution */ +extern int EvolveProblemBDF(N_Vector y, UserData* udata, UserOptions* uopt); + +/* function that does CVODE Adams setup and evolves the solution */ +extern int EvolveProblemAdams(N_Vector y, UserData* udata, UserOptions* uopt); + +/* function that does IDA BDF setup and evolves the solution */ +extern int EvolveDAEProblem(N_Vector y, UserData* 
udata, UserOptions* uopt); + + +/* + * Helper functions + */ + +/* function to set initial condition */ +int SetIC(N_Vector y, UserData* udata); + +/* function to fill neighbor data */ +int FillSendBuffers(N_Vector y, UserData* udata); + +/* functions for processing command line args */ +int SetupProblem(int argc, char *argv[], UserData* udata, UserOptions* uopt, + SUNContext ctx); +void InputError(char *name); +int ComponentMask(N_Vector mask, const int component, const UserData* udata); + +/* function to write solution to disk */ +int WriteOutput(realtype t, N_Vector y, UserData* udata, UserOptions* uopt); + +#endif diff --git a/benchmarks/advection_reaction_3D/arkode_driver.cpp b/benchmarks/advection_reaction_3D/kokkos/arkode_driver.cpp similarity index 98% rename from benchmarks/advection_reaction_3D/arkode_driver.cpp rename to benchmarks/advection_reaction_3D/kokkos/arkode_driver.cpp index bbea07956a..e2cf1451e3 100644 --- a/benchmarks/advection_reaction_3D/arkode_driver.cpp +++ b/benchmarks/advection_reaction_3D/kokkos/arkode_driver.cpp @@ -588,10 +588,7 @@ int TaskLocalLSolve(N_Vector delta, void* arkode_mem) SUNDIALS_CXX_MARK_FUNCTION(udata->prof); /* set up I - gamma*J and solve */ - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, udata->grid->nxl), - RAJA::RangeSegment(0, udata->grid->nyl), - RAJA::RangeSegment(0, udata->grid->nzl)); - retval = SolveReactionLinSys(z, delta, delta, gamma, range, udata); + retval = SolveReactionLinSys(z, delta, delta, gamma, udata); return(retval); diff --git a/benchmarks/advection_reaction_3D/check_retval.h b/benchmarks/advection_reaction_3D/kokkos/check_retval.h similarity index 99% rename from benchmarks/advection_reaction_3D/check_retval.h rename to benchmarks/advection_reaction_3D/kokkos/check_retval.h index 31a4fa5922..887b7cea5d 100644 --- a/benchmarks/advection_reaction_3D/check_retval.h +++ b/benchmarks/advection_reaction_3D/kokkos/check_retval.h @@ -54,4 +54,4 @@ static int check_retval(void *returnvalue, 
const char *funcname, int opt, int my return(0); } -#endif \ No newline at end of file +#endif diff --git a/benchmarks/advection_reaction_3D/cvode_driver.cpp b/benchmarks/advection_reaction_3D/kokkos/cvode_driver.cpp similarity index 100% rename from benchmarks/advection_reaction_3D/cvode_driver.cpp rename to benchmarks/advection_reaction_3D/kokkos/cvode_driver.cpp diff --git a/benchmarks/advection_reaction_3D/ida_driver.cpp b/benchmarks/advection_reaction_3D/kokkos/ida_driver.cpp similarity index 100% rename from benchmarks/advection_reaction_3D/ida_driver.cpp rename to benchmarks/advection_reaction_3D/kokkos/ida_driver.cpp diff --git a/benchmarks/advection_reaction_3D/kokkos/rhs3D.hpp b/benchmarks/advection_reaction_3D/kokkos/rhs3D.hpp new file mode 100644 index 0000000000..34698146ab --- /dev/null +++ b/benchmarks/advection_reaction_3D/kokkos/rhs3D.hpp @@ -0,0 +1,540 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Daniel R. Reynolds @ SMU + * David J. Gardner, Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * -----------------------------------------------------------------------------*/ + +#ifndef ADVECTION_REACTION_3D_RHS_HPP +#define ADVECTION_REACTION_3D_RHS_HPP + +#include "advection_reaction_3D.hpp" + +/* -------------------------------------------------------------- + * Right hand side (RHS) and residual functions + * --------------------------------------------------------------*/ + +/* Compute the advection term f(t,y) = -c (grad * y). This is done using + upwind 1st order finite differences. 
At present, only periodic boudary + conditions are supported, which are handled via MPI's Cartesian + communicator (even for serial runs). */ +static int Advection(realtype t, N_Vector y, N_Vector ydot, void* user_data) +{ + /* access problem data */ + UserData* udata = (UserData*) user_data; + + SUNDIALS_CXX_MARK_FUNCTION(udata->prof); + + /* set variable shortcuts */ + const int nxl = udata->grid->nxl; + const int nyl = udata->grid->nyl; + const int nzl = udata->grid->nzl; + const int dof = udata->grid->dof; + const realtype c = udata->c; + const realtype cx = -c / udata->grid->dx; + const realtype cy = -c / udata->grid->dy; + const realtype cz = -c / udata->grid->dz; + + /* local variables */ + int retval; + + /* fill send buffers and begin exchanging boundary information */ + SUNDIALS_MARK_BEGIN(udata->prof, "Neighbor Exchange"); + retval = FillSendBuffers(y, udata); + if (check_retval(&retval, "FillSendBuffers", 1, udata->myid)) + return(-1); + retval = udata->grid->ExchangeStart(); + if (check_retval(&retval, "ExchangeStart", 1, udata->myid)) + return(-1); + SUNDIALS_MARK_END(udata->prof, "Neighbor Exchange"); + + /* set output to zero */ + N_VConst(0.0, ydot); + + /* create 4D views of the state and RHS vectors */ + Vec4D Yview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(y)), nxl, nyl, nzl, dof); + Vec4D dYview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(ydot)), nxl, nyl, nzl, dof); + + /* iterate over domain interior, computing advection */ + if (c > 0.0) + { + /* flow moving in the positive x,y,z direction */ + Kokkos::parallel_for("AdvectionInteriorRight", + Range3D({1,1,1},{nxl,nyl,nzl}), + KOKKOS_LAMBDA (int i, int j, int k) + { + const realtype u_ijk = Yview(i,j,k,0); + const realtype v_ijk = Yview(i,j,k,1); + const realtype w_ijk = Yview(i,j,k,2); + + // grad * u + dYview(i,j,k,0) = cz * (u_ijk - Yview(i,j,k-1,0)); // du/dz + dYview(i,j,k,0) += cy * (u_ijk - Yview(i,j-1,k,0)); // du/dy + dYview(i,j,k,0) += cx * (u_ijk - 
Yview(i-1,j,k,0)); // du/dx + + // grad * v + dYview(i,j,k,1) = cz * (v_ijk - Yview(i,j,k-1,1)); // dv/dz + dYview(i,j,k,1) += cy * (v_ijk - Yview(i,j-1,k,1)); // dv/dy + dYview(i,j,k,1) += cx * (v_ijk - Yview(i-1,j,k,1)); // dv/dx + + // grad * w + dYview(i,j,k,2) = cz * (w_ijk - Yview(i,j,k-1,2)); // dw/dz + dYview(i,j,k,2) += cy * (w_ijk - Yview(i,j-1,k,2)); // dw/dy + dYview(i,j,k,2) += cx * (w_ijk - Yview(i-1,j,k,2)); // dw/dx + }); + } + else if (c < 0.0) + { + /* flow moving in the negative x,y,z direction */ + Kokkos::parallel_for("AdvectionInteriorLeft", + Range3D({0,0,0},{nxl-1,nyl-1,nzl-1}), + KOKKOS_LAMBDA (int i, int j, int k) + { + const realtype u_ijk = Yview(i,j,k,0); + const realtype v_ijk = Yview(i,j,k,1); + const realtype w_ijk = Yview(i,j,k,2); + + // grad * u + dYview(i,j,k,0) = cz * (Yview(i,j,k+1,0) - u_ijk); // du/dz + dYview(i,j,k,0) += cy * (Yview(i,j+1,k,0) - u_ijk); // du/dy + dYview(i,j,k,0) += cx * (Yview(i+1,j,k,0) - u_ijk); // du/dx + + // grad * v + dYview(i,j,k,1) = cz * (Yview(i,j,k+1,1) - v_ijk); // dv/dz + dYview(i,j,k,1) += cy * (Yview(i,j+1,k,1) - v_ijk); // dv/dy + dYview(i,j,k,1) += cx * (Yview(i+1,j,k,1) - v_ijk); // dv/dx + + // grad * w + dYview(i,j,k,2) = cz * (Yview(i,j,k+1,2) - w_ijk); // dw/dz + dYview(i,j,k,2) += cy * (Yview(i,j+1,k,2) - w_ijk); // dw/dy + dYview(i,j,k,2) += cx * (Yview(i+1,j,k,2) - w_ijk); // dw/dx + }); + } + + /* finish exchanging boundary information */ + SUNDIALS_MARK_BEGIN(udata->prof, "Neighbor Exchange"); + retval = udata->grid->ExchangeEnd(); + if (check_retval(&retval, "ExchangeEnd", 1, udata->myid)) + return(-1); + SUNDIALS_MARK_END(udata->prof, "Neighbor Exchange"); + + /* compute advection at process boundaries */ + if (c > 0.0) + { + /* Flow moving in the positive x,y,z direction: + boundaries are west face, south face, and back face */ + + /* Create 4D views of receive buffers */ + Vec4D Wrecv(udata->grid->GetRecvView("WEST"), 1, nyl, nzl, dof); + Vec4D 
Srecv(udata->grid->GetRecvView("SOUTH"), nxl, 1, nzl, dof); + Vec4D Brecv(udata->grid->GetRecvView("BACK"), nxl, nyl, 1, dof); + + /* Perform calculations on each "lower" face */ + Kokkos::parallel_for("AdvectionBoundaryWest", + Range3D({0,0,0},{nyl,nzl,dof}), + KOKKOS_LAMBDA (int j, int k, int l) + { + const int i = 0; + const realtype Yijkl = Yview(i,j,k,l); + const realtype YSouth = (j > 0) ? Yview(i,j-1,k,l) : Srecv(i,0,k,l); + const realtype YBack = (k > 0) ? Yview(i,j,k-1,l) : Brecv(i,j,0,l); + dYview(i,j,k,l) = cx * (Yijkl - Wrecv(0,j,k,l)); // d/dx + dYview(i,j,k,l) += cy * (Yijkl - YSouth); // d/dy + dYview(i,j,k,l) += cz * (Yijkl - YBack); // d/dz + }); + Kokkos::parallel_for("AdvectionBoundarySouth", + Range3D({0,0,0},{nxl,nzl,dof}), + KOKKOS_LAMBDA (int i, int k, int l) + { + const int j = 0; + const realtype Yijkl = Yview(i,j,k,l); + const realtype YWest = (i > 0) ? Yview(i-1,j,k,l) : Wrecv(0,j,k,l); + const realtype YBack = (k > 0) ? Yview(i,j,k-1,l) : Brecv(i,j,0,l); + dYview(i,j,k,l) = cx * (Yijkl - YWest); // d/dx + dYview(i,j,k,l) += cy * (Yijkl - Srecv(i,0,k,l)); // d/dy + dYview(i,j,k,l) += cz * (Yijkl - YBack); // d/dz + }); + Kokkos::parallel_for("AdvectionBoundaryBack", + Range3D({0,0,0},{nxl,nyl,dof}), + KOKKOS_LAMBDA (int i, int j, int l) + { + const int k = 0; + const realtype Yijkl = Yview(i,j,k,l); + const realtype YWest = (i > 0) ? Yview(i-1,j,k,l) : Wrecv(0,j,k,l); + const realtype YSouth = (j > 0) ? 
Yview(i,j-1,k,l) : Srecv(i,0,k,l); + dYview(i,j,k,l) = cx * (Yijkl - YWest); // d/dx + dYview(i,j,k,l) += cy * (Yijkl - YSouth); // d/dy + dYview(i,j,k,l) += cz * (Yijkl - Brecv(i,j,0,l)); // d/dz + }); + + } + else if (c < 0.0) + { + + /* Flow moving in the negative x,y,z direction: + boundaries are east face, north face, and front face */ + + /* Create 4D views of receive buffers */ + Vec4D Erecv(udata->grid->GetRecvView("EAST"), 1, nyl, nzl, dof); + Vec4D Nrecv(udata->grid->GetRecvView("NORTH"), nxl, 1, nzl, dof); + Vec4D Frecv(udata->grid->GetRecvView("FRONT"), nxl, nyl, 1, dof); + + /* Perform calculations on each "upper" face */ + Kokkos::parallel_for("AdvectionBoundaryEast", + Range3D({0,0,0},{nyl,nzl,dof}), + KOKKOS_LAMBDA (int j, int k, int l) + { + const int i = nxl-1; + const realtype Yijkl = Yview(i,j,k,l); + const realtype YNorth = (j < nyl-1) ? Yview(i,j+1,k,l) : Nrecv(i,0,k,l); + const realtype YFront = (k < nzl-1) ? Yview(i,j,k+1,l) : Frecv(i,j,0,l); + dYview(i,j,k,l) = cx * (Erecv(0,j,k,l) - Yijkl); // d/dx + dYview(i,j,k,l) += cy * (YNorth - Yijkl); // d/dy + dYview(i,j,k,l) += cz * (YFront - Yijkl); // d/dz + }); + Kokkos::parallel_for("AdvectionBoundaryNorth", + Range3D({0,0,0},{nxl,nzl,dof}), + KOKKOS_LAMBDA (int i, int k, int l) + { + const int j = nyl-1; + const realtype Yijkl = Yview(i,j,k,l); + const realtype YEast = (i < nxl-1) ? Yview(i+1,j,k,l) : Erecv(0,j,k,l); + const realtype YFront = (k < nzl-1) ? Yview(i,j,k+1,l) : Frecv(i,j,0,l); + dYview(i,j,k,l) = cx * (YEast - Yijkl); // d/dx + dYview(i,j,k,l) += cy * (Nrecv(i,0,k,l) - Yijkl); // d/dy + dYview(i,j,k,l) += cz * (YFront - Yijkl); // d/dz + }); + Kokkos::parallel_for("AdvectionBoundaryFront", + Range3D({0,0,0},{nxl,nyl,dof}), + KOKKOS_LAMBDA (int i, int j, int l) + { + const int k = nzl-1; + const realtype Yijkl = Yview(i,j,k,l); + const realtype YEast = (i < nxl-1) ? Yview(i+1,j,k,l) : Erecv(0,j,k,l); + const realtype YNorth = (j < nyl-1) ? 
Yview(i,j+1,k,l) : Nrecv(i,0,k,l); + dYview(i,j,k,l) = cx * (YEast - Yijkl); // d/dx + dYview(i,j,k,l) += cy * (YNorth - Yijkl); // d/dy + dYview(i,j,k,l) += cz * (Frecv(i,j,0,l) - Yijkl); // d/dz + }); + } + + /* return success */ + return(0); +} + + +/* Compute the reaction term g(t,y). */ +static int Reaction(realtype t, N_Vector y, N_Vector ydot, void* user_data) +{ + /* access problem data */ + UserData* udata = (UserData*) user_data; + + SUNDIALS_CXX_MARK_FUNCTION(udata->prof); + + /* set variable shortcuts */ + const realtype A = udata->A; + const realtype B = udata->B; + const realtype k1 = udata->k1; + const realtype k2 = udata->k2; + const realtype k3 = udata->k3; + const realtype k4 = udata->k4; + const realtype k5 = udata->k5; + const realtype k6 = udata->k6; + const int nxl = udata->grid->nxl; + const int nyl = udata->grid->nyl; + const int nzl = udata->grid->nzl; + const int dof = udata->grid->dof; + + /* Zero output if not adding reactions to existing RHS */ + if (!udata->add_reactions) + N_VConst(0.0, ydot); + + /* create 4D views of state and RHS vectors */ + Vec4D Yview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(y)), nxl, nyl, nzl, dof); + Vec4D dYview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(ydot)), nxl, nyl, nzl, dof); + + /* add reaction terms to RHS */ + Kokkos::parallel_for("ReactionRHS", + Range3D({0,0,0},{nxl,nyl,nzl}), + KOKKOS_LAMBDA (int i, int j, int k) + { + const realtype u = Yview(i,j,k,0); + const realtype v = Yview(i,j,k,1); + const realtype w = Yview(i,j,k,2); + dYview(i,j,k,0) += k1 * A - k2 * w * u + k3 * u * u * v - k4 * u; + dYview(i,j,k,1) += k2 * w * u - k3 * u * u * v; + dYview(i,j,k,2) += -k2 * w * u + k5 * B - k6 * w; + }); + + /* return success */ + return(0); +} + + +/* Compute the RHS as h(t,y) = f(t,y) + g(t,y). 
*/ +static int AdvectionReaction(realtype t, N_Vector y, N_Vector ydot, + void *user_data) +{ + /* access problem data */ + UserData* udata = (UserData*) user_data; + int retval; + + /* NOTE: The order in which Advection and Reaction are called + is critical here. Advection must be computed first. */ + retval = Advection(t, y, ydot, user_data); + if (check_retval((void *)&retval, "Advection", 1, udata->myid)) return(-1); + + retval = Reaction(t, y, ydot, user_data); + if (check_retval((void *)&retval, "Reaction", 1, udata->myid)) return(-1); + + /* return success */ + return(0); +} + +/* Compute the residual F(t,y,y') = ydot - h(t,y) = 0. */ +static int AdvectionReactionResidual(realtype t, N_Vector y, N_Vector ydot, + N_Vector F, void *user_data) +{ + /* access problem data */ + UserData* udata = (UserData*) user_data; + int retval; + + /* NOTE: The order in which Advection and Reaction are called + is critical here. Advection must be computed first. */ + retval = Advection(t, y, F, user_data); /* F = -c y_x */ + if (check_retval((void *)&retval, "Advection", 1, udata->myid)) return(-1); + + retval = Reaction(t, y, F, user_data); /* F = -c y_x + g(t,y) */ + if (check_retval((void *)&retval, "Reaction", 1, udata->myid)) return(-1); + + /* F = ydot - h(t,y) = ydot + c y_x - g(t,y) */ + N_VLinearSum(1.0, ydot, -1.0, F, F); + + /* return success */ + return(0); +} + +/* -------------------------------------------------------------- + * Linear system and Jacobian functions + * --------------------------------------------------------------*/ + +/* Solve the linear systems Ax = b where A = I - gamma*dg/dy. + When using a fully implicit method, we are approximating + dh/dy as dg/dy. 
*/ +static int SolveReactionLinSys(N_Vector y, N_Vector x, N_Vector b, + const realtype gamma, UserData* udata) +{ + /* set variable shortcuts */ + const int dof = udata->grid->dof; + const int nxl = udata->grid->nxl; + const int nyl = udata->grid->nyl; + const int nzl = udata->grid->nzl; + const realtype k2 = udata->k2; + const realtype k3 = udata->k3; + const realtype k4 = udata->k4; + const realtype k6 = udata->k6; + + /* create 4D views of state, RHS and solution vectors */ + Vec4D Yview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(y)), nxl, nyl, nzl, dof); + Vec4D Bview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(b)), nxl, nyl, nzl, dof); + Vec4D Xview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(x)), nxl, nyl, nzl, dof); + + /* solve reaction linear system */ + Kokkos::parallel_for("SolveReactionLinSys", + Range3D({0,0,0},{nxl,nyl,nzl}), + KOKKOS_LAMBDA (int i, int j, int k) + { + + /* shortcuts to u, v, w for the block */ + const realtype u = Yview(i,j,k,0); + const realtype v = Yview(i,j,k,1); + const realtype w = Yview(i,j,k,2); + + // + // compute A = I - gamma*(dg/dy) + // + + /* 1st row: u, v, w */ + const realtype A0 = 1. - gamma * (-k2 * w + 2.0 * k3 * u * v - k4); + const realtype A1 = -gamma * (k3 * u * u); + const realtype A2 = -gamma * (-k2 * u); + + /* 2nd row: u, v, w */ + const realtype A3 = -gamma * (k2 * w - 2.0 * k3 * u * v); + const realtype A4 = 1. - gamma * (-k3 * u * u); + const realtype A5 = -gamma * (k2 * u); + + /* 3rd row: u, v, w */ + const realtype A6 = -gamma * (-k2 * w); + const realtype A7 = 0.0; + const realtype A8 = 1. 
- gamma * (-k2 * u - k6); + + // + // compute x = A^{-1}*b + // + + const realtype scratch_0 = A4*A8; + const realtype scratch_1 = A1*A5; + const realtype scratch_2 = A2*A7; + const realtype scratch_3 = A5*A7; + const realtype scratch_4 = A1*A8; + const realtype scratch_5 = A2*A4; + const realtype scratch_6 = 1.0/(A0*scratch_0 - A0*scratch_3 + A3*scratch_2 - A3*scratch_4 + A6*scratch_1 - A6*scratch_5); + const realtype scratch_7 = A2*A3; + const realtype scratch_8 = A6*Bview(i,j,k,0); + const realtype scratch_9 = A2*A6; + const realtype scratch_10 = A3*Bview(i,j,k,0); + const realtype scratch_11 = 1.0/A0; + const realtype scratch_12 = A1*scratch_11; + const realtype scratch_13 = (-A6*scratch_12 + A7)/(-A3*scratch_12 + A4); + + Xview(i,j,k,0) = scratch_6*( Bview(i,j,k,0)*(scratch_0 - scratch_3) + + Bview(i,j,k,1)*(scratch_2 - scratch_4) + + Bview(i,j,k,2)*(scratch_1 - scratch_5)); + Xview(i,j,k,1) = scratch_6*( Bview(i,j,k,2)*(scratch_7 - A0*A5) + + Bview(i,j,k,1)*(A0*A8 - scratch_9) + + A5*scratch_8 - A8*scratch_10 ); + Xview(i,j,k,2) = ( -Bview(i,j,k,2) + scratch_11*scratch_8 + + scratch_13*(Bview(i,j,k,1) - scratch_10*scratch_11)) / + (-A8 + scratch_11*scratch_9 + scratch_13*(A5 - scratch_11*scratch_7)); + + }); + + return(0); +} + +/* Solve the linear systems Ax = b where A = -dg/dy + gamma. + We are approximating dh/dy as dg/dy. 
*/ +static int SolveReactionLinSysRes(N_Vector y, N_Vector x, N_Vector b, + const realtype gamma, UserData* udata) +{ + /* set variable shortcuts */ + const int dof = udata->grid->dof; + const int nxl = udata->grid->nxl; + const int nyl = udata->grid->nyl; + const int nzl = udata->grid->nzl; + const realtype k2 = udata->k2; + const realtype k3 = udata->k3; + const realtype k4 = udata->k4; + const realtype k6 = udata->k6; + + /* create 4D views of state, RHS and solution vectors */ + Vec4D Yview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(y)), nxl, nyl, nzl, dof); + Vec4D Bview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(b)), nxl, nyl, nzl, dof); + Vec4D Xview(N_VGetDeviceArrayPointer(N_VGetLocalVector_MPIPlusX(x)), nxl, nyl, nzl, dof); + + /* solve reaction linear system */ + Kokkos::parallel_for("SolveReactionLinSys", + Range3D({0,0,0},{nxl,nyl,nzl}), + KOKKOS_LAMBDA (int i, int j, int k) + { + + /* shortcuts to u, v, w for the block */ + const realtype u = Yview(i,j,k,0); + const realtype v = Yview(i,j,k,1); + const realtype w = Yview(i,j,k,2); + + // + // compute A = -dg/dy + gamma*diag(df/dydot) + // where diag(df/dydot) is approximated as + // diag([udot, vdot, wdot]) + // + + /* 1st row: u, v, w */ + const realtype A0 = -(-k2 * w + 2.0 * k3 * u * v - k4) + gamma; + const realtype A1 = -(k3 * u * u); + const realtype A2 = -(-k2 * u); + + /* 2nd row: u, v, w */ + const realtype A3 = -(k2 * w - 2.0 * k3 * u * v); + const realtype A4 = -(-k3 * u * u) + gamma; + const realtype A5 = -(k2 * u); + + /* 3rd row: u, v, w */ + const realtype A6 = -(-k2 * w); + const realtype A7 = 0.0; + const realtype A8 = -(-k2 * u - k6) + gamma; + + // + // compute x = A^{-1}*b + // + + const realtype scratch_0 = A4*A8; + const realtype scratch_1 = A1*A5; + const realtype scratch_2 = A2*A7; + const realtype scratch_3 = A5*A7; + const realtype scratch_4 = A1*A8; + const realtype scratch_5 = A2*A4; + const realtype scratch_6 = 1.0/(A0*scratch_0 - A0*scratch_3 + 
A3*scratch_2 - A3*scratch_4 + A6*scratch_1 - A6*scratch_5); + const realtype scratch_7 = A2*A3; + const realtype scratch_8 = A6*Bview(i,j,k,0); + const realtype scratch_9 = A2*A6; + const realtype scratch_10 = A3*Bview(i,j,k,0); + const realtype scratch_11 = 1.0/A0; + const realtype scratch_12 = A1*scratch_11; + const realtype scratch_13 = (-A6*scratch_12 + A7)/(-A3*scratch_12 + A4); + + Xview(i,j,k,0) = scratch_6*( Bview(i,j,k,0)*(scratch_0 - scratch_3) + + Bview(i,j,k,1)*(scratch_2 - scratch_4) + + Bview(i,j,k,2)*(scratch_1 - scratch_5)); + Xview(i,j,k,1) = scratch_6*( Bview(i,j,k,2)*(scratch_7 - A0*A5) + + Bview(i,j,k,1)*(A0*A8 - scratch_9) + + A5*scratch_8 - A8*scratch_10 ); + Xview(i,j,k,2) = ( -Bview(i,j,k,2) + scratch_11*scratch_8 + + scratch_13*(Bview(i,j,k,1) - scratch_10*scratch_11)) / + (-A8 + scratch_11*scratch_9 + scratch_13*(A5 - scratch_11*scratch_7)); + + }); + + return(0); +} + + +/* -------------------------------------------------------------- + * Preconditioner functions + * --------------------------------------------------------------*/ + +/* Solves Pz = r where P = I - gamma * dg/dy */ +static int PSolve(realtype t, N_Vector y, N_Vector ydot, N_Vector r, + N_Vector z, realtype gamma, realtype delta, int lr, + void *user_data) +{ + /* local variables */ + UserData* udata = (UserData*) user_data; + int retval; + + SUNDIALS_CXX_MARK_FUNCTION(udata->prof); + + /* solve the task-local linear system Pz = r */ + retval = SolveReactionLinSys(y, z, r, gamma, udata); + + return(retval); +} + +/* Solves Pz = r where P = -dg/dy + gamma */ +static int PSolveRes(realtype t, N_Vector y, N_Vector ydot, N_Vector F, + N_Vector r, N_Vector z, realtype cj, realtype delta, + void *user_data) +{ + /* local variables */ + UserData* udata = (UserData*) user_data; + int retval; + + SUNDIALS_CXX_MARK_FUNCTION(udata->prof); + + /* solve the task-local linear system Pz = r */ + retval = SolveReactionLinSysRes(y, z, r, cj, udata); + + return(retval); +} + + +#endif diff 
--git a/benchmarks/advection_reaction_3D/raja/CMakeLists.txt b/benchmarks/advection_reaction_3D/raja/CMakeLists.txt new file mode 100644 index 0000000000..0bae78c562 --- /dev/null +++ b/benchmarks/advection_reaction_3D/raja/CMakeLists.txt @@ -0,0 +1,151 @@ +# --------------------------------------------------------------- +# Programmer(s): Cody J. Balos @ LLNL +# Daniel R. Reynolds @ SMU +# --------------------------------------------------------------- +# SUNDIALS Copyright Start +# Copyright (c) 2002-2023, Lawrence Livermore National Security +# and Southern Methodist University. +# All rights reserved. +# +# See the top-level LICENSE and NOTICE files for details. +# +# SPDX-License-Identifier: BSD-3-Clause +# SUNDIALS Copyright End +# --------------------------------------------------------------- + +if(BUILD_ARKODE AND BUILD_CVODE AND BUILD_IDA) + + if((RAJA_BACKENDS MATCHES "TARGET_OPENMP") OR (RAJA_BACKENDS MATCHES "OPENMP")) + set(OTHER_LIBS OpenMP::OpenMP_CXX) + endif() + + # ---------------------------------------------------------------------------- + # MPI only + # ---------------------------------------------------------------------------- + + add_executable(advection_reaction_3D_raja + advection_reaction_3D.cpp + arkode_driver.cpp + cvode_driver.cpp + ida_driver.cpp + rhs3D.hpp + ParallelGrid.hpp + check_retval.h + backends.hpp) + + # ensure the linker language is reset to CXX + set_target_properties(advection_reaction_3D_raja PROPERTIES LINKER_LANGUAGE CXX) + + target_include_directories(advection_reaction_3D_raja + PRIVATE + ${PROJECT_SOURCE_DIR}/utilities + ${MPI_CXX_INCLUDE_DIRS}) + + target_link_libraries(advection_reaction_3D_raja + PRIVATE + sundials_arkode + sundials_cvode + sundials_ida + sundials_nvecmpiplusx + sundials_nvecserial + RAJA + ${MPI_CXX_LIBRARIES} + ${OTHER_LIBS}) + + install(TARGETS advection_reaction_3D_raja + DESTINATION "${BENCHMARKS_INSTALL_PATH}/advection_reaction_3D/raja") + + install(FILES README.md 
../scripts/compare_error.py ../scripts/compute_error.py ../scripts/pickle_solution_output.py + DESTINATION "${BENCHMARKS_INSTALL_PATH}/advection_reaction_3D/raja") + + # ---------------------------------------------------------------------------- + # MPI + CUDA + # ---------------------------------------------------------------------------- + + if(BUILD_NVECTOR_CUDA) + + set_source_files_properties(advection_reaction_3D.cpp + PROPERTIES LANGUAGE CUDA) + set_source_files_properties(arkode_driver.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(cvode_driver.cpp PROPERTIES LANGUAGE CUDA) + set_source_files_properties(ida_driver.cpp PROPERTIES LANGUAGE CUDA) + + add_executable(advection_reaction_3D_raja_mpicuda + advection_reaction_3D.cpp + arkode_driver.cpp + cvode_driver.cpp + ida_driver.cpp + rhs3D.hpp + ParallelGrid.hpp + check_retval.h + backends.hpp) + + # ensure the linker language is reset to CXX + set_target_properties(advection_reaction_3D_raja_mpicuda + PROPERTIES LINKER_LANGUAGE CXX) + + target_include_directories(advection_reaction_3D_raja_mpicuda + PRIVATE + ${PROJECT_SOURCE_DIR}/utilities + ${MPI_CXX_INCLUDE_DIRS}) + + target_link_libraries(advection_reaction_3D_raja_mpicuda + PRIVATE + sundials_arkode + sundials_cvode + sundials_ida + sundials_nvecmpiplusx + sundials_nveccuda + RAJA + ${MPI_CXX_LIBRARIES} + ${OTHER_LIBS}) + + target_compile_definitions(advection_reaction_3D_raja_mpicuda PRIVATE USE_CUDA_NVEC) + + install(TARGETS advection_reaction_3D_raja_mpicuda + DESTINATION "${BENCHMARKS_INSTALL_PATH}/advection_reaction_3D/raja") + + endif() + + # ---------------------------------------------------------------------------- + # MPI + HIP + # ---------------------------------------------------------------------------- + + if(BUILD_NVECTOR_HIP) + + add_executable(advection_reaction_3D_raja_mpihip + advection_reaction_3D.cpp + advection_reaction_3D.hpp + arkode_driver.cpp + cvode_driver.cpp + ida_driver.cpp + rhs3D.hpp + ParallelGrid.hpp + 
check_retval.h + backends.hpp) + + target_include_directories(advection_reaction_3D_raja_mpihip + PRIVATE + ${PROJECT_SOURCE_DIR}/utilities + ${MPI_CXX_INCLUDE_DIRS}) + + target_link_libraries(advection_reaction_3D_raja_mpihip + PRIVATE + sundials_arkode + sundials_cvode + sundials_ida + sundials_nvecmpiplusx + sundials_nvechip + RAJA + hip::device + ${MPI_CXX_LIBRARIES} + ${OTHER_LIBS}) + + target_compile_definitions(advection_reaction_3D_raja_mpihip PRIVATE USE_HIP_NVEC) + + install(TARGETS advection_reaction_3D_raja_mpihip + DESTINATION "${BENCHMARKS_INSTALL_PATH}/advection_reaction_3D/raja") + + endif() + +endif() diff --git a/benchmarks/advection_reaction_3D/ParallelGrid.hpp b/benchmarks/advection_reaction_3D/raja/ParallelGrid.hpp similarity index 56% rename from benchmarks/advection_reaction_3D/ParallelGrid.hpp rename to benchmarks/advection_reaction_3D/raja/ParallelGrid.hpp index abd6185810..1592a27806 100644 --- a/benchmarks/advection_reaction_3D/ParallelGrid.hpp +++ b/benchmarks/advection_reaction_3D/raja/ParallelGrid.hpp @@ -1,5 +1,6 @@ /* ----------------------------------------------------------------------------- * Programmer(s): Cody J. Balos @ LLNL + * Daniel R. Reynolds @ SMU * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start * Copyright (c) 2002-2023, Lawrence Livermore National Security @@ -40,24 +41,26 @@ enum class StencilType UPWIND }; -template +template class ParallelGrid { public: // Constructor that creates a new ParallelGrid object. 
// [in] - the memory helper to use for allocating the MPI buffers // [in,out] comm - on input, the overal MPI communicator, on output, the cartesian communicator - // [in] a[] - an array of length NDIMS which defines the domain [a,b] - // [in] b[] - an array of length NDIMS which defines the domain [a,b] - // [in] npts[] - an array of length NDIMS which defines the number of mesh points in each dimension + // [in] a[] - an array of length 3 which defines the domain [a,b] + // [in] b[] - an array of length 3 which defines the domain [a,b] + // [in] npts[] - an array of length 3 which defines the number of mesh points in each dimension // [in] dof - the number of degrees of freedom in each dimension // [in] bc - the type of boundary conditions (see BoundaryType) // [in] st - the stencil to use (see StencilType) // [in] width - the stencil width; defaults to 1 // [in] npxyz - the number of processors in each dimension; defaults to 0 which means MPI will choose // [in] reorder - should MPI_Cart_create do process reordering to optimize or not; defaults to false (some MPI implementations ignore this) - ParallelGrid(SUNMemoryHelper memhelp, MPI_Comm* comm, const REAL a[], const REAL b[], const GLOBALINT npts[], int dof, - BoundaryType bc, StencilType st, int width = 1, const int npxyz[] = nullptr, bool reorder = false) + ParallelGrid(SUNMemoryHelper memhelp, MPI_Comm* comm, const REAL a[], const REAL b[], + const GLOBALINT npts[], int dof, BoundaryType bc, StencilType st, + const REAL c, int width = 1, const int npxyz[] = nullptr, + bool reorder = false) : nx(1), ny(1), nz(1), nxl(1), nyl(1), nzl(1), npx(1), npy(1), npz(1), @@ -66,35 +69,40 @@ class ParallelGrid bx(0.0), by(0.0), bz(0.0), dof(dof), dims{0,0,0}, coords{0,0,0}, bc(bc), st(st), width(width), + upwindRight(true), memhelp(memhelp) - { - static_assert((NDIMS >= 1 && NDIMS <= 3), "ParallelGrid NDIMS must be 1, 2 or 3"); - int retval, nprocs; - int periods[] = {0, 0, 0}; + { + assert(st == StencilType::UPWIND); + 
/* Set up MPI Cartesian communicator */ if (npxyz) { dims[0] = npxyz[0]; - if (NDIMS >= 2) dims[1] = npxyz[1]; - if (NDIMS == 3) dims[2] = npxyz[2]; + dims[1] = npxyz[1]; + dims[2] = npxyz[2]; } + int retval, nprocs; MPI_Comm_size(*comm, &nprocs); - retval = MPI_Dims_create(nprocs, NDIMS, dims); + retval = MPI_Dims_create(nprocs, 3, dims); assert(retval == MPI_SUCCESS); - periods[0] = bc == BoundaryType::PERIODIC; - periods[1] = bc == BoundaryType::PERIODIC; - periods[2] = bc == BoundaryType::PERIODIC; - retval = MPI_Cart_create(*comm, NDIMS, dims, periods, reorder, comm); + int periods[] = { bc == BoundaryType::PERIODIC, + bc == BoundaryType::PERIODIC, + bc == BoundaryType::PERIODIC }; + retval = MPI_Cart_create(*comm, 3, dims, periods, reorder, comm); assert(retval == MPI_SUCCESS); - retval = MPI_Cart_get(*comm, NDIMS, dims, periods, coords); + retval = MPI_Cart_get(*comm, 3, dims, periods, coords); assert(retval == MPI_SUCCESS); cart_comm = *comm; + /* Set upwinding direction */ + upwindRight = (c > 0.0); + + /* Set up information for the first spatial dimension */ npx = dims[0]; nx = npts[0]; ax = a[0]; @@ -103,251 +111,235 @@ class ParallelGrid int is = nx*(coords[0])/npx; int ie = nx*(coords[0]+1)/npx-1; nxl = ie-is+1; - neq = dof * nxl; - if (NDIMS >= 2) - { - npy = dims[1]; - ny = npts[1]; - ay = a[1]; - by = b[1]; - dy = (by-ay) / (REAL) ny; - int js = ny*(coords[1])/npy; - int je = ny*(coords[1]+1)/npy-1; - nyl = je-js+1; - - neq *= nyl; - } - - if (NDIMS == 3) - { - npz = dims[2]; - nz = npts[2]; - az = a[2]; - bz = b[2]; - dz = (bz-az) / (REAL) nz; - int ks = nz*(coords[2])/npz; - int ke = nz*(coords[2]+1)/npz-1; - nzl = ke-ks+1; - - neq *= nzl; - } - + /* Set up information for the second spatial dimension */ + npy = dims[1]; + ny = npts[1]; + ay = a[1]; + by = b[1]; + dy = (by-ay) / (REAL) ny; + int js = ny*(coords[1])/npy; + int je = ny*(coords[1]+1)/npy-1; + nyl = je-js+1; + neq *= nyl; + + /* Set up information for the third spatial dimension */ + 
npz = dims[2]; + nz = npts[2]; + az = a[2]; + bz = b[2]; + dz = (bz-az) / (REAL) nz; + int ks = nz*(coords[2])/npz; + int ke = nz*(coords[2]+1)/npz-1; + nzl = ke-ks+1; + neq *= nzl; + + /* Allocate buffers for nearest-neighbor exchange */ if (st == StencilType::UPWIND) AllocateBuffersUpwind(); } // TODO: - // - does not take advantage of upwind scheme to reduce communications and memory // - support non-periodic boundary conditions // For all faces where neighbors exist: determine neighbor process indices. // For all faces: allocate exchange buffers. void AllocateBuffersUpwind() { - int retval = 0; - int nbcoords[] = {0, 0, 0}; - SUNMemoryHelper_Alloc(memhelp, &Wrecv_, sizeof(REAL)*dof*width*nyl*nzl, - memoryType(), nullptr); - SUNMemoryHelper_Alloc(memhelp, &Wsend_, sizeof(REAL)*dof*width*nyl*nzl, - memoryType(), nullptr); + /* Allocate send/receive buffers and determine ID for communication West */ + if (upwindRight) + SUNMemoryHelper_Alloc(memhelp, &Wrecv_, sizeof(REAL)*dof*width*nyl*nzl, + memoryType(), nullptr); + else + SUNMemoryHelper_Alloc(memhelp, &Wsend_, sizeof(REAL)*dof*width*nyl*nzl, + memoryType(), nullptr); ipW = MPI_PROC_NULL; if ((coords[0] > 0) || (bc == BoundaryType::PERIODIC)) { - nbcoords[0] = coords[0]-1; - nbcoords[1] = coords[1]; - nbcoords[2] = coords[2]; - retval = MPI_Cart_rank(cart_comm, nbcoords, &ipW); + int nbcoords[] = {coords[0]-1, coords[1], coords[2]}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipW); assert(retval == MPI_SUCCESS); } - SUNMemoryHelper_Alloc(memhelp, &Erecv_, sizeof(REAL)*dof*width*nyl*nzl, - memoryType(), nullptr); - SUNMemoryHelper_Alloc(memhelp, &Esend_, sizeof(REAL)*dof*width*nyl*nzl, - memoryType(), nullptr); + /* Allocate send/receive buffers and determine ID for communication East */ + if (upwindRight) + SUNMemoryHelper_Alloc(memhelp, &Esend_, sizeof(REAL)*dof*width*nyl*nzl, + memoryType(), nullptr); + else + SUNMemoryHelper_Alloc(memhelp, &Erecv_, sizeof(REAL)*dof*width*nyl*nzl, + memoryType(), 
nullptr); ipE = MPI_PROC_NULL; if ((coords[0] < dims[0]-1) || (bc == BoundaryType::PERIODIC)) { - nbcoords[0] = coords[0]+1; - nbcoords[1] = coords[1]; - nbcoords[2] = coords[2]; - retval = MPI_Cart_rank(cart_comm, nbcoords, &ipE); + int nbcoords[] = {coords[0]+1, coords[1], coords[2]}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipE); assert(retval == MPI_SUCCESS); } - if (NDIMS >= 2) - { + /* Allocate send/receive buffers and determine ID for communication South */ + if (upwindRight) SUNMemoryHelper_Alloc(memhelp, &Srecv_, sizeof(REAL)*dof*width*nxl*nzl, memoryType(), nullptr); + else SUNMemoryHelper_Alloc(memhelp, &Ssend_, sizeof(REAL)*dof*width*nxl*nzl, memoryType(), nullptr); - ipS = MPI_PROC_NULL; - if ((coords[1] > 0) || (bc == BoundaryType::PERIODIC)) { - nbcoords[0] = coords[0]; - nbcoords[1] = coords[1]-1; - nbcoords[2] = coords[2]; - retval = MPI_Cart_rank(cart_comm, nbcoords, &ipS); - assert(retval == MPI_SUCCESS); - } + ipS = MPI_PROC_NULL; + if ((coords[1] > 0) || (bc == BoundaryType::PERIODIC)) { + int nbcoords[] = {coords[0], coords[1]-1, coords[2]}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipS); + assert(retval == MPI_SUCCESS); + } - SUNMemoryHelper_Alloc(memhelp, &Nrecv_, sizeof(REAL)*dof*width*nxl*nzl, - memoryType(), nullptr); + /* Allocate send/receive buffers and determine ID for communication North */ + if (upwindRight) SUNMemoryHelper_Alloc(memhelp, &Nsend_, sizeof(REAL)*dof*width*nxl*nzl, memoryType(), nullptr); - ipN = MPI_PROC_NULL; - if ((coords[1] < dims[1]-1) || (bc == BoundaryType::PERIODIC)) { - nbcoords[0] = coords[0]; - nbcoords[1] = coords[1]+1; - nbcoords[2] = coords[2]; - retval = MPI_Cart_rank(cart_comm, nbcoords, &ipN); - assert(retval == MPI_SUCCESS); - } + else + SUNMemoryHelper_Alloc(memhelp, &Nrecv_, sizeof(REAL)*dof*width*nxl*nzl, + memoryType(), nullptr); + ipN = MPI_PROC_NULL; + if ((coords[1] < dims[1]-1) || (bc == BoundaryType::PERIODIC)) { + int nbcoords[] = {coords[0], coords[1]+1, coords[2]}; + int 
retval = MPI_Cart_rank(cart_comm, nbcoords, &ipN); + assert(retval == MPI_SUCCESS); } - if (NDIMS == 3) - { + /* Allocate send/receive buffers and determine ID for communication Back */ + if (upwindRight) SUNMemoryHelper_Alloc(memhelp, &Brecv_, sizeof(REAL)*dof*width*nxl*nyl, memoryType(), nullptr); + else SUNMemoryHelper_Alloc(memhelp, &Bsend_, sizeof(REAL)*dof*width*nxl*nyl, memoryType(), nullptr); - ipB = MPI_PROC_NULL; - if ((coords[2] > 0) || (bc == BoundaryType::PERIODIC)) { - nbcoords[0] = coords[0]; - nbcoords[1] = coords[1]; - nbcoords[2] = coords[2]-1; - retval = MPI_Cart_rank(cart_comm, nbcoords, &ipB); - assert(retval == MPI_SUCCESS); - } + ipB = MPI_PROC_NULL; + if ((coords[2] > 0) || (bc == BoundaryType::PERIODIC)) { + int nbcoords[] = {coords[0], coords[1], coords[2]-1}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipB); + assert(retval == MPI_SUCCESS); + } - SUNMemoryHelper_Alloc(memhelp, &Frecv_, sizeof(REAL)*dof*width*nxl*nyl, - memoryType(), nullptr); + /* Allocate send/receive buffers and determine ID for communication Front */ + if (upwindRight) SUNMemoryHelper_Alloc(memhelp, &Fsend_, sizeof(REAL)*dof*width*nxl*nyl, memoryType(), nullptr); - ipF = MPI_PROC_NULL; - if ((coords[2] < dims[2]-1) || (bc == BoundaryType::PERIODIC)) { - nbcoords[0] = coords[0]; - nbcoords[1] = coords[1]; - nbcoords[2] = coords[2]+1; - retval = MPI_Cart_rank(cart_comm, nbcoords, &ipF); - assert(retval == MPI_SUCCESS); - } + else + SUNMemoryHelper_Alloc(memhelp, &Frecv_, sizeof(REAL)*dof*width*nxl*nyl, + memoryType(), nullptr); + ipF = MPI_PROC_NULL; + if ((coords[2] < dims[2]-1) || (bc == BoundaryType::PERIODIC)) { + int nbcoords[] = {coords[0], coords[1], coords[2]+1}; + int retval = MPI_Cart_rank(cart_comm, nbcoords, &ipF); + assert(retval == MPI_SUCCESS); } } - // TODO: this could be optimized for upwind - int ExchangeStart(std::function fill) + // Initiate non-blocking neighbor communication + int ExchangeStart() { int retval = 0; + nreq = 0; // Initialize 
all requests in array for (int i=0; i<12; i++) req[i] = MPI_REQUEST_NULL; // Open an Irecv buffer for each neighbor - if (ipW != MPI_PROC_NULL) + if ((ipW != MPI_PROC_NULL) && (upwindRight)) { - retval = MPI_Irecv(getRecvBuffer("EAST"), dof*nyl*nzl, MPI_SUNREALTYPE, ipW, - 1, cart_comm, req); + retval = MPI_Irecv(getRecvBuffer("WEST"), dof*nyl*nzl, MPI_SUNREALTYPE, ipW, + 1, cart_comm, req+nreq); assert(retval == MPI_SUCCESS); + nreq++; } - if (ipE != MPI_PROC_NULL) + if ((ipE != MPI_PROC_NULL) && (!upwindRight)) { - retval = MPI_Irecv(getRecvBuffer("WEST"), dof*nyl*nzl, MPI_SUNREALTYPE, ipE, - 0, cart_comm, req+1); + retval = MPI_Irecv(getRecvBuffer("EAST"), dof*nyl*nzl, MPI_SUNREALTYPE, ipE, + 0, cart_comm, req+nreq); assert(retval == MPI_SUCCESS); + nreq++; } - if (NDIMS >= 2) + if ((ipS != MPI_PROC_NULL) && (upwindRight)) { - if (ipS != MPI_PROC_NULL) - { - retval = MPI_Irecv(getRecvBuffer("NORTH"), dof*nxl*nzl, MPI_SUNREALTYPE, ipS, - 3, cart_comm, req+2); - assert(retval == MPI_SUCCESS); - } - - if (ipN != MPI_PROC_NULL) - { - retval = MPI_Irecv(getRecvBuffer("SOUTH"), dof*nxl*nzl, MPI_SUNREALTYPE, ipN, - 2, cart_comm, req+3); - assert(retval == MPI_SUCCESS); - } - } - - if (NDIMS >= 3) + retval = MPI_Irecv(getRecvBuffer("SOUTH"), dof*nxl*nzl, MPI_SUNREALTYPE, ipS, + 3, cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipN != MPI_PROC_NULL) && (!upwindRight)) + { + retval = MPI_Irecv(getRecvBuffer("NORTH"), dof*nxl*nzl, MPI_SUNREALTYPE, ipN, + 2, cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipB != MPI_PROC_NULL) && (upwindRight)) + { + retval = MPI_Irecv(getRecvBuffer("BACK"), dof*nxl*nyl, MPI_SUNREALTYPE, ipB, + 5, cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipF != MPI_PROC_NULL) && (!upwindRight)) { - if (ipB != MPI_PROC_NULL) - { - retval = MPI_Irecv(getRecvBuffer("FRONT"), dof*nxl*nyl, MPI_SUNREALTYPE, ipB, - 5, cart_comm, req+4); - assert(retval == 
MPI_SUCCESS); - } - - if (ipF != MPI_PROC_NULL) - { - retval = MPI_Irecv(getRecvBuffer("BACK"), dof*nxl*nyl, MPI_SUNREALTYPE, ipF, - 4, cart_comm, req+5); - assert(retval == MPI_SUCCESS); - } - } - - // Call user lambda to fill the send buffers - fill(getSendBuffer("WEST"), - getSendBuffer("EAST"), - getSendBuffer("SOUTH"), - getSendBuffer("NORTH"), - getSendBuffer("BACK"), - getSendBuffer("FRONT")); + retval = MPI_Irecv(getRecvBuffer("FRONT"), dof*nxl*nyl, MPI_SUNREALTYPE, ipF, + 4, cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } // Send data to neighbors - if (ipW != MPI_PROC_NULL) + if ((ipW != MPI_PROC_NULL) && (!upwindRight)) { - retval = MPI_Isend(getSendBuffer("EAST"), dof*nyl*nzl, MPI_SUNREALTYPE, ipW, 0, - cart_comm, req+6); + retval = MPI_Isend(getSendBuffer("WEST"), dof*nyl*nzl, MPI_SUNREALTYPE, ipW, 0, + cart_comm, req+nreq); assert(retval == MPI_SUCCESS); + nreq++; } - if (ipE != MPI_PROC_NULL) + if ((ipE != MPI_PROC_NULL) && (upwindRight)) { - retval = MPI_Isend(getSendBuffer("WEST"), dof*nyl*nzl, MPI_SUNREALTYPE, ipE, 1, - cart_comm, req+7); + retval = MPI_Isend(getSendBuffer("EAST"), dof*nyl*nzl, MPI_SUNREALTYPE, ipE, 1, + cart_comm, req+nreq); assert(retval == MPI_SUCCESS); + nreq++; } - if (NDIMS >= 2) + if ((ipS != MPI_PROC_NULL) && (!upwindRight)) { - if (ipS != MPI_PROC_NULL) - { - retval = MPI_Isend(getSendBuffer("NORTH"), dof*nxl*nzl, MPI_SUNREALTYPE, ipS, 2, - cart_comm, req+8); - assert(retval == MPI_SUCCESS); - } - - if (ipN != MPI_PROC_NULL) - { - retval = MPI_Isend(getSendBuffer("SOUTH"), dof*nxl*nzl, MPI_SUNREALTYPE, ipN, 3, - cart_comm, req+9); - assert(retval == MPI_SUCCESS); - } - } - - if (NDIMS == 3) + retval = MPI_Isend(getSendBuffer("SOUTH"), dof*nxl*nzl, MPI_SUNREALTYPE, ipS, 2, + cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipN != MPI_PROC_NULL) && (upwindRight)) { - if (ipB != MPI_PROC_NULL) - { - retval = MPI_Isend(getSendBuffer("FRONT"), dof*nxl*nyl, MPI_SUNREALTYPE, ipB, 
4, - cart_comm, req+10); - assert(retval == MPI_SUCCESS); - } + retval = MPI_Isend(getSendBuffer("NORTH"), dof*nxl*nzl, MPI_SUNREALTYPE, ipN, 3, + cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } - if (ipF != MPI_PROC_NULL) - { - retval = MPI_Isend(getSendBuffer("BACK"), dof*nxl*nyl, MPI_SUNREALTYPE, ipF, 5, - cart_comm, req+11); - assert(retval == MPI_SUCCESS); - } + if ((ipB != MPI_PROC_NULL) && (!upwindRight)) + { + retval = MPI_Isend(getSendBuffer("BACK"), dof*nxl*nyl, MPI_SUNREALTYPE, ipB, 4, + cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; + } + + if ((ipF != MPI_PROC_NULL) && (upwindRight)) + { + retval = MPI_Isend(getSendBuffer("FRONT"), dof*nxl*nyl, MPI_SUNREALTYPE, ipF, 5, + cart_comm, req+nreq); + assert(retval == MPI_SUCCESS); + nreq++; } return retval; @@ -359,8 +351,12 @@ class ParallelGrid MPI_Status stat[12]; int retval; + // return automatically with success if there are no outstanding requests + if (nreq == 0) + return(0); + // Wait for messages to finish send/receive - retval = MPI_Waitall(12, req, stat); + retval = MPI_Waitall(nreq, req, stat); assert(retval == MPI_SUCCESS); return retval; @@ -370,12 +366,16 @@ class ParallelGrid void PrintInfo() { printf("ParallelGrid Info:\n"); - printf(" dimensions = %d\n", NDIMS); + printf(" dimensions = %d\n", 3); printf(" processors = {%d, %d, %d}\n", npx, npy, npz); printf(" domain = {[%g,%g], [%g,%g], [%g,%g]}\n", ax, bx, ay, by, az, bz); printf(" global npts = {%li, %li, %li}\n", (long int) nx, (long int) ny, (long int) nz); printf(" local npts = {%d, %d, %d}\n", nxl, nyl, nzl); printf(" mesh spacing = {%g, %g, %g}\n", dx, dy, dz); + if (upwindRight) + printf(" upwind dir = right\n"); + else + printf(" upwind dir = left\n"); } // Saves the mesh to a file. 
@@ -407,16 +407,12 @@ class ParallelGrid GLOBALINT npts() const { - if (NDIMS == 1) return nx; - if (NDIMS == 2) return nx*ny; - if (NDIMS == 3) return nx*ny*nz; + return nx*ny*nz; } GLOBALINT nptsl() const { - if (NDIMS == 1) return nxl; - if (NDIMS == 2) return nxl*nyl; - if (NDIMS == 3) return nxl*nyl*nzl; + return nxl*nyl*nzl; } GLOBALINT neql() const @@ -452,6 +448,7 @@ class ParallelGrid } else { + assert(direction == "ILLEGAL"); return nullptr; } } @@ -484,24 +481,28 @@ class ParallelGrid } else { + assert(direction == "ILLEGAL"); return nullptr; } } ~ParallelGrid() { - SUNMemoryHelper_Dealloc(memhelp, Esend_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Wsend_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Nsend_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Ssend_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Fsend_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Bsend_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Erecv_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Wrecv_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Nrecv_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Srecv_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Frecv_, nullptr); - SUNMemoryHelper_Dealloc(memhelp, Brecv_, nullptr); + if (upwindRight) { + SUNMemoryHelper_Dealloc(memhelp, Esend_, nullptr); + SUNMemoryHelper_Dealloc(memhelp, Nsend_, nullptr); + SUNMemoryHelper_Dealloc(memhelp, Fsend_, nullptr); + SUNMemoryHelper_Dealloc(memhelp, Wrecv_, nullptr); + SUNMemoryHelper_Dealloc(memhelp, Srecv_, nullptr); + SUNMemoryHelper_Dealloc(memhelp, Brecv_, nullptr); + } else { + SUNMemoryHelper_Dealloc(memhelp, Wsend_, nullptr); + SUNMemoryHelper_Dealloc(memhelp, Ssend_, nullptr); + SUNMemoryHelper_Dealloc(memhelp, Bsend_, nullptr); + SUNMemoryHelper_Dealloc(memhelp, Erecv_, nullptr); + SUNMemoryHelper_Dealloc(memhelp, Nrecv_, nullptr); + SUNMemoryHelper_Dealloc(memhelp, Frecv_, nullptr); + } } GLOBALINT nx, ny, nz; /* number of intervals globally */ @@ -516,6 +517,7 @@ class ParallelGrid int ipW, 
ipE; /* MPI ranks for neighbor procs */ int ipS, ipN; int ipB, ipF; + bool upwindRight; /* Upwind dir: true/false == R/L */ int dims[3]; int coords[3]; @@ -524,6 +526,7 @@ class ParallelGrid private: MPI_Comm cart_comm; /* MPI cartesian communicator */ MPI_Request req[12]; + int nreq; BoundaryType bc; StencilType st; diff --git a/benchmarks/advection_reaction_3D/README.md b/benchmarks/advection_reaction_3D/raja/README.md similarity index 78% rename from benchmarks/advection_reaction_3D/README.md rename to benchmarks/advection_reaction_3D/raja/README.md index ab9974b660..33c82db725 100644 --- a/benchmarks/advection_reaction_3D/README.md +++ b/benchmarks/advection_reaction_3D/raja/README.md @@ -8,27 +8,31 @@ RAJA performance portability layer with serial, CUDA, or HIP backends. This code simulates the advection and reaction of three chemical species where the reaction mechanism is a variation of the Brusselator problem from chemical kinetics. The PDE system is given by +```math +\begin{align} + u_t &= -c \nabla u + A - (w+1) u + v u^2 \\ + v_t &= -c \nabla v + w u - v u^2 \\ + w_t &= -c \nabla w + (B - w) / \epsilon - w u +\end{align} ``` - u_t = -c grad(u) + A - (w+1) * u + v * u^2 - v_t = -c grad(v) + w * u - v * u^2 - w_t = -c grad(w) + (B - w) / epsilon - w * u -``` -where `u`, `v`, and `w` are chemical concentrations, `c` is the advection speed, -`A` and `B` are the concentrations of chemical species that remain constant over -space and time, and `epsilon` is a parameter that varies the stiffness of the -system. The problem is solved on the domain `(x,y,z) = X` in `[0, X_max]^3`, -for times `t` in `[0,t_f]`. The initial condition is -``` - u(0,X) = A + p(X) - v(0,X) = B / A + p(X) - w(0,X) = 3.0 + p(X) +where $u$, $v$, and $w$ are chemical concentrations, $c$ is the advection speed, +$A$ and $B$ are the concentrations of chemical species that remain constant over +space and time, and $\epsilon$ is a parameter that varies the stiffness of the +system. 
The problem is solved on the domain $(x,y,z) = X$ in $[0, X_{\text{max}}]^3$, +for times $t$ in $[0,t_f]$. The initial condition is +```math +\begin{align} + u(0,X) &= A + p(X) \\ + v(0,X) &= B / A + p(X) \\ + w(0,X) &= 3.0 + p(X) +\end{align} ``` where the perturbation function is +```math + p(X) = \alpha e^{-(X-\mu)^T \sigma^{-1} (X-\mu) / 2 \sqrt{|\sigma| 8 \pi^3}} ``` - p(X) = alpha * e^( -((X-mu)^T sigma^{-1} (X-mu)) / (2*sqrt(|sigma| 8 pi^3)) ) -``` -with `alpha = 0.1`, `mu = 0.5 X_max`, and `sigma` is a diagonal matrix with -entries `0.25 X_max`. +with $\alpha = 0.1$, $\mu = 0.5 X_{\text{max}}$, and $\sigma$ is a diagonal +matrix with entries $0.25 X_{\text{max}}$. Spatial derivatives are discretized with first-order upwind finite differences on a uniform spatial grid. The system can be evolved in time using explicit, @@ -64,7 +68,7 @@ listed below. | `--method ` | Integrator to use: `ERK`, `ARK-DIRK`, `ARK-IMEX`, `CV-BDF`, `CV-ADAMS`, `IDA` | `ARK-DIRK` | | `--nls ` | Nonlinear Solver Method: `newton`, `tl-newton`, `fixedpoint`, `none` | `newton` | | `--fpaccel ` | Number of fixed point acceleration vectors | 3 | -| `--nopre` | Disable preconditioning | False | +| `--nopre` | Disable preconditioning | False | | `--fused` | Enabled fused operations | Off | | `--tf ` | Final integration time `t_f` | 10.0 | | `--rtol ` | Relative tolerance | 1.0e-6 | @@ -79,11 +83,11 @@ GPUs. See the installation guide for more details on configuring, building, and installing SUNDIALS. 
Based on the configuration the following executables will be built and installed -in the `/bin/benchmarks/advection_reaction_3D` directory: +in the `/advection_reaction_3D/raja` directory: -* `advection_reaction_3D` -- MPI parallelism -* `advection_reaction_3D_mpicuda` -- MPI + CUDA parallelism -* `advection_reaction_3D_mpihip` -- MPI + HIP parallelism +* `advection_reaction_3D_raja` -- MPI parallelism +* `advection_reaction_3D_raja_mpicuda` -- MPI + CUDA parallelism +* `advection_reaction_3D_raja_mpihip` -- MPI + HIP parallelism On Summit, with the default environment ``` @@ -93,7 +97,7 @@ On Summit, with the default environment ``` an example `jsrun` command is ``` -jsrun -n 2 -a 1 -c 1 -g 1 ./advection_reaction_3D_mpicuda +jsrun -n 2 -a 1 -c 1 -g 1 ./advection_reaction_3D_raja_mpicuda ``` On Lassen, with the environment @@ -104,5 +108,5 @@ On Lassen, with the environment ``` an example `jsrun` command is ``` -jsrun -n 2 -a 1 -c 1 -g 1 ./advection_reaction_3D_mpicuda +jsrun -n 2 -a 1 -c 1 -g 1 ./advection_reaction_3D_raja_mpicuda ``` diff --git a/benchmarks/advection_reaction_3D/advection_reaction_3D.cpp b/benchmarks/advection_reaction_3D/raja/advection_reaction_3D.cpp similarity index 71% rename from benchmarks/advection_reaction_3D/advection_reaction_3D.cpp rename to benchmarks/advection_reaction_3D/raja/advection_reaction_3D.cpp index dc169c5fa1..088e4536a0 100644 --- a/benchmarks/advection_reaction_3D/advection_reaction_3D.cpp +++ b/benchmarks/advection_reaction_3D/raja/advection_reaction_3D.cpp @@ -1,5 +1,6 @@ /* ----------------------------------------------------------------------------- * Programmer(s): David J. Gardner, Cody J. Balos @ LLNL + * Daniel R. 
Reynolds @ SMU * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start * Copyright (c) 2002-2023, Lawrence Livermore National Security @@ -60,9 +61,13 @@ #include "advection_reaction_3D.hpp" +#define STENCIL_WIDTH 1 + + /* Main Program */ int main(int argc, char *argv[]) { + SUNContext ctx; /* Initialize MPI */ @@ -87,7 +92,6 @@ int main(int argc, char *argv[]) UserData udata(ctx); /* user data */ UserOptions uopt; /* user options */ int retval; /* reusable error-checking flag */ - char fname[MXSTR]; SUNDIALS_CXX_MARK_FUNCTION(udata.prof); @@ -113,6 +117,7 @@ int main(int argc, char *argv[]) /* Output spatial mesh to disk (add extra point for periodic BC) */ if (udata.myid == 0 && uopt.nout > 0) { + char fname[MXSTR]; snprintf(fname, MXSTR, "%s/mesh.txt", uopt.outputdir); udata.grid->MeshToFile(fname); } @@ -124,7 +129,6 @@ int main(int argc, char *argv[]) else if (uopt.method == "CV-BDF") retval = EvolveProblemBDF(y, &udata, &uopt); else if (uopt.method == "CV-ADAMS") retval = EvolveProblemAdams(y, &udata, &uopt); else if (uopt.method == "IDA") retval = EvolveDAEProblem(y, &udata, &uopt); - if (check_retval(&retval, "Evolve", 1, udata.myid)) MPI_Abort(comm, 1); /* Clean up */ @@ -142,15 +146,6 @@ int main(int argc, char *argv[]) /* Destructor for problem data */ UserData::~UserData() { - /* free solution masks */ - N_VDestroy(N_VGetLocalVector_MPIPlusX(umask)); - N_VDestroy(umask); - N_VDestroy(vmask); - N_VDestroy(wmask); - - /* free the parallel grid */ - delete grid; - /* close output streams */ if (uopt->nout > 0) { @@ -159,6 +154,24 @@ UserData::~UserData() if (WFID) fclose(WFID); if (TFID && myid == 0) fclose(TFID); } + + /* free solution masks */ + if (umask != nullptr) { + N_VDestroy(N_VGetLocalVector_MPIPlusX(umask)); + N_VDestroy(umask); + umask = nullptr; + } + if (vmask != nullptr) { + N_VDestroy(vmask); + vmask = nullptr; + } + if (wmask != nullptr) { + N_VDestroy(wmask); + wmask = nullptr; + } + + /* 
free the parallel grid */ + delete grid; } @@ -166,175 +179,98 @@ UserData::~UserData() * Communication functions * --------------------------------------------------------------*/ -/* Exchanges the boundary conditions only, */ -int ExchangeBCOnly(N_Vector y, UserData* udata) +/* Fills send buffers before exchanging neighbor information */ +int FillSendBuffers(N_Vector y, UserData* udata) { - int ierr; - MPI_Status stat; - MPI_Request reqR, reqS; /* shortcuts */ - int nvar = udata->grid->dof; - int myid = udata->myid; - int first = 0; - int last = udata->nprocs - 1; + const realtype c = udata->c; + const int nxl = udata->grid->nxl; + const int nyl = udata->grid->nyl; + const int nzl = udata->grid->nzl; + const int dof = udata->grid->dof; - /* extract the data */ - realtype* Ydata = GetVecData(y); - realtype* Wsend = udata->grid->getSendBuffer("WEST"); + /* Create a 4D view of the vector */ + RAJA::View > Yview(GetVecData(y), + nxl, nyl, nzl, dof); - /* open the East Irecv buffer */ - if (myid == last) - { - ierr = MPI_Irecv(udata->grid->getRecvBuffer("EAST"), nvar, MPI_SUNREALTYPE, first, - MPI_ANY_TAG, udata->comm, &reqR); - } - - /* send first mesh node to the last processor */ - if (myid == first) - { - RAJA::forall< EXEC_POLICY >( RAJA::RangeSegment(0, nvar), - [=] DEVICE_FUNC (int var) { - Wsend[IDX(nvar, 0, var)] = Ydata[IDX(nvar, 0, var)]; - }); - ierr = MPI_Isend(Wsend, nvar, MPI_SUNREALTYPE, - last, 0, udata->comm, &reqS); - } - - if (myid == last) - { - /* wait for exchange to finish */ - ierr = MPI_Wait(&reqR, &stat); - if (ierr != MPI_SUCCESS) - { - fprintf(stderr, "\nERROR: error in MPI_Wait = %d\n", ierr); - return -1; - } - } - - if (myid == first) + if (c > 0.0) { - /* wait for exchange to finish */ - ierr = MPI_Wait(&reqS, &stat); - if (ierr != MPI_SUCCESS) - { - fprintf(stderr, "\nERROR: error in MPI_Wait = %d\n", ierr); - return -1; - } - } - - return(0); -} + /* Flow moving in the positive directions uses backward difference. 
*/ -/* Starts the exchange of the neighbor information */ -int ExchangeAllStart(N_Vector y, UserData* udata) -{ - SUNDIALS_MARK_BEGIN(udata->prof, "Neighbor Exchange"); + /* Fill 3D views of send buffers on device */ + RAJA::View > + Esend(udata->grid->getSendBuffer("EAST"), nyl, nzl, dof); + RAJA::View > + Nsend(udata->grid->getSendBuffer("NORTH"), nxl, nzl, dof); + RAJA::View > + Fsend(udata->grid->getSendBuffer("FRONT"), nxl, nyl, dof); + + auto east_face = RAJA::make_tuple(RAJA::RangeSegment(0, nyl), + RAJA::RangeSegment(0, nzl), + RAJA::RangeSegment(0, dof)); + RAJA::kernel(east_face, + [=] DEVICE_FUNC (int j, int k, int l) { + Esend(j,k,l) = Yview(nxl-1,j,k,l); + }); - /* shortcuts */ - realtype c = udata->c; + auto north_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl), + RAJA::RangeSegment(0, nzl), + RAJA::RangeSegment(0, dof)); + RAJA::kernel(north_face, + [=] DEVICE_FUNC (int i, int k, int l) { + Nsend(i,k,l) = Yview(i,nyl-1,k,l); + }); - /* extract the data */ - RAJA::View > Yview(GetVecData(y), - udata->grid->nxl, - udata->grid->nyl, - udata->grid->nzl, - udata->grid->dof); + auto front_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl), + RAJA::RangeSegment(0, nyl), + RAJA::RangeSegment(0, dof)); + RAJA::kernel(front_face, + [=] DEVICE_FUNC (int i, int j, int l) { + Fsend(i,j,l) = Yview(i,j,nzl-1,l); + }); - if (c > 0.0) - { - /* Flow moving in the positive directions uses backward difference. 
*/ - udata->grid->ExchangeStart( - [=] (realtype*, realtype* Esend, realtype*, realtype* Nsend, realtype* Bsend, realtype*) { - int nxl = udata->grid->nxl; - int nyl = udata->grid->nyl; - int nzl = udata->grid->nzl; - int dof = udata->grid->dof; - - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, std::max(1,nxl-1)), - RAJA::RangeSegment(0, std::max(1,nyl-1)), - RAJA::RangeSegment(0, std::max(1,nzl-1))); - - RAJA::View > - Eview(Esend, nyl, nzl, dof); - RAJA::View > - Nview(Nsend, nxl, nzl, dof); - RAJA::View > - Bview(Bsend, nxl, nyl, dof); - - RAJA::kernel(range, - [=] DEVICE_FUNC (int i, int j, int k) { - - if (nxl > 1) - { - Eview(j,k,0) = Yview(nxl-1,j,k,0); - Eview(j,k,1) = Yview(nxl-1,j,k,1); - Eview(j,k,2) = Yview(nxl-1,j,k,2); - } - - if (nyl > 1) - { - Nview(i,k,0) = Yview(i,nyl-1,k,0); - Nview(i,k,1) = Yview(i,nyl-1,k,1); - Nview(i,k,2) = Yview(i,nyl-1,k,2); - } - - if (nzl > 1) - { - Bview(i,j,0) = Yview(i,j,nzl-1,0); - Bview(i,j,1) = Yview(i,j,nzl-1,1); - Bview(i,j,2) = Yview(i,j,nzl-1,2); - } - - }); - }); } else if (c < 0.0) { + /* Flow moving in the negative directions uses forward difference. 
*/ - udata->grid->ExchangeStart( - [=] (realtype* Wsend, realtype*, realtype*Ssend, realtype*, realtype*, realtype* Fsend) { - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, udata->grid->nxl-1), - RAJA::RangeSegment(0, udata->grid->nyl-1), - RAJA::RangeSegment(0, udata->grid->nzl-1)); - - RAJA::View > - Wview(Wsend, udata->grid->nyl, udata->grid->nzl, udata->grid->dof); - RAJA::View > - Sview(Ssend, udata->grid->nxl, udata->grid->nzl, udata->grid->dof); - RAJA::View > - Fview(Fsend, udata->grid->nxl, udata->grid->nyl, udata->grid->dof); - - RAJA::kernel(range, - [=] DEVICE_FUNC (int i, int j, int k) { - Wview(j,k,0) = Yview(0,j,k,0); - Wview(j,k,1) = Yview(0,j,k,1); - Wview(j,k,2) = Yview(0,j,k,2); - - Sview(i,k,0) = Yview(i,0,k,0); - Sview(i,k,1) = Yview(i,0,k,1); - Sview(i,k,2) = Yview(i,0,k,2); - - Fview(i,j,0) = Yview(i,j,0,0); - Fview(i,j,1) = Yview(i,j,0,1); - Fview(i,j,2) = Yview(i,j,0,2); - }); - }); - } + /* Fill 3D views of send buffers on device */ + RAJA::View > + Wsend(udata->grid->getSendBuffer("WEST"), nyl, nzl, dof); + RAJA::View > + Ssend(udata->grid->getSendBuffer("SOUTH"), nxl, nzl, dof); + RAJA::View > + Bsend(udata->grid->getSendBuffer("BACK"), nxl, nyl, dof); + + auto west_face = RAJA::make_tuple(RAJA::RangeSegment(0, nyl), + RAJA::RangeSegment(0, nzl), + RAJA::RangeSegment(0, dof)); + RAJA::kernel(west_face, + [=] DEVICE_FUNC (int j, int k, int l) { + Wsend(j,k,l) = Yview(0,j,k,l); + }); - SUNDIALS_MARK_END(udata->prof, "Neighbor Exchange"); - return(0); -} + auto south_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl), + RAJA::RangeSegment(0, nzl), + RAJA::RangeSegment(0, dof)); + RAJA::kernel(south_face, + [=] DEVICE_FUNC (int i, int k, int l) { + Ssend(i,k,l) = Yview(i,0,k,l); + }); + auto back_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl), + RAJA::RangeSegment(0, nyl), + RAJA::RangeSegment(0, dof)); + RAJA::kernel(back_face, + [=] DEVICE_FUNC (int i, int j, int l) { + Bsend(i,j,l) = Yview(i,j,0,l); + }); + + } -/* Completes the 
exchange of the neighbor information */ -int ExchangeAllEnd(UserData* udata) -{ - SUNDIALS_MARK_BEGIN(udata->prof, "Neighbor Exchange"); - udata->grid->ExchangeEnd(); - SUNDIALS_MARK_END(udata->prof, "Neighbor Exchange"); return(0); } @@ -494,17 +430,20 @@ int ComponentMask(N_Vector mask, int component, const UserData* udata) N_VConst(0.0, mask); - RAJA::View > mask_view(GetVecData(mask), - udata->grid->nxl, - udata->grid->nyl, - udata->grid->nzl, - udata->grid->dof); + /* Create 4D view of mask data */ + RAJA::View > mask_view(GetVecData(mask), + udata->grid->nxl, + udata->grid->nyl, + udata->grid->nzl, + udata->grid->dof); + /* Fill mask data */ auto range = RAJA::make_tuple(RAJA::RangeSegment(0, udata->grid->nxl), RAJA::RangeSegment(0, udata->grid->nyl), RAJA::RangeSegment(0, udata->grid->nzl)); RAJA::kernel(range, - [=] DEVICE_FUNC (int xi, int yi, int zi) { - mask_view(xi,yi,zi,component) = 1.0; + [=] DEVICE_FUNC (int i, int j, int k) + { + mask_view(i,j,k,component) = 1.0; }); return 0; @@ -515,14 +454,9 @@ int ComponentMask(N_Vector mask, int component, const UserData* udata) int SetupProblem(int argc, char *argv[], UserData* udata, UserOptions* uopt, SUNMemoryHelper memhelper, SUNContext ctx) { - constexpr int STENCIL_WIDTH = 1; SUNDIALS_CXX_MARK_FUNCTION(udata->prof); - /* Local variables */ - int retval = 0; - char fname[MXSTR]; - /* MPI variables */ udata->comm = MPI_COMM_WORLD; MPI_Comm_rank(udata->comm, &udata->myid); @@ -567,16 +501,16 @@ int SetupProblem(int argc, char *argv[], UserData* udata, UserOptions* uopt, uopt->outputdir = (char *) "."; /* output directory */ /* Parse CLI args and set udata/uopt appropriately */ - retval = ParseArgs(argc, argv, udata, uopt); + int retval = ParseArgs(argc, argv, udata, uopt); if (check_retval((void*)&retval, "ParseArgs", 1, udata->myid)) return -1; /* Setup the parallel decomposition */ const sunindextype npts[] = {uopt->npts, uopt->npts, uopt->npts}; const realtype amax[] = {0.0, 0.0, 0.0}; const realtype 
bmax[] = {udata->xmax, udata->xmax, udata->xmax}; - udata->grid = new ParallelGrid(memhelper, - &udata->comm, amax, bmax, npts, 3, BoundaryType::PERIODIC, StencilType::UPWIND, STENCIL_WIDTH, uopt->npxyz - ); + udata->grid = new ParallelGrid(memhelper, &udata->comm, + amax, bmax, npts, 3, BoundaryType::PERIODIC, StencilType::UPWIND, udata->c, + STENCIL_WIDTH, uopt->npxyz); /* Create the solution masks */ udata->umask = N_VMake_MPIPlusX(udata->comm, LocalNvector(udata->grid->neq, ctx), ctx); @@ -589,6 +523,7 @@ int SetupProblem(int argc, char *argv[], UserData* udata, UserOptions* uopt, /* Open output files for results */ if (uopt->save) { + char fname[MXSTR]; if (udata->myid == 0) { sprintf(fname, "%s/t.%06d.txt", uopt->outputdir, udata->myid); @@ -609,7 +544,7 @@ int SetupProblem(int argc, char *argv[], UserData* udata, UserOptions* uopt, if (udata->myid == 0) { printf("\n\t\tAdvection-Reaction Test Problem\n\n"); - printf("Using the %s NVECTOR\n", NVECTOR_ID_STRING); + printf("Using the MPI+%s NVECTOR\n", NVECTOR_ID_STRING); printf("Number of Processors = %li\n", (long int) udata->nprocs); udata->grid->PrintInfo(); printf("Problem Parameters:\n"); @@ -632,7 +567,6 @@ int SetupProblem(int argc, char *argv[], UserData* udata, UserOptions* uopt, printf("Output directory: %s\n", uopt->outputdir); } - /* return success */ return(0); } @@ -644,8 +578,8 @@ void Gaussian3D(realtype& x, realtype& y, realtype& z, realtype xmax) { /* Gaussian distribution defaults */ const realtype alpha = 0.1; - const realtype mu[3] = { xmax/RCONST(2.0), xmax/RCONST(2.0), xmax/RCONST(2.0) }; - const realtype sigma[3] = { xmax/RCONST(4.0), xmax/RCONST(4.0), xmax/RCONST(4.0) }; // Sigma = diag(sigma) + const realtype mu[] = { xmax/RCONST(2.0), xmax/RCONST(2.0), xmax/RCONST(2.0) }; + const realtype sigma[] = { xmax/RCONST(4.0), xmax/RCONST(4.0), xmax/RCONST(4.0) }; // Sigma = diag(sigma) /* denominator = 2*sqrt(|Sigma|*(2pi)^3) */ const realtype denom = 2.0 * 
sqrt((sigma[0]*sigma[1]*sigma[2])*pow(2*M_PI,3)); @@ -664,6 +598,7 @@ int SetIC(N_Vector y, UserData* udata) const int nxl = udata->grid->nxl; const int nyl = udata->grid->nyl; const int nzl = udata->grid->nzl; + const int dof = udata->grid->dof; const realtype dx = udata->grid->dx; const realtype dy = udata->grid->dy; const realtype dz = udata->grid->dz; @@ -683,22 +618,25 @@ int SetIC(N_Vector y, UserData* udata) const realtype vs = k2 * k4 * B / (k1 * k3 * A); const realtype ws = 3.0; + /* Create 4D view of y */ + RAJA::View > yview(GetVecData(y), + nxl, nyl, nzl, dof); + /* Gaussian perturbation of the steady state solution */ - RAJA::View > yview(GetVecData(y), nxl, nyl, nzl, - udata->grid->dof); auto range = RAJA::make_tuple(RAJA::RangeSegment(0, nxl), RAJA::RangeSegment(0, nyl), RAJA::RangeSegment(0, nzl)); RAJA::kernel(range, - [=] DEVICE_FUNC (int xi, int yi, int zi) { - realtype x = (xcrd * nxl + xi) * dx; - realtype y = (ycrd * nyl + yi) * dy; - realtype z = (zcrd * nzl + zi) * dz; + [=] DEVICE_FUNC (int i, int j, int k) + { + realtype x = (xcrd * nxl + i) * dx; + realtype y = (ycrd * nyl + j) * dy; + realtype z = (zcrd * nzl + k) * dz; Gaussian3D(x,y,z,xmax); const realtype p = x + y + z; - yview(xi,yi,zi,0) = us + p; - yview(xi,yi,zi,1) = vs + p; - yview(xi,yi,zi,2) = ws + p; + yview(i,j,k,0) = us + p; + yview(i,j,k,1) = vs + p; + yview(i,j,k,2) = ws + p; }); /* Return success */ @@ -710,23 +648,17 @@ int SetIC(N_Vector y, UserData* udata) int WriteOutput(realtype t, N_Vector y, UserData* udata, UserOptions* uopt) { SUNDIALS_CXX_MARK_FUNCTION(udata->prof); - - realtype u, v, w, N; - realtype* ydata = NULL; - - /* get vector data array */ - ydata = N_VGetArrayPointer(y); - if (check_retval((void *) ydata, "N_VGetArrayPointer", 0, udata->myid)) return -1; + /* Copy solution data to host mirror view */ CopyVecFromDevice(N_VGetLocalVector_MPIPlusX(y)); /* output current solution norm to screen */ - N = (realtype) udata->grid->npts(); - u = N_VWL2Norm(y, 
udata->umask); + realtype N = (realtype) udata->grid->npts(); + realtype u = N_VWL2Norm(y, udata->umask); u = sqrt(u*u/N); - v = N_VWL2Norm(y, udata->vmask); + realtype v = N_VWL2Norm(y, udata->vmask); v = sqrt(v*v/N); - w = N_VWL2Norm(y, udata->wmask); + realtype w = N_VWL2Norm(y, udata->wmask); w = sqrt(w*w/N); if (udata->myid == 0) { printf(" %10.6f %10.6f %10.6f %10.6f\n", t, u, v, w); @@ -736,32 +668,38 @@ int WriteOutput(realtype t, N_Vector y, UserData* udata, UserOptions* uopt) if (uopt->save) { /* output the times to disk */ - if (udata->myid == 0 && udata->TFID) + if (udata->myid == 0 && udata->TFID) { fprintf(udata->TFID," %.16e\n", t); + std::fflush(udata->TFID); + } + + /* create 4D view of host data */ + realtype* ydata = NULL; + ydata = N_VGetArrayPointer(y); + if (check_retval((void *) ydata, "N_VGetArrayPointer", 0, udata->myid)) return -1; + const int nxl = udata->grid->nxl; + const int nyl = udata->grid->nyl; + const int nzl = udata->grid->nzl; + const int dof = udata->grid->dof; + RAJA::View > Yview(ydata, nxl, nyl, nzl, dof); /* output results to disk */ - RAJA::View > Yview(ydata, - udata->grid->nxl, - udata->grid->nyl, - udata->grid->nzl, - udata->grid->dof); - - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, udata->grid->nxl), - RAJA::RangeSegment(0, udata->grid->nyl), - RAJA::RangeSegment(0, udata->grid->nzl)); - - RAJA::kernel(range, - [=] (int i, int j, int k) { - fprintf(udata->UFID," %.16e", Yview(i,j,k,0)); - fprintf(udata->VFID," %.16e", Yview(i,j,k,1)); - fprintf(udata->WFID," %.16e", Yview(i,j,k,2)); - }); + for (int i = 0; i < nxl; i++) + for (int j = 0; j < nyl; j++) + for (int k = 0; k < nzl; k++) { + fprintf(udata->UFID," %.16e", Yview(i,j,k,0)); + fprintf(udata->VFID," %.16e", Yview(i,j,k,1)); + fprintf(udata->WFID," %.16e", Yview(i,j,k,2)); + } fprintf(udata->UFID,"\n"); fprintf(udata->VFID,"\n"); fprintf(udata->WFID,"\n"); + std::fflush(udata->UFID); + std::fflush(udata->VFID); + std::fflush(udata->WFID); } - + 
return(0); } @@ -799,4 +737,3 @@ void InputError(char *name) MPI_Barrier(MPI_COMM_WORLD); } - diff --git a/benchmarks/advection_reaction_3D/advection_reaction_3D.hpp b/benchmarks/advection_reaction_3D/raja/advection_reaction_3D.hpp similarity index 91% rename from benchmarks/advection_reaction_3D/advection_reaction_3D.hpp rename to benchmarks/advection_reaction_3D/raja/advection_reaction_3D.hpp index 4396e69eb5..e4227d62c7 100644 --- a/benchmarks/advection_reaction_3D/advection_reaction_3D.hpp +++ b/benchmarks/advection_reaction_3D/raja/advection_reaction_3D.hpp @@ -1,5 +1,6 @@ /* ----------------------------------------------------------------------------- * Programmer(s): David J. Gardner, Cody J. Balos @ LLNL + * Daniel R. Reynolds @ SMU * ----------------------------------------------------------------------------- * SUNDIALS Copyright Start * Copyright (c) 2002-2023, Lawrence Livermore National Security @@ -34,19 +35,9 @@ using sundials_tools::BoundaryType; using sundials_tools::StencilType; using std::string; -/* Number of dimensions */ -constexpr int NDIMS = 3; - /* Maximum size of output directory string */ constexpr int MXSTR = 2048; -/* Accessor macro: - n = number of state variables - i = mesh node index - c = component */ -#define IDX(n,i,c) ((n)*(i)+(c)) - - /* * Data structure for problem options */ @@ -113,7 +104,7 @@ struct UserData realtype c; /* advection coefficient */ /* parallel mesh */ - ParallelGrid* grid; + ParallelGrid* grid; /* count of implicit function evals by the task local nonlinear solver */ long int nnlfi; @@ -122,7 +113,10 @@ struct UserData UserOptions* uopt; /* constructor that takes the context */ - UserData(SUNContext ctx) : ctx(ctx) { + UserData(SUNContext ctx) + : ctx(ctx), umask(nullptr), vmask(nullptr), wmask(nullptr), uopt(nullptr), + TFID(nullptr), UFID(nullptr), VFID(nullptr), WFID(nullptr) + { SUNContext_GetProfiler(ctx, &prof); } @@ -161,15 +155,14 @@ extern int EvolveDAEProblem(N_Vector y, UserData* udata, 
UserOptions* uopt); /* function to set initial condition */ int SetIC(N_Vector y, UserData* udata); -/* functions to exchange neighbor data */ -int ExchangeBCOnly(N_Vector y, UserData* udata); -int ExchangeAllStart(N_Vector y, UserData* udata); -int ExchangeAllEnd(UserData* udata); +/* function to fill neighbor data */ +int FillSendBuffers(N_Vector y, UserData* udata); /* functions for processing command line args */ int SetupProblem(int argc, char *argv[], UserData* udata, UserOptions* uopt, SUNMemoryHelper memhelper, SUNContext ctx); void InputError(char *name); +int ComponentMask(N_Vector mask, const int component, const UserData* udata); /* function to write solution to disk */ int WriteOutput(realtype t, N_Vector y, UserData* udata, UserOptions* uopt); diff --git a/benchmarks/advection_reaction_3D/raja/arkode_driver.cpp b/benchmarks/advection_reaction_3D/raja/arkode_driver.cpp new file mode 100644 index 0000000000..e2cf1451e3 --- /dev/null +++ b/benchmarks/advection_reaction_3D/raja/arkode_driver.cpp @@ -0,0 +1,782 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): David J. Gardner, Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ---------------------------------------------------------------------------*/ + +#include "arkode/arkode_arkstep.h" +#include "arkode/arkode_erkstep.h" +#include "sunlinsol/sunlinsol_spgmr.h" +#include "sunnonlinsol/sunnonlinsol_newton.h" +#include "sunnonlinsol/sunnonlinsol_fixedpoint.h" +#include "advection_reaction_3D.hpp" +#include "rhs3D.hpp" + +/* + * Definitions for a custom task local SUNNonlinearSolver + */ + +typedef struct +{ + int myid; + int nprocs; + long int ncnf; + MPI_Comm comm; + SUNNonlinearSolver local_nls; +} *TaskLocalNewton_Content; + +/* Content accessor macors */ +#define GET_NLS_CONTENT(NLS) ( (TaskLocalNewton_Content)(NLS->content) ) +#define LOCAL_NLS(NLS) ( GET_NLS_CONTENT(NLS)->local_nls ) + +/* SUNNonlinearSolver constructor */ +SUNNonlinearSolver TaskLocalNewton(SUNContext ctx, N_Vector y, FILE* DFID); + + +/* -------------------------------------------------------------- + * Evolve functions + * --------------------------------------------------------------*/ + +/* Setup ARKODE and evolve problem in time with IMEX method */ +int EvolveProblemDIRK(N_Vector y, UserData* udata, UserOptions* uopt) +{ + void* arkode_mem = NULL; /* empty ARKODE memory structure */ + SUNNonlinearSolver NLS = NULL; /* empty nonlinear solver structure */ + SUNLinearSolver LS = NULL; /* empty linear solver structure */ + + realtype t, dtout, tout; /* current/output time data */ + int retval; /* reusable error-checking flag */ + int iout; /* output counter */ + long int nst, nst_a, netf; /* step stats */ + long int nfe, nfi; /* RHS stats */ + long int nni, ncnf; /* nonlinear solver stats */ + long int nli, npsol; /* linear solver stats */ + FILE* DFID = NULL; /* diagnostics output file */ + char fname[MXSTR]; + + /* Additively split methods should not add the advection and reaction terms */ + udata->add_reactions = true; + + /* Create the ARK timestepper module */ + arkode_mem = 
ARKStepCreate(NULL, AdvectionReaction, uopt->t0, y, udata->ctx); + if (check_retval((void*)arkode_mem, "ARKStepCreate", 0, udata->myid)) return 1; + + /* Select the method order */ + retval = ARKStepSetOrder(arkode_mem, uopt->order); + if (check_retval(&retval, "ARKStepSetOrder", 1, udata->myid)) return 1; + + /* Attach user data */ + retval = ARKStepSetUserData(arkode_mem, (void*) udata); + if (check_retval(&retval, "ARKStepSetUserData*", 1, udata->myid)) return 1; + + /* Specify tolerances */ + retval = ARKStepSStolerances(arkode_mem, uopt->rtol, uopt->atol); + if (check_retval(&retval, "ARKStepSStolerances", 1, udata->myid)) return 1; + + /* Increase the max number of steps allowed between outputs */ + retval = ARKStepSetMaxNumSteps(arkode_mem, 100000); + if (check_retval(&retval, "ARKStepSetMaxNumSteps", 1, udata->myid)) return 1; + + /* Open output file for integrator diagnostics */ + if (uopt->save) + { + sprintf(fname, "%s/diagnostics.%06d.txt", uopt->outputdir, udata->myid); + DFID = fopen(fname, "w"); + + retval = ARKStepSetDiagnostics(arkode_mem, DFID); + if (check_retval(&retval, "ARKStepSetDiagnostics", 1, udata->myid)) return 1; + } + + /* Create the (non)linear solver */ + if (uopt->nls == "newton") + { + /* Create nonlinear solver */ + NLS = SUNNonlinSol_Newton(y, udata->ctx); + if (check_retval((void *)NLS, "SUNNonlinSol_Newton", 0, udata->myid)) return 1; + + /* Attach nonlinear solver */ + retval = ARKStepSetNonlinearSolver(arkode_mem, NLS); + if (check_retval(&retval, "ARKStepSetNonlinearSolver", 1, udata->myid)) return 1; + + /* Create linear solver */ + LS = uopt->precond ? 
SUNLinSol_SPGMR(y, PREC_LEFT, 0, udata->ctx) : SUNLinSol_SPGMR(y, PREC_NONE, 0, udata->ctx); + if (check_retval((void *)LS, "SUNLinSol_SPGMR", 0, udata->myid)) return 1; + + /* Attach linear solver */ + retval = ARKStepSetLinearSolver(arkode_mem, LS, NULL); + if (check_retval(&retval, "ARKStepSetLinearSolver", 1, udata->myid)) return 1; + + /* Attach preconditioner */ + retval = ARKStepSetPreconditioner(arkode_mem, NULL, PSolve); + if (check_retval(&retval, "ARKStepSetPreconditioner", 1, udata->myid)) return 1; + } + else if (uopt->nls == "fixedpoint") + { + /* Create nonlinear solver */ + NLS = SUNNonlinSol_FixedPoint(y, uopt->fpaccel, udata->ctx); + if (check_retval((void *)NLS, "SUNNonlinSol_FixedPoint", 0, udata->myid)) return 1; + + /* Attach nonlinear solver */ + retval = ARKStepSetNonlinearSolver(arkode_mem, NLS); + if (check_retval(&retval, "ARKStepSetNonlinearSolver", 1, udata->myid)) return 1; + } + else + { + fprintf(stderr, "\nERROR: ARK-DIRK is not compatible with the nls option provided\n"); + return 1; + } + + /* Output initial condition */ + if (uopt->nout > 0) + { + if (udata->myid == 0) + { + printf("\n t ||u||_rms ||v||_rms ||w||_rms\n"); + printf(" ----------------------------------------------------\n"); + } + WriteOutput(uopt->t0, y, udata, uopt); + } + + /* Integrate to final time */ + t = uopt->t0; + dtout = (uopt->tf - uopt->t0); + if (uopt->nout != 0) + dtout /= uopt->nout; + tout = t + dtout; + iout = 0; + + do + { + /* Integrate to output time */ + retval = ARKStepEvolve(arkode_mem, tout, y, &t, ARK_NORMAL); + if (check_retval(&retval, "ARKStepEvolve", 1, udata->myid)) break; + + /* Output state */ + if(uopt->nout > 0) WriteOutput(t, y, udata, uopt); + + /* Update output time */ + tout += dtout; + tout = (tout > uopt->tf) ? 
uopt->tf : tout; + + iout++; + } while (iout < uopt->nout); + + /* close output stream */ + if (uopt->save) fclose(DFID); + + /* Get final statistics */ + retval = ARKStepGetNumSteps(arkode_mem, &nst); + check_retval(&retval, "ARKStepGetNumSteps", 1, udata->myid); + retval = ARKStepGetNumStepAttempts(arkode_mem, &nst_a); + check_retval(&retval, "ARKStepGetNumStepAttempts", 1, udata->myid); + retval = ARKStepGetNumRhsEvals(arkode_mem, &nfe, &nfi); + check_retval(&retval, "ARKStepGetNumRhsEvals", 1, udata->myid); + retval = ARKStepGetNumErrTestFails(arkode_mem, &netf); + check_retval(&retval, "ARKStepGetNumErrTestFails", 1, udata->myid); + retval = ARKStepGetNumNonlinSolvIters(arkode_mem, &nni); + check_retval(&retval, "ARKStepGetNumNonlinSolvIters", 1, udata->myid); + retval = ARKStepGetNumNonlinSolvConvFails(arkode_mem, &ncnf); + check_retval(&retval, "ARKStepGetNumNonlinSolvConvFails", 1, udata->myid); + if (uopt->nls == "newton") + { + retval = ARKStepGetNumLinIters(arkode_mem, &nli); + check_retval(&retval, "ARKStepGetNumLinIters", 1, udata->myid); + retval = ARKStepGetNumPrecSolves(arkode_mem, &npsol); + check_retval(&retval, "ARKStepGetNumPrecSolves", 1, udata->myid); + } + + /* Print final statistics */ + if (udata->myid == 0) + { + printf("\nFinal Solver Statistics (for processor 0):\n"); + printf(" Internal solver steps = %li (attempted = %li)\n", nst, nst_a); + printf(" Total RHS evals: Fe = %li, Fi = %li\n", nfe, nfi + udata->nnlfi); + printf(" Total number of error test failures = %li\n", netf); + printf(" Total number of nonlinear solver convergence failures = %li\n", + ncnf); + printf(" Total number of nonlinear iterations = %li\n", nni); + if (uopt->nls == "newton") + { + printf(" Total number of linear iterations = %li\n", nli); + printf(" Total number of preconditioner solves = %li\n", npsol); + } + } + + /* Clean up */ + ARKStepFree(&arkode_mem); + SUNNonlinSolFree(NLS); + if (LS) SUNLinSolFree(LS); + + /* Return success */ + return(0); +} + + +/* 
Setup ARKODE and evolve problem in time with IMEX method */
/* EvolveProblemIMEX
 *
 * Creates an ARKStep integrator with Advection as the explicit RHS and
 * Reaction as the implicit RHS, attaches the nonlinear solver selected by
 * uopt->nls ("newton", "tl-newton" or "fixedpoint"), integrates y from
 * uopt->t0 to uopt->tf, writes the state after each ARKStepEvolve call when
 * uopt->nout > 0, and prints integrator statistics on rank 0.
 *
 * Returns 1 if any setup call fails (including opening the diagnostics
 * file), 0 otherwise. A failure inside the time loop stops the integration
 * early; statistics are still reported and 0 is returned. */
int EvolveProblemIMEX(N_Vector y, UserData* udata, UserOptions* uopt)
{
  void*              arkode_mem = NULL;  /* empty ARKODE memory structure    */
  SUNNonlinearSolver NLS        = NULL;  /* empty nonlinear solver structure */
  SUNLinearSolver    LS         = NULL;  /* empty linear solver structure    */

  realtype t, dtout, tout;                 /* current/output time data     */
  int      retval;                         /* reusable error-checking flag */
  int      iout;                           /* output counter               */
  /* counters start at zero so a failed getter never prints garbage */
  long int nst = 0, nst_a = 0, netf = 0;   /* step stats                   */
  long int nfe = 0, nfi = 0;               /* RHS stats                    */
  long int nni = 0, ncnf = 0;              /* nonlinear solver stats       */
  long int nli = 0, npsol = 0;             /* linear solver stats          */
  FILE*    DFID = NULL;                    /* diagnostics output file      */
  char     fname[MXSTR];

  /* Additively split methods should not add the advection and reaction terms */
  udata->add_reactions = false;

  /* Create the ARK timestepper module */
  arkode_mem = ARKStepCreate(Advection, Reaction, uopt->t0, y, udata->ctx);
  if (check_retval((void*)arkode_mem, "ARKStepCreate", 0, udata->myid)) return 1;

  /* Select the method order */
  retval = ARKStepSetOrder(arkode_mem, uopt->order);
  if (check_retval(&retval, "ARKStepSetOrder", 1, udata->myid)) return 1;

  /* Attach user data */
  retval = ARKStepSetUserData(arkode_mem, (void*) udata);
  if (check_retval(&retval, "ARKStepSetUserData*", 1, udata->myid)) return 1;

  /* Specify tolerances */
  retval = ARKStepSStolerances(arkode_mem, uopt->rtol, uopt->atol);
  if (check_retval(&retval, "ARKStepSStolerances", 1, udata->myid)) return 1;

  /* Increase the max number of steps allowed between outputs */
  retval = ARKStepSetMaxNumSteps(arkode_mem, 100000);
  if (check_retval(&retval, "ARKStepSetMaxNumSteps", 1, udata->myid)) return 1;

  /* Open output file for integrator diagnostics */
  if (uopt->save)
  {
    /* bounded write: never overrun fname, truncate instead */
    snprintf(fname, sizeof(fname), "%s/diagnostics.%06d.txt",
             uopt->outputdir, udata->myid);
    DFID = fopen(fname, "w");
    /* stop here on failure: a NULL stream would otherwise reach fclose() */
    if (check_retval((void*)DFID, "fopen", 0, udata->myid)) return 1;

    retval = ARKStepSetDiagnostics(arkode_mem, DFID);
    if (check_retval(&retval, "ARKStepSetDiagnostics", 1, udata->myid)) return 1;
  }

  /* Create the (non)linear solver */
  if (uopt->nls == "newton")
  {
    /* Create nonlinear solver */
    NLS = SUNNonlinSol_Newton(y, udata->ctx);
    if (check_retval((void *)NLS, "SUNNonlinSol_Newton", 0, udata->myid)) return 1;

    /* Attach nonlinear solver */
    retval = ARKStepSetNonlinearSolver(arkode_mem, NLS);
    if (check_retval(&retval, "ARKStepSetNonlinearSolver", 1, udata->myid)) return 1;

    /* Create linear solver */
    LS = SUNLinSol_SPGMR(y, PREC_LEFT, 0, udata->ctx);
    if (check_retval((void *)LS, "SUNLinSol_SPGMR", 0, udata->myid)) return 1;

    /* Attach linear solver */
    retval = ARKStepSetLinearSolver(arkode_mem, LS, NULL);
    if (check_retval(&retval, "ARKStepSetLinearSolver", 1, udata->myid)) return 1;

    /* Attach preconditioner */
    retval = ARKStepSetPreconditioner(arkode_mem, NULL, PSolve);
    if (check_retval(&retval, "ARKStepSetPreconditioner", 1, udata->myid)) return 1;
  }
  else if (uopt->nls == "tl-newton")
  {
    /* The custom task-local nonlinear solver handles the linear solve
       as well, so we do not need a SUNLinearSolver. */
    NLS = TaskLocalNewton(udata->ctx, y, DFID);
    if (check_retval((void *)NLS, "TaskLocalNewton", 0, udata->myid)) return 1;

    /* Attach nonlinear solver */
    retval = ARKStepSetNonlinearSolver(arkode_mem, NLS);
    if (check_retval(&retval, "ARKStepSetNonlinearSolver", 1, udata->myid)) return 1;
  }
  else if (uopt->nls == "fixedpoint")
  {
    /* Create nonlinear solver */
    NLS = SUNNonlinSol_FixedPoint(y, uopt->fpaccel, udata->ctx);
    if (check_retval((void *)NLS, "SUNNonlinSol_FixedPoint", 0, udata->myid)) return 1;

    /* Attach nonlinear solver */
    retval = ARKStepSetNonlinearSolver(arkode_mem, NLS);
    if (check_retval(&retval, "ARKStepSetNonlinearSolver", 1, udata->myid)) return 1;
  }
  else
  {
    fprintf(stderr, "\nERROR: ARK-IMEX method is not compatible with the nls option provided\n");
    return 1;
  }

  /* Output initial condition */
  if (uopt->nout > 0)
  {
    if (udata->myid == 0)
    {
      printf("\n t ||u||_rms ||v||_rms ||w||_rms\n");
      printf(" ----------------------------------------------------\n");
    }
    WriteOutput(uopt->t0, y, udata, uopt);
  }

  /* Integrate to final time; with nout == 0 a single step to tf is taken */
  t     = uopt->t0;
  dtout = (uopt->tf - uopt->t0);
  if (uopt->nout != 0)
    dtout /= uopt->nout;
  tout = t + dtout;
  iout = 0;

  do
  {
    /* Integrate to output time */
    retval = ARKStepEvolve(arkode_mem, tout, y, &t, ARK_NORMAL);
    if (check_retval(&retval, "ARKStepEvolve", 1, udata->myid)) break;

    /* Output state */
    if (uopt->nout > 0) WriteOutput(t, y, udata, uopt);

    /* Update output time, never stepping past tf */
    tout += dtout;
    tout = (tout > uopt->tf) ? uopt->tf : tout;

    iout++;
  } while (iout < uopt->nout);

  /* close output stream */
  if (uopt->save) fclose(DFID);

  /* Get final statistics */
  retval = ARKStepGetNumSteps(arkode_mem, &nst);
  check_retval(&retval, "ARKStepGetNumSteps", 1, udata->myid);
  retval = ARKStepGetNumStepAttempts(arkode_mem, &nst_a);
  check_retval(&retval, "ARKStepGetNumStepAttempts", 1, udata->myid);
  retval = ARKStepGetNumRhsEvals(arkode_mem, &nfe, &nfi);
  check_retval(&retval, "ARKStepGetNumRhsEvals", 1, udata->myid);
  retval = ARKStepGetNumErrTestFails(arkode_mem, &netf);
  check_retval(&retval, "ARKStepGetNumErrTestFails", 1, udata->myid);
  retval = ARKStepGetNumNonlinSolvIters(arkode_mem, &nni);
  check_retval(&retval, "ARKStepGetNumNonlinSolvIters", 1, udata->myid);
  retval = ARKStepGetNumNonlinSolvConvFails(arkode_mem, &ncnf);
  check_retval(&retval, "ARKStepGetNumNonlinSolvConvFails", 1, udata->myid);
  if (uopt->nls == "newton")
  {
    retval = ARKStepGetNumLinIters(arkode_mem, &nli);
    check_retval(&retval, "ARKStepGetNumLinIters", 1, udata->myid);
    retval = ARKStepGetNumPrecSolves(arkode_mem, &npsol);
    check_retval(&retval, "ARKStepGetNumPrecSolves", 1, udata->myid);
  }

  /* Print final statistics */
  if (udata->myid == 0)
  {
    printf("\nFinal Solver Statistics (for processor 0):\n");
    printf(" Internal solver steps = %li (attempted = %li)\n", nst, nst_a);
    /* udata->nnlfi holds the Fi calls counted by the task-local residual */
    printf(" Total RHS evals: Fe = %li, Fi = %li\n", nfe, nfi + udata->nnlfi);
    printf(" Total number of error test failures = %li\n", netf);
    printf(" Total number of nonlinear solver convergence failures = %li\n",
           ncnf);
    printf(" Total number of nonlinear iterations = %li\n", nni);
    if (uopt->nls == "newton")
    {
      printf(" Total number of linear iterations = %li\n", nli);
      printf(" Total number of preconditioner solves = %li\n", npsol);
    }
  }

  /* Clean up */
  ARKStepFree(&arkode_mem);
  if (NLS) SUNNonlinSolFree(NLS);
  if (LS) SUNLinSolFree(LS);

  /* Return success */
  return(0);
}
+ + +/* Setup ARKODE and evolve problem in time explicitly */ +int EvolveProblemExplicit(N_Vector y, UserData* udata, UserOptions* uopt) +{ + void* arkode_mem = NULL; /* empty ARKODE memory structure */ + realtype t, dtout, tout; /* current/output time data */ + int retval; /* reusable error-checking flag */ + int iout; /* output counter */ + long int nst, nst_a, netf; /* step stats */ + long int nfe; /* RHS stats */ + FILE* DFID; /* diagnostics output file */ + char fname[MXSTR]; + + /* Additively split methods should not add the advection and reaction terms */ + udata->add_reactions = true; + + /* Create the ERK timestepper module */ + arkode_mem = ERKStepCreate(AdvectionReaction, uopt->t0, y, udata->ctx); + if (check_retval((void*)arkode_mem, "ERKStepCreate", 0, udata->myid)) return 1; + + /* Select the method order */ + retval = ERKStepSetOrder(arkode_mem, uopt->order); + if (check_retval(&retval, "ERKStepSetOrder", 1, udata->myid)) return 1; + + /* Attach user data */ + retval = ERKStepSetUserData(arkode_mem, (void*) udata); + if (check_retval(&retval, "ERKStepSetUserData", 1, udata->myid)) return 1; + + /* Specify tolerances */ + retval = ERKStepSStolerances(arkode_mem, uopt->rtol, uopt->atol); + if (check_retval(&retval, "ERKStepSStolerances", 1, udata->myid)) return 1; + + /* Increase the max number of steps allowed between outputs */ + retval = ERKStepSetMaxNumSteps(arkode_mem, 1000000); + if (check_retval(&retval, "ERKStepSetMaxNumSteps", 1, udata->myid)) return 1; + + /* Set fixed step size */ + retval = ERKStepSetFixedStep(arkode_mem, 1e-5); + if (check_retval(&retval, "ERKStepSetFixedStep", 1, udata->myid)) return 1; + + /* Open output file for integrator diagnostics */ + if (uopt->save) + { + sprintf(fname, "%s/diagnostics.%06d.txt", uopt->outputdir, udata->myid); + DFID = fopen(fname, "w"); + + retval = ERKStepSetDiagnostics(arkode_mem, DFID); + if (check_retval(&retval, "ERKStepSetDiagnostics", 1, udata->myid)) return 1; + } + + /* Output initial 
condition */ + if (uopt->nout > 0) + { + if (udata->myid == 0) + { + printf("\n t ||u||_rms ||v||_rms ||w||_rms\n"); + printf(" ----------------------------------------------------\n"); + } + WriteOutput(uopt->t0, y, udata, uopt); + } + + /* Integrate to final time */ + t = uopt->t0; + dtout = (uopt->tf - uopt->t0); + if (uopt->nout != 0) + dtout /= uopt->nout; + tout = t + dtout; + iout = 0; + + do + { + /* Integrate to output time */ + retval = ERKStepEvolve(arkode_mem, tout, y, &t, ARK_NORMAL); + if (check_retval(&retval, "ERKStepEvolve", 1, udata->myid)) break; + + /* Output state */ + if(uopt->nout > 0) WriteOutput(t, y, udata, uopt); + + /* Update output time */ + tout += dtout; + tout = (tout > uopt->tf) ? uopt->tf : tout; + + iout++; + } while (iout < uopt->nout); + + /* close output stream */ + if (uopt->save) fclose(DFID); + + /* Get final statistics */ + retval = ERKStepGetNumSteps(arkode_mem, &nst); + check_retval(&retval, "ERKStepGetNumSteps", 1, udata->myid); + retval = ERKStepGetNumStepAttempts(arkode_mem, &nst_a); + check_retval(&retval, "ERKStepGetNumStepAttempts", 1, udata->myid); + retval = ERKStepGetNumRhsEvals(arkode_mem, &nfe); + check_retval(&retval, "ERKStepGetNumRhsEvals", 1, udata->myid); + retval = ERKStepGetNumErrTestFails(arkode_mem, &netf); + check_retval(&retval, "ERKStepGetNumErrTestFails", 1, udata->myid); + + /* Print final statistics */ + if (udata->myid == 0) + { + printf("\nFinal Solver Statistics (for processor 0):\n"); + printf(" Internal solver steps = %li (attempted = %li)\n", nst, nst_a); + printf(" Total RHS evals: Fe = %li\n", nfe); + printf(" Total number of error test failures = %li\n", netf); + } + + /* Clean up */ + ERKStepFree(&arkode_mem); + + /* Return success */ + return(0); +} + + +/* -------------------------------------------------------------- + * (Non)linear system functions + * --------------------------------------------------------------*/ + +int TaskLocalNlsResidual(N_Vector ycor, N_Vector F, void* 
arkode_mem) +{ + /* temporary variables */ + UserData* udata; + int retval; + realtype c[3]; + N_Vector X[3]; + + /* nonlinear system data */ + N_Vector z, zpred, Fi, sdata; + realtype tcur, gamma; + void *user_data; + + ARKStepGetNonlinearSystemData(arkode_mem, &tcur, &zpred, &z, &Fi, + &gamma, &sdata, &user_data); + udata = (UserData*) user_data; + + /* update 'z' value as stored predictor + current corrector */ + N_VLinearSum(1.0, N_VGetLocalVector_MPIPlusX(zpred), + 1.0, (ycor), + N_VGetLocalVector_MPIPlusX(z)); + + /* compute implicit RHS and save for later */ + retval = Reaction(tcur, + N_VGetLocalVector_MPIPlusX(z), + N_VGetLocalVector_MPIPlusX(Fi), + user_data); + udata->nnlfi++; /* count calls to Fi as part of the nonlinear residual */ + if (retval < 0) return(-1); + if (retval > 0) return(+1); + + /* update with y, sdata, and gamma * fy */ + X[0] = ycor; + c[0] = 1.0; + c[1] = -1.0; + X[1] = N_VGetLocalVector_MPIPlusX(sdata); + c[2] = -gamma; + X[2] = N_VGetLocalVector_MPIPlusX(Fi); + + retval = N_VLinearCombination(3, c, X, F); + if (retval != 0) return(-1); + + return(0); +} + + +int TaskLocalLSolve(N_Vector delta, void* arkode_mem) +{ + /* local variables */ + UserData* udata = NULL; + int retval; + + /* nonlinear system data */ + N_Vector z, zpred, Fi, sdata; + realtype tcur, gamma; + void* user_data = NULL; + + ARKStepGetNonlinearSystemData(arkode_mem, &tcur, &zpred, &z, &Fi, + &gamma, &sdata, &user_data); + udata = (UserData*) user_data; + + SUNDIALS_CXX_MARK_FUNCTION(udata->prof); + + /* set up I - gamma*J and solve */ + retval = SolveReactionLinSys(z, delta, delta, gamma, udata); + + + return(retval); +} + + +SUNNonlinearSolver_Type TaskLocalNewton_GetType(SUNNonlinearSolver NLS) +{ + return SUNNONLINEARSOLVER_ROOTFIND; +} + + +int TaskLocalNewton_Initialize(SUNNonlinearSolver NLS) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return SUN_NLS_MEM_NULL; + + /* override default system and lsolve functions with local 
versions */ + SUNNonlinSolSetSysFn(LOCAL_NLS(NLS), TaskLocalNlsResidual); + SUNNonlinSolSetLSolveFn(LOCAL_NLS(NLS), TaskLocalLSolve); + + return(SUNNonlinSolInitialize(LOCAL_NLS(NLS))); +} + + +int TaskLocalNewton_Solve(SUNNonlinearSolver NLS, + N_Vector y0, N_Vector ycor, + N_Vector w, realtype tol, + booleantype callLSetup, void* mem) +{ + /* local variables */ + MPI_Comm comm; + int solve_status, recover, nonrecover; + + /* check that the inputs are non-null */ + if ((NLS == NULL) || + (y0 == NULL) || + (ycor == NULL) || + (w == NULL) || + (mem == NULL)) + return SUN_NLS_MEM_NULL; + + /* shortcuts */ + comm = GET_NLS_CONTENT(NLS)->comm; + + /* each tasks solves the local nonlinear system */ + solve_status = SUNNonlinSolSolve(LOCAL_NLS(NLS), + N_VGetLocalVector_MPIPlusX(y0), + N_VGetLocalVector_MPIPlusX(ycor), + N_VGetLocalVector_MPIPlusX(w), + tol, callLSetup, mem); + + /* if any process had a nonrecoverable failure, return it */ + MPI_Allreduce(&solve_status, &nonrecover, 1, MPI_INT, MPI_MIN, comm); + if (nonrecover < 0) return nonrecover; + + /* check if any process has a recoverable convergence failure */ + MPI_Allreduce(&solve_status, &recover, 1, MPI_INT, MPI_MAX, comm); + if (recover == SUN_NLS_CONV_RECVR) GET_NLS_CONTENT(NLS)->ncnf++; + + /* return success (recover == 0) or a recoverable error code (recover > 0) */ + return recover; +} + + +int TaskLocalNewton_Free(SUNNonlinearSolver NLS) +{ + /* return if NLS is already free */ + if (NLS == NULL) + return SUN_NLS_SUCCESS; + + /* free items from contents, then the generic structure */ + if (NLS->content) + { + SUNNonlinSolFree(LOCAL_NLS(NLS)); + free(NLS->content); + NLS->content = NULL; + } + + /* free the ops structure */ + if (NLS->ops) + { + free(NLS->ops); + NLS->ops = NULL; + } + + /* free the nonlinear solver */ + free(NLS); + + return SUN_NLS_SUCCESS; +} + + +int TaskLocalNewton_SetSysFn(SUNNonlinearSolver NLS, + SUNNonlinSolSysFn SysFn) +{ + /* check that the nonlinear solver is non-null */ + if 
(NLS == NULL) + return SUN_NLS_MEM_NULL; + + return(SUNNonlinSolSetSysFn(LOCAL_NLS(NLS), SysFn)); +} + + +int TaskLocalNewton_SetConvTestFn(SUNNonlinearSolver NLS, + SUNNonlinSolConvTestFn CTestFn, + void* ctest_data) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return SUN_NLS_MEM_NULL; + + return(SUNNonlinSolSetConvTestFn(LOCAL_NLS(NLS), CTestFn, ctest_data)); +} + + +int TaskLocalNewton_GetNumConvFails(SUNNonlinearSolver NLS, + long int *nconvfails) +{ + /* check that the nonlinear solver is non-null */ + if (NLS == NULL) + return SUN_NLS_MEM_NULL; + + *nconvfails = GET_NLS_CONTENT(NLS)->ncnf; + return(0); +} + + +SUNNonlinearSolver TaskLocalNewton(SUNContext ctx, N_Vector y, FILE* DFID) +{ + SUNNonlinearSolver NLS; + TaskLocalNewton_Content content; + + /* Check that the supplied N_Vector is non-NULL */ + if (y == NULL) return NULL; + + /* Check that the supplied N_Vector is an MPIPlusX */ + if (N_VGetVectorID(y) != SUNDIALS_NVEC_MPIPLUSX) + return NULL; + + /* Create an empty nonlinear linear solver object */ + NLS = SUNNonlinSolNewEmpty(ctx); + if (NLS == NULL) return NULL; + + /* Attach operations */ + NLS->ops->gettype = TaskLocalNewton_GetType; + NLS->ops->initialize = TaskLocalNewton_Initialize; + NLS->ops->solve = TaskLocalNewton_Solve; + NLS->ops->free = TaskLocalNewton_Free; + NLS->ops->setsysfn = TaskLocalNewton_SetSysFn; + NLS->ops->setctestfn = TaskLocalNewton_SetConvTestFn; + NLS->ops->getnumconvfails = TaskLocalNewton_GetNumConvFails; + + /* Create content */ + content = NULL; + content = (TaskLocalNewton_Content) malloc(sizeof *content); + if (content == NULL) { SUNNonlinSolFree(NLS); return NULL; } + + /* Initialize all components of content to 0/NULL */ + memset(content, 0, sizeof(*content)); + + /* Attach content */ + NLS->content = content; + + /* Fill general content */ + void *tmpcomm = N_VGetCommunicator(y); + if (tmpcomm == NULL) { SUNNonlinSolFree(NLS); return NULL; } + + MPI_Comm *comm = (MPI_Comm*) 
tmpcomm; + if ((*comm) == MPI_COMM_NULL) { SUNNonlinSolFree(NLS); return NULL; } + + content->comm = *comm; + + content->local_nls = SUNNonlinSol_Newton(N_VGetLocalVector_MPIPlusX(y), ctx); + if (content->local_nls == NULL) { SUNNonlinSolFree(NLS); return NULL; } + + MPI_Comm_rank(content->comm, &content->myid); + MPI_Comm_size(content->comm, &content->nprocs); + + content->ncnf = 0; + + /* Setup the local nonlinear solver monitoring */ + if (DFID != NULL) + { + SUNNonlinSolSetInfoFile_Newton(LOCAL_NLS(NLS), DFID); + SUNNonlinSolSetPrintLevel_Newton(LOCAL_NLS(NLS), 1); + } + + return NLS; +} diff --git a/benchmarks/advection_reaction_3D/backends.hpp b/benchmarks/advection_reaction_3D/raja/backends.hpp similarity index 100% rename from benchmarks/advection_reaction_3D/backends.hpp rename to benchmarks/advection_reaction_3D/raja/backends.hpp diff --git a/benchmarks/advection_reaction_3D/raja/check_retval.h b/benchmarks/advection_reaction_3D/raja/check_retval.h new file mode 100644 index 0000000000..887b7cea5d --- /dev/null +++ b/benchmarks/advection_reaction_3D/raja/check_retval.h @@ -0,0 +1,57 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ---------------------------------------------------------------------------*/ + +#ifndef _SUNDIALS_CHECK_RETVAL_H_ +#define _SUNDIALS_CHECK_RETVAL_H_ + +#include + +/* -------------------------------------------------------------- + * Function to check return values: + * + * opt == 0 means the function allocates memory and returns a + * pointer so check if a NULL pointer was returned + * opt == 1 means the function returns an integer where a + * value < 0 indicates an error occured + * --------------------------------------------------------------*/ +static int check_retval(void *returnvalue, const char *funcname, int opt, int myid) +{ + int* errvalue; + + if (opt == 0 && returnvalue == NULL) + { + /* A NULL pointer was returned - no memory allocated */ + if (myid == 0) + fprintf(stderr, "\nERROR: %s() failed - returned NULL pointer\n\n", + funcname); + return(1); + } + else if (opt == 1) + { + errvalue = (int *) returnvalue; + + /* A value < 0 was returned - function failed */ + if (*errvalue < 0) + { + if (myid == 0) + fprintf(stderr, "\nERROR: %s() returned %d\n\n", funcname, *errvalue); + return(1); + } + } + + /* return success */ + return(0); +} + +#endif diff --git a/benchmarks/advection_reaction_3D/raja/cvode_driver.cpp b/benchmarks/advection_reaction_3D/raja/cvode_driver.cpp new file mode 100644 index 0000000000..e147ccd8c4 --- /dev/null +++ b/benchmarks/advection_reaction_3D/raja/cvode_driver.cpp @@ -0,0 +1,289 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ---------------------------------------------------------------------------*/ + +#include "cvode/cvode.h" +#include "sunlinsol/sunlinsol_spgmr.h" +#include "sunnonlinsol/sunnonlinsol_newton.h" +#include "sunnonlinsol/sunnonlinsol_fixedpoint.h" +#include "advection_reaction_3D.hpp" +#include "rhs3D.hpp" + + +/* Setup CVODE and evolve problem in time with BDF method */ +int EvolveProblemBDF(N_Vector y, UserData* udata, UserOptions* uopt) +{ + void* cvode_mem = NULL; /* empty CVODE memory structure */ + SUNNonlinearSolver NLS = NULL; /* empty nonlinear solver structure */ + SUNLinearSolver LS = NULL; /* empty linear solver structure */ + + realtype t, dtout, tout; /* current/output time data */ + int retval; /* reusable error-checking flag */ + int iout; /* output counter */ + long int nst, netf; /* step stats */ + long int nfi; /* RHS stats */ + long int nni, ncnf; /* nonlinear solver stats */ + long int nli, npsol; /* linear solver stats */ + + /* Additively split methods should not add the advection and reaction terms */ + udata->add_reactions = true; + + /* Create CVode */ + cvode_mem = CVodeCreate(CV_BDF, udata->ctx); + if (check_retval((void*)cvode_mem, "CVodeCreate", 0, udata->myid)) return 1; + + /* Initialize CVode */ + retval = CVodeInit(cvode_mem, AdvectionReaction, uopt->t0, y); + if (check_retval((void*)cvode_mem, "CVodeInit", 0, udata->myid)) return 1; + + /* Attach user data */ + retval = CVodeSetUserData(cvode_mem, (void*) udata); + if (check_retval(&retval, "CVodeSetUserData*", 1, udata->myid)) return 1; + + /* Specify tolerances */ + retval = CVodeSStolerances(cvode_mem, uopt->rtol, uopt->atol); + if (check_retval(&retval, "CVodeSStolerances", 1, udata->myid)) return 1; + + /* Increase the max number of steps allowed between outputs */ + retval = CVodeSetMaxNumSteps(cvode_mem, 100000); + if (check_retval(&retval, "CVodeSetMaxNumSteps", 1, udata->myid)) return 1; + + /* 
Create the (non)linear solver */ + if (uopt->nls == "newton") + { + /* Create nonlinear solver */ + NLS = SUNNonlinSol_Newton(y, udata->ctx); + if (check_retval((void *)NLS, "SUNNonlinSol_Newton", 0, udata->myid)) return 1; + + /* Attach nonlinear solver */ + retval = CVodeSetNonlinearSolver(cvode_mem, NLS); + if (check_retval(&retval, "CVodeSetNonlinearSolver", 1, udata->myid)) return 1; + + /* Create linear solver */ + LS = uopt->precond ? SUNLinSol_SPGMR(y, PREC_LEFT, 0, udata->ctx) : SUNLinSol_SPGMR(y, PREC_NONE, 0, udata->ctx); + if (check_retval((void *)LS, "SUNLinSol_SPGMR", 0, udata->myid)) return 1; + + /* Attach linear solver */ + retval = CVodeSetLinearSolver(cvode_mem, LS, NULL); + if (check_retval(&retval, "CVodeSetLinearSolver", 1, udata->myid)) return 1; + + /* Attach preconditioner */ + retval = CVodeSetPreconditioner(cvode_mem, NULL, PSolve); + if (check_retval(&retval, "CVodeSetPreconditioner", 1, udata->myid)) return 1; + } + else if (uopt->nls == "fixedpoint") + { + /* Create nonlinear solver */ + NLS = SUNNonlinSol_FixedPoint(y, uopt->fpaccel, udata->ctx); + if (check_retval((void *)NLS, "SUNNonlinSol_FixedPoint", 0, udata->myid)) return 1; + + /* Attach nonlinear solver */ + retval = CVodeSetNonlinearSolver(cvode_mem, NLS); + if (check_retval(&retval, "CVodeSetNonlinearSolver", 1, udata->myid)) return 1; + } + else + { + fprintf(stderr, "\nERROR: CV-BDF method is not compatible with the nls option provided\n"); + return 1; + } + + /* Output initial condition */ + if (uopt->nout > 0) + { + if (udata->myid == 0) + { + printf("\n t ||u||_rms ||v||_rms ||w||_rms\n"); + printf(" ----------------------------------------------------\n"); + } + WriteOutput(uopt->t0, y, udata, uopt); + } + + /* Integrate to final time */ + t = uopt->t0; + dtout = (uopt->tf - uopt->t0); + if (uopt->nout != 0) + dtout /= uopt->nout; + tout = t + dtout; + iout = 0; + + do + { + /* Integrate to output time */ + retval = CVode(cvode_mem, tout, y, &t, CV_NORMAL); + if 
(check_retval(&retval, "CVode", 1, udata->myid)) break; + + /* Output state */ + if (uopt->nout > 0) WriteOutput(t, y, udata, uopt); + + /* Update output time */ + tout += dtout; + tout = (tout > uopt->tf) ? uopt->tf : tout; + + iout++; + } while (iout < uopt->nout); + + /* Get final statistics */ + retval = CVodeGetNumSteps(cvode_mem, &nst); + check_retval(&retval, "CVodeGetNumSteps", 1, udata->myid); + retval = CVodeGetNumRhsEvals(cvode_mem, &nfi); + check_retval(&retval, "CVodeGetNumRhsEvals", 1, udata->myid); + retval = CVodeGetNumErrTestFails(cvode_mem, &netf); + check_retval(&retval, "CVodeGetNumErrTestFails", 1, udata->myid); + retval = CVodeGetNumNonlinSolvIters(cvode_mem, &nni); + check_retval(&retval, "CVodeGetNumNonlinSolvIters", 1, udata->myid); + retval = CVodeGetNumNonlinSolvConvFails(cvode_mem, &ncnf); + check_retval(&retval, "CVodeGetNumNonlinSolvConvFails", 1, udata->myid); + if (uopt->nls == "newton") + { + retval = CVodeGetNumLinIters(cvode_mem, &nli); + check_retval(&retval, "CVodeGetNumLinIters", 1, udata->myid); + retval = CVodeGetNumPrecSolves(cvode_mem, &npsol); + check_retval(&retval, "CVodeGetNumPrecSolves", 1, udata->myid); + } + + /* Print final statistics */ + if (udata->myid == 0) + { + printf("\nFinal Solver Statistics (for processor 0):\n"); + printf(" Internal solver steps = %li\n", nst); + printf(" Total RHS evals: %li\n", nfi + udata->nnlfi); + printf(" Total number of error test failures = %li\n", netf); + printf(" Total number of nonlinear solver convergence failures = %li\n", + ncnf); + printf(" Total number of nonlinear iterations = %li\n", nni); + if (uopt->nls == "newton") + { + printf(" Total number of linear iterations = %li\n", nli); + printf(" Total number of preconditioner solves = %li\n", npsol); + } + } + + /* Clean up */ + CVodeFree(&cvode_mem); + if (NLS) SUNNonlinSolFree(NLS); + if (LS) SUNLinSolFree(LS); + + /* Return success */ + return(0); +} + + +/* Setup CVODE and evolve problem in time with Adams method */ 
+int EvolveProblemAdams(N_Vector y, UserData* udata, UserOptions* uopt) +{ + void* cvode_mem = NULL; /* empty CVODE memory structure */ + SUNNonlinearSolver NLS = NULL; /* empty nonlinear solver structure */ + + realtype t, dtout, tout; /* current/output time data */ + int retval; /* reusable error-checking flag */ + int iout; /* output counter */ + long int nst, netf; /* step stats */ + long int nfi; /* RHS stats */ + long int nni, ncnf; /* nonlinear solver stats */ + + /* Additively split methods should not add the advection and reaction terms */ + udata->add_reactions = true; + + /* Create CVode */ + cvode_mem = CVodeCreate(CV_ADAMS, udata->ctx); + if (check_retval((void*)cvode_mem, "CVodeCreate", 0, udata->myid)) return 1; + + /* Initialize CVode */ + retval = CVodeInit(cvode_mem, AdvectionReaction, uopt->t0, y); + if (check_retval((void*)cvode_mem, "CVodeInit", 0, udata->myid)) return 1; + + /* Attach user data */ + retval = CVodeSetUserData(cvode_mem, (void*) udata); + if (check_retval(&retval, "CVodeSetUserData*", 1, udata->myid)) return 1; + + /* Specify tolerances */ + retval = CVodeSStolerances(cvode_mem, uopt->rtol, uopt->atol); + if (check_retval(&retval, "CVodeSStolerances", 1, udata->myid)) return 1; + + /* Increase the max number of steps allowed between outputs */ + retval = CVodeSetMaxNumSteps(cvode_mem, 100000); + if (check_retval(&retval, "CVodeSetMaxNumSteps", 1, udata->myid)) return 1; + + /* Create nonlinear solver */ + NLS = SUNNonlinSol_FixedPoint(y, uopt->fpaccel, udata->ctx); + if (check_retval((void *)NLS, "SUNNonlinSol_FixedPoint", 0, udata->myid)) return 1; + + /* Attach nonlinear solver */ + retval = CVodeSetNonlinearSolver(cvode_mem, NLS); + if (check_retval(&retval, "CVodeSetNonlinearSolver", 1, udata->myid)) return 1; + + /* Output initial condition */ + if (uopt->nout > 0) + { + if (udata->myid == 0) + { + printf("\n t ||u||_rms ||v||_rms ||w||_rms\n"); + printf(" ----------------------------------------------------\n"); + } + 
WriteOutput(uopt->t0, y, udata, uopt); + } + + /* Integrate to final time */ + t = uopt->t0; + dtout = (uopt->tf - uopt->t0); + if (uopt->nout != 0) + dtout /= uopt->nout; + tout = t + dtout; + iout = 0; + + do + { + /* Integrate to output time */ + retval = CVode(cvode_mem, tout, y, &t, CV_NORMAL); + if (check_retval(&retval, "CVode", 1, udata->myid)) break; + + /* Output state */ + if (uopt->nout > 0) WriteOutput(t, y, udata, uopt); + + /* Update output time */ + tout += dtout; + tout = (tout > uopt->tf) ? uopt->tf : tout; + + iout++; + } while (iout < uopt->nout); + + /* Get final statistics */ + retval = CVodeGetNumSteps(cvode_mem, &nst); + check_retval(&retval, "CVodeGetNumSteps", 1, udata->myid); + retval = CVodeGetNumRhsEvals(cvode_mem, &nfi); + check_retval(&retval, "CVodeGetNumRhsEvals", 1, udata->myid); + retval = CVodeGetNumErrTestFails(cvode_mem, &netf); + check_retval(&retval, "CVodeGetNumErrTestFails", 1, udata->myid); + retval = CVodeGetNumNonlinSolvIters(cvode_mem, &nni); + check_retval(&retval, "CVodeGetNumNonlinSolvIters", 1, udata->myid); + retval = CVodeGetNumNonlinSolvConvFails(cvode_mem, &ncnf); + check_retval(&retval, "CVodeGetNumNonlinSolvConvFails", 1, udata->myid); + + /* Print final statistics */ + if (udata->myid == 0) + { + printf("\nFinal Solver Statistics (for processor 0):\n"); + printf(" Internal solver steps = %li\n", nst); + printf(" Total RHS evals: %li\n", nfi + udata->nnlfi); + printf(" Total number of error test failures = %li\n", netf); + printf(" Total number of nonlinear solver convergence failures = %li\n", + ncnf); + } + + /* Clean up */ + CVodeFree(&cvode_mem); + SUNNonlinSolFree(NLS); + + /* Return success */ + return(0); +} diff --git a/benchmarks/advection_reaction_3D/raja/ida_driver.cpp b/benchmarks/advection_reaction_3D/raja/ida_driver.cpp new file mode 100644 index 0000000000..3ae28a43ca --- /dev/null +++ b/benchmarks/advection_reaction_3D/raja/ida_driver.cpp @@ -0,0 +1,195 @@ +/* 
-----------------------------------------------------------------------------
+ * Programmer(s): Cody J. Balos @ LLNL
+ * -----------------------------------------------------------------------------
+ * SUNDIALS Copyright Start
+ * Copyright (c) 2002-2023, Lawrence Livermore National Security
+ * and Southern Methodist University.
+ * All rights reserved.
+ *
+ * See the top-level LICENSE and NOTICE files for details.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * SUNDIALS Copyright End
+ * ---------------------------------------------------------------------------*/
+
+#include "ida/ida.h"
+#include "sunlinsol/sunlinsol_spgmr.h"
+#include "sunnonlinsol/sunnonlinsol_newton.h"
+#include "sunnonlinsol/sunnonlinsol_fixedpoint.h"
+#include "advection_reaction_3D.hpp"
+#include "rhs3D.hpp"
+
+
+/* Initial condition function: fills yp by calling AdvectionReaction(0, y, yp) (time argument fixed at 0); returns 0 on success, 1 on failure */
+int SetICDot(N_Vector y, N_Vector yp, UserData* udata)
+{
+  int retval;
+
+  retval = AdvectionReaction(0, y, yp, (void*)udata);
+  if (check_retval(&retval, "AdvectionReaction", 1, udata->myid)) return 1;
+
+  /* Return success */
+  return(0);
+}
+
+
+/* Setup IDA (Newton + SPGMR) and evolve y from uopt->t0 to uopt->tf; returns 1 if setup fails or uopt->nls is not "newton", otherwise 0 */
+int EvolveDAEProblem(N_Vector y, UserData* udata, UserOptions* uopt)
+{
+  void*              ida_mem = NULL; /* empty IDA memory structure       */
+  SUNNonlinearSolver NLS     = NULL; /* empty nonlinear solver structure */
+  SUNLinearSolver    LS      = NULL; /* empty linear solver structure    */
+  N_Vector           yp      = NULL; /* time derivative of y             */
+
+  realtype t, dtout, tout;    /* current/output time data     */
+  int      retval;            /* reusable error-checking flag */
+  int      iout;              /* output counter               */
+  long int nst, netf;         /* step stats                   */
+  long int nfi;               /* residual stats               */
+  long int nni, ncnf;         /* nonlinear solver stats       */
+  long int nli = 0, npsol = 0; /* linear solver stats         */
+
+  /* Unsplit residual: Reaction() must accumulate onto the advection term instead of zeroing its output first */
+  udata->add_reactions = true;
+
+  /* Create ydot' vector (N_VClone result is only NULL-checked here; its contents are set just below) */
+  yp = N_VClone(y);
+  if (check_retval((void*)yp, "N_VClone", 0, udata->myid)) return 1;
+
+  /* Set ydot' initial condition BEFORE IDAInit: IDAInit takes the initial derivative from yp, so yp must hold valid data at that call */
+  retval = SetICDot(y, yp, udata);
+  if (check_retval(&retval, "SetICDot", 1, udata->myid)) return 1;
+
+  /* Create IDA */
+  ida_mem = IDACreate(udata->ctx);
+  if (check_retval((void*)ida_mem, "IDACreate", 0, udata->myid)) return 1;
+
+  /* Initialize IDA with the residual function and the initial (t0, y, yp) */
+  retval = IDAInit(ida_mem, AdvectionReactionResidual, uopt->t0, y, yp);
+  if (check_retval(&retval, "IDAInit", 1, udata->myid)) return 1;
+
+  /* Attach user data */
+  retval = IDASetUserData(ida_mem, (void*) udata);
+  if (check_retval(&retval, "IDASetUserData", 1, udata->myid)) return 1;
+
+  /* Specify tolerances */
+  retval = IDASStolerances(ida_mem, uopt->rtol, uopt->atol);
+  if (check_retval(&retval, "IDASStolerances", 1, udata->myid)) return 1;
+
+  /* Increase the max number of steps allowed between outputs */
+  retval = IDASetMaxNumSteps(ida_mem, 100000);
+  if (check_retval(&retval, "IDASetMaxNumSteps", 1, udata->myid)) return 1;
+
+  /* Increase the max number of ETF allowed between outputs */
+  retval = IDASetMaxErrTestFails(ida_mem, 25);
+  if (check_retval(&retval, "IDASetMaxErrTestFails", 1, udata->myid)) return 1;
+
+  /* Create the (non)linear solver; only "newton" is supported (assumes uopt->nls is a std::string -- TODO confirm) */
+  if (uopt->nls == "newton")
+  {
+    /* Create nonlinear solver */
+    NLS = SUNNonlinSol_Newton(y, udata->ctx);
+    if (check_retval((void *)NLS, "SUNNonlinSol_Newton", 0, udata->myid)) return 1;
+
+    /* Attach nonlinear solver */
+    retval = IDASetNonlinearSolver(ida_mem, NLS);
+    if (check_retval(&retval, "IDASetNonlinearSolver", 1, udata->myid)) return 1;
+
+    /* Create linear solver: left preconditioning when requested, none otherwise */
+    LS = SUNLinSol_SPGMR(y, uopt->precond ? PREC_LEFT : PREC_NONE, 0, udata->ctx);
+    if (check_retval((void *)LS, "SUNLinSol_SPGMR", 0, udata->myid)) return 1;
+
+    /* Attach linear solver */
+    retval = IDASetLinearSolver(ida_mem, LS, NULL);
+    if (check_retval(&retval, "IDASetLinearSolver", 1, udata->myid)) return 1;
+
+    /* Attach preconditioner (solve routine only, no setup routine; attached even when uopt->precond is false) */
+    retval = IDASetPreconditioner(ida_mem, NULL, PSolveRes);
+    if (check_retval(&retval, "IDASetPreconditioner", 1, udata->myid)) return 1;
+  }
+  else
+  {
+    fprintf(stderr, "\nERROR: IDA method is not compatible with the nls option provided\n");
+    return 1;
+  }
+
+  /* Output initial condition (table header on rank 0 only) */
+  if (uopt->nout > 0)
+  {
+    if (udata->myid == 0)
+    {
+      printf("\n t ||u||_rms ||v||_rms ||w||_rms\n");
+      printf(" ----------------------------------------------------\n");
+    }
+    WriteOutput(uopt->t0, y, udata, uopt);
+  }
+
+  /* Integrate to final time: nout equal output intervals, or one interval [t0, tf] when nout == 0 */
+  t     = uopt->t0;
+  dtout = (uopt->tf - uopt->t0);
+  if (uopt->nout != 0)
+    dtout /= uopt->nout;
+  tout = t + dtout;
+  iout = 0;
+
+  do
+  {
+    /* Integrate to output time */
+    retval = IDASolve(ida_mem, tout, &t, y, yp, IDA_NORMAL);
+    if (check_retval(&retval, "IDASolve", 1, udata->myid)) break;
+
+    /* Output state */
+    if (uopt->nout > 0) WriteOutput(t, y, udata, uopt);
+
+    /* Update output time, clamped so it never passes tf */
+    tout += dtout;
+    tout = (tout > uopt->tf) ? uopt->tf : tout;
+
+    iout++;
+  } while (iout < uopt->nout);
+
+  /* Get final statistics (failures are reported by check_retval but do not abort) */
+  retval = IDAGetNumSteps(ida_mem, &nst);
+  check_retval(&retval, "IDAGetNumSteps", 1, udata->myid);
+  retval = IDAGetNumResEvals(ida_mem, &nfi);
+  check_retval(&retval, "IDAGetNumResEvals", 1, udata->myid);
+  retval = IDAGetNumErrTestFails(ida_mem, &netf);
+  check_retval(&retval, "IDAGetNumErrTestFails", 1, udata->myid);
+  retval = IDAGetNumNonlinSolvIters(ida_mem, &nni);
+  check_retval(&retval, "IDAGetNumNonlinSolvIters", 1, udata->myid);
+  retval = IDAGetNumNonlinSolvConvFails(ida_mem, &ncnf);
+  check_retval(&retval, "IDAGetNumNonlinSolvConvFails", 1, udata->myid);
+  if (uopt->nls == "newton")
+  {
+    retval = IDAGetNumLinIters(ida_mem, &nli);
+    check_retval(&retval, "IDAGetNumLinIters", 1, udata->myid);
+    retval = IDAGetNumPrecSolves(ida_mem, &npsol);
+    check_retval(&retval, "IDAGetNumPrecSolves", 1, udata->myid);
+  }
+
+  /* Print final statistics */
+  if (udata->myid == 0)
+  {
+    printf("\nFinal Solver Statistics (for processor 0):\n");
+    printf(" Internal solver steps = %li\n", nst);
+    printf(" Total RHS evals: %li\n", nfi + udata->nnlfi);
+    printf(" Total number of error test failures = %li\n", netf);
+    printf(" Total number of nonlinear solver convergence failures = %li\n",
+           ncnf);
+    printf(" Total number of nonlinear iterations = %li\n", nni);
+    if (uopt->nls == "newton")
+    {
+      printf(" Total number of linear iterations = %li\n", nli);
+      printf(" Total number of preconditioner solves = %li\n", npsol);
+    }
+  }
+
+  /* Clean up */
+  IDAFree(&ida_mem);
+  if (yp) N_VDestroy(yp);
+  if (NLS) SUNNonlinSolFree(NLS);
+  if (LS) SUNLinSolFree(LS);
+
+  /* Return success */
+  return(0);
+}
diff --git a/benchmarks/advection_reaction_3D/raja/rhs3D.hpp b/benchmarks/advection_reaction_3D/raja/rhs3D.hpp
new file mode 100644
index 0000000000..1bb2b6f105
--- /dev/null
+++ b/benchmarks/advection_reaction_3D/raja/rhs3D.hpp
@@ -0,0 +1,598 @@
+/* 
-----------------------------------------------------------------------------
+ * Programmer(s): David J. Gardner, Cody J. Balos @ LLNL
+ *                Daniel R. Reynolds @ SMU
+ * -----------------------------------------------------------------------------
+ * SUNDIALS Copyright Start
+ * Copyright (c) 2002-2023, Lawrence Livermore National Security
+ * and Southern Methodist University.
+ * All rights reserved.
+ *
+ * See the top-level LICENSE and NOTICE files for details.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ * SUNDIALS Copyright End
+ * -----------------------------------------------------------------------------*/
+
+#ifndef ADVECTION_REACTION_3D_RHS_HPP
+#define ADVECTION_REACTION_3D_RHS_HPP
+
+#include "advection_reaction_3D.hpp"
+
+/* --------------------------------------------------------------
+ * Right hand side (RHS) and residual functions
+ * --------------------------------------------------------------*/
+
+/* Compute the advection term f(t,y) = -c (grad * y) into ydot (ydot is overwritten) using upwind 1st order
+   finite differences; t is unused. Returns 0 on success and -1 if a neighbor-exchange call fails. */
+static int Advection(realtype t, N_Vector y, N_Vector ydot, void* user_data)
+{
+  /* access problem data */
+  UserData* udata = (UserData*) user_data;
+
+  SUNDIALS_CXX_MARK_FUNCTION(udata->prof);
+
+  /* set variable shortcuts: local grid extents, fields per point, and -c/dx, -c/dy, -c/dz */
+  const int      nxl = udata->grid->nxl;
+  const int      nyl = udata->grid->nyl;
+  const int      nzl = udata->grid->nzl;
+  const int      dof = udata->grid->dof;
+  const realtype c   = udata->c;
+  const realtype cx  = -c / udata->grid->dx;
+  const realtype cy  = -c / udata->grid->dy;
+  const realtype cz  = -c / udata->grid->dz;
+
+  /* local variables */
+  int retval;
+
+  /* fill send buffers and begin exchanging boundary information (finished below, after the interior kernel) */
+  SUNDIALS_MARK_BEGIN(udata->prof, "Neighbor Exchange");
+  retval = FillSendBuffers(y, udata);
+  if (check_retval(&retval, "FillSendBuffers", 1, udata->myid))
+    return(-1);
+  retval = udata->grid->ExchangeStart();
+  if (check_retval(&retval, "ExchangeStart", 1, udata->myid))
+    return(-1);
+  SUNDIALS_MARK_END(udata->prof, "Neighbor Exchange");
+
+  /* set output to zero (this is all that happens to ydot when c == 0, since both branches below are skipped) */
+  N_VConst(0.0, ydot);
+
+  /* create views of the state and RHS vectors, indexed (i,j,k,field). NOTE(review): the template arguments of RAJA::View and RAJA::kernel look stripped in this copy of the patch -- confirm against the repository */
+  RAJA::View > Yview(GetVecData(y), nxl, nyl, nzl, dof);
+  RAJA::View > dYview(GetVecData(ydot), nxl, nyl, nzl, dof);
+
+  /* iterate over domain interior (points whose upwind neighbors are all local), computing advection */
+  if (c > 0.0)
+  {
+    /* flow moving in the positive x,y,z direction: backward differences on indices 1..n-1 */
+    auto range = RAJA::make_tuple(RAJA::RangeSegment(1, nxl),
+                                  RAJA::RangeSegment(1, nyl),
+                                  RAJA::RangeSegment(1, nzl));
+    RAJA::kernel(range,
+      [=] DEVICE_FUNC (int i, int j, int k) {
+        const realtype u_ijk = Yview(i,j,k,0);
+        const realtype v_ijk = Yview(i,j,k,1);
+        const realtype w_ijk = Yview(i,j,k,2);
+
+        // grad * u
+        dYview(i,j,k,0) = cz * (u_ijk - Yview(i,j,k-1,0)); // du/dz
+        dYview(i,j,k,0) += cy * (u_ijk - Yview(i,j-1,k,0)); // du/dy
+        dYview(i,j,k,0) += cx * (u_ijk - Yview(i-1,j,k,0)); // du/dx
+
+        // grad * v
+        dYview(i,j,k,1) = cz * (v_ijk - Yview(i,j,k-1,1)); // dv/dz
+        dYview(i,j,k,1) += cy * (v_ijk - Yview(i,j-1,k,1)); // dv/dy
+        dYview(i,j,k,1) += cx * (v_ijk - Yview(i-1,j,k,1)); // dv/dx
+
+        // grad * w
+        dYview(i,j,k,2) = cz * (w_ijk - Yview(i,j,k-1,2)); // dw/dz
+        dYview(i,j,k,2) += cy * (w_ijk - Yview(i,j-1,k,2)); // dw/dy
+        dYview(i,j,k,2) += cx * (w_ijk - Yview(i-1,j,k,2)); // dw/dx
+      });
+
+  }
+  else if (c < 0.0)
+  {
+    /* flow moving in the negative x,y,z direction: forward differences on indices 0..n-2 */
+    auto range = RAJA::make_tuple(RAJA::RangeSegment(0, nxl-1),
+                                  RAJA::RangeSegment(0, nyl-1),
+                                  RAJA::RangeSegment(0, nzl-1));
+    RAJA::kernel(range,
+      [=] DEVICE_FUNC (int i, int j, int k) {
+        const realtype u_ijk = Yview(i,j,k,0);
+        const realtype v_ijk = Yview(i,j,k,1);
+        const realtype w_ijk = Yview(i,j,k,2);
+
+        // grad * u
+        dYview(i,j,k,0) = cz * (Yview(i,j,k+1,0) - u_ijk); // du/dz
+        dYview(i,j,k,0) += cy * (Yview(i,j+1,k,0) - u_ijk); // du/dy
+        dYview(i,j,k,0) += cx * (Yview(i+1,j,k,0) - u_ijk); // du/dx
+
+        // grad * v
+        dYview(i,j,k,1) = cz * (Yview(i,j,k+1,1) - v_ijk); // dv/dz
+        dYview(i,j,k,1) += cy * (Yview(i,j+1,k,1) - v_ijk); // dv/dy
+        dYview(i,j,k,1) += cx * (Yview(i+1,j,k,1) - v_ijk); // dv/dx
+
+        // grad * w
+        dYview(i,j,k,2) = cz * (Yview(i,j,k+1,2) - w_ijk); // dw/dz
+        dYview(i,j,k,2) += cy * (Yview(i,j+1,k,2) - w_ijk); // dw/dy
+        dYview(i,j,k,2) += cx * (Yview(i+1,j,k,2) - w_ijk); // dw/dx
+      });
+
+  }
+
+  /* finish exchanging boundary information; the receive buffers are only read after this point */
+  SUNDIALS_MARK_BEGIN(udata->prof, "Neighbor Exchange");
+  retval = udata->grid->ExchangeEnd();
+  if (check_retval(&retval, "ExchangeEnd", 1, udata->myid))
+    return(-1);
+  SUNDIALS_MARK_END(udata->prof, "Neighbor Exchange");
+
+
+  /* compute advection at process boundaries (the planes skipped by the interior kernel) */
+  if (c > 0.0)
+  {
+    /* Flow moving in the positive x,y,z direction:
+     * boundaries are west face (i=0), south face (j=0), back face (k=0) */
+
+    /* Perform calculations on each "lower" face; faces share edges/corners, where each kernel evaluates the same expression */
+    RAJA::View>
+      Wrecv(udata->grid->getRecvBuffer("WEST"), nyl, nzl, dof);
+    RAJA::View>
+      Srecv(udata->grid->getRecvBuffer("SOUTH"), nxl, nzl, dof);
+    RAJA::View>
+      Brecv(udata->grid->getRecvBuffer("BACK"), nxl, nyl, dof);
+
+    auto west_face = RAJA::make_tuple(RAJA::RangeSegment(0, nyl),
+                                      RAJA::RangeSegment(0, nzl),
+                                      RAJA::RangeSegment(0, dof));
+    RAJA::kernel(west_face,
+      [=] DEVICE_FUNC (int j, int k, int l) {
+        const int i = 0;
+        const realtype Yijkl = Yview(i,j,k,l);
+        const realtype YSouth = (j > 0) ? Yview(i,j-1,k,l) : Srecv(i,k,l);
+        const realtype YBack = (k > 0) ? Yview(i,j,k-1,l) : Brecv(i,j,l);
+        dYview(i,j,k,l) = cx * (Yijkl - Wrecv(j,k,l)); // d/dx
+        dYview(i,j,k,l) += cy * (Yijkl - YSouth); // d/dy
+        dYview(i,j,k,l) += cz * (Yijkl - YBack); // d/dz
+      });
+
+    auto south_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl),
+                                       RAJA::RangeSegment(0, nzl),
+                                       RAJA::RangeSegment(0, dof));
+    RAJA::kernel(south_face,
+      [=] DEVICE_FUNC (int i, int k, int l) {
+        const int j = 0;
+        const realtype Yijkl = Yview(i,j,k,l);
+        const realtype YWest = (i > 0) ? Yview(i-1,j,k,l) : Wrecv(j,k,l);
+        const realtype YBack = (k > 0) ? Yview(i,j,k-1,l) : Brecv(i,j,l);
+        dYview(i,j,k,l) = cx * (Yijkl - YWest); // d/dx
+        dYview(i,j,k,l) += cy * (Yijkl - Srecv(i,k,l)); // d/dy
+        dYview(i,j,k,l) += cz * (Yijkl - YBack); // d/dz
+      });
+
+    auto back_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl),
+                                      RAJA::RangeSegment(0, nyl),
+                                      RAJA::RangeSegment(0, dof));
+    RAJA::kernel(back_face,
+      [=] DEVICE_FUNC (int i, int j, int l) {
+        const int k = 0;
+        const realtype Yijkl = Yview(i,j,k,l);
+        const realtype YWest = (i > 0) ? Yview(i-1,j,k,l) : Wrecv(j,k,l);
+        const realtype YSouth = (j > 0) ? Yview(i,j-1,k,l) : Srecv(i,k,l);
+        dYview(i,j,k,l) = cx * (Yijkl - YWest); // d/dx
+        dYview(i,j,k,l) += cy * (Yijkl - YSouth); // d/dy
+        dYview(i,j,k,l) += cz * (Yijkl - Brecv(i,j,l)); // d/dz
+      });
+
+  }
+  else if (c < 0.0)
+  {
+
+    /* Flow moving in the negative x,y,z direction:
+     * boundaries are east face (i=nxl-1), north face (j=nyl-1), and front face (k=nzl-1) */
+
+    /* Perform calculations on each "upper" face; faces share edges/corners, where each kernel evaluates the same expression */
+    RAJA::View >
+      Erecv(udata->grid->getRecvBuffer("EAST"), nyl, nzl, dof);
+    RAJA::View >
+      Nrecv(udata->grid->getRecvBuffer("NORTH"), nxl, nzl, dof);
+    RAJA::View >
+      Frecv(udata->grid->getRecvBuffer("FRONT"), nxl, nyl, dof);
+
+    auto east_face = RAJA::make_tuple(RAJA::RangeSegment(0, nyl),
+                                      RAJA::RangeSegment(0, nzl),
+                                      RAJA::RangeSegment(0, dof));
+    RAJA::kernel(east_face,
+      [=] DEVICE_FUNC (int j, int k, int l) {
+        const int i = nxl-1;
+        const realtype Yijkl = Yview(i,j,k,l);
+        const realtype YNorth = (j < nyl-1) ? Yview(i,j+1,k,l) : Nrecv(i,k,l);
+        const realtype YFront = (k < nzl-1) ? Yview(i,j,k+1,l) : Frecv(i,j,l);
+        dYview(i,j,k,l) = cx * (Erecv(j,k,l) - Yijkl); // d/dx
+        dYview(i,j,k,l) += cy * (YNorth - Yijkl); // d/dy
+        dYview(i,j,k,l) += cz * (YFront - Yijkl); // d/dz
+      });
+
+    auto north_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl),
+                                       RAJA::RangeSegment(0, nzl),
+                                       RAJA::RangeSegment(0, dof));
+    RAJA::kernel(north_face,
+      [=] DEVICE_FUNC (int i, int k, int l) {
+        const int j = nyl-1;
+        const realtype Yijkl = Yview(i,j,k,l);
+        const realtype YEast = (i < nxl-1) ? Yview(i+1,j,k,l) : Erecv(j,k,l);
+        const realtype YFront = (k < nzl-1) ? Yview(i,j,k+1,l) : Frecv(i,j,l);
+        dYview(i,j,k,l) = cx * (YEast - Yijkl); // d/dx
+        dYview(i,j,k,l) += cy * (Nrecv(i,k,l) - Yijkl); // d/dy
+        dYview(i,j,k,l) += cz * (YFront - Yijkl); // d/dz
+      });
+
+    auto front_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl),
+                                       RAJA::RangeSegment(0, nyl),
+                                       RAJA::RangeSegment(0, dof));
+    RAJA::kernel(front_face,
+      [=] DEVICE_FUNC (int i, int j, int l) {
+        const int k = nzl-1;
+        const realtype Yijkl = Yview(i,j,k,l);
+        const realtype YEast = (i < nxl-1) ? Yview(i+1,j,k,l) : Erecv(j,k,l);
+        const realtype YNorth = (j < nyl-1) ? Yview(i,j+1,k,l) : Nrecv(i,k,l);
+        dYview(i,j,k,l) = cx * (YEast - Yijkl); // d/dx
+        dYview(i,j,k,l) += cy * (YNorth - Yijkl); // d/dy
+        dYview(i,j,k,l) += cz * (Frecv(i,j,l) - Yijkl); // d/dz
+      });
+  }
+
+  /* return success */
+  return(0);
+}
+
+
+/* Compute the reaction term g(t,y) and add it to ydot; ydot is zeroed first unless udata->add_reactions is set. Returns 0 on success, -1 if a data pointer is NULL. */
+static int Reaction(realtype t, N_Vector y, N_Vector ydot, void* user_data)
+{
+  /* access problem data */
+  UserData* udata = (UserData*) user_data;
+
+  SUNDIALS_CXX_MARK_FUNCTION(udata->prof);
+
+  /* set variable shortcuts: reaction parameters and local grid extents */
+  const realtype A   = udata->A;
+  const realtype B   = udata->B;
+  const realtype k1  = udata->k1;
+  const realtype k2  = udata->k2;
+  const realtype k3  = udata->k3;
+  const realtype k4  = udata->k4;
+  const realtype k5  = udata->k5;
+  const realtype k6  = udata->k6;
+  const int      nxl = udata->grid->nxl;
+  const int      nyl = udata->grid->nyl;
+  const int      nzl = udata->grid->nzl;
+  const int      dof = udata->grid->dof;
+
+  /* Zero output if not adding reactions to existing RHS */
+  if (!udata->add_reactions)
+    N_VConst(0.0, ydot);
+
+  /* access data arrays (these pointers are only NULL-checked; the views below call GetVecData again) */
+  realtype* Ydata = NULL;
+  Ydata = GetVecData(y);
+  if (check_retval((void *)Ydata, "GetVecData", 0, udata->myid))
+    return(-1);
+  realtype* dYdata = NULL;
+  dYdata = GetVecData(ydot);
+  if (check_retval((void *)dYdata, "GetVecData", 0, udata->myid))
+    return(-1);
+
+  /* create 4D views of state and RHS vectors, indexed (i,j,k,field) */
+  RAJA::View > Yview(GetVecData(y), nxl, nyl, nzl, dof);
+  RAJA::View > dYview(GetVecData(ydot), nxl, nyl, nzl, dof);
+
+  /* add reaction terms to RHS at every local grid point (fields 0,1,2 are u,v,w) */
+  auto range = RAJA::make_tuple(RAJA::RangeSegment(0, nxl),
+                                RAJA::RangeSegment(0, nyl),
+                                RAJA::RangeSegment(0, nzl));
+  RAJA::kernel(range,
+    [=] DEVICE_FUNC (int i, int j, int k) {
+      const realtype u = Yview(i,j,k,0);
+      const realtype v = Yview(i,j,k,1);
+      const realtype w = Yview(i,j,k,2);
+      dYview(i,j,k,0) += k1 * A - k2 * w * u + k3 * u * u * v - k4 * u;
+      dYview(i,j,k,1) += k2 * w * u - k3 * u * u * v;
+      dYview(i,j,k,2) += -k2 * w * u + k5 * B - k6 * w;
+    });
+
+  /* return success */
+  return(0);
+}
+
+
+/* Compute the RHS as h(t,y) = f(t,y) + g(t,y); the sum is only formed when udata->add_reactions is set. Returns 0 or -1. */
+static int AdvectionReaction(realtype t, N_Vector y, N_Vector ydot,
+                             void *user_data)
+{
+  /* access problem data */
+  UserData* udata = (UserData*) user_data;
+  int retval;
+
+  /* NOTE: The order in which Advection and Reaction are called is critical here. Advection must be
+           computed first because it overwrites ydot, while Reaction accumulates into it with +=. */
+  retval = Advection(t, y, ydot, user_data);
+  if (check_retval((void *)&retval, "Advection", 1, udata->myid)) return(-1);
+
+  retval = Reaction(t, y, ydot, user_data);
+  if (check_retval((void *)&retval, "Reaction", 1, udata->myid)) return(-1);
+
+  /* return success */
+  return(0);
+}
+
+/* Compute the residual F(t,y,y') = ydot - h(t,y) = 0 into F; here ydot is the input derivative vector. Returns 0 or -1. */
+static int AdvectionReactionResidual(realtype t, N_Vector y, N_Vector ydot,
+                                     N_Vector F, void *user_data)
+{
+  /* access problem data */
+  UserData* udata = (UserData*) user_data;
+  int retval;
+
+  /* NOTE: The order in which Advection and Reaction are called is critical here. Advection must be
+           computed first because it overwrites F, while Reaction accumulates into it with +=. */
+  retval = Advection(t, y, F, user_data); /* F = -c y_x */
+  if (check_retval((void *)&retval, "Advection", 1, udata->myid)) return(-1);
+
+  retval = Reaction(t, y, F, user_data); /* F = -c y_x + g(t,y) */
+  if (check_retval((void *)&retval, "Reaction", 1, udata->myid)) return(-1);
+
+  /* F = ydot - h(t,y) = ydot + c y_x - g(t,y) */
+  N_VLinearSum(1.0, ydot, -1.0, F, F);
+
+  /* return success */
+  return(0);
+}
+
+/* --------------------------------------------------------------
+ * Linear system and Jacobian functions
+ * --------------------------------------------------------------*/
+
+/* Solve the linear systems Ax = b where A = I - gamma*dg/dy, one independent 3x3 system per grid point.
+   When using a fully implicit method, we are approximating
+   dh/dy as dg/dy. y supplies the state for dg/dy, x receives the solution; always returns 0. */
+static int SolveReactionLinSys(N_Vector y, N_Vector x, N_Vector b,
+                               realtype gamma, UserData* udata)
+{
+  /* set variable shortcuts */
+  const int      dof = udata->grid->dof;
+  const int      nxl = udata->grid->nxl;
+  const int      nyl = udata->grid->nyl;
+  const int      nzl = udata->grid->nzl;
+  const realtype k2  = udata->k2;
+  const realtype k3  = udata->k3;
+  const realtype k4  = udata->k4;
+  const realtype k6  = udata->k6;
+
+  /* create 4D views of state, RHS and solution vectors, indexed (i,j,k,field) */
+  RAJA::View> Yview(GetVecData(y), nxl, nyl, nzl, dof);
+  RAJA::View> Bview(GetVecData(b), nxl, nyl, nzl, dof);
+  RAJA::View> Xview(GetVecData(x), nxl, nyl, nzl, dof);
+
+  /* solve reaction linear system */
+  auto blocks = RAJA::make_tuple(RAJA::RangeSegment(0, nxl),
+                                 RAJA::RangeSegment(0, nyl),
+                                 RAJA::RangeSegment(0, nzl));
+  RAJA::kernel(blocks,
+    [=] DEVICE_FUNC (int i, int j, int k) {
+
+      /* shortcuts to u, v, w for the block */
+      const realtype u = Yview(i,j,k,0);
+      const realtype v = Yview(i,j,k,1);
+      const realtype w = Yview(i,j,k,2);
+
+      //
+      // compute J = dg/dy, stored row-major in A0..A8
+      //
+
+      /* 1st row: u, v, w */
+      realtype A0 = -k2 * w + 2.0 * k3 * u * v - k4;
+      realtype A1 = k3 * u * u;
+      realtype A2 = -k2 * u;
+
+      /* 2nd row: u, v, w */
+      realtype A3 = k2 * w - 2.0 * k3 * u * v;
+      realtype A4 = -k3 * u * u;
+      realtype A5 = k2 * u;
+
+      /* 3rd row: u, v, w */
+      realtype A6 = -k2 * w;
+      realtype A7 = 0.0;
+      realtype A8 = -k2 * u - k6;
+
+      //
+      // compute A = I - gamma*J
+      //
+
+      A0 = 1. - (gamma * A0);
+      A1 = -gamma * A1;
+      A2 = -gamma * A2;
+      A3 = -gamma * A3;
+      A4 = 1. - (gamma * A4);
+      A5 = -gamma * A5;
+      A6 = -gamma * A6;
+      A7 = -gamma * A7;
+      A8 = 1. - (gamma * A8);
+
+      //
+      // compute x = A^{-1}b in closed form (scratch_6 is 1/det(A); no pivoting or zero-divisor checks)
+      //
+
+      realtype scratch_0 = A4*A8;
+      realtype scratch_1 = A1*A5;
+      realtype scratch_2 = A2*A7;
+      realtype scratch_3 = A5*A7;
+      realtype scratch_4 = A1*A8;
+      realtype scratch_5 = A2*A4;
+      realtype scratch_6 = 1.0/(A0*scratch_0 - A0*scratch_3 + A3*scratch_2 - A3*scratch_4 + A6*scratch_1 - A6*scratch_5);
+      realtype scratch_7 = A2*A3;
+      realtype scratch_8 = A6*Bview(i,j,k,0);
+      realtype scratch_9 = A2*A6;
+      realtype scratch_10 = A3*Bview(i,j,k,0);
+      realtype scratch_11 = 1.0/A0;
+      realtype scratch_12 = A1*scratch_11;
+      realtype scratch_13 = (-A6*scratch_12 + A7)/(-A3*scratch_12 + A4);
+
+      Xview(i,j,k,0) = scratch_6*( Bview(i,j,k,0)*(scratch_0 - scratch_3) + Bview(i,j,k,1)*(scratch_2 - scratch_4) + Bview(i,j,k,2)*(scratch_1 - scratch_5));
+      Xview(i,j,k,1) = scratch_6*( Bview(i,j,k,2)*(scratch_7 - A0*A5) + Bview(i,j,k,1)*(A0*A8 - scratch_9) + A5*scratch_8 - A8*scratch_10 );
+      Xview(i,j,k,2) = ( -Bview(i,j,k,2) + scratch_11*scratch_8 + scratch_13*(Bview(i,j,k,1) - scratch_10*scratch_11)) /
+                       (-A8 + scratch_11*scratch_9 + scratch_13*(A5 - scratch_11*scratch_7));
+
+    });
+
+  return(0);
+}
+
+/* Solve the linear systems Ax = b where A = -dg/dy + gamma*I, one independent 3x3 system per grid point.
+   We are approximating dh/dy as dg/dy. y supplies the state for dg/dy, x receives the solution; always returns 0. */
+static int SolveReactionLinSysRes(N_Vector y, N_Vector x, N_Vector b,
+                                  realtype gamma, UserData* udata)
+{
+  /* set variable shortcuts */
+  const int      dof = udata->grid->dof;
+  const int      nxl = udata->grid->nxl;
+  const int      nyl = udata->grid->nyl;
+  const int      nzl = udata->grid->nzl;
+  const realtype k2  = udata->k2;
+  const realtype k3  = udata->k3;
+  const realtype k4  = udata->k4;
+  const realtype k6  = udata->k6;
+
+  /* create 4D views of state, RHS and solution vectors, indexed (i,j,k,field) */
+  RAJA::View> Yview(GetVecData(y), nxl, nyl, nzl, dof);
+  RAJA::View> Bview(GetVecData(b), nxl, nyl, nzl, dof);
+  RAJA::View> Xview(GetVecData(x), nxl, nyl, nzl, dof);
+
+  /* solve reaction linear system */
+  auto blocks = RAJA::make_tuple(RAJA::RangeSegment(0, nxl),
+                                 RAJA::RangeSegment(0, nyl),
+                                 RAJA::RangeSegment(0, nzl));
+  RAJA::kernel(blocks,
+    [=] DEVICE_FUNC (int i, int j, int k) {
+
+      /* shortcuts to u, v, w for the block */
+      const realtype u = Yview(i,j,k,0);
+      const realtype v = Yview(i,j,k,1);
+      const realtype w = Yview(i,j,k,2);
+
+      //
+      // compute dg/dy, stored row-major in A0..A8
+      //
+
+      /* 1st row: u, v, w */
+      realtype A0 = -k2 * w + 2.0 * k3 * u * v - k4;
+      realtype A1 = k3 * u * u;
+      realtype A2 = -k2 * u;
+
+      /* 2nd row: u, v, w */
+      realtype A3 = k2 * w - 2.0 * k3 * u * v;
+      realtype A4 = -k3 * u * u;
+      realtype A5 = k2 * u;
+
+      /* 3rd row: u, v, w */
+      realtype A6 = -k2 * w;
+      realtype A7 = 0.0;
+      realtype A8 = -k2 * u - k6;
+
+      //
+      // compute A = -dg/dy + gamma*I, i.e. negate every entry of
+      // dg/dy and add gamma to the three diagonal entries
+      // (A0, A4, A8); no df/dydot term is evaluated here
+      //
+
+      A0 = -A0 + gamma;
+      A1 = -A1;
+      A2 = -A2;
+      A3 = -A3;
+      A4 = -A4 + gamma;
+      A5 = -A5;
+      A6 = -A6;
+      A7 = -A7;
+      A8 = -A8 + gamma;
+
+      //
+      // compute x = A^{-1}b in closed form (scratch_6 is 1/det(A); no pivoting or zero-divisor checks)
+      //
+
+      realtype scratch_0 = A4*A8;
+      realtype scratch_1 = A1*A5;
+      realtype scratch_2 = A2*A7;
+      realtype scratch_3 = A5*A7;
+      realtype scratch_4 = A1*A8;
+      realtype scratch_5 = A2*A4;
+      realtype scratch_6 = 1.0/(A0*scratch_0 - A0*scratch_3 + A3*scratch_2 - A3*scratch_4 + A6*scratch_1 - A6*scratch_5);
+      realtype scratch_7 = A2*A3;
+      realtype scratch_8 = A6*Bview(i,j,k,0);
+      realtype scratch_9 = A2*A6;
+      realtype scratch_10 = A3*Bview(i,j,k,0);
+      realtype scratch_11 = 1.0/A0;
+      realtype scratch_12 = A1*scratch_11;
+      realtype scratch_13 = (-A6*scratch_12 + A7)/(-A3*scratch_12 + A4);
+
+      Xview(i,j,k,0) = scratch_6*( Bview(i,j,k,0)*(scratch_0 - scratch_3) + Bview(i,j,k,1)*(scratch_2 - scratch_4) + Bview(i,j,k,2)*(scratch_1 - scratch_5));
+      Xview(i,j,k,1) = scratch_6*( Bview(i,j,k,2)*(scratch_7 - A0*A5) + Bview(i,j,k,1)*(A0*A8 - scratch_9) + A5*scratch_8 - A8*scratch_10 );
+      Xview(i,j,k,2) = ( -Bview(i,j,k,2) + scratch_11*scratch_8 + scratch_13*(Bview(i,j,k,1) - scratch_10*scratch_11)) /
+                       (-A8 + scratch_11*scratch_9 + scratch_13*(A5 - scratch_11*scratch_7));
+
+    });
+
+  return(0);
+}
+
+
+/* --------------------------------------------------------------
+ * Preconditioner functions
+ * --------------------------------------------------------------*/
+
+/* Solves Pz = r where P = I - gamma * dg/dy by forwarding to SolveReactionLinSys; t, ydot, delta and lr are unused */
+static int PSolve(realtype t, N_Vector y, N_Vector ydot, N_Vector r,
+                  N_Vector z, realtype gamma, realtype delta, int lr,
+                  void *user_data)
+{
+  /* local variables */
+  UserData* udata = (UserData*) user_data;
+  int retval;
+
+  SUNDIALS_CXX_MARK_FUNCTION(udata->prof);
+
+  /* solve the task-local linear system Pz = r */
+  retval = SolveReactionLinSys(y, z, r, gamma, udata);
+
+  return(retval);
+}
+
+/* Solves Pz = r where P = -dg/dy + cj*I by forwarding to SolveReactionLinSysRes (cj is passed as its gamma); t, ydot, F and delta are unused */
+static int PSolveRes(realtype t, N_Vector y, N_Vector ydot, N_Vector F,
+                     N_Vector r, N_Vector z, realtype cj, realtype delta,
+                     void *user_data)
+{
+  /* local variables */
+  UserData* udata = (UserData*) user_data;
+  int retval;
+
+  SUNDIALS_CXX_MARK_FUNCTION(udata->prof);
+
+  /* solve the task-local linear system Pz = r */
+  retval = SolveReactionLinSysRes(y, z, r, cj, udata);
+
+  return(retval);
+}
+
+
+#endif
diff --git a/benchmarks/advection_reaction_3D/rhs3D.hpp
b/benchmarks/advection_reaction_3D/rhs3D.hpp deleted file mode 100644 index 874e5cb8bb..0000000000 --- a/benchmarks/advection_reaction_3D/rhs3D.hpp +++ /dev/null @@ -1,700 +0,0 @@ -/* ----------------------------------------------------------------------------- - * Programmer(s): David J. Gardner, Cody J. Balos @ LLNL - * ----------------------------------------------------------------------------- - * SUNDIALS Copyright Start - * Copyright (c) 2002-2023, Lawrence Livermore National Security - * and Southern Methodist University. - * All rights reserved. - * - * See the top-level LICENSE and NOTICE files for details. - * - * SPDX-License-Identifier: BSD-3-Clause - * SUNDIALS Copyright End - * -----------------------------------------------------------------------------*/ - -#ifndef ADVECTION_REACTION_3D_RHS_HPP -#define ADVECTION_REACTION_3D_RHS_HPP - -#include "advection_reaction_3D.hpp" - -using raja_xyz_tuple = camp::tuple; - -/* -------------------------------------------------------------- - * Right hand side (RHS) and residual functions - * --------------------------------------------------------------*/ - -/* Compute the advection term f(t,y) = -c (grad * y). This is done using - upwind 1st order finite differences. 
*/ -static int Advection(realtype t, N_Vector y, N_Vector ydot, void* user_data) -{ - /* access problem data */ - UserData* udata = (UserData*) user_data; - - SUNDIALS_CXX_MARK_FUNCTION(udata->prof); - - /* set variable shortcuts */ - const int nxl = udata->grid->nxl; - const int nyl = udata->grid->nyl; - const int nzl = udata->grid->nzl; - const int dof = udata->grid->dof; - const realtype c = udata->c; - const realtype cx = -c / udata->grid->dx; - const realtype cy = -c / udata->grid->dy; - const realtype cz = -c / udata->grid->dz; - - /* local variables */ - int retval; - - /* begin exchanging boundary information */ - if (udata->grid->nprocs() > 1) - { - retval = ExchangeAllStart(y, udata); - if (check_retval(&retval, "ExchangeAllStart", 1, udata->myid)) - return(-1); - } - - /* set output to zero */ - N_VConst(0.0, ydot); - - /* create views of the data */ - RAJA::View > Yview(GetVecData(y), - nxl, nyl, nzl, dof); - RAJA::View > dYview(GetVecData(ydot), - nxl, nyl, nzl, dof); - - /* iterate over domain interior, computing advection */ - if (c > 0.0) - { - /* flow moving in the positive x,y,z direction */ - auto range = RAJA::make_tuple(RAJA::RangeSegment(1, nxl), - RAJA::RangeSegment(1, nyl), - RAJA::RangeSegment(1, nzl)); - - RAJA::kernel(range, - [=] DEVICE_FUNC (int i, int j, int k) { - const realtype u_ijk = Yview(i,j,k,0); - const realtype v_ijk = Yview(i,j,k,1); - const realtype w_ijk = Yview(i,j,k,2); - - // grad * u - dYview(i,j,k,0) = cz * (u_ijk - Yview(i,j,k-1,0)); // du/dz - dYview(i,j,k,0) += cy * (u_ijk - Yview(i,j-1,k,0)); // du/dy - dYview(i,j,k,0) += cx * (u_ijk - Yview(i-1,j,k,0)); // du/dx - - // grad * v - dYview(i,j,k,1) = cz * (v_ijk - Yview(i,j,k-1,1)); // dv/dz - dYview(i,j,k,1) += cy * (v_ijk - Yview(i,j-1,k,1)); // dv/dy - dYview(i,j,k,1) += cx * (v_ijk - Yview(i-1,j,k,1)); // dv/dx - - // grad * w - dYview(i,j,k,2) = cz * (w_ijk - Yview(i,j,k-1,2)); // dw/dz - dYview(i,j,k,2) += cy * (w_ijk - Yview(i,j-1,k,2)); // dw/dy - 
dYview(i,j,k,2) += cx * (w_ijk - Yview(i-1,j,k,2)); // dw/dx - }); - } - else if (c < 0.0) - { - /* flow moving in the negative x,y,z direction */ - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, nxl-1), - RAJA::RangeSegment(0, nyl-1), - RAJA::RangeSegment(0, nzl-1)); - RAJA::kernel(range, - [=] DEVICE_FUNC (int i, int j, int k) { - const realtype u_ijk = Yview(i,j,k,0); - const realtype v_ijk = Yview(i,j,k,1); - const realtype w_ijk = Yview(i,j,k,2); - - // grad * u - dYview(i,j,k,0) = cz * (u_ijk - Yview(i,j,k+1,0)); // du/dz - dYview(i,j,k,0) += cy * (u_ijk - Yview(i,j+1,k,0)); // du/dy - dYview(i,j,k,0) += cx * (u_ijk - Yview(i+1,j,k,0)); // du/dx - - // grad * v - dYview(i,j,k,1) = cz * (v_ijk - Yview(i,j,k+1,1)); // dv/dz - dYview(i,j,k,1) += cy * (v_ijk - Yview(i,j+1,k,1)); // dv/dy - dYview(i,j,k,1) += cx * (v_ijk - Yview(i+1,j,k,1)); // dv/dx - - // grad * w - dYview(i,j,k,2) = cz * (w_ijk - Yview(i,j,k+1,2)); // dw/dz - dYview(i,j,k,2) += cy * (w_ijk - Yview(i,j+1,k,2)); // dw/dy - dYview(i,j,k,2) += cx * (w_ijk - Yview(i+1,j,k,2)); // dw/dx - }); - } - - /* finish exchanging boundary information */ - if (udata->grid->nprocs() > 1) - { - retval = ExchangeAllEnd(udata); - if (check_retval(&retval, "ExchangeAllEnd", 1, udata->myid)) - return(-1); - } - - /* compute advection at process boundaries */ - if (c > 0.0) - { - if (udata->grid->npx > 1) - { - /* Flow moving in the positive x,y,z direction: - * boundaries are west face, south face, front face */ - - RAJA::View > - Yim1jk(udata->grid->getRecvBuffer("WEST"), nyl, nzl, dof); // Wrecv should have data that was sent from East - - auto west_face = RAJA::make_tuple(RAJA::RangeSegment(0, nyl), - RAJA::RangeSegment(0, nzl), - RAJA::RangeSegment(0, dof)); - - RAJA::kernel(west_face, - [=] DEVICE_FUNC (int j, int k, int l) { - dYview(0,j,k,l) += cx * (Yview(0,j,k,l) - Yim1jk(j,k,l)); // d/dx - }); - } - else - { - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, 1), - RAJA::RangeSegment(0, 1), - 
RAJA::RangeSegment(0, 1)); - - RAJA::kernel(range, - [=] DEVICE_FUNC (int i, int j, int k) { - const realtype u_ijk = Yview(i,j,k,0); - const realtype v_ijk = Yview(i,j,k,1); - const realtype w_ijk = Yview(i,j,k,2); - - dYview(i,j,k,0) = cx * (u_ijk - Yview(nxl-1,j,k,0)); // du/dx - dYview(i,j,k,1) = cx * (v_ijk - Yview(nxl-1,j,k,1)); // dv/dx - dYview(i,j,k,2) = cx * (w_ijk - Yview(nxl-1,j,k,2)); // dw/dx - }); - - } - - if (udata->grid->npy > 1) - { - RAJA::View > - Yijm1k(udata->grid->getRecvBuffer("SOUTH"), nxl, nzl, dof); // Nrecv should have data that was sent from North - - auto south_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl), - RAJA::RangeSegment(0, nzl), - RAJA::RangeSegment(0, dof)); - - RAJA::kernel(south_face, - [=] DEVICE_FUNC (int i, int k, int l) { - dYview(i,0,k,l) += cy * (Yview(i,0,k,l) - Yijm1k(i,k,l)); // d/dy - }); - } - else - { - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, 1), - RAJA::RangeSegment(0, 1), - RAJA::RangeSegment(0, 1)); - - RAJA::kernel(range, - [=] DEVICE_FUNC (int i, int j, int k) { - const realtype u_ijk = Yview(i,j,k,0); - const realtype v_ijk = Yview(i,j,k,1); - const realtype w_ijk = Yview(i,j,k,2); - - dYview(i,j,k,0) += cy * (u_ijk - Yview(i,nyl-1,k,0)); // du/dy - dYview(i,j,k,1) += cy * (v_ijk - Yview(i,nyl-1,k,1)); // dv/dy - dYview(i,j,k,2) += cy * (w_ijk - Yview(i,nyl-1,k,2)); // dw/dy - }); - } - - if (udata->grid->npz > 1) - { - RAJA::View > - Yijkm1(udata->grid->getRecvBuffer("FRONT"), nxl, nyl, dof); // Frecv should have data that was sent from Back - - auto front_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl), - RAJA::RangeSegment(0, nyl), - RAJA::RangeSegment(0, dof)); - - RAJA::kernel(front_face, - [=] DEVICE_FUNC (int i, int j, int l) { - dYview(i,j,0,l) += cz * (Yview(i,j,0,l) - Yijkm1(i,j,l)); // d/dz - }); - - } - else - { - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, 1), - RAJA::RangeSegment(0, 1), - RAJA::RangeSegment(0, 1)); - - RAJA::kernel(range, - [=] DEVICE_FUNC (int i, 
int j, int k) { - const realtype u_ijk = Yview(i,j,k,0); - const realtype v_ijk = Yview(i,j,k,1); - const realtype w_ijk = Yview(i,j,k,2); - - dYview(i,j,k,0) += cz * (u_ijk - Yview(i,j,nzl-1,0)); // du/dz - dYview(i,j,k,1) += cz * (v_ijk - Yview(i,j,nzl-1,1)); // dv/dz - dYview(i,j,k,2) += cz * (w_ijk - Yview(i,j,nzl-1,2)); // dw/dz - }); - } - } - else if (c < 0.0) - { - if (udata->grid->nprocs() != 1) - { - /* Flow moving in the negative x,y,z direction: - * boundaries are west face, south face, and front face */ - - RAJA::View > - Yip1jk(udata->grid->getRecvBuffer("EAST"), nyl, nzl, dof); - RAJA::View > - Yijp1k(udata->grid->getRecvBuffer("NORTH"), nxl, nzl, dof); - RAJA::View > - Yijkp1(udata->grid->getRecvBuffer("BACK"), nxl, nyl, dof); - - auto front_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl-1), - RAJA::RangeSegment(0, nyl-1), - RAJA::RangeSegment(0, dof)); - RAJA::kernel(front_face, - [=] DEVICE_FUNC (int i, int j, int l) { - dYview(i,j,0,l) = cz * (Yview(i,j,0,l) - Yijkp1(i,nzl+1,l)); // d/dz - dYview(i,j,0,l) += cy * (Yview(i,j,0,l) - Yijp1k(0,j+1,l)); // d/dy - dYview(i,j,0,l) += cx * (Yview(i,j,0,l) - Yip1jk(i+1,0,l)); // d/dx - }); - - auto south_face = RAJA::make_tuple(RAJA::RangeSegment(0, nxl-1), - RAJA::RangeSegment(0, nzl-1), - RAJA::RangeSegment(0, dof)); - RAJA::kernel(south_face, - [=] DEVICE_FUNC (int i, int k, int l) { - dYview(i,0,k,l) = cz * (Yview(i,0,k,l) - Yijkp1(i,k+1,l)); // d/dz - dYview(i,0,k,l) += cy * (Yview(i,0,k,l) - Yijp1k(0,nyl+1,l)); // d/dy - dYview(i,0,k,l) += cx * (Yview(i,0,k,l) - Yip1jk(i+1,0,l)); // d/dx - }); - - auto east_face = RAJA::make_tuple(RAJA::RangeSegment(0, nyl-1), - RAJA::RangeSegment(0, nzl-1), - RAJA::RangeSegment(0, dof)); - RAJA::kernel(east_face, - [=] DEVICE_FUNC (int j, int k, int l) { - dYview(0,j,k,l) = cz * (Yview(0,j,k,l) - Yijkp1(0,k+1,l)); // d/dz - dYview(0,j,k,l) += cy * (Yview(0,j,k,l) - Yijp1k(0,j+1,l)); // d/dy - dYview(0,j,k,l) += cx * (Yview(0,j,k,l) - Yip1jk(nxl+1,0,l)); // d/dx 
- }); - } - else - { - auto range = RAJA::make_tuple(RAJA::RangeSegment(nxl-2, nxl), - RAJA::RangeSegment(nyl-2, nyl), - RAJA::RangeSegment(nzl-2, nzl)); - RAJA::kernel(range, - [=] DEVICE_FUNC (int i, int j, int k) { - const realtype u_ijk = Yview(i,j,k,0); - const realtype v_ijk = Yview(i,j,k,1); - const realtype w_ijk = Yview(i,j,k,2); - - // grad * u - dYview(i,j,k,0) = cz * (u_ijk - Yview(i,j,0,0)); // du/dz - dYview(i,j,k,0) += cy * (u_ijk - Yview(i,0,k,0)); // du/dy - dYview(i,j,k,0) += cx * (u_ijk - Yview(0,j,k,0)); // du/dx - - // grad * v - dYview(i,j,k,1) = cz * (v_ijk - Yview(i,j,0,1)); // dv/dz - dYview(i,j,k,1) += cy * (v_ijk - Yview(i,0,k,1)); // dv/dy - dYview(i,j,k,1) += cx * (v_ijk - Yview(0,j,k,1)); // dv/dx - - // grad * w - dYview(i,j,k,2) = cz * (w_ijk - Yview(i,j,0,2)); // dw/dz - dYview(i,j,k,2) += cy * (w_ijk - Yview(i,0,k,2)); // dw/dy - dYview(i,j,k,2) += cx * (w_ijk - Yview(0,j,k,2)); // dw/dx - }); - } - } - - /* return success */ - return(0); -} - - -/* Compute the reaction term g(t,y). 
*/ -static int Reaction(realtype t, N_Vector y, N_Vector ydot, void* user_data) -{ - /* access problem data */ - UserData* udata = (UserData*) user_data; - - SUNDIALS_CXX_MARK_FUNCTION(udata->prof); - - /* set variable shortcuts */ - const realtype A = udata->A; - const realtype B = udata->B; - const realtype k1 = udata->k1; - const realtype k2 = udata->k2; - const realtype k3 = udata->k3; - const realtype k4 = udata->k4; - const realtype k5 = udata->k5; - const realtype k6 = udata->k6; - - /* local variables */ - realtype* Ydata = NULL; - realtype* dYdata = NULL; - - /* access data arrays */ - Ydata = GetVecData(y); - if (check_retval((void *)Ydata, "GetVecData", 0, udata->myid)) - return(-1); - - dYdata = GetVecData(ydot); - if (check_retval((void *)dYdata, "GetVecData", 0, udata->myid)) - return(-1); - - RAJA::View > Yview(GetVecData(y), - udata->grid->nxl, - udata->grid->nyl, - udata->grid->nzl, - udata->grid->dof); - - RAJA::View > dYview(GetVecData(ydot), - udata->grid->nxl, - udata->grid->nyl, - udata->grid->nzl, - udata->grid->dof); - - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, udata->grid->nxl), - RAJA::RangeSegment(0, udata->grid->nyl), - RAJA::RangeSegment(0, udata->grid->nzl)); - - /* iterate over domain, computing reactions */ - if (udata->add_reactions) - { - /* when we are not additively splitting the rhs, we add to ydot - as we expect it to hold the advection term already */ - RAJA::kernel(range, - [=] DEVICE_FUNC (int i, int j, int k) { - const realtype u = Yview(i,j,k,0); - const realtype v = Yview(i,j,k,1); - const realtype w = Yview(i,j,k,2); - dYview(i,j,k,0) += k1 * A - k2 * w * u + k3 * u * u * v - k4 * u; - dYview(i,j,k,1) += k2 * w * u - k3 * u * u * v; - dYview(i,j,k,2) += -k2 * w * u + k5 * B - k6 * w; - }); - } - else - { - /* set output to zero */ - N_VConst(0.0, ydot); - - RAJA::kernel(range, - [=] DEVICE_FUNC (int i, int j, int k) { - const realtype u = Yview(i,j,k,0); - const realtype v = Yview(i,j,k,1); - const realtype w 
= Yview(i,j,k,2); - dYview(i,j,k,0) = k1 * A - k2 * w * u + k3 * u * u * v - k4 * u; - dYview(i,j,k,1) = k2 * w * u - k3 * u * u * v; - dYview(i,j,k,2) = -k2 * w * u + k5 * B - k6 * w; - }); - } - - /* return success */ - return(0); -} - - -/* Compute the RHS as h(t,y) = f(t,y) + g(t,y). */ -static int AdvectionReaction(realtype t, N_Vector y, N_Vector ydot, - void *user_data) -{ - /* access problem data */ - UserData* udata = (UserData*) user_data; - int retval; - - /* NOTE: The order in which Advection and Reaction are - called is critical here. Advection must be - computed first. */ - retval = Advection(t, y, ydot, user_data); - if (check_retval((void *)&retval, "Advection", 1, udata->myid)) return(-1); - - retval = Reaction(t, y, ydot, user_data); - if (check_retval((void *)&retval, "Reaction", 1, udata->myid)) return(-1); - - /* return success */ - return(0); -} - -/* Compute the residual F(t,y,y') = ydot - h(t,y) = 0. */ -static int AdvectionReactionResidual(realtype t, N_Vector y, N_Vector ydot, - N_Vector F, void *user_data) -{ - /* access problem data */ - UserData* udata = (UserData*) user_data; - int retval; - - /* NOTE: The order in which Advection and Reaction are - called is critical here. Advection must be - computed first. */ - retval = Advection(t, y, F, user_data); /* F = -c y_x */ - if (check_retval((void *)&retval, "Advection", 1, udata->myid)) return(-1); - - retval = Reaction(t, y, F, user_data); /* F = -c y_x + g(t,y) */ - if (check_retval((void *)&retval, "Reaction", 1, udata->myid)) return(-1); - - /* F = ydot - h(t,y) = ydot + c y_x - g(t,y) */ - N_VLinearSum(1.0, ydot, -1.0, F, F); - - /* return success */ - return(0); -} - -/* -------------------------------------------------------------- - * Linear system and Jacobian functions - * --------------------------------------------------------------*/ - -/* Solve the linear systems Ax = b where A = I - gamma*dg/dy. - When using a fully implicit method, we are approximating - dh/dy as dg/dy. 
*/ -static int SolveReactionLinSys(N_Vector y, N_Vector x, N_Vector b, - realtype gamma, raja_xyz_tuple blocks, - UserData* udata) -{ - /* shortcuts */ - int dof, nxl, nyl, nzl; - realtype k2, k3, k4, k6; - - /* set shortcuts */ - dof = udata->grid->dof; - nxl = udata->grid->nxl; - nyl = udata->grid->nyl; - nzl = udata->grid->nzl; - k2 = udata->k2; - k3 = udata->k3; - k4 = udata->k4; - k6 = udata->k6; - - /* create views of the data */ - RAJA::View > Yview(GetVecData(y), - nxl, nyl, nzl, dof); - RAJA::View > Bview(GetVecData(b), - nxl, nyl, nzl, dof); - RAJA::View > Xview(GetVecData(x), - nxl, nyl, nzl, dof); - - RAJA::kernel(blocks, - [=] DEVICE_FUNC (int i, int j, int k) { - - /* and the corresponding vectors */ - realtype *b = &(Bview(i,j,k,0)); - realtype *x = &(Xview(i,j,k,0)); - - /* shortcuts to u, v, w for the block */ - realtype u = Yview(i,j,k,0); - realtype v = Yview(i,j,k,1); - realtype w = Yview(i,j,k,2); - - realtype A0, A1, A2, A3, A4, A5, A6, A7, A8; - - // - // compute J = dg/dy - // - - /* 1st row: u, v, w */ - A0 = -k2 * w + 2.0 * k3 * u * v - k4; - A1 = k3 * u * u; - A2 = -k2 * u; - - /* 2nd row: u, v, w */ - A3 = k2 * w - 2.0 * k3 * u * v; - A4 = -k3 * u * u; - A5 = k2 * u; - - /* 3rd row: u, v, w */ - A6 = -k2 * w; - A7 = 0.0; - A8 = -k2 * u - k6; - - // - // compute A = I - gamma*J - // - - A0 = 1. - (gamma * A0); - A1 = -gamma * A1; - A2 = -gamma * A2; - A3 = -gamma * A3; - A4 = 1. - (gamma * A4); - A5 = -gamma * A5; - A6 = -gamma * A6; - A7 = -gamma * A7; - A8 = 1. 
- (gamma * A8); - - // - // compute x = A^{-1}b - // - - realtype scratch_0 = A4*A8; - realtype scratch_1 = A1*A5; - realtype scratch_2 = A2*A7; - realtype scratch_3 = A5*A7; - realtype scratch_4 = A1*A8; - realtype scratch_5 = A2*A4; - realtype scratch_6 = 1.0/(A0*scratch_0 - A0*scratch_3 + A3*scratch_2 - A3*scratch_4 + A6*scratch_1 - A6*scratch_5); - realtype scratch_7 = A2*A3; - realtype scratch_8 = A6*b[0]; - realtype scratch_9 = A2*A6; - realtype scratch_10 = A3*b[0]; - realtype scratch_11 = 1.0/A0; - realtype scratch_12 = A1*scratch_11; - realtype scratch_13 = (-A6*scratch_12 + A7)/(-A3*scratch_12 + A4); - - x[0] = scratch_6*(b[0]*scratch_0 - b[0]*scratch_3 + b[1]*scratch_2 - b[1]*scratch_4 + b[2]*scratch_1 - b[2]*scratch_5); - x[1] = scratch_6*(-A0*A5*b[2] + A0*A8*b[1] + A5*scratch_8 - A8*scratch_10 - b[1]*scratch_9 + b[2]*scratch_7); - x[2] = (-b[2] + scratch_11*scratch_8 + scratch_13*(b[1] - scratch_10*scratch_11))/(-A8 + scratch_11*scratch_9 + scratch_13*(A5 - scratch_11*scratch_7)); - }); - - return(0); -} - -/* Solve the linear systems Ax = b where A = -dg/dy + gamma. - We are approximating dh/dy as dg/dy. 
*/ -static int SolveReactionLinSysRes(N_Vector y, N_Vector x, N_Vector b, - realtype gamma, raja_xyz_tuple blocks, - UserData* udata) -{ - /* shortcuts */ - int dof, nxl, nyl, nzl; - realtype k2, k3, k4, k6; - - /* set shortcuts */ - dof = udata->grid->dof; - nxl = udata->grid->nxl; - nyl = udata->grid->nyl; - nzl = udata->grid->nzl; - k2 = udata->k2; - k3 = udata->k3; - k4 = udata->k4; - k6 = udata->k6; - - /* create views of the data */ - RAJA::View > Yview(GetVecData(y), - nxl, nyl, nzl, dof); - RAJA::View > Bview(GetVecData(b), - nxl, nyl, nzl, dof); - RAJA::View > Xview(GetVecData(x), - nxl, nyl, nzl, dof); - - RAJA::kernel(blocks, - [=] DEVICE_FUNC (int i, int j, int k) { - - /* and the corresponding vectors */ - realtype *b = &(Bview(i,j,k,0)); - realtype *x = &(Xview(i,j,k,0)); - - /* shortcuts to u, v, w for the block */ - realtype u = Yview(i,j,k,0); - realtype v = Yview(i,j,k,1); - realtype w = Yview(i,j,k,2); - - realtype A0, A1, A2, A3, A4, A5, A6, A7, A8; - - // - // compute dg/dy - // - - /* 1st row: u, v, w */ - A0 = -k2 * w + 2.0 * k3 * u * v - k4; - A1 = k3 * u * u; - A2 = -k2 * u; - - /* 2nd row: u, v, w */ - A3 = k2 * w - 2.0 * k3 * u * v; - A4 = -k3 * u * u; - A5 = k2 * u; - - /* 3rd row: u, v, w */ - A6 = -k2 * w; - A7 = 0.0; - A8 = -k2 * u - k6; - - // - // compute A = -dg/dy + gamma*diag(df/dydot) - // where diag(df/dydot) is approximated as - // diag([udot, vdot, wdot]) - // - - A0 = -A0 + gamma; - A1 = -A1; - A2 = -A2; - A3 = -A3; - A4 = -A4 + gamma; - A5 = -A5; - A6 = -A6; - A7 = -A7; - A8 = -A8 + gamma; - - // - // compute x = A^{-1}b - // - - realtype scratch_0 = A4*A8; - realtype scratch_1 = A1*A5; - realtype scratch_2 = A2*A7; - realtype scratch_3 = A5*A7; - realtype scratch_4 = A1*A8; - realtype scratch_5 = A2*A4; - realtype scratch_6 = 1.0/(A0*scratch_0 - A0*scratch_3 + A3*scratch_2 - A3*scratch_4 + A6*scratch_1 - A6*scratch_5); - realtype scratch_7 = A2*A3; - realtype scratch_8 = A6*b[0]; - realtype scratch_9 = A2*A6; - realtype 
scratch_10 = A3*b[0]; - realtype scratch_11 = 1.0/A0; - realtype scratch_12 = A1*scratch_11; - realtype scratch_13 = (-A6*scratch_12 + A7)/(-A3*scratch_12 + A4); - - x[0] = scratch_6*(b[0]*scratch_0 - b[0]*scratch_3 + b[1]*scratch_2 - b[1]*scratch_4 + b[2]*scratch_1 - b[2]*scratch_5); - x[1] = scratch_6*(-A0*A5*b[2] + A0*A8*b[1] + A5*scratch_8 - A8*scratch_10 - b[1]*scratch_9 + b[2]*scratch_7); - x[2] = (-b[2] + scratch_11*scratch_8 + scratch_13*(b[1] - scratch_10*scratch_11))/(-A8 + scratch_11*scratch_9 + scratch_13*(A5 - scratch_11*scratch_7)); - }); - - return(0); -} - - -/* -------------------------------------------------------------- - * Preconditioner functions - * --------------------------------------------------------------*/ - -/* Solves Pz = r where P = I - gamma * dg/dy */ -static int PSolve(realtype t, N_Vector y, N_Vector ydot, N_Vector r, - N_Vector z, realtype gamma, realtype delta, int lr, - - void *user_data) -{ - /* local variables */ - UserData* udata = (UserData*) user_data; - int retval; - - SUNDIALS_CXX_MARK_FUNCTION(udata->prof); - - /* solve the task-local linear system Pz = r */ - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, udata->grid->nxl), - RAJA::RangeSegment(0, udata->grid->nyl), - RAJA::RangeSegment(0, udata->grid->nzl)); - retval = SolveReactionLinSys(y, z, r, gamma, range, udata); - - return(retval); -} - -/* Solves Pz = r where P = -dg/dy + gamma */ -static int PSolveRes(realtype t, N_Vector y, N_Vector ydot, N_Vector F, - N_Vector r, N_Vector z, realtype cj, realtype delta, - void *user_data) -{ - /* local variables */ - UserData* udata = (UserData*) user_data; - int retval; - - SUNDIALS_CXX_MARK_FUNCTION(udata->prof); - - /* solve the task-local linear system Pz = r */ - auto range = RAJA::make_tuple(RAJA::RangeSegment(0, udata->grid->nxl), - RAJA::RangeSegment(0, udata->grid->nyl), - RAJA::RangeSegment(0, udata->grid->nzl)); - retval = SolveReactionLinSysRes(y, z, r, cj, range, udata); - - return(retval); -} - - 
-#endif diff --git a/benchmarks/advection_reaction_3D/scripts/make_plots.py b/benchmarks/advection_reaction_3D/scripts/make_plots.py new file mode 100755 index 0000000000..7728562510 --- /dev/null +++ b/benchmarks/advection_reaction_3D/scripts/make_plots.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python +# ------------------------------------------------------------------------------ +# Programmer(s): Daniel R. Reynolds @ SMU +# ------------------------------------------------------------------------------ +# SUNDIALS Copyright Start +# Copyright (c) 2002-2023, Lawrence Livermore National Security +# and Southern Methodist University. +# All rights reserved. +# +# See the top-level LICENSE and NOTICE files for details. +# +# SPDX-License-Identifier: BSD-3-Clause +# SUNDIALS Copyright End +# ------------------------------------------------------------------------------ +# matplotlib-based plotting script for the advection_reaction_3D benchmark codes +# ------------------------------------------------------------------------------ + +# imports +from os.path import exists +import numpy as np +import matplotlib.pyplot as plt + +# ------------------------------------------------------------------------------ + +# utility functions +def parallel_coords(rank): + if (rank == 0): + return [0, 0, 0] + if (rank == 1): + return [0, 0, 1] + if (rank == 2): + return [0, 1, 0] + if (rank == 3): + return [0, 1, 1] + if (rank == 4): + return [1, 0, 0] + if (rank == 5): + return [1, 0, 1] + if (rank == 6): + return [1, 1, 0] + if (rank == 7): + return [1, 1, 1] + +def xslice(u,it,ix): + return u[it,ix,:,:] + +def yslice(u,it,iy): + return u[it,:,iy,:] + +def zslice(u,it,iz): + return u[it,:,:,iz] + +def xproj(u,it): + return np.average(u[it,:,:,:], axis=0) + +def yproj(u,it): + return np.average(u[it,:,:,:], axis=1) + +def zproj(u,it): + return np.average(u[it,:,:,:], axis=2) + +def myplot(axis, X, Y, Z, xlabel='none', ylabel='none'): + frame = axis.contourf(X, Y, Z) + plt.colorbar(frame, 
ax=axis) + if (xlabel != 'none'): + axis.set_xlabel(xlabel) + if (ylabel != 'none'): + axis.set_ylabel(ylabel) + + + +# read time mesh +times = np.loadtxt("t.000000.txt") +nt = times.size + +# read spatial mesh +mesh = np.loadtxt("mesh.txt", dtype=float) +x = mesh[0,:] +y = mesh[1,:] +z = mesh[2,:] +nx = x.size +ny = y.size +nz = z.size + +# ensure that the run used exactly 1 or 8 MPI ranks +for i in range(9): + if (exists("u.00000" + str(i) + ".txt" ) and + not exists("u.00000" + str(i+1) + ".txt" )): + nprocs = i+1 +if ((nprocs != 1) and (nprocs != 8)): + print("make_plots.py error: run must have used either 1 or 8 MPI ranks") + exit() + +# load data for run +if (nprocs == 1): + u = np.zeros((nt,nx,ny,nz), dtype=float) + v = np.zeros((nt,nx,ny,nz), dtype=float) + w = np.zeros((nt,nx,ny,nz), dtype=float) + udata = np.loadtxt("u.000000.txt") + vdata = np.loadtxt("v.000000.txt") + wdata = np.loadtxt("w.000000.txt") + if (nt != udata.shape[0]): + print("make_plots.py error: mesh and data have incompatible sizes") + exit() + if (nx*ny*nz != udata.shape[1]): + print("make_plots.py error: mesh and data have incompatible sizes") + exit() + for it in range(nt): + u[it,:,:,:] = np.reshape(udata[it,:], (nx,ny,nz), order='C') + v[it,:,:,:] = np.reshape(vdata[it,:], (nx,ny,nz), order='C') + w[it,:,:,:] = np.reshape(wdata[it,:], (nx,ny,nz), order='C') +else: + u = np.zeros((nt,nx,ny,nz), dtype=float) + v = np.zeros((nt,nx,ny,nz), dtype=float) + w = np.zeros((nt,nx,ny,nz), dtype=float) + nxl = nx//2 + nyl = ny//2 + nzl = nz//2 + for ip in range(8): + udata = np.loadtxt("u.00000" + str(ip) + ".txt") + vdata = np.loadtxt("v.00000" + str(ip) + ".txt") + wdata = np.loadtxt("w.00000" + str(ip) + ".txt") + if (nt != udata.shape[0]): + print("make_plots.py error: mesh and data have incompatible sizes") + exit() + if (nxl*nyl*nzl != udata.shape[1]): + print("make_plots.py error: mesh and data have incompatible sizes") + exit() + coords = parallel_coords(ip) + ilo = coords[0]*nxl + ihi 
= (coords[0]+1)*nxl + jlo = coords[1]*nyl + jhi = (coords[1]+1)*nyl + klo = coords[2]*nzl + khi = (coords[2]+1)*nzl + for it in range(nt): + u[it,ilo:ihi,jlo:jhi,klo:khi] = np.reshape(udata[it,:], (nxl,nyl,nzl), order='C') + v[it,ilo:ihi,jlo:jhi,klo:khi] = np.reshape(vdata[it,:], (nxl,nyl,nzl), order='C') + w[it,ilo:ihi,jlo:jhi,klo:khi] = np.reshape(wdata[it,:], (nxl,nyl,nzl), order='C') + + +# set meshgrid objects +xy0,xy1 = np.meshgrid(x, y) +yz0,yz1 = np.meshgrid(y, z) +xz0,xz1 = np.meshgrid(x, z) + +# generate plots +sliceidx = 25 +tslice = [0, 5, 10] +figsize = (9,7) + +# xy slices at various times +plt.figure(1) +fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True, figsize=figsize) +myplot(ax1, xy0, xy1, zslice(u,tslice[0],sliceidx), ylabel = 'u') +myplot(ax2, xy0, xy1, zslice(u,tslice[1],sliceidx)) +myplot(ax3, xy0, xy1, zslice(u,tslice[2],sliceidx)) +myplot(ax4, xy0, xy1, zslice(v,tslice[0],sliceidx), ylabel = 'v') +myplot(ax5, xy0, xy1, zslice(v,tslice[1],sliceidx)) +myplot(ax6, xy0, xy1, zslice(v,tslice[2],sliceidx)) +myplot(ax7, xy0, xy1, zslice(w,tslice[0],sliceidx), ylabel = 'w', xlabel = 't = ' + str(times[0])) +myplot(ax8, xy0, xy1, zslice(w,tslice[1],sliceidx), xlabel = 't = ' + str(times[1])) +myplot(ax9, xy0, xy1, zslice(w,tslice[2],sliceidx), xlabel = 't = ' + str(times[2])) +plt.savefig('xy-slices.png') + +# yz slices at various times +plt.figure(2) +fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True, figsize=figsize) +myplot(ax1, yz0, yz1, xslice(u,tslice[0],sliceidx), ylabel = 'u') +myplot(ax2, yz0, yz1, xslice(u,tslice[1],sliceidx)) +myplot(ax3, yz0, yz1, xslice(u,tslice[2],sliceidx)) +myplot(ax4, yz0, yz1, xslice(v,tslice[0],sliceidx), ylabel = 'v') +myplot(ax5, yz0, yz1, xslice(v,tslice[1],sliceidx)) +myplot(ax6, yz0, yz1, xslice(v,tslice[2],sliceidx)) +myplot(ax7, yz0, yz1, xslice(w,tslice[0],sliceidx), ylabel = 'w', xlabel = 't = ' + str(times[0])) 
+myplot(ax8, yz0, yz1, xslice(w,tslice[1],sliceidx), xlabel = 't = ' + str(times[1])) +myplot(ax9, yz0, yz1, xslice(w,tslice[2],sliceidx), xlabel = 't = ' + str(times[2])) +plt.savefig('yz-slices.png') + +# xz slices at various times +plt.figure(3) +fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True, figsize=figsize) +myplot(ax1, xz0, xz1, yslice(u,tslice[0],sliceidx), ylabel ='u') +myplot(ax2, xz0, xz1, yslice(u,tslice[1],sliceidx)) +myplot(ax3, xz0, xz1, yslice(u,tslice[2],sliceidx)) +myplot(ax4, xz0, xz1, yslice(v,tslice[0],sliceidx), ylabel = 'v') +myplot(ax5, xz0, xz1, yslice(v,tslice[1],sliceidx)) +myplot(ax6, xz0, xz1, yslice(v,tslice[2],sliceidx)) +myplot(ax7, xz0, xz1, yslice(w,tslice[0],sliceidx), ylabel= 'w', xlabel = 't = ' + str(times[0])) +myplot(ax8, xz0, xz1, yslice(w,tslice[1],sliceidx), xlabel ='t = ' + str(times[1])) +myplot(ax9, xz0, xz1, yslice(w,tslice[2],sliceidx), xlabel = 't = ' + str(times[2])) +plt.savefig('xz-slices.png') + +# xy projection at various times +plt.figure(4) +fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True, figsize=figsize) +myplot(ax1, xy0, xy1, zproj(u,tslice[0]), ylabel = 'u') +myplot(ax2, xy0, xy1, zproj(u,tslice[1])) +myplot(ax3, xy0, xy1, zproj(u,tslice[2])) +myplot(ax4, xy0, xy1, zproj(v,tslice[0]), ylabel = 'v') +myplot(ax5, xy0, xy1, zproj(v,tslice[1])) +myplot(ax6, xy0, xy1, zproj(v,tslice[2])) +myplot(ax7, xy0, xy1, zproj(w,tslice[0]), ylabel = 'w', xlabel = 't = ' + str(times[0])) +myplot(ax8, xy0, xy1, zproj(w,tslice[1]), xlabel = 't = ' + str(times[1])) +myplot(ax9, xy0, xy1, zproj(w,tslice[2]), xlabel = 't = ' + str(times[2])) +plt.savefig('xy-projections.png') + +# yz projection at various times +fig = plt.figure(5) +fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True, figsize=figsize) +myplot(ax1, yz0, yz1, xproj(u,tslice[0]), ylabel = 'u') +myplot(ax2, yz0, yz1, 
xproj(u,tslice[1])) +myplot(ax3, yz0, yz1, xproj(u,tslice[2])) +myplot(ax4, yz0, yz1, xproj(v,tslice[0]), ylabel = 'v') +myplot(ax5, yz0, yz1, xproj(v,tslice[1])) +myplot(ax6, yz0, yz1, xproj(v,tslice[2])) +myplot(ax7, yz0, yz1, xproj(w,tslice[0]), ylabel = 'w', xlabel = 't = ' + str(times[0])) +myplot(ax8, yz0, yz1, xproj(w,tslice[1]), xlabel = 't = ' + str(times[1])) +myplot(ax9, yz0, yz1, xproj(w,tslice[2]), xlabel = 't = ' + str(times[2])) +plt.savefig('yz-projections.png') + +# xz projection at various times +fig = plt.figure(6) +fig, ((ax1,ax2,ax3), (ax4,ax5,ax6), (ax7,ax8,ax9)) = plt.subplots(3, 3, sharex=True, sharey=True, figsize=figsize) +myplot(ax1, xz0, xz1, yproj(u,tslice[0]), ylabel = 'u') +myplot(ax2, xz0, xz1, yproj(u,tslice[1])) +myplot(ax3, xz0, xz1, yproj(u,tslice[2])) +myplot(ax4, xz0, xz1, yproj(v,tslice[0]), ylabel = 'v') +myplot(ax5, xz0, xz1, yproj(v,tslice[1])) +myplot(ax6, xz0, xz1, yproj(v,tslice[2])) +myplot(ax7, xz0, xz1, yproj(w,tslice[0]), ylabel = 'w', xlabel = 't = ' + str(times[0])) +myplot(ax8, xz0, xz1, yproj(w,tslice[1]), xlabel = 't = ' + str(times[1])) +myplot(ax9, xz0, xz1, yproj(w,tslice[2]), xlabel = 't = ' + str(times[2])) +plt.savefig('xz-projections.png') + +#plt.show() +plt.close() + +##### end of script ##### diff --git a/cmake/SundialsTPLOptions.cmake b/cmake/SundialsTPLOptions.cmake index f01a0ac14d..11e39d0f99 100644 --- a/cmake/SundialsTPLOptions.cmake +++ b/cmake/SundialsTPLOptions.cmake @@ -61,6 +61,11 @@ sundials_option(ENABLE_HIP BOOL "Enable HIP support" OFF) # ------------------------------------------------------------- sundials_option(ENABLE_SYCL BOOL "Enable SYCL support" OFF) +sundials_option(SUNDIALS_SYCL_2020_UNSUPPORTED BOOL + "Disable the use of some SYCL 2020 features in SUNDIALS libraries and examples" OFF + DEPENDS_ON ENABLE_SYCL + ADVANCED) + # --------------------------------------------------------------- # Enable LAPACK support? 
# --------------------------------------------------------------- @@ -288,6 +293,16 @@ sundials_option(ONEMKL_WORKS BOOL "Set to ON to force CMake to accept a given on DEPENDS_ON ENABLE_ONEMKL ADVANCED) +sundials_option(SUNDIALS_ONEMKL_USE_GETRF_LOOP BOOL + "Replace batched getrf call with loop over getrf" OFF + DEPENDS_ON ENABLE_ONEMKL + ADVANCED) + +sundials_option(SUNDIALS_ONEMKL_USE_GETRS_LOOP BOOL + "Replace batched getrs call with loop over getrs" OFF + DEPENDS_ON ENABLE_ONEMKL + ADVANCED) + # --------------------------------------------------------------- # Enable Caliper support? # --------------------------------------------------------------- diff --git a/cmake/tpl/SundialsONEMKL.cmake b/cmake/tpl/SundialsONEMKL.cmake index 34177ff0fe..a807a2e7f4 100644 --- a/cmake/tpl/SundialsONEMKL.cmake +++ b/cmake/tpl/SundialsONEMKL.cmake @@ -64,6 +64,7 @@ find_package(MKL CONFIG NO_DEFAULT_PATH REQUIRED) +message(STATUS "MKL Version: ${MKL_VERSION}") message(STATUS "MKL Targets: ${MKL_IMPORTED_TARGETS}") # ----------------------------------------------------------------------------- diff --git a/doc/arkode/guide/source/Butcher.rst b/doc/arkode/guide/source/Butcher.rst index 5bfdd6169c..6666a4f7dd 100644 --- a/doc/arkode/guide/source/Butcher.rst +++ b/doc/arkode/guide/source/Butcher.rst @@ -180,6 +180,41 @@ This is the default 2nd order explicit method. region is outlined in blue; the embedding's region is in red. +.. _Butcher.ARK2_ERK: + +ARK2-ERK-3-1-2 +^^^^^^^^^^^^^^ + +.. index:: ARK2-ERK-3-1-2 + +Accessible via the constant ``ARKODE_ARK2_ERK_3_1_2`` to +:c:func:`ARKStepSetTableNum()`, :c:func:`ERKStepSetTableNum()` or +:c:func:`ARKodeButcherTable_LoadERK()`. +Accessible via the string ``"ARKODE_ARK2_ERK_3_1_2"`` to +:c:func:`ARKStepSetTableName()`, :c:func:`ERKStepSetTableName()` or +:c:func:`ARKodeButcherTable_LoadERKByName()`. 
+This is the explicit portion of the default 2nd order additive method (the +explicit portion of the ARK2 method from :cite:p:`giraldo2013implicit`). + +.. math:: + + \renewcommand{\arraystretch}{1.5} + \begin{array}{r|ccc} + 0 & 0 & 0 & 0 \\ + 2 - \sqrt{2} & 2 - \sqrt{2} & 0 & 0 \\ + 1 & 1 - \frac{3 + 2\sqrt{2}}{6} & \frac{3 + 2\sqrt{2}}{6} & 0 \\ + \hline + 2 & \frac{1}{2\sqrt{2}} & \frac{1}{2\sqrt{2}} & 1 - \frac{1}{\sqrt{2}} \\ + 1 & \frac{4 - \sqrt{2}}{8} & \frac{4 - \sqrt{2}}{8} & \frac{1}{2\sqrt{2}} \\ + \end{array} + +.. figure:: /figs/arkode/ark2_erk_stab_region.png + :scale: 65 % + :align: center + + Linear stability region for the ARK2-ERK method. The method's + region is outlined in blue; the embedding's region is in red. + .. _Butcher.Bogacki_Shampine: @@ -816,6 +851,41 @@ are A- and B-stable. region is outlined in blue; the embedding's region is in red. +.. _Butcher.ARK2_DIRK: + +ARK2-DIRK-3-1-2 +^^^^^^^^^^^^^^^ + +.. index:: ARK2-DIRK-3-1-2 + +Accessible via the constant ``ARKODE_ARK2_DIRK_3_1_2`` to +:c:func:`ARKStepSetTableNum()`, or +:c:func:`ARKodeButcherTable_LoadDIRK()`. +Accessible via the string ``"ARKODE_ARK2_DIRK_3_1_2"`` to +:c:func:`ARKStepSetTableName()`, or +:c:func:`ARKodeButcherTable_LoadDIRKByName()`. +This is the implicit portion of the default 2nd order additive method (the +implicit portion of the ARK2 method from :cite:p:`giraldo2013implicit`). + +.. math:: + + \renewcommand{\arraystretch}{1.5} + \begin{array}{r|ccc} + 0 & 0 & 0 & 0 \\ + 2 - \sqrt{2} & 1 - \frac{1}{\sqrt{2}} & 1 - \frac{1}{\sqrt{2}} & 0 \\ + 1 & \frac{1}{2\sqrt{2}} & \frac{1}{2\sqrt{2}} & 1 - \frac{1}{\sqrt{2}} \\ + \hline + 2 & \frac{1}{2\sqrt{2}} & \frac{1}{2\sqrt{2}} & 1 - \frac{1}{\sqrt{2}} \\ + 1 & \frac{4 - \sqrt{2}}{8} & \frac{4 - \sqrt{2}}{8} & \frac{1}{2\sqrt{2}} \\ + \end{array} + +.. figure:: /figs/arkode/ark2_dirk_stab_region.png + :scale: 65 % + :align: center + + Linear stability region for the ARK2-DIRK method. 
The method's + region is outlined in blue; the embedding's region is in red. + .. _Butcher.Billington: @@ -1590,10 +1660,16 @@ Additive Butcher tables --------------------------- In the category of additive Runge--Kutta methods for split implicit and -explicit calculations, ARKODE includes methods that have orders 3 -through 5, with embeddings that are of orders 2 through 4. These +explicit calculations, ARKODE includes methods that have orders 2 +through 5, with embeddings that are of orders 1 through 4. These Butcher table pairs are as follows: +* :index:`2nd-order pair `: + :numref:`Butcher.ARK2_ERK` with :numref:`Butcher.ARK2_DIRK`, + corresponding to Butcher tables ``ARKODE_ARK2_ERK_3_1_2`` and + ``ARKODE_ARK2_DIRK_3_1_2`` for :c:func:`ARKStepSetTableNum()` + or :c:func:`ARKStepSetTableName()`. + * :index:`3rd-order pair `: :numref:`Butcher.ARK_4_2_3_E` with :numref:`Butcher.ARK_4_2_3_I`, corresponding to Butcher tables ``ARKODE_ARK324L2SA_ERK_4_2_3`` and diff --git a/doc/arkode/guide/source/Introduction.rst b/doc/arkode/guide/source/Introduction.rst index 10174ccd8c..4906c5814f 100644 --- a/doc/arkode/guide/source/Introduction.rst +++ b/doc/arkode/guide/source/Introduction.rst @@ -118,6 +118,25 @@ provided with SUNDIALS, or again may utilize a user-supplied module. Changes from previous versions ============================== +Changes in v5.6.0 +----------------- + +Added the second order IMEX method from :cite:p:`giraldo2013implicit` as the +default second order IMEX method in ARKStep. The explicit table is given by +``ARKODE_ARK2_ERK_3_1_2`` (see :numref:`Butcher.ARK2_ERK`) and the implicit +table by ``ARKODE_ARK2_DIRK_3_1_2`` (see :numref:`Butcher.ARK2_DIRK`). + +Updated the F2003 utility routines :c:func:`SUNDIALSFileOpen` and :c:func:`SUNDIALSFileClose` +to support user specification of ``stdout`` and ``stderr`` strings for the output +file names. 
+ +Updated the default ARKODE behavior when returning the solution when +the internal time has reached a user-specified stop time. Previously, the output +solution was interpolated to the value of ``tstop``; the default is now to copy the +internal solution vector. Users who wish to revert to interpolation may call a new +routine :c:func:`ARKStepSetInterpolateStopTime`, +:c:func:`ERKStepSetInterpolateStopTime`, or :c:func:`MRIStepSetInterpolateStopTime`. + Changes in v5.5.1 ----------------- diff --git a/doc/arkode/guide/source/Usage/ARKStep_c_interface/User_callable.rst b/doc/arkode/guide/source/Usage/ARKStep_c_interface/User_callable.rst index cd4acec555..4713160b05 100644 --- a/doc/arkode/guide/source/Usage/ARKStep_c_interface/User_callable.rst +++ b/doc/arkode/guide/source/Usage/ARKStep_c_interface/User_callable.rst @@ -876,6 +876,7 @@ Maximum no. of internal steps before *tout* :c:func:`ARKStepSetMaxNumSteps Maximum absolute step size :c:func:`ARKStepSetMaxStep` :math:`\infty` Minimum absolute step size :c:func:`ARKStepSetMinStep` 0.0 Set a value for :math:`t_{stop}` :c:func:`ARKStepSetStopTime` undefined +Interpolate at :math:`t_{stop}` :c:func:`ARKStepSetInterpolateStopTime` ``SUNFALSE`` Disable the stop time :c:func:`ARKStepClearStopTime` N/A Supply a pointer for user data :c:func:`ARKStepSetUserData` ``NULL`` Maximum no. of ARKStep error test failures :c:func:`ARKStepSetMaxErrTestFails` 7 @@ -1267,6 +1268,23 @@ Set max number of constraint failures :c:func:`ARKStepSetMaxNumConst :c:func:`ARKStepClearStopTime`. +.. c:function:: int ARKStepSetInterpolateStopTime(void* arkode_mem, booleantype interp) + + Specifies that the output solution should be interpolated when the current + :math:`t` equals the specified ``tstop`` (instead of merely copying the + internal solution :math:`y_n`). + + **Arguments:** + * *arkode_mem* -- pointer to the ARKStep memory block. + * *interp* -- flag indicating to use interpolation (1) or copy (0). 
+ + **Return value:** + * *ARK_SUCCESS* if successful + * *ARK_MEM_NULL* if the ARKStep memory is ``NULL`` + + .. versionadded:: 5.6.0 + + .. c:function:: int ARKStepClearStopTime(void* arkode_mem) Disables the stop time set with :c:func:`ARKStepSetStopTime`. @@ -1454,7 +1472,7 @@ Set additive RK tables via their names :c:func:`ARKStepSetTableName()` int For explicit methods, the allowed values are :math:`2 \le` *ord* :math:`\le 8`. For implicit methods, the allowed values are :math:`2\le` *ord* :math:`\le 5`, and for ImEx methods the allowed - values are :math:`3 \le` *ord* :math:`\le 5`. Any illegal input + values are :math:`2 \le` *ord* :math:`\le 5`. Any illegal input will result in the default value of 4. Since *ord* affects the memory requirements for the internal diff --git a/doc/arkode/guide/source/Usage/ERKStep_c_interface/User_callable.rst b/doc/arkode/guide/source/Usage/ERKStep_c_interface/User_callable.rst index 2a3702da40..b40ddbf160 100644 --- a/doc/arkode/guide/source/Usage/ERKStep_c_interface/User_callable.rst +++ b/doc/arkode/guide/source/Usage/ERKStep_c_interface/User_callable.rst @@ -467,45 +467,47 @@ Optional inputs for ERKStep .. _ARKODE.Usage.ERKStep.ERKStepInputTable: .. 
table:: Optional inputs for ERKStep - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Optional input | Function name | Default | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Return ERKStep solver parameters to their defaults | :c:func:`ERKStepSetDefaults()` | internal | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Set dense output interpolation type | :c:func:`ERKStepSetInterpolantType()` | ``ARK_INTERP_HERMITE`` | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Set dense output polynomial degree | :c:func:`ERKStepSetInterpolantDegree()` | 5 | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Supply a pointer to a diagnostics output file | :c:func:`ERKStepSetDiagnostics()` | ``NULL`` | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Supply a pointer to an error output file | :c:func:`ERKStepSetErrFile()` | ``stderr`` | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Supply a custom error handler function | :c:func:`ERKStepSetErrHandlerFn()` | internal fn | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Disable time step adaptivity (fixed-step mode) | :c:func:`ERKStepSetFixedStep()` | disabled | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Supply an initial step size to attempt | :c:func:`ERKStepSetInitStep()` | estimated | - 
+----------------------------------------------------+-----------------------------------------+------------------------+ - | Maximum no. of warnings for :math:`t_n+h = t_n` | :c:func:`ERKStepSetMaxHnilWarns()` | 10 | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Maximum no. of internal steps before *tout* | :c:func:`ERKStepSetMaxNumSteps()` | 500 | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Maximum absolute step size | :c:func:`ERKStepSetMaxStep()` | :math:`\infty` | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Minimum absolute step size | :c:func:`ERKStepSetMinStep()` | 0.0 | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Set a value for :math:`t_{stop}` | :c:func:`ERKStepSetStopTime()` | undefined | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Disable the stop time | :c:func:`ERKStepClearStopTime` | N/A | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Supply a pointer for user data | :c:func:`ERKStepSetUserData()` | ``NULL`` | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Maximum no. 
of ERKStep error test failures | :c:func:`ERKStepSetMaxErrTestFails()` | 7 | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Set inequality constraints on solution | :c:func:`ERKStepSetConstraints()` | ``NULL`` | - +----------------------------------------------------+-----------------------------------------+------------------------+ - | Set max number of constraint failures | :c:func:`ERKStepSetMaxNumConstrFails()` | 10 | - +----------------------------------------------------+-----------------------------------------+------------------------+ + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Optional input | Function name | Default | + +====================================================+===========================================+========================+ + | Return ERKStep solver parameters to their defaults | :c:func:`ERKStepSetDefaults()` | internal | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Set dense output interpolation type | :c:func:`ERKStepSetInterpolantType()` | ``ARK_INTERP_HERMITE`` | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Set dense output polynomial degree | :c:func:`ERKStepSetInterpolantDegree()` | 5 | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Supply a pointer to a diagnostics output file | :c:func:`ERKStepSetDiagnostics()` | ``NULL`` | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Supply a pointer to an error output file | :c:func:`ERKStepSetErrFile()` | ``stderr`` | + 
+----------------------------------------------------+-------------------------------------------+------------------------+ + | Supply a custom error handler function | :c:func:`ERKStepSetErrHandlerFn()` | internal fn | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Disable time step adaptivity (fixed-step mode) | :c:func:`ERKStepSetFixedStep()` | disabled | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Supply an initial step size to attempt | :c:func:`ERKStepSetInitStep()` | estimated | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Maximum no. of warnings for :math:`t_n+h = t_n` | :c:func:`ERKStepSetMaxHnilWarns()` | 10 | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Maximum no. 
of internal steps before *tout* | :c:func:`ERKStepSetMaxNumSteps()` | 500 | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Maximum absolute step size | :c:func:`ERKStepSetMaxStep()` | :math:`\infty` | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Minimum absolute step size | :c:func:`ERKStepSetMinStep()` | 0.0 | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Set a value for :math:`t_{stop}` | :c:func:`ERKStepSetStopTime()` | undefined | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Interpolate at :math:`t_{stop}` | :c:func:`ERKStepSetInterpolateStopTime()` | ``SUNFALSE`` | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Disable the stop time | :c:func:`ERKStepClearStopTime` | N/A | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Supply a pointer for user data | :c:func:`ERKStepSetUserData()` | ``NULL`` | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Maximum no. 
of ERKStep error test failures | :c:func:`ERKStepSetMaxErrTestFails()` | 7 | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Set inequality constraints on solution | :c:func:`ERKStepSetConstraints()` | ``NULL`` | + +----------------------------------------------------+-------------------------------------------+------------------------+ + | Set max number of constraint failures | :c:func:`ERKStepSetMaxNumConstrFails()` | 10 | + +----------------------------------------------------+-------------------------------------------+------------------------+ @@ -886,6 +888,23 @@ Optional inputs for ERKStep :c:func:`ERKStepClearStopTime`. +.. c:function:: int ERKStepSetInterpolateStopTime(void* arkode_mem, booleantype interp) + + Specifies that the output solution should be interpolated when the current + :math:`t` equals the specified ``tstop`` (instead of merely copying the + internal solution :math:`y_n`). + + **Arguments:** + * *arkode_mem* -- pointer to the ERKStep memory block. + * *interp* -- flag indicating to use interpolation (1) or copy (0). + + **Return value:** + * *ARK_SUCCESS* if successful + * *ARK_MEM_NULL* if the ERKStep memory is ``NULL`` + + .. versionadded:: 5.6.0 + + .. c:function:: int ERKStepClearStopTime(void* arkode_mem) Disables the stop time set with :c:func:`ERKStepSetStopTime`. diff --git a/doc/arkode/guide/source/Usage/MRIStep_c_interface/User_callable.rst b/doc/arkode/guide/source/Usage/MRIStep_c_interface/User_callable.rst index b177f456ce..743b9c66e2 100644 --- a/doc/arkode/guide/source/Usage/MRIStep_c_interface/User_callable.rst +++ b/doc/arkode/guide/source/Usage/MRIStep_c_interface/User_callable.rst @@ -672,37 +672,39 @@ Optional inputs for MRIStep .. _ARKODE.Usage.MRIStep.MRIStepInput.Table: .. 
table:: Optional inputs for MRIStep - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Optional input | Function name | Default | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Return MRIStep solver parameters to their defaults | :c:func:`MRIStepSetDefaults()` | internal | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Set dense output interpolation type | :c:func:`MRIStepSetInterpolantType()` | ``ARK_INTERP_HERMITE`` | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Set dense output polynomial degree | :c:func:`MRIStepSetInterpolantDegree()` | 5 | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Supply a pointer to a diagnostics output file | :c:func:`MRIStepSetDiagnostics()` | ``NULL`` | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Supply a pointer to an error output file | :c:func:`MRIStepSetErrFile()` | ``stderr`` | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Supply a custom error handler function | :c:func:`MRIStepSetErrHandlerFn()` | internal fn | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Run with fixed-step sizes | :c:func:`MRIStepSetFixedStep()` | required | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Maximum no. 
of warnings for :math:`t_n+h = t_n` | :c:func:`MRIStepSetMaxHnilWarns()` | 10 | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Maximum no. of internal steps before *tout* | :c:func:`MRIStepSetMaxNumSteps()` | 500 | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Set a value for :math:`t_{stop}` | :c:func:`MRIStepSetStopTime()` | undefined | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Disable the stop time | :c:func:`MRIStepClearStopTime` | N/A | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Supply a pointer for user data | :c:func:`MRIStepSetUserData()` | ``NULL`` | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Supply a function to be called prior to the inner integration | :c:func:`MRIStepSetPreInnerFn()` | ``NULL`` | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ - | Supply a function to be called after the inner integration | :c:func:`MRIStepSetPostInnerFn()` | ``NULL`` | - +---------------------------------------------------------------+-----------------------------------------+------------------------+ + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Optional input | Function name | Default | + +===============================================================+===========================================+========================+ + | Return MRIStep solver parameters to their defaults | :c:func:`MRIStepSetDefaults()` | internal | + 
+---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Set dense output interpolation type | :c:func:`MRIStepSetInterpolantType()` | ``ARK_INTERP_HERMITE`` | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Set dense output polynomial degree | :c:func:`MRIStepSetInterpolantDegree()` | 5 | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Supply a pointer to a diagnostics output file | :c:func:`MRIStepSetDiagnostics()` | ``NULL`` | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Supply a pointer to an error output file | :c:func:`MRIStepSetErrFile()` | ``stderr`` | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Supply a custom error handler function | :c:func:`MRIStepSetErrHandlerFn()` | internal fn | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Run with fixed-step sizes | :c:func:`MRIStepSetFixedStep()` | required | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Maximum no. of warnings for :math:`t_n+h = t_n` | :c:func:`MRIStepSetMaxHnilWarns()` | 10 | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Maximum no. 
of internal steps before *tout* | :c:func:`MRIStepSetMaxNumSteps()` | 500 | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Set a value for :math:`t_{stop}` | :c:func:`MRIStepSetStopTime()` | undefined | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Interpolate at :math:`t_{stop}` | :c:func:`MRIStepSetInterpolateStopTime()` | ``SUNFALSE`` | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Disable the stop time | :c:func:`MRIStepClearStopTime` | N/A | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Supply a pointer for user data | :c:func:`MRIStepSetUserData()` | ``NULL`` | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Supply a function to be called prior to the inner integration | :c:func:`MRIStepSetPreInnerFn()` | ``NULL`` | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ + | Supply a function to be called after the inner integration | :c:func:`MRIStepSetPostInnerFn()` | ``NULL`` | + +---------------------------------------------------------------+-------------------------------------------+------------------------+ @@ -1105,6 +1107,23 @@ Optional inputs for MRIStep :c:func:`MRIStepClearStopTime`. +.. c:function:: int MRIStepSetInterpolateStopTime(void* arkode_mem, booleantype interp) + + Specifies that the output solution should be interpolated when the current + :math:`t` equals the specified ``tstop`` (instead of merely copying the + internal solution :math:`y_n`). 
+ + **Arguments:** + * *arkode_mem* -- pointer to the MRIStep memory block. + * *interp* -- flag indicating to use interpolation (1) or copy (0). + + **Return value:** + * *ARK_SUCCESS* if successful + * *ARK_MEM_NULL* if the MRIStep memory is ``NULL`` + + .. versionadded:: 5.6.0 + + .. c:function:: int MRIStepClearStopTime(void* arkode_mem) Disables the stop time set with :c:func:`MRIStepSetStopTime`. diff --git a/doc/cvode/guide/source/Introduction.rst b/doc/cvode/guide/source/Introduction.rst index 496dfa5be6..5ecd6bc23d 100644 --- a/doc/cvode/guide/source/Introduction.rst +++ b/doc/cvode/guide/source/Introduction.rst @@ -111,6 +111,19 @@ implementations. Changes from previous versions ============================== +Changes in v6.6.0 +----------------- + +Updated the F2003 utility routines :c:func:`SUNDIALSFileOpen` and :c:func:`SUNDIALSFileClose` +to support user specification of ``stdout`` and ``stderr`` strings for the output +file names. + +Updated the default CVODE behavior when returning the solution when +the internal time has reached a user-specified stop time. Previously, the output +solution was interpolated to the value of ``tstop``; the default is now to copy the +internal solution vector. Users who wish to revert to interpolation may call the +routine :c:func:`CVodeSetInterpolateStopTime`. + Changes in v6.5.1 ----------------- @@ -145,7 +158,7 @@ Fixed the shape of the arrays returned by ``FN_VGetArrayPointer`` functions as w as the ``FSUNDenseMatrix_Data``, ``FSUNBandMatrix_Data``, ``FSUNSparseMatrix_Data``, ``FSUNSparseMatrix_IndexValues``, and ``FSUNSparseMatrix_IndexPointers`` functions. Compiling and running code that uses the SUNDIALS Fortran interfaces with -bounds checking will now work. +bounds checking will now work. 
Changes in v6.4.1 ----------------- diff --git a/doc/cvode/guide/source/Usage/index.rst b/doc/cvode/guide/source/Usage/index.rst index 0f425c4b96..1bfd789b52 100644 --- a/doc/cvode/guide/source/Usage/index.rst +++ b/doc/cvode/guide/source/Usage/index.rst @@ -713,7 +713,7 @@ of two modes as to where CVODE is to return a solution. But these modes are modified if the user has set a stop time (with :c:func:`CVodeSetStopTime`) or requested rootfinding. -.. c:function:: int CVode(void* cvode_mem, realtype tout, N_Vector yout, realtype tret, int itask) +.. c:function:: int CVode(void* cvode_mem, realtype tout, N_Vector yout, realtype* tret, int itask) The function ``CVode`` integrates the ODE over an interval in t. @@ -852,6 +852,9 @@ Main solver optional input functions +-------------------------------+---------------------------------------------+----------------+ | Value of :math:`t_{stop}` | :c:func:`CVodeSetStopTime` | undefined | +-------------------------------+---------------------------------------------+----------------+ + | Interpolate at | :c:func:`CVodeSetInterpolateStopTime` | ``SUNFALSE`` | + | :math:`t_{stop}` | | | + +-------------------------------+---------------------------------------------+----------------+ | Disable the stop time | :c:func:`CVodeClearStopTime` | N/A | +-------------------------------+---------------------------------------------+----------------+ | Maximum no. of error test | :c:func:`CVodeSetMaxErrTestFails` | 7 | @@ -1091,6 +1094,22 @@ Main solver optional input functions A stop time not reached before a call to :c:func:`CVodeReInit` will remain active but can be disabled by calling :c:func:`CVodeClearStopTime`. +.. c:function:: int CVodeSetInterpolateStopTime(void* cvode_mem, booleantype interp) + + The function ``CVodeSetInterpolateStopTime`` specifies that the output solution should be + interpolated when the current :math:`t` equals the specified ``tstop`` (instead of + merely copying the internal solution :math:`y_n`). 
+ + **Arguments:** + * ``cvode_mem`` -- pointer to the CVODE memory block. + * ``interp`` -- flag indicating to use interpolation (1) or copy (0). + + **Return value:** + * ``CV_SUCCESS`` -- The optional value has been successfully set. + * ``CV_MEM_NULL`` -- The CVODE memory block was not initialized through a previous call to :c:func:`CVodeCreate`. + + .. versionadded:: 6.6.0 + .. c:function:: int CVodeClearStopTime(void* cvode_mem) Disables the stop time set with :c:func:`CVodeSetStopTime`. @@ -1630,8 +1649,8 @@ the :c:func:`CVodeSetEpsLin` function. .. _CVODE.Usage.CC.optional_input.optin_nls: -Linear solver interface optional input functions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Nonlinear solver interface optional input functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. _CVODE.Usage.CC.optional_input.optin_nls_table: diff --git a/doc/cvodes/guide/source/Introduction.rst b/doc/cvodes/guide/source/Introduction.rst index aec32d3649..2795919253 100644 --- a/doc/cvodes/guide/source/Introduction.rst +++ b/doc/cvodes/guide/source/Introduction.rst @@ -111,6 +111,19 @@ Fortran. Changes from previous versions ============================== +Changes in v6.6.0 +----------------- + +Updated the F2003 utility routines :c:func:`SUNDIALSFileOpen` and :c:func:`SUNDIALSFileClose` +to support user specification of ``stdout`` and ``stderr`` strings for the output +file names. + +Updated the default CVODES behavior when returning the solution when +the internal time has reached a user-specified stop time. Previously, the output +solution was interpolated to the value of ``tstop``; the default is now to copy the +internal solution vector. Users who wish to revert to interpolation may call the +routine :c:func:`CVodeSetInterpolateStopTime`. 
+ Changes in v6.5.1 ----------------- @@ -146,7 +159,7 @@ Fixed the shape of the arrays returned by ``FN_VGetArrayPointer`` functions as w as the ``FSUNDenseMatrix_Data``, ``FSUNBandMatrix_Data``, ``FSUNSparseMatrix_Data``, ``FSUNSparseMatrix_IndexValues``, and ``FSUNSparseMatrix_IndexPointers`` functions. Compiling and running code that uses the SUNDIALS Fortran interfaces with -bounds checking will now work. +bounds checking will now work. Changes in v6.4.1 ----------------- diff --git a/doc/cvodes/guide/source/Usage/ADJ.rst b/doc/cvodes/guide/source/Usage/ADJ.rst index a6431e8882..8552d514f3 100644 --- a/doc/cvodes/guide/source/Usage/ADJ.rst +++ b/doc/cvodes/guide/source/Usage/ADJ.rst @@ -383,7 +383,7 @@ use in Forward Sensitivity Analysis; for that, see :numref:`CVODES.Usage.FSA`. The call to this function has the form -.. c:function:: int CVodeF(void * cvode_mem, realtype tout, N_Vector yret, realtype tret, int itask, int ncheck) +.. c:function:: int CVodeF(void * cvode_mem, realtype tout, N_Vector yret, realtype* tret, int itask, int ncheck) The function :c:func:`CVodeF` integrates the forward problem over an interval in :math:`t` and saves checkpointing data. @@ -1353,7 +1353,7 @@ To extract the values of the quadrature variables at the last return time of :c:func:`CVodeGetQuad`. -.. c:function:: int CVodeGetQuadB(void * cvode_mem, whichrealtype tret, N_Vector yQB) +.. c:function:: int CVodeGetQuadB(void * cvode_mem, int which, realtype* tret, N_Vector yQB) The function :c:func:`CVodeGetQuadB` returns the quadrature solution vector after a successful return from :c:func:`CVodeB`. diff --git a/doc/cvodes/guide/source/Usage/SIM.rst b/doc/cvodes/guide/source/Usage/SIM.rst index 826ee20e29..dbc6506dd1 100644 --- a/doc/cvodes/guide/source/Usage/SIM.rst +++ b/doc/cvodes/guide/source/Usage/SIM.rst @@ -721,7 +721,7 @@ of two modes as to where CVODES is to return a solution. 
But these modes are modified if the user has set a stop time (with :c:func:`CVodeSetStopTime`) or requested rootfinding. -.. c:function:: int CVode(void* cvode_mem, realtype tout, N_Vector yout, realtype tret, int itask) +.. c:function:: int CVode(void* cvode_mem, realtype tout, N_Vector yout, realtype* tret, int itask) The function ``CVode`` integrates the ODE over an interval in t. @@ -829,45 +829,47 @@ Main solver optional input functions .. table:: Optional inputs for CVODES - +-------------------------------+---------------------------------------------+----------------+ - | **Optional input** | **Function name** | **Default** | - +===============================+=============================================+================+ - | Pointer to an error file | :c:func:`CVodeSetErrFile` | ``stderr`` | - +-------------------------------+---------------------------------------------+----------------+ - | Error handler function | :c:func:`CVodeSetErrHandlerFn` | internal fn. | - +-------------------------------+---------------------------------------------+----------------+ - | User data | :c:func:`CVodeSetUserData` | ``NULL`` | - +-------------------------------+---------------------------------------------+----------------+ - | Maximum order for BDF method | :c:func:`CVodeSetMaxOrd` | 5 | - +-------------------------------+---------------------------------------------+----------------+ - | Maximum order for Adams | :c:func:`CVodeSetMaxOrd` | 12 | - | method | | | - +-------------------------------+---------------------------------------------+----------------+ - | Maximum no. of internal steps | :c:func:`CVodeSetMaxNumSteps` | 500 | - | before :math:`t_{out}` | | | - +-------------------------------+---------------------------------------------+----------------+ - | Maximum no. 
of warnings for | :c:func:`CVodeSetMaxHnilWarns` | 10 | - | :math:`t_n+h=t_n` | | | - +-------------------------------+---------------------------------------------+----------------+ - | Flag to activate stability | :c:func:`CVodeSetStabLimDet` | ``SUNFALSE`` | - | limit detection | | | - +-------------------------------+---------------------------------------------+----------------+ - | Initial step size | :c:func:`CVodeSetInitStep` | estimated | - +-------------------------------+---------------------------------------------+----------------+ - | Minimum absolute step size | :c:func:`CVodeSetMinStep` | 0.0 | - +-------------------------------+---------------------------------------------+----------------+ - | Maximum absolute step size | :c:func:`CVodeSetMaxStep` | :math:`\infty` | - +-------------------------------+---------------------------------------------+----------------+ - | Value of :math:`t_{stop}` | :c:func:`CVodeSetStopTime` | undefined | - +-------------------------------+---------------------------------------------+----------------+ - | Disable the stop time | :c:func:`CVodeClearStopTime` | N/A | - +-------------------------------+---------------------------------------------+----------------+ - | Maximum no. 
of error test | :c:func:`CVodeSetMaxErrTestFails` | 7 | - | failures | | | - +-------------------------------+---------------------------------------------+----------------+ - | Inequality constraints on | :c:func:`CVodeSetConstraints` | | - | solution | | | - +-------------------------------+---------------------------------------------+----------------+ + +---------------------------------+---------------------------------------------+----------------+ + | **Optional input** | **Function name** | **Default** | + +=================================+=============================================+================+ + | Pointer to an error file | :c:func:`CVodeSetErrFile` | ``stderr`` | + +---------------------------------+---------------------------------------------+----------------+ + | Error handler function | :c:func:`CVodeSetErrHandlerFn` | internal fn. | + +---------------------------------+---------------------------------------------+----------------+ + | User data | :c:func:`CVodeSetUserData` | ``NULL`` | + +---------------------------------+---------------------------------------------+----------------+ + | Maximum order for BDF method | :c:func:`CVodeSetMaxOrd` | 5 | + +---------------------------------+---------------------------------------------+----------------+ + | Maximum order for Adams | :c:func:`CVodeSetMaxOrd` | 12 | + | method | | | + +---------------------------------+---------------------------------------------+----------------+ + | Maximum no. of internal steps | :c:func:`CVodeSetMaxNumSteps` | 500 | + | before :math:`t_{out}` | | | + +---------------------------------+---------------------------------------------+----------------+ + | Maximum no. 
of warnings for | :c:func:`CVodeSetMaxHnilWarns` | 10 | + | :math:`t_n+h=t_n` | | | + +---------------------------------+---------------------------------------------+----------------+ + | Flag to activate stability | :c:func:`CVodeSetStabLimDet` | ``SUNFALSE`` | + | limit detection | | | + +---------------------------------+---------------------------------------------+----------------+ + | Initial step size | :c:func:`CVodeSetInitStep` | estimated | + +---------------------------------+---------------------------------------------+----------------+ + | Minimum absolute step size | :c:func:`CVodeSetMinStep` | 0.0 | + +---------------------------------+---------------------------------------------+----------------+ + | Maximum absolute step size | :c:func:`CVodeSetMaxStep` | :math:`\infty` | + +---------------------------------+---------------------------------------------+----------------+ + | Value of :math:`t_{stop}` | :c:func:`CVodeSetStopTime` | undefined | + +---------------------------------+---------------------------------------------+----------------+ + | Interpolate at :math:`t_{stop}` | :c:func:`CVodeSetInterpolateStopTime` | ``SUNFALSE`` | + +---------------------------------+---------------------------------------------+----------------+ + | Disable the stop time | :c:func:`CVodeClearStopTime` | N/A | + +---------------------------------+---------------------------------------------+----------------+ + | Maximum no. of error test | :c:func:`CVodeSetMaxErrTestFails` | 7 | + | failures | | | + +---------------------------------+---------------------------------------------+----------------+ + | Inequality constraints on | :c:func:`CVodeSetConstraints` | | + | solution | | | + +---------------------------------+---------------------------------------------+----------------+ .. 
c:function:: int CVodeSetErrFile(void* cvode_mem, FILE * errfp) @@ -1096,6 +1098,22 @@ Main solver optional input functions A stop time not reached before a call to :c:func:`CVodeReInit` will remain active but can be disabled by calling :c:func:`CVodeClearStopTime`. +.. c:function:: int CVodeSetInterpolateStopTime(void* cvode_mem, booleantype interp) + + The function ``CVodeSetInterpolateStopTime`` specifies that the output solution should be + interpolated when the current :math:`t` equals the specified ``tstop`` (instead of + merely copying the internal solution :math:`y_n`). + + **Arguments:** + * ``cvode_mem`` -- pointer to the CVODES memory block. + * ``interp`` -- flag indicating to use interpolation (1) or copy (0). + + **Return value:** + * ``CV_SUCCESS`` -- The optional value has been successfully set. + * ``CV_MEM_NULL`` -- The CVODES memory block was not initialized through a previous call to :c:func:`CVodeCreate`. + + .. versionadded:: 6.6.0 + .. c:function:: int CVodeClearStopTime(void* cvode_mem) Disables the stop time set with :c:func:`CVodeSetStopTime`. @@ -1627,8 +1645,8 @@ the :c:func:`CVodeSetEpsLin` function. .. _CVODES.Usage.SIM.optional_input.optin_nls: -Linear solver interface optional input functions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Nonlinear solver interface optional input functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. _CVODES.Usage.SIM.optional_input.optin_nls_table: @@ -4024,7 +4042,7 @@ solution and quadratures at time ``t``. However, :c:func:`CVode` will still retu only the solution :math:`y` in ``yout``. Solution quadratures can be obtained using the following function: -.. c:function:: int CVodeGetQuad(void * cvode_mem, realtype tret, N_Vector yQ) +.. c:function:: int CVodeGetQuad(void * cvode_mem, realtype* tret, N_Vector yQ) The function ``CVodeGetQuad`` returns the quadrature solution vector after a successful return from ``CVode``. 
@@ -4104,7 +4122,7 @@ If the quadrature variables are part of the step size control mechanism, one of the following functions must be called to specify the integration tolerances for quadrature variables. -.. c:function:: int CVodeQuadSVtolerances(void * cvode_mem, realtype reltolQ, realtype abstolQ) +.. c:function:: int CVodeQuadSVtolerances(void * cvode_mem, realtype reltolQ, N_Vector abstolQ) The function ``CVodeQuadSStolerances`` specifies scalar relative and absolute tolerances. diff --git a/doc/ida/guide/source/Introduction.rst b/doc/ida/guide/source/Introduction.rst index 76458544b9..b2a5a15671 100644 --- a/doc/ida/guide/source/Introduction.rst +++ b/doc/ida/guide/source/Introduction.rst @@ -72,6 +72,13 @@ systems. Changes from previous versions ============================== +Changes in v6.6.0 +----------------- + +Updated the F2003 utility routines :c:func:`SUNDIALSFileOpen` and :c:func:`SUNDIALSFileClose` +to support user specification of ``stdout`` and ``stderr`` strings for the output +file names. + Changes in v6.5.1 ----------------- @@ -107,7 +114,7 @@ Fixed the shape of the arrays returned by ``FN_VGetArrayPointer`` functions as w as the ``FSUNDenseMatrix_Data``, ``FSUNBandMatrix_Data``, ``FSUNSparseMatrix_Data``, ``FSUNSparseMatrix_IndexValues``, and ``FSUNSparseMatrix_IndexPointers`` functions. Compiling and running code that uses the SUNDIALS Fortran interfaces with -bounds checking will now work. +bounds checking will now work. Changes in v6.4.1 ----------------- diff --git a/doc/ida/guide/source/Usage/index.rst b/doc/ida/guide/source/Usage/index.rst index 25bae3ecae..c6fca6bcee 100644 --- a/doc/ida/guide/source/Usage/index.rst +++ b/doc/ida/guide/source/Usage/index.rst @@ -792,7 +792,7 @@ the user has set a stop time (with :c:func:`IDASetStopTime`) or requested rootfinding (with :c:func:`IDARootInit`). -.. c:function:: int IDASolve(void * ida_mem, realtype tout, realtype tret, N_Vector yret, N_Vector ypret, int itask) +.. 
c:function:: int IDASolve(void * ida_mem, realtype tout, realtype* tret, N_Vector yret, N_Vector ypret, int itask) The function ``IDASolve`` integrates the DAE over an interval in t. diff --git a/doc/idas/guide/source/Introduction.rst b/doc/idas/guide/source/Introduction.rst index f5b828966d..a2fd55c464 100644 --- a/doc/idas/guide/source/Introduction.rst +++ b/doc/idas/guide/source/Introduction.rst @@ -86,6 +86,14 @@ integrate any final-condition ODE dependent on the solution of the original IVP Changes from previous versions ============================== +Changes in v5.6.0 +----------------- + +Updated the F2003 utility routines :c:func:`SUNDIALSFileOpen` and :c:func:`SUNDIALSFileClose` +to support user specification of ``stdout`` and ``stderr`` strings for the output +file names. + + Changes in v5.5.1 ----------------- @@ -121,7 +129,7 @@ Fixed the shape of the arrays returned by ``FN_VGetArrayPointer`` functions as w as the ``FSUNDenseMatrix_Data``, ``FSUNBandMatrix_Data``, ``FSUNSparseMatrix_Data``, ``FSUNSparseMatrix_IndexValues``, and ``FSUNSparseMatrix_IndexPointers`` functions. Compiling and running code that uses the SUNDIALS Fortran interfaces with -bounds checking will now work. +bounds checking will now work. Changes in v5.4.1 ----------------- diff --git a/doc/idas/guide/source/Usage/SIM.rst b/doc/idas/guide/source/Usage/SIM.rst index 2fc1896241..4bd1ac2738 100644 --- a/doc/idas/guide/source/Usage/SIM.rst +++ b/doc/idas/guide/source/Usage/SIM.rst @@ -3970,7 +3970,7 @@ then IDAS computes both a solution and quadratures at time ``t``. However, :c:func:`IDASolve` will still return only the solution :math:`y` in ``y``. Solution quadratures can be obtained using the following function: -.. c:function:: int IDAGetQuad(void * ida_mem, realtype tret, N_Vector yQ) +.. 
c:function:: int IDAGetQuad(void * ida_mem, realtype* tret, N_Vector yQ) The function :c:func:`IDAGetQuad` returns the quadrature solution vector after a successful return from :c:func:`IDASolve`. diff --git a/doc/kinsol/guide/source/Introduction.rst b/doc/kinsol/guide/source/Introduction.rst index 6e6edcffcf..3b8f5a1f21 100644 --- a/doc/kinsol/guide/source/Introduction.rst +++ b/doc/kinsol/guide/source/Introduction.rst @@ -88,6 +88,14 @@ applications written in Fortran. Changes from previous versions ============================== +Changes in v6.6.0 +----------------- + +Updated the F2003 utility routines :c:func:`SUNDIALSFileOpen` and :c:func:`SUNDIALSFileClose` +to support user specification of ``stdout`` and ``stderr`` strings for the output +file names. + + Changes in v6.5.1 ----------------- @@ -115,7 +123,7 @@ Fixed the shape of the arrays returned by ``FN_VGetArrayPointer`` functions as w as the ``FSUNDenseMatrix_Data``, ``FSUNBandMatrix_Data``, ``FSUNSparseMatrix_Data``, ``FSUNSparseMatrix_IndexValues``, and ``FSUNSparseMatrix_IndexPointers`` functions. Compiling and running code that uses the SUNDIALS Fortran interfaces with -bounds checking will now work. +bounds checking will now work. Changes in v6.4.1 ----------------- diff --git a/doc/shared/Install.rst b/doc/shared/Install.rst index 7ad8c3bcd1..4682ba1906 100644 --- a/doc/shared/Install.rst +++ b/doc/shared/Install.rst @@ -776,6 +776,20 @@ illustration only. Default: none +.. cmakeoption:: SUNDIALS_ONEMKL_USE_GETRF_LOOP + + This advanced debugging option replaces the batched LU factorization with a + loop over each system in the batch and a non-batched LU factorization. + + Default: OFF + +.. cmakeoption:: SUNDIALS_ONEMKL_USE_GETRS_LOOP + + This advanced debugging option replaces the batched LU solve with a loop over + each system in the batch and a non-batched solve. + + Default: OFF + .. 
cmakeoption:: ENABLE_OPENMP Enable OpenMP support (build the OpenMP NVector) @@ -944,6 +958,14 @@ illustration only. ``dpcpp`` and ``icpx``. When using ``icpx`` the ``-fsycl`` flag and any ahead of time compilation flags must be added to ``CMAKE_CXX_FLAGS``. +.. cmakeoption:: SUNDIALS_SYCL_2020_UNSUPPORTED + + This advanced option disables the use of *some* features from the SYCL 2020 + standard in SUNDIALS libraries and examples. This can be used to work around + some cases of incomplete compiler support for SYCL 2020. + + Default: OFF + .. cmakeoption:: SUNDIALS_LOGGING_LEVEL diff --git a/doc/shared/figs/arkode/ark2_dirk_stab_region.png b/doc/shared/figs/arkode/ark2_dirk_stab_region.png new file mode 100644 index 0000000000..83929af11e Binary files /dev/null and b/doc/shared/figs/arkode/ark2_dirk_stab_region.png differ diff --git a/doc/shared/figs/arkode/ark2_erk_stab_region.png b/doc/shared/figs/arkode/ark2_erk_stab_region.png new file mode 100644 index 0000000000..45b125d708 Binary files /dev/null and b/doc/shared/figs/arkode/ark2_erk_stab_region.png differ diff --git a/doc/shared/sundials.bib b/doc/shared/sundials.bib index 2851d968bd..d62f0d9c22 100644 --- a/doc/shared/sundials.bib +++ b/doc/shared/sundials.bib @@ -1784,6 +1784,19 @@ @techreport{Fehlberg:69 year = {1969} } + +@article{giraldo2013implicit, + title = {Implicit-explicit formulations of a three-dimensional nonhydrostatic unified model of the atmosphere (NUMA)}, + author = {Giraldo, F. X. and Kelly, J. F. and Constantinescu, E. 
M.}, + journal = {SIAM Journal on Scientific Computing}, + volume = {35}, + number = {5}, + pages = {B1162--B1194}, + year = {2013}, + publisher = {SIAM}, + doi = {10.1137/120876034} +} + @article{Gust:91, author = {Gustafsson, K.}, title = {Control theoretic techniques for stepsize selection in explicit {Runge-Kutta} methods}, diff --git a/doc/shared/sundials/Fortran.rst b/doc/shared/sundials/Fortran.rst index 20246ce8f8..bef4eb5aca 100644 --- a/doc/shared/sundials/Fortran.rst +++ b/doc/shared/sundials/Fortran.rst @@ -490,8 +490,10 @@ a C file pointer, SUNDIALS provides two utility functions for creating a the provided filename and I/O mode. **Arguments:** - * ``filename`` -- the full path to the file, that should have Fortran - type ``character(kind=C_CHAR, len=*)``. + * ``filename`` -- the path to the file, that should have Fortran + type ``character(kind=C_CHAR, len=*)``. There are two special filenames: + ``stdout`` and ``stderr`` -- these two filenames will result in output + going to the standard output file and standard error file, respectively. * ``mode`` -- the I/O mode to use for the file. This should have the Fortran type ``character(kind=C_CHAR, len=*)``. The string begins with one of the following characters: @@ -517,7 +519,9 @@ a C file pointer, SUNDIALS provides two utility functions for creating a **Arguments:** * ``fp`` -- the C ``FILE*`` that was previously obtained from ``fopen``. - This should have the Fortran type ``type(c_ptr)``. + This should have the Fortran type ``type(c_ptr)``. Note that if either + ``stdout`` or ``stderr`` were opened using :c:func:`SUNDIALSFileOpen()` + then that stream *will not be closed* by this function. .. 
_SUNDIALS.Fortran.Portability: diff --git a/examples/cvode/CXX_onemkl/cvRoberts_blockdiag_onemkl.cpp b/examples/cvode/CXX_onemkl/cvRoberts_blockdiag_onemkl.cpp index 6c176ee53e..45eae942b2 100644 --- a/examples/cvode/CXX_onemkl/cvRoberts_blockdiag_onemkl.cpp +++ b/examples/cvode/CXX_onemkl/cvRoberts_blockdiag_onemkl.cpp @@ -129,7 +129,7 @@ int main(int argc, char *argv[]) if (argc > 3) output = (atoi(argv[3])) ? true : false; // Create an in-order GPU queue -#if SYCL_LANGUAGE_VERSION >= 2020 +#if SYCL_LANGUAGE_VERSION >= 2020 && !defined(SUNDIALS_SYCL_2020_UNSUPPORTED) sycl::queue myQueue(sycl::gpu_selector_v, sycl::property_list{sycl::property::queue::in_order{}}); #else diff --git a/examples/cvode/CXX_sycl/cvAdvDiff_kry_sycl.cpp b/examples/cvode/CXX_sycl/cvAdvDiff_kry_sycl.cpp index dc3286701e..44fe80d7b7 100644 --- a/examples/cvode/CXX_sycl/cvAdvDiff_kry_sycl.cpp +++ b/examples/cvode/CXX_sycl/cvAdvDiff_kry_sycl.cpp @@ -119,7 +119,7 @@ int main(int argc, char** argv) int retval; // Create an in-order GPU queue -#if SYCL_LANGUAGE_VERSION >= 2020 +#if SYCL_LANGUAGE_VERSION >= 2020 && !defined(SUNDIALS_SYCL_2020_UNSUPPORTED) sycl::queue myQueue(sycl::gpu_selector_v, sycl::property_list{sycl::property::queue::in_order{}}); #else diff --git a/examples/cvode/kokkos/CMakeLists.txt b/examples/cvode/kokkos/CMakeLists.txt index 84d0e1528f..3b20ec3cd4 100644 --- a/examples/cvode/kokkos/CMakeLists.txt +++ b/examples/cvode/kokkos/CMakeLists.txt @@ -16,6 +16,7 @@ # 'develop' for examples excluded from 'make test' in releases set(examples_list "cv_bruss_batched_kokkos.cpp\;\;develop" + "cv_bruss_batched_kokkos_2D.cpp\;\;develop" ) # Add the build targets for each example diff --git a/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.CUDA.out b/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.CUDA.out new file mode 100644 index 0000000000..6f2c19c3a9 --- /dev/null +++ b/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.CUDA.out @@ -0,0 +1,137 @@ + +Batch of independent 3-species 
kinetics problems + number of batches = 100 + linear solver = KokkosKernels + test type = 2 + execution space = Cuda + +At t = 0 + batch 0: y = 1.2 3.1 3 + batch 10: y = 1.2 3.1 3 + batch 20: y = 1.2 3.1 3 + batch 30: y = 1.2 3.1 3 + batch 40: y = 1.2 3.1 3 + batch 50: y = 1.2 3.1 3 + batch 60: y = 1.2 3.1 3 + batch 70: y = 1.2 3.1 3 + batch 80: y = 1.2 3.1 3 + batch 90: y = 1.2 3.1 3 +At t = 1 + batch 0: y = 1.10389 3.01314 3.49998 + batch 10: y = 1.10389 3.01314 3.49998 + batch 20: y = 1.10389 3.01314 3.49998 + batch 30: y = 1.10389 3.01314 3.49998 + batch 40: y = 1.10389 3.01314 3.49998 + batch 50: y = 1.10389 3.01314 3.49998 + batch 60: y = 1.10389 3.01314 3.49998 + batch 70: y = 1.10389 3.01314 3.49998 + batch 80: y = 1.10389 3.01314 3.49998 + batch 90: y = 1.10389 3.01314 3.49998 +At t = 2 + batch 0: y = 0.688033 3.5213 3.49999 + batch 10: y = 0.688033 3.5213 3.49999 + batch 20: y = 0.688033 3.5213 3.49999 + batch 30: y = 0.688033 3.5213 3.49999 + batch 40: y = 0.688033 3.5213 3.49999 + batch 50: y = 0.688033 3.5213 3.49999 + batch 60: y = 0.688033 3.5213 3.49999 + batch 70: y = 0.688033 3.5213 3.49999 + batch 80: y = 0.688033 3.5213 3.49999 + batch 90: y = 0.688033 3.5213 3.49999 +At t = 3 + batch 0: y = 0.409472 4.27781 3.49999 + batch 10: y = 0.409472 4.27781 3.49999 + batch 20: y = 0.409472 4.27781 3.49999 + batch 30: y = 0.409472 4.27781 3.49999 + batch 40: y = 0.409472 4.27781 3.49999 + batch 50: y = 0.409472 4.27781 3.49999 + batch 60: y = 0.409472 4.27781 3.49999 + batch 70: y = 0.409472 4.27781 3.49999 + batch 80: y = 0.409472 4.27781 3.49999 + batch 90: y = 0.409472 4.27781 3.49999 +At t = 4 + batch 0: y = 0.36788 4.94194 3.49999 + batch 10: y = 0.36788 4.94194 3.49999 + batch 20: y = 0.36788 4.94194 3.49999 + batch 30: y = 0.36788 4.94194 3.49999 + batch 40: y = 0.36788 4.94194 3.49999 + batch 50: y = 0.36788 4.94194 3.49999 + batch 60: y = 0.36788 4.94194 3.49999 + batch 70: y = 0.36788 4.94194 3.49999 + batch 80: y = 0.36788 4.94194 3.49999 + 
batch 90: y = 0.36788 4.94194 3.49999 +At t = 5 + batch 0: y = 0.413842 5.51057 3.49999 + batch 10: y = 0.413842 5.51057 3.49999 + batch 20: y = 0.413842 5.51057 3.49999 + batch 30: y = 0.413842 5.51057 3.49999 + batch 40: y = 0.413842 5.51057 3.49999 + batch 50: y = 0.413842 5.51057 3.49999 + batch 60: y = 0.413842 5.51057 3.49999 + batch 70: y = 0.413842 5.51057 3.49999 + batch 80: y = 0.413842 5.51057 3.49999 + batch 90: y = 0.413842 5.51057 3.49999 +At t = 6 + batch 0: y = 0.589207 5.85566 3.49999 + batch 10: y = 0.589207 5.85566 3.49999 + batch 20: y = 0.589207 5.85566 3.49999 + batch 30: y = 0.589207 5.85566 3.49999 + batch 40: y = 0.589207 5.85566 3.49999 + batch 50: y = 0.589207 5.85566 3.49999 + batch 60: y = 0.589207 5.85566 3.49999 + batch 70: y = 0.589207 5.85566 3.49999 + batch 80: y = 0.589207 5.85566 3.49999 + batch 90: y = 0.589207 5.85566 3.49999 +At t = 7 + batch 0: y = 4.75675 0.735405 3.49992 + batch 10: y = 4.75675 0.735405 3.49992 + batch 20: y = 4.75675 0.735405 3.49992 + batch 30: y = 4.75675 0.735405 3.49992 + batch 40: y = 4.75675 0.735405 3.49992 + batch 50: y = 4.75675 0.735405 3.49992 + batch 60: y = 4.75675 0.735405 3.49992 + batch 70: y = 4.75675 0.735405 3.49992 + batch 80: y = 4.75675 0.735405 3.49992 + batch 90: y = 4.75675 0.735405 3.49992 +At t = 8 + batch 0: y = 1.81355 1.57573 3.49997 + batch 10: y = 1.81355 1.57573 3.49997 + batch 20: y = 1.81355 1.57573 3.49997 + batch 30: y = 1.81355 1.57573 3.49997 + batch 40: y = 1.81355 1.57573 3.49997 + batch 50: y = 1.81355 1.57573 3.49997 + batch 60: y = 1.81355 1.57573 3.49997 + batch 70: y = 1.81355 1.57573 3.49997 + batch 80: y = 1.81355 1.57573 3.49997 + batch 90: y = 1.81355 1.57573 3.49997 +At t = 9 + batch 0: y = 0.527935 2.80731 3.49999 + batch 10: y = 0.527935 2.80731 3.49999 + batch 20: y = 0.527935 2.80731 3.49999 + batch 30: y = 0.527935 2.80731 3.49999 + batch 40: y = 0.527935 2.80731 3.49999 + batch 50: y = 0.527935 2.80731 3.49999 + batch 60: y = 0.527935 2.80731 3.49999 
+ batch 70: y = 0.527935 2.80731 3.49999 + batch 80: y = 0.527935 2.80731 3.49999 + batch 90: y = 0.527935 2.80731 3.49999 +At t = 10 + batch 0: y = 0.305602 3.65734 3.49999 + batch 10: y = 0.305602 3.65734 3.49999 + batch 20: y = 0.305602 3.65734 3.49999 + batch 30: y = 0.305602 3.65734 3.49999 + batch 40: y = 0.305602 3.65734 3.49999 + batch 50: y = 0.305602 3.65734 3.49999 + batch 60: y = 0.305602 3.65734 3.49999 + batch 70: y = 0.305602 3.65734 3.49999 + batch 80: y = 0.305602 3.65734 3.49999 + batch 90: y = 0.305602 3.65734 3.49999 + +Final Statistics: + Steps = 344 + RHS evals = 464 + LS setups = 59 + Jac evals = 7 + NLS iters = 461 + NLS fails = 1 + Error test fails = 20 diff --git a/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.OPENMP.out b/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.OPENMP.out new file mode 100644 index 0000000000..69f0b74a18 --- /dev/null +++ b/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.OPENMP.out @@ -0,0 +1,137 @@ + +Batch of independent 3-species kinetics problems + number of batches = 100 + linear solver = KokkosKernels + test type = 2 + execution space = OpenMP + +At t = 0 + batch 0: y = 1.2 3.1 3 + batch 10: y = 1.2 3.1 3 + batch 20: y = 1.2 3.1 3 + batch 30: y = 1.2 3.1 3 + batch 40: y = 1.2 3.1 3 + batch 50: y = 1.2 3.1 3 + batch 60: y = 1.2 3.1 3 + batch 70: y = 1.2 3.1 3 + batch 80: y = 1.2 3.1 3 + batch 90: y = 1.2 3.1 3 +At t = 1 + batch 0: y = 1.10389 3.01314 3.49998 + batch 10: y = 1.10389 3.01314 3.49998 + batch 20: y = 1.10389 3.01314 3.49998 + batch 30: y = 1.10389 3.01314 3.49998 + batch 40: y = 1.10389 3.01314 3.49998 + batch 50: y = 1.10389 3.01314 3.49998 + batch 60: y = 1.10389 3.01314 3.49998 + batch 70: y = 1.10389 3.01314 3.49998 + batch 80: y = 1.10389 3.01314 3.49998 + batch 90: y = 1.10389 3.01314 3.49998 +At t = 2 + batch 0: y = 0.688033 3.5213 3.49999 + batch 10: y = 0.688033 3.5213 3.49999 + batch 20: y = 0.688033 3.5213 3.49999 + batch 30: y = 0.688033 3.5213 3.49999 + batch 40: y = 0.688033 
3.5213 3.49999 + batch 50: y = 0.688033 3.5213 3.49999 + batch 60: y = 0.688033 3.5213 3.49999 + batch 70: y = 0.688033 3.5213 3.49999 + batch 80: y = 0.688033 3.5213 3.49999 + batch 90: y = 0.688033 3.5213 3.49999 +At t = 3 + batch 0: y = 0.409472 4.27781 3.49999 + batch 10: y = 0.409472 4.27781 3.49999 + batch 20: y = 0.409472 4.27781 3.49999 + batch 30: y = 0.409472 4.27781 3.49999 + batch 40: y = 0.409472 4.27781 3.49999 + batch 50: y = 0.409472 4.27781 3.49999 + batch 60: y = 0.409472 4.27781 3.49999 + batch 70: y = 0.409472 4.27781 3.49999 + batch 80: y = 0.409472 4.27781 3.49999 + batch 90: y = 0.409472 4.27781 3.49999 +At t = 4 + batch 0: y = 0.36788 4.94194 3.49999 + batch 10: y = 0.36788 4.94194 3.49999 + batch 20: y = 0.36788 4.94194 3.49999 + batch 30: y = 0.36788 4.94194 3.49999 + batch 40: y = 0.36788 4.94194 3.49999 + batch 50: y = 0.36788 4.94194 3.49999 + batch 60: y = 0.36788 4.94194 3.49999 + batch 70: y = 0.36788 4.94194 3.49999 + batch 80: y = 0.36788 4.94194 3.49999 + batch 90: y = 0.36788 4.94194 3.49999 +At t = 5 + batch 0: y = 0.413842 5.51057 3.49999 + batch 10: y = 0.413842 5.51057 3.49999 + batch 20: y = 0.413842 5.51057 3.49999 + batch 30: y = 0.413842 5.51057 3.49999 + batch 40: y = 0.413842 5.51057 3.49999 + batch 50: y = 0.413842 5.51057 3.49999 + batch 60: y = 0.413842 5.51057 3.49999 + batch 70: y = 0.413842 5.51057 3.49999 + batch 80: y = 0.413842 5.51057 3.49999 + batch 90: y = 0.413842 5.51057 3.49999 +At t = 6 + batch 0: y = 0.589207 5.85566 3.49999 + batch 10: y = 0.589207 5.85566 3.49999 + batch 20: y = 0.589207 5.85566 3.49999 + batch 30: y = 0.589207 5.85566 3.49999 + batch 40: y = 0.589207 5.85566 3.49999 + batch 50: y = 0.589207 5.85566 3.49999 + batch 60: y = 0.589207 5.85566 3.49999 + batch 70: y = 0.589207 5.85566 3.49999 + batch 80: y = 0.589207 5.85566 3.49999 + batch 90: y = 0.589207 5.85566 3.49999 +At t = 7 + batch 0: y = 4.75675 0.735405 3.49992 + batch 10: y = 4.75675 0.735405 3.49992 + batch 20: y = 4.75675 
0.735405 3.49992 + batch 30: y = 4.75675 0.735405 3.49992 + batch 40: y = 4.75675 0.735405 3.49992 + batch 50: y = 4.75675 0.735405 3.49992 + batch 60: y = 4.75675 0.735405 3.49992 + batch 70: y = 4.75675 0.735405 3.49992 + batch 80: y = 4.75675 0.735405 3.49992 + batch 90: y = 4.75675 0.735405 3.49992 +At t = 8 + batch 0: y = 1.81355 1.57573 3.49997 + batch 10: y = 1.81355 1.57573 3.49997 + batch 20: y = 1.81355 1.57573 3.49997 + batch 30: y = 1.81355 1.57573 3.49997 + batch 40: y = 1.81355 1.57573 3.49997 + batch 50: y = 1.81355 1.57573 3.49997 + batch 60: y = 1.81355 1.57573 3.49997 + batch 70: y = 1.81355 1.57573 3.49997 + batch 80: y = 1.81355 1.57573 3.49997 + batch 90: y = 1.81355 1.57573 3.49997 +At t = 9 + batch 0: y = 0.527935 2.80731 3.49999 + batch 10: y = 0.527935 2.80731 3.49999 + batch 20: y = 0.527935 2.80731 3.49999 + batch 30: y = 0.527935 2.80731 3.49999 + batch 40: y = 0.527935 2.80731 3.49999 + batch 50: y = 0.527935 2.80731 3.49999 + batch 60: y = 0.527935 2.80731 3.49999 + batch 70: y = 0.527935 2.80731 3.49999 + batch 80: y = 0.527935 2.80731 3.49999 + batch 90: y = 0.527935 2.80731 3.49999 +At t = 10 + batch 0: y = 0.305602 3.65734 3.49999 + batch 10: y = 0.305602 3.65734 3.49999 + batch 20: y = 0.305602 3.65734 3.49999 + batch 30: y = 0.305602 3.65734 3.49999 + batch 40: y = 0.305602 3.65734 3.49999 + batch 50: y = 0.305602 3.65734 3.49999 + batch 60: y = 0.305602 3.65734 3.49999 + batch 70: y = 0.305602 3.65734 3.49999 + batch 80: y = 0.305602 3.65734 3.49999 + batch 90: y = 0.305602 3.65734 3.49999 + +Final Statistics: + Steps = 344 + RHS evals = 464 + LS setups = 59 + Jac evals = 7 + NLS iters = 461 + NLS fails = 1 + Error test fails = 20 diff --git a/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.SERIAL.out b/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.SERIAL.out new file mode 100644 index 0000000000..6cabd0d57d --- /dev/null +++ b/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.SERIAL.out @@ -0,0 +1,137 @@ + +Batch of 
independent 3-species kinetics problems + number of batches = 100 + linear solver = KokkosKernels + test type = 2 + execution space = Serial + +At t = 0 + batch 0: y = 1.2 3.1 3 + batch 10: y = 1.2 3.1 3 + batch 20: y = 1.2 3.1 3 + batch 30: y = 1.2 3.1 3 + batch 40: y = 1.2 3.1 3 + batch 50: y = 1.2 3.1 3 + batch 60: y = 1.2 3.1 3 + batch 70: y = 1.2 3.1 3 + batch 80: y = 1.2 3.1 3 + batch 90: y = 1.2 3.1 3 +At t = 1 + batch 0: y = 1.10389 3.01314 3.49998 + batch 10: y = 1.10389 3.01314 3.49998 + batch 20: y = 1.10389 3.01314 3.49998 + batch 30: y = 1.10389 3.01314 3.49998 + batch 40: y = 1.10389 3.01314 3.49998 + batch 50: y = 1.10389 3.01314 3.49998 + batch 60: y = 1.10389 3.01314 3.49998 + batch 70: y = 1.10389 3.01314 3.49998 + batch 80: y = 1.10389 3.01314 3.49998 + batch 90: y = 1.10389 3.01314 3.49998 +At t = 2 + batch 0: y = 0.688033 3.5213 3.49999 + batch 10: y = 0.688033 3.5213 3.49999 + batch 20: y = 0.688033 3.5213 3.49999 + batch 30: y = 0.688033 3.5213 3.49999 + batch 40: y = 0.688033 3.5213 3.49999 + batch 50: y = 0.688033 3.5213 3.49999 + batch 60: y = 0.688033 3.5213 3.49999 + batch 70: y = 0.688033 3.5213 3.49999 + batch 80: y = 0.688033 3.5213 3.49999 + batch 90: y = 0.688033 3.5213 3.49999 +At t = 3 + batch 0: y = 0.409472 4.27781 3.49999 + batch 10: y = 0.409472 4.27781 3.49999 + batch 20: y = 0.409472 4.27781 3.49999 + batch 30: y = 0.409472 4.27781 3.49999 + batch 40: y = 0.409472 4.27781 3.49999 + batch 50: y = 0.409472 4.27781 3.49999 + batch 60: y = 0.409472 4.27781 3.49999 + batch 70: y = 0.409472 4.27781 3.49999 + batch 80: y = 0.409472 4.27781 3.49999 + batch 90: y = 0.409472 4.27781 3.49999 +At t = 4 + batch 0: y = 0.36788 4.94194 3.49999 + batch 10: y = 0.36788 4.94194 3.49999 + batch 20: y = 0.36788 4.94194 3.49999 + batch 30: y = 0.36788 4.94194 3.49999 + batch 40: y = 0.36788 4.94194 3.49999 + batch 50: y = 0.36788 4.94194 3.49999 + batch 60: y = 0.36788 4.94194 3.49999 + batch 70: y = 0.36788 4.94194 3.49999 + batch 80: y = 
0.36788 4.94194 3.49999 + batch 90: y = 0.36788 4.94194 3.49999 +At t = 5 + batch 0: y = 0.413842 5.51057 3.49999 + batch 10: y = 0.413842 5.51057 3.49999 + batch 20: y = 0.413842 5.51057 3.49999 + batch 30: y = 0.413842 5.51057 3.49999 + batch 40: y = 0.413842 5.51057 3.49999 + batch 50: y = 0.413842 5.51057 3.49999 + batch 60: y = 0.413842 5.51057 3.49999 + batch 70: y = 0.413842 5.51057 3.49999 + batch 80: y = 0.413842 5.51057 3.49999 + batch 90: y = 0.413842 5.51057 3.49999 +At t = 6 + batch 0: y = 0.589207 5.85566 3.49999 + batch 10: y = 0.589207 5.85566 3.49999 + batch 20: y = 0.589207 5.85566 3.49999 + batch 30: y = 0.589207 5.85566 3.49999 + batch 40: y = 0.589207 5.85566 3.49999 + batch 50: y = 0.589207 5.85566 3.49999 + batch 60: y = 0.589207 5.85566 3.49999 + batch 70: y = 0.589207 5.85566 3.49999 + batch 80: y = 0.589207 5.85566 3.49999 + batch 90: y = 0.589207 5.85566 3.49999 +At t = 7 + batch 0: y = 4.75675 0.735405 3.49992 + batch 10: y = 4.75675 0.735405 3.49992 + batch 20: y = 4.75675 0.735405 3.49992 + batch 30: y = 4.75675 0.735405 3.49992 + batch 40: y = 4.75675 0.735405 3.49992 + batch 50: y = 4.75675 0.735405 3.49992 + batch 60: y = 4.75675 0.735405 3.49992 + batch 70: y = 4.75675 0.735405 3.49992 + batch 80: y = 4.75675 0.735405 3.49992 + batch 90: y = 4.75675 0.735405 3.49992 +At t = 8 + batch 0: y = 1.81355 1.57573 3.49997 + batch 10: y = 1.81355 1.57573 3.49997 + batch 20: y = 1.81355 1.57573 3.49997 + batch 30: y = 1.81355 1.57573 3.49997 + batch 40: y = 1.81355 1.57573 3.49997 + batch 50: y = 1.81355 1.57573 3.49997 + batch 60: y = 1.81355 1.57573 3.49997 + batch 70: y = 1.81355 1.57573 3.49997 + batch 80: y = 1.81355 1.57573 3.49997 + batch 90: y = 1.81355 1.57573 3.49997 +At t = 9 + batch 0: y = 0.527935 2.80731 3.49999 + batch 10: y = 0.527935 2.80731 3.49999 + batch 20: y = 0.527935 2.80731 3.49999 + batch 30: y = 0.527935 2.80731 3.49999 + batch 40: y = 0.527935 2.80731 3.49999 + batch 50: y = 0.527935 2.80731 3.49999 + batch 60: y 
= 0.527935 2.80731 3.49999 + batch 70: y = 0.527935 2.80731 3.49999 + batch 80: y = 0.527935 2.80731 3.49999 + batch 90: y = 0.527935 2.80731 3.49999 +At t = 10 + batch 0: y = 0.305602 3.65734 3.49999 + batch 10: y = 0.305602 3.65734 3.49999 + batch 20: y = 0.305602 3.65734 3.49999 + batch 30: y = 0.305602 3.65734 3.49999 + batch 40: y = 0.305602 3.65734 3.49999 + batch 50: y = 0.305602 3.65734 3.49999 + batch 60: y = 0.305602 3.65734 3.49999 + batch 70: y = 0.305602 3.65734 3.49999 + batch 80: y = 0.305602 3.65734 3.49999 + batch 90: y = 0.305602 3.65734 3.49999 + +Final Statistics: + Steps = 344 + RHS evals = 464 + LS setups = 59 + Jac evals = 7 + NLS iters = 461 + NLS fails = 1 + Error test fails = 20 diff --git a/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.cpp b/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.cpp new file mode 100644 index 0000000000..58a136a74f --- /dev/null +++ b/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.cpp @@ -0,0 +1,425 @@ +/* ----------------------------------------------------------------------------- + * Programmer(s): Daniel R. Reynolds @ SMU + * David J. Gardner and Cody J. Balos @ LLNL + * ----------------------------------------------------------------------------- + * SUNDIALS Copyright Start + * Copyright (c) 2002-2023, Lawrence Livermore National Security + * and Southern Methodist University. + * All rights reserved. + * + * See the top-level LICENSE and NOTICE files for details. + * + * SPDX-License-Identifier: BSD-3-Clause + * SUNDIALS Copyright End + * ----------------------------------------------------------------------------- + * The following is a simple example problem based off of ark_brusselator.c. + * + * We simulate a scenario where a set of independent ODEs are batched together + * to form a larger system. 
Each independent ODE system has 3 components, + * Y = [u, v, w], satisfying the equations, + * + * du/dt = a - (w + 1) * u + v * u^2 + * dv/dt = w * u - v * u^2 + * dw/dt = (b - w) / ep - w * u + * + * for t in the interval [0, 10], with initial conditions Y0 = [u0, v0, w0]. + * The problem is stiff and there are 3 testing scenarios: + * + * Reactor 0: u0 = 3.9, v0 = 1.1, w0 = 2.8, a = 1.2, b = 2.5, ep = 1.0e-5 + * Here, all three components exhibit a rapid transient change during the + * first 0.2 time units, followed by a slow and smooth evolution. + * + * Reactor 1: u0 = 3, v0 = 3, w0 = 3.5, a = 0.5, b = 3, ep = 5.0e-4 + * Here, all components undergo very rapid initial transients during the first + * 0.3 time units, and all then proceed very smoothly for the remainder of the + * simulation. + * + * Reactor 2: u0 = 1.2, v0 = 3.1, w0 = 3, a = 1, b = 3.5, ep = 5.0e-6 + * Here, w experiences a fast initial transient, jumping 0.5 within a few + * steps. All values proceed smoothly until around t=6.5, when both u and v + * undergo a sharp transition, with u increasing from around 0.5 to 5 and v + * decreasing from around 6 to 1 in less than 0.5 time units. After this + * transition, both u and v continue to evolve somewhat rapidly for another + * 1.4 time units, and finish off smoothly. + * + * This program solves the problem with the BDF method, Newton iteration, a + * user-supplied Jacobian routine, and, since the grouping of the independent + * systems results in a block diagonal linear system, the dense KOKKOS + * SUNLinearSolver which supports batched systems. 10 outputs are printed at + * equal intervals, and run statistics are printed at the end. + * + * Unlike the example cv_bruss_batched_kokkos.cpp, this example utilizes Kokkos' + * multi-dimensional view functionality to consider a 2D grouping, y(i,j), where + * i corresponds to the batch index, and j corresponds to the component (u,v,w).
+ * + * The program takes three optional arguments: the number of independent ODE + * systems (i.e., number of batches), the linear solver type (KOKKOS batched LU + * or non-batched GMRES with the Jacobian computed by difference quotients), + * and the test type (uniform_0, uniform_1, or uniform_2). + * + * ./cv_bruss_batched_kokkos_2D [num_batches] [solver_type] [test_type] + * + * Options: + * num_batches + * solver_type: + * 0 - KOKKOS batched LU (default) + * 1 - SUNDIALS non-batched GMRES with difference quotients Jacobian + * test_type: + * 0 - uniform_0, all batches are Reactor 0 + * 1 - uniform_1, all batches are Reactor 1 + * 2 - uniform_2, all batches are Reactor 2 (default) + * ---------------------------------------------------------------------------*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +// Common utility functions +#include + +// Execution space +#if defined(USE_CUDA) +using ExecSpace = Kokkos::Cuda; +using MemSpace = Kokkos::CudaSpace; +#elif defined(USE_HIP) +#if KOKKOS_VERSION / 10000 > 3 +using ExecSpace = Kokkos::HIP; +using MemSpace = Kokkos::HIPSpace; +#else +using ExecSpace = Kokkos::Experimental::HIP; +using MemSpace = Kokkos::Experimental::HIPSpace; +#endif +#elif defined(USE_OPENMP) +using ExecSpace = Kokkos::OpenMP; +using MemSpace = Kokkos::HostSpace; +#else +using ExecSpace = Kokkos::Serial; +using MemSpace = Kokkos::HostSpace; +#endif + +using Vec1D = Kokkos::View; +using Vec2D = Kokkos::View; +using Vec2DHost = Vec2D::HostMirror; +using VecType = sundials::kokkos::Vector; +using MatType = sundials::kokkos::DenseMatrix; +using LSType = sundials::kokkos::DenseLinearSolver; +using SizeType = VecType::size_type; + +// Constants +#define ZERO SUN_RCONST(0.0) +#define ONE SUN_RCONST(1.0) +#define TWO SUN_RCONST(2.0) + +// User-supplied functions called by CVODE +static int f(sunrealtype t, N_Vector y, N_Vector ydot, void* user_data); + +static int Jac(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix
J, + void* user_data, N_Vector tmp1, N_Vector tmp2, N_Vector tmp3); + +// User data structure available in user-supplied callback functions +struct UserData +{ + int nbatches = 100; // number of chemical networks + int batchSize = 3; // size of each network + sunrealtype a, b; // chemical concentrations that are constant + sunrealtype ep; // stiffness parameter +}; + +/* ----------------------------------------------------------------------------- + * Main Program + * ---------------------------------------------------------------------------*/ + +int main(int argc, char* argv[]) +{ + // Create the SUNDIALS context + sundials::Context sunctx; + + Kokkos::initialize(argc, argv); + { + // Create UserData + UserData udata; + + // Parse command line options + int argi = 0; + + // Total number of batch systems + if (argc > 1) udata.nbatches = atoi(argv[++argi]); + + // Linear solver type + int solver_type = 0; + if (argc > 2) solver_type = atoi(argv[++argi]); + + // Problem setup + int test_type = 2; + if (argc > 3) test_type = atoi(argv[++argi]); + + // Shortcuts + int nbatches = udata.nbatches; + int batchSize = udata.batchSize; + + std::cout << "\nBatch of independent 3-species kinetics problems\n" + << " number of batches = " << nbatches << "\n" + << " linear solver = " + << (solver_type ? 
"GMRES" : "KokkosKernels") << "\n" + << " test type = " << test_type << "\n" + << " execution space = " << ExecSpace().name() << "\n\n"; + + sunrealtype u0, v0, w0; + if (test_type == 0) + { + u0 = SUN_RCONST(3.9); + v0 = SUN_RCONST(1.1); + w0 = SUN_RCONST(2.8); + + udata.a = SUN_RCONST(1.2); + udata.b = SUN_RCONST(2.5); + udata.ep = SUN_RCONST(1.0e-5); + } + else if (test_type == 1) + { + u0 = SUN_RCONST(3.0); + v0 = SUN_RCONST(3.0); + w0 = SUN_RCONST(3.5); + + udata.a = SUN_RCONST(0.5); + udata.b = SUN_RCONST(3.0); + udata.ep = SUN_RCONST(5.0e-4); + } + else if (test_type == 2) + { + u0 = SUN_RCONST(1.2); + v0 = SUN_RCONST(3.1); + w0 = SUN_RCONST(3.0); + + udata.a = SUN_RCONST(1.0); + udata.b = SUN_RCONST(3.5); + udata.ep = SUN_RCONST(5.0e-6); + } + else + { + std::cerr << "ERROR: Invalid test type option\n"; + return -1; + } + + // Create vector with the initial condition + const sunrealtype T0 = SUN_RCONST(0.0); + + SizeType length{static_cast(batchSize * nbatches)}; + VecType y{length, sunctx}; + Vec2D y2d((y.View()).data(), nbatches, batchSize); + + Kokkos::parallel_for( + "fill_y", Kokkos::RangePolicy(0, nbatches), + KOKKOS_LAMBDA(const SizeType i) { + y2d(i,0) = u0; + y2d(i,1) = v0; + y2d(i,2) = w0; + }); + + // Create vector of absolute tolerances + VecType abstol{length, sunctx}; + N_VConst(SUN_RCONST(1.0e-10), abstol); + + // Create CVODE using Backward Differentiation Formula methods + void* cvode_mem = CVodeCreate(CV_BDF, sunctx); + if (check_ptr(cvode_mem, "CVodeCreate")) { return 1; } + + // Initialize the integrator and set the ODE right-hand side function + int retval = CVodeInit(cvode_mem, f, T0, y); + if (check_flag(retval, "CVodeInit")) { return 1; } + + // Attach the user data structure + retval = CVodeSetUserData(cvode_mem, &udata); + if (check_flag(retval, "CVodeSetUserData")) { return 1; } + + // Specify the scalar relative tolerance and vector absolute tolerances + retval = CVodeSVtolerances(cvode_mem, SUN_RCONST(1.0e-6), abstol); + if 
(check_flag(retval, "CVodeSVtolerances")) { return 1; } + + // Create the matrix and linear solver objects + std::unique_ptr> A; + std::unique_ptr> LS; + + if (solver_type == 0) + { + // Create Kokkos dense block diagonal matrix + A = std::make_unique(nbatches, batchSize, batchSize, sunctx); + + // Create Kokkos batched dense linear solver + LS = std::make_unique(sunctx); + + // Attach the matrix and linear solver to CVODE + retval = CVodeSetLinearSolver(cvode_mem, LS->Convert(), A->Convert()); + if (check_flag(retval, "CVodeSetLinearSolver")) return 1; + + // Set the user-supplied Jacobian function + retval = CVodeSetJacFn(cvode_mem, Jac); + if (check_flag(retval, "CVodeSetJacFn")) return 1; + } + else + { + // Create matrix-free GMRES linear solver + LS = std::make_unique( + SUNLinSol_SPGMR(y, SUN_PREC_NONE, 0, sunctx)); + + // Attach the linear solver to CVODE + retval = CVodeSetLinearSolver(cvode_mem, LS->Convert(), nullptr); + if (check_flag(retval, "CVodeSetLinearSolver")) return 1; + } + + // Final time and time between outputs + const sunrealtype Tf = SUN_RCONST(10.0); + const sunrealtype dTout = SUN_RCONST(1.0); + + // Number of output times + const int Nt = static_cast(ceil(Tf / dTout)); + + // Current time and first output time + sunrealtype t = T0; + sunrealtype tout = T0 + dTout; + + // Initial output + Vec2DHost y2d_h((y.HostView()).data(), nbatches, batchSize); + sundials::kokkos::CopyFromDevice(y); + Kokkos::fence(); + std::cout << "At t = " << t << std::endl; + for (int j = 0; j < nbatches; j += 10) + { + std::cout << " batch " << j << ": y = " << y2d_h(j,0) << " " + << y2d_h(j,1) << " " << y2d_h(j,2) << std::endl; + } + + // Loop over output times + for (int iout = 0; iout < Nt; iout++) + { + // Advance in time + retval = CVode(cvode_mem, tout, y, &t, CV_NORMAL); + if (check_flag(retval, "CVode")) break; + + // Output solution from some batches + sundials::kokkos::CopyFromDevice(y); + Kokkos::fence(); + std::cout << "At t = " << t << std::endl; + 
for (int j = 0; j < nbatches; j += 10) + { + std::cout << " batch " << j << ": y = " << y2d_h(j,0) << " " + << y2d_h(j,1) << " " << y2d_h(j,2) << std::endl; + } + + tout += dTout; + tout = (tout > Tf) ? Tf : tout; + } + + // Print some final statistics + long int nst, nfe, nsetups, nje, nni, ncfn, netf; + + retval = CVodeGetNumSteps(cvode_mem, &nst); + check_flag(retval, "CVodeGetNumSteps"); + retval = CVodeGetNumRhsEvals(cvode_mem, &nfe); + check_flag(retval, "CVodeGetNumRhsEvals"); + retval = CVodeGetNumLinSolvSetups(cvode_mem, &nsetups); + check_flag(retval, "CVodeGetNumLinSolvSetups"); + retval = CVodeGetNumErrTestFails(cvode_mem, &netf); + check_flag(retval, "CVodeGetNumErrTestFails"); + retval = CVodeGetNumNonlinSolvIters(cvode_mem, &nni); + check_flag(retval, "CVodeGetNumNonlinSolvIters"); + retval = CVodeGetNumNonlinSolvConvFails(cvode_mem, &ncfn); + check_flag(retval, "CVodeGetNumNonlinSolvConvFails"); + retval = CVodeGetNumJacEvals(cvode_mem, &nje); + check_flag(retval, "CVodeGetNumJacEvals"); + + std::cout << "\nFinal Statistics:\n" + << " Steps = " << nst << "\n" + << " RHS evals = " << nfe << "\n" + << " LS setups = " << nsetups << "\n" + << " Jac evals = " << nje << "\n" + << " NLS iters = " << nni << "\n" + << " NLS fails = " << ncfn << "\n" + << " Error test fails = " << netf << "\n"; + + // Free objects + CVodeFree(&cvode_mem); + } + Kokkos::finalize(); + + return 0; +} + +/* ----------------------------------------------------------------------------- + * User-supplied functions called by CVODE + * ---------------------------------------------------------------------------*/ + +// Right hand side function dy/dt = f(t,y) +int f(sunrealtype t, N_Vector y, N_Vector ydot, void* user_data) +{ + auto udata = static_cast(user_data); + + const auto nbatches = udata->nbatches; + const auto batchSize = udata->batchSize; + + const auto a = udata->a; + const auto b = udata->b; + const auto ep = udata->ep; + + Vec2D y2d(N_VGetDeviceArrayPointer(y), nbatches, 
batchSize); + Vec2D ydot2d(N_VGetDeviceArrayPointer(ydot), nbatches, batchSize); + + Kokkos::parallel_for( + "RHS", Kokkos::RangePolicy(0, nbatches), + KOKKOS_LAMBDA(const SizeType i) { + auto u = y2d(i,0); + auto v = y2d(i,1); + auto w = y2d(i,2); + ydot2d(i,0) = a - (w + ONE) * u + v * u * u; + ydot2d(i,1) = w * u - v * u * u; + ydot2d(i,2) = (b - w) / ep - w * u; + }); + + return 0; +} + +// Jacobian of f(t,y) +int Jac(sunrealtype t, N_Vector y, N_Vector fy, SUNMatrix J, void* user_data, + N_Vector tmp1, N_Vector tmp2, N_Vector tmp3) +{ + auto udata = static_cast(user_data); + auto y_data = sundials::kokkos::GetVec(y)->View(); + auto J_data = sundials::kokkos::GetDenseMat(J)->View(); + + const auto nbatches = udata->nbatches; + const auto batchSize = udata->batchSize; + + const auto ep = udata->ep; + Vec2D y2d(N_VGetDeviceArrayPointer(y), nbatches, batchSize); + + Kokkos::parallel_for( + "Jac", Kokkos::RangePolicy(0, nbatches), + KOKKOS_LAMBDA(const SizeType i) { + // get y values + auto u = y2d(i,0); + auto v = y2d(i,1); + auto w = y2d(i,2); + + // first col of block + J_data(i, 0, 0) = -(w + ONE) + TWO * u * v; + J_data(i, 1, 0) = u * u; + J_data(i, 2, 0) = -u; + + // second col of block + J_data(i, 0, 1) = u * u; + J_data(i, 1, 1) = -u * u; + J_data(i, 2, 1) = u; + + // third col of block + J_data(i, 0, 2) = -w; + J_data(i, 1, 2) = ZERO; + J_data(i, 2, 2) = -ONE / ep - u; + }); + + return 0; +} diff --git a/examples/nvector/sycl/test_nvector_sycl.cpp b/examples/nvector/sycl/test_nvector_sycl.cpp index 4ce143451c..b3b6f5c8dc 100644 --- a/examples/nvector/sycl/test_nvector_sycl.cpp +++ b/examples/nvector/sycl/test_nvector_sycl.cpp @@ -69,7 +69,7 @@ int main(int argc, char *argv[]) SetTiming(print_timing, 0); /* Create an in-order GPU queue */ -#if SYCL_LANGUAGE_VERSION >= 2020 +#if SYCL_LANGUAGE_VERSION >= 2020 && !defined(SUNDIALS_SYCL_2020_UNSUPPORTED) sycl::queue myQueue(sycl::gpu_selector_v, sycl::property_list{sycl::property::queue::in_order{}}); #else 
diff --git a/examples/sunlinsol/onemkldense/test_sunlinsol_onemkldense.cpp b/examples/sunlinsol/onemkldense/test_sunlinsol_onemkldense.cpp index 6c63f71e6e..c3adc0a08e 100644 --- a/examples/sunlinsol/onemkldense/test_sunlinsol_onemkldense.cpp +++ b/examples/sunlinsol/onemkldense/test_sunlinsol_onemkldense.cpp @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) (long int) cols, (long int) nblocks); // Create an in-order GPU queue -#if SYCL_LANGUAGE_VERSION >= 2020 +#if SYCL_LANGUAGE_VERSION >= 2020 && !defined(SUNDIALS_SYCL_2020_UNSUPPORTED) sycl::queue myQueue(sycl::gpu_selector_v, sycl::property_list{sycl::property::queue::in_order{}}); #else diff --git a/examples/sunmatrix/onemkldense/test_sunmatrix_onemkldense.cpp b/examples/sunmatrix/onemkldense/test_sunmatrix_onemkldense.cpp index 75d11de689..615e1ee9dd 100644 --- a/examples/sunmatrix/onemkldense/test_sunmatrix_onemkldense.cpp +++ b/examples/sunmatrix/onemkldense/test_sunmatrix_onemkldense.cpp @@ -80,7 +80,7 @@ int main(int argc, char *argv[]) (long int) matrows, (long int) matcols); // Create an in-order GPU queue -#if SYCL_LANGUAGE_VERSION >= 2020 +#if SYCL_LANGUAGE_VERSION >= 2020 && !defined(SUNDIALS_SYCL_2020_UNSUPPORTED) sycl::queue myQueue(sycl::gpu_selector_v, sycl::property_list{sycl::property::queue::in_order{}}); #else diff --git a/include/arkode/arkode_arkstep.h b/include/arkode/arkode_arkstep.h index 7d624635c5..d293f7611c 100644 --- a/include/arkode/arkode_arkstep.h +++ b/include/arkode/arkode_arkstep.h @@ -50,9 +50,11 @@ static const int ARKSTEP_DEFAULT_DIRK_4 = ARKODE_SDIRK_5_3_4; static const int ARKSTEP_DEFAULT_DIRK_5 = ARKODE_ARK548L2SA_DIRK_8_4_5; /* ImEx */ +static const int ARKSTEP_DEFAULT_ARK_ETABLE_2 = ARKODE_ARK2_ERK_3_1_2; static const int ARKSTEP_DEFAULT_ARK_ETABLE_3 = ARKODE_ARK324L2SA_ERK_4_2_3; static const int ARKSTEP_DEFAULT_ARK_ETABLE_4 = ARKODE_ARK436L2SA_ERK_6_3_4; static const int ARKSTEP_DEFAULT_ARK_ETABLE_5 = ARKODE_ARK548L2SA_ERK_8_4_5; +static const int 
ARKSTEP_DEFAULT_ARK_ITABLE_2 = ARKODE_ARK2_DIRK_3_1_2; static const int ARKSTEP_DEFAULT_ARK_ITABLE_3 = ARKODE_ARK324L2SA_DIRK_4_2_3; static const int ARKSTEP_DEFAULT_ARK_ITABLE_4 = ARKODE_ARK436L2SA_DIRK_6_3_4; static const int ARKSTEP_DEFAULT_ARK_ITABLE_5 = ARKODE_ARK548L2SA_DIRK_8_4_5; @@ -238,6 +240,8 @@ SUNDIALS_EXPORT int ARKStepSetMinStep(void *arkode_mem, realtype hmin); SUNDIALS_EXPORT int ARKStepSetMaxStep(void *arkode_mem, realtype hmax); +SUNDIALS_EXPORT int ARKStepSetInterpolateStopTime(void *arkode_mem, + booleantype interp); SUNDIALS_EXPORT int ARKStepSetStopTime(void *arkode_mem, realtype tstop); SUNDIALS_EXPORT int ARKStepClearStopTime(void *arkode_mem); diff --git a/include/arkode/arkode_butcher_dirk.h b/include/arkode/arkode_butcher_dirk.h index 76f1d1cb7d..aecaf16a82 100644 --- a/include/arkode/arkode_butcher_dirk.h +++ b/include/arkode/arkode_butcher_dirk.h @@ -92,7 +92,8 @@ typedef enum { ARKODE_ESDIRK437L2SA_7_3_4, ARKODE_ESDIRK547L2SA_7_4_5, ARKODE_ESDIRK547L2SA2_7_4_5, - ARKODE_MAX_DIRK_NUM = ARKODE_ESDIRK547L2SA2_7_4_5 + ARKODE_ARK2_DIRK_3_1_2, + ARKODE_MAX_DIRK_NUM = ARKODE_ARK2_DIRK_3_1_2 } ARKODE_DIRKTableID; /* Accessor routine to load built-in DIRK table */ diff --git a/include/arkode/arkode_butcher_erk.h b/include/arkode/arkode_butcher_erk.h index acd1d613fb..6673acb119 100644 --- a/include/arkode/arkode_butcher_erk.h +++ b/include/arkode/arkode_butcher_erk.h @@ -84,7 +84,8 @@ typedef enum { ARKODE_KNOTH_WOLKE_3_3, ARKODE_ARK437L2SA_ERK_7_3_4, ARKODE_ARK548L2SAb_ERK_8_4_5, - ARKODE_MAX_ERK_NUM = ARKODE_ARK548L2SAb_ERK_8_4_5 + ARKODE_ARK2_ERK_3_1_2, + ARKODE_MAX_ERK_NUM = ARKODE_ARK2_ERK_3_1_2 } ARKODE_ERKTableID; /* Accessor routine to load built-in ERK table */ diff --git a/include/arkode/arkode_erkstep.h b/include/arkode/arkode_erkstep.h index 7c877a4de3..a9f6f2d13e 100644 --- a/include/arkode/arkode_erkstep.h +++ b/include/arkode/arkode_erkstep.h @@ -153,6 +153,8 @@ SUNDIALS_EXPORT int ERKStepSetMinStep(void *arkode_mem, realtype 
hmin); SUNDIALS_EXPORT int ERKStepSetMaxStep(void *arkode_mem, realtype hmax); +SUNDIALS_EXPORT int ERKStepSetInterpolateStopTime(void *arkode_mem, + booleantype interp); SUNDIALS_EXPORT int ERKStepSetStopTime(void *arkode_mem, realtype tstop); SUNDIALS_EXPORT int ERKStepClearStopTime(void *arkode_mem); diff --git a/include/arkode/arkode_mristep.h b/include/arkode/arkode_mristep.h index 6d91f74dc8..d4d889813f 100644 --- a/include/arkode/arkode_mristep.h +++ b/include/arkode/arkode_mristep.h @@ -253,6 +253,8 @@ SUNDIALS_EXPORT int MRIStepSetMaxHnilWarns(void *arkode_mem, int mxhnil); SUNDIALS_EXPORT int MRIStepSetStopTime(void *arkode_mem, realtype tstop); +SUNDIALS_EXPORT int MRIStepSetInterpolateStopTime(void *arkode_mem, + booleantype interp); SUNDIALS_EXPORT int MRIStepClearStopTime(void *arkode_mem); SUNDIALS_EXPORT int MRIStepSetFixedStep(void *arkode_mem, realtype hsfixed); diff --git a/include/cvode/cvode.h b/include/cvode/cvode.h index db21479fe8..35647f983e 100644 --- a/include/cvode/cvode.h +++ b/include/cvode/cvode.h @@ -147,6 +147,7 @@ SUNDIALS_EXPORT int CVodeSetNonlinConvCoef(void *cvode_mem, realtype nlscoef); SUNDIALS_EXPORT int CVodeSetNonlinearSolver(void *cvode_mem, SUNNonlinearSolver NLS); SUNDIALS_EXPORT int CVodeSetStabLimDet(void *cvode_mem, booleantype stldet); SUNDIALS_EXPORT int CVodeSetStopTime(void *cvode_mem, realtype tstop); +SUNDIALS_EXPORT int CVodeSetInterpolateStopTime(void *cvode_mem, booleantype interp); SUNDIALS_EXPORT int CVodeClearStopTime(void *cvode_mem); SUNDIALS_EXPORT int CVodeSetUseIntegratorFusedKernels(void *cvode_mem, booleantype onoff); SUNDIALS_EXPORT int CVodeSetUserData(void *cvode_mem, void *user_data); diff --git a/include/cvodes/cvodes.h b/include/cvodes/cvodes.h index 3dbdad7900..b1a7892377 100644 --- a/include/cvodes/cvodes.h +++ b/include/cvodes/cvodes.h @@ -222,6 +222,7 @@ SUNDIALS_EXPORT int CVodeSetNonlinConvCoef(void *cvode_mem, realtype nlscoef); SUNDIALS_EXPORT int CVodeSetNonlinearSolver(void 
*cvode_mem, SUNNonlinearSolver NLS); SUNDIALS_EXPORT int CVodeSetStabLimDet(void *cvode_mem, booleantype stldet); SUNDIALS_EXPORT int CVodeSetStopTime(void *cvode_mem, realtype tstop); +SUNDIALS_EXPORT int CVodeSetInterpolateStopTime(void *cvode_mem, booleantype interp); SUNDIALS_EXPORT int CVodeClearStopTime(void *cvode_mem); SUNDIALS_EXPORT int CVodeSetUserData(void *cvode_mem, void *user_data); diff --git a/include/nvector/nvector_kokkos.hpp b/include/nvector/nvector_kokkos.hpp index 1b424a7d44..269d15dcef 100644 --- a/include/nvector/nvector_kokkos.hpp +++ b/include/nvector/nvector_kokkos.hpp @@ -650,6 +650,20 @@ void CopyFromDevice(VectorType& v) Kokkos::deep_copy(v.HostView(), v.View()); } +template +view_type GetView(N_Vector v) +{ + auto vec{GetVec(v)}; + return vec->View(); +} + +template +host_view_type GetHostView(N_Vector v) +{ + auto vec{GetVec(v)}; + return vec->HostView(); +} + } // namespace kokkos } // namespace sundials diff --git a/include/sundials/sundials_config.in b/include/sundials/sundials_config.in index 545c1aebe8..f3f66ff89c 100644 --- a/include/sundials/sundials_config.in +++ b/include/sundials/sundials_config.in @@ -130,12 +130,16 @@ */ #cmakedefine01 SUNDIALS_MPI_ENABLED - /* SUPERLUMT threading type */ -#cmakedefine SUNDIALS_SUPERLUMT_THREAD_TYPE "@SUPERLUMT_THREAD_TYPE@" +/* oneMKL interface options */ +#cmakedefine SUNDIALS_ONEMKL_USE_GETRF_LOOP +#cmakedefine SUNDIALS_ONEMKL_USE_GETRS_LOOP - /* Trilinos with MPI is available, then - * #define SUNDIALS_TRILINOS_HAVE_MPI - */ +/* SUPERLUMT threading type */ +#define SUNDIALS_SUPERLUMT_THREAD_TYPE "@SUPERLUMT_THREAD_TYPE@" + +/* Trilinos with MPI is available, then + * #define SUNDIALS_TRILINOS_HAVE_MPI + */ #cmakedefine SUNDIALS_TRILINOS_HAVE_MPI /* RAJA backends */ @@ -143,6 +147,10 @@ #cmakedefine SUNDIALS_RAJA_BACKENDS_HIP #cmakedefine SUNDIALS_RAJA_BACKENDS_SYCL +/* SYCL options */ +#cmakedefine SUNDIALS_SYCL_2020_UNSUPPORTED + + /* 
------------------------------------------------------------------ * SUNDIALS modules enabled * -----------------------------------------------------------------*/ diff --git a/scripts/cvode b/scripts/cvode index 57261482ba..af97c77f74 100755 --- a/scripts/cvode +++ b/scripts/cvode @@ -221,6 +221,10 @@ $tar $tarfile $distrobase/examples/cvode/kokkos/cv_bruss_batched_kokkos.cpp $tar $tarfile $distrobase/examples/cvode/kokkos/cv_bruss_batched_kokkos.CUDA.out $tar $tarfile $distrobase/examples/cvode/kokkos/cv_bruss_batched_kokkos.OPENMP.out $tar $tarfile $distrobase/examples/cvode/kokkos/cv_bruss_batched_kokkos.SERIAL.out +$tar $tarfile $distrobase/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.cpp +$tar $tarfile $distrobase/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.CUDA.out +$tar $tarfile $distrobase/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.OPENMP.out +$tar $tarfile $distrobase/examples/cvode/kokkos/cv_bruss_batched_kokkos_2D.SERIAL.out $tar $tarfile $distrobase/examples/cvode/magma/README $tar $tarfile $distrobase/examples/cvode/magma/CMakeLists.txt diff --git a/src/arkode/arkode.c b/src/arkode/arkode.c index 9b6f7e9e5b..744abfed2a 100644 --- a/src/arkode/arkode.c +++ b/src/arkode/arkode.c @@ -962,7 +962,11 @@ int arkEvolve(ARKodeMem ark_mem, realtype tout, N_Vector yout, troundoff = FUZZ_FACTOR*ark_mem->uround * (SUNRabs(ark_mem->tcur) + SUNRabs(ark_mem->h)); if ( SUNRabs(ark_mem->tcur - ark_mem->tstop) <= troundoff) { - (void) arkGetDky(ark_mem, ark_mem->tstop, 0, yout); + if (ark_mem->tstopinterp) { + (void) arkGetDky(ark_mem, ark_mem->tstop, 0, yout); + } else { + N_VScale(ONE, ark_mem->yn, yout); + } ark_mem->tretlast = *tret = ark_mem->tstop; ark_mem->tstopset = SUNFALSE; istate = ARK_TSTOP_RETURN; @@ -1356,6 +1360,7 @@ void arkPrintMem(ARKodeMem ark_mem, FILE *outfile) fprintf(outfile, "liw = %li\n", (long int) ark_mem->liw); fprintf(outfile, "user_efun = %i\n", ark_mem->user_efun); fprintf(outfile, "tstopset = %i\n", ark_mem->tstopset); + 
fprintf(outfile, "tstopinterp = %i\n", ark_mem->tstopinterp); fprintf(outfile, "tstop = %" RSYM"\n", ark_mem->tstop); fprintf(outfile, "report = %i\n", ark_mem->report); fprintf(outfile, "VabstolMallocDone = %i\n", ark_mem->VabstolMallocDone); diff --git a/src/arkode/arkode_arkstep.c b/src/arkode/arkode_arkstep.c index ef38649f90..8b5bde3a1b 100644 --- a/src/arkode/arkode_arkstep.c +++ b/src/arkode/arkode_arkstep.c @@ -1924,6 +1924,9 @@ int arkStep_SetButcherTables(ARKodeMem ark_mem) switch (step_mem->q) { case(2): + etable = ARKSTEP_DEFAULT_ARK_ETABLE_2; + itable = ARKSTEP_DEFAULT_ARK_ITABLE_2; + break; case(3): etable = ARKSTEP_DEFAULT_ARK_ETABLE_3; itable = ARKSTEP_DEFAULT_ARK_ITABLE_3; diff --git a/src/arkode/arkode_arkstep_io.c b/src/arkode/arkode_arkstep_io.c index bfa713fda9..93c99e6014 100644 --- a/src/arkode/arkode_arkstep_io.c +++ b/src/arkode/arkode_arkstep_io.c @@ -59,6 +59,9 @@ int ARKStepSetMaxStep(void *arkode_mem, realtype hmax) { return(arkSetMaxStep(arkode_mem, hmax)); } int ARKStepSetStopTime(void *arkode_mem, realtype tstop) { return(arkSetStopTime(arkode_mem, tstop)); } +int ARKStepSetInterpolateStopTime(void *arkode_mem, + booleantype interp) { + return(arkSetInterpolateStopTime(arkode_mem, interp)); } int ARKStepClearStopTime(void *arkode_mem) { return(arkClearStopTime(arkode_mem)); } int ARKStepSetRootDirection(void *arkode_mem, int *rootdir) { diff --git a/src/arkode/arkode_butcher_dirk.c b/src/arkode/arkode_butcher_dirk.c index d1346acdd4..b74a543a52 100644 --- a/src/arkode/arkode_butcher_dirk.c +++ b/src/arkode/arkode_butcher_dirk.c @@ -66,8 +66,8 @@ ARKODE_DIRKTableID arkButcherTableDIRKNameToID(const char *imethod) { #undef ARK_BUTCHER_TABLE arkProcessError(NULL, ARK_ILL_INPUT, "ARKODE", - "arkButcherTableDIRKNameToID", - "Unknown Butcher table"); + "arkButcherTableDIRKNameToID", + "Unknown Butcher table"); return ARKODE_DIRK_NONE; } diff --git a/src/arkode/arkode_butcher_dirk.def b/src/arkode/arkode_butcher_dirk.def index 
f9e63023ab..a3cca75f66 100644 --- a/src/arkode/arkode_butcher_dirk.def +++ b/src/arkode/arkode_butcher_dirk.def @@ -57,7 +57,8 @@ ARKODE_ARK548L2SAb_DIRK_8_4_5* ESDIRK Y Y N ARKODE_ESDIRK547L2SA_7_4_5 ESDIRK Y Y N ARKODE_ESDIRK547L2SA2_7_4_5 ESDIRK Y Y N - ----------------------------------------------------------------- + ARKODE_ARK2_DIRK_3_1_2 ESDIRK Y Y Y + ----------------------------------------------------------------- */ ARK_BUTCHER_TABLE(ARKODE_DIRK_NONE, { @@ -68,22 +69,56 @@ ARK_BUTCHER_TABLE(ARKODE_SDIRK_2_1_2, { /* SDIRK-2-1 (A,B stable) */ ARKodeButcherTable B = ARKodeButcherTable_Alloc(2, SUNTRUE); B->q = 2; B->p = 1; - + B->A[0][0] = RCONST(1.0); B->A[1][0] = RCONST(-1.0); B->A[1][1] = RCONST(1.0); - + B->b[0] = RCONST(0.5); B->b[1] = RCONST(0.5); - + B->d[0] = RCONST(1.0); - B->c[0] = RCONST(1.0); B->c[1] = RCONST(0.0); return B; }) +ARK_BUTCHER_TABLE(ARKODE_ARK2_DIRK_3_1_2, { /* ARK2 Implicit Table (A,L stable) */ + ARKodeButcherTable B = ARKodeButcherTable_Alloc(3, SUNTRUE); + + /* 1 - 1 / sqrt(2) */ + const sunrealtype gamma = SUN_RCONST(1.0) - SUN_RCONST(1.0) / SUNRsqrt(SUN_RCONST(2.0)); + /* 1 / (2 sqrt(2)) */ + const sunrealtype delta = SUN_RCONST(1.0) / (SUN_RCONST(2.0) * SUNRsqrt(SUN_RCONST(2.0))); + /* 2 - sqrt(2) */ + const sunrealtype twogamma = SUN_RCONST(2.0) - SUNRsqrt(SUN_RCONST(2.0)); + /* (4 - sqrt(2)) / 8 */ + const sunrealtype beta = (SUN_RCONST(4.0) - SUNRsqrt(SUN_RCONST(2.0))) / SUN_RCONST(8.0); + + B->q = 2; + B->p = 1; + + B->A[1][0] = gamma; + B->A[1][1] = gamma; + B->A[2][0] = delta; + B->A[2][1] = delta; + B->A[2][2] = gamma; + + B->b[0] = delta; + B->b[1] = delta; + B->b[2] = gamma; + + B->d[0] = beta; + B->d[1] = beta; + B->d[2] = SUN_RCONST(1.0) / (SUN_RCONST(2.0) * SUNRsqrt(SUN_RCONST(2.0))); + + B->c[1] = twogamma; + B->c[2] = SUN_RCONST(1.0); + + return B; + }) + ARK_BUTCHER_TABLE(ARKODE_BILLINGTON_3_3_2, { /* Billington-SDIRK */ ARKodeButcherTable B = ARKodeButcherTable_Alloc(3, SUNTRUE); @@ -620,7 +655,7 @@ 
ARK_BUTCHER_TABLE(ARKODE_ESDIRK324L2SA_4_2_3, { /* ESDIRK3(2)4L[2]SA (A,L stable const sunrealtype g4 = g3 * g; const sunrealtype g5 = g4 * g; const sunrealtype c3 = RCONST(0.6); - + ARKodeButcherTable B = ARKodeButcherTable_Alloc(4, SUNTRUE); B->q = 3; B->p = 2; diff --git a/src/arkode/arkode_butcher_erk.def b/src/arkode/arkode_butcher_erk.def index 43a8fe7210..f25fafb830 100644 --- a/src/arkode/arkode_butcher_erk.def +++ b/src/arkode/arkode_butcher_erk.def @@ -52,6 +52,7 @@ ARKODE_ARK548L2SAb_ERK_8_4_5* N ARKODE_VERNER_8_5_6 Y ARKODE_FEHLBERG_13_7_8 Y + ARKODE_ARK2_ERK_3_1_2 Y -------------------------------- ARKODE_KNOTH_WOLKE_3_3^ Y -------------------------------- @@ -75,7 +76,42 @@ ARK_BUTCHER_TABLE(ARKODE_HEUN_EULER_2_1_2, { /* Heun-Euler-ERK */ B->c[1] = RCONST(1.0); return B; - }) + }) + +ARK_BUTCHER_TABLE(ARKODE_ARK2_ERK_3_1_2, { /* ARK2 Explicit Table */ + ARKodeButcherTable B = ARKodeButcherTable_Alloc(3, SUNTRUE); + + /* 1 - 1 / sqrt(2) */ + const sunrealtype gamma = SUN_RCONST(1.0) - SUN_RCONST(1.0) / SUNRsqrt(SUN_RCONST(2.0)); + /* (3 + 2 sqrt(2)) / 6 */ + const sunrealtype alpha = (SUN_RCONST(3.0) + SUN_RCONST(2.0) * SUNRsqrt(SUN_RCONST(2.0))) / SUN_RCONST(6.0); + /* 1 / (2 sqrt(2)) */ + const sunrealtype delta = SUN_RCONST(1.0) / (SUN_RCONST(2.0) * SUNRsqrt(SUN_RCONST(2.0))); + /* 2 - sqrt(2) */ + const sunrealtype twogamma = SUN_RCONST(2.0) - SUNRsqrt(SUN_RCONST(2.0)); + /* (4 - sqrt(2)) / 8 */ + const sunrealtype beta = (SUN_RCONST(4.0) - SUNRsqrt(SUN_RCONST(2.0))) / SUN_RCONST(8.0); + + B->q = 2; + B->p = 1; + + B->A[1][0] = twogamma; + B->A[2][0] = SUN_RCONST(1.0) - alpha; + B->A[2][1] = alpha; + + B->b[0] = delta; + B->b[1] = delta; + B->b[2] = gamma; + + B->d[0] = beta; + B->d[1] = beta; + B->d[2] = SUN_RCONST(1.0) / (SUN_RCONST(2.0) * SUNRsqrt(SUN_RCONST(2.0))); + + B->c[1] = twogamma; + B->c[2] = SUN_RCONST(1.0); + + return B; + }) ARK_BUTCHER_TABLE(ARKODE_BOGACKI_SHAMPINE_4_2_3, { /* Bogacki-Shampine-ERK */ ARKodeButcherTable B = 
ARKodeButcherTable_Alloc(4, SUNTRUE); @@ -684,4 +720,3 @@ ARK_BUTCHER_TABLE(ARKODE_KNOTH_WOLKE_3_3, { /* Knoth-Wolke-ERK */ B->c[2] = RCONST(3.0)/RCONST(4.0); return B; }) - diff --git a/src/arkode/arkode_erkstep_io.c b/src/arkode/arkode_erkstep_io.c index fbce038dc3..d6cdb2fa35 100644 --- a/src/arkode/arkode_erkstep_io.c +++ b/src/arkode/arkode_erkstep_io.c @@ -59,6 +59,9 @@ int ERKStepSetMaxStep(void *arkode_mem, realtype hmax) { return(arkSetMaxStep(arkode_mem, hmax)); } int ERKStepSetStopTime(void *arkode_mem, realtype tstop) { return(arkSetStopTime(arkode_mem, tstop)); } +int ERKStepSetInterpolateStopTime(void *arkode_mem, + booleantype interp) { + return(arkSetInterpolateStopTime(arkode_mem, interp)); } int ERKStepClearStopTime(void *arkode_mem) { return(arkClearStopTime(arkode_mem)); } int ERKStepSetRootDirection(void *arkode_mem, int *rootdir) { diff --git a/src/arkode/arkode_impl.h b/src/arkode/arkode_impl.h index f099af8444..2b2350d64f 100644 --- a/src/arkode/arkode_impl.h +++ b/src/arkode/arkode_impl.h @@ -336,6 +336,7 @@ typedef struct ARKodeMemRec { /* Tstop information */ booleantype tstopset; + booleantype tstopinterp; realtype tstop; /* Time step data */ @@ -975,6 +976,7 @@ int arkSetInitStep(void *arkode_mem, realtype hin); int arkSetMinStep(void *arkode_mem, realtype hmin); int arkSetMaxStep(void *arkode_mem, realtype hmax); int arkSetStopTime(void *arkode_mem, realtype tstop); +int arkSetInterpolateStopTime(void *arkode_mem, booleantype interp); int arkClearStopTime(void *arkode_mem); int arkSetFixedStep(void *arkode_mem, realtype hfixed); int arkSetRootDirection(void *arkode_mem, int *rootdir); diff --git a/src/arkode/arkode_io.c b/src/arkode/arkode_io.c index aadc4044f4..b4fda7d21c 100644 --- a/src/arkode/arkode_io.c +++ b/src/arkode/arkode_io.c @@ -80,6 +80,7 @@ int arkSetDefaults(void *arkode_mem) ark_mem->hmin = ZERO; /* no minimum step size */ ark_mem->hmax_inv = ZERO; /* no maximum step size */ ark_mem->tstopset = SUNFALSE; /* no stop time 
set */ + ark_mem->tstopinterp = SUNFALSE; /* copy at stop time */ ark_mem->tstop = ZERO; /* no fixed stop time */ ark_mem->diagfp = NULL; /* no solver diagnostics file */ ark_mem->report = SUNFALSE; /* don't report solver diagnostics */ @@ -518,6 +519,26 @@ int arkSetStopTime(void *arkode_mem, realtype tstop) } +/*--------------------------------------------------------------- + arkSetInterpolateStopTime: + + Specifies to use interpolation to fill the solution output at + the stop time (instead of a copy). + ---------------------------------------------------------------*/ +int arkSetInterpolateStopTime(void *arkode_mem, booleantype interp) +{ + ARKodeMem ark_mem; + if (arkode_mem==NULL) { + arkProcessError(NULL, ARK_MEM_NULL, "ARKODE", + "arkSetInterpolateStopTime", MSG_ARK_NO_MEM); + return (ARK_MEM_NULL); + } + ark_mem = (ARKodeMem) arkode_mem; + ark_mem->tstopinterp = interp; + return(ARK_SUCCESS); +} + + /*--------------------------------------------------------------- arkClearStopTime: diff --git a/src/arkode/arkode_mristep_io.c b/src/arkode/arkode_mristep_io.c index c722cfd26a..5c6ced636e 100644 --- a/src/arkode/arkode_mristep_io.c +++ b/src/arkode/arkode_mristep_io.c @@ -48,6 +48,9 @@ int MRIStepSetMaxHnilWarns(void *arkode_mem, int mxhnil) { return(arkSetMaxHnilWarns(arkode_mem, mxhnil)); } int MRIStepSetStopTime(void *arkode_mem, realtype tstop) { return(arkSetStopTime(arkode_mem, tstop)); } +int MRIStepSetInterpolateStopTime(void *arkode_mem, + booleantype interp) { + return(arkSetInterpolateStopTime(arkode_mem, interp)); } int MRIStepClearStopTime(void *arkode_mem) { return(arkClearStopTime(arkode_mem)); } int MRIStepSetRootDirection(void *arkode_mem, int *rootdir) { diff --git a/src/cvode/cvode.c b/src/cvode/cvode.c index 609b55a52f..e2a56c3850 100644 --- a/src/cvode/cvode.c +++ b/src/cvode/cvode.c @@ -323,6 +323,7 @@ void *CVodeCreate(int lmm, SUNContext sunctx) cv_mem->cv_small_nst = SMALL_NST_DEFAULT; cv_mem->cv_small_nef = SMALL_NEF_DEFAULT; 
cv_mem->cv_tstopset = SUNFALSE; + cv_mem->cv_tstopinterp = SUNFALSE; cv_mem->cv_maxnef = MXNEF; cv_mem->cv_maxncf = MXNCF; cv_mem->cv_nlscoef = CORTES; @@ -1258,12 +1259,16 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if ( cv_mem->cv_tstopset ) { if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff ) { - ier = CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); - if (ier != CV_SUCCESS) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODE", "CVode", - MSGCV_BAD_TSTOP, cv_mem->cv_tstop, cv_mem->cv_tn); - SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); - return(CV_ILL_INPUT); + if (cv_mem->cv_tstopinterp) { + ier = CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); + if (ier != CV_SUCCESS) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODE", "CVode", + MSGCV_BAD_TSTOP, cv_mem->cv_tstop, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_ILL_INPUT); + } + } else { + N_VScale(ONE, cv_mem->cv_zn[0], yout); } cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; cv_mem->cv_tstopset = SUNFALSE; @@ -1434,7 +1439,11 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, troundoff = FUZZ_FACTOR * cv_mem->cv_uround * (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)); if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff) { - (void) CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); + if (cv_mem->cv_tstopinterp) { + (void) CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); + } else { + N_VScale(ONE, cv_mem->cv_zn[0], yout); + } cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; cv_mem->cv_tstopset = SUNFALSE; istate = CV_TSTOP_RETURN; diff --git a/src/cvode/cvode_impl.h b/src/cvode/cvode_impl.h index e5a65843a7..966626b715 100644 --- a/src/cvode/cvode_impl.h +++ b/src/cvode/cvode_impl.h @@ -256,6 +256,7 @@ typedef struct CVodeMemRec { -----------------*/ booleantype cv_tstopset; + booleantype cv_tstopinterp; realtype cv_tstop; /*--------- diff --git a/src/cvode/cvode_io.c b/src/cvode/cvode_io.c index ce947d6a2c..174462b552 100644 --- a/src/cvode/cvode_io.c +++ 
b/src/cvode/cvode_io.c @@ -714,6 +714,27 @@ int CVodeSetStopTime(void *cvode_mem, realtype tstop) return(CV_SUCCESS); } +/* + * CVodeSetInterpolateStopTime + * + * Specifies to use interpolation to fill the output solution at + * the stop time (instead of a copy). + */ + +int CVodeSetInterpolateStopTime(void *cvode_mem, booleantype interp) +{ + CVodeMem cv_mem; + + if (cvode_mem==NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODE", "CVodeSetInterpolateStopTime", MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + cv_mem = (CVodeMem) cvode_mem; + cv_mem->cv_tstopinterp = interp; + + return(CV_SUCCESS); +} + /* * CVodeClearStopTime * diff --git a/src/cvode/cvode_ls.c b/src/cvode/cvode_ls.c index a32679ab3c..a9d986c193 100644 --- a/src/cvode/cvode_ls.c +++ b/src/cvode/cvode_ls.c @@ -1636,7 +1636,8 @@ int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight, N_Vector ynow, N_Vector fnow) { CVLsMem cvls_mem; - realtype bnorm, deltar, delta, w_mean; + realtype bnorm = ZERO; + realtype deltar, delta, w_mean; int curiter, nli_inc, retval; #if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG realtype resnorm; diff --git a/src/cvodes/cvodes.c b/src/cvodes/cvodes.c index edf454f720..87538bfa95 100644 --- a/src/cvodes/cvodes.c +++ b/src/cvodes/cvodes.c @@ -533,6 +533,7 @@ void *CVodeCreate(int lmm, SUNContext sunctx) cv_mem->cv_small_nst = SMALL_NST_DEFAULT; cv_mem->cv_small_nef = SMALL_NEF_DEFAULT; cv_mem->cv_tstopset = SUNFALSE; + cv_mem->cv_tstopinterp = SUNFALSE; cv_mem->cv_maxnef = MXNEF; cv_mem->cv_maxncf = MXNCF; cv_mem->cv_nlscoef = CORTES; @@ -3084,12 +3085,16 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, if ( cv_mem->cv_tstopset ) { if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff ) { - ier = CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); - if (ier != CV_SUCCESS) { - cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", - MSGCV_BAD_TSTOP, cv_mem->cv_tstop, cv_mem->cv_tn); - SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); - return(CV_ILL_INPUT); + if 
(cv_mem->cv_tstopinterp) { + ier = CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); + if (ier != CV_SUCCESS) { + cvProcessError(cv_mem, CV_ILL_INPUT, "CVODES", "CVode", + MSGCV_BAD_TSTOP, cv_mem->cv_tstop, cv_mem->cv_tn); + SUNDIALS_MARK_FUNCTION_END(CV_PROFILER); + return(CV_ILL_INPUT); + } + } else { + N_VScale(ONE, cv_mem->cv_zn[0], yout); } cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; cv_mem->cv_tstopset = SUNFALSE; @@ -3304,7 +3309,11 @@ int CVode(void *cvode_mem, realtype tout, N_Vector yout, troundoff = FUZZ_FACTOR * cv_mem->cv_uround * (SUNRabs(cv_mem->cv_tn) + SUNRabs(cv_mem->cv_h)); if ( SUNRabs(cv_mem->cv_tn - cv_mem->cv_tstop) <= troundoff) { - (void) CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); + if (cv_mem->cv_tstopinterp) { + (void) CVodeGetDky(cv_mem, cv_mem->cv_tstop, 0, yout); + } else { + N_VScale(ONE, cv_mem->cv_zn[0], yout); + } cv_mem->cv_tretlast = *tret = cv_mem->cv_tstop; cv_mem->cv_tstopset = SUNFALSE; istate = CV_TSTOP_RETURN; diff --git a/src/cvodes/cvodes_impl.h b/src/cvodes/cvodes_impl.h index c1ac992ebf..b4a18676a8 100644 --- a/src/cvodes/cvodes_impl.h +++ b/src/cvodes/cvodes_impl.h @@ -384,6 +384,7 @@ typedef struct CVodeMemRec { -----------------*/ booleantype cv_tstopset; + booleantype cv_tstopinterp; realtype cv_tstop; /*--------- diff --git a/src/cvodes/cvodes_io.c b/src/cvodes/cvodes_io.c index 1eeea64470..93525e3477 100644 --- a/src/cvodes/cvodes_io.c +++ b/src/cvodes/cvodes_io.c @@ -716,6 +716,25 @@ int CVodeSetStopTime(void *cvode_mem, realtype tstop) return(CV_SUCCESS); } +/* + * CVodeSetInterpolateStopTime + * + * Specifies to use interpolation to fill the returned solution at the stop time (instead of a copy). 
+ */ + +int CVodeSetInterpolateStopTime(void *cvode_mem, booleantype interp) +{ + CVodeMem cv_mem; + + if (cvode_mem==NULL) { + cvProcessError(NULL, CV_MEM_NULL, "CVODES", "CVodeSetInterpolateStopTime", MSGCV_NO_MEM); + return (CV_MEM_NULL); + } + cv_mem = (CVodeMem) cvode_mem; + cv_mem->cv_tstopinterp = interp; + return(CV_SUCCESS); +} + /* * CVodeClearStopTime * diff --git a/src/cvodes/cvodes_ls.c b/src/cvodes/cvodes_ls.c index b7477bebca..14d12a9bc6 100644 --- a/src/cvodes/cvodes_ls.c +++ b/src/cvodes/cvodes_ls.c @@ -1723,7 +1723,8 @@ int cvLsSolve(CVodeMem cv_mem, N_Vector b, N_Vector weight, N_Vector ynow, N_Vector fnow) { CVLsMem cvls_mem; - realtype bnorm, deltar, delta, w_mean; + realtype bnorm = ZERO; + realtype deltar, delta, w_mean; int curiter, nli_inc, retval; booleantype do_sensi_sim, do_sensi_stg, do_sensi_stg1; #if SUNDIALS_LOGGING_LEVEL >= SUNDIALS_LOGGING_DEBUG diff --git a/src/sundials/sundials_futils.c b/src/sundials/sundials_futils.c index 194a917ae2..7d32c597db 100644 --- a/src/sundials/sundials_futils.c +++ b/src/sundials/sundials_futils.c @@ -15,15 +15,37 @@ * -----------------------------------------------------------------*/ #include +#include /* Create a file pointer with the given file name and mode. */ FILE* SUNDIALSFileOpen(const char* filename, const char* mode) { - return fopen(filename, mode); + FILE* fp = NULL; + + if (filename) + { + if (!strcmp(filename, "stdout")) + { + fp = stdout; + } + else if (!strcmp(filename, "stderr")) + { + fp = stderr; + } + else + { + fp = fopen(filename, mode); + } + } + + return fp; } /* Close a file pointer with the given file name. 
*/ void SUNDIALSFileClose(FILE* fp) { - fclose(fp); + if (fp && (fp != stdout) && (fp != stderr)) + { + fclose(fp); + } } diff --git a/src/sunlinsol/onemkldense/sunlinsol_onemkldense.cpp b/src/sunlinsol/onemkldense/sunlinsol_onemkldense.cpp index d1c7165ed0..72e87f53f9 100644 --- a/src/sunlinsol/onemkldense/sunlinsol_onemkldense.cpp +++ b/src/sunlinsol/onemkldense/sunlinsol_onemkldense.cpp @@ -180,6 +180,13 @@ SUNLinearSolver SUNLinSol_OneMklDense(N_Vector y, SUNMatrix Amat, SUNContext sun if (num_blocks > 1) { +#ifdef SUNDIALS_ONEMKL_USE_GETRF_LOOP + LS_F_SCRATCH_SIZE(S) = + getrf_scratchpad_size(*queue, // device queue + M, // rows in A_i + N, // columns in A_i + M); // leading dimension +#else LS_F_SCRATCH_SIZE(S) = getrf_batch_scratchpad_size(*queue, // device queue M, // rows in A_i @@ -188,8 +195,17 @@ SUNLinearSolver SUNLinSol_OneMklDense(N_Vector y, SUNMatrix Amat, SUNContext sun M * N, // stride between A_i M, // stride in P_i num_blocks); // number of blocks +#endif -#ifdef SUNDIALS_ONEMKL_USE_GETRS_BATCHED +#ifdef SUNDIALS_ONEMKL_USE_GETRS_LOOP + LS_S_SCRATCH_SIZE(S) = + getrs_scratchpad_size(*queue, // device queue + oneapi::mkl::transpose::nontrans, + M, // number of rows in A + 1, // number of right-hand sizes + M, // leading dimension of A + M); // leading dimension of B +#else LS_S_SCRATCH_SIZE(S)= getrs_batch_scratchpad_size(*queue, // device queue oneapi::mkl::transpose::nontrans, @@ -201,14 +217,6 @@ SUNLinearSolver SUNLinSol_OneMklDense(N_Vector y, SUNMatrix Amat, SUNContext sun M, // leading dimension of B_i M, // stride between B_i num_blocks); // number of blocks -#else - LS_S_SCRATCH_SIZE(S) = - getrs_scratchpad_size(*queue, // device queue - oneapi::mkl::transpose::nontrans, - M, // number of rows in A - 1, // number of right-hand sizes - M, // leading dimension of A - M); // leading dimension of B #endif } else @@ -326,6 +334,36 @@ int SUNLinSolSetup_OneMklDense(SUNLinearSolver S, SUNMatrix A) if (num_blocks > 1) { +#ifdef 
SUNDIALS_ONEMKL_USE_GETRF_LOOP + try + { + for (sunindextype i = 0; i < num_blocks; i++) + { + getrf(*queue, // device queue + M, // number of rows + N, // number of columns + Adata + i * M * N, // matrix data + M, // leading dimension of A + pivots + i * M, // array of pivots + scratchpad, // scratchpad memory + scratch_size); // scratchpad size + } + } + catch(oneapi::mkl::lapack::exception const& e) + { + SUNDIALS_DEBUG_ERROR("An exception occured in getrf\n"); + if (e.info()) + { + // An illegal value was providied or the scratch pad is too small + ier = -1; + } + else + { + // The diagonal element of some of U_i is zero + ier = 1; + } + } +#else try { getrf_batch(*queue, // device queue @@ -354,6 +392,7 @@ int SUNLinSolSetup_OneMklDense(SUNLinearSolver S, SUNMatrix A) ier = 1; } } +#endif } else { @@ -467,7 +506,30 @@ int SUNLinSolSolve_OneMklDense(SUNLinearSolver S, SUNMatrix A, N_Vector x, if (num_blocks > 1) { -#ifdef SUNDIALS_ONEMKL_USE_GETRS_BATCHED +#ifdef SUNDIALS_ONEMKL_USE_GETRS_LOOP + try + { + for (sunindextype i = 0; i < num_blocks; i++) + { + getrs(*queue, // device queue + oneapi::mkl::transpose::nontrans, + M, // number of rows + 1, // number of right-hand sides + Adata + i * M * N, // factorized matrix data + M, // leading dimension of A + pivots, // array of pivots + xdata + i * M, // right-hand side data + M, // leading dimension of B_i + scratchpad, // scratchpad memory + scratch_size); // scratchpad size + } + } + catch(oneapi::mkl::lapack::exception const& e) + { + SUNDIALS_DEBUG_ERROR("An exception occured in getrs\n"); + ier = -1; + } +#else try { getrs_batch(*queue, // device queue @@ -491,29 +553,6 @@ int SUNLinSolSolve_OneMklDense(SUNLinearSolver S, SUNMatrix A, N_Vector x, SUNDIALS_DEBUG_ERROR("An exception occured in getrs_batch\n"); ier = -1; } -#else - try - { - for (sunindextype i = 0; i < num_blocks; i++) - { - getrs(*queue, // device queue - oneapi::mkl::transpose::nontrans, - M, // number of rows - 1, // number of right-hand 
sides - Adata + i * M * N, // factorized matrix data - M, // leading dimension of A - pivots, // array of pivots - xdata + i * M, // right-hand side data - M, // leading dimension of B_i - scratchpad, // scratchpad memory - scratch_size); // scratchpad size - } - } - catch(oneapi::mkl::lapack::exception const& e) - { - SUNDIALS_DEBUG_ERROR("An exception occured in getrs\n"); - ier = -1; - } #endif } else diff --git a/test/answers b/test/answers index adc6da31cd..96d6e170c1 160000 --- a/test/answers +++ b/test/answers @@ -1 +1 @@ -Subproject commit adc6da31cd21bfa6e70d6fb026510008643f8ebb +Subproject commit 96d6e170c15f997d1e9062d4e6478e618d3f30ca diff --git a/test/unit_tests/arkode/CXX_serial/ark_test_butcher.cpp b/test/unit_tests/arkode/CXX_serial/ark_test_butcher.cpp index d89ecacc39..8ad033603d 100644 --- a/test/unit_tests/arkode/CXX_serial/ark_test_butcher.cpp +++ b/test/unit_tests/arkode/CXX_serial/ark_test_butcher.cpp @@ -29,13 +29,13 @@ int main() { // set vectors of individual tables to test - std::vector Tables_ERK = {"ARKODE_HEUN_EULER_2_1_2", + std::vector Tables_ERK = {"ARKODE_HEUN_EULER_2_1_2", "ARKODE_ARK2_ERK_3_1_2", "ARKODE_BOGACKI_SHAMPINE_4_2_3", "ARKODE_ARK324L2SA_ERK_4_2_3", "ARKODE_ZONNEVELD_5_3_4", "ARKODE_ARK436L2SA_ERK_6_3_4", "ARKODE_SAYFY_ABURUB_6_3_4", "ARKODE_CASH_KARP_6_4_5", "ARKODE_FEHLBERG_6_4_5", "ARKODE_DORMAND_PRINCE_7_4_5", "ARKODE_ARK548L2SA_ERK_8_4_5", "ARKODE_VERNER_8_5_6", "ARKODE_FEHLBERG_13_7_8", "ARKODE_ARK437L2SA_ERK_7_3_4", "ARKODE_ARK548L2SAb_ERK_8_4_5"}; - std::vector Tables_DIRK = {"ARKODE_SDIRK_2_1_2", + std::vector Tables_DIRK = {"ARKODE_SDIRK_2_1_2", "ARKODE_ARK2_DIRK_3_1_2", "ARKODE_BILLINGTON_3_3_2", "ARKODE_TRBDF2_3_3_2", "ARKODE_KVAERNO_4_2_3", "ARKODE_ARK324L2SA_DIRK_4_2_3", "ARKODE_CASH_5_2_4", "ARKODE_CASH_5_3_4", "ARKODE_SDIRK_5_3_4", "ARKODE_KVAERNO_5_3_4", "ARKODE_ARK436L2SA_DIRK_6_3_4", @@ -44,15 +44,15 @@ int main() { "ARKODE_ESDIRK324L2SA_4_2_3", "ARKODE_ESDIRK325L2SA_5_2_3", 
"ARKODE_ESDIRK32I5L2SA_5_2_3", "ARKODE_ESDIRK436L2SA_6_3_4", "ARKODE_ESDIRK43I6L2SA_6_3_4", "ARKODE_QESDIRK436L2SA_6_3_4", "ARKODE_ESDIRK437L2SA_7_3_4", "ARKODE_ESDIRK547L2SA_7_4_5", "ARKODE_ESDIRK547L2SA2_7_4_5"}; - std::vector Tables_ARK_ERK = {ARKODE_ARK324L2SA_ERK_4_2_3, + std::vector Tables_ARK_ERK = {ARKODE_ARK2_ERK_3_1_2, ARKODE_ARK324L2SA_ERK_4_2_3, ARKODE_ARK436L2SA_ERK_6_3_4, ARKODE_ARK437L2SA_ERK_7_3_4, ARKODE_ARK548L2SA_ERK_8_4_5, ARKODE_ARK548L2SAb_ERK_8_4_5}; - std::vector Tables_ARK_DIRK = {ARKODE_ARK324L2SA_DIRK_4_2_3, + std::vector Tables_ARK_DIRK = {ARKODE_ARK2_DIRK_3_1_2, ARKODE_ARK324L2SA_DIRK_4_2_3, ARKODE_ARK436L2SA_DIRK_6_3_4, ARKODE_ARK437L2SA_DIRK_7_3_4, ARKODE_ARK548L2SA_DIRK_8_4_5, ARKODE_ARK548L2SAb_DIRK_8_4_5}; - std::vector STables_ARK = {"ARKODE_ARK324L2SA_4_2_3", "ARKODE_ARK436L2SA_6_3_4", - "ARKODE_ARK437L2SA_7_3_4", "ARKODE_ARK548L2SA_8_4_5", - "ARKODE_ARK548L2SAb_8_4_5"}; + std::vector STables_ARK = {"ARKODE_ARK2_3_1_2", "ARKODE_ARK324L2SA_4_2_3", + "ARKODE_ARK436L2SA_6_3_4", "ARKODE_ARK437L2SA_7_3_4", + "ARKODE_ARK548L2SA_8_4_5", "ARKODE_ARK548L2SAb_8_4_5"}; int numfails = 0; // loop over individual ERK tables diff --git a/test/unit_tests/arkode/CXX_serial/ark_test_butcher.out b/test/unit_tests/arkode/CXX_serial/ark_test_butcher.out index 054defea65..8d7971338b 100644 --- a/test/unit_tests/arkode/CXX_serial/ark_test_butcher.out +++ b/test/unit_tests/arkode/CXX_serial/ark_test_butcher.out @@ -2,6 +2,7 @@ Testing individual ERK methods: Testing method ARKODE_HEUN_EULER_2_1_2: table matches predicted method/embedding orders of 2/1 +Testing method ARKODE_ARK2_ERK_3_1_2: table matches predicted method/embedding orders of 2/1 Testing method ARKODE_BOGACKI_SHAMPINE_4_2_3: table matches predicted method/embedding orders of 3/2 Testing method ARKODE_ARK324L2SA_ERK_4_2_3: table matches predicted method/embedding orders of 3/2 Testing method ARKODE_ZONNEVELD_5_3_4: table matches predicted method/embedding orders of 4/3 @@ -25,6 +26,7 @@ 
Testing method ARKODE_ARK548L2SAb_ERK_8_4_5: table matches predicted method/emb Testing individual DIRK methods: Testing method ARKODE_SDIRK_2_1_2: table matches predicted method/embedding orders of 2/1 +Testing method ARKODE_ARK2_DIRK_3_1_2: table matches predicted method/embedding orders of 2/1 Testing method ARKODE_BILLINGTON_3_3_2: table matches predicted method/embedding orders of 2/3 Testing method ARKODE_TRBDF2_3_3_2: table matches predicted method/embedding orders of 2/3 Testing method ARKODE_KVAERNO_4_2_3: table matches predicted method/embedding orders of 3/2 @@ -50,6 +52,7 @@ Testing method ARKODE_ESDIRK547L2SA2_7_4_5: table matches predicted method/embe Testing ARK pairs: +Testing method ARKODE_ARK2_3_1_2: Method/embedding match predicted orders of 2/1 Testing method ARKODE_ARK324L2SA_4_2_3: Method/embedding match predicted orders of 3/2 Testing method ARKODE_ARK436L2SA_6_3_4: Method/embedding match predicted orders of 4/3 Testing method ARKODE_ARK437L2SA_7_3_4: Method/embedding match predicted orders of 4/3 diff --git a/test/unit_tests/sunmemory/sycl/test_sunmemory_sycl.cpp b/test/unit_tests/sunmemory/sycl/test_sunmemory_sycl.cpp index 325d8654d1..52e1dab5dd 100644 --- a/test/unit_tests/sunmemory/sycl/test_sunmemory_sycl.cpp +++ b/test/unit_tests/sunmemory/sycl/test_sunmemory_sycl.cpp @@ -20,7 +20,7 @@ int test_instance(SUNMemoryHelper helper, SUNMemoryType mem_type, bool print_test_status) { // Create an in-order GPU queue -#if SYCL_LANGUAGE_VERSION >= 2020 +#if SYCL_LANGUAGE_VERSION >= 2020 && !defined(SUNDIALS_SYCL_2020_UNSUPPORTED) sycl::queue myQueue(sycl::gpu_selector_v, sycl::property_list{sycl::property::queue::in_order{}}); #else