diff --git a/.ci_fedora.sh b/.ci_fedora.sh index 452afb4b7e..b8805abb15 100755 --- a/.ci_fedora.sh +++ b/.ci_fedora.sh @@ -50,7 +50,7 @@ then cp -a /tmp/BOUT-dev /home/test/ chown -R test /home/test chmod u+rwX /home/test -R - sudo -u test ${0/\/tmp/\/home\/test} $mpi + su - test -c "${0/\/tmp/\/home\/test} $mpi" ## If we are called as normal user, run test else . /etc/profile.d/modules.sh diff --git a/.clang-format b/.clang-format index f51c5bde87..a80c59bddd 100644 --- a/.clang-format +++ b/.clang-format @@ -109,6 +109,8 @@ SpacesInParentheses: false SpacesInSquareBrackets: false StatementMacros: - BOUT_OMP + - BOUT_OMP_PERF + - BOUT_OMP_SAFE Standard: c++14 TabWidth: 8 UseTab: Never diff --git a/.clang-tidy b/.clang-tidy index 3be0af4917..0117c20e42 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -2,7 +2,6 @@ Checks: 'clang-diagnostic-*,clang-analyzer-*,performance-*,readability-*,bugprone-*,clang-analyzer-*,cppcoreguidelines-*,mpi-*,misc-*,-readability-magic-numbers,-cppcoreguidelines-avoid-magic-numbers,-misc-non-private-member-variables-in-classes,-clang-analyzer-optin.mpi*,-bugprone-exception-escape,-cppcoreguidelines-pro-bounds-pointer-arithmetic,-readability-function-cognitive-complexity,-misc-no-recursion,-bugprone-easily-swappable-parameters' WarningsAsErrors: '' HeaderFilterRegex: '' -AnalyzeTemporaryDtors: false FormatStyle: file CheckOptions: @@ -10,7 +9,7 @@ CheckOptions: - key: readability-identifier-length.IgnoredVariableNames value: '^[dn]?[xyz]$' - key: readability-identifier-length.IgnoredParameterNames - value: '^[fijkxyz][01xyz]?$' + value: '^[dfijknxyz][01xyz]?$' - key: readability-identifier-length.IgnoredLoopCounterNames value: '^[ijkxyz_]$' diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 42965e75e8..bdaeb3dc4f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,7 +39,7 @@ jobs: is_cron: - ${{ github.event_name == 'cron' }} config: - - name: "CMake, PETSc unreleased, ADIOS" + - name: 
"CMake, PETSc unreleased, ADIOS2" os: ubuntu-20.04 cmake_options: "-DBUILD_SHARED_LIBS=ON -DBOUT_ENABLE_METRIC_3D=ON diff --git a/.gitignore b/.gitignore index 7ddf9526ab..934da1c0de 100644 --- a/.gitignore +++ b/.gitignore @@ -85,3 +85,5 @@ coverage/ /_version.txt /BOUT++-v*.tar.gz /BOUT++-v*.tar.xz +/CMakeCache.txt +/CMakeFiles/cmake.check_cache diff --git a/CMakeLists.txt b/CMakeLists.txt index 483672fb67..f57a78a14a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -366,7 +366,7 @@ else() set(BOUT_GENERATE_FIELDOPS_DEFAULT OFF) endif() -execute_process(COMMAND ${Python3_EXECUTABLE} -c "import zoidberg" +execute_process(COMMAND ${Python3_EXECUTABLE} -c "import importlib.util ; import sys; sys.exit(importlib.util.find_spec(\"zoidberg\") is None)" RESULT_VARIABLE zoidberg_FOUND) if (zoidberg_FOUND EQUAL 0) set(zoidberg_FOUND ON) @@ -774,7 +774,7 @@ set(BOUT_HAS_PNETCDF OFF) # while for static builds we need the dependencies too if (BUILD_SHARED_LIBS) # Include rpath linker flag so user doesn't need to set LD_LIBRARY_PATH - set(CONFIG_LDFLAGS "${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG}\$BOUT_LIB_PATH -L\$BOUT_LIB_PATH -lbout++ -lfmt") + set(CONFIG_LDFLAGS "${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG}\$BOUT_LIB_PATH -L\$BOUT_LIB_PATH -lbout++ -lfmt ${CONFIG_LDFLAGS_SHARED}") else() set(CONFIG_LDFLAGS "${CONFIG_LDFLAGS}") endif() @@ -935,7 +935,7 @@ message(" SUNDIALS support : ${BOUT_HAS_SUNDIALS} HYPRE support : ${BOUT_HAS_HYPRE} NetCDF support : ${BOUT_HAS_NETCDF} - ADIOS support : ${BOUT_HAS_ADIOS} + ADIOS2 support : ${BOUT_HAS_ADIOS2} FFTW support : ${BOUT_HAS_FFTW} LAPACK support : ${BOUT_HAS_LAPACK} OpenMP support : ${BOUT_USE_OPENMP} diff --git a/bin/bout-build-deps.sh b/bin/bout-build-deps.sh index 19e3b2a0d3..d96d500dc9 100755 --- a/bin/bout-build-deps.sh +++ b/bin/bout-build-deps.sh @@ -98,7 +98,7 @@ netcdf() { nccxx() { cd $BUILD - wget -c ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-cxx4-$NCCXXVER.tar.gz || : + wget -c 
https://downloads.unidata.ucar.edu/netcdf-cxx/$NCCXXVER/netcdf-cxx4-$NCCXXVER.tar.gz || : tar -xf netcdf-cxx4-$NCCXXVER.tar.gz cd netcdf-cxx4-$NCCXXVER CPPFLAGS="-I$PREFIX/include" LDFLAGS="-L$PREFIX/lib/" ./configure --prefix=$PREFIX $NCCXXFLAGS @@ -286,17 +286,17 @@ set -x ## Setup folders and links setup ## Build and install hdf5 -hdf5 +test $NO_HDF5 || hdf5 ## Build and install netcdf -netcdf +test $NO_NETCDF || netcdf ## Build and install C++ interface for netcdf -nccxx +test $NO_NCXX || nccxx ## Build and install FFTW -fftw +test $NO_FFTW || fftw ## Build and install Sundials -sundials +test $NO_SUNDIALS || sundials ## Build and install PETSc -petsc +test $NO_PETSC || petsc ## Download BOUT++ submodules submod # Install python packages diff --git a/bin/bout-config.in b/bin/bout-config.in index a9045fff39..b5a62a42eb 100755 --- a/bin/bout-config.in +++ b/bin/bout-config.in @@ -29,7 +29,7 @@ idlpath="@IDLCONFIGPATH@" pythonpath="@PYTHONCONFIGPATH@" has_netcdf="@BOUT_HAS_NETCDF@" -has_adios="@BOUT_HAS_ADIOS@" +has_adios2="@BOUT_HAS_ADIOS2@" has_legacy_netcdf="@BOUT_HAS_LEGACY_NETCDF@" has_pnetcdf="@BOUT_HAS_PNETCDF@" has_pvode="@BOUT_HAS_PVODE@" @@ -71,18 +71,18 @@ Available values for OPTION include: --idl IDL path --python Python path - --has-netcdf NetCDF file support - --has-adios ADIOS file support + --has-netcdf NetCDF file support + --has-adios2 ADIOS2 file support --has-legacy-netcdf Legacy NetCDF file support - --has-pnetcdf Parallel NetCDF file support - --has-pvode PVODE solver support - --has-cvode SUNDIALS CVODE solver support - --has-ida SUNDIALS IDA solver support - --has-lapack LAPACK support - --has-petsc PETSc support - --has-hypre Hypre support - --has-slepc SLEPc support - --has-nls Natural Language Support + --has-pnetcdf Parallel NetCDF file support + --has-pvode PVODE solver support + --has-cvode SUNDIALS CVODE solver support + --has-ida SUNDIALS IDA solver support + --has-lapack LAPACK support + --has-petsc PETSc support + --has-hypre 
Hypre support + --has-slepc SLEPc support + --has-nls Natural Language Support --petsc-has-sundials @@ -123,6 +123,7 @@ all() echo " --has-slepc -> $has_slepc" echo " --has-arkode -> $has_arkode" echo " --has-nls -> $has_nls" + echo " --has-openmp -> $has_openmp" echo echo " --petsc-has-sundials -> $petsc_has_sundials" echo diff --git a/bout++Config.cmake.in b/bout++Config.cmake.in index 3d824e455f..5af0dc43ea 100644 --- a/bout++Config.cmake.in +++ b/bout++Config.cmake.in @@ -15,7 +15,7 @@ set(BOUT_USE_METRIC_3D @BOUT_USE_METRIC_3D@) set(BOUT_HAS_PVODE @BOUT_HAS_PVODE@) set(BOUT_HAS_NETCDF @BOUT_HAS_NETCDF@) -set(BOUT_HAS_ADIOS @BOUT_HAS_ADIOS@) +set(BOUT_HAS_ADIOS2 @BOUT_HAS_ADIOS2@) set(BOUT_HAS_FFTW @BOUT_HAS_FFTW@) set(BOUT_HAS_LAPACK @BOUT_HAS_LAPACK@) set(BOUT_HAS_PETSC @BOUT_HAS_PETSC@) diff --git a/cmake/FindCython.cmake b/cmake/FindCython.cmake index 76f43480d9..3b98cde89e 100644 --- a/cmake/FindCython.cmake +++ b/cmake/FindCython.cmake @@ -10,7 +10,7 @@ # CYTHON_FOUND - true if Cython was found # CYTHON_VERSION - Cython version -execute_process(COMMAND ${Python_EXECUTABLE} -c "import cython ; print(cython.__version__)" +execute_process(COMMAND ${Python3_EXECUTABLE} -c "import cython ; print(cython.__version__)" RESULT_VARIABLE _cython_runs OUTPUT_VARIABLE CYTHON_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE diff --git a/cmake/FindNumpy.cmake b/cmake/FindNumpy.cmake index 201bc19221..b6de6e3e35 100644 --- a/cmake/FindNumpy.cmake +++ b/cmake/FindNumpy.cmake @@ -12,32 +12,32 @@ # Numpy_INCLUDE_DIR -find_package(Python 3.6 COMPONENTS Interpreter Development) +find_package(Python3 3.6 COMPONENTS Interpreter Development) -if (NOT Python_FOUND) +if (NOT Python3_FOUND) message(STATUS "Could not find numpy as python was not found. 
Maybe the developement package is missing?") - set(Numpy_FOUND ${Python_FOUND}) + set(Numpy_FOUND ${Python3_FOUND}) return() endif() if (NOT Numpy_FOUND) - execute_process(COMMAND ${Python_EXECUTABLE} -c "import numpy ; print(numpy.__version__)" + execute_process(COMMAND ${Python3_EXECUTABLE} -c "import numpy ; print(numpy.__version__)" OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE Numpy_VERSION ) - execute_process(COMMAND ${Python_EXECUTABLE} -c "import numpy ; print(numpy.get_include())" + execute_process(COMMAND ${Python3_EXECUTABLE} -c "import numpy ; print(numpy.get_include())" OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE _numpy_include_dirs ) endif() if (Numpy_DEBUG) - message(STATUS "Looking for numpy headers in: ${_numpy_include_dirs} ${PYTHON_INCLUDE_DIR}") + message(STATUS "Looking for numpy headers in: ${_numpy_include_dirs} ${Python3_INCLUDE_DIRS}") endif() find_path(Numpy_INCLUDE_DIR numpy/arrayobject.h - PATHS "${_numpy_include_dirs}" "${PYTHON_INCLUDE_DIR}" + PATHS "${_numpy_include_dirs}" "${Python3_INCLUDE_DIRS}" PATH_SUFFIXES numpy/core/include ) diff --git a/cmake/FindSUNDIALS.cmake b/cmake/FindSUNDIALS.cmake index 1ecb5db429..15b266d06a 100644 --- a/cmake/FindSUNDIALS.cmake +++ b/cmake/FindSUNDIALS.cmake @@ -104,16 +104,8 @@ endforeach() if (SUNDIALS_INCLUDE_DIR) file(READ "${SUNDIALS_INCLUDE_DIR}/sundials_config.h" SUNDIALS_CONFIG_FILE) - string(FIND "${SUNDIALS_CONFIG_FILE}" "SUNDIALS_PACKAGE_VERSION" index) - if("${index}" LESS 0) - # Version >3 - set(SUNDIALS_VERSION_REGEX_PATTERN - ".*#define SUNDIALS_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*") - else() - # Version <3 - set(SUNDIALS_VERSION_REGEX_PATTERN - ".*#define SUNDIALS_PACKAGE_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*") - endif() + set(SUNDIALS_VERSION_REGEX_PATTERN + ".*#define SUNDIALS_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*") string(REGEX MATCH ${SUNDIALS_VERSION_REGEX_PATTERN} _ "${SUNDIALS_CONFIG_FILE}") set(SUNDIALS_VERSION_MAJOR ${CMAKE_MATCH_1} CACHE STRING 
"") set(SUNDIALS_VERSION_MINOR ${CMAKE_MATCH_2} CACHE STRING "") diff --git a/cmake/SetupBOUTThirdParty.cmake b/cmake/SetupBOUTThirdParty.cmake index ef0fd438d4..9c49fe6fdc 100644 --- a/cmake/SetupBOUTThirdParty.cmake +++ b/cmake/SetupBOUTThirdParty.cmake @@ -8,6 +8,9 @@ endif () # determined in SetupCompilers.cmake if (BOUT_USE_OPENMP) target_link_libraries(bout++ PUBLIC OpenMP::OpenMP_CXX) + set(CONFIG_LDFLAGS "${CONFIG_LDFLAGS} -fopenmp") + set(CONFIG_LDFLAGS_SHARED "${CONFIG_LDFLAGS_SHARED} -fopenmp") + set(CONFIG_CFLAGS "${CONFIG_CFLAGS} -fopenmp") endif() # determined in SetupCompilers.cmake @@ -187,10 +190,10 @@ endif() message(STATUS "NetCDF support: ${BOUT_USE_NETCDF}") set(BOUT_HAS_NETCDF ${BOUT_USE_NETCDF}) -option(BOUT_USE_ADIOS "Enable support for ADIOS output" ON) -option(BOUT_DOWNLOAD_ADIOS "Download and build ADIOS2" OFF) -if (BOUT_USE_ADIOS) - if (BOUT_DOWNLOAD_ADIOS) +option(BOUT_USE_ADIOS2 "Enable support for ADIOS output" ON) +option(BOUT_DOWNLOAD_ADIOS2 "Download and build ADIOS2" OFF) +if (BOUT_USE_ADIOS2) + if (BOUT_DOWNLOAD_ADIOS2) message(STATUS "Downloading and configuring ADIOS2") include(FetchContent) FetchContent_Declare( @@ -217,12 +220,12 @@ if (BOUT_USE_ADIOS) find_package(MPI REQUIRED COMPONENTS C) target_link_libraries(bout++ PUBLIC adios2::cxx11_mpi MPI::MPI_C) else() - set(BOUT_USE_ADIOS OFF) + set(BOUT_USE_ADIOS2 OFF) endif() endif() endif() -message(STATUS "ADIOS support: ${BOUT_USE_ADIOS}") -set(BOUT_HAS_ADIOS ${BOUT_USE_ADIOS}) +message(STATUS "ADIOS2 support: ${BOUT_USE_ADIOS2}") +set(BOUT_HAS_ADIOS2 ${BOUT_USE_ADIOS2}) option(BOUT_USE_FFTW "Enable support for FFTW" ON) @@ -278,8 +281,8 @@ if (BOUT_USE_SUNDIALS) include(FetchContent) FetchContent_Declare( sundials - GIT_REPOSITORY https://github.com/ZedThree/sundials - GIT_TAG cmake-export-fixes + GIT_REPOSITORY https://github.com/LLNL/sundials + GIT_TAG v7.0.0 ) # Note: These are settings for building SUNDIALS set(EXAMPLES_ENABLE_C OFF CACHE BOOL "" FORCE) @@ -294,7 +297,11 
@@ if (BOUT_USE_SUNDIALS) FetchContent_MakeAvailable(sundials) message(STATUS "SUNDIALS done configuring") else() + enable_language(C) find_package(SUNDIALS REQUIRED) + if (SUNDIALS_VERSION VERSION_LESS 4.0.0) + message(FATAL_ERROR "SUNDIALS_VERSION 4.0.0 or newer is required. Found version ${SUNDIALS_VERSION}.") + endif() endif() target_link_libraries(bout++ PUBLIC SUNDIALS::nvecparallel) target_link_libraries(bout++ PUBLIC SUNDIALS::cvode) diff --git a/cmake_build_defines.hxx.in b/cmake_build_defines.hxx.in index ed6e8685f6..4d63a01b7d 100644 --- a/cmake_build_defines.hxx.in +++ b/cmake_build_defines.hxx.in @@ -13,7 +13,7 @@ #cmakedefine01 BOUT_HAS_IDA #cmakedefine01 BOUT_HAS_LAPACK #cmakedefine01 BOUT_HAS_NETCDF -#cmakedefine01 BOUT_HAS_ADIOS +#cmakedefine01 BOUT_HAS_ADIOS2 #cmakedefine01 BOUT_HAS_PETSC #cmakedefine01 BOUT_HAS_PRETTY_FUNCTION #cmakedefine01 BOUT_HAS_PVODE diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 3849d34852..022b16e248 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(backtrace) add_subdirectory(blob2d) add_subdirectory(blob2d-outerloop) add_subdirectory(blob2d-laplacexz) +add_subdirectory(boutpp) add_subdirectory(boundary-conditions/advection) add_subdirectory(conducting-wall-mode) add_subdirectory(conduction) diff --git a/examples/blob2d/blob2d.cxx b/examples/blob2d/blob2d.cxx index f41f857d46..7007bbeb77 100644 --- a/examples/blob2d/blob2d.cxx +++ b/examples/blob2d/blob2d.cxx @@ -25,7 +25,6 @@ class Blob2D : public PhysicsModel { BoutReal rho_s; ///< Bohm gyro radius BoutReal Omega_i; ///< Ion cyclotron frequency BoutReal c_s; ///< Bohm sound speed - BoutReal n0; ///< Reference density // Constants to calculate the parameters BoutReal Te0; ///< Isothermal temperature [eV] @@ -61,7 +60,6 @@ class Blob2D : public PhysicsModel { m_i = options["m_i"].withDefault(2 * 1.667e-27); m_e = options["m_e"].withDefault(9.11e-31); - n0 = options["n0"].doc("Background density in 
cubic m").withDefault(1e19); D_vort = options["D_vort"].doc("Viscous diffusion coefficient").withDefault(0.0); D_n = options["D_n"].doc("Density diffusion coefficient").withDefault(0.0); diff --git a/examples/blob2d/delta_0.25/BOUT.inp b/examples/blob2d/delta_0.25/BOUT.inp index 58d1e36741..841fcaf235 100644 --- a/examples/blob2d/delta_0.25/BOUT.inp +++ b/examples/blob2d/delta_0.25/BOUT.inp @@ -87,8 +87,6 @@ flags = 49152 # set_rhs i.e. identity matrix in boundaries Te0 = 5 # Electron Temperature (eV) -n0 = 2e+18 # Background plasma density (m^-3) - compressible = false # Compressibility? boussinesq = true # Boussinesq approximation (no perturbed n in vorticity) diff --git a/examples/blob2d/delta_1/BOUT.inp b/examples/blob2d/delta_1/BOUT.inp index 417911271d..39213ddd36 100644 --- a/examples/blob2d/delta_1/BOUT.inp +++ b/examples/blob2d/delta_1/BOUT.inp @@ -87,8 +87,6 @@ flags = 49152 # set_rhs i.e. identity matrix in boundaries Te0 = 5 # Electron Temperature (eV) -n0 = 2e+18 # Background plasma density (m^-3) - compressible = false # Compressibility? boussinesq = true # Boussinesq approximation (no perturbed n in vorticity) diff --git a/examples/blob2d/delta_10/BOUT.inp b/examples/blob2d/delta_10/BOUT.inp index 353c28c3b2..f4507b871b 100644 --- a/examples/blob2d/delta_10/BOUT.inp +++ b/examples/blob2d/delta_10/BOUT.inp @@ -87,8 +87,6 @@ flags = 49152 # set_rhs i.e. identity matrix in boundaries Te0 = 5 # Electron Temperature (eV) -n0 = 2e+18 # Background plasma density (m^-3) - compressible = false # Compressibility? 
boussinesq = true # Boussinesq approximation (no perturbed n in vorticity) diff --git a/examples/boutpp/CMakeLists.txt b/examples/boutpp/CMakeLists.txt new file mode 100644 index 0000000000..e46a7ae990 --- /dev/null +++ b/examples/boutpp/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.13) + +if (NOT TARGET bout++::bout++) + find_package(bout++ REQUIRED) +endif() + +bout_copy_file(runexample) +bout_copy_file(blob2d.py) +bout_copy_file(simulation.py) +bout_copy_file(data/BOUT.inp) diff --git a/examples/boutpp/blob2d.py b/examples/boutpp/blob2d.py index d5f370a454..4dc8ea60ac 100755 --- a/examples/boutpp/blob2d.py +++ b/examples/boutpp/blob2d.py @@ -24,7 +24,7 @@ def init(self, restart): self.phiSolver = bc.Laplacian() - options = bc.Options("model") + options = bc.Options.root("model") # Temperature in eV Te0 = options.get("Te0", 30) e = options.get("e", 1.602e-19) @@ -70,12 +70,20 @@ def init(self, restart): # /************ Create a solver for potential ********/ + opts_boussinesq = bc.Options.root("phiBoussinesq") + opts_non_boussinesq = bc.Options.root("phiSolver") + if self.boussinesq: # BOUT.inp section "phiBoussinesq" - self.phiSolver = bc.Laplacian(bc.Options("phiBoussinesq")) + opts_used = opts_boussinesq + opts_unused = opts_non_boussinesq else: # BOUT.inp section "phiSolver" - self.phiSolver = bc.Laplacian(bc.Options("phiSolver")) + opts_used = opts_non_boussinesq + opts_unused = opts_boussinesq + + self.phiSolver = bc.Laplacian(opts_used) + opts_unused.setConditionallyUsed() # Starting guess for first solve (if iterative) self.phi = bc.create3D("0") @@ -165,8 +173,8 @@ def ensure_blob(): # settings used by the core code -NOUT = 50 # number of time-steps -TIMESTEP = 50 # time between outputs [1/wci] +nout = 50 # number of time-steps +timestep = 50 # time between outputs [1/wci] MXG = 2 # Number of X guard cells @@ -198,8 +206,8 @@ def ensure_blob(): [mesh:ddz] -first = FFT -second = FFT +first = C2 +second = C2 upwind = W3 
################################################### @@ -207,8 +215,8 @@ def ensure_blob(): [solver] -ATOL = 1.0e-10 # absolute tolerance -RTOL = 1.0e-5 # relative tolerance +atol = 1e-10 # absolute tolerance +rtol = 1e-05 # relative tolerance mxstep = 10000 # Maximum internal steps per output ################################################### @@ -221,22 +229,20 @@ def ensure_blob(): fourth_order = true # 4th order or 2nd order -flags = 0 # inversion flags for phi - # 0 = Zero value - # 10 = Zero gradient AC inner & outer - # 15 = Zero gradient AC and DC - # 768 = Zero laplace inner & outer +# 0 = Zero value +# 10 = Zero gradient AC inner & outer +# 15 = Zero gradient AC and DC +# 768 = Zero laplace inner & outer [phiSolver:precon] # Preconditioner (if pctype=user) -filter = 0. # Must not filter solution -flags = 49152 # set_rhs i.e. identity matrix in boundaries +filter = 0.0 # Must not filter solution +flags = 49152 # set_rhs i.e. identity matrix in boundaries ################################################### # Electrostatic potential solver (Boussinesq) [phiBoussinesq] # By default type is tri (serial) or spt (parallel) -flags = 0 ################################################## # general settings for the model @@ -245,14 +251,12 @@ def ensure_blob(): Te0 = 5 # Electron Temperature (eV) -n0 = 2e18 # Background plasma density (m^-3) - compressible = false # Compressibility? boussinesq = true # Boussinesq approximation (no perturbed n in vorticity) -D_vort = 1e-6 # Viscosity -D_n = 1e-6 # Diffusion +D_vort = 1e-06 # Viscosity +D_n = 1e-06 # Diffusion R_c = 1.5 # Radius of curvature (m) @@ -261,7 +265,7 @@ def ensure_blob(): # These can be overridden for individual variables in # a section of that name. 
-[All] +[all] scale = 0.0 # default size of initial perturbations bndry_all = neumann # Zero-gradient on all boundaries @@ -278,9 +282,8 @@ def ensure_blob(): if __name__ == "__main__": - if "--create" in sys.argv: - sys.argv.remove("--create") - ensure_blob() + ensure_blob() + bc.init("-d blob".split(" ") + sys.argv[1:]) # Create an instance diff --git a/examples/boutpp/data/BOUT.inp b/examples/boutpp/data/BOUT.inp new file mode 100644 index 0000000000..d91707ec1b --- /dev/null +++ b/examples/boutpp/data/BOUT.inp @@ -0,0 +1,9 @@ +nout=10 +timestep=10 + +[mesh] +nx=160 +ny=1 +nz=n/n + +MYG=0 diff --git a/examples/elm-pb/elm_pb.cxx b/examples/elm-pb/elm_pb.cxx index e81742747a..f108e58e2f 100644 --- a/examples/elm-pb/elm_pb.cxx +++ b/examples/elm-pb/elm_pb.cxx @@ -1427,23 +1427,30 @@ class ELMpb : public PhysicsModel { if (sheath_boundaries) { + // Need to shift into field-aligned coordinates before applying + // parallel boundary conditions + + auto phi_fa = toFieldAligned(phi); + auto P_fa = toFieldAligned(P); + auto Jpar_fa = toFieldAligned(Jpar); + // At y = ystart (lower boundary) for (RangeIterator r = mesh->iterateBndryLowerY(); !r.isDone(); r++) { for (int jz = 0; jz < mesh->LocalNz; jz++) { // Zero-gradient potential - BoutReal phisheath = phi(r.ind, mesh->ystart, jz); + BoutReal const phisheath = phi_fa(r.ind, mesh->ystart, jz); BoutReal jsheath = -(sqrt(mi_me) / (2. * sqrt(PI))) * phisheath; // Apply boundary condition half-way between cells for (int jy = mesh->ystart - 1; jy >= 0; jy--) { // Neumann conditions - P(r.ind, jy, jz) = P(r.ind, mesh->ystart, jz); - phi(r.ind, jy, jz) = phisheath; + P_fa(r.ind, jy, jz) = P_fa(r.ind, mesh->ystart, jz); + phi_fa(r.ind, jy, jz) = phisheath; // Dirichlet condition on Jpar - Jpar(r.ind, jy, jz) = 2. * jsheath - Jpar(r.ind, mesh->ystart, jz); + Jpar_fa(r.ind, jy, jz) = 2. 
* jsheath - Jpar_fa(r.ind, mesh->ystart, jz); } } } @@ -1454,22 +1461,27 @@ class ELMpb : public PhysicsModel { for (int jz = 0; jz < mesh->LocalNz; jz++) { // Zero-gradient potential - BoutReal phisheath = phi(r.ind, mesh->yend, jz); + BoutReal const phisheath = phi_fa(r.ind, mesh->yend, jz); BoutReal jsheath = (sqrt(mi_me) / (2. * sqrt(PI))) * phisheath; // Apply boundary condition half-way between cells for (int jy = mesh->yend + 1; jy < mesh->LocalNy; jy++) { // Neumann conditions - P(r.ind, jy, jz) = P(r.ind, mesh->yend, jz); - phi(r.ind, jy, jz) = phisheath; + P_fa(r.ind, jy, jz) = P_fa(r.ind, mesh->yend, jz); + phi_fa(r.ind, jy, jz) = phisheath; // Dirichlet condition on Jpar // WARNING: this is not correct if staggered grids are used ASSERT3(not mesh->StaggerGrids); - Jpar(r.ind, jy, jz) = 2. * jsheath - Jpar(r.ind, mesh->yend, jz); + Jpar_fa(r.ind, jy, jz) = 2. * jsheath - Jpar_fa(r.ind, mesh->yend, jz); } } } + + // Shift back from field aligned coordinates + phi = fromFieldAligned(phi_fa); + P = fromFieldAligned(P_fa); + Jpar = fromFieldAligned(Jpar_fa); } //////////////////////////////////////////////////// diff --git a/examples/fci-wave-logn/boundary/BOUT.inp b/examples/fci-wave-logn/boundary/BOUT.inp index 11e57ec47d..0632aa949b 100644 --- a/examples/fci-wave-logn/boundary/BOUT.inp +++ b/examples/fci-wave-logn/boundary/BOUT.inp @@ -20,7 +20,7 @@ expand_divergence = false background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -28,15 +28,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = 
parallel_neumann_o2 [v] diff --git a/examples/fci-wave-logn/div-integrate/BOUT.inp b/examples/fci-wave-logn/div-integrate/BOUT.inp index a37bf3e2a5..66bdbce5f2 100644 --- a/examples/fci-wave-logn/div-integrate/BOUT.inp +++ b/examples/fci-wave-logn/div-integrate/BOUT.inp @@ -20,7 +20,7 @@ expand_divergence = false background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -28,15 +28,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [v] diff --git a/examples/fci-wave-logn/expanded/BOUT.inp b/examples/fci-wave-logn/expanded/BOUT.inp index 3a2935c6e8..e084511d24 100644 --- a/examples/fci-wave-logn/expanded/BOUT.inp +++ b/examples/fci-wave-logn/expanded/BOUT.inp @@ -20,7 +20,7 @@ expand_divergence = true background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -28,15 +28,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [v] diff --git a/examples/fci-wave-logn/fci-wave.cxx b/examples/fci-wave-logn/fci-wave.cxx index 731897ad4e..2ea9048421 100644 --- a/examples/fci-wave-logn/fci-wave.cxx +++ 
b/examples/fci-wave-logn/fci-wave.cxx @@ -62,7 +62,7 @@ class FCIwave : public PhysicsModel { // Neumann boundaries simplifies parallel derivatives Bxyz.applyBoundary("neumann"); - Bxyz.applyParallelBoundary("parallel_neumann"); + Bxyz.applyParallelBoundary("parallel_neumann_o2"); SAVE_ONCE(Bxyz); Options::getRoot()->getSection("fciwave")->get("expand_divergence", expand_divergence, diff --git a/examples/fci-wave/div-integrate/BOUT.inp b/examples/fci-wave/div-integrate/BOUT.inp index eb41d5f228..68f2326f52 100644 --- a/examples/fci-wave/div-integrate/BOUT.inp +++ b/examples/fci-wave/div-integrate/BOUT.inp @@ -21,7 +21,7 @@ log_density = false # Evolve log(n)? background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -29,15 +29,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [v] diff --git a/examples/fci-wave/div/BOUT.inp b/examples/fci-wave/div/BOUT.inp index 70b60757eb..3f497df6c7 100644 --- a/examples/fci-wave/div/BOUT.inp +++ b/examples/fci-wave/div/BOUT.inp @@ -21,7 +21,7 @@ log_density = false # Evolve log(n)? 
background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -29,15 +29,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [v] diff --git a/examples/fci-wave/fci-wave.cxx b/examples/fci-wave/fci-wave.cxx index 226b52c808..2fd383ed3f 100644 --- a/examples/fci-wave/fci-wave.cxx +++ b/examples/fci-wave/fci-wave.cxx @@ -69,7 +69,7 @@ class FCIwave : public PhysicsModel { // Neumann boundaries simplifies parallel derivatives Bxyz.applyBoundary("neumann"); - Bxyz.applyParallelBoundary("parallel_neumann"); + Bxyz.applyParallelBoundary("parallel_neumann_o2"); SAVE_ONCE(Bxyz); SOLVE_FOR(nv); diff --git a/examples/fci-wave/logn/BOUT.inp b/examples/fci-wave/logn/BOUT.inp index f97d8cc891..26f8a99d63 100644 --- a/examples/fci-wave/logn/BOUT.inp +++ b/examples/fci-wave/logn/BOUT.inp @@ -21,7 +21,7 @@ log_density = true # Evolve log(n)? 
background = 1e-06 # Background density [all] -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 bndry_all = neumann [n] @@ -29,15 +29,15 @@ bndry_all = neumann zl = z / (2*pi) function = fciwave:background + 1e-3*exp(-((x-0.7)/0.1)^2 - ((zl-0.3)/0.1)^2) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [logn] function = log(n:function) -bndry_par_yup = parallel_neumann -bndry_par_ydown = parallel_neumann +bndry_par_yup = parallel_neumann_o2 +bndry_par_ydown = parallel_neumann_o2 [nv] diff --git a/examples/laplace-petsc3d/data/BOUT.inp b/examples/laplace-petsc3d/data/BOUT.inp index 86a52c69f2..7e81d992a2 100644 --- a/examples/laplace-petsc3d/data/BOUT.inp +++ b/examples/laplace-petsc3d/data/BOUT.inp @@ -6,7 +6,7 @@ mz = 128 function = mixmode(x, 1.)*mixmode(y, 2.)*mixmode(z, 3.) bndry_xin = none bndry_xout = none -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [rhs] function = mixmode(x, 4.)*mixmode(y, 5.)*mixmode(z, 6.) @@ -22,7 +22,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.) [C2] #function = 0. function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.) -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [A] function = 0.0 @@ -46,7 +46,7 @@ transform_from_field_aligned = false [initial] bndry_xin = neumann bndry_xout = neumann -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [input1] function = mixmode(x, 1.)*mixmode(z, 2.) 
diff --git a/examples/performance/iterator-offsets/iterator-offsets.cxx b/examples/performance/iterator-offsets/iterator-offsets.cxx index 08149f855e..2376b63578 100644 --- a/examples/performance/iterator-offsets/iterator-offsets.cxx +++ b/examples/performance/iterator-offsets/iterator-offsets.cxx @@ -73,7 +73,7 @@ int main(int argc, char** argv) { #if BOUT_USE_OPENMP ITERATOR_TEST_BLOCK( "Nested loop (omp)", - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for(int i=0;iLocalNx;++i) { for (int j = mesh->ystart; j < mesh->yend; ++j) { for (int k = 0; k < mesh->LocalNz; ++k) { @@ -98,7 +98,7 @@ int main(int argc, char** argv) { deriv(a, result, "RGN_NOY");); ITERATOR_TEST_BLOCK( - "Region with stencil", BOUT_OMP(parallel) { + "Region with stencil", BOUT_OMP_PERF(parallel) { stencil s; BOUT_FOR_INNER(i, mesh->getRegion3D("RGN_NOY")) { s.m = a[i.ym()]; @@ -110,7 +110,7 @@ int main(int argc, char** argv) { }); ITERATOR_TEST_BLOCK( - "Region with stencil and function pointer", BOUT_OMP(parallel) { + "Region with stencil and function pointer", BOUT_OMP_PERF(parallel) { stencil s; BOUT_FOR_INNER(i, mesh->getRegion3D("RGN_NOY")) { s.m = a[i.ym()]; diff --git a/examples/performance/iterator/iterator.cxx b/examples/performance/iterator/iterator.cxx index 7f9eb7ce1f..af1163d927 100644 --- a/examples/performance/iterator/iterator.cxx +++ b/examples/performance/iterator/iterator.cxx @@ -66,7 +66,7 @@ int main(int argc, char** argv) { "C loop", for (int j = 0; j < len; ++j) { rd[j] = ad[j] + bd[j]; };); #if BOUT_USE_OPENMP ITERATOR_TEST_BLOCK("C loop (omp)", - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for(int j=0;jLocalNx;++i) { for (int j = 0; j < mesh->LocalNy; ++j) { for (int k = 0; k < mesh->LocalNz; ++k) { diff --git a/include/bout/adios_object.hxx b/include/bout/adios_object.hxx index 9d2f545b46..4750930373 100755 --- a/include/bout/adios_object.hxx +++ b/include/bout/adios_object.hxx @@ -14,7 +14,7 @@ #include "bout/build_config.hxx" -#if BOUT_HAS_ADIOS 
+#if BOUT_HAS_ADIOS2 #include #include @@ -79,5 +79,5 @@ void ADIOSSetParameters(const std::string& input, const char delimKeyValue, } // namespace bout -#endif //BOUT_HAS_ADIOS +#endif //BOUT_HAS_ADIOS2 #endif //ADIOS_OBJECT_HXX diff --git a/include/bout/array.hxx b/include/bout/array.hxx index 060b4900a1..0caaed9c86 100644 --- a/include/bout/array.hxx +++ b/include/bout/array.hxx @@ -23,15 +23,15 @@ * o Added Umpire support, in multiple iterations/variations */ -#ifndef __ARRAY_H__ -#define __ARRAY_H__ +#ifndef BOUT_ARRAY_H +#define BOUT_ARRAY_H #include #include #include #include -#ifdef _OPENMP +#if BOUT_USE_OPENMP #include #endif @@ -375,22 +375,14 @@ private: * @param[in] cleanup If set to true, deletes all dataBlock and clears the store */ static storeType& store(bool cleanup = false) { -#ifdef _OPENMP static arenaType arena(omp_get_max_threads()); -#else - static arenaType arena(1); -#endif if (!cleanup) { -#ifdef _OPENMP return arena[omp_get_thread_num()]; -#else - return arena[0]; -#endif } // Clean by deleting all data -- possible that just stores.clear() is // sufficient rather than looping over each entry. 
- BOUT_OMP(single) + BOUT_OMP_SAFE(single) { for (auto& stores : arena) { for (auto& p : stores) { @@ -486,4 +478,4 @@ bool operator==(const Array& lhs, const Array& rhs) { return std::equal(lhs.begin(), lhs.end(), rhs.begin()); } -#endif // __ARRAY_H__ +#endif // BOUT_ARRAY_H diff --git a/include/bout/assert.hxx b/include/bout/assert.hxx index 233641966b..653c44ed42 100644 --- a/include/bout/assert.hxx +++ b/include/bout/assert.hxx @@ -14,8 +14,8 @@ * */ -#ifndef __BOUT_ASSERT_H__ -#define __BOUT_ASSERT_H__ +#ifndef BOUT_ASSERT_H +#define BOUT_ASSERT_H #include "bout/boutexception.hxx" @@ -65,4 +65,4 @@ #define ASSERT3(condition) #endif -#endif // __BOUT_ASSERT_H__ +#endif // BOUT_ASSERT_H diff --git a/include/bout/boundary_factory.hxx b/include/bout/boundary_factory.hxx index 208b7cdb61..5f1f6e06a6 100644 --- a/include/bout/boundary_factory.hxx +++ b/include/bout/boundary_factory.hxx @@ -1,13 +1,16 @@ class BoundaryFactory; -#ifndef __BNDRY_FACTORY_H__ -#define __BNDRY_FACTORY_H__ +#ifndef BOUT_BNDRY_FACTORY_H +#define BOUT_BNDRY_FACTORY_H -#include "bout/boundary_op.hxx" -#include "bout/boundary_region.hxx" -#include "bout/parallel_boundary_op.hxx" -#include "bout/parallel_boundary_region.hxx" +class BoundaryOpBase; +class BoundaryOpPar; +class BoundaryOp; +class BoundaryRegionBase; +class BoundaryRegionPar; +class BoundaryRegion; +class BoundaryModifier; #include #include @@ -126,4 +129,4 @@ private: // BoundaryModifier* findBoundaryMod(const string &s); }; -#endif // __BNDRY_FACTORY_H__ +#endif // BOUT_BNDRY_FACTORY_H diff --git a/include/bout/boundary_region.hxx b/include/bout/boundary_region.hxx index 542460580c..58de12045e 100644 --- a/include/bout/boundary_region.hxx +++ b/include/bout/boundary_region.hxx @@ -1,8 +1,8 @@ class BoundaryRegion; -#ifndef __BNDRY_REGION_H__ -#define __BNDRY_REGION_H__ +#ifndef BOUT_BNDRY_REGION_H +#define BOUT_BNDRY_REGION_H #include #include @@ -142,4 +142,4 @@ private: int xs, xe; }; -#endif // __BNDRY_REGION_H__ +#endif // 
BOUT_BNDRY_REGION_H diff --git a/include/bout/boundary_standard.hxx b/include/bout/boundary_standard.hxx index 96d43de24d..b1116e159f 100644 --- a/include/bout/boundary_standard.hxx +++ b/include/bout/boundary_standard.hxx @@ -1,7 +1,7 @@ /// Some standard boundary conditions -#ifndef __BNDRY_STD_H__ -#define __BNDRY_STD_H__ +#ifndef BOUT_BNDRY_STD_H +#define BOUT_BNDRY_STD_H #include "bout/boundary_op.hxx" #include "bout/bout_types.hxx" @@ -516,4 +516,4 @@ public: private: }; -#endif // __BNDRY_STD_H__ +#endif // BOUT_BNDRY_STD_H diff --git a/include/bout/bout.hxx b/include/bout/bout.hxx index d929a19c2f..09433bcc3b 100644 --- a/include/bout/bout.hxx +++ b/include/bout/bout.hxx @@ -34,6 +34,7 @@ #ifndef BOUT_H #define BOUT_H +// IWYU pragma: begin_keep, begin_export #include "bout/build_config.hxx" #include "bout/boutcomm.hxx" @@ -53,6 +54,7 @@ #include "bout/vector3d.hxx" #include "bout/version.hxx" #include "bout/where.hxx" +// IWYU pragma: end_keep, end_export // BOUT++ main functions diff --git a/include/bout/bout_enum_class.hxx b/include/bout/bout_enum_class.hxx index ef251b4c2f..f8c9e364c5 100644 --- a/include/bout/bout_enum_class.hxx +++ b/include/bout/bout_enum_class.hxx @@ -19,8 +19,8 @@ * along with BOUT++. If not, see . **************************************************************************/ -#ifndef __BOUT_ENUM_CLASS_H__ -#define __BOUT_ENUM_CLASS_H__ +#ifndef BOUT_ENUM_CLASS_H +#define BOUT_ENUM_CLASS_H #include "bout/boutexception.hxx" #include "bout/macro_for_each.hxx" @@ -100,4 +100,4 @@ return out << toString(e); \ } -#endif // __BOUT_ENUM_CLASS_H__ +#endif // BOUT_ENUM_CLASS_H diff --git a/include/bout/bout_types.hxx b/include/bout/bout_types.hxx index 5a00b5144b..c1f06fca7c 100644 --- a/include/bout/bout_types.hxx +++ b/include/bout/bout_types.hxx @@ -19,8 +19,8 @@ * along with BOUT++. If not, see . 
**************************************************************************/ -#ifndef __BOUT_TYPES_H__ -#define __BOUT_TYPES_H__ +#ifndef BOUT_TYPES_H +#define BOUT_TYPES_H #include #include @@ -140,4 +140,4 @@ struct enumWrapper { /// Boundary condition function using FuncPtr = BoutReal (*)(BoutReal t, BoutReal x, BoutReal y, BoutReal z); -#endif // __BOUT_TYPES_H__ +#endif // BOUT_TYPES_H diff --git a/include/bout/boutcomm.hxx b/include/bout/boutcomm.hxx index fea401af02..9342d29741 100644 --- a/include/bout/boutcomm.hxx +++ b/include/bout/boutcomm.hxx @@ -27,8 +27,8 @@ class BoutComm; -#ifndef __BOUTCOMM_H__ -#define __BOUTCOMM_H__ +#ifndef BOUT_BOUTCOMM_H +#define BOUT_BOUTCOMM_H #include @@ -68,4 +68,4 @@ private: static BoutComm* instance; ///< The only instance of this class (Singleton) }; -#endif // __BOUTCOMM_H__ +#endif // BOUT_BOUTCOMM_H diff --git a/include/bout/build_config.hxx b/include/bout/build_config.hxx index c97962f7cf..08158d00e9 100644 --- a/include/bout/build_config.hxx +++ b/include/bout/build_config.hxx @@ -17,7 +17,7 @@ constexpr auto has_gettext = static_cast(BOUT_HAS_GETTEXT); constexpr auto has_lapack = static_cast(BOUT_HAS_LAPACK); constexpr auto has_legacy_netcdf = static_cast(BOUT_HAS_LEGACY_NETCDF); constexpr auto has_netcdf = static_cast(BOUT_HAS_NETCDF); -constexpr auto has_adios = static_cast(BOUT_HAS_ADIOS); +constexpr auto has_adios2 = static_cast(BOUT_HAS_ADIOS2); constexpr auto has_petsc = static_cast(BOUT_HAS_PETSC); constexpr auto has_hypre = static_cast(BOUT_HAS_HYPRE); constexpr auto has_umpire = static_cast(BOUT_HAS_UMPIRE); diff --git a/include/bout/constants.hxx b/include/bout/constants.hxx index c811799aef..273ab2270e 100644 --- a/include/bout/constants.hxx +++ b/include/bout/constants.hxx @@ -3,8 +3,8 @@ * **************************************************************************/ -#ifndef __CONSTANTS_H__ -#define __CONSTANTS_H__ +#ifndef BOUT_CONSTANTS_H +#define BOUT_CONSTANTS_H #include @@ -28,4 +28,4 @@ constexpr 
BoutReal M_Deuterium = 2.01410178 * amu; ///< Mass of a Deuterium atom constexpr BoutReal M_Tritium = 3.0160492 * amu; ///< Mass of a Tritium atom } // namespace SI -#endif // __CONSTANTS_H__ +#endif // BOUT_CONSTANTS_H diff --git a/include/bout/coordinates.hxx b/include/bout/coordinates.hxx index 42efcad84c..49feffa0a7 100644 --- a/include/bout/coordinates.hxx +++ b/include/bout/coordinates.hxx @@ -30,8 +30,8 @@ * **************************************************************************/ -#ifndef __COORDINATES_H__ -#define __COORDINATES_H__ +#ifndef BOUT_COORDINATES_H +#define BOUT_COORDINATES_H #include "bout/field2d.hxx" #include "bout/field3d.hxx" @@ -262,4 +262,4 @@ private: }; */ -#endif // __COORDINATES_H__ +#endif // BOUT_COORDINATES_H diff --git a/include/bout/cyclic_reduction.hxx b/include/bout/cyclic_reduction.hxx index d4ef958e93..d4c0920910 100644 --- a/include/bout/cyclic_reduction.hxx +++ b/include/bout/cyclic_reduction.hxx @@ -38,8 +38,8 @@ * ************************************************************************/ -#ifndef __CYCLIC_REDUCE_H__ -#define __CYCLIC_REDUCE_H__ +#ifndef BOUT_CYCLIC_REDUCE_H +#define BOUT_CYCLIC_REDUCE_H #ifdef DIAGNOSE #undef DIAGNOSE @@ -101,7 +101,7 @@ public: Matrix bMatrix(1, N); Matrix cMatrix(1, N); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < N; ++i) { aMatrix(0, i) = a[i]; bMatrix(0, i) = b[i]; @@ -126,7 +126,7 @@ public: allocMemory(nprocs, nsys, N); // Fill coefficient array - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int j = 0; j < Nsys; j++) { for (int i = 0; i < N; i++) { coefs(j, 4 * i) = a(j, i); @@ -149,7 +149,7 @@ public: Matrix xMatrix(1, N); // Copy input data into matrix - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < N; ++i) { rhsMatrix(0, i) = rhs[i]; } @@ -158,7 +158,7 @@ public: solve(rhsMatrix, xMatrix); // Copy result back into argument - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < N; ++i) { 
x[i] = xMatrix(0, i); } @@ -184,7 +184,7 @@ public: // Insert RHS into coefs array. Ordered to allow efficient partitioning // for MPI send/receives - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int j = 0; j < Nsys; j++) { for (int i = 0; i < N; i++) { coefs(j, 4 * i + 3) = rhs(j, i); @@ -230,7 +230,7 @@ public: if (p == myproc) { // Just copy the data - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < myns; i++) { for (int j = 0; j < 8; j++) { ifcs(i, 8 * p + j) = myif(sys0 + i, j); @@ -285,7 +285,7 @@ public: #ifdef DIAGNOSE output << "Copying received data from " << p << endl; #endif - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < myns; i++) { for (int j = 0; j < 8; j++) { #ifdef DIAGNOSE @@ -317,7 +317,7 @@ public: x1.ensureUnique(); xn.ensureUnique(); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < myns; ++i) { // (a b) (x1) = (b1) // (c d) (xn) (bn) @@ -364,7 +364,7 @@ public: if (p == myproc) { // Just copy the data - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < myns; i++) { x1[sys0 + i] = ifx(i, 2 * p); xn[sys0 + i] = ifx(i, 2 * p + 1); @@ -389,7 +389,7 @@ public: // Send data for (int p = 0; p < nprocs; p++) { // Loop over processor if (p != myproc) { - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < myns; i++) { ifp[2 * i] = ifx(i, 2 * p); ifp[2 * i + 1] = ifx(i, 2 * p + 1); @@ -427,7 +427,7 @@ public: nsp++; } - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nsp; i++) { x1[s0 + i] = recvbuffer(fromproc, 2 * i); xn[s0 + i] = recvbuffer(fromproc, 2 * i + 1); @@ -540,7 +540,7 @@ private: } #endif - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int j = 0; j < ns; j++) { // Calculate upper interface equation @@ -619,7 +619,7 @@ private: // Tridiagonal system, solve using serial Thomas algorithm // xa -- Result for each system // co -- Coefficients & rhs for each system - 
BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < ns; i++) { // Loop over systems Array gam(nloc); // Thread-local array T bet = 1.0; @@ -640,4 +640,4 @@ private: } }; -#endif // __CYCLIC_REDUCE_H__ +#endif // BOUT_CYCLIC_REDUCE_H diff --git a/include/bout/dcomplex.hxx b/include/bout/dcomplex.hxx index 569b5f2c13..75bc9d26ff 100644 --- a/include/bout/dcomplex.hxx +++ b/include/bout/dcomplex.hxx @@ -29,8 +29,8 @@ * along with BOUT++. If not, see . * */ -#ifndef __DCOMPLEX_H__ -#define __DCOMPLEX_H__ +#ifndef BOUT_DCOMPLEX_H +#define BOUT_DCOMPLEX_H #include "bout/bout_types.hxx" #include @@ -44,4 +44,4 @@ struct fcmplx { BoutReal r, i; }; -#endif // __DCOMPLEX_H__ +#endif // BOUT_DCOMPLEX_H diff --git a/include/bout/derivs.hxx b/include/bout/derivs.hxx index c01e1562fc..1c360bb9cd 100644 --- a/include/bout/derivs.hxx +++ b/include/bout/derivs.hxx @@ -26,8 +26,8 @@ * **************************************************************************/ -#ifndef __DERIVS_H__ -#define __DERIVS_H__ +#ifndef BOUT_DERIVS_H +#define BOUT_DERIVS_H #include "bout/field2d.hxx" #include "bout/field3d.hxx" @@ -701,4 +701,4 @@ Coordinates::FieldMetric D2DYDZ(const Field2D& f, CELL_LOC outloc = CELL_DEFAULT const std::string& method = "DEFAULT", const std::string& region = "RGN_NOBNDRY"); -#endif // __DERIVS_H__ +#endif // BOUT_DERIVS_H diff --git a/include/bout/difops.hxx b/include/bout/difops.hxx index 2b5c6746fd..71053d454a 100644 --- a/include/bout/difops.hxx +++ b/include/bout/difops.hxx @@ -33,8 +33,8 @@ * *******************************************************************************/ -#ifndef __DIFOPS_H__ -#define __DIFOPS_H__ +#ifndef BOUT_DIFOPS_H +#define BOUT_DIFOPS_H #include "bout/field2d.hxx" #include "bout/field3d.hxx" @@ -310,4 +310,4 @@ Field3D bracket(const Field3D& f, const Field2D& g, BRACKET_METHOD method = BRAC Field3D bracket(const Field3D& f, const Field3D& g, BRACKET_METHOD method = BRACKET_STD, CELL_LOC outloc = CELL_DEFAULT, Solver* solver 
= nullptr); -#endif /* __DIFOPS_H__ */ +#endif /* BOUT_DIFOPS_H */ diff --git a/include/bout/expr.hxx b/include/bout/expr.hxx index e03c07aa49..267af202ed 100644 --- a/include/bout/expr.hxx +++ b/include/bout/expr.hxx @@ -9,8 +9,8 @@ * **************************************************************************/ -#ifndef __EXPR_H__ -#define __EXPR_H__ +#ifndef BOUT_EXPR_H +#define BOUT_EXPR_H #warning expr.hxx is deprecated. Do not use! @@ -205,4 +205,4 @@ const Field3D eval3D(Expr e) { return result; } -#endif // __EXPR_H__ +#endif // BOUT_EXPR_H diff --git a/include/bout/fft.hxx b/include/bout/fft.hxx index 8e74321f2a..fdec8b7bec 100644 --- a/include/bout/fft.hxx +++ b/include/bout/fft.hxx @@ -25,8 +25,8 @@ * *******************************************************************************/ -#ifndef __FFT_H__ -#define __FFT_H__ +#ifndef BOUT_FFT_H +#define BOUT_FFT_H #include "bout/dcomplex.hxx" #include @@ -132,4 +132,4 @@ inline void DST_rev(dcomplex* in, int length, BoutReal* out) { return bout::fft::DST_rev(in, length, out); } -#endif // __FFT_H__ +#endif // BOUT_FFT_H diff --git a/include/bout/field2d.hxx b/include/bout/field2d.hxx index 5bac67beb2..10b801ef8d 100644 --- a/include/bout/field2d.hxx +++ b/include/bout/field2d.hxx @@ -27,8 +27,8 @@ class Field2D; #pragma once -#ifndef __FIELD2D_H__ -#define __FIELD2D_H__ +#ifndef BOUT_FIELD2D_H +#define BOUT_FIELD2D_H class Mesh; #include "bout/field.hxx" @@ -374,4 +374,4 @@ bool operator==(const Field2D& a, const Field2D& b); std::ostream& operator<<(std::ostream& out, const Field2D& value); -#endif /* __FIELD2D_H__ */ +#endif /* BOUT_FIELD2D_H */ diff --git a/include/bout/field3d.hxx b/include/bout/field3d.hxx index 9f5326253d..ba8c8e879e 100644 --- a/include/bout/field3d.hxx +++ b/include/bout/field3d.hxx @@ -23,8 +23,8 @@ class Field3D; #pragma once -#ifndef __FIELD3D_H__ -#define __FIELD3D_H__ +#ifndef BOUT_FIELD3D_H +#define BOUT_FIELD3D_H class Mesh; // #include "bout/mesh.hxx" #include "bout/bout_types.hxx" 
@@ -656,4 +656,4 @@ bool operator==(const Field3D& a, const Field3D& b); /// Output a string describing a Field3D to a stream std::ostream& operator<<(std::ostream& out, const Field3D& value); -#endif /* __FIELD3D_H__ */ +#endif /* BOUT_FIELD3D_H */ diff --git a/include/bout/field_data.hxx b/include/bout/field_data.hxx index 03b9d6759b..185dcabf2d 100644 --- a/include/bout/field_data.hxx +++ b/include/bout/field_data.hxx @@ -44,7 +44,8 @@ class Coordinates; class Mesh; #include "bout/boundary_region.hxx" -#include "bout/parallel_boundary_region.hxx" +class BoundaryRegionPar; +enum class BndryLoc; #include "bout/sys/expressionparser.hxx" diff --git a/include/bout/field_factory.hxx b/include/bout/field_factory.hxx index ee228d836c..2a20226b2e 100644 --- a/include/bout/field_factory.hxx +++ b/include/bout/field_factory.hxx @@ -26,8 +26,8 @@ class FieldFactory; -#ifndef __FIELD_FACTORY_H__ -#define __FIELD_FACTORY_H__ +#ifndef BOUT_FIELD_FACTORY_H +#define BOUT_FIELD_FACTORY_H #include "bout/mesh.hxx" @@ -165,4 +165,4 @@ public: } }; -#endif // __FIELD_FACTORY_H__ +#endif // BOUT_FIELD_FACTORY_H diff --git a/include/bout/fieldgroup.hxx b/include/bout/fieldgroup.hxx index c33bd63e16..184766c6b8 100644 --- a/include/bout/fieldgroup.hxx +++ b/include/bout/fieldgroup.hxx @@ -1,5 +1,5 @@ -#ifndef __FIELDGROUP_H__ -#define __FIELDGROUP_H__ +#ifndef BOUT_FIELDGROUP_H +#define BOUT_FIELDGROUP_H #include "bout/field_data.hxx" #include @@ -190,4 +190,4 @@ private: /// Combine two FieldGroups FieldGroup operator+(const FieldGroup& lhs, const FieldGroup& rhs); -#endif // __FIELDGROUP_H__ +#endif // BOUT_FIELDGROUP_H diff --git a/include/bout/fieldperp.hxx b/include/bout/fieldperp.hxx index 3b8ed45db6..6995308dbe 100644 --- a/include/bout/fieldperp.hxx +++ b/include/bout/fieldperp.hxx @@ -25,8 +25,8 @@ class FieldPerp; -#ifndef __FIELDPERP_H__ -#define __FIELDPERP_H__ +#ifndef BOUT_FIELDPERP_H +#define BOUT_FIELDPERP_H #include "bout/field.hxx" diff --git a/include/bout/fv_ops.hxx 
b/include/bout/fv_ops.hxx index 5f1e688bd8..94007a57a2 100644 --- a/include/bout/fv_ops.hxx +++ b/include/bout/fv_ops.hxx @@ -2,8 +2,8 @@ Finite-volume discretisation methods. Flux-conservative form */ -#ifndef __FV_OPS_H__ -#define __FV_OPS_H__ +#ifndef BOUT_FV_OPS_H +#define BOUT_FV_OPS_H #include "bout/field3d.hxx" #include "bout/globals.hxx" @@ -525,4 +525,4 @@ const Field3D Div_f_v(const Field3D& n_in, const Vector3D& v, bool bndry_flux) { */ Field3D Div_Perp_Lap(const Field3D& a, const Field3D& f, CELL_LOC outloc = CELL_DEFAULT); } // namespace FV -#endif // __FV_OPS_H__ +#endif // BOUT_FV_OPS_H diff --git a/include/bout/globalfield.hxx b/include/bout/globalfield.hxx index 85252f4962..038a0875bf 100644 --- a/include/bout/globalfield.hxx +++ b/include/bout/globalfield.hxx @@ -6,8 +6,8 @@ class GlobalField; class GlobalField2D; -#ifndef __GLOBALFIELD_H__ -#define __GLOBALFIELD_H__ +#ifndef BOUT_GLOBALFIELD_H +#define BOUT_GLOBALFIELD_H #include "mesh.hxx" @@ -257,4 +257,4 @@ private: bool data_valid; }; -#endif // __GLOBALFIELD_H__ +#endif // BOUT_GLOBALFIELD_H diff --git a/include/bout/globals.hxx b/include/bout/globals.hxx index ae7edff298..64b3a09ee3 100644 --- a/include/bout/globals.hxx +++ b/include/bout/globals.hxx @@ -24,8 +24,8 @@ * **************************************************************************/ -#ifndef __GLOBALS_H__ -#define __GLOBALS_H__ +#ifndef BOUT_GLOBALS_H +#define BOUT_GLOBALS_H #include "bout/macro_for_each.hxx" @@ -97,4 +97,4 @@ SETTING(MpiWrapper* mpi, nullptr); ///< The MPI wrapper object } // namespace globals } // namespace bout -#endif // __GLOBALS_H__ +#endif // BOUT_GLOBALS_H diff --git a/include/bout/griddata.hxx b/include/bout/griddata.hxx index 875cb07d7a..29a32e5779 100644 --- a/include/bout/griddata.hxx +++ b/include/bout/griddata.hxx @@ -25,8 +25,8 @@ class GridDataSource; -#ifndef __GRIDDATA_H__ -#define __GRIDDATA_H__ +#ifndef BOUT_GRIDDATA_H +#define BOUT_GRIDDATA_H #include "mesh.hxx" #include 
"bout/bout_types.hxx" @@ -299,4 +299,4 @@ private: Options* options; }; -#endif // __GRIDDATA_H__ +#endif // BOUT_GRIDDATA_H diff --git a/include/bout/gyro_average.hxx b/include/bout/gyro_average.hxx index 0f9f2a13f7..63ef13279b 100644 --- a/include/bout/gyro_average.hxx +++ b/include/bout/gyro_average.hxx @@ -29,8 +29,8 @@ * **************************************************************/ -#ifndef __GYRO_AVERAGE_H__ -#define __GYRO_AVERAGE_H__ +#ifndef BOUT_GYRO_AVERAGE_H +#define BOUT_GYRO_AVERAGE_H #include "bout/field3d.hxx" #include "bout/invert_laplace.hxx" @@ -115,4 +115,4 @@ Field3D gyroPade2(const Field3D& f, const Field2D& rho, Field3D gyroPade2(const Field3D& f, BoutReal rho, int inner_boundary_flags = GYRO_FLAGS, int outer_boundary_flags = GYRO_FLAGS); -#endif // __GYRO_AVERAGE_H__ +#endif // BOUT_GYRO_AVERAGE_H diff --git a/include/bout/hypre_interface.hxx b/include/bout/hypre_interface.hxx index c26548e95e..cd3af7d39c 100644 --- a/include/bout/hypre_interface.hxx +++ b/include/bout/hypre_interface.hxx @@ -480,7 +480,7 @@ public: weights.begin(), weights.end(), std::back_inserter(values), [&value_](BoutReal weight) -> HYPRE_Complex { return weight * value_; }); const HYPRE_BigInt ncolumns = static_cast(positions.size()); - // BOUT_OMP(critical) + // BOUT_OMP_SAFE(critical) for (HYPRE_BigInt i = 0; i < ncolumns; ++i) { matrix->setVal(row, positions[i], values[i]); } @@ -495,7 +495,7 @@ public: weights.begin(), weights.end(), std::back_inserter(values), [&value_](BoutReal weight) -> HYPRE_Complex { return weight * value_; }); const HYPRE_BigInt ncolumns = static_cast(positions.size()); - // BOUT_OMP(critical) + // BOUT_OMP_SAFE(critical) for (HYPRE_BigInt i = 0; i < ncolumns; ++i) { matrix->addVal(row, positions[i], values[i]); } diff --git a/include/bout/initialprofiles.hxx b/include/bout/initialprofiles.hxx index 71cab22431..a2fc050b15 100644 --- a/include/bout/initialprofiles.hxx +++ b/include/bout/initialprofiles.hxx @@ -23,8 +23,8 @@ * 
**************************************************************************/ -#ifndef __INITIALPROF_H__ -#define __INITIALPROF_H__ +#ifndef BOUT_INITIALPROF_H +#define BOUT_INITIALPROF_H #include @@ -113,4 +113,4 @@ void initial_profile(const std::string& name, Vector2D& var); */ void initial_profile(const std::string& name, Vector3D& var); -#endif // __INITIALPROF_H__ +#endif // BOUT_INITIALPROF_H diff --git a/include/bout/interpolation.hxx b/include/bout/interpolation.hxx index aab3f61281..1f4b0a51b5 100644 --- a/include/bout/interpolation.hxx +++ b/include/bout/interpolation.hxx @@ -23,8 +23,8 @@ * **************************************************************************/ -#ifndef __INTERP_H__ -#define __INTERP_H__ +#ifndef BOUT_INTERP_H +#define BOUT_INTERP_H #include "bout/mesh.hxx" @@ -202,4 +202,4 @@ const T interp_to(const T& var, CELL_LOC loc, const std::string region = "RGN_AL return result; } -#endif // __INTERP_H__ +#endif // BOUT_INTERP_H diff --git a/include/bout/interpolation_xz.hxx b/include/bout/interpolation_xz.hxx index 3f8e37d3fd..52dc38f174 100644 --- a/include/bout/interpolation_xz.hxx +++ b/include/bout/interpolation_xz.hxx @@ -21,8 +21,8 @@ * **************************************************************************/ -#ifndef __INTERP_XZ_H__ -#define __INTERP_XZ_H__ +#ifndef BOUT_INTERP_XZ_H +#define BOUT_INTERP_XZ_H #include "bout/mask.hxx" @@ -286,4 +286,4 @@ public: template using RegisterXZInterpolation = XZInterpolationFactory::RegisterInFactory; -#endif // __INTERP_XZ_H__ +#endif // BOUT_INTERP_XZ_H diff --git a/include/bout/interpolation_z.hxx b/include/bout/interpolation_z.hxx index b11d7ff5b6..68cf5b0b06 100644 --- a/include/bout/interpolation_z.hxx +++ b/include/bout/interpolation_z.hxx @@ -20,8 +20,8 @@ * **************************************************************************/ -#ifndef __INTERP_Z_H__ -#define __INTERP_Z_H__ +#ifndef BOUT_INTERP_Z_H +#define BOUT_INTERP_Z_H #include "bout/generic_factory.hxx" #include 
"bout/paralleltransform.hxx" @@ -125,4 +125,4 @@ private: Field3D h11; }; -#endif // __INTERP_Z_H__ +#endif // BOUT_INTERP_Z_H diff --git a/include/bout/invert/laplacexy.hxx b/include/bout/invert/laplacexy.hxx index c07db58478..19da48dd4d 100644 --- a/include/bout/invert/laplacexy.hxx +++ b/include/bout/invert/laplacexy.hxx @@ -30,8 +30,8 @@ * **************************************************************************/ -#ifndef __LAPLACE_XY_H__ -#define __LAPLACE_XY_H__ +#ifndef BOUT_LAPLACE_XY_H +#define BOUT_LAPLACE_XY_H #include "bout/build_config.hxx" @@ -222,4 +222,4 @@ private: }; #endif // BOUT_HAS_PETSC -#endif // __LAPLACE_XY_H__ +#endif // BOUT_LAPLACE_XY_H diff --git a/include/bout/invert/laplacexy2.hxx b/include/bout/invert/laplacexy2.hxx index 6945de7b99..51f75f467d 100644 --- a/include/bout/invert/laplacexy2.hxx +++ b/include/bout/invert/laplacexy2.hxx @@ -30,8 +30,8 @@ * **************************************************************************/ -#ifndef __LAPLACE_XY2_H__ -#define __LAPLACE_XY2_H__ +#ifndef BOUT_LAPLACE_XY2_H +#define BOUT_LAPLACE_XY2_H #include "bout/build_defines.hxx" @@ -141,4 +141,4 @@ private: }; #endif // BOUT_HAS_PETSC -#endif // __LAPLACE_XY_H__ +#endif // BOUT_LAPLACE_XY2_H diff --git a/include/bout/invert/laplacexz.hxx b/include/bout/invert/laplacexz.hxx index 1b1ebef832..11f1c69330 100644 --- a/include/bout/invert/laplacexz.hxx +++ b/include/bout/invert/laplacexz.hxx @@ -28,8 +28,8 @@ * **************************************************************************/ -#ifndef __LAPLACEXZ_H__ -#define __LAPLACEXZ_H__ +#ifndef BOUT_LAPLACEXZ_H +#define BOUT_LAPLACEXZ_H #include #include @@ -91,4 +91,4 @@ protected: private: }; -#endif // __LAPLACEXZ_H__ +#endif // BOUT_LAPLACEXZ_H diff --git a/include/bout/invert_laplace.hxx b/include/bout/invert_laplace.hxx index 78417b9fce..0b416d4aab 100644 --- a/include/bout/invert_laplace.hxx +++ b/include/bout/invert_laplace.hxx @@ -31,8 +31,8 @@ class Laplacian; -#ifndef __LAPLACE_H__ 
-#define __LAPLACE_H__ +#ifndef BOUT_LAPLACE_H +#define BOUT_LAPLACE_H #include "bout/build_config.hxx" @@ -238,6 +238,10 @@ public: virtual void setInnerBoundaryFlags(int f) { inner_boundary_flags = f; } virtual void setOuterBoundaryFlags(int f) { outer_boundary_flags = f; } + virtual int getGlobalFlags() const { return global_flags; } + virtual int getInnerBoundaryFlags() const { return inner_boundary_flags; } + virtual int getOuterBoundaryFlags() const { return outer_boundary_flags; } + /// Does this solver use Field3D coefficients (true) or only their DC component (false) virtual bool uses3DCoefs() const { return false; } @@ -308,9 +312,23 @@ protected: int extra_yguards_lower; ///< exclude some number of points at the lower boundary, useful for staggered grids or when boundary conditions make inversion redundant int extra_yguards_upper; ///< exclude some number of points at the upper boundary, useful for staggered grids or when boundary conditions make inversion redundant - int global_flags; ///< Default flags - int inner_boundary_flags; ///< Flags to set inner boundary condition - int outer_boundary_flags; ///< Flags to set outer boundary condition + /// Return true if global/default \p flag is set + bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; } + /// Return true if \p flag is set for the inner boundary condition + bool isInnerBoundaryFlagSet(int flag) const { + return (inner_boundary_flags & flag) != 0; + } + /// Return true if \p flag is set for the outer boundary condition + bool isOuterBoundaryFlagSet(int flag) const { + return (outer_boundary_flags & flag) != 0; + } + + /// Return true if \p flag is set for the inner boundary condition + /// and this is the first proc in X direction + bool isInnerBoundaryFlagSetOnFirstX(int flag) const; + /// Return true if \p flag is set for the outer boundary condition + /// and this the last proc in X direction + bool isOuterBoundaryFlagSetOnLastX(int flag) const; void tridagCoefs(int jx, 
int jy, BoutReal kwave, dcomplex& a, dcomplex& b, dcomplex& c, const Field2D* ccoef = nullptr, const Field2D* d = nullptr, @@ -322,15 +340,13 @@ protected: CELL_LOC loc = CELL_DEFAULT); void tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dcomplex* bk, int jy, - int kz, BoutReal kwave, int flags, int inner_boundary_flags, - int outer_boundary_flags, const Field2D* a, const Field2D* ccoef, + int kz, BoutReal kwave, const Field2D* a, const Field2D* ccoef, const Field2D* d, bool includeguards = true, bool zperiodic = true) { - tridagMatrix(avec, bvec, cvec, bk, jy, kz, kwave, flags, inner_boundary_flags, - outer_boundary_flags, a, ccoef, ccoef, d, includeguards, zperiodic); + tridagMatrix(avec, bvec, cvec, bk, jy, kz, kwave, a, ccoef, ccoef, d, includeguards, + zperiodic); } void tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dcomplex* bk, int jy, - int kz, BoutReal kwave, int flags, int inner_boundary_flags, - int outer_boundary_flags, const Field2D* a, const Field2D* c1coef, + int kz, BoutReal kwave, const Field2D* a, const Field2D* c1coef, const Field2D* c2coef, const Field2D* d, bool includeguards = true, bool zperiodic = true); CELL_LOC location; ///< staggered grid location of this solver @@ -339,6 +355,10 @@ protected: /// localmesh->getCoordinates(location) once private: + int global_flags; ///< Default flags + int inner_boundary_flags; ///< Flags to set inner boundary condition + int outer_boundary_flags; ///< Flags to set outer boundary condition + /// Singleton instance static std::unique_ptr instance; /// Name for writing performance infomation; default taken from @@ -374,4 +394,4 @@ void laplace_tridag_coefs(int jx, int jy, int jz, dcomplex& a, dcomplex& b, dcom const Field2D* ccoef = nullptr, const Field2D* d = nullptr, CELL_LOC loc = CELL_DEFAULT); -#endif // __LAPLACE_H__ +#endif // BOUT_LAPLACE_H diff --git a/include/bout/invert_parderiv.hxx b/include/bout/invert_parderiv.hxx index 5a83a7f4e8..e9623e0f9f 100644 --- 
a/include/bout/invert_parderiv.hxx +++ b/include/bout/invert_parderiv.hxx @@ -28,8 +28,8 @@ * ************************************************************************/ -#ifndef __INV_PAR_H__ -#define __INV_PAR_H__ +#ifndef BOUT_INV_PAR_H +#define BOUT_INV_PAR_H #include "bout/field2d.hxx" #include "bout/field3d.hxx" @@ -189,4 +189,4 @@ protected: private: }; -#endif // __INV_PAR_H__ +#endif // BOUT_INV_PAR_H diff --git a/include/bout/invert_pardiv.hxx b/include/bout/invert_pardiv.hxx index 23ea59e943..0153cc1987 100644 --- a/include/bout/invert_pardiv.hxx +++ b/include/bout/invert_pardiv.hxx @@ -31,11 +31,11 @@ #ifndef INV_PARDIV_H #define INV_PARDIV_H -#include "field2d.hxx" -#include "field3d.hxx" -#include "options.hxx" -#include "unused.hxx" +#include "bout/field2d.hxx" +#include "bout/field3d.hxx" #include "bout/generic_factory.hxx" +#include "bout/options.hxx" +#include "bout/unused.hxx" // Pardivergence implementations constexpr auto PARDIVCYCLIC = "cyclic"; diff --git a/include/bout/invertable_operator.hxx b/include/bout/invertable_operator.hxx index 1940177bca..a45fc3565f 100644 --- a/include/bout/invertable_operator.hxx +++ b/include/bout/invertable_operator.hxx @@ -30,8 +30,8 @@ class InvertableOperator; }; }; // namespace bout -#ifndef __INVERTABLE_OPERATOR_H__ -#define __INVERTABLE_OPERATOR_H__ +#ifndef BOUT_INVERTABLE_OPERATOR_H +#define BOUT_INVERTABLE_OPERATOR_H #include "bout/build_config.hxx" diff --git a/include/bout/lapack_routines.hxx b/include/bout/lapack_routines.hxx index 70a3128f81..d81c0b422d 100644 --- a/include/bout/lapack_routines.hxx +++ b/include/bout/lapack_routines.hxx @@ -20,8 +20,8 @@ * **************************************************************************/ -#ifndef __LAPACK_ROUTINES_H__ -#define __LAPACK_ROUTINES_H__ +#ifndef BOUT_LAPACK_ROUTINES_H +#define BOUT_LAPACK_ROUTINES_H #include @@ -56,4 +56,4 @@ void cyclic_tridag(dcomplex* a, dcomplex* b, dcomplex* c, dcomplex* r, dcomplex* /// Complex band matrix solver void 
cband_solve(Matrix& a, int n, int m1, int m2, Array& b); -#endif // __LAPACK_ROUTINES_H__ +#endif // BOUT_LAPACK_ROUTINES_H diff --git a/include/bout/macro_for_each.hxx b/include/bout/macro_for_each.hxx index 10cbd21818..1cfe373c3f 100644 --- a/include/bout/macro_for_each.hxx +++ b/include/bout/macro_for_each.hxx @@ -1,6 +1,6 @@ -#ifndef __MACRO_FOR_EACH_H__ -#define __MACRO_FOR_EACH_H__ +#ifndef BOUT_MACRO_FOR_EACH_H +#define BOUT_MACRO_FOR_EACH_H // Provides a macro MACRO_FOR_EACH which applies a // macro to each argument in a VA_ARGS list diff --git a/include/bout/mask.hxx b/include/bout/mask.hxx index 89197ddcf2..4250d21105 100644 --- a/include/bout/mask.hxx +++ b/include/bout/mask.hxx @@ -19,8 +19,8 @@ * along with BOUT++. If not, see . **************************************************************************/ -#ifndef __MASK_H__ -#define __MASK_H__ +#ifndef BOUT_MASK_H +#define BOUT_MASK_H #include @@ -79,4 +79,4 @@ inline std::unique_ptr> regionFromMask(const BoutMask& mask, } return std::make_unique>(indices); } -#endif //__MASK_H__ +#endif //BOUT_MASK_H diff --git a/include/bout/mesh.hxx b/include/bout/mesh.hxx index 8f73552ea5..c80716fc12 100644 --- a/include/bout/mesh.hxx +++ b/include/bout/mesh.hxx @@ -40,8 +40,8 @@ class Mesh; -#ifndef __MESH_H__ -#define __MESH_H__ +#ifndef BOUT_MESH_H +#define BOUT_MESH_H #include "mpi.h" @@ -55,22 +55,24 @@ class Mesh; #include "bout/field_data.hxx" #include "bout/options.hxx" -#include "fieldgroup.hxx" +#include "bout/fieldgroup.hxx" -#include "bout/boundary_region.hxx" -#include "bout/parallel_boundary_region.hxx" +class BoundaryRegion; +class BoundaryRegionPar; -#include "sys/range.hxx" // RangeIterator +#include "bout/sys/range.hxx" // RangeIterator #include -#include "coordinates.hxx" // Coordinates class +#include "bout/coordinates.hxx" // Coordinates class #include "bout/unused.hxx" #include "bout/generic_factory.hxx" #include +#include + #include #include #include @@ -90,6 +92,9 @@ public: ReturnType 
create(Options* options = nullptr, GridDataSource* source = nullptr) const; }; +BOUT_ENUM_CLASS(BoundaryParType, all, xin, xout, fwd, bwd, xin_fwd, xout_fwd, xin_bwd, + xout_bwd, SIZE); + template using RegisterMesh = MeshFactory::RegisterInFactory; @@ -485,11 +490,20 @@ public: /// Add a boundary region to this processor virtual void addBoundary(BoundaryRegion* UNUSED(bndry)) {} - /// Get all the parallel (Y) boundaries on this processor - virtual std::vector getBoundariesPar() = 0; + /// Get the list of parallel boundary regions. The option specifies with + /// region to get. Default is to get all regions. All possible options are + /// listed at the top of this file, see BoundaryParType. + /// For example: + /// get all regions: + /// mesh->getBoundariesPar(Mesh::BoundaryParType::all) + /// get only xout: + /// mesh->getBoundariesPar(Mesh::BoundaryParType::xout) + virtual std::vector> + getBoundariesPar(BoundaryParType type = BoundaryParType::all) = 0; /// Add a parallel(Y) boundary to this processor - virtual void addBoundaryPar(BoundaryRegionPar* UNUSED(bndry)) {} + virtual void addBoundaryPar(std::shared_ptr UNUSED(bndry), + BoundaryParType UNUSED(type)) {} /// Branch-cut special handling (experimental) virtual Field3D smoothSeparatrix(const Field3D& f) { return f; } @@ -853,4 +867,4 @@ Mesh::getRegion(const std::string& region_name) const { return getRegionPerp(region_name); } -#endif // __MESH_H__ +#endif // BOUT_MESH_H diff --git a/include/bout/monitor.hxx b/include/bout/monitor.hxx index 5bc4fc7e12..359096e74f 100644 --- a/include/bout/monitor.hxx +++ b/include/bout/monitor.hxx @@ -1,5 +1,5 @@ -#ifndef __MONITOR_H__ -#define __MONITOR_H__ +#ifndef BOUT_MONITOR_H +#define BOUT_MONITOR_H #include "bout/assert.hxx" #include "bout/bout_types.hxx" @@ -125,4 +125,4 @@ public: void writeProgress(BoutReal simtime, bool output_split); }; -#endif // __MONITOR_H__ +#endif // BOUT_MONITOR_H diff --git a/include/bout/mpi_wrapper.hxx b/include/bout/mpi_wrapper.hxx 
index 65b14cf84f..826405d8da 100644 --- a/include/bout/mpi_wrapper.hxx +++ b/include/bout/mpi_wrapper.hxx @@ -27,8 +27,8 @@ class MpiWrapper; -#ifndef __MPIWRAPPER_H__ -#define __MPIWRAPPER_H__ +#ifndef BOUT_MPIWRAPPER_H +#define BOUT_MPIWRAPPER_H #include @@ -153,4 +153,4 @@ public: virtual double MPI_Wtime() { return ::MPI_Wtime(); } }; -#endif // __MPIWRAPPER_H__ +#endif // BOUT_MPIWRAPPER_H diff --git a/include/bout/msg_stack.hxx b/include/bout/msg_stack.hxx index e8158c3200..adbf1bbbcb 100644 --- a/include/bout/msg_stack.hxx +++ b/include/bout/msg_stack.hxx @@ -26,8 +26,8 @@ class MsgStack; -#ifndef __MSG_STACK_H__ -#define __MSG_STACK_H__ +#ifndef BOUT_MSG_STACK_H +#define BOUT_MSG_STACK_H #include "bout/build_config.hxx" @@ -212,4 +212,4 @@ private: */ #define AUTO_TRACE() TRACE(__thefunc__) // NOLINT -#endif // __MSG_STACK_H__ +#endif // BOUT_MSG_STACK_H diff --git a/include/bout/multiostream.hxx b/include/bout/multiostream.hxx index b90ccf9419..ca3cc2d0c7 100644 --- a/include/bout/multiostream.hxx +++ b/include/bout/multiostream.hxx @@ -1,5 +1,5 @@ -#ifndef __MULTIOSTREAM_H__ -#define __MULTIOSTREAM_H__ +#ifndef BOUT_MULTIOSTREAM_H +#define BOUT_MULTIOSTREAM_H #include #include @@ -89,4 +89,4 @@ public: using cmultiostream = multiostream; using wmultiostream = multiostream; -#endif // __MULTIOSTREAM_H__ +#endif // BOUT_MULTIOSTREAM_H diff --git a/include/bout/openmpwrap.hxx b/include/bout/openmpwrap.hxx index 032705e61a..582df7b86c 100644 --- a/include/bout/openmpwrap.hxx +++ b/include/bout/openmpwrap.hxx @@ -24,9 +24,16 @@ * **************************************************************************/ -#ifndef __OPENMPWRAP_H__ -#define __OPENMPWRAP_H__ +#ifndef BOUT_OPENMPWRAP_H +#define BOUT_OPENMPWRAP_H +#include "bout/build_defines.hxx" + +#if BOUT_USE_OPENMP || defined(_OPENMP) +#include "omp.h" +#endif + +#ifdef _OPENMP //Some helpers for indirection -- required so that the _Pragma gets "omp " //where is any number of valid omp options/environments 
(e.g. atomic, critical etc.) #define INDIRECT0(a) #a @@ -35,12 +42,30 @@ //Define a macro wrapper to the use of `#pragma omp` to avoid unknown pragma //warnings when compiling without openmp support. -#if BOUT_USE_OPENMP +#define BOUT_OMP_SAFE(...) _Pragma(INDIRECT2(__VA_ARGS__)) #define BOUT_OMP(...) _Pragma(INDIRECT2(__VA_ARGS__)) #else +#define BOUT_OMP_SAFE(...) #define BOUT_OMP(...) #endif +#if BOUT_USE_OPENMP + +#ifndef INDIRECT2 +#error expected macro INDIRECT2 to be available +#endif + +#define BOUT_OMP_PERF(...) _Pragma(INDIRECT2(__VA_ARGS__)) +#else +#define BOUT_OMP_PERF(...) +#endif + +#ifndef _OPENMP +inline int constexpr omp_get_max_threads() { return 1; } +inline int constexpr omp_get_num_threads() { return 1; } +inline int constexpr omp_get_thread_num() { return 0; } +#endif + //Perhaps want to cleanup local helpers with below, but DON'T! //This would cause uses of BOUT_OMP to break // #undef INDIRECT0 diff --git a/include/bout/operatorstencil.hxx b/include/bout/operatorstencil.hxx index 9a60f94ca7..118dc7a068 100644 --- a/include/bout/operatorstencil.hxx +++ b/include/bout/operatorstencil.hxx @@ -27,8 +27,8 @@ * **************************************************************************/ -#ifndef __OPERATORSTENCIL_H__ -#define __OPERATORSTENCIL_H__ +#ifndef BOUT_OPERATORSTENCIL_H +#define BOUT_OPERATORSTENCIL_H #include #include @@ -322,4 +322,4 @@ OperatorStencil starStencil(Mesh* localmesh) { return stencil; } -#endif // __OPERATORSTENCIL_H__ +#endif // BOUT_OPERATORSTENCIL_H diff --git a/include/bout/options.hxx b/include/bout/options.hxx index aa12442451..d6bcfd5f68 100644 --- a/include/bout/options.hxx +++ b/include/bout/options.hxx @@ -241,7 +241,8 @@ public: /// /// Option option2 = option1.copy(); /// - Options(const Options& other) = delete; // Use a reference or .copy() method + [[deprecated("Please use a reference or .copy() instead")]] Options( + const Options& other); /// Copy assignment must be explicit /// @@ -251,7 +252,8 @@ public: 
/// /// option2.value = option1.value; /// - Options& operator=(const Options& other) = delete; // Use a reference or .copy() method + [[deprecated("Please use a reference or .copy() instead")]] Options& + operator=(const Options& other); // Use a reference or .copy() method /// Make a deep copy of this Options, /// recursively copying children. @@ -364,7 +366,8 @@ public: /// {"long_name", "some velocity"} /// }); Options& setAttributes( - std::initializer_list> attrs) { + const std::initializer_list>& + attrs) { for (const auto& attr : attrs) { attributes[attr.first] = attr.second; } diff --git a/include/bout/options_io.hxx b/include/bout/options_io.hxx index 4c70159514..57be8bbaae 100644 --- a/include/bout/options_io.hxx +++ b/include/bout/options_io.hxx @@ -111,7 +111,7 @@ public: static constexpr auto default_type = #if BOUT_HAS_NETCDF "netcdf"; -#elif BOUT_HAS_ADIOS +#elif BOUT_HAS_ADIOS2 "adios"; #else "invalid"; diff --git a/include/bout/optionsreader.hxx b/include/bout/optionsreader.hxx index 32c302a3f7..de3d40514d 100644 --- a/include/bout/optionsreader.hxx +++ b/include/bout/optionsreader.hxx @@ -31,8 +31,8 @@ class OptionsReader; -#ifndef __OPTIONSREADER_H__ -#define __OPTIONSREADER_H__ +#ifndef BOUT_OPTIONSREADER_H +#define BOUT_OPTIONSREADER_H #include "bout/options.hxx" @@ -108,4 +108,4 @@ private: static OptionsReader* instance; }; -#endif // __OPTIONSREADER_H__ +#endif // BOUT_OPTIONSREADER_H diff --git a/include/bout/output.hxx b/include/bout/output.hxx index a44e987197..2862899067 100644 --- a/include/bout/output.hxx +++ b/include/bout/output.hxx @@ -26,8 +26,8 @@ class Output; #pragma once -#ifndef __OUTPUT_H__ -#define __OUTPUT_H__ +#ifndef BOUT_OUTPUT_H +#define BOUT_OUTPUT_H #include "bout/multiostream.hxx" #include @@ -304,4 +304,4 @@ extern ConditionalOutput output_verbose; ///< less interesting messages /// Generic output, given the same level as output_progress extern ConditionalOutput output; -#endif // __OUTPUT_H__ +#endif // 
BOUT_OUTPUT_H diff --git a/include/bout/parallel_boundary_op.hxx b/include/bout/parallel_boundary_op.hxx index d17aa8e48a..d8620e892b 100644 --- a/include/bout/parallel_boundary_op.hxx +++ b/include/bout/parallel_boundary_op.hxx @@ -1,5 +1,5 @@ -#ifndef __PAR_BNDRY_OP_H__ -#define __PAR_BNDRY_OP_H__ +#ifndef BOUT_PAR_BNDRY_OP_H +#define BOUT_PAR_BNDRY_OP_H #include "bout/boundary_op.hxx" #include "bout/bout_types.hxx" @@ -52,7 +52,7 @@ protected: BoutReal getValue(const BoundaryRegionPar& bndry, BoutReal t); }; -template +template class BoundaryOpParTemp : public BoundaryOpPar { public: using BoundaryOpPar::BoundaryOpPar; @@ -89,51 +89,74 @@ public: throw BoutException("Can't apply parallel boundary conditions to Field2D!"); } void apply(Field3D& f) override { return apply(f, 0); } + + void apply(Field3D& f, BoutReal t) override { + f.ynext(bndry->dir).allocate(); // Ensure unique before modifying + + auto dy = f.getCoordinates()->dy; + + for (bndry->first(); !bndry->isDone(); bndry->next()) { + BoutReal value = getValue(*bndry, t); + if (isNeumann) { + value *= dy[bndry->ind()]; + } + static_cast(this)->apply_stencil(f, bndry, value); + } + } }; ////////////////////////////////////////////////// // Implementations -class BoundaryOpPar_dirichlet : public BoundaryOpParTemp { +class BoundaryOpPar_dirichlet_o1 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - - using BoundaryOpParTemp::apply; - void apply(Field3D& f, BoutReal t) override; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->dirichlet_o1(f, value); + } }; -class BoundaryOpPar_dirichlet_O3 : public BoundaryOpParTemp { +class BoundaryOpPar_dirichlet_o2 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - - using BoundaryOpParTemp::apply; - void apply(Field3D& f, BoutReal t) override; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->dirichlet_o2(f, 
value); + } }; -class BoundaryOpPar_dirichlet_interp - : public BoundaryOpParTemp { +class BoundaryOpPar_dirichlet_o3 : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - - using BoundaryOpParTemp::apply; - void apply(Field3D& f, BoutReal t) override; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->dirichlet_o3(f, value); + } }; -class BoundaryOpPar_neumann : public BoundaryOpParTemp { +class BoundaryOpPar_neumann_o1 + : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; - - using BoundaryOpParTemp::apply; - void apply(Field3D& f, BoutReal t) override; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->neumann_o1(f, value); + } }; -class BoundaryOpPar_neumann_c2_simple - : public BoundaryOpParTemp { +class BoundaryOpPar_neumann_o2 + : public BoundaryOpParTemp { public: using BoundaryOpParTemp::BoundaryOpParTemp; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->neumann_o2(f, value); + } +}; - using BoundaryOpParTemp::apply; - void apply(Field3D& f, BoutReal t) override; +class BoundaryOpPar_neumann_o3 + : public BoundaryOpParTemp { +public: + using BoundaryOpParTemp::BoundaryOpParTemp; + static void apply_stencil(Field3D& f, const BoundaryRegionPar* bndry, BoutReal value) { + bndry->neumann_o3(f, value); + } }; -#endif // __PAR_BNDRY_OP_H__ +#endif // BOUT_PAR_BNDRY_OP_H diff --git a/include/bout/parallel_boundary_region.hxx b/include/bout/parallel_boundary_region.hxx index 3d5525a303..308b5ac5d7 100644 --- a/include/bout/parallel_boundary_region.hxx +++ b/include/bout/parallel_boundary_region.hxx @@ -1,22 +1,58 @@ -#ifndef __PAR_BNDRY_H__ -#define __PAR_BNDRY_H__ +#ifndef BOUT_PAR_BNDRY_H +#define BOUT_PAR_BNDRY_H #include "bout/boundary_region.hxx" #include "bout/bout_types.hxx" #include +#include +#include + /** * Boundary region for parallel direction. 
This contains a vector of points that are * inside the boundary. * */ -class BoundaryRegionPar : public BoundaryRegionBase { - struct IndexPoint { - int jx; - int jy; - int jz; - }; +namespace parallel_stencil { +// generated by src/mesh/parallel_boundary_stencil.cxx.py +inline BoutReal pow(BoutReal val, int exp) { + // constexpr int expval = exp; + // static_assert(expval == 2 or expval == 3, "This pow is only for exponent 2 or 3"); + if (exp == 2) { + return val * val; + } + ASSERT3(exp == 3); + return val * val * val; +} +inline BoutReal dirichlet_o1(BoutReal UNUSED(spacing0), BoutReal value0) { + return value0; +} +inline BoutReal dirichlet_o2(BoutReal spacing0, BoutReal value0, BoutReal spacing1, + BoutReal value1) { + return (spacing0 * value1 - spacing1 * value0) / (spacing0 - spacing1); +} +inline BoutReal neumann_o2(BoutReal UNUSED(spacing0), BoutReal value0, BoutReal spacing1, + BoutReal value1) { + return -spacing1 * value0 + value1; +} +inline BoutReal dirichlet_o3(BoutReal spacing0, BoutReal value0, BoutReal spacing1, + BoutReal value1, BoutReal spacing2, BoutReal value2) { + return (pow(spacing0, 2) * spacing1 * value2 - pow(spacing0, 2) * spacing2 * value1 + - spacing0 * pow(spacing1, 2) * value2 + spacing0 * pow(spacing2, 2) * value1 + + pow(spacing1, 2) * spacing2 * value0 - spacing1 * pow(spacing2, 2) * value0) + / ((spacing0 - spacing1) * (spacing0 - spacing2) * (spacing1 - spacing2)); +} +inline BoutReal neumann_o3(BoutReal spacing0, BoutReal value0, BoutReal spacing1, + BoutReal value1, BoutReal spacing2, BoutReal value2) { + return (2 * spacing0 * spacing1 * value2 - 2 * spacing0 * spacing2 * value1 + + pow(spacing1, 2) * spacing2 * value0 - pow(spacing1, 2) * value2 + - spacing1 * pow(spacing2, 2) * value0 + pow(spacing2, 2) * value1) + / ((spacing1 - spacing2) * (2 * spacing0 - spacing1 - spacing2)); +} +} // namespace parallel_stencil + +class BoundaryRegionPar : public BoundaryRegionBase { struct RealPoint { BoutReal s_x; @@ -26,13 +62,15 
@@ class BoundaryRegionPar : public BoundaryRegionBase { struct Indices { // Indices of the boundary point - IndexPoint index; + Ind3D index; // Intersection with boundary in index space RealPoint intersection; // Distance to intersection BoutReal length; // Angle between field line and boundary - BoutReal angle; + // BoutReal angle; + // How many points we can go in the opposite direction + signed char valid; }; using IndicesVec = std::vector; @@ -46,28 +84,122 @@ class BoundaryRegionPar : public BoundaryRegionBase { public: BoundaryRegionPar(const std::string& name, int dir, Mesh* passmesh) : BoundaryRegionBase(name, passmesh), dir(dir) { + ASSERT0(std::abs(dir) == 1); BoundaryRegionBase::isParallel = true; } BoundaryRegionPar(const std::string& name, BndryLoc loc, int dir, Mesh* passmesh) : BoundaryRegionBase(name, loc, passmesh), dir(dir) { BoundaryRegionBase::isParallel = true; + ASSERT0(std::abs(dir) == 1); } /// Add a point to the boundary - void add_point(int jx, int jy, int jz, BoutReal x, BoutReal y, BoutReal z, - BoutReal length, BoutReal angle); + void add_point(Ind3D ind, BoutReal x, BoutReal y, BoutReal z, BoutReal length, + char valid) { + bndry_points.push_back({ind, {x, y, z}, length, valid}); + } + void add_point(int ix, int iy, int iz, BoutReal x, BoutReal y, BoutReal z, + BoutReal length, char valid) { + bndry_points.push_back({xyz2ind(ix, iy, iz, localmesh), {x, y, z}, length, valid}); + } + + // final, so they can be inlined + void first() final { bndry_position = begin(bndry_points); } + void next() final { ++bndry_position; } + bool isDone() final { return (bndry_position == end(bndry_points)); } - void first() override; - void next() override; - bool isDone() override; + // getter + Ind3D ind() const { return bndry_position->index; } + BoutReal s_x() const { return bndry_position->intersection.s_x; } + BoutReal s_y() const { return bndry_position->intersection.s_y; } + BoutReal s_z() const { return bndry_position->intersection.s_z; } + 
BoutReal length() const { return bndry_position->length; } + char valid() const { return bndry_position->valid; } - /// Index of the point in the boundary - int x, y, z; - BoutReal s_x, s_y, s_z; - BoutReal length; - BoutReal angle; + // setter + void setValid(char val) { bndry_position->valid = val; } + + bool contains(const BoundaryRegionPar& bndry) const { + return std::binary_search( + begin(bndry_points), end(bndry_points), *bndry.bndry_position, + [](const Indices& i1, const Indices& i2) { return i1.index < i2.index; }); + } + + // extrapolate a given point to the boundary + BoutReal extrapolate_o1(const Field3D& f) const { return f[ind()]; } + BoutReal extrapolate_o2(const Field3D& f) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return extrapolate_o1(f); + } + return f[ind()] * (1 + length()) - f.ynext(-dir)[ind().yp(-dir)] * length(); + } + + // dirichlet boundary code + void dirichlet_o1(Field3D& f, BoutReal value) const { + f.ynext(dir)[ind().yp(dir)] = value; + } + + void dirichlet_o2(Field3D& f, BoutReal value) const { + if (length() < small_value) { + return dirichlet_o1(f, value); + } + ynext(f) = parallel_stencil::dirichlet_o2(1, f[ind()], 1 - length(), value); + // ynext(f) = f[ind()] * (1 + 1/length()) + value / length(); + } + + void dirichlet_o3(Field3D& f, BoutReal value) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return dirichlet_o2(f, value); + } + if (length() < small_value) { + ynext(f) = parallel_stencil::dirichlet_o2(2, yprev(f), 1 - length(), value); + } else { + ynext(f) = + parallel_stencil::dirichlet_o3(2, yprev(f), 1, f[ind()], 1 - length(), value); + } + } + + // NB: value needs to be scaled by dy + // neumann_o1 is actually o2 if we would use an appropriate one-sided stencil. + // But in general we do not, and thus for normal C2 stencils, this is 1st order. 
+ void neumann_o1(Field3D& f, BoutReal value) const { ynext(f) = f[ind()] + value; } + + // NB: value needs to be scaled by dy + void neumann_o2(Field3D& f, BoutReal value) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return neumann_o1(f, value); + } + ynext(f) = yprev(f) + 2 * value; + } + + // NB: value needs to be scaled by dy + void neumann_o3(Field3D& f, BoutReal value) const { + ASSERT3(valid() >= 0); + if (valid() < 1) { + return neumann_o1(f, value); + } + ynext(f) = + parallel_stencil::neumann_o3(1 - length(), value, 1, f[ind()], 2, yprev(f)); + } const int dir; + +private: + constexpr static BoutReal small_value = 1e-2; + + // BoutReal get(const Field3D& f, int off) + const BoutReal& ynext(const Field3D& f) const { return f.ynext(dir)[ind().yp(dir)]; } + BoutReal& ynext(Field3D& f) const { return f.ynext(dir)[ind().yp(dir)]; } + const BoutReal& yprev(const Field3D& f) const { return f.ynext(-dir)[ind().yp(-dir)]; } + BoutReal& yprev(Field3D& f) const { return f.ynext(-dir)[ind().yp(-dir)]; } + static Ind3D xyz2ind(int x, int y, int z, Mesh* mesh) { + const int ny = mesh->LocalNy; + const int nz = mesh->LocalNz; + return Ind3D{(x * ny + y) * nz + z, ny, nz}; + } }; -#endif // __PAR_BNDRY_H__ +#endif // BOUT_PAR_BNDRY_H diff --git a/include/bout/paralleltransform.hxx b/include/bout/paralleltransform.hxx index 4a7e4989c8..0aafa04303 100644 --- a/include/bout/paralleltransform.hxx +++ b/include/bout/paralleltransform.hxx @@ -3,8 +3,8 @@ * values along Y */ -#ifndef __PARALLELTRANSFORM_H__ -#define __PARALLELTRANSFORM_H__ +#ifndef BOUT_PARALLELTRANSFORM_H +#define BOUT_PARALLELTRANSFORM_H #include "bout/bout_types.hxx" #include "bout/field3d.hxx" @@ -317,4 +317,4 @@ private: const std::vector& phases) const; }; -#endif // __PARALLELTRANSFORM_H__ +#endif // BOUT_PARALLELTRANSFORM_H diff --git a/include/bout/petsc_interface.hxx b/include/bout/petsc_interface.hxx index 0afcc8a30a..407e5ac18e 100644 --- a/include/bout/petsc_interface.hxx +++ 
b/include/bout/petsc_interface.hxx @@ -175,7 +175,7 @@ public: #endif BoutReal value = BoutNaN; int status = 0; - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) status = VecGetValues(*get(), 1, &global, &value); if (status != 0) { throw BoutException("Error when getting element of a PETSc vector."); @@ -355,7 +355,7 @@ public: PetscBool assembled = PETSC_FALSE; MatAssembled(*petscMatrix, &assembled); if (assembled == PETSC_TRUE) { - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) MatGetValues(*petscMatrix, 1, &petscRow, 1, &petscCol, &value); } else { value = 0.; @@ -400,7 +400,7 @@ public: [&val](BoutReal weight) -> PetscScalar { return weight * val; }); int status = 0; - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) status = MatSetValues(*petscMatrix, 1, &petscRow, positions.size(), positions.data(), values.data(), mode); if (status != 0) { @@ -467,7 +467,7 @@ public: #endif BoutReal value = BoutNaN; int status = 0; - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) status = MatGetValues(*get(), 1, &global1, 1, &global2, &value); if (status != 0) { throw BoutException("Error when getting elements of a PETSc matrix."); diff --git a/include/bout/petsclib.hxx b/include/bout/petsclib.hxx index 35334ce773..2008671286 100644 --- a/include/bout/petsclib.hxx +++ b/include/bout/petsclib.hxx @@ -59,7 +59,7 @@ class Options; // means we _must_ `#include` this header _before_ any PETSc header! #define PETSC_HAVE_BROKEN_RECURSIVE_MACRO -#include +#include // IWYU pragma: export #include #include "bout/boutexception.hxx" diff --git a/include/bout/physicsmodel.hxx b/include/bout/physicsmodel.hxx index ada97fc6fc..9fa25d8b0f 100644 --- a/include/bout/physicsmodel.hxx +++ b/include/bout/physicsmodel.hxx @@ -34,8 +34,8 @@ class PhysicsModel; -#ifndef __PHYSICS_MODEL_H__ -#define __PHYSICS_MODEL_H__ +#ifndef BOUT_PHYSICS_MODEL_H +#define BOUT_PHYSICS_MODEL_H #include "solver.hxx" #include "bout/bout.hxx" @@ -566,4 +566,4 @@ private: #define SAVE_REPEAT(...) 
\ { MACRO_FOR_EACH(SAVE_REPEAT1, __VA_ARGS__) } -#endif // __PHYSICS_MODEL_H__ +#endif // BOUT_PHYSICS_MODEL_H diff --git a/include/bout/region.hxx b/include/bout/region.hxx index cbaf0d0c31..4649b680eb 100644 --- a/include/bout/region.hxx +++ b/include/bout/region.hxx @@ -39,20 +39,26 @@ /// because an Ind2D essentially doesn't keep track of the /// z-dimension. -#ifndef __REGION_H__ -#define __REGION_H__ +#ifndef BOUT_REGION_H +#define BOUT_REGION_H #include #include +#include #include #include #include #include "bout/assert.hxx" #include "bout/bout_types.hxx" -#include "bout/openmpwrap.hxx" +#include "bout/boutexception.hxx" +#include "bout/build_defines.hxx" +#include "bout/openmpwrap.hxx" // IWYU pragma: keep + class BoutMask; +// NOLINTBEGIN(cppcoreguidelines-macro-usage,bugprone-macro-parentheses) + /// The MAXREGIONBLOCKSIZE value can be tuned to try to optimise /// performance on specific hardware. It determines what the largest /// contiguous block size can be. As we hope the compiler will vectorise @@ -110,16 +116,16 @@ class BoutMask; /// } // -#define BOUT_FOR_SERIAL(index, region) \ - for (auto block = region.getBlocks().cbegin(), end = region.getBlocks().cend(); \ - block < end; ++block) \ +#define BOUT_FOR_SERIAL(index, region) \ + for (auto block = (region).getBlocks().cbegin(), end = (region).getBlocks().cend(); \ + block < end; ++block) \ for (auto index = block->first; index < block->second; ++index) #if BOUT_USE_OPENMP -#define BOUT_FOR_OMP(index, region, omp_pragmas) \ - BOUT_OMP(omp_pragmas) \ - for (auto block = region.getBlocks().cbegin(); block < region.getBlocks().cend(); \ - ++block) \ +#define BOUT_FOR_OMP(index, region, omp_pragmas) \ + BOUT_OMP_PERF(omp_pragmas) \ + for (auto block = (region).getBlocks().cbegin(); block < (region).getBlocks().cend(); \ + ++block) \ for (auto index = block->first; index < block->second; ++index) #else // No OpenMP, so fall back to slightly more efficient serial form @@ -127,10 +133,11 @@ class 
BoutMask; #endif #define BOUT_FOR(index, region) \ - BOUT_FOR_OMP(index, region, parallel for schedule(BOUT_OPENMP_SCHEDULE)) + BOUT_FOR_OMP(index, (region), parallel for schedule(BOUT_OPENMP_SCHEDULE)) #define BOUT_FOR_INNER(index, region) \ - BOUT_FOR_OMP(index, region, for schedule(BOUT_OPENMP_SCHEDULE) nowait) + BOUT_FOR_OMP(index, (region), for schedule(BOUT_OPENMP_SCHEDULE) nowait) +// NOLINTEND(cppcoreguidelines-macro-usage,bugprone-macro-parentheses) enum class IND_TYPE { IND_3D = 0, IND_2D = 1, IND_PERP = 2 }; @@ -232,7 +239,7 @@ struct SpecificInd { /// and is determined by the `dir` template argument. The offset corresponds /// to the `dd` template argument. template - const inline SpecificInd plus() const { + inline SpecificInd plus() const { static_assert(dir == DIRECTION::X || dir == DIRECTION::Y || dir == DIRECTION::Z || dir == DIRECTION::YAligned || dir == DIRECTION::YOrthogonal, "Unhandled DIRECTION in SpecificInd::plus"); @@ -252,7 +259,7 @@ struct SpecificInd { /// and is determined by the `dir` template argument. The offset corresponds /// to the `dd` template argument. 
template - const inline SpecificInd minus() const { + inline SpecificInd minus() const { static_assert(dir == DIRECTION::X || dir == DIRECTION::Y || dir == DIRECTION::Z || dir == DIRECTION::YAligned || dir == DIRECTION::YOrthogonal, "Unhandled DIRECTION in SpecificInd::minus"); @@ -268,11 +275,11 @@ struct SpecificInd { } } - const inline SpecificInd xp(int dx = 1) const { return {ind + (dx * ny * nz), ny, nz}; } + inline SpecificInd xp(int dx = 1) const { return {ind + (dx * ny * nz), ny, nz}; } /// The index one point -1 in x - const inline SpecificInd xm(int dx = 1) const { return xp(-dx); } + inline SpecificInd xm(int dx = 1) const { return xp(-dx); } /// The index one point +1 in y - const inline SpecificInd yp(int dy = 1) const { + inline SpecificInd yp(int dy = 1) const { #if CHECK >= 4 if (y() + dy < 0 or y() + dy >= ny) { throw BoutException("Offset in y ({:d}) would go out of bounds at {:d}", dy, ind); @@ -282,12 +289,12 @@ struct SpecificInd { return {ind + (dy * nz), ny, nz}; } /// The index one point -1 in y - const inline SpecificInd ym(int dy = 1) const { return yp(-dy); } + inline SpecificInd ym(int dy = 1) const { return yp(-dy); } /// The index one point +1 in z. Wraps around zend to zstart /// An alternative, non-branching calculation is : /// ind + dz - nz * ((ind + dz) / nz - ind / nz) /// but this appears no faster (and perhaps slower). - const inline SpecificInd zp(int dz = 1) const { + inline SpecificInd zp(int dz = 1) const { ASSERT3(dz >= 0); dz = dz <= nz ? dz : dz % nz; //Fix in case dz > nz, if not force it to be in range return {(ind + dz) % nz < dz ? ind - nz + dz : ind + dz, ny, nz}; @@ -296,22 +303,22 @@ struct SpecificInd { /// An alternative, non-branching calculation is : /// ind - dz + nz * ( (nz + ind) / nz - (nz + ind - dz) / nz) /// but this appears no faster (and perhaps slower). - const inline SpecificInd zm(int dz = 1) const { + inline SpecificInd zm(int dz = 1) const { dz = dz <= nz ? 
dz : dz % nz; //Fix in case dz > nz, if not force it to be in range ASSERT3(dz >= 0); return {(ind) % nz < dz ? ind + nz - dz : ind - dz, ny, nz}; } // and for 2 cells - const inline SpecificInd xpp() const { return xp(2); } - const inline SpecificInd xmm() const { return xm(2); } - const inline SpecificInd ypp() const { return yp(2); } - const inline SpecificInd ymm() const { return ym(2); } - const inline SpecificInd zpp() const { return zp(2); } - const inline SpecificInd zmm() const { return zm(2); } + inline SpecificInd xpp() const { return xp(2); } + inline SpecificInd xmm() const { return xm(2); } + inline SpecificInd ypp() const { return yp(2); } + inline SpecificInd ymm() const { return ym(2); } + inline SpecificInd zpp() const { return zp(2); } + inline SpecificInd zmm() const { return zm(2); } /// Generic offset of \p index in multiple directions simultaneously - const inline SpecificInd offset(int dx, int dy, int dz) const { + inline SpecificInd offset(int dx, int dy, int dz) const { auto temp = (dz > 0) ? 
zp(dz) : zm(-dz); return temp.yp(dy).xp(dx); } @@ -380,16 +387,16 @@ using Ind2D = SpecificInd; using IndPerp = SpecificInd; /// Get string representation of Ind3D -inline const std::string toString(const Ind3D& i) { +inline std::string toString(const Ind3D& i) { return "(" + std::to_string(i.x()) + ", " + std::to_string(i.y()) + ", " + std::to_string(i.z()) + ")"; } /// Get string representation of Ind2D -inline const std::string toString(const Ind2D& i) { +inline std::string toString(const Ind2D& i) { return "(" + std::to_string(i.x()) + ", " + std::to_string(i.y()) + ")"; } /// Get string representation of IndPerp -inline const std::string toString(const IndPerp& i) { +inline std::string toString(const IndPerp& i) { return "(" + std::to_string(i.x()) + ", " + std::to_string(i.z()) + ")"; } @@ -516,10 +523,10 @@ public: // Want to make this private to disable but think it may be needed as we put Regions // into maps which seems to need to be able to make "empty" objects. - Region() = default; + Region() = default; - Region(int xstart, int xend, int ystart, int yend, int zstart, int zend, int ny, - int nz, int maxregionblocksize = MAXREGIONBLOCKSIZE) + Region(int xstart, int xend, int ystart, int yend, int zstart, int zend, int ny, int nz, + int maxregionblocksize = MAXREGIONBLOCKSIZE) : ny(ny), nz(nz) { #if CHECK > 1 if constexpr (std::is_base_of_v) { @@ -560,20 +567,18 @@ public: blocks = getContiguousBlocks(maxregionblocksize); }; - Region(RegionIndices& indices, int maxregionblocksize = MAXREGIONBLOCKSIZE) - : indices(indices) { - blocks = getContiguousBlocks(maxregionblocksize); - }; + Region(RegionIndices& indices, int maxregionblocksize = MAXREGIONBLOCKSIZE) + : indices(indices), blocks(getContiguousBlocks(maxregionblocksize)){}; - Region(ContiguousBlocks& blocks) : blocks(blocks) { indices = getRegionIndices(); }; + // We need to first set the blocks, and only after that call getRegionIndices. 
+ // Do not put in the member initialisation + // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer) + Region(ContiguousBlocks& blocks) : blocks(blocks) { indices = getRegionIndices(); }; bool operator==(const Region& other) const { return std::equal(this->begin(), this->end(), other.begin(), other.end()); } - /// Destructor - ~Region() = default; - /// Expose the iterator over indices for use in range-based /// for-loops or with STL algorithms, etc. /// @@ -760,8 +765,8 @@ public: // globalPos = (index/period) * period; // Find which period block we're in // newIndex = globalPos + localPos; for (unsigned int i = 0; i < newInd.size(); i++) { - int index = newInd[i].ind; - int whichBlock = index / period; + const int index = newInd[i].ind; + const int whichBlock = index / period; newInd[i].ind = ((index + shift) % period) + period * whichBlock; }; @@ -785,20 +790,21 @@ public: std::vector blockSizes(result.numBlocks); // Get the size of each block using lambda to calculate size - std::transform(std::begin(blocks), std::end(blocks), std::begin(blockSizes), - [](const ContiguousBlock& a) { return a.second.ind - a.first.ind; }); + std::transform( + std::begin(blocks), std::end(blocks), std::begin(blockSizes), + [](const ContiguousBlock& block) { return block.second.ind - block.first.ind; }); auto minMaxSize = std::minmax_element(std::begin(blockSizes), std::end(blockSizes)); - result.minBlockSize = - *(minMaxSize.first); //Note have to derefence to get actual value - result.numMinBlocks = - std::count(std::begin(blockSizes), std::end(blockSizes), result.minBlockSize); + // Note have to derefence to get actual value + result.minBlockSize = *(minMaxSize.first); + result.numMinBlocks = static_cast( + std::count(std::begin(blockSizes), std::end(blockSizes), result.minBlockSize)); - result.maxBlockSize = - *(minMaxSize.second); //Note have to derefence to get actual value - result.numMaxBlocks = - std::count(std::begin(blockSizes), std::end(blockSizes), 
result.maxBlockSize); + // Note have to derefence to get actual value + result.maxBlockSize = *(minMaxSize.second); + result.numMaxBlocks = static_cast( + std::count(std::begin(blockSizes), std::end(blockSizes), result.maxBlockSize)); result.maxImbalance = static_cast(result.maxBlockSize) / static_cast(result.minBlockSize); @@ -853,10 +859,10 @@ private: int z = zstart; bool done = false; - int j = -1; + int ind = -1; while (!done) { - j++; - region[j].ind = (x * ny + y) * nz + z; + ind++; + region[ind].ind = (x * ny + y) * nz + z; if (x == xend && y == yend && z == zend) { done = true; } @@ -979,4 +985,4 @@ unsigned int size(const Region& region) { return region.size(); } -#endif /* __REGION_H__ */ +#endif /* BOUT_REGION_H */ diff --git a/include/bout/rkscheme.hxx b/include/bout/rkscheme.hxx index f4e5959aff..ba818c04fe 100644 --- a/include/bout/rkscheme.hxx +++ b/include/bout/rkscheme.hxx @@ -32,8 +32,8 @@ class RKScheme; -#ifndef __RKSCHEME_H__ -#define __RKSCHEME_H__ +#ifndef BOUT_RKSCHEME_H +#define BOUT_RKSCHEME_H #include "bout/generic_factory.hxx" #include @@ -140,4 +140,4 @@ private: void zeroSteps(); }; -#endif // __RKSCHEME_H__ +#endif // BOUT_RKSCHEME_H diff --git a/include/bout/rvec.hxx b/include/bout/rvec.hxx index 0b611d64bf..492228b9ea 100644 --- a/include/bout/rvec.hxx +++ b/include/bout/rvec.hxx @@ -1,11 +1,11 @@ #pragma once -#ifndef __RVEC_H__ -#define __RVEC_H__ +#ifndef BOUT_RVEC_H +#define BOUT_RVEC_H #include #include using rvec = std::vector; -#endif // __RVEC_H__ +#endif // BOUT_RVEC_H diff --git a/include/bout/scorepwrapper.hxx b/include/bout/scorepwrapper.hxx index 210d48e49f..2eb67cda30 100644 --- a/include/bout/scorepwrapper.hxx +++ b/include/bout/scorepwrapper.hxx @@ -1,5 +1,5 @@ -#ifndef __BOUT_SCOREP_H__ -#define __BOUT_SCOREP_H__ +#ifndef BOUT_SCOREP_H +#define BOUT_SCOREP_H #include "bout/build_config.hxx" diff --git a/include/bout/single_index_ops.hxx b/include/bout/single_index_ops.hxx index 6a9089510b..60bd78bc36 100644 --- 
a/include/bout/single_index_ops.hxx +++ b/include/bout/single_index_ops.hxx @@ -16,7 +16,7 @@ using EXEC_POL = RAJA::cuda_exec; using EXEC_POL = RAJA::loop_exec; #endif // end BOUT_USE_CUDA ////-----------CUDA settings------------------------------------------------------end -#endif +#endif // end BOUT_HAS_RAJA // Ind3D: i.zp(): BOUT_HOST_DEVICE inline int i_zp(const int id, const int nz) { diff --git a/include/bout/slepclib.hxx b/include/bout/slepclib.hxx index f6df9ce98c..e59a9c2913 100644 --- a/include/bout/slepclib.hxx +++ b/include/bout/slepclib.hxx @@ -42,8 +42,8 @@ class SlepcLib; -#ifndef __SLEPCLIB_H__ -#define __SLEPCLIB_H__ +#ifndef BOUT_SLEPCLIB_H +#define BOUT_SLEPCLIB_H #include "bout/build_config.hxx" @@ -89,4 +89,4 @@ public: #endif // BOUT_HAS_SLEPC -#endif // __SLEPCLIB_H__ +#endif // BOUT_SLEPCLIB_H diff --git a/include/bout/smoothing.hxx b/include/bout/smoothing.hxx index 8a0d6e81b8..9485602053 100644 --- a/include/bout/smoothing.hxx +++ b/include/bout/smoothing.hxx @@ -25,8 +25,8 @@ * **************************************************************/ -#ifndef __SMOOTHING_H__ -#define __SMOOTHING_H__ +#ifndef BOUT_SMOOTHING_H +#define BOUT_SMOOTHING_H #include "bout/field3d.hxx" @@ -135,4 +135,4 @@ const Field3D nl_filter_z(const Field3D& f, BoutReal w = 1.0); */ const Field3D nl_filter(const Field3D& f, BoutReal w = 1.0); -#endif // __SMOOTHING_H__ +#endif // BOUT_SMOOTHING_H diff --git a/include/bout/solverfactory.hxx b/include/bout/solverfactory.hxx index a628aed0c1..a0ecd646b8 100644 --- a/include/bout/solverfactory.hxx +++ b/include/bout/solverfactory.hxx @@ -1,5 +1,5 @@ -#ifndef __SOLVER_FACTORY_H__ -#define __SOLVER_FACTORY_H__ +#ifndef BOUT_SOLVER_FACTORY_H +#define BOUT_SOLVER_FACTORY_H #ifndef _MSC_VER #warning("Deprecated header: use #include instead") @@ -9,4 +9,4 @@ #include -#endif // __SOLVER_FACTORY_H__ +#endif // BOUT_SOLVER_FACTORY_H diff --git a/include/bout/sourcex.hxx b/include/bout/sourcex.hxx index 6727c8bcc9..e01c469af6 
100644 --- a/include/bout/sourcex.hxx +++ b/include/bout/sourcex.hxx @@ -2,8 +2,8 @@ * Radial mask operators **************************************************************/ -#ifndef __MASKX_H__ -#define __MASKX_H__ +#ifndef BOUT_MASKX_H +#define BOUT_MASKX_H #include "bout/field3d.hxx" @@ -21,4 +21,4 @@ const Field3D sink_tanhxr(const Field2D& f0, const Field3D& f, BoutReal swidth, const Field3D buff_x(const Field3D& f, bool BoutRealspace = true); -#endif // __MASKX_H__ +#endif // BOUT_MASKX_H diff --git a/include/bout/stencils.hxx b/include/bout/stencils.hxx index fa55e7dd2d..2466047297 100644 --- a/include/bout/stencils.hxx +++ b/include/bout/stencils.hxx @@ -25,8 +25,8 @@ * **************************************************************************/ -#ifndef __STENCILS_H__ -#define __STENCILS_H__ +#ifndef BOUT_STENCILS_H +#define BOUT_STENCILS_H #include "bout/bout_types.hxx" @@ -125,4 +125,4 @@ stencil inline populateStencil(const FieldType& f, const typename FieldType::ind populateStencil(s, f, i); return s; } -#endif /* __STENCILS_H__ */ +#endif /* BOUT_STENCILS_H */ diff --git a/include/bout/sundials_backports.hxx b/include/bout/sundials_backports.hxx index c4f4aa59ef..4ec334f4d4 100644 --- a/include/bout/sundials_backports.hxx +++ b/include/bout/sundials_backports.hxx @@ -1,81 +1,74 @@ -// Backports for SUNDIALS compatibility between versions 3-6 +// Backports for SUNDIALS compatibility between versions 4-7 // // These are common backports shared between the CVode, ARKode, and IDA solvers // // Copyright 2022 Peter Hill, BOUT++ Team -// SPDX-License-Identifier: LGPLv3 +// SPDX-License-Identifier: LGPL-3.0-or-later #ifndef BOUT_SUNDIALS_BACKPORTS_H #define BOUT_SUNDIALS_BACKPORTS_H +#include "bout/bout_types.hxx" + +#include + #include #include #include +#include #include - -#if SUNDIALS_VERSION_MAJOR >= 3 #include -#endif - -#if SUNDIALS_VERSION_MAJOR >= 4 -#include #include #include -#endif - -#include "bout/unused.hxx" -#if SUNDIALS_VERSION_MAJOR < 3 
-using SUNLinearSolver = int*; -inline void SUNLinSolFree([[maybe_unused]] SUNLinearSolver solver) {} -using sunindextype = long int; +#if SUNDIALS_VERSION_MAJOR >= 6 +#include #endif -#if SUNDIALS_VERSION_MAJOR < 4 -using SUNNonlinearSolver = int*; -inline void SUNNonlinSolFree([[maybe_unused]] SUNNonlinearSolver solver) {} +#if SUNDIALS_VERSION_MAJOR < 6 +using sundials_real_type = realtype; +#else +using sundials_real_type = sunrealtype; #endif -#if SUNDIALS_VERSION_MAJOR < 6 -namespace sundials { -struct Context { - Context(void* comm [[maybe_unused]]) {} -}; -} // namespace sundials +static_assert(std::is_same_v, + "BOUT++ and SUNDIALS real types do not match"); -using SUNContext = sundials::Context; +#define SUNDIALS_CONTROLLER_SUPPORT \ + (SUNDIALS_VERSION_MAJOR > 6 \ + || SUNDIALS_VERSION_MAJOR == 6 && SUNDIALS_VERSION_MINOR >= 7) +#define SUNDIALS_TABLE_BY_NAME_SUPPORT \ + (SUNDIALS_VERSION_MAJOR > 6 \ + || SUNDIALS_VERSION_MAJOR == 6 && SUNDIALS_VERSION_MINOR >= 4) +#if SUNDIALS_VERSION_MAJOR < 6 constexpr auto SUN_PREC_RIGHT = PREC_RIGHT; constexpr auto SUN_PREC_LEFT = PREC_LEFT; constexpr auto SUN_PREC_NONE = PREC_NONE; -inline N_Vector N_VNew_Parallel(MPI_Comm comm, sunindextype local_length, - sunindextype global_length, - [[maybe_unused]] SUNContext sunctx) { - return N_VNew_Parallel(comm, local_length, global_length); -} +namespace sundials { +using Context = std::nullptr_t; +} // namespace sundials +#endif -#if SUNDIALS_VERSION_MAJOR >= 3 -inline SUNLinearSolver SUNLinSol_SPGMR(N_Vector y, int pretype, int maxl, - [[maybe_unused]] SUNContext sunctx) { -#if SUNDIALS_VERSION_MAJOR == 3 - return SUNSPGMR(y, pretype, maxl); +inline sundials::Context createSUNContext([[maybe_unused]] MPI_Comm& comm) { +#if SUNDIALS_VERSION_MAJOR < 6 + return nullptr; +#elif SUNDIALS_VERSION_MAJOR < 7 + return sundials::Context(static_cast(&comm)); #else - return SUNLinSol_SPGMR(y, pretype, maxl); + return sundials::Context(comm); #endif } -#if SUNDIALS_VERSION_MAJOR >= 
4 -inline SUNNonlinearSolver SUNNonlinSol_FixedPoint(N_Vector y, int m, - [[maybe_unused]] SUNContext sunctx) { - return SUNNonlinSol_FixedPoint(y, m); -} -inline SUNNonlinearSolver SUNNonlinSol_Newton(N_Vector y, - [[maybe_unused]] SUNContext sunctx) { - return SUNNonlinSol_Newton(y); +template +inline decltype(auto) callWithSUNContext(Func f, [[maybe_unused]] sundials::Context& ctx, + Args&&... args) { +#if SUNDIALS_VERSION_MAJOR < 6 + return f(std::forward(args)...); +#else + return f(std::forward(args)..., ctx); +#endif } -#endif // SUNDIALS_VERSION_MAJOR >= 4 -#endif // SUNDIALS_VERSION_MAJOR >= 3 -#endif // SUNDIALS_VERSION_MAJOR < 6 #endif // BOUT_SUNDIALS_BACKPORTS_H diff --git a/include/bout/surfaceiter.hxx b/include/bout/surfaceiter.hxx index ebe33b9864..a031b30ba6 100644 --- a/include/bout/surfaceiter.hxx +++ b/include/bout/surfaceiter.hxx @@ -4,8 +4,8 @@ class SurfaceIter; -#ifndef __SURFACEITER_H__ -#define __SURFACEITER_H__ +#ifndef BOUT_SURFACEITER_H +#define BOUT_SURFACEITER_H #include "mesh.hxx" @@ -63,4 +63,4 @@ private: const int lastpos; }; -#endif // __SURFACEITER_H__ +#endif // BOUT_SURFACEITER_H diff --git a/include/bout/sys/gettext.hxx b/include/bout/sys/gettext.hxx index 2ada87ab63..a17412118c 100644 --- a/include/bout/sys/gettext.hxx +++ b/include/bout/sys/gettext.hxx @@ -1,7 +1,7 @@ /// Support for i18n using GNU gettext -#ifndef __BOUT_GETTEXT_H__ -#define __BOUT_GETTEXT_H__ +#ifndef BOUT_GETTEXT_H +#define BOUT_GETTEXT_H #include "bout/build_config.hxx" @@ -19,4 +19,4 @@ #define _(string) string #endif // BOUT_HAS_GETTEXT -#endif // __BOUT_GETTEXT_H__ +#endif // BOUT_GETTEXT_H diff --git a/include/bout/sys/range.hxx b/include/bout/sys/range.hxx index a210983f25..9d8aa96cd7 100644 --- a/include/bout/sys/range.hxx +++ b/include/bout/sys/range.hxx @@ -21,8 +21,8 @@ */ -#ifndef __RANGE_H__ -#define __RANGE_H__ +#ifndef BOUT_RANGE_H +#define BOUT_RANGE_H class RangeIterator { public: @@ -74,4 +74,4 @@ private: bool delete_next = false; // 
Flag to delete this->n if we created it }; -#endif // __RANGE_H__ +#endif // BOUT_RANGE_H diff --git a/include/bout/sys/timer.hxx b/include/bout/sys/timer.hxx index 6f04630c9d..f3beba27b1 100644 --- a/include/bout/sys/timer.hxx +++ b/include/bout/sys/timer.hxx @@ -1,5 +1,5 @@ -#ifndef __TIMER_H__ -#define __TIMER_H__ +#ifndef BOUT_TIMER_H +#define BOUT_TIMER_H #include #include @@ -134,4 +134,4 @@ public: }; #define AUTO_TIME() Timer CONCATENATE(time_, __LINE__)(__thefunc__) -#endif // __TIMER_H__ +#endif // BOUT_TIMER_H diff --git a/include/bout/sys/uncopyable.hxx b/include/bout/sys/uncopyable.hxx index 76606620ed..35418cb7f6 100644 --- a/include/bout/sys/uncopyable.hxx +++ b/include/bout/sys/uncopyable.hxx @@ -1,7 +1,7 @@ // From Scott Meyers' "Effective C++, third edition" -#ifndef __UNCOPYABLE_H__ -#define __UNCOPYABLE_H__ +#ifndef BOUT_UNCOPYABLE_H +#define BOUT_UNCOPYABLE_H /// Inherit from this class (private) to prevent copying class Uncopyable { @@ -14,4 +14,4 @@ public: Uncopyable& operator=(const Uncopyable&) = delete; }; -#endif // __UNCOPYABLE_H__ +#endif // BOUT_UNCOPYABLE_H diff --git a/include/bout/template_combinations.hxx b/include/bout/template_combinations.hxx index 81848cf252..49a42e6bca 100644 --- a/include/bout/template_combinations.hxx +++ b/include/bout/template_combinations.hxx @@ -27,8 +27,8 @@ * **************************************************************************/ -#ifndef __TEMPLATE_COMBINATIONS_H__ -#define __TEMPLATE_COMBINATIONS_H__ +#ifndef BOUT_TEMPLATE_COMBINATIONS_H +#define BOUT_TEMPLATE_COMBINATIONS_H #include diff --git a/include/bout/unused.hxx b/include/bout/unused.hxx index 74fd3c2f98..7ef67cfe84 100644 --- a/include/bout/unused.hxx +++ b/include/bout/unused.hxx @@ -1,5 +1,5 @@ -#ifndef __UNUSED_H__ -#define __UNUSED_H__ +#ifndef BOUT_UNUSED_H +#define BOUT_UNUSED_H /// Mark a function parameter as unused in the function body /// @@ -37,4 +37,4 @@ #define UNUSED(x) x #endif -#endif //__UNUSED_H__ +#endif 
//BOUT_UNUSED_H diff --git a/include/bout/utils.hxx b/include/bout/utils.hxx index 0ec87fd4d7..19fc8bed39 100644 --- a/include/bout/utils.hxx +++ b/include/bout/utils.hxx @@ -26,8 +26,8 @@ * **************************************************************************/ -#ifndef __UTILS_H__ -#define __UTILS_H__ +#ifndef BOUT_UTILS_H +#define BOUT_UTILS_H #include "bout/bout_types.hxx" #include "bout/boutexception.hxx" @@ -362,6 +362,14 @@ public: return data[i.ind]; } + T& operator[](Ind3D i) { + // ny and nz are private :-( + // ASSERT2(i.nz == n3); + // ASSERT2(i.ny == n2); + ASSERT2(0 <= i.ind && i.ind < n1 * n2 * n3); + return data[i.ind]; + } + Tensor& operator=(const T& val) { for (auto& i : data) { i = val; @@ -712,4 +720,4 @@ inline bool flagSet(int bitset, int flag) { return (bitset & flag) != 0; } } // namespace utils } // namespace bout -#endif // __UTILS_H__ +#endif // BOUT_UTILS_H diff --git a/include/bout/vecops.hxx b/include/bout/vecops.hxx index 4a03d06b5e..9166503855 100644 --- a/include/bout/vecops.hxx +++ b/include/bout/vecops.hxx @@ -26,8 +26,8 @@ * **************************************************************************/ -#ifndef __VECOPS_H__ -#define __VECOPS_H__ +#ifndef BOUT_VECOPS_H +#define BOUT_VECOPS_H #include "bout/bout_types.hxx" #include "bout/coordinates.hxx" @@ -129,4 +129,4 @@ Vector3D V_dot_Grad(const Vector2D& v, const Vector3D& a); Vector3D V_dot_Grad(const Vector3D& v, const Vector2D& a); Vector3D V_dot_Grad(const Vector3D& v, const Vector3D& a); -#endif // __VECOPS_H__ +#endif // BOUT_VECOPS_H diff --git a/include/bout/vector2d.hxx b/include/bout/vector2d.hxx index 974c5f81db..bdc375e698 100644 --- a/include/bout/vector2d.hxx +++ b/include/bout/vector2d.hxx @@ -34,8 +34,8 @@ class Vector2D; #pragma once -#ifndef __VECTOR2D_H__ -#define __VECTOR2D_H__ +#ifndef BOUT_VECTOR2D_H +#define BOUT_VECTOR2D_H class Field2D; class Field3D; @@ -217,4 +217,4 @@ inline Vector2D zeroFrom(const Vector2D& v) { */ inline Vector2D& ddt(Vector2D& 
f) { return *(f.timeDeriv()); } -#endif // __VECTOR2D_H__ +#endif // BOUT_VECTOR2D_H diff --git a/include/bout/vector3d.hxx b/include/bout/vector3d.hxx index 93ee798663..0c71dcffa5 100644 --- a/include/bout/vector3d.hxx +++ b/include/bout/vector3d.hxx @@ -30,8 +30,8 @@ class Vector3D; #pragma once -#ifndef __VECTOR3D_H__ -#define __VECTOR3D_H__ +#ifndef BOUT_VECTOR3D_H +#define BOUT_VECTOR3D_H class Field2D; class Vector2D; @@ -237,4 +237,4 @@ inline Vector3D zeroFrom(const Vector3D& v) { */ inline Vector3D& ddt(Vector3D& f) { return *(f.timeDeriv()); } -#endif // __VECTOR3D_H__ +#endif // BOUT_VECTOR3D_H diff --git a/include/bout/where.hxx b/include/bout/where.hxx index 504dc028b1..c798d75de8 100644 --- a/include/bout/where.hxx +++ b/include/bout/where.hxx @@ -25,8 +25,8 @@ * **************************************************************************/ -#ifndef __WHERE_H__ -#define __WHERE_H__ +#ifndef BOUT_WHERE_H +#define BOUT_WHERE_H #include "bout/field.hxx" #include "bout/field2d.hxx" @@ -85,4 +85,4 @@ auto where(const T& test, BoutReal gt0, BoutReal le0) -> ResultType { return result; } -#endif // __WHERE_H__ +#endif // BOUT_WHERE_H diff --git a/manual/sphinx/conf.py b/manual/sphinx/conf.py index 29c0985841..d27e8ab1fd 100755 --- a/manual/sphinx/conf.py +++ b/manual/sphinx/conf.py @@ -88,7 +88,7 @@ def __getattr__(cls, name): + " -DBOUT_UPDATE_GIT_SUBMODULE=OFF" + " -DBOUT_TESTS=OFF" + " -DBOUT_ALLOW_INSOURCE_BUILD=ON" - + f" -DPython_ROOT_DIR={pydir}" + + f" -DPython3_ROOT_DIR={pydir}" + f" -Dmpark_variant_DIR={pwd}/externalpackages/mpark.variant/" + f" -Dfmt_DIR={pwd}/externalpackages/fmt/" ) diff --git a/manual/sphinx/developer_docs/data_types.rst b/manual/sphinx/developer_docs/data_types.rst index 2e303381f9..fa8e9e6ea6 100644 --- a/manual/sphinx/developer_docs/data_types.rst +++ b/manual/sphinx/developer_docs/data_types.rst @@ -300,7 +300,7 @@ verion of the macro:: For loops inside parallel regions, there is ``BOUT_FOR_INNER``:: Field3D f(0.0); - 
BOUT_OMP(parallel) { + BOUT_OMP_PERF(parallel) { BOUT_FOR_INNER(i, f.getMesh()->getRegion3D("RGN_ALL")) { f[i] = a[i] + b[i]; } @@ -357,7 +357,7 @@ Tuning BOUT_FOR loops The ``BOUT_FOR`` macros use two nested loops: The outer loop is OpenMP parallelised, and iterates over contiguous blocks:: - BOUT_OMP(parallel for schedule(guided)) + BOUT_OMP_PERF(parallel for schedule(guided)) for (auto block = region.getBlocks().cbegin(); block < region.getBlocks().cend(); ++block) diff --git a/manual/sphinx/user_docs/adios2.rst b/manual/sphinx/user_docs/adios2.rst index 8a6228cd3a..d8e0135c0d 100644 --- a/manual/sphinx/user_docs/adios2.rst +++ b/manual/sphinx/user_docs/adios2.rst @@ -11,14 +11,14 @@ Installation The easiest way to configure BOUT++ with ADIOS2 is to tell CMake to download and build it with this flag:: - -DBOUT_DOWNLOAD_ADIOS=ON + -DBOUT_DOWNLOAD_ADIOS2=ON The ``master`` branch will be downloaded from `Github `_, configured and built with BOUT++. -Alternatively, if ADIOS is already installed then the following flags can be used:: +Alternatively, if ADIOS2 is already installed then the following flags can be used:: - -DBOUT_USE_ADIOS=ON -DADIOS2_ROOT=/path/to/adios2 + -DBOUT_USE_ADIOS2=ON -DADIOS2_ROOT=/path/to/adios2 Output files ------------ diff --git a/manual/sphinx/user_docs/advanced_install.rst b/manual/sphinx/user_docs/advanced_install.rst index e25be12b4b..048a26a6e3 100644 --- a/manual/sphinx/user_docs/advanced_install.rst +++ b/manual/sphinx/user_docs/advanced_install.rst @@ -145,13 +145,12 @@ where ```` is the path to the build directory MPCDF HPC Systems ~~~~~~~~~~~~~~~~~ +After cloning BOUT-dev and checking out the branch you want (e.g. db-outer), run: .. 
code-block:: bash - module purge # or at least onload intel and impi and mkl - module load gcc/10 cmake/3.18 openmpi/4 - # ensure python3 is >= python3.6 - skip if you have a newer python3 loaded - mkdir -p $HOME/bin ; test -e $HOME/bin/python3 || ln -s $(which python3.6) $HOME/bin/python3 - BUILD=/ptmp/$USER/bout-deps bin/bout-build-deps.sh + module purge # or at least onload intel + module load gcc/13 anaconda/3/2021.11 impi/2021.9 hdf5-serial/1.12.2 mkl/2022.0 netcdf-serial/4.8.1 fftw-mpi/3.3.10 + BUILD=/ptmp/$USER/bout-deps NO_HDF5=1 NO_NETCDF=1 NO_FFTW=1 bin/bout-build-deps.sh and follow the instructions for configuring BOUT++. To enable openMP for a production run use: @@ -159,11 +158,11 @@ for a production run use: .. code-block:: bash module load bout-dep - cmake .. -DBOUT_USE_NETCDF=ON -DnetCDF_ROOT=$BOUT_DEP -DnetCDFCxx_ROOT=$BOUT_DEP \ + cmake .. -DBOUT_USE_NETCDF=ON -DnetCDFCxx_ROOT=$BOUT_DEP \ -DBOUT_USE_PETSC=ON -DPETSC_DIR=$BOUT_DEP \ - -DBOUT_USE_FFTW=ON -DFFTW_ROOT=$BOUT_DEP \ + -DBOUT_USE_FFTW=ON \ -DBOUT_USE_SUNDIALS=ON -DSUNDIALS_ROOT=$BOUT_DEP \ - -DBOUT_ENABLE_OPENMP=ON \ + -DBOUT_ENABLE_OPENMP=OFF \ -DCMAKE_BUILD_TYPE=Release @@ -306,9 +305,10 @@ solver. Currently, BOUT++ also supports the SUNDIALS solvers CVODE, IDA and ARKODE which are available from https://computation.llnl.gov/casc/sundials/main.html. -.. note:: BOUT++ currently supports SUNDIALS > 2.6, up to 5.4.0 as of - September 2020. It is advisable to use the highest possible - version +.. note:: BOUT++ currently supports SUNDIALS > 2.6, up to 6.7.0 as of + January 2024. It is advisable to use the highest possible + version. Support for SUNDIALS versions < 4 will be removed + in the next release. 
The full installation guide is found in the downloaded ``.tar.gz``, but we will provide a step-by-step guide to install it and make it diff --git a/manual/sphinx/user_docs/installing.rst b/manual/sphinx/user_docs/installing.rst index eb155909bf..10f5d9b9f1 100644 --- a/manual/sphinx/user_docs/installing.rst +++ b/manual/sphinx/user_docs/installing.rst @@ -373,7 +373,7 @@ For SUNDIALS, use ``-DBOUT_DOWNLOAD_SUNDIALS=ON``. If using ``ccmake`` this opti may not appear initially. This automatically sets ``BOUT_USE_SUNDIALS=ON``, and configures SUNDIALS to use MPI. -For ADIOS2, use ``-DBOUT_DOWNLOAD_ADIOS=ON``. This will download and +For ADIOS2, use ``-DBOUT_DOWNLOAD_ADIOS2=ON``. This will download and configure `ADIOS2 `_, enabling BOUT++ to read and write this high-performance parallel file format. diff --git a/requirements.txt b/requirements.txt index 75358b10db..dcbe5cef5c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -Jinja2>=2.11.3 -numpy>=1.14.1 -scipy>=1.0.0 -netcdf4~=1.6.0 -matplotlib>=2.0.0 +Jinja2~=3.1.0 +numpy~=2.0.0 +scipy>=1.11.0 +netcdf4~=1.7.1 +matplotlib>=3.7.0 Cython~=3.0.0 boututils~=0.2.1 boutdata~=0.2.1 diff --git a/src/bout++.cxx b/src/bout++.cxx index 481a928bec..ff25b1163e 100644 --- a/src/bout++.cxx +++ b/src/bout++.cxx @@ -59,7 +59,7 @@ const char DEFAULT_DIR[] = "data"; #include "bout/bout.hxx" #undef BOUT_NO_USING_NAMESPACE_BOUTGLOBALS -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 #include "bout/adios_object.hxx" #endif @@ -165,7 +165,7 @@ int BoutInitialise(int& argc, char**& argv) { savePIDtoFile(args.data_dir, MYPE); -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 bout::ADIOSInit(BoutComm::get()); #endif @@ -572,7 +572,7 @@ void printCompileTimeOptions() { constexpr auto netcdf_flavour = has_netcdf ? (has_legacy_netcdf ? 
" (Legacy)" : " (NetCDF4)") : ""; output_info.write(_("\tNetCDF support {}{}\n"), is_enabled(has_netcdf), netcdf_flavour); - output_info.write(_("\tADIOS support {}\n"), is_enabled(has_adios)); + output_info.write(_("\tADIOS2 support {}\n"), is_enabled(has_adios2)); output_info.write(_("\tPETSc support {}\n"), is_enabled(has_petsc)); output_info.write(_("\tPretty function name support {}\n"), is_enabled(has_pretty_function)); @@ -582,11 +582,8 @@ void printCompileTimeOptions() { output_info.write(_("\tSUNDIALS support {}\n"), is_enabled(has_sundials)); output_info.write(_("\tBacktrace in exceptions {}\n"), is_enabled(use_backtrace)); output_info.write(_("\tColour in logs {}\n"), is_enabled(use_color)); - output_info.write(_("\tOpenMP parallelisation {}"), is_enabled(use_openmp)); -#ifdef _OPENMP - output_info.write(_(", using {} threads"), omp_get_max_threads()); -#endif - output_info.write("\n"); + output_info.write(_("\tOpenMP parallelisation {}, using {} threads\n"), + is_enabled(use_openmp), omp_get_max_threads()); output_info.write(_("\tExtra debug output {}\n"), is_enabled(use_output_debug)); output_info.write(_("\tFloating-point exceptions {}\n"), is_enabled(use_sigfpe)); output_info.write(_("\tSignal handling support {}\n"), is_enabled(use_signal)); @@ -701,7 +698,7 @@ void addBuildFlagsToOptions(Options& options) { options["has_gettext"].force(bout::build::has_gettext); options["has_lapack"].force(bout::build::has_lapack); options["has_netcdf"].force(bout::build::has_netcdf); - options["has_adios"].force(bout::build::has_adios); + options["has_adios2"].force(bout::build::has_adios2); options["has_petsc"].force(bout::build::has_petsc); options["has_hypre"].force(bout::build::has_hypre); options["has_umpire"].force(bout::build::has_umpire); @@ -715,6 +712,7 @@ void addBuildFlagsToOptions(Options& options) { options["use_backtrace"].force(bout::build::use_backtrace); options["use_color"].force(bout::build::use_color); 
options["use_openmp"].force(bout::build::use_openmp); + options["openmp_threads"].force(omp_get_max_threads()); options["use_output_debug"].force(bout::build::use_output_debug); options["use_sigfpe"].force(bout::build::use_sigfpe); options["use_signal"].force(bout::build::use_signal); @@ -797,7 +795,7 @@ int BoutFinalise(bool write_settings) { // Call HYPER_Finalize if not already called bout::HypreLib::cleanup(); -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 bout::ADIOSFinalize(); #endif diff --git a/src/field/field3d.cxx b/src/field/field3d.cxx index b4bb0d394f..4ed9641f44 100644 --- a/src/field/field3d.cxx +++ b/src/field/field3d.cxx @@ -32,6 +32,8 @@ #include +#include "bout/parallel_boundary_op.hxx" +#include "bout/parallel_boundary_region.hxx" #include #include #include @@ -504,7 +506,7 @@ void Field3D::applyParallelBoundary(const std::string& condition) { /// Loop over the mesh boundary regions for (const auto& reg : fieldmesh->getBoundariesPar()) { auto op = std::unique_ptr{ - dynamic_cast(bfact->create(condition, reg))}; + dynamic_cast(bfact->create(condition, reg.get()))}; op->apply(*this); } } @@ -524,7 +526,7 @@ void Field3D::applyParallelBoundary(const std::string& region, for (const auto& reg : fieldmesh->getBoundariesPar()) { if (reg->label == region) { auto op = std::unique_ptr{ - dynamic_cast(bfact->create(condition, reg))}; + dynamic_cast(bfact->create(condition, reg.get()))}; op->apply(*this); break; } @@ -548,9 +550,9 @@ void Field3D::applyParallelBoundary(const std::string& region, // BoundaryFactory can't create boundaries using Field3Ds, so get temporary // boundary of the right type auto tmp = std::unique_ptr{ - dynamic_cast(bfact->create(condition, reg))}; + dynamic_cast(bfact->create(condition, reg.get()))}; // then clone that with the actual argument - auto op = std::unique_ptr{tmp->clone(reg, f)}; + auto op = std::unique_ptr{tmp->clone(reg.get(), f)}; op->apply(*this); break; } @@ -618,7 +620,7 @@ Field3D filter(const Field3D& var, int N0, 
const std::string& rgn) { const Region& region = var.getRegion2D(region_str); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { Array f(ncz / 2 + 1); @@ -668,7 +670,7 @@ Field3D lowPass(const Field3D& var, int zmax, bool keep_zonal, const std::string const Region& region = var.getRegion2D(region_str); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { Array f(ncz / 2 + 1); diff --git a/src/field/field_data.cxx b/src/field/field_data.cxx index ee8bd97b30..529f595316 100644 --- a/src/field/field_data.cxx +++ b/src/field/field_data.cxx @@ -1,4 +1,6 @@ +#include "bout/parallel_boundary_op.hxx" +#include "bout/parallel_boundary_region.hxx" #include "bout/unused.hxx" #include #include @@ -151,10 +153,9 @@ void FieldData::setBoundary(const std::string& name) { } /// Get the mesh boundary regions - std::vector par_reg = mesh->getBoundariesPar(); /// Loop over the mesh parallel boundary regions for (const auto& reg : mesh->getBoundariesPar()) { - auto* op = dynamic_cast(bfact->createFromOptions(name, reg)); + auto* op = dynamic_cast(bfact->createFromOptions(name, reg.get())); if (op != nullptr) { bndry_op_par.push_back(op); } diff --git a/src/field/fieldgenerators.hxx b/src/field/fieldgenerators.hxx index 66ef11a855..2485b4b82d 100644 --- a/src/field/fieldgenerators.hxx +++ b/src/field/fieldgenerators.hxx @@ -4,8 +4,8 @@ * These classes are used by FieldFactory */ -#ifndef __FIELDGENERATORS_H__ -#define __FIELDGENERATORS_H__ +#ifndef BOUT_FIELDGENERATORS_H +#define BOUT_FIELDGENERATORS_H #include #include @@ -352,4 +352,4 @@ private: FieldGeneratorPtr test, gt0, lt0; }; -#endif // __FIELDGENERATORS_H__ +#endif // BOUT_FIELDGENERATORS_H diff --git a/src/invert/fft_fftw.cxx b/src/invert/fft_fftw.cxx index 514396c828..d66f35beee 100644 --- a/src/invert/fft_fftw.cxx +++ b/src/invert/fft_fftw.cxx @@ -258,7 +258,7 @@ void rfft([[maybe_unused]] const BoutReal* in, [[maybe_unused]] int length, // use a `single` block here as that requires all threads to reach the // block (implicit 
barrier) which may not be true in all cases (e.g. // if there are 8 threads but only 4 call the fft routine). - BOUT_OMP(critical(rfft)) + BOUT_OMP_SAFE(critical(rfft)) if ((size != length) || (nthreads < n_th)) { if (size > 0) { // Free all memory @@ -335,7 +335,7 @@ void irfft([[maybe_unused]] const dcomplex* in, [[maybe_unused]] int length, // use a `single` block here as that requires all threads to reach the // block (implicit barrier) which may not be true in all cases (e.g. // if there are 8 threads but only 4 call the fft routine). - BOUT_OMP(critical(irfft)) + BOUT_OMP_SAFE(critical(irfft)) if ((size != length) || (nthreads < n_th)) { if (size > 0) { // Free all memory diff --git a/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx b/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx index 2687bf7187..5ce4e540b7 100644 --- a/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx +++ b/src/invert/laplace/impls/cyclic/cyclic_laplace.cxx @@ -33,11 +33,13 @@ * */ -#include "cyclic_laplace.hxx" -#include "bout/build_config.hxx" +#include "bout/build_defines.hxx" #if not BOUT_USE_METRIC_3D +#include "cyclic_laplace.hxx" +#include "bout/assert.hxx" +#include "bout/bout_types.hxx" #include #include #include @@ -47,7 +49,7 @@ #include #include -#include "cyclic_laplace.hxx" +#include LaplaceCyclic::LaplaceCyclic(Options* opt, const CELL_LOC loc, Mesh* mesh_in, Solver* UNUSED(solver)) @@ -120,18 +122,18 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { // If the flags to assign that only one guard cell should be used is set int inbndry = localmesh->xstart, outbndry = localmesh->xstart; - if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + 
if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } if (dst) { - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array( @@ -139,13 +141,13 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Loop over X indices, including boundaries but not guard cells. (unless periodic // in x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ix = xs; ix <= xe; ix++) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && (inner_boundary_flags & INVERT_SET) && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && (outer_boundary_flags & INVERT_SET) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0[ix] + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -161,7 +163,7 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Get elements of the tridiagonal matrix // including boundary conditions BoutReal zlen = getUniform(coords->dz) * (localmesh->LocalNz - 3); - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int kz = 0; kz < nmode; kz++) { // wave number is 1/[rad]; DST has extra 2. BoutReal kwave = kz * 2.0 * PI / (2. 
* zlen); @@ -169,8 +171,7 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false, // Don't include guard cells in arrays false); // Z domain not periodic } @@ -181,14 +182,14 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { cr->solve(bcmplx, xcmplx); // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array // ZFFT routine expects input of this length auto k1d = Array(localmesh->LocalNz); - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ix = xs; ix <= xe; ix++) { for (int kz = 0; kz < nmode; kz++) { k1d[kz] = xcmplx(kz, ix - xs); @@ -206,7 +207,7 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { } } else { const BoutReal zlength = getUniform(coords->zlength()); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array // ZFFT routine expects input of this length @@ -214,13 +215,13 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Loop over X indices, including boundaries but not guard // cells (unless periodic in x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ix = xs; ix <= xe; ix++) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && (inner_boundary_flags & INVERT_SET) && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && (outer_boundary_flags & INVERT_SET) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0[ix], localmesh->LocalNz, std::begin(k1d)); } else { @@ -235,14 +236,13 @@ 
FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Get elements of the tridiagonal matrix // including boundary conditions - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int kz = 0; kz < nmode; kz++) { BoutReal kwave = kz * 2.0 * PI / zlength; // wave number is 1/[rad] tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -269,15 +269,15 @@ FieldPerp LaplaceCyclic::solve(const FieldPerp& rhs, const FieldPerp& x0) { } // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array // ZFFT routine expects input of this length auto k1d = Array((localmesh->LocalNz) / 2 + 1); - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ix = xs; ix <= xe; ix++) { if (zero_DC) { k1d[0] = 0.; @@ -316,13 +316,13 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { // If the flags to assign that only one guard cell should be used is set int inbndry = localmesh->xstart, outbndry = localmesh->xstart; - if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -350,6 +350,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { const int nsys = nmode * ny; // Number of systems of equations to solve const 
int nxny = nx * ny; // Number of points in X-Y + // This is just to silence static analysis + ASSERT0(ny > 0); + auto a3D = Matrix(nsys, nx); auto b3D = Matrix(nsys, nx); auto c3D = Matrix(nsys, nx); @@ -358,7 +361,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { auto bcmplx3D = Matrix(nsys, nx); if (dst) { - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array // ZFFT routine expects input of this length @@ -366,7 +369,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { // Loop over X and Y indices, including boundaries but not guard cells. // (unless periodic in x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ind = 0; ind < nxny; ++ind) { // ind = (ix - xs)*(ye - ys + 1) + (iy - ys) int ix = xs + ind / ny; @@ -374,10 +377,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0(ix, iy) + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -393,7 +395,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { // Get elements of the tridiagonal matrix // including boundary conditions const BoutReal zlen = getUniform(coords->dz) * (localmesh->LocalNz - 3); - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nsys; ind++) { // ind = (iy - ys) * nmode + kz int iy = ys + ind / nmode; @@ -405,8 +407,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // wave number index kwave, // kwave (inverse 
wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false, // Don't include guard cells in arrays false); // Z domain not periodic } @@ -417,13 +418,13 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { cr->solve(bcmplx3D, xcmplx3D); // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array // ZFFT routine expects input of length LocalNz auto k1d = Array(localmesh->LocalNz); - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y // ind = (ix - xs)*(ye - ys + 1) + (iy - ys) int ix = xs + ind / ny; @@ -445,7 +446,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { } } else { const BoutReal zlength = getUniform(coords->zlength()); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array // ZFFT routine expects input of this length @@ -454,7 +455,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { // Loop over X and Y indices, including boundaries but not guard cells // (unless periodic in x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ind = 0; ind < nxny; ++ind) { // ind = (ix - xs)*(ye - ys + 1) + (iy - ys) int ix = xs + ind / ny; @@ -462,10 +463,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0(ix, iy), localmesh->LocalNz, std::begin(k1d)); } else { @@ -480,7 +480,7 @@ Field3D 
LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { // Get elements of the tridiagonal matrix // including boundary conditions - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nsys; ind++) { // ind = (iy - ys) * nmode + kz int iy = ys + ind / nmode; @@ -490,8 +490,7 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -502,9 +501,8 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { if (localmesh->periodicX) { // Subtract X average of kz=0 mode - BoutReal local[ny + 1]; + std::vector local(ny + 1, 0.0); for (int y = 0; y < ny; y++) { - local[y] = 0.0; for (int ix = xs; ix <= xe; ix++) { local[y] += xcmplx3D(y * nmode, ix - xs).real(); } @@ -512,8 +510,9 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { local[ny] = static_cast(xe - xs + 1); // Global reduce - BoutReal global[ny + 1]; - MPI_Allreduce(local, global, ny + 1, MPI_DOUBLE, MPI_SUM, localmesh->getXcomm()); + std::vector global(ny + 1, 0.0); + MPI_Allreduce(local.data(), global.data(), ny + 1, MPI_DOUBLE, MPI_SUM, + localmesh->getXcomm()); // Subtract average from kz=0 modes for (int y = 0; y < ny; y++) { BoutReal avg = global[y] / global[ny]; @@ -524,15 +523,15 @@ Field3D LaplaceCyclic::solve(const Field3D& rhs, const Field3D& x0) { } // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array((localmesh->LocalNz) / 2 + 1); // ZFFT routine expects input of this length - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); - BOUT_OMP(for 
nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y // ind = (ix - xs)*(ye - ys + 1) + (iy - ys) int ix = xs + ind / ny; diff --git a/src/invert/laplace/impls/cyclic/cyclic_laplace.hxx b/src/invert/laplace/impls/cyclic/cyclic_laplace.hxx index 841f0a4e05..febffa7d18 100644 --- a/src/invert/laplace/impls/cyclic/cyclic_laplace.hxx +++ b/src/invert/laplace/impls/cyclic/cyclic_laplace.hxx @@ -28,8 +28,8 @@ class LaplaceCyclic; -#ifndef __LAP_CYCLIC_H__ -#define __LAP_CYCLIC_H__ +#ifndef BOUT_LAP_CYCLIC_H +#define BOUT_LAP_CYCLIC_H #include "bout/build_config.hxx" #include "bout/invert_laplace.hxx" @@ -125,4 +125,4 @@ private: #endif // BOUT_USE_METRIC_3D -#endif // __SPT_H__ +#endif // BOUT_LAP_CYCLIC_H diff --git a/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx b/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx index c74e184be3..d789e5e408 100644 --- a/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx +++ b/src/invert/laplace/impls/hypre3d/hypre3d_laplace.cxx @@ -99,7 +99,7 @@ LaplaceHypre3d::LaplaceHypre3d(Options* opt, const CELL_LOC loc, Mesh* mesh_in, // Set up boundary conditions in operator BOUT_FOR_SERIAL(i, indexer->getRegionInnerX()) { - if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann on inner X boundary operator3D(i, i) = -1. / coords->dx[i] / sqrt(coords->g_11[i]); operator3D(i, i.xp()) = 1. / coords->dx[i] / sqrt(coords->g_11[i]); @@ -111,7 +111,7 @@ LaplaceHypre3d::LaplaceHypre3d(Options* opt, const CELL_LOC loc, Mesh* mesh_in, } BOUT_FOR_SERIAL(i, indexer->getRegionOuterX()) { - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann on outer X boundary operator3D(i, i) = 1. / coords->dx[i] / sqrt(coords->g_11[i]); operator3D(i, i.xm()) = -1. 
/ coords->dx[i] / sqrt(coords->g_11[i]); @@ -180,9 +180,9 @@ Field3D LaplaceHypre3d::solve(const Field3D& b_in, const Field3D& x0) { // Adjust vectors to represent boundary conditions and check that // boundary cells are finite BOUT_FOR_SERIAL(i, indexer->getRegionInnerX()) { - const BoutReal val = (inner_boundary_flags & INVERT_SET) ? x0[i] : 0.; + const BoutReal val = isInnerBoundaryFlagSet(INVERT_SET) ? x0[i] : 0.; ASSERT1(std::isfinite(val)); - if (!(inner_boundary_flags & INVERT_RHS)) { + if (!(isInnerBoundaryFlagSet(INVERT_RHS))) { b[i] = val; } else { ASSERT1(std::isfinite(b[i])); @@ -190,9 +190,9 @@ Field3D LaplaceHypre3d::solve(const Field3D& b_in, const Field3D& x0) { } BOUT_FOR_SERIAL(i, indexer->getRegionOuterX()) { - const BoutReal val = (outer_boundary_flags & INVERT_SET) ? x0[i] : 0.; + const BoutReal val = (isOuterBoundaryFlagSet(INVERT_SET)) ? x0[i] : 0.; ASSERT1(std::isfinite(val)); - if (!(outer_boundary_flags & INVERT_RHS)) { + if (!(isOuterBoundaryFlagSet(INVERT_RHS))) { b[i] = val; } else { ASSERT1(std::isfinite(b[i])); diff --git a/src/invert/laplace/impls/hypre3d/hypre3d_laplace.hxx b/src/invert/laplace/impls/hypre3d/hypre3d_laplace.hxx index c9c44ac19e..05a0604c4f 100644 --- a/src/invert/laplace/impls/hypre3d/hypre3d_laplace.hxx +++ b/src/invert/laplace/impls/hypre3d/hypre3d_laplace.hxx @@ -30,8 +30,8 @@ class LaplaceHypre3d; #include "bout/build_config.hxx" -#ifndef __LAPLACE_HYPRE3D_H__ -#define __LAPLACE_HYPRE3D_H__ +#ifndef BOUT_LAPLACE_HYPRE3D_H +#define BOUT_LAPLACE_HYPRE3D_H #if BOUT_HAS_HYPRE @@ -227,4 +227,4 @@ public: #endif // BOUT_HAS_HYPRE -#endif //__LAPLACE_HYPRE3D_H__ +#endif //BOUT_LAPLACE_HYPRE3D_H diff --git a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx index 2457ff3b8e..f79463769a 100644 --- a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx +++ 
b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.cxx @@ -293,12 +293,10 @@ FieldPerp LaplaceIPT::solve(const FieldPerp& b, const FieldPerp& x0) { */ auto bcmplx = Matrix(nmode, ncx); - const bool invert_inner_boundary = - isInnerBoundaryFlagSet(INVERT_SET) and localmesh->firstX(); - const bool invert_outer_boundary = - isOuterBoundaryFlagSet(INVERT_SET) and localmesh->lastX(); + const bool invert_inner_boundary = isInnerBoundaryFlagSetOnFirstX(INVERT_SET); + const bool invert_outer_boundary = isOuterBoundaryFlagSetOnLastX(INVERT_SET); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int ix = 0; ix < ncx; ix++) { /* This for loop will set the bk (initialized by the constructor) * bk is the z fourier modes of b in z @@ -345,8 +343,7 @@ FieldPerp LaplaceIPT::solve(const FieldPerp& b, const FieldPerp& x0) { kz, // wave number (different from kz only if we are taking a part // of the z-domain [and not from 0 to 2*pi]) - kz * kwaveFactor, global_flags, inner_boundary_flags, - outer_boundary_flags, &A, &C, &D); + kz * kwaveFactor, &A, &C, &D); // Patch up internal boundaries if (not localmesh->lastX()) { diff --git a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx index 563ae7e61f..02e3eca06c 100644 --- a/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx +++ b/src/invert/laplace/impls/iterative_parallel_tri/iterative_parallel_tri.hxx @@ -26,8 +26,8 @@ class LaplaceIPT; -#ifndef __IPT_H__ -#define __IPT_H__ +#ifndef BOUT_IPT_H +#define BOUT_IPT_H #include "bout/build_config.hxx" #include "bout/invert_laplace.hxx" @@ -234,16 +234,8 @@ private: /// First and last interior points xstart, xend int xs, xe; - - bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; } - bool isInnerBoundaryFlagSet(int flag) const { - return (inner_boundary_flags & flag) != 0; - } - bool isOuterBoundaryFlagSet(int 
flag) const { - return (outer_boundary_flags & flag) != 0; - } }; #endif // BOUT_USE_METRIC_3D -#endif // __IPT_H__ +#endif // BOUT_IPT_H diff --git a/src/invert/laplace/impls/multigrid/multigrid_alg.cxx b/src/invert/laplace/impls/multigrid/multigrid_alg.cxx index 88556e02ad..fa97a43116 100644 --- a/src/invert/laplace/impls/multigrid/multigrid_alg.cxx +++ b/src/invert/laplace/impls/multigrid/multigrid_alg.cxx @@ -104,14 +104,14 @@ void MultigridAlg::getSolution(BoutReal* x, BoutReal* b, int flag) { Array r(ldim); for (int n = 1; n < flag; n++) { residualVec(level, x, b, std::begin(r)); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { y[i] = 0.0; } cycleMG(level, std::begin(y), std::begin(r)); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { x[i] = x[i] + y[i]; } @@ -135,8 +135,8 @@ void MultigridAlg::cycleMG(int level, BoutReal* sol, BoutReal* rhs) { projection(level, std::begin(r), std::begin(pr)); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < (lnx[level - 1] + 2) * (lnz[level - 1] + 2); i++) { y[i] = 0.0; } @@ -144,8 +144,8 @@ void MultigridAlg::cycleMG(int level, BoutReal* sol, BoutReal* rhs) { cycleMG(level - 1, std::begin(y), std::begin(pr)); prolongation(level - 1, std::begin(y), std::begin(iy)); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < (lnx[level] + 2) * (lnz[level] + 2); i++) { sol[i] += iy[i]; } @@ -156,15 +156,15 @@ void MultigridAlg::cycleMG(int level, BoutReal* sol, BoutReal* rhs) { void MultigridAlg::projection(int level, BoutReal* r, BoutReal* pr) { - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int 
i = 0; i < (lnx[level - 1] + 2) * (lnz[level - 1] + 2); i++) { pr[i] = 0.; } int xend = lnx[level - 1] + 1; int zend = lnz[level - 1] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < xend; i++) { for (int k = 1; k < zend; k++) { int i2 = 2 * i - 1; @@ -183,16 +183,16 @@ void MultigridAlg::projection(int level, BoutReal* r, BoutReal* pr) { void MultigridAlg::prolongation(int level, BoutReal* x, BoutReal* ix) { - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < (lnx[level + 1] + 2) * (lnz[level + 1] + 2); i++) { ix[i] = 0.; } int xend = lnx[level] + 1; int zend = lnz[level] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < xend; i++) { for (int k = 1; k < zend; k++) { int i2 = 2 * i - 1; @@ -219,16 +219,16 @@ void MultigridAlg::smoothings(int level, BoutReal* x, BoutReal* b) { dim = mm * (lnx[level] + 2); if (mgsm == 0) { Array x0(dim); - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) for (int num = 0; num < 2; num++) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < dim; i++) { x0[i] = x[i]; } int xend = lnx[level] + 1; int zend = lnz[level] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < xend; i++) { for (int k = 1; k < zend; k++) { int nn = i * mm + k; @@ -313,8 +313,8 @@ void MultigridAlg::pGMRES(BoutReal* sol, BoutReal* rhs, int level, int iplag) { Array q(ldim); Array r(ldim); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { sol[i] = 0.0; } @@ -335,8 +335,8 @@ void MultigridAlg::pGMRES(BoutReal* sol, BoutReal* rhs, int level, int iplag) { delete[] v; return; } - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { r[i] = 0.0; } @@ -345,8 
+345,8 @@ void MultigridAlg::pGMRES(BoutReal* sol, BoutReal* rhs, int level, int iplag) { } else { cycleMG(level, std::begin(r), rhs); } - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { v[0][i] = r[i]; } @@ -360,21 +360,21 @@ void MultigridAlg::pGMRES(BoutReal* sol, BoutReal* rhs, int level, int iplag) { } a0 = 1.0 / a1; g[0] = a1; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { v[0][i] *= a0; } - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 1; i < MAXGM + 1; i++) { g[i] = 0.0; } } for (it = 0; it < MAXGM; it++) { multiAVec(level, v[it], std::begin(q)); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { v[it + 1][i] = 0.0; } @@ -407,8 +407,8 @@ void MultigridAlg::pGMRES(BoutReal* sol, BoutReal* rhs, int level, int iplag) { } a0 = 1.0 / a1; h[it + 1][it] = a1; - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { v[it + 1][i] *= a0; } @@ -444,13 +444,13 @@ void MultigridAlg::pGMRES(BoutReal* sol, BoutReal* rhs, int level, int iplag) { } y[i] = y[i] / h[i][i]; } - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { p[i] = sol[i]; } - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int k = 0; k < ldim; k++) { for (int i = 0; i <= it; i++) { p[k] += y[i] * v[i][k]; @@ -492,8 +492,8 @@ void MultigridAlg::pGMRES(BoutReal* sol, BoutReal* rhs, int level, int iplag) { perror = error; } /* Restart with new initial */ - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { v[0][i] = 0.0; } @@ -503,8 +503,8 
@@ void MultigridAlg::pGMRES(BoutReal* sol, BoutReal* rhs, int level, int iplag) { cycleMG(level, v[0], std::begin(r)); } - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { sol[i] = p[i]; } @@ -559,11 +559,11 @@ BoutReal MultigridAlg::vectorProd(int level, BoutReal* x, BoutReal* y) { BoutReal val; BoutReal ini_e = 0.0; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { int xend = lnx[level] + 1; int zend = lnz[level] + 1; - BOUT_OMP(for reduction(+:ini_e) collapse(2)) + BOUT_OMP_PERF(for reduction(+:ini_e) collapse(2)) for (int i = 1; i < xend; i++) { for (int k = 1; k < zend; k++) { int ii = i * (lnz[level] + 2) + k; @@ -583,16 +583,16 @@ BoutReal MultigridAlg::vectorProd(int level, BoutReal* x, BoutReal* y) { void MultigridAlg::multiAVec(int level, BoutReal* x, BoutReal* b) { int mm = lnz[level] + 2; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < mm * (lnx[level] + 2); i++) { b[i] = 0.0; } int xend = lnx[level] + 1; int zend = lnz[level] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < xend; i++) { for (int k = 1; k < zend; k++) { int nn = i * mm + k; @@ -614,16 +614,16 @@ void MultigridAlg::residualVec(int level, BoutReal* x, BoutReal* b, BoutReal* r) int mm; mm = lnz[level] + 2; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < mm * (lnx[level] + 2); i++) { r[i] = 0.0; } int xend = lnx[level] + 1; int zend = lnz[level] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < xend; i++) { for (int k = 1; k < zend; k++) { int nn = i * mm + k; @@ -646,16 +646,16 @@ void MultigridAlg::setMatrixC(int level) { BoutReal ratio = 8.0; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel 
default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < (lnx[level - 1] + 2) * (lnz[level - 1] + 2) * 9; i++) { matmg[level - 1][i] = 0.0; } int xend = lnx[level - 1] + 1; int zend = lnz[level - 1] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < xend; i++) { for (int k = 1; k < zend; k++) { int i2 = 2 * i - 1; @@ -809,8 +809,8 @@ void MultigridAlg::solveMG(BoutReal* sol, BoutReal* rhs, int level) { BoutReal ini_e, perror, error, rederr; int ldim = (lnx[level] + 2) * (lnz[level] + 2); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { sol[i] = 0.0; } @@ -825,22 +825,22 @@ void MultigridAlg::solveMG(BoutReal* sol, BoutReal* rhs, int level) { } Array y(ldim); Array r(ldim); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { r[i] = rhs[i]; } perror = ini_e; for (m = 0; m < MAXIT; m++) { - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { y[i] = 0.0; } cycleMG(level, std::begin(y), std::begin(r)); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < ldim; i++) { sol[i] = sol[i] + y[i]; } diff --git a/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx b/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx index beb9262ed8..c5076cd499 100644 --- a/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx +++ b/src/invert/laplace/impls/multigrid/multigrid_laplace.cxx @@ -67,7 +67,6 @@ LaplaceMultigrid::LaplaceMultigrid(Options* opt, const CELL_LOC loc, Mesh* mesh_ opts->get("atol", atol, pow(10.0, -20), true); opts->get("dtol", dtol, pow(10.0, 5), true); opts->get("smtype", mgsm, 1, true); -#if BOUT_USE_OPENMP if (mgsm != 0 && 
omp_get_max_threads() > 1) { output_warn << "WARNING: in multigrid Laplace solver, for smtype!=0 the smoothing " "cannot be parallelised with OpenMP threads." @@ -75,7 +74,6 @@ LaplaceMultigrid::LaplaceMultigrid(Options* opt, const CELL_LOC loc, Mesh* mesh_ << " Consider using smtype=0 instead when using OpenMP threads." << endl; } -#endif opts->get("jacomega", omega, 0.8, true); opts->get("solvertype", mgplag, 1, true); opts->get("cftype", cftype, 0, true); @@ -86,19 +84,18 @@ LaplaceMultigrid::LaplaceMultigrid(Options* opt, const CELL_LOC loc, Mesh* mesh_ // Initialize, allocate memory, etc. comms_tagbase = 385; // Some random number - int implemented_global_flags = INVERT_START_NEW; - if (global_flags & ~implemented_global_flags) { + constexpr int implemented_global_flags = INVERT_START_NEW; + if (isGlobalFlagSet(~implemented_global_flags)) { throw BoutException("Attempted to set Laplacian inversion flag that is not " "implemented in LaplaceMultigrid."); } - int implemented_boundary_flags = - INVERT_AC_GRAD + INVERT_SET - + INVERT_DC_GRAD; // INVERT_DC_GRAD does not actually do anything, but harmless to set while comparing to Fourier solver with Neumann boundary conditions - if (inner_boundary_flags & ~implemented_boundary_flags) { + // INVERT_DC_GRAD does not actually do anything, but harmless to set while comparing to Fourier solver with Neumann boundary conditions + constexpr int implemented_boundary_flags = INVERT_AC_GRAD + INVERT_SET + INVERT_DC_GRAD; + if (isInnerBoundaryFlagSet(~implemented_boundary_flags)) { throw BoutException("Attempted to set Laplacian inner boundary inversion flag that " "is not implemented in LaplaceMultigrid."); } - if (outer_boundary_flags & ~implemented_boundary_flags) { + if (isOuterBoundaryFlagSet(~implemented_boundary_flags)) { throw BoutException("Attempted to set Laplacian outer boundary inversion flag that " "is not implemented in LaplaceMultigrid."); } @@ -218,11 +215,9 @@ LaplaceMultigrid::LaplaceMultigrid(Options* opt, 
const CELL_LOC loc, Mesh* mesh_ } else { output << "Multigrid solver with merging " << mgmpi << endl; } -#if BOUT_USE_OPENMP - BOUT_OMP(parallel) - BOUT_OMP(master) + BOUT_OMP_SAFE(parallel) + BOUT_OMP_SAFE(master) { output << "Num threads = " << omp_get_num_threads() << endl; } -#endif } } @@ -246,10 +241,10 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { int lz2 = lzz + 2; int lxx = kMG->lnx[level]; - if (global_flags & INVERT_START_NEW) { + if (isGlobalFlagSet(INVERT_START_NEW)) { // set initial guess to zero - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < lxx + 1; i++) { for (int k = 1; k < lzz + 1; k++) { x[i * lz2 + k] = 0.; @@ -257,8 +252,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // Read initial guess into local array, ignoring guard cells - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < lxx + 1; i++) { for (int k = 1; k < lzz + 1; k++) { int i2 = i - 1 + localmesh->xstart; @@ -269,8 +264,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } // Read RHS into local array - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < lxx + 1; i++) { for (int k = 1; k < lzz + 1; k++) { int i2 = i - 1 + localmesh->xstart; @@ -280,12 +275,12 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } if (localmesh->firstX()) { - if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify gradient to set at inner boundary - 
BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; x[k] = -x0(localmesh->xstart - 1, k2) @@ -294,8 +289,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // zero gradient inner boundary condition - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { // set inner guard cells x[k] = 0.0; @@ -303,10 +298,10 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // Dirichlet boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify value to set at inner boundary - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; x[k] = 2. 
* x0(localmesh->xstart - 1, k2); @@ -314,8 +309,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // zero value inner boundary condition - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { // set inner guard cells x[k] = 0.; @@ -324,12 +319,12 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } if (localmesh->lastX()) { - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify gradient to set at outer boundary - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; x[(lxx + 1) * lz2 + k] = x0(localmesh->xend + 1, k2) @@ -339,8 +334,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // zero gradient outer boundary condition - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { // set outer guard cells x[(lxx + 1) * lz2 + k] = 0.; @@ -348,10 +343,10 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // Dirichlet boundary condition - if (outer_boundary_flags & INVERT_SET) { + if (isOuterBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify value to set at outer boundary - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; x[(lxx + 1) * lz2 + k] = 2. 
* x0(localmesh->xend + 1, k2); @@ -359,8 +354,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // zero value inner boundary condition - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { // set outer guard cells x[(lxx + 1) * lz2 + k] = 0.; @@ -370,8 +365,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } // Exchange ghost cells of initial guess - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < lxx + 2; i++) { x[i * lz2] = x[(i + 1) * lz2 - 2]; x[(i + 1) * lz2 - 1] = x[i * lz2 + 1]; @@ -471,8 +466,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { #endif // Copy solution into a FieldPerp to return - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < lxx + 1; i++) { for (int k = 1; k < lzz + 1; k++) { int i2 = i - 1 + localmesh->xstart; @@ -481,13 +476,13 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } if (localmesh->firstX()) { - if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify gradient to set at inner boundary int i2 = -1 + localmesh->xstart; - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; result(i2, k2) = x[lz2 + k] @@ -498,8 +493,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } else { // zero gradient inner boundary condition int i2 = -1 + localmesh->xstart; - BOUT_OMP(parallel 
default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; result(i2, k2) = x[lz2 + k]; @@ -507,11 +502,11 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // Dirichlet boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify value to set at inner boundary int i2 = -1 + localmesh->xstart; - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; result(i2, k2) = 2. * x0(localmesh->xstart - 1, k2) - x[lz2 + k]; @@ -519,8 +514,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } else { // zero value inner boundary condition int i2 = -1 + localmesh->xstart; - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; result(i2, k2) = -x[lz2 + k]; @@ -529,13 +524,13 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } if (localmesh->lastX()) { - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition - if (inner_boundary_flags & INVERT_SET) { + if (isInnerBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify gradient to set at outer boundary int i2 = lxx + localmesh->xstart; - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; result(i2, k2) = x[lxx * lz2 + k] @@ -546,8 +541,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } else { // zero gradient outer boundary condition int i2 = lxx + localmesh->xstart; - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + 
BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; result(i2, k2) = x[lxx * lz2 + k]; @@ -555,11 +550,11 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } } else { // Dirichlet boundary condition - if (outer_boundary_flags & INVERT_SET) { + if (isOuterBoundaryFlagSet(INVERT_SET)) { // guard cells of x0 specify value to set at outer boundary int i2 = lxx + localmesh->xstart; - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; result(i2, k2) = 2. * x0(localmesh->xend + 1, k2) - x[lxx * lz2 + k]; @@ -567,8 +562,8 @@ FieldPerp LaplaceMultigrid::solve(const FieldPerp& b_in, const FieldPerp& x0) { } else { // zero value inner boundary condition int i2 = lxx + localmesh->xstart; - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < lzz + 1; k++) { int k2 = k - 1; result(i2, k2) = -x[lxx * lz2 + k]; @@ -592,8 +587,8 @@ void LaplaceMultigrid::generateMatrixF(int level) { int llx = kMG->lnx[level]; int llz = kMG->lnz[level]; - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for collapse(2)) for (int i = 1; i < llx + 1; i++) { for (int k = 1; k < llz + 1; k++) { int i2 = i - 1 + localmesh->xstart; @@ -655,10 +650,10 @@ void LaplaceMultigrid::generateMatrixF(int level) { // Here put boundary conditions if (kMG->rProcI == 0) { - if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < llz + 1; k++) { int ic = llz + 2 + k; mat[ic * 9 + 3] += mat[ic * 9]; @@ -673,8 +668,8 @@ void LaplaceMultigrid::generateMatrixF(int 
level) { } } else { // Dirichlet boundary condition - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < llz + 1; k++) { int ic = llz + 2 + k; mat[ic * 9 + 3] -= mat[ic * 9]; @@ -690,10 +685,10 @@ void LaplaceMultigrid::generateMatrixF(int level) { } } if (kMG->rProcI == kMG->xNP - 1) { - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Neumann boundary condition - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < llz + 1; k++) { int ic = llx * (llz + 2) + k; mat[ic * 9 + 3] += mat[ic * 9 + 6]; @@ -708,8 +703,8 @@ void LaplaceMultigrid::generateMatrixF(int level) { } } else { // Dirichlet boundary condition - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int k = 1; k < llz + 1; k++) { int ic = llx * (llz + 2) + k; mat[ic * 9 + 3] -= mat[ic * 9 + 6]; diff --git a/src/invert/laplace/impls/multigrid/multigrid_laplace.hxx b/src/invert/laplace/impls/multigrid/multigrid_laplace.hxx index 4186147874..f0b3cfc5c1 100644 --- a/src/invert/laplace/impls/multigrid/multigrid_laplace.hxx +++ b/src/invert/laplace/impls/multigrid/multigrid_laplace.hxx @@ -28,8 +28,8 @@ * **************************************************************************/ -#ifndef __MULTIGRID_LAPLACE_H__ -#define __MULTIGRID_LAPLACE_H__ +#ifndef BOUT_MULTIGRID_LAPLACE_H +#define BOUT_MULTIGRID_LAPLACE_H #include "bout/build_config.hxx" #include "bout/invert_laplace.hxx" @@ -246,4 +246,4 @@ RegisterLaplace registerlaplacemultigrid(LAPLACE_MULTIGRID); #endif // BOUT_USE_METRIC_3D -#endif // __MULTIGRID_LAPLACE_H__ +#endif // BOUT_MULTIGRID_LAPLACE_H diff --git a/src/invert/laplace/impls/multigrid/multigrid_solver.cxx b/src/invert/laplace/impls/multigrid/multigrid_solver.cxx index 6d448e4db7..0c5ad82d6c 100644 --- 
a/src/invert/laplace/impls/multigrid/multigrid_solver.cxx +++ b/src/invert/laplace/impls/multigrid/multigrid_solver.cxx @@ -290,15 +290,15 @@ void Multigrid1DP::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { int nx = (xProcI % rMG->zNP) * lnx[0]; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < dim; i++) { y[i] = 0.0; r[i] = 0.0; } - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < dimg; i++) { yl[i] = 0.0; yg[i] = 0.0; @@ -306,7 +306,7 @@ void Multigrid1DP::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { int xend = lnx[0] + 1; int zend = lnz[0] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = (nx + ix) * (lnz[0] + 2) + iz; @@ -319,11 +319,11 @@ void Multigrid1DP::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { MPI_SUM, comm2D); int nz = (xProcI % rMG->zNP) * (rMG->lnz[level]); - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { int xend = rMG->lnx[level] + 1; int zend = rMG->lnz[level] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = ix * (lnz[0] + 2) + nz + iz; @@ -335,9 +335,9 @@ void Multigrid1DP::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { rMG->getSolution(std::begin(y), std::begin(r), 1); - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < dimg; i++) { yl[i] = 0.0; yg[i] = 0.0; @@ -345,7 +345,7 @@ void Multigrid1DP::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { int xend = rMG->lnx[level] + 1; int zend = rMG->lnz[level] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = ix * (lnz[0] + 2) + nz + iz; @@ 
-357,11 +357,11 @@ void Multigrid1DP::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { bout::globals::mpi->MPI_Allreduce(std::begin(yl), std::begin(yg), dimg, MPI_DOUBLE, MPI_SUM, comm2D); - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { int xend = lnx[0] + 1; int zend = lnz[0] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = (nx + ix) * (lnz[0] + 2) + iz; @@ -377,16 +377,16 @@ void Multigrid1DP::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { Array y(dim); Array r(dim); int nx = xProcI * lnx[0]; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < dim; i++) { y[i] = 0.0; r[i] = 0.0; } int xend = lnx[0] + 1; int zend = lnz[0] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = (nx + ix) * (lnz[0] + 2) + iz; @@ -397,18 +397,18 @@ void Multigrid1DP::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { } bout::globals::mpi->MPI_Allreduce(std::begin(y), std::begin(r), dim, MPI_DOUBLE, MPI_SUM, commMG); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < dim; i++) { y[i] = 0.0; } sMG->getSolution(std::begin(y), std::begin(r), 1); - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { int xend = lnx[0] + 1; int zend = lnz[0] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = (nx + ix) * (lnz[0] + 2) + iz; @@ -430,21 +430,21 @@ void Multigrid1DP::convertMatrixF2D(int level) { Array yl(dim * 9); Array yg(dim * 9); int nx = (xProcI % rMG->zNP) * lnx[0]; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - 
BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < dim * 9; i++) { yl[i] = 0.0; yg[i] = 0.0; } - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < (rMG->lnx[level] + 2) * (rMG->lnz[level] + 2) * 9; i++) { rMG->matmg[level][i] = 0.0; } int xend = lnx[0] + 1; int zend = lnz[0] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = (nx + ix) * (lnz[0] + 2) + iz; @@ -494,11 +494,11 @@ void Multigrid1DP::convertMatrixF2D(int level) { } int nz = (xProcI % rMG->zNP) * (rMG->lnz[level]); - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { int xend = rMG->lnx[level] + 1; int zend = rMG->lnz[level] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = ix * (lnz[0] + 2) + nz + iz; @@ -517,16 +517,16 @@ void Multigrid1DP::convertMatrixFS(int level) { Array yl(dim * 9); BoutReal* yg = sMG->matmg[level]; int nx = xProcI * lnx[0]; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < dim * 9; i++) { yl[i] = 0.0; yg[i] = 0.0; } int xend = lnx[0] + 1; int zend = lnz[0] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = (nx + ix) * (lnz[0] + 2) + iz; @@ -675,9 +675,9 @@ void Multigrid2DPf1D::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { Array r(dim); int nx = xProcI * lnx[0]; int nz = zProcI * lnz[0]; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < dim; i++) { y[i] = 0.0; r[i] = 0.0; @@ -685,7 +685,7 @@ void Multigrid2DPf1D::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { int xend = lnx[0] + 1; int zend = lnz[0] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for 
collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = (nx + ix) * (gnz[0] + 2) + nz + iz; @@ -696,17 +696,17 @@ void Multigrid2DPf1D::lowestSolver(BoutReal* x, BoutReal* b, int UNUSED(plag)) { } bout::globals::mpi->MPI_Allreduce(std::begin(y), std::begin(r), dim, MPI_DOUBLE, MPI_SUM, commMG); - BOUT_OMP(parallel default(shared)) - BOUT_OMP(for) + BOUT_OMP_PERF(parallel default(shared)) + BOUT_OMP_PERF(for) for (int i = 0; i < dim; i++) { y[i] = 0.0; } sMG->getSolution(std::begin(y), std::begin(r), 1); - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { int xend = lnx[0] + 1; int zend = lnz[0] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = (nx + ix) * (gnz[0] + 2) + nz + iz; @@ -728,16 +728,16 @@ void Multigrid2DPf1D::convertMatrixFS(int level) { BoutReal* yg = sMG->matmg[level]; int nx = xProcI * lnx[0]; int nz = zProcI * lnz[0]; - BOUT_OMP(parallel default(shared)) + BOUT_OMP_PERF(parallel default(shared)) { - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int i = 0; i < dim * 9; i++) { yl[i] = 0.0; yg[i] = 0.0; } int xend = lnx[0] + 1; int zend = lnz[0] + 1; - BOUT_OMP(for collapse(2)) + BOUT_OMP_PERF(for collapse(2)) for (int ix = 1; ix < xend; ix++) { for (int iz = 1; iz < zend; iz++) { int nn = (nx + ix) * (gnz[0] + 2) + nz + iz; diff --git a/src/invert/laplace/impls/naulin/naulin_laplace.cxx b/src/invert/laplace/impls/naulin/naulin_laplace.cxx index d82f874cbb..e315d3c771 100644 --- a/src/invert/laplace/impls/naulin/naulin_laplace.cxx +++ b/src/invert/laplace/impls/naulin/naulin_laplace.cxx @@ -174,9 +174,9 @@ LaplaceNaulin::LaplaceNaulin(Options* opt, const CELL_LOC loc, Mesh* mesh_in, // invert Delp2 and we will not converge ASSERT0(delp2type == "cyclic" || delp2type == "spt" || delp2type == "tri"); // Use same flags for FFT solver as for NaulinSolver - delp2solver->setGlobalFlags(global_flags); - 
delp2solver->setInnerBoundaryFlags(inner_boundary_flags); - delp2solver->setOuterBoundaryFlags(outer_boundary_flags); + delp2solver->setGlobalFlags(getGlobalFlags()); + delp2solver->setInnerBoundaryFlags(getInnerBoundaryFlags()); + delp2solver->setOuterBoundaryFlags(getOuterBoundaryFlags()); static int naulinsolver_count = 1; setPerformanceName(fmt::format("{}{}", "naulinsolver", ++naulinsolver_count)); @@ -258,7 +258,7 @@ Field3D LaplaceNaulin::solve(const Field3D& rhs, const Field3D& x0) { // Note take a copy of the 'b' argument, because we want to return a copy of it in the // result - if ((inner_boundary_flags & INVERT_SET) || (outer_boundary_flags & INVERT_SET)) { + if (isInnerBoundaryFlagSet(INVERT_SET) || isOuterBoundaryFlagSet(INVERT_SET)) { // This passes in the boundary conditions from x0's guard cells copy_x_boundaries(x_guess, x0, localmesh); } diff --git a/src/invert/laplace/impls/naulin/naulin_laplace.hxx b/src/invert/laplace/impls/naulin/naulin_laplace.hxx index f544e74336..e464ef18e7 100644 --- a/src/invert/laplace/impls/naulin/naulin_laplace.hxx +++ b/src/invert/laplace/impls/naulin/naulin_laplace.hxx @@ -25,8 +25,8 @@ class LaplaceNaulin; -#ifndef __LAP_NAULIN_H__ -#define __LAP_NAULIN_H__ +#ifndef BOUT_LAP_NAULIN_H +#define BOUT_LAP_NAULIN_H #include #include @@ -179,4 +179,4 @@ private: void copy_x_boundaries(Field3D& x, const Field3D& x0, Mesh* mesh); }; -#endif // __LAP_NAULIN_H__ +#endif // BOUT_LAP_NAULIN_H diff --git a/src/invert/laplace/impls/pcr/pcr.cxx b/src/invert/laplace/impls/pcr/pcr.cxx index 9402ba9f1b..48bbdbac4b 100644 --- a/src/invert/laplace/impls/pcr/pcr.cxx +++ b/src/invert/laplace/impls/pcr/pcr.cxx @@ -149,19 +149,19 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { // If the flags to assign that only one guard cell should be used is set inbndry = localmesh->xstart; outbndry = localmesh->xstart; - if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if 
(isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } if (dst) { const BoutReal zlen = getUniform(coords->dz) * (localmesh->LocalNz - 3); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array( @@ -169,14 +169,13 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Loop over X indices, including boundaries but not guard cells. (unless periodic // in x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ix = xs; ix <= xe; ix++) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0[ix] + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -191,7 +190,7 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Get elements of the tridiagonal matrix // including boundary conditions - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int kz = 0; kz < nmode; kz++) { BoutReal kwave = kz * 2.0 * PI / (2. * zlen); // wave number is 1/[rad]; DST has extra 2. 
@@ -199,23 +198,22 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } - } // BOUT_OMP(parallel) + } // BOUT_OMP_PERF(parallel) // Solve tridiagonal systems cr_pcr_solver(a, b, c, bcmplx, xcmplx); // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array( localmesh->LocalNz); // ZFFT routine expects input of this length - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ix = xs; ix <= xe; ix++) { for (int kz = 0; kz < nmode; kz++) { k1d[kz] = xcmplx(kz, ix - xs); @@ -233,7 +231,7 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { } } else { const BoutReal zlength = getUniform(coords->zlength()); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array((localmesh->LocalNz) / 2 @@ -241,14 +239,13 @@ FieldPerp LaplacePCR::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Loop over X indices, including boundaries but not guard cells (unless periodic in // x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ix = xs; ix <= xe; ix++) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0[ix], localmesh->LocalNz, std::begin(k1d)); } else { @@ -263,31 +260,30 @@ FieldPerp LaplacePCR::solve(const 
FieldPerp& rhs, const FieldPerp& x0) { // Get elements of the tridiagonal matrix // including boundary conditions - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int kz = 0; kz < nmode; kz++) { BoutReal kwave = kz * 2.0 * PI / zlength; // wave number is 1/[rad] tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } - } // BOUT_OMP(parallel) + } // BOUT_OMP_PERF(parallel) // Solve tridiagonal systems cr_pcr_solver(a, b, c, bcmplx, xcmplx); // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array((localmesh->LocalNz) / 2 + 1); // ZFFT routine expects input of this length - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ix = xs; ix <= xe; ix++) { if (zero_DC) { k1d[0] = 0.; @@ -327,13 +323,13 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { // If the flags to assign that only one guard cell should be used is set inbndry = localmesh->xstart; outbndry = localmesh->xstart; - if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -371,7 +367,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { if (dst) { const BoutReal zlen = getUniform(coords->dz) * (localmesh->LocalNz - 3); - 
BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array( @@ -379,7 +375,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { // Loop over X and Y indices, including boundaries but not guard cells. // (unless periodic in x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ind = 0; ind < nxny; ++ind) { // ind = (ix - xs)*(ye - ys + 1) + (iy - ys) int ix = xs + ind / ny; @@ -387,10 +383,9 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0(ix, iy) + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -405,7 +400,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { // Get elements of the tridiagonal matrix // including boundary conditions - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nsys; ind++) { // ind = (iy - ys) * nmode + kz int iy = ys + ind / nmode; @@ -417,23 +412,22 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } - } // BOUT_OMP(parallel) + } // BOUT_OMP_PERF(parallel) // Solve tridiagonal systems cr_pcr_solver(a3D, b3D, c3D, bcmplx3D, xcmplx3D); // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working 
array auto k1d = Array( localmesh->LocalNz); // ZFFT routine expects input of this length - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y // ind = (ix - xs)*(ye - ys + 1) + (iy - ys) int ix = xs + ind / ny; @@ -455,7 +449,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { } } else { const BoutReal zlength = getUniform(coords->zlength()); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array(localmesh->LocalNz / 2 @@ -464,7 +458,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { // Loop over X and Y indices, including boundaries but not guard cells // (unless periodic in x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ind = 0; ind < nxny; ++ind) { // ind = (ix - xs)*(ye - ys + 1) + (iy - ys) int ix = xs + ind / ny; @@ -472,10 +466,9 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0(ix, iy), localmesh->LocalNz, std::begin(k1d)); } else { @@ -490,7 +483,7 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { // Get elements of the tridiagonal matrix // including boundary conditions - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nsys; ind++) { // ind = (iy - ys) * nmode + kz int iy = ys + ind / nmode; @@ -500,25 +493,24 @@ Field3D LaplacePCR::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // True for the component constant (DC) 
in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } - } // BOUT_OMP(parallel) + } // BOUT_OMP_PERF(parallel) // Solve tridiagonal systems cr_pcr_solver(a3D, b3D, c3D, bcmplx3D, xcmplx3D); // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array((localmesh->LocalNz) / 2 + 1); // ZFFT routine expects input of this length - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y int ix = xs + ind / ny; int iy = ys + ind % ny; diff --git a/src/invert/laplace/impls/pcr/pcr.hxx b/src/invert/laplace/impls/pcr/pcr.hxx index 38b7c356d3..ec4637f56c 100644 --- a/src/invert/laplace/impls/pcr/pcr.hxx +++ b/src/invert/laplace/impls/pcr/pcr.hxx @@ -172,14 +172,6 @@ private: /// First and last interior points xstart, xend int xs, xe; - bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; } - bool isInnerBoundaryFlagSet(int flag) const { - return (inner_boundary_flags & flag) != 0; - } - bool isOuterBoundaryFlagSet(int flag) const { - return (outer_boundary_flags & flag) != 0; - } - bool dst{false}; }; diff --git a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx index 925fb842ce..61c8f58694 100644 --- a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx +++ b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.cxx @@ -145,19 +145,19 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { // If the flags to assign that only one guard cell should be used is set int inbndry = localmesh->xstart; int outbndry = localmesh->xstart; - if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || 
(localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } if (dst) { const BoutReal zlength = getUniform(coords->dz) * (localmesh->LocalNz - 3); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array( @@ -165,14 +165,13 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Loop over X indices, including boundaries but not guard cells. (unless periodic // in x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ix = xs; ix <= xe; ix++) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0[ix] + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -187,7 +186,7 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Get elements of the tridiagonal matrix // including boundary conditions - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int kz = 0; kz < nmode; kz++) { // wave number is 1/[rad]; DST has extra 2. const BoutReal kwave = kz * 2.0 * PI / (2. 
* zlength); @@ -195,8 +194,7 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -205,13 +203,13 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { pcr_thomas_solver(a, b, c, bcmplx, xcmplx); // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array( localmesh->LocalNz); // ZFFT routine expects input of this length - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ix = xs; ix <= xe; ix++) { for (int kz = 0; kz < nmode; kz++) { k1d[kz] = xcmplx(kz, ix - xs); @@ -229,7 +227,7 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { } } else { const BoutReal zlength = getUniform(coords->zlength()); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array((localmesh->LocalNz) / 2 @@ -237,14 +235,13 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Loop over X indices, including boundaries but not guard cells (unless periodic in // x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ix = xs; ix <= xe; ix++) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0[ix], localmesh->LocalNz, std::begin(k1d)); } else { @@ 
-259,14 +256,13 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { // Get elements of the tridiagonal matrix // including boundary conditions - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int kz = 0; kz < nmode; kz++) { const BoutReal kwave = kz * 2.0 * PI / zlength; // wave number is 1/[rad] tridagMatrix(&a(kz, 0), &b(kz, 0), &c(kz, 0), &bcmplx(kz, 0), jy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -275,15 +271,15 @@ FieldPerp LaplacePCR_THOMAS::solve(const FieldPerp& rhs, const FieldPerp& x0) { pcr_thomas_solver(a, b, c, bcmplx, xcmplx); // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array((localmesh->LocalNz) / 2 + 1); // ZFFT routine expects input of this length - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ix = xs; ix <= xe; ix++) { if (zero_DC) { k1d[0] = 0.; @@ -323,13 +319,13 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { // If the flags to assign that only one guard cell should be used is set int inbndry = localmesh->xstart; int outbndry = localmesh->xstart; - if (((global_flags & INVERT_BOTH_BNDRY_ONE) != 0) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if ((inner_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if ((outer_boundary_flags & INVERT_BNDRY_ONE) != 0) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -367,7 +363,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& 
x0) { if (dst) { const BoutReal zlength = getUniform(coords->dz) * (localmesh->LocalNz - 3); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array( @@ -375,7 +371,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { // Loop over X and Y indices, including boundaries but not guard cells. // (unless periodic in x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ind = 0; ind < nxny; ++ind) { // ind = (ix - xs)*(ye - ys + 1) + (iy - ys) int ix = xs + ind / ny; @@ -383,10 +379,9 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { // Take DST in Z direction and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary DST(x0(ix, iy) + 1, localmesh->LocalNz - 2, std::begin(k1d)); } else { @@ -401,7 +396,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { // Get elements of the tridiagonal matrix // including boundary conditions - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nsys; ind++) { // ind = (iy - ys) * nmode + kz int iy = ys + ind / nmode; @@ -413,8 +408,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // wave number index kwave, // kwave (inverse wave length) - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -423,13 +417,13 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { pcr_thomas_solver(a3D, b3D, c3D, 
bcmplx3D, xcmplx3D); // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array( localmesh->LocalNz); // ZFFT routine expects input of this length - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y // ind = (ix - xs)*(ye - ys + 1) + (iy - ys) int ix = xs + ind / ny; @@ -451,7 +445,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { } } else { const BoutReal zlength = getUniform(coords->zlength()); - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array(localmesh->LocalNz / 2 @@ -460,7 +454,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { // Loop over X and Y indices, including boundaries but not guard cells // (unless periodic in x) - BOUT_OMP(for) + BOUT_OMP_PERF(for) for (int ind = 0; ind < nxny; ++ind) { // ind = (ix - xs)*(ye - ys + 1) + (iy - ys) int ix = xs + ind / ny; @@ -468,10 +462,9 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { // Take FFT in Z direction, apply shift, and put result in k1d - if (((ix < inbndry) && ((inner_boundary_flags & INVERT_SET) != 0) - && localmesh->firstX()) + if (((ix < inbndry) && isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) || ((localmesh->LocalNx - ix - 1 < outbndry) - && ((outer_boundary_flags & INVERT_SET) != 0) && localmesh->lastX())) { + && isOuterBoundaryFlagSetOnLastX(INVERT_SET))) { // Use the values in x0 in the boundary rfft(x0(ix, iy), localmesh->LocalNz, std::begin(k1d)); } else { @@ -486,7 +479,7 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { // Get elements of the tridiagonal matrix // including boundary conditions - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nsys; ind++) { // ind = (iy - ys) * nmode + kz int iy = ys + ind / nmode; @@ -497,8 +490,7 @@ Field3D 
LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { tridagMatrix(&a3D(ind, 0), &b3D(ind, 0), &c3D(ind, 0), &bcmplx3D(ind, 0), iy, kz, // True for the component constant (DC) in Z kwave, // Z wave number - global_flags, inner_boundary_flags, outer_boundary_flags, &Acoef, - &C1coef, &C2coef, &Dcoef, + &Acoef, &C1coef, &C2coef, &Dcoef, false); // Don't include guard cells in arrays } } @@ -507,15 +499,15 @@ Field3D LaplacePCR_THOMAS::solve(const Field3D& rhs, const Field3D& x0) { pcr_thomas_solver(a3D, b3D, c3D, bcmplx3D, xcmplx3D); // FFT back to real space - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { /// Create a local thread-scope working array auto k1d = Array((localmesh->LocalNz) / 2 + 1); // ZFFT routine expects input of this length - const bool zero_DC = (global_flags & INVERT_ZERO_DC) != 0; + const bool zero_DC = isGlobalFlagSet(INVERT_ZERO_DC); - BOUT_OMP(for nowait) + BOUT_OMP_PERF(for nowait) for (int ind = 0; ind < nxny; ++ind) { // Loop over X and Y int ix = xs + ind / ny; int iy = ys + ind % ny; diff --git a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx index 009a1def2b..e12a647789 100644 --- a/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx +++ b/src/invert/laplace/impls/pcr_thomas/pcr_thomas.hxx @@ -175,14 +175,6 @@ private: /// First and last interior points xstart, xend int xs, xe; - bool isGlobalFlagSet(int flag) const { return (global_flags & flag) != 0; } - bool isInnerBoundaryFlagSet(int flag) const { - return (inner_boundary_flags & flag) != 0; - } - bool isOuterBoundaryFlagSet(int flag) const { - return (outer_boundary_flags & flag) != 0; - } - bool dst{false}; }; diff --git a/src/invert/laplace/impls/petsc/petsc_laplace.cxx b/src/invert/laplace/impls/petsc/petsc_laplace.cxx index d125b90694..f06f4c7de6 100644 --- a/src/invert/laplace/impls/petsc/petsc_laplace.cxx +++ b/src/invert/laplace/impls/petsc/petsc_laplace.cxx @@ -23,7 +23,8 @@ * along with BOUT++. 
If not, see . * **************************************************************************/ -#include "bout/build_config.hxx" + +#include "bout/build_defines.hxx" #if BOUT_HAS_PETSC @@ -32,6 +33,8 @@ #include #include #include +#include +#include #include #include @@ -49,14 +52,13 @@ #define KSP_PREONLY "preonly" static PetscErrorCode laplacePCapply(PC pc, Vec x, Vec y) { - int ierr; + PetscFunctionBegin; // NOLINT - // Get the context - LaplacePetsc* s; - ierr = PCShellGetContext(pc, reinterpret_cast(&s)); + LaplacePetsc* laplace = nullptr; + const int ierr = PCShellGetContext(pc, reinterpret_cast(&laplace)); // NOLINT CHKERRQ(ierr); - PetscFunctionReturn(s->precon(x, y)); + PetscFunctionReturn(laplace->precon(x, y)); // NOLINT } LaplacePetsc::LaplacePetsc(Options* opt, const CELL_LOC loc, Mesh* mesh_in, @@ -79,28 +81,9 @@ LaplacePetsc::LaplacePetsc(Options* opt, const CELL_LOC loc, Mesh* mesh_in, } #if CHECK > 0 - // These are the implemented flags - implemented_flags = INVERT_START_NEW; - implemented_boundary_flags = INVERT_AC_GRAD + INVERT_SET + INVERT_RHS; // Checking flags are set to something which is not implemented - // This is done binary (which is possible as each flag is a power of 2) - if (global_flags & ~implemented_flags) { - if (global_flags & INVERT_4TH_ORDER) { - output << "For PETSc based Laplacian inverter, use 'fourth_order=true' instead of " - "setting INVERT_4TH_ORDER flag" - << endl; - } - throw BoutException("Attempted to set Laplacian inversion flag that is not " - "implemented in petsc_laplace.cxx"); - } - if (inner_boundary_flags & ~implemented_boundary_flags) { - throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " - "implemented in petsc_laplace.cxx"); - } - if (outer_boundary_flags & ~implemented_boundary_flags) { - throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " - "implemented in petsc_laplace.cxx"); - } + checkFlags(); + if (localmesh->periodicX) { throw 
BoutException("LaplacePetsc does not work with periodicity in the x direction " "(localmesh->PeriodicX == true). Change boundary conditions or " @@ -360,25 +343,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { ASSERT1(x0.getLocation() == location); #if CHECK > 0 - // Checking flags are set to something which is not implemented (see - // constructor for details) - if (global_flags & !implemented_flags) { - if (global_flags & INVERT_4TH_ORDER) { - output << "For PETSc based Laplacian inverter, use 'fourth_order=true' instead of " - "setting INVERT_4TH_ORDER flag" - << endl; - } - throw BoutException("Attempted to set Laplacian inversion flag that is not " - "implemented in petsc_laplace.cxx"); - } - if (inner_boundary_flags & ~implemented_boundary_flags) { - throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " - "implemented in petsc_laplace.cxx"); - } - if (outer_boundary_flags & ~implemented_boundary_flags) { - throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " - "implemented in petsc_laplace.cxx"); - } + checkFlags(); #endif int y = b.getIndex(); // Get the Y index @@ -415,7 +380,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { for (int z = 0; z < localmesh->LocalNz; z++) { PetscScalar val; // Value of element to be set in the matrix // If Neumann Boundary Conditions are set. 
- if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Set values corresponding to nodes adjacent in x if (fourth_order) { // Fourth Order Accuracy on Boundary @@ -472,9 +437,9 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { // Set Components of RHS // If the inner boundary value should be set by b or x0 - if (inner_boundary_flags & INVERT_RHS) { + if (isInnerBoundaryFlagSet(INVERT_RHS)) { val = b[x][z]; - } else if (inner_boundary_flags & INVERT_SET) { + } else if (isInnerBoundaryFlagSet(INVERT_SET)) { val = x0[x][z]; } @@ -680,7 +645,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { Element(i, x, z, 0, 0, val, MatA); // If Neumann Boundary Conditions are set. - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Set values corresponding to nodes adjacent in x if (fourth_order) { // Fourth Order Accuracy on Boundary @@ -733,9 +698,9 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { // Set Components of RHS // If the inner boundary value should be set by b or x0 val = 0; - if (outer_boundary_flags & INVERT_RHS) { + if (isOuterBoundaryFlagSet(INVERT_RHS)) { val = b[x][z]; - } else if (outer_boundary_flags & INVERT_SET) { + } else if (isOuterBoundaryFlagSet(INVERT_SET)) { val = x0[x][z]; } @@ -812,7 +777,7 @@ FieldPerp LaplacePetsc::solve(const FieldPerp& b, const FieldPerp& x0) { KSPSetTolerances(ksp, rtol, atol, dtol, maxits); // If the initial guess is not set to zero - if (!(global_flags & INVERT_START_NEW)) { + if (!isGlobalFlagSet(INVERT_START_NEW)) { KSPSetInitialGuessNonzero(ksp, static_cast(true)); } @@ -1194,4 +1159,24 @@ int LaplacePetsc::precon(Vec x, Vec y) { return 0; } +void LaplacePetsc::checkFlags() { + if (isGlobalFlagSet(~implemented_flags)) { + if (isGlobalFlagSet(INVERT_4TH_ORDER)) { + output_error.write( + "For PETSc based Laplacian inverter, use 'fourth_order=true' instead of " 
+ "setting INVERT_4TH_ORDER flag\n"); + } + throw BoutException("Attempted to set Laplacian inversion flag that is not " + "implemented in petsc_laplace.cxx"); + } + if (isInnerBoundaryFlagSet(~implemented_boundary_flags)) { + throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " + "implemented in petsc_laplace.cxx"); + } + if (isOuterBoundaryFlagSet(~implemented_boundary_flags)) { + throw BoutException("Attempted to set Laplacian inversion boundary flag that is not " + "implemented in petsc_laplace.cxx"); + } +} + #endif // BOUT_HAS_PETSC_3_3 diff --git a/src/invert/laplace/impls/petsc/petsc_laplace.hxx b/src/invert/laplace/impls/petsc/petsc_laplace.hxx index 3b1d3bcb49..55482644be 100644 --- a/src/invert/laplace/impls/petsc/petsc_laplace.hxx +++ b/src/invert/laplace/impls/petsc/petsc_laplace.hxx @@ -26,8 +26,8 @@ * **************************************************************************/ -#ifndef __PETSC_LAPLACE_H__ -#define __PETSC_LAPLACE_H__ +#ifndef BOUT_PETSC_LAPLACE_H +#define BOUT_PETSC_LAPLACE_H #include "bout/build_config.hxx" #include "bout/invert_laplace.hxx" @@ -254,12 +254,13 @@ private: void vecToField(Vec x, FieldPerp& f); // Copy a vector into a fieldperp void fieldToVec(const FieldPerp& f, Vec x); // Copy a fieldperp into a vector -#if CHECK > 0 - int implemented_flags; - int implemented_boundary_flags; -#endif + static constexpr int implemented_flags = INVERT_START_NEW; + static constexpr int implemented_boundary_flags = + INVERT_AC_GRAD | INVERT_SET | INVERT_RHS; + + void checkFlags(); }; #endif //BOUT_HAS_PETSC -#endif //__PETSC_LAPLACE_H__ +#endif //BOUT_PETSC_LAPLACE_H diff --git a/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx b/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx index d1e2207725..a7bfd209ee 100644 --- a/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx +++ b/src/invert/laplace/impls/petsc3damg/petsc3damg.cxx @@ -84,12 +84,12 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const 
CELL_LOC loc, Mesh* mes #if CHECK > 0 // Checking flags are set to something which is not implemented // This is done binary (which is possible as each flag is a power of 2) - if (flagSet(global_flags, INVERT_4TH_ORDER)) { + if (isGlobalFlagSet(INVERT_4TH_ORDER)) { output.write("For PETSc based Laplacian inverter, use 'fourth_order=true' instead of " "setting INVERT_4TH_ORDER flag\n"); } - if (flagSet(global_flags, ~implemented_flags)) { + if (isGlobalFlagSet(~implemented_flags)) { throw BoutException("Attempted to set global Laplacian inversion flag that is not " "implemented in petsc_laplace.cxx"); } @@ -102,8 +102,8 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes name); } }; - unimplementedBoundaryFlag(inner_boundary_flags, "inner"); - unimplementedBoundaryFlag(outer_boundary_flags, "outer"); + unimplementedBoundaryFlag(getInnerBoundaryFlags(), "inner"); + unimplementedBoundaryFlag(getOuterBoundaryFlags(), "outer"); unimplementedBoundaryFlag(lower_boundary_flags, "lower"); unimplementedBoundaryFlag(upper_boundary_flags, "upper"); @@ -119,7 +119,7 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes } // Set up boundary conditions in operator - const bool inner_X_neumann = flagSet(inner_boundary_flags, INVERT_AC_GRAD); + const bool inner_X_neumann = isInnerBoundaryFlagSet(INVERT_AC_GRAD); const auto inner_X_BC = inner_X_neumann ? -1. / coords->dx / sqrt(coords->g_11) : 0.5; const auto inner_X_BC_plus = inner_X_neumann ? -inner_X_BC : 0.5; @@ -128,7 +128,7 @@ LaplacePetsc3dAmg::LaplacePetsc3dAmg(Options* opt, const CELL_LOC loc, Mesh* mes operator3D(i, i.xp()) = inner_X_BC_plus[i]; } - const bool outer_X_neumann = flagSet(outer_boundary_flags, INVERT_AC_GRAD); + const bool outer_X_neumann = isOuterBoundaryFlagSet(INVERT_AC_GRAD); const auto outer_X_BC = outer_X_neumann ? 1. / coords->dx / sqrt(coords->g_11) : 0.5; const auto outer_X_BC_minus = outer_X_neumann ? 
-outer_X_BC : 0.5; @@ -191,8 +191,8 @@ Field3D LaplacePetsc3dAmg::solve(const Field3D& b_in, const Field3D& x0) { // Adjust vectors to represent boundary conditions and check that // boundary cells are finite - setBC(rhs, b_in, indexer->getRegionInnerX(), inner_boundary_flags, x0); - setBC(rhs, b_in, indexer->getRegionOuterX(), outer_boundary_flags, x0); + setBC(rhs, b_in, indexer->getRegionInnerX(), getInnerBoundaryFlags(), x0); + setBC(rhs, b_in, indexer->getRegionOuterX(), getOuterBoundaryFlags(), x0); setBC(rhs, b_in, indexer->getRegionLowerY(), lower_boundary_flags, x0); setBC(rhs, b_in, indexer->getRegionUpperY(), upper_boundary_flags, x0); @@ -460,7 +460,7 @@ void LaplacePetsc3dAmg::updateMatrix3D() { KSPSetTolerances(ksp, rtol, atol, dtol, maxits); // If the initial guess is not set to zero - if ((global_flags & INVERT_START_NEW) == 0) { + if (!isGlobalFlagSet(INVERT_START_NEW)) { KSPSetInitialGuessNonzero(ksp, (PetscBool) true); } diff --git a/src/invert/laplace/impls/petsc3damg/petsc3damg.hxx b/src/invert/laplace/impls/petsc3damg/petsc3damg.hxx index 99a04bd2dd..456b85b5e6 100644 --- a/src/invert/laplace/impls/petsc3damg/petsc3damg.hxx +++ b/src/invert/laplace/impls/petsc3damg/petsc3damg.hxx @@ -27,8 +27,8 @@ **************************************************************************/ class LaplacePetsc3dAmg; -#ifndef __PETSC_LAPLACE_3DAMG_H__ -#define __PETSC_LAPLACE_3DAMG_H__ +#ifndef BOUT_PETSC_LAPLACE_3DAMG_H +#define BOUT_PETSC_LAPLACE_3DAMG_H #include "bout/build_config.hxx" #include "bout/invert_laplace.hxx" @@ -228,4 +228,4 @@ private: #endif //BOUT_HAS_PETSC -#endif //__PETSC_LAPLACE_3DAMG_H__ +#endif //BOUT_PETSC_LAPLACE_3DAMG_H diff --git a/src/invert/laplace/impls/serial_band/serial_band.cxx b/src/invert/laplace/impls/serial_band/serial_band.cxx index 955e9a7ed1..4e7bb4c63f 100644 --- a/src/invert/laplace/impls/serial_band/serial_band.cxx +++ b/src/invert/laplace/impls/serial_band/serial_band.cxx @@ -99,16 +99,16 @@ FieldPerp 
LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { int xbndry = localmesh->xstart; // Width of the x boundary // If the flags to assign that only one guard cell should be used is set - if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { xbndry = 1; } - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int ix = 0; ix < localmesh->LocalNx; ix++) { // for fixed ix,jy set a complex vector rho(z) - if (((ix < xbndry) && (inner_boundary_flags & INVERT_SET)) - || ((ncx - ix < xbndry) && (outer_boundary_flags & INVERT_SET))) { + if (((ix < xbndry) && isInnerBoundaryFlagSet(INVERT_SET)) + || ((ncx - ix < xbndry) && (isOuterBoundaryFlagSet(INVERT_SET)))) { // Use the values in x0 in the boundary rfft(x0[ix], ncz, &bk(ix, 0)); } else { @@ -247,10 +247,10 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { for (int ix = 0; ix < xbndry; ix++) { // Set zero-value. Change to zero-gradient if needed - if (!(inner_boundary_flags & (INVERT_RHS | INVERT_SET))) { + if (!isInnerBoundaryFlagSet(INVERT_RHS | INVERT_SET)) { bk1d[ix] = 0.0; } - if (!(outer_boundary_flags & (INVERT_RHS | INVERT_SET))) { + if (!isOuterBoundaryFlagSet(INVERT_RHS | INVERT_SET)) { bk1d[ncx - ix] = 0.0; } @@ -265,8 +265,8 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { // DC // Inner boundary - if (inner_boundary_flags & (INVERT_DC_GRAD + INVERT_SET) - || inner_boundary_flags & (INVERT_DC_GRAD + INVERT_RHS)) { + if (isInnerBoundaryFlagSet(INVERT_DC_GRAD + INVERT_SET) + || isInnerBoundaryFlagSet(INVERT_DC_GRAD + INVERT_RHS)) { // Zero gradient at inner boundary. 
2nd-order accurate // Boundary at midpoint for (int ix = 0; ix < xbndry; ix++) { @@ -277,7 +277,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { A(ix, 4) = 0.; } - } else if (inner_boundary_flags & INVERT_DC_GRAD) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRAD)) { // Zero gradient at inner boundary. 2nd-order accurate // Boundary at midpoint for (int ix = 0; ix < xbndry; ix++) { @@ -288,7 +288,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { A(ix, 4) = 0.; } - } else if (inner_boundary_flags & INVERT_DC_GRADPAR) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPAR)) { for (int ix = 0; ix < xbndry; ix++) { A(ix, 0) = 0.; A(ix, 1) = 0.; @@ -296,7 +296,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { A(ix, 3) = 4. / sqrt(coords->g_22(ix + 1, jy)); A(ix, 4) = -1. / sqrt(coords->g_22(ix + 2, jy)); } - } else if (inner_boundary_flags & INVERT_DC_GRADPARINV) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPARINV)) { for (int ix = 0; ix < xbndry; ix++) { A(ix, 0) = 0.; A(ix, 1) = 0.; @@ -304,7 +304,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { A(ix, 3) = 4. 
* sqrt(coords->g_22(ix + 1, jy)); A(ix, 4) = -sqrt(coords->g_22(ix + 2, jy)); } - } else if (inner_boundary_flags & INVERT_DC_LAP) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_LAP)) { for (int ix = 0; ix < xbndry; ix++) { A(ix, 0) = 0.; A(ix, 1) = 0.; @@ -315,7 +315,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { } // Outer boundary - if (outer_boundary_flags & INVERT_DC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_DC_GRAD)) { // Zero gradient at outer boundary for (int ix = 0; ix < xbndry; ix++) { A(ncx - ix, 1) = -1.0; @@ -326,12 +326,12 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { // AC // Inner boundarySQ(kwave)*coef2 - if (inner_boundary_flags & INVERT_AC_GRAD) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Zero gradient at inner boundary for (int ix = 0; ix < xbndry; ix++) { A(ix, 3) = -1.0; } - } else if (inner_boundary_flags & INVERT_AC_LAP) { + } else if (isInnerBoundaryFlagSet(INVERT_AC_LAP)) { // Enforce zero laplacian for 2nd and 4th-order int ix = 1; @@ -369,12 +369,12 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { } // Outer boundary - if (outer_boundary_flags & INVERT_AC_GRAD) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Zero gradient at outer boundary for (int ix = 0; ix < xbndry; ix++) { A(ncx - ix, 1) = -1.0; } - } else if (outer_boundary_flags & INVERT_AC_LAP) { + } else if (isOuterBoundaryFlagSet(INVERT_AC_LAP)) { // Enforce zero laplacian for 2nd and 4th-order // NOTE: Currently ignoring XZ term and coef4 assumed zero on boundary // FIX THIS IF IT WORKS @@ -417,7 +417,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { // Perform inversion cband_solve(A, localmesh->LocalNx, 2, 2, bk1d); - if ((global_flags & INVERT_KX_ZERO) && (iz == 0)) { + if (isGlobalFlagSet(INVERT_KX_ZERO) && (iz == 0)) { // Set the Kx = 0, n = 0 component to zero. For now just subtract // Should do in the inversion e.g. 
Sherman-Morrison formula @@ -440,7 +440,7 @@ FieldPerp LaplaceSerialBand::solve(const FieldPerp& b, const FieldPerp& x0) { // Done inversion, transform back for (int ix = 0; ix <= ncx; ix++) { - if (global_flags & INVERT_ZERO_DC) { + if (isGlobalFlagSet(INVERT_ZERO_DC)) { xk(ix, 0) = 0.0; } diff --git a/src/invert/laplace/impls/serial_band/serial_band.hxx b/src/invert/laplace/impls/serial_band/serial_band.hxx index 186e716a95..d1f0fc7c65 100644 --- a/src/invert/laplace/impls/serial_band/serial_band.hxx +++ b/src/invert/laplace/impls/serial_band/serial_band.hxx @@ -26,8 +26,8 @@ class LaplaceSerialBand; -#ifndef __SERIAL_BAND_H__ -#define __SERIAL_BAND_H__ +#ifndef BOUT_SERIAL_BAND_H +#define BOUT_SERIAL_BAND_H #include "bout/build_config.hxx" #include "bout/invert_laplace.hxx" @@ -95,4 +95,4 @@ private: #endif // BOUT_USE_METRIC_3D -#endif // __SERIAL_BAND_H__ +#endif // BOUT_SERIAL_BAND_H diff --git a/src/invert/laplace/impls/serial_tri/serial_tri.cxx b/src/invert/laplace/impls/serial_tri/serial_tri.cxx index e76650c751..f46a0a46e5 100644 --- a/src/invert/laplace/impls/serial_tri/serial_tri.cxx +++ b/src/invert/laplace/impls/serial_tri/serial_tri.cxx @@ -91,13 +91,13 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) { int inbndry = localmesh->xstart, outbndry = localmesh->xstart; // If the flags to assign that only one guard cell should be used is set - if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if (inner_boundary_flags & INVERT_BNDRY_ONE) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if (outer_boundary_flags & INVERT_BNDRY_ONE) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -133,15 +133,15 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) { auto bvec = Array(ncx); auto cvec = Array(ncx); - BOUT_OMP(parallel for) + 
BOUT_OMP_PERF(parallel for) for (int ix = 0; ix < ncx; ix++) { /* This for loop will set the bk (initialized by the constructor) * bk is the z fourier modes of b in z * If the INVERT_SET flag is set (meaning that x0 will be used to set the * bounadry values), */ - if (((ix < inbndry) && (inner_boundary_flags & INVERT_SET)) - || ((ncx - 1 - ix < outbndry) && (outer_boundary_flags & INVERT_SET))) { + if (((ix < inbndry) && isInnerBoundaryFlagSet(INVERT_SET)) + || ((ncx - 1 - ix < outbndry) && (isOuterBoundaryFlagSet(INVERT_SET)))) { // Use the values in x0 in the boundary // x0 is the input @@ -185,8 +185,7 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) { kz, // wave number (different from kz only if we are taking a part // of the z-domain [and not from 0 to 2*pi]) - kz * kwaveFactor, global_flags, inner_boundary_flags, - outer_boundary_flags, &A, &C, &D); + kz * kwaveFactor, &A, &C, &D); ///////// PERFORM INVERSION ///////// if (!localmesh->periodicX) { @@ -208,7 +207,7 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) { } // If the global flag is set to INVERT_KX_ZERO - if ((global_flags & INVERT_KX_ZERO) && (kz == 0)) { + if (isGlobalFlagSet(INVERT_KX_ZERO) && (kz == 0)) { dcomplex offset(0.0); for (int ix = localmesh->xstart; ix <= localmesh->xend; ix++) { offset += xk1d[ix]; @@ -228,7 +227,7 @@ FieldPerp LaplaceSerialTri::solve(const FieldPerp& b, const FieldPerp& x0) { // Done inversion, transform back for (int ix = 0; ix < ncx; ix++) { - if (global_flags & INVERT_ZERO_DC) { + if (isGlobalFlagSet(INVERT_ZERO_DC)) { xk(ix, 0) = 0.0; } diff --git a/src/invert/laplace/impls/serial_tri/serial_tri.hxx b/src/invert/laplace/impls/serial_tri/serial_tri.hxx index 05fa375de7..5b0419fa27 100644 --- a/src/invert/laplace/impls/serial_tri/serial_tri.hxx +++ b/src/invert/laplace/impls/serial_tri/serial_tri.hxx @@ -26,8 +26,8 @@ class LaplaceSerialTri; -#ifndef __SERIAL_TRI_H__ -#define __SERIAL_TRI_H__ +#ifndef 
BOUT_SERIAL_TRI_H +#define BOUT_SERIAL_TRI_H #include #include @@ -80,4 +80,4 @@ private: Field2D A, C, D; }; -#endif // __SERIAL_TRI_H__ +#endif // BOUT_SERIAL_TRI_H diff --git a/src/invert/laplace/impls/spt/spt.cxx b/src/invert/laplace/impls/spt/spt.cxx index 92959e1194..2e4c844c94 100644 --- a/src/invert/laplace/impls/spt/spt.cxx +++ b/src/invert/laplace/impls/spt/spt.cxx @@ -65,10 +65,9 @@ LaplaceSPT::LaplaceSPT(Options* opt, const CELL_LOC loc, Mesh* mesh_in, ye = localmesh->LocalNy - 1; // Contains upper boundary } - alldata = new SPT_data[ye - ys + 1]; - alldata -= ys; // Re-number indices to start at ys + alldata.reallocate(ye - ys + 1); for (int jy = ys; jy <= ye; jy++) { - alldata[jy].comm_tag = SPT_DATA + jy; // Give each one a different tag + alldata[jy - ys].comm_tag = SPT_DATA + jy; // Give each one a different tag } // Temporary array for taking FFTs @@ -76,11 +75,6 @@ LaplaceSPT::LaplaceSPT(Options* opt, const CELL_LOC loc, Mesh* mesh_in, dc1d.reallocate(ncz / 2 + 1); } -LaplaceSPT::~LaplaceSPT() { - alldata += ys; // Return to index from 0 - delete[] alldata; -} - FieldPerp LaplaceSPT::solve(const FieldPerp& b) { return solve(b, b); } FieldPerp LaplaceSPT::solve(const FieldPerp& b, const FieldPerp& x0) { @@ -90,15 +84,15 @@ FieldPerp LaplaceSPT::solve(const FieldPerp& b, const FieldPerp& x0) { FieldPerp x{emptyFrom(b)}; - if ((inner_boundary_flags & INVERT_SET) || (outer_boundary_flags & INVERT_SET)) { + if (isInnerBoundaryFlagSet(INVERT_SET) || isOuterBoundaryFlagSet(INVERT_SET)) { FieldPerp bs = copy(b); int xbndry = localmesh->xstart; // If the flags to assign that only one guard cell should be used is set - if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { xbndry = 1; } - if ((inner_boundary_flags & INVERT_SET) && localmesh->firstX()) { + if (isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) { // Copy x0 inner boundary into bs for (int ix = 0; ix < 
xbndry; ix++) { for (int iz = 0; iz < localmesh->LocalNz; iz++) { @@ -106,7 +100,7 @@ FieldPerp LaplaceSPT::solve(const FieldPerp& b, const FieldPerp& x0) { } } } - if ((outer_boundary_flags & INVERT_SET) && localmesh->lastX()) { + if (isOuterBoundaryFlagSetOnLastX(INVERT_SET)) { // Copy x0 outer boundary into bs for (int ix = localmesh->LocalNx - 1; ix >= localmesh->LocalNx - xbndry; ix--) { for (int iz = 0; iz < localmesh->LocalNz; iz++) { @@ -141,29 +135,29 @@ Field3D LaplaceSPT::solve(const Field3D& b) { for (int jy = ys; jy <= ye; jy++) { // And start another one going - start(sliceXZ(b, jy), alldata[jy]); + start(sliceXZ(b, jy), alldata[jy - ys]); // Move each calculation along one processor for (int jy2 = ys; jy2 < jy; jy2++) { - next(alldata[jy2]); + next(alldata[jy2 - ys]); } } bool running = true; - do { + while (running) { // Move each calculation along until the last one is finished - for (int jy = ys; jy <= ye; jy++) { - running = next(alldata[jy]) == 0; + for (auto& data : alldata) { + running = next(data) == 0; } - } while (running); + } FieldPerp xperp(localmesh); xperp.setLocation(location); xperp.allocate(); // All calculations finished. 
Get result - for (int jy = ys; jy <= ye; jy++) { - finish(alldata[jy], xperp); + for (auto& data : alldata) { + finish(data, xperp); x = xperp; } @@ -173,17 +167,17 @@ Field3D LaplaceSPT::solve(const Field3D& b) { Field3D LaplaceSPT::solve(const Field3D& b, const Field3D& x0) { ASSERT1(localmesh == b.getMesh() && localmesh == x0.getMesh()); - if (((inner_boundary_flags & INVERT_SET) && localmesh->firstX()) - || ((outer_boundary_flags & INVERT_SET) && localmesh->lastX())) { + if ((isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) + || isOuterBoundaryFlagSetOnLastX(INVERT_SET)) { Field3D bs = copy(b); int xbndry = localmesh->xstart; // If the flags to assign that only one guard cell should be used is set - if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { xbndry = 1; } - if ((inner_boundary_flags & INVERT_SET) && localmesh->firstX()) { + if (isInnerBoundaryFlagSetOnFirstX(INVERT_SET)) { // Copy x0 inner boundary into bs for (int ix = 0; ix < xbndry; ix++) { for (int iy = 0; iy < localmesh->LocalNy; iy++) { @@ -193,7 +187,7 @@ Field3D LaplaceSPT::solve(const Field3D& b, const Field3D& x0) { } } } - if ((outer_boundary_flags & INVERT_SET) && localmesh->lastX()) { + if (isOuterBoundaryFlagSetOnLastX(INVERT_SET)) { // Copy x0 outer boundary into bs for (int ix = localmesh->LocalNx - 1; ix >= localmesh->LocalNx - xbndry; ix--) { for (int iy = 0; iy < localmesh->LocalNy; iy++) { @@ -323,15 +317,14 @@ int LaplaceSPT::start(const FieldPerp& b, SPT_data& data) { /// Set matrix elements for (int kz = 0; kz <= maxmode; kz++) { tridagMatrix(&data.avec(kz, 0), &data.bvec(kz, 0), &data.cvec(kz, 0), &data.bk(kz, 0), - data.jy, kz, kz * kwaveFactor, global_flags, inner_boundary_flags, - outer_boundary_flags, &Acoef, &Ccoef, &Dcoef); + data.jy, kz, kz * kwaveFactor, &Acoef, &Ccoef, &Dcoef); } data.proc = 0; //< Starts at processor 0 data.dir = 1; if (localmesh->firstX()) { - BOUT_OMP(parallel 
for) + BOUT_OMP_PERF(parallel for) for (int kz = 0; kz <= maxmode; kz++) { dcomplex bet, u0; // Start tridiagonal solve @@ -382,7 +375,7 @@ int LaplaceSPT::next(SPT_data& data) { if (localmesh->lastX()) { // Last processor, turn-around - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int kz = 0; kz <= maxmode; kz++) { dcomplex bet, u0; dcomplex gp, up; @@ -409,7 +402,7 @@ int LaplaceSPT::next(SPT_data& data) { } else if (data.dir > 0) { // In the middle of X, forward direction - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int kz = 0; kz <= maxmode; kz++) { dcomplex bet, u0; bet = dcomplex(data.buffer[4 * kz], data.buffer[4 * kz + 1]); @@ -429,7 +422,7 @@ int LaplaceSPT::next(SPT_data& data) { } else if (localmesh->firstX()) { // Back to the start - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int kz = 0; kz <= maxmode; kz++) { dcomplex gp, up; gp = dcomplex(data.buffer[4 * kz], data.buffer[4 * kz + 1]); @@ -441,7 +434,7 @@ int LaplaceSPT::next(SPT_data& data) { } else { // Middle of X, back-substitution stage - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int kz = 0; kz <= maxmode; kz++) { dcomplex gp = dcomplex(data.buffer[4 * kz], data.buffer[4 * kz + 1]); dcomplex up = dcomplex(data.buffer[4 * kz + 2], data.buffer[4 * kz + 3]); @@ -516,7 +509,7 @@ void LaplaceSPT::finish(SPT_data& data, FieldPerp& x) { dc1d[kz] = 0.0; } - if (global_flags & INVERT_ZERO_DC) { + if (isGlobalFlagSet(INVERT_ZERO_DC)) { dc1d[0] = 0.0; } diff --git a/src/invert/laplace/impls/spt/spt.hxx b/src/invert/laplace/impls/spt/spt.hxx index 27e9c8100c..a9d5b2583f 100644 --- a/src/invert/laplace/impls/spt/spt.hxx +++ b/src/invert/laplace/impls/spt/spt.hxx @@ -38,8 +38,8 @@ class LaplaceSPT; -#ifndef __SPT_H__ -#define __SPT_H__ +#ifndef BOUT_SPT_H +#define BOUT_SPT_H #include #include @@ -69,7 +69,6 @@ class LaplaceSPT : public Laplacian { public: LaplaceSPT(Options* opt = nullptr, const CELL_LOC = CELL_CENTRE, Mesh* mesh_in = nullptr, Solver* 
solver = nullptr); - ~LaplaceSPT(); using Laplacian::setCoefA; void setCoefA(const Field2D& val) override { @@ -106,17 +105,15 @@ public: Field3D solve(const Field3D& b, const Field3D& x0) override; private: - enum { SPT_DATA = 1123 }; ///< 'magic' number for SPT MPI messages + constexpr static int SPT_DATA = 1123; ///< 'magic' number for SPT MPI messages Field2D Acoef, Ccoef, Dcoef; /// Data structure for SPT algorithm struct SPT_data { - SPT_data() : comm_tag(SPT_DATA) {} void allocate(int mm, int nx); // Allocates memory - ~SPT_data(){}; // Free memory - int jy; ///< Y index + int jy = 0; ///< Y index Matrix bk; ///< b vector in Fourier space Matrix xk; @@ -125,19 +122,19 @@ private: Matrix avec, bvec, cvec; ///< Diagonal bands of matrix - int proc; // Which processor has this reached? - int dir; // Which direction is it going? + int proc = 0; // Which processor has this reached? + int dir = 1; // Which direction is it going? - comm_handle recv_handle; // Handle for receives + comm_handle recv_handle = nullptr; // Handle for receives - int comm_tag; // Tag for communication + int comm_tag = SPT_DATA; // Tag for communication Array buffer; }; int ys, ye; // Range of Y indices SPT_data slicedata; // Used to solve for a single FieldPerp - SPT_data* alldata; // Used to solve a Field3D + Array alldata; // Used to solve a Field3D Array dc1d; ///< 1D in Z for taking FFTs @@ -159,4 +156,4 @@ namespace { RegisterLaplace registerlaplacespt(LAPLACE_SPT); } // namespace -#endif // __SPT_H__ +#endif // BOUT_SPT_H diff --git a/src/invert/laplace/invert_laplace.cxx b/src/invert/laplace/invert_laplace.cxx index 505b04cc4f..4032499781 100644 --- a/src/invert/laplace/invert_laplace.cxx +++ b/src/invert/laplace/invert_laplace.cxx @@ -424,20 +424,16 @@ void Laplacian::tridagCoefs(int jx, int jy, BoutReal kwave, dcomplex& a, dcomple #if BOUT_USE_METRIC_3D void Laplacian::tridagMatrix(dcomplex* /*avec*/, dcomplex* /*bvec*/, dcomplex* /*cvec*/, dcomplex* /*bk*/, int /*jy*/, int 
/*kz*/, BoutReal /*kwave*/, - int /*global_flags*/, int /*inner_boundary_flags*/, - int /*outer_boundary_flags*/, const Field2D* /*a*/, - const Field2D* /*c1coef*/, const Field2D* /*c2coef*/, - const Field2D* /*d*/, bool /*includeguards*/, - bool /*zperiodic*/) { + const Field2D* /*a*/, const Field2D* /*c1coef*/, + const Field2D* /*c2coef*/, const Field2D* /*d*/, + bool /*includeguards*/, bool /*zperiodic*/) { throw BoutException("Error: tridagMatrix does not yet work with 3D metric."); } #else void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dcomplex* bk, - int jy, int kz, BoutReal kwave, int global_flags, - int inner_boundary_flags, int outer_boundary_flags, - const Field2D* a, const Field2D* c1coef, - const Field2D* c2coef, const Field2D* d, bool includeguards, - bool zperiodic) { + int jy, int kz, BoutReal kwave, const Field2D* a, + const Field2D* c1coef, const Field2D* c2coef, + const Field2D* d, bool includeguards, bool zperiodic) { ASSERT1(a->getLocation() == location); ASSERT1(c1coef->getLocation() == location); ASSERT1(c2coef->getLocation() == location); @@ -469,13 +465,13 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco int inbndry = localmesh->xstart, outbndry = localmesh->xstart; // If the flags to assign that only one guard cell should be used is set - if ((global_flags & INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { + if (isGlobalFlagSet(INVERT_BOTH_BNDRY_ONE) || (localmesh->xstart < 2)) { inbndry = outbndry = 1; } - if (inner_boundary_flags & INVERT_BNDRY_ONE) { + if (isInnerBoundaryFlagSet(INVERT_BNDRY_ONE)) { inbndry = 1; } - if (outer_boundary_flags & INVERT_BNDRY_ONE) { + if (isOuterBoundaryFlagSet(INVERT_BNDRY_ONE)) { outbndry = 1; } @@ -497,7 +493,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // If no user specified value is set on inner boundary, set the first // element in b (in the equation AX=b) to 0 - if (!(inner_boundary_flags & 
(INVERT_RHS | INVERT_SET))) { + if (!isInnerBoundaryFlagSet(INVERT_RHS | INVERT_SET)) { for (int ix = 0; ix < inbndry; ix++) { bk[ix] = 0.; } @@ -506,34 +502,35 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // DC i.e. kz = 0 (the offset mode) if (kz == 0) { - if (inner_boundary_flags & INVERT_DC_GRAD - && (inner_boundary_flags & INVERT_SET || inner_boundary_flags & INVERT_RHS)) { + if (isInnerBoundaryFlagSet(INVERT_DC_GRAD) + && (isInnerBoundaryFlagSet(INVERT_SET) + || isInnerBoundaryFlagSet(INVERT_RHS))) { // Zero gradient at inner boundary for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.; bvec[ix] = -1. / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy); cvec[ix] = 1. / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy); } - } else if (inner_boundary_flags & INVERT_DC_GRAD) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRAD)) { // Zero gradient at inner boundary for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.; bvec[ix] = -1.; cvec[ix] = 1.; } - } else if (inner_boundary_flags & INVERT_DC_GRADPAR) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPAR)) { for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.0; bvec[ix] = 1.0 / sqrt(coords->g_22(ix, jy)); cvec[ix] = -1.0 / sqrt(coords->g_22(ix + 1, jy)); } - } else if (inner_boundary_flags & INVERT_DC_GRADPARINV) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_GRADPARINV)) { for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.0; bvec[ix] = sqrt(coords->g_22(ix, jy)); cvec[ix] = -sqrt(coords->g_22(ix + 1, jy)); } - } else if (inner_boundary_flags & INVERT_DC_LAP) { + } else if (isInnerBoundaryFlagSet(INVERT_DC_LAP)) { // Decaying boundary conditions BoutReal k = 0.0; if (a != nullptr) { @@ -548,7 +545,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco bvec[ix] = 1.; cvec[ix] = -exp(-k * coords->dx(ix, jy) / sqrt(coords->g11(ix, jy))); } - } else if (inner_boundary_flags & INVERT_IN_CYLINDER) { + } else if 
(isInnerBoundaryFlagSet(INVERT_IN_CYLINDER)) { // Condition for inner radial boundary for cylindrical coordinates /* Explanation: * The discrete fourier transform is defined as @@ -602,8 +599,9 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // AC i.e. kz =/= 0 (all other modes than the offset mode) else { - if (inner_boundary_flags & INVERT_AC_GRAD - && (inner_boundary_flags & INVERT_SET || inner_boundary_flags & INVERT_RHS)) { + if (isInnerBoundaryFlagSet(INVERT_AC_GRAD) + && (isInnerBoundaryFlagSet(INVERT_SET) + || isInnerBoundaryFlagSet(INVERT_RHS))) { // Zero gradient at inner boundary for (int ix = 0; ix < inbndry; ix++) { avec[ix] = dcomplex(0., 0.); @@ -611,14 +609,14 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco dcomplex(-1., 0.) / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy); cvec[ix] = dcomplex(1., 0.) / sqrt(coords->g_11(ix, jy)) / coords->dx(ix, jy); } - } else if (inner_boundary_flags & INVERT_AC_GRAD) { + } else if (isInnerBoundaryFlagSet(INVERT_AC_GRAD)) { // Zero gradient at inner boundary for (int ix = 0; ix < inbndry; ix++) { avec[ix] = dcomplex(0., 0.); bvec[ix] = dcomplex(-1., 0.); cvec[ix] = dcomplex(1., 0.); } - } else if (inner_boundary_flags & INVERT_AC_LAP) { + } else if (isInnerBoundaryFlagSet(INVERT_AC_LAP)) { // Use decaying zero-Laplacian solution in the boundary for (int ix = 0; ix < inbndry; ix++) { avec[ix] = 0.0; @@ -626,9 +624,9 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco cvec[ix] = -exp(-1.0 * sqrt(coords->g33(ix, jy) / coords->g11(ix, jy)) * kwave * coords->dx(ix, jy)); } - } else if (inner_boundary_flags & INVERT_IN_CYLINDER) { + } else if (isInnerBoundaryFlagSet(INVERT_IN_CYLINDER)) { // Condition for inner radial boundary for cylindrical coordinates - // Explanation under "if (inner_boundary_flags & INVERT_IN_CYLINDER)" + // Explanation under "if (isInnerBoundaryFlagSet(INVERT_IN_CYLINDER))" for (int ix = 0; ix 
< inbndry; ix++) { avec[ix] = 0.; bvec[ix] = 1.; @@ -655,7 +653,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // If no user specified value is set on outer boundary, set the last // element in b (in the equation AX=b) to 0 - if (!(outer_boundary_flags & (INVERT_RHS | INVERT_SET))) { + if (!isOuterBoundaryFlagSet(INVERT_RHS | INVERT_SET)) { for (int ix = 0; ix < outbndry; ix++) { bk[ncx - ix] = 0.; } @@ -664,36 +662,37 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // DC i.e. kz = 0 (the offset mode) if (kz == 0) { - if (outer_boundary_flags & INVERT_DC_GRAD - && (outer_boundary_flags & INVERT_SET || outer_boundary_flags & INVERT_RHS)) { + if (isOuterBoundaryFlagSet(INVERT_DC_GRAD) + && (isOuterBoundaryFlagSet(INVERT_SET) + || isOuterBoundaryFlagSet(INVERT_RHS))) { // Zero gradient at outer boundary for (int ix = 0; ix < outbndry; ix++) { - avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(ncx - ix, jy)) - / coords->dx(ncx - ix, jy); - bvec[ncx - ix] = dcomplex(1., 0.) / sqrt(coords->g_11(ncx - ix, jy)) - / coords->dx(ncx - ix, jy); + avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(xe - ix, jy)) + / coords->dx(xe - ix, jy); + bvec[ncx - ix] = dcomplex(1., 0.) 
/ sqrt(coords->g_11(xe - ix, jy)) + / coords->dx(xe - ix, jy); cvec[ncx - ix] = dcomplex(0., 0.); } - } else if (outer_boundary_flags & INVERT_DC_GRAD) { + } else if (isOuterBoundaryFlagSet(INVERT_DC_GRAD)) { // Zero gradient at outer boundary for (int ix = 0; ix < outbndry; ix++) { avec[ncx - ix] = dcomplex(1., 0.); bvec[ncx - ix] = dcomplex(-1., 0.); cvec[ncx - ix] = dcomplex(0., 0.); } - } else if (inner_boundary_flags & INVERT_DC_GRADPAR) { + } else if (isOuterBoundaryFlagSet(INVERT_DC_GRADPAR)) { - for (int ix = 0; ix < inbndry; ix++) { + for (int ix = 0; ix < outbndry; ix++) { - avec[ncx - ix] = 1.0 / sqrt(coords->g_22(ncx - ix + 1, jy)); - bvec[ncx - ix] = -1.0 / sqrt(coords->g_22(ncx - ix, jy)); + avec[ncx - ix] = 1.0 / sqrt(coords->g_22(xe - ix - 1, jy)); + bvec[ncx - ix] = -1.0 / sqrt(coords->g_22(xe - ix, jy)); cvec[ncx - ix] = 0.0; } - } else if (inner_boundary_flags & INVERT_DC_GRADPARINV) { + } else if (isOuterBoundaryFlagSet(INVERT_DC_GRADPARINV)) { - for (int ix = 0; ix < inbndry; ix++) { + for (int ix = 0; ix < outbndry; ix++) { - avec[ncx - ix] = sqrt(coords->g_22(ncx - ix - 1, jy)); - bvec[ncx - ix] = -sqrt(coords->g_22(ncx - ix, jy)); + avec[ncx - ix] = sqrt(coords->g_22(xe - ix - 1, jy)); + bvec[ncx - ix] = -sqrt(coords->g_22(xe - ix, jy)); cvec[ncx - ix] = 0.0; } - } else if (inner_boundary_flags & INVERT_DC_LAP) { + } else if (isOuterBoundaryFlagSet(INVERT_DC_LAP)) { // Decaying boundary conditions BoutReal k = 0.0; if (a != nullptr) { @@ -707,7 +706,7 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco cvec[ncx - ix] = 0.; bvec[ncx - ix] = 1.; avec[ncx - ix] = - -exp(-k * coords->dx(ncx - ix, jy) / sqrt(coords->g11(ncx - ix, jy))); + -exp(-k * coords->dx(xe - ix, jy) / sqrt(coords->g11(xe - ix, jy))); } } else { // Order 2 dirichlet BC (boundary half between points) @@ -722,24 +721,25 @@ void Laplacian::tridagMatrix(dcomplex* avec, dcomplex* bvec, dcomplex* cvec, dco // AC i.e.
kz =/= 0 (all other modes than the offset mode) else { - if (outer_boundary_flags & INVERT_AC_GRAD - && (outer_boundary_flags & INVERT_SET || outer_boundary_flags & INVERT_RHS)) { + if (isOuterBoundaryFlagSet(INVERT_AC_GRAD) + && (isOuterBoundaryFlagSet(INVERT_SET) + || isOuterBoundaryFlagSet(INVERT_RHS))) { // Zero gradient at outer boundary for (int ix = 0; ix < outbndry; ix++) { - avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(ncx - ix, jy)) - / coords->dx(ncx - ix, jy); - bvec[ncx - ix] = dcomplex(1., 0.) / sqrt(coords->g_11(ncx - ix, jy)) - / coords->dx(ncx - ix, jy); + avec[ncx - ix] = dcomplex(-1., 0.) / sqrt(coords->g_11(xe - ix, jy)) + / coords->dx(xe - ix, jy); + bvec[ncx - ix] = dcomplex(1., 0.) / sqrt(coords->g_11(xe - ix, jy)) + / coords->dx(xe - ix, jy); cvec[ncx - ix] = dcomplex(0., 0.); } - } else if (outer_boundary_flags & INVERT_AC_GRAD) { + } else if (isOuterBoundaryFlagSet(INVERT_AC_GRAD)) { // Zero gradient at outer boundary for (int ix = 0; ix < outbndry; ix++) { avec[ncx - ix] = dcomplex(1., 0.); bvec[ncx - ix] = dcomplex(-1., 0.); cvec[ncx - ix] = dcomplex(0., 0.); } - } else if (outer_boundary_flags & INVERT_AC_LAP) { + } else if (isOuterBoundaryFlagSet(INVERT_AC_LAP)) { // Use decaying zero-Laplacian solution in the boundary for (int ix = 0; ix < outbndry; ix++) { avec[ncx - ix] = @@ -795,6 +795,13 @@ void Laplacian::LaplacianMonitor::outputVars(Options& output_options, laplacian->outputVars(output_options, time_dimension); } +bool Laplacian::isInnerBoundaryFlagSetOnFirstX(int flag) const { + return isInnerBoundaryFlagSet(flag) and localmesh->firstX(); +} +bool Laplacian::isOuterBoundaryFlagSetOnLastX(int flag) const { + return isOuterBoundaryFlagSet(flag) and localmesh->lastX(); +} + /********************************************************************************** * LEGACY INTERFACE * diff --git a/src/invert/laplacexz/impls/petsc/laplacexz-petsc.hxx b/src/invert/laplacexz/impls/petsc/laplacexz-petsc.hxx index 
47967390f9..7e15be5a34 100644 --- a/src/invert/laplacexz/impls/petsc/laplacexz-petsc.hxx +++ b/src/invert/laplacexz/impls/petsc/laplacexz-petsc.hxx @@ -6,8 +6,8 @@ class LaplaceXZpetsc; -#ifndef __LAPLACEXZ_PETSC_H__ -#define __LAPLACEXZ_PETSC_H__ +#ifndef BOUT_LAPLACEXZ_PETSC_H +#define BOUT_LAPLACEXZ_PETSC_H #include "bout/build_config.hxx" #include "bout/invert/laplacexz.hxx" @@ -73,4 +73,4 @@ private: }; #endif // BOUT_HAS_PETSC -#endif // __LAPLACEXZ_PETSC_H__ +#endif // BOUT_LAPLACEXZ_PETSC_H diff --git a/src/invert/parderiv/impls/cyclic/cyclic.hxx b/src/invert/parderiv/impls/cyclic/cyclic.hxx index 0c581adc52..6493a3b945 100644 --- a/src/invert/parderiv/impls/cyclic/cyclic.hxx +++ b/src/invert/parderiv/impls/cyclic/cyclic.hxx @@ -39,8 +39,8 @@ * ************************************************************************/ -#ifndef __INV_PAR_CR_H__ -#define __INV_PAR_CR_H__ +#ifndef BOUT_INV_PAR_CR_H +#define BOUT_INV_PAR_CR_H #include "bout/build_config.hxx" #include "bout/invert_parderiv.hxx" @@ -110,4 +110,4 @@ RegisterInvertPar registerinvertparcyclic{PARDERIVCYCLIC}; #endif // BOUT_USE_METRIC_3D -#endif // __INV_PAR_CR_H__ +#endif // BOUT_INV_PAR_CR_H diff --git a/src/mesh/boundary_factory.cxx b/src/mesh/boundary_factory.cxx index 5f5978f132..00282566a9 100644 --- a/src/mesh/boundary_factory.cxx +++ b/src/mesh/boundary_factory.cxx @@ -1,3 +1,5 @@ +#include "bout/parallel_boundary_op.hxx" +#include "bout/parallel_boundary_region.hxx" #include #include #include @@ -41,10 +43,12 @@ BoundaryFactory::BoundaryFactory() { addMod(new BoundaryFromFieldAligned(), "fromFieldAligned"); // Parallel boundaries - add(new BoundaryOpPar_dirichlet(), "parallel_dirichlet"); - add(new BoundaryOpPar_dirichlet_O3(), "parallel_dirichlet_O3"); - add(new BoundaryOpPar_dirichlet_interp(), "parallel_dirichlet_interp"); - add(new BoundaryOpPar_neumann(), "parallel_neumann"); + add(new BoundaryOpPar_dirichlet_o1(), "parallel_dirichlet_o1"); + add(new BoundaryOpPar_dirichlet_o2(), 
"parallel_dirichlet_o2"); + add(new BoundaryOpPar_dirichlet_o3(), "parallel_dirichlet_o3"); + add(new BoundaryOpPar_neumann_o1(), "parallel_neumann_o1"); + add(new BoundaryOpPar_neumann_o2(), "parallel_neumann_o2"); + add(new BoundaryOpPar_neumann_o3(), "parallel_neumann_o3"); } BoundaryFactory::~BoundaryFactory() { diff --git a/src/mesh/coordinates.cxx b/src/mesh/coordinates.cxx index 5ec0bb79e1..4e515449ca 100644 --- a/src/mesh/coordinates.cxx +++ b/src/mesh/coordinates.cxx @@ -925,7 +925,7 @@ void Coordinates::outputVars(Options& output_options) { } const Field2D& Coordinates::zlength() const { - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) if (not zlength_cache) { zlength_cache = std::make_unique(0., localmesh); @@ -1502,7 +1502,7 @@ Field3D Coordinates::DDY(const Field3D& f, CELL_LOC outloc, const std::string& m if (!f.hasParallelSlices() and !transform->canToFromFieldAligned()) { Field3D f_parallel = f; transform->calcParallelSlices(f_parallel); - f_parallel.applyParallelBoundary("parallel_neumann"); + f_parallel.applyParallelBoundary("parallel_neumann_o2"); return bout::derivatives::index::DDY(f_parallel, outloc, method, region); } #endif @@ -1908,7 +1908,7 @@ Coordinates::Grad2_par2_DDY_invSg(CELL_LOC outloc, const std::string& method) co // Communicate to get parallel slices localmesh->communicate(*invSgCache); - invSgCache->applyParallelBoundary("parallel_neumann"); + invSgCache->applyParallelBoundary("parallel_neumann_o2"); // cache auto ptr = std::make_unique(); diff --git a/src/mesh/difops.cxx b/src/mesh/difops.cxx index f252abe0ea..2e25dfeedb 100644 --- a/src/mesh/difops.cxx +++ b/src/mesh/difops.cxx @@ -774,7 +774,7 @@ Field3D bracket(const Field3D& f, const Field2D& g, BRACKET_METHOD method, case BRACKET_ARAKAWA_OLD: { #if not(BOUT_USE_METRIC_3D) const int ncz = mesh->LocalNz; - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { for (int jy = mesh->ystart; jy <= mesh->yend; jy++) { const 
BoutReal partialFactor = 1.0 / (12 * metric->dz(jx, jy)); @@ -1100,7 +1100,7 @@ Field3D bracket(const Field3D& f, const Field3D& g, BRACKET_METHOD method, Field3D f_temp = f; Field3D g_temp = g; - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { for (int jy = mesh->ystart; jy <= mesh->yend; jy++) { #if not(BOUT_USE_METRIC_3D) diff --git a/src/mesh/fv_ops.cxx b/src/mesh/fv_ops.cxx index 0a5d5f9624..cd5b924e9e 100644 --- a/src/mesh/fv_ops.cxx +++ b/src/mesh/fv_ops.cxx @@ -22,7 +22,7 @@ Slices makeslices(bool use_slices, const T& field) { namespace FV { -// Div ( a Grad_perp(f) ) -- ∇⊥ ( a ⋅ ∇⊥ f) -- Vorticity +// Div ( a Grad_perp(f) ) -- ∇ ⋅ ( a ∇⊥ f) -- Vorticity Field3D Div_a_Grad_perp(const Field3D& a, const Field3D& f) { ASSERT2(a.getLocation() == f.getLocation()); diff --git a/src/mesh/impls/bout/boutmesh.cxx b/src/mesh/impls/bout/boutmesh.cxx index 04824eeb7e..115d9f38a0 100644 --- a/src/mesh/impls/bout/boutmesh.cxx +++ b/src/mesh/impls/bout/boutmesh.cxx @@ -35,6 +35,7 @@ #include "boutmesh.hxx" +#include #include #include #include @@ -44,6 +45,7 @@ #include #include #include +#include #include #include @@ -80,9 +82,6 @@ BoutMesh::~BoutMesh() { for (const auto& bndry : boundary) { delete bndry; } - for (const auto& bndry : par_boundary) { - delete bndry; - } if (comm_x != MPI_COMM_NULL) { MPI_Comm_free(&comm_x); @@ -3037,11 +3036,36 @@ RangeIterator BoutMesh::iterateBndryUpperY() const { std::vector BoutMesh::getBoundaries() { return boundary; } -std::vector BoutMesh::getBoundariesPar() { return par_boundary; } +std::vector> +BoutMesh::getBoundariesPar(BoundaryParType type) { + return par_boundary[static_cast(type)]; +} -void BoutMesh::addBoundaryPar(BoundaryRegionPar* bndry) { +void BoutMesh::addBoundaryPar(std::shared_ptr bndry, + BoundaryParType type) { output_info << "Adding new parallel boundary: " << bndry->label << endl; - par_boundary.push_back(bndry); + switch (type) { + case 
BoundaryParType::xin_fwd: + par_boundary[static_cast(BoundaryParType::xin)].push_back(bndry); + par_boundary[static_cast(BoundaryParType::fwd)].push_back(bndry); + break; + case BoundaryParType::xin_bwd: + par_boundary[static_cast(BoundaryParType::xin)].push_back(bndry); + par_boundary[static_cast(BoundaryParType::bwd)].push_back(bndry); + break; + case BoundaryParType::xout_fwd: + par_boundary[static_cast(BoundaryParType::xout)].push_back(bndry); + par_boundary[static_cast(BoundaryParType::fwd)].push_back(bndry); + break; + case BoundaryParType::xout_bwd: + par_boundary[static_cast(BoundaryParType::xout)].push_back(bndry); + par_boundary[static_cast(BoundaryParType::bwd)].push_back(bndry); + break; + default: + throw BoutException("Unexpected type of boundary {}", toString(type)); + } + par_boundary[static_cast(type)].push_back(bndry); + par_boundary[static_cast(BoundaryParType::all)].push_back(bndry); } Field3D BoutMesh::smoothSeparatrix(const Field3D& f) { diff --git a/src/mesh/impls/bout/boutmesh.hxx b/src/mesh/impls/bout/boutmesh.hxx index 20bf1d7d46..cc674d401a 100644 --- a/src/mesh/impls/bout/boutmesh.hxx +++ b/src/mesh/impls/bout/boutmesh.hxx @@ -1,6 +1,6 @@ -#ifndef __BOUTMESH_H__ -#define __BOUTMESH_H__ +#ifndef BOUT_BOUTMESH_H +#define BOUT_BOUTMESH_H #include "mpi.h" @@ -158,8 +158,10 @@ public: // Boundary regions std::vector getBoundaries() override; - std::vector getBoundariesPar() override; - void addBoundaryPar(BoundaryRegionPar* bndry) override; + std::vector> + getBoundariesPar(BoundaryParType type) override; + void addBoundaryPar(std::shared_ptr bndry, + BoundaryParType type) override; std::set getPossibleBoundaries() const override; Field3D smoothSeparatrix(const Field3D& f) override; @@ -393,8 +395,10 @@ protected: void addBoundaryRegions(); private: - std::vector boundary; // Vector of boundary regions - std::vector par_boundary; // Vector of parallel boundary regions + std::vector boundary; // Vector of boundary regions + std::array>, + 
static_cast(BoundaryParType::SIZE)> + par_boundary; // Vector of parallel boundary regions ////////////////////////////////////////////////// // Communications @@ -485,4 +489,4 @@ CheckMeshResult checkBoutMeshYDecomposition(int num_y_processors, int ny, int ny_inner); } // namespace bout -#endif // __BOUTMESH_H__ +#endif // BOUT_BOUTMESH_H diff --git a/src/mesh/index_derivs.cxx b/src/mesh/index_derivs.cxx index 9cccd6f7d7..ebecb96700 100644 --- a/src/mesh/index_derivs.cxx +++ b/src/mesh/index_derivs.cxx @@ -445,7 +445,7 @@ class FFTDerivativeType { } const int kmax = ncz / 2 - kfilter; // Up to and including this wavenumber index - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { Array cv(ncz / 2 + 1); const BoutReal kwaveFac = TWOPI / ncz; @@ -502,7 +502,7 @@ class FFT2ndDerivativeType { const int ncz = theMesh->getNpoints(direction); const int kmax = ncz / 2; - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { Array cv(ncz / 2 + 1); const BoutReal kwaveFac = TWOPI / ncz; diff --git a/src/mesh/mesh.cxx b/src/mesh/mesh.cxx index 0f6315a987..870f3413cd 100644 --- a/src/mesh/mesh.cxx +++ b/src/mesh/mesh.cxx @@ -801,12 +801,12 @@ std::optional Mesh::getCommonRegion(std::optional lhs, */ const size_t pos = (high * (high - 1)) / 2 + low; if (region3Dintersect.size() <= pos) { - BOUT_OMP(critical(mesh_intersection_realloc)) + BOUT_OMP_SAFE(critical(mesh_intersection_realloc)) // By default this function does not need the mutex, however, if we are // going to allocate global memory, we need to use a mutex. // Now that we have the mutex, we need to check again whether a // different thread was faster and already allocated. - // BOUT_OMP(single) would work in most cases, but it would fail if the + // BOUT_OMP_SAFE(single) would work in most cases, but it would fail if the // function is called in parallel with different arguments. While BOUT++ // is not currently doing it, other openmp parallised projects might be // calling BOUT++ in this way. 
@@ -821,7 +821,7 @@ std::optional Mesh::getCommonRegion(std::optional lhs, return region3Dintersect[pos]; } { - BOUT_OMP(critical(mesh_intersection)) + BOUT_OMP_SAFE(critical(mesh_intersection)) // See comment above why we need to check again in case of OpenMP #if BOUT_USE_OPENMP if (!region3Dintersect[pos].has_value()) diff --git a/src/mesh/parallel/fci.cxx b/src/mesh/parallel/fci.cxx index 23b2b91eab..cb8c19bbd7 100644 --- a/src/mesh/parallel/fci.cxx +++ b/src/mesh/parallel/fci.cxx @@ -47,9 +47,9 @@ #include -FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options, - int offset_, BoundaryRegionPar* inner_boundary, - BoundaryRegionPar* outer_boundary, bool zperiodic) +FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& UNUSED(dy), Options& options, + int offset_, const std::shared_ptr& inner_boundary, + const std::shared_ptr& outer_boundary, bool zperiodic) : map_mesh(mesh), offset(offset_), region_no_boundary(map_mesh.getRegion("RGN_NOBNDRY")), corner_boundary_mask(map_mesh) { @@ -222,13 +222,16 @@ FCIMap::FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options, const BoutReal dx = (dZ_dz * dR - dR_dz * dZ) / det; const BoutReal dz = (dR_dx * dZ - dZ_dx * dR) / det; - // Negative xt_prime means we've hit the inner boundary, otherwise - // the outer boundary - auto* boundary = (xt_prime[i] < map_mesh.xstart) ? inner_boundary : outer_boundary; + // Negative xt_prime means we've hit the inner boundary, otherwise the + // outer boundary. However, if any of the surrounding points are negative, + // that also means inner. So to differentiate between inner and outer we + // need at least 2 points in the domain. + ASSERT2(map_mesh.xend - map_mesh.xstart >= 2); + auto boundary = (xt_prime[i] < map_mesh.xstart) ? 
inner_boundary : outer_boundary; boundary->add_point(x, y, z, x + dx, y + 0.5 * offset, - z + dz, // Intersection point in local index space - 0.5 * dy[i], // Distance to intersection - PI // Right-angle intersection + z + dz, // Intersection point in local index space + 0.5, // Distance to intersection + 1 // Default to that there is a point in the other direction ); } region_no_boundary = region_no_boundary.mask(to_remove); diff --git a/src/mesh/parallel/fci.hxx b/src/mesh/parallel/fci.hxx index a749c084cc..3ec3321a6a 100644 --- a/src/mesh/parallel/fci.hxx +++ b/src/mesh/parallel/fci.hxx @@ -23,8 +23,8 @@ * **************************************************************************/ -#ifndef __FCITRANSFORM_H__ -#define __FCITRANSFORM_H__ +#ifndef BOUT_FCITRANSFORM_H +#define BOUT_FCITRANSFORM_H #include #include @@ -44,8 +44,8 @@ class FCIMap { public: FCIMap() = delete; FCIMap(Mesh& mesh, const Coordinates::FieldMetric& dy, Options& options, int offset, - BoundaryRegionPar* inner_boundary, BoundaryRegionPar* outer_boundary, - bool zperiodic); + const std::shared_ptr& inner_boundary, + const std::shared_ptr& outer_boundary, bool zperiodic); // The mesh this map was created on Mesh& map_mesh; @@ -79,19 +79,19 @@ public: FCITransform::checkInputGrid(); auto forward_boundary_xin = - new BoundaryRegionPar("FCI_forward", BNDRY_PAR_FWD_XIN, +1, &mesh); - auto backward_boundary_xin = - new BoundaryRegionPar("FCI_backward", BNDRY_PAR_BKWD_XIN, -1, &mesh); + std::make_shared("FCI_forward", BNDRY_PAR_FWD_XIN, +1, &mesh); + auto backward_boundary_xin = std::make_shared( + "FCI_backward", BNDRY_PAR_BKWD_XIN, -1, &mesh); auto forward_boundary_xout = - new BoundaryRegionPar("FCI_forward", BNDRY_PAR_FWD_XOUT, +1, &mesh); - auto backward_boundary_xout = - new BoundaryRegionPar("FCI_backward", BNDRY_PAR_BKWD_XOUT, -1, &mesh); + std::make_shared("FCI_forward", BNDRY_PAR_FWD_XOUT, +1, &mesh); + auto backward_boundary_xout = std::make_shared( + "FCI_backward", BNDRY_PAR_BKWD_XOUT, 
-1, &mesh); // Add the boundary region to the mesh's vector of parallel boundaries - mesh.addBoundaryPar(forward_boundary_xin); - mesh.addBoundaryPar(backward_boundary_xin); - mesh.addBoundaryPar(forward_boundary_xout); - mesh.addBoundaryPar(backward_boundary_xout); + mesh.addBoundaryPar(forward_boundary_xin, BoundaryParType::xin_fwd); + mesh.addBoundaryPar(backward_boundary_xin, BoundaryParType::xin_bwd); + mesh.addBoundaryPar(forward_boundary_xout, BoundaryParType::xout_fwd); + mesh.addBoundaryPar(backward_boundary_xout, BoundaryParType::xout_bwd); field_line_maps.reserve(mesh.ystart * 2); for (int offset = 1; offset < mesh.ystart + 1; ++offset) { @@ -100,6 +100,22 @@ public: field_line_maps.emplace_back(mesh, dy, options, -offset, backward_boundary_xin, backward_boundary_xout, zperiodic); } + ASSERT0(mesh.ystart == 1); + std::shared_ptr bndries[]{ + forward_boundary_xin, forward_boundary_xout, backward_boundary_xin, + backward_boundary_xout}; + for (auto& bndry : bndries) { + for (const auto& bndry2 : bndries) { + if (bndry->dir == bndry2->dir) { + continue; + } + for (bndry->first(); !bndry->isDone(); bndry->next()) { + if (bndry2->contains(*bndry)) { + bndry->setValid(0); + } + } + } + } } void calcParallelSlices(Field3D& f) override; @@ -142,4 +158,4 @@ private: std::vector field_line_maps; }; -#endif // __FCITRANSFORM_H__ +#endif // BOUT_FCITRANSFORM_H diff --git a/src/mesh/parallel/shiftedmetric.cxx b/src/mesh/parallel/shiftedmetric.cxx index 84084d9cbb..382052047d 100644 --- a/src/mesh/parallel/shiftedmetric.cxx +++ b/src/mesh/parallel/shiftedmetric.cxx @@ -6,7 +6,9 @@ * */ +#include "bout/parallel_boundary_region.hxx" #include "bout/paralleltransform.hxx" +#include #include #include #include diff --git a/src/mesh/parallel/shiftedmetricinterp.cxx b/src/mesh/parallel/shiftedmetricinterp.cxx index 214f7ded76..7f3637e79c 100644 --- a/src/mesh/parallel/shiftedmetricinterp.cxx +++ b/src/mesh/parallel/shiftedmetricinterp.cxx @@ -29,7 +29,7 @@ #include 
"shiftedmetricinterp.hxx" #include "bout/constants.hxx" -#include "bout/mask.hxx" +#include "bout/parallel_boundary_region.hxx" ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, Field2D zShift_in, BoutReal zlength_in, @@ -114,11 +114,16 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, interp_from_aligned->calcWeights(zt_prime_from); + int yvalid = mesh.LocalNy - 2 * mesh.ystart; + // avoid overflow - no stencil need more than 5 points + if (yvalid > 20) { + yvalid = 20; + } // Create regions for parallel boundary conditions Field2D dy; mesh.get(dy, "dy", 1.); - auto forward_boundary_xin = - new BoundaryRegionPar("parallel_forward_xin", BNDRY_PAR_FWD_XIN, +1, &mesh); + auto forward_boundary_xin = std::make_shared( + "parallel_forward_xin", BNDRY_PAR_FWD_XIN, +1, &mesh); for (auto it = mesh.iterateBndryUpperY(); not it.isDone(); it.next()) { for (int z = mesh.zstart; z <= mesh.zend; z++) { forward_boundary_xin->add_point( @@ -128,14 +133,13 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z + 0.5 * (zShift(it.ind, mesh.yend + 1) - zShift(it.ind, mesh.yend)), 0.25 - * (dy(it.ind, mesh.yend) // dy/2 - + dy(it.ind, mesh.yend + 1)), - 0. // angle? 
- ); + * (1 // dy/2 + + dy(it.ind, mesh.yend + 1) / dy(it.ind, mesh.yend)), // length + yvalid); } } - auto backward_boundary_xin = - new BoundaryRegionPar("parallel_backward_xin", BNDRY_PAR_BKWD_XIN, -1, &mesh); + auto backward_boundary_xin = std::make_shared( + "parallel_backward_xin", BNDRY_PAR_BKWD_XIN, -1, &mesh); for (auto it = mesh.iterateBndryLowerY(); not it.isDone(); it.next()) { for (int z = mesh.zstart; z <= mesh.zend; z++) { backward_boundary_xin->add_point( @@ -145,15 +149,14 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z + 0.5 * (zShift(it.ind, mesh.ystart) - zShift(it.ind, mesh.ystart - 1)), 0.25 - * (dy(it.ind, mesh.ystart - 1) // dy/2 - + dy(it.ind, mesh.ystart)), - 0. // angle? - ); + * (1 // dy/2 + + dy(it.ind, mesh.ystart - 1) / dy(it.ind, mesh.ystart)), + yvalid); } } // Create regions for parallel boundary conditions - auto forward_boundary_xout = - new BoundaryRegionPar("parallel_forward_xout", BNDRY_PAR_FWD_XOUT, +1, &mesh); + auto forward_boundary_xout = std::make_shared( + "parallel_forward_xout", BNDRY_PAR_FWD_XOUT, +1, &mesh); for (auto it = mesh.iterateBndryUpperY(); not it.isDone(); it.next()) { for (int z = mesh.zstart; z <= mesh.zend; z++) { forward_boundary_xout->add_point( @@ -163,14 +166,13 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z + 0.5 * (zShift(it.ind, mesh.yend + 1) - zShift(it.ind, mesh.yend)), 0.25 - * (dy(it.ind, mesh.yend) // dy/2 - + dy(it.ind, mesh.yend + 1)), - 0. // angle? 
- ); + * (1 // dy/2 + + dy(it.ind, mesh.yend + 1) / dy(it.ind, mesh.yend)), + yvalid); } } - auto backward_boundary_xout = - new BoundaryRegionPar("parallel_backward_xout", BNDRY_PAR_BKWD_XOUT, -1, &mesh); + auto backward_boundary_xout = std::make_shared( + "parallel_backward_xout", BNDRY_PAR_BKWD_XOUT, -1, &mesh); for (auto it = mesh.iterateBndryLowerY(); not it.isDone(); it.next()) { for (int z = mesh.zstart; z <= mesh.zend; z++) { backward_boundary_xout->add_point( @@ -180,18 +182,17 @@ ShiftedMetricInterp::ShiftedMetricInterp(Mesh& mesh, CELL_LOC location_in, zlength * BoutReal(z) / BoutReal(mesh.GlobalNz) // z + 0.5 * (zShift(it.ind, mesh.ystart) - zShift(it.ind, mesh.ystart - 1)), 0.25 - * (dy(it.ind, mesh.ystart - 1) // dy/2 - + dy(it.ind, mesh.ystart)), - 0. // angle? - ); + * (dy(it.ind, mesh.ystart - 1) / dy(it.ind, mesh.ystart) // dy/2 + + 1), + yvalid); } } // Add the boundary region to the mesh's vector of parallel boundaries - mesh.addBoundaryPar(forward_boundary_xin); - mesh.addBoundaryPar(backward_boundary_xin); - mesh.addBoundaryPar(forward_boundary_xout); - mesh.addBoundaryPar(backward_boundary_xout); + mesh.addBoundaryPar(forward_boundary_xin, BoundaryParType::xin_fwd); + mesh.addBoundaryPar(backward_boundary_xin, BoundaryParType::xin_bwd); + mesh.addBoundaryPar(forward_boundary_xout, BoundaryParType::xout_fwd); + mesh.addBoundaryPar(backward_boundary_xout, BoundaryParType::xout_bwd); } void ShiftedMetricInterp::checkInputGrid() { diff --git a/src/mesh/parallel/shiftedmetricinterp.hxx b/src/mesh/parallel/shiftedmetricinterp.hxx index 93ea2f07be..6852ea15a9 100644 --- a/src/mesh/parallel/shiftedmetricinterp.hxx +++ b/src/mesh/parallel/shiftedmetricinterp.hxx @@ -24,8 +24,8 @@ * **************************************************************************/ -#ifndef __SHIFTEDINTERP_H__ -#define __SHIFTEDINTERP_H__ +#ifndef BOUT_SHIFTEDINTERP_H +#define BOUT_SHIFTEDINTERP_H #include #include @@ -129,4 +129,4 @@ private: const std::size_t ydown_index; };
-#endif // __SHIFTEDINTERP_H__ +#endif // BOUT_SHIFTEDINTERP_H diff --git a/src/mesh/parallel_boundary_op.cxx b/src/mesh/parallel_boundary_op.cxx index 8b2c294a4a..ebd9852791 100644 --- a/src/mesh/parallel_boundary_op.cxx +++ b/src/mesh/parallel_boundary_op.cxx @@ -6,18 +6,15 @@ #include "bout/output.hxx" BoutReal BoundaryOpPar::getValue(const BoundaryRegionPar& bndry, BoutReal t) { - - Mesh* mesh = bndry.localmesh; - BoutReal value; switch (value_type) { case ValueType::GEN: - return gen_values->generate( - bout::generator::Context(bndry.s_x, bndry.s_y, bndry.s_z, CELL_CENTRE, mesh, t)); + return gen_values->generate(bout::generator::Context( + bndry.s_x(), bndry.s_y(), bndry.s_z(), CELL_CENTRE, bndry.localmesh, t)); case ValueType::FIELD: // FIXME: Interpolate to s_x, s_y, s_z... - value = (*field_values)(bndry.x, bndry.y, bndry.z); + value = (*field_values)[bndry.ind()]; return value; case ValueType::REAL: return real_value; @@ -25,123 +22,3 @@ BoutReal BoundaryOpPar::getValue(const BoundaryRegionPar& bndry, BoutReal t) { throw BoutException("Invalid value_type encountered in BoundaryOpPar::getValue"); } } - -////////////////////////////////////////// -// Dirichlet boundary - -void BoundaryOpPar_dirichlet::apply(Field3D& f, BoutReal t) { - Field3D& f_next = f.ynext(bndry->dir); - - Coordinates& coord = *(f.getCoordinates()); - - // Loop over grid points If point is in boundary, then fill in - // f_next such that the field would be VALUE on the boundary - for (bndry->first(); !bndry->isDone(); bndry->next()) { - // temp variables for convenience - int x = bndry->x; - int y = bndry->y; - int z = bndry->z; - - // Generate the boundary value - BoutReal value = getValue(*bndry, t); - - // Scale the field and normalise to the desired value - BoutReal y_prime = bndry->length; - BoutReal f2 = (f(x, y, z) - value) * (coord.dy(x, y, z) - y_prime) / y_prime; - - f_next(x, y + bndry->dir, z) = value - f2; - } -} - -////////////////////////////////////////// -// Dirichlet 
boundary - Third order - -void BoundaryOpPar_dirichlet_O3::apply(Field3D& f, BoutReal t) { - - Field3D& f_next = f.ynext(bndry->dir); - Field3D& f_prev = f.ynext(-bndry->dir); - - Coordinates& coord = *(f.getCoordinates()); - - // Loop over grid points If point is in boundary, then fill in - // f_next such that the field would be VALUE on the boundary - for (bndry->first(); !bndry->isDone(); bndry->next()) { - // temp variables for convenience - int x = bndry->x; - int y = bndry->y; - int z = bndry->z; - - // Generate the boundary value - BoutReal fb = getValue(*bndry, t); - BoutReal f1 = f_prev(x, y - bndry->dir, z); - BoutReal f2 = f(x, y, z); - BoutReal l1 = coord.dy(x, y, z); - BoutReal l2 = bndry->length; - BoutReal l3 = coord.dy(x, y, z) - l2; - - BoutReal denom = (l1 * l1 * l2 + l1 * l2 * l2); - BoutReal term1 = (l2 * l2 * l3 + l2 * l3 * l3); - BoutReal term2 = l1 * (l1 + l2 + l3) * (l2 + l3); - BoutReal term3 = l3 * ((l1 + l2) * l3 + (l1 + l2) * (l1 + l2)); - - f_next(x, y + bndry->dir, z) = (term1 * f1 + term2 * fb - term3 * f2) / denom; - } -} - -////////////////////////////////////////// -// Dirichlet with interpolation - -void BoundaryOpPar_dirichlet_interp::apply(Field3D& f, BoutReal t) { - - Field3D& f_next = f.ynext(bndry->dir); - Field3D& f_prev = f.ynext(-bndry->dir); - - Coordinates& coord = *(f.getCoordinates()); - - // Loop over grid points If point is in boundary, then fill in - // f_next such that the field would be VALUE on the boundary - for (bndry->first(); !bndry->isDone(); bndry->next()) { - // temp variables for convenience - int x = bndry->x; - int y = bndry->y; - int z = bndry->z; - - // Generate the boundary value - BoutReal fs = getValue(*bndry, t); - - // Scale the field and normalise to the desired value - BoutReal dy = coord.dy(x, y, z); - BoutReal s = bndry->length * dy; - - f_next(x, y + bndry->dir, z) = - f_prev(x, y - bndry->dir, z) * (1. - (2. * s / (dy + s))) - + 2. * f(x, y, z) * ((s - dy) / s) + fs * (dy / s - (2. 
/ s + 1.)); - } -} - -////////////////////////////////////////// -// Neumann boundary - -void BoundaryOpPar_neumann::apply(Field3D& f, BoutReal t) { - TRACE("BoundaryOpPar_neumann::apply"); - - Field3D& f_next = f.ynext(bndry->dir); - f_next.allocate(); // Ensure unique before modifying - - Coordinates& coord = *(f.getCoordinates()); - - // If point is in boundary, then fill in f_next such that the derivative - // would be VALUE on the boundary - for (bndry->first(); !bndry->isDone(); bndry->next()) { - // temp variables for convience - int x = bndry->x; - int y = bndry->y; - int z = bndry->z; - - // Generate the boundary value - BoutReal value = getValue(*bndry, t); - BoutReal dy = coord.dy(x, y, z); - - f_next(x, y + bndry->dir, z) = f(x, y, z) + bndry->dir * value * dy; - } -} diff --git a/src/mesh/parallel_boundary_region.cxx b/src/mesh/parallel_boundary_region.cxx index 3f77d96737..e69de29bb2 100644 --- a/src/mesh/parallel_boundary_region.cxx +++ b/src/mesh/parallel_boundary_region.cxx @@ -1,37 +0,0 @@ -#include "bout/parallel_boundary_region.hxx" - -void BoundaryRegionPar::add_point(const int jx, const int jy, const int jz, - const BoutReal x, const BoutReal y, const BoutReal z, - const BoutReal length, const BoutReal angle) { - bndry_points.push_back({{jx, jy, jz}, {x, y, z}, length, angle}); -} - -void BoundaryRegionPar::first() { - bndry_position = begin(bndry_points); - if (!isDone()) { - x = bndry_position->index.jx; - y = bndry_position->index.jy; - z = bndry_position->index.jz; - s_x = bndry_position->intersection.s_x; - s_y = bndry_position->intersection.s_y; - s_z = bndry_position->intersection.s_z; - length = bndry_position->length; - angle = bndry_position->angle; - } -} - -void BoundaryRegionPar::next() { - ++bndry_position; - if (!isDone()) { - x = bndry_position->index.jx; - y = bndry_position->index.jy; - z = bndry_position->index.jz; - s_x = bndry_position->intersection.s_x; - s_y = bndry_position->intersection.s_y; - s_z = 
bndry_position->intersection.s_z; - length = bndry_position->length; - angle = bndry_position->angle; - } -} - -bool BoundaryRegionPar::isDone() { return (bndry_position == end(bndry_points)); } diff --git a/src/mesh/parallel_boundary_stencil.cxx.py b/src/mesh/parallel_boundary_stencil.cxx.py new file mode 100644 index 0000000000..d0988ee099 --- /dev/null +++ b/src/mesh/parallel_boundary_stencil.cxx.py @@ -0,0 +1,62 @@ +import os +from tempfile import NamedTemporaryFile as tmpf +from stencils_sympy import dirichlet, neumann, simp, Symbol, Matrix, ccode + + +def gen_code(order, matrix_type): + x = [Symbol("spacing%d" % i) for i in range(order)] + matrix = matrix_type(x) + A = Matrix(order, order, matrix) + + try: + iA = A.inv() + except: + import sys + + print(A, matrix, file=sys.stderr) + raise + return ccode(simp(sum([iA[0, i] * Symbol("value%d" % i) for i in range(order)]))) + + +def run(cmd): + print(cmd) + out = os.system(cmd) + assert out == 0 + + +if __name__ == "__main__": + with tmpf("w", dir=".", delete=False) as f: + f.write("namespace {\n") + f.write( + """ +inline BoutReal pow(BoutReal val, int exp) { + //constexpr int expval = exp; + //static_assert(expval == 2 or expval == 3, "This pow is only for exponent 2 or 3"); + if (exp == 2) { + return val * val; + } + ASSERT3(exp == 3); + return val * val * val; +} +""" + ) + + for order in range(1, 4): + for matrix in dirichlet, neumann: + if order == 1 and matrix == neumann: + continue + print(f"generating {matrix.name}_o{order}") + args = ", ".join( + [ + "BoutReal spacing%d, BoutReal value%d" % (i, i) + for i in range(order) + ] + ) + f.write( + f"inline BoutReal stencil_{matrix.name}_o{order}({args}) {{\n return " + ) + f.write(gen_code(order, matrix)) + f.write(";\n}\n") + f.write("}\n") + run("clang-format -i " + f.name) + run(f"mv {f.name} {__file__[:-3]}") diff --git a/src/mesh/stencils.md b/src/mesh/stencils.md new file mode 100644 index 0000000000..0c7d181481 --- /dev/null +++ b/src/mesh/stencils.md 
@@ -0,0 +1,29 @@ +Notes concerning the generation of stencils +================ + +We want to create a Taylor function +$f(x-x_0)=\sum_{i=0}^n \frac{1}{i!}f_i(x-x_0)^i$ where $n$ +is the order of the function, $x_0$ is the point in the boundary +where we want to calculate the function. $f_i$ are some coefficients +that we need to determine. To be precise, only $f_0$ needs to be +determined. +We know that the function has at some points certain values. If the +value at some distance `spacing.f0` is a given value `val` then we +can build a linear system of equations using the above formula. +If rather the derivative is given, the above equation needs to be +differentiated once. + +stencils_sympy.py calculates the coefficients of the above matrix +which represents our system of equations. The derivative is simply +the factor of the next smaller term (or zero if there is no +smaller one). This is what is calculated by `taylor`, `dirichlet` +and `neumann`, the respective matrix coefficients. + +sympy does all the heavy lifting of analytically inverting the +matrix. + +With the analytic inversion we can put in the numerical offsets +`spacing.f?` in C++ and get a fast expression for the respective +coefficients. As mentioned before, we do not need the full inverse, +just the first row, as we only care about the value, not about its +derivative. 
diff --git a/src/mesh/stencils_sympy.py b/src/mesh/stencils_sympy.py new file mode 100644 index 0000000000..64677f1985 --- /dev/null +++ b/src/mesh/stencils_sympy.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 + +from sympy import Symbol, Eq +from sympy.matrices import Matrix +from sympy.printing import ccode +from sympy.simplify import combsimp as simp +from sympy.utilities.codegen import codegen + + +def pow(a, b): + if b == 0: + return "1" + if b == 1: + return a + else: + return "%s**%d" % (a, b) + + +def factorial(a): + if a == 0 or a == 1: + return 1 + else: + assert a > 0 + return a * factorial(a - 1) + + +def gen_code(order, matrix_type): + x = [Symbol("spacing.f%d" % i) for i in range(order)] + matrix = matrix_type(x) + A = Matrix(order, order, matrix) + + try: + iA = A.inv() + except: + import sys + + print(A, matrix, file=sys.stderr) + raise + ret = "" + for i in range(order): + ret += ccode(simp(iA[0, i]), assign_to="facs.f%d" % i) + ret += "\n" + return ret + + +def taylor(x, i, j): + if j >= 0: + return x[i] ** j / factorial(j) + else: + return 0 + + +class dirichlet: + name = "dirichlet" + + def __init__(self, x): + self.x = x + + def __call__(self, i, j): + return taylor(self.x, i, j) + + +class neumann: + name = "neumann" + + def __init__(self, x): + self.x = x + + def __call__(self, i, j): + if i == 0: + return taylor(self.x, i, j - 1) + else: + return taylor(self.x, i, j) + + +if __name__ == "__main__": + print(gen_code(3, dirichlet)) diff --git a/src/solver/impls/adams_bashforth/adams_bashforth.cxx b/src/solver/impls/adams_bashforth/adams_bashforth.cxx index bfdea5e126..79161fcdbf 100644 --- a/src/solver/impls/adams_bashforth/adams_bashforth.cxx +++ b/src/solver/impls/adams_bashforth/adams_bashforth.cxx @@ -201,7 +201,7 @@ void AB_integrate_update(Array& update, BoutReal timestep, for (std::size_t j = 0; j < static_cast(order); ++j) { const BoutReal factor = AB_coefficients[j]; - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (std::size_t 
i = 0; i < static_cast(update.size()); ++i) { update[i] += history[j][i] * factor; } @@ -576,7 +576,7 @@ BoutReal AdamsBashforthSolver::take_step(const BoutReal timeIn, const BoutReal d // std::transform(std::begin(current), std::end(current), std::begin(full_update), // std::begin(result), std::plus{}); if (not(adaptive and followHighOrder)) { - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { result[i] = current[i] + full_update[i]; } @@ -614,7 +614,7 @@ BoutReal AdamsBashforthSolver::take_step(const BoutReal timeIn, const BoutReal d // use this to calculate the derivatives at this point. // std::transform(std::begin(current), std::end(current), std::begin(half_update), // std::begin(result2), std::plus{}); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { result2[i] = current[i] + half_update[i]; } @@ -639,7 +639,7 @@ BoutReal AdamsBashforthSolver::take_step(const BoutReal timeIn, const BoutReal d // "full" two half step half_update. 
Rather than using result2 we just replace // result here as we want to use this smaller step result if (followHighOrder) { - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { result[i] = current[i] + half_update[i]; } diff --git a/src/solver/impls/adams_bashforth/adams_bashforth.hxx b/src/solver/impls/adams_bashforth/adams_bashforth.hxx index ad8e77ed1c..60b3b2b05e 100644 --- a/src/solver/impls/adams_bashforth/adams_bashforth.hxx +++ b/src/solver/impls/adams_bashforth/adams_bashforth.hxx @@ -25,8 +25,8 @@ class AdamsBashforthSolver; -#ifndef __ADAMSBASHFORTH_SOLVER_H__ -#define __ADAMSBASHFORTH_SOLVER_H__ +#ifndef BOUT_ADAMSBASHFORTH_SOLVER_H +#define BOUT_ADAMSBASHFORTH_SOLVER_H #include #include @@ -96,4 +96,4 @@ private: int nlocal, neq; // Number of variables on local processor and in total }; -#endif // __ADAMSBASHFORTH_SOLVER_H__ +#endif // BOUT_ADAMSBASHFORTH_SOLVER_H diff --git a/src/solver/impls/arkode/arkode.cxx b/src/solver/impls/arkode/arkode.cxx index aabe2ae050..bc3be6f80a 100644 --- a/src/solver/impls/arkode/arkode.cxx +++ b/src/solver/impls/arkode/arkode.cxx @@ -41,17 +41,7 @@ #include "bout/unused.hxx" #include "bout/utils.hxx" -#if SUNDIALS_VERSION_MAJOR >= 4 #include -#else -#include -#if SUNDIALS_VERSION_MAJOR >= 3 -#include -#else -#include -#endif -#endif - #include #include #include @@ -61,110 +51,21 @@ class Field2D; -#define ZERO RCONST(0.) 
-#define ONE RCONST(1.0) - -#ifndef ARKODEINT -#if SUNDIALS_VERSION_MAJOR < 3 -using ARKODEINT = bout::utils::function_traits::arg_t<0>; -#else -using ARKODEINT = sunindextype; -#endif -#endif - -static int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data); -static int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data); -static int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data); - -static int arkode_bbd_rhs(ARKODEINT Nlocal, BoutReal t, N_Vector u, N_Vector du, - void* user_data); -static int arkode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec, - BoutReal gamma, BoutReal delta, int lr, void* user_data); -#if SUNDIALS_VERSION_MAJOR < 3 -// Shim for earlier versions -inline static int arkode_pre_shim(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, - N_Vector zvec, BoutReal gamma, BoutReal delta, int lr, - void* user_data, N_Vector UNUSED(tmp)) { - return arkode_pre(t, yy, yp, rvec, zvec, gamma, delta, lr, user_data); -} -#else -// Alias for newer versions -constexpr auto& arkode_pre_shim = arkode_pre; -#endif +// NOLINTBEGIN(readability-identifier-length) +namespace { +int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data); +int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data); +int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data); -static int arkode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector fy, - void* user_data, N_Vector tmp); -#if SUNDIALS_VERSION_MAJOR < 4 -// Shim for earlier versions -inline int ARKStepSetJacTimes(void* arkode_mem, std::nullptr_t, - ARKSpilsJacTimesVecFn jtimes) { -#if SUNDIALS_VERSION_MAJOR < 3 - return ARKSpilsSetJacTimesVecFn(arkode_mem, jtimes); -#else - return ARKSpilsSetJacTimes(arkode_mem, nullptr, jtimes); -#endif -} -#endif +int arkode_bbd_rhs(sunindextype Nlocal, BoutReal t, N_Vector u, N_Vector du, + void* user_data); +int arkode_pre(BoutReal t, N_Vector 
yy, N_Vector yp, N_Vector rvec, N_Vector zvec, + BoutReal gamma, BoutReal delta, int lr, void* user_data); -#if SUNDIALS_VERSION_MAJOR < 4 -void* ARKStepCreate(ARKRhsFn fe, ARKRhsFn fi, BoutReal t0, N_Vector y0) { - auto arkode_mem = ARKodeCreate(); - - if (arkode_mem == nullptr) { - throw BoutException("ARKodeCreate failed\n"); - } - if (ARKodeInit(arkode_mem, fe, fi, t0, y0) != ARK_SUCCESS) { - throw BoutException("ARKodeInit failed\n"); - } - return arkode_mem; -} - -#if SUNDIALS_VERSION_MAJOR == 3 -int ARKStepSetLinearSolver(void* arkode_mem, SUNLinearSolver LS, std::nullptr_t) { - return ARKSpilsSetLinearSolver(arkode_mem, LS); -} -#endif - -// Aliases for older versions -// In SUNDIALS 4, ARKode has become ARKStep, hence all the renames -constexpr auto& ARKStepEvolve = ARKode; -constexpr auto& ARKStepFree = ARKodeFree; -constexpr auto& ARKStepGetCurrentTime = ARKodeGetCurrentTime; -constexpr auto& ARKStepGetDky = ARKodeGetDky; -constexpr auto& ARKStepGetLastStep = ARKodeGetLastStep; -constexpr auto& ARKStepGetNumLinIters = ARKSpilsGetNumLinIters; -constexpr auto& ARKStepGetNumNonlinSolvIters = ARKodeGetNumNonlinSolvIters; -constexpr auto& ARKStepGetNumPrecEvals = ARKSpilsGetNumPrecEvals; -constexpr auto& ARKStepGetNumRhsEvals = ARKodeGetNumRhsEvals; -constexpr auto& ARKStepGetNumSteps = ARKodeGetNumSteps; -constexpr auto& ARKStepReInit = ARKodeReInit; -constexpr auto& ARKStepSStolerances = ARKodeSStolerances; -constexpr auto& ARKStepSVtolerances = ARKodeSVtolerances; -constexpr auto& ARKStepSetAdaptivityMethod = ARKodeSetAdaptivityMethod; -constexpr auto& ARKStepSetCFLFraction = ARKodeSetCFLFraction; -constexpr auto& ARKStepSetEpsLin = ARKSpilsSetEpsLin; -constexpr auto& ARKStepSetExplicit = ARKodeSetExplicit; -constexpr auto& ARKStepSetFixedPoint = ARKodeSetFixedPoint; -constexpr auto& ARKStepSetFixedStep = ARKodeSetFixedStep; -constexpr auto& ARKStepSetImEx = ARKodeSetImEx; -constexpr auto& ARKStepSetImplicit = ARKodeSetImplicit; -constexpr auto& 
ARKStepSetInitStep = ARKodeSetInitStep; -constexpr auto& ARKStepSetLinear = ARKodeSetLinear; -constexpr auto& ARKStepSetMaxNumSteps = ARKodeSetMaxNumSteps; -constexpr auto& ARKStepSetMaxStep = ARKodeSetMaxStep; -constexpr auto& ARKStepSetMinStep = ARKodeSetMinStep; -constexpr auto& ARKStepSetOptimalParams = ARKodeSetOptimalParams; -constexpr auto& ARKStepSetOrder = ARKodeSetOrder; -constexpr auto& ARKStepSetPreconditioner = ARKSpilsSetPreconditioner; -constexpr auto& ARKStepSetUserData = ARKodeSetUserData; -#endif - -#if SUNDIALS_VERSION_MAJOR < 6 -void* ARKStepCreate(ARKRhsFn fe, ARKRhsFn fi, BoutReal t0, N_Vector y0, - [[maybe_unused]] SUNContext context) { - return ARKStepCreate(fe, fi, t0, y0); -} -#endif +int arkode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector fy, + void* user_data, N_Vector tmp); +} // namespace +// NOLINTEND(readability-identifier-length) ArkodeSolver::ArkodeSolver(Options* opts) : Solver(opts), diagnose((*options)["diagnose"] @@ -187,6 +88,14 @@ ArkodeSolver::ArkodeSolver(Options* opts) "not recommended except for code comparison") .withDefault(false)), order((*options)["order"].doc("Order of internal step").withDefault(4)), +#if SUNDIALS_TABLE_BY_NAME_SUPPORT + implicit_table((*options)["implicit_table"] + .doc("Name of the implicit Butcher table") + .withDefault("")), + explicit_table((*options)["explicit_table"] + .doc("Name of the explicit Butcher table") + .withDefault("")), +#endif cfl_frac((*options)["cfl_frac"] .doc("Fraction of the estimated explicitly stable step to use") .withDefault(-1.0)), @@ -226,7 +135,7 @@ ArkodeSolver::ArkodeSolver(Options* opts) .withDefault(false)), optimize( (*options)["optimize"].doc("Use ARKode optimal parameters").withDefault(false)), - suncontext(static_cast(&BoutComm::get())) { + suncontext(createSUNContext(BoutComm::get())) { has_constraints = false; // This solver doesn't have constraints // Add diagnostics to output @@ -243,10 +152,14 @@ ArkodeSolver::ArkodeSolver(Options* opts) 
} ArkodeSolver::~ArkodeSolver() { - N_VDestroy_Parallel(uvec); + N_VDestroy(uvec); ARKStepFree(&arkode_mem); SUNLinSolFree(sun_solver); SUNNonlinSolFree(nonlinear_solver); + +#if SUNDIALS_CONTROLLER_SUPPORT + SUNAdaptController_Destroy(controller); +#endif } /************************************************************************** @@ -274,12 +187,13 @@ int ArkodeSolver::init() { n2Dvars(), neq, local_N); // Allocate memory - if ((uvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) { + uvec = callWithSUNContext(N_VNew_Parallel, suncontext, BoutComm::get(), local_N, neq); + if (uvec == nullptr) { throw BoutException("SUNDIALS memory allocation failed\n"); } // Put the variables into uvec - save_vars(NV_DATA_P(uvec)); + save_vars(N_VGetArrayPointer(uvec)); ASSERT1(solve_explicit or solve_implicit); @@ -298,8 +212,9 @@ int ArkodeSolver::init() { } }(); - if ((arkode_mem = ARKStepCreate(explicit_rhs, implicit_rhs, simtime, uvec, suncontext)) - == nullptr) { + arkode_mem = callWithSUNContext(ARKStepCreate, suncontext, explicit_rhs, implicit_rhs, + simtime, uvec); + if (arkode_mem == nullptr) { throw BoutException("ARKStepCreate failed\n"); } @@ -325,11 +240,8 @@ int ArkodeSolver::init() { throw BoutException("ARKStepSetUserData failed\n"); } - if (set_linear) { - output.write("\tSetting ARKStep implicit solver to Linear\n"); - if (ARKStepSetLinear(arkode_mem, 1) != ARK_SUCCESS) { - throw BoutException("ARKStepSetLinear failed\n"); - } + if (ARKStepSetLinear(arkode_mem, set_linear) != ARK_SUCCESS) { + throw BoutException("ARKStepSetLinear failed\n"); } if (fixed_step) { @@ -344,13 +256,59 @@ int ArkodeSolver::init() { throw BoutException("ARKStepSetOrder failed\n"); } +#if SUNDIALS_TABLE_BY_NAME_SUPPORT + if (!implicit_table.empty() || !explicit_table.empty()) { + if (ARKStepSetTableName( + arkode_mem, + implicit_table.empty() ? "ARKODE_DIRK_NONE" : implicit_table.c_str(), + explicit_table.empty() ? 
"ARKODE_ERK_NONE" : explicit_table.c_str()) + != ARK_SUCCESS) { + throw BoutException("ARKStepSetTableName failed\n"); + } + } +#endif + if (ARKStepSetCFLFraction(arkode_mem, cfl_frac) != ARK_SUCCESS) { throw BoutException("ARKStepSetCFLFraction failed\n"); } +#if SUNDIALS_CONTROLLER_SUPPORT + switch (adap_method) { + case 0: + controller = SUNAdaptController_PID(suncontext); + break; + case 1: + controller = SUNAdaptController_PI(suncontext); + break; + case 2: + controller = SUNAdaptController_I(suncontext); + break; + case 3: + controller = SUNAdaptController_ExpGus(suncontext); + break; + case 4: + controller = SUNAdaptController_ImpGus(suncontext); + break; + case 5: + controller = SUNAdaptController_ImExGus(suncontext); + break; + + default: + throw BoutException("Invalid adap_method\n"); + } + + if (ARKStepSetAdaptController(arkode_mem, controller) != ARK_SUCCESS) { + throw BoutException("ARKStepSetAdaptController failed\n"); + } + + if (ARKStepSetAdaptivityAdjustment(arkode_mem, 0) != ARK_SUCCESS) { + throw BoutException("ARKStepSetAdaptivityAdjustment failed\n"); + } +#else if (ARKStepSetAdaptivityMethod(arkode_mem, adap_method, 1, 1, nullptr) != ARK_SUCCESS) { throw BoutException("ARKStepSetAdaptivityMethod failed\n"); } +#endif if (use_vector_abstol) { std::vector f2dtols; @@ -374,18 +332,18 @@ int ArkodeSolver::init() { return Options::root()[f3.name]["atol"].withDefault(abstol); }); - N_Vector abstolvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext); + N_Vector abstolvec = N_VClone(uvec); if (abstolvec == nullptr) { throw BoutException("SUNDIALS memory allocation (abstol vector) failed\n"); } - set_abstol_values(NV_DATA_P(abstolvec), f2dtols, f3dtols); + set_abstol_values(N_VGetArrayPointer(abstolvec), f2dtols, f3dtols); if (ARKStepSVtolerances(arkode_mem, reltol, abstolvec) != ARK_SUCCESS) { throw BoutException("ARKStepSVtolerances failed\n"); } - N_VDestroy_Parallel(abstolvec); + N_VDestroy(abstolvec); } else { if 
(ARKStepSStolerances(arkode_mem, reltol, abstol) != ARK_SUCCESS) { throw BoutException("ARKStepSStolerances failed\n"); @@ -414,118 +372,79 @@ int ArkodeSolver::init() { } } - // ARKStepSetPredictorMethod(arkode_mem,4); - -#if SUNDIALS_VERSION_MAJOR < 4 - if (fixed_point) { - output.write("\tUsing accelerated fixed point solver\n"); - if (ARKodeSetFixedPoint(arkode_mem, 3.0)) { - throw BoutException("ARKodeSetFixedPoint failed\n"); - } - } else { - output.write("\tUsing Newton iteration\n"); - if (ARKodeSetNewton(arkode_mem)) { - throw BoutException("ARKodeSetNewton failed\n"); - } - } -#else if (fixed_point) { output.write("\tUsing accelerated fixed point solver\n"); - if ((nonlinear_solver = SUNNonlinSol_FixedPoint(uvec, 3, suncontext)) == nullptr) { + nonlinear_solver = callWithSUNContext(SUNNonlinSol_FixedPoint, suncontext, uvec, 3); + if (nonlinear_solver == nullptr) { throw BoutException("Creating SUNDIALS fixed point nonlinear solver failed\n"); } + if (ARKStepSetNonlinearSolver(arkode_mem, nonlinear_solver) != ARK_SUCCESS) { + throw BoutException("ARKStepSetNonlinearSolver failed\n"); + } } else { output.write("\tUsing Newton iteration\n"); - if ((nonlinear_solver = SUNNonlinSol_Newton(uvec, suncontext)) == nullptr) { - throw BoutException("Creating SUNDIALS Newton nonlinear solver failed\n"); - } - } - if (ARKStepSetNonlinearSolver(arkode_mem, nonlinear_solver) != ARK_SUCCESS) { - throw BoutException("ARKStepSetNonlinearSolver failed\n"); - } -#endif - - /// Set Preconditioner - if (use_precon) { - const int prectype = rightprec ? SUN_PREC_RIGHT : SUN_PREC_LEFT; -#if SUNDIALS_VERSION_MAJOR >= 3 - if ((sun_solver = SUNLinSol_SPGMR(uvec, prectype, maxl, suncontext)) == nullptr) { + const auto prectype = + use_precon ? (rightprec ? 
SUN_PREC_RIGHT : SUN_PREC_LEFT) : SUN_PREC_NONE; + sun_solver = callWithSUNContext(SUNLinSol_SPGMR, suncontext, uvec, prectype, maxl); + if (sun_solver == nullptr) { throw BoutException("Creating SUNDIALS linear solver failed\n"); } - if (ARKStepSetLinearSolver(arkode_mem, sun_solver, nullptr) != ARK_SUCCESS) { + if (ARKStepSetLinearSolver(arkode_mem, sun_solver, nullptr) != ARKLS_SUCCESS) { throw BoutException("ARKStepSetLinearSolver failed\n"); } -#else - if (ARKSpgmr(arkode_mem, prectype, maxl) != ARKSPILS_SUCCESS) { - throw BoutException("ARKSpgmr failed\n"); - } -#endif - if (!hasPreconditioner()) { - output.write("\tUsing BBD preconditioner\n"); - - /// Get options - // Compute band_width_default from actually added fields, to allow for multiple - // Mesh objects - // - // Previous implementation was equivalent to: - // int MXSUB = mesh->xend - mesh->xstart + 1; - // int band_width_default = n3Dvars()*(MXSUB+2); - const int band_width_default = std::accumulate( - begin(f3d), end(f3d), 0, [](int a, const VarStr& fvar) { - Mesh* localmesh = fvar.var->getMesh(); - return a + localmesh->xend - localmesh->xstart + 3; - }); - - const auto mudq = (*options)["mudq"] - .doc("Upper half-bandwidth to be used in the difference " - "quotient Jacobian approximation") - .withDefault(band_width_default); - const auto mldq = (*options)["mldq"] - .doc("Lower half-bandwidth to be used in the difference " - "quotient Jacobian approximation") - .withDefault(band_width_default); - const auto mukeep = (*options)["mukeep"] - .doc("Upper half-bandwidth of the retained banded " - "approximate Jacobian block") - .withDefault(n3Dvars() + n2Dvars()); - const auto mlkeep = (*options)["mlkeep"] - .doc("Lower half-bandwidth of the retained banded " - "approximate Jacobian block") - .withDefault(n3Dvars() + n2Dvars()); - - if (ARKBBDPrecInit(arkode_mem, local_N, mudq, mldq, mukeep, mlkeep, ZERO, - arkode_bbd_rhs, nullptr) - != ARK_SUCCESS) { - throw BoutException("ARKBBDPrecInit failed\n"); 
+ /// Set Preconditioner + if (use_precon) { + if (hasPreconditioner()) { + output.write("\tUsing user-supplied preconditioner\n"); + + if (ARKStepSetPreconditioner(arkode_mem, nullptr, arkode_pre) != ARKLS_SUCCESS) { + throw BoutException("ARKStepSetPreconditioner failed\n"); + } + } else { + output.write("\tUsing BBD preconditioner\n"); + + /// Get options + // Compute band_width_default from actually added fields, to allow for multiple + // Mesh objects + // + // Previous implementation was equivalent to: + // int MXSUB = mesh->xend - mesh->xstart + 1; + // int band_width_default = n3Dvars()*(MXSUB+2); + const int band_width_default = std::accumulate( + begin(f3d), end(f3d), 0, [](int acc, const VarStr& fvar) { + Mesh* localmesh = fvar.var->getMesh(); + return acc + localmesh->xend - localmesh->xstart + 3; + }); + + const auto mudq = (*options)["mudq"] + .doc("Upper half-bandwidth to be used in the difference " + "quotient Jacobian approximation") + .withDefault(band_width_default); + const auto mldq = (*options)["mldq"] + .doc("Lower half-bandwidth to be used in the difference " + "quotient Jacobian approximation") + .withDefault(band_width_default); + const auto mukeep = (*options)["mukeep"] + .doc("Upper half-bandwidth of the retained banded " + "approximate Jacobian block") + .withDefault(n3Dvars() + n2Dvars()); + const auto mlkeep = (*options)["mlkeep"] + .doc("Lower half-bandwidth of the retained banded " + "approximate Jacobian block") + .withDefault(n3Dvars() + n2Dvars()); + + if (ARKBBDPrecInit(arkode_mem, local_N, mudq, mldq, mukeep, mlkeep, 0, + arkode_bbd_rhs, nullptr) + != ARKLS_SUCCESS) { + throw BoutException("ARKBBDPrecInit failed\n"); + } } - } else { - output.write("\tUsing user-supplied preconditioner\n"); - - if (ARKStepSetPreconditioner(arkode_mem, nullptr, arkode_pre_shim) != ARK_SUCCESS) { - throw BoutException("ARKStepSetPreconditioner failed\n"); - } + // Not using preconditioning + output.write("\tNo preconditioning\n"); } - } else { - 
// Not using preconditioning - - output.write("\tNo preconditioning\n"); - -#if SUNDIALS_VERSION_MAJOR >= 3 - if ((sun_solver = SUNLinSol_SPGMR(uvec, SUN_PREC_NONE, maxl, suncontext)) - == nullptr) { - throw BoutException("Creating SUNDIALS linear solver failed\n"); - } - if (ARKStepSetLinearSolver(arkode_mem, sun_solver, nullptr) != ARK_SUCCESS) { - throw BoutException("ARKStepSetLinearSolver failed\n"); - } -#else - if (ARKSpgmr(arkode_mem, SUN_PREC_NONE, maxl) != ARKSPILS_SUCCESS) { - throw BoutException("ARKSpgmr failed\n"); - } -#endif } /// Set Jacobian-vector multiplication function @@ -533,8 +452,8 @@ int ArkodeSolver::init() { if (use_jacobian and hasJacobian()) { output.write("\tUsing user-supplied Jacobian function\n"); - if (ARKStepSetJacTimes(arkode_mem, nullptr, arkode_jac) != ARK_SUCCESS) { - throw BoutException("ARKStepSetJacTimesVecFn failed\n"); + if (ARKStepSetJacTimes(arkode_mem, nullptr, arkode_jac) != ARKLS_SUCCESS) { + throw BoutException("ARKStepSetJacTimes failed\n"); } } else { output.write("\tUsing difference quotient approximation for Jacobian\n"); @@ -645,7 +564,7 @@ BoutReal ArkodeSolver::run(BoutReal tout) { } // Copy variables - load_vars(NV_DATA_P(uvec)); + load_vars(N_VGetArrayPointer(uvec)); // Call rhs function to get extra variables at this time run_rhs(simtime); // run_diffusive(simtime); @@ -718,8 +637,8 @@ void ArkodeSolver::pre(BoutReal t, BoutReal gamma, BoutReal delta, BoutReal* uda if (!hasPreconditioner()) { // Identity (but should never happen) - const int N = NV_LOCLENGTH_P(uvec); - std::copy(rvec, rvec + N, zvec); + const auto length = N_VGetLocalLength_Parallel(uvec); + std::copy(rvec, rvec + length, zvec); return; } @@ -766,10 +685,12 @@ void ArkodeSolver::jac(BoutReal t, BoutReal* ydata, BoutReal* vdata, BoutReal* J * ARKODE explicit RHS functions **************************************************************************/ -static int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) { +// 
NOLINTBEGIN(readability-identifier-length) +namespace { +int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) { - BoutReal* udata = NV_DATA_P(u); - BoutReal* dudata = NV_DATA_P(du); + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); auto* s = static_cast(user_data); @@ -782,10 +703,10 @@ static int arkode_rhs_explicit(BoutReal t, N_Vector u, N_Vector du, void* user_d return 0; } -static int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) { +int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_data) { - BoutReal* udata = NV_DATA_P(u); - BoutReal* dudata = NV_DATA_P(du); + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); auto* s = static_cast(user_data); @@ -798,10 +719,10 @@ static int arkode_rhs_implicit(BoutReal t, N_Vector u, N_Vector du, void* user_d return 0; } -static int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { +int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { - BoutReal* udata = NV_DATA_P(u); - BoutReal* dudata = NV_DATA_P(du); + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); auto* s = static_cast(user_data); @@ -815,18 +736,17 @@ static int arkode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { } /// RHS function for BBD preconditioner -static int arkode_bbd_rhs(ARKODEINT UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, - void* user_data) { +int arkode_bbd_rhs(sunindextype UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, + void* user_data) { return arkode_rhs_implicit(t, u, du, user_data); } /// Preconditioner function -static int arkode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec, - N_Vector zvec, BoutReal gamma, BoutReal delta, int UNUSED(lr), - void* user_data) { - BoutReal* udata = NV_DATA_P(yy); - BoutReal* rdata = NV_DATA_P(rvec); - BoutReal* zdata = NV_DATA_P(zvec); +int 
arkode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec, N_Vector zvec, + BoutReal gamma, BoutReal delta, int UNUSED(lr), void* user_data) { + BoutReal* udata = N_VGetArrayPointer(yy); + BoutReal* rdata = N_VGetArrayPointer(rvec); + BoutReal* zdata = N_VGetArrayPointer(zvec); auto* s = static_cast(user_data); @@ -837,11 +757,11 @@ static int arkode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rve } /// Jacobian-vector multiplication function -static int arkode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, - N_Vector UNUSED(fy), void* user_data, N_Vector UNUSED(tmp)) { - BoutReal* ydata = NV_DATA_P(y); ///< System state - BoutReal* vdata = NV_DATA_P(v); ///< Input vector - BoutReal* Jvdata = NV_DATA_P(Jv); ///< Jacobian*vector output +int arkode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector UNUSED(fy), + void* user_data, N_Vector UNUSED(tmp)) { + BoutReal* ydata = N_VGetArrayPointer(y); ///< System state + BoutReal* vdata = N_VGetArrayPointer(v); ///< Input vector + BoutReal* Jvdata = N_VGetArrayPointer(Jv); ///< Jacobian*vector output auto* s = static_cast(user_data); @@ -849,6 +769,8 @@ static int arkode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, return 0; } +} // namespace +// NOLINTEND(readability-identifier-length) /************************************************************************** * vector abstol functions diff --git a/src/solver/impls/arkode/arkode.hxx b/src/solver/impls/arkode/arkode.hxx index afdce0b701..08bb3ea729 100644 --- a/src/solver/impls/arkode/arkode.hxx +++ b/src/solver/impls/arkode/arkode.hxx @@ -26,8 +26,8 @@ * **************************************************************************/ -#ifndef __ARKODE_SOLVER_H__ -#define __ARKODE_SOLVER_H__ +#ifndef BOUT_ARKODE_SOLVER_H +#define BOUT_ARKODE_SOLVER_H #include "bout/build_config.hxx" #include "bout/solver.hxx" @@ -47,6 +47,10 @@ RegisterUnavailableSolver #include #include +#if SUNDIALS_CONTROLLER_SUPPORT +#include +#endif + 
#include class ArkodeSolver; @@ -102,6 +106,10 @@ private: bool fixed_step; /// Order of internal step int order; + /// Name of the implicit Butcher table + std::string implicit_table; + /// Name of the explicit Butcher table + std::string explicit_table; /// Fraction of the estimated explicitly stable step to use BoutReal cfl_frac; /// Set timestep adaptivity function: @@ -153,11 +161,15 @@ private: /// SPGMR solver structure SUNLinearSolver sun_solver{nullptr}; - /// Solver for functional iterations for Adams-Moulton + /// Solver for implicit stages SUNNonlinearSolver nonlinear_solver{nullptr}; +#if SUNDIALS_CONTROLLER_SUPPORT + /// Timestep controller + SUNAdaptController controller{nullptr}; +#endif /// Context for SUNDIALS memory allocations sundials::Context suncontext; }; #endif // BOUT_HAS_ARKODE -#endif // __ARKODE_SOLVER_H__ +#endif // BOUT_ARKODE_SOLVER_H diff --git a/src/solver/impls/cvode/cvode.cxx b/src/solver/impls/cvode/cvode.cxx index c17bed420c..22f7f154f7 100644 --- a/src/solver/impls/cvode/cvode.cxx +++ b/src/solver/impls/cvode/cvode.cxx @@ -44,16 +44,9 @@ #include "fmt/core.h" #include - -#if SUNDIALS_VERSION_MAJOR >= 3 -#include -#include -#else -#include -#endif - #include #include +#include #include #include @@ -61,68 +54,22 @@ class Field2D; -#define ZERO RCONST(0.) 
-#define ONE RCONST(1.0) - -#ifndef CVODEINT -#if SUNDIALS_VERSION_MAJOR < 3 -using CVODEINT = bout::utils::function_traits::arg_t<0>; -#else -using CVODEINT = sunindextype; -#endif -#endif - BOUT_ENUM_CLASS(positivity_constraint, none, positive, non_negative, negative, non_positive); -static int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data); -static int cvode_bbd_rhs(CVODEINT Nlocal, BoutReal t, N_Vector u, N_Vector du, - void* user_data); +// NOLINTBEGIN(readability-identifier-length) +namespace { +int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data); +int cvode_bbd_rhs(sunindextype Nlocal, BoutReal t, N_Vector u, N_Vector du, + void* user_data); -static int cvode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec, - BoutReal gamma, BoutReal delta, int lr, void* user_data); +int cvode_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, N_Vector zvec, + BoutReal gamma, BoutReal delta, int lr, void* user_data); -#if SUNDIALS_VERSION_MAJOR < 3 -// Shim for earlier versions -inline static int cvode_pre_shim(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rvec, - N_Vector zvec, BoutReal gamma, BoutReal delta, int lr, - void* user_data, N_Vector UNUSED(tmp)) { - return cvode_pre(t, yy, yp, rvec, zvec, gamma, delta, lr, user_data); -} -#else -// Alias for newer versions -constexpr auto& cvode_pre_shim = cvode_pre; -#endif - -static int cvode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector fy, - void* user_data, N_Vector tmp); - -#if SUNDIALS_VERSION_MAJOR < 3 -// Shim for earlier versions -inline int CVSpilsSetJacTimes(void* arkode_mem, std::nullptr_t, - CVSpilsJacTimesVecFn jtimes) { - return CVSpilsSetJacTimesVecFn(arkode_mem, jtimes); -} -#endif - -#if SUNDIALS_VERSION_MAJOR >= 4 -// Shim for newer versions -constexpr auto CV_FUNCTIONAL = 0; -constexpr auto CV_NEWTON = 0; -#endif - -#if SUNDIALS_VERSION_MAJOR >= 3 -void* CVodeCreate(int lmm, [[maybe_unused]] int iter, - [[maybe_unused]] SUNContext 
context) { -#if SUNDIALS_VERSION_MAJOR == 3 - return CVodeCreate(lmm, iter); -#elif SUNDIALS_VERSION_MAJOR == 4 || SUNDIALS_VERSION_MAJOR == 5 - return CVodeCreate(lmm); -#else - return CVodeCreate(lmm, context); -#endif -} -#endif +int cvode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector fy, + void* user_data, N_Vector tmp); +} // namespace +// NOLINTEND(readability-identifier-length) CvodeSolver::CvodeSolver(Options* opts) : Solver(opts), diagnose((*options)["diagnose"] @@ -136,7 +83,7 @@ CvodeSolver::CvodeSolver(Options* opts) .doc("Use functional iteration instead of Newton") .withDefault(adams_moulton)), max_order((*options)["cvode_max_order"] - .doc("Maximum order of method to use. < 0 means no limit.") + .doc("Maximum order of method to use. <= 0 means default limit.") .withDefault(-1)), stablimdet((*options)["cvode_stability_limit_detection"].withDefault(false)), abstol((*options)["atol"].doc("Absolute tolerance").withDefault(1.0e-12)), @@ -148,19 +95,18 @@ CvodeSolver::CvodeSolver(Options* opts) .doc("Maximum number of internal steps between outputs.") .withDefault(500)), max_timestep( - (*options)["max_timestep"].doc("Maximum time step size").withDefault(-1.0)), + (*options)["max_timestep"].doc("Maximum time step size").withDefault(0.0)), min_timestep( - (*options)["min_timestep"].doc("Minimum time step size").withDefault(-1.0)), + (*options)["min_timestep"].doc("Minimum time step size").withDefault(0.0)), start_timestep((*options)["start_timestep"] - .doc("Starting time step. < 0 then chosen by CVODE.") - .withDefault(-1.0)), + .doc("Starting time step. = 0 then chosen by CVODE.") + .withDefault(0.0)), mxorder((*options)["mxorder"].doc("Maximum order").withDefault(-1)), max_nonlinear_iterations( (*options)["max_nonlinear_iterations"] .doc("Maximum number of nonlinear iterations allowed by CVODE before " - "reducing " - "timestep. 
CVODE default (used if this option is negative) is 3.") - .withDefault(-1)), + "reducing timestep.") + .withDefault(3)), apply_positivity_constraints( (*options)["apply_positivity_constraints"] .doc("Use CVODE function CVodeSetConstraints to constrain variables - the " @@ -184,7 +130,7 @@ CvodeSolver::CvodeSolver(Options* opts) .doc("Factor by which the Krylov linear solver’s convergence test constant " "is reduced from the nonlinear solver test constant.") .withDefault(0.05)), - suncontext(static_cast(&BoutComm::get())) { + suncontext(createSUNContext(BoutComm::get())) { has_constraints = false; // This solver doesn't have constraints canReset = true; @@ -210,7 +156,7 @@ CvodeSolver::CvodeSolver(Options* opts) CvodeSolver::~CvodeSolver() { if (cvode_initialised) { - N_VDestroy_Parallel(uvec); + N_VDestroy(uvec); CVodeFree(&cvode_mem); SUNLinSolFree(sun_solver); SUNNonlinSolFree(nonlinear_solver); @@ -242,12 +188,13 @@ int CvodeSolver::init() { n3Dvars(), n2Dvars(), neq, local_N); // Allocate memory - if ((uvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) { + uvec = callWithSUNContext(N_VNew_Parallel, suncontext, BoutComm::get(), local_N, neq); + if (uvec == nullptr) { throw BoutException("SUNDIALS memory allocation failed\n"); } // Put the variables into uvec - save_vars(NV_DATA_P(uvec)); + save_vars(N_VGetArrayPointer(uvec)); if (adams_moulton) { // By default use functional iteration for Adams-Moulton @@ -258,31 +205,29 @@ int CvodeSolver::init() { } const auto lmm = adams_moulton ? CV_ADAMS : CV_BDF; - const auto iter = func_iter ? 
CV_FUNCTIONAL : CV_NEWTON; - if ((cvode_mem = CVodeCreate(lmm, iter, suncontext)) == nullptr) { + cvode_mem = callWithSUNContext(CVodeCreate, suncontext, lmm); + if (cvode_mem == nullptr) { throw BoutException("CVodeCreate failed\n"); } // For callbacks, need pointer to solver object - if (CVodeSetUserData(cvode_mem, this) < 0) { + if (CVodeSetUserData(cvode_mem, this) != CV_SUCCESS) { throw BoutException("CVodeSetUserData failed\n"); } - if (CVodeInit(cvode_mem, cvode_rhs, simtime, uvec) < 0) { + if (CVodeInit(cvode_mem, cvode_rhs, simtime, uvec) != CV_SUCCESS) { throw BoutException("CVodeInit failed\n"); } if (max_order > 0) { - if (CVodeSetMaxOrd(cvode_mem, max_order) < 0) { + if (CVodeSetMaxOrd(cvode_mem, max_order) != CV_SUCCESS) { throw BoutException("CVodeSetMaxOrder failed\n"); } } - if (stablimdet) { - if (CVodeSetStabLimDet(cvode_mem, stablimdet) < 0) { - throw BoutException("CVodeSetStabLimDet failed\n"); - } + if (CVodeSetStabLimDet(cvode_mem, static_cast(stablimdet)) != CV_SUCCESS) { + throw BoutException("CVodeSetStabLimDet failed\n"); } if (use_vector_abstol) { @@ -307,94 +252,97 @@ int CvodeSolver::init() { return Options::root()[f3.name]["atol"].withDefault(abstol); }); - N_Vector abstolvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext); + N_Vector abstolvec = N_VClone(uvec); if (abstolvec == nullptr) { throw BoutException("SUNDIALS memory allocation (abstol vector) failed\n"); } - set_vector_option_values(NV_DATA_P(abstolvec), f2dtols, f3dtols); + set_vector_option_values(N_VGetArrayPointer(abstolvec), f2dtols, f3dtols); - if (CVodeSVtolerances(cvode_mem, reltol, abstolvec) < 0) { + if (CVodeSVtolerances(cvode_mem, reltol, abstolvec) != CV_SUCCESS) { throw BoutException("CVodeSVtolerances failed\n"); } - N_VDestroy_Parallel(abstolvec); + N_VDestroy(abstolvec); } else { - if (CVodeSStolerances(cvode_mem, reltol, abstol) < 0) { + if (CVodeSStolerances(cvode_mem, reltol, abstol) != CV_SUCCESS) { throw BoutException("CVodeSStolerances 
failed\n"); } } - CVodeSetMaxNumSteps(cvode_mem, mxsteps); - - if (max_timestep > 0.0) { - CVodeSetMaxStep(cvode_mem, max_timestep); + if (CVodeSetMaxNumSteps(cvode_mem, mxsteps) != CV_SUCCESS) { + throw BoutException("CVodeSetMaxNumSteps failed\n"); } - if (min_timestep > 0.0) { - CVodeSetMinStep(cvode_mem, min_timestep); + if (CVodeSetMaxStep(cvode_mem, max_timestep) != CV_SUCCESS) { + throw BoutException("CVodeSetMaxStep failed\n"); } - if (start_timestep > 0.0) { - CVodeSetInitStep(cvode_mem, start_timestep); + if (CVodeSetMinStep(cvode_mem, min_timestep) != CV_SUCCESS) { + throw BoutException("CVodeSetMinStep failed\n"); } - if (mxorder > 0) { - CVodeSetMaxOrd(cvode_mem, mxorder); + if (CVodeSetInitStep(cvode_mem, start_timestep) != CV_SUCCESS) { + throw BoutException("CVodeSetInitStep failed\n"); } - if (max_nonlinear_iterations > 0) { - CVodeSetMaxNonlinIters(cvode_mem, max_nonlinear_iterations); + if (CVodeSetMaxNonlinIters(cvode_mem, max_nonlinear_iterations) != CV_SUCCESS) { + throw BoutException("CVodeSetMaxNonlinIters failed\n"); } -#if not(SUNDIALS_VERSION_MAJOR >= 3 and SUNDIALS_VERSION_MINOR >= 2) - if (apply_positivity_constraints) { - throw BoutException("The apply_positivity_constraints option is only available with " - "SUNDIALS>=3.2.0"); - } -#else if (apply_positivity_constraints) { auto f2d_constraints = create_constraints(f2d); auto f3d_constraints = create_constraints(f3d); - N_Vector constraints_vec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext); + N_Vector constraints_vec = N_VClone(uvec); if (constraints_vec == nullptr) { throw BoutException("SUNDIALS memory allocation (positivity constraints vector) " "failed\n"); } - set_vector_option_values(NV_DATA_P(constraints_vec), f2d_constraints, + set_vector_option_values(N_VGetArrayPointer(constraints_vec), f2d_constraints, f3d_constraints); - if (CVodeSetConstraints(cvode_mem, constraints_vec) < 0) { + if (CVodeSetConstraints(cvode_mem, constraints_vec) != CV_SUCCESS) { throw 
BoutException("CVodeSetConstraints failed\n"); } - N_VDestroy_Parallel(constraints_vec); + N_VDestroy(constraints_vec); } -#endif /// Newton method can include Preconditioners and Jacobian function - if (!func_iter) { + if (func_iter) { + output_info.write("\tUsing Functional iteration\n"); + nonlinear_solver = callWithSUNContext(SUNNonlinSol_FixedPoint, suncontext, uvec, 0); + if (nonlinear_solver == nullptr) { + throw BoutException("SUNNonlinSol_FixedPoint failed\n"); + } + + if (CVodeSetNonlinearSolver(cvode_mem, nonlinear_solver) != 0) { + throw BoutException("CVodeSetNonlinearSolver failed\n"); + } + } else { output_info.write("\tUsing Newton iteration\n"); TRACE("Setting preconditioner"); - if (use_precon) { - const int prectype = rightprec ? SUN_PREC_RIGHT : SUN_PREC_LEFT; -#if SUNDIALS_VERSION_MAJOR >= 3 - if ((sun_solver = SUNLinSol_SPGMR(uvec, prectype, maxl, suncontext)) == nullptr) { - throw BoutException("Creating SUNDIALS linear solver failed\n"); - } - if (CVSpilsSetLinearSolver(cvode_mem, sun_solver) != CV_SUCCESS) { - throw BoutException("CVSpilsSetLinearSolver failed\n"); - } -#else - if (CVSpgmr(cvode_mem, prectype, maxl) != CVSPILS_SUCCESS) { - throw BoutException("CVSpgmr failed\n"); - } -#endif + const auto prectype = + use_precon ? (rightprec ? 
SUN_PREC_RIGHT : SUN_PREC_LEFT) : SUN_PREC_NONE; + sun_solver = callWithSUNContext(SUNLinSol_SPGMR, suncontext, uvec, prectype, maxl); + if (sun_solver == nullptr) { + throw BoutException("Creating SUNDIALS linear solver failed\n"); + } + if (CVodeSetLinearSolver(cvode_mem, sun_solver, nullptr) != CVLS_SUCCESS) { + throw BoutException("CVodeSetLinearSolver failed\n"); + } - if (!hasPreconditioner()) { + if (use_precon) { + if (hasPreconditioner()) { + output_info.write("\tUsing user-supplied preconditioner\n"); + + if (CVodeSetPreconditioner(cvode_mem, nullptr, cvode_pre) != CVLS_SUCCESS) { + throw BoutException("CVodeSetPreconditioner failed\n"); + } + } else { output_info.write("\tUsing BBD preconditioner\n"); /// Get options @@ -415,62 +363,36 @@ int CvodeSolver::init() { const auto mukeep = (*options)["mukeep"].withDefault(n3Dvars() + n2Dvars()); const auto mlkeep = (*options)["mlkeep"].withDefault(n3Dvars() + n2Dvars()); - if (CVBBDPrecInit(cvode_mem, local_N, mudq, mldq, mukeep, mlkeep, ZERO, - cvode_bbd_rhs, nullptr)) { + if (CVBBDPrecInit(cvode_mem, local_N, mudq, mldq, mukeep, mlkeep, 0.0, + cvode_bbd_rhs, nullptr) + != CVLS_SUCCESS) { throw BoutException("CVBBDPrecInit failed\n"); } - - } else { - output_info.write("\tUsing user-supplied preconditioner\n"); - - if (CVSpilsSetPreconditioner(cvode_mem, nullptr, cvode_pre_shim)) { - throw BoutException("CVSpilsSetPreconditioner failed\n"); - } } } else { output_info.write("\tNo preconditioning\n"); - -#if SUNDIALS_VERSION_MAJOR >= 3 - if ((sun_solver = SUNLinSol_SPGMR(uvec, SUN_PREC_NONE, maxl, suncontext)) - == nullptr) { - throw BoutException("Creating SUNDIALS linear solver failed\n"); - } - if (CVSpilsSetLinearSolver(cvode_mem, sun_solver) != CV_SUCCESS) { - throw BoutException("CVSpilsSetLinearSolver failed\n"); - } -#else - if (CVSpgmr(cvode_mem, SUN_PREC_NONE, maxl) != CVSPILS_SUCCESS) { - throw BoutException("CVSpgmr failed\n"); - } -#endif } /// Set Jacobian-vector multiplication function if 
(use_jacobian and hasJacobian()) { output_info.write("\tUsing user-supplied Jacobian function\n"); - if (CVSpilsSetJacTimes(cvode_mem, nullptr, cvode_jac) != CV_SUCCESS) { - throw BoutException("CVSpilsSetJacTimesVecFn failed\n"); + if (CVodeSetJacTimes(cvode_mem, nullptr, cvode_jac) != CVLS_SUCCESS) { + throw BoutException("CVodeSetJacTimes failed\n"); } } else { output_info.write("\tUsing difference quotient approximation for Jacobian\n"); } - } else { - output_info.write("\tUsing Functional iteration\n"); -#if SUNDIALS_VERSION_MAJOR >= 4 - if ((nonlinear_solver = SUNNonlinSol_FixedPoint(uvec, 0, suncontext)) == nullptr) { - throw BoutException("SUNNonlinSol_FixedPoint failed\n"); - } - - if (CVodeSetNonlinearSolver(cvode_mem, nonlinear_solver)) { - throw BoutException("CVodeSetNonlinearSolver failed\n"); - } -#endif } // Set internal tolerance factors - CVodeSetNonlinConvCoef(cvode_mem, cvode_nonlinear_convergence_coef); - CVodeSetEpsLin(cvode_mem, cvode_linear_convergence_coef); + if (CVodeSetNonlinConvCoef(cvode_mem, cvode_nonlinear_convergence_coef) != CV_SUCCESS) { + throw BoutException("CVodeSetNonlinConvCoef failed\n"); + } + + if (CVodeSetEpsLin(cvode_mem, cvode_linear_convergence_coef) != CV_SUCCESS) { + throw BoutException("CVodeSetEpsLin failed\n"); + } cvode_initialised = true; @@ -544,9 +466,9 @@ int CvodeSolver::run() { nfevals = int(temp_long_int); CVodeGetNumNonlinSolvIters(cvode_mem, &temp_long_int); nniters = int(temp_long_int); - CVSpilsGetNumPrecSolves(cvode_mem, &temp_long_int); + CVodeGetNumPrecSolves(cvode_mem, &temp_long_int); npevals = int(temp_long_int); - CVSpilsGetNumLinIters(cvode_mem, &temp_long_int); + CVodeGetNumLinIters(cvode_mem, &temp_long_int); nliters = int(temp_long_int); // Last step size @@ -634,7 +556,7 @@ BoutReal CvodeSolver::run(BoutReal tout) { } // Copy variables - load_vars(NV_DATA_P(uvec)); + load_vars(N_VGetArrayPointer(uvec)); // Call rhs function to get extra variables at this time run_rhs(simtime); @@ -678,11 
+600,11 @@ void CvodeSolver::pre(BoutReal t, BoutReal gamma, BoutReal delta, BoutReal* udat BoutReal tstart = bout::globals::mpi->MPI_Wtime(); - int N = NV_LOCLENGTH_P(uvec); + const auto length = N_VGetLocalLength_Parallel(uvec); if (!hasPreconditioner()) { // Identity (but should never happen) - for (int i = 0; i < N; i++) { + for (int i = 0; i < length; i++) { zvec[i] = rvec[i]; } return; @@ -731,10 +653,12 @@ void CvodeSolver::jac(BoutReal t, BoutReal* ydata, BoutReal* vdata, BoutReal* Jv * CVODE RHS functions **************************************************************************/ -static int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { +// NOLINTBEGIN(readability-identifier-length) +namespace { +int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { - BoutReal* udata = NV_DATA_P(u); - BoutReal* dudata = NV_DATA_P(du); + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); auto* s = static_cast(user_data); @@ -748,18 +672,17 @@ static int cvode_rhs(BoutReal t, N_Vector u, N_Vector du, void* user_data) { } /// RHS function for BBD preconditioner -static int cvode_bbd_rhs(CVODEINT UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, - void* user_data) { +int cvode_bbd_rhs(sunindextype UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, + void* user_data) { return cvode_rhs(t, u, du, user_data); } /// Preconditioner function -static int cvode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec, - N_Vector zvec, BoutReal gamma, BoutReal delta, int UNUSED(lr), - void* user_data) { - BoutReal* udata = NV_DATA_P(yy); - BoutReal* rdata = NV_DATA_P(rvec); - BoutReal* zdata = NV_DATA_P(zvec); +int cvode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec, N_Vector zvec, + BoutReal gamma, BoutReal delta, int UNUSED(lr), void* user_data) { + BoutReal* udata = N_VGetArrayPointer(yy); + BoutReal* rdata = N_VGetArrayPointer(rvec); + BoutReal* zdata = N_VGetArrayPointer(zvec); 
auto* s = static_cast(user_data); @@ -770,11 +693,11 @@ static int cvode_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector rvec } /// Jacobian-vector multiplication function -static int cvode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector UNUSED(fy), - void* user_data, N_Vector UNUSED(tmp)) { - BoutReal* ydata = NV_DATA_P(y); ///< System state - BoutReal* vdata = NV_DATA_P(v); ///< Input vector - BoutReal* Jvdata = NV_DATA_P(Jv); ///< Jacobian*vector output +int cvode_jac(N_Vector v, N_Vector Jv, BoutReal t, N_Vector y, N_Vector UNUSED(fy), + void* user_data, N_Vector UNUSED(tmp)) { + BoutReal* ydata = N_VGetArrayPointer(y); ///< System state + BoutReal* vdata = N_VGetArrayPointer(v); ///< Input vector + BoutReal* Jvdata = N_VGetArrayPointer(Jv); ///< Jacobian*vector output auto* s = static_cast(user_data); @@ -782,6 +705,8 @@ static int cvode_jac(N_Vector v, N_Vector Jv, realtype t, N_Vector y, N_Vector U return 0; } +} // namespace +// NOLINTEND(readability-identifier-length) /************************************************************************** * CVODE vector option functions @@ -829,9 +754,9 @@ void CvodeSolver::loop_vector_option_values_op(Ind2D UNUSED(i2d), BoutReal* opti void CvodeSolver::resetInternalFields() { TRACE("CvodeSolver::resetInternalFields"); - save_vars(NV_DATA_P(uvec)); + save_vars(N_VGetArrayPointer(uvec)); - if (CVodeReInit(cvode_mem, simtime, uvec) < 0) { + if (CVodeReInit(cvode_mem, simtime, uvec) != CV_SUCCESS) { throw BoutException("CVodeReInit failed\n"); } } diff --git a/src/solver/impls/cvode/cvode.hxx b/src/solver/impls/cvode/cvode.hxx index fa8b972bca..89c3a613a8 100644 --- a/src/solver/impls/cvode/cvode.hxx +++ b/src/solver/impls/cvode/cvode.hxx @@ -25,8 +25,8 @@ * **************************************************************************/ -#ifndef __SUNDIAL_SOLVER_H__ -#define __SUNDIAL_SOLVER_H__ +#ifndef BOUT_SUNDIAL_SOLVER_H +#define BOUT_SUNDIAL_SOLVER_H #include "bout/build_config.hxx" #include 
"bout/solver.hxx" @@ -157,4 +157,4 @@ private: }; #endif // BOUT_HAS_CVODE -#endif // __SUNDIAL_SOLVER_H__ +#endif // BOUT_SUNDIAL_SOLVER_H diff --git a/src/solver/impls/euler/euler.cxx b/src/solver/impls/euler/euler.cxx index 7bffcdeb15..3976f4402c 100644 --- a/src/solver/impls/euler/euler.cxx +++ b/src/solver/impls/euler/euler.cxx @@ -144,7 +144,7 @@ void EulerSolver::take_step(BoutReal curtime, BoutReal dt, Array& star run_rhs(curtime); save_derivs(std::begin(result)); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { result[i] = start[i] + dt * result[i]; } diff --git a/src/solver/impls/euler/euler.hxx b/src/solver/impls/euler/euler.hxx index bfa0be9bb3..0ee81a3d33 100644 --- a/src/solver/impls/euler/euler.hxx +++ b/src/solver/impls/euler/euler.hxx @@ -27,8 +27,8 @@ class EulerSolver; -#ifndef __EULER_SOLVER_H__ -#define __EULER_SOLVER_H__ +#ifndef BOUT_EULER_SOLVER_H +#define BOUT_EULER_SOLVER_H #include "mpi.h" @@ -66,4 +66,4 @@ private: Array& result); }; -#endif // __KARNIADAKIS_SOLVER_H__ +#endif // BOUT_KARNIADAKIS_SOLVER_H diff --git a/src/solver/impls/ida/ida.cxx b/src/solver/impls/ida/ida.cxx index 189a103bbe..cfc978f755 100644 --- a/src/solver/impls/ida/ida.cxx +++ b/src/solver/impls/ida/ida.cxx @@ -40,53 +40,23 @@ #include "bout/unused.hxx" #include - -#if SUNDIALS_VERSION_MAJOR >= 3 -#include -#include -#else -#include -#endif - #include #include #include +#include #include -#define ZERO RCONST(0.) 
-#define ONE RCONST(1.0) - -#ifndef IDAINT -#if SUNDIALS_VERSION_MAJOR < 3 -using IDAINT = bout::utils::function_traits::arg_t<0>; -#else -using IDAINT = sunindextype; -#endif -#endif - -static int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data); -static int ida_bbd_res(IDAINT Nlocal, BoutReal t, N_Vector u, N_Vector du, N_Vector rr, - void* user_data); +// NOLINTBEGIN(readability-identifier-length) +namespace { +int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data); +int ida_bbd_res(sunindextype Nlocal, BoutReal t, N_Vector u, N_Vector du, N_Vector rr, + void* user_data); -static int ida_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rr, N_Vector rvec, - N_Vector zvec, BoutReal cj, BoutReal delta, void* user_data); - -#if SUNDIALS_VERSION_MAJOR < 3 -// Shim for earlier versions -inline static int ida_pre_shim(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rr, - N_Vector rvec, N_Vector zvec, BoutReal cj, BoutReal delta, - void* user_data, N_Vector UNUSED(tmp)) { - return ida_pre(t, yy, yp, rr, rvec, zvec, cj, delta, user_data); -} -#else -// Alias for newer versions -constexpr auto& ida_pre_shim = ida_pre; -#endif - -#if SUNDIALS_VERSION_MAJOR < 6 -void* IDACreate([[maybe_unused]] SUNContext) { return IDACreate(); } -#endif +int ida_pre(BoutReal t, N_Vector yy, N_Vector yp, N_Vector rr, N_Vector rvec, + N_Vector zvec, BoutReal cj, BoutReal delta, void* user_data); +} // namespace +// NOLINTEND(readability-identifier-length) IdaSolver::IdaSolver(Options* opts) : Solver(opts), @@ -101,15 +71,15 @@ IdaSolver::IdaSolver(Options* opts) correct_start((*options)["correct_start"] .doc("Correct the initial values") .withDefault(true)), - suncontext(static_cast(&BoutComm::get())) { + suncontext(createSUNContext(BoutComm::get())) { has_constraints = true; // This solver has constraints } IdaSolver::~IdaSolver() { if (initialised) { - N_VDestroy_Parallel(uvec); - N_VDestroy_Parallel(duvec); - N_VDestroy_Parallel(id); + 
N_VDestroy(uvec); + N_VDestroy(duvec); + N_VDestroy(id); IDAFree(&idamem); SUNLinSolFree(sun_solver); } @@ -144,69 +114,75 @@ int IdaSolver::init() { neq, local_N); // Allocate memory - if ((uvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) { + uvec = callWithSUNContext(N_VNew_Parallel, suncontext, BoutComm::get(), local_N, neq); + if (uvec == nullptr) { throw BoutException("SUNDIALS memory allocation failed\n"); } - if ((duvec = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) { + duvec = N_VClone(uvec); + if (duvec == nullptr) { throw BoutException("SUNDIALS memory allocation failed\n"); } - if ((id = N_VNew_Parallel(BoutComm::get(), local_N, neq, suncontext)) == nullptr) { + id = N_VClone(uvec); + if (id == nullptr) { throw BoutException("SUNDIALS memory allocation failed\n"); } // Put the variables into uvec - save_vars(NV_DATA_P(uvec)); + save_vars(N_VGetArrayPointer(uvec)); // Get the starting time derivative run_rhs(simtime); // Put the time-derivatives into duvec - save_derivs(NV_DATA_P(duvec)); + save_derivs(N_VGetArrayPointer(duvec)); // Set the equation type in id(Differential or Algebraic. 
This is optional) - set_id(NV_DATA_P(id)); + set_id(N_VGetArrayPointer(id)); // Call IDACreate to initialise - if ((idamem = IDACreate(suncontext)) == nullptr) { + idamem = callWithSUNContext(IDACreate, suncontext); + if (idamem == nullptr) { throw BoutException("IDACreate failed\n"); } // For callbacks, need pointer to solver object - if (IDASetUserData(idamem, this) < 0) { + if (IDASetUserData(idamem, this) != IDA_SUCCESS) { throw BoutException("IDASetUserData failed\n"); } - if (IDASetId(idamem, id) < 0) { + if (IDASetId(idamem, id) != IDA_SUCCESS) { throw BoutException("IDASetID failed\n"); } - if (IDAInit(idamem, idares, simtime, uvec, duvec) < 0) { + if (IDAInit(idamem, idares, simtime, uvec, duvec) != IDA_SUCCESS) { throw BoutException("IDAInit failed\n"); } - if (IDASStolerances(idamem, reltol, abstol) < 0) { + if (IDASStolerances(idamem, reltol, abstol) != IDA_SUCCESS) { throw BoutException("IDASStolerances failed\n"); } - IDASetMaxNumSteps(idamem, mxsteps); + if (IDASetMaxNumSteps(idamem, mxsteps) != IDA_SUCCESS) { + throw BoutException("IDASetMaxNumSteps failed\n"); + } // Call IDASpgmr to specify the IDA linear solver IDASPGMR const auto maxl = (*options)["maxl"].withDefault(6 * n3d); -#if SUNDIALS_VERSION_MAJOR >= 3 - if ((sun_solver = SUNLinSol_SPGMR(uvec, SUN_PREC_NONE, maxl, suncontext)) == nullptr) { + sun_solver = callWithSUNContext(SUNLinSol_SPGMR, suncontext, uvec, SUN_PREC_NONE, maxl); + if (sun_solver == nullptr) { throw BoutException("Creating SUNDIALS linear solver failed\n"); } - if (IDASpilsSetLinearSolver(idamem, sun_solver) != IDA_SUCCESS) { - throw BoutException("IDASpilsSetLinearSolver failed\n"); + if (IDASetLinearSolver(idamem, sun_solver, nullptr) != IDALS_SUCCESS) { + throw BoutException("IDASetLinearSolver failed\n"); } -#else - if (IDASpgmr(idamem, maxl)) { - throw BoutException("IDASpgmr failed\n"); - } -#endif if (use_precon) { - if (!hasPreconditioner()) { + if (hasPreconditioner()) { + output.write("\tUsing user-supplied 
preconditioner\n"); + if (IDASetPreconditioner(idamem, nullptr, ida_pre) != IDALS_SUCCESS) { + throw BoutException("IDASetPreconditioner failed\n"); + } + } else { output.write("\tUsing BBD preconditioner\n"); /// Get options // Compute band_width_default from actually added fields, to allow for multiple Mesh @@ -225,21 +201,17 @@ int IdaSolver::init() { const auto mldq = (*options)["mldq"].withDefault(band_width_default); const auto mukeep = (*options)["mukeep"].withDefault(n3d); const auto mlkeep = (*options)["mlkeep"].withDefault(n3d); - if (IDABBDPrecInit(idamem, local_N, mudq, mldq, mukeep, mlkeep, ZERO, ida_bbd_res, - nullptr)) { + if (IDABBDPrecInit(idamem, local_N, mudq, mldq, mukeep, mlkeep, 0.0, ida_bbd_res, + nullptr) + != IDALS_SUCCESS) { throw BoutException("IDABBDPrecInit failed\n"); } - } else { - output.write("\tUsing user-supplied preconditioner\n"); - if (IDASpilsSetPreconditioner(idamem, nullptr, ida_pre_shim)) { - throw BoutException("IDASpilsSetPreconditioner failed\n"); - } } } // Call IDACalcIC (with default options) to correct the initial values if (correct_start) { - if (IDACalcIC(idamem, IDA_YA_YDP_INIT, 1e-6)) { + if (IDACalcIC(idamem, IDA_YA_YDP_INIT, 1e-6) != IDA_SUCCESS) { throw BoutException("IDACalcIC failed\n"); } } @@ -291,7 +263,7 @@ BoutReal IdaSolver::run(BoutReal tout) { const int flag = IDASolve(idamem, tout, &simtime, uvec, duvec, IDA_NORMAL); // Copy variables - load_vars(NV_DATA_P(uvec)); + load_vars(N_VGetArrayPointer(uvec)); // Call rhs function to get extra variables at this time run_rhs(simtime); @@ -322,9 +294,9 @@ void IdaSolver::res(BoutReal t, BoutReal* udata, BoutReal* dudata, BoutReal* rda save_derivs(rdata); // If a differential equation, subtract dudata - const int N = NV_LOCLENGTH_P(id); - const BoutReal* idd = NV_DATA_P(id); - for (int i = 0; i < N; i++) { + const auto length = N_VGetLocalLength_Parallel(id); + const BoutReal* idd = N_VGetArrayPointer(id); + for (int i = 0; i < length; i++) { if (idd[i] > 0.5) 
{ // 1 -> differential, 0 -> algebraic rdata[i] -= dudata[i]; } @@ -343,8 +315,8 @@ void IdaSolver::pre(BoutReal t, BoutReal cj, BoutReal delta, BoutReal* udata, if (!hasPreconditioner()) { // Identity (but should never happen) - const int N = NV_LOCLENGTH_P(id); - std::copy(rvec, rvec + N, zvec); + const auto length = N_VGetLocalLength_Parallel(id); + std::copy(rvec, rvec + length, zvec); return; } @@ -367,10 +339,12 @@ void IdaSolver::pre(BoutReal t, BoutReal cj, BoutReal delta, BoutReal* udata, * IDA res function **************************************************************************/ -static int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data) { - BoutReal* udata = NV_DATA_P(u); - BoutReal* dudata = NV_DATA_P(du); - BoutReal* rdata = NV_DATA_P(rr); +// NOLINTBEGIN(readability-identifier-length) +namespace { +int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_data) { + BoutReal* udata = N_VGetArrayPointer(u); + BoutReal* dudata = N_VGetArrayPointer(du); + BoutReal* rdata = N_VGetArrayPointer(rr); auto* s = static_cast(user_data); @@ -381,18 +355,17 @@ static int idares(BoutReal t, N_Vector u, N_Vector du, N_Vector rr, void* user_d } /// Residual function for BBD preconditioner -static int ida_bbd_res(IDAINT UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, - N_Vector rr, void* user_data) { +int ida_bbd_res(sunindextype UNUSED(Nlocal), BoutReal t, N_Vector u, N_Vector du, + N_Vector rr, void* user_data) { return idares(t, u, du, rr, user_data); } // Preconditioner function -static int ida_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector UNUSED(rr), - N_Vector rvec, N_Vector zvec, BoutReal cj, BoutReal delta, - void* user_data) { - BoutReal* udata = NV_DATA_P(yy); - BoutReal* rdata = NV_DATA_P(rvec); - BoutReal* zdata = NV_DATA_P(zvec); +int ida_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector UNUSED(rr), + N_Vector rvec, N_Vector zvec, BoutReal cj, BoutReal delta, void* user_data) { + 
BoutReal* udata = N_VGetArrayPointer(yy); + BoutReal* rdata = N_VGetArrayPointer(rvec); + BoutReal* zdata = N_VGetArrayPointer(zvec); auto* s = static_cast(user_data); @@ -401,5 +374,7 @@ static int ida_pre(BoutReal t, N_Vector yy, N_Vector UNUSED(yp), N_Vector UNUSED return 0; } +} // namespace +// NOLINTEND(readability-identifier-length) #endif diff --git a/src/solver/impls/ida/ida.hxx b/src/solver/impls/ida/ida.hxx index 83ee4d83e6..b00054d157 100644 --- a/src/solver/impls/ida/ida.hxx +++ b/src/solver/impls/ida/ida.hxx @@ -27,8 +27,8 @@ * **************************************************************************/ -#ifndef __IDA_SOLVER_H__ -#define __IDA_SOLVER_H__ +#ifndef BOUT_IDA_SOLVER_H +#define BOUT_IDA_SOLVER_H #include "bout/build_config.hxx" #include "bout/solver.hxx" @@ -97,4 +97,4 @@ private: }; #endif // BOUT_HAS_IDA -#endif // __IDA_SOLVER_H__ +#endif // BOUT_IDA_SOLVER_H diff --git a/src/solver/impls/imex-bdf2/imex-bdf2.hxx b/src/solver/impls/imex-bdf2/imex-bdf2.hxx index 4126c48265..f0e1b2faee 100644 --- a/src/solver/impls/imex-bdf2/imex-bdf2.hxx +++ b/src/solver/impls/imex-bdf2/imex-bdf2.hxx @@ -32,8 +32,8 @@ * **************************************************************************/ -#ifndef __IMEXBDF2_SOLVER_H__ -#define __IMEXBDF2_SOLVER_H__ +#ifndef BOUT_IMEXBDF2_SOLVER_H +#define BOUT_IMEXBDF2_SOLVER_H #include "bout/build_config.hxx" #include "bout/solver.hxx" @@ -221,6 +221,6 @@ private: void saveDerivs(BoutReal* u); }; -#endif // __IMEXBDF2_SOLVER_H__ +#endif // BOUT_IMEXBDF2_SOLVER_H #endif // BOUT_HAS_PETSC diff --git a/src/solver/impls/petsc/petsc.hxx b/src/solver/impls/petsc/petsc.hxx index 349f40bad8..7239126abb 100644 --- a/src/solver/impls/petsc/petsc.hxx +++ b/src/solver/impls/petsc/petsc.hxx @@ -24,8 +24,8 @@ * **************************************************************************/ -#ifndef __PETSC_SOLVER_H__ -#define __PETSC_SOLVER_H__ +#ifndef BOUT_PETSC_SOLVER_H +#define BOUT_PETSC_SOLVER_H #include 
"bout/build_config.hxx" #include "bout/solver.hxx" @@ -149,4 +149,4 @@ private: #endif // BOUT_HAS_PETSC -#endif // __PETSC_SOLVER_H__ +#endif // BOUT_PETSC_SOLVER_H diff --git a/src/solver/impls/power/power.hxx b/src/solver/impls/power/power.hxx index 757befeec5..6f56c20f43 100644 --- a/src/solver/impls/power/power.hxx +++ b/src/solver/impls/power/power.hxx @@ -26,8 +26,8 @@ class PowerSolver; -#ifndef __POWER_SOLVER_H__ -#define __POWER_SOLVER_H__ +#ifndef BOUT_POWER_SOLVER_H +#define BOUT_POWER_SOLVER_H #include #include @@ -60,4 +60,4 @@ private: void divide(Array& in, BoutReal value); }; -#endif // __KARNIADAKIS_SOLVER_H__ +#endif // BOUT_KARNIADAKIS_SOLVER_H diff --git a/src/solver/impls/pvode/pvode.hxx b/src/solver/impls/pvode/pvode.hxx index 2ff02c22bf..d29135d02e 100644 --- a/src/solver/impls/pvode/pvode.hxx +++ b/src/solver/impls/pvode/pvode.hxx @@ -30,8 +30,8 @@ class PvodeSolver; -#ifndef __PVODE_SOLVER_H__ -#define __PVODE_SOLVER_H__ +#ifndef BOUT_PVODE_SOLVER_H +#define BOUT_PVODE_SOLVER_H #include #include @@ -81,6 +81,6 @@ private: bool pvode_initialised = false; }; -#endif // __PVODE_SOLVER_H__ +#endif // BOUT_PVODE_SOLVER_H #endif diff --git a/src/solver/impls/rk3-ssp/rk3-ssp.cxx b/src/solver/impls/rk3-ssp/rk3-ssp.cxx index 27979bc435..e13d996c00 100644 --- a/src/solver/impls/rk3-ssp/rk3-ssp.cxx +++ b/src/solver/impls/rk3-ssp/rk3-ssp.cxx @@ -108,7 +108,7 @@ void RK3SSP::take_step(BoutReal curtime, BoutReal dt, Array& start, run_rhs(curtime); save_derivs(std::begin(L)); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { u1[i] = start[i] + dt * L[i]; } @@ -117,7 +117,7 @@ void RK3SSP::take_step(BoutReal curtime, BoutReal dt, Array& start, run_rhs(curtime + dt); save_derivs(std::begin(L)); - BOUT_OMP(parallel for ) + BOUT_OMP_PERF(parallel for ) for (int i = 0; i < nlocal; i++) { u2[i] = 0.75 * start[i] + 0.25 * u1[i] + 0.25 * dt * L[i]; } @@ -126,7 +126,7 @@ void RK3SSP::take_step(BoutReal curtime, BoutReal dt, 
Array& start, run_rhs(curtime + 0.5 * dt); save_derivs(std::begin(L)); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { result[i] = (1. / 3) * start[i] + (2. / 3.) * (u2[i] + dt * L[i]); } diff --git a/src/solver/impls/rk3-ssp/rk3-ssp.hxx b/src/solver/impls/rk3-ssp/rk3-ssp.hxx index 4080b17bb5..3682d5cbde 100644 --- a/src/solver/impls/rk3-ssp/rk3-ssp.hxx +++ b/src/solver/impls/rk3-ssp/rk3-ssp.hxx @@ -33,8 +33,8 @@ class RK3SSP; -#ifndef __RK3SSP_SOLVER_H__ -#define __RK3SSP_SOLVER_H__ +#ifndef BOUT_RK3SSP_SOLVER_H +#define BOUT_RK3SSP_SOLVER_H #include "mpi.h" @@ -72,4 +72,4 @@ private: Array u1, u2, u3, L; //< Time-stepping arrays }; -#endif // __RK4_SOLVER_H__ +#endif // BOUT_RK3SSP_SOLVER_H diff --git a/src/solver/impls/rk4/rk4.cxx b/src/solver/impls/rk4/rk4.cxx index 47bef38f9c..0e7a942a45 100644 --- a/src/solver/impls/rk4/rk4.cxx +++ b/src/solver/impls/rk4/rk4.cxx @@ -105,7 +105,7 @@ int RK4Solver::run() { // Check accuracy BoutReal local_err = 0.; - BOUT_OMP(parallel for reduction(+: local_err) ) + BOUT_OMP_PERF(parallel for reduction(+: local_err) ) for (int i = 0; i < nlocal; i++) { local_err += fabs(f2[i] - f1[i]) / (fabs(f1[i]) + fabs(f2[i]) + atol); } @@ -182,7 +182,7 @@ void RK4Solver::take_step(BoutReal curtime, BoutReal dt, Array& start, run_rhs(curtime); save_derivs(std::begin(k1)); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { k5[i] = start[i] + 0.5 * dt * k1[i]; } @@ -191,7 +191,7 @@ void RK4Solver::take_step(BoutReal curtime, BoutReal dt, Array& start, run_rhs(curtime + 0.5 * dt); save_derivs(std::begin(k2)); - BOUT_OMP(parallel for ) + BOUT_OMP_PERF(parallel for ) for (int i = 0; i < nlocal; i++) { k5[i] = start[i] + 0.5 * dt * k2[i]; } @@ -200,7 +200,7 @@ void RK4Solver::take_step(BoutReal curtime, BoutReal dt, Array& start, run_rhs(curtime + 0.5 * dt); save_derivs(std::begin(k3)); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < 
nlocal; i++) { k5[i] = start[i] + dt * k3[i]; } @@ -209,7 +209,7 @@ void RK4Solver::take_step(BoutReal curtime, BoutReal dt, Array& start, run_rhs(curtime + dt); save_derivs(std::begin(k4)); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { result[i] = start[i] + (1. / 6.) * dt * (k1[i] + 2. * k2[i] + 2. * k3[i] + k4[i]); } diff --git a/src/solver/impls/rk4/rk4.hxx b/src/solver/impls/rk4/rk4.hxx index 5838b24e8e..7ec7e6dd45 100644 --- a/src/solver/impls/rk4/rk4.hxx +++ b/src/solver/impls/rk4/rk4.hxx @@ -27,8 +27,8 @@ class RK4Solver; -#ifndef __RK4_SOLVER_H__ -#define __RK4_SOLVER_H__ +#ifndef BOUT_RK4_SOLVER_H +#define BOUT_RK4_SOLVER_H #include "mpi.h" @@ -68,4 +68,4 @@ private: Array k1, k2, k3, k4, k5; //< Time-stepping arrays }; -#endif // __RK4_SOLVER_H__ +#endif // BOUT_RK4_SOLVER_H diff --git a/src/solver/impls/rkgeneric/impls/cashkarp/cashkarp.hxx b/src/solver/impls/rkgeneric/impls/cashkarp/cashkarp.hxx index 32072f1fc7..76042174f9 100644 --- a/src/solver/impls/rkgeneric/impls/cashkarp/cashkarp.hxx +++ b/src/solver/impls/rkgeneric/impls/cashkarp/cashkarp.hxx @@ -1,8 +1,8 @@ class CASHKARPScheme; -#ifndef __CASHKARP_SCHEME_H__ -#define __CASHKARP_SCHEME_H__ +#ifndef BOUT_CASHKARP_SCHEME_H +#define BOUT_CASHKARP_SCHEME_H #include #include @@ -16,4 +16,4 @@ namespace { RegisterRKScheme registerrkschemecashkarp(RKSCHEME_CASHKARP); } -#endif // __CASHKARP_SCHEME_H__ +#endif // BOUT_CASHKARP_SCHEME_H diff --git a/src/solver/impls/rkgeneric/impls/rk4simple/rk4simple.hxx b/src/solver/impls/rkgeneric/impls/rk4simple/rk4simple.hxx index 126fa0912c..9fc0fc0604 100644 --- a/src/solver/impls/rkgeneric/impls/rk4simple/rk4simple.hxx +++ b/src/solver/impls/rkgeneric/impls/rk4simple/rk4simple.hxx @@ -1,8 +1,8 @@ class RK4SIMPLEScheme; -#ifndef __RK4SIMPLE_SCHEME_H__ -#define __RK4SIMPLE_SCHEME_H__ +#ifndef BOUT_RK4SIMPLE_SCHEME_H +#define BOUT_RK4SIMPLE_SCHEME_H #include #include @@ -19,4 +19,4 @@ namespace { RegisterRKScheme 
registerrkscheme4simple(RKSCHEME_RK4); } -#endif // __RK4SIMPLE_SCHEME_H__ +#endif // BOUT_RK4SIMPLE_SCHEME_H diff --git a/src/solver/impls/rkgeneric/impls/rkf34/rkf34.hxx b/src/solver/impls/rkgeneric/impls/rkf34/rkf34.hxx index 9de022b865..6840c4f5b4 100644 --- a/src/solver/impls/rkgeneric/impls/rkf34/rkf34.hxx +++ b/src/solver/impls/rkgeneric/impls/rkf34/rkf34.hxx @@ -1,8 +1,8 @@ class RKF34Scheme; -#ifndef __RKF34_SCHEME_H__ -#define __RKF34_SCHEME_H__ +#ifndef BOUT_RKF34_SCHEME_H +#define BOUT_RKF34_SCHEME_H #include #include @@ -16,4 +16,4 @@ namespace { RegisterRKScheme registerrkschemef34(RKSCHEME_RKF34); } -#endif // __RKF34_SCHEME_H__ +#endif // BOUT_RKF34_SCHEME_H diff --git a/src/solver/impls/rkgeneric/impls/rkf45/rkf45.hxx b/src/solver/impls/rkgeneric/impls/rkf45/rkf45.hxx index ea752877e0..70150a2a40 100644 --- a/src/solver/impls/rkgeneric/impls/rkf45/rkf45.hxx +++ b/src/solver/impls/rkgeneric/impls/rkf45/rkf45.hxx @@ -1,8 +1,8 @@ class RKF45Scheme; -#ifndef __RKF45_SCHEME_H__ -#define __RKF45_SCHEME_H__ +#ifndef BOUT_RKF45_SCHEME_H +#define BOUT_RKF45_SCHEME_H #include #include @@ -16,4 +16,4 @@ namespace { RegisterRKScheme registerrkschemef45(RKSCHEME_RKF45); } -#endif // __RKF45_SCHEME_H__ +#endif // BOUT_RKF45_SCHEME_H diff --git a/src/solver/impls/rkgeneric/rkgeneric.cxx b/src/solver/impls/rkgeneric/rkgeneric.cxx index 8f5e95f0be..1c332d26de 100644 --- a/src/solver/impls/rkgeneric/rkgeneric.cxx +++ b/src/solver/impls/rkgeneric/rkgeneric.cxx @@ -75,7 +75,7 @@ int RKGenericSolver::init() { void RKGenericSolver::resetInternalFields() { //Zero out history - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { tmpState[i] = 0; f2[i] = 0; diff --git a/src/solver/impls/rkgeneric/rkgeneric.hxx b/src/solver/impls/rkgeneric/rkgeneric.hxx index a18678e724..9df9a4a396 100644 --- a/src/solver/impls/rkgeneric/rkgeneric.hxx +++ b/src/solver/impls/rkgeneric/rkgeneric.hxx @@ -25,8 +25,8 @@ class RKGenericSolver; -#ifndef 
__RKGENERIC_SOLVER_H__ -#define __RKGENERIC_SOLVER_H__ +#ifndef BOUT_RKGENERIC_SOLVER_H +#define BOUT_RKGENERIC_SOLVER_H #include "mpi.h" @@ -77,4 +77,4 @@ private: std::unique_ptr scheme{nullptr}; }; -#endif // __RKGENERIC_SOLVER_H__ +#endif // BOUT_RKGENERIC_SOLVER_H diff --git a/src/solver/impls/rkgeneric/rkscheme.cxx b/src/solver/impls/rkgeneric/rkscheme.cxx index 25de364533..dd4bd8e7a1 100644 --- a/src/solver/impls/rkgeneric/rkscheme.cxx +++ b/src/solver/impls/rkgeneric/rkscheme.cxx @@ -59,7 +59,7 @@ void RKScheme::setCurState(const Array& start, Array& out, const int curStage, const BoutReal dt) { //Set the initial stage - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { out[i] = start[i]; } @@ -76,7 +76,7 @@ void RKScheme::setCurState(const Array& start, Array& out, } BoutReal fac = stageCoeffs(curStage, j) * dt; - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { out[i] = out[i] + fac * steps(j, i); } @@ -147,7 +147,7 @@ BoutReal RKScheme::getErr(Array& solA, Array& solB) { // we expect slightly different round-off error each time this // is called and hence the nrhs may no longer be exactly // repeatable with this parallelisation. 
- BOUT_OMP(parallel for reduction(+:local_err)) + BOUT_OMP_PERF(parallel for reduction(+:local_err)) for (int i = 0; i < nlocal; i++) { local_err += std::abs(solA[i] - solB[i]) / (std::abs(solA[i]) + std::abs(solB[i]) + atol); @@ -166,7 +166,7 @@ BoutReal RKScheme::getErr(Array& solA, Array& solB) { void RKScheme::constructOutput(const Array& start, const BoutReal dt, const int index, Array& sol) { //Initialise the return data - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { sol[i] = start[i]; } @@ -177,7 +177,7 @@ void RKScheme::constructOutput(const Array& start, const BoutReal dt, continue; // Real comparison not great } BoutReal fac = dt * resultCoeffs(curStage, index); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { sol[i] = sol[i] + fac * steps(curStage, i); } @@ -188,7 +188,7 @@ void RKScheme::constructOutputs(const Array& start, const BoutReal dt, const int indexFollow, const int indexAlt, Array& solFollow, Array& solAlt) { //Initialise the return data - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { solFollow[i] = start[i]; solAlt[i] = start[i]; @@ -198,7 +198,7 @@ void RKScheme::constructOutputs(const Array& start, const BoutReal dt, for (int curStage = 0; curStage < getStageCount(); curStage++) { BoutReal facFol = dt * resultCoeffs(curStage, indexFollow); BoutReal facAlt = dt * resultCoeffs(curStage, indexAlt); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { solFollow[i] = solFollow[i] + facFol * steps(curStage, i); solAlt[i] = solAlt[i] + facAlt * steps(curStage, i); diff --git a/src/solver/impls/slepc/slepc.hxx b/src/solver/impls/slepc/slepc.hxx index 88f35a04f9..619c873132 100644 --- a/src/solver/impls/slepc/slepc.hxx +++ b/src/solver/impls/slepc/slepc.hxx @@ -24,8 +24,8 @@ * **************************************************************************/ -#ifndef __SLEPC_SOLVER_H__ -#define 
__SLEPC_SOLVER_H__ +#ifndef BOUT_SLEPC_SOLVER_H +#define BOUT_SLEPC_SOLVER_H #include "bout/build_config.hxx" #include "bout/solver.hxx" @@ -234,4 +234,4 @@ private: #endif // BOUT_HAS_SLEPC -#endif // __SLEPC_SOLVER_H__ +#endif // BOUT_SLEPC_SOLVER_H diff --git a/src/solver/impls/snes/snes.hxx b/src/solver/impls/snes/snes.hxx index 2021402cd7..601eaaaa25 100644 --- a/src/solver/impls/snes/snes.hxx +++ b/src/solver/impls/snes/snes.hxx @@ -25,8 +25,8 @@ * **************************************************************************/ -#ifndef __SNES_SOLVER_H__ -#define __SNES_SOLVER_H__ +#ifndef BOUT_SNES_SOLVER_H +#define BOUT_SNES_SOLVER_H #include #include @@ -143,4 +143,4 @@ RegisterUnavailableSolver #endif // BOUT_HAS_PETSC -#endif // __SNES_SOLVER_H__ +#endif // BOUT_SNES_SOLVER_H diff --git a/src/solver/impls/split-rk/split-rk.cxx b/src/solver/impls/split-rk/split-rk.cxx index ef53a12f2e..cd6bd1718c 100644 --- a/src/solver/impls/split-rk/split-rk.cxx +++ b/src/solver/impls/split-rk/split-rk.cxx @@ -113,7 +113,7 @@ int SplitRK::run() { // Check accuracy BoutReal local_err = 0.; - BOUT_OMP(parallel for reduction(+: local_err) ) + BOUT_OMP_PERF(parallel for reduction(+: local_err) ) for (int i = 0; i < nlocal; i++) { local_err += fabs(state2[i] - state1[i]) / (fabs(state1[i]) + fabs(state2[i]) + atol); @@ -220,7 +220,7 @@ void SplitRK::take_diffusion_step(BoutReal curtime, BoutReal dt, Array // Stage j = 1 // y_m2 = y0 + weight/3.0 * f(y0) -> u2 - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < dydt.size(); i++) { u2[i] = start[i] + (weight / 3.0) * dydt[i]; } @@ -231,7 +231,7 @@ void SplitRK::take_diffusion_step(BoutReal curtime, BoutReal dt, Array run_diffusive(curtime + (weight / 3.0) * dt); save_derivs(std::begin(u3)); // f(y_m2) -> u3 - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < u3.size(); i++) { u1[i] = 1.5 * (u2[i] + weight * u3[i]) - 0.5 * start[i] - weight * dydt[i]; } @@ -251,7 +251,7 @@ void 
SplitRK::take_diffusion_step(BoutReal curtime, BoutReal dt, Array run_diffusive(curtime); save_derivs(std::begin(u3)); // f(y_m1) -> u3 - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < u3.size(); i++) { // Next stage result in u3 u3[i] = mu * (u1[i] + weight * (u3[i] - a_jm1 * dydt[i])) + nu * u2[i] @@ -280,7 +280,7 @@ void SplitRK::take_advection_step(BoutReal curtime, BoutReal dt, Array run_convective(curtime); save_derivs(std::begin(dydt)); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { u1[i] = start[i] + dt * dydt[i]; } @@ -289,7 +289,7 @@ void SplitRK::take_advection_step(BoutReal curtime, BoutReal dt, Array run_convective(curtime + dt); save_derivs(std::begin(dydt)); - BOUT_OMP(parallel for ) + BOUT_OMP_PERF(parallel for ) for (int i = 0; i < nlocal; i++) { u2[i] = 0.75 * start[i] + 0.25 * u1[i] + 0.25 * dt * dydt[i]; } @@ -298,7 +298,7 @@ void SplitRK::take_advection_step(BoutReal curtime, BoutReal dt, Array run_convective(curtime + 0.5 * dt); save_derivs(std::begin(dydt)); - BOUT_OMP(parallel for) + BOUT_OMP_PERF(parallel for) for (int i = 0; i < nlocal; i++) { result[i] = (1. / 3) * start[i] + (2. / 3.) 
* (u2[i] + dt * dydt[i]); } diff --git a/src/sys/adios_object.cxx b/src/sys/adios_object.cxx index c7d6dab9aa..477dae14ef 100644 --- a/src/sys/adios_object.cxx +++ b/src/sys/adios_object.cxx @@ -1,6 +1,6 @@ #include "bout/build_config.hxx" -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 #include "bout/adios_object.hxx" #include "bout/boutexception.hxx" @@ -95,4 +95,4 @@ void ADIOSSetParameters(const std::string& input, const char delimKeyValue, } } // namespace bout -#endif //BOUT_HAS_ADIOS +#endif //BOUT_HAS_ADIOS2 diff --git a/src/sys/hyprelib.cxx b/src/sys/hyprelib.cxx index 691e53230f..7bdeaa47cf 100644 --- a/src/sys/hyprelib.cxx +++ b/src/sys/hyprelib.cxx @@ -27,7 +27,7 @@ static constexpr auto BOUT_HYPRE_MEMORY = HYPRE_MEMORY_HOST; #endif HypreLib::HypreLib() { - BOUT_OMP(critical(HypreLib)) + BOUT_OMP_SAFE(critical(HypreLib)) { if (count == 0) { // Initialise once output_progress.write("Initialising Hypre\n"); @@ -40,7 +40,7 @@ HypreLib::HypreLib() { } HypreLib::HypreLib([[maybe_unused]] const HypreLib& other) noexcept { - BOUT_OMP(critical(HypreLib)) + BOUT_OMP_SAFE(critical(HypreLib)) { // No need to initialise Hypre, because it must already be initialised count++; // Copying, so increase count @@ -48,7 +48,7 @@ HypreLib::HypreLib([[maybe_unused]] const HypreLib& other) noexcept { } HypreLib::HypreLib([[maybe_unused]] HypreLib&& other) noexcept { - BOUT_OMP(critical(HypreLib)) + BOUT_OMP_SAFE(critical(HypreLib)) { // No need to initialise Hypre, because it must already be initialised count++; // Creating a new Hyprelib object; other will be deleted @@ -56,7 +56,7 @@ HypreLib::HypreLib([[maybe_unused]] HypreLib&& other) noexcept { } HypreLib::~HypreLib() { - BOUT_OMP(critical(HypreLib)) + BOUT_OMP_SAFE(critical(HypreLib)) { count--; if (count == 0) { @@ -67,7 +67,7 @@ HypreLib::~HypreLib() { } void HypreLib::cleanup() { - BOUT_OMP(critical(HypreLib)) + BOUT_OMP_SAFE(critical(HypreLib)) { if (count > 0) { output << "Finalising Hypre. 
Warning: Instances of HypreLib still exist.\n"; diff --git a/src/sys/msg_stack.cxx b/src/sys/msg_stack.cxx index 6ea4c15a8b..502836324c 100644 --- a/src/sys/msg_stack.cxx +++ b/src/sys/msg_stack.cxx @@ -58,7 +58,7 @@ void MsgStack::pop() { if (position <= 0) { return; } - BOUT_OMP(single) + BOUT_OMP_SAFE(single) { --position; } } @@ -78,7 +78,7 @@ void MsgStack::pop(int id) { } void MsgStack::clear() { - BOUT_OMP(single) + BOUT_OMP_SAFE(single) { stack.clear(); position = 0; @@ -86,7 +86,7 @@ void MsgStack::clear() { } void MsgStack::dump() { - BOUT_OMP(single) + BOUT_OMP_SAFE(single) { output << this->getDump(); } } diff --git a/src/sys/options.cxx b/src/sys/options.cxx index a358d50234..893a92cffc 100644 --- a/src/sys/options.cxx +++ b/src/sys/options.cxx @@ -221,6 +221,36 @@ Options::fuzzyFind(const std::string& name, std::string::size_type distance) con return matches; } +Options::Options(const Options& other) { (*this) = other.copy(); } + +Options& Options::operator=(const Options& other) { + if (this == &other) { + return *this; + } + + // Note: Here can't do copy-and-swap because pointers to parents are stored + + value = other.value; + + // Assigning the attributes. 
+ // The simple assignment operator fails to compile with Apple Clang 12 + // attributes = other.attributes; + attributes.clear(); + attributes.insert(other.attributes.begin(), other.attributes.end()); + + full_name = other.full_name; + is_section = other.is_section; + children = other.children; + value_used = other.value_used; + + // Ensure that this is the parent of all children, + // otherwise will point to the original Options instance + for (auto& child : children) { + child.second.parent_instance = this; + } + return *this; +} + Options& Options::operator=(Options&& other) noexcept { if (this == &other) { return *this; diff --git a/src/sys/options/optionparser.hxx b/src/sys/options/optionparser.hxx index ff5bb61a6f..bc61ef7297 100644 --- a/src/sys/options/optionparser.hxx +++ b/src/sys/options/optionparser.hxx @@ -39,8 +39,8 @@ class OptionParser; -#ifndef __OPTIONPARSER_H__ -#define __OPTIONPARSER_H__ +#ifndef BOUT_OPTIONPARSER_H +#define BOUT_OPTIONPARSER_H #include "bout/bout_types.hxx" #include "bout/options.hxx" @@ -61,4 +61,4 @@ public: private: }; -#endif // __OPTIONPARSER_H__ +#endif // BOUT_OPTIONPARSER_H diff --git a/src/sys/options/options_adios.cxx b/src/sys/options/options_adios.cxx index b313d7bc79..88df92df04 100644 --- a/src/sys/options/options_adios.cxx +++ b/src/sys/options/options_adios.cxx @@ -1,6 +1,6 @@ #include "bout/build_config.hxx" -#if BOUT_HAS_ADIOS +#if BOUT_HAS_ADIOS2 #include "options_adios.hxx" #include "bout/adios_object.hxx" @@ -628,4 +628,4 @@ void OptionsADIOS::write(const Options& options, const std::string& time_dim) { } // namespace bout -#endif // BOUT_HAS_ADIOS +#endif // BOUT_HAS_ADIOS2 diff --git a/src/sys/options/options_adios.hxx b/src/sys/options/options_adios.hxx index eddb3976ff..a942e6fed9 100644 --- a/src/sys/options/options_adios.hxx +++ b/src/sys/options/options_adios.hxx @@ -8,7 +8,7 @@ #include "bout/options.hxx" #include "bout/options_io.hxx" -#if !BOUT_HAS_ADIOS +#if !BOUT_HAS_ADIOS2 namespace { 
bout::RegisterUnavailableOptionsIO @@ -79,5 +79,5 @@ RegisterOptionsIO registeroptionsadios("adios"); } // namespace bout -#endif // BOUT_HAS_ADIOS +#endif // BOUT_HAS_ADIOS2 #endif // OPTIONS_ADIOS_H diff --git a/src/sys/options/options_ini.hxx b/src/sys/options/options_ini.hxx index d06a700f09..092ed9320a 100644 --- a/src/sys/options/options_ini.hxx +++ b/src/sys/options/options_ini.hxx @@ -33,8 +33,8 @@ class OptionINI; -#ifndef __OPTIONS_INI_H__ -#define __OPTIONS_INI_H__ +#ifndef BOUT_OPTIONS_INI_H +#define BOUT_OPTIONS_INI_H #include "optionparser.hxx" @@ -59,4 +59,4 @@ private: std::string getNextLine(std::ifstream& fin); }; -#endif // __OPTIONS_INI_H__ +#endif // BOUT_OPTIONS_INI_H diff --git a/src/sys/petsclib.cxx b/src/sys/petsclib.cxx index bfcd7d6314..f1cf1a9d1b 100644 --- a/src/sys/petsclib.cxx +++ b/src/sys/petsclib.cxx @@ -58,7 +58,7 @@ void setPetscOptions(Options& options, const std::string& prefix) { } // namespace PetscLib::PetscLib(Options* opt) { - BOUT_OMP(critical(PetscLib)) + BOUT_OMP_SAFE(critical(PetscLib)) { if (count == 0) { // Initialise PETSc @@ -95,7 +95,7 @@ PetscLib::PetscLib(Options* opt) { } PetscLib::~PetscLib() { - BOUT_OMP(critical(PetscLib)) + BOUT_OMP_SAFE(critical(PetscLib)) { count--; if (count == 0) { @@ -120,7 +120,7 @@ void PetscLib::setOptionsFromInputFile(SNES& snes) { } void PetscLib::cleanup() { - BOUT_OMP(critical(PetscLib)) + BOUT_OMP_SAFE(critical(PetscLib)) { if (count > 0) { output << "Finalising PETSc. 
Warning: Instances of PetscLib still exist.\n"; diff --git a/tests/MMS/GBS/gbs.hxx b/tests/MMS/GBS/gbs.hxx index e711e3ea83..468a5e579c 100644 --- a/tests/MMS/GBS/gbs.hxx +++ b/tests/MMS/GBS/gbs.hxx @@ -1,8 +1,8 @@ class GBS; -#ifndef __GBS_H__ -#define __GBS_H__ +#ifndef BOUT_GBS_H +#define BOUT_GBS_H #include @@ -96,4 +96,4 @@ private: std::unique_ptr aparSolver{nullptr}; }; -#endif // __GBS_H__ +#endif // BOUT_GBS_H diff --git a/tests/MMS/spatial/fci/runtest b/tests/MMS/spatial/fci/runtest index 712442a795..204a9cc271 100755 --- a/tests/MMS/spatial/fci/runtest +++ b/tests/MMS/spatial/fci/runtest @@ -27,7 +27,7 @@ nx = 3 # Not changed for these tests nlist = [8, 16, 32, 64, 128] # Number of parallel slices (in each direction) -nslices = [1, 2] +nslices = [1] directory = "data" diff --git a/tests/integrated/CMakeLists.txt b/tests/integrated/CMakeLists.txt index 7d3e8e81ce..ef173db7df 100644 --- a/tests/integrated/CMakeLists.txt +++ b/tests/integrated/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(test-datafilefacade) add_subdirectory(test-drift-instability) add_subdirectory(test-drift-instability-staggered) add_subdirectory(test-fieldgroupComm) +add_subdirectory(test-fci-boundary) add_subdirectory(test-griddata) add_subdirectory(test-griddata-yboundary-guards) add_subdirectory(test-gyro) diff --git a/tests/integrated/test-fci-boundary/CMakeLists.txt b/tests/integrated/test-fci-boundary/CMakeLists.txt new file mode 100644 index 0000000000..bf25cd7c57 --- /dev/null +++ b/tests/integrated/test-fci-boundary/CMakeLists.txt @@ -0,0 +1,22 @@ +bout_add_mms_test(test-fci-boundary + SOURCES get_par_bndry.cxx + USE_RUNTEST + USE_DATA_BOUT_INP + REQUIRES zoidberg_FOUND + PROCESSORS 1 + ) + +if (zoidberg_FOUND) + set(gridfile ${CMAKE_CURRENT_BINARY_DIR}/grid.fci.nc) + add_custom_command(OUTPUT ${gridfile} + COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${BOUT_PYTHONPATH}:$ENV{PYTHONPATH} ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/grid.py ${gridfile} + WORKING_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/../../../tools/pylib/boutconfig/__init__.py + DEPENDS grid.py + IMPLICIT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Creating test-fci-boundary grid file" + ) + add_custom_target(test-fci-boundary-grid DEPENDS ${gridfile}) + add_dependencies(test-fci-boundary + test-fci-boundary-grid) +endif() diff --git a/tests/integrated/test-fci-boundary/data/BOUT.inp b/tests/integrated/test-fci-boundary/data/BOUT.inp new file mode 100644 index 0000000000..b631f16295 --- /dev/null +++ b/tests/integrated/test-fci-boundary/data/BOUT.inp @@ -0,0 +1,20 @@ +grid = grid.fci.nc + +MXG = 1 +NXPE = 1 +MYG = 1 + +[mesh] +symmetricglobalx = true + +[mesh:ddy] +first = C2 +second = C2 + +[mesh:paralleltransform] +type = fci +y_periodic = true +z_periodic = true + +[mesh:paralleltransform:xzinterpolation] +type = lagrange4pt diff --git a/tests/integrated/test-fci-boundary/get_par_bndry.cxx b/tests/integrated/test-fci-boundary/get_par_bndry.cxx new file mode 100644 index 0000000000..ac0f5de2a6 --- /dev/null +++ b/tests/integrated/test-fci-boundary/get_par_bndry.cxx @@ -0,0 +1,34 @@ +#include "bout/bout.hxx" +#include "bout/derivs.hxx" +#include "bout/field_factory.hxx" +#include "bout/parallel_boundary_region.hxx" + +int main(int argc, char** argv) { + BoutInitialise(argc, argv); + + using bout::globals::mesh; + + std::vector fields; + fields.resize(static_cast(BoundaryParType::SIZE)); + Options dump; + for (int i = 0; i < fields.size(); i++) { + fields[i] = Field3D{0.0}; + mesh->communicate(fields[i]); + for (const auto& bndry_par : + mesh->getBoundariesPar(static_cast(i))) { + output.write("{:s} region\n", toString(static_cast(i))); + for (bndry_par->first(); !bndry_par->isDone(); bndry_par->next()) { + fields[i][bndry_par->ind()] += 1; + output.write("{:s} increment\n", toString(static_cast(i))); + } + } + output.write("{:s} done\n", toString(static_cast(i))); + + dump[fmt::format("field_{:s}", toString(static_cast(i)))] 
= + fields[i]; + } + + bout::writeDefaultOutputFile(dump); + + BoutFinalise(); +} diff --git a/tests/integrated/test-fci-boundary/grid.py b/tests/integrated/test-fci-boundary/grid.py new file mode 100644 index 0000000000..d544f0cdf7 --- /dev/null +++ b/tests/integrated/test-fci-boundary/grid.py @@ -0,0 +1,55 @@ +import zoidberg as zb +import numpy as np +import sys +import boutconfig as bc + + +def rotating_ellipse( + nx=68, + ny=16, + nz=128, + npoints=421, + xcentre=5.5, + I_coil=0.01, + curvilinear=True, + rectangular=False, + fname="rotating-ellipse.fci.nc", + a=0.4, + Btor=2.5, +): + yperiod = 2 * np.pi / 5.0 + field = zb.field.RotatingEllipse( + xcentre=xcentre, + I_coil=I_coil, + radius=2 * a, + yperiod=yperiod, + Btor=Btor, + ) + # Define the y locations + ycoords = np.linspace(0.0, yperiod, ny, endpoint=False) + + if rectangular: + print("Making rectangular poloidal grid") + poloidal_grid = zb.poloidal_grid.RectangularPoloidalGrid( + nx, nz, 1.0, 1.0, Rcentre=xcentre + ) + elif curvilinear: + print("Making curvilinear poloidal grid") + inner = zb.rzline.shaped_line( + R0=xcentre, a=a / 2.0, elong=0, triang=0.0, indent=0, n=npoints + ) + outer = zb.rzline.shaped_line( + R0=xcentre, a=a, elong=0, triang=0.0, indent=0, n=npoints + ) + + print("creating grid...") + poloidal_grid = zb.poloidal_grid.grid_elliptic(inner, outer, nx, nz) + + # Create the 3D grid by putting together 2D poloidal grids + grid = zb.grid.Grid(poloidal_grid, ycoords, yperiod, yperiodic=True) + maps = zb.make_maps(grid, field, quiet=True) + zb.write_maps(grid, field, maps, str(fname), metric2d=bc.isMetric2D()) + + +if __name__ == "__main__": + rotating_ellipse(fname=sys.argv[1]) diff --git a/tests/integrated/test-fci-boundary/runtest b/tests/integrated/test-fci-boundary/runtest new file mode 100755 index 0000000000..16cb4ee443 --- /dev/null +++ b/tests/integrated/test-fci-boundary/runtest @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +# +# Python script to run and analyse MMS test +# + +# 
Cores: 2 +# only working with cmake +# requires: False +from boututils.run_wrapper import launch_safe +from boututils.datafile import DataFile +from boutdata.collect import collect as _collect + +import numpy as np + + +def collect(var): + return _collect( + var, + info=False, + path=directory, + xguards=False, + yguards=False, + ) + + +nprocs = [1] # , 2, 4] +mthread = 2 + +directory = "data" + +with DataFile("grid.fci.nc") as grid: + xfwd = grid.read("forward_xt_prime")[1:-1] + xbwd = grid.read("backward_xt_prime")[1:-1] + +nx = xfwd.shape[0] + +regions = { + "xin_fwd": xfwd < 1, + "xout_fwd": xfwd > nx, + "xin_bwd": xbwd < 1, + "xout_bwd": xbwd > nx, +} +regions = {k: v.astype(int) for k, v in regions.items()} + +# for x in "xout", "xin": +# regions[x] = np.logical_or(regions[f"{x}_fwd"], regions[f"{x}_bwd"]) +# for x in "fwd", "bwd": +# regions[x] = np.logical_or(regions[f"xin_{x}"], regions[f"xout_{x}"]) +# regions["all"] = np.logical_or(regions["xin"], regions["xout"]) +for x in "xout", "xin": + regions[x] = regions[f"{x}_fwd"] + regions[f"{x}_bwd"] +for x in "fwd", "bwd": + regions[x] = regions[f"xin_{x}"] + regions[f"xout_{x}"] +regions["all"] = regions["xin"] + regions["xout"] + +for nproc in nprocs: + cmd = "./get_par_bndry" + + # Launch using MPI + _, out = launch_safe(cmd, nproc=nproc, mthread=mthread, pipe=True) + + for k, v in regions.items(): + # Collect data + data = collect(f"field_{k}") + assert np.allclose(data, v), ( + k + " does not match", + np.sum(data), + np.sum(v), + np.max(data), + ) diff --git a/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp b/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp index 9a6ac24fa1..46d3cb55ba 100644 --- a/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp +++ b/tests/integrated/test-laplace-hypre3d/data_circular_core-sol/BOUT.inp @@ -1,7 +1,7 @@ [f] #function = 0. function = mixmode(x, 1.)*mixmode(y, 2.)*mixmode(z, 3.) 
-bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [rhs] function = mixmode(x, 4.)*mixmode(y, 5.)*mixmode(z, 6.) @@ -17,7 +17,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.) [C2] #function = 0. function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.) -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [A] function = 0. diff --git a/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp b/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp index eb78644f0f..be0c697d80 100644 --- a/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp +++ b/tests/integrated/test-laplace-hypre3d/data_circular_core/BOUT.inp @@ -16,7 +16,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.) [C2] #function = 0. function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.) -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [A] function = 0. diff --git a/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp b/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp index da1918dcc7..bc3c47eac7 100644 --- a/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp +++ b/tests/integrated/test-laplace-petsc3d/data_circular_core-sol/BOUT.inp @@ -17,7 +17,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.) [C2] #function = 0. function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.) -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [A] function = 0.0 diff --git a/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp b/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp index 6474b2604b..601531de84 100644 --- a/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp +++ b/tests/integrated/test-laplace-petsc3d/data_circular_core/BOUT.inp @@ -17,7 +17,7 @@ function = 1. + .1*mixmode(x, 10.)*mixmode(y, 11.)*mixmode(z, 12.) 
[C2] #function = 0. function = .1*mixmode(x, 13.)*mixmode(y, 14.)*mixmode(z, 15.) -bndry_par_all = parallel_neumann +bndry_par_all = parallel_neumann_o2 [A] function = 0.0 diff --git a/tests/integrated/test-laplacexy/loadmetric.hxx b/tests/integrated/test-laplacexy/loadmetric.hxx index 141269d8b8..25e55fc8e8 100644 --- a/tests/integrated/test-laplacexy/loadmetric.hxx +++ b/tests/integrated/test-laplacexy/loadmetric.hxx @@ -1,8 +1,8 @@ -#ifndef __LOADMETRIC_H__ -#define __LOADMETRIC_H__ +#ifndef BOUT_LOADMETRIC_H +#define BOUT_LOADMETRIC_H #include void LoadMetric(BoutReal Lnorm, BoutReal Bnorm); -#endif // __LOADMETRIC_H__ +#endif // BOUT_LOADMETRIC_H diff --git a/tests/integrated/test-options-adios/CMakeLists.txt b/tests/integrated/test-options-adios/CMakeLists.txt index 110773d6fd..cc61fabe57 100644 --- a/tests/integrated/test-options-adios/CMakeLists.txt +++ b/tests/integrated/test-options-adios/CMakeLists.txt @@ -2,5 +2,5 @@ bout_add_integrated_test(test-options-adios SOURCES test-options-adios.cxx USE_RUNTEST USE_DATA_BOUT_INP - REQUIRES BOUT_HAS_ADIOS + REQUIRES BOUT_HAS_ADIOS2 ) diff --git a/tests/integrated/test-options-adios/runtest b/tests/integrated/test-options-adios/runtest index 1621c686a3..03a83fc0ba 100755 --- a/tests/integrated/test-options-adios/runtest +++ b/tests/integrated/test-options-adios/runtest @@ -34,7 +34,7 @@ assert result["int"] == 42 assert math.isclose(result["real"], 3.1415) assert result["string"] == "hello" -print("Checking saved ADIOS test-out file -- Not implemented") +print("Checking saved ADIOS2 test-out file -- Not implemented") # Check the output NetCDF file # with DataFile("test-out.nc") as f: diff --git a/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx b/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx index bfd394194f..1e3cdde310 100644 --- a/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx +++ b/tests/integrated/test-petsc_laplace/test_petsc_laplace.cxx @@ -23,15 +23,90 @@ * 
**************************************************************************/ -#include -#include -// #include -#include -#include -#include +#include "bout/bout.hxx" // NOLINT +#include "bout/bout_types.hxx" +#include "bout/boutexception.hxx" +#include "bout/constants.hxx" +#include "bout/difops.hxx" +#include "bout/field2d.hxx" +#include "bout/field3d.hxx" +#include "bout/invert_laplace.hxx" +#include "bout/options.hxx" +#include "bout/options_io.hxx" +#include "bout/output.hxx" +#include "bout/traits.hxx" + +#include "fmt/core.h" +#include + #include +#include BoutReal max_error_at_ystart(const Field3D& error); +void apply_flat_boundary(Field3D& bcoef); + +template +void check_laplace(int test_num, std::string_view test_name, Laplacian& invert, + int inner_flags, int outer_flags, const T& acoef, const T& ccoef, + const T& dcoef, const U& bcoef, const Field3D& field, int ystart, + Options& dump) { + static_assert(bout::utils::is_Field_v, "check_laplace requires Field2D or Field3D"); + static_assert(bout::utils::is_Field_v, "check_laplace requires Field2D or Field3D"); + + invert.setInnerBoundaryFlags(inner_flags); + invert.setOuterBoundaryFlags(outer_flags); + invert.setCoefA(acoef); + invert.setCoefC(ccoef); + invert.setCoefD(dcoef); + + checkData(bcoef); + + Field3D sol; + Field3D error; + Field3D abs_error; + BoutReal max_error = -1; + + try { + sol = invert.solve(sliceXZ(bcoef, ystart)); + error = (field - sol) / field; + abs_error = field - sol; + max_error = max_error_at_ystart(abs(abs_error)); + } catch (BoutException& err) { + output.write("BoutException occured in invert->solve(b1): {}\n", err.what()); + } + + output.write("\nTest {}: {}\n", test_num, test_name); + output.write("Magnitude of maximum absolute error is {}\n", max_error); + + dump[fmt::format("a{}", test_num)] = acoef; + dump[fmt::format("b{}", test_num)] = bcoef; + dump[fmt::format("c{}", test_num)] = ccoef; + dump[fmt::format("d{}", test_num)] = dcoef; + dump[fmt::format("f{}", test_num)] = 
field; + dump[fmt::format("sol{}", test_num)] = sol; + dump[fmt::format("error{}", test_num)] = error; + dump[fmt::format("absolute_error{}", test_num)] = abs_error; + dump[fmt::format("max_error{}", test_num)] = max_error; +} + +template +Field3D forward_laplace(const Field3D& field, const T& acoef, const T& ccoef, + const T& dcoef) { + auto bcoef = + dcoef * Delp2(field) + Grad_perp(ccoef) * Grad_perp(field) / ccoef + acoef * field; + apply_flat_boundary(bcoef); + return bcoef; +} + +Field3D generate_f1(const Mesh& mesh); +Field3D generate_a1(const Mesh& mesh); +Field3D generate_c1(const Mesh& mesh); +Field3D generate_d1(const Mesh& mesh); + +Field3D generate_f5(const Mesh& mesh); +Field3D generate_a5(const Mesh& mesh); +Field3D generate_c5(const Mesh& mesh); +Field3D generate_d5(const Mesh& mesh); int main(int argc, char** argv) { @@ -42,829 +117,553 @@ int main(int argc, char** argv) { options = Options::getRoot()->getSection("petsc4th"); auto invert_4th = Laplacian::create(options); - // Solving equations of the form d*Delp2(f) + 1/c*Grad_perp(c).Grad_perp(f) + a*f = b for various f, a, c, d - Field3D f1, a1, b1, c1, d1, sol1; - BoutReal p, q; //Use to set parameters in constructing trial functions - Field3D error1, - absolute_error1; //Absolute value of relative error: abs( (f1-sol1)/f1 ) - BoutReal max_error1; //Output of test + Options dump; + // Solving equations of the form d*Delp2(f) + 1/c*Grad_perp(c).Grad_perp(f) + a*f = b for various f, a, c, d using bout::globals::mesh; // Only Neumann x-boundary conditions are implemented so far, so test functions should be Neumann in x and periodic in z. 
// Use Field3D's, but solver only works on FieldPerp slices, so only use 1 y-point - BoutReal nx = mesh->GlobalNx - 2 * mesh->xstart - 1; - BoutReal nz = mesh->GlobalNz; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////// // Test 1: Gaussian x-profiles, 2nd order Krylov - p = 0.39503274; - q = 0.20974396; - f1.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f1(jx, jy, jz) = - 0. + exp(-(100. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-100. * pow(-p, 2)) * x - + (-p * exp(-100. * pow(-p, 2)) - - (1 - p) * exp(-100. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos(2. * PI - * (z - q)))) //make the gradients zero at both x-boundaries - ; - ASSERT0(finite(f1(jx, jy, jz))); - } - } - } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f1(jx, jy, jz) = - 0. + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-60. * pow(-p, 2)) * x - + (-p * exp(-60. * pow(-p, 2)) - - (1 - p) * exp(-60. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos( - 2. 
* PI - * (z - q)))); //make the gradients zero at both x-boundaries - ASSERT0(finite(f1(jx, jy, jz))); - } - } - } - } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f1(jx, jy, jz) = - 0. + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-60. * pow(-p, 2)) * x - + (-p * exp(-60. * pow(-p, 2)) - - (1 - p) * exp(-60. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos( - 2. * PI - * (z - q)))); //make the gradients zero at both x-boundaries - ASSERT0(finite(f1(jx, jy, jz))); - } - } - } - } + Field3D f_1 = generate_f1(*mesh); + Field3D a_1 = generate_a1(*mesh); + Field3D c_1 = generate_c1(*mesh); + Field3D d_1 = generate_d1(*mesh); - f1.applyBoundary("neumann"); - - p = 0.512547; - q = 0.30908712; - d1.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d1(jx, jy, jz) = - 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.); - } - } - } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d1(jx, jy, jz) = - 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. 
* PI * (z - q) * 3.); - // d1(jx, jy, jz) = d1(jx+1, jy, jz); - } - } - } - } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d1(jx, jy, jz) = - 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.); - // d1(jx, jy, jz) = d1(jx-1, jy, jz); - } - } - } - } + mesh->communicate(f_1, a_1, c_1, d_1); - p = 0.18439023; - q = 0.401089473; - c1.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c1(jx, jy, jz) = - 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.); - } - } - } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c1(jx, jy, jz) = - 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.); - // c1(jx, jy, jz) = c1(jx+1, jy, jz); - } - } - } - } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c1(jx, jy, jz) = - 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. 
* PI * (z - q) * 2.); - // c1(jx, jy, jz) = c1(jx-1, jy, jz); - } - } + const Field3D b_1 = forward_laplace(f_1, a_1, c_1, d_1); + + int test_num = 0; + check_laplace(++test_num, "PETSc 2nd order", *invert, INVERT_AC_GRAD, INVERT_AC_GRAD, + a_1, c_1, d_1, b_1, f_1, mesh->ystart, dump); + + ///////////////////////////////////////////////// + // Test 2: Gaussian x-profiles, 4th order Krylov + + check_laplace(++test_num, "PETSc 4th order", *invert_4th, INVERT_AC_GRAD, + INVERT_AC_GRAD, a_1, c_1, d_1, b_1, f_1, mesh->ystart, dump); + + //////////////////////////////////////////////////////////////////////////////////////// + // Test 3+4: Gaussian x-profiles, z-independent coefficients and compare with SPT method + + const Field2D a_3 = DC(a_1); + const Field2D c_3 = DC(c_1); + const Field2D d_3 = DC(d_1); + const Field3D b_3 = forward_laplace(f_1, a_3, c_3, d_3); + + check_laplace(++test_num, "with coefficients constant in z, PETSc 2nd order", *invert, + INVERT_AC_GRAD, INVERT_AC_GRAD, a_3, c_3, d_3, b_3, f_1, mesh->ystart, + dump); + + Options* SPT_options = Options::getRoot()->getSection("SPT"); + auto invert_SPT = Laplacian::create(SPT_options); + + check_laplace(++test_num, "with coefficients constant in z, default solver", + *invert_SPT, INVERT_AC_GRAD, INVERT_AC_GRAD | INVERT_DC_GRAD, a_3, c_3, + d_3, b_3, f_1, mesh->ystart, dump); + + ////////////////////////////////////////////// + // Test 5: Cosine x-profiles, 2nd order Krylov + Field3D f_5 = generate_f5(*mesh); + Field3D a_5 = generate_a5(*mesh); + Field3D c_5 = generate_c5(*mesh); + Field3D d_5 = generate_d5(*mesh); + + mesh->communicate(f_5, a_5, c_5, d_5); + + const Field3D b_5 = forward_laplace(f_5, a_5, c_5, d_5); + + check_laplace(++test_num, "different profiles, PETSc 2nd order", *invert, + INVERT_AC_GRAD, INVERT_AC_GRAD, a_5, c_5, d_5, b_5, f_5, mesh->ystart, + dump); + + ////////////////////////////////////////////// + // Test 6: Cosine x-profiles, 4th order Krylov + + check_laplace(++test_num, 
"different profiles, PETSc 4th order", *invert_4th, + INVERT_AC_GRAD, INVERT_AC_GRAD, a_5, c_5, d_5, b_5, f_5, mesh->ystart, + dump); + + ////////////////////////////////////////////////////////////////////////////////////// + // Test 7+8: Cosine x-profiles, z-independent coefficients and compare with SPT method + + const Field2D a_7 = DC(a_5); + const Field2D c_7 = DC(c_5); + const Field2D d_7 = DC(d_5); + const Field3D b_7 = forward_laplace(f_5, a_7, c_7, d_7); + + check_laplace(++test_num, + "different profiles, with coefficients constant in z, PETSc 2nd order", + *invert, INVERT_AC_GRAD, INVERT_AC_GRAD, a_7, c_7, d_7, b_7, f_5, + mesh->ystart, dump); + + check_laplace(++test_num, + "different profiles, with coefficients constant in z, default solver", + *invert_SPT, INVERT_AC_GRAD, INVERT_AC_GRAD | INVERT_DC_GRAD, a_7, c_7, + d_7, b_7, f_5, mesh->ystart, dump); + + // Write and close the output file + bout::writeDefaultOutputFile(dump); + + MPI_Barrier(BoutComm::get()); // Wait for all processors to write data + } + + bout::checkForUnusedOptions(); + + BoutFinalise(); + return 0; +} + +BoutReal max_error_at_ystart(const Field3D& error) { + const auto* mesh = error.getMesh(); + BoutReal local_max_error = error(mesh->xstart, mesh->ystart, 0); + + for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { + for (int jz = 0; jz < mesh->LocalNz; jz++) { + if (local_max_error < error(jx, mesh->ystart, jz)) { + local_max_error = error(jx, mesh->ystart, jz); } } + } - p = 0.612547; - q = 0.30908712; - a1.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a1(jx, jy, jz) = - -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. 
* PI * (z - q) * 7.); + BoutReal max_error = BoutNaN; + + MPI_Allreduce(&local_max_error, &max_error, 1, MPI_DOUBLE, MPI_MAX, BoutComm::get()); + + return max_error; +} + +void apply_flat_boundary(Field3D& bcoef) { + const Mesh& mesh = *bcoef.getMesh(); + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + bcoef(jx, jy, jz) = bcoef(jx + 1, jy, jz); } } } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a1(jx, jy, jz) = - -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.); - // a1(jx, jy, jz) = a1(jx+1, jy, jz); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + bcoef(jx, jy, jz) = bcoef(jx - 1, jy, jz); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a1(jx, jy, jz) = - -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. 
* PI * (z - q) * 7.); - // a1(jx, jy, jz) = a1(jx-1, jy, jz); - } - } + } +} + +Field3D generate_f1(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + + constexpr BoutReal p = 0.39503274; // NOLINT + constexpr BoutReal q = 0.20974396; // NOLINT + + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = 0. + + exp(-(100. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-100. * pow(-p, 2)) * x + + (-p * exp(-100. * pow(-p, 2)) + - (1 - p) * exp(-100. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. * PI * (z - q)))); } } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; - checkData(f1); - checkData(a1); - checkData(c1); - checkData(d1); - - mesh->communicate(f1, a1, c1, d1); - - b1 = d1 * Delp2(f1) + Grad_perp(c1) * Grad_perp(f1) / c1 + a1 * f1; - - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b1(jx, jy, jz) = b1(jx + 1, jy, jz); - } + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = 0. + + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-60. * pow(-p, 2)) * x + + (-p * exp(-60. * pow(-p, 2)) + - (1 - p) * exp(-60. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. 
* PI * (z - q)))); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b1(jx, jy, jz) = b1(jx - 1, jy, jz); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = 0. + + exp(-(60. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-60. * pow(-p, 2)) * x + + (-p * exp(-60. * pow(-p, 2)) + - (1 - p) * exp(-60. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. * PI * (z - q)))); } } } + } - invert->setInnerBoundaryFlags(INVERT_AC_GRAD); - invert->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert->setCoefA(a1); - invert->setCoefC(c1); - invert->setCoefD(d1); - - checkData(b1); - - try { - sol1 = invert->solve(sliceXZ(b1, mesh->ystart)); - error1 = (f1 - sol1) / f1; - absolute_error1 = f1 - sol1; - // max_error1 = max_error_at_ystart(abs(error1)); - max_error1 = max_error_at_ystart(abs(absolute_error1)); - } catch (BoutException& err) { - output << "BoutException occured in invert->solve(b1): " << err.what() << endl; - max_error1 = -1; - } + checkData(result); + result.applyBoundary("neumann"); + return result; +} - output << endl << "Test 1: PETSc 2nd order" << endl; - // output<<"Time to set up is "<setInnerBoundaryFlags(INVERT_AC_GRAD); - invert_4th->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert_4th->setGlobalFlags(INVERT_4TH_ORDER); - invert_4th->setCoefA(a1); - invert_4th->setCoefC(c1); - invert_4th->setCoefD(d1); - - try { - sol2 = invert_4th->solve(sliceXZ(b1, mesh->ystart)); - error2 = (f1 - sol2) / f1; - absolute_error2 = f1 - sol2; - // max_error2 = max_error_at_ystart(abs(error2)); - max_error2 
= max_error_at_ystart(abs(absolute_error2)); - } catch (BoutException& err) { - output << "BoutException occured in invert->solve(b1): " << err.what() << endl; - max_error2 = -1; + constexpr BoutReal p = 0.512547; // NOLINT + constexpr BoutReal q = 0.30908712; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.); + } } - - output << endl << "Test 2: PETSc 4th order" << endl; - // output<<"Time to set up is "<firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b3(jx, jy, jz) = b3(jx + 1, jy, jz); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. * PI * (z - q) * 3.); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b3(jx, jy, jz) = b3(jx - 1, jy, jz); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.2 * exp(-50. * pow(x - p, 2) / 4.) * sin(2. 
* PI * (z - q) * 3.); } } } + } + checkData(result); + return result; +} - invert->setInnerBoundaryFlags(INVERT_AC_GRAD); - invert->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert->setCoefA(a3); - invert->setCoefC(c3); - invert->setCoefD(d3); - - try { - sol3 = invert->solve(sliceXZ(b3, mesh->ystart)); - error3 = (f1 - sol3) / f1; - absolute_error3 = f1 - sol3; - // max_error3 = max_error_at_ystart(abs(error3)); - max_error3 = max_error_at_ystart(abs(absolute_error3)); - } catch (BoutException& err) { - output << "BoutException occured in invert->solve(b3): " << err.what() << endl; - max_error3 = -1; - } +Field3D generate_c1(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; - output << endl << "Test 3: with coefficients constant in z, PETSc 2nd order" << endl; - // output<<"Time to set up is "<getSection("SPT"); - auto invert_SPT = Laplacian::create(SPT_options); - invert_SPT->setInnerBoundaryFlags(INVERT_AC_GRAD); - invert_SPT->setOuterBoundaryFlags(INVERT_AC_GRAD | INVERT_DC_GRAD); - invert_SPT->setCoefA(a3); - invert_SPT->setCoefC(c3); - invert_SPT->setCoefD(d3); - - sol4 = invert_SPT->solve(sliceXZ(b3, mesh->ystart)); - error4 = (f1 - sol4) / f1; - absolute_error4 = f1 - sol4; - // max_error4 = max_error_at_ystart(abs(error4)); - max_error4 = max_error_at_ystart(abs(absolute_error4)); - - output << endl << "Test 4: with coefficients constant in z, default solver" << endl; - // output<<"Time to set up is "<xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f5(jx, jy, jz) = - 0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-50. * pow(-p, 2)) * x - + (-p * exp(-50. * pow(-p, 2)) - - (1 - p) * exp(-50. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos(2. 
* PI - * (z - q)))) //make the gradients zero at both x-boundaries - ; - } + constexpr BoutReal p = 0.18439023; // NOLINT + constexpr BoutReal q = 0.401089473; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.); } } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f5(jx, jy, jz) = - 0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-50. * pow(-p, 2)) * x - + (-p * exp(-50. * pow(-p, 2)) - - (1 - p) * exp(-50. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos( - 2. * PI - * (z - q)))); //make the gradients zero at both x-boundaries - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - f5(jx, jy, jz) = - 0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) - - 50. - * (2. * p * exp(-50. * pow(-p, 2)) * x - + (-p * exp(-50. 
* pow(-p, 2)) - - (1 - p) * exp(-50. * pow(1 - p, 2))) - * pow(x, 2)) - * exp(-( - 1. - - cos( - 2. * PI - * (z - q)))); //make the gradients zero at both x-boundaries - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + 0.15 * exp(-50. * pow(x - p, 2) * 2.) * sin(2. * PI * (z - q) * 2.); } } } + } - p = 0.63298589; - q = 0.889237890; - d5.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d5(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.); - } + checkData(result); + return result; +} + +Field3D generate_a1(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + + constexpr BoutReal p = 0.612547; // NOLINT + constexpr BoutReal q = 0.30908712; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.); } } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d5(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. 
* PI * (z - q) * 3.); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - d5(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + 0.1 * exp(-50. * pow(x - p, 2) * 2.5) * sin(2. * PI * (z - q) * 7.); } } } + } - p = 0.160983834; - q = 0.73050121087; - c5.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c5(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. 
* PI * (z - q) * 2.); - } + checkData(result); + return result; +} + +Field3D generate_f5(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + constexpr BoutReal p = 0.623901; // NOLINT + constexpr BoutReal q = 0.01209489; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = + 0. + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-50. * pow(-p, 2)) * x + + (-p * exp(-50. * pow(-p, 2)) - (1 - p) * exp(-50. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. * PI * (z - q)))); } } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c5(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = 0. + + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-50. * pow(-p, 2)) * x + + (-p * exp(-50. * pow(-p, 2)) + - (1 - p) * exp(-50. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. 
* PI * (z - q)))); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - c5(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + //make the gradients zero at both x-boundaries + result(jx, jy, jz) = 0. + + exp(-(50. * pow(x - p, 2) + 1. - cos(2. * PI * (z - q)))) + - 50. + * (2. * p * exp(-50. * pow(-p, 2)) * x + + (-p * exp(-50. * pow(-p, 2)) + - (1 - p) * exp(-50. * pow(1 - p, 2))) + * pow(x, 2)) + * exp(-(1. - cos(2. * PI * (z - q)))); } } } + } + result.applyBoundary("neumann"); + checkData(result); + return result; +} - p = 0.5378950; - q = 0.2805870; - a5.allocate(); - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a5(jx, jy, jz) = -1. + p * cos(2. * PI * x * 2.) * sin(2. 
* PI * (z - q) * 7.); - } +Field3D generate_d5(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + constexpr BoutReal p = 0.63298589; // NOLINT + constexpr BoutReal q = 0.889237890; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.); } } - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a5(jx, jy, jz) = - -1. + p * cos(2. * PI * x * 2.) * sin(2. * PI * (z - q) * 7.); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.); } } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - BoutReal x = BoutReal(mesh->getGlobalXIndex(jx) - mesh->xstart) / nx; - BoutReal z = BoutReal(jz) / nz; - a5(jx, jy, jz) = - -1. + p * cos(2. * PI * x * 2.) * sin(2. 
* PI * (z - q) * 7.); - } + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = 1. + p * cos(2. * PI * x) * sin(2. * PI * (z - q) * 3.); } } } + } + checkData(result); + return result; +} - f5.applyBoundary("neumann"); - mesh->communicate(f5, a5, c5, d5); +Field3D generate_c5(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + constexpr BoutReal p = 0.160983834; // NOLINT + constexpr BoutReal q = 0.73050121087; // NOLINT - b5 = d5 * Delp2(f5) + Grad_perp(c5) * Grad_perp(f5) / c5 + a5 * f5; - if (mesh->firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b5(jx, jy, jz) = b5(jx + 1, jy, jz); - } - } + Field3D result; + + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = 1. + p * cos(2. * PI * x * 5) * sin(2. * PI * (z - q) * 2.); } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b5(jx, jy, jz) = b5(jx - 1, jy, jz); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + p * cos(2. * PI * x * 5) * sin(2. 
* PI * (z - q) * 2.); } } } - - invert->setInnerBoundaryFlags(INVERT_AC_GRAD); - invert->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert->setCoefA(a5); - invert->setCoefC(c5); - invert->setCoefD(d5); - - try { - sol5 = invert->solve(sliceXZ(b5, mesh->ystart)); - error5 = (f5 - sol5) / f5; - absolute_error5 = f5 - sol5; - // max_error5 = max_error_at_ystart(abs(error5)); - max_error5 = max_error_at_ystart(abs(absolute_error5)); - } catch (BoutException& err) { - output << "BoutException occured in invert->solve(b5): " << err.what() << endl; - max_error5 = -1; - } - - output << endl << "Test 5: different profiles, PETSc 2nd order" << endl; - // output<<"Time to set up is "<setInnerBoundaryFlags(INVERT_AC_GRAD); - invert_4th->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert_4th->setGlobalFlags(INVERT_4TH_ORDER); - invert_4th->setCoefA(a5); - invert_4th->setCoefC(c5); - invert_4th->setCoefD(d5); - - try { - sol6 = invert_4th->solve(sliceXZ(b5, mesh->ystart)); - error6 = (f5 - sol6) / f5; - absolute_error6 = f5 - sol6; - // max_error6 = max_error_at_ystart(abs(error6)); - max_error6 = max_error_at_ystart(abs(absolute_error6)); - } catch (BoutException& err) { - output - << "BoutException occured in invert->solve(b6): Laplacian inversion failed to " - "converge (probably)" - << endl; - max_error6 = -1; + } + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + 1. + p * cos(2. * PI * x * 5) * sin(2. 
* PI * (z - q) * 2.); + } + } } + } + checkData(result); + return result; +} - output << endl << "Test 6: different profiles, PETSc 4th order" << endl; - // output<<"Time to set up is "<firstX()) { - for (int jx = mesh->xstart - 1; jx >= 0; jx--) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b7(jx, jy, jz) = b7(jx + 1, jy, jz); - } - } +Field3D generate_a5(const Mesh& mesh) { + const BoutReal nx = mesh.GlobalNx - 2 * mesh.xstart - 1; + const BoutReal nz = mesh.GlobalNz; + constexpr BoutReal p = 0.5378950; // NOLINT + constexpr BoutReal q = 0.2805870; // NOLINT + Field3D result; + result.allocate(); + for (int jx = mesh.xstart; jx <= mesh.xend; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + p * cos(2. * PI * x * 2.) * sin(2. * PI * (z - q) * 7.); } } - if (mesh->lastX()) { - for (int jx = mesh->xend + 1; jx < mesh->LocalNx; jx++) { - for (int jy = 0; jy < mesh->LocalNy; jy++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - b7(jx, jy, jz) = b7(jx - 1, jy, jz); - } + } + if (mesh.firstX()) { + for (int jx = mesh.xstart - 1; jx >= 0; jx--) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + p * cos(2. * PI * x * 2.) * sin(2. 
* PI * (z - q) * 7.); } } } - - invert->setInnerBoundaryFlags(INVERT_AC_GRAD); - invert->setOuterBoundaryFlags(INVERT_AC_GRAD); - invert->setCoefA(a7); - invert->setCoefC(c7); - invert->setCoefD(d7); - - try { - sol7 = invert->solve(sliceXZ(b7, mesh->ystart)); - error7 = (f5 - sol7) / f5; - absolute_error7 = f5 - sol7; - // max_error7 = max_error_at_ystart(abs(error7)); - max_error7 = max_error_at_ystart(abs(absolute_error7)); - } catch (BoutException& err) { - output << "BoutException occured in invert->solve(b7): " << err.what() << endl; - max_error7 = -1; - } - - output - << endl - << "Test 7: different profiles, with coefficients constant in z, PETSc 2nd order" - << endl; - // output<<"Time to set up is "<setInnerBoundaryFlags(INVERT_AC_GRAD); - invert_SPT->setOuterBoundaryFlags(INVERT_AC_GRAD | INVERT_DC_GRAD); - invert_SPT->setCoefA(a7); - invert_SPT->setCoefC(c7); - invert_SPT->setCoefD(d7); - - sol8 = invert_SPT->solve(sliceXZ(b7, mesh->ystart)); - error8 = (f5 - sol8) / f5; - absolute_error8 = f5 - sol8; - // max_error8 = max_error_at_ystart(abs(error8)); - max_error8 = max_error_at_ystart(abs(absolute_error8)); - - output - << endl - << "Test 8: different profiles, with coefficients constant in z, default solver" - << endl; - // output<<"Time to set up is "<xstart, mesh->ystart, 0); - - for (int jx = mesh->xstart; jx <= mesh->xend; jx++) { - for (int jz = 0; jz < mesh->LocalNz; jz++) { - if (local_max_error < error(jx, mesh->ystart, jz)) { - local_max_error = error(jx, mesh->ystart, jz); + if (mesh.lastX()) { + for (int jx = mesh.xend + 1; jx < mesh.LocalNx; jx++) { + const BoutReal x = BoutReal(mesh.getGlobalXIndex(jx) - mesh.xstart) / nx; + for (int jy = 0; jy < mesh.LocalNy; jy++) { + for (int jz = 0; jz < mesh.LocalNz; jz++) { + const BoutReal z = BoutReal(jz) / nz; + result(jx, jy, jz) = + -1. + p * cos(2. * PI * x * 2.) * sin(2. 
* PI * (z - q) * 7.); + } } } } - - BoutReal max_error; - - MPI_Allreduce(&local_max_error, &max_error, 1, MPI_DOUBLE, MPI_MAX, BoutComm::get()); - - return max_error; + checkData(result); + return result; } diff --git a/tests/integrated/test-squash/runtest b/tests/integrated/test-squash/runtest index 692d561c59..c79cba0faf 100755 --- a/tests/integrated/test-squash/runtest +++ b/tests/integrated/test-squash/runtest @@ -15,7 +15,7 @@ import os.path # cores: 4 IGNORED_VARS_PATTERN = re.compile( - "(wtime|ncalls|arkode|cvode|run_id|run_restart_from|M.?SUB|N.?PE|iteration|wall_time|has_legacy_netcdf|hist_hi).*" + "(wtime|ncalls|arkode|cvode|run_id|run_restart_from|M.?SUB|N.?PE|iteration|wall_time|has_legacy_netcdf|hist_hi|openmp_threads).*" ) diff --git a/tests/unit/fake_parallel_mesh.hxx b/tests/unit/fake_parallel_mesh.hxx index c648bbab9c..805dcb2a0a 100644 --- a/tests/unit/fake_parallel_mesh.hxx +++ b/tests/unit/fake_parallel_mesh.hxx @@ -8,6 +8,8 @@ #include #include "../../src/mesh/impls/bout/boutmesh.hxx" +#include "bout/boundary_op.hxx" +#include "bout/boundary_region.hxx" #include "bout/boutcomm.hxx" #include "bout/coordinates.hxx" #include "bout/field2d.hxx" diff --git a/tests/unit/include/bout/test_hypre_interface.cxx b/tests/unit/include/bout/test_hypre_interface.cxx index a56f061a6e..e2eefab9a8 100644 --- a/tests/unit/include/bout/test_hypre_interface.cxx +++ b/tests/unit/include/bout/test_hypre_interface.cxx @@ -309,7 +309,7 @@ TYPED_TEST(HypreMatrixTest, SetElements) { auto j_index = static_cast(this->indexer->getGlobal(j)); HYPRE_Int ncolumns{1}; HYPRE_Complex value; - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) { HYPRE_IJMatrixGetValues(raw_matrix, 1, &ncolumns, &i_index, &j_index, &value); } if (i == j) { EXPECT_EQ(static_cast(value), diff --git a/tests/unit/include/bout/test_petsc_indexer.cxx b/tests/unit/include/bout/test_petsc_indexer.cxx index 082acafde6..3c20de9989 100644 --- a/tests/unit/include/bout/test_petsc_indexer.cxx +++ 
b/tests/unit/include/bout/test_petsc_indexer.cxx @@ -81,15 +81,15 @@ TYPED_TEST(IndexerTest, TestConvertIndex) { BOUT_FOR(i, f.getRegion("RGN_NOBNDRY")) { int global = this->globalSquareIndexer.getGlobal(i); EXPECT_GE(global, 0); - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) EXPECT_TRUE(indicesGlobalSquare.insert(global).second); global = this->globalStarIndexer.getGlobal(i); EXPECT_GE(global, 0); - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) EXPECT_TRUE(indicesGlobalStar.insert(global).second); global = this->globalDefaultIndexer.getGlobal(i); EXPECT_GE(global, 0); - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) EXPECT_TRUE(indicesGlobalDefault.insert(global).second); } @@ -97,11 +97,11 @@ TYPED_TEST(IndexerTest, TestConvertIndex) { BOUT_FOR(i, f.getRegion("RGN_XGUARDS")) { int global = this->globalSquareIndexer.getGlobal(i); EXPECT_GE(global, 0); - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) EXPECT_TRUE(indicesGlobalSquare.insert(global).second); global = this->globalStarIndexer.getGlobal(i); EXPECT_GE(global, 0); - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) EXPECT_TRUE(indicesGlobalStar.insert(global).second); EXPECT_LT(this->globalDefaultIndexer.getGlobal(i), 0); } @@ -111,11 +111,11 @@ TYPED_TEST(IndexerTest, TestConvertIndex) { BOUT_FOR(i, f.getRegion("RGN_YGUARDS")) { int global = this->globalSquareIndexer.getGlobal(i); EXPECT_GE(global, 0); - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) EXPECT_TRUE(indicesGlobalSquare.insert(global).second); global = this->globalStarIndexer.getGlobal(i); EXPECT_GE(global, 0); - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) EXPECT_TRUE(indicesGlobalStar.insert(global).second); EXPECT_LT(this->globalDefaultIndexer.getGlobal(i), 0); } diff --git a/tests/unit/include/bout/test_petsc_matrix.cxx b/tests/unit/include/bout/test_petsc_matrix.cxx index cc07145d8e..9ba2475096 100644 --- a/tests/unit/include/bout/test_petsc_matrix.cxx +++ b/tests/unit/include/bout/test_petsc_matrix.cxx @@ -177,7 +177,7 @@ TYPED_TEST(PetscMatrixTest, 
TestGetElements) { int i_ind = this->indexer->getGlobal(i); int j_ind = this->indexer->getGlobal(j); PetscScalar matContents; - BOUT_OMP(critical) + BOUT_OMP_SAFE(critical) MatGetValues(*rawmat, 1, &i_ind, 1, &j_ind, &matContents); if (i == j) { EXPECT_EQ(matContents, static_cast(i.ind)); diff --git a/tests/unit/include/bout/test_region.cxx b/tests/unit/include/bout/test_region.cxx index befcc07771..8776dad59a 100644 --- a/tests/unit/include/bout/test_region.cxx +++ b/tests/unit/include/bout/test_region.cxx @@ -262,7 +262,7 @@ TEST_F(RegionTest, regionLoopAllSection) { const auto& region = mesh->getRegion3D("RGN_ALL"); int count = 0; - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { BOUT_FOR_OMP(i, region, for reduction(+:count)) { ++count; @@ -296,7 +296,7 @@ TEST_F(RegionTest, regionLoopNoBndrySection) { const auto& region = mesh->getRegion3D("RGN_NOBNDRY"); int count = 0; - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { BOUT_FOR_OMP(i, region, for reduction(+:count)) { ++count; @@ -313,7 +313,7 @@ TEST_F(RegionTest, regionLoopAllInner) { const auto& region = mesh->getRegion3D("RGN_ALL"); Field3D a{0.}; - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { BOUT_FOR_INNER(i, region) { a[i] = 1.0; } } @@ -331,7 +331,7 @@ TEST_F(RegionTest, regionLoopNoBndryInner) { const auto& region = mesh->getRegion3D("RGN_NOBNDRY"); Field3D a{0.}; - BOUT_OMP(parallel) + BOUT_OMP_PERF(parallel) { BOUT_FOR_INNER(i, region) { a[i] = 1.0; } } diff --git a/tests/unit/mesh/test_boundary_factory.cxx b/tests/unit/mesh/test_boundary_factory.cxx index 6637e73711..b552f7629e 100644 --- a/tests/unit/mesh/test_boundary_factory.cxx +++ b/tests/unit/mesh/test_boundary_factory.cxx @@ -1,6 +1,7 @@ #include "gtest/gtest.h" #include "bout/boundary_factory.hxx" +#include "bout/boundary_op.hxx" #include "bout/boundary_region.hxx" #include "test_extras.hxx" diff --git a/tests/unit/test_extras.hxx b/tests/unit/test_extras.hxx index 6f78e99fd3..700b977ac8 100644 --- a/tests/unit/test_extras.hxx +++ 
b/tests/unit/test_extras.hxx @@ -8,6 +8,7 @@ #include #include +#include "bout/boundary_region.hxx" #include "bout/boutcomm.hxx" #include "bout/coordinates.hxx" #include "bout/field3d.hxx" @@ -232,8 +233,9 @@ public: RangeIterator iterateBndryUpperInnerY() const override { return RangeIterator(); } void addBoundary(BoundaryRegion* region) override { boundaries.push_back(region); } std::vector getBoundaries() override { return boundaries; } - std::vector getBoundariesPar() override { - return std::vector(); + std::vector> + getBoundariesPar(BoundaryParType UNUSED(type)) override { + return std::vector>(); } BoutReal GlobalX(int jx) const override { return jx; } BoutReal GlobalY(int jy) const override { return jy; } diff --git a/tools/archiving/sdctools/sdclib/sdclib.c b/tools/archiving/sdctools/sdclib/sdclib.c index f7db255a47..7294cc0791 100644 --- a/tools/archiving/sdctools/sdclib/sdclib.c +++ b/tools/archiving/sdctools/sdclib/sdclib.c @@ -34,8 +34,6 @@ #include "sdclib.h" -//#define DEBUG - #define DEFAULT_IFRAME 10 #define DEFAULT_ORDER 4 diff --git a/tools/pylib/_boutpp_build/CMakeLists.txt b/tools/pylib/_boutpp_build/CMakeLists.txt index 6b88986a28..3be2a5d2aa 100644 --- a/tools/pylib/_boutpp_build/CMakeLists.txt +++ b/tools/pylib/_boutpp_build/CMakeLists.txt @@ -25,7 +25,7 @@ bout_python_maybe_error(${Cython_FOUND} Cython) find_package(Bash) bout_python_maybe_error(${Bash_FOUND} Bash) -execute_process(COMMAND ${Python_EXECUTABLE} -c "import jinja2" +execute_process(COMMAND ${Python3_EXECUTABLE} -c "import jinja2" RESULT_VARIABLE jinja2_FOUND) if (jinja2_FOUND EQUAL 0) # We have jinja2 - all good @@ -33,7 +33,7 @@ else() bout_python_maybe_error(OFF jinja2) endif() -execute_process(COMMAND ${Python_EXECUTABLE} -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX')[:-3])" +execute_process(COMMAND ${Python3_EXECUTABLE} -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX')[:-3])" RESULT_VARIABLE PYTHON_WORKING OUTPUT_VARIABLE 
PYTHON_EXT_SUFFIX OUTPUT_STRIP_TRAILING_WHITESPACE @@ -73,7 +73,7 @@ foreach(file IN LISTS files) #message(FATAL_ERROR "${gen} ${src}/${file}.jinja") add_custom_command(OUTPUT ${gen} COMMAND ${CMAKE_COMMAND} -E make_directory ${tar} - COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${tar}/..:\${PYTHONPATH} ${Python_EXECUTABLE} generate.py ${file}.jinja ${gen} + COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${tar}/..:\${PYTHONPATH} ${Python3_EXECUTABLE} generate.py ${file}.jinja ${gen} DEPENDS ${src}/${file}.jinja DEPENDS ${src}/helper.py DEPENDS ${src}/resolve_enum_inv.pyx.jinja @@ -93,8 +93,7 @@ endforeach() add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libboutpp.cpp COMMAND ${CMAKE_COMMAND} -E copy boutpp.pyx libboutpp.pyx - COMMAND ${Python_EXECUTABLE} -m cython libboutpp.pyx --cplus -3 -X binding=True -X embedsignature=True - COMMENT "Cythonizing python interface" + COMMAND ${Python3_EXECUTABLE} -m cython libboutpp.pyx --cplus -3 -X binding=True -X embedsignature=True WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${boutpp_depends} ) @@ -120,5 +119,6 @@ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/boutpp.py DESTINATION ${CMAKE_INSTALL_PYTHON_SITEARCH}/boutpp/ RENAME __init__.py ) + target_link_libraries(boutpp${PYTHON_EXT_SUFFIX} bout++) -target_include_directories(boutpp${PYTHON_EXT_SUFFIX} PRIVATE $ ${Numpy_INCLUDE_DIRS} ${Python_INCLUDE_DIRS}) +target_include_directories(boutpp${PYTHON_EXT_SUFFIX} PRIVATE $ ${Numpy_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS}) diff --git a/tools/pylib/_boutpp_build/bout_options.pxd b/tools/pylib/_boutpp_build/bout_options.pxd index be17608cea..365e08bcc7 100644 --- a/tools/pylib/_boutpp_build/bout_options.pxd +++ b/tools/pylib/_boutpp_build/bout_options.pxd @@ -43,6 +43,7 @@ cdef extern from "bout/options.hxx": void get(string, double&, double) void get(string, bool&, bool) void cleanCache() + void setConditionallyUsed() cdef extern from "bout/optionsreader.hxx": diff --git a/tools/pylib/_boutpp_build/boutcpp.pxd.jinja 
b/tools/pylib/_boutpp_build/boutcpp.pxd.jinja index 12e210a5b5..8f838b864c 100644 --- a/tools/pylib/_boutpp_build/boutcpp.pxd.jinja +++ b/tools/pylib/_boutpp_build/boutcpp.pxd.jinja @@ -148,10 +148,10 @@ cdef extern from "bout/physicsmodel.hxx": ctypedef void (*Method)(void *param, void *user_data) cdef extern from "helper.h": cppclass PythonModel(PhysicsModel): - int rhs(double t) + int rhs(double t) except +raise_bout_py_error void pyinit() void free() - void solve() + void solve() except +raise_bout_py_error Solver * getSolver() void set_rhs_func(PythonModelCallback*) void set_init_func(PythonModelCallback*) diff --git a/tools/pylib/_boutpp_build/boutpp.pyx.jinja b/tools/pylib/_boutpp_build/boutpp.pyx.jinja index 3aeb1428eb..9aedbb291a 100644 --- a/tools/pylib/_boutpp_build/boutpp.pyx.jinja +++ b/tools/pylib/_boutpp_build/boutpp.pyx.jinja @@ -583,9 +583,9 @@ cdef class {{ field.field_type }}: {% endfor %} def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.isSelfOwned and self.cobj != NULL: del self.cobj self.cobj = NULL @@ -645,9 +645,9 @@ cdef class {{ vec }}: def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.isSelfOwned and self.cobj != NULL: del self.cobj self.cobj=NULL @@ -742,9 +742,9 @@ cdef class Mesh: return msh def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.cobj and self.isSelfOwned: del self.cobj self.cobj = NULL @@ -850,9 +850,9 @@ cdef class Coordinates: {% endfor %} def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.cobj and self.isSelfOwned: del self.cobj self.cobj = NULL @@ -931,9 +931,9 @@ cdef class FieldFactory: checkInit() cobj=< c.FieldFactory*>0 def __dealloc__(self): - 
self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.cobj != NULL: del self.cobj self.cobj = NULL @@ -965,9 +965,9 @@ cdef class PythonModelCallback: self.cobj = new c.PythonModelCallback(callback, method) def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.cobj: del self.cobj self.cobj = NULL @@ -1037,12 +1037,12 @@ cdef class PhysicsModelBase(object): self.cmodel.set_init_func(self.callbackinit) def __dealloc__(self): - if hasattr(self, "__boutpp_dealloc"): - self.__boutpp_dealloc() + if hasattr(self, "_boutpp_dealloc"): + self._boutpp_dealloc() else: - PhysicsModelBase.__boutpp_dealloc(self) + PhysicsModelBase._boutpp_dealloc(self) - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.cmodel != 0: self.cmodel.free() del self.cmodel @@ -1123,8 +1123,8 @@ class PhysicsModel(PhysicsModelBase): def __dealloc__(self): super(PhysicsModel,self).__dealloc__() - def __boutpp_dealloc(self): - super(PhysicsModel,self).__boutpp_dealloc() + def _boutpp_dealloc(self): + super(PhysicsModel,self)._boutpp_dealloc() cdef extern from "bout/bout.hxx": int BoutInitialise(int&, char **&) except +raise_bout_py_error @@ -1204,13 +1204,14 @@ def finalise(): PythonModelCallback) for obj in objects: if isinstance(obj, ourClasses): - if hasattr(obj, "__boutpp_dealloc"): - obj.__boutpp_dealloc() + if hasattr(obj, "_boutpp_dealloc"): + obj._boutpp_dealloc() else: for ourClass in ourClasses: if isinstance(obj, ourClass): - ourClass.__boutpp_dealloc(obj) - break + if hasattr(ourClass, "_boutpp_dealloc"): + ourClass._boutpp_dealloc(obj) + break del objects # Actually finalise if wasInit: @@ -1715,10 +1716,19 @@ cdef class Options: opt.get(key, ret_str, default_) return ret_str.decode() + def setConditionallyUsed(self): + """Set the attribute "conditionally used" to be true for \p options + and all its children/sections, causing 
`Options::getUnused` to + assume those options have been used. This is useful to ignore + options when checking for typos etc. + """ + cdef c.Options* opt = self.cobj + opt.setConditionallyUsed() + def __dealloc__(self): - self.__boutpp_dealloc() + self._boutpp_dealloc() - def __boutpp_dealloc(self): + def _boutpp_dealloc(self): if self.isSelfOwned and self.cobj != NULL: del self.cobj self.cobj = NULL